Repository: firecracker-microvm/firecracker Branch: main Commit: 40c67b2f428e Files: 755 Total size: 7.2 MB Directory structure: gitextract_tj40a4a3/ ├── .buildkite/ │ ├── common.py │ ├── pipeline_coverage.py │ ├── pipeline_cpu_template.py │ ├── pipeline_cross.py │ ├── pipeline_docker_popular.py │ ├── pipeline_perf.py │ ├── pipeline_pr.py │ ├── pipeline_pr_no_block.py │ └── pipeline_release_qa.py ├── .cargo/ │ ├── audit.toml │ └── config.toml ├── .dockerignore ├── .git-blame-ignore-revs ├── .github/ │ ├── CODEOWNERS │ ├── ISSUE_TEMPLATE/ │ │ ├── bug_report.md │ │ └── feature_request.md │ ├── codecov.yml │ ├── dependabot.yml │ ├── pull_request_template.md │ └── workflows/ │ ├── deny_dirty_cargo_locks.yml │ ├── dependency_modification_check.yml │ ├── monitor_libseccomp_releases.yml │ ├── send_pr_notification.yml │ ├── send_release_notification.yml │ └── trigger_ab_tests.yml ├── .gitignore ├── .gitlint ├── .mailmap ├── .mdformat.toml ├── .python-version ├── CHANGELOG.md ├── CHARTER.md ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── CREDITS.md ├── Cargo.toml ├── DEPRECATED.md ├── FAQ.md ├── LICENSE ├── MAINTAINERS.md ├── NOTICE ├── PGP-KEY.asc ├── README.md ├── SECURITY.md ├── SPECIFICATION.md ├── THIRD-PARTY ├── deny.toml ├── docs/ │ ├── RELEASE_POLICY.md │ ├── api-change-runbook.md │ ├── api_requests/ │ │ ├── actions.md │ │ ├── block-caching.md │ │ ├── block-io-engine.md │ │ ├── block-vhost-user.md │ │ ├── patch-block.md │ │ └── patch-network-interface.md │ ├── ballooning.md │ ├── cpu_templates/ │ │ ├── boot-protocol.md │ │ ├── cpu-template-helper.md │ │ ├── cpu-templates.md │ │ ├── cpuid-normalization.md │ │ └── schema.json │ ├── design.md │ ├── dev-machine-setup.md │ ├── device-api.md │ ├── entropy.md │ ├── formal-verification.md │ ├── gdb-debugging.md │ ├── getting-started.md │ ├── hugepages.md │ ├── images/ │ │ └── vsock-connections.drawio │ ├── initrd.md │ ├── jailer.md │ ├── kernel-policy.md │ ├── logger.md │ ├── memory-hotplug.md │ ├── metrics.md │ ├── mmds/ │ 
│ ├── mmds-design.md │ │ └── mmds-user-guide.md │ ├── network-performance.md │ ├── network-setup.md │ ├── pmem.md │ ├── prod-host-setup.md │ ├── pvh.md │ ├── rootfs-and-kernel-setup.md │ ├── seccomp.md │ ├── seccompiler.md │ ├── snapshotting/ │ │ ├── handling-page-faults-on-snapshot-resume.md │ │ ├── network-for-clones.md │ │ ├── random-for-clones.md │ │ ├── snapshot-editor.md │ │ ├── snapshot-support.md │ │ └── versioning.md │ ├── tracing.md │ └── vsock.md ├── resources/ │ ├── chroot.sh │ ├── guest_configs/ │ │ ├── DISCLAIMER.md │ │ ├── ci.config │ │ ├── debug.config │ │ ├── ftrace.config │ │ ├── microvm-kernel-ci-aarch64-5.10.config │ │ ├── microvm-kernel-ci-aarch64-6.1.config │ │ ├── microvm-kernel-ci-x86_64-5.10-no-acpi.config │ │ ├── microvm-kernel-ci-x86_64-5.10.config │ │ ├── microvm-kernel-ci-x86_64-6.1.config │ │ ├── pcie.config │ │ ├── virtio-mem.config │ │ ├── virtio-pmem.config │ │ └── vmclock.config │ ├── overlay/ │ │ ├── etc/ │ │ │ └── systemd/ │ │ │ └── system/ │ │ │ ├── fcnet.service │ │ │ └── var-lib-systemd.mount │ │ └── usr/ │ │ └── local/ │ │ └── bin/ │ │ ├── devmemread.c │ │ ├── fast_page_fault_helper.c │ │ ├── fcnet-setup.sh │ │ ├── fillmem.c │ │ ├── go_sdk_cred_provider.go/ │ │ │ └── main.go │ │ ├── go_sdk_cred_provider_with_custom_endpoint.go/ │ │ │ └── main.go │ │ ├── init.c │ │ └── readmem.c │ ├── patches/ │ │ └── vmclock/ │ │ ├── 5.10/ │ │ │ ├── 0001-ptp-vmclock-add-vm-generation-counter.patch │ │ │ ├── 0002-ptp-vmclock-support-device-notifications.patch │ │ │ ├── 0003-dt-bindings-ptp-Add-amazon-vmclock.patch │ │ │ ├── 0004-ptp-ptp_vmclock-Add-device-tree-support.patch │ │ │ ├── 0005-ptp-ptp_vmclock-add-VMCLOCK-to-ACPI-device-match.patch │ │ │ ├── 0006-ptp-ptp_vmclock-remove-dependency-on-CONFIG_ACPI.patch │ │ │ └── 0007-ptp-ptp_vmclock-return-TAI-not-UTC.patch │ │ └── 6.1/ │ │ ├── 0001-ptp-vmclock-add-vm-generation-counter.patch │ │ ├── 0002-ptp-vmclock-support-device-notifications.patch │ │ ├── 
0003-dt-bindings-ptp-Add-amazon-vmclock.patch │ │ ├── 0004-ptp-ptp_vmclock-Add-device-tree-support.patch │ │ ├── 0005-ptp-ptp_vmclock-add-VMCLOCK-to-ACPI-device-match.patch │ │ ├── 0006-ptp-ptp_vmclock-remove-dependency-on-CONFIG_ACPI.patch │ │ └── 0007-ptp-ptp_vmclock-return-TAI-not-UTC.patch │ ├── rebuild.sh │ └── seccomp/ │ ├── aarch64-unknown-linux-musl.json │ ├── unimplemented.json │ └── x86_64-unknown-linux-musl.json ├── rust-toolchain.toml ├── rustfmt.toml ├── src/ │ ├── acpi-tables/ │ │ ├── Cargo.toml │ │ └── src/ │ │ ├── aml.rs │ │ ├── dsdt.rs │ │ ├── fadt.rs │ │ ├── lib.rs │ │ ├── madt.rs │ │ ├── mcfg.rs │ │ ├── rsdp.rs │ │ └── xsdt.rs │ ├── clippy-tracing/ │ │ ├── Cargo.toml │ │ ├── README.md │ │ ├── src/ │ │ │ └── main.rs │ │ └── tests/ │ │ └── integration_tests.rs │ ├── cpu-template-helper/ │ │ ├── Cargo.toml │ │ ├── build.rs │ │ └── src/ │ │ ├── fingerprint/ │ │ │ ├── compare.rs │ │ │ ├── dump.rs │ │ │ └── mod.rs │ │ ├── main.rs │ │ ├── template/ │ │ │ ├── dump/ │ │ │ │ ├── aarch64.rs │ │ │ │ ├── mod.rs │ │ │ │ └── x86_64.rs │ │ │ ├── mod.rs │ │ │ ├── strip/ │ │ │ │ ├── aarch64.rs │ │ │ │ ├── mod.rs │ │ │ │ └── x86_64.rs │ │ │ └── verify/ │ │ │ ├── aarch64.rs │ │ │ ├── mod.rs │ │ │ └── x86_64.rs │ │ └── utils/ │ │ ├── aarch64.rs │ │ ├── mock_kernel/ │ │ │ └── main.c │ │ ├── mod.rs │ │ └── x86_64.rs │ ├── firecracker/ │ │ ├── Cargo.toml │ │ ├── build.rs │ │ ├── examples/ │ │ │ ├── README.md │ │ │ ├── seccomp/ │ │ │ │ ├── harmless.rs │ │ │ │ ├── jailer.rs │ │ │ │ ├── malicious.rs │ │ │ │ └── panic.rs │ │ │ └── uffd/ │ │ │ ├── fault_all_handler.rs │ │ │ ├── malicious_handler.rs │ │ │ ├── on_demand_handler.rs │ │ │ └── uffd_utils.rs │ │ ├── src/ │ │ │ ├── api_server/ │ │ │ │ ├── mod.rs │ │ │ │ ├── parsed_request.rs │ │ │ │ └── request/ │ │ │ │ ├── actions.rs │ │ │ │ ├── balloon.rs │ │ │ │ ├── boot_source.rs │ │ │ │ ├── cpu_configuration.rs │ │ │ │ ├── drive.rs │ │ │ │ ├── entropy.rs │ │ │ │ ├── hotplug/ │ │ │ │ │ ├── memory.rs │ │ │ │ │ └── mod.rs │ │ │ │ 
├── instance_info.rs │ │ │ │ ├── logger.rs │ │ │ │ ├── machine_configuration.rs │ │ │ │ ├── metrics.rs │ │ │ │ ├── mmds.rs │ │ │ │ ├── mod.rs │ │ │ │ ├── net.rs │ │ │ │ ├── pmem.rs │ │ │ │ ├── serial.rs │ │ │ │ ├── snapshot.rs │ │ │ │ ├── version.rs │ │ │ │ └── vsock.rs │ │ │ ├── api_server_adapter.rs │ │ │ ├── generated/ │ │ │ │ ├── mod.rs │ │ │ │ └── prctl.rs │ │ │ ├── lib.rs │ │ │ ├── main.rs │ │ │ ├── metrics.rs │ │ │ └── seccomp.rs │ │ ├── swagger/ │ │ │ └── firecracker.yaml │ │ └── tests/ │ │ └── verify_dependencies.rs │ ├── jailer/ │ │ ├── Cargo.toml │ │ └── src/ │ │ ├── cgroup.rs │ │ ├── chroot.rs │ │ ├── env.rs │ │ ├── main.rs │ │ └── resource_limits.rs │ ├── log-instrument/ │ │ ├── Cargo.toml │ │ ├── README.md │ │ ├── examples/ │ │ │ ├── five.rs │ │ │ ├── four.rs │ │ │ ├── one.rs │ │ │ ├── six.rs │ │ │ ├── three.rs │ │ │ └── two.rs │ │ └── src/ │ │ └── lib.rs │ ├── log-instrument-macros/ │ │ ├── Cargo.toml │ │ └── src/ │ │ └── lib.rs │ ├── pci/ │ │ ├── Cargo.toml │ │ └── src/ │ │ └── lib.rs │ ├── rebase-snap/ │ │ ├── Cargo.toml │ │ └── src/ │ │ └── main.rs │ ├── seccompiler/ │ │ ├── Cargo.toml │ │ ├── build.rs │ │ └── src/ │ │ ├── bin.rs │ │ ├── bindings.rs │ │ ├── lib.rs │ │ └── types.rs │ ├── snapshot-editor/ │ │ ├── Cargo.toml │ │ └── src/ │ │ ├── edit_memory.rs │ │ ├── edit_vmstate.rs │ │ ├── info.rs │ │ ├── main.rs │ │ └── utils.rs │ ├── utils/ │ │ ├── Cargo.toml │ │ └── src/ │ │ ├── arg_parser.rs │ │ ├── lib.rs │ │ ├── time.rs │ │ └── validators.rs │ └── vmm/ │ ├── Cargo.toml │ ├── benches/ │ │ ├── block_request.rs │ │ ├── cpu_templates.rs │ │ ├── memory_access.rs │ │ └── queue.rs │ ├── src/ │ │ ├── acpi/ │ │ │ ├── mod.rs │ │ │ └── x86_64.rs │ │ ├── arch/ │ │ │ ├── aarch64/ │ │ │ │ ├── cache_info.rs │ │ │ │ ├── fdt.rs │ │ │ │ ├── gic/ │ │ │ │ │ ├── gicv2/ │ │ │ │ │ │ ├── mod.rs │ │ │ │ │ │ └── regs/ │ │ │ │ │ │ ├── dist_regs.rs │ │ │ │ │ │ ├── icc_regs.rs │ │ │ │ │ │ └── mod.rs │ │ │ │ │ ├── gicv3/ │ │ │ │ │ │ ├── mod.rs │ │ │ │ │ │ └── regs/ │ │ │ 
│ │ │ ├── dist_regs.rs │ │ │ │ │ │ ├── icc_regs.rs │ │ │ │ │ │ ├── its_regs.rs │ │ │ │ │ │ ├── mod.rs │ │ │ │ │ │ └── redist_regs.rs │ │ │ │ │ ├── mod.rs │ │ │ │ │ └── regs.rs │ │ │ │ ├── kvm.rs │ │ │ │ ├── layout.rs │ │ │ │ ├── mod.rs │ │ │ │ ├── output_GICv2.dtb │ │ │ │ ├── output_GICv3.dtb │ │ │ │ ├── output_initrd_GICv2.dtb │ │ │ │ ├── output_initrd_GICv3.dtb │ │ │ │ ├── regs.rs │ │ │ │ ├── vcpu.rs │ │ │ │ └── vm.rs │ │ │ ├── mod.rs │ │ │ └── x86_64/ │ │ │ ├── cpu_model.rs │ │ │ ├── gdt.rs │ │ │ ├── generated/ │ │ │ │ ├── arch_prctl.rs │ │ │ │ ├── hyperv.rs │ │ │ │ ├── hyperv_tlfs.rs │ │ │ │ ├── mod.rs │ │ │ │ ├── mpspec.rs │ │ │ │ ├── msr_index.rs │ │ │ │ └── perf_event.rs │ │ │ ├── interrupts.rs │ │ │ ├── kvm.rs │ │ │ ├── layout.rs │ │ │ ├── mod.rs │ │ │ ├── mptable.rs │ │ │ ├── msr.rs │ │ │ ├── regs.rs │ │ │ ├── vcpu.rs │ │ │ ├── vm.rs │ │ │ └── xstate.rs │ │ ├── builder.rs │ │ ├── cpu_config/ │ │ │ ├── aarch64/ │ │ │ │ ├── custom_cpu_template.rs │ │ │ │ ├── mod.rs │ │ │ │ ├── static_cpu_templates/ │ │ │ │ │ ├── mod.rs │ │ │ │ │ └── v1n1.rs │ │ │ │ └── test_utils.rs │ │ │ ├── mod.rs │ │ │ ├── templates.rs │ │ │ ├── templates_serde.rs │ │ │ ├── test_utils.rs │ │ │ └── x86_64/ │ │ │ ├── cpuid/ │ │ │ │ ├── amd/ │ │ │ │ │ ├── mod.rs │ │ │ │ │ └── normalize.rs │ │ │ │ ├── common.rs │ │ │ │ ├── intel/ │ │ │ │ │ ├── mod.rs │ │ │ │ │ └── normalize.rs │ │ │ │ ├── mod.rs │ │ │ │ └── normalize.rs │ │ │ ├── custom_cpu_template.rs │ │ │ ├── mod.rs │ │ │ ├── static_cpu_templates/ │ │ │ │ ├── c3.rs │ │ │ │ ├── mod.rs │ │ │ │ ├── t2.rs │ │ │ │ ├── t2a.rs │ │ │ │ ├── t2cl.rs │ │ │ │ └── t2s.rs │ │ │ └── test_utils.rs │ │ ├── device_manager/ │ │ │ ├── acpi.rs │ │ │ ├── legacy.rs │ │ │ ├── mmio.rs │ │ │ ├── mod.rs │ │ │ ├── pci_mngr.rs │ │ │ └── persist.rs │ │ ├── devices/ │ │ │ ├── acpi/ │ │ │ │ ├── generated/ │ │ │ │ │ ├── mod.rs │ │ │ │ │ └── vmclock_abi.rs │ │ │ │ ├── mod.rs │ │ │ │ ├── vmclock.rs │ │ │ │ └── vmgenid.rs │ │ │ ├── legacy/ │ │ │ │ ├── i8042.rs │ │ │ │ ├── 
mod.rs │ │ │ │ ├── rtc_pl031.rs │ │ │ │ └── serial.rs │ │ │ ├── mod.rs │ │ │ ├── pci/ │ │ │ │ ├── mod.rs │ │ │ │ └── pci_segment.rs │ │ │ ├── pseudo/ │ │ │ │ ├── boot_timer.rs │ │ │ │ └── mod.rs │ │ │ └── virtio/ │ │ │ ├── balloon/ │ │ │ │ ├── device.rs │ │ │ │ ├── event_handler.rs │ │ │ │ ├── metrics.rs │ │ │ │ ├── mod.rs │ │ │ │ ├── persist.rs │ │ │ │ ├── test_utils.rs │ │ │ │ └── util.rs │ │ │ ├── block/ │ │ │ │ ├── device.rs │ │ │ │ ├── mod.rs │ │ │ │ ├── persist.rs │ │ │ │ ├── vhost_user/ │ │ │ │ │ ├── device.rs │ │ │ │ │ ├── event_handler.rs │ │ │ │ │ ├── mod.rs │ │ │ │ │ └── persist.rs │ │ │ │ └── virtio/ │ │ │ │ ├── device.rs │ │ │ │ ├── event_handler.rs │ │ │ │ ├── io/ │ │ │ │ │ ├── async_io.rs │ │ │ │ │ ├── mod.rs │ │ │ │ │ └── sync_io.rs │ │ │ │ ├── metrics.rs │ │ │ │ ├── mod.rs │ │ │ │ ├── persist.rs │ │ │ │ ├── request.rs │ │ │ │ └── test_utils.rs │ │ │ ├── device.rs │ │ │ ├── generated/ │ │ │ │ ├── mod.rs │ │ │ │ ├── virtio_blk.rs │ │ │ │ ├── virtio_config.rs │ │ │ │ ├── virtio_ids.rs │ │ │ │ ├── virtio_mem.rs │ │ │ │ ├── virtio_net.rs │ │ │ │ └── virtio_ring.rs │ │ │ ├── iov_deque.rs │ │ │ ├── iovec.rs │ │ │ ├── mem/ │ │ │ │ ├── device.rs │ │ │ │ ├── event_handler.rs │ │ │ │ ├── metrics.rs │ │ │ │ ├── mod.rs │ │ │ │ ├── persist.rs │ │ │ │ └── request.rs │ │ │ ├── mod.rs │ │ │ ├── net/ │ │ │ │ ├── device.rs │ │ │ │ ├── event_handler.rs │ │ │ │ ├── generated/ │ │ │ │ │ ├── if_tun.rs │ │ │ │ │ ├── iff.rs │ │ │ │ │ ├── mod.rs │ │ │ │ │ └── sockios.rs │ │ │ │ ├── metrics.rs │ │ │ │ ├── mod.rs │ │ │ │ ├── persist.rs │ │ │ │ ├── tap.rs │ │ │ │ └── test_utils.rs │ │ │ ├── persist.rs │ │ │ ├── pmem/ │ │ │ │ ├── device.rs │ │ │ │ ├── event_handler.rs │ │ │ │ ├── metrics.rs │ │ │ │ ├── mod.rs │ │ │ │ └── persist.rs │ │ │ ├── queue.rs │ │ │ ├── rng/ │ │ │ │ ├── device.rs │ │ │ │ ├── event_handler.rs │ │ │ │ ├── metrics.rs │ │ │ │ ├── mod.rs │ │ │ │ └── persist.rs │ │ │ ├── test_utils.rs │ │ │ ├── transport/ │ │ │ │ ├── mmio.rs │ │ │ │ ├── mod.rs │ │ │ │ └── pci/ 
│ │ │ │ ├── common_config.rs │ │ │ │ ├── device.rs │ │ │ │ └── mod.rs │ │ │ ├── vhost_user.rs │ │ │ ├── vhost_user_metrics.rs │ │ │ └── vsock/ │ │ │ ├── csm/ │ │ │ │ ├── connection.rs │ │ │ │ ├── mod.rs │ │ │ │ └── txbuf.rs │ │ │ ├── device.rs │ │ │ ├── event_handler.rs │ │ │ ├── metrics.rs │ │ │ ├── mod.rs │ │ │ ├── packet.rs │ │ │ ├── persist.rs │ │ │ ├── test_utils.rs │ │ │ └── unix/ │ │ │ ├── mod.rs │ │ │ ├── muxer.rs │ │ │ ├── muxer_killq.rs │ │ │ └── muxer_rxq.rs │ │ ├── dumbo/ │ │ │ ├── mod.rs │ │ │ ├── pdu/ │ │ │ │ ├── arp.rs │ │ │ │ ├── bytes.rs │ │ │ │ ├── ethernet.rs │ │ │ │ ├── ipv4.rs │ │ │ │ ├── mod.rs │ │ │ │ └── tcp.rs │ │ │ └── tcp/ │ │ │ ├── connection.rs │ │ │ ├── endpoint.rs │ │ │ ├── handler.rs │ │ │ └── mod.rs │ │ ├── gdb/ │ │ │ ├── arch/ │ │ │ │ ├── aarch64.rs │ │ │ │ ├── mod.rs │ │ │ │ └── x86.rs │ │ │ ├── event_loop.rs │ │ │ ├── mod.rs │ │ │ └── target.rs │ │ ├── initrd.rs │ │ ├── io_uring/ │ │ │ ├── generated.rs │ │ │ ├── mod.rs │ │ │ ├── operation/ │ │ │ │ ├── cqe.rs │ │ │ │ ├── mod.rs │ │ │ │ └── sqe.rs │ │ │ ├── probe.rs │ │ │ ├── queue/ │ │ │ │ ├── completion.rs │ │ │ │ ├── mmap.rs │ │ │ │ ├── mod.rs │ │ │ │ └── submission.rs │ │ │ └── restriction.rs │ │ ├── lib.rs │ │ ├── logger/ │ │ │ ├── logging.rs │ │ │ ├── metrics.rs │ │ │ └── mod.rs │ │ ├── mmds/ │ │ │ ├── data_store.rs │ │ │ ├── mod.rs │ │ │ ├── ns.rs │ │ │ ├── persist.rs │ │ │ ├── token.rs │ │ │ └── token_headers.rs │ │ ├── pci/ │ │ │ ├── bus.rs │ │ │ ├── configuration.rs │ │ │ ├── mod.rs │ │ │ └── msix.rs │ │ ├── persist.rs │ │ ├── rate_limiter/ │ │ │ ├── mod.rs │ │ │ └── persist.rs │ │ ├── resources.rs │ │ ├── rpc_interface.rs │ │ ├── seccomp.rs │ │ ├── signal_handler.rs │ │ ├── snapshot/ │ │ │ ├── crc.rs │ │ │ ├── mod.rs │ │ │ └── persist.rs │ │ ├── test_utils/ │ │ │ ├── mock_resources/ │ │ │ │ ├── dirtying_init.tgz │ │ │ │ ├── make_noisy_kernel.sh │ │ │ │ └── mod.rs │ │ │ └── mod.rs │ │ ├── utils/ │ │ │ ├── byte_order.rs │ │ │ ├── mod.rs │ │ │ ├── net/ │ │ │ │ ├── 
ipv4addr.rs │ │ │ │ ├── mac.rs │ │ │ │ └── mod.rs │ │ │ ├── signal.rs │ │ │ └── sm.rs │ │ ├── vmm_config/ │ │ │ ├── balloon.rs │ │ │ ├── boot_source.rs │ │ │ ├── drive.rs │ │ │ ├── entropy.rs │ │ │ ├── instance_info.rs │ │ │ ├── machine_config.rs │ │ │ ├── memory_hotplug.rs │ │ │ ├── metrics.rs │ │ │ ├── mmds.rs │ │ │ ├── mod.rs │ │ │ ├── net.rs │ │ │ ├── pmem.rs │ │ │ ├── serial.rs │ │ │ ├── snapshot.rs │ │ │ └── vsock.rs │ │ └── vstate/ │ │ ├── bus.rs │ │ ├── interrupts.rs │ │ ├── kvm.rs │ │ ├── memory.rs │ │ ├── mod.rs │ │ ├── resources.rs │ │ ├── vcpu.rs │ │ └── vm.rs │ └── tests/ │ ├── devices.rs │ ├── integration_tests.rs │ └── io_uring.rs ├── tests/ │ ├── README.md │ ├── conftest.py │ ├── data/ │ │ ├── cpu_template_helper/ │ │ │ ├── fingerprint_AMD_GENOA_5.10host.json │ │ │ ├── fingerprint_AMD_GENOA_6.1host.json │ │ │ ├── fingerprint_AMD_MILAN_5.10host.json │ │ │ ├── fingerprint_AMD_MILAN_6.1host.json │ │ │ ├── fingerprint_ARM_NEOVERSE_N1_5.10host.json │ │ │ ├── fingerprint_ARM_NEOVERSE_N1_6.1host.json │ │ │ ├── fingerprint_ARM_NEOVERSE_V1_5.10host.json │ │ │ ├── fingerprint_ARM_NEOVERSE_V1_6.1host.json │ │ │ ├── fingerprint_ARM_NEOVERSE_V2_5.10host.json │ │ │ ├── fingerprint_ARM_NEOVERSE_V2_6.1host.json │ │ │ ├── fingerprint_INTEL_CASCADELAKE_5.10host.json │ │ │ ├── fingerprint_INTEL_CASCADELAKE_6.1host.json │ │ │ ├── fingerprint_INTEL_GRANITE_RAPIDS_5.10host.json │ │ │ ├── fingerprint_INTEL_GRANITE_RAPIDS_6.1host.json │ │ │ ├── fingerprint_INTEL_ICELAKE_5.10host.json │ │ │ ├── fingerprint_INTEL_ICELAKE_6.1host.json │ │ │ ├── fingerprint_INTEL_SAPPHIRE_RAPIDS_5.10host.json │ │ │ └── fingerprint_INTEL_SAPPHIRE_RAPIDS_6.1host.json │ │ ├── custom_cpu_templates/ │ │ │ ├── AARCH64_WITH_SVE_AND_PAC.json │ │ │ ├── C3.json │ │ │ ├── GNR_TO_T2_5.10.json │ │ │ ├── GNR_TO_T2_6.1.json │ │ │ ├── SPR_TO_T2_5.10.json │ │ │ ├── SPR_TO_T2_6.1.json │ │ │ ├── T2.json │ │ │ ├── T2A.json │ │ │ ├── T2CL.json │ │ │ ├── T2S.json │ │ │ └── V1N1.json │ │ ├── metadata.json │ │ ├── 
metadata_invalid.json │ │ └── msr/ │ │ ├── msr_list_GNR_TO_T2_5.10_INTEL_GRANITE_RAPIDS_5.10host_5.10guest.csv │ │ ├── msr_list_GNR_TO_T2_5.10_INTEL_GRANITE_RAPIDS_5.10host_6.1guest.csv │ │ ├── msr_list_GNR_TO_T2_6.1_INTEL_GRANITE_RAPIDS_6.1host_5.10guest.csv │ │ ├── msr_list_GNR_TO_T2_6.1_INTEL_GRANITE_RAPIDS_6.1host_6.1guest.csv │ │ ├── msr_list_SPR_TO_T2_5.10_INTEL_SAPPHIRE_RAPIDS_5.10host_5.10guest.csv │ │ ├── msr_list_SPR_TO_T2_5.10_INTEL_SAPPHIRE_RAPIDS_5.10host_6.1guest.csv │ │ ├── msr_list_SPR_TO_T2_6.1_INTEL_SAPPHIRE_RAPIDS_6.1host_5.10guest.csv │ │ ├── msr_list_SPR_TO_T2_6.1_INTEL_SAPPHIRE_RAPIDS_6.1host_6.1guest.csv │ │ ├── msr_list_T2A_AMD_MILAN_5.10host_5.10guest.csv │ │ ├── msr_list_T2A_AMD_MILAN_5.10host_6.1guest.csv │ │ ├── msr_list_T2A_AMD_MILAN_6.1host_5.10guest.csv │ │ ├── msr_list_T2A_AMD_MILAN_6.1host_6.1guest.csv │ │ ├── msr_list_T2CL_INTEL_CASCADELAKE_5.10host_5.10guest.csv │ │ ├── msr_list_T2CL_INTEL_CASCADELAKE_5.10host_6.1guest.csv │ │ ├── msr_list_T2CL_INTEL_CASCADELAKE_6.1host_5.10guest.csv │ │ ├── msr_list_T2CL_INTEL_CASCADELAKE_6.1host_6.1guest.csv │ │ ├── msr_list_T2CL_INTEL_ICELAKE_5.10host_5.10guest.csv │ │ ├── msr_list_T2CL_INTEL_ICELAKE_5.10host_6.1guest.csv │ │ ├── msr_list_T2CL_INTEL_ICELAKE_6.1host_5.10guest.csv │ │ ├── msr_list_T2CL_INTEL_ICELAKE_6.1host_6.1guest.csv │ │ ├── msr_list_T2S_INTEL_CASCADELAKE_5.10host_5.10guest.csv │ │ ├── msr_list_T2S_INTEL_CASCADELAKE_5.10host_6.1guest.csv │ │ ├── msr_list_T2S_INTEL_CASCADELAKE_6.1host_5.10guest.csv │ │ ├── msr_list_T2S_INTEL_CASCADELAKE_6.1host_6.1guest.csv │ │ ├── msr_reader.c │ │ ├── msr_writer.sh │ │ └── wrmsr_list.txt │ ├── framework/ │ │ ├── __init__.py │ │ ├── ab_test.py │ │ ├── artifacts.py │ │ ├── defs.py │ │ ├── gitlint_rules.py │ │ ├── guest_stats.py │ │ ├── http_api.py │ │ ├── jailer.py │ │ ├── microvm.py │ │ ├── microvm_helpers.py │ │ ├── properties.py │ │ ├── static_analysis.py │ │ ├── swagger_validator.py │ │ ├── utils.py │ │ ├── utils_cpu_templates.py │ │ ├── 
utils_cpuid.py │ │ ├── utils_drive.py │ │ ├── utils_fio.py │ │ ├── utils_ftrace.py │ │ ├── utils_imdsv2.py │ │ ├── utils_iperf.py │ │ ├── utils_repo.py │ │ ├── utils_uffd.py │ │ ├── utils_vsock.py │ │ ├── vm_config.json │ │ ├── vm_config_cpu_template_C3.json │ │ ├── vm_config_missing_mem_size_mib.json │ │ ├── vm_config_missing_vcpu_count.json │ │ ├── vm_config_network.json │ │ ├── vm_config_smt_true.json │ │ ├── vm_config_with_mmdsv1.json │ │ ├── vm_config_with_mmdsv2.json │ │ └── with_filelock.py │ ├── host_tools/ │ │ ├── __init__.py │ │ ├── cargo_build.py │ │ ├── change_net_config_space.c │ │ ├── cpu_load.py │ │ ├── drive.py │ │ ├── fcmetrics.py │ │ ├── jailer_time.c │ │ ├── memory.py │ │ ├── metrics.py │ │ ├── network.py │ │ ├── proc.py │ │ ├── test_syscalls.c │ │ ├── udp_offload.py │ │ ├── vmclock-abi.h │ │ ├── vmclock.c │ │ ├── vsock_helper.c │ │ └── waitpkg.c │ ├── integration_tests/ │ │ ├── build/ │ │ │ ├── __init__.py │ │ │ ├── test_clippy.py │ │ │ ├── test_coverage.py │ │ │ ├── test_dependencies.py │ │ │ ├── test_gdb.py │ │ │ ├── test_seccomp_no_redundant_rules.py │ │ │ └── test_unittests.py │ │ ├── functional/ │ │ │ ├── __init__.py │ │ │ ├── test_api.py │ │ │ ├── test_api_server.py │ │ │ ├── test_balloon.py │ │ │ ├── test_binary.py │ │ │ ├── test_binary_size.py │ │ │ ├── test_cmd_line_parameters.py │ │ │ ├── test_cmd_line_start.py │ │ │ ├── test_concurrency.py │ │ │ ├── test_cpu_all.py │ │ │ ├── test_cpu_features_aarch64.py │ │ │ ├── test_cpu_features_host_vs_guest.py │ │ │ ├── test_cpu_features_x86_64.py │ │ │ ├── test_cpu_template_helper.py │ │ │ ├── test_dirty_pages_in_full_snapshot.py │ │ │ ├── test_drive_vhost_user.py │ │ │ ├── test_drive_virtio.py │ │ │ ├── test_error_code.py │ │ │ ├── test_feat_parity.py │ │ │ ├── test_gdb.py │ │ │ ├── test_instrumented_firecracker.py │ │ │ ├── test_kernel_cmdline.py │ │ │ ├── test_kvm_ptp.py │ │ │ ├── test_log_instrument.py │ │ │ ├── test_logging.py │ │ │ ├── test_max_devices.py │ │ │ ├── test_metrics.py │ │ │ ├── 
test_mmds.py │ │ │ ├── test_net.py │ │ │ ├── test_net_config_space.py │ │ │ ├── test_pause_resume.py │ │ │ ├── test_pci.py │ │ │ ├── test_pmem.py │ │ │ ├── test_pvtime.py │ │ │ ├── test_rng.py │ │ │ ├── test_rtc.py │ │ │ ├── test_serial_io.py │ │ │ ├── test_shut_down.py │ │ │ ├── test_signals.py │ │ │ ├── test_snapshot_basic.py │ │ │ ├── test_snapshot_editor.py │ │ │ ├── test_snapshot_not_losing_dirty_pages.py │ │ │ ├── test_snapshot_phase1.py │ │ │ ├── test_snapshot_restore_cross_kernel.py │ │ │ ├── test_topology.py │ │ │ ├── test_uffd.py │ │ │ ├── test_vmclock.py │ │ │ └── test_vsock.py │ │ ├── performance/ │ │ │ ├── __init__.py │ │ │ ├── test_balloon.py │ │ │ ├── test_block.py │ │ │ ├── test_boottime.py │ │ │ ├── test_drive_rate_limiter.py │ │ │ ├── test_hotplug_memory.py │ │ │ ├── test_huge_pages.py │ │ │ ├── test_initrd.py │ │ │ ├── test_jailer.py │ │ │ ├── test_memory_overhead.py │ │ │ ├── test_mmds.py │ │ │ ├── test_network.py │ │ │ ├── test_pmem.py │ │ │ ├── test_process_startup_time.py │ │ │ ├── test_rate_limiter.py │ │ │ ├── test_snapshot.py │ │ │ ├── test_steal_time.py │ │ │ ├── test_vhost_user_metrics.py │ │ │ └── test_vsock.py │ │ ├── security/ │ │ │ ├── __init__.py │ │ │ ├── conftest.py │ │ │ ├── test_custom_seccomp.py │ │ │ ├── test_jail.py │ │ │ ├── test_nv.py │ │ │ ├── test_sec_audit.py │ │ │ ├── test_seccomp.py │ │ │ ├── test_seccomp_validate.py │ │ │ └── test_vulnerabilities.py │ │ ├── style/ │ │ │ ├── __init__.py │ │ │ ├── test_gitlint.py │ │ │ ├── test_licenses.py │ │ │ ├── test_markdown.py │ │ │ ├── test_python.py │ │ │ ├── test_repo.py │ │ │ ├── test_rust.py │ │ │ └── test_swagger.py │ │ └── test_kani.py │ ├── pyproject.toml │ └── pytest.ini └── tools/ ├── ab_plot.py ├── ab_test.py ├── bindgen-patches/ │ ├── 0001-change-c_char-to-c_uchar-in-ifrn_name.patch │ ├── 0002-derive-clone-copy-in-io-uring.patch │ ├── 0003-vmclock.patch │ └── 0004-vmclock-notify.patch ├── bindgen.sh ├── bump-version.sh ├── devctr/ │ ├── Dockerfile │ ├── ctr_gitconfig │ 
└── pyproject.toml ├── devtool ├── functions ├── gh_release.py ├── release-notes.py ├── release-prepare.sh ├── release-tag.sh ├── release.sh ├── sandbox.py ├── setup-ci-artifacts.sh ├── test-popular-containers/ │ ├── build_rootfs.sh │ ├── fcnet.start │ ├── setup-minimal.sh │ └── test-docker-rootfs.py ├── test.sh ├── test_bindings.py └── update-credits.sh ================================================ FILE CONTENTS ================================================ ================================================ FILE: .buildkite/common.py ================================================ # Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. # SPDX-License-Identifier: Apache-2.0 """ Common helpers to create Buildkite pipelines """ import argparse import ast import json import os import random import string import subprocess from pathlib import Path # fmt: off DEFAULT_INSTANCES = [ "m5n.metal", # Intel Cascade Lake "m6i.metal", # Intel Ice Lake "m7i.metal-24xl", # Intel Sapphire Rapids "m7i.metal-48xl", # Intel Sapphire Rapids "m8i.metal-48xl", # Intel Granite Rapids "m8i.metal-96xl", # Intel Granite Rapids "m6a.metal", # AMD Milan "m7a.metal-48xl", # AMD Genoa "m6g.metal", # Graviton2 "m7g.metal", # Graviton3 "m8g.metal-24xl", # Graviton4 1 socket "m8g.metal-48xl", # Graviton4 2 sockets ] # fmt: on DEFAULT_PLATFORMS = [ ("al2", "linux_5.10"), ("al2023", "linux_6.1"), ] def get_arch_for_instance(instance): """Return instance architecture""" return "x86_64" if instance[2] != "g" else "aarch64" def overlay_dict(base: dict, update: dict): """Overlay a dict over a base one""" base = base.copy() for key, val in update.items(): if key in base and isinstance(val, dict): base[key] = overlay_dict(base.get(key, {}), val) else: base[key] = val return base def field_fmt(field, args): """If `field` is a string, interpolate variables in `args`""" if not isinstance(field, str): return field return field.format(**args) def dict_fmt(dict_tmpl, args): """Apply 
field_fmt over a hole dict""" res = {} for key, val in dict_tmpl.items(): if isinstance(val, dict): res[key] = dict_fmt(val, args) else: res[key] = field_fmt(val, args) return res def group(label, command, instances, platforms, **kwargs): """ Generate a group step with specified parameters, for each instance+kernel combination https://buildkite.com/docs/pipelines/group-step """ steps = [] commands = command if isinstance(command, str): commands = [command] for instance in instances: for os_, kv in platforms: # fill any templated variables args = {"instance": instance, "os": os_, "kv": kv} step = { "command": [cmd.format(**args) for cmd in commands], "label": f"{label}-{instance}-{os_}-{kv}", "agents": args, } step_kwargs = dict_fmt(kwargs, args) step = overlay_dict(step_kwargs, step) steps.append(step) return {"group": label, "steps": steps} def get_changed_files(): """ Get all files changed since `branch` """ # Files are changed only in context of a PR if os.environ.get("BUILDKITE_PULL_REQUEST", "false") == "false": return [] branch = os.environ.get("BUILDKITE_PULL_REQUEST_BASE_BRANCH", "main") stdout = subprocess.check_output(f"git diff --name-only origin/{branch}".split(" ")) return [Path(line) for line in stdout.decode().splitlines()] def run_all_tests(changed_files): """ Check if we should run all tests, based on the files that have been changed """ # run the whole test suite if either of: # - any file changed that is not documentation nor GitHub action config file # - no files changed return not changed_files or any( x.suffix != ".md" and not (x.parts[0] == ".github" and x.suffix == ".yml") for x in changed_files ) class DictAction(argparse.Action): """An argparse action that can receive a nested dictionary Examples: --step-param a/b/c=3 {"a": {"b": {"c": 3}}} """ def __init__(self, option_strings, dest, nargs=None, **kwargs): if nargs is not None: raise ValueError("nargs not allowed") super().__init__(option_strings, dest, **kwargs) def __call__(self, 
parser, namespace, value, option_string=None): res = getattr(namespace, self.dest, {}) key_str, val = value.split("=", maxsplit=1) keys = key_str.split("/") # Interpret it as a literal iff it starts like one update = {keys[-1]: ast.literal_eval(val) if val[0] in "[{'" else val} for key in list(reversed(keys))[1:]: update = {key: update} res = overlay_dict(res, update) setattr(namespace, self.dest, res) COMMON_PARSER = argparse.ArgumentParser() COMMON_PARSER.add_argument( "--instances", required=False, nargs="+", default=DEFAULT_INSTANCES, ) COMMON_PARSER.add_argument( "--platforms", metavar="OS-KV", required=False, nargs="+", default=DEFAULT_PLATFORMS, type=lambda arg: tuple(arg.split("-", maxsplit=1)), ) COMMON_PARSER.add_argument( "--step-param", metavar="PARAM=VALUE", help="parameters to add to each step", required=False, action=DictAction, default={}, type=str, ) COMMON_PARSER.add_argument( "--binary-dir", help="Use the Firecracker binaries from this path", required=False, default=None, type=str, ) COMMON_PARSER.add_argument( "--artifacts", help="Use the Firecracker binaries from this S3 uri", required=False, default=os.environ.get("ARTIFACTS_OVERRIDE"), type=str, ) COMMON_PARSER.add_argument( "--no-kani", help="Don't add kani step", action="store_true", default=False, ) COMMON_PARSER.add_argument( "--parallelism", help="How many instances of test to create", required=False, default=1, type=int, ) def random_str(k: int): """Generate a random string of hex characters.""" return "".join(random.choices(string.hexdigits, k=k)) def ab_revision_build(revision): """Generate steps for building an A/B-test revision""" return [f"./tools/devtool -y build --rev {revision} --release"] def shared_build(): """Helper function to make it simple to share a compilation artifacts for a whole Buildkite build """ # We need to support 3 scenarios here: # 1. We are running in the nightly pipeline - only compile the HEAD of main. # 2. 
We are running in a PR pipeline - compile HEAD of main as revision A and HEAD of PR branch as revision B. # 3. We are running in an A/B-test pipeline - compile what is passed via REVISION_{A,B} environment variables. rev_a = os.environ.get("REVISION_A") if rev_a is not None: rev_b = os.environ.get("REVISION_B") assert rev_b is not None, "REVISION_B environment variable not set" build_cmds = ab_revision_build(rev_a) if rev_a != rev_b: build_cmds += ab_revision_build(rev_b) elif os.environ.get("BUILDKITE_PULL_REQUEST", "false") != "false": build_cmds = ab_revision_build( os.environ.get("BUILDKITE_PULL_REQUEST_BASE_BRANCH", "main") ) + ["./tools/devtool -y build --release"] else: build_cmds = ["./tools/devtool -y build --release"] binary_dir = f"build_$(uname -m)_{random_str(k=8)}.tar.gz" build_cmds += [ "du -sh build/*", f"tar czf {binary_dir} build", f"buildkite-agent artifact upload {binary_dir}", ] return build_cmds, binary_dir class BKPipeline: """ Buildkite Pipeline class abstraction Helper class to easily construct pipelines. """ parser = COMMON_PARSER def __init__(self, with_build_step=True, **kwargs): self.steps = [] self.args = args = self.parser.parse_args() # Retry one time if agent was lost. 
This can happen if we terminate the # instance or the agent gets disconnected for whatever reason retry = { "automatic": [{"exit_status": -1, "limit": 1}], } retry = overlay_dict(retry, kwargs.pop("retry", {})) # Calculate step defaults with parameters and kwargs per_instance = { "instances": args.instances, "platforms": args.platforms, "artifact_paths": ["./test_results/**/*"], "retry": retry, **kwargs, } self.per_instance = overlay_dict(per_instance, args.step_param) self.per_arch = self.per_instance.copy() self.per_arch["instances"] = ["m6i.metal", "m7g.metal"] self.per_arch["platforms"] = [("al2023", "linux_6.1")] self.binary_dir = args.binary_dir self.artifacts = args.artifacts # Build sharing, if a binary dir wasn't already supplied if not args.binary_dir and with_build_step: build_cmds, self.shared_build = shared_build() self.build_group_per_arch( "build", build_cmds, depends_on_build=False, set_key=self.shared_build ) else: self.shared_build = None def add_step(self, step, depends_on_build=True): """ Add a step to the pipeline. 
https://buildkite.com/docs/pipelines/step-reference :param step: a Buildkite step :param depends_on_build: inject needed commands for sharing builds """ if depends_on_build and isinstance(step, dict): step = self._adapt_group(step) self.steps.append(step) return step def _adapt_group(self, group): """""" prepend = [] if self.shared_build is not None: prepend = [ f'buildkite-agent artifact download "{self.shared_build}" .', f"tar xzf {self.shared_build}", ] if self.binary_dir is not None: prepend.extend( [ f'buildkite-agent artifact download "{self.binary_dir}/$(uname -m)/*" .', f"chmod -v a+x {self.binary_dir}/**/*", ] ) for step in group["steps"]: step["command"] = prepend + step["command"] if self.shared_build is not None: if "depends_on" not in step: step["depends_on"] = [] elif isinstance(step["depends_on"], str): step["depends_on"] = [step["depends_on"]] elif isinstance(step["depends_on"], list): pass else: raise ValueError( f"depends_on should be a string or a list but is {type(step['depends_on'])}" ) step["depends_on"].append(self.shared_build) step["depends_on"] = [ self.build_key( dep, get_arch_for_instance(step["agents"]["instance"]) ) for dep in step["depends_on"] ] return group def build_group(self, *args, **kwargs): """ Build a group, parametrizing over the selected instances/platforms. https://buildkite.com/docs/pipelines/group-step """ depends_on_build = kwargs.pop("depends_on_build", True) combined = overlay_dict(self.per_instance, kwargs) combined["parallelism"] = self.args.parallelism return self.add_step( group(*args, **combined), depends_on_build=depends_on_build ) def build_key(self, key, arch): """Return the Buildkite key for the build step, for the specified arch""" return key.replace("$(uname -m)", arch).replace(".tar.gz", "") def build_group_per_arch(self, label, *args, **kwargs): """ Build a group, parametrizing over the architectures only. 
kwargs consumed by this method and not passed down to `group`: - `depends_on_build` (default: `True`): Whether the steps in this group depend on the artifacts from the shared compilation steps - `set_key`: If a string, causes the generated steps to have a "key" field replacing "$(uname -m)" with arch and removing trailing tar.gz """ depends_on_build = kwargs.pop("depends_on_build", True) set_key = kwargs.pop("set_key", None) combined = overlay_dict(self.per_arch, kwargs) grp = group(label, *args, **combined) if set_key: for step in grp["steps"]: step["key"] = self.build_key( set_key, get_arch_for_instance(step["agents"]["instance"]) ) return self.add_step(grp, depends_on_build=depends_on_build) def to_dict(self): """Render the pipeline as a dictionary.""" return {"steps": self.steps} def to_json(self): """Serialize the pipeline to JSON""" return json.dumps(self.to_dict(), indent=4, sort_keys=True, ensure_ascii=False) def devtool_download_artifacts(self, artifacts): """Generate a `devtool download_ci_artifacts` command""" parts = ["./tools/devtool -y download_ci_artifacts"] parts += artifacts return " ".join(parts) def devtool_test(self, devtool_opts=None, pytest_opts=None): """Generate a `devtool test` command""" cmds = [] parts = ["./tools/devtool -y test"] if self.shared_build is not None: parts.append("--no-build") if self.artifacts is not None: parts.append(f"--artifacts '{self.artifacts}'") if devtool_opts: parts.append(devtool_opts) parts.append("--") if self.binary_dir is not None: parts.append(f"--binary-dir=../{self.binary_dir}/$(uname -m)") if pytest_opts: parts.append(pytest_opts) cmds.append(" ".join(parts)) return cmds ================================================ FILE: .buildkite/pipeline_coverage.py ================================================ #!/usr/bin/env python3 # Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
# SPDX-License-Identifier: Apache-2.0 """Generate Buildkite pipelines dynamically""" from common import BKPipeline pipeline = BKPipeline(with_build_step=False) pipeline.build_group( "coverage", pipeline.devtool_test( devtool_opts="--no-build", pytest_opts="integration_tests/build/test_coverage.py", ), ) print(pipeline.to_json()) ================================================ FILE: .buildkite/pipeline_cpu_template.py ================================================ #!/usr/bin/env python3 # Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. # SPDX-License-Identifier: Apache-2.0 """Generate Buildkite CPU Template pipelines dynamically""" from enum import Enum from common import DEFAULT_PLATFORMS, BKPipeline, group class BkStep(str, Enum): """ Commonly used BuildKite step keywords """ LABEL = "label" TIMEOUT = "timeout" COMMAND = "command" ARTIFACTS = "artifact_paths" cpu_template_test = { "rdmsr": { BkStep.COMMAND: [ "tools/devtool -y test --no-build -- -m no_block_pr -n4 --dist worksteal integration_tests/functional/test_cpu_features_x86_64.py -k 'test_cpu_rdmsr' " ], BkStep.LABEL: "rdmsr", "instances": [ "m5n.metal", "m6i.metal", "m7i.metal-24xl", "m7i.metal-48xl", "m6a.metal", "m7a.metal-48xl", ], }, "fingerprint": { BkStep.COMMAND: [ "tools/devtool -y test --no-build -- -m no_block_pr integration_tests/functional/test_cpu_template_helper.py -k test_guest_cpu_config_change", ], BkStep.LABEL: "fingerprint", }, "cpuid_wrmsr": { "snapshot": { BkStep.COMMAND: [ "tools/devtool -y test --no-build -- -m nonci -n4 --dist worksteal integration_tests/functional/test_cpu_features_x86_64.py -k 'test_cpu_wrmsr_snapshot or test_cpu_cpuid_snapshot'", "mkdir -pv tests/snapshot_artifacts_upload/{instance}_{os}_{kv}", "sudo mv tests/snapshot_artifacts/* tests/snapshot_artifacts_upload/{instance}_{os}_{kv}", ], BkStep.LABEL: "snapshot-create", BkStep.ARTIFACTS: "tests/snapshot_artifacts_upload/**/*", BkStep.TIMEOUT: 30, }, "restore": { BkStep.COMMAND: [ 
"buildkite-agent artifact download tests/snapshot_artifacts_upload/{instance}_{os}_{kv}/**/* .", "mv tests/snapshot_artifacts_upload/{instance}_{os}_{kv} tests/snapshot_artifacts", "tools/devtool -y test --no-build -- -m nonci -n4 --dist worksteal integration_tests/functional/test_cpu_features_x86_64.py -k 'test_cpu_wrmsr_restore or test_cpu_cpuid_restore'", ], BkStep.LABEL: "snapshot-restore-src-{instance}-{snapshot_os}-{snapshot_kv}-dst-{restore_instance}-{restore_os}-{restore_kv}", BkStep.TIMEOUT: 30, }, "cross_instances": { "m5n.metal": ["m6i.metal"], "m6i.metal": ["m5n.metal"], }, "instances": [ "m5n.metal", "m6i.metal", "m7i.metal-24xl", "m7i.metal-48xl", "m6a.metal", ], }, } def group_snapshot_restore(test_step): """ Generate a group step with specified parameters for each instance and kernel combination and handle "wait" command between steps https://buildkite.com/docs/pipelines/group-step """ groups = [] groups.append( { "key": "snapshot", **group( label=test_step["snapshot"][BkStep.LABEL], command=test_step["snapshot"][BkStep.COMMAND], instances=test_step["instances"], platforms=DEFAULT_PLATFORMS, timeout=test_step["snapshot"][BkStep.TIMEOUT], artifacts=test_step["snapshot"][BkStep.ARTIFACTS], ), } ) snapshot_restore_combinations = [] for dp in DEFAULT_PLATFORMS: for src_instance in test_step["instances"]: for dst_instance in [src_instance] + test_step["cross_instances"].get( src_instance, [] ): snapshot_restore_combinations.append( ((dp, src_instance), (dp, dst_instance)) ) steps = [] for combination in snapshot_restore_combinations: (snapshot_os, snapshot_kv), snapshot_instance = combination[0] (restore_os, restore_kv), restore_instance = combination[1] restore_commands = [ command.format(instance=snapshot_instance, os=snapshot_os, kv=snapshot_kv) for command in test_step["restore"][BkStep.COMMAND] ] restore_label = test_step["restore"][BkStep.LABEL].format( instance=snapshot_instance, snapshot_os=snapshot_os, snapshot_kv=snapshot_kv, 
restore_instance=restore_instance, restore_os=restore_os, restore_kv=restore_kv, ) steps.append( { BkStep.COMMAND: restore_commands, BkStep.LABEL: restore_label, BkStep.TIMEOUT: test_step["restore"][BkStep.TIMEOUT], "agents": { "instance": restore_instance, "kv": restore_kv, "os": restore_os, }, } ) groups.append( {"group": "snapshot-restore", "steps": steps, "depends_on": "snapshot"} ) return groups if __name__ == "__main__": BKPipeline.parser.add_argument( "--test", choices=list(cpu_template_test), help="CPU template test", action="append", ) pipeline = BKPipeline() for test in pipeline.args.test or list(cpu_template_test): if test == "cpuid_wrmsr": groups = group_snapshot_restore(cpu_template_test[test]) for grp in groups: pipeline.add_step(grp) else: test_data = cpu_template_test[test] pipeline.build_group(**test_data, artifacts=["./test_results/**/*"]) print(pipeline.to_json()) ================================================ FILE: .buildkite/pipeline_cross.py ================================================ #!/usr/bin/env python3 # Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. # SPDX-License-Identifier: Apache-2.0 """Generate Buildkite Cross Snapshot/Restore pipelines dynamically 1. Generate snapshots for each instance and kernel version 2. wait 3. 
Restore snapshots across instances and kernels """ import itertools from common import DEFAULT_PLATFORMS, BKPipeline if __name__ == "__main__": pipeline = BKPipeline() per_instance = pipeline.per_instance.copy() per_instance.pop("instances") per_instance.pop("platforms") instances_x86_64 = [ "m5n.metal", "m6i.metal", "m7i.metal-24xl", "m7i.metal-48xl", "m6a.metal", "m7a.metal-48xl", ] instances_aarch64 = ["m7g.metal"] commands = [ "./tools/devtool -y test --no-build --no-archive -- -m nonci -n4 integration_tests/functional/test_snapshot_phase1.py", # punch holes in mem snapshot tiles and tar them so they are preserved in S3 "find test_results/test_snapshot_phase1 -type f -name mem |xargs -P4 -t -n1 fallocate -d", "mv -v test_results/test_snapshot_phase1 snapshot_artifacts", "mkdir -pv snapshots", "tar cSvf snapshots/{instance}_{kv}.tar snapshot_artifacts", ] pipeline.build_group( "snapshot-create", commands, timeout=30, artifact_paths="snapshots/**/*", instances=instances_x86_64, platforms=DEFAULT_PLATFORMS, ) pipeline.add_step("wait") # allow-list of what instances can be restores on what other instances (in # addition to itself) supported = { "m5n.metal": ["m6i.metal"], "m6i.metal": ["m5n.metal"], } # https://github.com/firecracker-microvm/firecracker/blob/main/docs/kernel-policy.md#experimental-snapshot-compatibility-across-kernel-versions aarch64_platforms = [("al2023", "linux_6.1")] perms_aarch64 = itertools.product( instances_aarch64, aarch64_platforms, instances_aarch64, aarch64_platforms ) perms_x86_64 = itertools.product( instances_x86_64, DEFAULT_PLATFORMS, instances_x86_64, DEFAULT_PLATFORMS ) steps = [] for ( src_instance, (_, src_kv), dst_instance, (dst_os, dst_kv), ) in itertools.chain(perms_x86_64, perms_aarch64): # the integration tests already test src == dst, so we skip it if src_instance == dst_instance and src_kv == dst_kv: continue # newer -> older is not supported, and does not work if src_kv > dst_kv: continue if src_instance != dst_instance 
and dst_instance not in supported.get( src_instance, [] ): continue pytest_keyword_for_instance = { "m5n.metal": "-k 'not None'", "m6i.metal": "-k 'not None'", "m6a.metal": "", } k_val = pytest_keyword_for_instance.get(dst_instance, "") step = { "command": [ f"buildkite-agent artifact download snapshots/{src_instance}_{src_kv}.tar .", f"tar xSvf snapshots/{src_instance}_{src_kv}.tar", *pipeline.devtool_test( pytest_opts=f"-m nonci -n8 --dist worksteal {k_val} integration_tests/functional/test_snapshot_restore_cross_kernel.py", ), ], "label": f"snapshot-restore-src-{src_instance}-{src_kv}-dst-{dst_instance}-{dst_kv}", "timeout": 30, "agents": {"instance": dst_instance, "kv": dst_kv, "os": dst_os}, **per_instance, } steps.append(step) pipeline.add_step( {"group": "snapshot-restore-across-instances-and-kernels", "steps": steps} ) print(pipeline.to_json()) ================================================ FILE: .buildkite/pipeline_docker_popular.py ================================================ #!/usr/bin/env python3 # Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
# SPDX-License-Identifier: Apache-2.0 """ Buildkite pipeline for testing popular Docker containers """ from common import BKPipeline, random_str pipeline = BKPipeline() ROOTFS_TAR = f"rootfs_$(uname -m)_{random_str(k=8)}.tar.gz" pipeline.build_group_per_arch( "rootfs-build", [ "sudo tools/devtool sh 'tools/test-popular-containers/build_rootfs.sh'", "cd tools/test-popular-containers", f'tar czf "{ROOTFS_TAR}" *.squashfs *.id_rsa', f'buildkite-agent artifact upload "{ROOTFS_TAR}"', ], depends_on_build=False, set_key=ROOTFS_TAR, ) pipeline.build_group( "docker-popular-containers", [ "./tools/devtool ensure_current_artifacts", f'buildkite-agent artifact download "{ROOTFS_TAR}" .', f'tar xzf "{ROOTFS_TAR}" -C tools/test-popular-containers', './tools/devtool sh "cd ./tools/test-popular-containers; PYTHONPATH=../../tests ./test-docker-rootfs.py"', ], depends_on=ROOTFS_TAR, ) print(pipeline.to_json()) ================================================ FILE: .buildkite/pipeline_perf.py ================================================ #!/usr/bin/env python3 # Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. # SPDX-License-Identifier: Apache-2.0 """Generate Buildkite performance pipelines dynamically""" # pylint: disable=invalid-name import os from common import BKPipeline # In `devtool_opts`, we restrict both the set of CPUs on which the docker container's threads can run, # and its memory node. For the cpuset, we pick a continuous set of CPUs from a single NUMA node # that is large enough so that every firecracker thread can get its own core. We exclude core #0, as # the operating system sometimes uses it for book-keeping tasks. The memory node (-m parameter) # has to be the node associated with the NUMA node from which we picked CPUs. 
perf_test = { "virtio-block-sync": { "label": "virtio-block-sync", "tests": "integration_tests/performance/test_block.py::test_block_performance -k 'not Async'", "devtool_opts": "-c 1-10 -m 0", }, "virtio-block-async": { "label": "virtio-block-async", "tests": "integration_tests/performance/test_block.py::test_block_performance -k Async", "devtool_opts": "-c 1-10 -m 0", }, "vhost-user-block": { "label": "vhost-user-block", "tests": "integration_tests/performance/test_block.py::test_block_vhost_user_performance", "devtool_opts": "-c 1-10 -m 0", "ab_opts": "--noise-threshold 0.1", }, "network": { "label": "network", "tests": "integration_tests/performance/test_network.py", "devtool_opts": "-c 1-10 -m 0", }, "snapshot-latency": { "label": "snapshot-latency", "tests": "integration_tests/performance/test_snapshot.py::test_restore_latency integration_tests/performance/test_snapshot.py::test_post_restore_latency integration_tests/performance/test_snapshot.py::test_snapshot_create_latency", "devtool_opts": "-c 1-12 -m 0", }, "population-latency": { "label": "population-latency", "tests": "integration_tests/performance/test_snapshot.py::test_population_latency", "devtool_opts": "-c 1-12 -m 0", }, "vsock-throughput": { "label": "vsock-throughput", "tests": "integration_tests/performance/test_vsock.py", "devtool_opts": "-c 1-10 -m 0", }, "memory-overhead": { "label": "memory-overhead", "tests": "integration_tests/performance/test_memory_overhead.py", "devtool_opts": "-c 1-10 -m 0", }, "boottime": { "label": "boottime", "tests": "integration_tests/performance/test_boottime.py::test_boottime", "devtool_opts": "-c 1-10 -m 0", }, "process-startup": { "label": "process-startup", "tests": "integration_tests/performance/test_process_startup_time.py", "devtool_opts": "-c 1-10 -m 0", }, "jailer": { "label": "jailer", "tests": "integration_tests/performance/test_jailer.py", "devtool_opts": "-c 1-10 -m 0", }, "pmem": { "label": "pmem", "tests": 
"integration_tests/performance/test_pmem.py", "devtool_opts": "-c 1-10 -m 0", }, "mmds": { "label": "mmds", "tests": "integration_tests/performance/test_mmds.py", "devtool_opts": "-c 1-10 -m 0", }, "memory-hotplug": { "label": "memory-hotplug", "tests": "integration_tests/performance/test_hotplug_memory.py", "devtool_opts": "-c 1-10 -m 0", }, } REVISION_A = os.environ.get("REVISION_A") REVISION_B = os.environ.get("REVISION_B") REVISION_A_ARTIFACTS = os.environ.get("REVISION_A_ARTIFACTS") REVISION_B_ARTIFACTS = os.environ.get("REVISION_B_ARTIFACTS") # Either both are specified or neither. Only doing either is a bug. If you want to # run performance tests _on_ a specific commit, specify neither and put your commit # into buildkite's "commit" field. assert (REVISION_A and REVISION_B) or (not REVISION_A and not REVISION_B) assert (REVISION_A_ARTIFACTS and REVISION_B_ARTIFACTS) or ( not REVISION_A_ARTIFACTS and not REVISION_B_ARTIFACTS ) BKPipeline.parser.add_argument( "--test", choices=list(perf_test.keys()), required=False, help="performance test", action="append", ) retry = {} if REVISION_A: # Enable automatic retry and disable manual retries to suppress spurious issues. 
retry["automatic"] = [ {"exit_status": -1, "limit": 1}, {"exit_status": 1, "limit": 1}, ] retry["manual"] = False pipeline = BKPipeline( # Boost priority from 1 to 2 so these jobs are preferred by ag=1 agents priority=2, # use ag=1 instances to make sure no two performance tests are scheduled on the same instance agents={"ag": 1}, retry=retry, ) tests = [perf_test[test] for test in pipeline.args.test or perf_test.keys()] for test in tests: devtool_opts = test.pop("devtool_opts") test_selector = test.pop("tests") ab_opts = test.pop("ab_opts", "") devtool_opts += " --performance" test_script_opts = "" artifacts = [] if REVISION_A: devtool_opts += " --ab" test_script_opts = f'{ab_opts} run --binaries-a build/{REVISION_A}/ --binaries-b build/{REVISION_B} --pytest-opts "{test_selector}"' if REVISION_A_ARTIFACTS: artifacts.append(REVISION_A_ARTIFACTS) artifacts.append(REVISION_B_ARTIFACTS) test_script_opts += f" --artifacts-a {REVISION_A_ARTIFACTS} --artifacts-b {REVISION_B_ARTIFACTS}" else: # Passing `-m ''` below instructs pytest to collect tests regardless of # their markers (e.g. it will collect both tests marked as nonci, and # tests without any markers). test_script_opts += f" -m '' {test_selector}" command = [] if artifacts: command.append(pipeline.devtool_download_artifacts(artifacts)) command.extend(pipeline.devtool_test(devtool_opts, test_script_opts)) pipeline.build_group( command=command, # and the rest can be command arguments **test, ) # Stores the info about pinning tests to agents with particular kernel versions. 
# For example, the following: # pins = { # "linux_6.1-pinned": {"instance": "m6i.metal", "kv": "linux_6.1"}, # } # will pin steps running on instances "m6i.metal" with kernel version tagged "linux_6.1" # to a new kernel version tagged "linux_6.1-pinned" pins = {} def apply_pins(steps): """Apply pins""" new_steps = [] for step in steps: if isinstance(step, str): pass elif "group" in step: step["steps"] = apply_pins(step["steps"]) else: agents = step["agents"] for new_kv, match in pins.items(): # if all keys match, apply pin if all(agents[k] == v for k, v in match.items()): step["agents"]["kv"] = new_kv break new_steps.append(step) return new_steps pipeline.steps = apply_pins(pipeline.steps) print(pipeline.to_json()) ================================================ FILE: .buildkite/pipeline_pr.py ================================================ #!/usr/bin/env python3 # Copyright 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved. # SPDX-License-Identifier: Apache-2.0 """Generate Buildkite pipelines dynamically""" from common import BKPipeline, get_changed_files, run_all_tests # Buildkite default job priority is 0. Setting this to 1 prioritizes PRs over # scheduled jobs and other batch jobs. 
DEFAULT_PRIORITY = 1 DEFAULTS_PERF = { "priority": DEFAULT_PRIORITY + 1, "agents": {"ag": 1}, } changed_files = get_changed_files() DOC_ONLY_CHANGE = False if changed_files and all(f.suffix == ".md" for f in changed_files): DOC_ONLY_CHANGE = True pipeline = BKPipeline( priority=DEFAULT_PRIORITY, timeout_in_minutes=45, with_build_step=not DOC_ONLY_CHANGE, ) pipeline.add_step( { "command": "./tools/devtool -y checkstyle", "label": "style", }, depends_on_build=False, ) # run sanity build of devtool if Dockerfile is changed if any(x.parent.name == "devctr" for x in changed_files): pipeline.build_group_per_arch( "dev-container-sanity-build", "./tools/devtool -y build_devctr && DEVCTR_IMAGE_TAG=latest ./tools/devtool test --no-build -- integration_tests/functional/test_api.py", ) if any( x.parent.name == "tools" and ("release" in x.name or x.name == "devtool") for x in changed_files ): pipeline.build_group_per_arch( "release-sanity-build", "./tools/devtool -y make_release", depends_on_build=False, ) if not pipeline.args.no_kani and ( not changed_files or any(x.suffix in [".rs", ".toml", ".lock"] for x in changed_files) or any(x.parent.name == "devctr" for x in changed_files) or any(x.name == "test_kani.py" for x in changed_files) ): kani_grp = pipeline.build_group( "kani", "./tools/devtool -y test --no-build -- ../tests/integration_tests/test_kani.py -n auto", # Kani step default # Kani runs fastest on m6a.metal instances=["m6a.metal", "m7g.metal"], platforms=[("al2023", "linux_6.1")], timeout_in_minutes=300, **DEFAULTS_PERF, depends_on_build=False, ) if run_all_tests(changed_files): pipeline.build_group( "build", pipeline.devtool_test(pytest_opts="integration_tests/build/"), depends_on_build=False, ) pipeline.build_group( "functional-and-security", pipeline.devtool_test( pytest_opts="-n 16 --dist worksteal integration_tests/{{functional,security}}", ), ) pipeline.build_group( "performance", pipeline.devtool_test( devtool_opts="--performance -c 1-10 -m 0", 
pytest_opts="../tests/integration_tests/performance/", ), **DEFAULTS_PERF, ) print(pipeline.to_json()) ================================================ FILE: .buildkite/pipeline_pr_no_block.py ================================================ #!/usr/bin/env python3 # Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. # SPDX-License-Identifier: Apache-2.0 """Generate Buildkite pipelines dynamically""" from common import BKPipeline, get_changed_files, run_all_tests # Buildkite default job priority is 0. Setting this to 1 prioritizes PRs over # scheduled jobs and other batch jobs. DEFAULT_PRIORITY = 1 pipeline = BKPipeline( with_build_step=False, timeout_in_minutes=45, # some non-blocking tests are performance, so make sure they get ag=1 instances priority=DEFAULT_PRIORITY + 1, agents={"ag": 1}, ) pipeline.build_group( "optional", pipeline.devtool_test( devtool_opts="--performance -c 1-10 -m 0", pytest_opts="integration_tests/ -m no_block_pr --log-cli-level=INFO", ), ) if not run_all_tests(get_changed_files()): pipeline.steps = [] print(pipeline.to_json()) ================================================ FILE: .buildkite/pipeline_release_qa.py ================================================ #!/usr/bin/env python3 # Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
# SPDX-License-Identifier: Apache-2.0 """ Buildkite pipeline for release QA """ from common import BKPipeline pipeline = BKPipeline(with_build_step=False) # NOTE: we need to escape $ using $$ otherwise buildkite tries to replace it instead of the shell pipeline.add_step( { "label": "download-release", "if": 'build.env("VERSION") != "dev"', "command": [ "aws s3 sync --no-sign-request s3://spec.ccfc.min/firecracker-ci/firecracker/$$VERSION release-$$VERSION", 'buildkite-agent artifact upload "release-$$VERSION/**/*"', ], }, depends_on_build=False, ) pipeline.build_group_per_arch( "make-release", # if is a keyword for python, so we need this workaround to expand it as a kwarg **{"if": 'build.env("VERSION") == "dev"'}, command=[ "./tools/devtool -y make_release", "RELEASE_DIR=$$(echo release-*dev-$$(uname -m))", "RELEASE_SUFFIX=$${{RELEASE_DIR#release}}", "OUT_DIR=release-$$VERSION/$$(uname -m)", "mkdir -p $$OUT_DIR", ( "for f in $$RELEASE_DIR/*-$$(uname -m); do" " mv $$f $$OUT_DIR/$$(basename $$f $$RELEASE_SUFFIX);" " mv $$f.debug $$OUT_DIR/$$(basename $$f $$RELEASE_SUFFIX).debug;" "done" ), 'buildkite-agent artifact upload "release-$$VERSION/**/*"', ], depends_on_build=False, ) # The devtool expects the examples to be in the same folder as the binaries to run some tests # (for example, uffd handler tests). Build them and upload them in the same folder. pipeline.build_group_per_arch( "build-examples", command=[ "CARGO_TARGET=$$(uname -m)-unknown-linux-musl", "./tools/devtool -y sh cargo build --target $$CARGO_TARGET --release --examples", "mkdir -p release-$$VERSION/$$(uname -m)/", "cp -R build/cargo_target/$$CARGO_TARGET/release/examples release-$$VERSION/$$(uname -m)/", 'buildkite-agent artifact upload "release-$$VERSION/**/*"', ], depends_on_build=False, ) pipeline.add_step("wait", depends_on_build=False) pipeline.add_step( { "label": "run-pr-pipeline", "command": ( ".buildkite/pipeline_pr.py --binary-dir release-$$VERSION " "| jq '(..|select(.priority? 
!= null).priority) += 100' " "| buildkite-agent pipeline upload" ), }, depends_on_build=False, ) print(pipeline.to_json()) ================================================ FILE: .cargo/audit.toml ================================================ [advisories] # The `paste` dependency is transitively included via `gdbstub`. # While the crate is archived/unmaintained, the author considers it feature-complete # and functionally stable. gdbstub will be update once they migrate # to an alternative solution. # See https://github.com/daniel5151/gdbstub/issues/168 ignore = ["RUSTSEC-2024-0436"] ================================================ FILE: .cargo/config.toml ================================================ [build] target-dir = "build/cargo_target" rustflags = [ "-Ccodegen-units=1", ] [net] git-fetch-with-cli = true [env] AWS_LC_SYS_NO_JITTER_ENTROPY = "1" # disable AVX512 as it adds 600k of binary size # this was only used for MMDS token generation AWS_LC_SYS_CFLAGS = "-DMY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX" ================================================ FILE: .dockerignore ================================================ .git/ build/ src/ tests/ docs/ resources/ tools/test-popular-containers/ test_results/ ================================================ FILE: .git-blame-ignore-revs ================================================ 8d2463fa21386d6c0c90b2010aaee5550b505c87 ae93e49470433648b144a64514eef708cce15143 45739f4b57b7b824473b612aefebec8b7c2e31fd 7221ae7f943bcd127c6710e81d9fa2f520afa073 7c4687fab64d4dd574fe43cf583a302b43ce53ab ================================================ FILE: .github/CODEOWNERS ================================================ # All markdown files *.md @xmarcalx @kalyazin @pb8o @Manciukic # But not the ones in docs/ docs/*.md # Except these specific ones docs/getting-started.md @xmarcalx @kalyazin @pb8o @Manciukic docs/prod-host-setup.md @xmarcalx @kalyazin @pb8o @Manciukic # Also cover all "*policy*.md" documents **/*policy*.md @xmarcalx 
@kalyazin @pb8o @Manciukic **/*POLICY*.md @xmarcalx @kalyazin @pb8o @Manciukic # Also these non-md files in the repository root THIRD_PARTY @xmarcalx @kalyazin @pb8o @Manciukic LICENSE @xmarcalx @kalyazin @pb8o @Manciukic NOTICE @xmarcalx @kalyazin @pb8o @Manciukic PGP-KEY.asc @xmarcalx @kalyazin @pb8o @Manciukic ================================================ FILE: .github/ISSUE_TEMPLATE/bug_report.md ================================================ --- name: Bug report about: Create a report to help us improve title: '[Bug] Title' labels: 'Quality: Bug' assignees: '' --- # Describe the bug `[Author TODO: A clear and concise description of what the bug is.]` ## To Reproduce `[Author TODO: Steps to reproduce the behaviour:]` 1. Start Firecracker via.... 1. Configure Firecracker via... 1. ... ## Expected behaviour `[Author TODO: A clear and concise description of what you expected to happen.]` ## Environment `[Author TODO: Please supply the following information):]` - Firecracker version: - Host and guest kernel versions: - Rootfs used: - Architecture: - Any other relevant software versions: ## Additional context `[Author TODO: How has this bug affected you?]` `[Author TODO: What are you trying to achieve?]` `[Author TODO: Do you have any idea of what the solution might be?]` ## Checks - [ ] Have you searched the Firecracker Issues database for similar problems? - [ ] Have you read the existing relevant Firecracker documentation? - [ ] Are you certain the bug being reported is a Firecracker issue? ================================================ FILE: .github/ISSUE_TEMPLATE/feature_request.md ================================================ --- name: Feature request about: Suggest an idea for this project title: '[Feature Request] Title' labels: '' assignees: '' --- # Feature Request `[Author TODO: Why is this feature request important? What are the use cases? 
Please describe.]` ## Describe the desired solution `[Author TODO: A clear and concise description of how you would like the feature to work.]` ## Describe possible alternatives `[Author TODO: A clear and concise description of any alternative solutions or features you have considered.]` `[Author TODO: How do you work around not having this feature?]` ## Additional context `[Author TODO: Add additional context about this feature request here.]` ## Checks - [ ] Have you searched the Firecracker Issues database for similar requests? - [ ] Have you read all the existing relevant Firecracker documentation? - [ ] Have you read and understood Firecracker's core tenets? ================================================ FILE: .github/codecov.yml ================================================ codecov: # We utilize optional statuses that are okay to fail, so # having codecov only post if all statuses pass won't work. require_ci_to_pass: no notify: # No need to wait for long running tests if the build tests are done. wait_for_ci: false coverage: # Our target is 80% coverage range: 80..100 status: project: firecracker: # Allow drop of up to 0.5% threshold: 0.5% target: 80% # There are 15 uploads per commit (|{instance type} x {kernel version}| = 15). # Codecov will update the comment with every new upload. If we want to instead # only post the comment after all 15 reports are received, add `after_n_builds: 15` # below comment: # Only relevant for initial report: We want a report even though # codecov integration is not merged to main yet, to see it works # without having to go through multiple PR cycles. 
require_base: false ================================================ FILE: .github/dependabot.yml ================================================ version: 2 updates: - package-ecosystem: "cargo" directory: "/" schedule: interval: "weekly" day: "monday" allow: - dependency-type: "all" groups: rust-vmm: patterns: - "vmm-sys-util" - "kvm-bindings" - "kvm-ioctls" - "vm-memory" - "vhost" - "linux-loader" - "vm-allocator" - "event-manager" - "vm-superio" firecracker: patterns: - "*" exclude-patterns: - "vmm-sys-util" - "kvm-bindings" - "kvm-ioctls" - "vm-memory" - "vhost" - "linux-loader" - "vm-allocator" - "event-manager" - "vm-superio" - package-ecosystem: "rust-toolchain" directory: "/" schedule: interval: "weekly" day: "monday" ================================================ FILE: .github/pull_request_template.md ================================================ ## Changes ... ## Reason ... ## License Acceptance By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. For more information on following Developer Certificate of Origin and signing off your commits, please check [`CONTRIBUTING.md`][3]. ## PR Checklist - [ ] I have read and understand [CONTRIBUTING.md][3]. - [ ] I have run `tools/devtool checkbuild --all` to verify that the PR passes build checks on all supported architectures. - [ ] I have run `tools/devtool checkstyle` to verify that the PR passes the automated style checks. - [ ] I have described what is done in these changes, why they are needed, and how they are solving the problem in a clear and encompassing way. - [ ] I have updated any relevant documentation (both in code and in the docs) in the PR. - [ ] I have mentioned all user-facing changes in `CHANGELOG.md`. - [ ] If a specific issue led to this PR, this PR closes the issue. - [ ] When making API changes, I have followed the [Runbook for Firecracker API changes][2]. 
- [ ] I have tested all new and changed functionalities in unit tests and/or integration tests. - [ ] I have linked an issue to every new `TODO`. ______________________________________________________________________ - [ ] This functionality cannot be added in [`rust-vmm`][1]. [1]: https://github.com/rust-vmm [2]: https://github.com/firecracker-microvm/firecracker/blob/main/docs/api-change-runbook.md [3]: https://github.com/firecracker-microvm/firecracker/blob/main/CONTRIBUTING.md ================================================ FILE: .github/workflows/deny_dirty_cargo_locks.yml ================================================ name: Check no Cargo.lock files are dirty on: pull_request permissions: contents: read jobs: no_dirty_cargo_locks_check: runs-on: ubuntu-latest steps: - name: "Checkout repository" uses: actions/checkout@v3 with: ref: ${{ github.event.pull_request.head.sha }} - name: "Check no Cargo.lock files are dirty" run: | exit_code=0 # This breaks for paths with whitespaces in them, but we have an integration test # that prevents those from existing in this repository. for f in $(find . -name 'Cargo.lock' -not -path "./build/*"); do is_dirty=0 ( cd "$(dirname "$f")" cargo --locked metadata --format-version 1 >/dev/null 2>&1 ) || is_dirty=$?; # GitHub Actions execute run steps as `bash -e`, so we need the temporary # variable to not exit early. if [ $is_dirty -ne 0 ]; then echo "Lockfile $f is dirty" exit_code=1 fi done exit $exit_code ================================================ FILE: .github/workflows/dependency_modification_check.yml ================================================ name: Check no dependencies were modified on: pull_request permissions: contents: read jobs: dependency_changed_check: runs-on: ubuntu-latest steps: - name: "Checkout repository" uses: actions/checkout@v3 with: ref: ${{ github.event.pull_request.head.sha }} - name: "Check Cargo.lock not in changeset" run: | git fetch origin git diff origin/$GITHUB_BASE_REF.. 
--name-only| ( ! grep "Cargo.lock") ================================================ FILE: .github/workflows/monitor_libseccomp_releases.yml ================================================ name: Monitor libseccomp Releases on: schedule: - cron: '0 0 * * *' # Daily at midnight UTC workflow_dispatch: # Allow manual trigger permissions: issues: write contents: read jobs: check-release: runs-on: ubuntu-latest steps: - name: Checkout repository uses: actions/checkout@v4 - name: Get current libseccomp version from Dockerfile id: current run: | CURRENT=$(grep 'LIBSECCOMP_VER' tools/devctr/Dockerfile | grep -oP "v[0-9.]+") echo "version=$CURRENT" >> $GITHUB_OUTPUT - name: Check for new libseccomp release id: latest run: | LATEST=$(curl -s https://api.github.com/repos/seccomp/libseccomp/releases/latest | jq -r '.tag_name') echo "version=$LATEST" >> $GITHUB_OUTPUT - name: Check latest version is newer id: semver_check run: | CURRENT=$(echo ${{ steps.current.outputs.version }} | grep -oP "[0-9.]+") LATEST=$(echo ${{ steps.latest.outputs.version }} | grep -oP "[0-9.]+") if ! 
printf '%s\n%s' "$LATEST" "$CURRENT" | sort -VC && [ "$CURRENT" != "$LATEST" ]; then echo "is_newer=true" >> $GITHUB_OUTPUT; else echo "is_newer=false" >> $GITHUB_OUTPUT; fi - name: Check if issue exists if: steps.semver_check.outputs.is_newer == 'true' # New release has higher semantic version id: issue_check run: | ISSUES=$(curl -s -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" \ "https://api.github.com/repos/${{ github.repository }}/issues?state=open&labels=dependencies" | \ jq -r --arg tag "${{ steps.latest.outputs.version }}" '[.[] | select(.title | contains("chore(deps): update libseccomp to \($tag)"))] | length') echo "exists=$ISSUES" >> $GITHUB_OUTPUT - name: Create issue for new release id: create_issue if: steps.semver_check.outputs.is_newer == 'true' && steps.issue_check.outputs.exists == '0' # No existing issue for new version run: | gh issue create \ --title "chore(deps): update libseccomp to ${{ steps.latest.outputs.version }}" \ --body "$(cat < Alexandra Ghecenco Alexandru Branciog <31914537+alexbranciog@users.noreply.github.com> Bogdan Ionita Liu Jiang Marc Brooker Radu Weiss <31901393+raduweiss@users.noreply.github.com> Rolf Neugebauer Serban Iorga Julian Stecklina Tamio-Vesa Nakajima Iulian Barbu Petre Eftime karthik nedunchezhiyan Alin Dima Andrei Sandu <54316454+sandreim@users.noreply.github.com> Diana Popa Alexandru Cihodaru Liviu Berciu Jonathan Woollett-Light Jonathan Woollett-Light Sudan Landge <119602619+sudanl0@users.noreply.github.com> Sudan Landge karthik nedunchezhiyan Babis Chalios Pablo Barbáchano Nikita Kalyazin Trăistaru Andrei Cristian Trăistaru Andrei Cristian <56828222+andreitraistaru@users.noreply.github.com> Takahiro Itazuri Jack Thomson Ashwin Ginoria Muskaan Singla Egor Lazarchuk Nikita Zakirov Tomoya Iwata Andrea Manzini Colin Percival ================================================ FILE: .mdformat.toml ================================================ wrap = 80 # Defaults from 
https://mdformat.readthedocs.io/en/stable/users/configuration_file.html number = false end_of_line = "lf" ================================================ FILE: .python-version ================================================ 3.10.14 ================================================ FILE: CHANGELOG.md ================================================ # Changelog All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## [Unreleased] ### Added - [#5323](https://github.com/firecracker-microvm/firecracker/pull/5323): Add support for Vsock Unix domain socket path overriding on snapshot restore. More information can be found in the [docs](docs/vsock.md/#unix-domain-socket-renaming). ### Changed ### Deprecated ### Removed ### Fixed - [#5762](https://github.com/firecracker-microvm/firecracker/pull/5762): Cap virtio-rng per-request entropy to 64 KiB. Previously, a guest could construct a descriptor chain that caused Firecracker to allocate more host memory than the guest actually provided, potentially leading to excessive host memory consumption. ## [1.15.0] ### Added - [#5510](https://github.com/firecracker-microvm/firecracker/pull/5510), [#5593](https://github.com/firecracker-microvm/firecracker/pull/5593), [#5564](https://github.com/firecracker-microvm/firecracker/pull/5564): Add support for the [VMClock device](https://uapi-group.org/specifications/specs/vmclock). The implementation supports the snapshot safety features proposed [here](https://lore.kernel.org/lkml/20260107132514.437-1-bchalios@amazon.es/), but doesn't provide currently any clock-specific information for helping the guest synchronize its clocks. More information can be found in [docs](docs/snapshotting/snapshot-support.md#userspace-notifications-of-loading-virtual-machine-snapshots). 
- [#5574](https://github.com/firecracker-microvm/firecracker/pull/5574), [#5671](https://github.com/firecracker-microvm/firecracker/pull/5671), [#5674](https://github.com/firecracker-microvm/firecracker/pull/5674) [#5690](https://github.com/firecracker-microvm/firecracker/pull/5690) Added Intel Granite Rapids as a supported and tested platform for Firecracker on 6.1 host kernel versions. ### Changed - [#5564](https://github.com/firecracker-microvm/firecracker/pull/5564): The VMClock support added in this PR uses one extra GSI for the VMClock device itself, which reduces the available GSIs for VirtIO devices. The new maximum values are 92 devices on Aarch64 and 17 devices on x86. - [#5631](https://github.com/firecracker-microvm/firecracker/pull/5631): Update binary copy process inside Jailer to disallow symlinks and hardlinks at the destination path and change ownership of the copied binary to the specified uid/gid. ### Deprecated ### Removed ### Fixed - [#5688](https://github.com/firecracker-microvm/firecracker/pull/5688): Fixed vsock local port reuse across snapshot restore by saving the last used local port into the snapshot, so users need to regenerate snapshots. - [#5698](https://github.com/firecracker-microvm/firecracker/pull/5698): Fixed the possible ENXIO error which could occur during file open operation if the underlying file is FIFO without active readers already attached. - [#5705](https://github.com/firecracker-microvm/firecracker/pull/5705): Fixed a bug that caused Firecracker to corrupt the memory files of differential snapshots for VMs with multiple memory slots. This affected VMs using memory hot-plugging or any x86 VMs with a memory size larger than 3GiB. - [#5739](https://github.com/firecracker-microvm/firecracker/pull/5739): Fixed validation of TCP SYN options length when MMDS is enabled. 
## [1.14.0] ### Added - [#5350](https://github.com/firecracker-microvm/firecracker/pull/5350): Added a `/serial` endpoint, which allows setting `serial_out_path` to the path of a pre-created file into which Firecracker should redirect output from the guest's serial console. Not configuring it means Firecracker will continue to print serial output to stdout. Similarly to the logger, this configuration is not persisted across snapshots. - [#5463](https://github.com/firecracker-microvm/firecracker/pull/5463): Added support for `virtio-pmem` devices. See [documentation](docs/pmem.md) for more information. - [#5534](https://github.com/firecracker-microvm/firecracker/pull/5534): Added support for memory hot-plugging through the `virtio-mem` device. See [documentation](docs/memory-hotplug.md) for more information. - [#5491](https://github.com/firecracker-microvm/firecracker/pull/5491): Added support for `virtio-balloon` free page reporting and hinting. Free page reporting is a developer preview not for production feature. See [documentation](docs/ballooning.md) for more information. ### Changed - [#4028](https://github.com/firecracker-microvm/firecracker/pull/4028): Firecracker now creates the log and metrics files if they do not exist, simplifying the launch of Firecracker by removing a manual step. - [#5516](https://github.com/firecracker-microvm/firecracker/pull/5516): Balloon stats now supports guest kernel >= 6.12, adding metrics on guest OOM kills, memory allocation stalls, and memory scan/reclaim info. - [#5526](https://github.com/firecracker-microvm/firecracker/pull/5526): Specify IA32_MTRRdefType MSR on VM boot to allow it to set page attributes for memory regions. 
### Deprecated ### Removed - [#5439](https://github.com/firecracker-microvm/firecracker/pull/5439): Removed the `rx_partial_writes`, `tx_partial_reads`, `sync_response_fails`, `sync_vmm_send_timeout_count`, `deprecated_cmd_line_api_calls`, `log_fails` and `device_events` metrics, as they were never incremented. ### Fixed - [#5418](https://github.com/firecracker-microvm/firecracker/pull/5418): Fixed typo in Swagger definition of `MmdsConfig`, where the property `imds_compat` was spelled as `imds_comat`. This caused auto-generated clients to create bad requests. - [#5447](https://github.com/firecracker-microvm/firecracker/pull/5447): Fixed Intel AMX enabling for kernels that support dynamic XSTATE features for userspace applications but not for KVM guests (e.g. kernel versions >= 5.16 and < 5.17). - [#5485](https://github.com/firecracker-microvm/firecracker/pull/5485): Fixed a bug causing a read/write from an iovec to be duplicated when receiving an error on an iovec other than the first. This caused a data corruption issue in the vsock device starting from guest kernel 6.17. - [#5494](https://github.com/firecracker-microvm/firecracker/pull/5494): Fixed a watchdog soft lockup bug on microVMs restored from snapshots by calling KVM_KVMCLOCK_CTRL ioctl before resuming. - [#5538](https://github.com/firecracker-microvm/firecracker/pull/5538): Fixed a cache coherency issue on non-FWB aarch64 platforms by adding `dma-coherent` property to virtio-mmio nodes in the FDT. ## [1.13.0] ### Added - [#5139](https://github.com/firecracker-microvm/firecracker/pull/5139): Added support for [PVTime](https://docs.kernel.org/virt/kvm/arm/pvtime.html). This is used to support steal time on ARM machines. - [#5175](https://github.com/firecracker-microvm/firecracker/pull/5175): Allow including a custom cpu template directly in the json configuration file passed to `--config-file` under the `cpu_config` key. 
- [#5274](https://github.com/firecracker-microvm/firecracker/pull/5274): Allow taking diff snapshots even if dirty page tracking is disabled, by using `mincore(2)` to overapproximate the set of dirty pages. Only works if swap is disabled. - [#5290](https://github.com/firecracker-microvm/firecracker/pull/5290): Extended MMDS to support the EC2 IMDS-compatible session token headers (i.e. "X-aws-ec2-metadata-token" and "X-aws-ec2-metadata-token-ttl-seconds") alongside the MMDS-specific ones. - [#5290](https://github.com/firecracker-microvm/firecracker/pull/5290): Added `mmds.rx_invalid_token` and `mmds.rx_no_token` metrics to track the number of GET requests that were rejected due to token validation failures in MMDS version 2. These metrics also count requests that would be rejected in MMDS version 2 when MMDS version 1 is configured. They help users assess readiness for migrating to MMDS version 2. - [#5310](https://github.com/firecracker-microvm/firecracker/pull/5310): Added an optional `imds_compat` field (default to false if not provided) to PUT requests to `/mmds/config` to enforce MMDS to always respond plain text contents in the IMDS format regardless of the `Accept` header in requests. Users need to regenerate snapshots. - [#5364](https://github.com/firecracker-microvm/firecracker/pull/5364): Added PCI support in Firecracker. PCI support is optional. Users can enable it passing the `--enable-pci` flag when launching the Firecracker process. When Firecracker process is launched with PCI support, it will create all VirtIO devices using a PCI VirtIO transport. If not enabled, Firecracker will use the MMIO transport instead. ### Changed - [#5165](https://github.com/firecracker-microvm/firecracker/pull/5165): Changed Firecracker snapshot feature from developer preview to generally available. Incremental snapshots remain in developer preview. 
- [#5282](https://github.com/firecracker-microvm/firecracker/pull/5282): Updated jailer to no longer require the executable file name to contain `firecracker`. - [#5290](https://github.com/firecracker-microvm/firecracker/pull/5290): Changed MMDS to validate the value of "X-metadata-token-ttl-seconds" header only if it is a PUT request to /latest/api/token, as in EC2 IMDS. - [#5290](https://github.com/firecracker-microvm/firecracker/pull/5290): Changed MMDS version 1 to support the session oriented method as in version 2, allowing easier migration to version 2. Note that MMDS version 1 accepts a GET request even with no token or an invalid token so that existing workloads continue to work. ### Deprecated - [#5274](https://github.com/firecracker-microvm/firecracker/pull/5274): Deprecated the `enable_diff_snapshots` parameter of the `/snapshot/load` API. Use `track_dirty_pages` instead. ### Removed - [#5411](https://github.com/firecracker-microvm/firecracker/pull/5411): Removed official support for Intel Skylake instances. Firecracker will continue to work on those instances, but we will no longer perform automated testing on them. ### Fixed - [#5222](https://github.com/firecracker-microvm/firecracker/pull/5222): Fixed network and rng devices locking up on hosts with non 4K pages. - [#5226](https://github.com/firecracker-microvm/firecracker/pull/5226): Fixed MMDS to set `Content-Type` header correctly (i.e. `Content-Type: text/plain` for IMDS-formatted or error responses and `Content-Type: application/json` for JSON-formatted responses). - [#5260](https://github.com/firecracker-microvm/firecracker/pull/5260): Fixed a bug allowing the block device to starve all other devices when backed by a sufficiently slow drive. - [#4207](https://github.com/firecracker-microvm/firecracker/issues/4207): Fixed GSI numbering on aarch64 to correctly allow up to 96 devices being attached simultaneously. 
- [#5290](https://github.com/firecracker-microvm/firecracker/pull/5290): Fixed MMDS to reject PUT requests containing `X-Forwarded-For` header regardless of its casing (e.g. `x-forwarded-for`). - [#5328](https://github.com/firecracker-microvm/firecracker/pull/5328): Fixed MMDS to set the token TTL header (i.e. "X-metadata-token-ttl-seconds" or "X-aws-ec2-metadata-token-ttl-seconds") in the response to "PUT /latest/api/token", as EC2 IMDS does. ## [1.12.0] ### Added - [#5048](https://github.com/firecracker-microvm/firecracker/pull/5048): Added support for [PVH boot mode](docs/pvh.md). This is used when an x86 kernel provides the appropriate ELF Note to indicate that PVH boot mode is supported. Linux kernels newer than 5.0 compiled with `CONFIG_PVH=y` set this ELF Note, as do FreeBSD kernels. - [#5065](https://github.com/firecracker-microvm/firecracker/pull/5065) Added support for Intel AMX (Advanced Matrix Extensions). To be able to take and restore a snapshot of Intel AMX state, `Xsave` is used instead of `kvm_xsave`, so users need to regenerate snapshots. - [#4731](https://github.com/firecracker-microvm/firecracker/pull/4731): Added support for modifying the host TAP device name during snapshot restore. - [#5146](https://github.com/firecracker-microvm/firecracker/pull/5146): Added Intel Sapphire Rapids as a supported and tested platform for Firecracker. - [#5148](https://github.com/firecracker-microvm/firecracker/pull/5148): Added ARM Graviton4 as a supported and tested platform for Firecracker. ### Changed - [#5118](https://github.com/firecracker-microvm/firecracker/pull/5118): Cleared WAITPKG CPUID bit in CPUID normalization. The feature enables a guest to put a physical processor into an idle state, which is undesirable in a FaaS environment since that is what the host wants to decide. - [#5142](https://github.com/firecracker-microvm/firecracker/pull/5142): Clarified what CPU models are supported by each existing CPU template. 
Firecracker exits with an error if a CPU template is used on an unsupported CPU model. ### Deprecated - [#4948](https://github.com/firecracker-microvm/firecracker/pull/4948): Deprecated the `page_size_kib` field in the [UFFD handshake](docs/snapshotting/handling-page-faults-on-snapshot-resume.md#registering-memory-to-be-handled-via-userfault-file-descriptors), and replaced it with a `page_size` field. The `page_size_kib` field is misnamed, as the value Firecracker sets it to is actually the page size in _bytes_, not KiB. It will be removed in Firecracker 2.0. ### Fixed - [#5074](https://github.com/firecracker-microvm/firecracker/pull/5074) Fix the `SendCtrlAltDel` command not working for ACPI-enabled guest kernels, by dropping the i8042.nopnp argument from the default kernel command line Firecracker constructs. - [#5122](https://github.com/firecracker-microvm/firecracker/pull/5122): Keep the UFFD Unix domain socket open to prevent the race condition between the guest memory mappings message and the shutdown event that was sometimes causing arrival of an empty message on the UFFD handler side. - [#5143](https://github.com/firecracker-microvm/firecracker/pull/5143): Fixed to report `process_startup_time_us` and `process_startup_time_cpu_us` metrics for `api_server` right after the API server starts, while previously reported before applying seccomp filter and starting the API server. Users may observe a bit longer startup time metrics. ## [1.11.0] ### Added - [#4987](https://github.com/firecracker-microvm/firecracker/pull/4987): Reset physical counter register (`CNTPCT_EL0`) on VM startup. This avoids VM reading the host physical counter value. This is only possible on 6.4 and newer kernels. For older kernels physical counter will still be passed to the guest unmodified. 
See more info [here](https://github.com/firecracker-microvm/firecracker/blob/main/docs/prod-host-setup.md#arm-only-vm-physical-counter-behaviour) - [#5088](https://github.com/firecracker-microvm/firecracker/pull/5088): Added AMD Genoa as a supported and tested platform for Firecracker. ### Changed - [#4913](https://github.com/firecracker-microvm/firecracker/pull/4913): Removed unnecessary fields (`max_connections` and `max_pending_resets`) from the snapshot format, bumping the snapshot version to 5.0.0. Users need to regenerate snapshots. - [#4926](https://github.com/firecracker-microvm/firecracker/pull/4926): Replace underlying implementation for seccompiler from in house one in favor of `libseccomp` which produces smaller and more optimized BPF code. ### Deprecated ### Removed ### Fixed - [#4921](https://github.com/firecracker-microvm/firecracker/pull/4921): Fixed swagger `CpuConfig` definition to include missing aarch64-specific fields. - [#4916](https://github.com/firecracker-microvm/firecracker/pull/4916): Fixed `IovDeque` implementation to work with any host page size. This fixes virtio-net device on non 4K host kernels. - [#4991](https://github.com/firecracker-microvm/firecracker/pull/4991): Fixed `mem_size_mib` and `track_dirty_pages` being mandatory for all `PATCH /machine-config` requests. Now, they can be omitted which leaves these parts of the machine configuration unchanged. - [#5007](https://github.com/firecracker-microvm/firecracker/pull/5007): Fixed watchdog softlockup warning on x86_64 guests when a vCPU is paused during GDB debugging. - [#5021](https://github.com/firecracker-microvm/firecracker/pull/5021) If a balloon device is inflated post UFFD-backed snapshot restore, Firecracker now causes `remove` UFFD messages to be sent to the UFFD handler. Previously, no such message would be sent. 
- [#5034](https://github.com/firecracker-microvm/firecracker/pull/5034): Fix an integer underflow in the jailer when computing the value it passes to Firecracker's `--parent-cpu-time-us` values, which caused development builds of Firecracker to crash (but production builds were unaffected as underflows do not panic in release mode). - [#5045](https://github.com/firecracker-microvm/firecracker/pull/5045): Fixed an issue where firecracker intermittently receives SIGHUP when using jailer with `--new-pid-ns` but without `--daemonize`. - [#4995](https://github.com/firecracker-microvm/firecracker/pull/4995): Firecracker no longer overwrites CPUID leaf 0x80000000 when running AMD hardware, meaning the guest can now discover a greater range of CPUID leaves in the extended function range (this range is host kernel dependent). - [#5046](https://github.com/firecracker-microvm/firecracker/pull/5046): Retry KVM_CREATE_VM on EINTR that occasionally happen on heavily loaded hosts to improve reliability of microVM creation. - [#5052](https://github.com/firecracker-microvm/firecracker/pull/5052): Build the empty seccomp policy as default for debug builds to avoid crashes on syscalls introduced by debug assertions from Rust 1.80.0. ## [1.10.0] ### Added - [#4834](https://github.com/firecracker-microvm/firecracker/pull/4834): Add `VIRTIO_NET_F_RX_MRGBUF` support to the `virtio-net` device. When this feature is negotiated, guest `virtio-net` driver can perform more efficient memory management which in turn improves RX and TX performance. - [#4460](https://github.com/firecracker-microvm/firecracker/pull/4460): Add a call to [`KVM_KVMCLOCK_CTRL`](https://docs.kernel.org/virt/kvm/api.html#kvm-kvmclock-ctrl) after pausing vCPUs on x86_64 architectures. This ioctl sets a flag in the KVM state of the vCPU indicating that it has been paused by the host userspace. In guests that use kvmclock, the soft lockup watchdog checks this flag. If it is set, it won't trigger the lockup condition. 
Calling the ioctl for guests that don't use kvmclock will fail. These failures are not fatal. We log the failure and increase the `vcpu.kvmclock_ctrl_fails` metric. - [#4869](https://github.com/firecracker-microvm/firecracker/pull/4869): Added support for Aarch64 systems which feature CPU caches with a number of sets higher than `u16::MAX`. - [#4797](https://github.com/firecracker-microvm/firecracker/pull/4797), [#4854](https://github.com/firecracker-microvm/firecracker/pull/4854): Added GDB debugging support for a microVM guest kernel. Please see our [GDB debugging documentation](docs/gdb-debugging.md) for more information. ### Changed - [#4844](https://github.com/firecracker-microvm/firecracker/pull/4844): Upgrade `virtio-net` device to use `readv` syscall to avoid unnecessary memory copies on RX path, increasing the RX performance. ### Deprecated ### Removed - [#4804](https://github.com/firecracker-microvm/firecracker/pull/4804): Drop Support for guest kernel 4.14. Linux 4.14 reached end-of-life in [January 2024](https://lore.kernel.org/lkml/2024011046-ecology-tiptoeing-ce50@gregkh/) The minimum supported guest kernel now is 5.10. ### Fixed - [#4796](https://github.com/firecracker-microvm/firecracker/pull/4796): Fixed Vsock not notifying guest about `TRANSPORT_RESET_EVENT` event after snapshot restore. This resulted in guest waiting indefinitely on a connection which was reset during snapshot creation. - [#4790](https://github.com/firecracker-microvm/firecracker/pull/4790): v1.9.0 was missing most of the debugging information in the debuginfo file, due to a change in the Cargo defaults. This has been corrected. - [#4826](https://github.com/firecracker-microvm/firecracker/pull/4826): Add missing configuration of tap offload features when restoring from a snapshot. 
Setting the features was previously [moved](https://github.com/firecracker-microvm/firecracker/pull/4680/commits/49ed5ea4b48ccd98903da037368fa3108f58ac1f) from net device creation to device activation time, but it was not reflected in the restore path. This was leading to inability to connect to the restored VM if the offload features were used. ## [1.9.0] ### Added - [#4687](https://github.com/firecracker-microvm/firecracker/pull/4687): Added VMGenID support for microVMs running on ARM hosts with 6.1 guest kernels. Support for VMGenID via DeviceTree bindings exists only on mainline 6.10 Linux onwards. Users of Firecracker will need to backport the relevant patches on top of their 6.1 kernels to make use of the feature. As a result, Firecracker snapshot version is now 3.0.0 - [#4732](https://github.com/firecracker-microvm/firecracker/pull/4732), [#4733](https://github.com/firecracker-microvm/firecracker/pull/4733), [#4741](https://github.com/firecracker-microvm/firecracker/pull/4741), [#4746](https://github.com/firecracker-microvm/firecracker/pull/4746): Added official support for 6.1 microVM guest kernels. - [#4743](https://github.com/firecracker-microvm/firecracker/pull/4743): Added support for `-h` help flag to the Jailer. The Jailer will now print the help message with either `--help` or `-h`. ### Changed ### Deprecated - Support for guest kernel 4.14 is now deprecated. We will completely remove 4.14 support with Firecracker version v1.10 ### Removed - [#4689](https://github.com/firecracker-microvm/firecracker/pull/4689): Drop support for host kernel 4.14. Linux 4.14 reached end-of-life in [January 2024](https://lore.kernel.org/lkml/2024011046-ecology-tiptoeing-ce50@gregkh/). The minimum supported kernel now is 5.10. Guest kernel 4.14 is still supported. 
### Fixed - [4680](https://github.com/firecracker-microvm/firecracker/pull/4680): Fixed an issue ([#4659](https://github.com/firecracker-microvm/firecracker/issues/4659)) where the virtio-net device implementation would always assume the guest accepts all VirtIO features the device offers. This is always true with the Linux guest kernels we are testing but other kernels, like FreeBSD, make different assumptions. This PR fixes the emulation code to set the TAP features based on the features accepted by the guest. ## [1.8.0] ### Added - [#4428](https://github.com/firecracker-microvm/firecracker/pull/4428): Added ACPI support to Firecracker for x86_64 microVMs. Currently, we pass ACPI tables with information about the available vCPUs, interrupt controllers, VirtIO and legacy x86 devices to the guest. This allows booting kernels without MPTable support. Please see our [kernel policy documentation](docs/kernel-policy.md) for more information regarding relevant kernel configurations. - [#4487](https://github.com/firecracker-microvm/firecracker/pull/4487): Added support for the Virtual Machine Generation Identifier (VMGenID) device on x86_64 platforms. VMGenID is a virtual device that allows VMMs to notify guests when they are resumed from a snapshot. Linux includes VMGenID support since version 5.18. It uses notifications from the device to reseed its internal CSPRNG. Please refer to [snapshot support](docs/snapshotting/snapshot-support.md) and [random for clones](docs/snapshotting/random-for-clones.md) documentation for more info on VMGenID. VMGenID state is part of the snapshot format of Firecracker. As a result, Firecracker snapshot version is now 2.0.0. ### Changed - [#4492](https://github.com/firecracker-microvm/firecracker/pull/4492): Changed `--config` parameter of `cpu-template-helper` to be optional. Users no longer need to prepare kernel, rootfs and Firecracker configuration files to use `cpu-template-helper`. 
- [#4537](https://github.com/firecracker-microvm/firecracker/pull/4537) Changed T2CL template to pass through bit 27 and 28 of `MSR_IA32_ARCH_CAPABILITIES` (`RFDS_NO` and `RFDS_CLEAR`) since KVM considers they can be passed through and T2CL isn't designed for secure snapshot migration between different processors. - [#4537](https://github.com/firecracker-microvm/firecracker/pull/4537) Changed T2S template to set bit 27 of `MSR_IA32_ARCH_CAPABILITIES` (`RFDS_NO`) to 1 since it assumes that the fleet only consists of processors that are not affected by RFDS. - [#4388](https://github.com/firecracker-microvm/firecracker/pull/4388): Avoid setting `kvm_immediate_exit` to 1 if we are already handling an exit, or if the vCPU is stopped. This avoids a spurious KVM exit upon restoring snapshots. - [#4567](https://github.com/firecracker-microvm/firecracker/pull/4567): Do not initialize vCPUs in powered-off state upon snapshot restore. No functional change, as vCPU initialization is only relevant for the booted case (where the guest expects CPUs to be powered off). ### Deprecated - Firecracker's `--start-time-cpu-us` and `--start-time-us` parameters are deprecated and will be removed in v2.0 or later. They are used by the jailer to pass the value that should be subtracted from the (CPU) time, when emitting the `start_time_us` and `start_time_cpu_us` metrics. These parameters were never meant to be used by end customers, and we recommend doing any such time adjustments outside Firecracker. - Booting with microVM kernels that rely on MPTable on x86_64 is deprecated and support will be removed in v2.0 or later. We suggest that users of Firecracker use guest kernels with ACPI support. For x86_64 microVMs, ACPI will be the only way Firecracker passes hardware information to the guest once MPTable support is removed. 
### Fixed - [#4526](https://github.com/firecracker-microvm/firecracker/pull/4526): Added a check in the network TX path that the size of the network frames the guest passes to us is not bigger than the maximum frame the device expects to handle. On the TX path, we copy frames destined to MMDS from guest memory to Firecracker memory. Without the check, a mis-behaving virtio-net driver could cause an increase in the memory footprint of the Firecracker process. Now, if we receive such a frame, we ignore it and increase `Net::tx_malformed_frames` metric. - [#4536](https://github.com/firecracker-microvm/firecracker/pull/4536): Make the first differential snapshot taken after a full snapshot contain only the set of memory pages changed since the full snapshot. Previously, these differential snapshots would contain all memory pages. This will result in potentially much smaller differential snapshots after a full snapshot. - [#4578](https://github.com/firecracker-microvm/firecracker/pull/4578): Fix UFFD support not being forward-compatible with new ioctl options introduced in Linux 6.6. See also . - [#4618](https://github.com/firecracker-microvm/firecracker/pull/4618): On x86_64, when taking a snapshot, if a vCPU has MSR_IA32_TSC_DEADLINE set to 0, Firecracker will replace it with the MSR_IA32_TSC value from the same vCPU. This is to guarantee that the vCPU will continue receiving TSC interrupts after restoring from the snapshot even if an interrupt is lost when taking a snapshot. - [#4666](https://github.com/firecracker-microvm/firecracker/pull/4666): Fixed Firecracker sometimes restoring `MSR_IA32_TSC_DEADLINE` before `MSR_IA32_TSC`. Now it always restores `MSR_IA32_TSC_DEADLINE` MSR after `MSR_IA32_TSC`, as KVM relies on the guest TSC for correct restoration of `MSR_IA32_TSC_DEADLINE`. 
This fixed guests using the `TSC_DEADLINE` hardware feature receiving incorrect timer interrupts after snapshot restoration, which could lead to them seemingly getting stuck in sleep-related syscalls (see also ). ## [1.7.0] ### Added - [#4346](https://github.com/firecracker-microvm/firecracker/pull/4346): Added support to emit aggregate (minimum/maximum/sum) latency for `VcpuExit::MmioRead`, `VcpuExit::MmioWrite`, `VcpuExit::IoIn` and `VcpuExit::IoOut`. The average for these VM exits is not emitted since it can be deduced from the available emitted metrics. - [#4360](https://github.com/firecracker-microvm/firecracker/pull/4360): Added dev-preview support for backing a VM's guest memory by 2M hugetlbfs pages. Please see the [documentation](docs/hugepages.md) for more information - [#4486](https://github.com/firecracker-microvm/firecracker/pull/4486): Added block and net device metrics for file/tap access latencies and queue backlog lengths, which can be used to analyse saturation of the Firecracker VMM thread and underlying layers. Queue backlog length metrics are flushed periodically. They can be used to estimate an average queue length by request by dividing its value by the number of requests served. ### Changed - [#4230](https://github.com/firecracker-microvm/firecracker/pull/4230): Changed microVM snapshot format version strategy. Firecracker snapshot format now has a version that is independent of Firecracker version. The current version of the snapshot format is v1.0.0. From now on, the Firecracker binary will define the snapshot format version it supports and it will only be able to load snapshots with format that is backwards compatible with that version. Users can pass the `--snapshot-version` flag to the Firecracker binary to see its supported snapshot version format. This change renders all previous Firecracker snapshots (up to Firecracker version v1.6.0) incompatible with the current Firecracker version. 
- [#4449](https://github.com/firecracker-microvm/firecracker/pull/4449): Added information about page size to the payload Firecracker sends to the UFFD handler. Each memory region object now contains a `page_size_kib` field. See also the [hugepages documentation](docs/hugepages.md). - [#4498](https://github.com/firecracker-microvm/firecracker/pull/4498): Only use memfd to back guest memory if a vhost-user-blk device is configured, otherwise use anonymous private memory. This is because serving page faults of shared memory used by memfd is slower and may impact workloads. ### Fixed - [#4409](https://github.com/firecracker-microvm/firecracker/pull/4409): Fixed a bug in the cpu-template-helper that made it panic during conversion of cpu configuration with SVE registers to the cpu template on aarch64 platform. Now cpu-template-helper will print warnings if it encounters SVE registers during the conversion process. This is because cpu templates are limited to only modify registers less than 128 bits. - [#4413](https://github.com/firecracker-microvm/firecracker/pull/4413): Fixed a bug in the Firecracker that prevented it from restoring snapshots of VMs that had SVE enabled. - [#4414](https://github.com/firecracker-microvm/firecracker/pull/4414): Made `PATCH` requests to the `/machine-config` endpoint transactional, meaning Firecracker's configuration will be unchanged if the request returns an error. This fixes a bug where a microVM with incompatible balloon and guest memory size could be booted, due to the check for this condition happening after Firecracker's configuration was updated. - [#4259](https://github.com/firecracker-microvm/firecracker/pull/4259): Added a double fork mechanism in the Jailer to avoid setsid() failures that occurred while running Jailer as the process group leader. However, this changed the behaviour of Jailer and now the Firecracker process will always have a different PID than the Jailer process. 
- [#4436](https://github.com/firecracker-microvm/firecracker/pull/4436): Added a "Known Limitations" section in the Jailer docs to highlight the above change in behaviour introduced in PR#4259. - [#4442](https://github.com/firecracker-microvm/firecracker/pull/4442): As a solution to the change in behaviour introduced in PR#4259, provided a mechanism to reliably fetch Firecracker PID. With this change, Firecracker process's PID will always be available in the Jailer's root directory regardless of whether new_pid_ns was set. - [#4468](https://github.com/firecracker-microvm/firecracker/pull/4468): Fixed a bug where a client would hang or timeout when querying for an MMDS path whose content is empty, because the 'Content-Length' header field was missing in a response. ## [1.6.0] ### Added - [#4145](https://github.com/firecracker-microvm/firecracker/pull/4145): Added support for per net device metrics. In addition to aggregate metrics `net`, each individual net device will emit metrics under the label `"net_{iface_id}"`. E.g. the associated metrics for the endpoint `"/network-interfaces/eth0"` will be available under `"net_eth0"` in the metrics json object. - [#4202](https://github.com/firecracker-microvm/firecracker/pull/4202): Added support for per block device metrics. In addition to aggregate metrics `block`, each individual block device will emit metrics under the label `"block_{drive_id}"`. E.g. the associated metrics for the endpoint `"/drives/{drive_id}"` will be available under `"block_drive_id"` in the metrics json object. - [#4205](https://github.com/firecracker-microvm/firecracker/pull/4205): Added a new `vm-state` subcommand to `info-vmstate` command in the `snapshot-editor` tool to print MicrovmState of vmstate snapshot file in a readable format. Also made the `vcpu-states` subcommand available on x86_64. - [#4063](https://github.com/firecracker-microvm/firecracker/pull/4063): Added source-level instrumentation based tracing. 
See [tracing](./docs/tracing.md) for more details. - [#4138](https://github.com/firecracker-microvm/firecracker/pull/4138), [#4170](https://github.com/firecracker-microvm/firecracker/pull/4170), [#4223](https://github.com/firecracker-microvm/firecracker/pull/4223), [#4247](https://github.com/firecracker-microvm/firecracker/pull/4247), [#4226](https://github.com/firecracker-microvm/firecracker/pull/4226): Added **developer preview only** (NOT for production use) support for vhost-user block devices. Firecracker implements a vhost-user frontend. Users are free to choose from existing open source backend solutions or their own implementation. Known limitation: snapshotting is not currently supported for microVMs containing vhost-user block devices. See the [related doc page](./docs/api_requests/block-vhost-user.md) for details. The device emits metrics under the label `"vhost_user_{device}_{drive_id}"`. ### Changed - [#4309](https://github.com/firecracker-microvm/firecracker/pull/4309): The jailer's option `--parent-cgroup` will move the process to that cgroup if no `cgroup` options are provided. - Simplified and clarified the removal policy of deprecated API elements to follow semantic versioning 2.0.0. For more information, please refer to [this GitHub discussion](https://github.com/firecracker-microvm/firecracker/discussions/4135). - [#4180](https://github.com/firecracker-microvm/firecracker/pull/4180): Refactored error propagation to avoid logging and printing an error on exits with a zero exit code. Now, on successful exit "Firecracker exited successfully" is logged. - [#4194](https://github.com/firecracker-microvm/firecracker/pull/4194): Removed support for creating Firecracker snapshots targeting older versions of Firecracker. With this change, running 'firecracker --version' will not print the supported snapshot versions. 
- [#4301](https://github.com/firecracker-microvm/firecracker/pull/4301): Allow merging of diff snapshots into base snapshots by directly writing the diff snapshot on top of the base snapshot's memory file. This can be done by setting the `mem_file_path` to the path of the pre-existing full snapshot. ### Deprecated - [#4209](https://github.com/firecracker-microvm/firecracker/pull/4209): `rebase-snap` tool is now deprecated. Users should use `snapshot-editor` for rebasing diff snapshots. ### Fixed - [#4171](https://github.com/firecracker-microvm/firecracker/pull/4171): Fixed a bug that ignored the `--show-log-origin` option, preventing it from printing the source code file of the log messages. - [#4178](https://github.com/firecracker-microvm/firecracker/pull/4178): Fixed a bug reporting a non-zero exit code on successful shutdown when starting Firecracker with `--no-api`. - [#4261](https://github.com/firecracker-microvm/firecracker/pull/4261): Fixed a bug where Firecracker would log "RunWithApiError error: MicroVMStopped without an error: GenericError" when exiting after encountering an emulation error. It now correctly prints "RunWithApiError error: MicroVMStopped _with_ an error: GenericError". - [#4242](https://github.com/firecracker-microvm/firecracker/pull/4242): Fixed a bug introduced in #4047 that limited the `--level` option of logger to Pascal-cased values (e.g. accepting "Info", but not "info"). It now ignores case again. - [#4286](https://github.com/firecracker-microvm/firecracker/pull/4286): Fixed a bug in the asynchronous virtio-block engine that rendered the device non-functional after a PATCH request was issued to Firecracker for updating the path to the host-side backing file of the device. 
- [#4301](https://github.com/firecracker-microvm/firecracker/pull/4301): Fixed a bug where if Firecracker was instructed to take a snapshot of a microvm which itself was restored from a snapshot, specifying `mem_file_path` to be the path of the memory file from which the microvm was restored would result in both the microvm and the snapshot being corrupted. It now instead performs a "write-back" of all memory that was updated since the snapshot was originally loaded. ## [1.5.0] ### Added - [#3837](https://github.com/firecracker-microvm/firecracker/issues/3837): Added official support for Linux 6.1. See [prod-host-setup](./docs/prod-host-setup.md) for some security and performance considerations. - [#4045](https://github.com/firecracker-microvm/firecracker/pull/4045) and [#4075](https://github.com/firecracker-microvm/firecracker/pull/4075): Added `snapshot-editor` tool for modifications of snapshot files. It allows for rebasing of memory snapshot files, printing and removing aarch64 registers from the vmstate and obtaining snapshot version. - [#3967](https://github.com/firecracker-microvm/firecracker/pull/3967/): Added new fields to the custom CPU templates. (aarch64 only) `vcpu_features` field allows modifications of vCPU features enabled during vCPU initialization. `kvm_capabilities` field allows modifications of KVM capability checks that Firecracker performs during boot. If any of these fields are in use, minimal target snapshot version is restricted to 1.5. ### Changed - Updated deserialization of `bitmap` for custom CPU templates to allow usage of '\_' as a separator. - Changed the strip feature of `cpu-template-helper` tool to operate bitwise. - Better logs during validation of CPU ID in snapshot restoration path. Also Firecracker now does not fail if it can't get CPU ID from the host or can't find CPU ID in the snapshot. - Changed the serial device to only try to initialize itself if stdin is a terminal or a FIFO pipe. 
This fixes logged warnings about the serial device failing to initialize if the process is daemonized (in which case stdin is /dev/null instead of a terminal). - Changed to show a warning message when launching a microVM with C3 template on a processor prior to Intel Cascade Lake, because the guest kernel does not apply the mitigation against MMIO stale data vulnerability when it is running on a processor that does not enumerate FBSDP_NO, PSDP_NO and SBDR_SSDP_NO on IA32_ARCH_CAPABILITIES MSR. - Made Firecracker resize its file descriptor table on process start. It now preallocates the in-kernel fdtable to hold `RLIMIT_NOFILE` many fds (or 2048 if no limit is set). This avoids the kernel reallocating the fdtable during Firecracker operations, resulting in a 30ms to 70ms reduction of snapshot restore times for medium to large microVMs with many devices attached. - Changed the dump feature of `cpu-template-helper` tool not to enumerate program counter (PC) on ARM because it is determined by the given kernel image and it is useless in the custom CPU template context. - The ability to create snapshots for an older version of Firecracker is now deprecated. As a result, the `version` body field in `PUT` on `/snapshot/create` request is deprecated. - Added support for the /dev/userfaultfd device available on linux kernels >= 6.1. This is the default for creating UFFD handlers on these kernel versions. If it is unavailable, Firecracker falls back to the userfaultfd syscall. - Deprecated `cpu_template` field in `PUT` and `PATCH` requests on `/machine-config` API, which is used to set a static CPU template. Custom CPU templates added in v1.4.0 are available as an improved iteration of the static CPU templates. For more information about the transition from static CPU templates to custom CPU templates, please refer to [this GitHub discussion](https://github.com/firecracker-microvm/firecracker/discussions/4135). 
- Changed default log level from [`Warn`](https://docs.rs/log/latest/log/enum.Level.html#variant.Warn) to [`Info`](https://docs.rs/log/latest/log/enum.Level.html#variant.Info). This results in more logs being output by default. ### Fixed - Fixed a change in behavior of normalize host brand string that breaks Firecracker on external instances. - Fixed the T2A CPU template not to unset the MMX bit (CPUID.80000001h:EDX[23]) and the FXSR bit (CPUID.80000001h:EDX[24]). - Fixed the T2A CPU template to set the RstrFpErrPtrs bit (CPUID.80000008h:EBX[2]). - Fixed a bug where Firecracker would crash during boot if a guest set up a virtio queue that partially overlapped with the MMIO gap. Now Firecracker instead correctly refuses to activate the corresponding virtio device. - Fixed the T2CL CPU template to pass through security mitigation bits that are listed by KVM as bits able to be passed through. By making the most use of the available hardware security mitigations on a processor that a guest is running on, the guest might be able to benefit from performance improvements. - Fixed the T2S CPU template to set the GDS_NO bit of the IA32_ARCH_CAPABILITIES MSR to 1 in accordance with an Intel microcode update. To use the template securely, users should apply the latest microcode update on the host. - Fixed the spelling of the `nomodule` param passed in the default kernel command line parameters. This is a **breaking change** for setups that use the default kernel command line which also depend on being able to load kernel modules at runtime. This may also break setups which use the default kernel command line and which use an init binary that inadvertently depends on the misspelled param ("nomodules") being present at the command line, since this param will no longer be passed. ## [1.4.0] ### Added - Added support for custom CPU templates allowing users to adjust vCPU features exposed to the guest via CPUID, MSRs and ARM registers. 
- Introduced V1N1 static CPU template for ARM to represent Neoverse V1 CPU as Neoverse N1. - Added support for the `virtio-rng` entropy device. The device is optional. A single device can be enabled per VM using the `/entropy` endpoint. - Added a `cpu-template-helper` tool for assisting with creating and managing custom CPU templates. ### Changed - Set FDP_EXCPTN_ONLY bit (CPUID.7h.0:EBX[6]) and ZERO_FCS_FDS bit (CPUID.7h.0:EBX[13]) in Intel's CPUID normalization process. ### Fixed - Fixed feature flags in T2S CPU template on Intel Ice Lake. - Fixed CPUID leaf 0xb to be exposed to guests running on AMD host. - Fixed a performance regression in the jailer logic for closing open file descriptors. Related to: [#3542](https://github.com/firecracker-microvm/firecracker/issues/3542). - A race condition that has been identified between the API thread and the VMM thread due to a misconfiguration of the `api_event_fd`. - Fixed CPUID leaf 0x1 to disable perfmon and debug feature on x86 host. - Fixed passing through cache information from host in CPUID leaf 0x80000006. - Fixed the T2S CPU template to set the RRSBA bit of the IA32_ARCH_CAPABILITIES MSR to 1 in accordance with an Intel microcode update. - Fixed the T2CL CPU template to pass through the RSBA and RRSBA bits of the IA32_ARCH_CAPABILITIES MSR from the host in accordance with an Intel microcode update. - Fixed passing through cache information from host in CPUID leaf 0x80000005. - Fixed the T2A CPU template to disable SVM (nested virtualization). - Fixed the T2A CPU template to set EferLmsleUnsupported bit (CPUID.80000008h:EBX[20]), which indicates that EFER[LMSLE] is not supported. ## [1.3.0] ### Added - Introduced T2CL (Intel) and T2A (AMD) CPU templates to provide instruction set feature parity between Intel and AMD CPUs when using these templates. - Added Graviton3 support (c7g instance type). ### Changed - Improved error message when invalid network backend provided. 
- Improved TCP throughput by between 5% and 15% (depending on CPU) by using scatter-gather I/O in the net device's TX path. - Upgraded Rust toolchain from 1.64.0 to 1.66.0. - Made seccompiler output bit-reproducible. ### Fixed - Fixed feature flags in T2 CPU template on Intel Ice Lake. ## [1.2.0] ### Added - Added a new CPU template called `T2S`. This exposes the same CPUID as `T2` to the Guest and also overwrites the `ARCH_CAPABILITIES` MSR to expose a reduced set of capabilities. With regards to hardware vulnerabilities and mitigations, the Guest vCPU will appear to look like a Skylake CPU, making it safe to snapshot uVMs running on a newer host CPU (Cascade Lake) and restore on a host that has a Skylake CPU. - Added a new CLI option `--metrics-path PATH`. It accepts a file parameter where metrics will be sent to. - Added baselines for m6i.metal and m6a.metal for all long running performance tests. - Releases now include debuginfo files. ### Changed - Changed the jailer option `--exec-file` to fail if the filename does not contain the string `firecracker` to prevent from running non-firecracker binaries. - Upgraded Rust toolchain from 1.52.1 to 1.64.0. - Switched to specifying our dependencies using caret requirements instead of comparison requirements. - Updated all dependencies to their respective newest versions. ### Fixed - Made the `T2` template more robust by explicitly disabling additional CPUID flags that should be off but were missed initially or that were not available in the spec when the template was created. - Now MAC address is correctly displayed when queried with GET `/vm/config` if left unspecified in both pre and post snapshot states. - Fixed a self-DoS scenario in the virtio-queue code by reporting and terminating execution when the number of available descriptors reported by the driver is higher than the queue size. 
- Fixed the bad handling of kernel cmdline parameters when init arguments were provided in the `boot_args` field of the JSON body of the PUT `/boot-source` request. - Fixed a bug on ARM64 hosts where the upper 64bits of the V0-V31 FP/SIMD registers were not saved correctly when taking a snapshot, potentially leading to data loss. This change invalidates all ARM64 snapshots taken with versions of Firecracker \<= 1.1.3. - Improved stability and security when saving CPU MSRs in snapshots. ## [1.1.0] ### Added - The API `PATCH` methods for `machine-config` can now be used to reset the `cpu_template` to `"None"`. Until this change there was no way to reset the `cpu_template` once it was set. - Added a `rebase-snap` tool for rebasing a diff snapshot over a base snapshot. - Mmds version is persisted across snapshot-restore. Snapshot compatibility is preserved bidirectionally, to and from a Firecracker version that does not support persisting the Mmds version. In such cases, the default V1 option is used. - Added `--mmds-size-limit` for limiting the mmds data store size instead of piggy-backing on `--http-api-max-payload-size`. If left unconfigured it defaults to the value of `--http-api-max-payload-size`, to provide backwards compatibility. - Added optional `mem_backend` body field in `PUT` requests on `/snapshot/load`. This new parameter is an object that defines the configuration of the backend responsible for handling memory loading during snapshot restore. The `mem_backend` parameter contains `backend_type` and `backend_path` required fields. `backend_type` is an enum that can take either `File` or `Uffd` as value. Interpretation of `backend_path` field depends on the value of `backend_type`. If `File`, then the user must provide the path to file that contains the guest memory to be loaded. 
Otherwise, if `backend_type` is `Uffd`, then `backend_path` is the path to a unix domain socket where a custom page fault handler process is listening and expecting a UFFD to be sent by Firecracker. The UFFD is used to handle the guest memory page faults in the separate process. - Added logging for the snapshot/restore and async block device IO engine features to indicate they are in development preview. ### Changed - The API `PATCH` method for `/machine-config` can be now used to change `track_dirty_pages` on aarch64. - MmdsV2 is now Generally Available. - MmdsV1 is now deprecated and will be removed in Firecracker v2.0.0. Use MmdsV2 instead. - Deprecated `mem_file_path` body field in `PUT` on `/snapshot/load` request. ### Fixed - Fixed inconsistency that allowed the start of a microVM from a JSON file without specifying the `vcpu_count` and `mem_size_mib` parameters for `machine-config` although they are mandatory when configuring via the API. Now these fields are mandatory when specifying `machine-config` in the JSON file and when using the `PUT` request on `/machine-config`. - Fixed inconsistency that allowed a user to specify the `cpu_template` parameter and set `smt` to `True` in `machine-config` when starting from a JSON file on aarch64 even though they are not permitted when using `PUT` or `PATCH` in the API. Now Firecracker will return an error on aarch64 if `smt` is set to `True` or if `cpu_template` is specified. - Fixed inconsistent behaviour of the `PUT` method for `/machine-config` that would reset the `track_dirty_pages` parameter to `false` if it was not specified in the JSON body of the request, but left the `cpu_template` parameter intact if it was not present in the request. Now a `PUT` request for `/machine-config` will reset all optional parameters (`smt`, `cpu_template`, `track_dirty_pages`) to their default values if they are not specified in the `PUT` request. 
- Fixed inconsistency in the swagger definition with the current state of the `/vm/config` endpoint. ## [1.0.0] ### Added - Added jailer option `--parent-cgroup <relative_path>` to allow the placement of microvm cgroups in custom cgroup nested hierarchies. The default value is `<exec_file_name>` which is backwards compatible to the behavior before this change. - Added jailer option `--cgroup-version <1|2>` to support running the jailer on systems that have cgroup-v2. Default value is `1` which means that if `--cgroup-version` is not specified, the jailer will try to create cgroups on cgroup-v1 hierarchies only. - Added `--http-api-max-payload-size` parameter to configure the maximum payload size for PUT and PATCH requests. - Limit MMDS data store size to `--http-api-max-payload-size`. - Cleanup all environment variables in Jailer. - Added metrics for accesses to deprecated HTTP and command line API endpoints. - Added permanent HTTP endpoint for `GET` on `/version` for getting the Firecracker version. - Added `--metadata` parameter to enable MMDS content to be supplied from a file allowing the MMDS to be used when using `--no-api` to disable the API server. - Checksum file for the release assets. - Added support for custom headers to MMDS requests. Accepted headers are: `X-metadata-token`, which accepts a string value that provides a session token for MMDS requests; and `X-metadata-token-ttl-seconds`, which specifies the lifetime of the session token in seconds. - Support and validation for host and guest kernel 5.10. - A [kernel support policy](docs/kernel-policy.md). - Added `io_engine` to the pre-boot block device configuration. Possible values: `Sync` (the default option) or `Async` (only available for kernels newer than 5.10.51). The `Async` variant introduces a block device engine that uses io_uring for executing requests asynchronously, which is in **developer preview** (NOT for production use). See `docs/api_requests/block-io-engine.md`. 
- Added `block.io_engine_throttled_events` metric for measuring the number of virtio events throttled because of the IO engine. - New optional `version` field to PUT requests towards `/mmds/config` to configure MMDS version. Accepted values are `V1` and `V2` and default is `V1`. MMDS `V2` is **developer preview only** (NOT for production use) and it does not currently work after snapshot load. - Mandatory `network_interfaces` field to PUT requests towards `/mmds/config` which contains a list of network interface IDs capable of forwarding packets to MMDS. ### Changed - Removed the `--node` jailer parameter. - Deprecated `vsock_id` body field in `PUT`s on `/vsock`. - Removed the deprecated `--seccomp-level parameter`. - `GET` requests to MMDS require a session token to be provided through `X-metadata-token` header when using V2. - Allow `PUT` requests to MMDS in order to generate a session token to be used for future `GET` requests when version 2 is used. - Remove `allow_mmds_requests` field from the request body that attaches network interfaces. Specifying interfaces that allow forwarding requests to MMDS is done by adding the network interface's ID to the `network_interfaces` field of PUT `/mmds/config` request's body. - Renamed `/machine-config` `ht_enabled` to `smt`. - `smt` field is now optional on PUT `/machine-config`, defaulting to `false`. - Configuring `smt: true` on aarch64 via the API is forbidden. ### Fixed - GET `/vm/config` was returning a default config object after restoring from a snapshot. It now correctly returns the config of the original microVM, except for boot_config and the cpu_template and smt fields of the machine config, which are currently lost. - Fixed incorrect propagation of init parameters in kernel commandline. Related to: [#2709](https://github.com/firecracker-microvm/firecracker/issues/2709). - Adapt T2 and C3 CPU templates for kernel 5.10. 
Firecracker was not previously masking some CPU features of the host or emulated by KVM, introduced in more recent kernels: `umip`, `vmx`, `avx512_vnni`. - Fix jailer's cgroup implementation to accept properties that contain multiple dots. ## [0.25.0] ### Added - Added devtool build `--ssh-keys` flag to support fetching from private git repositories. - Added option to configure block device flush. - Added `--new-pid-ns` flag to the Jailer in order to spawn the Firecracker process in a new PID namespace. - Added API metrics for `GET`, `PUT` and `PATCH` requests on `/mmds` endpoint. - Added `--describe-snapshot` flag to Firecracker to fetch the data format version of a snapshot state file provided as argument. - Added `--no-seccomp` parameter for disabling the default seccomp filters. - Added `--seccomp-filter` parameter for supplying user-provided, custom filters. - Added the `seccompiler-bin` binary that is used to compile JSON seccomp filters into serialized BPF for Firecracker consumption. - Snapshotting support for GICv2 enabled guests. - Added `devtool install` to deploy built binaries in `/usr/local/bin` or a given path. - Added code logic to send `VIRTIO_VSOCK_EVENT_TRANSPORT_RESET` on snapshot creation, when the Vsock device is active. The event will close active connections on the guest. - Added `GET` request on `/vm/config` that provides full microVM configuration as a JSON HTTP response. - Added `--resource-limit` flag to jailer to limit resources such as: number of file descriptors allowed at a time (with a default value of 2048) and maximum size of files created by the process. ### Changed - Changed Docker images repository from DockerHub to Amazon ECR. - Fixed off-by-one error in virtio-block descriptor address validation. - Changed the `PATCH` request on `/balloon/statistics` to schedule the first statistics update immediately after processing the request. - Deprecated the `--seccomp-level parameter`. It will be removed in a future release. 
Using it logs a runtime warning. - Experimental gnu libc builds use empty default seccomp filters, allowing all system calls. ### Fixed - Fixed non-compliant check for the RTC device ensuring a fixed 4-sized data buffer. - Unnecessary interrupt assertion was removed from the RTC. However, a dummy interrupt is still allocated for snapshot compatibility reasons. - Fixed the SIGPIPE signal handler so Firecracker no longer exits. The signal is still recorded in metrics and logs. - Fixed ballooning API definitions by renaming all fields which mentioned "MB" to use "MiB" instead. - Snapshot related host files (vm-state, memory, block backing files) are now flushed to their backing mediums as part of the CreateSnapshot operation. - Fixed the SSBD mitigation not being enabled on `aarch64` with the provided `prod-host-setup.md`. - Fixed the balloon statistics not working after a snapshot restore event. - The `utc_timestamp_ms` now reports the timestamp in ms from the UTC UNIX Epoch, as the name suggests. It was previously using a monotonic clock with an undefined starting point. ## [0.24.0] ### Added - Added optional `resume_vm` field to `/snapshot/load` API call. - Added support for block rate limiter PATCH. - Added devtool test `-c|--cpuset-cpus` flag for cpus confinement when tests run. - Added devtool test `-m|--cpuset-mems` flag for memory confinement when tests run. - Added the virtio traditional memory ballooning device. - Added a mechanism to handle vCPU/VMM errors that result in process termination. - Added incremental guest memory snapshot support. - Added aarch64 snapshot support. ### Changed - Change the information provided in `DescribeInstance` command to provide microVM state information (Not started/Running/Paused) instead of whether it's started or not. - Removed the jailer `--extra-args` parameter. It was a noop, having been replaced by the `--` separator for extra arguments. 
- Changed the output of the `--version` command line parameter to include a list of supported snapshot data format versions for the firecracker binary. - Increased the maximum number of virtio devices from 11 to 19. - Added a new check that prevents creating v0.23 snapshots when more than 11 devices are attached. - If the stdout buffer is full and non-blocking, the serial writes no longer block. Any new bytes will be lost, until the buffer is freed. The device also logs these errors and increments the `uart.error_count` metric for each lost byte. ### Fixed - Fixed inconsistency in YAML file InstanceInfo definition ## [0.23.0] ### Added - Added metric for throttled block device events. - Added metrics for counting rate limiter throttling events. - Added metric for counting MAC address updates. - Added metrics for counting TAP read and write errors. - Added metrics for counting RX and TX partial writes. - Added metrics that measure the duration of pausing and resuming the microVM, from the VMM perspective. - Added metric for measuring the duration of the last full/diff snapshot created, from the VMM perspective. - Added metric for measuring the duration of loading a snapshot, from the VMM perspective. - Added metrics that measure the duration of pausing and resuming the microVM, from the API (user) perspective. - Added metric for measuring the duration of the last full/diff snapshot created, from the API (user) perspective. - Added metric for measuring the duration of loading a snapshot, from the API (user) perspective. - Added `track_dirty_pages` field to `machine-config`. If enabled, Firecracker can create incremental guest memory snapshots by saving the dirty guest pages in a sparse file. - Added a new API call, `PATCH /vm`, for changing the microVM state (to `Paused` or `Resumed`). - Added a new API call, `PUT /snapshot/create`, for creating a full or diff snapshot. - Added a new API call, `PUT /snapshot/load`, for loading a snapshot. 
- Added new jailer command line argument `--cgroup` which allow the user to specify the cgroups that are going to be set by the Jailer. - Added full support for AMD CPUs (General Availability). More details [here](README.md#supported-platforms). ### Fixed - Boot time on AMD achieves the desired performance (i.e under 150ms). ### Changed - The logger `level` field is now case-insensitive. - Disabled boot timer device after restoring a snapshot. - Enabled boot timer device only when specifically requested, by using the `--boot-timer` dedicated cmdline parameter. - firecracker and jailer `--version` now gets updated on each devtool build to the output of `git describe --dirty`, if the git repo is available. - MicroVM process is only attached to the cgroups defined by using `--cgroups` or the ones defined indirectly by using `--node`. - Changed `devtool build` to build jailer binary for `musl` only targets. Building jailer binary for `non-musl` targets have been removed. ## [0.22.0] ### Added - Added a new API call, `PUT /metrics`, for configuring the metrics system. - Added `app_name` field in InstanceInfo struct for storing the application name. - New command-line parameters for `firecracker`, named `--log-path`, `--level`, `--show-level` and `--show-log-origin` that can be used for configuring the Logger when starting the process. When using this method for configuration, only `--log-path` is mandatory. - Added a [guide](https://github.com/firecracker-microvm/firecracker/blob/v0.22.0/docs/devctr-image.md) for updating the dev container image. - Added a new API call, `PUT /mmds/config`, for configuring the `MMDS` with a custom valid link-local IPv4 address. - Added experimental JSON response format support for MMDS guest applications requests. - Added metrics for the vsock device. - Added `devtool strip` command which removes debug symbols from the release binaries. 
- Added the `tx_malformed_frames` metric for the virtio net device, emitted when a TX frame missing the VNET header is encountered. ### Fixed - Added `--version` flag to both Firecracker and Jailer. - Return `405 Method Not Allowed` MMDS response for non HTTP `GET` MMDS requests originating from guest. - Fixed folder permissions in the jail (#1802). - Any number of whitespace characters are accepted after ":" when parsing HTTP headers. - Potential panic condition caused by the net device expecting to find a VNET header in every frame. - Potential crash scenario caused by "Content-Length" HTTP header field accepting negative values. - Fixed #1754 - net: traffic blocks when running ingress UDP performance tests with very large buffers. ### Changed - Updated CVE-2019-3016 mitigation information in [Production Host Setup](docs/prod-host-setup.md) - In case of using an invalid JSON as a 'config-file' for Firecracker, the process will exit with return code 152. - Removed the `testrun.sh` wrapper. - Removed `metrics_fifo` field from the logger configuration. - Renamed `log_fifo` field from LoggerConfig to `log_path` and `metrics_fifo` field from MetricsConfig to `metrics_path`. - `PATCH /drives/{id}` only allowed post-boot. Use `PUT` for pre-boot updates to existing configurations. - `PATCH /network-interfaces/{id}` only allowed post-boot. Use `PUT` for pre-boot updates to existing configurations. - Changed returned status code from `500 Internal Server Error` to `501 Not Implemented`, for queries on the MMDS endpoint in IMDS format, when the requested resource value type is unsupported. - Allowed the MMDS data store to be initialized with all supported JSON types. Retrieval of these values within the guest, besides String, Array, and Dictionary, is only possible in JSON mode. - `PATCH` request on `/mmds` before the data store is initialized returns `403 BadRequest`. - Segregated MMDS documentation in MMDS design documentation and MMDS user guide documentation. 
## [0.21.0] ### Added - Support for booting with an initial RAM disk image. This image can be specified through the new `initrd_path` field of the `/boot-source` API request. ### Fixed - Fixed #1469 - Broken GitHub location for Firecracker release binary. - The jailer allows changing the default api socket path by using the extra arguments passed to firecracker. - Fixed #1456 - Occasional KVM_EXIT_SHUTDOWN and bad syscall (14) during VM shutdown. - Updated the production host setup guide with steps for addressing CVE-2019-18960. - The HTTP header parsing is now case insensitive. - The `put_api_requests` and `patch_api_requests` metrics for net devices were un-swapped. ### Changed - Removed redundant `--seccomp-level` jailer parameter since it can be simply forwarded to the Firecracker executable using "end of command options" convention. - Removed `memory.dirty_pages` metric. - Removed `options` field from the logger configuration. - Decreased release binary size by ~15%. - Changed default API socket path to `/run/firecracker.socket`. This path also applies when running with the jailer. - Disabled KVM dirty page tracking by default. - Removed redundant RescanBlockDevice action from the /actions API. The functionality is available through the PATCH /drives API. See `docs/api_requests/patch-block.md`. ## [0.20.0] ### Added - Added support for GICv2. ### Fixed - Fixed CVE-2019-18960 - Fixed a logical error in bounds checking performed on vsock virtio descriptors. - Fixed #1283 - Can't start a VM in AARCH64 with vcpus number more than 16. - Fixed #1088 - The backtrace are printed on `panic`, no longer causing a seccomp fault. - Fixed #1375 - Change logger options type from `Value` to `Vec` to prevent potential unwrap on None panics. 
- Fixed #1436 - Raise interrupt for TX queue used descriptors - Fixed #1439 - Prevent achieving 100% cpu load when the net device rx is throttled by the ratelimiter - Fixed #1437 - Invalid fields in rate limiter related API requests are now failing with a proper error message. - Fixed #1316 - correctly determine the size of a virtio device backed by a block device. - Fixed #1383 - Log failed api requests. ### Changed - Decreased release binary size by 10%. ## [0.19.0] ### Added - New command-line parameter for `firecracker`, named `--no-api`, which will disable the API server thread. If set, the user won't be able to send any API requests, neither before, nor after the vm has booted. It must be paired with `--config-file` parameter. Also, when API server is disabled, MMDS is no longer available now. - New command-line parameter for `firecracker`, named `--config-file`, which represents the path to a file that contains a JSON which can be used for configuring and starting a microVM without sending any API requests. - The jailer adheres to the "end of command options" convention, meaning all parameters specified after `--` are forwarded verbatim to Firecracker. - Added `KVM_PTP` support to the recommended guest kernel config. - Added entry in FAQ.md for Firecracker Guest timekeeping. ### Changed - Vsock API call: `PUT /vsocks/{id}` changed to `PUT /vsock` and no longer appear to support multiple vsock devices. Any subsequent calls to this API endpoint will override the previous vsock device configuration. - Removed unused 'Halting' and 'Halted' instance states. - Vsock host-initiated connections now implement a trivial handshake protocol. See the [vsock doc](docs/vsock.md#host-initiated-connections) for details. 
Related to: [#1253](https://github.com/firecracker-microvm/firecracker/issues/1253), [#1432](https://github.com/firecracker-microvm/firecracker/issues/1432), [#1443](https://github.com/firecracker-microvm/firecracker/pull/1443) ### Fixed - Fixed serial console on aarch64 (GitHub issue #1147). - Upon panic, the terminal is now reset to canonical mode. - Explicit error upon failure of vsock device creation. - The failure message returned by an API call is flushed in the log FIFOs. - Insert virtio devices in the FDT in order of their addresses sorted from low to high. - Enforce the maximum length of the network interface name to be 16 chars as specified in the Linux Kernel. - Changed the vsock property `id` to `vsock_id` so that the API client can be successfully generated from the swagger definition. ## [0.18.0] ### Added - New device: virtio-vsock, backed by Unix domain sockets (GitHub issue #650). See `docs/vsock.md`. ### Changed - No error is thrown upon a flush metrics intent if logger has not been configured. ### Fixed - Updated the documentation for integration tests. - Fixed high CPU usage before guest network interface is brought up (GitHub issue #1049). - Fixed an issue that caused the wrong date (month) to appear in the log. - Fixed a bug that caused the seccomp filter to reject legit syscalls in some rare cases (GitHub issue #1206). - Docs: updated the production host setup guide. - Docs: updated the rootfs and kernel creation guide. ### Removed - Removed experimental support for vhost-based vsock devices. ## [0.17.0] ### Added - New API call: `PATCH /machine-config/`, used to update VM configuration, before the microVM boots. - Added an experimental swagger definition that includes the specification for the vsock API call. - Added a signal handler for `SIGBUS` and `SIGSEGV` that immediately terminates the process upon intercepting the signal. - Added documentation for signal handling utilities. - Added [alpha] aarch64 support. 
- Added metrics for successful read and write operations of MMDS, Net and Block devices. ### Changed - `vcpu_count`, `mem_size_mib` and `ht_enabled` have been changed to be mandatory for `PUT` requests on `/machine-config/`. - Disallow invalid seccomp levels by exiting with error. ### Fixed - Incorrect handling of bind mounts within the jailed rootfs. - Corrected the guide for `Alpine` guest setup. ## [0.16.0] ### Added - Added [alpha] AMD support. - New `devtool` command: `prepare_release`. This updates the Firecracker version, crate dependencies and credits in preparation for a new release. - New `devtool` command: `tag`. This creates a new git tag for the specified release number, based on the changelog contents. - New doc section about building with glibc. ### Changed - Dropped the JSON-formatted `context` command-line parameter from Firecracker in favor of individual classic command-line parameters. - When running with `jailer` the location of the API socket has changed to `/api.socket` (API socket was moved _inside_ the jail). - `PUT` and `PATCH` requests on `/mmds` with data containing any value type other than `String`, `Array`, `Object` will return status code 400. - Improved multiple error messages. - Removed all kernel modules from the recommended kernel config. ### Fixed - Corrected the seccomp filter when building with glibc. ### Removed - Removed the `seccomp.bad_syscalls` metric. ## [0.15.2] ### Fixed - Corrected the conditional compilation of the seccomp rule for `madvise`. ## [0.15.1] ### Fixed - A `madvise` call issued by the `musl` allocator was added to the seccomp allow list to prevent Firecracker from terminating abruptly when allocating memory in certain conditions. ## [0.15.0] ### Added - New API action: SendCtrlAltDel, used to initiate a graceful shutdown, if the guest has driver support for i8042 and AT Keyboard. See [the docs](docs/api_requests/actions.md#sendctrlaltdel) for details. 
- New metric counting the number of egress packets with a spoofed MAC: `net.tx_spoofed_mac_count`. - New API call: `PATCH /network-interfaces/`, used to update the rate limiters on a network interface, after the start of a microVM. ### Changed - Added missing `vmm_version` field to the InstanceInfo API swagger definition, and marked several other mandatory fields as such. - New default command line for guest kernel: `reboot=k panic=1 pci=off nomodules 8250.nr_uarts=0 i8042.noaux i8042.nomux i8042.nopnp i8042.dumbkbd`. ### Fixed - virtio-blk: VIRTIO_BLK_T_FLUSH now working as expected. - Vsock devices can be attached when starting Firecracker using the jailer. - Vsock devices work properly when seccomp filtering is enabled. ## [0.14.0] ### Added - Documentation for development environment setup on AWS in `dev-machine-setup.md`. - Documentation for microVM networking setup in `docs/network-setup.md`. - Limit the maximum supported vCPUs to 32. ### Changed - Log the app version when the `Logger` is initialized. - Pretty print panic information. - Firecracker terminates with exit code 148 when a syscall which is not present in the allow list is intercepted. ### Fixed - Fixed build with the `vsock` feature. ## [0.13.0] ### Added - Documentation for Logger API Requests in `docs/api_requests/logger.md`. - Documentation for Actions API Requests in `docs/api_requests/actions.md`. - Documentation for MMDS in `docs/mmds.md`. - Flush metrics on request via a PUT `/actions` with the `action_type` field set to `FlushMetrics`. ### Changed - Updated the swagger definition of the `Logger` to specify the required fields and provide default values for optional fields. - Default `seccomp-level` is `2` (was previously 0). - API Resource IDs can only contain alphanumeric characters and underscores. ### Fixed - Seccomp filters are now applied to all Firecracker threads. - Enforce minimum length of 1 character for the jailer ID. 
- Exit with error code when starting the jailer process fails. ### Removed - Removed `InstanceHalt` from the list of possible actions. ## [0.12.0] ### Added - The `/logger` API has a new field called `options`. This is an array of strings that specify additional logging configurations. The only supported value is `LogDirtyPages`. - When the `LogDirtyPages` option is configured via `PUT /logger`, a new metric called `memory.dirty_pages` is computed as the number of pages dirtied by the guest since the last time the metric was flushed. - Log messages on both graceful and forceful termination. - Availability of the list of dependencies for each commit inside the code base. - Documentation on vsock experimental feature and host setup recommendations. ### Changed - `PUT` requests on `/mmds` always return 204 on success. - `PUT` operations on `/network-interfaces` API resources no longer accept the previously required `state` parameter. - The jailer starts with `--seccomp-level=2` (was previously 0) by default. - Log messages use `anonymous-instance` as instance id if none is specified. ### Fixed - Fixed crash upon instance start on hosts without 1GB huge page support. - Fixed "fault_message" inconsistency between Open API specification and code base. - Ensure MMDS compatibility with C5's IMDS implementation. - Corrected the swagger specification to ensure `OpenAPI 2.0` compatibility. ## [0.11.0] ### Added - Apache-2.0 license - Docs: - [charter](CHARTER.md) - [contribution guidelines](CONTRIBUTING.md) - [design](docs/design.md) - [getting started guide](docs/getting-started.md) - [security policy](SECURITY.md) - [specifications](SPECIFICATION.md) - **Experimental** vhost-based vsock implementation. ### Changed - Improved MMDS network stack performance. - If the logging system is not yet initialized (via `PUT /logger`), log events are now sent to stdout/stderr. 
- Moved the `instance_info_fails` metric under `get_api_requests` - Improved [readme](README.md) and added links to more detailed information, now featured in subject-specific docs. ### Fixed - Fixed bug in the MMDS network stack, that caused some RST packets to be sent without a destination. - Fixed bug in `PATCH /drives`, whereby the ID in the path was not checked against the ID in the body. ## [0.10.1] ### Fixed - The Swagger definition was corrected. ## [0.10.0] ### Added - Each Firecracker process has an associated microVM Metadata Store (MMDS). Its contents can be configured using the `/mmds` API resource. ### Changed - The boot source is specified only with the `kernel_image_path` and the optional parameter `boot_args`. All other fields are removed. - The `path_on_host` property in the drive specification is now marked as _mandatory_. - PATCH drive only allows patching/changing the `path_on_host` property. - All PUT and PATCH requests return the status code 204. - CPUID brand string (aka model name) now includes the host CPU frequency. - API requests which add guest network interfaces have an additional parameter, `allow_mmds_requests` which defaults to `false`. - Stopping the guest (e.g. using the `reboot` command) also terminates the Firecracker process. When the Firecracker process ends for any reason, (other than `kill -9`), metrics are flushed at the very end. - On startup `jailer` closes all inherited file descriptors based on `sysconf(_SC_OPEN_MAX)` except input, output and error. - The microVM ID prefixes each Firecracker log line. This ID also appears in the process `cmdline` so it's now possible to `ps | grep ` for it. ## [0.9.0] ### Added - Seccomp filtering is configured via the `--seccomp-level` jailer parameter. - Firecracker logs the starting addresses of host memory areas provided as guest memory slots to KVM. - The metric `panic_count` gets incremented to signal that a panic has occurred. 
- Firecracker logs a backtrace when it crashes following a panic. - Added basic instrumentation support for measuring boot time. ### Changed - `StartInstance` is a synchronous API request (it used to be an asynchronous request). ### Fixed - Ensure that fault messages sent by the API have valid JSON bodies. - Use HTTP response code 500 for internal Firecracker errors, and 400 for user errors on InstanceStart. - Serialize the machine configuration fields to the correct data types (as specified in the Swagger definition). - NUMA node assignment is properly enforced by the jailer. - The `is_root_device` and `is_read_only` properties are now marked as required in the Swagger definition of `Drive` object properties. ### Removed - `GET` requests on the `/actions` API resource are no longer supported. - The metrics associated with asynchronous actions have been removed. - Remove the `action_id` parameter for `InstanceStart`, both from the URI and the JSON request body. ## [0.8.0] ### Added - The jailer can now be configured to enter a preexisting network namespace, and to run as a daemon. - Enabled PATCH operations on `/drives` resources. ### Changed - The microVM `id` supplied to the jailer may now contain alphanumeric characters and hyphens, up to a maximum length of 64 characters. - Replaced the `permissions` property of `/drives` resources with a boolean. - Removed the `state` property of `/drives` resources. ## [0.7.0] ### Added - Rate limiting functionality allows specifying an initial one time burst size. - Firecracker can now boot from an arbitrary boot partition by specifying its unique id in the driver's API call. - Block device rescan is triggered via a PUT `/actions` with the drive ID in the action body's `payload` field and the `action_type` field set to `BlockDeviceRescan`. ### Changed - Removed `noapic` from the default guest kernel command line. - The `action_id` parameter is no longer required for synchronous PUT requests to `/actions`. 
- PUT requests are no longer allowed on `/drives` resources after the guest has booted. ### Fixed - Fixed guest instance kernel loader to accelerate vCPUs launch and consequently guest kernel boot. - Fixed network emulation to improve IO performance. ## [0.6.0] ### Added - Firecracker uses two different named pipes to record human readable logs and metrics, respectively. ### Changed - Seccomp filtering can be enabled via setting the `USE_SECCOMP` environment variable. - It is possible to supply only a partial specification when attaching a rate limiter (i.e. just the bandwidth or ops parameter). - Errors related to guest network interfaces are now more detailed. ### Fixed - Fixed a bug that was causing Firecracker to panic whenever a `PUT` request was sent on an existing network interface. - The `id` parameter of the `jailer` is required to be an RFC 4122-compliant UUID. - Fixed an issue which caused the network RX rate limiter to be more restrictive than intended. - API requests which contain unknown fields will generate an error. - Fixed an issue related to high CPU utilization caused by improper `KVM PIT` configuration. - It is now possible to create more than one network tun/tap interface inside a jailed Firecracker. ## [0.5.0] ### Added - Added metrics for API requests, VCPU and device actions for the serial console (`UART`), keyboard (`i8042`), block and network devices. Metrics are logged every 60 seconds. - A CPU features template for C3 is available, in addition to the one for T2. - Seccomp filters restrict Firecracker from calling any other system calls than the minimum set it needs to function properly. The filters are enabled by setting the `USE_SECCOMP` environment variable to 1 before running Firecracker. - Firecracker can be started by a new binary called `jailer`. 
The jailer takes as command line arguments a unique ID, the path to the Firecracker binary, the NUMA node that Firecracker will be assigned to and a `uid` and `gid` for Firecracker to run under. It sets up a `chroot` environment and a `cgroup`, and calls exec to morph into Firecracker. ### Changed - In case of failure, the metrics and the panic location are logged before aborting. - Metric values are reset with every flush. - `CPUTemplate` is now called `CpuTemplate` in order to work seamlessly with the swagger code generator for Go. - `firecracker-beta.yaml` is now called `firecracker.yaml`. ### Fixed - Handling was added for several untreated KVM exit scenarios, which could have led to panic. - Fixed a bug that caused Firecracker to crash when attempting to disable the `IA32_DEBUG_INTERFACE MSR` flag in the T2 CPU features. ### Removed - Removed a leftover file generated by the logger unit tests. - Removed `firecracker-v1.0.yaml`. ## [0.4.0] ### Added - The CPU Template can be set with an API call on `PUT /machine-config`. The only available template is T2. - Hyperthreading can be enabled/disabled with an API call on `PUT /machine-config`. By default, hyperthreading is disabled. - Added boot time performance test (`tests/performance/test_boottime.py`). - Added Rate Limiter for VirtIO/net and VirtIO/block devices. The Rate Limiter uses two token buckets to limit rate on bytes/s and ops/s. The rate limiter can be (optionally) configured per drive with a `PUT` on `/drives/{drive_id}` and per network interface with a `PUT` on `/network-interface/{iface_id}`. - Implemented pre-boot PUT updates for `/boot-source`, `/drives`, `/network-interfaces` and `/vsock`. - Added integration tests for `PUT` updates. ### Changed - Moved the API definition (`swagger/firecracker-beta.yaml`) to the `api_server` crate. - Removed `"console=ttyS0"` and added `"8250.nr_uarts=0"` to the default kernel command line to decrease the boot time. 
- Changed the CPU topology to have all logical CPUs on a single socket. - Removed the upper bound on CPU count as with musl there is no good way to get the total number of logical processors on a host. - Build time tests now print the full output of commands. - Disabled the Performance Monitor Unit and the Turbo Boost. - Check the expected KVM capabilities before starting the VM. - Logs now have timestamps. ### Fixed - `testrun.sh` can run on platforms with more than one package manager by setting the package manager via a command line parameter (`-p`). - Allow correct set up of multiple network-interfaces with auto-generated MAC. - Fixed sporadic bug in VirtIO which was causing lost packets. - Don't allow `PUT` requests with empty body on `/machine-config`. - Deny `PUT` operations after the microvm boots (exception: the temporary fix for live resize of block devices). ### Removed - Removed examples crate. This used to have a Python example of starting Firecracker. This is replaced by `test_api.py` integration tests. - Removed helper scripts for getting coverage and coding style errors. These were replaced by `test_coverage.py` and `test_style.py` integration tests. - Removed `--vmm-no-api` command line option. Firecracker can only be started via the API. ## [0.3.0] ### Added - Users can interrogate the Machine Configuration (i.e. vcpu count and memory size) using a `GET` request on `/machine-config`. - The logging system can be configured through the API using a `PUT` on `/logger`. - Block devices support live resize by calling `PUT` with the same parameters as when the block was created. - Release builds have Link Time Optimization (LTO) enabled. - Firecracker is built with `musl`, resulting in a statically linked binary. - More in-tree integration tests were added as part of the continuous integration system. ### Changed - The vcpu count is enforced to `1` or an even number. - The Swagger definition of rate limiters was updated. 
- Syslog-enabled logs were replaced with a host-file backed mechanism. ### Fixed - The host topology of the CPU and the caches is not leaked into the microvm anymore. - Boot time was improved by advertising the availability of the TSC deadline timer. - Fixed an issue which prevented Firecracker from working on 4.14 (or newer) host kernels. - Specifying the MAC address for an interface through the API is optional. ### Removed - Removed support for attaching vsock devices. - Removed support for building Firecracker with glibc. ## [0.2.0] ### Added - Users can now interrogate Instance Information (currently just instance state) through the API. ### Changed - Renamed `api/swagger/all.yaml` to `api/swagger/firecracker-v1.0.yaml` which specifies targeted API support for Firecracker v1.0. - Renamed `api/swagger/firecracker-v0.1.yaml` to `api/swagger/firecracker-beta.yaml` which specifies the currently supported API. - Users can now enforce that an emulated block device is read-only via the API. To specify whether a block device is read-only or read-write, an extra "permissions" field was added to the Drive definition in the API. The root filesystem is automatically mounted in the guest OS as `ro`/`rw` according to the specified "permissions". It's the responsibility of the user to mount any other read-only block device as such within the guest OS. - Users can now stop the guest VM using the API. Actions of type `InstanceHalt` are now supported via the API. ### Fixed - Added support for `getDeviceID()` in `virtIO-block`. Without this, the guest Linux kernel would complain at boot time that the operation is unsupported. - `stdin` control is returned to the Firecracker process when guest VM is inactive. Raw mode `stdin` is forwarded to the guest OS when guest VM is running. ### Removed - Removed `api/swagger/actions.yaml`. - Removed `api/swagger/devices.yaml`. - Removed `api/swagger/firecracker-mvp.yaml`. - Removed `api/swagger/limiters.yaml`. 
## [0.1.1] ### Changed - Users can now specify the MAC address of a guest network interface via the `PUT` network interface API request. Previously, the guest MAC address parameter was ignored. ### Fixed - Fixed a guest memory allocation issue, which previously led to a potentially significant memory chunk being wasted. - Fixed an issue which caused compilation problems, due to a compatibility breaking transitive dependency in the tokio suite of crates. ## [0.1.0] ### Added - One-process virtual machine manager (one Firecracker per microVM). - RESTful API running on a unix socket. The API supported by v0.1 can be found at `api/swagger/firecracker-v0.1.yaml`. - Emulated keyboard (`i8042`) and serial console (`UART`). The microVM serial console input and output are connected to those of the Firecracker process (this allows direct console access to the guest OS). - The capability of mapping an existing host tun-tap device as a VirtIO/net device into the microVM. - The capability of mapping an existing host file as a VirtIO/block device into the microVM. - The capability of creating a VirtIO/vsock between the host and the microVM. - Default demand fault paging & CPU oversubscription. ================================================ FILE: CHARTER.md ================================================ # Firecracker Charter ## Mission Our mission is to enable secure, multi-tenant, minimal-overhead execution of container and function workloads. ## Tenets (unless you know better ones) These tenets guide Firecracker's development: 1. **Built-In Security**: We provide compute security barriers that enable multi-tenant workloads, and cannot be mistakenly disabled by customers. Customer workloads are simultaneously considered sacred (shall not be touched) and malicious (shall be defended against). We continuously invest in defense in depth and maintain mechanisms that ensure security best practices. 1. 
**Light-Weight Virtualization**: We prioritize measuring Firecracker's hardware overhead in the dimensions that are important for our customers, and we strive to make this overhead negligible. 1. **Minimalist in Features**: If it's not clearly required for our mission, we won't build it. We maintain a single implementation per capability, and deprecate obsolete implementations; resolving exceptions is a high priority issue. 1. **Compute Oversubscription**: All of the hardware compute resources exposed by Firecracker to guests can be securely oversubscribed. ## Contributions & Project Roles All contributions must align with this charter and follow Firecracker's [contribution process](CONTRIBUTING.md). Firecracker [maintainers](MAINTAINERS.md) merge contributions into the main branch and create Firecracker releases. Maintainers are also subject to the mission and tenets outlined above. Anyone may submit and review contributions. ================================================ FILE: CODE_OF_CONDUCT.md ================================================ # Code of Conduct This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact [opensource-codeofconduct@amazon.com](mailto:opensource-codeofconduct@amazon.com) with any additional questions or comments. ================================================ FILE: CONTRIBUTING.md ================================================ # Contributions Welcome Firecracker is running serverless workloads at scale within AWS, but it's still day 1 on the journey guided by our [mission](CHARTER.md). There's a lot more to build and we welcome all contributions. There's a lot to contribute to in Firecracker. We've opened issues for all the features we want to build and improvements we want to make. Good first issues are labeled accordingly. 
We're also keen to hear about your use cases and how we can support them, your ideas, and your feedback for what's already here. If you're just looking for quick feedback for an idea or proposal, open an [issue](https://github.com/firecracker-microvm/firecracker/issues) or chat with us on the [Firecracker Slack workgroup](https://firecracker-microvm.slack.com). Follow the [contribution workflow](#contribution-workflow) for submitting your changes to the Firecracker codebase. If you want to receive high-level but still commit-based feedback for a contribution, follow the [request for comments](#request-for-comments) steps instead. ## Contribution Workflow Firecracker uses the “fork-and-pull” development model. Follow these steps if you want to merge your changes to Firecracker: 1. Within your fork of [Firecracker](https://github.com/firecracker-microvm/firecracker), create a branch for your contribution. Use a meaningful name. 1. Create your contribution, meeting all [contribution quality standards](#contribution-quality-standards) 1. [Create a pull request](https://help.github.com/articles/creating-a-pull-request-from-a-fork/) against the main branch of the Firecracker repository. 1. Add two reviewers to your pull request (a maintainer will do that for you if you're new). Work with your reviewers to address any comments and obtain a minimum of 2 approvals from [maintainers](MAINTAINERS.md). To update your pull request, amend existing commits whenever applicable. Then force-push the new changes to your pull request branch. Address all review comments you receive. 1. Once the pull request is approved, one of the maintainers will merge it. ## Request for Comments If you just want to receive feedback for a contribution proposal, open an “RFC” (“Request for Comments”) pull request: 1. On your fork of [Firecracker](https://github.com/firecracker-microvm/firecracker), create a branch for the contribution you want feedback on. Use a meaningful name. 1. 
Create your proposal based on the existing codebase. 1. [Create a pull request](https://help.github.com/articles/creating-a-pull-request-from-a-fork/) against the main branch of the Firecracker repository. Prefix your pull request name with `[RFC]`. 1. Discuss your proposal with the community on the pull request page (or on any other channel). Add the conclusion(s) of this discussion to the pull request page. ## Contribution Quality Standards Most quality and style standards are enforced automatically during integration testing. For ease of use you can set up a git pre-commit hook by running the following in the Firecracker root directory: ``` cat >> .git/hooks/pre-commit << EOF ./tools/devtool checkstyle || exit 1 ./tools/devtool checkbuild --all || exit 1 EOF ``` The first command will automatically lint your Rust, markdown and python changes when running `git commit`, as well as running any other checks our CI validates as part of its 'Style' step. Most reported violations can be automatically fixed using `./tools/devtool fmt`. The second command will then check that the code correctly compiles on all supported architectures, and that it passes Rust clippy rules defined for the project. Your contribution needs to meet the following standards: - Separate each **logical change** into its own commit. - Each commit must pass all unit & code style tests, and the full pull request must pass all integration tests. See [tests/README.md](tests/README.md) for information on how to run tests. - Unit test coverage must _increase_ the overall project code coverage. - Include integration tests for any new functionality in your pull request. - Document all your public functions. - Add a descriptive message for each commit. Follow [commit message best practices](https://github.com/erlang/otp/wiki/writing-good-commit-messages). - A good commit message may look like ``` A descriptive title of 72 characters or fewer A concise description where each line is 72 characters or fewer. 
Signed-off-by: Co-authored-by: ``` - **Usage of `unsafe` is heavily discouraged**. If `unsafe` is required, it should be accompanied by a comment detailing its... - Justification, potentially including quantifiable reasons why safe alternatives were not used (e.g. via a benchmark showing a valuable[^1] performance improvements). - Safety, as per [`clippy::undocumented_unsafe_blocks`](https://rust-lang.github.io/rust-clippy/master/#undocumented_unsafe_blocks). This comment must list all invariants of the called function, and explain why there are upheld. If relevant, it must also prove that [undefined behavior](https://doc.rust-lang.org/reference/behavior-considered-undefined.html) is not possible. E.g. ```rust // Test creating a resource. // JUSTIFICATION: This cannot be accomplished without unsafe as // `external_function()` returns `RawFd`. An alternative here still uses // unsafe e.g. `drop(unsafe { OwnedFd::from_raw_fd(external_function()) });`. // SAFETY: `external_function()` returns a valid file descriptor. unsafe { libc::close(external_function()); } ``` - Avoid using `Option::unwrap`/`Result::unwrap`. Prefer propagating errors instead of aborting execution, or using `Option::expect`/`Result::except` if no alternative exists. Leave a comment explaining why the code will not panic in practice. Often, `unwrap`s are used because a previous check ensures they are safe, e.g. ```rs let my_value: u32 = ...; if my_value <= u16::MAX { Ok(my_value.try_into::().unwrap()) } else { Err(Error::Overflow) } ``` These can often be rewritten using `.map`/`.map_err` or `match`/`if let` constructs such as ```rs my_value.try_into::() .map_err(|_| Error::Overflow) ``` See also [this PR](https://github.com/firecracker-microvm/firecracker/pull/3557) for a lot of examples. - Document your pull requests. Include the reasoning behind each change, and the testing done. 
- Acknowledge Firecracker's [Apache 2.0 license](LICENSE) and certify that no part of your contribution contravenes this license by signing off on all your commits with `git commit -s`.
``` We require that every contribution to Firecracker is signed with a Developer Certificate of Origin. DCO checks are enabled via , and your PR will fail CI without it. Additionally, we kindly ask you to use your real name. We do not accept anonymous contributors, nor those utilizing pseudonyms. Each commit must include a DCO which looks like this: ``` Signed-off-by: Jane Smith ``` You may type this line on your own when writing your commit messages. However, if your `user.name` and `user.email` are set in your git config, you can use `-s` or `--signoff` to add the `Signed-off-by` line to the end of the commit message automatically. Forgot to add DCO to a commit? Amend it with `git commit --amend -s`. [^1]: Performance improvements in non-hot paths are unlikely to be considered valuable. ================================================ FILE: CREDITS.md ================================================ # Firecracker Credits and Thanks (This file is autogenerated using [update-credits.sh](tools/update-credits.sh).) Firecracker started with the code from the Chrome OS Virtual Machine Monitor ([crosvm](https://github.com/google/crosvm)), a VMM written in Rust with a focus on safety and security. Thanks go to: - [Zach Reizner](https://github.com/zachreizner) - [Dylan Reid](https://github.com/dgreid) - [Daniel Verkamp](https://github.com/danielverkamp) - [Stephen Barber](https://github.com/smibarber) - [Chirantan Ekbote](https://github.com/jynnantonix) - [Jason D. 
Clinton](https://github.com/jclinton) - Sonny Rao Contributors to the Firecracker repository: - Aaqa Ishtyaq - Aaron Hill - Aaron O'Mullan - Abhijeet Kasurde - acatangiu - acsmyth <54330152+acsmyth@users.noreply.github.com> - Adam Jensen - Adam Leskis - Adrian Catangiu - Ahmed Abouzied - Akhil Mohan - Alakesh - Aleksa Sarai - Alex Chan - Alex Glikson - Alexandra Iordache - Alexandru Agache - Alexandru Branciog - Alexandru Cihodaru - Alexandru-Cezar Sardan - Alin Dima - Anatoli Babenia - Andrea Manzini - Andreea Florescu - Andrei Casu-Pop - Andrei Cipu - Andrei Sandu - Andrew Laucius - Andrew Yao - Andrii Radyk - andros21 - Angus McInnes - Arjun Ramachandrula - Arun Gupta - Arunav Sanyal - Ashwin Ginoria - Atsushi Ishibashi - Aussie Schnore - Austin Vazquez - Ayato Tokubi - Babis Chalios - Bartosz Zbytniewski - Begley Brothers Inc - Ben Holmes - Benjamin Fry - bin liu - binoychitale - Bob Potter - Bogdan Ionita - Brandon Duffany - Brandon Pike - Caleb Albers <7110138+CalebAlbers@users.noreply.github.com> - Cam Mannett - Carlos López - chaos matrix - Char1ee - Chinmay Kousik - Chris Christensen - Christian González - Christopher Diehl - Christos Katsakioris - cneira - Colin Percival - Colton J. McCurdy - Constantin Musca - CuriousCorrelation - czybjtu - Dakshin Devanand - Damien Stanton - Dan Horobeanu - Dan Lemmond - David Michael - David Nguyen - David Son - David Ventura - Deepesh Pathak - defunct - Denis Andrejew - Derek Manwaring - Diana Popa - Dmitrii - Echo-Head-Wall <101990236+Echo-Head-Wall@users.noreply.github.com> - Eddie Cazares - Eduard Kyvenko - Egor Lazarchuk - Emmanuel Ferdman - EvanJP - Felipe R. 
Monteiro - Filippo Sironi - Fraser Pringle - Gabe Jackson - Gabriel Ionescu - Gabriel Kopper <41166074+gckopper@users.noreply.github.com> - Garrett Squire - George Pisaltu - George Siton - german gomez - Gilad Naaman - Greg Dunn - Gregory Brzeski - Grzegorz Uriasz - Gudmundur Bjarni Olafsson - Gulshan Kumar - Gábor Lipták - hacker65536 - hatf0 - Henri Yandell - Hermes - Himanshu Neema - HQ01 - huang-jl <1046678590@qq.com> - Iggy Jackson - ihciah - Ioana Chirca - Ishwor Gurung - Iulian Barbu - Ives van Hoorne - Jack Thomson - jackabald - James Curtis - James Turnbull - Javier Romero - Jeff Widman - Jerome Gravel-Niquet - Jing Yang - joaoleal - Joel Winarske - jonas serrano - Jonathan Browne <12983479+JBYoshi@users.noreply.github.com> - Jonathan Woollett-Light - Josh Abraham - Josh McConnell - Joshua Abraham - Julian Stecklina - Justus Adam - Ján Mochňak - kanpov - karthik nedunchezhiyan - KarthikVelayutham - Kazuyoshi Kato - keyangxie - Kornel - Krishna Kumar T - krk - kumargu - Laura Loghin - lifupan - Liu Jiang - Liviu Berciu - Lloyd - lloydmeta - longxiangqiao - Lorenzo Fontana - LOU Xun - Lucas Zanela - Lukas Schwerdtfeger - Luminita Voicu - maciejhirsz - Malhar Vora - Manohar Castelino - Marc Brooker - Marco Cali - Marco Vedovati - Markus Ziller - Masatoshi Higuchi - Massimiliano Torromeo - Matias Teragni - Matt Wilson - Matthew Buckingham-Bishop - Matthew Schlebusch - Max Wittek - Mehrdad Arshad Rad - Michael Jarrett <1077485+EmeraldShift@users.noreply.github.com> - Michael Saah - Mihai Stan - milahu - moricho - Muki Kiboigo - Muskaan Singla - Narek Galstyan - Nathan Hoang - Nathan Sizemore - Nicolas Mesa - NikeNano - Nikita Kalyazin - Nikita Zakirov - Nikolay Edigaryev - Noah Meyerhans - not required - one230six <723682061@qq.com> - Pablo Barbáchano - Patrick Roy - Paweł Bęza - Peng Tao - Penny Zheng - Peter Hrvola - Petre Eftime - Petros Angelatos - Piyush Jain - python273 - Radu Iliescu - Radu Matei Lăcraru - Radu Weiss - Ram Sripracha - rares - razn - Ria 
- Riccardo Mancini - Richard Case - River Phillips - Rob Devereux - Robert Grimes - Rodrigue Chakode - Rolf Neugebauer - Ryan Russell - Ryan Summerlin - Sam Jackson - Samuel Knox - Samuel Ortiz - ScmTble - seafoodfry <99568361+seafoodfry@users.noreply.github.com> - Sean Lavine - Sebastien Boeuf - Serban Iorga - shakram02 - Shen Jiale - Sheng-Wei (Way) Chen - Shion Yamashita - singwm - sladynnunes - Sripracha - Stefan Nita <32079871+stefannita01@users.noreply.github.com> - StemCll - Steven Wirges - Sudan Landge - sundar.preston.789@gmail.com - Takahiro Itazuri - Tal Hoffman - Tamio-Vesa Nakajima - tidux - Tim Bannister - Tim Deegan - timvisee - Tobias Pfandzelter - Tomas Valenta - tommady - Tomoya Iwata - Trăistaru Andrei Cristian - Tyler Anton - Urvil Patel - Vibha Acharya - Volker Simonis - Wei Yang - Weixiao Huang - Wesley Norris - William Hammond - wllenyj - wt-l00 - Xiangpeng Hao - xibz - xiekeyang - Ye Sijun - Yichen Zhang - YLyu - YUAN LYU - Yuval Kohavi - Yılmaz ŞEN - Zhenyu Qi - Zi Shen Lim - Zicklag - Дамјан Георгиевски ================================================ FILE: Cargo.toml ================================================ [workspace] members = ["src/*"] # We exclude the jailer below so that it does not get build by default. This is because "cargo build" compiles # for the gnu target, and the jailer needs a statically compiled binary to work correctly. 
# See https://github.com/firecracker-microvm/firecracker/commit/3bf285c8f8a815149923c562dd7edaffcaf10c4e # and https://github.com/firecracker-microvm/firecracker/issues/2102 default-members = [ "src/clippy-tracing", "src/cpu-template-helper", "src/firecracker", "src/rebase-snap", "src/seccompiler", "src/snapshot-editor", "src/acpi-tables", ] resolver = "2" [workspace.lints.rust] missing_debug_implementations = "warn" unexpected_cfgs = { level = "warn", check-cfg = ['cfg(kani)'] } [workspace.lints.clippy] ptr_as_ptr = "warn" undocumented_unsafe_blocks = "warn" cast_possible_truncation = "warn" cast_possible_wrap = "warn" cast_sign_loss = "warn" exit = "warn" tests_outside_test_module = "warn" assertions_on_result_states = "warn" error_impl_error = "warn" or_fun_call = "warn" [profile.dev] panic = "abort" [profile.release] panic = "abort" lto = true strip = "none" [profile.bench] strip = "debuginfo" ================================================ FILE: DEPRECATED.md ================================================ # Deprecated Features The following functionality of Firecracker is deprecated, and will be removed in a future major Firecracker release, in accordance with our [release policy](docs/RELEASE_POLICY.md). 
- \[[#2763](https://github.com/firecracker-microvm/firecracker/pull/2763)\] The `vsock_id` body field in `PUT` requests on `/vsock` - \[[#2980](https://github.com/firecracker-microvm/firecracker/pull/2980)\] The `mem_file_path` body field in `PUT` requests on `/snapshot/load` - \[[#2973](https://github.com/firecracker-microvm/firecracker/pull/2973)\] MicroVM Metadata Service v1 (MMDSv1) - \[[#4126](https://github.com/firecracker-microvm/firecracker/pull/4126)\] Static CPU templates - \[[#4209](https://github.com/firecracker-microvm/firecracker/pull/4209)\] The `rebase-snap` tool - \[[#4500](https://github.com/firecracker-microvm/firecracker/pull/4500)\] The `--start-time-cpu-us` and `--start-time-us` CLI arguments - \[[#4428](https://github.com/firecracker-microvm/firecracker/pull/4428)\] Booting microVMs using MPTable and command line parameters for VirtIO devices. The functionality is substituted with ACPI. - \[[#2628](https://github.com/firecracker-microvm/firecracker/pull/2628)\] The `--basic` parameter of `seccompiler-bin`. - \[[#5274](https://github.com/firecracker-microvm/firecracker/pull/5274)\]: The `enable_diff_snapshots` body field in `PUT` requests on `/snapshot/load` ================================================ FILE: FAQ.md ================================================ # Firecracker Frequently Asked Questions ## About Firecracker ### What is Firecracker? Firecracker is an open source Virtual Machine Monitor (VMM) that enables secure, multi-tenant, minimal-overhead execution of container and function workloads. ### Who developed Firecracker? Firecracker was built by developers at Amazon Web Services to enable services such as [AWS Lambda](https://aws.amazon.com/lambda/) and [AWS Fargate](https://aws.amazon.com/fargate/) to improve resource utilization and customer experience, while providing the security and isolation required of public cloud infrastructure. 
Firecracker started from Chromium OS's Virtual Machine Monitor, [crosvm](https://github.com/google/crosvm), an open source VMM written in Rust. Today, crosvm and Firecracker have diverged to serve very different customer needs. [Rust-vmm](https://github.com/rust-vmm) is an open source community where we collaborate with the crosvm maintainers and other groups and individuals to build and share quality Rust virtualization components. ### Why did you develop Firecracker? When we launched Lambda in November of 2014, we were focused on providing a secure [serverless](https://aws.amazon.com/serverless/) experience. At launch we used per-customer EC2 instances to provide strong security and isolation between customers. As Lambda grew, we saw the need for technology to provide a highly secure, flexible, and efficient runtime environment for services like Lambda and Fargate. Using our experience building isolated EC2 instances with hardware virtualization technology, we started an effort to build a VMM that was tailored to integrate with container ecosystems. ### What processors does Firecracker support? The Firecracker VMM is built to be processor agnostic. Intel, AMD and 64 bit ARM processors are supported for production workloads. You can find more details [here](README.md#supported-platforms). ### Can Firecracker be used within the container ecosystem? Yes. Firecracker is integrated with [Kata Containers](https://github.com/kata-containers/documentation/wiki/Initial-release-of-Kata-Containers-with-Firecracker-support), [Weave FireKube](https://www.weave.works/oss/firekube/) (via [Weave Ignite](https://github.com/weaveworks/ignite)), and containerd via [firecracker-containerd](https://github.com/firecracker-microvm/firecracker-containerd). We welcome contributions that enable Firecracker to integrate naturally with the container ecosystem and provide more choices in how container workloads are isolated. ### What is the difference between Firecracker and QEMU? 
Firecracker is an [alternative to QEMU](https://www.redhat.com/en/blog/all-you-need-know-about-kvm-userspace) that is purpose-built for running serverless functions and containers safely and efficiently, and nothing more. Firecracker is written in Rust, provides a minimal required device model to the guest operating system while excluding non-essential functionality (only 6 emulated devices are available: virtio-net, virtio-balloon, virtio-block, virtio-vsock, serial console, and a minimal keyboard controller used only to stop the microVM). This, along with a streamlined kernel loading process enables a < 125 ms startup time and a < 5 MiB memory footprint. The Firecracker process also provides a RESTful control API, handles resource rate limiting for microVMs, and provides a microVM metadata service to enable the sharing of configuration data between the host and guest. ### What operating systems are supported by Firecracker? Firecracker supports Linux host and guest operating systems as well as [OSv](http://blog.osv.io/blog/2019/04/19/making-OSv-run-on-firecraker/) guests. Currently supported host/guest kernel versions can be found in the [kernel support policy](docs/kernel-policy.md). ### What is the open source license for Firecracker? Firecracker is licensed under the Apache License, version 2.0, allowing you to freely use, copy, and distribute your changes under the terms of your choice. [Read more about Apache 2.0](https://www.apache.org/licenses/LICENSE-2.0). Crosvm code sections are licensed under a [BSD-3-Clause license](https://opensource.org/licenses/BSD-3-Clause) that also allows you to use, copy, and distribute your changes under the terms of your choice. ### How can I contribute? Firecracker is an AWS open source project that encourages contributions from customers and the developer community. Any contribution is welcome as long as it aligns with our [charter](CHARTER.md). 
You can learn more about how to contribute in [CONTRIBUTING.md](CONTRIBUTING.md). You can chat with others in the community on the [Firecracker Slack workspace](https://firecracker-microvm.slack.com). ### How is Firecracker project governed? The Firecracker [team at Amazon Web Services](MAINTAINERS.md) owns project maintainer responsibilities, permissions to merge pull requests, and the ability to create new Firecracker releases. ## Technical FAQ & Troubleshooting ### Can I emulate a different architecture in the guest than the one on the host? Guest operating systems must be built for the same CPU architecture as the host on which it will run. Firecracker does not support running microVMs on any architecture other than the one the host is running on. In other words, running an OS built for a `x86_64` on an `aarch64` system will not work, and vice versa. ### I tried using an initrd for boot but it doesn't seem to be used. Is initrd supported? Initrds are only recently supported in Firecracker. If your release predates issue [#228](https://github.com/firecracker-microvm/firecracker/issues/208) being resolved, please update. ### Firecracker is not showing any output on the console. In order to debug the issue, check the response of the `InstanceStart` API request. Possible responses: - **Error**: Submit a new issue with the label "Support: Failure". - **Success**: If the boot was successful, you should get a response with 204 as the status code. If you have no output in the console, most likely you will have to update the kernel command line. By default, Firecracker starts with the serial console disabled for boot time performance reasons. Example of a kernel valid command line that enables the serial console (which goes in the `boot_args` field of the `/boot-source` Firecracker API resource): ```console console=ttyS0 reboot=k panic=1 nomodule ``` ### How can I configure multiple Ethernet devices through the kernel command line? 
The `ip=` boot param in the linux kernel only actually supports configuring a single interface. Multiple interfaces can be set up in Firecracker using the API, but guest IP configuration at boot time through boot arguments can only be done for a single interface. ### My guest wall-clock is drifting, how can I fix it? The canonical solution is to use NTP in your guests. However, if you want to run Firecracker at scale, we suggest using a PTP emulated device as the guest's NTP time source so as to minimize network traffic and resource overhead. With this solution the guests will constantly update time to stay in sync with host wall-clock. They do so using cheap para-virtualized calls into kvm ptp instead of actual network NTP traffic. To be able to do this you need to have a guest kernel compiled with `KVM_PTP` support: ```console CONFIG_PTP_1588_CLOCK=y CONFIG_PTP_1588_CLOCK_KVM=y ``` Our [recommended x86_64 guest kernel config](resources/guest_configs) already has these included. Now `/dev/ptp0` should be available in the guest. Next you need to configure `/dev/ptp0` as a NTP time source. For example when using `chrony`: 1. Add `refclock PHC /dev/ptp0 poll 3 dpoll -2 offset 0` to the chrony conf file (`/etc/chrony/chrony.conf`) 1. Restart the `chrony` daemon. You can see more info about the `refclock` parameters [here](https://chrony-project.org/doc/3.4/chrony.conf.html#refclock). Adjust them according to your needs. ### Each Firecracker opens 20+ file descriptors. Is this an issue? The relatively high FD usage is expected and correct. Firecracker heavily relies on event file descriptors to drive device emulation. ### How can I gracefully reboot the guest? How can I gracefully poweroff the guest? Regardless of architecture, Firecracker does not currently support guest reboot. On **ARM**, issuing either `poweroff` or `reboot` inside a Linux guest results in a graceful system shutdown and the termination of the Firecracker process. 
This works because KVM emulates the PSCI interface for power management and notifies Firecracker when the guest tries to change the power state of the virtual machine. On **x86**, Firecracker does not virtualize power management (e.g. there is no ACPI PM support). Consequently: - `poweroff`: This will shut down the guest OS, but because the guest has no way of requesting a power-off, the Firecracker process will remain alive. - `reboot`: Running reboot will successfully terminate the Firecracker process if the guest is booted with `reboot=k` in the kernel command line. This option instructs Linux to reset the CPU(s) via the i8042 (keyboard controller) reset line when rebooting. Firecracker, which emulates the i8042 controller, intercepts the reset command and terminates the process. - API Command: Issuing a `SendCtrlAltDel` action via the Firecracker API injects a `Ctrl+Alt+Del` keyboard sequence into the guest. Depending on the guest configuration (see ctrl-alt-del.target on systemd systems), this typically triggers the same reboot behavior described above. ### How can I create my own rootfs or kernel images? Check out our [rootfs and kernel image creation guide](docs/rootfs-and-kernel-setup.md). ### We are seeing page allocation failures from Firecracker in the `dmesg` output. If you see errors like ... ```console [] fc_vmm: page allocation failure: order:6, mode:0x140c0c0 (GFP_KERNEL|__GFP_COMP|__GFP_ZERO), nodemask=(null) [] fc_vmm cpuset= mems_allowed=0 ``` ... then your host is running out of memory. KVM is attempting to do an allocation of 2^`order` bytes (in this case, 6) and there aren't sufficient contiguous pages. Possible mitigations are: - Reduce memory pressure on the host. - Maybe the host has memory but it's too fragmented for the kernel to use. The allocation above of order 6 means the kernel could not find 2^6 **consecutive** pages. 
One way to mitigate memory fragmentation is to [set a higher value](https://linuxhint.com/vm_min_free_kbytes_sysctl/) for `vm.min_free_kbytes` - Or investigate other [mitigations](https://savvinov.com/2019/10/14/memory-fragmentation-the-silent-performance-killer/) ### How can I configure and start a microVM without sending API calls? Passing an optional command line parameter, `--config-file`, to the Firecracker process allows this type of configuration. This parameter must be the path to a file that contains the JSON specification that will be used to configure and start the microVM. One example of such file can be found at `tests/framework/vm_config.json`. ### Firecracker fails to start and returns an Out of Memory error If the Firecracker process exits with `12` exit code (`Out of memory` error), the root cause is that there is not enough memory on the host to be used by the Firecracker microVM. If the microVM was not configured in terms of memory size through an API request, the host needs to meet the minimum requirement in terms of free memory size, namely 128 MB of free memory which the microVM defaults to. This may be related to "We are seeing page allocation failures ..." above. To validate, run this: ```sh sudo dmesg | grep "page allocation failure" ``` ### Firecracker fails to start and returns "Resource busy" error If another hypervisor like VMware or VirtualBox is running on the host and locks `/dev/kvm`, Firecracker process will fail to start with "Resource busy" error. This issue can be resolved by terminating the other hypervisor running on the host, and allowing Firecracker to start. ================================================ FILE: LICENSE ================================================ Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. 
"License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. 
"Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. 
Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the 
Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. 
Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. 
To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ================================================ FILE: MAINTAINERS.md ================================================ # Maintainers Firecracker is maintained by a dedicated team within Amazon: - Babis Chalios - Egor Lazarchuk - Ilias Stamatis - Jack Thomson - Marco Cali - Nikita Kalyazin - Riccardo Mancini - Takahiro Itazuri ================================================ FILE: NOTICE ================================================ Firecracker Copyright 2017-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. SPDX-License-Identifier: Apache-2.0 Portions Copyright 2017 The Chromium OS Authors. All rights reserved. Use of this source code is governed by a BSD-style license that can be found in the THIRD-PARTY file. The Firecracker release bundle includes libseccomp which is available under the LGPLv2.1 license. 
This is used in the Firecracker build process to produce cBPF bytecode that is shipped alongside Firecracker for use by the Linux kernel. ================================================ FILE: PGP-KEY.asc ================================================ -----BEGIN PGP PUBLIC KEY BLOCK----- mDMEXoNLVhYJKwYBBAHaRw8BAQdA/RBM+jgzq6EXzTc7zDmDgcSNENKJqZ7djI7G Iz1Os720TkZpcmVjcmFja2VyIFNlY3VyaXR5IERpc2Nsb3N1cmVzIDxmaXJlY3Jh Y2tlci1zZWN1cml0eS1kaXNjbG9zdXJlc0BhbWF6b24uY29tPoiZBBMWCABBAhsD BQkJZgGABQsJCAcCBhUKCQgLAgQWAgMBAh4BAheAFiEEp07t53fN17pTgiajKCi5 jaKQW+AFAl6DTtACGQEACgkQKCi5jaKQW+BnBwEA/+VBs/9t5aUBb20HZMSYL3tu Ffh+fI1FSOsMSjXfAe8A/2MlxfdDdYtAhxwwN1Y+hGrm/PdT1LULJqoz14r3xUUJ tCVCYXJidSwgSXVsaWFuIE1hcmlhbiA8aXVsQGFtYXpvbi5jb20+iHgEMBYIACAW IQSnTu3nd83XulOCJqMoKLmNopBb4AUCXoNO2QIdAAAKCRAoKLmNopBb4EHTAQDS ZzcN7MU7mtM79+ev9aoX0/OltBPq/QQy22qa3wbCwQD/cd1nXu3pKUKvyHtTpVFB Jd+YNpG2ox3e3SfVZyXeRgiIlgQTFggAPgIbAwUJCWYBgAULCQgHAgYVCgkICwIE FgIDAQIeAQIXgBYhBKdO7ed3zde6U4ImoygouY2ikFvgBQJeg07QAAoJECgouY2i kFvgKggBAIUpB5stOoo/NjSMwpTMwa/jiqephv/GarCrluYRbQ/aAQDTn7aCX4ab b/vqLjiePos5tuToBoRXkL8QzdGyeONpBrQsQ29tcHV0ZSBDYXBzdWxlIDxjb21w dXRlLWNhcHN1bGVAYW1hem9uLmNvbT6IlgQTFggAPhYhBKdO7ed3zde6U4Imoygo uY2ikFvgBQJeg09JAhsDBQkJZgGABQsJCAcCBhUKCQgLAgQWAgMBAh4BAheAAAoJ ECgouY2ikFvgyNcBAKSacUhi9Kb4C7Ybdzu4EKgQxfKnBucoubonF658K/GVAQDA d0sVeTVu3g/Z/7DHCBg8Jn+LSl3fCoEUeK4Z2fBeAbg4BF6DS1YSCisGAQQBl1UB BQEBB0B8rM1FwSBZzXAg2IxqJ8qUkwMVorbXlA+p/MNXQdeACAMBCAeIfgQYFggA JhYhBKdO7ed3zde6U4ImoygouY2ikFvgBQJeg0tWAhsMBQkJZgGAAAoJECgouY2i kFvgz8sA/jz0dUAwtTMtkdvjP4Dewrq1d8o7sgy1wV5Ax/a20DFyAQCJQIMf4MtF jrojJjAIHRUlukf4VVEtGS7IroK56JS3Bw== =1xBx -----END PGP PUBLIC KEY BLOCK----- ================================================ FILE: README.md ================================================ Firecracker Logo Title Our mission is to enable secure, multi-tenant, minimal-overhead execution of container and function workloads. Read more about the Firecracker Charter [here](CHARTER.md). 
## What is Firecracker? Firecracker is an open source virtualization technology that is purpose-built for creating and managing secure, multi-tenant container and function-based services that provide serverless operational models. Firecracker runs workloads in lightweight virtual machines, called microVMs, which combine the security and isolation properties provided by hardware virtualization technology with the speed and flexibility of containers. ## Overview The main component of Firecracker is a virtual machine monitor (VMM) that uses the Linux Kernel Virtual Machine (KVM) to create and run microVMs. Firecracker has a minimalist design. It excludes unnecessary devices and guest-facing functionality to reduce the memory footprint and attack surface area of each microVM. This improves security, decreases the startup time, and increases hardware utilization. Firecracker has also been integrated in container runtimes, for example [Kata Containers](https://github.com/kata-containers/kata-containers) and [Flintlock](https://github.com/liquidmetal-dev/flintlock). Firecracker was developed at Amazon Web Services to accelerate the speed and efficiency of services like [AWS Lambda](https://aws.amazon.com/lambda/) and [AWS Fargate](https://aws.amazon.com/fargate/). Firecracker is open sourced under [Apache version 2.0](LICENSE). To read more about Firecracker, check out [firecracker-microvm.io](https://firecracker-microvm.github.io). ## Getting Started To get started with Firecracker, download the latest [release](https://github.com/firecracker-microvm/firecracker/releases) binaries or build it from source. 
You can build Firecracker on any Unix/Linux system that has Docker running (we use a development container) and `bash` installed, as follows: ```bash git clone https://github.com/firecracker-microvm/firecracker cd firecracker tools/devtool build toolchain="$(uname -m)-unknown-linux-musl" ``` The Firecracker binary will be placed at `build/cargo_target/${toolchain}/debug/firecracker`. For more information on building, testing, and running Firecracker, go to the [quickstart guide](docs/getting-started.md). The overall security of Firecracker microVMs, including the ability to meet the criteria for safe multi-tenant computing, depends on a well configured Linux host operating system. A configuration that we believe meets this bar is included in [the production host setup document](docs/prod-host-setup.md). ## Contributing Firecracker is already running production workloads within AWS, but it's still Day 1 on the journey guided by our [mission](CHARTER.md). There's a lot more to build and we welcome all contributions. To contribute to Firecracker, check out the development setup section in the [getting started guide](docs/getting-started.md) and then the Firecracker [contribution guidelines](CONTRIBUTING.md). ## Releases New Firecracker versions are released via the GitHub repository [releases](https://github.com/firecracker-microvm/firecracker/releases) page, typically every two or three months. A history of changes is recorded in our [changelog](CHANGELOG.md). The Firecracker release policy is detailed [here](docs/RELEASE_POLICY.md). ## Design Firecracker's overall architecture is described in [the design document](docs/design.md). ## Features & Capabilities Firecracker consists of a single micro Virtual Machine Manager process that exposes an API endpoint to the host once started. The API is [specified in OpenAPI format](src/firecracker/swagger/firecracker.yaml). Read more about it in the [API docs](docs/api_requests). 
The **API endpoint** can be used to: - Configure the microVM by: - Setting the number of vCPUs (the default is 1). - Setting the memory size (the default is 128 MiB). - Configuring a [CPU template](docs/cpu_templates/cpu-templates.md). - Add one or more network interfaces to the microVM. - Add one or more read-write or read-only disks to the microVM, each represented by a file-backed block device. - Trigger a block device re-scan while the guest is running. This enables the guest OS to pick up size changes to the block device's backing file. - Change the backing file for a block device, before or after the guest boots. - Configure rate limiters for virtio devices which can limit the bandwidth, operations per second, or both. - Configure the logging and metric system. - `[BETA]` Configure the data tree of the guest-facing metadata service. The service is only available to the guest if this resource is configured. - Add a [vsock socket](docs/vsock.md) to the microVM. - Add an [entropy device](docs/entropy.md) to the microVM. - Add a [pmem device](docs/pmem.md) to the microVM. - Configure and manage [memory hotplugging](docs/memory-hotplug.md). - Start the microVM using a given kernel image, root file system, and boot arguments. - [x86_64 only] Stop the microVM. **Built-in Capabilities**: - Demand fault paging and CPU oversubscription enabled by default. - Advanced, thread-specific seccomp filters for enhanced security. - [Jailer](docs/jailer.md) process for starting Firecracker in production scenarios; applies a cgroup/namespace isolation barrier and then drops privileges. 
## Tested platforms We test all combinations of: | Instance | Host OS & Kernel | Guest Rootfs | Guest Kernel | | :------------------------------------------ | :--------------- | :----------- | :----------- | | m5n.metal (Intel Cascade Lake) | al2 linux_5.10 | ubuntu 24.04 | linux_5.10 | | m6i.metal (Intel Ice Lake) | al2023 linux_6.1 | | linux_6.1 | | m7i.metal-24xl (Intel Sapphire Rapids) | | | | | m7i.metal-48xl (Intel Sapphire Rapids) | | | | | **m8i.metal-48xl (Intel Granite Rapids)\*** | | | | | **m8i.metal-96xl (Intel Granite Rapids)\*** | | | | | m6a.metal (AMD Milan) | | | | | m7a.metal-48xl (AMD Genoa) | | | | | m6g.metal (Graviton 2) | | | | | m7g.metal (Graviton 3) | | | | | m8g.metal-24xl (Graviton 4) | | | | | m8g.metal-48xl (Graviton 4) | | | | **\***: We **only** support AWS EC2 8th Gen Intel (\*8i) instances using a 6.1 host kernel. This is due to poor kernel support for Granite Rapids CPUs on 5.10. ## Known issues and Limitations - The `pl031` RTC device on aarch64 does not support interrupts, so guest programs which use an RTC alarm (e.g. `hwclock`) will not work. ## Performance Firecracker's performance characteristics are listed as part of the [specification documentation](SPECIFICATION.md). All specifications are a part of our commitment to supporting container and function workloads in serverless operational models, and are therefore enforced via continuous integration testing. ## Policy for Security Disclosures The security of Firecracker is our top priority. If you suspect you have uncovered a vulnerability, contact us privately, as outlined in our [security policy document](SECURITY.md); we will immediately prioritize your disclosure. ## FAQ & Contact Frequently asked questions are collected in our [FAQ doc](FAQ.md). You can get in touch with the Firecracker community in the following ways: - Security-related issues, see our [security policy document](SECURITY.md). 
- Chat with us on our [Slack workspace](https://join.slack.com/t/firecracker-microvm/shared_invite/zt-2tc0mfxpc-tU~HYAYSzLDl5XGGJU3YIg) _Note: most of the maintainers are on a European time zone._ - Open a GitHub issue in this repository. - Email the maintainers at [firecracker-maintainers@amazon.com](mailto:firecracker-maintainers@amazon.com). When communicating within the Firecracker community, please mind our [code of conduct](CODE_OF_CONDUCT.md). ================================================ FILE: SECURITY.md ================================================ ## Reporting Security Issues We take all security reports seriously. When we receive such reports, we will investigate and subsequently address any potential vulnerabilities as quickly as possible. If you discover a potential security issue in this project, please notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/) or directly via email to [AWS Security](mailto:aws-security@amazon.com). Please do *not* create a public GitHub issue in this project. ================================================ FILE: SPECIFICATION.md ================================================ # Specification The specifications below quantify Firecracker's promise to enable minimal-overhead execution of container and serverless workloads. These specifications are enforced by integration tests (that run for each PR and main branch merge). On an [M5D.metal instance][1] (with hyperthreading disabled) and an [M6G.metal instance][2] and given host system resources are available (e.g., there are enough free CPU cycles, there is enough RAM, etc.), customers can rely on the following: 1. **Stability:** The Firecracker virtual machine manager starts (up to API socket availability) within `8 CPU ms`[^1] and never crashes/halts/terminates for internal reasons once started. 
_Note_: The wall-clock time has a large standard deviation, spanning `6 ms to 60 ms`, with typical durations around `12 ms`. 1. **Failure Information:** When failures occur due to external circumstances, they are logged[^2] by the Firecracker process. 1. **API Stability:** The API socket is always available and the API conforms to the in-tree [Open API specification](src/firecracker/swagger/firecracker.yaml). API failures are logged in the Firecracker log. 1. **Overhead:** For a Firecracker virtual machine manager running a microVM with `1 CPUs and 128 MiB of RAM`, and a guest OS with the Firecracker-tuned kernel: - Firecracker's virtual machine manager threads have a memory overhead `<= 5 MiB`. The memory overhead is dependent on the **workload** (e.g. a workload with multiple [vsock](docs/vsock.md) connections might generate a memory overhead > 5MiB) and on the VMM **configuration** (the overhead does not include the memory used by the [MMDS](docs/mmds/mmds-design.md) data store. The overhead is tested as part of the Firecracker CI using a [memory cop](tests/host_tools/memory.py). - It takes `<= 125 ms` to go from receiving the Firecracker InstanceStart API call to the start of the Linux guest user-space `/sbin/init` process. The boot time is measured using a VM with the serial console disabled and a minimal kernel and root file system. For more details check the [boot time](tests/integration_tests/performance/test_boottime.py) integration tests. - The compute-only guest CPU performance is `> 95%` of the equivalent bare-metal performance. _`[integration test pending]`_ 1. **IO Performance:** With a host CPU core dedicated to the Firecracker device emulation thread, - the guest achieves up to `14.5 Gbps` network throughput by using `<= 80%` of the host CPU core for emulation. _`[integration test pending]`_ - the guest achieves up to `25 Gbps` network throughput by using `100%` of the host CPU core for emulation. 
_`[integration test pending]`_ - the virtualization layer adds on average `0.06ms` of latency. _`[integration test pending]`_ [See further details on network performance](docs/network-performance.md) - the guest achieves up to `1 GiB/s` storage throughput by using `<= 70%` of the host CPU core for emulation. _`[integration test pending]`_ 1. **Telemetry:** Firecracker emits logs and metrics to the named pipes passed to the logging API. Any logs and metrics emitted while their respective pipes are full will be lost. Any such events will be signaled through the `lost-logs` and `lost-metrics` counters. [^1]: CPU ms are actual ms of a user space thread's on-CPU runtime; useful for getting consistent measurements for some performance metrics. [^2]: No logs are currently produced in the span of time between the `jailer` process start-up and the logging system initialization in the `firecracker` process. [1]: https://aws.amazon.com/ec2/instance-types/m5/ [2]: https://aws.amazon.com/ec2/instance-types/m6/ ================================================ FILE: THIRD-PARTY ================================================ // Copyright 2017 The Chromium OS Authors. All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above // copyright notice, this list of conditions and the following disclaimer // in the documentation and/or other materials provided with the // distribution. // * Neither the name of Google Inc. nor the names of its // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. 
// // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ================================================ FILE: deny.toml ================================================ [licenses] version = 2 allow = [ "MIT", "Apache-2.0", "BSD-3-Clause", "ISC", "Unicode-3.0", "OpenSSL" ] [[bans.deny]] name = "serde_derive" version = ">1.0.171, < 1.0.184" ================================================ FILE: docs/RELEASE_POLICY.md ================================================ # Firecracker Release Policy This document describes Firecracker release planning, API support, and the Firecracker release lifetime. Firecracker provides this Release Policy to help customers effectively plan their Firecracker based operations. ## Firecracker releases Firecracker uses [semantic versioning 2.0.0](https://semver.org/spec/v2.0.0.html) for all releases. By definition, the API version implemented by a Firecracker binary is equivalent to that binary’s version. Semantic versions are comprised of three fields in the form: `vMAJOR.MINOR.PATCH`. Additional labels for pre-release and build metadata are available as extensions to the MAJOR.MINOR.PATCH format. For example: v0.20.0, v0.22.0-beta5, and v99.123.77+foo.bar.baz.5. 
Firecracker publishes major, minor and patch releases: - Patch release - The `PATCH` field is incremented whenever critical bugs and/or security issues are found in a supported release. The fixes in a PATCH release do not change existing behavior or the user interface. Upgrade is recommended. - Minor release - When the `MINOR` field is incremented, the new release adds new features, bug fixes, or both without changing the existing user interface or user facing functionality. Adding new APIs can be done in a `MINOR` Firecracker release as long as it doesn’t change the functionality of the APIs available in the previous release. Minor releases are shipped when features are ready for production. Multiple features may be bundled in the same release. - Major release - When the `MAJOR` field is incremented, the new release adds new features and/or bug fixes, changing the existing user interface or user facing functionality. This may make the new release incompatible with previous ones. A major release will likely require changes from other components interacting with Firecracker, e.g. API request, commands, or guest components. The changes will be detailed in the release notes. Major releases are published whenever features or bug fixes that change the existing user interface, or user facing functionality, are ready for production. ## Release support The Firecracker maintainers will only provide support for Firecracker releases under our [repository's release page](https://github.com/firecracker-microvm/firecracker/releases). 
The Firecracker maintainers will provide patch releases for critical bugs and security issues when they are found, for: - the last two Firecracker `vMAJOR.MINOR` releases for up to 1 year from release date; - any Firecracker `vMAJOR.MINOR` release for at least 6 months from release date; - for each `vMAJOR`, the latest `MINOR` for 1 year since release date; Starting with release v1.0, for each major and minor release, we will also be specifying the supported kernel versions. ### Examples 1. Considering an example where the last Firecracker releases are: - v2.10.0 released on 2022-05-01 - v2.11.0 released on 2022-07-10 - v2.12.0 released on 2022-09-11 In case of an event occurring in 2022-10-03, all three releases will be patched since less than 6 months elapsed from their MINOR release time. 1. Considering an example where the last Firecracker releases are: - v2.10.0 released on 2022-05-01 - v2.11.0 released on 2022-07-10 - v2.12.0 released on 2022-09-11 In case of an event occurring in 2023-05-04, v2.11 and v2.12 will be patched since those were the last 2 Firecracker major releases and less than a year passed since their release time. 1. Considering an example where the last Firecracker releases are: - v2.14.0 released on 2022-05-01 - v3.0.0 released on 2022-07-10 - v3.1.0 released on 2022-09-11 In case of an event occurring in 2023-01-13, v2.14 will be patched since it is the last minor of v2 and has less than one year since release, while v3.0 and v3.1 will be patched since they were the last two Firecracker releases and less than 6 months have passed since release time. ## Release Status | Release | Release Date | Latest Patch | Min. 
end of support | Official end of Support | | ------: | -----------: | -----------: | ------------------: | :------------------------------ | | v1.15 | 2026-03-09 | v1.15.0 | 2026-09-09 | Supported | | v1.14 | 2025-12-17 | v1.14.3 | 2026-06-17 | Supported | | v1.13 | 2025-08-28 | v1.13.2 | 2026-02-28 | 2026-03-09 (v1.15 released) | | v1.12 | 2025-05-07 | v1.12.1 | 2025-11-07 | 2025-12-17 (v1.14 released) | | v1.11 | 2025-03-18 | v1.11.0 | 2025-09-18 | 2025-09-18 (end of 6mo support) | | v1.10 | 2024-11-07 | v1.10.1 | 2025-05-07 | 2025-05-07 (v1.12 released) | | v1.9 | 2024-09-02 | v1.9.1 | 2025-03-02 | 2025-03-18 (v1.11 released) | | v1.8 | 2024-07-10 | v1.8.0 | 2025-01-10 | 2025-01-10 (end of 6mo support) | | v1.7 | 2024-03-18 | v1.7.0 | 2024-09-18 | 2024-09-18 (end of 6mo support) | | v1.6 | 2023-12-20 | v1.6.0 | 2024-06-20 | 2024-07-10 (v1.8 released) | | v1.5 | 2023-10-09 | v1.5.1 | 2024-04-09 | 2024-04-09 (end of 6mo support) | | v1.4 | 2023-07-20 | v1.4.1 | 2024-01-20 | 2024-01-20 (end of 6mo support) | | v1.3 | 2023-03-02 | v1.3.3 | 2023-09-02 | 2023-10-09 (v1.5 released) | | v1.2 | 2022-11-30 | v1.2.1 | 2023-05-30 | 2023-07-20 (v1.4 released) | | v1.1 | 2022-05-06 | v1.1.4 | 2022-11-06 | 2023-03-02 (v1.3 released) | | v1.0 | 2022-01-31 | v1.0.2 | 2022-07-31 | 2022-11-30 (v1.2 released) | | v0.25 | 2021-03-13 | v0.25.2 | 2021-09-13 | 2022-03-13 (end of 1y support) | ## API support The Firecracker API follows the semantic versioning standard. For a new release, we will increment the: - MAJOR version when we make breaking changes in our API; - MINOR version when we add or change functionality in a backwards compatible manner; - PATCH version when we make backwards compatible bug fixes. Given a Firecracker version X.Y.Z user-generated client, it is guaranteed to work as expected with all Firecracker binary versions X.V.W, where V >= Y. 
### Deprecation of elements in the API Firecracker uses [semantic versioning 2.0.0](https://semver.org/spec/v2.0.0.html) in terms of deprecating and removing API elements. We will consider a deprecated API element to be an element which still has backing functionality and will be supported at least until the next MAJOR version, where they _will be removed_. The support period of deprecated API elements is tied to [the Firecracker release support](https://github.com/firecracker-microvm/firecracker/blob/main/docs/RELEASE_POLICY.md#release-support). ## Developer preview features A feature is "in" developer preview if it’s marked as such in the [Firecracker roadmap](https://github.com/orgs/firecracker-microvm/projects/42) and/or in the [Firecracker release notes](https://github.com/firecracker-microvm/firecracker/releases). Features in developer preview should not be used in production as they are not supported. Firecracker team may not provide patch releases for critical bug fixes or security issues found in features marked as developer preview. Features in developer preview may be subject to changes at any time. Changes in existing user interface or user facing functionality of a feature marked as developer preview can be released without changing the major version. ## Release planning Firecracker feature planning is outlined in the [Firecracker roadmap](https://github.com/firecracker-microvm/firecracker/projects). ================================================ FILE: docs/api-change-runbook.md ================================================ # Runbook for Firecracker API changes This runbook will cover triaging API changes and ways to implement them appropriately. ## Definitions - *Deprecated* - We will consider a deprecated API element (endpoint and/or parts of an endpoint) to be an element which still provides users with access to its backing functionality and can be used, but will soon be removed completely along with said functionality in an upcoming version. 
- *Mandatory endpoint* - We will consider an endpoint mandatory if Firecracker cannot operate normally without performing a request to it. - *Optional endpoint* - We will consider an endpoint optional if Firecracker can operate normally without performing a request to it and the functionality behind it is not essential. - *Mandatory header/field* - We will consider a header/field mandatory in an HTTP message if the request will fail without specifying said header/field. - *Optional header/field* - We will consider a header/field optional in an HTTP message if the request will succeed without specifying said header/field. ## Triaging API changes For the purposes of this document, there are 2 main categories for API changes, namely *breaking* and *non-breaking*. ### What is a breaking change? A breaking change in the API is a change that makes the API incompatible with the previous version (backwards incompatible). In an effort to avoid a breaking change, we may take the route of deprecation and incrementing the minor version in an effort to preserve backwards compatibility, but breaking changes will always ultimately result in incrementing the major version. Here is a non-exhaustive list of such changes: 1. Adding a new mandatory endpoint/HTTP method. 1. Removing an endpoint/method. 1. Adding a mandatory request header/field. 1. Removing a request header/field. 1. Adding a mandatory response field. 1. Removing a response header/field. ### What is NOT a breaking change? A change in the API is not a breaking change if the version resulting from it is compatible with the previous one (backwards compatible). The outcome of a non-breaking change should always include incrementing the minor version but must not lead to incrementing the major version by itself. Here is a non-exhaustive list of such changes: 1. Deprecating an endpoint/method/field. 1. Adding a new optional endpoint/method. 1. Adding an optional request header/field. 1. Adding a response header. 1. 
Adding additional valid inputs for fields in API requests. 1. Making mandatory headers/fields optional. 1. Making mandatory endpoints optional. 1. Changing the URI of an endpoint. 1. Changing the metrics output format. ## Implementing API changes API changes result in version increases. As Firecracker’s support policy is based on [semantic versioning 2.0.0][1], we will look at API changes from this point of view. > Given a version number MAJOR.MINOR.PATCH, increment the: MAJOR version when > you make incompatible API changes; MINOR version when you add functionality in > a backwards compatible manner; PATCH version when you make backwards > compatible bug fixes. ![Flowchart for changing the Firecracker API](images/api_change_flowchart.png?raw=true "Flowchart for changing the Firecracker API") *All deprecated endpoints are supported until at least the next major version release, where they may be _removed_.* ### How to follow the flowchart - with examples We will go through multiple types of API changes and provide ways to ensure we don’t break our backwards compatibility promise to our customers. The list is split into categories of components changed. - *Entire endpoints* - Adding an optional endpoint with new functionality - Increment minor version. - Adding a command line parameter - Increment minor version. - Removing an endpoint - Deprecate endpoint and increment minor version → Remove endpoint when incrementing major version. - Adding a mandatory endpoint - Increment major version. - *Request* - Adding an optional header/field - Increment minor version. - Renaming a header/field - Accept both names and deprecate the old one → Remove old name when incrementing major version. - Removing a header/field - Make said header/field optional → Remove header/field when incrementing major version. - Changing the URI of an endpoint - Redirect the old endpoint to the new one and deprecate the old one → Remove old endpoint when incrementing major version. 
- Adding a mandatory header/field - Increment major version. - *Response* - Adding a header/field - Create a new, separate endpoint with the changes and deprecate the old one → Remove old endpoint when incrementing major version. - Removing a header/field - Create a new, separate endpoint with the changes and deprecate the old one → Remove old endpoint when incrementing major version. - *Command line parameter* - Renaming a command line parameter - Accept both names and deprecate the old one → Remove old name when incrementing major version. - Changing expected value taken by a command line parameter - Accept both names and deprecate the old one → Remove old name when incrementing major version. In case the outlined solution for your case is not feasible (e.g. because of security concerns), break the glass and increment the major version. ## How to deprecate As outlined in the diagram above, sometimes we have to deprecate endpoints partially or entirely. In this section we will go through different situations where we have to deprecate something and ways of avoiding common pitfalls when doing so. ### Deprecating endpoints Some paths in the flowchart above lead to deprecation. Based on the initial conditions, there are 2 major cases where we need to deprecate an endpoint: - *Changing an existing endpoint* - Often happens because directly changing the endpoint would be a breaking change. - We usually create a clone of the old endpoint we want to deprecate and make the necessary changes to it. - We usually expose both endpoints in the next minor version while marking the old one as deprecated. - The old endpoint retains its previous name. When naming the new endpoint: - for HTTP endpoints we follow a “per-endpoint versioning” scheme; in cases where we can’t find a fitting name for the new endpoint, the simplest way forward is to take the old URI and append `/v2` to it. - for command line endpoints, we can usually find a different name for the new endpoint. 
- *Deprecating an endpoint without adding a replacement to it* - Often happens when we want to phase out a certain feature or functionality, but doing so immediately would be a breaking change. - We just mark the endpoint as deprecated. ### Keeping Swagger updated Make sure that any changes you make in the code are also reflected in the swagger specification. Some tips: - There is nothing in the swagger file that shows whether an endpoint is mandatory or optional, it’s all code logic. - Mandatory fields in a request or response body are marked with `required: true` in the swagger definition. All other fields are optional. - If you need to redirect an endpoint, you have to clone the old one under the new URI in the swagger specification. ### Marking endpoints as deprecated When marking: - an HTTP endpoint as deprecated: - Add a comment for the parsing function of the endpoint stating that it is deprecated. - Log a `warn!` message stating that the user accessed a deprecated endpoint. - Increment the `deprecatedHttpApi` metric. - Include the `Deprecated` header in the response. - a header field in an HTTP endpoint as deprecated: - Add a comment in the parsing function where we check the presence of the header stating that it is deprecated. - If the header is present, log a `warn!` message stating that the user used a deprecated field. - Increment the `deprecatedHttpApi` metric. - Include the Deprecated header in the response. - a command line parameter as deprecated: - Mention it is deprecated in the help message of the parameter in the argument parser. - Add it in the `warn_deprecated_parameters` function where we log it and increment the `deprecatedCmdLineApi` metric. ### Removing deprecated endpoints on a major release When doing a major release, the API can have breaking changes. This is the _only time_ where we can safely remove deprecated elements of the API. 
To remove a deprecated element of the API: - Remove the associated functionality from the codebase (usually in `vmm` or `mmds`); - Remove the parsing logic in `api_server`; - Remove any unit and integration tests associated with this element. ## Practical example of an API change In this guide we set out to remove the `vsock_id` field in `PUT`s on `/vsock`. This was implemented in [PR #2763][2] and we will go step by step through the changes in order to understand the process of changing something in the Firecracker API. - We go through the flowchart; we want to remove a field in the body of a HTTP request. So we follow the flowchart like this: - → Change an existing endpoint - → Request - → Remove header or field - → Make it optional - → Deprecate - → Increment minor version. - Now that we know we need to make the field optional and deprecate it, it’s time for the code changes (reference implementation in [this commit][3]). We go to the function in `api_server/src/requests` which is responsible for parsing this request, which is `parse_put_vsock` in this case, and do the following. - We find the associated `vmm_config` struct which `serde_json` uses for deserialization, in this case `VsockDeviceConfig`. - In the struct referenced above, we make the parameter optional by encapsulating it in an `Option` with `#[serde(default)]` and `#[serde(skip_serializing_if = "Option::is_none")]` so that we don’t break existing implementations, but we follow the new, desired usage of the endpoint. - After deserializing the body of the request into the struct, we check for the existence of the field we want to deprecate, in this case by calling `vsock_cfg.vsock_id.is_some()`. - If the field is there, we must mark this request as being deprecated, so we craft a deprecation message (`"PUT /vsock: vsock_id field is deprecated."`) and increment the deprecated HTTP API metric (`METRICS.deprecated_api.deprecated_http_api_calls.inc()`). 
- We create a new `ParsedRequest` where, if we marked the request as deprecated, we append the deprecation message into its `parsing_info` structure, in this case by calling `parsed_req.parsing_info().append_deprecation_message(msg)`. - Don’t forget to comment your code! Comments should reflect what is deprecated and clearly describe the code paths where you handle the deprecation case. - Add a unit test where you test your new code paths. - Fix all other failing unit tests. - Update the swagger file to reflect the change, in this case by removing the `vsock_id` field from the required parameter list in the `Vsock` definition and adding a description to it stating that it is deprecated since the current version. - Update any relevant documentation. - We update the python integration tests to reflect the change (reference implementation in [this commit][4]). - We refactor the relevant `tests/integration_tests/functional/test_api.py` test to use the artifact model instead of the fixture one. If the test already uses the artifact model, you can skip this step. - We make sure to run the test with the current build, as well as with future Firecracker versions by specifying the unreleased version in the `min_version` parameter of `artifacts.firecrackers()`. We do this in order to ensure that, when we create patch releases on older branches, we test the API with future binaries to enforce backwards compatibility. _Disclaimer_: This test will fail when running with the binary artifact fetched from S3 until you update the binary there with your current build. You should only do this once your PR has all necessary approves and this test is the last thing keeping it from getting merged. - We check that, when the deprecated field is present in the request, the `Deprecation` header is also present in the response by asserting `response.headers['deprecation']`. 
We do not also check that the header is not present when the field is not present because, in a future version, some other field may be deprecated in the same request and would return the header anyway, resulting in a fail in our test when it shouldn’t. - Fix all other failing integration tests. [1]: https://semver.org/spec/v2.0.0.html [2]: https://github.com/firecracker-microvm/firecracker/pull/2763 [3]: https://github.com/firecracker-microvm/firecracker/commit/83aa098245a42ad93a6b70ccd70ad593cf453a3c [4]: https://github.com/firecracker-microvm/firecracker/commit/472a81dbccd89562578919b76d87c30ee7db17aa ================================================ FILE: docs/api_requests/actions.md ================================================ # Actions API Request Firecracker microVMs can execute actions that can be triggered via `PUT` requests on the `/actions` resource. Details about the required fields can be found in the [swagger definition](../../src/firecracker/swagger/firecracker.yaml). ## InstanceStart The `InstanceStart` action powers on the microVM and starts the guest OS. It does not have a payload. It can only be successfully called once. ### InstanceStart Example ```bash curl --unix-socket ${socket} -i \ -X PUT "http://localhost/actions" \ -d '{ "action_type": "InstanceStart" }' ``` ## FlushMetrics The `FlushMetrics` action flushes the metrics on user demand. ### FlushMetrics Example ```bash curl --unix-socket /tmp/firecracker.socket -i \ -X PUT "http://localhost/actions" \ -d '{ "action_type": "FlushMetrics" }' ``` ## [Intel and AMD only] SendCtrlAltDel This action will send the CTRL+ALT+DEL key sequence to the microVM. By convention, this sequence has been used to trigger a soft reboot and, as such, most Linux distributions perform an orderly shutdown and reset upon receiving this keyboard input. Since Firecracker exits on CPU reset, `SendCtrlAltDel` can be used to trigger a clean shutdown of the microVM. 
For this action, Firecracker emulates a standard AT keyboard, connected via an i8042 controller. Driver support for both these devices needs to be present in the guest OS. For Linux, that means the guest kernel needs `CONFIG_SERIO_I8042` and `CONFIG_KEYBOARD_ATKBD`. > [!NOTE] > > At boot time, the Linux driver for i8042 spends a few tens of milliseconds > probing the device. This can be disabled by using these kernel command line > parameters: > > ```console > i8042.noaux i8042.nomux i8042.nopnp i8042.dumbkbd > ``` ### SendCtrlAltDel Example ```bash curl --unix-socket /tmp/firecracker.socket -i \ -X PUT "http://localhost/actions" \ -d '{ "action_type": "SendCtrlAltDel" }' ``` ================================================ FILE: docs/api_requests/block-caching.md ================================================ # Block device caching strategies Firecracker offers the possiblity of choosing the block device caching strategy. Caching strategy affects the path data written from inside the microVM takes to the host persistent storage. ## How it works When installing a block device through a PUT /drives API call, users can choose the caching strategy by inserting a `cache_type` field in the JSON body of the request. The available cache types are: - `Unsafe` - `Writeback` ### Unsafe mode (default) When configuring the block caching strategy to `Unsafe`, the device will not advertise the VirtIO `flush` feature to the guest driver. ### Writeback mode When configuring the block caching strategy to `Writeback`, the device will advertise the VirtIO `flush` feature to the guest driver. If negotiated when activating the device, the guest driver will be able to send flush requests to the device. When the device executes a flush request, it will perform an `fsync` syscall on the backing block file, committing all data in the host page cache to disk. 
## Supported use cases The caching strategy should be used in order to make a trade-off: - `Unsafe` - enhances performance as fewer syscalls and IO operations are performed when running workloads - sacrifices data integrity in situations where the host simply loses the contents of the page cache without committing them to the backing storage (such as a power outage) - recommended for use cases with ephemeral storage, such as serverless environments - `Writeback` - ensures that once a flush request was acknowledged by the host, the data is committed to the backing storage - sacrifices performance, from boot time increases to greater emulation-related latencies when running workloads - recommended for use cases with low power environments, such as embedded environments ## How to configure it Example sequence that configures a block device with a caching strategy: ```bash curl --unix-socket ${socket} -i \ -X PUT "http://localhost/drives/dummy" \ -H "accept: application/json" \ -H "Content-Type: application/json" \ -d "{ \"drive_id\": \"dummy\", \"path_on_host\": \"${drive_path}\", \"is_root_device\": false, \"is_read_only\": false, \"cache_type\": \"Writeback\" }" ``` ================================================ FILE: docs/api_requests/block-io-engine.md ================================================ # Block device IO engine For all Firecracker versions prior to v1.0.0, the emulated block device uses a synchronous IO engine for executing the device requests, based on blocking system calls. Firecracker 1.0.0 adds support for an asynchronous block device IO engine. > [!WARNING] > > Support is currently in **developer preview**. See > [this section](#developer-preview-status) for more info. The `Async` engine leverages [`io_uring`](https://kernel.dk/io_uring.pdf) for executing requests in an async manner, therefore getting overall higher throughput by taking better advantage of the block device hardware, which typically supports queue depths greater than 1. 
The block IO engine is configured via the PUT /drives API call (pre-boot only), with the `io_engine` field taking two possible values: - `Sync` (default) - `Async` (in [developer preview](../RELEASE_POLICY.md)) The `Sync` variant is the default, in order to provide backwards compatibility with older Firecracker versions. > [!NOTE] > > [vhost-user block device](./block-vhost-user.md) is another option for block > IO that requires an external backend process. ## Example configuration ```bash curl --unix-socket ${socket} -i \ -X PUT "http://localhost/drives/rootfs" \ -H "accept: application/json" \ -H "Content-Type: application/json" \ -d "{ \"drive_id\": \"rootfs\", \"path_on_host\": \"${drive_path}\", \"is_root_device\": true, \"is_read_only\": false, \"io_engine\": \"Sync\" }" ``` ## Host requirements Firecracker requires a minimum host kernel version of 5.10.51 for the `Async` IO engine. This requirement is based on the availability of the `io_uring` subsystem, as well as a couple of features and bugfixes that were added in newer kernel versions. If a block device is configured with the `Async` io_engine on a host kernel older than 5.10.51, the API call will return a 400 Bad Request, with a suggestive error message. ## Performance considerations The performance is strictly tied to the host kernel version. The gathered data may not be relevant for modified/newer kernels than 5.10. ### Device creation When using the `Async` variant, there is added latency on device creation (up to ~110 ms), caused by the extra io_uring system calls performed by Firecracker. This translates to higher latencies on either of these operations: - API call duration for block device config - Boot time for VMs started via JSON config files - Snapshot restore time For use-cases where the lowest latency on the aforementioned operations is desired, it is recommended to use the `Sync` IO engine. 
### Block IOPS and efficiency The `Async` engine performance potential is showcased when the block device backing files are placed on a physical disk that supports efficient parallel execution of requests, like an NVME drive. It's also recommended to evenly distribute the backing files across the available drives of a host, to limit contention in high-density scenarios. The performance measurements we've done were made on NVME drives, and we've discovered that: For __read__ workloads which operate on data that is not present in the host page cache, the performance improvement for `Async` is about 1.5x-3x in overall efficiency (IOPS per CPU load) and up to 30x in total IOPS. For __write__ workloads, the `Async` engine brings an improvement of about 20-45% in total IOPS but performs worse than the `Sync` engine in total efficiency (IOPS per CPU load). This means that while Firecracker will achieve better performance, it will be at the cost of consuming more CPU for the kernel workers. In this case, the VMM cpu load is also reduced, which should translate into performance increase in hybrid workloads (block+net+vsock). Whether or not using the `Async` engine is a good idea performance-wise depends on the workloads and the amount of spare CPU available on a host. According to our NVME experiments, io_uring will always bring performance improvements (granted that there are enough available CPU resources). It is recommended that users perform some tests with examples of expected workloads and measure the efficiency as (IOPS/CPU load). ## Developer preview status View the [release policy](../RELEASE_POLICY.md) for information about developer preview terminology. The `Async` io_engine is not yet suitable for production use. 
It will be made available for production once Firecracker has support for a host kernel that implements mitigation mechanisms for the following threats: ### Threat 1: PID exhaustion The number of io_uring kernel workers assigned to one Firecracker block device is upper-bounded by: ``` (1 + NUMA_COUNT * min(size_of_ring, 4 * NUMBER_OF_CPUS) ``` This formula is derived from the 5.10 linux kernel code, while `size_of_ring` is hardcoded to `128` in Firecracker. Depending on the number of microVMs that can concurrently live on a host and the number of block devices configured for each microVM, the kernel PID limit may be reached, resulting in failure to create any new process. Kernels starting with 5.15 expose a configuration option for customising this upper bound. Once possible, we plan on exposing this in the Firecracker drive configuration interface. ### Threat 2: worker thread resource consumption The io_uring kernel workers are spawned in the root cgroup of the system. They don’t inherit the Firecracker cgroup, cannot be moved out of the root cgroup and their names don't contain any information about the microVM's PID. This makes it impossible to attribute a worker to a specific Firecracker VM and limit the CPU and memory consumption of said workers via cgroups. Starting with kernel 5.12 (currently unsupported), the Firecracker cgroup is inherited by the io_uring workers. ### Path to GA We plan on marking the Async engine as production ready once an LTS linux kernel including mitigations for the aforementioned mitigations is released and support for it is added in Firecracker. Read more about Firecracker's [kernel support policy](../kernel-policy.md). ================================================ FILE: docs/api_requests/block-vhost-user.md ================================================ # Vhost-user block device > [!WARNING] > > Support is currently in **developer preview**. See > [this section](../RELEASE_POLICY.md#developer-preview-features) for more info. 
As an alternative to [file-backed block device](block-io-engine.md) `Sync` and `Async` engines, Firecracker supports a vhost-user block device. There is a good introduction of how a vhost-user block device works in general at [FOSDEM23](https://archive.fosdem.org/2023/schedule/event/sds_vhost_user_blk). [Vhost-user](https://qemu-project.gitlab.io/qemu/interop/vhost-user.html) is a userspace protocol that allows to delegate Virtio queue processing to another userspace process on the host, as opposed to performing this task within Firecracker's VMM thread. In the vhost-user architecture, the VMM acts as a vhost-user frontend and it is responsible for: - connecting to the backend via a Unix domain socket (UDS) - feature negotiation with the backend and the guest - handling device configuration requests from the guest - sharing sufficient information about the guest memory and Virtio queues with the backend The vhost-user backend receives the information from the frontend and performs handling of IO requests from the guest. The UDS socket is only used for control plane purposes and does not participate in the data plane. Firecracker only implements a vhost-user frontend. Users are free to choose from [existing open source backends](#backends) or implement their own. ## Topology Each vhost-user device connects to its own UDS socket. There is no way for multiple devices to share a single socket, as there is no way to differentiate messages related to devices at the vhost-user protocol level. Each device can be served by a separate backend or a single backend can serve multiple devices. ## Interactions with the backend There are three points when the vhost-user frontend communicates with the backend: 1. Device initialisation. When a vhost-user device is created, Firecracker connects to the corresponding UDS socket and negotiates Virtio and Vhost features with backend and retrieves device configuration. 1. Device activation. 
When the guest driver finishes setting up the device, Firecracker shares memory tables and Virtio queue information with the backend. As a part of this, Firecracker shares file descriptors for guest's memory regions, as well as file descriptors for queue notifications. 1. Config update. When receving a [`PATCH` request](./patch-block.md#updating-vhost-user-block-devices-after-boot) on a vhost-user backed drive, Firecracker rerequests the device config from the backend in order to make the new config available to the guest. ## Advantages While vhost-user block is considered an optimisation to Firecracker IO, a naive implementation of the backend is not going to improve performance. The major advantage of using a vhost-user device is that the backend can implement custom processing logic. It can use intelligent algorithms to serve block requests, eg by fetching the block device data over the network or using sophisticated readahead logic. In such cases, the performance improvement will be coming from the fact that the custom logic is implemented in the same process that handles Virtio queues, which reduces the number of required context switches. ## Disadvantages In order for the backend to be able to process virtio requests, guest memory needs to be shared by the frontend to the backend. This means, a shared memory mapping is required to back guest memory. When a vhost-user device is configured, Firecracker uses `memfd_create` instead of creating an anonymous private mapping to achieve that. It was observed that page faults to a shared memory mapping take significantly longer (up to 24% in our testing), because Linux memory subsystem has to use atomic memory operations to update page status, which is an expensive operation under specific conditions. We advise users to profile performance on their workloads when considering to use vhost-user devices. 
## Other considerations Compared to virtio block device where Firecracker interacts with a drive file on the host, vhost-user block device is handled by the backend directly. Some workloads may benefit from caching and readahead that the host pagecache offers for the backing file. This benefit is not available in vhost-user block case. Users may need to implement internal caching within the backend if they find it appropriate. ## Backends There are a number of open source implementations of a vhost-user backend available for reference that can help developing a custom backend: 1. [Qemu backend](https://github.com/qemu/qemu/tree/master/contrib/vhost-user-blk) 1. [Cloud Hypervisor backend](https://github.com/cloud-hypervisor/cloud-hypervisor/tree/main/vhost_user_block) 1. [crosvm backend](https://github.com/google/crosvm/blob/main/devices/src/virtio/vhost/user/device/block.rs) 1. [SPDK backend](https://github.com/spdk/spdk/blob/master/lib/vhost/vhost_blk.c) ## Security considerations ### Guest memory sharing By design, a vhost-user frontend must share file descriptors of all guest memory regions to the backend. In order to achive that, guest memory is created as a [memfd](https://man7.org/linux/man-pages/man2/memfd_create.2.html) and mapped as `MAP_SHARED`. #### File descriptor in procfs An open `memfd` is reflected in `procfs` as any other open file descriptor: ```shell $ ls -l /proc/{pid}/fd | grep memfd lrwx------ 1 1234 1234 64 Nov 2 13:39 32 -> /memfd:guest_mem (deleted) ``` Any process on the host that has access to this file in `procfs` will be able to map the file descriptor and observe runtime behaviour of the guest. At the moment, Firecracker does not close the `memfd`, because it must remain open until all the configured vhost-user devices have been activated and their info shared with the backends. This kind of tracking is not implemented in Firecracker, but may be implemented in the future. 
Meanwhile, users need to make sure that the access to the Firecracker's `procfs` tree is restricted to trusted processes on the host. On the backend side, it is advised that the backend closes the guest memory region file descriptors after mapping them into its own address space. #### Resource limit in jailer The Firecracker [jailer](../jailer.md) allows to configure resource limits for the Firecracker process. Specifically, it allows to set the maximum file size. Since `memfd` that is used to back the guest memory is considered a file, the file size resource limit cannot be less than the biggest guest memory region. This does not require any special action from a user, but needs to be taken into consideration. ### Remote code execution in the backend It is recommended to run Firecracker using the [jailer](../jailer.md). Since the vhost-user backend interacts with the guest via a Virtio queue, there is a potential for the guest to exercise issues in the backend codebase to trigger undesired behaviours. Users should consider running their backend in a jailer or applying other adequate security measures to restrict it. > [!NOTE] > > [Firecracker jailer](../jailer.md) is currently only capable of running > Firecracker as the binary. Vhost-user block device users are expected to use > another jailer to run the backend. It is also recommended to use proactive security measures like running a Virtio-level fuzzer in the guest during testing to make sure that the backend correctly handles all possible classes of inputs (including invalid ones) from the guest. ### Rate limiting / cgroups Virtio block device in Firecracker has a [rate limiting capability](../design.md#io-storage-networking-and-rate-limiting). In the vhost-user case, Firecracker does not participate in handling requests from the guest, so rate limiting becomes backend's responsibility. 
As an additional indirect measure, users can make use of `cgroups` settings (either via Firecracker jailer or independently) in order to restrict host CPU consumption of the guest, which would transitively limit guest's IO activity. ### Protection against defects in the backend code Due to potential defects in the backend (eg mislocating Virtio queues or writes to a wrong location in the guest memory), the guest execution may be affected. It is advised that customers monitor guest's health periodically. Additionally, in order to avoid orhpaned Firecracker processes if the backend crashes, the backend may need to send a signal, such as `SIGBUS`, to the Firecracker process for it to exit as well. ### Backend timeouts In order to correctly handle the case where the Firecracker process exits before it exchanges all the expected data with the backend, the backend may need to implement a timeout for how long it waits for Firecracker to connect and/or to exchange the data via the vhost-user protocol and exit to avoid resource exhaustion. ## Snapshot support At the moment, [snapshotting](../snapshotting) is not supported for microVMs that have vhost-user devices configured. An attempt to take a snapshot of such a microVM will fail. It is planned to add support for that in the future. ## Example configuration Run a vhost-user backend, eg Qemu backend: ```bash vhost-user-blk --socket-path=${backend_socket} --blk-file=${drive_path} ``` Firecracker API request to add a vhost-user block device: ```bash curl --unix-socket ${fc_socket} -i \ -X PUT "http://localhost/drives/scratch" \ -H "accept: application/json" \ -H "Content-Type: application/json" \ -d "{ \"drive_id\": \"scratch\", \"socket\": \"${backend_socket}\", \"is_root_device\": false }" ``` > [!NOTE] > > Unlike Virtio block device, there is no way to configure a `readonly` > vhost-user drive on the Firecracker side. Instead, this configuration belongs > to the backend. 
Whenever the backend advertises the `VIRTIO_BLK_F_RO` feature, > Firecracker will accept it, and the device will act as readonly. > [!NOTE] > > Whenever a `PUT` request is sent to the `/drives` endpoint for a vhost-user > device with the `id` that already exists, Firecracker will close the existing > connection to the backend and will open a new one. Users may need to restart > their backend if they do so. ================================================ FILE: docs/api_requests/patch-block.md ================================================ # Updating block devices after boot ## Updating Virtio block devices after boot Firecracker offers support to update attached block devices after the microVM has been started. This is provided via PATCH /drives API which notifies Firecracker that the underlying block file has been changed on the host. It should be called when the path to the block device is changed or if the file size has been modified. It is important to note that external changes to the block device file do not automatically trigger a notification in Firecracker so the explicit PATCH API call is mandatory. ### How it works The implementation of the PATCH /drives API does not modify the host backing file. It only updates the emulation layer block device properties, path and length and then triggers a virtio device reconfiguration that is handled by the guest driver which will update the size of the raw block device. With that being said, a sequence which performs resizing/altering of the block underlying host file followed by a PATCH /drives API call is not an atomic operation as the guest can also modify the block file via emulation during the sequence, if the raw block device is mounted or accessible. ### Supported use case This feature was designed to work with a cooperative guest in order to effectively simulate hot plug/unplug functionality for block devices. 
The following guarantees need to be provided: - guest did not mount the device - guest does not read or write from the raw block device `/dev/vdX` during the update sequence Example sequence that configures a microVM with a placeholder drive and then updates it with the real one: ```bash # Create and set up a block device. touch ${ro_drive_path} curl --unix-socket ${socket} -i \ -X PUT "http://localhost/drives/scratch" \ -H "accept: application/json" \ -H "Content-Type: application/json" \ -d "{ \"drive_id\": \"scratch\", \"path_on_host\": \"${ro_drive_path}\", \"is_root_device\": false, \"is_read_only\": true \"rate_limiter\": { \"bandwidth\": { \"size\": 100000, \"one_time_burst\": 4096, \"refill_time\": 150 }, \"ops\": { \"size\": 10, \"refill_time\": 250 } } }" # Finish configuring and start the microVM. Wait for the guest to boot. # Before mounting the block device in the guest: # Use another backing file of different size to effectively resize the # vm block device. touch ${updated_ro_drive_path} truncate --size ${new_size}M ${updated_ro_drive_path} # Create a filesystem in it. mkfs.ext4 ${updated_ro_drive_path} # PATCH the block device to use the new backing file. curl --unix-socket ${socket} -i \ -X PATCH "http://localhost/drives/scratch" \ -H "accept: application/json" \ -H "Content-Type: application/json" \ -d "{ \"drive_id\": \"scratch\", \"path_on_host\": \"${updated_ro_drive_path}\" }" # It's now safe to mount the block device in the guest and use it # with the updated backing file. ``` ### Data integrity and other issues We do not recommend using this feature outside of its supported use case scope. If the required guarantees are not provided, data integrity and potential other issues may arise depending on the actual use case. 
There are two major aspects that need be considered here: #### Atomicity of the update sequence If the guest has the opportunity to perform I/O against the block device during the update sequence it can either read data while it is changed or can overwrite data already written by a host process. For example a truncate operation can be undone if the guest issues a write for the last sector of the raw block device, or the guest application can become inconsistent or/and can create inconsistency in the block device itself. #### In flight I/O requests If the atomicity of the operation is guaranteed by using methods to make the microVM quiescence during the update sequence (for example pausing the microVM) the guest itself or block device can still become incosistent from in flight I/O requests in the guest that will be executed after it is resumed. ## Updating vhost-user block devices after boot Unlike with Virtio block device, with vhost-user block devices, Firecracker does not interact with the underlying block file directly (the vhost-user backend does). It means that changes to the file are not automatically seen by Firecracker. There is a mechanism in the [vhost-user protocol](https://qemu-project.gitlab.io/qemu/interop/vhost-user.html) for the backend to notify the frontend about changes in the device config via `VHOST_USER_BACKEND_CONFIG_CHANGE_MSG` message. This requires an extra UDS socket connection between the frontend and backend used for backend-originated messages. This mechanism **is not supported** by Firecracker. Instead, Firecracker makes use of the `PATCH /drives` API request to get notified about such changes. Such an API request only includes the required property (`drive_id`), because optional properties are not relevant to vhost-user. 
Example of a `PATCH` request for a vhost-user drive: ```bash curl --unix-socket ${socket} -i \ -X PATCH "http://localhost/drives/scratch" \ -H "accept: application/json" \ -H "Content-Type: application/json" \ -d "{ \"drive_id\": \"scratch\" }" ``` A `PATCH` request to a vhost-user drive will make Firecracker retrieve the new device config from the backend and send a config change notification to the guest. ================================================ FILE: docs/api_requests/patch-network-interface.md ================================================ # Updating A Network Interface After the microVM is started, the rate limiters assigned to a network interface can be updated via a `PATCH /network-interfaces/{id}` API call. E.g. for a network interface created with: ```console PUT /network-interfaces/iface_1 HTTP/1.1 Host: localhost Content-Type: application/json Accept: application/json { "iface_id": "iface_1", "host_dev_name": "fctap1", "guest_mac": "06:00:c0:a8:34:02", "rx_rate_limiter": { "bandwidth": { "size": 1024, "one_time_burst": 1048576, "refill_time": 1000 } }, "tx_rate_limiter": { "bandwidth": { "size": 1024, "one_time_burst": 1048576, "refill_time": 1000 } } } ``` A `PATCH` request can be sent at any future time, to update the rate limiters: ```console PATCH /network-interfaces/iface_1 HTTP/1.1 Host: localhost Content-Type: application/json Accept: application/json { "iface_id": "iface_1", "rx_rate_limiter": { "bandwidth": { "size": 1048576, "refill_time": 1000 }, "ops": { "size": 2000, "refill_time": 1000 } } } ``` The full specification of the data structures available for this call can be found in our [OpenAPI spec](../../src/firecracker/swagger/firecracker.yaml). > [!NOTE] > > The data provided for the update is merged with the existing data. In the > above example, the RX rate limit is updated, but the TX rate limit remains > unchanged. ## Removing Rate Limiting A rate limit can be disabled by providing a 0-sized token bucket. 
the kernel will take some pages from the balloon and give them to said process instead of asking the OOM killer process to kill some processes to free memory.
Setting `vm.overcommit_memory` to 1 would make the MM approve all allocations,
[Read more here](#virtio-balloon-free-page-hinting) ## Security disclaimer **The balloon device is a paravirtualized virtio device that requires cooperation from a driver in the guest.** In normal conditions, the balloon device will: - not change the target size, which is set directly by the host - consume exactly as many pages as required to achieve the target size - correctly update the value of the actual size of the balloon seen by the host - not use pages that were previously inflated if they were not returned to the guest via a deflate operation (unless the `deflate_on_oom` flag was set and the guest is in an out of memory state) - provide correct statistics when available However, Firecracker does not and cannot introspect into the guest to check the integrity of the balloon driver. As the guest is not trusted, if the driver in the guest becomes compromised, the above statements are **no longer guaranteed**. This means that even though users use the balloon to impose restrictions on memory usage, they can be broken by a compromised driver in the guest. The balloon device operates on a best effort model and users should always ensure the host is prepared to handle a situation in which the Firecracker process uses all of the memory it was given at boot even if the balloon was used to restrict the amount of memory available to the guest. 
It is also the users' responsibility to monitor the memory consumption of the VM and, in case unexpected increases in memory usage are observed, we recommend the following options: - migrate the VM to a machine with higher memory availability through snapshotting at the cost of disrupting the workload; - kill the Firecracker process that exceeds memory restrictions; - enable swap with a sufficient amount of memory to handle the demand at the cost of memory access speed; Users should also never rely solely on the statistics provided by the balloon when controlling the Firecracker process as they are provided directly by the guest driver and should always be viewed as an indication rather than a guarantee of what the memory state looks like in the guest. Please note that even in the case where the driver is not working properly, the balloon will never leak memory from one Firecracker process to another, nor can a guest within Firecracker access information in memory outside its own guest memory. In other words, memory cannot leak in or out of Firecracker if the driver becomes corrupted. This is guaranteed by the fact that the page frame numbers coming from the driver are checked to be inside the guest memory, then `madvise`d with the `MADV_DONTNEED` flag, which breaks the mappings between host physical memory (where the information is ultimately stored) and Firecracker virtual memory, which is what Firecracker uses to build the guest memory. On subsequent accesses on previously `madvise`d memory addresses, the memory is zeroed. Furthermore, the guest memory is `mmap`ped with the `MAP_PRIVATE` and `MAP_ANONYMOUS` flags, which ensure that even if a Firecracker yields some information through an inflate and that same physical page containing the information is mapped onto another Firecracker process, reads on that address space will see zeroes. 
(by default, at `/run/firecracker.socket`).
curl --unix-socket $socket_location -i \ -X GET 'http://localhost/balloon' \ -H 'Accept: application/json' ``` On success, this request returns a JSON object of the same structure as the one used to configure the device (via a PUT request on "/balloon"). ## Operating the traditional balloon device After it has been installed, the balloon device can only be operated via the API through the following command: ```console socket_location=... amount_mib=... polling_interval=... curl --unix-socket $socket_location -i \ -X PATCH 'http://localhost/balloon' \ -H 'Accept: application/json' \ -H 'Content-Type: application/json' \ -d "{ \"amount_mib\": $amount_mib, \ \"stats_polling_interval_s\": $polling_interval \ }" ``` This will update the target size of the balloon to `amount_mib` and the statistics polling interval to `polling_interval`. > [!NOTE] Balloon inflation instructs the guest to reclaim memory which may > cause performance issues in the guest. The balloon statistics defined > [below](#virtio-balloon-statistics) can be used to decide whether it's > necessary to reclaim memory. ## Virtio balloon statistics The statistics are enabled by setting the `stats_polling_interval_s` field in the balloon configuration to a non-zero value. If enabled, users can receive the latest balloon statistics by issuing a GET request on "/balloon". Here is an example of such a request: ```console socket_location=... curl --unix-socket $socket_location -i \ -X GET 'http://localhost/balloon/statistics' \ -H 'Accept: application/json' ``` The request, if successful, will return a JSON object containing the latest statistics. The JSON object contains information about the target and actual sizes of the balloon as well as virtio traditional memory balloon statistics. The target and actual sizes of the balloon are expressed as follows: - `target_pages`: The target size of the balloon, in 4K pages. - `actual_pages`: The number of 4K pages the device is currently holding. 
Since Linux v6.12, the following metrics were added (omitted in < v6.12): - `VIRTIO_BALLOON_S_OOM_KILL`: OOM killer invocations, indicating critical memory pressure. - `VIRTIO_BALLOON_S_ALLOC_STALL`: Counter of allocations entering a slow path to gain more memory pages.
To change the statistics polling interval, users can send a PATCH request on
## Virtio balloon free page reporting Free page reporting is a virtio balloon feature which allows the guest OS to report ranges of memory which are not being used. In Firecracker, the balloon device will `madvise` the range with the `MADV_DONTNEED` flag, reducing the RSS of the guest. Reporting can only be enabled pre-boot and will run continually with no option to stop it running. The feature also requires the guest to have the Linux kernel config option `PAGE_REPORTING` enabled. To enable free page reporting when creating the balloon device, the `free_page_reporting` attribute should be set in the JSON object. An example of how to configure the device to enable free page reporting: ```console socket_location=... amount_mib=... deflate_on_oom=... polling_interval=... curl --unix-socket $socket_location -i \ -X PUT 'http://localhost/balloon' \ -H 'Accept: application/json' \ -H 'Content-Type: application/json' \ -d "{ \"amount_mib\": $amount_mib, \ \"deflate_on_oom\": $deflate_on_oom, \ \"stats_polling_interval_s\": $polling_interval, \ \"free_page_reporting\": true \ }" ``` The Linux driver uses a hook in the free page path to trigger the reporting process, which will begin after a short delay (~2 seconds) and report the ranges. The runtime impact of this feature is heavily workload dependent. The driver gets ranges from the buddy allocator with a minimum page order. This page order dictates the minimum size of ranges reported and can be configured with the `page_reporting_order` module parameter in the guest kernel. The page order comes with trade-offs between performance and memory reclaimed; a good target to maximise memory reclaim is to have the reported ranges match the backing page size. ## Virtio balloon free page hinting Free page hinting is a [developer-preview](../docs/RELEASE_POLICY.md#developer-preview-features) feature, which allows the guest driver to report ranges of memory which are not being used. 
The time required for the guest to complete a hinting run is dependent on a
An example of sending this command: ```console curl --unix-socket $socket_location -i \ -X POST 'http://localhost/balloon/hinting/start' \ -H 'Accept: application/json' \ -H 'Content-Type: application/json' \ -d "{ \"acknowledge_on_stop\": true \ }" ``` For fine-grained control, using `acknowledge_on_stop = false`, Firecracker will not send the acknowledge message. This can be used to get the guest to hold onto more memory. Using the `/status` endpoint, you can get information about the last `cmd_id` sent by Firecracker and the last update from the guest. An example of the status request and response: ```console curl --unix-socket $socket_location -i \ -X GET 'http://localhost/balloon/hinting/status' \ -H 'Accept: application/json' \ -H 'Content-Type: application/json' ``` Response: ```json { "host_cmd": 1, "guest_cmd": 2 } ``` An example of the stop endpoint: ```console curl --unix-socket $socket_location -i \ -X POST 'http://localhost/balloon/hinting/stop' \ -H 'Accept: application/json' \ -H 'Content-Type: application/json' \ -d "{}" ``` On snapshot restore, the `cmd_id` is **always** set to the stop `cmd_id` to allow the guest to reclaim the memory. If you have a particular use-case which requires this not to be the case, please raise an issue with a description of your scenario. > [!WARNING] > > Free page hinting was primarily designed for live migration, because of this > there is a caveat to the device spec which means the guest is able to reclaim > memory before Firecracker even receives the range to free. This can lead to a > scenario where the device frees memory that has been reclaimed in the guest, > potentially corrupting memory. The chances of this race happening are low, but > not impossible; hence the developer-preview status. > > We are currently working with the kernel community on a feature that will > eliminate this race. Once this has been resolved, we will update the device. > > One way to safely use this feature when using UFFD is: > > 1. 
Enable `WRITEPROTECT` on the VM memory before starting a hinting run. > 1. Track ranges that are written to. > 1. Skip these ranges when Firecracker reports them for freeing. > > This will prevent ranges which have been reclaimed from being freed. ## Balloon Caveats - Firecracker has no control over the speed of inflation or deflation; this is dictated by the guest kernel driver. - The traditional balloon will continually attempt to reach its target size, which can be a CPU-intensive process. It is therefore recommended to set realistic targets or, after a period of stagnation in the inflation, update the target size to be close to the inflated size. - The `deflate_on_oom` flag is a mechanism to prevent the guest from crashing or terminating processes; it is not meant to be used continually to free memory. Doing this will be a CPU-intensive process, as the traditional balloon driver is designed to deflate and release memory slowly. This is also compounded if the balloon has yet to reach its target size, as it will attempt to inflate while also deflating. ================================================ FILE: docs/cpu_templates/boot-protocol.md ================================================ # Boot protocol register settings Firecracker makes certain modifications to the guest's registers regardless of whether a CPU template is used to comply with the boot protocol. If a CPU template is used the boot protocol settings are performed _after_ the CPU template is applied. That means that if the CPU template configures CPUID bits used in the boot protocol settings, they will be overwritten. See also: [CPUID normalization](cpuid-normalization.md) ## Boot protocol MSRs (x86_64 only) On x86_64, the following MSRs are set to `0`: - MSR_IA32_SYSENTER_CS - MSR_IA32_SYSENTER_ESP - MSR_IA32_SYSENTER_EIP - MSR_STAR - MSR_CSTAR - MSR_KERNEL_GS_BASE - MSR_SYSCALL_MASK - MSR_LSTAR - MSR_IA32_TSC and MSR_IA32_MISC_ENABLE is set to `1`. 
It retrieves the above entities exposed to a guest by applying the same preboot process as Firecracker
#### Strip command This command strips identical entries from multiple guest CPU configuration files generated with the dump command. ``` cpu-template-helper template strip \ --paths [..] \ --suffix ``` One practical use case of the CPU template feature is to provide a consistent CPU feature set to guests running on multiple CPU models. When creating a custom CPU template for this purpose, it is efficient to focus on the differences in guest CPU configurations across those CPU models. Given that a dumped guest CPU configuration typically amounts to approximately 1,000 lines, this command considerably narrows down the scope to consider. #### Verify command This command verifies that the given custom CPU template is applied correctly. ``` cpu-template-helper template verify \ --template \ [--config ] ``` Firecracker modifies the guest CPU configuration after the CPU template is applied. Occasionally, due to hardware and/or software limitations, KVM might not set the given configuration. Since Firecracker does not check them at runtime, it is required to ensure that these situations don't happen with their custom CPU templates before deploying it. When a template is specified both through `--template` and in Firecracker configuration file provided via `--config`, the template specified with `--template` takes precedence. > [!NOTE] > > This command does not ensure that the contents of the template are sensible. > Thus, users need to make sure that the template does not have any inconsistent > entries and does not crash guests. ### Fingerprint-related commands #### Dump command This command not only dumps the guest CPU configuration, but also host information that could affect the validity of custom CPU templates. ``` cpu-template-helper fingerprint dump \ --output \ [--template ] \ [--config ] ``` Keeping the underlying hardware and software stack updated is essential for maintaining security and leveraging new technologies. 
By continuously comparing fingerprint files, users can ensure they are aware of
1. Run the `cpu-template-helper template strip` command to remove identical entries across the dumped guest CPU configuration files. 1. Examine the differences of guest CPU configuration in details, determine which CPU features should be presented to guests and draft a custom CPU template. 1. Run the `cpu-template-helper template verify` command to check the created custom CPU template is applied correctly. 1. Conduct thorough testing of the template as needed to ensure that it does not contain any inconsistent entries and does not lead to guest crashes. ### Custom CPU template management 1. Run the `cpu-template-helper fingerprint dump` command on each CPU model at the same time when creating a custom CPU template. 1. Store the dumped fingerprint files together with the custom CPU template. 1. Run the `cpu-template-helper fingerprint dump` command to ensure the template's validity whenever you expect changes to the underlying hardware and software stack. 1. Run the `cpu-template-helper fingerprint compare` command to identify changes of the underlying environment introduced after creating the template. 1. (if changes are detected) Review the identified changes, make necessary revisions to the CPU template, and replace the fingerprint file with the new one. > [!NOTE] > > It is recommended to review the update process of the underlying stack on your > infrastructure. This can help identify points that may require the above > validation check. 
## Appendix ### MSRs excluded from guest CPU configuration dump | Register name | Index | | --------------------------------------- | ----------------------- | | MSR_IA32_TSC | 0x00000010 | | MSR_ARCH_PERFMON_PERFCTRn | 0x000000c1 - 0x000000d2 | | MSR_ARCH_PERFMON_EVENTSELn | 0x00000186 - 0x00000197 | | MSR_ARCH_PERFMON_FIXED_CTRn | 0x00000309 - 0x0000030b | | MSR_CORE_PERF_FIXED_CTR_CTRL | 0x0000038d | | MSR_CORE_PERF_GLOBAL_STATUS | 0x0000038e | | MSR_CORE_PERF_GLOBAL_CTRL | 0x0000038f | | MSR_CORE_PERF_GLOBAL_OVF_CTRL | 0x00000390 | | MSR_K7_EVNTSELn | 0xc0010000 - 0xc0010003 | | MSR_K7_PERFCTR0 | 0xc0010004 - 0xc0010007 | | MSR_F15H_PERF_CTLn + MSR_F15H_PERF_CTRn | 0xc0010200 - 0xc001020c | | MSR_IA32_VMX_BASIC | 0x00000480 | | MSR_IA32_VMX_PINBASED_CTLS | 0x00000481 | | MSR_IA32_VMX_PROCBASED_CTLS | 0x00000482 | | MSR_IA32_VMX_EXIT_CTLS | 0x00000483 | | MSR_IA32_VMX_ENTRY_CTLS | 0x00000484 | | MSR_IA32_VMX_MISC | 0x00000485 | | MSR_IA32_VMX_CR0_FIXEDn | 0x00000486 - 0x00000487 | | MSR_IA32_VMX_CR4_FIXEDn | 0x00000488 - 0x00000489 | | MSR_IA32_VMX_VMCS_ENUM | 0x0000048a | | MSR_IA32_VMX_PROCBASED_CTLS2 | 0x0000048b | | MSR_IA32_VMX_EPT_VPID_CAP | 0x0000048c | | MSR_IA32_VMX_TRUE_PINBASED_CTLS | 0x0000048d | | MSR_IA32_VMX_TRUE_PROCBASED_CTLS | 0x0000048e | | MSR_IA32_VMX_TRUE_EXIT_CTLS | 0x0000048f | | MSR_IA32_VMX_TRUE_ENTRY_CTLS | 0x00000490 | | MSR_IA32_VMX_VMFUNC | 0x00000491 | | MSR_IA32_MCG_STATUS | 0x0000017a | | MSR_IA32_MCG_CTL | 0x0000017b | | MSR_IA32_MCG_EXT_CTL | 0x000004d0 | | HV_X64_MSR_GUEST_OS_ID | 0x40000000 | | HV_X64_MSR_HYPERCALL | 0x40000001 | | HV_X64_MSR_VP_INDEX | 0x40000002 | | HV_X64_MSR_RESET | 0x40000003 | | HV_X64_MSR_VP_RUNTIME | 0x40000010 | | HV_X64_MSR_VP_ASSIST_PAGE | 0x40000073 | | HV_X64_MSR_SCONTROL | 0x40000080 | | HV_X64_MSR_STIMER0_CONFIG | 0x400000b0 | | HV_X64_MSR_CRASH_Pn | 0x40000100 - 0x40000104 | | HV_X64_MSR_CRASH_CTL | 0x40000105 | | HV_X64_MSR_REENLIGHTENMENT_CONTROL | 0x40000106 | | 
HV_X64_MSR_TSC_EMULATION_CONTROL | 0x40000107 | | HV_X64_MSR_TSC_EMULATION_STATUS | 0x40000108 | | HV_X64_MSR_SYNDBG_CONTROL | 0x400000f1 | | HV_X64_MSR_SYNDBG_STATUS | 0x400000f2 | | HV_X64_MSR_SYNDBG_SEND_BUFFER | 0x400000f3 | | HV_X64_MSR_SYNDBG_RECV_BUFFER | 0x400000f4 | | HV_X64_MSR_SYNDBG_PENDING_BUFFER | 0x400000f5 | | HV_X64_MSR_SYNDBG_OPTIONS | 0x400000ff | | HV_X64_MSR_TSC_INVARIANT_CONTROL | 0x40000118 | ### ARM registers excluded from guest CPU configuration dump | Register name | ID | | --------------------- | ------------------ | | Program Counter | 0x6030000000100040 | | KVM_REG_ARM_TIMER_CNT | 0x603000000013df1a | ================================================ FILE: docs/cpu_templates/cpu-templates.md ================================================ # CPU templates Firecracker allows users to customise how the vCPUs are represented to the guest software by changing the following configuration: - CPUID (x86_64 only) - MSRs (Model Specific Registers, x86_64 only) - ARM registers (aarch64 only) - vCPU features (aarch64 only) - KVM capabilities (both x86_64 and aarch64) A combination of the changes to the above entities is called a CPU template. The functionality can be used when a user wants to mask a feature from the guest. A real world use case for this is representing a heterogeneous fleet (a fleet consisting of multiple CPU models) as a homogeneous fleet, so the guests will experience a consistent feature set supported by the host. > [!NOTE] > > Representing one CPU vendor as another CPU vendor is not supported. > [!NOTE] > > CPU templates shall not be used as a security protection against malicious > guests. Disabling a feature in a CPU template does not generally make it > completely unavailable to the guest. For example, disabling a feature related > to an instruction set will indicate to the guest that the feature is not > supported, but the guest may still be able to execute corresponding > instructions if it does not obey the feature bit. 
Firecracker supports two types of CPU templates: - Static CPU templates - a set of built-in CPU templates for users to choose from - Custom CPU templates - users can create their own CPU templates in json format and pass them to Firecracker > [!NOTE] > > Static CPU templates are deprecated starting from v1.5.0 and will be removed > in accordance with our deprecation policy. Even after the removal, custom CPU > templates are available as an improved iteration of static CPU templates. For > more information about the transition from static CPU templates to custom CPU > templates, please refer to > [this GitHub discussion](https://github.com/firecracker-microvm/firecracker/discussions/4135). > [!NOTE] > > CPU templates for ARM (both static and custom) require the following patch to > be available in the host kernel: > [Support writable CPU ID registers from userspace](https://lore.kernel.org/kvm/20230212215830.2975485-1-jingzhangos@google.com/#t). > Otherwise KVM will fail to write to the ARM registers. ## Static CPU templates At the moment the following set of static CPU templates is supported: | CPU template | CPU vendor | CPU model | | ------------ | ---------- | ------------------------------- | | C3 | Intel | Skylake, Cascade Lake, Ice Lake | | T2 | Intel | Skylake, Cascade Lake, Ice Lake | | T2A | AMD | Milan | | T2CL | Intel | Cascade Lake, Ice Lake | | T2S | Intel | Skylake, Cascade Lake | | V1N1 | ARM | Neoverse V1 | T2 and C3 templates are mapped as close as possible to AWS T2 and C3 instances in terms of CPU features. Note that on a microVM that is launched with the C3 template and running on processors that do not enumerate FBSDP_NO, PSDP_NO and SBDR_SSDP_NO on IA32_ARCH_CAPABILITIES MSR, the kernel does not apply the mitigation against MMIO stale data vulnerability. 
The T2S template is designed to allow migrating [snapshots](../snapshotting/versioning.md) between hosts with Intel Skylake and Intel Cascade Lake securely by further restricting CPU features for the guest, however this comes with a performance penalty. Users are encouraged to carry out a performance assessment if they wish to use the T2S template. Note that Firecracker expects the host to always be running the latest version of the microcode. The T2CL template is mapped to be close to Intel Cascade Lake. It is only safe to use it on Intel Cascade Lake and Ice Lake. The only AMD template is T2A. It is considered safe to be used with AMD Milan. Intel T2CL and AMD T2A templates together aim to provide instruction set feature parity between CPUs running them, so they can form a heterogeneous fleet exposing the same instruction sets to the application. The V1N1 template is designed to represent ARM Neoverse V1 as ARM Neoverse N1. ### Configuring static CPU templates Configuration of a static CPU template is performed via the `/machine-config` API endpoint: ```bash curl --unix-socket /tmp/firecracker.socket -i \ -X PUT 'http://localhost/machine-config' \ -H 'Accept: application/json' \ -H 'Content-Type: application/json' \ -d '{ "vcpu_count": 2, "mem_size_mib": 1024, "cpu_template": "T2CL" }' ``` ## Custom CPU templates Users can create their own CPU templates by creating a json file containing modifiers for CPUID, MSRs or ARM registers. > [!NOTE] > > Creating custom CPU templates requires expert knowledge of CPU architectures. > Custom CPU templates must be tested thoroughly before use in production. An > inappropriate configuration may lead to guest crashes or making guests > vulnerable to security attacks. For example, if a CPU template signals a > hardware vulnerability mitigation to the guest while the mitigation is in fact > not supported by the hardware, the guest may decide to disable corresponding > software mitigations which will make the guest vulnerable. 
> [!NOTE] > > Having MSRs or ARM registers in the custom CPU template does not affect access > permissions that guests will have to those registers. The access control is > handled by KVM and is not influenced by CPU templates. > [!NOTE] > > When setting guest configuration, KVM may reject setting some bits quietly. > It is the user's responsibility to make sure that their custom CPU template is > applied as expected even if Firecracker does not report an error. In order to assist with creation and usage of CPU templates, there exists a CPU template helper tool. More details can be found [here](cpu-template-helper.md). ### Configuring custom CPU templates Configuration of a custom CPU template is performed via the `/cpu-config` API endpoint. An example of configuring a custom CPU template on x86_64: ```bash curl --unix-socket /tmp/firecracker.socket -i \ -X PUT 'http://localhost/cpu-config' \ -H 'Accept: application/json' \ -H 'Content-Type: application/json' \ -d '{ "kvm_capabilities": ["!56"], "cpuid_modifiers": [ { "leaf": "0x1", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0bxxxx000000000011xx00011011110010" } ] } ], "msr_modifiers": [ { "addr": "0x10a", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" } ] }' ``` This CPU template will do the following: - removes the check for KVM capability: KVM_CAP_XCRS. This allows Firecracker to run on old CPUs. See [this](https://github.com/firecracker-microvm/firecracker/discussions/3470) discussion. - in leaf `0x1`, subleaf `0x0`, register `eax`: - clear bits `0b00001111111111000011100100001101` - set bits `0b00000000000000110000011011110010` - leave bits `0b11110000000000001100000000000000` intact. - in MSR `0x10a`, it will clear all bits. 
An example of configuring a custom CPU template on ARM: ```bash curl --unix-socket /tmp/firecracker.socket -i \ -X PUT 'http://localhost/cpu-config' \ -H 'Accept: application/json' \ -H 'Content-Type: application/json' \ -d '{ "kvm_capabilities": ["171", "172"], "vcpu_features": [{ "index": 0, "bitmap": "0b11xxxxx" }], "reg_modifiers": [ { "addr": "0x603000000013c020", "bitmap": "0bxxxxxxxxxxxx0000xxxxxxxxxxxx0000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" } ] }' ``` This CPU template will do the following: - add checks for KVM capabilities: KVM_CAP_ARM_PTRAUTH_ADDRESS and KVM_CAP_ARM_PTRAUTH_GENERIC. These checks are to ensure that the host has the capabilities needed for the vCPU features. - enable additional vCPU features: KVM_ARM_VCPU_PTRAUTH_ADDRESS and KVM_ARM_VCPU_PTRAUTH_GENERIC - modify ARM register `0x603000000013c020`: - clear bits `0b0000000000001111000000000000111100000000000000000000000000000000` - leave bits `0b1111111111110000111111111111000011111111111111111111111111111111` intact. Information about KVM capabilities can be found in the [kernel source](https://elixir.bootlin.com/linux/latest/source/include/uapi/linux/kvm.h). Information about vCPU features on aarch64 can be found in the [kernel source](https://elixir.bootlin.com/linux/latest/source/arch/arm64/include/uapi/asm/kvm.h). Information on how the ARM register addresses are constructed can be found in the [KVM API documentation](https://docs.kernel.org/virt/kvm/api.html#kvm-set-one-reg). 
#### CPUID normalization and boot protocol register settings Some of the configuration set by a custom CPU template may be overwritten by Firecracker. More details can be found [here](cpuid-normalization.md) and [here](boot-protocol.md). #### Information about architecture-specific settings For detailed information when working with custom CPU templates, please refer to hardware specifications from CPU vendors, for example: - [Intel Software Developer Manual](https://www.intel.com/content/www/us/en/developer/articles/technical/intel-sdm.html) - [AMD Architecture Programmer's Manual](https://www.amd.com/en/support/tech-docs?keyword=programmer%27s+manual) - [ARM Architecture Reference Manual](https://developer.arm.com/documentation/ddi0487/latest) ## A note about configuration of both static and custom CPU templates If a user configured both a static CPU template (via `/machine-config`) and a custom CPU template (via `/cpu-config`) in the same Firecracker process, only the configuration that was performed the _last_ is applied. This means that if a static CPU template was configured first and a custom CPU template was configured later, only the custom CPU template configuration will be applied when starting a microVM. ================================================ FILE: docs/cpu_templates/cpuid-normalization.md ================================================ # CPUID normalization (x86_64 only) On x86_64, Firecracker makes certain modifications to the guest's CPUID regardless of whether a CPU template is used. This is referred to as `CPUID normalization`. If a CPU template is used the normalization is performed _after_ the CPU template is applied. That means that if the CPU template configures CPUID bits used in the normalization process, they will be overwritten. 
See also: [boot protocol settings](boot-protocol.md) ## x86_64 common CPUID normalization | Description | Leaf | Subleaf | Register | Bits | | ------------------------------------------------------------------------------------ | :--------: | :-----: | :-----------: | :---: | | Pass through vendor ID from host | 0x0 | - | EBX, ECX, EDX | all | | Set CLFLUSH line size | 0x1 | - | EBX | 15:8 | | Set maximum number of addressable IDs for logical processors in the physical package | 0x1 | - | EBX | 23:16 | | Set initial APIC ID | 0x1 | - | EBX | 31:24 | | Disable PDCM (Perfmon and Debug Capability) | 0x1 | - | ECX | 15 | | Enable TSC_DEADLINE | 0x1 | - | ECX | 24 | | Enable HYPERVISOR | 0x1 | - | ECX | 31 | | Set HTT value if the microVM's CPU count is greater than 1 | 0x1 | - | EDX | 28 | | Insert leaf 0xb, subleaf 0x1 if not present | 0xb | 0x1 | all | all | | Fill extended topology enumeration leaf | 0xb | all | all | all | | Pass through L1 cache and TLB information from host | 0x80000005 | - | all | all | | Pass through L2 cache and TLB and L3 cache information from host | 0x80000006 | - | all | all | ## Intel-specific CPUID normalization | Description | Leaf | Subleaf | Register | Bits | | -------------------------------------------------------------- | :--------------------------------: | :-----: | :------: | :---: | | Update deterministic cache parameters | 0x4 | all | EAX | 31:14 | | Disable Intel Turbo Boost technology | 0x6 | - | EAX | 1 | | Disable frequency selection | 0x6 | - | ECX | 3 | | Set FDP_EXCPTN_ONLY bit | 0x7 | 0x0 | EBX | 6 | | Set "Deprecates FPU CS and FPU DS values" bit | 0x7 | 0x0 | EBX | 13 | | Disable WAITPKG (UMONITOR / UMWAIT / TPAUSE) | 0x7 | 0x0 | ECX | 5 | | Disable performance monitoring | 0xa | - | all | all | | Fill v2 extended topology enumeration leaf | 0x1f | all | all | all | | Update brand string to use a default format and real frequency | 0x80000002, 0x80000003, 0x80000004 | - | all | all | ## AMD-specific CPUID 
normalization | Description | Leaf | Subleaf | Register | Bits | | --------------------------------------------- | :--------------------------------: | :-----: | :----------------: | :---: | | Set IA32_ARCH_CAPABILITIES MSR as not present | 0x7 | - | EDX | 29 | | Set topology extension bit | 0x80000001 | - | ECX | 22 | | Update brand string with a default AMD value | 0x80000002, 0x80000003, 0x80000004 | - | EAX, EBX, ECX, EDX | all | | Update number of physical threads | 0x80000008 | - | ECX | 7:0 | | Update APIC ID size | 0x80000008 | - | ECX | 15:12 | | Update cache topology information | 0x8000001d | all | all | all | | Update extended APIC ID | 0x8000001e | - | EAX, EBX, ECX | all | ================================================ FILE: docs/cpu_templates/schema.json ================================================ { "$schema": "https://json-schema.org/draft/2020-12/schema", "$id": "https://github.com/firecracker-microvm/firecracker/tree/main/docs/cpu-templates/schema.json", "title": "Custom CPU template", "description": "Custom CPU template language description", "type": "object", "properties": { "kvm_capabilities": { "type": "array", "items": { "description": "Additional KVM capabilities can be added or existing (built-in) capabilities can be removed from the firecracker checks. To add KVM capability to the checklist specify decimal number of the corresponding KVM capability. To remove a KVM capability from the checklist specify decimal number of the corresponding KVM capability with '!' mark in the front. Works on both x86_64 and aarch64.", "type": "string", "examples": ["171", "!172"] } }, "vcpu_features": { "type": "array", "items": { "description": "vCPU features to enable during vCPU initialization. Only for aarch64.", "type": "object", "properties": { "index": { "description": "Index into kvm_vcpu_init::features array. 
As of Linux kernel 6.4.10, only value 0 is allowed.", "type": "integer" }, "bitmap": { "description": "Bitmap for modifying the 32 bit field in kvm_vcpu_init::features. Must be in the format `0b[01x]{1,32}`. Corresponding bits will be cleared (`0`), set (`1`) or left intact (`x`). (`_`) can be used as a separator.", "type": "string", "examples": ["0b11xxxxx"] } } } }, "cpuid_modifiers": { "type": "array", "items": { "description": "CPUID modifiers. Only for x86_64.", "type": "object", "properties": { "leaf": { "description": "CPUID leaf index (or function). Must be a string containing an integer.", "type": "string", "examples": ["0x1", "0x2"] }, "subleaf": { "description": "CPUID subleaf index (or subfunction). Must be a string containing an integer.", "type": "string", "examples": ["0x1", "0x2"] }, "flags": { "description": "KVM CPUID flags, see https://docs.kernel.org/virt/kvm/api.html#kvm-get-supported-cpuid", "type": "integer" }, "modifiers": { "type": "array", "items": { "description": "CPUID register modifiers.", "type": "object", "properties": { "register": { "description": "CPUID register name.", "type": "string", "enum": ["eax", "ebx", "ecx", "edx"] }, "bitmap": { "description": "CPUID register value bitmap. Must be in format `0b[01x]{32}`. Corresponding bits will be cleared (`0`), set (`1`) or left intact (`x`). (`_`) can be used as a separator.", "type": "string", "examples": ["0bxxxx000000000011xx00011011110010", "0bxxxxxxxxxxxxx0xx00xx00x0_0000_00xx"] } } } } } } }, "msr_modifiers": { "type": "array", "items": { "description": "MSR modifiers. Only for x86_64.", "type": "object", "properties": { "addr": { "description": "MSR address/identifier. Must be a string containing an integer.", "type": "string", "examples": ["0x10a"] }, "bitmap": { "description": "MSR value bitmap. Must be in format `0b[01x]{64}`. Corresponding bits will be cleared (`0`), set (`1`) or left intact (`x`). 
(`_`) can be used as a separator.", "type": "string", "examples": ["0bxxxx0000000000000000000000000000000000000000000000000000_11101011"] } } } }, "reg_modifiers": { "type": "array", "items": { "description": "ARM register modifiers. Only for aarch64.", "type": "object", "properties": { "addr": { "description": "ARM register address/identifier. Must be a string containing an integer. See https://docs.kernel.org/virt/kvm/api.html#kvm-set-one-reg .", "type": "string", "examples": ["0x603000000013c020"] }, "bitmap": { "description": "ARM register value bitmap. Must be in format `0b[01x]{1,128}`. The actual length of the bitmap should be less or equal to the size of the register in bits. Corresponding bits will be cleared (`0`), set (`1`) or left intact (`x`). (`_`) can be used as a separator.", "type": "string", "examples": ["0bxxxxxxxxxxxx_0000_xxxx_xxxx_xxxx_0000_xxxx_xxxx_xxxx_xxxx_xxxx_xxxx_xxxx_xxxx"] } } } } } } ================================================ FILE: docs/design.md ================================================ # Firecracker Design ## Scope ### What is Firecracker Firecracker is a new virtualization technology that enables customers to deploy lightweight *micro* Virtual Machines or microVMs. Firecracker microVMs combine the security and workload isolation properties of traditional VMs with the speed, agility and resource efficiency enabled by containers. They provide a secure, trusted environment for multi-tenant services, while maintaining minimal overhead. The scope of this document is to describe the features and architecture of the Firecracker virtual machine manager (VMM). ### Features 1. Firecracker can safely run workloads from different customers on the same machine. 1. Customers can create microVMs with any combination of vCPU (up to 32) and memory to match their application requirements. 1. Firecracker microVMs can oversubscribe host CPU and memory. 
The degree of oversubscription is controlled by customers, who may factor in workload correlation and load in order to ensure smooth host system operation. 1. With a microVM configured with a minimal Linux kernel, single-core CPU, and 128 MiB of RAM, Firecracker supports a steady mutation rate of 5 microVMs per host core per second (e.g., one can create 180 microVMs per second on a host with 36 physical cores). 1. The number of Firecracker microVMs running simultaneously on a host is limited only by the availability of hardware resources. 1. Each microVM exposes a host-facing API via an in-process HTTP server. 1. Each microVM provides guest-facing access to host-configured metadata via the `/mmds` API. ### Specifications Firecracker's technical specifications are available in the [Specifications document](../SPECIFICATION.md). ## Host Integration The following diagram depicts an example host running Firecracker microVMs. ![Firecracker Host Integration](images/firecracker_host_integration.png?raw=true "Firecracker Host Integration") Firecracker runs on Linux hosts and with Linux guest OSs (from this point on, referred to as guests). For a complete list of currently supported kernel versions, check out the [kernel support policy](kernel-policy.md). In production environments, Firecracker should be started only via the `jailer` binary. See [Sandboxing](#Sandboxing) for more details. After launching the process, users interact with the Firecracker API to configure the microVM, before issuing the `InstanceStart` command. ### Host Networking Integration Firecracker emulated network devices are backed by TAP devices on the host. To make use of Firecracker, we expect our customers to leverage on-host networking solutions. ### Storage Firecracker emulated block devices are backed by files on the host. To be able to mount block devices in the guest, the backing files need to be pre-formatted with a filesystem that the guest kernel supports. 
## Internal Architecture Each Firecracker process encapsulates one and only one microVM. The process runs the following threads: API, VMM and vCPU(s). The API thread is responsible for Firecracker's API server and associated control plane. It's never in the fast path of the virtual machine. The VMM thread exposes the machine model, minimal legacy device model, microVM metadata service (MMDS) and VirtIO device emulated Net, Block and Vsock devices, complete with I/O rate limiting. In addition to them, there are one or more vCPU threads (one per guest CPU core). They are created via KVM and run the `KVM_RUN` main loop. They execute synchronous I/O and memory-mapped I/O operations on device models. ### Threat Containment From a security perspective, all vCPU threads are considered to be running malicious code as soon as they have been started; these malicious threads need to be contained. Containment is achieved by nesting several trust zones which increment from least trusted or least safe (guest vCPU threads) to most trusted or safest (host). These trusted zones are separated by barriers that enforce aspects of Firecracker security. For example, all outbound network traffic data is copied by the Firecracker I/O thread from the emulated network interface to the backing host TAP device, and I/O rate limiting is applied at this point. These barriers are marked in the diagram below. ![Firecracker Threat Containment](images/firecracker_threat_containment.png?raw=true "Firecracker Threat Containment") ## Components and Features ### Machine Model #### Layout Firecracker provides guests with storage and network access via emulated VirtIO Net and VirtIO Block devices. It also exposes a serial console and partial keyboard controller, the latter being used by guests to reset the VM (either soft or hard reset). Within Firecracker, the purpose of the I8042 device is to signal the microVM that the guest has requested a reboot. 
In addition to the Firecracker provided device models, guests also see the Programmable Interrupt Controllers (PICs), the I/O Advanced Programmable Interrupt Controller (IOAPIC), and the Programmable Interval Timer (PIT) that KVM supports. #### Exposing the CPU to the guest Firecracker allows control of what processor information is exposed to the guest by using [CPU templates](cpu_templates/cpu-templates.md). CPU templates can be set via the Firecracker API. Users can choose from existing static CPU templates and/or creating their own custom CPU templates. #### Clocksources available to guests Firecracker only exposes kvm-clock to customers. ### I/O: Storage, Networking and Rate Limiting Firecracker provides VirtIO/block and VirtIO/net emulated devices, along with the application of rate limiters to each volume and network interface to make sure host hardware resources are used fairly by multiple microVMs. These are implemented using a token bucket algorithm based on two buckets. One is associated with the number of operations per second and the other one with the bandwidth. The customer can create and configure rate limiters via the API by specifying token bucket configurations for ingress and egress. Each token bucket is defined via the bucket size, I/O cost, refill rate, maximum burst, and initial value. This enables the customer to define flexible rate limiters that support bursts or specific bandwidth/operations limitations. For vhost-user devices, customers should implement rate limiting on the side of the vhost-user backend that they provide. ### MicroVM Metadata Service Firecracker microVMs expose access to a minimal MicroVM-Metadata Service (MMDS) to the guest through the API endpoint. The metadata stored by the service is fully configured by users. ### Sandboxing #### __Firecracker process__ Firecracker is designed to assure secure isolation using multiple layers. 
The first layer of isolation is provided by the Linux KVM and the Firecracker virtualization boundary. To assure defense in depth, Firecracker should only run constrained at the process level. This is achieved by the following: seccomp filters for disallowing unwanted system calls, cgroups and namespaces for resource isolation, and dropping privileges by jailing the process. Seccomp filters are automatically installed by Firecracker, while for the latter, we recommend starting Firecracker with the `jailer` binary that's part of each Firecracker release. ##### Seccomp Seccomp filters are used by default to limit the host system calls Firecracker can use. The default filters only allow the bare minimum set of system calls and parameters that Firecracker needs in order to function correctly. The filters are loaded in the Firecracker process, on a per-thread basis, before executing any guest code. For more information, check out the [seccomp documentation](seccomp.md). #### __Jailer process__ The Firecracker process can be started by another `jailer` process. The jailer sets up system resources that require elevated permissions (e.g., cgroup, chroot), drops privileges, and then exec()s into the Firecracker binary, which then runs as an unprivileged process. Past this point, Firecracker can only access resources that a privileged third-party grants access to (e.g., by copying a file into the chroot, or passing a file descriptor). ##### Cgroups and Quotas Each Firecracker microVM can be further encapsulated into a cgroup. By setting the affinity of the Firecracker microVM to a node via the cpuset subsystem, one can prevent the migration of said microVM from one node to another, something that would impair performance and cause unnecessary contention on shared resources. 
In addition to setting the affinity, each Firecracker microVM can have its own dedicated quota of the CPU time via the cpu subsystem, thus guaranteeing that resources are fairly shared across Firecracker microVMs. ### Monitoring Firecracker emits logs and metric counters, each on a named pipe that is passed via the API. Logs are flushed line by line, whereas metrics are emitted when the instance starts, then every 60 seconds while it's running, and on panic. Firecracker customers are responsible for collecting data in the Firecracker log files. In production builds, Firecracker does not expose the serial console port, since it may contain guest data that the host should not see. ================================================ FILE: docs/dev-machine-setup.md ================================================ # Setting up a Development Environment for Firecracker Firecracker uses KVM for the actual resource virtualization, hence setting up a development environment requires either a bare-metal machine (with hardware virtualization), or a virtual machine that supports nested virtualization. The different options are outlined below. Once the environment is set up, one can continue with the specific steps of setting up Firecracker (e.g., as outlined in the [Getting Started](getting-started.md) instructions). ## Local ### Local Bare-Metal Machine `[TODO]` ### Local Virtual Machine #### macOS with VMware Fusion Note that Firecracker development on macOS has no hard dependency on VMware Fusion or Ubuntu. All that is required is a Linux VM that supports nested virtualization. This is but one example of that setup: 1. Download and install [VMware Fusion](https://www.vmware.com/products/fusion/fusion-evaluation.html). 1. Download an [Ubuntu 18.04.2 LTS](https://www.ubuntu.com/download/desktop) ISO image. 1. Open VMware Fusion, open the **File** menu, and select **New...** to bring up the **Select the Installation Method** window. 1. 
Find the ISO image you downloaded in step 2, and drag it onto the VMware window opened in step 3. 1. You should now be at the **Create a New Virtual Machine** window. Ensure the Ubuntu 18.04.2 image is highlighted, and click **Continue**. 1. On the **Linux Easy Install** window, leave the **Use Easy Install** option checked, enter a password, and click **Continue**. 1. On the **Finish** window, click **Finish**, and save the `.vmwarevm` file if prompted. 1. After the VM starts up, open the **Virtual Machine** menu, and select **Shut Down**. 1. After the VM shuts down, open the **Virtual Machine** menu, and select **Settings...**. 1. From the settings window, select **Processors & Memory**, and then unfurl the **Advanced options** section. 1. Check the **Enable hypervisor applications in this virtual machine** option, close the settings window, open the **Virtual Machine** menu, and select **Start Up**. 1. Network adapter setting for the VM must use auto-detect bridged networking. Go to Virtual Machine, Settings, Network Adapter, select Autodetect under Bridged Networking. 1. If you receive a **Cannot connect the virtual device sata0:1 because no corresponding device is available on the host.** error, you can respond **No** to the prompt. 1. Once the VM starts up, log in as the user you created in step 6. 1. After logging in, open the **Terminal** app, and run `sudo apt install curl -y` to install cURL. 1. Now you can continue with the Firecracker [Getting Started](getting-started.md) instructions to install and configure Firecracker in the new VM. ## Cloud ### AWS Firecracker development environment on AWS can be setup using bare metal instances. Follow these steps to create a bare metal instance. 1. If you don't already have an AWS account, create one using the [AWS Portal](https://portal.aws.amazon.com/billing/signup). 1. Login to [AWS console](https://console.aws.amazon.com/console/home). You must select a region that offers bare metal EC2 instances. 
To check which regions support bare-metal, visit [Amazon EC2 On-Demand Pricing](https://aws.amazon.com/ec2/pricing/on-demand/) and look for `*.metal` instance types. 1. Click on `Launch a virtual machine` in `Build Solution` section. 1. Firecracker requires a relatively new kernel, so you should use a recent Linux distribution - such as `Ubuntu Server 22.04 LTS (HVM), SSD Volume Type`. 1. In `Step 2`, scroll to the bottom and select `c5.metal` instance type. Click on `Next: Configure Instance Details`. 1. In `Step 3`, click on `Next: Add Storage`. 1. In `Step 4`, click on `Next: Add Tags`. 1. In `Step 5`, click on `Next: Configure Security Group`. 1. In `Step 6`, take the default security group. This opens up port 22 and is needed so that you can ssh into the machine later. Click on `Review and Launch`. 1. Verify the details and click on `Launch`. If you do not have an existing key pair, then you can select `Create a new key pair` to create a key pair. This is needed so that you can use it later to ssh into the machine. 1. Click on the instance id in the green box. Copy `Public DNS` from the `Description` tab of the selected instance. 1. Login to the newly created instance: ```console ssh -i <path-to-key-pair-file> ubuntu@<public-dns> ``` Now you can continue with the Firecracker [Getting Started](getting-started.md) instructions to use Firecracker to create a microVM. ### GCP One of the options to set up Firecracker for development purposes is to use a VM on Google Compute Engine (GCE), which supports nested virtualization and allows running KVM. If you don't have a Google Cloud Platform (GCP) account, you can find brief instructions in the Addendum [below](#addendum). Here is a brief summary of steps to create such a setup (full instructions to set up a Ubuntu-based VM on GCE with nested KVM enablement can be found in GCE [documentation](https://cloud.google.com/compute/docs/instances/enable-nested-virtualization-vm-instances)). 1. 
Select a GCP project and zone ```console $ FC_PROJECT=your_name-firecracker $ FC_REGION=us-east1 $ FC_ZONE=us-east1-b ```
Click here for instructions to create a new project

It might be convenient to keep your Firecracker-related GCP resources in a separate project, so that you can keep track of resources more easily and remove everything easily once your are done. For convenience, give the project a unique name (e.g., your_name-firecracker), so that GCP does not need to create a project id different than project name (by appending randomized numbers to the name you provide). ```console $ gcloud projects create ${FC_PROJECT} --enable-cloud-apis --set-as-default ```

```console $ gcloud config set project ${FC_PROJECT} $ gcloud config set compute/region ${FC_REGION} $ gcloud config set compute/zone ${FC_ZONE} ``` 1. The next step is to create a VM image able to run nested KVM (as outlined [here](https://cloud.google.com/compute/docs/instances/nested-virtualization/enabling)). 1. Now we create the VM: Keep in mind that you will need an instance type that supports nested virtualization. `E2` and `N2D` instances will not work. If you want to use a `N1` instance (default in some regions), make sure it uses at least a processor of the `Haswell` architecture by specifying `--min-cpu-platform="Intel Haswell"` when you create the instance. Alternatively, use `N2` instances (such as with `--machine-type="n2-standard-2"`). ```console $ FC_VM=firecracker-vm $ gcloud compute instances create ${FC_VM} --enable-nested-virtualization \ --zone=${FC_ZONE} --min-cpu-platform="Intel Haswell" \ --machine-type=n1-standard-2 ``` 1. Connect to the VM via SSH. ```console $ gcloud compute ssh ${FC_VM} ``` When doing it for the first time, a key-pair will be created for you (you will be propmpted for a passphrase - can just keep it empty) and uploaded to GCE. Done! You should see the prompt of the new VM: ```console [YOUR_USER_NAME]@firecracker-vm:~$ ``` 1. Verify that VMX is enabled, enable KVM ```console $ grep -cw vmx /proc/cpuinfo 1 $ apt-get update $ apt-get install acl $ sudo setfacl -m u:${USER}:rw /dev/kvm $ [ -r /dev/kvm ] && [ -w /dev/kvm ] && echo "OK" || echo "FAIL" OK ``` Depending on your machine you will get a different number, but anything except 0 means `KVM` is enabled. Now you can continue with the Firecracker [Getting Started](getting-started.md) instructions to install and configure Firecracker in the new VM. #### Addendum ##### Setting up a Google Cloud Platform account In a nutshell, setting up a GCP account involves the following steps: 1. Log in to GCP [console](https://console.cloud.google.com/) with your Google credentials. 
If you don't have an account
## API Endpoints | Endpoint | keyboard | serial console | virtio-block | vhost-user-block | virtio-net | virtio-vsock | virtio-rng | virtio-pmem | virtio-mem | | ------------------------- | :------: | :------------: | :----------: | :--------------: | :--------: | :----------: | :--------: | :---------: | :--------: | | `boot-source` | O | O | O | O | O | O | O | O | O | | `cpu-config` | O | O | O | O | O | O | O | O | O | | `drives/{id}` | O | O | **R** | **R** | O | O | O | O | O | | `hotplug/memory` | O | O | O | O | O | O | O | O | **R** | | `logger` | O | O | O | O | O | O | O | O | O | | `machine-config` | O | O | O | O | O | O | O | O | O | | `metrics` | O | O | O | O | O | O | O | O | O | | `mmds` | O | O | O | O | **R** | O | O | O | O | | `mmds/config` | O | O | O | O | **R** | O | O | O | O | | `network-interfaces/{id}` | O | O | O | O | **R** | O | O | O | O | | `snapshot/create` | O | O | O | O | O | O | O | O | O | | `snapshot/load` | O | O | O | O | O | O | O | O | O | | `vm` | O | O | O | O | O | O | O | O | O | | `vsock` | O | O | O | O | O | O | O | O | O | | `entropy` | O | O | O | O | O | O | **R** | O | O | | `pmem/{id}` | O | O | O | O | O | O | O | **R** | O | ## Input Schema All input schema fields can be found in the [Swagger](https://swagger.io) specification: [firecracker.yaml](./../src/firecracker/swagger/firecracker.yaml). 
| Schema | Property | keyboard | serial console | virtio-block | vhost-user-block | virtio-net | virtio-vsock | virtio-rng | virtio-pmem | virtio-mem | | ------------------------- | ------------------ | :------: | :------------: | :----------: | :--------------: | :--------: | :----------: | :--------: | :---------: | :--------: | | `BootSource` | boot_args | O | O | O | O | O | O | O | O | O | | | initrd_path | O | O | O | O | O | O | O | O | O | | | kernel_image_path | O | O | O | O | O | O | O | O | O | | `CpuConfig` | cpuid_modifiers | O | O | O | O | O | O | O | O | O | | | msr_modifiers | O | O | O | O | O | O | O | O | O | | | reg_modifiers | O | O | O | O | O | O | O | O | O | | `CpuTemplate` | enum | O | O | O | O | O | O | O | O | O | | `CreateSnapshotParams` | mem_file_path | O | O | O | O | O | O | O | O | O | | | snapshot_path | O | O | O | O | O | O | O | O | O | | | snapshot_type | O | O | O | O | O | O | O | O | O | | | version | O | O | O | O | O | O | O | O | O | | `Drive` | drive_id \* | O | O | **R** | **R** | O | O | O | O | O | | | is_read_only | O | O | **R** | O | O | O | O | O | O | | | is_root_device \* | O | O | **R** | **R** | O | O | O | O | O | | | partuuid \* | O | O | **R** | **R** | O | O | O | O | O | | | path_on_host | O | O | **R** | O | O | O | O | O | O | | | rate_limiter | O | O | **R** | O | O | O | O | O | O | | | socket | O | O | O | **R** | O | O | O | O | O | | `InstanceActionInfo` | action_type | O | O | O | O | O | O | O | O | O | | `LoadSnapshotParams` | track_dirty_pages | O | O | O | O | O | O | O | O | O | | | mem_file_path | O | O | O | O | O | O | O | O | O | | | mem_backend | O | O | O | O | O | O | O | O | O | | | snapshot_path | O | O | O | O | O | O | O | O | O | | | resume_vm | O | O | O | O | O | O | O | O | O | | `Logger` | level | O | O | O | O | O | O | O | O | O | | | log_path | O | O | O | O | O | O | O | O | O | | | show_level | O | O | O | O | O | O | O | O | O | | | show_log_origin | O | O | O | O | 
O | O | O | O | O | | `MachineConfiguration` | cpu_template | O | O | O | O | O | O | O | O | O | | | smt | O | O | O | O | O | O | O | O | O | | | mem_size_mib | O | O | O | O | O | O | O | O | O | | | track_dirty_pages | O | O | O | O | O | O | O | O | O | | | vcpu_count | O | O | O | O | O | O | O | O | O | | `Metrics` | metrics_path | O | O | O | O | O | O | O | O | O | | `MmdsConfig` | network_interfaces | O | O | O | O | **R** | O | O | O | O | | | version | O | O | O | O | **R** | O | O | O | O | | | ipv4_address | O | O | O | O | **R** | O | O | O | O | | | imds_compat | O | O | O | O | O | O | O | O | O | | `NetworkInterface` | guest_mac | O | O | O | O | **R** | O | O | O | O | | | host_dev_name | O | O | O | O | **R** | O | O | O | O | | | iface_id | O | O | O | O | **R** | O | O | O | O | | | rx_rate_limiter | O | O | O | O | **R** | O | O | O | O | | | tx_rate_limiter | O | O | O | O | **R** | O | O | O | O | | `PartialDrive` | drive_id | O | O | **R** | O | O | O | O | O | O | | | path_on_host | O | O | **R** | O | O | O | O | O | O | | `PartialNetworkInterface` | iface_id | O | O | O | O | **R** | O | O | O | O | | | rx_rate_limiter | O | O | O | O | **R** | O | O | O | O | | | tx_rate_limiter | O | O | O | O | **R** | O | O | O | O | | `RateLimiter` | bandwidth | O | O | O | O | **R** | O | O | O | O | | | ops | O | O | **R** | O | O | O | O | O | O | | `TokenBucket` \*\* | one_time_burst | O | O | **R** | O | O | O | O | O | O | | | refill_time | O | O | **R** | O | O | O | O | O | O | | | size | O | O | **R** | O | O | O | O | O | O | | `TokenBucket` \*\* | one_time_burst | O | O | O | O | **R** | O | O | O | O | | | refill_time | O | O | O | O | **R** | O | O | O | O | | | size | O | O | O | O | **R** | O | O | O | O | | `Vm` | state | O | O | O | O | O | O | O | O | O | | `Vsock` | guest_cid | O | O | O | O | O | **R** | O | O | O | | | uds_path | O | O | O | O | O | **R** | O | O | O | | | vsock_id | O | O | O | O | O | **R** | O | O | O | | 
`EntropyDevice` | rate_limiter | O | O | O | O | O | O | **R** | O | O | | `Pmem` | id | O | O | O | O | O | O | O | **R** | O | | | path_on_host | O | O | O | O | O | O | O | **R** | O | | | root_device | O | O | O | O | O | O | O | **R** | O | | | read_only | O | O | O | O | O | O | O | **R** | O | | `MemoryHotplugConfig` | total_size_mib | O | O | O | O | O | O | O | O | **R** | | | slot_size_mib | O | O | O | O | O | O | O | O | **R** | | | block_size_mi | O | O | O | O | O | O | O | O | **R** | | `MemoryHotplugSizeUpdate` | requested_size_mib | O | O | O | O | O | O | O | O | **R** | \* `Drive`'s `drive_id`, `is_root_device` and `partuuid` can be configured by either virtio-block or vhost-user-block devices. \*\* The `TokenBucket` can be configured with any combination of virtio-net, virtio-block and virtio-rng devices. ## Output Schema All output schema fields can be found in the [Swagger](https://swagger.io) specification: [firecracker.yaml](./../src/firecracker/swagger/firecracker.yaml). 
| Schema | Property | keyboard | serial console | virtio-block | vhost-user-block | virtio-net | virtio-vsock | virtio-mem | | ---------------------- | ------------------ | :------: | :------------: | :----------: | :--------------: | :--------: | :----------: | :--------: | | `Error` | fault_message | O | O | O | O | O | O | O | | `InstanceInfo` | app_name | O | O | O | O | O | O | O | | | id | O | O | O | O | O | O | O | | | state | O | O | O | O | O | O | O | | | vmm_version | O | O | O | O | O | O | O | | `MachineConfiguration` | cpu_template | O | O | O | O | O | O | O | | | smt | O | O | O | O | O | O | O | | | mem_size_mib | O | O | O | O | O | O | O | | | track_dirty_pages | O | O | O | O | O | O | O | | | vcpu_count | O | O | O | O | O | O | O | | | vmm_version | O | O | O | O | O | O | O | | `MemoryHotplugStatus ` | total_size_mib | O | O | O | O | O | O | **R** | | | slot_size_mib | O | O | O | O | O | O | **R** | | | block_size_mib | O | O | O | O | O | O | **R** | | | plugged_size_mib | O | O | O | O | O | O | **R** | | | requested_size_mib | O | O | O | O | O | O | **R** | ## Instance Actions All instance actions can be found in the [Swagger](https://swagger.io) specification: [firecracker.yaml](./../src/firecracker/swagger/firecracker.yaml). | Action | keyboard | serial console | virtio-block | vhost-user-block | virtio-net | virtio-vsock | | ---------------- | :------: | :------------: | :----------: | :--------------: | :--------: | :----------: | | `FlushMetrics` | O | O | O | O | O | O | | `InstanceStart` | O | O | O | O | O | O | | `SendCtrlAltDel` | **R** | O | O | O | O | O | ================================================ FILE: docs/entropy.md ================================================ # Using the Firecracker entropy device ## What is the entropy device An entropy device is a [`virtio-rng` device][1] that provides guests with "high-quality randomness for guest use". 
Guests issue requests in the form of a buffer that will be filled with random bytes from the device. The source of random bytes that the device will use to fill the buffers is an implementation decision. On the guest side, the kernel uses random bytes received through the device as an extra source of entropy. Moreover, the guest VirtIO driver exposes the `/dev/hwrng` character device. User-space applications can use this device to request random bytes from the device. ## Firecracker implementation Firecracker offers the option of attaching a single `virtio-rng` device. Users can configure it through the `/entropy` API endpoint. The request body includes a single (optional) parameter for configuring a rate limiter. For example, users can configure the entropy device with a bandwidth rate limiter of 10KB/sec like this: ```console curl --unix-socket $socket_location -i \ -X PUT 'http://localhost/entropy' \ -H 'Accept: application/json' \ -H 'Content-Type: application/json' \ -d "{ \"rate_limiter\": { \"bandwidth\": { \"size\": 1000, \"one_time_burst\": 0, \"refill_time\": 100 } } }" ``` If a configuration file is used for configuring a microVM, the same setup can be achieved by adding a section like this: ```json "entropy": { "rate_limiter": { "bandwidth" { "size": 1000, "one_time_burst": 0, "refill_time": 100 } } } ``` On the host side, Firecracker relies on [`aws-lc-rs`][2] to retrieve the random bytes. `aws-lc-rs` uses the [`AWS-LC` cryptographic library][3]. ## Prerequisites In order to use the entropy device, users must use a kernel with the `virtio-rng` front-end driver compiled in or loaded as a module. The relevant kernel configuration option is `CONFIG_HW_RANDOM_VIRTIO` (which depends on `CONFIG_HW_RANDOM` and `CONFIG_VIRTIO`). 
[1]: https://docs.oasis-open.org/virtio/virtio/v1.2/cs01/virtio-v1.2-cs01.html#x1-3050004 [2]: https://docs.rs/aws-lc-rs/latest/aws_lc_rs/index.html [3]: https://github.com/aws/aws-lc ================================================ FILE: docs/formal-verification.md ================================================ # Formal Verification in Firecracker According to Firecracker’s [threat model](https://github.com/firecracker-microvm/firecracker/blob/main/docs/design.md#threat-containment), all vCPUs are considered to be running potentially malicious code from the moment they are started. This means Firecracker can make no assumptions about well-formedness of data passed to it by the guest, and have to operate *safely* no matter what input it is faced with. Traditional testing methods alone cannot guarantee about the general absence of safety issues, as for this we would need to write and run every possible unit test, exercising every possible code path - a prohibitively large task. To partially address these limitations, Firecracker is additionally using formal verification to go further in verifying that safety issues such as buffer overruns, panics, use-after-frees or integer overflows cannot occur in critical components. We employ [Kani](https://github.com/model-checking/kani/), a formal verification tool written specifically for Rust, which allows us to express functional properties (such as any user-specified assertion) in familiar Rust-style by replacing concrete values in unit tests with `kani::any()`. For more details on how Kani works, and what properties it can verify, check out its official [Kani book](https://model-checking.github.io/kani/) or try out this [tutorial](https://model-checking.github.io/kani/kani-tutorial.html). 
We aim to have Kani harnesses for components that directly interact with data from the guest, such as the TCP/IP stack powering our microVM Metadata Service (MMDS) integration, or which are difficult to test traditionally, such as our I/O Rate Limiter. Our Kani harnesses live in `verification` modules that are tagged with `#[cfg(kani)]`, similar to how unit tests in Rust are usually structured. Note that for some harnesses, Kani uses a “bounded” approach, where the inputs are restricted based on some assumptions (e.g. the size of an Ethernet frame being 1514 bytes). **Harnesses are only as strong as the assumptions they make, so all guarantees from the harness are only valid based on the set of assumptions we have in our Kani harnesses.** Generally, they should strive to *over-approximate*, meaning it is preferred they cover some “impossible” situations instead of making too strong assumptions that cause them to exclude realistic scenarios. ## How to run Kani harnesses To ensure that no incoming code changes cause regressions on formally verified properties, **all Kani harnesses are run on every pull request in our CI.** To check whether the harnesses all work for your pull request, check out the “Kani” [Buildkite](https://buildkite.com/) step. To run our harnesses locally, you can either enter our CI docker container via `./tools/devtool shell -p`, or by [installing Kani](https://model-checking.github.io/kani/install-guide.html#installing-the-latest-version) locally. Note that the first invocation of Kani post-installation might take a while, due to it setting up some dependencies. Individual harnesses can then be executed using `cargo kani` similarly to how `cargo test` can run individual unit tests, the only difference being that the harness needs to be specified via `--harness`. Note, however, that many harnesses require significant memory, and might result in OOM conditions. ## An example harness The following is adapted from our Rate Limiter harness suite. 
It aims to verify that creation of a rate-limiting policy upholds all [Kani supported safety invariants](https://model-checking.github.io/kani/tutorial-kinds-of-failure.html) (which can roughly be summarized as “everything that leads to a panic in a debug build”), as well as results in a valid policy. A first attempt might look something like this: ``` #[kani::proof] fn verify_token_bucket_new() { let token_budget = kani::any(); let complete_refill_time_ms = kani::any(); // Checks if the `TokenBucket` is created with invalid inputs, the result // is always `None`. match TokenBucket::new(token_budget, 0, complete_refill_time_ms) { None => assert!(size == 0 || complete_refill_time_ms == 0), Some(bucket) => assert!(bucket.is_valid()), } } ``` The `#[kani::proof]` attribute tells us that the function is a harness to be picked up by the Kani compiler. It is the Kani equivalent of `#[test]`. Lines 3-5 indicate that we want to verify that policy creation works for arbitrarily sized token buckets and arbitrary refill times. **This is the key difference to a unit test**, where we would be using concrete values instead (e.g. `let token_budget = 10;`). Note that Kani will not produce an executable, but instead *statically* verifies that code does not violate invariants. We do not actually execute the creation code for all possible inputs. The final match statement tells us the property we want to verify, which is “*bucket creation only fails if size of refill time are zero*”. In all other cases, we assert `new` to give us a valid bucket. We mapped these properties with assertions. If the verification fails, then that is because one of our properties do not hold. Now that we understand the code in the harness, let's try to verify `TokenBucket::new` with the Kani! 
If we run `cargo kani --harness verify_token_bucket_new` we will be greeted by ``` SUMMARY: ** 1 of 147 failed Failed Checks: attempt to multiply with overflow File: "src/rate_limiter/src/lib.rs", line 136, in TokenBucket::new VERIFICATION:- FAILED Verification Time: 0.21081695s ``` In this particular case, Kani has found a safety issue related to an integer overflow! Due to `complete_refill_time_ms` getting converted from milliseconds to nanoseconds in the constructor, we have to take into consideration that the nanosecond value might not fit into a `u64` anymore. Here, the finding is benign, as no one would reasonably configure a `ratelimiter` with a replenish time of 599730287.457 *years*. A [quick check](https://github.com/firecracker-microvm/firecracker/commit/0db2a130ca4eeffeca9a46e7b6bd45c1bc1c9e21) in the constructor fixes it. However, we will also have to adjust our harness! Rerunning the harness from above now yields: ``` SUMMARY: ** 1 of 149 failed Failed Checks: assertion failed: size == 0 || complete_refill_time_ms == 0 File: "src/rate_limiter/src/lib.rs", line 734, in verification::verify_token_bucket_new VERIFICATION:- FAILED Verification Time: 0.21587047s ``` This makes sense: There are now more scenarios in which we explicitly fail construction. Changing our failure property from `size == 0 || complete_refill_time_ms == 0` to `size == 0 || complete_refill_time_ms == 0 || complete_refill_time >= u64::MAX / 1_000_000` in the harness will account for this change, and rerunning the harness will now tell us that no more issues are found: ``` SUMMARY: ** 0 of 150 failed VERIFICATION:- SUCCESSFUL Verification Time: 0.19135727s ``` ## FAQ **Q:** What is the Kani verifier?\ **A:** The [Kani Rust Verifier](https://github.com/model-checking/kani) is a bit-precise model checker for Rust. 
Kani is particularly useful for verifying unsafe code blocks in Rust, where the “[unsafe superpowers](https://doc.rust-lang.org/stable/book/ch19-01-unsafe-rust.html#unsafe-superpowers)" are unchecked by the compiler. **Q:** What safety properties does Kani verify?\ **A:** Kani verifies memory safety properties (e.g., invalid-pointer dereferences, out-of-bounds array access), user-specified assertions (i.e., `assert!(...)`), the absence of `panic!()`s (e.g., `unwrap()` on `None` values), and the absence of some types of unexpected behavior (e.g., arithmetic overflows). For a full overview, see the [Kani documentation](https://model-checking.github.io/kani/tutorial-kinds-of-failure.html). **Q:** Do we expect all contributors to write harnesses for newly introduced code?\ **A:** No. Kani is complementary to unit testing, and we do not have target for “proof coverage”. We employ formal verification in especially critical code areas. Generally we do not expect someone who might not be familiar with formal tools to contribute harnesses. We do expect all contributed code to pass verification though, just like we expect it to pass unit test! **Q:** How should I report issues related to any Firecracker harnesses?\ **A:** Our Kani harnesses verify safety critical invariants. If you discover a flaw in a harness, please report it using the [security issue disclosure process](https://github.com/firecracker-microvm/firecracker/blob/main/SECURITY.md). **Q:** How do I know which properties I should prove in the Kani harness?\ **A:** Generally, these are given by some sort of specification. This can either be the function contract described in its document (e.g. what relation between input and output do callers expect?), or even something formal such as the TCP/IP standard. Don't forget to mention the specification in your proof harness! 
**Q:** Where do I debug a broken proof?\ **A:** Check out the Kani book section on [debugging verification failures](https://model-checking.github.io/kani/debugging-verification-failures.html). ================================================ FILE: docs/gdb-debugging.md ================================================ # GDB Debugging with Firecracker **The GDB feature is not for production use.** Firecracker supports debugging the guest kernel via GDB remote serial protocol. This allows us to connect GDB to the firecracker process and step through debug the guest kernel. ## Prerequisites Firstly, to enable GDB debugging we need to compile Firecracker with the `gdb` feature enabled, this will enable the necessary components for the debugging process. To build firecracker with the `gdb` feature enabled we run: ```bash cargo build --features "gdb" ``` Secondly, we need to compile a kernel with specific features enabled for debugging to work. The key config options to enable are: ``` CONFIG_FRAME_POINTER=y CONFIG_DEBUG_INFO=y ``` For GDB debugging the `gdb_socket_path` option under `machine-config` should be set. When using the API the socket address must be set before instance start. In this example we set the address to `/tmp/gdb.socket` in the config file: ``` { ... "machine-config": { ... "gdb_socket_path": "/tmp/gdb.socket" ... } ... } ``` Using the API the socket address can be configured before boot like so: ``` sudo curl -X PATCH --unix-socket "${API_SOCKET}" \ --data "{ \"gdb_socket_path\": \"/tmp/gdb.socket\" }" "http://localhost/machine-config" ``` ## Starting Firecracker with GDB With all the prerequisites in place you can now start firecracker ready to connect to GDB. When you start the firecracker binary now you'll notice it'll be blocked waiting for the GDB connection. This is done to allow us to set breakpoints before the boot process begins. With Firecracker running and waiting for GDB we are now able to start GDB and connect to Firecracker. 
You may need to set the permissions of your GDB socket E.g. `/tmp/gdb.socket` to `0666` before connecting. An example of the steps taken to start GDB, load the symbols and connect to Firecracker: 1. Start the GDB process, you can attach the symbols by appending the kernel blob, for example here `vmlinux` ```bash gdb vmlinux ``` 1. When GDB has started set the target remote to `/tmp/gdb.socket` to connect to Firecracker ```bash (gdb) target remote /tmp/gdb.socket ``` With these steps completed you'll now see GDB has stopped at the entry point ready for us to start inserting breakpoints and debugging. ## Notes ### Software Breakpoints not working on start When at the initial paused state you'll notice software breakpoints won't work and only hardware breakpoints will until memory virtualisation is enabled. To circumvent this one solution is to set a hardware breakpoint at `start_kernel` and continue. Once you've hit the `start_kernel` set the regular breakpoints as you would do normally. E.g. ```bash > hbreak start_kernel > c ``` ### Pausing Firecracker while it's running While Firecracker is running you can pause vcpu 1 by pressing `Ctrl+C` which will stop the vcpu and allow you to set breakpoints or inspect the current location. ### Halting execution of GDB and Firecracker To end the debugging session and shut down Firecracker you can run the `exit` command in the GDB session which will terminate both. ## Known limitations - The multi-core scheduler can in some cases cause issues with GDB, this can be mitigated by setting these kernel config values: ``` CONFIG_SCHED_MC=n CONFIG_SCHED_MC_PRIO=n ``` - Currently we support a limited subset of cpu registers for get and set operations, if more are required feel free to contribute. - On ARM the guest virtual address translation will only work on guests with 4kb pages and not all physical address sizes are supported. If the current translation implementation doesn't cover a specific setup, feel free to contribute. 
================================================ FILE: docs/getting-started.md ================================================ # Getting Started with Firecracker **All resources are used for demonstration purposes and are not intended for production.** ## Prerequisites You can check if your system meets the requirements by running `firecracker/tools/devtool checkenv`. An opinionated way to run Firecracker is to launch an [EC2](https://aws.amazon.com/ec2/) `c5.metal` instance with Ubuntu 24.04. Firecracker requires [the KVM Linux kernel module](https://www.linux-kvm.org/) to perform its virtualization and emulation tasks. We exclusively use `.metal` instance types, because EC2 only supports KVM on `.metal` instance types. ### Architecture & OS Firecracker supports **x86_64** and **aarch64** Linux, see [specific supported kernels](kernel-policy.md). ### KVM Firecracker requires read/write access to `/dev/kvm` exposed by the KVM module. The presence of the KVM module can be checked with: ```bash lsmod | grep kvm ``` An example output where it is enabled: ```bash kvm_intel 348160 0 kvm 970752 1 kvm_intel irqbypass 16384 1 kvm ``` Some Linux distributions use the `kvm` group to manage access to `/dev/kvm`, while others rely on access control lists. If you have the ACL package for your distro installed, you can grant Read+Write access with: ```bash sudo setfacl -m u:${USER}:rw /dev/kvm ``` If access is managed via the `kvm` group, check that the KVM group exists: ```bash getent group kvm ``` and check that `/dev/kvm` is associated with the kvm group: ```bash ls -l /dev/kvm ``` You can see if your current user is already in the kvm group by running: ```bash groups ``` Otherwise, add your current user to the group by running: ```bash [ $(stat -c "%G" /dev/kvm) = kvm ] && sudo usermod -aG kvm ${USER} \ && echo "Access granted." 
``` If none of the above works, you will need to either install the file system ACL package for your distro and use the `setfacl` command as above, or run Firecracker as `root` (via `sudo`). You can check if you have access to `/dev/kvm` with: ```bash [ -r /dev/kvm ] && [ -w /dev/kvm ] && echo "OK" || echo "FAIL" ``` ## Running Firecracker In production, Firecracker is designed to be run securely inside an execution jail, set up by the [`jailer`](../src/jailer/) binary. This is how our [integration test suite](#running-the-integration-test-suite) does it. For simplicity, this guide will not use the [`jailer`](../src/jailer/). ### Getting a rootfs and Guest Kernel Image To successfully start a microVM, you will need an uncompressed Linux kernel binary, and an ext4 file system image (to use as rootfs). This guide uses the latest kernel image and Ubuntu rootfs available in our CI for the latest release. ```bash ARCH="$(uname -m)" release_url="https://github.com/firecracker-microvm/firecracker/releases" latest_version=$(basename $(curl -fsSLI -o /dev/null -w %{url_effective} ${release_url}/latest)) CI_VERSION=${latest_version%.*} latest_kernel_key=$(curl "http://spec.ccfc.min.s3.amazonaws.com/?prefix=firecracker-ci/$CI_VERSION/$ARCH/vmlinux-&list-type=2" \ | grep -oP "(?<=)(firecracker-ci/$CI_VERSION/$ARCH/vmlinux-[0-9]+\.[0-9]+\.[0-9]{1,3})(?=)" \ | sort -V | tail -1) # Download a linux kernel binary wget "https://s3.amazonaws.com/spec.ccfc.min/${latest_kernel_key}" latest_ubuntu_key=$(curl "http://spec.ccfc.min.s3.amazonaws.com/?prefix=firecracker-ci/$CI_VERSION/$ARCH/ubuntu-&list-type=2" \ | grep -oP "(?<=)(firecracker-ci/$CI_VERSION/$ARCH/ubuntu-[0-9]+\.[0-9]+\.squashfs)(?=)" \ | sort -V | tail -1) ubuntu_version=$(basename $latest_ubuntu_key .squashfs | grep -oE '[0-9]+\.[0-9]+') # Download a rootfs from Firecracker CI wget -O ubuntu-$ubuntu_version.squashfs.upstream "https://s3.amazonaws.com/spec.ccfc.min/$latest_ubuntu_key" # The rootfs in our CI doesn't contain 
SSH keys to connect to the VM # For the purpose of this demo, let's create one and patch it in the rootfs unsquashfs ubuntu-$ubuntu_version.squashfs.upstream ssh-keygen -f id_rsa -N "" cp -v id_rsa.pub squashfs-root/root/.ssh/authorized_keys mv -v id_rsa ./ubuntu-$ubuntu_version.id_rsa # create ext4 filesystem image sudo chown -R root:root squashfs-root truncate -s 1G ubuntu-$ubuntu_version.ext4 sudo mkfs.ext4 -d squashfs-root -F ubuntu-$ubuntu_version.ext4 # Verify everything was correctly set up and print versions echo echo "The following files were downloaded and set up:" KERNEL=$(ls vmlinux-* | tail -1) [ -f $KERNEL ] && echo "Kernel: $KERNEL" || echo "ERROR: Kernel $KERNEL does not exist" ROOTFS=$(ls *.ext4 | tail -1) e2fsck -fn $ROOTFS &>/dev/null && echo "Rootfs: $ROOTFS" || echo "ERROR: $ROOTFS is not a valid ext4 fs" KEY_NAME=$(ls *.id_rsa | tail -1) [ -f $KEY_NAME ] && echo "SSH Key: $KEY_NAME" || echo "ERROR: Key $KEY_NAME does not exist" ``` ### Getting a Firecracker Binary There are two options for getting a firecracker binary: - Downloading an official firecracker release from our [release page](https://github.com/firecracker-microvm/firecracker/releases), or - Building firecracker from source. 
To download the latest firecracker release, run: ```bash ARCH="$(uname -m)" release_url="https://github.com/firecracker-microvm/firecracker/releases" latest=$(basename $(curl -fsSLI -o /dev/null -w %{url_effective} ${release_url}/latest)) curl -L ${release_url}/download/${latest}/firecracker-${latest}-${ARCH}.tgz \ | tar -xz # Rename the binary to "firecracker" mv release-${latest}-$(uname -m)/firecracker-${latest}-${ARCH} firecracker ``` To instead build firecracker from source, you will need to have `docker` installed: ```bash ARCH="$(uname -m)" # Clone the firecracker repository git clone https://github.com/firecracker-microvm/firecracker firecracker_src # Start docker sudo systemctl start docker # Build firecracker # # It is possible to build for gnu, by passing the arguments '-l gnu'. # # This will produce the firecracker and jailer binaries under # `./firecracker_src/build/cargo_target/${toolchain}/debug`. # sudo ./firecracker_src/tools/devtool build # Rename the binary to "firecracker" sudo cp ./firecracker_src/build/cargo_target/${ARCH}-unknown-linux-musl/debug/firecracker firecracker ``` ### Starting Firecracker Running firecracker will require two terminals, the first one running the firecracker binary, and a second one for communicating with the firecracker process via HTTP requests: ```bash API_SOCKET="/tmp/firecracker.socket" # Remove API unix socket sudo rm -f $API_SOCKET # Run firecracker sudo ./firecracker --api-sock "${API_SOCKET}" --enable-pci ``` The `--enable-pci` flag instructs Firecracker to create all VirtIO devices using a PCI VirtIO transport. This flag is optional. If not passed, Firecracker will create devices using the legacy MMIO transport. We suggest that users enable the PCI transport, as it yields higher throughput and lower latency for VirtIO devices. For more information regarding guest kernel requirements for using PCI look at our [kernel policy documentation](./kernel-policy.md). 
In a new terminal (do not close the 1st one): ```bash TAP_DEV="tap0" TAP_IP="172.16.0.1" MASK_SHORT="/30" # Setup network interface sudo ip link del "$TAP_DEV" 2> /dev/null || true sudo ip tuntap add dev "$TAP_DEV" mode tap sudo ip addr add "${TAP_IP}${MASK_SHORT}" dev "$TAP_DEV" sudo ip link set dev "$TAP_DEV" up # Enable ip forwarding sudo sh -c "echo 1 > /proc/sys/net/ipv4/ip_forward" sudo iptables -P FORWARD ACCEPT # This tries to determine the name of the host network interface to forward # VM's outbound network traffic through. If outbound traffic doesn't work, # double check this returns the correct interface! HOST_IFACE=$(ip -j route list default |jq -r '.[0].dev') # Set up microVM internet access sudo iptables -t nat -D POSTROUTING -o "$HOST_IFACE" -j MASQUERADE || true sudo iptables -t nat -A POSTROUTING -o "$HOST_IFACE" -j MASQUERADE API_SOCKET="/tmp/firecracker.socket" LOGFILE="./firecracker.log" # Set log file sudo curl -X PUT --unix-socket "${API_SOCKET}" \ --data "{ \"log_path\": \"${LOGFILE}\", \"level\": \"Debug\", \"show_level\": true, \"show_log_origin\": true }" \ "http://localhost/logger" KERNEL="./$(ls vmlinux* | tail -1)" KERNEL_BOOT_ARGS="console=ttyS0 reboot=k panic=1" ARCH=$(uname -m) if [ ${ARCH} = "aarch64" ]; then KERNEL_BOOT_ARGS="keep_bootcon ${KERNEL_BOOT_ARGS}" fi # Set boot source sudo curl -X PUT --unix-socket "${API_SOCKET}" \ --data "{ \"kernel_image_path\": \"${KERNEL}\", \"boot_args\": \"${KERNEL_BOOT_ARGS}\" }" \ "http://localhost/boot-source" ROOTFS="./$(ls *.ext4 | tail -1)" # Set rootfs sudo curl -X PUT --unix-socket "${API_SOCKET}" \ --data "{ \"drive_id\": \"rootfs\", \"path_on_host\": \"${ROOTFS}\", \"is_root_device\": true, \"is_read_only\": false }" \ "http://localhost/drives/rootfs" # The IP address of a guest is derived from its MAC address with # `fcnet-setup.sh`, this has been pre-configured in the guest rootfs. It is # important that `TAP_IP` and `FC_MAC` match this. 
FC_MAC="06:00:AC:10:00:02" # Set network interface sudo curl -X PUT --unix-socket "${API_SOCKET}" \ --data "{ \"iface_id\": \"net1\", \"guest_mac\": \"$FC_MAC\", \"host_dev_name\": \"$TAP_DEV\" }" \ "http://localhost/network-interfaces/net1" # API requests are handled asynchronously, it is important the configuration is # set, before `InstanceStart`. sleep 0.015s # Start microVM sudo curl -X PUT --unix-socket "${API_SOCKET}" \ --data "{ \"action_type\": \"InstanceStart\" }" \ "http://localhost/actions" # API requests are handled asynchronously, it is important the microVM has been # started before we attempt to SSH into it. sleep 2s KEY_NAME=./$(ls *.id_rsa | tail -1) # Setup internet access in the guest ssh -i $KEY_NAME root@172.16.0.2 "ip route add default via 172.16.0.1 dev eth0" # Setup DNS resolution in the guest ssh -i $KEY_NAME root@172.16.0.2 "echo 'nameserver 8.8.8.8' > /etc/resolv.conf" # SSH into the microVM ssh -i $KEY_NAME root@172.16.0.2 # Use `root` for both the login and password. # Run `reboot` to exit. ``` Issuing a `reboot` command inside the guest will gracefully shut down Firecracker. This is due to the fact that Firecracker doesn't implement guest power management. ### Configuring the microVM without sending API requests You can boot a guest without using the API socket by passing the parameter `--config-file` to the Firecracker process. E.g.: ```wrap sudo ./firecracker --api-sock /tmp/firecracker.socket --config-file <path_to_the_configuration_file> ``` `path_to_the_configuration_file` is the path to a JSON file with the configuration for all of the microVM's resources. The JSON **must** contain the configuration for the guest kernel and rootfs, all of the other resources are optional. This configuration method will also start the microVM, as such you need to specify all desired pre-boot configurable resources in the JSON. 
The names of the resources can be seen in [`firecracker.yaml`](../src/firecracker/swagger/firecracker.yaml) and the names of their fields are the same that are used in the API requests. An example configuration file is provided: [`tests/framework/vm_config.json`](../tests/framework/vm_config.json). Once the guest is booted, refer to [network-setup](./network-setup.md#in-the-guest) to bring up the network in the guest machine. After the microVM is started you can still use the socket to send API requests for post-boot operations. ### Building Firecracker SSH can be used to work with libraries from private git repos by passing the `--ssh-keys` flag to specify the paths to your public and private SSH keys on the host. Both are required for git authentication when fetching the repositories. ```bash tools/devtool build --ssh-keys ~/.ssh/id_rsa.pub ~/.ssh/id_rsa ``` Only a single set of credentials is supported. `devtool` cannot fetch multiple private repos which rely on different credentials. `tools/devtool build` builds in `debug`. To build release binaries, pass `--release`, e.g. `tools/devtool build --release` Documentation on `devtool` can be seen with `tools/devtool --help`. ## Running the Integration Test Suite Integration tests can be run with `tools/devtool test`. The test suite is designed to ensure our [SLA parameters](../SPECIFICATION.md) as measured on EC2 .metal instances, as such performance tests may fail when not run on these machines. Specifically, don't be alarmed if you see `tests/integration_tests/performance/test_process_startup_time.py` failing when not run on an EC2 .metal instance. 
You can skip performance tests with: ```bash ./tools/devtool test -- --ignore integration_tests/performance ``` If you run the integration tests on an EC2 .metal instance, and encounter failures such as the following `FAILED integration_tests/style/test_markdown.py::test_markdown_style - requests.exceptions.ReadTimeout: HTTPConnectionPool(host='169.254.169.254', port=80): Read timed out. (read timeout=2)` try running `aws ec2 modify-instance-metadata-options --instance-id i-<instance_id> --http-put-response-hop-limit 2`. The integration tests framework uses IMDSv2 to determine information such as instance type. The additional hop is needed because the IMDS requests will pass through docker. ## Errors while using `curl` to access the API Points to check to confirm the API socket is running and accessible: - Check that the user running the Firecracker process and the user using `curl` have equivalent privileges. For example, if you run Firecracker with **sudo**, run `curl` with **sudo** as well. - [SELinux](https://man7.org/linux/man-pages/man8/selinux.8.html) can regulate access to sockets on RHEL based distributions. How a user's permissions are configured is environmentally specific, but for the purposes of troubleshooting you can check if it is enabled in `/etc/selinux/config`. - With the Firecracker process running using `--api-sock /tmp/firecracker.socket`, confirm that the socket is open: - `ss -a | grep '/tmp/firecracker.socket'` - If you have socat available, try `socat - UNIX-CONNECT:/tmp/firecracker.socket` This will throw an explicit error if the socket is inaccessible, or it will pause and wait for input to continue. ================================================ FILE: docs/hugepages.md ================================================ # Backing Guest Memory by Huge Pages Firecracker supports backing the guest memory of a VM by 2MB hugetlbfs pages. 
This can be enabled by setting the `huge_pages` field of `PUT` or `PATCH` requests to the `/machine-config` endpoint to `2M`. Backing guest memory by huge pages can bring performance improvements for specific workloads, due to less TLB contention and less overhead during virtual->physical address resolution. It can also help reduce the number of KVM_EXITS required to rebuild extended page tables post snapshot restore, as well as improve boot times (by up to 50% as measured by Firecracker's [boot time performance tests](../tests/integration_tests/performance/test_boottime.py)) Using hugetlbfs requires the host running Firecracker to have a pre-allocated pool of 2M pages. Should this pool be too small, Firecracker may behave erratically or receive the `SIGBUS` signal. This is because Firecracker uses the `MAP_NORESERVE` flag when mapping guest memory. This flag means the kernel will not try to reserve sufficient hugetlbfs pages at the time of the `mmap` call, trying to claim them from the pool on-demand. For details on how to manage this pool, please refer to the [Linux Documentation][hugetlbfs_docs]. ## Huge Pages and Snapshotting Restoring a Firecracker snapshot of a microVM backed by huge pages will also use huge pages to back the restored guest. There is no option to flip between regular, 4K, pages and huge pages at restore time. Furthermore, snapshots of microVMs backed with huge pages can only be restored via UFFD. When restoring snapshots via UFFD, Firecracker will send the configured page size (in KiB) for each memory region as part of the initial handshake, as described in our documentation on [UFFD-assisted snapshot-restore](snapshotting/handling-page-faults-on-snapshot-resume.md). ## Known Limitations Enabling dirty page tracking for hugepage memory negates the performance benefits of using huge pages. 
This is because KVM will unconditionally establish guest page tables at 4K granularity if dirty page tracking is enabled, even if the host uses huge mappings. The traditional balloon device reports free pages at 4k granularity, this means the device is unable to reclaim the hugepage backing of the guest and drop RSS. However, the balloon can still be inflated and used to restrict memory usage in the guest. ## FAQ ### Why does Firecracker not offer a transparent huge pages (THP) setting? Firecracker's guest memory can be memfd based. Linux (as of 6.1) does not offer a way to dynamically enable THP for such memory regions. Additionally, UFFD does not integrate with THP (no transparent huge pages will be allocated during userfaulting). Please refer to the [Linux Documentation][thp_docs] for more information. [hugetlbfs_docs]: https://docs.kernel.org/admin-guide/mm/hugetlbpage.html [thp_docs]: https://www.kernel.org/doc/html/next/admin-guide/mm/transhuge.html#hugepages-in-tmpfs-shmem ================================================ FILE: docs/images/vsock-connections.drawio ================================================ 
zLzX0rNImy14NX24J3AScJh47/3JBE54jwBx9ZN8VdXd/+7eM7MPJmK+Cr2SUpAkj1nPWplJ/RvODpe4pnOtT0XZ/xuGFNe/4dy/YRiKvCn49rT8/mmhiL9aqrUp/m77jwa3uct/Dvy79dsU5fYvB+7T1O/N/K+N+TSOZb7/S1u6rtP5r4d9pv5frzqnVflfGtw87f9ra9gUe/1XK/VC/qNdKpuq/ufKKPL3L0P6z8F/N2x1Wkznf2rC+X/D2XWa9r8+DRdb9o/1/rHLX+cJ/4tf/31gaznu/29O0E+H3fTP3elf+vwfDvV/2i/1f/zdy5H2379v+O/B7r9/LFCOBXgMCb/lfbptTf5vOFPvQw8bUPjx0/Q9O/XT+udw/EPlZf4csu3r1JX/6ZeMehHQbjjzX0f+9zDK4l+c8fd9iOU0lPv6gwec/+GCfzxQ/yfr/9O2ln26N8e/ujD9OxKqf+/u369gTQ0cCYb8E7b4+/9A38h//Ps7iP+OYYJ4/WuP2/Rd8/LvTv6zB/6nfnEC+b/vaE/Xqtz/S0fww3+ywn80/XHw/4azsf9nZ6/TdyzK4m/XnnWzl+6c5s+vJ8zwf/X8/9KPR7nu5fV/68l/fqX+1SSvf0z0nzz9Rv8bV/+7r/87t/6L4f53rYT//9FK2L9Y6f3fGOmfIP0XI1H/XxmJ+G+M9O7hZZmiOf7FWO/l+yAc85nG/X9sf/AdwAOw13z9Mc4/v8NP1fMufstt/6czOLY//f3103/xAzTf/q/G/lfQGaex/J8Q6u+mtG+q8UE06JQStjOPMxoI++DvH4amKJ7L/Lfe/Q//I3/f2d+FC3v98/3vQf43KP2/7X2a+lc0wv8lFjCU/i/BQPx3CfP/VSi8/p/zBZa++fnYDH+qLfPnHWzzXwX7sWH6z5dPcz2G/Xd/aGlW9ta0NXszPX7Jpn2fhv/GYfs0/9Mz/FakewoD7a+vmDCP1b9hbBMwpnMiqlhNAP4zXL/m/Qp+8mz4R5ZZEMN3rkIczYcfKsD3vB04xGj+ioKs8yAbYV8cNIIso8CuF1rhurGy7IbtVd9rKuZtNzVbTQ3YFf2t4an5rqMrih0VzY8Anosn61YJclyJ7HOqMdAejHWB/EjQicKqReZqDDdN/crBw+7X60qgs5niJdGK/NrujfbG40sTmM5yfn+qp++hGjwAnusrrN0paODLXy42dZvHZVes8T7mbzYRQoDnyEjS8BB4mGjrcb2DgfGAYs5ZniWJNZMDzgIeAx5fK2rw408C4TEm5wagxUBXwlsr2yPLU8aL6vXp5+1OfLKdGXZqwADgBvrJb2ww6nBA7eGlERioi8lCnTFcvp5SYlX0q4v23mcXNAFizGEgAxHg7rcEb2AERpOeA5LWBT1EoFoqNu4Uwylyn80c3pCY1UwEob6BC68Dz5XSeucwJgOWPsMr+tX8ToGy2J3A4AKjLFvL3MoRqa/eIEhbqYyT1QD7G+C1aCQ4ZcIv2tufe3CyDQ2H2EzluV53G0u6gr6TQ59uKRZ04FU+YCOgVe4pyaBEWdhbXQE7qm6em9TsRB5Lk03bkdP1SnBjcrVOq3geLCc3Aem0BVuioCWhXQRxIn3AfJ0MQ5b1i+wabT5nWz0D/dYFCujOFlEWPQYeiCoZJfOJktJhKj8hqutenJZ/3Z+tImnhaUNFV8oJe/oZFVMBt9JQQO4lNofSyCmcHdjMnzszEYWwJ8BkNi9gNQs02meGCq9UsCCkz7Bo0rEpD2OjRyNOhe5cm53bAeab7e62GGLmCsdfsut2fm0rm+qPJKPPPw04J3PKJ8Aq3S5gx3an0hPg7FBJbuYIEWFSOjPJIeAzHAuO0ziBaYMfACU4QNkpz9V5NRcLJuEGoZY9l9dVklEzb5P07/I9bKYSc85kcmAd9gjBihkmd1jt18b4cT4orOv3nUrtOuqk0TdlcC05Ln0ZbU0vKn4mgwa4rQKDqcLr2OX92lwa7ODKdsCngku5PaRHCESU
mHK4thvOO4y6E8KxIHBPJmjLOLFp6mwUKiZ2KKLG4QEsD/WMXZVEkG7mGr16fNIh9VvUSLssntXpnjiI3n6yaIYj+qVYH0gI9BviTC2n4rc5c5u5IR4wiGkNi/JKv2dEm7aZDEK1Kqznh+ekWNcYw0M0hO1O3Y5hSFbK8Hj/UHMnW8gP6saM/niXvnwQ/q4HCW5fe3DA8Zdn+LL7v3qJFesG98aendIGX+VWQ7szWKJ/V+vj56EG6vIhR/XBrBfdBCHs24WvBTYQIV9Ls1UES2JM3YnkRPeMeTgnsAbkE+Zpkp/kAIaZtX2Rmi9ub3Xjq2+/0cd0asxx+huf/J+8GXtOPpzu6RY71Z9w3skg3Xfunk4DUaN42azBsuD6jlOrErp9LUEqjBisSozVKQyMianBRQfEJ5dJFMzT9GQxG3YHJj2VxkqZ/mBVClNAaKavOwAFlRw7xiBUgD/x0wwyyDwdcDgDQkoVJvDXOUdqxvKTd5yeOVb04y/hx1adQicJ21XKZhWYb3NK1ZxN3Li/8RlN9murORN/rctBt0NAxI75sXjZbU4clu3Zz8/AE0Q2/rKiJAdESr6Hi14KQ/4k23OF1i6FUpydu+wb9J1Cn3OHerIoYbnGMAsMGBLoZJle7WZqbNF6Rkug+YOXrP2/fhW9Xp1/+g+/aFhtA2DzB2G9ZmlD7n3E3BsPBfSvChBFwVstxvdXR9gLep0Jnj/D6lnHyjuy++4NgeZh02/JhE2CqA4MnYhRo62ub+xARGf6xyJmtYJWoblrjffbZk8lZnebzaSse7LMgjDgAc5TPwB6xPMfWyG9BHRINTBCZdM6BZgNa9BcWTEQ4SnM8y2pOcJLvXOg1UEtQ3rPpycCGWIBWGCri4pAbIqFjTVBZHObOyCwzGwMJhNsVpEosSTXuE/dP/mownNH/qS3q/Mh7sOSQlfGi+sVaYNABa+JWjbYmIyNqwGpmTsqfdi4TajMncgsVkWpfSB4ue9GuJgXid8LLFiSXNL5FMKuvFMjQFsF8N4ZC5aT4NR52CyfDodhtWdToy1AZBPQBB3ptEZ/r2GGXmKDgKk+HzOptLge56H5tTpXSadnM31gfj8AdofJBZu5nwWFUJtDnsxgoi1ng6NQd4wuL1lD80ZRZNdGuhz25iJtXJDJBwASDqA4DQJ8qgg4b1QEQQW8xS2xRrVNeIcusiEkOEFrf5IXqDwjgjFUGwVw6dE/l4/oOCKG8rAWBhUTMyyrX1ATCA6ATkdYNrDGaqzf4Y9jXjwrJ1R8KDKZ4vkQdVOrp9eXZU1RKPsArbhYUowzZzTwqQO//dnAZPzOJ4HyWOVONXsKhdHvGC29z+LcbBKX38+15gF5qM99ZfRGJviJivLCiayhmXikGe7IVsyutzhCWrBcXMzxPgFYqRgMlVSJCkfuPWRxe8JXTkIciU4MPNZTxuI+hHGnCEgcmXAxPInyO9X3X67zEyui1vcO5YoT/XoV/1CqA51s2VL/0DCm1lNwPRCIJyIP+VUEa92AjoUO6x/F6uurzCp/vcjrZ+fKJ8l4O4JXi1dOeMvyoAfKtzsf0F3p+fND2U9qv23RwFN5Z/hKjSYvJcXCPmyTtPuntn5LtuQ/G9egyqQJdo6bdicpWgAW8Dp1gtOBIytkE4HlFaYNktxB4NbK72Q/yGSj1OfzaxI8iUw7CkQ46ksgunAyLhcwChxFH/Ess3FfGcKtkHyKEAzgrgpGIopfpIoSx4UDhI5jf+22NiXH9qbXOp9js3AJjj/aAuudOFH4D5tSXPCtUgrNeI5BGaZtkjLlZ0yhFEJuXePirjd3CbbdYvnXFZWK9evotDaYfQIjd9iDPUwvsT7QLi7OYqvOP6Ygpxs3II+9XmCkuGlkAmJ9Y03MovKb3116kd1ylU3Gg2z2+/W+C9qfvMe05B1oX/3iX+HrxzSL8pQTpqo2V1vikH83E2BbcsOVhU3dXN0zBEigdh1tgxwQqV7OfRAYi6lEE13G8hkq
8ohX0pQNSKroOxkdn90teuLDlpoKwu8TVlsxPOutnVQ6r86+zIpfbsBZCsK2T9hg1fIpldE8stKDXz95S6a0ssiUb+qAUsXEZTl20mAqlNUfGvnIBetr251lIKZiuVntL1zZKMLuCIDAd/NFaPlTifEhFmPGlFdWa+6OA0bKi+52NMiGi1rb7e3HeJPcmnD4YKDeVt0/9PDuJB4i8+ON4dt6t612kTWAtbhSLb3dvrdKZjA6ILOcrPfdMMZ7+s5aKlfYKXY1rmfkJgyKoyoftRBNqX1RfnKrWouBg3kBgRI25SwPEQWW3CWL1Bi+z7UVRIZaA2f6O/x74rW6P1pqUPPypOLbGKObpj2yaiv+hCqVeV6ct7DOk6MiFcuzvvXqGyACy5UhlDQ5E7OMEur9AT5JMUx+YSStqdT32hl2JkkixrnrKBq1B2uYIH6zel+eBAgZWnj6DCin9+kjuoXjRw5R/iOr1w3MmZInlmZwAxxrT4MielvN2uvHiW+4hEtVbciqFIJzIS1IIRfSnLfo62v+03UDL6Q/pC8epOhs8alNJPqDmxlY0ixgZ7s0mN+pj6zmVfg9QDYntIhmc7cvQRKFKvjckoXElNXeExFS1RNooxGwJ0dZNotwLhPxwy3PJ2SiIjVB8kWlF68dr+Y6bHVLblluKO/oau47e8LRbA9H0pODOfyHZgpgDr+ZZUIP3KsPqKNEppNJuYInNsbtezF90mxvLVrHdY7B59x2kQEx46c+Sj5ILRpkKQar0gzZF1NNFKrd0Tl8iHlhlXNbuocSZAACN2PWV2dG0UNu2ZNMhxlwgWszYU191GCNuV9xmpXrwJopJkyUo5VpMO/vO6z2c9WX1rBh2ukqkRLlBh6MuatIyVeKOUF4wpL9AkPNWill54azfso+xXxZLmQjICV+wXLzNzQ3P1aE6zQUT2Ww39oOLahine6p69UD5Qytrd92m8/fG3emTFBGJ8PHlbPM5pz1p+p05GE/udjTsGg0eE9g5IpiE3fZxBOTYkAtvX/zOWhyAUUIRuBGKJIU3tgxr8RsopLLoAQaIelHvi+CaD9lR6Rtfo6Kju1f4bZce2/WDqxixaGFkiorARYnVPbxiYkBvEovNxOjmelHfXGynWfnxI8Ng5bG3cuv++3LRODn8gXfQ8vUb8tVcnGg/Rcnm/eGjoe3dFrZPZmEaTdxCUIQ/56KK4nep95nxvbedUJwMcBaxzcbB5sS9n1UpA5Mtf/YaXdKdRTomikurFETelEKdDxNDtOGDzwRw8ek/EiMm3LtBJZMK/drjn+VSWlnYIlkPE7lCuDJWFpA+h6CKq/1+PvadK+KBuXUolqZLnykGAsqBZULbpdVwXXEs66yBXoykzQxns93uqbUwDatA0GnVLo8rIg/TMb9aAtRYCSMYEow/d11rUnI+pc7wfl4cAUm+urMjLI/60MjeAP1By7HVf6qY3bw05REPLR3X9lTfn6gC1Sj/PQ1V+mJ2OkUs7hHxrmubz2FHhVhclYCJVHA6tpnnkjdhYHQTU0uFPsSKuLSz8KzCs79xT9lOJ95kpS/VklE8E97mlJzUq8EVuuQa9hexK7sydDFvo7yAFmVnxaJ0FfKlWJB1bNlVM4eEWHqDweSvxGweR3Wumwl03GcfoTSgmfL75IaNgNd1EsMD+qt2jU3XGQV1f0j5saD9L1n+km8/EMzrjKiwjlI4S+y71MMyt+vo3nbgfSTE/crjufINTtGxkPdGH+oWfCRtlPJjLb+meeyR+4IhJ3/kOftTtz7U6GjcWjB94Ng9DTVo7ws6wMO58Cbk8E4ZO56fBlZZL0WXdgb2vqCv36/1WJ6ob5Z4gtDpA9yWS9iypgs/vKTqKidPFh8L4vXlKwcVf0VUC8hAN57ebHSfhLuHTbilkCmvT5EkfmMpx3hn4/adnRjO3f8bhI4fBujc0kv3SzS2LJ1oiWrJUJP93l5jWl3ZJFi2FIzMUmudiKg5C3gszcjfuz5CZ6z
Hs/CUlpu+XqwN0b1qrfqmmd4UlI5iVCWHa9XDKQqSTqq+WuUUdBzSmj7MgrHuVZUTz+1v9myb11vyTeVHvpRVo8H+pWlwSayyPQ2vWqhxplUua1wpJS78B/3hZgMR/GL+Y0wRUbYZ7Sz4Ig9lxE+GqAmWIaldbMUoBM8pDFP/TluP+kHqElimz5/Pe3x4lUj9+l62Ju32W1x6dsbu12fJIRNwM8DiXwJUCW5D0gwzCv5rJuMORvCff1HYg/cmwpo6bhgjUoWzVV2+9ZJhz0ZWegRm5qrxkaXI3p/h3T1ZqjRO8aY2ttTmFmJIAWW3dze0YiwuIt472OV2ZZmO/B00KitApIp3x3xmTdVfjtPpj3d1oLt3jfVEExCMSodLXXGFHmHE0Yo2DzyGZx3rs0nKswAjrsmm4pPrV/pQQn17kuIFwTBfoESqClMIqMsz3myXV/YuMenyQ0Q4saSNK06eZPND0W/0ku0uS/x5mCu81owPeNlDC522wF7csA4FiOwbGN5qgaIgPUEOOI19fm2ht6QboU1NuFX0X6iCm9dHV3/BHIVhQ8762xGf0tf9XSm4W28n1mQngLqDrTxnfwqzmTIXiLuvKeDW8bCQZCsrLJ2LsGtQ2bHY5xumS2veDEigPr8Qb6vLXbFJC/3GWYf+1ElUE3wQqRdVBwkdW8WVyiaEZT5ODmnwOoZAs153gm9hsJmeidlSUfxHX2tKf3wdViv0V6Ec3td2qMHLPLlaa8sM16RlqXzgpF5K/+YW9vW5E1RCJ68X+URTb+K75wV43/pUVRZ9XnARiM8Vz0Cr2FLksxy43QxPiOeeoAUXMWmKUIEb+/iS+YBY2XkBUhiuYHsBH9fcMuJIs3X0R8xJp/xLU/UL1Ksvgl3T/ev7M4NE1+LAiVwzjPI6yvS6TWHEY3h5f35PBoLeQWaUr4NW9Pexe/6dBH7HhN4Kz8KHViMwEKcyzNDQDecvYV8H3LK+V2S+npG0/YsETkR9/qota1evlr77Ok3yHuOuEwbl897eltHC3P1aH34Z/4tgfNWk492JK32eqiO90xehuXPydXoR4QNmfvsFK3wxQafnno050VZI8X/TE6OAq5wyUmbjpKhVzVClypPitvHqt7QgV/eK9ht+1pWJjX68DgfDQm7pwo6k790O9VOUcJLgtcqb/fL6MuQFp5VVMEuPvrPK4yvf0f7+7V8YRs1nbdT7Ab8+A3TYvilxWKmf+Z7CCyRNrKjJC7XLjyozZPYMELoMSe8s6DBwOubnNRPOKmzf61zYshxzpATtUXP3fqawII8aPg0cMXnFUZWhZTPhdb7Ae6MrP9avSDED+4g67NUQ6b3ddAaTePP0kcTsnfpu1AzN13YUDuTrlXo6/TXfeg00wo5j3GLazqxm3Ydo/0wQolrJosygOkRZhguxoUIH0OCIArbgpUXFRUfvGe9OdD339ra+huXOP0hp+s7T8+5/rbf4QWM857qrtcNornGaxs43eRjT71jpQo9+ZvAMOsnxlcZx1adUBPkKshT+/qN37xeDMnxPw/m3/R3euHDK14lxHt8fB9o92c27TM+X7ug/eo5M7AMcdc3EH8Oveo/3QA5ajwSa/UVqGHZSJjsNCXE1LUlcRutTZ6/gqtsJgdOHdNS2lFUX/t91W8/h714fiOChTkhnpnaLE53exdye4XYvLkOwa/hR0GJCFbOGzY/mAvJQW7laC3vsKiHDb4+R/hhovtD09TcU49Q2Jb7eWtx5AbNxH7CEvRcmxJoPJxfTrzpIrYX4PmJ6HwTuVsd1f/4aLNMkSCE3DTMtsAErfXOwgzh7kOE0qIMXp/Gs5cWI/z4Z/IVLLYQCj5Z4asjbm7Lby+/bML7sK63mFCKK3N9iaVDPxqCe8UzF10PokW7RoT6QW/zng32MmUN8SM9wNwjC6RujxGjzvEZa7Fin2pE0cCTDQyqPf3GG94jlPC/GqAnKm+9zOmniNCdZpo8WAX5sDnNzH95/emx4BVO
q8U+xQgS7k/28YpyenH2qTPgSb9g7E5my4wDy7jyT/T/SfhM4LLXEgJhkhF+gUrxco7hu8nPqh+OnUqqzaWksDu07Omjnwafh+7+HjXFSLneaBMyFa8gNBv7ftYB8VyzEi7rqU/rEG8RlwQfaL9IDX9DRByPh77He6L0P5HmswTO3N6HxqSErlu9hCW1OZ4J/JQuv3gmazfineEk75DbHcqnFVRbWewVqemK/UbZjrnN6/NoGeTbExRFy+Rq7xFyVHkavfZQnSEbWY2lVCLttsU9Yl+cQZ3pIASM94dFNkRt1o1bAtSO4qr0HIp3yOiip6GR+O9UfEPyO4XqhVmcneD6K9VWh0ag5YcREigVyWtcrm3JoHDEb37+tEyprvA56yVyAI+tf6RCBYn6+dieXirs5bry7XN4Y0lyV7zUw0OaSQ2HdO8fAu9LnMSNP85+EF17QTMItROQ4bdqfdEI1y6njSoFFH5bpveYO5ts9jXXYd11oVxl/LD+ZMId5Og+IJUHqKB9zB/fGRqDkcU0718od8aoik+w99TB7fNzVQZdrkCdTYGzFcP4GMbhqedwEY61Cn82Fgmj38K7hQL6L45o0B/s1wYL9CajTJ+O8jDvYFbujOzaUB0Cs4/flauf1gA/gk5ASGAqwYVgB9RNFHPYYdzKKO/3Yv7WP5zz4nSbD9+/OIVyf3xm1Bj2DZKTzW943zhepGhUHROb+UHaL9kg1oGgLNx4era+RezWDvnOoDa8K2Vn1JenIJFfDk5SXbCG0COxNJ743IZot9d9NMV8G3ZYepSD2l0V/wqiyuNGYzxVwF7Z930hwPooBiu/QjGeKyYFbxCwVSQ3+xT+2BnoP06jKmz/WCnj8MzpnWo2UB/RExGLEBEzhZBQ40z9lmzGUAV/Lo+H5JNWEnHxa2JCG1Y80rBFJFe5Pl9HPhbiK4jQyHdihKI7KWaScK55HttjcEytmphm5CtDpBBg6DUVtJH6enabMDcW3EQVUk4q+yyIpRVSx8kyODXxGrO7OVldg7lbZHjoM430LHyvp5AEOpKAa83we/qcXAeCnSMNLcV7Hx2GWC4hAlWGLvSi1nvUMbACPlbYCVLzSdskY9J3dtj0H7yobq1u7B6er1YbJ2cJ1x/lTr/GgPUkYmbXv4KmhTlp3aZVhucmu+xv5LXZDUT/DZmFnfYkweggXITYgaqsxt7xx/4i/tLxrZfhBdQrYGVemJiXINOnzhKEXbxMvI8Z9lmP8E+EPJJbK9xRH3DS2grUITdmF9Wr4dUa6N8n0M/XQzT/IN8zaVv0hW9YH7RxtQonb9aorsnU7fAcEvQrisyzWPH7pFY2SSndfOrR7lHnnczknB+CJBsc/mNt0ZRwXMqPSkiN+djV7SQWfVwTaYwIMy+JZLiUvKeEkzvqOECZD4ABh1rTXHiLtb+J2I+MMbi/z+oliYV3C2iKgRGiKVn9LTqZa2ORxH5MOw/TW9FRgtc5kZR0qxTLjBE58SGOEPWd9sN5gOtj/OREp6nI2S5XBWI483aoyeGOU8rlVA9cFtp7CufrB5l9uD7z+ts8EXsZjmPxnXkOg6OumG4ddoyt6A6Tle+jAK30MUdaPwg+rwaIcfw3NnZTCIy9I0Elo/lDmjbVfILSAC72UQ1WnR1ZvVxF/72mE+o3v6Fs0q37QUjr+ekw/ysmT86u5uPzIqQtfojwUFkUk4OR3qNh0pR5sJUvGy60fvK/qTzVswvxjMPS28zoqNJjStz367pxe9u7XBACTQoVRsNQItSW/AOpL1kplZWsuklvwmnQjm+PAC2cdkCsAwnaWtPoLZwYfbG2VKrJcgrANBxXTZ5htfk7Yry2YkeYO2JKF7fwZ+bzboYhiZHM+N7MGYtMo4JrTvQRqerruyWBSQbnBIYYotbHluJLy/bx9K3yV4JgqJxUDX4i9EVsXwNxClPty8zc+6SyfiNPyHzEEZSyZtFPMjPPag0KYIUGqUJLijcCoSNv5Lb5W3bf
GvX9tdtryN8mza3O7xtfzWZ73jZuZSoHLMjmem+ReUK/9Qx1yRMT/PukxnRY0+ZUZomk0O29jDSbg+a94oYzhCnz1P9UEcGACriytbeRF7DkCOx9bIddwRroN5JeMuknHhCoQT48XTwBULLD8pQmcQ9277ira8Hm/o4/z0oe/uwFQW0X9N+6q9mJ9T7eZ0ufCOhq0xh34ehBTHphBYUwaDNf+4ZvbCbY8L4+1bfvJtXxtH34LvN4cCdljnlApCJNEFISzRtuTs4dR30kG1DTK8mM7ir0QC9PIFNI7fVMZZjm97oqJdNeCWe9yM00uDSyXodG255NHT27bBDifWkP3FcOURetLh0NoQZ+X0p1ZS5V/DLWS8Vv+rO0fCWPr/malocRTHtb/rT7j6iIoBYSyZXT6NtloG1jUiAup4goj39Yh0j6JKBkzDz43RvPei0rkTcf0s3pVmzC0bzKqaCPSbKm3V28X0g2U/hI1A609kvQmFTbRGx8I5z2+ljFqHgO90zOGp3rzAnqjJbVQX3JGHZev82Kv6eUmDuq14sAx9dXyOHzIvXbMV5PqXWdrUokYS/UOaq/IhlB+xivhVmOwN8Ggo2MZlXeu5nvV5jXTpY3G4GP8W7775ZEuvtupT9aSoriMRJDgYWEFXOSDGe87vth7jAmudaL7jQ2R+SdZHMV+4nH+RxKV5Ro7EgP72uo2CgIoLrnQrrBKCw7N57qJk3sDwVhhwHFk+7CtNfQpBIVi2q7UMVJAO6ZN3JQA2FA08u0iZG4nhs4F+M2wa4FQ0+GYBBU0KiBWCWR/hLF7N3d6juGwqMd4t8QbiLTqdEcOyn2MBoMChql/wYvLLrKfTjYLe4kQ0uuQ7CbI/F20GDZ7fMwKcj+8zsPr4hK4JXfEIH4IeZ8rCg1llPvrM5ohnM/5XAkmZb0kzZ8Mdz09HXiDbZRqZol7IRh7Rapg8CXpCLfgx4L6XO0P8YzBf3QwEN+QUoGKPTCBU9MrFJNsyXpf878It1P9BNrvRze1q4jd1Ksv3p8s2L7pDhbU/ivoCfuWdmx2kLDltWFwJ46X/Iv1fEXg95Xa0selQCVmszU2FUMOTk+YbqcT6W8VogSTfAbufUjYd/hMZGiFwMdL8vQEm/9p5bPpkPn1Fgb2yLySbOUKp7NEwhaCYtV7Q+jA8Yz+dCnR3BY6LfoL+M3tOI7efDoSzEsS0OqhCb9YfKZt9m2CiP8206qdTt+6fxRKuJx42lxrMQBZXKaTZe6jDP9x2u3/4h0zdCxIisO51kJwgww1HXQVH4cLFgldFx3Bc+UZLkGy/t5CkQITo1OxfJzvTNTJsknAxeRdAmgFRSPyOsMTgS4S+jCs7dJHVr5ZyYrsf24Rfy6aBSA8xDi5fWeZQKEOwdhUInpiCJ9xSAB1xY6ZB7UtflJJKU3WdjSOYJkTvYDW7N7qUDYeral05wxlxwUDztA0GFe6tZCiMycESQMBKigotfw4dxam4aTBMdWI3hiNjWsB3ZSP5ytD1FOJC6vjQsX3SnJcwXBzwJPU5vwmbF7IQo9iSdmPq754VlUx++RBOEztWG/mdc4FXlbexbTf9OED/YvY1Q3a63xu1a2Q2GV2UyGI7u11mOSYA1HyOjT7ZkJzxGQQZbAA2dd39TtoMgZAqN4R9OyoHbg8sJPXTrJozUC1oV1FhXR994RfYDdiI0BO7Jo/87j5PkVm5o3moxGV3aJCDX3NrN91manxRKaRt0/vZ1I9i6zNzM07soD/H7mrL5hZgwbzITyzSsqANk7zV7jZ9WKe+N2jVXWFRENg7hl+pUYtTPECNtG/RQPVv1BcZuzGhxaSIWI+rH9P/ZOgifGn8lSdIywinX3D2q5d1aMW5LKoV+ZyUsaIfDqPvknLBdjJjV20tOvT2lnM06TN3wbBPJOzreLJTZG8R1Pw2o2orQNFrQjrAo11HPK6xvSwUfbmmdzC4aIc+skPGgJDhxafnk8MjDpWmvbsfyZ/ZWQWfOs
qgB8lybJg91/8iKQICh5tqkC4UTuYfyIvXj4ZrpCPvST7LICGO5pWjFeSre/M7rt7KECrK6HCBkUOoLIjQ22FXMayQbHfWtaIknwpbkx5BhL1C/7C42ip/5szE/yskSIaPRjWRZJK9i3YFLl0owcyzwdMu6OXZ+EjHcvZYM/w7MHMbwToOwkb6ll2Ktj/teWAgoAFgDXd+AtQEVwT8J7H6CKSoQ/vTKXhpB20s5Gkd1sTLuSXfUI+7JbqxVREn0deV6/sJ5Oj6iHEvmOdtoLgQpJfNphYuxdnfl7vaWhQhXnW4BgmRIqFYa93/ahs9XsF12DT/+98OEe1+gVwHp4RTRHyLfW0xqyuzLzvmaSNM8MiX9rcjCYwc906oG6N8gWNLOxNiR+Zhv5vBSdIXkyLnvxzCyBlVWyeXoKM3J6sMTxCWQYsqvpt4yjWRarE7NMdCi8nPHynNt8zPVt1d49BAO869JQHhT9YNizcQlmCMOfLF6PCS/qz1SpXK+fyN2VVU+OqXo7qBeoWKfqzw6+NKG8KRKr10N7hE9twfilxdhI6EmO8DyNGQMs94dr9VBpYXWHNCg6XUZYn1VexrxHCfvQgeMvynq1mWc1OyA6VZsSesAI2peGFq+rIF70Z5XgUQUrmqCCa+yTODi7W9GRFIGn9AxJmQeO11HkR29QXWc3ta/DlEqZyUg+p7oHLNFGvwrjICFbnFmRDOXtq1vUHsn6jcstHelnIZI6ZToDvpcdga7iDmWzyAeUDOTSQCfJYknQDqqxKo3FXzu2ZUgv0CKrLWhh/ivrcY4g/YUerGQIXWH25G2PRwm5JqNKZMoxJanVUx+l7vO7i+uqV7pZf0+qYPu/R9l4NiaFzzQamPsvDOhsuL0POiTIO0fkwl/1Mz2TBXCZW3wQBiGVZ7726qeLMzVt/u2Z71nXTji8kcsz0E5k+bNGN3Tv7GHRwUxztxMgOKp0DXnQVue5XcXYIqLHjIp6rqo/biTFPBeHtFjstBhKSOxlTmzNEiip6lxDFSw3YVIoQkL6iVWMhuCBoTap9U37JV3EGOppwFSF5OCZuodHnOadSgPDfvldWj6LKkMt1DazdRDWwVzpE2fIw/nMVXpmdgE+gzov+LMtwj3dM5g4Pz9oNJmMkNSJLztAvoT9yKCx7Pc5MeKUxtm1V2z8zipCfyZEwWvqIPHp9KPscv8txA303vXqhlOMwQA0IOiMB/RzLCbkiZ09Qx7xc4t2iblqPQ9HV//I7NFf9oLUgzDM0icsG2GXzwNFOAi3HeTnjxLBL4x7gDwjpkvxA1fTFlfRkHH1zFVgeXs9f69ZMKacTbmB0YCmg0zHk+DwomSXYQcnYRpluAc2MM+uxN7WHcHgvd9s2KIxk7wzKrFQv5kC1tITPtxrpWIgR1iY7fvz3SrsciqamE0MAI9QPfkk5fhxLW1n5RJqxXGHZZuRb1s+xUfsgg3afK9MbNJA/hsPMkeZYkMgbHqykQCvkFACwTX1u4hIVIDmNO+FEZj1u2MSswZxWpRJKCrV+DLY3qUTcob8sznjZ68QcpPADnJAEhiPdYiyaFMUtxXzbHeRT0ZZmpPb7JE+5JjZF+MjHpwv7jHCZJrzpstVGYaMcuncUMCAYNU1kqe4++JZJA+Wfgm2522aG5IUiHM9OxdNKvgFqcOCn4zuHdvGnpgOQrRlZ6K6mRhgYITEgQ3hDWsnRwC6Mk/tj9PKE+h+8Tw3AZ61lifWRub+oW0UeYdxnlr65EFlJMigjK3FGcsZ4pP00QlTagTbfR4iEkhdmhToimQu+yntpz8A8sORI2BzRul1SNeLam8s9LJiNmdhETrh25dpALsBiX/yPYsoUJ5KfPPCLlRPp/PJQWpvKZloJe2SzUwDla+81H/Na7Q9VWEeEPkTMozbfFU6C8JUqHhkdR6OKzjXN1jRYzmSwwnEYcaxJzrRlIFc8ZXQxvTjGuscfnY5SAfzATNQKRgTor+EHfhUn0quFurwminz
dfpIu2qjXl+DP7Wfre+rWmGS2BqZjx4S1sC84vOkQjygjeoz9bhXi5/6dYFFfFYH5x/u5p0V73mFnojePrWZFe9WYXSKD0p6e3YZzcptg7Om34mWqsCsPGCfgHL4jaE4Xt295zkOP/S52ux+6GogeCT5z5NfTJVMrfe7F2G68dbS/IV5dkXjrKnYEWtvN/iZf11/9odnzxaTJeuwspkVYepeS9jeSczNXtNwV+bLQDeVYIG69HIxcM+iOSF9gy2bBTKVAOMVBfr2IdGmGcvGVuSOb3BzmrUyqBGiemVvtLTthlt3i6Y01az49VvNy4x4y1WCYzjn6WBf1wqHAjvz3720O6BjM4AzxazpIRCBqoPP1Kg9GXuVNqRzg6GZXlS6uUmTHr4g7i2GvbFainNhQWvNyaZd+/KRNjHQOweDkVW3JZmOxu46qDCQQWFaSamhMN2pxRWNceTzAE/FdF76PNH1qYBrJrdwMpBy7i9sdTYaHDYxuQmd/W5SaftYz+J6f12T2NUvZ0aTxe25xEW8gEdbvnXBCbC2rY+D/4jv9pXIn70jv+WFVY2wAJjDp+riycn5LUvWQNsLwL6UiQkZi2E63mPXiWwl801kE2EZl86FO6qIWAc0V2Z4rTiNg0kjrSBnZ3aCZxGRDENaDPfZdiFi4RH3ug4f3Ewrh8+8OyKIS5ZfySa/TzZ8rflbGAVgPBPDzw7xHwdBB8gCthNAqIeHMzgyhyK2FNa/+Mo6XL5Gk67TSjH7HVVRpjlPmimczvmM5bO6FcqtAm2COwhliebXSib9Y++bXHtN/dmaF6s7nHlWHZRvqwHRVqLYsh/C4k3OyUgxa1yZO5/fTdJ6SgvVt6Fhf+5N2AqNJYwUWygyKyCH+7NO/axhrZr0U5nXgCCCOqDNx9CMCl4/uBA8Jc1KmT4JqzfTvAG2ez/IH2TWuSnbSQEph4yA8dUv2ormM9vJ6Rb/hRV8HYdqezY7eYdYhs+TjpGOi+ePRVmXQVd6VQuZF9xsVRtbMLFokepw5X5qUVq6LUbxef3s74butrUM+zu1B0JUZzJ+rr732nKaZ2wscYaPKT+HDdaItjbBYqudBcDqh9j8RoDnxTWq+3b9ooroAIbEXMg+k7R6numQw1jHSxd1cDvO1QvrMrx9/TXqXXK3pzZf8y7e1/Qs89+ggKIRU94Db4Vvzc1yfcAsT4qCqYih8nlIo/BMU2z4XBBeskCCfoAPOG3OZCCPWBPy5zyB4A0MKoCskUivWS+KDXSDa9qnvn8tAzRxkqwvpRPiILtgyuAs9XloFeShyYI+wNclCNqb/AhLA0J6n6md3Hgnk6B2cvaUcoZi++KRwcLzBNQp7fPH3hBp4BikGDx7fFtJ8BuqrGXyZ9+m8KDV5pQCNvKVZJvQJedP5oLFwOUTo3WEjHb9oUuZx8yrZ6cMxNoL019QDGvWRvMTGlf6/uv1ZymYJJEl8cx4C4Rn4yqlGNvdCaiAEM4pF7aVqBqeBk8xMh3A6C17DelNc5w2WuDPQ5W3tZUUZLrWwVfXUr5VV+Eqf2ZHOlWATSGsTU5cAcqrwTbiXQyFGKuJpaqMcDZnJmEZoysWmnGLOeALDHh3Pp1+9gp3oD8GLe7YQhdrColigoUGePwKLZO2oSpzGWNw7Exhv50D2eggmD5/8du2PnaZOIuJqMj2hjS4Vq6W9Onlz85cWEG8OelXT95L3NajeTYzWQZaUhwnJGX+3Wolln4HIiqSBepAQNvPNNX7TCcwbDN54fdu0Hey/ZRLuLHMtu8tRdPAMbuaR40CxK8P+WESqH5DCHV93bdoTbsvZuPEiYoeQg6qBRsDPqHW5qF4YzUCY4sayqe9/PiJmeMzq64wmrNtZDBwuaX/vvsVKQrv2k4nZU2+IpAWqjUsvCFupMmTXSvbqJbhfOIS4yI/tSqtkk6mZEZQ/yESj6hDdK0U4y2DwtGQIeV0XjKrdz9Vs2bySa92w4dJf7iamYRGnM4jbdnfacvo
Q0VwT/qatZGKIKiD4apPAYKGcgJzDwqO3z8x3QbT4WWOcGWF7hwdbz27ALxvZKJcmSObZSLcZ9p/RK8u+vi9dfkrMSXraXjcRbNr0uLniScOwAgLa180mGtbgfFTN7SD5dHeUvwPULFImKyy9OlksJbwP6NPt3s6CfK8UQpndeX7WXZevxXtRHUF43wHcvFfX2cXPP0NqbchFeLsbdVSvXYVX8NQEnbPwdMayOlS68zHGIVCsEVDJCyk4cwCyoOq+HGkHbvm8JKOn7G14YL3z8rW9XtmKS/suoMC13SjDQwK+UJ15FOkqWepJd5DjiGGEYHWekOkKOuaCJPNU63MIdVw4YKvT3m+ur8g2ijlzJYqHc+8doZYrdFoM4B3ZwPyhe6uhx2k2+p9BQiwcRyQCJMAgz2EgaYaLjYXVJAswm+ksvJQYW6shq6sprTOe0imwWV37F1WwkQWaYqG8aNtio/z6114WKssVWIYGIgTTj2nZVG8V27QtJre0Yo9W2ybWcxTyMOPKCMONfLKz2YaOB4SpSlTnnHBQQprrE1sYevTrlV2cuitBiLwqovZpHXq1n44gbvspisl3obOs+KShk3aCMlm8dpuzlsyq2iy+mdOmntbVCZ4rbv5RysiN1Rc3BMfnUdxNo9AXofUag/V9xrc5ihz9Vydn2SexBsTCjAQsILIV2Hn04I2Qr6+xd8S9o/kfpb8LlpigxI9LPSZbo1oWkHwpJdtdcvSNbOR3ZaTLzKAyxGkj8VbKv12x2UO5Il/ZiYUkOJJVzyTX9IIS2E4L0MWH2cZVtRPXM24WLd3XLOVnoQ3hTBf78i41P0d18kYTNmZUuwsdZlS5lh/WybbsAzWivnZdPIsbsf8PilAs4k4TJHkle5Ufi5xGH/5Id2zzrB4rFZD0EssiZVprPdOyhruqr2JqyhLWSho/pNc8ic5eWabFNRnsDph1/OceFRsxZ+qmIP0NcYcAzoJdUuY13I1VTAj2h/3CKBifwpYOIt7BAWlmEKOEsjuZ7X46v1SnM4fmhWilNFLMmJsO/Z5ViYS6UYE7D2rwJ5AwN07CrBZ8lmVpWt/FfN79+vXaVwvG2CQ8/9ZqfvzDNhOMP3k7mHyBWKLdhXHm9h+iPgUIFqlz5iY5gISQZ0tCuQ05Cxou1NnU00w0E9w0DeuQ7a/VFo8n7POugiHJXaCg7YCWuf3yNXrbGvzQwMDEB0ms3gzUPeOY1hh3GlOxEvrzxJHMj8NsrxZmH6cI6MTn1Dhy/DAyZ0n/YtAIx9191GQAplcjfb18p4nbrtbieMBKD/hDchTIqwYcudnioaR3s9i2HHjNEXqE2+MdOh6NrpACvoaljSiDGu5Wz5sCggB6wjsqsAKReOBUp2wItLhd3+mBOehzTAxL77NpSTPwt5q+cbzeEybM2yfET5zB6FMJdbvy2a/ubArTYK8KkJQvABFKO2B7HSm/Gxzz7rM0VrBWG4IA3kyGpBaInUFMWlF4zIlskFu9/mYBb596+yiihWGlKPl2QObGX3r6n3kUXUvmC5v1Ifrmyp7oUs+Lwrmby4qY7RljiYKPjhSLv33E0/4y3BvEs1YyWCwynQRRguJ0WZf4iL0ZHKlAZJCqSSJbkgAV8Sqwpw5plhLIoTiT6jn4NGAFgLp/Ej+NUVa5vjbA2N5fEutZjBq26LQoHr8+V+/CEB1j/zkl/FzUc8evWOwS2ojDSrvSv8WhOgOhWS3gLov8TVo3UvccDH++RPyE/fpCNyW3MQJkIUpdbWA5q3+fWdfpFHY+yKAxyxDOs7XeWZ0B97nFPbauw2RvwamvShfYlpPvVXjhzlLWI9WqDLFt5A7y/iDm29kWrYhYuMTn9ggSelsG52BG8oDw6d2Z8CrS9/iWDHz+O5nA8Hi7pTVcymCeo2veqBXj0G3ijY9idV4uzcp72HoqcokkPCMxLUs62cyO3GokvR7GSt/qun7TiNGrNZniWv86foenvm9TbY//Wxxr5J8cNu+
OeT8rfDd+0AQ8c+yQ302ScRgyK8ceq883aB6ZoFlwXBNQiZpJ7tRtN9vIfDvighz4gymO3YTqvim783UuVtPEGm+gD+7V0dN2XpE2/lQi7F19GB2cYEPE7yKuXNJwo0Q9zOOxLgw1/T1y6KJTzkyEdch40RhdunymxR3VlZMyoeBcd/FshmlTutounSJ5Fp3U+hKUGiebexxXd4XSQyw7H0H0fzNCzZt4X1Tpl7F0ChhFnmr/sKuJcj9iHkXPtp2xG8WZ4aPgBZiOuRirKvoaBCIaBGqu7GeplErfaHO154/anL4soQCuUNFSB5EotT87C9DqVr+ZxJ/9nvRSfH5jP3SYTk9MHXnv4+XW+ianU7ZXpWzTyMhGhtlgvKAOvD88MyfakIhHQztgZThyy3ZyZdcm9PfQHMGkvq57G1C9vNMwe5vvHPb4I0W7nWTz96Y/krIc0CsD48XRt87evJzMoxsPnIRmBnor0OfTykRk4JbZS0SZCucTFQl7jQoI4lRs2Muw62K8EB505MfRXvrGywyHwyk34Q0fAey1SFPbMUsD1cr9Q3jisxapH4mdr2+DiwM7WW7DGXIi2/o2CpgZNGt+nYh+ZlRvSuM4b60uhN6GXbE3PZnO8nHh/j3PAnA0JJ3VPzycNXI8f8vlq5jvXFehz7N3auXpXqvVrN26rJ6b09/xcy/SDL54rFJggDOOQApcObQWel2Vj8fRjop46dt0ODxMkQzHJbEdPEimM+e5Y5Vx9PVU/jYMkoYf5+Nfin0i2DuVFuqIKonkichN0iRn9y63MGbYQA+WT77K2KEddoZTKL4l/OfPCa9gJKJTicGRGFnXHf93I2PD3IdE09BDkINPS76AmZSNqEDkZEqdBDGbe4Hr6YBQx02FnfKQCyVCw2u6ycBCep7lDC5Kho4tUErN7HMZLohRB2Kg3Gxe90iK1oFLVVmrMLZL4jlT3ZkspcGIMoKyOmDZMzAILcjnwexIlcWqum4KZZVErQp9B8Sj9qiQp3DCaGv2sQr/2mcuIpz9EIUvIGpJoW8qfm7bmbHId3Cq25lN170OnAvgTT4kBXPz54zm8929voxbMBKfP6Pj58C/fH5Xa2JBN0+D4LV9ett0DAuTv4iuWkwyLpTn5fZjHy09Z5/2YnVqQrr8ann81ASec6GJRmKFmPM6iXy11m79/N+RAtNIQWP8dgn1lnpMp3USW8Zjc8P7+X+N2UY6AXenMXqXSqdimrngwMoBban0fliegli4Z/qJBT/RGnW9+kTDPm4VeszUV8yu2MI6I1aaHSyGdbxjX7H6YiuCE1Cu2TI8old2b8rA+YGRdY2a6xbkX8K09k/yvLRFHawPvQm4ozgePwqyzTjX263hslXGDbPOl9OgyAV4hPp0PWvV5irnv1eiB3X3yRtdp1f6b4omqBYyF3cljz3ahXTAgEup7vRmL9rSxyMly5wusBms24mW5nNZdogsUsFrQRM3VqBtmMiVsA7W8h0ENKHh4u21/48/K/1kv3FSyUwI7SkG/N623Bxmz5uybqnXf+F2CdZ0bGCNVMTTZ+PlgC/iXuYQSticxsNtWcSW30Rqa4U1h0XOK5k/197BcaDG8jEGhYehq9YXZEnlXbCJdxq9N3rayFPxDZrC4+uV8HG02ZBMB7/0KBfXnKRXBg4PAZ7cy12Z0bR/ZtIAdvdbJAzwM6BFYntV2nnpd/jCLoQ+axmMJhIibfncCsfCgm7DU3GnCgzKedqE/uI9I1cuWlI106U6F8vC0ughTGkGUpivz7sE+U/OprnejZeHKX9chAdrUXbxZFBFvqoVDvYXNcNQCeqR9FTaDSqzTsvBsY4zNpMZoS3alvlSzBS9JYHtg3iVP2FoMcxkKYKgrSTgwTgTyvhkianMbAZSUjT5z1AK0cQC0yvUDIGBbYSDdmV1aB/GdCdOJLHxL8jb1aYlHuG6Q/RxVJ+ZCkse/zz+vNmHMwJ7ml69zKCcX7KnSKoDlAn3/Ynn3A3
V6ABVk3u+rI+XopgtcKzOZCoHbjW1icsqfCOsq2eRDEmoz2GygcSMEaeWuJq4VjMkkYrzix0mTgNxoUPS6uDRp/vUZUhox6GIfseDgn5FcAQk8g7qRxENxc/1M81W91+9TUQySj9XHG/UcHBV87vHFIivKnkRQWx64IBUofGeYZ8iq1fv7FWsrLft8sPZHmBCiuaITELNUvQTOPpL+5EH1k3G1C2t+xzHBczNchBPnTNnlew1UmGxmsg9Aq53dVbfSOg7SozBiWKGAIXgi56CuUluX7Ep6bpaoCsolfypvDam71SJDf5HRE4ShWdceWcd2U7ymfasTeMvinslNWiWWQQYQ7vIwD8H+4Nj7xruN7n8p3jeCUsJllWRllto1X6B0qDHvZel3+ceLTR3YrBeb+NKyjljBeyZDpNBbi1bJvjMJG4hFebyvUZ1XwYI8Lur0iPQIgrXt9fa/PM6cjz5ufQdHJU8qaij7Vg0SlrjrQY5+HIlTqBNqYHhV+MyK+crJQXsjjXVtJIyv1IrRj7LVgc3jQ5PO4uaGVyjT3NQ7eW1+iylg2fx7/nkcy2ouQt9FItk7MkxUUJTvsCgQXjVC7CzR8cQLdFb1JSwztd9PM9Umuj3KJufZAXDNHHy6birz6LjnXxzPfDmcSX4oo3hcInnzGqIlhMBYR8AVmjBqMo0oc9OISF2p+md0XvZXJcJlCr2N6yVTrnB7LlOC9IQzIuYo0ZRLk35681Vyzfr7G8aztRa3f5q64ps6cI3e3CmPwPf9+w2uq2JRLJJM2O+h2yF3vearSSjo8GOfcDTH/zmFOs70pAU4w9fBw27eQSFdAtAfCxKD7HlMid4roA1WOSMGGGH1f71FHl/qxJlJPxjqazOM/xoJBUOjE/SDHqxDt1QTb1L2Fgb3JljfnJdTPVyqiSDwY7atnrviiGJbX9aJ8LIn/GpUPni16Qz2cUydCnw/ooTw5V8sPZpc8XFjPqI/h6A1lPa6BOOUTJheAAvJyBAV16WRR5jnxN/rXJ0J+F1O/8d9jwjbCqsLpuJKVINq8K2irZqTBNsA+HTfJQmjYtz6gHUP6K3oTIv9YAEKwHDRRiBa+6gCTO01dXv+0wE0TbOa+JxH9+RcZwlIK/WWUbXaCD8EXRBnJUFaXM1KagfCzSwVTL80GZvaELx5IICmaL8sdRcXkwFiocBVeuHCACGFoI+f4cGtBXbxUVw8FB0dqjGgjrwVlIcVhx7Tdo/r1FS+95sThI2WP6QddY40aGJTMjss/PsNlwlItOy00LcfACgKtPqwccNj50u/6GQRnkpn44CZSoL2h29BGxPkbP88u3AEVZMX+Nwmec5BQQtA8HgZxqjUgCSuQiOnfB+zMG/BmybhDJWckk77/jIeAiSYSlAFVNxA253rnl7swCJsqecgr5xjikZ0kD9ZzKBZVIsHluPrO2DnU1auPeOCAzIBkvh7n3IGPCJRdJk9c185K7Yl5z9GUEmtktmGAaVn15U3l/RyLcafN3xgkZldY2in2/1v1KNd0J6SZl6d+iNgfk7rPUjV8bmgyE7kcgWB+Rr2zUfb2EMEN7UBRqnyJr8BUizIpsokMdEhnbi2hQAKJMknpjt3c8X7keXvsC3azE/OTYnFlHZKeHUWYJmrpjiNijxn+eCsrlRESFXLyvs8j4eLYxcMrKcr8YIWKRRZh/EFxPXi5drKs/psLlBHETUw+TAiBS8IWH5NApwXSVX+bY7Jt0ak8eLhYGPSZ3N+/E5yv6PGHLZ+/6DgpzcFZ08ttsqEnQwBWHUbmt5FYdLouGX29Oz6q2SrSqMOGYMnBCJ88HhQ4mP0ZWTLj8ADDnglqijbblehE8mf96ajxz17Ytpxn6PAGlgKdT2yFRK3E6/7tMsof1tGL0L8icKVqETZot8l/fh3hnDF1to+4LK96Y+Ijuj6YiCf/lDRhCfZCEsBN6gA/C9K9CHqPpbo5UoMXavtYUTm7ULXQayIiHEwJwJ5LEYpjm
FD+hc/XNkHwxO9dcwDnfRNOEO9RelWTm4fzkCJ3Wt3F6X1gpKT+4R0tPYI6deAmXwq5fhUfSddaTNSfOfJR680wSDErEvkDR+Em1RI99y1j55guqAsGwQNg2YM+/Ob8+fF0QxCzyX6P9R/oXfaFUjsQC6WrHNA0XbUny/KBDLwvzL6ceTNu+5kjobSmiy+76ui3Aw7i7XmF8bQWNGrZmrETldkcVA9idTpyUT/kEbB+vN9kGyj30vhyiFuU+yl/dFjnbzcvQdTL3uk/7WJk1EXHqrWeBfma3/T6YScJ38qIsTyBTPh+Pf6c8jmaBkMalSjGW4UAf3uQgP+DMFW9iooyxIAXO8vlU28sQHB5TAVK2ycejq5fxLaLuv15pCM2CtiidQQPoVRXdCUgmkpyGgMzyIdwt+jCZUw86Ejy2OeBuYwR4YXgvzXz1zagLRkp3miPxlVAI51iH6ldInDPZG/HPEK2K12GKhu3/GrjwrL5jBBeJs2Wp+1qkN2z5praNzQfPSpASqKamDNCf1wj8htolC2TknaYM4jcWWcxe7xQCJyl+ZMrRuPriE3C7xYSHAekURAXjfS8lSceYVUsalx2qFdvzzu+T076Zwnu8GKkjW1kx4fEjzs9Punq/NRIFkqUze2Y4aZO10vEGlJmaTXpKeOSR8yNh80MWUJ5HUqnEwfp1WXFkp89mV0Pptq47yzdf489wKGgO0KuMJtSBVkM6HWy4QPYMY/RPyAk4MBHYNek7UMMqqDDSwaEnUNvlo6fdgk8VLrIY39fKY6VzGeriN3JEvZOiBKbD8XZUJdCAVEZJOeb+4FUKDyk7b1kdwv6F1Y6YTGEk1iKx9mpqYcdPd0suRIX2YHa7sguXx/mTIh/0fI5BsOYJKlfIPhwqDmnspjGepBy61ek5zLKPnMJV32N+jQagrxOM9fL/GiA9r0VcgMSj+Ulh/eNwJsmoy/IQdwR9H+gRlLsfBI9RRTSupIpOVVT9tXPDgjzxcRVOd7QtN+/Ct8nr2+YleDN/pJupeD+xoumIxDQI+412guUXLY1R0jvSm4VG+1Bt1HkHb4csTTpiazW/fLtyo9tDEupjM3ic2xg0RuVAWYtCaCdruSfVD9rgTYuZubj7JYP7xmp++DQ5CT2fPKpseUwPzQrPdzF8ZUU4yxpA+w06EvcnZ5CUX7Sil2i+LFi0ay685L0wVPmf/McGSUoAF1ewj/cCquAdIGiD7pEDZH/4JFJd+8V1IIaxg6TPuIeBX6ZEdBp3X9U+kqa5u6LdUqc6nAAMkwwx9BH77mt3yKSslapCMU/CQrCd0qmgk6R/AY3b+cGZDZ+xv1zj0AL/nQPheErUEk6317A33tKCS3Ekxeinzo8mih777FCU7d80WJyUPL2tTeElK0jsmLZxnp04DQra7j2/CO7QRaAhaEp/tQcT1AIg7k6fxl3G2Z+s9Vq3PFnpOOmTiXPLhKEPhxWDH2fGwk9aL6SSU1/YC4DHqvEpkLLbg4+f+YkwULQr1RPItPMNQ9fJMArDqq4g+oQ8cpuy27VcupjjzvhFa6Oc7soL8+2p5hoghvUW6nSJ/2ajllC4aV/EHmGJYvBi1PrFqp+SztyUrv4i2JsY+RVJSEacNzNcG5h5Gb1zjX5pN8UatXmxORZgvGIhZPYWvrmm9eo3F7tGjFhH9+169h4ldJkhIZZa+/hFH6Ao0XyNiRdeULn61bIMJ7EAxUdhHFBq/LtaOw9LA3QPvy6Qm5PvfbP1Rov2ed+VM16UOiuT7lLGHoWVlMSiYyZ1N8cYRtopx1t5pjYwyShgYDMI+ATQB86mb/l4czVQKj9tIkbQthhO4oTEcEMTk7EJYiocAHEoTwPLezvx368M/2IIwGSpZThAdG8bGeJ3+rdXIKMoP1FAj/vs0X4mjgFlOplhkPbnTfbl7g+SPOLxPV3G2pjFRGzdxLYwydN3vQMmwgGqxLQrLArXElEbszqtdmd+cQp9C6r4IE8CQfmH3wjO
CpocKwmso0h+8RJcYDzh6aT31QB9PhH0p4xZJPCmw6mHlwztrV2Tsf455plzVZ7wzFH4dOBKyAWp+x5PU+zyHNvcrCXvvcBiRo8ufpkWvyy3G9DNjYEbZuAb9dCY8Q/hifYJciTlTuAPRVP+nbmkVIgEVFccFPzlbSdJrhY6ZhKClYn/BThZtJ+pqUzTnz5J5UQwMms/aWuaxjr77iAHWGofz7RdU0LTje/9H8Ouk7YQE31cz++DFwk/1ZNkxHT8w8KnHYLuxfDDcTDykZGZ0/PwBTSdR04xgF8RBT1lEipwQFT2Wu/2UvwkOIHR1tVXZcSoKJPvhPkrmJyArA5PSMuf/ZfCKDjZbPxfqWIhyr0SVf+D6z0Uqh8xCupY+8ETLElU+WNG2C/d1STX6zM3oR4+vyW5q39Iw2HkFcQ4HlwOcAKcBHTVgrH/WoookSTE6Qv2EN3hJBKYP6SZuTrL+c+LPUvVDkby5YIV1WU7cf5dCGV9GOhTs4JsEenHGqcSjvN8at38zTd7sm7y4G/tt42DgEuhah3ZXgpL2cCGejDSkRl/pRVPps0s1Z4NIDU1K7+uT8mCEeoexf4H8UqigSnzz1cXrEHPzwVMnTfj2TaOVSWfarPvKh5Z6KlNdWIhOVk1YVlg5NAK4e/S0XC1n2Ijey3AdDtjcDpSITgMlEuBjmCiiGqLPS1OXTcovX0yCceFeC9PGLw3CtL49hnkPPfTNVrSt/rxhMRcq7Md/VtasfWpvysN68TS/1vs4k153JUep9wBHJQbrJQWxh8Dc1CcwwMkOaAeMSNAuVjIJmirJz7U3oOjO+gH73S9/bldEKiwPyLM1E/byd31uQ3nFtlmF+Zt9BdIqrzyYH4jG3kaGviU/33+6xXDgbXwy4h5lK5Ji41DJ7jBQxnE+tFyWJ2F7kun3NHDJeptycPODih7ha3rsJb+G/up3/EutqMmiaxJdeTvo2t5eb9G3hy+n2MZxdsuQraQpIj1KOV4260fbcHyLOeIgkXxdDaGvIwg3o1WWdm2N6FA1YBVt2XfqYHFi8uHYKmH46gvln3WUrsqGticQh6Un7nZhZjt221+p2ZhAcBFWMKdfaNx2FA7Ht/gdIjIiiIcT7sD3vzWNA4Ww29vygKybsmKFG0MvYT8oek3g8J7DYLcl3bdcfjzS4grKxTNgs3MC+mXM9Df+oEhrOPrr62TMOZZkj80eHnjp9SfN2/xaSyahdR5hef4sACniCdY3LXZTdht4RGlv3YhBu7Hw9n34Z08bZKUNy7ubE/UXJ8MGa8epBYEiAJATCKcj6+ehr1i4VHbKGP+OjoxI7JPhFYbhHyvpqyC9KthJwkIJ8dnf3Y7yjVDS4TE2MnPuGjZiCFMv+BBVwRLMijj9glk65MrJLgIzE1Grp/r/sx7PkpGt7X2Zq7jXyGZtvoy8tA/UFX+STmil6e8YDKKvZvz3/4eKaNGMkuVRt6hGjDg12cULH9ze+bzKkJy3JuVyGTz7+HFa47Ih22pmRYh9gOiJ9NiZUJdj+weqRQy3dpUyDX2gdsgQDxaPZiS5ua5Fkc1EBe4154i357H0U18JYm/ExJ0/jM2KOFfFEqXH0g3WQIHEezbphdGv5h0d+AF0aFDp4UqT4jiQyugJx4PYKAAiBt0Xq1qn9jtKhYzoPrXLWagqEpR6ohiRINDRksw38XwJLskrXj7NN9tJ5C94rmFWs8EnYZg9kI9D7PQPHsH4qpKxd5Qj1IWSb7+Btxx9QmveHf9+/qjRlEqGFSKhuPwB60gYC4PWDsehAolii0eiQSItB/n+/sduRwTac6++MKs+/Xd5JHfVI7HXkqYJDkogejGIU8GsuyVcn6C1P/n6YPCJMIp0XuOZPLdSPH85Cfx+bJYRkFZqCYiNmTl+PBkwEeGfFTYZ6Rf8Dts4D2Nn+QxUpX+s/PLRmTx9vXNsiSMRWCJ9MCArzmX7e83Xu5IbN4hcrSqSIsnaj+Cii2oD4Jo/d0llLRH
1MkBUbmM8LDNN/x+BKRsyI0zeM9O6x88YG9W/BLwQK+8jJmv5SLVLhE5exI+Gg4KPyQrVIGYBY0XYJKiLaCgHAWEiZYXfXz6DXBHhsZ/Cqn+mf+e7pEOSNw+SxyFS/AStvSDi59OJOMLhZuKhaQzciSIG9XhQgrYHqTgedwTBGYUAxNf7egvcrbhUdnln57iwCX/Z62Gsv4FBG4j6AACF6phwJiTjT5mf/Reeme2gc52PKAwE34gcsSttvaDugfXfUDe3KKLQrAqwP8tUFpic8k5NTtucCJUfJZL0duc561UonVQhOvuZ1b6p9uhCtC0vQwQHqH4f0M8QXs0U+wVmHYZfGqqcP3244Pr186XRops9LcvyJkHSjqmkjAd5Pqbk6e9k1o0n5b8088vQCvmOcESPN0QvEaOexZGVpWCAV/eDwdL/gLUzQgfVVkGBlJBgen9bfuu5iLLdeMm3YuPG4CxuLN4mje/odEJBjX8NhDarRadPRry4v13djBeZAXTLTRRvIMx8QqmNy2p4QmTfLDKnuU6ZgRBCrHb9Ob6CBks4IAEMUpInDLXwz/yl2BI85v6MKyuWL/mV8tLv/5XKtFLJ0Ek530Its9vlfe0/KJ4FA8dFMUW+lvsSWBAIofR0kZezoVcWNfKUwlaJQndhb7nNX9DP3t8Zlu34imhF+7k9gMqNiQGsS3A0LXX/SBQImkTqGf4KUjo7+e2FvbR1Tr0kj/cSatc38i2bi0Y+BX1owaNGJLcQeO2MVNnD3bukOcwmxxgleXKiumEld5Z4RYebl6godLn3GmPOGJxC7mJ/g5jUqyzZt06OAHvm+sKpEmJbWPJnVKNc64TNxDtzXrIDllPvqtr2LYM29j6Nsk0hdqnTCgnhrHhsVPa/W8/A0rFTstDmbhrl6DkMLxE78U6DVjM54YXjMYSB+YeJJZ4IZSGA20mv7+/YKvR9LaSU4Y8jD+4ckveZH+tM9rquo/CU19gTAGuIGPD9LEkqY8c/4FQyQHILl7EmH+RRASQz4P93QNO6i34VMnOfgaE5dG/ESIPEONFR4mr3D69YxfEv5oQ/QMn1dky7T4MvAt3M5Bq8Ztntu0+absGMbg/Dzb9yCfnH7Fcj/pyRn8bl65fLdLoEwrFVPBYlbAgCsAq2LIuzUTUvmi04oe87oAgkJxMJV1+pyq3Pkd5DsX6Peoh5SDgpF+qaLc5AoFpnXnWnJFwND3ktijXA/B0fcjRm+qjk4D/9/7T47MjWfxXM67dAuolzqxod+PuEHyTWC2RKzHXF1M+Rcja/HG44DV2XNSUEqRKNjQThagbdoMbCMSBI0vOCmnVHaXg2El3+WIuxwMQ/X2hQ/Wid/XnXjS075N0f+91OX9iXLGIULub/45DTLYe2a6PZD6Rw18q9XqUy31ZfFGBxt//BZl/4Q+gqsEgFwhs3QlavjbauxeMffXpeemo1BTPFlAKWS5c4xhbuTt3XOtEvL1oG2c4mO39LC2ommMg+rWtA/XJF2/3ieGQRlNzZd105Kdx81Ikqre+b/Sy/Xh/s5kTQqCvdr8XY7FTS+f/4OufbgbGRMglTQV/0RnnbTLvCBXIUsDrob5XOalJVB5kZeboG5IC56tEYxlBuEEmnJXcl045Ugz9PT9r6dDMjzdM/cAChZPoS+qvEfF4g9iYJ69yNqGMv4YZB1m88zY2PYd87QADejGKGHkoMqBXvIdoqeVhZD5PoTOLINfL36Uxosi3g9vKBm3d4EptEZGuyCYtXH+zIC7kZ1neYawNtVofg9Hev3z1qL9Ug5Uvylq/++ewqkEjCBohFen1sV7HxkA++e1K/GIrzQVocPhnnmWP9qFe+wYMsrjF+fv0Mv9GUFZDzBck3YdGaxjrxVoLKOUshSE5uO7eI15qINtch0l+mjvoYee+fZKIUJ0fU5n3HQUclUBULIdPgS4jpMPeCMRERPkPiBj1APOwx7wD5vAEWIgx1YuaIlgKZp7CztD3aGShIIYs
HHo8XiPB2UddtLoZkECXnY+rYClRUNrLxY6Xan3fIBl+l6B/4sUyVBvBBzfPtjykmhfTZ9EI9XhfQCGx9QE0cD7HxF3ITL9qtNHTxHFkcyzpMSFdA5dksqj2D+WwXDsNclDpxpnwApZWtiwuWEW/bCv5jyp/406hU2iQqiLLXia1zG3uPGc0BYjZJWkwYe2PSRLWoGnVCfLA/cAhjU4uNLdUmyE+3kMPa/pHk8S7uH7xH2y9bOQXv+RRuXoxuwyOTiSAbGVQnYEFbiW38NS3hU3dKOqrUptH8LAAVkixGDlZE1c/lL1dnOR7GxVR24gKVcGB7d6YlIB9G5tAW1JvOoqjyWtSNO8zsSdW8qZh6IXxrfB5P2Fwv7U+3qBzEkt2oWpCh4N/Zw8qhHEDmdA96f0Qny4iSF/O/EMrok+NK/SovyIE7RqU6BhXZm7StQkz5xnacT3WLwfXetoJ+M7vuwsehhB9jpdXtfVvpLOe+ilpEZZ/SRpvbCgBEE8SlHfxo7hcRfFS3YNIoL8qpmW6+KhC58984D9xWhjArSGcfkDrhaKQXt/E+97RCIChetiuPew3zr+xIK8P2whNw1oNAk9Vpmo/ZXe7mIDY/EPZXReG1+hOSskuKrzGi23iPboxfbVxLt/3cd+z2eufJQr/2aYsPZn6u3/gT4N++dNfZkw5vfCPtAAFfIPH80b0RUA2OvPkLf42sKGXcxfvYWNdo8HxjWkGPG8VPdhic2mUAOzI8FQzaVoqr/TBgFZGPMP5/hNQ0hYqa7DyTTa57/RK5OUYrJccVjEm4rCi2UREdPTzyKNfwB1pR7S8SMNIVGUtYdt7ySbK5t767Bfz2igIFxPhkKK5G+vXVgwFCVN5MH+HiN3y7/iByXUvh9Gsj4ulCdttwtYZOFnCprMCPeP6yoqNpRP8V7Mop2djLKSdfn1WDMYv/15qKEfJ53nA/lqcslqeE9i6L60apOt0fz/GHleGTEAdDMhPsknC062TR5alMPGFu8+fGkXCyt14ucRCceuRqRDXnTF1WyCgumnvP6dBnhQgFZsQf88e2PmADAH/2PGJ4Jryp3SVfPTajBCOP4vF6j+Pjt6QJtWdWJeyN8FssTU7FftbA5avp2iA6gHgoGBBbnk6J2lALjb6uKBFp7HoJun8Y6O/jyfEUABD0/HyvULOCxbsCKSBuXHwC3rT7QpM+AJ1eJCM2NL5ewoTsPhaNd97kLgD7OnlcGHt71hnkvb+1SebQiHQ7zMxRZqHQ/q8WG5uguX4a7lsAu55Vi+eDTX+NjF+c5CVgP9Mez2LXBBQMMDjC2CQWailgGz7Z0y3SD/GcnosvvIn1vGGiuPzAZw/ouXKPnh1WWUjhOJ3I37+EUBHzqOp+sSd38l6AmxvWuUUU9ffQLuFcAvkZXEn0CDEoh/RoSL/LfBmVxGa3RRwFIzFxxV8jvzXgAaYg5cHkE2Y7dL2frzM+wO7vhced/1x7PVF+H09Bi99g+MrDpIr3XpPBBerHUL0t5Kgt1AkN/HvOMiW7x4XH0XpHgX13VTL/BqS8ELVnSRWGgSFHz2ejaV17gsmkqQm/TdLCsb3ODbqufE3f91cueMXsHgp2q13fHY49kc7iYHNwGfVC21bmMmQ3/RNfHRa7w86/Fg06XXDV4NOFQKv/d2QHCT+jbgvLUZ7mH2MvqW4i5uz1FPUVvDe7N/BMZXEaXOA/b3HMiUW+Z+89MQQMBeBUkPGsQEBdtJhBdoMfecBnAWdi1akt5QuOmpbdO2FaJxExyImISnTWOPLUh4DE/HZx4GhMBbdkKhhA7DcKNiZNML3i+GTB+6xKoEuFTizWiFvmDn2WG/+2S0PJQEhydT9tWMPtur4qwbVUR4vN/uPMiY7BU1CH1+HbknQY85Hau165e4GDzU60zlkjMSQBKbCujQlHsRMwaAR7kUKivRyF2oW04jEdHTt9JC0PHYXKbJLIFkfJ2l8hrJ1qEuMS+fe33n9xBlo1PP8Re+p
7+t/5mDt7EDwDnbJ5HPYNdh9H3Qu86xOL2iv6R5ES3ouud2+ovCvHMB4XpHKZktj4eFF3fzEK9ynLzxtqT77AsKm1tMSbT2TMHDgNakY/0RYncUijOGvlHzV6gqcUtJNyiYfvf8LaFoQG1rXhoKvPwSle9X7s/L9cY9uF8QW+/tkDkncqeN1mEaSDq0ksI1PGyeAq0bWuTywzvDZnkfW6CThyy0AKJVqnCjVjkj+QVNNoyieAhklGvvdKw7eqMjcEPIu9AhIf1F8r7mgcATSLTCjYt3Coii70ZKgRlc/T9lwSOJJ8ZNF8fgdGHZhHp6t05fhoRMUvTugYdbmWFq0F3QUCtDfQJpYKBnzQDJkg4CwiLJigw9pDrSOtSH3k3qSdVkIL5kCuO1BrFWbfxrqqfp+MpU8QMPlZ40n/g+56lgOMAxTyh97MIBjiq/1BloXpHkY7UbZAiZwQ4e2chOi0JUx+0W2nk2By4+UI5kQ/xCp/0DB86QlPHktsgzKm80IR7MCIN6kevnX1Uvg7VhapPcLokkCcYPkl6urEIT7Os0bDTB5d4LDBRtjr09whfe7Pf/O3oEVB5x+ql+kqDrl37n0//27C7VgQxYZ4KiVybwg//VGJfYp0nBZBeOG/ylHyDcp3n9Kx5l6Y4TervUyfTH7q3o1xmqkGnlO/a0bO7lVNyT8tdp8EyAOuKIEZwa1Yt+rH19KKCArkqIvHdz2dbDRoKR48jE7B7TFN2knOaKUIQA4UQ2smicZhx2Z7+Cyy/9Qa2DAmdsF7t/0fjSg6RsMIptxlodpEGNh/gHh4KBMkrAMPwH+tTnKmy/CfxpXJfDbNrxZ7ErPF15n3swiFJbXy0CAHOB4bG3gxAwEZYTlAUKMXTNADo60RkM30jo1LveU4H4TjDTsZ+OcR7FvQBXwOuJLiTggW2Zg55Jt2vb7HLwIyR6m9TsyYzSIBqkikRT0wAi7Vvoe/icxfZj8UBwSnHXJBmrsaf/nidZfGeivOax+MPA3Sli03Nn/UMehvhtyd0ECjdzSlVqwkWn+70qV338I6yFcd54o6V/26z8+uWBC/zvhKF4R/yv0MzFD4HzEEs8cks2B9mTFnTXl/OQBCOi6pbOyrZs7FNN/z5uxzm+d2lfyBkH1h33VwfPcfagbSiYeMAAn4CXQmwbIVOTz03KcDj20AhaRYwN3cOJ2kguwi7AKPdG/fMDrfhpJ9Gq7yZ8BhpN4XHXiBJuxtlE3YKQRoq1Pve0TFa7q4/0a/7D2SCn3RofAU/WiUSd+E7iWcDA6KgvZSrHxvJ/JxUBQeQk2BUHVyR03NF5R28xWBDE9X2N651HhWMxZn098ecE+mf9CFpUDZWfxqLKDNUL1nBXqUOg/Vo1611qEO/LXttvdvx/7IpjmR6jEd1+llFpfzCuDbgwXqKzYzZDZa6+z54qbqh39zeoPHA3dqtohhQMPxE2Sf7O+/SttRu4BdGG39QnnA1JtiNS2qZq2ADzkVHQ6CYYkjwnVJo7rnDlen9rCIuTL8s3JJ2PqmX4qPWUim2+xOwy35+OZ9T/HcdB1eJAY1XHT/do0He1KjXozuQ6x5m+ubVFV8l9dLS3q5Qkr6Q1ZIADbmkzfovMp9TJdRQ2/IOCsCYT6DT0GQOd3FwNgS7IfsAa9s6LvoiyRhykNa/weQexAzCP6PCPwvr9G4mu2gI/ktwG5IXw/Ji+Ay4iShWjnuZSqrU9TEsN9XAvaFADyjxj7loal5NTCRp6txZj3RyAye1mj8SDWYm/q4B2rUdHXqWelKsz+9C3kbyeiJ747dATHMknrfV5l4/SBBSIDAO4bkat8/4MKIhMNfLE2C4pFxAaEaSAgZMrcGWRmoaFDJXTwSZjS7EQv0P60GhALKRfcOfGfi/GyRsjA84LCeaNO8de1WzpIWq8xXgB1MMSFd33eXH7Y12CRODEZZEI+pFb8jnGujETcFmUdLiC85fBXjn+wMUgIlc9EYPB2i/4exNn5Nlnh
Gad6ry9fH8VxLd+3j9vqNVTRiPzaUfONN96c38+eFQxoT2uV6Dfwmm8bOW6FzAOQbFh6NuK9rxw1iu6OEW1wv2T3A2MZMpb+dR6MH7Cba8B5GbhkgO8COM3aecEDTLf//uaYfwCXJBjgKBsuStM/FG3WMan/XkjKtQsaCYqeQMEGuttHUlllenUS9e9pH+C153Gz+xD0dDfHb35MY7Vnw/BpUtDljX0kM4sV4DaKYnjfEPDWNWiBUGrtxVl0xl9Oy+kFvF8IDTylE3BrOcXuFE9JUm+oIT4woBWUxGINDPN2GtpB8tdADIRnITmKVRE5kroNIZlXbDZhbYzShyjBx5buQhF/FB4EJ2u42hipT4InVUuVde3nbDI00IWIpLPbOiZ03udfBi3l56Olv2vrt45exHrqr7ovI6/0U+B1W2ReyUpzdGMdhfdabCwTsIOgsVQXuJpuXKR3bClu3pt9nST+DbYqjZjEVIQQTXzaCZMiOhXj4RNYmhSXuTYEnuYdCidelAJMVh32Zzf5Z1uM+Y1Faq2SXQiQgnAcRL4vwbQCPdQrKtZIfd1EU5YN3TNP52VuIfEOydS73ngJtk2SmC4mBqYhUmAHxMW/5DruGkY+l5WTKhmjGxkP+zChOf4USzzkQ5nEFaNU7xc+nc2H0Y2vkFpWkRmtxPXMFKx5sCOyOandjoQIJFrrNiO6nm9sMP9cTb3fXAb2Sf7XyragcLrAxHJps699xJwiUcTIZfE30wAigXGWILWWR15u8aDh1U39PaA5Nc+uW+U3ZYQzhITcifSSRlXykiWKvmQTelxHPyakegN/VVHUrcoL9tgYePwR2xRXcBtaddgeEPqQ8JI9HG8iVfvBNShLi2ZEZtQNA22ZNUUNJAIgP4tSiQJfPfujdGTgSIPIrX8hQ0107Qtak1geInx+RdlfXH14UFwQCsstzUUd9HiMyMHLLjqGjq+VLB4i9xqZXVRsHmfYIm5zLVPTYtJ59bJU978nQkz8YH/a8ElLk8HwRQ1PuflPBuho5vuA6WDzbYBFAiBjhaPfVes7/NGKWrSiqWBYCJsgbrP+mupC2vOaUUNkJ96eQrRIhx1PlARZf0CAaF8mpqIUC2FG3sauQoMq9BPe8F8+rbzTevEvdkcx+CwnztMYqZDzF/9JZflYL3+NIhdkQebpZf8wn2gud7mfF7FfKJTXhyfcSNU8b+AxIbGm/Q/8JR4hbZ/5RXPZdffIf7WfBn8jY3H1lscc7QIxV+Xsfk2hfSFSh1AO2wcSkmxkj+BkrS6VCGQQR87OJgcuvgK/YLT0/Js1cbpPUsCyZLoJPwqIfLrbI313MosWg98zp9gmGZd2/LQaKGha9PydoU1+YjG1OJQjIl6IBwV+idqjPDTvJ4yGdUKF07l5VC/w58vJZ087bgkoi1swu5BDtUeGxXx3Hwqbz/XB16fNrrKhXhoCBEJ0v8SfU6onoa9syv5bqkMS0KCRwPk7ulZB8Rsn9+MZ1pPZg9Qc9/zbWk18EeZA/9X5Zrv/qZiRJFnPkgWEAvkfB/8dFicB4OCjLIp93w7YnWMoXczxCNzqkoTrsuI3Wun6+JiKzehWQzws60JwVmaIPm8Aa/P2hqpf+83c7y/1xi/G0LdCmZwfGnZYYj3J9s1sGdh7WB8vZp/0WaijGZI2Ok+NVoTl6IsnPjwuiS5dqUD+GbfKMNJzMAUlttHiQ8h5tlMxpK3BKJY+lBpMLtTC81jVkWTYm+SZvMZ40qs0ZEp1IHnlrNyg6LfQvZPkv/hSrtvW5rhJ0MW9e+2kfdoNJwgCsXvoDtNrMvGxrsStFPHFwBzU/qEkqSvvvP4eq+yl6XiOTz47Gh18d5WMMD3Hv8QY1wnU3335Y6Ehf+J9A0Iji2T5l71RV5G7b6dDjn+FpHBqr+GNePmVpJNqfAnMGXMQf8OYWCynRPsvtmS1sBKxjXWfwS/tEIIu3fdG9fVFe8NM6aFj7hjaNOFX+6/hcGPhoZMQO6Bq
7cVH3baDDPbzJvlAyQPJ0dcPMNkV10qpcgM2eKSZi9SMU1oZn3jG7aXiqe48CNPOeg/qxj55dGT5ACwaViayvquOR6BUPz8gpkon8gTNBPJWrvN3FX5cNK76e5f/pj3CYeVeX0ZWTyy9sbjFO7XwmE6WFga3TPfvYcnXdrXsoi2Bc82gAumvRffm/TKEmI1nqWuyyM/riz+V2MYpYO31opKOnb5Ej3w8LJEd2vskoKhIv9YEwKy7Q+ZMZQlHyxFeT29LiDSF8k9uvngbhMFoGQ0a0hqOJAMKHQHlxk/selddwwMQtcB8wE++f3w9aIpn3u74cv9a56kmpx8x2HujSLgJz3C1Ln4ri6MeSIcX80xwIZQjJIurmm6ODYVNNHb+ok6xP+otgg5DkoFzb2JBF59GNskwEhuJPAGWDQXfTrrRrMaM8TZZSRa7Of9IUaTKbdYiPjEq6xivUNI4bDWfXG8/130zrCGtUpkoKlAh8PXaPx5HBuswS3GuY/YCP0iCcis/o6fjAOCK/ujGI8h40w65VRxbO295O5XiKg8eHQrCiiOUCtDpEbEUlVD242h4WMHExCxwrHmOEicvH2HJTieeQxzEBeWcYCadxf+IL/8yaUqJmr+n+taQ/YV6g0rMqzFV4I7iEHbPHE+C9Swj/yXSnay7vUTL3tgwuUxSXkZie2u4r53hDT5DBKQeXB9J3yiBGPlziJf8olAdJ92xjg3s0Pp4aIiMMNQOoFL6Re1lTCuWwBRWDX8ZAcD6Mz75p0xF5C9syjTBv96WNCNfzFLg7LBiFUtnzElhirb6JobBqVJrHbbvMP4yOVwzF/251Xg9xdNbJ3qs1W3H3PliaSC2cFm/iWPa1ve+AfhsqweLBAy8QpUUE3Zib8d0boDZl62c9h4CHmyRt+oNaiicZHYhT6yIyeLUxVmPDesiS6V3I8cEwcriyWR9Ux+Sk0cKP40jdKL6isZP6e5muCB16DPgPkmhE5zAsTyYYm9FtOsDSt6IeDsa+SfdH8Gnw+0WfpIPJ4+ZDjY+LE92VaYn2wpDjnRbqX4ayz2sAKa0Qqv6cSJ6O61Ik+Ko9GFzD1VxGyBRba5aB5o607VkQwEIY7qj6LBdDEYgrmynJxagN2D8kgxx4NAg7GjnLTJivsQ1EZoB0ei5x9AfgqkKaGcewC76j1QQ506VLCbJwcmkD0gVpyz916xkKDm79txSXPX13UH4eE4uZ9mVWl4EM6vRwTRtlz+4wPFL8IyM8SsO1eXB+Ma1qPAygUJxXumPDOre4mJO38xY0455oha+9we2Hyk+XzJCx78XjdFvBHn4rM938wbEVxybl4idat8knA2DZm02qHLig+kx7Szi8EWCSsGCCsbW2s4JM7wsUAX76U/um4zpF0EkbU7cGL22fLuoxmztSWiAOACSstk0O78QdubU5ccCJkKKv2hx49D5gsvzHr7IhxljWEktCp0TmYdtQ2Nh2ZLhTTCq9JtXf4IF+VJk5DZjwu1ocP41vEO3b+NYJwG91JEyrw3dAddImHB2xc/nMWatw3H1CemtCzkSIGnT+GRIAYUQV6RWdiY7vy9MiqoJkkba9Ns7xSz0KqQviZUAITZZvY7YDERB3niRvexFnQxdFBq3al2BGHYat7oZHx5DXtN+g7O0sPD7Ej/n/sXocOaqlLu4Rj35Iysh2z1Joqp4sg/dly068UkgeeihY7INvAxPci+YftqshDeRuMEmWPqSH7CjZyMga3pjKg6EcXuxY+Fwj6tWV3cyYgAorD6DWqu+TTTrx9kbrUfuEaf+qrEhZkBNB8C9BNHeGJs8neQ5rt6X5sHd29K3SKCM5mb5tbRawy1LCABB3UYl7jJ5G/PF9idP8A6vR3r7gr+NP+UVkz7ugKUXhFuk3tB9PBFy+6erma1g6AqQT/P6SpjAolszx1LTu4jf4Tp/josx/Rg5SgmN+5s6GCy0w0x6KHPeNdqDT/gwqFoRZ3o5s7P8dmkOXy8XFLst
3/5uqIRnD1eJImqdqXGu8fuBEJr61PHvyz6e9TodyN+desVA5qk83l+MQ/TfwGhqWZhHNyQdnQ5m3iq0pLCE9NcyYtQ4ZAhPby+Z6iQQ98YyeKQ6silf/pkz/IDdWM5iPR963Qu7WLQcWKnxAm9vGHixnOFn62eAH6ocFOWgiUBN1STLmJzWs7HSMEDOp/eEpaeTIb1FG+OvDg2Tk1TYyMMITWhd9Dn3YegT8KSGJ+YJRXOANv9E0yNoNYhTD/LV8ySmafofXgZqpkfKKYgrDwabBaKQyRPXMpmnbJwyZbdBL4+q+Cfio5mUSlflGwtPh+SRDldSQc0zzeI61ysh106NvjSll1OIsYbTym0Gimt8qcyg/5H1bk7lnprXr0KKKmPxcfQ5dz9/YUBScUmwNMW/hOXQMZmCFzt1nAmEOcg4uK2Z8MaWZ+vzw7cXQNt2jdE7Rj2z7Gdsj9pndgdc15h4w85JOe56/IxzURqs4a55s6W/tZLpDSr467QmoHlpLwUJ/+w2Q6VO72gJiOBYtCZNUxRiFA/10degXuWlaRY59WFp5Fx9orc9Q5fnZQ+zKwkQz3iCa9p4FVQiabAXWhvsSryo54UiXiVKzzu068jEb5OsCZ33X+ibP/vVuSKtkJVCWO+28EA0/u5KfqFAuDdXFMvgXLTRxdYdBh3MWNHLWVSz/vPz5nCnlbLnIdIy9qXcRx2dPwvIwCnkbe2uT/s+5X3xElo7IWWWtG3mwxzRG5wF3hNo+4PwXt6PbnlSPXVaD5IvG4KgPIa3uPH4YG0+/LiInRGcoiB5tkhvraUicXFoNxcm6Adynv7hcO39PvJWUyU5kAhtFdQDBRR0p4n2fiL16V7Q1QMluKg4GT3kPram0W716YoMy19PwVuM75UOpUzq7dWHf9egm/HrKnjTETAkZHP/UkyHiEueDMfoB/1AEwXgHDOty5PGkAWtQiLZHHvlbtOD2QORvYxGZuTl1xsXoH9xBBbVtjzOCWcKsX7ZoDIqO0WpCpoMWPMwRe5B9gsq5YtqTnrkx6xBi7UzENnu14Cgset8TsVxeCN2VHz3ocDkl08auLpzTx+i/9BzVDG57VBEaX7tRPcOFrQhsZlTh/MTjYs6FM2t71lujvXeRvYLIilNFwD8NlzIuPHO40E16QFzQ7FSZPeykT8Axj9/EasDrXr13znfTzLFS1fcrC1+E/dwKCaf6IfN6PgmglMagSJYGRsiXUltAFlSfDPCVWaozRpeYfOVE9TMiHtcOmSjAmL7w6gLT8TjJY0Aq4E18ObysSkaOzLi/PZJuyLfxEzteJjl6TiMFF5Rv9o62ApnwHPb0oScfeUS9o5J71Cl6iioJQbZd9XReI7pqVdIZbf3Z336HhQOz92oakQ9GRQScAOOGeNAD4y8LBkrfTQiQcWPTrIr4WSxNbI5MIicL3EG2SopCQnjpX7sUUGpjb5R1DZyblcIdmMRICai7j77ba9sMV67wMg4/ot+0xbMFY3WjOZDuvpTE1SduS5Js78HBe2ZpsjHt8dohMz/xUaNJd2/ZzqR9IX16i+nRaNfX7BuQCfGxQCmRCwoYbB/tFwQZluz+/+zdB3rjXJL8GnunhyWZERGZHZkRM7p6S/H8y8848+WbQTdXVWdTpeLMHcFRJ/OM99WZ/IMqtSgggdKa3H9DV5Wtcag8svWVsM/EnNG6mfh7y/KZE1dl7o6xZd1DghsDGR5PdsROfERZFcxk9lW04FQugeXvfEgWdlGSU7SomaSoqyhKFHUAGdgiWoZkeImPdjL1sAzPYfAhJHOWzVrYFJ3NJiEIvVXT/F4GQsvQ2E+H5eLF4uEc/WX2ITeQ4CCsSUzlNt27nfWD6BqGg3Co0eDaSgenHwp4mrxJeutgdsc5gI3kmKWtTaYV3nDO2Xp9BxzNPhBQSWPUsTjb622dM/cwd8iHaARib/cGwby1S1PYhxBfWHd5bFJdgxr8NcSiaRc6bq8ZFdADAZSJI+gCKvn
CjSyhZiXuAkcUflrLTXHnQT1NR/Ov45fkD+s90GWegHuYV+x0a1z7q/tvZd2tivkwZYr2LrBmP/hwtowyshTqXzhK8+33WWyjww8h6c++XP+IiPaIe7Ld1IMhmrlIn42ViYovkxOiRYT/7shK2t9b72oo09RdeR6Sc8omQPj5WYFEcVnYReLEhKTuiAFa0YDkW13e2xjtarBLqvSBETtVB/Xofugl24zAP5CuSXgPuszDuazR9ZigGILU7h2hB9SXUTZRBpMKBXmq8MWDVilbx+OTnCIhhbDJzz56en+doTCzuFQWoOMGp7d3Z0kwW1S4g/QTqooIQbk+dkWZ48jPehIClZ7f74fL+GTVHb1B51/DD44eMf/1t/yGmxNFzMdEvkpMivdAZWLdL9o++RoivdjYPQGt7Ibgwn5htMdBneWmC48qS14wof30OQuY/QnUmAmtfKyWynIkGeV34VLRfebCTGJCfLYrRHTvXwjEkaG4Sj/tTjl9dp09bp14N6UcLqCTHw14zW2VkEbNdDfFw7QX6SBfc19QKJnjK66JpyE8oVWDUtx71eKEfxUhtKeiPx58OT2xISus8n5AYMTE/Jniucc6AePRa+beV0nGo5VVp9X2b8vsBMTFxO1EHsbeRKb63/NKwIzRHRlXW4uTJd2OW6U1BoZIv1FneL8Jc5MayKKrqz97AFhGkWQTKyn6S6JkvQbeGBpJdJBPQfvQjh8wSGDn7sxU5NpE4NSIINoNGlU+5cDMS8HenlQztPzzZPfiUeaxt14Qi7fiKgrOonKOmhIYsUNbQ/HaolWU+HeHHHrl6dwdKdayOaz7eFrXxdt0Zz2emfOaF1omv8I/C8auwbWECUYdz28IrSv+/CW6VX+G+br8D7cS5jbjcNfNM0+ImyY+crXOdJyf5ykbS4SH1Tyc0bJx3aDIE9tTUxT+jQqZSaUOWlDZs3+DtoU/FxmkHYN+9GLHkPi+0wbr78nDNJQ48usn0ZfJocJf5ggFwa80vLzUnJzQIRTok5gRSyyFnmQ0TW/fPnRhGTbOJCKTHcGQWOGnqHfaWwm0uT+BpZpMnAPcYsxssHyMdGUQdYFxem0idxDpizo9tJX/1BQzGLw+8sFEkQvag6iPf1er13tF71F9fMxGS75m3YewBy4GJh6qCiT/53z2v9ApQmBVnUXaqZbvoJE/oYGYxlNkXiClDYBe3WRJLAj5rLH2k+/iGZRQfaQEELuC1tIxVBPzfiUvO+OtH5ZOSY9GTjRLN4vBlP7Aw2ic4dbpWhNKXDlEXc+0gqtoc1RUuUPh+dHD0FN/qC0wfP+nUea0UqMFp0zuuRh3e0rSTygHR+LunWl+Vvg9rtwua+/9qrlA5CUl8TBgUwWpO6DJoJXXjvVZIlYyfFhSUz5TW5ftRMl868BBKR8ldo1z8HJL9UsARzlPFITM0xyS1LE+fdO4CDhTUwxlVvwjI0Q6ZiKOEDccuQLmxLz9E06MSs8bz+i6KcaF443vM0ioKDek79x2YxD9/lCr3h99s+x/VKsyo2P/XiUuNLK6RnhkL9xF2vwSba17e55azGZJulmlFnk/v3HIPjFTB776rtiY2+64DoVYTeEwUmRpj4Hzuh4918zwfx8PoCwtWGFWap2yx2kU8hRNpFA6q+5zHQZnbdfgO6c8GNP/I7Ras8RaqH0YCUe9Mv4vgwFKjVXQ2qH2QK53k/6YHF21bliefBf65JagnA6BmN+1JR3CzXbdyJDDId1TquyJvBoFan21xRDYWRlQ2wWoucJJnDEIlRuYrSq4JTEA92kLwMx3ZV+YqXCsZRPMRnpw6AFhiyxgcxUTwi9oRhJ87HHkdYoDXqUELLyX0RDsyDTTciaNm69Z9DXtPCLbPuHXrh6qPcMFaNlvYXjB0nTkC3T08YPhm8JVcG/id6APCQTq1HImvpxn1aFKcNkXmVBmtrHRhtr+nwoVOXI7WfHUwJYvm8opJBQye1c65d2v4GD73hgm8uV/OW3
fyG0zs0Z/XI02ZTkmdo4pbiZ8T6faU76SNPJaFEDCFFB0PheCW/LeP9+639/Z2PornkzmMibj97F1YwhDhS04uGPRqkVQuGapKUxfRTar9qsRiJjvmj/ajuKe60oQqKjhruQ7xuZsf3+nPop8hPysl+8kk/DMdu5sPSndAuVLLwQ2w/bOk7GowPyLz0ifuHn1VPeCUSnnCxiZPZ/8yKWm0YWkzFKZQy9sLo71X/2bE9hZx7n5IUe0IAwtonwzC0UbioTfEsHcHjBMMAiAlbDjXTlqyLT6jJLKypz63zNh0PPt/Xs1xtLH7fgq/kBRR5FKTXYjWAj8Z7kivSX/0VcWzeWoGvJixnomeBBJrHKPvV1wgKd+gY7/GrsGrBqnp/o33tPMCZf5CiPyV4ojYpgciThAWDj7ZPQrYEsp7bJgpyYOqIG087MaMGIM96s0pqy/gdxJozCU6wcmr8uCi8yqPK6tQkY8ednJM8pePKvzcOKVZrJioxr/dFqLrl8XA7uHgJtpLwuO7xYUyZVQqOFXVoBrdCps0CjkWsmlfruQsYW45Gg4EyZ85nHnDIJjM33uxakDIjWMEopYxaBi5CZUqujhaxXXYE+3FO8/og34F1fhbgSdLwY8ZBspb7zFiDcNErjI43D3Mke/qK/8VVfL8FcCYGaDrJ5vUbQL/1M3g+EsJhFkLig9F+/i/xH6b/t5FVffzjlbhXOxncPhvi340oUUjTkc7L64PvGo3+jX/6RPi8o8CQ9rq+OL5v4d6wDCZlZ77Loy/YwbGOvcWIGZKrbNsPl2lq6zodxTfgl/jMNKlXjx2DikJ5RtZUnvjjTRAr8hPakqSRMeQdRToESFwnWu4+T1wjed3xshzzSg9on/ArU8dJz5/rUma49EJ7Dg9HhSjCac3zFKCwgwyblDD3BPyEH50IEght0Ssvn4sYnT8L6xDPo8Kh6y4taKtyyJNl15cjFZgp5+AZUmAA4S9UnSJTyc5SxPHUWzWgPtTWtPPUnOHhQXNNfZifS9Hd/cYCCNwDA8C2o9rWg6/G5h2CqNQFGrQtZwUG4bIQOdzzf5aDehmYn3kkj1NCEDReKLxPV0nufzg9cuAH7/Rafl9WCLAZ9VX+tlUAf/u1Gqqtv4r1srl0PMP61+ZnmRtZLqiNZY0hZaTAsztk3YLHhr2UOENnbgWn0Uh9js2KonGUxgqUPKUJiiEcGz94Ui18+qPwobxQWYOdRn5TpZpJLvz2DNF0wpzfssJ0Kierasuq68wZIV4LggRUgbDJLcljf/Ok/UOqCTe0sfb+IT38cHgO9jLoAhQtmTU9e8r8HnG0vOjtVeKsLkm7T+MKY3RA5c7gehFTukzDWTw70F8quKRc20J23TzgkSs4uPIGtR3fex6VETN3hJuczGjasQ3Nn2WlU/3aAYfprOWOxDdigvliJ1nJzDHyzzfU0wtkZIUz5G+uKT3C+KNgFdm6JQa5eRKjvMxJsK22qQEG64PJSSn+vQX/FjN/Lfv9yucPD+OBgTShzt/GEU/sVXL1frPHDvdxGmN6IYR81mZ9FwVnaUbgZj+YMZOos8shuEGGA3hNs/uD71eGyiKhEwv3TDGMwEwNehnnfiMeqy9cpUjh4Of37fMGpPmxF6BpWgNKlaktG/iBlKjE76Yp4hMUg0Yx2OmB7NsU9CiEeKYqO7SdRhA0EGEUboQ+2Kg37gHoBG2lytFK61K9lE7jjZRahX8PDeOb9KJ1slv814734huvl4IBNBxH5ecxTJ7J2ZoOUUuMNkSX9JOjGjLYCY9MiNBepJRd8m071ooxgfRnNyagZwW135aKTPlUAT4jPuZ66/2OyZG22H2lEErcADZiwYpPjLhFX464Lg0Il1Q4tLmUhGyjPAt/06Ib63F6kTYAZZq+DAcpoxbLo8qk3FNaXcuV4OjRsdZrPr6iIot5N1DppIlyT1b/YkVT2nkuFCGVXpEatJAaHx5Mciw+gNCueew2zi4lJN7QiMfGq6J19
0ZlBE5mDjXJfJ0aZnvhnzLFcZCdG7cleUVd+xxX8KlpDyxY34ognOVlMNvpDNeK9VluT2VuVtQrsd2HNTXFQzrbYKekBoa3D3omDmxeAk+E6DjrjTBWKiK1q2pcAsBlo/2OJkhnsD7Y71rNgx6cITveAfTCKNH6GkUjLptFOfWVcyJmkm5X5VgN7uQDQNNOa90XkMaDRBA0vIUBTmfrJFh5yxMIlLhFShoRUjLXHd5GTyE+vYjq5Yl2pRKMSvJ4fqCiI0fON4VExahHBhU8jRaF7auYwJjn/s+AMkvwweOROQO8zfYlsmWxjMcXffKRO0r5miEp6TFpy6BMserFJ45aYXgJZiv+RvKUwXJ3lBpPhe7q2wKIQHIIgf4nShgxbYOCGWbqHw8v4KCNtXlx1cD61mqdiUtGoo+LGG7oMytl1wmO1Hv2B0T+RIwZ3kolPHoudt3JEZw6ppHbpfAcOgmkksCoDDsNh6NzXH6Qhn7a7xC1jXvuTqbvnOxmn2I5Gm2ntRZftPfV2yJAqoOYCwZMfqQpfzyHDyu/dfIsFiDFAHmiCeSnymT6OP1yo2dHiYA5qxjy1ygmLu7fL08qKRkfSSf8SgCyfXvhcGkx/lZ81CvU5/0s9V2XiUUi1XS/lwJ0fTsAqG8x/NaCeGLq/YTHgPgtEQNbGMbtWnq/6IjZNixn9Wgw0ERSx+kE6TiqOq4UvmMs+bz2ndzJkFmiLhCHqy7kUPBiJ8LVZiQyUPDRKHV+n6wPjEd4bYT6I6bVf08pMfOVOPJOjh6XZxzOKK4/Q3xo2Wyj4eyPU3zEAT+G0xFZz7ecqvYfwlVePKubGy1NR4yOm4pu6dsch9X2Sp5FmRJFmpXnCBD3dQ4X26kgqnPXvbKhfgX/lX76jFGy6fgdLJ1drrz8VdWBzOB0jMUj764bOTtPIVPdiThP9hVhSiWvYGYLH38hvqsQh05/SEvv1ViXQIGeQAzd5wzrdCe8RE9CFvv6UbzkSRw2294qLfl6SItfkcUTkRAE8W2vAkvYU4PE0Ev5J3IpuVA65qQXpDPsb3OHdPxSylx+E3oipMuhrsDVdzpGhlnMoUuRFgpIg1vKIFDJDDyXS+lvq1azzV8lQNnhFDcFkiDIDyWeoNWqMX31Njqj0Aj5oTyt5AtDHn2w7JKb5oo5IahubJGgNPqadNsVXME/pFIneqmaS030Ysj95zj6kwm47CrPSZcLeCIUW14kGf/k+kNxD7BH/2xXMQNCC5dp1FSzNnmKZpGIIfOkh5C61VSihNWGGlNUf+VK1AThdvhu2QTWsNE0ONG4WDrZyyYuqVqCLWyg1VLJ9GQqtqe6qK9L/NlKBbRIT2F+LGW01gbB+ejD8o/MHS/Qv8kLvI2f1apQIRCId3zilZFtEybbgHuKnrwHvC6yjKqjvL/7jNbX+PlMd2hiq+7Qjh4KmKV3EKVjpS78B16jBZM/hCYMqVFQoxyi3dMgDP+LULyYW9SNHIlOQo0LLEU8oBjx+6NAh9Oz9K/nL2vCcJUz6ZX0Vm+qTF2BxmWdfMI3Y3NFDsq2hwQL3IDD0MuBu9IprEfv72osYq7c5BmeAiGW6lq0F08PSPi8dY174fCwCSX6cw4sP87JzdQlAd1Ggny2bve9mKrkAr0wS7zx4F6jNdlufmdyw0T53BhgYNeERa+SNm8tHxG/7qX9P6ziu+SBzCh9DM22jv2JZbDrYBnoQY++DWYlAelkyq8XVoQkenhhIlPR+LlOT1iLHMMcwK+ICndpRgwrRy14i/pOKnm0ZeKjRdOYKl/CKWOQn5z3gXRVpJk/7EBj33+y+nY4YL0jrKe+PPD22vJpVGA8ZwV69Fu5sD31P0kiD3FkRqlni5W6CtFdhCmpjOH3xv30JdAj8kLltnej0CS+x6oYO5EuvyhXX+KrckkGTBjGPSXlIFqWQ2E/CkiKFKXMRPhNHkA5vGdLGuKEyubR9zgYyRz9y+zbD2NkqVh760wXCR3ZRPrl43c1L
N9FUCjhkSrgJumx5DroqSFO5sy4c5z42I8t7QMF9+8jkMEzq+yTVIEWcRnp+lLkOWamLv643Bnbwl4zdpCj/4aM/Q8ziv/ojjiRPq7Kp61/b/M0U620xgAAjk9YmnjKZzSeVJ/OKQ3j6key4iY00oTYjxHjE/5t5M/HaTyZYYeSZUlhjfLSEtQzAuFk445fEDfUpR+bYCqAZ8wENKCIYPGEokHPnF3znbsWwB/A7WSeGcUzinT13FytTMh5IZWSlZwDXhJ+vRCnDrjqiLBYQZjmQaig3BpCUgtkGKdqCU04LzVwUkl+UlF/kD5zovgpZH1+WovhFEykSol2T5PwBiKFjG/P06VQxb0hw4aFrVYqiS740NH21sB6mf1PvGpb+9OL7to8shyouFp87XxDnJ1fGAgtL/bdDqgD66IYhGNURwGnmCe/XfKp0pbnjE4JmOiDQ4oCvfghc9VXbFlTvKEm83gyh3bM/FXbmdII3UPmiJLeZkJTiEdkrFfHQCQMnkLQEhq3Pb6wDx7QFL4XtJkr1Jp41pzPsBumcIikLzuk2pnB6WLWH3hjkDuY5BErVE+Trm+YRYiW7iLQlPS/ShQNM4NUUMuKd1eIH3RX8cXbI/eG/g/IGCXUqeP1mPSgneyO2vHA5ZADl1xDIp3XfhF2Dgzuh38j/M+4YwnxS6YxZ+/BIjpuu91j/al34IkYy3OtS0UeYkINVFZgQm8wnMakawGNf9j+8JftHmVE9zIPj7KFwVGLjr8/CD1/7nRZ27OOvNBqAadeqTVHDKfYBg0uZYP+gERE/3qrAWaMMz/1HVKeq/BueBTGgCa3sAGAJoTUcgYfGOBZnitRnLBNVOdqhpYNmvjjJbRs+tYe+C+wIITfl6FBVwCtI2eAOtQoGzfr1af60MODp/RLYjHVmcanRwZaPn/D71ykgaP4TuDE6rCmYGGXDM7lIC/mF3IuX5tPo28gg6fuO/tqMdH80giz++TEmeTHP+K95TjjPkS2j3JHdU5ffVmzc6ok1TK4pYZtrOgG1afQqzlPDaNATprqIkEPoG+5NJCzdU/FcJNobd7YI+CNCqHW8cYP0NF7SSuXMJLRvy68tUB/O/ebTlSJNfV8DBD39elqPlKYG8hIDVMDfYJlqSznYeUP2c48w9mjFq1QIx8Ub8cawI3XHd4Qy+eTp0vHa2nwK6Xe3kj31E7TWxVUJJkz0pkUZ8va+4i53GAlv9VtOpuwRg68aiYeEy8aSfK9Vlqahe3h8EMfbg7HbMbBf9yU4CVGKhoxNjVFxL3lRhk3tGJ9eFEEKu4AD1r//PW9y6H9vqI9bZWkka+9aL3aLSGZHq7bXm4mXgBDJlRZo/EM/nhY0pnYqA2513CO5RkhtHfbdAe3h4+Zq5rgUEM548AGPQIdR/z6LpFUfDmtP+FCfCqq+2eqv7KvO629fsU6bPD/EUPjHuYboeb1z8Fsu+yXibRfoVZp/qd7b285fm/mZDZRTYNPEfpLQ6s1YOa9cX6feZ1AmY6wbUkpgjRsFSDgDAnu+yd+W9uiMoJqyLh8h0OsiqHiP9J8yfPPGgDtEiRblR6A2n+zfAqrQDArQTl7RdsmRwtFfbqmrR/vM12pGRpy5s9bbwYQqUFfhV8mD5PqlL5iMVLM91p6eipPtYYYhIpOGzwKy/Am6s1HtCT+gsYzX9XkB65Uf4X9zGHO6N67zbWTNSMU69KVPuSxpCVBsfXmujTLDU+cLyJmKBRb3izT1Acg1ir9vIdn8BupWB9OjKZPPd5MZq440jbKxBBekL4pJL4opQfyKttibDIYN9foNjaqakaK9n3HR826BBlr1vRKiHE43OQJp/TpChtzy637yREzj4EM4MdaHxQNUWt+g+qunjap6EzI3Sqry7W9bRp0x5WU/IwfyXmEsf7DvRmWu99WrQr91tq+RXQRZynoN9W1CqiX+PMn1hVc0qLZgx0349YCl5TblQg1hve3P4trMiD+W6s/CcHt0M7lzsIjq
z3dEZuH5gnNaLq3pmcLmnvoWN0oj1yOoDEyhU2LFD0qekRhsvydxY1/zEJS4WiGR5ACizkCFzt5DcD2KQarObbOlDXYDKfrglaSL00yNqc4qyO8Jv0Z+3O3fI2vUpSu7SpnOV7EMRrD28e2ymo+wwSgrPX4VPKye4pyj3HMpEeluHsLQ65XeCD7vR1w2MvRRILSp/7h+p8sG+zSFYfE+PCmsC1JABUxevQKsIBaNKOPM1Bl6SCoSzHdXnfZDNBX8NNTp6+FkO2VZ2YMeW0pW+G+kE+T8fvxgX2sJPsXLiYiHsh70KhN/T7zKc9yN+PDNqjT4Jyh+rbTG321cUbaX/uZ3CflkGI5hHO/L+gK/+Bmx/lZz1Viz7XRzTThwUuSjaz+X73GKgwbdjvnKmuSxSaw2wWPGy9KIsfDCFaViOpUolqUx7EY1cxEPnOKWfkZho9xSPFjcVzKD/Nhwq4JBDMgjYHYsYnq0afNHdQf3k5kp86vgjVHdhr7+C+XaVi56VMQ7fxc/0NJBvvosP/OhMppU56Fui6lAvhSK6f4Ww9RkOTAfI4ZeCXaNyJp2CSouX4QPGlit3mj0u22qve21uu2296rL21huKhhRdRicK8kZeYFGD14LNwfbms1KdNbGxueyhXhrT74aucJi7prPssOF5/2dEDqIlJm8Ukuqpw5ZmS8jJLxRHowjown20g309jR715zkSRKVTNK4Ko3nJNLqleQdZLS47o7hlzU4rfellWLokffXMy9bLpGNszcEVXCDUA9Af26UxMegfKQgJhX+h/nG+m/ukDVTg1M3737yAGQ7LjL2ZOQavJY3FUkvUktXQatW7gQZioTc8iG+X/AFhpI20DmeV6/NCS6zSNU+Br7P/K7vXMy52IOAyWcRt38k43OI6xZOr6WPPEVISAM10lRNYhdTDpekAo6FbNe8MbI6DFvE0OS8f1BuYNklCBDP96GIz3DxaCkiUxAnu5ikD31MzpbQkPnP3Oa1W1W4jVgrzb/5QLn6pzvMbX8RGgMngkDWZv8WkLV03khs5o6iFwH/eiSVPv3kgFVtbBK+fMi9sQCTDPnzOGBnShDrgKupgLa17DOPQhBHBkOQ9pi2Sv++AJXspLnLsK5Bp4EEmBHeT2zBNHSbL0fJ9a6nxaSImzqug5x7UpaKia/HBJM8vfiNgfpL/zqxefMKd+cK5xhsBHNo48EimA7X22bhN75u4QsfOQP/AAuTRrTyk407mRRIZ7bj08iRgjN2PyF7arEnIc/jKXvYLNcAIEHYHtHNev+X65MdslJt9HKzxW6dDzWzFLN2FG1ynwgkhAC2y4uBe2zR0gFcshDRsZ76Oy/kJEUu1BMTidlPAfV6ScJ3n5noE9Cs1fCNbVmIjAiDw6Ag652x0vdQmL+6H+erdDiKqfKAOwVme5Yp/OiP/15XTXrUaOJb2I+Ps9no/vGplyKsROruvmPrd1u9/IpHyMiiRX1DhhkRYrI8vjjUBxrER0qk+ZgFFmhVSIfxI9s/tLwnn7TI3HTtv8kCDx+ViBALPfMJTSJMP7FH7Zl+ENuxZ4NWZxhTlsLhVak5zZSpw+If+JSQk0Dsv3UgO+x/PDxgFgHhNa9pkTuBfKfnmwCFHidgyIeS+4o+fkUGaehxQnGZfnGKxFzJqJX1NwRkwNLd6Yb/hg5Za3eThISJ4HAa2Vq+OmzTT6DCZ/CD0pEiIfHc4TsblDn/6dIajFVrk4x+9q6Mf3l/pD1w8kCvt/mTaB4miu4sRcZtsQqkyg0vLbzy8uReb0Y15syyxBkB5hZc5JZI3PC/CoR/j8NizdX2wnipQctQEJSyqkMWDNjcYlzCGEeMf9zStQxV108V6prGcZ34mQtuED0CN8jHecFnMSJBGoDUkR0SVDYdRi6rHE18EbV8z+WmF4UArr7fEjHLogf0I/YG+7eZdvfvo6Bf0zKiVCUKI9rbVmro/zD1tRtKWLKRW7JT0DKKUf1V8pbPK94qf5IqxZFE
NWO9tnq+W1RkHs+SjcesgnS1+PvDQO7ElHd5dExje52cWh7MbYDwTbYZdyL117yhESKVxl9jdzWzOfaz1moz93S4zNm9vAv/0c1hZFxuZBZpAxTO8mH7jFJrop+E/mCFadvd19qUnEB6pDQtA7R4iVRg3Gw3OR4tZGZNWCkRfuifhJ9+ZfR2nryxj5446CzRMUCT78tVbeeBGvsN55+XnSQRA07rESX1u2jPPkQWO34MR3J/zRe4qQ05CgxBy5ik7BtFpHXpXvKfDbDRzOAURbaRPR65NCNj+hOB/3YoNPew6+TJfvsVVB8/dWozOZUwIMNLndxo/p31CWPPC0qUfHI35pz7YX95hV+K7847GlwuTMRH4VOF+t/U5/TR2VUPNirnK0Ad5A+R4YwSPXCaLByu8/DLZEzdOEW1G9Ph+Thqt/DuWCWmKvBPRU+Ajp9NUka/XPr53voQq6ovc3JdMzY6U7uelefbohHK5ml7369vMM8vtIhb/1tPLd5wbzsUBv/waAjUyZzp5NVtZ8akhfp3mlzLad8ENsS/YzGe417mUgNzRwLce5eGn9eoTeVyQK/sXrr+oOtsoO/a/AlSewY7aC9z+E793v3Hi3UUVuMZ31tgnYpx15Aou6NqNrae8zUGn2s1qV/L4E2wo4CtvvYIo3ArAW0VLgk6Ggx4v563JzU9FvuKHbXmXcOBb785jJHKprsEQ5FOclhrK9fwb2uRzxxZTlMmt7zPY8KTOPGjRhbf2JU8VzA9uw9+qRXkzI5awwNCQ5pacWlOl/lEZE+yI+MfSDTHk7+Wzw2CEBuAGizyVe1xcj8+6LmSTu8OGGVn3lD26sg3htBDfRWq7pK+2z573SUr/qIY2435nVx/y0N0rm9mZp6eyNoa+vO3uKOlFvRvTopFbHdcvvhlsW4fjg0aZG4U0592Fpsdvy09z5RZF8D2ra2F+gehmjnfheMNxtHV3yc46UE0+4xxoNfWBiVd8jJ7dQ9/K2i8Ckgdbet+SfwVERPaWt8zOUc2BwdNiwlP7+9HDQkyX+WcayfpQ0A5SIAYEx+/QuhJp4pA1OFh/en9LsygMAPvPb9VGMxM0GdurNX5WzM3XWEjsYg1DQfNKYg7JcGDtVL7Te0/Vs7RtBKzwa9MRhmlbuM0R43XDC3ejA6yPCunkUTrvrNxbR+89jvH61QHCuosWnTyMOTg0WytdoSXWovah7rOY3itaPp9x9q1yQFSIWu0kmeFzUhR0xHS5SAfRNrRlmjHkq18Krh/cVrnEAhA8VGBzKdE2qsDCwp6q1QgVFxwC3KACRC2C3yAjdw2tDTyW5RYANOsyueg2WpxOzv97l/2+X29JcgfKueOMaMQEBY2KTwxJU7UespdtNGV7YUfCmSlrd/f2bfyy3qejZa3ugcb5UhcIbr+cfiEE+7jflKa6r4l9kG/w9fbQLVrMMC/qBaNWUjv+d0lkWgs83rNivQVbjZHylNnzm62lgLRqsbz284GBFSwtlzrKp9ZoJFLus4EWq958YT96O1n8Ut+yLM5pD76aLxOix66F38UeqabFqKf7ThZwYAEBbJKdMFH8vN8rt3NIG6nUfyaHt4VLxPou0N0rAz+IWevRCfvrOURoRpmCvXlHNXsVMl0V9pmvtErP3+QhWWkYYONDqznW0svSEajJiytIgGmvljzZIsfyJl0dFGkbgFnSuL+Y0lHwswi8/tScW3nCDK5QKottL4c7DcgGyBIh4cNp3a7jDVSBFSBoReXSew9s87V3CQ2/2uBDb5BB1uZFcfjDzeoY6sZ2UgWv8i0IYtA5cIEs6Limpjdrvcc8ZSFLbWRKAq2ZSBk5U4m8eOCFj9nqWea6H4Z3BA/K8am+b3Kqhg+t2em6tC9tzEruLPlB+HbjEjp7h4Jqx7k0YDshUKVnX7hXfWdliarHZUNE66zFC7a3n85ig2Zc32ho3b10fQrksQcciXFED8J86qHLH9TH8qj5Vpf4QtJaUrHj0EUG3gm
4s7eQZoOLldJ9vPfBpqXRszxsNMVVe2wsMkhaoW11jX8IDqkhwwFR9cYQ7oHM34jLtUm26QmycJnRGWybKbEYIMEcVuAt0esKX/EYsdlNv3b5494TRhB9eDZo2hYPQ2B011Y9O4Kzp8xVrChRni1KqDrhKgUAixm2M72Q8IdEyXdf33Qg+K9/gJbDlIRRGcm9JStxrIqo5VSfKa88m1ZyOeebgJUQa/y2/S5sJliKqicIVHoiXInp9qYm6i+ZtvxbUQsImhOaSNQ5dNyFSVYMwoGJ2yc1GCCLkvxIPPfvY04xAmVWdi+s92yBkS0YoW/rxQRlK75bLMsp8rbuTUnGDQySzkL6mzB7NgNMbeAR6irnj/PD5Nm/zSo4Vz+OGzx+1p2tIhdfjNWyoCKIeeJxuEOjkovYkpq6cFLX6Vvk6f7HXYFDVLDZQlanvBxuardTq49t9egm4fRzSzyYdlZ3ypoDGOhypePHvPm8caEOb4TbXODxGO3DUyKexcjK3E4fn3dnMTskiEh53DcihmiJmSfQjZoul7SF79XMWdI9mVMONN7kA7XVRUvG30edX8OXm0RTItBjrwS9Z2m6ic4hvxVPTiaSqkxNlWP/gqXjQ3+wmuNO5MHqx37GoLKKve/7Qtnkg7Ih8ROBMVUvRG9m98A+NSTL8hZ0EByQMMEF1xL9Wr/33yep5secI35RWmGlfHdYh35/Tavwp0N07Ea3aoNI50n27kOKn5CDxQoPJkiugf2axCGw6VGmzt54fYXkaKebMSHRXwtfb2DGXTjTaiYTpSOGhloA9b89Myi56ySBLLdhScpPaXlH8QQWxaBMjEkz/eIfpf61H8qC5xzmp4WXS7fXK9uRRUk+ei64ePhDCPLII4d37YBYBuiTx2GqCKhDHrqWEhdtyriNgMsEWL35fkZpwfy8y4kjOWFzzzmzf7d5+S57E+7gOTSMGi9YZpTv9FpI/TWY40IISBxx2n6MEokutyeTaRwMELSQf0g99KYg315z0H7Djq/vDgqtejUr9qQ+CLS+Y76NI3uTCWD4KuBLGAh43RsCyW8CNp1T1KxpMszHtNUUBnp4f7XjoSgqlnI8ywGCzJGiaBsnf2B7wS51ssMHPhVttymsK/DT/WT+2+M7I/ZxsPB2NPbnyF2fnkgA3qG2YkXozB+uHCIqcTMbmyqygfTbBKrPtCR4IOieZDh0Hp8GM8vSGLy+USPnm/GsfO9jiRfu7qLyOXpQ3E/QqDZ3hwm6vpr3udHGMThsPnMPk7V1frHSH1XekgpP8QRTXDmCUtup+aJSFuYcV93mSdQWu+JV6x8UwoTjayf5g0wtdUZC7ZfJjpZfSRybsRHu5nAVqSSp2h9rxg8YeOOGQjNJ+y1MpAkWWNTj+uV3qer0KK+/wF0cgj+LPlSDghUgSL5A9miDnaXFgdmTfSOELalI0tlbci61xuwFDZ4AcW8eG6x/UNS+K+lcCZqX/MSK6lwSZBjIwhfXJ9jGPNVb7+bMPq2swsBdUA5crCfuSpf044iV1KODXAd7jfpbDKUYGngHHyNDx/rQzbMo6peGSIrCxPD6N8S1SGu2NSPG5mTiGEilF/WkcYGxJDzuHJpFLkJXsiipo+UBJ9D8SOqIzPG5bHjKHqBBUAs9ck5Gkz6abBlWT/MmOgYZTT7kEgQoBg2kxGKqZf6sc/X5EdQ+WPfsMxljWQ96Q6TMc9sUzAV9koG5HcSYKv5/qISMdKrQvhgjYyq1m2c27zlVZY/lfLjyTwpr5uLObiU8BdlyKMgcKsquJGoXuefDi3QRE8Xq8+oDmcc/BbsMSo9We2Pl7306xrauSQnpJR9QJFPlPDPHmKVDom5nUEFfiZkyIL5vldGUcUboAaXtgw9p0SM+WPN+9YhMAI2FXAPI2jbUAt8z/a6UgNALP5WowIf8PRI8jVRCfpKdrdXkiSLLcBDPCUhGlWwfDEtwfWMaxMl821pvuKDbUPE3EEcCTeYaexhIyWYG/KA
64nqD9xzyNxMAcJzSDA2hx86Cl+6eYpv8shRBfZBLkEwvMlyXDPpv65eldSwDiaPXzGg6CHzG2L0Y1ZsTFsZumc6+1t9u+t7ge0ouA8QGDW1pyalNUg1EU+l91qCsqyWbIW/9Bi/jNVEo3NkepHMI+hkEzO4QNb0HmMMQQuBfaI4HT5Cl4mBcJKRoQDc+rUSP1rb8RtLvisp6limQIuDpz8qPG5XITbHiI9JaU91NEixT/UKFtXKvC8RTLbBz/lzv+q9vDVw25kZQ4dlWSKKHDiiefBOhAxJl7Nfy46y2MwDG+pB1yUJA3OYTOTjGwM7jUEm0m7NzT8heAsst67H2LJTZwW5OKh0crc/bzQFtfMXwk4ByYTuL/0iuk2XIUDRSUuvzuHuWcZAnmuP6ZofsA9IQ8CaJ0YTkhoq2dwtPQwI6fh7ahJ8cv6lVIxEnDeaG/JAYgz02r3NAZQITipxmz8BCr+xj4KzHPzJGNpdXDe0DdIIS6EaRIPdZLmK5Oqs3vAhYYZaXt09Ywn0gN1qE0rw4kducCZLJ11AIkmj8gZIy+Tuj62kjC73dmRlRcDwu3/LdpRb1XWrfwdkZk8+XjdmbjU0w9fLCxko3XQhEISiYcX9kreGsxj8DKfxpK6krT7Vq2RH4+ASohtguXM25j6GyPVaIqPma2B16rh+hYxCHd7yblKzgr6O9T5hyH4aoN9PhNJfkQRk/wptd4ldvc1y1+V/Crkjj1fFTltNTh8VGmagB85XHr8gNYzOilyfxF1MY5LINN9K2oiv0lAx+GwG2DrX+4wZtgAN8EZyDH9x+pKurNQW9rEkRv7kcPJcWzHF819Xo1bHAruh3x/Wvu8ordVhMC9dfZVEmm/bmne/DZ4mIYMOwEZXdnR5MV24b1GR1ve23DSk2LQ9xSOwp/WvI+EDPbkWfJHt5Vv2RMapjIxspG/ByccnYHbWwl/PnlfasZNy+voS/LWT8/hycZD39TTn/px7PDJMPZbMKhuMVQEXKScu5dW2+uin4j0krp4+SZc+T9YJOsJokqcEqJBklHkNzPre+6bAa+iSSMZg0+Ds2vCNmvUEOo9ZAi1IgUywL1FpZ3wlC/ktlByJHS2D26owO5Bv57Z75PG/lXxgOnQCVfsd1YlaZNOYfvWmxii7BBSoUX0IS7aZyFSBaFja14wiLPNsMru2gOz00Hm23lhQhk92hcC3MYZHy1+U9Le9ZBLe4qCCnTVC1fm2uRVHPq6q43bDZHswF69Yr+hD/OXLN1QJuM1tNmsrMTviiphwMCP3qeZuHyzpnH2Sj9/A3lJZaCB81BxcOGW6f558lM2fMKXDl9CCpeFV/f3by2jIGTcsf3XYKTPjQGnw3KIKBbTRFVXejdUe5iVSNNKpjK3HbGDnqf94Wiky5PxREn5txizmitJ55vwAOUqYlF0ZuTYE9m2E0tht7KfEMV0eWRbm5VUbFrFLGPmJDqtp0iiRYYpI9xGTNXAiaIVMFg/EJOA2i3Hsw+inwMh3PLIc8MuhJi+x1/3L/BkkNA84Ufs3UifGFsbc6LrF2Jq9l+S+dktvuEDdSxu7SWqQTTYslfm0UQNazWu61+EVzjTBQhMu76+B2O56ltbhb/f0/nURci+jk07mCn4fArIybHmiqscfQsUnHoNqkyI0aDiHLvLr8a2M8kcICTtANF9cXc/YW1uF2bxfZkmbEmJ05jGOfKmfQWbjlRPg7PCXdsXM9vLm7yB76f6oO2nejfYjVvxTNtqnzgKwHs7SYJYRjQS7eSaDHt3uHyS+i0QkxEM4ewgUzBPNjwfD2pPLN9ZOt9WV8v47534/Ki1eeQaDB9h0xzROq+lVfU3owRL/2AVSsZqftZ/jjWKJ8FMoVghtsPUnmLqXoIgj6B/4pC3awewSVG6y4hG+8UGTEw2fBb/Y/drx+H58CndyEfdjGudrqB2AqfXl1CwLHikoIL2R8tDyNYI+Njm/n1ZcEiyxP6xLHQ96lIXfm8V3VKty
q27Yw6PN7a+etN0me5vFScgYQTlPstv/eIgebxv6qKlmUy+FLXpGgJMzBP2vNtqQNGYi6fmyTSZN9hjWeDdUljIYUXE+2L9D9Bg3ABKtf2zGDT3T+EmKliElW7f6JxeRAqM1gySSfiWR4JhalQ+OFkpTnpS3VmD/fJ+AhEhJDn/pjW81X2J1n+rGjXayZBWjeZSsxkXlL5PH+JuiNlRopPPjlW8AUSrOm2T2O1hJXrmSC6qSVRy0vOaTxq/86CaDoth5Q5QRE9zMx0OGhcKKzCvwBJK+keFho6X062p6NQy+KbrfHFlItiq0COImmYtFO0Q+MMrIdFMVQCgD72VlgEYMyCLp0OjZiD1aqRk/5mB/crOyb9VzPxYaOyNs7ry3rXK4RiLvssbalySveKLJLwEGJwYyUzoTYEbjRcLuy8YmS9LZOZAsmAKZ5l1UgLhcLFjhU67x+91gtP/V1Izjmbr1BmVXBIMQ4oaHH6kn6Q8nnfbevRFzRv1wbFqf7oMHKfHjtWTQIZYyzBNUoCXtlI1UkzlLxkp0w2hh4z3vszKJMtkQtpf6ZkeI3ozeN863+H7JxSyTRtSOmj99Db5vvpOp2Y3HGmw7S53gceMv0VoGVB/lSmMeLQS9yV9ZociW3NPkDborPpPr+XeaZGdxqN9T+F/FStM1IX+odD2h8iSQAQnhv2Xz/trXMAKMCz0ECpnAM2NRwNiwM2mIAPslSMTavPnUJegqW5V2A9sJuPS76ciPxA2XerWNsCGYj8pju1v+XOno68D2Z97LIGuZnVqJQd4qIs+YHlMj3y5RZmsxsu7nN8JO4s02M0m7js/K0Od0kLN2kDduhXKXkqmBmvFZoWsQg3FcdwpO4vyS/Y1DG7SG9V9FkQ3airVb4CmLgky/g8BOIfnNId2sDPCyAquPmUifz2tJGJcj0dn3jKOLBvuUMzlgBgRqLsrrlnw6D8oSdHw62SaMJixtO+rI814JsY13C9V+W+yrTqwFzfI5I9RHPfACvknEbJ3NfWwYWbJ7C+C6ksgu2QYBnPshGhLEpfD4qsrAmEP98FY2gSL7hdcoT1BnsZHJ1w1nx9aWWzrMCGJXeRUf60+yvmEoqzF3l3Kdmtyd47z/34r24UeWFn2HHtBiMcPkjCFtMn7TsSCR2eSNT3riFxBUd1HKlksrJBVdMytME0F2Rt/8j8UQIZgtyPgqq6Oh9ygX8AKrwVB8Xbu1E07oZNw4Ynqqm6/868IJv/XPYO/8jGY92tiKMSenMlvpS4eT9Q394D6WCbjpLNliMJM8YlJfqyJqX3qwF5ueha1pve8nTiaI7MxE4pFrniX451km67oVOAQBOsVoG9VI3joWsN7zJbiwcGfEDyAI3qfk1iihpkdLMCS3BEy2Z4I1XMXfwf4U1Wt5CUUUi7o+kT19AnfKZPIkwTHYNb5oViGDFVnAchCgAr/hCeBHMgcgMZHlq+urZPB6XB1/M3JrwGCMbojImOaF/DcCY/OaR3PAn9nNswP4Y5tQlKHs3/UNPIUay34Hw7TLAbKU+P54MGx898UXkQTZvLRHunuGOyd42dkS3gaTIF79t2J156quWpw9OjbIUOEe91hkqciEJgVsQGjExJVLQC5YOuNOBPVr09ZAmjDy/6asic20X4rCpCxoyF0uzZjm6qgoGVJTS+x3nggcHuTV1VcE99Z8e0sPbVM368OlX6/t99OcdbBxE+3LbXu4ep3zwgOpquhVnZvpu7Gf98lwPU6nENZXjM9r9EdPGHw1YQuvPn11O5Yh0Qs/oavVKICTculG6slUGN52r+IguqTQh8aRM+D0Nla/HbqoNpCBYQlrG8cgQR6d7WkLj1UVFy0fRv8KFEkaR0mgsvGWls/YaPQaWXa+gt1uXmm5P3mHtbGd5fDADOVJlg08VcxFqtc8u9Q+WIMxq89LeKcrKd/vg5VPoUEX7DoXaLaUf+dY8CxZbs5eIKWz8nm+9d8EylIOSm3lCHVsIPr5dxnJ
yaYFW/3RT/PFmS4xIXWaxhVSlo6XSL9PWF+Kxhm6Xk6fFGzH0Z2X2+EXL4T85qGTWAjDYen/03QVy5YqWfRreo5zGOIc3G2Gy8Edvr6TW90V78aLqmuQuWWtraVui7kC2AUpia3Yvr3thtITzPMg048uLSWRN73924iixQNOZ+RSZVxbZ0s3pFyaHj/7UYpFjTju0+bCT28vLfNNd7F7vZeIVebvMY1WIxnwCsLmch6WhGBEa7RPOWnViGpGQwOWzViyp7t7gCEJNphXbQt9DQOep+IhM5nCyZyrKTJXDkCE2Kuix3NXl8DUdGu/A8o6+PrMk+iz8e3FhbMZUQDBtE2B52PDXTkTdnedEEFR9Fds+/y3RAAq2+Y8MehH/dgbR8tKtQ984x9Wd9g9DYohH2HdRUY2ZScC6TkGYngzQLG3V1qAoW9TmXMta6lNytjBl8BiX6uI2MXMp5OSIb9ISPbJSFhDClZzthQOtxzEtTCEW3z5ThYsmkc5uW1lr0/Gw5IaUvIN+2psnhZ5qyHKWNiA+dp8q0nPr45tmUB4Vwtkp69S1EA+P4BBcCykiQH6QS0SU+3rgMwBOrxMhLAs19FnlOJm7EzdNUO+IqNoJ4NN3Uvc/MpHd7/yhnw4JBvHRv6nT6ElrwjuECP4O3I5YgAfnJz2cFGJqJCwcODdd0CsAe/aOV1vaPKTu9m7zapbFzcS0ejZyFlrYI0UiBEKvedosuEz51CoTPmU0mKKyN1FSNFJMPJu3zsiPoiH9f2veI4N/8dRXjTw1hunUnJrmqHLJkD/mpQ/9JPTDVTa+nghscxvLLGNDJLGAcSBexCRFs03tpD8k5SjUoSdAhLJvymhyFtiI80Jc/zzDXglUqh+rTSU08wbcb/kkT/oY67w+buYUJYMMNl9E54T3BpRZ0Q5EaOdgUWxuhfcfFiET22YRlq1aCJmA7B55OFFSCp4ERVnLCIOQb/iVo+GnohzrnNKnNB+krBJpGu4aE61x/8Ux9TA1ySoie/4OyOUzL2hiHWY+ljCwYnNCCtXWymPETk3nDejhdjj7GghCpsGOBqH+1FD1btO6o5PsY8pRpfaNeoSdGQeq3LiYqRvmlNN9wVxnflMc55G8LTydE0ILvZ9k4ztrS+iSX16vaGppqJbr0TUvyo5fTyqv/4oM/+cHxrzETUlKgLWTxb6IgCmZDNsA2ssb7uXpfLv/VBg86G32oGwmqdf5A/fP03gnN09+DR4SG9E98BriWtWVYDKnC2y8RquvBVfYmxYnzrqTmrHRexdeSU53Z6ptIlBkHsrdxDAf0WX6SNHUmPpCSPQTyuat1//LTD/pJZnHBm/zVPiydt2kh61j4TuyoQoWlh3U7NwCV4woJZK3YnE6z7v2/STFlBC7GJA7b9LZwPOP8G360uCzCOc4ABIVmZSdwqnQJ4nHCLqT0kErEuvO+WSNs9xIGu89bB3ikkmArWTUb+5kaQbjEoNdPiELG5x6ZinMgULOX3xzKdg42p7DlOWJ05Ljx0l/0UPTwOJe/7HAyDaa9BpVxo8bqsKjmutAOSlkSc6VykYv31DO7hJwyvKwI0BcGw2dJ/5xVsJi4ioXXFXJiHEy/4Qrbb0tfV/N90C3mtr0vXF32H1Bho2s0jSOx9oY7yz79luSeZb3wyK+v1XWsQeRMqhahZNf2lGtnnhjQvZZ1oULWC00xd9SIRnJO5Z4PcNrKfiWvK5rnV9wgcGiO0YNmf90NAbRrnGhrWxnZqS7iwfVSLpzpG076b0U+JXa1cFMt5xmjoewmfK/ua99tQeYMZt7lfvfJn+89OoqxBF3U3ipH7j5NmjeMLmIBUazOOcccH22f6WoE+WeLlANw+FqVElPgIIuCoLs09D7zyNigRd8EwybzaLRQGIqh6ASMWtdPURgZL+JzLp5TgvhxMSAQn4n74AWdDBeflE3STzyGxLFxCMYsN/O8W+UZlWotb2aBHmpATwuUJqFOmvPrdw0XJ3cduh4+eI
irvT3bBuI0QdRfoS+Wc1Mwyv8EHBozQj0dZLPnYBToLR3gi9cEGo/ojuCrtIYTLiLWbZrUmVsP1tVKmnqgBCIVQEhSUGyU4oM8aPxcfGyRO/9WDkme2xPP5EsgG8K7CdDGMR1LJzZGA8ziJdsxTc4GPcNCgRTp/Q23mtHVT+GZu8vWNctjlSt6XnA2skTzSEFEkfqrOI3l+P5vdDPMuEakPZi52eLZlvaJsA0A4Ba/EHodfZV7JbYD51tFc7mg5YytxSUmTZY5vyouIhXq3Ub5ent9peS6l/igD++1soR6JMLOOGrtPsl6gQ+Hv17P38+4LOsNoVJf6Qe2Zsb92YGDQJQLclQPU/nSkhaBPomUaW0UgH+EjETFxvWI0BPzclT/bdhBJG0ahZtwgDep+WMX8SiRY3o9J/Bp1Dhl6WQ9Th5iOeqhbnfeOj/rKxWfBay54NiyGDoDTi3nyOkFPUB7DMDN2o35Md4YGat5OYqIi7Yvog54eDNBlOfokqxNjRfllkoRE+GJmUvQ022UJAM3D9IaC3vw+4jpaBuCOs+JyzQvAh5YZl/c4b+eb91jjkHG4JT+jHy0yn5DjWKBV1NO1T4K/CkHkaRHkhwRE9zNil+T2xabACNpfelAOJiz0JsWamzQauqqDhvgCWBA59ZVdGPfhsXeIz3YzDdIYEk6Sc715eRlax5KGj3MzOQlCUFFXzsHrHb0h6S8ynvQ5PeAULindf4PMk7IvvhCpk0jOxa6zfyzQX+GMQ1cn3x4Y2RWygLAGwWOOdboXmqW1ZOngFdj7R8mN52MevluTwKeeyclmEeGyqhnjDG9aFVd2H2tRCdzgz8JbIq5RNebY7XTgDfl+iRt4grQLvqpz4uKwrUZXBVFJaNuFL2MaYggOurd5N3AxWiBvJoXIkHuWl0HsF0OMb8NPta30zEOiSwFM1f4CJF5KWJmNFDK6r6gaUOxKg9Z2/r8MruHAbBpztGkt6xysjSm0ZKj8CitsflyEekaDWb++39FfJR+4kWo6SQ/46WM22UZ0sUzoZ8sx5bBQbopj7pro/rRQ5Hv4XE2KDk7zPFh1WPqLgVDboFNNKHrWtgZnzZasxlHA/SEpGnQxis5rrad6jKRUpkUZ6FgN6IeF69JuISNjKRAf/ADsK3VdcHHrsltKbAgmH62gFaWRbPch7+rE5o0oApnvdCOIYyJbcFG6iFoA60QSQw5fccsIRuNdWm9d0tni1wUvFpYHcQPsGGVM29O127RQO/ryv+gTKdQ2IuYkNLTS3KDZwANScZH5tWtQY9U5C+PlHKmz3RhU7pr8AzhP19kvwkqV8cka90DrSNh/V55HUU37oj/hum++bVYm3N96ykmcTImQvXwCotuQOreq6GYtvKAAuOgGyupFsZsDkncDv/0R1dqyEqeWg5QCnJnDvgm9IWrSfsGh47b7RSgxmF6cyNqxnF1f0qxmrGEgcH8vBcoZEv8A13BrE8U+C5SWduYlvc5JbCt8DUXTFi388p/tiyBRmYVwt3DTtYU1zPpSVW2HY0LvdCVhyBwGX0YoUz6pBBdDJUJWDSKNJPc9iT2zvjFkmnQEy7bHNoa2ue0EjE7aYaAwRTzGKhPw8H/46IofRRgxtFfIps2FkE4mqkbf23mNMndsFugxoF8gDx/Ynm/YbE1AJrc9U/6vhdMSYRJgKuq+n9Sbr30z3pxp1ySR9p1uranzFdqkX9W4WuUeXEBLWiJP4qipgjgu5k32lb7v/lRa+Sf9F1gFhaEI5k/DLNf9QZpDlTwFYtRHeIf+D3i97c0moq0Vt123U0hjp5wwSddqfoOVxRutGEV62YpsG75ta6YpkoThMXe4SFKQgy6pvbxSfnBj4LdFxdxxAv452LTn56aSVYAVzQaTyk7b1FRAvVmn9LTZmjdfyM0cmWiskbTWsTHCMvMgvt5px8TOPEyddmSs44fnNw8SEQ+6rssfJPaUJPVciHxVwICy2+3lsbQnepmi4/ZAVotZV
tTTXIXEeVa6o75qoc1Am1zZRNMjyJFujDBQ1ulAdwzssUqWUu4NER99WSHJDSAot2zcLGFlFIm3tX2fNJ49/kZEpzcaeECJRD8vIq5m2a0PXa4xU+RtCct0R4V4HuQqIwos3Z35TJXfPbTV/xIaIrBrbKf/wkKJX9y6aLi1sPi7SLe5tdLAVXNgSE6zhn8ei9fyOxUAY0bVp801OmUBtnv53dbBnNKjfTWx9lYcdvsVCj5ShkCMDdDIeasS9mQxKqWg32epIOh6buIBTKz1+l4IaELFW5TrPJao7wPXQz9RG4cjOqghe5Hk4ubfbFt7gkx7Ugmex0OLQtt5CP15OvfgrlLelFYYDMe7+aBuQjspijOWNETtYkPbak2EdIsM6td8ClhPUaBbyHdyySHGru5cDvyKxkR7i9SlR8Xv3uxYaAW0C79tdu9698xee+HudqZWv/ZQnT0gzU9id04oCr2D6k4WcwAh/oDI5YNaAeCkfJFkdYY4sZRsAcvWC6EM6A+rReCSvW8hryPGy7x7yjGo85VUSR+CxjBnZZhrgkqYGQksMSLPnIuVHAgRfyVQ3Db6FEV7PGJcWvrEEnp5o29wFesrmM0Yv1tV8xC1yu7F0Yje0Yjd1J/dxVatvLWA2X9+8/exwYqMc1782pKyD9V7NM43LyNNUt974ecBtdWA6E6A2ElDls2u/OnyHLAkRKhLpTWPkot8nMD5ytP1NARZUlvOa2HVi7JErAo1JTGKx7U4SDdUR0Xr2g5TiwdavKu4sSeF6CeI3gATQAWIJvZksAmcrvMoeoKXGVuC6JDYpFYk9ADcvlQB4Vd8rYK5m6K+uYs7yFD8kJfsrMB0ZzovTb/ETpZl2ziitqaWSqqAoZ1yvRIhkrrdCiXuIc064YPWEaxPRZc92/0wBF493IOm6wiy++FrFbZk0LgFg2oF9EVkogCAOfBQKIxOC0H9+ZH9HFwRdE1U8QOxx4+MhDnXneqij8MI6eRAv2lZjThY6u/22csGWn7BNvAuaXGnfGD4+Tqu59KV5XaZueSnj45OhCUVt01KiMYCKjP6jfs5BVbiA0M/fYjMCaQOrPcJsivw7qQ9q8AG2ajjbI4gFMDUTeQNrGKKRdIFn8ZIv4JFnwG2ZzdrnDQXv6f+t0j0OuEqh8Q24ARdfURPBBPP2priRWpjid9xdNe0h+DeNHfmE2EYhMz9vx4dE9Tn3+uDmgy6kEGY9M57k1rT7Inp5T8BLoA0DwcnUB/3+hp/xF7kBNiKdfc18SSGZcgdPrNs2ZuZOQYUho8ZDkgjy3rPQ9a3xGF2nMc2NnT1kREZyWeSwIQSnv+DrTi97hDfdcxtonUXYhfOQra4xRwGy+76FBxBRDTvyxa8sZXJzfSp9itRi7DnS/krv29fMlSB6/NWMjMyQTc5fyQacFJWSC6YUtCJREe3RdSuP4h2/6UZ7m17xOPwNS6joDU+m3IJHAhmFtEEWpCjTg1GgxPJHHs3XNtsY8WkjojU8RZhDrptJTrWQNhM3uUKgoulG2FaRB35z6kw46ztpqtfVVjaqeb/v9jfBjzKY/mfrHw9zqr2cbaOVlyAueQChk0e0kWjxpki7Pjv6JScp6mDpg1Z9o8M7ijPjsT44VSTpi4IIgLIi9Lr0MTsjeDzF4mVMBQcAniG9aTdqBC6gmaiU7rf5GOakeU8k9TkPFnJL9D0c4GHuS7mnqL/K58kKLc8C4gCLlSFzlVllSVoIfLbvD/ZsVWc5ZLoZGhJ1W2C2BKBKNgHabSTCZv1G3Zst1oT75HRLA/IAq8ixRNvfPRQk84KYk3mmkyELRh8YG3Yw4zLgt34y9Z0dWcu2W3zAKTFijfgqeAT+Ku5Ny6GcpUxEzmx8j3ru+/ZVCM0b3Y0V60bou8y3sZ8NeAh5sQdMrzr20gYAGX6UMU9udxMwLumenO6Xed/Gf/3bTBvLH5eB39aIPDmLYy4Qb1SAv/JPihkWSjYamL9ndKridKnIiPtF5rZh
VmSs2kUgG1wlgGyaKlshCECMwN3hSRbkbcnUwACXAulI+/OzpQ3JyAoepHhLvURAB/KD/7WGHO7u7ikacUfKue5Kz72OfjI4hWyFYPFQJCMpIt4Mt18LkFgRAqH2dECrMWZl8FoX+9JdwRsPe2G+w0W6IiKOTc1HIQBTGyBjjqxDk1T5DxL00ON9z9yw5sULG5EYoziPXHEFg6hhnm7ooyhiuiXCU8/sqmVAeT18T4G03txPHqGMkz+p2KoiYWQSEA1DMoylOPnYQlYPGJ8v4k2A0DjgoBwspSYns8ksMtdgBNf/7MxNhAR6N3usnG9PDYK+HIE4sDNtYfnk1AS3IgwCRPliGdfTRylULH0dgZ12JjJqG8FMGXe+XyznXNl5zfAdjKG7GkUX0KTVjl2maxHApNHTr9OQTrzRChV0t8zTZutPkKhFI9CSKeRNqKlM2AT0r8/PPaLkLw9/zdrTofUBTPTf7lkyuTlM/aT99yE2SipXDaGik83KYK9nKHZ893PO/dQ81rQHVRv/tJSG4hF8fGUktV6UChgDDGxan451d35S+m6f4nQBtXxOMkCNNjunMU6LAyf1a4493Hg0258+8HtAfuhp7GaehRuxdqXVc3uwKSCggwdtF0rbxBqPKqkBQhriACa8IyHs2lz/pk2sjpER0BeDRyDdvbuEWQKHqDyYk9VtjIHGsvh7EuMTTC0mJyb6mhIuOKXQrhjKrNwGE5Lddb45k8Zw32LmJMu1wFQf3zJTWt6epGlmnvFkk4aXPKdTKaONoY+MI1l0Kyijd1ThmjL4XkbwIcDKyfZIBeTzw+/hL6CMOIaYffibrJbjGkKP0PxOY2IsungTcE1zHz1BzugqVCsx/I0xpsYx6YvPDM+5MLi9GhCzownaBx4jdNjQ/MjZ1phG/jLcAcIg93p5xl+mJH26ft3an4R01zsO3TwAK39DsKRFbWHyVnA8FjWLkM65FoJUBeAZ0mVhCJ0ilQ98laMOj+rh+pyQevzRXDskIrtg65ijHl6weOQLU/FvNYN5dAC5/vVM3308PNGuZMkA70oquZdH6ByRWHoWSdIH+C8VQaFLZyh7HmZXRyLerz+FuFVMKBvKYIt/JpNQw/f2m1ax6uYTJv3gBpnOpnm0P4fqxatJYvMj4hwSqX+x1Xd9w27I+G9g34iidH1ed4Mi+/VsbYgO3IisZxCgJGH0ubACPJRvV6SlOkRE30NZCwjcTyAasZcW5VTtxJftAUek9dpDfabkdI9DCPI8naQSantN0JCxpylcm5SOriIyyHVH9e66YoDHbfnNv8C/bX09jgmX4fn0iXRPEAAS6pNJ8rBi47HFU9vLkiKHf/zR/mrTVkFwEKc2apAyfvVvq8Y7bclpbZMfsL/OZ4NDypuzaW0belLPzcyYmk9Bi5BKL4BQu5a2EZGsq36KUagZIEBEcbyBKiWdFRh8+QThwGLg/iVk1U3lDAd93xTr1gbR0xanJNkSvLmwZiFKFgvla2OneVzLPpGlBdZz4RMkBrYtwOcCTmrRMVo9uM2aC1bkbBpD7IHfPWydwrsT7j74D4tUmAGvyBrk4lVF4hO7/mFLDOpP5W5SH6Qj3lXFArWSb7SVHZf2KC9od/rAyWYPnMIK/7RWg7tw9gtADRuNV/Vja6pHTOnUPJoTjSLSOKJadCxRlkYauRC7ihHsN2yH2dwe1xSvWdN55JSMj4VForbCEnoEBekEXQ39bZrCMfxejg2dO0D4lnx3yKWcfQK/i1gkXgiAvjgsQAd0RJNPE1pJfuhZThJcGsGvGWn1H+A+Bn/1mki886SdOA0/Yl/73OM0RMaFkA8D3KUfKWxpkK5SdjZqAZNGKxtgmJSq3b4DNF8BcM6ScZ1JiVH72g2s5mZXSlX4qnXE2ifdIZlxLeD4WqKvdphZMkSb8YmnCzhkX6wi9T/tb6mLe379hKbazHi1AZtWiTAH5pbG2NXepjl2Ed9uDYFB834XCydV0h0xPuLvF/Mw
ut31bzdIZbeQO3ox49oaxL5IOAvvdnBn91TDC5pHYUS9s+xuojtownI2VH3sDs19vwn0PD7a3tEM4504Z98NSbTlV/yJ25MJYlSiBJO7g2kpNuVw/NViSiJBtX747piwSOExawE8exyzyunKt/nWKDaoycNbPtMIh3CR9mzY8P1Og3XdEZLa7ocEQPoy5zyYdeARi5dnRYSeUKlDWVCm7u9wc2IssDeK8KF/6QpxW/KWkq2znhov72uKLz7X2m0dnw+gHDhS9O8mXiYxWvOzB1H5dlruWQe/BesQRe4Xo7/y1aP675aoJ17Rsr22V61G5m15BUL6lNWuV8Uyi+NfW2b7NwoYkG9jCAHhhIgpeIfP9+36nFr81jOZfB7qn0fMMLGLZOLMI3q7XLrbYHYVYeqiDQSz6b9KOt57sGjs6z6sfoTaDpX3zkFe5FHkEZPpNnZrfb9qoI1hL6UbFbwVb9KNkI/S1+YALwrAYh7DMwf4LUPEQWStc3XnQor3yNppPPTT9+TeO1eO/wjT+GzgkW+8JSbKl4oJ3U0DbjFgw5GFnIz2q63UEzEApy7UfRzMc2B9GhTA4XjQTapv04uGWK2g+4Ldmcjc0r2djFvIK3NnGH1X3N9UJnQsTjwE2wqEuTcKvLvg5kijn86efyc4+jiYAiB4ylfF76Z+lJtJgFx2gyB/G4oEbpphevL6HJFu401HzBvp2xXy8GJ0bGNjIG3MOFrNtifQtrSappT4rKnQrWb6ZdO5RogkLck+OJPva8sHMtCq+60/dcj5+qWLHQRcrevu5qkhN+xiu6nCB7+2q/bChmiBkI7kglhRBVPEgW8dZt3oh11TS7pkdSftpS9xmP9bRpTEdbWFsHMsM8Gk+fraoiKWrn248Wwr9RtWP9dWkEXpiXRdrzqJGwHmtabzG+lp7Z/LSEMABuTpa4Wah0hKmvWTPXM/0l75qf8b3NfV9sz2jeAh7ppdq8jfEqLM7Ub95C2UlFEIm3f6IZ8MAxfr0CH1Uxoqle53AAmYbGyYAAsBhslIgkUncKRbZpbrZspJnOQesnRfbuSGR67OOyqmjJF7eDij9mMn1LhvnfGVjFdD/DxKL4Rf5hJtTT2AdXEPk/1gx/AUXhOK/2KpmY5BdSV/SkJ9bb86VVQEfMEll1hs0vwgJZUPUkla2H+455ITBsqqeOIVRvUL5on8BN2YaBYRV0map3gunshPbHqiZ/cNtcZnMipoIcAut3geY47gQG2gGDZkB+Jt4/FgZj1zBqdvPylSZro5RVhru02+uLLBEJr/fi45T9fvyXpIv+vPQQwx4La/xkln/IQ+MFLrvSPNryFRyj3QK8HhvClmoQxOOuFdZi2E/0iYhuit4GEuE98n9CwsbN0wKVyimJsYutS+0KXO2cDlCnOLJax/+Qe6q+z5nqG/Sq6c6bSbUeQ+j6qRuHw9fXU8Iri+6uEVqx0oEqx3qEt2lr/M2WSvZsK0CEy7Nojw/n5Gz4o9CngR8Rl/iMEMGfJqs/0Ll4L7zWh+r3odZvHIkuEWVs8wIwm5TKq6Sr+4fCABO3Aqkus8YK8fk6N+KOms9kfEY/Hal+f5XxpTncU30EJDS/mRRM+EEzq+9TIBRMoSZmlhOgoua6A2xRIOdFcfA3zIXmr6NNCI06DL4Y6nhAiAK+x+1mVN89vL1cwxJ8lV/xb1UV4XTUbl1F6e+hSPhOvgcFzw9r6muoU3f2Vw1g7fZwu4peYvItXxgN77+eKxjBp++EuNJT+rrx9eEZGBW4bacRhzR9wgYYn/IxV4ryouTmkPZXKCmpn9TFQKbfLf5/m+CXJ8vdG9G9V5UEfu7SEl+zObCl47Ds/jluGIz26wuTzz+kS5qA2pAY5IRmSbibujR0FxUtf7DYsmH47qKUmPZrrChirx98P4EQO6br2xnpsJlkhKulW0WX2IhFPzfnTKyTxqfz11E5rRKIf32TMOFudBWOT0WzlMGjaiL55S3BaMbnmIGRNileij
Es/Jc7SjkFj05iBm/RWQHb+yL/ahUWH8ZjaDZ2QG64BB7XeqTCv4X8wbJC2MsPdYSoAYpaRXKqbTwDyiHoqWiwe9QxwQPB3bj/7mI7qEdFOldR2onZJJv46wdz4z+DPs1TdPHd7jVw75nib6SX0LW2u9xfg16U0GmagIh5MKQCZGjRk24/oaiCRCJhJWnPHfMjJ2ca0iabX0jZzCtKQ7gL6iBw/1GdJWJswBaUx4VawKgJcYfFRjSpYBKYXoLJwfxy1Cw+ncjfyypFgwr2dnTE/UfqP76DRX2j2HEUYVuHDjQahPdFCGd4pOacLahE7vvaVEfUXlEHriF+50+kgFHqyp2vWNCTc7tI6sM7M+dlHsqRTJJQtejd070ePjL3Wo17GZh5gI/MBD9EkTCFUGpRC0pUi9FvJfn04zOHMc6qmNr3YoAF/2gfWU29ohXz4BA84+veFYfr3HrJ9tcztzkdS1s+S+oiVyanYP+df2W4+KP2vJjlskgBY4lQtjGpAqr/8uNK/LtId/e0FqR0SMwsdRipsNqIiJ4BaigP8iBzlEzwF/zMd5axcgxuSCCkhZCKkLhf9KSbnXmRhCucC5IkIqe02LJ0C1lNlK2MXEZ6Y/OZz0JLE7wd7VSVnukeHNQoZsSb2m0hbZgQlYc86EwjmMdoLY93fdfgCcekrKejPnwIHHbo7HrafwNz3qIiB/XwL45b3ynJzTvH9Ked/GvYvUXaKNK7tcvgX6bkCxsopQyaYSpUUNy9scfiLA1BdPOXHb4nDlYP9hRoaIpQTJSRmaaXK4MnrRq+7rtv4eGtlkkX96uL8dQoFtkPnyDk7ktGHuekufKrhCjWbY3oKJPeKRRWN+7OWIKQt0/VA/8epLXsL/vrn2t0eaahDInRWPS1WgugeewzKqVYuBh/fjHUY3jOicAD3Ah2lO3sIJ2CtRT6lXDT8EhiULLlP0KqmMZFqcAFyIGck/3gMeXqhE+DMIxwM8AhQRlW/SyZAwC78WWdWuDqk9XkWq82OB96JWB9fQnL8kfzA8owP/AFkGGcRHwAR6lkY7SvHKcbjphwyoEbjmtaCH/xeqvGQVjtBe2MKFuGD+O0nknyfga1zsk5cA/KZuWKirTKeeWSKinbqvbrEDWavKVKKYlxqfnE+QQ9/d4PCgo4eXyKhT/RHk55av0OOXBq55bOv97RndyI7eq+JlEdvo3M7+bjYoTYNrpWHU3qDr4kqDg3z3RWva50h1uN9idMiRrB2db7s8S4yhQhdIV7dIGBNlr78g0Hxw0uUNveSO+8ZRwpwyCbNIDFz31DhPgzmhfn8IPhdqxmAQN3lx/ohdaCP3UKeIAbPQcM6HnJ8ypvQBlzAGI/pEHy2vHKMFLsSGtDUMvmybCZL18gvhcr4WRLHdm91Fd9UDFnH3qjrWgf5t63hG2xPV8AzBtBsPMFxh6n0LhhmtlywMXxo7fjupLaUHk4pDwNhlNlu65COhc+BAxVsakGuaY0z3NIItD1V/W1IirRjNgA2kn3l9Z+DD5mehy5lbb06ayVc4g7NvehvejGXupx1QrDeh0l987k7zS1mi6iaPQ2gvSE5l6+ZVLSFNTW3Me7aVRDcss8JNxyRDst93kQkmIq0rwqjzJfiGnBL/+Cxo2NpvsXLsboZMaGBwurURvibCjx7yXdrTTQWaTGSldiNxpxGpV03jW1EtZK5Ss7XwVWH1i4jYwsTwwF3JcbteEx6bdgucJmC53LgxLpBnyq3Qux0nxCEz4yXpOgZA/pKrr/iGXKkWVd2U/k3s5bdoDI/6yjGa4lv0avRnJ76HGuVG63/MN80hf7tfviRJT3XT+Qu0xY67Cyo+h97DloPrboLR6UiwBGAQk8S+vNGLpsXOtR/EpejvBYIw3UAon7ONJ2lKPb9R3o+sFbPU0/hP2XI5UhZ4T9HK9eRfUAvvrQXYzPWvnDBcBpjYSpjKYMOEMydpXBo/D7XDrYyWnIzDKd5JaSpyCFNstm/fIcRM
Gl1MAnMAxwJDDr8pgU91cUv3dGkpov6QWsEzhUVbdvAhqhS5UvDwCGMSH6p4snlOzuiYOr4Onx+LeCU3lBHwvo8ZzcaR7LSg5CZrolhIhVcwmumPedUqN6u8dbhbGcrHEh4gzclFnIa7SafbEwDvlFrAPdMsPCOwDt03NpKPshXuXGdBP3rPyT2H0PHsZ/0V5Erohg4eVVY5L+0IOl517ZkXaYOK1ik+hEVIMmkNZcAjcid+5DVwS69rIPHvQY7SpcjZcxo635O+W6Kf1zAo/IyrLr+PmpXZyqkztmYG+yR9Na0VSqu+tdMhsEM3UXOzOz8iVcjxrL9RK3olkDFXKNypQvwU07+OiQ8pnUXZfo30Ozjel2oVDFYtbKcyeWUzj8a3A5oZ3wevrGF8RsiCLffP81miMOb3VAfwZF38JXgxtK/9JDMnkj4jr8etnyr9onmjl45rAc9Mr+Vrpho9oL5CAv/cLQXyChPo8iKvo9vRD5U0H6xL03sMO52EnfQdkNG/bGTPmZopWLWrPZwsixC5BtreTeYaao3XNFELzuitWv+kcPDxxwRvhIQIn9H5ey4eKyLd476xq9dHcwuak/RYUsxNjHTzGzSaxjDZNA9WonHWOg9muw+9K0J8C0VIp1c15wheqRtm3uP3vgveCpId2omYvJLFHruYam8JQ/eX/URAtIHavNxp70R0/P4rvekC9LW+lz6Rwt9qWhoovP0X1CzCDAPmqS2OuILeNza+ttnf8ChuWgI8JnBVJNrpl0YcnK2/+WhpSP4yIFLw/kZrhG5u/WIsp9OA1b2q56n03GLSgVYBvrzC/5fPysO/XPabz8ummUO7LFiupGiyC/imb+5muuTcrabIAFVTAFU3TOZeU67kUriJ5/6cZ1ejfiu7evUGFdODzb0N2CG9/N4eACBpva74Tt11BkxjgXHZurWegKicddG/tkAMmqZtEQIQx3cXAGTTgqCg+N4OOmjV/aPH2WvL1xCPaejsIY6ry8XqDgBxdrIZm19WOwc6WJjtGTW6gUed8o3U41X1Q3Ya6eQHiTryF8yqW1i9CclQvyexRYYaNdsSh6YbEy8H3IreGF5bjs2+f0/QVoV1AWeY8do+B1vJzs7hyMM/ZEZaGnzcwEKNqfwWXgrZN1R4Odzojekt2N6aI5SHhLrsNk+ItSdGBcckai3LMXmn5TbfIxORaY3O9m0h2/TzQxy6vZDb67eYz7GlIqNeCdSxn6/W/0UgWVRY+cy1bZuRhE/SjxFglo8L/Wg53fPV9gN6+VBu4PQ52/F36Lmn7m6/1WDL0JKvSN3QmP1QrGl+tEuXBLavVJnj1PgvobahDZFLzQ78DGllyNX6Qsq8hTXb2rJ9w8WMyaPRX7qHYXi40pREhSr4cDyiSBWpQb7kO3n3/XRy2u2Zv/WWLDwmxxus1Z5eLGl4nPjwEFMmhsXRZ0YANXIMZyG2f6a6nrba31uOC4+D/g5SP0kYcCR5cCpJlVEeu6eszxM0If+M6XYSYG8EZ++MZ99Rvb/eANfrkk4IPjh0yYJUuPPRc8dbKwGiVnJVfyOTqBG5XPnUvyZKQ8l9XMwSLwK8Q0TgGR6guEbcQgzVcGtzpQ/A46s30Vu3T6Lski59bupW/Lc+dQOUyuP+QqH0dIn0m5BuXj0dB6vt0Vq7hPmFeGrAigZGAabApkbbwJYivg8r7r93QLAy+HUMJysh4yFTBHW4PCNGqTka/1ygVbOaIZE4rZN4kucPPog0QzzcN+4N043FyPb384n05VKo7poj6YuFhCV4pezqb4DMa1H8d7qCSifTYTyk6HXhLB0muuAQkStv3X+GMG7jK4T30SDijXow5wGMxTQxwrag9vy+WHWi4QYIZ3Bd2OVCt6xZ9ZX9HF4abq6K6ngCP7r/dlTO+GpNe4SWXDhhpFaUUZzFNXO25E8qvxHfs4DP5+t/ENoEcA5RZFH37yp2gScoXtjbcN4OydNXh0uoEtqCK703AzBzRMyl
4+VS9F9Y8G3tJZNvvy1JsJL4mxpsXwYp5UoSWwUBPeSQQGJA7viWO1U8oJ+AcsoA2O0XOmnWdcUK8O4x/w1jLPxZb9QrFqMkpvH6zmgi/6jT9bezl/3sNdng4vxtas+SyWea6sU3zNtDmtwnglRuxaqNUiop9M/DAYQXSCEFiaYzFJ75g7NG9lZ0hR1i95OuJzZC/mI+GgrtB2K8gbpHtPtTnVQTrK3Th1SkaJF8y0mp4eiUeZzVhhOz+N+FHjCQUAVqEL2dk6Ku65idTLm1WZxqtFagLhb/ADD8GpbTT/42JnqfmEwJFTnc/O3uLITJZonCKlxWzJuyrDjyU1PvubgfRciuwBlKBJtUnkgS8peDCxQAlsDCIheA9XpjI6Ydr0yAlQpT3llVKsmUwdQI7lRVC3KOR0YFn87HPEJs1JiHzTrQX7UIhjQGW03IyA+TP+zD02+DcQpAFXoPAhEC33m75FD3sm4nmB5rECZPyPNdRTzuzkd+1anM9QADfyOZVZQjASLlk8oeExCKj7B/OwFp1e/3BbgCgFj2lQNKxEv4p/I2YBmTKtbawR/2T05USpLQ+Vr8CmzNcQaKqUEZAtgj+4cqIzRHSoXEkmdqklUCL0RRwjFiMXegqE+e2Jlcxx6q8L8r2V8TahZ4bimMSxy9EC9o6cs/gV0Vj+QAqF3svmeyLdBMg+8d5q2cOixjkKm9kmq8jU9yhivmU8hOYxiFF0Eti4Rz0b/yxyD1DMSNxo16k8K1qKmKYno1VL5V/r1ZZEZSehvL10p1MbdvvzCa9Jnp3XzBOIAiVfgdysCuxCgSpQ+wDuy9OfxDIiG+7lWcHb5321YdQ1SdSkivtpWfm+g8IjfQZFhihuoONqIDJIatH/yuKZJXvQvVVcZV+2i61LvR8JpkzxW0sQoHXKczKIPb9p7st8Kxt38Z1cylnJsG7bQl2sYFvMJI+5USvdbbS+HBkUzwlgMG3HFHitJ5A2GaFZQlF4TZQbQlci0BxcPCk9Xmm9l1TestOhAMDSqD8dA50WnbbibcULA91JAAbIS7KeiS8jNrLIqVDX/L1OsDvql1omld2yPrsSJSYFk6eQDHdLpNDx1gYKvv9P39+XyfvbepSyhUX4FT7kcCjkLuUIYR6p4aeQtNaGeeypUmNRSTP+6bAiv8aXcWyR+JsgfqU8BlrL8al08B9F3L/JiM2FmY3RSYOPA79a8hHoeY1w5rAEMoPlqHAdG56hs3laBTA4/9tTzY5TQZZhWM2j6Hld5EWAn6nmruPSHU5yVGd4BObw8VZag2/byYRE/qlu4yhG5sVuCfZ09VJzPz9bKW+Sllf1YGIOQFhcHm9TGw00SrZNrn+6/lQnxi6sZOq06l5TwNWvOnxTrwLI6v8SX5GcDMeTZCixdbf8I8pu4bMkLO9iKWRST3S8AzogiJq0dii1TQfIk/cy44HcCKNXMG5+8y079MsWX+UY60trVaD0MYmPrNkKKQPBOSM80q9fPSxzPzR8Yag+Ji/g6dfOXKpJa+TrD19wR4tofHRQZUhPDrj3x+k59y7ifvkSFejfOr6ptoK9Trn2PwGgKZDgqDbD8Lpq+YM9XPF5vrZEDVPMg7zTD8xwupUe2xLIye5cLLClk7eJZ2iuZe1mlYC0lK/4PQhvUaRdlXR/3kdeey0p+Divsb04uX//zbYP38Md4Mg9vk+Q3vd/5N8s5g7zs3nUnQmhnZGJRYdf3izVGL9PEUZj7f2EtkvxmtgPPhaz9p8JyJ4Be9n8W7/Yr5vFRlVklCIrBlH4+hEmG7NL6SWudY/dYn0MR6j/t5vj6Tg5StIlg+6r7MQ3/umXb1VdM1FoMiSFwG4Xp/3PG3ZFsyb/qRZOQE31S/XU/CGxULTzEZfXbMGSCvXlrVAAYRNd6fdv+IAZPPVpI3FotgJN3+HK/CDl6nKU1P8j6oIw261n5XRFa85DWLhfHnkn/rcl/0tiPtgX3j2L07Svix548abNtqJ7v+ka79
C/9mWK3ut0XyftcukhxWS+jzVomUhI3b4B1hQD8ta9MOyfCAxed9fC4YQp3Pe2AjhWrRTMO+5wtXTbyjDZnojH0fhZVZRupr2jjOIvn+RwfVo5FqxYScxkDfExuZXo/yeN2nHzOgEFlZT6a96M8V41X7ytVCEA9BqqZ5rBpuQlj88ikJHbOdT77UPptFc8p21a1O/gHITpioWR/lGHipvmWOkHX4TkJRCHLLT7lh7BZpNKph+IF9LLFa/hBnhyY3aYnL+uy4T2uwrujpmkBMhfJ6POGxt/h/qpvJR/nAfZVPyU6899h0r063rVTO71cEMDj8qWK2jeurbZ6QOBmTWQzL4AqmoQWONcrS22NbCEZfT+pjwsdvW63o5H5gE4/xEE7nWV2rMU579skgdYfuxrZnwzlVlZ12TWVnzOA2dmJ5j2EBxtge6dOp9A0O34s2NAVVlq0N3LOB9UpOaYp2cjehLP4ARrn9PbXqPdOypeYLY4skYmJNgUj+Azl7xgsPLTrzuPgE/rHufJWebHtrXJgNpQDN5VdtU2ouIZVHfO94BugOUPn7udOKGXLl88HdAi/Lh58rjtXofF/J44u/YwGFx9LTGp7Dme7SM47YiAsQGYbxIKL1z53WuoKe2ANzwgVAQaxCpHPwqxxoPeCFR14/oz2qeTViqDjFI1A13jUe8e75mAEu/nl0S/9NVIM5bEcxK5uX7Ses3BlL+mk+ZoDKaVQp5JwiYEAxhfG7dU01pcWDxH1C2/LvkCMrWx5n+1vk8mdBKo/NwMmuX9ZFxj8CIuvQU3LkcXjidoxKGCHRJeuPWa3ItN2cKZrLBRH+9/SuaCzd6V8Y/LjyvefXUXZiKYeYjR4ib6TTCWAgKx9/CUTqUwyYZhiLeLAmFrkYUz8nVPfawwfdK3VW5nleDj6LDA15V3IwYZbodAHbPLOkzVodixnfigCvzc6yhl+hKgMPxb0s8/sm04uiBgOmwy4hl7fYInk9W7CxwV9tZ05A0wZF8XuvwMVmQVD8RSW1uxz1BzthhXh97cd8rTFDD2j9yL8npGzghdmepxL/TdTLI9vPGw3PMzlfLf1DKXPnrW+KcOLDs3F87qGjlvLobMASvVVwIR5pFbwqb2tU3mfwxrhvILlKzPJvmuAznFQX6ff4Ha11Ue43/AbuVE67OPwNH41QnMKDh4Rn7if3FG2HGBnSpzWbDPgnPM08EJQoI1b5Rqousnyvw7W+QiJ/kSscXQsi4u5VGpOMyq+fApg2hQQTETO3MUgLTG4E7iBfBcrzuxe83Qb1ADRkqVHnIFNuua0JR95KwgzA5anPIJ1F9HhfpOtcNugn4cb8b4ETZNCnP2t/u1TeBBm2h3kWEl9H3KkJ3K64ugHxI70G1hAiGr9XCTd9Lmi3yxQzBA0hNZxXH0smF0nYX7ASo5dQL3nPGitAo/277RLMDC8rnLVqFqy3Mul4y2at7h6aQfqcXceFNmtUGXlulgIT9sqTipKEJVVuH8Zafs1HrT8qNdbmU6x/43rNP72lEwQHeEgLUeMm6RoC3wGeYOpfUIz+c8GPwDYWSU4nlimvvTV5SC39FACqnzGmkSD25FW8sKsPB8Td+9qoSTbprr4DjJJ68A9O3QUFfkpt2fsA8IZefkdbj7A3mDi6GszVwLdsZ5U6ChBSmvTeTwAU09btWaK/01Pg72MM7oEKt6DE+P4qjt3Xi6aR+k8eoUZ6zTxX1K8inOzfVj9eu4w+DpE5Ce0mDsLudWQb5VZfgeho9sMH+k8INJH08sIrPfZxHiWh4aqb+tBgR/sXNb49xYJxZUw1Qc05x/iRwdcQT8aCzvkpffyRC4TmBQpJ9lEl3R/CDIoQ6A8vRGSJ34w18kQ3Ny+YMbNEKyPr/pGKwaQJMjyb+u+EmdcmHTsx7UfKOlJJHRvJXc7oQYe7vYjHy0vkG3sZ8SgIvlFk7P4NuVMMDVXdg81jyPdbFdneVRvCg/Fs7vxtJeH3/GcqBsQIG9tX
65lACelDJIZFsrVWBf1arxdCoqsuKK3KrnTezymlKK6NP3p9qzY+KsFZ67gxXst8wMjhwROvlDBjAyjgmbQpsJXbyYGfp/tOnFqsYZ8z/Wulou/zAhqbLRQyllNc+YqhR0m0B5w14k5AvBsLhYNCs1ABP0Se3feQxg8wINkI8FwP9PVXqjAiTitTqwDFSbLCcocmmltZDpRZvao+CkAuJngCmCTCzvTNdkNSUM0o5MREFnJvoP7sEtZV3bWHdQibzMEdfdeMJHazbaZgkYxrP9w11kZAEJ3T0Q6a/02BhEgqnfflhPeTrryZ0eRTRJLpQNrn+InO1PW9kJA0gU4AUKJhULoaC59vwWgNOH1/uxDQgzM/pOoDvttcIOjzz1/EsXiLCrkfs0tto5/mSb550uYaeIK8ByexfjciaHrqs09X6ew8lVCyv+Jg7HJimOnIWuMNVGd4DyRkRFUrs9Z26OcYkLl3VB7LOGPzWc/2L/n0Dkmbyssk7MVxszLrgyHWzx/89GMihdI/HTf7F6GaWEYDbet4ydENZAFOSv9+b8pumiX9rKK17gEk/9mluIbIOM9SzMnS6BTZ24TZyuIhBZwR8bz5qlApoViH7kdNsH2fPHS0NUHbjLvPyOgmBGv+gmuERXCNPDEydVtC6xf5BJiXZC6yj6K1yMD2DsvmDP+l6z2WHQeWJcGvectugyLEMqG11psxgNBaEerrBzh170xvuspY5xBFQqTwcI+IjGz0a2hge2qMV/x5QvV3NfXbA7VYfju2NTETV6qqwJsbB/vYZ5oBpY0qmPhvfiCPwQq2iAW/Q3MC/Zxg9BTamDowUnqeTC3aomJNyW6AdA/SN9JDkKv6XQiSLAZilOp/VAdFos/3QOgBQJSXeNwwbHVY1elm/9kellEtTdWzX5j1mXA5ryo7soX5PrDnSjqMYF+DXzpTJoeQf7uAUlq2qYb29jr4Lm8uh8tGpXd6KHYyf8c2s1ml8+uyii5Nb8VRfLRJzIDLqyuCwAWb8Iz34gZacuwDtSBc+/b28Q9Qnms9aDsqFvPVfzxzviygMEKlC/4xenrE//Ph46bx7181w4DvXlXkZ3NW39Fy3N5nHws+kh1s8nCc/IL3j+P5hq18mEiS/gdl/wd9rvYgCDQlSz5s7xEEgf8d2vNly8//4xDK/Q/K9KeQj32+Ldfzkf/8L0n+788H+v///OcE13+++xY4fd8fdfYAy9+xD/W/EfTf0Sqvy+q/V4agfweT9d+B8v+71t/2IH938PpVTibvuv/e0N/vCFRn/76jHTazasXdaj/q+F82+f9YH+V/4f95pqT75f8+9u/Aul3dfw6sVTK9v9Z9Uj4/6b+fYJ3y73t30HMk+e+boj7z52r020T1N+nUJM07c1zrrR6H5//TcdvG/v/4AHjY7Psf2zj998zPuyzZkv9Bwb+3j9EeHlRgap827ANShHJ8+0t3vIrzyuc313r+kSQGRM9PtoRs9RnIoATcG0CyscG4soyovn76Gkr2aQRJgoFVzZTMtkNpWjXTKZ5blzRu1RVTjjXYZA1X0cTAq/AMI1uBv/sLwGi8rCUvRaXAvF/Ve+rdF5UnCvGlFosaGove3xR55b2L3J/PGb9mMPuIlCx91nul3GH/URiiMazXHcrhubD6D6U8mbFa+ZGh0o99qLLFoQ/HqtAu4m4m5gOAfqGBoJ6PPB8TLC2qNtDTLpCNKf2mcWxORI8ygEOAy1Wy4l/cgUHcM1vYB0gioMnBrebNnn4T2g2r5T0P7oxcvB4pcqhAB+CZbge3Mv7wxuGb3U1C0JMnnQYarTtcNSbYImtnG26dx8xwDISIRUAKQsDe+AsiA9Dr5OihpMqoPgTlXDJRK+t29vWY1OZ0kV6MmOerGzjHmxoHxKTaWIROgalNzxW9csITIM9Wy9MoT8vz2tC3vIfKp9MxwpJL/WBUwFxvkgYF+YeEeVlzew/UHUxNPbdYj/mxnHcTiZoM4/GujbcY8Rpw
Sw8wIVBL5xClR3gwz9mqEljhg0zsqKQH9LY0UTctMZ6fGNVHR23VkuPAfLAjEA+Lt0TyaclXCgoj4QH6Z6cINC8/aFMp4/222dFPv7W+DNqjgeRZi4ALwlKCie9Iikk/5kUAa5obJfm/57MUKMlctS+pUj6eM116SZfAKVUYEFuOTIE4sDJr+Rb992QGJGPWCOjU4nikYoBKeXRfoqUCZojwaAaOWybhnrHRwSGrPN251Bu7AcQzms1pEMj4yix3So7TepUlr4o3ELQ2XSqwD/qQDoCUmpU9J7ZahRoBawVyfNN7AD26sTXi75v7yDJgP/QDGBa4AMjBDvJWfq/OKV8ho2O25yvJdThNIWgldVdR+82/3aJL4csa9BeYu/VWy6T70ekX67PSXvTtZcbxulYhNw22k/CX0Kga76c2D5aqZSU3EX4NnEZ+BlOJVpHDeZUx18jO5k2PjhmbsFtADQ8QkULCouqm23gQtsfrgefZdyao81tKI7FXEhZiKxBgfXcB8g20lFnkmBdv+hzcaninQ+I1sJ60aTQp4z2yD4J78azqtuDlQrVDAdDeZe+VlAi/+vha9P26TSDD7Gf5k/yOkDIsI+75cpEZ1wuOUTbPv30GVYhpD82KniFZyv3b+7vytdOZKGAnorW3d6nTA8F1vkhwe+qLA7Y3v7cvOf+3l1Ayjn+vzNHKjf+TbyWwWp3BOrxc3n7uK6DMBTG86YToh6r9Nxfg1USvXxYLuEqczMx/WNXYHtAXa9977o8RLD7xDvMk/h5ED/qJsTyBnE52azT9p63X4CEaOXxR6hcd3N+8GTpW2u039QhDDuXijzvuxfv+OoddP6iRfSxGZxhw/oaxUTDNOmc/4Qfk1WBmK9PPmBhrVLBBdLCpSD7zNDkY5JXsYNQScSjl8Q+rkmcK8PX4c3ogw6JtRcgDFeBv/NS9BFJXAyxKg4BU+BH8+86eGJH0zjtWS20zvLiTv5iylak4ZtpSXs0M8SxWLuujjmrnGt67Sa+mnFLhahz26fYHEJF9els8b1c7CvLm6N44rRlDkv6vFUXJxxIC709qznSpiNf3Co2V87kw2Xfe1TCePH3O7srBwJjp6P3E06CPn06WqMWqx9oSzPduMfj74iVj/d9fWaeVx9/5gx8clGsPmO+LsG49NwGL7xGLowEP/7MAYejjSjbgPw1iznftjv+vKrtr7gtnSw7e6fzr+qavOeVX8UF1oGtYBOtNef4i++XO3dsiRrmARqbYc4m222IOOXrIG5OKafvOMvOBARewrlKAp0dc720rqBOB9lANBFOYpEoAYj02aCrNCAhv1vr7Lq5YzE3co6eUXskDavuO7wiksRkgvqXMCvRgU8SvjAFCi12dHnrMzEojEsakJQFjc3wO29j+dz4qz3cH7qDWs/Ue3H9MClXqH7aTxRW8G7bQsGmBlU6ZqOyhir7D/K2Jt46wxB7QJJRZrhYPeDl4zZ/0h0Dv+TFYopRT3zF4TuUeKgaa0n+enTYfc+IfGvcclg6bRZDKtcjB4t90FDiGByqp4OvTT08vMb5Pl0VhxKUaVcPU11ejsaV4uBbd+cavAM/pECljUqeY4Qdq35rhNCJYUtrbMnlH8PyR1DdQLEuOBbXf52wO1EQZERcPS39uIDt0DBRlCGwcFoBfAnd2cqRWLON5QgdaIQIcoLGK+ANKVw+fMVTpGXCowTvmQrBtAYG5xxb6JR3RDKOdr3/GBk+nQwzjm0M5VHhwsfSHY6SYjHZZIhL024ft2GjJ+TrrBD7vfLhkI1HWjy+tgqLyveaygEF7rUcA+W2VO1GtMeAHr6XV5D6yY7UIVHo5uj31f3VC7jOlViJGD1iQZlZgdNVAQ1V3BqakN61BIcJ8zMX5rucCYCEj0JdiKcgssXUPi9tirrRjbH/kec8hHanPb6mUaSOxVxkEs+6KpNcqnvdx7EsosUrbWpjNHu3rltxLqXZ4tCRT+acuKi0B5wuBaCxwD78KH1vX
w0OmPfaPZLTlk6elt5zEeVlfuYhTznq9cNHC8rgk9Zov/9rjBd2FmooLZorEwi1BRxNpo7lSCUc3IYTM2i2DsLrXtv5yJueKla1heVR564saVivKqg9m8Dk0jNWALclEHYL5EyQ1FN++71TydTAFNFowWRRXHaNxaFih//pFTx5rg1E/HfBWZ+K7kGPolf1Jb8JNXGQB6MFdZrSIZVeoCCLLBv0DHfv22Sx1jPcVp5bqO0VG5mAstzcZ0tlRLHMFk5Cs/ysTEk45loZpuqnjPOEmRCZlTGoc/WRPnD15y2qQ788R3h0AqvAw17eaJS29ycV0T3ci4wH1ZKM0MqtvYfBSsrI99LbXBwwkOw60jy04UkcMLOHc5lCz5OSLZNDuw2Z/P/c3w93BuXRD3L76007uE3wuup7l15zQZbk66hwFHF6PgGmIFZVnJnG+ypZCQASVY6vrwwGh8mPfO4YwiILV4anPRV8Se7QQhqS/qx7ueLA9ZjOpkQsacswwr4sZdUHQtDM3Qm7dKv3RC3o6PmvKENO8wwYp5yKXB2NP8ze4UHwbIqHkWSI9QwOkIsQOwzKj+kyFvPyjka9cMH+W1Zo6ZMimk1bezOa1zG82DzB0Mz6Y+jqXE7SPhIg2pIVR67tlgZ5wgrPuNbSigtq0W1PoOMEuMYv2Ouyu5X3Bu3vHUR8ahTsEuIk3jfqIfvDY4lIxtWb93QqRvnFI3RlN/K5pHR9/k5pIJXIIbYVqKbHyvWwrcqFkgiE2H9KLb0VtELDTH8CT/Cof+S7AwJTaeBZr3fPYpnyQoVLBkVy7d4+cWnV7Q/bKNz/I6NaH8KYolyibkjve6Oj7Yt2Zsd85KpCRNGlrp+AA4hk2Dx5J86UjhpYDrdtBEWf96GV63BhydS+tbqWiKCCsswyCXrl/u8D+0mqb/wIDNPW3PZdP2p1H7eHN7xfRh9+LKD83MCZSGhmKRnWwLx0FshA366XT9gNdUREVy0qXFDEAx0yYD4WcCWNaw5+n/u038qZMaS/pi3oxPBp0bGKRKlAjBXOS+sxk5Tp9HdrAqG6J3v3rsmgg1WJvT3xIFCyjU0NkIp2XW4eFUFmNoAkHwBwsaVoMxDp0yPW3NB0PExXI8SFfZHJy6v6pz91S1viWpJp097Zif5PL7/W7mh5o8U7v3kszeTAFv9Q0nh64Fw+Qew6NB52wGYettNN1QvJOs60xKQ3VWBqdvpYDvbV7XvsoeiAxKZAmyGOVpnfNUTmSsHqHR19g08zIxzq3LyVIweuaN6qzNd6YHa0yB5H0E2B9x6KDiiwUf4nYKzuM0rEfmynEdPiFS0On8R8elNuxaHOjW8+00xQswfIVvBhzl6H8XUj6AMHxmOwP6CvGTEjrq9tLkXcJ4klSJuk+IXIz8jWuvr65ocQcuyY5Mn3OW1mB+ahYu33tevlCOU2py69Zp+PCUXtMeXmwU3RYWNOoj0l7rU5L7NY7FzvqMRo12mEIscDIyJ7Wm3EmCD45d97NfUH95WEIo3l2eESSzOkb4uaIhZVS7udAxURt/24zL/xV6BIoi5vCrGW6T7DO59YZlf1YsWxXA1GRZB+JYjItPGykAadQ801HcGp4YZcdTOtaX+xiAr+hUOf0qm790SG4HC7juqdlKtx05K/QU96HlYx7hYfdnVs1f/caZBH1xk6e96Prtbii4BbVNtGWi1cxxkYAaWzPqG1kjBl8LwkNGEpXWEl7iFXoa6ohzIxeYVqW81Q0jjbdBC88YX1hkF4oRHW+tDxDJKXzM4Z/ZlLc6MdE0i6rsBlwJSTJHvoegPJbadHvs2puGfbyoYaVPJ7oQNLmoxQU1r8dRgHnHk2awmTwQY/iSLse12qqXAHLMHcIHhPxdJEsKuiUvSgTkp+RMIAxRjS8bRsDk7Qfe4B3+wV+AQb8aY2UtIo3IHxzOuz17BdVuLOKmN5LEgJy4c75/NXTukDrK3pedBVbarHQaiQ9O3vKOo5nvoYeFp7JWfKk
SAKzfTNJWGXje0wzVCmTrbeg5KkdmWtmrHNFl9wfr58k4c5FFCC0aA5DrA/yEz/WOmBrphOQ840sgtk693wHafk9TAKizoTNhYysJlMv7S3EgsTrd+iLQ2B1W6RxmFKioigp+PzN1ZPwnOxXHZ6VUwh2Elesiu1Pogyr7hVzw054b20oQji9XdXPPCSDyX/jRZLnkTTM3Z+9xi1fvKTY+QnDMbD1hhBRX9X6HzXzC3E95FRvqss45i10BsBvXEEctzOyeFHCg76r/q+AEGocq0Ga5+Vvd42eM0adtomv43J5aBLVkrVBp6vLu2rl9ytnww201RQ+CCQW0Gl+sDGl0+jHjYKstFJvcp0knGO8sGT5b0B9eB+4+PxhxO3AnDuohTV+mPbyEkW6GA4rRItCaVqqtuw7wus3O8RCqK+o5U4aqkze2OGcViKmJds0f4ak3dNQ1i2xHun4q7QCIKXV51KcFgrrjWEtRzUcmSk37Pxz/zYAdktccYwjOEgxH4VHlu2fTwTEMo5bsv53l6HfsXJgeRL83OdSkt2bSYHUa/qrqjX+JeJLP/LyLzFjYSiwCgw04oZbzuQwEQq7ZraYsCd6sb8Hk5+7uCJuxQyB5rcJbs3njl2H5gsVkONjhsVlNWWgYdxDY177s99e3PWPJoks6rg6yuWEs4Luw3ERnLOYdXao2x3aTRtFiInB5YJYOvlHJTl/G5LSn7hYVgmxV4j9ea/E7lmc9ClxPx8bFc+qI2/WrRE2c9AS30EWOZW1Bc97iP/6ZHGnR6O3tD42tyvTkxw+FFhyvtYGh5jJnhi+DWVqmaplP18HtdLIIB6/my28flP52jgi6aim4i3nvskao2OSVqhwrlI6+7Yopge8xUFFb+NfdTpgfgLPfVdEXXKJeeXuI6HwLn/wAsOYH5B9JXkmkZ7nxzRajsev7Nun8Q0g7EbiJClbaZWMgqQ+ySlY7A/D2Weuc6o/vvdL62zkND3yzgF9n3XftPT5tRogBOY7wCG3rg7c7DtdvGVGX/mrpLxY4XFNGRzvAFIZ/lWTbi1aw8Wfcthjj+t/VVI6EigbUAc8vkrWoIlOxO5vR/m3hAQ9L5ppaW5sjJq7xAz7MN4Sk5/RrIcA9ridwM81coT4m2/TM/uYQhFB+WY2ElZWsg+pwxlUJimal6f9YO0MqaYHaI7jjqkl4FfDPUhT3LPf4Kl17gWf3fwM1swf6+dU/4r3EB9X/aSp/gnVNJlmhPg20kXf6rrEOElCaIx/8j0cr5Jr7QXhrmTPyrQsXrBRMddRdt+tmZwg0q9+OAiXYq89gDK2ZJIEwnzcPbmcfsFYHjj+IbFsT7S8t82oaYeh6mnwhQ1xMeDSSF6hbHZ1sLmad6b3VzfQJctgDGVdnTh/ApWcUxBSCJrfRfFqLOjjq3KO65aq4tl1Fm3I4EP8PMpFwj2DYEiAst9U5+EVZW7+u/Vf0r5OUXnjP3LTMVhoh+ynUCpLOT2l8pjDqyF8CtlUHeYCH3Fzb565ujfe8890zb6NK3Gh7nGjfl6q477OyyC/7K8SXlhQE1+PGcPleTF+0b3xz+kkzYHkLoOVQp/NHGJUxz2nqUUJ4bn8xtntIWWna8DL7wVs1rsZMJ3oXbC/kdWXxJEZlUo/qhkrO8ufS4LPIq33R+/ygOLfUlO8lRXa5Wb6z7vDDf/MbziSHI/bzrY3F+8XJFl/JdlsJH/+HgyJxZVoSZH9qifqV8aBrQjGd4gd3KlfI+Dziw/y4g/y6D7LFOtS9KWJkVzD92k9lWfA16+5xHeE9xWEZgnl74WW+wXulKj+RS8woUBt6E1Y54nkPndKpai3LhxdB8yde86jmes2qMmNTpYy8DTq57x0mm74L4ews2PYkZO0La1eCCZHFZ2GKUC0ENF1B2EDiIsegiDw64zkJxlmBdox7uRr27U0loajIqu95HTBv8kxVb/m13+Aftxj1XaajtXncK49qxlc5Cp3JJeBK/3iZ5h1I+0ptG0p
dqDyUul/E+u8ht+3mnXR9ooX82/qN37Q/hMtIuS+fXzvcPvnTSuG923rNz/tS/cMjd3VDYTLphbt0nTwhfVXYi2e/GhYJuRHK0kwIXEsUVgHc5WmH+/Iq8GCQ0PUhLJlxVOvn/J705OF4xdizDMnhCNV6tlub/eEbjcTapxtIfTsL/KRiGBh3X71/CkTbeiW98Z0d/PNCXgG+R4UdHgXFEVOHfkKhXW+3x8NCt2gHpkiyEHHNgkGR/3xY4WbyiJrBq4XC/YvltrFVrzCg+t5DHk+YMd+snjab0w8DVKIvXfhkRa5/ylq15obBPOiy+DKx9g+UFCkmacMqLHO15b/mJjzHrveIHwuLPT5w+YWLlQIdd9FQ3PVCybl6CHs+Z3FuRbY8oTRhUJ8gbmD5oe6vY0YtrZHm7MZeWQtCDoar6BXrPEa7ucZHwl/VQA+YGntJFY7BIhqVcPgwMJLu8Wqxvf6VkWH+J9gXEzmNUYP4S7Sws3y8cNah0aDd/r5Q3vQa6rvSMrmf6P/b8KnPJt+5gDwowRx86MUT3vvf6v0Rv1Q5JATdcpFmdmelj08uKjRqW/v316RtPjVanWExuzjB0Ztvbm+GfpVzZhNO7JobAwXUJH3gHqFSnD1Iba/PfTb8ZHU/kaax2AofbsFhYgxVTVa/pjUen8d+AmV/9BUUm/IPYJR2h5ut8tFwyuWPFsLVFEl8wvTDXHqz5tELkC/DiNJSiIWawuhvfwm4WcLlOlhI4s+53Ko3pawhcyH1ckj6Xmfdv9YZI1VRlU7OYCtMCpz1yY5mwhPauxrkfuN2S8gfmOgnIjJWjGqfRJ1sSnoafl+eAiUAn0rVKosUSdRyKsvb5zHRJO5L+PGkv98troImfRjpSgsV8tl5nQc6fZYtDZFqc0+yu5C9agEfbJ1fzt2iazIDhdrvYiufp5m4CvbJ4Jf2XiCHiztl9LLBJDobRru29zpaDGfqQqqtg2kMuX65ZIwp5fCe3+oPIB5tTAurtVVGiGycdp+j9wZwjI6wNaRO7tN71VpeD59ZTJ41pJ1vdD13VWO/sRsc+H/pb0MXvM87SOg/3FEnSqQq/H/9iCWx6IlXcTd6YU9QqvSFRtDrP06v0rR6ODCqBgEGKJgbAA2QN5YNgUtwi60jOOzcS1/nPNkNYsL8CtKHrk/vB41msFBfDDf+3luFM0SOCz3kUk9P+nmtBcqn5dndjhcS1tDZm3670bD1vNU8kYrH1eGQi/v7bg8HxtCDdhcu+8y4kGwmvPe62y6dSvIXdKGrbaMrgwrv1Gt0q7CI5/0h58QMAtZZ6RPIERTSScABz5ThlK9jcHFTEC7WJUska0wE9rm6MM9lLQnC8EVIBMTICN5IKFC6QoXLVpXeG/K//L9CDMO2egz0oH1WDxCtwToq7Dddxm4iI9OP4RDz44gkmrFiI6DqeI4ZIvAPjZKbBihJ/ehjIG+UxXQhMrnzd6gb8S/sTIg7UTyGBCJy0MdR/Pd2smtjfZmJWXxp3Z+F95y//Ovkuxy8LGvQTE4lxS9x+JgW+BvLKGrCdp5cN9HUv4gUKlrfCeonUvuPcOjQ4kcIHkX3d9xSid4ulvUH16Ut1rVVvd8XylXVkpjttvzjfoMPuOK2MT8WwpAN8+cNG/DzINjlRzmGjh1cnzBwx9mYSUdgdEaCGY+sh9VViF4VFg/yJtbrnFTNHv0CljoDyJ8c5BqY2vy/CacBtpFNPPGI7wDIvb4VjNn0HqUMNcMtomV3gTlrDmlAtrvHejH5yWaf8j3Om2zLvN0s4BrRy1R4mb08hwNzQqOPoZ/gvDm9dFXkZjpKCZUXVSD1cE2Hk/E9N15UdJZ9GIswRBRVPzuJZ/o074p64HN2rsr8xBixjfH4v6Uvx3JH+xeRT5MF+AZcLA5Tpk7mxuORV6oD/79JkXSBBLcDaBI+hkhqpxWv6yV2CYSCOSim6kfcVmDMU5jBULUzFzIU1pghZc4PqhvNwXrAraL0IMV7Lok
Jitf/paO4TY52ux+iF8p0XyHedp7DKbzeph9sLx+/XUasS0PhiH7TRyLPHdd0u3SbwhTUi0iyW9yKGImb3Mk1Yvg06KDCEWvobbqjKetDfJLCf6+pGlV3uTUWwcOUig6o0y2pJyOrF2f8Xj0m1eTFuFUXc8n1fS39PrfmDxYq5z24oOJ61saCupLk6S/YKC2sB9Veeot+ccEM6Ud3DXmh3K0AZqySHIbKRWWWkQK23aeN2qtW/vleV8VA5lWERgL1PlbPNSXKOXSjBfNoFb+0CnbswYAZ3bTQ+YO+U2lqtQajLQ2m2siVkQ++mDs97MijqBcvQ3SP2u2QfQd0rmDmujr+bzrvo8jKNV/N31EAl0r4JxibYDK6vytsW8Q/jGCPnpQq7DE6FTTbTg8M79y4PelnSj+JTx9EVlnjx38WHkSPXUeIS+/0OVTD7J5Oa8YuIgn+q86KXgsNEhkSpTdAfAtcUO3xd2Sg6vk72rWT//FDYpd7OsXnfVque46rHki+QxIp2proGmEf9X06JJ3THD4QQ5JvyT1IU8iQcIrPg8U8wU1vqC63QcJ/dr/RBZAD/OovDa3/v2rg8jc+7pb5WMDvVrUcjopoh56NEjBUe967TVn+vk1TcLmb+5+l+eMTN0dFW8kD31zQWDLAd2vaitmZNzCLdbkHQFtZejDxu8diAg3KB8hDJrUU38BjkwYE9xnUf66dlRsV9363zwNO3uQxvD1sUSgMEyMw2lFjdG+o7ALJf3R9HI8wZvy9EAnjSCVCfXzujIM43eepZyqn5g1P8Rq6GwSmp9dpSzXIveOmdcH4j1x853P90FduDw1OHg0MH7K5Zk6ZHaljJsIv+Qy1e9C7D/jM84vIxi3Jr/U+09UhI8WEoiFVanboZ+2jQgeO+0sJF3uZR0C4RGAlBBj5zZ3OKolLwXOeEk3q5mR8dzNJx8zah9Fc9yc2b0Coh6DV6K2oLE+vEon6iogAw6x6qcws0F27b+dLfTWsacYtgfTbB99SevWt8KNkrvHBJtastMyH0WXT8Ci0yx26z6cr6l17LWMRX7LlCmsfgIRPu2jf2Z63n1v7TEm1OtFxjfju53Bt7LTb71i6BBtloc3BNTedyP+aSkxjIZQCHjmIayIHaco7ba/gr6DiGAbN7yTyBggPE6nMvJil/VYmCpJQd+g7nmuvmRC33/UPRtQNUIi6bFyZDuqQrfLENP3MBq3J6J++joRyUhQmpnMDgywr9/IhnWIBnUnUQZCoNpXR9kItTBmyWhq1HkdI/1a8YUyDrWPIKR4eyt49AiPpo+uPlgFulXCKbIT5G+V7yNo5O7nf5DwzLd+Z9aoFXU1PnfeqvfY3UCNpLfHPZOC6Irr2N0szIGb/wLowQ/hy0WyXCFfEk+rlKJZp8j7PU7VuBvV/oeghqstI6cztUJWDGbFNGM1UOX7nihm383vkIA6Bqt4a0aJLw3cpc9DyQAJn+i76srMlSSd4+6ypw/hFOElVFre4+amQXecLVc14IzwrmXDmYpEr4wa/wrImE2mIvPiPMCe2D/in+r4x6C3xVzjVyU8Sk2iK+TM+i/xV1NjflNMH5bwoETtXwO7FCLyezNrEVnLeiqa577BcO1S8jfp0D5UxkLWkPjb0Z7M3uQJCC752Sy3l9EB/XU+dMnu7yb8y7pTv/pGwOMXj34kzTDUQ5XguNsNLnVXy1KeEf5rRsW8bS+3/5SKsN9oku0Ltj8yOUnHU5mHifrrtdt7Rbqqa0iWZrv9t4uoDvqq8uvSi/wZKfmWbU//dUnmiz/j+fubf6hUIuTFiaeGRBDvDJwFwsGAmpEcJC0TOCDgzIHzfHsdlb6RLiNesPViZ+HnwKEPjp2P5g8+SRgINvaBQTmiQpLwZJ0AbJNpD/Mgz9WLQzG5icwSjwHEU7ztyJLecwmCxrVMjWL1KWcf8bABCO6nuWpMCEuNCYICn38UVPjpC9ap1LE/CLCvFYTGRl099sCKq5ezdQHMCtjp
NlHmwBspug7Pe6nvqkodvB67DyRTo3Agxts1F5qGVYQPBAhe14aF059hzL5N5Zp090tizt9+tF7ejLlEeCWvu8zIkxH3e3qrjUvH/hIMD6NP1tcT/oVA+rAEDtjLgpO3DUNHAPQMD8d5hi3f4fhLmVvRpVTssQvLJMiC5+IhtYNNj/Qe2dNw+03D6Holkxg3HA96m7ex8GjudWK6tEkPk8FUlbwvrRkJ5s5TnO5rZ+EAer8+q1+Q6v36zIQc52QFgBRP0s9QLGp2r+ymMvKyQIKuY7dEfWK9svsIYpqwG6PerAoYtVizfsvQKw+iFpb3196x/47x11kKDyFSMs5WwKZzp9mwxokUeKURf8ThAV7N+6tbks76RKjMqCU/j1SPehhHt//V0MM7Wc/K5kgfBDwa+8WoBXHtzacdH6tQPXpO/vwCyi/UtX6TWxBImBo75kCDsWBX3wp4UE8nS6Wu+/zn/RWhSXXNMgNcm8Txi91/88IXH1ByLUMB/AHd/VAInbB7RrI8fOgSrbwECOqqajaccrvhKdW0Vl8CRtMCiPAzDYKk2gLrgti1aIH9vlU1FsXnpTrRwzHmsJu3DxyGr/1Z6Ut005gPKbgwTZOgZOSX0Yl8qvoXSV3tYdwts7wTMtrchPH/bs/qheCOgbwRnKnkQacM338pBSQAzJuUbj+P8CiCe+TxrX9UVMz/nZU+VYiw4mbSs/RmIsoRrbKDmI/VmI0AE/Bn/36rD9JRyR52j0S+w41yA6A8JD5pESFyz9a4PrjYl7Bs/zLgz2NMJny/devWt5aSXuHZe9R/Ah/Ofg5uBsyXV4RTCP0qLakedpen7s+I4/r1kHi3Kvm94V+GXfXkvT5sQTVqc4Wi19vIfXPB7uN3xqUfjp5EsDBy+reFpA4d7mPiuPhhGJKjareEwmkaKSM9j1TAf+zhdO3beJvr1yids/M6wKv8X3GrAkHexKVnhtDcwaDVEHOC9rpKpWopQmeTFy3exxK3YddXkFbR3gy+JCbdMRTKz0t7+KIyn/FLCZEeU6MUot8konUw3wXbaIHcPNb9oUHh4dD88kZ5aeMeRKSgfNub5eVsUtesN4C1ijrGVI9glCf2DVqVfjRrb5TgVQULHMO8o2+j0NubU1KhGILX9PRx/vVttyWJ4t2DWWNWpauChEzoUY+LQ9l8BmvCq0TYh5DN9iSLuox7yho2e7z8onxNBuoNRJKHRKXAc9Pd1xTUJi0GKkBOP1waaASRzTHcPmqsTCLhaoYmD6j5aZHF4tXge+XVMIUP/X16sJQe6AreBYNph4YxscSDgqXyPsaJ2ZGFXHXfu42qspPbScNHhbe861U2roWIwetGA1P3ewZ02t9uAfcxhH8hKfMW7UiOeAZs6mQFREOE/Pprz248WUNVp2tLPdc8N8zm9K80AfWA5r8YXd/i6cui/Ylib9uHUFhua2KnzNZ12pK2BEiLaAV2HeWtP1AQwvf7lmZ8V5v3+UPsJVZojBzIiWKffenPN2aQMEQ89BMpaRVCfV2pE/OXdHMyC9GjpwFdZqKNpsoW7FHybRUK6NbHa5P8Dar0FV9Z9No+sA6mUhtZXeqPv+3bjPQEXProPP8vLcI5nMMfWe+7U3A86gGhYT+mf/gSchF+bVr4MdLCmETpuZVMhKclpr0OUfAZ24f4tNqet18P56P66b3z0/aHEIEeqIDXaBdox5CN70LafkuhV/zcgpUjjlJN/d5WF5G++suaoarn+0ksgrzmN+nYYYh94LZ9+PmrRNATYV8gT7HxlD3fUdXZkVVoWFxj4RnOWo7rM/H6+GUStqdVoGog1dDY390w3t4KLQdm6Hmw+RYwjjZHcPMOn8F740zQwBEd4ykZm7BXjz5jajEXbJVc0g9HmOn1d3lOGbRfMhzpVfABB5Ed8U7KoXBMdWOk/NGKw/aYbVq6LekQXrEL1qfNt9JARhV8r2EnvjCdrdADm66kx8DNRBiDUFX5zQIUZqA+jHumeXr5
bYhIL36UZHkcCHI5fHSmc6iYmB7+WR/RmysE3QSw/C8gMIRDWkie1TGMmpJ+012kg5bn+mBXa6B2KaK3WS+EnfWELYLoVLVxKl/kvk9Jh/rqMughpDwH4hA2Tziy+MXSH8Z0nEWxfZwAYaom+6QIGT0f6jCjB625+7oyB6KBAG6YCStvOgIIGB7iwATPA6sHiwGqNA71r9PyA2he9q6bAP+pvE0P9H3BTRi6u34c6lvkoyj1GOrloTFZfT4CdBQLDTPEmrcc5AVGQhNH+emKeMq7MenGPwC5UGj3mS8td9pD17Nyq034NCPmyzxG6Hh+/OgaMCsQuXe+pyEJ8kOObo7f+PI96XSwD7U35VQw42ZOJ7p+lK80V//8Gk1HlogLBO54GMZtfEqNAUHClxy02C/H5e3z5y/wPu/xbvtCP6HIOzrhhH644iem9PFia/PoLyvvxZ0uwAQU8hkTgjcHLSjKopTKmdzdekw9jdqTtlzJz0/nDvWytG1RSkQUGj314F1E6mdecd+4hFygDsrretzK2Uu8KkNCLq3844+7uUfJuW6mxYK7jU1qRpuZ6a3sgZxa3yyjSb4tcFQUHquJAozSBdYBSJtbaZLllM1913F4gcdWRnvBiw6hoehBb0SsjMfGve6ZH2+0MVVvfqtJ0ihjyFbIWOsNLuPf9ae3rvY7ReOlX5jUDBFlq0Rka0X6Zs6xv0vjo8OrgjFAmTsp69k3aI6JP39NJ55IREC7WQbj3kO0Kdq0kAW6oxvcrGouNKwHsFZaKyWum+5U7azKdTnJXoUr3zzFcKmMUQRlXQ1sy1Kij8BOPbwTNxu0TApQOptULQACUDRQjLXSEZFbqn0y1QicalmpGas4asHnwb1Zt1ZGTVA2yCi1PpikbT4e1MQ6fH9Br6flbYqGrTKbBkoEpI8wLcVEl+n2UKOSQljiXcBT0q2bvCu6ihI4RnzzB/1Qzu2DLPZKgd3CRiem0usm5KaLtDSqts85Cm31sSc4np2OjR3I9Tm44RoHHABpmmrfuULAm08sFVtL/PITKWt+Bs8cPhQHjQ/WaxiiAuqWAeYjj3RAmzTdci6zjEQjGjiWjpipnxobbLAsIC1QHYnm1OzQdzoJ1YyY7Ml+66zRRBBQQrBNlvMgFhqyn3P3wE03UvD63SFemNPvGa8SfjDBZ/ni/MAD/XUMvxniF/uADpB4ZMMAX/UvZ7AlFoYsMaiu6ExbVDoHg6qSUja6DVZguj4Ois7s1i6G/I1uBVIjP22C2hBpCsbPjEetsLZVqty6Ktb6w2g2axxl+8i3RX/QViSZvOuDDCemeCDpJSqNjfveddy4cvOob11F/p6NXzOVwfQEmUkizR4O9xenfmNYiypeCv3pIYhXergudFUvn+v7J4QmhFHKYxEzWj1OK2Ba/EV+PzWPVV4PEojfhxHQnvKDG8F4vZ2sZnK/x4IvQ1+ub7KTuwt58K50DDVUOC4GZhwaXqhFySSOd9JFqS3eQMJZrIKFvZQsNzVLCKPjvKzfCm+WOfcbnlg9JigTEb1X3zp1Powj0ucoRYeEm4IaqQVLHR9jqx4ZeFcu8+AaAPrNzkHZ1vMKS6wFCBSxAfM6abVvqj0cxtw/mqCB27bPjl/mHve0z6C18d0c6nROm3Cf4xvmv0H2iEZExnvODHDVSb9aj5iuGPpjFj3K5yWN/N8ONeiUYW48PwR9BwU4LNagHx6xxMT1FgS13Z6GeZDWIuHWy0kyvqazdfPa95+pgzqK4+Ujt3zkp+czZVCG/Csk8fDQeP7bmqyNIbgzuOExDRDhFmMzOtFGxH5lf5lD/NIk02WvDObfFVCHuE3Fu+duz9JQ1rvWgJuxf/Vl2tDfN2+Tf9FqtXMeGbhStIynS45LYv1ZR6UDoTSICLd/BcJdelpcK6EfrD0R7fOIYdVcKW6Eo1Lbrk57Q8EEAc2xa0Sr/7fPHSnr693yMA9h9iFllhkrKpr4rzEybEBrDXP2yU2xrDqY4G9R
5W2uOfkwXXPnynPOccWR2dKbmIFKZGCREGMRI5uB/KyRFcOzPhMiJTYVheaP+khFJKU12YRTdjZ6dH4GvDMddje5mdNThU4JGzJT2ZI8RDFGAh28/fq0TNIEisSmtM4yE4lcGwvSwYYQbfqht2UWVh7bswEp0Io/NLiSz4bwqPkvM/exIO4Ud4srbTlqaeE0GakkATXO9uMhZd7dqDmS/HoszOL50YGAsl43FX4kI+jXiTjRe9Pf3Xou+eRvJLWse03gxLeNtuJgPQPRpyAKOn7Ub/BAXVd1DVxRzodeWWEk/xbLg3JGBp+LyaV+Kd5QDkBfw5r0KPe7X0Jqe/SiybRqryvh9+zX1K7fdoayzDmW3Ypp/V2ghxYq1WN4A1RP4nd2LUytmLpdRDnChl5ilmopHnROD6D6IxKvqIM0NReiNX2Eoy49lNP+SIzWXopqTu/afa1Z0X7UXq5mxIEeJdNAmdZvXFNqVyDUFX9GpScC8Cu/P6uDf0BDPoCx+RnLbUVENf64u6nNn2mm2XvLmW8WgPsLDZjNv9BqGhBbjNuFdcqsDb9bk34inTOuikZtODkGJbxbJNEseEZYUHmCTp/rAvRLWeH2MY/WmqB/QMVAQbxIYtFKYMmfv3qXrPd4YMRxwyTKaPKvmDdOu2X1gDUZYT374eJXV/1Vn8Qf6q2LmTC5azmXn01BlyAQ+c210aQCUjJXGl3oA5/xlqALmAnVrJE98qDMLpawIsfoP+J+6WsTzGj3RrbO6/VSnsh5+xmqanrj6yT0e9SRRxKGliamcPdfBNL1EDQm/iBFXlVYEK+uYqY2oQQz6/880vWU7fOgjZxPTK5Q0cSpR4BUKgXXPcBbCxAfeHNcZCecRutKgIGVZYGIGRjorT7wVUV/682Qfjzz10Cm+a48c2PRNXkxxGXaAiLxT6tl7rzkRyJLEjiIXm2TFfbVOc/HGnkuY11HQBSzyjHOs+x+vjpFKckdLsibYltPwjd5ePgeptiuhG5erIaOogGWGxLp6udzk/wSqSOTWdq4qaUV71qjghB8qmwyKI281QvFUIdZNTlHm8B+Iy5JUCc1H68mp27GtMaTAseLd3wJY2uy0gCfZTP+tCJ0P4qLfcdH65KsxUEPr4MqpXvU9+LfxiCx1VQeRTyNwo3wGeixx4JIZ2Z9xxmu+e+CC9cc/NXoeUN+JyUyfg7vJvy6W0OKkiE07iRLWdNkSS1os6T4B/XgtHmxMDlToXBnmCdfGrnXMyGDBI3b7HV+icNjCoNp7tNoP/KgJC9hMaJsWfGoYkotDm4Son/unrKJc+3nQet03hpiZM9VnpDGUP0aOl2R9LEV05t08rdLMLeNMlAtLAoSKP4kG/k95iiIflyfbGmrmxxSKQHoRIZA8iTSOjthdGdRcezM8lziM4or4lMq4oOj11GGPRqpYmY5jpGDhUa4FNnoxZ8+fBGgEY9uCb6VVI7lMyOa6y0Lg2bba8CCSdjCR1AKycNRfMkpFpMr8Y9st15fLw9K6Z0oQfq6IcUbmYjFG+IRfFKANQKfvTcYIJPoMQpDVd4ifO/Nqz6Hfn4sgDyc/y9S97cGbMPobnS2IP4BoYHbkuUMZNsFdPQhtdQmREi+PBQ+OlvgibH/MqBpD41JVF6HC3+nblR72P5cqtF0TBrjQCwSWzEKmhKorddBZ6cxjcX19TMA4X40Mpx+dO8wBCXCHsaIfdTuyFEo9RI//dYz3Q1TqLfCO1S4PNhRYuMI78Tg0IOdbeBFXyIWvfl83HfFbXvLUdQD+eJxQBwiZkYPd35dNLSIv8Gw/UYpktBGTh+o4C27Pj8U9NPPSUjq5nw3XFBnDwQsA7DKDMlklQNyeTwWkQp+2+sSnPomRYRv9qtPOX4De4vp6e/ymOZLM12KefTtBxIZm9ePSa8ps0pVfHhVCMFoBrJA3HzJbg3pTXNP29RWG16f7wcGvvGgP9QSqsoHkxY4yhMs7aVmm/aJ5xpcY2ZFeDdoHkzX
6plU7xpH60KXrDrecDi92h3PUJgTnr/TLCPe6sASQpnGW0OsQKF87n5FNKIf3bkJOGVEnUbKd096NcAGi/kIM98R8Zn4UPJIJVFwAgw4AlJmxsTS2ZJjwVs/p5r8VwOa0EPnB+KfizT/orgLhnz/5WpFI+S6hoFOdij5lucAirN/D24eipN8c/T23srJldDJb5t7N8+Hd8DHmwmUbY7OXm0/wooK0eWN0CVs4+47DbEKIyAyQ2wrHv422g9Pf1AtM/eJAZee+2SYzuNIqRbgxxh0Kt4E0L8bUz+kJ9KNq9yKfiH2HFSDGSh09suk1tT/cBOHxnntQyY60JHx44RK18Hu2T7fEXRsNhp82gQXhpKeBrybdAiJ2kNSjjnzqyU6q55aXBpeS8pwRUblrM4g3ZehJwodP4RnwM55XorRaIW+jJPfqS/coST4nYS0UC5viGu4NG0Lju+9jpY3XpawlfG3d5qu3qUvLnMtvkOQ8Bd2qI46DmkEuvK+c/PD8cvXCyzxumNgEkHZ6Q3D3XbzvneXWPDFDn+8Iycms1+Cr4bG3loMidMJvMk5W3JMlz1cj5daDI2t+ZOD8lwQo2XEHnMcrJiwHVEoRJmxJJ8rDUcuYYlYWPqUFfjJofJfnN1pXtIJF/j6fWfzqucapcHJ3MaiY951psl+prqWvkVVfp8E1j9m79cLxjXNyLgG900aWhk9jRKkobtoH+Sc/a8X0njmwU2LXZMw0VwI1ADRHi7GOLIG+74AZ4Gy6cth6JXcZcp0bt9XTfY/BpMf7lBiovsgUWIU20eXy4a7DOwv34uKs6IYurlFvlRPV62H7x8n01QrGdOtzCePggI40vMY5gC5o9/dNS7FeIS03zc7lAcfJ2dGT3QsVsOBavcEeTnMbTzs53XBbjjaOo2Pw5lz3sSbG9OdMXH0kFlwaKZ3na3Fl50iRF1ImW+koDt3bTrEWIgzdpHUkJfMYDRgBbsTPw9FWkn3KQ/WMkR9GadGLwy3xtMZaNrph35jYv/riUZ7eGIjpN9gMRNP18/QqATyMpDz87Mfw9CclkOTujR7uoYsPEJk7aKtJ/Q9UrJz+CHY5kazAzdF9ohd/9JJCu/Bv3clAE2J7l5y7/53fGh775pDa6XaWXYcIBykVisbNLisCFGAwZKYyh8G4/y+mWVU8XT25GdsgRTE0b1Rj4R+GMyVKkvph9VEsARk+ylSi63N7Kwe+O+VxaM/Q8Ct0w9gAsk+mv9gMeEhlCA8rPgVCj9g26tzNd5nEKsYv3Ni4CrottGHMBOiDu2IiJSBhQC7ue5POQ0YatEx/yM1xJCZQGO6fuIQv7pGARPLvIFT803lxpeZSDcE/39Zuo5tR3Ug+DVvTw5LMiaDSWZHxuScvv4h31nMDed6bCGpu6uqW60qEHv9ZLeqQRa09BuqSNkXZz0glj/YgUkfGoC8FkBObyRlega5bPnYiQU500BJhvVlmgVBG0L3JvGwyUvU3u0A+ih1tPDv2o7KKENP5IXXMFUnkDvWv3YzGw5pJl62C7vyotuCvgRS70FmNN1bxqwe21rLW7cAK/H4Hx8/BPrt8ZtSETG6vm8Eq6rH2qB+mO3sQXJjr5NVq9wPsxn4cO1c77Ris1VerMsnrsdDcejaKxanKJoPEasVyK+yduumbQ9nmkJyHuOxd6Sx0mnYiZ1cMhodb97NvE/CMNADvDmT1dpEOl6KlfU2oBTYloTHg+kliIW/ih1T/B0madclt99nw1ou90h9yPSKIKA3qoHeykZQRRf6GcY9PEM0DqyCIYs7cmTvKnWY61+yuppD1Yj8nRv29n7Nb/XF9uabXkWcEWyXX2SZZrzTaZcg/gj96prHw2kQpEQ8Iunb7rEKY9HS7wOxo+oTJ/Wm8Qvd5Xnt5zO5ieucZW6lYKovwMV41Srza1tiY7x0gtMFFpu2E9nIbCbTOomdCiglYKrG9NUNE7Ec3thcpv2A3l1ctNzm6+K/0kv2G82lwAzQnKzM
Y239ya3asMbLlrTdB2LveEGHElYNVTQ8Ppx9/CKufgKliPWl19SWSmz5QaSqfLHOMMNRKXt/5RUYj4IKxQoWboYvWe0ljwptB3OwVuiz15dcHol1UmceXc6cjcbVhGA8+qJ+Nz/kIj4xcHgMdqdKbI+UorsnkAK2u1ogZoCdA78ktlukjZe+ty1oQuixqs5gIiVers0tfCDE7NrXKXOgzPg6FovYBqSr5dJJArqyw1j7uGlQAC2MIY1AErvlZu8w+9LhNFWT/uAo9ZsB72jO6iYODDLTe6lY/uo4jg8qUV2KHgO9VizefjAwxmHmajADvJbrIp+CnqCX3LONHyXKNwA1jr40lhCkHhwkAHtaCIc0OJWBjVBC6i7rAFrZ/UhguhclY5BvvcI+PdNfJ2hAd6JQHmLvCt3pxSTc3Y8/RBdJ2Z4msOzy92PPq74zB+jT9OxlBOO8hDtEkB2gDr7pDj7mLi5HfawcneVhfbwUwkqJp5MvURswrbWLWfLF2691cSWKMRj11hXel8BiZIkpLiaORSypN+LEQqeB02BceD83Gij0+exlETDKruuy5+KQkJ0+DDGxvJGvnWin/It6mWop67c6eyIepK8jbhcq2PjCea1NSoQ7Frz4QqwqZ4DUoXKuLh9i41WPr5XM9Ptpsx2ZH6DCikZATELFEjRTu9qDO9Fb1owapO1N6xiG2Uh0spd3TbWmBWx1kqHxCgi9Qma11VpdCCi7SvX+FYYMgQt+G9756yG5XsgnhuGogKyiZ/yE8Mqd3EIkV/kZEThKFR5RaR9Xadmv97hhjxt9QtghK3k9y8DD7O5bAPg/2GoeeeZwuY75M0XRQphMPC/Ma7H05tXdUOJ3sPuY/G1Hg4VuZgTO+61cTr2OaCYLplWVX5/5pt53A4kKeLGoTJtQ1YMxImh/SXoEQhzx/Hwbi2cOW55WL4PGg6PiJxS9zRkLD1m1pVk/dlsulRGUMd0o/GBEfuHkV3Eis32uBY0k3JdU86Fb/dnmDYPDwa1iC5Op7GHsmjk/iy6raf++vWsayHTNC95ET8U0OFN6OSjBqR8gsGCcwoW48YV96DLpVYoreKPzbroGaqlfl6iZb+QBQ/T+sKnoo02ibZ4883lzBvGhuPwJofDBp4zyEkymBEK+gCxhjVEU6cEuHMBC5Y3jM6PXPNoO4ytlZK3pIh3TDVlylOWkLuknsUQM8rpW+1ea+2uJOxRXZcVK5cy/7Nprcl9Cezkw9td/kr1gpdEsUyTiUZps5dOnD/a8lHAhbQ/1M+77u7DeZQ6xukoBTTB293DYsOJTfIFqCYCPRfHex1huX44DUD0mCSOme1G5jS1VbPcShxkZbWgyidMU9S+SSkbmC730KnYPTZAN7UPo2BNcWX26M81I1CIs5Z3B9kp22w+KYXFl3er7hMivfmrQ8aAX5P0eRDLw6KDai4NDX9lub9L7A4sp9RY8rYbMu9FRu+jD+ERwAF4OX4dOrcjzLEM+Bv+sSd8dudRt/Kdf8RXcOFqeF5JQJJuVOW0W7Jgbv5b3/Sq5KE0bpqtXPUh/hU9A5J/VABCsAwUUYgkvmoDE9t2VZ7duMOOH6zEtscS/v3nKcNQLf6LKOjhAB+HzvPHlsMwLmakM4fU2SRtTTNcDafaazm1TIiiYzYsvR0XFzpiosOdcsXCACGBoLmTbvatAX70UVAx6G0Url6ohrANnIcV+wdVvr3rXGs6d60ZiL6W34fltbQ4rGRTMhMgeP8FGzVEOOs4XLUT+AwDOLilvcNh416zqE/iFnxnabsdQrDyg2dYGxHzrHc/PnxwkZcXsWRQ+5SQ7h6Ct3wnkUCpEElAiE9Gp9Z/vEeDPkHkBT85KBnn9jodkvy61FKCqsbgi5/NsmTOxgImyh5xAnj70yVHQQD2nMkEhYmya6vekLn1VDuqw1TaIDEjKy0Hm3sgQc/FJ0uR5TrzkLJhb710RgmJ2EyaYmlUe3lRcn4EINtr4HlFMhoW5DmLX
LVW3UHV7QJpBmdonr4weubo0caJnDQ0GQrfdF8y3yJcW6jxWQhiB1b9e1DaGZu+9iCDN05EONEhkLDekQQKIMkjq8d3ufn/kqn/WF+hmBebF+2pPGiLbHYwys19XLUNELjX8WSpIlxMhFXDRtkwi4+HpysAJK8vdrAeISeZB9kZwLX64dL4s3pAIp+1HdUTdTAKASM7nLpJBhwTTZXYaQ72t0qHeWTCbGHQb3FU/Dz6d4fsOGj595rd/MTtnhge/TroS+zVcchiVWa/MrIJ5VvHzielp2ZSxWuYGHFE6TmjkcaPQzmT7wIoxl+0A5pxQQzThOp8Pgiezb0cNR+ZYlmnXfZfFIBVwt0rTx0opjse/ZpIdrCUlo/3uuUvQPKiTdJZ/dR/ilTJ0uQ6aJyx4beADut2qgsT8h9dhCPVAEMIO6AY2CNPfErn1ur048gXN5voxx2B0wnamE19GXJwQgDmRJBbBNPfyYjpTngjJ55N9Tjmc8XU4jrhNbWVBpi7Oj7bQql0TJdeJFdLrC3do4QrMvhEP4Xqxy+fFI8kyafGSEUc2SJ1xxDEGxWKXo2h0J2qsRZ6pL3z9AVkBv58hbO1Bq+0fJH1ixglBzO/q2iV/S3/eF0rkUMyRtrINQ3fQhiSPN9p3sjB9M+rG1PVjDITWFCI6b46nWQLcD5vj5vrHeqFhzVaMGSvcZiuiDzvjgZPyIR+A7ePVKltAuYeel0PU/Lr24ls1eca209y3rQzaLze3mZojESXucuToe3Kaz40ZJHzFD8pyBTLhs2H/O+Wx1zOE1A5ViJEM+1r/BAf5BmeueAMTZQx0LBcn+bjL9WEINo8pAClb5O3S5cP4ZlHzHqvUhXpGG5ROoR7UqorOCCQTSU4CQGb5AG5nrR+NsQMVCS5b73C7MgI8M7ybpJ7yRNQZI6UryZDojCmEs81d8Uokypj08fhHgJb5YzB5zXa/Ai48ra4IwUXiaFjqOmfpcVueoa5D/cbTAoQEqq4ocHOtWAv8iloFC2TkjaZ04jvkacSezyP4dpx/yYSjceXBJ6C7xYgHPmnnRAnjXSfFccsYZUPqpxUoJdvx9ved0Z6RwFs064ktm2k+4tEtTvdXOjuv0eMXJEtHek9w3MRLqeE1SDPVq3QX8MAjx1vCppvMoSwLpeIV+cvHYcWBHd+rVfaF0zjOJF98hd/9/kIzgF5lNKZ2tOyTcWeDGbImGKO/QkbAvoHAjkFfvhKUfomRNg7dvtLMby1pZ3wscZHF+K563WYyFYEmfkJb1FopjGE6GC5befkqkMooKcOcL7xIwS6lxyUrfdA9sNoW4zEIxUoklk5JTGz/ak7BBajQ7MxmlVbu8Dh/UOSNHvfeC+Y0QsUCWbtNRQGNXTTGk5RNNxo9BWn6lhO47DrMq1Af1HWCsZ7erwDSdRvE+d24Mt0JrL1tziAZZZ5v4gqhzw3dwuvqesFlFBGNSqmkEwVVvs1UsyBOvJ0Xp9nqmhlX7lnk+WmyAryZN9D1mD+fWNJ0SGIqhH0HK8ayk5aGMO5s6YlCg7UrFmo/g7cCliZtsTHrb7aemd5uAQl1keHf9qX3KqNwIK1FIbSdNtydaDut84bJTFzUfuPeeXw137/rjITudxaWljwku2oGxzMZ3mtBONPsQfkNOhDXO2OQhJ/VvJNovshZtK1PvODdIFD4r/xjgyQlgMYV7O0+gMp/BgjKoDtkB9EfPohEU79R5YtBZCPJPWyB7xUJER761ZWVhyRJ5ixoO1eJBscAw8R9BL3Frv1YLTK+llJRoIgnYcFfD+l4oaOkfX533fC9PekeY3242qYF/jP5wn4XqCkcTqdij7+lBYfiSIrRDo0fDBTdt8mmKMu7aDA5CXm4a5PAc5qT2D6uwzTZUeLntNW5Xu5fgYNAvV8X3mL1BsgFQNyV3LUzD5M3msu5rFm80FHcxSPnFDFD7zYr+l/OiISvtJxIKSeesOUAj5XDnSNFu/lvL/VioadoR6pGEGmn
C4bOg2FeDKs4gugR8sCtr82q5MLBbGfCT1od5GR7PTDfGiuuBmJYZ6J2G3tPNGqIFzdus9ghLJH3boSa30jxEtKe6sLRHgR7EQO/IDHJiNNqBEsNMw+jt8/BK6w6X8Imy1fbBIxXzIXUWoMn1jRu9cRiR48Qc28/bcdeg4TOEyREUmPt3/ANFCWarzDxxHMK3FGcpjiJ+Sg+CEOPUsOvtXYWFDqoHn5MIDNGz/2ky4Xmzf28K6c/KHV6jZpD6VsYlFIcibYRV+0UYRhpJRxvZqlSwyTzAgObgMMngD5w1F3DR6ujglT5YRERgjZ5fxAHJAYrGhuMRRBjbgOIQ7kqmN7Ljn6/MvyDIQCTpeZ+B969qWWI3+jvVoKI8vqKArpfR4d2E7H3KNPKDIM0X3e0Tme7kfgW98/hMObKzAZiaQa2BnGWPPPtMyEOUCWmnkGeO6aIWpjZqpUz8bOda6tfRjt5EAjK3/xKcKZfZ1hBYC1F8rMb4wLjCncrPa8G6PMOoZ8yZpLAmna76h8ytDVWRUbae58mzlF4wjUG4d2ClpAzUnUdniTY6dqWsZpz1rm+yQwunX9TNXpYbtujqxMBM0zBF+qmMf0P4YnWAWIk5YzgD3ld/M5cUgpEAqor9i/84W0HSS4mOqQSghWx9wE4WbTusS4NwxvfcWmHMDKpX2mt69o8unYne1hqbtewHENCk5XvvC/DLqM6EyO9n/f3jecxP1ajpEd09MWCu+n99sHw/b4z8p6Sqd3x8Ak0nVtOMIBfkRd6yCSU/+4o2Sqt3QrxHeMERptnVxYho6BMthHGN2cyAjJbPCZNb/IeCvPCyXrlf6mKmSi2UlS8N651UKC8xdCvIvULj7AkUcWXGWCvcBaDXM73VAdacH/n+Cr/kIbNyAvwcTxoDnAAnAR01ZyxfiVFlEgS4vgBe4hucRLxjS9ST1yVZvz7wZ6FYvkD+XDBkmrTjTh+DaHMNwO9K1aQTSJ5m8NYwFGWjY2TPfFmi5dV7r21+TSR73MJVC4D20lBIetYX/V6MjDDtzCj0bCYudzSHoSmeuGX5S5+d+BpLsX+g3gFUcOU8bPVGavR432Cpc7q4Whq2yzjd7laVxkNLHRXhjKykBwvqjDPMLKrufBrOhos1p2vZKf6mGalDE6HCgQH/ut8Qbs/UkS5Rq4aJY7jF+42GoTtQLybxQze6Tmpf7oUsu/rbms17hptvwNiqpTJCv+mVmw86tfSsIp/NwSDyc6fkMedyX7I7e8CHJ2Vklz/MTAbxTncR+Id6hAjBJSLhSyCNjviTW0dOLqDvvFW05qv0/q+AnsDwozduB7cVR1rf6yhZbRB9rt3/ajLrHRhfiVreexr+JD/Pv+xin7HGvhhxDxKV6TJRoHtX1+wvtWtZrAyCe2HTri9gwvUXeObnWyQ9goax2ZN7Tt0Y7fhbWSFdRyao2LLn1tTs+J6Fnm1+W6KZBRv2hBZA5Iilr2Qo3Uzv7QJy5OcIS8sjMaj1uV5AP5uMIvSstwRBaoGrDgN+zwamLyouAmWujmO+mDpeynUs6TBmlPIjfITNzkQs37a1WuVNMgBuAgKuLUuNApqasOjC5wOEVlRhKNxs8GbX6rKwWLw6QxZQJY1XpC8iaCHkN80/URQeKuAk/vQjjP0P7uEuKJE0dRfjSyXvhkD/eYPDGEZHntt7JgxjoL8ov7DG9+Fdj9xi08i0cil1s1d24MFOEFcweTO1aqDdg32MPk2M9FzXx5OPzdvZ0kdJ7x+ckdzoMZyp8hwdiC0gNvuwGYXCfvtKYduLViwVxbKGN+Wjo2Q7GKhUXsh28oxLSHtrNlRAsLJ/t7uzQozVVdjIdY38j3MajpgCNPNuN/m/hz3r2F9+7L5zl4kaATmxOC2q6o7so4P48FpzK2equibS4alPIw88HZUkb9ShmjFIc+YjGLP5vzb3wOlV0hqKtLA21QNBvzYzAvLntieeryCkBz3RCUyXr2rf/CaLfJBU6iGSYhd
j2jxOJupUFUDu4UKhYyXOuZyhb3hxvcRl1Z2pqC5aarEQfHFGe7UO8/W+7Y1A19I4ndCgs6++grF/INC6eINaQZL4MCDfZrkxOgHk242PCMatGu0UGYxkb/pF6iJx30YKADiCh1no1gHdjkvk+lR7ePkE1BUpTCxRTGkwSGj2Z+uvL/jTZIWvLnrz7oRyFby3EwtR4yOvT+5gZYFaWAcnQ1xZalgj6tHKZMkH3sD5rh4hJs/u/55/V6hKOX3CkXDUfCFFuAw5xvMHQ9cxSuMTB4JBYi0bvvz/e6ZHBFJxj74wqi65dnkoVeXtsueryCOM5AC0fRdHnVk3srX8fYT78/S+xcTC4dEbxmSylctRdOdHcT7w2IpBaWBEotYnxbDzZM+H+ryXmLvgX7Ab7+C99S/kstIZfK3zg8bkcXL01bTlDAWgSXSBQM+p0y2Pp9ovkKxfobI0cpLml1R/RJUZEKd74fL9yqguNnDVvaJ0mGEm60/wectIEVNrpzOu1ZSfeEee6Lih4B7euFlzHhWLlSsApHTO+bDfqfwXTIDBYhZ0HACJilaAgrSUUCYaHjRw8dvD7dkoP9TSLX39LvdI+mRqLnnKAxm/yFsyRsX361IRicK1yULSUdoSxA3KP2J5LDVS/59OwdwzCgGHnyxwp/nbIK9tIqfnmLDBf9brZoy/xwCtxK0D4GGahhYzNFCb6PbOze5UktHJyvqUZgJ3hA54GZTeX7VgXYfkDs16PwiWAXg/wYoLZExZ5yS7hc4ESre8/nSmoznzUSiNZCEa697enV3u0EloGlb4SM8QvF/QzxAeTSTbyV47MJ/V1TueM3bA+3XjodGimz42xfkxAMlHVNImPYz7YnJ49ZKDZqNc/bupgeg5dMUYzGerAheIfs1CQOrSH6Pz8+Hgyl/AOqqB7fymnsGUkCC6flt/SzGLMtV7cTtg49rgLG4I7/rJ76h4QEG1X9X4NrNBp1cGnKj7Xu0MJ6nOdPONJE/gzHwEqZXNa7gEZM8MMuu6dhGCEEvYrPo1fEQ0p/BAQlikJAoYc6bv+UPwZDGJ/FgWFmwbsnOhpe+3bd4hQ+dBJ6c9yDYOj5l1tHyg+JRPLBRFJvpT77Fvg6JHEZLK3naJ3JibSOPBSiVJDQH+hzn9Am89PaYdVnzu4AeuJNZN8jYkBjENgBDV277hUCKpImhjuFHP6Y/78uc2VtTqsCNf7iTVriuli3NnDHwK+qFNRoyJLmBwm19oo4O7Nw+y2A23sEsy6UZ0TErPU+Fm3iwur6KSu9jo11ij8Q14Eb60w9xvkyqeWngBLxnLAuQJiW2iSRnTFTOPg9cR9Qn6iEbZN7ZpixB0zBsbWnrKNMUah0y8TowjA32jVKvv/0MKBU7zjdl4I5VgJRD/xC9B+vUYDLvC54xGottmLuRSOKFQOp3tB697vqArUbT60KOKXIzXu/IDXmR3blMaKNpHgqPXY4xOWhBxgbJbUpSF9reDaGSDZBdNIsR/yCJECCfG/v1ASe1BnyqZKVfHcKy8G+EyA3EeNF+RWVmHe6+CeIvJ0R/wUl1tkjaNwNvwlX3pJJ/p4lt2nfSLH4E+ufBhhd65PQl5vNWHs7orcPcdotJ6l1MoZgCrlUJciIHrIItqsKIRfWDhgu+y8sGCALJyVTcZleicMu9F0efL5+96hMOAkb6ofJmnULgmJaJZ40JCQbDRS6TclwAT5ebHNyx2lsJ2H/n3R0+2ZLJf1T93EygXuLMgrYX7vT+J46UAjljY3kw5Z0HrMXvuwNeY0V5Rb385JX29UghyopdoAOB2HNkwZkBrTiD5O8b6cwfzOF4AKI/D3QoH/SufJ2ThrZtlK7PtczHV4xKFhEqZ/WecYjx2iHr+ZaMO7T5U6Eei3K4D4vPCtD4u5+T+XN/AFX1OjlDYOuO0Pyx0M45YeyjjfdDR6U6v1efepHFzNW2vhabfUWVRkTrg7ZxhoPZzkuTnKo4BqKftbWhLv7gzTYyHFKr
SvZaVg35qtw057HiLs8bPWw/2p5oZgcQqKvdrlmfrcTU+B98/elmYEyEXNCU//POOG+RWUsoQJYCVg91ncJJdazwICoze1eTFDhfJerzANwNMuKs5Dx0ypYi6Hd/1tyiqRetmPKGBQon0YfUnwPi8jqxMndWZmxM6b+CGRuZ3ePSVy2DPHUHA3owihi6KNKjZ7QFaKFmQWjcd64xsyBX869pjCjyTe80sk6bF2ipLSLSGVqkiWtPFMSF7CiKK4jUvlKqvdeb65stLvULNVjxoKzls713s+xVgqAR8iU9NtZp2ODLB7+esZevhTEDDQ5/T5Ps0h7UqR+fQWYnP77vTuYfD8qqiPGApGtXaRVj3UhtAKWcpCAge8fZOsRNdGSdqiDODmMDNezcp4tjEaqyfSyyrqWAoRKIgmXwIdBFiLTY44GYkCj+gIhe9TAPu8wzYA6PwQoxhnJSYwhL/sRT2BF4Lo3MFMSQuU0P+7NIcPpWZrWqe8TXZPvtvLCEyCn14WL7Q7U+j5MMPrPf3dFs6oqF4L2TpWsWUPWD6dNwgMDVpVBArJ0P9ZzHMVEbMOO3HCz0MHAcWW1Tug1IU0GTTBZV/1AOyzVjL/ulph8xL2BJacnijJX0w7bif1T5E7UvOoF6qczT9GFS89Rk9n2Eo48YbZz4I9Z8mThmdZpWbD/znTcc0OjoQFNDNSni4R10s4a313G0icsH/8Lmw0a+0UMeX2cnpqfO0bEEkK0MsjOwwC3kGhzaOrOJE4ZdWajTAC4LYIUEi5CDNXDlTVnryUmeu1IhtQ6oUOYc2O61QQnYp7YItCG1uqU4mjzHl+q+R/bACt7QdS3XPyU+bQcM+ltrwwUqJ7F4E8o6sDn4e3QgQxjVkAFdo9b10eEggvThjB9aET1qWKBb+eYBKNegRFs/U2OVzlWYOFdX9/M2vxlo62nF4Cu/bQ64DCF87w+vaqrvQKcd9X0leVD8gjReW1AMIJ4kvJ7JD6NiEcVTcXYihn5ZTNNw8EGBjq9xwz9xWuhB1xBO26HlRFFIqy7iee9wAMBQ2S3H6rcL5x9fkFW7pQeGbi46gScKUzbvor0cTEAs/qastg2Cc3DGV8HOCrxEs2XgHboyXblyDt91Udex6WOfBQr/1qYoXJn69R/4adAPf/pFxoTTcm9PcpDA13k8q0VPBGSjNQ7e5C8d6zs5c/AO1pcl7G1PHyfA8xbRhU02kwYJwI4UT1SDpqXiTG4MaGXE3R/Pj4CSNlBRgZmv09F5Hq9AHo7BuvFu5kMs9guajkRIh1+X3LsZ9Ejbw/lBGnqsvJYCttyHbKJs5i73djLPGvnBbCAcktdXbX6bkqEgYSx25neI2Cl+xw8Mrn04jGq+HSyJ2XYV1lbHyQI27AXoGedHfllYMsK/nEUx3itjIs347dK817/Z51QCOYzf9w3212wX5XwfYK27wqxAuE6252OsYWHIGOTBgPwkGyQ8Xhq5p2kCEx+4ff/UKBJ+XbWbSSwUNS6ZCFHV6mO7+gKqGdb2tWvkTgBSsQjxe2++lfVI7/O3FR0Irr5+SlfBh8+aEcL+W7Hftc1gNz4uTapasSpkd4TZfK03KvLWGkxfR9EA1QPAQcGC3PB0RtKAXKz0fkKzRmPhRdLZ20K/b1eIIB+Gxv3he7mc5SzYEUgNc0Pv5fSqWSV44BPk4UEwYgv7dwsTWPGlrD9XL3E72NPz7sDq71hnnHTe2cXri0Kg73tk8iQL+uR+sNxU+/P+K7msfe6+FzeadCX61BF+cZD5uxCbdjsWOSGgYIDrC2AQWag5hyzrq4+XSN/6fLgsvvAH1vK6guPTDow/pOXS2nllXmQ9gKJnI77/CKAtZ+FYvqPWa2UtBmtvmMUYUedvoO1MODnysLgDaBBi3g1oX5J/E7xaZYimFwUMBWPxYQGfI/8K0ABzcDMfsgijmZvOi+Zpu2HHc4P9qt62tTwIv6sG/6FvcHRGfnwma+eKoLHaLoS/mQS1hSK5
ir/jIGu2uVy054Wz59RnVUzjo0vCA1U3klho4BS+9HDUpto6D5iI44r0nigp6J99X6n7wp/4dXHFhp9gxQvRatz9vcGRN1hxBNYMfFY105aJGQz5SZ7ARyfVdqP9l0XjTtM9xW8VwXeb7wXJfuxdiPPQYrSD2VvvGoo7uSlN3JfSCO4T/Vs4ouIoqXewv7dIpsQ8+8lLdwSB5SJQqk851ifATtpNX52gz9SDs6BT3oj0mtB5S62zpj4QjZPoSMQkJGFqc3hYyq1jIj55OFgojEVXJKxZH0w3CnYmjfDdrHvkjrusQqBzCc6slsjjZvYt0uq/dcsCSUBIMnG+zdCBrTp8y16xX7ebGd37NcQbBY1CF527ZkrQbUx7Ym5a6Ww6D9Ua09pkhESQBB6FdWhK3ImJgkEh3IMUXtLDXahJTEIS09Cl1QLSdNlNpMg2hmRtGKXh7ovGpk4xKuxre57rK05Ao56mD3qNXVf9LQdrpTuCt7BDxu/dqsDue6NTkaVVckJbRXfAW9JTwW3WGQa/dADjunkiGw2NBbsbttMdLXCXPPC0obr0AwibUo1zuHZMzMC+Wydi9BVhZRLzIII/UvxRytO3C0kzKIu8te7n0FQ/0tW2CQRPuwlKc8vne+l5wxZeDvAt1udObZK4EtttMZUkbfoVwxY+rpwAWo0sU7Fjre6xHY8s4UHCp5MDUCpVOFEoLRH/QVNVpSieAhElHLrNzXdeL8lMF7I2cAlIe1B8pzogcQTCLVjGl3kJ8+u16Q0JcnTVfRc1h8SuFN1pGA2fnmFn5ubZKnkYHjpC4bMDamap97lBO0FDIR/99qSBBZI+9SRD1ghwiygr1nifZEDrWGpyO6g7XuaZcOPRh5sO+Fql/tNQD8Xz4rHgARou3ks08j/kqmEZwDBMIb+tXgeGKT6r19OaIE39YNWv1Wd8J7BpMzMgCl0Yo5tl815fcPGWMiQVoi8idW/Iv++kgEe3Qeb+9UQzwlZNH4g3iVb8qnoJvBkKk3S/fjhKwG+Q/Hy2JYJwH7t+vAEmb7a/O2BjbNUBWng/2/N39g7MOOD0Y/UgRcUufufS//vrhZqzAYv0cNjIZJaTf7VRsXWINFyU/rDiP+UI+cT586O0H4k7hOjlmA/TF9Nf1qvWFz1RyWPsLk3fyLW8IOFXavOJgTjgiBKc6tSCfc5ueCihgCxIgj50cN2W3kL9guLJ22htUBZfJ61ki1KKAOBE1bBiHGQUtGS2gWaX/1Crr8Op0/rO7/G+NKDpKww8m34Uu6ETQ278gLC/UwZJmLoXA/ta7dcTL4I/jasU+HXtnyh2JscDr1N3YhEKy6q5J0AMsF220nFiAoIywvIAIUaO4SM7R5qDrulJleinc0hwtwp6EnSTfkyD2NUgC3ju0fkKOSBbpmDnkk3SdNvkPwjJ6sflMzBD2Is6qSCh5HdgETa18Fz8JzG9mWx/2SQ465L21NDR3tcVzV8a6FccVt0Y+BslzGpmbz/UsSvPhtwcEEBDp3CkBmxkmv+1VPn+Q1g34TjTSEl/0a97e+SMCd33gMNoQbyP0E3EBIHzEXM0cUg6+eqd5ldaF9Od+cCha6bGypZmbFBE/+6bMY9PlVhn/DhB5Yt9lN51na2vakombjAA2+clUJsGyFTo8eO8HzbdNwIWkkMNt3DstJIDsIuwCB3RPXzAbb8qSXRKs8rvHobjaFg04gCbsbJQx2ekAaLNd7VuIxUsyu1+a283t/BVbLUGgVv1wkEjviNoS9jrLZUGbPmy8KybyFlHUHn21xeCKqMzrGi0oJaRLghiuJ7KdPatwJGYsR4fe/KMvVPvgSwKB9LO4l6mO6sHyjG9qP1F/1g16p5LHmzIr2y3vb5f9kEw9ZdQiM+2SAm1PJhXBtUYDlBZsYsh02e9jo7LL6qytSeq33DYt4tiBRQOLBA3SP6J+ta3sBi5A9CFXZc7mHZIsSBSXcdyXH1wyalotxIMSS4TKHUUVRmzPza1BnnA
F8UTkw/G0FLteHWUgayeyW4w3By3a1R/hmOjS38jEarhhvOxaDrcXhXqTuTSR6q3OpZJlfG/vFqSV/MdlNLjsoADtlSZvkT7XWhFsogqfkLAWGMI9Wp68IHO78w6wJZk12M1eqV514ZpLPdjElT4NQDfgRh7+L4HYH2/QuJzMoGNZJcOOQF83QYvgGZE8Uw001RI5dolCYnhHq76TQIA+VuMPFPFEnJsYD1Ll3zIut0Xma2o0KgXK7EzNPCO5fDSlrFjpTJIf/oW8tuJ6IFvNh3CkUzSWpeV6TC+YYFIAYD7hOQiX39QQWTCns+XekaxkFiBMA0EhPQ1tTqZmmhgUzHtv2OmMFrR9dWfVgN8IeWAnhP/TIyXVUIGlufn9uN18l/VbmEjSbVEeA7UwQAXnvl5Yvlunb1J4sSokzF5k2r+3Yep1GNxnV9LfwLhLYM/cvSF9V5CqGwifJ23GvR7I/bGN/ECTzjVuV3x2CiOq9m2vp1Gq6CSRuRnHVVPf/zN8Xlvac6A8rTmFX57XvUsPcPNgLkBkg0K10Lc55WDStHtPqA17hXstmMsQ0bSX+XB8Aa7uQKcl4ELBtgugNOsleU8wHTb9/eM2RtwSYIBhrLiojT+oWijikjt+0BSrpnRUHhpMeSvoLp9IF+LTC92rPxu+wCvPfaL3Xq/o9speuJjEikdGwR3nYAqb+wtGWn0AmbzeunuJwC8dfEbIJSaW37krf6LaRk9g/cLoJ6nNAJuTDvf7PwuSOpxNcQbBrSCklishmHeSgLLj38FxEB4FuI9X14iR1KXLsTTgk0GrA5hchMF+NjCmSniR+GBczL7s4mQ6iB4UjEVWVO/9ipDPZ2LSDI5jW1Ax3X8Imgh3281+Z5rt7b0LFZjd1ZdEbqFlwCrW0PjjBeao2tzz91nxYYiBjsIGgplhsvxwkV6w+b84t3J00jib7BloUckpiCEaODjRhgU0SoYDx9gpUlxnipd4GnepnDiQSlgycrdem8Gf6+zPj2+SKkUsg0AUhD2nci22R8XoIe6ecnqiacZaMKygXNkyTRPDSReAZm45+MvwbaJY8PBRN/QRQrsgCj/C67DpmLkfZoZqZARupJRv/UjmuF3Pkd91hdxVDKv8vmHj0f9ZjT9IySmmad6I3EdM/pL5m+IbIxKuyEBAonmsk6IpmUr609fR1WuJ5aBfZL9StlmFE5mmJhPdfLUt5hRJIromSx+JxpAJDDOAoTWYs+KNepVvLyo3wXNiXG07SI/ISOYICTgDqSTVKqU5zR+aXM6ovu5d0NMKhewVwVFnbI4YZeNgMXvkUVxObeiZYttPqH1MS9Z/f4EUqXrHZ0y1XBCZEZZMVCWWVFUTyIA8rMoFb/gs2O/lIb0HKkTmfnnMpRYUz+gNInlIcLjF5T9RuWbB8kFITedwpiVXouGkOzd9KQjaP+Y8ewicqeS6UlFxn4EDeLU5zzWDSYdZydLVfe9Q8TAd/ar9u+kMBgMn5XgkOt/MkBLM58bPA42XTqYJAAyFjj8npW2wW81r0QzHHOGhbAR4lbzV1QX0K5bDyoi29F656JJ2uxwoCSI+j0CRPsiNl6vfCaM0F3ZRajRF30HF/yLp6V7mA/+xa4wAp9lR1kSISVyfKOfVJYN1fwrFDkhEzION/3DfKIxX8V2nMR2olBW7a5wIWV9P47HgMSK9t7wh7iFpLmnB82l59Uh/3I/Nf54xvzsTJfZmxliztLevIpCu1ykdqHo1zckxOnA7v7Bmm0iEUgvDpyVjjacfwR+xmjp/ntq4nDuOIdlyXBifhAQ+XDWW/psZBrOOr+ldr6OMi5t+GHWkF836PE9Aot8R2JicihHhLwQ9S/4IWr366Z5L2ZUrBVKnM6MvXyAP1+MHntYUUNAadSApws4VL1lWMw256aw6VhufLmb9Cxq6qEhQCBEt1P82oVyENrCJuzfVO2SgPq1BM7f0ZUCkt84ue13vxzM5ifGsGWfxqyjkzB6+pfnm6zu
q2B6HKcdS+YQCuR/HPx3WBwFgIP3Is+3bd1hZ4qgZDaG3XfKUxLO04web6Vpw228LEYza+JmWQeC0yJFtGkFWJu3VlT5WE/kfn6pVn7W+64Rivh407DNEstBNk9kS8Hew7poNrq4SwMNTZGk1nhqMEMsQx888eZxSXToUgHyz7CWup4cvSG8IgvN34ScpRsVQeriD2LhQYnOZEIl3LdZ7nGKPUGeySqMJ91SRcZEA5JXxso1in5yzT1I/oPPxbKuTYYbBJ1fm9uM6rtZcYIgEKuDriA5RwMfqlJcCxGfdcxGrS9Kktrrea7ftcpukgzHcGeTrdL+Z1PIENMy/EMMURVD3dUVXxbqszvaViA0skiafdgLdV5y+2k1yPbOgBQO9Vl4PZq/BWknKl+A5Yw4iL9gTMznQ6K9B1uyalCK2Mo6d+8VVgBBp+a5g/LYorVihnTTEbf3TRLzi/UrOFxZuG8lxPKpSn3wUbtuIIJ93VHeUXJHMvSxA0x2xKV8lZkO6zxST3liRAn9Gu5owq255Kn22AnDSjsXaocuvjVkfgMsGpQGsjyzjocgVT/dwKdKB3L79QjiVqbxVxm8HTQqu2uTf489wEHpnB9GVg4subCowVsld5lWlmYGNw3nd1nyuZ4NO6uzb58TyEB6S94+cb8IIGblWeocTfL92OJXIdZh9FlrOam4ZccP0SFvF4tlm3bfMUgq0s9qAmDWXgFzJLKEo8UAL4e7xkSSQNk7Mx68DdxgOA86Dak1R5I+hQ6AcuMHdj6zruI+8FrgecB3vrs9za/ze1qv6HR+pfNUndG36G+dnsfciKe4UuXfhcVRF4TDk7lHOBeKAZLFRUlW24KCOhxab1bGyBu0BkH7Pk7BuTcxp/N3LRtkEIq1RB4AywaCZ8XtYJRDyrir/Ipnqz5+pChU5CZtEI8YXssQLVBc22w5HVxn3ed1MawuLVIRvxSgQuDLub1djvSXfpKiTMOsGb6RGOUWfkIP2wbAFf3StUuQ0arucvOyLfW45PV45Wex82ifE2YUopSPjreIJaiEsm9bxYMSJkZmhiPVtV9R/PARlmw14t7FXpxRzvYn0p69t/jwL4OmXmH9u9W3gqwP1OlUbJy1oQBzFPugvadoFMx7HvgPkWxk1W4FWnT6islFnPAyEllrzX2sFK/xCSIgZee6UPqEMcTI71085QeFajjpDFWkY7vaRX1NpISutACV0g9qLyL6ZQpMblbwhxEArD+ig7+LRER+blOmCf6xtrge+HySfHuDX2Y+t/oU54ZoKU9g6O0yMZd+/fTDN5WDJXXQr1MO553fnXmg+1JeVsQdD5YGYguXdqs4JE11bSuAz5ays4jPwAtUShFhxda6j8cKmH3RyEnnIuBii6xRLpBD4SSjDXhiQQwWp07OvC1YE1kquWo5IghWFg8m7epql+wsfPHjMEAHqi1odBfOZgQzUgUeA/pJCq1g+7bpwhR7vUSr2qH48YiXrZI/6X733y1uNfAdvzl5SDWw8WF5tMoiOdhG6DOkXQvlXZvObvowpeZq2Q0j0VlJSRoURyU3m7moglsAiapT2djQ2BqOKesvgDDGKwx3y8FgBOKKZrwjAXocxjdOERsOdMIKN94kQ+ZDnCOh6hCNHlsE/RBMmUMbcwN20b2lnDg2qmAxSfYPJrlBqDhk6V+xkv7K2KXj5vyszs8G3Md9cBnLLtT8IJhJCXembtrsxgWOn/17YPRvvisOD8Y3LHmJFzEUiNNCv2WQ9xZnY/yk+pK0zB028LXdsHVL0fGQETr6PmiMfjzIzaddthkXIL7iUD9E7FC6OuYsGBRrs36ZEW9Mi2h7FvsP4pcvzC9hbKmsjDCC0wRZsK92Z57BGF7uh9JqR7XeqfOnDSvMUu+YBogDICmLTdLjA2FHRp1eJGAi9PJmNaptOptxedqCB/kwQwS/EpNCp1jmYUtXWVg2ZXgV9DL5ZOVPsCAfioxcRkQ4LQ3OvwZX4HRNFGkkoJca
UmSVrtmgjYQBp2d0v299UlscV+6AXtuAIwGSNvR3iuRQAHF5YqZHvPHbzCSoEiNJqI7frX0ZuVYG9CmxEiDEBqtVIZsCL8jrD7KX3bCVoZNCo0apSuDDDv1SVv3NY8iztB//KEws+DzEz76+EdofmSJlDq5Sd3bLr4Bt7zhWFDze+vbD5q14x5Dcd9A+WjpeBAe55Uw3rmbMG0hUYyMsfcg32NGT7pMVvTIlB9y4NVuRsDv7WSmLM+oRABRml0KNWV0GmnbD5A7mLXeIXX2USBdToKYD4F4Ab68PdZaM8hSVz0sz/+os6ZPHUEpzk/ystFLBDUsIAEFdeiluMnnp08l2B0/wNq+FWvOAv5U/5AWT3k6PJSeEm6RW0100EnLz09WMRtC1F5BPs+qMGd+kGyPDEsM9ie/u2D/DxZhuCO1XAQ3bEzoYLLCCVLopY9pU2oUPeNep6iVO9HykR/Fpkww+Hy4otmu2/jpUwpOLK0QeNvZY2+fweUMITb2r6Pthb9d8jA7E71Y5IyDzlC7vzfoueo9jNNQ0yMILkvZWA0/evGjpxRLSr2REr3BIF+7OmlPFjiHu8WXwQLVkXTz8M2P4HruwjMU6PnDbB3axaNGzUu367lYz8Gza/dfSDh/fFdnPi14VgZqqSqY+2o1rYYWug5hPbzFLjwdDurM6RB8N6kc7LrGBhxGaUNvwfWx938XgpoY74omXagNt/g7HW1Ar4Kdu5KNlcUTT9B9eBmqmS8oJ8Cs3Bhs58iLjO6pkMkvYKGGKdoUeHlXyd8iHEykVjsLXJp708S3tjqSAnGeSRlWmlUKmHip9qq9OTiDG7A8zsxgoqvC5NPzuS1abMRZbYpzfEsnLlMWHweOc7fgGPklFBcHSFP8Qll3DZAqercS2R+DmIH3n1nrEa0uezPcXXx8AbVkVRm8YdU+yl7Idah3p5XNtbeA1O8XFsGnRPUx5obO6s2T1mnyXUqZXKOfPwxyB5qU+FCT4rdsEFRq9oQUggkPeGDRNUYie39RbW/xqkee6nuXEg6WBc7SRXrcUne+HPUyOJEA84wqOYeGlX4qkzp5opbML8aCeB4q4pSjdz9DOPRU/dbzEdNZ9oE92b2friPSLLF+E+WwLF3jjz/bKThQI98aCYimciRY6W5rNoL0RvbRiEpW0e3/dKdjoV9HxEGnq21xsg4ZO7xlE4ARy12bTxm0bsy5/CK0VkzJLWhbzZvbwcc4C7wq09UZ4N+sGpziojjrMG8nmFUFQHsMbXL89MDdvfpjFVvcPUZBcS6TXxlSQKN/Viwti9A3Zd3dzuPp8HXizLuMMSISWAvKBAgqq00RrO5DqcE7o7IASnJecjO5yF5njYDXaeIa66S2H4M7650z6Qia15uyCXxt0I3pMBa9bAoaEdOoeimkTUcGTwRB+oS8oogCcY6I1eVQZMqcVSCTrfSuddbwxqyfSh9HIjDx/O/0E9C8KwaRapsvZwUQh5jftFUZhxzBRQJEBa+yGyN3IdkKFfFL1QQ/8kNZovrQ6Ilvd4hM0dh738bJtXo9sBd88yDf4+Z34jmZf45vo3vQUlkxm2RRRGB8r1tydBWVIbGpXwXSHw6z0eX1pW5oZQ7U1ofWASErVBAC/dQfSL7x1eZBNusGzoVghsltRy28A4++fx2pBqV71O+f7jsdobvOLtcRP7Ow2xWQjfbMpHV2Ef0gDUARLfUWkM650IEuKT0Q4ixS1WN3NLb60/YoZcJdL+nR4Ad9+M8rME9FwSgPAamAO3Km4LYrG9pQ4Pl3cLMgnNhIr6id53Hc9gRfUK9cWNoMJ8NymMCB7W7iYvSLS3RWp3HNqjkD0XTQ0miJ67F7ka7O2e7m7DiQOj00vK0Q5GBQScB2OGH1Hd4w8TRkrPDQkQcaPjtMz5mSx0dPJ14mML3AGWUspDgj9oX7sXkKJhT5e1NIzbnsR7MoiQExEnW3ymu61RnjlgEXG8W/4HVd/Kmm0YlQP0pSvEqPKxLVx
kv4uCtpS9SXvnw6jETL7840qSzq/O51I+sQ65ZvRot4tD1jXoQPjIgBTQhakMNgfLReEyVKtrs1EmDsDokumiW/KI757VapRwQOptahyggdVLRHI/LKVVfO3xBwf9TXzl4MyaV1Vha6O0WkdPQIbPVmc97p/3tEepGc+kela0YFQuDuXPv4gXthaiQ/SoiaSoqw+L1DUAHdgiWrxIcVVurEHrYE1PfrAhJHWWzSrZxJ3MJiYIvWHT/F4EQkPQmFeL5eLZouEM/Ub24TeQQCCsQXTF+t6bFfa9SBr+umFW//0pqF4cOxQxNngc9pZPbe+mRNMJMXMS2UwD/OGN8rS/2fpOtYb5Zbg09w9OSzJiIzI7MiInNPTX47nX3jGny3bCLq7qjodeo45GvygoJJHKeLxt1Zbumfu4G+RDtCIxF/uDQP56pYnMY6gvrDu8tgkO4Y1+GuJRFKudF1esisgBgMpkkdQhNVzBRrZQsxL3ASOqPy1lprjToL6mg/nX8cvyB/W+yBLvQD3sK/Y6NY599f23ks72xXyYMsVbN1gzP9wYW0YZeSpVL7wlefb7jLZRwaew1Of/Dl/kRHtEPflOykGQ7VyET8bKxMUXyanRIuJ/92QlbW+t17U0aeoOnK9pGeUzIHxcrOCiOKzsItFCYlJXZCCNaOByLa7PbaxWtVgl1VpAqJ2qo/r0H3QS7cZAH+h3BJwn/UZB/PZI2sxQLGFKVw7wg+pLqJsIg0mlArz1WGLBqzStw9HJzhEQ4vhE5789HR/O0Jh53AorUFGDc/u7k6S4DYp8QdoJ1WUEAPy/GyLs8eRHnQkBau9P9+Pl/BJKrv6g84/Bh8cvON/6295Dbami5kOifwUmZXugMpFul+0fXI0xfsxMHqDW9mNwYR8w+kOgztLTBee1BY84cN7aHKXMfoTKTCTWnnZrRRkyLPK78KlovvNhJjEBHns1ojpXr4RCSPDcJT/Wpzyem26et06cG9KOF1BJr6a8Rpbq6CNGujvCwfoL9LAvuY+INEzRlddE05C+UKrhqW49yvFCH4qQ2lPRP48eHJ7YkLX2eT8gMGJCfkzxXMO9IPHotfNvK4TDccqq8+r7N8X2ImJi4laiL2NPInN9b/mFYEZIrqyLjcXpku7HDdKao0Mkf6iTnH+EmemNRFFV9Z+9oAwjSJIJtbTdJdESfoNPLC0EumgnoN3IRy+4JDBz92Yqcm0iUEpkEE0mjSq/cuBmJcDvTwo5+n55snvxCNN4248IZdvRNQVnURlHTQkseKGtodjtUSrqXBvjrj1y1M4ulMtZPPZ9vC1r4u2aE57vTNntC40zX8E/heNXQNriBKMux5eEdrXfXjL9Cr/DfN1eB/uJcztxuEvmmYfETbMfOXrHGm5P07SNheJDyr5OaPkY7tBkKe2JqYpfRqVMhPKnLQhs2Z/B20Kfi4zSLuG/ehFjyHxfaaN198TBmmo8WXWT6Mvk8OEP0yQCwNeafl5Kbk5IMIpUSewIhZZizzI6JpfvvxoQrJtHEhFpjuDoDFDz9DvNDYTaXJ/A8s0GbiHuMUY2WD5mGjKIOuC4nTaRO4hUxZ0e+mrfygoZjH4/eUCCaIXNQfRnn6v1672i96i+vmYDJf8TTsPYA5cDEw9VJTJ/8557X+g0oRAq7oLNdMtX0Eif0ODsYymSDxBSpuAvbpIEtgRc9lj7adfRLOoIHtICCH3hS2kYqinZnxK3ndHWr+sHJOeDJxoFu8Xg6n9gQbRucOtUrSmFLjyiDsfaYXW0OYoqfKHw/Ojh6Amf1Da4Hn/ziPNaCVGi84ZXfKw7vaVJB7Qjo9F3brS/C1w+1243Ndfe9XyAUjKS+LgQCYLUvdBE8Err51qskSs5PiwJKb8Jrev2omS+dcAAlK+Su2a5+Dkl2qWAI5yHqmJGSa5JSni/HsncJDwJqaYyi14xkaIdExFHCBuOfKFTYl5+iadmBWetx9R9FONC8cb3mYRUFDvyd+4bMah+3yh
V7w+++fYfilW5cbHfjxKXGnl9IxwyN+4izX4JNvadve8tZhMk3Qzyixy//5jEPxiJo999V2xsTddcJ2KsBvC4KRIU58DZ3S8+6+ZYH4+H0DY2rDCLFW75Q7SKeQom0gg9ddcZrqMztsvQHdO+LEnfsdotecItVB6sBIP+mV8X4YClZqrIbXDbIFc7yd9sDi76lyxPPivdUktQTgdgzE/asq7hZrtO5EhhsM6p1VZE3i0ilT7a4qhMLKyITYL0fMEEzhiESo3MVpVcErigW7Sl4GY7ko/sVLhWMqnmIz0YdACQ5bYQGaqJ4TeUIyk+djjSGuUBj1KCFn5L6KhWZDpJmRNG7feM+hrWvhFtv1DL1w91HuGitGy3sLxg6RpyJbpaeMHw7eEquDfRG9AHpKJ1ShkTf24T6vClGEyr7IgTe1jo401fT4UqnLk9rPjKQEs3zcUUkio5Hau9Uu738DBdzywzeVK/vLbvxBa5+aMfjmabEryTG2cUtzMeJ/PNCd9pOlktKgBhKggaHyvhLdlvH+/9b+/szF017wZTOTNR+/iasYQBwpa8fBHo9QKoXBN0tKYPgrtV21WI5ExX7R/tR3FvVYUIdFRw13I943M2H5/Tv0U+Ql52S9eyafhmO1cWPpTuoVKFl6I7YdtHSfj0QH5lx4Rv/Dz6invBKJTThYxMvu/eRHLTSOLyRilMoZeWN2d6j97tqewM49z8kIPaEAY20R45hYKN5UJvqUDOLxgGGARAavhRrryVZFpdZmlFZW5db7mw6Hn23r2642lj1vw1fyAIo+ilBrsRrCReE9yRfrL/yKurRtL0LXkxQz0TPAgk1hln/o6YYFOfYMdfjV2DVg1z0/0770nGJMvcpTHZC+URkUwOZLwALDx9kno1kCWU9tkQU5MHVGDaWdmtGDEGW9WaU1Z/4M4E0bhKVYOzV8XhRcZVHnd2gSM+PMzkucUPPnX5mHFKs1kRca1/mg1l1w+Lgd3D4E2Ul6XHV6sKZMqodHCLq2AVujUWaDRyDWTSn13IWOL8UhQcKbM+cxjTpkExub7XQtSBkRrGKWUMYvARchMqdXRQtarrkAf7ilef8Qb8K6vQlwJOl6MeEi2Ut95CxBuGqXxkcZh7mQPf9Hf+Kqvl2CuhEBNB9m8XiPol34m7wdCWMwiSFxQ+q/fRf6j9N928qqvP5xytwpn47sHQ/zbcSUKKRryOVl98H3j0b/RL/9InxcUeJIe11fHl038O9aBhMysd1n0ZXsYtrHXODEDMtVtm+FybS1d58O4JvwS/5kGlarxYzBxSM+o2soTX5xpIgV+QnvSVBKmvIMop0CJiwTr3cfJawTvOz62Qx7pQe0TfgXqeOm5c33qTNceCM/hwehwJRjNOb5iFBaQYZNyhp7gn5CDcyECwQ06peVzceOTJ2F94hl0eFS95UUtFW5Zkuy6cuRiM4U8fAMqTACcpeoTJEr5OcpYnjqLZrSH2ppWnvoTHDworukvsxNp+ru/OEDBGwBg+BZU+1rQ9fjcQzDVmgCj1oWs4CBcNkKHO57vclBvQ7MT76QRamjChgvFl4lq6b1P5wcu3ID9fovPy2pBFoO+qr/WSqAP/3Yj1dU38V42164HGP/a/ExzI+sl1ZGsMaSsNBgW5+wbsNjw1zIHiOztwDR6qY+xWTFUzrIYwdKHFCExxCODZ2+KxS8fVH6UNwoLsPOoT8p0M8ml355Bmi6Y0xt22E6FRHVtWXXdeQOkK0HwwAoQNpklOaxv/vQfKHXBpnaWvl/Epz8Oj4FeRl2AwgWzpicv+d8DzrYXnZ0qvNUFSbdpfGHMboicOVwPQir3SRjrJwf6C2XXlAsb6M7bJxwSJWcXnsDWozvv41Iipu5wk/MZDRvWobmz7DSqfzvAMP21nLHYBmxQX6xEa7k5Br7Z5noa4eyMEKb8jXXFJzhfFOwCO7fEIFcvItT3GQm2lTZVoCBdcHkppb/XoL9ixu9lv3+53OFhfHCwJpS5
23jCqf0Krt4v1vjhXm4jTG/EsI+azM+i4CztKNyMR3MGMnUWeWQ3iDBA7wk2f/D96nBZRFQi4f5phjGYiQEvw7xvxGPV5esUKRy8nP59vuBUH7YidA0rQOlStSUjf5AylZiddEU8wmKQaEY7HbA9m+IehRCPFEXH9pMowgYCjKKN0AdblYZ9QL2AjTQ5Wild6teyCdzxMovQr+FhPPN+lE42y/+a8V58w/VycMCmg4j8POapE1k7s0FKqfGGyJJ+EnRjRluBsWkRmovUkgu+Tad6UUawvozmZNSM4La7ctFJnyqAJ8TnXE/d/zFZsjbbjzQiiVuABkxYsclxl4ircdeFQaGSaocWl7KQDZRngW96dEN9bi/SJsAMs9fBAGW0Yll0+dQbCutLuXI8HRq2Os3nV1REUe8map00Ea7J6l/sSCp7z6VChLIrUqNWEoPD40mOxQdQmhXPvYbZxcSkG1qRmHhV9M6+6MygiczBRrmvE6NMT/wz5lgushOj9mSvqCu/4wp+Fa2hZYsbccSTnCwmG/2hGvFeq63J7K3KWgX2u7DmpjgoZ1vslPSA0NZh78TBzQvAyXAdB51xpgpFxFY17UsA2Ay0/7FEyQz2B9sd61mw41MEp3vAPhhFGj/DSKRl02invjIu5EzSzcp8q4G9XABommnN+yLyGNBogoaXEKCpTP1kCw85YuESlwgpQ0Iqxtrju8hJ5KdXMZ1csa5UolEJXs8PVBTE6PnG8KgYtYjgwqeRotA9NXMYk5z/WXAGSX4YPHInoPeZvkS2TLaxmOJvPlInaV8zRCU9Ji059AkWvdikcUtML4Esxf9I3lIYrs5yg8nwPV1bYFEIDkGQv0RpQ4YtMHDDLN3D4WV8lJE2L646OJ9azVMxqWjUUXHjDV0G5ew64bFaj/7A6J/IEYM7ycQnj8XOWzmiM4dUUrt0vgMHwTQSWJUBh+EwdO7rD9KQT9td4pYxr/3J1N3znYxTbEejzbT2osv2nno7ZEgVUHOB4MmPVIWv55Bh5fduvsUCxBggDzTBvBT5TB/HHy7U7GhxMAc1Y55a5YTF3dvlaWVFoyPppH8JQJZPL3wuDaa/ys8ahfqc/6WeqzLxKKTarpdy4M4PJ2CVDea/GlBPDN3fsBhwnwUiIGvjmF0rz1d9EZumxYx+LQaaCIpY/SAdJxXH1cIXzGWft57TOxkyC7RFwhD15VwKHoxE+NqsRAZKHhqljq/T9YHxCO+NMB/E9NqvaWUmvnInnsnRw9Ls4xnFlUfobw2bLRT8vRHq7xiAp3BaYqu59nOV3kP4yqtHFXPj5amo8RFT8U1du+OQ+j7J00gzokiz0jxhgp7uoUJ7dSQVzvp3NtSvwL/yL99RCjZdv4Olk6u115+KOrA5nI6RGKT9dUNnp2lkqnsxp4n+QiypxDXsDMHjb+Q3VeKQ6U9pif16qxJokDPIgZu8YZ3uhPeICehCX3/KtxyJowbbe8VFPy9JkWvyOCJyogCerTVgSXsK8HgaCf8kbkU3Kofc1IJ0hv0N7vDuHwrZyw9Cb8RUGfQ12Jou58hQyzkUKfIiQUkQa3lECpmhhxJp/S31atb5q2QoG7yihmAyRJmB5DPUGjXGr74mR1R6AR+0p5U8AejjT7YdEtN8UUcktY1NErQGH9NOm+IrmKd0ikRvVTPJ6T4M2Z88Zx9SYbcdhVnpMmFvhEKL60SDv3wfSO4h9oj/7QpmIGjBcu26CpZmT7FMUjEEvvQQcpfaKpTQmjBDyuqPfKnaAJwu3w3boBpWmiYHGjcLB1u55EVVK9DFLZQaKtm+DIXWVHfVFel/G6nANokJ7K/FjLaaQFg/PRj+0fmDJfoXeaH3kbN6NUoEIpGOb5xSsi2iZFtwD/HT14D3BdZRFdT3F//xmlp/n6kObQzVfdqRQ0HTlC7iFKz0pd+Aa9RgsufwhEEVKiqUY5RbOuSBH3HqFxOL+pEjkSnIUaHliCcUAx4/dOgQevb+lfxlbXjO
Eib9sr6KTfXJC7C4zLMvmEZs7ugh2dbQYIF7EBh6GXA3esW1iP197UWM1dscgzNAxDJdy9aC6WFpn5eOMS98PhaBJD/O4cWHedm5ugSguyjQz5bN3nczlVyAVyaJdx68C9Rmu63PTG7YaJ87AwyMmvCINfLGzeUj4rf91L+ndRzXfJA5hY+hmbbRX7EsNh1sAz2IsffBrEQgvSyZ1eLq0AQPTwwkSno/l6lJa5FjmGOYFXGBTu2oQYXoZS8R/0lFz7YMPNRoOnOFS3hFLPKT8x7wroo0k6d9CIz7b3bfTkeMF6T1lPdHnh5bXs0qjIeMYK9eC3e2h74naaRB7qwI1SzxcjdB2qswBbUxnL74374EOgR+yNy2TnT6hJdYdUMH8qVX5YprfFVuyaBJg5jHpDwki1JI7CdhSZHClLkIn4kjSIe3DGlj3FCZXNo+ZwOZox+5fZth7GwVKw/96QLhI7son1y87ualm2gqBRwyJdwEXbY8B10VpKncWReOcx+bkeU9oOC+fWRyGCb1fZJqkCJOIz0/ylyHrNTFX9cbAzv4S8ZuUpT/8NGfIWbxX/0RR5KnVdnU9a9t/maK9bYYQICRSWsTT5nM5pPKk3nFITz9SHbcxEaaUJsRYjzi/828mXjtJxOsMPJMKawxPlrCWgZg3Cyc8UvihvqUI3NsBdCM+YAGFBEMnjAUyLnzC75zt2LYA/idrBPDOCbxzp67i5UpGQ+kMrLSM4Brws9XopRhVx1RFgsIsxxINZQbA0hKwWyDFG3BKaeFZi4KyS9Kyi/yB050X4Wsjy9LUfyiiRQJ0a5Jcv4AxNCxjXn6dKqYNyS48NC1KkXRJV8amr5aWA/Tv6l3DUt/evF920eWQxUXi8+dL4jzkytjgYWl/tshVQB9dMMQjOoI4DTzhPdrPlW60tzxCUEzHRBoccBXPwSu+qptC6p3lCReb4bQ7tmfCjtzOsEbqHxRkttMSErxiOyVinjohIETSFoCw9bnN9aBY9qCl8J2E6V6E8+a0xl2g3ROkZQF53QbUzg9rNpDbwxyB/McAqXqCfL1TfMIsZJdRNqSnhfpwgEm8GoKGfHOavGD7gr+ODvk/vDfQXmDhDoVvH6zHpSTvRFbXrgcMoDyawjk07pvwq7BwZ3Qb+T/GXcMYT6pdMasfXgkx03Xe6x/tS58ESMZ7nWp6CNMyMGqCkyITeaTmFQN4LEv+x/ekv2jzKge5sFx9lA4KrHx12fhh6/9Tgs79vFXGg3AtGvVpqjhFPuAwaVMsH/QiIgfb1XgrFGG5/4jqlNV/g3PghjQhFZ2ALCE0BqOwENjHIszReozlomqHO3Q0kEzX5zktg2f2kPfBXaEkJtydKgq4BWkbHCHWgWDZv36NH9aGPD0fglsxjqzuNToYMvHT/j96xQQNP8J3Bgd1hRMjLLhmVykhfxC7sVL82n0bWSQ9H1Hf21Guj8aQRb//BiTvJhn/Nc8J5znyJZR7sjuqctvKzZu9cQaJteUsM01nYDaNHoV56lhNOgJU11EyCH0DfcmEpbuqXguEu2NO1sE/BEh1DreuEF6Gi9ppXJmEtq35dcWqA/nfvPpSpGmvq8Bgp5+Pa1HSlMDeYkBKuBvsEy1pRzsvCH7uUcYe7TiVSqE4+KNeGPYkbrjO0KZfPJ06XhtbT6F9LtbyZ76CVrr4qoEEyZ606IMeXtfcZc7jIS3+i0nU/aIwVeNxEPCZWNJvtcqS9PQPTw+iOPtwdjtGNiv+xKchChFQ8amxqi4l7wow6Z2jE8viiCFXcAB69//njc59L831MetsjSStXetF7tFJLOjVdvrzcRLQIjkSgs0/qEfTwsaUzuVAbc67pFcI6S2DvvugPbwcXM1c1wKCGc8+IBHoMOof59F0qoPh7UnfKhPBVXfbPVX9lXn9bevWKdNnh9iKPzjXEP0vN45+C2X/RLxtgv0Ks2/VO/tbeevzfzMBsopsGliP0lo9WasnFeur1PvMyiTMdYNKSWw
xo0CJJwBgT3f5G9Le3RGUE1Zl48Q6HURVLxH+k8ZvnljwB2iRIvyI1CbT/ZvAVVoBgVoJ69ou+RI4egvt9TVo33mazUjI87cWevtYEIVqKvwq+RBcv3SF0xGqtkea09Pxcn2MMMQkUnDZwFZ/gTd2aj2hB/QWMbr+ryA9cqP8L85jDndG9f5NrJmpGId+tKnXJa0BCi2vjzXRpnhqfMF5EzFAov7RZr6AOQaxd+3kGx+A3Wrg+nRlMnnu8mMVUeaRtlYggvSF8WkF8WUIH5FW+xNBsOGev2GRlXNSNHez7joebdAA636XglRDqebHIG0fh0hQ275dT95IqZx8CGcGOvD4gEqrW9Q/dXTRlW9CZkbJVX59rcto86Y8rKfkQN5rzCWP9h3ozLX++pVod8629fILoIsZb2G+jYh1RJ/nuT6wisaVFuw4yb8esDScptyoYaw3vZncW1mxB9L9WdhuD26mdw5WET15zsis/B8wTktl9b0TGFzT32LG6WR6xFUBqbQKbHiByXPSAy235O4sa95CEpcrZBIcgBRZ6BCZ+8huB7FIFXnttnSBruBFH3wStLFaabGVGcV5PeEXyM/7vbvkTXq0pVdpUznq1gGI1j7+HZZzUfYYJSVHr8KHlZPcc5R7rmUiHQ3D2Ho9UpvBJ/3Iy4bGfooENrUf1y/02WDfZrCsHgfnhTWBSmgAiavXgFWEItGlHFm6gw9JBUJ5rurTvshmgp+Gur09XCynbKs7EGPLSUr/DfSCXJ+P36wr7UEn+LlRMRDWQ96lYm/J17lOe5GfPhmVRr8ExS/Vlrj7zauKNtLf/O7hHwyDMcwjvdlfYFf/IxYf6u5aqzZdrq5Jhw4KfLRtZ/L9zjFQYNux3xlTfLYJFab4DHjZWnEWHjhilIxnUoUy9IYdqOauYgHTnFLP6OwUW4pHizuK5lBfmy4VcEgBuQRMDsWMT3atPmjuoP7ycyU+VXwxqhuQ1//hXJtKxc9KuKdv4sfaOkgX32Wn/lQGU2q81C3xVQgXwrFdH+LYWqyHJiPEUOvBLtGZE27BBWXL8IHDaxWbzT63TbV3vZa3Xbbe9XlbSw3FYyoOgzOleSMvECjB6+Fm4NtzWYlOmtj43PZQry1J1+NXGExd81n2eHC8/5OCB1EykxeqSXVU4eszJcREt4oD8aR0QR76QZ6e5q9a07yJIlKJmlclcZzEmn1SvIOMlpcd8fwyxqc1vvSSjH0yPvrmZctl8jG2RuCKrhBqAegPzdK4mNQPlIQkwr/w3xj/Td3yJqpwambdz95ALIdFxl7MnINXsubiqQXqaWroFUrd4IMRUJu+RDfL/gCQ0kb6BzPq9fmBJdZpGofA99nftd3LuZc7EHA5LOI2z+S8TnEdQun19JHniIkpIEaaaomsYsph0tSAcdCtmveGFkdhi1iaHLePyg3sOwSBIjn+1DEZ7h4tBSRKYiTXUzShz4mZ0toyPxnbvParSrcRqyV5t98oFz90x3mtr8IjYETQSBrs38LyFo6byQ2c0fRi4B/PZJKn35ywKo2NglfPuTeWIBJhvx5HLAzJYh1wNVUQNta9plHIYgjgyFIe0xbpX9fgEp20txlWNeg00ACzAjvJ7ZgGrrNl6PketfTYlLETR3XQc49KUvFxNdjgkmeXvzGQP2lf53YvHmFu3OFcww2gjm08WARTIfrbbPwG1+38IWPnIF/gIVJI1r5ycadTAqkM9vxaeRIwRm7n5A9tdiTkOfxlD1slmsAkCBsj+hmvf/L9ckOWak2ernZYrfOh5pZilk7ija5TwQSQgDb5cXAPbZo6QAuWYjoWE/9nRdykiIX6omJxOyngHq9JOG7z0z0CWjWavjGtixERoTBYVCQ9c5Y6XsozF/dj/NVOhzFVHnAnQKzPcsUfvTHf6+rJj1qNPEt7MfH2Wx0//jUSxFWInV337H1u61efsUjZGTRor4hw4wIMVkeXxzqAw3iIyXSfMwCC7QqpMP4ke0fWt6T
T1pkbrr232SBh49KRIiFnvmEJhGmn9ij9kw/iO3Ys0GrM4wpS+HwqtScZsrUYfEPfErISSD23zqQHfY/Hh4wi4Dwmte0yJ1AvtPzTYBCjxMw5EPJfUUfvyKDNPQ4obhMvzhFYq5k1Mr6GwIyYOnudMN/Q4estbtJQsJEcDiNbC1fHbbpJ1DhM/hB6UiRkHju8J0Nypz/dGkNxqq1SUY/e1fGv7w/0h44eaDX2/xJNA8TRXeWIuO2WAVS5YaXFl55eXKvN6Mac2ZZ4owAcwsuckskbvhfBcK/x2Gx5mp7YbzUoGUoCEpZ1SELBmxuMS5hjCPGP27pWoaq66cKdU3juE78zAU3iB6BG+TjvOCzGJEgDUDqyA4JKpsOI5dVjia+iFq+53LTi0IAV99viZhl0QP6EXuD/dtMu/v3UdCvaRlRqhKFEe1tKzX0f5j62g0lLNnILdkpaBnFqP4qecvnFW+VP0mV4kiimrFeWz3fLSoyj2fJxmNWQbpa/P1hIHdiyrs8OqaxvU5OLQ/mNkD4JtuMO5H6a97QCJFK46+xu5rZHPtZa7WZezpc5uxe3oX/6OYwMi43Mou0AQpn+bB9Rqk10U9Cf7DCtO3ua21KTiA9UpqWAVq8RCowbrabHI8WMrMmrJQIP/RPwk+/Mno7T97YR08cdJboGKDJ9+WqtvNAjf2G88/LTpKIAaf1iJL6XbRnHyKLHT+GI7m/5gvc1IYcBYagZUxS9o0i0rp0L/nPBthoZnCKItvIHo9cmpEx/YnAfzsUmnvYdfJkv/0Kqo+fOrWZnEoYkOGlTm40/876hLHnBSVKPrkbc879sL+8wi/Fd+cdDS4XJuKj8KlC/W/qc/ro7KoHG5XzFaAO8ofIcEaJHjhNFg7XefhlMqZunKLajenwfBy1W3h3rBJTFfinoidAx88mKaNfLv18b32IVdWXObmuGRudqV3PyvNt0Qhl87S979c3mOcXWsSt/62nFm+4tx0Kg394NATqZM508uq2M2PSQv07Ta7ltG8CG+LfsRjPcS9zqYG5IwHuvUvDz2vUpnI5oFd2L11/0HU20Hdt/gSpPYMdtJc5fKd+7/7jxToKq/GM7y2wTsW4a0iU3VE1G1vP+RqDz7Wa1K9l8CbYUcBWX3uEUbiVgLYKlwQdDQa8X8/bk5oei33FjlrzruHAt98cxkhl012CoUgnOay1lWv4t7XIZ44spymTW97nMeFJnPhRI4tv7EqeK5ie3Qe/1ApyZket4QGhIU2tuDSny3wisifZkfEPJJrjyV/L5wZBiA1ADRb5qvY4uR8f9FxJp3cHjLIzbyh7deQbQ+ihvgpVd0nfbZ+97pIVf1GM7cb8Tq6/5SE61zczM09PZG0N/flb3NFSC/o3J8UitjsuX/yyWLcPxwYNMjeK6U87i82O35aeZ8qsC2D71tZC/YNQzZzvwvEG4+jq7xOc9CCafcY40Gtrg5IueZm9uoe/FTReBaSOtnW/JP6KiAltre+ZnCObg4OmxYSn9/ejhgSZr3LOtZP0IaAcJECMiY9fIfSkU0Ug6vCw/vR+F2ZQmIH3nt8qDGYm6DM31ur8rZmbrrCRWMSahoPmFMSdkuDBWqn9pvYfK+doWonZ4Fcmo4xSt3Gao8ZrhhZvRgdZnpXTSKJ139m4tg9e+53jdaoDBXUWLTp5GHLwaLZWO8JLrUXtQ13nMbxWNP2+Y+3a5ACpkDVaybPCZqSo6QjpcpAPIu1oS7RjyVY+Fdy/OK1zCASg+KhA5lMi7dWBBQW9VSoQKi64BTnABAjbBT7ARm4bWhr5LUosgGlW5XPQbLW4nZ1+9y/7/L7eEuQPlXPHmFEICAubFJ6YEidqPeUu2ujK9sIPBbLS1u/v7Fv5ZT3PRstb3YONciSuEF3/OHzCCfdxPylNdd8S+6Df4ettoNo1GOBfVIvGLKT3/O6SSDSWeb1mRfoKN5sj5akzZzdbS4FoVeP5bWcDAipYW651lc8s0MglXWcCrde8
eMJ+9Paz+CU/5NkcUh99NF6nRQ/diz8KPdNNC9HPdpysYECCAlkluuAj+Xk+1+5mELfTKH5ND++Klwn03SE6Vgb/kLNXopN31vKIUA0zhfpyjmp2qmS6K20z3+iVnz/IwjLSsMFGB9bzraUXJKNRE5ZWkQBTX6x5ssUP5Ew6uihSt4AzJXH/saQjYWaR+X2puLZzBJlcINUWWl8O9huQDRCkw8OGU7tdxhopAqrA0IvLJPaeWedqbhKb/7XABt+gg63MiuPxhxvUsdWMbCSLX2TakEWgcmGCWVFxTcxu13uOeMrCltpIFAXbMhCycieT+HFBi5+z1DNNdL8MboifFWPT/F5lVQyf2zNTdeje25gV3Nnyg/BtRqR0d4+EVQ/yaED2QqHKTr/wrvpOS5PVjsqGCddZChdt778cxYbMub7QUbv6aPoVSWIOuZJiiJ+EedVDlr+pD+XRcq2v8IWkNKXjxyCKDTwTcWfvIE0Hl6sk+/lvA81LI+Z42OmKqnZY2OQQtcJa6xp+EB3SQ4aCo2uMId2DGb8Rl2qTbVKTZOEzojJZNlNisEGCuC3A2yPWlD9iseMym/7t80e8JoygevDsUTSsnobA6S4sencF588YK9hQI7xaFdB1QlQKARYzbGf7IeGOiZLuvz7oQfFef4EtB6kIojMTespWY1mV0UopPlNe+bYs5HNPNwGqoFf5bfpc2EwxFVTOkCj0RLmTU23MTVRfs+34NiIWETSntBGo8mm5ihKsGQWDEzZOajBBl6V4kPnv3kYc4oTKLGzf2W5ZAyJascLfV4oIStd8tlmWU+Xt3JoTDBqZpZwFdbZgduyGmFvAI9RVz5/nh0mzfxrUcC5/HLb4fS07WsQuvxkrZUDFkPNE43AHR6UXMSW19OClr9K3ydP9DruCBqnhsgQtT/i4XNVuJ9ee22vQzcPoZhb5sOysbxU0hrFQ5ctHj3nzeGPCHN+JtrlB4rHbBibFvYuRlTgcv75uTmJ2yZCQczhuxQxRE7JPIRs0XS/pi9+rmDMk+zImnOk9SIfrqoqXjT6Puj8Hr7YIpsUgR16J+k5T9RMcQ/6qHhxNpdQYm6pHf4XLxgZ/4bXGncmD1Y59DUFllfvf9oUzSQfkQ2IngmKq3ojezW8AfOrJF+QsaCA5oGGCC66lerX/bz7P000PuMb8ojTDyvhusY78fptX4c6G6ViNbtWGkc6T7VwHFT+hBwoUnkwR3QP7NQjD4VKjzZ28cPuLSFFPNuLDIr6Wvt7BDLrxJlRMJ0pHjQy0AWt+embRc1ZJAtnuwpOUntLyD2KILYtAmRiS53tEv0t96j+VBc45TU+LLpdvrle3ogqSfHTd8PFwhpFlEMeOb9sAsA3Rpw5DVJFQBj11LKSuWxVxmwGWCLH78vyM0wP5eRcSxvLCZx7zZv/uc/Jc9qddQHJpGLTeMM2p3+i0EXrrsUaEEJC44zR9GCUSXW7PJlI4GCHpoH6Qe2nMwb6856B9B51fXhyVWnTqV21IfBHpfEd9mkZ3ppJB8NVAFrCQcTq2hRJeBO26J6lY0uUZj2kqqIz0cP9rR0JQ1SzkeRaDBRmjRFC2zv7Ad4Jc62UGDvwqW25T2Nfhp/rJ/TdG9sds4+Fg7OntzxA7vzyQAT3D7MSLURg/XDjEVGJmNzZV5YNpNolVH+hI8EHRPMhwaD0+jOcXJDH5fKJHzzfj2PleR5KvXd1F5PL0obgfIdBsbw4Tdf017/MjDOJw2HxmH6fqav1jpL4rPaSUH+KIJjjzhCW3U/NEpC3MuK+7zBMorffEK1a+KYWJRtZP8waY2uqMBdsvE52sPhI5N+Kj3UxgK1LJU7S+VwyesHHHDITmE/ZaGUiSrLGpx/VK79NVaFHf/wA6OQR/lnwpBwSqQJH8gWxRB7tLiwOzJnpHCNvSkaWyNmTd6w1YChu8gGJePLfY/iEp/NdSOBO1r3mJlVS4JMixEYQvrs8xjPmqt99NGH3b2YWAOqAcOdjP
XJWvaUeRKynHBrgO95t0NhlKsDRwDr7Gh4/1IRvmUVWvDJGVhYlh9G+J6hBXbOrHjcxJxDARyi/rSGMDYsh5XLk0itwEL2RR00dKgs+h+BHVkRnj8thxFL3AAiCW+uQcDSb9NNiyrB9mTHSMMpp9SCQIUAybyQjF1Ev92Odr8iOo/LFvWOayRrKedIfJmGe2KZgKeyUD8jsJsNV8f1GJGOlVIXywRkZV6zbObd7yKsufSvnxZJ6U183FHFxK+Isy5FEQuFUV3EhUr/NPhxZooqeL1WdUhzMOfgv2GJWerPbHy176dQ3tXJITUso+oMgnSvhnD7FKh8TczqACPxMyZMF83yujqOINUINLW4aeUyLG/LHmfesQGAGbCriHEbRtqAW+Z3tdqQEgFn+rUYEPeHok+ZqoBH0lu9srSZLFFuAhnpIQjSpYvpiW4HrGtYmS+bY0X/HBtiFi7iCOhBvMNPawkRLMDXnA9UT1B+45ZG6mAOE5JBibww8dhS/dPMU3eeSoAvsglyAY3mQ5rpn0X1evSmpYB5PHrxhQ9JD5DTH6MSs2pq0M3TOd/a2+3fW9wHYU3AcIjJraU5PSGqSaiKfSey1BWVZLtsJfeoxfxmqi0TkyvUjmEXSyiRlcIGt6jzGGoIXAPlGcDh+hy8RAOMnIUABu/VqJH63t+I0l35UUdSxToMXB0x8VHrerEJtjxMektKc6GqTYp3oFi2pl3pcIJtvg5/y5X/Ve3hq47cyMocOyLBFFDhzRPHgnQoaky9mvZUdZbOaBDfWg65KEgTlMJvLxjYGdxiATabfm5p8QvAWWW9djbNmps4JcHFQ6udufN5qC2vkLYaeAZEL3l34R3abLEKDopKVX53D3LGMgz7XHdM0P2AekIWDNE6MJSQ2VbO6WHgaEdPw9NQk+Of9SKkYizhvNDXkgMQZ67d7mAEoEJ5W4zZ8Ahd/YR8FZDv5kDO0urhvaBmmEpVANosFuslxFcnVWb/iQMEMtr+6esQR6wG61CSV48SM3OJOlky4gkaRReQOkZXL3x1ZSRpd7O7KyImD43b9lO8qt6rrVvwMysycfrxsztxqa4evlhQyUbroQCELRsOJ+yVvDWQx+htN4UlfSVp/qVbKjcXAJ0Q2w3Dkbcx9D5HotkVHzNbA6dVy/QkahDm95N6lZQV/Hep8wZD8N0O8nQumvSAKyf4W2u8Su3ma56/I/hdyRx6tip60mp48KDTPQA+crj1+QGkZnRa5P4i6mMUlkmm8lbcRXaagYfDYDbJ3rfcYMW4AGeCM5hr84fUlXVmoL+1gSI39yOHmurZji+a+rUatjgd3Q7w9r33eU1uowmJeuvkoizbdtzbvfBk+TkEEHYKMrO7q8mC7ct6hI63tbbhpSbNqe4hHY0/rXkfCBnlwLvsj28i17IuNURkY20rfg5OMTMDtr4a9nzyvt2Ek5fX0J/trJeXy5OMj7eppzf849HhmmHktmlQ3GqoCLlBOX8mpbffRT8R4SV0+fpEufJ+sEHWE0yVMCVEgyyrwGZn3vfVPgNXRJJGOwaXB2bfhGzXoCnccsgRakQCbYl6i0M76ShfwWSo7EjpbBbVWYHci3c9s98vjfSj4wHTqBqv2O6kQtsmlMv3pTY5RdAgrUqD6EJdtMZKpANCzta0YRlnk2mV1bQHZ66Dxbbywowye7QuDbGMOj5S9K+tteMglvcVDBzhqh6nzb3IojH1fVcbthsj2Yi1esV/Qh/vLlG6oE3OY2m7WVmB1xRUw4mJH7VHO3D5Z0zj7Jx29gb6ksNBA+ag4unDLdP08+yuZPmNLhS2jB0vCq/v7tZTTkjBuWvzrslJlxoDR4blGFAtroiirvxmoP8xIpGulUxtZjNrDz1H88rRQZcv4oCb82YxZzRek8c36AHCVMyq6MXBsC+zZCaew29lPimC6PLAvz8qoNi9gljPxEh9U0aZTIMEWk+4jJGjgRtEImiwdiEnCbxTj2YfRTYOQ7HlkO+OVQ
k5fY6/5l/gwSmgecqP0bqRNjC2NudN1ibM3eS3Jfu6U3XKDupY3dJDXIJhuWynzaqAGt5jXd6/AKZ5pgoQmX99dAbHc9S+vwt3t6/7oIuZfRSSdzBb8PAVkZtjxR1eMPoeITj0G1SREaNJxDF/n1+FZG+SOEhB0gmi+urmfsra3CbN4vs6RNCTE68xhHvtTPILPxyglwdvhLu2Jme3nzd5C9dH/UnTTvRvsRK/4pG+1TZwFYD2dpMMuIRoLdPJNBj273DxLfRSIS4iGcPQQK5onmx4Nh7cnlG2un2+pKef+dc78flRavPIPBA2y6Yxqn1fSqvib0YIl/7AKpWM3P2s/xRrFE+CkUK4Q22PoTTN1LUMQR9A980hbtYHYJKjdZ8Qjf+KDJiYbPgl/sfu14fD8+hTu5iPsxjfM11A7A1PpyapYFjxQUkN5IeWj5GkEfm5zfTysuCZbYH9aljgc9ysLvzeI7qlW5VTfs4dHm9ldP2m6Tvc3iJGSMoJwn2e1/PESPtw191FSzqZfCFj0jwMkZgv5XG21IGjOR9HzZJpMmewxrvBsqSxmMqDgf7N8heowbAInWPzbjhp5p/CRFy5CSrVv9k4tIgdGaQRJJv5JIcEytygdHC6UpT8pbK7B/vk9AQqQkh7/0xreaL7G6T3XjRjtZsorRPEpW46Lyl8lj/E1RGyo00vnxyjeAKBXnTTL7Hawkr1zJBVXJKg5aXvNJ41d+dJNBUey8IcqICW7m4yHDQmFF5hV4AknfyPCw0VL6dTW9GgbfFN1vjiwkWxVaBHGTzMWiHSIfGGVkuqkKIJSB97IyQCMGZJF0aPRsxB6t1Iwfc7A/uVnZt+q5HwuNnRE2d97bVjlcI5F3WWPtS5JXPNHklwCDEwOZKZ0JMKPxImH3ZWOTJensHEgWTIFM8y4qQFwuFqzwKdf4/W4w2v9qasbxTN16g7IrgkEIccPDj9ST9IeTTnvv3og5o344Nq1P98GDlPjxWjLoEEsZ5gkq0JJ2ykaqyZwlYyW6YbSw8Z73WZlEmWwI20t9syNEb0bvG+dbfL/kYpZJI2pHzZ++Bt8338nU7MZjDbadpU7wuPGXaC0Dqo9ypTGPFoLe5K+sUGRL7mnyBt0Vn8n1/DtNsrM41O8p/K9ipemakD9Uup5QeRLIgITw37J5f+1rGAHGhR4ChUzgmbEoYGzYmTREgP0SJGJt3nzqEnSVrUq7ge0EXPrddORH4oZLvdpG2BDMR+Wx3S1/rnT0dWD7M+9lkLXMTq3EIG8VkWdMj6mRb5cos7UYWffzG2En8WabmaRdx2dl6HM6yFk7yBu3QrlLydRAzfis0DWIwTiuOwUncX7J/sahDVrD+q+iyAZtxdot8JRFQabfQWCnkPzmkG5WBnhZgdXHTKTP57UkjMuR6Ox7xtFFg33KmRwwAwI1F+V1Sz6dB2UJOj6dbBNGE5a2HXXkea+E2Ma7hWq/LfZVJ9aCZvmcEeqjHngB3yRits7mPjaMLNm9BXBdSWSXbIMAzv0QDQniUnh8VWVgzKF+eCubQJH9wmuUJ6iz2Mjk64azY2vLLR1mBLGrvIqP9SdZ3zCU1Zi7S7lOTe7Ocd7/b0X78CNLi75DD2ixmGFyxpA2Gb/pWJDIbPLGJz3xCwiquyhly6UVkoqumRWmiSA7o2/+x2KIEMwWZHyV1dHQe5QLeIHVYCi+rt3aCSd0Mm4cMT3VzVf+deGE3/pnsHd+RrMebWzFmJNTma30pcPJ+oZ+cB/LBNx0lmwxmEkeMamvVRG1Lz3Yi03Pwta03vcTJxNEdmYi8cg1zxL88yyTdd0KHIIAnWK0jWokbx0LWO/5ElxYuDPiBxAE71Nya5RQ06MlGJJbAibbM8EaruLvYH+K6rW8hCKKRV2fyJ4+gTtlMnmS4BjsGl80q5DBiixgOQhQgd/wBPAjmQOQmMjy1fVVMng9ro6/Gbk1YDBGN0RkTPNC/huBsXnNozngz+zm2QH8sU0oylD2
7/oGnkKNZb+DYdrlAFlKfH88GDa+++KLSIJsXtoj3T3DnRO87GwJb4NJEK/+W7G6c1VXLc4eHRtkqHCPeyyyVGRCkwI2IDRi4solIBcsnXEngvq1aWsgTRj5f1PWxGbaL0VhUhY05C6XZkxzdVSUDKmpJfY7TwQOD/Lq6iuCe2u+vaWHtqmb9eHSr9f2+2nOOti4ifbltj1cvc554YFUVfSqzs303djP+2S4HqdTCOsrxuc1+qMnDL6asIVXn766HcuQ6IWf0NVqFMBJuXQj9WQqDG+7V3EQXVLoQ+PIGXB6G6vfDl1UG8jAsIS1jWOQII/O9rSFx6qKi5YPo38FiiSNoyRQ2XhLy2dsNHqNLDtfwW43r7Tcn7zD2tjOcnhghvIkywaeKuYi1WueXWofrMGY1eclvNOVlO/3wcqn0KALdp0LNFvKv3MseJYsN2cvkNJZ+Tzf+m8CZSkHpbZyhDo2EP38u4zkZNOCrf7op/niTJeYkDpN4wopS8dLpN8nrC9F4wxdL6dPCrbj6M7L7fCLF0J+89BJLIThsHRpfKVcfdUFKUuN1IDZdlPtCfZ5kKllSltNlM1o/k5E0eMBZzJyqTK+qbOlG1I+TY/2+6jFokU8TzW52BrNpWe+5S7f3uhlYlWEe0yj1UwGvIKwuZyHJSFYyR6/p5I0WkT/RlN/I5u5ZE939y+HJLhgXvUt9HXsRZ5KgKxkCidrrqbIWvmXIsReFT2eu7oEpqVb8xlQzsHXZ54kn4tvLy6czYwCCGa+9Ht9XLirZ8LtrhMiKIq2xbbPf4cIQGXzO08Maun/03RdS44ju/Jr7js9xUd6it67N3ojek9+/Sn27J3Yjo2ZbqnFKiCRCaBQP/bG0bJS7QPf+IfVHXZPg2LIR1h3kZFN2YlAeo6BGN4MUOw9Ky3A0LepzLmWtdQmZezgS4DY1yoidjHz6aRkyC8Skn0yEtaQgtWcLYXDLQdxLQzhFl++kwWL5lFOblvZ65PxsKSGlHzDvhqbp0XeaogyFjZQvjbfatLzq2NbJhDe1QLZ6asUNZDPD3AQHAtpYoB+UIvEVPsGIHOADi8TISzLdfQZpbgZO1N3zZCvyCjayWBT9xI3v/LR3a+9IR8Oycaxkf/5U2jJK4I7xAj+jlyOGMAHJ6c9XFQiKiQsHHj3HRBrwLt2TtcbmvzkbvZus+rWxY1ENHo2ctYaWCMFYoRC7zmabPjMORQqUz6ltJgicncRUnQSjLzb946ID+Jhff8rnmPD/2mUlw28/caplNyaZuiyCdi/JuUP/eR0A5W2Pl5ILPMbS2wjg6RxAHFgH0SkRfONLST/JOWoFGGngETyb0oo8rbYSHPCHP9iA16JFKpfKw3lNPNm3C955A/6mCt8/i4mlCUDTHbfhOcEt0bUGVFOxGhngChW95KbD4vwqQ3TSKsWTcRsgDaPPLwISQUvouKMRcQh6Ffc6tHQE3HOdU6JE9pPEjaJdA0Xzan2+J/imBr4mQQ18R1/Z4SSuTcUsQ5TH0s4OLEZYeVqK+UxIueG82a0EHucHS1EYdMAS+NwP2qoetdJ3fEp9jHF6FK7Rl2CjsxjVU5cjPQtc6rpviCuM59pztMInlaergnBxb5PkrG99UU0qU+vNzXVVHTrlYj61yWnj0f1dz7KzD/nh8Z8RE2JioD1k4W+CKAp2QzbAI3lbfeyVP69XwpsPvRWOxBW8/TL/OH7pwmcs7sHnwYP6Y3oHngtcc2qCliZs0U2XsOVt+JLjA3rU0fdSe24iL1XXklOt2cqbWIQ5N7KHQTwX9Nl+siR1Fh6wgj004rm7dd/F5h/Usszjozf5inx5G07SY/aR0J3ZUIULay7qVm4BC8YUEul7kTidZ/3bfpJCyghdjGg9t+lswHnn+Dl+pIg8wgnOCCSlZnUncIpkOcJh4j6UxIBdOl1p1zS5jkOZI23HvZOMclE4HYy6jc3knSDUamBDp+QxS0uHfNUpmAhpy+e+RRsXG3PYcry
xGnpsaPkv+zhaSBxz/94QER7DTrtSoPHbVXBcq0VoLw08kTnKgXjt29oBzdpeEUZuDEAj82G7jO/fCthERG1K+7KJIR41R+i1Za+tv7vpluge21Nur74O6zeQMNmFkl65wNtjHf2XdstyXzrm0FRv/9Ki9iDSDlUzaLpL83INi+8eSH7TIuiBYp2+qIPifCMxD0L/D6B9VRcSz7Xta5P+MCAsR3D5qwfGnrTKNfYsDa2U1PSneWjSiTdOZL23ZR+Svxq7apAxjtOU8dD+EzZ37zXntoDzLjN/eqdL9N/fhp1FaKou0mc1G+ePHsUT9gcpEKDeZwzLtg+298l6JMlXi7wzUNhalSJjwACocrC7NPQO0+jIkEXPJPMm81iUUCiqgcwUnErXX1EoKT/iUx6Oc6r4YREQAL+py/AFnSwXj5RN8k8MtvSBQSj2PDfnWLfqEwrUWt7tAhzUgL8XCE1ivRXn1u4aLm7uO3Q8XNExd3pbli3EaKOIn2J/LOaGYZX+KDgUZqRaOslH7sAK8Fob4ZeuCBUf0R3hV2kMBnxFrPs1qRK2P5uVKmnqgBGIVQEhSUGyU4oM8aPxcfGyRO/9WDkme2xPP5EsgGiK8BOhrEIatk5MjAeZ5GuWQpu8DVuGpQIp0/o7bzWDir/jE3e3jEu2xyp29LzgTWSJxpCiqQP1VlE769H8/shnmVCtaHsxU7Plsw3tE0AbIeAtfiD0OvsK9ktMJ862qsdTQcsZW4pKbLssU15UfEQr1bqt8vT222vpdQ/RwD//V0oR6JMLOOGrtPsl6gQ+Hv17P38+4HOsNoVJf6Ye2Zsb9+YGDQJYLclYPU/nSkhaBPomUaW0UgH+EjETFxvWI2BPjclT/bdhBJG0ahZtwgDep+WMX8SiRY3o9J/Bp1Dhl6WQ9Th5iOeqhbnfeOj/rKxWfCiZc+GxZBBUBpxbz1HyCnqA1Rmhm7U78mO8EDN20lMVMRdMX2Q88NBmgwnv0QVYuxovyyy0AgfjEzK3gabbCGQGbj+ENB7vg+EjpaBuCOs+JyzQvAl5YZl/c4b+eb91jjkHG4JT+jHq0yn5DjWKBV1NO1TEK/CkHkaRHkpwRE9zNil+T2xabACNZfelAOJiz0JsWamzQa2qqDhvgBIAoe+siujHny2LvGZbsZhOkOCSVLO915eRlax5KGj3MzOQlCUFFXzsHrHb0h6S8ynvQ5PeAULindfEPMk7IvvhCpk0jOxa6zfyzQX+GMQ1cn3x4Y2RWygLAG4WOOdboXmqW1ZOngEdj7R8mN52MevluTwKeeyclmEeGyqhnjDG9aFVd2H2tRCdzgz8JbIq5RNebY7XTgDcV+iRt4grQLvqpz4uKwrUZXBVFJaNuEr2MaYggOurd6buBmsEDeSQ+VIPMpLofcKsMc34afb1/pWINAlgadq/gCIF5KWJmNFDK6r6gaUOxLg9Z2/r8NruHAbBpztGkt6xysjSm0ZKj8CitsflyEekaDWb++39FfJR+4kWo6SQ/4GWM22UZ0sUzoZ8sx5bBQbopj7pro/rRQ5Hv4XE2KDk7zPFh1WPqJgVTboFNNKHrWtgZnzVasxlHA/SEpGnQxis5rrad6jKRUpkUZ6FgN+IeF69JuISNjKRAf/ADsK3VdcHHrsltKbAgmH62gFaWRbPch7+rE5o0oAp3vDCOIYyJbcFG6iFqA60QSYw5fccsIRuBerzWs6W7za4KXi0kBuoH2DjCkb+na7dgoHf95HfQLlugbE3MSGFppbFBs4AG5OMr82LWqMeich/PwjFbZ7o4od018C54l6+yV4yVI+OaNeaB1pm4/q80jqKT/0R3y3zfetqsTbm29ZybMJEbKXL0BUW3KHVnXdjMU3FEAXnQBZ3Ug2MwB5J4j7P1GdHSthajloOaCpCdy74BuSFu0nLBpeu2+2EoPZxamMDevZxRX9asYqBhLHx3KwnCHRLwgNtwZx/JNgeUlnbuLbnOSWwvdAFF3x4h/P6b4Y
MoVZGFcLN017WNOcD2XlVhg29G53ApXcQSBktCLFs2pQAXYyVOUg0mhSz7PYE9s7Y5ZJZ8BMe2xzaKvrXtLIhC0mGkPEU4wiIT/Ph7+OyGG0EUNbhXzKbBjZRKJq5O299xhT53aBLgPaBfbAsf3Jpv3GBFRC6zPV/2o4HTEmEaaC7utpvcn6N9P9qUZdMknf6daqGl+xXepFvZtF7tElhIQ14iS+qgqU40LuZF/p2+5/pYVv0n+ZdSAYmlDOJPxyzT+WGWT5UwBVbYR3yP+g98feWhLqalHbdRu1NEb6OYNEnfYnaHmc0bpRhJet2KbB+6ZWuiJZKA5Tl7sEBSnIsurbm8UnJwZ+W3TcHQfUr6NdS05+OmklWMFcEKn8pG19DcSLVVp/m41Z40V+5shEa4WkrYaVCY6Rl/nlVjMufuZx4qQrcwUnPL95mJhwyH1V9ji5pzSh50rkowIWhMV2P4+tLcHbFA23H7JC1LqqluY6JM6jyhX1XRN1Dsrk2iaKBlmeZGuUgaJGF6pjeIdFqpRyd5Do6HsUktwQkkLL9q0CRlaRSFv7d7Lmk8e/yMiUZmNPCJGoh2Xk1UzbtaHrNUaq/E0hue6IcG+AXAVE4cWbM7+pkrvntpo/YkNEVo3tlH94SNGrexdNlxY2HxfpFvc2OtgKLmyJCdbwz2PRen7HYiCM6Nq0+SanTKA2T/+7OtgzGtTvJra+ysMO32ahR8pQyJEBOxkPNeLeSgalVLSbbHUkHY9NXCColR6/S0ENhFircp3nEtUd4HroZ2qjcGRnVQQv8jyc3NttC2/ySQ9qwbNYaHFoW2+hHy+nXvwVytvSCsOBGHd/tA1YR2UxxvLmiB0sSHvtybAOkWGd2m8BywlqNAv5Dm5ZpLjV3cuBX5HYSA/x+pSo+L37XQuNgDZB9O2uXe/e+QtP/L3O1MrXfsqTJ6SZKezOaUVBVDD9yUJOAMIfqEwOmDUgXsoHSVZHmCNL2QaEXL0g+pDOgHo0HsnrFvIacrzsu4c8oxpPeZXEEUQsY0a2mQa8pKmB0RID0uy5SPmRAMFXMtVNg29hhNczxqWFbyyBpyfaNneBnrL5jNGLdTUfcYvcbiyd2A2t2E3dyX1c1epbC8DmG5u3nx1ObJTj+teGlHWw3q15pnEZeZrq1hs/D7itDkxnAtRGAqp8du1Xh++QJSFCRSK9aYxc9PsE4CNH298UYEFlOa+JXSfGHrki0JjEJBbb7iTRUB0RrWc/SCkebP2q4s6SFK6XIH4DTAAdIJbQm8kicLbCq+wBXmpsBa5LYpNSkdgDcvNKCcBX9b0CcDVDf30Vc5an+CEp2V+D6chwXpx+i58ozbRzRmlNLZVUBUU543olQiRzvR1K3EOcc8IFqydcm4gue7b7Zwq0eLwDS9cVZvHFFxW3ZdK4BJBpB/ZFZKEAgzjwUSiMTAhC//mR/R1dEHRNVPEAs8eNj4c41J3roY7CC+vkQbxoW405Wejs9nuUC7b8hG3iXdDkSvvG8PFxWs2lL83rMnXLSxkfnwxNKGqblhKNAVVk9B/1cw6qwgWEfv4uNiOQNrDaI8ymyL+T+qAGH3CrhrM9gliAUjORN7GGIRpJF3gWL/kCPvIMtC2zWfu8oeA5/b+rdI8DrlJofBNuIMRX1EQwwby9JW6kFqb4HXdXTXsI/k1jRz4htlHIzM974kOi+px7Y3DzQRdSCLOeGU9ya9p9Eb28J+Al0IaB4GTqg35/w8/4y9wAjEhnXzNfUUim3MET67aNmblTUGHIqPGQJIK8+yx0fWs8RtdpTHNjZw8ZkZFcFjlsCMHpL/m608se4U333AZaZxF24Txkq2vMUcDsvm/jAURUw4588StLmdxcn0qfIrUYe460v9L79DVzJYgefzUjIzNkk/PXsoEmRaXkgikFrUhURHt03cqjeMdvutHeplc8Dn/DEip6w5Mpt+CRQEYhbZAFKcr0YBQosfyRR/O1zTZGfNqIaA1PEeaQ62aS
Uy2kzcRNrhCoaLoRtlXkgd+aOhPO+k6a6nW1lY1q3u+7/U3wowym/9n6x8Ocai9n22jlJYhLHlDo5BFtJFq8KdKuz45+yUmKOlj6oFXf6PCO4sx4rA9OFUn6siACsKwIvS59zM4IHk+xeBVTwQGCZ0hv2Y0aQQhoJiql+20+hjlp3hVJfc6DhdwSfQ8HfJj7Uu4p6q/zebJCy7OAOACxMmSuMqssSQuBz/Z9Y89WdZZDppuhIVG3BWZLAKtkE+DdRiJs1m/UvdliTbhPTrc0IA+oihxLtP29h4JkXhJzMs90MmTB6ANjww5mXAb89k+mvrMja9l2iw80JUasEV8Fj8Bfxb1pOZSzlInImY3vUc9933MVQvNmd2PFuhH6LvNt7GcDHkJe7IHSq469tAFBhh9lzJPb3QSMS7onp/tl3rfx3/ltpo3lj8vA79GIPDmLYy4Qb1RAvPJPihkWSjYamL9ndKridKnIiPtF5rZhVmSs2kUgG1wlQGyaKlshCGCMINzhSRbkbcnUAIBLgXSk/fnZ0oZkZAUPUrylXiKgA/nB/46GHO7u7ikacUfKue5Kz72OfjI4hWyFYPFQJCMpIt4Kt18LkFgRAqH2dECrMWZl8FoX+9JdwZsPe2m+w0W6IiKOTc1HIQCoDZAxR9ahSar8Bwl66PG+Z25Y8/KFjUiMUZxHrriCQdQwTzf0URQx3RLhqWd21TKgvB6+p0Bab+0nj1DGyZ9UbFWRMDIJmIYhGcZSnHxsIasHwOeLeBMQNA5YKAdLqcnJbDKLzDUYwfY/O3MTIYHezR4r53umBkFfjUAc2Jm2sHxyaoJbEQYBoXyxjOvpoxQqlr6OAKediYzaRjBTxp3vl8s5V3ZeM3wHY+iuRtEFNGm1Y5fpWgQ4afT06zSkE2+0QgXdLfO02foTJGrRCLRkCnkTaioTNgH9O+fnHlHyV4e/Zu3p0PoAEP139yyZ3BymftL++xAbJZWrhlDRyWZlsNczFDu++znnfmoea9qDqo1/WkpD8Qi+vjKSWi9LBYoBBpjWp2PdnZ+UvtunOF0gLZ+TDFCjzc5pjNPiwEn9mmMPNx7N9qcP/C6QH3oau5ln4UasXWn13B5sCgTo4EHbhdI2scajSmpAkIY4oAnvSAi7Nte/aROrY2QE9MXgEVh37y5hlsAhKg/mZHUbY6CxLP6exPgEU4vJiYm+UMIFpxTaFUOZldtgQrK7zjdn0hjuW8ycZLkWmOrjW2ZKy9uTNM3MM55s0vCS53QqZbQx9JFxJItuBWX0jipcUwbfywg+BFg52R6pgH1++D38BZQRxxCzD3+T1XJcQ+gRmt9pTIxFF28BrmnuoyfIGV2FaiWGvzHG1DgmffGZ4TkXBrdXA2J2NEH7wGOEDhuaHznbGtPIX4Y7QBjkXq/O+KuUpE/Xr1v7k5DuesehmwdQ5W8KlrSoLUzeDo7HomYR0jnXQpCqADpDuiwMoVOk8kGsctThUT1cnxNSjz+aa4dEZBdsHXPUwwsWj3xhKv6tZjCPDhDXv57pu4+HJ9qVLBnQXUkl9/IInSMSS88iSfoA/5UiKHTpDGXPw+zqSMT79acQt4oJZUMZbPHPZBJq+N5+0ypW3XzCpB/cINPZNI/251C9eDVJbH5EnEMi9S+3+l7fsBsy/hvYN6MoXZ833KDIfj1bG6IDNyLrGQQoSRh9LqyAD+XbFWmpDhHR91DWAgL7E4hG7KVFOVU78WV7oBFpvfZQnyk53eMQgjxPJ6mE2l4TNGTsaQrXJqWjq4gMct1RvbuuGPBxW37rL/BvW9+IY8JleD59It0TBIiE+mSSPKzYeGzx1PaypMjhn360v9q0VRAcxKmNGqSMX/17VOOdtuS0tskP2N/JZ4NDypuzaW0belLPzcyYmk9Bi5BKL0BQu5a2EZGsq36KUagZIMBEcbyBKiWdFRj8+AThADFw/xKy6qZyhoO+b4l1a4PoaYtTkmwJ3lxYsxAli4XyxdhpHteyT2Rp
gfVc+ASJgW0LiLlAk1p0jFYPbrPmghU5m8YQe+B3D1un8N4Jdx/8h0UqzIBXZA1y8aoi8Yld/7AlBvWncjepD9IR71XFArWSb7aVHZf2KC9od/rAyWYPrMIK/7RWg7tw9gsgDRuNV/Vja6pHTOnUPJoTjSLSOKJadCxRlkYauRC7ihHsN2yH2dwe1xQvrOk8ckrGx8IiUVthCT2CgnSCrob+bprCMfxejg2dOyD4lnx3yKWcfQK/i1gkXgqAvjwsQAd0RJNPE1pJfuhZThJcGsEvjLT6D2gfg796TSTeedJOnIYfsa997nEaIuNCyIcB79KPFLY0SFcpOxu1gEmjlQ0wTErVbt8Bm68AOWfJuM6kxKh97QaoudmVUhW+ah2x9kl3SGZcCwS+luirHWaWDNFmfOLpAg7Zl6tI/U/7u9TFPb9+QlNtZrzegE2rRJgDc0tj7GrvoTl2Ed/TGgKD5v0uFk6qpDtifMTfL+ZhdLvr326Qym4hd/RyxrU1iH2RcBbe7eDO7qmGFzSPwoh6Z9ndRHfQhOVsqPrYHZr7fhPoeXy0vaMZxjtxzr4bkmjLr/gTtycTxKhECSZ3B9NSbMrh+KvFlESCav3w3TFhkcJj1gJE9jhmldOVb/PtUWxQk4e3fKYRDuEi7dmw4fudBuu6IyS13Q8JiPRlznkw6yAiFq/Oigg9oVKHsqBM3d/h5sRYYG8W4UP/0hXituRtJVtnPTVe3dcUX3yutds6Ph8gOXCk6N+beJnEaM3PHkTle9Jyzzr4bViHKHK/GP21rx7Vf7dEPfGKlu21vW41Mu+RV2CkT1ntelUsszj+Hcts/0YBA/FtDCEQnBAxBe/w+b5dn1OL334mk89D/fOIGSZ2kUyceURvl0t3G8yuIkxdtIFgNv3XScd7DxaNfd2H1Y9Q26Hy3jnIizyKPGIy3cZure9XDbQx7KV0o4K34k26EfJR+toc4EUBXMxjeOYAv2WIOIisda7uXEjxHlk7jYd++p7ce+fK8R9hGp8NfOQbb4mJ8qViQnfTgFsMYDiykJPRfrWVeiIG8NSFuo+DeQ6sT4MCBBwPukn1PfSiIVYr6L5gdyYyt3RvJ+MW8srcGUbfFfc3lQkdixMPwbYCYe6NAs8uuDnS6Kez598Jjj4OpgAKnvJV8bupH+VmEhCX3SDI34YiQZhmmJ68Pkek23jTEfNG+naFPLwYHdvYGEgbM45Ws+0JvC2tpiklPmsqdKuZftl0rhEiSUuyD87k+2L5QAZadb/9pw45X790sYOAq3Xd3Tw15IZdbDdV+ODXdtVe2BAtMNKRXBArqmCKOPCtw6wb/bBrakmXrO6kvfQlDvN/lxElcV1tIewcy0wwab6+WFTE0rUPN55tpX7D6ufaCrIoPZGu61UncSPAvNZ0fiM9rf1zGWkIyIA8fa1Q8xBJSbN+smfuR9orP/V/g/u62p7ZvhE8xF2zaxX5W0KUud2on7yFkjIKYfNOP+STYeBiHTqkfkpDpdL9DjABk40NE3AhoDAZSbDoBI50y8xy3Uw5iZPcQ5buy43c8MjVeUfFlDFyDw9n1H7shBr3rTO+kvF6iJ9H6YXwy1yirakHsC7uYbIf7Biewguh+C+WmukYVFfypyTU1/arU0VFwBdcconFJs0PUlL5IJWkhf2Hey45YaCsiideYVS/YJ7IT9CNiWYRcZWkeYrn4on8xKYnenbfUGt8JqOCFgLscovnMeYIDtQGimFDdiDeNh4PZtYzZ3D69pMiZaabU4S1ttvkiysbDKH57+eS83T9nqyH9Lv+HMQQA237a5x0xk/oAyO13jvS/AKJUu6BXgkO500xC2Vw0gnvZdZC+E+EaYjeCh7mMvF9Qs/CwtYNk8IlirmJoUvtC13qnA1crjC3WML6V3+gu8qe7xn66+TKmU67GUXu86gaicvX09fHI4Lrqx5esdqBIsF6h7pkZ/nLnE32aiZMi8C0a4MI7+9n9KzYo0AUEZ/xhxjMkCGv
N9u/cCm434zm96rXYRaPLBluYfUMM5KQy6Sqq/SLywcSsAOnIrnOA/b6MTnqh5LOan9CPBavfXme/8qY6iy+iRYaWsqPJHomnNDxrZcJEFKWMEsL01FwWQO3KZZwoLv6GOBD9lLTp4FHnAZdDnc8JUQAQmH3sy5rmt+zXM0cc5Jc9W9TH+V10WRUTu3lqU/xSLgODscF79nXVLfw5q8Nztrh+2yBttT8RaQ6Hsh7P188llHDD3+pseRn9fXDKyIycMtQOw5j7ogbJCzxf6QC71XFxSntoUxOUDOzn4lKoU3++zzft0COrze6d6M6D+rIvWdIyf7MpoLXjsPzuGU44rMbbC7PvD5RLmpDasAjkhHZZuLu6FFQnNT1fsOiyYejekrSo5musKFK/L0ZP2LA160313MzwRJJSbeKNqsPkXBq3o9OOZlH7a+nbkIzGuXwfvaMg8V5EBY5/VYOk4aN6IunFLcFo1seYsaEWCX6qMRz8hztKCQWvTmIWX8FZMev7It9aFQYv5nN4BmZwTpQUPudKtMK/hfzBkkLI+w9lhIgRinplYrpNIBH1EPRcvGgd4gDgqdj+9HfekSXkG6qtK4DtVMy6dcR9s5nBn+GvfrmqcN7/Moh39NEP6lvYWuttxi/Jr3JIBMV4XBSAcrEqDHDZlxfA5NEyETCijP+u4yMXVyrSFotfTOnMC3pDpCv6MFDfYa0lQlzwBoTXhWrAvAlBh/VmJJlIEohOgvnx3GL0HA6dyO/LCkWzBvZGdMTtd/oPjrNlXbPYYRRBS7ceBDqEx2U4Z2iU5qwNqHTe28rUV9ROYSe+IU7nT5SgQdrqnZ9Y8LNDq0j68ysj10UeypFcsmCV2P3TvT4+Esd6nVs5iEmgjjwEH3SBEKVQSkEbSlSr4X8d06nGZw5DvXUxlc7FEAs+8B6ym3tkC+fgAFrn95wLL/RY9bPtrmduUjq2llyX9ESOTW7h/w79luPij9ryY5bJKAWOJULYxqQKq//LjSvy7SHf3tBakdEjMLHUYqbDaiIieAWokD8Igc5RM8Bf8zHeXsXIMbkggpYWQipC4X/Skm515kYQrnAuSJCKntNiydAtZTZStjFxGemPzmc9CSxO8He1UlZ7pHhzUKGbEm9ptIW2YEJVHPOhMI5jHaC2Pd33X6AnHpKynoz58CBx26Ox62n8Dc96iIgf18C+NW98pyc07x/Snnfxr2L1F2ijSu7XL4F/m5AsbKKUMmmEqVFDcvbHH4iAOqLp5y4bXG4crD/OCNDxFKC5KQMzTQ5XBm96FX3dVt/D41sssg/P9zfE0KBbZD58g5O5LRh7npLnyq4Qo1m2N6GiT3ikUVjfuzliCkLfP1QP/HqS17C/7659nePNNUgkDsrHpeqwHUPPIdlVKsWAw/vxzuMbhjROQF+gA/TnLyNE7BXop5Srxp+CAxLFlym6FVSGcm0OAHYEDOSf7wHIrxQifBnEI4HRAQoIirfpJMhYRZ+LbKqXR1Se7yKVOfHAs9FrQ6uoTl/Sf5geEYH/gGyDDKIj4AJ9CyNdpTileNw0w8ZUCMIzWtBD//fqPKKVThCe2ELF+KC+e8kkX+RgK9xsU9eAfCbumGhrjKdemaJiHbqvrrFDmStKlOJYl5qfHI+QQ59d4PDg44eXiKjTvVHkJ9bvkKPXxq45rGt97dndCM7ereKl0Vso3M7+9vZoDQNrpWGUXuTrosrDQ7y3RetaZ8j1eF+i9EhR7J2dL7t8iwxhgpdIF3dImFMlL3xgkDzwUmXN/WSO+6bRwlzyiTMIjFw3VPjPA3mhPr9MfhcqBmDQdzk5fkjdqGN3EOdIgbMQsM5H3J+ypjSB2zCGIzoE320vHKMFoQQG9LWMPiybSZI1qsvhMv5WhDFdm91F91VDyDi7lV1rAP/29bxjLYnquEZgmk3HmC4wtT7FgwzWi9ZGL40dvx2UltKDyYVh4Cxy2y2dMlHQufAgoq3NCDXNMeY7mkEWx6q/h5JibRi
NAM2kH7m9Z1BDJufhS5nbr05aSZf4wzOvulteDOWuZ92ILHegkp/8bk7za9kiaqbPA6hvSA5la2bV7WENDW1Me/ZVhLdsMwKNx2TDMl+30UmmIi0rgijzpfgG3JK/OOzoGFrv8XKsbsZMqEB4HRrI3xNhB895Lu0p5sKNJnISu1G4k4jUq+axreiWshcpWZr4avC6pcRsYWJ4YG7kuN2vRAem3YLgiZQudy4MS6wZ8qt0LsdJ8QhM+MV6ToGSP6Sq6/5hlypFlXdlP5N7OW3aAyP+soxmuJb9Hr0Zye+hxrlRut/zLfMIX+7X74kSU910/kLtMWOuwsqPofew5aD626C0elIsARQEJPEvrrRi6bFzrUfxKXo7yWCMN1AKJ+zjSdpSj2/Wd6PrBWz1NP4T9lyOVIWeE/RyvXkX1AL764F2Mz1r50wXAaU2EqYymDDhDMnaVwaPw+1w62MlpyMwyneSWkqcghTbLZv3yHETBpdTAJzgMcCIIffksCnurile7q0FFF/SK3gmcKiLTv4EFWKXCl4eIQxiQ9VPNk8J2d0TB1fh8+PRbyWG8oIeN7HjGbjSHZaUHKTNVEspMIrGM30x7xulZtV3jrcrQzlYwkPsObkIk7D3aTT7QnAd0ot4J5pFp4RoEP3jY3ko2yFO9dZ0I/ec3LPIXQ8+1l/BbkSuqGDjyqrnJd2BB2vuvbMi7RBResUH8IiJJm0hjLgEbkTP/IauKXXNZD490GO0qXI2XMaOt+Tvluin9cwKPyMqy6/HzUrs5VTZ2zNDPZJ+mpaK5RWfWunQ4BDN1Fzszs/IlXI8ay/WSt6JZAxVyjcqUL8FNO/ExMfUjqLsv0a6XdwvC/VKhisWthOZfLKZh6Nbwc0M74PHlnD+IyQBVvun+ezRGHM76kO6Mm6+Evwcmhf+0lmTiR9Rl6PWz9V+kXzRi8d1wKRmV7LF6YaPaC+QgL/3C0F9goT6PIyr6Pb0Q+VNB+sS9N7DDudhJ30HZDRv2pkz5maKVi1qz2cLIsQuQba3k3mGmqN1zRRC87o7Vr/pHDw8ccEb4SECJ/R+ftcPFZEusd9Y1evj+YWNCfpsaSYmxjp5jdpNI1hsmkerETjrHUezHYfeleE+BaKkE6vas4RvFI3zLzH730XvBUkO7QTMXkliz12MdXeEobur/qJgGkDt3m1096J6Pj913rTBeiLvpc+kcLf1bQ0cHj7L6lZhBkG4KktjriC3ic2vrbZ3/AobloCIiYIVSTa6ZdGHJytv/VoaUj+KiBS8P5Ga4Rubv1iLKfTQNW9ruep9Nxi0oFWAb68xv9Xz8rDv1r2W8/LpplDuyxYrqRosgvEpm/uZrrk3K2myIBVU4BVN0zmXlOu5FK4ief+nGdXo34ru3r1JhXTg829DeCQXn5vDxCQtF5XfKfuOgPQWGBctm6tJyAqZ130ry0Qg6ZpW4QAxfHdBRDZtCAoKL63gw5adf/ocfZi+RriMQ2dPcRxdblY3QEozk42Y/PLaudABwuzPaNGN/BRp3wj9XhV/ZCdRjr5QaKO/CWz6hZWb0Iy1O9JbJGhRs22xKHpxsSrAbeiN4YXy7HZ9+8J2qqwLuAMM17sc7CV7Owcjjz8Q2akpcHHDRBqTOW38VLIvqHCy+FGb0xvwfbWHKE8JNRlt3lCrD0xKjgmUWtZjsk7Lbf5HpmITGt0tu8Rsk0/P8Sh2wu5vXGL+RxbKjLqlUAd+/lq/V8GkkWFlc9c27YZSfgk/RgBZfm40I+W0z1fbT+glw/lBk6fsx1/h5576u72Ww22DC35itQNjdkPxZrmR7t0SWD7SpU5To3/Cmob2hC51OwgzpBWhlytL6TM21izrS3bN1zMmDwa/ZV7GIaHK01JVKiCD8cjilSRGuRLvpN3328np92e+dtvycJjcrzJWu3pxZKGx4kPDzFlYlgcfWYEVCPHcBZi+2eq62mr/b3luPA46O8g9ZOEgUCSB6eSVBnlsXvK+jxBE/LPmG4n
AXgjOHtnPPuO6v31JrjekHRC8MGhSxakwp2PnjveWgkYtZKr+puZRI3I5cqn/jVRGkru42KWeBHgGSICz/AAxTXiFmKohlubK31AHl+/id6+fRJll3Tpc1O34r/rUzcgqTzuLxVKT5dIvwXp5vXTcbDaHq21S5hfiqcGrGhgFFAKbGq0DWwp4vthxf33DghWBr+O4WQlZDxkiqAOl2fEKDVH458LvGpWMyQSp3UST/L8wQeRZoiH+8a9YbqxGNn+fj+RvlwK1V1zJH2xkLAEr5Rd/Q2QeRHFf6crqHQyHcZDil4XztJhogsOEbny9v1nCOM2vkJ4Hw0i3qwHcx4ALKaJEbYFtef3waoTDTcgOIPrwi4XumXNqq/s5/DScHNVVMcTeOv+21E546s17RFacuGEkVpRRnEW18zZkj+p/GZ8zwI+n6//QWgT0DlEkUXdv6vYBZGgeGlvw3k7JE9fHS6hSmgLrvTeCsDMETGXjpdL0X9pwfdoL5l8+21JgpXE39Jg+ypIKVeS2CoI6CGHBBIDcse33KniAf0ElFMGALdf6qRZ1xUrILrH/DeMsfBnvVmvWIySmMbrO6OJ/KNO19+dvexnr8kGF+dvU3uWTD7TVC++Yd4e0uQ+EaRyK1ZtlFJJoX8eDjC8QAopSDSdofDMH5w1sreiK+wQu590PbER8hfz0VBoPxDjDdQ9ot2f66SaYG2dPqQiRYvk205KDUenzOOsNpyYxf829ICBhSpQg+jtnBR1XcfsZMqtzeJUo7UCdbH4B5DhF1hOP/m7MdH7xGRKqMjh5u/pzkKYbJYorMJlxbwpy4ojPzX1rov7UYTsCpyhRLBJ5YkkIX852EABcAksLHIBoNebGzHteGUCrFSY8s6qUkmmDKZGsKeqWpBzPDIq+HY+5hFio8Y8bNaB/qpFMKQx2GpCRn6Y/GEfnn4PGKeAVKH3IBAhiJ23Sw51L+t2gumxBmHyhDzfVcTj7nzk153KXA8w8DeSWUU5EiBSPqnsMYGg+Aj7txOQVv1+X4IrAIplXzmQRLyEfypvA8iYVLHWDv6wf3KiUpKEztfiV2BrjjNQTA3KEMAe2T9UGaE5UiokljxTk6wSeCCKEo4Ri7kDRX3yxM7kOvZQhf9tyf5CqFnguaUwLnH0QrygpS//BHZVPJIDpHax+57JtkAzDb53mLdz6rCMQab2SqrxNj7JGa6YTyE7jWEUXgS1LBLORf/aH4PUMzA3GjfqTQrXoqYqiunVUPlW+fdmkRlJ6W0sX5TqYm7ffmE06TPTu/mCcYBFqvA7lIFdiVEkSh9wHdh7a/iHREJ83as4O3zvtq06hqg6lZBebys/N9F5RG6gybDEDNUdbEQHSAxbP/i9pkhe9S5UVxlX7aPpUu9Gw2uSPVfQxioccJ3OoAxu23uy3w7H3v5lVDOXcm4atNOWaBsX8Aoj7VdK9FpvL4UHSzLBWw4UcMcdKUrnDYRpVlCWXBBmB9GWyLUEFA8LT1abb2XXNa236UAwNKgMxkPnRKdtu5lwQ8H2UEMCtBHupqBLys+ssShWNvwtU28M+KbWiaZ1bY+sx4pIgWXp5AEe0+k2PXRAga2+0/f35/N99t6mLqFQfQVOuR8JNAq5QxlGqHtq5C00oZ15Klea1FBM/rhvClD40+4skj8SZQ/Up4DLWH89Lp8C6LuW+TEZsbMwuykwceB36t+BeBxiXhzWAIdQfLQOA6Jz1TdvKkGnBj721/Jgl9NkmFUwavscVnoTYSXoe6q594RQn1cY3QE6vWeoKEO16eflJHpSt3SXIXRjswL/PHuqOpmZr5e1zE8p+7MyACMvKAw2r4+BnSZaJdM+339HLsQnpm7stOpUWs7ToDV/WqwDz+L4Gl+RnwHOnGcjtHix9WfMY+q+KSPkbC9iWURyvwQ8I4qQuHoktkgFzZf4M+eC0wGuWDNncP4uM/2rFFvmn+RIa1ur9TCEAdRvhhSF5JmQnGlWqZ+XPp6ZPzLWGBQX
83fo5GtXJrX0dYKtvyfAsz08LjKgIoRff+Tzm/yUcz95jwzxapxfVd9EW6He+ByDxxDIdFAYZPtZMH3FnKl+vthcJwOq5kHeaYbhP15IjWqPZWH0LBdeVsjawbO0UzT3qk7DWkhS+o9CG9YLirKvjvrJ685lpT8HFfc3pxcv//fvBuvnT/FmGNwmz294X/k3yTuDve/cdCZBa2ZkY1Bi1fXLN0ct0sdTmPl8Yy+R/Wa0AtaHr/2kwXMmgl/2fhbv7VfM55Uqs0oSEoEt+3gMlQjbpfGV1DrH6rc/gSbWe9zP842ZHKRsFcHyUfdlHvpzz7Srr5qusRgUQeIyCNf7dsffJduSedOPJCMneFH9nnoS3qxYeIrJ6LNjzgB79dKqBjSIqPH+tPtHDJh8tpK8sVgEI+n253gVdvA6TWl6kvdBHWnQtfa7IrLiJa9ZLIw/l/y7LvdlbzvSHtg3jt27o4Qfe/6owbatdrLrH+nav/BvhtXqflsk73ftIslhtYQ+b5VISdi4Dd4RBvTTsjbtkAwPVHzex+eCIdT5vAs2UqgWzTTse75w1cQ72pCJztj3UViZZaS+po3jLJLvf3RQPRqpVkzIaQz0PbGR6fUoj9d9+jEDCpGV9WTay/5cMV61r1wtBPEQpGqax6rhJoTFr56S0DHb+eRL7bNZNKdsV93q5B/A7ISJmvVRjkGU6lvmCFmH7yQUhSC3/JQbxm6RRqMahh/YxxKr5Y9xdmhyk5a4rM+O+7QG64qergnEVCivxxMee4v/57qZfJQP3Ff5lOzEu49N9/p020rl/P5EAIPFnypm27i+2uYJiZMxmcWwDK5gGloQWKMsvT22hWD0jaQ+Jnz8ttWKTu4HNvEYD+F0ntW1GuO0Z58MUnfobmx7NpxTVdlp11R2xgxuYyeWdxkWAMb2SJ9OpW9w+G60oSmosmxt4J4NrFdySlO0k7sJZfEHAOX299Sq90zLlpovjS2SiIk1BSL5D+TsGS88tOjM4+IT+Me681V6su3tcWE2lAIyl1+1Tam5hFQe8d3jGbA7IOXv504rZsiVzwd3C7wsH36uOFaj830ljy/+jgUUHktPa3gOZ7pLzzhiIy5AZBjGg4jWP3da6wp6Yg/MCRcgBbEKkc7Br3Kg9UAXHnn9jPao5tWIoeIUj8DVeNd4xLvnYwaE+OfRLf03UQ3msB3FrGxetp+wcmcs6af5mAErp1GlkHOKgIHEFMbv1jXVlBYPEvcJbcu/Q46sbHmc7e8ilz8EqTw2Ayu7flkXGf8EiKxDT8mRx+GJ2zEqYYREl6w/ZrUi03ZzpmguF0T439O7orF0p39p8OPK955fR9mJpRxiNnqIvJFOJ8CBrHz8JRCpTzFQmmEs4sGaWORiTP2cUN2Lhw+6V+qszPO8HHwWGRryXsnBhFmi0wVs88ySNmt1LGZ8KwK8NjvLGn6Fqgw8FPeyzO+TTC+LGgyYDruEXN5mi+SNbMHGBn+9nTkBTRsUxe++ghCbBUHxl5XU7nLUH+yEFeKNtR/zRWOGHtD6kX9PSNkgCrM9TyX+W6iXR7afNxqeZ3K+WvqHUubOW98U4cSHZ+P43ENHLeXR2QASvV1wIR5pFbwq79GovM/gjXHfRHKVmOXfNMFnOKku0u/xO1rrotxv+g3sqZx2cfgbPhqhOIUHDwnP3E/uKdoOMTKkT2s2GfBPeJp5IChRRqzyzVRdZPluh2t9hUT+Ilc4uhZExN3rNCYZlV8/BTRtCgkmImZuY5AWQG4E9iBfBcrzu5e83Qb1ADZkqVHnIFNuua0JR95Kwgzg5anPIJ1F9HhfpOtcNugn4cb87wInyKBPf9b+7lJ5C2TYHuZZSHwdcacmsLvi6gbEj/QaWEOIaPxeJdz0uaDdLlPMEDSE1HBefSyZXCRhf8lKjF5CveQ9a6yAjPbvtkswM7yscNaqWbDezqTjbZu1untoBulzdh0X2qxRZeS5WQpM2CtPKkoSllS5fRhr+TUftf6o
1FibT7H+jes1//yWThAc8CEtRI2bpGsIvAJ8gql/STH6LwQ/AttYJDmdWKa8eGvykFr6KSBUP2NMI0Hsyat4aVcfDoi797VRk2zSXX0HFCX14B+cugsKvEtt2fsA+IZefkdbj7A3mTi6GszVILZsZ5U6CjBSmvTebwAW09btWaK/01Pg72MM7oEKt6DE+P46jt3Xi6aR+k8eoUZ6YZ4r6tcRTvbvVj9eu4w+DpE5Ce0mDsLuDWQb5VZfgeho9sMH+k8INJH08sIrPfZxHiWh4aqb+tBgR/sXNb49xYJxZUw1Qc05x/iRwdcQT8aCzvkpffyRC4TmJQpJ9lEl3R/CDIoQ6I8vRGSJ34w18kQ3Ny+ZMbNEKyPr/pGKwaQJMjyb+m+FmReTjp2Y9iNlHamkjo3kLmf0oMPdXsbj5SXyjb2MeBQE3ygydv+G3CmGhqruweYx5PutimzvVRvCg/Fs7vzdSsLv+c9UDIgRNrav1jOBEtKHSAyLZGutCvpFr5dCoqsuKK3KrnTezymlKK6NP3p9qzY+KsFZ67gxXst8wMjhwROvlDBjAyrgmbQpsJXbyYGfp/tOnFqsYZ8z/TtKRd/nBTw2WyhkLKe48hVDj5JoDzhrxJ2AeG8sFA4KzUIFvIk8u+8ijR8AINkI+FwP/PVXqjAiTitTq4DFSbLCcocmmltZDpRZva4+CsAuJngCnCTCzvStdkNSUM0o5MREFnJvov7sEtZV3bWHdQibzMEdfdeMJHazbaZgkYxrP9w11kZAEJ3T0Q6a/02BhEgqnfflhPeTrryZ0eRTRJLpQNrn+InO1PW9kJA0gU6AUKJhULoaC59vw2gNNH1/uxDwgzM/pOoDXm0uEPT5Fy/iWLxFhdyP2aW20U/zJN88aXMNPEHehZNYvxsRND312aerdHaeSijZX3EwdjkxzHRkrfEmqjO8BxYyoqqVWWs79HMMxNw7Ko9lnLH5rGf7V3x6h6RN5WUS9uK4WZn1wRDr5w9++jGRQumfj5v9y1BNLKOBt3W85OgGsoAgpX+/N2U3zZJ+VtFa9wCS/3ApriEyzrMUc7I0OkX2NmG2snhIAWtEPG+9KlRKKNah+1ETbN8nDx1tTdA24+4zMroJwZq/YBthEWwjT4xM3ZbQ+kU+AeYlmYvso2gtMsDeYdmcodXvoYXtqTVe8eeJ9d9vU7OeVstlP7Ctjdm4VlUF3tw4OMY+1wwobVXRJPYZQB6LlVwZi36HFiSKX/ToKYwxdfRI6UUy/dAfKjWU7AZIB5C+/QKCXDfvQZBkMRCjUv9THdQHBa+jQ4+myeqWzgeGrQ6rO93s8Q2wjHpp657LYM5nw+W66/zMFzYDsOd+dRjBMkNYOlP+DKHwbgGl/Li2Hn6P18FP9fAFXLUqczBDeXyK17bZzaqcvctrpjK9lUCJ0f5gBlzd3f/Ku7LtRnEt+jVZ6/ZDWBJi8mNiJ32r06mqrlRqeMrCWLbpYOMGnOnrWwIEaGCwDU7S5RdHQghZOtpnn0Fk/v3717PkdyLv85eza/fxYT2K9It7utqPGaCQZxG0Da/+Gnsft5fjJ8oC5p9+XAXfM0Z/Hlp548eXc8tL32b4/TKgVtG32T8z/+Vn9Phyaz6E88uff9wYm1vLcrwz+rm5/fbpy5U5/vnhwwmanCDyNIIgYONGeJ3QGl2HWdUDjhL8VKlCFydovHr6HYcrnETPpEl+1bBMDZrZXc9ZFXJAVn70ZwRN0jodaZSV09ol9hfL/HFG3tKNs/Ki6D/9lyDpU6kv5WmMg4ANIv1bB/4su+f68cs4vp6/3F9vR4+nX5y7v8yrUzv/HW6wxVmzGxyRX0XqzrJLcfIc5JfceIM9OqK5/4RJr+cbHPlkHKQ5mpCH+ZsYfy6rzpcJUbFoAsmfLqGqa/K3R2YwvRYv3Vn4SGoAKczceEk7TAvxxvX89eJruCEViFT4K3eBy++Jv6LOhICeUCdKKfEf8N3Mj8jIQjohBKimd3H6I7T4
YUHuC9wpDj6HsZ/4ITcGunq+5wZ/Cg0S+uji6lk+9GmYJOGKXJiH6+TGf6FDoSuelsdhEEbpLBEzbTQCoB+p0aHOicxoJEkM1IEsL9AcSmAcSSrwbIFv8mIYJctwEa7d4KKsPY/C7XpWrG/Z5s8wXWMqH3/jJHnOJ9XdJiEvPfjJT37Q2zVos/LPtEwmOy9PnvL+08IzK6zJj/7BuqGF7D7bZOXyvrTEbpz7QVBZ1JmJnZlBpTOJwntcueLoU2RZ+XMr0j9pFIE43EYebphnmC9g4kYLnLTvYLoKjRIV4cCle4Ubhko80lvPoogeHigabEJ/ncSVnj/Tigq8MSnMBVW3QVXUWtvDkSOIZjaCUlCLn7K/7I4ksDvRrSDJtzC54hXLav2zDZN0Adl+rlRZC/ptspupRZTen9VrdNo8D2+S//0mbRay75MWZBRxZ+XPZtk2IkbNiztNu6LCla8J6dc8PzEnDOnOXe9+kW65ipTO049SIpu2uYRUdI7Wa4K1+ThIHWhEsFOg6RDk9+4mg5LQnBq6xqtQKPQRzucxTXMUQK4H2WH7USE8M/+BrT2rmrKKs8u7bzefxlcVWZmWjQkMePd0vMVFRV+VqjE9tQHSpwNUd5cgcSX4Uml7XPoJviEall59jNwNL4w9qCwEBSRQsByEFDprNJTKYkLSben2gQK9AQrGEXYTXFlr4NJMCRD4cYLXFCIAIR1k0kNqLNLO3RVdleDQQUmy+Jk84UwtiMrRF0Phu4sUtzUJoAx5vA5dh2ssKNy8ikFjgOdJEzCqhJrnHBJFo5+e5N3i5b0rRRuM0jPKWCfulbVhIkOn5zRO2RcREKAbm6dsfQWRuo2pZdBp1SVbobL+suKSlCDj/FWubdTYAKvYc3EaY4/icJ2TfoG/uSPPwNN+VnzkaNAC5Qdx62+wchXvbHn9B2PoECnWX+Ts6xnhV6n95QVuHPsev0K7s9/aeW0lppV5MhX7hNUdyB0gsvhlQibfRcbI87samCsyQHNHGWOXOuqNiBhdtBlMVU+4nvuLLU0fApfUQo4INazfwVIn36jKIvdO8INPsFWhAMDeOuBt0t5i7/TDe9laMYdPP5LM9WlrueV0DA5sHqpZEKjRLBWy0kMvOV3av6P/h3GVlQusqIlADcuE2ndLNy6Uq1TUEwdyeEi0dJny25aGoIzu9mBa0JKV3jEdVeZJ6aZiV9Quqv0154FAotu8c9EQKWlXldja0dAqUfZiS8aPQv+VimwThR6O40YTZ1q7rY9tYeuCxWFCebsZKgt7OMrpSAswDnycyog8X7wCPwca1bpjoAGdflGVro/TejMt83WOrW7rOGlrzRDqR3l9/i30DoU6AOtaQyj0nzKP5ghCZdlrgwmSnSRBfTpdOLp4wNmspXpj6W7obK6eFkTClprnx14IR1rgbrJHCwQeABPYqFbhNAc0PC+3lmsDKT0ItWVAzUClUcWzHAhkq5qYYabCrmLhs/6FvN593JH9QKuGbhSb5awKQQd02JGQSDuRyX/+rdhH9VfkPVO/m3TV1lbV2XJlumdZD3ylqs4Wn22rNr6qTldUKrtUPBsIg2xEiwIMGgAj29b9ooVHpP5us5RZ5y6IAS2Vf63HEKjBkwtLFTU3j6jsGLk9xL9S8MSTajCTccaaUKYivliEN4tCpadeo5u1K9keycxxszWSyVbxjXiMDIsXPFu0VbrSY1MI4tt6N3q8awxWFyIvNjCbaTtUm2217U2zqf0wMdsCMOq0LkPAUgmm4YI238BOoRRU4wvQSr8QDeTUWQ/7PNJoeGRM4CUNHI0/ffx4Mf568g7CRns7CNuCP8d0DxbQ34d7EDjGiNtPp3ovuHWq83Bj8B0M5x5kz21SjIyKBP76vovrpNCCzdk9FY1XOFwa1B9TIPqhqs3uqNoY0r4V1SaIiCVqpK6qzRA8P6aoIwf2/OiqWFd7VgbnDAJ4tSWzSei0BOAsADLjAiAqDJRBvy1OvtjiOLnz
yM+TMjrKW7ez+G7j0lTzvE2J1xrNLNeyAZYY3hlW+4i7UP9nnFsB/ZB9A9g8dik8ydBQbAxnMLavCrftyPar/hUGbkVOYsH+9Rb6X2IdYeUc3W8j+6JxRh06exkBab/95Ti+U/QU+bwpmpqdDQPBwjDBMIaBIcS+oQGPQNxbgoX1CXO3Hz/8UGPtrvlyVehUoHLJfPkx8GT6b9cP0qABx2pL7FXpBLHnFl0gIcqrhxRGKtxVeVsHw11rT9yNyaZJWuFYlQx2IAOF9mgXDroblrZjZNc88ONgJEKWkKprivmdnRNuEJ9FYVrCaIbmmPXRxVcBMtAFy5os/zye027574CDd9RL0JYbIXgJ3htImqgDSFqKPTUcSMpxV2mWGizuErv0ncxnNX/d3Xv9Jr3QbwtHLfHMoChLXVHUAjzXlDJveuKaaMRvkjYntNgegRYntGE2tR+Iy7aEfjOAmrj0vzLuCu1E2LG7OthN+rbyKPXezg8BzTHYAQ7mJ+3pNJGlgeqHt5GKNR/eb8rUynDnU6yGYAKRvlkZvGgV109XJ2K0YZbLfS7I+UmWXoMA/1XpPgWaBW1R8voRb50HfEPnxf1oJ+eQHMErzpiXFHM9jTcneUq5IBa/yrHzIg1EnfvRknNRZGn1wHQtxLthLUtmupYhk57BvLBIP4Toth7Yz2dO4KNzx8Oep+KjU8c0zEMCUH0b/87bIq1iPjiTlV1Jqw34CCkSfQg9kVZxwMyjW++4Pax9G8m1RrCp/TAkF3WKpf0KJLeKtE140ZEuFODVBxmGDq/WD2ULR9D/+0bQXs2T60BjODBv9VToag9EZx/wqyUVOAJpEKNZncNigkvAFH0eAzt8kSpy1UVgu1OOgqztlQhaiTnonKTSVGu0j7Tuznz2l2/UVZxrNsKrhX2Fg782i3AeKt+W3e0E8a6spohLF664Y7AIVdxut1OZUN881Ucn9ols6A3+D+n9HI2nPvv5DR1Pdyj7EN8eUjPc/zqnYhjdA6dCUDht1JMHBjqa0LGO6AvLKp+jHd1GqiDm/jpNtYSyTksPzVX1mtGs1xoPOGgWsXJ3yHp6dT3HjPJ3l94kvppN9Op01nO20BEaxno3hdcswFzYh9VzqtDrXk77OqXVdOig5qVSHa3xypEFkIS0uw8TSlQ7d5DpoL3fWNU5AWGPpIJh3lj1zpUlA/8+4hXINnkPRD/ROFODvK+P0H/NEojxgPpRFeHtVz9K3omd0id6eo/poBpP75rQy0zAN6rx9j4OIWq8rschdtV4tq0ecK0fXWhvwyO8bZWZy8NpyKYzcuUbWH9NyGd41gfkQxPyIeq+XufKW0dmDxkXpBiFVEjK5vTI+3U4w7TFvw== ================================================ FILE: docs/initrd.md ================================================ # Creating and using an initrd for Firecracker ## Creating ### Based on alpine or suse You can use the script found [here](https://github.com/marcov/firecracker-initrd) to generate an initrd either based on alpine or suse linux. The script extracts the init system from each distribution and creates a initrd. ### Custom Use this option for creating an initrd if you're building your own init or if you need any specific files / logic in your initrd. 
```bash mkdir initrd cp /path/to/your/init initrd/init # copy everything else you need in initrd/ cd initrd find . -print0 | cpio --null --create --verbose --format=newc > initrd.cpio ``` ## Usage When setting your boot source, add an `initrd_path` property like so: ```shell curl --unix-socket /tmp/firecracker.socket -i \ -X PUT 'http://localhost/boot-source' \ -H 'Accept: application/json' \ -H 'Content-Type: application/json' \ -d "{ \"kernel_image_path\": \"/path/to/kernel\", \"boot_args\": \"console=ttyS0 reboot=k panic=1 pci=off\", \"initrd_path\": \"/path/to/initrd.cpio\" }" ``` ### Notes - You should not use a drive with `is_root_device: true` when using an initrd - Make sure your kernel configuration has `CONFIG_BLK_DEV_INITRD=y` - If you don't want to place your init at the root of your initrd, you can add `rdinit=/path/to/init` to your `boot_args` property - If you intend to `pivot_root` in your init, it won't be possible because the initrd is mounted as a rootfs and cannot be unmounted. You will need to use `switch_root` instead. ================================================ FILE: docs/jailer.md ================================================ # The Firecracker Jailer ## Disclaimer The jailer is a program designed to isolate the Firecracker process in order to enhance Firecracker's security posture. It is meant to address the security needs of Firecracker only and is not intended to work with other binaries. Additionally, each jailer binary should be used with a statically linked Firecracker binary (with the default musl toolchain) of the same version. Experimental gnu builds are not supported.
## Jailer Usage The jailer is invoked in this manner: ```bash jailer --id \ --exec-file \ --uid \ --gid \ [--cgroup-version ] \ [--cgroup ] \ [--parent-cgroup ] \ [--chroot-base-dir ] \ [--netns ] \ [--resource-limit ] \ [--daemonize] \ [--new-pid-ns] \ [--...extra arguments for Firecracker] ``` - `--id` specifies the unique VM identification string, which may contain alphanumeric characters and hyphens. The maximum length is currently 64 characters. - `--exec-file` specifies the path to the Firecracker binary that will be exec-ed by the jailer. - `--uid` and `--gid` specify the uid and gid the jailer switches to as it execs the target binary. - `--cgroup-version` is used to select which type of cgroup hierarchy to use for the creation of cgroups. The default value is "1" which means that cgroups specified with `--cgroup` will be created within a v1 hierarchy. Supported options are "1" for cgroup-v1 and "2" for cgroup-v2. - `--cgroup` can be passed to the jailer to let it set the values when the microVM process is spawned. The argument must follow this format: `=` (e.g `cpuset.cpus=0`). This argument can be used multiple times to set multiple cgroups. This is useful to avoid providing privileged permissions to another process for setting the cgroups before or after the jailer is executed. The `--cgroup` flag can help as well to set Firecracker process cgroups before the VM starts running, with no need to create the entire cgroup hierarchy manually (which requires privileged permissions). - `--parent-cgroup` is used to allow the placement of microvm cgroups in custom nested hierarchies. The default value is the filename of ``, which will be henceforth referred to as ``. The behavior of this parameter depends on the following condition: - If either any `--cgroup` parameter is specified or `--cgroup-version=1` is passed, the jailer will create a new cgroup named `` for the microvm in the `/` subfolder. `` is the cgroup controller root for cgroup v1 (e.g.
`/sys/fs/cgroup/cpu`) or the unified controller hierarchy for cgroup v2 (e.g. `/sys/fs/cgroup/unified`). `` is a relative path within that hierarchy. For example, if `--parent-cgroup all_uvms/external_uvms` is specified, the jailer will write all cgroup parameters specified through `--cgroup` in `/sys/fs/cgroup//all_uvms/external_uvms/`. - If no `--cgroup` parameters are specified and `--cgroup-version=2` is passed, the jailer will not create a new cgroup. If the cgroup specified with `--parent-cgroup` exists, the jailer will move the process to the specified cgroup, contrary to its name. This behavior can be used when users want to configure a cgroup beforehand by themselves and move the process to the configured cgroup. Note that, if the specified cgroup has domain controllers (e.g. memory) enabled in `cgroup.subtree_control`, the move fails due to ["no internal process constraint"][1] and jailer exits with an error. If the cgroup specified with `--parent-cgroup` does not exist, the jailer does not move the process to any cgroup and proceeds without error. - `--chroot-base-dir` specifies the base folder where chroot jails are built. The default is `/srv/jailer`. - `--netns` specifies the path to a network namespace handle. If present, the jailer will use this to join the associated network namespace. - For extra security and control over resource usage, `--resource-limit` can be used to set bounds to the process resources. The argument must follow this format: `=` (e.g `no-file=1024`) and can be used multiple times to set multiple bounds. Current available resources that can be limited using this argument are: - `fsize`: The maximum size in bytes for files created by the process. - `no-file`: Specifies a value one greater than the maximum file descriptor number that can be opened by this process.
Here is an example on how to set multiple resource limits using this argument: ```bash --resource-limit fsize=250000000 --resource-limit no-file=1024 ``` - When present, `--daemonize` causes the jailer to call `setsid()` and redirect all three standard I/O file descriptors to `/dev/null`. - When present, `--new-pid-ns` causes the jailer to spawn the provided binary into a new PID namespace. It makes use of the libc `clone()` function with the `CLONE_NEWPID` flag. As a result, the jailer and the process running the exec file have different PIDs. The PID of the child process is stored in the jail root directory inside `.pid`. - The jailer adheres to the "end of command options" convention, meaning all parameters specified after `--` are forwarded to Firecracker. For example, this can be paired with the `--config-file` Firecracker argument to specify a configuration file when starting Firecracker via the jailer (the file path and the resources referenced within must be valid relative to a jailed Firecracker). Please note the jailer already passes `--id` parameter to the Firecracker process. ## Jailer Operation After starting, the Jailer goes through the following operations: - Validate **all provided paths** and the VM ID. - Close all open file descriptors based on `/proc//fd` except input, output and error. - Cleanup all environment variables received from the parent process. - Create the `///root` folder, which will be henceforth referred to as ``. Nothing is done if the path already exists (it should not, since `` is supposed to be unique). - Copy the file specified with `--exec-file` to `/`. This ensures the new process will not share memory with any other Firecracker process. - Set resource bounds for current process and its children through `--resource-limit` argument, by calling `setrlimit()` system call with the specific resource argument. If no limits are provided, the jailer bounds `no-file` to a maximum default value of 2048. 
- Create the cgroup sub-folders. The jailer can use either `cgroup v1` or `cgroup v2`. On most systems, this is mounted by default in `/sys/fs/cgroup` (should be mounted by the user otherwise). The jailer will parse `/proc/mounts` to detect where each of the controllers required in `--cgroup` can be found (multiple controllers may share the same path). For each identified location (referred to as ``), the jailer creates the `//` subfolder, and writes the current pid to `///tasks`. Also, the value passed for each `` is written to the file. - Call `unshare()` into a new mount namespace, use `pivot_root()` to switch the old system root mount point with a new one base in ``, switch the current working directory to the new root, unmount the old root mount point, and call `chroot` into the current directory. - Use `mknod` to create a `/dev/net/tun` equivalent inside the jail. - Use `mknod` to create a `/dev/kvm` equivalent inside the jail. - Use `chown` to change ownership of the `` (root path `/` as seen by the jailed firecracker), `/dev/net/tun`, `/dev/kvm`. The ownership is changed to the provided `:`. - If `--netns ` is present, attempt to join the specified network namespace. - If `--daemonize` is specified, call `setsid()` and redirect `STDIN`, `STDOUT`, and `STDERR` to `/dev/null`. - If `--new-pid-ns` is specified, call `clone()` with `CLONE_NEWPID` flag to spawn a new process within a new PID namespace. The new process will assume the role of init(1) in the new namespace. The parent will store child's PID inside `.pid`, while the child drops privileges and `exec()`s into the ``, as described below. - Drop privileges via setting the provided `uid` and `gid`. - Exec into ` --id= --start-time-us= --start-time-cpu-us=` (and also forward any extra arguments provided to the jailer after `--`, as mentioned in the **Jailer Usage** section), where: - ``: (`string`) - The `` argument provided to jailer. 
- ``: (`number`) time calculated by the jailer that it spent doing its work. ## Example Run and Notes Let’s assume Firecracker is available as `/usr/bin/firecracker`, and the jailer can be found at `/usr/bin/jailer`. We pick the **unique id 551e7604-e35c-42b3-b825-416853441234**, and we choose to run on **NUMA node 0** (in order to isolate the process in the 0th NUMA node we need to set `cpuset.mems=0` and `cpuset.cpus` equals to the CPUs of that NUMA node), using **uid 123**, and **gid 100**. For this example, we are content with the default `/srv/jailer` chroot base dir. We start by running: ```bash /usr/bin/jailer --id 551e7604-e35c-42b3-b825-416853441234 --cgroup cpuset.mems=0 --cgroup cpuset.cpus=$(cat /sys/devices/system/node/node0/cpulist) --exec-file /usr/bin/firecracker --uid 123 --gid 100 \ --netns /var/run/netns/my_netns --daemonize ``` After opening the file descriptors mentioned in the previous section, the jailer will create the following resources (and all their prerequisites, such as the path which contains them): - `/srv/jailer/firecracker/551e7604-e35c-42b3-b825-416853441234/root/firecracker` (copied from `/usr/bin/firecracker`) We are going to refer to `/srv/jailer/firecracker/551e7604-e35c-42b3-b825-416853441234/root` as ``. Let’s also assume the, **cpuset** cgroups are mounted at `/sys/fs/cgroup/cpuset`. The jailer will create the following subfolder (which will inherit settings from the parent cgroup): - `/sys/fs/cgroup/cpuset/firecracker/551e7604-e35c-42b3-b825-416853441234` It’s worth noting that, whenever a folder already exists, nothing will be done, and we move on to the next directory that needs to be created. This should only happen for the common `firecracker` subfolder (but, as for creating the chroot path before, we do not issue an error if folders directly associated with the supposedly unique `` already exist). 
The jailer then writes the current pid to `/sys/fs/cgroup/cpuset/firecracker/551e7604-e35c-42b3-b825-416853441234/tasks`, It also writes `0` to `/sys/fs/cgroup/cpuset/firecracker/551e7604-e35c-42b3-b825-416853441234/cpuset.mems`, And the corresponding CPUs to `/sys/fs/cgroup/cpuset/firecracker/551e7604-e35c-42b3-b825-416853441234/cpuset.cpus`. Since the `--netns` parameter is specified in our example, the jailer opens `/var/run/netns/my_netns` to get a file descriptor `fd`, uses `setns(fd, CLONE_NEWNET)` to join the associated network namespace, and then closes `fd`. The `--daemonize` flag is also present, so the jailers opens `/dev/null` as **RW** and keeps the associate file descriptor as `dev_null_fd` (we do this before going inside the jail), to be used later. Build the chroot jail. First, the jailer uses `unshare()` to enter a new mount namespace, and changes the propagation of all mount points in the new namespace to private using `mount(NULL, “/”, NULL, MS_PRIVATE | MS_REC, NULL)`, as a prerequisite to `pivot_root()`. Another required operation is to bind mount `` on top of itself using `mount(, , NULL, MS_BIND, NULL)`. At this point, the jailer creates the folder `/old_root`, changes the current directory to ``, and calls `syscall(SYS_pivot_root, “.”, “old_root”)`. The final steps of building the jail are unmounting `old_root` using `umount2(“old_root”, MNT_DETACH)`, deleting `old_root` with `rmdir`, and finally calling `chroot(“.”)` for good measure. From now, the process is jailed in ``. Create the special file `/dev/net/tun`, using `mknod(“/dev/net/tun”, S_IFCHR | S_IRUSR | S_IWUSR, makedev(10, 200))`, and then call `chown(“/dev/net/tun”, 123, 100)`, so Firecracker can use it after dropping privileges. This is required to use multiple TAP interfaces when running jailed. Do the same for `/dev/kvm`. Change ownership of `` to `:` so that Firecracker can create its API socket there. 
Since the `--daemonize` flag is present, call `setsid()` to join a new session, a new process group, and to detach from the controlling terminal. Then, redirect standard file descriptors to `/dev/null` by calling `dup2(dev_null_fd, STDIN)`, `dup2(dev_null_fd, STDOUT)`, and `dup2(dev_null_fd, STDERR)`. Close `dev_null_fd`, because it is no longer necessary. Finally, the jailer switches the uid to `123`, and gid to `100`, and execs ```console ./firecracker \ --id="551e7604-e35c-42b3-b825-416853441234" \ --start-time-us= \ --start-time-cpu-us= ``` Now firecracker creates the socket at `/srv/jailer/firecracker/551e7604-e35c-42b3-b825-416853441234/root/` to interact with the VM. Note: default value for `` is `/run/firecracker.socket`. ### Observations - The user must create hard links for (or copy) any resources which will be provided to the VM via the API (disk images, kernel images, named pipes, etc) inside the jailed root folder. Also, permissions must be properly managed for these resources; for example the user which Firecracker runs as must have both **read and write permissions** to the backing file for a RW block device. - By default the VMs are not assigned to any NUMA node or pinned to any CPU. The user must manage any fine tuning of resource partitioning via cgroups, by using the `--cgroup` command line argument. - It’s up to the user to handle cleanup after running the jailer. One way to do this involves registering handlers with the cgroup `notify_on_release` mechanism, while being wary about potential race conditions (the instance crashing before the subscription process is complete, for example). - For extra resilience, the `--new-pid-ns` flag enables the Jailer to exec the binary file in a new PID namespace, in order to become a pseudo-init process. Alternatively, the user can spawn the jailer in a new PID namespace via a combination of `clone()` with the `CLONE_NEWPID` flag and `exec()`.
- We run the jailer as the `root` user; it actually requires a more restricted set of capabilities, but that's to be determined as features stabilize. - The jailer can only log messages to stdout/err for now, which is why the logic associated with `--daemonize` runs towards the end, instead of the very beginning. We are working on adding better logging capabilities. ### Known limitations - The time it takes to create a jail depends on the number of mount points in the system and the number of jailers starting at the same time. Due to the number of mount points playing a bigger role in jailer slowdown, it is recommended to keep the number of mount points in a system to a minimum. The approximate slowdown of the jail creation time is: - 2x when 10 jails are created in parallel with 0 mount points in the system - 10x when 10 jails are created in parallel with 500 mount points in the system. - When passing the --daemonize option to Firecracker without the --new-pid-ns option, the Firecracker process will have a different PID than the Jailer process and killing the Jailer will not kill the Firecracker process. As a workaround to get Firecracker PID, the Jailer stores the PID of the child process in the jail root directory inside `.pid` for all cases regardless of whether `--new-pid-ns` was provided. The suggested way to fetch Firecracker's PID when using the Jailer is to read the `firecracker.pid` file present in the Jailer's root directory. ## Caveats - If all the cgroup controllers are bunched up on a single mount point using the "all" option, our current program logic will complain it cannot detect individual controller mount points. [1]: https://docs.kernel.org/admin-guide/cgroup-v2.html#no-internal-process-constraint ================================================ FILE: docs/kernel-policy.md ================================================ # Firecracker's Kernel Support Policy Firecracker is tightly coupled with the guest and host kernels on which it is run.
This document presents our kernel support policy which aims to help our customers choose host and guest OS configuration, and predict future kernel related changes. We are continuously validating the currently supported Firecracker releases (as per [Firecracker’s release policy](../docs/RELEASE_POLICY.md)) using a combination of all supported host and guest kernel versions in the table below. Once a kernel version is officially added, it is supported for a **minimum of 2 years**. At least 2 major guest and host versions will be supported at any time. When support is added for a third kernel version, the oldest will be deprecated and removed in a following release, after its minimum end of support date. > [!NOTE] > > While other versions and other kernel configs might work, they are not > periodically validated in our test suite, and using them might result in > unexpected behaviour. Starting with release `v1.0` each major and minor > release will specify the supported kernel versions. ### Host Kernel | Page size | Host kernel | Min. version | Min. end of support | | --------: | ----------: | -----------: | ------------------: | | 4K | v5.10 | v1.0.0 | 2024-01-31 | | 4K | v6.1 | v1.5.0 | 2025-10-12 | ### Guest Kernel | Page size | Guest kernel | Min. version | Min. end of support | | --------: | -----------: | -----------: | ------------------: | | 4K | v5.10 | v1.0.0 | 2024-01-31 | | 4K | v6.1 | v1.9.0 | 2026-09-02 | The guest kernel configs used in our validation pipelines can be found [here](../resources/guest_configs/) while a breakdown of the relevant guest kernel modules can be found in the next section. We use these configurations to build microVM-specific kernels vended by Amazon Linux. microVM kernel source code is published in the Amazon Linux [linux repo](https://github.com/amazonlinux/linux) under tags in the form of `microvm-kernel-*`, e.g. 
6.1.128-3.201.amzn2023 kernel can be found [here](https://github.com/amazonlinux/linux/tree/microvm-kernel-6.1.128-3.201.amzn2023). These kernels may have diverged from the equivalent mainline versions, as we often backport patches that we require for supporting Firecracker features not present in the kernel versions we officially support. As a result, kernel configurations found in this repo should be used to build exclusively the aforementioned Amazon Linux kernels. We do not guarantee that using these configurations to build upstream kernels will work or produce usable kernel images. ## Guest kernel configuration items The configuration items that may be relevant for Firecracker are: - serial console - `CONFIG_SERIAL_8250_CONSOLE`, `CONFIG_PRINTK` - initrd support - `CONFIG_BLK_DEV_INITRD` - virtio devices - `CONFIG_VIRTIO_MMIO` - balloon - `CONFIG_MEMORY_BALLOON`, `CONFIG_VIRTIO_BALLOON` - block - `CONFIG_VIRTIO_BLK` - partuuid support - `CONFIG_MSDOS_PARTITION` - network - `CONFIG_VIRTIO_NET` - vsock - `CONFIG_VIRTIO_VSOCKETS` - entropy - `CONFIG_HW_RANDOM_VIRTIO` - guest RNG - `CONFIG_RANDOM_TRUST_CPU` - use CPU RNG instructions (if present) to initialize RNG.
Available for >= 5.10 - ACPI support - `CONFIG_ACPI` and `CONFIG_PCI` - PCI support: - `CONFIG_BLK_MQ_PCI` - `CONFIG_PCI` - `CONFIG_PCI_MMCONFIG` - `CONFIG_PCI_MSI` - `CONFIG_PCIEPORTBUS` - `CONFIG_VIRTIO_PCI` - `CONFIG_PCI_HOST_COMMON` - `CONFIG_PCI_HOST_GENERIC` There are also guest config options which are dependent on the platform on which Firecracker is run: ### ARM - timekeeping - `CONFIG_ARM_AMBA`, `CONFIG_RTC_DRV_PL031` - serial console - `CONFIG_SERIAL_OF_PLATFORM` ### x86_64 - timekeeping - `CONFIG_KVM_GUEST` (which enables CONFIG_KVM_CLOCK) - high precision timekeeping - `CONFIG_PTP_1588_CLOCK`, `CONFIG_PTP_1588_CLOCK_KVM` - external clean shutdown - `CONFIG_SERIO_I8042`, `CONFIG_KEYBOARD_ATKBD` - virtio devices - `CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES` #### Minimal boot requirements Depending on the source of boot (either from a block device or from an initrd), the minimal configuration for a guest kernel for a successful microVM boot is: - Booting with initrd: - `CONFIG_BLK_DEV_INITRD=y` - aarch64 `CONFIG_VIRTIO_MMIO=y` (for the serial device). - x86_64 `CONFIG_KVM_GUEST=y`. - Booting with root block device: - aarch64 - `CONFIG_VIRTIO_BLK=y` - x86_64 - `CONFIG_VIRTIO_BLK=y` - `CONFIG_ACPI=y` - `CONFIG_PCI=y` - `CONFIG_KVM_GUEST=y`. *Optional*: To enable boot logs set `CONFIG_SERIAL_8250_CONSOLE=y` and `CONFIG_PRINTK=y` in the guest kernel config. ##### Booting with ACPI (x86_64 only): Firecracker supports booting kernels with ACPI support. The relevant configurations for the guest kernel are: - `CONFIG_ACPI=y` - `CONFIG_PCI=y` > [!NOTE] > > Firecracker does not support PCI devices. The `CONFIG_PCI` option is needed > for ACPI initialization inside the guest. ACPI supersedes the legacy way of booting a microVM, i.e. via MPTable and command line parameters for VirtIO devices. We suggest that users disable MPTable and passing VirtIO devices via kernel command line parameters. These boot mechanisms are now deprecated.
Users can disable these features by disabling the corresponding guest kernel configuration parameters: - `CONFIG_X86_MPPARSE=n` - `CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES=n` During the deprecation period Firecracker will continue to support the legacy way of booting a microVM. Firecracker will be able to boot kernels with the following configurations: - Only ACPI - Only legacy mechanisms - Both ACPI and legacy mechanisms ##### Booting with PCI: Firecracker supports booting guest microVMs with PCI support. This option is enabled using the `--enable-pci` flag when launching the Firecracker process. With PCI enabled, Firecracker will create all VirtIO devices using a PCI VirtIO transport. The PCI transport typically achieves higher throughput and lower latency for VirtIO devices. No further, per device, configuration is needed to enable the PCI transport. PCI support is optional; if it is not enabled Firecracker will create VirtIO devices using the MMIO transport. For Firecracker microVMs to boot properly with PCI support, use a guest kernel built with PCI support. See the relevant Kconfig flags in our list of [relevant Kconfig options](#guest-kernel-configuration-items): > [!IMPORTANT] > > Make sure that the kernel command line **does NOT** include the `pci=off` > slug, which disables PCI support during boot time within the guest. When PCI > is disabled, Firecracker will add this slug in the command line to instruct > the guest kernel to skip useless PCI checks. For more info, look into the > section for [Kernel command line parameters](#kernel-command-line-parameters). > [!NOTE] > > On x86_64 systems, `CONFIG_PCI` Kconfig option is needed even when booting > microVMs without PCI support in case users want to use ACPI to boot. See > [here](#booting-with-acpi-x86_64-only) for more info. 
## Kernel command line parameters By default, Firecracker will boot a guest microVM passing the following command line parameters to the kernel: `reboot=k panic=1 nomodule 8250.nr_uarts=0 i8042.noaux i8042.nomux i8042.dumbkbd swiotlb=noforce`. - `reboot=k` shut down the guest on reboot, instead of rebooting - `panic=1` on panic, reboot after 1 second - `nomodule` disable loadable kernel module support - `8250.nr_uarts=0` disable 8250 serial interface - `i8042.noaux` do not probe the i8042 controller for an attached mouse (save boot time) - `i8042.nomux` do not probe i8042 for a multiplexing controller (save boot time) - `i8042.dumbkbd` do not attempt to control kbd state via the i8042 (save boot time) - `swiotlb=noforce` disable software bounce buffers (SWIOTLB) When running without [PCI support](#booting-with-pci), Firecracker will also append `pci=off` to the above list. This option instructs the guest kernel to avoid PCI probing. Users can provide their own command line parameters through the `boot_args` field of the `/boot-source` [Firecracker API](../src/firecracker/swagger/firecracker.yaml). ## Caveats - [Snapshot compatibility across kernel versions](snapshotting/snapshot-support.md#snapshot-compatibility-across-kernel-versions) - When booting with kernels that support both ACPI and legacy boot mechanisms Firecracker passes VirtIO devices to the guest twice, once through ACPI and a second time via kernel command line parameters. In these cases, the guest tries to initialize devices twice. The second time, initialization fails and the guest will emit warning messages in `dmesg`, however the devices will work correctly. ================================================ FILE: docs/logger.md ================================================ # Firecracker logger Configuration For the logging capability, Firecracker uses a single Logger object. The Logger can be configured either by sending a `PUT` API Request to the `/logger` path or by command line. 
You can configure the Logger only once (by using one of these options) and once configured, you can not update it. ## Prerequisites In order to configure the Logger, first you have to create the resource that will be used for logging: ```bash # Create the required named pipe: mkfifo logs.fifo # The logger also works with usual files: touch logs.file ``` ## Using the API socket for configuration You can configure the Logger by sending the following API command: ```bash curl --unix-socket /tmp/firecracker.socket -i \ -X PUT "http://localhost/logger" \ -H "accept: application/json" \ -H "Content-Type: application/json" \ -d "{ \"log_path\": \"logs.fifo\", \"level\": \"Warning\", \"show_level\": false, \"show_log_origin\": false }" ``` Details about the required and optional fields can be found in the [swagger definition](../src/firecracker/swagger/firecracker.yaml). ## Using command line parameters for configuration If you want to configure the Logger on startup and without using the API socket, you can do that by passing the parameter `--log-path` to the Firecracker process: ```bash ./firecracker --api-sock /tmp/firecracker.socket --log-path ``` The other Logger fields have, in this case, the default values: `Level -> Warning`, `show_level -> false`, `show_log_origin -> false`. For configuring these too, you can also pass the following optional parameters: `--level `, `--show-level`, `--show-log-origin`: ```bash ./firecracker --api-sock /tmp/firecracker.socket --log-path logs.fifo --level Error --show-level --show-log-origin ``` ## Reading from the logging destination The `logs.fifo` pipe will store the human readable logs, e.g. errors, warnings etc. (depending on the level).
If the path provided is a named pipe, you can use the script below to read from it: ```shell logs=logs.fifo while true do if read line <$logs; then echo $line fi done echo "Reader exiting" ``` Otherwise, if the path points to a normal file, you can simply do: ```shell script cat logs.file ``` ================================================ FILE: docs/memory-hotplug.md ================================================ # Memory Hotplugging with virtio-mem ## What is virtio-mem `virtio-mem` is a para-virtualized memory device that enables dynamic memory resizing for virtual machines. Unlike traditional memory hotplug mechanisms, `virtio-mem` provides a flexible and efficient solution that works across different architectures. The `virtio-mem` device manages a contiguous memory region that is divided into fixed-size blocks. The host can request the guest to plug (make available) or unplug (release) memory by changing the device's target size, and the guest driver responds by allocating or freeing memory blocks accordingly. This approach provides fine-grained control over guest memory with minimal overhead. Firecracker further adds the concept of slots, which are a set of contiguous blocks (usually 128MiB) that can be fully protected from guest accesses to prevent malicious guests from accessing the hotpluggable memory range when not allowed by the host. ## Prerequisites To support memory hotplugging via `virtio-mem`, you must use a guest kernel with the appropriate version and configuration options enabled as follows: #### Kernel Version Requirements - `x86_64`: minimal kernel version is 5.16 - Earlier versions of the kernel don't support `VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE` - `aarch64`: minimal kernel version is 5.18 For more information about officially supported guest kernels, refer to the [kernel policy documentation](kernel-policy.md). #### Kernel Config `CONFIG_VIRTIO_MEM` needs to be enabled in the guest kernel in order to use `virtio-mem`. 
## Adding hotpluggable memory The `virtio-mem` device must be configured during VM setup with the total amount of memory that can be hotplugged, before starting the virtual machine. This can be done through a `PUT` request on `/hotplug/memory` or by including the configuration in the JSON configuration file. In both cases, when the VM is started, the hotpluggable region will be completely unplugged. > [!Note] > > Memory configured through `/hotplug/memory` is a separate pool of memory from > the usual "boot memory". Only memory configured through the hotplug endpoint > can be plugged or unplugged dynamically. ### Configuration Parameters - `total_size_mib` (required): The maximum size of hotpluggable memory in MiB. This defines the upper bound of memory that can be added to the VM. Must be a multiple of `slot_size_mib`. - `block_size_mib` (optional, default: 2): The size of individual memory blocks in MiB. Must be at least 2 MiB and a power of 2. Larger block sizes provide better performance but less granularity (harder for the guest to unplug). - `slot_size_mib` (optional, default: 128): The size of KVM memory slots in MiB. Must be at least 128 MiB and a multiple of `block_size_mib`. Larger slot sizes improve performance for large memory operations but reduce unplugging protection efficiency. It is recommended to leave these values to the default unless strict memory protection is required, in which case `block_size_mib` should be equal to `slot_size_mib`. Note that this will make it harder for the guest kernel to find contiguous memory to hot-remove. Refer to the [Memory Protection](#memory-protection) section below for more details. ### API Configuration Here is an example of how to configure the `virtio-mem` device via the API. In this example, the hotpluggable memory is configured with a maximum of 1 GiB in size and default block and slot sizes. 
```console socket_location=/run/firecracker.socket curl --unix-socket $socket_location -i \ -X PUT 'http://localhost/hotplug/memory' \ -H 'Accept: application/json' \ -H 'Content-Type: application/json' \ -d "{ \"total_size_mib\": 1024, \"block_size_mib\": 2, \"slot_size_mib\": 128 }" ``` > [!Note] > > This is only allowed before the `InstanceStart` action and not on > snapshot-restored VMs (which will use the configuration saved in the > snapshot). ### JSON Configuration To configure via JSON, add the following to your VM configuration file. In this example, the hotpluggable memory is configured with a maximum of 1 GiB in size and default block and slot sizes. ```json { "memory-hotplug": { "total_size_mib": 1024, "block_size_mib": 2, "slot_size_mib": 128 } } ``` ### Checking Device Status After configuration, you can query the device status at any time: ```console socket_location=/run/firecracker.socket curl --unix-socket $socket_location -i \ -X GET 'http://localhost/hotplug/memory' \ -H 'Accept: application/json' ``` This returns information about the current device state, including: - `total_size_mib`: Maximum hotpluggable memory size - `block_size_mib`: Block size used by the device - `slot_size_mib`: Slot size used by Firecracker (granularity of memory protection) - `plugged_size_mib`: Currently plugged (available) memory by the guest - `requested_size_mib`: Target memory size set by the host ## Operating the virtio-mem device Once configured and the VM is running, you can dynamically adjust the amount of memory available to the guest by updating the requested size, which is the target that the guest should reach by requesting to plug or unplug memory blocks. The initial value of the requested size is 0 MiB, meaning that no hotpluggable memory blocks are plugged on VM boot. 
### Hotplugging Memory To add memory to a running VM, request a greater size from the `virtio-mem` device: ```console socket_location=/run/firecracker.socket curl --unix-socket $socket_location -i \ -X PATCH 'http://localhost/hotplug/memory' \ -H 'Accept: application/json' \ -H 'Content-Type: application/json' \ -d "{ \"requested_size_mib\": 512 }" ``` Setting a higher `requested_size_mib` value causes the guest driver to allocate memory blocks to reach the requested size. The process is asynchronous -- the guest will incrementally plug memory until it reaches the target. It is recommended to use the `GET` API to monitor the current state of the hotplugging by the driver. The operation is complete when `plugged_size_mib` is equal to `requested_size_mib`. ### Hot-removing Memory To remove memory from a running VM, request a lower size: ```console socket_location=/run/firecracker.socket curl --unix-socket $socket_location -i \ -X PATCH 'http://localhost/hotplug/memory' \ -H 'Accept: application/json' \ -H 'Content-Type: application/json' \ -d "{ \"requested_size_mib\": 256 }" ``` Setting a lower `requested_size_mib` value causes the guest driver to free memory blocks. Once the guest reports a block to be unplugged, the unplugged memory is immediately freed from the host process. If all blocks in a memory slot are unplugged, then Firecracker will also protect the memory slot, removing access from the guest. To remove all hotplugged memory, set `requested_size_mib` to 0: ```console curl --unix-socket $socket_location -i \ -X PATCH 'http://localhost/hotplug/memory' \ -H 'Accept: application/json' \ -H 'Content-Type: application/json' \ -d '{"requested_size_mib": 0}' ``` > [!Note] > > Unplugging requires the guest to cooperate and actually be able to find and > report memory blocks that can be moved or freed by the host. As in the > hotplugging case, it is recommended to monitor the operation through the `GET` > API. 
## Configuring the guest driver The guest kernel must be configured with specific boot or runtime module parameters to ensure optimal behavior of the `virtio-mem` driver and memory hotplug module. In short: - pass `memhp_default_state=online_movable` if hot-removal is required and there is enough free boot memory for allocating the memory map of the hotplugged memory (64B per 4KiB page). - pass `memory_hotplug.memmap_on_memory=1 memhp_default_state=online` if hot-removal is not required and the hotpluggable memory area can be much bigger than the normal memory. #### `memhp_default_state` This parameter controls how newly hotplugged memory is onlined by the kernel. This parameter is required for automatically onlining new memory pages. It is recommended to set it to `online_movable` as below for reliable memory hot-removal. ``` memhp_default_state=online_movable ``` The `online_movable` setting ensures that: - Hotplugged memory is placed in the MOVABLE zone - The kernel can migrate pages when unplugging is requested - Memory can be successfully freed back to the host Other possible values (not recommended for hot-removal): - `online`: Places memory automatically between NORMAL and MOVABLE zone (may prevent hot-remove) - `online_kernel`: Places memory in NORMAL zone (may prevent hot-remove) - `offline` (default): Memory requires manual onlining #### `memory_hotplug.memmap_on_memory` (optional) This parameter controls whether the kernel allocates memory map (`struct pages`) for hotplugged memory from the hotplugged memory itself, rather than from boot memory. Without this parameter, the kernel needs 64B for every 4KiB page in the boot memory. For example, it would need 262 MiB of free "boot" memory to hotplug 16 GiB of memory. This parameter only works if the memory is not entirely hotplugged as MOVABLE. 
``` memory_hotplug.memmap_on_memory=1 memhp_default_state=online ``` This configuration is recommended in case hot-removal is not a priority, and the hotpluggable memory area is very large. #### Additional Resources For more detailed and up-to-date information about memory hotplug in the Linux kernel, refer to the official kernel documentation: https://docs.kernel.org/admin-guide/mm/memory-hotplug.html ## Security Considerations **The `virtio-mem` device is a paravirtualized device requiring cooperation from a driver in the guest.** ### Memory Protection Firecracker provides the following guarantees about unplugged memory: - **Memory that is never plugged is protected**: Memory that has never been plugged before is protected from the guest by not making it available to the guest via a KVM slot and by using `mprotect` to prevent access from device emulation. Any attempt by the guest to access unplugged memory will result in a fault and may crash the Firecracker process. - **Unplugged memory slots are protected**: Memory slots that have been unplugged are removed from KVM and `mprotect`-ed. This requires the guest to report contiguous blocks to be freed for the memory slot to be actually protected. - **Unplugged memory blocks are freed**: When a memory block is unplugged, the backing pages are freed, for example using `madvise(MADV_DONTNEED)` for anon memory, returning memory to the host at block granularity. ### Trust Model While Firecracker enforces memory isolation at the host level, a compromised guest driver could: - Fail to plug or unplug memory as requested by the device - Attempt to access unplugged memory (will result in a fault and crash of Firecracker) Users should: - Be prepared to handle cases where the guest doesn't cooperate with memory operations by monitoring the `GET` API. - Implement host-level memory limits and monitoring, e.g. through `cgroup`. ## Compatibility with Other Features `virtio-mem` is compatible with all Firecracker features. 
Below are some specific changes in the other features when using memory hotplugging. ### Snapshots Full and diff snapshots will include the unplugged areas as sparse "holes" in the memory snapshot file. Sparse file support is recommended to efficiently handle the memory snapshot files. ### Userfaultfd The userfaultfd (uffd) handler[^uffd] will need to handle the entire hotpluggable memory range even if unplugged. The uffd handler may decide to unregister unplugged memory ranges (holes in the memory file). The uffd handler will also need to handle `UFFD_EVENT_REMOVE` events for hot-removed blocks, either unregistering the range or storing the information and returning an empty page on the next access. ### Vhost-user `vhost-user`[^vhost-user] is fully supported, but Firecracker cannot guarantee protection of unplugged memory from a `vhost-user` backend. A malicious guest driver may be able to trick the backend to access unplugged memory. This is not possible in Firecracker itself as unplugged memory slots are `mprotect`-ed. [^uffd]: snapshotting/handling-page-faults-on-snapshot-resume.md#userfaultfd [^vhost-user]: api_requests/block-vhost-user.md ================================================ FILE: docs/metrics.md ================================================ # Firecracker Metrics Configuration For the metrics capability, Firecracker uses a single Metrics system. This system can be configured either by: a) sending a `PUT` API Request to the `/metrics` path: or b) using the `--metrics-path` CLI option. Note the metrics configuration is **not** part of the guest configuration and is not restored from a snapshot. 
## Prerequisites In order to configure the Metrics, first you have to create the resource that will be used for storing the metrics: ```bash # Create the required named pipe: mkfifo metrics.fifo # The Metrics system also works with usual files: touch metrics.file ``` ## Configuring the system via CLI When launching Firecracker, use the CLI option to set the metrics file. ```bash ./firecracker --metrics-path metrics.fifo ``` ## Configuring the system via API You can configure the Metrics system by sending the following API command: ```bash curl --unix-socket /tmp/firecracker.socket -i \ -X PUT "http://localhost/metrics" \ -H "accept: application/json" \ -H "Content-Type: application/json" \ -d "{ \"metrics_path\": \"metrics.fifo\" }" ``` Details about this configuration can be found in the [swagger definition](../src/firecracker/swagger/firecracker.yaml). The metrics are written to the `metrics_path` in JSON format. ## Flushing the metrics The metrics get flushed in two ways: - without user intervention every 60 seconds; - upon user demand, by issuing a `FlushMetrics` request. You can find how to use this request in the [actions API](api_requests/actions.md). If the path provided is a named pipe, you can use the script below to read from it: ```shell metrics=metrics.fifo while true do if read line <$metrics; then echo $line fi done echo "Reader exiting" ``` Otherwise, if the path points to a normal file, you can simply do: ```shell script cat metrics.file ``` ## Metrics emitted by Firecracker The metrics emitted by Firecracker are in JSON format. 
Below are the keys present in each metrics json object emitted by Firecracker: ``` "api_server" "balloon" "block" "deprecated_api" "entropy" "get_api_requests" "i8042" "latencies_us" "logger" "mmds" "net" "patch_api_requests" "put_api_requests" "rtc" "seccomp" "signals" "uart" "vcpu" "vhost_user_block" "vmm" "vsock" ``` Below table explains where Firecracker metrics are defined : | Metrics key | Device | Additional comments | | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | balloon | [BalloonDeviceMetrics](../src/vmm/src/devices/virtio/balloon/metrics.rs) | Represent metrics for the Balloon device. | | block | [BlockDeviceMetrics](../src/vmm/src/devices/virtio/block/virtio/metrics.rs) | Represent aggregate metrics for Virtio Block device. | | block\_{block_drive_id} | [BlockDeviceMetrics](../src/vmm/src/devices/virtio/block/virtio/metrics.rs) | Represent Virtio Block device metrics for the endpoint `"/drives/{drive_id}"` e.g. `"block_rootfs":` represent metrics for the endpoint `"/drives/rootfs"` | | i8042 | [I8042DeviceMetrics](../src/vmm/src/devices/legacy/i8042.rs) | Represent Metrics specific to the i8042 device. | | net | [NetDeviceMetrics](../src/vmm/src/devices/virtio/net/metrics.rs) | Represent aggregate metrics for Virtio Net device. | | net\_{iface_id} | [NetDeviceMetrics](../src/vmm/src/devices/virtio/net/metrics.rs) | Represent Virtio Net device metrics for the endpoint `"/network-interfaces/{iface_id}"` e.g. 
`net_eth0` represent metrics for the endpoint `"/network-interfaces/eth0"` | | rtc | [RTCDeviceMetrics](../src/vmm/src/devices/legacy/serial.rs) | Represent Metrics specific to the RTC device. `Note`: this is emitted only on `aarch64`. | | uart | [SerialDeviceMetrics](../src/vmm/src/devices/legacy/serial.rs) | Represent Metrics specific to the serial device. | | vhost_user\_{dev}\_{dev_id} | [VhostUserDeviceMetrics](../src/vmm/src/devices/virtio/vhost_user_metrics.rs) | Represent Vhost-user device metrics for the device `dev` and device id `dev_id`. e.g. `"vhost_user_block_rootfs":` represent metrics for vhost-user block device having the endpoint `"/drives/rootfs"` | | vsock | [VsockDeviceMetrics](../src/vmm/src/devices/virtio/vsock/metrics.rs) | Represent Metrics specific to the vsock device. | | entropy | [EntropyDeviceMetrics](../src/vmm/src/devices/virtio/rng/metrics.rs) | Represent Metrics specific to the entropy device. | | "api_server"
"deprecated_api"
"get_api_requests"
"latencies_us"
"logger"
"mmds"
"patch_api_requests"
"put_api_requests"
"seccomp"
"signals"
"vcpu"
"vmm" | [metrics.rs](../src/vmm/src/logger/metrics.rs) | Rest of the metrics are defined in the same file metrics.rs. | Note: Firecracker emits all the above metrics regardless of the presence of that component, i.e. even if the `vsock` device is not attached to the microVM, Firecracker will still emit the Vsock metrics with key as `vsock` and value of all metrics defined in `VsockDeviceMetrics` as `0`. ### Units for Firecracker metrics: Units for Firecracker metrics are embedded in their name.
The pseudo code below should be used to extract units from a Firecracker metrics name:
Note: An example of full_key for the logic below is `"vcpu.exit_io_in_agg.min_us"` ``` if substring "_bytes" or "_bytes_count" is present in any subkey of full_key Unit is "Bytes" else if substring "_ms" is present in any subkey of full_key Unit is "Milliseconds" else if substring "_us" is present in any subkey of full_key Unit is "Microseconds" else Unit is "Count" ``` ================================================ FILE: docs/mmds/mmds-design.md ================================================ # microVM Metadata Service MMDS consists of three major logical components: the backend, the data store, and the minimalist HTTP/TCP/IPv4 stack (named *Dumbo*). They all exist within the Firecracker process, and outside the KVM boundary; the first is a part of the API server, the data store is a global entity for a single microVM, and the last is a part of the device model. ## The MMDS backend Users can add/update the MMDS contents via the backend, which is accessible through the Firecracker API. Setting the initial contents involves a `PUT` request to the `/mmds` API resource, with a JSON body that describes the desired data store structure and contents. Here's a JSON example: ```json { "latest": { "meta-data": { "ami-id": "ami-12345678", "reservation-id": "r-fea54097", "local-hostname": "ip-10-251-50-12.ec2.internal", "public-hostname": "ec2-203-0-113-25.compute-1.amazonaws.com", "network": { "interfaces": { "macs": { "02:29:96:8f:6a:2d": { "device-number": "13345342", "local-hostname": "localhost", "subnet-id": "subnet-be9b61d" } } } } } } } ``` The MMDS contents can be updated either via a subsequent `PUT` (that replaces them entirely), or using `PATCH` requests, which feed the JSON body into the JSON Merge Patch functionality, based on [RFC 7396](https://tools.ietf.org/html/rfc7396). 
MMDS related API requests come from the host, which is considered a trusted environment, so there are no checks beside the kind of validation done by HTTP server and `serde-json` (the crate used to de/serialize JSON). The size limit for the stored metadata is configurable and defaults to 51200 bytes. When increasing this limit, one must take into consideration that storing and retrieving large amount of data may induce bottlenecks for the HTTP REST API processing, which is based on `micro-http` crate. MMDS contents can be retrieved using the Firecracker API, via a `GET` request to the `/mmds` resource. ## The data store This is a global data structure, currently referenced using a global variable, that represents the strongly-typed version of JSON-based user input describing the MMDS contents. It leverages the recursive [Value](https://docs.serde.rs/serde_json/value/enum.Value.html) type exposed by `serde-json`. It can only be accessed from thread-safe contexts. MMDS data store supports at the moment storing and retrieving JSON values. Data store contents can be retrieved using the Firecracker API server from host and using the embedded MMDS HTTP/TCP/IPv4 network stack from guest. MMDS data store is upper bounded to the value of the `--mmds-size-limit` command line parameter. If left unconfigured, it will default to the value of `--http-api-max-payload-size`, which is 51200 bytes by default. ## Dumbo The *Dumbo* HTTP/TCP/IPv4 network stack handles guest HTTP requests heading towards the configured MMDS IPv4 address. Before going into *Dumbo* specifics, it's worth going through a brief description of the Firecracker network device model. Firecracker only offers Virtio-net paravirtualized devices to guests. Drivers running in the guest OS use ring buffers in a shared memory area to communicate with the device model when sending or receiving frames. The device model associates each guest network device with a TAP device on the host. 
Frames sent by the guest are written to the TAP fd, and frames read from the TAP fd are handed over to the guest. The *Dumbo* stack can be instantiated once for every network device, and is disabled by default. It can be enabled through the API request body used to configure MMDS by specifying the ID of the network interface inside the `network_interfaces` list. In order for the API call to succeed, the network device must be attached beforehand, otherwise an error is returned. Once enabled, the stack taps into the aforementioned data path. Each frame coming from the guest is examined to determine whether it should be processed by *Dumbo* instead of being written to the TAP fd. Also, every time there is room in the ring buffer to hand over frames to the guest, the device model first checks whether *Dumbo* has anything to send; if not, it resumes getting frames from the TAP fd (when available). We chose to implement our own solution, instead of leveraging existing libraries/implementations, because responding to guest MMDS queries in the context of Firecracker is amenable to a wide swath of simplifications. First of all, we only need to handle `GET` and `PUT` requests, which require a bare-bones HTTP 1.1 server, without support for most headers and more advanced features like chunking. Also, we get to choose what subset of HTTP is used when building responses. Moving lower in the stack, we are dealing with TCP connections over what is essentially a point-to-point link, that seldom loses packets and does not reorder them. This means we can do away with congestion control (we only use flow control), complex reception logic, and support for most TCP options/features. At this point, the layers below (Ethernet and IPv4) don't involve much more than sanity checks of frame/packet contents. *Dumbo* is built using both general purpose components (which we plan to offer as part of one or more libraries), and Firecracker MMDS specific code. 
The former category consists of various helper modules used to process streams of bytes as protocol data units (Ethernet & ARP frames, IPv4 packets, and TCP segments), a TCP handler which listens for connections while demultiplexing incoming segments, a minimalist TCP connection endpoint implementation, and a greatly simplified HTTP 1.1 server. The Firecracker MMDS specific code is found in the logic which taps into the device model, and the component that parses an HTTP request, builds a response based on MMDS contents, and finally sends back a reply. ### MMDS Network Stack Somewhat confusingly, this is the name of the component which taps the device model. It has a user-configured IPv4 address (see [Firecracker MMDS configuration API](../../src/firecracker/swagger/firecracker.yaml)) and MAC (`06:01:23:45:67:01`) addresses. The latter is also used to respond to ARP requests. For every frame coming from the guest, the following steps take place: 1. Apply a heuristic to determine whether the frame may contain an ARP request for the MMDS IP address, or an IPv4 packet heading towards the same address. There can be no false negatives. Frames that fail both checks are *rejected* (deferred to the device model for regular processing). 1. *Reject* invalid Ethernet frames. *Reject* valid frames if their EtherType is neither ARP, nor IPv4. 1. (**if EtherType == ARP**) *Reject* invalid ARP frames. *Reject* the frame if its target protocol address field is different from the MMDS IP address. Otherwise, record that an ARP request has been received (the stack only remembers the most recent request). 1. (**if EtherType == IPv4**) *Reject* invalid packets. *Reject* packets if their destination address differs from the MMDS IP address. *Drop* (stop processing without deferring to the device model) packets that do not carry TCP segments (by looking at the protocol number field). Send the rest to the inner TCP handler. 
The current implementation does not support Ethernet 802.1Q tags, and does not handle IP fragmentation. Tagged Ethernet frames are most likely going to be deferred to the device model for processing, because the heuristics do not take the presence of the tag into account. Moreover, their EtherType will not appear to be of interest. Fragmented IP packets do not get reassembled; they are treated as independent packets. Whenever the guest is able to receive a frame, the device model first requests one from the MMDS network stack associated with the current network device. 1. If an ARP request has been previously recorded, send an ARP reply and forget about the request. 1. If the inner TCP handler has any packets to transmit, wrap the next one into a frame and send it. 1. There are no MMDS related frames to send, so tell the device model to read from the TAP fd instead. ### TCP handler Handles received packets that appear to carry TCP segments. Its operation is described in the `dumbo` crate documentation. Each connection is associated with an MMDS endpoint. ### MMDS endpoint This component gets the byte stream from an inner TCP connection object, identifies the boundaries of the next HTTP request, and parses it using an HttpRequest object. For each valid `GET` request, the URI is used to identify a key from the metadata store (like in the previous example), and a response is built using the Firecracker implementation of HttpResponse logic, based on the associated value, and sent back to the guest over the same connection. Each endpoint has a fixed size receive buffer, and a variable length response buffer (depending on the size of each response). TCP receive window semantics are used to ensure the guest does not overrun the receive buffer during normal operation (the connection has to drop segments otherwise). There can be at most one response pending at any given time. 
Here are more details describing what happens when a segment is received by an MMDS endpoint (previously created when a SYN segment arrived at the TCP handler): 1. Invoke the receive functionality of the inner connection object, and append any new data to the receive buffer. 1. If no response is currently pending, attempt to identify the end of the first request in the receive buffer. If no such boundary can be found, and the buffer is full, reset the inner connection (which also causes the endpoint itself to be subsequently removed) because the guest exceeded the maximum allowed request size. 1. If no response is pending, and we can identify a request in the receive buffer, parse it, free up the associated buffer space (also update the connection receive window), and build an HTTP response, which becomes the current pending response. 1. If a FIN segment was received, and there's no pending response, call `close` on the inner connection. If a valid RST is received at any time, mark the endpoint for removal. When the TCP handler asks an MMDS endpoint for any segments to send, the transmission logic of the inner connection is invoked, specifying the pending response (when present) as the payload source. All packets coming from MMDS have the TTL value set to 1 by default. ### Connection Connection objects are minimalist implementation of the TCP protocol. They are used to reassemble the byte stream which carries guest HTTP requests, and to send back segments which contain parts of the response. More details are available in the `dumbo` crate documentation. ================================================ FILE: docs/mmds/mmds-user-guide.md ================================================ # microVM Metadata Service The Firecracker microVM Metadata Service (MMDS) is a mutable data store which can be used for sharing information between host and guests, in a secure and easy at hand way. 
## Configuring and activating the microVM Metadata Service By default, MMDS is not reachable from the guest operating system. At microVM runtime, MMDS is tightly coupled with a network interface, which allows MMDS requests. When configuring the microVM, if MMDS needs to be activated, a network interface has to be configured to allow MMDS requests. This can be achieved in two steps: 1. Attach one (or more) network interfaces through an HTTP `PUT` request to `/network-interfaces/${MMDS_NET_IF}`. The full network configuration API can be found in the [firecracker swagger file](../../src/firecracker/swagger/firecracker.yaml). 1. Configure MMDS through an HTTP `PUT` request to `/mmds/config` resource and include the IDs of the network interfaces that should allow forwarding requests to MMDS in the `network_interfaces` list. The complete MMDS API is described in the [firecracker swagger file](../../src/firecracker/swagger/firecracker.yaml). ### Examples Attaching a network device with ID `MMDS_NET_IF`: ```bash MMDS_NET_IF=eth0 curl --unix-socket /tmp/firecracker.socket -i \ -X PUT 'http://localhost/network-interfaces/${MMDS_NET_IF}' \ -H 'Accept: application/json' \ -H 'Content-Type: application/json' \ -d '{ "iface_id": "${MMDS_NET_IF}", "guest_mac": "AA:FC:00:00:00:01", "host_dev_name": "tap0" }' ``` Configuring MMDS to receive requests through the `MMDS_NET_IF` network interface ID: ```bash MMDS_IPV4_ADDR=169.254.170.2 curl --unix-socket /tmp/firecracker.socket -i \ -X PUT "http://localhost/mmds/config" \ -H "Content-Type: application/json" \ -d '{ "network_interfaces": ["${MMDS_NET_IF}"] }' ``` MMDS can be configured pre-boot only, using the Firecracker API server. Enabling MMDS without at least a network device attached will return an error. The IPv4 address used by guest applications when issuing requests to MMDS can be customized through the same HTTP `PUT` request to `/mmds/config` resource, by specifying the IPv4 address to the `ipv4_address` field. 
If the IP configuration is not provided before booting up the guest, the MMDS IPv4 address defaults to `169.254.169.254`. ```bash MMDS_IPV4_ADDR=169.254.170.2 curl --unix-socket /tmp/firecracker.socket -i \ -X PUT "http://localhost/mmds/config" \ -H "Content-Type: application/json" \ -d '{ "network_interfaces": ["${MMDS_NET_IF}"], "ipv4_address": "${MMDS_IPV4_ADDR}" }' ``` MMDS is tightly coupled with a network interface which is used to route MMDS packets. To send MMDS intended packets, guest applications must insert a new rule into the routing table of the guest OS. This new rule must forward MMDS intended packets to a network interface which allows MMDS requests. For example: ```bash MMDS_IPV4_ADDR=169.254.170.2 MMDS_NET_IF=eth0 ip route add ${MMDS_IPV4_ADDR} dev ${MMDS_NET_IF} ``` MMDS supports two methods to access the contents of the metadata store from the guest operating system: `V1` and `V2`. More about the particularities of the two mechanisms can be found in the [Retrieving metadata in the guest operating system](#retrieving-metadata-in-the-guest-operating-system) section. The MMDS version used can be specified when configuring MMDS, through the `version` field of the HTTP `PUT` request to `/mmds/config` resource. Accepted values are `V1`(deprecated) and `V2` and the default MMDS version used in case the `version` field is missing is [Version 1](#version-1-deprecated). ```bash MMDS_IPV4_ADDR=169.254.170.2 curl --unix-socket /tmp/firecracker.socket -i \ -X PUT "http://localhost/mmds/config" \ -H "Content-Type: application/json" \ -d '{ "network_interfaces": ["${MMDS_NET_IF}"], "version": "V2", "ipv4_address": "${MMDS_IPV4_ADDR}" }' ``` ## Inserting and updating metadata Inserting and updating metadata is possible through the Firecracker API server. The metadata inserted in MMDS must be any valid JSON. A user can create or update the MMDS data store before the microVM is started or during its operation. 
To insert metadata into MMDS, an HTTP `PUT` request to the `/mmds` resource has to be issued. This request must have a payload with metadata structured in [JSON](https://tools.ietf.org/html/rfc7159) format. To replace existing metadata, a subsequent HTTP `PUT` request to the `/mmds` resource must be issued, using as a payload the new metadata. A complete description of metadata insertion firecracker API can be found in the [firecracker swagger file](../../src/firecracker/swagger/firecracker.yaml). An example of an API request for inserting metadata is provided below: ```bash curl --unix-socket /tmp/firecracker.socket -i \ -X PUT "http://localhost/mmds" \ -H "Content-Type: application/json" \ -d '{ "latest": { "meta-data": { "ami-id": "ami-12345678", "reservation-id": "r-fea54097", "local-hostname": "ip-10-251-50-12.ec2.internal", "public-hostname": "ec2-203-0-113-25.compute-1.amazonaws.com", "network": { "interfaces": { "macs": { "02:29:96:8f:6a:2d": { "device-number": "13345342", "local-hostname": "localhost", "subnet-id": "subnet-be9b61d" } } } } } } }' ``` To partially update existing metadata, an HTTP `PATCH` request to the `/mmds` resource has to be issued, using as a payload the metadata patch, as [JSON Merge Patch](https://tools.ietf.org/html/rfc7396) functionality describes. A complete description of updating metadata Firecracker API can be found in the [firecracker swagger file](../../src/firecracker/swagger/firecracker.yaml). An example API for how to update existing metadata is offered below: ```bash curl --unix-socket /tmp/firecracker.socket -i \ -X PATCH "http://localhost/mmds" \ -H "Content-Type: application/json" \ -d '{ "latest": { "meta-data": { "ami-id": "ami-87654321", "reservation-id": "r-79054aef" } } }' ``` ## Retrieving metadata MicroVM metadata can be retrieved both from host and guest operating systems.
For the scope of this chapter, let's assume the data store content is the JSON below: ```json { "latest": { "meta-data": { "ami-id": "ami-87654321", "reservation-id": "r-79054aef" } } } ``` ### Retrieving metadata in the host operating system To retrieve existing MMDS metadata from host operating system, an HTTP `GET` request to the `/mmds` resource must be issued. The HTTP response returns the existing metadata, as a JSON formatted text. A complete description of retrieving metadata Firecracker API can be found in the [firecracker swagger file](../../src/firecracker/swagger/firecracker.yaml). Below you can see how to retrieve metadata from the host: ```bash curl -s --unix-socket /tmp/firecracker.socket http://localhost/mmds ``` Output: ```json { "latest": { "meta-data": { "ami-id": "ami-87654321", "reservation-id": "r-79054aef" } } } ``` ### Retrieving metadata in the guest operating system Accessing the contents of the metadata store from the guest operating system can be done using one of the following methods: - `V1`: simple request/response method (deprecated) - `V2`: session-oriented method #### Version 1 (Deprecated) **Version 1 is deprecated and will be removed in the next major version change. Version 2 should be used instead.** To retrieve existing MMDS metadata using MMDS version 1, an HTTP `GET` request must be issued. The requested resource can be referenced by its corresponding [JSON Pointer](https://tools.ietf.org/html/rfc6901), which is also the path of the MMDS request. The HTTP response content will contain the referenced metadata resource. As in version 2, version 1 also supports a session oriented method in order to make the migration easier. See [the next section](#version-2) for the session oriented method. Note that version 1 returns a successful response to a `GET` request even with an invalid token or no token not to break existing workloads. 
`mmds.rx_invalid_token` and `mmds.rx_no_token` metrics track the number of `GET` requests with invalid tokens and missing tokens respectively, helping users evaluate their readiness for migrating to MMDS version 2. Requests containing any other HTTP methods than `GET` and `PUT` will receive **405 Method Not Allowed** error. ```bash MMDS_IPV4_ADDR=169.254.170.2 RESOURCE_POINTER_OBJ=latest/meta-data curl -s "http://${MMDS_IPV4_ADDR}/${RESOURCE_POINTER_OBJ}" ``` #### Version 2 Similar to [IMDSv2](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/configuring-instance-metadata-service.html), MMDS version 2 (`V2`) is a session oriented method, which makes use of a session token in order to allow fetching metadata contents. The session must start with an HTTP `PUT` request that generates the session token. In order to be successful, the request must respect the following constraints: - must be directed towards `/latest/api/token` path - must contain a `X-metadata-token-ttl-seconds` or `X-aws-ec2-metadata-token-ttl-seconds` header specifying the token lifetime in seconds. The value cannot be lower than 1 or greater than 21600 (6 hours). - must not contain a `X-Forwarded-For` header. ```bash MMDS_IPV4_ADDR=169.254.170.2 TOKEN=`curl -X PUT "http://${MMDS_IPV4_ADDR}/latest/api/token" \ -H "X-metadata-token-ttl-seconds: 21600"` ``` The HTTP response from MMDS is a plaintext containing the session token. During the duration specified by the token's time to live value, all subsequent `GET` requests must specify the session token through the `X-metadata-token` or `X-aws-ec2-metadata-token` header in order to fetch data from MMDS. ```bash MMDS_IPV4_ADDR=169.254.170.2 RESOURCE_POINTER_OBJ=latest/meta-data curl -s "http://${MMDS_IPV4_ADDR}/${RESOURCE_POINTER_OBJ}" \ -H "X-metadata-token: ${TOKEN}" ``` After the token expires, it becomes unusable and a new session token must be issued. 
##### Snapshotting considerations The data store is **not** persisted across snapshots, in order to avoid leaking vm-specific information that may need to be reseeded into the data store for a new clone. The MMDS version, network stack configuration and IP address used for accessing the service are persisted across snapshot-restore. If the targeted snapshot version does not support Mmds Version 2, it will not be persisted in the snapshot (the clone will use the default, V1). Similarly, if a snapshotted Vm state contains the Mmds version but the Firecracker version used for restoring does not support persisting the version, the default will be used. ### MMDS formats The response format can be JSON or IMDS. The IMDS documentation can be found [here](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-instance-metadata.html). The output format can be selected by specifying the optional `Accept` header. Using `Accept: application/json` will format the output to JSON, while using `Accept: text/plain` or not specifying this optional header at all will format the output to IMDS. Setting `imds_compat` to `true` through a PUT request to `/mmds/config` enforces MMDS to always respond in IMDS format regardless of the `Accept` header. This allows code written to work on EC2 IMDS to also work on Firecracker MMDS. Retrieving MMDS resources in IMDS format, other than JSON `string` and `object` types, is not supported.
Below is an example on how to retrieve the `latest/meta-data` resource in JSON format: ```bash MMDS_IPV4_ADDR=169.254.170.2 RESOURCE_POINTER_OBJ=latest/meta-data curl -s -H "Accept: application/json" "http://${MMDS_IPV4_ADDR}/${RESOURCE_POINTER_OBJ}" ``` Output: ```json { "ami-id": "ami-87654321", "reservation-id": "r-79054aef" } ``` Retrieving the `latest/meta-data/ami-id` resource in JSON format: ```bash MMDS_IPV4_ADDR=169.254.170.2 RESOURCE_POINTER_STR=latest/meta-data/ami-id curl -s -H "Accept: application/json" "http://${MMDS_IPV4_ADDR}/${RESOURCE_POINTER_STR}" ``` Output: ```json "ami-87654321" ``` Retrieving the `latest` resource in IMDS format: ```bash MMDS_IPV4_ADDR=169.254.170.2 RESOURCE_POINTER=latest curl -s "http://${MMDS_IPV4_ADDR}/${RESOURCE_POINTER}" ``` Output: ```text meta-data/ ``` Retrieving the `latest/meta-data/` resource in IMDS format: ```bash MMDS_IPV4_ADDR=169.254.170.2 RESOURCE_POINTER=latest/meta-data curl -s "http://${MMDS_IPV4_ADDR}/${RESOURCE_POINTER}" ``` Output: ```text ami-id reservation-id ``` Retrieving the `latest/meta-data/ami-id` resource in IMDS format: ```bash MMDS_IPV4_ADDR=169.254.170.2 RESOURCE_POINTER=latest/meta-data/ami-id curl -s "http://${MMDS_IPV4_ADDR}/${RESOURCE_POINTER}" ``` Output: ```text ami-87654321 ``` ## Errors *200* - `Ok` The request was successfully processed and a response was successfully formed. *400* - `Bad Request` The request was malformed. *401* - `Unauthorized` Only when using MMDS `V2`. The HTTP request either lacks the session token, or the token specified is invalid. A token is invalid if it was not generated using an HTTP `PUT` request or if it has expired. *404* - `Not Found` The requested resource can not be found in the MMDS data store. *405* - `Method Not Allowed` The HTTP request uses a not allowed HTTP method and a response with the `Allow` header was formed. When using MMDS `V1`, this is returned for any HTTP method other than `GET`. 
When MMDS `V2` is configured, the only accepted HTTP methods are `PUT` and `GET`. *501* - `Not Implemented` The requested HTTP functionality is not supported by MMDS or the requested resource is not supported in IMDS format. ## Appendix ### Example use case: credential rotation For this example, the guest expects to find some sort of credentials (say, a secret access key) by issuing a `GET` request to `http://169.254.169.254/latest/meta-data/credentials/secret-key`. Most similar use cases will encompass the following sequence of steps: 1. Some agent running on the host sends a `PUT` request with the initial contents of the MMDS, using the Firecracker API. This most likely takes place before the microVM starts running, but may also happen at a later time. Guest MMDS requests which arrive prior to contents being available receive a *NotFound* response. 1. The contents are saved to MMDS. 1. The guest sends a `GET` request for the secret key, which is intercepted by MMDS. 1. MMDS processes the request and sends back an HTTP response with the assembled secret key as a JSON string. After a while, the host agent decides to rotate the secret key. It does so by updating the data store with a new value. This can be done via a `PUT` request to the `/mmds` API resource, which replaces everything, or with a `PATCH` request that only touches the desired key. This effectively triggers the first two steps again. The guest reads the new secret key, going one more time through the last three steps. This can happen after a notification from the host agent, or be discovered via periodic polling, or some other mechanism. Since access to the data store is thread safe, the guest can only receive either the old version, or the new version of the key, and not some intermediate state caused by the update.
================================================ FILE: docs/network-performance.md ================================================ # Firecracker network performance numbers This document provides details about Firecracker network performance. The numbers presented are dependent on the hardware (CPU, networking card, etc.), OS version and settings. Scope of the measurements is to illustrate the limits for the emulation thread. ## TCP Throughput | Segment size/ Direction | 1460bytes | 256bytes | 128bytes | 96bytes | | ----------------------- | --------- | -------- | -------- | ------- | | Ingress | 25Gbps | 23Gbps | 20Gbps | 18Gbps | | Egress | 25Gbps | 23Gbps | 20Gbps | 18Gbps | | Bidirectional | 18Gbps | 18Gbps | 18Gbps | 18Gbps | **Setup and test description** Throughput measurements were done using [iperf3](https://iperf.fr/). The target is to fully saturate the emulation thread and keep it at 100% utilization. No adjustments were done to socket buffer, or any other network related kernel parameters. To identify the limit of the emulation thread, TCP throughput was measured between host and guest. An EC2 [M5d.metal](https://aws.amazon.com/ec2/instance-types/m5/) instance, running [Amazon Linux 2](https://aws.amazon.com/amazon-linux-ami/), was used as a host. For ingress or egress throughput measurements, a Firecracker microVM running Kernel 4.14 with 4GB of RAM, 8 vCPUs and one network interface was used. The measurements were taken using 6 iperf3 clients running on host and 6 iperf3 servers running on guest and vice versa. For bidirectional throughput measurements, a Firecracker microVM running Amazon Linux 2, Kernel 4.14 with 4GB of RAM, 12 vCPUs and one network interface was used. The measurements were taken using 4 iperf3 clients and 4 iperf3 servers running on both host and guest. ## Latency The virtualization layer, Firecracker emulation thread plus host kernel stack, is responsible for adding on average 0.06ms of network latency.
**Setup and test description** Latency measurements were done using ping round trip times. 2 x EC2 M5d.metal instances running Amazon Linux 2 within the same [VPC](https://aws.amazon.com/vpc/) were used, with a security group configured so that it would allow traffic from instances using private IPs. A 10Mbps background traffic was running between instances. Round trip time between instances was measured. `rtt min/avg/max/mdev = 0.101/0.198/0.237/0.044 ms` On one of the instances, a Firecracker microVM running Kernel 4.14, with 1 GB of RAM, 2 vCPUs, one network interface running was used. Round trip between the microVM and the other instance was measured, while a 10Mbps background traffic was running. `rtt min/avg/max/mdev = 0.191/0.321/0.519/0.058 ms` From the difference between those we can conclude that ~0.06ms is the virtualization overhead. ================================================ FILE: docs/network-setup.md ================================================ # Getting Started Firecracker Network Setup This is a simple quick-start guide to getting one or more Firecracker microVMs connected to the Internet via the host. If you run a production setup, you should consider modifying this setup to accommodate your specific needs. > [!NOTE] > > Currently, Firecracker supports only a TUN/TAP network backend with no multi > queue support. The steps in this guide assume `eth0` to be your Internet-facing network interface on the host. If `eth0` isn't your main network interface, you should change the value to the correct one in the commands below. IPv4 is also assumed to be used, so you will need to adapt the instructions accordingly to support IPv6. Each microVM requires a host network interface (like `eth0`) and a Linux `tap` device (like `tap0`) used by Firecracker, but the differences in configuration stem from routing: how packets from the `tap` get to the network interface (egress) and vice-versa (ingress).
There are three main approaches of how to configure routing for a microVM. 1. **NAT-based**, which is presented in the main part of this guide. It is simple but doesn't expose your microVM to the local network (LAN). 1. **Bridge-based**, which exposes your microVM to the local network. Learn more about in the _Advanced: Bridge-based routing_ section of this guide. 1. **Namespaced NAT**, which sacrifices performance in comparison to the other approaches but is desired in the scenario when two clones of the same microVM are running at the same time. To learn more about it, check out the [Network Connectivity for Clones](./snapshotting/network-for-clones.md) guide. To run multiple microVMs while using NAT-based routing, check out the _Advanced: Multiple guests_ section. The same principles can be applied to other routing methods with a bit more effort. For the choice of firewall, `nft` is recommended for use on production Linux systems, but, for the sake of compatibility, this guide provides a choice between either `nft` or the `iptables-nft` translation layer. The latter is [no longer recommended](https://access.redhat.com/solutions/6739041) but may be more familiar to readers. ## On the Host The first step on the host for any microVM is to create a Linux `tap` device, which Firecracker will use for networking. For this setup, only two IP addresses will be necessary - one for the `tap` device and one for the guest itself, through which you will, for example, `ssh` into the guest. So, we'll choose the smallest IPv4 subnet needed for 2 addresses: `/30`. For this VM, let's use the `172.16.0.1` `tap` IP and the `172.16.0.2` guest IP. ```bash # Create the tap device. sudo ip tuntap add tap0 mode tap # Assign it the tap IP and start up the device. sudo ip addr add 172.16.0.1/30 dev tap0 sudo ip link set tap0 up ``` > [!NOTE] > > The IP of the TAP device should be chosen such that it's not in the same > subnet as the IP address of the host. 
We'll need to enable IPv4 forwarding on the system. ```bash echo 1 | sudo tee /proc/sys/net/ipv4/ip_forward ``` ### Configuration via `nft` We'll need an nftables table for our routing needs, and 2 chains inside that table: one for NAT on `postrouting` stage, and another one for filtering on `forward` stage: ```bash sudo nft add table firecracker sudo nft 'add chain firecracker postrouting { type nat hook postrouting priority srcnat; policy accept; }' sudo nft 'add chain firecracker filter { type filter hook forward priority filter; policy accept; }' ``` The first rule we'll need will masquerade packets from the guest IP as if they came from the host's IP, by changing the source IP address of these packets: ```bash sudo nft add rule firecracker postrouting ip saddr 172.16.0.2 oifname eth0 counter masquerade ``` The second rule we'll need will accept packets from the tap IP (the guest will use the tap IP as its gateway and will therefore route its own packets through the tap IP) and direct them to the host network interface: ```bash sudo nft add rule firecracker filter iifname tap0 oifname eth0 accept ``` ### Configuration via `iptables-nft` Tables and chains are managed by `iptables-nft` automatically, but we'll need three rules to perform the NAT steps: ```bash sudo iptables-nft -t nat -A POSTROUTING -o eth0 -s 172.16.0.2 -j MASQUERADE sudo iptables-nft -A FORWARD -m conntrack --ctstate RELATED,ESTABLISHED -j ACCEPT sudo iptables-nft -A FORWARD -i tap0 -o eth0 -j ACCEPT ``` ## Setting Up Firecracker > [!NOTE] > > If you use the rootfs from the [getting started guide](getting-started.md), > you need to use a specific `MAC` address like `06:00:AC:10:00:02`. In this > `MAC` address, the last 4 bytes (`AC:10:00:02`) will represent the IP address > of the guest. In the default case, it is `172.16.0.2`. Otherwise, you can skip > the `guest_mac` field for network configuration. This way, the guest will > generate a random MAC address on startup. 
> [!NOTE] > > The `iface_id` used during VM configuration is internal to Firecracker and > only used for management purposes. The name of the network interface in the > guest is determined by the guest itself. In this example we assume the guest > will name the network interface `eth0`. > [!NOTE] > > Firecracker cannot guarantee that the network interfaces in the guest will be > initialized in the guest in the same order as API calls used to set them up. > At the same time most kernels/distributions do initialize devices in the API > defined order. Before starting the guest, configure the network interface using Firecracker's API: ```bash curl --unix-socket /tmp/firecracker.socket -i \ -X PUT 'http://localhost/network-interfaces/my_network0' \ -H 'Accept: application/json' \ -H 'Content-Type: application/json' \ -d '{ "iface_id": "my_network0", "guest_mac": "06:00:AC:10:00:02", "host_dev_name": "tap0" }' ``` If you are using a configuration file instead of the API, add a section to your configuration file like this: ```json "network-interfaces": [ { "iface_id": "my_network0", "guest_mac": "06:00:AC:10:00:02", "host_dev_name": "tap0" } ], ``` Alternatively, if you are using firectl, add `--tap-device=tap0/06:00:AC:10:00:02\` to your command line. ## In The Guest You'll now need to assign the guest its IP, activate the guest's networking interface and set up the `tap` IP as the guest's gateway address, so that packets are routed through the `tap` device, where they are then picked up by the setup on the host prepared before: ```bash ip addr add 172.16.0.2/30 dev eth0 ip link set eth0 up ip route add default via 172.16.0.1 dev eth0 ``` Now your guest should be able to route traffic to the internet (assuming that your host can get to the internet). To do anything useful, you probably want to resolve DNS names. In production, you'd want to use the right DNS server for your environment. 
For testing, you can add a public DNS server to `/etc/resolv.conf` by adding a line like this: ```console nameserver 8.8.8.8 ``` > [!NOTE] > > Sometimes, it's undesirable to have `iproute2` (providing the `ip` command) > installed on your guest OS, or you simply want to have these steps be > performed automatically. To do this, check out the > [Advanced: Guest network configuration using kernel command line](#advanced-guest-network-configuration-using-kernel-command-line) > section. ## Cleaning up The first step to cleaning up is to delete the tap device on the host: ```bash sudo ip link del tap0 ``` ### Cleanup using `nft` You'll want to delete the two nftables rules for NAT routing from the `postrouting` and `filter` chains. To do this with nftables, you'll need to look up the _handles_ (identifiers) of these rules by running: ```bash sudo nft -a list ruleset ``` Now, find the `# handle` comments relating to the two rules and delete them. For example, if the handle to the masquerade rule is 1 and the one to the forwarding rule is 2: ```bash sudo nft delete rule firecracker postrouting handle 1 sudo nft delete rule firecracker filter handle 2 ``` Run the following steps only **if you have no more guests** running on the host: Set IPv4 forwarding back to disabled: ```bash echo 0 | sudo tee /proc/sys/net/ipv4/ip_forward ``` If you're using `nft`, delete the `firecracker` table to revert your nftables configuration fully back to its initial state: ```bash sudo nft delete table firecracker ``` ### Cleanup using `iptables-nft` Of the configured `iptables-nft` rules, two should be deleted if you have guests remaining in your configuration: ```bash sudo iptables-nft -t nat -D POSTROUTING -o eth0 -s 172.16.0.2 -j MASQUERADE sudo iptables-nft -D FORWARD -i tap0 -o eth0 -j ACCEPT ``` **If you have no more guests** running on the host, then similarly set IPv4 forwarding back to disabled: ```bash echo 0 | sudo tee /proc/sys/net/ipv4/ip_forward ``` And delete the remaining 
`conntrack` rule that applies to all guests: ```bash sudo iptables-nft -D FORWARD -m conntrack --ctstate RELATED,ESTABLISHED -j ACCEPT ``` If nothing else is using `iptables-nft` on the system, you may even want to delete the entire system ruleset like so: ```bash sudo iptables-nft -F sudo iptables-nft -t nat -F ``` ## Advanced: Multiple guests To configure multiple guests, we will only need to repeat some of the steps in this setup for each of the microVMs: 1. Each microVM has its own subnet and the two IP addresses inside of it: the `tap` IP and the guest IP. 1. Each microVM has its own two nftables rules for masquerading and forwarding, while the same table and two chains can be shared between the microVMs. 1. Each microVM has its own routing configuration inside the guest itself (achieved through `iproute2` or the method described in the _Advanced: Guest network configuration at kernel level_ section). To give a more concrete example, **let's add a second microVM** to the one you've already configured: Let's assume we allocate /30 subnets in the 172.16.0.0/16 range sequentially to give out as few addresses as needed. The next /30 subnet in the 172.16.0.0/16 range will give us these two IPs: 172.16.0.5 as the `tap` IP and 172.16.0.6 as the guest IP. 
Our new `tap` device will, sequentially, have the name `tap1`: ```bash sudo ip tuntap add tap1 mode tap sudo ip addr add 172.16.0.5/30 dev tap1 sudo ip link set tap1 up ``` Now, let's add the new two `nft` rules, also with the new values: ```bash sudo nft add rule firecracker postrouting ip saddr 172.16.0.6 oifname eth0 counter masquerade sudo nft add rule firecracker filter iifname tap1 oifname eth0 accept ``` If using `iptables-nft`, add the rules like so: ```bash sudo iptables-nft -t nat -A POSTROUTING -o eth0 -s 172.16.0.6 -j MASQUERADE sudo iptables-nft -A FORWARD -i tap1 -o eth0 -j ACCEPT ``` Modify your Firecracker configuration with the `host_dev_name` now being `tap1` instead of `tap0`, boot up the guest and perform the routing inside of it like so, changing the guest IP and `tap` IP: ```bash ip addr add 172.16.0.6/30 dev eth0 ip link set eth0 up ip route add default via 172.16.0.5 dev eth0 ``` Or, you can use the setup from [Advanced: Guest network configuration](#advanced-guest-network-configuration-using-kernel-command-line) by simply changing the G and T variables, i.e. the guest IP and `tap` IP. > [!NOTE] > > If you'd like to calculate the guest and `tap` IPs using the sequential subnet > allocation method that has been used here, you can use the following formulas > specific to IPv4 addresses: > > `tap` IP = `172.16.[(A*O+1)/256].[(A*O+1)%256]`. > > Guest IP = `172.16.[(A*O+2)/256].[(A*O+2)%256]`. > > Round down the division and replace `A` with the amount of IP addresses inside > your subnet (for a /30 subnet, that will be 4 addresses, for example) and > replace `O` with the sequential number of your microVM, starting at 0. You can > replace `172.16` with any other values that fit between between 1 and 255 as > usual with an IPv4 address. > > For example, let's calculate the addresses of the 1000-th microVM with a /30 > subnet in the `172.16.0.0/16` range: > > `tap` IP = `172.16.[(4*999+1)/256].[(4*999+1)%256]` = `172.16.15.157`. 
> > Guest IP = `172.16.[(4*999+2)/256].[(4*999+2)%256]` = `172.16.15.158`. > > This allocation setup has been used successfully in the `firecracker-demo` > project for launching several thousand microVMs on the same host: > [relevant lines](https://github.com/firecracker-microvm/firecracker-demo/blob/63717c6e7fbd277bdec8e26a5533d53544a760bb/start-firecracker.sh#L45). ## Advanced: Bridge-based routing ### On The Host 1. Create a bridge interface: ```bash sudo ip link add name br0 type bridge ``` 1. Add the `tap` device [created above](#on-the-host) to the bridge: ```bash sudo ip link set dev tap0 master br0 ``` 1. Define an IP address in your network for the bridge: For example, if your gateway were on `192.168.1.1` and you wanted to use this for getting dynamic IPs, you would want to give the bridge an unused IP address in the `192.168.1.0/24` subnet. ```bash sudo ip address add 192.168.1.7/24 dev br0 ``` 1. Add a firewall rule to allow traffic to be routed to the guest: ```bash sudo iptables -t nat -A POSTROUTING -o br0 -j MASQUERADE ``` 1. Once you're cleaning up the configuration, make sure to delete the bridge: ```bash sudo ip link del br0 ``` ### On The Guest 1. Define an unused IP address in the bridge's subnet e.g., `192.168.1.169/24`. **Note**: Alternatively, you could rely on DHCP for getting a dynamic IP address from your gateway. ```bash ip addr add 192.168.1.169/24 dev eth0 ``` 1. Enable the network interface: ```bash ip link set eth0 up ``` 1. Create a route to the bridge device ```bash ip r add 192.168.1.1 via 192.168.1.7 dev eth0 ``` 1. Create a route to the internet via the bridge ```bash ip r add default via 192.168.1.7 dev eth0 ``` When done, your route table should look similar to the following: ```bash ip r default via 192.168.1.7 dev eth0 192.168.1.0/24 dev eth0 scope link 192.168.1.1 via 192.168.1.7 dev eth0 ``` 1. 
Add your nameserver to `/etc/resolv.conf` ```bash # cat /etc/resolv.conf nameserver 192.168.1.1 ``` ## Advanced: Guest network configuration using kernel command line The Linux kernel supports an `ip` CLI argument that can be passed to it when booting. Boot arguments in Firecracker are configured in the `boot_args` property of the boot source (`boot-source` object in the JSON configuration or the equivalent endpoint in the API server). The value of the `ip` CLI argument for our setup will be of this format: `G::T:GM::GI:off`. G is the guest IP (without the subnet), T is the `tap` IP (without the subnet), GM is the "long" mask IP of the guest CIDR and GI is the name of the guest network interface. Substituting our values, we get: `ip=172.16.0.2::172.16.0.1:255.255.255.252::eth0:off`. Insert this at the end of your boot arguments for your microVM, and the guest Linux kernel will automatically perform the routing configuration done in the _In the Guest_ section without needing `iproute2` installed in the guest. As soon as you boot the guest, it will already be connected to the network (assuming you correctly performed the other steps). ================================================ FILE: docs/pmem.md ================================================ # Using the Firecracker `virtio-pmem` device ## What is a persistent memory device Persistent memory is a type of non-volatile, CPU accessible (with usual load/store instructions) memory that does not lose its content on power loss. In other words all writes to the memory persist over the power cycle. In hardware this is known as NVDIMM memory (Non Volatile Double Inline Memory Module). ## What is a `virtio-pmem` device: [`virtio-pmem`](https://docs.oasis-open.org/virtio/virtio/v1.3/csd01/virtio-v1.3-csd01.html#x1-68900019) is a device which emulates a persistent memory device without requiring a physical NVDIMM device be present on the host system.
`virtio-pmem` is backed by a memory mapped file on the host side and is exposed to the guest kernel as a region in the guest physical memory. This allows the guest to directly access the host memory pages without a need to use guest driver or interact with VMM. From guest user-space perspective `virtio-pmem` devices are presented as normal block device like `/dev/pmem0`. This allows `virtio-pmem` to be used as rootfs device and make VM boot from it. > [!NOTE] > > Since `virtio-pmem` is located fully in memory, when used as a block device > there is no need to use guest page cache for its operations. This behaviour > can be configured by using `DAX` feature of the kernel. > > - To mount a device with `DAX` add `--options=dax` to the `mount` command. > - To configure a root device with `DAX` append `rootflags=dax` to the kernel > arguments. > > `DAX` support is not uniform for all file systems. Check the kernel > [documentation](https://github.com/torvalds/linux/blob/master/Documentation/filesystems/dax.rst) > for more information. ## Prerequisites In order to use `virtio-pmem` device, guest kernel needs to built with support for it. The full list of configuration options needed for `virtio-pmem` and `DAX`: ``` # Needed for DAX on aarch64. Will be ignored on x86_64 CONFIG_ARM64_PMEM=y CONFIG_DEVICE_MIGRATION=y CONFIG_ZONE_DEVICE=y CONFIG_VIRTIO_PMEM=y CONFIG_LIBNVDIMM=y CONFIG_BLK_DEV_PMEM=y CONFIG_ND_CLAIM=y CONFIG_ND_BTT=y CONFIG_BTT=y CONFIG_ND_PFN=y CONFIG_NVDIMM_PFN=y CONFIG_NVDIMM_DAX=y CONFIG_OF_PMEM=y CONFIG_NVDIMM_KEYS=y CONFIG_DAX=y CONFIG_DEV_DAX=y CONFIG_DEV_DAX_PMEM=y CONFIG_DEV_DAX_KMEM=y CONFIG_FS_DAX=y CONFIG_FS_DAX_PMD=y ``` ## Configuration Firecracker implementation exposes these config options for the `virtio-pmem` device: - `id` - id of the device for internal use - `path_on_host` - path to the backing file - `root_device` - toggle to use this device as root device. 
Device will be marked as `rw` in the kernel arguments - `read_only` - tells Firecracker to `mmap` the backing file in read-only mode. If this device is also configured as `root_device`, it will be marked as `ro` in the kernel arguments > [!NOTE] > > Devices will be exposed to the guest in the order in which they are configured > with sequential names in the form of `/dev/pmem{N}` like: `/dev/pmem0`, > `/dev/pmem1` ... > [!WARNING] > > Setting `virtio-pmem` device to `read-only` mode can lead to VM shutting down > on any attempt to write to the device. This is because from guest kernel > perspective `virtio-pmem` is always `read-write` capable. Use `read-only` mode > only if you want to ensure the underlying file is never written to. > > To mount the `pmem` device with `read-only` options add `-o ro` to the `mount` > command. > > The exact behaviour differs per platform: > > - x86_64 - if KVM is able to decode the write instruction used by the guest, > it will return a MMIO_WRITE to the Firecracker where it will be discarded > and the warning log will be printed. > - aarch64 - the instruction emulation is much stricter. Writes will result in > an internal KVM error which will be returned to Firecracker in a form of an > `ENOSYS` error. This will make Firecracker stop the VM with appropriate log > message. > [!WARNING] > > `virtio-pmem` requires for the guest exposed memory region to be 2MB aligned. > This requirement is transitively carried to the backing file of the > `virtio-pmem`. Firecracker allows users to configure `virtio-pmem` with > backing file of any size and fills the memory gap between the end of the file > and the 2MB boundary with empty `PRIVATE | ANONYMOUS` memory pages. Users must > be careful to not write to this memory gap since it will not be synchronized > with backing file. This is not an issue if `virtio-pmem` is configured in > `read-only` mode. 
### Config file Configuration of the `virtio-pmem` device from config file follows similar pattern to `virtio-block` section. Here is an example configuration for a single `virtio-pmem` device: ```json "pmem": [ { "id": "pmem0", "path_on_host": "./some_file", "root_device": true, "read_only": false } ] ``` ### API Similar to other devices `virtio-pmem` can be configured with API calls. An example of configuration request: ```console curl --unix-socket $socket_location -i \ -X PUT 'http://localhost/pmem/pmem0' \ -H 'Accept: application/json' \ -H 'Content-Type: application/json' \ -d "{ \"id\": \"pmem0\", \"path_on_host\": \"./some_file\", \"root_device\": true, \"read_only\": false }" ``` ## Security It is not recommended to use the same backing file for `virtio-pmem` across different VMs, as this causes the same physical pages to be mapped to different VMs, which could be exploited as a side channel by an attacker inside the microVM. Users that want to use `virtio-pmem` to share memory are encouraged to carefully evaluate the security risk according to their threat model. ## Snapshot support `virtio-pmem` works with snapshot functionality of Firecracker. Snapshot will contain the configuration options provided by the user. During restoration process, Firecracker will attempt to restore `virtio-pmem` device by opening same backing file as it was configured in the first place. This means all `virtio-pmem` backing files should be present in the same locations during restore as they were during initial `virtio-pmem` configuration. ## Performance Even though `virtio-pmem` allows for the direct access of host pages from the guest, the performance of the first access of each page will suffer from the internal KVM page fault which will have to set up Guest physical address to Host Virtual address translation. Consecutive accesses will not need to go through this process again. 
Since the number of page faults correlates to the size of the pages used to
back `virtio-pmem` memory, it is possible to use huge pages to reduce the
number of required page faults. This can be done by using
[`tmpfs`](https://www.kernel.org/doc/html/latest/filesystems/tmpfs.html) with
transparent huge pages enabled or by using
[`hugetlbfs`](https://www.kernel.org/doc/html/latest/admin-guide/mm/hugetlbpage.html)
if `virtio-pmem` is used for memory sharing.

## Memory usage

> [!NOTE] `virtio-pmem` memory can be paged out by the host, because it is
> backed by a file with `MAP_SHARED` mapping type. To prevent this from
> happening, you can use `vmtouch` or similar tool to lock file pages from being
> evicted.

`virtio-pmem` resides in host memory and does increase the maximum possible
memory usage of a VM since now VM can use all of its RAM and access all of the
`virtio-pmem` memory. In order to minimize the overhead, it is highly
recommended to use `DAX` mode to avoid unnecessary duplication of data in guest
page cache. As an example, a single VM with 128MB of memory booted from
`virtio-pmem` device without `DAX` has `RSS` value of ~120MB, while with `DAX`
it is ~96MB. The ~96MB is similar to memory usage of a VM booted using
`virtio-block` as a root device.

In the case where multiple VMs have `virtio-pmem` devices that point to the
same underlying file the memory overhead can be amortized since total maximum
memory usage will only include a single instance of `virtio-pmem` memory. As an
example 2 VMs configured with 128MB of RAM without `virtio-pmem` devices can
consume a maximum of 128 + 128 = 256MB of host memory. If each of the VMs has a
100MB `virtio-pmem` device attached with shared backing file, the maximum
memory consumption will be 128 + 128 + 100 = 356MB because 100MB of
`virtio-pmem` will be shared between VMs.
================================================ FILE: docs/prod-host-setup.md ================================================ # Production Host Setup Recommendations Firecracker relies on KVM and on the processor virtualization features for workload isolation. The host and guest kernels and host microcode must be regularly patched in accordance with your distribution's security advisories such as [ALAS](https://alas.aws.amazon.com/alas2023.html) for Amazon Linux. Security guarantees and defense in depth can only be upheld, if the following list of recommendations are implemented in production. ## Firecracker Configuration ### Seccomp Firecracker uses [seccomp](https://www.kernel.org/doc/Documentation/prctl/seccomp_filter.txt) filters to limit the system calls allowed by the host OS to the required minimum. By default, Firecracker uses the most restrictive filters, which is the recommended option for production usage. Production usage of the `--seccomp-filter` or `--no-seccomp` parameters is not recommended. ### 8250 Serial Device Firecracker implements the 8250 serial device, which is visible from the guest side and is tied to the Firecracker/non-daemonized jailer process stdout. Without proper handling, because the guest has access to the serial device, this can lead to unbound memory or storage usage on the host side. Firecracker does not offer users the option to limit serial data transfer, nor does it impose any restrictions on stdout handling. Users are responsible for handling the memory and storage usage of the Firecracker process stdout. We suggest using any upper-bounded forms of storage, such as fixed-size or ring buffers, using programs like `journald` or `logrotate`, or redirecting to `/dev/null` or a named pipe. Furthermore, we do not recommend that users enable the serial device in production. To disable it in the guest kernel, use the `8250.nr_uarts=0` boot argument when configuring the boot source. 
Please be aware that the device can be reactivated from within the guest even if it was disabled at boot. If Firecracker's `stdout` buffer is non-blocking and full (assuming it has a bounded size), any subsequent writes will fail, resulting in data loss, until the buffer is freed. ### Log files Firecracker outputs logging data into a named pipe, socket, or file using the path specified in the `log_path` field of logger configuration. Firecracker can generate log data as a result of guest operations and therefore the guest can influence the volume of data written in the logs. Users are responsible for consuming and storing this data safely. We suggest using any upper-bounded forms of storage, such as fixed-size or ring buffers, programs like `journald` or `logrotate`, or redirecting to a named pipe. ### Logging and performance We recommend adding `quiet loglevel=1` to the host kernel command line to limit the number of messages written to the serial console. This is because some host configurations can have an effect on Firecracker's performance as the process will generate host kernel logs during normal operations. The most recent example of this was the addition of `console=ttyAMA0` host kernel command line argument on one of our testing setups. This enabled console logging, which degraded the snapshot restore time from 3ms to 8.5ms on `aarch64`. In this case, creating the tap device for snapshot restore generated host kernel logs, which were very slow to write. ### Logging and signal handlers Firecracker installs custom signal handlers for some of the POSIX signals, such as SIGSEGV, SIGSYS, etc. The custom signal handlers used by Firecracker are not async-signal-safe, since they write logs and flush the metrics, which use locks for synchronization. While very unlikely, it is possible that the handler will intercept a signal on a thread which is already holding a lock to the log or metrics buffer. 
This can result in a deadlock, where the specific Firecracker thread becomes unresponsive. While there is no security impact caused by the deadlock, we recommend that customers have an overwatcher process on the host, that periodically looks for Firecracker processes that are unresponsive, and kills them, by SIGKILL. ## Jailer Configuration For assuring secure isolation in production deployments, Firecracker should be started using the `jailer` binary that's part of each Firecracker release, or executed under process constraints equal or more restrictive than those in the jailer. For more about Firecracker sandboxing please see [Firecracker design](design.md) The Jailer process applies [cgroup](https://www.kernel.org/doc/Documentation/cgroup-v1/cgroups.txt), namespace isolation and drops privileges of the Firecracker process. To set up the jailer correctly, you'll need to: - Create a dedicated non-privileged POSIX user and group to run Firecracker under. Use the created POSIX user and group IDs in Jailer's `--uid ` and `--gid ` flags, respectively. This will run the Firecracker as the created non-privileged user and group. All file system resources used for Firecracker should be owned by this user and group. Apply least privilege to the resource files owned by this user and group to prevent other accounts from unauthorized file access. When running multiple Firecracker instances it is recommended that each runs with its unique `uid` and `gid` to provide an extra layer of security for their individually owned resources in the unlikely case where any one of the jails is broken out of. Firecracker's customers are strongly advised to use the provided `resource-limits` and `cgroup` functionalities encapsulated within jailer, in order to control Firecracker's resource consumption in a way that makes the most sense to their specific workload. 
While aiming to provide as much control as possible, we cannot enforce
aggressive default constraints on resources such as memory or CPU because these
are highly dependent on the workload type and use case.

Here are some recommendations on how to limit the process's resources:

### Disk

- `cgroup` provides a
  [Block IO Controller](https://www.kernel.org/doc/Documentation/cgroup-v1/blkio-controller.txt)
  which allows users to control I/O operations through the following files:
  - `blkio.throttle.io_serviced` - bounds the number of I/Os issued to disk
  - `blkio.throttle.io_service_bytes` - sets a limit on the number of bytes
    transferred to/from the disk
- Jailer's `resource-limit` provides control on the disk usage through:
  - `fsize` - limits the size in bytes for files created by the process
  - `no-file` - specifies a value greater than the maximum file descriptor
    number that can be opened by the process. If not specified, it defaults to
    4096\.

### Memory

- `cgroup` provides a
  [Memory Resource Controller](https://www.kernel.org/doc/Documentation/cgroup-v1/memory.txt)
  to allow setting upper limits to memory usage:
  - `memory.limit_in_bytes` - bounds the memory usage
  - `memory.memsw.limit_in_bytes` - limits the memory+swap usage
  - `memory.soft_limit_in_bytes` - enables flexible sharing of memory. Under
    normal circumstances, control groups are allowed to use as much of the
    memory as needed, constrained only by their hard limits set with the
    `memory.limit_in_bytes` parameter. However, when the system detects memory
    contention or low memory, control groups are forced to restrict their
    consumption to their soft limits.

### vCPU

- `cgroup`’s
  [CPU Controller](https://www.kernel.org/doc/Documentation/cgroup-v1/cpuacct.txt)
  can guarantee a minimum number of CPU shares when a system is busy and
  provides CPU bandwidth control through:
  - `cpu.shares` - limits the amount of CPU that each group is expected to get.
The percentage of CPU assigned is the value of shares divided by the sum of all shares in all `cgroups` in the same level - `cpu.cfs_period_us` - bounds the duration in us of each scheduler period, for bandwidth decisions. This defaults to 100ms - `cpu.cfs_quota_us` - sets the maximum time in microseconds during each `cfs_period_us` for which the current group will be allowed to run - `cpuacct.usage_percpu` - limits the CPU time, in ns, consumed by the process in the group, separated by CPU Additional details of Jailer features can be found in the [Jailer documentation](jailer.md). ## Host Security Configuration ### Constrain CPU overhead caused by kvm-pit kernel threads The current implementation results in host CPU usage increase on x86 CPUs when a guest injects timer interrupts with the help of kvm-pit kernel thread. kvm-pit kthread is by default part of the root cgroup. To mitigate the CPU overhead we recommend two system level configurations. 1. Use an external agent to move the `kvm-pit/` kernel thread in the microVM’s cgroup (e.g., created by the Jailer). This cannot be done by Firecracker since the thread is created by the Linux kernel after guest start, at which point Firecracker is de-privileged. 1. Configure the kvm limit to a lower value. This is a system-wide configuration available to users without Firecracker or Jailer changes. However, the same limit applies to APIC timer events, and users will need to test their workloads in order to apply this mitigation. To modify the kvm limit for interrupts that can be injected in a second. 1. `sudo modprobe -r (kvm_intel|kvm_amd) kvm` 1. `sudo modprobe kvm min_timer_period_us={new_value}` 1. 
`sudo modprobe (kvm_intel|kvm_amd)`

To make this change persistent across boots we can append the option to
`/etc/modprobe.d/kvm.conf`:

`echo "options kvm min_timer_period_us={new_value}" >> /etc/modprobe.d/kvm.conf`

### Mitigating Network flooding issues

Network can be flooded by creating connections and sending/receiving a
significant amount of requests. This issue can be mitigated either by
configuring rate limiters for the network interface as explained within
[Network Interface documentation](api_requests/patch-network-interface.md), or
by using one of the tools presented below:

- `tc qdisc` - manipulate traffic control settings by configuring filters. When
  traffic enters a classful qdisc, the filters are consulted and the packet is
  enqueued into one of the classes within. Besides containing other qdiscs,
  most classful qdiscs perform rate control.
- `netnamespace` and `iptables`
  - `--pid-owner` - can be used to match packets based on the PID that was
    responsible for them
  - `connlimit` - restricts the number of connections for a destination IP
    address/from a source IP address, as well as limit the bandwidth

### Mitigating Noisy-Neighbour Storage Device Contention

Data written to storage devices is managed in Linux with a page cache. Updates
to these pages are written through to their mapped storage devices
asynchronously at the host operating system's discretion. As a result, high
storage output can result in this cache being filled quickly resulting in a
backlog which can slow down I/O of other guests on the host.

To protect the resource access of the guests, make sure to tune each
Firecracker process via the following tools:

- [Jailer](jailer.md): A wrapper environment designed to contain Firecracker
  and strictly control what the process and its guest has access to. Take note
  of the [jailer operations guide](jailer.md#jailer-operation), paying
  particular note to the `--resource-limit` parameter.
- Rate limiting: Rate limiting functionality is supported for both networking and storage devices and is configured by the operator of the environment that launches the Firecracker process and its associated guest. See the [block device documentation](api_requests/patch-block.md) for examples of calling the API to configure rate limiting. ### Disabling swapping to disk or enabling secure swap Memory pressure on a host can cause memory to be written to drive storage when swapping is enabled. Disabling swap mitigates data remanence issues related to having guest memory contents on microVM storage devices. Verify that swap is disabled by running: ```bash grep -q "/dev" /proc/swaps && \ echo "swap partitions present (Recommendation: no swap)" \ || echo "no swap partitions (OK)" ``` ### Mitigating hardware vulnerabilities > [!CAUTION] > > Firecracker is not able to mitigate host's hardware vulnerabilities. Adequate > mitigations need to be put in place when configuring the host. > [!CAUTION] > > Firecracker is designed to provide isolation boundaries between microVMs > running in different Firecracker processes. It is strongly recommended that > each Firecracker process corresponds to a workload of a single tenant. > [!CAUTION] > > For security and stability reasons it is highly recommended to load updated > microcode as soon as possible. Aside from keeping the system firmware > up-to-date, when the kernel is used to load updated microcode of the CPU this > should be done as early as possible in the boot process. #### Side channel attacks For the purposes of this document we assume a workload that involves arbitrary code execution in a multi-tenant context where each Firecracker process corresponds to a single tenant. Specific mitigations for side channel issues are constantly evolving as researchers find additional issues on a regular basis. 
Firecracker itself has no control over many lower-level software and hardware behaviors and capabilities and is not able to mitigate all these issues. Thus, it is strongly recommended that users follow the very latest [Linux kernel documentation on hardware vulnerabilities](https://docs.kernel.org/admin-guide/hw-vuln/index.html) as well as hardware/processor-specific recommendations and firmware updates (see [vendor-specific recommendations](#vendor-specific-recommendations) below) when configuring mitigations against side channel attacks including "Spectre" and "Meltdown" attacks. However, some generic recommendations are also provided in what follows. ##### Disable SMT Simultaneous Multi-Threading (SMT) is frequently a precondition for speculation issues utilized in side channel attacks such as Spectre variants, MDS, and others, where one tenant could leak information to another tenant or the host. As such, our recommendation is to disable SMT in production scenarios that require tenant separation. ##### Disable Kernel Samepage Merging Users should disable [Kernel Samepage Merging](https://www.kernel.org/doc/html/latest/admin-guide/mm/ksm.html) to mitigate [side channel issues](https://eprint.iacr.org/2013/448.pdf) that rely on page deduplication for revealing what memory pages are accessed by another process. ##### Use memory with Rowhammer mitigation support Rowhammer is a memory side-channel issue that can lead to unauthorized cross- process memory changes. Using DDR4 memory that supports Target Row Refresh (TRR) with error-correcting code (ECC) is recommended. Use of pseudo target row refresh (pTRR) for systems with pTRR-compliant DDR3 memory can help mitigate the issue, but it also incurs a performance penalty. 
##### Vendor-specific recommendations For vendor-specific recommendations, please consult the resources below: - Intel: [Software Security Guidance](https://www.intel.com/content/www/us/en/developer/topic-technology/software-security-guidance/overview.html) - AMD: [AMD Product Security](https://www.amd.com/en/resources/product-security.html) - ARM: [Speculative Processor Vulnerability](https://developer.arm.com/support/arm-security-updates/speculative-processor-vulnerability) ##### [ARM only] VM Physical counter behaviour On ARM, Firecracker tries to reset the `CNTPCT` physical counter on VM boot. This is done in order to prevent VM from reading host physical counter value. Firecracker will only try to reset the counter if the host KVM contains `KVM_CAP_COUNTER_OFFSET` capability. This capability is only present in kernels containing [this](https://lore.kernel.org/all/20230330174800.2677007-1-maz@kernel.org/) patch series (starting from 6.4 and newer). For older kernels the counter value will be passed through from the host. ##### Verification [spectre-meltdown-checker script](https://github.com/speed47/spectre-meltdown-checker) can be used to assess host's resilience against several transient execution CVEs and receive guidance on how to mitigate them. The script is used in integration tests by the Firecracker team. It can be downloaded and executed like: ```bash # Read https://meltdown.ovh before running it. wget -O - https://meltdown.ovh | bash ``` ### Linux 6.1 boot time regressions Linux 6.1 introduced some regressions in the time it takes to boot a VM, for the x86_64 architecture. They can be mitigated depending on the CPU and the version of cgroups in use. #### Explanation The regression happens in the `KVM_CREATE_VM` ioctl and there are two factors that cause the issue: 1. In the implementation of the mitigation for the iTLB multihit vulnerability, KVM creates a worker thread called `kvm-nx-lpage-recovery`. 
This thread is responsible for recovering huge pages split when the mitigation
kicks-in. In the process of creating this thread, KVM calls
`cgroup_attach_task_all()` to move it to the same cgroup used by the hypervisor
thread

1. In kernel v4.4, upstream converted a cgroup per process read-write semaphore
   into a per-cpu read-write semaphore to allow to perform operations across
   multiple processes
   ([commit](https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?&id=1ed1328792ff46e4bb86a3d7f7be2971f4549f6c)).
   It was found that this conversion introduced high latency for write paths,
   which mainly includes moving tasks between cgroups. This was fixed in kernel
   v4.9 by
   [commit](https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?&id=3942a9bd7b5842a924e99ee6ec1350b8006c94ec)
   which chose to favor writers over readers since moving tasks between cgroups
   is a common operation for Android. However, in kernel 6.0, upstream decided
   to revert back again and favor readers over writers re-introducing the
   original behavior of the rw semaphore
   ([commit](https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?&id=6a010a49b63ac8465851a79185d8deff966f8e1a)).
   At the same time, this commit provided an option called `favordynmods` to
   favor writers over readers.

1. Since the `kvm-nx-lpage-recovery` thread creation and its cgroup change is
   done in the `KVM_CREATE_VM` call, the high latency we observe in 6.1 is due
   to the upstream decision to favor readers over writers for this per-cpu rw
   semaphore. While the 4.14 and 5.10 kernels favor writers over readers.

The first step is to check if the host is vulnerable to iTLB multihit. Look at
the value of `cat /sys/devices/system/cpu/vulnerabilities/itlb_multihit`. If it
says `Not affected`, the host is not vulnerable and you can apply mitigation 2,
and optionally 1 for best results. Otherwise it is vulnerable and you can only
apply mitigation 1.
#### Mitigation 1: `favordynmods` The mitigation in this case is to enable `favordynmods` in cgroupsv1 or cgroupsv2. This changes the behavior of all cgroups in the host, and makes it closer to the performance of Linux 5.10 and 4.14. For cgroupsv2, run this command: ```sh sudo mount -o remount,favordynmods /sys/fs/cgroup ``` For cgroupsv1, remounting with `favordynmods` is not supported, so it has to be done at boot time, through a kernel command line option. Add `cgroup_favordynmods=true` to your kernel command line in GRUB. Refer to your distribution's documentation for where to make this change[^1] #### Mitigation 2: `kvm.nx_huge_pages=never` This mitigation is preferred to the previous one as it is less invasive (it doesn't affect other cgroups), but it can also be combined with the cgroups mitigation. ```sh KVM_VENDOR_MOD=$(lsmod |grep -P "^kvm_(amd|intel)" | awk '{print $1}') sudo modprobe -r $KVM_VENDOR_MOD kvm sudo modprobe kvm nx_huge_pages=never sudo modprobe $KVM_VENDOR_MOD ``` To validate that the change took effect, the file `/sys/module/kvm/parameters/nx_huge_pages` should say `never`. [^1]: Look for `GRUB_CMDLINE_LINUX` in file `/etc/default/grub` in RPM-based systems, and [this doc for Ubuntu](https://wiki.ubuntu.com/Kernel/KernelBootParameters). ================================================ FILE: docs/pvh.md ================================================ # PVH boot mode Firecracker supports booting x86 kernels in "PVH direct boot" mode [as specified by the Xen project](https://github.com/xen-project/xen/blob/master/docs/misc/pvh.pandoc). If a kernel is provided which contains the XEN_ELFNOTE_PHYS32_ENTRY ELF Note then this boot mode will be used. This boot mode was designed for virtualized environments which load the kernel directly, and is simpler than the "Linux boot" mode which is designed to be launched from a legacy boot loader. PVH boot mode can be enabled for Linux by setting `CONFIG_PVH=y` in the kernel configuration. 
(This is not the default setting.) PVH boot mode is enabled by default in FreeBSD, which has support for Firecracker starting with FreeBSD 14.0. Instructions on building a FreeBSD kernel and root filesystem are available [here](rootfs-and-kernel-setup.md). ================================================ FILE: docs/rootfs-and-kernel-setup.md ================================================ # Creating Custom rootfs and kernel Images ## Creating a Linux kernel Image ### Manual compilation Currently, Firecracker supports uncompressed ELF kernel images on x86_64 while on aarch64 it supports PE formatted images. Here's a quick step-by-step guide to building your own kernel that Firecracker can boot: 1. Get the Linux source code: ```bash git clone https://github.com/torvalds/linux.git linux.git cd linux.git ``` 1. Check out the Linux version you want to build (e.g. we'll be using v4.20 here): ```bash git checkout v4.20 ``` 1. You will need to configure your Linux build. You can start from our recommended [guest kernel configurations](../resources/guest_configs/) by copying the relevant one to `.config` (under the Linux sources dir). You can make interactive config adjustments using: ```bash make menuconfig ``` > [!NOTE] > > There are many ways of building a kernel config file, other than `menuconfig`. > You are free to use whichever one you choose. 1. Build the kernel image: ```bash arch=$(uname -m) if [ "$arch" = "x86_64" ]; then make vmlinux elif [ "$arch" = "aarch64" ]; then make Image fi ``` 1. Upon a successful build, you can find the kernel image under `./vmlinux` (for x86) or `./arch/arm64/boot/Image` (for aarch64). For a list of currently supported kernel versions, check out the [kernel support policy](kernel-policy.md). ### Use the provided recipe The kernel images used in our CI to test Firecracker's features are obtained by running the script `resources/rebuild.sh`. 
Users can build those locally by running: ```bash ./tools/devtool build_ci_artifacts kernels ``` This will build all versions that we currently use in our CI. `kernels` subcommand allows passing a specific kernel version to build. For example: ```bash ./tools/devtool build_ci_artifacts kernels 6.1 ``` will build only the 6.1 kernel. Currently supported kernel versions are: `5.10`, `5.10-no-acpi` (same as 5.10 but without ACPI support) and `6.1`. After the command finishes, the kernels along with the corresponding KConfig used will be stored under `resources/$(uname -m)`. ## Creating a Linux rootfs Image A rootfs image is just a file system image, that hosts at least an init system. For instance, our getting started guide uses an ext4 filesystem image. Note that, whichever file system you choose to use, support for it will have to be compiled into the kernel, so it can be mounted at boot time. In order to obtain an ext4 image that you can use with Firecracker, you have the following options: ### Manual build 1. Prepare a properly-sized file. We'll use 50MiB here, but this depends on how much data you'll want to fit inside: ```bash dd if=/dev/zero of=rootfs.ext4 bs=1M count=50 ``` 1. Create an empty file system on the file you created: ```bash mkfs.ext4 rootfs.ext4 ``` You now have an empty EXT4 image in `rootfs.ext4`, so let's prepare to populate it. First, you'll need to mount this new file system, so you can easily access its contents: ```bash mkdir /tmp/my-rootfs sudo mount rootfs.ext4 /tmp/my-rootfs ``` The minimal init system would be just an ELF binary, placed at `/sbin/init`. The final step in the Linux boot process executes `/sbin/init` and expects it to never exit. More complex init systems build on top of this, providing service configuration files, startup / shutdown scripts for various services, and many other features. For the sake of simplicity, let's set up an Alpine-based rootfs, with OpenRC as an init system. 
To that end, we'll use the official Docker image for Alpine Linux: 1. First, let's start the Alpine container, bind-mounting the EXT4 image created earlier, to `/my-rootfs`: ```bash docker run -it --rm -v /tmp/my-rootfs:/my-rootfs alpine ``` 1. Then, inside the container, install the OpenRC init system, and some basic tools: ```bash apk add openrc apk add util-linux ``` 1. And set up userspace init (still inside the container shell): ```bash # Set up a login terminal on the serial console (ttyS0): ln -s agetty /etc/init.d/agetty.ttyS0 echo ttyS0 > /etc/securetty rc-update add agetty.ttyS0 default # Make sure special file systems are mounted on boot: rc-update add devfs boot rc-update add procfs boot rc-update add sysfs boot # Then, copy the newly configured system to the rootfs image: for d in bin etc lib root sbin usr; do tar c "/$d" | tar x -C /my-rootfs; done # The above command may trigger the following message: # tar: Removing leading "/" from member names # However, this is just a warning, so you should be able to # proceed with the setup process. for dir in dev proc run sys var; do mkdir /my-rootfs/${dir}; done # All done, exit docker shell. exit ``` 1. Finally, unmount your rootfs image: ```bash sudo umount /tmp/my-rootfs ``` ### Use the provided recipe The disk images used in our CI to test Firecracker's features are obtained by using the recipe (in a Ubuntu 22.04 host): ```bash ./tools/devtool build_ci_artifacts rootfs ``` The images resulting using this method are minimized Ubuntu 22.04. Feel free to adjust the script(s) to suit your use case. You should now have a rootfs image (`ubuntu-22.04.ext4`), that you can boot with Firecracker. ## Creating FreeBSD rootfs and kernel Images Here's a quick step-by-step guide to building a FreeBSD rootfs and kernel that Firecracker can boot: 1. Boot a FreeBSD system. 
In EC2, the [FreeBSD 13 Marketplace image](https://aws.amazon.com/marketplace/pp/prodview-ukzmy5dzc6nbq) is a good option; you can also use weekly snapshot AMIs published by the FreeBSD project. (Firecracker support is in FreeBSD 14 and later, so you'll need FreeBSD 13 or later to build it.) The build will require about 50 GB of disk space, so size the disk appropriately. 1. Log in to the FreeBSD system and become root. If using EC2, you'll want to ssh in as `ec2-user` with your chosen SSH key and then `su` to become root. 1. Install git and check out the FreeBSD src tree: ```sh pkg install -y git git clone https://git.freebsd.org/src.git /usr/src ``` Firecracker support is available since FreeBSD 14.0 (released November 2023). 1. Build FreeBSD: ```sh make -C /usr/src buildworld buildkernel KERNCONF=FIRECRACKER make -C /usr/src/release firecracker DESTDIR=`pwd` ``` You should now have a rootfs `freebsd-rootfs.bin` and a kernel `freebsd-kern.bin` in the current directory (or elsewhere if you change the `DESTDIR` value) that you can boot with Firecracker. Note that the FreeBSD rootfs generated in this manner is somewhat minimized compared to "stock" FreeBSD; it omits utilities which are only relevant on physical systems (e.g., utilities related to floppy disks, USB devices, and some network interfaces) and also debug files and the system compiler. ================================================ FILE: docs/seccomp.md ================================================ # Seccomp in Firecracker Seccomp filters are used by default to limit the host system calls Firecracker can use. The default filters only allow the bare minimum set of system calls and parameters that Firecracker needs in order to function correctly. The filters are loaded in the Firecracker process, on a per-thread basis, as follows: - VMM (main) - right before executing guest code on the VCPU threads; - API - right before launching the HTTP server; - VCPUs - right before executing guest code. 
> [!WARNING] > > On debug binaries and experimental GNU targets, there are no default seccomp > filters installed, since they are not intended for production use. Firecracker uses JSON files for expressing the filter rules and relies on the [seccompiler](seccompiler.md) tool for all the seccomp functionality. ## Default filters (recommended) At build time, the default target-specific JSON file is compiled into the serialized binary file, using seccompiler-bin, and gets embedded in the Firecracker binary. This process is performed automatically, when building the executable. To minimise the overhead of successive builds, the compiled filter file is cached in the build folder and is only recompiled if modified. You can find the default seccomp filters under `resources/seccomp`. For a certain release, the default JSON filters used to build Firecracker are also included in the respective release archive, viewable on the [releases page](https://github.com/firecracker-microvm/firecracker/releases). ## Custom filters (advanced users only) > [!NOTE] > > This feature overrides the default filters and can be dangerous. Filter > misconfiguration can result in abruptly terminating the process or disabling > the seccomp security boundary altogether. We recommend using the default > filters instead. > [!NOTE] > > The user is fully responsible for managing the filter files. We recommend > using integrity checks whenever transferring/downloading files, for example > checksums, as well as for the Firecracker binary or other artifacts, in order > to mitigate potential man-in-the-middle attacks. Firecracker exposes a way for advanced users to override the default filters with fully customisable alternatives, leveraging the same JSON/seccompiler tooling, at startup time. Via Firecracker's optional `--seccomp-filter` parameter, one can supply the path to a custom filter file compiled with seccompiler-bin. 
Potential use cases: - Users of experimentally-supported targets (like GNU libc builds) may be able to use this feature to implement seccomp filters without needing to have a custom build of Firecracker. - Users of debug binaries who need to use a seccomp filter for any reason will be able to use this feature to implement seccomp filters without needing to have a custom build of Firecracker. Note: there may be some differences in syscalls between `debug` and `release` builds. A non-comprehensive list is: - `fcntl(F_GETFD)` is used by debug assertions to verify a dropped `fd` is valid. - Faced with a _theoretical_ production issue, due to a syscall that was issued by the Firecracker process, but not allowed by the seccomp policy, one may use a custom filter in order to quickly mitigate the issue. This can speed up the resolution time, by not needing to build and deploy a new Firecracker binary. However, as the note above states, this needs to be thoroughly tested and should not be a long-term solution. ## Disabling seccomp (not recommended) Firecracker also has support for a `--no-seccomp` parameter, which disables all seccomp filtering. It can be helpful when quickly prototyping changes in Firecracker that use new system calls. Do **not** use in production. ================================================ FILE: docs/seccompiler.md ================================================ # Seccompiler - overview and user guide ## Overview Seccompiler-bin is a tool that compiles seccomp filters expressed as JSON files into serialized, binary BPF code that is directly consumed by Firecracker, at build or launch time. The binary filters are serialized using bitcode format. Seccompiler-bin uses a custom [JSON file structure](#json-file-format), detailed further below, that the filters must adhere to. Besides the seccompiler-bin executable, seccompiler also exports a library interface, with helper functions for deserializing and installing the binary filters. 
The library uses bitcode format for serialization and deserialization. ## Usage ### Seccompiler-bin To view the seccompiler-bin command line arguments, pass the `--help` parameter to the executable. Example usage: ```bash ./seccompiler-bin --target-arch "x86_64" # The CPU arch where the BPF program will run. # Supported architectures: x86_64, aarch64. --input-file "x86_64_musl.json" # File path of the JSON input. --output-file "bpf_x86_64_musl" # Optional path of the output file. # [default: "seccomp_binary_filter.out"] --basic # Optional, creates basic filters, discarding any parameter checks. # (Deprecated). --split-output # Optional, creates individual BPF files for each thread. ``` ### Seccompiler library To view the library documentation, navigate to the seccompiler source code, in `firecracker/src/seccompiler/src` and run `cargo doc --lib --open`. ### Output format Seccompiler-bin generates binary BPF filters serialized using the bitcode format. The output file contains a bitcode-serialized map of thread names to their corresponding BPF instruction sequences. When using the `--split-output` flag, seccompiler-bin will generate individual `.bpf` files for each thread containing raw BPF bytecode (useful for testing). The individual thread files are named `.bpf` and placed in the same directory as the main output file. ## Where is seccompiler implemented? Seccompiler is implemented as another package in the Firecracker cargo workspace. The code is located at `firecracker/src/seccompiler/src`. ## Supported platforms Seccompiler-bin is supported on the [same platforms as Firecracker](../README.md#supported-platforms). ## Release policy Seccompiler-bin follows Firecracker's [release policy](RELEASE_POLICY.md) and version (it's released at the same time, with the same version number and adheres to the same support window). ## JSON file format A JSON file expresses the seccomp policy for the entire Firecracker process. 
It contains multiple filters, one per each thread category and is specific to just one target platform. This means that Firecracker has a JSON file for each supported target (currently determined by the arch-libc combinations). You can view them in `resources/seccomp`. At the top level, the file requires an object that maps thread categories (vmm, api and vcpu) to seccomp filters: ``` { "vmm": { "default_action": { "errno" : -1 }, "filter_action": "allow", "filter": [...] }, "api": {...}, "vcpu": {...}, } ``` The associated filter is a JSON object containing the `default_action`, `filter_action` and `filter`. The `default_action` represents the action we have to execute if none of the rules in `filter` matches, and `filter_action` is what gets executed if a rule in the filter matches (e.g: `"Allow"` in the case of implementing an allowlist). An **action** is the JSON representation of the following enum: ```rust pub enum SeccompAction { Allow, // Allows syscall. Errno(u32), // Returns from syscall with specified error number. Kill, // Kills calling process. Log, // Same as allow but logs call. Trace(u32), // Notifies tracing process of the caller with respective number. Trap, // Sends `SIGSYS` to the calling process. } ``` The `filter` property specifies the set of rules that would trigger a match. This is an array containing multiple **or-bound SyscallRule** **objects** (if one of them matches, the corresponding action gets triggered). The **SyscallRule** object is used for adding a rule to a syscall. It has an optional `args` property that is used to specify a vector of and-bound conditions that the syscall arguments must satisfy in order for the rule to match. In the absence of the `args` property, the corresponding action will get triggered by any call that matches that name, irrespective of the argument values. 
Here is the structure of the object: ``` { "syscall": "accept4", // mandatory, the syscall name "comment": "Used by vsock & api thread", // optional, for adding meaningful comments "args": [...] // optional, vector of and-bound conditions for the parameters } ``` Note that the file format expects syscall names, not arch-specific numbers, for increased usability. This is not true, however, for the syscall arguments, which are expected as base-10 integers. In order to allow a syscall with multiple alternatives for the same parameters, you can write multiple syscall rule objects at the filter-level, each with its own rules. Note that, when passing the deprecated `--basic` flag to seccompiler-bin, all `args` fields of the `SeccompRule`s are ignored. A **condition object** is made up of the following mandatory properties: - `index` (0-based index of the syscall argument we want to check) - `type` (`dword` or `qword`, which specifies the argument size - 4 or 8 bytes respectively) - `op`, which is one of `eq, ge, gt, le, lt, masked_eq, ne` (the operator used for comparing the parameter to `val`) - `val` is the integer value being checked against As mentioned earlier, we don’t support any named parameters, but only numeric constants in the JSON file. You may however add an optional `comment` property to each condition object. This way, you can provide meaning to each numeric value, much like when using named parameters, like so: ``` { "syscall": "accept4", "args": [ { "index": 3, "type": "dword", "op": "eq", "val": 1, "comment": "libc::AF_UNIX" } ] } ``` To see example filters, look over Firecracker's JSON filters in `resources/seccomp`. 
================================================ FILE: docs/snapshotting/handling-page-faults-on-snapshot-resume.md ================================================ # Handling snapshot memory loading Firecracker allows for a better management of the microVM's memory loading by letting users choose between relying on host OS to handle the page faults when resuming from a snapshot, or having a dedicated userspace process for dealing with page faults, with the help of [Userfaultfd](https://www.kernel.org/doc/html/v4.18/admin-guide/mm/userfaultfd.html). ## Kernel When resuming a microVM from a snapshot, loading the snapshotted guest's memory (which is file-backed) into RAM is usually kernel's responsibility and is handled on a per-page-fault basis. Each time the guest touches a page that is not already in Firecracker's process memory, a page fault occurs, which triggers a context switch and IO operation in order to bring that page into RAM. Depending on the use case, doing this for every page can be time-consuming. ## Userfaultfd Userfaultfd is a mechanism that passes that responsibility of handling page fault events from kernel space to user space. In order to be able to interact with this mechanism, userspace needs to firstly obtain an userfault file descriptor object (UFFD). ### Creating a UFFD object #### Kernel 5.10 For host kernel 5.10 UFFD objects are created by calling into [`userfaultfd` syscall](https://man7.org/linux/man-pages/man2/userfaultfd.2.html). #### Kernel 6.1 For kernel 6.1, UFFD is created through the `/dev/userfaultfd` device. Access to `/dev/userfaultfd` is managed by file system permissions, so the Firecracker process needs to have proper permissions to create the UFFD object. When `/dev/userfaultfd` is present on the host system, jailer makes it available inside the jail and Firecracker process can use it without any further configuration. 
If a user is not using Firecracker along with the jailer, they should manage manually permissions to `/dev/userfaultfd`. For example, on systems that rely on access control lists (ACLs), this can be achieved by: ```bash sudo setfacl -m u:${USER}:rw /dev/userfaultfd ``` ### Registering memory to be handled via Userfault File Descriptors Next, the memory address range must be registered with the userfault file descriptor so that the userfault object can monitor page faults occurring for those addresses. After this, the user space process can start reading and serving events via the userfault file descriptor. These events will contain the address that triggered the fault. The fault-handling thread can choose to handle these events using these [operations](https://www.kernel.org/doc/html/latest/admin-guide/mm/userfaultfd.html#resolving-userfaults). In the flow described above, there are two userspace processes that interact with each other in order to handle page faults: Firecracker process and the page fault handler. Please note that users are responsible for writing the page fault handler process to monitor userfaultfd events and handle those events. Below is the interaction flow between Firecracker and the page fault handler (designed by the users): - Page fault handler binds and listens on a unix domain socket in order to be able to communicate with the Firecracker process. ![](../images/uffd_flow1.png) Please note that when using the Jailer, the page fault handler process, UDS and memory file must reside inside the jail. The UDS must only be accessible to Firecracker and the page fault handler. - PUT snapshot/load API call is issued towards Firecracker's API thread. The request encapsulates in its body the path to the unix domain socket that page fault handler listens to in order to communicate with Firecracker. - Firecracker process creates the userfault object and obtains the userfault file descriptor. 
- The page fault handler privately mmaps the contents of the guest memory file. ![](../images/uffd_flow2.png) - Firecracker anonymously mmaps memory based on the memory description found in the microVM state file and registers the memory regions with the userfault object in order for the userfaultfd to be aware of page fault events on these addresses. Firecracker then connects to the socket previously opened by the page fault process. ![](../images/uffd_flow3.png) - Firecracker passes the userfault file descriptor and the guest memory layout (e.g. dimensions of each memory region, and their [page size](../hugepages.md) in KiB) to the page fault handler process through the socket. ![](../images/uffd_flow4.png) - After sending the necessary information to the page fault handler, Firecracker continues with the normal cycle to restore from snapshot. It reads from the microVM state file the relevant serialized components and loads them into memory. - Page faults that occur while Firecracker is touching guest memory are handled by the page fault handler process, which listens for events on the userfault file descriptor that Firecracker previously sent. When a page fault event happens, the page fault handler issues `UFFDIO_COPY` to load the previously mmaped file contents into the correspondent memory region. After Firecracker sends the payload (i.e. mem mappings and file descriptor), no other communication happens on the UDS socket (or otherwise) between Firecracker and the page fault handler process. ### Userfaultfd interaction with balloon The balloon device allows the host to reclaim memory from a microVM. For more details on balloon, please refer to [this doc](../ballooning.md). When the balloon device asks for removal of a memory range, Firecracker calls `madvise` with the `MADV_DONTNEED` flag in order to let the kernel know that it can free up memory found in that specific area. On such a system call, the userfaultfd interface sends `UFFD_EVENT_REMOVE`. 
When implementing the logic for the page fault handler, users must identify events of type `UFFD_EVENT_REMOVE` and handle them by zeroing out those pages. This is because the memory is removed, but the area still remains monitored by userfaultfd. After a cycle of inflation and deflation, page faults might happen again for memory ranges that have been removed by balloon (and subsequently zeroed out by the page fault handler). In such a case, the page fault handler process must zero out the faulted page (instead of bringing it from file), as recommended by [the userfaultfd documentation](https://www.kernel.org/doc/html/latest/admin-guide/mm/userfaultfd.html#non-cooperative-userfaultfd). In case of a compromised balloon driver, the page fault handler can get flooded with `UFFD_EVENT_REMOVE`. We recommend using the jailer's built-in cgroup functionality as defense in depth, in order to limit resource usage of the Firecracker process. ### Caveats If the handler process crashes while Firecracker is resuming the snapshot, Firecracker will hang when a page fault occurs. This is because Firecracker is designed to wait for the requested page to be made available. If the page fault handler process is no longer around when this happens, Firecracker will wait forever. Users are expected to monitor the page fault handler's status or gather metrics of hanged Firecracker process and implement a recycle mechanism if necessary. It is the page fault handler process's responsibility to handle any errors that might occur and also send signals to Firecracker process to inform it of any crashes/exits. The page fault handler can fetch Firecracker's PID through `getsockopt` call with `SO_PEERCRED` option, which fetches credentials of the peer process that is connected to the socket. The returned credentials contain: PID, GID and UID of the peer process (Firecracker in the page fault handler's case). 
We recommend that the page fault handler includes timeouts for waiting on Firecracker to connect to the UDS or send information over the UDS, in order to account for unexpected cases when Firecracker crashes before being able to connect/send data. ### Example An example of a handler process can be found [here](../../src/firecracker/examples/uffd/on_demand_handler.rs). The process is designed to tackle faults on a certain address by loading into memory the entire region that the address belongs to, but users can choose any other behavior that suits their use case best. ================================================ FILE: docs/snapshotting/network-for-clones.md ================================================ # Network Connectivity for Clones This document presents a strategy to ensure continued network connectivity for multiple clones created from a single Firecracker microVM snapshot. > [!CAUTION] > > This should be considered as just an example to get you started, and we don't > claim this is a performant or secure setup. ## Setup There are two things which prevent network connectivity from resuming out-of-the-box for clones created from the same snapshot: Firecracker currently saves and attempts to restore network devices using the initially configured TAP names, and each guest will be resumed with the same network configuration, most importantly with the same IP address(es). To work around the former, each clone should be started within a separate network namespace (we can have multiple TAP interfaces with the same name, as long as they reside in distinct network namespaces). The latter can be mitigated by leveraging `iptables` `NAT` support. Let’s have a more detailed look at this approach. We assume each VM has a single network interface attached. If multiple interfaces with full connectivity are required, we simply repeat the relevant parts of this process for each additional interface. 
A typical setup right before taking a snapshot involves having a VM with a network interface backed by a TAP device (named `vmtap0`, for example) with an IP address (referred to as the TAP IP address, for example `192.168.241.1/29`), and an IP address configured inside the guest for the corresponding virtio device (referred to as the guest IP address, for example `192.168.241.2/29`). ### Network namespaces Attempting to restore multiple clones from the same snapshot faces the problem of every single one of them attempting to use a TAP device with the original name, which is not possible by default. Therefore, we need to start each clone in a separate network namespace. This is already possible using the `--netns` jailer parameter, described in the [documentation](../jailer.md). The specified namespace must already exist, so we have to create it first using ```bash sudo ip netns add fc0 ``` (where `fc0` is the name of the network namespace we plan to use for this specific clone - `clone0`). A new network namespace is initially empty, so we also have to create a new tap interface within using ```bash sudo ip netns exec fc0 ip tuntap add name vmtap0 mode tap ``` The `ip netns exec ` allows us to execute `command` in the context of the specified network namespace (in the previous case, the secondary command creates a new tap interface). Next we configure the new TAP interface to match the expectations of the snapshotted guest by running ```bash sudo ip netns exec fc0 ip addr add 192.168.241.1/29 dev vmtap0 sudo ip netns exec fc0 ip link set vmtap0 up ``` At this point we can start multiple clones, each in its separate namespace, but they won’t have connectivity to the rest of the host, only the respective TAP interfaces. 
However, interaction over the network is still possible; for example we can connect over ssh to clone0 using ```bash sudo ip netns exec fc0 ssh root@192.168.241.2 ``` ### `veth` interfaces to connect the network namespaces In order to obtain full connectivity we have to begin by connecting the network namespace to the rest of the host, and then solving the *“same guest IP”* problem. The former requires the use of `veth` pairs - *virtual interfaces that are link-local to each other (any packet sent through one end of the pair is immediately received on the other, and the other way around)*. One end resides inside the network namespace, while the other is moved into the parent namespace (the host global namespace in this case), and packets flow in or out according to the network configuration. We have to pick IP addresses for both ends of the veth pair. For clone index `idx`, let’s use `10.<idx / 30>.<(idx % 30) * 8>.1/24` for the endpoint residing in the host namespace, and the same address ending with `2` for the other end which remains inside the clone's namespace. Thus, for `clone 0` the former is `10.0.0.1` and the latter `10.0.0.2`. The first endpoint must have a unique name on the host, for example chosen as `veth(idx + 1)` (so `veth1` for clone 0). 
To create and setup the veth pair, we use the following commands (for namespace `fc0`): ```bash # create the veth pair inside the namespace sudo ip link add name veth1 type veth peer name veth0 netns fc0 sudo ip netns exec fc0 ip addr add 10.0.0.2/24 dev veth0 sudo ip netns exec fc0 ip link set dev veth0 up sudo ip addr add 10.0.0.1/24 dev veth1 sudo ip link set dev veth1 up # designate the outer end as default gateway for packets leaving the namespace sudo ip netns exec fc0 ip route add default via 10.0.0.1 ``` ### `iptables` rules for VM egress connectivity The last step involves adding the `iptables` rules which change the source/destination IP address of packets on the fly (thus allowing all clones to have the same internal IP). ```sh # Find the host egress device UPSTREAM=$(ip -j route list default |jq -r '.[0].dev') # anything coming from the VMs, we NAT the address ip netns exec fc0 iptables -t nat -A POSTROUTING -s 192.168.241.1/29 -o veth0 -j MASQUERADE iptables -t nat -A POSTROUTING -s 10.0.0.0/30 -o $UPSTREAM -j MASQUERADE # forward packets by default iptables -P FORWARD ACCEPT ip netns exec fc0 ip route add default via 10.0.0.1 ip netns exec fc0 iptables -P FORWARD ACCEPT ``` You may also want to configure the guest with a default route and a DNS nameserver: ```bash ip route add default via 10.0.0.1 echo nameserver 8.8.8.8 >/etc/resolv.conf ``` **Connectivity from the clone should be present at this point.** To make sure the guest also adjusts to the new environment, you can explicitly clear the ARP/neighbour table in the guest: ```bash ip -family inet neigh flush any ip -family inet6 neigh flush any ``` Otherwise, packets originating from the guest might be using old Link Layer Address for up to arp cache timeout seconds. After said timeout period, connectivity will work both ways even without an explicit flush. 
### Renaming host device names In some environments where the jailer is not being used, restoring a snapshot may be tricky because the tap device on the host will not be the same as the tap device that the original VM was mapped to when it was snapshotted, for example when the tap device comes from a pool of such devices. In this case you can use the `network_overrides` parameter of the snapshot restore API to specify which guest network device maps to which host tap device. For example, if we have a network interface named `eth0` in the snapshotted microVM, we can override it to point to the host device `vmtap01` during snapshot resume, like this: ```bash curl --unix-socket /tmp/firecracker.socket -i \ -X PUT 'http://localhost/snapshot/load' \ -H 'Accept: application/json' \ -H 'Content-Type: application/json' \ -d '{ "snapshot_path": "./snapshot_file", "mem_backend": { "backend_path": "./mem_file", "backend_type": "File" }, "network_overrides": [ { "iface_id": "eth0", "host_dev_name": "vmtap01" } ] }' ``` This may require reconfiguration of the networking inside the VM so that it is still routable externally. [network setup documentation](../network-setup.md#in-the-guest) describes what the typical setup is. If you are not using network namespaces or the jailer, then the guest will have to be made aware (via vsock or other channel) that it needs to reconfigure its network to match the network configured on the tap device. If the new TAP device, say `vmtap3`, has been configured to use a guest address of `172.16.3.2` then after snapshot restore you would run something like: ```bash # In the guest # Clear out the previous addr and route ip addr flush dev eth0 ip route flush dev eth0 # Configure the new address ip addr add 172.16.3.2/30 dev eth0 ip route add default via 172.16.3.1 dev eth0 ``` # Ingress connectivity The above setup only provides egress connectivity. 
If in addition we also want to add ingress (in other words, make the guest VM routable outside the network namespace), then we need to choose a "clone address" that will represent this VM uniquely. For our example we can use IPs from `172.16.0.0/12`, for example `172.16.0.1`. Then we can rewrite destination address heading towards the "clone address" to the guest IP. ```bash ip netns exec fc0 iptables -t nat -A PREROUTING -i veth0 \ -d 172.16.0.1 -j DNAT --to 192.168.241.2 ``` And add a route on the host so we can access the guest VM from the host network namespace: ```bash ip route add 172.16.0.1 via 10.0.0.2 ``` To confirm that ingress connectivity works, try ```bash ping 172.16.0.1 # or ssh root@172.16.0.1 ``` # See also For an improved setup with full ingress and egress connectivity to the individual VMs, see [this discussion](https://github.com/firecracker-microvm/firecracker/discussions/4720). ================================================ FILE: docs/snapshotting/random-for-clones.md ================================================ # Entropy for Clones This document provides a high level perspective on the implications of restoring multiple VM clones from a single snapshot. We start with an overview of the Linux random number generation (RNG) facilities, then go through the potential issues we’ve identified related to cloning state, and finally conclude with a series of recommendations. It’s worth stressing that we aim to prevent stale state being a problem only for the kernel interfaces. Some userspace applications or libraries keep their own equivalent of entropy pools and suffer from the same potential issues after being cloned. There is no generic solution under the current programming model, and all we can do is recommend against their use in pre-snapshot logic. 
## Background The Linux kernel exposes three main `RNG` interfaces to userspace: the `/dev/random` and `/dev/urandom` special devices, and the `getrandom` syscall, which are described in the [random(7) man page][1]. Moreover, Firecracker supports the [`virtio-rng`](../entropy.md) device which can provide additional entropy to guest VMs. It draws its random bytes from the [`aws-lc-rs`][8] crate which wraps the [`AWS-LC` cryptographic library][9]. Traditionally, `/dev/random` has been considered a source of “true” randomness, with the downside that reads block when the pool of entropy gets depleted. On the other hand, `/dev/urandom` doesn’t block, which led people to believe that it provides lower quality results. It turns out the distinction in output quality is actually very hard to make. According to [this article][2], for kernel versions prior to 4.8, both devices draw their output from the same pool, with the exception that `/dev/random` will block when the system estimates the entropy count has decreased below a certain threshold. The `/dev/urandom` output is considered secure for virtually all purposes, with the caveat that using it before the system gathers sufficient entropy for initialization may indeed produce low quality random numbers. The `getrandom` syscall helps with this situation; it uses the `/dev/urandom` source by default, but will block until it gets properly initialized (the behavior can be altered via configuration flags). Newer kernels (4.8+) have switched to an implementation where `/dev/random` output comes from a pool called the blocking pool, the output of `/dev/urandom` is given by a CSPRNG (cryptographically secure pseudorandom number generator), and there’s also an input pool which gathers entropy from various sources available on the system, and is used to feed into or seed the other two components. A very detailed description is available [here][3]. 
The details of this newer implementation are used to make the recommendations present in the document. There are in-kernel interfaces used to obtain random numbers as well, but they are similar to using `/dev/urandom` (or `getrandom` with the default source) from userspace. Whenever a VM clone is created based on a snapshot, execution resumes precisely from the previously saved state. Getting random bytes from either `/dev/random` or `/dev/urandom` does not lead to identical results for different clones created from the same snapshot because multiple parameters (such as timer data, or output from `CPU HWRNG` instructions which are present on Ivy Bridge or newer Intel processors and enabled in a Firecracker guest) are mixed with each result. Extra bits are mixed in both when reading random values, and in conjunction with entropy related events such as interrupts. Moreover, the guest kernel will eventually receive fresh entropy from `virtio-rng`, if attached. There are two questions here: - Is the `CPU HWRNG` output always mixed in when the feature is present (as opposed to only when the `CPU HWRNG` is trusted)? - Is the added noise strong enough to consider the final RNG output sufficiently divergent from all other clones? Both these questions are particularly relevant immediately after resuming a VM from a snapshot. After the VM gets to run for a "sufficient" amount of time it should be able to gather some more entropy by itself and its state should be sufficiently divergent from that of any other clones. It seems the `CPU HWRNG` is always added to the mix when present. More specifically, [page 32 point 1 (at the top of the page)][3] mentions using the `CPU HWRNG` when present for the entropy pool output function. Page 34 states *in case a CPU random number generator is known to the Linux-RNG, data from that hardware RNG is mixed into the entropy pool in a second step*. With respect to the initialization of the random pools and the DRNG behind `/dev/urandom`: 
The discussion regarding DRNG state on page 35 mentions *the key part, the counter, and the nonce are XORed with the output of the CPU random number generator if one is present. If it is not present, one high-resolution time stamp obtained with the kernel function random_get_entropy word is XORed with the key part*. The `CPU HWRNG` is also used for the DRNG state transition function (as stated on page 36 point 1), and during the reseed operation (page 37 point 2). The document explicitly mentions when the `CPU HWRNG` has to be trusted (for example, the bullet points at the end of Section 3.3.2.3). It’s not yet clear whether the noise that gets added for each clone post restore is sufficient to consider their RNG states distinct for security purposes. The conservative approach is to presume the stale state has a significant influence on RNG output, so we should reinitialize both sources based on fresh data after each restore. It would seem that simply writing data to `/dev/urandom` is enough to muddle the entropy pools, but the bits only get mixed with the input pool. It’s not certain at this point whether such writes have any immediate impact on the blocking pool, and it’s unlikely they cause the `CSPRNG` to be automatically reseeded. The standard methods of interacting with the kernel RNG sources are documented in the [random(4) man page][4]. It states that any writes to either `/dev/random` or `/dev/urandom` are mixed with the input entropy pool, but do not increase the current entropy estimation. There is also an `ioctl` interface which, given the appropriate privileges, can be used to add data to the input entropy pool while also increasing the count, or completely empty all pools. ### Linux kernels with VMGenID support Linux has support for the [Virtual Machine Generation Identifier](https://learn.microsoft.com/en-us/windows/win32/hyperv_v2/virtual-machine-generation-identifier) since 5.18 for ACPI systems. 
Since 6.10, Linux added support also for systems that use DeviceTree instead of ACPI. The purpose of VMGenID is to notify the guest about time shift events, such as resuming from a snapshot. The device exposes a 16-byte cryptographically random identifier in guest memory. Firecracker implements VMGenID. When resuming a microVM from a snapshot Firecracker writes a new identifier and injects a notification to the guest. Linux [uses this value](https://elixir.bootlin.com/linux/v5.18.19/source/drivers/virt/vmgenid.c#L77) [as new randomness for its CSPRNG](https://elixir.bootlin.com/linux/v5.18.19/source/drivers/char/random.c#L908). Quoting the random.c implementation of the kernel: ``` /* * Handle a new unique VM ID, which is unique, not secret, so we * don't credit it, but we do immediately force a reseed after so * that it's used by the crng posthaste. */ ``` As a result, values returned by `getrandom()` and `/dev/(u)random` are distinct in all VMs started from the same snapshot, **after** the kernel handles the VMGenID notification. This leaves a race window between resuming vCPUs and Linux CSPRNG getting successfully re-seeded. Firecracker supports VMGenID on ARM systems using the DeviceTree binding that was added for the device in Linux 6.10. However, the latest Linux kernel that Firecracker supports is 6.1. As a result, in order to use VMGenID on ARM systems, users need to use a 6.1 kernel with the DeviceTree binding support backported from 6.10. For our CI we backport the [relevant changes](https://github.com/torvalds/linux/commit/f0cd69b8cca6a5096463644d6dacc9f991bfa521) from 6.10 to 6.1. Consumers of Firecracker that want to use the feature on ARM need to ensure they backport these changes on their guest kernels. Please note that, Firecracker will always enable VMGenID. In kernels where there is no VMGenID driver, the device will not have any effect in the guest. 
### User space considerations Init systems (such as `systemd` used by AL2 and other distros) might save a random seed file after boot. For `systemd`, the path is `/var/lib/systemd/random-seed`. Just to be on the safe side, any such file should be deleted before taking a snapshot, to prevent its reuse for any purposes by the guest. There’s also the `/proc/sys/kernel/random/boot_id` special file, which gets initialized with a random string at boot time, and is read-only afterwards. All clones restored from the same snapshot will implicitly read the same value from this file. If that’s not desirable, it’s possible to alter the read result via bind mounting another file on top of `/proc/sys/kernel/random/boot_id`. ## Recommendations - Delete `/var/lib/systemd/random-seed`, or any equivalent files. - If changing the value present in `/proc/sys/kernel/random/boot_id` is important, bind mount another file on top of it. - If microVMs run on machines with IvyBridge or newer Intel processors (which provide RDRAND; in addition, RDSEED is offered starting with Broadwell). Hardware supported reseeding is done on a cadence defined by the Linux Kernel and should be sufficient for most cases. - Use `virtio-rng`. When present, the guest kernel uses the device as an additional source of entropy. - On kernels before 5.18, to be as safe as possible, the direct approach is to do the following (before customer code is resumed in the clone): 1. Open one of the special devices files (either `/dev/random` or `/dev/urandom`). Take note that `RNDCLEARPOOL` no longer [has any effect][7] on the entropy pool. 1. Issue an `RNDADDENTROPY` ioctl call (requires `CAP_SYS_ADMIN`) to mix the provided bytes into the input entropy pool and increase the entropy count. This should also cause the `/dev/urandom` `CSPRNG` to be reseeded. The bytes can be generated locally in the guest, or obtained from the host. 1. 
Issue a `RNDRESEEDCRNG` ioctl call ([4.14][5], [5.10][6], (requires `CAP_SYS_ADMIN`)) that specifically causes the `CSPRNG` to be reseeded from the input pool. - On kernels starting from 5.18 onwards, the CSPRNG will be automatically reseeded when the guest kernel handles the VMGenID notification. To completely avoid the race condition, users should follow the same steps as with kernels \< 5.18. - [Userspace notifications of loading snapshots](snapshot-support.md#userspace-notifications-of-loading-snapshots) can be used to trigger the direct approach described above. **Annex 1 contains the source code of a C program which implements the previous three steps.** As soon as the guest kernel version switches to 4.19 (or higher), we can rely on the `CONFIG_RANDOM_TRUST_CPU` kernel option (or the random.trust_cpu=on cmdline parameter) to have the entropy pool automatically refilled using the `CPU HWRNG`, so step 3 would no longer be necessary. Another way around step 3 is to attach a `virtio-rng` device. However, we cannot control when the guest kernel will request for random bytes from the device. ## Annex 1: Source code that clears and reinitializes the entropy pool ```cpp #include #include #include #include #include #include #include void exit_usage() { printf("Usage: ./rerand []\n" "The length of the string must be a multiple of 8.\n"); exit(EXIT_FAILURE); } void exit_perror(const char *msg) { perror(msg); exit(EXIT_FAILURE); } int main(int argc, char ** argv) { if (argc > 2) { exit_usage(); } size_t len = 0; struct rand_pool_info *info = NULL; if (argc == 2) { len = strlen(argv[1]); // We want len to be a multiple of 8 such that we have an easier time // parsing argv[1] into an array of u32s. if (len % 8) { exit_usage(); } info = malloc(sizeof(struct rand_pool_info) + len / 8); if (info == NULL) { exit_perror("Could not alloc rand_pool_info struct"); } // This is measured in bits IIRC. 
info->entropy_count = len * 4; info->buf_size = len / 8; } int fd = open("/dev/urandom", O_RDWR); if (fd < 0) { exit_perror("Unable to open /dev/urandom"); } if (ioctl(fd, RNDCLEARPOOL) < 0) { exit_perror("Error issuing RNDCLEARPOOL operation"); } if (argc == 1) { exit(EXIT_SUCCESS); } // Add the entropy bytes supplied by the user. char num_buf[9] = {}; size_t pos = 0; while (pos < len) { memcpy(num_buf, &argv[1] + pos, 8); info->buf[pos / 8] = strtoul(num_buf, NULL, 16); pos += 8; } if (ioctl(fd, RNDADDENTROPY, info) < 0) { exit_perror("Error issuing RNDADDENTROPY operation"); } } ``` [1]: http://man7.org/linux/man-pages/man7/random.7.html [2]: https://www.2uo.de/myths-about-urandom [3]: https://www.bsi.bund.de/SharedDocs/Downloads/EN/BSI/Publications/Studies/LinuxRNG/LinuxRNG_EN.pdf [4]: http://man7.org/linux/man-pages/man4/random.4.html [5]: https://elixir.bootlin.com/linux/v4.14.295/source/drivers/char/random.c#L1355 [6]: https://elixir.bootlin.com/linux/v5.10.147/source/drivers/char/random.c#L1360 [7]: https://elixir.bootlin.com/linux/v4.14.295/source/drivers/char/random.c#L1351 [8]: https://docs.rs/aws-lc-rs/latest/aws_lc_rs/index.html [9]: https://github.com/aws/aws-lc ================================================ FILE: docs/snapshotting/snapshot-editor.md ================================================ # Snapshot editor The `snapshot-editor` is a program for modification of Firecracker snapshots. ## Prior knowledge Firecracker snapshot consists of 2 files: - `vmstate` file: file with Firecracker internal data such as vcpu states, devices states etc. - `memory` file: file with guest memory. ## Usage ### `edit-memory` command #### `rebase` subcommand > This command is used to merge a `diff` snapshot memory file on top of a base > memory file. > > **Note** You can also use `rebase-snap` (deprecated) tool for this. 
> > Arguments: > > - `MEMORY_PATH` - path to the `memory` file > - `DIFF_PATH` - path to the `diff` file > > Usage: > > ```bash > snapshot-editor edit-memory rebase \ > --memory-path \ > --diff-path > ``` > > Example: > > ```bash > snapshot-editor edit-memory rebase \ > --memory-path ./memory_file \ > --diff-path ./diff_file > ``` ### `edit-vmstate` command #### `remove-regs` subcommand (aarch64 only) > This command is used to remove specified registers from vcpu states inside > vmstate snapshot file. > > Arguments: > > - `VMSTATE_PATH` - path to the `vmstate` file > - `OUTPUT_PATH` - path to the file where the output will be placed > - `[REGS]` - set of u32 values representing registers ids as they are defined > in KVM. Can be both in decimal and in hex formats. > > Usage: > > ```bash > snapshot-editor edit-vmstate remove-regs \ > --vmstate-path \ > --output-path \ > [REGS]... > ``` > > Example: > > ```bash > ./snapshot-editor edit-vmstate remove-regs \ > --vmstate-path ./vmstate_file \ > --output-path ./new_vmstate_file \ > 0x1 0x2 > ``` ### `info-vmstate` command #### `version` subcommand > This command is used to print version of the provided vmstate file. > > Arguments: > > - `VMSTATE_PATH` - path to the `vmstate` file > > Usage: > > ```bash > snapshot-editor info-vmstate version --vmstate-path > ``` > > Example: > > ```bash > ./snapshot-editor info-vmstate version --vmstate-path ./vmstate_file > ``` #### `vcpu-states` subcommand > This command is used to print the vCPU states inside vmstate snapshot file. > > Arguments: > > - `VMSTATE_PATH` - path to the `vmstate` file > > Usage: > > ```bash > snapshot-editor info-vmstate vcpu-states --vmstate-path > ``` > > Example: > > ```bash > ./snapshot-editor info-vmstate vcpu-states --vmstate-path ./vmstate_file > ``` #### `vm-state` subcommand > This command is used to print the vmstate of snapshot file in readable format > thus, making it easier to compare vmstate of 2 snapshots. 
> > Arguments: > > - `VMSTATE_PATH` - path to the `vmstate` file > > Usage: > > ```bash > snapshot-editor info-vmstate vm-state --vmstate-path > ``` > > Example: > > ```bash > ./snapshot-editor info-vmstate vm-state --vmstate-path ./vmstate_file > ``` ================================================ FILE: docs/snapshotting/snapshot-support.md ================================================ # Firecracker Snapshotting ## Table of Contents - [What is microVM snapshotting?](#about-microvm-snapshotting) - [Snapshotting in Firecracker](#snapshotting-in-firecracker) - [Supported platforms](#supported-platforms) - [Overview](#overview) - [Snapshot files management](#snapshot-files-management) - [Performance](#performance) - [Developer preview status](#developer-preview-status) - [Limitations](#limitations) - [Firecracker Snapshotting characteristics](#firecracker-snapshotting-characteristics) - [Snapshot versioning](#snapshot-versioning) - [Snapshot API](#snapshot-api) - [Pausing the microVM](#pausing-the-microvm) - [Creating snapshots](#creating-snapshots) - [Creating full snapshots](#creating-full-snapshots) - [Creating diff snapshots](#creating-diff-snapshots) - [Resuming the microVM](#resuming-the-microvm) - [Loading snapshots](#loading-snapshots) - [Provisioning host disk space for snapshots](#provisioning-host-disk-space-for-snapshots) - [Ensure continued network connectivity for clones](#ensure-continued-network-connectivity-for-clones) - [Snapshot security and uniqueness](#snapshot-security-and-uniqueness) - [Secure and insecure usage examples](#usage-examples) - [Reusing snapshotted states securely](#reusing-snapshotted-states-securely) - [Userspace notifications of loading snapshots](#userspace-notifications-of-loading-snapshots) - [Vsock device reset](#vsock-device-reset) - [VMGenID device limitation](#vmgenid-device-limitation) - [Where can I resume my snapshots?](#where-can-i-resume-my-snapshots) ## About microVM snapshotting MicroVM snapshotting is a 
mechanism through which a running microVM and its resources can be serialized and saved to an external medium in the form of a `snapshot`. This snapshot can be later used to restore a microVM with its guest workload at that particular point in time. ## Snapshotting in Firecracker ### Supported platforms The Firecracker snapshot feature is supported on all CPU micro-architectures listed in [README](../../README.md#supported-platforms). ### Overview A Firecracker microVM snapshot can be used for loading it later in a different Firecracker process, and the original guest workload is being simply resumed. The original guest which the snapshot is created from, should see no side effects from this process (other than the latency introduced by the snapshot creation process). Both network and vsock packet loss can be expected on guests that are resumed from snapshots in another Firecracker process. It is also not guaranteed that the state of the network connections survives the process. Furthermore, vsock connections that are open when the snapshot is taken are closed, but existing vsock listen sockets in the guest still remain active and can accept new connections after resume (see [Vsock device reset](#vsock-device-reset)). In order to make restoring possible, Firecracker snapshots save the full state of the following resources: - the guest memory, - the emulated HW state (both KVM and Firecracker emulated HW). The state of the components listed above is generated independently, which brings flexibility to our snapshotting support. This means that taking a snapshot results in multiple files that are composing the full microVM snapshot: - the guest memory file, - the microVM state file, - zero or more disk files (depending on how many the guest had; these are **managed by the users**). The design allows sharing of memory pages and read only disks between multiple microVMs. 
When loading a snapshot, instead of loading at resume time the full contents from file to memory, Firecracker creates a [MAP_PRIVATE mapping](http://man7.org/linux/man-pages/man2/mmap.2.html) of the memory file, resulting in runtime on-demand loading of memory pages. Any subsequent memory writes go to a copy-on-write anonymous memory mapping. This has the advantage of very fast snapshot loading times, but comes with the cost of having to keep the guest memory file around for the entire lifetime of the resumed microVM. ### Snapshot files management The Firecracker snapshot design offers a very simple interface to interact with snapshots but provides no functionality to package or manage them on the host. The [threat containment model](../design.md#threat-containment) states that the host, host/API communication and snapshot files are trusted by Firecracker. To ensure a secure integration with the snapshot functionality, users need to secure snapshot files by implementing authentication and encryption schemes while managing their lifecycle or moving them across the trust boundary, like for example when provisioning them from a repository to a host over the network. Firecracker is optimized for fast load/resume, and it's designed to do some very basic sanity checks only on the vm state file. It only verifies integrity using a 64-bit CRC value embedded in the vm state file, but this is only a partial measure to protect against accidental corruption, as the disk files and memory file need to be secured as well. It is important to note that CRC computation is validated before trying to load the snapshot. Should it encounter failure, an error will be shown to the user and the Firecracker process will be terminated. ### Performance The Firecracker snapshot create/resume performance depends on the memory size, vCPU count and emulated devices count. The Firecracker CI runs snapshot tests on all [supported platforms](../../README.md#tested-platforms). 
### Developer preview status Diff snapshots are still in developer preview while we are diving deep into how the feature can be combined with guest_memfd support in Firecracker. ### Limitations - High snapshot restoration latency when cgroups V1 are in use. We strongly recommend to deploy snapshots on cgroups V2 enabled hosts for the implied kernel versions - [related issue](https://github.com/firecracker-microvm/firecracker/issues/2129). - Guest network connectivity is not guaranteed to be preserved after resume. For recommendations related to guest network connectivity for clones please see [Network connectivity for clones](network-for-clones.md). - Snapshotting on arm64 works for both GICv2 and GICv3 enabled guests. However, restoring between different GIC version is not possible. - If a [CPU template](../cpu_templates/cpu-templates.md) is not used on x86_64, overwrites of `MSR_IA32_TSX_CTRL` MSR value will not be preserved after restoring from a snapshot. - Resuming from a snapshot that was taken during early stages of the guest kernel boot might lead to crashes upon snapshot resume. We suggest that users take snapshot after the guest microVM kernel has booted. Please see [VMGenID device limitation](#vmgenid-device-limitation). ## Firecracker Snapshotting characteristics - Fresh Firecracker microVMs are booted using `anonymous` memory, while microVMs resumed from snapshot load memory on-demand from the snapshot and copy-on-write to anonymous memory. - Resuming from a snapshot is optimized for speed, while taking a snapshot involves some extra CPU cycles for synchronously writing memory pages to the memory snapshot file. Taking a full snapshot of a microVM results in the full contents of guest memory being written to the snapshot, and particularly, in all guest memory being faulted in. - The _memory file_ and _microVM state file_ are generated by Firecracker on snapshot creation. The disk contents are _not_ explicitly flushed to their backing files. 
- The API calls exposing the snapshotting functionality have clear **Prerequisites** that describe the requirements on when/how they should be used. - The Firecracker microVM's MMDS config is included in the snapshot. However, the data store is not persisted across snapshots. - Configuration information for metrics and logs are not saved to the snapshot. These need to be reconfigured on the restored microVM. - On x86_64, if a vCPU has MSR_IA32_TSC_DEADLINE set to 0 when a snapshot is taken, Firecracker replaces it with the MSR_IA32_TSC value from the same vCPU. This is to guarantee that the vCPU will continue receiving TSC interrupts after restoring from the snapshot even if an interrupt is lost when taking a snapshot. ## Snapshot versioning The microVM state snapshot file uses a data format that has a version in the form of `MAJOR.MINOR.PATCH`. Each Firecracker binary supports a fixed version of the snapshot data format. When creating a snapshot, Firecracker will use the supported data format version. When loading snapshots, Firecracker will check that the snapshot version is compatible with the version it supports. More information about the snapshot data format and details about snapshot data format versions can be found at [versioning](./versioning.md). ## Snapshot API Firecracker exposes the following APIs for manipulating snapshots: `Pause`, `Resume` and `CreateSnapshot` can be called only after booting the microVM, while `LoadSnapshot` is allowed only before boot. ### Pausing the microVM To create a snapshot, first you have to pause the running microVM and its vCPUs with the following API command: ```bash curl --unix-socket /tmp/firecracker.socket -i \ -X PATCH 'http://localhost/vm' \ -H 'Accept: application/json' \ -H 'Content-Type: application/json' \ -d '{ "state": "Paused" }' ``` **Prerequisites**: The microVM is booted. Successive calls of this request keep the microVM in the `Paused` state. 
**Effects**: - _on success_: microVM is guaranteed to be `Paused`. - _on failure_: no side-effects. ### Creating snapshots > [!WARNING] > > Diff snapshot support is in developer preview. See > [this section](#developer-preview-status) for more info. Now that the microVM is paused, you can create a snapshot, which can be either a `full`one or a `diff` one. Full snapshots always create a complete, resume-able snapshot of the current microVM state and memory. Diff snapshots save at least the current microVM state and the memory accessed since the last snapshot (full or diff) in a sparse file (but they might include more pages than strictly needed due to technical limitation in Firecracker's ability to accurately track accesses). Diff snapshots are generally not resume-able, but must be merged with a base snapshot into a full snapshot. The exception here are diff snapshots of booted VMs, which are immediately resumable. In this context, we will refer to the base as the first memory file created by a `/snapshot/create` API call and the layer as a memory file created by a subsequent `/snapshot/create` API call. The order in which the snapshots were created matters and they should be merged in the same order in which they were created. To merge a `diff` snapshot memory file on top of a base, users should copy its content over the base. This can be done using the `rebase-snap` (deprecated) or `snapshot-editor` tools provided with the firecracker release: ```bash snapshot-editor edit-memory rebase \ --memory-path path/to/base \ --diff-path path/to/layer ``` After executing the command above, the base would be a resumable snapshot memory file describing the state of the memory at the moment of creation of the layer. More layers which were created later can be merged on top of this base. 
This process needs to be repeated for each layer until the one describing the desired memory state is merged on top of the base, which is constantly updated with information from previously merged layers. Please note that users should not merge state files which resulted from `/snapshot/create` API calls and they should use the state file created in the same call as the memory file which was merged last on top of the base. #### Creating full snapshots For creating a full snapshot, you can use the following API command: ```bash curl --unix-socket /tmp/firecracker.socket -i \ -X PUT 'http://localhost/snapshot/create' \ -H 'Accept: application/json' \ -H 'Content-Type: application/json' \ -d '{ "snapshot_type": "Full", "snapshot_path": "./snapshot_file", "mem_file_path": "./mem_file" }' ``` Details about the required and optional fields can be found in the [swagger definition](../../src/firecracker/swagger/firecracker.yaml). > [!NOTE] > > If the files indicated by `snapshot_path` and `mem_file_path` don't exist at > the specified paths, then they will be created right before generating the > snapshot. If they exist, the files will be truncated and overwritten. **Prerequisites**: The microVM is `Paused`. **Effects**: - _on success_: - The file indicated by `snapshot_path` (e.g. `/path/to/snapshot_file`) contains the devices' model state and emulation state. The one indicated by `mem_file_path`(e.g. `/path/to/mem_file`) contains a full copy of the guest memory. - The generated snapshot files are immediately available to be used (current process releases ownership). At this point, the block devices backing files should be backed up externally by the user. Please note that block device contents are only guaranteed to be committed/flushed to the host FS, but not necessarily to the underlying persistent storage (could still live in host FS cache). 
If dirty page tracking is enabled, the snapshot creation then resets the dirtied page bitmap and marks all pages clean (from a dirty page tracking point of view).
The one indicated by `mem_file_path` contains this time a **diff copy** of the guest memory; the diff consists of the memory pages which have been dirtied since the last snapshot creation or since the creation of the microVM, whichever of these events was the most recent. - All the other effects mentioned in the **Effects** paragraph from **Creating full snapshots** section apply here. - _on failure_: no side-effects. > [!NOTE] > > This is an example of an API command that enables dirty page tracking: > > ```bash > curl --unix-socket /tmp/firecracker.socket -i \ > -X PUT 'http://localhost/machine-config' \ > -H 'Accept: application/json' \ > -H 'Content-Type: application/json' \ > -d '{ > "vcpu_count": 2, > "mem_size_mib": 1024, > "smt": false, > "track_dirty_pages": true > }' > ``` Enabling this support enables KVM dirty page tracking, so it comes at a cost (which consists of CPU cycles spent by KVM accounting for dirtied pages); it should only be used when needed. Creating a snapshot has some minor effects on the currently running microVM: - The vsock device is [reset](#vsock-device-reset), causing the driver to terminate connection on resumption. - On x86_64, a notification for KVM-clock is injected to notify the guest about being paused. ### Resuming the microVM You can resume the microVM by sending the following API command: ```bash curl --unix-socket /tmp/firecracker.socket -i \ -X PATCH 'http://localhost/vm' \ -H 'Accept: application/json' \ -H 'Content-Type: application/json' \ -d '{ "state": "Resumed" }' ``` **Prerequisites**: The microVM is `Paused`. Successive calls of this request are ignored (microVM remains in the running state). **Effects**: - _on success_: microVM is guaranteed to be `Resumed`. - _on failure_: no side-effects. 
### Loading snapshots If you want to load a snapshot, you can do that only **before** the microVM is configured (the only resources that can be configured prior are the logger and the metrics systems) by sending the following API command: ```bash curl --unix-socket /tmp/firecracker.socket -i \ -X PUT 'http://localhost/snapshot/load' \ -H 'Accept: application/json' \ -H 'Content-Type: application/json' \ -d '{ "snapshot_path": "./snapshot_file", "mem_backend": { "backend_path": "./mem_file", "backend_type": "File" }, "track_dirty_pages": true, "resume_vm": false }' ``` The `backend_type` field represents the memory backend type used for loading the snapshot. Accepted values are: - `File` - rely on the kernel to handle page faults when loading the contents of the guest memory file into memory. - `Uffd` - use a dedicated user space process to handle page faults that occur for the guest memory range. Please refer to [this](handling-page-faults-on-snapshot-resume.md) for more details on handling page faults in the user space. The meaning of `backend_path` depends on the `backend_type` chosen: - if using `File`, then `backend_path` should contain the path to the snapshot's memory file to be loaded. - when using `Uffd`, `backend_path` refers to the path of the unix domain socket used for communication between Firecracker and the user space process that handles page faults. When relying on the OS to handle page faults, the command below is also accepted. Note that `mem_file_path` field is currently under the deprecation policy. `mem_file_path` and `mem_backend` are mutually exclusive, therefore specifying them both at the same time will return an error. 
```bash curl --unix-socket /tmp/firecracker.socket -i \ -X PUT 'http://localhost/snapshot/load' \ -H 'Accept: application/json' \ -H 'Content-Type: application/json' \ -d '{ "snapshot_path": "./snapshot_file", "mem_file_path": "./mem_file", "track_dirty_pages": true, "resume_vm": false }' ``` Details about the required and optional fields can be found in the [swagger definition](../../src/firecracker/swagger/firecracker.yaml). **Prerequisites**: A full memory snapshot and a microVM state file **must** be provided. The disk backing files, network interfaces backing TAPs and/or vsock backing socket that were used for the original microVM's configuration should be set up and accessible to the new Firecracker process (in which the microVM is resumed). These host-resources need to be accessible at the same relative paths to the new Firecracker process as they were to the original one. **Effects:** - _on success_: - The complete microVM state is loaded from snapshot into the current Firecracker process. - It then resets the dirtied page bitmap and marks all pages clean (from a diff snapshot point of view). - The loaded microVM is now in the `Paused` state, so it needs to be resumed for it to run. - The memory file (pointed by `backend_path` when using `File` backend type, or pointed by `mem_file_path`) **must** be considered immutable from Firecracker and host point of view. It backs the guest OS memory for read access through the page cache. External modification to this file corrupts the guest memory and leads to undefined behavior. - The file indicated by `snapshot_path`, that is used to load from, is released and no longer used by this process. - If `track_dirty_pages` is set, subsequent diff snapshots will be based on KVM dirty page tracking. - If `resume_vm` is set, the vm is automatically resumed if load is successful. - _on failure_: A specific error is reported and then the current Firecracker process is ended (as it might be in an invalid state). 
*Notes*: The `track_dirty_pages` configuration is not saved when creating a snapshot, so you need to explicitly set `track_dirty_pages` again when sending the `LoadSnapshot` command if you want to be able to do dirty page tracking based diff snapshots from a loaded microVM. It is also worth knowing, a microVM that is restored from snapshot will be resumed with the guest OS wall-clock continuing from the moment of the snapshot creation. For this reason, the wall-clock should be updated to the current time, on the guest-side. More details on how you could do this can be found at a [related FAQ](../../FAQ.md#my-guest-wall-clock-is-drifting-how-can-i-fix-it). ## Provisioning host disk space for snapshots Depending on VM memory size, snapshots can consume a lot of disk space. Firecracker integrators **must** ensure that the provisioned disk space is sufficient for normal operation of their service as well as during failure scenarios. If the service exposes the snapshot triggers to customers, integrators **must** enforce proper disk quotas to avoid any DoS threats that would cause the service to fail or function abnormally. ## Ensure continued network connectivity for clones For recommendations related to continued network connectivity for multiple clones created from a single Firecracker microVM snapshot please see [this doc](network-for-clones.md). ## Snapshot security and uniqueness When snapshots are used in a such a manner that a given guest's state is resumed from more than once, guest information assumed to be unique may in fact not be; this information can include identifiers, random numbers and random number seeds, the guest OS entropy pool, as well as cryptographic tokens. Without a strong mechanism that enables users to guarantee that unique things stay unique across snapshot restores, we consider resuming execution from the same state more than once insecure. 
For more information please see [this doc](random-for-clones.md) ### Usage examples #### Example 1: secure usage ```console Boot microVM A -> ... -> Create snapshot S -> Terminate -> Load S in microVM B -> Resume -> ... ``` Here, microVM A terminates after creating the snapshot without ever resuming work, and a single microVM B resumes execution from snapshot S. In this case, unique identifiers, random numbers, and cryptographic tokens that are meant to be used once are indeed only used once. In this example, we consider microVM B secure. #### Example 2: potentially insecure usage ```console Boot microVM A -> ... -> Create snapshot S -> Resume -> ... -> Load S in microVM B -> Resume -> ... ``` Here, both microVM A and B do work starting from the state stored in snapshot S. Unique identifiers, random numbers, and cryptographic tokens that are meant to be used once may be used twice. It doesn't matter if microVM A is terminated before microVM B resumes execution from snapshot S or not. In this example, we consider both microVMs insecure as soon as microVM A resumes execution. #### Example 3: potentially insecure usage ```console Boot microVM A -> ... -> Create snapshot S -> ... -> Load S in microVM B -> Resume -> ... -> Load S in microVM C -> Resume -> ... [...] ``` Here, both microVM B and C do work starting from the state stored in snapshot S. Unique identifiers, random numbers, and cryptographic tokens that are meant to be used once may be used twice. It doesn't matter at which points in time microVMs B and C resume execution, or if microVM A terminates or not after the snapshot is created. In this example, we consider microVMs B and C insecure, and we also consider microVM A insecure if it resumes execution. 
### Reusing snapshotted states securely [Virtual Machine Generation Identifier](https://learn.microsoft.com/en-us/windows/win32/hyperv_v2/virtual-machine-generation-identifier) (VMGenID) is a virtual device that allows VM guests to detect when they have resumed from a snapshot. It works by exposing a cryptographically random 16-bytes identifier to the guest. The VMM ensures that the value of the identifier changes every time the VM a time shift happens in the lifecycle of the VM, e.g. when it resumes from a snapshot. Linux supports VMGenID since version 5.18 for systems with ACPI support. Linux 6.10 added support also for systems that use DeviceTree instead of ACPI. When Linux detects a change in the identifier, it uses its value to reseed its internal PRNG. Firecracker supports VMGenID device both on x86 and Aarch64 platforms. Firecracker will always enable the device. During snapshot resume, Firecracker will update the 16-byte generation ID and inject a notification in the guest before resuming its vCPUs. As a result, guests that run Linux versions >= 5.18 will re-seed their in-kernel PRNG upon snapshot resume. User space applications can rely on the guest kernel for randomness. State other than the guest kernel entropy pool, such as unique identifiers, cached random numbers, cryptographic tokens, etc **will** still be replicated across multiple microVMs resumed from the same snapshot. Users need to implement mechanisms for ensuring de-duplication of such state, where needed. ## Userspace notifications of loading snapshots VMClock device ([specification](https://uapi-group.org/specifications/specs/vmclock/)) is a device that enables efficient application clock synchronization against real wallclock time, for applications running inside virtual machines. VMClock also takes care situations where there is some sort disruption happens to the clock. 
It handles these through fields in the [`vmlcock_abi`](https://uapi-group.org/specifications/specs/vmclock/#the-vmclock_abi-structure). Currently, it handles two cases: 1. Live migration through the `disruption_marker` field. 1. Restore from snapshots through the `vm_generation_counter`. Whenever a VM starts from a snapshot VMClock will present a new (different that what was previously stored) value in the `vm_generation_counter`. This happens in an atomic way, i.e. `vm_generation_counter` will include the new value as soon as vCPUs are resumed post snapshot loading. User space libraries, e.g. userspace PRNGs can mmap() `vmclock_abi` and monitor changes in `vm_generation_counter` to observe when they need to adapt and/or recreate state. Moreover, VMClock allows processes to call poll() on the VMClock device and get notified about changes through an event loop. For reference, the C code used in our tests is available [here](https://github.com/firecracker-microvm/firecracker/blob/main/tests/host_tools/vmclock.c). > [!IMPORTANT] > > Support for `vm_generation_counter` and `poll()` is implemented in Linux > through the patches > [here](https://lore.kernel.org/all/20260130173704.12575-1-itazur@amazon.com/) > and was merged in Linux kernel v7.0. Users need to make sure that the linked > patches are applied on their kernels. We have backported these patches for > Amazon Linux kernels v5.10 and v6.1 [here](../../resources/patches/vmclock). > The kernels used in the [Getting Started Guide](../getting-started.md) include > these patches. ## Vsock device reset The vsock device is reset across snapshot/restore to avoid inconsistent state between device and driver leading to breakage ([#2218](https://github.com/firecracker-microvm/firecracker/issues/2218)). This is done by sending a `VIRTIO_VSOCK_EVENT_TRANSPORT_RESET` event to the guest driver during `SnapshotCreate` ([#2562](https://github.com/firecracker-microvm/firecracker/pull/2562)). 
On `SnapshotResume`, when the VM becomes active again, the vsock driver closes all existing connections. Existing listen sockets still remain active, but their CID is updated to reflect the current `guest_cid`. More details about this event can be found in the official Virtio document [here](https://docs.oasis-open.org/virtio/virtio/v1.1/csprd01/virtio-v1.1-csprd01.html#x1-4080006). ## VMGenID device limitation During snashot resume, Firecracker updates the 16-byte generation ID of the VMGenID device and injects an interrupt in the guest before resuming vCPUs. If the snapshot was taken at the very early stages of the guest kernel boot process proper interrupt handling might not be in place yet. As a result, the kernel might not be able to handle the injected notification and crash. We suggest to users that they take snapshots only after the guest kernel has completed booting, to avoid this issue. ## Where can I resume my snapshots? Snapshots must be resumed on an software and hardware configuration which is identical to what they were generated on. However, in limited cases, snapshots can be resumed on identical hardware instances where they were taken on, but using newer host kernel versions. While we do not provide any guarantees on this setup (and do not recommend doing this in production), we are currently aware of the compatibility table reported below: | .metal instance type | taken on host kernel | restored on host kernel | | -------------------- | -------------------- | ----------------------- | | {m5n,m6i,m6a} | 5.10 | 6.1 | For example, a snapshot taken on a m6i.metal host (Intel Ice Lake) running a 5.10 host kernel can be restored on a different m6i.metal host running a 6.1 host kernel (but not vice versa), but could not be restored on a m5n.metal host (Intel Cascade Lake). 
[man mincore]: https://man7.org/linux/man-pages/man2/mincore.2.html ================================================ FILE: docs/snapshotting/versioning.md ================================================ # Firecracker snapshot versioning This document describes how Firecracker persists microVM state into Firecracker snapshots. It describes the snapshot format, encoding, compatibility and limitations. ## Introduction Firecracker uses the serde crate [1] along with the bitcode [2] format to serialize its state into Firecracker snapshots. Firecracker snapshots have versions that are independent of Firecracker versions. Each Firecracker version declares support for a specific snapshot data format version. When creating a snapshot, Firecracker will use the supported snapshot format version. When loading a snapshot, Firecracker will check that format of the snapshot file is compatible with the snapshot version Firecracker supports. ## Overview Firecracker persists the microVM state as 2 separate objects: - a **guest memory** file - a **microVM state** file. *The block devices attached to the microVM are not considered part of the state and need to be managed separately.* ### Guest memory The guest memory file contains the microVM memory saved as a dump of all pages. ### MicroVM state In the VM state file, Firecracker stores the internal state of the VMM (device emulation, KVM and vCPUs) with 2 exceptions - serial emulation and vsock backend. While we continuously improve and extend Firecracker's features by adding new capabilities, devices or enhancements, the microVM state file may change both structurally and semantically with each new release. ## MicroVM state file format A Firecracker snapshot has the following format: | Field | Bits | Description | | -------- | ---- | --------------------------------------------------------- | | magic_id | 64 | Firecracker snapshot and architecture (x86_64/aarch64). 
| | version | M | The snapshot data format version (`MAJOR.MINOR.PATCH`) | | state | N | Bitcode blob containing the microVM state. | | crc | 64 | Optional CRC64 sum of magic_id, version and state fields. | The snapshot format has its own version encoded in the snapshot file itself after the snapshot's `magic_id`. The snapshot format version is independent of the Firecracker version and it is of the form `MAJOR.MINOR.PATCH`. Currently, Firecracker uses the [Serde bitcode encoder](https://github.com/SoftbearStudios/bitcode) for serializing the microVM state. The encoding format that bitcode uses does not allow backwards compatible changes in the state, so essentially every change in the microVM state description will result in bump of the format's `MAJOR` version. If the needs arises, we will look into alternative formats that allow more flexibility with regards to backwards compatibility. If/when this happens, we will define how changes in the snapshot format reflect to changes in its `MAJOR.MINOR.PATCH` version. ## VM state encoding During research and prototyping we considered multiple storage formats. The criteria used for comparing these are: performance, size, rust support, specification, versioning support, community and tooling. Performance, size and Rust support are hard requirements while all others can be the subject of trade offs. More info about this comparison can be found [here](https://github.com/firecracker-microvm/firecracker/blob/9d427b33d989c3225d874210f6c2849465941dc0/docs/snapshotting/design.md#snapshot-format). Key benefits of using *bitcode*: - Minimal snapshot size overhead - Minimal CPU overhead - Simple implementation The current implementation relies on the [Serde bitcode encoder](https://github.com/SoftbearStudios/bitcode). ## Snapshot compatibility ### Host kernel Snapshots can be saved and restored on the same kernel version without any issues. 
There might be issues when restoring snapshots created on different host kernel version even when using the same Firecracker version. SnapshotCreate and SnapshotLoad operations across different host kernels is considered unstable in Firecracker as the saved KVM state might have different semantics on different kernels. ### Device model The current Firecracker devices are backwards compatible up to the version that introduces them. Ideally this property would be kept over time, but there are situations when a new version of a device exposes new features to the guest that do not exist in an older version. In such cases restoring a snapshot at an older version becomes impossible without breaking the guest workload. The microVM state file links some resources that are external to the snapshot: - tap devices by device name, - block devices by block file path, - vsock backing Unix domain socket by socket name. To successfully restore a microVM one should check that: - tap devices are available, their names match their original names since these are the values saved in the microVM state file, and they are accessible to the Firecracker process where the microVM is being restored, - block devices are set up at their original relative or absolute paths with the proper permissions, as the Firecracker process with the restored microVM will attempt to access them exactly as they were accessed in the original Firecracker process, - the vsock backing Unix domain socket is available, its name matches the original name, and it is accessible to the new Firecracker process. ### CPU model Firecracker microVMs snapshot functionality is available for Intel/AMD/ARM64 CPU models that support the hardware virtualizations extensions, more details are available [here](../../README.md#supported-platforms). Snapshots are not compatible across CPU architectures and even across CPU models of the same architecture. 
They are only compatible if the CPU features exposed to the guest are an invariant when saving and restoring the snapshot. The trivial scenario is creating and restoring snapshots on hosts that have the same CPU model. Restoring from an Intel snapshot on AMD (or vice-versa) is not supported. It is important to note that guest workloads can still execute instructions that are being [masked](../cpu_templates/cpu-templates.md) by CPUID and restoring and saving of such workloads will lead to undefined result. Firecracker retrieves the state of a discrete list of MSRs from KVM, more specifically, the MSRs corresponding to the guest exposed features. ## Implementation The microVM state file format is implemented in the [snapshot crate](../../src/vmm/src/snapshot/mod.rs) in the Firecracker repository. All Firecracker devices implement the [Persist](../../src/vmm/src/snapshot/persist.rs) trait which exposes an interface that enables creating from and saving to the microVM state. [1]: https://serde.rs [2]: https://github.com/SoftbearStudios/bitcode ================================================ FILE: docs/tracing.md ================================================ # Tracing ## Introduction Firecracker implements a framework for instrumentation based tracing with the aim to improve its debugability. Instrumentation based tracing was defined by [Sheng Liang on usenix.org](https://www.usenix.org/legacy/publications/library/proceedings/coots99/full_papers/liang/liang_html/node9.html) as: > There are two ways to obtain profiling information: either statistical > sampling or code instrumentation. Statistical sampling is less disruptive to > program execution, but cannot provide completely accurate information. Code > instrumentation, on the other hand, may be more disruptive, but allows the > profiler to record all the events it is interested in. 
Specifically in CPU > time profiling, statistical sampling may reveal, for example, the relative > percentage of time spent in frequently-called methods, whereas code > instrumentation can report the exact number of time each method is invoked. Enabling tracing adds logs output on each functions entry and exit. This assists debugging problems that relate to deadlocks or high latencies by quickly identifying elongated function calls. ## Implementation Firecracker implements instrumentation based tracing via [`log`](https://github.com/rust-lang/log) and [`log_instrument`](../src/log-instrument), outputting a `Trace` level log when entering and exiting every function. Adding traces impacts Firecracker binary size and its performance, so instrumentation is not present by default. Instrumentation is also not present on the release binaries. You can use `cargo run --bin clippy-tracing --` to build and run the latest version in the repo or you can run `cargo install --path src/clippy-tracing` to install the binary then use `clippy-tracing` to run this binary. You can run `clippy-tracing --help` for help. To enable tracing in Firecracker, add instrumentation with: ``` clippy-tracing \ --action fix \ --path ./src \ --exclude benches \ --exclude virtio/generated,bindings.rs,net/generated \ --exclude log-instrument-macros/,log-instrument/,clippy-tracing/ \ --exclude vmm_config/logger.rs,logger/,signal_handler.rs,time.rs ``` `--exclude` can be used to avoid adding instrumentation to specific files, here it is used to avoid adding instrumentation in: - tests. - bindings. - the instrumentation tooling. - logger functionality that may form an infinite loop. After adding instrumentation re-compile with `--features tracing`: ``` cargo build --features tracing ``` This will result in an increase in the binary size (~100kb) and a significant regression in performance (>10x). To mitigate the performance impact you can filter the tracing output as described in the next section. 
## Filtering You can filter tracing output both at run-time and compile-time. This can be used to mitigate the performance impact of logging many traces. Run-time filtering is implemented with the `/logger` API call, this can significantly mitigate the impact on execution time but cannot mitigate the impact on memory usage. Execution time impact is mitigated by avoiding constructing and writing the trace log, it still needs to check the condition in every place it would output a log. Memory usage impact is not mitigated as the instrumentation remains in the binary unchanged. Compile-time filtering is a manual process using the [`clippy-tracing`](https://github.com/JonathanWoollett-Light/clippy-tracing) tool. This can almost entirely mitigate the impact on execution time and the impact on memory usage. ### Run-time You can filter by module path and/or file path at runtime, e.g.: ```bash curl -X PUT --unix-socket "${API_SOCKET}" \ --data "{ \"level\": \"Trace\", \"module\": \"api_server::request\", }" \ "http://localhost/logger" ``` Instrumentation logs are `Trace` level logs, at runtime the level must be set to `Trace` to see them. The module filter applied here ensures only logs from the `request` modules within the `api_server` crate will be output. This will mitigate most of the performance regression. ### Compile-time Specific environments can restrict run-time configuration. In these environments it becomes necessary to support targeted tracing without run-time re-configuration, for this compile-time filtering must be used. To reproduce the same filtering as run-time at compile-time, you can use [`clippy-tracing`](../src/clippy-tracing) at compile-time like: ```bash # Remove all instrumentation. clippy-tracing --action strip --path ./src # Adds instrumentation to the specific file/s. clippy-tracing --action fix --path ./src/firecracker/src/api_server/src/request # Build Firecracker. 
cargo build --features tracing ``` Then at run-time: ```bash curl -X PUT --unix-socket "${API_SOCKET}" \ --data "{ \"level\": \"Trace\", }" \ "http://localhost/logger" ``` The instrumentation has been stripped from all files other than those at `./src/firecracker/src/api_server/src/request` so we do not need to apply a run-time filter. Runtime filtering could be applied but in this case it yields no additional benefit. ## Example In this example we start Firecracker with tracing then make a simple API call. ### API call ``` ~/Projects/firecracker$ sudo curl -X GET --unix-socket "/run/firecracker.socket" "http://localhost/" {"id":"anonymous-instance","state":"Not started","vmm_version":"1.6.0-dev","app_name":"Firecracker"} ``` ### Firecracker ``` ~/Projects/firecracker$ sudo ./firecracker/build/cargo_target/release/firecracker --level Trace 2023-10-13T14:15:38.851263983 [anonymous-instance:main] Running Firecracker v1.6.0-dev 2023-10-13T14:15:38.851316122 [anonymous-instance:main] ThreadId(1)::main::main_exec>>single_value 2023-10-13T14:15:38.851322264 [anonymous-instance:main] ThreadId(1)::main::main_exec::single_value>>value_of 2023-10-13T14:15:38.851325119 [anonymous-instance:main] ThreadId(1)::main::main_exec::single_value<>flag_present 2023-10-13T14:15:38.851335809 [anonymous-instance:main] ThreadId(1)::main::main_exec::flag_present>>value_of 2023-10-13T14:15:38.851338254 [anonymous-instance:main] ThreadId(1)::main::main_exec::flag_present<>single_value 2023-10-13T14:15:38.851349245 [anonymous-instance:main] ThreadId(1)::main::main_exec::single_value>>value_of 2023-10-13T14:15:38.851352721 [anonymous-instance:main] ThreadId(1)::main::main_exec::single_value<>from_args 2023-10-13T14:15:38.851362931 [anonymous-instance:main] ThreadId(1)::main::main_exec<>get_filters 2023-10-13T14:15:38.851368401 [anonymous-instance:main] ThreadId(1)::main::main_exec::get_filters>>get_default_filters 2023-10-13T14:15:38.851372068 [anonymous-instance:main] 
ThreadId(1)::main::main_exec::get_filters::get_default_filters>>deserialize_binary 2023-10-13T14:15:38.851380033 [anonymous-instance:main] ThreadId(1)::main::main_exec::get_filters::get_default_filters<>filter_thread_categories 2023-10-13T14:15:38.851388098 [anonymous-instance:main] ThreadId(1)::main::main_exec::get_filters::get_default_filters<>single_value 2023-10-13T14:15:38.851400462 [anonymous-instance:main] ThreadId(1)::main::main_exec::single_value>>value_of 2023-10-13T14:15:38.851403507 [anonymous-instance:main] ThreadId(1)::main::main_exec::single_value<>single_value 2023-10-13T14:15:38.851417955 [anonymous-instance:main] ThreadId(1)::main::main_exec::single_value>>value_of 2023-10-13T14:15:38.851420650 [anonymous-instance:main] ThreadId(1)::main::main_exec::single_value<>flag_present 2023-10-13T14:15:38.851430949 [anonymous-instance:main] ThreadId(1)::main::main_exec::flag_present>>value_of 2023-10-13T14:15:38.851434766 [anonymous-instance:main] ThreadId(1)::main::main_exec::flag_present<>flag_present 2023-10-13T14:15:38.851444575 [anonymous-instance:main] ThreadId(1)::main::main_exec::flag_present>>value_of 2023-10-13T14:15:38.851447941 [anonymous-instance:main] ThreadId(1)::main::main_exec::flag_present<>arguments 2023-10-13T14:15:38.851456488 [anonymous-instance:main] ThreadId(1)::main::main_exec<>single_value 2023-10-13T14:15:38.851462679 [anonymous-instance:main] ThreadId(1)::main::main_exec::single_value>>value_of 2023-10-13T14:15:38.851466587 [anonymous-instance:main] ThreadId(1)::main::main_exec::single_value<>as_single_value 2023-10-13T14:15:38.851473239 [anonymous-instance:main] ThreadId(1)::main::main_exec::single_value<>arguments 2023-10-13T14:15:38.851485062 [anonymous-instance:main] ThreadId(1)::main::main_exec<>single_value 2023-10-13T14:15:38.851491925 [anonymous-instance:main] ThreadId(1)::main::main_exec::single_value>>value_of 2023-10-13T14:15:38.851494900 [anonymous-instance:main] 
ThreadId(1)::main::main_exec::single_value<>single_value 2023-10-13T14:15:38.851502374 [anonymous-instance:main] ThreadId(1)::main::main_exec::single_value>>value_of 2023-10-13T14:15:38.851504629 [anonymous-instance:main] ThreadId(1)::main::main_exec::single_value<>as_single_value 2023-10-13T14:15:38.851508897 [anonymous-instance:main] ThreadId(1)::main::main_exec::single_value<>single_value 2023-10-13T14:15:38.851515559 [anonymous-instance:main] ThreadId(1)::main::main_exec::single_value>>value_of 2023-10-13T14:15:38.851517503 [anonymous-instance:main] ThreadId(1)::main::main_exec::single_value<>single_value 2023-10-13T14:15:38.851525628 [anonymous-instance:main] ThreadId(1)::main::main_exec::single_value>>value_of 2023-10-13T14:15:38.851529045 [anonymous-instance:main] ThreadId(1)::main::main_exec::single_value<>single_value 2023-10-13T14:15:38.851538883 [anonymous-instance:main] ThreadId(1)::main::main_exec::single_value>>value_of 2023-10-13T14:15:38.851542330 [anonymous-instance:main] ThreadId(1)::main::main_exec::single_value<>run_with_api 2023-10-13T14:15:38.851664621 [anonymous-instance:main] ThreadId(1)::main::main_exec::run_with_api>>new 2023-10-13T14:15:38.851672586 [anonymous-instance:main] ThreadId(1)::main::main_exec::run_with_api<>init 2023-10-13T14:15:38.851684739 [anonymous-instance:main] ThreadId(1)::main::main_exec::run_with_api<>build_microvm_from_requests 2023-10-13T14:15:38.851728171 [anonymous-instance:main] ThreadId(1)::main::main_exec::run_with_api::build_microvm_from_requests>>default 2023-10-13T14:15:38.851731888 [anonymous-instance:main] ThreadId(1)::main::main_exec::run_with_api::build_microvm_from_requests<>new 2023-10-13T14:15:38.851737830 [anonymous-instance:main] ThreadId(1)::main::main_exec::run_with_api::build_microvm_from_requests<>new 2023-10-13T14:15:38.851761404 [anonymous-instance:fc_api] ThreadId(2)<>run 2023-10-13T14:15:38.851775200 [anonymous-instance:fc_api] ThreadId(2)::run>>apply_filter 2023-10-13T14:15:38.851823462 
[anonymous-instance:fc_api] ThreadId(2)::run<>handle_request 2023-10-13T14:15:55.422417909 [anonymous-instance:fc_api] ThreadId(2)::run::handle_request>>try_from 2023-10-13T14:15:55.422420554 [anonymous-instance:fc_api] ThreadId(2)::run::handle_request::try_from>>describe 2023-10-13T14:15:55.422424551 [anonymous-instance:fc_api] ThreadId(2)::run::handle_request::try_from<>log_received_api_request 2023-10-13T14:15:55.422429270 [anonymous-instance:fc_api] The API server received a Get request on "/". 2023-10-13T14:15:55.422431354 [anonymous-instance:fc_api] ThreadId(2)::run::handle_request::try_from<>parse_get_instance_info 2023-10-13T14:15:55.422435211 [anonymous-instance:fc_api] ThreadId(2)::run::handle_request::try_from::parse_get_instance_info>>new_sync 2023-10-13T14:15:55.422437165 [anonymous-instance:fc_api] ThreadId(2)::run::handle_request::try_from::parse_get_instance_info::new_sync>>new 2023-10-13T14:15:55.422439289 [anonymous-instance:fc_api] ThreadId(2)::run::handle_request::try_from::parse_get_instance_info::new_sync<>into_parts 2023-10-13T14:15:55.422450921 [anonymous-instance:fc_api] ThreadId(2)::run::handle_request<>serve_vmm_action_request 2023-10-13T14:15:55.422472552 [anonymous-instance:main] ThreadId(1)::main::main_exec::run_with_api::build_microvm_from_requests>>handle_preboot_request 2023-10-13T14:15:55.422480477 [anonymous-instance:main] ThreadId(1)::main::main_exec::run_with_api::build_microvm_from_requests<>convert_to_response 2023-10-13T14:15:55.422492289 [anonymous-instance:fc_api] ThreadId(2)::run::handle_request::serve_vmm_action_request::convert_to_response>>success_response_with_data 2023-10-13T14:15:55.422493983 [anonymous-instance:fc_api] The request was executed successfully. Status code: 200 OK. 
2023-10-13T14:15:55.422498331 [anonymous-instance:fc_api] ThreadId(2)::run::handle_request::serve_vmm_action_request::convert_to_response::success_response_with_data>>serialize 2023-10-13T14:15:55.422501387 [anonymous-instance:fc_api] ThreadId(2)::run::handle_request::serve_vmm_action_request::convert_to_response::success_response_with_data::serialize>>fmt 2023-10-13T14:15:55.422506086 [anonymous-instance:fc_api] ThreadId(2)::run::handle_request::serve_vmm_action_request::convert_to_response::success_response_with_data::serialize<>take_deprecation_message 2023-10-13T14:15:55.422520533 [anonymous-instance:fc_api] ThreadId(2)::run::handle_request<`; 1. Host: `connect()` to AF_UNIX at `uds_path`. 1. Host: `send()` "CONNECT ``\\n". 1. Guest: `accept()` the new connection. 1. Host: `read()` "OK ``\\n". The channel is established between the sockets obtained at steps 3 (host) and 5 (guest). ### Guest-Initiated Connections When the virtio-vsock device model in Firecracker detects a connection request coming from the guest (a VIRTIO_VSOCK_OP_REQUEST packet), it tries to forward the connection to an AF_UNIX socket listening on the host, at `/path/to/v.sock_PORT` (or whatever path was configured via the `uds_path` property of the vsock device), where `PORT` is the destination port (in decimal), as specified in the connection request packet. If no such socket exists, or no one is listening on it, a connection cannot be established, and a VIRTIO_VSOCK_OP_RST packet will be sent back to the guest. Client B initiates connection to Server B in [figure below](#vsock-connections): 1. Host: At VM configuration time, add a virtio-vsock device, with some `uds_path` (e.g. `/path/to/v.sock`). 1. Host: create and listen on an AF_UNIX socket at `/path/to/v.sock_PORT`. 1. Guest: create an AF_VSOCK socket and connect to `HOST_CID` (i.e. integer value 2) and `PORT`; 1. Host: `accept()` the new connection. 
The channel is established between the sockets obtained at steps 4 (host) and 3 (guest). ![Vsock Connections](images/vsock-connections.png?raw=true "Vsock Connections") ## Setting up the virtio-vsock device The virtio-vsock device will require a CID, and the path to a backing AF_UNIX socket: ```bash curl --unix-socket /tmp/firecracker.socket -i \ -X PUT 'http://localhost/vsock' \ -H 'Accept: application/json' \ -H 'Content-Type: application/json' \ -d '{ "guest_cid": 3, "uds_path": "./v.sock" }' ``` Once the microvm is started, Firecracker will create and start listening on the AF_UNIX socket at `uds_path`. Incoming connections will get forwarded to the guest microvm, and translated to AF_VSOCK. The destination port is expected to be specified by sending the text command "CONNECT ``\\n", immediately after the AF_UNIX connection is established. Connections initiated from within the guest will be forwarded to AF_UNIX sockets expected to be listening at `./v.sock_`. I.e. a guest connection to port 52 will get forwarded to `./v.sock_52`. ## Examples The examples below assume a running microvm, with a vsock device configured as shown [above](#setting-up-the-virtio-vsock-device) and [socat](http://www.dest-unreach.org/socat/) version 1.7.4.0 or later. ### Connecting From Host to Guest First, make sure the vsock port is bound and listened to on the guest side. Say, port 52: ```bash socat VSOCK-LISTEN:52,fork - ``` On the host side, connect to `./v.sock` and issue a connection request to that port: ```bash $ socat - UNIX-CONNECT:./v.sock CONNECT 52 ``` `socat` will display the connection acknowledgement message: ```console OK 1073741824 ``` The connection should now be established (in the above example, between `socat` on the guest and the host side). 
### Connecting From Guest To Host First make sure the AF_UNIX corresponding to your desired port is listened to on the host side: ```bash socat - UNIX-LISTEN:./v.sock_52 ``` On the guest side, create an AF_VSOCK socket and connect it to the previously chosen port on the host (CID=2): ```bash socat - VSOCK-CONNECT:2:52 ``` ## Unix Domain Socket Renaming In certain environments where the jailer is not used, restoring snapshots with vsock devices may be difficult because the same host Unix Domain Socket (UDS) path cannot be multiplexed, meaning collisions could occur if two VMs with the same UDS path attempted to open a connection on the same port. In this case, you can use the `vsock_override` parameter of the snapshot restore API to provide a different path for the UDS to be opened at. For example, if a snapshot was taken with the host socket path `./v.sock.1`, on restore we can override it to instead open the socket at `./v.sock.2`, like this: ``` curl --unix-socket /tmp/firecracker.socket -i \ -X PUT 'http://localhost/snapshot/load' \ -H 'Accept: application/json' \ -H 'Content-Type: application/json' \ -d '{ "snapshot_path": "./snapshot_file", "mem_backend": { "backend_path": "./mem_file", "backend_type": "File" }, "vsock_override": { "uds_path": "./v.sock.2", } }' ``` All connections on the restored VM will then be opened with `./v.sock.2` as a prefix. ## Known issues Vsock snapshot support is currently limited. Please see [Snapshotting vsock limitation](snapshotting/snapshot-support.md#vsock-device-reset). ================================================ FILE: resources/chroot.sh ================================================ #!/bin/bash # Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
# SPDX-License-Identifier: Apache-2.0

# fail if we encounter an error, uninitialized variable or a pipe breaks
set -eu -o pipefail

# be verbose
set -x
# xtrace prefix; presumably '\t' is expanded as in PS1 so traced lines carry a
# timestamp — confirm against the bash version used in the build container
PS4='+\t '

# Base package set installed into the guest rootfs: init/udev, SSH, networking
# and benchmarking tools (iperf3, fio, socat, ...) used by the CI tests.
packages="udev systemd-sysv openssh-server iproute2 curl socat python3-minimal iperf3 iputils-ping fio kmod tmux hwloc-nox vim-tiny trace-cmd linuxptp strace python3-boto3 pciutils"

# msr-tools is only supported on x86-64.
arch=$(uname -m)
if [ "${arch}" == "x86_64" ]; then
    packages="$packages msr-tools cpuid"
fi

# Non-interactive apt so package configuration never prompts inside the chroot.
export DEBIAN_FRONTEND=noninteractive
apt update
apt install -y --no-install-recommends $packages
# NOTE(review): no -y on autoremove; presumably nothing is removable right
# after a fresh install so it never prompts — confirm this holds under set -e.
apt autoremove

# Set a hostname.
echo "ubuntu-fc-uvm" > /etc/hostname

# Remove the root password so the autologin below needs no credentials.
passwd -d root

# The serial getty service hooks up the login prompt to the kernel console
# at ttyS0 (where Firecracker connects its serial console). We'll set it up
# for autologin to avoid the login prompt.
# The heredoc is quoted ('EOF') so its contents are written verbatim.
mkdir "/etc/systemd/system/serial-getty@ttyS0.service.d/"
cat <<'EOF' >"/etc/systemd/system/serial-getty@ttyS0.service.d/override.conf"
[Service]
# systemd requires this empty ExecStart line to override
ExecStart=
ExecStart=-/sbin/agetty --autologin root -o '-p -- \\u' --keep-baud 115200,38400,9600 %I dumb
EOF

# Setup fcnet service. This is a custom Firecracker setup for assigning IPs
# to the network interfaces in the guests spawned by the CI.
ln -s /etc/systemd/system/fcnet.service /etc/systemd/system/sysinit.target.wants/fcnet.service # Disable resolved and ntpd # rm -f /etc/systemd/system/multi-user.target.wants/systemd-resolved.service rm -f /etc/systemd/system/dbus-org.freedesktop.resolve1.service rm -f /etc/systemd/system/sysinit.target.wants/systemd-timesyncd.service # make /tmp a tmpfs ln -s /usr/share/systemd/tmp.mount /etc/systemd/system/tmp.mount systemctl enable tmp.mount # don't need this systemctl disable e2scrub_reap.service rm -vf /etc/systemd/system/timers.target.wants/* # systemctl list-units --failed # /lib/systemd/system/systemd-random-seed.service systemctl enable var-lib-systemd.mount # disable Predictable Network Interface Names to keep ethN names # even with PCI enabled ln -s /dev/null /etc/systemd/network/99-default.link #### trim image https://wiki.ubuntu.com/ReducingDiskFootprint # this does not save much, but oh well rm -rf /usr/share/{doc,man,info,locale} cat >> /etc/sysctl.conf </root/manifest # Make systemd mountpoint mkdir -pv $rootfs/var/lib/systemd # So apt works mkdir -pv $rootfs/var/lib/dpkg ================================================ FILE: resources/guest_configs/DISCLAIMER.md ================================================ ### DISCLAIMER Please keep this document in mind when using these guest kernel configuration files. Firecracker as a virtual machine monitor is designed and built for use with specific goals, so these kernel configurations are tuned to be secure and to use the host's resources as optimally as possible, specifically allowing for as many guests to be running concurrently as possible (high density). For example, one of the mechanisms to improve density is to reduce virtual memory areas of the guest. This decreases the page table size and improves available memory on the host for other guests to occupy. 
As Firecracker is intended for ephemeral compute (short-lived environments,
not intended to run indefinitely), a Firecracker guest is not expected to
require large memory sizes. One interesting use-case where this can be seen to
cause odd side effects is one where golang's race detector for aarch64
expected a 48-bit space, but the guest's kernel config enforced 39-bit. See
[Firecracker issue #3514](https://github.com/firecracker-microvm/firecracker/issues/3514).



================================================
FILE: resources/guest_configs/ci.config
================================================
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
CONFIG_MSDOS_PARTITION=y
CONFIG_SQUASHFS_ZSTD=y
# aarch64 only TBD split into a separate file
CONFIG_DEVMEM=y
# CONFIG_ARM64_ERRATUM_3194386 is not set
# Needed for CTRL+ALT+DEL support
CONFIG_SERIO=y
CONFIG_SERIO_I8042=y
CONFIG_SERIO_LIBPS2=y
CONFIG_SERIO_GSCPS2=y
CONFIG_KEYBOARD_ATKBD=y
CONFIG_INPUT_KEYBOARD=y



================================================
FILE: resources/guest_configs/debug.config
================================================
CONFIG_FRAME_POINTER=y
# CONFIG_KGDB=y
# CONFIG_KGDB_SERIAL_CONSOLE=y
CONFIG_DEBUG_INFO=y
CONFIG_DEBUG_INFO_DWARF4=y



================================================
FILE: resources/guest_configs/ftrace.config
================================================
CONFIG_FTRACE=y
CONFIG_FUNCTION_TRACER=y
CONFIG_FUNCTION_GRAPH_TRACER=y
CONFIG_IRQSOFF_TRACER=y
CONFIG_PREEMPT_TRACER=y
CONFIG_SCHED_TRACER=y
CONFIG_STACK_TRACER=y
CONFIG_BLK_DEV_IO_TRACE=y
CONFIG_FUNCTION_PROFILER=y
CONFIG_FTRACE_MCOUNT_RECORD=y
CONFIG_FTRACE_SYSCALLS=y



================================================
FILE: resources/guest_configs/microvm-kernel-ci-aarch64-5.10.config
================================================
CONFIG_CC_VERSION_TEXT="gcc10-gcc (GCC) 10.5.0 20230707 (Red Hat 10.5.0-1)"
CONFIG_CC_IS_GCC=y
CONFIG_GCC_VERSION=100500
CONFIG_LD_VERSION=235020000
CONFIG_CLANG_VERSION=0
CONFIG_AS_IS_GNU=y
CONFIG_AS_VERSION=23502 CONFIG_LLD_VERSION=0 CONFIG_CC_CAN_LINK=y CONFIG_CC_HAS_ASM_GOTO=y CONFIG_CC_HAS_ASM_INLINE=y CONFIG_IRQ_WORK=y CONFIG_BUILDTIME_TABLE_SORT=y CONFIG_THREAD_INFO_IN_TASK=y # # General setup # CONFIG_INIT_ENV_ARG_LIMIT=32 # CONFIG_COMPILE_TEST is not set CONFIG_LOCALVERSION="" # CONFIG_LOCALVERSION_AUTO is not set CONFIG_BUILD_SALT="" CONFIG_DEFAULT_INIT="" CONFIG_DEFAULT_HOSTNAME="(none)" CONFIG_SWAP=y CONFIG_SYSVIPC=y CONFIG_SYSVIPC_SYSCTL=y CONFIG_POSIX_MQUEUE=y CONFIG_POSIX_MQUEUE_SYSCTL=y # CONFIG_WATCH_QUEUE is not set CONFIG_CROSS_MEMORY_ATTACH=y # CONFIG_USELIB is not set CONFIG_AUDIT=y CONFIG_HAVE_ARCH_AUDITSYSCALL=y CONFIG_AUDITSYSCALL=y # # IRQ subsystem # CONFIG_GENERIC_IRQ_PROBE=y CONFIG_GENERIC_IRQ_SHOW=y CONFIG_GENERIC_IRQ_SHOW_LEVEL=y CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK=y CONFIG_GENERIC_IRQ_MIGRATION=y CONFIG_HARDIRQS_SW_RESEND=y CONFIG_IRQ_DOMAIN=y CONFIG_IRQ_DOMAIN_HIERARCHY=y CONFIG_GENERIC_IRQ_IPI=y CONFIG_GENERIC_MSI_IRQ=y CONFIG_GENERIC_MSI_IRQ_DOMAIN=y CONFIG_IRQ_MSI_IOMMU=y CONFIG_HANDLE_DOMAIN_IRQ=y CONFIG_IRQ_FORCED_THREADING=y CONFIG_SPARSE_IRQ=y # CONFIG_GENERIC_IRQ_DEBUGFS is not set # end of IRQ subsystem CONFIG_GENERIC_IRQ_MULTI_HANDLER=y CONFIG_GENERIC_TIME_VSYSCALL=y CONFIG_GENERIC_CLOCKEVENTS=y CONFIG_ARCH_HAS_TICK_BROADCAST=y CONFIG_GENERIC_CLOCKEVENTS_BROADCAST=y # # Timers subsystem # CONFIG_TICK_ONESHOT=y CONFIG_NO_HZ_COMMON=y # CONFIG_HZ_PERIODIC is not set CONFIG_NO_HZ_IDLE=y # CONFIG_NO_HZ_FULL is not set CONFIG_NO_HZ=y CONFIG_HIGH_RES_TIMERS=y # end of Timers subsystem CONFIG_PREEMPT_NONE=y # CONFIG_PREEMPT_VOLUNTARY is not set # CONFIG_PREEMPT is not set # # CPU/Task time and stats accounting # CONFIG_TICK_CPU_ACCOUNTING=y # CONFIG_VIRT_CPU_ACCOUNTING_GEN is not set # CONFIG_IRQ_TIME_ACCOUNTING is not set CONFIG_HAVE_SCHED_AVG_IRQ=y CONFIG_BSD_PROCESS_ACCT=y CONFIG_BSD_PROCESS_ACCT_V3=y CONFIG_TASKSTATS=y CONFIG_TASK_DELAY_ACCT=y CONFIG_TASK_XACCT=y CONFIG_TASK_IO_ACCOUNTING=y CONFIG_PSI=y 
CONFIG_PSI_DEFAULT_DISABLED=y # end of CPU/Task time and stats accounting CONFIG_CPU_ISOLATION=y # # RCU Subsystem # CONFIG_TREE_RCU=y # CONFIG_RCU_EXPERT is not set CONFIG_SRCU=y CONFIG_TREE_SRCU=y CONFIG_TASKS_RCU_GENERIC=y CONFIG_TASKS_TRACE_RCU=y CONFIG_RCU_STALL_COMMON=y CONFIG_RCU_NEED_SEGCBLIST=y # end of RCU Subsystem # CONFIG_IKCONFIG is not set # CONFIG_IKHEADERS is not set CONFIG_LOG_BUF_SHIFT=17 CONFIG_LOG_CPU_MAX_BUF_SHIFT=12 CONFIG_PRINTK_SAFE_LOG_BUF_SHIFT=13 CONFIG_GENERIC_SCHED_CLOCK=y # # Scheduler features # # CONFIG_UCLAMP_TASK is not set # end of Scheduler features CONFIG_ARCH_SUPPORTS_NUMA_BALANCING=y CONFIG_CC_HAS_INT128=y CONFIG_ARCH_SUPPORTS_INT128=y CONFIG_NUMA_BALANCING=y # CONFIG_NUMA_BALANCING_DEFAULT_ENABLED is not set CONFIG_CGROUPS=y CONFIG_PAGE_COUNTER=y CONFIG_MEMCG=y CONFIG_MEMCG_SWAP=y CONFIG_MEMCG_KMEM=y CONFIG_BLK_CGROUP=y CONFIG_CGROUP_WRITEBACK=y CONFIG_CGROUP_SCHED=y CONFIG_FAIR_GROUP_SCHED=y CONFIG_CFS_BANDWIDTH=y CONFIG_RT_GROUP_SCHED=y CONFIG_CGROUP_PIDS=y # CONFIG_CGROUP_RDMA is not set CONFIG_CGROUP_FREEZER=y CONFIG_CGROUP_HUGETLB=y CONFIG_CPUSETS=y CONFIG_PROC_PID_CPUSET=y CONFIG_CGROUP_DEVICE=y CONFIG_CGROUP_CPUACCT=y CONFIG_CGROUP_PERF=y CONFIG_CGROUP_BPF=y # CONFIG_CGROUP_DEBUG is not set CONFIG_SOCK_CGROUP_DATA=y CONFIG_NAMESPACES=y CONFIG_UTS_NS=y CONFIG_TIME_NS=y CONFIG_IPC_NS=y CONFIG_USER_NS=y CONFIG_PID_NS=y CONFIG_NET_NS=y # CONFIG_CHECKPOINT_RESTORE is not set CONFIG_SCHED_AUTOGROUP=y # CONFIG_SYSFS_DEPRECATED is not set CONFIG_RELAY=y CONFIG_BLK_DEV_INITRD=y CONFIG_INITRAMFS_SOURCE="" CONFIG_RD_GZIP=y CONFIG_RD_BZIP2=y CONFIG_RD_LZMA=y CONFIG_RD_XZ=y CONFIG_RD_LZO=y CONFIG_RD_LZ4=y CONFIG_RD_ZSTD=y # CONFIG_BOOT_CONFIG is not set CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_LD_ORPHAN_WARN=y CONFIG_SYSCTL=y CONFIG_HAVE_UID16=y CONFIG_SYSCTL_EXCEPTION_TRACE=y CONFIG_BPF=y # CONFIG_EXPERT is not set CONFIG_UID16=y CONFIG_MULTIUSER=y CONFIG_SYSFS_SYSCALL=y 
CONFIG_FHANDLE=y CONFIG_POSIX_TIMERS=y CONFIG_PRINTK=y CONFIG_PRINTK_NMI=y CONFIG_BUG=y CONFIG_ELF_CORE=y CONFIG_BASE_FULL=y CONFIG_FUTEX=y CONFIG_FUTEX_PI=y CONFIG_HAVE_FUTEX_CMPXCHG=y CONFIG_EPOLL=y CONFIG_SIGNALFD=y CONFIG_TIMERFD=y CONFIG_EVENTFD=y CONFIG_SHMEM=y CONFIG_AIO=y CONFIG_IO_URING=y CONFIG_ADVISE_SYSCALLS=y CONFIG_MEMBARRIER=y CONFIG_KALLSYMS=y # CONFIG_KALLSYMS_ALL is not set CONFIG_KALLSYMS_BASE_RELATIVE=y CONFIG_BPF_SYSCALL=y CONFIG_ARCH_WANT_DEFAULT_BPF_JIT=y CONFIG_BPF_UNPRIV_DEFAULT_OFF=y CONFIG_USERMODE_DRIVER=y CONFIG_BPF_PRELOAD=y CONFIG_USERFAULTFD=y CONFIG_ARCH_HAS_MEMBARRIER_SYNC_CORE=y CONFIG_RSEQ=y # CONFIG_EMBEDDED is not set CONFIG_HAVE_PERF_EVENTS=y # # Kernel Performance Events And Counters # CONFIG_PERF_EVENTS=y # CONFIG_DEBUG_PERF_USE_VMALLOC is not set # end of Kernel Performance Events And Counters CONFIG_VM_EVENT_COUNTERS=y CONFIG_SLUB_DEBUG=y # CONFIG_COMPAT_BRK is not set # CONFIG_SLAB is not set CONFIG_SLUB=y CONFIG_SLAB_MERGE_DEFAULT=y CONFIG_SLAB_FREELIST_RANDOM=y CONFIG_SLAB_FREELIST_HARDENED=y CONFIG_SHUFFLE_PAGE_ALLOCATOR=y CONFIG_SLUB_CPU_PARTIAL=y CONFIG_PROFILING=y # end of General setup CONFIG_ARM64=y CONFIG_64BIT=y CONFIG_MMU=y CONFIG_ARM64_PAGE_SHIFT=12 CONFIG_ARM64_CONT_PTE_SHIFT=4 CONFIG_ARM64_CONT_PMD_SHIFT=4 CONFIG_ARCH_MMAP_RND_BITS_MIN=18 CONFIG_ARCH_MMAP_RND_BITS_MAX=33 CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MIN=11 CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MAX=16 CONFIG_NO_IOPORT_MAP=y CONFIG_STACKTRACE_SUPPORT=y CONFIG_ILLEGAL_POINTER_VALUE=0xdead000000000000 CONFIG_LOCKDEP_SUPPORT=y CONFIG_TRACE_IRQFLAGS_SUPPORT=y CONFIG_GENERIC_BUG=y CONFIG_GENERIC_BUG_RELATIVE_POINTERS=y CONFIG_GENERIC_HWEIGHT=y CONFIG_GENERIC_CSUM=y CONFIG_GENERIC_CALIBRATE_DELAY=y CONFIG_ZONE_DMA=y CONFIG_ZONE_DMA32=y CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y CONFIG_ARCH_ENABLE_MEMORY_HOTREMOVE=y CONFIG_ARCH_MEMORY_PROBE=y # CONFIG_ARCH_MEMORY_REMOVE is not set CONFIG_ARCH_MHP_MEMMAP_ON_MEMORY_ENABLE=y CONFIG_SMP=y CONFIG_KERNEL_MODE_NEON=y 
CONFIG_FIX_EARLYCON_MEM=y CONFIG_PGTABLE_LEVELS=4 CONFIG_ARCH_SUPPORTS_UPROBES=y CONFIG_ARCH_PROC_KCORE_TEXT=y # # Platform selection # # CONFIG_ARCH_ACTIONS is not set # CONFIG_ARCH_AGILEX is not set # CONFIG_ARCH_SUNXI is not set # CONFIG_ARCH_ALPINE is not set # CONFIG_ARCH_BCM2835 is not set # CONFIG_ARCH_BCM_IPROC is not set # CONFIG_ARCH_BERLIN is not set # CONFIG_ARCH_BITMAIN is not set # CONFIG_ARCH_BRCMSTB is not set # CONFIG_ARCH_EXYNOS is not set # CONFIG_ARCH_SPARX5 is not set # CONFIG_ARCH_K3 is not set # CONFIG_ARCH_LAYERSCAPE is not set # CONFIG_ARCH_LG1K is not set # CONFIG_ARCH_HISI is not set # CONFIG_ARCH_KEEMBAY is not set # CONFIG_ARCH_MEDIATEK is not set # CONFIG_ARCH_MESON is not set # CONFIG_ARCH_MVEBU is not set # CONFIG_ARCH_MXC is not set # CONFIG_ARCH_QCOM is not set # CONFIG_ARCH_REALTEK is not set # CONFIG_ARCH_RENESAS is not set # CONFIG_ARCH_ROCKCHIP is not set # CONFIG_ARCH_S32 is not set # CONFIG_ARCH_SEATTLE is not set # CONFIG_ARCH_STRATIX10 is not set # CONFIG_ARCH_SYNQUACER is not set # CONFIG_ARCH_TEGRA is not set # CONFIG_ARCH_SPRD is not set # CONFIG_ARCH_THUNDER is not set # CONFIG_ARCH_THUNDER2 is not set # CONFIG_ARCH_UNIPHIER is not set # CONFIG_ARCH_VEXPRESS is not set # CONFIG_ARCH_VISCONTI is not set # CONFIG_ARCH_XGENE is not set # CONFIG_ARCH_ZX is not set # CONFIG_ARCH_ZYNQMP is not set # end of Platform selection # # Kernel Features # # # ARM errata workarounds via the alternatives framework # CONFIG_ARM64_WORKAROUND_CLEAN_CACHE=y CONFIG_ARM64_ERRATUM_826319=y CONFIG_ARM64_ERRATUM_827319=y CONFIG_ARM64_ERRATUM_824069=y CONFIG_ARM64_ERRATUM_819472=y CONFIG_ARM64_ERRATUM_832075=y CONFIG_ARM64_ERRATUM_1742098=y CONFIG_ARM64_ERRATUM_845719=y CONFIG_ARM64_ERRATUM_843419=y CONFIG_ARM64_ERRATUM_1024718=y CONFIG_ARM64_ERRATUM_1418040=y CONFIG_ARM64_WORKAROUND_SPECULATIVE_AT=y CONFIG_ARM64_ERRATUM_1165522=y CONFIG_ARM64_ERRATUM_1319367=y CONFIG_ARM64_ERRATUM_1530923=y CONFIG_ARM64_WORKAROUND_REPEAT_TLBI=y 
CONFIG_ARM64_ERRATUM_1286807=y CONFIG_ARM64_ERRATUM_1463225=y CONFIG_ARM64_ERRATUM_1542419=y CONFIG_ARM64_ERRATUM_1508412=y CONFIG_ARM64_ERRATUM_2457168=y CONFIG_CAVIUM_ERRATUM_22375=y CONFIG_CAVIUM_ERRATUM_23144=y CONFIG_CAVIUM_ERRATUM_23154=y CONFIG_CAVIUM_ERRATUM_27456=y CONFIG_CAVIUM_ERRATUM_30115=y CONFIG_CAVIUM_TX2_ERRATUM_219=y CONFIG_FUJITSU_ERRATUM_010001=y # CONFIG_HISILICON_ERRATUM_161600802 is not set CONFIG_QCOM_FALKOR_ERRATUM_1003=y CONFIG_QCOM_FALKOR_ERRATUM_1009=y CONFIG_QCOM_QDF2400_ERRATUM_0065=y CONFIG_QCOM_FALKOR_ERRATUM_E1041=y # CONFIG_SOCIONEXT_SYNQUACER_PREITS is not set # end of ARM errata workarounds via the alternatives framework CONFIG_ARM64_4K_PAGES=y # CONFIG_ARM64_16K_PAGES is not set # CONFIG_ARM64_64K_PAGES is not set # CONFIG_ARM64_VA_BITS_39 is not set CONFIG_ARM64_VA_BITS_48=y CONFIG_ARM64_VA_BITS=48 CONFIG_ARM64_PA_BITS_48=y CONFIG_ARM64_PA_BITS=48 # CONFIG_CPU_BIG_ENDIAN is not set CONFIG_CPU_LITTLE_ENDIAN=y CONFIG_SCHED_MC=y CONFIG_SCHED_SMT=y CONFIG_NR_CPUS=64 CONFIG_HOTPLUG_CPU=y CONFIG_NUMA=y CONFIG_NODES_SHIFT=10 CONFIG_USE_PERCPU_NUMA_NODE_ID=y CONFIG_HAVE_SETUP_PER_CPU_AREA=y CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK=y CONFIG_HOLES_IN_ZONE=y CONFIG_HZ_100=y # CONFIG_HZ_250 is not set # CONFIG_HZ_300 is not set # CONFIG_HZ_1000 is not set CONFIG_HZ=100 CONFIG_SCHED_HRTICK=y CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC=y CONFIG_ARCH_SPARSEMEM_ENABLE=y CONFIG_ARCH_SPARSEMEM_DEFAULT=y CONFIG_ARCH_SELECT_MEMORY_MODEL=y CONFIG_HAVE_ARCH_PFN_VALID=y CONFIG_HW_PERF_EVENTS=y CONFIG_SYS_SUPPORTS_HUGETLBFS=y CONFIG_ARCH_WANT_HUGE_PMD_SHARE=y CONFIG_ARCH_HAS_CACHE_LINE_SIZE=y CONFIG_ARCH_ENABLE_SPLIT_PMD_PTLOCK=y CONFIG_PARAVIRT=y CONFIG_PARAVIRT_TIME_ACCOUNTING=y # CONFIG_KEXEC is not set CONFIG_KEXEC_FILE=y # CONFIG_KEXEC_SIG is not set # CONFIG_CRASH_DUMP is not set # CONFIG_XEN is not set CONFIG_FORCE_MAX_ZONEORDER=11 CONFIG_UNMAP_KERNEL_AT_EL0=y CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY=y CONFIG_RODATA_FULL_DEFAULT_ENABLED=y # 
CONFIG_ARM64_SW_TTBR0_PAN is not set CONFIG_ARM64_TAGGED_ADDR_ABI=y CONFIG_COMPAT=y CONFIG_KUSER_HELPERS=y # CONFIG_ARMV8_DEPRECATED is not set # # ARMv8.1 architectural features # CONFIG_ARM64_HW_AFDBM=y CONFIG_ARM64_PAN=y CONFIG_AS_HAS_LSE_ATOMICS=y CONFIG_ARM64_LSE_ATOMICS=y CONFIG_ARM64_USE_LSE_ATOMICS=y CONFIG_ARM64_VHE=y # end of ARMv8.1 architectural features # # ARMv8.2 architectural features # CONFIG_ARM64_UAO=y # CONFIG_ARM64_PMEM is not set CONFIG_ARM64_RAS_EXTN=y CONFIG_ARM64_CNP=y # end of ARMv8.2 architectural features # # ARMv8.3 architectural features # CONFIG_ARM64_PTR_AUTH=y CONFIG_CC_HAS_BRANCH_PROT_PAC_RET=y CONFIG_CC_HAS_SIGN_RETURN_ADDRESS=y CONFIG_AS_HAS_PAC=y CONFIG_AS_HAS_CFI_NEGATE_RA_STATE=y # end of ARMv8.3 architectural features # # ARMv8.4 architectural features # CONFIG_ARM64_AMU_EXTN=y CONFIG_AS_HAS_ARMV8_4=y CONFIG_ARM64_TLB_RANGE=y # end of ARMv8.4 architectural features # # ARMv8.5 architectural features # CONFIG_ARM64_BTI=y CONFIG_CC_HAS_BRANCH_PROT_PAC_RET_BTI=y CONFIG_ARM64_E0PD=y CONFIG_ARCH_RANDOM=y CONFIG_ARM64_AS_HAS_MTE=y CONFIG_ARM64_MTE=y # end of ARMv8.5 architectural features CONFIG_ARM64_SVE=y # CONFIG_ARM64_PSEUDO_NMI is not set CONFIG_RELOCATABLE=y # CONFIG_RANDOMIZE_BASE is not set CONFIG_CC_HAVE_STACKPROTECTOR_SYSREG=y CONFIG_STACKPROTECTOR_PER_TASK=y # end of Kernel Features # # Boot options # # CONFIG_ARM64_ACPI_PARKING_PROTOCOL is not set CONFIG_CMDLINE="" CONFIG_EFI_STUB=y CONFIG_EFI=y CONFIG_DMI=y # end of Boot options CONFIG_SYSVIPC_COMPAT=y CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION=y CONFIG_ARCH_ENABLE_THP_MIGRATION=y # # Power management options # # CONFIG_SUSPEND is not set CONFIG_HIBERNATE_CALLBACKS=y CONFIG_HIBERNATION=y CONFIG_HIBERNATION_SNAPSHOT_DEV=y CONFIG_PM_STD_PARTITION="" CONFIG_PM_SLEEP=y CONFIG_PM_SLEEP_SMP=y # CONFIG_PM_AUTOSLEEP is not set # CONFIG_PM_WAKELOCKS is not set CONFIG_PM=y # CONFIG_PM_DEBUG is not set CONFIG_PM_CLK=y # CONFIG_WQ_POWER_EFFICIENT_DEFAULT is not set CONFIG_CPU_PM=y # 
CONFIG_ENERGY_MODEL is not set CONFIG_ARCH_HIBERNATION_POSSIBLE=y CONFIG_ARCH_HIBERNATION_HEADER=y CONFIG_ARCH_SUSPEND_POSSIBLE=y # end of Power management options # # CPU Power Management # # # CPU Idle # CONFIG_CPU_IDLE=y # CONFIG_CPU_IDLE_GOV_LADDER is not set CONFIG_CPU_IDLE_GOV_MENU=y # CONFIG_CPU_IDLE_GOV_TEO is not set # # ARM CPU Idle Drivers # # CONFIG_ARM_CPUIDLE is not set # CONFIG_ARM_PSCI_CPUIDLE is not set # end of ARM CPU Idle Drivers # end of CPU Idle # # CPU Frequency scaling # CONFIG_CPU_FREQ=y CONFIG_CPU_FREQ_GOV_ATTR_SET=y CONFIG_CPU_FREQ_STAT=y # CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE is not set # CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE is not set # CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE is not set # CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND is not set # CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE is not set CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL=y CONFIG_CPU_FREQ_GOV_PERFORMANCE=y # CONFIG_CPU_FREQ_GOV_POWERSAVE is not set # CONFIG_CPU_FREQ_GOV_USERSPACE is not set # CONFIG_CPU_FREQ_GOV_ONDEMAND is not set # CONFIG_CPU_FREQ_GOV_CONSERVATIVE is not set CONFIG_CPU_FREQ_GOV_SCHEDUTIL=y # # CPU frequency scaling drivers # # CONFIG_CPUFREQ_DT is not set # end of CPU Frequency scaling # end of CPU Power Management # # Firmware Drivers # # CONFIG_ARM_SCMI_PROTOCOL is not set # CONFIG_ARM_SCPI_PROTOCOL is not set # CONFIG_ARM_SDE_INTERFACE is not set CONFIG_DMIID=y # CONFIG_DMI_SYSFS is not set # CONFIG_ISCSI_IBFT is not set # CONFIG_GOOGLE_FIRMWARE is not set # # EFI (Extensible Firmware Interface) Support # CONFIG_EFI_ESRT=y CONFIG_EFI_VARS_PSTORE=y # CONFIG_EFI_VARS_PSTORE_DEFAULT_DISABLE is not set CONFIG_EFI_PARAMS_FROM_FDT=y CONFIG_EFI_RUNTIME_WRAPPERS=y CONFIG_EFI_GENERIC_STUB=y CONFIG_EFI_ARMSTUB_DTB_LOADER=y CONFIG_EFI_GENERIC_STUB_INITRD_CMDLINE_LOADER=y # CONFIG_EFI_BOOTLOADER_CONTROL is not set # CONFIG_EFI_CAPSULE_LOADER is not set # CONFIG_EFI_TEST is not set # CONFIG_RESET_ATTACK_MITIGATION is not set # CONFIG_EFI_DISABLE_PCI_DMA is not set # end 
of EFI (Extensible Firmware Interface) Support CONFIG_EFI_EARLYCON=y # CONFIG_EFI_CUSTOM_SSDT_OVERLAYS is not set CONFIG_ARM_PSCI_FW=y # CONFIG_ARM_PSCI_CHECKER is not set CONFIG_HAVE_ARM_SMCCC=y CONFIG_HAVE_ARM_SMCCC_DISCOVERY=y CONFIG_ARM_SMCCC_SOC_ID=y # # Tegra firmware driver # # end of Tegra firmware driver # end of Firmware Drivers CONFIG_ARCH_SUPPORTS_ACPI=y CONFIG_ACPI=y CONFIG_ACPI_GENERIC_GSI=y CONFIG_ACPI_CCA_REQUIRED=y # CONFIG_ACPI_DEBUGGER is not set CONFIG_ACPI_SPCR_TABLE=y # CONFIG_ACPI_EC_DEBUGFS is not set # CONFIG_ACPI_AC is not set # CONFIG_ACPI_BATTERY is not set # CONFIG_ACPI_BUTTON is not set # CONFIG_ACPI_TINY_POWER_BUTTON is not set # CONFIG_ACPI_FAN is not set # CONFIG_ACPI_TAD is not set # CONFIG_ACPI_DOCK is not set # CONFIG_ACPI_PROCESSOR is not set CONFIG_ARCH_HAS_ACPI_TABLE_UPGRADE=y # CONFIG_ACPI_TABLE_UPGRADE is not set # CONFIG_ACPI_DEBUG is not set # CONFIG_ACPI_CONTAINER is not set # CONFIG_ACPI_HOTPLUG_MEMORY is not set # CONFIG_ACPI_HED is not set # CONFIG_ACPI_CUSTOM_METHOD is not set # CONFIG_ACPI_BGRT is not set CONFIG_ACPI_REDUCED_HARDWARE_ONLY=y CONFIG_ACPI_NUMA=y # CONFIG_ACPI_HMAT is not set CONFIG_HAVE_ACPI_APEI=y # CONFIG_ACPI_APEI is not set # CONFIG_ACPI_CONFIGFS is not set CONFIG_ACPI_IORT=y CONFIG_ACPI_GTDT=y CONFIG_ACPI_PPTT=y # CONFIG_PMIC_OPREGION is not set # CONFIG_VIRTUALIZATION is not set # CONFIG_ARM64_CRYPTO is not set CONFIG_HAVE_LIVEPATCH=y CONFIG_CPU_MITIGATIONS=y # # General architecture-dependent options # CONFIG_CRASH_CORE=y CONFIG_KEXEC_CORE=y CONFIG_SET_FS=y CONFIG_JUMP_LABEL=y # CONFIG_STATIC_KEYS_SELFTEST is not set CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS=y CONFIG_HAVE_KPROBES=y CONFIG_HAVE_KRETPROBES=y CONFIG_HAVE_FUNCTION_ERROR_INJECTION=y CONFIG_HAVE_NMI=y CONFIG_HAVE_ARCH_TRACEHOOK=y CONFIG_HAVE_DMA_CONTIGUOUS=y CONFIG_GENERIC_SMP_IDLE_THREAD=y CONFIG_GENERIC_IDLE_POLL_SETUP=y CONFIG_ARCH_HAS_FORTIFY_SOURCE=y CONFIG_ARCH_HAS_KEEPINITRD=y CONFIG_ARCH_HAS_SET_MEMORY=y 
CONFIG_ARCH_HAS_SET_DIRECT_MAP=y CONFIG_HAVE_ARCH_THREAD_STRUCT_WHITELIST=y CONFIG_HAVE_ASM_MODVERSIONS=y CONFIG_HAVE_REGS_AND_STACK_ACCESS_API=y CONFIG_HAVE_RSEQ=y CONFIG_HAVE_FUNCTION_ARG_ACCESS_API=y CONFIG_HAVE_HW_BREAKPOINT=y CONFIG_HAVE_PERF_REGS=y CONFIG_HAVE_PERF_USER_STACK_DUMP=y CONFIG_HAVE_ARCH_JUMP_LABEL=y CONFIG_HAVE_ARCH_JUMP_LABEL_RELATIVE=y CONFIG_MMU_GATHER_TABLE_FREE=y CONFIG_MMU_GATHER_RCU_TABLE_FREE=y CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG=y CONFIG_HAVE_ALIGNED_STRUCT_PAGE=y CONFIG_HAVE_CMPXCHG_LOCAL=y CONFIG_HAVE_CMPXCHG_DOUBLE=y CONFIG_ARCH_WANT_COMPAT_IPC_PARSE_VERSION=y CONFIG_HAVE_ARCH_SECCOMP=y CONFIG_HAVE_ARCH_SECCOMP_FILTER=y CONFIG_SECCOMP=y CONFIG_SECCOMP_FILTER=y CONFIG_HAVE_ARCH_STACKLEAK=y CONFIG_HAVE_STACKPROTECTOR=y CONFIG_STACKPROTECTOR=y CONFIG_STACKPROTECTOR_STRONG=y CONFIG_HAVE_CONTEXT_TRACKING=y CONFIG_HAVE_VIRT_CPU_ACCOUNTING_GEN=y CONFIG_HAVE_IRQ_TIME_ACCOUNTING=y CONFIG_HAVE_MOVE_PMD=y CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE=y CONFIG_HAVE_ARCH_HUGE_VMAP=y CONFIG_MODULES_USE_ELF_RELA=y CONFIG_ARCH_HAS_ELF_RANDOMIZE=y CONFIG_HAVE_ARCH_MMAP_RND_BITS=y CONFIG_ARCH_MMAP_RND_BITS=18 CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS=y CONFIG_ARCH_MMAP_RND_COMPAT_BITS=11 CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT=y CONFIG_HAVE_STACK_VALIDATION=y CONFIG_HAVE_RELIABLE_STACKTRACE=y CONFIG_CLONE_BACKWARDS=y CONFIG_OLD_SIGSUSPEND3=y CONFIG_COMPAT_OLD_SIGACTION=y CONFIG_COMPAT_32BIT_TIME=y CONFIG_HAVE_ARCH_VMAP_STACK=y CONFIG_VMAP_STACK=y CONFIG_ARCH_HAS_STRICT_KERNEL_RWX=y CONFIG_STRICT_KERNEL_RWX=y CONFIG_ARCH_HAS_STRICT_MODULE_RWX=y CONFIG_HAVE_ARCH_COMPILER_H=y CONFIG_HAVE_ARCH_PREL32_RELOCATIONS=y CONFIG_ARCH_USE_MEMREMAP_PROT=y # CONFIG_LOCK_EVENT_COUNTS is not set CONFIG_ARCH_HAS_RELR=y CONFIG_ARCH_WANT_LD_ORPHAN_WARN=y # # GCOV-based kernel profiling # # CONFIG_GCOV_KERNEL is not set CONFIG_ARCH_HAS_GCOV_PROFILE_ALL=y # end of GCOV-based kernel profiling CONFIG_HAVE_GCC_PLUGINS=y CONFIG_GCC_PLUGINS=y # CONFIG_GCC_PLUGIN_LATENT_ENTROPY is not set 
# CONFIG_GCC_PLUGIN_RANDSTRUCT is not set # end of General architecture-dependent options CONFIG_RT_MUTEXES=y CONFIG_BASE_SMALL=0 # CONFIG_MODULES is not set CONFIG_MODULES_TREE_LOOKUP=y CONFIG_BLOCK=y CONFIG_BLK_RQ_ALLOC_TIME=y CONFIG_BLK_SCSI_REQUEST=y CONFIG_BLK_CGROUP_RWSTAT=y CONFIG_BLK_DEV_BSG=y CONFIG_BLK_DEV_BSGLIB=y CONFIG_BLK_DEV_INTEGRITY=y # CONFIG_BLK_DEV_ZONED is not set CONFIG_BLK_DEV_THROTTLING=y # CONFIG_BLK_DEV_THROTTLING_LOW is not set # CONFIG_BLK_CMDLINE_PARSER is not set # CONFIG_BLK_WBT is not set # CONFIG_BLK_CGROUP_IOLATENCY is not set CONFIG_BLK_CGROUP_IOCOST=y CONFIG_BLK_DEBUG_FS=y # CONFIG_BLK_SED_OPAL is not set # CONFIG_BLK_INLINE_ENCRYPTION is not set # # Partition Types # CONFIG_PARTITION_ADVANCED=y # CONFIG_ACORN_PARTITION is not set # CONFIG_AIX_PARTITION is not set # CONFIG_OSF_PARTITION is not set # CONFIG_AMIGA_PARTITION is not set # CONFIG_ATARI_PARTITION is not set # CONFIG_MAC_PARTITION is not set # CONFIG_MSDOS_PARTITION is not set # CONFIG_LDM_PARTITION is not set # CONFIG_SGI_PARTITION is not set # CONFIG_ULTRIX_PARTITION is not set # CONFIG_SUN_PARTITION is not set # CONFIG_KARMA_PARTITION is not set # CONFIG_EFI_PARTITION is not set # CONFIG_SYSV68_PARTITION is not set # CONFIG_CMDLINE_PARTITION is not set # end of Partition Types CONFIG_BLOCK_COMPAT=y CONFIG_BLK_MQ_VIRTIO=y CONFIG_BLK_PM=y # # IO Schedulers # CONFIG_MQ_IOSCHED_DEADLINE=y CONFIG_MQ_IOSCHED_KYBER=y CONFIG_IOSCHED_BFQ=y CONFIG_BFQ_GROUP_IOSCHED=y # CONFIG_BFQ_CGROUP_DEBUG is not set # end of IO Schedulers CONFIG_ASN1=y CONFIG_ARCH_INLINE_SPIN_TRYLOCK=y CONFIG_ARCH_INLINE_SPIN_TRYLOCK_BH=y CONFIG_ARCH_INLINE_SPIN_LOCK=y CONFIG_ARCH_INLINE_SPIN_LOCK_BH=y CONFIG_ARCH_INLINE_SPIN_LOCK_IRQ=y CONFIG_ARCH_INLINE_SPIN_LOCK_IRQSAVE=y CONFIG_ARCH_INLINE_SPIN_UNLOCK=y CONFIG_ARCH_INLINE_SPIN_UNLOCK_BH=y CONFIG_ARCH_INLINE_SPIN_UNLOCK_IRQ=y CONFIG_ARCH_INLINE_SPIN_UNLOCK_IRQRESTORE=y CONFIG_ARCH_INLINE_READ_LOCK=y CONFIG_ARCH_INLINE_READ_LOCK_BH=y 
CONFIG_ARCH_INLINE_READ_LOCK_IRQ=y CONFIG_ARCH_INLINE_READ_LOCK_IRQSAVE=y CONFIG_ARCH_INLINE_READ_UNLOCK=y CONFIG_ARCH_INLINE_READ_UNLOCK_BH=y CONFIG_ARCH_INLINE_READ_UNLOCK_IRQ=y CONFIG_ARCH_INLINE_READ_UNLOCK_IRQRESTORE=y CONFIG_ARCH_INLINE_WRITE_LOCK=y CONFIG_ARCH_INLINE_WRITE_LOCK_BH=y CONFIG_ARCH_INLINE_WRITE_LOCK_IRQ=y CONFIG_ARCH_INLINE_WRITE_LOCK_IRQSAVE=y CONFIG_ARCH_INLINE_WRITE_UNLOCK=y CONFIG_ARCH_INLINE_WRITE_UNLOCK_BH=y CONFIG_ARCH_INLINE_WRITE_UNLOCK_IRQ=y CONFIG_ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE=y CONFIG_INLINE_SPIN_TRYLOCK=y CONFIG_INLINE_SPIN_TRYLOCK_BH=y CONFIG_INLINE_SPIN_LOCK=y CONFIG_INLINE_SPIN_LOCK_BH=y CONFIG_INLINE_SPIN_LOCK_IRQ=y CONFIG_INLINE_SPIN_LOCK_IRQSAVE=y CONFIG_INLINE_SPIN_UNLOCK_BH=y CONFIG_INLINE_SPIN_UNLOCK_IRQ=y CONFIG_INLINE_SPIN_UNLOCK_IRQRESTORE=y CONFIG_INLINE_READ_LOCK=y CONFIG_INLINE_READ_LOCK_BH=y CONFIG_INLINE_READ_LOCK_IRQ=y CONFIG_INLINE_READ_LOCK_IRQSAVE=y CONFIG_INLINE_READ_UNLOCK=y CONFIG_INLINE_READ_UNLOCK_BH=y CONFIG_INLINE_READ_UNLOCK_IRQ=y CONFIG_INLINE_READ_UNLOCK_IRQRESTORE=y CONFIG_INLINE_WRITE_LOCK=y CONFIG_INLINE_WRITE_LOCK_BH=y CONFIG_INLINE_WRITE_LOCK_IRQ=y CONFIG_INLINE_WRITE_LOCK_IRQSAVE=y CONFIG_INLINE_WRITE_UNLOCK=y CONFIG_INLINE_WRITE_UNLOCK_BH=y CONFIG_INLINE_WRITE_UNLOCK_IRQ=y CONFIG_INLINE_WRITE_UNLOCK_IRQRESTORE=y CONFIG_ARCH_SUPPORTS_ATOMIC_RMW=y CONFIG_MUTEX_SPIN_ON_OWNER=y CONFIG_RWSEM_SPIN_ON_OWNER=y CONFIG_LOCK_SPIN_ON_OWNER=y CONFIG_ARCH_USE_QUEUED_SPINLOCKS=y CONFIG_QUEUED_SPINLOCKS=y CONFIG_ARCH_USE_QUEUED_RWLOCKS=y CONFIG_QUEUED_RWLOCKS=y CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE=y CONFIG_ARCH_HAS_SYSCALL_WRAPPER=y CONFIG_FREEZER=y # # Executable file formats # CONFIG_BINFMT_ELF=y CONFIG_COMPAT_BINFMT_ELF=y CONFIG_ARCH_BINFMT_ELF_STATE=y CONFIG_ARCH_HAVE_ELF_PROT=y CONFIG_ARCH_USE_GNU_PROPERTY=y CONFIG_ELFCORE=y CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y CONFIG_BINFMT_SCRIPT=y CONFIG_BINFMT_MISC=y CONFIG_COREDUMP=y # end of Executable file formats # # Memory Management options # 
CONFIG_SELECT_MEMORY_MODEL=y CONFIG_SPARSEMEM_MANUAL=y CONFIG_SPARSEMEM=y CONFIG_NEED_MULTIPLE_NODES=y CONFIG_SPARSEMEM_EXTREME=y CONFIG_SPARSEMEM_VMEMMAP_ENABLE=y CONFIG_SPARSEMEM_VMEMMAP=y CONFIG_HAVE_FAST_GUP=y CONFIG_ARCH_KEEP_MEMBLOCK=y CONFIG_NUMA_KEEP_MEMINFO=y CONFIG_MEMORY_ISOLATION=y CONFIG_MEMORY_HOTPLUG=y CONFIG_MEMORY_HOTPLUG_SPARSE=y # CONFIG_MEMORY_HOTPLUG_DEFAULT_ONLINE is not set CONFIG_MEMORY_HOTREMOVE=y CONFIG_MHP_MEMMAP_ON_MEMORY=y CONFIG_SPLIT_PTLOCK_CPUS=4 CONFIG_MEMORY_BALLOON=y CONFIG_BALLOON_COMPACTION=y CONFIG_COMPACTION=y CONFIG_PAGE_REPORTING=y CONFIG_MIGRATION=y CONFIG_CONTIG_ALLOC=y CONFIG_PHYS_ADDR_T_64BIT=y CONFIG_BOUNCE=y CONFIG_KSM=y CONFIG_DEFAULT_MMAP_MIN_ADDR=4096 CONFIG_ARCH_SUPPORTS_MEMORY_FAILURE=y # CONFIG_MEMORY_FAILURE is not set CONFIG_TRANSPARENT_HUGEPAGE=y CONFIG_TRANSPARENT_HUGEPAGE_ALWAYS=y # CONFIG_TRANSPARENT_HUGEPAGE_MADVISE is not set CONFIG_CLEANCACHE=y CONFIG_FRONTSWAP=y # CONFIG_CMA is not set CONFIG_ZSWAP=y # CONFIG_ZSWAP_COMPRESSOR_DEFAULT_DEFLATE is not set CONFIG_ZSWAP_COMPRESSOR_DEFAULT_LZO=y # CONFIG_ZSWAP_COMPRESSOR_DEFAULT_842 is not set # CONFIG_ZSWAP_COMPRESSOR_DEFAULT_LZ4 is not set # CONFIG_ZSWAP_COMPRESSOR_DEFAULT_LZ4HC is not set # CONFIG_ZSWAP_COMPRESSOR_DEFAULT_ZSTD is not set CONFIG_ZSWAP_COMPRESSOR_DEFAULT="lzo" CONFIG_ZSWAP_ZPOOL_DEFAULT_ZBUD=y # CONFIG_ZSWAP_ZPOOL_DEFAULT_Z3FOLD is not set # CONFIG_ZSWAP_ZPOOL_DEFAULT_ZSMALLOC is not set CONFIG_ZSWAP_ZPOOL_DEFAULT="zbud" # CONFIG_ZSWAP_DEFAULT_ON is not set CONFIG_ZPOOL=y CONFIG_ZBUD=y # CONFIG_Z3FOLD is not set # CONFIG_ZSMALLOC is not set CONFIG_GENERIC_EARLY_IOREMAP=y # CONFIG_DEFERRED_STRUCT_PAGE_INIT is not set CONFIG_PAGE_IDLE_FLAG=y # CONFIG_IDLE_PAGE_TRACKING is not set CONFIG_ARCH_HAS_PTE_DEVMAP=y # CONFIG_ZONE_DEVICE is not set CONFIG_ARCH_USES_HIGH_VMA_FLAGS=y # CONFIG_PERCPU_STATS is not set # CONFIG_GUP_BENCHMARK is not set # CONFIG_READ_ONLY_THP_FOR_FS is not set CONFIG_ARCH_HAS_PTE_SPECIAL=y # # Data Access Monitoring # 
CONFIG_DAMON=y CONFIG_DAMON_VADDR=y CONFIG_DAMON_PADDR=y CONFIG_DAMON_SYSFS=y CONFIG_DAMON_DBGFS=y # CONFIG_DAMON_RECLAIM is not set # CONFIG_DAMON_LRU_SORT is not set # end of Data Access Monitoring # end of Memory Management options CONFIG_NET=y CONFIG_NET_INGRESS=y CONFIG_SKB_EXTENSIONS=y # # Networking options # CONFIG_PACKET=y # CONFIG_PACKET_DIAG is not set CONFIG_UNIX=y CONFIG_UNIX_SCM=y # CONFIG_UNIX_DIAG is not set # CONFIG_TLS is not set CONFIG_XFRM=y CONFIG_XFRM_ALGO=y CONFIG_XFRM_USER=y # CONFIG_XFRM_INTERFACE is not set CONFIG_XFRM_SUB_POLICY=y CONFIG_XFRM_MIGRATE=y CONFIG_XFRM_STATISTICS=y # CONFIG_NET_KEY is not set CONFIG_XDP_SOCKETS=y # CONFIG_XDP_SOCKETS_DIAG is not set CONFIG_INET=y CONFIG_IP_MULTICAST=y CONFIG_IP_ADVANCED_ROUTER=y # CONFIG_IP_FIB_TRIE_STATS is not set CONFIG_IP_MULTIPLE_TABLES=y CONFIG_IP_ROUTE_MULTIPATH=y CONFIG_IP_ROUTE_VERBOSE=y CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y CONFIG_IP_PNP_RARP=y # CONFIG_NET_IPIP is not set # CONFIG_NET_IPGRE_DEMUX is not set CONFIG_IP_MROUTE_COMMON=y CONFIG_IP_MROUTE=y CONFIG_IP_MROUTE_MULTIPLE_TABLES=y CONFIG_IP_PIMSM_V1=y CONFIG_IP_PIMSM_V2=y CONFIG_SYN_COOKIES=y # CONFIG_NET_IPVTI is not set # CONFIG_NET_FOU is not set # CONFIG_INET_AH is not set # CONFIG_INET_ESP is not set # CONFIG_INET_IPCOMP is not set CONFIG_INET_TABLE_PERTURB_ORDER=16 CONFIG_INET_DIAG=y CONFIG_INET_TCP_DIAG=y # CONFIG_INET_UDP_DIAG is not set # CONFIG_INET_RAW_DIAG is not set CONFIG_INET_DIAG_DESTROY=y CONFIG_TCP_CONG_ADVANCED=y # CONFIG_TCP_CONG_BIC is not set CONFIG_TCP_CONG_CUBIC=y # CONFIG_TCP_CONG_WESTWOOD is not set # CONFIG_TCP_CONG_HTCP is not set # CONFIG_TCP_CONG_HSTCP is not set # CONFIG_TCP_CONG_HYBLA is not set # CONFIG_TCP_CONG_VEGAS is not set # CONFIG_TCP_CONG_NV is not set # CONFIG_TCP_CONG_SCALABLE is not set # CONFIG_TCP_CONG_LP is not set # CONFIG_TCP_CONG_VENO is not set # CONFIG_TCP_CONG_YEAH is not set # CONFIG_TCP_CONG_ILLINOIS is not set # CONFIG_TCP_CONG_DCTCP is not set # 
CONFIG_TCP_CONG_CDG is not set # CONFIG_TCP_CONG_BBR is not set # CONFIG_TCP_CONG_BBR2 is not set CONFIG_DEFAULT_CUBIC=y # CONFIG_DEFAULT_RENO is not set CONFIG_DEFAULT_TCP_CONG="cubic" CONFIG_TCP_MD5SIG=y CONFIG_IPV6=y CONFIG_IPV6_ROUTER_PREF=y CONFIG_IPV6_ROUTE_INFO=y CONFIG_IPV6_OPTIMISTIC_DAD=y # CONFIG_INET6_AH is not set # CONFIG_INET6_ESP is not set # CONFIG_INET6_IPCOMP is not set # CONFIG_IPV6_MIP6 is not set # CONFIG_IPV6_ILA is not set # CONFIG_IPV6_VTI is not set # CONFIG_IPV6_SIT is not set # CONFIG_IPV6_TUNNEL is not set CONFIG_IPV6_MULTIPLE_TABLES=y # CONFIG_IPV6_SUBTREES is not set CONFIG_IPV6_MROUTE=y CONFIG_IPV6_MROUTE_MULTIPLE_TABLES=y CONFIG_IPV6_PIMSM_V2=y # CONFIG_IPV6_SEG6_LWTUNNEL is not set # CONFIG_IPV6_SEG6_HMAC is not set # CONFIG_IPV6_RPL_LWTUNNEL is not set CONFIG_NETLABEL=y CONFIG_MPTCP=y CONFIG_INET_MPTCP_DIAG=y CONFIG_MPTCP_IPV6=y CONFIG_NETWORK_SECMARK=y CONFIG_NET_PTP_CLASSIFY=y CONFIG_NETWORK_PHY_TIMESTAMPING=y CONFIG_NETFILTER=y CONFIG_NETFILTER_ADVANCED=y CONFIG_BRIDGE_NETFILTER=y # # Core Netfilter Configuration # CONFIG_NETFILTER_INGRESS=y CONFIG_NETFILTER_FAMILY_BRIDGE=y # CONFIG_NETFILTER_NETLINK_ACCT is not set # CONFIG_NETFILTER_NETLINK_QUEUE is not set # CONFIG_NETFILTER_NETLINK_LOG is not set # CONFIG_NETFILTER_NETLINK_OSF is not set CONFIG_NF_CONNTRACK=y CONFIG_NF_LOG_COMMON=y # CONFIG_NF_LOG_NETDEV is not set CONFIG_NF_CONNTRACK_MARK=y CONFIG_NF_CONNTRACK_SECMARK=y CONFIG_NF_CONNTRACK_ZONES=y CONFIG_NF_CONNTRACK_PROCFS=y CONFIG_NF_CONNTRACK_EVENTS=y CONFIG_NF_CONNTRACK_TIMEOUT=y CONFIG_NF_CONNTRACK_TIMESTAMP=y CONFIG_NF_CONNTRACK_LABELS=y CONFIG_NF_CT_PROTO_DCCP=y CONFIG_NF_CT_PROTO_SCTP=y CONFIG_NF_CT_PROTO_UDPLITE=y # CONFIG_NF_CONNTRACK_AMANDA is not set # CONFIG_NF_CONNTRACK_FTP is not set # CONFIG_NF_CONNTRACK_H323 is not set # CONFIG_NF_CONNTRACK_IRC is not set # CONFIG_NF_CONNTRACK_NETBIOS_NS is not set # CONFIG_NF_CONNTRACK_SNMP is not set # CONFIG_NF_CONNTRACK_PPTP is not set # CONFIG_NF_CONNTRACK_SANE is not 
set # CONFIG_NF_CONNTRACK_SIP is not set # CONFIG_NF_CONNTRACK_TFTP is not set # CONFIG_NF_CT_NETLINK is not set # CONFIG_NF_CT_NETLINK_TIMEOUT is not set CONFIG_NF_NAT=y CONFIG_NF_NAT_REDIRECT=y CONFIG_NF_NAT_MASQUERADE=y CONFIG_NETFILTER_SYNPROXY=y # CONFIG_NF_TABLES is not set CONFIG_NETFILTER_XTABLES=y # # Xtables combined modules # # CONFIG_NETFILTER_XT_MARK is not set # CONFIG_NETFILTER_XT_CONNMARK is not set # # Xtables targets # # CONFIG_NETFILTER_XT_TARGET_AUDIT is not set # CONFIG_NETFILTER_XT_TARGET_CHECKSUM is not set # CONFIG_NETFILTER_XT_TARGET_CLASSIFY is not set # CONFIG_NETFILTER_XT_TARGET_CONNMARK is not set # CONFIG_NETFILTER_XT_TARGET_CONNSECMARK is not set # CONFIG_NETFILTER_XT_TARGET_DSCP is not set # CONFIG_NETFILTER_XT_TARGET_HL is not set # CONFIG_NETFILTER_XT_TARGET_HMARK is not set # CONFIG_NETFILTER_XT_TARGET_IDLETIMER is not set # CONFIG_NETFILTER_XT_TARGET_LOG is not set # CONFIG_NETFILTER_XT_TARGET_MARK is not set CONFIG_NETFILTER_XT_NAT=y CONFIG_NETFILTER_XT_TARGET_NETMAP=y # CONFIG_NETFILTER_XT_TARGET_NFLOG is not set # CONFIG_NETFILTER_XT_TARGET_NFQUEUE is not set # CONFIG_NETFILTER_XT_TARGET_RATEEST is not set CONFIG_NETFILTER_XT_TARGET_REDIRECT=y CONFIG_NETFILTER_XT_TARGET_MASQUERADE=y # CONFIG_NETFILTER_XT_TARGET_TEE is not set # CONFIG_NETFILTER_XT_TARGET_TPROXY is not set # CONFIG_NETFILTER_XT_TARGET_SECMARK is not set # CONFIG_NETFILTER_XT_TARGET_TCPMSS is not set # CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP is not set # # Xtables matches # CONFIG_NETFILTER_XT_MATCH_ADDRTYPE=y # CONFIG_NETFILTER_XT_MATCH_BPF is not set # CONFIG_NETFILTER_XT_MATCH_CGROUP is not set # CONFIG_NETFILTER_XT_MATCH_CLUSTER is not set # CONFIG_NETFILTER_XT_MATCH_COMMENT is not set # CONFIG_NETFILTER_XT_MATCH_CONNBYTES is not set # CONFIG_NETFILTER_XT_MATCH_CONNLABEL is not set # CONFIG_NETFILTER_XT_MATCH_CONNLIMIT is not set # CONFIG_NETFILTER_XT_MATCH_CONNMARK is not set CONFIG_NETFILTER_XT_MATCH_CONNTRACK=y # CONFIG_NETFILTER_XT_MATCH_CPU is not set # 
CONFIG_NETFILTER_XT_MATCH_DCCP is not set # CONFIG_NETFILTER_XT_MATCH_DEVGROUP is not set # CONFIG_NETFILTER_XT_MATCH_DSCP is not set # CONFIG_NETFILTER_XT_MATCH_ECN is not set # CONFIG_NETFILTER_XT_MATCH_ESP is not set # CONFIG_NETFILTER_XT_MATCH_HASHLIMIT is not set # CONFIG_NETFILTER_XT_MATCH_HELPER is not set # CONFIG_NETFILTER_XT_MATCH_HL is not set # CONFIG_NETFILTER_XT_MATCH_IPCOMP is not set # CONFIG_NETFILTER_XT_MATCH_IPRANGE is not set # CONFIG_NETFILTER_XT_MATCH_L2TP is not set # CONFIG_NETFILTER_XT_MATCH_LENGTH is not set # CONFIG_NETFILTER_XT_MATCH_LIMIT is not set # CONFIG_NETFILTER_XT_MATCH_MAC is not set # CONFIG_NETFILTER_XT_MATCH_MARK is not set # CONFIG_NETFILTER_XT_MATCH_MULTIPORT is not set # CONFIG_NETFILTER_XT_MATCH_NFACCT is not set # CONFIG_NETFILTER_XT_MATCH_OSF is not set # CONFIG_NETFILTER_XT_MATCH_OWNER is not set # CONFIG_NETFILTER_XT_MATCH_POLICY is not set # CONFIG_NETFILTER_XT_MATCH_PHYSDEV is not set # CONFIG_NETFILTER_XT_MATCH_PKTTYPE is not set # CONFIG_NETFILTER_XT_MATCH_QUOTA is not set # CONFIG_NETFILTER_XT_MATCH_RATEEST is not set # CONFIG_NETFILTER_XT_MATCH_REALM is not set # CONFIG_NETFILTER_XT_MATCH_RECENT is not set # CONFIG_NETFILTER_XT_MATCH_SCTP is not set # CONFIG_NETFILTER_XT_MATCH_SOCKET is not set # CONFIG_NETFILTER_XT_MATCH_STATE is not set # CONFIG_NETFILTER_XT_MATCH_STATISTIC is not set # CONFIG_NETFILTER_XT_MATCH_STRING is not set # CONFIG_NETFILTER_XT_MATCH_TCPMSS is not set # CONFIG_NETFILTER_XT_MATCH_TIME is not set # CONFIG_NETFILTER_XT_MATCH_U32 is not set # end of Core Netfilter Configuration # CONFIG_IP_SET is not set # CONFIG_IP_VS is not set # # IP: Netfilter Configuration # CONFIG_NF_DEFRAG_IPV4=y # CONFIG_NF_SOCKET_IPV4 is not set # CONFIG_NF_TPROXY_IPV4 is not set # CONFIG_NF_DUP_IPV4 is not set # CONFIG_NF_LOG_ARP is not set # CONFIG_NF_LOG_IPV4 is not set CONFIG_NF_REJECT_IPV4=y CONFIG_IP_NF_IPTABLES=y # CONFIG_IP_NF_MATCH_AH is not set # CONFIG_IP_NF_MATCH_ECN is not set # 
CONFIG_IP_NF_MATCH_RPFILTER is not set # CONFIG_IP_NF_MATCH_TTL is not set CONFIG_IP_NF_FILTER=y CONFIG_IP_NF_TARGET_REJECT=y CONFIG_IP_NF_TARGET_SYNPROXY=y CONFIG_IP_NF_NAT=y CONFIG_IP_NF_TARGET_MASQUERADE=y CONFIG_IP_NF_TARGET_NETMAP=y CONFIG_IP_NF_TARGET_REDIRECT=y CONFIG_IP_NF_MANGLE=y # CONFIG_IP_NF_TARGET_CLUSTERIP is not set # CONFIG_IP_NF_TARGET_ECN is not set # CONFIG_IP_NF_TARGET_TTL is not set # CONFIG_IP_NF_RAW is not set # CONFIG_IP_NF_SECURITY is not set # CONFIG_IP_NF_ARPTABLES is not set # end of IP: Netfilter Configuration # # IPv6: Netfilter Configuration # # CONFIG_NF_SOCKET_IPV6 is not set # CONFIG_NF_TPROXY_IPV6 is not set # CONFIG_NF_DUP_IPV6 is not set CONFIG_NF_REJECT_IPV6=y CONFIG_NF_LOG_IPV6=y CONFIG_IP6_NF_IPTABLES=y # CONFIG_IP6_NF_MATCH_AH is not set # CONFIG_IP6_NF_MATCH_EUI64 is not set # CONFIG_IP6_NF_MATCH_FRAG is not set # CONFIG_IP6_NF_MATCH_OPTS is not set # CONFIG_IP6_NF_MATCH_HL is not set # CONFIG_IP6_NF_MATCH_IPV6HEADER is not set # CONFIG_IP6_NF_MATCH_MH is not set # CONFIG_IP6_NF_MATCH_RPFILTER is not set # CONFIG_IP6_NF_MATCH_RT is not set # CONFIG_IP6_NF_MATCH_SRH is not set # CONFIG_IP6_NF_TARGET_HL is not set CONFIG_IP6_NF_FILTER=y CONFIG_IP6_NF_TARGET_REJECT=y CONFIG_IP6_NF_TARGET_SYNPROXY=y CONFIG_IP6_NF_MANGLE=y # CONFIG_IP6_NF_RAW is not set # CONFIG_IP6_NF_SECURITY is not set CONFIG_IP6_NF_NAT=y CONFIG_IP6_NF_TARGET_MASQUERADE=y # CONFIG_IP6_NF_TARGET_NPT is not set # end of IPv6: Netfilter Configuration CONFIG_NF_DEFRAG_IPV6=y # CONFIG_NF_CONNTRACK_BRIDGE is not set # CONFIG_BRIDGE_NF_EBTABLES is not set CONFIG_BPFILTER=y # CONFIG_IP_DCCP is not set # CONFIG_IP_SCTP is not set # CONFIG_RDS is not set # CONFIG_TIPC is not set # CONFIG_ATM is not set # CONFIG_L2TP is not set CONFIG_STP=y CONFIG_BRIDGE=y CONFIG_BRIDGE_IGMP_SNOOPING=y # CONFIG_BRIDGE_MRP is not set CONFIG_HAVE_NET_DSA=y # CONFIG_NET_DSA is not set # CONFIG_VLAN_8021Q is not set CONFIG_LLC=y # CONFIG_LLC2 is not set # CONFIG_ATALK is not set # 
CONFIG_X25 is not set # CONFIG_LAPB is not set # CONFIG_PHONET is not set # CONFIG_6LOWPAN is not set # CONFIG_IEEE802154 is not set CONFIG_NET_SCHED=y # # Queueing/Scheduling # # CONFIG_NET_SCH_CBQ is not set # CONFIG_NET_SCH_HTB is not set # CONFIG_NET_SCH_HFSC is not set # CONFIG_NET_SCH_PRIO is not set # CONFIG_NET_SCH_MULTIQ is not set # CONFIG_NET_SCH_RED is not set # CONFIG_NET_SCH_SFB is not set # CONFIG_NET_SCH_SFQ is not set # CONFIG_NET_SCH_TEQL is not set # CONFIG_NET_SCH_TBF is not set # CONFIG_NET_SCH_CBS is not set # CONFIG_NET_SCH_ETF is not set # CONFIG_NET_SCH_TAPRIO is not set # CONFIG_NET_SCH_GRED is not set # CONFIG_NET_SCH_DSMARK is not set # CONFIG_NET_SCH_NETEM is not set # CONFIG_NET_SCH_DRR is not set # CONFIG_NET_SCH_MQPRIO is not set # CONFIG_NET_SCH_SKBPRIO is not set # CONFIG_NET_SCH_CHOKE is not set # CONFIG_NET_SCH_QFQ is not set # CONFIG_NET_SCH_CODEL is not set # CONFIG_NET_SCH_FQ_CODEL is not set # CONFIG_NET_SCH_CAKE is not set # CONFIG_NET_SCH_FQ is not set # CONFIG_NET_SCH_HHF is not set # CONFIG_NET_SCH_PIE is not set # CONFIG_NET_SCH_INGRESS is not set # CONFIG_NET_SCH_PLUG is not set # CONFIG_NET_SCH_ETS is not set # CONFIG_NET_SCH_DEFAULT is not set # # Classification # CONFIG_NET_CLS=y # CONFIG_NET_CLS_BASIC is not set # CONFIG_NET_CLS_ROUTE4 is not set # CONFIG_NET_CLS_FW is not set # CONFIG_NET_CLS_U32 is not set # CONFIG_NET_CLS_FLOW is not set # CONFIG_NET_CLS_CGROUP is not set # CONFIG_NET_CLS_BPF is not set # CONFIG_NET_CLS_FLOWER is not set # CONFIG_NET_CLS_MATCHALL is not set CONFIG_NET_EMATCH=y CONFIG_NET_EMATCH_STACK=32 # CONFIG_NET_EMATCH_CMP is not set # CONFIG_NET_EMATCH_NBYTE is not set # CONFIG_NET_EMATCH_U32 is not set # CONFIG_NET_EMATCH_META is not set # CONFIG_NET_EMATCH_TEXT is not set # CONFIG_NET_EMATCH_IPT is not set CONFIG_NET_CLS_ACT=y # CONFIG_NET_ACT_POLICE is not set # CONFIG_NET_ACT_GACT is not set # CONFIG_NET_ACT_MIRRED is not set # CONFIG_NET_ACT_SAMPLE is not set # CONFIG_NET_ACT_IPT is not 
set # CONFIG_NET_ACT_NAT is not set # CONFIG_NET_ACT_PEDIT is not set # CONFIG_NET_ACT_SIMP is not set # CONFIG_NET_ACT_SKBEDIT is not set # CONFIG_NET_ACT_CSUM is not set # CONFIG_NET_ACT_MPLS is not set # CONFIG_NET_ACT_VLAN is not set # CONFIG_NET_ACT_BPF is not set # CONFIG_NET_ACT_CONNMARK is not set # CONFIG_NET_ACT_CTINFO is not set # CONFIG_NET_ACT_SKBMOD is not set # CONFIG_NET_ACT_IFE is not set # CONFIG_NET_ACT_TUNNEL_KEY is not set # CONFIG_NET_ACT_GATE is not set # CONFIG_NET_TC_SKB_EXT is not set CONFIG_NET_SCH_FIFO=y CONFIG_DCB=y CONFIG_DNS_RESOLVER=y # CONFIG_BATMAN_ADV is not set # CONFIG_OPENVSWITCH is not set CONFIG_VSOCKETS=y # CONFIG_VSOCKETS_DIAG is not set # CONFIG_VSOCKETS_LOOPBACK is not set CONFIG_VIRTIO_VSOCKETS=y CONFIG_VIRTIO_VSOCKETS_COMMON=y # CONFIG_NETLINK_DIAG is not set CONFIG_MPLS=y # CONFIG_NET_MPLS_GSO is not set # CONFIG_MPLS_ROUTING is not set # CONFIG_NET_NSH is not set # CONFIG_HSR is not set # CONFIG_NET_SWITCHDEV is not set CONFIG_NET_L3_MASTER_DEV=y # CONFIG_QRTR is not set # CONFIG_NET_NCSI is not set CONFIG_RPS=y CONFIG_RFS_ACCEL=y CONFIG_XPS=y CONFIG_CGROUP_NET_PRIO=y CONFIG_CGROUP_NET_CLASSID=y CONFIG_NET_RX_BUSY_POLL=y CONFIG_BQL=y CONFIG_BPF_STREAM_PARSER=y CONFIG_NET_FLOW_LIMIT=y # # Network testing # # CONFIG_NET_PKTGEN is not set # end of Network testing # end of Networking options # CONFIG_HAMRADIO is not set # CONFIG_CAN is not set # CONFIG_BT is not set # CONFIG_AF_RXRPC is not set # CONFIG_AF_KCM is not set CONFIG_STREAM_PARSER=y CONFIG_FIB_RULES=y # CONFIG_WIRELESS is not set # CONFIG_WIMAX is not set # CONFIG_RFKILL is not set # CONFIG_NET_9P is not set # CONFIG_CAIF is not set # CONFIG_CEPH_LIB is not set # CONFIG_NFC is not set # CONFIG_PSAMPLE is not set # CONFIG_NET_IFE is not set CONFIG_LWTUNNEL=y CONFIG_LWTUNNEL_BPF=y CONFIG_GRO_CELLS=y CONFIG_NET_SOCK_MSG=y CONFIG_FAILOVER=y CONFIG_ETHTOOL_NETLINK=y CONFIG_HAVE_EBPF_JIT=y # # Device Drivers # CONFIG_ARM_AMBA=y CONFIG_HAVE_PCI=y # CONFIG_PCI is not 
set # CONFIG_PCCARD is not set # # Generic Driver Options # # CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y CONFIG_STANDALONE=y CONFIG_PREVENT_FIRMWARE_BUILD=y # # Firmware loader # CONFIG_FW_LOADER=y CONFIG_FW_LOADER_PAGED_BUF=y CONFIG_EXTRA_FIRMWARE="" CONFIG_FW_LOADER_USER_HELPER=y # CONFIG_FW_LOADER_USER_HELPER_FALLBACK is not set # CONFIG_FW_LOADER_COMPRESS is not set CONFIG_FW_CACHE=y # end of Firmware loader CONFIG_ALLOW_DEV_COREDUMP=y # CONFIG_DEBUG_DRIVER is not set # CONFIG_DEBUG_DEVRES is not set # CONFIG_DEBUG_TEST_DRIVER_REMOVE is not set CONFIG_GENERIC_CPU_AUTOPROBE=y CONFIG_GENERIC_CPU_VULNERABILITIES=y CONFIG_SOC_BUS=y CONFIG_DMA_SHARED_BUFFER=y # CONFIG_DMA_FENCE_TRACE is not set CONFIG_GENERIC_ARCH_TOPOLOGY=y # end of Generic Driver Options # # Bus devices # # CONFIG_BRCMSTB_GISB_ARB is not set # CONFIG_SIMPLE_PM_BUS is not set # CONFIG_VEXPRESS_CONFIG is not set # CONFIG_MHI_BUS is not set # end of Bus devices CONFIG_CONNECTOR=y CONFIG_PROC_EVENTS=y # CONFIG_GNSS is not set # CONFIG_MTD is not set CONFIG_DTC=y CONFIG_OF=y # CONFIG_OF_UNITTEST is not set CONFIG_OF_FLATTREE=y CONFIG_OF_EARLY_FLATTREE=y CONFIG_OF_KOBJ=y CONFIG_OF_ADDRESS=y CONFIG_OF_IRQ=y CONFIG_OF_NET=y CONFIG_OF_RESERVED_MEM=y # CONFIG_OF_OVERLAY is not set CONFIG_OF_NUMA=y # CONFIG_PARPORT is not set CONFIG_PNP=y CONFIG_PNP_DEBUG_MESSAGES=y # # Protocols # CONFIG_PNPACPI=y CONFIG_BLK_DEV=y # CONFIG_BLK_DEV_NULL_BLK is not set CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_LOOP_MIN_COUNT=8 # CONFIG_BLK_DEV_CRYPTOLOOP is not set # CONFIG_BLK_DEV_DRBD is not set # CONFIG_BLK_DEV_NBD is not set # CONFIG_BLK_DEV_RAM is not set # CONFIG_CDROM_PKTCDVD is not set # CONFIG_ATA_OVER_ETH is not set CONFIG_VIRTIO_BLK=y # CONFIG_BLK_DEV_RBD is not set # # NVME Support # # CONFIG_NVME_FC is not set # CONFIG_NVME_TCP is not set # end of NVME Support # # Misc devices # # CONFIG_DUMMY_IRQ is not set # CONFIG_ENCLOSURE_SERVICES is not set # CONFIG_SRAM is not set # 
CONFIG_XILINX_SDFEC is not set # CONFIG_PVPANIC is not set CONFIG_SYSGENID=y # CONFIG_C2PORT is not set # # EEPROM support # # CONFIG_EEPROM_93CX6 is not set # end of EEPROM support # # Texas Instruments shared transport line discipline # # end of Texas Instruments shared transport line discipline # # Altera FPGA firmware download module (requires I2C) # # CONFIG_ECHO is not set # CONFIG_UACCE is not set # end of Misc devices # # SCSI device support # CONFIG_SCSI_MOD=y # CONFIG_RAID_ATTRS is not set CONFIG_SCSI=y CONFIG_SCSI_DMA=y CONFIG_SCSI_PROC_FS=y # # SCSI support type (disk, tape, CD-ROM) # # CONFIG_BLK_DEV_SD is not set # CONFIG_CHR_DEV_ST is not set # CONFIG_BLK_DEV_SR is not set # CONFIG_CHR_DEV_SG is not set # CONFIG_CHR_DEV_SCH is not set # CONFIG_SCSI_CONSTANTS is not set # CONFIG_SCSI_LOGGING is not set # CONFIG_SCSI_SCAN_ASYNC is not set # # SCSI Transports # # CONFIG_SCSI_SPI_ATTRS is not set # CONFIG_SCSI_FC_ATTRS is not set CONFIG_SCSI_ISCSI_ATTRS=y # CONFIG_SCSI_SAS_ATTRS is not set # CONFIG_SCSI_SAS_LIBSAS is not set # CONFIG_SCSI_SRP_ATTRS is not set # end of SCSI Transports CONFIG_SCSI_LOWLEVEL=y CONFIG_ISCSI_TCP=y # CONFIG_ISCSI_BOOT_SYSFS is not set # CONFIG_SCSI_UFSHCD is not set # CONFIG_SCSI_DEBUG is not set # CONFIG_SCSI_VIRTIO is not set # CONFIG_SCSI_DH is not set # end of SCSI device support CONFIG_HAVE_PATA_PLATFORM=y # CONFIG_ATA is not set # CONFIG_MD is not set # CONFIG_TARGET_CORE is not set CONFIG_NETDEVICES=y CONFIG_NET_CORE=y # CONFIG_BONDING is not set # CONFIG_DUMMY is not set # CONFIG_WIREGUARD is not set # CONFIG_EQUALIZER is not set # CONFIG_NET_TEAM is not set # CONFIG_MACVLAN is not set # CONFIG_IPVLAN is not set # CONFIG_VXLAN is not set # CONFIG_GENEVE is not set # CONFIG_BAREUDP is not set # CONFIG_GTP is not set # CONFIG_MACSEC is not set # CONFIG_NETCONSOLE is not set # CONFIG_TUN is not set # CONFIG_TUN_VNET_CROSS_LE is not set CONFIG_VETH=y CONFIG_VIRTIO_NET=y # CONFIG_NLMON is not set # CONFIG_NET_VRF is not set 
# # Distributed Switch Architecture drivers # # end of Distributed Switch Architecture drivers # CONFIG_ETHERNET is not set # CONFIG_NET_SB1000 is not set # CONFIG_PHYLIB is not set # CONFIG_MDIO_DEVICE is not set # # PCS device drivers # # end of PCS device drivers # CONFIG_PPP is not set # CONFIG_SLIP is not set # # Host-side USB support is needed for USB Network Adapter support # # CONFIG_WLAN is not set # # Enable WiMAX (Networking options) to see the WiMAX drivers # # CONFIG_WAN is not set # CONFIG_FUJITSU_ES is not set # CONFIG_NETDEVSIM is not set CONFIG_NET_FAILOVER=y # CONFIG_ISDN is not set # # Input device support # CONFIG_INPUT=y CONFIG_INPUT_FF_MEMLESS=y # CONFIG_INPUT_POLLDEV is not set # CONFIG_INPUT_SPARSEKMAP is not set # CONFIG_INPUT_MATRIXKMAP is not set # # Userland interfaces # # CONFIG_INPUT_MOUSEDEV is not set # CONFIG_INPUT_JOYDEV is not set CONFIG_INPUT_EVDEV=y # CONFIG_INPUT_EVBUG is not set # # Input Device Drivers # # CONFIG_INPUT_KEYBOARD is not set # CONFIG_INPUT_MOUSE is not set # CONFIG_INPUT_JOYSTICK is not set # CONFIG_INPUT_TABLET is not set # CONFIG_INPUT_TOUCHSCREEN is not set # CONFIG_INPUT_MISC is not set # CONFIG_RMI4_CORE is not set # # Hardware I/O ports # # CONFIG_SERIO is not set # CONFIG_GAMEPORT is not set # end of Hardware I/O ports # end of Input device support # # Character devices # CONFIG_TTY=y CONFIG_VT=y CONFIG_CONSOLE_TRANSLATIONS=y CONFIG_VT_CONSOLE=y CONFIG_VT_CONSOLE_SLEEP=y CONFIG_HW_CONSOLE=y CONFIG_VT_HW_CONSOLE_BINDING=y CONFIG_UNIX98_PTYS=y # CONFIG_LEGACY_PTYS is not set CONFIG_LDISC_AUTOLOAD=y # # Serial drivers # CONFIG_SERIAL_EARLYCON=y CONFIG_SERIAL_8250=y # CONFIG_SERIAL_8250_DEPRECATED_OPTIONS is not set CONFIG_SERIAL_8250_PNP=y # CONFIG_SERIAL_8250_16550A_VARIANTS is not set # CONFIG_SERIAL_8250_FINTEK is not set CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_8250_DMA=y CONFIG_SERIAL_8250_NR_UARTS=1 CONFIG_SERIAL_8250_RUNTIME_UARTS=1 # CONFIG_SERIAL_8250_EXTENDED is not set CONFIG_SERIAL_8250_FSL=y # 
CONFIG_SERIAL_8250_DW is not set # CONFIG_SERIAL_8250_RT288X is not set CONFIG_SERIAL_OF_PLATFORM=y # # Non-8250 serial port support # # CONFIG_SERIAL_AMBA_PL010 is not set # CONFIG_SERIAL_AMBA_PL011 is not set # CONFIG_SERIAL_EARLYCON_ARM_SEMIHOST is not set # CONFIG_SERIAL_UARTLITE is not set CONFIG_SERIAL_CORE=y CONFIG_SERIAL_CORE_CONSOLE=y # CONFIG_SERIAL_SIFIVE is not set # CONFIG_SERIAL_SCCNXP is not set # CONFIG_SERIAL_ALTERA_JTAGUART is not set # CONFIG_SERIAL_ALTERA_UART is not set # CONFIG_SERIAL_XILINX_PS_UART is not set # CONFIG_SERIAL_ARC is not set # CONFIG_SERIAL_FSL_LPUART is not set # CONFIG_SERIAL_FSL_LINFLEXUART is not set # CONFIG_SERIAL_CONEXANT_DIGICOLOR is not set # CONFIG_SERIAL_SPRD is not set # end of Serial drivers # CONFIG_SERIAL_NONSTANDARD is not set # CONFIG_N_GSM is not set # CONFIG_NULL_TTY is not set # CONFIG_TRACE_SINK is not set CONFIG_HVC_DRIVER=y # CONFIG_HVC_DCC is not set # CONFIG_SERIAL_DEV_BUS is not set CONFIG_VIRTIO_CONSOLE=y # CONFIG_IPMI_HANDLER is not set CONFIG_HW_RANDOM=y # CONFIG_HW_RANDOM_TIMERIOMEM is not set # CONFIG_HW_RANDOM_BA431 is not set CONFIG_HW_RANDOM_VIRTIO=y # CONFIG_HW_RANDOM_HISI_V2 is not set # CONFIG_HW_RANDOM_CCTRNG is not set # CONFIG_HW_RANDOM_XIPHERA is not set # CONFIG_HW_RANDOM_GRAVITON is not set # CONFIG_DEVMEM is not set # CONFIG_RAW_DRIVER is not set # CONFIG_TCG_TPM is not set # CONFIG_XILLYBUS is not set CONFIG_RANDOM_TRUST_CPU=y CONFIG_RANDOM_TRUST_BOOTLOADER=y # end of Character devices # # I2C support # # CONFIG_I2C is not set # end of I2C support # CONFIG_I3C is not set # CONFIG_SPI is not set # CONFIG_SPMI is not set # CONFIG_HSI is not set CONFIG_PPS=y # CONFIG_PPS_DEBUG is not set # # PPS clients support # # CONFIG_PPS_CLIENT_KTIMER is not set # CONFIG_PPS_CLIENT_LDISC is not set # CONFIG_PPS_CLIENT_GPIO is not set # # PPS generators support # # # PTP clock support # CONFIG_PTP_1588_CLOCK=y # # Enable PHYLIB and NETWORK_PHY_TIMESTAMPING to see the additional clocks. 
# CONFIG_PTP_1588_CLOCK_KVM=y # end of PTP clock support # CONFIG_PINCTRL is not set # CONFIG_GPIOLIB is not set # CONFIG_W1 is not set CONFIG_POWER_RESET=y # CONFIG_POWER_RESET_RESTART is not set # CONFIG_POWER_RESET_XGENE is not set # CONFIG_POWER_RESET_SYSCON is not set # CONFIG_POWER_RESET_SYSCON_POWEROFF is not set # CONFIG_NVMEM_REBOOT_MODE is not set CONFIG_POWER_SUPPLY=y # CONFIG_POWER_SUPPLY_DEBUG is not set # CONFIG_PDA_POWER is not set # CONFIG_TEST_POWER is not set # CONFIG_BATTERY_DS2780 is not set # CONFIG_BATTERY_DS2781 is not set # CONFIG_BATTERY_BQ27XXX is not set # CONFIG_CHARGER_MAX8903 is not set # CONFIG_HWMON is not set CONFIG_THERMAL=y # CONFIG_THERMAL_NETLINK is not set # CONFIG_THERMAL_STATISTICS is not set CONFIG_THERMAL_EMERGENCY_POWEROFF_DELAY_MS=0 CONFIG_THERMAL_OF=y # CONFIG_THERMAL_WRITABLE_TRIPS is not set CONFIG_THERMAL_DEFAULT_GOV_STEP_WISE=y # CONFIG_THERMAL_DEFAULT_GOV_FAIR_SHARE is not set # CONFIG_THERMAL_DEFAULT_GOV_USER_SPACE is not set CONFIG_THERMAL_GOV_FAIR_SHARE=y CONFIG_THERMAL_GOV_STEP_WISE=y # CONFIG_THERMAL_GOV_BANG_BANG is not set CONFIG_THERMAL_GOV_USER_SPACE=y # CONFIG_CPU_THERMAL is not set # CONFIG_THERMAL_EMULATION is not set # CONFIG_THERMAL_MMIO is not set CONFIG_WATCHDOG=y CONFIG_WATCHDOG_CORE=y # CONFIG_WATCHDOG_NOWAYOUT is not set CONFIG_WATCHDOG_HANDLE_BOOT_ENABLED=y CONFIG_WATCHDOG_OPEN_TIMEOUT=0 CONFIG_WATCHDOG_SYSFS=y # # Watchdog Pretimeout Governors # # CONFIG_WATCHDOG_PRETIMEOUT_GOV is not set # # Watchdog Device Drivers # # CONFIG_SOFT_WATCHDOG is not set # CONFIG_WDAT_WDT is not set # CONFIG_XILINX_WATCHDOG is not set # CONFIG_ARM_SP805_WATCHDOG is not set # CONFIG_ARM_SBSA_WATCHDOG is not set # CONFIG_CADENCE_WATCHDOG is not set # CONFIG_DW_WATCHDOG is not set # CONFIG_MAX63XX_WATCHDOG is not set # CONFIG_ARM_SMC_WATCHDOG is not set CONFIG_SSB_POSSIBLE=y # CONFIG_SSB is not set CONFIG_BCMA_POSSIBLE=y # CONFIG_BCMA is not set # # Multifunction device drivers # # CONFIG_MFD_ATMEL_FLEXCOM is not set 
# CONFIG_MFD_ATMEL_HLCDC is not set # CONFIG_MFD_MADERA is not set # CONFIG_MFD_HI6421_PMIC is not set # CONFIG_HTC_PASIC3 is not set # CONFIG_MFD_KEMPLD is not set # CONFIG_MFD_MT6397 is not set # CONFIG_MFD_SM501 is not set # CONFIG_ABX500_CORE is not set # CONFIG_MFD_SYSCON is not set # CONFIG_MFD_TI_AM335X_TSCADC is not set # CONFIG_MFD_TQMX86 is not set # end of Multifunction device drivers # CONFIG_REGULATOR is not set # CONFIG_RC_CORE is not set # CONFIG_MEDIA_CEC_SUPPORT is not set # CONFIG_MEDIA_SUPPORT is not set # # Graphics support # # CONFIG_DRM is not set # # ARM devices # # end of ARM devices # # Frame buffer Devices # # CONFIG_FB is not set # end of Frame buffer Devices # # Backlight & LCD device support # # CONFIG_LCD_CLASS_DEVICE is not set # CONFIG_BACKLIGHT_CLASS_DEVICE is not set # end of Backlight & LCD device support # # Console display driver support # CONFIG_DUMMY_CONSOLE=y CONFIG_DUMMY_CONSOLE_COLUMNS=80 CONFIG_DUMMY_CONSOLE_ROWS=25 # end of Console display driver support # end of Graphics support # CONFIG_SOUND is not set # # HID support # CONFIG_HID=y # CONFIG_HID_BATTERY_STRENGTH is not set CONFIG_HIDRAW=y # CONFIG_UHID is not set # CONFIG_HID_GENERIC is not set # # Special HID drivers # # CONFIG_HID_A4TECH is not set # CONFIG_HID_ACRUX is not set # CONFIG_HID_APPLE is not set # CONFIG_HID_AUREAL is not set # CONFIG_HID_BELKIN is not set # CONFIG_HID_CHERRY is not set # CONFIG_HID_COUGAR is not set # CONFIG_HID_MACALLY is not set # CONFIG_HID_CMEDIA is not set # CONFIG_HID_CYPRESS is not set # CONFIG_HID_DRAGONRISE is not set # CONFIG_HID_EMS_FF is not set # CONFIG_HID_ELECOM is not set # CONFIG_HID_EZKEY is not set # CONFIG_HID_GEMBIRD is not set # CONFIG_HID_GFRM is not set # CONFIG_HID_GLORIOUS is not set # CONFIG_HID_VIVALDI is not set # CONFIG_HID_KEYTOUCH is not set # CONFIG_HID_KYE is not set # CONFIG_HID_WALTOP is not set # CONFIG_HID_VIEWSONIC is not set # CONFIG_HID_GYRATION is not set # CONFIG_HID_ICADE is not set # 
CONFIG_HID_ITE is not set # CONFIG_HID_JABRA is not set # CONFIG_HID_TWINHAN is not set # CONFIG_HID_KENSINGTON is not set # CONFIG_HID_LCPOWER is not set # CONFIG_HID_LENOVO is not set # CONFIG_HID_MAGICMOUSE is not set # CONFIG_HID_MALTRON is not set # CONFIG_HID_MAYFLASH is not set # CONFIG_HID_REDRAGON is not set # CONFIG_HID_MICROSOFT is not set # CONFIG_HID_MONTEREY is not set # CONFIG_HID_MULTITOUCH is not set # CONFIG_HID_NTI is not set # CONFIG_HID_ORTEK is not set # CONFIG_HID_PANTHERLORD is not set # CONFIG_HID_PETALYNX is not set # CONFIG_HID_PICOLCD is not set # CONFIG_HID_PLANTRONICS is not set # CONFIG_HID_PRIMAX is not set # CONFIG_HID_SAITEK is not set # CONFIG_HID_SPEEDLINK is not set # CONFIG_HID_STEAM is not set # CONFIG_HID_STEELSERIES is not set # CONFIG_HID_SUNPLUS is not set # CONFIG_HID_RMI is not set # CONFIG_HID_GREENASIA is not set # CONFIG_HID_SMARTJOYPLUS is not set # CONFIG_HID_TIVO is not set # CONFIG_HID_TOPSEED is not set # CONFIG_HID_THRUSTMASTER is not set # CONFIG_HID_UDRAW_PS3 is not set # CONFIG_HID_XINMO is not set # CONFIG_HID_ZEROPLUS is not set # CONFIG_HID_ZYDACRON is not set # CONFIG_HID_SENSOR_HUB is not set # CONFIG_HID_ALPS is not set # end of Special HID drivers # end of HID support CONFIG_USB_OHCI_LITTLE_ENDIAN=y CONFIG_USB_SUPPORT=y # CONFIG_USB_ULPI_BUS is not set CONFIG_USB_ARCH_HAS_HCD=y # CONFIG_USB is not set # # USB port drivers # # # USB Physical Layer drivers # # CONFIG_NOP_USB_XCEIV is not set # CONFIG_USB_ULPI is not set # end of USB Physical Layer drivers # CONFIG_USB_GADGET is not set # CONFIG_TYPEC is not set # CONFIG_USB_ROLE_SWITCH is not set # CONFIG_MMC is not set # CONFIG_MEMSTICK is not set # CONFIG_NEW_LEDS is not set # CONFIG_ACCESSIBILITY is not set # CONFIG_INFINIBAND is not set CONFIG_EDAC_SUPPORT=y # CONFIG_EDAC is not set CONFIG_RTC_LIB=y CONFIG_RTC_CLASS=y CONFIG_RTC_HCTOSYS=y CONFIG_RTC_HCTOSYS_DEVICE="rtc0" CONFIG_RTC_SYSTOHC=y CONFIG_RTC_SYSTOHC_DEVICE="rtc0" # CONFIG_RTC_DEBUG is not 
set CONFIG_RTC_NVMEM=y # # RTC interfaces # CONFIG_RTC_INTF_SYSFS=y CONFIG_RTC_INTF_PROC=y CONFIG_RTC_INTF_DEV=y # CONFIG_RTC_INTF_DEV_UIE_EMUL is not set # CONFIG_RTC_DRV_TEST is not set # # I2C RTC drivers # # # SPI RTC drivers # # # SPI and I2C RTC drivers # # # Platform RTC drivers # # CONFIG_RTC_DRV_DS1286 is not set # CONFIG_RTC_DRV_DS1511 is not set # CONFIG_RTC_DRV_DS1553 is not set # CONFIG_RTC_DRV_DS1685_FAMILY is not set # CONFIG_RTC_DRV_DS1742 is not set # CONFIG_RTC_DRV_DS2404 is not set # CONFIG_RTC_DRV_EFI is not set # CONFIG_RTC_DRV_STK17TA8 is not set # CONFIG_RTC_DRV_M48T86 is not set # CONFIG_RTC_DRV_M48T35 is not set # CONFIG_RTC_DRV_M48T59 is not set # CONFIG_RTC_DRV_MSM6242 is not set # CONFIG_RTC_DRV_BQ4802 is not set # CONFIG_RTC_DRV_RP5C01 is not set # CONFIG_RTC_DRV_V3020 is not set # CONFIG_RTC_DRV_ZYNQMP is not set # # on-CPU RTC drivers # # CONFIG_RTC_DRV_PL030 is not set CONFIG_RTC_DRV_PL031=y # CONFIG_RTC_DRV_CADENCE is not set # CONFIG_RTC_DRV_FTRTC010 is not set # CONFIG_RTC_DRV_R7301 is not set # # HID Sensor RTC drivers # CONFIG_DMADEVICES=y # CONFIG_DMADEVICES_DEBUG is not set # # DMA Devices # CONFIG_DMA_ENGINE=y CONFIG_DMA_ACPI=y CONFIG_DMA_OF=y # CONFIG_ALTERA_MSGDMA is not set # CONFIG_AMBA_PL08X is not set # CONFIG_DW_AXI_DMAC is not set # CONFIG_FSL_EDMA is not set # CONFIG_FSL_QDMA is not set # CONFIG_INTEL_IDMA64 is not set # CONFIG_MV_XOR_V2 is not set # CONFIG_PL330_DMA is not set # CONFIG_XILINX_DMA is not set # CONFIG_XILINX_ZYNQMP_DMA is not set # CONFIG_XILINX_ZYNQMP_DPDMA is not set # CONFIG_QCOM_HIDMA_MGMT is not set # CONFIG_QCOM_HIDMA is not set # CONFIG_DW_DMAC is not set # CONFIG_SF_PDMA is not set # # DMA Clients # # CONFIG_ASYNC_TX_DMA is not set # CONFIG_DMATEST is not set # # DMABUF options # CONFIG_SYNC_FILE=y # CONFIG_SW_SYNC is not set # CONFIG_UDMABUF is not set # CONFIG_DMABUF_MOVE_NOTIFY is not set # CONFIG_DMABUF_SELFTESTS is not set # CONFIG_DMABUF_HEAPS is not set # end of DMABUF options 
CONFIG_AUXDISPLAY=y # CONFIG_IMG_ASCII_LCD is not set CONFIG_CHARLCD_BL_OFF=y # CONFIG_CHARLCD_BL_ON is not set # CONFIG_CHARLCD_BL_FLASH is not set # CONFIG_UIO is not set # CONFIG_VFIO is not set CONFIG_VIRT_DRIVERS=y CONFIG_VMGENID=y CONFIG_VIRTIO=y CONFIG_VIRTIO_MENU=y CONFIG_VIRTIO_BALLOON=y # CONFIG_VIRTIO_INPUT is not set CONFIG_VIRTIO_MMIO=y # CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES is not set # CONFIG_VDPA is not set CONFIG_VHOST_MENU=y # CONFIG_VHOST_NET is not set # CONFIG_VHOST_VSOCK is not set # CONFIG_VHOST_CROSS_ENDIAN_LEGACY is not set # # Microsoft Hyper-V guest support # # end of Microsoft Hyper-V guest support # CONFIG_GREYBUS is not set CONFIG_STAGING=y # CONFIG_COMEDI is not set # CONFIG_STAGING_MEDIA is not set # # Android # # end of Android # CONFIG_STAGING_BOARD is not set # CONFIG_GS_FPGABOOT is not set # CONFIG_UNISYSSPAR is not set # CONFIG_COMMON_CLK_XLNX_CLKWZRD is not set # # Gasket devices # # end of Gasket devices # CONFIG_XIL_AXIS_FIFO is not set # CONFIG_FIELDBUS_DEV is not set # CONFIG_GOLDFISH is not set # CONFIG_CHROME_PLATFORMS is not set # CONFIG_MELLANOX_PLATFORM is not set CONFIG_HAVE_CLK=y CONFIG_CLKDEV_LOOKUP=y CONFIG_HAVE_CLK_PREPARE=y CONFIG_COMMON_CLK=y # CONFIG_CLK_QORIQ is not set # CONFIG_COMMON_CLK_XGENE is not set # CONFIG_COMMON_CLK_FIXED_MMIO is not set # CONFIG_HWSPINLOCK is not set # # Clock Source drivers # CONFIG_TIMER_OF=y CONFIG_TIMER_ACPI=y CONFIG_TIMER_PROBE=y CONFIG_ARM_ARCH_TIMER=y CONFIG_ARM_ARCH_TIMER_EVTSTREAM=y CONFIG_ARM_ARCH_TIMER_OOL_WORKAROUND=y CONFIG_FSL_ERRATUM_A008585=y CONFIG_HISILICON_ERRATUM_161010101=y CONFIG_ARM64_ERRATUM_858921=y # CONFIG_MICROCHIP_PIT64B is not set # end of Clock Source drivers CONFIG_MAILBOX=y # CONFIG_ARM_MHU is not set # CONFIG_PLATFORM_MHU is not set # CONFIG_PL320_MBOX is not set # CONFIG_PCC is not set # CONFIG_ALTERA_MBOX is not set # CONFIG_MAILBOX_TEST is not set CONFIG_IOMMU_IOVA=y CONFIG_IOMMU_API=y CONFIG_IOMMU_SUPPORT=y # # Generic IOMMU Pagetable Support # # 
CONFIG_IOMMU_IO_PGTABLE_LPAE is not set # CONFIG_IOMMU_IO_PGTABLE_ARMV7S is not set # end of Generic IOMMU Pagetable Support # CONFIG_IOMMU_DEBUGFS is not set # CONFIG_IOMMU_DEFAULT_PASSTHROUGH is not set CONFIG_OF_IOMMU=y CONFIG_IOMMU_DMA=y # CONFIG_ARM_SMMU is not set # CONFIG_ARM_SMMU_V3 is not set # CONFIG_VIRTIO_IOMMU is not set # # Remoteproc drivers # # CONFIG_REMOTEPROC is not set # end of Remoteproc drivers # # Rpmsg drivers # # CONFIG_RPMSG_QCOM_GLINK_RPM is not set # CONFIG_RPMSG_VIRTIO is not set # end of Rpmsg drivers # CONFIG_SOUNDWIRE is not set # # SOC (System On Chip) specific Drivers # # # Amlogic SoC drivers # # end of Amlogic SoC drivers # # Aspeed SoC drivers # # end of Aspeed SoC drivers # # Broadcom SoC drivers # # CONFIG_SOC_BRCMSTB is not set # end of Broadcom SoC drivers # # NXP/Freescale QorIQ SoC drivers # # CONFIG_QUICC_ENGINE is not set # CONFIG_FSL_RCPM is not set # end of NXP/Freescale QorIQ SoC drivers # # i.MX SoC drivers # # end of i.MX SoC drivers # # Qualcomm SoC drivers # # end of Qualcomm SoC drivers # CONFIG_SOC_TI is not set # # Xilinx SoC drivers # # CONFIG_XILINX_VCU is not set # end of Xilinx SoC drivers # end of SOC (System On Chip) specific Drivers # CONFIG_PM_DEVFREQ is not set # CONFIG_EXTCON is not set # CONFIG_MEMORY is not set # CONFIG_IIO is not set # CONFIG_PWM is not set # # IRQ chip support # CONFIG_IRQCHIP=y CONFIG_ARM_GIC=y CONFIG_ARM_GIC_MAX_NR=1 CONFIG_ARM_GIC_V3=y CONFIG_ARM_GIC_V3_ITS=y # CONFIG_AL_FIC is not set CONFIG_PARTITION_PERCPU=y # end of IRQ chip support # CONFIG_IPACK_BUS is not set # CONFIG_RESET_CONTROLLER is not set # # PHY Subsystem # # CONFIG_GENERIC_PHY is not set # CONFIG_PHY_XGENE is not set # CONFIG_BCM_KONA_USB2_PHY is not set # CONFIG_PHY_CADENCE_TORRENT is not set # CONFIG_PHY_CADENCE_DPHY is not set # CONFIG_PHY_CADENCE_SALVO is not set # CONFIG_PHY_FSL_IMX8MQ_USB is not set # CONFIG_PHY_MIXEL_MIPI_DPHY is not set # CONFIG_PHY_PXA_28NM_HSIC is not set # CONFIG_PHY_PXA_28NM_USB2 is 
not set # end of PHY Subsystem # CONFIG_POWERCAP is not set # CONFIG_MCB is not set # # Performance monitor support # # CONFIG_ARM_CCI_PMU is not set # CONFIG_ARM_CCN is not set # CONFIG_ARM_CMN is not set CONFIG_ARM_PMU=y CONFIG_ARM_PMU_ACPI=y # CONFIG_ARM_DSU_PMU is not set # CONFIG_ARM_SPE_PMU is not set # CONFIG_HISI_PMU is not set # end of Performance monitor support CONFIG_RAS=y # # Android # # CONFIG_ANDROID is not set # end of Android # CONFIG_LIBNVDIMM is not set # CONFIG_DAX is not set CONFIG_NVMEM=y CONFIG_NVMEM_SYSFS=y # # HW tracing support # # CONFIG_STM is not set # CONFIG_INTEL_TH is not set # end of HW tracing support # CONFIG_FPGA is not set # CONFIG_FSI is not set # CONFIG_TEE is not set # CONFIG_SIOX is not set # CONFIG_SLIMBUS is not set # CONFIG_INTERCONNECT is not set # CONFIG_COUNTER is not set # end of Device Drivers # # File systems # CONFIG_DCACHE_WORD_ACCESS=y CONFIG_VALIDATE_FS_PARSER=y CONFIG_FS_IOMAP=y # CONFIG_EXT2_FS is not set # CONFIG_EXT3_FS is not set CONFIG_EXT4_FS=y CONFIG_EXT4_USE_FOR_EXT2=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_EXT4_FS_SECURITY=y # CONFIG_EXT4_DEBUG is not set CONFIG_JBD2=y # CONFIG_JBD2_DEBUG is not set CONFIG_FS_MBCACHE=y # CONFIG_REISERFS_FS is not set # CONFIG_JFS_FS is not set CONFIG_XFS_FS=y CONFIG_XFS_SUPPORT_V4=y CONFIG_XFS_QUOTA=y CONFIG_XFS_POSIX_ACL=y # CONFIG_XFS_RT is not set # CONFIG_XFS_ONLINE_SCRUB is not set # CONFIG_XFS_WARN is not set # CONFIG_XFS_DEBUG is not set # CONFIG_GFS2_FS is not set # CONFIG_BTRFS_FS is not set # CONFIG_NILFS2_FS is not set # CONFIG_F2FS_FS is not set # CONFIG_FS_DAX is not set CONFIG_FS_POSIX_ACL=y CONFIG_EXPORTFS=y # CONFIG_EXPORTFS_BLOCK_OPS is not set CONFIG_FILE_LOCKING=y CONFIG_MANDATORY_FILE_LOCKING=y CONFIG_FS_ENCRYPTION=y CONFIG_FS_ENCRYPTION_ALGS=y # CONFIG_FS_VERITY is not set CONFIG_FSNOTIFY=y CONFIG_DNOTIFY=y CONFIG_INOTIFY_USER=y CONFIG_FANOTIFY=y CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y CONFIG_QUOTA=y CONFIG_QUOTA_NETLINK_INTERFACE=y # 
CONFIG_PRINT_QUOTA_WARNING is not set # CONFIG_QUOTA_DEBUG is not set # CONFIG_QFMT_V1 is not set # CONFIG_QFMT_V2 is not set CONFIG_QUOTACTL=y CONFIG_AUTOFS4_FS=y CONFIG_AUTOFS_FS=y # CONFIG_FUSE_FS is not set CONFIG_OVERLAY_FS=y # CONFIG_OVERLAY_FS_REDIRECT_DIR is not set CONFIG_OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW=y # CONFIG_OVERLAY_FS_INDEX is not set # CONFIG_OVERLAY_FS_XINO_AUTO is not set # CONFIG_OVERLAY_FS_METACOPY is not set # # Caches # # CONFIG_FSCACHE is not set # end of Caches # # CD-ROM/DVD Filesystems # # CONFIG_ISO9660_FS is not set # CONFIG_UDF_FS is not set # end of CD-ROM/DVD Filesystems # # DOS/FAT/EXFAT/NT Filesystems # # CONFIG_MSDOS_FS is not set # CONFIG_VFAT_FS is not set # CONFIG_EXFAT_FS is not set # CONFIG_NTFS_FS is not set # end of DOS/FAT/EXFAT/NT Filesystems # # Pseudo filesystems # CONFIG_PROC_FS=y CONFIG_PROC_KCORE=y CONFIG_PROC_SYSCTL=y CONFIG_PROC_PAGE_MONITOR=y CONFIG_PROC_CHILDREN=y CONFIG_KERNFS=y CONFIG_SYSFS=y CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y CONFIG_TMPFS_XATTR=y # CONFIG_TMPFS_INODE64 is not set CONFIG_HUGETLBFS=y CONFIG_HUGETLB_PAGE=y CONFIG_MEMFD_CREATE=y CONFIG_ARCH_HAS_GIGANTIC_PAGE=y # CONFIG_CONFIGFS_FS is not set # CONFIG_EFIVAR_FS is not set # end of Pseudo filesystems CONFIG_MISC_FILESYSTEMS=y # CONFIG_ORANGEFS_FS is not set # CONFIG_ADFS_FS is not set # CONFIG_AFFS_FS is not set # CONFIG_ECRYPT_FS is not set # CONFIG_HFS_FS is not set # CONFIG_HFSPLUS_FS is not set # CONFIG_BEFS_FS is not set # CONFIG_BFS_FS is not set # CONFIG_EFS_FS is not set # CONFIG_CRAMFS is not set CONFIG_SQUASHFS=y CONFIG_SQUASHFS_FILE_CACHE=y # CONFIG_SQUASHFS_FILE_DIRECT is not set CONFIG_SQUASHFS_DECOMP_SINGLE=y # CONFIG_SQUASHFS_DECOMP_MULTI is not set # CONFIG_SQUASHFS_DECOMP_MULTI_PERCPU is not set CONFIG_SQUASHFS_XATTR=y CONFIG_SQUASHFS_ZLIB=y CONFIG_SQUASHFS_LZ4=y CONFIG_SQUASHFS_LZO=y CONFIG_SQUASHFS_XZ=y # CONFIG_SQUASHFS_ZSTD is not set # CONFIG_SQUASHFS_4K_DEVBLK_SIZE is not set # CONFIG_SQUASHFS_EMBEDDED is not set 
CONFIG_SQUASHFS_FRAGMENT_CACHE_SIZE=3 # CONFIG_VXFS_FS is not set # CONFIG_MINIX_FS is not set # CONFIG_OMFS_FS is not set # CONFIG_HPFS_FS is not set # CONFIG_QNX4FS_FS is not set # CONFIG_QNX6FS_FS is not set # CONFIG_ROMFS_FS is not set CONFIG_PSTORE=y CONFIG_PSTORE_DEFLATE_COMPRESS=y # CONFIG_PSTORE_LZO_COMPRESS is not set # CONFIG_PSTORE_LZ4_COMPRESS is not set # CONFIG_PSTORE_LZ4HC_COMPRESS is not set # CONFIG_PSTORE_842_COMPRESS is not set # CONFIG_PSTORE_ZSTD_COMPRESS is not set CONFIG_PSTORE_COMPRESS=y CONFIG_PSTORE_DEFLATE_COMPRESS_DEFAULT=y CONFIG_PSTORE_COMPRESS_DEFAULT="deflate" # CONFIG_PSTORE_CONSOLE is not set # CONFIG_PSTORE_PMSG is not set # CONFIG_PSTORE_RAM is not set # CONFIG_SYSV_FS is not set # CONFIG_UFS_FS is not set # CONFIG_EROFS_FS is not set CONFIG_NETWORK_FILESYSTEMS=y CONFIG_NFS_FS=y # CONFIG_NFS_V2 is not set # CONFIG_NFS_V3 is not set CONFIG_NFS_V4=y # CONFIG_NFS_SWAP is not set CONFIG_NFS_V4_1=y CONFIG_NFS_V4_2=y CONFIG_PNFS_FILE_LAYOUT=y CONFIG_NFS_V4_1_IMPLEMENTATION_ID_DOMAIN="kernel.org" # CONFIG_NFS_V4_1_MIGRATION is not set CONFIG_NFS_V4_SECURITY_LABEL=y # CONFIG_ROOT_NFS is not set # CONFIG_NFS_USE_LEGACY_DNS is not set CONFIG_NFS_USE_KERNEL_DNS=y CONFIG_NFS_DISABLE_UDP_SUPPORT=y # CONFIG_NFS_V4_2_READ_PLUS is not set # CONFIG_NFSD is not set CONFIG_GRACE_PERIOD=y CONFIG_LOCKD=y CONFIG_NFS_COMMON=y CONFIG_SUNRPC=y CONFIG_SUNRPC_GSS=y CONFIG_SUNRPC_BACKCHANNEL=y # CONFIG_SUNRPC_DEBUG is not set # CONFIG_CEPH_FS is not set # CONFIG_CIFS is not set # CONFIG_CODA_FS is not set # CONFIG_AFS_FS is not set CONFIG_NLS=y CONFIG_NLS_DEFAULT="utf8" # CONFIG_NLS_CODEPAGE_437 is not set # CONFIG_NLS_CODEPAGE_737 is not set # CONFIG_NLS_CODEPAGE_775 is not set # CONFIG_NLS_CODEPAGE_850 is not set # CONFIG_NLS_CODEPAGE_852 is not set # CONFIG_NLS_CODEPAGE_855 is not set # CONFIG_NLS_CODEPAGE_857 is not set # CONFIG_NLS_CODEPAGE_860 is not set # CONFIG_NLS_CODEPAGE_861 is not set # CONFIG_NLS_CODEPAGE_862 is not set # 
CONFIG_NLS_CODEPAGE_863 is not set # CONFIG_NLS_CODEPAGE_864 is not set # CONFIG_NLS_CODEPAGE_865 is not set # CONFIG_NLS_CODEPAGE_866 is not set # CONFIG_NLS_CODEPAGE_869 is not set # CONFIG_NLS_CODEPAGE_936 is not set # CONFIG_NLS_CODEPAGE_950 is not set # CONFIG_NLS_CODEPAGE_932 is not set # CONFIG_NLS_CODEPAGE_949 is not set # CONFIG_NLS_CODEPAGE_874 is not set # CONFIG_NLS_ISO8859_8 is not set # CONFIG_NLS_CODEPAGE_1250 is not set # CONFIG_NLS_CODEPAGE_1251 is not set # CONFIG_NLS_ASCII is not set # CONFIG_NLS_ISO8859_1 is not set # CONFIG_NLS_ISO8859_2 is not set # CONFIG_NLS_ISO8859_3 is not set # CONFIG_NLS_ISO8859_4 is not set # CONFIG_NLS_ISO8859_5 is not set # CONFIG_NLS_ISO8859_6 is not set # CONFIG_NLS_ISO8859_7 is not set # CONFIG_NLS_ISO8859_9 is not set # CONFIG_NLS_ISO8859_13 is not set # CONFIG_NLS_ISO8859_14 is not set # CONFIG_NLS_ISO8859_15 is not set # CONFIG_NLS_KOI8_R is not set # CONFIG_NLS_KOI8_U is not set # CONFIG_NLS_MAC_ROMAN is not set # CONFIG_NLS_MAC_CELTIC is not set # CONFIG_NLS_MAC_CENTEURO is not set # CONFIG_NLS_MAC_CROATIAN is not set # CONFIG_NLS_MAC_CYRILLIC is not set # CONFIG_NLS_MAC_GAELIC is not set # CONFIG_NLS_MAC_GREEK is not set # CONFIG_NLS_MAC_ICELAND is not set # CONFIG_NLS_MAC_INUIT is not set # CONFIG_NLS_MAC_ROMANIAN is not set # CONFIG_NLS_MAC_TURKISH is not set # CONFIG_NLS_UTF8 is not set # CONFIG_UNICODE is not set CONFIG_IO_WQ=y # end of File systems # # Security options # CONFIG_KEYS=y # CONFIG_KEYS_REQUEST_CACHE is not set CONFIG_PERSISTENT_KEYRINGS=y CONFIG_ENCRYPTED_KEYS=y # CONFIG_KEY_DH_OPERATIONS is not set # CONFIG_SECURITY_DMESG_RESTRICT is not set CONFIG_SECURITY=y CONFIG_SECURITY_WRITABLE_HOOKS=y CONFIG_SECURITYFS=y CONFIG_SECURITY_NETWORK=y CONFIG_SECURITY_NETWORK_XFRM=y # CONFIG_SECURITY_PATH is not set CONFIG_LSM_MMAP_MIN_ADDR=65536 CONFIG_HAVE_HARDENED_USERCOPY_ALLOCATOR=y CONFIG_HARDENED_USERCOPY=y CONFIG_HARDENED_USERCOPY_FALLBACK=y # CONFIG_FORTIFY_SOURCE is not set # 
CONFIG_STATIC_USERMODEHELPER is not set CONFIG_SECURITY_SELINUX=y CONFIG_SECURITY_SELINUX_BOOTPARAM=y CONFIG_SECURITY_SELINUX_DISABLE=y CONFIG_SECURITY_SELINUX_DEVELOP=y CONFIG_SECURITY_SELINUX_AVC_STATS=y CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE=1 CONFIG_SECURITY_SELINUX_SIDTAB_HASH_BITS=9 CONFIG_SECURITY_SELINUX_SID2STR_CACHE_SIZE=256 # CONFIG_SECURITY_SMACK is not set # CONFIG_SECURITY_TOMOYO is not set # CONFIG_SECURITY_APPARMOR is not set # CONFIG_SECURITY_LOADPIN is not set # CONFIG_SECURITY_YAMA is not set # CONFIG_SECURITY_SAFESETID is not set # CONFIG_SECURITY_LOCKDOWN_LSM is not set # CONFIG_INTEGRITY is not set CONFIG_DEFAULT_SECURITY_SELINUX=y # CONFIG_DEFAULT_SECURITY_DAC is not set CONFIG_LSM="lockdown,yama,loadpin,safesetid,integrity,selinux,smack,tomoyo,apparmor,bpf" # # Kernel hardening options # # # Memory initialization # CONFIG_INIT_STACK_NONE=y # CONFIG_INIT_ON_ALLOC_DEFAULT_ON is not set # CONFIG_INIT_ON_FREE_DEFAULT_ON is not set # end of Memory initialization # end of Kernel hardening options # end of Security options CONFIG_CRYPTO=y # # Crypto core or helper # CONFIG_CRYPTO_ALGAPI=y CONFIG_CRYPTO_ALGAPI2=y CONFIG_CRYPTO_AEAD=y CONFIG_CRYPTO_AEAD2=y CONFIG_CRYPTO_SKCIPHER=y CONFIG_CRYPTO_SKCIPHER2=y CONFIG_CRYPTO_HASH=y CONFIG_CRYPTO_HASH2=y CONFIG_CRYPTO_RNG=y CONFIG_CRYPTO_RNG2=y CONFIG_CRYPTO_RNG_DEFAULT=y CONFIG_CRYPTO_AKCIPHER2=y CONFIG_CRYPTO_AKCIPHER=y CONFIG_CRYPTO_KPP2=y CONFIG_CRYPTO_ACOMP2=y CONFIG_CRYPTO_MANAGER=y CONFIG_CRYPTO_MANAGER2=y # CONFIG_CRYPTO_USER is not set CONFIG_CRYPTO_MANAGER_DISABLE_TESTS=y CONFIG_CRYPTO_NULL=y CONFIG_CRYPTO_NULL2=y # CONFIG_CRYPTO_PCRYPT is not set # CONFIG_CRYPTO_CRYPTD is not set # CONFIG_CRYPTO_AUTHENC is not set # # Public-key cryptography # CONFIG_CRYPTO_RSA=y # CONFIG_CRYPTO_DH is not set # CONFIG_CRYPTO_ECDH is not set # CONFIG_CRYPTO_ECRDSA is not set # CONFIG_CRYPTO_SM2 is not set # CONFIG_CRYPTO_CURVE25519 is not set # # Authenticated Encryption with Associated Data # # 
CONFIG_CRYPTO_CCM is not set # CONFIG_CRYPTO_GCM is not set # CONFIG_CRYPTO_CHACHA20POLY1305 is not set # CONFIG_CRYPTO_AEGIS128 is not set CONFIG_CRYPTO_SEQIV=y # CONFIG_CRYPTO_ECHAINIV is not set # # Block modes # CONFIG_CRYPTO_CBC=y # CONFIG_CRYPTO_CFB is not set CONFIG_CRYPTO_CTR=y CONFIG_CRYPTO_CTS=y CONFIG_CRYPTO_ECB=y # CONFIG_CRYPTO_LRW is not set # CONFIG_CRYPTO_OFB is not set # CONFIG_CRYPTO_PCBC is not set CONFIG_CRYPTO_XTS=y # CONFIG_CRYPTO_KEYWRAP is not set # CONFIG_CRYPTO_ADIANTUM is not set # CONFIG_CRYPTO_ESSIV is not set # # Hash modes # # CONFIG_CRYPTO_CMAC is not set CONFIG_CRYPTO_HMAC=y # CONFIG_CRYPTO_XCBC is not set # CONFIG_CRYPTO_VMAC is not set # # Digest # CONFIG_CRYPTO_CRC32C=y # CONFIG_CRYPTO_CRC32 is not set # CONFIG_CRYPTO_XXHASH is not set # CONFIG_CRYPTO_BLAKE2B is not set # CONFIG_CRYPTO_BLAKE2S is not set CONFIG_CRYPTO_CRCT10DIF=y # CONFIG_CRYPTO_GHASH is not set # CONFIG_CRYPTO_POLY1305 is not set # CONFIG_CRYPTO_MD4 is not set CONFIG_CRYPTO_MD5=y # CONFIG_CRYPTO_MICHAEL_MIC is not set # CONFIG_CRYPTO_RMD128 is not set # CONFIG_CRYPTO_RMD160 is not set # CONFIG_CRYPTO_RMD256 is not set # CONFIG_CRYPTO_RMD320 is not set CONFIG_CRYPTO_SHA1=y CONFIG_CRYPTO_SHA256=y CONFIG_CRYPTO_SHA512=y # CONFIG_CRYPTO_SHA3 is not set # CONFIG_CRYPTO_SM3 is not set # CONFIG_CRYPTO_STREEBOG is not set # CONFIG_CRYPTO_TGR192 is not set # CONFIG_CRYPTO_WP512 is not set # # Ciphers # CONFIG_CRYPTO_AES=y # CONFIG_CRYPTO_AES_TI is not set # CONFIG_CRYPTO_BLOWFISH is not set # CONFIG_CRYPTO_CAMELLIA is not set # CONFIG_CRYPTO_CAST5 is not set # CONFIG_CRYPTO_CAST6 is not set # CONFIG_CRYPTO_DES is not set # CONFIG_CRYPTO_FCRYPT is not set # CONFIG_CRYPTO_SALSA20 is not set # CONFIG_CRYPTO_CHACHA20 is not set # CONFIG_CRYPTO_SERPENT is not set # CONFIG_CRYPTO_SM4 is not set # CONFIG_CRYPTO_TWOFISH is not set # # Compression # CONFIG_CRYPTO_DEFLATE=y CONFIG_CRYPTO_LZO=y # CONFIG_CRYPTO_842 is not set # CONFIG_CRYPTO_LZ4 is not set # CONFIG_CRYPTO_LZ4HC is 
not set # CONFIG_CRYPTO_ZSTD is not set # # Random Number Generation # # CONFIG_CRYPTO_ANSI_CPRNG is not set CONFIG_CRYPTO_DRBG_MENU=y CONFIG_CRYPTO_DRBG_HMAC=y CONFIG_CRYPTO_DRBG_HASH=y CONFIG_CRYPTO_DRBG_CTR=y CONFIG_CRYPTO_DRBG=y CONFIG_CRYPTO_JITTERENTROPY=y # CONFIG_CRYPTO_USER_API_HASH is not set # CONFIG_CRYPTO_USER_API_SKCIPHER is not set # CONFIG_CRYPTO_USER_API_RNG is not set # CONFIG_CRYPTO_USER_API_AEAD is not set CONFIG_CRYPTO_HASH_INFO=y # CONFIG_CRYPTO_HW is not set CONFIG_ASYMMETRIC_KEY_TYPE=y CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=y CONFIG_X509_CERTIFICATE_PARSER=y # CONFIG_PKCS8_PRIVATE_KEY_PARSER is not set CONFIG_PKCS7_MESSAGE_PARSER=y # # Certificates for signature checking # CONFIG_SYSTEM_TRUSTED_KEYRING=y CONFIG_SYSTEM_TRUSTED_KEYS="" # CONFIG_SYSTEM_EXTRA_CERTIFICATE is not set # CONFIG_SECONDARY_TRUSTED_KEYRING is not set # CONFIG_SYSTEM_BLACKLIST_KEYRING is not set # end of Certificates for signature checking # # Library routines # # CONFIG_PACKING is not set CONFIG_BITREVERSE=y CONFIG_HAVE_ARCH_BITREVERSE=y CONFIG_GENERIC_STRNCPY_FROM_USER=y CONFIG_GENERIC_STRNLEN_USER=y CONFIG_GENERIC_NET_UTILS=y # CONFIG_CORDIC is not set # CONFIG_PRIME_NUMBERS is not set CONFIG_RATIONAL=y CONFIG_GENERIC_PCI_IOMAP=y CONFIG_ARCH_USE_CMPXCHG_LOCKREF=y CONFIG_ARCH_HAS_FAST_MULTIPLIER=y CONFIG_ARCH_USE_SYM_ANNOTATIONS=y # CONFIG_INDIRECT_PIO is not set # # Crypto library routines # CONFIG_CRYPTO_LIB_AES=y CONFIG_CRYPTO_LIB_BLAKE2S_GENERIC=y # CONFIG_CRYPTO_LIB_CHACHA is not set # CONFIG_CRYPTO_LIB_CURVE25519 is not set CONFIG_CRYPTO_LIB_POLY1305_RSIZE=9 # CONFIG_CRYPTO_LIB_POLY1305 is not set # CONFIG_CRYPTO_LIB_CHACHA20POLY1305 is not set CONFIG_CRYPTO_LIB_SHA256=y # end of Crypto library routines CONFIG_LIB_MEMNEQ=y CONFIG_CRC_CCITT=y CONFIG_CRC16=y CONFIG_CRC_T10DIF=y # CONFIG_CRC_ITU_T is not set CONFIG_CRC32=y # CONFIG_CRC32_SELFTEST is not set CONFIG_CRC32_SLICEBY8=y # CONFIG_CRC32_SLICEBY4 is not set # CONFIG_CRC32_SARWATE is not set # CONFIG_CRC32_BIT 
is not set # CONFIG_CRC64 is not set # CONFIG_CRC4 is not set # CONFIG_CRC7 is not set CONFIG_LIBCRC32C=y # CONFIG_CRC8 is not set CONFIG_XXHASH=y CONFIG_AUDIT_GENERIC=y CONFIG_AUDIT_ARCH_COMPAT_GENERIC=y CONFIG_AUDIT_COMPAT_GENERIC=y # CONFIG_RANDOM32_SELFTEST is not set CONFIG_ZLIB_INFLATE=y CONFIG_ZLIB_DEFLATE=y CONFIG_LZO_COMPRESS=y CONFIG_LZO_DECOMPRESS=y CONFIG_LZ4_DECOMPRESS=y CONFIG_ZSTD_DECOMPRESS=y CONFIG_XZ_DEC=y CONFIG_XZ_DEC_X86=y CONFIG_XZ_DEC_POWERPC=y CONFIG_XZ_DEC_IA64=y CONFIG_XZ_DEC_ARM=y CONFIG_XZ_DEC_ARMTHUMB=y CONFIG_XZ_DEC_SPARC=y CONFIG_XZ_DEC_BCJ=y # CONFIG_XZ_DEC_TEST is not set CONFIG_DECOMPRESS_GZIP=y CONFIG_DECOMPRESS_BZIP2=y CONFIG_DECOMPRESS_LZMA=y CONFIG_DECOMPRESS_XZ=y CONFIG_DECOMPRESS_LZO=y CONFIG_DECOMPRESS_LZ4=y CONFIG_DECOMPRESS_ZSTD=y CONFIG_GENERIC_ALLOCATOR=y CONFIG_XARRAY_MULTI=y CONFIG_ASSOCIATIVE_ARRAY=y CONFIG_HAS_IOMEM=y CONFIG_HAS_DMA=y CONFIG_DMA_OPS=y # CONFIG_DMA_PAGE_TOUCHING is not set CONFIG_NEED_SG_DMA_LENGTH=y CONFIG_NEED_DMA_MAP_STATE=y CONFIG_ARCH_DMA_ADDR_T_64BIT=y CONFIG_DMA_DECLARE_COHERENT=y CONFIG_ARCH_HAS_SETUP_DMA_OPS=y CONFIG_ARCH_HAS_TEARDOWN_DMA_OPS=y CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE=y CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU=y CONFIG_ARCH_HAS_DMA_PREP_COHERENT=y CONFIG_SWIOTLB=y CONFIG_DMA_NONCOHERENT_MMAP=y CONFIG_DMA_COHERENT_POOL=y CONFIG_DMA_REMAP=y CONFIG_DMA_DIRECT_REMAP=y # CONFIG_DMA_API_DEBUG is not set CONFIG_SGL_ALLOC=y CONFIG_CPU_RMAP=y CONFIG_DQL=y CONFIG_GLOB=y # CONFIG_GLOB_SELFTEST is not set CONFIG_NLATTR=y CONFIG_CLZ_TAB=y CONFIG_IRQ_POLL=y CONFIG_MPILIB=y CONFIG_LIBFDT=y CONFIG_OID_REGISTRY=y CONFIG_UCS2_STRING=y CONFIG_HAVE_GENERIC_VDSO=y CONFIG_GENERIC_GETTIMEOFDAY=y CONFIG_GENERIC_VDSO_TIME_NS=y CONFIG_FONT_SUPPORT=y CONFIG_FONT_8x16=y CONFIG_FONT_AUTOSELECT=y CONFIG_SG_POOL=y CONFIG_ARCH_STACKWALK=y CONFIG_SBITMAP=y # CONFIG_STRING_SELFTEST is not set # end of Library routines # # Kernel hacking # # # printk and dmesg options # CONFIG_PRINTK_TIME=y # CONFIG_PRINTK_CALLER is not 
set CONFIG_CONSOLE_LOGLEVEL_DEFAULT=7 CONFIG_CONSOLE_LOGLEVEL_QUIET=4 CONFIG_MESSAGE_LOGLEVEL_DEFAULT=4 # CONFIG_BOOT_PRINTK_DELAY is not set # CONFIG_DYNAMIC_DEBUG is not set # CONFIG_DYNAMIC_DEBUG_CORE is not set CONFIG_SYMBOLIC_ERRNAME=y CONFIG_DEBUG_BUGVERBOSE=y # end of printk and dmesg options # # Compile-time checks and compiler options # # CONFIG_DEBUG_INFO is not set # CONFIG_ENABLE_MUST_CHECK is not set CONFIG_FRAME_WARN=2048 CONFIG_STRIP_ASM_SYMS=y # CONFIG_READABLE_ASM is not set # CONFIG_HEADERS_INSTALL is not set CONFIG_DEBUG_SECTION_MISMATCH=y CONFIG_SECTION_MISMATCH_WARN_ONLY=y CONFIG_ARCH_WANT_FRAME_POINTERS=y CONFIG_FRAME_POINTER=y # CONFIG_STACK_VALIDATION is not set # CONFIG_DEBUG_FORCE_WEAK_PER_CPU is not set # end of Compile-time checks and compiler options # # Generic Kernel Debugging Instruments # CONFIG_MAGIC_SYSRQ=y CONFIG_MAGIC_SYSRQ_DEFAULT_ENABLE=0x1 CONFIG_MAGIC_SYSRQ_SERIAL=y CONFIG_MAGIC_SYSRQ_SERIAL_SEQUENCE="" CONFIG_DEBUG_FS=y CONFIG_DEBUG_FS_ALLOW_ALL=y # CONFIG_DEBUG_FS_DISALLOW_MOUNT is not set # CONFIG_DEBUG_FS_ALLOW_NONE is not set CONFIG_HAVE_ARCH_KGDB=y # CONFIG_KGDB is not set CONFIG_ARCH_HAS_UBSAN_SANITIZE_ALL=y # CONFIG_UBSAN is not set # end of Generic Kernel Debugging Instruments CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_MISC=y # # Memory Debugging # # CONFIG_PAGE_EXTENSION is not set # CONFIG_DEBUG_PAGEALLOC is not set # CONFIG_PAGE_OWNER is not set # CONFIG_PAGE_POISONING is not set # CONFIG_DEBUG_RODATA_TEST is not set CONFIG_ARCH_HAS_DEBUG_WX=y # CONFIG_DEBUG_WX is not set CONFIG_GENERIC_PTDUMP=y # CONFIG_PTDUMP_DEBUGFS is not set # CONFIG_DEBUG_OBJECTS is not set # CONFIG_SLUB_DEBUG_ON is not set # CONFIG_SLUB_STATS is not set CONFIG_HAVE_DEBUG_KMEMLEAK=y # CONFIG_DEBUG_KMEMLEAK is not set # CONFIG_DEBUG_STACK_USAGE is not set # CONFIG_SCHED_STACK_END_CHECK is not set CONFIG_ARCH_HAS_DEBUG_VM_PGTABLE=y # CONFIG_DEBUG_VM is not set # CONFIG_DEBUG_VM_PGTABLE is not set CONFIG_ARCH_HAS_DEBUG_VIRTUAL=y # CONFIG_DEBUG_VIRTUAL 
is not set CONFIG_DEBUG_MEMORY_INIT=y # CONFIG_DEBUG_PER_CPU_MAPS is not set CONFIG_HAVE_ARCH_KASAN=y CONFIG_HAVE_ARCH_KASAN_SW_TAGS=y CONFIG_CC_HAS_KASAN_GENERIC=y CONFIG_CC_HAS_WORKING_NOSANITIZE_ADDRESS=y # CONFIG_KASAN is not set # end of Memory Debugging # CONFIG_DEBUG_SHIRQ is not set # # Debug Oops, Lockups and Hangs # # CONFIG_PANIC_ON_OOPS is not set CONFIG_PANIC_ON_OOPS_VALUE=0 CONFIG_PANIC_TIMEOUT=0 CONFIG_LOCKUP_DETECTOR=y CONFIG_SOFTLOCKUP_DETECTOR=y # CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0 CONFIG_DETECT_HUNG_TASK=y CONFIG_DEFAULT_HUNG_TASK_TIMEOUT=120 # CONFIG_BOOTPARAM_HUNG_TASK_PANIC is not set CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE=0 CONFIG_WQ_WATCHDOG=y # end of Debug Oops, Lockups and Hangs # # Scheduler Debugging # # CONFIG_SCHED_DEBUG is not set CONFIG_SCHED_INFO=y # CONFIG_SCHEDSTATS is not set # end of Scheduler Debugging # CONFIG_DEBUG_TIMEKEEPING is not set # # Lock Debugging (spinlocks, mutexes, etc...) # CONFIG_LOCK_DEBUGGING_SUPPORT=y # CONFIG_PROVE_LOCKING is not set # CONFIG_LOCK_STAT is not set # CONFIG_DEBUG_RT_MUTEXES is not set # CONFIG_DEBUG_SPINLOCK is not set # CONFIG_DEBUG_MUTEXES is not set # CONFIG_DEBUG_WW_MUTEX_SLOWPATH is not set # CONFIG_DEBUG_RWSEMS is not set # CONFIG_DEBUG_LOCK_ALLOC is not set # CONFIG_DEBUG_ATOMIC_SLEEP is not set # CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set # CONFIG_LOCK_TORTURE_TEST is not set # CONFIG_WW_MUTEX_SELFTEST is not set # CONFIG_SCF_TORTURE_TEST is not set # CONFIG_CSD_LOCK_WAIT_DEBUG is not set # end of Lock Debugging (spinlocks, mutexes, etc...) 
CONFIG_STACKTRACE=y # CONFIG_WARN_ALL_UNSEEDED_RANDOM is not set # CONFIG_DEBUG_KOBJECT is not set CONFIG_HAVE_DEBUG_BUGVERBOSE=y # # Debug kernel data structures # CONFIG_DEBUG_LIST=y # CONFIG_DEBUG_PLIST is not set # CONFIG_DEBUG_SG is not set # CONFIG_DEBUG_NOTIFIERS is not set # CONFIG_BUG_ON_DATA_CORRUPTION is not set # end of Debug kernel data structures # CONFIG_DEBUG_CREDENTIALS is not set # # RCU Debugging # # CONFIG_RCU_SCALE_TEST is not set # CONFIG_RCU_TORTURE_TEST is not set # CONFIG_RCU_REF_SCALE_TEST is not set CONFIG_RCU_CPU_STALL_TIMEOUT=59 # CONFIG_RCU_TRACE is not set # CONFIG_RCU_EQS_DEBUG is not set # end of RCU Debugging # CONFIG_DEBUG_WQ_FORCE_RR_CPU is not set # CONFIG_DEBUG_BLOCK_EXT_DEVT is not set # CONFIG_CPU_HOTPLUG_STATE_CONTROL is not set # CONFIG_LATENCYTOP is not set CONFIG_HAVE_FUNCTION_TRACER=y CONFIG_HAVE_FUNCTION_GRAPH_TRACER=y CONFIG_HAVE_DYNAMIC_FTRACE=y CONFIG_HAVE_DYNAMIC_FTRACE_WITH_REGS=y CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y CONFIG_HAVE_SYSCALL_TRACEPOINTS=y CONFIG_HAVE_C_RECORDMCOUNT=y CONFIG_TRACING_SUPPORT=y # CONFIG_FTRACE is not set # CONFIG_SAMPLES is not set CONFIG_ARCH_HAS_DEVMEM_IS_ALLOWED=y # CONFIG_STRICT_DEVMEM is not set # # arm64 Debugging # # CONFIG_PID_IN_CONTEXTIDR is not set # CONFIG_CORESIGHT is not set # end of arm64 Debugging # # Kernel Testing and Coverage # # CONFIG_KUNIT is not set # CONFIG_NOTIFIER_ERROR_INJECTION is not set # CONFIG_FAULT_INJECTION is not set CONFIG_ARCH_HAS_KCOV=y CONFIG_CC_HAS_SANCOV_TRACE_PC=y # CONFIG_KCOV is not set # CONFIG_RUNTIME_TESTING_MENU is not set # CONFIG_MEMTEST is not set # end of Kernel Testing and Coverage # end of Kernel hacking ================================================ FILE: resources/guest_configs/microvm-kernel-ci-aarch64-6.1.config ================================================ # # Automatically generated file; DO NOT EDIT. 
# Linux/arm64 6.1.102-1.182.amzn2023.aarch64 Kernel Configuration # CONFIG_CC_VERSION_TEXT="gcc (GCC) 11.4.1 20230605 (Red Hat 11.4.1-2)" CONFIG_CC_IS_GCC=y CONFIG_GCC_VERSION=110401 CONFIG_CLANG_VERSION=0 CONFIG_AS_IS_GNU=y CONFIG_AS_VERSION=23900 CONFIG_LD_IS_BFD=y CONFIG_LD_VERSION=23900 CONFIG_LLD_VERSION=0 CONFIG_CC_CAN_LINK=y CONFIG_CC_CAN_LINK_STATIC=y CONFIG_CC_HAS_ASM_GOTO_OUTPUT=y CONFIG_CC_HAS_ASM_GOTO_TIED_OUTPUT=y CONFIG_GCC_ASM_GOTO_OUTPUT_WORKAROUND=y CONFIG_CC_HAS_ASM_INLINE=y CONFIG_CC_HAS_NO_PROFILE_FN_ATTR=y CONFIG_PAHOLE_VERSION=122 CONFIG_IRQ_WORK=y CONFIG_BUILDTIME_TABLE_SORT=y CONFIG_THREAD_INFO_IN_TASK=y # # General setup # CONFIG_INIT_ENV_ARG_LIMIT=32 # CONFIG_COMPILE_TEST is not set # CONFIG_WERROR is not set CONFIG_LOCALVERSION="" # CONFIG_LOCALVERSION_AUTO is not set CONFIG_BUILD_SALT="6.1.102-1.182.amzn2023.aarch64" CONFIG_DEFAULT_INIT="" CONFIG_DEFAULT_HOSTNAME="(none)" CONFIG_SYSVIPC=y CONFIG_SYSVIPC_SYSCTL=y CONFIG_SYSVIPC_COMPAT=y CONFIG_POSIX_MQUEUE=y CONFIG_POSIX_MQUEUE_SYSCTL=y # CONFIG_WATCH_QUEUE is not set CONFIG_CROSS_MEMORY_ATTACH=y # CONFIG_USELIB is not set CONFIG_AUDIT=y CONFIG_HAVE_ARCH_AUDITSYSCALL=y CONFIG_AUDITSYSCALL=y # # IRQ subsystem # CONFIG_GENERIC_IRQ_PROBE=y CONFIG_GENERIC_IRQ_SHOW=y CONFIG_GENERIC_IRQ_SHOW_LEVEL=y CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK=y CONFIG_GENERIC_IRQ_MIGRATION=y CONFIG_HARDIRQS_SW_RESEND=y CONFIG_IRQ_DOMAIN=y CONFIG_IRQ_DOMAIN_HIERARCHY=y CONFIG_GENERIC_IRQ_IPI=y CONFIG_GENERIC_MSI_IRQ=y CONFIG_GENERIC_MSI_IRQ_DOMAIN=y CONFIG_IRQ_MSI_IOMMU=y CONFIG_IRQ_FORCED_THREADING=y CONFIG_SPARSE_IRQ=y # CONFIG_GENERIC_IRQ_DEBUGFS is not set # end of IRQ subsystem CONFIG_GENERIC_TIME_VSYSCALL=y CONFIG_GENERIC_CLOCKEVENTS=y CONFIG_ARCH_HAS_TICK_BROADCAST=y CONFIG_GENERIC_CLOCKEVENTS_BROADCAST=y CONFIG_HAVE_POSIX_CPU_TIMERS_TASK_WORK=y CONFIG_POSIX_CPU_TIMERS_TASK_WORK=y CONFIG_CONTEXT_TRACKING=y CONFIG_CONTEXT_TRACKING_IDLE=y # # Timers subsystem # CONFIG_TICK_ONESHOT=y CONFIG_NO_HZ_COMMON=y # 
CONFIG_HZ_PERIODIC is not set CONFIG_NO_HZ_IDLE=y # CONFIG_NO_HZ_FULL is not set CONFIG_NO_HZ=y CONFIG_HIGH_RES_TIMERS=y # end of Timers subsystem CONFIG_BPF=y CONFIG_HAVE_EBPF_JIT=y CONFIG_ARCH_WANT_DEFAULT_BPF_JIT=y # # BPF subsystem # CONFIG_BPF_SYSCALL=y CONFIG_BPF_UNPRIV_DEFAULT_OFF=y CONFIG_USERMODE_DRIVER=y CONFIG_BPF_PRELOAD=y CONFIG_BPF_PRELOAD_UMD=y # end of BPF subsystem CONFIG_PREEMPT_NONE_BUILD=y CONFIG_PREEMPT_NONE=y # CONFIG_PREEMPT_VOLUNTARY is not set # CONFIG_PREEMPT is not set # CONFIG_PREEMPT_DYNAMIC is not set # CONFIG_SCHED_CORE is not set # # CPU/Task time and stats accounting # CONFIG_TICK_CPU_ACCOUNTING=y # CONFIG_VIRT_CPU_ACCOUNTING_GEN is not set # CONFIG_IRQ_TIME_ACCOUNTING is not set CONFIG_HAVE_SCHED_AVG_IRQ=y CONFIG_BSD_PROCESS_ACCT=y CONFIG_BSD_PROCESS_ACCT_V3=y CONFIG_TASKSTATS=y CONFIG_TASK_DELAY_ACCT=y CONFIG_TASK_XACCT=y CONFIG_TASK_IO_ACCOUNTING=y CONFIG_PSI=y CONFIG_PSI_DEFAULT_DISABLED=y # end of CPU/Task time and stats accounting CONFIG_CPU_ISOLATION=y # # RCU Subsystem # CONFIG_TREE_RCU=y # CONFIG_RCU_EXPERT is not set CONFIG_SRCU=y CONFIG_TREE_SRCU=y CONFIG_TASKS_RCU_GENERIC=y CONFIG_TASKS_TRACE_RCU=y CONFIG_RCU_STALL_COMMON=y CONFIG_RCU_NEED_SEGCBLIST=y # end of RCU Subsystem # CONFIG_IKCONFIG is not set # CONFIG_IKHEADERS is not set CONFIG_LOG_BUF_SHIFT=17 CONFIG_LOG_CPU_MAX_BUF_SHIFT=12 CONFIG_PRINTK_SAFE_LOG_BUF_SHIFT=13 # CONFIG_PRINTK_INDEX is not set CONFIG_GENERIC_SCHED_CLOCK=y # # Scheduler features # # CONFIG_UCLAMP_TASK is not set # end of Scheduler features CONFIG_ARCH_SUPPORTS_NUMA_BALANCING=y CONFIG_CC_HAS_INT128=y CONFIG_CC_IMPLICIT_FALLTHROUGH="-Wimplicit-fallthrough=5" CONFIG_GCC10_NO_ARRAY_BOUNDS=y CONFIG_CC_NO_ARRAY_BOUNDS=y CONFIG_ARCH_SUPPORTS_INT128=y CONFIG_NUMA_BALANCING=y # CONFIG_NUMA_BALANCING_DEFAULT_ENABLED is not set CONFIG_CGROUPS=y CONFIG_PAGE_COUNTER=y # CONFIG_CGROUP_FAVOR_DYNMODS is not set CONFIG_MEMCG=y CONFIG_MEMCG_KMEM=y CONFIG_BLK_CGROUP=y CONFIG_CGROUP_WRITEBACK=y 
CONFIG_CGROUP_SCHED=y CONFIG_FAIR_GROUP_SCHED=y CONFIG_CFS_BANDWIDTH=y CONFIG_RT_GROUP_SCHED=y CONFIG_CGROUP_PIDS=y # CONFIG_CGROUP_RDMA is not set CONFIG_CGROUP_FREEZER=y CONFIG_CGROUP_HUGETLB=y CONFIG_CPUSETS=y CONFIG_PROC_PID_CPUSET=y CONFIG_CGROUP_DEVICE=y CONFIG_CGROUP_CPUACCT=y CONFIG_CGROUP_PERF=y CONFIG_CGROUP_BPF=y # CONFIG_CGROUP_MISC is not set # CONFIG_CGROUP_DEBUG is not set CONFIG_SOCK_CGROUP_DATA=y CONFIG_NAMESPACES=y CONFIG_UTS_NS=y CONFIG_TIME_NS=y CONFIG_IPC_NS=y CONFIG_USER_NS=y CONFIG_PID_NS=y CONFIG_NET_NS=y # CONFIG_CHECKPOINT_RESTORE is not set CONFIG_SCHED_AUTOGROUP=y # CONFIG_SYSFS_DEPRECATED is not set CONFIG_RELAY=y CONFIG_BLK_DEV_INITRD=y CONFIG_INITRAMFS_SOURCE="" CONFIG_RD_GZIP=y CONFIG_RD_BZIP2=y CONFIG_RD_LZMA=y CONFIG_RD_XZ=y CONFIG_RD_LZO=y CONFIG_RD_LZ4=y CONFIG_RD_ZSTD=y # CONFIG_BOOT_CONFIG is not set CONFIG_INITRAMFS_PRESERVE_MTIME=y CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_LD_ORPHAN_WARN=y CONFIG_SYSCTL=y CONFIG_HAVE_UID16=y CONFIG_SYSCTL_EXCEPTION_TRACE=y # CONFIG_EXPERT is not set CONFIG_UID16=y CONFIG_MULTIUSER=y CONFIG_SYSFS_SYSCALL=y CONFIG_FHANDLE=y CONFIG_POSIX_TIMERS=y CONFIG_PRINTK=y CONFIG_BUG=y CONFIG_ELF_CORE=y CONFIG_BASE_FULL=y CONFIG_FUTEX=y CONFIG_FUTEX_PI=y CONFIG_EPOLL=y CONFIG_SIGNALFD=y CONFIG_TIMERFD=y CONFIG_EVENTFD=y CONFIG_SHMEM=y CONFIG_AIO=y CONFIG_IO_URING=y CONFIG_ADVISE_SYSCALLS=y CONFIG_MEMBARRIER=y CONFIG_KALLSYMS=y # CONFIG_KALLSYMS_ALL is not set CONFIG_KALLSYMS_BASE_RELATIVE=y CONFIG_ARCH_HAS_MEMBARRIER_SYNC_CORE=y CONFIG_RSEQ=y # CONFIG_EMBEDDED is not set CONFIG_HAVE_PERF_EVENTS=y # # Kernel Performance Events And Counters # CONFIG_PERF_EVENTS=y # CONFIG_DEBUG_PERF_USE_VMALLOC is not set # end of Kernel Performance Events And Counters CONFIG_PROFILING=y # end of General setup CONFIG_ARM64=y CONFIG_GCC_SUPPORTS_DYNAMIC_FTRACE_WITH_REGS=y CONFIG_64BIT=y CONFIG_MMU=y CONFIG_ARM64_PAGE_SHIFT=12 CONFIG_ARM64_CONT_PTE_SHIFT=4 
CONFIG_ARM64_CONT_PMD_SHIFT=4 CONFIG_ARCH_MMAP_RND_BITS_MIN=18 CONFIG_ARCH_MMAP_RND_BITS_MAX=33 CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MIN=11 CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MAX=16 CONFIG_NO_IOPORT_MAP=y CONFIG_STACKTRACE_SUPPORT=y CONFIG_ILLEGAL_POINTER_VALUE=0xdead000000000000 CONFIG_LOCKDEP_SUPPORT=y CONFIG_GENERIC_BUG=y CONFIG_GENERIC_BUG_RELATIVE_POINTERS=y CONFIG_GENERIC_HWEIGHT=y CONFIG_GENERIC_CSUM=y CONFIG_GENERIC_CALIBRATE_DELAY=y CONFIG_ARCH_MHP_MEMMAP_ON_MEMORY_ENABLE=y CONFIG_ARCH_MEMORY_PROBE=y # CONFIG_ARCH_MEMORY_REMOVE is not set CONFIG_SMP=y CONFIG_KERNEL_MODE_NEON=y CONFIG_FIX_EARLYCON_MEM=y CONFIG_PGTABLE_LEVELS=4 CONFIG_ARCH_SUPPORTS_UPROBES=y CONFIG_ARCH_PROC_KCORE_TEXT=y # # Platform selection # # CONFIG_ARCH_ACTIONS is not set # CONFIG_ARCH_SUNXI is not set # CONFIG_ARCH_ALPINE is not set # CONFIG_ARCH_APPLE is not set # CONFIG_ARCH_BCM is not set # CONFIG_ARCH_BERLIN is not set # CONFIG_ARCH_BITMAIN is not set # CONFIG_ARCH_EXYNOS is not set # CONFIG_ARCH_SPARX5 is not set # CONFIG_ARCH_K3 is not set # CONFIG_ARCH_LG1K is not set # CONFIG_ARCH_HISI is not set # CONFIG_ARCH_KEEMBAY is not set # CONFIG_ARCH_MEDIATEK is not set # CONFIG_ARCH_MESON is not set # CONFIG_ARCH_MVEBU is not set # CONFIG_ARCH_NXP is not set # CONFIG_ARCH_NPCM is not set # CONFIG_ARCH_QCOM is not set # CONFIG_ARCH_REALTEK is not set # CONFIG_ARCH_RENESAS is not set # CONFIG_ARCH_ROCKCHIP is not set # CONFIG_ARCH_SEATTLE is not set # CONFIG_ARCH_INTEL_SOCFPGA is not set # CONFIG_ARCH_SYNQUACER is not set # CONFIG_ARCH_TEGRA is not set # CONFIG_ARCH_SPRD is not set # CONFIG_ARCH_THUNDER is not set # CONFIG_ARCH_THUNDER2 is not set # CONFIG_ARCH_UNIPHIER is not set # CONFIG_ARCH_VEXPRESS is not set # CONFIG_ARCH_VISCONTI is not set # CONFIG_ARCH_XGENE is not set # CONFIG_ARCH_ZYNQMP is not set # end of Platform selection # # Kernel Features # # # ARM errata workarounds via the alternatives framework # CONFIG_AMPERE_ERRATUM_AC03_CPU_38=y CONFIG_ARM64_WORKAROUND_CLEAN_CACHE=y 
CONFIG_ARM64_ERRATUM_826319=y CONFIG_ARM64_ERRATUM_827319=y CONFIG_ARM64_ERRATUM_824069=y CONFIG_ARM64_ERRATUM_819472=y CONFIG_ARM64_ERRATUM_832075=y CONFIG_ARM64_ERRATUM_1742098=y CONFIG_ARM64_ERRATUM_845719=y CONFIG_ARM64_ERRATUM_843419=y CONFIG_ARM64_LD_HAS_FIX_ERRATUM_843419=y CONFIG_ARM64_ERRATUM_1024718=y CONFIG_ARM64_ERRATUM_1418040=y CONFIG_ARM64_WORKAROUND_SPECULATIVE_AT=y CONFIG_ARM64_ERRATUM_1165522=y CONFIG_ARM64_ERRATUM_1319367=y CONFIG_ARM64_ERRATUM_1530923=y CONFIG_ARM64_WORKAROUND_REPEAT_TLBI=y CONFIG_ARM64_ERRATUM_2441007=y CONFIG_ARM64_ERRATUM_1286807=y CONFIG_ARM64_ERRATUM_1463225=y CONFIG_ARM64_ERRATUM_1542419=y CONFIG_ARM64_ERRATUM_1508412=y CONFIG_ARM64_ERRATUM_2051678=y CONFIG_ARM64_ERRATUM_2077057=y CONFIG_ARM64_ERRATUM_2658417=y CONFIG_ARM64_WORKAROUND_TSB_FLUSH_FAILURE=y CONFIG_ARM64_ERRATUM_2054223=y CONFIG_ARM64_ERRATUM_2067961=y CONFIG_ARM64_ERRATUM_2441009=y CONFIG_ARM64_ERRATUM_2457168=y CONFIG_ARM64_WORKAROUND_SPECULATIVE_UNPRIV_LOAD=y CONFIG_ARM64_ERRATUM_2966298=y CONFIG_CAVIUM_ERRATUM_22375=y CONFIG_CAVIUM_ERRATUM_23144=y CONFIG_CAVIUM_ERRATUM_23154=y CONFIG_CAVIUM_ERRATUM_27456=y CONFIG_CAVIUM_ERRATUM_30115=y CONFIG_CAVIUM_TX2_ERRATUM_219=y CONFIG_FUJITSU_ERRATUM_010001=y # CONFIG_HISILICON_ERRATUM_161600802 is not set CONFIG_QCOM_FALKOR_ERRATUM_1003=y CONFIG_QCOM_FALKOR_ERRATUM_1009=y CONFIG_QCOM_QDF2400_ERRATUM_0065=y CONFIG_QCOM_FALKOR_ERRATUM_E1041=y CONFIG_NVIDIA_CARMEL_CNP_ERRATUM=y # CONFIG_SOCIONEXT_SYNQUACER_PREITS is not set # end of ARM errata workarounds via the alternatives framework CONFIG_ARM64_4K_PAGES=y # CONFIG_ARM64_16K_PAGES is not set # CONFIG_ARM64_64K_PAGES is not set # CONFIG_ARM64_VA_BITS_39 is not set CONFIG_ARM64_VA_BITS_48=y CONFIG_ARM64_VA_BITS=48 CONFIG_ARM64_PA_BITS_48=y CONFIG_ARM64_PA_BITS=48 # CONFIG_CPU_BIG_ENDIAN is not set CONFIG_CPU_LITTLE_ENDIAN=y CONFIG_SCHED_MC=y # CONFIG_SCHED_CLUSTER is not set CONFIG_SCHED_SMT=y CONFIG_NR_CPUS=64 CONFIG_HOTPLUG_CPU=y CONFIG_NUMA=y CONFIG_NODES_SHIFT=10 
CONFIG_HZ_100=y # CONFIG_HZ_250 is not set # CONFIG_HZ_300 is not set # CONFIG_HZ_1000 is not set CONFIG_HZ=100 CONFIG_SCHED_HRTICK=y CONFIG_ARCH_SPARSEMEM_ENABLE=y CONFIG_HW_PERF_EVENTS=y CONFIG_PARAVIRT=y CONFIG_PARAVIRT_TIME_ACCOUNTING=y # CONFIG_KEXEC is not set CONFIG_KEXEC_FILE=y # CONFIG_KEXEC_SIG is not set # CONFIG_CRASH_DUMP is not set CONFIG_TRANS_TABLE=y # CONFIG_XEN is not set CONFIG_ARCH_FORCE_MAX_ORDER=11 CONFIG_UNMAP_KERNEL_AT_EL0=y CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY=y CONFIG_RODATA_FULL_DEFAULT_ENABLED=y # CONFIG_ARM64_SW_TTBR0_PAN is not set CONFIG_ARM64_TAGGED_ADDR_ABI=y CONFIG_COMPAT=y CONFIG_KUSER_HELPERS=y # CONFIG_COMPAT_ALIGNMENT_FIXUPS is not set # CONFIG_ARMV8_DEPRECATED is not set # # ARMv8.1 architectural features # CONFIG_ARM64_HW_AFDBM=y CONFIG_ARM64_PAN=y CONFIG_AS_HAS_LDAPR=y CONFIG_AS_HAS_LSE_ATOMICS=y CONFIG_ARM64_LSE_ATOMICS=y CONFIG_ARM64_USE_LSE_ATOMICS=y # end of ARMv8.1 architectural features # # ARMv8.2 architectural features # CONFIG_AS_HAS_ARMV8_2=y CONFIG_AS_HAS_SHA3=y # CONFIG_ARM64_PMEM is not set CONFIG_ARM64_RAS_EXTN=y CONFIG_ARM64_CNP=y # end of ARMv8.2 architectural features # # ARMv8.3 architectural features # CONFIG_ARM64_PTR_AUTH=y CONFIG_ARM64_PTR_AUTH_KERNEL=y CONFIG_CC_HAS_BRANCH_PROT_PAC_RET=y CONFIG_CC_HAS_SIGN_RETURN_ADDRESS=y CONFIG_AS_HAS_PAC=y CONFIG_AS_HAS_CFI_NEGATE_RA_STATE=y # end of ARMv8.3 architectural features # # ARMv8.4 architectural features # CONFIG_ARM64_AMU_EXTN=y CONFIG_AS_HAS_ARMV8_4=y CONFIG_ARM64_TLB_RANGE=y # end of ARMv8.4 architectural features # # ARMv8.5 architectural features # CONFIG_AS_HAS_ARMV8_5=y CONFIG_ARM64_BTI=y CONFIG_CC_HAS_BRANCH_PROT_PAC_RET_BTI=y CONFIG_ARM64_E0PD=y CONFIG_ARM64_AS_HAS_MTE=y CONFIG_ARM64_MTE=y # end of ARMv8.5 architectural features # # ARMv8.7 architectural features # CONFIG_ARM64_EPAN=y # end of ARMv8.7 architectural features CONFIG_ARM64_SVE=y CONFIG_ARM64_SME=y # CONFIG_ARM64_PSEUDO_NMI is not set CONFIG_RELOCATABLE=y # CONFIG_RANDOMIZE_BASE is 
not set CONFIG_CC_HAVE_STACKPROTECTOR_SYSREG=y CONFIG_STACKPROTECTOR_PER_TASK=y CONFIG_ARCH_NR_GPIO=0 # end of Kernel Features # # Boot options # # CONFIG_ARM64_ACPI_PARKING_PROTOCOL is not set CONFIG_CMDLINE="" CONFIG_EFI_STUB=y CONFIG_EFI=y CONFIG_DMI=y # end of Boot options # # Power management options # # CONFIG_SUSPEND is not set CONFIG_HIBERNATE_CALLBACKS=y CONFIG_HIBERNATION=y CONFIG_HIBERNATION_SNAPSHOT_DEV=y CONFIG_PM_STD_PARTITION="" CONFIG_PM_SLEEP=y CONFIG_PM_SLEEP_SMP=y # CONFIG_PM_AUTOSLEEP is not set # CONFIG_PM_USERSPACE_AUTOSLEEP is not set # CONFIG_PM_WAKELOCKS is not set CONFIG_PM=y # CONFIG_PM_DEBUG is not set CONFIG_PM_CLK=y # CONFIG_WQ_POWER_EFFICIENT_DEFAULT is not set CONFIG_CPU_PM=y # CONFIG_ENERGY_MODEL is not set CONFIG_ARCH_HIBERNATION_POSSIBLE=y CONFIG_ARCH_HIBERNATION_HEADER=y CONFIG_ARCH_SUSPEND_POSSIBLE=y # end of Power management options # # CPU Power Management # # # CPU Idle # CONFIG_CPU_IDLE=y # CONFIG_CPU_IDLE_GOV_LADDER is not set CONFIG_CPU_IDLE_GOV_MENU=y # CONFIG_CPU_IDLE_GOV_TEO is not set # # ARM CPU Idle Drivers # # CONFIG_ARM_PSCI_CPUIDLE is not set # end of ARM CPU Idle Drivers # end of CPU Idle # # CPU Frequency scaling # CONFIG_CPU_FREQ=y CONFIG_CPU_FREQ_GOV_ATTR_SET=y CONFIG_CPU_FREQ_STAT=y # CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE is not set # CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE is not set # CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE is not set # CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND is not set # CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE is not set CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL=y CONFIG_CPU_FREQ_GOV_PERFORMANCE=y # CONFIG_CPU_FREQ_GOV_POWERSAVE is not set # CONFIG_CPU_FREQ_GOV_USERSPACE is not set # CONFIG_CPU_FREQ_GOV_ONDEMAND is not set # CONFIG_CPU_FREQ_GOV_CONSERVATIVE is not set CONFIG_CPU_FREQ_GOV_SCHEDUTIL=y # # CPU frequency scaling drivers # # CONFIG_CPUFREQ_DT is not set # end of CPU Frequency scaling # end of CPU Power Management CONFIG_ARCH_SUPPORTS_ACPI=y CONFIG_ACPI=y CONFIG_ACPI_GENERIC_GSI=y 
CONFIG_ACPI_CCA_REQUIRED=y # CONFIG_ACPI_DEBUGGER is not set CONFIG_ACPI_SPCR_TABLE=y # CONFIG_ACPI_EC_DEBUGFS is not set # CONFIG_ACPI_AC is not set # CONFIG_ACPI_BATTERY is not set # CONFIG_ACPI_BUTTON is not set # CONFIG_ACPI_TINY_POWER_BUTTON is not set # CONFIG_ACPI_FAN is not set # CONFIG_ACPI_TAD is not set # CONFIG_ACPI_DOCK is not set # CONFIG_ACPI_PROCESSOR is not set CONFIG_ARCH_HAS_ACPI_TABLE_UPGRADE=y # CONFIG_ACPI_TABLE_UPGRADE is not set # CONFIG_ACPI_DEBUG is not set # CONFIG_ACPI_CONTAINER is not set # CONFIG_ACPI_HOTPLUG_MEMORY is not set # CONFIG_ACPI_HED is not set # CONFIG_ACPI_CUSTOM_METHOD is not set # CONFIG_ACPI_BGRT is not set CONFIG_ACPI_REDUCED_HARDWARE_ONLY=y CONFIG_ACPI_NUMA=y # CONFIG_ACPI_HMAT is not set CONFIG_HAVE_ACPI_APEI=y # CONFIG_ACPI_APEI is not set # CONFIG_ACPI_CONFIGFS is not set # CONFIG_ACPI_PFRUT is not set CONFIG_ACPI_IORT=y CONFIG_ACPI_GTDT=y CONFIG_ACPI_PPTT=y # CONFIG_PMIC_OPREGION is not set CONFIG_ACPI_PRMT=y CONFIG_HAVE_KVM=y # CONFIG_VIRTUALIZATION is not set CONFIG_HAVE_LIVEPATCH=y CONFIG_CPU_MITIGATIONS=y # # General architecture-dependent options # CONFIG_CRASH_CORE=y CONFIG_KEXEC_CORE=y CONFIG_ARCH_HAS_SUBPAGE_FAULTS=y CONFIG_JUMP_LABEL=y # CONFIG_STATIC_KEYS_SELFTEST is not set CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS=y CONFIG_HAVE_IOREMAP_PROT=y CONFIG_HAVE_KPROBES=y CONFIG_HAVE_KRETPROBES=y CONFIG_ARCH_CORRECT_STACKTRACE_ON_KRETPROBE=y CONFIG_HAVE_FUNCTION_ERROR_INJECTION=y CONFIG_HAVE_NMI=y CONFIG_TRACE_IRQFLAGS_SUPPORT=y CONFIG_TRACE_IRQFLAGS_NMI_SUPPORT=y CONFIG_HAVE_ARCH_TRACEHOOK=y CONFIG_HAVE_DMA_CONTIGUOUS=y CONFIG_GENERIC_SMP_IDLE_THREAD=y CONFIG_GENERIC_IDLE_POLL_SETUP=y CONFIG_ARCH_HAS_FORTIFY_SOURCE=y CONFIG_ARCH_HAS_KEEPINITRD=y CONFIG_ARCH_HAS_SET_MEMORY=y CONFIG_ARCH_HAS_SET_DIRECT_MAP=y CONFIG_HAVE_ARCH_THREAD_STRUCT_WHITELIST=y CONFIG_ARCH_WANTS_NO_INSTR=y CONFIG_HAVE_ASM_MODVERSIONS=y CONFIG_HAVE_REGS_AND_STACK_ACCESS_API=y CONFIG_HAVE_RSEQ=y CONFIG_HAVE_FUNCTION_ARG_ACCESS_API=y 
CONFIG_HAVE_HW_BREAKPOINT=y CONFIG_HAVE_PERF_REGS=y CONFIG_HAVE_PERF_USER_STACK_DUMP=y CONFIG_HAVE_ARCH_JUMP_LABEL=y CONFIG_HAVE_ARCH_JUMP_LABEL_RELATIVE=y CONFIG_MMU_GATHER_TABLE_FREE=y CONFIG_MMU_GATHER_RCU_TABLE_FREE=y CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG=y CONFIG_HAVE_ALIGNED_STRUCT_PAGE=y CONFIG_HAVE_CMPXCHG_LOCAL=y CONFIG_HAVE_CMPXCHG_DOUBLE=y CONFIG_ARCH_WANT_COMPAT_IPC_PARSE_VERSION=y CONFIG_HAVE_ARCH_SECCOMP=y CONFIG_HAVE_ARCH_SECCOMP_FILTER=y CONFIG_SECCOMP=y CONFIG_SECCOMP_FILTER=y # CONFIG_SECCOMP_CACHE_DEBUG is not set CONFIG_HAVE_ARCH_STACKLEAK=y CONFIG_HAVE_STACKPROTECTOR=y CONFIG_STACKPROTECTOR=y CONFIG_STACKPROTECTOR_STRONG=y CONFIG_ARCH_SUPPORTS_LTO_CLANG=y CONFIG_ARCH_SUPPORTS_LTO_CLANG_THIN=y CONFIG_LTO_NONE=y CONFIG_ARCH_SUPPORTS_CFI_CLANG=y CONFIG_HAVE_CONTEXT_TRACKING_USER=y CONFIG_HAVE_VIRT_CPU_ACCOUNTING_GEN=y CONFIG_HAVE_IRQ_TIME_ACCOUNTING=y CONFIG_HAVE_MOVE_PUD=y CONFIG_HAVE_MOVE_PMD=y CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE=y CONFIG_HAVE_ARCH_HUGE_VMAP=y CONFIG_HAVE_ARCH_HUGE_VMALLOC=y CONFIG_ARCH_WANT_HUGE_PMD_SHARE=y CONFIG_MODULES_USE_ELF_RELA=y CONFIG_HAVE_SOFTIRQ_ON_OWN_STACK=y CONFIG_SOFTIRQ_ON_OWN_STACK=y CONFIG_ARCH_HAS_ELF_RANDOMIZE=y CONFIG_HAVE_ARCH_MMAP_RND_BITS=y CONFIG_ARCH_MMAP_RND_BITS=18 CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS=y CONFIG_ARCH_MMAP_RND_COMPAT_BITS=11 CONFIG_PAGE_SIZE_LESS_THAN_64KB=y CONFIG_PAGE_SIZE_LESS_THAN_256KB=y CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT=y CONFIG_HAVE_OBJTOOL=y CONFIG_HAVE_STACK_VALIDATION=y CONFIG_HAVE_RELIABLE_STACKTRACE=y CONFIG_CLONE_BACKWARDS=y CONFIG_OLD_SIGSUSPEND3=y CONFIG_COMPAT_OLD_SIGACTION=y CONFIG_COMPAT_32BIT_TIME=y CONFIG_HAVE_ARCH_VMAP_STACK=y CONFIG_VMAP_STACK=y CONFIG_HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET=y CONFIG_RANDOMIZE_KSTACK_OFFSET=y # CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT is not set CONFIG_ARCH_HAS_STRICT_KERNEL_RWX=y CONFIG_STRICT_KERNEL_RWX=y CONFIG_ARCH_HAS_STRICT_MODULE_RWX=y CONFIG_HAVE_ARCH_COMPILER_H=y CONFIG_HAVE_ARCH_PREL32_RELOCATIONS=y 
CONFIG_ARCH_USE_MEMREMAP_PROT=y # CONFIG_LOCK_EVENT_COUNTS is not set CONFIG_ARCH_HAS_RELR=y CONFIG_HAVE_PREEMPT_DYNAMIC=y CONFIG_HAVE_PREEMPT_DYNAMIC_KEY=y CONFIG_ARCH_WANT_LD_ORPHAN_WARN=y CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC=y CONFIG_ARCH_SUPPORTS_PAGE_TABLE_CHECK=y CONFIG_ARCH_HAVE_TRACE_MMIO_ACCESS=y # # GCOV-based kernel profiling # # CONFIG_GCOV_KERNEL is not set CONFIG_ARCH_HAS_GCOV_PROFILE_ALL=y # end of GCOV-based kernel profiling CONFIG_HAVE_GCC_PLUGINS=y # end of General architecture-dependent options CONFIG_RT_MUTEXES=y CONFIG_BASE_SMALL=0 # CONFIG_MODULES is not set CONFIG_BLOCK=y CONFIG_BLOCK_LEGACY_AUTOLOAD=y CONFIG_BLK_RQ_ALLOC_TIME=y CONFIG_BLK_CGROUP_RWSTAT=y CONFIG_BLK_DEV_BSG_COMMON=y CONFIG_BLK_ICQ=y CONFIG_BLK_DEV_BSGLIB=y CONFIG_BLK_DEV_INTEGRITY=y # CONFIG_BLK_DEV_ZONED is not set CONFIG_BLK_DEV_THROTTLING=y # CONFIG_BLK_DEV_THROTTLING_LOW is not set # CONFIG_BLK_WBT is not set # CONFIG_BLK_CGROUP_IOLATENCY is not set CONFIG_BLK_CGROUP_IOCOST=y # CONFIG_BLK_CGROUP_IOPRIO is not set CONFIG_BLK_DEBUG_FS=y # CONFIG_BLK_SED_OPAL is not set # CONFIG_BLK_INLINE_ENCRYPTION is not set # # Partition Types # CONFIG_PARTITION_ADVANCED=y # CONFIG_ACORN_PARTITION is not set # CONFIG_AIX_PARTITION is not set # CONFIG_OSF_PARTITION is not set # CONFIG_AMIGA_PARTITION is not set # CONFIG_ATARI_PARTITION is not set # CONFIG_MAC_PARTITION is not set # CONFIG_MSDOS_PARTITION is not set # CONFIG_LDM_PARTITION is not set # CONFIG_SGI_PARTITION is not set # CONFIG_ULTRIX_PARTITION is not set # CONFIG_SUN_PARTITION is not set # CONFIG_KARMA_PARTITION is not set # CONFIG_EFI_PARTITION is not set # CONFIG_SYSV68_PARTITION is not set # CONFIG_CMDLINE_PARTITION is not set # end of Partition Types CONFIG_BLOCK_COMPAT=y CONFIG_BLK_MQ_VIRTIO=y CONFIG_BLK_PM=y # # IO Schedulers # CONFIG_MQ_IOSCHED_DEADLINE=y CONFIG_MQ_IOSCHED_KYBER=y CONFIG_IOSCHED_BFQ=y CONFIG_BFQ_GROUP_IOSCHED=y # CONFIG_BFQ_CGROUP_DEBUG is not set # end of IO Schedulers CONFIG_ASN1=y 
CONFIG_ARCH_INLINE_SPIN_TRYLOCK=y CONFIG_ARCH_INLINE_SPIN_TRYLOCK_BH=y CONFIG_ARCH_INLINE_SPIN_LOCK=y CONFIG_ARCH_INLINE_SPIN_LOCK_BH=y CONFIG_ARCH_INLINE_SPIN_LOCK_IRQ=y CONFIG_ARCH_INLINE_SPIN_LOCK_IRQSAVE=y CONFIG_ARCH_INLINE_SPIN_UNLOCK=y CONFIG_ARCH_INLINE_SPIN_UNLOCK_BH=y CONFIG_ARCH_INLINE_SPIN_UNLOCK_IRQ=y CONFIG_ARCH_INLINE_SPIN_UNLOCK_IRQRESTORE=y CONFIG_ARCH_INLINE_READ_LOCK=y CONFIG_ARCH_INLINE_READ_LOCK_BH=y CONFIG_ARCH_INLINE_READ_LOCK_IRQ=y CONFIG_ARCH_INLINE_READ_LOCK_IRQSAVE=y CONFIG_ARCH_INLINE_READ_UNLOCK=y CONFIG_ARCH_INLINE_READ_UNLOCK_BH=y CONFIG_ARCH_INLINE_READ_UNLOCK_IRQ=y CONFIG_ARCH_INLINE_READ_UNLOCK_IRQRESTORE=y CONFIG_ARCH_INLINE_WRITE_LOCK=y CONFIG_ARCH_INLINE_WRITE_LOCK_BH=y CONFIG_ARCH_INLINE_WRITE_LOCK_IRQ=y CONFIG_ARCH_INLINE_WRITE_LOCK_IRQSAVE=y CONFIG_ARCH_INLINE_WRITE_UNLOCK=y CONFIG_ARCH_INLINE_WRITE_UNLOCK_BH=y CONFIG_ARCH_INLINE_WRITE_UNLOCK_IRQ=y CONFIG_ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE=y CONFIG_INLINE_SPIN_TRYLOCK=y CONFIG_INLINE_SPIN_TRYLOCK_BH=y CONFIG_INLINE_SPIN_LOCK=y CONFIG_INLINE_SPIN_LOCK_BH=y CONFIG_INLINE_SPIN_LOCK_IRQ=y CONFIG_INLINE_SPIN_LOCK_IRQSAVE=y CONFIG_INLINE_SPIN_UNLOCK_BH=y CONFIG_INLINE_SPIN_UNLOCK_IRQ=y CONFIG_INLINE_SPIN_UNLOCK_IRQRESTORE=y CONFIG_INLINE_READ_LOCK=y CONFIG_INLINE_READ_LOCK_BH=y CONFIG_INLINE_READ_LOCK_IRQ=y CONFIG_INLINE_READ_LOCK_IRQSAVE=y CONFIG_INLINE_READ_UNLOCK=y CONFIG_INLINE_READ_UNLOCK_BH=y CONFIG_INLINE_READ_UNLOCK_IRQ=y CONFIG_INLINE_READ_UNLOCK_IRQRESTORE=y CONFIG_INLINE_WRITE_LOCK=y CONFIG_INLINE_WRITE_LOCK_BH=y CONFIG_INLINE_WRITE_LOCK_IRQ=y CONFIG_INLINE_WRITE_LOCK_IRQSAVE=y CONFIG_INLINE_WRITE_UNLOCK=y CONFIG_INLINE_WRITE_UNLOCK_BH=y CONFIG_INLINE_WRITE_UNLOCK_IRQ=y CONFIG_INLINE_WRITE_UNLOCK_IRQRESTORE=y CONFIG_ARCH_SUPPORTS_ATOMIC_RMW=y CONFIG_MUTEX_SPIN_ON_OWNER=y CONFIG_RWSEM_SPIN_ON_OWNER=y CONFIG_LOCK_SPIN_ON_OWNER=y CONFIG_ARCH_USE_QUEUED_SPINLOCKS=y CONFIG_QUEUED_SPINLOCKS=y CONFIG_ARCH_USE_QUEUED_RWLOCKS=y CONFIG_QUEUED_RWLOCKS=y 
CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE=y CONFIG_ARCH_HAS_SYSCALL_WRAPPER=y CONFIG_FREEZER=y # # Executable file formats # CONFIG_BINFMT_ELF=y CONFIG_COMPAT_BINFMT_ELF=y CONFIG_ARCH_BINFMT_ELF_STATE=y CONFIG_ARCH_BINFMT_ELF_EXTRA_PHDRS=y CONFIG_ARCH_HAVE_ELF_PROT=y CONFIG_ARCH_USE_GNU_PROPERTY=y CONFIG_ELFCORE=y CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y CONFIG_BINFMT_SCRIPT=y CONFIG_BINFMT_MISC=y CONFIG_COREDUMP=y # end of Executable file formats # # Memory Management options # CONFIG_ZPOOL=y CONFIG_SWAP=y CONFIG_ZSWAP=y # CONFIG_ZSWAP_DEFAULT_ON is not set # CONFIG_ZSWAP_COMPRESSOR_DEFAULT_DEFLATE is not set CONFIG_ZSWAP_COMPRESSOR_DEFAULT_LZO=y # CONFIG_ZSWAP_COMPRESSOR_DEFAULT_842 is not set # CONFIG_ZSWAP_COMPRESSOR_DEFAULT_LZ4 is not set # CONFIG_ZSWAP_COMPRESSOR_DEFAULT_LZ4HC is not set # CONFIG_ZSWAP_COMPRESSOR_DEFAULT_ZSTD is not set CONFIG_ZSWAP_COMPRESSOR_DEFAULT="lzo" CONFIG_ZSWAP_ZPOOL_DEFAULT_ZBUD=y # CONFIG_ZSWAP_ZPOOL_DEFAULT_Z3FOLD is not set # CONFIG_ZSWAP_ZPOOL_DEFAULT_ZSMALLOC is not set CONFIG_ZSWAP_ZPOOL_DEFAULT="zbud" CONFIG_ZBUD=y # CONFIG_Z3FOLD is not set # CONFIG_ZSMALLOC is not set # # SLAB allocator options # # CONFIG_SLAB is not set CONFIG_SLUB=y CONFIG_SLAB_MERGE_DEFAULT=y CONFIG_SLAB_FREELIST_RANDOM=y CONFIG_SLAB_FREELIST_HARDENED=y # CONFIG_SLUB_STATS is not set CONFIG_SLUB_CPU_PARTIAL=y # end of SLAB allocator options CONFIG_SHUFFLE_PAGE_ALLOCATOR=y # CONFIG_COMPAT_BRK is not set CONFIG_SPARSEMEM=y CONFIG_SPARSEMEM_EXTREME=y CONFIG_SPARSEMEM_VMEMMAP_ENABLE=y CONFIG_SPARSEMEM_VMEMMAP=y CONFIG_HAVE_FAST_GUP=y CONFIG_ARCH_KEEP_MEMBLOCK=y CONFIG_NUMA_KEEP_MEMINFO=y CONFIG_MEMORY_ISOLATION=y CONFIG_EXCLUSIVE_SYSTEM_RAM=y CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y CONFIG_ARCH_ENABLE_MEMORY_HOTREMOVE=y CONFIG_MEMORY_HOTPLUG=y # CONFIG_MEMORY_HOTPLUG_DEFAULT_ONLINE is not set CONFIG_MEMORY_HOTREMOVE=y CONFIG_MHP_MEMMAP_ON_MEMORY=y CONFIG_SPLIT_PTLOCK_CPUS=4 CONFIG_ARCH_ENABLE_SPLIT_PMD_PTLOCK=y CONFIG_MEMORY_BALLOON=y 
CONFIG_BALLOON_COMPACTION=y CONFIG_COMPACTION=y CONFIG_COMPACT_UNEVICTABLE_DEFAULT=1 CONFIG_PAGE_REPORTING=y CONFIG_MIGRATION=y CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION=y CONFIG_ARCH_ENABLE_THP_MIGRATION=y CONFIG_CONTIG_ALLOC=y CONFIG_PHYS_ADDR_T_64BIT=y CONFIG_KSM=y CONFIG_DEFAULT_MMAP_MIN_ADDR=4096 CONFIG_ARCH_SUPPORTS_MEMORY_FAILURE=y # CONFIG_MEMORY_FAILURE is not set CONFIG_ARCH_WANTS_THP_SWAP=y CONFIG_TRANSPARENT_HUGEPAGE=y CONFIG_TRANSPARENT_HUGEPAGE_ALWAYS=y # CONFIG_TRANSPARENT_HUGEPAGE_MADVISE is not set CONFIG_THP_SWAP=y # CONFIG_READ_ONLY_THP_FOR_FS is not set CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK=y CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK=y CONFIG_USE_PERCPU_NUMA_NODE_ID=y CONFIG_HAVE_SETUP_PER_CPU_AREA=y CONFIG_FRONTSWAP=y # CONFIG_CMA is not set CONFIG_GENERIC_EARLY_IOREMAP=y # CONFIG_DEFERRED_STRUCT_PAGE_INIT is not set CONFIG_PAGE_IDLE_FLAG=y # CONFIG_IDLE_PAGE_TRACKING is not set CONFIG_ARCH_HAS_CACHE_LINE_SIZE=y CONFIG_ARCH_HAS_CURRENT_STACK_POINTER=y CONFIG_ARCH_HAS_PTE_DEVMAP=y CONFIG_ZONE_DMA=y CONFIG_ZONE_DMA32=y # CONFIG_ZONE_DEVICE is not set CONFIG_ARCH_USES_HIGH_VMA_FLAGS=y CONFIG_VM_EVENT_COUNTERS=y # CONFIG_PERCPU_STATS is not set # CONFIG_GUP_TEST is not set CONFIG_ARCH_HAS_PTE_SPECIAL=y CONFIG_SECRETMEM=y # CONFIG_ANON_VMA_NAME is not set CONFIG_USERFAULTFD=y CONFIG_HAVE_ARCH_USERFAULTFD_MINOR=y # CONFIG_LRU_GEN is not set CONFIG_LOCK_MM_AND_FIND_VMA=y # # Data Access Monitoring # CONFIG_DAMON=y CONFIG_DAMON_VADDR=y CONFIG_DAMON_PADDR=y CONFIG_DAMON_SYSFS=y CONFIG_DAMON_DBGFS=y # CONFIG_DAMON_RECLAIM is not set # CONFIG_DAMON_LRU_SORT is not set # end of Data Access Monitoring # end of Memory Management options CONFIG_NET=y CONFIG_NET_INGRESS=y CONFIG_SKB_EXTENSIONS=y # # Networking options # CONFIG_PACKET=y # CONFIG_PACKET_DIAG is not set CONFIG_UNIX=y CONFIG_UNIX_SCM=y CONFIG_AF_UNIX_OOB=y # CONFIG_UNIX_DIAG is not set # CONFIG_TLS is not set CONFIG_XFRM=y CONFIG_XFRM_ALGO=y CONFIG_XFRM_USER=y # CONFIG_XFRM_INTERFACE is not set 
CONFIG_XFRM_SUB_POLICY=y CONFIG_XFRM_MIGRATE=y CONFIG_XFRM_STATISTICS=y # CONFIG_NET_KEY is not set CONFIG_XDP_SOCKETS=y # CONFIG_XDP_SOCKETS_DIAG is not set CONFIG_INET=y CONFIG_IP_MULTICAST=y CONFIG_IP_ADVANCED_ROUTER=y # CONFIG_IP_FIB_TRIE_STATS is not set CONFIG_IP_MULTIPLE_TABLES=y CONFIG_IP_ROUTE_MULTIPATH=y CONFIG_IP_ROUTE_VERBOSE=y CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y CONFIG_IP_PNP_RARP=y # CONFIG_NET_IPIP is not set # CONFIG_NET_IPGRE_DEMUX is not set CONFIG_IP_MROUTE_COMMON=y CONFIG_IP_MROUTE=y CONFIG_IP_MROUTE_MULTIPLE_TABLES=y CONFIG_IP_PIMSM_V1=y CONFIG_IP_PIMSM_V2=y CONFIG_SYN_COOKIES=y # CONFIG_NET_IPVTI is not set # CONFIG_NET_FOU is not set # CONFIG_INET_AH is not set # CONFIG_INET_ESP is not set # CONFIG_INET_IPCOMP is not set CONFIG_INET_TABLE_PERTURB_ORDER=16 CONFIG_INET_DIAG=y CONFIG_INET_TCP_DIAG=y # CONFIG_INET_UDP_DIAG is not set # CONFIG_INET_RAW_DIAG is not set CONFIG_INET_DIAG_DESTROY=y CONFIG_TCP_CONG_ADVANCED=y # CONFIG_TCP_CONG_BIC is not set CONFIG_TCP_CONG_CUBIC=y # CONFIG_TCP_CONG_WESTWOOD is not set # CONFIG_TCP_CONG_HTCP is not set # CONFIG_TCP_CONG_HSTCP is not set # CONFIG_TCP_CONG_HYBLA is not set # CONFIG_TCP_CONG_VEGAS is not set # CONFIG_TCP_CONG_NV is not set # CONFIG_TCP_CONG_SCALABLE is not set # CONFIG_TCP_CONG_LP is not set # CONFIG_TCP_CONG_VENO is not set # CONFIG_TCP_CONG_YEAH is not set # CONFIG_TCP_CONG_ILLINOIS is not set # CONFIG_TCP_CONG_DCTCP is not set # CONFIG_TCP_CONG_CDG is not set # CONFIG_TCP_CONG_BBR is not set CONFIG_DEFAULT_CUBIC=y # CONFIG_DEFAULT_RENO is not set CONFIG_DEFAULT_TCP_CONG="cubic" CONFIG_TCP_MD5SIG=y CONFIG_IPV6=y CONFIG_IPV6_ROUTER_PREF=y CONFIG_IPV6_ROUTE_INFO=y CONFIG_IPV6_OPTIMISTIC_DAD=y # CONFIG_INET6_AH is not set # CONFIG_INET6_ESP is not set # CONFIG_INET6_IPCOMP is not set # CONFIG_IPV6_MIP6 is not set # CONFIG_IPV6_ILA is not set # CONFIG_IPV6_VTI is not set # CONFIG_IPV6_SIT is not set # CONFIG_IPV6_TUNNEL is not set CONFIG_IPV6_MULTIPLE_TABLES=y # 
CONFIG_IPV6_SUBTREES is not set CONFIG_IPV6_MROUTE=y CONFIG_IPV6_MROUTE_MULTIPLE_TABLES=y CONFIG_IPV6_PIMSM_V2=y # CONFIG_IPV6_SEG6_LWTUNNEL is not set # CONFIG_IPV6_SEG6_HMAC is not set # CONFIG_IPV6_RPL_LWTUNNEL is not set # CONFIG_IPV6_IOAM6_LWTUNNEL is not set CONFIG_NETLABEL=y CONFIG_MPTCP=y CONFIG_INET_MPTCP_DIAG=y CONFIG_MPTCP_IPV6=y CONFIG_NETWORK_SECMARK=y CONFIG_NET_PTP_CLASSIFY=y CONFIG_NETWORK_PHY_TIMESTAMPING=y CONFIG_NETFILTER=y CONFIG_NETFILTER_ADVANCED=y CONFIG_BRIDGE_NETFILTER=y # # Core Netfilter Configuration # CONFIG_NETFILTER_INGRESS=y # CONFIG_NETFILTER_EGRESS is not set CONFIG_NETFILTER_FAMILY_BRIDGE=y # CONFIG_NETFILTER_NETLINK_ACCT is not set # CONFIG_NETFILTER_NETLINK_QUEUE is not set # CONFIG_NETFILTER_NETLINK_LOG is not set # CONFIG_NETFILTER_NETLINK_OSF is not set CONFIG_NF_CONNTRACK=y CONFIG_NF_LOG_SYSLOG=y CONFIG_NF_CONNTRACK_MARK=y CONFIG_NF_CONNTRACK_SECMARK=y CONFIG_NF_CONNTRACK_ZONES=y CONFIG_NF_CONNTRACK_PROCFS=y CONFIG_NF_CONNTRACK_EVENTS=y CONFIG_NF_CONNTRACK_TIMEOUT=y CONFIG_NF_CONNTRACK_TIMESTAMP=y CONFIG_NF_CONNTRACK_LABELS=y CONFIG_NF_CT_PROTO_DCCP=y CONFIG_NF_CT_PROTO_SCTP=y CONFIG_NF_CT_PROTO_UDPLITE=y # CONFIG_NF_CONNTRACK_AMANDA is not set # CONFIG_NF_CONNTRACK_FTP is not set # CONFIG_NF_CONNTRACK_H323 is not set # CONFIG_NF_CONNTRACK_IRC is not set # CONFIG_NF_CONNTRACK_NETBIOS_NS is not set # CONFIG_NF_CONNTRACK_SNMP is not set # CONFIG_NF_CONNTRACK_PPTP is not set # CONFIG_NF_CONNTRACK_SANE is not set # CONFIG_NF_CONNTRACK_SIP is not set # CONFIG_NF_CONNTRACK_TFTP is not set # CONFIG_NF_CT_NETLINK is not set # CONFIG_NF_CT_NETLINK_TIMEOUT is not set CONFIG_NF_NAT=y CONFIG_NF_NAT_REDIRECT=y CONFIG_NF_NAT_MASQUERADE=y CONFIG_NETFILTER_SYNPROXY=y # CONFIG_NF_TABLES is not set CONFIG_NETFILTER_XTABLES=y CONFIG_NETFILTER_XTABLES_COMPAT=y # # Xtables combined modules # # CONFIG_NETFILTER_XT_MARK is not set # CONFIG_NETFILTER_XT_CONNMARK is not set # # Xtables targets # # CONFIG_NETFILTER_XT_TARGET_AUDIT is not set # 
CONFIG_NETFILTER_XT_TARGET_CHECKSUM is not set # CONFIG_NETFILTER_XT_TARGET_CLASSIFY is not set # CONFIG_NETFILTER_XT_TARGET_CONNMARK is not set # CONFIG_NETFILTER_XT_TARGET_CONNSECMARK is not set # CONFIG_NETFILTER_XT_TARGET_DSCP is not set # CONFIG_NETFILTER_XT_TARGET_HL is not set # CONFIG_NETFILTER_XT_TARGET_HMARK is not set # CONFIG_NETFILTER_XT_TARGET_IDLETIMER is not set # CONFIG_NETFILTER_XT_TARGET_LOG is not set # CONFIG_NETFILTER_XT_TARGET_MARK is not set CONFIG_NETFILTER_XT_NAT=y CONFIG_NETFILTER_XT_TARGET_NETMAP=y # CONFIG_NETFILTER_XT_TARGET_NFLOG is not set # CONFIG_NETFILTER_XT_TARGET_NFQUEUE is not set # CONFIG_NETFILTER_XT_TARGET_RATEEST is not set CONFIG_NETFILTER_XT_TARGET_REDIRECT=y CONFIG_NETFILTER_XT_TARGET_MASQUERADE=y # CONFIG_NETFILTER_XT_TARGET_TEE is not set # CONFIG_NETFILTER_XT_TARGET_TPROXY is not set # CONFIG_NETFILTER_XT_TARGET_SECMARK is not set # CONFIG_NETFILTER_XT_TARGET_TCPMSS is not set # CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP is not set # # Xtables matches # CONFIG_NETFILTER_XT_MATCH_ADDRTYPE=y # CONFIG_NETFILTER_XT_MATCH_BPF is not set # CONFIG_NETFILTER_XT_MATCH_CGROUP is not set # CONFIG_NETFILTER_XT_MATCH_CLUSTER is not set # CONFIG_NETFILTER_XT_MATCH_COMMENT is not set # CONFIG_NETFILTER_XT_MATCH_CONNBYTES is not set # CONFIG_NETFILTER_XT_MATCH_CONNLABEL is not set # CONFIG_NETFILTER_XT_MATCH_CONNLIMIT is not set # CONFIG_NETFILTER_XT_MATCH_CONNMARK is not set CONFIG_NETFILTER_XT_MATCH_CONNTRACK=y # CONFIG_NETFILTER_XT_MATCH_CPU is not set # CONFIG_NETFILTER_XT_MATCH_DCCP is not set # CONFIG_NETFILTER_XT_MATCH_DEVGROUP is not set # CONFIG_NETFILTER_XT_MATCH_DSCP is not set # CONFIG_NETFILTER_XT_MATCH_ECN is not set # CONFIG_NETFILTER_XT_MATCH_ESP is not set # CONFIG_NETFILTER_XT_MATCH_HASHLIMIT is not set # CONFIG_NETFILTER_XT_MATCH_HELPER is not set # CONFIG_NETFILTER_XT_MATCH_HL is not set # CONFIG_NETFILTER_XT_MATCH_IPCOMP is not set # CONFIG_NETFILTER_XT_MATCH_IPRANGE is not set # CONFIG_NETFILTER_XT_MATCH_L2TP is not 
set # CONFIG_NETFILTER_XT_MATCH_LENGTH is not set # CONFIG_NETFILTER_XT_MATCH_LIMIT is not set # CONFIG_NETFILTER_XT_MATCH_MAC is not set # CONFIG_NETFILTER_XT_MATCH_MARK is not set # CONFIG_NETFILTER_XT_MATCH_MULTIPORT is not set # CONFIG_NETFILTER_XT_MATCH_NFACCT is not set # CONFIG_NETFILTER_XT_MATCH_OSF is not set # CONFIG_NETFILTER_XT_MATCH_OWNER is not set # CONFIG_NETFILTER_XT_MATCH_POLICY is not set # CONFIG_NETFILTER_XT_MATCH_PHYSDEV is not set # CONFIG_NETFILTER_XT_MATCH_PKTTYPE is not set # CONFIG_NETFILTER_XT_MATCH_QUOTA is not set # CONFIG_NETFILTER_XT_MATCH_RATEEST is not set # CONFIG_NETFILTER_XT_MATCH_REALM is not set # CONFIG_NETFILTER_XT_MATCH_RECENT is not set # CONFIG_NETFILTER_XT_MATCH_SCTP is not set # CONFIG_NETFILTER_XT_MATCH_SOCKET is not set # CONFIG_NETFILTER_XT_MATCH_STATE is not set # CONFIG_NETFILTER_XT_MATCH_STATISTIC is not set # CONFIG_NETFILTER_XT_MATCH_STRING is not set # CONFIG_NETFILTER_XT_MATCH_TCPMSS is not set # CONFIG_NETFILTER_XT_MATCH_TIME is not set # CONFIG_NETFILTER_XT_MATCH_U32 is not set # end of Core Netfilter Configuration # CONFIG_IP_SET is not set # CONFIG_IP_VS is not set # # IP: Netfilter Configuration # CONFIG_NF_DEFRAG_IPV4=y # CONFIG_NF_SOCKET_IPV4 is not set # CONFIG_NF_TPROXY_IPV4 is not set # CONFIG_NF_DUP_IPV4 is not set # CONFIG_NF_LOG_ARP is not set # CONFIG_NF_LOG_IPV4 is not set CONFIG_NF_REJECT_IPV4=y CONFIG_IP_NF_IPTABLES=y # CONFIG_IP_NF_MATCH_AH is not set # CONFIG_IP_NF_MATCH_ECN is not set # CONFIG_IP_NF_MATCH_RPFILTER is not set # CONFIG_IP_NF_MATCH_TTL is not set CONFIG_IP_NF_FILTER=y CONFIG_IP_NF_TARGET_REJECT=y CONFIG_IP_NF_TARGET_SYNPROXY=y CONFIG_IP_NF_NAT=y CONFIG_IP_NF_TARGET_MASQUERADE=y CONFIG_IP_NF_TARGET_NETMAP=y CONFIG_IP_NF_TARGET_REDIRECT=y CONFIG_IP_NF_MANGLE=y # CONFIG_IP_NF_TARGET_CLUSTERIP is not set # CONFIG_IP_NF_TARGET_ECN is not set # CONFIG_IP_NF_TARGET_TTL is not set # CONFIG_IP_NF_RAW is not set # CONFIG_IP_NF_SECURITY is not set # CONFIG_IP_NF_ARPTABLES is not set # 
end of IP: Netfilter Configuration # # IPv6: Netfilter Configuration # # CONFIG_NF_SOCKET_IPV6 is not set # CONFIG_NF_TPROXY_IPV6 is not set # CONFIG_NF_DUP_IPV6 is not set CONFIG_NF_REJECT_IPV6=y CONFIG_NF_LOG_IPV6=y CONFIG_IP6_NF_IPTABLES=y # CONFIG_IP6_NF_MATCH_AH is not set # CONFIG_IP6_NF_MATCH_EUI64 is not set # CONFIG_IP6_NF_MATCH_FRAG is not set # CONFIG_IP6_NF_MATCH_OPTS is not set # CONFIG_IP6_NF_MATCH_HL is not set # CONFIG_IP6_NF_MATCH_IPV6HEADER is not set # CONFIG_IP6_NF_MATCH_MH is not set # CONFIG_IP6_NF_MATCH_RPFILTER is not set # CONFIG_IP6_NF_MATCH_RT is not set # CONFIG_IP6_NF_MATCH_SRH is not set # CONFIG_IP6_NF_TARGET_HL is not set CONFIG_IP6_NF_FILTER=y CONFIG_IP6_NF_TARGET_REJECT=y CONFIG_IP6_NF_TARGET_SYNPROXY=y CONFIG_IP6_NF_MANGLE=y # CONFIG_IP6_NF_RAW is not set # CONFIG_IP6_NF_SECURITY is not set CONFIG_IP6_NF_NAT=y CONFIG_IP6_NF_TARGET_MASQUERADE=y # CONFIG_IP6_NF_TARGET_NPT is not set # end of IPv6: Netfilter Configuration CONFIG_NF_DEFRAG_IPV6=y # CONFIG_NF_CONNTRACK_BRIDGE is not set # CONFIG_BRIDGE_NF_EBTABLES is not set CONFIG_BPFILTER=y CONFIG_BPFILTER_UMH=y # CONFIG_IP_DCCP is not set # CONFIG_IP_SCTP is not set # CONFIG_RDS is not set # CONFIG_TIPC is not set # CONFIG_ATM is not set # CONFIG_L2TP is not set CONFIG_STP=y CONFIG_BRIDGE=y CONFIG_BRIDGE_IGMP_SNOOPING=y # CONFIG_BRIDGE_MRP is not set # CONFIG_BRIDGE_CFM is not set # CONFIG_NET_DSA is not set # CONFIG_VLAN_8021Q is not set CONFIG_LLC=y # CONFIG_LLC2 is not set # CONFIG_ATALK is not set # CONFIG_X25 is not set # CONFIG_LAPB is not set # CONFIG_PHONET is not set # CONFIG_6LOWPAN is not set # CONFIG_IEEE802154 is not set CONFIG_NET_SCHED=y # # Queueing/Scheduling # # CONFIG_NET_SCH_HTB is not set # CONFIG_NET_SCH_HFSC is not set # CONFIG_NET_SCH_PRIO is not set # CONFIG_NET_SCH_MULTIQ is not set # CONFIG_NET_SCH_RED is not set # CONFIG_NET_SCH_SFB is not set # CONFIG_NET_SCH_SFQ is not set # CONFIG_NET_SCH_TEQL is not set # CONFIG_NET_SCH_TBF is not set # 
CONFIG_NET_SCH_CBS is not set # CONFIG_NET_SCH_ETF is not set # CONFIG_NET_SCH_TAPRIO is not set # CONFIG_NET_SCH_GRED is not set # CONFIG_NET_SCH_NETEM is not set # CONFIG_NET_SCH_DRR is not set # CONFIG_NET_SCH_MQPRIO is not set # CONFIG_NET_SCH_SKBPRIO is not set # CONFIG_NET_SCH_CHOKE is not set # CONFIG_NET_SCH_QFQ is not set # CONFIG_NET_SCH_CODEL is not set # CONFIG_NET_SCH_FQ_CODEL is not set # CONFIG_NET_SCH_CAKE is not set # CONFIG_NET_SCH_FQ is not set # CONFIG_NET_SCH_HHF is not set # CONFIG_NET_SCH_PIE is not set # CONFIG_NET_SCH_INGRESS is not set # CONFIG_NET_SCH_PLUG is not set # CONFIG_NET_SCH_ETS is not set # CONFIG_NET_SCH_DEFAULT is not set # # Classification # CONFIG_NET_CLS=y # CONFIG_NET_CLS_BASIC is not set # CONFIG_NET_CLS_ROUTE4 is not set # CONFIG_NET_CLS_FW is not set # CONFIG_NET_CLS_U32 is not set # CONFIG_NET_CLS_FLOW is not set # CONFIG_NET_CLS_CGROUP is not set # CONFIG_NET_CLS_BPF is not set # CONFIG_NET_CLS_FLOWER is not set # CONFIG_NET_CLS_MATCHALL is not set CONFIG_NET_EMATCH=y CONFIG_NET_EMATCH_STACK=32 # CONFIG_NET_EMATCH_CMP is not set # CONFIG_NET_EMATCH_NBYTE is not set # CONFIG_NET_EMATCH_U32 is not set # CONFIG_NET_EMATCH_META is not set # CONFIG_NET_EMATCH_TEXT is not set # CONFIG_NET_EMATCH_IPT is not set CONFIG_NET_CLS_ACT=y # CONFIG_NET_ACT_POLICE is not set # CONFIG_NET_ACT_GACT is not set # CONFIG_NET_ACT_MIRRED is not set # CONFIG_NET_ACT_SAMPLE is not set # CONFIG_NET_ACT_IPT is not set # CONFIG_NET_ACT_NAT is not set # CONFIG_NET_ACT_PEDIT is not set # CONFIG_NET_ACT_SIMP is not set # CONFIG_NET_ACT_SKBEDIT is not set # CONFIG_NET_ACT_CSUM is not set # CONFIG_NET_ACT_MPLS is not set # CONFIG_NET_ACT_VLAN is not set # CONFIG_NET_ACT_BPF is not set # CONFIG_NET_ACT_CONNMARK is not set # CONFIG_NET_ACT_CTINFO is not set # CONFIG_NET_ACT_SKBMOD is not set # CONFIG_NET_ACT_IFE is not set # CONFIG_NET_ACT_TUNNEL_KEY is not set # CONFIG_NET_ACT_GATE is not set # CONFIG_NET_TC_SKB_EXT is not set CONFIG_NET_SCH_FIFO=y 
CONFIG_DCB=y CONFIG_DNS_RESOLVER=y # CONFIG_BATMAN_ADV is not set # CONFIG_OPENVSWITCH is not set CONFIG_VSOCKETS=y # CONFIG_VSOCKETS_DIAG is not set # CONFIG_VSOCKETS_LOOPBACK is not set CONFIG_VIRTIO_VSOCKETS=y CONFIG_VIRTIO_VSOCKETS_COMMON=y # CONFIG_NETLINK_DIAG is not set CONFIG_MPLS=y # CONFIG_NET_MPLS_GSO is not set # CONFIG_MPLS_ROUTING is not set # CONFIG_NET_NSH is not set # CONFIG_HSR is not set # CONFIG_NET_SWITCHDEV is not set CONFIG_NET_L3_MASTER_DEV=y # CONFIG_QRTR is not set # CONFIG_NET_NCSI is not set CONFIG_PCPU_DEV_REFCNT=y CONFIG_RPS=y CONFIG_RFS_ACCEL=y CONFIG_SOCK_RX_QUEUE_MAPPING=y CONFIG_XPS=y CONFIG_CGROUP_NET_PRIO=y CONFIG_CGROUP_NET_CLASSID=y CONFIG_NET_RX_BUSY_POLL=y CONFIG_BQL=y CONFIG_BPF_STREAM_PARSER=y CONFIG_NET_FLOW_LIMIT=y # # Network testing # # CONFIG_NET_PKTGEN is not set # end of Network testing # end of Networking options # CONFIG_HAMRADIO is not set # CONFIG_CAN is not set # CONFIG_BT is not set # CONFIG_AF_RXRPC is not set # CONFIG_AF_KCM is not set CONFIG_STREAM_PARSER=y # CONFIG_MCTP is not set CONFIG_FIB_RULES=y # CONFIG_WIRELESS is not set # CONFIG_RFKILL is not set # CONFIG_NET_9P is not set # CONFIG_CAIF is not set # CONFIG_CEPH_LIB is not set # CONFIG_NFC is not set # CONFIG_PSAMPLE is not set # CONFIG_NET_IFE is not set CONFIG_LWTUNNEL=y CONFIG_LWTUNNEL_BPF=y CONFIG_GRO_CELLS=y CONFIG_NET_SOCK_MSG=y CONFIG_PAGE_POOL=y # CONFIG_PAGE_POOL_STATS is not set CONFIG_FAILOVER=y CONFIG_ETHTOOL_NETLINK=y # # Device Drivers # CONFIG_ARM_AMBA=y CONFIG_HAVE_PCI=y # CONFIG_PCI is not set # CONFIG_PCCARD is not set # # Generic Driver Options # # CONFIG_UEVENT_HELPER is not set CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y # CONFIG_DEVTMPFS_SAFE is not set CONFIG_STANDALONE=y CONFIG_PREVENT_FIRMWARE_BUILD=y # # Firmware loader # CONFIG_FW_LOADER=y CONFIG_FW_LOADER_PAGED_BUF=y CONFIG_FW_LOADER_SYSFS=y CONFIG_EXTRA_FIRMWARE="" CONFIG_FW_LOADER_USER_HELPER=y # CONFIG_FW_LOADER_USER_HELPER_FALLBACK is not set # CONFIG_FW_LOADER_COMPRESS 
is not set CONFIG_FW_CACHE=y # CONFIG_FW_UPLOAD is not set # end of Firmware loader CONFIG_ALLOW_DEV_COREDUMP=y # CONFIG_DEBUG_DRIVER is not set # CONFIG_DEBUG_DEVRES is not set # CONFIG_DEBUG_TEST_DRIVER_REMOVE is not set CONFIG_GENERIC_CPU_AUTOPROBE=y CONFIG_GENERIC_CPU_VULNERABILITIES=y CONFIG_SOC_BUS=y CONFIG_DMA_SHARED_BUFFER=y # CONFIG_DMA_FENCE_TRACE is not set CONFIG_GENERIC_ARCH_TOPOLOGY=y CONFIG_GENERIC_ARCH_NUMA=y # end of Generic Driver Options # # Bus devices # # CONFIG_BRCMSTB_GISB_ARB is not set # CONFIG_VEXPRESS_CONFIG is not set # CONFIG_MHI_BUS is not set # CONFIG_MHI_BUS_EP is not set # end of Bus devices CONFIG_CONNECTOR=y CONFIG_PROC_EVENTS=y # # Firmware Drivers # # # ARM System Control and Management Interface Protocol # # CONFIG_ARM_SCMI_PROTOCOL is not set # end of ARM System Control and Management Interface Protocol # CONFIG_ARM_SCPI_PROTOCOL is not set CONFIG_DMIID=y # CONFIG_DMI_SYSFS is not set # CONFIG_ISCSI_IBFT is not set # CONFIG_SYSFB_SIMPLEFB is not set # CONFIG_ARM_FFA_TRANSPORT is not set # CONFIG_GOOGLE_FIRMWARE is not set # # EFI (Extensible Firmware Interface) Support # CONFIG_EFI_ESRT=y CONFIG_EFI_VARS_PSTORE=y # CONFIG_EFI_VARS_PSTORE_DEFAULT_DISABLE is not set CONFIG_EFI_PARAMS_FROM_FDT=y CONFIG_EFI_RUNTIME_WRAPPERS=y CONFIG_EFI_GENERIC_STUB=y # CONFIG_EFI_ZBOOT is not set CONFIG_EFI_ARMSTUB_DTB_LOADER=y CONFIG_EFI_GENERIC_STUB_INITRD_CMDLINE_LOADER=y # CONFIG_EFI_BOOTLOADER_CONTROL is not set # CONFIG_EFI_CAPSULE_LOADER is not set # CONFIG_EFI_TEST is not set # CONFIG_RESET_ATTACK_MITIGATION is not set # CONFIG_EFI_DISABLE_PCI_DMA is not set CONFIG_EFI_EARLYCON=y # CONFIG_EFI_CUSTOM_SSDT_OVERLAYS is not set # CONFIG_EFI_DISABLE_RUNTIME is not set # CONFIG_EFI_COCO_SECRET is not set # end of EFI (Extensible Firmware Interface) Support CONFIG_ARM_PSCI_FW=y # CONFIG_ARM_PSCI_CHECKER is not set CONFIG_HAVE_ARM_SMCCC=y CONFIG_HAVE_ARM_SMCCC_DISCOVERY=y CONFIG_ARM_SMCCC_SOC_ID=y # # Tegra firmware driver # # end of Tegra 
firmware driver # end of Firmware Drivers # CONFIG_GNSS is not set # CONFIG_MTD is not set CONFIG_DTC=y CONFIG_OF=y # CONFIG_OF_UNITTEST is not set CONFIG_OF_FLATTREE=y CONFIG_OF_EARLY_FLATTREE=y CONFIG_OF_KOBJ=y CONFIG_OF_ADDRESS=y CONFIG_OF_IRQ=y CONFIG_OF_RESERVED_MEM=y # CONFIG_OF_OVERLAY is not set CONFIG_OF_NUMA=y # CONFIG_PARPORT is not set CONFIG_PNP=y CONFIG_PNP_DEBUG_MESSAGES=y # # Protocols # CONFIG_PNPACPI=y CONFIG_BLK_DEV=y # CONFIG_BLK_DEV_NULL_BLK is not set # CONFIG_ZRAM is not set CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_LOOP_MIN_COUNT=8 # CONFIG_BLK_DEV_DRBD is not set # CONFIG_BLK_DEV_NBD is not set # CONFIG_BLK_DEV_RAM is not set # CONFIG_CDROM_PKTCDVD is not set # CONFIG_ATA_OVER_ETH is not set CONFIG_VIRTIO_BLK=y # CONFIG_BLK_DEV_RBD is not set # CONFIG_BLK_DEV_UBLK is not set # # NVME Support # # CONFIG_NVME_FC is not set # CONFIG_NVME_TCP is not set # end of NVME Support # # Misc devices # # CONFIG_DUMMY_IRQ is not set # CONFIG_ENCLOSURE_SERVICES is not set # CONFIG_SRAM is not set # CONFIG_XILINX_SDFEC is not set CONFIG_SYSGENID=y # CONFIG_OPEN_DICE is not set # CONFIG_VCPU_STALL_DETECTOR is not set # CONFIG_C2PORT is not set # # EEPROM support # # CONFIG_EEPROM_93CX6 is not set # end of EEPROM support # # Texas Instruments shared transport line discipline # # end of Texas Instruments shared transport line discipline # # Altera FPGA firmware download module (requires I2C) # # CONFIG_ECHO is not set # CONFIG_UACCE is not set # CONFIG_PVPANIC is not set # end of Misc devices # # SCSI device support # CONFIG_SCSI_MOD=y # CONFIG_RAID_ATTRS is not set CONFIG_SCSI_COMMON=y CONFIG_SCSI=y CONFIG_SCSI_DMA=y CONFIG_SCSI_PROC_FS=y # # SCSI support type (disk, tape, CD-ROM) # # CONFIG_BLK_DEV_SD is not set # CONFIG_CHR_DEV_ST is not set # CONFIG_BLK_DEV_SR is not set # CONFIG_CHR_DEV_SG is not set CONFIG_BLK_DEV_BSG=y # CONFIG_CHR_DEV_SCH is not set # CONFIG_SCSI_CONSTANTS is not set # CONFIG_SCSI_LOGGING is not set # CONFIG_SCSI_SCAN_ASYNC is not set # # 
SCSI Transports # # CONFIG_SCSI_SPI_ATTRS is not set # CONFIG_SCSI_FC_ATTRS is not set CONFIG_SCSI_ISCSI_ATTRS=y # CONFIG_SCSI_SAS_ATTRS is not set # CONFIG_SCSI_SAS_LIBSAS is not set # CONFIG_SCSI_SRP_ATTRS is not set # end of SCSI Transports CONFIG_SCSI_LOWLEVEL=y CONFIG_ISCSI_TCP=y # CONFIG_ISCSI_BOOT_SYSFS is not set # CONFIG_SCSI_DEBUG is not set # CONFIG_SCSI_VIRTIO is not set # CONFIG_SCSI_DH is not set # end of SCSI device support # CONFIG_ATA is not set # CONFIG_MD is not set # CONFIG_TARGET_CORE is not set CONFIG_NETDEVICES=y CONFIG_NET_CORE=y # CONFIG_BONDING is not set # CONFIG_DUMMY is not set # CONFIG_WIREGUARD is not set # CONFIG_EQUALIZER is not set # CONFIG_NET_TEAM is not set # CONFIG_MACVLAN is not set # CONFIG_IPVLAN is not set # CONFIG_VXLAN is not set # CONFIG_GENEVE is not set # CONFIG_BAREUDP is not set # CONFIG_GTP is not set # CONFIG_AMT is not set # CONFIG_MACSEC is not set # CONFIG_NETCONSOLE is not set # CONFIG_TUN is not set # CONFIG_TUN_VNET_CROSS_LE is not set CONFIG_VETH=y CONFIG_VIRTIO_NET=y # CONFIG_NLMON is not set # CONFIG_NET_VRF is not set # CONFIG_ETHERNET is not set # CONFIG_NET_SB1000 is not set # CONFIG_PHYLIB is not set # CONFIG_PSE_CONTROLLER is not set # CONFIG_MDIO_DEVICE is not set # # PCS device drivers # # end of PCS device drivers # CONFIG_PPP is not set # CONFIG_SLIP is not set # # Host-side USB support is needed for USB Network Adapter support # # CONFIG_WLAN is not set # CONFIG_WAN is not set # # Wireless WAN # # CONFIG_WWAN is not set # end of Wireless WAN # CONFIG_FUJITSU_ES is not set # CONFIG_NETDEVSIM is not set CONFIG_NET_FAILOVER=y # CONFIG_ISDN is not set # # Input device support # CONFIG_INPUT=y CONFIG_INPUT_FF_MEMLESS=y # CONFIG_INPUT_SPARSEKMAP is not set # CONFIG_INPUT_MATRIXKMAP is not set # # Userland interfaces # # CONFIG_INPUT_MOUSEDEV is not set # CONFIG_INPUT_JOYDEV is not set CONFIG_INPUT_EVDEV=y # CONFIG_INPUT_EVBUG is not set # # Input Device Drivers # # CONFIG_INPUT_KEYBOARD is not set # 
CONFIG_INPUT_MOUSE is not set # CONFIG_INPUT_JOYSTICK is not set # CONFIG_INPUT_TABLET is not set # CONFIG_INPUT_TOUCHSCREEN is not set # CONFIG_INPUT_MISC is not set # CONFIG_RMI4_CORE is not set # # Hardware I/O ports # # CONFIG_SERIO is not set # CONFIG_GAMEPORT is not set # end of Hardware I/O ports # end of Input device support # # Character devices # CONFIG_TTY=y CONFIG_VT=y CONFIG_CONSOLE_TRANSLATIONS=y CONFIG_VT_CONSOLE=y CONFIG_VT_CONSOLE_SLEEP=y CONFIG_HW_CONSOLE=y CONFIG_VT_HW_CONSOLE_BINDING=y CONFIG_UNIX98_PTYS=y # CONFIG_LEGACY_PTYS is not set CONFIG_LDISC_AUTOLOAD=y # # Serial drivers # CONFIG_SERIAL_EARLYCON=y CONFIG_SERIAL_8250=y # CONFIG_SERIAL_8250_DEPRECATED_OPTIONS is not set CONFIG_SERIAL_8250_PNP=y # CONFIG_SERIAL_8250_16550A_VARIANTS is not set # CONFIG_SERIAL_8250_FINTEK is not set CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_8250_DMA=y CONFIG_SERIAL_8250_NR_UARTS=1 CONFIG_SERIAL_8250_RUNTIME_UARTS=1 # CONFIG_SERIAL_8250_EXTENDED is not set CONFIG_SERIAL_8250_FSL=y # CONFIG_SERIAL_8250_DW is not set # CONFIG_SERIAL_8250_RT288X is not set CONFIG_SERIAL_OF_PLATFORM=y # # Non-8250 serial port support # # CONFIG_SERIAL_AMBA_PL010 is not set # CONFIG_SERIAL_AMBA_PL011 is not set # CONFIG_SERIAL_EARLYCON_ARM_SEMIHOST is not set # CONFIG_SERIAL_UARTLITE is not set CONFIG_SERIAL_CORE=y CONFIG_SERIAL_CORE_CONSOLE=y # CONFIG_SERIAL_SIFIVE is not set # CONFIG_SERIAL_SCCNXP is not set # CONFIG_SERIAL_ALTERA_JTAGUART is not set # CONFIG_SERIAL_ALTERA_UART is not set # CONFIG_SERIAL_XILINX_PS_UART is not set # CONFIG_SERIAL_ARC is not set # CONFIG_SERIAL_FSL_LPUART is not set # CONFIG_SERIAL_FSL_LINFLEXUART is not set # CONFIG_SERIAL_CONEXANT_DIGICOLOR is not set # CONFIG_SERIAL_SPRD is not set # end of Serial drivers # CONFIG_SERIAL_NONSTANDARD is not set # CONFIG_N_GSM is not set # CONFIG_NULL_TTY is not set CONFIG_HVC_DRIVER=y # CONFIG_HVC_DCC is not set # CONFIG_SERIAL_DEV_BUS is not set CONFIG_VIRTIO_CONSOLE=y # CONFIG_IPMI_HANDLER is not set 
CONFIG_HW_RANDOM=y # CONFIG_HW_RANDOM_TIMERIOMEM is not set # CONFIG_HW_RANDOM_BA431 is not set CONFIG_HW_RANDOM_VIRTIO=y # CONFIG_HW_RANDOM_CCTRNG is not set # CONFIG_HW_RANDOM_XIPHERA is not set CONFIG_HW_RANDOM_ARM_SMCCC_TRNG=y # CONFIG_HW_RANDOM_GRAVITON is not set # CONFIG_DEVMEM is not set # CONFIG_TCG_TPM is not set # CONFIG_XILLYBUS is not set CONFIG_RANDOM_TRUST_CPU=y CONFIG_RANDOM_TRUST_BOOTLOADER=y # end of Character devices # # I2C support # # CONFIG_I2C is not set # end of I2C support # CONFIG_I3C is not set # CONFIG_SPI is not set # CONFIG_SPMI is not set # CONFIG_HSI is not set CONFIG_PPS=y # CONFIG_PPS_DEBUG is not set # # PPS clients support # # CONFIG_PPS_CLIENT_KTIMER is not set # CONFIG_PPS_CLIENT_LDISC is not set # CONFIG_PPS_CLIENT_GPIO is not set # # PPS generators support # # # PTP clock support # CONFIG_PTP_1588_CLOCK=y CONFIG_PTP_1588_CLOCK_OPTIONAL=y # # Enable PHYLIB and NETWORK_PHY_TIMESTAMPING to see the additional clocks. # CONFIG_PTP_1588_CLOCK_KVM=y # CONFIG_PTP_1588_CLOCK_VMCLOCK is not set # end of PTP clock support # CONFIG_PINCTRL is not set # CONFIG_GPIOLIB is not set # CONFIG_W1 is not set CONFIG_POWER_RESET=y # CONFIG_POWER_RESET_RESTART is not set # CONFIG_POWER_RESET_XGENE is not set # CONFIG_POWER_RESET_SYSCON is not set # CONFIG_POWER_RESET_SYSCON_POWEROFF is not set # CONFIG_NVMEM_REBOOT_MODE is not set CONFIG_POWER_SUPPLY=y # CONFIG_POWER_SUPPLY_DEBUG is not set # CONFIG_PDA_POWER is not set # CONFIG_TEST_POWER is not set # CONFIG_BATTERY_DS2780 is not set # CONFIG_BATTERY_DS2781 is not set # CONFIG_BATTERY_SAMSUNG_SDI is not set # CONFIG_BATTERY_BQ27XXX is not set # CONFIG_CHARGER_MAX8903 is not set # CONFIG_BATTERY_GOLDFISH is not set # CONFIG_HWMON is not set CONFIG_THERMAL=y # CONFIG_THERMAL_NETLINK is not set # CONFIG_THERMAL_STATISTICS is not set CONFIG_THERMAL_EMERGENCY_POWEROFF_DELAY_MS=0 CONFIG_THERMAL_OF=y # CONFIG_THERMAL_WRITABLE_TRIPS is not set CONFIG_THERMAL_DEFAULT_GOV_STEP_WISE=y # 
CONFIG_THERMAL_DEFAULT_GOV_FAIR_SHARE is not set # CONFIG_THERMAL_DEFAULT_GOV_USER_SPACE is not set CONFIG_THERMAL_GOV_FAIR_SHARE=y CONFIG_THERMAL_GOV_STEP_WISE=y # CONFIG_THERMAL_GOV_BANG_BANG is not set CONFIG_THERMAL_GOV_USER_SPACE=y # CONFIG_CPU_THERMAL is not set # CONFIG_THERMAL_EMULATION is not set # CONFIG_THERMAL_MMIO is not set CONFIG_WATCHDOG=y CONFIG_WATCHDOG_CORE=y # CONFIG_WATCHDOG_NOWAYOUT is not set CONFIG_WATCHDOG_HANDLE_BOOT_ENABLED=y CONFIG_WATCHDOG_OPEN_TIMEOUT=0 CONFIG_WATCHDOG_SYSFS=y # CONFIG_WATCHDOG_HRTIMER_PRETIMEOUT is not set # # Watchdog Pretimeout Governors # # CONFIG_WATCHDOG_PRETIMEOUT_GOV is not set # # Watchdog Device Drivers # # CONFIG_SOFT_WATCHDOG is not set # CONFIG_WDAT_WDT is not set # CONFIG_XILINX_WATCHDOG is not set # CONFIG_ARM_SP805_WATCHDOG is not set # CONFIG_ARM_SBSA_WATCHDOG is not set # CONFIG_CADENCE_WATCHDOG is not set # CONFIG_DW_WATCHDOG is not set # CONFIG_MAX63XX_WATCHDOG is not set # CONFIG_ARM_SMC_WATCHDOG is not set CONFIG_SSB_POSSIBLE=y # CONFIG_SSB is not set CONFIG_BCMA_POSSIBLE=y # CONFIG_BCMA is not set # # Multifunction device drivers # # CONFIG_MFD_ATMEL_FLEXCOM is not set # CONFIG_MFD_ATMEL_HLCDC is not set # CONFIG_MFD_MADERA is not set # CONFIG_MFD_HI6421_PMIC is not set # CONFIG_HTC_PASIC3 is not set # CONFIG_MFD_KEMPLD is not set # CONFIG_MFD_MT6397 is not set # CONFIG_MFD_SM501 is not set # CONFIG_MFD_SYSCON is not set # CONFIG_MFD_TQMX86 is not set # end of Multifunction device drivers # CONFIG_REGULATOR is not set # CONFIG_RC_CORE is not set # # CEC support # # CONFIG_MEDIA_CEC_SUPPORT is not set # end of CEC support # CONFIG_MEDIA_SUPPORT is not set # # Graphics support # # CONFIG_DRM is not set # # ARM devices # # end of ARM devices # # Frame buffer Devices # # CONFIG_FB is not set # end of Frame buffer Devices # # Backlight & LCD device support # # CONFIG_LCD_CLASS_DEVICE is not set # CONFIG_BACKLIGHT_CLASS_DEVICE is not set # end of Backlight & LCD device support # # Console display 
driver support # CONFIG_DUMMY_CONSOLE=y CONFIG_DUMMY_CONSOLE_COLUMNS=80 CONFIG_DUMMY_CONSOLE_ROWS=25 # end of Console display driver support # end of Graphics support # CONFIG_SOUND is not set # # HID support # CONFIG_HID=y # CONFIG_HID_BATTERY_STRENGTH is not set CONFIG_HIDRAW=y # CONFIG_UHID is not set # CONFIG_HID_GENERIC is not set # # Special HID drivers # # CONFIG_HID_A4TECH is not set # CONFIG_HID_ACRUX is not set # CONFIG_HID_AUREAL is not set # CONFIG_HID_BELKIN is not set # CONFIG_HID_CHERRY is not set # CONFIG_HID_COUGAR is not set # CONFIG_HID_MACALLY is not set # CONFIG_HID_CMEDIA is not set # CONFIG_HID_CYPRESS is not set # CONFIG_HID_DRAGONRISE is not set # CONFIG_HID_EMS_FF is not set # CONFIG_HID_ELECOM is not set # CONFIG_HID_EZKEY is not set # CONFIG_HID_GEMBIRD is not set # CONFIG_HID_GFRM is not set # CONFIG_HID_GLORIOUS is not set # CONFIG_HID_VIVALDI is not set # CONFIG_HID_KEYTOUCH is not set # CONFIG_HID_KYE is not set # CONFIG_HID_WALTOP is not set # CONFIG_HID_VIEWSONIC is not set # CONFIG_HID_VRC2 is not set # CONFIG_HID_XIAOMI is not set # CONFIG_HID_GYRATION is not set # CONFIG_HID_ICADE is not set # CONFIG_HID_ITE is not set # CONFIG_HID_JABRA is not set # CONFIG_HID_TWINHAN is not set # CONFIG_HID_KENSINGTON is not set # CONFIG_HID_LCPOWER is not set # CONFIG_HID_LENOVO is not set # CONFIG_HID_MAGICMOUSE is not set # CONFIG_HID_MALTRON is not set # CONFIG_HID_MAYFLASH is not set # CONFIG_HID_REDRAGON is not set # CONFIG_HID_MICROSOFT is not set # CONFIG_HID_MONTEREY is not set # CONFIG_HID_MULTITOUCH is not set # CONFIG_HID_NTI is not set # CONFIG_HID_ORTEK is not set # CONFIG_HID_PANTHERLORD is not set # CONFIG_HID_PETALYNX is not set # CONFIG_HID_PICOLCD is not set # CONFIG_HID_PLANTRONICS is not set # CONFIG_HID_PXRC is not set # CONFIG_HID_RAZER is not set # CONFIG_HID_PRIMAX is not set # CONFIG_HID_SAITEK is not set # CONFIG_HID_SEMITEK is not set # CONFIG_HID_SPEEDLINK is not set # CONFIG_HID_STEAM is not set # 
CONFIG_HID_STEELSERIES is not set # CONFIG_HID_SUNPLUS is not set # CONFIG_HID_RMI is not set # CONFIG_HID_GREENASIA is not set # CONFIG_HID_SMARTJOYPLUS is not set # CONFIG_HID_TIVO is not set # CONFIG_HID_TOPSEED is not set # CONFIG_HID_TOPRE is not set # CONFIG_HID_UDRAW_PS3 is not set # CONFIG_HID_XINMO is not set # CONFIG_HID_ZEROPLUS is not set # CONFIG_HID_ZYDACRON is not set # CONFIG_HID_SENSOR_HUB is not set # CONFIG_HID_ALPS is not set # end of Special HID drivers # end of HID support CONFIG_USB_OHCI_LITTLE_ENDIAN=y CONFIG_USB_SUPPORT=y # CONFIG_USB_ULPI_BUS is not set CONFIG_USB_ARCH_HAS_HCD=y # CONFIG_USB is not set # # USB port drivers # # # USB Physical Layer drivers # # CONFIG_NOP_USB_XCEIV is not set # CONFIG_USB_ULPI is not set # end of USB Physical Layer drivers # CONFIG_USB_GADGET is not set # CONFIG_TYPEC is not set # CONFIG_USB_ROLE_SWITCH is not set # CONFIG_MMC is not set # CONFIG_SCSI_UFSHCD is not set # CONFIG_MEMSTICK is not set # CONFIG_NEW_LEDS is not set # CONFIG_ACCESSIBILITY is not set # CONFIG_INFINIBAND is not set CONFIG_EDAC_SUPPORT=y # CONFIG_EDAC is not set CONFIG_RTC_LIB=y CONFIG_RTC_CLASS=y CONFIG_RTC_HCTOSYS=y CONFIG_RTC_HCTOSYS_DEVICE="rtc0" CONFIG_RTC_SYSTOHC=y CONFIG_RTC_SYSTOHC_DEVICE="rtc0" # CONFIG_RTC_DEBUG is not set CONFIG_RTC_NVMEM=y # # RTC interfaces # CONFIG_RTC_INTF_SYSFS=y CONFIG_RTC_INTF_PROC=y CONFIG_RTC_INTF_DEV=y # CONFIG_RTC_INTF_DEV_UIE_EMUL is not set # CONFIG_RTC_DRV_TEST is not set # # I2C RTC drivers # # # SPI RTC drivers # # # SPI and I2C RTC drivers # # # Platform RTC drivers # # CONFIG_RTC_DRV_DS1286 is not set # CONFIG_RTC_DRV_DS1511 is not set # CONFIG_RTC_DRV_DS1553 is not set # CONFIG_RTC_DRV_DS1685_FAMILY is not set # CONFIG_RTC_DRV_DS1742 is not set # CONFIG_RTC_DRV_DS2404 is not set # CONFIG_RTC_DRV_EFI is not set # CONFIG_RTC_DRV_STK17TA8 is not set # CONFIG_RTC_DRV_M48T86 is not set # CONFIG_RTC_DRV_M48T35 is not set # CONFIG_RTC_DRV_M48T59 is not set # CONFIG_RTC_DRV_MSM6242 is not set # 
CONFIG_RTC_DRV_BQ4802 is not set # CONFIG_RTC_DRV_RP5C01 is not set # CONFIG_RTC_DRV_V3020 is not set # CONFIG_RTC_DRV_ZYNQMP is not set # # on-CPU RTC drivers # # CONFIG_RTC_DRV_PL030 is not set CONFIG_RTC_DRV_PL031=y # CONFIG_RTC_DRV_CADENCE is not set # CONFIG_RTC_DRV_FTRTC010 is not set # CONFIG_RTC_DRV_R7301 is not set # # HID Sensor RTC drivers # # CONFIG_RTC_DRV_GOLDFISH is not set CONFIG_DMADEVICES=y # CONFIG_DMADEVICES_DEBUG is not set # # DMA Devices # CONFIG_DMA_ENGINE=y CONFIG_DMA_ACPI=y CONFIG_DMA_OF=y # CONFIG_ALTERA_MSGDMA is not set # CONFIG_AMBA_PL08X is not set # CONFIG_DW_AXI_DMAC is not set # CONFIG_FSL_EDMA is not set # CONFIG_FSL_QDMA is not set # CONFIG_INTEL_IDMA64 is not set # CONFIG_MV_XOR_V2 is not set # CONFIG_PL330_DMA is not set # CONFIG_XILINX_DMA is not set # CONFIG_XILINX_ZYNQMP_DMA is not set # CONFIG_XILINX_ZYNQMP_DPDMA is not set # CONFIG_QCOM_HIDMA_MGMT is not set # CONFIG_QCOM_HIDMA is not set # CONFIG_DW_DMAC is not set # CONFIG_SF_PDMA is not set # # DMA Clients # # CONFIG_ASYNC_TX_DMA is not set # CONFIG_DMATEST is not set # # DMABUF options # CONFIG_SYNC_FILE=y # CONFIG_SW_SYNC is not set # CONFIG_UDMABUF is not set # CONFIG_DMABUF_MOVE_NOTIFY is not set # CONFIG_DMABUF_DEBUG is not set # CONFIG_DMABUF_SELFTESTS is not set # CONFIG_DMABUF_HEAPS is not set # CONFIG_DMABUF_SYSFS_STATS is not set # end of DMABUF options CONFIG_AUXDISPLAY=y # CONFIG_IMG_ASCII_LCD is not set CONFIG_CHARLCD_BL_OFF=y # CONFIG_CHARLCD_BL_ON is not set # CONFIG_CHARLCD_BL_FLASH is not set # CONFIG_UIO is not set # CONFIG_VFIO is not set CONFIG_VIRT_DRIVERS=y CONFIG_VMGENID=y CONFIG_VIRTIO_ANCHOR=y CONFIG_VIRTIO=y CONFIG_VIRTIO_MENU=y CONFIG_VIRTIO_BALLOON=y # CONFIG_VIRTIO_MEM is not set # CONFIG_VIRTIO_INPUT is not set CONFIG_VIRTIO_MMIO=y # CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES is not set # CONFIG_VDPA is not set CONFIG_VHOST_MENU=y # CONFIG_VHOST_NET is not set # CONFIG_VHOST_VSOCK is not set # CONFIG_VHOST_CROSS_ENDIAN_LEGACY is not set # # 
Microsoft Hyper-V guest support # # CONFIG_HYPERV is not set # end of Microsoft Hyper-V guest support # CONFIG_GREYBUS is not set # CONFIG_COMEDI is not set CONFIG_STAGING=y # CONFIG_STAGING_MEDIA is not set # CONFIG_STAGING_BOARD is not set # CONFIG_XIL_AXIS_FIFO is not set # CONFIG_FIELDBUS_DEV is not set # CONFIG_GOLDFISH is not set # CONFIG_CHROME_PLATFORMS is not set # CONFIG_MELLANOX_PLATFORM is not set CONFIG_SURFACE_PLATFORMS=y # CONFIG_SURFACE_GPE is not set # CONFIG_SURFACE_PRO3_BUTTON is not set CONFIG_HAVE_CLK=y CONFIG_HAVE_CLK_PREPARE=y CONFIG_COMMON_CLK=y # # Clock driver for ARM Reference designs # # CONFIG_CLK_ICST is not set # CONFIG_CLK_SP810 is not set # end of Clock driver for ARM Reference designs # CONFIG_COMMON_CLK_AXI_CLKGEN is not set # CONFIG_COMMON_CLK_XGENE is not set # CONFIG_COMMON_CLK_FIXED_MMIO is not set # CONFIG_XILINX_VCU is not set # CONFIG_COMMON_CLK_XLNX_CLKWZRD is not set # CONFIG_HWSPINLOCK is not set # # Clock Source drivers # CONFIG_TIMER_OF=y CONFIG_TIMER_ACPI=y CONFIG_TIMER_PROBE=y CONFIG_ARM_ARCH_TIMER=y CONFIG_ARM_ARCH_TIMER_EVTSTREAM=y CONFIG_ARM_ARCH_TIMER_OOL_WORKAROUND=y CONFIG_FSL_ERRATUM_A008585=y CONFIG_HISILICON_ERRATUM_161010101=y CONFIG_ARM64_ERRATUM_858921=y # CONFIG_MICROCHIP_PIT64B is not set # end of Clock Source drivers CONFIG_MAILBOX=y # CONFIG_ARM_MHU is not set # CONFIG_ARM_MHU_V2 is not set # CONFIG_PLATFORM_MHU is not set # CONFIG_PL320_MBOX is not set # CONFIG_PCC is not set # CONFIG_ALTERA_MBOX is not set # CONFIG_MAILBOX_TEST is not set CONFIG_IOMMU_IOVA=y CONFIG_IOMMU_API=y CONFIG_IOMMU_SUPPORT=y # # Generic IOMMU Pagetable Support # # CONFIG_IOMMU_IO_PGTABLE_LPAE is not set # CONFIG_IOMMU_IO_PGTABLE_ARMV7S is not set # CONFIG_IOMMU_IO_PGTABLE_DART is not set # end of Generic IOMMU Pagetable Support # CONFIG_IOMMU_DEBUGFS is not set CONFIG_IOMMU_DEFAULT_DMA_STRICT=y # CONFIG_IOMMU_DEFAULT_DMA_LAZY is not set # CONFIG_IOMMU_DEFAULT_PASSTHROUGH is not set CONFIG_OF_IOMMU=y CONFIG_IOMMU_DMA=y # 
CONFIG_ARM_SMMU is not set # CONFIG_ARM_SMMU_V3 is not set # CONFIG_VIRTIO_IOMMU is not set # # Remoteproc drivers # # CONFIG_REMOTEPROC is not set # end of Remoteproc drivers # # Rpmsg drivers # # CONFIG_RPMSG_QCOM_GLINK_RPM is not set # CONFIG_RPMSG_VIRTIO is not set # end of Rpmsg drivers # CONFIG_SOUNDWIRE is not set # # SOC (System On Chip) specific Drivers # # # Amlogic SoC drivers # # end of Amlogic SoC drivers # # Broadcom SoC drivers # # CONFIG_SOC_BRCMSTB is not set # end of Broadcom SoC drivers # # NXP/Freescale QorIQ SoC drivers # # CONFIG_QUICC_ENGINE is not set # CONFIG_FSL_RCPM is not set # end of NXP/Freescale QorIQ SoC drivers # # fujitsu SoC drivers # # CONFIG_A64FX_DIAG is not set # end of fujitsu SoC drivers # # i.MX SoC drivers # # end of i.MX SoC drivers # # Enable LiteX SoC Builder specific drivers # # CONFIG_LITEX_SOC_CONTROLLER is not set # end of Enable LiteX SoC Builder specific drivers # # Qualcomm SoC drivers # # end of Qualcomm SoC drivers # CONFIG_SOC_TI is not set # # Xilinx SoC drivers # # end of Xilinx SoC drivers # end of SOC (System On Chip) specific Drivers # CONFIG_PM_DEVFREQ is not set # CONFIG_EXTCON is not set # CONFIG_MEMORY is not set # CONFIG_IIO is not set # CONFIG_PWM is not set # # IRQ chip support # CONFIG_IRQCHIP=y CONFIG_ARM_GIC=y CONFIG_ARM_GIC_MAX_NR=1 CONFIG_ARM_GIC_V3=y CONFIG_ARM_GIC_V3_ITS=y # CONFIG_AL_FIC is not set # CONFIG_XILINX_INTC is not set CONFIG_PARTITION_PERCPU=y # end of IRQ chip support # CONFIG_IPACK_BUS is not set # CONFIG_RESET_CONTROLLER is not set # # PHY Subsystem # # CONFIG_GENERIC_PHY is not set # CONFIG_PHY_XGENE is not set # CONFIG_PHY_CAN_TRANSCEIVER is not set # # PHY drivers for Broadcom platforms # # CONFIG_BCM_KONA_USB2_PHY is not set # end of PHY drivers for Broadcom platforms # CONFIG_PHY_CADENCE_TORRENT is not set # CONFIG_PHY_CADENCE_DPHY is not set # CONFIG_PHY_CADENCE_DPHY_RX is not set # CONFIG_PHY_CADENCE_SALVO is not set # CONFIG_PHY_PXA_28NM_HSIC is not set # 
CONFIG_PHY_PXA_28NM_USB2 is not set # end of PHY Subsystem # CONFIG_POWERCAP is not set # CONFIG_MCB is not set # # Performance monitor support # # CONFIG_ARM_CCI_PMU is not set # CONFIG_ARM_CCN is not set # CONFIG_ARM_CMN is not set CONFIG_ARM_PMU=y CONFIG_ARM_PMU_ACPI=y # CONFIG_ARM_SMMU_V3_PMU is not set # CONFIG_ARM_DSU_PMU is not set # CONFIG_ARM_SPE_PMU is not set # CONFIG_ARM_DMC620_PMU is not set # CONFIG_ALIBABA_UNCORE_DRW_PMU is not set # CONFIG_HISI_PMU is not set # end of Performance monitor support CONFIG_RAS=y # # Android # # CONFIG_ANDROID_BINDER_IPC is not set # end of Android # CONFIG_LIBNVDIMM is not set # CONFIG_DAX is not set CONFIG_NVMEM=y CONFIG_NVMEM_SYSFS=y # CONFIG_NVMEM_RMEM is not set # # HW tracing support # # CONFIG_STM is not set # CONFIG_INTEL_TH is not set # end of HW tracing support # CONFIG_FPGA is not set # CONFIG_FSI is not set # CONFIG_TEE is not set # CONFIG_SIOX is not set # CONFIG_SLIMBUS is not set # CONFIG_INTERCONNECT is not set # CONFIG_COUNTER is not set # CONFIG_PECI is not set # CONFIG_HTE is not set # end of Device Drivers # # File systems # CONFIG_DCACHE_WORD_ACCESS=y CONFIG_VALIDATE_FS_PARSER=y CONFIG_FS_IOMAP=y # CONFIG_EXT2_FS is not set # CONFIG_EXT3_FS is not set CONFIG_EXT4_FS=y CONFIG_EXT4_USE_FOR_EXT2=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_EXT4_FS_SECURITY=y # CONFIG_EXT4_DEBUG is not set CONFIG_JBD2=y # CONFIG_JBD2_DEBUG is not set CONFIG_FS_MBCACHE=y # CONFIG_REISERFS_FS is not set # CONFIG_JFS_FS is not set CONFIG_XFS_FS=y CONFIG_XFS_SUPPORT_V4=y CONFIG_XFS_QUOTA=y CONFIG_XFS_POSIX_ACL=y # CONFIG_XFS_RT is not set # CONFIG_XFS_ONLINE_SCRUB is not set # CONFIG_XFS_WARN is not set # CONFIG_XFS_DEBUG is not set # CONFIG_GFS2_FS is not set # CONFIG_BTRFS_FS is not set # CONFIG_NILFS2_FS is not set # CONFIG_F2FS_FS is not set CONFIG_FS_POSIX_ACL=y CONFIG_EXPORTFS=y # CONFIG_EXPORTFS_BLOCK_OPS is not set CONFIG_FILE_LOCKING=y CONFIG_FS_ENCRYPTION=y CONFIG_FS_ENCRYPTION_ALGS=y # CONFIG_FS_VERITY is not set 
CONFIG_FSNOTIFY=y CONFIG_DNOTIFY=y CONFIG_INOTIFY_USER=y CONFIG_FANOTIFY=y CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y CONFIG_QUOTA=y CONFIG_QUOTA_NETLINK_INTERFACE=y # CONFIG_PRINT_QUOTA_WARNING is not set # CONFIG_QUOTA_DEBUG is not set # CONFIG_QFMT_V1 is not set # CONFIG_QFMT_V2 is not set CONFIG_QUOTACTL=y CONFIG_AUTOFS4_FS=y CONFIG_AUTOFS_FS=y # CONFIG_FUSE_FS is not set CONFIG_OVERLAY_FS=y # CONFIG_OVERLAY_FS_REDIRECT_DIR is not set CONFIG_OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW=y # CONFIG_OVERLAY_FS_INDEX is not set # CONFIG_OVERLAY_FS_XINO_AUTO is not set # CONFIG_OVERLAY_FS_METACOPY is not set # # Caches # # CONFIG_FSCACHE is not set # end of Caches # # CD-ROM/DVD Filesystems # # CONFIG_ISO9660_FS is not set # CONFIG_UDF_FS is not set # end of CD-ROM/DVD Filesystems # # DOS/FAT/EXFAT/NT Filesystems # # CONFIG_MSDOS_FS is not set # CONFIG_VFAT_FS is not set # CONFIG_EXFAT_FS is not set # CONFIG_NTFS_FS is not set # CONFIG_NTFS3_FS is not set # end of DOS/FAT/EXFAT/NT Filesystems # # Pseudo filesystems # CONFIG_PROC_FS=y CONFIG_PROC_KCORE=y CONFIG_PROC_SYSCTL=y CONFIG_PROC_PAGE_MONITOR=y CONFIG_PROC_CHILDREN=y CONFIG_KERNFS=y CONFIG_SYSFS=y CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y CONFIG_TMPFS_XATTR=y # CONFIG_TMPFS_INODE64 is not set CONFIG_ARCH_SUPPORTS_HUGETLBFS=y CONFIG_HUGETLBFS=y CONFIG_HUGETLB_PAGE=y CONFIG_MEMFD_CREATE=y CONFIG_ARCH_HAS_GIGANTIC_PAGE=y # CONFIG_CONFIGFS_FS is not set # CONFIG_EFIVAR_FS is not set # end of Pseudo filesystems CONFIG_MISC_FILESYSTEMS=y # CONFIG_ORANGEFS_FS is not set # CONFIG_ADFS_FS is not set # CONFIG_AFFS_FS is not set # CONFIG_ECRYPT_FS is not set # CONFIG_HFS_FS is not set # CONFIG_HFSPLUS_FS is not set # CONFIG_BEFS_FS is not set # CONFIG_BFS_FS is not set # CONFIG_EFS_FS is not set # CONFIG_CRAMFS is not set CONFIG_SQUASHFS=y CONFIG_SQUASHFS_FILE_CACHE=y # CONFIG_SQUASHFS_FILE_DIRECT is not set CONFIG_SQUASHFS_DECOMP_SINGLE=y # CONFIG_SQUASHFS_DECOMP_MULTI is not set # CONFIG_SQUASHFS_DECOMP_MULTI_PERCPU is not set 
CONFIG_SQUASHFS_XATTR=y CONFIG_SQUASHFS_ZLIB=y CONFIG_SQUASHFS_LZ4=y CONFIG_SQUASHFS_LZO=y CONFIG_SQUASHFS_XZ=y # CONFIG_SQUASHFS_ZSTD is not set # CONFIG_SQUASHFS_4K_DEVBLK_SIZE is not set # CONFIG_SQUASHFS_EMBEDDED is not set CONFIG_SQUASHFS_FRAGMENT_CACHE_SIZE=3 # CONFIG_VXFS_FS is not set # CONFIG_MINIX_FS is not set # CONFIG_OMFS_FS is not set # CONFIG_HPFS_FS is not set # CONFIG_QNX4FS_FS is not set # CONFIG_QNX6FS_FS is not set # CONFIG_ROMFS_FS is not set CONFIG_PSTORE=y CONFIG_PSTORE_DEFAULT_KMSG_BYTES=10240 CONFIG_PSTORE_DEFLATE_COMPRESS=y # CONFIG_PSTORE_LZO_COMPRESS is not set # CONFIG_PSTORE_LZ4_COMPRESS is not set # CONFIG_PSTORE_LZ4HC_COMPRESS is not set # CONFIG_PSTORE_842_COMPRESS is not set # CONFIG_PSTORE_ZSTD_COMPRESS is not set CONFIG_PSTORE_COMPRESS=y CONFIG_PSTORE_DEFLATE_COMPRESS_DEFAULT=y CONFIG_PSTORE_COMPRESS_DEFAULT="deflate" # CONFIG_PSTORE_CONSOLE is not set # CONFIG_PSTORE_PMSG is not set # CONFIG_PSTORE_RAM is not set # CONFIG_PSTORE_BLK is not set # CONFIG_SYSV_FS is not set # CONFIG_UFS_FS is not set # CONFIG_EROFS_FS is not set CONFIG_NETWORK_FILESYSTEMS=y CONFIG_NFS_FS=y # CONFIG_NFS_V2 is not set # CONFIG_NFS_V3 is not set CONFIG_NFS_V4=y # CONFIG_NFS_SWAP is not set CONFIG_NFS_V4_1=y CONFIG_NFS_V4_2=y CONFIG_PNFS_FILE_LAYOUT=y CONFIG_NFS_V4_1_IMPLEMENTATION_ID_DOMAIN="kernel.org" # CONFIG_NFS_V4_1_MIGRATION is not set CONFIG_NFS_V4_SECURITY_LABEL=y # CONFIG_ROOT_NFS is not set # CONFIG_NFS_USE_LEGACY_DNS is not set CONFIG_NFS_USE_KERNEL_DNS=y CONFIG_NFS_DISABLE_UDP_SUPPORT=y # CONFIG_NFS_V4_2_READ_PLUS is not set # CONFIG_NFSD is not set CONFIG_GRACE_PERIOD=y CONFIG_LOCKD=y CONFIG_NFS_COMMON=y CONFIG_NFS_V4_2_SSC_HELPER=y CONFIG_SUNRPC=y CONFIG_SUNRPC_GSS=y CONFIG_SUNRPC_BACKCHANNEL=y # CONFIG_SUNRPC_DEBUG is not set # CONFIG_CEPH_FS is not set # CONFIG_CIFS is not set # CONFIG_SMB_SERVER is not set # CONFIG_CODA_FS is not set # CONFIG_AFS_FS is not set CONFIG_NLS=y CONFIG_NLS_DEFAULT="utf8" # CONFIG_NLS_CODEPAGE_437 is not set 
# CONFIG_NLS_CODEPAGE_737 is not set # CONFIG_NLS_CODEPAGE_775 is not set # CONFIG_NLS_CODEPAGE_850 is not set # CONFIG_NLS_CODEPAGE_852 is not set # CONFIG_NLS_CODEPAGE_855 is not set # CONFIG_NLS_CODEPAGE_857 is not set # CONFIG_NLS_CODEPAGE_860 is not set # CONFIG_NLS_CODEPAGE_861 is not set # CONFIG_NLS_CODEPAGE_862 is not set # CONFIG_NLS_CODEPAGE_863 is not set # CONFIG_NLS_CODEPAGE_864 is not set # CONFIG_NLS_CODEPAGE_865 is not set # CONFIG_NLS_CODEPAGE_866 is not set # CONFIG_NLS_CODEPAGE_869 is not set # CONFIG_NLS_CODEPAGE_936 is not set # CONFIG_NLS_CODEPAGE_950 is not set # CONFIG_NLS_CODEPAGE_932 is not set # CONFIG_NLS_CODEPAGE_949 is not set # CONFIG_NLS_CODEPAGE_874 is not set # CONFIG_NLS_ISO8859_8 is not set # CONFIG_NLS_CODEPAGE_1250 is not set # CONFIG_NLS_CODEPAGE_1251 is not set # CONFIG_NLS_ASCII is not set # CONFIG_NLS_ISO8859_1 is not set # CONFIG_NLS_ISO8859_2 is not set # CONFIG_NLS_ISO8859_3 is not set # CONFIG_NLS_ISO8859_4 is not set # CONFIG_NLS_ISO8859_5 is not set # CONFIG_NLS_ISO8859_6 is not set # CONFIG_NLS_ISO8859_7 is not set # CONFIG_NLS_ISO8859_9 is not set # CONFIG_NLS_ISO8859_13 is not set # CONFIG_NLS_ISO8859_14 is not set # CONFIG_NLS_ISO8859_15 is not set # CONFIG_NLS_KOI8_R is not set # CONFIG_NLS_KOI8_U is not set # CONFIG_NLS_MAC_ROMAN is not set # CONFIG_NLS_MAC_CELTIC is not set # CONFIG_NLS_MAC_CENTEURO is not set # CONFIG_NLS_MAC_CROATIAN is not set # CONFIG_NLS_MAC_CYRILLIC is not set # CONFIG_NLS_MAC_GAELIC is not set # CONFIG_NLS_MAC_GREEK is not set # CONFIG_NLS_MAC_ICELAND is not set # CONFIG_NLS_MAC_INUIT is not set # CONFIG_NLS_MAC_ROMANIAN is not set # CONFIG_NLS_MAC_TURKISH is not set # CONFIG_NLS_UTF8 is not set # CONFIG_UNICODE is not set CONFIG_IO_WQ=y # end of File systems # # Security options # CONFIG_KEYS=y # CONFIG_KEYS_REQUEST_CACHE is not set CONFIG_PERSISTENT_KEYRINGS=y # CONFIG_TRUSTED_KEYS is not set CONFIG_ENCRYPTED_KEYS=y # CONFIG_USER_DECRYPTED_DATA is not set # CONFIG_KEY_DH_OPERATIONS is 
not set # CONFIG_SECURITY_DMESG_RESTRICT is not set CONFIG_SECURITY=y CONFIG_SECURITY_WRITABLE_HOOKS=y CONFIG_SECURITYFS=y CONFIG_SECURITY_NETWORK=y CONFIG_SECURITY_NETWORK_XFRM=y # CONFIG_SECURITY_PATH is not set CONFIG_LSM_MMAP_MIN_ADDR=65536 CONFIG_HAVE_HARDENED_USERCOPY_ALLOCATOR=y CONFIG_HARDENED_USERCOPY=y # CONFIG_FORTIFY_SOURCE is not set # CONFIG_STATIC_USERMODEHELPER is not set CONFIG_SECURITY_SELINUX=y CONFIG_SECURITY_SELINUX_BOOTPARAM=y CONFIG_SECURITY_SELINUX_DISABLE=y CONFIG_SECURITY_SELINUX_DEVELOP=y CONFIG_SECURITY_SELINUX_AVC_STATS=y CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE=1 CONFIG_SECURITY_SELINUX_SIDTAB_HASH_BITS=9 CONFIG_SECURITY_SELINUX_SID2STR_CACHE_SIZE=256 # CONFIG_SECURITY_SMACK is not set # CONFIG_SECURITY_TOMOYO is not set # CONFIG_SECURITY_APPARMOR is not set # CONFIG_SECURITY_LOADPIN is not set # CONFIG_SECURITY_YAMA is not set # CONFIG_SECURITY_SAFESETID is not set # CONFIG_SECURITY_LOCKDOWN_LSM is not set # CONFIG_SECURITY_LANDLOCK is not set # CONFIG_INTEGRITY is not set # CONFIG_IMA_SECURE_AND_OR_TRUSTED_BOOT is not set CONFIG_DEFAULT_SECURITY_SELINUX=y # CONFIG_DEFAULT_SECURITY_DAC is not set CONFIG_LSM="lockdown,yama,loadpin,safesetid,integrity,selinux,smack,tomoyo,apparmor,bpf" # # Kernel hardening options # # # Memory initialization # CONFIG_INIT_STACK_NONE=y # CONFIG_INIT_ON_ALLOC_DEFAULT_ON is not set # CONFIG_INIT_ON_FREE_DEFAULT_ON is not set CONFIG_CC_HAS_ZERO_CALL_USED_REGS=y # CONFIG_ZERO_CALL_USED_REGS is not set # end of Memory initialization CONFIG_RANDSTRUCT_NONE=y # end of Kernel hardening options # end of Security options CONFIG_CRYPTO=y # # Crypto core or helper # CONFIG_CRYPTO_ALGAPI=y CONFIG_CRYPTO_ALGAPI2=y CONFIG_CRYPTO_AEAD=y CONFIG_CRYPTO_AEAD2=y CONFIG_CRYPTO_SKCIPHER=y CONFIG_CRYPTO_SKCIPHER2=y CONFIG_CRYPTO_HASH=y CONFIG_CRYPTO_HASH2=y CONFIG_CRYPTO_RNG=y CONFIG_CRYPTO_RNG2=y CONFIG_CRYPTO_RNG_DEFAULT=y CONFIG_CRYPTO_AKCIPHER2=y CONFIG_CRYPTO_AKCIPHER=y CONFIG_CRYPTO_KPP2=y CONFIG_CRYPTO_ACOMP2=y 
CONFIG_CRYPTO_MANAGER=y CONFIG_CRYPTO_MANAGER2=y # CONFIG_CRYPTO_USER is not set CONFIG_CRYPTO_MANAGER_DISABLE_TESTS=y CONFIG_CRYPTO_NULL=y CONFIG_CRYPTO_NULL2=y # CONFIG_CRYPTO_PCRYPT is not set # CONFIG_CRYPTO_CRYPTD is not set # CONFIG_CRYPTO_AUTHENC is not set # end of Crypto core or helper # # Public-key cryptography # CONFIG_CRYPTO_RSA=y # CONFIG_CRYPTO_DH is not set # CONFIG_CRYPTO_ECDH is not set # CONFIG_CRYPTO_ECDSA is not set # CONFIG_CRYPTO_ECRDSA is not set # CONFIG_CRYPTO_SM2 is not set # CONFIG_CRYPTO_CURVE25519 is not set # end of Public-key cryptography # # Block ciphers # CONFIG_CRYPTO_AES=y # CONFIG_CRYPTO_AES_TI is not set # CONFIG_CRYPTO_ARIA is not set # CONFIG_CRYPTO_BLOWFISH is not set # CONFIG_CRYPTO_CAMELLIA is not set # CONFIG_CRYPTO_CAST5 is not set # CONFIG_CRYPTO_CAST6 is not set # CONFIG_CRYPTO_DES is not set # CONFIG_CRYPTO_FCRYPT is not set # CONFIG_CRYPTO_SERPENT is not set # CONFIG_CRYPTO_SM4_GENERIC is not set # CONFIG_CRYPTO_TWOFISH is not set # end of Block ciphers # # Length-preserving ciphers and modes # # CONFIG_CRYPTO_ADIANTUM is not set # CONFIG_CRYPTO_CHACHA20 is not set CONFIG_CRYPTO_CBC=y # CONFIG_CRYPTO_CFB is not set CONFIG_CRYPTO_CTR=y CONFIG_CRYPTO_CTS=y CONFIG_CRYPTO_ECB=y # CONFIG_CRYPTO_HCTR2 is not set # CONFIG_CRYPTO_KEYWRAP is not set # CONFIG_CRYPTO_LRW is not set # CONFIG_CRYPTO_OFB is not set # CONFIG_CRYPTO_PCBC is not set CONFIG_CRYPTO_XTS=y # end of Length-preserving ciphers and modes # # AEAD (authenticated encryption with associated data) ciphers # # CONFIG_CRYPTO_AEGIS128 is not set # CONFIG_CRYPTO_CHACHA20POLY1305 is not set # CONFIG_CRYPTO_CCM is not set # CONFIG_CRYPTO_GCM is not set CONFIG_CRYPTO_SEQIV=y # CONFIG_CRYPTO_ECHAINIV is not set # CONFIG_CRYPTO_ESSIV is not set # end of AEAD (authenticated encryption with associated data) ciphers # # Hashes, digests, and MACs # # CONFIG_CRYPTO_BLAKE2B is not set # CONFIG_CRYPTO_CMAC is not set # CONFIG_CRYPTO_GHASH is not set CONFIG_CRYPTO_HMAC=y # 
CONFIG_CRYPTO_MD4 is not set CONFIG_CRYPTO_MD5=y # CONFIG_CRYPTO_MICHAEL_MIC is not set # CONFIG_CRYPTO_POLY1305 is not set # CONFIG_CRYPTO_RMD160 is not set CONFIG_CRYPTO_SHA1=y CONFIG_CRYPTO_SHA256=y CONFIG_CRYPTO_SHA512=y CONFIG_CRYPTO_SHA3=y # CONFIG_CRYPTO_SM3_GENERIC is not set # CONFIG_CRYPTO_STREEBOG is not set # CONFIG_CRYPTO_VMAC is not set # CONFIG_CRYPTO_WP512 is not set # CONFIG_CRYPTO_XCBC is not set # CONFIG_CRYPTO_XXHASH is not set # end of Hashes, digests, and MACs # # CRCs (cyclic redundancy checks) # CONFIG_CRYPTO_CRC32C=y # CONFIG_CRYPTO_CRC32 is not set CONFIG_CRYPTO_CRCT10DIF=y # end of CRCs (cyclic redundancy checks) # # Compression # CONFIG_CRYPTO_DEFLATE=y CONFIG_CRYPTO_LZO=y # CONFIG_CRYPTO_842 is not set # CONFIG_CRYPTO_LZ4 is not set # CONFIG_CRYPTO_LZ4HC is not set # CONFIG_CRYPTO_ZSTD is not set # end of Compression # # Random number generation # # CONFIG_CRYPTO_ANSI_CPRNG is not set CONFIG_CRYPTO_DRBG_MENU=y CONFIG_CRYPTO_DRBG_HMAC=y CONFIG_CRYPTO_DRBG_HASH=y CONFIG_CRYPTO_DRBG_CTR=y CONFIG_CRYPTO_DRBG=y CONFIG_CRYPTO_JITTERENTROPY=y # end of Random number generation # # Userspace interface # # CONFIG_CRYPTO_USER_API_HASH is not set # CONFIG_CRYPTO_USER_API_SKCIPHER is not set # CONFIG_CRYPTO_USER_API_RNG is not set # CONFIG_CRYPTO_USER_API_AEAD is not set # end of Userspace interface CONFIG_CRYPTO_HASH_INFO=y # CONFIG_CRYPTO_NHPOLY1305_NEON is not set # CONFIG_CRYPTO_CHACHA20_NEON is not set # # Accelerated Cryptographic Algorithms for CPU (arm64) # # CONFIG_CRYPTO_GHASH_ARM64_CE is not set # CONFIG_CRYPTO_POLY1305_NEON is not set # CONFIG_CRYPTO_SHA1_ARM64_CE is not set # CONFIG_CRYPTO_SHA256_ARM64 is not set # CONFIG_CRYPTO_SHA2_ARM64_CE is not set # CONFIG_CRYPTO_SHA512_ARM64 is not set # CONFIG_CRYPTO_SHA512_ARM64_CE is not set # CONFIG_CRYPTO_SHA3_ARM64 is not set # CONFIG_CRYPTO_SM3_NEON is not set # CONFIG_CRYPTO_SM3_ARM64_CE is not set # CONFIG_CRYPTO_POLYVAL_ARM64_CE is not set # CONFIG_CRYPTO_AES_ARM64 is not set # 
CONFIG_CRYPTO_AES_ARM64_CE is not set # CONFIG_CRYPTO_AES_ARM64_CE_BLK is not set # CONFIG_CRYPTO_AES_ARM64_NEON_BLK is not set # CONFIG_CRYPTO_AES_ARM64_BS is not set # CONFIG_CRYPTO_SM4_ARM64_CE is not set # CONFIG_CRYPTO_SM4_ARM64_CE_BLK is not set # CONFIG_CRYPTO_SM4_ARM64_NEON_BLK is not set # CONFIG_CRYPTO_AES_ARM64_CE_CCM is not set # CONFIG_CRYPTO_CRCT10DIF_ARM64_CE is not set # end of Accelerated Cryptographic Algorithms for CPU (arm64) # CONFIG_CRYPTO_HW is not set CONFIG_ASYMMETRIC_KEY_TYPE=y CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=y CONFIG_X509_CERTIFICATE_PARSER=y # CONFIG_PKCS8_PRIVATE_KEY_PARSER is not set CONFIG_PKCS7_MESSAGE_PARSER=y # CONFIG_FIPS_SIGNATURE_SELFTEST is not set # # Certificates for signature checking # CONFIG_SYSTEM_TRUSTED_KEYRING=y CONFIG_SYSTEM_TRUSTED_KEYS="" # CONFIG_SYSTEM_EXTRA_CERTIFICATE is not set # CONFIG_SECONDARY_TRUSTED_KEYRING is not set # CONFIG_SYSTEM_BLACKLIST_KEYRING is not set # end of Certificates for signature checking CONFIG_BINARY_PRINTF=y # # Library routines # # CONFIG_PACKING is not set CONFIG_BITREVERSE=y CONFIG_HAVE_ARCH_BITREVERSE=y CONFIG_GENERIC_STRNCPY_FROM_USER=y CONFIG_GENERIC_STRNLEN_USER=y CONFIG_GENERIC_NET_UTILS=y # CONFIG_CORDIC is not set # CONFIG_PRIME_NUMBERS is not set CONFIG_RATIONAL=y CONFIG_GENERIC_PCI_IOMAP=y CONFIG_ARCH_USE_CMPXCHG_LOCKREF=y CONFIG_ARCH_HAS_FAST_MULTIPLIER=y CONFIG_ARCH_USE_SYM_ANNOTATIONS=y # CONFIG_INDIRECT_PIO is not set # # Crypto library routines # CONFIG_CRYPTO_LIB_UTILS=y CONFIG_CRYPTO_LIB_AES=y CONFIG_CRYPTO_LIB_BLAKE2S_GENERIC=y # CONFIG_CRYPTO_LIB_CHACHA is not set # CONFIG_CRYPTO_LIB_CURVE25519 is not set CONFIG_CRYPTO_LIB_POLY1305_RSIZE=9 # CONFIG_CRYPTO_LIB_POLY1305 is not set # CONFIG_CRYPTO_LIB_CHACHA20POLY1305 is not set CONFIG_CRYPTO_LIB_SHA1=y CONFIG_CRYPTO_LIB_SHA256=y # end of Crypto library routines CONFIG_CRC_CCITT=y CONFIG_CRC16=y CONFIG_CRC_T10DIF=y # CONFIG_CRC64_ROCKSOFT is not set # CONFIG_CRC_ITU_T is not set CONFIG_CRC32=y # 
CONFIG_CRC32_SELFTEST is not set CONFIG_CRC32_SLICEBY8=y # CONFIG_CRC32_SLICEBY4 is not set # CONFIG_CRC32_SARWATE is not set # CONFIG_CRC32_BIT is not set # CONFIG_CRC64 is not set # CONFIG_CRC4 is not set # CONFIG_CRC7 is not set CONFIG_LIBCRC32C=y # CONFIG_CRC8 is not set CONFIG_XXHASH=y CONFIG_AUDIT_GENERIC=y CONFIG_AUDIT_ARCH_COMPAT_GENERIC=y CONFIG_AUDIT_COMPAT_GENERIC=y # CONFIG_RANDOM32_SELFTEST is not set CONFIG_ZLIB_INFLATE=y CONFIG_ZLIB_DEFLATE=y CONFIG_LZO_COMPRESS=y CONFIG_LZO_DECOMPRESS=y CONFIG_LZ4_DECOMPRESS=y CONFIG_ZSTD_COMMON=y CONFIG_ZSTD_DECOMPRESS=y CONFIG_XZ_DEC=y CONFIG_XZ_DEC_X86=y CONFIG_XZ_DEC_POWERPC=y CONFIG_XZ_DEC_IA64=y CONFIG_XZ_DEC_ARM=y CONFIG_XZ_DEC_ARMTHUMB=y CONFIG_XZ_DEC_SPARC=y # CONFIG_XZ_DEC_MICROLZMA is not set CONFIG_XZ_DEC_BCJ=y # CONFIG_XZ_DEC_TEST is not set CONFIG_DECOMPRESS_GZIP=y CONFIG_DECOMPRESS_BZIP2=y CONFIG_DECOMPRESS_LZMA=y CONFIG_DECOMPRESS_XZ=y CONFIG_DECOMPRESS_LZO=y CONFIG_DECOMPRESS_LZ4=y CONFIG_DECOMPRESS_ZSTD=y CONFIG_GENERIC_ALLOCATOR=y CONFIG_XARRAY_MULTI=y CONFIG_ASSOCIATIVE_ARRAY=y CONFIG_HAS_IOMEM=y CONFIG_HAS_DMA=y CONFIG_DMA_OPS=y # CONFIG_DMA_PAGE_TOUCHING is not set CONFIG_NEED_SG_DMA_LENGTH=y CONFIG_NEED_DMA_MAP_STATE=y CONFIG_ARCH_DMA_ADDR_T_64BIT=y CONFIG_DMA_DECLARE_COHERENT=y CONFIG_ARCH_HAS_SETUP_DMA_OPS=y CONFIG_ARCH_HAS_TEARDOWN_DMA_OPS=y CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE=y CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU=y CONFIG_ARCH_HAS_DMA_PREP_COHERENT=y CONFIG_SWIOTLB=y # CONFIG_DMA_RESTRICTED_POOL is not set CONFIG_DMA_NONCOHERENT_MMAP=y CONFIG_DMA_COHERENT_POOL=y CONFIG_DMA_DIRECT_REMAP=y # CONFIG_DMA_API_DEBUG is not set # CONFIG_DMA_MAP_BENCHMARK is not set CONFIG_SGL_ALLOC=y # CONFIG_FORCE_NR_CPUS is not set CONFIG_CPU_RMAP=y CONFIG_DQL=y CONFIG_GLOB=y # CONFIG_GLOB_SELFTEST is not set CONFIG_NLATTR=y CONFIG_CLZ_TAB=y CONFIG_IRQ_POLL=y CONFIG_MPILIB=y CONFIG_LIBFDT=y CONFIG_OID_REGISTRY=y CONFIG_UCS2_STRING=y CONFIG_HAVE_GENERIC_VDSO=y CONFIG_GENERIC_GETTIMEOFDAY=y 
CONFIG_GENERIC_VDSO_TIME_NS=y CONFIG_FONT_SUPPORT=y CONFIG_FONT_8x16=y CONFIG_FONT_AUTOSELECT=y CONFIG_SG_POOL=y CONFIG_ARCH_STACKWALK=y CONFIG_STACKDEPOT=y CONFIG_SBITMAP=y # end of Library routines CONFIG_GENERIC_IOREMAP=y CONFIG_GENERIC_LIB_DEVMEM_IS_ALLOWED=y # # Kernel hacking # # # printk and dmesg options # CONFIG_PRINTK_TIME=y # CONFIG_PRINTK_CALLER is not set # CONFIG_STACKTRACE_BUILD_ID is not set CONFIG_CONSOLE_LOGLEVEL_DEFAULT=7 CONFIG_CONSOLE_LOGLEVEL_QUIET=4 CONFIG_MESSAGE_LOGLEVEL_DEFAULT=4 # CONFIG_BOOT_PRINTK_DELAY is not set # CONFIG_DYNAMIC_DEBUG is not set # CONFIG_DYNAMIC_DEBUG_CORE is not set CONFIG_SYMBOLIC_ERRNAME=y CONFIG_DEBUG_BUGVERBOSE=y # end of printk and dmesg options CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_MISC=y # # Compile-time checks and compiler options # CONFIG_AS_HAS_NON_CONST_LEB128=y CONFIG_DEBUG_INFO_NONE=y # CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT is not set # CONFIG_DEBUG_INFO_DWARF4 is not set # CONFIG_DEBUG_INFO_DWARF5 is not set CONFIG_FRAME_WARN=2048 CONFIG_STRIP_ASM_SYMS=y # CONFIG_READABLE_ASM is not set # CONFIG_HEADERS_INSTALL is not set CONFIG_DEBUG_SECTION_MISMATCH=y CONFIG_SECTION_MISMATCH_WARN_ONLY=y CONFIG_ARCH_WANT_FRAME_POINTERS=y CONFIG_FRAME_POINTER=y # CONFIG_STACK_VALIDATION is not set # CONFIG_DEBUG_FORCE_WEAK_PER_CPU is not set # end of Compile-time checks and compiler options # # Generic Kernel Debugging Instruments # CONFIG_MAGIC_SYSRQ=y CONFIG_MAGIC_SYSRQ_DEFAULT_ENABLE=0x1 CONFIG_MAGIC_SYSRQ_SERIAL=y CONFIG_MAGIC_SYSRQ_SERIAL_SEQUENCE="" CONFIG_DEBUG_FS=y CONFIG_DEBUG_FS_ALLOW_ALL=y # CONFIG_DEBUG_FS_DISALLOW_MOUNT is not set # CONFIG_DEBUG_FS_ALLOW_NONE is not set CONFIG_HAVE_ARCH_KGDB=y # CONFIG_KGDB is not set CONFIG_ARCH_HAS_UBSAN_SANITIZE_ALL=y # CONFIG_UBSAN is not set CONFIG_HAVE_KCSAN_COMPILER=y # end of Generic Kernel Debugging Instruments # # Networking Debugging # # CONFIG_NET_DEV_REFCNT_TRACKER is not set # CONFIG_NET_NS_REFCNT_TRACKER is not set # CONFIG_DEBUG_NET is not set # end of 
Networking Debugging # # Memory Debugging # # CONFIG_PAGE_EXTENSION is not set # CONFIG_DEBUG_PAGEALLOC is not set CONFIG_SLUB_DEBUG=y # CONFIG_SLUB_DEBUG_ON is not set # CONFIG_PAGE_OWNER is not set # CONFIG_PAGE_TABLE_CHECK is not set # CONFIG_PAGE_POISONING is not set # CONFIG_DEBUG_RODATA_TEST is not set CONFIG_ARCH_HAS_DEBUG_WX=y # CONFIG_DEBUG_WX is not set CONFIG_GENERIC_PTDUMP=y # CONFIG_PTDUMP_DEBUGFS is not set # CONFIG_DEBUG_OBJECTS is not set # CONFIG_SHRINKER_DEBUG is not set CONFIG_HAVE_DEBUG_KMEMLEAK=y # CONFIG_DEBUG_KMEMLEAK is not set # CONFIG_DEBUG_STACK_USAGE is not set # CONFIG_SCHED_STACK_END_CHECK is not set CONFIG_ARCH_HAS_DEBUG_VM_PGTABLE=y # CONFIG_DEBUG_VM is not set # CONFIG_DEBUG_VM_PGTABLE is not set CONFIG_ARCH_HAS_DEBUG_VIRTUAL=y # CONFIG_DEBUG_VIRTUAL is not set CONFIG_DEBUG_MEMORY_INIT=y # CONFIG_DEBUG_PER_CPU_MAPS is not set CONFIG_HAVE_ARCH_KASAN=y CONFIG_HAVE_ARCH_KASAN_SW_TAGS=y CONFIG_HAVE_ARCH_KASAN_HW_TAGS=y CONFIG_HAVE_ARCH_KASAN_VMALLOC=y CONFIG_CC_HAS_KASAN_GENERIC=y CONFIG_CC_HAS_KASAN_SW_TAGS=y CONFIG_CC_HAS_WORKING_NOSANITIZE_ADDRESS=y # CONFIG_KASAN is not set CONFIG_HAVE_ARCH_KFENCE=y # CONFIG_KFENCE is not set # end of Memory Debugging # CONFIG_DEBUG_SHIRQ is not set # # Debug Oops, Lockups and Hangs # # CONFIG_PANIC_ON_OOPS is not set CONFIG_PANIC_ON_OOPS_VALUE=0 CONFIG_PANIC_TIMEOUT=0 CONFIG_LOCKUP_DETECTOR=y CONFIG_SOFTLOCKUP_DETECTOR=y # CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set CONFIG_DETECT_HUNG_TASK=y CONFIG_DEFAULT_HUNG_TASK_TIMEOUT=120 # CONFIG_BOOTPARAM_HUNG_TASK_PANIC is not set CONFIG_WQ_WATCHDOG=y # end of Debug Oops, Lockups and Hangs # # Scheduler Debugging # # CONFIG_SCHED_DEBUG is not set CONFIG_SCHED_INFO=y # CONFIG_SCHEDSTATS is not set # end of Scheduler Debugging # CONFIG_DEBUG_TIMEKEEPING is not set # # Lock Debugging (spinlocks, mutexes, etc...) 
# CONFIG_LOCK_DEBUGGING_SUPPORT=y # CONFIG_PROVE_LOCKING is not set # CONFIG_LOCK_STAT is not set # CONFIG_DEBUG_RT_MUTEXES is not set # CONFIG_DEBUG_SPINLOCK is not set # CONFIG_DEBUG_MUTEXES is not set # CONFIG_DEBUG_WW_MUTEX_SLOWPATH is not set # CONFIG_DEBUG_RWSEMS is not set # CONFIG_DEBUG_LOCK_ALLOC is not set # CONFIG_DEBUG_ATOMIC_SLEEP is not set # CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set # CONFIG_LOCK_TORTURE_TEST is not set # CONFIG_WW_MUTEX_SELFTEST is not set # CONFIG_SCF_TORTURE_TEST is not set # CONFIG_CSD_LOCK_WAIT_DEBUG is not set # end of Lock Debugging (spinlocks, mutexes, etc...) # CONFIG_DEBUG_IRQFLAGS is not set CONFIG_STACKTRACE=y # CONFIG_WARN_ALL_UNSEEDED_RANDOM is not set # CONFIG_DEBUG_KOBJECT is not set # # Debug kernel data structures # CONFIG_DEBUG_LIST=y # CONFIG_DEBUG_PLIST is not set # CONFIG_DEBUG_SG is not set # CONFIG_DEBUG_NOTIFIERS is not set # CONFIG_BUG_ON_DATA_CORRUPTION is not set # CONFIG_DEBUG_MAPLE_TREE is not set # end of Debug kernel data structures # CONFIG_DEBUG_CREDENTIALS is not set # # RCU Debugging # # CONFIG_RCU_SCALE_TEST is not set # CONFIG_RCU_TORTURE_TEST is not set # CONFIG_RCU_REF_SCALE_TEST is not set CONFIG_RCU_CPU_STALL_TIMEOUT=59 CONFIG_RCU_EXP_CPU_STALL_TIMEOUT=0 # CONFIG_RCU_TRACE is not set # CONFIG_RCU_EQS_DEBUG is not set # end of RCU Debugging # CONFIG_DEBUG_WQ_FORCE_RR_CPU is not set # CONFIG_CPU_HOTPLUG_STATE_CONTROL is not set # CONFIG_LATENCYTOP is not set CONFIG_HAVE_FUNCTION_TRACER=y CONFIG_HAVE_FUNCTION_GRAPH_TRACER=y CONFIG_HAVE_DYNAMIC_FTRACE=y CONFIG_HAVE_DYNAMIC_FTRACE_WITH_REGS=y CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS=y CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y CONFIG_HAVE_SYSCALL_TRACEPOINTS=y CONFIG_HAVE_C_RECORDMCOUNT=y CONFIG_TRACING_SUPPORT=y # CONFIG_FTRACE is not set # CONFIG_SAMPLES is not set # CONFIG_STRICT_DEVMEM is not set # # arm64 Debugging # # CONFIG_PID_IN_CONTEXTIDR is not set CONFIG_UNWINDER_FRAME_POINTER=y # CONFIG_CORESIGHT is not set # end of arm64 Debugging # # Kernel 
Testing and Coverage # # CONFIG_KUNIT is not set # CONFIG_NOTIFIER_ERROR_INJECTION is not set # CONFIG_FAULT_INJECTION is not set CONFIG_ARCH_HAS_KCOV=y CONFIG_CC_HAS_SANCOV_TRACE_PC=y # CONFIG_RUNTIME_TESTING_MENU is not set CONFIG_ARCH_USE_MEMTEST=y # CONFIG_MEMTEST is not set # end of Kernel Testing and Coverage # # Rust hacking # # end of Rust hacking # end of Kernel hacking ================================================ FILE: resources/guest_configs/microvm-kernel-ci-x86_64-5.10-no-acpi.config ================================================ # # Automatically generated file; DO NOT EDIT. # Linux/x86 5.10.219 Kernel Configuration # CONFIG_CC_VERSION_TEXT="gcc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0" CONFIG_CC_IS_GCC=y CONFIG_GCC_VERSION=110400 CONFIG_LD_VERSION=238000000 CONFIG_CLANG_VERSION=0 CONFIG_AS_IS_GNU=y CONFIG_AS_VERSION=23800 CONFIG_LLD_VERSION=0 CONFIG_CC_CAN_LINK=y CONFIG_CC_CAN_LINK_STATIC=y CONFIG_CC_HAS_ASM_GOTO=y CONFIG_CC_HAS_ASM_GOTO_OUTPUT=y CONFIG_CC_HAS_ASM_GOTO_TIED_OUTPUT=y CONFIG_CC_HAS_ASM_INLINE=y CONFIG_IRQ_WORK=y CONFIG_BUILDTIME_TABLE_SORT=y CONFIG_THREAD_INFO_IN_TASK=y # # General setup # CONFIG_INIT_ENV_ARG_LIMIT=32 # CONFIG_COMPILE_TEST is not set CONFIG_LOCALVERSION="" # CONFIG_LOCALVERSION_AUTO is not set CONFIG_BUILD_SALT="" CONFIG_HAVE_KERNEL_GZIP=y CONFIG_HAVE_KERNEL_BZIP2=y CONFIG_HAVE_KERNEL_LZMA=y CONFIG_HAVE_KERNEL_XZ=y CONFIG_HAVE_KERNEL_LZO=y CONFIG_HAVE_KERNEL_LZ4=y CONFIG_HAVE_KERNEL_ZSTD=y CONFIG_KERNEL_GZIP=y # CONFIG_KERNEL_BZIP2 is not set # CONFIG_KERNEL_LZMA is not set # CONFIG_KERNEL_XZ is not set # CONFIG_KERNEL_LZO is not set # CONFIG_KERNEL_LZ4 is not set # CONFIG_KERNEL_ZSTD is not set CONFIG_DEFAULT_INIT="" CONFIG_DEFAULT_HOSTNAME="(none)" CONFIG_SWAP=y CONFIG_SYSVIPC=y CONFIG_SYSVIPC_SYSCTL=y CONFIG_POSIX_MQUEUE=y CONFIG_POSIX_MQUEUE_SYSCTL=y # CONFIG_WATCH_QUEUE is not set CONFIG_CROSS_MEMORY_ATTACH=y # CONFIG_USELIB is not set CONFIG_AUDIT=y CONFIG_HAVE_ARCH_AUDITSYSCALL=y CONFIG_AUDITSYSCALL=y # # IRQ 
subsystem # CONFIG_GENERIC_IRQ_PROBE=y CONFIG_GENERIC_IRQ_SHOW=y CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK=y CONFIG_GENERIC_PENDING_IRQ=y CONFIG_GENERIC_IRQ_MIGRATION=y CONFIG_HARDIRQS_SW_RESEND=y CONFIG_IRQ_DOMAIN=y CONFIG_IRQ_DOMAIN_HIERARCHY=y CONFIG_GENERIC_IRQ_MATRIX_ALLOCATOR=y CONFIG_GENERIC_IRQ_RESERVATION_MODE=y CONFIG_IRQ_FORCED_THREADING=y CONFIG_SPARSE_IRQ=y # CONFIG_GENERIC_IRQ_DEBUGFS is not set # end of IRQ subsystem CONFIG_CLOCKSOURCE_WATCHDOG=y CONFIG_ARCH_CLOCKSOURCE_INIT=y CONFIG_CLOCKSOURCE_VALIDATE_LAST_CYCLE=y CONFIG_GENERIC_TIME_VSYSCALL=y CONFIG_GENERIC_CLOCKEVENTS=y CONFIG_GENERIC_CLOCKEVENTS_BROADCAST=y CONFIG_GENERIC_CLOCKEVENTS_MIN_ADJUST=y CONFIG_GENERIC_CMOS_UPDATE=y CONFIG_HAVE_POSIX_CPU_TIMERS_TASK_WORK=y CONFIG_POSIX_CPU_TIMERS_TASK_WORK=y # # Timers subsystem # CONFIG_TICK_ONESHOT=y CONFIG_NO_HZ_COMMON=y # CONFIG_HZ_PERIODIC is not set CONFIG_NO_HZ_IDLE=y # CONFIG_NO_HZ_FULL is not set CONFIG_NO_HZ=y CONFIG_HIGH_RES_TIMERS=y # end of Timers subsystem CONFIG_PREEMPT_NONE=y # CONFIG_PREEMPT_VOLUNTARY is not set # CONFIG_PREEMPT is not set # # CPU/Task time and stats accounting # CONFIG_TICK_CPU_ACCOUNTING=y # CONFIG_VIRT_CPU_ACCOUNTING_GEN is not set # CONFIG_IRQ_TIME_ACCOUNTING is not set CONFIG_HAVE_SCHED_AVG_IRQ=y CONFIG_BSD_PROCESS_ACCT=y CONFIG_BSD_PROCESS_ACCT_V3=y CONFIG_TASKSTATS=y CONFIG_TASK_DELAY_ACCT=y CONFIG_TASK_XACCT=y CONFIG_TASK_IO_ACCOUNTING=y CONFIG_PSI=y CONFIG_PSI_DEFAULT_DISABLED=y # end of CPU/Task time and stats accounting CONFIG_CPU_ISOLATION=y # # RCU Subsystem # CONFIG_TREE_RCU=y # CONFIG_RCU_EXPERT is not set CONFIG_SRCU=y CONFIG_TREE_SRCU=y CONFIG_TASKS_RCU_GENERIC=y CONFIG_TASKS_TRACE_RCU=y CONFIG_RCU_STALL_COMMON=y CONFIG_RCU_NEED_SEGCBLIST=y # end of RCU Subsystem CONFIG_BUILD_BIN2C=y # CONFIG_IKCONFIG is not set # CONFIG_IKHEADERS is not set CONFIG_LOG_BUF_SHIFT=17 CONFIG_LOG_CPU_MAX_BUF_SHIFT=12 CONFIG_PRINTK_SAFE_LOG_BUF_SHIFT=13 CONFIG_HAVE_UNSTABLE_SCHED_CLOCK=y # # Scheduler features # # 
CONFIG_UCLAMP_TASK is not set # end of Scheduler features CONFIG_ARCH_SUPPORTS_NUMA_BALANCING=y CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH=y CONFIG_CC_HAS_INT128=y CONFIG_ARCH_SUPPORTS_INT128=y CONFIG_NUMA_BALANCING=y # CONFIG_NUMA_BALANCING_DEFAULT_ENABLED is not set CONFIG_CGROUPS=y CONFIG_PAGE_COUNTER=y CONFIG_MEMCG=y CONFIG_MEMCG_SWAP=y CONFIG_MEMCG_KMEM=y CONFIG_BLK_CGROUP=y CONFIG_CGROUP_WRITEBACK=y CONFIG_CGROUP_SCHED=y CONFIG_FAIR_GROUP_SCHED=y CONFIG_CFS_BANDWIDTH=y CONFIG_RT_GROUP_SCHED=y CONFIG_CGROUP_PIDS=y # CONFIG_CGROUP_RDMA is not set CONFIG_CGROUP_FREEZER=y CONFIG_CGROUP_HUGETLB=y CONFIG_CPUSETS=y CONFIG_PROC_PID_CPUSET=y CONFIG_CGROUP_DEVICE=y CONFIG_CGROUP_CPUACCT=y CONFIG_CGROUP_PERF=y CONFIG_CGROUP_BPF=y # CONFIG_CGROUP_DEBUG is not set CONFIG_SOCK_CGROUP_DATA=y CONFIG_NAMESPACES=y CONFIG_UTS_NS=y CONFIG_TIME_NS=y CONFIG_IPC_NS=y CONFIG_USER_NS=y CONFIG_PID_NS=y CONFIG_NET_NS=y # CONFIG_CHECKPOINT_RESTORE is not set CONFIG_SCHED_AUTOGROUP=y # CONFIG_SYSFS_DEPRECATED is not set CONFIG_RELAY=y CONFIG_BLK_DEV_INITRD=y CONFIG_INITRAMFS_SOURCE="" CONFIG_RD_GZIP=y CONFIG_RD_BZIP2=y CONFIG_RD_LZMA=y CONFIG_RD_XZ=y CONFIG_RD_LZO=y CONFIG_RD_LZ4=y CONFIG_RD_ZSTD=y # CONFIG_BOOT_CONFIG is not set CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_LD_ORPHAN_WARN=y CONFIG_SYSCTL=y CONFIG_HAVE_UID16=y CONFIG_SYSCTL_EXCEPTION_TRACE=y CONFIG_HAVE_PCSPKR_PLATFORM=y CONFIG_BPF=y # CONFIG_EXPERT is not set CONFIG_UID16=y CONFIG_MULTIUSER=y CONFIG_SGETMASK_SYSCALL=y CONFIG_SYSFS_SYSCALL=y CONFIG_FHANDLE=y CONFIG_POSIX_TIMERS=y CONFIG_PRINTK=y CONFIG_PRINTK_NMI=y CONFIG_BUG=y CONFIG_ELF_CORE=y CONFIG_PCSPKR_PLATFORM=y CONFIG_BASE_FULL=y CONFIG_FUTEX=y CONFIG_FUTEX_PI=y CONFIG_EPOLL=y CONFIG_SIGNALFD=y CONFIG_TIMERFD=y CONFIG_EVENTFD=y CONFIG_SHMEM=y CONFIG_AIO=y CONFIG_IO_URING=y CONFIG_ADVISE_SYSCALLS=y CONFIG_HAVE_ARCH_USERFAULTFD_WP=y CONFIG_MEMBARRIER=y CONFIG_KALLSYMS=y # CONFIG_KALLSYMS_ALL is not set 
CONFIG_KALLSYMS_ABSOLUTE_PERCPU=y CONFIG_KALLSYMS_BASE_RELATIVE=y CONFIG_BPF_SYSCALL=y CONFIG_ARCH_WANT_DEFAULT_BPF_JIT=y CONFIG_BPF_UNPRIV_DEFAULT_OFF=y CONFIG_USERMODE_DRIVER=y CONFIG_BPF_PRELOAD=y CONFIG_BPF_PRELOAD_UMD=y CONFIG_USERFAULTFD=y CONFIG_ARCH_HAS_MEMBARRIER_SYNC_CORE=y CONFIG_RSEQ=y # CONFIG_EMBEDDED is not set CONFIG_HAVE_PERF_EVENTS=y # # Kernel Performance Events And Counters # CONFIG_PERF_EVENTS=y # CONFIG_DEBUG_PERF_USE_VMALLOC is not set # end of Kernel Performance Events And Counters CONFIG_VM_EVENT_COUNTERS=y CONFIG_SLUB_DEBUG=y # CONFIG_COMPAT_BRK is not set # CONFIG_SLAB is not set CONFIG_SLUB=y CONFIG_SLAB_MERGE_DEFAULT=y CONFIG_SLAB_FREELIST_RANDOM=y CONFIG_SLAB_FREELIST_HARDENED=y CONFIG_SHUFFLE_PAGE_ALLOCATOR=y CONFIG_SLUB_CPU_PARTIAL=y CONFIG_PROFILING=y # end of General setup CONFIG_64BIT=y CONFIG_X86_64=y CONFIG_X86=y CONFIG_INSTRUCTION_DECODER=y CONFIG_OUTPUT_FORMAT="elf64-x86-64" CONFIG_LOCKDEP_SUPPORT=y CONFIG_STACKTRACE_SUPPORT=y CONFIG_MMU=y CONFIG_ARCH_MMAP_RND_BITS_MIN=28 CONFIG_ARCH_MMAP_RND_BITS_MAX=32 CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MIN=8 CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MAX=16 CONFIG_GENERIC_ISA_DMA=y CONFIG_GENERIC_BUG=y CONFIG_GENERIC_BUG_RELATIVE_POINTERS=y CONFIG_ARCH_MAY_HAVE_PC_FDC=y CONFIG_GENERIC_CALIBRATE_DELAY=y CONFIG_ARCH_HAS_CPU_RELAX=y CONFIG_ARCH_HAS_CACHE_LINE_SIZE=y CONFIG_ARCH_HAS_FILTER_PGPROT=y CONFIG_HAVE_SETUP_PER_CPU_AREA=y CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK=y CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK=y CONFIG_ARCH_HIBERNATION_POSSIBLE=y CONFIG_ARCH_SUSPEND_POSSIBLE=y CONFIG_ARCH_WANT_GENERAL_HUGETLB=y CONFIG_ZONE_DMA32=y CONFIG_AUDIT_ARCH=y CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC=y CONFIG_X86_64_SMP=y CONFIG_ARCH_SUPPORTS_UPROBES=y CONFIG_FIX_EARLYCON_MEM=y CONFIG_PGTABLE_LEVELS=4 CONFIG_CC_HAS_SANE_STACKPROTECTOR=y # # Processor type and features # CONFIG_ZONE_DMA=y CONFIG_SMP=y CONFIG_X86_FEATURE_NAMES=y CONFIG_X86_X2APIC=y CONFIG_X86_MPPARSE=y # CONFIG_GOLDFISH is not set # CONFIG_X86_CPU_RESCTRL is 
not set # CONFIG_X86_EXTENDED_PLATFORM is not set CONFIG_SCHED_OMIT_FRAME_POINTER=y CONFIG_HYPERVISOR_GUEST=y CONFIG_PARAVIRT=y # CONFIG_PARAVIRT_DEBUG is not set CONFIG_PARAVIRT_SPINLOCKS=y CONFIG_X86_HV_CALLBACK_VECTOR=y # CONFIG_XEN is not set CONFIG_KVM_GUEST=y CONFIG_ARCH_CPUIDLE_HALTPOLL=y CONFIG_PVH=y CONFIG_PARAVIRT_TIME_ACCOUNTING=y CONFIG_PARAVIRT_CLOCK=y # CONFIG_ACRN_GUEST is not set # CONFIG_MK8 is not set # CONFIG_MPSC is not set # CONFIG_MCORE2 is not set # CONFIG_MATOM is not set CONFIG_GENERIC_CPU=y CONFIG_X86_INTERNODE_CACHE_SHIFT=6 CONFIG_X86_L1_CACHE_SHIFT=6 CONFIG_X86_TSC=y CONFIG_X86_CMPXCHG64=y CONFIG_X86_CMOV=y CONFIG_X86_MINIMUM_CPU_FAMILY=64 CONFIG_X86_DEBUGCTLMSR=y CONFIG_IA32_FEAT_CTL=y CONFIG_X86_VMX_FEATURE_NAMES=y CONFIG_CPU_SUP_INTEL=y CONFIG_CPU_SUP_AMD=y CONFIG_CPU_SUP_HYGON=y CONFIG_CPU_SUP_CENTAUR=y CONFIG_CPU_SUP_ZHAOXIN=y CONFIG_HPET_TIMER=y CONFIG_DMI=y # CONFIG_MAXSMP is not set CONFIG_NR_CPUS_RANGE_BEGIN=2 CONFIG_NR_CPUS_RANGE_END=512 CONFIG_NR_CPUS_DEFAULT=64 CONFIG_NR_CPUS=64 CONFIG_SCHED_SMT=y CONFIG_SCHED_MC=y CONFIG_SCHED_MC_PRIO=y CONFIG_X86_LOCAL_APIC=y CONFIG_X86_IO_APIC=y CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS=y # CONFIG_X86_MCE is not set # # Performance monitoring # # CONFIG_PERF_EVENTS_AMD_POWER is not set # end of Performance monitoring CONFIG_X86_16BIT=y CONFIG_X86_ESPFIX64=y CONFIG_X86_VSYSCALL_EMULATION=y CONFIG_X86_IOPL_IOPERM=y # CONFIG_MICROCODE is not set CONFIG_X86_MSR=y CONFIG_X86_CPUID=y # CONFIG_X86_5LEVEL is not set CONFIG_X86_DIRECT_GBPAGES=y # CONFIG_X86_CPA_STATISTICS is not set # CONFIG_AMD_MEM_ENCRYPT is not set CONFIG_NUMA=y # CONFIG_NUMA_EMU is not set CONFIG_NODES_SHIFT=10 CONFIG_ARCH_SPARSEMEM_ENABLE=y CONFIG_ARCH_SPARSEMEM_DEFAULT=y CONFIG_ARCH_SELECT_MEMORY_MODEL=y CONFIG_ARCH_MEMORY_PROBE=y CONFIG_ARCH_PROC_KCORE_TEXT=y CONFIG_ILLEGAL_POINTER_VALUE=0xdead000000000000 # CONFIG_X86_PMEM_LEGACY is not set CONFIG_X86_CHECK_BIOS_CORRUPTION=y CONFIG_X86_BOOTPARAM_MEMORY_CORRUPTION_CHECK=y 
CONFIG_X86_RESERVE_LOW=64 CONFIG_MTRR=y CONFIG_MTRR_SANITIZER=y CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT=0 CONFIG_MTRR_SANITIZER_SPARE_REG_NR_DEFAULT=1 CONFIG_X86_PAT=y CONFIG_ARCH_USES_PG_UNCACHED=y CONFIG_ARCH_RANDOM=y CONFIG_X86_SMAP=y CONFIG_X86_UMIP=y CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS=y CONFIG_X86_INTEL_TSX_MODE_OFF=y # CONFIG_X86_INTEL_TSX_MODE_ON is not set # CONFIG_X86_INTEL_TSX_MODE_AUTO is not set # CONFIG_HZ_100 is not set CONFIG_HZ_250=y # CONFIG_HZ_300 is not set # CONFIG_HZ_1000 is not set CONFIG_HZ=250 CONFIG_SCHED_HRTICK=y # CONFIG_KEXEC is not set CONFIG_KEXEC_FILE=y CONFIG_ARCH_HAS_KEXEC_PURGATORY=y # CONFIG_KEXEC_SIG is not set # CONFIG_CRASH_DUMP is not set CONFIG_PHYSICAL_START=0x1000000 CONFIG_RELOCATABLE=y CONFIG_RANDOMIZE_BASE=y CONFIG_X86_NEED_RELOCS=y CONFIG_PHYSICAL_ALIGN=0x1000000 CONFIG_DYNAMIC_MEMORY_LAYOUT=y CONFIG_RANDOMIZE_MEMORY=y CONFIG_RANDOMIZE_MEMORY_PHYSICAL_PADDING=0xa CONFIG_HOTPLUG_CPU=y # CONFIG_BOOTPARAM_HOTPLUG_CPU0 is not set # CONFIG_DEBUG_HOTPLUG_CPU0 is not set # CONFIG_COMPAT_VDSO is not set CONFIG_LEGACY_VSYSCALL_EMULATE=y # CONFIG_LEGACY_VSYSCALL_XONLY is not set # CONFIG_LEGACY_VSYSCALL_NONE is not set # CONFIG_CMDLINE_BOOL is not set CONFIG_MODIFY_LDT_SYSCALL=y CONFIG_HAVE_LIVEPATCH=y # end of Processor type and features CONFIG_CC_HAS_SLS=y CONFIG_CC_HAS_RETURN_THUNK=y CONFIG_CPU_MITIGATIONS=y CONFIG_PAGE_TABLE_ISOLATION=y CONFIG_RETPOLINE=y CONFIG_RETHUNK=y CONFIG_CPU_UNRET_ENTRY=y CONFIG_CPU_IBPB_ENTRY=y CONFIG_CPU_IBRS_ENTRY=y CONFIG_CPU_SRSO=y # CONFIG_SLS is not set # CONFIG_GDS_FORCE_MITIGATION is not set CONFIG_MITIGATION_RFDS=y CONFIG_ARCH_HAS_ADD_PAGES=y CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y CONFIG_ARCH_ENABLE_MEMORY_HOTREMOVE=y CONFIG_ARCH_MHP_MEMMAP_ON_MEMORY_ENABLE=y CONFIG_USE_PERCPU_NUMA_NODE_ID=y CONFIG_ARCH_ENABLE_SPLIT_PMD_PTLOCK=y CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION=y CONFIG_ARCH_ENABLE_THP_MIGRATION=y # # Power management and ACPI options # CONFIG_ARCH_HIBERNATION_HEADER=y # CONFIG_SUSPEND 
is not set CONFIG_HIBERNATE_CALLBACKS=y CONFIG_HIBERNATION=y CONFIG_HIBERNATION_SNAPSHOT_DEV=y CONFIG_PM_STD_PARTITION="" CONFIG_PM_SLEEP=y CONFIG_PM_SLEEP_SMP=y # CONFIG_PM_AUTOSLEEP is not set # CONFIG_PM_WAKELOCKS is not set CONFIG_PM=y # CONFIG_PM_DEBUG is not set CONFIG_PM_CLK=y # CONFIG_WQ_POWER_EFFICIENT_DEFAULT is not set # CONFIG_ENERGY_MODEL is not set CONFIG_ARCH_SUPPORTS_ACPI=y # CONFIG_ACPI is not set # CONFIG_SFI is not set # # CPU Frequency scaling # CONFIG_CPU_FREQ=y CONFIG_CPU_FREQ_GOV_ATTR_SET=y CONFIG_CPU_FREQ_STAT=y CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE=y # CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE is not set # CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE is not set # CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL is not set CONFIG_CPU_FREQ_GOV_PERFORMANCE=y # CONFIG_CPU_FREQ_GOV_POWERSAVE is not set # CONFIG_CPU_FREQ_GOV_USERSPACE is not set # CONFIG_CPU_FREQ_GOV_ONDEMAND is not set # CONFIG_CPU_FREQ_GOV_CONSERVATIVE is not set CONFIG_CPU_FREQ_GOV_SCHEDUTIL=y # # CPU frequency scaling drivers # CONFIG_X86_INTEL_PSTATE=y # CONFIG_X86_P4_CLOCKMOD is not set # # shared options # # end of CPU Frequency scaling # # CPU Idle # CONFIG_CPU_IDLE=y CONFIG_CPU_IDLE_GOV_LADDER=y CONFIG_CPU_IDLE_GOV_MENU=y # CONFIG_CPU_IDLE_GOV_TEO is not set CONFIG_CPU_IDLE_GOV_HALTPOLL=y CONFIG_HALTPOLL_CPUIDLE=y # end of CPU Idle CONFIG_INTEL_IDLE=y # end of Power management and ACPI options # # Bus options (PCI etc.) # CONFIG_ISA_DMA_API=y # CONFIG_X86_SYSFB is not set # end of Bus options (PCI etc.) 
# # Binary Emulations # CONFIG_IA32_EMULATION=y # CONFIG_X86_X32 is not set CONFIG_COMPAT_32=y CONFIG_COMPAT=y CONFIG_COMPAT_FOR_U64_ALIGNMENT=y CONFIG_SYSVIPC_COMPAT=y # end of Binary Emulations # # Firmware Drivers # # CONFIG_EDD is not set CONFIG_FIRMWARE_MEMMAP=y CONFIG_DMIID=y # CONFIG_DMI_SYSFS is not set CONFIG_DMI_SCAN_MACHINE_NON_EFI_FALLBACK=y # CONFIG_FW_CFG_SYSFS is not set # CONFIG_GOOGLE_FIRMWARE is not set # # Tegra firmware driver # # end of Tegra firmware driver # end of Firmware Drivers CONFIG_HAVE_KVM=y # CONFIG_VIRTUALIZATION is not set CONFIG_AS_AVX512=y CONFIG_AS_SHA1_NI=y CONFIG_AS_SHA256_NI=y CONFIG_AS_TPAUSE=y CONFIG_ARCH_CONFIGURES_CPU_MITIGATIONS=y # # General architecture-dependent options # CONFIG_CRASH_CORE=y CONFIG_KEXEC_CORE=y CONFIG_HOTPLUG_SMT=y CONFIG_GENERIC_ENTRY=y # CONFIG_OPROFILE is not set CONFIG_HAVE_OPROFILE=y CONFIG_OPROFILE_NMI_TIMER=y CONFIG_JUMP_LABEL=y # CONFIG_STATIC_KEYS_SELFTEST is not set # CONFIG_STATIC_CALL_SELFTEST is not set CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS=y CONFIG_ARCH_USE_BUILTIN_BSWAP=y CONFIG_HAVE_IOREMAP_PROT=y CONFIG_HAVE_KPROBES=y CONFIG_HAVE_KRETPROBES=y CONFIG_HAVE_OPTPROBES=y CONFIG_HAVE_KPROBES_ON_FTRACE=y CONFIG_HAVE_FUNCTION_ERROR_INJECTION=y CONFIG_HAVE_NMI=y CONFIG_HAVE_ARCH_TRACEHOOK=y CONFIG_HAVE_DMA_CONTIGUOUS=y CONFIG_GENERIC_SMP_IDLE_THREAD=y CONFIG_ARCH_HAS_FORTIFY_SOURCE=y CONFIG_ARCH_HAS_SET_MEMORY=y CONFIG_ARCH_HAS_SET_DIRECT_MAP=y CONFIG_ARCH_HAS_CPU_FINALIZE_INIT=y CONFIG_HAVE_ARCH_THREAD_STRUCT_WHITELIST=y CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT=y CONFIG_HAVE_ASM_MODVERSIONS=y CONFIG_HAVE_REGS_AND_STACK_ACCESS_API=y CONFIG_HAVE_RSEQ=y CONFIG_HAVE_FUNCTION_ARG_ACCESS_API=y CONFIG_HAVE_HW_BREAKPOINT=y CONFIG_HAVE_MIXED_BREAKPOINTS_REGS=y CONFIG_HAVE_USER_RETURN_NOTIFIER=y CONFIG_HAVE_PERF_EVENTS_NMI=y CONFIG_HAVE_HARDLOCKUP_DETECTOR_PERF=y CONFIG_HAVE_PERF_REGS=y CONFIG_HAVE_PERF_USER_STACK_DUMP=y CONFIG_HAVE_ARCH_JUMP_LABEL=y CONFIG_HAVE_ARCH_JUMP_LABEL_RELATIVE=y 
CONFIG_MMU_GATHER_TABLE_FREE=y CONFIG_MMU_GATHER_RCU_TABLE_FREE=y CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG=y CONFIG_HAVE_ALIGNED_STRUCT_PAGE=y CONFIG_HAVE_CMPXCHG_LOCAL=y CONFIG_HAVE_CMPXCHG_DOUBLE=y CONFIG_ARCH_WANT_COMPAT_IPC_PARSE_VERSION=y CONFIG_ARCH_WANT_OLD_COMPAT_IPC=y CONFIG_HAVE_ARCH_SECCOMP=y CONFIG_HAVE_ARCH_SECCOMP_FILTER=y CONFIG_SECCOMP=y CONFIG_SECCOMP_FILTER=y CONFIG_HAVE_ARCH_STACKLEAK=y CONFIG_HAVE_STACKPROTECTOR=y CONFIG_STACKPROTECTOR=y CONFIG_STACKPROTECTOR_STRONG=y CONFIG_HAVE_ARCH_WITHIN_STACK_FRAMES=y CONFIG_HAVE_CONTEXT_TRACKING=y CONFIG_HAVE_VIRT_CPU_ACCOUNTING_GEN=y CONFIG_HAVE_IRQ_TIME_ACCOUNTING=y CONFIG_HAVE_MOVE_PMD=y CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE=y CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD=y CONFIG_HAVE_ARCH_HUGE_VMAP=y CONFIG_ARCH_WANT_HUGE_PMD_SHARE=y CONFIG_HAVE_ARCH_SOFT_DIRTY=y CONFIG_HAVE_MOD_ARCH_SPECIFIC=y CONFIG_MODULES_USE_ELF_RELA=y CONFIG_ARCH_HAS_ELF_RANDOMIZE=y CONFIG_HAVE_ARCH_MMAP_RND_BITS=y CONFIG_HAVE_EXIT_THREAD=y CONFIG_ARCH_MMAP_RND_BITS=28 CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS=y CONFIG_ARCH_MMAP_RND_COMPAT_BITS=8 CONFIG_HAVE_ARCH_COMPAT_MMAP_BASES=y CONFIG_HAVE_STACK_VALIDATION=y CONFIG_HAVE_RELIABLE_STACKTRACE=y CONFIG_OLD_SIGSUSPEND3=y CONFIG_COMPAT_OLD_SIGACTION=y CONFIG_COMPAT_32BIT_TIME=y CONFIG_HAVE_ARCH_VMAP_STACK=y CONFIG_VMAP_STACK=y CONFIG_ARCH_HAS_STRICT_KERNEL_RWX=y CONFIG_STRICT_KERNEL_RWX=y CONFIG_ARCH_HAS_STRICT_MODULE_RWX=y CONFIG_HAVE_ARCH_PREL32_RELOCATIONS=y # CONFIG_LOCK_EVENT_COUNTS is not set CONFIG_ARCH_HAS_MEM_ENCRYPT=y CONFIG_HAVE_STATIC_CALL=y CONFIG_HAVE_STATIC_CALL_INLINE=y CONFIG_ARCH_WANT_LD_ORPHAN_WARN=y CONFIG_ARCH_HAS_PARANOID_L1D_FLUSH=y # # GCOV-based kernel profiling # # CONFIG_GCOV_KERNEL is not set CONFIG_ARCH_HAS_GCOV_PROFILE_ALL=y # end of GCOV-based kernel profiling CONFIG_HAVE_GCC_PLUGINS=y # end of General architecture-dependent options CONFIG_RT_MUTEXES=y CONFIG_BASE_SMALL=0 # CONFIG_MODULES is not set CONFIG_MODULES_TREE_LOOKUP=y CONFIG_BLOCK=y 
CONFIG_BLK_RQ_ALLOC_TIME=y CONFIG_BLK_SCSI_REQUEST=y CONFIG_BLK_CGROUP_RWSTAT=y CONFIG_BLK_DEV_BSG=y CONFIG_BLK_DEV_BSGLIB=y CONFIG_BLK_DEV_INTEGRITY=y # CONFIG_BLK_DEV_ZONED is not set CONFIG_BLK_DEV_THROTTLING=y # CONFIG_BLK_DEV_THROTTLING_LOW is not set CONFIG_BLK_CMDLINE_PARSER=y CONFIG_BLK_WBT=y # CONFIG_BLK_CGROUP_IOLATENCY is not set CONFIG_BLK_CGROUP_IOCOST=y CONFIG_BLK_WBT_MQ=y CONFIG_BLK_DEBUG_FS=y # CONFIG_BLK_SED_OPAL is not set # CONFIG_BLK_INLINE_ENCRYPTION is not set # # Partition Types # CONFIG_PARTITION_ADVANCED=y # CONFIG_ACORN_PARTITION is not set # CONFIG_AIX_PARTITION is not set # CONFIG_OSF_PARTITION is not set # CONFIG_AMIGA_PARTITION is not set # CONFIG_ATARI_PARTITION is not set # CONFIG_MAC_PARTITION is not set # CONFIG_MSDOS_PARTITION is not set # CONFIG_LDM_PARTITION is not set # CONFIG_SGI_PARTITION is not set # CONFIG_ULTRIX_PARTITION is not set # CONFIG_SUN_PARTITION is not set # CONFIG_KARMA_PARTITION is not set # CONFIG_EFI_PARTITION is not set # CONFIG_SYSV68_PARTITION is not set # CONFIG_CMDLINE_PARTITION is not set # end of Partition Types CONFIG_BLOCK_COMPAT=y CONFIG_BLK_MQ_VIRTIO=y CONFIG_BLK_PM=y # # IO Schedulers # CONFIG_MQ_IOSCHED_DEADLINE=y CONFIG_MQ_IOSCHED_KYBER=y CONFIG_IOSCHED_BFQ=y CONFIG_BFQ_GROUP_IOSCHED=y # CONFIG_BFQ_CGROUP_DEBUG is not set # end of IO Schedulers CONFIG_PADATA=y CONFIG_ASN1=y CONFIG_INLINE_SPIN_UNLOCK_IRQ=y CONFIG_INLINE_READ_UNLOCK=y CONFIG_INLINE_READ_UNLOCK_IRQ=y CONFIG_INLINE_WRITE_UNLOCK=y CONFIG_INLINE_WRITE_UNLOCK_IRQ=y CONFIG_ARCH_SUPPORTS_ATOMIC_RMW=y CONFIG_MUTEX_SPIN_ON_OWNER=y CONFIG_RWSEM_SPIN_ON_OWNER=y CONFIG_LOCK_SPIN_ON_OWNER=y CONFIG_ARCH_USE_QUEUED_SPINLOCKS=y CONFIG_QUEUED_SPINLOCKS=y CONFIG_ARCH_USE_QUEUED_RWLOCKS=y CONFIG_QUEUED_RWLOCKS=y CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE=y CONFIG_ARCH_HAS_SYNC_CORE_BEFORE_USERMODE=y CONFIG_ARCH_HAS_SYSCALL_WRAPPER=y CONFIG_FREEZER=y # # Executable file formats # CONFIG_BINFMT_ELF=y CONFIG_COMPAT_BINFMT_ELF=y CONFIG_ELFCORE=y 
CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y CONFIG_BINFMT_SCRIPT=y CONFIG_BINFMT_MISC=y CONFIG_COREDUMP=y # end of Executable file formats # # Memory Management options # CONFIG_SELECT_MEMORY_MODEL=y CONFIG_SPARSEMEM_MANUAL=y CONFIG_SPARSEMEM=y CONFIG_NEED_MULTIPLE_NODES=y CONFIG_SPARSEMEM_EXTREME=y CONFIG_SPARSEMEM_VMEMMAP_ENABLE=y CONFIG_SPARSEMEM_VMEMMAP=y CONFIG_HAVE_FAST_GUP=y CONFIG_NUMA_KEEP_MEMINFO=y CONFIG_MEMORY_ISOLATION=y CONFIG_HAVE_BOOTMEM_INFO_NODE=y CONFIG_MEMORY_HOTPLUG=y CONFIG_MEMORY_HOTPLUG_SPARSE=y # CONFIG_MEMORY_HOTPLUG_DEFAULT_ONLINE is not set CONFIG_MEMORY_HOTREMOVE=y CONFIG_MHP_MEMMAP_ON_MEMORY=y CONFIG_SPLIT_PTLOCK_CPUS=4 CONFIG_MEMORY_BALLOON=y CONFIG_BALLOON_COMPACTION=y CONFIG_COMPACTION=y CONFIG_PAGE_REPORTING=y CONFIG_MIGRATION=y CONFIG_CONTIG_ALLOC=y CONFIG_PHYS_ADDR_T_64BIT=y CONFIG_BOUNCE=y CONFIG_VIRT_TO_BUS=y CONFIG_KSM=y CONFIG_DEFAULT_MMAP_MIN_ADDR=4096 CONFIG_TRANSPARENT_HUGEPAGE=y # CONFIG_TRANSPARENT_HUGEPAGE_ALWAYS is not set CONFIG_TRANSPARENT_HUGEPAGE_MADVISE=y CONFIG_ARCH_WANTS_THP_SWAP=y CONFIG_THP_SWAP=y CONFIG_CLEANCACHE=y CONFIG_FRONTSWAP=y # CONFIG_CMA is not set CONFIG_ZSWAP=y # CONFIG_ZSWAP_COMPRESSOR_DEFAULT_DEFLATE is not set CONFIG_ZSWAP_COMPRESSOR_DEFAULT_LZO=y # CONFIG_ZSWAP_COMPRESSOR_DEFAULT_842 is not set # CONFIG_ZSWAP_COMPRESSOR_DEFAULT_LZ4 is not set # CONFIG_ZSWAP_COMPRESSOR_DEFAULT_LZ4HC is not set # CONFIG_ZSWAP_COMPRESSOR_DEFAULT_ZSTD is not set CONFIG_ZSWAP_COMPRESSOR_DEFAULT="lzo" CONFIG_ZSWAP_ZPOOL_DEFAULT_ZBUD=y # CONFIG_ZSWAP_ZPOOL_DEFAULT_Z3FOLD is not set # CONFIG_ZSWAP_ZPOOL_DEFAULT_ZSMALLOC is not set CONFIG_ZSWAP_ZPOOL_DEFAULT="zbud" # CONFIG_ZSWAP_DEFAULT_ON is not set CONFIG_ZPOOL=y CONFIG_ZBUD=y # CONFIG_Z3FOLD is not set # CONFIG_ZSMALLOC is not set CONFIG_GENERIC_EARLY_IOREMAP=y CONFIG_DEFERRED_STRUCT_PAGE_INIT=y CONFIG_PAGE_IDLE_FLAG=y # CONFIG_IDLE_PAGE_TRACKING is not set CONFIG_ARCH_HAS_PTE_DEVMAP=y # CONFIG_ZONE_DEVICE is not set CONFIG_ARCH_USES_HIGH_VMA_FLAGS=y 
CONFIG_ARCH_HAS_PKEYS=y CONFIG_PERCPU_STATS=y # CONFIG_GUP_BENCHMARK is not set # CONFIG_READ_ONLY_THP_FOR_FS is not set CONFIG_ARCH_HAS_PTE_SPECIAL=y # # Data Access Monitoring # CONFIG_DAMON=y CONFIG_DAMON_VADDR=y CONFIG_DAMON_PADDR=y CONFIG_DAMON_SYSFS=y CONFIG_DAMON_DBGFS=y # CONFIG_DAMON_RECLAIM is not set # CONFIG_DAMON_LRU_SORT is not set # end of Data Access Monitoring # end of Memory Management options CONFIG_NET=y CONFIG_NET_INGRESS=y CONFIG_SKB_EXTENSIONS=y # # Networking options # CONFIG_PACKET=y # CONFIG_PACKET_DIAG is not set CONFIG_UNIX=y CONFIG_UNIX_SCM=y # CONFIG_UNIX_DIAG is not set # CONFIG_TLS is not set CONFIG_XFRM=y CONFIG_XFRM_ALGO=y CONFIG_XFRM_USER=y # CONFIG_XFRM_USER_COMPAT is not set # CONFIG_XFRM_INTERFACE is not set CONFIG_XFRM_SUB_POLICY=y CONFIG_XFRM_MIGRATE=y CONFIG_XFRM_STATISTICS=y # CONFIG_NET_KEY is not set CONFIG_XDP_SOCKETS=y # CONFIG_XDP_SOCKETS_DIAG is not set CONFIG_INET=y CONFIG_IP_MULTICAST=y CONFIG_IP_ADVANCED_ROUTER=y # CONFIG_IP_FIB_TRIE_STATS is not set CONFIG_IP_MULTIPLE_TABLES=y CONFIG_IP_ROUTE_MULTIPATH=y CONFIG_IP_ROUTE_VERBOSE=y CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y CONFIG_IP_PNP_RARP=y # CONFIG_NET_IPIP is not set # CONFIG_NET_IPGRE_DEMUX is not set CONFIG_IP_MROUTE_COMMON=y CONFIG_IP_MROUTE=y CONFIG_IP_MROUTE_MULTIPLE_TABLES=y CONFIG_IP_PIMSM_V1=y CONFIG_IP_PIMSM_V2=y CONFIG_SYN_COOKIES=y # CONFIG_NET_IPVTI is not set # CONFIG_NET_FOU is not set # CONFIG_INET_AH is not set # CONFIG_INET_ESP is not set # CONFIG_INET_IPCOMP is not set CONFIG_INET_TABLE_PERTURB_ORDER=16 CONFIG_INET_DIAG=y CONFIG_INET_TCP_DIAG=y # CONFIG_INET_UDP_DIAG is not set # CONFIG_INET_RAW_DIAG is not set CONFIG_INET_DIAG_DESTROY=y CONFIG_TCP_CONG_ADVANCED=y # CONFIG_TCP_CONG_BIC is not set CONFIG_TCP_CONG_CUBIC=y # CONFIG_TCP_CONG_WESTWOOD is not set # CONFIG_TCP_CONG_HTCP is not set # CONFIG_TCP_CONG_HSTCP is not set # CONFIG_TCP_CONG_HYBLA is not set # CONFIG_TCP_CONG_VEGAS is not set # CONFIG_TCP_CONG_NV is not set # 
CONFIG_TCP_CONG_SCALABLE is not set # CONFIG_TCP_CONG_LP is not set # CONFIG_TCP_CONG_VENO is not set # CONFIG_TCP_CONG_YEAH is not set # CONFIG_TCP_CONG_ILLINOIS is not set # CONFIG_TCP_CONG_DCTCP is not set # CONFIG_TCP_CONG_CDG is not set # CONFIG_TCP_CONG_BBR is not set # CONFIG_TCP_CONG_BBR2 is not set CONFIG_DEFAULT_CUBIC=y # CONFIG_DEFAULT_RENO is not set CONFIG_DEFAULT_TCP_CONG="cubic" CONFIG_TCP_MD5SIG=y CONFIG_IPV6=y CONFIG_IPV6_ROUTER_PREF=y CONFIG_IPV6_ROUTE_INFO=y CONFIG_IPV6_OPTIMISTIC_DAD=y # CONFIG_INET6_AH is not set # CONFIG_INET6_ESP is not set # CONFIG_INET6_IPCOMP is not set # CONFIG_IPV6_MIP6 is not set # CONFIG_IPV6_ILA is not set # CONFIG_IPV6_VTI is not set # CONFIG_IPV6_SIT is not set # CONFIG_IPV6_TUNNEL is not set CONFIG_IPV6_MULTIPLE_TABLES=y CONFIG_IPV6_SUBTREES=y CONFIG_IPV6_MROUTE=y CONFIG_IPV6_MROUTE_MULTIPLE_TABLES=y CONFIG_IPV6_PIMSM_V2=y CONFIG_IPV6_SEG6_LWTUNNEL=y CONFIG_IPV6_SEG6_HMAC=y CONFIG_IPV6_SEG6_BPF=y # CONFIG_IPV6_RPL_LWTUNNEL is not set CONFIG_NETLABEL=y CONFIG_MPTCP=y CONFIG_INET_MPTCP_DIAG=y CONFIG_MPTCP_IPV6=y CONFIG_NETWORK_SECMARK=y CONFIG_NET_PTP_CLASSIFY=y CONFIG_NETWORK_PHY_TIMESTAMPING=y CONFIG_NETFILTER=y CONFIG_NETFILTER_ADVANCED=y CONFIG_BRIDGE_NETFILTER=y # # Core Netfilter Configuration # CONFIG_NETFILTER_INGRESS=y CONFIG_NETFILTER_FAMILY_BRIDGE=y # CONFIG_NETFILTER_NETLINK_ACCT is not set # CONFIG_NETFILTER_NETLINK_QUEUE is not set # CONFIG_NETFILTER_NETLINK_LOG is not set # CONFIG_NETFILTER_NETLINK_OSF is not set CONFIG_NF_CONNTRACK=y CONFIG_NF_LOG_COMMON=y # CONFIG_NF_LOG_NETDEV is not set CONFIG_NF_CONNTRACK_MARK=y CONFIG_NF_CONNTRACK_SECMARK=y CONFIG_NF_CONNTRACK_ZONES=y CONFIG_NF_CONNTRACK_PROCFS=y CONFIG_NF_CONNTRACK_EVENTS=y CONFIG_NF_CONNTRACK_TIMEOUT=y CONFIG_NF_CONNTRACK_TIMESTAMP=y CONFIG_NF_CONNTRACK_LABELS=y CONFIG_NF_CT_PROTO_DCCP=y CONFIG_NF_CT_PROTO_SCTP=y CONFIG_NF_CT_PROTO_UDPLITE=y # CONFIG_NF_CONNTRACK_AMANDA is not set # CONFIG_NF_CONNTRACK_FTP is not set # CONFIG_NF_CONNTRACK_H323 
is not set # CONFIG_NF_CONNTRACK_IRC is not set # CONFIG_NF_CONNTRACK_NETBIOS_NS is not set # CONFIG_NF_CONNTRACK_SNMP is not set # CONFIG_NF_CONNTRACK_PPTP is not set # CONFIG_NF_CONNTRACK_SANE is not set # CONFIG_NF_CONNTRACK_SIP is not set # CONFIG_NF_CONNTRACK_TFTP is not set # CONFIG_NF_CT_NETLINK is not set # CONFIG_NF_CT_NETLINK_TIMEOUT is not set CONFIG_NF_NAT=y CONFIG_NF_NAT_REDIRECT=y CONFIG_NF_NAT_MASQUERADE=y CONFIG_NETFILTER_SYNPROXY=y # CONFIG_NF_TABLES is not set CONFIG_NETFILTER_XTABLES=y # # Xtables combined modules # # CONFIG_NETFILTER_XT_MARK is not set # CONFIG_NETFILTER_XT_CONNMARK is not set # # Xtables targets # # CONFIG_NETFILTER_XT_TARGET_AUDIT is not set # CONFIG_NETFILTER_XT_TARGET_CHECKSUM is not set # CONFIG_NETFILTER_XT_TARGET_CLASSIFY is not set # CONFIG_NETFILTER_XT_TARGET_CONNMARK is not set # CONFIG_NETFILTER_XT_TARGET_CONNSECMARK is not set # CONFIG_NETFILTER_XT_TARGET_DSCP is not set # CONFIG_NETFILTER_XT_TARGET_HL is not set # CONFIG_NETFILTER_XT_TARGET_HMARK is not set # CONFIG_NETFILTER_XT_TARGET_IDLETIMER is not set # CONFIG_NETFILTER_XT_TARGET_LOG is not set # CONFIG_NETFILTER_XT_TARGET_MARK is not set CONFIG_NETFILTER_XT_NAT=y CONFIG_NETFILTER_XT_TARGET_NETMAP=y # CONFIG_NETFILTER_XT_TARGET_NFLOG is not set # CONFIG_NETFILTER_XT_TARGET_NFQUEUE is not set # CONFIG_NETFILTER_XT_TARGET_RATEEST is not set CONFIG_NETFILTER_XT_TARGET_REDIRECT=y CONFIG_NETFILTER_XT_TARGET_MASQUERADE=y # CONFIG_NETFILTER_XT_TARGET_TEE is not set # CONFIG_NETFILTER_XT_TARGET_TPROXY is not set # CONFIG_NETFILTER_XT_TARGET_SECMARK is not set # CONFIG_NETFILTER_XT_TARGET_TCPMSS is not set # CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP is not set # # Xtables matches # CONFIG_NETFILTER_XT_MATCH_ADDRTYPE=y # CONFIG_NETFILTER_XT_MATCH_BPF is not set # CONFIG_NETFILTER_XT_MATCH_CGROUP is not set # CONFIG_NETFILTER_XT_MATCH_CLUSTER is not set # CONFIG_NETFILTER_XT_MATCH_COMMENT is not set # CONFIG_NETFILTER_XT_MATCH_CONNBYTES is not set # 
CONFIG_NETFILTER_XT_MATCH_CONNLABEL is not set # CONFIG_NETFILTER_XT_MATCH_CONNLIMIT is not set # CONFIG_NETFILTER_XT_MATCH_CONNMARK is not set CONFIG_NETFILTER_XT_MATCH_CONNTRACK=y # CONFIG_NETFILTER_XT_MATCH_CPU is not set # CONFIG_NETFILTER_XT_MATCH_DCCP is not set # CONFIG_NETFILTER_XT_MATCH_DEVGROUP is not set # CONFIG_NETFILTER_XT_MATCH_DSCP is not set # CONFIG_NETFILTER_XT_MATCH_ECN is not set # CONFIG_NETFILTER_XT_MATCH_ESP is not set # CONFIG_NETFILTER_XT_MATCH_HASHLIMIT is not set # CONFIG_NETFILTER_XT_MATCH_HELPER is not set # CONFIG_NETFILTER_XT_MATCH_HL is not set # CONFIG_NETFILTER_XT_MATCH_IPCOMP is not set # CONFIG_NETFILTER_XT_MATCH_IPRANGE is not set # CONFIG_NETFILTER_XT_MATCH_L2TP is not set # CONFIG_NETFILTER_XT_MATCH_LENGTH is not set # CONFIG_NETFILTER_XT_MATCH_LIMIT is not set # CONFIG_NETFILTER_XT_MATCH_MAC is not set # CONFIG_NETFILTER_XT_MATCH_MARK is not set # CONFIG_NETFILTER_XT_MATCH_MULTIPORT is not set # CONFIG_NETFILTER_XT_MATCH_NFACCT is not set # CONFIG_NETFILTER_XT_MATCH_OSF is not set # CONFIG_NETFILTER_XT_MATCH_OWNER is not set # CONFIG_NETFILTER_XT_MATCH_POLICY is not set # CONFIG_NETFILTER_XT_MATCH_PHYSDEV is not set # CONFIG_NETFILTER_XT_MATCH_PKTTYPE is not set # CONFIG_NETFILTER_XT_MATCH_QUOTA is not set # CONFIG_NETFILTER_XT_MATCH_RATEEST is not set # CONFIG_NETFILTER_XT_MATCH_REALM is not set # CONFIG_NETFILTER_XT_MATCH_RECENT is not set # CONFIG_NETFILTER_XT_MATCH_SCTP is not set # CONFIG_NETFILTER_XT_MATCH_SOCKET is not set # CONFIG_NETFILTER_XT_MATCH_STATE is not set # CONFIG_NETFILTER_XT_MATCH_STATISTIC is not set # CONFIG_NETFILTER_XT_MATCH_STRING is not set # CONFIG_NETFILTER_XT_MATCH_TCPMSS is not set # CONFIG_NETFILTER_XT_MATCH_TIME is not set # CONFIG_NETFILTER_XT_MATCH_U32 is not set # end of Core Netfilter Configuration # CONFIG_IP_SET is not set # CONFIG_IP_VS is not set # # IP: Netfilter Configuration # CONFIG_NF_DEFRAG_IPV4=y # CONFIG_NF_SOCKET_IPV4 is not set # CONFIG_NF_TPROXY_IPV4 is not set # 
CONFIG_NF_DUP_IPV4 is not set # CONFIG_NF_LOG_ARP is not set # CONFIG_NF_LOG_IPV4 is not set CONFIG_NF_REJECT_IPV4=y CONFIG_IP_NF_IPTABLES=y # CONFIG_IP_NF_MATCH_AH is not set # CONFIG_IP_NF_MATCH_ECN is not set # CONFIG_IP_NF_MATCH_RPFILTER is not set # CONFIG_IP_NF_MATCH_TTL is not set CONFIG_IP_NF_FILTER=y CONFIG_IP_NF_TARGET_REJECT=y CONFIG_IP_NF_TARGET_SYNPROXY=y CONFIG_IP_NF_NAT=y CONFIG_IP_NF_TARGET_MASQUERADE=y CONFIG_IP_NF_TARGET_NETMAP=y CONFIG_IP_NF_TARGET_REDIRECT=y CONFIG_IP_NF_MANGLE=y # CONFIG_IP_NF_TARGET_CLUSTERIP is not set # CONFIG_IP_NF_TARGET_ECN is not set # CONFIG_IP_NF_TARGET_TTL is not set # CONFIG_IP_NF_RAW is not set # CONFIG_IP_NF_SECURITY is not set # CONFIG_IP_NF_ARPTABLES is not set # end of IP: Netfilter Configuration # # IPv6: Netfilter Configuration # # CONFIG_NF_SOCKET_IPV6 is not set # CONFIG_NF_TPROXY_IPV6 is not set # CONFIG_NF_DUP_IPV6 is not set CONFIG_NF_REJECT_IPV6=y CONFIG_NF_LOG_IPV6=y CONFIG_IP6_NF_IPTABLES=y # CONFIG_IP6_NF_MATCH_AH is not set # CONFIG_IP6_NF_MATCH_EUI64 is not set # CONFIG_IP6_NF_MATCH_FRAG is not set # CONFIG_IP6_NF_MATCH_OPTS is not set # CONFIG_IP6_NF_MATCH_HL is not set # CONFIG_IP6_NF_MATCH_IPV6HEADER is not set # CONFIG_IP6_NF_MATCH_MH is not set # CONFIG_IP6_NF_MATCH_RPFILTER is not set # CONFIG_IP6_NF_MATCH_RT is not set # CONFIG_IP6_NF_MATCH_SRH is not set # CONFIG_IP6_NF_TARGET_HL is not set CONFIG_IP6_NF_FILTER=y CONFIG_IP6_NF_TARGET_REJECT=y CONFIG_IP6_NF_TARGET_SYNPROXY=y CONFIG_IP6_NF_MANGLE=y # CONFIG_IP6_NF_RAW is not set # CONFIG_IP6_NF_SECURITY is not set CONFIG_IP6_NF_NAT=y CONFIG_IP6_NF_TARGET_MASQUERADE=y # CONFIG_IP6_NF_TARGET_NPT is not set # end of IPv6: Netfilter Configuration CONFIG_NF_DEFRAG_IPV6=y # CONFIG_NF_CONNTRACK_BRIDGE is not set # CONFIG_BRIDGE_NF_EBTABLES is not set CONFIG_BPFILTER=y CONFIG_BPFILTER_UMH=y # CONFIG_IP_DCCP is not set # CONFIG_IP_SCTP is not set # CONFIG_RDS is not set # CONFIG_TIPC is not set # CONFIG_ATM is not set # CONFIG_L2TP is not set 
CONFIG_STP=y CONFIG_BRIDGE=y CONFIG_BRIDGE_IGMP_SNOOPING=y # CONFIG_BRIDGE_MRP is not set CONFIG_HAVE_NET_DSA=y # CONFIG_NET_DSA is not set # CONFIG_VLAN_8021Q is not set CONFIG_LLC=y # CONFIG_LLC2 is not set # CONFIG_ATALK is not set # CONFIG_X25 is not set # CONFIG_LAPB is not set # CONFIG_PHONET is not set # CONFIG_6LOWPAN is not set # CONFIG_IEEE802154 is not set CONFIG_NET_SCHED=y # # Queueing/Scheduling # # CONFIG_NET_SCH_HTB is not set # CONFIG_NET_SCH_HFSC is not set # CONFIG_NET_SCH_PRIO is not set # CONFIG_NET_SCH_MULTIQ is not set # CONFIG_NET_SCH_RED is not set # CONFIG_NET_SCH_SFB is not set # CONFIG_NET_SCH_SFQ is not set # CONFIG_NET_SCH_TEQL is not set # CONFIG_NET_SCH_TBF is not set # CONFIG_NET_SCH_CBS is not set # CONFIG_NET_SCH_ETF is not set # CONFIG_NET_SCH_TAPRIO is not set # CONFIG_NET_SCH_GRED is not set # CONFIG_NET_SCH_NETEM is not set # CONFIG_NET_SCH_DRR is not set # CONFIG_NET_SCH_MQPRIO is not set # CONFIG_NET_SCH_SKBPRIO is not set # CONFIG_NET_SCH_CHOKE is not set # CONFIG_NET_SCH_QFQ is not set # CONFIG_NET_SCH_CODEL is not set # CONFIG_NET_SCH_FQ_CODEL is not set # CONFIG_NET_SCH_CAKE is not set # CONFIG_NET_SCH_FQ is not set # CONFIG_NET_SCH_HHF is not set # CONFIG_NET_SCH_PIE is not set # CONFIG_NET_SCH_INGRESS is not set # CONFIG_NET_SCH_PLUG is not set # CONFIG_NET_SCH_ETS is not set # CONFIG_NET_SCH_DEFAULT is not set # # Classification # CONFIG_NET_CLS=y # CONFIG_NET_CLS_BASIC is not set # CONFIG_NET_CLS_ROUTE4 is not set # CONFIG_NET_CLS_FW is not set # CONFIG_NET_CLS_U32 is not set # CONFIG_NET_CLS_FLOW is not set # CONFIG_NET_CLS_CGROUP is not set # CONFIG_NET_CLS_BPF is not set # CONFIG_NET_CLS_FLOWER is not set # CONFIG_NET_CLS_MATCHALL is not set CONFIG_NET_EMATCH=y CONFIG_NET_EMATCH_STACK=32 # CONFIG_NET_EMATCH_CMP is not set # CONFIG_NET_EMATCH_NBYTE is not set # CONFIG_NET_EMATCH_U32 is not set # CONFIG_NET_EMATCH_META is not set # CONFIG_NET_EMATCH_TEXT is not set # CONFIG_NET_EMATCH_IPT is not set 
CONFIG_NET_CLS_ACT=y # CONFIG_NET_ACT_POLICE is not set # CONFIG_NET_ACT_GACT is not set # CONFIG_NET_ACT_MIRRED is not set # CONFIG_NET_ACT_SAMPLE is not set # CONFIG_NET_ACT_IPT is not set # CONFIG_NET_ACT_NAT is not set # CONFIG_NET_ACT_PEDIT is not set # CONFIG_NET_ACT_SIMP is not set # CONFIG_NET_ACT_SKBEDIT is not set # CONFIG_NET_ACT_CSUM is not set # CONFIG_NET_ACT_MPLS is not set # CONFIG_NET_ACT_VLAN is not set # CONFIG_NET_ACT_BPF is not set # CONFIG_NET_ACT_CONNMARK is not set # CONFIG_NET_ACT_CTINFO is not set # CONFIG_NET_ACT_SKBMOD is not set # CONFIG_NET_ACT_IFE is not set # CONFIG_NET_ACT_TUNNEL_KEY is not set # CONFIG_NET_ACT_GATE is not set # CONFIG_NET_TC_SKB_EXT is not set CONFIG_NET_SCH_FIFO=y CONFIG_DCB=y CONFIG_DNS_RESOLVER=y # CONFIG_BATMAN_ADV is not set # CONFIG_OPENVSWITCH is not set CONFIG_VSOCKETS=y # CONFIG_VSOCKETS_DIAG is not set # CONFIG_VSOCKETS_LOOPBACK is not set CONFIG_VIRTIO_VSOCKETS=y CONFIG_VIRTIO_VSOCKETS_COMMON=y # CONFIG_NETLINK_DIAG is not set CONFIG_MPLS=y # CONFIG_NET_MPLS_GSO is not set # CONFIG_MPLS_ROUTING is not set # CONFIG_NET_NSH is not set # CONFIG_HSR is not set # CONFIG_NET_SWITCHDEV is not set CONFIG_NET_L3_MASTER_DEV=y # CONFIG_QRTR is not set # CONFIG_NET_NCSI is not set CONFIG_RPS=y CONFIG_RFS_ACCEL=y CONFIG_XPS=y CONFIG_CGROUP_NET_PRIO=y CONFIG_CGROUP_NET_CLASSID=y CONFIG_NET_RX_BUSY_POLL=y CONFIG_BQL=y CONFIG_BPF_STREAM_PARSER=y CONFIG_NET_FLOW_LIMIT=y # # Network testing # # CONFIG_NET_PKTGEN is not set # end of Network testing # end of Networking options # CONFIG_HAMRADIO is not set # CONFIG_CAN is not set # CONFIG_BT is not set # CONFIG_AF_RXRPC is not set # CONFIG_AF_KCM is not set CONFIG_STREAM_PARSER=y CONFIG_FIB_RULES=y # CONFIG_WIRELESS is not set # CONFIG_WIMAX is not set # CONFIG_RFKILL is not set # CONFIG_NET_9P is not set # CONFIG_CAIF is not set # CONFIG_CEPH_LIB is not set # CONFIG_NFC is not set # CONFIG_PSAMPLE is not set # CONFIG_NET_IFE is not set CONFIG_LWTUNNEL=y 
CONFIG_LWTUNNEL_BPF=y CONFIG_DST_CACHE=y CONFIG_GRO_CELLS=y CONFIG_NET_SOCK_MSG=y CONFIG_FAILOVER=y CONFIG_ETHTOOL_NETLINK=y CONFIG_HAVE_EBPF_JIT=y # # Device Drivers # CONFIG_HAVE_EISA=y # CONFIG_EISA is not set CONFIG_HAVE_PCI=y # CONFIG_PCI is not set # CONFIG_PCCARD is not set # # Generic Driver Options # CONFIG_UEVENT_HELPER=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y CONFIG_STANDALONE=y CONFIG_PREVENT_FIRMWARE_BUILD=y # # Firmware loader # CONFIG_FW_LOADER=y CONFIG_FW_LOADER_PAGED_BUF=y CONFIG_EXTRA_FIRMWARE="" CONFIG_FW_LOADER_USER_HELPER=y # CONFIG_FW_LOADER_USER_HELPER_FALLBACK is not set # CONFIG_FW_LOADER_COMPRESS is not set CONFIG_FW_CACHE=y # end of Firmware loader CONFIG_ALLOW_DEV_COREDUMP=y # CONFIG_DEBUG_DRIVER is not set # CONFIG_DEBUG_DEVRES is not set # CONFIG_DEBUG_TEST_DRIVER_REMOVE is not set CONFIG_GENERIC_CPU_AUTOPROBE=y CONFIG_GENERIC_CPU_VULNERABILITIES=y CONFIG_DMA_SHARED_BUFFER=y # CONFIG_DMA_FENCE_TRACE is not set # end of Generic Driver Options # # Bus devices # # CONFIG_MHI_BUS is not set # end of Bus devices CONFIG_CONNECTOR=y CONFIG_PROC_EVENTS=y # CONFIG_GNSS is not set # CONFIG_MTD is not set # CONFIG_OF is not set CONFIG_ARCH_MIGHT_HAVE_PC_PARPORT=y # CONFIG_PARPORT is not set CONFIG_BLK_DEV=y # CONFIG_BLK_DEV_NULL_BLK is not set # CONFIG_BLK_DEV_FD is not set CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_LOOP_MIN_COUNT=8 # CONFIG_BLK_DEV_CRYPTOLOOP is not set # CONFIG_BLK_DEV_DRBD is not set # CONFIG_BLK_DEV_NBD is not set # CONFIG_BLK_DEV_RAM is not set # CONFIG_CDROM_PKTCDVD is not set # CONFIG_ATA_OVER_ETH is not set CONFIG_VIRTIO_BLK=y # CONFIG_BLK_DEV_RBD is not set # # NVME Support # # CONFIG_NVME_FC is not set # CONFIG_NVME_TCP is not set # end of NVME Support # # Misc devices # # CONFIG_DUMMY_IRQ is not set # CONFIG_ENCLOSURE_SERVICES is not set # CONFIG_SRAM is not set # CONFIG_XILINX_SDFEC is not set CONFIG_SYSGENID=y # CONFIG_C2PORT is not set # # EEPROM support # # CONFIG_EEPROM_93CX6 
is not set # end of EEPROM support # # Texas Instruments shared transport line discipline # # end of Texas Instruments shared transport line discipline # # Altera FPGA firmware download module (requires I2C) # # CONFIG_ECHO is not set # end of Misc devices CONFIG_HAVE_IDE=y # CONFIG_IDE is not set # # SCSI device support # CONFIG_SCSI_MOD=y # CONFIG_RAID_ATTRS is not set CONFIG_SCSI=y CONFIG_SCSI_DMA=y CONFIG_SCSI_PROC_FS=y # # SCSI support type (disk, tape, CD-ROM) # # CONFIG_BLK_DEV_SD is not set # CONFIG_CHR_DEV_ST is not set # CONFIG_BLK_DEV_SR is not set # CONFIG_CHR_DEV_SG is not set # CONFIG_CHR_DEV_SCH is not set # CONFIG_SCSI_CONSTANTS is not set # CONFIG_SCSI_LOGGING is not set # CONFIG_SCSI_SCAN_ASYNC is not set # # SCSI Transports # # CONFIG_SCSI_SPI_ATTRS is not set # CONFIG_SCSI_FC_ATTRS is not set CONFIG_SCSI_ISCSI_ATTRS=y # CONFIG_SCSI_SAS_ATTRS is not set # CONFIG_SCSI_SAS_LIBSAS is not set # CONFIG_SCSI_SRP_ATTRS is not set # end of SCSI Transports CONFIG_SCSI_LOWLEVEL=y CONFIG_ISCSI_TCP=y # CONFIG_ISCSI_BOOT_SYSFS is not set # CONFIG_SCSI_UFSHCD is not set # CONFIG_SCSI_DEBUG is not set # CONFIG_SCSI_VIRTIO is not set # CONFIG_SCSI_DH is not set # end of SCSI device support # CONFIG_ATA is not set # CONFIG_MD is not set # CONFIG_TARGET_CORE is not set # CONFIG_MACINTOSH_DRIVERS is not set CONFIG_NETDEVICES=y CONFIG_NET_CORE=y # CONFIG_BONDING is not set # CONFIG_DUMMY is not set # CONFIG_WIREGUARD is not set # CONFIG_EQUALIZER is not set # CONFIG_NET_TEAM is not set # CONFIG_MACVLAN is not set # CONFIG_IPVLAN is not set # CONFIG_VXLAN is not set # CONFIG_GENEVE is not set # CONFIG_BAREUDP is not set # CONFIG_GTP is not set # CONFIG_MACSEC is not set # CONFIG_NETCONSOLE is not set # CONFIG_TUN is not set # CONFIG_TUN_VNET_CROSS_LE is not set CONFIG_VETH=y CONFIG_VIRTIO_NET=y # CONFIG_NLMON is not set # CONFIG_NET_VRF is not set # # Distributed Switch Architecture drivers # # end of Distributed Switch Architecture drivers # CONFIG_ETHERNET is not 
set # CONFIG_PHYLIB is not set # CONFIG_MDIO_DEVICE is not set # # PCS device drivers # # end of PCS device drivers # CONFIG_PPP is not set # CONFIG_SLIP is not set # # Host-side USB support is needed for USB Network Adapter support # # CONFIG_WLAN is not set # # Enable WiMAX (Networking options) to see the WiMAX drivers # # CONFIG_WAN is not set # CONFIG_NETDEVSIM is not set CONFIG_NET_FAILOVER=y # CONFIG_ISDN is not set # # Input device support # CONFIG_INPUT=y CONFIG_INPUT_FF_MEMLESS=y # CONFIG_INPUT_POLLDEV is not set # CONFIG_INPUT_SPARSEKMAP is not set # CONFIG_INPUT_MATRIXKMAP is not set # # Userland interfaces # # CONFIG_INPUT_MOUSEDEV is not set # CONFIG_INPUT_JOYDEV is not set CONFIG_INPUT_EVDEV=y # CONFIG_INPUT_EVBUG is not set # # Input Device Drivers # # CONFIG_INPUT_KEYBOARD is not set # CONFIG_INPUT_MOUSE is not set # CONFIG_INPUT_JOYSTICK is not set # CONFIG_INPUT_TABLET is not set # CONFIG_INPUT_TOUCHSCREEN is not set CONFIG_INPUT_MISC=y # CONFIG_INPUT_AD714X is not set # CONFIG_INPUT_E3X0_BUTTON is not set # CONFIG_INPUT_PCSPKR is not set # CONFIG_INPUT_ATI_REMOTE2 is not set # CONFIG_INPUT_KEYSPAN_REMOTE is not set # CONFIG_INPUT_POWERMATE is not set # CONFIG_INPUT_YEALINK is not set # CONFIG_INPUT_CM109 is not set # CONFIG_INPUT_UINPUT is not set # CONFIG_INPUT_ADXL34X is not set # CONFIG_INPUT_CMA3000 is not set # CONFIG_RMI4_CORE is not set # # Hardware I/O ports # # CONFIG_SERIO is not set CONFIG_ARCH_MIGHT_HAVE_PC_SERIO=y # CONFIG_GAMEPORT is not set # end of Hardware I/O ports # end of Input device support # # Character devices # CONFIG_TTY=y CONFIG_VT=y CONFIG_CONSOLE_TRANSLATIONS=y CONFIG_VT_CONSOLE=y CONFIG_VT_CONSOLE_SLEEP=y CONFIG_HW_CONSOLE=y CONFIG_VT_HW_CONSOLE_BINDING=y CONFIG_UNIX98_PTYS=y # CONFIG_LEGACY_PTYS is not set CONFIG_LDISC_AUTOLOAD=y # # Serial drivers # CONFIG_SERIAL_EARLYCON=y CONFIG_SERIAL_8250=y # CONFIG_SERIAL_8250_DEPRECATED_OPTIONS is not set # CONFIG_SERIAL_8250_16550A_VARIANTS is not set # 
CONFIG_SERIAL_8250_FINTEK is not set CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_8250_DMA=y CONFIG_SERIAL_8250_NR_UARTS=1 CONFIG_SERIAL_8250_RUNTIME_UARTS=1 # CONFIG_SERIAL_8250_EXTENDED is not set # CONFIG_SERIAL_8250_DW is not set # CONFIG_SERIAL_8250_RT288X is not set # # Non-8250 serial port support # # CONFIG_SERIAL_UARTLITE is not set CONFIG_SERIAL_CORE=y CONFIG_SERIAL_CORE_CONSOLE=y # CONFIG_SERIAL_LANTIQ is not set # CONFIG_SERIAL_SCCNXP is not set # CONFIG_SERIAL_ALTERA_JTAGUART is not set # CONFIG_SERIAL_ALTERA_UART is not set # CONFIG_SERIAL_ARC is not set # CONFIG_SERIAL_FSL_LPUART is not set # CONFIG_SERIAL_FSL_LINFLEXUART is not set # CONFIG_SERIAL_SPRD is not set # end of Serial drivers # CONFIG_SERIAL_NONSTANDARD is not set # CONFIG_N_GSM is not set # CONFIG_NULL_TTY is not set # CONFIG_TRACE_SINK is not set CONFIG_HVC_DRIVER=y CONFIG_SERIAL_DEV_BUS=y CONFIG_SERIAL_DEV_CTRL_TTYPORT=y CONFIG_VIRTIO_CONSOLE=y # CONFIG_IPMI_HANDLER is not set CONFIG_HW_RANDOM=y # CONFIG_HW_RANDOM_TIMERIOMEM is not set # CONFIG_HW_RANDOM_BA431 is not set # CONFIG_HW_RANDOM_VIA is not set CONFIG_HW_RANDOM_VIRTIO=y # CONFIG_HW_RANDOM_XIPHERA is not set # CONFIG_MWAVE is not set CONFIG_DEVMEM=y # CONFIG_DEVKMEM is not set # CONFIG_NVRAM is not set # CONFIG_RAW_DRIVER is not set # CONFIG_HANGCHECK_TIMER is not set # CONFIG_TCG_TPM is not set # CONFIG_TELCLOCK is not set CONFIG_RANDOM_TRUST_CPU=y CONFIG_RANDOM_TRUST_BOOTLOADER=y # end of Character devices # # I2C support # # CONFIG_I2C is not set # end of I2C support # CONFIG_I3C is not set # CONFIG_SPI is not set # CONFIG_SPMI is not set # CONFIG_HSI is not set CONFIG_PPS=y # CONFIG_PPS_DEBUG is not set # # PPS clients support # # CONFIG_PPS_CLIENT_KTIMER is not set # CONFIG_PPS_CLIENT_LDISC is not set # CONFIG_PPS_CLIENT_GPIO is not set # # PPS generators support # # # PTP clock support # CONFIG_PTP_1588_CLOCK=y # # Enable PHYLIB and NETWORK_PHY_TIMESTAMPING to see the additional clocks. 
# CONFIG_PTP_1588_CLOCK_KVM=y # end of PTP clock support # CONFIG_PINCTRL is not set # CONFIG_GPIOLIB is not set # CONFIG_W1 is not set CONFIG_POWER_RESET=y # CONFIG_POWER_RESET_RESTART is not set CONFIG_POWER_SUPPLY=y # CONFIG_POWER_SUPPLY_DEBUG is not set # CONFIG_PDA_POWER is not set # CONFIG_TEST_POWER is not set # CONFIG_BATTERY_DS2780 is not set # CONFIG_BATTERY_DS2781 is not set # CONFIG_BATTERY_BQ27XXX is not set # CONFIG_CHARGER_MAX8903 is not set # CONFIG_HWMON is not set CONFIG_THERMAL=y # CONFIG_THERMAL_NETLINK is not set # CONFIG_THERMAL_STATISTICS is not set CONFIG_THERMAL_EMERGENCY_POWEROFF_DELAY_MS=0 CONFIG_THERMAL_WRITABLE_TRIPS=y CONFIG_THERMAL_DEFAULT_GOV_STEP_WISE=y # CONFIG_THERMAL_DEFAULT_GOV_FAIR_SHARE is not set # CONFIG_THERMAL_DEFAULT_GOV_USER_SPACE is not set CONFIG_THERMAL_GOV_FAIR_SHARE=y CONFIG_THERMAL_GOV_STEP_WISE=y # CONFIG_THERMAL_GOV_BANG_BANG is not set CONFIG_THERMAL_GOV_USER_SPACE=y # CONFIG_THERMAL_EMULATION is not set # # Intel thermal drivers # # CONFIG_INTEL_POWERCLAMP is not set # # ACPI INT340X thermal drivers # # end of ACPI INT340X thermal drivers # end of Intel thermal drivers CONFIG_WATCHDOG=y CONFIG_WATCHDOG_CORE=y # CONFIG_WATCHDOG_NOWAYOUT is not set CONFIG_WATCHDOG_HANDLE_BOOT_ENABLED=y CONFIG_WATCHDOG_OPEN_TIMEOUT=0 CONFIG_WATCHDOG_SYSFS=y # # Watchdog Pretimeout Governors # # CONFIG_WATCHDOG_PRETIMEOUT_GOV is not set # # Watchdog Device Drivers # # CONFIG_SOFT_WATCHDOG is not set # CONFIG_XILINX_WATCHDOG is not set # CONFIG_CADENCE_WATCHDOG is not set # CONFIG_DW_WATCHDOG is not set # CONFIG_MAX63XX_WATCHDOG is not set # CONFIG_ACQUIRE_WDT is not set # CONFIG_ADVANTECH_WDT is not set # CONFIG_EBC_C384_WDT is not set # CONFIG_F71808E_WDT is not set # CONFIG_SBC_FITPC2_WATCHDOG is not set # CONFIG_EUROTECH_WDT is not set # CONFIG_IB700_WDT is not set # CONFIG_IBMASR is not set # CONFIG_WAFER_WDT is not set # CONFIG_IT8712F_WDT is not set # CONFIG_IT87_WDT is not set # CONFIG_SC1200_WDT is not set # 
CONFIG_PC87413_WDT is not set # CONFIG_60XX_WDT is not set # CONFIG_CPU5_WDT is not set # CONFIG_SMSC_SCH311X_WDT is not set # CONFIG_SMSC37B787_WDT is not set # CONFIG_TQMX86_WDT is not set # CONFIG_W83627HF_WDT is not set # CONFIG_W83877F_WDT is not set # CONFIG_W83977F_WDT is not set # CONFIG_MACHZ_WDT is not set # CONFIG_SBC_EPX_C3_WATCHDOG is not set CONFIG_SSB_POSSIBLE=y # CONFIG_SSB is not set CONFIG_BCMA_POSSIBLE=y # CONFIG_BCMA is not set # # Multifunction device drivers # # CONFIG_MFD_MADERA is not set # CONFIG_HTC_PASIC3 is not set # CONFIG_MFD_KEMPLD is not set # CONFIG_MFD_MT6397 is not set # CONFIG_MFD_SM501 is not set # CONFIG_ABX500_CORE is not set # CONFIG_MFD_SYSCON is not set # CONFIG_MFD_TQMX86 is not set # CONFIG_RAVE_SP_CORE is not set # end of Multifunction device drivers # CONFIG_REGULATOR is not set # CONFIG_RC_CORE is not set # CONFIG_MEDIA_CEC_SUPPORT is not set # CONFIG_MEDIA_SUPPORT is not set # # Graphics support # # CONFIG_DRM is not set # # ARM devices # # end of ARM devices # # Frame buffer Devices # # CONFIG_FB is not set # end of Frame buffer Devices # # Backlight & LCD device support # # CONFIG_LCD_CLASS_DEVICE is not set # CONFIG_BACKLIGHT_CLASS_DEVICE is not set # end of Backlight & LCD device support # # Console display driver support # CONFIG_VGA_CONSOLE=y CONFIG_DUMMY_CONSOLE=y CONFIG_DUMMY_CONSOLE_COLUMNS=80 CONFIG_DUMMY_CONSOLE_ROWS=25 # end of Console display driver support # end of Graphics support # CONFIG_SOUND is not set # # HID support # CONFIG_HID=y # CONFIG_HID_BATTERY_STRENGTH is not set CONFIG_HIDRAW=y # CONFIG_UHID is not set # CONFIG_HID_GENERIC is not set # # Special HID drivers # # CONFIG_HID_A4TECH is not set # CONFIG_HID_ACRUX is not set # CONFIG_HID_APPLE is not set # CONFIG_HID_AUREAL is not set # CONFIG_HID_BELKIN is not set # CONFIG_HID_CHERRY is not set # CONFIG_HID_COUGAR is not set # CONFIG_HID_MACALLY is not set # CONFIG_HID_CMEDIA is not set # CONFIG_HID_CYPRESS is not set # CONFIG_HID_DRAGONRISE 
is not set # CONFIG_HID_EMS_FF is not set # CONFIG_HID_ELECOM is not set # CONFIG_HID_EZKEY is not set # CONFIG_HID_GEMBIRD is not set # CONFIG_HID_GFRM is not set # CONFIG_HID_GLORIOUS is not set # CONFIG_HID_VIVALDI is not set # CONFIG_HID_KEYTOUCH is not set # CONFIG_HID_KYE is not set # CONFIG_HID_WALTOP is not set # CONFIG_HID_VIEWSONIC is not set # CONFIG_HID_GYRATION is not set # CONFIG_HID_ICADE is not set # CONFIG_HID_ITE is not set # CONFIG_HID_JABRA is not set # CONFIG_HID_TWINHAN is not set # CONFIG_HID_KENSINGTON is not set # CONFIG_HID_LCPOWER is not set # CONFIG_HID_LENOVO is not set # CONFIG_HID_MAGICMOUSE is not set # CONFIG_HID_MALTRON is not set # CONFIG_HID_MAYFLASH is not set # CONFIG_HID_REDRAGON is not set # CONFIG_HID_MICROSOFT is not set # CONFIG_HID_MONTEREY is not set # CONFIG_HID_MULTITOUCH is not set # CONFIG_HID_NTI is not set # CONFIG_HID_ORTEK is not set # CONFIG_HID_PANTHERLORD is not set # CONFIG_HID_PETALYNX is not set # CONFIG_HID_PICOLCD is not set # CONFIG_HID_PLANTRONICS is not set # CONFIG_HID_PRIMAX is not set # CONFIG_HID_SAITEK is not set # CONFIG_HID_SPEEDLINK is not set # CONFIG_HID_STEAM is not set # CONFIG_HID_STEELSERIES is not set # CONFIG_HID_SUNPLUS is not set # CONFIG_HID_RMI is not set # CONFIG_HID_GREENASIA is not set # CONFIG_HID_SMARTJOYPLUS is not set # CONFIG_HID_TIVO is not set # CONFIG_HID_TOPSEED is not set # CONFIG_HID_THRUSTMASTER is not set # CONFIG_HID_UDRAW_PS3 is not set # CONFIG_HID_XINMO is not set # CONFIG_HID_ZEROPLUS is not set # CONFIG_HID_ZYDACRON is not set # CONFIG_HID_SENSOR_HUB is not set # CONFIG_HID_ALPS is not set # end of Special HID drivers # end of HID support CONFIG_USB_OHCI_LITTLE_ENDIAN=y CONFIG_USB_SUPPORT=y # CONFIG_USB_ULPI_BUS is not set CONFIG_USB_ARCH_HAS_HCD=y # CONFIG_USB is not set # # USB port drivers # # # USB Physical Layer drivers # # CONFIG_NOP_USB_XCEIV is not set # end of USB Physical Layer drivers # CONFIG_USB_GADGET is not set # CONFIG_TYPEC is not set # 
CONFIG_USB_ROLE_SWITCH is not set # CONFIG_MMC is not set # CONFIG_MEMSTICK is not set # CONFIG_NEW_LEDS is not set # CONFIG_ACCESSIBILITY is not set # CONFIG_INFINIBAND is not set CONFIG_EDAC_ATOMIC_SCRUB=y CONFIG_EDAC_SUPPORT=y # CONFIG_EDAC is not set CONFIG_RTC_LIB=y CONFIG_RTC_MC146818_LIB=y # CONFIG_RTC_CLASS is not set CONFIG_DMADEVICES=y # CONFIG_DMADEVICES_DEBUG is not set # # DMA Devices # # CONFIG_ALTERA_MSGDMA is not set # CONFIG_INTEL_IDMA64 is not set # CONFIG_QCOM_HIDMA_MGMT is not set # CONFIG_QCOM_HIDMA is not set # CONFIG_DW_DMAC is not set # CONFIG_SF_PDMA is not set # # DMABUF options # CONFIG_SYNC_FILE=y # CONFIG_SW_SYNC is not set # CONFIG_UDMABUF is not set # CONFIG_DMABUF_MOVE_NOTIFY is not set # CONFIG_DMABUF_SELFTESTS is not set # CONFIG_DMABUF_HEAPS is not set # end of DMABUF options CONFIG_AUXDISPLAY=y # CONFIG_IMG_ASCII_LCD is not set CONFIG_CHARLCD_BL_OFF=y # CONFIG_CHARLCD_BL_ON is not set # CONFIG_CHARLCD_BL_FLASH is not set # CONFIG_UIO is not set # CONFIG_VFIO is not set CONFIG_VIRT_DRIVERS=y CONFIG_VIRTIO=y CONFIG_VIRTIO_MENU=y CONFIG_VIRTIO_BALLOON=y # CONFIG_VIRTIO_MEM is not set # CONFIG_VIRTIO_INPUT is not set CONFIG_VIRTIO_MMIO=y CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES=y # CONFIG_VDPA is not set CONFIG_VHOST_MENU=y # CONFIG_VHOST_NET is not set # CONFIG_VHOST_VSOCK is not set # CONFIG_VHOST_CROSS_ENDIAN_LEGACY is not set # # Microsoft Hyper-V guest support # # end of Microsoft Hyper-V guest support # CONFIG_GREYBUS is not set CONFIG_STAGING=y # CONFIG_COMEDI is not set # CONFIG_STAGING_MEDIA is not set # # Android # # end of Android # CONFIG_GS_FPGABOOT is not set # CONFIG_UNISYSSPAR is not set # # Gasket devices # # end of Gasket devices # CONFIG_FIELDBUS_DEV is not set CONFIG_X86_PLATFORM_DEVICES=y # CONFIG_DCDBAS is not set # CONFIG_DELL_SMBIOS is not set # CONFIG_DELL_RBU is not set # CONFIG_SENSORS_HDAPS is not set CONFIG_INTEL_TURBO_MAX_3=y # CONFIG_INTEL_UNCORE_FREQ_CONTROL is not set # CONFIG_INTEL_PUNIT_IPC is not set # 
CONFIG_CHROME_PLATFORMS is not set # CONFIG_MELLANOX_PLATFORM is not set CONFIG_HAVE_CLK=y CONFIG_CLKDEV_LOOKUP=y CONFIG_HAVE_CLK_PREPARE=y CONFIG_COMMON_CLK=y # CONFIG_HWSPINLOCK is not set # # Clock Source drivers # CONFIG_CLKEVT_I8253=y CONFIG_I8253_LOCK=y CONFIG_CLKBLD_I8253=y # end of Clock Source drivers CONFIG_MAILBOX=y # CONFIG_ALTERA_MBOX is not set CONFIG_IOMMU_SUPPORT=y # # Generic IOMMU Pagetable Support # # end of Generic IOMMU Pagetable Support # CONFIG_IOMMU_DEBUGFS is not set # # Remoteproc drivers # # CONFIG_REMOTEPROC is not set # end of Remoteproc drivers # # Rpmsg drivers # # CONFIG_RPMSG_QCOM_GLINK_RPM is not set # CONFIG_RPMSG_VIRTIO is not set # end of Rpmsg drivers # # SOC (System On Chip) specific Drivers # # # Amlogic SoC drivers # # end of Amlogic SoC drivers # # Aspeed SoC drivers # # end of Aspeed SoC drivers # # Broadcom SoC drivers # # end of Broadcom SoC drivers # # NXP/Freescale QorIQ SoC drivers # # end of NXP/Freescale QorIQ SoC drivers # # i.MX SoC drivers # # end of i.MX SoC drivers # # Qualcomm SoC drivers # # end of Qualcomm SoC drivers # CONFIG_SOC_TI is not set # # Xilinx SoC drivers # # CONFIG_XILINX_VCU is not set # end of Xilinx SoC drivers # end of SOC (System On Chip) specific Drivers # CONFIG_PM_DEVFREQ is not set # CONFIG_EXTCON is not set # CONFIG_MEMORY is not set # CONFIG_IIO is not set # CONFIG_PWM is not set # # IRQ chip support # # end of IRQ chip support # CONFIG_IPACK_BUS is not set # CONFIG_RESET_CONTROLLER is not set # # PHY Subsystem # # CONFIG_GENERIC_PHY is not set # CONFIG_USB_LGM_PHY is not set # CONFIG_BCM_KONA_USB2_PHY is not set # CONFIG_PHY_PXA_28NM_HSIC is not set # CONFIG_PHY_PXA_28NM_USB2 is not set # CONFIG_PHY_INTEL_LGM_EMMC is not set # end of PHY Subsystem # CONFIG_POWERCAP is not set # CONFIG_MCB is not set # # Performance monitor support # # end of Performance monitor support CONFIG_RAS=y # # Android # # CONFIG_ANDROID is not set # end of Android # CONFIG_LIBNVDIMM is not set # CONFIG_DAX 
is not set # CONFIG_NVMEM is not set # # HW tracing support # # CONFIG_STM is not set # CONFIG_INTEL_TH is not set # end of HW tracing support # CONFIG_FPGA is not set # CONFIG_TEE is not set # CONFIG_SIOX is not set # CONFIG_SLIMBUS is not set # CONFIG_INTERCONNECT is not set # CONFIG_COUNTER is not set # end of Device Drivers # # File systems # CONFIG_DCACHE_WORD_ACCESS=y CONFIG_VALIDATE_FS_PARSER=y CONFIG_FS_IOMAP=y # CONFIG_EXT2_FS is not set # CONFIG_EXT3_FS is not set CONFIG_EXT4_FS=y CONFIG_EXT4_USE_FOR_EXT2=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_EXT4_FS_SECURITY=y CONFIG_EXT4_DEBUG=y CONFIG_JBD2=y CONFIG_JBD2_DEBUG=y CONFIG_FS_MBCACHE=y # CONFIG_REISERFS_FS is not set # CONFIG_JFS_FS is not set CONFIG_XFS_FS=y CONFIG_XFS_SUPPORT_V4=y CONFIG_XFS_QUOTA=y CONFIG_XFS_POSIX_ACL=y # CONFIG_XFS_RT is not set # CONFIG_XFS_ONLINE_SCRUB is not set # CONFIG_XFS_WARN is not set # CONFIG_XFS_DEBUG is not set # CONFIG_GFS2_FS is not set # CONFIG_BTRFS_FS is not set # CONFIG_NILFS2_FS is not set # CONFIG_F2FS_FS is not set # CONFIG_FS_DAX is not set CONFIG_FS_POSIX_ACL=y CONFIG_EXPORTFS=y # CONFIG_EXPORTFS_BLOCK_OPS is not set CONFIG_FILE_LOCKING=y CONFIG_MANDATORY_FILE_LOCKING=y CONFIG_FS_ENCRYPTION=y CONFIG_FS_ENCRYPTION_ALGS=y # CONFIG_FS_VERITY is not set CONFIG_FSNOTIFY=y CONFIG_DNOTIFY=y CONFIG_INOTIFY_USER=y CONFIG_FANOTIFY=y CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y CONFIG_QUOTA=y CONFIG_QUOTA_NETLINK_INTERFACE=y # CONFIG_PRINT_QUOTA_WARNING is not set # CONFIG_QUOTA_DEBUG is not set # CONFIG_QFMT_V1 is not set # CONFIG_QFMT_V2 is not set CONFIG_QUOTACTL=y CONFIG_AUTOFS4_FS=y CONFIG_AUTOFS_FS=y # CONFIG_FUSE_FS is not set CONFIG_OVERLAY_FS=y # CONFIG_OVERLAY_FS_REDIRECT_DIR is not set CONFIG_OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW=y # CONFIG_OVERLAY_FS_INDEX is not set # CONFIG_OVERLAY_FS_XINO_AUTO is not set # CONFIG_OVERLAY_FS_METACOPY is not set # # Caches # # CONFIG_FSCACHE is not set # end of Caches # # CD-ROM/DVD Filesystems # # CONFIG_ISO9660_FS is not set # 
CONFIG_UDF_FS is not set # end of CD-ROM/DVD Filesystems # # DOS/FAT/EXFAT/NT Filesystems # # CONFIG_MSDOS_FS is not set # CONFIG_VFAT_FS is not set # CONFIG_EXFAT_FS is not set # CONFIG_NTFS_FS is not set # end of DOS/FAT/EXFAT/NT Filesystems # # Pseudo filesystems # CONFIG_PROC_FS=y CONFIG_PROC_KCORE=y CONFIG_PROC_SYSCTL=y CONFIG_PROC_PAGE_MONITOR=y CONFIG_PROC_CHILDREN=y CONFIG_PROC_PID_ARCH_STATUS=y CONFIG_KERNFS=y CONFIG_SYSFS=y CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y CONFIG_TMPFS_XATTR=y # CONFIG_TMPFS_INODE64 is not set CONFIG_HUGETLBFS=y CONFIG_HUGETLB_PAGE=y CONFIG_MEMFD_CREATE=y CONFIG_ARCH_HAS_GIGANTIC_PAGE=y # CONFIG_CONFIGFS_FS is not set # end of Pseudo filesystems CONFIG_MISC_FILESYSTEMS=y # CONFIG_ORANGEFS_FS is not set # CONFIG_ADFS_FS is not set # CONFIG_AFFS_FS is not set # CONFIG_ECRYPT_FS is not set # CONFIG_HFS_FS is not set # CONFIG_HFSPLUS_FS is not set # CONFIG_BEFS_FS is not set # CONFIG_BFS_FS is not set # CONFIG_EFS_FS is not set # CONFIG_CRAMFS is not set CONFIG_SQUASHFS=y CONFIG_SQUASHFS_FILE_CACHE=y # CONFIG_SQUASHFS_FILE_DIRECT is not set CONFIG_SQUASHFS_DECOMP_SINGLE=y # CONFIG_SQUASHFS_DECOMP_MULTI is not set # CONFIG_SQUASHFS_DECOMP_MULTI_PERCPU is not set CONFIG_SQUASHFS_XATTR=y CONFIG_SQUASHFS_ZLIB=y CONFIG_SQUASHFS_LZ4=y CONFIG_SQUASHFS_LZO=y CONFIG_SQUASHFS_XZ=y CONFIG_SQUASHFS_ZSTD=y # CONFIG_SQUASHFS_4K_DEVBLK_SIZE is not set # CONFIG_SQUASHFS_EMBEDDED is not set CONFIG_SQUASHFS_FRAGMENT_CACHE_SIZE=3 # CONFIG_VXFS_FS is not set # CONFIG_MINIX_FS is not set # CONFIG_OMFS_FS is not set # CONFIG_HPFS_FS is not set # CONFIG_QNX4FS_FS is not set # CONFIG_QNX6FS_FS is not set # CONFIG_ROMFS_FS is not set CONFIG_PSTORE=y CONFIG_PSTORE_DEFLATE_COMPRESS=y # CONFIG_PSTORE_LZO_COMPRESS is not set # CONFIG_PSTORE_LZ4_COMPRESS is not set # CONFIG_PSTORE_LZ4HC_COMPRESS is not set # CONFIG_PSTORE_842_COMPRESS is not set # CONFIG_PSTORE_ZSTD_COMPRESS is not set CONFIG_PSTORE_COMPRESS=y CONFIG_PSTORE_DEFLATE_COMPRESS_DEFAULT=y 
CONFIG_PSTORE_COMPRESS_DEFAULT="deflate" # CONFIG_PSTORE_CONSOLE is not set # CONFIG_PSTORE_PMSG is not set # CONFIG_PSTORE_RAM is not set # CONFIG_SYSV_FS is not set # CONFIG_UFS_FS is not set # CONFIG_EROFS_FS is not set CONFIG_NETWORK_FILESYSTEMS=y CONFIG_NFS_FS=y # CONFIG_NFS_V2 is not set # CONFIG_NFS_V3 is not set CONFIG_NFS_V4=y CONFIG_NFS_SWAP=y CONFIG_NFS_V4_1=y CONFIG_NFS_V4_2=y CONFIG_PNFS_FILE_LAYOUT=y CONFIG_NFS_V4_1_IMPLEMENTATION_ID_DOMAIN="kernel.org" # CONFIG_NFS_V4_1_MIGRATION is not set CONFIG_NFS_V4_SECURITY_LABEL=y CONFIG_ROOT_NFS=y # CONFIG_NFS_USE_LEGACY_DNS is not set CONFIG_NFS_USE_KERNEL_DNS=y CONFIG_NFS_DISABLE_UDP_SUPPORT=y # CONFIG_NFS_V4_2_READ_PLUS is not set # CONFIG_NFSD is not set CONFIG_GRACE_PERIOD=y CONFIG_LOCKD=y CONFIG_NFS_COMMON=y CONFIG_SUNRPC=y CONFIG_SUNRPC_GSS=y CONFIG_SUNRPC_BACKCHANNEL=y CONFIG_SUNRPC_SWAP=y # CONFIG_SUNRPC_DEBUG is not set # CONFIG_CEPH_FS is not set # CONFIG_CIFS is not set # CONFIG_CODA_FS is not set # CONFIG_AFS_FS is not set CONFIG_NLS=y CONFIG_NLS_DEFAULT="utf8" # CONFIG_NLS_CODEPAGE_437 is not set # CONFIG_NLS_CODEPAGE_737 is not set # CONFIG_NLS_CODEPAGE_775 is not set # CONFIG_NLS_CODEPAGE_850 is not set # CONFIG_NLS_CODEPAGE_852 is not set # CONFIG_NLS_CODEPAGE_855 is not set # CONFIG_NLS_CODEPAGE_857 is not set # CONFIG_NLS_CODEPAGE_860 is not set # CONFIG_NLS_CODEPAGE_861 is not set # CONFIG_NLS_CODEPAGE_862 is not set # CONFIG_NLS_CODEPAGE_863 is not set # CONFIG_NLS_CODEPAGE_864 is not set # CONFIG_NLS_CODEPAGE_865 is not set # CONFIG_NLS_CODEPAGE_866 is not set # CONFIG_NLS_CODEPAGE_869 is not set # CONFIG_NLS_CODEPAGE_936 is not set # CONFIG_NLS_CODEPAGE_950 is not set # CONFIG_NLS_CODEPAGE_932 is not set # CONFIG_NLS_CODEPAGE_949 is not set # CONFIG_NLS_CODEPAGE_874 is not set # CONFIG_NLS_ISO8859_8 is not set # CONFIG_NLS_CODEPAGE_1250 is not set # CONFIG_NLS_CODEPAGE_1251 is not set # CONFIG_NLS_ASCII is not set # CONFIG_NLS_ISO8859_1 is not set # CONFIG_NLS_ISO8859_2 is not set # 
CONFIG_NLS_ISO8859_3 is not set # CONFIG_NLS_ISO8859_4 is not set # CONFIG_NLS_ISO8859_5 is not set # CONFIG_NLS_ISO8859_6 is not set # CONFIG_NLS_ISO8859_7 is not set # CONFIG_NLS_ISO8859_9 is not set # CONFIG_NLS_ISO8859_13 is not set # CONFIG_NLS_ISO8859_14 is not set # CONFIG_NLS_ISO8859_15 is not set # CONFIG_NLS_KOI8_R is not set # CONFIG_NLS_KOI8_U is not set # CONFIG_NLS_MAC_ROMAN is not set # CONFIG_NLS_MAC_CELTIC is not set # CONFIG_NLS_MAC_CENTEURO is not set # CONFIG_NLS_MAC_CROATIAN is not set # CONFIG_NLS_MAC_CYRILLIC is not set # CONFIG_NLS_MAC_GAELIC is not set # CONFIG_NLS_MAC_GREEK is not set # CONFIG_NLS_MAC_ICELAND is not set # CONFIG_NLS_MAC_INUIT is not set # CONFIG_NLS_MAC_ROMANIAN is not set # CONFIG_NLS_MAC_TURKISH is not set # CONFIG_NLS_UTF8 is not set # CONFIG_UNICODE is not set CONFIG_IO_WQ=y # end of File systems # # Security options # CONFIG_KEYS=y # CONFIG_KEYS_REQUEST_CACHE is not set CONFIG_PERSISTENT_KEYRINGS=y CONFIG_ENCRYPTED_KEYS=y # CONFIG_KEY_DH_OPERATIONS is not set # CONFIG_SECURITY_DMESG_RESTRICT is not set CONFIG_SECURITY=y CONFIG_SECURITY_WRITABLE_HOOKS=y CONFIG_SECURITYFS=y CONFIG_SECURITY_NETWORK=y CONFIG_SECURITY_NETWORK_XFRM=y # CONFIG_SECURITY_PATH is not set CONFIG_LSM_MMAP_MIN_ADDR=65536 CONFIG_HAVE_HARDENED_USERCOPY_ALLOCATOR=y CONFIG_HARDENED_USERCOPY=y CONFIG_HARDENED_USERCOPY_FALLBACK=y CONFIG_FORTIFY_SOURCE=y # CONFIG_STATIC_USERMODEHELPER is not set CONFIG_SECURITY_SELINUX=y CONFIG_SECURITY_SELINUX_BOOTPARAM=y CONFIG_SECURITY_SELINUX_DISABLE=y CONFIG_SECURITY_SELINUX_DEVELOP=y CONFIG_SECURITY_SELINUX_AVC_STATS=y CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE=1 CONFIG_SECURITY_SELINUX_SIDTAB_HASH_BITS=9 CONFIG_SECURITY_SELINUX_SID2STR_CACHE_SIZE=256 # CONFIG_SECURITY_SMACK is not set # CONFIG_SECURITY_TOMOYO is not set # CONFIG_SECURITY_APPARMOR is not set # CONFIG_SECURITY_LOADPIN is not set # CONFIG_SECURITY_YAMA is not set # CONFIG_SECURITY_SAFESETID is not set # CONFIG_SECURITY_LOCKDOWN_LSM is not set # 
CONFIG_INTEGRITY is not set CONFIG_DEFAULT_SECURITY_SELINUX=y # CONFIG_DEFAULT_SECURITY_DAC is not set CONFIG_LSM="lockdown,yama,loadpin,safesetid,integrity,selinux,smack,tomoyo,apparmor,bpf" # # Kernel hardening options # # # Memory initialization # CONFIG_INIT_STACK_NONE=y # CONFIG_INIT_ON_ALLOC_DEFAULT_ON is not set # CONFIG_INIT_ON_FREE_DEFAULT_ON is not set # end of Memory initialization # end of Kernel hardening options # end of Security options CONFIG_CRYPTO=y # # Crypto core or helper # CONFIG_CRYPTO_ALGAPI=y CONFIG_CRYPTO_ALGAPI2=y CONFIG_CRYPTO_AEAD=y CONFIG_CRYPTO_AEAD2=y CONFIG_CRYPTO_SKCIPHER=y CONFIG_CRYPTO_SKCIPHER2=y CONFIG_CRYPTO_HASH=y CONFIG_CRYPTO_HASH2=y CONFIG_CRYPTO_RNG=y CONFIG_CRYPTO_RNG2=y CONFIG_CRYPTO_RNG_DEFAULT=y CONFIG_CRYPTO_AKCIPHER2=y CONFIG_CRYPTO_AKCIPHER=y CONFIG_CRYPTO_KPP2=y CONFIG_CRYPTO_KPP=y CONFIG_CRYPTO_ACOMP2=y CONFIG_CRYPTO_MANAGER=y CONFIG_CRYPTO_MANAGER2=y # CONFIG_CRYPTO_USER is not set CONFIG_CRYPTO_MANAGER_DISABLE_TESTS=y CONFIG_CRYPTO_NULL=y CONFIG_CRYPTO_NULL2=y # CONFIG_CRYPTO_PCRYPT is not set # CONFIG_CRYPTO_CRYPTD is not set # CONFIG_CRYPTO_AUTHENC is not set # # Public-key cryptography # CONFIG_CRYPTO_RSA=y CONFIG_CRYPTO_DH=y CONFIG_CRYPTO_ECC=y CONFIG_CRYPTO_ECDH=y # CONFIG_CRYPTO_ECRDSA is not set # CONFIG_CRYPTO_SM2 is not set # CONFIG_CRYPTO_CURVE25519 is not set # CONFIG_CRYPTO_CURVE25519_X86 is not set # # Authenticated Encryption with Associated Data # # CONFIG_CRYPTO_CCM is not set # CONFIG_CRYPTO_GCM is not set # CONFIG_CRYPTO_CHACHA20POLY1305 is not set # CONFIG_CRYPTO_AEGIS128 is not set # CONFIG_CRYPTO_AEGIS128_AESNI_SSE2 is not set CONFIG_CRYPTO_SEQIV=y # CONFIG_CRYPTO_ECHAINIV is not set # # Block modes # CONFIG_CRYPTO_CBC=y # CONFIG_CRYPTO_CFB is not set CONFIG_CRYPTO_CTR=y CONFIG_CRYPTO_CTS=y CONFIG_CRYPTO_ECB=y # CONFIG_CRYPTO_LRW is not set # CONFIG_CRYPTO_OFB is not set # CONFIG_CRYPTO_PCBC is not set CONFIG_CRYPTO_XTS=y # CONFIG_CRYPTO_KEYWRAP is not set # CONFIG_CRYPTO_NHPOLY1305_SSE2 is 
not set # CONFIG_CRYPTO_NHPOLY1305_AVX2 is not set # CONFIG_CRYPTO_ADIANTUM is not set # CONFIG_CRYPTO_ESSIV is not set # # Hash modes # # CONFIG_CRYPTO_CMAC is not set CONFIG_CRYPTO_HMAC=y # CONFIG_CRYPTO_XCBC is not set # CONFIG_CRYPTO_VMAC is not set # # Digest # CONFIG_CRYPTO_CRC32C=y # CONFIG_CRYPTO_CRC32C_INTEL is not set # CONFIG_CRYPTO_CRC32 is not set # CONFIG_CRYPTO_CRC32_PCLMUL is not set CONFIG_CRYPTO_XXHASH=y # CONFIG_CRYPTO_BLAKE2B is not set # CONFIG_CRYPTO_BLAKE2S is not set # CONFIG_CRYPTO_BLAKE2S_X86 is not set CONFIG_CRYPTO_CRCT10DIF=y CONFIG_CRYPTO_CRCT10DIF_PCLMUL=y # CONFIG_CRYPTO_GHASH is not set # CONFIG_CRYPTO_POLY1305 is not set # CONFIG_CRYPTO_POLY1305_X86_64 is not set # CONFIG_CRYPTO_MD4 is not set CONFIG_CRYPTO_MD5=y # CONFIG_CRYPTO_MICHAEL_MIC is not set # CONFIG_CRYPTO_RMD128 is not set # CONFIG_CRYPTO_RMD160 is not set # CONFIG_CRYPTO_RMD256 is not set # CONFIG_CRYPTO_RMD320 is not set CONFIG_CRYPTO_SHA1=y CONFIG_CRYPTO_SHA1_SSSE3=y CONFIG_CRYPTO_SHA256_SSSE3=y CONFIG_CRYPTO_SHA512_SSSE3=y CONFIG_CRYPTO_SHA256=y CONFIG_CRYPTO_SHA512=y # CONFIG_CRYPTO_SHA3 is not set # CONFIG_CRYPTO_SM3 is not set # CONFIG_CRYPTO_STREEBOG is not set # CONFIG_CRYPTO_TGR192 is not set # CONFIG_CRYPTO_WP512 is not set # CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL is not set # # Ciphers # CONFIG_CRYPTO_AES=y CONFIG_CRYPTO_AES_TI=y # CONFIG_CRYPTO_AES_NI_INTEL is not set # CONFIG_CRYPTO_BLOWFISH is not set # CONFIG_CRYPTO_BLOWFISH_X86_64 is not set # CONFIG_CRYPTO_CAMELLIA is not set # CONFIG_CRYPTO_CAMELLIA_X86_64 is not set # CONFIG_CRYPTO_CAMELLIA_AESNI_AVX_X86_64 is not set # CONFIG_CRYPTO_CAMELLIA_AESNI_AVX2_X86_64 is not set # CONFIG_CRYPTO_CAST5 is not set # CONFIG_CRYPTO_CAST5_AVX_X86_64 is not set # CONFIG_CRYPTO_CAST6 is not set # CONFIG_CRYPTO_CAST6_AVX_X86_64 is not set # CONFIG_CRYPTO_DES is not set # CONFIG_CRYPTO_DES3_EDE_X86_64 is not set # CONFIG_CRYPTO_FCRYPT is not set # CONFIG_CRYPTO_SALSA20 is not set # CONFIG_CRYPTO_CHACHA20 is not set # 
CONFIG_CRYPTO_CHACHA20_X86_64 is not set # CONFIG_CRYPTO_SERPENT is not set # CONFIG_CRYPTO_SERPENT_SSE2_X86_64 is not set # CONFIG_CRYPTO_SERPENT_AVX_X86_64 is not set # CONFIG_CRYPTO_SERPENT_AVX2_X86_64 is not set # CONFIG_CRYPTO_SM4 is not set # CONFIG_CRYPTO_TWOFISH is not set # CONFIG_CRYPTO_TWOFISH_X86_64 is not set # CONFIG_CRYPTO_TWOFISH_X86_64_3WAY is not set # CONFIG_CRYPTO_TWOFISH_AVX_X86_64 is not set # # Compression # CONFIG_CRYPTO_DEFLATE=y CONFIG_CRYPTO_LZO=y # CONFIG_CRYPTO_842 is not set # CONFIG_CRYPTO_LZ4 is not set # CONFIG_CRYPTO_LZ4HC is not set # CONFIG_CRYPTO_ZSTD is not set # # Random Number Generation # # CONFIG_CRYPTO_ANSI_CPRNG is not set CONFIG_CRYPTO_DRBG_MENU=y CONFIG_CRYPTO_DRBG_HMAC=y CONFIG_CRYPTO_DRBG_HASH=y CONFIG_CRYPTO_DRBG_CTR=y CONFIG_CRYPTO_DRBG=y CONFIG_CRYPTO_JITTERENTROPY=y # CONFIG_CRYPTO_USER_API_HASH is not set # CONFIG_CRYPTO_USER_API_SKCIPHER is not set # CONFIG_CRYPTO_USER_API_RNG is not set # CONFIG_CRYPTO_USER_API_AEAD is not set CONFIG_CRYPTO_HASH_INFO=y # CONFIG_CRYPTO_HW is not set CONFIG_ASYMMETRIC_KEY_TYPE=y CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=y CONFIG_X509_CERTIFICATE_PARSER=y # CONFIG_PKCS8_PRIVATE_KEY_PARSER is not set CONFIG_PKCS7_MESSAGE_PARSER=y # # Certificates for signature checking # CONFIG_SYSTEM_TRUSTED_KEYRING=y CONFIG_SYSTEM_TRUSTED_KEYS="" # CONFIG_SYSTEM_EXTRA_CERTIFICATE is not set # CONFIG_SECONDARY_TRUSTED_KEYRING is not set CONFIG_SYSTEM_BLACKLIST_KEYRING=y CONFIG_SYSTEM_BLACKLIST_HASH_LIST="" # CONFIG_SYSTEM_REVOCATION_LIST is not set # end of Certificates for signature checking # # Library routines # # CONFIG_PACKING is not set CONFIG_BITREVERSE=y CONFIG_GENERIC_STRNCPY_FROM_USER=y CONFIG_GENERIC_STRNLEN_USER=y CONFIG_GENERIC_NET_UTILS=y CONFIG_GENERIC_FIND_FIRST_BIT=y # CONFIG_CORDIC is not set # CONFIG_PRIME_NUMBERS is not set CONFIG_RATIONAL=y CONFIG_GENERIC_PCI_IOMAP=y CONFIG_GENERIC_IOMAP=y CONFIG_ARCH_USE_CMPXCHG_LOCKREF=y CONFIG_ARCH_HAS_FAST_MULTIPLIER=y 
CONFIG_ARCH_USE_SYM_ANNOTATIONS=y # # Crypto library routines # CONFIG_CRYPTO_LIB_AES=y CONFIG_CRYPTO_LIB_BLAKE2S_GENERIC=y # CONFIG_CRYPTO_LIB_CHACHA is not set # CONFIG_CRYPTO_LIB_CURVE25519 is not set CONFIG_CRYPTO_LIB_POLY1305_RSIZE=11 # CONFIG_CRYPTO_LIB_POLY1305 is not set # CONFIG_CRYPTO_LIB_CHACHA20POLY1305 is not set CONFIG_CRYPTO_LIB_SHA256=y # end of Crypto library routines CONFIG_LIB_MEMNEQ=y CONFIG_CRC_CCITT=y CONFIG_CRC16=y CONFIG_CRC_T10DIF=y # CONFIG_CRC_ITU_T is not set CONFIG_CRC32=y # CONFIG_CRC32_SELFTEST is not set CONFIG_CRC32_SLICEBY8=y # CONFIG_CRC32_SLICEBY4 is not set # CONFIG_CRC32_SARWATE is not set # CONFIG_CRC32_BIT is not set # CONFIG_CRC64 is not set # CONFIG_CRC4 is not set # CONFIG_CRC7 is not set CONFIG_LIBCRC32C=y # CONFIG_CRC8 is not set CONFIG_XXHASH=y # CONFIG_RANDOM32_SELFTEST is not set CONFIG_ZLIB_INFLATE=y CONFIG_ZLIB_DEFLATE=y CONFIG_LZO_COMPRESS=y CONFIG_LZO_DECOMPRESS=y CONFIG_LZ4_DECOMPRESS=y CONFIG_ZSTD_DECOMPRESS=y CONFIG_XZ_DEC=y CONFIG_XZ_DEC_X86=y CONFIG_XZ_DEC_POWERPC=y CONFIG_XZ_DEC_IA64=y CONFIG_XZ_DEC_ARM=y CONFIG_XZ_DEC_ARMTHUMB=y CONFIG_XZ_DEC_SPARC=y CONFIG_XZ_DEC_BCJ=y # CONFIG_XZ_DEC_TEST is not set CONFIG_DECOMPRESS_GZIP=y CONFIG_DECOMPRESS_BZIP2=y CONFIG_DECOMPRESS_LZMA=y CONFIG_DECOMPRESS_XZ=y CONFIG_DECOMPRESS_LZO=y CONFIG_DECOMPRESS_LZ4=y CONFIG_DECOMPRESS_ZSTD=y CONFIG_XARRAY_MULTI=y CONFIG_ASSOCIATIVE_ARRAY=y CONFIG_HAS_IOMEM=y CONFIG_HAS_IOPORT_MAP=y CONFIG_HAS_DMA=y # CONFIG_DMA_PAGE_TOUCHING is not set CONFIG_NEED_SG_DMA_LENGTH=y CONFIG_NEED_DMA_MAP_STATE=y CONFIG_ARCH_DMA_ADDR_T_64BIT=y CONFIG_SWIOTLB=y # CONFIG_DMA_API_DEBUG is not set CONFIG_SGL_ALLOC=y CONFIG_CPU_RMAP=y CONFIG_DQL=y CONFIG_NLATTR=y CONFIG_CLZ_TAB=y CONFIG_IRQ_POLL=y CONFIG_MPILIB=y CONFIG_OID_REGISTRY=y CONFIG_HAVE_GENERIC_VDSO=y CONFIG_GENERIC_GETTIMEOFDAY=y CONFIG_GENERIC_VDSO_TIME_NS=y CONFIG_SG_POOL=y CONFIG_ARCH_HAS_PMEM_API=y CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE=y CONFIG_ARCH_HAS_COPY_MC=y CONFIG_ARCH_STACKWALK=y 
CONFIG_SBITMAP=y # CONFIG_STRING_SELFTEST is not set # end of Library routines # # Kernel hacking # # # printk and dmesg options # CONFIG_PRINTK_TIME=y # CONFIG_PRINTK_CALLER is not set CONFIG_CONSOLE_LOGLEVEL_DEFAULT=7 CONFIG_CONSOLE_LOGLEVEL_QUIET=4 CONFIG_MESSAGE_LOGLEVEL_DEFAULT=4 # CONFIG_BOOT_PRINTK_DELAY is not set # CONFIG_DYNAMIC_DEBUG is not set # CONFIG_DYNAMIC_DEBUG_CORE is not set CONFIG_SYMBOLIC_ERRNAME=y CONFIG_DEBUG_BUGVERBOSE=y # end of printk and dmesg options # # Compile-time checks and compiler options # # CONFIG_DEBUG_INFO is not set # CONFIG_ENABLE_MUST_CHECK is not set CONFIG_FRAME_WARN=2048 CONFIG_STRIP_ASM_SYMS=y # CONFIG_READABLE_ASM is not set # CONFIG_HEADERS_INSTALL is not set CONFIG_DEBUG_SECTION_MISMATCH=y CONFIG_SECTION_MISMATCH_WARN_ONLY=y CONFIG_ARCH_WANT_FRAME_POINTERS=y CONFIG_FRAME_POINTER=y CONFIG_STACK_VALIDATION=y # CONFIG_DEBUG_FORCE_WEAK_PER_CPU is not set # end of Compile-time checks and compiler options # # Generic Kernel Debugging Instruments # CONFIG_MAGIC_SYSRQ=y CONFIG_MAGIC_SYSRQ_DEFAULT_ENABLE=0x1 CONFIG_MAGIC_SYSRQ_SERIAL=y CONFIG_MAGIC_SYSRQ_SERIAL_SEQUENCE="" CONFIG_DEBUG_FS=y CONFIG_DEBUG_FS_ALLOW_ALL=y # CONFIG_DEBUG_FS_DISALLOW_MOUNT is not set # CONFIG_DEBUG_FS_ALLOW_NONE is not set CONFIG_HAVE_ARCH_KGDB=y # CONFIG_KGDB is not set CONFIG_ARCH_HAS_UBSAN_SANITIZE_ALL=y # CONFIG_UBSAN is not set CONFIG_HAVE_ARCH_KCSAN=y CONFIG_HAVE_KCSAN_COMPILER=y # CONFIG_KCSAN is not set # end of Generic Kernel Debugging Instruments CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_MISC=y # # Memory Debugging # # CONFIG_PAGE_EXTENSION is not set # CONFIG_DEBUG_PAGEALLOC is not set # CONFIG_PAGE_OWNER is not set # CONFIG_PAGE_POISONING is not set # CONFIG_DEBUG_RODATA_TEST is not set CONFIG_ARCH_HAS_DEBUG_WX=y # CONFIG_DEBUG_WX is not set CONFIG_GENERIC_PTDUMP=y # CONFIG_PTDUMP_DEBUGFS is not set # CONFIG_DEBUG_OBJECTS is not set # CONFIG_SLUB_DEBUG_ON is not set # CONFIG_SLUB_STATS is not set CONFIG_HAVE_DEBUG_KMEMLEAK=y # 
CONFIG_DEBUG_KMEMLEAK is not set # CONFIG_DEBUG_STACK_USAGE is not set CONFIG_SCHED_STACK_END_CHECK=y CONFIG_ARCH_HAS_DEBUG_VM_PGTABLE=y # CONFIG_DEBUG_VM is not set # CONFIG_DEBUG_VM_PGTABLE is not set CONFIG_ARCH_HAS_DEBUG_VIRTUAL=y # CONFIG_DEBUG_VIRTUAL is not set CONFIG_DEBUG_MEMORY_INIT=y # CONFIG_DEBUG_PER_CPU_MAPS is not set CONFIG_HAVE_ARCH_KASAN=y CONFIG_HAVE_ARCH_KASAN_VMALLOC=y CONFIG_CC_HAS_KASAN_GENERIC=y CONFIG_CC_HAS_WORKING_NOSANITIZE_ADDRESS=y # CONFIG_KASAN is not set # end of Memory Debugging # CONFIG_DEBUG_SHIRQ is not set # # Debug Oops, Lockups and Hangs # # CONFIG_PANIC_ON_OOPS is not set CONFIG_PANIC_ON_OOPS_VALUE=0 CONFIG_PANIC_TIMEOUT=0 CONFIG_LOCKUP_DETECTOR=y CONFIG_SOFTLOCKUP_DETECTOR=y # CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0 CONFIG_HARDLOCKUP_DETECTOR_PERF=y CONFIG_HARDLOCKUP_CHECK_TIMESTAMP=y CONFIG_HARDLOCKUP_DETECTOR=y # CONFIG_BOOTPARAM_HARDLOCKUP_PANIC is not set CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE=0 CONFIG_DETECT_HUNG_TASK=y CONFIG_DEFAULT_HUNG_TASK_TIMEOUT=120 # CONFIG_BOOTPARAM_HUNG_TASK_PANIC is not set CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE=0 CONFIG_WQ_WATCHDOG=y # end of Debug Oops, Lockups and Hangs # # Scheduler Debugging # # CONFIG_SCHED_DEBUG is not set CONFIG_SCHED_INFO=y # CONFIG_SCHEDSTATS is not set # end of Scheduler Debugging # CONFIG_DEBUG_TIMEKEEPING is not set # # Lock Debugging (spinlocks, mutexes, etc...) 
# CONFIG_LOCK_DEBUGGING_SUPPORT=y # CONFIG_PROVE_LOCKING is not set # CONFIG_LOCK_STAT is not set # CONFIG_DEBUG_RT_MUTEXES is not set # CONFIG_DEBUG_SPINLOCK is not set # CONFIG_DEBUG_MUTEXES is not set # CONFIG_DEBUG_WW_MUTEX_SLOWPATH is not set # CONFIG_DEBUG_RWSEMS is not set # CONFIG_DEBUG_LOCK_ALLOC is not set # CONFIG_DEBUG_ATOMIC_SLEEP is not set # CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set # CONFIG_LOCK_TORTURE_TEST is not set # CONFIG_WW_MUTEX_SELFTEST is not set # CONFIG_SCF_TORTURE_TEST is not set # CONFIG_CSD_LOCK_WAIT_DEBUG is not set # end of Lock Debugging (spinlocks, mutexes, etc...) CONFIG_STACKTRACE=y # CONFIG_WARN_ALL_UNSEEDED_RANDOM is not set # CONFIG_DEBUG_KOBJECT is not set # # Debug kernel data structures # CONFIG_DEBUG_LIST=y # CONFIG_DEBUG_PLIST is not set # CONFIG_DEBUG_SG is not set # CONFIG_DEBUG_NOTIFIERS is not set CONFIG_BUG_ON_DATA_CORRUPTION=y # end of Debug kernel data structures # CONFIG_DEBUG_CREDENTIALS is not set # # RCU Debugging # # CONFIG_RCU_SCALE_TEST is not set # CONFIG_RCU_TORTURE_TEST is not set # CONFIG_RCU_REF_SCALE_TEST is not set CONFIG_RCU_CPU_STALL_TIMEOUT=59 # CONFIG_RCU_TRACE is not set # CONFIG_RCU_EQS_DEBUG is not set # end of RCU Debugging # CONFIG_DEBUG_WQ_FORCE_RR_CPU is not set # CONFIG_DEBUG_BLOCK_EXT_DEVT is not set # CONFIG_CPU_HOTPLUG_STATE_CONTROL is not set # CONFIG_LATENCYTOP is not set CONFIG_USER_STACKTRACE_SUPPORT=y CONFIG_HAVE_FUNCTION_TRACER=y CONFIG_HAVE_FUNCTION_GRAPH_TRACER=y CONFIG_HAVE_DYNAMIC_FTRACE=y CONFIG_HAVE_DYNAMIC_FTRACE_WITH_REGS=y CONFIG_HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS=y CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y CONFIG_HAVE_SYSCALL_TRACEPOINTS=y CONFIG_HAVE_FENTRY=y CONFIG_HAVE_C_RECORDMCOUNT=y CONFIG_TRACING_SUPPORT=y # CONFIG_FTRACE is not set # CONFIG_SAMPLES is not set CONFIG_ARCH_HAS_DEVMEM_IS_ALLOWED=y CONFIG_STRICT_DEVMEM=y # CONFIG_IO_STRICT_DEVMEM is not set # # x86 Debugging # CONFIG_TRACE_IRQFLAGS_SUPPORT=y CONFIG_TRACE_IRQFLAGS_NMI_SUPPORT=y 
CONFIG_X86_VERBOSE_BOOTUP=y CONFIG_EARLY_PRINTK=y # CONFIG_DEBUG_TLBFLUSH is not set CONFIG_HAVE_MMIOTRACE_SUPPORT=y # CONFIG_X86_DECODER_SELFTEST is not set CONFIG_IO_DELAY_0X80=y # CONFIG_IO_DELAY_0XED is not set # CONFIG_IO_DELAY_UDELAY is not set # CONFIG_IO_DELAY_NONE is not set # CONFIG_DEBUG_BOOT_PARAMS is not set # CONFIG_CPA_DEBUG is not set # CONFIG_DEBUG_ENTRY is not set # CONFIG_DEBUG_NMI_SELFTEST is not set # CONFIG_X86_DEBUG_FPU is not set # CONFIG_UNWINDER_ORC is not set CONFIG_UNWINDER_FRAME_POINTER=y # end of x86 Debugging # # Kernel Testing and Coverage # # CONFIG_KUNIT is not set # CONFIG_NOTIFIER_ERROR_INJECTION is not set # CONFIG_FAULT_INJECTION is not set CONFIG_ARCH_HAS_KCOV=y CONFIG_CC_HAS_SANCOV_TRACE_PC=y # CONFIG_KCOV is not set # CONFIG_RUNTIME_TESTING_MENU is not set # CONFIG_MEMTEST is not set # end of Kernel Testing and Coverage # end of Kernel hacking ================================================ FILE: resources/guest_configs/microvm-kernel-ci-x86_64-5.10.config ================================================ CONFIG_CC_VERSION_TEXT="gcc10-gcc (GCC) 10.5.0 20230707 (Red Hat 10.5.0-1)" CONFIG_CC_IS_GCC=y CONFIG_GCC_VERSION=100500 CONFIG_LD_VERSION=235020000 CONFIG_CLANG_VERSION=0 CONFIG_AS_IS_GNU=y CONFIG_AS_VERSION=23502 CONFIG_LLD_VERSION=0 CONFIG_CC_CAN_LINK=y CONFIG_CC_HAS_ASM_GOTO=y CONFIG_CC_HAS_ASM_INLINE=y CONFIG_IRQ_WORK=y CONFIG_BUILDTIME_TABLE_SORT=y CONFIG_THREAD_INFO_IN_TASK=y # # General setup # CONFIG_INIT_ENV_ARG_LIMIT=32 # CONFIG_COMPILE_TEST is not set CONFIG_LOCALVERSION="" # CONFIG_LOCALVERSION_AUTO is not set CONFIG_BUILD_SALT="" CONFIG_HAVE_KERNEL_GZIP=y CONFIG_HAVE_KERNEL_BZIP2=y CONFIG_HAVE_KERNEL_LZMA=y CONFIG_HAVE_KERNEL_XZ=y CONFIG_HAVE_KERNEL_LZO=y CONFIG_HAVE_KERNEL_LZ4=y CONFIG_HAVE_KERNEL_ZSTD=y CONFIG_KERNEL_GZIP=y # CONFIG_KERNEL_BZIP2 is not set # CONFIG_KERNEL_LZMA is not set # CONFIG_KERNEL_XZ is not set # CONFIG_KERNEL_LZO is not set # CONFIG_KERNEL_LZ4 is not set # CONFIG_KERNEL_ZSTD is not 
set CONFIG_DEFAULT_INIT="" CONFIG_DEFAULT_HOSTNAME="(none)" CONFIG_SWAP=y CONFIG_SYSVIPC=y CONFIG_SYSVIPC_SYSCTL=y CONFIG_POSIX_MQUEUE=y CONFIG_POSIX_MQUEUE_SYSCTL=y # CONFIG_WATCH_QUEUE is not set CONFIG_CROSS_MEMORY_ATTACH=y # CONFIG_USELIB is not set CONFIG_AUDIT=y CONFIG_HAVE_ARCH_AUDITSYSCALL=y CONFIG_AUDITSYSCALL=y # # IRQ subsystem # CONFIG_GENERIC_IRQ_PROBE=y CONFIG_GENERIC_IRQ_SHOW=y CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK=y CONFIG_GENERIC_PENDING_IRQ=y CONFIG_GENERIC_IRQ_MIGRATION=y CONFIG_HARDIRQS_SW_RESEND=y CONFIG_IRQ_DOMAIN=y CONFIG_IRQ_DOMAIN_HIERARCHY=y CONFIG_GENERIC_IRQ_MATRIX_ALLOCATOR=y CONFIG_GENERIC_IRQ_RESERVATION_MODE=y CONFIG_IRQ_FORCED_THREADING=y CONFIG_SPARSE_IRQ=y # CONFIG_GENERIC_IRQ_DEBUGFS is not set # end of IRQ subsystem CONFIG_CLOCKSOURCE_WATCHDOG=y CONFIG_ARCH_CLOCKSOURCE_INIT=y CONFIG_CLOCKSOURCE_VALIDATE_LAST_CYCLE=y CONFIG_GENERIC_TIME_VSYSCALL=y CONFIG_GENERIC_CLOCKEVENTS=y CONFIG_GENERIC_CLOCKEVENTS_BROADCAST=y CONFIG_GENERIC_CLOCKEVENTS_MIN_ADJUST=y CONFIG_GENERIC_CMOS_UPDATE=y CONFIG_HAVE_POSIX_CPU_TIMERS_TASK_WORK=y CONFIG_POSIX_CPU_TIMERS_TASK_WORK=y # # Timers subsystem # CONFIG_TICK_ONESHOT=y CONFIG_NO_HZ_COMMON=y # CONFIG_HZ_PERIODIC is not set CONFIG_NO_HZ_IDLE=y # CONFIG_NO_HZ_FULL is not set CONFIG_NO_HZ=y CONFIG_HIGH_RES_TIMERS=y # end of Timers subsystem CONFIG_PREEMPT_NONE=y # CONFIG_PREEMPT_VOLUNTARY is not set # CONFIG_PREEMPT is not set # # CPU/Task time and stats accounting # CONFIG_TICK_CPU_ACCOUNTING=y # CONFIG_VIRT_CPU_ACCOUNTING_GEN is not set # CONFIG_IRQ_TIME_ACCOUNTING is not set CONFIG_HAVE_SCHED_AVG_IRQ=y CONFIG_BSD_PROCESS_ACCT=y CONFIG_BSD_PROCESS_ACCT_V3=y CONFIG_TASKSTATS=y CONFIG_TASK_DELAY_ACCT=y CONFIG_TASK_XACCT=y CONFIG_TASK_IO_ACCOUNTING=y CONFIG_PSI=y CONFIG_PSI_DEFAULT_DISABLED=y # end of CPU/Task time and stats accounting CONFIG_CPU_ISOLATION=y # # RCU Subsystem # CONFIG_TREE_RCU=y # CONFIG_RCU_EXPERT is not set CONFIG_SRCU=y CONFIG_TREE_SRCU=y CONFIG_TASKS_RCU_GENERIC=y 
CONFIG_TASKS_TRACE_RCU=y CONFIG_RCU_STALL_COMMON=y CONFIG_RCU_NEED_SEGCBLIST=y # end of RCU Subsystem CONFIG_BUILD_BIN2C=y # CONFIG_IKCONFIG is not set # CONFIG_IKHEADERS is not set CONFIG_LOG_BUF_SHIFT=17 CONFIG_LOG_CPU_MAX_BUF_SHIFT=12 CONFIG_PRINTK_SAFE_LOG_BUF_SHIFT=13 CONFIG_HAVE_UNSTABLE_SCHED_CLOCK=y # # Scheduler features # # CONFIG_UCLAMP_TASK is not set # end of Scheduler features CONFIG_ARCH_SUPPORTS_NUMA_BALANCING=y CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH=y CONFIG_CC_HAS_INT128=y CONFIG_ARCH_SUPPORTS_INT128=y CONFIG_NUMA_BALANCING=y # CONFIG_NUMA_BALANCING_DEFAULT_ENABLED is not set CONFIG_CGROUPS=y CONFIG_PAGE_COUNTER=y CONFIG_MEMCG=y CONFIG_MEMCG_SWAP=y CONFIG_MEMCG_KMEM=y CONFIG_BLK_CGROUP=y CONFIG_CGROUP_WRITEBACK=y CONFIG_CGROUP_SCHED=y CONFIG_FAIR_GROUP_SCHED=y CONFIG_CFS_BANDWIDTH=y CONFIG_RT_GROUP_SCHED=y CONFIG_CGROUP_PIDS=y # CONFIG_CGROUP_RDMA is not set CONFIG_CGROUP_FREEZER=y CONFIG_CGROUP_HUGETLB=y CONFIG_CPUSETS=y CONFIG_PROC_PID_CPUSET=y CONFIG_CGROUP_DEVICE=y CONFIG_CGROUP_CPUACCT=y CONFIG_CGROUP_PERF=y CONFIG_CGROUP_BPF=y # CONFIG_CGROUP_DEBUG is not set CONFIG_SOCK_CGROUP_DATA=y CONFIG_NAMESPACES=y CONFIG_UTS_NS=y CONFIG_TIME_NS=y CONFIG_IPC_NS=y CONFIG_USER_NS=y CONFIG_PID_NS=y CONFIG_NET_NS=y # CONFIG_CHECKPOINT_RESTORE is not set CONFIG_SCHED_AUTOGROUP=y # CONFIG_SYSFS_DEPRECATED is not set CONFIG_RELAY=y CONFIG_BLK_DEV_INITRD=y CONFIG_INITRAMFS_SOURCE="" CONFIG_RD_GZIP=y CONFIG_RD_BZIP2=y CONFIG_RD_LZMA=y CONFIG_RD_XZ=y CONFIG_RD_LZO=y CONFIG_RD_LZ4=y CONFIG_RD_ZSTD=y # CONFIG_BOOT_CONFIG is not set CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_LD_ORPHAN_WARN=y CONFIG_SYSCTL=y CONFIG_HAVE_UID16=y CONFIG_SYSCTL_EXCEPTION_TRACE=y CONFIG_HAVE_PCSPKR_PLATFORM=y CONFIG_BPF=y # CONFIG_EXPERT is not set CONFIG_UID16=y CONFIG_MULTIUSER=y CONFIG_SGETMASK_SYSCALL=y CONFIG_SYSFS_SYSCALL=y CONFIG_FHANDLE=y CONFIG_POSIX_TIMERS=y CONFIG_PRINTK=y CONFIG_PRINTK_NMI=y CONFIG_BUG=y CONFIG_ELF_CORE=y 
CONFIG_PCSPKR_PLATFORM=y CONFIG_BASE_FULL=y CONFIG_FUTEX=y CONFIG_FUTEX_PI=y CONFIG_EPOLL=y CONFIG_SIGNALFD=y CONFIG_TIMERFD=y CONFIG_EVENTFD=y CONFIG_SHMEM=y CONFIG_AIO=y CONFIG_IO_URING=y CONFIG_ADVISE_SYSCALLS=y CONFIG_HAVE_ARCH_USERFAULTFD_WP=y CONFIG_MEMBARRIER=y CONFIG_KALLSYMS=y # CONFIG_KALLSYMS_ALL is not set CONFIG_KALLSYMS_ABSOLUTE_PERCPU=y CONFIG_KALLSYMS_BASE_RELATIVE=y CONFIG_BPF_SYSCALL=y CONFIG_ARCH_WANT_DEFAULT_BPF_JIT=y CONFIG_BPF_UNPRIV_DEFAULT_OFF=y CONFIG_USERMODE_DRIVER=y CONFIG_BPF_PRELOAD=y CONFIG_USERFAULTFD=y CONFIG_ARCH_HAS_MEMBARRIER_SYNC_CORE=y CONFIG_RSEQ=y # CONFIG_EMBEDDED is not set CONFIG_HAVE_PERF_EVENTS=y # # Kernel Performance Events And Counters # CONFIG_PERF_EVENTS=y # CONFIG_DEBUG_PERF_USE_VMALLOC is not set # end of Kernel Performance Events And Counters CONFIG_VM_EVENT_COUNTERS=y CONFIG_SLUB_DEBUG=y # CONFIG_COMPAT_BRK is not set # CONFIG_SLAB is not set CONFIG_SLUB=y CONFIG_SLAB_MERGE_DEFAULT=y CONFIG_SLAB_FREELIST_RANDOM=y CONFIG_SLAB_FREELIST_HARDENED=y CONFIG_SHUFFLE_PAGE_ALLOCATOR=y CONFIG_SLUB_CPU_PARTIAL=y CONFIG_PROFILING=y # end of General setup CONFIG_64BIT=y CONFIG_X86_64=y CONFIG_X86=y CONFIG_INSTRUCTION_DECODER=y CONFIG_OUTPUT_FORMAT="elf64-x86-64" CONFIG_LOCKDEP_SUPPORT=y CONFIG_STACKTRACE_SUPPORT=y CONFIG_MMU=y CONFIG_ARCH_MMAP_RND_BITS_MIN=28 CONFIG_ARCH_MMAP_RND_BITS_MAX=32 CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MIN=8 CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MAX=16 CONFIG_GENERIC_ISA_DMA=y CONFIG_GENERIC_BUG=y CONFIG_GENERIC_BUG_RELATIVE_POINTERS=y CONFIG_ARCH_MAY_HAVE_PC_FDC=y CONFIG_GENERIC_CALIBRATE_DELAY=y CONFIG_ARCH_HAS_CPU_RELAX=y CONFIG_ARCH_HAS_CACHE_LINE_SIZE=y CONFIG_ARCH_HAS_FILTER_PGPROT=y CONFIG_HAVE_SETUP_PER_CPU_AREA=y CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK=y CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK=y CONFIG_ARCH_HIBERNATION_POSSIBLE=y CONFIG_ARCH_SUSPEND_POSSIBLE=y CONFIG_ARCH_WANT_GENERAL_HUGETLB=y CONFIG_ZONE_DMA32=y CONFIG_AUDIT_ARCH=y CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC=y CONFIG_X86_64_SMP=y 
CONFIG_ARCH_SUPPORTS_UPROBES=y CONFIG_FIX_EARLYCON_MEM=y CONFIG_PGTABLE_LEVELS=4 CONFIG_CC_HAS_SANE_STACKPROTECTOR=y # # Processor type and features # CONFIG_ZONE_DMA=y CONFIG_SMP=y CONFIG_X86_FEATURE_NAMES=y CONFIG_X86_X2APIC=y CONFIG_X86_MPPARSE=y # CONFIG_GOLDFISH is not set # CONFIG_X86_CPU_RESCTRL is not set # CONFIG_X86_EXTENDED_PLATFORM is not set # CONFIG_X86_AMD_PLATFORM_DEVICE is not set CONFIG_SCHED_OMIT_FRAME_POINTER=y CONFIG_HYPERVISOR_GUEST=y CONFIG_PARAVIRT=y # CONFIG_PARAVIRT_DEBUG is not set CONFIG_PARAVIRT_SPINLOCKS=y CONFIG_X86_HV_CALLBACK_VECTOR=y # CONFIG_XEN is not set CONFIG_KVM_GUEST=y CONFIG_ARCH_CPUIDLE_HALTPOLL=y CONFIG_PVH=y CONFIG_PARAVIRT_TIME_ACCOUNTING=y CONFIG_PARAVIRT_CLOCK=y # CONFIG_ACRN_GUEST is not set # CONFIG_MK8 is not set # CONFIG_MPSC is not set # CONFIG_MCORE2 is not set # CONFIG_MATOM is not set CONFIG_GENERIC_CPU=y CONFIG_X86_INTERNODE_CACHE_SHIFT=6 CONFIG_X86_L1_CACHE_SHIFT=6 CONFIG_X86_TSC=y CONFIG_X86_CMPXCHG64=y CONFIG_X86_CMOV=y CONFIG_X86_MINIMUM_CPU_FAMILY=64 CONFIG_X86_DEBUGCTLMSR=y CONFIG_IA32_FEAT_CTL=y CONFIG_X86_VMX_FEATURE_NAMES=y CONFIG_CPU_SUP_INTEL=y CONFIG_CPU_SUP_AMD=y CONFIG_CPU_SUP_HYGON=y CONFIG_CPU_SUP_CENTAUR=y CONFIG_CPU_SUP_ZHAOXIN=y CONFIG_HPET_TIMER=y CONFIG_DMI=y # CONFIG_MAXSMP is not set CONFIG_NR_CPUS_RANGE_BEGIN=2 CONFIG_NR_CPUS_RANGE_END=512 CONFIG_NR_CPUS_DEFAULT=64 CONFIG_NR_CPUS=64 CONFIG_SCHED_SMT=y CONFIG_SCHED_MC=y CONFIG_SCHED_MC_PRIO=y CONFIG_X86_LOCAL_APIC=y CONFIG_X86_IO_APIC=y CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS=y # CONFIG_X86_MCE is not set # # Performance monitoring # # CONFIG_PERF_EVENTS_AMD_POWER is not set # end of Performance monitoring CONFIG_X86_16BIT=y CONFIG_X86_ESPFIX64=y CONFIG_X86_VSYSCALL_EMULATION=y CONFIG_X86_IOPL_IOPERM=y # CONFIG_MICROCODE is not set CONFIG_X86_MSR=y CONFIG_X86_CPUID=y # CONFIG_X86_5LEVEL is not set CONFIG_X86_DIRECT_GBPAGES=y # CONFIG_X86_CPA_STATISTICS is not set # CONFIG_AMD_MEM_ENCRYPT is not set CONFIG_NUMA=y # CONFIG_NUMA_EMU is not 
set CONFIG_NODES_SHIFT=10 CONFIG_ARCH_SPARSEMEM_ENABLE=y CONFIG_ARCH_SPARSEMEM_DEFAULT=y CONFIG_ARCH_SELECT_MEMORY_MODEL=y CONFIG_ARCH_MEMORY_PROBE=y CONFIG_ARCH_PROC_KCORE_TEXT=y CONFIG_ILLEGAL_POINTER_VALUE=0xdead000000000000 # CONFIG_X86_PMEM_LEGACY is not set CONFIG_X86_CHECK_BIOS_CORRUPTION=y CONFIG_X86_BOOTPARAM_MEMORY_CORRUPTION_CHECK=y CONFIG_X86_RESERVE_LOW=64 CONFIG_MTRR=y CONFIG_MTRR_SANITIZER=y CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT=0 CONFIG_MTRR_SANITIZER_SPARE_REG_NR_DEFAULT=1 CONFIG_X86_PAT=y CONFIG_ARCH_USES_PG_UNCACHED=y CONFIG_ARCH_RANDOM=y CONFIG_X86_SMAP=y CONFIG_X86_UMIP=y CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS=y CONFIG_X86_INTEL_TSX_MODE_OFF=y # CONFIG_X86_INTEL_TSX_MODE_ON is not set # CONFIG_X86_INTEL_TSX_MODE_AUTO is not set # CONFIG_EFI is not set # CONFIG_HZ_100 is not set CONFIG_HZ_250=y # CONFIG_HZ_300 is not set # CONFIG_HZ_1000 is not set CONFIG_HZ=250 CONFIG_SCHED_HRTICK=y # CONFIG_KEXEC is not set CONFIG_KEXEC_FILE=y CONFIG_ARCH_HAS_KEXEC_PURGATORY=y # CONFIG_KEXEC_SIG is not set # CONFIG_CRASH_DUMP is not set CONFIG_PHYSICAL_START=0x1000000 CONFIG_RELOCATABLE=y CONFIG_RANDOMIZE_BASE=y CONFIG_X86_NEED_RELOCS=y CONFIG_PHYSICAL_ALIGN=0x1000000 CONFIG_DYNAMIC_MEMORY_LAYOUT=y CONFIG_RANDOMIZE_MEMORY=y CONFIG_RANDOMIZE_MEMORY_PHYSICAL_PADDING=0xa CONFIG_HOTPLUG_CPU=y # CONFIG_BOOTPARAM_HOTPLUG_CPU0 is not set # CONFIG_DEBUG_HOTPLUG_CPU0 is not set # CONFIG_COMPAT_VDSO is not set CONFIG_LEGACY_VSYSCALL_EMULATE=y # CONFIG_LEGACY_VSYSCALL_XONLY is not set # CONFIG_LEGACY_VSYSCALL_NONE is not set # CONFIG_CMDLINE_BOOL is not set CONFIG_MODIFY_LDT_SYSCALL=y CONFIG_HAVE_LIVEPATCH=y # end of Processor type and features CONFIG_CC_HAS_RETURN_THUNK=y CONFIG_CPU_MITIGATIONS=y CONFIG_PAGE_TABLE_ISOLATION=y CONFIG_RETPOLINE=y CONFIG_RETHUNK=y CONFIG_CPU_UNRET_ENTRY=y CONFIG_CPU_IBPB_ENTRY=y CONFIG_CPU_IBRS_ENTRY=y CONFIG_CPU_SRSO=y # CONFIG_GDS_FORCE_MITIGATION is not set CONFIG_MITIGATION_RFDS=y CONFIG_ARCH_HAS_ADD_PAGES=y 
CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y CONFIG_ARCH_ENABLE_MEMORY_HOTREMOVE=y CONFIG_ARCH_MHP_MEMMAP_ON_MEMORY_ENABLE=y CONFIG_USE_PERCPU_NUMA_NODE_ID=y CONFIG_ARCH_ENABLE_SPLIT_PMD_PTLOCK=y CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION=y CONFIG_ARCH_ENABLE_THP_MIGRATION=y # # Power management and ACPI options # CONFIG_ARCH_HIBERNATION_HEADER=y # CONFIG_SUSPEND is not set CONFIG_HIBERNATE_CALLBACKS=y CONFIG_HIBERNATION=y CONFIG_HIBERNATION_SNAPSHOT_DEV=y CONFIG_PM_STD_PARTITION="" CONFIG_PM_SLEEP=y CONFIG_PM_SLEEP_SMP=y # CONFIG_PM_AUTOSLEEP is not set # CONFIG_PM_WAKELOCKS is not set CONFIG_PM=y # CONFIG_PM_DEBUG is not set CONFIG_PM_CLK=y # CONFIG_WQ_POWER_EFFICIENT_DEFAULT is not set # CONFIG_ENERGY_MODEL is not set CONFIG_ARCH_SUPPORTS_ACPI=y CONFIG_ACPI=y CONFIG_ACPI_LEGACY_TABLES_LOOKUP=y CONFIG_ARCH_MIGHT_HAVE_ACPI_PDC=y CONFIG_ACPI_SYSTEM_POWER_STATES_SUPPORT=y # CONFIG_ACPI_DEBUGGER is not set # CONFIG_ACPI_SPCR_TABLE is not set CONFIG_ACPI_LPIT=y CONFIG_ACPI_SLEEP=y # CONFIG_ACPI_REV_OVERRIDE_POSSIBLE is not set # CONFIG_ACPI_EC_DEBUGFS is not set # CONFIG_ACPI_AC is not set # CONFIG_ACPI_BATTERY is not set # CONFIG_ACPI_BUTTON is not set # CONFIG_ACPI_TINY_POWER_BUTTON is not set # CONFIG_ACPI_FAN is not set # CONFIG_ACPI_TAD is not set # CONFIG_ACPI_DOCK is not set CONFIG_ACPI_CPU_FREQ_PSS=y CONFIG_ACPI_PROCESSOR_CSTATE=y CONFIG_ACPI_PROCESSOR_IDLE=y CONFIG_ACPI_CPPC_LIB=y CONFIG_ACPI_PROCESSOR=y CONFIG_ACPI_HOTPLUG_CPU=y # CONFIG_ACPI_PROCESSOR_AGGREGATOR is not set # CONFIG_ACPI_THERMAL is not set CONFIG_ARCH_HAS_ACPI_TABLE_UPGRADE=y # CONFIG_ACPI_TABLE_UPGRADE is not set # CONFIG_ACPI_DEBUG is not set CONFIG_ACPI_CONTAINER=y # CONFIG_ACPI_HOTPLUG_MEMORY is not set # CONFIG_ACPI_SBS is not set # CONFIG_ACPI_HED is not set # CONFIG_ACPI_CUSTOM_METHOD is not set # CONFIG_ACPI_NFIT is not set # CONFIG_ACPI_NUMA is not set CONFIG_HAVE_ACPI_APEI=y CONFIG_HAVE_ACPI_APEI_NMI=y # CONFIG_ACPI_APEI is not set # CONFIG_ACPI_DPTF is not set # CONFIG_ACPI_CONFIGFS is not set 
# CONFIG_PMIC_OPREGION is not set CONFIG_X86_PM_TIMER=y # CONFIG_SFI is not set # # CPU Frequency scaling # CONFIG_CPU_FREQ=y CONFIG_CPU_FREQ_GOV_ATTR_SET=y CONFIG_CPU_FREQ_STAT=y CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE=y # CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE is not set # CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE is not set # CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL is not set CONFIG_CPU_FREQ_GOV_PERFORMANCE=y # CONFIG_CPU_FREQ_GOV_POWERSAVE is not set # CONFIG_CPU_FREQ_GOV_USERSPACE is not set # CONFIG_CPU_FREQ_GOV_ONDEMAND is not set # CONFIG_CPU_FREQ_GOV_CONSERVATIVE is not set CONFIG_CPU_FREQ_GOV_SCHEDUTIL=y # # CPU frequency scaling drivers # CONFIG_X86_INTEL_PSTATE=y # CONFIG_X86_PCC_CPUFREQ is not set # CONFIG_X86_ACPI_CPUFREQ is not set # CONFIG_X86_SPEEDSTEP_CENTRINO is not set # CONFIG_X86_P4_CLOCKMOD is not set # # shared options # # end of CPU Frequency scaling # # CPU Idle # CONFIG_CPU_IDLE=y CONFIG_CPU_IDLE_GOV_LADDER=y CONFIG_CPU_IDLE_GOV_MENU=y # CONFIG_CPU_IDLE_GOV_TEO is not set CONFIG_CPU_IDLE_GOV_HALTPOLL=y CONFIG_HALTPOLL_CPUIDLE=y # end of CPU Idle CONFIG_INTEL_IDLE=y # end of Power management and ACPI options # # Bus options (PCI etc.) # CONFIG_ISA_DMA_API=y # CONFIG_X86_SYSFB is not set # end of Bus options (PCI etc.) 
# # Binary Emulations # CONFIG_IA32_EMULATION=y # CONFIG_X86_X32 is not set CONFIG_COMPAT_32=y CONFIG_COMPAT=y CONFIG_COMPAT_FOR_U64_ALIGNMENT=y CONFIG_SYSVIPC_COMPAT=y # end of Binary Emulations # # Firmware Drivers # # CONFIG_EDD is not set CONFIG_FIRMWARE_MEMMAP=y CONFIG_DMIID=y # CONFIG_DMI_SYSFS is not set CONFIG_DMI_SCAN_MACHINE_NON_EFI_FALLBACK=y # CONFIG_ISCSI_IBFT is not set # CONFIG_FW_CFG_SYSFS is not set # CONFIG_GOOGLE_FIRMWARE is not set # # Tegra firmware driver # # end of Tegra firmware driver # end of Firmware Drivers CONFIG_HAVE_KVM=y # CONFIG_VIRTUALIZATION is not set CONFIG_AS_AVX512=y CONFIG_AS_SHA1_NI=y CONFIG_AS_SHA256_NI=y CONFIG_AS_TPAUSE=y CONFIG_ARCH_CONFIGURES_CPU_MITIGATIONS=y # # General architecture-dependent options # CONFIG_CRASH_CORE=y CONFIG_KEXEC_CORE=y CONFIG_HOTPLUG_SMT=y CONFIG_GENERIC_ENTRY=y # CONFIG_OPROFILE is not set CONFIG_HAVE_OPROFILE=y CONFIG_OPROFILE_NMI_TIMER=y CONFIG_JUMP_LABEL=y # CONFIG_STATIC_KEYS_SELFTEST is not set # CONFIG_STATIC_CALL_SELFTEST is not set CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS=y CONFIG_ARCH_USE_BUILTIN_BSWAP=y CONFIG_HAVE_IOREMAP_PROT=y CONFIG_HAVE_KPROBES=y CONFIG_HAVE_KRETPROBES=y CONFIG_HAVE_OPTPROBES=y CONFIG_HAVE_KPROBES_ON_FTRACE=y CONFIG_HAVE_FUNCTION_ERROR_INJECTION=y CONFIG_HAVE_NMI=y CONFIG_HAVE_ARCH_TRACEHOOK=y CONFIG_HAVE_DMA_CONTIGUOUS=y CONFIG_GENERIC_SMP_IDLE_THREAD=y CONFIG_ARCH_HAS_FORTIFY_SOURCE=y CONFIG_ARCH_HAS_SET_MEMORY=y CONFIG_ARCH_HAS_SET_DIRECT_MAP=y CONFIG_ARCH_HAS_CPU_FINALIZE_INIT=y CONFIG_HAVE_ARCH_THREAD_STRUCT_WHITELIST=y CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT=y CONFIG_HAVE_ASM_MODVERSIONS=y CONFIG_HAVE_REGS_AND_STACK_ACCESS_API=y CONFIG_HAVE_RSEQ=y CONFIG_HAVE_FUNCTION_ARG_ACCESS_API=y CONFIG_HAVE_HW_BREAKPOINT=y CONFIG_HAVE_MIXED_BREAKPOINTS_REGS=y CONFIG_HAVE_USER_RETURN_NOTIFIER=y CONFIG_HAVE_PERF_EVENTS_NMI=y CONFIG_HAVE_HARDLOCKUP_DETECTOR_PERF=y CONFIG_HAVE_PERF_REGS=y CONFIG_HAVE_PERF_USER_STACK_DUMP=y CONFIG_HAVE_ARCH_JUMP_LABEL=y 
CONFIG_HAVE_ARCH_JUMP_LABEL_RELATIVE=y CONFIG_MMU_GATHER_TABLE_FREE=y CONFIG_MMU_GATHER_RCU_TABLE_FREE=y CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG=y CONFIG_HAVE_ALIGNED_STRUCT_PAGE=y CONFIG_HAVE_CMPXCHG_LOCAL=y CONFIG_HAVE_CMPXCHG_DOUBLE=y CONFIG_ARCH_WANT_COMPAT_IPC_PARSE_VERSION=y CONFIG_ARCH_WANT_OLD_COMPAT_IPC=y CONFIG_HAVE_ARCH_SECCOMP=y CONFIG_HAVE_ARCH_SECCOMP_FILTER=y CONFIG_SECCOMP=y CONFIG_SECCOMP_FILTER=y CONFIG_HAVE_ARCH_STACKLEAK=y CONFIG_HAVE_STACKPROTECTOR=y CONFIG_STACKPROTECTOR=y CONFIG_STACKPROTECTOR_STRONG=y CONFIG_HAVE_ARCH_WITHIN_STACK_FRAMES=y CONFIG_HAVE_CONTEXT_TRACKING=y CONFIG_HAVE_VIRT_CPU_ACCOUNTING_GEN=y CONFIG_HAVE_IRQ_TIME_ACCOUNTING=y CONFIG_HAVE_MOVE_PMD=y CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE=y CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD=y CONFIG_HAVE_ARCH_HUGE_VMAP=y CONFIG_ARCH_WANT_HUGE_PMD_SHARE=y CONFIG_HAVE_ARCH_SOFT_DIRTY=y CONFIG_HAVE_MOD_ARCH_SPECIFIC=y CONFIG_MODULES_USE_ELF_RELA=y CONFIG_ARCH_HAS_ELF_RANDOMIZE=y CONFIG_HAVE_ARCH_MMAP_RND_BITS=y CONFIG_HAVE_EXIT_THREAD=y CONFIG_ARCH_MMAP_RND_BITS=28 CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS=y CONFIG_ARCH_MMAP_RND_COMPAT_BITS=8 CONFIG_HAVE_ARCH_COMPAT_MMAP_BASES=y CONFIG_HAVE_STACK_VALIDATION=y CONFIG_HAVE_RELIABLE_STACKTRACE=y CONFIG_OLD_SIGSUSPEND3=y CONFIG_COMPAT_OLD_SIGACTION=y CONFIG_COMPAT_32BIT_TIME=y CONFIG_HAVE_ARCH_VMAP_STACK=y CONFIG_VMAP_STACK=y CONFIG_ARCH_HAS_STRICT_KERNEL_RWX=y CONFIG_STRICT_KERNEL_RWX=y CONFIG_ARCH_HAS_STRICT_MODULE_RWX=y CONFIG_HAVE_ARCH_PREL32_RELOCATIONS=y # CONFIG_LOCK_EVENT_COUNTS is not set CONFIG_ARCH_HAS_MEM_ENCRYPT=y CONFIG_HAVE_STATIC_CALL=y CONFIG_HAVE_STATIC_CALL_INLINE=y CONFIG_ARCH_WANT_LD_ORPHAN_WARN=y CONFIG_ARCH_HAS_PARANOID_L1D_FLUSH=y # # GCOV-based kernel profiling # # CONFIG_GCOV_KERNEL is not set CONFIG_ARCH_HAS_GCOV_PROFILE_ALL=y # end of GCOV-based kernel profiling CONFIG_HAVE_GCC_PLUGINS=y # end of General architecture-dependent options CONFIG_RT_MUTEXES=y CONFIG_BASE_SMALL=0 # CONFIG_MODULES is not set 
CONFIG_MODULES_TREE_LOOKUP=y CONFIG_BLOCK=y CONFIG_BLK_RQ_ALLOC_TIME=y CONFIG_BLK_SCSI_REQUEST=y CONFIG_BLK_CGROUP_RWSTAT=y CONFIG_BLK_DEV_BSG=y CONFIG_BLK_DEV_BSGLIB=y CONFIG_BLK_DEV_INTEGRITY=y # CONFIG_BLK_DEV_ZONED is not set CONFIG_BLK_DEV_THROTTLING=y # CONFIG_BLK_DEV_THROTTLING_LOW is not set CONFIG_BLK_CMDLINE_PARSER=y CONFIG_BLK_WBT=y # CONFIG_BLK_CGROUP_IOLATENCY is not set CONFIG_BLK_CGROUP_IOCOST=y CONFIG_BLK_WBT_MQ=y CONFIG_BLK_DEBUG_FS=y # CONFIG_BLK_SED_OPAL is not set # CONFIG_BLK_INLINE_ENCRYPTION is not set # # Partition Types # CONFIG_PARTITION_ADVANCED=y # CONFIG_ACORN_PARTITION is not set # CONFIG_AIX_PARTITION is not set # CONFIG_OSF_PARTITION is not set # CONFIG_AMIGA_PARTITION is not set # CONFIG_ATARI_PARTITION is not set # CONFIG_MAC_PARTITION is not set # CONFIG_MSDOS_PARTITION is not set # CONFIG_LDM_PARTITION is not set # CONFIG_SGI_PARTITION is not set # CONFIG_ULTRIX_PARTITION is not set # CONFIG_SUN_PARTITION is not set # CONFIG_KARMA_PARTITION is not set # CONFIG_EFI_PARTITION is not set # CONFIG_SYSV68_PARTITION is not set # CONFIG_CMDLINE_PARTITION is not set # end of Partition Types CONFIG_BLOCK_COMPAT=y CONFIG_BLK_MQ_VIRTIO=y CONFIG_BLK_PM=y # # IO Schedulers # CONFIG_MQ_IOSCHED_DEADLINE=y CONFIG_MQ_IOSCHED_KYBER=y CONFIG_IOSCHED_BFQ=y CONFIG_BFQ_GROUP_IOSCHED=y # CONFIG_BFQ_CGROUP_DEBUG is not set # end of IO Schedulers CONFIG_PADATA=y CONFIG_ASN1=y CONFIG_INLINE_SPIN_UNLOCK_IRQ=y CONFIG_INLINE_READ_UNLOCK=y CONFIG_INLINE_READ_UNLOCK_IRQ=y CONFIG_INLINE_WRITE_UNLOCK=y CONFIG_INLINE_WRITE_UNLOCK_IRQ=y CONFIG_ARCH_SUPPORTS_ATOMIC_RMW=y CONFIG_MUTEX_SPIN_ON_OWNER=y CONFIG_RWSEM_SPIN_ON_OWNER=y CONFIG_LOCK_SPIN_ON_OWNER=y CONFIG_ARCH_USE_QUEUED_SPINLOCKS=y CONFIG_QUEUED_SPINLOCKS=y CONFIG_ARCH_USE_QUEUED_RWLOCKS=y CONFIG_QUEUED_RWLOCKS=y CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE=y CONFIG_ARCH_HAS_SYNC_CORE_BEFORE_USERMODE=y CONFIG_ARCH_HAS_SYSCALL_WRAPPER=y CONFIG_FREEZER=y # # Executable file formats # CONFIG_BINFMT_ELF=y 
CONFIG_COMPAT_BINFMT_ELF=y CONFIG_ELFCORE=y CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y CONFIG_BINFMT_SCRIPT=y CONFIG_BINFMT_MISC=y CONFIG_COREDUMP=y # end of Executable file formats # # Memory Management options # CONFIG_SELECT_MEMORY_MODEL=y CONFIG_SPARSEMEM_MANUAL=y CONFIG_SPARSEMEM=y CONFIG_NEED_MULTIPLE_NODES=y CONFIG_SPARSEMEM_EXTREME=y CONFIG_SPARSEMEM_VMEMMAP_ENABLE=y CONFIG_SPARSEMEM_VMEMMAP=y CONFIG_HAVE_FAST_GUP=y CONFIG_NUMA_KEEP_MEMINFO=y CONFIG_MEMORY_ISOLATION=y CONFIG_HAVE_BOOTMEM_INFO_NODE=y CONFIG_MEMORY_HOTPLUG=y CONFIG_MEMORY_HOTPLUG_SPARSE=y # CONFIG_MEMORY_HOTPLUG_DEFAULT_ONLINE is not set CONFIG_MEMORY_HOTREMOVE=y CONFIG_MHP_MEMMAP_ON_MEMORY=y CONFIG_SPLIT_PTLOCK_CPUS=4 CONFIG_MEMORY_BALLOON=y CONFIG_BALLOON_COMPACTION=y CONFIG_COMPACTION=y CONFIG_PAGE_REPORTING=y CONFIG_MIGRATION=y CONFIG_CONTIG_ALLOC=y CONFIG_PHYS_ADDR_T_64BIT=y CONFIG_BOUNCE=y CONFIG_VIRT_TO_BUS=y CONFIG_KSM=y CONFIG_DEFAULT_MMAP_MIN_ADDR=4096 CONFIG_TRANSPARENT_HUGEPAGE=y # CONFIG_TRANSPARENT_HUGEPAGE_ALWAYS is not set CONFIG_TRANSPARENT_HUGEPAGE_MADVISE=y CONFIG_ARCH_WANTS_THP_SWAP=y CONFIG_THP_SWAP=y CONFIG_CLEANCACHE=y CONFIG_FRONTSWAP=y # CONFIG_CMA is not set CONFIG_ZSWAP=y # CONFIG_ZSWAP_COMPRESSOR_DEFAULT_DEFLATE is not set CONFIG_ZSWAP_COMPRESSOR_DEFAULT_LZO=y # CONFIG_ZSWAP_COMPRESSOR_DEFAULT_842 is not set # CONFIG_ZSWAP_COMPRESSOR_DEFAULT_LZ4 is not set # CONFIG_ZSWAP_COMPRESSOR_DEFAULT_LZ4HC is not set # CONFIG_ZSWAP_COMPRESSOR_DEFAULT_ZSTD is not set CONFIG_ZSWAP_COMPRESSOR_DEFAULT="lzo" CONFIG_ZSWAP_ZPOOL_DEFAULT_ZBUD=y # CONFIG_ZSWAP_ZPOOL_DEFAULT_Z3FOLD is not set # CONFIG_ZSWAP_ZPOOL_DEFAULT_ZSMALLOC is not set CONFIG_ZSWAP_ZPOOL_DEFAULT="zbud" # CONFIG_ZSWAP_DEFAULT_ON is not set CONFIG_ZPOOL=y CONFIG_ZBUD=y # CONFIG_Z3FOLD is not set # CONFIG_ZSMALLOC is not set CONFIG_GENERIC_EARLY_IOREMAP=y CONFIG_DEFERRED_STRUCT_PAGE_INIT=y CONFIG_PAGE_IDLE_FLAG=y # CONFIG_IDLE_PAGE_TRACKING is not set CONFIG_ARCH_HAS_PTE_DEVMAP=y # CONFIG_ZONE_DEVICE is not set 
CONFIG_ARCH_USES_HIGH_VMA_FLAGS=y CONFIG_ARCH_HAS_PKEYS=y CONFIG_PERCPU_STATS=y # CONFIG_GUP_BENCHMARK is not set # CONFIG_READ_ONLY_THP_FOR_FS is not set CONFIG_ARCH_HAS_PTE_SPECIAL=y # # Data Access Monitoring # CONFIG_DAMON=y CONFIG_DAMON_VADDR=y CONFIG_DAMON_PADDR=y CONFIG_DAMON_SYSFS=y CONFIG_DAMON_DBGFS=y # CONFIG_DAMON_RECLAIM is not set # CONFIG_DAMON_LRU_SORT is not set # end of Data Access Monitoring # end of Memory Management options CONFIG_NET=y CONFIG_NET_INGRESS=y CONFIG_SKB_EXTENSIONS=y # # Networking options # CONFIG_PACKET=y # CONFIG_PACKET_DIAG is not set CONFIG_UNIX=y CONFIG_UNIX_SCM=y # CONFIG_UNIX_DIAG is not set # CONFIG_TLS is not set CONFIG_XFRM=y CONFIG_XFRM_ALGO=y CONFIG_XFRM_USER=y # CONFIG_XFRM_USER_COMPAT is not set # CONFIG_XFRM_INTERFACE is not set CONFIG_XFRM_SUB_POLICY=y CONFIG_XFRM_MIGRATE=y CONFIG_XFRM_STATISTICS=y # CONFIG_NET_KEY is not set CONFIG_XDP_SOCKETS=y # CONFIG_XDP_SOCKETS_DIAG is not set CONFIG_INET=y CONFIG_IP_MULTICAST=y CONFIG_IP_ADVANCED_ROUTER=y # CONFIG_IP_FIB_TRIE_STATS is not set CONFIG_IP_MULTIPLE_TABLES=y CONFIG_IP_ROUTE_MULTIPATH=y CONFIG_IP_ROUTE_VERBOSE=y CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y CONFIG_IP_PNP_RARP=y # CONFIG_NET_IPIP is not set # CONFIG_NET_IPGRE_DEMUX is not set CONFIG_IP_MROUTE_COMMON=y CONFIG_IP_MROUTE=y CONFIG_IP_MROUTE_MULTIPLE_TABLES=y CONFIG_IP_PIMSM_V1=y CONFIG_IP_PIMSM_V2=y CONFIG_SYN_COOKIES=y # CONFIG_NET_IPVTI is not set # CONFIG_NET_FOU is not set # CONFIG_INET_AH is not set # CONFIG_INET_ESP is not set # CONFIG_INET_IPCOMP is not set CONFIG_INET_TABLE_PERTURB_ORDER=16 CONFIG_INET_DIAG=y CONFIG_INET_TCP_DIAG=y # CONFIG_INET_UDP_DIAG is not set # CONFIG_INET_RAW_DIAG is not set CONFIG_INET_DIAG_DESTROY=y CONFIG_TCP_CONG_ADVANCED=y # CONFIG_TCP_CONG_BIC is not set CONFIG_TCP_CONG_CUBIC=y # CONFIG_TCP_CONG_WESTWOOD is not set # CONFIG_TCP_CONG_HTCP is not set # CONFIG_TCP_CONG_HSTCP is not set # CONFIG_TCP_CONG_HYBLA is not set # CONFIG_TCP_CONG_VEGAS is not set 
# CONFIG_TCP_CONG_NV is not set # CONFIG_TCP_CONG_SCALABLE is not set # CONFIG_TCP_CONG_LP is not set # CONFIG_TCP_CONG_VENO is not set # CONFIG_TCP_CONG_YEAH is not set # CONFIG_TCP_CONG_ILLINOIS is not set # CONFIG_TCP_CONG_DCTCP is not set # CONFIG_TCP_CONG_CDG is not set # CONFIG_TCP_CONG_BBR is not set # CONFIG_TCP_CONG_BBR2 is not set CONFIG_DEFAULT_CUBIC=y # CONFIG_DEFAULT_RENO is not set CONFIG_DEFAULT_TCP_CONG="cubic" CONFIG_TCP_MD5SIG=y CONFIG_IPV6=y CONFIG_IPV6_ROUTER_PREF=y CONFIG_IPV6_ROUTE_INFO=y CONFIG_IPV6_OPTIMISTIC_DAD=y # CONFIG_INET6_AH is not set # CONFIG_INET6_ESP is not set # CONFIG_INET6_IPCOMP is not set # CONFIG_IPV6_MIP6 is not set # CONFIG_IPV6_ILA is not set # CONFIG_IPV6_VTI is not set # CONFIG_IPV6_SIT is not set # CONFIG_IPV6_TUNNEL is not set CONFIG_IPV6_MULTIPLE_TABLES=y CONFIG_IPV6_SUBTREES=y CONFIG_IPV6_MROUTE=y CONFIG_IPV6_MROUTE_MULTIPLE_TABLES=y CONFIG_IPV6_PIMSM_V2=y CONFIG_IPV6_SEG6_LWTUNNEL=y CONFIG_IPV6_SEG6_HMAC=y CONFIG_IPV6_SEG6_BPF=y # CONFIG_IPV6_RPL_LWTUNNEL is not set CONFIG_NETLABEL=y CONFIG_MPTCP=y CONFIG_INET_MPTCP_DIAG=y CONFIG_MPTCP_IPV6=y CONFIG_NETWORK_SECMARK=y CONFIG_NET_PTP_CLASSIFY=y CONFIG_NETWORK_PHY_TIMESTAMPING=y CONFIG_NETFILTER=y CONFIG_NETFILTER_ADVANCED=y CONFIG_BRIDGE_NETFILTER=y # # Core Netfilter Configuration # CONFIG_NETFILTER_INGRESS=y CONFIG_NETFILTER_FAMILY_BRIDGE=y # CONFIG_NETFILTER_NETLINK_ACCT is not set # CONFIG_NETFILTER_NETLINK_QUEUE is not set # CONFIG_NETFILTER_NETLINK_LOG is not set # CONFIG_NETFILTER_NETLINK_OSF is not set CONFIG_NF_CONNTRACK=y CONFIG_NF_LOG_COMMON=y # CONFIG_NF_LOG_NETDEV is not set CONFIG_NF_CONNTRACK_MARK=y CONFIG_NF_CONNTRACK_SECMARK=y CONFIG_NF_CONNTRACK_ZONES=y CONFIG_NF_CONNTRACK_PROCFS=y CONFIG_NF_CONNTRACK_EVENTS=y CONFIG_NF_CONNTRACK_TIMEOUT=y CONFIG_NF_CONNTRACK_TIMESTAMP=y CONFIG_NF_CONNTRACK_LABELS=y CONFIG_NF_CT_PROTO_DCCP=y CONFIG_NF_CT_PROTO_SCTP=y CONFIG_NF_CT_PROTO_UDPLITE=y # CONFIG_NF_CONNTRACK_AMANDA is not set # CONFIG_NF_CONNTRACK_FTP is 
not set # CONFIG_NF_CONNTRACK_H323 is not set # CONFIG_NF_CONNTRACK_IRC is not set # CONFIG_NF_CONNTRACK_NETBIOS_NS is not set # CONFIG_NF_CONNTRACK_SNMP is not set # CONFIG_NF_CONNTRACK_PPTP is not set # CONFIG_NF_CONNTRACK_SANE is not set # CONFIG_NF_CONNTRACK_SIP is not set # CONFIG_NF_CONNTRACK_TFTP is not set # CONFIG_NF_CT_NETLINK is not set # CONFIG_NF_CT_NETLINK_TIMEOUT is not set CONFIG_NF_NAT=y CONFIG_NF_NAT_REDIRECT=y CONFIG_NF_NAT_MASQUERADE=y CONFIG_NETFILTER_SYNPROXY=y # CONFIG_NF_TABLES is not set CONFIG_NETFILTER_XTABLES=y # # Xtables combined modules # # CONFIG_NETFILTER_XT_MARK is not set # CONFIG_NETFILTER_XT_CONNMARK is not set # # Xtables targets # # CONFIG_NETFILTER_XT_TARGET_AUDIT is not set # CONFIG_NETFILTER_XT_TARGET_CHECKSUM is not set # CONFIG_NETFILTER_XT_TARGET_CLASSIFY is not set # CONFIG_NETFILTER_XT_TARGET_CONNMARK is not set # CONFIG_NETFILTER_XT_TARGET_CONNSECMARK is not set # CONFIG_NETFILTER_XT_TARGET_DSCP is not set # CONFIG_NETFILTER_XT_TARGET_HL is not set # CONFIG_NETFILTER_XT_TARGET_HMARK is not set # CONFIG_NETFILTER_XT_TARGET_IDLETIMER is not set # CONFIG_NETFILTER_XT_TARGET_LOG is not set # CONFIG_NETFILTER_XT_TARGET_MARK is not set CONFIG_NETFILTER_XT_NAT=y CONFIG_NETFILTER_XT_TARGET_NETMAP=y # CONFIG_NETFILTER_XT_TARGET_NFLOG is not set # CONFIG_NETFILTER_XT_TARGET_NFQUEUE is not set # CONFIG_NETFILTER_XT_TARGET_RATEEST is not set CONFIG_NETFILTER_XT_TARGET_REDIRECT=y CONFIG_NETFILTER_XT_TARGET_MASQUERADE=y # CONFIG_NETFILTER_XT_TARGET_TEE is not set # CONFIG_NETFILTER_XT_TARGET_TPROXY is not set # CONFIG_NETFILTER_XT_TARGET_SECMARK is not set # CONFIG_NETFILTER_XT_TARGET_TCPMSS is not set # CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP is not set # # Xtables matches # CONFIG_NETFILTER_XT_MATCH_ADDRTYPE=y # CONFIG_NETFILTER_XT_MATCH_BPF is not set # CONFIG_NETFILTER_XT_MATCH_CGROUP is not set # CONFIG_NETFILTER_XT_MATCH_CLUSTER is not set # CONFIG_NETFILTER_XT_MATCH_COMMENT is not set # CONFIG_NETFILTER_XT_MATCH_CONNBYTES is 
not set # CONFIG_NETFILTER_XT_MATCH_CONNLABEL is not set # CONFIG_NETFILTER_XT_MATCH_CONNLIMIT is not set # CONFIG_NETFILTER_XT_MATCH_CONNMARK is not set CONFIG_NETFILTER_XT_MATCH_CONNTRACK=y # CONFIG_NETFILTER_XT_MATCH_CPU is not set # CONFIG_NETFILTER_XT_MATCH_DCCP is not set # CONFIG_NETFILTER_XT_MATCH_DEVGROUP is not set # CONFIG_NETFILTER_XT_MATCH_DSCP is not set # CONFIG_NETFILTER_XT_MATCH_ECN is not set # CONFIG_NETFILTER_XT_MATCH_ESP is not set # CONFIG_NETFILTER_XT_MATCH_HASHLIMIT is not set # CONFIG_NETFILTER_XT_MATCH_HELPER is not set # CONFIG_NETFILTER_XT_MATCH_HL is not set # CONFIG_NETFILTER_XT_MATCH_IPCOMP is not set # CONFIG_NETFILTER_XT_MATCH_IPRANGE is not set # CONFIG_NETFILTER_XT_MATCH_L2TP is not set # CONFIG_NETFILTER_XT_MATCH_LENGTH is not set # CONFIG_NETFILTER_XT_MATCH_LIMIT is not set # CONFIG_NETFILTER_XT_MATCH_MAC is not set # CONFIG_NETFILTER_XT_MATCH_MARK is not set # CONFIG_NETFILTER_XT_MATCH_MULTIPORT is not set # CONFIG_NETFILTER_XT_MATCH_NFACCT is not set # CONFIG_NETFILTER_XT_MATCH_OSF is not set # CONFIG_NETFILTER_XT_MATCH_OWNER is not set # CONFIG_NETFILTER_XT_MATCH_POLICY is not set # CONFIG_NETFILTER_XT_MATCH_PHYSDEV is not set # CONFIG_NETFILTER_XT_MATCH_PKTTYPE is not set # CONFIG_NETFILTER_XT_MATCH_QUOTA is not set # CONFIG_NETFILTER_XT_MATCH_RATEEST is not set # CONFIG_NETFILTER_XT_MATCH_REALM is not set # CONFIG_NETFILTER_XT_MATCH_RECENT is not set # CONFIG_NETFILTER_XT_MATCH_SCTP is not set # CONFIG_NETFILTER_XT_MATCH_SOCKET is not set # CONFIG_NETFILTER_XT_MATCH_STATE is not set # CONFIG_NETFILTER_XT_MATCH_STATISTIC is not set # CONFIG_NETFILTER_XT_MATCH_STRING is not set # CONFIG_NETFILTER_XT_MATCH_TCPMSS is not set # CONFIG_NETFILTER_XT_MATCH_TIME is not set # CONFIG_NETFILTER_XT_MATCH_U32 is not set # end of Core Netfilter Configuration # CONFIG_IP_SET is not set # CONFIG_IP_VS is not set # # IP: Netfilter Configuration # CONFIG_NF_DEFRAG_IPV4=y # CONFIG_NF_SOCKET_IPV4 is not set # CONFIG_NF_TPROXY_IPV4 is not set # 
CONFIG_NF_DUP_IPV4 is not set # CONFIG_NF_LOG_ARP is not set # CONFIG_NF_LOG_IPV4 is not set CONFIG_NF_REJECT_IPV4=y CONFIG_IP_NF_IPTABLES=y # CONFIG_IP_NF_MATCH_AH is not set # CONFIG_IP_NF_MATCH_ECN is not set # CONFIG_IP_NF_MATCH_RPFILTER is not set # CONFIG_IP_NF_MATCH_TTL is not set CONFIG_IP_NF_FILTER=y CONFIG_IP_NF_TARGET_REJECT=y CONFIG_IP_NF_TARGET_SYNPROXY=y CONFIG_IP_NF_NAT=y CONFIG_IP_NF_TARGET_MASQUERADE=y CONFIG_IP_NF_TARGET_NETMAP=y CONFIG_IP_NF_TARGET_REDIRECT=y CONFIG_IP_NF_MANGLE=y # CONFIG_IP_NF_TARGET_CLUSTERIP is not set # CONFIG_IP_NF_TARGET_ECN is not set # CONFIG_IP_NF_TARGET_TTL is not set # CONFIG_IP_NF_RAW is not set # CONFIG_IP_NF_SECURITY is not set # CONFIG_IP_NF_ARPTABLES is not set # end of IP: Netfilter Configuration # # IPv6: Netfilter Configuration # # CONFIG_NF_SOCKET_IPV6 is not set # CONFIG_NF_TPROXY_IPV6 is not set # CONFIG_NF_DUP_IPV6 is not set CONFIG_NF_REJECT_IPV6=y CONFIG_NF_LOG_IPV6=y CONFIG_IP6_NF_IPTABLES=y # CONFIG_IP6_NF_MATCH_AH is not set # CONFIG_IP6_NF_MATCH_EUI64 is not set # CONFIG_IP6_NF_MATCH_FRAG is not set # CONFIG_IP6_NF_MATCH_OPTS is not set # CONFIG_IP6_NF_MATCH_HL is not set # CONFIG_IP6_NF_MATCH_IPV6HEADER is not set # CONFIG_IP6_NF_MATCH_MH is not set # CONFIG_IP6_NF_MATCH_RPFILTER is not set # CONFIG_IP6_NF_MATCH_RT is not set # CONFIG_IP6_NF_MATCH_SRH is not set # CONFIG_IP6_NF_TARGET_HL is not set CONFIG_IP6_NF_FILTER=y CONFIG_IP6_NF_TARGET_REJECT=y CONFIG_IP6_NF_TARGET_SYNPROXY=y CONFIG_IP6_NF_MANGLE=y # CONFIG_IP6_NF_RAW is not set # CONFIG_IP6_NF_SECURITY is not set CONFIG_IP6_NF_NAT=y CONFIG_IP6_NF_TARGET_MASQUERADE=y # CONFIG_IP6_NF_TARGET_NPT is not set # end of IPv6: Netfilter Configuration CONFIG_NF_DEFRAG_IPV6=y # CONFIG_NF_CONNTRACK_BRIDGE is not set # CONFIG_BRIDGE_NF_EBTABLES is not set CONFIG_BPFILTER=y # CONFIG_IP_DCCP is not set # CONFIG_IP_SCTP is not set # CONFIG_RDS is not set # CONFIG_TIPC is not set # CONFIG_ATM is not set # CONFIG_L2TP is not set CONFIG_STP=y CONFIG_BRIDGE=y 
CONFIG_BRIDGE_IGMP_SNOOPING=y # CONFIG_BRIDGE_MRP is not set CONFIG_HAVE_NET_DSA=y # CONFIG_NET_DSA is not set # CONFIG_VLAN_8021Q is not set CONFIG_LLC=y # CONFIG_LLC2 is not set # CONFIG_ATALK is not set # CONFIG_X25 is not set # CONFIG_LAPB is not set # CONFIG_PHONET is not set # CONFIG_6LOWPAN is not set # CONFIG_IEEE802154 is not set CONFIG_NET_SCHED=y # # Queueing/Scheduling # # CONFIG_NET_SCH_CBQ is not set # CONFIG_NET_SCH_HTB is not set # CONFIG_NET_SCH_HFSC is not set # CONFIG_NET_SCH_PRIO is not set # CONFIG_NET_SCH_MULTIQ is not set # CONFIG_NET_SCH_RED is not set # CONFIG_NET_SCH_SFB is not set # CONFIG_NET_SCH_SFQ is not set # CONFIG_NET_SCH_TEQL is not set # CONFIG_NET_SCH_TBF is not set # CONFIG_NET_SCH_CBS is not set # CONFIG_NET_SCH_ETF is not set # CONFIG_NET_SCH_TAPRIO is not set # CONFIG_NET_SCH_GRED is not set # CONFIG_NET_SCH_DSMARK is not set # CONFIG_NET_SCH_NETEM is not set # CONFIG_NET_SCH_DRR is not set # CONFIG_NET_SCH_MQPRIO is not set # CONFIG_NET_SCH_SKBPRIO is not set # CONFIG_NET_SCH_CHOKE is not set # CONFIG_NET_SCH_QFQ is not set # CONFIG_NET_SCH_CODEL is not set # CONFIG_NET_SCH_FQ_CODEL is not set # CONFIG_NET_SCH_CAKE is not set # CONFIG_NET_SCH_FQ is not set # CONFIG_NET_SCH_HHF is not set # CONFIG_NET_SCH_PIE is not set # CONFIG_NET_SCH_INGRESS is not set # CONFIG_NET_SCH_PLUG is not set # CONFIG_NET_SCH_ETS is not set # CONFIG_NET_SCH_DEFAULT is not set # # Classification # CONFIG_NET_CLS=y # CONFIG_NET_CLS_BASIC is not set # CONFIG_NET_CLS_ROUTE4 is not set # CONFIG_NET_CLS_FW is not set # CONFIG_NET_CLS_U32 is not set # CONFIG_NET_CLS_FLOW is not set # CONFIG_NET_CLS_CGROUP is not set # CONFIG_NET_CLS_BPF is not set # CONFIG_NET_CLS_FLOWER is not set # CONFIG_NET_CLS_MATCHALL is not set CONFIG_NET_EMATCH=y CONFIG_NET_EMATCH_STACK=32 # CONFIG_NET_EMATCH_CMP is not set # CONFIG_NET_EMATCH_NBYTE is not set # CONFIG_NET_EMATCH_U32 is not set # CONFIG_NET_EMATCH_META is not set # CONFIG_NET_EMATCH_TEXT is not set # 
CONFIG_NET_EMATCH_IPT is not set CONFIG_NET_CLS_ACT=y # CONFIG_NET_ACT_POLICE is not set # CONFIG_NET_ACT_GACT is not set # CONFIG_NET_ACT_MIRRED is not set # CONFIG_NET_ACT_SAMPLE is not set # CONFIG_NET_ACT_IPT is not set # CONFIG_NET_ACT_NAT is not set # CONFIG_NET_ACT_PEDIT is not set # CONFIG_NET_ACT_SIMP is not set # CONFIG_NET_ACT_SKBEDIT is not set # CONFIG_NET_ACT_CSUM is not set # CONFIG_NET_ACT_MPLS is not set # CONFIG_NET_ACT_VLAN is not set # CONFIG_NET_ACT_BPF is not set # CONFIG_NET_ACT_CONNMARK is not set # CONFIG_NET_ACT_CTINFO is not set # CONFIG_NET_ACT_SKBMOD is not set # CONFIG_NET_ACT_IFE is not set # CONFIG_NET_ACT_TUNNEL_KEY is not set # CONFIG_NET_ACT_GATE is not set # CONFIG_NET_TC_SKB_EXT is not set CONFIG_NET_SCH_FIFO=y CONFIG_DCB=y CONFIG_DNS_RESOLVER=y # CONFIG_BATMAN_ADV is not set # CONFIG_OPENVSWITCH is not set CONFIG_VSOCKETS=y # CONFIG_VSOCKETS_DIAG is not set # CONFIG_VSOCKETS_LOOPBACK is not set CONFIG_VIRTIO_VSOCKETS=y CONFIG_VIRTIO_VSOCKETS_COMMON=y # CONFIG_NETLINK_DIAG is not set CONFIG_MPLS=y # CONFIG_NET_MPLS_GSO is not set # CONFIG_MPLS_ROUTING is not set # CONFIG_NET_NSH is not set # CONFIG_HSR is not set # CONFIG_NET_SWITCHDEV is not set CONFIG_NET_L3_MASTER_DEV=y # CONFIG_QRTR is not set # CONFIG_NET_NCSI is not set CONFIG_RPS=y CONFIG_RFS_ACCEL=y CONFIG_XPS=y CONFIG_CGROUP_NET_PRIO=y CONFIG_CGROUP_NET_CLASSID=y CONFIG_NET_RX_BUSY_POLL=y CONFIG_BQL=y CONFIG_BPF_STREAM_PARSER=y CONFIG_NET_FLOW_LIMIT=y # # Network testing # # CONFIG_NET_PKTGEN is not set # end of Network testing # end of Networking options # CONFIG_HAMRADIO is not set # CONFIG_CAN is not set # CONFIG_BT is not set # CONFIG_AF_RXRPC is not set # CONFIG_AF_KCM is not set CONFIG_STREAM_PARSER=y CONFIG_FIB_RULES=y # CONFIG_WIRELESS is not set # CONFIG_WIMAX is not set # CONFIG_RFKILL is not set # CONFIG_NET_9P is not set # CONFIG_CAIF is not set # CONFIG_CEPH_LIB is not set # CONFIG_NFC is not set # CONFIG_PSAMPLE is not set # CONFIG_NET_IFE is not set 
CONFIG_LWTUNNEL=y CONFIG_LWTUNNEL_BPF=y CONFIG_DST_CACHE=y CONFIG_GRO_CELLS=y CONFIG_NET_SOCK_MSG=y CONFIG_FAILOVER=y CONFIG_ETHTOOL_NETLINK=y CONFIG_HAVE_EBPF_JIT=y # # Device Drivers # CONFIG_HAVE_EISA=y # CONFIG_EISA is not set CONFIG_HAVE_PCI=y # CONFIG_PCI is not set # CONFIG_PCCARD is not set # # Generic Driver Options # CONFIG_UEVENT_HELPER=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y CONFIG_STANDALONE=y CONFIG_PREVENT_FIRMWARE_BUILD=y # # Firmware loader # CONFIG_FW_LOADER=y CONFIG_FW_LOADER_PAGED_BUF=y CONFIG_EXTRA_FIRMWARE="" CONFIG_FW_LOADER_USER_HELPER=y # CONFIG_FW_LOADER_USER_HELPER_FALLBACK is not set # CONFIG_FW_LOADER_COMPRESS is not set CONFIG_FW_CACHE=y # end of Firmware loader CONFIG_ALLOW_DEV_COREDUMP=y # CONFIG_DEBUG_DRIVER is not set # CONFIG_DEBUG_DEVRES is not set # CONFIG_DEBUG_TEST_DRIVER_REMOVE is not set CONFIG_GENERIC_CPU_AUTOPROBE=y CONFIG_GENERIC_CPU_VULNERABILITIES=y CONFIG_DMA_SHARED_BUFFER=y # CONFIG_DMA_FENCE_TRACE is not set # end of Generic Driver Options # # Bus devices # # CONFIG_MHI_BUS is not set # end of Bus devices CONFIG_CONNECTOR=y CONFIG_PROC_EVENTS=y # CONFIG_GNSS is not set # CONFIG_MTD is not set # CONFIG_OF is not set CONFIG_ARCH_MIGHT_HAVE_PC_PARPORT=y # CONFIG_PARPORT is not set CONFIG_PNP=y CONFIG_PNP_DEBUG_MESSAGES=y # # Protocols # CONFIG_PNPACPI=y CONFIG_BLK_DEV=y # CONFIG_BLK_DEV_NULL_BLK is not set # CONFIG_BLK_DEV_FD is not set CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_LOOP_MIN_COUNT=8 # CONFIG_BLK_DEV_CRYPTOLOOP is not set # CONFIG_BLK_DEV_DRBD is not set # CONFIG_BLK_DEV_NBD is not set # CONFIG_BLK_DEV_RAM is not set # CONFIG_CDROM_PKTCDVD is not set # CONFIG_ATA_OVER_ETH is not set CONFIG_VIRTIO_BLK=y # CONFIG_BLK_DEV_RBD is not set # # NVME Support # # CONFIG_NVME_FC is not set # CONFIG_NVME_TCP is not set # end of NVME Support # # Misc devices # # CONFIG_DUMMY_IRQ is not set # CONFIG_ENCLOSURE_SERVICES is not set # CONFIG_SRAM is not set # CONFIG_XILINX_SDFEC is not 
set # CONFIG_PVPANIC is not set CONFIG_SYSGENID=y # CONFIG_C2PORT is not set # # EEPROM support # # CONFIG_EEPROM_93CX6 is not set # end of EEPROM support # # Texas Instruments shared transport line discipline # # end of Texas Instruments shared transport line discipline # # Altera FPGA firmware download module (requires I2C) # # CONFIG_ECHO is not set # end of Misc devices CONFIG_HAVE_IDE=y # CONFIG_IDE is not set # # SCSI device support # CONFIG_SCSI_MOD=y # CONFIG_RAID_ATTRS is not set CONFIG_SCSI=y CONFIG_SCSI_DMA=y CONFIG_SCSI_PROC_FS=y # # SCSI support type (disk, tape, CD-ROM) # # CONFIG_BLK_DEV_SD is not set # CONFIG_CHR_DEV_ST is not set # CONFIG_BLK_DEV_SR is not set # CONFIG_CHR_DEV_SG is not set # CONFIG_CHR_DEV_SCH is not set # CONFIG_SCSI_CONSTANTS is not set # CONFIG_SCSI_LOGGING is not set # CONFIG_SCSI_SCAN_ASYNC is not set # # SCSI Transports # # CONFIG_SCSI_SPI_ATTRS is not set # CONFIG_SCSI_FC_ATTRS is not set CONFIG_SCSI_ISCSI_ATTRS=y # CONFIG_SCSI_SAS_ATTRS is not set # CONFIG_SCSI_SAS_LIBSAS is not set # CONFIG_SCSI_SRP_ATTRS is not set # end of SCSI Transports CONFIG_SCSI_LOWLEVEL=y CONFIG_ISCSI_TCP=y # CONFIG_ISCSI_BOOT_SYSFS is not set # CONFIG_SCSI_UFSHCD is not set # CONFIG_SCSI_DEBUG is not set # CONFIG_SCSI_VIRTIO is not set # CONFIG_SCSI_DH is not set # end of SCSI device support # CONFIG_ATA is not set # CONFIG_MD is not set # CONFIG_TARGET_CORE is not set # CONFIG_MACINTOSH_DRIVERS is not set CONFIG_NETDEVICES=y CONFIG_NET_CORE=y # CONFIG_BONDING is not set # CONFIG_DUMMY is not set # CONFIG_WIREGUARD is not set # CONFIG_EQUALIZER is not set # CONFIG_NET_TEAM is not set # CONFIG_MACVLAN is not set # CONFIG_IPVLAN is not set # CONFIG_VXLAN is not set # CONFIG_GENEVE is not set # CONFIG_BAREUDP is not set # CONFIG_GTP is not set # CONFIG_MACSEC is not set # CONFIG_NETCONSOLE is not set # CONFIG_TUN is not set # CONFIG_TUN_VNET_CROSS_LE is not set CONFIG_VETH=y CONFIG_VIRTIO_NET=y # CONFIG_NLMON is not set # CONFIG_NET_VRF is not set # 
# Distributed Switch Architecture drivers # # end of Distributed Switch Architecture drivers # CONFIG_ETHERNET is not set # CONFIG_NET_SB1000 is not set # CONFIG_PHYLIB is not set # CONFIG_MDIO_DEVICE is not set # # PCS device drivers # # end of PCS device drivers # CONFIG_PPP is not set # CONFIG_SLIP is not set # # Host-side USB support is needed for USB Network Adapter support # # CONFIG_WLAN is not set # # Enable WiMAX (Networking options) to see the WiMAX drivers # # CONFIG_WAN is not set # CONFIG_FUJITSU_ES is not set # CONFIG_NETDEVSIM is not set CONFIG_NET_FAILOVER=y # CONFIG_ISDN is not set # # Input device support # CONFIG_INPUT=y CONFIG_INPUT_FF_MEMLESS=y # CONFIG_INPUT_POLLDEV is not set # CONFIG_INPUT_SPARSEKMAP is not set # CONFIG_INPUT_MATRIXKMAP is not set # # Userland interfaces # # CONFIG_INPUT_MOUSEDEV is not set # CONFIG_INPUT_JOYDEV is not set CONFIG_INPUT_EVDEV=y # CONFIG_INPUT_EVBUG is not set # # Input Device Drivers # # CONFIG_INPUT_KEYBOARD is not set # CONFIG_INPUT_MOUSE is not set # CONFIG_INPUT_JOYSTICK is not set # CONFIG_INPUT_TABLET is not set # CONFIG_INPUT_TOUCHSCREEN is not set CONFIG_INPUT_MISC=y # CONFIG_INPUT_AD714X is not set # CONFIG_INPUT_E3X0_BUTTON is not set # CONFIG_INPUT_PCSPKR is not set # CONFIG_INPUT_ATLAS_BTNS is not set # CONFIG_INPUT_ATI_REMOTE2 is not set # CONFIG_INPUT_KEYSPAN_REMOTE is not set # CONFIG_INPUT_POWERMATE is not set # CONFIG_INPUT_YEALINK is not set # CONFIG_INPUT_CM109 is not set # CONFIG_INPUT_UINPUT is not set # CONFIG_INPUT_ADXL34X is not set # CONFIG_INPUT_CMA3000 is not set # CONFIG_RMI4_CORE is not set # # Hardware I/O ports # # CONFIG_SERIO is not set CONFIG_ARCH_MIGHT_HAVE_PC_SERIO=y # CONFIG_GAMEPORT is not set # end of Hardware I/O ports # end of Input device support # # Character devices # CONFIG_TTY=y CONFIG_VT=y CONFIG_CONSOLE_TRANSLATIONS=y CONFIG_VT_CONSOLE=y CONFIG_VT_CONSOLE_SLEEP=y CONFIG_HW_CONSOLE=y CONFIG_VT_HW_CONSOLE_BINDING=y CONFIG_UNIX98_PTYS=y # CONFIG_LEGACY_PTYS is not 
set CONFIG_LDISC_AUTOLOAD=y # # Serial drivers # CONFIG_SERIAL_EARLYCON=y CONFIG_SERIAL_8250=y # CONFIG_SERIAL_8250_DEPRECATED_OPTIONS is not set CONFIG_SERIAL_8250_PNP=y # CONFIG_SERIAL_8250_16550A_VARIANTS is not set # CONFIG_SERIAL_8250_FINTEK is not set CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_8250_DMA=y CONFIG_SERIAL_8250_NR_UARTS=1 CONFIG_SERIAL_8250_RUNTIME_UARTS=1 # CONFIG_SERIAL_8250_EXTENDED is not set # CONFIG_SERIAL_8250_DW is not set # CONFIG_SERIAL_8250_RT288X is not set # # Non-8250 serial port support # # CONFIG_SERIAL_UARTLITE is not set CONFIG_SERIAL_CORE=y CONFIG_SERIAL_CORE_CONSOLE=y # CONFIG_SERIAL_LANTIQ is not set # CONFIG_SERIAL_SCCNXP is not set # CONFIG_SERIAL_ALTERA_JTAGUART is not set # CONFIG_SERIAL_ALTERA_UART is not set # CONFIG_SERIAL_ARC is not set # CONFIG_SERIAL_FSL_LPUART is not set # CONFIG_SERIAL_FSL_LINFLEXUART is not set # CONFIG_SERIAL_SPRD is not set # end of Serial drivers # CONFIG_SERIAL_NONSTANDARD is not set # CONFIG_N_GSM is not set # CONFIG_NULL_TTY is not set # CONFIG_TRACE_SINK is not set CONFIG_HVC_DRIVER=y CONFIG_SERIAL_DEV_BUS=y CONFIG_SERIAL_DEV_CTRL_TTYPORT=y CONFIG_VIRTIO_CONSOLE=y # CONFIG_IPMI_HANDLER is not set CONFIG_HW_RANDOM=y # CONFIG_HW_RANDOM_TIMERIOMEM is not set # CONFIG_HW_RANDOM_BA431 is not set # CONFIG_HW_RANDOM_VIA is not set CONFIG_HW_RANDOM_VIRTIO=y # CONFIG_HW_RANDOM_XIPHERA is not set # CONFIG_MWAVE is not set CONFIG_DEVMEM=y # CONFIG_DEVKMEM is not set # CONFIG_NVRAM is not set # CONFIG_RAW_DRIVER is not set # CONFIG_HPET is not set # CONFIG_HANGCHECK_TIMER is not set # CONFIG_TCG_TPM is not set # CONFIG_TELCLOCK is not set CONFIG_RANDOM_TRUST_CPU=y CONFIG_RANDOM_TRUST_BOOTLOADER=y # end of Character devices # # I2C support # # CONFIG_I2C is not set # end of I2C support # CONFIG_I3C is not set # CONFIG_SPI is not set # CONFIG_SPMI is not set # CONFIG_HSI is not set CONFIG_PPS=y # CONFIG_PPS_DEBUG is not set # # PPS clients support # # CONFIG_PPS_CLIENT_KTIMER is not set # 
CONFIG_PPS_CLIENT_LDISC is not set # CONFIG_PPS_CLIENT_GPIO is not set # # PPS generators support # # # PTP clock support # CONFIG_PTP_1588_CLOCK=y # # Enable PHYLIB and NETWORK_PHY_TIMESTAMPING to see the additional clocks. # CONFIG_PTP_1588_CLOCK_KVM=y # CONFIG_PTP_1588_CLOCK_VMW is not set # end of PTP clock support # CONFIG_PINCTRL is not set # CONFIG_GPIOLIB is not set # CONFIG_W1 is not set CONFIG_POWER_RESET=y # CONFIG_POWER_RESET_RESTART is not set CONFIG_POWER_SUPPLY=y # CONFIG_POWER_SUPPLY_DEBUG is not set # CONFIG_PDA_POWER is not set # CONFIG_TEST_POWER is not set # CONFIG_BATTERY_DS2780 is not set # CONFIG_BATTERY_DS2781 is not set # CONFIG_BATTERY_BQ27XXX is not set # CONFIG_CHARGER_MAX8903 is not set # CONFIG_HWMON is not set CONFIG_THERMAL=y # CONFIG_THERMAL_NETLINK is not set # CONFIG_THERMAL_STATISTICS is not set CONFIG_THERMAL_EMERGENCY_POWEROFF_DELAY_MS=0 CONFIG_THERMAL_WRITABLE_TRIPS=y CONFIG_THERMAL_DEFAULT_GOV_STEP_WISE=y # CONFIG_THERMAL_DEFAULT_GOV_FAIR_SHARE is not set # CONFIG_THERMAL_DEFAULT_GOV_USER_SPACE is not set CONFIG_THERMAL_GOV_FAIR_SHARE=y CONFIG_THERMAL_GOV_STEP_WISE=y # CONFIG_THERMAL_GOV_BANG_BANG is not set CONFIG_THERMAL_GOV_USER_SPACE=y # CONFIG_THERMAL_EMULATION is not set # # Intel thermal drivers # # CONFIG_INTEL_POWERCLAMP is not set # # ACPI INT340X thermal drivers # # end of ACPI INT340X thermal drivers # end of Intel thermal drivers CONFIG_WATCHDOG=y CONFIG_WATCHDOG_CORE=y # CONFIG_WATCHDOG_NOWAYOUT is not set CONFIG_WATCHDOG_HANDLE_BOOT_ENABLED=y CONFIG_WATCHDOG_OPEN_TIMEOUT=0 CONFIG_WATCHDOG_SYSFS=y # # Watchdog Pretimeout Governors # # CONFIG_WATCHDOG_PRETIMEOUT_GOV is not set # # Watchdog Device Drivers # # CONFIG_SOFT_WATCHDOG is not set # CONFIG_WDAT_WDT is not set # CONFIG_XILINX_WATCHDOG is not set # CONFIG_CADENCE_WATCHDOG is not set # CONFIG_DW_WATCHDOG is not set # CONFIG_MAX63XX_WATCHDOG is not set # CONFIG_ACQUIRE_WDT is not set # CONFIG_ADVANTECH_WDT is not set # CONFIG_EBC_C384_WDT is not set # 
CONFIG_F71808E_WDT is not set # CONFIG_SBC_FITPC2_WATCHDOG is not set # CONFIG_EUROTECH_WDT is not set # CONFIG_IB700_WDT is not set # CONFIG_IBMASR is not set # CONFIG_WAFER_WDT is not set # CONFIG_IT8712F_WDT is not set # CONFIG_IT87_WDT is not set # CONFIG_SC1200_WDT is not set # CONFIG_PC87413_WDT is not set # CONFIG_60XX_WDT is not set # CONFIG_CPU5_WDT is not set # CONFIG_SMSC_SCH311X_WDT is not set # CONFIG_SMSC37B787_WDT is not set # CONFIG_TQMX86_WDT is not set # CONFIG_W83627HF_WDT is not set # CONFIG_W83877F_WDT is not set # CONFIG_W83977F_WDT is not set # CONFIG_MACHZ_WDT is not set # CONFIG_SBC_EPX_C3_WATCHDOG is not set # CONFIG_NI903X_WDT is not set # CONFIG_NIC7018_WDT is not set CONFIG_SSB_POSSIBLE=y # CONFIG_SSB is not set CONFIG_BCMA_POSSIBLE=y # CONFIG_BCMA is not set # # Multifunction device drivers # # CONFIG_MFD_MADERA is not set # CONFIG_HTC_PASIC3 is not set # CONFIG_MFD_INTEL_LPSS_ACPI is not set # CONFIG_MFD_INTEL_PMC_BXT is not set # CONFIG_MFD_KEMPLD is not set # CONFIG_MFD_MT6397 is not set # CONFIG_MFD_SM501 is not set # CONFIG_ABX500_CORE is not set # CONFIG_MFD_SYSCON is not set # CONFIG_MFD_TI_AM335X_TSCADC is not set # CONFIG_MFD_TQMX86 is not set # CONFIG_RAVE_SP_CORE is not set # end of Multifunction device drivers # CONFIG_REGULATOR is not set # CONFIG_RC_CORE is not set # CONFIG_MEDIA_CEC_SUPPORT is not set # CONFIG_MEDIA_SUPPORT is not set # # Graphics support # # CONFIG_DRM is not set # # ARM devices # # end of ARM devices # # Frame buffer Devices # # CONFIG_FB is not set # end of Frame buffer Devices # # Backlight & LCD device support # # CONFIG_LCD_CLASS_DEVICE is not set # CONFIG_BACKLIGHT_CLASS_DEVICE is not set # end of Backlight & LCD device support # # Console display driver support # CONFIG_VGA_CONSOLE=y CONFIG_DUMMY_CONSOLE=y CONFIG_DUMMY_CONSOLE_COLUMNS=80 CONFIG_DUMMY_CONSOLE_ROWS=25 # end of Console display driver support # end of Graphics support # CONFIG_SOUND is not set # # HID support # CONFIG_HID=y # 
CONFIG_HID_BATTERY_STRENGTH is not set CONFIG_HIDRAW=y # CONFIG_UHID is not set # CONFIG_HID_GENERIC is not set # # Special HID drivers # # CONFIG_HID_A4TECH is not set # CONFIG_HID_ACRUX is not set # CONFIG_HID_APPLE is not set # CONFIG_HID_AUREAL is not set # CONFIG_HID_BELKIN is not set # CONFIG_HID_CHERRY is not set # CONFIG_HID_COUGAR is not set # CONFIG_HID_MACALLY is not set # CONFIG_HID_CMEDIA is not set # CONFIG_HID_CYPRESS is not set # CONFIG_HID_DRAGONRISE is not set # CONFIG_HID_EMS_FF is not set # CONFIG_HID_ELECOM is not set # CONFIG_HID_EZKEY is not set # CONFIG_HID_GEMBIRD is not set # CONFIG_HID_GFRM is not set # CONFIG_HID_GLORIOUS is not set # CONFIG_HID_VIVALDI is not set # CONFIG_HID_KEYTOUCH is not set # CONFIG_HID_KYE is not set # CONFIG_HID_WALTOP is not set # CONFIG_HID_VIEWSONIC is not set # CONFIG_HID_GYRATION is not set # CONFIG_HID_ICADE is not set # CONFIG_HID_ITE is not set # CONFIG_HID_JABRA is not set # CONFIG_HID_TWINHAN is not set # CONFIG_HID_KENSINGTON is not set # CONFIG_HID_LCPOWER is not set # CONFIG_HID_LENOVO is not set # CONFIG_HID_MAGICMOUSE is not set # CONFIG_HID_MALTRON is not set # CONFIG_HID_MAYFLASH is not set # CONFIG_HID_REDRAGON is not set # CONFIG_HID_MICROSOFT is not set # CONFIG_HID_MONTEREY is not set # CONFIG_HID_MULTITOUCH is not set # CONFIG_HID_NTI is not set # CONFIG_HID_ORTEK is not set # CONFIG_HID_PANTHERLORD is not set # CONFIG_HID_PETALYNX is not set # CONFIG_HID_PICOLCD is not set # CONFIG_HID_PLANTRONICS is not set # CONFIG_HID_PRIMAX is not set # CONFIG_HID_SAITEK is not set # CONFIG_HID_SPEEDLINK is not set # CONFIG_HID_STEAM is not set # CONFIG_HID_STEELSERIES is not set # CONFIG_HID_SUNPLUS is not set # CONFIG_HID_RMI is not set # CONFIG_HID_GREENASIA is not set # CONFIG_HID_SMARTJOYPLUS is not set # CONFIG_HID_TIVO is not set # CONFIG_HID_TOPSEED is not set # CONFIG_HID_THRUSTMASTER is not set # CONFIG_HID_UDRAW_PS3 is not set # CONFIG_HID_XINMO is not set # CONFIG_HID_ZEROPLUS is not set # 
CONFIG_HID_ZYDACRON is not set # CONFIG_HID_SENSOR_HUB is not set # CONFIG_HID_ALPS is not set # end of Special HID drivers # end of HID support CONFIG_USB_OHCI_LITTLE_ENDIAN=y CONFIG_USB_SUPPORT=y # CONFIG_USB_ULPI_BUS is not set CONFIG_USB_ARCH_HAS_HCD=y # CONFIG_USB is not set # # USB port drivers # # # USB Physical Layer drivers # # CONFIG_NOP_USB_XCEIV is not set # end of USB Physical Layer drivers # CONFIG_USB_GADGET is not set # CONFIG_TYPEC is not set # CONFIG_USB_ROLE_SWITCH is not set # CONFIG_MMC is not set # CONFIG_MEMSTICK is not set # CONFIG_NEW_LEDS is not set # CONFIG_ACCESSIBILITY is not set # CONFIG_INFINIBAND is not set CONFIG_EDAC_ATOMIC_SCRUB=y CONFIG_EDAC_SUPPORT=y # CONFIG_EDAC is not set CONFIG_RTC_LIB=y CONFIG_RTC_MC146818_LIB=y # CONFIG_RTC_CLASS is not set CONFIG_DMADEVICES=y # CONFIG_DMADEVICES_DEBUG is not set # # DMA Devices # CONFIG_DMA_ACPI=y # CONFIG_ALTERA_MSGDMA is not set # CONFIG_INTEL_IDMA64 is not set # CONFIG_QCOM_HIDMA_MGMT is not set # CONFIG_QCOM_HIDMA is not set # CONFIG_DW_DMAC is not set # CONFIG_SF_PDMA is not set # # DMABUF options # CONFIG_SYNC_FILE=y # CONFIG_SW_SYNC is not set # CONFIG_UDMABUF is not set # CONFIG_DMABUF_MOVE_NOTIFY is not set # CONFIG_DMABUF_SELFTESTS is not set # CONFIG_DMABUF_HEAPS is not set # end of DMABUF options CONFIG_AUXDISPLAY=y # CONFIG_IMG_ASCII_LCD is not set CONFIG_CHARLCD_BL_OFF=y # CONFIG_CHARLCD_BL_ON is not set # CONFIG_CHARLCD_BL_FLASH is not set # CONFIG_UIO is not set # CONFIG_VFIO is not set CONFIG_VIRT_DRIVERS=y CONFIG_VMGENID=y CONFIG_VIRTIO=y CONFIG_VIRTIO_MENU=y CONFIG_VIRTIO_BALLOON=y # CONFIG_VIRTIO_MEM is not set # CONFIG_VIRTIO_INPUT is not set CONFIG_VIRTIO_MMIO=y CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES=y # CONFIG_VDPA is not set CONFIG_VHOST_MENU=y # CONFIG_VHOST_NET is not set # CONFIG_VHOST_VSOCK is not set # CONFIG_VHOST_CROSS_ENDIAN_LEGACY is not set # # Microsoft Hyper-V guest support # # CONFIG_HYPERV is not set # end of Microsoft Hyper-V guest support # 
CONFIG_GREYBUS is not set CONFIG_STAGING=y # CONFIG_COMEDI is not set # CONFIG_STAGING_MEDIA is not set # # Android # # end of Android # CONFIG_GS_FPGABOOT is not set # CONFIG_UNISYSSPAR is not set # # Gasket devices # # end of Gasket devices # CONFIG_FIELDBUS_DEV is not set CONFIG_X86_PLATFORM_DEVICES=y # CONFIG_ACPI_WMI is not set # CONFIG_ACERHDF is not set # CONFIG_ACER_WIRELESS is not set # CONFIG_ASUS_WIRELESS is not set # CONFIG_DCDBAS is not set # CONFIG_DELL_SMBIOS is not set # CONFIG_DELL_RBU is not set # CONFIG_DELL_SMO8800 is not set # CONFIG_FUJITSU_TABLET is not set # CONFIG_GPD_POCKET_FAN is not set # CONFIG_HP_WIRELESS is not set # CONFIG_SENSORS_HDAPS is not set # CONFIG_INTEL_HID_EVENT is not set # CONFIG_INTEL_VBTN is not set # CONFIG_SURFACE_PRO3_BUTTON is not set # CONFIG_SAMSUNG_Q10 is not set # CONFIG_TOSHIBA_BT_RFKILL is not set # CONFIG_TOSHIBA_HAPS is not set # CONFIG_ACPI_CMPC is not set # CONFIG_SYSTEM76_ACPI is not set # CONFIG_TOPSTAR_LAPTOP is not set # CONFIG_INTEL_RST is not set # CONFIG_INTEL_SMARTCONNECT is not set CONFIG_INTEL_TURBO_MAX_3=y # CONFIG_INTEL_UNCORE_FREQ_CONTROL is not set # CONFIG_INTEL_PUNIT_IPC is not set # CONFIG_INTEL_SCU_PLATFORM is not set # CONFIG_CHROME_PLATFORMS is not set # CONFIG_MELLANOX_PLATFORM is not set CONFIG_HAVE_CLK=y CONFIG_CLKDEV_LOOKUP=y CONFIG_HAVE_CLK_PREPARE=y CONFIG_COMMON_CLK=y # CONFIG_HWSPINLOCK is not set # # Clock Source drivers # CONFIG_CLKEVT_I8253=y CONFIG_I8253_LOCK=y CONFIG_CLKBLD_I8253=y # end of Clock Source drivers CONFIG_MAILBOX=y CONFIG_PCC=y # CONFIG_ALTERA_MBOX is not set CONFIG_IOMMU_SUPPORT=y # # Generic IOMMU Pagetable Support # # end of Generic IOMMU Pagetable Support # CONFIG_IOMMU_DEBUGFS is not set # # Remoteproc drivers # # CONFIG_REMOTEPROC is not set # end of Remoteproc drivers # # Rpmsg drivers # # CONFIG_RPMSG_QCOM_GLINK_RPM is not set # CONFIG_RPMSG_VIRTIO is not set # end of Rpmsg drivers # CONFIG_SOUNDWIRE is not set # # SOC (System On Chip) specific Drivers 
# # # Amlogic SoC drivers # # end of Amlogic SoC drivers # # Aspeed SoC drivers # # end of Aspeed SoC drivers # # Broadcom SoC drivers # # end of Broadcom SoC drivers # # NXP/Freescale QorIQ SoC drivers # # end of NXP/Freescale QorIQ SoC drivers # # i.MX SoC drivers # # end of i.MX SoC drivers # # Qualcomm SoC drivers # # end of Qualcomm SoC drivers # CONFIG_SOC_TI is not set # # Xilinx SoC drivers # # CONFIG_XILINX_VCU is not set # end of Xilinx SoC drivers # end of SOC (System On Chip) specific Drivers # CONFIG_PM_DEVFREQ is not set # CONFIG_EXTCON is not set # CONFIG_MEMORY is not set # CONFIG_IIO is not set # CONFIG_PWM is not set # # IRQ chip support # # end of IRQ chip support # CONFIG_IPACK_BUS is not set # CONFIG_RESET_CONTROLLER is not set # # PHY Subsystem # # CONFIG_GENERIC_PHY is not set # CONFIG_USB_LGM_PHY is not set # CONFIG_BCM_KONA_USB2_PHY is not set # CONFIG_PHY_PXA_28NM_HSIC is not set # CONFIG_PHY_PXA_28NM_USB2 is not set # CONFIG_PHY_INTEL_LGM_EMMC is not set # end of PHY Subsystem # CONFIG_POWERCAP is not set # CONFIG_MCB is not set # # Performance monitor support # # end of Performance monitor support CONFIG_RAS=y # # Android # # CONFIG_ANDROID is not set # end of Android # CONFIG_LIBNVDIMM is not set # CONFIG_DAX is not set # CONFIG_NVMEM is not set # # HW tracing support # # CONFIG_STM is not set # CONFIG_INTEL_TH is not set # end of HW tracing support # CONFIG_FPGA is not set # CONFIG_TEE is not set # CONFIG_UNISYS_VISORBUS is not set # CONFIG_SIOX is not set # CONFIG_SLIMBUS is not set # CONFIG_INTERCONNECT is not set # CONFIG_COUNTER is not set # end of Device Drivers # # File systems # CONFIG_DCACHE_WORD_ACCESS=y CONFIG_VALIDATE_FS_PARSER=y CONFIG_FS_IOMAP=y # CONFIG_EXT2_FS is not set # CONFIG_EXT3_FS is not set CONFIG_EXT4_FS=y CONFIG_EXT4_USE_FOR_EXT2=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_EXT4_FS_SECURITY=y CONFIG_EXT4_DEBUG=y CONFIG_JBD2=y CONFIG_JBD2_DEBUG=y CONFIG_FS_MBCACHE=y # CONFIG_REISERFS_FS is not set # CONFIG_JFS_FS is not 
set CONFIG_XFS_FS=y CONFIG_XFS_SUPPORT_V4=y CONFIG_XFS_QUOTA=y CONFIG_XFS_POSIX_ACL=y # CONFIG_XFS_RT is not set # CONFIG_XFS_ONLINE_SCRUB is not set # CONFIG_XFS_WARN is not set # CONFIG_XFS_DEBUG is not set # CONFIG_GFS2_FS is not set # CONFIG_BTRFS_FS is not set # CONFIG_NILFS2_FS is not set # CONFIG_F2FS_FS is not set # CONFIG_FS_DAX is not set CONFIG_FS_POSIX_ACL=y CONFIG_EXPORTFS=y # CONFIG_EXPORTFS_BLOCK_OPS is not set CONFIG_FILE_LOCKING=y CONFIG_MANDATORY_FILE_LOCKING=y CONFIG_FS_ENCRYPTION=y CONFIG_FS_ENCRYPTION_ALGS=y # CONFIG_FS_VERITY is not set CONFIG_FSNOTIFY=y CONFIG_DNOTIFY=y CONFIG_INOTIFY_USER=y CONFIG_FANOTIFY=y CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y CONFIG_QUOTA=y CONFIG_QUOTA_NETLINK_INTERFACE=y # CONFIG_PRINT_QUOTA_WARNING is not set # CONFIG_QUOTA_DEBUG is not set # CONFIG_QFMT_V1 is not set # CONFIG_QFMT_V2 is not set CONFIG_QUOTACTL=y CONFIG_AUTOFS4_FS=y CONFIG_AUTOFS_FS=y # CONFIG_FUSE_FS is not set CONFIG_OVERLAY_FS=y # CONFIG_OVERLAY_FS_REDIRECT_DIR is not set CONFIG_OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW=y # CONFIG_OVERLAY_FS_INDEX is not set # CONFIG_OVERLAY_FS_XINO_AUTO is not set # CONFIG_OVERLAY_FS_METACOPY is not set # # Caches # # CONFIG_FSCACHE is not set # end of Caches # # CD-ROM/DVD Filesystems # # CONFIG_ISO9660_FS is not set # CONFIG_UDF_FS is not set # end of CD-ROM/DVD Filesystems # # DOS/FAT/EXFAT/NT Filesystems # # CONFIG_MSDOS_FS is not set # CONFIG_VFAT_FS is not set # CONFIG_EXFAT_FS is not set # CONFIG_NTFS_FS is not set # end of DOS/FAT/EXFAT/NT Filesystems # # Pseudo filesystems # CONFIG_PROC_FS=y CONFIG_PROC_KCORE=y CONFIG_PROC_SYSCTL=y CONFIG_PROC_PAGE_MONITOR=y CONFIG_PROC_CHILDREN=y CONFIG_PROC_PID_ARCH_STATUS=y CONFIG_KERNFS=y CONFIG_SYSFS=y CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y CONFIG_TMPFS_XATTR=y # CONFIG_TMPFS_INODE64 is not set CONFIG_HUGETLBFS=y CONFIG_HUGETLB_PAGE=y CONFIG_MEMFD_CREATE=y CONFIG_ARCH_HAS_GIGANTIC_PAGE=y # CONFIG_CONFIGFS_FS is not set # end of Pseudo filesystems CONFIG_MISC_FILESYSTEMS=y # 
CONFIG_ORANGEFS_FS is not set # CONFIG_ADFS_FS is not set # CONFIG_AFFS_FS is not set # CONFIG_ECRYPT_FS is not set # CONFIG_HFS_FS is not set # CONFIG_HFSPLUS_FS is not set # CONFIG_BEFS_FS is not set # CONFIG_BFS_FS is not set # CONFIG_EFS_FS is not set # CONFIG_CRAMFS is not set CONFIG_SQUASHFS=y CONFIG_SQUASHFS_FILE_CACHE=y # CONFIG_SQUASHFS_FILE_DIRECT is not set CONFIG_SQUASHFS_DECOMP_SINGLE=y # CONFIG_SQUASHFS_DECOMP_MULTI is not set # CONFIG_SQUASHFS_DECOMP_MULTI_PERCPU is not set CONFIG_SQUASHFS_XATTR=y CONFIG_SQUASHFS_ZLIB=y CONFIG_SQUASHFS_LZ4=y CONFIG_SQUASHFS_LZO=y CONFIG_SQUASHFS_XZ=y CONFIG_SQUASHFS_ZSTD=y # CONFIG_SQUASHFS_4K_DEVBLK_SIZE is not set # CONFIG_SQUASHFS_EMBEDDED is not set CONFIG_SQUASHFS_FRAGMENT_CACHE_SIZE=3 # CONFIG_VXFS_FS is not set # CONFIG_MINIX_FS is not set # CONFIG_OMFS_FS is not set # CONFIG_HPFS_FS is not set # CONFIG_QNX4FS_FS is not set # CONFIG_QNX6FS_FS is not set # CONFIG_ROMFS_FS is not set CONFIG_PSTORE=y CONFIG_PSTORE_DEFLATE_COMPRESS=y # CONFIG_PSTORE_LZO_COMPRESS is not set # CONFIG_PSTORE_LZ4_COMPRESS is not set # CONFIG_PSTORE_LZ4HC_COMPRESS is not set # CONFIG_PSTORE_842_COMPRESS is not set # CONFIG_PSTORE_ZSTD_COMPRESS is not set CONFIG_PSTORE_COMPRESS=y CONFIG_PSTORE_DEFLATE_COMPRESS_DEFAULT=y CONFIG_PSTORE_COMPRESS_DEFAULT="deflate" # CONFIG_PSTORE_CONSOLE is not set # CONFIG_PSTORE_PMSG is not set # CONFIG_PSTORE_RAM is not set # CONFIG_SYSV_FS is not set # CONFIG_UFS_FS is not set # CONFIG_EROFS_FS is not set CONFIG_NETWORK_FILESYSTEMS=y CONFIG_NFS_FS=y # CONFIG_NFS_V2 is not set # CONFIG_NFS_V3 is not set CONFIG_NFS_V4=y CONFIG_NFS_SWAP=y CONFIG_NFS_V4_1=y CONFIG_NFS_V4_2=y CONFIG_PNFS_FILE_LAYOUT=y CONFIG_NFS_V4_1_IMPLEMENTATION_ID_DOMAIN="kernel.org" # CONFIG_NFS_V4_1_MIGRATION is not set CONFIG_NFS_V4_SECURITY_LABEL=y CONFIG_ROOT_NFS=y # CONFIG_NFS_USE_LEGACY_DNS is not set CONFIG_NFS_USE_KERNEL_DNS=y CONFIG_NFS_DISABLE_UDP_SUPPORT=y # CONFIG_NFS_V4_2_READ_PLUS is not set # CONFIG_NFSD is not set 
CONFIG_GRACE_PERIOD=y CONFIG_LOCKD=y CONFIG_NFS_COMMON=y CONFIG_SUNRPC=y CONFIG_SUNRPC_GSS=y CONFIG_SUNRPC_BACKCHANNEL=y CONFIG_SUNRPC_SWAP=y # CONFIG_SUNRPC_DEBUG is not set # CONFIG_CEPH_FS is not set # CONFIG_CIFS is not set # CONFIG_CODA_FS is not set # CONFIG_AFS_FS is not set CONFIG_NLS=y CONFIG_NLS_DEFAULT="utf8" # CONFIG_NLS_CODEPAGE_437 is not set # CONFIG_NLS_CODEPAGE_737 is not set # CONFIG_NLS_CODEPAGE_775 is not set # CONFIG_NLS_CODEPAGE_850 is not set # CONFIG_NLS_CODEPAGE_852 is not set # CONFIG_NLS_CODEPAGE_855 is not set # CONFIG_NLS_CODEPAGE_857 is not set # CONFIG_NLS_CODEPAGE_860 is not set # CONFIG_NLS_CODEPAGE_861 is not set # CONFIG_NLS_CODEPAGE_862 is not set # CONFIG_NLS_CODEPAGE_863 is not set # CONFIG_NLS_CODEPAGE_864 is not set # CONFIG_NLS_CODEPAGE_865 is not set # CONFIG_NLS_CODEPAGE_866 is not set # CONFIG_NLS_CODEPAGE_869 is not set # CONFIG_NLS_CODEPAGE_936 is not set # CONFIG_NLS_CODEPAGE_950 is not set # CONFIG_NLS_CODEPAGE_932 is not set # CONFIG_NLS_CODEPAGE_949 is not set # CONFIG_NLS_CODEPAGE_874 is not set # CONFIG_NLS_ISO8859_8 is not set # CONFIG_NLS_CODEPAGE_1250 is not set # CONFIG_NLS_CODEPAGE_1251 is not set # CONFIG_NLS_ASCII is not set # CONFIG_NLS_ISO8859_1 is not set # CONFIG_NLS_ISO8859_2 is not set # CONFIG_NLS_ISO8859_3 is not set # CONFIG_NLS_ISO8859_4 is not set # CONFIG_NLS_ISO8859_5 is not set # CONFIG_NLS_ISO8859_6 is not set # CONFIG_NLS_ISO8859_7 is not set # CONFIG_NLS_ISO8859_9 is not set # CONFIG_NLS_ISO8859_13 is not set # CONFIG_NLS_ISO8859_14 is not set # CONFIG_NLS_ISO8859_15 is not set # CONFIG_NLS_KOI8_R is not set # CONFIG_NLS_KOI8_U is not set # CONFIG_NLS_MAC_ROMAN is not set # CONFIG_NLS_MAC_CELTIC is not set # CONFIG_NLS_MAC_CENTEURO is not set # CONFIG_NLS_MAC_CROATIAN is not set # CONFIG_NLS_MAC_CYRILLIC is not set # CONFIG_NLS_MAC_GAELIC is not set # CONFIG_NLS_MAC_GREEK is not set # CONFIG_NLS_MAC_ICELAND is not set # CONFIG_NLS_MAC_INUIT is not set # CONFIG_NLS_MAC_ROMANIAN is not set # 
CONFIG_NLS_MAC_TURKISH is not set # CONFIG_NLS_UTF8 is not set # CONFIG_UNICODE is not set CONFIG_IO_WQ=y # end of File systems # # Security options # CONFIG_KEYS=y # CONFIG_KEYS_REQUEST_CACHE is not set CONFIG_PERSISTENT_KEYRINGS=y CONFIG_ENCRYPTED_KEYS=y # CONFIG_KEY_DH_OPERATIONS is not set # CONFIG_SECURITY_DMESG_RESTRICT is not set CONFIG_SECURITY=y CONFIG_SECURITY_WRITABLE_HOOKS=y CONFIG_SECURITYFS=y CONFIG_SECURITY_NETWORK=y CONFIG_SECURITY_NETWORK_XFRM=y # CONFIG_SECURITY_PATH is not set CONFIG_LSM_MMAP_MIN_ADDR=65536 CONFIG_HAVE_HARDENED_USERCOPY_ALLOCATOR=y CONFIG_HARDENED_USERCOPY=y CONFIG_HARDENED_USERCOPY_FALLBACK=y CONFIG_FORTIFY_SOURCE=y # CONFIG_STATIC_USERMODEHELPER is not set CONFIG_SECURITY_SELINUX=y CONFIG_SECURITY_SELINUX_BOOTPARAM=y CONFIG_SECURITY_SELINUX_DISABLE=y CONFIG_SECURITY_SELINUX_DEVELOP=y CONFIG_SECURITY_SELINUX_AVC_STATS=y CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE=1 CONFIG_SECURITY_SELINUX_SIDTAB_HASH_BITS=9 CONFIG_SECURITY_SELINUX_SID2STR_CACHE_SIZE=256 # CONFIG_SECURITY_SMACK is not set # CONFIG_SECURITY_TOMOYO is not set # CONFIG_SECURITY_APPARMOR is not set # CONFIG_SECURITY_LOADPIN is not set # CONFIG_SECURITY_YAMA is not set # CONFIG_SECURITY_SAFESETID is not set # CONFIG_SECURITY_LOCKDOWN_LSM is not set # CONFIG_INTEGRITY is not set CONFIG_DEFAULT_SECURITY_SELINUX=y # CONFIG_DEFAULT_SECURITY_DAC is not set CONFIG_LSM="lockdown,yama,loadpin,safesetid,integrity,selinux,smack,tomoyo,apparmor,bpf" # # Kernel hardening options # # # Memory initialization # CONFIG_INIT_STACK_NONE=y # CONFIG_INIT_ON_ALLOC_DEFAULT_ON is not set # CONFIG_INIT_ON_FREE_DEFAULT_ON is not set # end of Memory initialization # end of Kernel hardening options # end of Security options CONFIG_CRYPTO=y # # Crypto core or helper # CONFIG_CRYPTO_ALGAPI=y CONFIG_CRYPTO_ALGAPI2=y CONFIG_CRYPTO_AEAD=y CONFIG_CRYPTO_AEAD2=y CONFIG_CRYPTO_SKCIPHER=y CONFIG_CRYPTO_SKCIPHER2=y CONFIG_CRYPTO_HASH=y CONFIG_CRYPTO_HASH2=y CONFIG_CRYPTO_RNG=y CONFIG_CRYPTO_RNG2=y 
CONFIG_CRYPTO_RNG_DEFAULT=y CONFIG_CRYPTO_AKCIPHER2=y CONFIG_CRYPTO_AKCIPHER=y CONFIG_CRYPTO_KPP2=y CONFIG_CRYPTO_KPP=y CONFIG_CRYPTO_ACOMP2=y CONFIG_CRYPTO_MANAGER=y CONFIG_CRYPTO_MANAGER2=y # CONFIG_CRYPTO_USER is not set CONFIG_CRYPTO_MANAGER_DISABLE_TESTS=y CONFIG_CRYPTO_NULL=y CONFIG_CRYPTO_NULL2=y # CONFIG_CRYPTO_PCRYPT is not set # CONFIG_CRYPTO_CRYPTD is not set # CONFIG_CRYPTO_AUTHENC is not set # # Public-key cryptography # CONFIG_CRYPTO_RSA=y CONFIG_CRYPTO_DH=y CONFIG_CRYPTO_ECC=y CONFIG_CRYPTO_ECDH=y # CONFIG_CRYPTO_ECRDSA is not set # CONFIG_CRYPTO_SM2 is not set # CONFIG_CRYPTO_CURVE25519 is not set # CONFIG_CRYPTO_CURVE25519_X86 is not set # # Authenticated Encryption with Associated Data # # CONFIG_CRYPTO_CCM is not set # CONFIG_CRYPTO_GCM is not set # CONFIG_CRYPTO_CHACHA20POLY1305 is not set # CONFIG_CRYPTO_AEGIS128 is not set # CONFIG_CRYPTO_AEGIS128_AESNI_SSE2 is not set CONFIG_CRYPTO_SEQIV=y # CONFIG_CRYPTO_ECHAINIV is not set # # Block modes # CONFIG_CRYPTO_CBC=y # CONFIG_CRYPTO_CFB is not set CONFIG_CRYPTO_CTR=y CONFIG_CRYPTO_CTS=y CONFIG_CRYPTO_ECB=y # CONFIG_CRYPTO_LRW is not set # CONFIG_CRYPTO_OFB is not set # CONFIG_CRYPTO_PCBC is not set CONFIG_CRYPTO_XTS=y # CONFIG_CRYPTO_KEYWRAP is not set # CONFIG_CRYPTO_NHPOLY1305_SSE2 is not set # CONFIG_CRYPTO_NHPOLY1305_AVX2 is not set # CONFIG_CRYPTO_ADIANTUM is not set # CONFIG_CRYPTO_ESSIV is not set # # Hash modes # # CONFIG_CRYPTO_CMAC is not set CONFIG_CRYPTO_HMAC=y # CONFIG_CRYPTO_XCBC is not set # CONFIG_CRYPTO_VMAC is not set # # Digest # CONFIG_CRYPTO_CRC32C=y # CONFIG_CRYPTO_CRC32C_INTEL is not set # CONFIG_CRYPTO_CRC32 is not set # CONFIG_CRYPTO_CRC32_PCLMUL is not set CONFIG_CRYPTO_XXHASH=y # CONFIG_CRYPTO_BLAKE2B is not set # CONFIG_CRYPTO_BLAKE2S is not set # CONFIG_CRYPTO_BLAKE2S_X86 is not set CONFIG_CRYPTO_CRCT10DIF=y CONFIG_CRYPTO_CRCT10DIF_PCLMUL=y # CONFIG_CRYPTO_GHASH is not set # CONFIG_CRYPTO_POLY1305 is not set # CONFIG_CRYPTO_POLY1305_X86_64 is not set # 
CONFIG_CRYPTO_MD4 is not set CONFIG_CRYPTO_MD5=y # CONFIG_CRYPTO_MICHAEL_MIC is not set # CONFIG_CRYPTO_RMD128 is not set # CONFIG_CRYPTO_RMD160 is not set # CONFIG_CRYPTO_RMD256 is not set # CONFIG_CRYPTO_RMD320 is not set CONFIG_CRYPTO_SHA1=y CONFIG_CRYPTO_SHA1_SSSE3=y CONFIG_CRYPTO_SHA256_SSSE3=y CONFIG_CRYPTO_SHA512_SSSE3=y CONFIG_CRYPTO_SHA256=y CONFIG_CRYPTO_SHA512=y # CONFIG_CRYPTO_SHA3 is not set # CONFIG_CRYPTO_SM3 is not set # CONFIG_CRYPTO_STREEBOG is not set # CONFIG_CRYPTO_TGR192 is not set # CONFIG_CRYPTO_WP512 is not set # CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL is not set # # Ciphers # CONFIG_CRYPTO_AES=y CONFIG_CRYPTO_AES_TI=y # CONFIG_CRYPTO_AES_NI_INTEL is not set # CONFIG_CRYPTO_BLOWFISH is not set # CONFIG_CRYPTO_BLOWFISH_X86_64 is not set # CONFIG_CRYPTO_CAMELLIA is not set # CONFIG_CRYPTO_CAMELLIA_X86_64 is not set # CONFIG_CRYPTO_CAMELLIA_AESNI_AVX_X86_64 is not set # CONFIG_CRYPTO_CAMELLIA_AESNI_AVX2_X86_64 is not set # CONFIG_CRYPTO_CAST5 is not set # CONFIG_CRYPTO_CAST5_AVX_X86_64 is not set # CONFIG_CRYPTO_CAST6 is not set # CONFIG_CRYPTO_CAST6_AVX_X86_64 is not set # CONFIG_CRYPTO_DES is not set # CONFIG_CRYPTO_DES3_EDE_X86_64 is not set # CONFIG_CRYPTO_FCRYPT is not set # CONFIG_CRYPTO_SALSA20 is not set # CONFIG_CRYPTO_CHACHA20 is not set # CONFIG_CRYPTO_CHACHA20_X86_64 is not set # CONFIG_CRYPTO_SERPENT is not set # CONFIG_CRYPTO_SERPENT_SSE2_X86_64 is not set # CONFIG_CRYPTO_SERPENT_AVX_X86_64 is not set # CONFIG_CRYPTO_SERPENT_AVX2_X86_64 is not set # CONFIG_CRYPTO_SM4 is not set # CONFIG_CRYPTO_TWOFISH is not set # CONFIG_CRYPTO_TWOFISH_X86_64 is not set # CONFIG_CRYPTO_TWOFISH_X86_64_3WAY is not set # CONFIG_CRYPTO_TWOFISH_AVX_X86_64 is not set # # Compression # CONFIG_CRYPTO_DEFLATE=y CONFIG_CRYPTO_LZO=y # CONFIG_CRYPTO_842 is not set # CONFIG_CRYPTO_LZ4 is not set # CONFIG_CRYPTO_LZ4HC is not set # CONFIG_CRYPTO_ZSTD is not set # # Random Number Generation # # CONFIG_CRYPTO_ANSI_CPRNG is not set CONFIG_CRYPTO_DRBG_MENU=y 
CONFIG_CRYPTO_DRBG_HMAC=y CONFIG_CRYPTO_DRBG_HASH=y CONFIG_CRYPTO_DRBG_CTR=y CONFIG_CRYPTO_DRBG=y CONFIG_CRYPTO_JITTERENTROPY=y # CONFIG_CRYPTO_USER_API_HASH is not set # CONFIG_CRYPTO_USER_API_SKCIPHER is not set # CONFIG_CRYPTO_USER_API_RNG is not set # CONFIG_CRYPTO_USER_API_AEAD is not set CONFIG_CRYPTO_HASH_INFO=y # CONFIG_CRYPTO_HW is not set CONFIG_ASYMMETRIC_KEY_TYPE=y CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=y CONFIG_X509_CERTIFICATE_PARSER=y # CONFIG_PKCS8_PRIVATE_KEY_PARSER is not set CONFIG_PKCS7_MESSAGE_PARSER=y # # Certificates for signature checking # CONFIG_SYSTEM_TRUSTED_KEYRING=y CONFIG_SYSTEM_TRUSTED_KEYS="" # CONFIG_SYSTEM_EXTRA_CERTIFICATE is not set # CONFIG_SECONDARY_TRUSTED_KEYRING is not set CONFIG_SYSTEM_BLACKLIST_KEYRING=y CONFIG_SYSTEM_BLACKLIST_HASH_LIST="" # CONFIG_SYSTEM_REVOCATION_LIST is not set # end of Certificates for signature checking # # Library routines # # CONFIG_PACKING is not set CONFIG_BITREVERSE=y CONFIG_GENERIC_STRNCPY_FROM_USER=y CONFIG_GENERIC_STRNLEN_USER=y CONFIG_GENERIC_NET_UTILS=y CONFIG_GENERIC_FIND_FIRST_BIT=y # CONFIG_CORDIC is not set # CONFIG_PRIME_NUMBERS is not set CONFIG_RATIONAL=y CONFIG_GENERIC_PCI_IOMAP=y CONFIG_GENERIC_IOMAP=y CONFIG_ARCH_USE_CMPXCHG_LOCKREF=y CONFIG_ARCH_HAS_FAST_MULTIPLIER=y CONFIG_ARCH_USE_SYM_ANNOTATIONS=y # # Crypto library routines # CONFIG_CRYPTO_LIB_AES=y CONFIG_CRYPTO_LIB_BLAKE2S_GENERIC=y # CONFIG_CRYPTO_LIB_CHACHA is not set # CONFIG_CRYPTO_LIB_CURVE25519 is not set CONFIG_CRYPTO_LIB_POLY1305_RSIZE=11 # CONFIG_CRYPTO_LIB_POLY1305 is not set # CONFIG_CRYPTO_LIB_CHACHA20POLY1305 is not set CONFIG_CRYPTO_LIB_SHA256=y # end of Crypto library routines CONFIG_LIB_MEMNEQ=y CONFIG_CRC_CCITT=y CONFIG_CRC16=y CONFIG_CRC_T10DIF=y # CONFIG_CRC_ITU_T is not set CONFIG_CRC32=y # CONFIG_CRC32_SELFTEST is not set CONFIG_CRC32_SLICEBY8=y # CONFIG_CRC32_SLICEBY4 is not set # CONFIG_CRC32_SARWATE is not set # CONFIG_CRC32_BIT is not set # CONFIG_CRC64 is not set # CONFIG_CRC4 is not set # 
CONFIG_CRC7 is not set CONFIG_LIBCRC32C=y # CONFIG_CRC8 is not set CONFIG_XXHASH=y # CONFIG_RANDOM32_SELFTEST is not set CONFIG_ZLIB_INFLATE=y CONFIG_ZLIB_DEFLATE=y CONFIG_LZO_COMPRESS=y CONFIG_LZO_DECOMPRESS=y CONFIG_LZ4_DECOMPRESS=y CONFIG_ZSTD_DECOMPRESS=y CONFIG_XZ_DEC=y CONFIG_XZ_DEC_X86=y CONFIG_XZ_DEC_POWERPC=y CONFIG_XZ_DEC_IA64=y CONFIG_XZ_DEC_ARM=y CONFIG_XZ_DEC_ARMTHUMB=y CONFIG_XZ_DEC_SPARC=y CONFIG_XZ_DEC_BCJ=y # CONFIG_XZ_DEC_TEST is not set CONFIG_DECOMPRESS_GZIP=y CONFIG_DECOMPRESS_BZIP2=y CONFIG_DECOMPRESS_LZMA=y CONFIG_DECOMPRESS_XZ=y CONFIG_DECOMPRESS_LZO=y CONFIG_DECOMPRESS_LZ4=y CONFIG_DECOMPRESS_ZSTD=y CONFIG_XARRAY_MULTI=y CONFIG_ASSOCIATIVE_ARRAY=y CONFIG_HAS_IOMEM=y CONFIG_HAS_IOPORT_MAP=y CONFIG_HAS_DMA=y # CONFIG_DMA_PAGE_TOUCHING is not set CONFIG_NEED_SG_DMA_LENGTH=y CONFIG_NEED_DMA_MAP_STATE=y CONFIG_ARCH_DMA_ADDR_T_64BIT=y CONFIG_SWIOTLB=y # CONFIG_DMA_API_DEBUG is not set CONFIG_SGL_ALLOC=y CONFIG_CPU_RMAP=y CONFIG_DQL=y CONFIG_NLATTR=y CONFIG_CLZ_TAB=y CONFIG_IRQ_POLL=y CONFIG_MPILIB=y CONFIG_OID_REGISTRY=y CONFIG_HAVE_GENERIC_VDSO=y CONFIG_GENERIC_GETTIMEOFDAY=y CONFIG_GENERIC_VDSO_TIME_NS=y CONFIG_SG_POOL=y CONFIG_ARCH_HAS_PMEM_API=y CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE=y CONFIG_ARCH_HAS_COPY_MC=y CONFIG_ARCH_STACKWALK=y CONFIG_SBITMAP=y # CONFIG_STRING_SELFTEST is not set # end of Library routines # # Kernel hacking # # # printk and dmesg options # CONFIG_PRINTK_TIME=y # CONFIG_PRINTK_CALLER is not set CONFIG_CONSOLE_LOGLEVEL_DEFAULT=7 CONFIG_CONSOLE_LOGLEVEL_QUIET=4 CONFIG_MESSAGE_LOGLEVEL_DEFAULT=4 # CONFIG_BOOT_PRINTK_DELAY is not set # CONFIG_DYNAMIC_DEBUG is not set # CONFIG_DYNAMIC_DEBUG_CORE is not set CONFIG_SYMBOLIC_ERRNAME=y CONFIG_DEBUG_BUGVERBOSE=y # end of printk and dmesg options # # Compile-time checks and compiler options # # CONFIG_DEBUG_INFO is not set # CONFIG_ENABLE_MUST_CHECK is not set CONFIG_FRAME_WARN=2048 CONFIG_STRIP_ASM_SYMS=y # CONFIG_READABLE_ASM is not set # CONFIG_HEADERS_INSTALL is not set 
CONFIG_DEBUG_SECTION_MISMATCH=y CONFIG_SECTION_MISMATCH_WARN_ONLY=y CONFIG_FRAME_POINTER=y CONFIG_STACK_VALIDATION=y # CONFIG_DEBUG_FORCE_WEAK_PER_CPU is not set # end of Compile-time checks and compiler options # # Generic Kernel Debugging Instruments # CONFIG_MAGIC_SYSRQ=y CONFIG_MAGIC_SYSRQ_DEFAULT_ENABLE=0x1 CONFIG_MAGIC_SYSRQ_SERIAL=y CONFIG_MAGIC_SYSRQ_SERIAL_SEQUENCE="" CONFIG_DEBUG_FS=y CONFIG_DEBUG_FS_ALLOW_ALL=y # CONFIG_DEBUG_FS_DISALLOW_MOUNT is not set # CONFIG_DEBUG_FS_ALLOW_NONE is not set CONFIG_HAVE_ARCH_KGDB=y # CONFIG_KGDB is not set CONFIG_ARCH_HAS_UBSAN_SANITIZE_ALL=y # CONFIG_UBSAN is not set CONFIG_HAVE_ARCH_KCSAN=y # end of Generic Kernel Debugging Instruments CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_MISC=y # # Memory Debugging # # CONFIG_PAGE_EXTENSION is not set # CONFIG_DEBUG_PAGEALLOC is not set # CONFIG_PAGE_OWNER is not set # CONFIG_PAGE_POISONING is not set # CONFIG_DEBUG_RODATA_TEST is not set CONFIG_ARCH_HAS_DEBUG_WX=y # CONFIG_DEBUG_WX is not set CONFIG_GENERIC_PTDUMP=y # CONFIG_PTDUMP_DEBUGFS is not set # CONFIG_DEBUG_OBJECTS is not set # CONFIG_SLUB_DEBUG_ON is not set # CONFIG_SLUB_STATS is not set CONFIG_HAVE_DEBUG_KMEMLEAK=y # CONFIG_DEBUG_KMEMLEAK is not set # CONFIG_DEBUG_STACK_USAGE is not set CONFIG_SCHED_STACK_END_CHECK=y CONFIG_ARCH_HAS_DEBUG_VM_PGTABLE=y # CONFIG_DEBUG_VM is not set # CONFIG_DEBUG_VM_PGTABLE is not set CONFIG_ARCH_HAS_DEBUG_VIRTUAL=y # CONFIG_DEBUG_VIRTUAL is not set CONFIG_DEBUG_MEMORY_INIT=y # CONFIG_DEBUG_PER_CPU_MAPS is not set CONFIG_HAVE_ARCH_KASAN=y CONFIG_HAVE_ARCH_KASAN_VMALLOC=y CONFIG_CC_HAS_KASAN_GENERIC=y CONFIG_CC_HAS_WORKING_NOSANITIZE_ADDRESS=y # CONFIG_KASAN is not set # end of Memory Debugging # CONFIG_DEBUG_SHIRQ is not set # # Debug Oops, Lockups and Hangs # # CONFIG_PANIC_ON_OOPS is not set CONFIG_PANIC_ON_OOPS_VALUE=0 CONFIG_PANIC_TIMEOUT=0 CONFIG_LOCKUP_DETECTOR=y CONFIG_SOFTLOCKUP_DETECTOR=y # CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0 
CONFIG_HARDLOCKUP_DETECTOR_PERF=y CONFIG_HARDLOCKUP_CHECK_TIMESTAMP=y CONFIG_HARDLOCKUP_DETECTOR=y # CONFIG_BOOTPARAM_HARDLOCKUP_PANIC is not set CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE=0 CONFIG_DETECT_HUNG_TASK=y CONFIG_DEFAULT_HUNG_TASK_TIMEOUT=120 # CONFIG_BOOTPARAM_HUNG_TASK_PANIC is not set CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE=0 CONFIG_WQ_WATCHDOG=y # end of Debug Oops, Lockups and Hangs # # Scheduler Debugging # # CONFIG_SCHED_DEBUG is not set CONFIG_SCHED_INFO=y # CONFIG_SCHEDSTATS is not set # end of Scheduler Debugging # CONFIG_DEBUG_TIMEKEEPING is not set # # Lock Debugging (spinlocks, mutexes, etc...) # CONFIG_LOCK_DEBUGGING_SUPPORT=y # CONFIG_PROVE_LOCKING is not set # CONFIG_LOCK_STAT is not set # CONFIG_DEBUG_RT_MUTEXES is not set # CONFIG_DEBUG_SPINLOCK is not set # CONFIG_DEBUG_MUTEXES is not set # CONFIG_DEBUG_WW_MUTEX_SLOWPATH is not set # CONFIG_DEBUG_RWSEMS is not set # CONFIG_DEBUG_LOCK_ALLOC is not set # CONFIG_DEBUG_ATOMIC_SLEEP is not set # CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set # CONFIG_LOCK_TORTURE_TEST is not set # CONFIG_WW_MUTEX_SELFTEST is not set # CONFIG_SCF_TORTURE_TEST is not set # CONFIG_CSD_LOCK_WAIT_DEBUG is not set # end of Lock Debugging (spinlocks, mutexes, etc...) 
CONFIG_STACKTRACE=y # CONFIG_WARN_ALL_UNSEEDED_RANDOM is not set # CONFIG_DEBUG_KOBJECT is not set # # Debug kernel data structures # CONFIG_DEBUG_LIST=y # CONFIG_DEBUG_PLIST is not set # CONFIG_DEBUG_SG is not set # CONFIG_DEBUG_NOTIFIERS is not set CONFIG_BUG_ON_DATA_CORRUPTION=y # end of Debug kernel data structures # CONFIG_DEBUG_CREDENTIALS is not set # # RCU Debugging # # CONFIG_RCU_SCALE_TEST is not set # CONFIG_RCU_TORTURE_TEST is not set # CONFIG_RCU_REF_SCALE_TEST is not set CONFIG_RCU_CPU_STALL_TIMEOUT=59 # CONFIG_RCU_TRACE is not set # CONFIG_RCU_EQS_DEBUG is not set # end of RCU Debugging # CONFIG_DEBUG_WQ_FORCE_RR_CPU is not set # CONFIG_DEBUG_BLOCK_EXT_DEVT is not set # CONFIG_CPU_HOTPLUG_STATE_CONTROL is not set # CONFIG_LATENCYTOP is not set CONFIG_USER_STACKTRACE_SUPPORT=y CONFIG_HAVE_FUNCTION_TRACER=y CONFIG_HAVE_FUNCTION_GRAPH_TRACER=y CONFIG_HAVE_DYNAMIC_FTRACE=y CONFIG_HAVE_DYNAMIC_FTRACE_WITH_REGS=y CONFIG_HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS=y CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y CONFIG_HAVE_SYSCALL_TRACEPOINTS=y CONFIG_HAVE_FENTRY=y CONFIG_HAVE_C_RECORDMCOUNT=y CONFIG_TRACING_SUPPORT=y # CONFIG_FTRACE is not set # CONFIG_SAMPLES is not set CONFIG_ARCH_HAS_DEVMEM_IS_ALLOWED=y CONFIG_STRICT_DEVMEM=y # CONFIG_IO_STRICT_DEVMEM is not set # # x86 Debugging # CONFIG_TRACE_IRQFLAGS_SUPPORT=y CONFIG_TRACE_IRQFLAGS_NMI_SUPPORT=y CONFIG_X86_VERBOSE_BOOTUP=y CONFIG_EARLY_PRINTK=y # CONFIG_DEBUG_TLBFLUSH is not set CONFIG_HAVE_MMIOTRACE_SUPPORT=y # CONFIG_X86_DECODER_SELFTEST is not set CONFIG_IO_DELAY_0X80=y # CONFIG_IO_DELAY_0XED is not set # CONFIG_IO_DELAY_UDELAY is not set # CONFIG_IO_DELAY_NONE is not set # CONFIG_DEBUG_BOOT_PARAMS is not set # CONFIG_CPA_DEBUG is not set # CONFIG_DEBUG_ENTRY is not set # CONFIG_DEBUG_NMI_SELFTEST is not set # CONFIG_X86_DEBUG_FPU is not set # CONFIG_UNWINDER_ORC is not set CONFIG_UNWINDER_FRAME_POINTER=y # end of x86 Debugging # # Kernel Testing and Coverage # # CONFIG_KUNIT is not set # 
CONFIG_NOTIFIER_ERROR_INJECTION is not set # CONFIG_FAULT_INJECTION is not set CONFIG_ARCH_HAS_KCOV=y CONFIG_CC_HAS_SANCOV_TRACE_PC=y # CONFIG_KCOV is not set # CONFIG_RUNTIME_TESTING_MENU is not set # CONFIG_MEMTEST is not set # end of Kernel Testing and Coverage # end of Kernel hacking ================================================ FILE: resources/guest_configs/microvm-kernel-ci-x86_64-6.1.config ================================================ # # Automatically generated file; DO NOT EDIT. # Linux/x86_64 6.1.102-1.182.amzn2023.x86_64 Kernel Configuration # CONFIG_CC_VERSION_TEXT="gcc (GCC) 11.4.1 20230605 (Red Hat 11.4.1-2)" CONFIG_CC_IS_GCC=y CONFIG_GCC_VERSION=110401 CONFIG_CLANG_VERSION=0 CONFIG_AS_IS_GNU=y CONFIG_AS_VERSION=23900 CONFIG_LD_IS_BFD=y CONFIG_LD_VERSION=23900 CONFIG_LLD_VERSION=0 CONFIG_CC_CAN_LINK=y CONFIG_CC_CAN_LINK_STATIC=y CONFIG_CC_HAS_ASM_GOTO_OUTPUT=y CONFIG_CC_HAS_ASM_GOTO_TIED_OUTPUT=y CONFIG_GCC_ASM_GOTO_OUTPUT_WORKAROUND=y CONFIG_CC_HAS_ASM_INLINE=y CONFIG_CC_HAS_NO_PROFILE_FN_ATTR=y CONFIG_PAHOLE_VERSION=122 CONFIG_IRQ_WORK=y CONFIG_BUILDTIME_TABLE_SORT=y CONFIG_THREAD_INFO_IN_TASK=y # # General setup # CONFIG_INIT_ENV_ARG_LIMIT=32 # CONFIG_COMPILE_TEST is not set # CONFIG_WERROR is not set CONFIG_LOCALVERSION="" # CONFIG_LOCALVERSION_AUTO is not set CONFIG_BUILD_SALT="6.1.102-1.182.amzn2023.x86_64" CONFIG_HAVE_KERNEL_GZIP=y CONFIG_HAVE_KERNEL_BZIP2=y CONFIG_HAVE_KERNEL_LZMA=y CONFIG_HAVE_KERNEL_XZ=y CONFIG_HAVE_KERNEL_LZO=y CONFIG_HAVE_KERNEL_LZ4=y CONFIG_HAVE_KERNEL_ZSTD=y CONFIG_KERNEL_GZIP=y # CONFIG_KERNEL_BZIP2 is not set # CONFIG_KERNEL_LZMA is not set # CONFIG_KERNEL_XZ is not set # CONFIG_KERNEL_LZO is not set # CONFIG_KERNEL_LZ4 is not set # CONFIG_KERNEL_ZSTD is not set CONFIG_DEFAULT_INIT="" CONFIG_DEFAULT_HOSTNAME="(none)" CONFIG_SYSVIPC=y CONFIG_SYSVIPC_SYSCTL=y CONFIG_SYSVIPC_COMPAT=y CONFIG_POSIX_MQUEUE=y CONFIG_POSIX_MQUEUE_SYSCTL=y # CONFIG_WATCH_QUEUE is not set CONFIG_CROSS_MEMORY_ATTACH=y # CONFIG_USELIB is 
not set CONFIG_AUDIT=y CONFIG_HAVE_ARCH_AUDITSYSCALL=y CONFIG_AUDITSYSCALL=y # # IRQ subsystem # CONFIG_GENERIC_IRQ_PROBE=y CONFIG_GENERIC_IRQ_SHOW=y CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK=y CONFIG_GENERIC_PENDING_IRQ=y CONFIG_GENERIC_IRQ_MIGRATION=y CONFIG_HARDIRQS_SW_RESEND=y CONFIG_IRQ_DOMAIN=y CONFIG_IRQ_DOMAIN_HIERARCHY=y CONFIG_IRQ_MSI_IOMMU=y CONFIG_GENERIC_IRQ_MATRIX_ALLOCATOR=y CONFIG_GENERIC_IRQ_RESERVATION_MODE=y CONFIG_IRQ_FORCED_THREADING=y CONFIG_SPARSE_IRQ=y # CONFIG_GENERIC_IRQ_DEBUGFS is not set # end of IRQ subsystem CONFIG_CLOCKSOURCE_WATCHDOG=y CONFIG_ARCH_CLOCKSOURCE_INIT=y CONFIG_CLOCKSOURCE_VALIDATE_LAST_CYCLE=y CONFIG_GENERIC_TIME_VSYSCALL=y CONFIG_GENERIC_CLOCKEVENTS=y CONFIG_GENERIC_CLOCKEVENTS_BROADCAST=y CONFIG_GENERIC_CLOCKEVENTS_MIN_ADJUST=y CONFIG_GENERIC_CMOS_UPDATE=y CONFIG_HAVE_POSIX_CPU_TIMERS_TASK_WORK=y CONFIG_POSIX_CPU_TIMERS_TASK_WORK=y CONFIG_CONTEXT_TRACKING=y CONFIG_CONTEXT_TRACKING_IDLE=y # # Timers subsystem # CONFIG_TICK_ONESHOT=y CONFIG_NO_HZ_COMMON=y # CONFIG_HZ_PERIODIC is not set CONFIG_NO_HZ_IDLE=y # CONFIG_NO_HZ_FULL is not set CONFIG_NO_HZ=y CONFIG_HIGH_RES_TIMERS=y CONFIG_CLOCKSOURCE_WATCHDOG_MAX_SKEW_US=100 # end of Timers subsystem CONFIG_BPF=y CONFIG_HAVE_EBPF_JIT=y CONFIG_ARCH_WANT_DEFAULT_BPF_JIT=y # # BPF subsystem # CONFIG_BPF_SYSCALL=y CONFIG_BPF_UNPRIV_DEFAULT_OFF=y CONFIG_USERMODE_DRIVER=y CONFIG_BPF_PRELOAD=y CONFIG_BPF_PRELOAD_UMD=y # end of BPF subsystem CONFIG_PREEMPT_BUILD=y CONFIG_PREEMPT_NONE=y # CONFIG_PREEMPT_VOLUNTARY is not set # CONFIG_PREEMPT is not set CONFIG_PREEMPT_COUNT=y CONFIG_PREEMPTION=y CONFIG_PREEMPT_DYNAMIC=y # CONFIG_SCHED_CORE is not set # # CPU/Task time and stats accounting # CONFIG_TICK_CPU_ACCOUNTING=y # CONFIG_VIRT_CPU_ACCOUNTING_GEN is not set # CONFIG_IRQ_TIME_ACCOUNTING is not set CONFIG_HAVE_SCHED_AVG_IRQ=y CONFIG_BSD_PROCESS_ACCT=y CONFIG_BSD_PROCESS_ACCT_V3=y CONFIG_TASKSTATS=y CONFIG_TASK_DELAY_ACCT=y CONFIG_TASK_XACCT=y CONFIG_TASK_IO_ACCOUNTING=y CONFIG_PSI=y 
CONFIG_PSI_DEFAULT_DISABLED=y # end of CPU/Task time and stats accounting CONFIG_CPU_ISOLATION=y # # RCU Subsystem # CONFIG_TREE_RCU=y CONFIG_PREEMPT_RCU=y # CONFIG_RCU_EXPERT is not set CONFIG_SRCU=y CONFIG_TREE_SRCU=y CONFIG_TASKS_RCU_GENERIC=y CONFIG_TASKS_RCU=y CONFIG_TASKS_TRACE_RCU=y CONFIG_RCU_STALL_COMMON=y CONFIG_RCU_NEED_SEGCBLIST=y # end of RCU Subsystem # CONFIG_IKCONFIG is not set # CONFIG_IKHEADERS is not set CONFIG_LOG_BUF_SHIFT=17 CONFIG_LOG_CPU_MAX_BUF_SHIFT=12 CONFIG_PRINTK_SAFE_LOG_BUF_SHIFT=13 # CONFIG_PRINTK_INDEX is not set CONFIG_HAVE_UNSTABLE_SCHED_CLOCK=y # # Scheduler features # # CONFIG_UCLAMP_TASK is not set # end of Scheduler features CONFIG_ARCH_SUPPORTS_NUMA_BALANCING=y CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH=y CONFIG_CC_HAS_INT128=y CONFIG_CC_IMPLICIT_FALLTHROUGH="-Wimplicit-fallthrough=5" CONFIG_GCC10_NO_ARRAY_BOUNDS=y CONFIG_CC_NO_ARRAY_BOUNDS=y CONFIG_ARCH_SUPPORTS_INT128=y CONFIG_NUMA_BALANCING=y # CONFIG_NUMA_BALANCING_DEFAULT_ENABLED is not set CONFIG_CGROUPS=y CONFIG_PAGE_COUNTER=y # CONFIG_CGROUP_FAVOR_DYNMODS is not set CONFIG_MEMCG=y CONFIG_MEMCG_KMEM=y CONFIG_BLK_CGROUP=y CONFIG_CGROUP_WRITEBACK=y CONFIG_CGROUP_SCHED=y CONFIG_FAIR_GROUP_SCHED=y CONFIG_CFS_BANDWIDTH=y CONFIG_RT_GROUP_SCHED=y CONFIG_CGROUP_PIDS=y # CONFIG_CGROUP_RDMA is not set CONFIG_CGROUP_FREEZER=y CONFIG_CGROUP_HUGETLB=y CONFIG_CPUSETS=y CONFIG_PROC_PID_CPUSET=y CONFIG_CGROUP_DEVICE=y CONFIG_CGROUP_CPUACCT=y CONFIG_CGROUP_PERF=y CONFIG_CGROUP_BPF=y # CONFIG_CGROUP_MISC is not set # CONFIG_CGROUP_DEBUG is not set CONFIG_SOCK_CGROUP_DATA=y CONFIG_NAMESPACES=y CONFIG_UTS_NS=y CONFIG_TIME_NS=y CONFIG_IPC_NS=y CONFIG_USER_NS=y CONFIG_PID_NS=y CONFIG_NET_NS=y # CONFIG_CHECKPOINT_RESTORE is not set CONFIG_SCHED_AUTOGROUP=y # CONFIG_SYSFS_DEPRECATED is not set CONFIG_RELAY=y CONFIG_BLK_DEV_INITRD=y CONFIG_INITRAMFS_SOURCE="" CONFIG_RD_GZIP=y CONFIG_RD_BZIP2=y CONFIG_RD_LZMA=y CONFIG_RD_XZ=y CONFIG_RD_LZO=y CONFIG_RD_LZ4=y CONFIG_RD_ZSTD=y # CONFIG_BOOT_CONFIG 
is not set CONFIG_INITRAMFS_PRESERVE_MTIME=y CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_LD_ORPHAN_WARN=y CONFIG_SYSCTL=y CONFIG_HAVE_UID16=y CONFIG_SYSCTL_EXCEPTION_TRACE=y CONFIG_HAVE_PCSPKR_PLATFORM=y # CONFIG_EXPERT is not set CONFIG_UID16=y CONFIG_MULTIUSER=y CONFIG_SGETMASK_SYSCALL=y CONFIG_SYSFS_SYSCALL=y CONFIG_FHANDLE=y CONFIG_POSIX_TIMERS=y CONFIG_PRINTK=y CONFIG_BUG=y CONFIG_ELF_CORE=y CONFIG_PCSPKR_PLATFORM=y CONFIG_BASE_FULL=y CONFIG_FUTEX=y CONFIG_FUTEX_PI=y CONFIG_EPOLL=y CONFIG_SIGNALFD=y CONFIG_TIMERFD=y CONFIG_EVENTFD=y CONFIG_SHMEM=y CONFIG_AIO=y CONFIG_IO_URING=y CONFIG_ADVISE_SYSCALLS=y CONFIG_MEMBARRIER=y CONFIG_KALLSYMS=y # CONFIG_KALLSYMS_ALL is not set CONFIG_KALLSYMS_ABSOLUTE_PERCPU=y CONFIG_KALLSYMS_BASE_RELATIVE=y CONFIG_ARCH_HAS_MEMBARRIER_SYNC_CORE=y CONFIG_RSEQ=y # CONFIG_EMBEDDED is not set CONFIG_HAVE_PERF_EVENTS=y # # Kernel Performance Events And Counters # CONFIG_PERF_EVENTS=y # CONFIG_DEBUG_PERF_USE_VMALLOC is not set # end of Kernel Performance Events And Counters CONFIG_PROFILING=y # end of General setup CONFIG_64BIT=y CONFIG_X86_64=y CONFIG_X86=y CONFIG_INSTRUCTION_DECODER=y CONFIG_OUTPUT_FORMAT="elf64-x86-64" CONFIG_LOCKDEP_SUPPORT=y CONFIG_STACKTRACE_SUPPORT=y CONFIG_MMU=y CONFIG_ARCH_MMAP_RND_BITS_MIN=28 CONFIG_ARCH_MMAP_RND_BITS_MAX=32 CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MIN=8 CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MAX=16 CONFIG_GENERIC_ISA_DMA=y CONFIG_GENERIC_BUG=y CONFIG_GENERIC_BUG_RELATIVE_POINTERS=y CONFIG_ARCH_MAY_HAVE_PC_FDC=y CONFIG_GENERIC_CALIBRATE_DELAY=y CONFIG_ARCH_HAS_CPU_RELAX=y CONFIG_ARCH_HIBERNATION_POSSIBLE=y CONFIG_ARCH_NR_GPIO=1024 CONFIG_ARCH_SUSPEND_POSSIBLE=y CONFIG_AUDIT_ARCH=y CONFIG_X86_64_SMP=y CONFIG_ARCH_SUPPORTS_UPROBES=y CONFIG_FIX_EARLYCON_MEM=y CONFIG_PGTABLE_LEVELS=4 CONFIG_CC_HAS_SANE_STACKPROTECTOR=y # # Processor type and features # CONFIG_SMP=y CONFIG_X86_FEATURE_NAMES=y CONFIG_X86_X2APIC=y # CONFIG_X86_MPPARSE is not set # CONFIG_GOLDFISH is not set 
# CONFIG_X86_CPU_RESCTRL is not set # CONFIG_X86_EXTENDED_PLATFORM is not set # CONFIG_X86_AMD_PLATFORM_DEVICE is not set CONFIG_SCHED_OMIT_FRAME_POINTER=y CONFIG_HYPERVISOR_GUEST=y CONFIG_PARAVIRT=y # CONFIG_PARAVIRT_DEBUG is not set CONFIG_PARAVIRT_SPINLOCKS=y CONFIG_X86_HV_CALLBACK_VECTOR=y # CONFIG_XEN is not set CONFIG_KVM_GUEST=y CONFIG_ARCH_CPUIDLE_HALTPOLL=y CONFIG_PVH=y CONFIG_PARAVIRT_TIME_ACCOUNTING=y CONFIG_PARAVIRT_CLOCK=y # CONFIG_ACRN_GUEST is not set # CONFIG_INTEL_TDX_GUEST is not set # CONFIG_MK8 is not set # CONFIG_MPSC is not set # CONFIG_MCORE2 is not set # CONFIG_MATOM is not set CONFIG_GENERIC_CPU=y CONFIG_X86_INTERNODE_CACHE_SHIFT=6 CONFIG_X86_L1_CACHE_SHIFT=6 CONFIG_X86_TSC=y CONFIG_X86_CMPXCHG64=y CONFIG_X86_CMOV=y CONFIG_X86_MINIMUM_CPU_FAMILY=64 CONFIG_X86_DEBUGCTLMSR=y CONFIG_IA32_FEAT_CTL=y CONFIG_X86_VMX_FEATURE_NAMES=y CONFIG_CPU_SUP_INTEL=y CONFIG_CPU_SUP_AMD=y CONFIG_CPU_SUP_HYGON=y CONFIG_CPU_SUP_CENTAUR=y CONFIG_CPU_SUP_ZHAOXIN=y CONFIG_HPET_TIMER=y CONFIG_DMI=y # CONFIG_MAXSMP is not set CONFIG_NR_CPUS_RANGE_BEGIN=2 CONFIG_NR_CPUS_RANGE_END=512 CONFIG_NR_CPUS_DEFAULT=64 CONFIG_NR_CPUS=64 CONFIG_SCHED_CLUSTER=y CONFIG_SCHED_SMT=y CONFIG_SCHED_MC=y CONFIG_SCHED_MC_PRIO=y CONFIG_X86_LOCAL_APIC=y CONFIG_X86_IO_APIC=y CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS=y # CONFIG_X86_MCE is not set # # Performance monitoring # # CONFIG_PERF_EVENTS_AMD_POWER is not set CONFIG_PERF_EVENTS_AMD_UNCORE=y # CONFIG_PERF_EVENTS_AMD_BRS is not set # end of Performance monitoring CONFIG_X86_16BIT=y CONFIG_X86_ESPFIX64=y CONFIG_X86_VSYSCALL_EMULATION=y CONFIG_X86_IOPL_IOPERM=y # CONFIG_MICROCODE is not set CONFIG_X86_MSR=y CONFIG_X86_CPUID=y # CONFIG_X86_5LEVEL is not set CONFIG_X86_DIRECT_GBPAGES=y # CONFIG_X86_CPA_STATISTICS is not set # CONFIG_AMD_MEM_ENCRYPT is not set CONFIG_NUMA=y # CONFIG_NUMA_EMU is not set CONFIG_NODES_SHIFT=10 CONFIG_ARCH_SPARSEMEM_ENABLE=y CONFIG_ARCH_SPARSEMEM_DEFAULT=y CONFIG_ARCH_MEMORY_PROBE=y CONFIG_ARCH_PROC_KCORE_TEXT=y 
CONFIG_ILLEGAL_POINTER_VALUE=0xdead000000000000 # CONFIG_X86_PMEM_LEGACY is not set CONFIG_X86_CHECK_BIOS_CORRUPTION=y CONFIG_X86_BOOTPARAM_MEMORY_CORRUPTION_CHECK=y CONFIG_MTRR=y CONFIG_MTRR_SANITIZER=y CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT=0 CONFIG_MTRR_SANITIZER_SPARE_REG_NR_DEFAULT=1 CONFIG_X86_PAT=y CONFIG_ARCH_USES_PG_UNCACHED=y CONFIG_X86_UMIP=y CONFIG_CC_HAS_IBT=y # CONFIG_X86_KERNEL_IBT is not set CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS=y CONFIG_X86_INTEL_TSX_MODE_OFF=y # CONFIG_X86_INTEL_TSX_MODE_ON is not set # CONFIG_X86_INTEL_TSX_MODE_AUTO is not set # CONFIG_X86_SGX is not set # CONFIG_EFI is not set # CONFIG_HZ_100 is not set CONFIG_HZ_250=y # CONFIG_HZ_300 is not set # CONFIG_HZ_1000 is not set CONFIG_HZ=250 CONFIG_SCHED_HRTICK=y # CONFIG_KEXEC is not set CONFIG_KEXEC_FILE=y CONFIG_ARCH_HAS_KEXEC_PURGATORY=y # CONFIG_KEXEC_SIG is not set # CONFIG_CRASH_DUMP is not set CONFIG_PHYSICAL_START=0x1000000 CONFIG_RELOCATABLE=y CONFIG_RANDOMIZE_BASE=y CONFIG_X86_NEED_RELOCS=y CONFIG_PHYSICAL_ALIGN=0x1000000 CONFIG_DYNAMIC_MEMORY_LAYOUT=y CONFIG_RANDOMIZE_MEMORY=y CONFIG_RANDOMIZE_MEMORY_PHYSICAL_PADDING=0xa CONFIG_HOTPLUG_CPU=y # CONFIG_BOOTPARAM_HOTPLUG_CPU0 is not set # CONFIG_DEBUG_HOTPLUG_CPU0 is not set # CONFIG_COMPAT_VDSO is not set CONFIG_LEGACY_VSYSCALL_XONLY=y # CONFIG_LEGACY_VSYSCALL_NONE is not set # CONFIG_CMDLINE_BOOL is not set CONFIG_MODIFY_LDT_SYSCALL=y # CONFIG_STRICT_SIGALTSTACK_SIZE is not set CONFIG_HAVE_LIVEPATCH=y # end of Processor type and features CONFIG_CC_HAS_SLS=y CONFIG_CC_HAS_RETURN_THUNK=y CONFIG_CPU_MITIGATIONS=y CONFIG_PAGE_TABLE_ISOLATION=y CONFIG_RETPOLINE=y CONFIG_RETHUNK=y CONFIG_CPU_UNRET_ENTRY=y CONFIG_CPU_IBPB_ENTRY=y CONFIG_CPU_IBRS_ENTRY=y CONFIG_CPU_SRSO=y # CONFIG_SLS is not set # CONFIG_GDS_FORCE_MITIGATION is not set CONFIG_MITIGATION_RFDS=y CONFIG_MITIGATION_SPECTRE_BHI=y CONFIG_ARCH_HAS_ADD_PAGES=y CONFIG_ARCH_MHP_MEMMAP_ON_MEMORY_ENABLE=y # # Power management and ACPI options # 
CONFIG_ARCH_HIBERNATION_HEADER=y # CONFIG_SUSPEND is not set CONFIG_HIBERNATE_CALLBACKS=y CONFIG_HIBERNATION=y CONFIG_HIBERNATION_SNAPSHOT_DEV=y CONFIG_PM_STD_PARTITION="" CONFIG_PM_SLEEP=y CONFIG_PM_SLEEP_SMP=y # CONFIG_PM_AUTOSLEEP is not set # CONFIG_PM_USERSPACE_AUTOSLEEP is not set # CONFIG_PM_WAKELOCKS is not set CONFIG_PM=y # CONFIG_PM_DEBUG is not set CONFIG_PM_CLK=y # CONFIG_WQ_POWER_EFFICIENT_DEFAULT is not set # CONFIG_ENERGY_MODEL is not set CONFIG_ARCH_SUPPORTS_ACPI=y CONFIG_ACPI=y CONFIG_ACPI_LEGACY_TABLES_LOOKUP=y CONFIG_ARCH_MIGHT_HAVE_ACPI_PDC=y CONFIG_ACPI_SYSTEM_POWER_STATES_SUPPORT=y # CONFIG_ACPI_DEBUGGER is not set # CONFIG_ACPI_SPCR_TABLE is not set # CONFIG_ACPI_FPDT is not set CONFIG_ACPI_LPIT=y CONFIG_ACPI_SLEEP=y # CONFIG_ACPI_REV_OVERRIDE_POSSIBLE is not set # CONFIG_ACPI_EC_DEBUGFS is not set # CONFIG_ACPI_AC is not set # CONFIG_ACPI_BATTERY is not set # CONFIG_ACPI_BUTTON is not set # CONFIG_ACPI_TINY_POWER_BUTTON is not set # CONFIG_ACPI_FAN is not set # CONFIG_ACPI_TAD is not set # CONFIG_ACPI_DOCK is not set CONFIG_ACPI_CPU_FREQ_PSS=y CONFIG_ACPI_PROCESSOR_CSTATE=y CONFIG_ACPI_PROCESSOR_IDLE=y CONFIG_ACPI_CPPC_LIB=y CONFIG_ACPI_PROCESSOR=y CONFIG_ACPI_HOTPLUG_CPU=y # CONFIG_ACPI_PROCESSOR_AGGREGATOR is not set # CONFIG_ACPI_THERMAL is not set CONFIG_ARCH_HAS_ACPI_TABLE_UPGRADE=y # CONFIG_ACPI_TABLE_UPGRADE is not set # CONFIG_ACPI_DEBUG is not set CONFIG_ACPI_CONTAINER=y # CONFIG_ACPI_HOTPLUG_MEMORY is not set # CONFIG_ACPI_SBS is not set # CONFIG_ACPI_HED is not set # CONFIG_ACPI_CUSTOM_METHOD is not set # CONFIG_ACPI_NFIT is not set # CONFIG_ACPI_NUMA is not set CONFIG_HAVE_ACPI_APEI=y CONFIG_HAVE_ACPI_APEI_NMI=y # CONFIG_ACPI_APEI is not set # CONFIG_ACPI_DPTF is not set # CONFIG_ACPI_CONFIGFS is not set # CONFIG_ACPI_PFRUT is not set CONFIG_ACPI_PCC=y # CONFIG_PMIC_OPREGION is not set CONFIG_X86_PM_TIMER=y # # CPU Frequency scaling # CONFIG_CPU_FREQ=y CONFIG_CPU_FREQ_GOV_ATTR_SET=y CONFIG_CPU_FREQ_STAT=y 
CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE=y # CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE is not set # CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE is not set # CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL is not set CONFIG_CPU_FREQ_GOV_PERFORMANCE=y # CONFIG_CPU_FREQ_GOV_POWERSAVE is not set # CONFIG_CPU_FREQ_GOV_USERSPACE is not set # CONFIG_CPU_FREQ_GOV_ONDEMAND is not set # CONFIG_CPU_FREQ_GOV_CONSERVATIVE is not set CONFIG_CPU_FREQ_GOV_SCHEDUTIL=y # # CPU frequency scaling drivers # CONFIG_X86_INTEL_PSTATE=y # CONFIG_X86_PCC_CPUFREQ is not set # CONFIG_X86_AMD_PSTATE is not set # CONFIG_X86_AMD_PSTATE_UT is not set # CONFIG_X86_ACPI_CPUFREQ is not set # CONFIG_X86_SPEEDSTEP_CENTRINO is not set # CONFIG_X86_P4_CLOCKMOD is not set # # shared options # # end of CPU Frequency scaling # # CPU Idle # CONFIG_CPU_IDLE=y CONFIG_CPU_IDLE_GOV_LADDER=y CONFIG_CPU_IDLE_GOV_MENU=y # CONFIG_CPU_IDLE_GOV_TEO is not set CONFIG_CPU_IDLE_GOV_HALTPOLL=y CONFIG_HALTPOLL_CPUIDLE=y # end of CPU Idle CONFIG_INTEL_IDLE=y # end of Power management and ACPI options # # Bus options (PCI etc.) # CONFIG_ISA_DMA_API=y # end of Bus options (PCI etc.) 
# # Binary Emulations # CONFIG_IA32_EMULATION=y # CONFIG_X86_X32_ABI is not set CONFIG_COMPAT_32=y CONFIG_COMPAT=y CONFIG_COMPAT_FOR_U64_ALIGNMENT=y # end of Binary Emulations CONFIG_HAVE_KVM=y # CONFIG_VIRTUALIZATION is not set CONFIG_AS_AVX512=y CONFIG_AS_SHA1_NI=y CONFIG_AS_SHA256_NI=y CONFIG_AS_TPAUSE=y CONFIG_ARCH_CONFIGURES_CPU_MITIGATIONS=y # # General architecture-dependent options # CONFIG_CRASH_CORE=y CONFIG_KEXEC_CORE=y CONFIG_HOTPLUG_SMT=y CONFIG_GENERIC_ENTRY=y CONFIG_JUMP_LABEL=y # CONFIG_STATIC_KEYS_SELFTEST is not set # CONFIG_STATIC_CALL_SELFTEST is not set CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS=y CONFIG_ARCH_USE_BUILTIN_BSWAP=y CONFIG_HAVE_IOREMAP_PROT=y CONFIG_HAVE_KPROBES=y CONFIG_HAVE_KRETPROBES=y CONFIG_HAVE_OPTPROBES=y CONFIG_HAVE_KPROBES_ON_FTRACE=y CONFIG_ARCH_CORRECT_STACKTRACE_ON_KRETPROBE=y CONFIG_HAVE_FUNCTION_ERROR_INJECTION=y CONFIG_HAVE_NMI=y CONFIG_TRACE_IRQFLAGS_SUPPORT=y CONFIG_TRACE_IRQFLAGS_NMI_SUPPORT=y CONFIG_HAVE_ARCH_TRACEHOOK=y CONFIG_HAVE_DMA_CONTIGUOUS=y CONFIG_GENERIC_SMP_IDLE_THREAD=y CONFIG_ARCH_HAS_FORTIFY_SOURCE=y CONFIG_ARCH_HAS_SET_MEMORY=y CONFIG_ARCH_HAS_SET_DIRECT_MAP=y CONFIG_ARCH_HAS_CPU_FINALIZE_INIT=y CONFIG_HAVE_ARCH_THREAD_STRUCT_WHITELIST=y CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT=y CONFIG_ARCH_WANTS_NO_INSTR=y CONFIG_HAVE_ASM_MODVERSIONS=y CONFIG_HAVE_REGS_AND_STACK_ACCESS_API=y CONFIG_HAVE_RSEQ=y CONFIG_HAVE_RUST=y CONFIG_HAVE_FUNCTION_ARG_ACCESS_API=y CONFIG_HAVE_HW_BREAKPOINT=y CONFIG_HAVE_MIXED_BREAKPOINTS_REGS=y CONFIG_HAVE_USER_RETURN_NOTIFIER=y CONFIG_HAVE_PERF_EVENTS_NMI=y CONFIG_HAVE_HARDLOCKUP_DETECTOR_PERF=y CONFIG_HAVE_PERF_REGS=y CONFIG_HAVE_PERF_USER_STACK_DUMP=y CONFIG_HAVE_ARCH_JUMP_LABEL=y CONFIG_HAVE_ARCH_JUMP_LABEL_RELATIVE=y CONFIG_MMU_GATHER_TABLE_FREE=y CONFIG_MMU_GATHER_RCU_TABLE_FREE=y CONFIG_MMU_GATHER_MERGE_VMAS=y CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG=y CONFIG_HAVE_ALIGNED_STRUCT_PAGE=y CONFIG_HAVE_CMPXCHG_LOCAL=y CONFIG_HAVE_CMPXCHG_DOUBLE=y CONFIG_ARCH_WANT_COMPAT_IPC_PARSE_VERSION=y 
CONFIG_ARCH_WANT_OLD_COMPAT_IPC=y CONFIG_HAVE_ARCH_SECCOMP=y CONFIG_HAVE_ARCH_SECCOMP_FILTER=y CONFIG_SECCOMP=y CONFIG_SECCOMP_FILTER=y # CONFIG_SECCOMP_CACHE_DEBUG is not set CONFIG_HAVE_ARCH_STACKLEAK=y CONFIG_HAVE_STACKPROTECTOR=y CONFIG_STACKPROTECTOR=y CONFIG_STACKPROTECTOR_STRONG=y CONFIG_ARCH_SUPPORTS_LTO_CLANG=y CONFIG_ARCH_SUPPORTS_LTO_CLANG_THIN=y CONFIG_LTO_NONE=y CONFIG_ARCH_SUPPORTS_CFI_CLANG=y CONFIG_HAVE_ARCH_WITHIN_STACK_FRAMES=y CONFIG_HAVE_CONTEXT_TRACKING_USER=y CONFIG_HAVE_CONTEXT_TRACKING_USER_OFFSTACK=y CONFIG_HAVE_VIRT_CPU_ACCOUNTING_GEN=y CONFIG_HAVE_IRQ_TIME_ACCOUNTING=y CONFIG_HAVE_MOVE_PUD=y CONFIG_HAVE_MOVE_PMD=y CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE=y CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD=y CONFIG_HAVE_ARCH_HUGE_VMAP=y CONFIG_HAVE_ARCH_HUGE_VMALLOC=y CONFIG_ARCH_WANT_HUGE_PMD_SHARE=y CONFIG_HAVE_ARCH_SOFT_DIRTY=y CONFIG_HAVE_MOD_ARCH_SPECIFIC=y CONFIG_MODULES_USE_ELF_RELA=y CONFIG_HAVE_IRQ_EXIT_ON_IRQ_STACK=y CONFIG_HAVE_SOFTIRQ_ON_OWN_STACK=y CONFIG_SOFTIRQ_ON_OWN_STACK=y CONFIG_ARCH_HAS_ELF_RANDOMIZE=y CONFIG_HAVE_ARCH_MMAP_RND_BITS=y CONFIG_HAVE_EXIT_THREAD=y CONFIG_ARCH_MMAP_RND_BITS=28 CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS=y CONFIG_ARCH_MMAP_RND_COMPAT_BITS=8 CONFIG_HAVE_ARCH_COMPAT_MMAP_BASES=y CONFIG_PAGE_SIZE_LESS_THAN_64KB=y CONFIG_PAGE_SIZE_LESS_THAN_256KB=y CONFIG_HAVE_OBJTOOL=y CONFIG_HAVE_JUMP_LABEL_HACK=y CONFIG_HAVE_NOINSTR_HACK=y CONFIG_HAVE_NOINSTR_VALIDATION=y CONFIG_HAVE_UACCESS_VALIDATION=y CONFIG_HAVE_STACK_VALIDATION=y CONFIG_HAVE_RELIABLE_STACKTRACE=y CONFIG_OLD_SIGSUSPEND3=y CONFIG_COMPAT_OLD_SIGACTION=y CONFIG_COMPAT_32BIT_TIME=y CONFIG_HAVE_ARCH_VMAP_STACK=y CONFIG_VMAP_STACK=y CONFIG_HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET=y CONFIG_RANDOMIZE_KSTACK_OFFSET=y # CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT is not set CONFIG_ARCH_HAS_STRICT_KERNEL_RWX=y CONFIG_STRICT_KERNEL_RWX=y CONFIG_ARCH_HAS_STRICT_MODULE_RWX=y CONFIG_HAVE_ARCH_PREL32_RELOCATIONS=y # CONFIG_LOCK_EVENT_COUNTS is not set CONFIG_ARCH_HAS_MEM_ENCRYPT=y 
CONFIG_HAVE_STATIC_CALL=y CONFIG_HAVE_STATIC_CALL_INLINE=y CONFIG_HAVE_PREEMPT_DYNAMIC=y CONFIG_HAVE_PREEMPT_DYNAMIC_CALL=y CONFIG_ARCH_WANT_LD_ORPHAN_WARN=y CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC=y CONFIG_ARCH_SUPPORTS_PAGE_TABLE_CHECK=y CONFIG_ARCH_HAS_ELFCORE_COMPAT=y CONFIG_ARCH_HAS_PARANOID_L1D_FLUSH=y CONFIG_DYNAMIC_SIGFRAME=y CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG=y # # GCOV-based kernel profiling # # CONFIG_GCOV_KERNEL is not set CONFIG_ARCH_HAS_GCOV_PROFILE_ALL=y # end of GCOV-based kernel profiling CONFIG_HAVE_GCC_PLUGINS=y # end of General architecture-dependent options CONFIG_RT_MUTEXES=y CONFIG_BASE_SMALL=0 # CONFIG_MODULES is not set CONFIG_BLOCK=y CONFIG_BLOCK_LEGACY_AUTOLOAD=y CONFIG_BLK_RQ_ALLOC_TIME=y CONFIG_BLK_CGROUP_RWSTAT=y CONFIG_BLK_DEV_BSG_COMMON=y CONFIG_BLK_ICQ=y CONFIG_BLK_DEV_BSGLIB=y CONFIG_BLK_DEV_INTEGRITY=y # CONFIG_BLK_DEV_ZONED is not set CONFIG_BLK_DEV_THROTTLING=y # CONFIG_BLK_DEV_THROTTLING_LOW is not set CONFIG_BLK_WBT=y CONFIG_BLK_WBT_MQ=y # CONFIG_BLK_CGROUP_IOLATENCY is not set CONFIG_BLK_CGROUP_IOCOST=y # CONFIG_BLK_CGROUP_IOPRIO is not set CONFIG_BLK_DEBUG_FS=y # CONFIG_BLK_SED_OPAL is not set # CONFIG_BLK_INLINE_ENCRYPTION is not set # # Partition Types # CONFIG_PARTITION_ADVANCED=y # CONFIG_ACORN_PARTITION is not set # CONFIG_AIX_PARTITION is not set # CONFIG_OSF_PARTITION is not set # CONFIG_AMIGA_PARTITION is not set # CONFIG_ATARI_PARTITION is not set # CONFIG_MAC_PARTITION is not set # CONFIG_MSDOS_PARTITION is not set # CONFIG_LDM_PARTITION is not set # CONFIG_SGI_PARTITION is not set # CONFIG_ULTRIX_PARTITION is not set # CONFIG_SUN_PARTITION is not set # CONFIG_KARMA_PARTITION is not set # CONFIG_EFI_PARTITION is not set # CONFIG_SYSV68_PARTITION is not set # CONFIG_CMDLINE_PARTITION is not set # end of Partition Types CONFIG_BLOCK_COMPAT=y CONFIG_BLK_MQ_VIRTIO=y CONFIG_BLK_PM=y # # IO Schedulers # CONFIG_MQ_IOSCHED_DEADLINE=y CONFIG_MQ_IOSCHED_KYBER=y CONFIG_IOSCHED_BFQ=y CONFIG_BFQ_GROUP_IOSCHED=y # 
CONFIG_BFQ_CGROUP_DEBUG is not set # end of IO Schedulers CONFIG_PADATA=y CONFIG_ASN1=y CONFIG_UNINLINE_SPIN_UNLOCK=y CONFIG_ARCH_SUPPORTS_ATOMIC_RMW=y CONFIG_MUTEX_SPIN_ON_OWNER=y CONFIG_RWSEM_SPIN_ON_OWNER=y CONFIG_LOCK_SPIN_ON_OWNER=y CONFIG_ARCH_USE_QUEUED_SPINLOCKS=y CONFIG_QUEUED_SPINLOCKS=y CONFIG_ARCH_USE_QUEUED_RWLOCKS=y CONFIG_QUEUED_RWLOCKS=y CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE=y CONFIG_ARCH_HAS_SYNC_CORE_BEFORE_USERMODE=y CONFIG_ARCH_HAS_SYSCALL_WRAPPER=y CONFIG_FREEZER=y # # Executable file formats # CONFIG_BINFMT_ELF=y CONFIG_COMPAT_BINFMT_ELF=y CONFIG_ELFCORE=y CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y CONFIG_BINFMT_SCRIPT=y CONFIG_BINFMT_MISC=y CONFIG_COREDUMP=y # end of Executable file formats # # Memory Management options # CONFIG_ZPOOL=y CONFIG_SWAP=y CONFIG_ZSWAP=y # CONFIG_ZSWAP_DEFAULT_ON is not set # CONFIG_ZSWAP_COMPRESSOR_DEFAULT_DEFLATE is not set CONFIG_ZSWAP_COMPRESSOR_DEFAULT_LZO=y # CONFIG_ZSWAP_COMPRESSOR_DEFAULT_842 is not set # CONFIG_ZSWAP_COMPRESSOR_DEFAULT_LZ4 is not set # CONFIG_ZSWAP_COMPRESSOR_DEFAULT_LZ4HC is not set # CONFIG_ZSWAP_COMPRESSOR_DEFAULT_ZSTD is not set CONFIG_ZSWAP_COMPRESSOR_DEFAULT="lzo" CONFIG_ZSWAP_ZPOOL_DEFAULT_ZBUD=y # CONFIG_ZSWAP_ZPOOL_DEFAULT_Z3FOLD is not set # CONFIG_ZSWAP_ZPOOL_DEFAULT_ZSMALLOC is not set CONFIG_ZSWAP_ZPOOL_DEFAULT="zbud" CONFIG_ZBUD=y # CONFIG_Z3FOLD is not set # CONFIG_ZSMALLOC is not set # # SLAB allocator options # # CONFIG_SLAB is not set CONFIG_SLUB=y CONFIG_SLAB_MERGE_DEFAULT=y CONFIG_SLAB_FREELIST_RANDOM=y CONFIG_SLAB_FREELIST_HARDENED=y # CONFIG_SLUB_STATS is not set CONFIG_SLUB_CPU_PARTIAL=y # end of SLAB allocator options CONFIG_SHUFFLE_PAGE_ALLOCATOR=y # CONFIG_COMPAT_BRK is not set CONFIG_SPARSEMEM=y CONFIG_SPARSEMEM_EXTREME=y CONFIG_SPARSEMEM_VMEMMAP_ENABLE=y CONFIG_SPARSEMEM_VMEMMAP=y CONFIG_HAVE_FAST_GUP=y CONFIG_NUMA_KEEP_MEMINFO=y CONFIG_MEMORY_ISOLATION=y CONFIG_EXCLUSIVE_SYSTEM_RAM=y CONFIG_HAVE_BOOTMEM_INFO_NODE=y CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y 
CONFIG_ARCH_ENABLE_MEMORY_HOTREMOVE=y CONFIG_MEMORY_HOTPLUG=y # CONFIG_MEMORY_HOTPLUG_DEFAULT_ONLINE is not set CONFIG_MEMORY_HOTREMOVE=y CONFIG_MHP_MEMMAP_ON_MEMORY=y CONFIG_SPLIT_PTLOCK_CPUS=4 CONFIG_ARCH_ENABLE_SPLIT_PMD_PTLOCK=y CONFIG_MEMORY_BALLOON=y CONFIG_BALLOON_COMPACTION=y CONFIG_COMPACTION=y CONFIG_COMPACT_UNEVICTABLE_DEFAULT=1 CONFIG_PAGE_REPORTING=y CONFIG_MIGRATION=y CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION=y CONFIG_ARCH_ENABLE_THP_MIGRATION=y CONFIG_CONTIG_ALLOC=y CONFIG_PHYS_ADDR_T_64BIT=y CONFIG_KSM=y CONFIG_DEFAULT_MMAP_MIN_ADDR=4096 CONFIG_ARCH_WANT_GENERAL_HUGETLB=y CONFIG_ARCH_WANTS_THP_SWAP=y CONFIG_TRANSPARENT_HUGEPAGE=y # CONFIG_TRANSPARENT_HUGEPAGE_ALWAYS is not set CONFIG_TRANSPARENT_HUGEPAGE_MADVISE=y CONFIG_THP_SWAP=y # CONFIG_READ_ONLY_THP_FOR_FS is not set CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK=y CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK=y CONFIG_USE_PERCPU_NUMA_NODE_ID=y CONFIG_HAVE_SETUP_PER_CPU_AREA=y CONFIG_FRONTSWAP=y # CONFIG_CMA is not set CONFIG_GENERIC_EARLY_IOREMAP=y CONFIG_DEFERRED_STRUCT_PAGE_INIT=y CONFIG_PAGE_IDLE_FLAG=y # CONFIG_IDLE_PAGE_TRACKING is not set CONFIG_ARCH_HAS_CACHE_LINE_SIZE=y CONFIG_ARCH_HAS_CURRENT_STACK_POINTER=y CONFIG_ARCH_HAS_PTE_DEVMAP=y CONFIG_ZONE_DMA=y CONFIG_ZONE_DMA32=y # CONFIG_ZONE_DEVICE is not set CONFIG_ARCH_USES_HIGH_VMA_FLAGS=y CONFIG_ARCH_HAS_PKEYS=y CONFIG_VM_EVENT_COUNTERS=y CONFIG_PERCPU_STATS=y # CONFIG_GUP_TEST is not set CONFIG_ARCH_HAS_PTE_SPECIAL=y CONFIG_SECRETMEM=y # CONFIG_ANON_VMA_NAME is not set CONFIG_USERFAULTFD=y CONFIG_HAVE_ARCH_USERFAULTFD_WP=y CONFIG_HAVE_ARCH_USERFAULTFD_MINOR=y CONFIG_PTE_MARKER=y CONFIG_PTE_MARKER_UFFD_WP=y # CONFIG_LRU_GEN is not set CONFIG_LOCK_MM_AND_FIND_VMA=y # # Data Access Monitoring # CONFIG_DAMON=y CONFIG_DAMON_VADDR=y CONFIG_DAMON_PADDR=y CONFIG_DAMON_SYSFS=y CONFIG_DAMON_DBGFS=y # CONFIG_DAMON_RECLAIM is not set # CONFIG_DAMON_LRU_SORT is not set # end of Data Access Monitoring # end of Memory Management options CONFIG_NET=y CONFIG_NET_INGRESS=y 
CONFIG_SKB_EXTENSIONS=y # # Networking options # CONFIG_PACKET=y # CONFIG_PACKET_DIAG is not set CONFIG_UNIX=y CONFIG_UNIX_SCM=y CONFIG_AF_UNIX_OOB=y # CONFIG_UNIX_DIAG is not set # CONFIG_TLS is not set CONFIG_XFRM=y CONFIG_XFRM_ALGO=y CONFIG_XFRM_USER=y # CONFIG_XFRM_USER_COMPAT is not set # CONFIG_XFRM_INTERFACE is not set CONFIG_XFRM_SUB_POLICY=y CONFIG_XFRM_MIGRATE=y CONFIG_XFRM_STATISTICS=y # CONFIG_NET_KEY is not set CONFIG_XDP_SOCKETS=y # CONFIG_XDP_SOCKETS_DIAG is not set CONFIG_INET=y CONFIG_IP_MULTICAST=y CONFIG_IP_ADVANCED_ROUTER=y # CONFIG_IP_FIB_TRIE_STATS is not set CONFIG_IP_MULTIPLE_TABLES=y CONFIG_IP_ROUTE_MULTIPATH=y CONFIG_IP_ROUTE_VERBOSE=y CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y CONFIG_IP_PNP_RARP=y # CONFIG_NET_IPIP is not set # CONFIG_NET_IPGRE_DEMUX is not set CONFIG_IP_MROUTE_COMMON=y CONFIG_IP_MROUTE=y CONFIG_IP_MROUTE_MULTIPLE_TABLES=y CONFIG_IP_PIMSM_V1=y CONFIG_IP_PIMSM_V2=y CONFIG_SYN_COOKIES=y # CONFIG_NET_IPVTI is not set # CONFIG_NET_FOU is not set # CONFIG_INET_AH is not set # CONFIG_INET_ESP is not set # CONFIG_INET_IPCOMP is not set CONFIG_INET_TABLE_PERTURB_ORDER=16 CONFIG_INET_DIAG=y CONFIG_INET_TCP_DIAG=y # CONFIG_INET_UDP_DIAG is not set # CONFIG_INET_RAW_DIAG is not set CONFIG_INET_DIAG_DESTROY=y CONFIG_TCP_CONG_ADVANCED=y # CONFIG_TCP_CONG_BIC is not set CONFIG_TCP_CONG_CUBIC=y # CONFIG_TCP_CONG_WESTWOOD is not set # CONFIG_TCP_CONG_HTCP is not set # CONFIG_TCP_CONG_HSTCP is not set # CONFIG_TCP_CONG_HYBLA is not set # CONFIG_TCP_CONG_VEGAS is not set # CONFIG_TCP_CONG_NV is not set # CONFIG_TCP_CONG_SCALABLE is not set # CONFIG_TCP_CONG_LP is not set # CONFIG_TCP_CONG_VENO is not set # CONFIG_TCP_CONG_YEAH is not set # CONFIG_TCP_CONG_ILLINOIS is not set # CONFIG_TCP_CONG_DCTCP is not set # CONFIG_TCP_CONG_CDG is not set # CONFIG_TCP_CONG_BBR is not set CONFIG_DEFAULT_CUBIC=y # CONFIG_DEFAULT_RENO is not set CONFIG_DEFAULT_TCP_CONG="cubic" CONFIG_TCP_MD5SIG=y CONFIG_IPV6=y CONFIG_IPV6_ROUTER_PREF=y 
CONFIG_IPV6_ROUTE_INFO=y CONFIG_IPV6_OPTIMISTIC_DAD=y # CONFIG_INET6_AH is not set # CONFIG_INET6_ESP is not set # CONFIG_INET6_IPCOMP is not set # CONFIG_IPV6_MIP6 is not set # CONFIG_IPV6_ILA is not set # CONFIG_IPV6_VTI is not set # CONFIG_IPV6_SIT is not set # CONFIG_IPV6_TUNNEL is not set CONFIG_IPV6_MULTIPLE_TABLES=y CONFIG_IPV6_SUBTREES=y CONFIG_IPV6_MROUTE=y CONFIG_IPV6_MROUTE_MULTIPLE_TABLES=y CONFIG_IPV6_PIMSM_V2=y CONFIG_IPV6_SEG6_LWTUNNEL=y CONFIG_IPV6_SEG6_HMAC=y CONFIG_IPV6_SEG6_BPF=y # CONFIG_IPV6_RPL_LWTUNNEL is not set # CONFIG_IPV6_IOAM6_LWTUNNEL is not set CONFIG_NETLABEL=y CONFIG_MPTCP=y CONFIG_INET_MPTCP_DIAG=y CONFIG_MPTCP_IPV6=y CONFIG_NETWORK_SECMARK=y CONFIG_NET_PTP_CLASSIFY=y CONFIG_NETWORK_PHY_TIMESTAMPING=y CONFIG_NETFILTER=y CONFIG_NETFILTER_ADVANCED=y CONFIG_BRIDGE_NETFILTER=y # # Core Netfilter Configuration # CONFIG_NETFILTER_INGRESS=y # CONFIG_NETFILTER_EGRESS is not set CONFIG_NETFILTER_FAMILY_BRIDGE=y # CONFIG_NETFILTER_NETLINK_ACCT is not set # CONFIG_NETFILTER_NETLINK_QUEUE is not set # CONFIG_NETFILTER_NETLINK_LOG is not set # CONFIG_NETFILTER_NETLINK_OSF is not set CONFIG_NF_CONNTRACK=y CONFIG_NF_LOG_SYSLOG=y CONFIG_NF_CONNTRACK_MARK=y CONFIG_NF_CONNTRACK_SECMARK=y CONFIG_NF_CONNTRACK_ZONES=y CONFIG_NF_CONNTRACK_PROCFS=y CONFIG_NF_CONNTRACK_EVENTS=y CONFIG_NF_CONNTRACK_TIMEOUT=y CONFIG_NF_CONNTRACK_TIMESTAMP=y CONFIG_NF_CONNTRACK_LABELS=y CONFIG_NF_CT_PROTO_DCCP=y CONFIG_NF_CT_PROTO_SCTP=y CONFIG_NF_CT_PROTO_UDPLITE=y # CONFIG_NF_CONNTRACK_AMANDA is not set # CONFIG_NF_CONNTRACK_FTP is not set # CONFIG_NF_CONNTRACK_H323 is not set # CONFIG_NF_CONNTRACK_IRC is not set # CONFIG_NF_CONNTRACK_NETBIOS_NS is not set # CONFIG_NF_CONNTRACK_SNMP is not set # CONFIG_NF_CONNTRACK_PPTP is not set # CONFIG_NF_CONNTRACK_SANE is not set # CONFIG_NF_CONNTRACK_SIP is not set # CONFIG_NF_CONNTRACK_TFTP is not set # CONFIG_NF_CT_NETLINK is not set # CONFIG_NF_CT_NETLINK_TIMEOUT is not set CONFIG_NF_NAT=y CONFIG_NF_NAT_REDIRECT=y 
CONFIG_NF_NAT_MASQUERADE=y CONFIG_NETFILTER_SYNPROXY=y # CONFIG_NF_TABLES is not set CONFIG_NETFILTER_XTABLES=y CONFIG_NETFILTER_XTABLES_COMPAT=y # # Xtables combined modules # # CONFIG_NETFILTER_XT_MARK is not set # CONFIG_NETFILTER_XT_CONNMARK is not set # # Xtables targets # # CONFIG_NETFILTER_XT_TARGET_AUDIT is not set # CONFIG_NETFILTER_XT_TARGET_CHECKSUM is not set # CONFIG_NETFILTER_XT_TARGET_CLASSIFY is not set # CONFIG_NETFILTER_XT_TARGET_CONNMARK is not set # CONFIG_NETFILTER_XT_TARGET_CONNSECMARK is not set # CONFIG_NETFILTER_XT_TARGET_DSCP is not set # CONFIG_NETFILTER_XT_TARGET_HL is not set # CONFIG_NETFILTER_XT_TARGET_HMARK is not set # CONFIG_NETFILTER_XT_TARGET_IDLETIMER is not set # CONFIG_NETFILTER_XT_TARGET_LOG is not set # CONFIG_NETFILTER_XT_TARGET_MARK is not set CONFIG_NETFILTER_XT_NAT=y CONFIG_NETFILTER_XT_TARGET_NETMAP=y # CONFIG_NETFILTER_XT_TARGET_NFLOG is not set # CONFIG_NETFILTER_XT_TARGET_NFQUEUE is not set # CONFIG_NETFILTER_XT_TARGET_RATEEST is not set CONFIG_NETFILTER_XT_TARGET_REDIRECT=y CONFIG_NETFILTER_XT_TARGET_MASQUERADE=y # CONFIG_NETFILTER_XT_TARGET_TEE is not set # CONFIG_NETFILTER_XT_TARGET_TPROXY is not set # CONFIG_NETFILTER_XT_TARGET_SECMARK is not set # CONFIG_NETFILTER_XT_TARGET_TCPMSS is not set # CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP is not set # # Xtables matches # CONFIG_NETFILTER_XT_MATCH_ADDRTYPE=y # CONFIG_NETFILTER_XT_MATCH_BPF is not set # CONFIG_NETFILTER_XT_MATCH_CGROUP is not set # CONFIG_NETFILTER_XT_MATCH_CLUSTER is not set # CONFIG_NETFILTER_XT_MATCH_COMMENT is not set # CONFIG_NETFILTER_XT_MATCH_CONNBYTES is not set # CONFIG_NETFILTER_XT_MATCH_CONNLABEL is not set # CONFIG_NETFILTER_XT_MATCH_CONNLIMIT is not set # CONFIG_NETFILTER_XT_MATCH_CONNMARK is not set CONFIG_NETFILTER_XT_MATCH_CONNTRACK=y # CONFIG_NETFILTER_XT_MATCH_CPU is not set # CONFIG_NETFILTER_XT_MATCH_DCCP is not set # CONFIG_NETFILTER_XT_MATCH_DEVGROUP is not set # CONFIG_NETFILTER_XT_MATCH_DSCP is not set # 
CONFIG_NETFILTER_XT_MATCH_ECN is not set # CONFIG_NETFILTER_XT_MATCH_ESP is not set # CONFIG_NETFILTER_XT_MATCH_HASHLIMIT is not set # CONFIG_NETFILTER_XT_MATCH_HELPER is not set # CONFIG_NETFILTER_XT_MATCH_HL is not set # CONFIG_NETFILTER_XT_MATCH_IPCOMP is not set # CONFIG_NETFILTER_XT_MATCH_IPRANGE is not set # CONFIG_NETFILTER_XT_MATCH_L2TP is not set # CONFIG_NETFILTER_XT_MATCH_LENGTH is not set # CONFIG_NETFILTER_XT_MATCH_LIMIT is not set # CONFIG_NETFILTER_XT_MATCH_MAC is not set # CONFIG_NETFILTER_XT_MATCH_MARK is not set # CONFIG_NETFILTER_XT_MATCH_MULTIPORT is not set # CONFIG_NETFILTER_XT_MATCH_NFACCT is not set # CONFIG_NETFILTER_XT_MATCH_OSF is not set # CONFIG_NETFILTER_XT_MATCH_OWNER is not set # CONFIG_NETFILTER_XT_MATCH_POLICY is not set # CONFIG_NETFILTER_XT_MATCH_PHYSDEV is not set # CONFIG_NETFILTER_XT_MATCH_PKTTYPE is not set # CONFIG_NETFILTER_XT_MATCH_QUOTA is not set # CONFIG_NETFILTER_XT_MATCH_RATEEST is not set # CONFIG_NETFILTER_XT_MATCH_REALM is not set # CONFIG_NETFILTER_XT_MATCH_RECENT is not set # CONFIG_NETFILTER_XT_MATCH_SCTP is not set # CONFIG_NETFILTER_XT_MATCH_SOCKET is not set # CONFIG_NETFILTER_XT_MATCH_STATE is not set # CONFIG_NETFILTER_XT_MATCH_STATISTIC is not set # CONFIG_NETFILTER_XT_MATCH_STRING is not set # CONFIG_NETFILTER_XT_MATCH_TCPMSS is not set # CONFIG_NETFILTER_XT_MATCH_TIME is not set # CONFIG_NETFILTER_XT_MATCH_U32 is not set # end of Core Netfilter Configuration # CONFIG_IP_SET is not set # CONFIG_IP_VS is not set # # IP: Netfilter Configuration # CONFIG_NF_DEFRAG_IPV4=y # CONFIG_NF_SOCKET_IPV4 is not set # CONFIG_NF_TPROXY_IPV4 is not set # CONFIG_NF_DUP_IPV4 is not set # CONFIG_NF_LOG_ARP is not set # CONFIG_NF_LOG_IPV4 is not set CONFIG_NF_REJECT_IPV4=y CONFIG_IP_NF_IPTABLES=y # CONFIG_IP_NF_MATCH_AH is not set # CONFIG_IP_NF_MATCH_ECN is not set # CONFIG_IP_NF_MATCH_RPFILTER is not set # CONFIG_IP_NF_MATCH_TTL is not set CONFIG_IP_NF_FILTER=y CONFIG_IP_NF_TARGET_REJECT=y CONFIG_IP_NF_TARGET_SYNPROXY=y 
CONFIG_IP_NF_NAT=y CONFIG_IP_NF_TARGET_MASQUERADE=y CONFIG_IP_NF_TARGET_NETMAP=y CONFIG_IP_NF_TARGET_REDIRECT=y CONFIG_IP_NF_MANGLE=y # CONFIG_IP_NF_TARGET_CLUSTERIP is not set # CONFIG_IP_NF_TARGET_ECN is not set # CONFIG_IP_NF_TARGET_TTL is not set # CONFIG_IP_NF_RAW is not set # CONFIG_IP_NF_SECURITY is not set # CONFIG_IP_NF_ARPTABLES is not set # end of IP: Netfilter Configuration # # IPv6: Netfilter Configuration # # CONFIG_NF_SOCKET_IPV6 is not set # CONFIG_NF_TPROXY_IPV6 is not set # CONFIG_NF_DUP_IPV6 is not set CONFIG_NF_REJECT_IPV6=y CONFIG_NF_LOG_IPV6=y CONFIG_IP6_NF_IPTABLES=y # CONFIG_IP6_NF_MATCH_AH is not set # CONFIG_IP6_NF_MATCH_EUI64 is not set # CONFIG_IP6_NF_MATCH_FRAG is not set # CONFIG_IP6_NF_MATCH_OPTS is not set # CONFIG_IP6_NF_MATCH_HL is not set # CONFIG_IP6_NF_MATCH_IPV6HEADER is not set # CONFIG_IP6_NF_MATCH_MH is not set # CONFIG_IP6_NF_MATCH_RPFILTER is not set # CONFIG_IP6_NF_MATCH_RT is not set # CONFIG_IP6_NF_MATCH_SRH is not set # CONFIG_IP6_NF_TARGET_HL is not set CONFIG_IP6_NF_FILTER=y CONFIG_IP6_NF_TARGET_REJECT=y CONFIG_IP6_NF_TARGET_SYNPROXY=y CONFIG_IP6_NF_MANGLE=y # CONFIG_IP6_NF_RAW is not set # CONFIG_IP6_NF_SECURITY is not set CONFIG_IP6_NF_NAT=y CONFIG_IP6_NF_TARGET_MASQUERADE=y # CONFIG_IP6_NF_TARGET_NPT is not set # end of IPv6: Netfilter Configuration CONFIG_NF_DEFRAG_IPV6=y # CONFIG_NF_CONNTRACK_BRIDGE is not set # CONFIG_BRIDGE_NF_EBTABLES is not set CONFIG_BPFILTER=y CONFIG_BPFILTER_UMH=y # CONFIG_IP_DCCP is not set # CONFIG_IP_SCTP is not set # CONFIG_RDS is not set # CONFIG_TIPC is not set # CONFIG_ATM is not set # CONFIG_L2TP is not set CONFIG_STP=y CONFIG_BRIDGE=y CONFIG_BRIDGE_IGMP_SNOOPING=y # CONFIG_BRIDGE_MRP is not set # CONFIG_BRIDGE_CFM is not set # CONFIG_NET_DSA is not set # CONFIG_VLAN_8021Q is not set CONFIG_LLC=y # CONFIG_LLC2 is not set # CONFIG_ATALK is not set # CONFIG_X25 is not set # CONFIG_LAPB is not set # CONFIG_PHONET is not set # CONFIG_6LOWPAN is not set # CONFIG_IEEE802154 is not set 
CONFIG_NET_SCHED=y # # Queueing/Scheduling # # CONFIG_NET_SCH_HTB is not set # CONFIG_NET_SCH_HFSC is not set # CONFIG_NET_SCH_PRIO is not set # CONFIG_NET_SCH_MULTIQ is not set # CONFIG_NET_SCH_RED is not set # CONFIG_NET_SCH_SFB is not set # CONFIG_NET_SCH_SFQ is not set # CONFIG_NET_SCH_TEQL is not set # CONFIG_NET_SCH_TBF is not set # CONFIG_NET_SCH_CBS is not set # CONFIG_NET_SCH_ETF is not set # CONFIG_NET_SCH_TAPRIO is not set # CONFIG_NET_SCH_GRED is not set # CONFIG_NET_SCH_NETEM is not set # CONFIG_NET_SCH_DRR is not set # CONFIG_NET_SCH_MQPRIO is not set # CONFIG_NET_SCH_SKBPRIO is not set # CONFIG_NET_SCH_CHOKE is not set # CONFIG_NET_SCH_QFQ is not set # CONFIG_NET_SCH_CODEL is not set # CONFIG_NET_SCH_FQ_CODEL is not set # CONFIG_NET_SCH_CAKE is not set # CONFIG_NET_SCH_FQ is not set # CONFIG_NET_SCH_HHF is not set # CONFIG_NET_SCH_PIE is not set # CONFIG_NET_SCH_INGRESS is not set # CONFIG_NET_SCH_PLUG is not set # CONFIG_NET_SCH_ETS is not set # CONFIG_NET_SCH_DEFAULT is not set # # Classification # CONFIG_NET_CLS=y # CONFIG_NET_CLS_BASIC is not set # CONFIG_NET_CLS_ROUTE4 is not set # CONFIG_NET_CLS_FW is not set # CONFIG_NET_CLS_U32 is not set # CONFIG_NET_CLS_FLOW is not set # CONFIG_NET_CLS_CGROUP is not set # CONFIG_NET_CLS_BPF is not set # CONFIG_NET_CLS_FLOWER is not set # CONFIG_NET_CLS_MATCHALL is not set CONFIG_NET_EMATCH=y CONFIG_NET_EMATCH_STACK=32 # CONFIG_NET_EMATCH_CMP is not set # CONFIG_NET_EMATCH_NBYTE is not set # CONFIG_NET_EMATCH_U32 is not set # CONFIG_NET_EMATCH_META is not set # CONFIG_NET_EMATCH_TEXT is not set # CONFIG_NET_EMATCH_IPT is not set CONFIG_NET_CLS_ACT=y # CONFIG_NET_ACT_POLICE is not set # CONFIG_NET_ACT_GACT is not set # CONFIG_NET_ACT_MIRRED is not set # CONFIG_NET_ACT_SAMPLE is not set # CONFIG_NET_ACT_IPT is not set # CONFIG_NET_ACT_NAT is not set # CONFIG_NET_ACT_PEDIT is not set # CONFIG_NET_ACT_SIMP is not set # CONFIG_NET_ACT_SKBEDIT is not set # CONFIG_NET_ACT_CSUM is not set # CONFIG_NET_ACT_MPLS is 
not set # CONFIG_NET_ACT_VLAN is not set # CONFIG_NET_ACT_BPF is not set # CONFIG_NET_ACT_CONNMARK is not set # CONFIG_NET_ACT_CTINFO is not set # CONFIG_NET_ACT_SKBMOD is not set # CONFIG_NET_ACT_IFE is not set # CONFIG_NET_ACT_TUNNEL_KEY is not set # CONFIG_NET_ACT_GATE is not set # CONFIG_NET_TC_SKB_EXT is not set CONFIG_NET_SCH_FIFO=y CONFIG_DCB=y CONFIG_DNS_RESOLVER=y # CONFIG_BATMAN_ADV is not set # CONFIG_OPENVSWITCH is not set CONFIG_VSOCKETS=y # CONFIG_VSOCKETS_DIAG is not set # CONFIG_VSOCKETS_LOOPBACK is not set CONFIG_VIRTIO_VSOCKETS=y CONFIG_VIRTIO_VSOCKETS_COMMON=y # CONFIG_NETLINK_DIAG is not set CONFIG_MPLS=y # CONFIG_NET_MPLS_GSO is not set # CONFIG_MPLS_ROUTING is not set # CONFIG_NET_NSH is not set # CONFIG_HSR is not set # CONFIG_NET_SWITCHDEV is not set CONFIG_NET_L3_MASTER_DEV=y # CONFIG_QRTR is not set # CONFIG_NET_NCSI is not set CONFIG_PCPU_DEV_REFCNT=y CONFIG_RPS=y CONFIG_RFS_ACCEL=y CONFIG_SOCK_RX_QUEUE_MAPPING=y CONFIG_XPS=y CONFIG_CGROUP_NET_PRIO=y CONFIG_CGROUP_NET_CLASSID=y CONFIG_NET_RX_BUSY_POLL=y CONFIG_BQL=y CONFIG_BPF_STREAM_PARSER=y CONFIG_NET_FLOW_LIMIT=y # # Network testing # # CONFIG_NET_PKTGEN is not set # end of Network testing # end of Networking options # CONFIG_HAMRADIO is not set # CONFIG_CAN is not set # CONFIG_BT is not set # CONFIG_AF_RXRPC is not set # CONFIG_AF_KCM is not set CONFIG_STREAM_PARSER=y # CONFIG_MCTP is not set CONFIG_FIB_RULES=y # CONFIG_WIRELESS is not set # CONFIG_RFKILL is not set # CONFIG_NET_9P is not set # CONFIG_CAIF is not set # CONFIG_CEPH_LIB is not set # CONFIG_NFC is not set # CONFIG_PSAMPLE is not set # CONFIG_NET_IFE is not set CONFIG_LWTUNNEL=y CONFIG_LWTUNNEL_BPF=y CONFIG_DST_CACHE=y CONFIG_GRO_CELLS=y CONFIG_NET_SOCK_MSG=y CONFIG_PAGE_POOL=y # CONFIG_PAGE_POOL_STATS is not set CONFIG_FAILOVER=y CONFIG_ETHTOOL_NETLINK=y # # Device Drivers # CONFIG_HAVE_EISA=y # CONFIG_EISA is not set CONFIG_HAVE_PCI=y # CONFIG_PCI is not set # CONFIG_PCCARD is not set # # Generic Driver Options # 
CONFIG_UEVENT_HELPER=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y # CONFIG_DEVTMPFS_SAFE is not set CONFIG_STANDALONE=y CONFIG_PREVENT_FIRMWARE_BUILD=y # # Firmware loader # CONFIG_FW_LOADER=y CONFIG_FW_LOADER_PAGED_BUF=y CONFIG_FW_LOADER_SYSFS=y CONFIG_EXTRA_FIRMWARE="" CONFIG_FW_LOADER_USER_HELPER=y # CONFIG_FW_LOADER_USER_HELPER_FALLBACK is not set # CONFIG_FW_LOADER_COMPRESS is not set CONFIG_FW_CACHE=y # CONFIG_FW_UPLOAD is not set # end of Firmware loader CONFIG_ALLOW_DEV_COREDUMP=y # CONFIG_DEBUG_DRIVER is not set # CONFIG_DEBUG_DEVRES is not set # CONFIG_DEBUG_TEST_DRIVER_REMOVE is not set CONFIG_GENERIC_CPU_AUTOPROBE=y CONFIG_GENERIC_CPU_VULNERABILITIES=y CONFIG_DMA_SHARED_BUFFER=y # CONFIG_DMA_FENCE_TRACE is not set # end of Generic Driver Options # # Bus devices # # CONFIG_MHI_BUS is not set # CONFIG_MHI_BUS_EP is not set # end of Bus devices CONFIG_CONNECTOR=y CONFIG_PROC_EVENTS=y # # Firmware Drivers # # # ARM System Control and Management Interface Protocol # # end of ARM System Control and Management Interface Protocol # CONFIG_EDD is not set CONFIG_FIRMWARE_MEMMAP=y CONFIG_DMIID=y # CONFIG_DMI_SYSFS is not set CONFIG_DMI_SCAN_MACHINE_NON_EFI_FALLBACK=y # CONFIG_ISCSI_IBFT is not set # CONFIG_FW_CFG_SYSFS is not set # CONFIG_SYSFB_SIMPLEFB is not set # CONFIG_GOOGLE_FIRMWARE is not set # # Tegra firmware driver # # end of Tegra firmware driver # end of Firmware Drivers # CONFIG_GNSS is not set # CONFIG_MTD is not set # CONFIG_OF is not set CONFIG_ARCH_MIGHT_HAVE_PC_PARPORT=y # CONFIG_PARPORT is not set CONFIG_PNP=y CONFIG_PNP_DEBUG_MESSAGES=y # # Protocols # CONFIG_PNPACPI=y CONFIG_BLK_DEV=y # CONFIG_BLK_DEV_NULL_BLK is not set # CONFIG_BLK_DEV_FD is not set # CONFIG_ZRAM is not set CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_LOOP_MIN_COUNT=8 # CONFIG_BLK_DEV_DRBD is not set # CONFIG_BLK_DEV_NBD is not set # CONFIG_BLK_DEV_RAM is not set # CONFIG_CDROM_PKTCDVD is not set # CONFIG_ATA_OVER_ETH is not set 
CONFIG_VIRTIO_BLK=y # CONFIG_BLK_DEV_RBD is not set # CONFIG_BLK_DEV_UBLK is not set # # NVME Support # # CONFIG_NVME_FC is not set # CONFIG_NVME_TCP is not set # end of NVME Support # # Misc devices # # CONFIG_DUMMY_IRQ is not set # CONFIG_ENCLOSURE_SERVICES is not set # CONFIG_SRAM is not set # CONFIG_XILINX_SDFEC is not set CONFIG_SYSGENID=y # CONFIG_C2PORT is not set # # EEPROM support # # CONFIG_EEPROM_93CX6 is not set # end of EEPROM support # # Texas Instruments shared transport line discipline # # end of Texas Instruments shared transport line discipline # # Altera FPGA firmware download module (requires I2C) # # CONFIG_ECHO is not set # CONFIG_UACCE is not set # CONFIG_PVPANIC is not set # end of Misc devices # # SCSI device support # CONFIG_SCSI_MOD=y # CONFIG_RAID_ATTRS is not set CONFIG_SCSI_COMMON=y CONFIG_SCSI=y CONFIG_SCSI_DMA=y CONFIG_SCSI_PROC_FS=y # # SCSI support type (disk, tape, CD-ROM) # # CONFIG_BLK_DEV_SD is not set # CONFIG_CHR_DEV_ST is not set # CONFIG_BLK_DEV_SR is not set # CONFIG_CHR_DEV_SG is not set CONFIG_BLK_DEV_BSG=y # CONFIG_CHR_DEV_SCH is not set # CONFIG_SCSI_CONSTANTS is not set # CONFIG_SCSI_LOGGING is not set # CONFIG_SCSI_SCAN_ASYNC is not set # # SCSI Transports # # CONFIG_SCSI_SPI_ATTRS is not set # CONFIG_SCSI_FC_ATTRS is not set CONFIG_SCSI_ISCSI_ATTRS=y # CONFIG_SCSI_SAS_ATTRS is not set # CONFIG_SCSI_SAS_LIBSAS is not set # CONFIG_SCSI_SRP_ATTRS is not set # end of SCSI Transports CONFIG_SCSI_LOWLEVEL=y CONFIG_ISCSI_TCP=y # CONFIG_ISCSI_BOOT_SYSFS is not set # CONFIG_SCSI_DEBUG is not set # CONFIG_SCSI_VIRTIO is not set # CONFIG_SCSI_DH is not set # end of SCSI device support # CONFIG_ATA is not set # CONFIG_MD is not set # CONFIG_TARGET_CORE is not set # CONFIG_MACINTOSH_DRIVERS is not set CONFIG_NETDEVICES=y CONFIG_NET_CORE=y # CONFIG_BONDING is not set # CONFIG_DUMMY is not set # CONFIG_WIREGUARD is not set # CONFIG_EQUALIZER is not set # CONFIG_NET_TEAM is not set # CONFIG_MACVLAN is not set # CONFIG_IPVLAN is not 
set # CONFIG_VXLAN is not set # CONFIG_GENEVE is not set # CONFIG_BAREUDP is not set # CONFIG_GTP is not set # CONFIG_AMT is not set # CONFIG_MACSEC is not set # CONFIG_NETCONSOLE is not set # CONFIG_TUN is not set # CONFIG_TUN_VNET_CROSS_LE is not set CONFIG_VETH=y CONFIG_VIRTIO_NET=y # CONFIG_NLMON is not set # CONFIG_NET_VRF is not set # CONFIG_ETHERNET is not set # CONFIG_NET_SB1000 is not set # CONFIG_PHYLIB is not set # CONFIG_PSE_CONTROLLER is not set # CONFIG_MDIO_DEVICE is not set # # PCS device drivers # # end of PCS device drivers # CONFIG_PPP is not set # CONFIG_SLIP is not set # # Host-side USB support is needed for USB Network Adapter support # # CONFIG_WLAN is not set # CONFIG_WAN is not set # # Wireless WAN # # CONFIG_WWAN is not set # end of Wireless WAN # CONFIG_FUJITSU_ES is not set # CONFIG_NETDEVSIM is not set CONFIG_NET_FAILOVER=y # CONFIG_ISDN is not set # # Input device support # CONFIG_INPUT=y CONFIG_INPUT_FF_MEMLESS=y # CONFIG_INPUT_SPARSEKMAP is not set # CONFIG_INPUT_MATRIXKMAP is not set # # Userland interfaces # # CONFIG_INPUT_MOUSEDEV is not set # CONFIG_INPUT_JOYDEV is not set CONFIG_INPUT_EVDEV=y # CONFIG_INPUT_EVBUG is not set # # Input Device Drivers # # CONFIG_INPUT_KEYBOARD is not set # CONFIG_INPUT_MOUSE is not set # CONFIG_INPUT_JOYSTICK is not set # CONFIG_INPUT_TABLET is not set # CONFIG_INPUT_TOUCHSCREEN is not set CONFIG_INPUT_MISC=y # CONFIG_INPUT_AD714X is not set # CONFIG_INPUT_E3X0_BUTTON is not set # CONFIG_INPUT_PCSPKR is not set # CONFIG_INPUT_ATLAS_BTNS is not set # CONFIG_INPUT_ATI_REMOTE2 is not set # CONFIG_INPUT_KEYSPAN_REMOTE is not set # CONFIG_INPUT_POWERMATE is not set # CONFIG_INPUT_YEALINK is not set # CONFIG_INPUT_CM109 is not set # CONFIG_INPUT_UINPUT is not set # CONFIG_INPUT_ADXL34X is not set # CONFIG_INPUT_CMA3000 is not set # CONFIG_RMI4_CORE is not set # # Hardware I/O ports # # CONFIG_SERIO is not set CONFIG_ARCH_MIGHT_HAVE_PC_SERIO=y # CONFIG_GAMEPORT is not set # end of Hardware I/O ports # end 
of Input device support # # Character devices # CONFIG_TTY=y CONFIG_VT=y CONFIG_CONSOLE_TRANSLATIONS=y CONFIG_VT_CONSOLE=y CONFIG_VT_CONSOLE_SLEEP=y CONFIG_HW_CONSOLE=y CONFIG_VT_HW_CONSOLE_BINDING=y CONFIG_UNIX98_PTYS=y # CONFIG_LEGACY_PTYS is not set CONFIG_LDISC_AUTOLOAD=y # # Serial drivers # CONFIG_SERIAL_EARLYCON=y CONFIG_SERIAL_8250=y # CONFIG_SERIAL_8250_DEPRECATED_OPTIONS is not set CONFIG_SERIAL_8250_PNP=y # CONFIG_SERIAL_8250_16550A_VARIANTS is not set # CONFIG_SERIAL_8250_FINTEK is not set CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_8250_DMA=y CONFIG_SERIAL_8250_NR_UARTS=1 CONFIG_SERIAL_8250_RUNTIME_UARTS=1 # CONFIG_SERIAL_8250_EXTENDED is not set # CONFIG_SERIAL_8250_DW is not set # CONFIG_SERIAL_8250_RT288X is not set # # Non-8250 serial port support # # CONFIG_SERIAL_UARTLITE is not set CONFIG_SERIAL_CORE=y CONFIG_SERIAL_CORE_CONSOLE=y # CONFIG_SERIAL_LANTIQ is not set # CONFIG_SERIAL_SCCNXP is not set # CONFIG_SERIAL_ALTERA_JTAGUART is not set # CONFIG_SERIAL_ALTERA_UART is not set # CONFIG_SERIAL_ARC is not set # CONFIG_SERIAL_FSL_LPUART is not set # CONFIG_SERIAL_FSL_LINFLEXUART is not set # CONFIG_SERIAL_SPRD is not set # end of Serial drivers # CONFIG_SERIAL_NONSTANDARD is not set # CONFIG_N_GSM is not set # CONFIG_NULL_TTY is not set CONFIG_HVC_DRIVER=y CONFIG_SERIAL_DEV_BUS=y CONFIG_SERIAL_DEV_CTRL_TTYPORT=y CONFIG_VIRTIO_CONSOLE=y # CONFIG_IPMI_HANDLER is not set CONFIG_HW_RANDOM=y # CONFIG_HW_RANDOM_TIMERIOMEM is not set # CONFIG_HW_RANDOM_BA431 is not set # CONFIG_HW_RANDOM_VIA is not set CONFIG_HW_RANDOM_VIRTIO=y # CONFIG_HW_RANDOM_XIPHERA is not set # CONFIG_MWAVE is not set CONFIG_DEVMEM=y # CONFIG_NVRAM is not set # CONFIG_HPET is not set # CONFIG_HANGCHECK_TIMER is not set # CONFIG_TCG_TPM is not set # CONFIG_TELCLOCK is not set CONFIG_RANDOM_TRUST_CPU=y CONFIG_RANDOM_TRUST_BOOTLOADER=y # end of Character devices # # I2C support # # CONFIG_I2C is not set # end of I2C support # CONFIG_I3C is not set # CONFIG_SPI is not set # CONFIG_SPMI 
is not set # CONFIG_HSI is not set CONFIG_PPS=y # CONFIG_PPS_DEBUG is not set # # PPS clients support # # CONFIG_PPS_CLIENT_KTIMER is not set # CONFIG_PPS_CLIENT_LDISC is not set # CONFIG_PPS_CLIENT_GPIO is not set # # PPS generators support # # # PTP clock support # CONFIG_PTP_1588_CLOCK=y CONFIG_PTP_1588_CLOCK_OPTIONAL=y # # Enable PHYLIB and NETWORK_PHY_TIMESTAMPING to see the additional clocks. # CONFIG_PTP_1588_CLOCK_KVM=y # CONFIG_PTP_1588_CLOCK_VMCLOCK is not set # CONFIG_PTP_1588_CLOCK_VMW is not set # end of PTP clock support # CONFIG_PINCTRL is not set # CONFIG_GPIOLIB is not set # CONFIG_W1 is not set CONFIG_POWER_RESET=y # CONFIG_POWER_RESET_RESTART is not set CONFIG_POWER_SUPPLY=y # CONFIG_POWER_SUPPLY_DEBUG is not set # CONFIG_PDA_POWER is not set # CONFIG_TEST_POWER is not set # CONFIG_BATTERY_DS2780 is not set # CONFIG_BATTERY_DS2781 is not set # CONFIG_BATTERY_SAMSUNG_SDI is not set # CONFIG_BATTERY_BQ27XXX is not set # CONFIG_CHARGER_MAX8903 is not set # CONFIG_BATTERY_GOLDFISH is not set # CONFIG_HWMON is not set CONFIG_THERMAL=y # CONFIG_THERMAL_NETLINK is not set # CONFIG_THERMAL_STATISTICS is not set CONFIG_THERMAL_EMERGENCY_POWEROFF_DELAY_MS=0 CONFIG_THERMAL_WRITABLE_TRIPS=y CONFIG_THERMAL_DEFAULT_GOV_STEP_WISE=y # CONFIG_THERMAL_DEFAULT_GOV_FAIR_SHARE is not set # CONFIG_THERMAL_DEFAULT_GOV_USER_SPACE is not set CONFIG_THERMAL_GOV_FAIR_SHARE=y CONFIG_THERMAL_GOV_STEP_WISE=y # CONFIG_THERMAL_GOV_BANG_BANG is not set CONFIG_THERMAL_GOV_USER_SPACE=y # CONFIG_THERMAL_EMULATION is not set # # Intel thermal drivers # # CONFIG_INTEL_POWERCLAMP is not set CONFIG_X86_THERMAL_VECTOR=y CONFIG_X86_PKG_TEMP_THERMAL=y # # ACPI INT340X thermal drivers # # end of ACPI INT340X thermal drivers # CONFIG_INTEL_TCC_COOLING is not set # CONFIG_INTEL_HFI_THERMAL is not set # end of Intel thermal drivers CONFIG_WATCHDOG=y CONFIG_WATCHDOG_CORE=y # CONFIG_WATCHDOG_NOWAYOUT is not set CONFIG_WATCHDOG_HANDLE_BOOT_ENABLED=y CONFIG_WATCHDOG_OPEN_TIMEOUT=0 
CONFIG_WATCHDOG_SYSFS=y # CONFIG_WATCHDOG_HRTIMER_PRETIMEOUT is not set # # Watchdog Pretimeout Governors # # CONFIG_WATCHDOG_PRETIMEOUT_GOV is not set # # Watchdog Device Drivers # # CONFIG_SOFT_WATCHDOG is not set # CONFIG_WDAT_WDT is not set # CONFIG_XILINX_WATCHDOG is not set # CONFIG_CADENCE_WATCHDOG is not set # CONFIG_DW_WATCHDOG is not set # CONFIG_MAX63XX_WATCHDOG is not set # CONFIG_ACQUIRE_WDT is not set # CONFIG_ADVANTECH_WDT is not set # CONFIG_EBC_C384_WDT is not set # CONFIG_EXAR_WDT is not set # CONFIG_F71808E_WDT is not set # CONFIG_SBC_FITPC2_WATCHDOG is not set # CONFIG_EUROTECH_WDT is not set # CONFIG_IB700_WDT is not set # CONFIG_IBMASR is not set # CONFIG_WAFER_WDT is not set # CONFIG_IT8712F_WDT is not set # CONFIG_IT87_WDT is not set # CONFIG_SC1200_WDT is not set # CONFIG_PC87413_WDT is not set # CONFIG_60XX_WDT is not set # CONFIG_CPU5_WDT is not set # CONFIG_SMSC_SCH311X_WDT is not set # CONFIG_SMSC37B787_WDT is not set # CONFIG_TQMX86_WDT is not set # CONFIG_W83627HF_WDT is not set # CONFIG_W83877F_WDT is not set # CONFIG_W83977F_WDT is not set # CONFIG_MACHZ_WDT is not set # CONFIG_SBC_EPX_C3_WATCHDOG is not set # CONFIG_NI903X_WDT is not set # CONFIG_NIC7018_WDT is not set CONFIG_SSB_POSSIBLE=y # CONFIG_SSB is not set CONFIG_BCMA_POSSIBLE=y # CONFIG_BCMA is not set # # Multifunction device drivers # # CONFIG_MFD_MADERA is not set # CONFIG_HTC_PASIC3 is not set # CONFIG_MFD_INTEL_LPSS_ACPI is not set # CONFIG_MFD_INTEL_PMC_BXT is not set # CONFIG_MFD_KEMPLD is not set # CONFIG_MFD_MT6397 is not set # CONFIG_MFD_SM501 is not set # CONFIG_MFD_SYSCON is not set # CONFIG_MFD_TQMX86 is not set # CONFIG_RAVE_SP_CORE is not set # end of Multifunction device drivers # CONFIG_REGULATOR is not set # CONFIG_RC_CORE is not set # # CEC support # # CONFIG_MEDIA_CEC_SUPPORT is not set # end of CEC support # CONFIG_MEDIA_SUPPORT is not set # # Graphics support # # CONFIG_DRM is not set # # ARM devices # # end of ARM devices # # Frame buffer Devices # # 
CONFIG_FB is not set # end of Frame buffer Devices # # Backlight & LCD device support # # CONFIG_LCD_CLASS_DEVICE is not set # CONFIG_BACKLIGHT_CLASS_DEVICE is not set # end of Backlight & LCD device support # # Console display driver support # CONFIG_VGA_CONSOLE=y CONFIG_DUMMY_CONSOLE=y CONFIG_DUMMY_CONSOLE_COLUMNS=80 CONFIG_DUMMY_CONSOLE_ROWS=25 # end of Console display driver support # end of Graphics support # CONFIG_SOUND is not set # # HID support # CONFIG_HID=y # CONFIG_HID_BATTERY_STRENGTH is not set CONFIG_HIDRAW=y # CONFIG_UHID is not set # CONFIG_HID_GENERIC is not set # # Special HID drivers # # CONFIG_HID_A4TECH is not set # CONFIG_HID_ACRUX is not set # CONFIG_HID_AUREAL is not set # CONFIG_HID_BELKIN is not set # CONFIG_HID_CHERRY is not set # CONFIG_HID_COUGAR is not set # CONFIG_HID_MACALLY is not set # CONFIG_HID_CMEDIA is not set # CONFIG_HID_CYPRESS is not set # CONFIG_HID_DRAGONRISE is not set # CONFIG_HID_EMS_FF is not set # CONFIG_HID_ELECOM is not set # CONFIG_HID_EZKEY is not set # CONFIG_HID_GEMBIRD is not set # CONFIG_HID_GFRM is not set # CONFIG_HID_GLORIOUS is not set # CONFIG_HID_VIVALDI is not set # CONFIG_HID_KEYTOUCH is not set # CONFIG_HID_KYE is not set # CONFIG_HID_WALTOP is not set # CONFIG_HID_VIEWSONIC is not set # CONFIG_HID_VRC2 is not set # CONFIG_HID_XIAOMI is not set # CONFIG_HID_GYRATION is not set # CONFIG_HID_ICADE is not set # CONFIG_HID_ITE is not set # CONFIG_HID_JABRA is not set # CONFIG_HID_TWINHAN is not set # CONFIG_HID_KENSINGTON is not set # CONFIG_HID_LCPOWER is not set # CONFIG_HID_LENOVO is not set # CONFIG_HID_MAGICMOUSE is not set # CONFIG_HID_MALTRON is not set # CONFIG_HID_MAYFLASH is not set # CONFIG_HID_REDRAGON is not set # CONFIG_HID_MICROSOFT is not set # CONFIG_HID_MONTEREY is not set # CONFIG_HID_MULTITOUCH is not set # CONFIG_HID_NTI is not set # CONFIG_HID_ORTEK is not set # CONFIG_HID_PANTHERLORD is not set # CONFIG_HID_PETALYNX is not set # CONFIG_HID_PICOLCD is not set # 
CONFIG_HID_PLANTRONICS is not set # CONFIG_HID_PXRC is not set # CONFIG_HID_RAZER is not set # CONFIG_HID_PRIMAX is not set # CONFIG_HID_SAITEK is not set # CONFIG_HID_SEMITEK is not set # CONFIG_HID_SPEEDLINK is not set # CONFIG_HID_STEAM is not set # CONFIG_HID_STEELSERIES is not set # CONFIG_HID_SUNPLUS is not set # CONFIG_HID_RMI is not set # CONFIG_HID_GREENASIA is not set # CONFIG_HID_SMARTJOYPLUS is not set # CONFIG_HID_TIVO is not set # CONFIG_HID_TOPSEED is not set # CONFIG_HID_TOPRE is not set # CONFIG_HID_UDRAW_PS3 is not set # CONFIG_HID_XINMO is not set # CONFIG_HID_ZEROPLUS is not set # CONFIG_HID_ZYDACRON is not set # CONFIG_HID_SENSOR_HUB is not set # CONFIG_HID_ALPS is not set # end of Special HID drivers # end of HID support CONFIG_USB_OHCI_LITTLE_ENDIAN=y CONFIG_USB_SUPPORT=y # CONFIG_USB_ULPI_BUS is not set CONFIG_USB_ARCH_HAS_HCD=y # CONFIG_USB is not set # # USB port drivers # # # USB Physical Layer drivers # # CONFIG_NOP_USB_XCEIV is not set # end of USB Physical Layer drivers # CONFIG_USB_GADGET is not set # CONFIG_TYPEC is not set # CONFIG_USB_ROLE_SWITCH is not set # CONFIG_MMC is not set # CONFIG_SCSI_UFSHCD is not set # CONFIG_MEMSTICK is not set # CONFIG_NEW_LEDS is not set # CONFIG_ACCESSIBILITY is not set # CONFIG_INFINIBAND is not set CONFIG_EDAC_ATOMIC_SCRUB=y CONFIG_EDAC_SUPPORT=y # CONFIG_EDAC is not set CONFIG_RTC_LIB=y CONFIG_RTC_MC146818_LIB=y # CONFIG_RTC_CLASS is not set CONFIG_DMADEVICES=y # CONFIG_DMADEVICES_DEBUG is not set # # DMA Devices # CONFIG_DMA_ACPI=y # CONFIG_ALTERA_MSGDMA is not set # CONFIG_INTEL_IDMA64 is not set # CONFIG_QCOM_HIDMA_MGMT is not set # CONFIG_QCOM_HIDMA is not set # CONFIG_DW_DMAC is not set # CONFIG_SF_PDMA is not set # CONFIG_INTEL_LDMA is not set # # DMABUF options # CONFIG_SYNC_FILE=y # CONFIG_SW_SYNC is not set # CONFIG_UDMABUF is not set # CONFIG_DMABUF_MOVE_NOTIFY is not set # CONFIG_DMABUF_DEBUG is not set # CONFIG_DMABUF_SELFTESTS is not set # CONFIG_DMABUF_HEAPS is not set # 
CONFIG_DMABUF_SYSFS_STATS is not set # end of DMABUF options CONFIG_AUXDISPLAY=y # CONFIG_IMG_ASCII_LCD is not set CONFIG_CHARLCD_BL_OFF=y # CONFIG_CHARLCD_BL_ON is not set # CONFIG_CHARLCD_BL_FLASH is not set # CONFIG_UIO is not set # CONFIG_VFIO is not set CONFIG_VIRT_DRIVERS=y CONFIG_VMGENID=y CONFIG_VIRTIO_ANCHOR=y CONFIG_VIRTIO=y CONFIG_VIRTIO_MENU=y CONFIG_VIRTIO_BALLOON=y # CONFIG_VIRTIO_MEM is not set # CONFIG_VIRTIO_INPUT is not set CONFIG_VIRTIO_MMIO=y # CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES is not set # CONFIG_VDPA is not set CONFIG_VHOST_MENU=y # CONFIG_VHOST_NET is not set # CONFIG_VHOST_VSOCK is not set # CONFIG_VHOST_CROSS_ENDIAN_LEGACY is not set # # Microsoft Hyper-V guest support # # CONFIG_HYPERV is not set # end of Microsoft Hyper-V guest support # CONFIG_GREYBUS is not set # CONFIG_COMEDI is not set CONFIG_STAGING=y # CONFIG_STAGING_MEDIA is not set # CONFIG_FIELDBUS_DEV is not set # CONFIG_CHROME_PLATFORMS is not set # CONFIG_MELLANOX_PLATFORM is not set CONFIG_SURFACE_PLATFORMS=y # CONFIG_SURFACE_GPE is not set # CONFIG_SURFACE_PRO3_BUTTON is not set # CONFIG_SURFACE_AGGREGATOR is not set CONFIG_X86_PLATFORM_DEVICES=y # CONFIG_ACPI_WMI is not set # CONFIG_ACERHDF is not set # CONFIG_ACER_WIRELESS is not set # CONFIG_ADV_SWBUTTON is not set # CONFIG_ASUS_WIRELESS is not set # CONFIG_X86_PLATFORM_DRIVERS_DELL is not set # CONFIG_FUJITSU_TABLET is not set # CONFIG_GPD_POCKET_FAN is not set # CONFIG_X86_PLATFORM_DRIVERS_HP is not set # CONFIG_WIRELESS_HOTKEY is not set # CONFIG_SENSORS_HDAPS is not set # CONFIG_INTEL_SAR_INT1092 is not set # # Intel Uncore Frequency Control # # CONFIG_INTEL_UNCORE_FREQ_CONTROL is not set # end of Intel Uncore Frequency Control # CONFIG_INTEL_PUNIT_IPC is not set # CONFIG_INTEL_RST is not set # CONFIG_INTEL_SMARTCONNECT is not set CONFIG_INTEL_TURBO_MAX_3=y # CONFIG_SAMSUNG_Q10 is not set # CONFIG_TOSHIBA_BT_RFKILL is not set # CONFIG_TOSHIBA_HAPS is not set # CONFIG_ACPI_CMPC is not set # CONFIG_TOPSTAR_LAPTOP is 
not set # CONFIG_INTEL_SCU_PLATFORM is not set # CONFIG_WINMATE_FM07_KEYS is not set CONFIG_HAVE_CLK=y CONFIG_HAVE_CLK_PREPARE=y CONFIG_COMMON_CLK=y # CONFIG_XILINX_VCU is not set # CONFIG_HWSPINLOCK is not set # # Clock Source drivers # CONFIG_CLKEVT_I8253=y CONFIG_I8253_LOCK=y CONFIG_CLKBLD_I8253=y # end of Clock Source drivers CONFIG_MAILBOX=y CONFIG_PCC=y # CONFIG_ALTERA_MBOX is not set CONFIG_IOMMU_IOVA=y CONFIG_IOMMU_API=y CONFIG_IOMMU_SUPPORT=y # # Generic IOMMU Pagetable Support # # end of Generic IOMMU Pagetable Support # CONFIG_IOMMU_DEBUGFS is not set # CONFIG_IOMMU_DEFAULT_DMA_STRICT is not set CONFIG_IOMMU_DEFAULT_DMA_LAZY=y # CONFIG_IOMMU_DEFAULT_PASSTHROUGH is not set CONFIG_IOMMU_DMA=y # CONFIG_VIRTIO_IOMMU is not set # # Remoteproc drivers # # CONFIG_REMOTEPROC is not set # end of Remoteproc drivers # # Rpmsg drivers # # CONFIG_RPMSG_QCOM_GLINK_RPM is not set # CONFIG_RPMSG_VIRTIO is not set # end of Rpmsg drivers # CONFIG_SOUNDWIRE is not set # # SOC (System On Chip) specific Drivers # # # Amlogic SoC drivers # # end of Amlogic SoC drivers # # Broadcom SoC drivers # # end of Broadcom SoC drivers # # NXP/Freescale QorIQ SoC drivers # # end of NXP/Freescale QorIQ SoC drivers # # fujitsu SoC drivers # # end of fujitsu SoC drivers # # i.MX SoC drivers # # end of i.MX SoC drivers # # Enable LiteX SoC Builder specific drivers # # end of Enable LiteX SoC Builder specific drivers # # Qualcomm SoC drivers # # end of Qualcomm SoC drivers # CONFIG_SOC_TI is not set # # Xilinx SoC drivers # # end of Xilinx SoC drivers # end of SOC (System On Chip) specific Drivers # CONFIG_PM_DEVFREQ is not set # CONFIG_EXTCON is not set # CONFIG_MEMORY is not set # CONFIG_IIO is not set # CONFIG_PWM is not set # # IRQ chip support # # end of IRQ chip support # CONFIG_IPACK_BUS is not set # CONFIG_RESET_CONTROLLER is not set # # PHY Subsystem # # CONFIG_GENERIC_PHY is not set # CONFIG_USB_LGM_PHY is not set # CONFIG_PHY_CAN_TRANSCEIVER is not set # # PHY drivers for Broadcom 
platforms # # CONFIG_BCM_KONA_USB2_PHY is not set # end of PHY drivers for Broadcom platforms # CONFIG_PHY_PXA_28NM_HSIC is not set # CONFIG_PHY_PXA_28NM_USB2 is not set # CONFIG_PHY_INTEL_LGM_EMMC is not set # end of PHY Subsystem # CONFIG_POWERCAP is not set # CONFIG_MCB is not set # # Performance monitor support # # end of Performance monitor support CONFIG_RAS=y # # Android # # CONFIG_ANDROID_BINDER_IPC is not set # end of Android # CONFIG_LIBNVDIMM is not set # CONFIG_DAX is not set # CONFIG_NVMEM is not set # # HW tracing support # # CONFIG_STM is not set # CONFIG_INTEL_TH is not set # end of HW tracing support # CONFIG_FPGA is not set # CONFIG_TEE is not set # CONFIG_SIOX is not set # CONFIG_SLIMBUS is not set # CONFIG_INTERCONNECT is not set # CONFIG_COUNTER is not set # CONFIG_PECI is not set # CONFIG_HTE is not set # end of Device Drivers # # File systems # CONFIG_DCACHE_WORD_ACCESS=y CONFIG_VALIDATE_FS_PARSER=y CONFIG_FS_IOMAP=y # CONFIG_EXT2_FS is not set # CONFIG_EXT3_FS is not set CONFIG_EXT4_FS=y CONFIG_EXT4_USE_FOR_EXT2=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_EXT4_FS_SECURITY=y CONFIG_EXT4_DEBUG=y CONFIG_JBD2=y CONFIG_JBD2_DEBUG=y CONFIG_FS_MBCACHE=y # CONFIG_REISERFS_FS is not set # CONFIG_JFS_FS is not set CONFIG_XFS_FS=y CONFIG_XFS_SUPPORT_V4=y CONFIG_XFS_QUOTA=y CONFIG_XFS_POSIX_ACL=y # CONFIG_XFS_RT is not set # CONFIG_XFS_ONLINE_SCRUB is not set # CONFIG_XFS_WARN is not set # CONFIG_XFS_DEBUG is not set # CONFIG_GFS2_FS is not set # CONFIG_BTRFS_FS is not set # CONFIG_NILFS2_FS is not set # CONFIG_F2FS_FS is not set CONFIG_FS_POSIX_ACL=y CONFIG_EXPORTFS=y # CONFIG_EXPORTFS_BLOCK_OPS is not set CONFIG_FILE_LOCKING=y CONFIG_FS_ENCRYPTION=y CONFIG_FS_ENCRYPTION_ALGS=y # CONFIG_FS_VERITY is not set CONFIG_FSNOTIFY=y CONFIG_DNOTIFY=y CONFIG_INOTIFY_USER=y CONFIG_FANOTIFY=y CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y CONFIG_QUOTA=y CONFIG_QUOTA_NETLINK_INTERFACE=y # CONFIG_PRINT_QUOTA_WARNING is not set # CONFIG_QUOTA_DEBUG is not set # CONFIG_QFMT_V1 is not 
set # CONFIG_QFMT_V2 is not set CONFIG_QUOTACTL=y CONFIG_AUTOFS4_FS=y CONFIG_AUTOFS_FS=y # CONFIG_FUSE_FS is not set CONFIG_OVERLAY_FS=y # CONFIG_OVERLAY_FS_REDIRECT_DIR is not set CONFIG_OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW=y # CONFIG_OVERLAY_FS_INDEX is not set # CONFIG_OVERLAY_FS_XINO_AUTO is not set # CONFIG_OVERLAY_FS_METACOPY is not set # # Caches # # CONFIG_FSCACHE is not set # end of Caches # # CD-ROM/DVD Filesystems # # CONFIG_ISO9660_FS is not set # CONFIG_UDF_FS is not set # end of CD-ROM/DVD Filesystems # # DOS/FAT/EXFAT/NT Filesystems # # CONFIG_MSDOS_FS is not set # CONFIG_VFAT_FS is not set # CONFIG_EXFAT_FS is not set # CONFIG_NTFS_FS is not set # CONFIG_NTFS3_FS is not set # end of DOS/FAT/EXFAT/NT Filesystems # # Pseudo filesystems # CONFIG_PROC_FS=y CONFIG_PROC_KCORE=y CONFIG_PROC_SYSCTL=y CONFIG_PROC_PAGE_MONITOR=y CONFIG_PROC_CHILDREN=y CONFIG_PROC_PID_ARCH_STATUS=y CONFIG_KERNFS=y CONFIG_SYSFS=y CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y CONFIG_TMPFS_XATTR=y # CONFIG_TMPFS_INODE64 is not set CONFIG_HUGETLBFS=y CONFIG_HUGETLB_PAGE=y CONFIG_ARCH_WANT_HUGETLB_PAGE_OPTIMIZE_VMEMMAP=y CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP=y # CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP_DEFAULT_ON is not set CONFIG_MEMFD_CREATE=y CONFIG_ARCH_HAS_GIGANTIC_PAGE=y # CONFIG_CONFIGFS_FS is not set # end of Pseudo filesystems CONFIG_MISC_FILESYSTEMS=y # CONFIG_ORANGEFS_FS is not set # CONFIG_ADFS_FS is not set # CONFIG_AFFS_FS is not set # CONFIG_ECRYPT_FS is not set # CONFIG_HFS_FS is not set # CONFIG_HFSPLUS_FS is not set # CONFIG_BEFS_FS is not set # CONFIG_BFS_FS is not set # CONFIG_EFS_FS is not set # CONFIG_CRAMFS is not set CONFIG_SQUASHFS=y CONFIG_SQUASHFS_FILE_CACHE=y # CONFIG_SQUASHFS_FILE_DIRECT is not set CONFIG_SQUASHFS_DECOMP_SINGLE=y # CONFIG_SQUASHFS_DECOMP_MULTI is not set # CONFIG_SQUASHFS_DECOMP_MULTI_PERCPU is not set CONFIG_SQUASHFS_XATTR=y CONFIG_SQUASHFS_ZLIB=y CONFIG_SQUASHFS_LZ4=y CONFIG_SQUASHFS_LZO=y CONFIG_SQUASHFS_XZ=y CONFIG_SQUASHFS_ZSTD=y # 
CONFIG_SQUASHFS_4K_DEVBLK_SIZE is not set # CONFIG_SQUASHFS_EMBEDDED is not set CONFIG_SQUASHFS_FRAGMENT_CACHE_SIZE=3 # CONFIG_VXFS_FS is not set # CONFIG_MINIX_FS is not set # CONFIG_OMFS_FS is not set # CONFIG_HPFS_FS is not set # CONFIG_QNX4FS_FS is not set # CONFIG_QNX6FS_FS is not set # CONFIG_ROMFS_FS is not set CONFIG_PSTORE=y CONFIG_PSTORE_DEFAULT_KMSG_BYTES=10240 CONFIG_PSTORE_DEFLATE_COMPRESS=y # CONFIG_PSTORE_LZO_COMPRESS is not set # CONFIG_PSTORE_LZ4_COMPRESS is not set # CONFIG_PSTORE_LZ4HC_COMPRESS is not set # CONFIG_PSTORE_842_COMPRESS is not set # CONFIG_PSTORE_ZSTD_COMPRESS is not set CONFIG_PSTORE_COMPRESS=y CONFIG_PSTORE_DEFLATE_COMPRESS_DEFAULT=y CONFIG_PSTORE_COMPRESS_DEFAULT="deflate" # CONFIG_PSTORE_CONSOLE is not set # CONFIG_PSTORE_PMSG is not set # CONFIG_PSTORE_RAM is not set # CONFIG_PSTORE_BLK is not set # CONFIG_SYSV_FS is not set # CONFIG_UFS_FS is not set # CONFIG_EROFS_FS is not set CONFIG_NETWORK_FILESYSTEMS=y CONFIG_NFS_FS=y # CONFIG_NFS_V2 is not set # CONFIG_NFS_V3 is not set CONFIG_NFS_V4=y CONFIG_NFS_SWAP=y CONFIG_NFS_V4_1=y CONFIG_NFS_V4_2=y CONFIG_PNFS_FILE_LAYOUT=y CONFIG_NFS_V4_1_IMPLEMENTATION_ID_DOMAIN="kernel.org" # CONFIG_NFS_V4_1_MIGRATION is not set CONFIG_NFS_V4_SECURITY_LABEL=y CONFIG_ROOT_NFS=y # CONFIG_NFS_USE_LEGACY_DNS is not set CONFIG_NFS_USE_KERNEL_DNS=y CONFIG_NFS_DISABLE_UDP_SUPPORT=y # CONFIG_NFS_V4_2_READ_PLUS is not set # CONFIG_NFSD is not set CONFIG_GRACE_PERIOD=y CONFIG_LOCKD=y CONFIG_NFS_COMMON=y CONFIG_NFS_V4_2_SSC_HELPER=y CONFIG_SUNRPC=y CONFIG_SUNRPC_GSS=y CONFIG_SUNRPC_BACKCHANNEL=y CONFIG_SUNRPC_SWAP=y # CONFIG_SUNRPC_DEBUG is not set # CONFIG_CEPH_FS is not set # CONFIG_CIFS is not set # CONFIG_SMB_SERVER is not set # CONFIG_CODA_FS is not set # CONFIG_AFS_FS is not set CONFIG_NLS=y CONFIG_NLS_DEFAULT="utf8" # CONFIG_NLS_CODEPAGE_437 is not set # CONFIG_NLS_CODEPAGE_737 is not set # CONFIG_NLS_CODEPAGE_775 is not set # CONFIG_NLS_CODEPAGE_850 is not set # CONFIG_NLS_CODEPAGE_852 is not set 
# CONFIG_NLS_CODEPAGE_855 is not set # CONFIG_NLS_CODEPAGE_857 is not set # CONFIG_NLS_CODEPAGE_860 is not set # CONFIG_NLS_CODEPAGE_861 is not set # CONFIG_NLS_CODEPAGE_862 is not set # CONFIG_NLS_CODEPAGE_863 is not set # CONFIG_NLS_CODEPAGE_864 is not set # CONFIG_NLS_CODEPAGE_865 is not set # CONFIG_NLS_CODEPAGE_866 is not set # CONFIG_NLS_CODEPAGE_869 is not set # CONFIG_NLS_CODEPAGE_936 is not set # CONFIG_NLS_CODEPAGE_950 is not set # CONFIG_NLS_CODEPAGE_932 is not set # CONFIG_NLS_CODEPAGE_949 is not set # CONFIG_NLS_CODEPAGE_874 is not set # CONFIG_NLS_ISO8859_8 is not set # CONFIG_NLS_CODEPAGE_1250 is not set # CONFIG_NLS_CODEPAGE_1251 is not set # CONFIG_NLS_ASCII is not set # CONFIG_NLS_ISO8859_1 is not set # CONFIG_NLS_ISO8859_2 is not set # CONFIG_NLS_ISO8859_3 is not set # CONFIG_NLS_ISO8859_4 is not set # CONFIG_NLS_ISO8859_5 is not set # CONFIG_NLS_ISO8859_6 is not set # CONFIG_NLS_ISO8859_7 is not set # CONFIG_NLS_ISO8859_9 is not set # CONFIG_NLS_ISO8859_13 is not set # CONFIG_NLS_ISO8859_14 is not set # CONFIG_NLS_ISO8859_15 is not set # CONFIG_NLS_KOI8_R is not set # CONFIG_NLS_KOI8_U is not set # CONFIG_NLS_MAC_ROMAN is not set # CONFIG_NLS_MAC_CELTIC is not set # CONFIG_NLS_MAC_CENTEURO is not set # CONFIG_NLS_MAC_CROATIAN is not set # CONFIG_NLS_MAC_CYRILLIC is not set # CONFIG_NLS_MAC_GAELIC is not set # CONFIG_NLS_MAC_GREEK is not set # CONFIG_NLS_MAC_ICELAND is not set # CONFIG_NLS_MAC_INUIT is not set # CONFIG_NLS_MAC_ROMANIAN is not set # CONFIG_NLS_MAC_TURKISH is not set # CONFIG_NLS_UTF8 is not set # CONFIG_UNICODE is not set CONFIG_IO_WQ=y # end of File systems # # Security options # CONFIG_KEYS=y # CONFIG_KEYS_REQUEST_CACHE is not set CONFIG_PERSISTENT_KEYRINGS=y # CONFIG_TRUSTED_KEYS is not set CONFIG_ENCRYPTED_KEYS=y # CONFIG_USER_DECRYPTED_DATA is not set # CONFIG_KEY_DH_OPERATIONS is not set # CONFIG_SECURITY_DMESG_RESTRICT is not set CONFIG_SECURITY=y CONFIG_SECURITY_WRITABLE_HOOKS=y CONFIG_SECURITYFS=y 
CONFIG_SECURITY_NETWORK=y CONFIG_SECURITY_NETWORK_XFRM=y # CONFIG_SECURITY_PATH is not set CONFIG_LSM_MMAP_MIN_ADDR=65536 CONFIG_HAVE_HARDENED_USERCOPY_ALLOCATOR=y CONFIG_HARDENED_USERCOPY=y CONFIG_FORTIFY_SOURCE=y # CONFIG_STATIC_USERMODEHELPER is not set CONFIG_SECURITY_SELINUX=y CONFIG_SECURITY_SELINUX_BOOTPARAM=y CONFIG_SECURITY_SELINUX_DISABLE=y CONFIG_SECURITY_SELINUX_DEVELOP=y CONFIG_SECURITY_SELINUX_AVC_STATS=y CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE=1 CONFIG_SECURITY_SELINUX_SIDTAB_HASH_BITS=9 CONFIG_SECURITY_SELINUX_SID2STR_CACHE_SIZE=256 # CONFIG_SECURITY_SMACK is not set # CONFIG_SECURITY_TOMOYO is not set # CONFIG_SECURITY_APPARMOR is not set # CONFIG_SECURITY_LOADPIN is not set # CONFIG_SECURITY_YAMA is not set # CONFIG_SECURITY_SAFESETID is not set # CONFIG_SECURITY_LOCKDOWN_LSM is not set # CONFIG_SECURITY_LANDLOCK is not set # CONFIG_INTEGRITY is not set CONFIG_DEFAULT_SECURITY_SELINUX=y # CONFIG_DEFAULT_SECURITY_DAC is not set CONFIG_LSM="lockdown,yama,loadpin,safesetid,integrity,selinux,smack,tomoyo,apparmor,bpf" # # Kernel hardening options # # # Memory initialization # CONFIG_INIT_STACK_NONE=y # CONFIG_INIT_ON_ALLOC_DEFAULT_ON is not set # CONFIG_INIT_ON_FREE_DEFAULT_ON is not set CONFIG_CC_HAS_ZERO_CALL_USED_REGS=y # CONFIG_ZERO_CALL_USED_REGS is not set # end of Memory initialization CONFIG_RANDSTRUCT_NONE=y # end of Kernel hardening options # end of Security options CONFIG_CRYPTO=y # # Crypto core or helper # CONFIG_CRYPTO_ALGAPI=y CONFIG_CRYPTO_ALGAPI2=y CONFIG_CRYPTO_AEAD=y CONFIG_CRYPTO_AEAD2=y CONFIG_CRYPTO_SKCIPHER=y CONFIG_CRYPTO_SKCIPHER2=y CONFIG_CRYPTO_HASH=y CONFIG_CRYPTO_HASH2=y CONFIG_CRYPTO_RNG=y CONFIG_CRYPTO_RNG2=y CONFIG_CRYPTO_RNG_DEFAULT=y CONFIG_CRYPTO_AKCIPHER2=y CONFIG_CRYPTO_AKCIPHER=y CONFIG_CRYPTO_KPP2=y CONFIG_CRYPTO_KPP=y CONFIG_CRYPTO_ACOMP2=y CONFIG_CRYPTO_MANAGER=y CONFIG_CRYPTO_MANAGER2=y # CONFIG_CRYPTO_USER is not set CONFIG_CRYPTO_MANAGER_DISABLE_TESTS=y CONFIG_CRYPTO_NULL=y CONFIG_CRYPTO_NULL2=y # 
CONFIG_CRYPTO_PCRYPT is not set # CONFIG_CRYPTO_CRYPTD is not set # CONFIG_CRYPTO_AUTHENC is not set # end of Crypto core or helper # # Public-key cryptography # CONFIG_CRYPTO_RSA=y CONFIG_CRYPTO_DH=y # CONFIG_CRYPTO_DH_RFC7919_GROUPS is not set CONFIG_CRYPTO_ECC=y CONFIG_CRYPTO_ECDH=y # CONFIG_CRYPTO_ECDSA is not set # CONFIG_CRYPTO_ECRDSA is not set # CONFIG_CRYPTO_SM2 is not set # CONFIG_CRYPTO_CURVE25519 is not set # end of Public-key cryptography # # Block ciphers # CONFIG_CRYPTO_AES=y CONFIG_CRYPTO_AES_TI=y # CONFIG_CRYPTO_ARIA is not set # CONFIG_CRYPTO_BLOWFISH is not set # CONFIG_CRYPTO_CAMELLIA is not set # CONFIG_CRYPTO_CAST5 is not set # CONFIG_CRYPTO_CAST6 is not set # CONFIG_CRYPTO_DES is not set # CONFIG_CRYPTO_FCRYPT is not set # CONFIG_CRYPTO_SERPENT is not set # CONFIG_CRYPTO_SM4_GENERIC is not set # CONFIG_CRYPTO_TWOFISH is not set # end of Block ciphers # # Length-preserving ciphers and modes # # CONFIG_CRYPTO_ADIANTUM is not set # CONFIG_CRYPTO_CHACHA20 is not set CONFIG_CRYPTO_CBC=y # CONFIG_CRYPTO_CFB is not set CONFIG_CRYPTO_CTR=y CONFIG_CRYPTO_CTS=y CONFIG_CRYPTO_ECB=y # CONFIG_CRYPTO_HCTR2 is not set # CONFIG_CRYPTO_KEYWRAP is not set # CONFIG_CRYPTO_LRW is not set # CONFIG_CRYPTO_OFB is not set # CONFIG_CRYPTO_PCBC is not set CONFIG_CRYPTO_XTS=y # end of Length-preserving ciphers and modes # # AEAD (authenticated encryption with associated data) ciphers # # CONFIG_CRYPTO_AEGIS128 is not set # CONFIG_CRYPTO_CHACHA20POLY1305 is not set # CONFIG_CRYPTO_CCM is not set # CONFIG_CRYPTO_GCM is not set CONFIG_CRYPTO_SEQIV=y # CONFIG_CRYPTO_ECHAINIV is not set # CONFIG_CRYPTO_ESSIV is not set # end of AEAD (authenticated encryption with associated data) ciphers # # Hashes, digests, and MACs # # CONFIG_CRYPTO_BLAKE2B is not set # CONFIG_CRYPTO_CMAC is not set # CONFIG_CRYPTO_GHASH is not set CONFIG_CRYPTO_HMAC=y # CONFIG_CRYPTO_MD4 is not set CONFIG_CRYPTO_MD5=y # CONFIG_CRYPTO_MICHAEL_MIC is not set # CONFIG_CRYPTO_POLY1305 is not set # 
CONFIG_CRYPTO_RMD160 is not set CONFIG_CRYPTO_SHA1=y CONFIG_CRYPTO_SHA256=y CONFIG_CRYPTO_SHA512=y CONFIG_CRYPTO_SHA3=y # CONFIG_CRYPTO_SM3_GENERIC is not set # CONFIG_CRYPTO_STREEBOG is not set # CONFIG_CRYPTO_VMAC is not set # CONFIG_CRYPTO_WP512 is not set # CONFIG_CRYPTO_XCBC is not set CONFIG_CRYPTO_XXHASH=y # end of Hashes, digests, and MACs # # CRCs (cyclic redundancy checks) # CONFIG_CRYPTO_CRC32C=y # CONFIG_CRYPTO_CRC32 is not set CONFIG_CRYPTO_CRCT10DIF=y # end of CRCs (cyclic redundancy checks) # # Compression # CONFIG_CRYPTO_DEFLATE=y CONFIG_CRYPTO_LZO=y # CONFIG_CRYPTO_842 is not set # CONFIG_CRYPTO_LZ4 is not set # CONFIG_CRYPTO_LZ4HC is not set # CONFIG_CRYPTO_ZSTD is not set # end of Compression # # Random number generation # # CONFIG_CRYPTO_ANSI_CPRNG is not set CONFIG_CRYPTO_DRBG_MENU=y CONFIG_CRYPTO_DRBG_HMAC=y CONFIG_CRYPTO_DRBG_HASH=y CONFIG_CRYPTO_DRBG_CTR=y CONFIG_CRYPTO_DRBG=y CONFIG_CRYPTO_JITTERENTROPY=y # end of Random number generation # # Userspace interface # # CONFIG_CRYPTO_USER_API_HASH is not set # CONFIG_CRYPTO_USER_API_SKCIPHER is not set # CONFIG_CRYPTO_USER_API_RNG is not set # CONFIG_CRYPTO_USER_API_AEAD is not set # end of Userspace interface CONFIG_CRYPTO_HASH_INFO=y # # Accelerated Cryptographic Algorithms for CPU (x86) # # CONFIG_CRYPTO_CURVE25519_X86 is not set # CONFIG_CRYPTO_AES_NI_INTEL is not set # CONFIG_CRYPTO_BLOWFISH_X86_64 is not set # CONFIG_CRYPTO_CAMELLIA_X86_64 is not set # CONFIG_CRYPTO_CAMELLIA_AESNI_AVX_X86_64 is not set # CONFIG_CRYPTO_CAMELLIA_AESNI_AVX2_X86_64 is not set # CONFIG_CRYPTO_CAST5_AVX_X86_64 is not set # CONFIG_CRYPTO_CAST6_AVX_X86_64 is not set # CONFIG_CRYPTO_DES3_EDE_X86_64 is not set # CONFIG_CRYPTO_SERPENT_SSE2_X86_64 is not set # CONFIG_CRYPTO_SERPENT_AVX_X86_64 is not set # CONFIG_CRYPTO_SERPENT_AVX2_X86_64 is not set # CONFIG_CRYPTO_SM4_AESNI_AVX_X86_64 is not set # CONFIG_CRYPTO_SM4_AESNI_AVX2_X86_64 is not set # CONFIG_CRYPTO_TWOFISH_X86_64 is not set # 
CONFIG_CRYPTO_TWOFISH_X86_64_3WAY is not set # CONFIG_CRYPTO_TWOFISH_AVX_X86_64 is not set # CONFIG_CRYPTO_ARIA_AESNI_AVX_X86_64 is not set # CONFIG_CRYPTO_CHACHA20_X86_64 is not set # CONFIG_CRYPTO_AEGIS128_AESNI_SSE2 is not set # CONFIG_CRYPTO_NHPOLY1305_SSE2 is not set # CONFIG_CRYPTO_NHPOLY1305_AVX2 is not set # CONFIG_CRYPTO_BLAKE2S_X86 is not set # CONFIG_CRYPTO_POLYVAL_CLMUL_NI is not set # CONFIG_CRYPTO_POLY1305_X86_64 is not set CONFIG_CRYPTO_SHA1_SSSE3=y CONFIG_CRYPTO_SHA256_SSSE3=y CONFIG_CRYPTO_SHA512_SSSE3=y # CONFIG_CRYPTO_SM3_AVX_X86_64 is not set # CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL is not set # CONFIG_CRYPTO_CRC32C_INTEL is not set # CONFIG_CRYPTO_CRC32_PCLMUL is not set CONFIG_CRYPTO_CRCT10DIF_PCLMUL=y # end of Accelerated Cryptographic Algorithms for CPU (x86) # CONFIG_CRYPTO_HW is not set CONFIG_ASYMMETRIC_KEY_TYPE=y CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=y CONFIG_X509_CERTIFICATE_PARSER=y # CONFIG_PKCS8_PRIVATE_KEY_PARSER is not set CONFIG_PKCS7_MESSAGE_PARSER=y # CONFIG_FIPS_SIGNATURE_SELFTEST is not set # # Certificates for signature checking # CONFIG_SYSTEM_TRUSTED_KEYRING=y CONFIG_SYSTEM_TRUSTED_KEYS="" # CONFIG_SYSTEM_EXTRA_CERTIFICATE is not set # CONFIG_SECONDARY_TRUSTED_KEYRING is not set CONFIG_SYSTEM_BLACKLIST_KEYRING=y CONFIG_SYSTEM_BLACKLIST_HASH_LIST="" # CONFIG_SYSTEM_REVOCATION_LIST is not set # end of Certificates for signature checking CONFIG_BINARY_PRINTF=y # # Library routines # # CONFIG_PACKING is not set CONFIG_BITREVERSE=y CONFIG_GENERIC_STRNCPY_FROM_USER=y CONFIG_GENERIC_STRNLEN_USER=y CONFIG_GENERIC_NET_UTILS=y # CONFIG_CORDIC is not set # CONFIG_PRIME_NUMBERS is not set CONFIG_RATIONAL=y CONFIG_GENERIC_PCI_IOMAP=y CONFIG_GENERIC_IOMAP=y CONFIG_ARCH_USE_CMPXCHG_LOCKREF=y CONFIG_ARCH_HAS_FAST_MULTIPLIER=y CONFIG_ARCH_USE_SYM_ANNOTATIONS=y # # Crypto library routines # CONFIG_CRYPTO_LIB_UTILS=y CONFIG_CRYPTO_LIB_AES=y CONFIG_CRYPTO_LIB_BLAKE2S_GENERIC=y # CONFIG_CRYPTO_LIB_CHACHA is not set # CONFIG_CRYPTO_LIB_CURVE25519 is 
not set CONFIG_CRYPTO_LIB_POLY1305_RSIZE=11 # CONFIG_CRYPTO_LIB_POLY1305 is not set # CONFIG_CRYPTO_LIB_CHACHA20POLY1305 is not set CONFIG_CRYPTO_LIB_SHA1=y CONFIG_CRYPTO_LIB_SHA256=y # end of Crypto library routines CONFIG_CRC_CCITT=y CONFIG_CRC16=y CONFIG_CRC_T10DIF=y # CONFIG_CRC64_ROCKSOFT is not set # CONFIG_CRC_ITU_T is not set CONFIG_CRC32=y # CONFIG_CRC32_SELFTEST is not set CONFIG_CRC32_SLICEBY8=y # CONFIG_CRC32_SLICEBY4 is not set # CONFIG_CRC32_SARWATE is not set # CONFIG_CRC32_BIT is not set # CONFIG_CRC64 is not set # CONFIG_CRC4 is not set # CONFIG_CRC7 is not set CONFIG_LIBCRC32C=y # CONFIG_CRC8 is not set CONFIG_XXHASH=y # CONFIG_RANDOM32_SELFTEST is not set CONFIG_ZLIB_INFLATE=y CONFIG_ZLIB_DEFLATE=y CONFIG_LZO_COMPRESS=y CONFIG_LZO_DECOMPRESS=y CONFIG_LZ4_DECOMPRESS=y CONFIG_ZSTD_COMMON=y CONFIG_ZSTD_DECOMPRESS=y CONFIG_XZ_DEC=y CONFIG_XZ_DEC_X86=y CONFIG_XZ_DEC_POWERPC=y CONFIG_XZ_DEC_IA64=y CONFIG_XZ_DEC_ARM=y CONFIG_XZ_DEC_ARMTHUMB=y CONFIG_XZ_DEC_SPARC=y # CONFIG_XZ_DEC_MICROLZMA is not set CONFIG_XZ_DEC_BCJ=y # CONFIG_XZ_DEC_TEST is not set CONFIG_DECOMPRESS_GZIP=y CONFIG_DECOMPRESS_BZIP2=y CONFIG_DECOMPRESS_LZMA=y CONFIG_DECOMPRESS_XZ=y CONFIG_DECOMPRESS_LZO=y CONFIG_DECOMPRESS_LZ4=y CONFIG_DECOMPRESS_ZSTD=y CONFIG_XARRAY_MULTI=y CONFIG_ASSOCIATIVE_ARRAY=y CONFIG_HAS_IOMEM=y CONFIG_HAS_IOPORT_MAP=y CONFIG_HAS_DMA=y CONFIG_DMA_OPS=y # CONFIG_DMA_PAGE_TOUCHING is not set CONFIG_NEED_SG_DMA_LENGTH=y CONFIG_NEED_DMA_MAP_STATE=y CONFIG_ARCH_DMA_ADDR_T_64BIT=y CONFIG_SWIOTLB=y # CONFIG_DMA_API_DEBUG is not set # CONFIG_DMA_MAP_BENCHMARK is not set CONFIG_SGL_ALLOC=y # CONFIG_FORCE_NR_CPUS is not set CONFIG_CPU_RMAP=y CONFIG_DQL=y CONFIG_NLATTR=y CONFIG_CLZ_TAB=y CONFIG_IRQ_POLL=y CONFIG_MPILIB=y CONFIG_OID_REGISTRY=y CONFIG_HAVE_GENERIC_VDSO=y CONFIG_GENERIC_GETTIMEOFDAY=y CONFIG_GENERIC_VDSO_TIME_NS=y CONFIG_SG_POOL=y CONFIG_ARCH_HAS_PMEM_API=y CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE=y CONFIG_ARCH_HAS_COPY_MC=y CONFIG_ARCH_STACKWALK=y 
CONFIG_STACKDEPOT=y CONFIG_SBITMAP=y # end of Library routines # # Kernel hacking # # # printk and dmesg options # CONFIG_PRINTK_TIME=y # CONFIG_PRINTK_CALLER is not set # CONFIG_STACKTRACE_BUILD_ID is not set CONFIG_CONSOLE_LOGLEVEL_DEFAULT=7 CONFIG_CONSOLE_LOGLEVEL_QUIET=4 CONFIG_MESSAGE_LOGLEVEL_DEFAULT=4 # CONFIG_BOOT_PRINTK_DELAY is not set # CONFIG_DYNAMIC_DEBUG is not set # CONFIG_DYNAMIC_DEBUG_CORE is not set CONFIG_SYMBOLIC_ERRNAME=y CONFIG_DEBUG_BUGVERBOSE=y # end of printk and dmesg options CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_MISC=y # # Compile-time checks and compiler options # CONFIG_AS_HAS_NON_CONST_LEB128=y CONFIG_DEBUG_INFO_NONE=y # CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT is not set # CONFIG_DEBUG_INFO_DWARF4 is not set # CONFIG_DEBUG_INFO_DWARF5 is not set CONFIG_FRAME_WARN=2048 CONFIG_STRIP_ASM_SYMS=y # CONFIG_READABLE_ASM is not set # CONFIG_HEADERS_INSTALL is not set CONFIG_DEBUG_SECTION_MISMATCH=y CONFIG_SECTION_MISMATCH_WARN_ONLY=y CONFIG_ARCH_WANT_FRAME_POINTERS=y CONFIG_FRAME_POINTER=y CONFIG_OBJTOOL=y CONFIG_STACK_VALIDATION=y # CONFIG_DEBUG_FORCE_WEAK_PER_CPU is not set # end of Compile-time checks and compiler options # # Generic Kernel Debugging Instruments # CONFIG_MAGIC_SYSRQ=y CONFIG_MAGIC_SYSRQ_DEFAULT_ENABLE=0x1 CONFIG_MAGIC_SYSRQ_SERIAL=y CONFIG_MAGIC_SYSRQ_SERIAL_SEQUENCE="" CONFIG_DEBUG_FS=y CONFIG_DEBUG_FS_ALLOW_ALL=y # CONFIG_DEBUG_FS_DISALLOW_MOUNT is not set # CONFIG_DEBUG_FS_ALLOW_NONE is not set CONFIG_HAVE_ARCH_KGDB=y # CONFIG_KGDB is not set CONFIG_ARCH_HAS_UBSAN_SANITIZE_ALL=y # CONFIG_UBSAN is not set CONFIG_HAVE_ARCH_KCSAN=y CONFIG_HAVE_KCSAN_COMPILER=y # CONFIG_KCSAN is not set # end of Generic Kernel Debugging Instruments # # Networking Debugging # # CONFIG_NET_DEV_REFCNT_TRACKER is not set # CONFIG_NET_NS_REFCNT_TRACKER is not set # CONFIG_DEBUG_NET is not set # end of Networking Debugging # # Memory Debugging # # CONFIG_PAGE_EXTENSION is not set # CONFIG_DEBUG_PAGEALLOC is not set CONFIG_SLUB_DEBUG=y # 
CONFIG_SLUB_DEBUG_ON is not set # CONFIG_PAGE_OWNER is not set # CONFIG_PAGE_TABLE_CHECK is not set # CONFIG_PAGE_POISONING is not set # CONFIG_DEBUG_RODATA_TEST is not set CONFIG_ARCH_HAS_DEBUG_WX=y # CONFIG_DEBUG_WX is not set CONFIG_GENERIC_PTDUMP=y # CONFIG_PTDUMP_DEBUGFS is not set # CONFIG_DEBUG_OBJECTS is not set # CONFIG_SHRINKER_DEBUG is not set CONFIG_HAVE_DEBUG_KMEMLEAK=y # CONFIG_DEBUG_KMEMLEAK is not set # CONFIG_DEBUG_STACK_USAGE is not set CONFIG_SCHED_STACK_END_CHECK=y CONFIG_ARCH_HAS_DEBUG_VM_PGTABLE=y # CONFIG_DEBUG_VM is not set # CONFIG_DEBUG_VM_PGTABLE is not set CONFIG_ARCH_HAS_DEBUG_VIRTUAL=y # CONFIG_DEBUG_VIRTUAL is not set CONFIG_DEBUG_MEMORY_INIT=y # CONFIG_DEBUG_PER_CPU_MAPS is not set CONFIG_ARCH_SUPPORTS_KMAP_LOCAL_FORCE_MAP=y # CONFIG_DEBUG_KMAP_LOCAL_FORCE_MAP is not set CONFIG_HAVE_ARCH_KASAN=y CONFIG_HAVE_ARCH_KASAN_VMALLOC=y CONFIG_CC_HAS_KASAN_GENERIC=y CONFIG_CC_HAS_WORKING_NOSANITIZE_ADDRESS=y # CONFIG_KASAN is not set CONFIG_HAVE_ARCH_KFENCE=y # CONFIG_KFENCE is not set CONFIG_HAVE_ARCH_KMSAN=y # end of Memory Debugging # CONFIG_DEBUG_SHIRQ is not set # # Debug Oops, Lockups and Hangs # # CONFIG_PANIC_ON_OOPS is not set CONFIG_PANIC_ON_OOPS_VALUE=0 CONFIG_PANIC_TIMEOUT=0 CONFIG_LOCKUP_DETECTOR=y CONFIG_SOFTLOCKUP_DETECTOR=y # CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set CONFIG_HARDLOCKUP_DETECTOR_PERF=y CONFIG_HARDLOCKUP_CHECK_TIMESTAMP=y CONFIG_HARDLOCKUP_DETECTOR=y # CONFIG_BOOTPARAM_HARDLOCKUP_PANIC is not set CONFIG_DETECT_HUNG_TASK=y CONFIG_DEFAULT_HUNG_TASK_TIMEOUT=120 # CONFIG_BOOTPARAM_HUNG_TASK_PANIC is not set CONFIG_WQ_WATCHDOG=y # end of Debug Oops, Lockups and Hangs # # Scheduler Debugging # # CONFIG_SCHED_DEBUG is not set CONFIG_SCHED_INFO=y # CONFIG_SCHEDSTATS is not set # end of Scheduler Debugging # CONFIG_DEBUG_TIMEKEEPING is not set # CONFIG_DEBUG_PREEMPT is not set # # Lock Debugging (spinlocks, mutexes, etc...) 
# CONFIG_LOCK_DEBUGGING_SUPPORT=y # CONFIG_PROVE_LOCKING is not set # CONFIG_LOCK_STAT is not set # CONFIG_DEBUG_RT_MUTEXES is not set # CONFIG_DEBUG_SPINLOCK is not set # CONFIG_DEBUG_MUTEXES is not set # CONFIG_DEBUG_WW_MUTEX_SLOWPATH is not set # CONFIG_DEBUG_RWSEMS is not set # CONFIG_DEBUG_LOCK_ALLOC is not set # CONFIG_DEBUG_ATOMIC_SLEEP is not set # CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set # CONFIG_LOCK_TORTURE_TEST is not set # CONFIG_WW_MUTEX_SELFTEST is not set # CONFIG_SCF_TORTURE_TEST is not set # CONFIG_CSD_LOCK_WAIT_DEBUG is not set # end of Lock Debugging (spinlocks, mutexes, etc...) # CONFIG_DEBUG_IRQFLAGS is not set CONFIG_STACKTRACE=y # CONFIG_WARN_ALL_UNSEEDED_RANDOM is not set # CONFIG_DEBUG_KOBJECT is not set # # Debug kernel data structures # CONFIG_DEBUG_LIST=y # CONFIG_DEBUG_PLIST is not set # CONFIG_DEBUG_SG is not set # CONFIG_DEBUG_NOTIFIERS is not set CONFIG_BUG_ON_DATA_CORRUPTION=y # CONFIG_DEBUG_MAPLE_TREE is not set # end of Debug kernel data structures # CONFIG_DEBUG_CREDENTIALS is not set # # RCU Debugging # # CONFIG_RCU_SCALE_TEST is not set # CONFIG_RCU_TORTURE_TEST is not set # CONFIG_RCU_REF_SCALE_TEST is not set CONFIG_RCU_CPU_STALL_TIMEOUT=59 CONFIG_RCU_EXP_CPU_STALL_TIMEOUT=0 # CONFIG_RCU_TRACE is not set # CONFIG_RCU_EQS_DEBUG is not set # end of RCU Debugging # CONFIG_DEBUG_WQ_FORCE_RR_CPU is not set # CONFIG_CPU_HOTPLUG_STATE_CONTROL is not set # CONFIG_LATENCYTOP is not set CONFIG_USER_STACKTRACE_SUPPORT=y CONFIG_HAVE_RETHOOK=y CONFIG_HAVE_FUNCTION_TRACER=y CONFIG_HAVE_DYNAMIC_FTRACE=y CONFIG_HAVE_DYNAMIC_FTRACE_WITH_REGS=y CONFIG_HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS=y CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS=y CONFIG_HAVE_DYNAMIC_FTRACE_NO_PATCHABLE=y CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y CONFIG_HAVE_SYSCALL_TRACEPOINTS=y CONFIG_HAVE_FENTRY=y CONFIG_HAVE_OBJTOOL_MCOUNT=y CONFIG_HAVE_C_RECORDMCOUNT=y CONFIG_HAVE_BUILDTIME_MCOUNT_SORT=y CONFIG_TRACING_SUPPORT=y # CONFIG_FTRACE is not set # CONFIG_SAMPLES is not set 
CONFIG_HAVE_SAMPLE_FTRACE_DIRECT=y CONFIG_HAVE_SAMPLE_FTRACE_DIRECT_MULTI=y CONFIG_ARCH_HAS_DEVMEM_IS_ALLOWED=y CONFIG_STRICT_DEVMEM=y # CONFIG_IO_STRICT_DEVMEM is not set # # x86 Debugging # CONFIG_X86_VERBOSE_BOOTUP=y CONFIG_EARLY_PRINTK=y # CONFIG_DEBUG_TLBFLUSH is not set CONFIG_HAVE_MMIOTRACE_SUPPORT=y # CONFIG_X86_DECODER_SELFTEST is not set CONFIG_IO_DELAY_0X80=y # CONFIG_IO_DELAY_0XED is not set # CONFIG_IO_DELAY_UDELAY is not set # CONFIG_IO_DELAY_NONE is not set # CONFIG_DEBUG_BOOT_PARAMS is not set # CONFIG_CPA_DEBUG is not set # CONFIG_DEBUG_ENTRY is not set # CONFIG_DEBUG_NMI_SELFTEST is not set # CONFIG_X86_DEBUG_FPU is not set # CONFIG_UNWINDER_ORC is not set CONFIG_UNWINDER_FRAME_POINTER=y # end of x86 Debugging # # Kernel Testing and Coverage # # CONFIG_KUNIT is not set # CONFIG_NOTIFIER_ERROR_INJECTION is not set # CONFIG_FAULT_INJECTION is not set CONFIG_ARCH_HAS_KCOV=y CONFIG_CC_HAS_SANCOV_TRACE_PC=y # CONFIG_KCOV is not set # CONFIG_RUNTIME_TESTING_MENU is not set CONFIG_ARCH_USE_MEMTEST=y # CONFIG_MEMTEST is not set # end of Kernel Testing and Coverage # # Rust hacking # # end of Rust hacking # end of Kernel hacking ================================================ FILE: resources/guest_configs/pcie.config ================================================ CONFIG_BLK_MQ_PCI=y CONFIG_PCI=y CONFIG_PCI_MMCONFIG=y CONFIG_PCI_MSI=y CONFIG_PCIEPORTBUS=y CONFIG_VIRTIO_PCI=y CONFIG_PCI_HOST_COMMON=y CONFIG_PCI_HOST_GENERIC=y ================================================ FILE: resources/guest_configs/virtio-mem.config ================================================ CONFIG_VIRTIO_MEM=y CONFIG_STRICT_DEVMEM=y ================================================ FILE: resources/guest_configs/virtio-pmem.config ================================================ # Needed for DAX on aarch64. 
Will be ignored on x86_64 CONFIG_ARM64_PMEM=y CONFIG_DEVICE_MIGRATION=y CONFIG_ZONE_DEVICE=y CONFIG_VIRTIO_PMEM=y CONFIG_LIBNVDIMM=y CONFIG_BLK_DEV_PMEM=y CONFIG_ND_CLAIM=y CONFIG_ND_BTT=y CONFIG_BTT=y CONFIG_ND_PFN=y CONFIG_NVDIMM_PFN=y CONFIG_NVDIMM_DAX=y CONFIG_OF_PMEM=y CONFIG_NVDIMM_KEYS=y CONFIG_DAX=y CONFIG_DEV_DAX=y CONFIG_DEV_DAX_PMEM=y CONFIG_DEV_DAX_KMEM=y CONFIG_FS_DAX=y CONFIG_FS_DAX_PMD=y ================================================ FILE: resources/guest_configs/vmclock.config ================================================ CONFIG_PTP_1588_CLOCK_VMCLOCK=y ================================================ FILE: resources/overlay/etc/systemd/system/fcnet.service ================================================ [Service] Type=oneshot ExecStartPre=/usr/bin/udevadm settle ExecStart=/usr/local/bin/fcnet-setup.sh [Install] WantedBy=sshd.service ================================================ FILE: resources/overlay/etc/systemd/system/var-lib-systemd.mount ================================================ # # Mount /var/lib/systemd as a temporary filesystem # [Unit] DefaultDependencies=no Conflicts=umount.target Before=local-fs.target umount.target After=swap.target [Mount] What=tmpfs Where=/var/lib/systemd Type=tmpfs Options=mode=1777,strictatime,nosuid,nodev,size=50%%,nr_inodes=10k [Install] WantedBy=local-fs.target ================================================ FILE: resources/overlay/usr/local/bin/devmemread.c ================================================ // Copyright 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
// SPDX-License-Identifier: Apache-2.0 #include #include #include #include #include /* * We try to trigger ENOSYS by mapping a file into memory and then tries to * load the content from an offset in the file bigger than its length into a * register asm volatile ("ldr %0, [%1], 4" : "=r" (ret), "+r" (buf)); */ int main() { int ret, fd; char *buf; // Assume /dev is mounted fprintf(stderr, "open /dev/mem\n"); fd = open("/dev/mem", O_RDWR); assert(fd > 0); buf = mmap(NULL, 65536, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0xf0000); assert(buf != MAP_FAILED); fprintf(stderr, "try to ldr\n"); asm volatile("ldr %0, [%1], 4" : "=r" (ret), "+r" (buf)); fprintf(stderr, "success\n"); return 0; } ================================================ FILE: resources/overlay/usr/local/bin/fast_page_fault_helper.c ================================================ // Copyright 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 // Helper program for triggering fast page faults after UFFD snapshot restore. // Allocates a 128M memory area using mmap, touches every page in it using memset and then // calls `sigwait` to wait for a SIGUSR1 signal. Upon receiving this signal, // set the entire memory area to 1, to trigger fast page fault. // The idea is that an integration test takes a snapshot while the process is // waiting for the SIGUSR1 signal, and then sends the signal after restoring. // This way, the `memset` will trigger a fast page fault for every page in // the memory region. 
#include // perror, fopen, fprintf #include // sigwait and friends #include // memset #include // mmap #include // clock_gettime #include // open #include // getopt #define MEM_SIZE_MIB (128 * 1024 * 1024) #define NANOS_PER_SEC 1000000000 #define PAGE_SIZE 4096 void touch_memory(void *mem, size_t size, char val) { void *end = mem + size; for (; mem < end; mem += PAGE_SIZE) { *((char *)mem) = val; } } int main(int argc, char *argv[]) { sigset_t set; int signal, character; void *ptr; struct timespec start, end; long duration_nanos; FILE *out_file; int longindex = 0; int signal_wait = 1; struct option longopts[] = { {"nosignal", no_argument, NULL, 's'}, {NULL, 0, NULL, 0} }; while((character = getopt_long(argc, argv, "s", longopts, &longindex)) != -1) { switch (character) { case 's': signal_wait = 0; break; } } if (signal_wait) { sigemptyset(&set); if (sigaddset(&set, SIGUSR1) == -1) { perror("sigaddset"); return 1; } if (sigprocmask(SIG_BLOCK, &set, NULL) == -1) { perror("sigprocmask"); return 1; } } ptr = mmap(NULL, MEM_SIZE_MIB, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); if (MAP_FAILED == ptr) { perror("mmap"); return 1; } if (signal_wait) { touch_memory(ptr, MEM_SIZE_MIB, 1); sigwait(&set, &signal); } clock_gettime(CLOCK_BOOTTIME, &start); touch_memory(ptr, MEM_SIZE_MIB, 2); clock_gettime(CLOCK_BOOTTIME, &end); duration_nanos = (end.tv_sec - start.tv_sec) * NANOS_PER_SEC + end.tv_nsec - start.tv_nsec; out_file = fopen("/tmp/fast_page_fault_helper.out", "w"); if (out_file == NULL) { perror("fopen"); return 1; } fprintf(out_file, "%ld", duration_nanos); if (fclose(out_file)) { perror("fclose"); return 1; } return 0; } ================================================ FILE: resources/overlay/usr/local/bin/fcnet-setup.sh ================================================ #!/usr/bin/env bash # Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
# SPDX-License-Identifier: Apache-2.0 # This script assigns IP addresses to the existing # virtual networking devices based on their MAC address. # It is a simple solution on which Firecracker's integration # tests are based. Each network device attached in a test will # assign the next available MAC. # The IP is obtained by converting the last 4 hexa groups of the MAC into decimals. main() { devs=$(ls /sys/class/net | grep -v lo) for dev in $devs; do mac_ip=$(ip link show dev $dev \ | grep link/ether \ | grep -Po "(?<=06:00:)([0-9a-f]{2}:?){4}" ) ip=$(printf "%d.%d.%d.%d" $(echo "0x${mac_ip}" | sed "s/:/ 0x/g")) ip addr add "$ip/30" dev $dev ip link set $dev up done } main ================================================ FILE: resources/overlay/usr/local/bin/fillmem.c ================================================ // Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 #define _GNU_SOURCE #include #include #include #include #include #include #include #include #include #include #define MB (1024 * 1024) int fill_mem(int mb_count) { int i; char *ptr = NULL; for(i = 0; i < mb_count; i++) { do { // We can't map the whole chunk of memory at once because // in case the system is already in a memory pressured // state and we are trying to achieve a process death by // OOM killer, a large allocation is far less likely to // succeed than more granular ones. 
ptr = mmap( NULL, MB * sizeof(char), PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0 ); } while (ptr == MAP_FAILED); memset(ptr, 1, MB * sizeof(char)); } return 0; } int main(int argc, char *const argv[]) { if (argc != 2) { printf("Usage: ./fillmem mb_count\n"); return -1; } int mb_count = atoi(argv[1]); int pid = fork(); if (pid == 0) { return fill_mem(mb_count); } else { int status; wait(&status); int fd = open("/tmp/fillmem_output.txt", O_WRONLY | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR | S_IXUSR); if (fd < 0) { return -1; } if (WIFSIGNALED(status)) { char buf[200]; sprintf(buf, "OOM Killer stopped the program with signal %d, exit code %d\n", WTERMSIG(status), WEXITSTATUS(status)); write(fd, buf, strlen(buf) + 1); } else { write(fd, "Memory filling was successful\n", 31); } close(fd); return 0; } } ================================================ FILE: resources/overlay/usr/local/bin/go_sdk_cred_provider.go/main.go ================================================ package main import ( "context" "fmt" "log" "github.com/aws/aws-sdk-go-v2/aws" "github.com/aws/aws-sdk-go-v2/config" ) func main() { cfg, err := config.LoadDefaultConfig( context.TODO(), config.WithClientLogMode( aws.LogSigning| aws.LogRetries| aws.LogRequest| aws.LogRequestWithBody| aws.LogResponse| aws.LogResponseWithBody, ), ) if err != nil { log.Fatalf("Unable to load config: %v", err) } cred, err := cfg.Credentials.Retrieve(context.TODO()) if err != nil { log.Fatalf("Unable to retrieve credentials: %v", err) } fmt.Printf("%v,%v,%v\n", cred.AccessKeyID, cred.SecretAccessKey, cred.SessionToken) } ================================================ FILE: resources/overlay/usr/local/bin/go_sdk_cred_provider_with_custom_endpoint.go/main.go ================================================ package main import ( "context" "fmt" "io/ioutil" "log" "net/http" "net/http/httputil" "os" "github.com/aws/aws-sdk-go-v2/config" "github.com/aws/aws-sdk-go-v2/credentials/endpointcreds" ) const mmdsBaseUrl = 
"http://169.254.169.254" func main() { // Get MMDS token token, err := getMmdsToken() if err != nil { log.Fatalf("Failed to get MMDS token: %v", err) } // Construct a client client := &http.Client{ Transport: &tokenInjector{ token: token, next: &loggingRoundTripper{ next: http.DefaultTransport, }, }, } // Construct a credential provider endpoint := fmt.Sprintf("%s/latest/meta-data/iam/security-credentials/role", mmdsBaseUrl) provider := endpointcreds.New(endpoint, func(o *endpointcreds.Options) { o.HTTPClient = client }) // Load config with the custom provider cfg, err := config.LoadDefaultConfig( context.TODO(), config.WithCredentialsProvider(provider), ) if err != nil { log.Fatalf("Unable to load config: %v", err) } // Retrieve credentials cred, err := cfg.Credentials.Retrieve(context.TODO()) if err != nil { log.Fatalf("Unable to retrieve credentials: %v", err) } fmt.Printf("%v,%v,%v\n", cred.AccessKeyID, cred.SecretAccessKey, cred.SessionToken) } func getMmdsToken() (string, error) { client := &http.Client{} // Construct a request req, err := http.NewRequest("PUT", mmdsBaseUrl + "/latest/api/token", nil) if err != nil { return "", err } req.Header.Set("x-aws-ec2-metadata-token-ttl-seconds", "21600") // Log the request dumpReq, err := httputil.DumpRequest(req, true) if err != nil { return "", err } fmt.Fprintf(os.Stderr, "REQUEST:\n%s\n", dumpReq) // Perform the request resp, err := client.Do(req) if err != nil { return "", err } defer resp.Body.Close() // Log the response dumpResp, err := httputil.DumpResponse(resp, true) if err != nil { return "", err } fmt.Fprintf(os.Stderr, "RESPONSE:\n%s\n", dumpResp) // Check the response status code. 
if resp.StatusCode != http.StatusOK { return "", fmt.Errorf("Status: %s", resp.Status) } // Read the body body, _ := ioutil.ReadAll(resp.Body) return string(body), nil } // tokenInjector adds the token header on every metadata request type tokenInjector struct { token string next http.RoundTripper } func (t *tokenInjector) RoundTrip(req *http.Request) (*http.Response, error) { req.Header.Set("x-aws-ec2-metadata-token", t.token) return t.next.RoundTrip(req) } // logginRoundTripper logs requests and responses type loggingRoundTripper struct { next http.RoundTripper } func (l *loggingRoundTripper) RoundTrip(req *http.Request) (*http.Response, error) { // Log the request dumpReq, err := httputil.DumpRequest(req, true) if err != nil { return nil, err } fmt.Fprintf(os.Stderr, "REQUEST:\n%s\n", dumpReq) // Perform the request resp, err := l.next.RoundTrip(req) if err != nil { return nil, err } // Log the response dumpResp, err := httputil.DumpResponse(resp, true) if err != nil { return nil, err } fmt.Fprintf(os.Stderr, "RESPONSE:\n%s\n", dumpResp) return resp, nil } ================================================ FILE: resources/overlay/usr/local/bin/init.c ================================================ // Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 // Init wrapper for boot timing. It points at /sbin/init. #include #include #include #include // Base address values are defined in arch/src/lib.rs as arch::MMIO_MEM_START. // Values are computed in arch/src//mod.rs from the architecture layouts. // Position on the bus is defined by MMIO_LEN increments, where MMIO_LEN is // defined as 0x1000 in vmm/src/device_manager/mmio.rs. 
#ifdef __x86_64__ #define MAGIC_MMIO_SIGNAL_GUEST_BOOT_COMPLETE 0xc0000000 #endif #ifdef __aarch64__ #define MAGIC_MMIO_SIGNAL_GUEST_BOOT_COMPLETE 0x40000000 #endif #define MAGIC_VALUE_SIGNAL_GUEST_BOOT_COMPLETE 123 int main () { int fd = open("/dev/mem", (O_RDWR | O_SYNC | O_CLOEXEC)); int mapped_size = getpagesize(); char *map_base = mmap(NULL, mapped_size, PROT_WRITE, MAP_SHARED, fd, MAGIC_MMIO_SIGNAL_GUEST_BOOT_COMPLETE); *map_base = MAGIC_VALUE_SIGNAL_GUEST_BOOT_COMPLETE; msync(map_base, mapped_size, MS_ASYNC); const char *init = "/sbin/init"; char *const argv[] = { "/sbin/init", NULL }; char *const envp[] = { NULL }; execve(init, argv, envp); } ================================================ FILE: resources/overlay/usr/local/bin/readmem.c ================================================ // Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 // This is a balloon device helper tool, which allocates an amount of // memory, given as the first starting parameter, and then tries to find // 4 consecutive occurrences of an integer, given as the second starting // parameter, in that memory chunk. The program returns 1 if it succeeds // in finding these occurrences, 0 otherwise. After performing a deflate // operation on the balloon device, we run this program with the second // starting parameter equal to `1`, which is the value we are using to // write in memory when dirtying it with `fillmem`. If the memory is // indeed scrubbed, we won't be able to find any 4 consecutive occurrences // of the integer `1` in newly allocated memory.
#define _GNU_SOURCE #include #include #include #include #define MB (1024 * 1024) int read_mem(int mb_count, int value) { int i; char *ptr = NULL; int *cur = NULL; int buf[4] = { value }; do { ptr = mmap( NULL, mb_count * MB * sizeof(char), PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0 ); } while (ptr == MAP_FAILED); cur = (int *) ptr; // We will go through all the memory allocated with an `int` pointer, // so we have to divide the amount of bytes available by the size of // `int`. Furthermore, we compare 4 `int`s at a time, so we will // divide the upper limit of the loop by 4 and also increment the index // by 4. for (i = 0; i < (mb_count * MB * sizeof(char)) / (4 * sizeof(int)); i += 4) { if (memcmp(cur, buf, 4 * sizeof(int)) == 0) { return 1; } } return 0; } int main(int argc, char *const argv[]) { if (argc != 3) { printf("Usage: ./readmem mb_count value\n"); return -1; } int mb_count = atoi(argv[1]); int value = atoi(argv[2]); return read_mem(mb_count, value); } ================================================ FILE: resources/patches/vmclock/5.10/0001-ptp-vmclock-add-vm-generation-counter.patch ================================================ From f2309165752b4af1fb2245fb434f4b0938aecd06 Mon Sep 17 00:00:00 2001 From: Babis Chalios Date: Wed, 21 Jan 2026 14:33:38 +0000 Subject: [PATCH 1/7] ptp: vmclock: add vm generation counter Similar to live migration, loading a VM from some saved state (aka snapshot) is also an event that calls for clock adjustments in the guest. However, guests might want to take more actions as a response to such events, e.g. as discarding UUIDs, resetting network connections, reseeding entropy pools, etc. These are actions that guests don't typically take during live migration, so add a new field in the vmclock_abi called vm_generation_counter which informs the guest about such events. Hypervisor advertises support for vm_generation_counter through the VMCLOCK_FLAG_VM_GEN_COUNTER_PRESENT flag. 
Users need to check the presence of this bit in vmclock_abi flags field before using this flag. Signed-off-by: Babis Chalios Reviewed-by: David Woodhouse Signed-off-by: David Woodhouse --- include/uapi/linux/vmclock-abi.h | 93 ++++++++++++++++++-------------- 1 file changed, 54 insertions(+), 39 deletions(-) diff --git a/include/uapi/linux/vmclock-abi.h b/include/uapi/linux/vmclock-abi.h index d7ca44313bf8..62b8f2091ca5 100644 --- a/include/uapi/linux/vmclock-abi.h +++ b/include/uapi/linux/vmclock-abi.h @@ -67,22 +67,22 @@ struct vmclock_abi { /* CONSTANT FIELDS */ uint32_t magic; -#define VMCLOCK_MAGIC 0x4b4c4356 /* "VCLK" */ - uint32_t size; /* Size of region containing this structure */ - uint16_t version; /* 1 */ +#define VMCLOCK_MAGIC 0x4b4c4356 /* "VCLK" */ + uint32_t size; /* Size of region containing this structure */ + uint16_t version; /* 1 */ uint8_t counter_id; /* Matches VIRTIO_RTC_COUNTER_xxx except INVALID */ -#define VMCLOCK_COUNTER_ARM_VCNT 0 -#define VMCLOCK_COUNTER_X86_TSC 1 -#define VMCLOCK_COUNTER_INVALID 0xff +#define VMCLOCK_COUNTER_ARM_VCNT 0 +#define VMCLOCK_COUNTER_X86_TSC 1 +#define VMCLOCK_COUNTER_INVALID 0xff uint8_t time_type; /* Matches VIRTIO_RTC_TYPE_xxx */ -#define VMCLOCK_TIME_UTC 0 /* Since 1970-01-01 00:00:00z */ -#define VMCLOCK_TIME_TAI 1 /* Since 1970-01-01 00:00:00z */ -#define VMCLOCK_TIME_MONOTONIC 2 /* Since undefined epoch */ -#define VMCLOCK_TIME_INVALID_SMEARED 3 /* Not supported */ -#define VMCLOCK_TIME_INVALID_MAYBE_SMEARED 4 /* Not supported */ +#define VMCLOCK_TIME_UTC 0 /* Since 1970-01-01 00:00:00z */ +#define VMCLOCK_TIME_TAI 1 /* Since 1970-01-01 00:00:00z */ +#define VMCLOCK_TIME_MONOTONIC 2 /* Since undefined epoch */ +#define VMCLOCK_TIME_INVALID_SMEARED 3 /* Not supported */ +#define VMCLOCK_TIME_INVALID_MAYBE_SMEARED 4 /* Not supported */ /* NON-CONSTANT FIELDS PROTECTED BY SEQCOUNT LOCK */ - uint32_t seq_count; /* Low bit means an update is in progress */ + uint32_t seq_count; /* Low bit means an update 
is in progress */ /* * This field changes to another non-repeating value when the CPU * counter is disrupted, for example on live migration. This lets @@ -92,19 +92,19 @@ struct vmclock_abi { uint64_t disruption_marker; uint64_t flags; /* Indicates that the tai_offset_sec field is valid */ -#define VMCLOCK_FLAG_TAI_OFFSET_VALID (1 << 0) +#define VMCLOCK_FLAG_TAI_OFFSET_VALID (1 << 0) /* * Optionally used to notify guests of pending maintenance events. * A guest which provides latency-sensitive services may wish to * remove itself from service if an event is coming up. Two flags * indicate the approximate imminence of the event. */ -#define VMCLOCK_FLAG_DISRUPTION_SOON (1 << 1) /* About a day */ -#define VMCLOCK_FLAG_DISRUPTION_IMMINENT (1 << 2) /* About an hour */ -#define VMCLOCK_FLAG_PERIOD_ESTERROR_VALID (1 << 3) -#define VMCLOCK_FLAG_PERIOD_MAXERROR_VALID (1 << 4) -#define VMCLOCK_FLAG_TIME_ESTERROR_VALID (1 << 5) -#define VMCLOCK_FLAG_TIME_MAXERROR_VALID (1 << 6) +#define VMCLOCK_FLAG_DISRUPTION_SOON (1 << 1) /* About a day */ +#define VMCLOCK_FLAG_DISRUPTION_IMMINENT (1 << 2) /* About an hour */ +#define VMCLOCK_FLAG_PERIOD_ESTERROR_VALID (1 << 3) +#define VMCLOCK_FLAG_PERIOD_MAXERROR_VALID (1 << 4) +#define VMCLOCK_FLAG_TIME_ESTERROR_VALID (1 << 5) +#define VMCLOCK_FLAG_TIME_MAXERROR_VALID (1 << 6) /* * If the MONOTONIC flag is set then (other than leap seconds) it is * guaranteed that the time calculated according this structure at @@ -118,15 +118,21 @@ struct vmclock_abi { * a counter reading taken immediately before *clearing* the low * bit again after the update, using the about-to-be-valid fields. */ -#define VMCLOCK_FLAG_TIME_MONOTONIC (1 << 7) +#define VMCLOCK_FLAG_TIME_MONOTONIC (1 << 7) + /* + * If the VM_GEN_COUNTER_PRESENT flag is set, the hypervisor will + * bump the vm_generation_counter field every time the guest is + * loaded from some save state (restored from a snapshot). 
+ */ +#define VMCLOCK_FLAG_VM_GEN_COUNTER_PRESENT (1 << 8) uint8_t pad[2]; uint8_t clock_status; -#define VMCLOCK_STATUS_UNKNOWN 0 -#define VMCLOCK_STATUS_INITIALIZING 1 -#define VMCLOCK_STATUS_SYNCHRONIZED 2 -#define VMCLOCK_STATUS_FREERUNNING 3 -#define VMCLOCK_STATUS_UNRELIABLE 4 +#define VMCLOCK_STATUS_UNKNOWN 0 +#define VMCLOCK_STATUS_INITIALIZING 1 +#define VMCLOCK_STATUS_SYNCHRONIZED 2 +#define VMCLOCK_STATUS_FREERUNNING 3 +#define VMCLOCK_STATUS_UNRELIABLE 4 /* * The time exposed through this device is never smeared. This field @@ -138,9 +144,9 @@ struct vmclock_abi { * in the nearby environment. */ uint8_t leap_second_smearing_hint; /* Matches VIRTIO_RTC_SUBTYPE_xxx */ -#define VMCLOCK_SMEARING_STRICT 0 -#define VMCLOCK_SMEARING_NOON_LINEAR 1 -#define VMCLOCK_SMEARING_UTC_SLS 2 +#define VMCLOCK_SMEARING_STRICT 0 +#define VMCLOCK_SMEARING_NOON_LINEAR 1 +#define VMCLOCK_SMEARING_UTC_SLS 2 int16_t tai_offset_sec; uint8_t leap_indicator; /* @@ -154,13 +160,13 @@ struct vmclock_abi { * leap second when such smearing may need to continue being applied. * It is hoped that these will be incorporated into virtio-rtc too. 
*/ -#define VMCLOCK_LEAP_NONE 0 /* No known nearby leap second */ -#define VMCLOCK_LEAP_PRE_POS 1 /* Positive leap second at EOM */ -#define VMCLOCK_LEAP_PRE_NEG 2 /* Negative leap second at EOM */ -#define VMCLOCK_LEAP_POS 3 /* Set during 23:59:60 second */ -#define VMCLOCK_LEAP_NEG 4 /* Not used in VMCLOCK */ -#define VMCLOCK_LEAP_POST_POS 5 -#define VMCLOCK_LEAP_POST_NEG 6 +#define VMCLOCK_LEAP_NONE 0 /* No known nearby leap second */ +#define VMCLOCK_LEAP_PRE_POS 1 /* Positive leap second at EOM */ +#define VMCLOCK_LEAP_PRE_NEG 2 /* Negative leap second at EOM */ +#define VMCLOCK_LEAP_POS 3 /* Set during 23:59:60 second */ +#define VMCLOCK_LEAP_NEG 4 /* Not used in VMCLOCK */ +#define VMCLOCK_LEAP_POST_POS 5 +#define VMCLOCK_LEAP_POST_NEG 6 /* Bit shift for counter_period_frac_sec and its error rate */ uint8_t counter_period_shift; @@ -179,10 +185,19 @@ struct vmclock_abi { /* * Time according to time_type field above. */ - uint64_t time_sec; /* Seconds since time_type epoch */ - uint64_t time_frac_sec; /* (seconds >> 64) */ - uint64_t time_esterror_picosec; /* (± picoseconds) */ - uint64_t time_maxerror_picosec; /* (± picoseconds) */ + uint64_t time_sec; /* Seconds since time_type epoch */ + uint64_t time_frac_sec; /* (seconds >> 64) */ + uint64_t time_esterror_picosec; /* (± picoseconds) */ + uint64_t time_maxerror_picosec; /* (± picoseconds) */ + + /* + * This field changes to another non-repeating value when the guest + * has been loaded from a snapshot. In addition to handling a + * disruption in time (which will also be signalled through the + * disruption_marker field), a guest may wish to discard UUIDs, + * reset network connections, reseed entropy, etc. 
+ */ + uint64_t vm_generation_counter; }; #endif /* __VMCLOCK_ABI_H__ */ -- 2.52.0 ================================================ FILE: resources/patches/vmclock/5.10/0002-ptp-vmclock-support-device-notifications.patch ================================================ From b1a7ba47d96753695d9101dde049bc0808f76167 Mon Sep 17 00:00:00 2001 From: Babis Chalios Date: Wed, 21 Jan 2026 14:33:39 +0000 Subject: [PATCH 2/7] ptp: vmclock: support device notifications Add optional support for device notifications in VMClock. When supported, the hypervisor will send a device notification every time it updates the seq_count to a new even value. Moreover, add support for poll() in VMClock as a means to propagate this notification to user space. poll() will return a POLLIN event to listeners every time seq_count changes to a value different than the one last seen (since open() or last read()/pread()). This means that when poll() returns a POLLIN event, listeners need to use read() to observe what has changed and update the reader's view of seq_count. In other words, after a poll() returned, all subsequent calls to poll() will immediately return with a POLLIN event until the listener calls read(). The device advertises support for the notification mechanism by setting flag VMCLOCK_FLAG_NOTIFICATION_PRESENT in vmclock_abi flags field. If the flag is not present the driver won't setup the ACPI notification handler and poll() will always immediately return POLLHUP. Signed-off-by: Babis Chalios Reviewed-by: David Woodhouse Signed-off-by: David Woodhouse --- drivers/ptp/ptp_vmclock.c | 200 ++++++++++++++++++++++++++----- include/uapi/linux/vmclock-abi.h | 5 + 2 files changed, 172 insertions(+), 33 deletions(-) diff --git a/drivers/ptp/ptp_vmclock.c b/drivers/ptp/ptp_vmclock.c index 1ce69eada4b2..87435b65ea7b 100644 --- a/drivers/ptp/ptp_vmclock.c +++ b/drivers/ptp/ptp_vmclock.c @@ -5,6 +5,9 @@ * Copyright © 2024 Amazon.com, Inc. or its affiliates. 
*/ +#include "linux/poll.h" +#include "linux/types.h" +#include "linux/wait.h" #include #include #include @@ -37,6 +40,7 @@ struct vmclock_state { struct resource res; struct vmclock_abi *clk; struct miscdevice miscdev; + wait_queue_head_t disrupt_wait; struct ptp_clock_info ptp_clock_info; struct ptp_clock *ptp_clock; enum clocksource_ids cs_id, sys_cs_id; @@ -46,6 +50,9 @@ struct vmclock_state { #define VMCLOCK_MAX_WAIT ms_to_ktime(100) +/* Require at least the flags field to be present. All else can be optional */ +#define VMCLOCK_MIN_SIZE offsetof(struct vmclock_abi, pad) + /* * Multiply a 64-bit count by a 64-bit tick 'period' in units of seconds >> 64 * and add the fractional second part of the reference time. @@ -313,8 +320,8 @@ static const struct ptp_clock_info ptp_vmclock_info = { static int vmclock_miscdev_mmap(struct file *fp, struct vm_area_struct *vma) { - struct vmclock_state *st = container_of(fp->private_data, - struct vmclock_state, miscdev); + struct vmclock_file_state *fst = fp->private_data; + struct vmclock_state *st = fst->st; if ((vma->vm_flags & (VM_READ|VM_WRITE)) != VM_READ) return -EROFS; @@ -322,22 +329,22 @@ static int vmclock_miscdev_mmap(struct file *fp, struct vm_area_struct *vma) if (vma->vm_end - vma->vm_start != PAGE_SIZE || vma->vm_pgoff) return -EINVAL; - if (io_remap_pfn_range(vma, vma->vm_start, - st->res.start >> PAGE_SHIFT, PAGE_SIZE, - vma->vm_page_prot)) - return -EAGAIN; + if (io_remap_pfn_range(vma, vma->vm_start, + st->res.start >> PAGE_SHIFT, PAGE_SIZE, + vma->vm_page_prot)) + return -EAGAIN; - return 0; + return 0; } static ssize_t vmclock_miscdev_read(struct file *fp, char __user *buf, size_t count, loff_t *ppos) { - struct vmclock_state *st = container_of(fp->private_data, - struct vmclock_state, miscdev); ktime_t deadline = ktime_add(ktime_get(), VMCLOCK_MAX_WAIT); + struct vmclock_file_state *fst = fp->private_data; + struct vmclock_state *st = fst->st; + uint32_t seq, old_seq; size_t max_count; - int32_t seq; if 
(*ppos >= PAGE_SIZE) return 0; @@ -346,6 +353,7 @@ static ssize_t vmclock_miscdev_read(struct file *fp, char __user *buf, if (count > max_count) count = max_count; + old_seq = atomic_read(&fst->seq); while (1) { seq = st->clk->seq_count & ~1ULL; virt_rmb(); @@ -354,8 +362,16 @@ static ssize_t vmclock_miscdev_read(struct file *fp, char __user *buf, return -EFAULT; virt_rmb(); - if (seq == st->clk->seq_count) - break; + if (seq == le32_to_cpu(st->clk->seq_count)) { + /* + * Either we updated fst->seq to seq (the latest version we observed) + * or someone else did (old_seq == seq), so we can break. + */ + if (atomic_try_cmpxchg(&fst->seq, &old_seq, seq) || + old_seq == seq) { + break; + } + } if (ktime_after(ktime_get(), deadline)) return -ETIMEDOUT; @@ -365,32 +381,67 @@ static ssize_t vmclock_miscdev_read(struct file *fp, char __user *buf, return count; } -static const struct file_operations vmclock_miscdev_fops = { - .mmap = vmclock_miscdev_mmap, - .read = vmclock_miscdev_read, -}; +static __poll_t vmclock_miscdev_poll(struct file *fp, poll_table *wait) +{ + struct vmclock_file_state *fst = fp->private_data; + struct vmclock_state *st = fst->st; + uint32_t seq; -/* module operations */ + /* + * Hypervisor will not send us any notifications, so fail immediately + * to avoid having caller sleeping for ever. 
+ */ + if (!(le64_to_cpu(st->clk->flags) & VMCLOCK_FLAG_NOTIFICATION_PRESENT)) + return POLLHUP; + + poll_wait(fp, &st->disrupt_wait, wait); + + seq = le32_to_cpu(st->clk->seq_count); + if (atomic_read(&fst->seq) != seq) + return POLLIN | POLLRDNORM; + + return 0; +} -static int vmclock_remove(struct platform_device *pdev) +static int vmclock_miscdev_open(struct inode *inode, struct file *fp) { - struct device *dev = &pdev->dev; - struct vmclock_state *st = dev_get_drvdata(dev); + struct vmclock_state *st = container_of(fp->private_data, + struct vmclock_state, miscdev); + struct vmclock_file_state *fst = kzalloc(sizeof(*fst), GFP_KERNEL); - if (st->ptp_clock) - ptp_clock_unregister(st->ptp_clock); + if (!fst) + return -ENOMEM; - if (st->miscdev.minor != MISC_DYNAMIC_MINOR) - misc_deregister(&st->miscdev); + fst->st = st; + atomic_set(&fst->seq, 0); + + fp->private_data = fst; + + return 0; +} +static int vmclock_miscdev_release(struct inode *inode, struct file *fp) +{ + kfree(fp->private_data); return 0; } +static const struct file_operations vmclock_miscdev_fops = { + .owner = THIS_MODULE, + .open = vmclock_miscdev_open, + .release = vmclock_miscdev_release, + .mmap = vmclock_miscdev_mmap, + .read = vmclock_miscdev_read, + .poll = vmclock_miscdev_poll, +}; + +/* module operations */ + static acpi_status vmclock_acpi_resources(struct acpi_resource *ares, void *data) { struct vmclock_state *st = data; struct resource_win win; - struct resource *res = &(win.res); + struct resource *res = &win.res; if (ares->type == ACPI_RESOURCE_TYPE_END_TAG) return AE_OK; @@ -399,7 +450,7 @@ static acpi_status vmclock_acpi_resources(struct acpi_resource *ares, void *data if (resource_type(&st->res) == IORESOURCE_MEM) return AE_ERROR; - if (acpi_dev_resource_memory(ares, res) || + if (acpi_dev_resource_memory(ares, res) || acpi_dev_resource_address_space(ares, &win)) { if (resource_type(res) != IORESOURCE_MEM || @@ -413,6 +464,44 @@ static acpi_status vmclock_acpi_resources(struct 
acpi_resource *ares, void *data return AE_ERROR; } +static void +vmclock_acpi_notification_handler(acpi_handle __always_unused handle, + u32 __always_unused event, void *dev) +{ + struct device *device = dev; + struct vmclock_state *st = device->driver_data; + + wake_up_interruptible(&st->disrupt_wait); +} + +static int vmclock_setup_notification(struct device *dev, struct vmclock_state *st) +{ + struct acpi_device *adev = ACPI_COMPANION(dev); + acpi_status status; + + /* + * This should never happen as this function is only called when + * has_acpi_companion(dev) is true, but the logic is sufficiently + * complex that Coverity can't see the tautology. + */ + if (!adev) + return -ENODEV; + + /* The device does not support notifications. Nothing else to do */ + if (!(le64_to_cpu(st->clk->flags) & VMCLOCK_FLAG_NOTIFICATION_PRESENT)) + return 0; + + status = acpi_install_notify_handler(adev->handle, ACPI_DEVICE_NOTIFY, + vmclock_acpi_notification_handler, + dev); + if (ACPI_FAILURE(status)) { + dev_err(dev, "failed to install notification handler"); + return -ENODEV; + } + + return 0; +} + static int vmclock_probe_acpi(struct device *dev, struct vmclock_state *st) { struct acpi_device *adev = ACPI_COMPANION(dev); @@ -436,6 +525,30 @@ static int vmclock_probe_acpi(struct device *dev, struct vmclock_state *st) return 0; } +static void vmclock_remove(void *data) +{ + struct device *dev = data; + struct vmclock_state *st = dev->driver_data; + + if (!st) { + dev_err(dev, "vmclock_remove() called with NULL driver_data"); + return; + } + + if (has_acpi_companion(dev)) + acpi_remove_notify_handler(ACPI_COMPANION(dev)->handle, + ACPI_DEVICE_NOTIFY, + vmclock_acpi_notification_handler); + + if (st->ptp_clock) + ptp_clock_unregister(st->ptp_clock); + + if (st->miscdev.minor != MISC_DYNAMIC_MINOR) + misc_deregister(&st->miscdev); + + dev->driver_data = NULL; +} + static void vmclock_put_idx(void *data) { struct vmclock_state *st = data; @@ -449,7 +562,7 @@ static int 
vmclock_probe(struct platform_device *pdev) struct vmclock_state *st; int ret; - st = devm_kzalloc(dev, sizeof (*st), GFP_KERNEL); + st = devm_kzalloc(dev, sizeof(*st), GFP_KERNEL); if (!st) return -ENOMEM; @@ -463,6 +576,11 @@ static int vmclock_probe(struct platform_device *pdev) goto out; } + if (resource_size(&st->res) < VMCLOCK_MIN_SIZE) { + dev_info(dev, "Region too small (0x%llx)\n", + resource_size(&st->res)); + return -EINVAL; + } st->clk = devm_memremap(dev, st->res.start, resource_size(&st->res), MEMREMAP_WB | MEMREMAP_DEC); if (IS_ERR(st->clk)) { @@ -473,7 +591,7 @@ static int vmclock_probe(struct platform_device *pdev) } if (st->clk->magic != VMCLOCK_MAGIC || - st->clk->size < sizeof(*st->clk) || + st->clk->size > resource_size(&st->res) || st->clk->version != 1) { dev_info(dev, "vmclock magic fields invalid\n"); ret = -EINVAL; @@ -485,7 +603,7 @@ static int vmclock_probe(struct platform_device *pdev) goto out; st->index = ret; - ret = devm_add_action_or_reset(&pdev->dev, vmclock_put_idx, st); + ret = devm_add_action_or_reset(&pdev->dev, vmclock_put_idx, st); if (ret) goto out; @@ -495,9 +613,26 @@ static int vmclock_probe(struct platform_device *pdev) goto out; } - /* If the structure is big enough, it can be mapped to userspace */ - if (st->clk->size >= PAGE_SIZE) { - st->miscdev.minor = MISC_DYNAMIC_MINOR; + st->miscdev.minor = MISC_DYNAMIC_MINOR; + + init_waitqueue_head(&st->disrupt_wait); + dev->driver_data = st; + + ret = devm_add_action_or_reset(&pdev->dev, vmclock_remove, dev); + if (ret) + return ret; + + ret = vmclock_setup_notification(dev, st); + if (ret) + return ret; + + /* + * If the structure is big enough, it can be mapped to userspace. + * Theoretically a guest OS even using larger pages could still + * use 4KiB PTEs to map smaller MMIO regions like this, but let's + * cross that bridge if/when we come to it. 
+ */ + if (le32_to_cpu(st->clk->size) >= PAGE_SIZE) { st->miscdev.fops = &vmclock_miscdev_fops; st->miscdev.name = st->name; @@ -563,7 +698,6 @@ MODULE_DEVICE_TABLE(acpi, vmclock_acpi_ids); static struct platform_driver vmclock_platform_driver = { .probe = vmclock_probe, - .remove = vmclock_remove, .driver = { .name = "vmclock", .acpi_match_table = vmclock_acpi_ids, diff --git a/include/uapi/linux/vmclock-abi.h b/include/uapi/linux/vmclock-abi.h index 62b8f2091ca5..412784fd5969 100644 --- a/include/uapi/linux/vmclock-abi.h +++ b/include/uapi/linux/vmclock-abi.h @@ -125,6 +125,11 @@ struct vmclock_abi { * loaded from some save state (restored from a snapshot). */ #define VMCLOCK_FLAG_VM_GEN_COUNTER_PRESENT (1 << 8) + /* + * If the NOTIFICATION_PRESENT flag is set, the hypervisor will send + * a notification every time it updates seq_count to a new even number. + */ +#define VMCLOCK_FLAG_NOTIFICATION_PRESENT (1 << 9) uint8_t pad[2]; uint8_t clock_status; -- 2.52.0 ================================================ FILE: resources/patches/vmclock/5.10/0003-dt-bindings-ptp-Add-amazon-vmclock.patch ================================================ From daf492c70d7e7a2a09d76481fd7ecbc5e99fb58f Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 21 Jan 2026 14:33:40 +0000 Subject: [PATCH 3/7] dt-bindings: ptp: Add amazon,vmclock The vmclock device provides a PTP clock source and precise timekeeping across live migration and snapshot/restore operations. The binding has a required memory region containing the vmclock_abi structure and an optional interrupt for clock disruption notifications. 
The full spec is at https://uapi-group.org/specifications/specs/vmclock/ Signed-off-by: David Woodhouse Signed-off-by: Babis Chalios Reviewed-by: Krzysztof Kozlowski --- .../bindings/ptp/amazon,vmclock.yaml | 46 +++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 Documentation/devicetree/bindings/ptp/amazon,vmclock.yaml diff --git a/Documentation/devicetree/bindings/ptp/amazon,vmclock.yaml b/Documentation/devicetree/bindings/ptp/amazon,vmclock.yaml new file mode 100644 index 000000000000..357790df876f --- /dev/null +++ b/Documentation/devicetree/bindings/ptp/amazon,vmclock.yaml @@ -0,0 +1,46 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/ptp/amazon,vmclock.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Virtual Machine Clock + +maintainers: + - David Woodhouse + +description: + The vmclock device provides a precise clock source and allows for + accurate timekeeping across live migration and snapshot/restore + operations. The full specification of the shared data structure is + available at https://uapi-group.org/specifications/specs/vmclock/ + +properties: + compatible: + const: amazon,vmclock + + reg: + description: + Specifies the shared memory region containing the vmclock_abi structure. + maxItems: 1 + + interrupts: + description: + Interrupt used to notify when the contents of the vmclock_abi structure + have been updated. 
+ maxItems: 1 + +required: + - compatible + - reg + +additionalProperties: false + +examples: + - | + #include + ptp@80000000 { + compatible = "amazon,vmclock"; + reg = <0x80000000 0x1000>; + interrupts = ; + }; -- 2.52.0 ================================================ FILE: resources/patches/vmclock/5.10/0004-ptp-ptp_vmclock-Add-device-tree-support.patch ================================================ From 30468d547a380aa6db4d9e2ba8ab735daeab0694 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 21 Jan 2026 14:33:41 +0000 Subject: [PATCH 4/7] ptp: ptp_vmclock: Add device tree support Add device tree support to the ptp_vmclock driver, allowing it to probe via device tree in addition to ACPI. Handle optional interrupt for clock disruption notifications, mirroring the ACPI notification behaviour. Although the interrupt is marked as 'optional' in the DT bindings, if the device *advertises* the VMCLOCK_FLAG_NOTIFICATION_ABSENT then it *should* have an interrupt. The driver will refuse to initialize if not. 
Signed-off-by: David Woodhouse Signed-off-by: Babis Chalios --- drivers/ptp/ptp_vmclock.c | 67 +++++++++++++++++++++++++++++++++++---- 1 file changed, 61 insertions(+), 6 deletions(-) diff --git a/drivers/ptp/ptp_vmclock.c b/drivers/ptp/ptp_vmclock.c index 87435b65ea7b..662fbe93534c 100644 --- a/drivers/ptp/ptp_vmclock.c +++ b/drivers/ptp/ptp_vmclock.c @@ -14,10 +14,13 @@ #include #include #include +#include +#include #include #include #include #include +#include #include #include #include @@ -474,7 +477,7 @@ vmclock_acpi_notification_handler(acpi_handle __always_unused handle, wake_up_interruptible(&st->disrupt_wait); } -static int vmclock_setup_notification(struct device *dev, struct vmclock_state *st) +static int vmclock_setup_acpi_notification(struct device *dev) { struct acpi_device *adev = ACPI_COMPANION(dev); acpi_status status; @@ -487,10 +490,6 @@ static int vmclock_setup_notification(struct device *dev, struct vmclock_state * if (!adev) return -ENODEV; - /* The device does not support notifications. 
Nothing else to do */ - if (!(le64_to_cpu(st->clk->flags) & VMCLOCK_FLAG_NOTIFICATION_PRESENT)) - return 0; - status = acpi_install_notify_handler(adev->handle, ACPI_DEVICE_NOTIFY, vmclock_acpi_notification_handler, dev); @@ -525,6 +524,55 @@ static int vmclock_probe_acpi(struct device *dev, struct vmclock_state *st) return 0; } +static irqreturn_t vmclock_of_irq_handler(int __always_unused irq, void *_st) +{ + struct vmclock_state *st = _st; + + wake_up_interruptible(&st->disrupt_wait); + return IRQ_HANDLED; +} + +static int vmclock_probe_dt(struct device *dev, struct vmclock_state *st) +{ + struct platform_device *pdev = to_platform_device(dev); + struct resource *res; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) + return -ENODEV; + + st->res = *res; + + return 0; +} + +static int vmclock_setup_of_notification(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + int irq; + + irq = platform_get_irq(pdev, 0); + if (irq < 0) + return irq; + + return devm_request_irq(dev, irq, vmclock_of_irq_handler, IRQF_SHARED, + "vmclock", dev->driver_data); +} + +static int vmclock_setup_notification(struct device *dev, + struct vmclock_state *st) +{ + /* The device does not support notifications. 
Nothing else to do */ + if (!(le64_to_cpu(st->clk->flags) & VMCLOCK_FLAG_NOTIFICATION_PRESENT)) + return 0; + + if (has_acpi_companion(dev)) { + return vmclock_setup_acpi_notification(dev); + } else { + return vmclock_setup_of_notification(dev); + } +} + static void vmclock_remove(void *data) { struct device *dev = data; @@ -569,7 +617,7 @@ static int vmclock_probe(struct platform_device *pdev) if (has_acpi_companion(dev)) ret = vmclock_probe_acpi(dev, st); else - ret = -EINVAL; /* Only ACPI for now */ + ret = vmclock_probe_dt(dev, st); if (ret) { dev_info(dev, "Failed to obtain physical address: %d\n", ret); @@ -696,11 +744,18 @@ static const struct acpi_device_id vmclock_acpi_ids[] = { }; MODULE_DEVICE_TABLE(acpi, vmclock_acpi_ids); +static const struct of_device_id vmclock_of_ids[] = { + { .compatible = "amazon,vmclock", }, + { }, +}; +MODULE_DEVICE_TABLE(of, vmclock_of_ids); + static struct platform_driver vmclock_platform_driver = { .probe = vmclock_probe, .driver = { .name = "vmclock", .acpi_match_table = vmclock_acpi_ids, + .of_match_table = vmclock_of_ids, }, }; -- 2.52.0 ================================================ FILE: resources/patches/vmclock/5.10/0005-ptp-ptp_vmclock-add-VMCLOCK-to-ACPI-device-match.patch ================================================ From d291cf42344f2f48557e545648bc26eea9b1828f Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 21 Jan 2026 14:33:42 +0000 Subject: [PATCH 5/7] ptp: ptp_vmclock: add 'VMCLOCK' to ACPI device match As we finalised the spec, we spotted that vmgenid actually says that the _HID is supposed to be hypervisor-specific. Although in the 13 years since the original vmgenid doc was published, nobody seems to have cared about using _HID to distinguish between implementations on different hypervisors, and we only ever use the _CID. For consistency, match the _CID of "VMCLOCK" too. 
Signed-off-by: David Woodhouse Signed-off-by: Babis Chalios --- drivers/ptp/ptp_vmclock.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/ptp/ptp_vmclock.c b/drivers/ptp/ptp_vmclock.c index 662fbe93534c..dbe549cc4b04 100644 --- a/drivers/ptp/ptp_vmclock.c +++ b/drivers/ptp/ptp_vmclock.c @@ -739,6 +739,7 @@ static int vmclock_probe(struct platform_device *pdev) } static const struct acpi_device_id vmclock_acpi_ids[] = { + { "AMZNC10C", 0 }, { "VMCLOCK", 0 }, {} }; -- 2.52.0 ================================================ FILE: resources/patches/vmclock/5.10/0006-ptp-ptp_vmclock-remove-dependency-on-CONFIG_ACPI.patch ================================================ From 1cb36e019ef80058db243c7a02696e17429bd0b1 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 21 Jan 2026 14:33:43 +0000 Subject: [PATCH 6/7] ptp: ptp_vmclock: remove dependency on CONFIG_ACPI Now that we added device tree support we can remove dependency on CONFIG_ACPI. Signed-off-by: David Woodhouse Signed-off-by: Babis Chalios --- drivers/ptp/Kconfig | 26 +++++++++++++++----------- drivers/ptp/ptp_vmclock.c | 14 ++++++++++---- 2 files changed, 25 insertions(+), 15 deletions(-) diff --git a/drivers/ptp/Kconfig b/drivers/ptp/Kconfig index ebadd82c7a7d..e020045bac13 100644 --- a/drivers/ptp/Kconfig +++ b/drivers/ptp/Kconfig @@ -132,17 +132,21 @@ config PTP_1588_CLOCK_KVM will be called ptp_kvm. config PTP_1588_CLOCK_VMCLOCK - tristate "Virtual machine PTP clock" - depends on X86_TSC || ARM_ARCH_TIMER - depends on PTP_1588_CLOCK && ACPI && ARCH_SUPPORTS_INT128 - default y - help - This driver adds support for using a virtual precision clock - advertised by the hypervisor. This clock is only useful in virtual - machines where such a device is present. - - To compile this driver as a module, choose M here: the module - will be called ptp_vmclock. 
+ tristate "Virtual machine PTP clock" + depends on X86_TSC || ARM_ARCH_TIMER + depends on PTP_1588_CLOCK && ARCH_SUPPORTS_INT128 + default PTP_1588_CLOCK_KVM + help + This driver adds support for using a virtual precision clock + advertised by the hypervisor. This clock is only useful in virtual + machines where such a device is present. + + Unlike the KVM virtual PTP clock, the VMCLOCK device offers support + for reliable timekeeping even across live migration. So this driver + is enabled by default whenever the KVM PTP clock is. + + To compile this driver as a module, choose M here: the module + will be called ptp_vmclock. config PTP_1588_CLOCK_IDT82P33 tristate "IDT 82P33xxx PTP clock" diff --git a/drivers/ptp/ptp_vmclock.c b/drivers/ptp/ptp_vmclock.c index dbe549cc4b04..2114d5fd760e 100644 --- a/drivers/ptp/ptp_vmclock.c +++ b/drivers/ptp/ptp_vmclock.c @@ -440,6 +440,7 @@ static const struct file_operations vmclock_miscdev_fops = { /* module operations */ +#if IS_ENABLED(CONFIG_ACPI) static acpi_status vmclock_acpi_resources(struct acpi_resource *ares, void *data) { struct vmclock_state *st = data; @@ -523,6 +524,7 @@ static int vmclock_probe_acpi(struct device *dev, struct vmclock_state *st) return 0; } +#endif /* CONFIG_ACPI */ static irqreturn_t vmclock_of_irq_handler(int __always_unused irq, void *_st) { @@ -566,11 +568,11 @@ static int vmclock_setup_notification(struct device *dev, if (!(le64_to_cpu(st->clk->flags) & VMCLOCK_FLAG_NOTIFICATION_PRESENT)) return 0; - if (has_acpi_companion(dev)) { +#if IS_ENABLED(CONFIG_ACPI) + if (has_acpi_companion(dev)) return vmclock_setup_acpi_notification(dev); - } else { - return vmclock_setup_of_notification(dev); - } +#endif + return vmclock_setup_of_notification(dev); } static void vmclock_remove(void *data) @@ -583,10 +585,12 @@ static void vmclock_remove(void *data) return; } +#if IS_ENABLED(CONFIG_ACPI) if (has_acpi_companion(dev)) acpi_remove_notify_handler(ACPI_COMPANION(dev)->handle, ACPI_DEVICE_NOTIFY, 
vmclock_acpi_notification_handler); +#endif if (st->ptp_clock) ptp_clock_unregister(st->ptp_clock); @@ -614,9 +618,11 @@ static int vmclock_probe(struct platform_device *pdev) if (!st) return -ENOMEM; +#if IS_ENABLED(CONFIG_ACPI) if (has_acpi_companion(dev)) ret = vmclock_probe_acpi(dev, st); else +#endif ret = vmclock_probe_dt(dev, st); if (ret) { -- 2.52.0 ================================================ FILE: resources/patches/vmclock/5.10/0007-ptp-ptp_vmclock-return-TAI-not-UTC.patch ================================================ From 726b41d6531d0e77fc20f6d7ea4b3178ade41e80 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 21 Jan 2026 14:33:44 +0000 Subject: [PATCH 7/7] ptp: ptp_vmclock: return TAI not UTC To output UTC would involve complex calculations about whether the time elapsed since the reference time has crossed the end of the month when a leap second takes effect. I've prototyped that, but it made me sad. Much better to report TAI, which is what PHCs should do anyway. And much much simpler. 
Signed-off-by: David Woodhouse Signed-off-by: Babis Chalios --- drivers/ptp/ptp_vmclock.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/ptp/ptp_vmclock.c b/drivers/ptp/ptp_vmclock.c index 2114d5fd760e..e0da9c5f5d00 100644 --- a/drivers/ptp/ptp_vmclock.c +++ b/drivers/ptp/ptp_vmclock.c @@ -80,13 +80,13 @@ static inline uint64_t mul_u64_u64_shr_add_u64(uint64_t *res_hi, uint64_t delta, static inline bool tai_adjust(struct vmclock_abi *clk, uint64_t *sec) { - if (likely(clk->time_type == VMCLOCK_TIME_UTC)) + if (clk->time_type == VMCLOCK_TIME_TAI) return true; - if (clk->time_type == VMCLOCK_TIME_TAI && - (clk->flags & VMCLOCK_FLAG_TAI_OFFSET_VALID)) { + if (clk->time_type == VMCLOCK_TIME_UTC && + (le64_to_cpu(clk->flags) & VMCLOCK_FLAG_TAI_OFFSET_VALID)) { if (sec) - *sec += clk->tai_offset_sec; + *sec -= (int16_t)le16_to_cpu(clk->tai_offset_sec); return true; } return false; @@ -321,6 +321,11 @@ static const struct ptp_clock_info ptp_vmclock_info = { .getcrosststamp = ptp_vmclock_getcrosststamp, }; +struct vmclock_file_state { + struct vmclock_state *st; + atomic_t seq; +}; + static int vmclock_miscdev_mmap(struct file *fp, struct vm_area_struct *vma) { struct vmclock_file_state *fst = fp->private_data; -- 2.52.0 ================================================ FILE: resources/patches/vmclock/6.1/0001-ptp-vmclock-add-vm-generation-counter.patch ================================================ From f2309165752b4af1fb2245fb434f4b0938aecd06 Mon Sep 17 00:00:00 2001 From: Babis Chalios Date: Wed, 21 Jan 2026 14:33:38 +0000 Subject: [PATCH 1/7] ptp: vmclock: add vm generation counter Similar to live migration, loading a VM from some saved state (aka snapshot) is also an event that calls for clock adjustments in the guest. However, guests might want to take more actions as a response to such events, e.g. as discarding UUIDs, resetting network connections, reseeding entropy pools, etc. 
These are actions that guests don't typically take during live migration, so add a new field in the vmclock_abi called vm_generation_counter which informs the guest about such events. Hypervisor advertises support for vm_generation_counter through the VMCLOCK_FLAG_VM_GEN_COUNTER_PRESENT flag. Users need to check the presence of this bit in vmclock_abi flags field before using this flag. Signed-off-by: Babis Chalios Reviewed-by: David Woodhouse Signed-off-by: David Woodhouse --- include/uapi/linux/vmclock-abi.h | 93 ++++++++++++++++++-------------- 1 file changed, 54 insertions(+), 39 deletions(-) diff --git a/include/uapi/linux/vmclock-abi.h b/include/uapi/linux/vmclock-abi.h index d7ca44313bf8..62b8f2091ca5 100644 --- a/include/uapi/linux/vmclock-abi.h +++ b/include/uapi/linux/vmclock-abi.h @@ -67,22 +67,22 @@ struct vmclock_abi { /* CONSTANT FIELDS */ uint32_t magic; -#define VMCLOCK_MAGIC 0x4b4c4356 /* "VCLK" */ - uint32_t size; /* Size of region containing this structure */ - uint16_t version; /* 1 */ +#define VMCLOCK_MAGIC 0x4b4c4356 /* "VCLK" */ + uint32_t size; /* Size of region containing this structure */ + uint16_t version; /* 1 */ uint8_t counter_id; /* Matches VIRTIO_RTC_COUNTER_xxx except INVALID */ -#define VMCLOCK_COUNTER_ARM_VCNT 0 -#define VMCLOCK_COUNTER_X86_TSC 1 -#define VMCLOCK_COUNTER_INVALID 0xff +#define VMCLOCK_COUNTER_ARM_VCNT 0 +#define VMCLOCK_COUNTER_X86_TSC 1 +#define VMCLOCK_COUNTER_INVALID 0xff uint8_t time_type; /* Matches VIRTIO_RTC_TYPE_xxx */ -#define VMCLOCK_TIME_UTC 0 /* Since 1970-01-01 00:00:00z */ -#define VMCLOCK_TIME_TAI 1 /* Since 1970-01-01 00:00:00z */ -#define VMCLOCK_TIME_MONOTONIC 2 /* Since undefined epoch */ -#define VMCLOCK_TIME_INVALID_SMEARED 3 /* Not supported */ -#define VMCLOCK_TIME_INVALID_MAYBE_SMEARED 4 /* Not supported */ +#define VMCLOCK_TIME_UTC 0 /* Since 1970-01-01 00:00:00z */ +#define VMCLOCK_TIME_TAI 1 /* Since 1970-01-01 00:00:00z */ +#define VMCLOCK_TIME_MONOTONIC 2 /* Since undefined epoch */ 
+#define VMCLOCK_TIME_INVALID_SMEARED 3 /* Not supported */ +#define VMCLOCK_TIME_INVALID_MAYBE_SMEARED 4 /* Not supported */ /* NON-CONSTANT FIELDS PROTECTED BY SEQCOUNT LOCK */ - uint32_t seq_count; /* Low bit means an update is in progress */ + uint32_t seq_count; /* Low bit means an update is in progress */ /* * This field changes to another non-repeating value when the CPU * counter is disrupted, for example on live migration. This lets @@ -92,19 +92,19 @@ struct vmclock_abi { uint64_t disruption_marker; uint64_t flags; /* Indicates that the tai_offset_sec field is valid */ -#define VMCLOCK_FLAG_TAI_OFFSET_VALID (1 << 0) +#define VMCLOCK_FLAG_TAI_OFFSET_VALID (1 << 0) /* * Optionally used to notify guests of pending maintenance events. * A guest which provides latency-sensitive services may wish to * remove itself from service if an event is coming up. Two flags * indicate the approximate imminence of the event. */ -#define VMCLOCK_FLAG_DISRUPTION_SOON (1 << 1) /* About a day */ -#define VMCLOCK_FLAG_DISRUPTION_IMMINENT (1 << 2) /* About an hour */ -#define VMCLOCK_FLAG_PERIOD_ESTERROR_VALID (1 << 3) -#define VMCLOCK_FLAG_PERIOD_MAXERROR_VALID (1 << 4) -#define VMCLOCK_FLAG_TIME_ESTERROR_VALID (1 << 5) -#define VMCLOCK_FLAG_TIME_MAXERROR_VALID (1 << 6) +#define VMCLOCK_FLAG_DISRUPTION_SOON (1 << 1) /* About a day */ +#define VMCLOCK_FLAG_DISRUPTION_IMMINENT (1 << 2) /* About an hour */ +#define VMCLOCK_FLAG_PERIOD_ESTERROR_VALID (1 << 3) +#define VMCLOCK_FLAG_PERIOD_MAXERROR_VALID (1 << 4) +#define VMCLOCK_FLAG_TIME_ESTERROR_VALID (1 << 5) +#define VMCLOCK_FLAG_TIME_MAXERROR_VALID (1 << 6) /* * If the MONOTONIC flag is set then (other than leap seconds) it is * guaranteed that the time calculated according this structure at @@ -118,15 +118,21 @@ struct vmclock_abi { * a counter reading taken immediately before *clearing* the low * bit again after the update, using the about-to-be-valid fields. 
*/ -#define VMCLOCK_FLAG_TIME_MONOTONIC (1 << 7) +#define VMCLOCK_FLAG_TIME_MONOTONIC (1 << 7) + /* + * If the VM_GEN_COUNTER_PRESENT flag is set, the hypervisor will + * bump the vm_generation_counter field every time the guest is + * loaded from some save state (restored from a snapshot). + */ +#define VMCLOCK_FLAG_VM_GEN_COUNTER_PRESENT (1 << 8) uint8_t pad[2]; uint8_t clock_status; -#define VMCLOCK_STATUS_UNKNOWN 0 -#define VMCLOCK_STATUS_INITIALIZING 1 -#define VMCLOCK_STATUS_SYNCHRONIZED 2 -#define VMCLOCK_STATUS_FREERUNNING 3 -#define VMCLOCK_STATUS_UNRELIABLE 4 +#define VMCLOCK_STATUS_UNKNOWN 0 +#define VMCLOCK_STATUS_INITIALIZING 1 +#define VMCLOCK_STATUS_SYNCHRONIZED 2 +#define VMCLOCK_STATUS_FREERUNNING 3 +#define VMCLOCK_STATUS_UNRELIABLE 4 /* * The time exposed through this device is never smeared. This field @@ -138,9 +144,9 @@ struct vmclock_abi { * in the nearby environment. */ uint8_t leap_second_smearing_hint; /* Matches VIRTIO_RTC_SUBTYPE_xxx */ -#define VMCLOCK_SMEARING_STRICT 0 -#define VMCLOCK_SMEARING_NOON_LINEAR 1 -#define VMCLOCK_SMEARING_UTC_SLS 2 +#define VMCLOCK_SMEARING_STRICT 0 +#define VMCLOCK_SMEARING_NOON_LINEAR 1 +#define VMCLOCK_SMEARING_UTC_SLS 2 int16_t tai_offset_sec; uint8_t leap_indicator; /* @@ -154,13 +160,13 @@ struct vmclock_abi { * leap second when such smearing may need to continue being applied. * It is hoped that these will be incorporated into virtio-rtc too. 
*/ -#define VMCLOCK_LEAP_NONE 0 /* No known nearby leap second */ -#define VMCLOCK_LEAP_PRE_POS 1 /* Positive leap second at EOM */ -#define VMCLOCK_LEAP_PRE_NEG 2 /* Negative leap second at EOM */ -#define VMCLOCK_LEAP_POS 3 /* Set during 23:59:60 second */ -#define VMCLOCK_LEAP_NEG 4 /* Not used in VMCLOCK */ -#define VMCLOCK_LEAP_POST_POS 5 -#define VMCLOCK_LEAP_POST_NEG 6 +#define VMCLOCK_LEAP_NONE 0 /* No known nearby leap second */ +#define VMCLOCK_LEAP_PRE_POS 1 /* Positive leap second at EOM */ +#define VMCLOCK_LEAP_PRE_NEG 2 /* Negative leap second at EOM */ +#define VMCLOCK_LEAP_POS 3 /* Set during 23:59:60 second */ +#define VMCLOCK_LEAP_NEG 4 /* Not used in VMCLOCK */ +#define VMCLOCK_LEAP_POST_POS 5 +#define VMCLOCK_LEAP_POST_NEG 6 /* Bit shift for counter_period_frac_sec and its error rate */ uint8_t counter_period_shift; @@ -179,10 +185,19 @@ struct vmclock_abi { /* * Time according to time_type field above. */ - uint64_t time_sec; /* Seconds since time_type epoch */ - uint64_t time_frac_sec; /* (seconds >> 64) */ - uint64_t time_esterror_picosec; /* (± picoseconds) */ - uint64_t time_maxerror_picosec; /* (± picoseconds) */ + uint64_t time_sec; /* Seconds since time_type epoch */ + uint64_t time_frac_sec; /* (seconds >> 64) */ + uint64_t time_esterror_picosec; /* (± picoseconds) */ + uint64_t time_maxerror_picosec; /* (± picoseconds) */ + + /* + * This field changes to another non-repeating value when the guest + * has been loaded from a snapshot. In addition to handling a + * disruption in time (which will also be signalled through the + * disruption_marker field), a guest may wish to discard UUIDs, + * reset network connections, reseed entropy, etc. 
+ */ + uint64_t vm_generation_counter; }; #endif /* __VMCLOCK_ABI_H__ */ -- 2.52.0 ================================================ FILE: resources/patches/vmclock/6.1/0002-ptp-vmclock-support-device-notifications.patch ================================================ From b1a7ba47d96753695d9101dde049bc0808f76167 Mon Sep 17 00:00:00 2001 From: Babis Chalios Date: Wed, 21 Jan 2026 14:33:39 +0000 Subject: [PATCH 2/7] ptp: vmclock: support device notifications Add optional support for device notifications in VMClock. When supported, the hypervisor will send a device notification every time it updates the seq_count to a new even value. Moreover, add support for poll() in VMClock as a means to propagate this notification to user space. poll() will return a POLLIN event to listeners every time seq_count changes to a value different than the one last seen (since open() or last read()/pread()). This means that when poll() returns a POLLIN event, listeners need to use read() to observe what has changed and update the reader's view of seq_count. In other words, after a poll() returned, all subsequent calls to poll() will immediately return with a POLLIN event until the listener calls read(). The device advertises support for the notification mechanism by setting flag VMCLOCK_FLAG_NOTIFICATION_PRESENT in vmclock_abi flags field. If the flag is not present the driver won't setup the ACPI notification handler and poll() will always immediately return POLLHUP. Signed-off-by: Babis Chalios Reviewed-by: David Woodhouse Signed-off-by: David Woodhouse --- drivers/ptp/ptp_vmclock.c | 200 ++++++++++++++++++++++++++----- include/uapi/linux/vmclock-abi.h | 5 + 2 files changed, 172 insertions(+), 33 deletions(-) diff --git a/drivers/ptp/ptp_vmclock.c b/drivers/ptp/ptp_vmclock.c index 1ce69eada4b2..87435b65ea7b 100644 --- a/drivers/ptp/ptp_vmclock.c +++ b/drivers/ptp/ptp_vmclock.c @@ -5,6 +5,9 @@ * Copyright © 2024 Amazon.com, Inc. or its affiliates. 
*/ +#include "linux/poll.h" +#include "linux/types.h" +#include "linux/wait.h" #include #include #include @@ -37,6 +40,7 @@ struct vmclock_state { struct resource res; struct vmclock_abi *clk; struct miscdevice miscdev; + wait_queue_head_t disrupt_wait; struct ptp_clock_info ptp_clock_info; struct ptp_clock *ptp_clock; enum clocksource_ids cs_id, sys_cs_id; @@ -46,6 +50,9 @@ struct vmclock_state { #define VMCLOCK_MAX_WAIT ms_to_ktime(100) +/* Require at least the flags field to be present. All else can be optional */ +#define VMCLOCK_MIN_SIZE offsetof(struct vmclock_abi, pad) + /* * Multiply a 64-bit count by a 64-bit tick 'period' in units of seconds >> 64 * and add the fractional second part of the reference time. @@ -313,8 +320,8 @@ static const struct ptp_clock_info ptp_vmclock_info = { static int vmclock_miscdev_mmap(struct file *fp, struct vm_area_struct *vma) { - struct vmclock_state *st = container_of(fp->private_data, - struct vmclock_state, miscdev); + struct vmclock_file_state *fst = fp->private_data; + struct vmclock_state *st = fst->st; if ((vma->vm_flags & (VM_READ|VM_WRITE)) != VM_READ) return -EROFS; @@ -322,22 +329,22 @@ static int vmclock_miscdev_mmap(struct file *fp, struct vm_area_struct *vma) if (vma->vm_end - vma->vm_start != PAGE_SIZE || vma->vm_pgoff) return -EINVAL; - if (io_remap_pfn_range(vma, vma->vm_start, - st->res.start >> PAGE_SHIFT, PAGE_SIZE, - vma->vm_page_prot)) - return -EAGAIN; + if (io_remap_pfn_range(vma, vma->vm_start, + st->res.start >> PAGE_SHIFT, PAGE_SIZE, + vma->vm_page_prot)) + return -EAGAIN; - return 0; + return 0; } static ssize_t vmclock_miscdev_read(struct file *fp, char __user *buf, size_t count, loff_t *ppos) { - struct vmclock_state *st = container_of(fp->private_data, - struct vmclock_state, miscdev); ktime_t deadline = ktime_add(ktime_get(), VMCLOCK_MAX_WAIT); + struct vmclock_file_state *fst = fp->private_data; + struct vmclock_state *st = fst->st; + uint32_t seq, old_seq; size_t max_count; - int32_t seq; if 
(*ppos >= PAGE_SIZE) return 0; @@ -346,6 +353,7 @@ static ssize_t vmclock_miscdev_read(struct file *fp, char __user *buf, if (count > max_count) count = max_count; + old_seq = atomic_read(&fst->seq); while (1) { seq = st->clk->seq_count & ~1ULL; virt_rmb(); @@ -354,8 +362,16 @@ static ssize_t vmclock_miscdev_read(struct file *fp, char __user *buf, return -EFAULT; virt_rmb(); - if (seq == st->clk->seq_count) - break; + if (seq == le32_to_cpu(st->clk->seq_count)) { + /* + * Either we updated fst->seq to seq (the latest version we observed) + * or someone else did (old_seq == seq), so we can break. + */ + if (atomic_try_cmpxchg(&fst->seq, &old_seq, seq) || + old_seq == seq) { + break; + } + } if (ktime_after(ktime_get(), deadline)) return -ETIMEDOUT; @@ -365,32 +381,67 @@ static ssize_t vmclock_miscdev_read(struct file *fp, char __user *buf, return count; } -static const struct file_operations vmclock_miscdev_fops = { - .mmap = vmclock_miscdev_mmap, - .read = vmclock_miscdev_read, -}; +static __poll_t vmclock_miscdev_poll(struct file *fp, poll_table *wait) +{ + struct vmclock_file_state *fst = fp->private_data; + struct vmclock_state *st = fst->st; + uint32_t seq; -/* module operations */ + /* + * Hypervisor will not send us any notifications, so fail immediately + * to avoid having caller sleeping for ever. 
+ */ + if (!(le64_to_cpu(st->clk->flags) & VMCLOCK_FLAG_NOTIFICATION_PRESENT)) + return POLLHUP; + + poll_wait(fp, &st->disrupt_wait, wait); + + seq = le32_to_cpu(st->clk->seq_count); + if (atomic_read(&fst->seq) != seq) + return POLLIN | POLLRDNORM; + + return 0; +} -static int vmclock_remove(struct platform_device *pdev) +static int vmclock_miscdev_open(struct inode *inode, struct file *fp) { - struct device *dev = &pdev->dev; - struct vmclock_state *st = dev_get_drvdata(dev); + struct vmclock_state *st = container_of(fp->private_data, + struct vmclock_state, miscdev); + struct vmclock_file_state *fst = kzalloc(sizeof(*fst), GFP_KERNEL); - if (st->ptp_clock) - ptp_clock_unregister(st->ptp_clock); + if (!fst) + return -ENOMEM; - if (st->miscdev.minor != MISC_DYNAMIC_MINOR) - misc_deregister(&st->miscdev); + fst->st = st; + atomic_set(&fst->seq, 0); + + fp->private_data = fst; + + return 0; +} +static int vmclock_miscdev_release(struct inode *inode, struct file *fp) +{ + kfree(fp->private_data); return 0; } +static const struct file_operations vmclock_miscdev_fops = { + .owner = THIS_MODULE, + .open = vmclock_miscdev_open, + .release = vmclock_miscdev_release, + .mmap = vmclock_miscdev_mmap, + .read = vmclock_miscdev_read, + .poll = vmclock_miscdev_poll, +}; + +/* module operations */ + static acpi_status vmclock_acpi_resources(struct acpi_resource *ares, void *data) { struct vmclock_state *st = data; struct resource_win win; - struct resource *res = &(win.res); + struct resource *res = &win.res; if (ares->type == ACPI_RESOURCE_TYPE_END_TAG) return AE_OK; @@ -399,7 +450,7 @@ static acpi_status vmclock_acpi_resources(struct acpi_resource *ares, void *data if (resource_type(&st->res) == IORESOURCE_MEM) return AE_ERROR; - if (acpi_dev_resource_memory(ares, res) || + if (acpi_dev_resource_memory(ares, res) || acpi_dev_resource_address_space(ares, &win)) { if (resource_type(res) != IORESOURCE_MEM || @@ -413,6 +464,44 @@ static acpi_status vmclock_acpi_resources(struct 
acpi_resource *ares, void *data return AE_ERROR; } +static void +vmclock_acpi_notification_handler(acpi_handle __always_unused handle, + u32 __always_unused event, void *dev) +{ + struct device *device = dev; + struct vmclock_state *st = device->driver_data; + + wake_up_interruptible(&st->disrupt_wait); +} + +static int vmclock_setup_notification(struct device *dev, struct vmclock_state *st) +{ + struct acpi_device *adev = ACPI_COMPANION(dev); + acpi_status status; + + /* + * This should never happen as this function is only called when + * has_acpi_companion(dev) is true, but the logic is sufficiently + * complex that Coverity can't see the tautology. + */ + if (!adev) + return -ENODEV; + + /* The device does not support notifications. Nothing else to do */ + if (!(le64_to_cpu(st->clk->flags) & VMCLOCK_FLAG_NOTIFICATION_PRESENT)) + return 0; + + status = acpi_install_notify_handler(adev->handle, ACPI_DEVICE_NOTIFY, + vmclock_acpi_notification_handler, + dev); + if (ACPI_FAILURE(status)) { + dev_err(dev, "failed to install notification handler"); + return -ENODEV; + } + + return 0; +} + static int vmclock_probe_acpi(struct device *dev, struct vmclock_state *st) { struct acpi_device *adev = ACPI_COMPANION(dev); @@ -436,6 +525,30 @@ static int vmclock_probe_acpi(struct device *dev, struct vmclock_state *st) return 0; } +static void vmclock_remove(void *data) +{ + struct device *dev = data; + struct vmclock_state *st = dev->driver_data; + + if (!st) { + dev_err(dev, "vmclock_remove() called with NULL driver_data"); + return; + } + + if (has_acpi_companion(dev)) + acpi_remove_notify_handler(ACPI_COMPANION(dev)->handle, + ACPI_DEVICE_NOTIFY, + vmclock_acpi_notification_handler); + + if (st->ptp_clock) + ptp_clock_unregister(st->ptp_clock); + + if (st->miscdev.minor != MISC_DYNAMIC_MINOR) + misc_deregister(&st->miscdev); + + dev->driver_data = NULL; +} + static void vmclock_put_idx(void *data) { struct vmclock_state *st = data; @@ -449,7 +562,7 @@ static int 
vmclock_probe(struct platform_device *pdev) struct vmclock_state *st; int ret; - st = devm_kzalloc(dev, sizeof (*st), GFP_KERNEL); + st = devm_kzalloc(dev, sizeof(*st), GFP_KERNEL); if (!st) return -ENOMEM; @@ -463,6 +576,11 @@ static int vmclock_probe(struct platform_device *pdev) goto out; } + if (resource_size(&st->res) < VMCLOCK_MIN_SIZE) { + dev_info(dev, "Region too small (0x%llx)\n", + resource_size(&st->res)); + return -EINVAL; + } st->clk = devm_memremap(dev, st->res.start, resource_size(&st->res), MEMREMAP_WB | MEMREMAP_DEC); if (IS_ERR(st->clk)) { @@ -473,7 +591,7 @@ static int vmclock_probe(struct platform_device *pdev) } if (st->clk->magic != VMCLOCK_MAGIC || - st->clk->size < sizeof(*st->clk) || + st->clk->size > resource_size(&st->res) || st->clk->version != 1) { dev_info(dev, "vmclock magic fields invalid\n"); ret = -EINVAL; @@ -485,7 +603,7 @@ static int vmclock_probe(struct platform_device *pdev) goto out; st->index = ret; - ret = devm_add_action_or_reset(&pdev->dev, vmclock_put_idx, st); + ret = devm_add_action_or_reset(&pdev->dev, vmclock_put_idx, st); if (ret) goto out; @@ -495,9 +613,26 @@ static int vmclock_probe(struct platform_device *pdev) goto out; } - /* If the structure is big enough, it can be mapped to userspace */ - if (st->clk->size >= PAGE_SIZE) { - st->miscdev.minor = MISC_DYNAMIC_MINOR; + st->miscdev.minor = MISC_DYNAMIC_MINOR; + + init_waitqueue_head(&st->disrupt_wait); + dev->driver_data = st; + + ret = devm_add_action_or_reset(&pdev->dev, vmclock_remove, dev); + if (ret) + return ret; + + ret = vmclock_setup_notification(dev, st); + if (ret) + return ret; + + /* + * If the structure is big enough, it can be mapped to userspace. + * Theoretically a guest OS even using larger pages could still + * use 4KiB PTEs to map smaller MMIO regions like this, but let's + * cross that bridge if/when we come to it. 
+ */ + if (le32_to_cpu(st->clk->size) >= PAGE_SIZE) { st->miscdev.fops = &vmclock_miscdev_fops; st->miscdev.name = st->name; @@ -563,7 +698,6 @@ MODULE_DEVICE_TABLE(acpi, vmclock_acpi_ids); static struct platform_driver vmclock_platform_driver = { .probe = vmclock_probe, - .remove = vmclock_remove, .driver = { .name = "vmclock", .acpi_match_table = vmclock_acpi_ids, diff --git a/include/uapi/linux/vmclock-abi.h b/include/uapi/linux/vmclock-abi.h index 62b8f2091ca5..412784fd5969 100644 --- a/include/uapi/linux/vmclock-abi.h +++ b/include/uapi/linux/vmclock-abi.h @@ -125,6 +125,11 @@ struct vmclock_abi { * loaded from some save state (restored from a snapshot). */ #define VMCLOCK_FLAG_VM_GEN_COUNTER_PRESENT (1 << 8) + /* + * If the NOTIFICATION_PRESENT flag is set, the hypervisor will send + * a notification every time it updates seq_count to a new even number. + */ +#define VMCLOCK_FLAG_NOTIFICATION_PRESENT (1 << 9) uint8_t pad[2]; uint8_t clock_status; -- 2.52.0 ================================================ FILE: resources/patches/vmclock/6.1/0003-dt-bindings-ptp-Add-amazon-vmclock.patch ================================================ From daf492c70d7e7a2a09d76481fd7ecbc5e99fb58f Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 21 Jan 2026 14:33:40 +0000 Subject: [PATCH 3/7] dt-bindings: ptp: Add amazon,vmclock The vmclock device provides a PTP clock source and precise timekeeping across live migration and snapshot/restore operations. The binding has a required memory region containing the vmclock_abi structure and an optional interrupt for clock disruption notifications. 
The full spec is at https://uapi-group.org/specifications/specs/vmclock/ Signed-off-by: David Woodhouse Signed-off-by: Babis Chalios Reviewed-by: Krzysztof Kozlowski --- .../bindings/ptp/amazon,vmclock.yaml | 46 +++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 Documentation/devicetree/bindings/ptp/amazon,vmclock.yaml diff --git a/Documentation/devicetree/bindings/ptp/amazon,vmclock.yaml b/Documentation/devicetree/bindings/ptp/amazon,vmclock.yaml new file mode 100644 index 000000000000..357790df876f --- /dev/null +++ b/Documentation/devicetree/bindings/ptp/amazon,vmclock.yaml @@ -0,0 +1,46 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/ptp/amazon,vmclock.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Virtual Machine Clock + +maintainers: + - David Woodhouse + +description: + The vmclock device provides a precise clock source and allows for + accurate timekeeping across live migration and snapshot/restore + operations. The full specification of the shared data structure is + available at https://uapi-group.org/specifications/specs/vmclock/ + +properties: + compatible: + const: amazon,vmclock + + reg: + description: + Specifies the shared memory region containing the vmclock_abi structure. + maxItems: 1 + + interrupts: + description: + Interrupt used to notify when the contents of the vmclock_abi structure + have been updated. 
+ maxItems: 1 + +required: + - compatible + - reg + +additionalProperties: false + +examples: + - | + #include + ptp@80000000 { + compatible = "amazon,vmclock"; + reg = <0x80000000 0x1000>; + interrupts = ; + }; -- 2.52.0 ================================================ FILE: resources/patches/vmclock/6.1/0004-ptp-ptp_vmclock-Add-device-tree-support.patch ================================================ From 30468d547a380aa6db4d9e2ba8ab735daeab0694 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 21 Jan 2026 14:33:41 +0000 Subject: [PATCH 4/7] ptp: ptp_vmclock: Add device tree support Add device tree support to the ptp_vmclock driver, allowing it to probe via device tree in addition to ACPI. Handle optional interrupt for clock disruption notifications, mirroring the ACPI notification behaviour. Although the interrupt is marked as 'optional' in the DT bindings, if the device *advertises* the VMCLOCK_FLAG_NOTIFICATION_PRESENT flag then it *should* have an interrupt. The driver will refuse to initialize if not.
Signed-off-by: David Woodhouse Signed-off-by: Babis Chalios --- drivers/ptp/ptp_vmclock.c | 67 +++++++++++++++++++++++++++++++++++---- 1 file changed, 61 insertions(+), 6 deletions(-) diff --git a/drivers/ptp/ptp_vmclock.c b/drivers/ptp/ptp_vmclock.c index 87435b65ea7b..662fbe93534c 100644 --- a/drivers/ptp/ptp_vmclock.c +++ b/drivers/ptp/ptp_vmclock.c @@ -14,10 +14,13 @@ #include #include #include +#include +#include #include #include #include #include +#include #include #include #include @@ -474,7 +477,7 @@ vmclock_acpi_notification_handler(acpi_handle __always_unused handle, wake_up_interruptible(&st->disrupt_wait); } -static int vmclock_setup_notification(struct device *dev, struct vmclock_state *st) +static int vmclock_setup_acpi_notification(struct device *dev) { struct acpi_device *adev = ACPI_COMPANION(dev); acpi_status status; @@ -487,10 +490,6 @@ static int vmclock_setup_notification(struct device *dev, struct vmclock_state * if (!adev) return -ENODEV; - /* The device does not support notifications. 
Nothing else to do */ - if (!(le64_to_cpu(st->clk->flags) & VMCLOCK_FLAG_NOTIFICATION_PRESENT)) - return 0; - status = acpi_install_notify_handler(adev->handle, ACPI_DEVICE_NOTIFY, vmclock_acpi_notification_handler, dev); @@ -525,6 +524,55 @@ static int vmclock_probe_acpi(struct device *dev, struct vmclock_state *st) return 0; } +static irqreturn_t vmclock_of_irq_handler(int __always_unused irq, void *_st) +{ + struct vmclock_state *st = _st; + + wake_up_interruptible(&st->disrupt_wait); + return IRQ_HANDLED; +} + +static int vmclock_probe_dt(struct device *dev, struct vmclock_state *st) +{ + struct platform_device *pdev = to_platform_device(dev); + struct resource *res; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) + return -ENODEV; + + st->res = *res; + + return 0; +} + +static int vmclock_setup_of_notification(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + int irq; + + irq = platform_get_irq(pdev, 0); + if (irq < 0) + return irq; + + return devm_request_irq(dev, irq, vmclock_of_irq_handler, IRQF_SHARED, + "vmclock", dev->driver_data); +} + +static int vmclock_setup_notification(struct device *dev, + struct vmclock_state *st) +{ + /* The device does not support notifications. 
Nothing else to do */ + if (!(le64_to_cpu(st->clk->flags) & VMCLOCK_FLAG_NOTIFICATION_PRESENT)) + return 0; + + if (has_acpi_companion(dev)) { + return vmclock_setup_acpi_notification(dev); + } else { + return vmclock_setup_of_notification(dev); + } +} + static void vmclock_remove(void *data) { struct device *dev = data; @@ -569,7 +617,7 @@ static int vmclock_probe(struct platform_device *pdev) if (has_acpi_companion(dev)) ret = vmclock_probe_acpi(dev, st); else - ret = -EINVAL; /* Only ACPI for now */ + ret = vmclock_probe_dt(dev, st); if (ret) { dev_info(dev, "Failed to obtain physical address: %d\n", ret); @@ -696,11 +744,18 @@ static const struct acpi_device_id vmclock_acpi_ids[] = { }; MODULE_DEVICE_TABLE(acpi, vmclock_acpi_ids); +static const struct of_device_id vmclock_of_ids[] = { + { .compatible = "amazon,vmclock", }, + { }, +}; +MODULE_DEVICE_TABLE(of, vmclock_of_ids); + static struct platform_driver vmclock_platform_driver = { .probe = vmclock_probe, .driver = { .name = "vmclock", .acpi_match_table = vmclock_acpi_ids, + .of_match_table = vmclock_of_ids, }, }; -- 2.52.0 ================================================ FILE: resources/patches/vmclock/6.1/0005-ptp-ptp_vmclock-add-VMCLOCK-to-ACPI-device-match.patch ================================================ From d291cf42344f2f48557e545648bc26eea9b1828f Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 21 Jan 2026 14:33:42 +0000 Subject: [PATCH 5/7] ptp: ptp_vmclock: add 'VMCLOCK' to ACPI device match As we finalised the spec, we spotted that vmgenid actually says that the _HID is supposed to be hypervisor-specific. Although in the 13 years since the original vmgenid doc was published, nobody seems to have cared about using _HID to distinguish between implementations on different hypervisors, and we only ever use the _CID. For consistency, match the _CID of "VMCLOCK" too. 
Signed-off-by: David Woodhouse Signed-off-by: Babis Chalios --- drivers/ptp/ptp_vmclock.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/ptp/ptp_vmclock.c b/drivers/ptp/ptp_vmclock.c index 662fbe93534c..dbe549cc4b04 100644 --- a/drivers/ptp/ptp_vmclock.c +++ b/drivers/ptp/ptp_vmclock.c @@ -739,6 +739,7 @@ static int vmclock_probe(struct platform_device *pdev) } static const struct acpi_device_id vmclock_acpi_ids[] = { + { "AMZNC10C", 0 }, { "VMCLOCK", 0 }, {} }; -- 2.52.0 ================================================ FILE: resources/patches/vmclock/6.1/0006-ptp-ptp_vmclock-remove-dependency-on-CONFIG_ACPI.patch ================================================ From 1cb36e019ef80058db243c7a02696e17429bd0b1 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 21 Jan 2026 14:33:43 +0000 Subject: [PATCH 6/7] ptp: ptp_vmclock: remove dependency on CONFIG_ACPI Now that we added device tree support we can remove dependency on CONFIG_ACPI. Signed-off-by: David Woodhouse Signed-off-by: Babis Chalios --- drivers/ptp/Kconfig | 26 +++++++++++++++----------- drivers/ptp/ptp_vmclock.c | 14 ++++++++++---- 2 files changed, 25 insertions(+), 15 deletions(-) diff --git a/drivers/ptp/Kconfig b/drivers/ptp/Kconfig index ebadd82c7a7d..e020045bac13 100644 --- a/drivers/ptp/Kconfig +++ b/drivers/ptp/Kconfig @@ -132,17 +132,21 @@ config PTP_1588_CLOCK_KVM will be called ptp_kvm. config PTP_1588_CLOCK_VMCLOCK - tristate "Virtual machine PTP clock" - depends on X86_TSC || ARM_ARCH_TIMER - depends on PTP_1588_CLOCK && ACPI && ARCH_SUPPORTS_INT128 - default y - help - This driver adds support for using a virtual precision clock - advertised by the hypervisor. This clock is only useful in virtual - machines where such a device is present. - - To compile this driver as a module, choose M here: the module - will be called ptp_vmclock. 
+ tristate "Virtual machine PTP clock" + depends on X86_TSC || ARM_ARCH_TIMER + depends on PTP_1588_CLOCK && ARCH_SUPPORTS_INT128 + default PTP_1588_CLOCK_KVM + help + This driver adds support for using a virtual precision clock + advertised by the hypervisor. This clock is only useful in virtual + machines where such a device is present. + + Unlike the KVM virtual PTP clock, the VMCLOCK device offers support + for reliable timekeeping even across live migration. So this driver + is enabled by default whenever the KVM PTP clock is. + + To compile this driver as a module, choose M here: the module + will be called ptp_vmclock. config PTP_1588_CLOCK_IDT82P33 tristate "IDT 82P33xxx PTP clock" diff --git a/drivers/ptp/ptp_vmclock.c b/drivers/ptp/ptp_vmclock.c index dbe549cc4b04..2114d5fd760e 100644 --- a/drivers/ptp/ptp_vmclock.c +++ b/drivers/ptp/ptp_vmclock.c @@ -440,6 +440,7 @@ static const struct file_operations vmclock_miscdev_fops = { /* module operations */ +#if IS_ENABLED(CONFIG_ACPI) static acpi_status vmclock_acpi_resources(struct acpi_resource *ares, void *data) { struct vmclock_state *st = data; @@ -523,6 +524,7 @@ static int vmclock_probe_acpi(struct device *dev, struct vmclock_state *st) return 0; } +#endif /* CONFIG_ACPI */ static irqreturn_t vmclock_of_irq_handler(int __always_unused irq, void *_st) { @@ -566,11 +568,11 @@ static int vmclock_setup_notification(struct device *dev, if (!(le64_to_cpu(st->clk->flags) & VMCLOCK_FLAG_NOTIFICATION_PRESENT)) return 0; - if (has_acpi_companion(dev)) { +#if IS_ENABLED(CONFIG_ACPI) + if (has_acpi_companion(dev)) return vmclock_setup_acpi_notification(dev); - } else { - return vmclock_setup_of_notification(dev); - } +#endif + return vmclock_setup_of_notification(dev); } static void vmclock_remove(void *data) @@ -583,10 +585,12 @@ static void vmclock_remove(void *data) return; } +#if IS_ENABLED(CONFIG_ACPI) if (has_acpi_companion(dev)) acpi_remove_notify_handler(ACPI_COMPANION(dev)->handle, ACPI_DEVICE_NOTIFY, 
vmclock_acpi_notification_handler); +#endif if (st->ptp_clock) ptp_clock_unregister(st->ptp_clock); @@ -614,9 +618,11 @@ static int vmclock_probe(struct platform_device *pdev) if (!st) return -ENOMEM; +#if IS_ENABLED(CONFIG_ACPI) if (has_acpi_companion(dev)) ret = vmclock_probe_acpi(dev, st); else +#endif ret = vmclock_probe_dt(dev, st); if (ret) { -- 2.52.0 ================================================ FILE: resources/patches/vmclock/6.1/0007-ptp-ptp_vmclock-return-TAI-not-UTC.patch ================================================ From 726b41d6531d0e77fc20f6d7ea4b3178ade41e80 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 21 Jan 2026 14:33:44 +0000 Subject: [PATCH 7/7] ptp: ptp_vmclock: return TAI not UTC To output UTC would involve complex calculations about whether the time elapsed since the reference time has crossed the end of the month when a leap second takes effect. I've prototyped that, but it made me sad. Much better to report TAI, which is what PHCs should do anyway. And much much simpler. 
Signed-off-by: David Woodhouse Signed-off-by: Babis Chalios --- drivers/ptp/ptp_vmclock.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/ptp/ptp_vmclock.c b/drivers/ptp/ptp_vmclock.c index 2114d5fd760e..e0da9c5f5d00 100644 --- a/drivers/ptp/ptp_vmclock.c +++ b/drivers/ptp/ptp_vmclock.c @@ -80,13 +80,13 @@ static inline uint64_t mul_u64_u64_shr_add_u64(uint64_t *res_hi, uint64_t delta, static inline bool tai_adjust(struct vmclock_abi *clk, uint64_t *sec) { - if (likely(clk->time_type == VMCLOCK_TIME_UTC)) + if (clk->time_type == VMCLOCK_TIME_TAI) return true; - if (clk->time_type == VMCLOCK_TIME_TAI && - (clk->flags & VMCLOCK_FLAG_TAI_OFFSET_VALID)) { + if (clk->time_type == VMCLOCK_TIME_UTC && + (le64_to_cpu(clk->flags) & VMCLOCK_FLAG_TAI_OFFSET_VALID)) { if (sec) - *sec += clk->tai_offset_sec; + *sec -= (int16_t)le16_to_cpu(clk->tai_offset_sec); return true; } return false; @@ -321,6 +321,11 @@ static const struct ptp_clock_info ptp_vmclock_info = { .getcrosststamp = ptp_vmclock_getcrosststamp, }; +struct vmclock_file_state { + struct vmclock_state *st; + atomic_t seq; +}; + static int vmclock_miscdev_mmap(struct file *fp, struct vm_area_struct *vma) { struct vmclock_file_state *fst = fp->private_data; -- 2.52.0 ================================================ FILE: resources/rebuild.sh ================================================ #!/bin/bash # Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
# SPDX-License-Identifier: Apache-2.0 # fail if we encounter an error, uninitialized variable or a pipe breaks set -eu -o pipefail PS4='+\t ' cd $(dirname $0) ARCH=$(uname -m) OUTPUT_DIR=$PWD/$ARCH GIT_ROOT_DIR=$(git rev-parse --show-toplevel) source "$GIT_ROOT_DIR/tools/functions" # Make sure we have all the needed tools function install_dependencies { apt update apt install -y bc flex bison gcc make libelf-dev libssl-dev squashfs-tools busybox-static tree cpio curl patch docker.io # Install Go version=$(curl -s https://go.dev/VERSION?m=text | head -n 1) case $ARCH in x86_64) archive="${version}.linux-amd64.tar.gz" ;; aarch64) archive="${version}.linux-arm64.tar.gz" ;; esac curl -LO http://go.dev/dl/${archive} tar -C /usr/local -xzf $archive export PATH=/usr/local/go/bin:$PATH go version rm $archive } function compile_and_install { local SRC=$1 local BIN="${SRC%.*}" if [[ $SRC == *.c ]]; then gcc -Wall -o $BIN $SRC elif [[ $SRC == *.go ]]; then pushd $SRC local MOD=$(basename $BIN) go mod init $MOD go mod tidy go build -o ../$MOD rm go.mod go.sum popd fi } # Build a rootfs function build_ci_rootfs { local IMAGE_NAME=$1 prepare_docker build_rootfs "$IMAGE_NAME" "$OUTPUT_DIR" "$PWD/overlay" "chroot.sh" } # https://wiki.gentoo.org/wiki/Custom_Initramfs#Busybox function build_initramfs { INITRAMFS_BUILD=initramfs mkdir -p $INITRAMFS_BUILD pushd $INITRAMFS_BUILD mkdir bin dev proc sys cp /bin/busybox bin/sh ln bin/sh bin/mount # Report guest boot time back to Firecracker via MMIO # See arch/src/lib.rs and the BootTimer device MAGIC_BOOT_ADDRESS=0xc0000000 if [ $ARCH = "aarch64" ]; then MAGIC_BOOT_ADDRESS=0x40000000 fi MAGIC_BOOT_VALUE=123 cat > init </dev/console exec 2>/dev/console echo Boot took $(cut -d' ' -f1 /proc/uptime) seconds echo ">>> Welcome to fcinitrd <<<" exec /bin/sh EOF chmod +x init find . 
-print0 |cpio --null -ov --format=newc -R 0:0 > $OUTPUT_DIR/initramfs.cpio popd rm -rf $INITRAMFS_BUILD } function clone_amazon_linux_repo { [ -d linux ] || git clone --no-checkout --filter=tree:0 https://github.com/amazonlinux/linux } # prints the git tag corresponding to the newest and best matching the provided kernel version $1 # this means that if a microvm kernel exists, the tag returned will be of the form # # microvm-kernel-$1..amzn2[023] # # otherwise choose the newest tag matching # # kernel-$1..amzn2[023] function get_tag { local KERNEL_VERSION=$1 # list all tags from newest to oldest (git --no-pager tag -l --sort=-creatordate | grep "microvm-kernel-$1\..*\.amzn2" \ || git --no-pager tag -l --sort=-creatordate | grep "kernel-$1\..*\.amzn2") | head -n1 } function build_al_kernel { local KERNEL_CFG=$1 # Extract the kernel version from the config file provided as parameter. local KERNEL_VERSION=$(echo $KERNEL_CFG | grep -Po "microvm-kernel-ci-$ARCH-\K(\d+\.\d+)") pushd linux # fails immediately after clone because nothing is checked out make distclean || true TAG=$(get_tag $KERNEL_VERSION) git checkout $TAG # Create a temporary branch where we can apply patches and then # easily discard them git checkout -B tmp-$TAG # Apply any patchset we have for our kernels for patchset in ../patches/*; do echo "Applying patchset ${patchset}/${KERNEL_VERSION}" git apply ${patchset}/${KERNEL_VERSION}/*.patch done arch=$(uname -m) if [ "$arch" = "x86_64" ]; then format="elf" target="vmlinux" binary_path="$target" elif [ "$arch" = "aarch64" ]; then format="pe" target="Image" binary_path="arch/arm64/boot/$target" else echo "FATAL: Unsupported architecture!" exit 1 fi # Concatenate all config files into one. olddefconfig will then resolve # as needed. Later values override earlier ones. 
cat "$@" >.config make olddefconfig make -j $(nproc) $target LATEST_VERSION=$(cat include/config/kernel.release) flavour=$(basename $KERNEL_CFG .config |grep -Po "\d+\.\d+\K(-.*)" || true) # Strip off everything after the last number - sometimes AL kernels have some stuff there. # e.g. vmlinux-4.14.348-openela -> vmlinux-4.14.348 normalized_version=$(echo "$LATEST_VERSION" | sed -E "s/(.*[[:digit:]]).*/\1/g") OUTPUT_FILE=$OUTPUT_DIR/vmlinux-$normalized_version$flavour cp -v $binary_path $OUTPUT_FILE cp -v .config $OUTPUT_FILE.config # Undo any patches previously applied, so that we can build the same kernel with different # configs, e.g. no-acpi git reset --hard HEAD git clean -f -d git checkout - popd &>/dev/null } function prepare_and_build_rootfs { BIN_DIR=overlay/usr/local/bin SRCS=(init.c fillmem.c fast_page_fault_helper.c readmem.c go_sdk_cred_provider.go go_sdk_cred_provider_with_custom_endpoint.go) if [ $ARCH == "aarch64" ]; then SRCS+=(devmemread.c) fi for SRC in ${SRCS[@]}; do compile_and_install $BIN_DIR/$SRC done build_ci_rootfs ubuntu:24.04 build_initramfs for SRC in ${SRCS[@]}; do BIN="${SRC%.*}" rm $BIN_DIR/$BIN done } function vmlinux_split_debuginfo { VMLINUX="$1" DEBUGINFO="$VMLINUX.debug" VMLINUX_ORIG="$VMLINUX" if [ $ARCH = "aarch64" ]; then # in aarch64, the debug info is in vmlinux VMLINUX_ORIG=linux/vmlinux fi objcopy --only-keep-debug $VMLINUX_ORIG $DEBUGINFO objcopy --preserve-dates --strip-debug --add-gnu-debuglink=$DEBUGINFO $VMLINUX # gdb does not support compressed files, but compress them because they are huge gzip -v $DEBUGINFO } function build_al_kernels { if [[ $# = 0 ]]; then local KERNEL_VERSION="all" elif [[ $# -ne 1 ]]; then die "Too many arguments in '$(basename $0) kernels' command. Please use \`$0 help\` for help." else KERNEL_VERSION=$1 if [[ "$KERNEL_VERSION" != @(5.10|5.10-no-acpi|6.1) ]]; then die "Unsupported kernel version: '$KERNEL_VERSION'. Please use \`$0 help\` for help." 
fi fi clone_amazon_linux_repo CI_CONFIG="$PWD/guest_configs/ci.config" PCIE_CONFIG="$PWD/guest_configs/pcie.config" PMEM_CONFIG="$PWD/guest_configs/virtio-pmem.config" MEM_CONFIG="$PWD/guest_configs/virtio-mem.config" VMCLOCK_CONFIG="$PWD/guest_configs/vmclock.config" if [[ "$KERNEL_VERSION" == @(all|5.10) ]]; then build_al_kernel $PWD/guest_configs/microvm-kernel-ci-$ARCH-5.10.config "$CI_CONFIG" "$PCIE_CONFIG" "$PMEM_CONFIG" "$MEM_CONFIG" "$VMCLOCK_CONFIG" fi if [[ $ARCH == "x86_64" && "$KERNEL_VERSION" == @(all|5.10-no-acpi) ]]; then build_al_kernel $PWD/guest_configs/microvm-kernel-ci-$ARCH-5.10-no-acpi.config "$CI_CONFIG" "$PCIE_CONFIG" "$PMEM_CONFIG" "$MEM_CONFIG" "$VMCLOCK_CONFIG" fi if [[ "$KERNEL_VERSION" == @(all|6.1) ]]; then build_al_kernel $PWD/guest_configs/microvm-kernel-ci-$ARCH-6.1.config "$CI_CONFIG" "$PCIE_CONFIG" "$PMEM_CONFIG" "$MEM_CONFIG" "$VMCLOCK_CONFIG" fi # Build debug kernels FTRACE_CONFIG="$PWD/guest_configs/ftrace.config" DEBUG_CONFIG="$PWD/guest_configs/debug.config" OUTPUT_DIR=$OUTPUT_DIR/debug mkdir -pv $OUTPUT_DIR if [[ "$KERNEL_VERSION" == @(all|5.10) ]]; then build_al_kernel "$PWD/guest_configs/microvm-kernel-ci-$ARCH-5.10.config" "$CI_CONFIG" "$PCIE_CONFIG" "$PMEM_CONFIG" "$MEM_CONFIG" "$FTRACE_CONFIG" "$DEBUG_CONFIG" "$VMCLOCK_CONFIG" vmlinux_split_debuginfo $OUTPUT_DIR/vmlinux-5.10.* fi if [[ "$KERNEL_VERSION" == @(all|6.1) ]]; then build_al_kernel "$PWD/guest_configs/microvm-kernel-ci-$ARCH-6.1.config" "$CI_CONFIG" "$PCIE_CONFIG" "$PMEM_CONFIG" "$MEM_CONFIG" "$FTRACE_CONFIG" "$DEBUG_CONFIG" "$VMCLOCK_CONFIG" vmlinux_split_debuginfo $OUTPUT_DIR/vmlinux-6.1.* fi } function print_help { cat <] [] Available commands: all (default) Build CI rootfs and default guest kernels using configurations from resources/guest_configs. This will patch the guest configurations with all the patches under resources/guest_configs/patches. This is the default command, if no command is chosen. rootfs Builds only the CI rootfs. 
kernels [version] Builds our the currently supported CI kernels. version: Optionally choose a kernel version to build. Supported versions are: 5.10, 5.10-no-acpi or 6.1. help Displays the help message and exits. EOF } function main { if [[ $# = 0 ]]; then local MODE="all" else case $1 in all|rootfs|kernels) local MODE=$1 shift ;; help) print_help exit 0 ;; *) die "Unknown command: '$1'. Please use \`$0 help\` for help." esac fi set -x install_dependencies # Create the directory in which we will store the kernels and rootfs mkdir -pv $OUTPUT_DIR if [[ "$MODE" =~ (all|rootfs) ]]; then say "Building rootfs" prepare_and_build_rootfs fi if [[ "$MODE" =~ (all|kernels) ]]; then say "Building CI kernels" build_al_kernels "$@" fi tree -h $OUTPUT_DIR } main "$@" ================================================ FILE: resources/seccomp/aarch64-unknown-linux-musl.json ================================================ { "vmm": { "default_action": "trap", "filter_action": "allow", "filter": [ { "syscall": "newfstatat", "comment": "Used when creating snapshots in vmm:persist::snapshot_memory_to_file through std::fs::File::metadata" }, { "syscall": "epoll_ctl" }, { "syscall": "epoll_pwait" }, { "syscall": "exit" }, { "syscall": "exit_group" }, { "syscall": "openat" }, { "syscall": "read" }, { "syscall": "write" }, { "syscall": "mincore" }, { "syscall": "writev", "comment": "Used by the VirtIO net device to write to tap" }, { "syscall": "readv", "comment": "Used by the VirtIO net device to read from tap" }, { "syscall": "fsync" }, { "syscall": "close" }, { "syscall": "eventfd2", "comment": "Used for creating io_uring completion event, on drive patch" }, { "syscall": "io_uring_enter", "comment": "Used for submitting io_uring requests" }, { "syscall": "io_uring_setup", "comment": "Used on drive patch" }, { "syscall": "io_uring_register", "comment": "Used on drive patch" }, { "syscall": "brk", "comment": "Called for expanding the heap" }, { "syscall": "gettid", "comment": "Rust std uses 
it during panic to print the thread id." }, { "syscall": "clock_gettime", "comment": "Used for metrics and logging, via the helpers in utils/src/time.rs. It's not called on some platforms, because of vdso optimisations." }, { "syscall": "connect", "comment": "Needed for vsock" }, { "syscall": "fstat", "comment": "Used for drive patching & rescanning, for reading the local timezone from /etc/localtime" }, { "syscall": "ftruncate", "comment": "Used for snapshotting" }, { "syscall": "lseek", "comment": "Used by the block device" }, { "syscall": "mremap", "comment": "Used for re-allocating large memory regions, for example vectors" }, { "syscall": "munmap", "comment": "Used for freeing memory" }, { "syscall": "recvfrom", "comment": "Used by vsock to retrieve data from the socket" }, { "syscall": "rt_sigprocmask", "comment": "rt_sigprocmask is used by libc::abort during a panic to block and unblock signals" }, { "syscall": "rt_sigreturn", "comment": "rt_sigreturn is needed in case a fault does occur, so that the signal handler can return. Otherwise we get stuck in a fault loop." }, { "syscall": "sigaltstack", "comment": "sigaltstack is used by Rust stdlib to remove alternative signal stack during thread teardown." 
}, { "syscall": "getrandom", "comment": "getrandom is used by aws-lc library which we consume in virtio-rng" }, { "syscall": "accept4", "comment": "Called to accept vsock connections", "args": [ { "index": 3, "type": "dword", "op": "eq", "val": 524288, "comment": "libc::SOCK_CLOEXEC" } ] }, { "syscall": "fcntl", "comment": "Used by snapshotting, drive patching and rescanning", "args": [ { "index": 1, "type": "dword", "op": "eq", "val": 2, "comment": "FCNTL_F_SETFD" }, { "index": 2, "type": "dword", "op": "eq", "val": 1, "comment": "FCNTL_FD_CLOEXEC" } ] }, { "syscall": "futex", "comment": "Used for synchronization (during thread teardown when joining multiple vcpu threads at once)", "args": [ { "index": 1, "type": "dword", "op": "eq", "val": 0, "comment": "FUTEX_WAIT" } ] }, { "syscall": "futex", "comment": "Used for synchronization (during thread teardown)", "args": [ { "index": 1, "type": "dword", "op": "eq", "val": 1, "comment": "FUTEX_WAKE" } ] }, { "syscall": "futex", "comment": "Used for synchronization", "args": [ { "index": 1, "type": "dword", "op": "eq", "val": 128, "comment": "FUTEX_WAIT_PRIVATE" } ] }, { "syscall": "futex", "comment": "Used for synchronization", "args": [ { "index": 1, "type": "dword", "op": "eq", "val": 137, "comment": "FUTEX_WAIT_BITSET_PRIVATE" } ] }, { "syscall": "futex", "comment": "Used for synchronization", "args": [ { "index": 1, "type": "dword", "op": "eq", "val": 129, "comment": "FUTEX_WAKE_PRIVATE" } ] }, { "syscall": "madvise", "comment": "Used by the VirtIO balloon device and by musl for some customer workloads. It is also used by aws-lc during random number generation. They setup a memory page that mark with MADV_WIPEONFORK to be able to detect forks. They also call it with -1 to see if madvise is supported in certain platforms." 
}, { "syscall": "msync", "comment": "Used by the VirtIO pmem device to sync the file content with the backing file.", "args": [ { "index": 2, "type": "dword", "op": "eq", "val": 4, "comment": "libc::MS_SYNC" } ] }, { "syscall": "mmap", "comment": "Used by the VirtIO balloon device", "args": [ { "index": 3, "type": "dword", "op": "eq", "val": 50, "comment": "libc::MAP_FIXED | libc::MAP_ANONYMOUS | libc::MAP_PRIVATE" } ] }, { "syscall": "mmap", "comment": "Used for reading the timezone in LocalTime::now()", "args": [ { "index": 3, "type": "dword", "op": "eq", "val": 1, "comment": "libc::MAP_SHARED" } ] }, { "syscall": "mmap", "comment": "Used by rust's stdlib, particularly when creating a diff snapshot of a VM with ~16 GB of memory", "args": [ { "index": 3, "type": "dword", "op": "eq", "val": 34, "comment": "libc::MAP_ANONYMOUS | libc::MAP_PRIVATE" } ] }, { "syscall": "mmap", "comment": "Used by io_uring for mapping the queues", "args": [ { "index": 3, "type": "dword", "op": "eq", "val": 32769, "comment": "libc::MAP_SHARED | libc::MAP_POPULATE" } ] }, { "syscall": "rt_sigaction", "comment": "rt_sigaction is used by libc::abort during a panic to install the default handler for SIGABRT", "args": [ { "index": 0, "type": "dword", "op": "eq", "val": 6, "comment": "SIGABRT" } ] }, { "syscall": "socket", "comment": "Called to open the vsock UDS", "args": [ { "index": 0, "type": "dword", "op": "eq", "val": 1, "comment": "libc::AF_UNIX" }, { "index": 1, "type": "dword", "op": "eq", "val": 524289, "comment": "libc::SOCK_STREAM | libc::SOCK_CLOEXEC" }, { "index": 2, "type": "dword", "op": "eq", "val": 0 } ] }, { "syscall": "sendto", "comment": "Rust std uses it to write to unix socket" }, { "syscall": "tkill", "comment": "tkill is used by libc::abort during a panic to raise SIGABRT", "args": [ { "index": 1, "type": "dword", "op": "eq", "val": 6, "comment": "SIGABRT" } ] }, { "syscall": "tkill", "comment": "Used to kick vcpus", "args": [ { "index": 1, "type": "dword", "op": 
"eq", "val": 35, "comment": "sigrtmin() + vcpu::VCPU_RTSIG_OFFSET" } ] }, { "syscall": "timerfd_settime", "comment": "Needed for rate limiting and metrics", "args": [ { "index": 1, "type": "dword", "op": "eq", "val": 0 } ] }, { "syscall": "ioctl", "comment": "Used to make vsock UDS nonblocking", "args": [ { "index": 1, "type": "dword", "op": "eq", "val": 21537, "comment": "FIONBIO" } ] }, { "syscall": "ioctl", "comment": "Triggered on shutdown, to restore the initial terminal settings.", "args": [ { "index": 1, "type": "dword", "op": "eq", "val": 21523, "comment": "TIOCGWINSZ" } ] }, { "syscall": "ioctl", "comment": "Triggered on shutdown, to restore the initial terminal settings, only when Firecracker was launched from a shell.", "args": [ { "index": 1, "type": "dword", "op": "eq", "val": 21505, "comment": "TCGETS" } ] }, { "syscall": "ioctl", "comment": "Triggered on shutdown, to restore the initial terminal settings, only when Firecracker was launched from a shell.", "args": [ { "index": 1, "type": "dword", "op": "eq", "val": 21506, "comment": "TCSETS" } ] }, { "syscall": "ioctl", "args": [ { "index": 1, "type": "dword", "op": "eq", "val": 1074835010, "comment": "KVM_GET_DIRTY_LOG" } ] }, { "syscall": "ioctl", "args": [ { "index": 1, "type": "dword", "op": "eq", "val": 1075359457, "comment": "KVM_SET_DEVICE_ATTR" } ] }, { "syscall": "ioctl", "args": [ { "index": 1, "type": "dword", "op": "eq", "val": 1075359458, "comment": "KVM_GET_DEVICE_ATTR" } ] }, { "syscall": "ioctl", "args": [ { "index": 1, "type": "dword", "op": "eq", "val": 1075883590, "comment": "KVM_SET_USER_MEMORY_REGION, used to (un)plug memory for the virtio-mem device" } ] }, { "syscall": "sched_yield", "comment": "Used by the rust standard library in std::sync::mpmc. 
Firecracker uses mpsc channels from this module for inter-thread communication" }, { "syscall": "sendmsg", "comment": "Used by vhost-user frontend to communicate with the backend" }, { "syscall": "recvmsg", "comment": "Used by vhost-user frontend to read response from the backend" }, { "syscall": "restart_syscall", "comment": "automatically issued by the kernel when specific timing-related syscalls (e.g. nanosleep) get interrupted by SIGSTOP" }, { "syscall": "mprotect", "comment": "Used by memory hotplug to protect access to underlying host memory" } ] }, "api": { "default_action": "trap", "filter_action": "allow", "filter": [ { "syscall": "epoll_ctl" }, { "syscall": "epoll_pwait" }, { "syscall": "exit" }, { "syscall": "exit_group" }, { "syscall": "openat" }, { "syscall": "read" }, { "syscall": "write" }, { "syscall": "close" }, { "syscall": "brk", "comment": "Called for expanding the heap" }, { "syscall": "gettid", "comment": "Rust std uses it during panic to print the thread id." }, { "syscall": "clock_gettime", "comment": "Used for metrics and logging, via the helpers in utils/src/time.rs. It's not called on some platforms, because of vdso optimisations." }, { "syscall": "fstat", "comment": "Used for reading the local timezone from /etc/localtime" }, { "syscall": "mremap", "comment": "Used for re-allocating large memory regions, for example vectors" }, { "syscall": "munmap", "comment": "Used for freeing memory" }, { "syscall": "recvfrom", "comment": "Used to retrieve data from the socket" }, { "syscall": "recvmsg", "comment": "Needed by micro-http to read from the byte stream." }, { "syscall": "rt_sigprocmask", "comment": "rt_sigprocmask is used by Rust stdlib to remove custom signal handler during thread teardown." }, { "syscall": "sigaltstack", "comment": "sigaltstack is used by Rust stdlib to remove alternative signal stack during thread teardown." 
}, { "syscall": "getrandom", "comment": "getrandom is used by `HttpServer` to reinialize `HashMap` after moving to the API thread" }, { "syscall": "accept4", "comment": "Called to accept socket connections", "args": [ { "index": 3, "type": "dword", "op": "eq", "val": 524288, "comment": "libc::SOCK_CLOEXEC" } ] }, { "syscall": "fcntl", "comment": "Used by MMDS version 2 to extract entropy", "args": [ { "index": 1, "type": "dword", "op": "eq", "val": 2, "comment": "FCNTL_F_SETFD" } ] }, { "syscall": "futex", "comment": "Used for synchronization (during thread teardown)", "args": [ { "index": 1, "type": "dword", "op": "eq", "val": 0, "comment": "FUTEX_WAIT" } ] }, { "syscall": "futex", "comment": "Used for synchronization (during thread teardown)", "args": [ { "index": 1, "type": "dword", "op": "eq", "val": 1, "comment": "FUTEX_WAKE" } ] }, { "syscall": "futex", "comment": "Used for synchronization", "args": [ { "index": 1, "type": "dword", "op": "eq", "val": 128, "comment": "FUTEX_WAIT_PRIVATE" } ] }, { "syscall": "futex", "comment": "Used for synchronization", "args": [ { "index": 1, "type": "dword", "op": "eq", "val": 137, "comment": "FUTEX_WAIT_BITSET_PRIVATE" } ] }, { "syscall": "futex", "comment": "Used for synchronization", "args": [ { "index": 1, "type": "dword", "op": "eq", "val": 129, "comment": "FUTEX_WAKE_PRIVATE" } ] }, { "syscall": "madvise", "comment": "Triggered by musl for some customer workloads", "args": [ { "index": 2, "type": "dword", "op": "eq", "val": 4, "comment": "libc::MADV_DONTNEED" } ] }, { "syscall": "mmap", "comment": "Used for reading the timezone in LocalTime::now()", "args": [ { "index": 3, "type": "dword", "op": "eq", "val": 1, "comment": "libc::MAP_SHARED" } ] }, { "syscall": "mmap", "comment": "Used for large buffers sent to api_server", "args": [ { "index": 3, "type": "dword", "op": "eq", "val": 34, "comment": "libc::MAP_ANONYMOUS | libc::MAP_PRIVATE" } ] }, { "syscall": "rt_sigaction", "comment": "rt_sigaction is used by 
libc::abort during a panic to install the default handler for SIGABRT", "args": [ { "index": 0, "type": "dword", "op": "eq", "val": 6, "comment": "SIGABRT" } ] }, { "syscall": "socket", "comment": "Called to open the unix domain socket", "args": [ { "index": 0, "type": "dword", "op": "eq", "val": 1, "comment": "libc::AF_UNIX" }, { "index": 1, "type": "dword", "op": "eq", "val": 524289, "comment": "libc::SOCK_STREAM | libc::SOCK_CLOEXEC" }, { "index": 2, "type": "dword", "op": "eq", "val": 0 } ] }, { "syscall": "sendto", "comment": "Rust std uses it to write to unix socket" }, { "syscall": "tkill", "comment": "tkill is used by libc::abort during a panic to raise SIGABRT", "args": [ { "index": 1, "type": "dword", "op": "eq", "val": 6, "comment": "SIGABRT" } ] }, { "syscall": "ioctl", "comment": "Used to make api socket nonblocking", "args": [ { "index": 1, "type": "dword", "op": "eq", "val": 21537, "comment": "FIONBIO" } ] }, { "syscall": "sched_yield", "comment": "Used by the rust standard library in std::sync::mpmc. Firecracker uses mpsc channels from this module for inter-thread communication" }, { "syscall": "restart_syscall", "comment": "automatically issued by the kernel when specific timing-related syscalls (e.g. nanosleep) get interrupted by SIGSTOP" } ] }, "vcpu": { "default_action": "trap", "filter_action": "allow", "filter": [ { "syscall": "exit" }, { "syscall": "exit_group" }, { "syscall": "write" }, { "syscall": "openat" }, { "syscall": "close" }, { "syscall": "fstat", "comment": "Used for reading the local timezone from /etc/localtime" }, { "syscall": "brk", "comment": "Called for expanding the heap" }, { "syscall": "gettid", "comment": "Rust std uses it during panic to print the thread id." }, { "syscall": "clock_gettime", "comment": "Used for metrics and logging, via the helpers in utils/src/time.rs. It's not called on some platforms, because of vdso optimisations." 
}, { "syscall": "mremap", "comment": "Used for re-allocating large memory regions, for example vectors" }, { "syscall": "munmap", "comment": "Used for freeing memory" }, { "syscall": "rt_sigprocmask", "comment": "rt_sigprocmask is used by Rust stdlib to remove custom signal handler during thread teardown." }, { "syscall": "rt_sigreturn", "comment": "rt_sigreturn is needed in case a fault does occur, so that the signal handler can return. Otherwise we get stuck in a fault loop." }, { "syscall": "sigaltstack", "comment": "sigaltstack is used by Rust stdlib to remove alternative signal stack during thread teardown." }, { "syscall": "futex", "comment": "Used for synchronization (during thread teardown when joining multiple vcpu threads at once)", "args": [ { "index": 1, "type": "dword", "op": "eq", "val": 0, "comment": "FUTEX_WAIT" } ] }, { "syscall": "futex", "comment": "Used for synchronization (during thread teardown)", "args": [ { "index": 1, "type": "dword", "op": "eq", "val": 1, "comment": "FUTEX_WAKE" } ] }, { "syscall": "futex", "comment": "Used for synchronization", "args": [ { "index": 1, "type": "dword", "op": "eq", "val": 128, "comment": "FUTEX_WAIT_PRIVATE" } ] }, { "syscall": "futex", "comment": "Used for synchronization", "args": [ { "index": 1, "type": "dword", "op": "eq", "val": 137, "comment": "FUTEX_WAIT_BITSET_PRIVATE" } ] }, { "syscall": "futex", "comment": "Used for synchronization", "args": [ { "index": 1, "type": "dword", "op": "eq", "val": 129, "comment": "FUTEX_WAKE_PRIVATE" } ] }, { "syscall": "madvise", "comment": "Triggered by musl for some customer workloads", "args": [ { "index": 2, "type": "dword", "op": "eq", "val": 4, "comment": "libc::MADV_DONTNEED" } ] }, { "syscall": "mmap", "comment": "Used for reading the timezone in LocalTime::now()", "args": [ { "index": 3, "type": "dword", "op": "eq", "val": 1, "comment": "libc::MAP_SHARED" } ] }, { "syscall": "mmap", "comment": "Used for allocating memory for FamStructWrapper called by 
KvmCpu::get_cpuid", "args": [ { "index": 3, "type": "dword", "op": "eq", "val": 34, "comment": "libc::MAP_ANONYMOUS|libc::MAP_PRIVATE" }, { "index": 2, "type": "dword", "op": "eq", "val": 3, "comment": "libc::PROT_READ|libc::PROT_WRITE" } ] }, { "syscall": "rt_sigaction", "comment": "rt_sigaction is used by libc::abort during a panic to install the default handler for SIGABRT", "args": [ { "index": 0, "type": "dword", "op": "eq", "val": 6, "comment": "SIGABRT" } ] }, { "syscall": "timerfd_settime", "comment": "Needed for updating the balloon statistics interval", "args": [ { "index": 1, "type": "dword", "op": "eq", "val": 0 } ] }, { "syscall": "tkill", "comment": "tkill is used by libc::abort during a panic to raise SIGABRT", "args": [ { "index": 1, "type": "dword", "op": "eq", "val": 6, "comment": "SIGABRT" } ] }, { "syscall": "ioctl", "args": [ { "index": 1, "type": "dword", "op": "eq", "val": 44672, "comment": "KVM_RUN" } ] }, { "syscall": "ioctl", "args": [ { "index": 1, "type": "dword", "op": "eq", "val": 2147790488, "comment": "KVM_GET_MP_STATE" } ] }, { "syscall": "ioctl", "args": [ { "index": 1, "type": "dword", "op": "eq", "val": 1074835115, "comment": "KVM_GET_ONE_REG" } ] }, { "syscall": "ioctl", "args": [ { "index": 1, "type": "dword", "op": "eq", "val": 3221794480, "comment": "KVM_GET_REG_LIST" } ] }, { "syscall": "ioctl", "args": [ { "index": 1, "type": "dword", "op": "eq", "val": 1074025680, "comment": "TUNSETOFFLOAD" } ] }, { "syscall": "sched_yield", "comment": "Used by the rust standard library in std::sync::mpmc. Firecracker uses mpsc channels from this module for inter-thread communication" }, { "syscall": "sendmsg", "comment": "Used by vhost-user frontend to communicate with the backend" }, { "syscall": "restart_syscall", "comment": "automatically issued by the kernel when specific timing-related syscalls (e.g. 
nanosleep) get interrupted by SIGSTOP" }, { "syscall": "ioctl", "args": [ { "index": 1, "type": "dword", "op": "eq", "val": 44547, "comment": "KVM_CHECK_EXTENSION" }, { "index": 2, "type": "dword", "op": "eq", "val": 131, "comment": "KVM_CAP_MSI_DEVID" } ] }, { "syscall": "ioctl", "args": [ { "index": 1, "type": "dword", "op": "eq", "val": 1074310762, "comment": "KVM_SET_GSI_ROUTING" } ] }, { "syscall": "ioctl", "args": [ { "index": 1, "type": "dword", "op": "eq", "val": 1075883638, "comment": "KVM_IRQFD" } ] } ] } } ================================================ FILE: resources/seccomp/unimplemented.json ================================================ { "vmm": { "default_action": "allow", "filter_action": "trap", "filter": [] }, "api": { "default_action": "allow", "filter_action": "trap", "filter": [] }, "vcpu": { "default_action": "allow", "filter_action": "trap", "filter": [] } } ================================================ FILE: resources/seccomp/x86_64-unknown-linux-musl.json ================================================ { "vmm": { "default_action": "trap", "filter_action": "allow", "filter": [ { "syscall": "stat", "comment": "Used when creating snapshots in vmm:persist::snapshot_memory_to_file through std::fs::File::metadata" }, { "syscall": "epoll_ctl" }, { "syscall": "epoll_pwait" }, { "syscall": "exit" }, { "syscall": "exit_group" }, { "syscall": "open" }, { "syscall": "read" }, { "syscall": "write" }, { "syscall": "mincore" }, { "syscall": "writev", "comment": "Used by the VirtIO net device to write to tap" }, { "syscall": "readv", "comment": "Used by the VirtIO net device to read from tap" }, { "syscall": "fsync" }, { "syscall": "close" }, { "syscall": "eventfd2", "comment": "Used for creating io_uring completion event, on drive patch" }, { "syscall": "io_uring_enter", "comment": "Used for submitting io_uring requests" }, { "syscall": "io_uring_setup", "comment": "Used on drive patch" }, { "syscall": "io_uring_register", "comment": "Used on 
drive patch" }, { "syscall": "brk", "comment": "Called for expanding the heap" }, { "syscall": "gettid", "comment": "Rust std uses it during panic to print the thread id." }, { "syscall": "clock_gettime", "comment": "Used for metrics and logging, via the helpers in utils/src/time.rs. It's not called on some platforms, because of vdso optimisations." }, { "syscall": "connect", "comment": "Needed for vsock" }, { "syscall": "fstat", "comment": "Used for drive patching & rescanning, for reading the local timezone from /etc/localtime" }, { "syscall": "ftruncate", "comment": "Used for snapshotting" }, { "syscall": "lseek", "comment": "Used by the block device" }, { "syscall": "mremap", "comment": "Used for re-allocating large memory regions, for example vectors" }, { "syscall": "munmap", "comment": "Used for freeing memory" }, { "syscall": "recvfrom", "comment": "Used by vsock to retrieve data from the socket" }, { "syscall": "rt_sigprocmask", "comment": "rt_sigprocmask is used by libc::abort during a panic to block and unblock signals" }, { "syscall": "rt_sigreturn", "comment": "rt_sigreturn is needed in case a fault does occur, so that the signal handler can return. Otherwise we get stuck in a fault loop." }, { "syscall": "sigaltstack", "comment": "sigaltstack is used by Rust stdlib to remove alternative signal stack during thread teardown." 
}, { "syscall": "getrandom", "comment": "getrandom is used by aws-lc library which we consume in virtio-rng" }, { "syscall": "accept4", "comment": "Called to accept vsock connections", "args": [ { "index": 3, "type": "dword", "op": "eq", "val": 524288, "comment": "libc::SOCK_CLOEXEC" } ] }, { "syscall": "fcntl", "comment": "Used by snapshotting, drive patching and rescanning", "args": [ { "index": 1, "type": "dword", "op": "eq", "val": 2, "comment": "FCNTL_F_SETFD" }, { "index": 2, "type": "dword", "op": "eq", "val": 1, "comment": "FCNTL_FD_CLOEXEC" } ] }, { "syscall": "futex", "comment": "Used for synchronization (during thread teardown when joining multiple vcpu threads at once)", "args": [ { "index": 1, "type": "dword", "op": "eq", "val": 0, "comment": "FUTEX_WAIT" } ] }, { "syscall": "futex", "comment": "Used for synchronization (during thread teardown)", "args": [ { "index": 1, "type": "dword", "op": "eq", "val": 1, "comment": "FUTEX_WAKE" } ] }, { "syscall": "futex", "comment": "Used for synchronization", "args": [ { "index": 1, "type": "dword", "op": "eq", "val": 128, "comment": "FUTEX_WAIT_PRIVATE" } ] }, { "syscall": "futex", "comment": "Used for synchronization", "args": [ { "index": 1, "type": "dword", "op": "eq", "val": 137, "comment": "FUTEX_WAIT_BITSET_PRIVATE" } ] }, { "syscall": "futex", "comment": "Used for synchronization", "args": [ { "index": 1, "type": "dword", "op": "eq", "val": 129, "comment": "FUTEX_WAKE_PRIVATE" } ] }, { "syscall": "madvise", "comment": "Used by the VirtIO balloon device and by musl for some customer workloads. It is also used by aws-lc during random number generation. They setup a memory page that mark with MADV_WIPEONFORK to be able to detect forks. They also call it with -1 to see if madvise is supported in certain platforms." 
}, { "syscall": "msync", "comment": "Used by the VirtIO pmem device to sync the file content with the backing file.", "args": [ { "index": 2, "type": "dword", "op": "eq", "val": 4, "comment": "libc::MS_SYNC" } ] }, { "syscall": "mmap", "comment": "Used by the VirtIO balloon device", "args": [ { "index": 3, "type": "dword", "op": "eq", "val": 50, "comment": "libc::MAP_FIXED | libc::MAP_ANONYMOUS | libc::MAP_PRIVATE" } ] }, { "syscall": "mmap", "comment": "Used for reading the timezone in LocalTime::now()", "args": [ { "index": 3, "type": "dword", "op": "eq", "val": 1, "comment": "libc::MAP_SHARED" } ] }, { "syscall": "mmap", "comment": "Used by rust's stdlib, particularly when creating a diff snapshot of a VM with ~16 GB of memory", "args": [ { "index": 3, "type": "dword", "op": "eq", "val": 34, "comment": "libc::MAP_ANONYMOUS | libc::MAP_PRIVATE" } ] }, { "syscall": "mmap", "comment": "Used by io_uring for mapping the queues", "args": [ { "index": 3, "type": "dword", "op": "eq", "val": 32769, "comment": "libc::MAP_SHARED | libc::MAP_POPULATE" } ] }, { "syscall": "rt_sigaction", "comment": "rt_sigaction is used by libc::abort during a panic to install the default handler for SIGABRT", "args": [ { "index": 0, "type": "dword", "op": "eq", "val": 6, "comment": "SIGABRT" } ] }, { "syscall": "socket", "comment": "Called to open the vsock UDS", "args": [ { "index": 0, "type": "dword", "op": "eq", "val": 1, "comment": "libc::AF_UNIX" }, { "index": 1, "type": "dword", "op": "eq", "val": 524289, "comment": "libc::SOCK_STREAM | libc::SOCK_CLOEXEC" }, { "index": 2, "type": "dword", "op": "eq", "val": 0 } ] }, { "syscall": "sendto", "comment": "Rust std uses it to write to unix socket" }, { "syscall": "tkill", "comment": "tkill is used by libc::abort during a panic to raise SIGABRT", "args": [ { "index": 1, "type": "dword", "op": "eq", "val": 6, "comment": "SIGABRT" } ] }, { "syscall": "tkill", "comment": "Used to kick vcpus", "args": [ { "index": 1, "type": "dword", "op": 
"eq", "val": 35, "comment": "sigrtmin() + vcpu::VCPU_RTSIG_OFFSET" } ] }, { "syscall": "timerfd_settime", "comment": "Needed for rate limiting and metrics", "args": [ { "index": 1, "type": "dword", "op": "eq", "val": 0 } ] }, { "syscall": "ioctl", "comment": "Used to make vsock UDS nonblocking", "args": [ { "index": 1, "type": "dword", "op": "eq", "val": 21537, "comment": "FIONBIO" } ] }, { "syscall": "ioctl", "comment": "Triggered on shutdown, to restore the initial terminal settings.", "args": [ { "index": 1, "type": "dword", "op": "eq", "val": 21523, "comment": "TIOCGWINSZ" } ] }, { "syscall": "ioctl", "comment": "Triggered on shutdown, to restore the initial terminal settings, only when Firecracker was launched from a shell.", "args": [ { "index": 1, "type": "dword", "op": "eq", "val": 21505, "comment": "TCGETS" } ] }, { "syscall": "ioctl", "comment": "Triggered on shutdown, to restore the initial terminal settings, only when Firecracker was launched from a shell.", "args": [ { "index": 1, "type": "dword", "op": "eq", "val": 21506, "comment": "TCSETS" } ] }, { "syscall": "ioctl", "args": [ { "index": 1, "type": "dword", "op": "eq", "val": 1074835010, "comment": "KVM_GET_DIRTY_LOG" } ] }, { "syscall": "ioctl", "args": [ { "index": 1, "type": "dword", "op": "eq", "val": 3255348834, "comment": "KVM_GET_IRQCHIP" } ] }, { "syscall": "ioctl", "args": [ { "index": 1, "type": "dword", "op": "eq", "val": 2150674044, "comment": "KVM_GET_CLOCK" } ] }, { "syscall": "ioctl", "args": [ { "index": 1, "type": "dword", "op": "eq", "val": 2154868383, "comment": "KVM_GET_PIT2" } ] }, { "syscall": "ioctl", "args": [ { "index": 1, "type": "dword", "op": "eq", "val": 1075883590, "comment": "KVM_SET_USER_MEMORY_REGION, used to (un)plug memory for the virtio-mem device" } ] }, { "syscall": "sched_yield", "comment": "Used by the rust standard library in std::sync::mpmc. 
Firecracker uses mpsc channels from this module for inter-thread communication" }, { "syscall": "sendmsg", "comment": "Used by vhost-user frontend to communicate with the backend" }, { "syscall": "recvmsg", "comment": "Used by vhost-user frontend to read response from the backend" }, { "syscall": "restart_syscall", "comment": "automatically issued by the kernel when specific timing-related syscalls (e.g. nanosleep) get interrupted by SIGSTOP" }, { "syscall": "mprotect", "comment": "Used by memory hotplug to protect access to underlying host memory" } ] }, "api": { "default_action": "trap", "filter_action": "allow", "filter": [ { "syscall": "epoll_ctl" }, { "syscall": "epoll_pwait" }, { "syscall": "exit" }, { "syscall": "exit_group" }, { "syscall": "open" }, { "syscall": "read" }, { "syscall": "write" }, { "syscall": "close" }, { "syscall": "brk", "comment": "Called for expanding the heap" }, { "syscall": "gettid", "comment": "Rust std uses it during panic to print the thread id." }, { "syscall": "clock_gettime", "comment": "Used for metrics and logging, via the helpers in utils/src/time.rs. It's not called on some platforms, because of vdso optimisations." }, { "syscall": "fstat", "comment": "Used for reading the local timezone from /etc/localtime" }, { "syscall": "mremap", "comment": "Used for re-allocating large memory regions, for example vectors" }, { "syscall": "munmap", "comment": "Used for freeing memory" }, { "syscall": "recvfrom", "comment": "Used to retrieve data from the socket" }, { "syscall": "recvmsg", "comment": "Needed by micro-http to read from the byte stream." }, { "syscall": "rt_sigprocmask", "comment": "rt_sigprocmask is used by Rust stdlib to remove custom signal handler during thread teardown." }, { "syscall": "sigaltstack", "comment": "sigaltstack is used by Rust stdlib to remove alternative signal stack during thread teardown." 
}, { "syscall": "getrandom", "comment": "getrandom is used by `HttpServer` to reinialize `HashMap` after moving to the API thread" }, { "syscall": "accept4", "comment": "Called to accept socket connections", "args": [ { "index": 3, "type": "dword", "op": "eq", "val": 524288, "comment": "libc::SOCK_CLOEXEC" } ] }, { "syscall": "fcntl", "comment": "Used by MMDS version 2 to extract entropy", "args": [ { "index": 1, "type": "dword", "op": "eq", "val": 2, "comment": "FCNTL_F_SETFD" } ] }, { "syscall": "futex", "comment": "Used for synchronization (during thread teardown)", "args": [ { "index": 1, "type": "dword", "op": "eq", "val": 0, "comment": "FUTEX_WAIT" } ] }, { "syscall": "futex", "comment": "Used for synchronization (during thread teardown)", "args": [ { "index": 1, "type": "dword", "op": "eq", "val": 1, "comment": "FUTEX_WAKE" } ] }, { "syscall": "futex", "comment": "Used for synchronization", "args": [ { "index": 1, "type": "dword", "op": "eq", "val": 128, "comment": "FUTEX_WAIT_PRIVATE" } ] }, { "syscall": "futex", "comment": "Used for synchronization", "args": [ { "index": 1, "type": "dword", "op": "eq", "val": 137, "comment": "FUTEX_WAIT_BITSET_PRIVATE" } ] }, { "syscall": "futex", "comment": "Used for synchronization", "args": [ { "index": 1, "type": "dword", "op": "eq", "val": 129, "comment": "FUTEX_WAKE_PRIVATE" } ] }, { "syscall": "madvise", "comment": "Triggered by musl for some customer workloads", "args": [ { "index": 2, "type": "dword", "op": "eq", "val": 4, "comment": "libc::MADV_DONTNEED" } ] }, { "syscall": "mmap", "comment": "Used for large buffers sent to api_server", "args": [ { "index": 3, "type": "dword", "op": "eq", "val": 34, "comment": "libc::MAP_ANONYMOUS | libc::MAP_PRIVATE" } ] }, { "syscall": "mmap", "comment": "Used for reading the timezone in LocalTime::now()", "args": [ { "index": 3, "type": "dword", "op": "eq", "val": 1, "comment": "libc::MAP_SHARED" } ] }, { "syscall": "rt_sigaction", "comment": "rt_sigaction is used by 
libc::abort during a panic to install the default handler for SIGABRT", "args": [ { "index": 0, "type": "dword", "op": "eq", "val": 6, "comment": "SIGABRT" } ] }, { "syscall": "socket", "comment": "Called to open the unix domain socket", "args": [ { "index": 0, "type": "dword", "op": "eq", "val": 1, "comment": "libc::AF_UNIX" }, { "index": 1, "type": "dword", "op": "eq", "val": 524289, "comment": "libc::SOCK_STREAM | libc::SOCK_CLOEXEC" }, { "index": 2, "type": "dword", "op": "eq", "val": 0 } ] }, { "syscall": "sendto", "comment": "Rust std uses it to write to unix socket" }, { "syscall": "tkill", "comment": "tkill is used by libc::abort during a panic to raise SIGABRT", "args": [ { "index": 1, "type": "dword", "op": "eq", "val": 6, "comment": "SIGABRT" } ] }, { "syscall": "ioctl", "comment": "Used to make api socket nonblocking", "args": [ { "index": 1, "type": "dword", "op": "eq", "val": 21537, "comment": "FIONBIO" } ] }, { "syscall": "sched_yield", "comment": "Used by the rust standard library in std::sync::mpmc. Firecracker uses mpsc channels from this module for inter-thread communication" }, { "syscall": "restart_syscall", "comment": "automatically issued by the kernel when specific timing-related syscalls (e.g. nanosleep) get interrupted by SIGSTOP" } ] }, "vcpu": { "default_action": "trap", "filter_action": "allow", "filter": [ { "syscall": "exit" }, { "syscall": "exit_group" }, { "syscall": "write" }, { "syscall": "open" }, { "syscall": "close" }, { "syscall": "fstat", "comment": "Used for reading the local timezone from /etc/localtime" }, { "syscall": "brk", "comment": "Called for expanding the heap" }, { "syscall": "gettid", "comment": "Rust std uses it during panic to print the thread id." }, { "syscall": "clock_gettime", "comment": "Used for metrics and logging, via the helpers in utils/src/time.rs. It's not called on some platforms, because of vdso optimisations." 
}, { "syscall": "mremap", "comment": "Used for re-allocating large memory regions, for example vectors" }, { "syscall": "munmap", "comment": "Used for freeing memory" }, { "syscall": "rt_sigprocmask", "comment": "rt_sigprocmask is used by Rust stdlib to remove custom signal handler during thread teardown." }, { "syscall": "rt_sigreturn", "comment": "rt_sigreturn is needed in case a fault does occur, so that the signal handler can return. Otherwise we get stuck in a fault loop." }, { "syscall": "sigaltstack", "comment": "sigaltstack is used by Rust stdlib to remove alternative signal stack during thread teardown." }, { "syscall": "futex", "comment": "Used for synchronization (during thread teardown when joining multiple vcpu threads at once)", "args": [ { "index": 1, "type": "dword", "op": "eq", "val": 0, "comment": "FUTEX_WAIT" } ] }, { "syscall": "futex", "comment": "Used for synchronization (during thread teardown)", "args": [ { "index": 1, "type": "dword", "op": "eq", "val": 1, "comment": "FUTEX_WAKE" } ] }, { "syscall": "futex", "comment": "Used for synchronization", "args": [ { "index": 1, "type": "dword", "op": "eq", "val": 128, "comment": "FUTEX_WAIT_PRIVATE" } ] }, { "syscall": "futex", "comment": "Used for synchronization", "args": [ { "index": 1, "type": "dword", "op": "eq", "val": 137, "comment": "FUTEX_WAIT_BITSET_PRIVATE" } ] }, { "syscall": "futex", "comment": "Used for synchronization", "args": [ { "index": 1, "type": "dword", "op": "eq", "val": 129, "comment": "FUTEX_WAKE_PRIVATE" } ] }, { "syscall": "madvise", "comment": "Triggered by musl for some customer workloads", "args": [ { "index": 2, "type": "dword", "op": "eq", "val": 4, "comment": "libc::MADV_DONTNEED" } ] }, { "syscall": "mmap", "comment": "Used for reading the timezone in LocalTime::now()", "args": [ { "index": 3, "type": "dword", "op": "eq", "val": 1, "comment": "libc::MAP_SHARED" } ] }, { "syscall": "mmap", "comment": "Used for allocating memory for FamStructWrapper called by 
KvmCpu::get_cpuid", "args": [ { "index": 3, "type": "dword", "op": "eq", "val": 34, "comment": "libc::MAP_ANONYMOUS|libc::MAP_PRIVATE" }, { "index": 2, "type": "dword", "op": "eq", "val": 3, "comment": "libc::PROT_READ|libc::PROT_WRITE" } ] }, { "syscall": "rt_sigaction", "comment": "rt_sigaction is used by libc::abort during a panic to install the default handler for SIGABRT", "args": [ { "index": 0, "type": "dword", "op": "eq", "val": 6, "comment": "SIGABRT" } ] }, { "syscall": "timerfd_settime", "comment": "Needed for updating the balloon statistics interval", "args": [ { "index": 1, "type": "dword", "op": "eq", "val": 0 } ] }, { "syscall": "tkill", "comment": "tkill is used by libc::abort during a panic to raise SIGABRT", "args": [ { "index": 1, "type": "dword", "op": "eq", "val": 6, "comment": "SIGABRT" } ] }, { "syscall": "ioctl", "args": [ { "index": 1, "type": "dword", "op": "eq", "val": 44672, "comment": "KVM_RUN" } ] }, { "syscall": "ioctl", "args": [ { "index": 1, "type": "dword", "op": "eq", "val": 2147790488, "comment": "KVM_GET_MP_STATE" } ] }, { "syscall": "ioctl", "args": [ { "index": 1, "type": "dword", "op": "eq", "val": 2151722655, "comment": "KVM_GET_VCPU_EVENTS" } ] }, { "syscall": "ioctl", "args": [ { "index": 1, "type": "dword", "op": "eq", "val": 2214637198, "comment": "KVM_GET_LAPIC" } ] }, { "syscall": "ioctl", "args": [ { "index": 1, "type": "dword", "op": "eq", "val": 2167975555, "comment": "KVM_GET_SREGS" } ] }, { "syscall": "ioctl", "args": [ { "index": 1, "type": "dword", "op": "eq", "val": 2156965505, "comment": "KVM_GET_REGS" } ] }, { "syscall": "ioctl", "args": [ { "index": 1, "type": "dword", "op": "eq", "val": 3221794440, "comment": "KVM_GET_MSRS" } ] }, { "syscall": "ioctl", "args": [ { "index": 1, "type": "dword", "op": "eq", "val": 3221794449, "comment": "KVM_GET_CPUID2" } ] }, { "syscall": "ioctl", "args": [ { "index": 1, "type": "dword", "op": "eq", "val": 2155916961, "comment": "KVM_GET_DEBUGREGS" } ] }, { "syscall": 
"ioctl", "args": [ { "index": 1, "type": "dword", "op": "eq", "val": 2415963812, "comment": "KVM_GET_XSAVE" } ] }, { "syscall": "ioctl", "args": [ { "index": 1, "type": "dword", "op": "eq", "val": 2415963855, "comment": "KVM_GET_XSAVE2" } ] }, { "syscall": "ioctl", "args": [ { "index": 1, "type": "dword", "op": "eq", "val": 2173218470, "comment": "KVM_GET_XCRS" } ] }, { "syscall": "ioctl", "args": [ { "index": 1, "type": "dword", "op": "eq", "val": 44707, "comment": "KVM_GET_TSC_KHZ" } ] }, { "syscall": "ioctl", "args": [ { "index": 1, "type": "dword", "op": "eq", "val": 1074025680, "comment": "TUNSETOFFLOAD" } ] }, { "syscall": "ioctl", "args": [ { "index": 1, "type": "dword", "op": "eq", "val": 44717, "comment": "KVM_KVMCLOCK_CTRL. We call this after pausing vCPUs to avoid soft lockups in the guest." } ] }, { "syscall": "sched_yield", "comment": "Used by the rust standard library in std::sync::mpmc. Firecracker uses mpsc channels from this module for inter-thread communication" }, { "syscall": "sendmsg", "comment": "Used by vhost-user frontend to communicate with the backend" }, { "syscall": "restart_syscall", "comment": "automatically issued by the kernel when specific timing-related syscalls (e.g. nanosleep) get interrupted by SIGSTOP" }, { "syscall": "ioctl", "args": [ { "index": 1, "type": "dword", "op": "eq", "val": 44547, "comment": "KVM_CHECK_EXTENSION" }, { "index": 2, "type": "dword", "op": "eq", "val": 131, "comment": "KVM_CAP_MSI_DEVID" } ] }, { "syscall": "ioctl", "args": [ { "index": 1, "type": "dword", "op": "eq", "val": 1074310762, "comment": "KVM_SET_GSI_ROUTING" } ] }, { "syscall": "ioctl", "args": [ { "index": 1, "type": "dword", "op": "eq", "val": 1075883638, "comment": "KVM_IRQFD" } ] } ] } } ================================================ FILE: rust-toolchain.toml ================================================ # Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
# SPDX-License-Identifier: Apache-2.0 # We keep a rust-toolchain file checked into the repository such that in the # rare event that we need to do an A/B-test across toolchains, cargo will # download the toolchains of the A and B revisions on the fly (if they do not # match the toolchain installed in the environment in which the test is # executed). This is needed for example if a toolchain upgrade introduces a new # syscall into our seccomp filters. Then, since our PR CI contains A/B-tests, # we will compile a version of Firecracker that does not have this syscall # allowlisted using a toolchain that requires it, causing the A/B-test to # always fail. [toolchain] channel = "1.93.0" targets = ["x86_64-unknown-linux-musl", "aarch64-unknown-linux-musl"] profile = "minimal" ================================================ FILE: rustfmt.toml ================================================ comment_width = 100 wrap_comments = true format_code_in_doc_comments = true format_strings = true imports_granularity = "Module" normalize_comments = true normalize_doc_attributes = true group_imports = "StdExternalCrate" ================================================ FILE: src/acpi-tables/Cargo.toml ================================================ [package] name = "acpi_tables" version = "0.1.0" authors = [ "The Cloud Hypervisor Authors", "Amazon Firecracker team ", ] edition = "2024" license = "Apache-2.0" [lib] bench = false # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] displaydoc = "0.2.5" thiserror = "2.0.18" vm-memory = { version = "0.17.1", features = ["backend-mmap", "backend-bitmap"] } zerocopy = { version = "0.8.42", features = ["derive"] } [lints] workspace = true ================================================ FILE: src/acpi-tables/src/aml.rs ================================================ // Copyright © 2019 Intel Corporation // Copyright © 2023 Rivos, Inc. // Copyright 2024 Amazon.com, Inc. 
or its affiliates. All Rights Reserved. // // SPDX-License-Identifier: Apache-2.0 #![allow(missing_debug_implementations)] use std::marker::PhantomData; #[derive(Debug, Clone, thiserror::Error, displaydoc::Display)] pub enum AmlError { /// Aml Path is empty NameEmpty, /// Invalid name part length InvalidPartLength, /// Invalid address range AddressRange, } pub trait Aml { fn append_aml_bytes(&self, _v: &mut Vec) -> Result<(), AmlError>; fn to_aml_bytes(&self) -> Result, AmlError> { let mut v = Vec::new(); self.append_aml_bytes(&mut v)?; Ok(v) } } pub const ZERO: Zero = Zero {}; pub struct Zero {} impl Aml for Zero { fn append_aml_bytes(&self, v: &mut Vec) -> Result<(), AmlError> { v.push(0u8); Ok(()) } } pub const ONE: One = One {}; pub struct One {} impl Aml for One { fn append_aml_bytes(&self, v: &mut Vec) -> Result<(), AmlError> { v.push(1u8); Ok(()) } } pub const ONES: Ones = Ones {}; pub struct Ones {} impl Aml for Ones { fn append_aml_bytes(&self, v: &mut Vec) -> Result<(), AmlError> { v.push(0xffu8); Ok(()) } } pub struct Path { root: bool, name_parts: Vec<[u8; 4]>, } impl Aml for Path { fn append_aml_bytes(&self, bytes: &mut Vec) -> Result<(), AmlError> { if self.root { bytes.push(b'\\'); } match self.name_parts.len() { 0 => return Err(AmlError::NameEmpty), 1 => {} 2 => { bytes.push(0x2e); // DualNamePrefix } n => { bytes.push(0x2f); // MultiNamePrefix bytes.push(n.try_into().unwrap()); } }; for part in &self.name_parts { bytes.extend_from_slice(part); } Ok(()) } } impl Path { pub fn new(name: &str) -> Result { let root = name.starts_with('\\'); let offset = root.into(); let mut name_parts = Vec::new(); for part in name[offset..].split('.') { if part.len() != 4 { return Err(AmlError::InvalidPartLength); } let mut name_part = [0u8; 4]; name_part.copy_from_slice(part.as_bytes()); name_parts.push(name_part); } Ok(Path { root, name_parts }) } } impl TryFrom<&str> for Path { type Error = AmlError; fn try_from(s: &str) -> Result { Path::new(s) } } pub type Byte = 
u8; impl Aml for Byte { fn append_aml_bytes(&self, bytes: &mut Vec) -> Result<(), AmlError> { bytes.push(0x0a); // BytePrefix bytes.push(*self); Ok(()) } } pub type Word = u16; impl Aml for Word { fn append_aml_bytes(&self, bytes: &mut Vec) -> Result<(), AmlError> { bytes.push(0x0b); // WordPrefix bytes.extend_from_slice(&self.to_le_bytes()); Ok(()) } } pub type DWord = u32; impl Aml for DWord { fn append_aml_bytes(&self, bytes: &mut Vec) -> Result<(), AmlError> { bytes.push(0x0c); // DWordPrefix bytes.extend_from_slice(&self.to_le_bytes()); Ok(()) } } pub type QWord = u64; impl Aml for QWord { fn append_aml_bytes(&self, bytes: &mut Vec) -> Result<(), AmlError> { bytes.push(0x0e); // QWordPrefix bytes.extend_from_slice(&self.to_le_bytes()); Ok(()) } } pub struct Name { bytes: Vec, } impl Aml for Name { fn append_aml_bytes(&self, bytes: &mut Vec) -> Result<(), AmlError> { // TODO: Refactor this to make more efficient but there are // lifetime/ownership challenges. bytes.extend_from_slice(&self.bytes); Ok(()) } } impl Name { pub fn new(path: Path, inner: &dyn Aml) -> Result { let mut bytes = vec![0x08]; // NameOp path.append_aml_bytes(&mut bytes)?; inner.append_aml_bytes(&mut bytes)?; Ok(Name { bytes }) } } pub struct Package<'a> { children: Vec<&'a dyn Aml>, } impl Aml for Package<'_> { fn append_aml_bytes(&self, bytes: &mut Vec) -> Result<(), AmlError> { let mut tmp = vec![self.children.len().try_into().unwrap()]; for child in &self.children { child.append_aml_bytes(&mut tmp)?; } let pkg_length = create_pkg_length(&tmp, true); bytes.push(0x12); // PackageOp bytes.extend_from_slice(&pkg_length); bytes.extend_from_slice(&tmp); Ok(()) } } impl<'a> Package<'a> { pub fn new(children: Vec<&'a dyn Aml>) -> Self { Package { children } } } // From the ACPI spec for PkgLength: // // "The high 2 bits of the first byte reveal how many follow bytes are in the PkgLength. 
If the // PkgLength has only one byte, bit 0 through 5 are used to encode the package length (in other // words, values 0-63). If the package length value is more than 63, more than one byte must be // used for the encoding in which case bit 4 and 5 of the PkgLeadByte are reserved and must be zero. // If the multiple bytes encoding is used, bits 0-3 of the PkgLeadByte become the least significant // 4 bits of the resulting package length value. The next ByteData will become the next least // significant 8 bits of the resulting value and so on, up to 3 ByteData bytes. Thus, the maximum // package length is 2**28." // // Also used for NamedField but in that case the length is not included in itself fn create_pkg_length(data: &[u8], include_self: bool) -> Vec { let mut result = Vec::new(); // PkgLength is inclusive and includes the length bytes let length_length = if data.len() < (2usize.pow(6) - 1) { 1 } else if data.len() < (2usize.pow(12) - 2) { 2 } else if data.len() < (2usize.pow(20) - 3) { 3 } else { 4 }; let length = data.len() + if include_self { length_length } else { 0 }; match length_length { 1 => result.push(length.try_into().unwrap()), 2 => { result.push((1u8 << 6) | TryInto::::try_into(length & 0xf).unwrap()); result.push(TryInto::::try_into(length >> 4).unwrap()) } 3 => { result.push((2u8 << 6) | TryInto::::try_into(length & 0xf).unwrap()); result.push(((length >> 4) & 0xff).try_into().unwrap()); result.push(((length >> 12) & 0xff).try_into().unwrap()); } _ => { result.push((3u8 << 6) | TryInto::::try_into(length & 0xf).unwrap()); result.push(((length >> 4) & 0xff).try_into().unwrap()); result.push(((length >> 12) & 0xff).try_into().unwrap()); result.push(((length >> 20) & 0xff).try_into().unwrap()); } } result } pub struct EisaName { value: DWord, } impl EisaName { pub fn new(name: &str) -> Result { if name.len() != 7 { return Err(AmlError::InvalidPartLength); } let data = name.as_bytes(); let value: u32 = ((u32::from(data[0] - 0x40) << 26) | 
(u32::from(data[1] - 0x40) << 21)
            | (u32::from(data[2] - 0x40) << 16)
            | (name.chars().nth(3).unwrap().to_digit(16).unwrap() << 12)
            | (name.chars().nth(4).unwrap().to_digit(16).unwrap() << 8)
            | (name.chars().nth(5).unwrap().to_digit(16).unwrap() << 4)
            | name.chars().nth(6).unwrap().to_digit(16).unwrap())
        .swap_bytes();

        Ok(EisaName { value })
    }
}

impl Aml for EisaName {
    fn append_aml_bytes(&self, bytes: &mut Vec<u8>) -> Result<(), AmlError> {
        self.value.append_aml_bytes(bytes)
    }
}

pub type Usize = usize;
// Integers are encoded with the smallest AML integer prefix that fits.
impl Aml for Usize {
    fn append_aml_bytes(&self, bytes: &mut Vec<u8>) -> Result<(), AmlError> {
        if *self <= u8::MAX.into() {
            TryInto::<u8>::try_into(*self)
                .unwrap()
                .append_aml_bytes(bytes)
        } else if *self <= u16::MAX.into() {
            TryInto::<u16>::try_into(*self)
                .unwrap()
                .append_aml_bytes(bytes)
        } else if *self <= u32::MAX as usize {
            TryInto::<u32>::try_into(*self)
                .unwrap()
                .append_aml_bytes(bytes)
        } else {
            TryInto::<u64>::try_into(*self)
                .unwrap()
                .append_aml_bytes(bytes)
        }
    }
}

// AML String: StringPrefix, the UTF-8 bytes, then a NUL terminator.
fn append_aml_string(v: &str, bytes: &mut Vec<u8>) {
    bytes.push(0x0D); // String Op
    bytes.extend_from_slice(v.as_bytes());
    bytes.push(0x0); // NullChar
}

pub type AmlStr = &'static str;
impl Aml for AmlStr {
    fn append_aml_bytes(&self, bytes: &mut Vec<u8>) -> Result<(), AmlError> {
        append_aml_string(self, bytes);
        Ok(())
    }
}

pub type AmlString = String;
impl Aml for AmlString {
    fn append_aml_bytes(&self, bytes: &mut Vec<u8>) -> Result<(), AmlError> {
        append_aml_string(self, bytes);
        Ok(())
    }
}

/// A ResourceTemplate buffer wrapping a list of resource descriptors.
pub struct ResourceTemplate<'a> {
    children: Vec<&'a dyn Aml>,
}

impl Aml for ResourceTemplate<'_> {
    fn append_aml_bytes(&self, bytes: &mut Vec<u8>) -> Result<(), AmlError> {
        let mut tmp = Vec::new();
        // Add buffer data
        for child in &self.children {
            child.append_aml_bytes(&mut tmp)?;
        }

        // Mark with end and mark checksum as as always valid
        tmp.push(0x79); // EndTag
        tmp.push(0); // zero checksum byte

        // Buffer length is an encoded integer including buffer data
        // and EndTag and checksum byte
        let mut buffer_length = tmp.len().to_aml_bytes()?;
buffer_length.reverse(); for byte in buffer_length { tmp.insert(0, byte); } // PkgLength is everything else let pkg_length = create_pkg_length(&tmp, true); bytes.push(0x11); // BufferOp bytes.extend_from_slice(&pkg_length); bytes.extend_from_slice(&tmp); Ok(()) } } impl<'a> ResourceTemplate<'a> { pub fn new(children: Vec<&'a dyn Aml>) -> Self { ResourceTemplate { children } } } pub struct Memory32Fixed { read_write: bool, // true for read & write, false for read only base: u32, length: u32, } impl Memory32Fixed { pub fn new(read_write: bool, base: u32, length: u32) -> Self { Memory32Fixed { read_write, base, length, } } } impl Aml for Memory32Fixed { fn append_aml_bytes(&self, bytes: &mut Vec) -> Result<(), AmlError> { bytes.push(0x86); // Memory32Fixed bytes.extend_from_slice(&9u16.to_le_bytes()); // 9 bytes of payload bytes.push(self.read_write.into()); bytes.extend_from_slice(&self.base.to_le_bytes()); bytes.extend_from_slice(&self.length.to_le_bytes()); Ok(()) } } #[derive(Copy, Clone)] enum AddressSpaceType { Memory, Io, BusNumber, } #[derive(Copy, Clone)] pub enum AddressSpaceCacheable { NotCacheable, Cacheable, WriteCombining, PreFetchable, } pub struct AddressSpace { r#type: AddressSpaceType, min: T, max: T, type_flags: u8, } impl AddressSpace where T: PartialOrd, { pub fn new_memory( cacheable: AddressSpaceCacheable, read_write: bool, min: T, max: T, ) -> Result { if min > max { return Err(AmlError::AddressRange); } Ok(AddressSpace { r#type: AddressSpaceType::Memory, min, max, type_flags: ((cacheable as u8) << 1) | u8::from(read_write), }) } pub fn new_io(min: T, max: T) -> Result { if min > max { return Err(AmlError::AddressRange); } Ok(AddressSpace { r#type: AddressSpaceType::Io, min, max, type_flags: 3, // EntireRange }) } pub fn new_bus_number(min: T, max: T) -> Result { if min > max { return Err(AmlError::AddressRange); } Ok(AddressSpace { r#type: AddressSpaceType::BusNumber, min, max, type_flags: 0, }) } fn push_header(&self, bytes: &mut Vec, 
descriptor: u8, length: usize) {
        bytes.push(descriptor); // Word Address Space Descriptor
        bytes.extend_from_slice(&(TryInto::<u16>::try_into(length).unwrap()).to_le_bytes());
        bytes.push(self.r#type as u8); // type
        let generic_flags = (1 << 2) /* Min Fixed */ | (1 << 3); // Max Fixed
        bytes.push(generic_flags);
        bytes.push(self.type_flags);
    }
}

impl Aml for AddressSpace<u16> {
    fn append_aml_bytes(&self, bytes: &mut Vec<u8>) -> Result<(), AmlError> {
        self.push_header(
            bytes,
            0x88,                               // Word Address Space Descriptor
            3 + 5 * std::mem::size_of::<u16>(), // 3 bytes of header + 5 u16 fields
        );

        bytes.extend_from_slice(&0u16.to_le_bytes()); // Granularity
        bytes.extend_from_slice(&self.min.to_le_bytes()); // Min
        bytes.extend_from_slice(&self.max.to_le_bytes()); // Max
        bytes.extend_from_slice(&0u16.to_le_bytes()); // Translation
        let len = self.max - self.min + 1;
        bytes.extend_from_slice(&len.to_le_bytes()); // Length
        Ok(())
    }
}

impl Aml for AddressSpace<u32> {
    fn append_aml_bytes(&self, bytes: &mut Vec<u8>) -> Result<(), AmlError> {
        self.push_header(
            bytes,
            0x87,                               // DWord Address Space Descriptor
            3 + 5 * std::mem::size_of::<u32>(), // 3 bytes of header + 5 u32 fields
        );

        bytes.extend_from_slice(&0u32.to_le_bytes()); // Granularity
        bytes.extend_from_slice(&self.min.to_le_bytes()); // Min
        bytes.extend_from_slice(&self.max.to_le_bytes()); // Max
        bytes.extend_from_slice(&0u32.to_le_bytes()); // Translation
        let len = self.max - self.min + 1;
        bytes.extend_from_slice(&len.to_le_bytes()); // Length
        Ok(())
    }
}

impl Aml for AddressSpace<u64> {
    fn append_aml_bytes(&self, bytes: &mut Vec<u8>) -> Result<(), AmlError> {
        self.push_header(
            bytes,
            0x8A,                               // QWord Address Space Descriptor
            3 + 5 * std::mem::size_of::<u64>(), // 3 bytes of header + 5 u64 fields
        );

        bytes.extend_from_slice(&0u64.to_le_bytes()); // Granularity
        bytes.extend_from_slice(&self.min.to_le_bytes()); // Min
        bytes.extend_from_slice(&self.max.to_le_bytes()); // Max
        bytes.extend_from_slice(&0u64.to_le_bytes()); // Translation
        let len = self.max - self.min + 1;
bytes.extend_from_slice(&len.to_le_bytes()); // Length Ok(()) } } pub struct Io { min: u16, max: u16, alignment: u8, length: u8, } impl Io { pub fn new(min: u16, max: u16, alignment: u8, length: u8) -> Self { Io { min, max, alignment, length, } } } impl Aml for Io { fn append_aml_bytes(&self, bytes: &mut Vec) -> Result<(), AmlError> { bytes.push(0x47); // Io Port Descriptor bytes.push(1); // IODecode16 bytes.extend_from_slice(&self.min.to_le_bytes()); bytes.extend_from_slice(&self.max.to_le_bytes()); bytes.push(self.alignment); bytes.push(self.length); Ok(()) } } pub struct Interrupt { consumer: bool, edge_triggered: bool, active_low: bool, shared: bool, number: u32, } impl Interrupt { pub fn new( consumer: bool, edge_triggered: bool, active_low: bool, shared: bool, number: u32, ) -> Self { Interrupt { consumer, edge_triggered, active_low, shared, number, } } } impl Aml for Interrupt { fn append_aml_bytes(&self, bytes: &mut Vec) -> Result<(), AmlError> { bytes.push(0x89); // Extended IRQ Descriptor bytes.extend_from_slice(&6u16.to_le_bytes()); let flags = (u8::from(self.shared) << 3) | (u8::from(self.active_low) << 2) | (u8::from(self.edge_triggered) << 1) | u8::from(self.consumer); bytes.push(flags); bytes.push(1u8); // count bytes.extend_from_slice(&self.number.to_le_bytes()); Ok(()) } } pub struct Device<'a> { path: Path, children: Vec<&'a dyn Aml>, } impl Aml for Device<'_> { fn append_aml_bytes(&self, bytes: &mut Vec) -> Result<(), AmlError> { let mut tmp = Vec::new(); self.path.append_aml_bytes(&mut tmp)?; for child in &self.children { child.append_aml_bytes(&mut tmp)?; } let pkg_length = create_pkg_length(&tmp, true); bytes.push(0x5b); // ExtOpPrefix bytes.push(0x82); // DeviceOp bytes.extend_from_slice(&pkg_length); bytes.extend_from_slice(&tmp); Ok(()) } } impl<'a> Device<'a> { pub fn new(path: Path, children: Vec<&'a dyn Aml>) -> Self { Device { path, children } } } pub struct Scope<'a> { path: Path, children: Vec<&'a dyn Aml>, } impl Aml for Scope<'_> { 
fn append_aml_bytes(&self, bytes: &mut Vec) -> Result<(), AmlError> { let mut tmp = Vec::new(); self.path.append_aml_bytes(&mut tmp)?; for child in &self.children { child.append_aml_bytes(&mut tmp)?; } let pkg_length = create_pkg_length(&tmp, true); bytes.push(0x10); // ScopeOp bytes.extend_from_slice(&pkg_length); bytes.extend_from_slice(&tmp); Ok(()) } } impl<'a> Scope<'a> { pub fn new(path: Path, children: Vec<&'a dyn Aml>) -> Self { Scope { path, children } } } pub struct Method<'a> { path: Path, children: Vec<&'a dyn Aml>, args: u8, serialized: bool, } impl<'a> Method<'a> { pub fn new(path: Path, args: u8, serialized: bool, children: Vec<&'a dyn Aml>) -> Self { Method { path, children, args, serialized, } } } impl Aml for Method<'_> { fn append_aml_bytes(&self, bytes: &mut Vec) -> Result<(), AmlError> { let mut tmp = Vec::new(); self.path.append_aml_bytes(&mut tmp)?; let flags: u8 = (self.args & 0x7) | (u8::from(self.serialized) << 3); tmp.push(flags); for child in &self.children { child.append_aml_bytes(&mut tmp)?; } let pkg_length = create_pkg_length(&tmp, true); bytes.push(0x14); // MethodOp bytes.extend_from_slice(&pkg_length); bytes.extend_from_slice(&tmp); Ok(()) } } pub struct Return<'a> { value: &'a dyn Aml, } impl<'a> Return<'a> { pub fn new(value: &'a dyn Aml) -> Self { Return { value } } } impl Aml for Return<'_> { fn append_aml_bytes(&self, bytes: &mut Vec) -> Result<(), AmlError> { bytes.push(0xa4); // ReturnOp self.value.append_aml_bytes(bytes)?; Ok(()) } } #[derive(Clone, Copy)] pub enum FieldAccessType { Any, Byte, Word, DWord, QWord, Buffer, } #[derive(Clone, Copy)] pub enum FieldUpdateRule { Preserve = 0, WriteAsOnes = 1, WriteAsZeroes = 2, } pub enum FieldEntry { Named([u8; 4], usize), Reserved(usize), } pub struct Field { path: Path, fields: Vec, access_type: FieldAccessType, update_rule: FieldUpdateRule, } impl Field { pub fn new( path: Path, access_type: FieldAccessType, update_rule: FieldUpdateRule, fields: Vec, ) -> Self { Field { path, 
fields,
            access_type,
            update_rule,
        }
    }
}

impl Aml for Field {
    fn append_aml_bytes(&self, bytes: &mut Vec<u8>) -> Result<(), AmlError> {
        let mut tmp = Vec::new();
        self.path.append_aml_bytes(&mut tmp)?;

        // FieldFlags: access type in the low bits, update rule in bits 5-6.
        let flags: u8 = self.access_type as u8 | ((self.update_rule as u8) << 5);
        tmp.push(flags);

        for field in self.fields.iter() {
            match field {
                FieldEntry::Named(name, length) => {
                    tmp.extend_from_slice(name);
                    tmp.extend_from_slice(&create_pkg_length(&vec![0; *length], false));
                }
                FieldEntry::Reserved(length) => {
                    tmp.push(0x0);
                    tmp.extend_from_slice(&create_pkg_length(&vec![0; *length], false));
                }
            }
        }

        let pkg_length = create_pkg_length(&tmp, true);
        bytes.push(0x5b); // ExtOpPrefix
        bytes.push(0x81); // FieldOp
        bytes.extend_from_slice(&pkg_length);
        bytes.extend_from_slice(&tmp);
        Ok(())
    }
}

#[derive(Clone, Copy)]
pub enum OpRegionSpace {
    SystemMemory,
    SystemIo,
    PConfig,
    EmbeddedControl,
    Smbus,
    SystemCmos,
    PciBarTarget,
    Ipmi,
    GeneralPurposeIo,
    GenericSerialBus,
}

/// AML OperationRegion definition.
pub struct OpRegion {
    path: Path,
    space: OpRegionSpace,
    offset: usize,
    length: usize,
}

impl OpRegion {
    pub fn new(path: Path, space: OpRegionSpace, offset: usize, length: usize) -> Self {
        OpRegion {
            path,
            space,
            offset,
            length,
        }
    }
}

impl Aml for OpRegion {
    fn append_aml_bytes(&self, bytes: &mut Vec<u8>) -> Result<(), AmlError> {
        bytes.push(0x5b); // ExtOpPrefix
        bytes.push(0x80); // OpRegionOp
        self.path.append_aml_bytes(bytes)?;
        bytes.push(self.space as u8);
        self.offset.append_aml_bytes(bytes)?; // RegionOffset
        self.length.append_aml_bytes(bytes)?; // RegionLen
        Ok(())
    }
}

/// AML If statement with a predicate and a body.
pub struct If<'a> {
    predicate: &'a dyn Aml,
    if_children: Vec<&'a dyn Aml>,
}

impl<'a> If<'a> {
    pub fn new(predicate: &'a dyn Aml, if_children: Vec<&'a dyn Aml>) -> Self {
        If {
            predicate,
            if_children,
        }
    }
}

impl Aml for If<'_> {
    fn append_aml_bytes(&self, bytes: &mut Vec<u8>) -> Result<(), AmlError> {
        let mut tmp = Vec::new();
        self.predicate.append_aml_bytes(&mut tmp)?;
        for child in self.if_children.iter() {
            child.append_aml_bytes(&mut tmp)?;
        }

        let pkg_length =
create_pkg_length(&tmp, true); bytes.push(0xa0); // IfOp bytes.extend_from_slice(&pkg_length); bytes.extend_from_slice(&tmp); Ok(()) } } pub struct Equal<'a> { left: &'a dyn Aml, right: &'a dyn Aml, } impl<'a> Equal<'a> { pub fn new(left: &'a dyn Aml, right: &'a dyn Aml) -> Self { Equal { left, right } } } impl Aml for Equal<'_> { fn append_aml_bytes(&self, bytes: &mut Vec) -> Result<(), AmlError> { bytes.push(0x93); // LEqualOp self.left.append_aml_bytes(bytes)?; self.right.append_aml_bytes(bytes)?; Ok(()) } } pub struct LessThan<'a> { left: &'a dyn Aml, right: &'a dyn Aml, } impl<'a> LessThan<'a> { pub fn new(left: &'a dyn Aml, right: &'a dyn Aml) -> Self { LessThan { left, right } } } impl Aml for LessThan<'_> { fn append_aml_bytes(&self, bytes: &mut Vec) -> Result<(), AmlError> { bytes.push(0x95); // LLessOp self.left.append_aml_bytes(bytes)?; self.right.append_aml_bytes(bytes)?; Ok(()) } } pub struct Arg(pub u8); impl Aml for Arg { fn append_aml_bytes(&self, bytes: &mut Vec) -> Result<(), AmlError> { if self.0 > 6 { return Err(AmlError::InvalidPartLength); } bytes.push(0x68 + self.0); // Arg0Op Ok(()) } } pub struct Local(pub u8); impl Aml for Local { fn append_aml_bytes(&self, bytes: &mut Vec) -> Result<(), AmlError> { if self.0 > 7 { return Err(AmlError::InvalidPartLength); } bytes.push(0x60 + self.0); // Local0Op Ok(()) } } pub struct Store<'a> { name: &'a dyn Aml, value: &'a dyn Aml, } impl<'a> Store<'a> { pub fn new(name: &'a dyn Aml, value: &'a dyn Aml) -> Self { Store { name, value } } } impl Aml for Store<'_> { fn append_aml_bytes(&self, bytes: &mut Vec) -> Result<(), AmlError> { bytes.push(0x70); // StoreOp self.value.append_aml_bytes(bytes)?; self.name.append_aml_bytes(bytes)?; Ok(()) } } pub struct Mutex { path: Path, sync_level: u8, } impl Mutex { pub fn new(path: Path, sync_level: u8) -> Self { Self { path, sync_level } } } impl Aml for Mutex { fn append_aml_bytes(&self, bytes: &mut Vec) -> Result<(), AmlError> { bytes.push(0x5b); // ExtOpPrefix 
bytes.push(0x01); // MutexOp self.path.append_aml_bytes(bytes)?; bytes.push(self.sync_level); Ok(()) } } pub struct Acquire { mutex: Path, timeout: u16, } impl Acquire { pub fn new(mutex: Path, timeout: u16) -> Self { Acquire { mutex, timeout } } } impl Aml for Acquire { fn append_aml_bytes(&self, bytes: &mut Vec) -> Result<(), AmlError> { bytes.push(0x5b); // ExtOpPrefix bytes.push(0x23); // AcquireOp self.mutex.append_aml_bytes(bytes)?; bytes.extend_from_slice(&self.timeout.to_le_bytes()); Ok(()) } } pub struct Release { mutex: Path, } impl Release { pub fn new(mutex: Path) -> Self { Release { mutex } } } impl Aml for Release { fn append_aml_bytes(&self, bytes: &mut Vec) -> Result<(), AmlError> { bytes.push(0x5b); // ExtOpPrefix bytes.push(0x27); // ReleaseOp self.mutex.append_aml_bytes(bytes)?; Ok(()) } } pub struct Notify<'a> { object: &'a dyn Aml, value: &'a dyn Aml, } impl<'a> Notify<'a> { pub fn new(object: &'a dyn Aml, value: &'a dyn Aml) -> Self { Notify { object, value } } } impl Aml for Notify<'_> { fn append_aml_bytes(&self, bytes: &mut Vec) -> Result<(), AmlError> { bytes.push(0x86); // NotifyOp self.object.append_aml_bytes(bytes)?; self.value.append_aml_bytes(bytes)?; Ok(()) } } pub struct While<'a> { predicate: &'a dyn Aml, while_children: Vec<&'a dyn Aml>, } impl<'a> While<'a> { pub fn new(predicate: &'a dyn Aml, while_children: Vec<&'a dyn Aml>) -> Self { While { predicate, while_children, } } } impl Aml for While<'_> { fn append_aml_bytes(&self, bytes: &mut Vec) -> Result<(), AmlError> { let mut tmp = Vec::new(); self.predicate.append_aml_bytes(&mut tmp)?; for child in self.while_children.iter() { child.append_aml_bytes(&mut tmp)?; } let pkg_length = create_pkg_length(&tmp, true); bytes.push(0xa2); // WhileOp bytes.extend_from_slice(&pkg_length); bytes.extend_from_slice(&tmp); Ok(()) } } macro_rules! 
binary_op {
    ($name:ident, $opcode:expr) => {
        pub struct $name<'a> {
            a: &'a dyn Aml,
            b: &'a dyn Aml,
            target: &'a dyn Aml,
        }

        impl<'a> $name<'a> {
            pub fn new(target: &'a dyn Aml, a: &'a dyn Aml, b: &'a dyn Aml) -> Self {
                $name { a, b, target }
            }
        }

        impl<'a> Aml for $name<'a> {
            fn append_aml_bytes(&self, bytes: &mut Vec<u8>) -> Result<(), AmlError> {
                bytes.push($opcode); // Op for the binary operator
                self.a.append_aml_bytes(bytes)?;
                self.b.append_aml_bytes(bytes)?;
                self.target.append_aml_bytes(bytes)
            }
        }
    };
}

// binary operators: TermArg TermArg Target
binary_op!(Add, 0x72);
binary_op!(Concat, 0x73);
binary_op!(Subtract, 0x74);
binary_op!(Multiply, 0x77);
binary_op!(ShiftLeft, 0x79);
binary_op!(ShiftRight, 0x7A);
binary_op!(And, 0x7B);
binary_op!(Nand, 0x7C);
binary_op!(Or, 0x7D);
binary_op!(Nor, 0x7E);
binary_op!(Xor, 0x7F);
binary_op!(ConateRes, 0x84);
binary_op!(Mod, 0x85);
binary_op!(Index, 0x88);
binary_op!(ToString, 0x9C);

/// Invocation of a named method with its arguments.
pub struct MethodCall<'a> {
    name: Path,
    args: Vec<&'a dyn Aml>,
}

impl<'a> MethodCall<'a> {
    pub fn new(name: Path, args: Vec<&'a dyn Aml>) -> Self {
        MethodCall { name, args }
    }
}

impl Aml for MethodCall<'_> {
    fn append_aml_bytes(&self, bytes: &mut Vec<u8>) -> Result<(), AmlError> {
        self.name.append_aml_bytes(bytes)?;
        for arg in self.args.iter() {
            arg.append_aml_bytes(bytes)?;
        }
        Ok(())
    }
}

/// AML Buffer holding raw bytes.
pub struct Buffer {
    data: Vec<u8>,
}

impl Buffer {
    pub fn new(data: Vec<u8>) -> Self {
        Buffer { data }
    }
}

impl Aml for Buffer {
    fn append_aml_bytes(&self, bytes: &mut Vec<u8>) -> Result<(), AmlError> {
        let mut tmp = Vec::new();
        self.data.len().append_aml_bytes(&mut tmp)?;
        tmp.extend_from_slice(&self.data);

        let pkg_length = create_pkg_length(&tmp, true);
        bytes.push(0x11); // BufferOp
        bytes.extend_from_slice(&pkg_length);
        bytes.extend_from_slice(&tmp);
        Ok(())
    }
}

/// CreateQWordField/CreateDWordField over a buffer; the marker type `T`
/// (u64 or u32) selects the field width.
pub struct CreateField<'a, T> {
    buffer: &'a dyn Aml,
    offset: &'a dyn Aml,
    field: Path,
    phantom: PhantomData<&'a T>,
}

impl<'a, T> CreateField<'a, T> {
    pub fn new(buffer: &'a dyn Aml, offset: &'a dyn Aml, field: Path) ->
Self {
        CreateField::<T> {
            buffer,
            offset,
            field,
            phantom: PhantomData,
        }
    }
}

impl Aml for CreateField<'_, u64> {
    fn append_aml_bytes(&self, bytes: &mut Vec<u8>) -> Result<(), AmlError> {
        bytes.push(0x8f); // CreateQWordFieldOp
        self.buffer.append_aml_bytes(bytes)?;
        self.offset.append_aml_bytes(bytes)?;
        self.field.append_aml_bytes(bytes)
    }
}

impl Aml for CreateField<'_, u32> {
    fn append_aml_bytes(&self, bytes: &mut Vec<u8>) -> Result<(), AmlError> {
        bytes.push(0x8a); // CreateDWordFieldOp
        self.buffer.append_aml_bytes(bytes)?;
        self.offset.append_aml_bytes(bytes)?;
        self.field.append_aml_bytes(bytes)
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_device() {
        // Device (_SB.COM1)
        // {
        //     Name (_HID, EisaId ("PNP0501") /* 16550A-compatible COM Serial Port */)
        //     // _HID: Hardware ID
        //     Name (_CRS, ResourceTemplate ()  // _CRS: Current Resource Settings
        //     {
        //         Interrupt (ResourceConsumer, Edge, ActiveHigh, Exclusive, ,, )
        //         {
        //             0x00000004,
        //         }
        //         IO (Decode16,
        //             0x03F8, // Range Minimum
        //             0x03F8, // Range Maximum
        //             0x00,   // Alignment
        //             0x08,   // Length
        //             )
        //     }
        // }
        let com1_device = [
            0x5B, 0x82, 0x30, 0x2E, 0x5F, 0x53, 0x42, 0x5F, 0x43, 0x4F, 0x4D, 0x31, 0x08, 0x5F,
            0x48, 0x49, 0x44, 0x0C, 0x41, 0xD0, 0x05, 0x01, 0x08, 0x5F, 0x43, 0x52, 0x53, 0x11,
            0x16, 0x0A, 0x13, 0x89, 0x06, 0x00, 0x03, 0x01, 0x04, 0x00, 0x00, 0x00, 0x47, 0x01,
            0xF8, 0x03, 0xF8, 0x03, 0x00, 0x08, 0x79, 0x00,
        ];
        assert_eq!(
            Device::new(
                "_SB_.COM1".try_into().unwrap(),
                vec![
                    &Name::new(
                        "_HID".try_into().unwrap(),
                        &EisaName::new("PNP0501").unwrap()
                    )
                    .unwrap(),
                    &Name::new(
                        "_CRS".try_into().unwrap(),
                        &ResourceTemplate::new(vec![
                            &Interrupt::new(true, true, false, false, 4),
                            &Io::new(0x3f8, 0x3f8, 0, 0x8)
                        ])
                    )
                    .unwrap()
                ]
            )
            .to_aml_bytes()
            .unwrap(),
            &com1_device[..]
); } #[test] fn test_scope() { // Scope (_SB.MBRD) // { // Name (_CRS, ResourceTemplate () // _CRS: Current Resource Settings // { // Memory32Fixed (ReadWrite, // 0xE8000000, // Address Base // 0x10000000, // Address Length // ) // }) // } let mbrd_scope = [ 0x10, 0x21, 0x2E, 0x5F, 0x53, 0x42, 0x5F, 0x4D, 0x42, 0x52, 0x44, 0x08, 0x5F, 0x43, 0x52, 0x53, 0x11, 0x11, 0x0A, 0x0E, 0x86, 0x09, 0x00, 0x01, 0x00, 0x00, 0x00, 0xE8, 0x00, 0x00, 0x00, 0x10, 0x79, 0x00, ]; assert_eq!( Scope::new( "_SB_.MBRD".try_into().unwrap(), vec![ &Name::new( "_CRS".try_into().unwrap(), &ResourceTemplate::new(vec![&Memory32Fixed::new( true, 0xE800_0000, 0x1000_0000 )]) ) .unwrap() ] ) .to_aml_bytes() .unwrap(), &mbrd_scope[..] ); } #[test] fn test_resource_template() { // Name (_CRS, ResourceTemplate () // _CRS: Current Resource Settings // { // Memory32Fixed (ReadWrite, // 0xE8000000, // Address Base // 0x10000000, // Address Length // ) // }) let crs_memory_32_fixed = [ 0x08, 0x5F, 0x43, 0x52, 0x53, 0x11, 0x11, 0x0A, 0x0E, 0x86, 0x09, 0x00, 0x01, 0x00, 0x00, 0x00, 0xE8, 0x00, 0x00, 0x00, 0x10, 0x79, 0x00, ]; assert_eq!( Name::new( "_CRS".try_into().unwrap(), &ResourceTemplate::new(vec![&Memory32Fixed::new(true, 0xE800_0000, 0x1000_0000)]) ) .unwrap() .to_aml_bytes() .unwrap(), crs_memory_32_fixed ); // Name (_CRS, ResourceTemplate () // _CRS: Current Resource Settings // { // WordBusNumber (ResourceProducer, MinFixed, MaxFixed, PosDecode, // 0x0000, // Granularity // 0x0000, // Range Minimum // 0x00FF, // Range Maximum // 0x0000, // Translation Offset // 0x0100, // Length // ,, ) // WordIO (ResourceProducer, MinFixed, MaxFixed, PosDecode, EntireRange, // 0x0000, // Granularity // 0x0000, // Range Minimum // 0x0CF7, // Range Maximum // 0x0000, // Translation Offset // 0x0CF8, // Length // ,, , TypeStatic, DenseTranslation) // WordIO (ResourceProducer, MinFixed, MaxFixed, PosDecode, EntireRange, // 0x0000, // Granularity // 0x0D00, // Range Minimum // 0xFFFF, // Range Maximum // 0x0000, // 
Translation Offset // 0xF300, // Length // ,, , TypeStatic, DenseTranslation) // DWordMemory (ResourceProducer, PosDecode, MinFixed, MaxFixed, Cacheable, ReadWrite, // 0x00000000, // Granularity // 0x000A0000, // Range Minimum // 0x000BFFFF, // Range Maximum // 0x00000000, // Translation Offset // 0x00020000, // Length // ,, , AddressRangeMemory, TypeStatic) // DWordMemory (ResourceProducer, PosDecode, MinFixed, MaxFixed, NonCacheable, ReadWrite, // 0x00000000, // Granularity // 0xC0000000, // Range Minimum // 0xFEBFFFFF, // Range Maximum // 0x00000000, // Translation Offset // 0x3EC00000, // Length // ,, , AddressRangeMemory, TypeStatic) // QWordMemory (ResourceProducer, PosDecode, MinFixed, MaxFixed, Cacheable, ReadWrite, // 0x0000000000000000, // Granularity // 0x0000000800000000, // Range Minimum // 0x0000000FFFFFFFFF, // Range Maximum // 0x0000000000000000, // Translation Offset // 0x0000000800000000, // Length // ,, , AddressRangeMemory, TypeStatic) // }) // WordBusNumber from above let crs_word_bus_number = [ 0x08, 0x5F, 0x43, 0x52, 0x53, 0x11, 0x15, 0x0A, 0x12, 0x88, 0x0D, 0x00, 0x02, 0x0C, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x01, 0x79, 0x00, ]; assert_eq!( Name::new( "_CRS".try_into().unwrap(), &ResourceTemplate::new(vec![ &AddressSpace::new_bus_number(0x0u16, 0xffu16).unwrap(), ]) ) .unwrap() .to_aml_bytes() .unwrap(), &crs_word_bus_number ); // WordIO blocks (x 2) from above let crs_word_io = [ 0x08, 0x5F, 0x43, 0x52, 0x53, 0x11, 0x25, 0x0A, 0x22, 0x88, 0x0D, 0x00, 0x01, 0x0C, 0x03, 0x00, 0x00, 0x00, 0x00, 0xF7, 0x0C, 0x00, 0x00, 0xF8, 0x0C, 0x88, 0x0D, 0x00, 0x01, 0x0C, 0x03, 0x00, 0x00, 0x00, 0x0D, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0xF3, 0x79, 0x00, ]; assert_eq!( Name::new( "_CRS".try_into().unwrap(), &ResourceTemplate::new(vec![ &AddressSpace::new_io(0x0u16, 0xcf7u16).unwrap(), &AddressSpace::new_io(0xd00u16, 0xffffu16).unwrap(), ]) ) .unwrap() .to_aml_bytes() .unwrap(), &crs_word_io[..] 
); // DWordMemory blocks (x 2) from above let crs_dword_memory = [ 0x08, 0x5F, 0x43, 0x52, 0x53, 0x11, 0x39, 0x0A, 0x36, 0x87, 0x17, 0x00, 0x00, 0x0C, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0A, 0x00, 0xFF, 0xFF, 0x0B, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x87, 0x17, 0x00, 0x00, 0x0C, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xC0, 0xFF, 0xFF, 0xBF, 0xFE, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xC0, 0x3E, 0x79, 0x00, ]; assert_eq!( Name::new( "_CRS".try_into().unwrap(), &ResourceTemplate::new(vec![ &AddressSpace::new_memory( AddressSpaceCacheable::Cacheable, true, 0xa_0000u32, 0xb_ffffu32 ) .unwrap(), &AddressSpace::new_memory( AddressSpaceCacheable::NotCacheable, true, 0xc000_0000u32, 0xfebf_ffffu32 ) .unwrap(), ]) ) .unwrap() .to_aml_bytes() .unwrap(), &crs_dword_memory[..] ); // QWordMemory from above let crs_qword_memory = [ 0x08, 0x5F, 0x43, 0x52, 0x53, 0x11, 0x33, 0x0A, 0x30, 0x8A, 0x2B, 0x00, 0x00, 0x0C, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0x0F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x79, 0x00, ]; assert_eq!( Name::new( "_CRS".try_into().unwrap(), &ResourceTemplate::new(vec![ &AddressSpace::new_memory( AddressSpaceCacheable::Cacheable, true, 0x8_0000_0000u64, 0xf_ffff_ffffu64 ) .unwrap() ]) ) .unwrap() .to_aml_bytes() .unwrap(), &crs_qword_memory[..] 
); // Name (_CRS, ResourceTemplate () // _CRS: Current Resource Settings // { // Interrupt (ResourceConsumer, Edge, ActiveHigh, Exclusive, ,, ) // { // 0x00000004, // } // IO (Decode16, // 0x03F8, // Range Minimum // 0x03F8, // Range Maximum // 0x00, // Alignment // 0x08, // Length // ) // }) // let interrupt_io_data = [ 0x08, 0x5F, 0x43, 0x52, 0x53, 0x11, 0x16, 0x0A, 0x13, 0x89, 0x06, 0x00, 0x03, 0x01, 0x04, 0x00, 0x00, 0x00, 0x47, 0x01, 0xF8, 0x03, 0xF8, 0x03, 0x00, 0x08, 0x79, 0x00, ]; assert_eq!( Name::new( "_CRS".try_into().unwrap(), &ResourceTemplate::new(vec![ &Interrupt::new(true, true, false, false, 4), &Io::new(0x3f8, 0x3f8, 0, 0x8) ]) ) .unwrap() .to_aml_bytes() .unwrap(), &interrupt_io_data[..] ); } #[test] fn test_pkg_length() { assert_eq!(create_pkg_length(&[0u8; 62], true), vec![63]); assert_eq!( create_pkg_length(&[0u8; 64], true), vec![(1 << 6) | (66 & 0xf), 66 >> 4] ); assert_eq!( create_pkg_length(&[0u8; 4096], true), vec![ (2 << 6) | (4099 & 0xf) as u8, ((4099 >> 4) & 0xff).try_into().unwrap(), ((4099 >> 12) & 0xff).try_into().unwrap() ] ); } #[test] fn test_package() { // Name (_S5, Package (0x01) // _S5_: S5 System State // { // 0x05 // }) let s5_sleep_data = [0x08, 0x5F, 0x53, 0x35, 0x5F, 0x12, 0x04, 0x01, 0x0A, 0x05]; let s5 = Name::new("_S5_".try_into().unwrap(), &Package::new(vec![&5u8])).unwrap(); assert_eq!(s5_sleep_data.to_vec(), s5.to_aml_bytes().unwrap()); } #[test] fn test_eisa_name() { assert_eq!( Name::new( "_HID".try_into().unwrap(), &EisaName::new("PNP0501").unwrap() ) .unwrap() .to_aml_bytes() .unwrap(), [0x08, 0x5F, 0x48, 0x49, 0x44, 0x0C, 0x41, 0xD0, 0x05, 0x01], ) } #[test] fn test_name_path() { assert_eq!( (&"_SB_".try_into().unwrap() as &Path) .to_aml_bytes() .unwrap(), [0x5Fu8, 0x53, 0x42, 0x5F] ); assert_eq!( (&"\\_SB_".try_into().unwrap() as &Path) .to_aml_bytes() .unwrap(), [0x5C, 0x5F, 0x53, 0x42, 0x5F] ); assert_eq!( (&"_SB_.COM1".try_into().unwrap() as &Path) .to_aml_bytes() .unwrap(), [0x2E, 0x5F, 0x53, 0x42, 0x5F, 
0x43, 0x4F, 0x4D, 0x31] ); assert_eq!( (&"_SB_.PCI0._HID".try_into().unwrap() as &Path) .to_aml_bytes() .unwrap(), [ 0x2F, 0x03, 0x5F, 0x53, 0x42, 0x5F, 0x50, 0x43, 0x49, 0x30, 0x5F, 0x48, 0x49, 0x44 ] ); } #[test] fn test_numbers() { assert_eq!(128u8.to_aml_bytes().unwrap(), [0x0a, 0x80]); assert_eq!(1024u16.to_aml_bytes().unwrap(), [0x0b, 0x0, 0x04]); assert_eq!( (16u32 << 20).to_aml_bytes().unwrap(), [0x0c, 0x00, 0x00, 0x0, 0x01] ); assert_eq!( 0xdeca_fbad_deca_fbadu64.to_aml_bytes().unwrap(), [0x0e, 0xad, 0xfb, 0xca, 0xde, 0xad, 0xfb, 0xca, 0xde] ); } #[test] fn test_name() { assert_eq!( Name::new("_SB_.PCI0._UID".try_into().unwrap(), &0x1234u16) .unwrap() .to_aml_bytes() .unwrap(), [ 0x08, // NameOp 0x2F, // MultiNamePrefix 0x03, // 3 name parts 0x5F, 0x53, 0x42, 0x5F, // _SB_ 0x50, 0x43, 0x49, 0x30, // PCI0 0x5F, 0x55, 0x49, 0x44, // _UID 0x0b, // WordPrefix 0x34, 0x12 ] ); } #[test] fn test_string() { assert_eq!( (&"ACPI" as &dyn Aml).to_aml_bytes().unwrap(), [0x0d, b'A', b'C', b'P', b'I', 0] ); assert_eq!( "ACPI".to_owned().to_aml_bytes().unwrap(), [0x0d, b'A', b'C', b'P', b'I', 0] ); } #[test] fn test_method() { assert_eq!( Method::new( "_STA".try_into().unwrap(), 0, false, vec![&Return::new(&0xfu8)] ) .to_aml_bytes() .unwrap(), [0x14, 0x09, 0x5F, 0x53, 0x54, 0x41, 0x00, 0xA4, 0x0A, 0x0F] ); } #[test] fn test_field() { // Field (PRST, ByteAcc, NoLock, WriteAsZeros) // { // Offset (0x04), // CPEN, 1, // CINS, 1, // CRMV, 1, // CEJ0, 1, // Offset (0x05), // CCMD, 8 // } // let field_data = [ 0x5Bu8, 0x81, 0x23, 0x50, 0x52, 0x53, 0x54, 0x41, 0x00, 0x20, 0x43, 0x50, 0x45, 0x4E, 0x01, 0x43, 0x49, 0x4E, 0x53, 0x01, 0x43, 0x52, 0x4D, 0x56, 0x01, 0x43, 0x45, 0x4A, 0x30, 0x01, 0x00, 0x04, 0x43, 0x43, 0x4D, 0x44, 0x08, ]; assert_eq!( Field::new( "PRST".try_into().unwrap(), FieldAccessType::Byte, FieldUpdateRule::WriteAsZeroes, vec![ FieldEntry::Reserved(32), FieldEntry::Named(*b"CPEN", 1), FieldEntry::Named(*b"CINS", 1), FieldEntry::Named(*b"CRMV", 1), 
FieldEntry::Named(*b"CEJ0", 1), FieldEntry::Reserved(4), FieldEntry::Named(*b"CCMD", 8) ] ) .to_aml_bytes() .unwrap(), &field_data[..] ); // Field (PRST, DWordAcc, NoLock, Preserve) // { // CSEL, 32, // Offset (0x08), // CDAT, 32 // } let field_data = [ 0x5Bu8, 0x81, 0x12, 0x50, 0x52, 0x53, 0x54, 0x03, 0x43, 0x53, 0x45, 0x4C, 0x20, 0x00, 0x20, 0x43, 0x44, 0x41, 0x54, 0x20, ]; assert_eq!( Field::new( "PRST".try_into().unwrap(), FieldAccessType::DWord, FieldUpdateRule::Preserve, vec![ FieldEntry::Named(*b"CSEL", 32), FieldEntry::Reserved(32), FieldEntry::Named(*b"CDAT", 32) ] ) .to_aml_bytes() .unwrap(), &field_data[..] ); } #[test] fn test_op_region() { // OperationRegion (PRST, SystemIo, 0x0CD8, 0x0C) let op_region_data = [ 0x5Bu8, 0x80, 0x50, 0x52, 0x53, 0x54, 0x01, 0x0B, 0xD8, 0x0C, 0x0A, 0x0C, ]; assert_eq!( OpRegion::new( "PRST".try_into().unwrap(), OpRegionSpace::SystemIo, 0xcd8, 0xc ) .to_aml_bytes() .unwrap(), &op_region_data[..] ); } #[test] fn test_arg_if() { // Method(TEST, 1, NotSerialized) { // If (Arg0 == Zero) { // Return(One) // } // Return(Zero) // } let arg_if_data = [ 0x14, 0x0F, 0x54, 0x45, 0x53, 0x54, 0x01, 0xA0, 0x06, 0x93, 0x68, 0x00, 0xA4, 0x01, 0xA4, 0x00, ]; assert_eq!( Method::new( "TEST".try_into().unwrap(), 1, false, vec![ &If::new(&Equal::new(&Arg(0), &ZERO), vec![&Return::new(&ONE)]), &Return::new(&ZERO) ] ) .to_aml_bytes() .unwrap(), &arg_if_data ); } #[test] fn test_local_if() { // Method(TEST, 0, NotSerialized) { // Local0 = One // If (Local0 == Zero) { // Return(One) // } // Return(Zero) // } let local_if_data = [ 0x14, 0x12, 0x54, 0x45, 0x53, 0x54, 0x00, 0x70, 0x01, 0x60, 0xA0, 0x06, 0x93, 0x60, 0x00, 0xA4, 0x01, 0xA4, 0x00, ]; assert_eq!( Method::new( "TEST".try_into().unwrap(), 0, false, vec![ &Store::new(&Local(0), &ONE), &If::new(&Equal::new(&Local(0), &ZERO), vec![&Return::new(&ONE)]), &Return::new(&ZERO) ] ) .to_aml_bytes() .unwrap(), &local_if_data ); } #[test] fn test_mutex() { // Device (_SB_.MHPC) // { // Name (_HID, 
EisaId("PNP0A06") /* Generic Container Device */) // _HID: Hardware ID // Mutex (MLCK, 0x00) // Method (TEST, 0, NotSerialized) // { // Acquire (MLCK, 0xFFFF) // Local0 = One // Release (MLCK) // } // } let mutex_data = [ 0x5B, 0x82, 0x33, 0x2E, 0x5F, 0x53, 0x42, 0x5F, 0x4D, 0x48, 0x50, 0x43, 0x08, 0x5F, 0x48, 0x49, 0x44, 0x0C, 0x41, 0xD0, 0x0A, 0x06, 0x5B, 0x01, 0x4D, 0x4C, 0x43, 0x4B, 0x00, 0x14, 0x17, 0x54, 0x45, 0x53, 0x54, 0x00, 0x5B, 0x23, 0x4D, 0x4C, 0x43, 0x4B, 0xFF, 0xFF, 0x70, 0x01, 0x60, 0x5B, 0x27, 0x4D, 0x4C, 0x43, 0x4B, ]; let mutex = Mutex::new("MLCK".try_into().unwrap(), 0); assert_eq!( Device::new( "_SB_.MHPC".try_into().unwrap(), vec![ &Name::new( "_HID".try_into().unwrap(), &EisaName::new("PNP0A06").unwrap() ) .unwrap(), &mutex, &Method::new( "TEST".try_into().unwrap(), 0, false, vec![ &Acquire::new("MLCK".try_into().unwrap(), 0xffff), &Store::new(&Local(0), &ONE), &Release::new("MLCK".try_into().unwrap()) ] ) ] ) .to_aml_bytes() .unwrap(), &mutex_data[..] ); } #[test] fn test_notify() { // Device (_SB.MHPC) // { // Name (_HID, EisaId ("PNP0A06") /* Generic Container Device */) // _HID: Hardware ID // Method (TEST, 0, NotSerialized) // { // Notify (MHPC, One) // Device Check // } // } let notify_data = [ 0x5B, 0x82, 0x21, 0x2E, 0x5F, 0x53, 0x42, 0x5F, 0x4D, 0x48, 0x50, 0x43, 0x08, 0x5F, 0x48, 0x49, 0x44, 0x0C, 0x41, 0xD0, 0x0A, 0x06, 0x14, 0x0C, 0x54, 0x45, 0x53, 0x54, 0x00, 0x86, 0x4D, 0x48, 0x50, 0x43, 0x01, ]; assert_eq!( Device::new( "_SB_.MHPC".try_into().unwrap(), vec![ &Name::new( "_HID".try_into().unwrap(), &EisaName::new("PNP0A06").unwrap() ) .unwrap(), &Method::new( "TEST".try_into().unwrap(), 0, false, vec![&Notify::new(&Path::new("MHPC").unwrap(), &ONE),] ) ] ) .to_aml_bytes() .unwrap(), ¬ify_data[..] 
); } #[test] fn test_while() { // Device (_SB.MHPC) // { // Name (_HID, EisaId ("PNP0A06") /* Generic Container Device */) // _HID: Hardware ID // Method (TEST, 0, NotSerialized) // { // Local0 = Zero // While ((Local0 < 0x04)) // { // Local0 += One // } // } // } let while_data = [ 0x5B, 0x82, 0x28, 0x2E, 0x5F, 0x53, 0x42, 0x5F, 0x4D, 0x48, 0x50, 0x43, 0x08, 0x5F, 0x48, 0x49, 0x44, 0x0C, 0x41, 0xD0, 0x0A, 0x06, 0x14, 0x13, 0x54, 0x45, 0x53, 0x54, 0x00, 0x70, 0x00, 0x60, 0xA2, 0x09, 0x95, 0x60, 0x0A, 0x04, 0x72, 0x60, 0x01, 0x60, ]; assert_eq!( Device::new( "_SB_.MHPC".try_into().unwrap(), vec![ &Name::new( "_HID".try_into().unwrap(), &EisaName::new("PNP0A06").unwrap() ) .unwrap(), &Method::new( "TEST".try_into().unwrap(), 0, false, vec![ &Store::new(&Local(0), &ZERO), &While::new( &LessThan::new(&Local(0), &4usize), vec![&Add::new(&Local(0), &Local(0), &ONE)] ) ] ) ] ) .to_aml_bytes() .unwrap(), &while_data[..] ) } #[test] fn test_method_call() { // Method (TST1, 1, NotSerialized) // { // TST2 (One, One) // } // // Method (TST2, 2, NotSerialized) // { // TST1 (One) // } let test_data = [ 0x14, 0x0C, 0x54, 0x53, 0x54, 0x31, 0x01, 0x54, 0x53, 0x54, 0x32, 0x01, 0x01, 0x14, 0x0B, 0x54, 0x53, 0x54, 0x32, 0x02, 0x54, 0x53, 0x54, 0x31, 0x01, ]; let mut methods = Vec::new(); methods.extend_from_slice( &Method::new( "TST1".try_into().unwrap(), 1, false, vec![&MethodCall::new( "TST2".try_into().unwrap(), vec![&ONE, &ONE], )], ) .to_aml_bytes() .unwrap(), ); methods.extend_from_slice( &Method::new( "TST2".try_into().unwrap(), 2, false, vec![&MethodCall::new("TST1".try_into().unwrap(), vec![&ONE])], ) .to_aml_bytes() .unwrap(), ); assert_eq!(&methods[..], &test_data[..]) } #[test] fn test_buffer() { // Name (_MAT, Buffer (0x08) // _MAT: Multiple APIC Table Entry // { // 0x00, 0x08, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00 /* ........ 
*/ // }) let buffer_data = [ 0x08, 0x5F, 0x4D, 0x41, 0x54, 0x11, 0x0B, 0x0A, 0x08, 0x00, 0x08, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, ]; assert_eq!( Name::new( "_MAT".try_into().unwrap(), &Buffer::new(vec![0x00, 0x08, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00]) ) .unwrap() .to_aml_bytes() .unwrap(), &buffer_data[..] ) } #[test] fn test_create_field() { // Method (MCRS, 0, Serialized) // { // Name (MR64, ResourceTemplate () // { // QWordMemory (ResourceProducer, PosDecode, MinFixed, MaxFixed, Cacheable, ReadWrite, // 0x0000000000000000, // Granularity // 0x0000000000000000, // Range Minimum // 0xFFFFFFFFFFFFFFFE, // Range Maximum // 0x0000000000000000, // Translation Offset // 0xFFFFFFFFFFFFFFFF, // Length // ,, _Y00, AddressRangeMemory, TypeStatic) // }) // CreateQWordField (MR64, \_SB.MHPC.MCRS._Y00._MIN, MIN) // _MIN: Minimum Base Address // CreateQWordField (MR64, \_SB.MHPC.MCRS._Y00._MAX, MAX) // _MAX: Maximum Base Address // CreateQWordField (MR64, \_SB.MHPC.MCRS._Y00._LEN, LEN) // _LEN: Length // } let data = [ 0x14, 0x41, 0x06, 0x4D, 0x43, 0x52, 0x53, 0x08, 0x08, 0x4D, 0x52, 0x36, 0x34, 0x11, 0x33, 0x0A, 0x30, 0x8A, 0x2B, 0x00, 0x00, 0x0C, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x79, 0x00, 0x8F, 0x4D, 0x52, 0x36, 0x34, 0x0A, 0x0E, 0x4D, 0x49, 0x4E, 0x5F, 0x8F, 0x4D, 0x52, 0x36, 0x34, 0x0A, 0x16, 0x4D, 0x41, 0x58, 0x5F, 0x8F, 0x4D, 0x52, 0x36, 0x34, 0x0A, 0x26, 0x4C, 0x45, 0x4E, 0x5F, ]; assert_eq!( Method::new( "MCRS".try_into().unwrap(), 0, true, vec![ &Name::new( "MR64".try_into().unwrap(), &ResourceTemplate::new(vec![ &AddressSpace::new_memory( AddressSpaceCacheable::Cacheable, true, 0x0000_0000_0000_0000u64, 0xFFFF_FFFF_FFFF_FFFEu64 ) .unwrap() ]) ) .unwrap(), &CreateField::::new( &Path::new("MR64").unwrap(), &14usize, "MIN_".try_into().unwrap() ), 
&CreateField::::new( &Path::new("MR64").unwrap(), &22usize, "MAX_".try_into().unwrap() ), &CreateField::::new( &Path::new("MR64").unwrap(), &38usize, "LEN_".try_into().unwrap() ), ] ) .to_aml_bytes() .unwrap(), &data[..] ); } } ================================================ FILE: src/acpi-tables/src/dsdt.rs ================================================ // Copyright 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 use std::mem::size_of; use vm_memory::{Address, Bytes, GuestAddress, GuestMemory}; use zerocopy::IntoBytes; use crate::{AcpiError, Result, Sdt, SdtHeader, checksum}; /// Differentiated System Description Table (DSDT) /// /// Table that includes hardware definition blocks. /// More information about this table can be found in the ACPI specification: /// https://uefi.org/specs/ACPI/6.5/05_ACPI_Software_Programming_Model.html#differentiated-system-description-table-dsdt #[derive(Debug, Clone)] pub struct Dsdt { header: SdtHeader, definition_block: Vec, } impl Dsdt { pub fn new( oem_id: [u8; 6], oem_table_id: [u8; 8], oem_revision: u32, definition_block: Vec, ) -> Self { let header = SdtHeader::new( *b"DSDT", (size_of::() + definition_block.len()) .try_into() .unwrap(), 2, oem_id, oem_table_id, oem_revision, ); let mut dsdt = Dsdt { header, definition_block, }; dsdt.header.checksum = checksum(&[dsdt.header.as_bytes(), dsdt.definition_block.as_slice()]); dsdt } } impl Sdt for Dsdt { fn len(&self) -> usize { self.header.length.get() as usize } fn write_to_guest(&mut self, mem: &AS, address: GuestAddress) -> Result<()> { mem.write_slice(self.header.as_bytes(), address)?; let address = address .checked_add(size_of::() as u64) .ok_or(AcpiError::InvalidGuestAddress)?; mem.write_slice(self.definition_block.as_slice(), address)?; Ok(()) } } ================================================ FILE: src/acpi-tables/src/fadt.rs ================================================ // Copyright 2024 Amazon.com, Inc. 
or its affiliates. All Rights Reserved. // Copyright 2023 Rivos, Inc. // // SPDX-License-Identifier: Apache-2.0 use vm_memory::{Bytes, GuestAddress, GuestMemory}; use zerocopy::little_endian::{U16, U32, U64}; use zerocopy::{Immutable, IntoBytes}; use crate::{GenericAddressStructure, Result, Sdt, SdtHeader, checksum}; #[cfg(target_arch = "x86_64")] pub const IAPC_BOOT_ARG_FLAGS_VGA_NOT_PRESENT: u16 = 2; #[cfg(target_arch = "x86_64")] pub const IAPC_BOOT_ARG_FLAGS_MSI_NOT_PRESENT: u16 = 3; #[cfg(target_arch = "x86_64")] pub const IAPC_BOOT_ARG_FLAGS_PCI_ASPM: u16 = 4; // ACPI Flags. Reading from the specification here: // https://uefi.org/specs/ACPI/6.5/05_ACPI_Software_Programming_Model.html#fixed-acpi-description-table-fixed-feature-flags /// Flag for the Power Button functionality. /// If the system does not have a power button, this value would be “1” and no power button device /// would be present pub const FADT_F_PWR_BUTTON: u8 = 4; /// Flag for the Sleep Button Functionality. /// If the system does not have a sleep button, this value would be “1” and no power button device /// would be present pub const FADT_F_SLP_BUTTON: u8 = 5; /// Flag for Hardware Reduced API. If enabled, software-only alternatives are used for supported /// fixed features. pub const FADT_F_HW_REDUCED_ACPI: u8 = 20; // clippy doesn't understand that we actually "use" the fields of this struct when we serialize // them as bytes in guest memory, so here we just ignore dead code to avoid having to name // everything with an underscore prefix #[allow(dead_code)] /// Fixed ACPI Description Table (FADT) /// /// This table includes fixed hardware ACPI information such as addresses of register blocks and /// the pointer to the DSDT table. 
/// More information about this table can be found in the ACPI specification: /// https://uefi.org/specs/ACPI/6.5/05_ACPI_Software_Programming_Model.html#fixed-acpi-description-table-fadt #[repr(C, packed)] #[derive(Debug, Copy, Clone, Default, IntoBytes, Immutable)] pub struct Fadt { header: SdtHeader, firmware_control: U32, dsdt: U32, reserved_1: u8, preferred_pm_profile: u8, // In HW-reduced mode, fields starting from SCI_INT until CENTURY are ignored sci_int: U16, smi_cmd: U32, acpi_enable: u8, acpi_disable: u8, s4bios_req: u8, pstate_cnt: u8, pm1a_evt_blk: U32, pm1b_evt_blk: U32, pm1a_cnt_blk: U32, pm1b_cnt_blk: U32, pm2_cnt_blk: U32, pm_tmr_blk: U32, gpe0_blk: U32, gpe1_blk: U32, pm1_evt_len: u8, pm1_cnt_len: u8, pm2_cnt_len: u8, pm_tmr_len: u8, gpe0_blk_len: u8, gpe1_blk_len: u8, gpe1_base: u8, cst_cnt: u8, p_lvl2_lat: U16, p_lvl3_lat: U16, flush_size: U16, flush_stride: U16, duty_offset: u8, duty_width: u8, day_alrm: u8, mon_alrm: u8, century: u8, iapc_boot_arch: U16, reserved_2: u8, flags: U32, reset_reg: GenericAddressStructure, reset_value: u8, arm_boot_arch: U16, fadt_minor_version: u8, x_firmware_ctrl: U64, x_dsdt: U64, // In HW-reduced mode, fields starting from X_PM1a_EVT_BLK through X_GPE1_BLK // are ignored x_pm1a_evt_blk: GenericAddressStructure, x_pm1b_evt_blk: GenericAddressStructure, x_pm1a_cnt_blk: GenericAddressStructure, x_pm1b_cnt_blk: GenericAddressStructure, x_pm2_cnt_blk: GenericAddressStructure, x_pm_tmr_blk: GenericAddressStructure, x_gpe0_blk: GenericAddressStructure, x_gpe1_blk: GenericAddressStructure, sleep_control_reg: GenericAddressStructure, sleep_status_reg: GenericAddressStructure, hypervisor_vendor_id: [u8; 8], } impl Fadt { pub fn new(oem_id: [u8; 6], oem_table_id: [u8; 8], oem_revision: u32) -> Self { let header = SdtHeader::new( *b"FACP", // It's fine to unwrap here, we know that the size of the Fadt structure fits in 32 // bits. 
std::mem::size_of::().try_into().unwrap(), 6, // revision 6 oem_id, oem_table_id, oem_revision, ); Fadt { header, fadt_minor_version: 5, ..Default::default() } } /// Set the address of the DSDT table /// /// This sets the 64bit variant, X_DSDT field of the FADT table pub fn set_x_dsdt(&mut self, addr: u64) { self.x_dsdt = U64::new(addr); } /// Set the FADT flags pub fn set_flags(&mut self, flags: u32) { self.flags = U32::new(flags); } /// Set the IA-PC specific flags pub fn setup_iapc_flags(&mut self, flags: u16) { self.iapc_boot_arch = U16::new(flags); } /// Set the hypervisor vendor ID pub fn set_hypervisor_vendor_id(&mut self, hypervisor_vendor_id: [u8; 8]) { self.hypervisor_vendor_id = hypervisor_vendor_id; } } impl Sdt for Fadt { fn len(&self) -> usize { self.header.length.get().try_into().unwrap() } fn write_to_guest(&mut self, mem: &M, address: GuestAddress) -> Result<()> { self.header.checksum = checksum(&[self.as_bytes()]); mem.write_slice(self.as_bytes(), address)?; Ok(()) } } ================================================ FILE: src/acpi-tables/src/lib.rs ================================================ // Copyright © 2019 Intel Corporation // Copyright 2023 Rivos, Inc. // Copyright 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved. // // SPDX-License-Identifier: Apache-2.0 use vm_memory::{GuestAddress, GuestMemory, GuestMemoryError}; pub mod aml; pub mod dsdt; pub mod fadt; pub mod madt; pub mod mcfg; pub mod rsdp; pub mod xsdt; pub use aml::Aml; pub use dsdt::Dsdt; pub use fadt::Fadt; pub use madt::Madt; pub use mcfg::Mcfg; pub use rsdp::Rsdp; pub use xsdt::Xsdt; use zerocopy::little_endian::{U32, U64}; use zerocopy::{Immutable, IntoBytes}; // This is the creator ID that we will embed in ACPI tables that are created using this crate. const FC_ACPI_CREATOR_ID: [u8; 4] = *b"FCAT"; // This is the created ID revision that we will embed in ACPI tables that are created using this // crate. 
const FC_ACPI_CREATOR_REVISION: u32 = 0x20240119; fn checksum(buf: &[&[u8]]) -> u8 { (255 - buf .iter() .flat_map(|b| b.iter()) .fold(0u8, |acc, x| acc.wrapping_add(*x))) .wrapping_add(1) } #[derive(Debug, thiserror::Error, displaydoc::Display)] pub enum AcpiError { /// Guest memory error: {0} GuestMemory(#[from] GuestMemoryError), /// Invalid guest address InvalidGuestAddress, /// Invalid register size InvalidRegisterSize, } pub type Result = std::result::Result; /// ACPI type representing memory addresses #[repr(C, packed)] #[derive(IntoBytes, Immutable, Clone, Copy, Debug, Default)] pub struct GenericAddressStructure { pub address_space_id: u8, pub register_bit_width: u8, pub register_bit_offset: u8, pub access_size: u8, pub address: U64, } impl GenericAddressStructure { pub fn new( address_space_id: u8, register_bit_width: u8, register_bit_offset: u8, access_size: u8, address: u64, ) -> Self { Self { address_space_id, register_bit_width, register_bit_offset, access_size, address: U64::new(address), } } } /// Header included in all System Descriptor Tables #[repr(C, packed)] #[derive(Clone, Debug, Copy, Default, IntoBytes, Immutable)] pub struct SdtHeader { pub signature: [u8; 4], pub length: U32, pub revision: u8, pub checksum: u8, pub oem_id: [u8; 6], pub oem_table_id: [u8; 8], pub oem_revision: U32, pub creator_id: [u8; 4], pub creator_revision: U32, } impl SdtHeader { pub(crate) fn new( signature: [u8; 4], length: u32, table_revision: u8, oem_id: [u8; 6], oem_table_id: [u8; 8], oem_revision: u32, ) -> Self { SdtHeader { signature, length: U32::new(length), revision: table_revision, checksum: 0, oem_id, oem_table_id, oem_revision: U32::new(oem_revision), creator_id: FC_ACPI_CREATOR_ID, creator_revision: U32::new(FC_ACPI_CREATOR_REVISION), } } } /// A trait for functionality around System Descriptor Tables. 
pub trait Sdt { /// Get the length of the table fn len(&self) -> usize; /// Return true if Sdt is empty fn is_empty(&self) -> bool { self.len() == 0 } /// Write the table in guest memory fn write_to_guest(&mut self, mem: &M, address: GuestAddress) -> Result<()>; } #[cfg(test)] mod tests { use super::checksum; #[test] fn test_checksum() { assert_eq!(checksum(&[&[]]), 0u8); assert_eq!(checksum(&[]), 0u8); assert_eq!(checksum(&[&[1, 2, 3]]), 250u8); assert_eq!(checksum(&[&[1, 2, 3], &[]]), 250u8); assert_eq!(checksum(&[&[1, 2], &[3]]), 250u8); assert_eq!(checksum(&[&[1, 2], &[3], &[250]]), 0u8); assert_eq!(checksum(&[&[255]]), 1u8); assert_eq!(checksum(&[&[1, 2], &[3], &[250], &[255]]), 1u8); } } ================================================ FILE: src/acpi-tables/src/madt.rs ================================================ // Copyright 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved. // Copyright 2023 Rivos, Inc. // // SPDX-License-Identifier: Apache-2.0 use std::mem::size_of; use vm_memory::{Address, Bytes, GuestAddress, GuestMemory}; use zerocopy::little_endian::U32; use zerocopy::{Immutable, IntoBytes}; use crate::{AcpiError, Result, Sdt, SdtHeader, checksum}; const MADT_CPU_ENABLE_FLAG: u32 = 0; // clippy doesn't understand that we actually "use" the fields of this struct when we serialize // them as bytes in guest memory, so here we just ignore dead code to avoid having to name // everything with an underscore prefix #[allow(dead_code)] #[repr(C, packed)] #[derive(Copy, Clone, Debug, Default, IntoBytes, Immutable)] pub struct LocalAPIC { r#type: u8, length: u8, processor_uid: u8, apic_id: u8, flags: U32, } impl LocalAPIC { pub fn new(cpu_id: u8) -> Self { Self { r#type: 0, length: 8, processor_uid: cpu_id, apic_id: cpu_id, flags: U32::new(1u32 << MADT_CPU_ENABLE_FLAG), } } } // clippy doesn't understand that we actually "use" the fields of this struct when we serialize // them as bytes in guest memory, so here we just ignore dead code to avoid 
having to name // everything with an underscore prefix #[allow(dead_code)] #[repr(C, packed)] #[derive(Copy, Clone, Debug, Default, IntoBytes, Immutable)] pub struct IoAPIC { r#type: u8, length: u8, ioapic_id: u8, reserved: u8, apic_address: U32, gsi_base: U32, } impl IoAPIC { pub fn new(ioapic_id: u8, apic_address: u32) -> Self { IoAPIC { r#type: 1, length: 12, ioapic_id, reserved: 0, apic_address: U32::new(apic_address), gsi_base: U32::ZERO, } } } // clippy doesn't understand that we actually "use" the fields of this struct when we serialize // them as bytes in guest memory, so here we just ignore dead code to avoid having to name // everything with an underscore prefix #[allow(dead_code)] #[repr(C, packed)] #[derive(Debug, IntoBytes, Immutable)] struct MadtHeader { sdt: SdtHeader, base_address: U32, flags: U32, } /// Multiple APIC Description Table (MADT) /// /// This table includes information about the interrupt controllers of the device. /// More information about this table can be found in the ACPI specification: /// https://uefi.org/specs/ACPI/6.5/05_ACPI_Software_Programming_Model.html#multiple-apic-description-table-madt #[derive(Debug)] pub struct Madt { header: MadtHeader, interrupt_controllers: Vec, } impl Madt { pub fn new( oem_id: [u8; 6], oem_table_id: [u8; 8], oem_revision: u32, base_address: u32, interrupt_controllers: Vec, ) -> Self { let length = size_of::() + interrupt_controllers.len(); let sdt_header = SdtHeader::new( *b"APIC", // It is ok to unwrap the conversion of `length` to u32. `SdtHeader` is 36 bytes long, // so `length` here has a value of 44. 
length.try_into().unwrap(), 6, oem_id, oem_table_id, oem_revision, ); let mut header = MadtHeader { sdt: sdt_header, base_address: U32::new(base_address), flags: U32::ZERO, }; header.sdt.checksum = checksum(&[header.as_bytes(), interrupt_controllers.as_bytes()]); Madt { header, interrupt_controllers, } } } impl Sdt for Madt { fn len(&self) -> usize { self.header.sdt.length.get().try_into().unwrap() } fn write_to_guest(&mut self, mem: &M, address: GuestAddress) -> Result<()> { mem.write_slice(self.header.as_bytes(), address)?; let address = address .checked_add(size_of::() as u64) .ok_or(AcpiError::InvalidGuestAddress)?; mem.write_slice(self.interrupt_controllers.as_bytes(), address)?; Ok(()) } } ================================================ FILE: src/acpi-tables/src/mcfg.rs ================================================ // Copyright © 2019 Intel Corporation // Copyright © 2023 Rivos, Inc. // Copyright 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved. // // SPDX-License-Identifier: Apache-2.0 use std::mem::size_of; use vm_memory::{Bytes, GuestAddress, GuestMemory}; use zerocopy::{Immutable, IntoBytes}; use crate::{Result, Sdt, SdtHeader, checksum}; #[allow(dead_code)] #[repr(C, packed)] #[derive(Default, Debug, IntoBytes, Clone, Copy, Immutable)] struct PciRangeEntry { pub base_address: u64, pub segment: u16, pub start: u8, pub end: u8, _reserved: u32, } #[allow(dead_code)] #[repr(C, packed)] #[derive(Clone, Copy, Debug, Default, IntoBytes, Immutable)] pub struct Mcfg { header: SdtHeader, _reserved: u64, pci_range_entry: PciRangeEntry, } impl Mcfg { pub fn new( oem_id: [u8; 6], oem_table_id: [u8; 8], oem_revision: u32, pci_mmio_config_addr: u64, ) -> Self { let header = SdtHeader::new( *b"MCFG", size_of::().try_into().unwrap(), 1, oem_id, oem_table_id, oem_revision, ); let mut mcfg = Mcfg { header, pci_range_entry: PciRangeEntry { base_address: pci_mmio_config_addr, segment: 0, start: 0, end: 0, ..Default::default() }, ..Default::default() }; 
mcfg.header.checksum = checksum(&[mcfg.as_bytes()]); mcfg } } impl Sdt for Mcfg { fn len(&self) -> usize { self.as_bytes().len() } fn write_to_guest(&mut self, mem: &M, address: GuestAddress) -> Result<()> { mem.write_slice(self.as_bytes(), address)?; Ok(()) } } ================================================ FILE: src/acpi-tables/src/rsdp.rs ================================================ // Copyright © 2019 Intel Corporation // Copyright © 2023 Rivos, Inc. // Copyright 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved. // // SPDX-License-Identifier: Apache-2.0 use vm_memory::{Bytes, GuestAddress, GuestMemory}; use zerocopy::little_endian::{U32, U64}; use zerocopy::{Immutable, IntoBytes}; use crate::{Result, Sdt, checksum}; // clippy doesn't understand that we actually "use" the fields of this struct when we serialize // them as bytes in guest memory, so here we just ignore dead code to avoid having to name // everything with an underscore prefix #[allow(dead_code)] /// Root System Description Pointer /// /// This is the root pointer to the ACPI hierarchy. This is what OSs /// are looking for in the memory when initializing ACPI. It includes /// a pointer to XSDT /// More information about this structure can be found in the ACPI specification: /// https://uefi.org/specs/ACPI/6.5/05_ACPI_Software_Programming_Model.html#root-system-description-pointer-rsdp #[repr(C, packed)] #[derive(Clone, Copy, Debug, Default, IntoBytes, Immutable)] pub struct Rsdp { signature: [u8; 8], checksum: u8, oem_id: [u8; 6], revision: u8, rsdt_addr: U32, length: U32, xsdt_addr: U64, extended_checksum: u8, reserved: [u8; 3], } impl Rsdp { pub fn new(oem_id: [u8; 6], xsdt_addr: u64) -> Self { let mut rsdp = Rsdp { // Space in the end of string is needed! 
signature: *b"RSD PTR ", checksum: 0, oem_id, revision: 2, rsdt_addr: U32::ZERO, length: U32::new(std::mem::size_of::().try_into().unwrap()), xsdt_addr: U64::new(xsdt_addr), extended_checksum: 0, reserved: [0u8; 3], }; rsdp.checksum = checksum(&[&rsdp.as_bytes()[..20]]); rsdp.extended_checksum = checksum(&[rsdp.as_bytes()]); rsdp } } impl Sdt for Rsdp { fn len(&self) -> usize { self.as_bytes().len() } fn write_to_guest(&mut self, mem: &M, address: GuestAddress) -> Result<()> { mem.write_slice(self.as_bytes(), address)?; Ok(()) } } ================================================ FILE: src/acpi-tables/src/xsdt.rs ================================================ // Copyright 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved. // Copyright 2023 Rivos, Inc. // // SPDX-License-Identifier: Apache-2.0 use std::mem::size_of; use vm_memory::{Address, Bytes, GuestAddress, GuestMemory}; use zerocopy::IntoBytes; use crate::{AcpiError, Result, Sdt, SdtHeader, checksum}; /// Extended System Description Table (XSDT) /// /// This table provides 64bit addresses to the rest of the ACPI tables defined by the platform /// More information about this table can be found in the ACPI specification: /// https://uefi.org/specs/ACPI/6.5/05_ACPI_Software_Programming_Model.html#extended-system-description-table-xsdt #[derive(Clone, Default, Debug)] pub struct Xsdt { header: SdtHeader, tables: Vec, } impl Xsdt { pub fn new( oem_id: [u8; 6], oem_table_id: [u8; 8], oem_revision: u32, tables: Vec, ) -> Self { let mut tables_bytes = Vec::with_capacity(8 * tables.len()); for addr in tables { tables_bytes.extend(&addr.to_le_bytes()); } let header = SdtHeader::new( *b"XSDT", (std::mem::size_of::() + tables_bytes.len()) .try_into() .unwrap(), 1, oem_id, oem_table_id, oem_revision, ); let mut xsdt = Xsdt { header, tables: tables_bytes, }; xsdt.header.checksum = checksum(&[xsdt.header.as_bytes(), (xsdt.tables.as_slice())]); xsdt } } impl Sdt for Xsdt { fn len(&self) -> usize { 
std::mem::size_of::() + self.tables.len() } fn write_to_guest(&mut self, mem: &M, address: GuestAddress) -> Result<()> { mem.write_slice(self.header.as_bytes(), address)?; let address = address .checked_add(size_of::() as u64) .ok_or(AcpiError::InvalidGuestAddress)?; mem.write_slice(self.tables.as_slice(), address)?; Ok(()) } } ================================================ FILE: src/clippy-tracing/Cargo.toml ================================================ [package] name = "clippy-tracing" version = "0.1.0" authors = ["Amazon Firecracker team "] edition = "2024" license = "Apache-2.0" [[bin]] name = "clippy-tracing" bench = false [dependencies] clap = { version = "4.6.0", features = ["derive"] } itertools = "0.14.0" proc-macro2 = { version = "1.0.106", features = ["span-locations"] } quote = "1.0.45" syn = { version = "2.0.117", features = ["full", "extra-traits", "visit", "visit-mut", "printing"] } walkdir = "2.5.0" [dev-dependencies] uuid = { version = "1.22.0", features = ["v4"] } [lints] workspace = true ================================================ FILE: src/clippy-tracing/README.md ================================================ # clippy-tracing A tool to add, remove and check for `log_instrument::instrument` in large projects where it is infeasible to manually add it to thousands of functions. ## Usage This is tested in the [`readme()` integration test](../clippy-tracing/tests/integration_tests.rs) . ```rust fn main() { println!("Hello World!"); } fn add(lhs: i32, rhs: i32) -> i32 { lhs + rhs } #[cfg(tests)] mod tests { fn sub(lhs: i32, rhs: i32) -> i32 { lhs - rhs } #[test] fn test_one() { assert_eq!(add(1,1), sub(2, 1)); } } ``` ```bash clippy-tracing --action check # Missing instrumentation at {path}:9:4.\n echo $? # 2 clippy-tracing --action fix echo $? 
# 0 ``` ```rust #[log_instrument::instrument(level = "trace", skip())] fn main() { println!("Hello World!"); } #[log_instrument::instrument(level = "trace", skip(lhs, rhs))] fn add(lhs: i32, rhs: i32) -> i32 { lhs + rhs } #[cfg(tests)] mod tests { #[log_instrument::instrument(level = "trace", skip(lhs, rhs))] fn sub(lhs: i32, rhs: i32) -> i32 { lhs - rhs } #[test] fn test_one() { assert_eq!(add(1,1), sub(2, 1)); } } ``` ```bash clippy-tracing --action check echo $? # 0 clippy-tracing --action strip echo $? # 0 ``` ```rust fn main() { println!("Hello World!"); } fn add(lhs: i32, rhs: i32) -> i32 { lhs + rhs } #[cfg(tests)] mod tests { fn sub(lhs: i32, rhs: i32) -> i32 { lhs - rhs } #[test] fn test_one() { assert_eq!(add(1,1), sub(2, 1)); } } ``` ================================================ FILE: src/clippy-tracing/src/main.rs ================================================ // Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 //! A tool to add, remove and check for `tracing::instrument` in large projects where it is //! infeasible to manually add it to thousands of functions. use std::collections::HashMap; use std::error::Error; use std::fmt; use std::fs::OpenOptions; use std::io::{Read, Write}; use std::path::PathBuf; use std::process::ExitCode; use clap::{Parser, ValueEnum}; use syn::spanned::Spanned; use syn::visit::Visit; use walkdir::WalkDir; /// The command line arguments for the application. #[derive(Parser)] struct CommandLineArgs { /// The action to take. #[arg(long)] action: Action, /// The path to look in. #[arg(long)] path: Option, /// When adding instrumentation use a custom suffix e.g. /// `--suffix my::custom::suffix::`. /// /// The tool may be unable to strip instrumentation with an invalid suffix. #[arg(long)] suffix: Option, /// Whether to add a `cfg_attr` condition e.g. /// `#[cfg_attr(feature = "tracing", log_instrument::instrument)]` vs /// `#[log_instrument::instrument]`. 
#[arg(long)] cfg_attr: Option, /// Sub-paths which contain any of the strings from this list will be ignored. #[arg(long, value_delimiter = ',')] exclude: Vec, } /// The action to take. #[derive(Clone, ValueEnum)] enum Action { /// Checks `tracing::instrument` is on all functions. Check, /// Adds `tracing::instrument` to all functions. Fix, /// Removes `tracing::instrument` from all functions. Strip, } /// A list of text lines split so that newlines can be efficiently inserted between them. struct SegmentedList { /// The first new line. first: String, /// The inner vector used to contain the original lines `.0` and the new lines `.1`. inner: Vec<(String, String)>, } impl SegmentedList { /// Sets the text line before `line` to `text`. fn set_before(&mut self, line: usize, text: String) -> bool { let s = if let Some(i) = line.checked_sub(1) { let Some(mut_ref) = self.inner.get_mut(i) else { return false; }; &mut mut_ref.1 } else { &mut self.first }; *s = text; true } } impl From for String { fn from(list: SegmentedList) -> String { let iter = list .inner .into_iter() .map(|(x, y)| format!("{x}{}{y}", if y.is_empty() { "" } else { "\n" })); format!( "{}{}{}", list.first, if list.first.is_empty() { "" } else { "\n" }, itertools::intersperse(iter, String::from("\n")).collect::() ) } } /// Visitor for the `strip` action. struct StripVisitor(HashMap); impl From for String { fn from(visitor: StripVisitor) -> String { let mut vec = visitor.0.into_iter().collect::>(); vec.sort_by_key(|(i, _)| *i); itertools::intersperse(vec.into_iter().map(|(_, x)| x), String::from("\n")) .collect::() } } macro_rules! 
create_strip_visitor_function {
    ($func_name:ident, $item:ident) => {
        fn $func_name(&mut self, i: &syn::$item) {
            if let Some(instrument) = find_instrumented(&i.attrs) {
                // Remove every source line the instrument attribute spans.
                let start = instrument.span().start().line - 1;
                let end = instrument.span().end().line;
                for line in start..end {
                    self.0.remove(&line);
                }
            }
            self.visit_block(&i.block);
        }
    };
}

impl syn::visit::Visit<'_> for StripVisitor {
    create_strip_visitor_function!(visit_impl_item_fn, ImplItemFn);
    create_strip_visitor_function!(visit_item_fn, ItemFn);
}

/// Visitor for the `check` action.
///
/// Holds the span of the first un-instrumented function found, if any.
struct CheckVisitor(Option<proc_macro2::Span>);

macro_rules! create_check_visitor_function {
    ($func_name:ident, $item:ident) => {
        fn $func_name(&mut self, i: &syn::$item) {
            let attr = check_attributes(&i.attrs);
            // `const fn`s are exempt: they cannot carry the instrument macro.
            if !attr.instrumented && !attr.test && i.sig.constness.is_none() {
                self.0 = Some(i.span());
            } else {
                self.visit_block(&i.block);
            }
        }
    };
}

impl syn::visit::Visit<'_> for CheckVisitor {
    create_check_visitor_function!(visit_impl_item_fn, ImplItemFn);
    create_check_visitor_function!(visit_item_fn, ItemFn);
}

/// Visitor for the `fix` action.
struct FixVisitor<'a> {
    /// A custom path suffix.
    suffix: &'a Option<String>,
    /// A `cfg_attr` condition.
    cfg_attr: &'a Option<String>,
    /// Source
    list: SegmentedList,
}

impl From<FixVisitor<'_>> for String {
    fn from(visitor: FixVisitor) -> String {
        String::from(visitor.list)
    }
}

macro_rules!
create_fix_visitor_function { ($func_name:ident, $item:ident) => { fn $func_name(&mut self, i: &syn::$item) { let attr = check_attributes(&i.attrs); if !attr.instrumented && !attr.test && i.sig.constness.is_none() { let line = i.span().start().line; let attr_string = instrument(&i.sig, self.suffix, self.cfg_attr); let indent = i.span().start().column; let indent_attr = format!("{}{attr_string}", " ".repeat(indent)); self.list.set_before(line - 1, indent_attr); } self.visit_block(&i.block); } }; } impl syn::visit::Visit<'_> for FixVisitor<'_> { create_fix_visitor_function!(visit_impl_item_fn, ImplItemFn); create_fix_visitor_function!(visit_item_fn, ItemFn); } fn instrument(sig: &syn::Signature, suffix: &Option, cfg_attr: &Option) -> String { let instr = inner_instrument(sig, suffix); if let Some(cfg_attr) = cfg_attr { format!("#[cfg_attr({cfg_attr}, {instr})]") } else { format!("#[{instr}]") } } /// Returns the instrument macro for a given function signature. fn inner_instrument(_sig: &syn::Signature, suffix: &Option) -> String { format!( "{}instrument", suffix.as_ref().map_or("log_instrument::", String::as_str) ) } /// Type to return from `main` to support returning an error then handling it. #[repr(u8)] enum Exit { /// Process completed successfully. Ok = 0, /// Process encountered an error. Error = 1, /// Process ran `check` action and found missing instrumentation. Check = 2, } #[allow(clippy::as_conversions)] impl std::process::Termination for Exit { fn report(self) -> ExitCode { ExitCode::from(self as u8) } } fn main() -> Exit { match exec() { Err(err) => { eprintln!("Error: {err}"); Exit::Error } Ok(None) => Exit::Ok, Ok(Some((path, line, column))) => { println!( "Missing instrumentation at {}:{line}:{column}.", path.display() ); Exit::Check } } } /// Error for [`exec`]. #[derive(Debug)] enum ExecError { /// Failed to read entry in file path. Entry(walkdir::Error), /// Failed to parse file path to string. String, /// Failed to open file. 
File(std::io::Error), /// Failed to run apply function. Apply(ApplyError), } impl fmt::Display for ExecError { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { Self::Entry(entry) => write!(f, "Failed to read entry in file path: {entry}"), Self::String => write!(f, "Failed to parse file path to string."), Self::File(file) => write!(f, "Failed to open file: {file}"), Self::Apply(apply) => write!(f, "Failed to run apply function: {apply}"), } } } impl Error for ExecError {} /// Wraps functionality from `main` to support returning an error then handling it. fn exec() -> Result, ExecError> { let args = CommandLineArgs::parse(); let path = args.path.unwrap_or_else(|| PathBuf::from(".")); for entry_res in WalkDir::new(path).follow_links(true) { let entry = entry_res.map_err(ExecError::Entry)?; let entry_path = entry.into_path(); let path_str = entry_path.to_str().ok_or(ExecError::String)?; // File paths must not contain any excluded strings. let no_excluded_strings = !args.exclude.iter().any(|e| path_str.contains(e)); // The file must not be a `build.rs` file. let not_build_file = !entry_path.ends_with("build.rs"); // The file must be a `.rs` file. let is_rs_file = entry_path.extension().is_some_and(|ext| ext == "rs"); if no_excluded_strings && not_build_file && is_rs_file { let file = OpenOptions::new() .read(true) .open(&entry_path) .map_err(ExecError::File)?; let res = apply(&args.action, &args.suffix, &args.cfg_attr, file, |_| { OpenOptions::new() .write(true) .truncate(true) .open(&entry_path) }) .map_err(ExecError::Apply)?; if let Some(span) = res { return Ok(Some((entry_path, span.start().line, span.start().column))); } } } Ok(None) } /// Error for [`apply`]. #[derive(Debug)] enum ApplyError { /// Failed to read file. Read(std::io::Error), /// Failed to parse file to utf8. Utf(core::str::Utf8Error), /// Failed to parse file to syn ast. Syn(syn::parse::Error), /// Failed to get write target. 
Target(std::io::Error), /// Failed to write result to target. Write(std::io::Error), } impl fmt::Display for ApplyError { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { Self::Read(read) => write!(f, "Failed to read file: {read}"), Self::Utf(utf) => write!(f, "Failed to parse file to utf8: {utf}"), Self::Syn(syn) => write!(f, "Failed to parse file to syn ast: {syn}"), Self::Target(target) => write!(f, "Failed to get write target: {target}"), Self::Write(write) => write!(f, "Failed to write result to target: {write}"), } } } impl Error for ApplyError {} /// Apply the given action to the given source and outputs the result to the target produced by the /// given closure. fn apply( action: &Action, suffix: &Option, cfg_attr: &Option, mut source: R, target: impl Fn(R) -> Result, ) -> Result, ApplyError> { let mut buf = Vec::new(); source.read_to_end(&mut buf).map_err(ApplyError::Read)?; let text = core::str::from_utf8(&buf).map_err(ApplyError::Utf)?; let ast = syn::parse_file(text).map_err(ApplyError::Syn)?; match action { Action::Strip => { let mut visitor = StripVisitor( text.split('\n') .enumerate() .map(|(i, x)| (i, String::from(x))) .collect(), ); visitor.visit_file(&ast); let out = String::from(visitor); target(source) .map_err(ApplyError::Target)? .write_all(out.as_bytes()) .map_err(ApplyError::Write)?; Ok(None) } Action::Check => { let mut visitor = CheckVisitor(None); visitor.visit_file(&ast); Ok(visitor.0) } Action::Fix => { let mut visitor = FixVisitor { suffix, cfg_attr, list: SegmentedList { first: String::new(), inner: text .split('\n') .map(|x| (String::from(x), String::new())) .collect(), }, }; visitor.visit_file(&ast); let out = String::from(visitor); target(source) .map_err(ApplyError::Target)? .write_all(out.as_bytes()) .map_err(ApplyError::Write)?; Ok(None) } } } /// Finds the `#[instrument]` attribute on a function. 
fn find_instrumented(attrs: &[syn::Attribute]) -> Option<&syn::Attribute> {
    attrs.iter().find(|a| is_instrumented(a).is_some())
}

/// Checks if a `syn::Attribute` is `#[instrument]`.
///
/// Returns the attribute itself when it matches, `None` otherwise.
fn is_instrumented(attr: &syn::Attribute) -> Option<&syn::Attribute> {
    match &attr.meta {
        syn::Meta::List(syn::MetaList { path, tokens, .. }) => {
            // `#[instrument]`
            let instrumented = matches!(path.segments.last(), Some(syn::PathSegment { ident, .. }) if ident == "instrument");
            // `#[cfg_attr(.. , instrument)]`
            let attr_instrumented = matches!(path.segments.last(), Some(syn::PathSegment { ident, .. }) if ident == "cfg_attr")
                && tokens.clone().into_iter().any(|token| matches!(token, proc_macro2::TokenTree::Ident(ident) if ident == "instrument"));
            (instrumented || attr_instrumented).then_some(attr)
        }
        syn::Meta::Path(syn::Path { segments, .. }) => {
            let x = matches!(segments.last(), Some(syn::PathSegment { ident, .. }) if ident == "instrument");
            x.then_some(attr)
        }
        syn::Meta::NameValue(_) => None,
    }
}

/// The description of attributes on a function signature we care about.
struct Desc {
    /// Does the function have the `#[tracing::instrument]` attribute macro?
    instrumented: bool,
    /// Does the function have the `#[test]` attribute macro?
    test: bool,
}

// A function is considered instrumented if it has the `#[instrument]` attribute or the `#[test]`
// attribute.
/// Inspects a function's attributes, reporting whether it is already instrumented
/// (`#[instrument]` in any of its forms) and whether it is a test/proof function
/// (which does not require instrumentation).
fn check_attributes(attrs: &[syn::Attribute]) -> Desc {
    let mut instrumented = false;
    let mut test = false;
    for attr in attrs {
        // Match `#[instrument]` and `#[cfg_attr(.., instrument)]`.
        if is_instrumented(attr).is_some() {
            instrumented = true;
        }
        // Match `#[test]` or `#[kani::proof]`.
        if match &attr.meta {
            syn::Meta::List(syn::MetaList { path, .. }) => {
                matches!(path.segments.last(), Some(syn::PathSegment { ident, ..
}) if ident == "proof") } syn::Meta::Path(syn::Path { segments, .. }) => { matches!(segments.last(), Some(syn::PathSegment { ident, .. }) if ident == "test" || ident == "proof") } syn::Meta::NameValue(_) => false, } { test = true; } } Desc { instrumented, test } } ================================================ FILE: src/clippy-tracing/tests/integration_tests.rs ================================================ // Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 // Allow test functions outside of test modules #![allow(clippy::tests_outside_test_module)] use std::fs::{OpenOptions, remove_file}; use std::io::{Read, Write}; use std::process::Command; use uuid::Uuid; const BINARY: &str = env!("CARGO_BIN_EXE_clippy-tracing"); fn setup(text: &str) -> String { let id = Uuid::new_v4(); let path = format!("/tmp/{id}.rs"); let mut file = OpenOptions::new() .create(true) .truncate(true) .read(false) .write(true) .open(&path) .unwrap(); file.write_all(text.as_bytes()).unwrap(); path } fn check_file(text: &str, path: &str) { let mut file = OpenOptions::new() .create(false) .read(true) .write(false) .open(path) .unwrap(); let mut buffer = String::new(); file.read_to_string(&mut buffer).unwrap(); assert_eq!(text, buffer); } fn fix(given: &str, expected: &str, cfg_attr: Option<&'static str>) { let path = setup(given); let output = if let Some(cfg_attr) = cfg_attr { Command::new(BINARY) .args(["--action", "fix", "--path", &path, "--cfg-attr", cfg_attr]) .output() .unwrap() } else { Command::new(BINARY) .args(["--action", "fix", "--path", &path]) .output() .unwrap() }; assert_eq!(std::str::from_utf8(&output.stdout).unwrap(), ""); assert_eq!(std::str::from_utf8(&output.stderr).unwrap(), ""); assert_eq!(output.status.code(), Some(0)); check_file(expected, &path); remove_file(path).unwrap(); } fn strip(given: &str, expected: &str) { let path = setup(given); let output = Command::new(BINARY) .args(["--action", "strip", 
"--path", &path]) .output() .unwrap(); assert_eq!(output.status.code(), Some(0)); assert_eq!(output.stdout, []); assert_eq!(output.stderr, []); check_file(expected, &path); remove_file(path).unwrap(); } #[test] fn exec_error() { // Create file path for a file that doesn't exist. let id = Uuid::new_v4(); let path = format!("/tmp/{id}.rs"); let output = Command::new(BINARY) .args(["--action", "check", "--path", &path]) .output() .unwrap(); assert_eq!(output.status.code(), Some(1)); assert_eq!(output.stdout, []); let expected_stderr = format!( "Error: Failed to read entry in file path: IO error for operation on {path}: No such file \ or directory (os error 2)\n" ); assert_eq!(output.stderr, expected_stderr.as_bytes()); } #[test] fn fix_one() { const GIVEN: &str = "fn main() { }\nfn add(lhs: i32, rhs: i32) {\n lhs + rhs\n}"; const EXPECTED: &str = "#[log_instrument::instrument]\nfn main() { \ }\n#[log_instrument::instrument]\nfn add(lhs: i32, rhs: i32) {\n \ lhs + rhs\n}"; fix(GIVEN, EXPECTED, None); } #[test] fn fix_two() { const GIVEN: &str = "impl Unit {\n fn one() {}\n}"; const EXPECTED: &str = "impl Unit {\n #[log_instrument::instrument]\n fn one() {}\n}"; fix(GIVEN, EXPECTED, None); } #[test] fn fix_three() { const GIVEN: &str = "impl Unit {\n fn one() {}\n}"; const EXPECTED: &str = "impl Unit {\n #[cfg_attr(feature = \"tracing\", \ log_instrument::instrument)]\n fn one() {}\n}"; fix(GIVEN, EXPECTED, Some("feature = \"tracing\"")); } #[test] fn check_one() { const GIVEN: &str = "fn main() { }"; let path = setup(GIVEN); let output = Command::new(BINARY) .args(["--action", "check", "--path", &path]) .output() .unwrap(); assert_eq!(output.status.code(), Some(2)); let expected_stdout = format!("Missing instrumentation at {path}:1:0.\n"); assert_eq!(output.stdout, expected_stdout.as_bytes()); assert_eq!(output.stderr, []); remove_file(path).unwrap(); } #[test] fn check_two() { const GIVEN: &str = "#[log_instrument::instrument]\nfn main() { }\n#[test]\nfn my_test() { 
}"; let path: String = setup(GIVEN); let output = Command::new(BINARY) .args(["--action", "check", "--path", &path]) .output() .unwrap(); assert_eq!(output.status.code(), Some(0)); assert_eq!(output.stdout, []); assert_eq!(output.stderr, []); remove_file(path).unwrap(); } #[test] fn check_three() { const GIVEN: &str = "impl Unit {\n #[cfg_attr(feature = \"tracing\", \ tracing::instrument(level = \"trace\", skip()))]\n fn one() {}\n}"; let path = setup(GIVEN); let output = Command::new(BINARY) .args([ "--action", "check", "--path", &path, "--cfg-attr", "feature = \"tracing\"", ]) .output() .unwrap(); assert_eq!(std::str::from_utf8(&output.stdout).unwrap(), ""); assert_eq!(std::str::from_utf8(&output.stderr).unwrap(), ""); assert_eq!(output.status.code(), Some(0)); remove_file(path).unwrap(); } #[test] fn strip_one() { const GIVEN: &str = "#[log_instrument::instrument]\nfn main() { }"; const EXPECTED: &str = "fn main() { }"; strip(GIVEN, EXPECTED); } #[test] fn strip_two() { const GIVEN: &str = "#[log_instrument::instrument]\nfn main() { }"; const EXPECTED: &str = "fn main() { }"; strip(GIVEN, EXPECTED); } #[test] fn strip_three() { const EXPECTED: &str = "impl Unit {\n fn one() {}\n}"; const GIVEN: &str = "impl Unit {\n #[log_instrument::instrument]\n fn one() {}\n}"; strip(GIVEN, EXPECTED); } #[test] fn exclude() { const GIVEN: &str = "fn main() { }\nfn add(lhs: i32, rhs: i32) {\n lhs + rhs\n}"; const EXPECTED: &str = "#[log_instrument::instrument]\nfn main() { \ }\n#[log_instrument::instrument]\nfn add(lhs: i32, rhs: i32) {\n \ lhs + rhs\n}"; let dir_path = format!("/tmp/{}", Uuid::new_v4()); std::fs::create_dir(&dir_path).unwrap(); dbg!(&dir_path); let file_path_one = format!("{dir_path}/{}.rs", Uuid::new_v4()); let file_path_two = format!("{dir_path}/{}.rs", Uuid::new_v4()); dbg!(&file_path_one); dbg!(&file_path_two); let mut file_one = OpenOptions::new() .create(true) .truncate(true) .read(false) .write(true) .open(&file_path_one) .unwrap(); 
file_one.write_all(GIVEN.as_bytes()).unwrap(); let mut file_two = OpenOptions::new() .create(true) .truncate(true) .read(false) .write(true) .open(&file_path_two) .unwrap(); file_two.write_all(GIVEN.as_bytes()).unwrap(); let output = Command::new(BINARY) .args([ "--action", "fix", "--path", &dir_path, "--exclude", &file_path_two, ]) .output() .unwrap(); assert_eq!(std::str::from_utf8(&output.stdout).unwrap(), ""); assert_eq!(std::str::from_utf8(&output.stderr).unwrap(), ""); assert_eq!(output.status.code(), Some(0)); check_file(EXPECTED, &file_path_one); check_file(GIVEN, &file_path_two); remove_file(file_path_one).unwrap(); remove_file(file_path_two).unwrap(); std::fs::remove_dir(dir_path).unwrap(); } #[test] fn readme() { const GIVEN: &str = r#"fn main() { println!("Hello World!"); } fn add(lhs: i32, rhs: i32) -> i32 { lhs + rhs } #[cfg(tests)] mod tests { fn sub(lhs: i32, rhs: i32) -> i32 { lhs - rhs } #[test] fn test_one() { assert_eq!(add(1,1), sub(2, 1)); } }"#; let path: String = setup(GIVEN); // Check let output = Command::new(BINARY) .args(["--action", "check", "--path", &path]) .output() .unwrap(); assert_eq!(output.status.code(), Some(2)); let missing = format!("Missing instrumentation at {path}:9:4.\n"); assert_eq!(output.stdout, missing.as_bytes()); assert_eq!(output.stderr, []); const EXPECTED: &str = r#"#[log_instrument::instrument] fn main() { println!("Hello World!"); } #[log_instrument::instrument] fn add(lhs: i32, rhs: i32) -> i32 { lhs + rhs } #[cfg(tests)] mod tests { #[log_instrument::instrument] fn sub(lhs: i32, rhs: i32) -> i32 { lhs - rhs } #[test] fn test_one() { assert_eq!(add(1,1), sub(2, 1)); } }"#; // Fix let output = Command::new(BINARY) .args(["--action", "fix", "--path", &path]) .output() .unwrap(); assert_eq!(output.status.code(), Some(0)); assert_eq!(output.stdout, []); assert_eq!(output.stderr, []); check_file(EXPECTED, &path); // Check let output = Command::new(BINARY) .args(["--action", "check", "--path", &path]) .output() 
.unwrap(); assert_eq!(output.status.code(), Some(0)); assert_eq!(output.stdout, []); assert_eq!(output.stderr, []); // Strip let output = Command::new(BINARY) .args(["--action", "strip", "--path", &path]) .output() .unwrap(); assert_eq!(output.status.code(), Some(0)); assert_eq!(output.stdout, []); assert_eq!(output.stderr, []); check_file(GIVEN, &path); } #[test] fn readme_empty_suffix() { const GIVEN: &str = r#"fn main() { println!("Hello World!"); } fn add(lhs: i32, rhs: i32) -> i32 { lhs + rhs } #[cfg(tests)] mod tests { fn sub(lhs: i32, rhs: i32) -> i32 { lhs - rhs } #[test] fn test_one() { assert_eq!(add(1,1), sub(2, 1)); } }"#; let path: String = setup(GIVEN); // Check let output = Command::new(BINARY) .args(["--action", "check", "--path", &path]) .output() .unwrap(); assert_eq!(output.status.code(), Some(2)); let missing = format!("Missing instrumentation at {path}:9:4.\n"); assert_eq!(output.stdout, missing.as_bytes()); assert_eq!(output.stderr, []); const EXPECTED: &str = r#"#[instrument] fn main() { println!("Hello World!"); } #[instrument] fn add(lhs: i32, rhs: i32) -> i32 { lhs + rhs } #[cfg(tests)] mod tests { #[instrument] fn sub(lhs: i32, rhs: i32) -> i32 { lhs - rhs } #[test] fn test_one() { assert_eq!(add(1,1), sub(2, 1)); } }"#; // Fix let output = Command::new(BINARY) .args(["--action", "fix", "--suffix", "", "--path", &path]) .output() .unwrap(); assert_eq!(output.status.code(), Some(0)); assert_eq!(output.stdout, []); assert_eq!(output.stderr, []); check_file(EXPECTED, &path); // Check let output = Command::new(BINARY) .args(["--action", "check", "--path", &path]) .output() .unwrap(); assert_eq!(output.status.code(), Some(0)); assert_eq!(output.stdout, []); assert_eq!(output.stderr, []); // Strip let output = Command::new(BINARY) .args(["--action", "strip", "--path", &path]) .output() .unwrap(); assert_eq!(output.status.code(), Some(0)); assert_eq!(output.stdout, []); assert_eq!(output.stderr, []); check_file(GIVEN, &path); } #[test] fn 
readme_custom_suffix() { const GIVEN: &str = r#"fn main() { println!("Hello World!"); } fn add(lhs: i32, rhs: i32) -> i32 { lhs + rhs } #[cfg(tests)] mod tests { fn sub(lhs: i32, rhs: i32) -> i32 { lhs - rhs } #[test] fn test_one() { assert_eq!(add(1,1), sub(2, 1)); } }"#; let path: String = setup(GIVEN); // Check let output = Command::new(BINARY) .args(["--action", "check", "--path", &path]) .output() .unwrap(); assert_eq!(output.status.code(), Some(2)); let missing = format!("Missing instrumentation at {path}:9:4.\n"); assert_eq!(output.stdout, missing.as_bytes()); assert_eq!(output.stderr, []); const EXPECTED: &str = r#"#[my::custom::suffix::instrument] fn main() { println!("Hello World!"); } #[my::custom::suffix::instrument] fn add(lhs: i32, rhs: i32) -> i32 { lhs + rhs } #[cfg(tests)] mod tests { #[my::custom::suffix::instrument] fn sub(lhs: i32, rhs: i32) -> i32 { lhs - rhs } #[test] fn test_one() { assert_eq!(add(1,1), sub(2, 1)); } }"#; // Fix let output = Command::new(BINARY) .args([ "--action", "fix", "--suffix", "my::custom::suffix::", "--path", &path, ]) .output() .unwrap(); assert_eq!(output.status.code(), Some(0)); assert_eq!(output.stdout, []); assert_eq!(output.stderr, []); check_file(EXPECTED, &path); // Check let output = Command::new(BINARY) .args(["--action", "check", "--path", &path]) .output() .unwrap(); assert_eq!(output.status.code(), Some(0)); assert_eq!(output.stdout, []); assert_eq!(output.stderr, []); // Strip let output = Command::new(BINARY) .args(["--action", "strip", "--path", &path]) .output() .unwrap(); assert_eq!(output.status.code(), Some(0)); assert_eq!(output.stdout, []); assert_eq!(output.stderr, []); check_file(GIVEN, &path); } ================================================ FILE: src/cpu-template-helper/Cargo.toml ================================================ [package] name = "cpu-template-helper" version = "1.16.0-dev" authors = ["Amazon Firecracker team "] edition = "2024" license = "Apache-2.0" [[bin]] name = 
"cpu-template-helper" bench = false [features] tracing = ["log-instrument", "vmm/tracing"] [dependencies] clap = { version = "4.6.0", features = ["derive", "string"] } displaydoc = "0.2.5" libc = "0.2.183" log-instrument = { path = "../log-instrument", optional = true } serde = { version = "1.0.228", features = ["derive"] } serde_json = "1.0.149" thiserror = "2.0.18" vmm = { path = "../vmm" } vmm-sys-util = "0.15.0" [lints] workspace = true ================================================ FILE: src/cpu-template-helper/build.rs ================================================ // Copyright 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 use std::io::Write; const MOCK_KERNEL_PATH: &str = "src/utils/mock_kernel/kernel.bin"; // Kernel header for aarch64 that comes from the kernel doc Documentation/arm64/booting.txt. #[derive(Default)] #[repr(C, packed)] struct KernelHeader { code0: u32, // Executable code code1: u32, // Executable code text_offset: u64, // Image load offset, image_size: u64, // Effective Image size, little endian flags: u64, // kernel flags, little endian res2: u64, // reserved res3: u64, // reserved res4: u64, // reserved magic: u32, // Magic number, little endian, "ARM\x64" res5: u32, // reserved (used for PE COFF offset) } fn main() { if cfg!(target_arch = "x86_64") { println!("cargo:rerun-if-changed=src/utils/mock_kernel/main.c"); let status = std::process::Command::new("gcc") .args([ // Do not use the standard system startup files or libraries when linking. "-nostdlib", // Prevents linking with the shared libraries. "-static", // Do not generate unwind tables. "-fno-asynchronous-unwind-tables", // Remove all symbol table and relocation information. 
"-s", "-o", MOCK_KERNEL_PATH, "src/utils/mock_kernel/main.c", ]) .status() .expect("Failed to execute gcc command"); if !status.success() { panic!("Failed to compile mock kernel"); } } else if cfg!(target_arch = "aarch64") { let header = KernelHeader { magic: 0x644D5241, ..std::default::Default::default() }; // SAFETY: This is safe as long as `header` is valid as `KernelHeader`. let header_bytes = unsafe { std::slice::from_raw_parts( (&header as *const KernelHeader).cast::(), std::mem::size_of::(), ) }; let mut file = std::fs::File::create(MOCK_KERNEL_PATH).expect("Failed to create a file"); file.write_all(header_bytes) .expect("Failed to write kernel header to a file"); } else { panic!("Unsupported arch"); } } ================================================ FILE: src/cpu-template-helper/src/fingerprint/compare.rs ================================================ // Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 use serde::Serialize; use crate::fingerprint::{Fingerprint, FingerprintField}; #[derive(Debug, thiserror::Error, displaydoc::Display)] pub enum FingerprintCompareError { /// Difference detected between source and target: {0} DiffDetected(String), /// Failed to serialize/deserialize JSON: {0} Serde(#[from] serde_json::Error), } #[derive(Serialize)] struct Diff<'a, T: Serialize> { name: String, prev: &'a T, curr: &'a T, } pub fn compare( prev: Fingerprint, curr: Fingerprint, filters: Vec, ) -> Result<(), FingerprintCompareError> { let compare = |field: &FingerprintField, val1, val2| -> Option> { if val1 != val2 { let diff = Diff { name: format!("{field:#?}"), prev: val1, curr: val2, }; Some(serde_json::to_string_pretty(&diff)) } else { None } }; let results = filters .into_iter() .filter_map(|filter| { match filter { FingerprintField::firecracker_version => compare( &filter, &prev.firecracker_version, &curr.firecracker_version, ), FingerprintField::kernel_version => { compare(&filter, 
&prev.kernel_version, &curr.kernel_version)
                }
                FingerprintField::microcode_version => {
                    compare(&filter, &prev.microcode_version, &curr.microcode_version)
                }
                FingerprintField::bios_version => {
                    compare(&filter, &prev.bios_version, &curr.bios_version)
                }
                FingerprintField::bios_revision => {
                    compare(&filter, &prev.bios_revision, &curr.bios_revision)
                }
                FingerprintField::guest_cpu_config => {
                    if prev.guest_cpu_config != curr.guest_cpu_config {
                        let cpu_configs =
                            vec![prev.guest_cpu_config.clone(), curr.guest_cpu_config.clone()];
                        // This `strip()` call always succeeds since the number of inputs is two.
                        let cpu_configs = crate::template::strip::strip(cpu_configs).unwrap();
                        let diff = Diff {
                            name: format!("{filter:#?}"),
                            prev: &cpu_configs[0],
                            curr: &cpu_configs[1],
                        };
                        Some(serde_json::to_string_pretty(&diff))
                    } else {
                        None
                    }
                }
            }
        })
        .collect::<Result<Vec<_>, serde_json::Error>>()?;

    if results.is_empty() {
        Ok(())
    } else {
        Err(FingerprintCompareError::DiffDetected(format!(
            "\n{}",
            results.join("\n")
        )))
    }
}

#[cfg(test)]
mod tests {
    use clap::ValueEnum;
    use vmm::cpu_config::templates::CustomCpuTemplate;

    use super::*;

    fn build_sample_fingerprint() -> Fingerprint {
        Fingerprint {
            firecracker_version: crate::utils::CPU_TEMPLATE_HELPER_VERSION.to_string(),
            kernel_version: "sample_kernel_version".to_string(),
            microcode_version: "sample_microcode_version".to_string(),
            bios_version: "sample_bios_version".to_string(),
            bios_revision: "sample_bios_revision".to_string(),
            guest_cpu_config: CustomCpuTemplate::default(),
        }
    }

    #[test]
    fn test_compare_same_fingerprints() {
        // Compare two identical fingerprints and verify `Ok` is returned.
let f1 = build_sample_fingerprint(); let f2 = build_sample_fingerprint(); let filters = FingerprintField::value_variants().to_vec(); compare(f1, f2, filters).unwrap(); } #[test] #[rustfmt::skip] fn test_compare_different_fingerprints() { // Compare two fingerprints that different on `kernel_version` and `microcode_version` with // a filter of `kernel_version`, and verify that `Err` is returned and only `kernel_version` // change detected. let f1 = build_sample_fingerprint(); let mut f2 = build_sample_fingerprint(); f2.kernel_version = "different_kernel_version".to_string(); f2.microcode_version = "different_microcode_version".to_string(); let filters = vec![FingerprintField::kernel_version]; let result = compare(f1, f2, filters); match result { Err(FingerprintCompareError::DiffDetected(err)) => { assert_eq!( err, "\n{\ \n \"name\": \"kernel_version\",\ \n \"prev\": \"sample_kernel_version\",\ \n \"curr\": \"different_kernel_version\"\ \n}" .to_string() ); } _ => panic!("Should detect difference of `kernel_version`"), } } } ================================================ FILE: src/cpu-template-helper/src/fingerprint/dump.rs ================================================ // Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
// SPDX-License-Identifier: Apache-2.0 use std::fs::read_to_string; use std::sync::{Arc, Mutex}; use vmm::Vmm; use crate::fingerprint::Fingerprint; #[derive(Debug, thiserror::Error, displaydoc::Display)] pub enum FingerprintDumpError { /// Failed to dump CPU config: {0} DumpCpuConfig(#[from] crate::template::dump::DumpError), /// Failed to read {0}: {1} ReadSysfsFile(String, std::io::Error), /// Failed to get kernel version: {0} GetKernelVersion(std::io::Error), } pub fn dump(vmm: Arc>) -> Result { Ok(Fingerprint { firecracker_version: crate::utils::CPU_TEMPLATE_HELPER_VERSION.to_string(), kernel_version: get_kernel_version()?, #[cfg(target_arch = "x86_64")] microcode_version: read_sysfs_file("/sys/devices/system/cpu/cpu0/microcode/version")?, #[cfg(target_arch = "aarch64")] microcode_version: read_sysfs_file( "/sys/devices/system/cpu/cpu0/regs/identification/revidr_el1", )?, bios_version: read_sysfs_file("/sys/devices/virtual/dmi/id/bios_version")?, bios_revision: read_sysfs_file("/sys/devices/virtual/dmi/id/bios_release")?, guest_cpu_config: crate::template::dump::dump(vmm)?, }) } fn get_kernel_version() -> Result { // SAFETY: An all-zeroed value for `libc::utsname` is valid. let mut name: libc::utsname = unsafe { std::mem::zeroed() }; // SAFETY: The passed arg is a valid mutable reference of `libc::utsname`. let ret = unsafe { libc::uname(&mut name) }; if ret < 0 { return Err(FingerprintDumpError::GetKernelVersion( std::io::Error::last_os_error(), )); } // SAFETY: The fields of `libc::utsname` are terminated by a null byte ('\0'). // https://man7.org/linux/man-pages/man2/uname.2.html let c_str = unsafe { std::ffi::CStr::from_ptr(name.release.as_ptr()) }; // SAFETY: The `release` field is an array of `char` in C, in other words, ASCII. 
let version = c_str.to_str().unwrap(); Ok(version.to_string()) } fn read_sysfs_file(path: &str) -> Result { let s = read_to_string(path) .map_err(|err| FingerprintDumpError::ReadSysfsFile(path.to_string(), err))?; Ok(s.trim_end_matches('\n').to_string()) } #[cfg(test)] mod tests { use super::*; #[test] fn test_get_kernel_version() { // `get_kernel_version()` should always succeed. get_kernel_version().unwrap(); } #[test] fn test_read_valid_sysfs_file() { // The sysfs file for microcode version should exist and be read. let valid_sysfs_path = "/sys/devices/virtual/dmi/id/bios_version"; read_sysfs_file(valid_sysfs_path).unwrap(); } #[test] fn test_read_invalid_sysfs_file() { let invalid_sysfs_path = "/sys/invalid/path"; if read_sysfs_file(invalid_sysfs_path).is_ok() { panic!("Should fail with `No such file or directory`"); } } } ================================================ FILE: src/cpu-template-helper/src/fingerprint/mod.rs ================================================ // Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 use serde::{Deserialize, Serialize}; use vmm::cpu_config::templates::CustomCpuTemplate; pub mod compare; pub mod dump; macro_rules! 
declare_fingerprint_struct_and_enum { ($($field_name:ident : $field_type:ty),+) => { #[derive(Debug, Serialize, Deserialize)] pub struct Fingerprint { $(pub $field_name: $field_type),+ } #[allow(non_camel_case_types)] #[derive(clap::ValueEnum, Clone, Debug)] #[value(rename_all = "snake_case")] pub enum FingerprintField { $($field_name),+ } }; } // This macro is expanded as follows: // ```rs // #[derive(Serialize, Deserialize)] // pub struct Fingerprint { // pub firecracker_version: String, // pub kernel_version: String, // pub microcode_version: String, // pub bios_version: String, // pub bios_revision: String, // pub guest_cpu_config: CustomCpuTemplate, // } // // #[allow(non_camel_case_types)] // #[derive(clap::ValueEnum, Clone, Debug)] // #[value(rename_all = "snake_case")] // pub enum FingerprintField { // firecracker_version, // kernel_version, // microcode_version, // bios_version, // bios_revision, // guest_cpu_config, // } // ``` declare_fingerprint_struct_and_enum!( firecracker_version: String, kernel_version: String, microcode_version: String, bios_version: String, bios_revision: String, guest_cpu_config: CustomCpuTemplate ); ================================================ FILE: src/cpu-template-helper/src/main.rs ================================================ // Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
// SPDX-License-Identifier: Apache-2.0 use std::fs::{read_to_string, write}; use std::path::PathBuf; use clap::{Parser, Subcommand, ValueEnum}; use vmm::cpu_config::templates::{GetCpuTemplate, GetCpuTemplateError}; mod fingerprint; mod template; mod utils; #[derive(Debug, thiserror::Error, displaydoc::Display)] enum HelperError { /// Failed to operate file: {0} FileIo(#[from] std::io::Error), /// {0} FingerprintCompare(#[from] fingerprint::compare::FingerprintCompareError), /// {0} FingerprintDump(#[from] fingerprint::dump::FingerprintDumpError), /// CPU template is not specified: {0} NoCpuTemplate(#[from] GetCpuTemplateError), /// Failed to serialize/deserialize JSON file: {0} Serde(#[from] serde_json::Error), /// {0} Utils(#[from] utils::UtilsError), /// {0} TemplateDump(#[from] template::dump::DumpError), /// {0} TemplateStrip(#[from] template::strip::StripError), /// {0} TemplateVerify(#[from] template::verify::VerifyError), } #[derive(Debug, Parser)] #[command(version = format!("v{}", crate::utils::CPU_TEMPLATE_HELPER_VERSION))] struct Cli { #[command(subcommand)] command: Command, } #[derive(Debug, Subcommand)] enum Command { /// Template-related operations #[command(subcommand)] Template(TemplateOperation), /// Fingerprint-related operations #[command(subcommand)] Fingerprint(FingerprintOperation), } #[derive(Debug, Subcommand)] enum TemplateOperation { /// Dump guest CPU configuration in the custom CPU template format. Dump { /// Path of firecracker config file. #[arg(short, long, value_name = "PATH")] config: Option, /// Path of CPU template to apply. #[arg(short, long, value_name = "PATH")] template: Option, /// Path of output file. #[arg(short, long, value_name = "PATH", default_value = "cpu_config.json")] output: PathBuf, }, /// Strip entries shared between multiple CPU template files. Strip { /// List of paths of input CPU configuration files. #[arg(short, long, value_name = "PATH", num_args = 2..)] paths: Vec, /// Suffix of output files. 
To overwrite input files, specify an empty string ''. #[arg(short, long, default_value = "_stripped")] suffix: String, }, /// Verify that the given CPU template file is applied as intended. Verify { /// Path of firecracker config file. #[arg(short, long, value_name = "PATH")] config: Option, /// Path of the target CPU template. #[arg(short, long, value_name = "PATH")] template: Option, }, } #[derive(Debug, Subcommand)] enum FingerprintOperation { /// Dump fingerprint consisting of host-related information and guest CPU config. Dump { /// Path of firecracker config file. #[arg(short, long, value_name = "PATH")] config: Option, /// Path of CPU template to apply. #[arg(short, long, value_name = "PATH")] template: Option, /// Path of output file. #[arg(short, long, value_name = "PATH", default_value = "fingerprint.json")] output: PathBuf, }, /// Compare two fingerprint files with queries. Compare { /// Path of fingerprint file that stores the previous state at CPU template creation. #[arg(short, long, value_name = "PATH")] prev: PathBuf, /// Path of fingerprint file that stores the current state. #[arg(short, long, value_name = "PATH")] curr: PathBuf, /// List of fields to be compared. 
#[arg( short, long, value_enum, num_args = 1.., default_values_t = fingerprint::FingerprintField::value_variants() )] filters: Vec, }, } fn run(cli: Cli) -> Result<(), HelperError> { match cli.command { Command::Template(op) => match op { TemplateOperation::Dump { config, template, output, } => { let config = config.map(read_to_string).transpose()?; let template = template .as_ref() .map(utils::load_cpu_template) .transpose()?; let (vmm, _) = utils::build_microvm_from_config(config, template)?; let cpu_config = template::dump::dump(vmm)?; let cpu_config_json = serde_json::to_string_pretty(&cpu_config)?; write(output, cpu_config_json)?; } TemplateOperation::Strip { paths, suffix } => { let templates = paths .iter() .map(utils::load_cpu_template) .collect::, utils::UtilsError>>()?; let stripped_templates = template::strip::strip(templates)?; for (path, template) in paths.into_iter().zip(stripped_templates.into_iter()) { let path = utils::add_suffix(&path, &suffix); let template_json = serde_json::to_string_pretty(&template)?; write(path, template_json)?; } } TemplateOperation::Verify { config, template } => { let config = config.map(read_to_string).transpose()?; let template = template .as_ref() .map(utils::load_cpu_template) .transpose()?; let (vmm, vm_resources) = utils::build_microvm_from_config(config, template)?; let cpu_template = vm_resources .machine_config .cpu_template .get_cpu_template()? 
.into_owned(); let cpu_config = template::dump::dump(vmm)?; template::verify::verify(cpu_template, cpu_config)?; } }, Command::Fingerprint(op) => match op { FingerprintOperation::Dump { config, template, output, } => { let config = config.map(read_to_string).transpose()?; let template = template .as_ref() .map(utils::load_cpu_template) .transpose()?; let (vmm, _) = utils::build_microvm_from_config(config, template)?; let fingerprint = fingerprint::dump::dump(vmm)?; let fingerprint_json = serde_json::to_string_pretty(&fingerprint)?; write(output, fingerprint_json)?; } FingerprintOperation::Compare { prev, curr, filters, } => { let prev_json = read_to_string(prev)?; let prev = serde_json::from_str(&prev_json)?; let curr_json = read_to_string(curr)?; let curr = serde_json::from_str(&curr_json)?; fingerprint::compare::compare(prev, curr, filters)?; } }, } Ok(()) } fn main() -> std::process::ExitCode { let cli = Cli::parse(); let result = run(cli); if let Err(e) = result { eprintln!("{}", e); std::process::ExitCode::FAILURE } else { std::process::ExitCode::SUCCESS } } #[cfg(test)] mod tests { use std::io::Write; use vmm_sys_util::tempfile::TempFile; use super::*; // Sample modifiers for x86_64 that should work correctly as a CPU template and a guest CPU // config. // * CPUID leaf 0x0 / subleaf 0x0 / register eax indicates the maximum input EAX value for basic // CPUID information. // * MSR index 0x4b564d00 indicates MSR_KVM_WALL_CLOCK_NEW. #[cfg(target_arch = "x86_64")] const SAMPLE_MODIFIERS: &str = r#" { "cpuid_modifiers": [ { "leaf": "0x0", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000001" } ] } ], "msr_modifiers": [ { "addr": "0x4b564d00", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000001" } ] }"#; // Sample modifiers for aarch64 that should work correctly as a CPU template and a guest CPU // config. // * Register ID 0x6030000000100002 indicates X1 register. 
#[cfg(target_arch = "aarch64")] const SAMPLE_MODIFIERS: &str = r#" { "reg_modifiers": [ { "addr": "0x6030000000100002", "bitmap": "0b00000001" } ] }"#; // Build a sample custom CPU template. fn generate_sample_template() -> TempFile { let file = TempFile::new().unwrap(); file.as_file() .write_all(SAMPLE_MODIFIERS.as_bytes()) .unwrap(); file } // Build a sample fingerprint file. fn generate_sample_fingerprint() -> TempFile { let fingerprint = fingerprint::Fingerprint { firecracker_version: crate::utils::CPU_TEMPLATE_HELPER_VERSION.to_string(), kernel_version: "sample_kernel_version".to_string(), microcode_version: "sample_microcode_version".to_string(), bios_version: "sample_bios_version".to_string(), bios_revision: "sample_bios_revision".to_string(), guest_cpu_config: serde_json::from_str(SAMPLE_MODIFIERS).unwrap(), }; let file = TempFile::new().unwrap(); file.as_file() .write_all( serde_json::to_string_pretty(&fingerprint) .unwrap() .as_bytes(), ) .unwrap(); file } #[test] fn test_template_dump_command() { let output_file = TempFile::new().unwrap(); let args = vec![ "cpu-template-helper", "template", "dump", "--output", output_file.as_path().to_str().unwrap(), ]; let cli = Cli::parse_from(args); run(cli).unwrap(); } #[test] fn test_template_strip_command() { let files = [generate_sample_template(), generate_sample_template()]; let mut args = vec!["cpu-template-helper", "template", "strip", "-p"]; let paths = files .iter() .map(|file| file.as_path().to_str().unwrap()) .collect::>(); args.extend(paths); let cli = Cli::parse_from(args); run(cli).unwrap(); } #[test] fn test_template_verify_command() { let template_file = generate_sample_template(); let args = vec![ "cpu-template-helper", "template", "verify", "--template", template_file.as_path().to_str().unwrap(), ]; let cli = Cli::parse_from(args); run(cli).unwrap(); } #[test] fn test_fingerprint_dump_command() { let output_file = TempFile::new().unwrap(); let args = vec![ "cpu-template-helper", "fingerprint", 
"dump", "--output", output_file.as_path().to_str().unwrap(), ]; let cli = Cli::parse_from(args); run(cli).unwrap(); } #[test] fn test_fingerprint_compare_command() { let fingerprint_file1 = generate_sample_fingerprint(); let fingerprint_file2 = generate_sample_fingerprint(); let filters = fingerprint::FingerprintField::value_variants() .iter() .map(|variant| variant.to_possible_value().unwrap().get_name().to_string()) .collect::>(); let mut args = vec![ "cpu-template-helper", "fingerprint", "compare", "--prev", fingerprint_file1.as_path().to_str().unwrap(), "--curr", fingerprint_file2.as_path().to_str().unwrap(), "--filters", ]; for filter in &filters { args.push(filter); } let cli = Cli::parse_from(args); run(cli).unwrap(); } } ================================================ FILE: src/cpu-template-helper/src/template/dump/aarch64.rs ================================================ // Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 use vmm::arch::aarch64::regs::{PC, RegSize, SYS_CNTPCT_EL0, SYS_CNTV_CVAL_EL0}; use vmm::cpu_config::aarch64::custom_cpu_template::RegisterModifier; use vmm::cpu_config::templates::{CpuConfiguration, CustomCpuTemplate, RegisterValueFilter}; use vmm::logger::warn; use crate::utils::aarch64::reg_modifier; pub fn config_to_template(cpu_config: &CpuConfiguration) -> CustomCpuTemplate { let mut reg_modifiers: Vec = cpu_config .regs .iter() .filter_map(|reg| match reg.size() { RegSize::U32 => Some(reg_modifier!(reg.id, u128::from(reg.value::()))), RegSize::U64 => Some(reg_modifier!(reg.id, u128::from(reg.value::()))), RegSize::U128 => Some(reg_modifier!(reg.id, reg.value::())), _ => { warn!( "Only 32, 64 and 128 bit wide registers are supported in cpu templates. 
\
                     Skipping: {:#x}",
                    reg.id
                );
                None
            }
        })
        .collect();

    reg_modifiers.retain(|modifier| !REG_EXCLUSION_LIST.contains(&modifier.addr));
    reg_modifiers.sort_by_key(|modifier| modifier.addr);

    CustomCpuTemplate {
        reg_modifiers,
        ..Default::default()
    }
}

// List of register IDs excluded from the CPU configuration dump.
const REG_EXCLUSION_LIST: [u64; 3] = [
    // SYS_CNTV_CVAL_EL0 and SYS_CNTPCT_EL0 are timer registers and depend on the elapsed time.
    // This type of register is not useful as a guest CPU config dump.
    SYS_CNTV_CVAL_EL0,
    SYS_CNTPCT_EL0,
    // Program counter (PC) value is determined by the given kernel image. It should not be
    // overwritten by a custom CPU template and does not need to be tracked in a fingerprint file.
    PC,
];

#[cfg(test)]
mod tests {
    use vmm::arch::aarch64::regs::{Aarch64RegisterRef, Aarch64RegisterVec, reg_size};

    use super::*;

    // These are used as IDs to satisfy requirements
    // of `Aarch64RegisterRef::new`
    const KVM_REG_SIZE_U32: u64 = 0x0020000000000000;
    const KVM_REG_SIZE_U64: u64 = 0x0030000000000000;
    const KVM_REG_SIZE_U128: u64 = 0x0040000000000000;
    const KVM_REG_SIZE_U256: u64 = 0x0050000000000000;
    const KVM_REG_SIZE_U512: u64 = 0x0060000000000000;
    const KVM_REG_SIZE_U1024: u64 = 0x0070000000000000;
    const KVM_REG_SIZE_U2048: u64 = 0x0080000000000000;

    fn build_sample_regs() -> Aarch64RegisterVec {
        let mut v = Aarch64RegisterVec::default();
        v.push(Aarch64RegisterRef::new(
            KVM_REG_SIZE_U128,
            &0xffff_ffff_ffff_ffff_ffff_ffff_ffff_ffff_u128.to_le_bytes(),
        ));
        v.push(Aarch64RegisterRef::new(
            KVM_REG_SIZE_U32,
            &0x0000_ffff_u32.to_le_bytes(),
        ));
        v.push(Aarch64RegisterRef::new(
            KVM_REG_SIZE_U64,
            &0x0000_ffff_0000_ffff_u64.to_le_bytes(),
        ));
        // CPU templates only support 32, 64 and 128 bit wide registers, so the following registers
        // should be excluded from the result.
v.push(Aarch64RegisterRef::new(KVM_REG_SIZE_U256, &[0x69; 32])); v.push(Aarch64RegisterRef::new(KVM_REG_SIZE_U512, &[0x69; 64])); v.push(Aarch64RegisterRef::new(KVM_REG_SIZE_U1024, &[0x69; 128])); v.push(Aarch64RegisterRef::new(KVM_REG_SIZE_U2048, &[0x69; 256])); // The following registers should be excluded from the result. for id in REG_EXCLUSION_LIST { v.push(Aarch64RegisterRef::new(id, &vec![0; reg_size(id)])); } v } fn build_expected_reg_modifiers() -> Vec { vec![ reg_modifier!(KVM_REG_SIZE_U32, 0x0000_ffff), reg_modifier!(KVM_REG_SIZE_U64, 0x0000_ffff_0000_ffff), reg_modifier!(KVM_REG_SIZE_U128, 0xffff_ffff_ffff_ffff_ffff_ffff_ffff_ffff), ] } #[test] fn test_config_to_template() { let cpu_config = CpuConfiguration { regs: build_sample_regs(), }; let cpu_template = CustomCpuTemplate { reg_modifiers: build_expected_reg_modifiers(), ..Default::default() }; assert_eq!(config_to_template(&cpu_config), cpu_template); } } ================================================ FILE: src/cpu-template-helper/src/template/dump/mod.rs ================================================ // Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 #[cfg(target_arch = "aarch64")] mod aarch64; #[cfg(target_arch = "x86_64")] mod x86_64; use std::sync::{Arc, Mutex}; use vmm::cpu_config::templates::CustomCpuTemplate; use vmm::{DumpCpuConfigError, Vmm}; #[cfg(target_arch = "aarch64")] use crate::template::dump::aarch64::config_to_template; #[cfg(target_arch = "x86_64")] use crate::template::dump::x86_64::config_to_template; #[derive(Debug, thiserror::Error, displaydoc::Display)] pub enum DumpError { /// Failed to dump CPU config: {0} DumpCpuConfig(#[from] DumpCpuConfigError), } pub fn dump(vmm: Arc>) -> Result { // Get CPU configuration. let cpu_configs = vmm.lock().unwrap().dump_cpu_config()?; // Convert CPU config to CPU template. 
Ok(config_to_template(&cpu_configs[0])) } #[cfg(test)] mod tests { use super::*; use crate::utils::build_microvm_from_config; #[test] fn test_dump() { let (vmm, _) = build_microvm_from_config(None, None).unwrap(); dump(vmm).unwrap(); } } ================================================ FILE: src/cpu-template-helper/src/template/dump/x86_64.rs ================================================ // Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 use std::collections::BTreeMap; use vmm::MSR_RANGE; use vmm::arch::x86_64::generated::msr_index::*; use vmm::arch::x86_64::msr::MsrRange; use vmm::cpu_config::templates::{CpuConfiguration, CustomCpuTemplate, RegisterValueFilter}; use vmm::cpu_config::x86_64::cpuid::common::get_vendor_id_from_host; use vmm::cpu_config::x86_64::cpuid::{Cpuid, VENDOR_ID_AMD}; use vmm::cpu_config::x86_64::custom_cpu_template::{ CpuidLeafModifier, CpuidRegister, CpuidRegisterModifier, RegisterModifier, }; use crate::utils::x86_64::{cpuid_leaf_modifier, cpuid_reg_modifier, msr_modifier}; /// Convert `&CpuConfiguration` to `CustomCputemplate`. 
pub fn config_to_template(cpu_config: &CpuConfiguration) -> CustomCpuTemplate {
    CustomCpuTemplate {
        cpuid_modifiers: cpuid_to_modifiers(&cpu_config.cpuid),
        msr_modifiers: msrs_to_modifier(&cpu_config.msrs),
        ..Default::default()
    }
}

fn cpuid_to_modifiers(cpuid: &Cpuid) -> Vec<CpuidLeafModifier> {
    cpuid
        .inner()
        .iter()
        .map(|(key, entry)| {
            cpuid_leaf_modifier!(
                key.leaf,
                key.subleaf,
                entry.flags,
                vec![
                    cpuid_reg_modifier!(CpuidRegister::Eax, entry.result.eax),
                    cpuid_reg_modifier!(CpuidRegister::Ebx, entry.result.ebx),
                    cpuid_reg_modifier!(CpuidRegister::Ecx, entry.result.ecx),
                    cpuid_reg_modifier!(CpuidRegister::Edx, entry.result.edx),
                ]
            )
        })
        .collect()
}

fn msrs_to_modifier(msrs: &BTreeMap<u32, u64>) -> Vec<RegisterModifier> {
    let mut msrs: Vec<RegisterModifier> = msrs
        .iter()
        .map(|(index, value)| msr_modifier!(*index, *value))
        .collect();
    msrs.retain(|modifier| !should_exclude_msr(modifier.addr));
    if &get_vendor_id_from_host().unwrap() == VENDOR_ID_AMD {
        msrs.retain(|modifier| !should_exclude_msr_amd(modifier.addr));
    }
    msrs.sort_by_key(|modifier| modifier.addr);
    msrs
}

// List of MSR indices excluded from the CPU configuration dump.
//
// MSRs that vary depending on the elapsed time (e.g., time stamp counter) are not useful, because
// CPU configuration dump is used to check diff between CPU models and detect changes caused by
// Firecracker/KVM/BIOS changes.
//
// Firecracker disables some features (e.g., PMU) and doesn't support some features (e.g., Hyper-V).
// MSRs related to such features are not useful as CPU configuration dump. Excluding such MSRs
// reduces maintenance cost when KVM changes their default values.
const MSR_EXCLUSION_LIST: [MsrRange; 10] = [
    // - MSR_IA32_TSC (0x10): varies depending on the elapsed time.
    MSR_RANGE!(MSR_IA32_TSC),
    // - MSR_IA32_TSC_DEADLINE (0x6e0): varies depending on the elapsed time.
    MSR_RANGE!(MSR_IA32_TSC_DEADLINE),
    // Firecracker doesn't support MCE.
// - MSR_IA32_MCG_STATUS (0x17a) // - MSR_IA32_MCG_EXT_CTL (0x4d0) MSR_RANGE!(MSR_IA32_MCG_STATUS), MSR_RANGE!(MSR_IA32_MCG_EXT_CTL), // - MSR_IA32_PERF_CAPABILITIES (0x345) available if CPUID.01h:ECX[15] = 1 but disabled in the // CPUID normalization process. MSR_RANGE!(MSR_IA32_PERF_CAPABILITIES), // Firecracker doesn't support PEBS (Precise Event-Based Sampling) that is part of Intel's PMU. // - MSR_IA32_PEBS_ENABLE (0x3F1) // - MSR_PEBS_DATA_CFG (0x3F2) // - MSR_IA32_DS_AREA (0x600) MSR_RANGE!(MSR_IA32_PEBS_ENABLE, 2), MSR_RANGE!(MSR_IA32_DS_AREA), // Firecracker doesn't support AMD PMU. // - MSR_K7_EVNTSELn (0xC0010000..=0xC0010003) // - MSR_K7_PERFCTRn (0xC0010004..=0xC0010007) // - MSR_F15H_PERF_CTLn & MSR_F15H_PERF_CTRn (0xC0010200..=0xC001020B) MSR_RANGE!(MSR_K7_EVNTSEL0, 4), MSR_RANGE!(MSR_K7_PERFCTR0, 4), MSR_RANGE!(MSR_F15H_PERF_CTL0, 12), ]; fn should_exclude_msr(index: u32) -> bool { MSR_EXCLUSION_LIST.iter().any(|range| range.contains(index)) } // List of MSR indices excluded from the CPU configuration dump on AMD const MSR_EXCLUSION_LIST_AMD: [MsrRange; 1] = [ // MSR_IA32_ARCH_CAPABILITIES has been emulated by KVM since kernel 5.7. // https://github.com/torvalds/linux/commit/93c380e7b528882396ca463971012222bad7d82e // https://lore.kernel.org/all/20200302235709.27467-1-sean.j.christopherson@intel.com/ // As this MSR is not available on AMD processors, Firecracker disables it explicitly by // setting 0 to CPUID.(EAX=07H,ECX=0):EDX[bit 29], and this MSR should be removed from the // dump on AMD. 
MSR_RANGE!(MSR_IA32_ARCH_CAPABILITIES), ]; fn should_exclude_msr_amd(index: u32) -> bool { MSR_EXCLUSION_LIST_AMD .iter() .any(|range| range.contains(index)) } #[cfg(test)] mod tests { use std::collections::BTreeMap; use vmm::cpu_config::x86_64::cpuid::{ CpuidEntry, CpuidKey, CpuidRegisters, IntelCpuid, KvmCpuidFlags, }; use super::*; fn build_sample_cpuid() -> Cpuid { Cpuid::Intel(IntelCpuid(BTreeMap::from([ ( CpuidKey { leaf: 0x0, subleaf: 0x0, }, CpuidEntry { flags: KvmCpuidFlags::EMPTY, result: CpuidRegisters { eax: 0xffff_ffff, ebx: 0x0000_ffff, ecx: 0xffff_0000, edx: 0x0000_0000, }, }, ), ( CpuidKey { leaf: 0x1, subleaf: 0x1, }, CpuidEntry { flags: KvmCpuidFlags::SIGNIFICANT_INDEX, result: CpuidRegisters { eax: 0xaaaa_aaaa, ebx: 0xaaaa_5555, ecx: 0x5555_aaaa, edx: 0x5555_5555, }, }, ), ]))) } fn build_expected_cpuid_modifiers() -> Vec { vec![ cpuid_leaf_modifier!( 0x0, 0x0, KvmCpuidFlags::EMPTY, vec![ cpuid_reg_modifier!(CpuidRegister::Eax, 0xffff_ffff), cpuid_reg_modifier!(CpuidRegister::Ebx, 0x0000_ffff), cpuid_reg_modifier!(CpuidRegister::Ecx, 0xffff_0000), cpuid_reg_modifier!(CpuidRegister::Edx, 0x0000_0000), ] ), cpuid_leaf_modifier!( 0x1, 0x1, KvmCpuidFlags::SIGNIFICANT_INDEX, vec![ cpuid_reg_modifier!(CpuidRegister::Eax, 0xaaaa_aaaa), cpuid_reg_modifier!(CpuidRegister::Ebx, 0xaaaa_5555), cpuid_reg_modifier!(CpuidRegister::Ecx, 0x5555_aaaa), cpuid_reg_modifier!(CpuidRegister::Edx, 0x5555_5555), ] ), ] } fn build_sample_msrs() -> BTreeMap { let mut map = BTreeMap::from([ // should be sorted in the result. (0x1, 0xffff_ffff_ffff_ffff), (0x5, 0xffff_ffff_0000_0000), (0x3, 0x0000_0000_ffff_ffff), (0x2, 0x0000_0000_0000_0000), ]); // should be excluded from the result. 
MSR_EXCLUSION_LIST .iter() .chain(MSR_EXCLUSION_LIST_AMD.iter()) .for_each(|range| { (range.base..(range.base + range.nmsrs)).for_each(|id| { map.insert(id, 0); }) }); map } fn build_expected_msr_modifiers() -> Vec { let mut v = vec![ msr_modifier!(0x1, 0xffff_ffff_ffff_ffff), msr_modifier!(0x2, 0x0000_0000_0000_0000), msr_modifier!(0x3, 0x0000_0000_ffff_ffff), msr_modifier!(0x5, 0xffff_ffff_0000_0000), ]; if &get_vendor_id_from_host().unwrap() != VENDOR_ID_AMD { MSR_EXCLUSION_LIST_AMD.iter().for_each(|range| { (range.base..(range.base + range.nmsrs)).for_each(|id| { v.push(msr_modifier!(id, 0)); }) }); } v } #[test] fn test_config_to_template() { let cpu_config = CpuConfiguration { cpuid: build_sample_cpuid(), msrs: build_sample_msrs(), }; let cpu_template = CustomCpuTemplate { cpuid_modifiers: build_expected_cpuid_modifiers(), msr_modifiers: build_expected_msr_modifiers(), ..Default::default() }; assert_eq!(config_to_template(&cpu_config), cpu_template); } } ================================================ FILE: src/cpu-template-helper/src/template/mod.rs ================================================ // Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 pub mod dump; pub mod strip; pub mod verify; ================================================ FILE: src/cpu-template-helper/src/template/strip/aarch64.rs ================================================ // Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 use vmm::cpu_config::aarch64::custom_cpu_template::RegisterModifier; use vmm::cpu_config::templates::CustomCpuTemplate; use crate::template::strip::{StripError, strip_common}; use crate::utils::aarch64::RegModifierMap; #[allow(dead_code)] pub fn strip(templates: Vec) -> Result, StripError> { // Convert `Vec` to `Vec>`. 
let mut reg_modifiers_maps = templates .into_iter() .map(|template| RegModifierMap::from(template.reg_modifiers).0) .collect::>(); // Remove common items. strip_common(&mut reg_modifiers_maps)?; // Convert back to `Vec`. let templates = reg_modifiers_maps .into_iter() .map(|reg_modifiers_map| { let reg_modifiers = Vec::::from(RegModifierMap(reg_modifiers_map)); CustomCpuTemplate { reg_modifiers, ..Default::default() } }) .collect(); Ok(templates) } #[cfg(test)] mod tests { use vmm::cpu_config::aarch64::custom_cpu_template::RegisterModifier; use vmm::cpu_config::templates::RegisterValueFilter; use super::*; use crate::utils::aarch64::reg_modifier; // Summary of reg modifiers: // * An addr 0x0 modifier exists in all the templates but its value is different. // * An addr 0x1 modifier exists in all the templates and its value is same. // * An addr 0x2 modifier only exist in the third template. #[rustfmt::skip] fn build_input_templates() -> Vec { vec![ CustomCpuTemplate { reg_modifiers: vec![ reg_modifier!(0x0, 0x0), reg_modifier!(0x1, 0x1), ], ..Default::default() }, CustomCpuTemplate { reg_modifiers: vec![ reg_modifier!(0x0, 0x1), reg_modifier!(0x1, 0x1), ], ..Default::default() }, CustomCpuTemplate { reg_modifiers: vec![ reg_modifier!(0x0, 0x2), reg_modifier!(0x1, 0x1), reg_modifier!(0x2, 0x1), ], ..Default::default() }, ] } #[rustfmt::skip] fn build_expected_templates() -> Vec { vec![ CustomCpuTemplate { reg_modifiers: vec![ reg_modifier!(0x0, 0x0, 0b11), ], ..Default::default() }, CustomCpuTemplate { reg_modifiers: vec![ reg_modifier!(0x0, 0x1, 0b11), ], ..Default::default() }, CustomCpuTemplate { reg_modifiers: vec![ reg_modifier!(0x0, 0x2, 0b11), reg_modifier!(0x2, 0x1), ], ..Default::default() }, ] } #[test] fn test_strip_reg_modifiers() { let input = build_input_templates(); let result = strip(input).unwrap(); let expected = build_expected_templates(); assert_eq!(result, expected); } } ================================================ FILE: 
src/cpu-template-helper/src/template/strip/mod.rs ================================================ // Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 use std::collections::HashMap; use std::fmt::Debug; use vmm::cpu_config::templates::{Numeric, RegisterValueFilter}; use crate::utils::ModifierMapKey; #[cfg(target_arch = "aarch64")] mod aarch64; #[cfg(target_arch = "aarch64")] pub use aarch64::strip; #[cfg(target_arch = "x86_64")] mod x86_64; #[cfg(target_arch = "x86_64")] pub use x86_64::strip; #[derive(Debug, thiserror::Error, displaydoc::Display)] pub enum StripError { /// The number of inputs should be two or more. NumberOfInputs, } fn strip_common(maps: &mut [HashMap>]) -> Result<(), StripError> where K: ModifierMapKey + Debug, V: Numeric + Debug, { if maps.len() < 2 { return Err(StripError::NumberOfInputs); } // Initialize `common` with the cloned `maps[0]`. let mut common = maps[0].clone(); // Iterate all items included in the `common`. // Use `maps[0]` instead of `common` since the `common` is mutated in the loop. for (key, common_vf) in &maps[0] { // Hold which bits are different from the `common`'s value/filter. // `diff` remains 0 if all the filtered values in all the `maps` are same. let mut diff = V::zero(); for map in maps[1..].iter() { match map.get(key) { // Record which bits of filtered value are different from the `common` if the `key` // is found in the `map`. Some(map_vf) => { let map_filtered_value = map_vf.value & map_vf.filter; let common_filtered_value = common_vf.value & common_vf.filter; diff |= map_filtered_value ^ common_filtered_value; } // Remove the `key` from the `common` if at least one of the `maps` does not have // the `key`. None => { common.remove(key); } } } // Store the `diff` in the `common`'s `filter` if the `key` exist in all the `maps`. if let Some(common_vf) = common.get_mut(key) { common_vf.filter = diff; } } // Remove the `common` items from all the `maps`. 
for (key, common_vf) in common { for map in maps.iter_mut() { if common_vf.filter == V::zero() { // Remove the `key` if the filtered value is identical in all the `maps`. map.remove(&key).unwrap(); } else { // Update the `filter` with `diff`. let map_vf = map.get_mut(&key).unwrap(); map_vf.filter = map_vf.filter & common_vf.filter; } } } Ok(()) } #[cfg(test)] mod tests { use super::*; use crate::utils::tests::{MockModifierMapKey, mock_modifier}; #[test] fn test_strip_common_with_single_input() { let mut input = vec![HashMap::from([mock_modifier!(0x0, 0b0000_0000)])]; match strip_common(&mut input) { Err(StripError::NumberOfInputs) => (), _ => panic!("Should fail with `Error::NumberOfInputs`."), } } #[test] fn test_strip_common() { let mut input = vec![ HashMap::from([ mock_modifier!(0x0, 0b1111_1111, 0b1111_1111), // 0x0 => 0b1111_1111 mock_modifier!(0x1, 0b1111_1111, 0b1111_1111), // 0x1 => 0b1111_1111 mock_modifier!(0x3, 0b1111_1111, 0b1111_1111), // 0x3 => 0b1111_1111 mock_modifier!(0x4, 0b1111_1111, 0b1111_1111), // 0x4 => 0b1111_1111 mock_modifier!(0x5, 0b1111_1111, 0b1111_1111), // 0x5 => 0b1111_1111 ]), HashMap::from([ mock_modifier!(0x0, 0b1111_1111, 0b1111_1111), // 0x0 => 0b1111_1111 mock_modifier!(0x2, 0b1111_1111, 0b1111_1111), // 0x2 => 0b1111_1111 mock_modifier!(0x3, 0b0000_1111, 0b1111_1111), // 0x3 => 0b0000_1111 mock_modifier!(0x4, 0b1111_0000, 0b1111_1111), // 0x4 => 0b1111_0000 mock_modifier!(0x5, 0b1100_0000, 0b1100_1100), // 0x5 => 0b11xx_00xx ]), HashMap::from([ mock_modifier!(0x0, 0b1111_1111, 0b1111_1111), // 0x0 => 0b1111_1111 mock_modifier!(0x1, 0b1111_1111, 0b1111_1111), // 0x1 => 0b1111_1111 mock_modifier!(0x3, 0b1111_0000, 0b1111_1111), // 0x3 => 0b1111_0000 mock_modifier!(0x4, 0b1100_1100, 0b1111_1111), // 0x4 => 0b1100_1100 mock_modifier!(0x5, 0b1010_0000, 0b1111_0000), // 0x5 => 0b1010_xxxx ]), ]; let expected = vec![ HashMap::from([ mock_modifier!(0x1, 0b1111_1111, 0b1111_1111), // 0x1 => 0b1111_1111 mock_modifier!(0x3, 0b1111_1111, 
0b1111_1111), // 0x3 => 0b1111_1111 mock_modifier!(0x4, 0b1111_1111, 0b0011_1111), // 0x4 => 0bxx11_1111 mock_modifier!(0x5, 0b1111_1111, 0b0111_1111), // 0x5 => 0bx111_1111 ]), HashMap::from([ mock_modifier!(0x2, 0b1111_1111, 0b1111_1111), // 0x2 => 0b1111_1111 mock_modifier!(0x3, 0b0000_1111, 0b1111_1111), // 0x3 => 0b0000_1111 mock_modifier!(0x4, 0b1111_0000, 0b0011_1111), // 0x4 => 0bxx11_0000 mock_modifier!(0x5, 0b1100_0000, 0b0100_1100), // 0x5 => 0bx1xx_00xx ]), HashMap::from([ mock_modifier!(0x1, 0b1111_1111, 0b1111_1111), // 0x1 => 0b1111_1111 mock_modifier!(0x3, 0b1111_0000, 0b1111_1111), // 0x3 => 0b1111_0000 mock_modifier!(0x4, 0b1100_1100, 0b0011_1111), // 0x4 => 0bxx00_1100 mock_modifier!(0x5, 0b1010_0000, 0b0111_0000), // 0x5 => 0bx010_xxxx ]), ]; strip_common(&mut input).unwrap(); assert_eq!(input, expected); } } ================================================ FILE: src/cpu-template-helper/src/template/strip/x86_64.rs ================================================ // Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 use vmm::cpu_config::templates::CustomCpuTemplate; use vmm::cpu_config::x86_64::custom_cpu_template::{CpuidLeafModifier, RegisterModifier}; use crate::template::strip::{StripError, strip_common}; use crate::utils::x86_64::{CpuidModifierMap, MsrModifierMap}; #[allow(dead_code)] pub fn strip(templates: Vec) -> Result, StripError> { // Convert `Vec` to two `Vec>` of modifiers. let (mut cpuid_modifiers_maps, mut msr_modifiers_maps): (Vec<_>, Vec<_>) = templates .into_iter() .map(|template| { ( CpuidModifierMap::from(template.cpuid_modifiers).0, MsrModifierMap::from(template.msr_modifiers).0, ) }) .unzip(); // Remove common items. strip_common(&mut cpuid_modifiers_maps)?; strip_common(&mut msr_modifiers_maps)?; // Convert back to `Vec`. 
let templates = cpuid_modifiers_maps .into_iter() .zip(msr_modifiers_maps) .map(|(cpuid_modifiers_map, msr_modifiers_map)| { let cpuid_modifiers = Vec::::from(CpuidModifierMap(cpuid_modifiers_map)); let msr_modifiers = Vec::::from(MsrModifierMap(msr_modifiers_map)); CustomCpuTemplate { cpuid_modifiers, msr_modifiers, ..Default::default() } }) .collect::>(); Ok(templates) } #[cfg(test)] mod tests { use vmm::cpu_config::templates::RegisterValueFilter; use vmm::cpu_config::x86_64::cpuid::KvmCpuidFlags; use vmm::cpu_config::x86_64::custom_cpu_template::CpuidRegister::*; use vmm::cpu_config::x86_64::custom_cpu_template::{ CpuidLeafModifier, CpuidRegisterModifier, RegisterModifier, }; use super::*; use crate::utils::x86_64::{cpuid_leaf_modifier, cpuid_reg_modifier, msr_modifier}; // Summary of CPUID modifiers: // * A CPUID leaf 0x0 / subleaf 0x0 modifier exists in all the templates and its value is same. // * A CPUID leaf 0x1 / subleaf 0x0 modifier only exists in the second template. // * A CPUID leaf 0x2 / subleaf 0x1 modifier exists in all the templates, but EAX value is same // and EBX value is different across them. 
#[rustfmt::skip] fn build_input_cpuid_templates() -> Vec { vec![ CustomCpuTemplate { cpuid_modifiers: vec![ cpuid_leaf_modifier!(0x0, 0x0, KvmCpuidFlags::EMPTY, vec![ cpuid_reg_modifier!(Eax, 0x0), ]), cpuid_leaf_modifier!(0x2, 0x1, KvmCpuidFlags::SIGNIFICANT_INDEX, vec![ cpuid_reg_modifier!(Eax, 0x0), cpuid_reg_modifier!(Ebx, 0x0), ]), ], msr_modifiers: vec![], ..Default::default() }, CustomCpuTemplate { cpuid_modifiers: vec![ cpuid_leaf_modifier!(0x0, 0x0, KvmCpuidFlags::EMPTY, vec![ cpuid_reg_modifier!(Eax, 0x0), ]), cpuid_leaf_modifier!(0x1, 0x0, KvmCpuidFlags::EMPTY, vec![ cpuid_reg_modifier!(Eax, 0x0), ]), cpuid_leaf_modifier!(0x2, 0x1, KvmCpuidFlags::SIGNIFICANT_INDEX, vec![ cpuid_reg_modifier!(Eax, 0x0), cpuid_reg_modifier!(Ebx, 0x1), ]), ], msr_modifiers: vec![], ..Default::default() }, CustomCpuTemplate { cpuid_modifiers: vec![ cpuid_leaf_modifier!(0x0, 0x0, KvmCpuidFlags::EMPTY, vec![ cpuid_reg_modifier!(Eax, 0x0), ]), cpuid_leaf_modifier!(0x2, 0x1, KvmCpuidFlags::SIGNIFICANT_INDEX, vec![ cpuid_reg_modifier!(Eax, 0x0), cpuid_reg_modifier!(Ebx, 0x2), ]), ], msr_modifiers: vec![], ..Default::default() }, ] } #[rustfmt::skip] fn build_expected_cpuid_templates() -> Vec { vec![ CustomCpuTemplate { cpuid_modifiers: vec![ cpuid_leaf_modifier!(0x2, 0x1, KvmCpuidFlags::SIGNIFICANT_INDEX, vec![ cpuid_reg_modifier!(Ebx, 0x0, 0b11), ]), ], msr_modifiers: vec![], ..Default::default() }, CustomCpuTemplate { cpuid_modifiers: vec![ cpuid_leaf_modifier!(0x1, 0x0, KvmCpuidFlags::EMPTY, vec![ cpuid_reg_modifier!(Eax, 0x0), ]), cpuid_leaf_modifier!(0x2, 0x1, KvmCpuidFlags::SIGNIFICANT_INDEX, vec![ cpuid_reg_modifier!(Ebx, 0x1, 0b11), ]), ], msr_modifiers: vec![], ..Default::default() }, CustomCpuTemplate { cpuid_modifiers: vec![ cpuid_leaf_modifier!(0x2, 0x1, KvmCpuidFlags::SIGNIFICANT_INDEX, vec![ cpuid_reg_modifier!(Ebx, 0x2, 0b11), ]), ], msr_modifiers: vec![], ..Default::default() }, ] } // Summary of MSR modifiers: // * An addr 0x0 modifier exists in all the templates 
but its value is different. // * An addr 0x1 modifier exists in all the templates and its value is same. // * An addr 0x2 modifier only exists in the third template. #[rustfmt::skip] fn build_input_msr_templates() -> Vec { vec![ CustomCpuTemplate { cpuid_modifiers: vec![], msr_modifiers: vec![ msr_modifier!(0x0, 0x0), msr_modifier!(0x1, 0x1), ], ..Default::default() }, CustomCpuTemplate { cpuid_modifiers: vec![], msr_modifiers: vec![ msr_modifier!(0x0, 0x1), msr_modifier!(0x1, 0x1), ], ..Default::default() }, CustomCpuTemplate { cpuid_modifiers: vec![], msr_modifiers: vec![ msr_modifier!(0x0, 0x2), msr_modifier!(0x1, 0x1), msr_modifier!(0x2, 0x1), ], ..Default::default() }, ] } #[rustfmt::skip] fn build_expected_msr_templates() -> Vec { vec![ CustomCpuTemplate { cpuid_modifiers: vec![], msr_modifiers: vec![ msr_modifier!(0x0, 0x0, 0b11), ], ..Default::default() }, CustomCpuTemplate { cpuid_modifiers: vec![], msr_modifiers: vec![ msr_modifier!(0x0, 0x1, 0b11), ], ..Default::default() }, CustomCpuTemplate { cpuid_modifiers: vec![], msr_modifiers: vec![ msr_modifier!(0x0, 0x2, 0b11), msr_modifier!(0x2, 0x1), ], ..Default::default() }, ] } #[test] fn test_strip_cpuid_modifiers() { let input = build_input_cpuid_templates(); let result = strip(input).unwrap(); let expected = build_expected_cpuid_templates(); assert_eq!(result, expected); } #[test] fn test_strip_msr_modifiers() { let input = build_input_msr_templates(); let result = strip(input).unwrap(); let expected = build_expected_msr_templates(); assert_eq!(result, expected); } } ================================================ FILE: src/cpu-template-helper/src/template/verify/aarch64.rs ================================================ // Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
// SPDX-License-Identifier: Apache-2.0 use vmm::cpu_config::templates::CustomCpuTemplate; use super::{VerifyError, verify_common}; use crate::utils::aarch64::RegModifierMap; pub fn verify( cpu_template: CustomCpuTemplate, cpu_config: CustomCpuTemplate, ) -> Result<(), VerifyError> { let reg_template = RegModifierMap::from(cpu_template.reg_modifiers); let reg_config = RegModifierMap::from(cpu_config.reg_modifiers); verify_common(reg_template.0, reg_config.0) } #[cfg(test)] mod tests { use vmm::cpu_config::aarch64::custom_cpu_template::RegisterModifier; use vmm::cpu_config::templates::RegisterValueFilter; use super::*; use crate::utils::aarch64::reg_modifier; #[test] #[rustfmt::skip] fn test_verify_non_existing_reg() { // Test with a sample whose register exists in template, but not in config. let template = CustomCpuTemplate { reg_modifiers: vec![ reg_modifier!(0x0, 0b00000000), reg_modifier!(0x1, 0b11111111), ], ..Default::default() }; let config = CustomCpuTemplate { reg_modifiers: vec![ reg_modifier!(0x0, 0b00000000), ], ..Default::default() }; assert_eq!( verify(template, config).unwrap_err().to_string(), "ID=0x1 not found in CPU configuration." ); } #[test] fn test_verify_mismatched_reg() { // Test with a sample whose register value mismatches. 
let template = CustomCpuTemplate { reg_modifiers: vec![reg_modifier!(0x0, 0b10101010, 0b11110000)], ..Default::default() }; let config = CustomCpuTemplate { reg_modifiers: vec![reg_modifier!(0x0, 0b01010101, 0b11111111)], ..Default::default() }; assert_eq!( verify(template, config).unwrap_err().to_string(), "Value for ID=0x0 mismatched.\n\ * CPU template : 0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010100000\n\ * CPU configuration: 0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001010000\n\ * Diff : ^^^^ " ) } } ================================================ FILE: src/cpu-template-helper/src/template/verify/mod.rs ================================================ // Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 use std::collections::HashMap; use std::fmt::Debug; use vmm::cpu_config::templates::{Numeric, RegisterValueFilter}; use crate::utils::{DiffString, ModifierMapKey}; #[cfg(target_arch = "aarch64")] mod aarch64; #[cfg(target_arch = "aarch64")] pub use aarch64::verify; #[cfg(target_arch = "x86_64")] mod x86_64; #[cfg(target_arch = "x86_64")] pub use x86_64::verify; #[rustfmt::skip] #[derive(Debug, thiserror::Error, displaydoc::Display)] pub enum VerifyError { /// {0} not found in CPU configuration. KeyNotFound(String), /** Value for {0} mismatched. {1} */ ValueMismatched(String, String), } /// Verify that the given CPU template is applied as intended. /// /// This function is an arch-agnostic part of CPU template verification. As template formats differ /// between x86_64 and aarch64, the arch-specific part converts the structure to an arch-agnostic /// `HashMap` implementing `ModifierMapKey` before calling this arch-agnostic function. 
pub fn verify_common<K, V>(
    template: HashMap<K, RegisterValueFilter<V>>,
    config: HashMap<K, RegisterValueFilter<V>>,
) -> Result<(), VerifyError>
where
    K: ModifierMapKey + Debug,
    V: Numeric + Debug,
{
    for (key, template_value_filter) in template {
        let config_value_filter = config
            .get(&key)
            .ok_or_else(|| VerifyError::KeyNotFound(key.to_string()))?;
        // Compare only the bits selected by the *template's* filter; bits the template does not
        // care about are allowed to differ in the guest CPU configuration.
        let template_value = template_value_filter.value & template_value_filter.filter;
        let config_value = config_value_filter.value & template_value_filter.filter;
        if template_value != config_value {
            return Err(VerifyError::ValueMismatched(
                key.to_string(),
                V::to_diff_string(template_value, config_value),
            ));
        }
    }
    Ok(())
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::utils::tests::{MockModifierMapKey, mock_modifier};

    #[test]
    fn test_verify_modifier_map_with_non_existing_key() {
        // Test with a sample where a key in CPU template is not found in CPU config.
        let cpu_template_map = HashMap::from([mock_modifier!(0x0, 0b0000_0000)]);
        let cpu_config_map = HashMap::new();

        assert_eq!(
            verify_common(cpu_template_map, cpu_config_map)
                .unwrap_err()
                .to_string(),
            "ID=0x0 not found in CPU configuration.".to_string()
        );
    }

    #[test]
    #[rustfmt::skip]
    fn test_verify_modifier_map_with_mismatched_value() {
        // Test with a sample whose filtered value mismatches between CPU config and CPU template.
        let cpu_template_map = HashMap::from([mock_modifier!(0x0, 0b0000_0101, 0b0000_1111)]);
        let cpu_config_map = HashMap::from([mock_modifier!(0x0, 0b0000_0000, 0b1111_1111)]);

        // NOTE(review): the column padding inside this expected message was collapsed by
        // extraction; realigned so labels and diff markers match `to_diff_string`'s output.
        assert_eq!(
            verify_common(cpu_template_map, cpu_config_map)
                .unwrap_err()
                .to_string(),
            "Value for ID=0x0 mismatched.\n\
             * CPU template     : 0b00000101\n\
             * CPU configuration: 0b00000000\n\
             * Diff             :        ^ ^"
        )
    }

    #[test]
    fn test_verify_modifier_map_with_valid_value() {
        // Test with valid CPU template and CPU config.
let cpu_template_map = HashMap::from([mock_modifier!(0x0, 0b0000_1010, 0b0000_1111)]); let cpu_config_map = HashMap::from([mock_modifier!(0x0, 0b1010_1010, 0b1111_1111)]); verify_common(cpu_template_map, cpu_config_map).unwrap(); } } ================================================ FILE: src/cpu-template-helper/src/template/verify/x86_64.rs ================================================ // Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 use vmm::cpu_config::templates::CustomCpuTemplate; use super::{VerifyError, verify_common}; use crate::utils::x86_64::{CpuidModifierMap, MsrModifierMap}; pub fn verify( cpu_template: CustomCpuTemplate, cpu_config: CustomCpuTemplate, ) -> Result<(), VerifyError> { let cpuid_template = CpuidModifierMap::from(cpu_template.cpuid_modifiers); let cpuid_config = CpuidModifierMap::from(cpu_config.cpuid_modifiers); verify_common(cpuid_template.0, cpuid_config.0)?; let msr_template = MsrModifierMap::from(cpu_template.msr_modifiers); let msr_config = MsrModifierMap::from(cpu_config.msr_modifiers); verify_common(msr_template.0, msr_config.0)?; Ok(()) } #[cfg(test)] mod tests { use std::collections::HashMap; use vmm::cpu_config::templates::RegisterValueFilter; use vmm::cpu_config::x86_64::cpuid::KvmCpuidFlags; use vmm::cpu_config::x86_64::custom_cpu_template::CpuidRegister::*; use vmm::cpu_config::x86_64::custom_cpu_template::{ CpuidLeafModifier, CpuidRegisterModifier, RegisterModifier, }; use super::*; use crate::utils::x86_64::{ CpuidModifierMapKey, MsrModifierMapKey, cpuid_leaf_modifier, cpuid_reg_modifier, msr_modifier, }; macro_rules! cpuid_modifier_map { ($leaf:expr, $subleaf:expr, $flags:expr, $register:expr, $value:expr) => { ( CpuidModifierMapKey { leaf: $leaf, subleaf: $subleaf, flags: $flags, register: $register, }, RegisterValueFilter { filter: u32::MAX.into(), value: $value, }, ) }; } macro_rules! 
msr_modifier_map { ($addr:expr, $value:expr) => { ( MsrModifierMapKey($addr), RegisterValueFilter { filter: u64::MAX.into(), value: $value, }, ) }; } #[test] fn test_format_cpuid_modifier_map_key() { let key = CpuidModifierMapKey { leaf: 0x0, subleaf: 0x1, flags: KvmCpuidFlags::STATEFUL_FUNC, register: Edx, }; assert_eq!( key.to_string(), "leaf=0x0, subleaf=0x1, flags=0b10, register=edx", ) } #[test] #[rustfmt::skip] fn test_cpuid_modifier_from_vec_to_map() { let modifier_vec = vec![ cpuid_leaf_modifier!(0x0, 0x0, KvmCpuidFlags::EMPTY, vec![ cpuid_reg_modifier!(Eax, 0x0), ]), cpuid_leaf_modifier!(0x1, 0x2, KvmCpuidFlags::SIGNIFICANT_INDEX, vec![ cpuid_reg_modifier!(Ebx, 0x3), cpuid_reg_modifier!(Ecx, 0x4), ]), ]; let modifier_map = HashMap::from([ cpuid_modifier_map!(0x0, 0x0, KvmCpuidFlags::EMPTY, Eax, 0x0), cpuid_modifier_map!(0x1, 0x2, KvmCpuidFlags::SIGNIFICANT_INDEX, Ebx, 0x3), cpuid_modifier_map!(0x1, 0x2, KvmCpuidFlags::SIGNIFICANT_INDEX, Ecx, 0x4), ]); assert_eq!( CpuidModifierMap::from(modifier_vec), CpuidModifierMap(modifier_map), ); } #[test] fn test_format_msr_modifier_map_key() { let key = MsrModifierMapKey(0x1234); assert_eq!(key.to_string(), "index=0x1234"); } #[test] fn test_msr_modifier_from_vec_to_map() { let modifier_vec = vec![ msr_modifier!(0x1, 0x2), msr_modifier!(0x0, 0x0), msr_modifier!(0x3, 0x2), ]; let modifier_map = HashMap::from([ msr_modifier_map!(0x0, 0x0), msr_modifier_map!(0x1, 0x2), msr_modifier_map!(0x3, 0x2), ]); assert_eq!( MsrModifierMap::from(modifier_vec), MsrModifierMap(modifier_map), ); } #[test] #[rustfmt::skip] fn test_verify_non_existing_cpuid() { // Test with a sample whose CPUID exists in template, but not in config. 
let template = CustomCpuTemplate { cpuid_modifiers: vec![cpuid_leaf_modifier!(0x0, 0x0, KvmCpuidFlags::EMPTY, vec![ cpuid_reg_modifier!(Eax, 0b10101010, 0b11110000), cpuid_reg_modifier!(Ebx, 0b01010101, 0b00001111), ])], msr_modifiers: vec![], ..Default::default() }; let config = CustomCpuTemplate { cpuid_modifiers: vec![cpuid_leaf_modifier!(0x0, 0x0, KvmCpuidFlags::EMPTY, vec![ cpuid_reg_modifier!(Eax, 0b10101010, 0b11111111), ])], msr_modifiers: vec![], ..Default::default() }; assert_eq!( verify(template, config).unwrap_err().to_string(), "leaf=0x0, subleaf=0x0, flags=0b0, register=ebx not found in CPU configuration." ); } #[test] #[rustfmt::skip] fn test_verify_mismatched_cpuid() { // Test with a sample whose CPUID value mismatches. let template = CustomCpuTemplate { cpuid_modifiers: vec![cpuid_leaf_modifier!(0x0, 0x0, KvmCpuidFlags::EMPTY, vec![cpuid_reg_modifier!(Eax, 0b10101010, 0b11110000)] )], msr_modifiers: vec![], ..Default::default() }; let config = CustomCpuTemplate { cpuid_modifiers: vec![cpuid_leaf_modifier!(0x0, 0x0, KvmCpuidFlags::EMPTY, vec![cpuid_reg_modifier!(Eax, 0b11111111)] )], msr_modifiers: vec![], ..Default::default() }; assert_eq!( verify(template, config).unwrap_err().to_string(), "Value for leaf=0x0, subleaf=0x0, flags=0b0, register=eax mismatched.\n\ * CPU template : 0b00000000000000000000000010100000\n\ * CPU configuration: 0b00000000000000000000000011110000\n\ * Diff : ^ ^ ", ); } #[test] #[rustfmt::skip] fn test_verify_non_existing_msr() { // Test with a sample whose MSR exists in template, but not in config. let template = CustomCpuTemplate { cpuid_modifiers: vec![], msr_modifiers: vec![ msr_modifier!(0x0, 0b00000000), msr_modifier!(0x1, 0b11111111), ], ..Default::default() }; let config = CustomCpuTemplate { cpuid_modifiers: vec![], msr_modifiers: vec![ msr_modifier!(0x0, 0b00000000), ], ..Default::default() }; assert_eq!( verify(template, config).unwrap_err().to_string(), "index=0x1 not found in CPU configuration." 
); } #[test] #[rustfmt::skip] fn test_verify_mismatched_msr() { // Test with a sample whose CPUID value mismatches. let template = CustomCpuTemplate { cpuid_modifiers: vec![], msr_modifiers: vec![ msr_modifier!(0x0, 0b10101010, 0b11110000), ], ..Default::default() }; let config = CustomCpuTemplate { cpuid_modifiers: vec![], msr_modifiers: vec![ msr_modifier!(0x0, 0b01010101, 0b11111111) ], ..Default::default() }; assert_eq!( verify(template, config).unwrap_err().to_string(), "Value for index=0x0 mismatched.\n\ * CPU template : 0b0000000000000000000000000000000000000000000000000000000010100000\n\ * CPU configuration: 0b0000000000000000000000000000000000000000000000000000000001010000\n\ * Diff : ^^^^ ", ); } } ================================================ FILE: src/cpu-template-helper/src/utils/aarch64.rs ================================================ // Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 use std::collections::HashMap; use std::fmt::Display; use vmm::cpu_config::aarch64::custom_cpu_template::RegisterModifier; use vmm::cpu_config::templates::RegisterValueFilter; use super::ModifierMapKey; #[derive(Debug, Eq, PartialEq, Hash, Clone)] pub struct RegModifierMapKey(pub u64); impl ModifierMapKey for RegModifierMapKey {} impl Display for RegModifierMapKey { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "ID={:#x}", self.0) } } #[derive(Debug, Eq, PartialEq)] pub struct RegModifierMap(pub HashMap>); impl From> for RegModifierMap { fn from(modifiers: Vec) -> Self { let mut map = HashMap::new(); for modifier in modifiers { map.insert(RegModifierMapKey(modifier.addr), modifier.bitmap); } RegModifierMap(map) } } impl From for Vec { fn from(modifier_map: RegModifierMap) -> Self { let mut modifier_vec = modifier_map .0 .into_iter() .map(|(modifier_key, modifier_value)| RegisterModifier { addr: modifier_key.0, bitmap: modifier_value, }) .collect::>(); 
modifier_vec.sort_by_key(|modifier| modifier.addr);
        modifier_vec
    }
}

// Build an aarch64 `RegisterModifier`; the single-value form defaults the filter to all bits set.
macro_rules! reg_modifier {
    ($addr:expr, $value:expr) => {
        RegisterModifier {
            addr: $addr,
            bitmap: RegisterValueFilter {
                filter: u128::MAX,
                value: $value,
            },
        }
    };
    ($addr:expr, $value:expr, $filter:expr) => {
        RegisterModifier {
            addr: $addr,
            bitmap: RegisterValueFilter {
                filter: $filter,
                value: $value,
            },
        }
    };
}
pub(crate) use reg_modifier;

#[cfg(test)]
mod tests {
    use super::*;

    macro_rules! reg_modifier_map {
        ($id:expr, $value:expr) => {
            (
                RegModifierMapKey($id),
                RegisterValueFilter {
                    filter: u128::MAX,
                    value: $value,
                },
            )
        };
    }

    #[test]
    fn test_format_reg_modifier_map_key() {
        let key = RegModifierMapKey(0x1234);
        assert_eq!(key.to_string(), "ID=0x1234");
    }

    fn build_sample_reg_modifier_vec() -> Vec<RegisterModifier> {
        vec![
            reg_modifier!(0x0, 0x0),
            reg_modifier!(0x1, 0x2),
            reg_modifier!(0x3, 0x2),
        ]
    }

    fn build_sample_reg_modifier_map() -> RegModifierMap {
        RegModifierMap(HashMap::from([
            reg_modifier_map!(0x0, 0x0),
            reg_modifier_map!(0x1, 0x2),
            reg_modifier_map!(0x3, 0x2),
        ]))
    }

    #[test]
    fn test_reg_modifier_from_vec_to_map() {
        let modifier_vec = build_sample_reg_modifier_vec();
        let modifier_map = build_sample_reg_modifier_map();
        assert_eq!(RegModifierMap::from(modifier_vec), modifier_map);
    }

    #[test]
    fn test_reg_modifier_from_map_to_vec() {
        let modifier_map = build_sample_reg_modifier_map();
        let modifier_vec = build_sample_reg_modifier_vec();
        assert_eq!(Vec::<RegisterModifier>::from(modifier_map), modifier_vec);
    }
}

================================================
FILE: src/cpu-template-helper/src/utils/mock_kernel/main.c
================================================
// Copyright 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

/* Minimal mock "kernel" entry point: spin forever so a microVM has something to boot. */
void _start() {
    while (1)
        ;
}

================================================
FILE: src/cpu-template-helper/src/utils/mod.rs
================================================
// Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0 use std::ffi::OsString; use std::fmt::Display; use std::fs::read_to_string; use std::hash::Hash; use std::io::Write; use std::path::{Path, PathBuf}; use std::sync::{Arc, Mutex}; use vmm::builder::{StartMicrovmError, build_microvm_for_boot}; use vmm::cpu_config::templates::{CustomCpuTemplate, Numeric}; use vmm::resources::VmResources; use vmm::seccomp::get_empty_filters; use vmm::vmm_config::instance_info::{InstanceInfo, VmState}; use vmm::{EventManager, HTTP_MAX_PAYLOAD_SIZE, Vmm}; use vmm_sys_util::tempfile::TempFile; #[cfg(target_arch = "aarch64")] pub mod aarch64; #[cfg(target_arch = "x86_64")] pub mod x86_64; pub const CPU_TEMPLATE_HELPER_VERSION: &str = env!("CARGO_PKG_VERSION"); /// Trait for key of `HashMap`-based modifier. /// /// This is a wrapper trait of some traits required for a key of `HashMap` modifier. pub trait ModifierMapKey: Eq + PartialEq + Hash + Display + Clone {} pub trait DiffString { // Generate a string to display difference of filtered values between CPU template and guest // CPU config. #[rustfmt::skip] fn to_diff_string(template: V, config: V) -> String; } impl DiffString for V { // Generate a string to display difference of filtered values between CPU template and guest // CPU config. 
#[rustfmt::skip] fn to_diff_string(template: V, config: V) -> String { let mut diff = String::new(); for i in (0..V::BITS).rev() { let mask = V::one() << i; let template_bit = template & mask; let config_bit = config & mask; diff.push(match template_bit == config_bit { true => ' ', false => '^', }); } format!( "* CPU template : 0b{template:0width$b}\n\ * CPU configuration: 0b{config:0width$b}\n\ * Diff : {diff}", width = V::BITS as usize, ) } } #[derive(Debug, thiserror::Error, displaydoc::Display)] pub enum UtilsError { /// Failed to create VmResources: {0} CreateVmResources(vmm::resources::ResourcesError), /// Failed to build microVM: {0} BuildMicroVm(#[from] StartMicrovmError), /// Failed to create temporary file: {0} CreateTempFile(#[from] vmm_sys_util::errno::Error), /// Failed to operate file: {0} FileIo(#[from] std::io::Error), /// Failed to serialize/deserialize JSON file: {0} Serde(#[from] serde_json::Error), } pub fn load_cpu_template(path: &PathBuf) -> Result { let template_json = read_to_string(path)?; let template = serde_json::from_str(&template_json)?; Ok(template) } // Utility function to prepare scratch kernel image and rootfs and build mock Firecracker config. fn build_mock_config() -> Result<(TempFile, TempFile, String), UtilsError> { let kernel = TempFile::new()?; kernel .as_file() .write_all(include_bytes!("mock_kernel/kernel.bin"))?; let rootfs = TempFile::new()?; let config = format!( r#"{{ "boot-source": {{ "kernel_image_path": "{}" }}, "drives": [ {{ "drive_id": "rootfs", "is_root_device": true, "path_on_host": "{}" }} ] }}"#, // Temporary file path consists of alphanumerics. kernel.as_path().to_str().unwrap(), rootfs.as_path().to_str().unwrap(), ); Ok((kernel, rootfs, config)) } pub fn build_microvm_from_config( config: Option, template: Option, ) -> Result<(Arc>, VmResources), UtilsError> { // Prepare resources from the given config file. 
let (_kernel, _rootfs, config) = match config { Some(config) => (None, None, config), None => { let (kernel, rootfs, config) = build_mock_config()?; (Some(kernel), Some(rootfs), config) } }; let instance_info = InstanceInfo { id: "anonymous-instance".to_string(), state: VmState::NotStarted, vmm_version: CPU_TEMPLATE_HELPER_VERSION.to_string(), app_name: "cpu-template-helper".to_string(), }; let mut vm_resources = VmResources::from_json(&config, &instance_info, HTTP_MAX_PAYLOAD_SIZE, None) .map_err(UtilsError::CreateVmResources)?; if let Some(template) = template { vm_resources.set_custom_cpu_template(template); } let mut event_manager = EventManager::new().unwrap(); let seccomp_filters = get_empty_filters(); // Build a microVM. let vmm = build_microvm_for_boot( &instance_info, &vm_resources, &mut event_manager, &seccomp_filters, )?; Ok((vmm, vm_resources)) } pub fn add_suffix(path: &Path, suffix: &str) -> PathBuf { // Extract the part of the filename before the extension. let mut new_file_name = OsString::from(path.file_stem().unwrap()); // Push the suffix and the extension. new_file_name.push(suffix); if let Some(ext) = path.extension() { new_file_name.push("."); new_file_name.push(ext); } // Swap the file name. path.with_file_name(new_file_name) } #[cfg(test)] pub mod tests { use std::fmt::Display; use vmm::resources::VmmConfig; use super::*; const SUFFIX: &str = "_suffix"; #[derive(Debug, PartialEq, Eq, Hash, Clone)] pub struct MockModifierMapKey(pub u8); impl ModifierMapKey for MockModifierMapKey {} impl Display for MockModifierMapKey { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "ID={:#x}", self.0) } } macro_rules! 
mock_modifier { ($key:expr, $value:expr) => { ( MockModifierMapKey($key), RegisterValueFilter:: { filter: u8::MAX, value: $value, }, ) }; ($key:expr, $value:expr, $filter:expr) => { ( MockModifierMapKey($key), RegisterValueFilter:: { filter: $filter, value: $value, }, ) }; } pub(crate) use mock_modifier; #[test] fn test_build_mock_config() { let kernel_path; let rootfs_path; { let (kernel, rootfs, config) = build_mock_config().unwrap(); kernel_path = kernel.as_path().to_path_buf(); rootfs_path = rootfs.as_path().to_path_buf(); // Ensure the kernel exists and its content is written. assert!(kernel.as_file().metadata().unwrap().len() > 0); // Ensure the rootfs exists and it is empty. assert_eq!(rootfs.as_file().metadata().unwrap().len(), 0); // Ensure the generated config is valid as `VmmConfig`. serde_json::from_str::(&config).unwrap(); } // Ensure the temporary mock resources are deleted. assert!(!kernel_path.exists()); assert!(!rootfs_path.exists()); } #[test] fn test_build_microvm() { build_microvm_from_config(None, None).unwrap(); } #[test] fn test_add_suffix_filename_only() { let path = PathBuf::from("file.ext"); let expected = PathBuf::from(format!("file{SUFFIX}.ext")); assert_eq!(add_suffix(&path, SUFFIX), expected); } #[test] fn test_add_suffix_filename_without_ext() { let path = PathBuf::from("file_no_ext"); let expected = PathBuf::from(format!("file_no_ext{SUFFIX}")); assert_eq!(add_suffix(&path, SUFFIX), expected); } #[test] fn test_add_suffix_rel_path() { let path = PathBuf::from("relative/path/to/file.ext"); let expected = PathBuf::from(format!("relative/path/to/file{SUFFIX}.ext")); assert_eq!(add_suffix(&path, SUFFIX), expected); } #[test] fn test_add_suffix_abs_path() { let path = PathBuf::from("/absolute/path/to/file.ext"); let expected = PathBuf::from(format!("/absolute/path/to/file{SUFFIX}.ext")); assert_eq!(add_suffix(&path, SUFFIX), expected); } } ================================================ FILE: src/cpu-template-helper/src/utils/x86_64.rs 
================================================ // Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 use std::collections::HashMap; use std::fmt::Display; use vmm::cpu_config::templates::RegisterValueFilter; use vmm::cpu_config::x86_64::cpuid::KvmCpuidFlags; use vmm::cpu_config::x86_64::custom_cpu_template::{ CpuidLeafModifier, CpuidRegister, CpuidRegisterModifier, RegisterModifier, }; use super::ModifierMapKey; #[derive(Debug, Eq, PartialEq, Hash, Clone)] pub struct CpuidModifierMapKey { pub leaf: u32, pub subleaf: u32, pub flags: KvmCpuidFlags, pub register: CpuidRegister, } impl ModifierMapKey for CpuidModifierMapKey {} impl Display for CpuidModifierMapKey { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!( f, "leaf={:#x}, subleaf={:#x}, flags={:#b}, register={}", self.leaf, self.subleaf, self.flags.0, format!("{:?}", self.register).to_lowercase() ) } } #[derive(Debug, Eq, PartialEq)] pub struct CpuidModifierMap(pub HashMap>); impl From> for CpuidModifierMap { fn from(leaf_modifiers: Vec) -> Self { let mut map = HashMap::new(); for leaf_modifier in leaf_modifiers { for reg_modifier in leaf_modifier.modifiers { map.insert( CpuidModifierMapKey { leaf: leaf_modifier.leaf, subleaf: leaf_modifier.subleaf, flags: leaf_modifier.flags, register: reg_modifier.register, }, reg_modifier.bitmap, ); } } CpuidModifierMap(map) } } impl From for Vec { fn from(modifier_map: CpuidModifierMap) -> Self { let mut leaf_modifiers = Vec::::new(); for (modifier_key, modifier_value) in modifier_map.0 { let leaf_modifier = leaf_modifiers.iter_mut().find(|leaf_modifier| { leaf_modifier.leaf == modifier_key.leaf && leaf_modifier.subleaf == modifier_key.subleaf && leaf_modifier.flags == modifier_key.flags }); if let Some(leaf_modifier) = leaf_modifier { leaf_modifier.modifiers.push(CpuidRegisterModifier { register: modifier_key.register, bitmap: modifier_value, }); } else { 
leaf_modifiers.push(CpuidLeafModifier { leaf: modifier_key.leaf, subleaf: modifier_key.subleaf, flags: modifier_key.flags, modifiers: vec![CpuidRegisterModifier { register: modifier_key.register, bitmap: modifier_value, }], }); } } leaf_modifiers.sort_by_key(|leaf_modifier| (leaf_modifier.leaf, leaf_modifier.subleaf)); leaf_modifiers.iter_mut().for_each(|leaf_modifier| { leaf_modifier .modifiers .sort_by_key(|reg_modifier| reg_modifier.register.clone()) }); leaf_modifiers } } #[derive(Debug, Eq, PartialEq, Hash, Clone)] pub struct MsrModifierMapKey(pub u32); impl ModifierMapKey for MsrModifierMapKey {} impl Display for MsrModifierMapKey { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "index={:#x}", self.0) } } #[derive(Debug, Eq, PartialEq)] pub struct MsrModifierMap(pub HashMap>); impl From> for MsrModifierMap { fn from(modifiers: Vec) -> Self { let mut map = HashMap::new(); for modifier in modifiers { map.insert(MsrModifierMapKey(modifier.addr), modifier.bitmap); } MsrModifierMap(map) } } impl From for Vec { fn from(modifier_map: MsrModifierMap) -> Self { let mut modifier_vec = modifier_map .0 .into_iter() .map(|(modifier_key, modifier_value)| RegisterModifier { addr: modifier_key.0, bitmap: modifier_value, }) .collect::>(); modifier_vec.sort_by_key(|modifier| modifier.addr); modifier_vec } } macro_rules! cpuid_reg_modifier { ($register:expr, $value:expr) => { CpuidRegisterModifier { register: $register, bitmap: RegisterValueFilter { filter: u32::MAX.into(), value: $value, }, } }; ($register:expr, $value:expr, $filter:expr) => { CpuidRegisterModifier { register: $register, bitmap: RegisterValueFilter { filter: $filter, value: $value, }, } }; } macro_rules! cpuid_leaf_modifier { ($leaf:expr, $subleaf:expr, $flags:expr, $reg_modifiers:expr) => { CpuidLeafModifier { leaf: $leaf, subleaf: $subleaf, flags: $flags, modifiers: $reg_modifiers, } }; } macro_rules! 
msr_modifier { ($addr:expr, $value:expr) => { RegisterModifier { addr: $addr, bitmap: RegisterValueFilter { filter: u64::MAX, value: $value, }, } }; ($addr:expr, $value:expr, $filter:expr) => { RegisterModifier { addr: $addr, bitmap: RegisterValueFilter { filter: $filter, value: $value, }, } }; } pub(crate) use {cpuid_leaf_modifier, cpuid_reg_modifier, msr_modifier}; #[cfg(test)] mod tests { use vmm::cpu_config::x86_64::custom_cpu_template::CpuidRegister::*; use vmm::cpu_config::x86_64::custom_cpu_template::CpuidRegisterModifier; use super::*; macro_rules! cpuid_modifier_map { ($leaf:expr, $subleaf:expr, $flags:expr, $register:expr, $value:expr) => { ( CpuidModifierMapKey { leaf: $leaf, subleaf: $subleaf, flags: $flags, register: $register, }, RegisterValueFilter { filter: u32::MAX.into(), value: $value, }, ) }; } macro_rules! msr_modifier_map { ($addr:expr, $value:expr) => { ( MsrModifierMapKey($addr), RegisterValueFilter { filter: u64::MAX.into(), value: $value, }, ) }; } #[test] fn test_format_cpuid_modifier_map_key() { let key = CpuidModifierMapKey { leaf: 0x0, subleaf: 0x1, flags: KvmCpuidFlags::STATEFUL_FUNC, register: Edx, }; assert_eq!( key.to_string(), "leaf=0x0, subleaf=0x1, flags=0b10, register=edx", ) } #[rustfmt::skip] fn build_sample_cpuid_modifier_vec() -> Vec { vec![ cpuid_leaf_modifier!(0x0, 0x0, KvmCpuidFlags::EMPTY, vec![ cpuid_reg_modifier!(Eax, 0x0), ]), cpuid_leaf_modifier!(0x1, 0x2, KvmCpuidFlags::SIGNIFICANT_INDEX, vec![ cpuid_reg_modifier!(Ebx, 0x3), cpuid_reg_modifier!(Ecx, 0x4), ]), ] } #[rustfmt::skip] fn build_sample_cpuid_modifier_map() -> CpuidModifierMap { CpuidModifierMap(HashMap::from([ cpuid_modifier_map!(0x0, 0x0, KvmCpuidFlags::EMPTY, Eax, 0x0), cpuid_modifier_map!(0x1, 0x2, KvmCpuidFlags::SIGNIFICANT_INDEX, Ebx, 0x3), cpuid_modifier_map!(0x1, 0x2, KvmCpuidFlags::SIGNIFICANT_INDEX, Ecx, 0x4), ])) } #[test] fn test_cpuid_modifier_from_vec_to_map() { let modifier_vec = build_sample_cpuid_modifier_vec(); let modifier_map = 
build_sample_cpuid_modifier_map(); assert_eq!(CpuidModifierMap::from(modifier_vec), modifier_map); } #[test] fn test_cpuid_modifier_from_map_to_vec() { let modifier_map = build_sample_cpuid_modifier_map(); let modifier_vec = build_sample_cpuid_modifier_vec(); assert_eq!(Vec::::from(modifier_map), modifier_vec); } #[test] fn test_format_msr_modifier_map_key() { let key = MsrModifierMapKey(0x1234); assert_eq!(key.to_string(), "index=0x1234"); } fn build_sample_msr_modifier_vec() -> Vec { vec![ msr_modifier!(0x0, 0x0), msr_modifier!(0x1, 0x2), msr_modifier!(0x3, 0x2), ] } fn build_sample_msr_modifier_map() -> MsrModifierMap { MsrModifierMap(HashMap::from([ msr_modifier_map!(0x0, 0x0), msr_modifier_map!(0x1, 0x2), msr_modifier_map!(0x3, 0x2), ])) } #[test] fn test_msr_modifier_from_vec_to_map() { let modifier_vec = build_sample_msr_modifier_vec(); let modifier_map = build_sample_msr_modifier_map(); assert_eq!(MsrModifierMap::from(modifier_vec), modifier_map); } #[test] fn test_msr_modifier_from_map_to_vec() { let modifier_map = build_sample_msr_modifier_map(); let modifier_vec = build_sample_msr_modifier_vec(); assert_eq!(Vec::::from(modifier_map), modifier_vec); } } ================================================ FILE: src/firecracker/Cargo.toml ================================================ [package] name = "firecracker" version = "1.16.0-dev" authors = ["Amazon Firecracker team "] edition = "2024" build = "build.rs" description = "Firecracker enables you to deploy workloads in lightweight virtual machines, called microVMs, which provide enhanced security and workload isolation over traditional VMs, while enabling the speed and resource efficiency of containers." 
homepage = "https://firecracker-microvm.github.io/"
license = "Apache-2.0"

[lib]
bench = false

[[bin]]
name = "firecracker"
bench = false

[features]
tracing = ["log-instrument", "utils/tracing", "vmm/tracing"]
gdb = ["vmm/gdb"]

[dependencies]
displaydoc = "0.2.5"
event-manager = "0.4.2"
libc = "0.2.183"
log-instrument = { path = "../log-instrument", optional = true }
micro_http = { git = "https://github.com/firecracker-microvm/micro-http" }
serde = { version = "1.0.228", features = ["derive"] }
serde_derive = "1.0.136"
serde_json = "1.0.149"
thiserror = "2.0.18"
utils = { path = "../utils" }
vmm = { path = "../vmm" }
vmm-sys-util = { version = "0.15.0", features = ["with-serde"] }

[build-dependencies]
seccompiler = { path = "../seccompiler" }
serde = { version = "1.0.228" }
serde_json = "1.0.149"

[dev-dependencies]
cargo_toml = "0.22.3"
libc = "0.2.183"
regex = { version = "1.12.3", default-features = false, features = [
    "std",
    "unicode-perl",
] }

# Dev-Dependencies for uffd examples
serde = { version = "1.0.228", features = ["derive"] }
userfaultfd = "0.9.0"

[lints]
workspace = true

[[example]]
name = "uffd_malicious_handler"
path = "examples/uffd/malicious_handler.rs"

[[example]]
name = "uffd_on_demand_handler"
path = "examples/uffd/on_demand_handler.rs"

[[example]]
name = "uffd_fault_all_handler"
path = "examples/uffd/fault_all_handler.rs"

[[example]]
name = "seccomp_harmless"
path = "examples/seccomp/harmless.rs"

[[example]]
name = "seccomp_jailer"
path = "examples/seccomp/jailer.rs"

[[example]]
name = "seccomp_malicious"
path = "examples/seccomp/malicious.rs"

[[example]]
name = "seccomp_panic"
path = "examples/seccomp/panic.rs"

================================================
FILE: src/firecracker/build.rs
================================================
// Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

use std::path::Path;

// File name (placed under OUT_DIR) of the compiled, serialized BPF seccomp filter.
const ADVANCED_BINARY_FILTER_FILE_NAME: &str = "seccomp_filter.bpf";
// Directory holding the per-target JSON seccomp policies.
const JSON_DIR: &str = "../../resources/seccomp";
// Seccompiler sources; any change there must retrigger this build script.
const SECCOMPILER_SRC_DIR: &str = "../seccompiler/src";

// This script is run on every modification in the target-specific JSON file in `resources/seccomp`.
// It compiles the JSON seccomp policies into a serializable BPF format, using seccompiler-bin.
// The generated binary code will get included in Firecracker's code, at compile-time.
fn main() {
    // Target triple
    let target = std::env::var("TARGET").expect("Missing target.");
    let debug: bool = std::env::var("DEBUG")
        .expect("Missing debug.")
        .parse()
        .expect("Invalid env variable DEBUG");
    let out_dir = std::env::var("OUT_DIR").expect("Missing build-level OUT_DIR.");
    // Target arch (x86_64 / aarch64)
    let target_arch = std::env::var("CARGO_CFG_TARGET_ARCH").expect("Missing target arch.");

    let seccomp_json_path = format!("{}/{}.json", JSON_DIR, target);
    // If the current target doesn't have a default filter, or if we're building a debug binary,
    // use a default, empty filter.
    // This is to make sure that Firecracker builds even with libc toolchains for which we don't
    // provide a default filter. For example, GNU libc.
    let seccomp_json_path = if debug {
        println!(
            "cargo:warning=Using empty default seccomp policy for debug builds: \
             `resources/seccomp/unimplemented.json`."
        );
        format!("{}/unimplemented.json", JSON_DIR)
    } else if !Path::new(&seccomp_json_path).exists() {
        println!(
            "cargo:warning=No default seccomp policy for target: {}. Defaulting to \
             `resources/seccomp/unimplemented.json`.",
            target
        );
        format!("{}/unimplemented.json", JSON_DIR)
    } else {
        seccomp_json_path
    };

    // Retrigger the build script if the JSON file has changed.
    // let json_path = json_path.to_str().expect("Invalid bytes");
    println!("cargo:rerun-if-changed={}", seccomp_json_path);
    // Also retrigger the build script on any seccompiler source code change.
    println!("cargo:rerun-if-changed={}", SECCOMPILER_SRC_DIR);

    let out_path = format!("{}/{}", out_dir, ADVANCED_BINARY_FILTER_FILE_NAME);
    // Compile the JSON policy into the serialized BPF blob embedded at Firecracker build time.
    seccompiler::compile_bpf(&seccomp_json_path, &target_arch, &out_path, false, false)
        .expect("Cannot compile seccomp filters");
}



================================================
FILE: src/firecracker/examples/README.md
================================================
## Test Utilities

The `examples` directory contains various small rust utilities that are used in firecracker's integration test suite.



================================================
FILE: src/firecracker/examples/seccomp/harmless.rs
================================================
// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

//! Well-behaved probe binary: performs a single `write(2)` to stdout, which the
//! test's seccomp filter is expected to allow.

fn main() {
    // SAFETY: This is just an example to demonstrate syscall filtering.
    // The syscall is safe because we're only writing a static string to a file descriptor.
    unsafe {
        // Harmless print to standard output.
        libc::syscall(libc::SYS_write, libc::STDOUT_FILENO, "Hello, world!\n", 14);
    }
}



================================================
FILE: src/firecracker/examples/seccomp/jailer.rs
================================================
// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

//! Minimal jailer-like launcher: installs the "main" seccomp filter from a
//! pre-compiled BPF file, then exec()s the given binary under that filter.

use std::env::args;
use std::fs::File;
use std::os::unix::process::CommandExt;
use std::process::{Command, Stdio};

use vmm::seccomp::{apply_filter, deserialize_binary};

fn main() {
    // argv[1] = binary to exec, argv[2] = path to the serialized BPF filter map.
    // NOTE(review): `Vec` appears to have lost its generic parameter (presumably
    // `Vec<String>`) during extraction — confirm against the upstream repository.
    let args: Vec = args().collect();
    let exec_file = &args[1];
    let bpf_path = &args[2];
    let filter_file = File::open(bpf_path).unwrap();
    let map = deserialize_binary(&filter_file).unwrap();

    // Loads filters.
    apply_filter(map.get("main").unwrap()).unwrap();

    // `exec()` replaces this process image; it only returns on failure,
    // hence the discarded result.
    let _ = Command::new(exec_file)
        .stdin(Stdio::inherit())
        .stdout(Stdio::inherit())
        .stderr(Stdio::inherit())
        .exec();
}



================================================
FILE: src/firecracker/examples/seccomp/malicious.rs
================================================
// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

//! "Malicious" probe binary: writes to stdin, a syscall/argument combination the
//! test's seccomp filter is expected to deny.

fn main() {
    // SAFETY: This is just an example to demonstrate syscall filtering.
    // The syscall is safe because we're only writing a static string to a file descriptor.
    unsafe {
        // In this example, the malicious component is outputting to standard input.
        libc::syscall(libc::SYS_write, libc::STDIN_FILENO, "Hello, world!\n", 14);
    }
}



================================================
FILE: src/firecracker/examples/seccomp/panic.rs
================================================
// Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

//! Installs the seccomp filter named by argv[2] from the BPF file at argv[1],
//! then deliberately panics — used to exercise filter behaviour on the panic path.

use std::env::args;
use std::fs::File;

use vmm::seccomp::{apply_filter, deserialize_binary};

fn main() {
    // argv[1] = path to the serialized BPF filter map, argv[2] = filter (thread) name.
    // NOTE(review): `Vec` appears to have lost its generic parameter (presumably
    // `Vec<String>`) during extraction — confirm against the upstream repository.
    let args: Vec = args().collect();
    let bpf_path = &args[1];
    let filter_thread = &args[2];
    let filter_file = File::open(bpf_path).unwrap();
    let map = deserialize_binary(&filter_file).unwrap();
    apply_filter(map.get(filter_thread).unwrap()).unwrap();

    panic!("Expected panic.");
}



================================================
FILE: src/firecracker/examples/uffd/fault_all_handler.rs
================================================
// Copyright 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

//! Provides functionality for a userspace page fault handler
//! which loads the whole region from the backing memory file
//! when a page fault occurs.
mod uffd_utils;

use std::fs::File;
use std::os::unix::net::UnixListener;

use uffd_utils::{Runtime, UffdHandler};
use utils::time::{ClockType, get_time_us};

fn main() {
    // argv[1] = UDS path to listen on; argv[2] = backing memory file.
    // (`nth(1)` consumes argv[0] and argv[1], so the following `next()` yields argv[2].)
    let mut args = std::env::args();
    let uffd_sock_path = args.nth(1).expect("No socket path given");
    let mem_file_path = args.next().expect("No memory file given");

    let file = File::open(mem_file_path).expect("Cannot open memfile");

    // Get Uffd from UDS. We'll use the uffd to handle PFs for Firecracker.
    let listener = UnixListener::bind(uffd_sock_path).expect("Cannot bind to socket path");
    let (stream, _) = listener.accept().expect("Cannot listen on UDS socket");

    let mut runtime = Runtime::new(stream, file);
    runtime.install_panic_hook();
    runtime.run(|uffd_handler: &mut UffdHandler| {
        // Read an event from the userfaultfd.
        let event = uffd_handler
            .read_event()
            .expect("Failed to read uffd_msg")
            .expect("uffd_msg not ready");

        match event {
            userfaultfd::Event::Pagefault { .. } => {
                // On a fault, populate *every* registered region in one pass and
                // report how long the full pre-fault took.
                let start = get_time_us(ClockType::Monotonic);
                for region in uffd_handler.mem_regions.clone() {
                    // NOTE(review): `serve_pf`'s bool result is discarded here —
                    // presumably acceptable for this fault-all example (no balloon
                    // `remove` races are expected); confirm.
                    uffd_handler.serve_pf(region.base_host_virt_addr as _, region.size);
                }
                let end = get_time_us(ClockType::Monotonic);

                println!("Finished Faulting All: {}us", end - start);
            }
            _ => panic!("Unexpected event on userfaultfd"),
        }
    });
}



================================================
FILE: src/firecracker/examples/uffd/malicious_handler.rs
================================================
// Copyright 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

//! Provides functionality for a malicious page fault handler
//! which panics when a page fault occurs.
mod uffd_utils; use std::fs::File; use std::os::unix::net::UnixListener; use uffd_utils::{Runtime, UffdHandler}; fn main() { let mut args = std::env::args(); let uffd_sock_path = args.nth(1).expect("No socket path given"); let mem_file_path = args.next().expect("No memory file given"); let file = File::open(mem_file_path).expect("Cannot open memfile"); // Get Uffd from UDS. We'll use the uffd to handle PFs for Firecracker. let listener = UnixListener::bind(uffd_sock_path).expect("Cannot bind to socket path"); let (stream, _) = listener.accept().expect("Cannot listen on UDS socket"); let mut runtime = Runtime::new(stream, file); runtime.run(|uffd_handler: &mut UffdHandler| { // Read an event from the userfaultfd. let event = uffd_handler .read_event() .expect("Failed to read uffd_msg") .expect("uffd_msg not ready"); if let userfaultfd::Event::Pagefault { .. } = event { panic!("Fear me! I am the malicious page fault handler.") } }); } ================================================ FILE: src/firecracker/examples/uffd/on_demand_handler.rs ================================================ // Copyright 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 //! Provides functionality for a userspace page fault handler //! which loads the whole region from the backing memory file //! when a page fault occurs. mod uffd_utils; use std::fs::File; use std::os::unix::net::UnixListener; use uffd_utils::{Runtime, UffdHandler}; fn main() { let mut args = std::env::args(); let uffd_sock_path = args.nth(1).expect("No socket path given"); let mem_file_path = args.next().expect("No memory file given"); let file = File::open(mem_file_path).expect("Cannot open memfile"); // Get Uffd from UDS. We'll use the uffd to handle PFs for Firecracker. 
let listener = UnixListener::bind(uffd_sock_path).expect("Cannot bind to socket path"); let (stream, _) = listener.accept().expect("Cannot listen on UDS socket"); let mut runtime = Runtime::new(stream, file); runtime.install_panic_hook(); runtime.run(|uffd_handler: &mut UffdHandler| { // !DISCLAIMER! // When using UFFD together with the balloon device, this handler needs to deal with // `remove` and `pagefault` events. There are multiple things to keep in mind in // such setups: // // As long as any `remove` event is pending in the UFFD queue, all ioctls return EAGAIN // ----------------------------------------------------------------------------------- // // This means we cannot process UFFD events simply one-by-one anymore - if a `remove` event // arrives, we need to pre-fetch all other events up to the `remove` event, to unblock the // UFFD, and then go back to the process the pre-fetched events. // // UFFD might receive events in not in their causal order // ----------------------------------------------------- // // For example, the guest // kernel might first respond to a balloon inflation by freeing some memory, and // telling Firecracker about this. Firecracker will then madvise(MADV_DONTNEED) the // free memory range, which causes a `remove` event to be sent to UFFD. Then, the // guest kernel might immediately fault the page in again (for example because // default_on_oom was set). which causes a `pagefault` event to be sent to UFFD. // // However, the pagefault will be triggered from inside KVM on the vCPU thread, while the // balloon device is handled by Firecracker on its VMM thread. This means that potentially // this handler can receive the `pagefault` _before_ the `remove` event. 
// // This means that the simple "greedy" strategy of simply prefetching _all_ UFFD events // to make sure no `remove` event is blocking us can result in the handler acting on // the `pagefault` event before the `remove` message (despite the `remove` event being // in the causal past of the `pagefault` event), which means that we will fault in a page // from the snapshot file, while really we should be faulting in a zero page. // // In this example handler, we ignore this problem, to avoid // complexity (under the assumption that the guest kernel will zero a newly faulted in // page anyway). A production handler will most likely want to ensure that `remove` // events for a specific range are always handled before `pagefault` events. // // Lastly, we still need to deal with the race condition where a `remove` event arrives // in the UFFD queue after we got done reading all events, in which case we need to go // back to reading more events before we can continue processing `pagefault`s. let mut deferred_events = Vec::new(); loop { // First, try events that we couldn't handle last round let mut events_to_handle = Vec::from_iter(deferred_events.drain(..)); // Read all events from the userfaultfd. while let Some(event) = uffd_handler.read_event().expect("Failed to read uffd_msg") { events_to_handle.push(event); } for event in events_to_handle.drain(..) { // We expect to receive either a Page Fault or `remove` // event (if the balloon device is enabled). match event { userfaultfd::Event::Pagefault { addr, .. } => { if !uffd_handler.serve_pf(addr.cast(), uffd_handler.page_size) { deferred_events.push(event); } } userfaultfd::Event::Remove { start, end } => { uffd_handler.unregister_range(start, end) } _ => panic!("Unexpected event on userfaultfd"), } } // We assume that really only the above removed/pagefault interaction can result in // deferred events. 
In that scenario, the loop will always terminate (unless // newly arriving `remove` events end up indefinitely blocking it, but there's nothing // we can do about that, and it's a largely theoretical problem). if deferred_events.is_empty() { break; } } }); } ================================================ FILE: src/firecracker/examples/uffd/uffd_utils.rs ================================================ // Copyright 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 #![allow( clippy::cast_possible_truncation, clippy::cast_sign_loss, clippy::undocumented_unsafe_blocks, // Not everything is used by both binaries dead_code )] use std::collections::HashMap; use std::ffi::c_void; use std::fs::File; use std::os::unix::io::{AsRawFd, FromRawFd, IntoRawFd}; use std::os::unix::net::UnixStream; use std::ptr; use std::time::Duration; use serde::{Deserialize, Serialize}; use userfaultfd::{Error, Event, Uffd}; use vmm_sys_util::sock_ctrl_msg::ScmSocket; // This is the same with the one used in src/vmm. /// This describes the mapping between Firecracker base virtual address and offset in the /// buffer or file backend for a guest memory region. It is used to tell an external /// process/thread where to populate the guest memory data for this range. /// /// E.g. Guest memory contents for a region of `size` bytes can be found in the backend /// at `offset` bytes from the beginning, and should be copied/populated into `base_host_address`. #[derive(Clone, Debug, Serialize, Deserialize)] pub struct GuestRegionUffdMapping { /// Base host virtual address where the guest memory contents for this region /// should be copied/populated. pub base_host_virt_addr: u64, /// Region size. pub size: usize, /// Offset in the backend file/buffer where the region contents are. pub offset: u64, /// The configured page size for this memory region. 
pub page_size: usize, } impl GuestRegionUffdMapping { fn contains(&self, fault_page_addr: u64) -> bool { fault_page_addr >= self.base_host_virt_addr && fault_page_addr < self.base_host_virt_addr + self.size as u64 } } #[derive(Debug)] pub struct UffdHandler { pub mem_regions: Vec, pub page_size: usize, backing_buffer: *const u8, uffd: Uffd, } impl UffdHandler { fn try_get_mappings_and_file( stream: &UnixStream, ) -> Result<(String, Option), std::io::Error> { let mut message_buf = vec![0u8; 1024]; let (bytes_read, file) = stream.recv_with_fd(&mut message_buf[..])?; message_buf.resize(bytes_read, 0); // We do not expect to receive non-UTF-8 data from Firecracker, so this is probably // an error we can't recover from. Just immediately abort let body = String::from_utf8(message_buf.clone()).unwrap_or_else(|_| { panic!( "Received body is not a utf-8 valid string. Raw bytes received: {message_buf:#?}" ) }); Ok((body, file)) } fn get_mappings_and_file(stream: &UnixStream) -> (String, File) { // Sometimes, reading from the stream succeeds but we don't receive any // UFFD descriptor. We don't really have a good understanding why this is // happening, but let's try to be a bit more robust and retry a few times // before we declare defeat. for _ in 1..=5 { match Self::try_get_mappings_and_file(stream) { Ok((body, Some(file))) => { return (body, file); } Ok((body, None)) => { println!("Didn't receive UFFD over socket. We received: '{body}'. Retrying..."); } Err(err) => { println!("Could not get UFFD and mapping from Firecracker: {err}. Retrying..."); } } std::thread::sleep(Duration::from_millis(100)); } panic!("Could not get UFFD and mappings after 5 retries"); } pub fn from_unix_stream(stream: &UnixStream, backing_buffer: *const u8, size: usize) -> Self { let (body, file) = Self::get_mappings_and_file(stream); let mappings = serde_json::from_str::>(&body).unwrap_or_else(|_| { panic!("Cannot deserialize memory mappings. 
Received body: {body}") }); let memsize: usize = mappings.iter().map(|r| r.size).sum(); // Page size is the same for all memory regions, so just grab the first one let first_mapping = mappings.first().unwrap_or_else(|| { panic!( "Cannot get the first mapping. Mappings size is {}. Received body: {body}", mappings.len() ) }); let page_size = first_mapping.page_size; // Make sure memory size matches backing data size. assert_eq!(memsize, size); assert!(page_size.is_power_of_two()); let uffd = unsafe { Uffd::from_raw_fd(file.into_raw_fd()) }; Self { mem_regions: mappings, page_size, backing_buffer, uffd, } } pub fn read_event(&mut self) -> Result, Error> { self.uffd.read_event() } pub fn unregister_range(&mut self, start: *mut c_void, end: *mut c_void) { assert!( (start as usize).is_multiple_of(self.page_size) && (end as usize).is_multiple_of(self.page_size) && end > start ); // SAFETY: start and end are valid and provided by UFFD let len = unsafe { end.offset_from_unsigned(start) }; self.uffd .unregister(start, len) .expect("range should be valid"); } pub fn serve_pf(&mut self, addr: *mut u8, len: usize) -> bool { // Find the start of the page that the current faulting address belongs to. let dst = (addr as usize & !(self.page_size - 1)) as *mut libc::c_void; let fault_page_addr = dst as u64; for region in self.mem_regions.iter() { if region.contains(fault_page_addr) { return self.populate_from_file(region, fault_page_addr, len); } } panic!( "Could not find addr: {:?} within guest region mappings.", addr ); } fn populate_from_file(&self, region: &GuestRegionUffdMapping, dst: u64, len: usize) -> bool { let offset = dst - region.base_host_virt_addr; let src = self.backing_buffer as u64 + region.offset + offset; unsafe { match self.uffd.copy(src as *const _, dst as *mut _, len, true) { // Make sure the UFFD copied some bytes. 
Ok(value) => assert!(value > 0), // Catch EAGAIN errors, which occur when a `remove` event lands in the UFFD // queue while we're processing `pagefault` events. // The weird cast is because the `bytes_copied` field is based on the // `uffdio_copy->copy` field, which is a signed 64 bit integer, and if something // goes wrong, it gets set to a -errno code. However, uffd-rs always casts this // value to an unsigned `usize`, which scrambled the errno. Err(Error::PartiallyCopied(bytes_copied)) if bytes_copied == 0 || bytes_copied == (-libc::EAGAIN) as usize => { return false; } Err(Error::CopyFailed(errno)) if std::io::Error::from(errno).raw_os_error().unwrap() == libc::EEXIST => {} Err(e) => { panic!("Uffd copy failed: {e:?}"); } } }; true } } #[derive(Debug)] pub struct Runtime { stream: UnixStream, backing_file: File, backing_memory: *mut u8, backing_memory_size: usize, uffds: HashMap, } impl Runtime { pub fn new(stream: UnixStream, backing_file: File) -> Self { let file_meta = backing_file .metadata() .expect("can not get backing file metadata"); let backing_memory_size = file_meta.len() as usize; // # Safety: // File size and fd are valid let ret = unsafe { libc::mmap( ptr::null_mut(), backing_memory_size, libc::PROT_READ, libc::MAP_PRIVATE | libc::MAP_POPULATE, backing_file.as_raw_fd(), 0, ) }; if ret == libc::MAP_FAILED { panic!("mmap on backing file failed"); } Self { stream, backing_file, backing_memory: ret.cast(), backing_memory_size, uffds: HashMap::default(), } } fn peer_process_credentials(&self) -> libc::ucred { let mut creds: libc::ucred = libc::ucred { pid: 0, gid: 0, uid: 0, }; let mut creds_size = size_of::() as u32; let ret = unsafe { libc::getsockopt( self.stream.as_raw_fd(), libc::SOL_SOCKET, libc::SO_PEERCRED, (&raw mut creds).cast::(), &raw mut creds_size, ) }; if ret != 0 { panic!("Failed to get peer process credentials"); } creds } pub fn install_panic_hook(&self) { let peer_creds = self.peer_process_credentials(); let default_panic_hook = 
std::panic::take_hook(); std::panic::set_hook(Box::new(move |panic_info| { let r = unsafe { libc::kill(peer_creds.pid, libc::SIGKILL) }; if r != 0 { eprintln!("Failed to kill Firecracker process from panic hook"); } default_panic_hook(panic_info); })); } /// Polls the `UnixStream` and UFFD fds in a loop. /// When stream is polled, new uffd is retrieved. /// When uffd is polled, page fault is handled by /// calling `pf_event_dispatch` with corresponding /// uffd object passed in. pub fn run(&mut self, pf_event_dispatch: impl Fn(&mut UffdHandler)) { let mut pollfds = vec![]; // Poll the stream for incoming uffds pollfds.push(libc::pollfd { fd: self.stream.as_raw_fd(), events: libc::POLLIN, revents: 0, }); loop { let pollfd_ptr = pollfds.as_mut_ptr(); let pollfd_size = pollfds.len() as u64; // # Safety: // Pollfds vector is valid let mut nready = unsafe { libc::poll(pollfd_ptr, pollfd_size, -1) }; if nready == -1 { panic!("Could not poll for events!") } for i in 0..pollfds.len() { if nready == 0 { break; } if pollfds[i].revents & libc::POLLIN != 0 { nready -= 1; if pollfds[i].fd == self.stream.as_raw_fd() { // Handle new uffd from stream let handler = UffdHandler::from_unix_stream( &self.stream, self.backing_memory, self.backing_memory_size, ); pollfds.push(libc::pollfd { fd: handler.uffd.as_raw_fd(), events: libc::POLLIN, revents: 0, }); self.uffds.insert(handler.uffd.as_raw_fd(), handler); } else { // Handle one of uffd page faults pf_event_dispatch(self.uffds.get_mut(&pollfds[i].fd).unwrap()); } } } // If connection is closed, we can skip the socket from being polled. 
pollfds.retain(|pollfd| pollfd.revents & (libc::POLLRDHUP | libc::POLLHUP) == 0); } } } #[cfg(test)] mod tests { use std::mem::MaybeUninit; use std::os::unix::net::UnixListener; use vmm_sys_util::tempdir::TempDir; use vmm_sys_util::tempfile::TempFile; use super::*; unsafe impl Send for Runtime {} #[test] fn test_runtime() { let tmp_dir = TempDir::new().unwrap(); let dummy_socket_path = tmp_dir.as_path().join("dummy_socket"); let dummy_socket_path_clone = dummy_socket_path.clone(); let mut uninit_runtime = Box::new(MaybeUninit::::uninit()); // We will use this pointer to bypass a bunch of Rust Safety // for the sake of convenience. let runtime_ptr = uninit_runtime.as_ptr().cast::(); let runtime_thread = std::thread::spawn(move || { let tmp_file = TempFile::new().unwrap(); tmp_file.as_file().set_len(0x1000).unwrap(); let dummy_mem_path = tmp_file.as_path(); let file = File::open(dummy_mem_path).expect("Cannot open memfile"); let listener = UnixListener::bind(dummy_socket_path).expect("Cannot bind to socket path"); let (stream, _) = listener.accept().expect("Cannot listen on UDS socket"); // Update runtime with actual runtime let runtime = uninit_runtime.write(Runtime::new(stream, file)); runtime.run(|_: &mut UffdHandler| {}); }); // wait for runtime thread to initialize itself std::thread::sleep(std::time::Duration::from_millis(100)); let stream = UnixStream::connect(dummy_socket_path_clone).expect("Cannot connect to the socket"); let dummy_memory_region = vec![GuestRegionUffdMapping { base_host_virt_addr: 0, size: 0x1000, offset: 0, page_size: 4096, }]; let dummy_memory_region_json = serde_json::to_string(&dummy_memory_region).unwrap(); let dummy_file_1 = TempFile::new().unwrap(); let dummy_fd_1 = dummy_file_1.as_file().as_raw_fd(); stream .send_with_fd(dummy_memory_region_json.as_bytes(), dummy_fd_1) .unwrap(); // wait for the runtime thread to process message std::thread::sleep(std::time::Duration::from_millis(100)); unsafe { assert_eq!((*runtime_ptr).uffds.len(), 
1); } let dummy_file_2 = TempFile::new().unwrap(); let dummy_fd_2 = dummy_file_2.as_file().as_raw_fd(); stream .send_with_fd(dummy_memory_region_json.as_bytes(), dummy_fd_2) .unwrap(); // wait for the runtime thread to process message std::thread::sleep(std::time::Duration::from_millis(100)); unsafe { assert_eq!((*runtime_ptr).uffds.len(), 2); } // there is no way to properly stop runtime, so // we send a message with an incorrect memory region // to cause runtime thread to panic let error_memory_region = vec![GuestRegionUffdMapping { base_host_virt_addr: 0, size: 0, offset: 0, page_size: 4096, }]; let error_memory_region_json = serde_json::to_string(&error_memory_region).unwrap(); stream .send_with_fd(error_memory_region_json.as_bytes(), dummy_fd_2) .unwrap(); runtime_thread.join().unwrap_err(); } } ================================================ FILE: src/firecracker/src/api_server/mod.rs ================================================ // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 //! Implements the interface for intercepting API requests, forwarding them to the VMM //! and responding to the user. //! It is constructed on top of an HTTP Server that uses Unix Domain Sockets and `EPOLL` to //! handle multiple connections on the same thread. pub mod parsed_request; pub mod request; use std::fmt::Debug; use std::sync::mpsc; pub use micro_http::{Body, HttpServer, Request, Response, ServerError, StatusCode, Version}; use parsed_request::{ParsedRequest, RequestAction}; use serde_json::json; use utils::time::{ClockType, get_time_us}; use vmm::logger::{ METRICS, ProcessTimeReporter, debug, error, info, update_metric_with_elapsed_time, warn, }; use vmm::rpc_interface::{ApiRequest, ApiResponse, VmmAction}; use vmm::seccomp::BpfProgramRef; use vmm::vmm_config::snapshot::SnapshotType; use vmm_sys_util::eventfd::EventFd; /// Structure associated with the API server implementation. 
#[derive(Debug)] pub struct ApiServer { /// Sender which allows passing messages to the VMM. api_request_sender: mpsc::Sender, /// Receiver which collects messages from the VMM. vmm_response_receiver: mpsc::Receiver, /// FD on which we notify the VMM that we have sent at least one /// `VmmRequest`. to_vmm_fd: EventFd, } impl ApiServer { /// Constructor for `ApiServer`. /// /// Returns the newly formed `ApiServer`. pub fn new( api_request_sender: mpsc::Sender, vmm_response_receiver: mpsc::Receiver, to_vmm_fd: EventFd, ) -> Self { ApiServer { api_request_sender, vmm_response_receiver, to_vmm_fd, } } /// Runs the Api Server. /// /// # Arguments /// /// * `path` - the socket path on which the server will wait for requests. /// * `start_time_us` - the timestamp for when the process was started in us. /// * `start_time_cpu_us` - the timestamp for when the process was started in CPU us. /// * `seccomp_filter` - the seccomp filter to apply. pub fn run( &mut self, mut server: HttpServer, process_time_reporter: ProcessTimeReporter, seccomp_filter: BpfProgramRef, api_payload_limit: usize, ) { // Set the api payload size limit. server.set_payload_max_size(api_payload_limit); // Load seccomp filters on the API thread. // Execution panics if filters cannot be loaded, use --no-seccomp if skipping filters // altogether is the desired behaviour. if let Err(err) = vmm::seccomp::apply_filter(seccomp_filter) { panic!( "Failed to set the requested seccomp filters on the API thread: {}", err ); } server.start_server().expect("Cannot start HTTP server"); info!("API server started."); // Store process start time metric. process_time_reporter.report_start_time(); // Store process CPU start time metric. 
process_time_reporter.report_cpu_start_time(); loop { let request_vec = match server.requests() { Ok(vec) => vec, Err(ServerError::ShutdownEvent) => { server.flush_outgoing_writes(); debug!("shutdown request received, API server thread ending."); return; } Err(err) => { // print request error, but keep server running error!("API Server error on retrieving incoming request: {}", err); continue; } }; for server_request in request_vec { let request_processing_start_us = get_time_us(ClockType::Monotonic); // Use `self.handle_request()` as the processing callback. let response = server_request .process(|request| self.handle_request(request, request_processing_start_us)); if let Err(err) = server.respond(response) { error!("API Server encountered an error on response: {}", err); }; let delta_us = get_time_us(ClockType::Monotonic) - request_processing_start_us; debug!("Total previous API call duration: {} us.", delta_us); } } } /// Handles an API request received through the associated socket. 
pub fn handle_request( &mut self, request: &Request, request_processing_start_us: u64, ) -> Response { match ParsedRequest::try_from(request).map(|r| r.into_parts()) { Ok((req_action, mut parsing_info)) => { let mut response = match req_action { RequestAction::Sync(vmm_action) => { self.serve_vmm_action_request(vmm_action, request_processing_start_us) } }; if let Some(message) = parsing_info.take_deprecation_message() { warn!("{}", message); response.set_deprecation(); } response } Err(err) => { error!("{:?}", err); err.into() } } } fn serve_vmm_action_request( &mut self, vmm_action: Box, request_processing_start_us: u64, ) -> Response { let metric_with_action = match *vmm_action { VmmAction::CreateSnapshot(ref params) => match params.snapshot_type { SnapshotType::Full => Some(( &METRICS.latencies_us.full_create_snapshot, "create full snapshot", )), SnapshotType::Diff => Some(( &METRICS.latencies_us.diff_create_snapshot, "create diff snapshot", )), }, VmmAction::LoadSnapshot(_) => { Some((&METRICS.latencies_us.load_snapshot, "load snapshot")) } VmmAction::Pause => Some((&METRICS.latencies_us.pause_vm, "pause vm")), VmmAction::Resume => Some((&METRICS.latencies_us.resume_vm, "resume vm")), _ => None, }; self.api_request_sender .send(vmm_action) .expect("Failed to send VMM message"); self.to_vmm_fd.write(1).expect("Cannot update send VMM fd"); let vmm_outcome = *(self.vmm_response_receiver.recv().expect("VMM disconnected")); let response = ParsedRequest::convert_to_response(&vmm_outcome); if vmm_outcome.is_ok() && let Some((metric, action)) = metric_with_action { let elapsed_time_us = update_metric_with_elapsed_time(metric, request_processing_start_us); info!("'{}' API request took {} us.", action, elapsed_time_us); } response } /// An HTTP response which also includes a body. 
pub(crate) fn json_response + Debug>(status: StatusCode, body: T) -> Response { let mut response = Response::new(Version::Http11, status); response.set_body(Body::new(body.into())); response } fn json_fault_message + serde::Serialize + Debug>(msg: T) -> String { json!({ "fault_message": msg }).to_string() } } #[cfg(test)] mod tests { use std::io::{Read, Write}; use std::os::unix::net::UnixStream; use std::path::PathBuf; use std::sync::mpsc::channel; use std::thread; use micro_http::HttpConnection; use utils::time::ClockType; use vmm::builder::StartMicrovmError; use vmm::logger::StoreMetric; use vmm::rpc_interface::{VmmActionError, VmmData}; use vmm::seccomp::get_empty_filters; use vmm::vmm_config::instance_info::InstanceInfo; use vmm::vmm_config::snapshot::CreateSnapshotParams; use vmm_sys_util::tempfile::TempFile; use super::request::cpu_configuration::parse_put_cpu_config; use super::*; /// Test unescaped CPU template in JSON format. /// Newlines injected into a field's value to /// test deserialization and logging. 
    // x86_64 variant: MSR modifiers carrying raw `\n` escape sequences in a field value.
    #[cfg(target_arch = "x86_64")]
    const TEST_UNESCAPED_JSON_TEMPLATE: &str = r#"{ "msr_modifiers": [ { "addr": "0x0\n\n\n\nTEST\n\n\n\n", "bitmap": "0b00" } ] }"#;
    // aarch64 variant: register modifiers with the same injected newlines.
    #[cfg(target_arch = "aarch64")]
    pub const TEST_UNESCAPED_JSON_TEMPLATE: &str = r#"{ "reg_modifiers": [ { "addr": "0x0\n\n\n\nTEST\n\n\n\n", "bitmap": "0b00" } ] }"#;

    // Checks status-code mapping and latency-metric updates for actions served
    // with the VMM side mocked out via a pre-loaded response channel.
    #[test]
    fn test_serve_vmm_action_request() {
        let to_vmm_fd = EventFd::new(libc::EFD_NONBLOCK).unwrap();
        let (api_request_sender, _from_api) = channel();
        let (to_api, vmm_response_receiver) = channel();
        let mut api_server = ApiServer::new(api_request_sender, vmm_response_receiver, to_vmm_fd);

        // Pre-queued error from the mocked VMM maps to 400 Bad Request.
        to_api
            .send(Box::new(Err(VmmActionError::StartMicrovm(
                StartMicrovmError::MissingKernelConfig,
            ))))
            .unwrap();
        let response = api_server.serve_vmm_action_request(Box::new(VmmAction::StartMicroVm), 0);
        assert_eq!(response.status(), StatusCode::BadRequest);

        // Since the vmm side is mocked out in this test, the call to serve_vmm_action_request can
        // complete very fast (under 1us, the resolution of our metrics). In these cases, the
        // latencies_us.pause_vm metric can be set to 0, failing the assertion below. By
        // subtracting 1 we assure that the metric will always be set to at least 1 (if it gets set
        // at all, which is what this test is trying to prove).
        let start_time_us = get_time_us(ClockType::Monotonic) - 1;

        assert_eq!(METRICS.latencies_us.pause_vm.fetch(), 0);
        to_api.send(Box::new(Ok(VmmData::Empty))).unwrap();
        let response = api_server.serve_vmm_action_request(Box::new(VmmAction::Pause), start_time_us);
        assert_eq!(response.status(), StatusCode::NoContent);
        assert_ne!(METRICS.latencies_us.pause_vm.fetch(), 0);

        assert_eq!(METRICS.latencies_us.diff_create_snapshot.fetch(), 0);
        to_api
            .send(Box::new(Err(VmmActionError::OperationNotSupportedPreBoot)))
            .unwrap();
        let response = api_server.serve_vmm_action_request(
            Box::new(VmmAction::CreateSnapshot(CreateSnapshotParams {
                snapshot_type: SnapshotType::Diff,
                snapshot_path: PathBuf::new(),
                mem_file_path: PathBuf::new(),
            })),
            start_time_us,
        );
        assert_eq!(response.status(), StatusCode::BadRequest);
        // The metric should not be updated if the request wasn't successful.
        assert_eq!(METRICS.latencies_us.diff_create_snapshot.fetch(), 0);

        to_api.send(Box::new(Ok(VmmData::Empty))).unwrap();
        let response = api_server.serve_vmm_action_request(
            Box::new(VmmAction::CreateSnapshot(CreateSnapshotParams {
                snapshot_type: SnapshotType::Diff,
                snapshot_path: PathBuf::new(),
                mem_file_path: PathBuf::new(),
            })),
            start_time_us,
        );
        assert_eq!(response.status(), StatusCode::NoContent);
        assert_ne!(METRICS.latencies_us.diff_create_snapshot.fetch(), 0);
        assert_eq!(METRICS.latencies_us.full_create_snapshot.fetch(), 0);
    }

    // Drives handle_request through a socketpair-backed HttpConnection for an
    // invalid action, a valid GET / and a malformed GET with a body.
    #[test]
    fn test_handle_request() {
        let to_vmm_fd = EventFd::new(libc::EFD_NONBLOCK).unwrap();
        let (api_request_sender, _from_api) = channel();
        let (to_api, vmm_response_receiver) = channel();
        let mut api_server = ApiServer::new(api_request_sender, vmm_response_receiver, to_vmm_fd);

        // Test an Actions request.
        let (mut sender, receiver) = UnixStream::pair().unwrap();
        let mut connection = HttpConnection::new(receiver);
        sender
            .write_all(
                b"PUT /actions HTTP/1.1\r\n\
                  Content-Type: application/json\r\n\
                  Content-Length: 49\r\n\r\n{ \
                  \"action_type\": \"Invalid\", \
                  \"payload\": \"string\" \
                  }",
            )
            .unwrap();
        connection.try_read().unwrap();
        let req = connection.pop_parsed_request().unwrap();
        let response = api_server.handle_request(&req, 0);
        assert_eq!(response.status(), StatusCode::BadRequest);

        // Test a Get Info request.
        to_api
            .send(Box::new(Ok(VmmData::InstanceInformation(
                InstanceInfo::default(),
            ))))
            .unwrap();
        sender.write_all(b"GET / HTTP/1.1\r\n\r\n").unwrap();
        connection.try_read().unwrap();
        let req = connection.pop_parsed_request().unwrap();
        let response = api_server.handle_request(&req, 0);
        assert_eq!(response.status(), StatusCode::OK);

        // Test erroneous request.
        sender
            .write_all(
                b"GET /mmds HTTP/1.1\r\n\
                  Content-Type: application/json\r\n\
                  Content-Length: 2\r\n\r\n{}",
            )
            .unwrap();
        connection.try_read().unwrap();
        let req = connection.pop_parsed_request().unwrap();
        let response = api_server.handle_request(&req, 0);
        assert_eq!(response.status(), StatusCode::BadRequest);
    }

    // Verifies that parse errors for templates with embedded newlines render
    // multi-line via Display but single-line via Debug (log-friendly).
    #[test]
    fn test_handle_request_logging() {
        let cpu_template_json = TEST_UNESCAPED_JSON_TEMPLATE;
        let result = parse_put_cpu_config(&Body::new(cpu_template_json.as_bytes()));
        let result_error = result.unwrap_err();

        let err_msg = format!("{}", result_error);
        assert_ne!(
            1,
            err_msg.lines().count(),
            "Error Body response:\n{}",
            err_msg
        );

        let err_msg_with_debug = format!("{:?}", result_error);
        // Check the loglines are on one line.
        assert_eq!(
            1,
            err_msg_with_debug.lines().count(),
            "Error Body response:\n{}",
            err_msg_with_debug
        );
    }

    // Spawns the API server on a temp unix socket and exercises a valid GET
    // plus an unsupported OPTIONS request end-to-end.
    #[test]
    fn test_bind_and_run() {
        let mut tmp_socket = TempFile::new().unwrap();
        tmp_socket.remove().unwrap();
        let path_to_socket = tmp_socket.as_path().to_str().unwrap().to_owned();
        let api_thread_path_to_socket = path_to_socket.clone();
        let to_vmm_fd = EventFd::new(libc::EFD_NONBLOCK).unwrap();
        let (api_request_sender, _from_api) = channel();
        let (to_api, vmm_response_receiver) = channel();
        let seccomp_filters = get_empty_filters();

        let server = HttpServer::new(PathBuf::from(api_thread_path_to_socket)).unwrap();
        thread::Builder::new()
            .name("fc_api_test".to_owned())
            .spawn(move || {
                ApiServer::new(api_request_sender, vmm_response_receiver, to_vmm_fd).run(
                    server,
                    ProcessTimeReporter::new(Some(1), Some(1), Some(1)),
                    seccomp_filters.get("api").unwrap(),
                    vmm::HTTP_MAX_PAYLOAD_SIZE,
                );
            })
            .unwrap();

        to_api
            .send(Box::new(Ok(VmmData::InstanceInformation(
                InstanceInfo::default(),
            ))))
            .unwrap();

        let mut sock = UnixStream::connect(PathBuf::from(path_to_socket)).unwrap();
        // Send a GET InstanceInfo request.
        sock.write_all(b"GET / HTTP/1.1\r\n\r\n").unwrap();
        let mut buf: [u8; 100] = [0; 100];
        assert!(sock.read(&mut buf[..]).unwrap() > 0);

        // Send an erroneous request.
        sock.write_all(b"OPTIONS / HTTP/1.1\r\n\r\n").unwrap();
        let mut buf: [u8; 100] = [0; 100];
        assert!(sock.read(&mut buf[..]).unwrap() > 0);
    }

    // Runs the server with a 50-byte payload limit and asserts the exact 400
    // "payload too large" response bytes for an oversized request.
    #[test]
    fn test_bind_and_run_with_limit() {
        let mut tmp_socket = TempFile::new().unwrap();
        tmp_socket.remove().unwrap();
        let path_to_socket = tmp_socket.as_path().to_str().unwrap().to_owned();
        let api_thread_path_to_socket = path_to_socket.clone();
        let to_vmm_fd = EventFd::new(libc::EFD_NONBLOCK).unwrap();
        let (api_request_sender, _from_api) = channel();
        let (_to_api, vmm_response_receiver) = channel();
        let seccomp_filters = get_empty_filters();

        let server = HttpServer::new(PathBuf::from(api_thread_path_to_socket)).unwrap();
        thread::Builder::new()
            .name("fc_api_test".to_owned())
            .spawn(move || {
                ApiServer::new(api_request_sender, vmm_response_receiver, to_vmm_fd).run(
                    server,
                    ProcessTimeReporter::new(Some(1), Some(1), Some(1)),
                    seccomp_filters.get("api").unwrap(),
                    50,
                )
            })
            .unwrap();

        let mut sock = UnixStream::connect(PathBuf::from(path_to_socket)).unwrap();
        // Send a GET mmds request.
        sock.write_all(
            b"PUT http://localhost/home HTTP/1.1\r\n\
              Content-Length: 50000\r\n\r\naaaaaa",
        )
        .unwrap();
        let mut buf: [u8; 265] = [0; 265];
        assert!(sock.read(&mut buf[..]).unwrap() > 0);
        let error_message = b"HTTP/1.1 400 \r\n\
                              Server: Firecracker API\r\n\
                              Connection: keep-alive\r\n\
                              Content-Type: application/json\r\n\
                              Content-Length: 146\r\n\r\n{ \"error\": \"\
                              Request payload with size 50000 is larger than \
                              the limit of 50 allowed by server.\nAll previous \
                              unanswered requests will be dropped.\" }";
        assert_eq!(&buf[..], &error_message[..]);
    }

    // Writing to the registered kill-switch EventFd must make the API thread
    // exit cleanly (join succeeds).
    #[test]
    fn test_kill_switch() {
        let mut tmp_socket = TempFile::new().unwrap();
        tmp_socket.remove().unwrap();
        let path_to_socket = tmp_socket.as_path().to_str().unwrap().to_owned();
        let to_vmm_fd = EventFd::new(libc::EFD_NONBLOCK).unwrap();
        let (api_request_sender, _from_api) = channel();
        let (_to_api, vmm_response_receiver) = channel();
        let seccomp_filters = get_empty_filters();
        let api_kill_switch = EventFd::new(libc::EFD_NONBLOCK).unwrap();
        let kill_switch = api_kill_switch.try_clone().unwrap();

        let mut server = HttpServer::new(PathBuf::from(path_to_socket)).unwrap();
        server.add_kill_switch(kill_switch).unwrap();
        let api_thread = thread::Builder::new()
            .name("fc_api_test".to_owned())
            .spawn(move || {
                ApiServer::new(api_request_sender, vmm_response_receiver, to_vmm_fd).run(
                    server,
                    ProcessTimeReporter::new(Some(1), Some(1), Some(1)),
                    seccomp_filters.get("api").unwrap(),
                    vmm::HTTP_MAX_PAYLOAD_SIZE,
                )
            })
            .unwrap();

        // Signal the API thread it should shut down.
        api_kill_switch.write(1).unwrap();
        // Verify API thread was brought down.
        api_thread.join().unwrap();
    }
}



================================================
FILE: src/firecracker/src/api_server/parsed_request.rs
================================================
// Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0 use std::fmt::Debug; use micro_http::{Body, Method, Request, Response, StatusCode, Version}; use serde::ser::Serialize; use serde_json::Value; use vmm::logger::{Level, error, info, log_enabled}; use vmm::rpc_interface::{VmmAction, VmmActionError, VmmData}; use super::ApiServer; use super::request::actions::parse_put_actions; use super::request::balloon::{parse_get_balloon, parse_patch_balloon, parse_put_balloon}; use super::request::boot_source::parse_put_boot_source; use super::request::cpu_configuration::parse_put_cpu_config; use super::request::drive::{parse_patch_drive, parse_put_drive}; use super::request::entropy::parse_put_entropy; use super::request::instance_info::parse_get_instance_info; use super::request::logger::parse_put_logger; use super::request::machine_configuration::{ parse_get_machine_config, parse_patch_machine_config, parse_put_machine_config, }; use super::request::metrics::parse_put_metrics; use super::request::mmds::{parse_get_mmds, parse_patch_mmds, parse_put_mmds}; use super::request::net::{parse_patch_net, parse_put_net}; use super::request::pmem::parse_put_pmem; use super::request::snapshot::{parse_patch_vm_state, parse_put_snapshot}; use super::request::version::parse_get_version; use super::request::vsock::parse_put_vsock; use crate::api_server::request::hotplug::memory::{ parse_get_memory_hotplug, parse_patch_memory_hotplug, parse_put_memory_hotplug, }; use crate::api_server::request::serial::parse_put_serial; #[derive(Debug)] pub(crate) enum RequestAction { Sync(Box), } #[derive(Debug, Default, PartialEq)] pub(crate) struct ParsingInfo { deprecation_message: Option, } impl ParsingInfo { pub fn append_deprecation_message(&mut self, message: &str) { match self.deprecation_message.as_mut() { None => self.deprecation_message = Some(message.to_owned()), Some(s) => (*s).push_str(message), } } pub fn take_deprecation_message(&mut self) -> Option { self.deprecation_message.take() } } #[derive(Debug)] 
/// A request that has been parsed into the VMM action it asks for, plus any
/// parsing side-notes (deprecation messages).
pub(crate) struct ParsedRequest {
    // The VMM action this request maps to.
    action: RequestAction,
    // Deprecation notes gathered during parsing.
    parsing_info: ParsingInfo,
}

/// Routes an HTTP request to the matching per-endpoint parser based on
/// (method, first path segment, body presence).
impl TryFrom<&Request> for ParsedRequest {
    type Error = RequestError;

    // NOTE(review): the return type's generic arguments were stripped by
    // extraction (`-> Result {`); presumably `Result<Self, Self::Error>` —
    // confirm against upstream before building.
    fn try_from(request: &Request) -> Result {
        let request_uri = request.uri().get_abs_path().to_string();
        let description = describe(
            request.method(),
            request_uri.as_str(),
            request.body.as_ref(),
        );
        info!("The API server received a {description}.");

        // Split request uri by '/' by doing:
        // 1. Trim starting '/' characters
        // 2. Splitting by '/'
        let mut path_tokens = request_uri.trim_start_matches('/').split_terminator('/');
        let path = path_tokens.next().unwrap_or("");

        match (request.method(), path, request.body.as_ref()) {
            // GET endpoints never carry a body.
            (Method::Get, "", None) => parse_get_instance_info(),
            (Method::Get, "balloon", None) => parse_get_balloon(path_tokens),
            (Method::Get, "version", None) => parse_get_version(),
            (Method::Get, "vm", None) if path_tokens.next() == Some("config") => {
                Ok(ParsedRequest::new_sync(VmmAction::GetFullVmConfig))
            }
            (Method::Get, "machine-config", None) => parse_get_machine_config(),
            (Method::Get, "mmds", None) => parse_get_mmds(),
            (Method::Get, "hotplug", None) if path_tokens.next() == Some("memory") => {
                parse_get_memory_hotplug()
            }
            // A GET with a body is rejected uniformly.
            (Method::Get, _, Some(_)) => method_to_error(Method::Get),
            // PUT endpoints require a body; some also take a sub-path token (id).
            (Method::Put, "actions", Some(body)) => parse_put_actions(body),
            (Method::Put, "balloon", Some(body)) => parse_put_balloon(body),
            (Method::Put, "boot-source", Some(body)) => parse_put_boot_source(body),
            (Method::Put, "cpu-config", Some(body)) => parse_put_cpu_config(body),
            (Method::Put, "drives", Some(body)) => parse_put_drive(body, path_tokens.next()),
            (Method::Put, "pmem", Some(body)) => parse_put_pmem(body, path_tokens.next()),
            (Method::Put, "logger", Some(body)) => parse_put_logger(body),
            (Method::Put, "serial", Some(body)) => parse_put_serial(body),
            (Method::Put, "machine-config", Some(body)) => parse_put_machine_config(body),
            (Method::Put, "metrics", Some(body)) => parse_put_metrics(body),
            (Method::Put, "mmds", Some(body)) => parse_put_mmds(body, path_tokens.next()),
            (Method::Put, "network-interfaces", Some(body)) => {
                parse_put_net(body, path_tokens.next())
            }
            (Method::Put, "snapshot", Some(body)) => parse_put_snapshot(body, path_tokens.next()),
            (Method::Put, "vsock", Some(body)) => parse_put_vsock(body),
            (Method::Put, "entropy", Some(body)) => parse_put_entropy(body),
            (Method::Put, "hotplug", Some(body)) if path_tokens.next() == Some("memory") => {
                parse_put_memory_hotplug(body)
            }
            (Method::Put, _, None) => method_to_error(Method::Put),
            // PATCH /balloon accepts an optional body (handled by the parser).
            (Method::Patch, "balloon", body) => parse_patch_balloon(body, path_tokens),
            (Method::Patch, "drives", Some(body)) => parse_patch_drive(body, path_tokens.next()),
            (Method::Patch, "machine-config", Some(body)) => parse_patch_machine_config(body),
            (Method::Patch, "mmds", Some(body)) => parse_patch_mmds(body),
            (Method::Patch, "network-interfaces", Some(body)) => {
                parse_patch_net(body, path_tokens.next())
            }
            (Method::Patch, "vm", Some(body)) => parse_patch_vm_state(body),
            (Method::Patch, "hotplug", Some(body)) if path_tokens.next() == Some("memory") => {
                parse_patch_memory_hotplug(body)
            }
            (Method::Patch, _, None) => method_to_error(Method::Patch),
            // Everything else is an unknown method/path combination.
            (method, unknown_uri, _) => Err(RequestError::InvalidPathMethod(
                unknown_uri.to_string(),
                method,
            )),
        }
    }
}

impl ParsedRequest {
    /// Wraps an action with empty parsing info.
    pub(crate) fn new(action: RequestAction) -> Self {
        Self {
            action,
            parsing_info: Default::default(),
        }
    }

    /// Consumes the request, yielding its action and parsing info.
    pub(crate) fn into_parts(self) -> (RequestAction, ParsingInfo) {
        (self.action, self.parsing_info)
    }

    /// Mutable access to the parsing info (used by parsers to append
    /// deprecation messages).
    pub(crate) fn parsing_info(&mut self) -> &mut ParsingInfo {
        &mut self.parsing_info
    }

    /// Builds a 200 OK response whose body is `body_data` serialized to JSON.
    // NOTE(review): the `<T>` after the fn name was stripped by extraction;
    // the `where` clause below still declares T's bounds.
    pub(crate) fn success_response_with_data(body_data: &T) -> Response
    where
        T: ?Sized + Serialize + Debug,
    {
        info!("The request was executed successfully. Status code: 200 OK.");
        let mut response = Response::new(Version::Http11, StatusCode::OK);
        response.set_body(Body::new(serde_json::to_string(body_data).unwrap()));
        response
    }

    /// Builds a 200 OK response for an MMDS value; `Null` maps to `{}` so the
    /// body is always a valid JSON object.
    pub(crate) fn success_response_with_mmds_value(body_data: &Value) -> Response {
        info!("The request was executed successfully. Status code: 200 OK.");
        let mut response = Response::new(Version::Http11, StatusCode::OK);
        let body_str = match body_data {
            Value::Null => "{}".to_string(),
            _ => serde_json::to_string(body_data).unwrap(),
        };
        response.set_body(Body::new(body_str));
        response
    }

    /// Converts the VMM's action outcome into an HTTP response:
    /// Ok => 200/204 with the serialized payload, Err => 400 (or 413 for
    /// MMDS limit overruns) with a JSON fault message.
    // NOTE(review): generic args of `&Result` were stripped by extraction;
    // presumably `&Result<VmmData, VmmActionError>` per the match arms below.
    pub(crate) fn convert_to_response(
        request_outcome: &Result,
    ) -> Response {
        match request_outcome {
            Ok(vmm_data) => match vmm_data {
                VmmData::Empty => {
                    info!("The request was executed successfully. Status code: 204 No Content.");
                    Response::new(Version::Http11, StatusCode::NoContent)
                }
                VmmData::MachineConfiguration(machine_config) => {
                    Self::success_response_with_data(machine_config)
                }
                VmmData::MmdsValue(value) => Self::success_response_with_mmds_value(value),
                VmmData::BalloonConfig(balloon_config) => {
                    Self::success_response_with_data(balloon_config)
                }
                VmmData::BalloonStats(stats) => Self::success_response_with_data(stats),
                VmmData::VirtioMemStatus(data) => Self::success_response_with_data(data),
                VmmData::HintingStatus(hinting_status) => {
                    Self::success_response_with_data(hinting_status)
                }
                VmmData::InstanceInformation(info) => Self::success_response_with_data(info),
                VmmData::VmmVersion(version) => Self::success_response_with_data(
                    &serde_json::json!({ "firecracker_version": version.as_str() }),
                ),
                VmmData::FullVmConfig(config) => Self::success_response_with_data(config),
            },
            Err(vmm_action_error) => {
                let mut response = match vmm_action_error {
                    // Exceeding the MMDS storage limit is the one error that
                    // maps to 413 instead of 400.
                    VmmActionError::MmdsLimitExceeded(_err) => {
                        error!(
                            "Received Error. Status code: 413 Payload too large. Message: {}",
                            vmm_action_error
                        );
                        Response::new(Version::Http11, StatusCode::PayloadTooLarge)
                    }
                    _ => {
                        error!(
                            "Received Error. Status code: 400 Bad Request. Message: {}",
                            vmm_action_error
                        );
                        Response::new(Version::Http11, StatusCode::BadRequest)
                    }
                };
                response.set_body(Body::new(ApiServer::json_fault_message(
                    vmm_action_error.to_string(),
                )));
                response
            }
        }
    }

    /// Helper function to avoid boiler-plate code.
    pub(crate) fn new_sync(vmm_action: VmmAction) -> ParsedRequest {
        ParsedRequest::new(RequestAction::Sync(Box::new(vmm_action)))
    }
}

/// Helper function for metric-logging purposes on API requests.
///
/// # Arguments
///
/// * `method` - one of `GET`, `PATCH`, `PUT`
/// * `path` - path of the API request
/// * `body` - body of the API request
fn describe(method: Method, path: &str, body: Option<&Body>) -> String {
    match (path, body) {
        // MMDS bodies are user data and are never logged.
        ("/mmds", Some(_)) | (_, None) => format!("{:?} request on {:?}", method, path),
        ("/cpu-config", Some(payload_value)) => {
            // If the log level is at Debug or higher, include the CPU template in
            // the log line.
            if log_enabled!(Level::Debug) {
                describe_with_body(method, path, payload_value)
            } else {
                format!(
                    "{:?} request on {:?}. To view the CPU template received by the API, \
                     configure log-level to DEBUG",
                    method, path
                )
            }
        }
        (_, Some(payload_value)) => describe_with_body(method, path, payload_value),
    }
}

/// Formats method/path plus the request body (lossily, if not valid UTF-8).
fn describe_with_body(method: Method, path: &str, payload_value: &Body) -> String {
    format!(
        "{:?} request on {:?} with body {:?}",
        method,
        path,
        std::str::from_utf8(payload_value.body.as_slice())
            .unwrap_or("inconvertible to UTF-8")
            .to_string()
    )
}

/// Generates a `GenericError` for each request method.
// Maps a method to the canonical "body present/absent mismatch" error.
// NOTE(review): the return type's generic arguments were stripped by
// extraction (`-> Result {`); presumably `Result<ParsedRequest, RequestError>`
// given the call sites in `try_from` — confirm against upstream.
pub(crate) fn method_to_error(method: Method) -> Result {
    match method {
        Method::Get => Err(RequestError::Generic(
            StatusCode::BadRequest,
            "GET request cannot have a body.".to_string(),
        )),
        Method::Put => Err(RequestError::Generic(
            StatusCode::BadRequest,
            "Empty PUT request.".to_string(),
        )),
        Method::Patch => Err(RequestError::Generic(
            StatusCode::BadRequest,
            "Empty PATCH request.".to_string(),
        )),
    }
}

/// Errors that can occur while turning an HTTP request into a `ParsedRequest`.
#[derive(Debug, thiserror::Error)]
pub(crate) enum RequestError {
    // The resource ID is empty.
    #[error("The ID cannot be empty.")]
    EmptyID,
    // A generic error, with a given status code and message to be turned into a fault message.
    #[error("{1}")]
    Generic(StatusCode, String),
    // The resource ID must only contain alphanumeric characters and '_'.
    #[error("API Resource IDs can only contain alphanumeric characters and underscores.")]
    InvalidID,
    // The HTTP method & request path combination is not valid.
    #[error("Invalid request method and/or path: {} {}.", .1.to_str(), .0)]
    InvalidPathMethod(String, Method),
    // An error occurred when deserializing the json body of a request.
    #[error("An error occurred when deserializing the json body of a request: {0}.")]
    SerdeJson(#[from] serde_json::Error),
}

// It's convenient to turn errors into HTTP responses directly.
// NOTE(review): the trait's type argument was stripped by extraction
// (`impl From for Response`); presumably `From<RequestError>` per the
// `from(err: RequestError)` signature below.
impl From for Response {
    fn from(err: RequestError) -> Self {
        let msg = ApiServer::json_fault_message(format!("{}", err));
        match err {
            // Generic errors keep their own status code; all others are 400.
            RequestError::Generic(status, _) => ApiServer::json_response(status, msg),
            RequestError::EmptyID
            | RequestError::InvalidID
            | RequestError::InvalidPathMethod(_, _)
            | RequestError::SerdeJson(_) => ApiServer::json_response(StatusCode::BadRequest, msg),
        }
    }
}

// This function is supposed to do id validation for requests.
// Accepts non-empty IDs made only of alphanumerics and underscores.
pub(crate) fn checked_id(id: &str) -> Result<&str, RequestError> {
    // todo: are there any checks we want to do on id's?
    // not allow them to be empty strings maybe?
    // check: ensure string is not empty
    if id.is_empty() {
        return Err(RequestError::EmptyID);
    }
    // check: ensure string is alphanumeric
    if !id.chars().all(|c| c == '_' || c.is_alphanumeric()) {
        return Err(RequestError::InvalidID);
    }
    Ok(id)
}

#[cfg(test)]
pub mod tests {
    use std::io::{Cursor, Write};
    use std::os::unix::net::UnixStream;
    use std::str::FromStr;

    use micro_http::HttpConnection;
    use vmm::builder::StartMicrovmError;
    use vmm::cpu_config::templates::test_utils::build_test_template;
    use vmm::devices::virtio::balloon::device::HintingStatus;
    use vmm::resources::VmmConfig;
    use vmm::rpc_interface::VmmActionError;
    use vmm::vmm_config::balloon::{BalloonDeviceConfig, BalloonStats};
    use vmm::vmm_config::instance_info::InstanceInfo;
    use vmm::vmm_config::machine_config::MachineConfig;

    use super::*;

    // Test-only structural equality: same deprecation message and same
    // wrapped VMM action.
    impl PartialEq for ParsedRequest {
        fn eq(&self, other: &ParsedRequest) -> bool {
            if self.parsing_info.deprecation_message != other.parsing_info.deprecation_message {
                return false;
            }
            match (&self.action, &other.action) {
                (RequestAction::Sync(sync_req), RequestAction::Sync(other_sync_req)) => {
                    sync_req == other_sync_req
                }
            }
        }
    }

    // Unwraps the VmmAction out of a parsed request.
    pub(crate) fn vmm_action_from_request(req: ParsedRequest) -> VmmAction {
        match req.action {
            RequestAction::Sync(vmm_action) => *vmm_action,
        }
    }

    // Like vmm_action_from_request, but also asserts the expected
    // deprecation message was recorded.
    // NOTE(review): `msg: Option` lost its generic argument in extraction;
    // presumably `Option<String>` to match `take_deprecation_message`.
    pub(crate) fn depr_action_from_req(req: ParsedRequest, msg: Option) -> VmmAction {
        let (action_req, mut parsing_info) = req.into_parts();
        match action_req {
            RequestAction::Sync(vmm_action) => {
                let req_msg = parsing_info.take_deprecation_message();
                assert!(req_msg.is_some());
                assert_eq!(req_msg, msg);
                *vmm_action
            }
        }
    }

    // Renders the exact byte-level HTTP response the server emits for the
    // given body/status (204 carries no body).
    fn http_response(body: &str, status_code: i32) -> String {
        let header = format!(
            "HTTP/1.1 {} \r\nServer: Firecracker API\r\nConnection: keep-alive\r\n",
            status_code
        );
        if status_code == 204 {
            // No Content
            format!("{}{}", header, "\r\n")
        } else {
            let content = format!(
                "Content-Type: application/json\r\nContent-Length: {}\r\n\r\n{}",
                body.len(),
                body,
            );
            format!("{}{}", header, content)
        }
    }

    // Renders a raw HTTP request for the given method/endpoint/body.
    fn http_request(request_type: &str, endpoint: &str, body: Option<&str>) -> String {
        let req_no_body = format!(
            "{} {} HTTP/1.1\r\nContent-Type: application/json\r\n",
            request_type, endpoint
        );
        if let Some(body) = body {
            return format!(
                "{}Content-Length: {}\r\n\r\n{}",
                req_no_body,
                body.len(),
                body
            );
        }
        format!("{}\r\n", req_no_body,)
    }

    #[test]
    fn test_missing_slash() {
        let (mut sender, receiver) = UnixStream::pair().unwrap();
        let mut connection = HttpConnection::new(receiver);
        sender
            .write_all(http_request("GET", "none", Some("body")).as_bytes())
            .unwrap();
        connection.try_read().unwrap();
        let req = connection.pop_parsed_request().unwrap();
        ParsedRequest::try_from(&req).unwrap_err();
    }

    #[test]
    fn test_checked_id() {
        checked_id("dummy").unwrap();
        checked_id("dummy_1").unwrap();

        assert_eq!(
            format!("{}", checked_id("").unwrap_err()),
            "The ID cannot be empty."
        );
        assert_eq!(
            format!("{}", checked_id("dummy!!").unwrap_err()),
            "API Resource IDs can only contain alphanumeric characters and underscores."
        );
    }

    #[test]
    fn test_invalid_get() {
        let (mut sender, receiver) = UnixStream::pair().unwrap();
        let mut connection = HttpConnection::new(receiver);
        sender
            .write_all(http_request("GET", "/mmds", Some("body")).as_bytes())
            .unwrap();
        connection.try_read().unwrap();
        let req = connection.pop_parsed_request().unwrap();
        let parsed_request = ParsedRequest::try_from(&req);
        assert!(matches!(
            &parsed_request,
            Err(RequestError::Generic(StatusCode::BadRequest, s))
                if s == "GET request cannot have a body.",
        ));
    }

    #[test]
    fn test_invalid_put() {
        let (mut sender, receiver) = UnixStream::pair().unwrap();
        let mut connection = HttpConnection::new(receiver);
        sender
            .write_all(http_request("PUT", "/mmds", None).as_bytes())
            .unwrap();
        connection.try_read().unwrap();
        let req = connection.pop_parsed_request().unwrap();
        let parsed_request = ParsedRequest::try_from(&req);
        assert!(matches!(
            &parsed_request,
            Err(RequestError::Generic(StatusCode::BadRequest, s))
                if s == "Empty PUT request.",
        ));
    }

    #[test]
    fn test_invalid_patch() {
        let (mut sender, receiver) = UnixStream::pair().unwrap();
        let mut connection = HttpConnection::new(receiver);
        sender
            .write_all(http_request("PATCH", "/mmds", None).as_bytes())
            .unwrap();
        connection.try_read().unwrap();
        let req = connection.pop_parsed_request().unwrap();
        let parsed_request = ParsedRequest::try_from(&req);
        assert!(matches!(
            &parsed_request,
            Err(RequestError::Generic(StatusCode::BadRequest, s))
                if s == "Empty PATCH request.",
        ));

        sender
            .write_all(http_request("PATCH", "/balloon", None).as_bytes())
            .unwrap();
        connection.try_read().unwrap();
        let req = connection.pop_parsed_request().unwrap();
        let parsed_request = ParsedRequest::try_from(&req);
        assert!(matches!(
            &parsed_request,
            Err(RequestError::Generic(StatusCode::BadRequest, s))
                if s == "Empty PATCH request.",
        ));
    }

    // Pins the exact response bytes produced by each RequestError variant.
    #[test]
    fn test_error_into_response() {
        // Generic error.
        let mut buf = Cursor::new(vec![0]);
        let response: Response =
            RequestError::Generic(StatusCode::BadRequest, "message".to_string()).into();
        response.write_all(&mut buf).unwrap();
        let body = ApiServer::json_fault_message("message");
        let expected_response = http_response(&body, 400);
        assert_eq!(buf.into_inner(), expected_response.as_bytes());

        // Empty ID error.
        let mut buf = Cursor::new(vec![0]);
        let response: Response = RequestError::EmptyID.into();
        response.write_all(&mut buf).unwrap();
        let body = ApiServer::json_fault_message("The ID cannot be empty.");
        let expected_response = http_response(&body, 400);
        assert_eq!(buf.into_inner(), expected_response.as_bytes());

        // Invalid ID error.
        let mut buf = Cursor::new(vec![0]);
        let response: Response = RequestError::InvalidID.into();
        response.write_all(&mut buf).unwrap();
        let body = ApiServer::json_fault_message(
            "API Resource IDs can only contain alphanumeric characters and underscores.",
        );
        let expected_response = http_response(&body, 400);
        assert_eq!(buf.into_inner(), expected_response.as_bytes());

        // Invalid path or method error.
        let mut buf = Cursor::new(vec![0]);
        let response: Response =
            RequestError::InvalidPathMethod("path".to_string(), Method::Get).into();
        response.write_all(&mut buf).unwrap();
        let body = ApiServer::json_fault_message(format!(
            "Invalid request method and/or path: {} {}.",
            Method::Get.to_str(),
            "path"
        ));
        let expected_response = http_response(&body, 400);
        assert_eq!(buf.into_inner(), expected_response.as_bytes());

        // Serde error.
        let mut buf = Cursor::new(vec![0]);
        let serde_error = serde_json::Value::from_str("").unwrap_err();
        let response: Response = RequestError::SerdeJson(serde_error).into();
        response.write_all(&mut buf).unwrap();
        let body = ApiServer::json_fault_message(
            "An error occurred when deserializing the json body of a request: EOF while parsing a \
             value at line 1 column 0.",
        );
        let expected_response = http_response(&body, 400);
        assert_eq!(buf.into_inner(), expected_response.as_bytes());
    }

    #[test]
    fn test_describe() {
        assert_eq!(
            describe(Method::Get, "path", None),
            "Get request on \"path\""
        );
        assert_eq!(
            describe(Method::Put, "/mmds", None),
            "Put request on \"/mmds\""
        );
        assert_eq!(
            describe(Method::Put, "path", Some(&Body::new("body"))),
            "Put request on \"path\" with body \"body\""
        );
    }

    // Checks convert_to_response against the expected raw HTTP bytes for
    // every VmmData variant and for an error outcome.
    #[test]
    fn test_convert_to_response() {
        let verify_ok_response_with = |vmm_data: VmmData| {
            let data = Ok(vmm_data);
            let mut buf = Cursor::new(vec![0]);
            let expected_response = match data.as_ref().unwrap() {
                VmmData::BalloonConfig(cfg) => {
                    http_response(&serde_json::to_string(cfg).unwrap(), 200)
                }
                VmmData::BalloonStats(stats) => {
                    http_response(&serde_json::to_string(stats).unwrap(), 200)
                }
                VmmData::VirtioMemStatus(data) => {
                    http_response(&serde_json::to_string(data).unwrap(), 200)
                }
                VmmData::HintingStatus(status) => {
                    http_response(&serde_json::to_string(status).unwrap(), 200)
                }
                VmmData::Empty => http_response("", 204),
                VmmData::FullVmConfig(cfg) => {
                    http_response(&serde_json::to_string(cfg).unwrap(), 200)
                }
                VmmData::MachineConfiguration(cfg) => {
                    http_response(&serde_json::to_string(cfg).unwrap(), 200)
                }
                VmmData::MmdsValue(value) => {
                    http_response(&serde_json::to_string(value).unwrap(), 200)
                }
                VmmData::InstanceInformation(info) => {
                    http_response(&serde_json::to_string(info).unwrap(), 200)
                }
                VmmData::VmmVersion(version) => http_response(
                    &serde_json::json!({ "firecracker_version": version.as_str() }).to_string(),
                    200,
                ),
            };
            let response = ParsedRequest::convert_to_response(&data);
            response.write_all(&mut buf).unwrap();
            assert_eq!(buf.into_inner(), expected_response.as_bytes());
        };

        verify_ok_response_with(VmmData::BalloonConfig(BalloonDeviceConfig::default()));
        verify_ok_response_with(VmmData::BalloonStats(BalloonStats {
            swap_in: Some(1),
            swap_out: Some(1),
            ..Default::default()
        }));
        verify_ok_response_with(VmmData::HintingStatus(HintingStatus {
            ..Default::default()
        }));
        verify_ok_response_with(VmmData::Empty);
        verify_ok_response_with(VmmData::FullVmConfig(VmmConfig::default()));
        verify_ok_response_with(VmmData::MachineConfiguration(MachineConfig::default()));
        verify_ok_response_with(VmmData::MmdsValue(serde_json::from_str("{}").unwrap()));
        verify_ok_response_with(VmmData::InstanceInformation(InstanceInfo::default()));
        verify_ok_response_with(VmmData::VmmVersion(String::default()));

        // Error.
        let error = VmmActionError::StartMicrovm(StartMicrovmError::MissingKernelConfig);
        let mut buf = Cursor::new(vec![0]);
        let json = ApiServer::json_fault_message(error.to_string());
        let response = ParsedRequest::convert_to_response(&Err(error));
        response.write_all(&mut buf).unwrap();
        let expected_response = http_response(&json, 400);
        assert_eq!(buf.into_inner(), expected_response.as_bytes());
    }

    // The remaining tests feed well-formed requests for each endpoint through
    // a socketpair and assert that parsing succeeds.
    #[test]
    fn test_try_from_get_info() {
        let (mut sender, receiver) = UnixStream::pair().unwrap();
        let mut connection = HttpConnection::new(receiver);
        sender
            .write_all(http_request("GET", "/", None).as_bytes())
            .unwrap();
        connection.try_read().unwrap();
        let req = connection.pop_parsed_request().unwrap();
        ParsedRequest::try_from(&req).unwrap();
    }

    #[test]
    fn test_try_from_get_balloon() {
        let (mut sender, receiver) = UnixStream::pair().unwrap();
        let mut connection = HttpConnection::new(receiver);
        sender
            .write_all(http_request("GET", "/balloon", None).as_bytes())
            .unwrap();
        connection.try_read().unwrap();
        let req = connection.pop_parsed_request().unwrap();
        ParsedRequest::try_from(&req).unwrap();
    }

    #[test]
    fn test_try_from_get_balloon_stats() {
        let (mut sender, receiver) = UnixStream::pair().unwrap();
        let mut connection = HttpConnection::new(receiver);
        sender
            .write_all(http_request("GET", "/balloon/statistics", None).as_bytes())
            .unwrap();
        connection.try_read().unwrap();
        let req = connection.pop_parsed_request().unwrap();
        ParsedRequest::try_from(&req).unwrap();
    }

    #[test]
    fn test_try_from_get_balloon_hinting() {
        let (mut sender, receiver) = UnixStream::pair().unwrap();
        let mut connection = HttpConnection::new(receiver);
        sender
            .write_all(http_request("GET", "/balloon/hinting/status", None).as_bytes())
            .unwrap();
        connection.try_read().unwrap();
        let req = connection.pop_parsed_request().unwrap();
        ParsedRequest::try_from(&req).unwrap();
    }

    #[test]
    fn test_try_from_get_machine_config() {
        let (mut sender, receiver) = UnixStream::pair().unwrap();
        let mut connection = HttpConnection::new(receiver);
        sender
            .write_all(http_request("GET", "/machine-config", None).as_bytes())
            .unwrap();
        connection.try_read().unwrap();
        let req = connection.pop_parsed_request().unwrap();
        ParsedRequest::try_from(&req).unwrap();
    }

    #[test]
    fn test_try_from_get_mmds() {
        let (mut sender, receiver) = UnixStream::pair().unwrap();
        let mut connection = HttpConnection::new(receiver);
        sender
            .write_all(http_request("GET", "/mmds", None).as_bytes())
            .unwrap();
        connection.try_read().unwrap();
        let req = connection.pop_parsed_request().unwrap();
        ParsedRequest::try_from(&req).unwrap();
    }

    #[test]
    fn test_try_from_get_version() {
        let (mut sender, receiver) = UnixStream::pair().unwrap();
        let mut connection = HttpConnection::new(receiver);
        sender
            .write_all(http_request("GET", "/version", None).as_bytes())
            .unwrap();
        connection.try_read().unwrap();
        let req = connection.pop_parsed_request().unwrap();
        ParsedRequest::try_from(&req).unwrap();
    }

    #[test]
    fn test_try_from_put_actions() {
        let (mut sender, receiver) = UnixStream::pair().unwrap();
        let mut connection = HttpConnection::new(receiver);
        let body = "{ \"action_type\": \"FlushMetrics\" }";
        sender
            .write_all(http_request("PUT", "/actions", Some(body)).as_bytes())
            .unwrap();
        connection.try_read().unwrap();
        let req = connection.pop_parsed_request().unwrap();
        ParsedRequest::try_from(&req).unwrap();
    }

    #[test]
    fn test_try_from_put_balloon() {
        let (mut sender, receiver) = UnixStream::pair().unwrap();
        let mut connection = HttpConnection::new(receiver);
        let body = "{ \"amount_mib\": 0, \"deflate_on_oom\": false, \"stats_polling_interval_s\": 0 }";
        sender
            .write_all(http_request("PUT", "/balloon", Some(body)).as_bytes())
            .unwrap();
        connection.try_read().unwrap();
        let req = connection.pop_parsed_request().unwrap();
        ParsedRequest::try_from(&req).unwrap();
    }

    #[test]
    fn test_try_from_put_entropy() {
        let (mut sender, receiver) = UnixStream::pair().unwrap();
        let mut connection = HttpConnection::new(receiver);
        let body = "{ \"rate_limiter\": { \"bandwidth\" : { \"size\": 0, \"one_time_burst\": 0, \
                    \"refill_time\": 0 }, \"ops\": { \"size\": 0, \"one_time_burst\": 0, \
                    \"refill_time\": 0 } } }";
        sender
            .write_all(http_request("PUT", "/entropy", Some(body)).as_bytes())
            .unwrap();
        connection.try_read().unwrap();
        let req = connection.pop_parsed_request().unwrap();
        ParsedRequest::try_from(&req).unwrap();
    }

    #[test]
    fn test_try_from_put_boot() {
        let (mut sender, receiver) = UnixStream::pair().unwrap();
        let mut connection = HttpConnection::new(receiver);
        let body = "{ \"kernel_image_path\": \"string\", \"boot_args\": \"string\" }";
        sender
            .write_all(http_request("PUT", "/boot-source", Some(body)).as_bytes())
            .unwrap();
        connection.try_read().unwrap();
        let req = connection.pop_parsed_request().unwrap();
        ParsedRequest::try_from(&req).unwrap();
    }

    #[test]
    fn test_try_from_put_drives() {
        let (mut sender, receiver) = UnixStream::pair().unwrap();
        let mut connection = HttpConnection::new(receiver);
        let body = "{ \"drive_id\": \"string\", \"path_on_host\": \"string\", \"is_root_device\": \
                    true, \"partuuid\": \"string\", \"is_read_only\": true, \"cache_type\": \
                    \"Unsafe\", \"io_engine\": \"Sync\", \"rate_limiter\": { \"bandwidth\": { \
                    \"size\": 0, \"one_time_burst\": 0, \"refill_time\": 0 }, \"ops\": { \
                    \"size\": 0, \"one_time_burst\": 0, \"refill_time\": 0 } } }";
        sender
            .write_all(http_request("PUT", "/drives/string", Some(body)).as_bytes())
            .unwrap();
        connection.try_read().unwrap();
        let req = connection.pop_parsed_request().unwrap();
        ParsedRequest::try_from(&req).unwrap();
    }

    #[test]
    fn test_try_from_put_logger() {
        let (mut sender, receiver) = UnixStream::pair().unwrap();
        let mut connection = HttpConnection::new(receiver);
        let body = "{ \"log_path\": \"string\", \"level\": \"Warning\", \"show_level\": false, \
                    \"show_log_origin\": false }";
        sender
            .write_all(http_request("PUT", "/logger", Some(body)).as_bytes())
            .unwrap();
        connection.try_read().unwrap();
        let req = connection.pop_parsed_request().unwrap();
        ParsedRequest::try_from(&req).unwrap();
    }

    #[test]
    fn test_try_from_put_machine_config() {
        let (mut sender, receiver) = UnixStream::pair().unwrap();
        let mut connection = HttpConnection::new(receiver);
        let body = "{ \"vcpu_count\": 1, \"mem_size_mib\": 1 }";
        sender
            .write_all(http_request("PUT", "/machine-config", Some(body)).as_bytes())
            .unwrap();
        connection.try_read().unwrap();
        let req = connection.pop_parsed_request().unwrap();
        ParsedRequest::try_from(&req).unwrap();
    }

    #[test]
    fn test_try_from_put_metrics() {
        let (mut sender, receiver) = UnixStream::pair().unwrap();
        let mut connection = HttpConnection::new(receiver);
        let body = "{ \"metrics_path\": \"string\" }";
        sender
            .write_all(http_request("PUT", "/metrics", Some(body)).as_bytes())
            .unwrap();
        connection.try_read().unwrap();
        let req = connection.pop_parsed_request().unwrap();
        ParsedRequest::try_from(&req).unwrap();
    }

    #[test]
    fn test_try_from_put_mmds() {
        let (mut sender, receiver) = UnixStream::pair().unwrap();
        let mut connection = HttpConnection::new(receiver);
        // `/mmds`
        sender
            .write_all(http_request("PUT", "/mmds", Some("{}")).as_bytes())
            .unwrap();
        connection.try_read().unwrap();
        let req = connection.pop_parsed_request().unwrap();
        ParsedRequest::try_from(&req).unwrap();

        let body = "{\"foo\":\"bar\"}";
        sender
            .write_all(http_request("PUT", "/mmds", Some(body)).as_bytes())
            .unwrap();
        connection.try_read().unwrap();
        let req = connection.pop_parsed_request().unwrap();
        ParsedRequest::try_from(&req).unwrap();

        // `/mmds/config`
        let body = "{ \"ipv4_address\": \"169.254.170.2\", \"network_interfaces\": [\"iface0\"] }";
        sender
            .write_all(http_request("PUT", "/mmds/config", Some(body)).as_bytes())
            .unwrap();
        connection.try_read().unwrap();
        let req = connection.pop_parsed_request().unwrap();
        ParsedRequest::try_from(&req).unwrap();
    }

    #[test]
    fn test_try_from_put_netif() {
        let (mut sender, receiver) = UnixStream::pair().unwrap();
        let mut connection = HttpConnection::new(receiver);
        let body = "{ \"iface_id\": \"string\", \"guest_mac\": \"12:34:56:78:9a:BC\", \
                    \"host_dev_name\": \"string\", \"rx_rate_limiter\": { \"bandwidth\": { \
                    \"size\": 0, \"one_time_burst\": 0, \"refill_time\": 0 }, \"ops\": { \
                    \"size\": 0, \"one_time_burst\": 0, \"refill_time\": 0 } }, \
                    \"tx_rate_limiter\": { \"bandwidth\": { \"size\": 0, \"one_time_burst\": 0, \
                    \"refill_time\": 0 }, \"ops\": { \"size\": 0, \"one_time_burst\": 0, \
                    \"refill_time\": 0 } } }";
        sender
            .write_all(http_request("PUT", "/network-interfaces/string", Some(body)).as_bytes())
            .unwrap();
        connection.try_read().unwrap();
        let req = connection.pop_parsed_request().unwrap();
        ParsedRequest::try_from(&req).unwrap();
    }

    #[test]
    fn test_try_from_put_snapshot() {
        let (mut sender, receiver) = UnixStream::pair().unwrap();
        let mut connection = HttpConnection::new(receiver);
        let body = "{ \"snapshot_path\": \"foo\", \"mem_file_path\": \"bar\" }";
        sender
            .write_all(http_request("PUT", "/snapshot/create", Some(body)).as_bytes())
            .unwrap();
        connection.try_read().unwrap();
        let req = connection.pop_parsed_request().unwrap();
        ParsedRequest::try_from(&req).unwrap();

        // NOTE(review): source chunk ends here mid-statement; the remainder of
        // this test continues past the visible extract.
        let body = "{ \"snapshot_path\": \"foo\", \"mem_backend\": {
\"backend_type\": \"File\", \ \"backend_path\": \"bar\" }, \"enable_diff_snapshots\": true }"; sender .write_all(http_request("PUT", "/snapshot/load", Some(body)).as_bytes()) .unwrap(); connection.try_read().unwrap(); let req = connection.pop_parsed_request().unwrap(); ParsedRequest::try_from(&req).unwrap(); let body = "{ \"snapshot_path\": \"foo\", \"mem_file_path\": \"bar\", \"resume_vm\": true }"; sender .write_all(http_request("PUT", "/snapshot/load", Some(body)).as_bytes()) .unwrap(); connection.try_read().unwrap(); let req = connection.pop_parsed_request().unwrap(); ParsedRequest::try_from(&req).unwrap(); } #[test] fn test_try_from_patch_vm() { let (mut sender, receiver) = UnixStream::pair().unwrap(); let mut connection = HttpConnection::new(receiver); let body = "{ \"state\": \"Paused\" }"; sender .write_all(http_request("PATCH", "/vm", Some(body)).as_bytes()) .unwrap(); connection.try_read().unwrap(); let req = connection.pop_parsed_request().unwrap(); ParsedRequest::try_from(&req).unwrap(); } #[test] fn test_try_from_put_vsock() { let (mut sender, receiver) = UnixStream::pair().unwrap(); let mut connection = HttpConnection::new(receiver); let body = "{ \"vsock_id\": \"string\", \"guest_cid\": 0, \"uds_path\": \"string\" }"; sender .write_all(http_request("PUT", "/vsock", Some(body)).as_bytes()) .unwrap(); connection.try_read().unwrap(); let req = connection.pop_parsed_request().unwrap(); ParsedRequest::try_from(&req).unwrap(); } #[test] fn test_try_from_patch_balloon() { let (mut sender, receiver) = UnixStream::pair().unwrap(); let mut connection = HttpConnection::new(receiver); let body = "{ \"amount_mib\": 1 }"; sender .write_all(http_request("PATCH", "/balloon", Some(body)).as_bytes()) .unwrap(); connection.try_read().unwrap(); let req = connection.pop_parsed_request().unwrap(); ParsedRequest::try_from(&req).unwrap(); let body = "{ \"stats_polling_interval_s\": 1 }"; sender .write_all(http_request("PATCH", "/balloon/statistics", Some(body)).as_bytes()) 
.unwrap(); connection.try_read().unwrap(); let req = connection.pop_parsed_request().unwrap(); ParsedRequest::try_from(&req).unwrap(); let body = "{ \"acknowledge_on_stop\": true }"; sender .write_all(http_request("PATCH", "/balloon/hinting/start", Some(body)).as_bytes()) .unwrap(); connection.try_read().unwrap(); let req = connection.pop_parsed_request().unwrap(); ParsedRequest::try_from(&req).unwrap(); let body = "{}"; sender .write_all(http_request("PATCH", "/balloon/hinting/start", Some(body)).as_bytes()) .unwrap(); connection.try_read().unwrap(); let req = connection.pop_parsed_request().unwrap(); ParsedRequest::try_from(&req).unwrap(); sender .write_all(http_request("PATCH", "/balloon/hinting/start", None).as_bytes()) .unwrap(); connection.try_read().unwrap(); let req = connection.pop_parsed_request().unwrap(); ParsedRequest::try_from(&req).unwrap(); let body = ""; sender .write_all(http_request("PATCH", "/balloon/hinting/stop", Some(body)).as_bytes()) .unwrap(); connection.try_read().unwrap(); let req = connection.pop_parsed_request().unwrap(); ParsedRequest::try_from(&req).unwrap(); sender .write_all(http_request("PATCH", "/balloon/hinting/stop", None).as_bytes()) .unwrap(); connection.try_read().unwrap(); let req = connection.pop_parsed_request().unwrap(); ParsedRequest::try_from(&req).unwrap(); } #[test] fn test_try_from_patch_drives() { let (mut sender, receiver) = UnixStream::pair().unwrap(); let mut connection = HttpConnection::new(receiver); let body = "{ \"drive_id\": \"string\", \"path_on_host\": \"string\" }"; sender .write_all(http_request("PATCH", "/drives/string", Some(body)).as_bytes()) .unwrap(); connection.try_read().unwrap(); let req = connection.pop_parsed_request().unwrap(); ParsedRequest::try_from(&req).unwrap(); } #[test] fn test_try_from_patch_machine_config() { let (mut sender, receiver) = UnixStream::pair().unwrap(); let mut connection = HttpConnection::new(receiver); let body = "{ \"vcpu_count\": 1, \"mem_size_mib\": 1 }"; sender 
.write_all(http_request("PATCH", "/machine-config", Some(body)).as_bytes()) .unwrap(); connection.try_read().unwrap(); let req = connection.pop_parsed_request().unwrap(); ParsedRequest::try_from(&req).unwrap(); let body = "{ \"vcpu_count\": 1, \"mem_size_mib\": 1, \"smt\": false, \"cpu_template\": \"C3\" }"; sender .write_all(http_request("PATCH", "/machine-config", Some(body)).as_bytes()) .unwrap(); connection.try_read().unwrap(); let req = connection.pop_parsed_request().unwrap(); #[cfg(target_arch = "x86_64")] ParsedRequest::try_from(&req).unwrap(); #[cfg(target_arch = "aarch64")] ParsedRequest::try_from(&req).unwrap_err(); } #[test] fn test_try_from_put_cpu_config() { let (mut sender, receiver) = UnixStream::pair().unwrap(); let mut connection = HttpConnection::new(receiver); let cpu_template = build_test_template(); let cpu_config_json_result = serde_json::to_string(&cpu_template); assert!( cpu_config_json_result.is_ok(), "Unable to serialize custom CPU template" ); let cpu_config_json = cpu_config_json_result.unwrap(); let result = sender.write_all(http_request("PUT", "/cpu-config", Some(&cpu_config_json)).as_bytes()); result.unwrap(); connection.try_read().unwrap(); let req = connection.pop_parsed_request().unwrap(); let request_result = ParsedRequest::try_from(&req); assert!(request_result.is_ok(), "{}", request_result.err().unwrap()); } #[test] fn test_try_from_patch_mmds() { let (mut sender, receiver) = UnixStream::pair().unwrap(); let mut connection = HttpConnection::new(receiver); sender .write_all(http_request("PATCH", "/mmds", Some("{}")).as_bytes()) .unwrap(); connection.try_read().unwrap(); let req = connection.pop_parsed_request().unwrap(); ParsedRequest::try_from(&req).unwrap(); } #[test] fn test_try_from_patch_netif() { let (mut sender, receiver) = UnixStream::pair().unwrap(); let mut connection = HttpConnection::new(receiver); let body = "{ \"iface_id\": \"string\" }"; sender .write_all(http_request("PATCH", "/network-interfaces/string", 
Some(body)).as_bytes()) .unwrap(); connection.try_read().unwrap(); let req = connection.pop_parsed_request().unwrap(); ParsedRequest::try_from(&req).unwrap(); } } ================================================ FILE: src/firecracker/src/api_server/request/actions.rs ================================================ // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 use serde::{Deserialize, Serialize}; use vmm::logger::{IncMetric, METRICS}; use vmm::rpc_interface::VmmAction; use super::super::parsed_request::{ParsedRequest, RequestError}; use super::Body; #[cfg(target_arch = "aarch64")] use super::StatusCode; // The names of the members from this enum must precisely correspond (as a string) to the possible // values of "action_type" from the json request body. This is useful to get a strongly typed // struct from the Serde deserialization process. #[derive(Debug, Deserialize, Serialize)] enum ActionType { FlushMetrics, InstanceStart, SendCtrlAltDel, } // The model of the json body from a sync request. We use Serde to transform each associated // json body into this. #[derive(Debug, Deserialize, Serialize)] #[serde(deny_unknown_fields)] struct ActionBody { action_type: ActionType, } pub(crate) fn parse_put_actions(body: &Body) -> Result { METRICS.put_api_requests.actions_count.inc(); let action_body = serde_json::from_slice::(body.raw()).inspect_err(|_| { METRICS.put_api_requests.actions_fails.inc(); })?; match action_body.action_type { ActionType::FlushMetrics => Ok(ParsedRequest::new_sync(VmmAction::FlushMetrics)), ActionType::InstanceStart => Ok(ParsedRequest::new_sync(VmmAction::StartMicroVm)), ActionType::SendCtrlAltDel => { // SendCtrlAltDel not supported on aarch64. 
#[cfg(target_arch = "aarch64")] return Err(RequestError::Generic( StatusCode::BadRequest, "SendCtrlAltDel does not supported on aarch64.".to_string(), )); #[cfg(target_arch = "x86_64")] Ok(ParsedRequest::new_sync(VmmAction::SendCtrlAltDel)) } } } #[cfg(test)] mod tests { use super::*; #[test] fn test_parse_put_actions_request() { { parse_put_actions(&Body::new("invalid_body")).unwrap_err(); let json = r#"{ "action_type": "InstanceStart" }"#; let req: ParsedRequest = ParsedRequest::new_sync(VmmAction::StartMicroVm); let result = parse_put_actions(&Body::new(json)); assert_eq!(result.unwrap(), req); } #[cfg(target_arch = "x86_64")] { let json = r#"{ "action_type": "SendCtrlAltDel" }"#; let req: ParsedRequest = ParsedRequest::new_sync(VmmAction::SendCtrlAltDel); let result = parse_put_actions(&Body::new(json)); assert_eq!(result.unwrap(), req); } #[cfg(target_arch = "aarch64")] { let json = r#"{ "action_type": "SendCtrlAltDel" }"#; let result = parse_put_actions(&Body::new(json)); result.unwrap_err(); } { let json = r#"{ "action_type": "FlushMetrics" }"#; let req: ParsedRequest = ParsedRequest::new_sync(VmmAction::FlushMetrics); let result = parse_put_actions(&Body::new(json)); assert_eq!(result.unwrap(), req); } } } ================================================ FILE: src/firecracker/src/api_server/request/balloon.rs ================================================ // Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
// SPDX-License-Identifier: Apache-2.0

// NOTE(review): generic parameters appear stripped by extraction (`Result` should
// carry its Ok/Err types, `from_slice::(…)` its turbofish) — confirm against upstream.

use micro_http::{Method, StatusCode};
use vmm::rpc_interface::VmmAction;
use vmm::vmm_config::balloon::{
    BalloonDeviceConfig, BalloonUpdateConfig, BalloonUpdateStatsConfig,
};

use super::super::parsed_request::{ParsedRequest, RequestError};
use super::Body;
use crate::api_server::parsed_request::method_to_error;

/// Parses the remainder of a `GET /balloon/hinting/...` path.
/// Only the `status` sub-path is recognized; anything else (including a
/// missing token) is a `BadRequest`.
fn parse_get_hinting<'a, T>(mut path_tokens: T) -> Result
where
    T: Iterator,
{
    match path_tokens.next() {
        Some("status") => Ok(ParsedRequest::new_sync(VmmAction::GetFreePageHintingStatus)),
        Some(stats_path) => Err(RequestError::Generic(
            StatusCode::BadRequest,
            format!("Unrecognized GET request path `/hinting/{stats_path}`."),
        )),
        None => Err(RequestError::Generic(
            StatusCode::BadRequest,
            "Unrecognized GET request path `/hinting/`.".to_string(),
        )),
    }
}

/// Routes a `GET /balloon...` request by its remaining path tokens:
/// no token -> balloon config, `statistics` -> balloon stats,
/// `hinting/...` -> delegated to `parse_get_hinting`, anything else -> `BadRequest`.
pub(crate) fn parse_get_balloon<'a, T>(mut path_tokens: T) -> Result
where
    T: Iterator,
{
    match path_tokens.next() {
        Some("statistics") => Ok(ParsedRequest::new_sync(VmmAction::GetBalloonStats)),
        Some("hinting") => parse_get_hinting(path_tokens),
        Some(stats_path) => Err(RequestError::Generic(
            StatusCode::BadRequest,
            format!("Unrecognized GET request path `{}`.", stats_path),
        )),
        None => Ok(ParsedRequest::new_sync(VmmAction::GetBalloonConfig)),
    }
}

/// Deserializes the request body into a `BalloonDeviceConfig` and wraps it in a
/// `SetBalloonDevice` action. Deserialization errors propagate via `?`.
pub(crate) fn parse_put_balloon(body: &Body) -> Result {
    Ok(ParsedRequest::new_sync(VmmAction::SetBalloonDevice(
        serde_json::from_slice::(body.raw())?,
    )))
}

/// Parses `PATCH /balloon/hinting/{start,stop}`.
/// For `start`, an absent or empty body yields the default command; otherwise the
/// body is deserialized. `stop` takes no body. Other sub-paths are `BadRequest`.
fn parse_patch_hinting<'a, T>(
    body: Option<&Body>,
    mut path_tokens: T,
) -> Result
where
    T: Iterator,
{
    match path_tokens.next() {
        Some("start") => {
            let cmd = match body {
                None => Default::default(),
                Some(b) if b.is_empty() => Default::default(),
                Some(b) => serde_json::from_slice(b.raw())?,
            };
            Ok(ParsedRequest::new_sync(VmmAction::StartFreePageHinting(
                cmd,
            )))
        }
        Some("stop") => Ok(ParsedRequest::new_sync(VmmAction::StopFreePageHinting)),
        Some(stats_path) => Err(RequestError::Generic(
            StatusCode::BadRequest,
            format!("Unrecognized PATCH request path `/hinting/{stats_path}`."),
        )),
        None => Err(RequestError::Generic(
            StatusCode::BadRequest,
            "Unrecognized PATCH request path `/hinting/`.".to_string(),
        )),
    }
}

/// Routes a `PATCH /balloon...` request: `statistics` (body required) updates the
/// stats polling interval, `hinting/...` is delegated (body optional), any other
/// path with a body updates the balloon size, and a missing body is a method error.
pub(crate) fn parse_patch_balloon<'a, T>(
    body: Option<&Body>,
    mut path_tokens: T,
) -> Result
where
    T: Iterator,
{
    match (path_tokens.next(), body) {
        (Some("statistics"), Some(body)) => {
            Ok(ParsedRequest::new_sync(VmmAction::UpdateBalloonStatistics(
                serde_json::from_slice::(body.raw())?,
            )))
        }
        (Some("hinting"), body) => parse_patch_hinting(body, path_tokens),
        (_, Some(body)) => Ok(ParsedRequest::new_sync(VmmAction::UpdateBalloon(
            serde_json::from_slice::(body.raw())?,
        ))),
        (_, None) => method_to_error(Method::Patch),
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::api_server::parsed_request::tests::vmm_action_from_request;

    #[test]
    fn test_parse_get_balloon_request() {
        parse_get_balloon([].into_iter()).unwrap();
        parse_get_balloon(["unrelated"].into_iter()).unwrap_err();
        parse_get_balloon(["statistics"].into_iter()).unwrap();
        parse_get_balloon(["hinting", "status"].into_iter()).unwrap();
        parse_get_balloon(["hinting", "unrelated"].into_iter()).unwrap_err();
        parse_get_balloon(["hinting"].into_iter()).unwrap_err();
    }

    #[test]
    fn test_parse_patch_balloon_request() {
        parse_patch_balloon(Some(&Body::new("invalid_payload")), [].into_iter()).unwrap_err();

        // PATCH with invalid fields.
        let body = r#"{ "amount_mib": "bar", "foo": "bar" }"#;
        parse_patch_balloon(Some(&Body::new(body)), [].into_iter()).unwrap_err();

        // PATCH with invalid types on fields. Adding a polling interval as string instead of bool.
        let body = r#"{ "amount_mib": 1000, "stats_polling_interval_s": "false" }"#;
        let res = parse_patch_balloon(Some(&Body::new(body)), [].into_iter());
        res.unwrap_err();

        // PATCH with invalid types on fields. Adding a amount_mib as a negative number.
        let body = r#"{ "amount_mib": -1000, "stats_polling_interval_s": true }"#;
        let res = parse_patch_balloon(Some(&Body::new(body)), [].into_iter());
        res.unwrap_err();

        // PATCH on statistics with missing polling interval field.
        let body = r#"{ "amount_mib": 100 }"#;
        let res = parse_patch_balloon(Some(&Body::new(body)), ["statistics"].into_iter());
        res.unwrap_err();

        // PATCH with missing amount_mib field.
        let body = r#"{ "stats_polling_interval_s": 0 }"#;
        let res = parse_patch_balloon(Some(&Body::new(body)), [].into_iter());
        res.unwrap_err();

        // PATCH that tries to update something else other than allowed fields.
        let body = r#"{ "amount_mib": "dummy_id", "stats_polling_interval_s": "dummy_host" }"#;
        let res = parse_patch_balloon(Some(&Body::new(body)), [].into_iter());
        res.unwrap_err();

        // PATCH with payload that is not a json.
        let body = r#"{ "fields": "dummy_field" }"#;
        parse_patch_balloon(Some(&Body::new(body)), [].into_iter()).unwrap_err();

        // PATCH on unrecognized path.
        let body = r#"{ "fields": "dummy_field" }"#;
        parse_patch_balloon(Some(&Body::new(body)), ["config"].into_iter()).unwrap_err();

        let body = r#"{ "amount_mib": 1 }"#;
        let expected_config = BalloonUpdateConfig { amount_mib: 1 };
        assert_eq!(
            vmm_action_from_request(
                parse_patch_balloon(Some(&Body::new(body)), [].into_iter()).unwrap()
            ),
            VmmAction::UpdateBalloon(expected_config)
        );

        let body = r#"{ "stats_polling_interval_s": 1 }"#;
        let expected_config = BalloonUpdateStatsConfig {
            stats_polling_interval_s: 1,
        };
        assert_eq!(
            vmm_action_from_request(
                parse_patch_balloon(Some(&Body::new(body)), ["statistics"].into_iter()).unwrap()
            ),
            VmmAction::UpdateBalloonStatistics(expected_config)
        );

        // PATCH start hinting run valid data
        let body = r#"{ "acknowledge_on_stop": true }"#;
        parse_patch_balloon(Some(&Body::new(body)), ["hinting", "start"].into_iter()).unwrap();
        // PATCH start hinting run no body
        parse_patch_balloon(Some(&Body::new("")), ["hinting", "start"].into_iter()).unwrap();
        // PATCH start hinting run invalid data
        let body = r#"{ "acknowledge_on_stop": "not valid" }"#;
        parse_patch_balloon(Some(&Body::new(body)), ["hinting", "start"].into_iter()).unwrap_err();
        // Re-sends the same invalid body, so this must fail again.
        parse_patch_balloon(Some(&Body::new(body)), ["hinting", "start"].into_iter()).unwrap_err();
        // PATCH stop hinting run
        parse_patch_balloon(Some(&Body::new("")), ["hinting", "stop"].into_iter()).unwrap();
        // PATCH stop hinting run
        parse_patch_balloon(None, ["hinting", "stop"].into_iter()).unwrap();
        // PATCH stop hinting invalid path
        parse_patch_balloon(Some(&Body::new("")), ["hinting"].into_iter()).unwrap_err();
        // PATCH stop hinting invalid path
        parse_patch_balloon(Some(&Body::new("")), ["hinting", "other path"].into_iter())
            .unwrap_err();
        // PATCH no body non hinting
        parse_patch_balloon(None, ["hinting"].into_iter()).unwrap_err();
    }

    #[test]
    fn test_parse_put_balloon_request() {
        parse_put_balloon(&Body::new("invalid_payload")).unwrap_err();

        // PUT with invalid fields.
        let body = r#"{ "amount_mib": "bar", "is_read_only": false }"#;
        parse_put_balloon(&Body::new(body)).unwrap_err();

        // PUT with valid input fields. Hinting reporting missing
        let body = r#"{ "amount_mib": 1000, "deflate_on_oom": true, "stats_polling_interval_s": 0 }"#;
        parse_put_balloon(&Body::new(body)).unwrap();

        // PUT with valid input hinting
        let body = r#"{ "amount_mib": 1000, "deflate_on_oom": true, "stats_polling_interval_s": 0, "free_page_hinting": true }"#;
        parse_put_balloon(&Body::new(body)).unwrap();

        // PUT with valid reporting
        let body = r#"{ "amount_mib": 1000, "deflate_on_oom": true, "stats_polling_interval_s": 0, "free_page_reporting": true }"#;
        parse_put_balloon(&Body::new(body)).unwrap();
    }
}

================================================
FILE: src/firecracker/src/api_server/request/boot_source.rs
================================================
// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0 use vmm::logger::{IncMetric, METRICS}; use vmm::rpc_interface::VmmAction; use vmm::vmm_config::boot_source::BootSourceConfig; use super::super::parsed_request::{ParsedRequest, RequestError}; use super::Body; pub(crate) fn parse_put_boot_source(body: &Body) -> Result { METRICS.put_api_requests.boot_source_count.inc(); Ok(ParsedRequest::new_sync(VmmAction::ConfigureBootSource( serde_json::from_slice::(body.raw()).inspect_err(|_| { METRICS.put_api_requests.boot_source_fails.inc(); })?, ))) } #[cfg(test)] mod tests { use super::*; #[test] fn test_parse_boot_request() { parse_put_boot_source(&Body::new("invalid_payload")).unwrap_err(); let body = r#"{ "kernel_image_path": "/foo/bar", "initrd_path": "/bar/foo", "boot_args": "foobar" }"#; let same_body = BootSourceConfig { kernel_image_path: String::from("/foo/bar"), initrd_path: Some(String::from("/bar/foo")), boot_args: Some(String::from("foobar")), }; let parsed_req = parse_put_boot_source(&Body::new(body)).unwrap(); assert_eq!( parsed_req, ParsedRequest::new_sync(VmmAction::ConfigureBootSource(same_body)) ); } } ================================================ FILE: src/firecracker/src/api_server/request/cpu_configuration.rs ================================================ // Copyright 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
// SPDX-License-Identifier: Apache-2.0

use vmm::cpu_config::templates::CustomCpuTemplate;
use vmm::logger::{IncMetric, METRICS};
use vmm::rpc_interface::VmmAction;

use super::super::parsed_request::{ParsedRequest, RequestError};
use super::Body;

/// Parses a `PUT /cpu-config` request: bumps the request counter, converts the
/// raw JSON body into a `CustomCpuTemplate` and wraps it in a
/// `PutCpuConfiguration` action. Conversion failures bump the failure counter
/// and are surfaced as `RequestError::SerdeJson`.
/// NOTE(review): the bare `Result` return type suggests generic parameters were
/// stripped by extraction — confirm against upstream.
pub(crate) fn parse_put_cpu_config(body: &Body) -> Result {
    METRICS.put_api_requests.cpu_cfg_count.inc();
    // Convert the API request into a deserialized/binary format
    Ok(ParsedRequest::new_sync(VmmAction::PutCpuConfiguration(
        CustomCpuTemplate::try_from(body.raw()).map_err(|err| {
            METRICS.put_api_requests.cpu_cfg_fails.inc();
            RequestError::SerdeJson(err)
        })?,
    )))
}

#[cfg(test)]
mod tests {
    use micro_http::Body;
    use vmm::cpu_config::templates::test_utils::{TEST_INVALID_TEMPLATE_JSON, build_test_template};
    use vmm::logger::{IncMetric, METRICS};
    use vmm::rpc_interface::VmmAction;

    use super::*;
    use crate::api_server::parsed_request::tests::vmm_action_from_request;

    #[test]
    fn test_parse_put_cpu_config_request() {
        let cpu_template = build_test_template();
        let cpu_config_json_result = serde_json::to_string(&cpu_template);
        assert!(
            &cpu_config_json_result.is_ok(),
            "Unable to serialize CPU template to JSON"
        );
        let cpu_template_json = cpu_config_json_result.unwrap();
        // Test that the CPU config to be used for KVM config is the same that
        // was read in from a test file.
        assert_eq!(
            vmm_action_from_request(
                parse_put_cpu_config(&Body::new(cpu_template_json.as_bytes())).unwrap()
            ),
            VmmAction::PutCpuConfiguration(cpu_template)
        );

        // Test empty request succeeds
        let parse_cpu_config_result = parse_put_cpu_config(&Body::new(r#"{ }"#));
        assert!(
            parse_cpu_config_result.is_ok(),
            "Failed to parse cpu-config: [{}]",
            parse_cpu_config_result.unwrap_err()
        );
    }

    /// Test basic API server validations like JSON sanity/legibility
    /// Any testing or validation done involving KVM or OS specific context
    /// need to be done in integration testing (api_cpu_configuration_integ_tests)
    #[test]
    fn test_parse_put_cpu_config_request_errors() {
        let mut expected_err_count = METRICS.put_api_requests.cpu_cfg_fails.count() + 1;

        // Test case for invalid payload
        let unparsable_cpu_config_result = parse_put_cpu_config(&Body::new(""));
        unparsable_cpu_config_result.unwrap_err();
        assert_eq!(
            METRICS.put_api_requests.cpu_cfg_fails.count(),
            expected_err_count
        );

        // Test request with invalid fields
        let invalid_put_result = parse_put_cpu_config(&Body::new(TEST_INVALID_TEMPLATE_JSON));
        expected_err_count += 1;
        assert_eq!(
            METRICS.put_api_requests.cpu_cfg_fails.count(),
            expected_err_count
        );
        assert!(
            matches!(invalid_put_result, Err(RequestError::SerdeJson(_))),
            "{:?}",
            invalid_put_result
        );
    }
}

================================================
FILE: src/firecracker/src/api_server/request/drive.rs
================================================
// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

// NOTE(review): generic parameters appear stripped by extraction
// (`Result`, `from_slice::(…)`) throughout this file — confirm against upstream.

use vmm::logger::{IncMetric, METRICS};
use vmm::rpc_interface::VmmAction;
use vmm::vmm_config::drive::{BlockDeviceConfig, BlockDeviceUpdateConfig};

use super::super::parsed_request::{ParsedRequest, RequestError, checked_id};
use super::{Body, StatusCode};

/// Parses a `PUT /drives/{id}` request: validates the path id via `checked_id`,
/// deserializes the body into a `BlockDeviceConfig` and requires the body's
/// `drive_id` to match the path id before emitting an `InsertBlockDevice`
/// action. Every failure path bumps the PUT drive failure counter.
pub(crate) fn parse_put_drive(
    body: &Body,
    id_from_path: Option<&str>,
) -> Result {
    METRICS.put_api_requests.drive_count.inc();
    let id = if let Some(id) = id_from_path {
        checked_id(id)?
    } else {
        METRICS.put_api_requests.drive_fails.inc();
        return Err(RequestError::EmptyID);
    };

    let device_cfg = serde_json::from_slice::(body.raw()).inspect_err(|_| {
        METRICS.put_api_requests.drive_fails.inc();
    })?;

    // The id in the URL and the id in the body must agree.
    if id != device_cfg.drive_id {
        METRICS.put_api_requests.drive_fails.inc();
        Err(RequestError::Generic(
            StatusCode::BadRequest,
            "The id from the path does not match the id from the body!".to_string(),
        ))
    } else {
        Ok(ParsedRequest::new_sync(VmmAction::InsertBlockDevice(
            device_cfg,
        )))
    }
}

/// Parses a `PATCH /drives/{id}` request: same id validation and path/body id
/// match as `parse_put_drive`, but deserializes a partial
/// `BlockDeviceUpdateConfig` and emits an `UpdateBlockDevice` action.
pub(crate) fn parse_patch_drive(
    body: &Body,
    id_from_path: Option<&str>,
) -> Result {
    METRICS.patch_api_requests.drive_count.inc();
    let id = if let Some(id) = id_from_path {
        checked_id(id)?
    } else {
        METRICS.patch_api_requests.drive_fails.inc();
        return Err(RequestError::EmptyID);
    };

    let block_device_update_cfg: BlockDeviceUpdateConfig =
        serde_json::from_slice::(body.raw()).inspect_err(|_| {
            METRICS.patch_api_requests.drive_fails.inc();
        })?;

    if id != block_device_update_cfg.drive_id {
        METRICS.patch_api_requests.drive_fails.inc();
        return Err(RequestError::Generic(
            StatusCode::BadRequest,
            String::from("The id from the path does not match the id from the body!"),
        ));
    }

    Ok(ParsedRequest::new_sync(VmmAction::UpdateBlockDevice(
        block_device_update_cfg,
    )))
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::api_server::parsed_request::tests::vmm_action_from_request;

    #[test]
    fn test_parse_patch_drive_request() {
        parse_patch_drive(&Body::new("invalid_payload"), None).unwrap_err();
        parse_patch_drive(&Body::new("invalid_payload"), Some("id")).unwrap_err();

        // PATCH with invalid fields.
        let body = r#"{ "drive_id": "bar", "is_read_only": false }"#;
        parse_patch_drive(&Body::new(body), Some("2")).unwrap_err();

        // PATCH with invalid types on fields. Adding a drive_id as number instead of string.
        let body = r#"{ "drive_id": 1000, "path_on_host": "dummy" }"#;
        let res = parse_patch_drive(&Body::new(body), Some("1000"));
        res.unwrap_err();

        // PATCH with invalid types on fields. Adding a path_on_host as bool instead of string.
        let body = r#"{ "drive_id": 1000, "path_on_host": true }"#;
        let res = parse_patch_drive(&Body::new(body), Some("1000"));
        res.unwrap_err();

        // PATCH with only drive_id field.
        let body = r#"{ "drive_id": "1000" }"#;
        let res = parse_patch_drive(&Body::new(body), Some("1000"));
        res.unwrap();

        // PATCH with missing drive_id field.
        let body = r#"{ "path_on_host": true }"#;
        let res = parse_patch_drive(&Body::new(body), Some("1000"));
        res.unwrap_err();

        // PATCH that tries to update something else other than path_on_host.
        let body = r#"{ "drive_id": "dummy_id", "path_on_host": "dummy_host", "is_read_only": false }"#;
        let res = parse_patch_drive(&Body::new(body), Some("1234"));
        res.unwrap_err();

        // PATCH with payload that is not a json.
        let body = r#"{ "fields": "dummy_field" }"#;
        parse_patch_drive(&Body::new(body), Some("1234")).unwrap_err();

        let body = r#"{ "drive_id": "foo", "path_on_host": "dummy" }"#;
        let expected_config = BlockDeviceUpdateConfig {
            drive_id: "foo".to_string(),
            path_on_host: Some("dummy".to_string()),
            rate_limiter: None,
        };
        assert_eq!(
            vmm_action_from_request(parse_patch_drive(&Body::new(body), Some("foo")).unwrap()),
            VmmAction::UpdateBlockDevice(expected_config)
        );

        let body = r#"{ "drive_id": "foo", "path_on_host": "dummy" }"#;
        // Must fail since the drive id differs from id_from_path (foo vs bar).
        parse_patch_drive(&Body::new(body), Some("bar")).unwrap_err();

        let body = r#"{ "drive_id": "foo", "rate_limiter": { "bandwidth": { "size": 5000, "refill_time": 100 }, "ops": { "size": 500, "refill_time": 100 } } }"#;
        // Validate that updating just the ratelimiter works.
        parse_patch_drive(&Body::new(body), Some("foo")).unwrap();

        let body = r#"{ "drive_id": "foo", "path_on_host": "/there", "rate_limiter": { "bandwidth": { "size": 5000, "refill_time": 100 }, "ops": { "size": 500, "refill_time": 100 } } }"#;
        // Validate that updating both path and rate limiter succeeds.
        parse_patch_drive(&Body::new(body), Some("foo")).unwrap();

        let body = r#"{ "drive_id": "foo", "path_on_host": "/there", "rate_limiter": { "ops": { "size": 100 } } }"#;
        // Validate that parse_patch_drive fails for invalid rate limiter cfg.
        parse_patch_drive(&Body::new(body), Some("foo")).unwrap_err();
    }

    #[test]
    fn test_parse_put_drive_request() {
        parse_put_drive(&Body::new("invalid_payload"), None).unwrap_err();
        parse_put_drive(&Body::new("invalid_payload"), Some("id")).unwrap_err();

        // PUT with invalid fields.
        let body = r#"{ "drive_id": "bar", "is_read_only": false }"#;
        parse_put_drive(&Body::new(body), Some("2")).unwrap_err();

        // PUT with missing all optional fields.
        let body = r#"{ "drive_id": "1000", "path_on_host": "dummy", "is_root_device": true, "is_read_only": true }"#;
        parse_put_drive(&Body::new(body), Some("1000")).unwrap();

        // Re-sends the previous body with a mismatched path id ("1000" vs "foo"), so it must fail.
        parse_put_drive(&Body::new(body), Some("foo")).unwrap_err();

        // PUT with the complete configuration.
        let body = r#"{ "drive_id": "1000", "path_on_host": "dummy", "is_root_device": true, "partuuid": "string", "is_read_only": true, "cache_type": "Unsafe", "io_engine": "Sync", "rate_limiter": { "bandwidth": { "size": 0, "one_time_burst": 0, "refill_time": 0 }, "ops": { "size": 0, "one_time_burst": 0, "refill_time": 0 } } }"#;
        parse_put_drive(&Body::new(body), Some("1000")).unwrap();
    }
}

================================================
FILE: src/firecracker/src/api_server/request/entropy.rs
================================================
// Copyright 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

use vmm::rpc_interface::VmmAction;
use vmm::vmm_config::entropy::EntropyDeviceConfig;

use super::super::parsed_request::{ParsedRequest, RequestError};
use super::Body;

/// Parses a `PUT /entropy` request: deserializes the body into an
/// `EntropyDeviceConfig` and wraps it in a `SetEntropyDevice` action.
pub(crate) fn parse_put_entropy(body: &Body) -> Result {
    let cfg = serde_json::from_slice::(body.raw())?;
    Ok(ParsedRequest::new_sync(VmmAction::SetEntropyDevice(cfg)))
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_parse_put_entropy_request() {
        parse_put_entropy(&Body::new("invalid_payload")).unwrap_err();

        // PUT with invalid fields.
        let body = r#"{ "some_id": 4 }"#;
        parse_put_entropy(&Body::new(body)).unwrap_err();

        // PUT with valid fields.
        let body = r#"{}"#;
        parse_put_entropy(&Body::new(body)).unwrap();
    }
}

================================================
FILE: src/firecracker/src/api_server/request/hotplug/memory.rs
================================================
// Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

use micro_http::Body;
use vmm::logger::{IncMetric, METRICS};
use vmm::rpc_interface::VmmAction;
use vmm::vmm_config::memory_hotplug::{MemoryHotplugConfig, MemoryHotplugSizeUpdate};

use crate::api_server::parsed_request::{ParsedRequest, RequestError};

/// Parses a `PUT /memory-hotplug` request: bumps the request counter,
/// deserializes the body into a `MemoryHotplugConfig` (bumping the failure
/// counter on error) and emits a `SetMemoryHotplugDevice` action.
pub(crate) fn parse_put_memory_hotplug(body: &Body) -> Result {
    METRICS.put_api_requests.hotplug_memory_count.inc();
    let config = serde_json::from_slice::(body.raw()).inspect_err(|_| {
        METRICS.put_api_requests.hotplug_memory_fails.inc();
    })?;
    Ok(ParsedRequest::new_sync(VmmAction::SetMemoryHotplugDevice(
        config,
    )))
}

/// Parses a `GET /memory-hotplug` request: takes no body and always emits a
/// `GetMemoryHotplugStatus` action (after bumping the request counter).
pub(crate) fn parse_get_memory_hotplug() -> Result {
    METRICS.get_api_requests.hotplug_memory_count.inc();
    Ok(ParsedRequest::new_sync(VmmAction::GetMemoryHotplugStatus))
}

/// Parses a `PATCH /memory-hotplug` request: deserializes the body into a
/// `MemoryHotplugSizeUpdate` (bumping the failure counter on error) and emits
/// an `UpdateMemoryHotplugSize` action.
pub(crate) fn parse_patch_memory_hotplug(body: &Body) -> Result {
    METRICS.patch_api_requests.hotplug_memory_count.inc();
    let config = serde_json::from_slice::(body.raw()).inspect_err(|_| {
        METRICS.patch_api_requests.hotplug_memory_fails.inc();
    })?;
    Ok(ParsedRequest::new_sync(VmmAction::UpdateMemoryHotplugSize(
        config,
    )))
}

#[cfg(test)]
mod tests {
    use vmm::devices::virtio::mem::{
        VIRTIO_MEM_DEFAULT_BLOCK_SIZE_MIB, VIRTIO_MEM_DEFAULT_SLOT_SIZE_MIB,
    };
    use vmm::vmm_config::memory_hotplug::MemoryHotplugSizeUpdate;

    use super::*;
    use crate::api_server::parsed_request::tests::vmm_action_from_request;

    #[test]
    fn test_parse_put_memory_hotplug_request() {
        parse_put_memory_hotplug(&Body::new("invalid_payload")).unwrap_err();

        // PUT with invalid fields.
        let body = r#"{ "total_size_mib": "bar" }"#;
        parse_put_memory_hotplug(&Body::new(body)).unwrap_err();

        // PUT with valid input fields with defaults.
        let body = r#"{ "total_size_mib": 2048 }"#;
        let expected_config = MemoryHotplugConfig {
            total_size_mib: 2048,
            block_size_mib: VIRTIO_MEM_DEFAULT_BLOCK_SIZE_MIB,
            slot_size_mib: VIRTIO_MEM_DEFAULT_SLOT_SIZE_MIB,
        };
        assert_eq!(
            vmm_action_from_request(parse_put_memory_hotplug(&Body::new(body)).unwrap()),
            VmmAction::SetMemoryHotplugDevice(expected_config)
        );

        // PUT with valid input fields.
        let body = r#"{ "total_size_mib": 2048, "block_size_mib": 64, "slot_size_mib": 64 }"#;
        let expected_config = MemoryHotplugConfig {
            total_size_mib: 2048,
            block_size_mib: 64,
            slot_size_mib: 64,
        };
        assert_eq!(
            vmm_action_from_request(parse_put_memory_hotplug(&Body::new(body)).unwrap()),
            VmmAction::SetMemoryHotplugDevice(expected_config)
        );
    }

    #[test]
    fn test_parse_parse_get_memory_hotplug_request() {
        assert_eq!(
            vmm_action_from_request(parse_get_memory_hotplug().unwrap()),
            VmmAction::GetMemoryHotplugStatus
        );
    }

    #[test]
    fn test_parse_patch_memory_hotplug_request() {
        parse_patch_memory_hotplug(&Body::new("invalid_payload")).unwrap_err();

        // PATCH with invalid fields.
        let body = r#"{ "requested_size_mib": "bar" }"#;
        parse_patch_memory_hotplug(&Body::new(body)).unwrap_err();

        // PATCH with valid input fields.
        let body = r#"{ "requested_size_mib": 2048 }"#;
        let expected_config = MemoryHotplugSizeUpdate {
            requested_size_mib: 2048,
        };
        assert_eq!(
            vmm_action_from_request(parse_patch_memory_hotplug(&Body::new(body)).unwrap()),
            VmmAction::UpdateMemoryHotplugSize(expected_config)
        );
    }
}

================================================
FILE: src/firecracker/src/api_server/request/hotplug/mod.rs
================================================
// Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

pub mod memory;

================================================
FILE: src/firecracker/src/api_server/request/instance_info.rs
================================================
// Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0 use vmm::logger::{IncMetric, METRICS}; use vmm::rpc_interface::VmmAction; use super::super::parsed_request::{ParsedRequest, RequestError}; pub(crate) fn parse_get_instance_info() -> Result { METRICS.get_api_requests.instance_info_count.inc(); Ok(ParsedRequest::new_sync(VmmAction::GetVmInstanceInfo)) } #[cfg(test)] mod tests { use super::*; use crate::api_server::parsed_request::RequestAction; #[test] fn test_parse_get_instance_info_request() { match parse_get_instance_info().unwrap().into_parts() { (RequestAction::Sync(action), _) if *action == VmmAction::GetVmInstanceInfo => {} _ => panic!("Test failed."), } } } ================================================ FILE: src/firecracker/src/api_server/request/logger.rs ================================================ // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 use vmm::logger::{IncMetric, METRICS}; use vmm::rpc_interface::VmmAction; use super::super::parsed_request::{ParsedRequest, RequestError}; use super::Body; pub(crate) fn parse_put_logger(body: &Body) -> Result { METRICS.put_api_requests.logger_count.inc(); let res = serde_json::from_slice::(body.raw()); let config = res.inspect_err(|_| { METRICS.put_api_requests.logger_fails.inc(); })?; Ok(ParsedRequest::new_sync(VmmAction::ConfigureLogger(config))) } #[cfg(test)] mod tests { use std::path::PathBuf; use vmm::logger::{LevelFilter, LoggerConfig}; use super::*; use crate::api_server::parsed_request::tests::vmm_action_from_request; #[test] fn test_parse_put_logger_request() { let body = r#"{ "log_path": "log", "level": "Warning", "show_level": false, "show_log_origin": false }"#; let expected_config = LoggerConfig { log_path: Some(PathBuf::from("log")), level: Some(LevelFilter::Warn), show_level: Some(false), show_log_origin: Some(false), module: None, }; assert_eq!( vmm_action_from_request(parse_put_logger(&Body::new(body)).unwrap()), 
VmmAction::ConfigureLogger(expected_config) ); let body = r#"{ "log_path": "log", "level": "DEBUG", "show_level": false, "show_log_origin": false }"#; let expected_config = LoggerConfig { log_path: Some(PathBuf::from("log")), level: Some(LevelFilter::Debug), show_level: Some(false), show_log_origin: Some(false), module: None, }; assert_eq!( vmm_action_from_request(parse_put_logger(&Body::new(body)).unwrap()), VmmAction::ConfigureLogger(expected_config) ); let invalid_body = r#"{ "invalid_field": "log", "level": "Warning", "show_level": false, "show_log_origin": false }"#; parse_put_logger(&Body::new(invalid_body)).unwrap_err(); } } ================================================ FILE: src/firecracker/src/api_server/request/machine_configuration.rs ================================================ // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 use vmm::logger::{IncMetric, METRICS}; use vmm::rpc_interface::VmmAction; use vmm::vmm_config::machine_config::{MachineConfig, MachineConfigUpdate}; use super::super::parsed_request::{ParsedRequest, RequestError, method_to_error}; use super::{Body, Method}; pub(crate) fn parse_get_machine_config() -> Result { METRICS.get_api_requests.machine_cfg_count.inc(); Ok(ParsedRequest::new_sync(VmmAction::GetVmMachineConfig)) } pub(crate) fn parse_put_machine_config(body: &Body) -> Result { METRICS.put_api_requests.machine_cfg_count.inc(); let config = serde_json::from_slice::(body.raw()).inspect_err(|_| { METRICS.put_api_requests.machine_cfg_fails.inc(); })?; // Check for the presence of deprecated `cpu_template` field. let mut deprecation_message = None; if config.cpu_template.is_some() { // `cpu_template` field in request is deprecated. METRICS.deprecated_api.deprecated_http_api_calls.inc(); deprecation_message = Some("PUT /machine-config: cpu_template field is deprecated."); } // Convert `MachineConfig` to `MachineConfigUpdate`. 
let config_update = MachineConfigUpdate::from(config); // Construct the `ParsedRequest` object. let mut parsed_req = ParsedRequest::new_sync(VmmAction::UpdateMachineConfiguration(config_update)); // If `cpu_template` was present, set the deprecation message in `parsing_info`. if let Some(msg) = deprecation_message { parsed_req.parsing_info().append_deprecation_message(msg); } Ok(parsed_req) } pub(crate) fn parse_patch_machine_config(body: &Body) -> Result { METRICS.patch_api_requests.machine_cfg_count.inc(); let config_update = serde_json::from_slice::(body.raw()).inspect_err(|_| { METRICS.patch_api_requests.machine_cfg_fails.inc(); })?; if config_update.is_empty() { return method_to_error(Method::Patch); } // Check for the presence of deprecated `cpu_template` field. let mut deprecation_message = None; if config_update.cpu_template.is_some() { // `cpu_template` field in request is deprecated. METRICS.deprecated_api.deprecated_http_api_calls.inc(); deprecation_message = Some("PATCH /machine-config: cpu_template field is deprecated."); } // Construct the `ParsedRequest` object. let mut parsed_req = ParsedRequest::new_sync(VmmAction::UpdateMachineConfiguration(config_update)); // If `cpu_template` was present, set the deprecation message in `parsing_info`. if let Some(msg) = deprecation_message { parsed_req.parsing_info().append_deprecation_message(msg); } Ok(parsed_req) } #[cfg(test)] mod tests { use vmm::cpu_config::templates::StaticCpuTemplate; use vmm::vmm_config::machine_config::HugePageConfig; use super::*; use crate::api_server::parsed_request::tests::{depr_action_from_req, vmm_action_from_request}; #[test] fn test_parse_get_machine_config_request() { parse_get_machine_config().unwrap(); assert!(METRICS.get_api_requests.machine_cfg_count.count() > 0); } #[test] fn test_parse_put_machine_config_request() { // 1. Test case for invalid payload. 
parse_put_machine_config(&Body::new("invalid_payload")).unwrap_err(); assert!(METRICS.put_api_requests.machine_cfg_fails.count() > 0); // 2. Test case for mandatory fields. let body = r#"{ "mem_size_mib": 1024 }"#; parse_put_machine_config(&Body::new(body)).unwrap_err(); let body = r#"{ "vcpu_count": 8 }"#; parse_put_machine_config(&Body::new(body)).unwrap_err(); let huge_pages_cases = [ ("None", HugePageConfig::None), ("2M", HugePageConfig::Hugetlbfs2M), ]; for (huge_page, expected) in huge_pages_cases { // 3. Test case for success scenarios for both architectures. let body = format!( r#"{{ "vcpu_count": 8, "mem_size_mib": 1024, "huge_pages": "{huge_page}" }}"# ); let expected_config = MachineConfigUpdate { vcpu_count: Some(8), mem_size_mib: Some(1024), smt: Some(false), cpu_template: None, track_dirty_pages: Some(false), huge_pages: Some(expected), #[cfg(feature = "gdb")] gdb_socket_path: None, }; assert_eq!( vmm_action_from_request(parse_put_machine_config(&Body::new(body)).unwrap()), VmmAction::UpdateMachineConfiguration(expected_config) ); } let body = r#"{ "vcpu_count": 8, "mem_size_mib": 1024, "cpu_template": "None" }"#; let expected_config = MachineConfigUpdate { vcpu_count: Some(8), mem_size_mib: Some(1024), smt: Some(false), cpu_template: Some(StaticCpuTemplate::None), track_dirty_pages: Some(false), huge_pages: Some(HugePageConfig::None), #[cfg(feature = "gdb")] gdb_socket_path: None, }; assert_eq!( vmm_action_from_request(parse_put_machine_config(&Body::new(body)).unwrap()), VmmAction::UpdateMachineConfiguration(expected_config) ); let body = r#"{ "vcpu_count": 8, "mem_size_mib": 1024, "smt": false, "track_dirty_pages": true }"#; let expected_config = MachineConfigUpdate { vcpu_count: Some(8), mem_size_mib: Some(1024), smt: Some(false), cpu_template: None, track_dirty_pages: Some(true), huge_pages: Some(HugePageConfig::None), #[cfg(feature = "gdb")] gdb_socket_path: None, }; assert_eq!( 
vmm_action_from_request(parse_put_machine_config(&Body::new(body)).unwrap()), VmmAction::UpdateMachineConfiguration(expected_config) ); // 4. Test that applying a CPU template is successful on x86_64 while on aarch64, it is not. let body = r#"{ "vcpu_count": 8, "mem_size_mib": 1024, "smt": false, "cpu_template": "T2", "track_dirty_pages": true }"#; #[cfg(target_arch = "x86_64")] { let expected_config = MachineConfigUpdate { vcpu_count: Some(8), mem_size_mib: Some(1024), smt: Some(false), cpu_template: Some(StaticCpuTemplate::T2), track_dirty_pages: Some(true), huge_pages: Some(HugePageConfig::None), #[cfg(feature = "gdb")] gdb_socket_path: None, }; assert_eq!( vmm_action_from_request(parse_put_machine_config(&Body::new(body)).unwrap()), VmmAction::UpdateMachineConfiguration(expected_config) ); } #[cfg(target_arch = "aarch64")] { parse_put_machine_config(&Body::new(body)).unwrap_err(); } // 5. Test that setting `smt: true` is successful let body = r#"{ "vcpu_count": 8, "mem_size_mib": 1024, "smt": true, "track_dirty_pages": true }"#; let expected_config = MachineConfigUpdate { vcpu_count: Some(8), mem_size_mib: Some(1024), smt: Some(true), cpu_template: None, track_dirty_pages: Some(true), huge_pages: Some(HugePageConfig::None), #[cfg(feature = "gdb")] gdb_socket_path: None, }; assert_eq!( vmm_action_from_request(parse_put_machine_config(&Body::new(body)).unwrap()), VmmAction::UpdateMachineConfiguration(expected_config) ); // 6. Test nonsense values for huge page size let body = r#"{ "vcpu_count": 8, "mem_size_mib": 1024, "huge_pages": "7M" }"#; parse_put_machine_config(&Body::new(body)).unwrap_err(); } #[test] fn test_parse_patch_machine_config_request() { // 1. Test cases for invalid payload. parse_patch_machine_config(&Body::new("invalid_payload")).unwrap_err(); // 2. Check currently supported fields that can be patched. 
let body = r#"{ "track_dirty_pages": true }"#; parse_patch_machine_config(&Body::new(body)).unwrap(); // On aarch64, CPU template is also not patch compatible. let body = r#"{ "cpu_template": "T2" }"#; #[cfg(target_arch = "aarch64")] parse_patch_machine_config(&Body::new(body)).unwrap_err(); #[cfg(target_arch = "x86_64")] parse_patch_machine_config(&Body::new(body)).unwrap(); let body = r#"{ "vcpu_count": 8, "mem_size_mib": 1024 }"#; parse_patch_machine_config(&Body::new(body)).unwrap(); // On aarch64, we allow `smt` to be configured to `false` but not `true`. let body = r#"{ "vcpu_count": 8, "mem_size_mib": 1024, "smt": false }"#; parse_patch_machine_config(&Body::new(body)).unwrap(); // 3. Check to see if an empty body returns an error. let body = r#"{}"#; parse_patch_machine_config(&Body::new(body)).unwrap_err(); } #[test] fn test_depr_cpu_template_in_put_req() { // Test that the deprecation message is shown when `cpu_template` is specified. let body = r#"{ "vcpu_count": 8, "mem_size_mib": 1024, "cpu_template": "None" }"#; depr_action_from_req( parse_put_machine_config(&Body::new(body)).unwrap(), Some("PUT /machine-config: cpu_template field is deprecated.".to_string()), ); // Test that the deprecation message is not shown when `cpu_template` is not specified. let body = r#"{ "vcpu_count": 8, "mem_size_mib": 1024 }"#; let (_, mut parsing_info) = parse_put_machine_config(&Body::new(body)) .unwrap() .into_parts(); assert!(parsing_info.take_deprecation_message().is_none()); } #[test] fn test_depr_cpu_template_in_patch_req() { // Test that the deprecation message is shown when `cpu_template` is specified. let body = r#"{ "vcpu_count": 8, "cpu_template": "None" }"#; depr_action_from_req( parse_patch_machine_config(&Body::new(body)).unwrap(), Some("PATCH /machine-config: cpu_template field is deprecated.".to_string()), ); // Test that the deprecation message is not shown when `cpu_template` is not specified. 
let body = r#"{ "vcpu_count": 8 }"#; let (_, mut parsing_info) = parse_patch_machine_config(&Body::new(body)) .unwrap() .into_parts(); assert!(parsing_info.take_deprecation_message().is_none()); } } ================================================ FILE: src/firecracker/src/api_server/request/metrics.rs ================================================ // Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 use vmm::logger::{IncMetric, METRICS}; use vmm::rpc_interface::VmmAction; use vmm::vmm_config::metrics::MetricsConfig; use super::super::parsed_request::{ParsedRequest, RequestError}; use super::Body; pub(crate) fn parse_put_metrics(body: &Body) -> Result { METRICS.put_api_requests.metrics_count.inc(); Ok(ParsedRequest::new_sync(VmmAction::ConfigureMetrics( serde_json::from_slice::(body.raw()).inspect_err(|_| { METRICS.put_api_requests.metrics_fails.inc(); })?, ))) } #[cfg(test)] mod tests { use std::path::PathBuf; use super::*; use crate::api_server::parsed_request::tests::vmm_action_from_request; #[test] fn test_parse_put_metrics_request() { let body = r#"{ "metrics_path": "metrics" }"#; let expected_config = MetricsConfig { metrics_path: PathBuf::from("metrics"), }; assert_eq!( vmm_action_from_request(parse_put_metrics(&Body::new(body)).unwrap()), VmmAction::ConfigureMetrics(expected_config) ); let invalid_body = r#"{ "invalid_field": "metrics" }"#; parse_put_metrics(&Body::new(invalid_body)).unwrap_err(); } } ================================================ FILE: src/firecracker/src/api_server/request/mmds.rs ================================================ // Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
// SPDX-License-Identifier: Apache-2.0 use micro_http::StatusCode; use vmm::logger::{IncMetric, METRICS}; use vmm::mmds::data_store::MmdsVersion; use vmm::rpc_interface::VmmAction; use vmm::vmm_config::mmds::MmdsConfig; use super::super::parsed_request::{ParsedRequest, RequestError}; use super::Body; pub(crate) fn parse_get_mmds() -> Result { METRICS.get_api_requests.mmds_count.inc(); Ok(ParsedRequest::new_sync(VmmAction::GetMMDS)) } fn parse_put_mmds_config(body: &Body) -> Result { let config: MmdsConfig = serde_json::from_slice(body.raw()).inspect_err(|_| { METRICS.put_api_requests.mmds_fails.inc(); })?; // Construct the `ParsedRequest` object. let version = config.version; let mut parsed_request = ParsedRequest::new_sync(VmmAction::SetMmdsConfiguration(config)); // MmdsV1 is deprecated. if version == MmdsVersion::V1 { METRICS.deprecated_api.deprecated_http_api_calls.inc(); parsed_request .parsing_info() .append_deprecation_message("PUT /mmds/config: V1 is deprecated. Use V2 instead."); } Ok(parsed_request) } pub(crate) fn parse_put_mmds( body: &Body, path_second_token: Option<&str>, ) -> Result { METRICS.put_api_requests.mmds_count.inc(); match path_second_token { None => Ok(ParsedRequest::new_sync(VmmAction::PutMMDS( serde_json::from_slice(body.raw()).inspect_err(|_| { METRICS.put_api_requests.mmds_fails.inc(); })?, ))), Some("config") => parse_put_mmds_config(body), Some(unrecognized) => { METRICS.put_api_requests.mmds_fails.inc(); Err(RequestError::Generic( StatusCode::BadRequest, format!("Unrecognized PUT request path `{}`.", unrecognized), )) } } } pub(crate) fn parse_patch_mmds(body: &Body) -> Result { METRICS.patch_api_requests.mmds_count.inc(); Ok(ParsedRequest::new_sync(VmmAction::PatchMMDS( serde_json::from_slice(body.raw()).inspect_err(|_| { METRICS.patch_api_requests.mmds_fails.inc(); })?, ))) } #[cfg(test)] mod tests { use super::*; use crate::api_server::parsed_request::tests::depr_action_from_req; #[test] fn test_parse_get_mmds_request() { 
parse_get_mmds().unwrap(); assert!(METRICS.get_api_requests.mmds_count.count() > 0); } #[test] fn test_parse_put_mmds_request() { let body = r#"{ "foo": "bar" }"#; parse_put_mmds(&Body::new(body), None).unwrap(); let invalid_body = "invalid_body"; parse_put_mmds(&Body::new(invalid_body), None).unwrap_err(); assert!(METRICS.put_api_requests.mmds_fails.count() > 0); // Test `config` path. let body = r#"{ "version": "V2", "ipv4_address": "169.254.170.2", "network_interfaces": [] }"#; let config_path = "config"; parse_put_mmds(&Body::new(body), Some(config_path)).unwrap(); let body = r#"{ "network_interfaces": [] }"#; parse_put_mmds(&Body::new(body), Some(config_path)).unwrap(); let body = r#"{ "version": "foo", "ipv4_address": "169.254.170.2", "network_interfaces": [] }"#; parse_put_mmds(&Body::new(body), Some(config_path)).unwrap_err(); let body = r#"{ "version": "V2" }"#; parse_put_mmds(&Body::new(body), Some(config_path)).unwrap_err(); let body = r#"{ "ipv4_address": "", "network_interfaces": [] }"#; parse_put_mmds(&Body::new(body), Some(config_path)).unwrap_err(); let invalid_config_body = r#"{ "invalid_config": "invalid_value" }"#; parse_put_mmds(&Body::new(invalid_config_body), Some(config_path)).unwrap_err(); parse_put_mmds(&Body::new(body), Some("invalid_path")).unwrap_err(); parse_put_mmds(&Body::new(invalid_body), Some(config_path)).unwrap_err(); } #[test] fn test_deprecated_config() { let config_path = "config"; let body = r#"{ "ipv4_address": "169.254.170.2", "network_interfaces": [] }"#; depr_action_from_req( parse_put_mmds(&Body::new(body), Some(config_path)).unwrap(), Some("PUT /mmds/config: V1 is deprecated. Use V2 instead.".to_string()), ); let body = r#"{ "version": "V1", "ipv4_address": "169.254.170.2", "network_interfaces": [] }"#; depr_action_from_req( parse_put_mmds(&Body::new(body), Some(config_path)).unwrap(), Some("PUT /mmds/config: V1 is deprecated. 
Use V2 instead.".to_string()), ); let body = r#"{ "version": "V2", "ipv4_address": "169.254.170.2", "network_interfaces": [] }"#; let (_, mut parsing_info) = parse_put_mmds(&Body::new(body), Some(config_path)) .unwrap() .into_parts(); assert!(parsing_info.take_deprecation_message().is_none()); } #[test] fn test_parse_patch_mmds_request() { let body = r#"{ "foo": "bar" }"#; parse_patch_mmds(&Body::new(body)).unwrap(); assert!(METRICS.patch_api_requests.mmds_count.count() > 0); parse_patch_mmds(&Body::new("invalid_body")).unwrap_err(); assert!(METRICS.patch_api_requests.mmds_fails.count() > 0); } } ================================================ FILE: src/firecracker/src/api_server/request/mod.rs ================================================ // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 pub mod actions; pub mod balloon; pub mod boot_source; pub mod cpu_configuration; pub mod drive; pub mod entropy; pub mod hotplug; pub mod instance_info; pub mod logger; pub mod machine_configuration; pub mod metrics; pub mod mmds; pub mod net; pub mod pmem; pub mod serial; pub mod snapshot; pub mod version; pub mod vsock; pub use micro_http::{Body, Method, StatusCode}; ================================================ FILE: src/firecracker/src/api_server/request/net.rs ================================================ // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 use vmm::logger::{IncMetric, METRICS}; use vmm::rpc_interface::VmmAction; use vmm::vmm_config::net::{NetworkInterfaceConfig, NetworkInterfaceUpdateConfig}; use super::super::parsed_request::{ParsedRequest, RequestError, checked_id}; use super::{Body, StatusCode}; pub(crate) fn parse_put_net( body: &Body, id_from_path: Option<&str>, ) -> Result { METRICS.put_api_requests.network_count.inc(); let id = if let Some(id) = id_from_path { checked_id(id)? 
} else { METRICS.put_api_requests.network_fails.inc(); return Err(RequestError::EmptyID); }; let netif = serde_json::from_slice::(body.raw()).inspect_err(|_| { METRICS.put_api_requests.network_fails.inc(); })?; if id != netif.iface_id.as_str() { METRICS.put_api_requests.network_fails.inc(); return Err(RequestError::Generic( StatusCode::BadRequest, format!( "The id from the path [{}] does not match the id from the body [{}]!", id, netif.iface_id.as_str() ), )); } Ok(ParsedRequest::new_sync(VmmAction::InsertNetworkDevice( netif, ))) } pub(crate) fn parse_patch_net( body: &Body, id_from_path: Option<&str>, ) -> Result { METRICS.patch_api_requests.network_count.inc(); let id = if let Some(id) = id_from_path { checked_id(id)? } else { METRICS.patch_api_requests.network_count.inc(); return Err(RequestError::EmptyID); }; let netif = serde_json::from_slice::(body.raw()).inspect_err(|_| { METRICS.patch_api_requests.network_fails.inc(); })?; if id != netif.iface_id { METRICS.patch_api_requests.network_count.inc(); return Err(RequestError::Generic( StatusCode::BadRequest, format!( "The id from the path [{}] does not match the id from the body [{}]!", id, netif.iface_id.as_str() ), )); } Ok(ParsedRequest::new_sync(VmmAction::UpdateNetworkInterface( netif, ))) } #[cfg(test)] mod tests { use super::*; use crate::api_server::parsed_request::tests::vmm_action_from_request; #[test] fn test_parse_put_net_request() { let body = r#"{ "iface_id": "foo", "host_dev_name": "bar", "guest_mac": "12:34:56:78:9A:BC" }"#; // 1. Exercise infamous "The id from the path does not match id from the body!". parse_put_net(&Body::new(body), Some("bar")).unwrap_err(); // 2. The `id_from_path` cannot be None. parse_put_net(&Body::new(body), None).unwrap_err(); // 3. Success case. let expected_config = serde_json::from_str::(body).unwrap(); assert_eq!( vmm_action_from_request(parse_put_net(&Body::new(body), Some("foo")).unwrap()), VmmAction::InsertNetworkDevice(expected_config) ); // 4. 
Serde error for invalid field (bytes instead of bandwidth). let body = r#"{ "iface_id": "foo", "rx_rate_limiter": { "bytes": { "size": 62500, "refill_time": 1000 } }, "tx_rate_limiter": { "bytes": { "size": 62500, "refill_time": 1000 } } }"#; parse_put_net(&Body::new(body), Some("foo")).unwrap_err(); } #[test] fn test_parse_patch_net_request() { let body = r#"{ "iface_id": "foo", "rx_rate_limiter": {}, "tx_rate_limiter": {} }"#; // 1. Exercise infamous "The id from the path does not match id from the body!". parse_patch_net(&Body::new(body), Some("bar")).unwrap_err(); // 2. The `id_from_path` cannot be None. parse_patch_net(&Body::new(body), None).unwrap_err(); // 3. Success case. let expected_config = serde_json::from_str::(body).unwrap(); assert_eq!( vmm_action_from_request(parse_patch_net(&Body::new(body), Some("foo")).unwrap()), VmmAction::UpdateNetworkInterface(expected_config) ); // 4. Serde error for invalid field (bytes instead of bandwidth). let body = r#"{ "iface_id": "foo", "rx_rate_limiter": { "bytes": { "size": 62500, "refill_time": 1000 } }, "tx_rate_limiter": { "bytes": { "size": 62500, "refill_time": 1000 } } }"#; parse_patch_net(&Body::new(body), Some("foo")).unwrap_err(); } } ================================================ FILE: src/firecracker/src/api_server/request/pmem.rs ================================================ // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 use vmm::logger::{IncMetric, METRICS}; use vmm::rpc_interface::VmmAction; use vmm::vmm_config::pmem::PmemConfig; use super::super::parsed_request::{ParsedRequest, RequestError, checked_id}; use super::{Body, StatusCode}; pub(crate) fn parse_put_pmem( body: &Body, id_from_path: Option<&str>, ) -> Result { METRICS.put_api_requests.pmem_count.inc(); let id = if let Some(id) = id_from_path { checked_id(id)? 
} else { METRICS.put_api_requests.pmem_fails.inc(); return Err(RequestError::EmptyID); }; let device_cfg = serde_json::from_slice::(body.raw()).inspect_err(|_| { METRICS.put_api_requests.pmem_fails.inc(); })?; if id != device_cfg.id { METRICS.put_api_requests.pmem_fails.inc(); Err(RequestError::Generic( StatusCode::BadRequest, "The id from the path does not match the id from the body!".to_string(), )) } else { Ok(ParsedRequest::new_sync(VmmAction::InsertPmemDevice( device_cfg, ))) } } #[cfg(test)] mod tests { use super::*; use crate::api_server::parsed_request::tests::vmm_action_from_request; #[test] fn test_parse_put_pmem_request() { parse_put_pmem(&Body::new("invalid_payload"), None).unwrap_err(); parse_put_pmem(&Body::new("invalid_payload"), Some("id")).unwrap_err(); let body = r#"{ "id": "bar", }"#; parse_put_pmem(&Body::new(body), Some("1")).unwrap_err(); let body = r#"{ "foo": "1", }"#; parse_put_pmem(&Body::new(body), Some("1")).unwrap_err(); let body = r#"{ "id": "1000", "path_on_host": "dummy", "root_device": true, "read_only": true }"#; let r = vmm_action_from_request(parse_put_pmem(&Body::new(body), Some("1000")).unwrap()); let expected_config = PmemConfig { id: "1000".to_string(), path_on_host: "dummy".to_string(), root_device: true, read_only: true, }; assert_eq!(r, VmmAction::InsertPmemDevice(expected_config)); } } ================================================ FILE: src/firecracker/src/api_server/request/serial.rs ================================================ // Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
// SPDX-License-Identifier: Apache-2.0

use micro_http::Body;
use vmm::logger::{IncMetric, METRICS};
use vmm::rpc_interface::VmmAction;
use vmm::vmm_config::serial::SerialConfig;

use crate::api_server::parsed_request::{ParsedRequest, RequestError};

/// Parses a `PUT /serial` request into a `ConfigureSerial` action.
pub(crate) fn parse_put_serial(body: &Body) -> Result<ParsedRequest, RequestError> {
    METRICS.put_api_requests.serial_count.inc();
    let res = serde_json::from_slice::<SerialConfig>(body.raw());
    let config = res.inspect_err(|_| {
        METRICS.put_api_requests.serial_fails.inc();
    })?;
    Ok(ParsedRequest::new_sync(VmmAction::ConfigureSerial(config)))
}

#[cfg(test)]
mod tests {
    use std::path::PathBuf;

    use super::*;
    use crate::api_server::parsed_request::tests::vmm_action_from_request;

    #[test]
    fn test_parse_put_serial_request() {
        let body = r#"{"serial_out_path": "serial"}"#;
        let expected_config = SerialConfig {
            serial_out_path: Some(PathBuf::from("serial")),
        };
        assert_eq!(
            vmm_action_from_request(parse_put_serial(&Body::new(body)).unwrap()),
            VmmAction::ConfigureSerial(expected_config)
        );
    }
}

================================================
FILE: src/firecracker/src/api_server/request/snapshot.rs
================================================
// Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

use serde::de::Error as DeserializeError;
use vmm::logger::{IncMetric, METRICS};
use vmm::rpc_interface::VmmAction;
use vmm::vmm_config::snapshot::{
    CreateSnapshotParams, LoadSnapshotConfig, LoadSnapshotParams, MemBackendConfig,
    MemBackendType, Vm, VmState,
};

use super::super::parsed_request::{ParsedRequest, RequestError};
use super::super::request::{Body, Method, StatusCode};

/// Deprecation message for the `mem_file_path` field.
const LOAD_DEPRECATION_MESSAGE: &str =
    "PUT /snapshot/load: mem_file_path and enable_diff_snapshots fields are deprecated.";
/// None of the `mem_backend` or `mem_file_path` fields has been specified.
pub const MISSING_FIELD: &str =
    "missing field: either `mem_backend` or `mem_file_path` is required";
/// Both the `mem_backend` and `mem_file_path` fields have been specified.
/// Only specifying one of them is allowed.
pub const TOO_MANY_FIELDS: &str =
    "too many fields: either `mem_backend` or `mem_file_path` exclusively is required";

/// Parses a `PUT /snapshot/{create|load}` request, dispatching on the operation token.
pub(crate) fn parse_put_snapshot(
    body: &Body,
    request_type_from_path: Option<&str>,
) -> Result<ParsedRequest, RequestError> {
    match request_type_from_path {
        Some(request_type) => match request_type {
            "create" => parse_put_snapshot_create(body),
            "load" => parse_put_snapshot_load(body),
            _ => Err(RequestError::InvalidPathMethod(
                format!("/snapshot/{}", request_type),
                Method::Put,
            )),
        },
        None => Err(RequestError::Generic(
            StatusCode::BadRequest,
            "Missing snapshot operation type.".to_string(),
        )),
    }
}

/// Parses a `PATCH /vm` request into a `Pause` or `Resume` action.
pub(crate) fn parse_patch_vm_state(body: &Body) -> Result<ParsedRequest, RequestError> {
    let vm = serde_json::from_slice::<Vm>(body.raw())?;

    match vm.state {
        VmState::Paused => Ok(ParsedRequest::new_sync(VmmAction::Pause)),
        VmState::Resumed => Ok(ParsedRequest::new_sync(VmmAction::Resume)),
    }
}

/// Parses the body of a `PUT /snapshot/create` request.
fn parse_put_snapshot_create(body: &Body) -> Result<ParsedRequest, RequestError> {
    let snapshot_config = serde_json::from_slice::<CreateSnapshotParams>(body.raw())?;
    Ok(ParsedRequest::new_sync(VmmAction::CreateSnapshot(
        snapshot_config,
    )))
}

/// Parses the body of a `PUT /snapshot/load` request, validating that exactly one of
/// the (deprecated) `mem_file_path` or `mem_backend` fields is present.
fn parse_put_snapshot_load(body: &Body) -> Result<ParsedRequest, RequestError> {
    let snapshot_config = serde_json::from_slice::<LoadSnapshotConfig>(body.raw())?;

    match (&snapshot_config.mem_backend, &snapshot_config.mem_file_path) {
        // Ensure `mem_file_path` and `mem_backend` fields are not present at the same time.
        (Some(_), Some(_)) => {
            return Err(RequestError::SerdeJson(serde_json::Error::custom(
                TOO_MANY_FIELDS,
            )));
        }
        // Ensure that one of `mem_file_path` or `mem_backend` fields is always specified.
        (None, None) => {
            return Err(RequestError::SerdeJson(serde_json::Error::custom(
                MISSING_FIELD,
            )));
        }
        _ => {}
    }

    // Check for the presence of deprecated `mem_file_path` field and create
    // deprecation message if found.
    let mut deprecation_message = None;
    #[allow(deprecated)]
    if snapshot_config.mem_file_path.is_some() || snapshot_config.enable_diff_snapshots {
        // `mem_file_path` field in request is deprecated.
        METRICS.deprecated_api.deprecated_http_api_calls.inc();
        deprecation_message = Some(LOAD_DEPRECATION_MESSAGE);
    }

    // If `mem_file_path` is specified instead of `mem_backend`, we construct the
    // `MemBackendConfig` object from the path specified, with `File` as backend type.
    let mem_backend = match snapshot_config.mem_backend {
        Some(backend_cfg) => backend_cfg,
        None => {
            MemBackendConfig {
                // This is safe to unwrap() because we ensure above that one of the two:
                // either `mem_file_path` or `mem_backend` field is always specified.
                backend_path: snapshot_config.mem_file_path.unwrap(),
                backend_type: MemBackendType::File,
            }
        }
    };

    let snapshot_params = LoadSnapshotParams {
        snapshot_path: snapshot_config.snapshot_path,
        mem_backend,
        #[allow(deprecated)]
        track_dirty_pages: snapshot_config.enable_diff_snapshots
            || snapshot_config.track_dirty_pages,
        resume_vm: snapshot_config.resume_vm,
        network_overrides: snapshot_config.network_overrides,
        vsock_override: snapshot_config.vsock_override,
    };

    // Construct the `ParsedRequest` object.
    let mut parsed_req = ParsedRequest::new_sync(VmmAction::LoadSnapshot(snapshot_params));
    // If `mem_file_path` was present, set the deprecation message in `parsing_info`.
    if let Some(msg) = deprecation_message {
        parsed_req.parsing_info().append_deprecation_message(msg);
    }

    Ok(parsed_req)
}

#[cfg(test)]
mod tests {
    use vmm::vmm_config::snapshot::{MemBackendConfig, MemBackendType, NetworkOverride};

    use super::*;
    use crate::api_server::parsed_request::tests::{depr_action_from_req, vmm_action_from_request};

    #[test]
    fn test_parse_put_snapshot() {
        use std::path::PathBuf;

        use vmm::vmm_config::snapshot::SnapshotType;

        let body = r#"{ "snapshot_type": "Diff", "snapshot_path": "foo", "mem_file_path": "bar" }"#;
        let expected_config = CreateSnapshotParams {
            snapshot_type: SnapshotType::Diff,
            snapshot_path: PathBuf::from("foo"),
            mem_file_path: PathBuf::from("bar"),
        };
        assert_eq!(
            vmm_action_from_request(parse_put_snapshot(&Body::new(body), Some("create")).unwrap()),
            VmmAction::CreateSnapshot(expected_config)
        );

        let body = r#"{ "snapshot_path": "foo", "mem_file_path": "bar" }"#;
        let expected_config = CreateSnapshotParams {
            snapshot_type: SnapshotType::Full,
            snapshot_path: PathBuf::from("foo"),
            mem_file_path: PathBuf::from("bar"),
        };
        assert_eq!(
            vmm_action_from_request(parse_put_snapshot(&Body::new(body), Some("create")).unwrap()),
            VmmAction::CreateSnapshot(expected_config)
        );

        let invalid_body = r#"{ "invalid_field": "foo", "mem_file_path": "bar" }"#;
        parse_put_snapshot(&Body::new(invalid_body), Some("create")).unwrap_err();

        let body = r#"{ "snapshot_path": "foo", "mem_backend": { "backend_path": "bar", "backend_type": "File" } }"#;
        let expected_config = LoadSnapshotParams {
            snapshot_path: PathBuf::from("foo"),
            mem_backend: MemBackendConfig {
                backend_path: PathBuf::from("bar"),
                backend_type: MemBackendType::File,
            },
            track_dirty_pages: false,
            resume_vm: false,
            network_overrides: vec![],
            vsock_override: None,
        };
        let mut parsed_request = parse_put_snapshot(&Body::new(body), Some("load")).unwrap();
        assert!(
            parsed_request
                .parsing_info()
                .take_deprecation_message()
                .is_none()
        );
        assert_eq!(
            vmm_action_from_request(parsed_request),
VmmAction::LoadSnapshot(expected_config) ); let body = r#"{ "snapshot_path": "foo", "mem_backend": { "backend_path": "bar", "backend_type": "File" }, "track_dirty_pages": true }"#; let expected_config = LoadSnapshotParams { snapshot_path: PathBuf::from("foo"), mem_backend: MemBackendConfig { backend_path: PathBuf::from("bar"), backend_type: MemBackendType::File, }, track_dirty_pages: true, resume_vm: false, network_overrides: vec![], vsock_override: None, }; let mut parsed_request = parse_put_snapshot(&Body::new(body), Some("load")).unwrap(); assert!( parsed_request .parsing_info() .take_deprecation_message() .is_none() ); assert_eq!( vmm_action_from_request(parsed_request), VmmAction::LoadSnapshot(expected_config) ); let body = r#"{ "snapshot_path": "foo", "mem_backend": { "backend_path": "bar", "backend_type": "Uffd" }, "resume_vm": true }"#; let expected_config = LoadSnapshotParams { snapshot_path: PathBuf::from("foo"), mem_backend: MemBackendConfig { backend_path: PathBuf::from("bar"), backend_type: MemBackendType::Uffd, }, track_dirty_pages: false, resume_vm: true, network_overrides: vec![], vsock_override: None, }; let mut parsed_request = parse_put_snapshot(&Body::new(body), Some("load")).unwrap(); assert!( parsed_request .parsing_info() .take_deprecation_message() .is_none() ); assert_eq!( vmm_action_from_request(parsed_request), VmmAction::LoadSnapshot(expected_config) ); let body = r#"{ "snapshot_path": "foo", "mem_backend": { "backend_path": "bar", "backend_type": "Uffd" }, "resume_vm": true, "network_overrides": [ { "iface_id": "eth0", "host_dev_name": "vmtap2" } ] }"#; let expected_config = LoadSnapshotParams { snapshot_path: PathBuf::from("foo"), mem_backend: MemBackendConfig { backend_path: PathBuf::from("bar"), backend_type: MemBackendType::Uffd, }, track_dirty_pages: false, resume_vm: true, network_overrides: vec![NetworkOverride { iface_id: String::from("eth0"), host_dev_name: String::from("vmtap2"), }], vsock_override: None, }; let mut 
parsed_request = parse_put_snapshot(&Body::new(body), Some("load")).unwrap(); assert!( parsed_request .parsing_info() .take_deprecation_message() .is_none() ); assert_eq!( vmm_action_from_request(parsed_request), VmmAction::LoadSnapshot(expected_config) ); let body = r#"{ "snapshot_path": "foo", "mem_file_path": "bar", "resume_vm": true }"#; let expected_config = LoadSnapshotParams { snapshot_path: PathBuf::from("foo"), mem_backend: MemBackendConfig { backend_path: PathBuf::from("bar"), backend_type: MemBackendType::File, }, track_dirty_pages: false, resume_vm: true, network_overrides: vec![], vsock_override: None, }; let parsed_request = parse_put_snapshot(&Body::new(body), Some("load")).unwrap(); assert_eq!( depr_action_from_req(parsed_request, Some(LOAD_DEPRECATION_MESSAGE.to_string())), VmmAction::LoadSnapshot(expected_config) ); let body = r#"{ "snapshot_path": "foo", "mem_backend": { "backend_path": "bar" } }"#; assert_eq!( parse_put_snapshot(&Body::new(body), Some("load")) .err() .unwrap() .to_string(), "An error occurred when deserializing the json body of a request: missing field \ `backend_type` at line 5 column 13." ); let body = r#"{ "snapshot_path": "foo", "mem_backend": { "backend_type": "File", } }"#; assert_eq!( parse_put_snapshot(&Body::new(body), Some("load")) .err() .unwrap() .to_string(), "An error occurred when deserializing the json body of a request: trailing comma at \ line 5 column 13." 
); let body = r#"{ "snapshot_path": "foo", "mem_file_path": "bar", "mem_backend": { "backend_path": "bar", "backend_type": "Uffd" } }"#; assert_eq!( parse_put_snapshot(&Body::new(body), Some("load")) .err() .unwrap() .to_string(), RequestError::SerdeJson(serde_json::Error::custom(TOO_MANY_FIELDS.to_string())) .to_string() ); let body = r#"{ "snapshot_path": "foo" }"#; assert_eq!( parse_put_snapshot(&Body::new(body), Some("load")) .err() .unwrap() .to_string(), RequestError::SerdeJson(serde_json::Error::custom(MISSING_FIELD.to_string())) .to_string() ); let body = r#"{ "mem_backend": { "backend_path": "bar", "backend_type": "Uffd" } }"#; assert_eq!( parse_put_snapshot(&Body::new(body), Some("load")) .err() .unwrap() .to_string(), "An error occurred when deserializing the json body of a request: missing field \ `snapshot_path` at line 6 column 9." ); parse_put_snapshot(&Body::new(body), Some("invalid")).unwrap_err(); parse_put_snapshot(&Body::new(body), None).unwrap_err(); } #[test] fn test_parse_patch_vm_state() { let body = r#"{ "state": "Paused" }"#; assert!( parse_patch_vm_state(&Body::new(body)) .unwrap() .eq(&ParsedRequest::new_sync(VmmAction::Pause)) ); let body = r#"{ "state": "Resumed" }"#; assert!( parse_patch_vm_state(&Body::new(body)) .unwrap() .eq(&ParsedRequest::new_sync(VmmAction::Resume)) ); let invalid_body = r#"{ "invalid": "Paused" }"#; parse_patch_vm_state(&Body::new(invalid_body)).unwrap_err(); } } ================================================ FILE: src/firecracker/src/api_server/request/version.rs ================================================ // Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
// SPDX-License-Identifier: Apache-2.0 use vmm::logger::{IncMetric, METRICS}; use vmm::rpc_interface::VmmAction; use super::super::parsed_request::{ParsedRequest, RequestError}; pub(crate) fn parse_get_version() -> Result { METRICS.get_api_requests.vmm_version_count.inc(); Ok(ParsedRequest::new_sync(VmmAction::GetVmmVersion)) } #[cfg(test)] mod tests { use super::super::super::parsed_request::RequestAction; use super::*; #[test] fn test_parse_get_version_request() { match parse_get_version().unwrap().into_parts() { (RequestAction::Sync(action), _) if *action == VmmAction::GetVmmVersion => {} _ => panic!("Test failed."), } } } ================================================ FILE: src/firecracker/src/api_server/request/vsock.rs ================================================ // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 use vmm::logger::{IncMetric, METRICS}; use vmm::rpc_interface::VmmAction; use vmm::vmm_config::vsock::VsockDeviceConfig; use super::super::parsed_request::{ParsedRequest, RequestError}; use super::Body; pub(crate) fn parse_put_vsock(body: &Body) -> Result { METRICS.put_api_requests.vsock_count.inc(); let vsock_cfg = serde_json::from_slice::(body.raw()).inspect_err(|_| { METRICS.put_api_requests.vsock_fails.inc(); })?; // Check for the presence of deprecated `vsock_id` field. let mut deprecation_message = None; if vsock_cfg.vsock_id.is_some() { // vsock_id field in request is deprecated. METRICS.deprecated_api.deprecated_http_api_calls.inc(); deprecation_message = Some("PUT /vsock: vsock_id field is deprecated."); } // Construct the `ParsedRequest` object. let mut parsed_req = ParsedRequest::new_sync(VmmAction::SetVsockDevice(vsock_cfg)); // If `vsock_id` was present, set the deprecation message in `parsing_info`. 
if let Some(msg) = deprecation_message { parsed_req.parsing_info().append_deprecation_message(msg); } Ok(parsed_req) } #[cfg(test)] mod tests { use super::*; use crate::api_server::parsed_request::tests::depr_action_from_req; #[test] fn test_parse_put_vsock_request() { let body = r#"{ "guest_cid": 42, "uds_path": "vsock.sock" }"#; parse_put_vsock(&Body::new(body)).unwrap(); let body = r#"{ "guest_cid": 42, "invalid_field": false }"#; parse_put_vsock(&Body::new(body)).unwrap_err(); } #[test] fn test_depr_vsock_id() { let body = r#"{ "vsock_id": "foo", "guest_cid": 42, "uds_path": "vsock.sock" }"#; depr_action_from_req( parse_put_vsock(&Body::new(body)).unwrap(), Some("PUT /vsock: vsock_id field is deprecated.".to_string()), ); let body = r#"{ "guest_cid": 42, "uds_path": "vsock.sock" }"#; let (_, mut parsing_info) = parse_put_vsock(&Body::new(body)).unwrap().into_parts(); assert!(parsing_info.take_deprecation_message().is_none()); } } ================================================ FILE: src/firecracker/src/api_server_adapter.rs ================================================ // Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
// SPDX-License-Identifier: Apache-2.0 use std::os::unix::io::AsRawFd; use std::path::PathBuf; use std::sync::mpsc::{Receiver, Sender, TryRecvError, channel}; use std::sync::{Arc, Mutex}; use std::thread; use event_manager::{EventOps, Events, MutEventSubscriber, SubscriberOps}; use vmm::logger::{ProcessTimeReporter, error, info, warn}; use vmm::rpc_interface::{ ApiRequest, ApiResponse, BuildMicrovmFromRequestsError, PrebootApiController, RuntimeApiController, VmmAction, }; use vmm::seccomp::BpfThreadMap; use vmm::vmm_config::instance_info::InstanceInfo; use vmm::{EventManager, FcExitCode, Vmm}; use vmm_sys_util::epoll::EventSet; use vmm_sys_util::eventfd::EventFd; use super::api_server::{ApiServer, HttpServer, ServerError}; #[derive(Debug, thiserror::Error, displaydoc::Display)] pub enum ApiServerError { /// Failed to build MicroVM: {0}. BuildMicroVmError(BuildMicrovmFromRequestsError), /// MicroVM stopped with an error: {0:?} MicroVMStoppedWithError(FcExitCode), /// Failed to open the API socket at: {0}. Check that it is not already used. FailedToBindSocket(String), /// Failed to bind and run the HTTP server: {0} FailedToBindAndRunHttpServer(ServerError), /// Failed to build MicroVM from Json: {0} BuildFromJson(crate::BuildFromJsonError), } #[derive(Debug)] struct ApiServerAdapter { api_event_fd: EventFd, from_api: Receiver, to_api: Sender, controller: RuntimeApiController, } impl ApiServerAdapter { /// Runs the vmm to completion, while any arising control events are deferred /// to a `RuntimeApiController`. 
fn run_microvm( api_event_fd: EventFd, from_api: Receiver, to_api: Sender, vmm: Arc>, event_manager: &mut EventManager, ) -> Result<(), ApiServerError> { let api_adapter = Arc::new(Mutex::new(Self { api_event_fd, from_api, to_api, controller: RuntimeApiController::new(vmm.clone()), })); event_manager.add_subscriber(api_adapter); loop { event_manager .run() .expect("EventManager events driver fatal error"); match vmm.lock().unwrap().shutdown_exit_code() { Some(FcExitCode::Ok) => break, Some(exit_code) => return Err(ApiServerError::MicroVMStoppedWithError(exit_code)), None => continue, } } Ok(()) } fn handle_request(&mut self, req_action: VmmAction) { let response = self.controller.handle_request(req_action); // Send back the result. self.to_api .send(Box::new(response)) .map_err(|_| ()) .expect("one-shot channel closed"); } } impl MutEventSubscriber for ApiServerAdapter { /// Handle a read event (EPOLLIN). fn process(&mut self, event: Events, _: &mut EventOps) { let source = event.fd(); let event_set = event.event_set(); if source == self.api_event_fd.as_raw_fd() && event_set == EventSet::IN { let _ = self.api_event_fd.read(); match self.from_api.try_recv() { Ok(api_request) => { let request_is_pause = *api_request == VmmAction::Pause; self.handle_request(*api_request); // If the latest req is a pause request, temporarily switch to a mode where we // do blocking `recv`s on the `from_api` receiver in a loop, until we get // unpaused. The device emulation is implicitly paused since we do not // relinquish control to the event manager because we're not returning from // `process`. if request_is_pause { // This loop only attempts to process API requests, so things like the // metric flush timerfd handling are frozen as well. 
loop { let req = self.from_api.recv().expect("Error receiving API request."); let req_is_resume = *req == VmmAction::Resume; self.handle_request(*req); if req_is_resume { break; } } } } Err(TryRecvError::Empty) => { warn!("Got a spurious notification from api thread"); } Err(TryRecvError::Disconnected) => { panic!("The channel's sending half was disconnected. Cannot receive data."); } }; } else { error!("Spurious EventManager event for handler: ApiServerAdapter"); } } fn init(&mut self, ops: &mut EventOps) { if let Err(err) = ops.add(Events::new(&self.api_event_fd, EventSet::IN)) { error!("Failed to register activate event: {}", err); } } } #[allow(clippy::too_many_arguments)] pub(crate) fn run_with_api( seccomp_filters: &mut BpfThreadMap, config_json: Option, bind_path: PathBuf, instance_info: InstanceInfo, process_time_reporter: ProcessTimeReporter, boot_timer_enabled: bool, pci_enabled: bool, api_payload_limit: usize, mmds_size_limit: usize, metadata_json: Option<&str>, ) -> Result<(), ApiServerError> { // FD to notify of API events. This is a blocking eventfd by design. // It is used in the config/pre-boot loop which is a simple blocking loop // which only consumes API events. let api_event_fd = EventFd::new(libc::EFD_SEMAPHORE).expect("Cannot create API Eventfd."); // FD used to signal API thread to stop/shutdown. let api_kill_switch = EventFd::new(libc::EFD_NONBLOCK).expect("Cannot create API kill switch."); // Channels for both directions between Vmm and Api threads. 
let (to_vmm, from_api) = channel(); let (to_api, from_vmm) = channel(); let to_vmm_event_fd = api_event_fd .try_clone() .expect("Failed to clone API event FD"); let api_seccomp_filter = seccomp_filters .remove("api") .expect("Missing seccomp filter for API thread."); let mut server = match HttpServer::new(&bind_path) { Ok(s) => s, Err(ServerError::IOError(inner)) if inner.kind() == std::io::ErrorKind::AddrInUse => { let sock_path = bind_path.display().to_string(); return Err(ApiServerError::FailedToBindSocket(sock_path)); } Err(err) => { return Err(ApiServerError::FailedToBindAndRunHttpServer(err)); } }; info!("Listening on API socket ({bind_path:?})."); let api_kill_switch_clone = api_kill_switch .try_clone() .expect("Failed to clone API kill switch"); server .add_kill_switch(api_kill_switch_clone) .expect("Cannot add HTTP server kill switch"); // Start the separate API thread. let api_thread = thread::Builder::new() .name("fc_api".to_owned()) .spawn(move || { ApiServer::new(to_vmm, from_vmm, to_vmm_event_fd).run( server, process_time_reporter, &api_seccomp_filter, api_payload_limit, ); }) .expect("API thread spawn failed."); let mut event_manager = EventManager::new().expect("Unable to create EventManager"); // Create the firecracker metrics object responsible for periodically printing metrics. let firecracker_metrics = Arc::new(Mutex::new(super::metrics::PeriodicMetrics::new())); event_manager.add_subscriber(firecracker_metrics.clone()); // Configure, build and start the microVM. 
let build_result = match config_json { Some(json) => super::build_microvm_from_json( seccomp_filters, &mut event_manager, json, instance_info, boot_timer_enabled, pci_enabled, mmds_size_limit, metadata_json, ) .map_err(ApiServerError::BuildFromJson), None => PrebootApiController::build_microvm_from_requests( seccomp_filters, &mut event_manager, instance_info, &from_api, &to_api, &api_event_fd, boot_timer_enabled, pci_enabled, mmds_size_limit, metadata_json, ) .map_err(ApiServerError::BuildMicroVmError), }; let result = build_result.and_then(|vmm| { firecracker_metrics .lock() .expect("Poisoned lock") .start(super::metrics::WRITE_METRICS_PERIOD_MS); ApiServerAdapter::run_microvm(api_event_fd, from_api, to_api, vmm, &mut event_manager) }); api_kill_switch.write(1).unwrap(); // This call to thread::join() should block until the API thread has processed the // shutdown-internal and returns from its function. api_thread.join().expect("Api thread should join"); result } ================================================ FILE: src/firecracker/src/generated/mod.rs ================================================ // Copyright 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 pub mod prctl; ================================================ FILE: src/firecracker/src/generated/prctl.rs ================================================ // Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
// SPDX-License-Identifier: Apache-2.0

// automatically generated by tools/bindgen.sh

#![allow(
    non_camel_case_types,
    non_upper_case_globals,
    dead_code,
    non_snake_case,
    clippy::ptr_as_ptr,
    clippy::undocumented_unsafe_blocks,
    missing_debug_implementations,
    clippy::tests_outside_test_module,
    unsafe_op_in_unsafe_fn,
    clippy::redundant_static_lifetimes
)]

pub const PR_SET_PDEATHSIG: u32 = 1;
pub const PR_GET_PDEATHSIG: u32 = 2;
pub const PR_GET_DUMPABLE: u32 = 3;
pub const PR_SET_DUMPABLE: u32 = 4;
pub const PR_GET_UNALIGN: u32 = 5;
pub const PR_SET_UNALIGN: u32 = 6;
pub const PR_UNALIGN_NOPRINT: u32 = 1;
pub const PR_UNALIGN_SIGBUS: u32 = 2;
pub const PR_GET_KEEPCAPS: u32 = 7;
pub const PR_SET_KEEPCAPS: u32 = 8;
pub const PR_GET_FPEMU: u32 = 9;
pub const PR_SET_FPEMU: u32 = 10;
pub const PR_FPEMU_NOPRINT: u32 = 1;
pub const PR_FPEMU_SIGFPE: u32 = 2;
pub const PR_GET_FPEXC: u32 = 11;
pub const PR_SET_FPEXC: u32 = 12;
pub const PR_FP_EXC_SW_ENABLE: u32 = 128;
pub const PR_FP_EXC_DIV: u32 = 65536;
pub const PR_FP_EXC_OVF: u32 = 131072;
pub const PR_FP_EXC_UND: u32 = 262144;
pub const PR_FP_EXC_RES: u32 = 524288;
pub const PR_FP_EXC_INV: u32 = 1048576;
pub const PR_FP_EXC_DISABLED: u32 = 0;
pub const PR_FP_EXC_NONRECOV: u32 = 1;
pub const PR_FP_EXC_ASYNC: u32 = 2;
pub const PR_FP_EXC_PRECISE: u32 = 3;
pub const PR_GET_TIMING: u32 = 13;
pub const PR_SET_TIMING: u32 = 14;
pub const PR_TIMING_STATISTICAL: u32 = 0;
pub const PR_TIMING_TIMESTAMP: u32 = 1;
pub const PR_SET_NAME: u32 = 15;
pub const PR_GET_NAME: u32 = 16;
pub const PR_GET_ENDIAN: u32 = 19;
pub const PR_SET_ENDIAN: u32 = 20;
pub const PR_ENDIAN_BIG: u32 = 0;
pub const PR_ENDIAN_LITTLE: u32 = 1;
pub const PR_ENDIAN_PPC_LITTLE: u32 = 2;
pub const PR_GET_SECCOMP: u32 = 21;
pub const PR_SET_SECCOMP: u32 = 22;
pub const PR_CAPBSET_READ: u32 = 23;
pub const PR_CAPBSET_DROP: u32 = 24;
pub const PR_GET_TSC: u32 = 25;
pub const PR_SET_TSC: u32 = 26;
pub const PR_TSC_ENABLE: u32 = 1;
pub const PR_TSC_SIGSEGV: u32 = 2;
pub const PR_GET_SECUREBITS: u32 = 27;
pub const PR_SET_SECUREBITS: u32 = 28;
pub const PR_SET_TIMERSLACK: u32 = 29;
pub const PR_GET_TIMERSLACK: u32 = 30;
pub const PR_TASK_PERF_EVENTS_DISABLE: u32 = 31;
pub const PR_TASK_PERF_EVENTS_ENABLE: u32 = 32;
pub const PR_MCE_KILL: u32 = 33;
pub const PR_MCE_KILL_CLEAR: u32 = 0;
pub const PR_MCE_KILL_SET: u32 = 1;
pub const PR_MCE_KILL_LATE: u32 = 0;
pub const PR_MCE_KILL_EARLY: u32 = 1;
pub const PR_MCE_KILL_DEFAULT: u32 = 2;
pub const PR_MCE_KILL_GET: u32 = 34;
pub const PR_SET_MM: u32 = 35;
pub const PR_SET_MM_START_CODE: u32 = 1;
pub const PR_SET_MM_END_CODE: u32 = 2;
pub const PR_SET_MM_START_DATA: u32 = 3;
pub const PR_SET_MM_END_DATA: u32 = 4;
pub const PR_SET_MM_START_STACK: u32 = 5;
pub const PR_SET_MM_START_BRK: u32 = 6;
pub const PR_SET_MM_BRK: u32 = 7;
pub const PR_SET_MM_ARG_START: u32 = 8;
pub const PR_SET_MM_ARG_END: u32 = 9;
pub const PR_SET_MM_ENV_START: u32 = 10;
pub const PR_SET_MM_ENV_END: u32 = 11;
pub const PR_SET_MM_AUXV: u32 = 12;
pub const PR_SET_MM_EXE_FILE: u32 = 13;
pub const PR_SET_MM_MAP: u32 = 14;
pub const PR_SET_MM_MAP_SIZE: u32 = 15;
pub const PR_SET_PTRACER: u32 = 1499557217;
pub const PR_SET_CHILD_SUBREAPER: u32 = 36;
pub const PR_GET_CHILD_SUBREAPER: u32 = 37;
pub const PR_SET_NO_NEW_PRIVS: u32 = 38;
pub const PR_GET_NO_NEW_PRIVS: u32 = 39;
pub const PR_GET_TID_ADDRESS: u32 = 40;
pub const PR_SET_THP_DISABLE: u32 = 41;
pub const PR_GET_THP_DISABLE: u32 = 42;
pub const PR_MPX_ENABLE_MANAGEMENT: u32 = 43;
pub const PR_MPX_DISABLE_MANAGEMENT: u32 = 44;
pub const PR_SET_FP_MODE: u32 = 45;
pub const PR_GET_FP_MODE: u32 = 46;
pub const PR_FP_MODE_FR: u32 = 1;
pub const PR_FP_MODE_FRE: u32 = 2;
pub const PR_CAP_AMBIENT: u32 = 47;
pub const PR_CAP_AMBIENT_IS_SET: u32 = 1;
pub const PR_CAP_AMBIENT_RAISE: u32 = 2;
pub const PR_CAP_AMBIENT_LOWER: u32 = 3;
pub const PR_CAP_AMBIENT_CLEAR_ALL: u32 = 4;
pub const PR_SVE_SET_VL: u32 = 50;
pub const PR_SVE_SET_VL_ONEXEC: u32 = 262144;
pub const PR_SVE_GET_VL: u32 = 51;
pub const PR_SVE_VL_LEN_MASK: u32 = 65535;
pub const PR_SVE_VL_INHERIT: u32 = 131072;
pub const PR_GET_SPECULATION_CTRL: u32 = 52;
// FIX: this constant was declared as `i32`, inconsistent with every other
// bindgen-generated prctl option in this file (all `u32`) and with the
// kernel's <linux/prctl.h> definition. Restored to `u32`.
pub const PR_SET_SPECULATION_CTRL: u32 = 53;
pub const PR_SPEC_STORE_BYPASS: u32 = 0;
pub const PR_SPEC_INDIRECT_BRANCH: u32 = 1;
pub const PR_SPEC_L1D_FLUSH: u32 = 2;
pub const PR_SPEC_NOT_AFFECTED: u32 = 0;
pub const PR_SPEC_PRCTL: u32 = 1;
pub const PR_SPEC_ENABLE: u32 = 2;
pub const PR_SPEC_DISABLE: u32 = 4;
pub const PR_SPEC_FORCE_DISABLE: u32 = 8;
pub const PR_SPEC_DISABLE_NOEXEC: u32 = 16;
pub const PR_PAC_RESET_KEYS: u32 = 54;
pub const PR_PAC_APIAKEY: u32 = 1;
pub const PR_PAC_APIBKEY: u32 = 2;
pub const PR_PAC_APDAKEY: u32 = 4;
pub const PR_PAC_APDBKEY: u32 = 8;
pub const PR_PAC_APGAKEY: u32 = 16;
pub const PR_SET_TAGGED_ADDR_CTRL: u32 = 55;
pub const PR_GET_TAGGED_ADDR_CTRL: u32 = 56;
pub const PR_TAGGED_ADDR_ENABLE: u32 = 1;
pub const PR_MTE_TCF_NONE: u32 = 0;
pub const PR_MTE_TCF_SYNC: u32 = 2;
pub const PR_MTE_TCF_ASYNC: u32 = 4;
pub const PR_MTE_TCF_MASK: u32 = 6;
pub const PR_MTE_TAG_SHIFT: u32 = 3;
pub const PR_MTE_TAG_MASK: u32 = 524280;
pub const PR_MTE_TCF_SHIFT: u32 = 1;
pub const PR_PMLEN_SHIFT: u32 = 24;
pub const PR_PMLEN_MASK: u32 = 2130706432;
pub const PR_SET_IO_FLUSHER: u32 = 57;
pub const PR_GET_IO_FLUSHER: u32 = 58;
pub const PR_SET_SYSCALL_USER_DISPATCH: u32 = 59;
pub const PR_SYS_DISPATCH_OFF: u32 = 0;
pub const PR_SYS_DISPATCH_ON: u32 = 1;
pub const PR_PAC_SET_ENABLED_KEYS: u32 = 60;
pub const PR_PAC_GET_ENABLED_KEYS: u32 = 61;
pub const PR_SCHED_CORE: u32 = 62;
pub const PR_SCHED_CORE_GET: u32 = 0;
pub const PR_SCHED_CORE_CREATE: u32 = 1;
pub const PR_SCHED_CORE_SHARE_TO: u32 = 2;
pub const PR_SCHED_CORE_SHARE_FROM: u32 = 3;
pub const PR_SCHED_CORE_MAX: u32 = 4;
pub const PR_SCHED_CORE_SCOPE_THREAD: u32 = 0;
pub const PR_SCHED_CORE_SCOPE_THREAD_GROUP: u32 = 1;
pub const PR_SCHED_CORE_SCOPE_PROCESS_GROUP: u32 = 2;
pub const PR_SME_SET_VL: u32 = 63;
pub const PR_SME_SET_VL_ONEXEC: u32 = 262144;
pub const PR_SME_GET_VL: u32 = 64;
pub const PR_SME_VL_LEN_MASK: u32 = 65535;
pub const PR_SME_VL_INHERIT: u32 = 131072;
pub const PR_SET_MDWE: u32 = 65;
pub const PR_MDWE_REFUSE_EXEC_GAIN: u32 = 1;
pub const PR_MDWE_NO_INHERIT: u32 = 2;
pub const PR_GET_MDWE: u32 = 66;
pub const PR_SET_VMA: u32 = 1398164801;
pub const PR_SET_VMA_ANON_NAME: u32 = 0;
pub const PR_GET_AUXV: u32 = 1096112214;
pub const PR_SET_MEMORY_MERGE: u32 = 67;
pub const PR_GET_MEMORY_MERGE: u32 = 68;
pub const PR_RISCV_V_SET_CONTROL: u32 = 69;
pub const PR_RISCV_V_GET_CONTROL: u32 = 70;
pub const PR_RISCV_V_VSTATE_CTRL_DEFAULT: u32 = 0;
pub const PR_RISCV_V_VSTATE_CTRL_OFF: u32 = 1;
pub const PR_RISCV_V_VSTATE_CTRL_ON: u32 = 2;
pub const PR_RISCV_V_VSTATE_CTRL_INHERIT: u32 = 16;
pub const PR_RISCV_V_VSTATE_CTRL_CUR_MASK: u32 = 3;
pub const PR_RISCV_V_VSTATE_CTRL_NEXT_MASK: u32 = 12;
pub const PR_RISCV_V_VSTATE_CTRL_MASK: u32 = 31;
pub const PR_RISCV_SET_ICACHE_FLUSH_CTX: u32 = 71;
pub const PR_RISCV_CTX_SW_FENCEI_ON: u32 = 0;
pub const PR_RISCV_CTX_SW_FENCEI_OFF: u32 = 1;
pub const PR_RISCV_SCOPE_PER_PROCESS: u32 = 0;
pub const PR_RISCV_SCOPE_PER_THREAD: u32 = 1;
pub const PR_PPC_GET_DEXCR: u32 = 72;
pub const PR_PPC_SET_DEXCR: u32 = 73;
pub const PR_PPC_DEXCR_SBHE: u32 = 0;
pub const PR_PPC_DEXCR_IBRTPD: u32 = 1;
pub const PR_PPC_DEXCR_SRAPD: u32 = 2;
pub const PR_PPC_DEXCR_NPHIE: u32 = 3;
pub const PR_PPC_DEXCR_CTRL_EDITABLE: u32 = 1;
pub const PR_PPC_DEXCR_CTRL_SET: u32 = 2;
pub const PR_PPC_DEXCR_CTRL_CLEAR: u32 = 4;
pub const PR_PPC_DEXCR_CTRL_SET_ONEXEC: u32 = 8;
pub const PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC: u32 = 16;
pub const PR_PPC_DEXCR_CTRL_MASK: u32 = 31;
pub const PR_GET_SHADOW_STACK_STATUS: u32 = 74;
pub const PR_SET_SHADOW_STACK_STATUS: u32 = 75;
pub const PR_SHADOW_STACK_ENABLE: u32 = 1;
pub const PR_SHADOW_STACK_WRITE: u32 = 2;
pub const PR_SHADOW_STACK_PUSH: u32 = 4;
pub const PR_LOCK_SHADOW_STACK_STATUS: u32 = 76;

================================================
FILE: src/firecracker/src/lib.rs
================================================
// Copyright 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

pub mod api_server;

================================================
FILE: src/firecracker/src/main.rs
================================================
// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

mod api_server;
mod api_server_adapter;
mod generated;
mod metrics;
mod seccomp;

use std::fs::{self, File};
use std::path::PathBuf;
use std::process::ExitCode;
use std::str::FromStr;
use std::sync::{Arc, Mutex};
use std::{io, panic};

use api_server_adapter::ApiServerError;
use event_manager::SubscriberOps;
use seccomp::FilterError;
use utils::arg_parser::{ArgParser, Argument};
use utils::validators::validate_instance_id;
use vmm::arch::host_page_size;
use vmm::builder::StartMicrovmError;
use vmm::logger::{
    LOGGER, LoggerConfig, METRICS, ProcessTimeReporter, StoreMetric, debug, error, info,
};
use vmm::persist::SNAPSHOT_VERSION;
use vmm::resources::VmResources;
use vmm::seccomp::BpfThreadMap;
use vmm::signal_handler::register_signal_handlers;
use vmm::snapshot::{SnapshotError, get_format_version};
use vmm::vmm_config::instance_info::{InstanceInfo, VmState};
use vmm::vmm_config::metrics::{MetricsConfig, MetricsConfigError, init_metrics};
use vmm::{EventManager, FcExitCode, HTTP_MAX_PAYLOAD_SIZE};
use vmm_sys_util::terminal::Terminal;

use crate::seccomp::SeccompConfig;

// The reason we place default API socket under /run is that API socket is a
// runtime file.
// see https://refspecs.linuxfoundation.org/FHS_3.0/fhs/ch03s15.html for more information.
const DEFAULT_API_SOCK_PATH: &str = "/run/firecracker.socket"; const FIRECRACKER_VERSION: &str = env!("CARGO_PKG_VERSION"); const MMDS_CONTENT_ARG: &str = "metadata"; #[derive(Debug, thiserror::Error, displaydoc::Display)] enum MainError { /// Failed to set the logger: {0} SetLogger(vmm::logger::LoggerInitError), /// Failed to register signal handlers: {0} RegisterSignalHandlers(#[source] vmm_sys_util::errno::Error), /// Arguments parsing error: {0} \n\nFor more information try --help. ParseArguments(#[from] utils::arg_parser::UtilsArgParserError), /// When printing Snapshot Data format: {0} PrintSnapshotDataFormat(#[from] SnapshotVersionError), /// Invalid value for logger level: {0}.Possible values: [Error, Warning, Info, Debug] InvalidLogLevel(vmm::logger::LevelFilterFromStrError), /// Could not initialize logger: {0} LoggerInitialization(vmm::logger::LoggerUpdateError), /// Could not initialize metrics: {0} MetricsInitialization(MetricsConfigError), /// Seccomp error: {0} SeccompFilter(FilterError), /// Failed to resize fd table: {0} ResizeFdtable(ResizeFdTableError), /// RunWithApiError error: {0} RunWithApi(ApiServerError), /// RunWithoutApiError error: {0} RunWithoutApiError(RunWithoutApiError), } #[derive(Debug, thiserror::Error, displaydoc::Display)] enum ResizeFdTableError { /// Failed to get RLIMIT_NOFILE GetRlimit, /// Failed to call dup2 to resize fdtable Dup2(io::Error), /// Failed to close dup2'd file descriptor Close(io::Error), } impl From for FcExitCode { fn from(value: MainError) -> Self { match value { MainError::ParseArguments(_) => FcExitCode::ArgParsing, MainError::InvalidLogLevel(_) => FcExitCode::BadConfiguration, MainError::RunWithApi(ApiServerError::MicroVMStoppedWithError(code)) => code, MainError::RunWithoutApiError(RunWithoutApiError::Shutdown(code)) => code, _ => FcExitCode::GenericError, } } } fn main() -> ExitCode { let result = main_exec(); if let Err(err) = result { error!("{err}"); eprintln!("Error: {err:?}"); let exit_code = 
FcExitCode::from(err) as u8; error!("Firecracker exiting with error. exit_code={exit_code}"); ExitCode::from(exit_code) } else { info!("Firecracker exiting successfully. exit_code=0"); ExitCode::SUCCESS } } fn main_exec() -> Result<(), MainError> { // Initialize the logger. LOGGER.init().map_err(MainError::SetLogger)?; // First call to this function updates the value to current // host page size. _ = host_page_size(); // We need this so that we can reset terminal to canonical mode if panic occurs. let stdin = io::stdin(); // Start firecracker by setting up a panic hook, which will be called before // terminating as we're building with panic = "abort". // It's worth noting that the abort is caused by sending a SIG_ABORT signal to the process. panic::set_hook(Box::new(move |info| { // We're currently using the closure parameter, which is a &PanicInfo, for printing the // origin of the panic, including the payload passed to panic! and the source code location // from which the panic originated. error!("Firecracker {}", info); if let Err(err) = stdin.lock().set_canon_mode() { error!( "Failure while trying to reset stdin to canonical mode: {}", err ); } METRICS.vmm.panic_count.store(1); // Write the metrics before aborting. if let Err(err) = METRICS.write() { error!("Failed to write metrics while panicking: {}", err); } })); let http_max_payload_size_str = HTTP_MAX_PAYLOAD_SIZE.to_string(); let mut arg_parser = ArgParser::new() .arg( Argument::new("api-sock") .takes_value(true) .default_value(DEFAULT_API_SOCK_PATH) .help("Path to unix domain socket used by the API."), ) .arg( Argument::new("id") .takes_value(true) .default_value(vmm::logger::DEFAULT_INSTANCE_ID) .help("MicroVM unique identifier."), ) .arg( Argument::new("seccomp-filter") .takes_value(true) .forbids(vec!["no-seccomp"]) .help( "Optional parameter which allows specifying the path to a custom seccomp \ filter. 
For advanced users.", ), ) .arg( Argument::new("no-seccomp") .takes_value(false) .forbids(vec!["seccomp-filter"]) .help( "Optional parameter which allows starting and using a microVM without \ seccomp filtering. Not recommended.", ), ) .arg( Argument::new("start-time-us").takes_value(true).help( "Process start time (wall clock, microseconds). This parameter is optional.", ), ) .arg(Argument::new("start-time-cpu-us").takes_value(true).help( "Process start CPU time (wall clock, microseconds). This parameter is optional.", )) .arg(Argument::new("parent-cpu-time-us").takes_value(true).help( "Parent process CPU time (wall clock, microseconds). This parameter is optional.", )) .arg( Argument::new("config-file") .takes_value(true) .help("Path to a file that contains the microVM configuration in JSON format."), ) .arg( Argument::new(MMDS_CONTENT_ARG).takes_value(true).help( "Path to a file that contains metadata in JSON format to add to the mmds.", ), ) .arg( Argument::new("no-api") .takes_value(false) .requires("config-file") .help( "Optional parameter which allows starting and using a microVM without an \ active API socket.", ), ) .arg( Argument::new("log-path") .takes_value(true) .help("Path to a fifo or a file used for configuring the logger on startup."), ) .arg( Argument::new("level") .takes_value(true) .help("Set the logger level."), ) .arg( Argument::new("module") .takes_value(true) .help("Set the logger module filter."), ) .arg( Argument::new("show-level") .takes_value(false) .help("Whether or not to output the level in the logs."), ) .arg(Argument::new("show-log-origin").takes_value(false).help( "Whether or not to include the file path and line number of the log's origin.", )) .arg( Argument::new("metrics-path") .takes_value(true) .help("Path to a fifo or a file used for configuring the metrics on startup."), ) .arg(Argument::new("boot-timer").takes_value(false).help( "Whether or not to load boot timer device for logging elapsed time since \ InstanceStart 
command.", )) .arg( Argument::new("version") .takes_value(false) .help("Print the binary version number."), ) .arg( Argument::new("snapshot-version") .takes_value(false) .help("Print the supported data format version."), ) .arg( Argument::new("describe-snapshot") .takes_value(true) .help("Print the data format version of the provided snapshot state file."), ) .arg( Argument::new("http-api-max-payload-size") .takes_value(true) .default_value(&http_max_payload_size_str) .help("Http API request payload max size, in bytes."), ) .arg( Argument::new("mmds-size-limit") .takes_value(true) .help("Mmds data store limit, in bytes."), ) .arg( Argument::new("enable-pci") .takes_value(false) .help("Enables PCIe support."), ); arg_parser.parse_from_cmdline()?; let arguments = arg_parser.arguments(); if arguments.flag_present("help") { println!("Firecracker v{}\n", FIRECRACKER_VERSION); println!("{}", arg_parser.formatted_help()); return Ok(()); } if arguments.flag_present("version") { println!("Firecracker v{}\n", FIRECRACKER_VERSION); return Ok(()); } if arguments.flag_present("snapshot-version") { println!("v{SNAPSHOT_VERSION}"); return Ok(()); } if let Some(snapshot_path) = arguments.single_value("describe-snapshot") { print_snapshot_data_format(snapshot_path)?; return Ok(()); } // It's safe to unwrap here because the field's been provided with a default value. let instance_id = arguments.single_value("id").unwrap(); validate_instance_id(instance_id.as_str()).expect("Invalid instance ID"); // Apply the logger configuration. 
vmm::logger::INSTANCE_ID .set(String::from(instance_id)) .unwrap(); let log_path = arguments.single_value("log-path").map(PathBuf::from); let level = arguments .single_value("level") .map(|s| vmm::logger::LevelFilter::from_str(s)) .transpose() .map_err(MainError::InvalidLogLevel)?; let show_level = arguments.flag_present("show-level").then_some(true); let show_log_origin = arguments.flag_present("show-log-origin").then_some(true); let module = arguments.single_value("module").cloned(); LOGGER .update(LoggerConfig { log_path, level, show_level, show_log_origin, module, }) .map_err(MainError::LoggerInitialization)?; info!("Running Firecracker v{FIRECRACKER_VERSION}"); register_signal_handlers().map_err(MainError::RegisterSignalHandlers)?; #[cfg(target_arch = "aarch64")] enable_ssbd_mitigation(); if let Err(err) = resize_fdtable() { match err { // These errors are non-critical: In the worst case we have worse snapshot restore // performance. ResizeFdTableError::GetRlimit | ResizeFdTableError::Dup2(_) => { debug!("Failed to resize fdtable: {err}") } // This error means that we now have a random file descriptor lying around, abort to be // cautious. ResizeFdTableError::Close(_) => return Err(MainError::ResizeFdtable(err)), } } // Display warnings for any used deprecated parameters. // Currently unused since there are no deprecated parameters. Uncomment the line when // deprecating one. 
// warn_deprecated_parameters(&arguments); let instance_info = InstanceInfo { id: instance_id.clone(), state: VmState::NotStarted, vmm_version: FIRECRACKER_VERSION.to_string(), app_name: "Firecracker".to_string(), }; if let Some(metrics_path) = arguments.single_value("metrics-path") { let metrics_config = MetricsConfig { metrics_path: PathBuf::from(metrics_path), }; init_metrics(metrics_config).map_err(MainError::MetricsInitialization)?; } let mut seccomp_filters: BpfThreadMap = SeccompConfig::from_args( arguments.flag_present("no-seccomp"), arguments.single_value("seccomp-filter"), ) .and_then(seccomp::get_filters) .map_err(MainError::SeccompFilter)?; let vmm_config_json = arguments .single_value("config-file") .map(fs::read_to_string) .map(|x| x.expect("Unable to open or read from the configuration file")); let metadata_json = arguments .single_value(MMDS_CONTENT_ARG) .map(fs::read_to_string) .map(|x| x.expect("Unable to open or read from the mmds content file")); let boot_timer_enabled = arguments.flag_present("boot-timer"); let pci_enabled = arguments.flag_present("enable-pci"); let api_enabled = !arguments.flag_present("no-api"); let api_payload_limit = arg_parser .arguments() .single_value("http-api-max-payload-size") .map(|lim| { lim.parse::() .expect("'http-api-max-payload-size' parameter expected to be of 'usize' type.") }) // Safe to unwrap as we provide a default value. .unwrap(); // If the mmds size limit is not explicitly configured, default to using the // `http-api-max-payload-size` value. 
let mmds_size_limit = arg_parser .arguments() .single_value("mmds-size-limit") .map(|lim| { lim.parse::() .expect("'mmds-size-limit' parameter expected to be of 'usize' type.") }) .unwrap_or_else(|| api_payload_limit); if api_enabled { let bind_path = arguments .single_value("api-sock") .map(PathBuf::from) .expect("Missing argument: api-sock"); let start_time_us = arguments.single_value("start-time-us").map(|s| { s.parse::() .expect("'start-time-us' parameter expected to be of 'u64' type.") }); let start_time_cpu_us = arguments.single_value("start-time-cpu-us").map(|s| { s.parse::() .expect("'start-time-cpu-us' parameter expected to be of 'u64' type.") }); let parent_cpu_time_us = arguments.single_value("parent-cpu-time-us").map(|s| { s.parse::() .expect("'parent-cpu-time-us' parameter expected to be of 'u64' type.") }); let process_time_reporter = ProcessTimeReporter::new(start_time_us, start_time_cpu_us, parent_cpu_time_us); api_server_adapter::run_with_api( &mut seccomp_filters, vmm_config_json, bind_path, instance_info, process_time_reporter, boot_timer_enabled, pci_enabled, api_payload_limit, mmds_size_limit, metadata_json.as_deref(), ) .map_err(MainError::RunWithApi) } else { let seccomp_filters: BpfThreadMap = seccomp_filters .into_iter() .filter(|(k, _)| k != "api") .collect(); run_without_api( &seccomp_filters, vmm_config_json, instance_info, boot_timer_enabled, pci_enabled, mmds_size_limit, metadata_json.as_deref(), ) .map_err(MainError::RunWithoutApiError) } } /// Attempts to resize the processes file descriptor table to match RLIMIT_NOFILE or 2048 if no /// RLIMIT_NOFILE is set (this can only happen if firecracker is run outside the jailer. 2048 is /// the default the jailer would set). /// /// We do this resizing because the kernel default is 64, with a reallocation happening whenever /// the table fills up. 
This was happening for some larger microVMs, and reallocating the /// fdtable while a lot of file descriptors are active (due to being eventfds/timerfds registered /// to epoll) incurs a penalty of 30ms-70ms on the snapshot restore path. fn resize_fdtable() -> Result<(), ResizeFdTableError> { let mut rlimit = libc::rlimit { rlim_cur: 0, rlim_max: 0, }; // SAFETY: We pass a pointer to a valid area of memory to which we have exclusive mutable access if unsafe { libc::getrlimit(libc::RLIMIT_NOFILE, &mut rlimit as *mut libc::rlimit) } < 0 { return Err(ResizeFdTableError::GetRlimit); } // If no jailer is used, there might not be an NOFILE limit set. In this case, resize // the table to the default that the jailer would usually impose (2048) let limit: libc::c_int = if rlimit.rlim_cur == libc::RLIM_INFINITY { 2048 } else { rlimit.rlim_cur.try_into().unwrap_or(2048) }; // Resize the file descriptor table to its maximal possible size, to ensure that // firecracker will not need to reallocate it later. If the file descriptor table // needs to be reallocated (which by default happens once more than 64 fds exist, // something that happens for reasonably complex microvms due to each device using // a multitude of eventfds), this can incur a significant performance impact (it // was responsible for a 30ms-70ms impact on snapshot restore times). if limit > 3 { // SAFETY: Duplicating stdin is safe if unsafe { libc::dup2(0, limit - 1) } < 0 { return Err(ResizeFdTableError::Dup2(io::Error::last_os_error())); } // SAFETY: Closing the just created duplicate is safe if unsafe { libc::close(limit - 1) } < 0 { return Err(ResizeFdTableError::Close(io::Error::last_os_error())); } } Ok(()) } /// Enable SSBD mitigation through `prctl`. #[cfg(target_arch = "aarch64")] pub fn enable_ssbd_mitigation() { // SAFETY: Parameters are valid since they are copied verbatim // from the kernel's UAPI. // PR_SET_SPECULATION_CTRL only uses those 2 parameters, so it's ok // to leave the latter 2 as zero. 
let ret = unsafe { libc::prctl( generated::prctl::PR_SET_SPECULATION_CTRL, generated::prctl::PR_SPEC_STORE_BYPASS, generated::prctl::PR_SPEC_FORCE_DISABLE, 0, 0, ) }; if ret < 0 { let last_error = std::io::Error::last_os_error().raw_os_error().unwrap(); error!( "Could not enable SSBD mitigation through prctl, error {}", last_error ); if last_error == libc::EINVAL { error!("The host does not support SSBD mitigation through prctl."); } } } // Log a warning for any usage of deprecated parameters. #[allow(unused)] fn warn_deprecated_parameters() {} #[derive(Debug, thiserror::Error, displaydoc::Display)] enum SnapshotVersionError { /// Unable to open snapshot state file: {0} OpenSnapshot(io::Error), /// Invalid data format version of snapshot file: {0} SnapshotVersion(SnapshotError), } // Print data format of provided snapshot state file. fn print_snapshot_data_format(snapshot_path: &str) -> Result<(), SnapshotVersionError> { let mut snapshot_reader = File::open(snapshot_path).map_err(SnapshotVersionError::OpenSnapshot)?; let data_format_version = get_format_version(&mut snapshot_reader).map_err(SnapshotVersionError::SnapshotVersion)?; println!("v{}", data_format_version); Ok(()) } #[derive(Debug, thiserror::Error, displaydoc::Display)] pub enum BuildFromJsonError { /// Configuration for VMM from one single json failed: {0} ParseFromJson(vmm::resources::ResourcesError), /// Could not Start MicroVM from one single json: {0} StartMicroVM(StartMicrovmError), } // Configure and start a microVM as described by the command-line JSON. 
#[allow(clippy::too_many_arguments)]
/// Builds and boots a microVM from the given JSON configuration string.
/// Returns the running VMM handle; the VmResources are intentionally dropped
/// (the caller runs without an API server and never patches the config).
// NOTE: the return type's generic parameters were stripped during extraction;
// restored to `Arc<Mutex<vmm::Vmm>>`, matching the `vmm.lock()` use below.
fn build_microvm_from_json(
    seccomp_filters: &BpfThreadMap,
    event_manager: &mut EventManager,
    config_json: String,
    instance_info: InstanceInfo,
    boot_timer_enabled: bool,
    pci_enabled: bool,
    mmds_size_limit: usize,
    metadata_json: Option<&str>,
) -> Result<Arc<Mutex<vmm::Vmm>>, BuildFromJsonError> {
    let mut vm_resources =
        VmResources::from_json(&config_json, &instance_info, mmds_size_limit, metadata_json)
            .map_err(BuildFromJsonError::ParseFromJson)?;
    vm_resources.boot_timer = boot_timer_enabled;
    vm_resources.pci_enabled = pci_enabled;
    let vmm = vmm::builder::build_and_boot_microvm(
        &instance_info,
        &vm_resources,
        event_manager,
        seccomp_filters,
    )
    .map_err(BuildFromJsonError::StartMicroVM)?;

    info!("Successfully started microvm that was configured from one single json");

    Ok(vmm)
}

/// Errors of the API-less run loop.
#[derive(Debug, thiserror::Error, displaydoc::Display)]
enum RunWithoutApiError {
    /// MicroVMStopped without an error: {0:?}
    Shutdown(FcExitCode),
    /// Failed to build MicroVM from Json: {0}
    BuildMicroVMFromJson(BuildFromJsonError),
}

/// Boots a microVM from `config_json` and drives its event loop until
/// shutdown; used when Firecracker is started with `--no-api`.
// `bool_timer_enabled` was a typo for `boot_timer_enabled`; renamed (private
// fn, all call sites pass positionally).
fn run_without_api(
    seccomp_filters: &BpfThreadMap,
    config_json: Option<String>,
    instance_info: InstanceInfo,
    boot_timer_enabled: bool,
    pci_enabled: bool,
    mmds_size_limit: usize,
    metadata_json: Option<&str>,
) -> Result<(), RunWithoutApiError> {
    let mut event_manager = EventManager::new().expect("Unable to create EventManager");

    // Create the firecracker metrics object responsible for periodically printing metrics.
    let firecracker_metrics = Arc::new(Mutex::new(metrics::PeriodicMetrics::new()));
    event_manager.add_subscriber(firecracker_metrics.clone());

    // Build the microVm. We can ignore VmResources since it's not used without api.
    let vmm = build_microvm_from_json(
        seccomp_filters,
        &mut event_manager,
        // Safe to unwrap since '--no-api' requires this to be set.
        config_json.unwrap(),
        instance_info,
        boot_timer_enabled,
        pci_enabled,
        mmds_size_limit,
        metadata_json,
    )
    .map_err(RunWithoutApiError::BuildMicroVMFromJson)?;

    // Start the metrics.
firecracker_metrics .lock() .expect("Poisoned lock") .start(metrics::WRITE_METRICS_PERIOD_MS); // Run the EventManager that drives everything in the microVM. loop { event_manager .run() .expect("Failed to start the event manager"); match vmm.lock().unwrap().shutdown_exit_code() { Some(FcExitCode::Ok) => break, Some(exit_code) => return Err(RunWithoutApiError::Shutdown(exit_code)), None => continue, } } Ok(()) } ================================================ FILE: src/firecracker/src/metrics.rs ================================================ // Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 use std::os::unix::io::AsRawFd; use std::time::Duration; use event_manager::{EventOps, Events, MutEventSubscriber}; use utils::time::TimerFd; use vmm::logger::{IncMetric, METRICS, error, warn}; use vmm_sys_util::epoll::EventSet; /// Metrics reporting period. pub(crate) const WRITE_METRICS_PERIOD_MS: u64 = 60000; /// Object to drive periodic reporting of metrics. #[derive(Debug)] pub(crate) struct PeriodicMetrics { write_metrics_event_fd: TimerFd, #[cfg(test)] flush_counter: u64, } impl PeriodicMetrics { /// PeriodicMetrics constructor. Can panic on `TimerFd` creation failure. pub fn new() -> Self { let write_metrics_event_fd = TimerFd::new(); PeriodicMetrics { write_metrics_event_fd, #[cfg(test)] flush_counter: 0, } } /// Start the periodic metrics engine which will flush metrics every `interval_ms` millisecs. pub(crate) fn start(&mut self, interval_ms: u64) { // Arm the log write timer. let duration = Duration::from_millis(interval_ms); self.write_metrics_event_fd.arm(duration, Some(duration)); // Write the metrics straight away to check the process startup time. 
self.write_metrics(); } fn write_metrics(&mut self) { if let Err(err) = METRICS.write() { METRICS.logger.missed_metrics_count.inc(); error!("Failed to write metrics: {}", err); } #[cfg(test)] { self.flush_counter += 1; } } } impl MutEventSubscriber for PeriodicMetrics { /// Handle a read event (EPOLLIN). fn process(&mut self, event: Events, _: &mut EventOps) { let source = event.fd(); let event_set = event.event_set(); // TODO: also check for errors. Pending high level discussions on how we want // to handle errors in devices. let supported_events = EventSet::IN; if !supported_events.contains(event_set) { warn!( "Received unknown event: {:?} from source: {:?}", event_set, source ); return; } if source == self.write_metrics_event_fd.as_raw_fd() { self.write_metrics_event_fd.read(); self.write_metrics(); } else { error!("Spurious METRICS event!"); } } fn init(&mut self, ops: &mut EventOps) { if let Err(err) = ops.add(Events::new(&self.write_metrics_event_fd, EventSet::IN)) { error!("Failed to register metrics event: {}", err); } } } #[cfg(test)] pub mod tests { use std::sync::{Arc, Mutex}; use event_manager::{EventManager, SubscriberOps}; use super::*; #[test] fn test_periodic_metrics() { let mut event_manager = EventManager::new().expect("Unable to create EventManager"); let metrics = Arc::new(Mutex::new(PeriodicMetrics::new())); event_manager.add_subscriber(metrics.clone()); let flush_period_ms = 50u16; metrics .lock() .expect("Unlock failed.") .start(u64::from(flush_period_ms)); // .start() does an initial flush. assert_eq!(metrics.lock().expect("Unlock failed.").flush_counter, 1); // Wait for at most 1.5x period. event_manager .run_with_timeout(i32::from(flush_period_ms) + i32::from(flush_period_ms) / 2) .expect("Metrics event timeout or error."); // Verify there was another flush. 
assert_eq!(metrics.lock().expect("Unlock failed.").flush_counter, 2); } } ================================================ FILE: src/firecracker/src/seccomp.rs ================================================ // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 use std::fmt::Debug; use std::fs::File; use std::io::{BufReader, Read}; use std::path::Path; use vmm::seccomp::{BpfThreadMap, DeserializationError, deserialize_binary, get_empty_filters}; const THREAD_CATEGORIES: [&str; 3] = ["vmm", "api", "vcpu"]; /// Error retrieving seccomp filters. #[derive(Debug, thiserror::Error, displaydoc::Display)] pub enum FilterError { /// Filter deserialization failed: {0} Deserialization(DeserializationError), /// Invalid thread categories: {0} ThreadCategories(String), /// Missing thread category: {0} MissingThreadCategory(String), /// Filter file open error: {0} FileOpen(std::io::Error), } /// Seccomp filter configuration. #[derive(Debug)] pub enum SeccompConfig { /// Seccomp filtering disabled. None, /// Default, advanced filters. Advanced, /// Custom, user-provided filters. Custom(File), } impl SeccompConfig { /// Given the relevant command line args, return the appropriate config type. pub fn from_args + Debug>( no_seccomp: bool, seccomp_filter: Option, ) -> Result { if no_seccomp { Ok(SeccompConfig::None) } else { match seccomp_filter { Some(path) => Ok(SeccompConfig::Custom( File::open(path).map_err(FilterError::FileOpen)?, )), None => Ok(SeccompConfig::Advanced), } } } } /// Retrieve the appropriate filters, based on the SeccompConfig. pub fn get_filters(config: SeccompConfig) -> Result { match config { SeccompConfig::None => Ok(get_empty_filters()), SeccompConfig::Advanced => get_default_filters(), SeccompConfig::Custom(reader) => get_custom_filters(reader), } } /// Retrieve the default filters containing the syscall rules required by `Firecracker` /// to function. 
The binary file is generated via the `build.rs` script of this crate. fn get_default_filters() -> Result { // Retrieve, at compile-time, the serialized binary filter generated with seccompiler. let bytes: &[u8] = include_bytes!(concat!(env!("OUT_DIR"), "/seccomp_filter.bpf")); let map = deserialize_binary(bytes).map_err(FilterError::Deserialization)?; filter_thread_categories(map) } /// Retrieve custom seccomp filters. fn get_custom_filters(reader: R) -> Result { let map = deserialize_binary(BufReader::new(reader)).map_err(FilterError::Deserialization)?; filter_thread_categories(map) } /// Return an error if the BpfThreadMap contains invalid thread categories. fn filter_thread_categories(map: BpfThreadMap) -> Result { let (filters, invalid_filters): (BpfThreadMap, BpfThreadMap) = map .into_iter() .partition(|(k, _)| THREAD_CATEGORIES.contains(&k.as_str())); if !invalid_filters.is_empty() { // build the error message let mut thread_categories_string = invalid_filters .keys() .fold("".to_string(), |mut acc, elem| { acc.push_str(elem); acc.push(','); acc }); thread_categories_string.pop(); return Err(FilterError::ThreadCategories(thread_categories_string)); } for &category in THREAD_CATEGORIES.iter() { let category_string = category.to_string(); if !filters.contains_key(&category_string) { return Err(FilterError::MissingThreadCategory(category_string)); } } Ok(filters) } #[cfg(test)] mod tests { use std::sync::Arc; use vmm::seccomp::BpfThreadMap; use vmm_sys_util::tempfile::TempFile; use super::*; #[test] fn test_get_filters() { let mut filters = get_empty_filters(); assert_eq!(filters.len(), 3); assert!(filters.remove("vmm").is_some()); assert!(filters.remove("api").is_some()); assert!(filters.remove("vcpu").is_some()); let mut filters = get_empty_filters(); assert_eq!(filters.len(), 3); assert_eq!(filters.remove("vmm").unwrap().len(), 0); assert_eq!(filters.remove("api").unwrap().len(), 0); assert_eq!(filters.remove("vcpu").unwrap().len(), 0); let file = 
TempFile::new().unwrap().into_file(); get_filters(SeccompConfig::Custom(file)).unwrap_err(); } #[test] fn test_filter_thread_categories() { // correct categories let mut map = BpfThreadMap::new(); map.insert("vcpu".to_string(), Arc::new(vec![])); map.insert("vmm".to_string(), Arc::new(vec![])); map.insert("api".to_string(), Arc::new(vec![])); assert_eq!(filter_thread_categories(map).unwrap().len(), 3); // invalid categories let mut map = BpfThreadMap::new(); map.insert("vcpu".to_string(), Arc::new(vec![])); map.insert("vmm".to_string(), Arc::new(vec![])); map.insert("thread1".to_string(), Arc::new(vec![])); map.insert("thread2".to_string(), Arc::new(vec![])); match filter_thread_categories(map).unwrap_err() { FilterError::ThreadCategories(err) => { assert!(err == "thread2,thread1" || err == "thread1,thread2") } _ => panic!("Expected ThreadCategories error."), } // missing category let mut map = BpfThreadMap::new(); map.insert("vcpu".to_string(), Arc::new(vec![])); map.insert("vmm".to_string(), Arc::new(vec![])); match filter_thread_categories(map).unwrap_err() { FilterError::MissingThreadCategory(name) => assert_eq!(name, "api"), _ => panic!("Expected MissingThreadCategory error."), } } #[test] fn test_seccomp_config() { assert!(matches!( SeccompConfig::from_args(true, Option::<&str>::None), Ok(SeccompConfig::None) )); assert!(matches!( SeccompConfig::from_args(false, Some("/dev/null")), Ok(SeccompConfig::Custom(_)) )); assert!(matches!( SeccompConfig::from_args(false, Some("invalid_path")), Err(FilterError::FileOpen(_)) )); // test the default case, no parametes -> default advanced. assert!(matches!( SeccompConfig::from_args(false, Option::<&str>::None), Ok(SeccompConfig::Advanced) )); } } ================================================ FILE: src/firecracker/swagger/firecracker.yaml ================================================ swagger: "2.0" info: title: Firecracker API description: RESTful public-facing API. 
The API is accessible through HTTP calls on specific URLs carrying JSON modeled data. The transport medium is a Unix Domain Socket. version: 1.16.0-dev termsOfService: "" contact: email: "firecracker-maintainers@amazon.com" license: name: "Apache 2.0" url: "http://www.apache.org/licenses/LICENSE-2.0.html" host: "localhost" basePath: "/" schemes: - http consumes: - application/json produces: - application/json paths: /: get: summary: Returns general information about an instance. operationId: describeInstance responses: 200: description: The instance information schema: $ref: "#/definitions/InstanceInfo" default: description: Internal Server Error schema: $ref: "#/definitions/Error" /actions: put: summary: Creates a synchronous action. operationId: createSyncAction parameters: - name: info in: body required: true schema: $ref: "#/definitions/InstanceActionInfo" responses: 204: description: The update was successful 400: description: The action cannot be executed due to bad input schema: $ref: "#/definitions/Error" default: description: Internal Server Error schema: $ref: "#/definitions/Error" /balloon: get: summary: Returns the current balloon device configuration. operationId: describeBalloonConfig responses: 200: description: The balloon device configuration schema: $ref: "#/definitions/Balloon" 400: description: Balloon device not configured. schema: $ref: "#/definitions/Error" default: description: Internal Server Error schema: $ref: "#/definitions/Error" put: summary: Creates or updates a balloon device. description: Creates a new balloon device if one does not already exist, otherwise updates it, before machine startup. This will fail after machine startup. Will fail if update is not possible. 
operationId: putBalloon parameters: - name: body in: body description: Balloon properties required: true schema: $ref: "#/definitions/Balloon" responses: 204: description: Balloon device created/updated 400: description: Balloon device cannot be created/updated due to bad input schema: $ref: "#/definitions/Error" default: description: Internal server error schema: $ref: "#/definitions/Error" patch: summary: Updates a balloon device. description: Updates an existing balloon device, before or after machine startup. Will fail if update is not possible. operationId: patchBalloon parameters: - name: body in: body description: Balloon properties required: true schema: $ref: "#/definitions/BalloonUpdate" responses: 204: description: Balloon device updated 400: description: Balloon device cannot be updated due to bad input schema: $ref: "#/definitions/Error" default: description: Internal server error schema: $ref: "#/definitions/Error" /balloon/statistics: get: summary: Returns the latest balloon device statistics, only if enabled pre-boot. operationId: describeBalloonStats responses: 200: description: The balloon device statistics schema: $ref: "#/definitions/BalloonStats" 400: description: The balloon device statistics were not enabled when the device was configured. schema: $ref: "#/definitions/Error" default: description: Internal Server Error schema: $ref: "#/definitions/Error" patch: summary: Updates a balloon device statistics polling interval. description: Updates an existing balloon device statistics interval, before or after machine startup. Will fail if update is not possible. 
operationId: patchBalloonStatsInterval parameters: - name: body in: body description: Balloon properties required: true schema: $ref: "#/definitions/BalloonStatsUpdate" responses: 204: description: Balloon statistics interval updated 400: description: Balloon statistics interval cannot be updated due to bad input schema: $ref: "#/definitions/Error" default: description: Internal server error schema: $ref: "#/definitions/Error" /balloon/hinting/start: patch: summary: Starts a free page hinting run only if enabled pre-boot. operationId: startBalloonHinting parameters: - name: body in: body description: When the device completes the hinting whether we should automatically ack this. required: false schema: $ref: "#/definitions/BalloonStartCmd" responses: 200: description: Free page hinting run started. 400: description: The balloon free hinting was not enabled when the device was configured. schema: $ref: "#/definitions/Error" default: description: Internal Server Error schema: $ref: "#/definitions/Error" /balloon/hinting/status: get: summary: Returns the balloon hinting statistics, only if enabled pre-boot. operationId: describeBalloonHinting responses: 200: description: The balloon free page hinting statistics schema: $ref: "#/definitions/BalloonHintingStatus" 400: description: The balloon free hinting was not enabled when the device was configured. schema: $ref: "#/definitions/Error" default: description: Internal Server Error schema: $ref: "#/definitions/Error" /balloon/hinting/stop: patch: summary: Stops a free page hinting run only if enabled pre-boot. operationId: stopBalloonHinting responses: 200: description: Free page hinting run stopped. 400: description: The balloon free hinting was not enabled when the device was configured. schema: $ref: "#/definitions/Error" default: description: Internal Server Error schema: $ref: "#/definitions/Error" /boot-source: put: summary: Creates or updates the boot source. Pre-boot only. 
description: Creates new boot source if one does not already exist, otherwise updates it. Will fail if update is not possible. operationId: putGuestBootSource parameters: - name: body in: body description: Guest boot source properties required: true schema: $ref: "#/definitions/BootSource" responses: 204: description: Boot source created/updated 400: description: Boot source cannot be created due to bad input schema: $ref: "#/definitions/Error" default: description: Internal server error schema: $ref: "#/definitions/Error" /cpu-config: put: summary: Configures CPU features flags for the vCPUs of the guest VM. Pre-boot only. description: Provides configuration to the Firecracker process to specify vCPU resource configuration prior to launching the guest machine. operationId: putCpuConfiguration parameters: - name: body in: body description: CPU configuration request schema: $ref: "#/definitions/CpuConfig" responses: 204: description: CPU configuration set successfully 400: description: CPU configuration cannot be updated due to invalid input format schema: $ref: "#/definitions/Error" default: description: Internal server error schema: $ref: "#/definitions/Error" /drives/{drive_id}: put: summary: Creates or updates a drive. Pre-boot only. description: Creates new drive with ID specified by drive_id path parameter. If a drive with the specified ID already exists, updates its state based on new input. Will fail if update is not possible. operationId: putGuestDriveByID parameters: - name: drive_id in: path description: The id of the guest drive required: true type: string - name: body in: body description: Guest drive properties required: true schema: $ref: "#/definitions/Drive" responses: 204: description: Drive created/updated 400: description: Drive cannot be created/updated due to bad input schema: $ref: "#/definitions/Error" default: description: Internal server error. schema: $ref: "#/definitions/Error" patch: summary: Updates the properties of a drive. 
Post-boot only. description: Updates the properties of the drive with the ID specified by drive_id path parameter. Will fail if update is not possible. operationId: patchGuestDriveByID parameters: - name: drive_id in: path description: The id of the guest drive required: true type: string - name: body in: body description: Guest drive properties required: true schema: $ref: "#/definitions/PartialDrive" responses: 204: description: Drive updated 400: description: Drive cannot be updated due to bad input schema: $ref: "#/definitions/Error" default: description: Internal server error. schema: $ref: "#/definitions/Error" /pmem/{id}: put: summary: Creates or updates a pmem device. Pre-boot only. description: Creates new pmem device with ID specified by id parameter. If a pmem device with the specified ID already exists, updates its state based on new input. Will fail if update is not possible. operationId: putGuestPmemByID parameters: - name: id in: path description: The id of the guest pmem device required: true type: string - name: body in: body description: Guest pmem device properties required: true schema: $ref: "#/definitions/Pmem" responses: 204: description: Pmem device is created/updated 400: description: Pmem device cannot be created/updated due to bad input schema: $ref: "#/definitions/Error" default: description: Internal server error. schema: $ref: "#/definitions/Error" /logger: put: summary: Initializes the logger by specifying a named pipe or a file for the logs output. operationId: putLogger parameters: - name: body in: body description: Logging system description required: true schema: $ref: "#/definitions/Logger" responses: 204: description: Logger created. 400: description: Logger cannot be initialized due to bad input. schema: $ref: "#/definitions/Error" default: description: Internal server error. schema: $ref: "#/definitions/Error" /machine-config: get: summary: Gets the machine configuration of the VM. 
description: Gets the machine configuration of the VM. When called before the PUT operation, it will return the default values for the vCPU count (=1), memory size (=128 MiB). By default SMT is disabled and there is no CPU Template. operationId: getMachineConfiguration responses: 200: description: OK schema: $ref: "#/definitions/MachineConfiguration" default: description: Internal server error schema: $ref: "#/definitions/Error" put: summary: Updates the Machine Configuration of the VM. Pre-boot only. description: Updates the Virtual Machine Configuration with the specified input. Firecracker starts with default values for vCPU count (=1) and memory size (=128 MiB). The vCPU count is restricted to the [1, 32] range. With SMT enabled, the vCPU count is required to be either 1 or an even number in the range. otherwise there are no restrictions regarding the vCPU count. If 2M hugetlbfs pages are specified, then `mem_size_mib` must be a multiple of 2. If any of the parameters has an incorrect value, the whole update fails. All parameters that are optional and are not specified are set to their default values (smt = false, track_dirty_pages = false, cpu_template = None, huge_pages = None). operationId: putMachineConfiguration parameters: - name: body in: body description: Machine Configuration Parameters schema: $ref: "#/definitions/MachineConfiguration" responses: 204: description: Machine Configuration created/updated 400: description: Machine Configuration cannot be updated due to bad input schema: $ref: "#/definitions/Error" default: description: Internal server error schema: $ref: "#/definitions/Error" patch: summary: Partially updates the Machine Configuration of the VM. Pre-boot only. description: Partially updates the Virtual Machine Configuration with the specified input. If any of the parameters has an incorrect value, the whole update fails. 
operationId: patchMachineConfiguration parameters: - name: body in: body description: A subset of Machine Configuration Parameters schema: $ref: "#/definitions/MachineConfiguration" responses: 204: description: Machine Configuration created/updated 400: description: Machine Configuration cannot be updated due to bad input schema: $ref: "#/definitions/Error" default: description: Internal server error schema: $ref: "#/definitions/Error" /metrics: put: summary: Initializes the metrics system by specifying a named pipe or a file for the metrics output. operationId: putMetrics parameters: - name: body in: body description: Metrics system description required: true schema: $ref: "#/definitions/Metrics" responses: 204: description: Metrics system created. 400: description: Metrics system cannot be initialized due to bad input request or metrics system already initialized. schema: $ref: "#/definitions/Error" default: description: Internal server error. schema: $ref: "#/definitions/Error" /mmds: put: summary: Creates a MMDS (Microvm Metadata Service) data store. operationId: putMmds parameters: - name: body in: body description: The MMDS data store as JSON. schema: $ref: "#/definitions/MmdsContentsObject" responses: 204: description: MMDS data store created/updated. 400: description: MMDS data store cannot be created due to bad input. schema: $ref: "#/definitions/Error" default: description: Internal server error schema: $ref: "#/definitions/Error" patch: summary: Updates the MMDS data store. operationId: patchMmds parameters: - name: body in: body description: The MMDS data store patch JSON. schema: $ref: "#/definitions/MmdsContentsObject" responses: 204: description: MMDS data store updated. 400: description: MMDS data store cannot be updated due to bad input. schema: $ref: "#/definitions/Error" default: description: Internal server error schema: $ref: "#/definitions/Error" get: summary: Get the MMDS data store. 
operationId: getMmds responses: 200: description: The MMDS data store JSON. schema: type: object additionalProperties: true 404: description: The MMDS data store content can not be found. schema: $ref: "#/definitions/Error" default: description: Internal server error schema: $ref: "#/definitions/Error" /mmds/config: put: summary: Set MMDS configuration. Pre-boot only. operationId: putMmdsConfig description: Configures MMDS version, IPv4 address used by the MMDS network stack and interfaces that allow MMDS requests. parameters: - name: body in: body description: The MMDS configuration as JSON. required: true schema: $ref: "#/definitions/MmdsConfig" responses: 204: description: MMDS configuration was created/updated. 400: description: MMDS configuration cannot be updated due to bad input. schema: $ref: "#/definitions/Error" default: description: Internal server error schema: $ref: "#/definitions/Error" /entropy: put: summary: Creates an entropy device. Pre-boot only. description: Enables an entropy device that provides high-quality random data to the guest. operationId: putEntropyDevice parameters: - name: body in: body description: Guest entropy device properties required: true schema: $ref: "#/definitions/EntropyDevice" responses: 204: description: Entropy device created default: description: Internal server error schema: $ref: "#/definitions/Error" /serial: put: summary: Configures the serial console operationId: putSerialDevice description: Configure the serial console, which the guest can write its kernel logs to. 
Has no effect if the serial console is not also enabled on the guest kernel command line parameters: - name: body in: body description: Serial console properties required: true schema: $ref: "#/definitions/SerialDevice" responses: 204: description: Serial device configured default: description: Internal server error schema: $ref: "#/definitions/Error" /hotplug/memory: put: summary: Configures the hotpluggable memory operationId: putMemoryHotplug description: Configure the hotpluggable memory, which is a virtio-mem device, with an associated memory area that can be hot(un)plugged in the guest on demand using the PATCH API. parameters: - name: body in: body description: Hotpluggable memory configuration required: true schema: $ref: "#/definitions/MemoryHotplugConfig" responses: 204: description: Hotpluggable memory configured default: description: Internal server error schema: $ref: "#/definitions/Error" patch: summary: Updates the size of the hotpluggable memory region operationId: patchMemoryHotplug description: Updates the size of the hotpluggable memory region. The guest will plug and unplug memory to hit the requested memory. parameters: - name: body in: body description: Hotpluggable memory size update required: true schema: $ref: "#/definitions/MemoryHotplugSizeUpdate" responses: 204: description: Hotpluggable memory configured default: description: Internal server error schema: $ref: "#/definitions/Error" get: summary: Retrieves the status of the hotpluggable memory operationId: getMemoryHotplug description: Reuturn the status of the hotpluggable memory. This can be used to follow the progress of the guest after a PATCH API. responses: 200: description: OK schema: $ref: "#/definitions/MemoryHotplugStatus" default: description: Internal server error schema: $ref: "#/definitions/Error" /network-interfaces/{iface_id}: put: summary: Creates a network interface. Pre-boot only. description: Creates new network interface with ID specified by iface_id path parameter. 
operationId: putGuestNetworkInterfaceByID parameters: - name: iface_id in: path description: The id of the guest network interface required: true type: string - name: body in: body description: Guest network interface properties required: true schema: $ref: "#/definitions/NetworkInterface" responses: 204: description: Network interface created/updated 400: description: Network interface cannot be created due to bad input schema: $ref: "#/definitions/Error" default: description: Internal server error schema: $ref: "#/definitions/Error" patch: summary: Updates the rate limiters applied to a network interface. Post-boot only. description: Updates the rate limiters applied to a network interface. operationId: patchGuestNetworkInterfaceByID parameters: - name: iface_id in: path description: The id of the guest network interface required: true type: string - name: body in: body description: A subset of the guest network interface properties required: true schema: $ref: "#/definitions/PartialNetworkInterface" responses: 204: description: Network interface updated 400: description: Network interface cannot be updated due to bad input schema: $ref: "#/definitions/Error" default: description: Internal server error schema: $ref: "#/definitions/Error" /snapshot/create: put: summary: Creates a full or diff snapshot. Post-boot only. description: Creates a snapshot of the microVM state. The microVM should be in the `Paused` state. operationId: createSnapshot parameters: - name: body in: body description: The configuration used for creating a snapshot. required: true schema: $ref: "#/definitions/SnapshotCreateParams" responses: 204: description: Snapshot created 400: description: Snapshot cannot be created due to bad input schema: $ref: "#/definitions/Error" default: description: Internal server error schema: $ref: "#/definitions/Error" /snapshot/load: put: summary: Loads a snapshot. Pre-boot only. description: Loads the microVM state from a snapshot. 
Only accepted on a fresh Firecracker process (before configuring any resource other than the Logger and Metrics). operationId: loadSnapshot parameters: - name: body in: body description: The configuration used for loading a snapshot. required: true schema: $ref: "#/definitions/SnapshotLoadParams" responses: 204: description: Snapshot loaded 400: description: Snapshot cannot be loaded due to bad input schema: $ref: "#/definitions/Error" default: description: Internal server error schema: $ref: "#/definitions/Error" /version: get: summary: Gets the Firecracker version. operationId: getFirecrackerVersion responses: 200: description: OK schema: $ref: "#/definitions/FirecrackerVersion" default: description: Internal server error schema: $ref: "#/definitions/Error" /vm: patch: summary: Updates the microVM state. description: Sets the desired state (Paused or Resumed) for the microVM. operationId: patchVm parameters: - name: body in: body description: The microVM state required: true schema: $ref: "#/definitions/Vm" responses: 204: description: Vm state updated 400: description: Vm state cannot be updated due to bad input schema: $ref: "#/definitions/Error" default: description: Internal server error schema: $ref: "#/definitions/Error" /vm/config: get: summary: Gets the full VM configuration. description: Gets configuration for all VM resources. If the VM is restored from a snapshot, the boot-source, machine-config.smt and machine-config.cpu_template will be empty. operationId: getExportVmConfig responses: 200: description: OK schema: $ref: "#/definitions/FullVmConfiguration" default: description: Internal server error schema: $ref: "#/definitions/Error" /vsock: put: summary: Creates/updates a vsock device. Pre-boot only. description: The first call creates the device with the configuration specified in body. Subsequent calls will update the device configuration. May fail if update is not possible. 
operationId: putGuestVsock parameters: - name: body in: body description: Guest vsock properties required: true schema: $ref: "#/definitions/Vsock" responses: 204: description: Vsock created/updated 400: description: Vsock cannot be created due to bad input schema: $ref: "#/definitions/Error" default: description: Internal server error schema: $ref: "#/definitions/Error" definitions: Balloon: type: object required: - amount_mib - deflate_on_oom description: Balloon device descriptor. properties: amount_mib: type: integer description: Target balloon size in MiB. deflate_on_oom: type: boolean description: Whether the balloon should deflate when the guest has memory pressure. stats_polling_interval_s: type: integer description: Interval in seconds between refreshing statistics. A non-zero value will enable the statistics. Defaults to 0. free_page_hinting: type: boolean description: Whether the free page hinting feature is enabled. free_page_reporting: type: boolean description: Whether the free page reporting feature is enabled. BalloonUpdate: type: object required: - amount_mib description: Balloon device descriptor. properties: amount_mib: type: integer description: Target balloon size in MiB. BalloonStats: type: object description: Describes the balloon device statistics. required: - target_pages - actual_pages - target_mib - actual_mib properties: target_pages: description: Target number of pages the device aims to hold. type: integer actual_pages: description: Actual number of pages the device is holding. type: integer target_mib: description: Target amount of memory (in MiB) the device aims to hold. type: integer actual_mib: description: Actual amount of memory (in MiB) the device is holding. type: integer swap_in: description: The amount of memory that has been swapped in (in bytes). type: integer format: int64 swap_out: description: The amount of memory that has been swapped out to disk (in bytes). 
type: integer format: int64 major_faults: description: The number of major page faults that have occurred. type: integer format: int64 minor_faults: description: The number of minor page faults that have occurred. type: integer format: int64 free_memory: description: The amount of memory not being used for any purpose (in bytes). type: integer format: int64 total_memory: description: The total amount of memory available (in bytes). type: integer format: int64 available_memory: description: An estimate of how much memory is available (in bytes) for starting new applications, without pushing the system to swap. type: integer format: int64 disk_caches: description: The amount of memory, in bytes, that can be quickly reclaimed without additional I/O. Typically these pages are used for caching files from disk. type: integer format: int64 hugetlb_allocations: description: The number of successful hugetlb page allocations in the guest. type: integer format: int64 hugetlb_failures: description: The number of failed hugetlb page allocations in the guest. type: integer format: int64 oom_kill: description: OOM killer invocations, indicating critical memory pressure. type: integer format: int64 alloc_stall: description: Counter of Allocation enter a slow path to gain more memory page. The reclaim/scan metrics can reveal what is actually happening. type: integer format: int64 async_scan: description: Amount of memory scanned asynchronously. type: integer format: int64 direct_scan: description: Amount of memory scanned directly. type: integer format: int64 async_reclaim: description: Amount of memory reclaimed asynchronously. type: integer format: int64 direct_reclaim: description: Amount of memory reclaimed directly. type: integer format: int64 BalloonStartCmd: type: object description: Command used to start a free page hinting run. properties: acknowledge_on_stop: description: If Firecracker should automatically acknowledge when the guest submits a done cmd. 
type: boolean BalloonHintingStatus: type: object description: Describes the free page hinting status. required: - host_cmd properties: host_cmd: description: The last command issued by the host. type: integer guest_cmd: description: The last command provided by the guest. type: integer BalloonStatsUpdate: type: object required: - stats_polling_interval_s description: Update the statistics polling interval, with the first statistics update scheduled immediately. Statistics cannot be turned on/off after boot. properties: stats_polling_interval_s: type: integer description: Interval in seconds between refreshing statistics. BootSource: type: object required: - kernel_image_path description: Boot source descriptor. properties: boot_args: type: string description: Kernel boot arguments initrd_path: type: string description: Host level path to the initrd image used to boot the guest kernel_image_path: type: string description: Host level path to the kernel image used to boot the guest CpuTemplate: type: string description: The CPU Template defines a set of flags to be disabled from the microvm so that the features exposed to the guest are the same as in the selected instance type. This parameter has been deprecated and it will be removed in future Firecracker release. enum: - C3 - T2 - T2S - T2CL - T2A - V1N1 - None default: "None" CpuConfig: type: object description: The CPU configuration template defines a set of bit maps as modifiers of flags accessed by register to be disabled/enabled for the microvm. properties: kvm_capabilities: type: array description: A collection of KVM capabilities to be added or removed (both x86_64 and aarch64) items: type: string description: KVM capability as a numeric string. Prefix with '!' to remove capability. 
Example "121" (add) or "!121" (remove) cpuid_modifiers: type: array description: A collection of CPUID leaf modifiers (x86_64 only) items: $ref: "#/definitions/CpuidLeafModifier" msr_modifiers: type: array description: A collection of model specific register modifiers (x86_64 only) items: $ref: "#/definitions/MsrModifier" reg_modifiers: type: array description: A collection of register modifiers (aarch64 only) items: $ref: "#/definitions/ArmRegisterModifier" vcpu_features: type: array description: A collection of vCPU features to be modified (aarch64 only) items: $ref: "#/definitions/VcpuFeatures" CpuidLeafModifier: type: object description: Modifier for a CPUID leaf and subleaf (x86_64) required: - leaf - subleaf - flags - modifiers properties: leaf: type: string description: CPUID leaf index as hex, binary, or decimal string (e.g., "0x0", "0b0", "0")) subleaf: type: string description: CPUID subleaf index as hex, binary, or decimal string (e.g., "0x0", "0b0", "0") flags: type: integer format: int32 description: KVM feature flags for this leaf-subleaf modifiers: type: array description: Register modifiers for this CPUID leaf items: $ref: "#/definitions/CpuidRegisterModifier" CpuidRegisterModifier: type: object description: Modifier for a specific CPUID register within a leaf (x86_64) required: - register - bitmap properties: register: type: string description: Target CPUID register name enum: - eax - ebx - ecx - edx bitmap: type: string description: 32-bit bitmap string defining which bits to modify. Format is "0b" followed by 32 characters where '0' = clear bit, '1' = set bit, 'x' = don't modify. 
Example "0b00000000000000000000000000000001" or "0bxxxxxxxxxxxxxxxxxxxxxxxxxxxx0001" MsrModifier: type: object description: Modifier for a model specific register (x86_64) required: - addr - bitmap properties: addr: type: string description: 32-bit MSR address as hex, binary, or decimal string (e.g., "0x10a", "0b100001010", "266") bitmap: type: string description: 64-bit bitmap string defining which bits to modify. Format is "0b" followed by 64 characters where '0' = clear bit, '1' = set bit, 'x' = don't modify. Underscores can be used for readability. Example "0b0000000000000000000000000000000000000000000000000000000000000001" ArmRegisterModifier: type: object description: Modifier for an ARM register (aarch64) required: - addr - bitmap properties: addr: type: string description: 64-bit register address as hex, binary, or decimal string (e.g., "0x0", "0b0", "0") bitmap: type: string description: 128-bit bitmap string defining which bits to modify. Format is "0b" followed by up to 128 characters where '0' = clear bit, '1' = set bit, 'x' = don't modify. Underscores can be used for readability. Example "0b0000000000000000000000000000000000000000000000000000000000000001" VcpuFeatures: type: object description: vCPU feature modifier (aarch64) required: - index - bitmap properties: index: type: integer format: int32 description: Index in the kvm_vcpu_init.features array bitmap: type: string description: 32-bit bitmap string defining which bits to modify. Format is "0b" followed by 32 characters where '0' = clear bit, '1' = set bit, 'x' = don't modify. Example "0b00000000000000000000000001100000" Drive: type: object required: - drive_id - is_root_device properties: drive_id: type: string partuuid: type: string description: Represents the unique id of the boot partition of this device. It is optional and it will be taken into account only if the is_root_device field is true. 
is_root_device: type: boolean cache_type: type: string description: Represents the caching strategy for the block device. enum: ["Unsafe", "Writeback"] default: "Unsafe" # VirtioBlock specific parameters is_read_only: type: boolean description: Is block read only. This field is required for virtio-block config and should be omitted for vhost-user-block configuration. path_on_host: type: string description: Host level path for the guest drive. This field is required for virtio-block config and should be omitted for vhost-user-block configuration. rate_limiter: $ref: "#/definitions/RateLimiter" io_engine: type: string description: Type of the IO engine used by the device. "Async" is supported on host kernels newer than 5.10.51. This field is optional for virtio-block config and should be omitted for vhost-user-block configuration. enum: ["Sync", "Async"] default: "Sync" # VhostUserBlock specific parameters socket: type: string description: Path to the socket of vhost-user-block backend. This field is required for vhost-user-block config should be omitted for virtio-block configuration. Pmem: type: object required: - id - path_on_host properties: id: type: string description: Identificator for this device. path_on_host: type: string description: Host level path for the virtio-pmem device to use as a backing file. root_device: type: boolean description: Flag to make this device be the root device for VM boot. Setting this flag will fail if there is another device configured to be a root device already. read_only: type: boolean description: Flag to map backing file in read-only mode. Error: type: object properties: fault_message: type: string description: A description of the error condition readOnly: true FullVmConfiguration: type: object properties: balloon: $ref: "#/definitions/Balloon" drives: type: array description: Configurations for all block devices. 
items: $ref: "#/definitions/Drive" boot-source: $ref: "#/definitions/BootSource" cpu-config: $ref: "#/definitions/CpuConfig" logger: $ref: "#/definitions/Logger" machine-config: $ref: "#/definitions/MachineConfiguration" metrics: $ref: "#/definitions/Metrics" memory-hotplug: $ref: "#/definitions/MemoryHotplugConfig" mmds-config: $ref: "#/definitions/MmdsConfig" network-interfaces: type: array description: Configurations for all net devices. items: $ref: "#/definitions/NetworkInterface" pmem: type: array description: Configurations for all pmem devices. items: $ref: "#/definitions/Pmem" vsock: $ref: "#/definitions/Vsock" entropy: $ref: "#/definitions/EntropyDevice" InstanceActionInfo: type: object description: Variant wrapper containing the real action. required: - action_type properties: action_type: description: Enumeration indicating what type of action is contained in the payload type: string enum: - FlushMetrics - InstanceStart - SendCtrlAltDel InstanceInfo: type: object description: Describes MicroVM instance information. required: - app_name - id - state - vmm_version properties: app_name: description: Application name. type: string id: description: MicroVM / instance ID. type: string state: description: The current detailed state (Not started, Running, Paused) of the Firecracker instance. This value is read-only for the control-plane. type: string enum: - Not started - Running - Paused vmm_version: description: MicroVM hypervisor build version. type: string Logger: type: object description: Describes the configuration option for the logging capability. properties: level: type: string description: Set the level. The possible values are case-insensitive. enum: [Error, Warning, Info, Debug, Trace, Off] default: Info log_path: type: string description: Path to the named pipe or file for the human readable log output. show_level: type: boolean description: Whether or not to output the level in the logs. 
default: false show_log_origin: type: boolean description: Whether or not to include the file path and line number of the log's origin. default: false module: type: string description: The module path to filter log messages by. example: api_server::request MachineConfiguration: type: object description: Describes the number of vCPUs, memory size, SMT capabilities, huge page configuration and the CPU template. required: - mem_size_mib - vcpu_count properties: cpu_template: $ref: "#/definitions/CpuTemplate" # gdb_socket_path: # type: string # description: Path to the GDB socket. Requires the gdb feature to be enabled. smt: type: boolean description: Flag for enabling/disabling simultaneous multithreading. Can be enabled only on x86. default: false mem_size_mib: type: integer description: Memory size of VM track_dirty_pages: type: boolean description: Enable dirty page tracking. If this is enabled, then incremental guest memory snapshots can be created. These belong to diff snapshots, which contain, besides the microVM state, only the memory dirtied since a previous snapshot. Full snapshots each contain a full copy of the guest memory. default: false vcpu_count: type: integer minimum: 1 maximum: 32 description: Number of vCPUs (either 1 or an even number) huge_pages: type: string enum: - None - 2M description: Which huge pages configuration (if any) should be used to back guest memory. MemoryBackend: type: object required: - backend_type - backend_path properties: backend_type: type: string enum: - File - Uffd backend_path: type: string description: Based on 'backend_type' it is either 1) Path to the file that contains the guest memory to be loaded 2) Path to the UDS where a process is listening for a UFFD initialization control payload and open file descriptor that it can use to serve this process's guest memory page faults Metrics: type: object description: Describes the configuration option for the metrics capability. 
required: - metrics_path properties: metrics_path: type: string description: Path to the named pipe or file where the JSON-formatted metrics are flushed. MmdsConfig: type: object description: Defines the MMDS configuration. required: - network_interfaces properties: version: description: Enumeration indicating the MMDS version to be configured. type: string enum: - V1 - V2 default: V1 network_interfaces: description: List of the network interface IDs capable of forwarding packets to the MMDS. Network interface IDs mentioned must be valid at the time of this request. The net device model will reply to HTTP GET requests sent to the MMDS address via the interfaces mentioned. In this case, both ARP requests and TCP segments heading to `ipv4_address` are intercepted by the device model, and do not reach the associated TAP device. type: array items: type: string ipv4_address: type: string format: "169.254.([1-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-4]).([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])" default: "169.254.169.254" description: A valid IPv4 link-local address. imds_compat: type: boolean description: MMDS operates compatibly with EC2 IMDS (i.e. responds "text/plain" content regardless of Accept header in requests). default: false MmdsContentsObject: type: object description: Describes the contents of MMDS in JSON format. additionalProperties: true NetworkInterface: type: object description: Defines a network interface. required: - host_dev_name - iface_id properties: guest_mac: type: string host_dev_name: type: string description: Host level path for the guest network interface iface_id: type: string rx_rate_limiter: $ref: "#/definitions/RateLimiter" tx_rate_limiter: $ref: "#/definitions/RateLimiter" PartialDrive: type: object required: - drive_id properties: drive_id: type: string path_on_host: type: string description: Host level path for the guest drive. 
This field is optional for virtio-block config and should be omitted for vhost-user-block configuration. rate_limiter: $ref: "#/definitions/RateLimiter" PartialNetworkInterface: type: object description: Defines a partial network interface structure, used to update the rate limiters for that interface, after microvm start. required: - iface_id properties: iface_id: type: string rx_rate_limiter: $ref: "#/definitions/RateLimiter" tx_rate_limiter: $ref: "#/definitions/RateLimiter" RateLimiter: type: object description: Defines an IO rate limiter with independent bytes/s and ops/s limits. Limits are defined by configuring each of the _bandwidth_ and _ops_ token buckets. This field is optional for virtio-block config and should be omitted for vhost-user-block configuration. properties: bandwidth: $ref: "#/definitions/TokenBucket" description: Token bucket with bytes as tokens ops: $ref: "#/definitions/TokenBucket" description: Token bucket with operations as tokens SnapshotCreateParams: type: object required: - mem_file_path - snapshot_path properties: mem_file_path: type: string description: Path to the file that will contain the guest memory. snapshot_path: type: string description: Path to the file that will contain the microVM state. snapshot_type: type: string enum: - Full - Diff description: Type of snapshot to create. It is optional and by default, a full snapshot is created. NetworkOverride: type: object description: Allows for changing the backing TAP device of a network interface during snapshot restore. required: - iface_id - host_dev_name properties: iface_id: type: string description: The name of the interface to modify host_dev_name: type: string description: The new host device of the interface VsockOverride: type: object description: Allows for changing the backing Unix Domain Socket of a vsock device during snapshot restore. required: - uds_path properties: uds_path: type: string description: The new path for the backing Unix Domain Socket. 
SnapshotLoadParams: type: object description: Defines the configuration used for handling snapshot resume. Exactly one of the two `mem_*` fields must be present in the body of the request. required: - snapshot_path properties: enable_diff_snapshots: type: boolean description: (Deprecated) Enable dirty page tracking to improve space efficiency of diff snapshots track_dirty_pages: type: boolean description: Enable dirty page tracking to improve space efficiency of diff snapshots mem_file_path: type: string description: Path to the file that contains the guest memory to be loaded. It is only allowed if `mem_backend` is not present. This parameter has been deprecated and it will be removed in future Firecracker release. mem_backend: $ref: "#/definitions/MemoryBackend" description: Configuration for the backend that handles memory load. If this field is specified, `mem_file_path` is forbidden. Either `mem_backend` or `mem_file_path` must be present at a time. snapshot_path: type: string description: Path to the file that contains the microVM state to be loaded. resume_vm: type: boolean description: When set to true, the vm is also resumed if the snapshot load is successful. network_overrides: type: array description: Network host device names to override items: $ref: "#/definitions/NetworkOverride" vsock_override: $ref: "#/definitions/VsockOverride" description: Overrides the vsock device's UDS path on snapshot restore. This is useful for restoring a snapshot with a different socket path than the one used when the snapshot was created. For example, when the original socket path is no longer available or when deploying to a different environment. TokenBucket: type: object description: Defines a token bucket with a maximum capacity (size), an initial burst size (one_time_burst) and an interval for refilling purposes (refill_time). The refill-rate is derived from size and refill_time, and it is the constant rate at which the tokens replenish. 
The refill process only starts happening after the initial burst budget is consumed. Consumption from the token bucket is unbounded in speed which allows for bursts bound in size by the amount of tokens available. Once the token bucket is empty, consumption speed is bound by the refill_rate. required: - refill_time - size properties: one_time_burst: type: integer format: int64 description: The initial size of a token bucket. minimum: 0 refill_time: type: integer format: int64 description: The amount of milliseconds it takes for the bucket to refill. minimum: 0 size: type: integer format: int64 description: The total number of tokens this bucket can hold. minimum: 0 Vm: type: object description: Defines the microVM running state. It is especially useful in the snapshotting context. required: - state properties: state: type: string enum: - Paused - Resumed EntropyDevice: type: object description: Defines an entropy device. properties: rate_limiter: $ref: "#/definitions/RateLimiter" SerialDevice: type: object description: The configuration of the serial device properties: serial_out_path: type: string description: Path to a file or named pipe on the host to which serial output should be written. MemoryHotplugConfig: type: object description: The configuration of the hotpluggable memory device (virtio-mem) properties: total_size_mib: type: integer description: Total size of the hotpluggable memory in MiB. slot_size_mib: type: integer default: 128 minimum: 128 description: Slot size for the hotpluggable memory in MiB. This will determine the granularity of hot-plug memory from the host. Refer to the device documentation on how to tune this value. block_size_mib: type: integer default: 2 minimum: 2 description: (Logical) Block size for the hotpluggable memory in MiB. This will determine the logical granularity of hot-plug memory for the guest. Refer to the device documentation on how to tune this value. 
MemoryHotplugSizeUpdate: type: object description: An update to the size of the hotpluggable memory region. properties: requested_size_mib: type: integer description: New target region size. MemoryHotplugStatus: type: object description: The status of the hotpluggable memory device (virtio-mem) properties: total_size_mib: type: integer description: Total size of the hotpluggable memory in MiB. slot_size_mib: type: integer description: Slot size for the hotpluggable memory in MiB. block_size_mib: type: integer description: (Logical) Block size for the hotpluggable memory in MiB. plugged_size_mib: type: integer description: Plugged size for the hotpluggable memory in MiB. requested_size_mib: type: integer description: Requested size for the hotpluggable memory in MiB. FirecrackerVersion: type: object description: Describes the Firecracker version. required: - firecracker_version properties: firecracker_version: description: Firecracker build version. type: string Vsock: type: object description: Defines a vsock device, backed by a set of Unix Domain Sockets, on the host side. For host-initiated connections, Firecracker will be listening on the Unix socket identified by the path `uds_path`. Firecracker will create this socket, bind and listen on it. Host-initiated connections will be performed by connection to this socket and issuing a connection forwarding request to the desired guest-side vsock port (i.e. `CONNECT 52\n`, to connect to port 52). For guest-initiated connections, Firecracker will expect host software to be bound and listening on Unix sockets at `uds_path_`. E.g. "/path/to/host_vsock.sock_52" for port number 52. required: - guest_cid - uds_path properties: guest_cid: type: integer minimum: 3 description: Guest Vsock CID uds_path: type: string description: Path to UNIX domain socket, used to proxy vsock connections. vsock_id: type: string description: This parameter has been deprecated and it will be removed in future Firecracker release. 
================================================
FILE: src/firecracker/tests/verify_dependencies.rs
================================================
// Copyright 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

#![allow(clippy::tests_outside_test_module)]

use std::collections::HashMap;
use std::fmt::Debug;
use std::path::Path;

use cargo_toml::{Dependency, DepsSet, Manifest};
use regex::Regex;

/// Ensures no workspace crate specifies a dependency as a comparison requirement
/// (e.g. `>=1.2.3`); only caret (`1.2.3`) or exact (`=1.2.3`) requirements pass.
#[test]
fn test_no_comparison_requirements() {
    // HashMap mapping crate -> [(violating dependency, specified version)]
    let mut violating_dependencies = HashMap::new();
    let src_firecracker_path = std::env::var("CARGO_MANIFEST_DIR").unwrap();
    // The parent of this crate's manifest directory is the `src/` directory
    // containing every workspace crate.
    let src_path = format!("{}/..", src_firecracker_path);

    for fc_crate in std::fs::read_dir(src_path).unwrap() {
        let fc_crate = fc_crate.unwrap();
        // Only directories can be crates; skip plain files.
        if fc_crate.metadata().unwrap().is_dir() {
            let violating_in_crate =
                violating_dependencies_of_cargo_toml(fc_crate.path().join("Cargo.toml"));
            if !violating_in_crate.is_empty() {
                violating_dependencies.insert(
                    fc_crate.file_name().into_string().unwrap(),
                    violating_in_crate,
                );
            }
        }
    }

    assert_eq!(
        violating_dependencies,
        HashMap::new(),
        "Dependencies should not be specified as comparison requirements. \
         They should use caret requirements. See: \
         https://doc.rust-lang.org/cargo/reference/specifying-dependencies.html"
    );
}

/// Parses the specified Cargo.toml file and returns any dependencies specified using comparison
/// requirements.
/// /// The return value maps the name of violating dependencies to the specified version fn violating_dependencies_of_cargo_toml + Debug>( path: T, ) -> HashMap { let manifest = Manifest::from_path(path).unwrap(); violating_dependencies_of_depsset(manifest.dependencies) .chain(violating_dependencies_of_depsset(manifest.dev_dependencies)) .chain(violating_dependencies_of_depsset( manifest.build_dependencies, )) .collect() } /// Returns an iterator over all dependencies in the given DepsSet specified using comparison /// requirements /// /// The iterator produces tuples of the form (violating dependency, specified version) fn violating_dependencies_of_depsset(depsset: DepsSet) -> impl Iterator { depsset .into_iter() .filter_map(|(name, dependency)| { match dependency { Dependency::Simple(version) => Some((name, version)), // dependencies specified as `libc = "0.2.117"` Dependency::Detailed(dependency_detail) => { dependency_detail.version.map(|version| (name, version)) } // dependencies specified without version, such as `libc = {path = "../libc"} _ => None, } }) .filter(|(_, version)| !Regex::new(r"^=?\d*\.\d*\.\d*$").unwrap().is_match(version)) } ================================================ FILE: src/jailer/Cargo.toml ================================================ [package] name = "jailer" version = "1.16.0-dev" authors = ["Amazon Firecracker team "] edition = "2024" description = "Process for starting Firecracker in production scenarios; applies a cgroup/namespace isolation barrier and then drops privileges." 
homepage = "https://firecracker-microvm.github.io/"
license = "Apache-2.0"

[[bin]]
name = "jailer"
bench = false

[features]
tracing = ["log-instrument", "utils/tracing"]

[dependencies]
libc = "0.2.183"
log-instrument = { path = "../log-instrument", optional = true }
regex = { version = "1.12.3", default-features = false, features = ["std"] }
thiserror = "2.0.18"
vmm-sys-util = "0.15.0"

utils = { path = "../utils" }

[lints]
workspace = true

================================================
FILE: src/jailer/src/cgroup.rs
================================================
// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

use std::collections::hash_map::Entry::{Occupied, Vacant};
use std::collections::{HashMap, HashSet};
use std::fmt::Debug;
use std::fs::{self, File};
use std::io::{BufRead, BufReader};
use std::path::{Path, PathBuf};
use std::process;

use regex::Regex;

use crate::{JailerError, readln_special, writeln_special};

// Holds information on a cgroup mount point discovered on the system
#[derive(Debug)]
struct CgroupMountPoint {
    // Directory where the hierarchy is mounted (e.g. "/sys/fs/cgroup/cpu,cpuacct").
    dir: String,
    // Comma-separated mount options; for cgroupv1 these name the mounted controllers.
    options: String,
}

// Holds a cache of discovered mount points and cgroup hierarchies
#[derive(Debug)]
struct CgroupHierarchies {
    // Maps a controller name (or "unified" for v2) to its hierarchy root path.
    hierarchies: HashMap,
    // Cached v1 mount points; hierarchies are resolved from these on demand.
    mount_points: Vec,
}

impl CgroupHierarchies {
    // Constructs a new cache of hierarchies and mount points
    // It will discover cgroup mount points and hierarchies configured
    // on the system and cache the info required to create cgroups later
    // within this hierarchies
    fn new(ver: u8, proc_mounts_path: &str) -> Result {
        let mut h = CgroupHierarchies {
            hierarchies: HashMap::new(),
            mount_points: Vec::new(),
        };

        // search PROC_MOUNTS for cgroup mount points
        let f = File::open(proc_mounts_path)
            .map_err(|err| JailerError::FileOpen(PathBuf::from(proc_mounts_path), err))?;

        // Regex courtesy of Filippo.
        // This will match on each line from /proc/mounts for both v1 and v2 mount points.
        //
        // /proc/mounts contains lines that look like this:
        // cgroup2 /sys/fs/cgroup/unified cgroup2 rw,nosuid,nodev,noexec,relatime,nsdelegate 0 0
        // cgroup /sys/fs/cgroup/cpu,cpuacct cgroup rw,nosuid,nodev,noexec,relatime,cpu,cpuacct 0 0
        //
        // This Regex will extract:
        // * "/sys/fs/cgroup/unified" in the "dir" capture group.
        // * "2" in the "ver" capture group as the cgroup version taken from "cgroup2"; for v1,
        //   the "ver" capture group will be empty (len = 0).
        // * "[...],relatime,cpu,cpuacct" in the "options" capture group; this is used for
        //   cgroupv1 to determine what controllers are mounted at the location.
        let re = Regex::new(
            r"^([a-z2]*)[[:space:]](?P.*)[[:space:]]cgroup(?P2?)[[:space:]](?P.*)[[:space:]]0[[:space:]]0$",
        ).map_err(JailerError::RegEx)?;

        for l in BufReader::new(f).lines() {
            let l = l.map_err(|err| JailerError::ReadLine(PathBuf::from(proc_mounts_path), err))?;
            if let Some(capture) = re.captures(&l) {
                if ver == 2 && capture["ver"].len() == 1 {
                    // Found the cgroupv2 unified mountpoint; with cgroupsv2 there is only one
                    // hierarchy so we insert it in the hashmap to use it later when creating
                    // cgroups
                    h.hierarchies
                        .insert("unified".to_string(), PathBuf::from(&capture["dir"]));
                    break;
                } else if ver == 1 && capture["ver"].is_empty() {
                    // Found a cgroupv1 mountpoint; with cgroupsv1 we can have multiple hierarchies.
                    // Since we don't know which one will be used, we cache the mountpoints now,
                    // and will create the hierarchies on demand when a cgroup is built.
                    h.mount_points.push(CgroupMountPoint {
                        dir: String::from(&capture["dir"]),
                        options: String::from(&capture["options"]),
                    });
                }
            }
        }

        if h.hierarchies.is_empty() && h.mount_points.is_empty() {
            Err(JailerError::CgroupHierarchyMissing(
                "No hierarchy found for this cgroup version.".to_string(),
            ))
        } else {
            Ok(h)
        }
    }

    // Returns the path to the root of the hierarchy for the controller specified
    // Cgroups for a controller are arranged in a hierarchy; multiple controllers
    // may share the same hierarchy
    fn get_v1_hierarchy_path(&mut self, controller: &str) -> Result<&PathBuf, JailerError> {
        // First try and see if the path is already discovered.
        match self.hierarchies.entry(controller.to_string()) {
            Occupied(entry) => Ok(entry.into_mut()),
            Vacant(entry) => {
                // Since the path for this controller type was not already discovered
                // we need to search through the mount points to find it
                let mut path = None;
                for m in self.mount_points.iter() {
                    if m.options.split(',').any(|x| x == controller) {
                        path = Some(PathBuf::from(&m.dir));
                        break;
                    }
                }
                // It's possible that the controller is not mounted or a bad controller
                // name was specified. Return an error in this case
                match path {
                    Some(p) => Ok(entry.insert(p)),
                    None => Err(JailerError::CgroupControllerUnavailable(
                        controller.to_string(),
                    )),
                }
            }
        }
    }

    // Returns the path to the root of the hierarchy
    pub fn get_v2_hierarchy_path(&self) -> Result<&PathBuf, JailerError> {
        match self.hierarchies.get("unified") {
            Some(entry) => Ok(entry),
            None => Err(JailerError::CgroupHierarchyMissing(
                "cgroupsv2 hierarchy missing".to_string(),
            )),
        }
    }
}

// Allows creation of cgroups on the system for both versions
#[derive(Debug)]
pub struct CgroupConfigurationBuilder {
    hierarchies: CgroupHierarchies,
    cgroup_conf: CgroupConfiguration,
}

impl CgroupConfigurationBuilder {
    // Creates the builder object
    // It will initialize the CgroupHierarchy cache.
pub fn new(ver: u8, proc_mounts_path: &str) -> Result {
    Ok(CgroupConfigurationBuilder {
        hierarchies: CgroupHierarchies::new(ver, proc_mounts_path)?,
        // Pick the configuration flavor matching the requested cgroup version.
        cgroup_conf: match ver {
            1 => Ok(CgroupConfiguration::V1(HashMap::new())),
            2 => Ok(CgroupConfiguration::V2(HashMap::new())),
            _ => Err(JailerError::CgroupInvalidVersion(ver.to_string())),
        }?,
    })
}

// Adds a cgroup property to the configuration
pub fn add_cgroup_property(
    &mut self,
    file: String,
    value: String,
    id: &str,
    parent_cg: &Path,
) -> Result<(), JailerError> {
    match self.cgroup_conf {
        CgroupConfiguration::V1(ref mut cgroup_conf_v1) => {
            // v1: properties are grouped per controller, each controller having
            // its own hierarchy root.
            let controller = get_controller_from_filename(&file)?;
            let path = self.hierarchies.get_v1_hierarchy_path(controller)?;
            let cgroup = cgroup_conf_v1
                .entry(String::from(controller))
                .or_insert(CgroupV1::new(id, parent_cg, path)?);
            cgroup.add_property(file, value)?;
            Ok(())
        }
        CgroupConfiguration::V2(ref mut cgroup_conf_v2) => {
            // v2: a single unified hierarchy holds every controller.
            let path = self.hierarchies.get_v2_hierarchy_path()?;
            let cgroup = cgroup_conf_v2
                .entry(String::from("unified"))
                .or_insert(CgroupV2::new(id, parent_cg, path)?);
            cgroup.add_property(file, value)?;
            Ok(())
        }
    }
}

// Consumes the builder, yielding the accumulated configuration.
pub fn build(self) -> CgroupConfiguration {
    self.cgroup_conf
}

// Returns the path to the unified controller
pub fn get_v2_hierarchy_path(&self) -> Result<&PathBuf, JailerError> {
    self.hierarchies.get_v2_hierarchy_path()
}
}

#[derive(Debug)]
struct CgroupProperty {
    file: String,  // file representing the cgroup (e.g cpuset.mems).
    value: String, // value that will be written into the file.
}

#[derive(Debug)]
struct CgroupBase {
    properties: Vec,
    location: PathBuf, // microVM cgroup location for the specific controller.
}

#[derive(Debug)]
pub struct CgroupV1 {
    base: CgroupBase,
    cg_parent_depth: u16, // depth of the nested cgroup hierarchy
}

#[derive(Debug)]
pub struct CgroupV2 {
    base: CgroupBase,
    // Controllers listed in the hierarchy root's cgroup.controllers file.
    available_controllers: HashSet,
}

pub trait Cgroup: Debug {
    // Adds a property (file-value) to the group
    fn add_property(&mut self, file: String, value: String) -> Result<(), JailerError>;

    // Write all the cgroup property values into the cgroup property files.
    fn write_values(&self) -> Result<(), JailerError>;

    // This function will assign the process associated with the pid to the respective cgroup.
    fn attach_pid(&self) -> Result<(), JailerError>;
}

#[derive(Debug)]
pub enum CgroupConfiguration {
    V1(HashMap),
    V2(HashMap),
}

impl CgroupConfiguration {
    pub fn setup(&self) -> Result<(), JailerError> {
        match self {
            Self::V1(conf) => setup_cgroup_conf(conf),
            Self::V2(conf) => setup_cgroup_conf(conf),
        }
    }
}

// If we call inherit_from_parent_aux(.../A/B/C, file, condition), the following will happen:
// 1) If .../A/B/C/file does not exist, or if .../A/B/file does not exist, return an error.
// 2) If .../A/B/file is not empty, write the first line of .../A/B/file into .../A/B/C/file
//    and return.
// 3) If ../A/B/file exists but it is empty, call inherit_from_parent_aux(.../A/B, file, false).
// 4) If .../A/B/file is no longer empty, write the first line of .../A/B/file into
//    .../A/B/C/file, and return.
// 5) Otherwise, return an error.

// How is this helpful? When creating cgroup folders for the jailer Firecracker instance, the
// jailer will create a hierarchy that looks like <parent_cgroup>/<id> (NOTE(review): placeholder
// names reconstructed; the original comment text was garbled by extraction). Depending on each
// particular cgroup controller, the parent cgroup contains a number of configuration files. These
// are not actually present on a disk; they are special files exposed by the controller, and they
// usually contain a single line with some configuration value(s). When the "parent_cgroup" and
// <id> subfolders are created, configuration files with the same name appear automatically in the
// new folders, but their contents are not always automatically populated. Moreover, if a
// configuration file is empty at the parent level, then we cannot have a non-empty file with the
// same name at the child level. The inherit_from_parent function (which is based on the following
// helper function) helps with propagating the values.

// There is also a potential race condition mentioned below. Here is what it refers to: let's say
// we start multiple jailer processes, and one of them calls
// inherit_from_parent_aux(/A/<parent_cgroup>/id1, file, true), and hits case number 3) from the
// list above, thus recursively calling inherit_from_parent_aux(/A/<parent_cgroup>, file, false).
// It's entirely possible there was another process in the exact same situation, and that process
// gets to write something to /A/<parent_cgroup>/file first. In this case, the recursive call made
// by the first process to inherit_from_parent_aux(/A/<parent_cgroup>, file, false) may fail when
// writing to /A/<parent_cgroup>/file, but we can still continue, because step 4) only cares about
// the file no longer being empty, regardless of who actually got to populate its contents.
fn inherit_from_parent_aux(
    path: &Path,
    file_name: &str,
    retry_depth: u16,
) -> Result<(), JailerError> {
    // The function with_file_name() replaces the last component of a path with the given name.
    let parent_file = path.with_file_name(file_name);

    let mut line = readln_special(&parent_file)?;
    if line.is_empty() {
        if retry_depth > 0 {
            // We have to borrow "parent" from "parent_file" as opposed to "path", because then
            // we wouldn't be able to mutably borrow path at the end of this function (at least not
            // according to how the Rust borrow checker operates right now :-s)
            let parent = parent_file
                .parent()
                .ok_or_else(|| JailerError::MissingParent(parent_file.clone()))?;

            // Trying to avoid the race condition described above. We don't care about the result,
            // because we check once more if line.is_empty() after the end of this block.
            let _ = inherit_from_parent_aux(parent, file_name, retry_depth - 1);
            line = readln_special(&parent_file)?;
        }

        if line.is_empty() {
            return Err(JailerError::CgroupInheritFromParent(
                path.to_path_buf(),
                file_name.to_string(),
            ));
        }
    }

    writeln_special(&path.join(file_name), &line)?;
    Ok(())
}

// Propagates the value of `file_name` from the ancestors of `path` down into
// `path`, retrying up to `depth` levels up the hierarchy.
fn inherit_from_parent(path: &Path, file_name: &str, depth: u16) -> Result<(), JailerError> {
    inherit_from_parent_aux(path, file_name, depth)
}

// Extract the controller name from the cgroup file. The cgroup file must follow
// this format: <controller_name>.<property_name>
fn get_controller_from_filename(file: &str) -> Result<&str, JailerError> {
    let v: Vec<&str> = file.split('.').collect();

    // Check format <controller_name>.<property_name>
    if v.len() < 2 {
        return Err(JailerError::CgroupInvalidFile(file.to_string()));
    }

    Ok(v[0])
}

impl CgroupV1 {
    // Create a new cgroupsv1 controller
    pub fn new(id: &str, parent_cg: &Path, controller_path: &Path) -> Result {
        let mut path = controller_path.to_path_buf();
        path.push(parent_cg);
        path.push(id);

        // Remember how deep the parent cgroup is so that value inheritance can
        // walk that many levels up.
        let mut depth = 0;
        for _ in parent_cg.components() {
            depth += 1;
        }

        Ok(CgroupV1 {
            base: CgroupBase {
                properties: Vec::new(),
                location: path,
            },
            cg_parent_depth: depth,
        })
    }
}

impl Cgroup for CgroupV1 {
    fn add_property(&mut self, file: String, value: String) -> Result<(), JailerError> {
        self.base.properties.push(CgroupProperty { file, value });
        Ok(())
    }

    fn write_values(&self) -> Result<(), JailerError> {
        // Create the cgroup directory for the controller.
        fs::create_dir_all(&self.base.location)
            .map_err(|err| JailerError::CreateDir(self.base.location.clone(), err))?;

        for property in self.base.properties.iter() {
            // Write the corresponding cgroup value. inherit_from_parent is used to
            // correctly propagate the value if not defined.
            inherit_from_parent(&self.base.location, &property.file, self.cg_parent_depth)?;
            writeln_special(&self.base.location.join(&property.file), &property.value)?;
        }

        Ok(())
    }

    fn attach_pid(&self) -> Result<(), JailerError> {
        let pid = process::id();
        // In cgroupv1 the "tasks" file holds the member pids.
        let location = &self.base.location.join("tasks");

        writeln_special(location, pid)?;

        Ok(())
    }
}

impl CgroupV2 {
    // Enables the specified controller along the cgroup nested path.
    // To be able to use a leaf controller within a nested cgroup hierarchy,
    // the controller needs to be enabled by writing to the cgroup.subtree_control
    // of its parent. This rule applies recursively.
    fn write_all_subtree_control

(path: P, controller: &str) -> Result<(), JailerError>
where
    P: AsRef + Debug,
{
    let cg_subtree_ctrl = path.as_ref().join("cgroup.subtree_control");
    // No subtree_control file means we have walked above the hierarchy root; stop.
    if !cg_subtree_ctrl.exists() {
        return Ok(());
    }

    let parent = match path.as_ref().parent() {
        Some(p) => p,
        None => {
            // Filesystem root reached: enable here and stop recursing.
            writeln_special(&cg_subtree_ctrl, format!("+{}", &controller))?;
            return Ok(());
        }
    };

    // Recurse upwards first so that the writes happen top-down, from the
    // hierarchy root towards this cgroup.
    Self::write_all_subtree_control(parent, controller)?;
    writeln_special(&cg_subtree_ctrl, format!("+{}", &controller))
}

// Returns controllers that can be enabled from the cgroup path specified
// by the mount_point parameter
fn detect_available_controllers

(mount_point: P) -> HashSet
where
    P: AsRef + Debug,
{
    let mut controllers = HashSet::new();
    let controller_list_file = mount_point.as_ref().join("cgroup.controllers");
    // If the controllers file cannot be opened, report no available controllers
    // rather than failing.
    let f = match File::open(controller_list_file) {
        Ok(f) => f,
        Err(_) => return controllers,
    };

    // cgroup.controllers holds a single space-separated list of controller names.
    for l in BufReader::new(f).lines().map_while(Result::ok) {
        for controller in l.split(' ') {
            controllers.insert(controller.to_string());
        }
    }
    controllers
}

// Create a new cgroupsv2 controller
pub fn new(id: &str, parent_cg: &Path, unified_path: &Path) -> Result {
    let mut path = unified_path.to_path_buf();

    path.push(parent_cg);
    path.push(id);
    Ok(CgroupV2 {
        base: CgroupBase {
            properties: Vec::new(),
            location: path,
        },
        available_controllers: Self::detect_available_controllers(unified_path),
    })
}
}

impl Cgroup for CgroupV2 {
    fn add_property(&mut self, file: String, value: String) -> Result<(), JailerError> {
        let controller = get_controller_from_filename(&file)?;
        // Only accept properties whose controller is actually present in the hierarchy.
        if self.available_controllers.contains(controller) {
            self.base.properties.push(CgroupProperty { file, value });
            Ok(())
        } else {
            Err(JailerError::CgroupControllerUnavailable(
                controller.to_string(),
            ))
        }
    }

    fn write_values(&self) -> Result<(), JailerError> {
        let mut enabled_controllers: HashSet<&str> = HashSet::new();

        // Create the cgroup directory for the controller.
        fs::create_dir_all(&self.base.location)
            .map_err(|err| JailerError::CreateDir(self.base.location.clone(), err))?;

        // Ok to unwrap since the path was just created.
        let parent = self.base.location.parent().unwrap();

        for property in self.base.properties.iter() {
            let controller = get_controller_from_filename(&property.file)?;
            // enable controllers only once
            if !enabled_controllers.contains(controller) {
                // Enable the controller in all parent directories
                CgroupV2::write_all_subtree_control(parent, controller)?;
                enabled_controllers.insert(controller);
            }
            writeln_special(&self.base.location.join(&property.file), &property.value)?;
        }

        Ok(())
    }

    fn attach_pid(&self) -> Result<(), JailerError> {
        let pid = process::id();
        // In cgroupv2 the "cgroup.procs" file holds the member pids.
        let location = &self.base.location.join("cgroup.procs");

        writeln_special(location, pid)?;

        Ok(())
    }
}

// Writes every configured cgroup value, then attaches the current process to
// each cgroup.
pub fn setup_cgroup_conf(conf: &HashMap) -> Result<(), JailerError> {
    // cgroups are iterated two times as some cgroups may require others (e.g cpuset requires
    // cpuset.mems and cpuset.cpus) to be set before attaching any pid.
    for cgroup in conf.values() {
        cgroup.write_values()?;
    }

    for cgroup in conf.values() {
        cgroup.attach_pid()?;
    }

    Ok(())
}

#[cfg(test)]
pub mod test_util {
    use std::fmt::Debug;
    use std::fs::{self, File, OpenOptions};
    use std::io::Write;
    use std::path::{Path, PathBuf};

    use vmm_sys_util::tempdir::TempDir;

    #[derive(Debug)]
    pub struct MockCgroupFs {
        mounts_file: File, // kept to clean up on Drop
        _mock_jailer_dir: TempDir,
        pub proc_mounts_path: PathBuf,
        pub sys_cgroups_path: PathBuf,
    }

    // Helper object that simulates the layout of the cgroup file system
    // This can be used for testing regardless of the availability of a particular
    // version of cgroups on the system
    impl MockCgroupFs {
        // Creates (truncating if necessary) `filename` and writes `contents` plus a newline.
        pub fn create_file_with_contents + Debug>(
            filename: P,
            contents: &str,
        ) -> Result<(), std::io::Error> {
            let mut file = OpenOptions::new()
                .read(true)
                .write(true)
                .create(true)
                .truncate(true)
                .open(&filename)?;
            writeln!(file, "{}", contents)?;
            Ok(())
        }

        pub fn new() -> Result {
            let mock_jailer_dir = TempDir::new().unwrap();
            let mock_proc_mounts = mock_jailer_dir.as_path().join("proc/mounts");
            let mock_sys_cgroups =
mock_jailer_dir.as_path().join("sys_cgroup");

            // create a mock /proc/mounts file in a temporary directory
            fs::create_dir_all(mock_proc_mounts.parent().unwrap())?;
            let file = OpenOptions::new()
                .read(true)
                .write(true)
                .create(true)
                .truncate(true)
                .open(mock_proc_mounts.clone())?;
            Ok(MockCgroupFs {
                mounts_file: file,
                _mock_jailer_dir: mock_jailer_dir,
                proc_mounts_path: mock_proc_mounts,
                sys_cgroups_path: mock_sys_cgroups,
            })
        }

        // Populate the mocked proc/mounts file with cgroupv2 entries
        // Also create a directory structure that simulates cgroupsv2 layout
        pub fn add_v2_mounts(&mut self) -> Result<(), std::io::Error> {
            writeln!(
                self.mounts_file,
                "cgroupv2 {}/unified cgroup2 rw,nosuid,nodev,noexec,relatime,nsdelegate 0 0",
                self.sys_cgroups_path.to_str().unwrap(),
            )?;
            let cg_unified_path = self.sys_cgroups_path.join("unified");
            fs::create_dir_all(&cg_unified_path)?;
            Self::create_file_with_contents(
                cg_unified_path.join("cgroup.controllers"),
                "cpuset cpu io memory pids",
            )?;
            Self::create_file_with_contents(cg_unified_path.join("cgroup.subtree_control"), "")?;
            Ok(())
        }

        // Populate the mocked proc/mounts file with cgroupv1 entries
        pub fn add_v1_mounts(&mut self) -> Result<(), std::io::Error> {
            let controllers = vec![
                "memory",
                "net_cls,net_prio",
                "pids",
                "cpuset",
                "cpu,cpuacct",
            ];

            // One mount line per controller group, mirroring a real v1 layout.
            for c in &controllers {
                writeln!(
                    self.mounts_file,
                    "cgroup {}/{} cgroup rw,nosuid,nodev,noexec,relatime,{} 0 0",
                    self.sys_cgroups_path.to_str().unwrap(),
                    c,
                    c,
                )?;
            }
            Ok(())
        }
    }
}

#[cfg(test)]
mod tests {
    use std::fmt::Debug;
    use std::io::{BufReader, Write};
    use std::path::PathBuf;

    use vmm_sys_util::tempdir::TempDir;
    use vmm_sys_util::tempfile::TempFile;

    use super::*;
    use crate::cgroup::test_util::MockCgroupFs;

    // Utility function to read the first line in a file
    fn read_first_line

(filename: P) -> Result
    where
        P: AsRef + Debug,
    {
        let file = File::open(filename)?;
        let mut reader = BufReader::new(file);
        let mut buf = String::new();
        reader.read_line(&mut buf)?;
        Ok(buf)
    }

    // An unsupported cgroup version must be rejected at construction time.
    #[test]
    fn test_cgroup_conf_builder_invalid_version() {
        let mock_cgroups = MockCgroupFs::new().unwrap();
        let builder =
            CgroupConfigurationBuilder::new(0, mock_cgroups.proc_mounts_path.to_str().unwrap());
        builder.unwrap_err();
    }

    // Without any cgroup mount entries the builder must fail.
    #[test]
    fn test_cgroup_conf_builder_no_mounts() {
        let mock_cgroups = MockCgroupFs::new().unwrap();
        let builder =
            CgroupConfigurationBuilder::new(1, mock_cgroups.proc_mounts_path.to_str().unwrap());
        builder.unwrap_err();
    }

    #[test]
    fn test_cgroup_conf_builder_v1() {
        let mut mock_cgroups = MockCgroupFs::new().unwrap();
        mock_cgroups.add_v1_mounts().unwrap();
        let builder =
            CgroupConfigurationBuilder::new(1, mock_cgroups.proc_mounts_path.to_str().unwrap());
        builder.unwrap();
    }

    #[test]
    fn test_cgroup_conf_builder_v2() {
        let mut mock_cgroups = MockCgroupFs::new().unwrap();
        mock_cgroups.add_v2_mounts().unwrap();
        let builder =
            CgroupConfigurationBuilder::new(2, mock_cgroups.proc_mounts_path.to_str().unwrap());
        builder.unwrap();
    }

    // Requesting v2 when only v1 is mounted must fail.
    #[test]
    fn test_cgroup_conf_builder_v2_with_v1_mounts() {
        let mut mock_cgroups = MockCgroupFs::new().unwrap();
        mock_cgroups.add_v1_mounts().unwrap();
        let builder =
            CgroupConfigurationBuilder::new(2, mock_cgroups.proc_mounts_path.to_str().unwrap());
        builder.unwrap_err();
    }

    #[test]
    fn test_cgroup_conf_builder_v2_no_mounts() {
        let mock_cgroups = MockCgroupFs::new().unwrap();
        let builder =
            CgroupConfigurationBuilder::new(2, mock_cgroups.proc_mounts_path.to_str().unwrap());
        builder.unwrap_err();
    }

    // Requesting v1 when only v2 is mounted must fail.
    #[test]
    fn test_cgroup_conf_builder_v1_with_v2_mounts() {
        let mut mock_cgroups = MockCgroupFs::new().unwrap();
        mock_cgroups.add_v2_mounts().unwrap();
        let builder =
            CgroupConfigurationBuilder::new(1, mock_cgroups.proc_mounts_path.to_str().unwrap());
        builder.unwrap_err();
    }

    // Adding a valid property must succeed for both cgroup versions.
    #[test]
    fn test_cgroup_conf_build() {
        let mut mock_cgroups = MockCgroupFs::new().unwrap();
        mock_cgroups.add_v1_mounts().unwrap();
        mock_cgroups.add_v2_mounts().unwrap();
        for v in &[1, 2] {
            let mut builder = CgroupConfigurationBuilder::new(
                *v,
                mock_cgroups.proc_mounts_path.to_str().unwrap(),
            )
            .unwrap();
            builder
                .add_cgroup_property(
                    "cpuset.mems".to_string(),
                    "1".to_string(),
                    "101",
                    Path::new("fc_test_cg"),
                )
                .unwrap();
            builder.build();
        }
    }

    // Adding a property for an unavailable controller must fail for both versions.
    #[test]
    fn test_cgroup_conf_build_invalid() {
        let mut mock_cgroups = MockCgroupFs::new().unwrap();
        mock_cgroups.add_v1_mounts().unwrap();
        mock_cgroups.add_v2_mounts().unwrap();
        for v in &[1, 2] {
            let mut builder = CgroupConfigurationBuilder::new(
                *v,
                mock_cgroups.proc_mounts_path.to_str().unwrap(),
            )
            .unwrap();
            builder
                .add_cgroup_property(
                    "invalid.cg".to_string(),
                    "1".to_string(),
                    "101",
                    Path::new("fc_test_cg"),
                )
                .unwrap_err();
        }
    }

    #[test]
    fn test_cgroup_conf_v1_write_value() {
        let mut mock_cgroups = MockCgroupFs::new().unwrap();
        mock_cgroups.add_v1_mounts().unwrap();
        let mut builder =
            CgroupConfigurationBuilder::new(1, mock_cgroups.proc_mounts_path.to_str().unwrap())
                .unwrap();
        builder
            .add_cgroup_property(
                "cpuset.mems".to_string(),
                "1".to_string(),
                "101",
                Path::new("fc_test_cgv1"),
            )
            .unwrap();
        let cg_conf = builder.build();

        let cg_root = mock_cgroups.sys_cgroups_path.join("cpuset");

        // with real cgroups these files are created automatically
        // since the mock will not do it automatically, we create it here
        fs::create_dir_all(cg_root.join("fc_test_cgv1/101")).unwrap();
        writeln_special(&cg_root.join("cpuset.mems"), "0-1").unwrap();
        writeln_special(&cg_root.join("fc_test_cgv1/cpuset.mems"), "0-1").unwrap();
        writeln_special(&cg_root.join("fc_test_cgv1/101/cpuset.mems"), "0-1").unwrap();

        cg_conf.setup().unwrap();

        // check that the value was written correctly
        assert!(cg_root.join("fc_test_cgv1/101/cpuset.mems").exists());
        assert_eq!(
            read_first_line(cg_root.join("fc_test_cgv1/101/cpuset.mems")).unwrap(),
            "1\n"
        );
    }

    #[test]
    fn test_cgroup_conf_v2_write_value() {
        let mut mock_cgroups = MockCgroupFs::new().unwrap();
        mock_cgroups.add_v2_mounts().unwrap();
        let builder =
            CgroupConfigurationBuilder::new(2, mock_cgroups.proc_mounts_path.to_str().unwrap());
        builder.unwrap();

        let mut builder =
            CgroupConfigurationBuilder::new(2, mock_cgroups.proc_mounts_path.to_str().unwrap())
                .unwrap();
        builder
            .add_cgroup_property(
                "cpuset.mems".to_string(),
                "1".to_string(),
                "101",
                Path::new("fc_test_cgv2"),
            )
            .unwrap();

        let cg_root = mock_cgroups.sys_cgroups_path.join("unified");

        assert_eq!(builder.get_v2_hierarchy_path().unwrap(), &cg_root);

        let cg_conf = builder.build();

        // with real cgroups these files are created automatically
        // since the mock will not do it automatically, we create it here
        fs::create_dir_all(cg_root.join("fc_test_cgv2/101")).unwrap();
        MockCgroupFs::create_file_with_contents(
            cg_root.join("fc_test_cgv2/cgroup.subtree_control"),
            "",
        )
        .unwrap();
        MockCgroupFs::create_file_with_contents(
            cg_root.join("fc_test_cgv2/101/cgroup.subtree_control"),
            "",
        )
        .unwrap();

        cg_conf.setup().unwrap();

        // check that the value was written correctly
        assert!(cg_root.join("fc_test_cgv2/101/cpuset.mems").exists());
        assert_eq!(
            read_first_line(cg_root.join("fc_test_cgv2/101/cpuset.mems")).unwrap(),
            "1\n"
        );

        // check that the controller was enabled in all parent dirs
        assert!(
            read_first_line(cg_root.join("cgroup.subtree_control"))
                .unwrap()
                .contains("cpuset")
        );
        assert!(
            read_first_line(cg_root.join("fc_test_cgv2/cgroup.subtree_control"))
                .unwrap()
                .contains("cpuset")
        );
        assert!(
            !read_first_line(cg_root.join("fc_test_cgv2/101/cgroup.subtree_control"))
                .unwrap()
                .contains("cpuset")
        );
    }

    #[test]
    fn test_inherit_from_parent() {
        // 1. If parent file does not exist, return an error.
        // This is /A/B/ .
        let dir = TempDir::new().expect("Cannot create temporary directory.");
        // This is /A/B/C .
        let dir2 = TempDir::new_in(dir.as_path()).expect("Cannot create temporary directory.");

        let path2 = PathBuf::from(dir2.as_path());
        let result = inherit_from_parent(&path2, "inexistent", 1);
        assert!(
            matches!(result, Err(JailerError::ReadToString(_, _))),
            "{:?}",
            result
        );

        // 2. If parent file exists and is empty, will go one level up, and return error because
        // the grandparent file does not exist.
        let named_file = TempFile::new_in(dir.as_path()).expect("Cannot create named file.");
        let result = inherit_from_parent(&path2, named_file.as_path().to_str().unwrap(), 1);
        assert!(
            matches!(result, Err(JailerError::CgroupInheritFromParent(_, _))),
            "{:?}",
            result
        );

        let child_file = dir2.as_path().join(named_file.as_path().to_str().unwrap());

        // 3. If parent file exists and is not empty, will return ok and child file will have its
        // contents.
        let some_line = "Parent line";
        writeln!(named_file.as_file(), "{}", some_line).expect("Cannot write to file.");
        let result = inherit_from_parent(&path2, named_file.as_path().to_str().unwrap(), 1);
        result.unwrap();
        let res = readln_special(&child_file).expect("Cannot read from file.");
        assert!(res == some_line);
    }

    #[test]
    fn test_get_controller() {
        let mut file = "cpuset.cpu";

        // Check valid file.
        let mut result = get_controller_from_filename(file);
        assert!(
            matches!(result, Ok(ctrl) if ctrl == "cpuset"),
            "{:?}",
            result
        );

        // Check valid file with multiple '.'.
// Names used while re-rooting: the temporary directory that pivot_root()
// leaves the old root mounted on, the new root ("/"), and the cwd (".").
const OLD_ROOT_DIR: &CStr = c"old_root";
const ROOT_DIR: &CStr = c"/";
const CURRENT_DIR: &CStr = c".";

// This uses switching to a new mount namespace + pivot_root(), together with the regular chroot,
// to provide a hardened jail (at least compared to only relying on chroot).
//
// NOTE: the steps below are strictly ordered; each one is a precondition for
// the next (e.g. the bind mount works around pivot_root's requirement that
// the new root be a mount point on a different filesystem than the old root).
pub fn chroot(path: &Path) -> Result<(), JailerError> {
    // We unshare into a new mount namespace.
    // SAFETY: The call is safe because we're invoking a C library
    // function with valid parameters.
    SyscallReturnCode(unsafe { libc::unshare(libc::CLONE_NEWNS) })
        .into_empty_result()
        .map_err(JailerError::UnshareNewNs)?;

    // Recursively change the propagation type of all the mounts in this namespace to SLAVE, so
    // we can call pivot_root.
    // SAFETY: Safe because we provide valid parameters.
    SyscallReturnCode(unsafe {
        libc::mount(
            null(),
            ROOT_DIR.as_ptr(),
            null(),
            libc::MS_SLAVE | libc::MS_REC,
            null(),
        )
    })
    .into_empty_result()
    .map_err(JailerError::MountPropagationSlave)?;

    // We need a CString for the following mount call.
    let chroot_dir = to_cstring(path)?;

    // Bind mount the jail root directory over itself, so we can go around a restriction
    // imposed by pivot_root, which states that the new root and the old root should not
    // be on the same filesystem.
    // SAFETY: Safe because we provide valid parameters.
    SyscallReturnCode(unsafe {
        libc::mount(
            chroot_dir.as_ptr(),
            chroot_dir.as_ptr(),
            null(),
            libc::MS_BIND | libc::MS_REC,
            null(),
        )
    })
    .into_empty_result()
    .map_err(JailerError::MountBind)?;

    // Change current dir to the chroot dir, so we only need to handle relative paths from now on.
    env::set_current_dir(path).map_err(JailerError::SetCurrentDir)?;

    // Create the old_root folder we're going to use for pivot_root, using a relative path.
    // SAFETY: The call is safe because we provide valid arguments.
    SyscallReturnCode(unsafe { libc::mkdir(OLD_ROOT_DIR.as_ptr(), libc::S_IRUSR | libc::S_IWUSR) })
        .into_empty_result()
        .map_err(JailerError::MkdirOldRoot)?;

    // We are now ready to call pivot_root. We have to use sys_call because there is no libc
    // wrapper for pivot_root.
    // SAFETY: Safe because we provide valid parameters.
    SyscallReturnCode(unsafe {
        libc::syscall(
            libc::SYS_pivot_root,
            CURRENT_DIR.as_ptr(),
            OLD_ROOT_DIR.as_ptr(),
        )
    })
    .into_empty_result()
    .map_err(JailerError::PivotRoot)?;

    // pivot_root doesn't guarantee that we will be in "/" at this point, so switch to "/"
    // explicitly.
    // SAFETY: Safe because we provide valid parameters.
    SyscallReturnCode(unsafe { libc::chdir(ROOT_DIR.as_ptr()) })
        .into_empty_result()
        .map_err(JailerError::ChdirNewRoot)?;

    // Umount the old_root, thus isolating the process from everything outside the jail root
    // folder.
    // SAFETY: Safe because we provide valid parameters.
    SyscallReturnCode(unsafe { libc::umount2(OLD_ROOT_DIR.as_ptr(), libc::MNT_DETACH) })
        .into_empty_result()
        .map_err(JailerError::UmountOldRoot)?;

    // Remove the no longer necessary old_root directory.
    // SAFETY: Safe because we provide valid parameters.
    SyscallReturnCode(unsafe { libc::rmdir(OLD_ROOT_DIR.as_ptr()) })
        .into_empty_result()
        .map_err(JailerError::RmOldRootDir)
}
const DEV_URANDOM_MINOR: u32 = 9;

// Userfault file descriptor device path. This is a misc character device
// with a MISC_DYNAMIC_MINOR minor device:
// https://elixir.bootlin.com/linux/v6.1.51/source/fs/userfaultfd.c#L2176.
//
// This means that its minor device number will be allocated at run time,
// so we will have to find it at initialization time parsing /proc/misc.
// What we do know is the major number for misc devices:
// https://elixir.bootlin.com/linux/v6.1.51/source/Documentation/admin-guide/devices.txt
const DEV_UFFD_PATH: &CStr = c"/dev/userfaultfd";
const DEV_UFFD_MAJOR: u32 = 10;

// Relevant folders inside the jail that we create or/and for which we change ownership.
// We need /dev in order to be able to create /dev/kvm and /dev/net/tun device.
// We need /run for the default location of the api socket.
// Since libc::chown is not recursive, we cannot specify only /dev/net as we want
// to walk through the entire folder hierarchy.
const FOLDER_HIERARCHY: [&str; 4] = ["/", "/dev", "/dev/net", "/run"];
const FOLDER_PERMISSIONS: u32 = 0o700;

// When running with `--new-pid-ns` flag, the PID of the process running the exec_file differs
// from jailer's and it is stored inside a dedicated file, prefixed with the below extension.
const PID_FILE_EXTENSION: &str = ".pid";

// Helper function, since we'll use libc::dup2 a bunch of times for daemonization.
fn dup2(old_fd: libc::c_int, new_fd: libc::c_int) -> Result<(), JailerError> {
    // SAFETY: This is safe because we are using a library function with valid parameters.
    SyscallReturnCode(unsafe { libc::dup2(old_fd, new_fd) })
        .into_empty_result()
        .map_err(JailerError::Dup2)
}

// This is a wrapper for the clone system call. When we want to create a new process in a new
// pid namespace, we will call clone with a NULL stack pointer. We can do this because we will
// not use the CLONE_VM flag, this will result with the original stack replicated, in a similar
// manner to the fork syscall. The libc wrapper prevents use of a NULL stack pointer, so we will
// call the syscall directly.
// NOTE(review): the return type's generic parameters were garbled in extraction;
// `Result<libc::c_int, JailerError>` is reconstructed from `SyscallReturnCode(c_int)` usage.
fn clone(child_stack: *mut libc::c_void, flags: libc::c_int) -> Result<libc::c_int, JailerError> {
    SyscallReturnCode(
        // SAFETY: This is safe because we are using a library function with valid parameters.
        libc::c_int::try_from(unsafe {
            // Note: the order of arguments in the raw syscall differs between platforms.
            // On x86-64, for example, the parameters passed are `flags`, `stack`, `parent_tid`,
            // `child_tid`, and `tls`. But on x86-32, and several other common architectures
            // (including score, ARM, ARM 64) the order of the last two arguments is reversed,
            // and instead we must pass `flags`, `stack`, `parent_tid`, `tls`, and `child_tid`.
            // This difference in architecture currently doesn't matter because the last 2
            // arguments are all 0 but if this were to change we should add an attribute such as
            // #[cfg(target_arch = "x86_64")] or #[cfg(target_arch = "aarch64")] for each
            // different call.
            libc::syscall(libc::SYS_clone, flags, child_stack, 0, 0, 0)
        })
        // Unwrap is needed because PIDs are 32-bit.
        .unwrap(),
    )
    .into_result()
    .map_err(JailerError::Clone)
}

/// Errors that can occur while locating the dynamic minor number of
/// `/dev/userfaultfd` in `/proc/misc`.
#[derive(Debug, thiserror::Error)]
enum UserfaultfdParseError {
    #[error("Could not read /proc/misc: {0}")]
    ReadProcMisc(#[from] std::io::Error),
    #[error("Could not parse minor number: {0}")]
    ParseDevMinor(#[from] std::num::ParseIntError),
    #[error("userfaultfd device not loaded")]
    NotFound,
}

/// Fully parsed jailer configuration: everything `run()` needs to build the
/// chroot, apply cgroups/limits, and exec the jailed binary.
// NOTE(review): several field generic parameters were garbled in extraction
// (`Option,`/`Vec,`); reconstructed from how the fields are used below.
#[derive(Debug)]
pub struct Env {
    id: String,
    chroot_dir: PathBuf,
    exec_file_path: PathBuf,
    uid: u32,
    gid: u32,
    // Path of the network namespace file to join, if any.
    netns: Option<String>,
    daemonize: bool,
    new_pid_ns: bool,
    start_time_us: u64,
    start_time_cpu_us: u64,
    // CPU time consumed by the jailer itself; forwarded to firecracker
    // via --parent-cpu-time-us.
    jailer_cpu_time_us: u64,
    extra_args: Vec<String>,
    cgroup_conf: Option<CgroupConfiguration>,
    resource_limits: ResourceLimits,
    // Dynamic minor of /dev/userfaultfd, if the device exists on the host.
    uffd_dev_minor: Option<u32>,
}

impl Env {
    /// Builds an `Env` from parsed command-line arguments, validating each
    /// value and (optionally) configuring cgroups before any jailing happens.
    pub fn new(
        arguments: &arg_parser::Arguments,
        start_time_us: u64,
        start_time_cpu_us: u64,
        proc_mounts: &str,
    ) -> Result<Self, JailerError> {
        // Unwraps should not fail because the arguments are mandatory arguments or with default
        // values.
        let id = arguments
            .single_value("id")
            .ok_or_else(|| JailerError::ArgumentParsing(MissingValue("id".to_string())))?;
        validators::validate_instance_id(id).map_err(JailerError::InvalidInstanceId)?;
        let exec_file = arguments
            .single_value("exec-file")
            .ok_or_else(|| JailerError::ArgumentParsing(MissingValue("exec-file".to_string())))?;
        let (exec_file_path, exec_file_name) = Env::validate_exec_file(exec_file)?;

        let chroot_base = arguments.single_value("chroot-base-dir").ok_or_else(|| {
            JailerError::ArgumentParsing(MissingValue("chroot-base-dir".to_string()))
        })?;
        // Final chroot layout: <chroot_base>/<exec_file_name>/<id>/root .
        let mut chroot_dir = canonicalize(chroot_base)
            .map_err(|err| JailerError::Canonicalize(PathBuf::from(&chroot_base), err))?;

        if !chroot_dir.is_dir() {
            return Err(JailerError::NotADirectory(chroot_dir));
        }

        chroot_dir.push(&exec_file_name);
        chroot_dir.push(id);
        chroot_dir.push("root");

        let uid_str = arguments
            .single_value("uid")
            .ok_or_else(|| JailerError::ArgumentParsing(MissingValue("uid".to_string())))?;
        let uid = uid_str
            .parse::<u32>()
            .map_err(|_| JailerError::Uid(uid_str.to_owned()))?;

        let gid_str = arguments
            .single_value("gid")
            .ok_or_else(|| JailerError::ArgumentParsing(MissingValue("gid".to_string())))?;
        let gid = gid_str
            .parse::<u32>()
            .map_err(|_| JailerError::Gid(gid_str.to_owned()))?;

        let netns = arguments.single_value("netns").cloned();

        let daemonize = arguments.flag_present("daemonize");

        let new_pid_ns = arguments.flag_present("new-pid-ns");

        // Optional arguments.
        let mut cgroup_conf = None;
        let parent_cgroup = match arguments.single_value("parent-cgroup") {
            Some(parent_cg) => Path::new(parent_cg),
            None => Path::new(&exec_file_name),
        };
        // Reject ".", ".." and absolute components to keep the cgroup path
        // confined under the hierarchy root.
        if parent_cgroup
            .components()
            .any(|c| c == Component::CurDir || c == Component::ParentDir || c == Component::RootDir)
        {
            return Err(JailerError::CgroupInvalidParentPath());
        }

        let cgroup_ver = arguments.single_value("cgroup-version").ok_or_else(|| {
            JailerError::ArgumentParsing(MissingValue("cgroup-version".to_string()))
        })?;
        let cgroup_ver = cgroup_ver
            .parse::<u8>()
            .map_err(|_| JailerError::CgroupInvalidVersion(cgroup_ver.to_string()))?;

        let cgroups_args: &[String] = arguments.multiple_values("cgroup").unwrap_or_default();

        // If the --parent-cgroup exists, and we have no other cgroups,
        // then the intent is to move the process to that cgroup.
        // Only applies to cgroupsv2 since it's a unified hierarchy
        if cgroups_args.is_empty() && cgroup_ver == 2 {
            let builder = CgroupConfigurationBuilder::new(cgroup_ver, proc_mounts)?;
            let cg_parent = builder.get_v2_hierarchy_path()?.join(parent_cgroup);
            let cg_parent_procs = cg_parent.join("cgroup.procs");
            if cg_parent.exists() {
                fs::write(cg_parent_procs, std::process::id().to_string())
                    .map_err(|_| JailerError::CgroupMove(cg_parent, io::Error::last_os_error()))?;
            }
        }

        // cgroup format: <file>=<value>,...
        if let Some(cgroups_args) = arguments.multiple_values("cgroup") {
            let mut builder = CgroupConfigurationBuilder::new(cgroup_ver, proc_mounts)?;

            for cg in cgroups_args {
                let aux: Vec<&str> = cg.split('=').collect();
                if aux.len() != 2 || aux[1].is_empty() {
                    return Err(JailerError::CgroupFormat(cg.to_string()));
                }
                let file = Path::new(aux[0]);
                if file.components().any(|c| {
                    c == Component::CurDir || c == Component::ParentDir || c == Component::RootDir
                }) {
                    return Err(JailerError::CgroupInvalidFile(cg.to_string()));
                }

                builder.add_cgroup_property(
                    aux[0].to_string(), // cgroup file
                    aux[1].to_string(), // cgroup value
                    id,
                    parent_cgroup,
                )?;
            }
            cgroup_conf = Some(builder.build());
        }

        let mut resource_limits = ResourceLimits::default();
        if let Some(args) = arguments.multiple_values("resource-limit") {
            Env::parse_resource_limits(&mut resource_limits, args)?;
        }

        // Best-effort: absence of /dev/userfaultfd on the host is not an error.
        let uffd_dev_minor = Self::get_userfaultfd_minor_dev_number().ok();

        Ok(Env {
            id: id.to_owned(),
            chroot_dir,
            exec_file_path,
            uid,
            gid,
            netns,
            daemonize,
            new_pid_ns,
            start_time_us,
            start_time_cpu_us,
            jailer_cpu_time_us: 0,
            extra_args: arguments.extra_args(),
            cgroup_conf,
            resource_limits,
            uffd_dev_minor,
        })
    }

    pub fn chroot_dir(&self) -> &Path {
        self.chroot_dir.as_path()
    }

    pub fn gid(&self) -> u32 {
        self.gid
    }

    pub fn uid(&self) -> u32 {
        self.uid
    }
}
impl Env {
    /// Parses `--resource-limit` values of the form `<name>=<value>` into
    /// `resource_limits`. Only `fsize` and `no-file` names are accepted.
    fn parse_resource_limits(
        resource_limits: &mut ResourceLimits,
        args: &[String],
    ) -> Result<(), JailerError> {
        for arg in args {
            let (name, value) = arg
                .split_once('=')
                .ok_or_else(|| JailerError::ResLimitFormat(arg.to_string()))?;

            let limit_value = value
                .parse::<u64>()
                .map_err(|err| JailerError::ResLimitValue(value.to_string(), err.to_string()))?;
            match name {
                FSIZE_ARG => resource_limits.set_file_size(limit_value),
                NO_FILE_ARG => resource_limits.set_no_file(limit_value),
                _ => return Err(JailerError::ResLimitArgument(name.to_string())),
            }
        }
        Ok(())
    }

    /// Clones into a new PID namespace and execs the jailed binary there;
    /// the parent records the child's PID and exits.
    fn exec_into_new_pid_ns(&mut self, chroot_exec_file: PathBuf) -> Result<(), JailerError> {
        // https://man7.org/linux/man-pages/man7/pid_namespaces.7.html
        // > a process in an ancestor namespace can send signals to the "init" process of a child
        // > PID namespace only if the "init" process has established a handler for that signal.
        //
        // Firecracker (i.e. the "init" process of the new PID namespace) sets up handlers for some
        // signals including SIGHUP and jailer exits soon after spawning firecracker into a new PID
        // namespace. If the jailer process is a session leader and its exit happens after
        // firecracker configures the signal handlers, SIGHUP will be sent to firecracker and be
        // caught by the handler unexpectedly.
        //
        // In order to avoid the above issue, if jailer is a session leader, creates a new session
        // and makes the child process (i.e. firecracker) become the leader of the new session to
        // not get SIGHUP on the exit of jailer.

        // Check whether jailer is a session leader or not before clone().
        // Note that, if `--daemonize` is passed, jailer is always not a session leader. This is
        // because we use the double fork method, making itself not a session leader.
        let is_session_leader = match self.daemonize {
            true => false,
            false => {
                // SAFETY: Safe because it doesn't take any input parameters.
                let sid = SyscallReturnCode(unsafe { libc::getsid(0) })
                    .into_result()
                    .map_err(JailerError::GetSid)?;
                // SAFETY: Safe because it doesn't take any input parameters.
                let ppid = SyscallReturnCode(unsafe { libc::getpid() })
                    .into_result()
                    .map_err(JailerError::GetPid)?;
                // A process is a session leader iff its PID equals its SID.
                sid == ppid
            }
        };

        // Duplicate the current process. The child process will belong to the previously created
        // PID namespace. The current process will not be moved into the newly created namespace,
        // but its first child will assume the role of init(1) in the new namespace.
        let pid = clone(std::ptr::null_mut(), libc::CLONE_NEWPID)?;
        match pid {
            0 => {
                if is_session_leader {
                    // SAFETY: Safe because it doesn't take any input parameters.
                    SyscallReturnCode(unsafe { libc::setsid() })
                        .into_empty_result()
                        .map_err(JailerError::SetSid)?;
                }

                // On success exec() never returns, so reaching this Err means it failed.
                Err(JailerError::Exec(self.exec_command(chroot_exec_file)))
            }
            child_pid => {
                // Save the PID of the process running the exec file provided
                // inside the `<exec_file>.pid` file.
                self.save_exec_file_pid(child_pid, chroot_exec_file)?;
                // SAFETY: This is safe because 0 is valid input to exit.
                unsafe { libc::exit(0) }
            }
        }
    }
}
write!(pid_file, "{}", pid).map_err(|err| JailerError::Write(pid_file_path, err)) } fn get_userfaultfd_minor_dev_number() -> Result { let buf = read_to_string("/proc/misc")?; for line in buf.lines() { let dev: Vec<&str> = line.split(' ').collect(); if dev.len() < 2 { continue; } if dev[1] == "userfaultfd" { return Ok(dev[0].parse::()?); } } Err(UserfaultfdParseError::NotFound) } fn mknod_and_own_dev( &self, dev_path: &CStr, dev_major: u32, dev_minor: u32, ) -> Result<(), JailerError> { // As per sysstat.h: // S_IFCHR -> character special device // S_IRUSR -> read permission, owner // S_IWUSR -> write permission, owner // See www.kernel.org/doc/Documentation/networking/tuntap.txt, 'Configuration' chapter for // more clarity. // SAFETY: This is safe because dev_path is CStr, and hence null-terminated. SyscallReturnCode(unsafe { libc::mknod( dev_path.as_ptr(), libc::S_IFCHR | libc::S_IRUSR | libc::S_IWUSR, libc::makedev(dev_major, dev_minor), ) }) .into_empty_result() .map_err(|err| JailerError::MknodDev(err, dev_path.to_str().unwrap().to_owned()))?; // SAFETY: This is safe because dev_path is CStr, and hence null-terminated. SyscallReturnCode(unsafe { libc::chown(dev_path.as_ptr(), self.uid(), self.gid()) }) .into_empty_result() // Safe to unwrap as we provided valid file names. .map_err(|err| { JailerError::ChangeFileOwner(PathBuf::from(dev_path.to_str().unwrap()), err) }) } fn setup_jailed_folder(&self, folder: impl AsRef) -> Result<(), JailerError> { let folder_path = folder.as_ref(); fs::create_dir_all(folder_path) .map_err(|err| JailerError::CreateDir(folder_path.to_owned(), err))?; fs::set_permissions(folder_path, Permissions::from_mode(FOLDER_PERMISSIONS)) .map_err(|err| JailerError::Chmod(folder_path.to_owned(), err))?; let c_path = CString::new(folder_path.to_str().unwrap()).unwrap(); // SAFETY: This is safe because folder was checked for a null-terminator. 
impl Env {
    /// Copies the binary to be jailed into the chroot, preserving its mode and
    /// handing ownership to the jailed uid/gid. Returns the bare file name.
    fn copy_exec_to_chroot(&mut self) -> Result<OsString, JailerError> {
        let exec_file_name = self
            .exec_file_path
            .file_name()
            .ok_or_else(|| JailerError::ExtractFileName(self.exec_file_path.clone()))?;
        // We do a copy instead of a hard-link for 2 reasons
        // 1. hard-linking is not possible if the file is in another device
        // 2. while hardlinking would save up disk space and also memory by sharing parts of the
        //    Firecracker binary (like the executable .text section), this latter part is not
        //    desirable in Firecracker's threat model. Copying prevents 2 Firecracker processes
        //    from sharing memory.
        let jailer_exec_file_path = self.chroot_dir.join(exec_file_name);
        let mut src_file = OpenOptions::new()
            .read(true)
            .open(&self.exec_file_path)
            .map_err(|err| JailerError::Open(self.exec_file_path.clone(), err))?;
        let src_file_metadata = src_file
            .metadata()
            .map_err(|err| JailerError::Metadata(self.exec_file_path.clone(), err))?;
        let src_file_mode = src_file_metadata.mode();
        let mut dst_file = OpenOptions::new()
            .write(true)
            .create(true)
            // Don't allow symlinks
            .custom_flags(libc::O_NOFOLLOW)
            .mode(src_file_mode)
            .open(&jailer_exec_file_path)
            .map_err(|err| JailerError::Open(jailer_exec_file_path.clone(), err))?;
        // Refuse to write through a hard link to keep the jailed binary
        // un-shared with any file outside the chroot.
        let dst_file_metadata = dst_file
            .metadata()
            .map_err(|err| JailerError::Metadata(jailer_exec_file_path.clone(), err))?;
        if 1 < dst_file_metadata.nlink() {
            return Err(JailerError::HardLink(jailer_exec_file_path.clone()));
        }
        // Mark destination file as owned by the specified uid/gid
        fchown(&dst_file, Some(self.uid()), Some(self.gid()))
            .map_err(|err| JailerError::ChangeFileOwner(jailer_exec_file_path.clone(), err))?;
        // Ignore the output since it is not interesting in this case
        _ = std::io::copy(&mut src_file, &mut dst_file).map_err(|err| {
            JailerError::Copy(
                self.exec_file_path.clone(),
                jailer_exec_file_path.clone(),
                err,
            )
        })?;

        Ok(exec_file_name.to_owned())
    }

    /// Joins the network namespace whose handle file lives at `path`.
    fn join_netns(path: &str) -> Result<(), JailerError> {
        // The fd backing the file will be automatically dropped at the end of the scope
        let netns =
            File::open(path).map_err(|err| JailerError::FileOpen(PathBuf::from(path), err))?;

        // SAFETY: Safe because we are passing valid parameters.
        SyscallReturnCode(unsafe { libc::setns(netns.as_raw_fd(), libc::CLONE_NEWNET) })
            .into_empty_result()
            .map_err(JailerError::SetNetNs)
    }

    /// Execs the jailed binary with the standard firecracker arguments plus
    /// any extra args; only returns (the error) if exec() itself failed.
    fn exec_command(&self, chroot_exec_file: PathBuf) -> io::Error {
        Command::new(chroot_exec_file)
            .args(["--id", &self.id])
            .args(["--start-time-us", &self.start_time_us.to_string()])
            .args([
                "--start-time-cpu-us",
                &get_time_us(ClockType::ProcessCpu).to_string(),
            ])
            .args(["--parent-cpu-time-us", &self.jailer_cpu_time_us.to_string()])
            .stdin(Stdio::inherit())
            .stdout(Stdio::inherit())
            .stderr(Stdio::inherit())
            .uid(self.uid())
            .gid(self.gid())
            .args(&self.extra_args)
            .exec()
    }
}
impl Env {
    /// Copies the host's MIDR_EL1 identification register file into the
    /// chroot so the guest-facing code can read the CPU identity (aarch64).
    #[cfg(target_arch = "aarch64")]
    fn copy_midr_el1_info(&self) -> Result<(), JailerError> {
        use crate::{readln_special, to_cstring, writeln_special};

        const HOST_MIDR_EL1_INFO: &str = "/sys/devices/system/cpu/cpu0/regs/identification";

        let jailer_midr_el1_directory =
            Path::new(self.chroot_dir()).join("sys/devices/system/cpu/cpu0/regs/identification/");
        fs::create_dir_all(&jailer_midr_el1_directory)
            .map_err(|err| JailerError::CreateDir(jailer_midr_el1_directory.to_owned(), err))?;

        let host_midr_el1_file = PathBuf::from(format!("{}/midr_el1", HOST_MIDR_EL1_INFO));
        let jailer_midr_el1_file = jailer_midr_el1_directory.join("midr_el1");

        // Read and copy the MIDR_EL1 file to Jailer
        let line = readln_special(&host_midr_el1_file)?;
        writeln_special(&jailer_midr_el1_file, line)?;

        // Change the permissions.
        let dest_path_cstr = to_cstring(&jailer_midr_el1_file)?;
        // SAFETY: Safe because `dest_path_cstr` is null-terminated.
        SyscallReturnCode(unsafe { libc::chown(dest_path_cstr.as_ptr(), self.uid(), self.gid()) })
            .into_empty_result()
            .map_err(|err| JailerError::ChangeFileOwner(jailer_midr_el1_file.to_owned(), err))?;

        Ok(())
    }

    /// Jails the process and execs the target binary. The ordering here is
    /// deliberate: everything that needs host resources (netns, rlimits,
    /// cgroups, /dev/null, sysfs copies) happens BEFORE chroot(); device
    /// nodes and daemonization happen after.
    pub fn run(mut self) -> Result<(), JailerError> {
        let exec_file_name = self.copy_exec_to_chroot()?;
        let chroot_exec_file = PathBuf::from("/").join(exec_file_name);

        // Join the specified network namespace, if applicable.
        if let Some(ref path) = self.netns {
            Env::join_netns(path)?;
        }

        // Set limits on resources.
        self.resource_limits.install()?;

        // We have to setup cgroups at this point, because we can't do it anymore after chrooting.
        if let Some(ref conf) = self.cgroup_conf {
            conf.setup()?;
        }

        // If daemonization was requested, open /dev/null before chrooting.
        let dev_null = if self.daemonize {
            Some(
                File::open("/dev/null")
                    .map_err(|err| JailerError::Open("/dev/null".into(), err))?,
            )
        } else {
            None
        };

        #[cfg(target_arch = "aarch64")]
        self.copy_cache_info()?;
        #[cfg(target_arch = "aarch64")]
        self.copy_midr_el1_info()?;

        // Jail self.
        chroot(self.chroot_dir())?;

        // This will not only create necessary directories, but will also change ownership
        // for all of them.
        FOLDER_HIERARCHY
            .iter()
            .try_for_each(|f| self.setup_jailed_folder(f))?;

        // Here we are creating the /dev/kvm and /dev/net/tun devices inside the jailer.
        // Following commands can be translated into bash like this:
        // $: mkdir -p $chroot_dir/dev/net
        // $: dev_net_tun_path={$chroot_dir}/"tun"
        // $: mknod $dev_net_tun_path c 10 200
        // www.kernel.org/doc/Documentation/networking/tuntap.txt specifies 10 and 200 as the major
        // and minor for the /dev/net/tun device.
        self.mknod_and_own_dev(DEV_NET_TUN, DEV_NET_TUN_MAJOR, DEV_NET_TUN_MINOR)?;
        // Do the same for /dev/kvm with (major, minor) = (10, 232).
        self.mknod_and_own_dev(DEV_KVM, DEV_KVM_MAJOR, DEV_KVM_MINOR)?;
        // And for /dev/urandom with (major, minor) = (1, 9).
        // If the device is not accessible on the host, output a warning to inform user that MMDS
        // version 2 will not be available to use.
        let _ = self
            .mknod_and_own_dev(DEV_URANDOM, DEV_URANDOM_MAJOR, DEV_URANDOM_MINOR)
            .map_err(|err| {
                println!(
                    "Warning! Could not create /dev/urandom device inside jailer: {}.",
                    err
                );
                println!("MMDS version 2 will not be available to use.");
            });

        // If we have a minor version for /dev/userfaultfd the device is present on the host.
        // Expose the device in the jailed environment.
        if let Some(minor) = self.uffd_dev_minor {
            self.mknod_and_own_dev(DEV_UFFD_PATH, DEV_UFFD_MAJOR, minor)?;
        }

        self.jailer_cpu_time_us = get_time_us(ClockType::ProcessCpu) - self.start_time_cpu_us;

        // Daemonize before exec, if so required (when the dev_null variable != None).
        if let Some(dev_null) = dev_null {
            // We follow the double fork method to daemonize the jailer referring to
            // https://0xjet.github.io/3OHA/2022/04/11/post.html
            // setsid() will fail if the calling process is a process group leader.
            // By calling fork(), we guarantee that the newly created process inherits
            // the PGID from its parent and, therefore, is not a process group leader.
            // SAFETY: Safe because it's a library function.
            let child_pid = unsafe { libc::fork() };
            if child_pid < 0 {
                return Err(JailerError::Daemonize(io::Error::last_os_error()));
            }

            if child_pid != 0 {
                // parent exiting
                exit(0);
            }

            // Call setsid() in child
            // SAFETY: Safe because it's a library function.
            SyscallReturnCode(unsafe { libc::setsid() })
                .into_empty_result()
                .map_err(JailerError::SetSid)?;

            // Meter CPU usage after first fork()
            // (the child's process-CPU clock restarts at fork, so the absolute
            // reading is the time accrued since the fork — presumably why `+=`
            // on an absolute value is correct here).
            self.jailer_cpu_time_us += get_time_us(ClockType::ProcessCpu);

            // Daemons should not have controlling terminals.
            // If a daemon has a controlling terminal, it can receive signals
            // from it that might cause it to halt or exit unexpectedly.
            // The second fork() ensures that the grandchild is not a session
            // leader and thus cannot reacquire a controlling terminal.
            // SAFETY: Safe because it's a library function.
            let grandchild_pid = unsafe { libc::fork() };
            if grandchild_pid < 0 {
                return Err(JailerError::Daemonize(io::Error::last_os_error()));
            }

            if grandchild_pid != 0 {
                // child exiting
                exit(0);
            }

            // grandchild is the daemon
            // Replace the stdio file descriptors with the /dev/null fd.
            dup2(dev_null.as_raw_fd(), STDIN_FILENO)?;
            dup2(dev_null.as_raw_fd(), STDOUT_FILENO)?;
            dup2(dev_null.as_raw_fd(), STDERR_FILENO)?;

            // Meter CPU usage after second fork()
            self.jailer_cpu_time_us += get_time_us(ClockType::ProcessCpu);
        }

        // If specified, exec the provided binary into a new PID namespace.
        if self.new_pid_ns {
            self.exec_into_new_pid_ns(chroot_exec_file)
        } else {
            // Not switching PID namespaces: the jailer process itself becomes
            // the firecracker process via exec(), so its own PID is recorded.
            self.save_exec_file_pid(id().try_into().unwrap(), chroot_exec_file.clone())?;
            Err(JailerError::Exec(self.exec_command(chroot_exec_file)))
        }
    }
}
    /// Test helper: builds a default `Env` backed by a mock /proc/mounts
    /// file and a freshly-created pseudo exec file.
    fn create_env(mock_proc_mounts: &Path) -> Env {
        // Create a standard environment.
        let arg_parser = build_arg_parser();
        let mut args = arg_parser.arguments().clone();
        let pseudo_exec_file_path = get_pseudo_exec_file_path();
        args.parse(&make_args(&ArgVals::new(pseudo_exec_file_path.as_str())))
            .unwrap();
        Env::new(&args, 0, 0, mock_proc_mounts.to_str().unwrap()).unwrap()
    }
let good_env = Env::new(&args, 0, 0, mock_cgroups.proc_mounts_path.to_str().unwrap()) .expect("This new environment should be created successfully."); let mut chroot_dir = PathBuf::from(good_arg_vals.chroot_base); chroot_dir.push(Path::new(&good_arg_vals.exec_file).file_name().unwrap()); chroot_dir.push(good_arg_vals.id); chroot_dir.push("root"); assert_eq!(good_env.chroot_dir(), chroot_dir); assert_eq!(format!("{}", good_env.gid()), good_arg_vals.gid); assert_eq!(format!("{}", good_env.uid()), good_arg_vals.uid); assert_eq!(good_env.netns, good_arg_vals.netns.map(String::from)); assert!(good_env.daemonize); assert!(good_env.new_pid_ns); let another_good_arg_vals = ArgVals { netns: None, daemonize: false, new_pid_ns: false, ..good_arg_vals }; let arg_parser = build_arg_parser(); args = arg_parser.arguments().clone(); args.parse(&make_args(&another_good_arg_vals)).unwrap(); let another_good_env = Env::new(&args, 0, 0, mock_cgroups.proc_mounts_path.to_str().unwrap()) .expect("This another new environment should be created successfully."); assert!(!another_good_env.daemonize); assert!(!another_good_env.new_pid_ns); let base_invalid_arg_vals = ArgVals { daemonize: true, ..another_good_arg_vals.clone() }; let invalid_cgroup_arg_vals = ArgVals { cgroups: vec!["zzz"], ..base_invalid_arg_vals.clone() }; let arg_parser = build_arg_parser(); args = arg_parser.arguments().clone(); args.parse(&make_args(&invalid_cgroup_arg_vals)).unwrap(); Env::new(&args, 0, 0, mock_cgroups.proc_mounts_path.to_str().unwrap()).unwrap_err(); let invalid_res_limit_arg_vals = ArgVals { resource_limits: vec!["zzz"], ..base_invalid_arg_vals.clone() }; let arg_parser = build_arg_parser(); args = arg_parser.arguments().clone(); args.parse(&make_args(&invalid_res_limit_arg_vals)).unwrap(); Env::new(&args, 0, 0, mock_cgroups.proc_mounts_path.to_str().unwrap()).unwrap_err(); let invalid_id_arg_vals = ArgVals { id: "/ad./sa12", ..base_invalid_arg_vals.clone() }; let arg_parser = build_arg_parser(); args = 
arg_parser.arguments().clone(); args.parse(&make_args(&invalid_id_arg_vals)).unwrap(); Env::new(&args, 0, 0, mock_cgroups.proc_mounts_path.to_str().unwrap()).unwrap_err(); let inexistent_exec_file_arg_vals = ArgVals { exec_file: "/this!/file!/should!/not!/exist!/", ..base_invalid_arg_vals.clone() }; let arg_parser = build_arg_parser(); args = arg_parser.arguments().clone(); args.parse(&make_args(&inexistent_exec_file_arg_vals)) .unwrap(); Env::new(&args, 0, 0, mock_cgroups.proc_mounts_path.to_str().unwrap()).unwrap_err(); let invalid_uid_arg_vals = ArgVals { uid: "zzz", ..base_invalid_arg_vals.clone() }; let arg_parser = build_arg_parser(); args = arg_parser.arguments().clone(); args.parse(&make_args(&invalid_uid_arg_vals)).unwrap(); Env::new(&args, 0, 0, mock_cgroups.proc_mounts_path.to_str().unwrap()).unwrap_err(); let invalid_gid_arg_vals = ArgVals { gid: "zzz", ..base_invalid_arg_vals.clone() }; let arg_parser = build_arg_parser(); args = arg_parser.arguments().clone(); args.parse(&make_args(&invalid_gid_arg_vals)).unwrap(); Env::new(&args, 0, 0, mock_cgroups.proc_mounts_path.to_str().unwrap()).unwrap_err(); let invalid_parent_cg_vals = ArgVals { parent_cgroup: Some("/root"), ..base_invalid_arg_vals.clone() }; let arg_parser = build_arg_parser(); args = arg_parser.arguments().clone(); args.parse(&make_args(&invalid_parent_cg_vals)).unwrap(); Env::new(&args, 0, 0, mock_cgroups.proc_mounts_path.to_str().unwrap()).unwrap_err(); let invalid_controller_pt = ArgVals { cgroups: vec!["../file_name=1", "./root=1", "/home=1"], ..another_good_arg_vals.clone() }; let arg_parser = build_arg_parser(); args = arg_parser.arguments().clone(); args.parse(&make_args(&invalid_controller_pt)).unwrap(); Env::new(&args, 0, 0, mock_cgroups.proc_mounts_path.to_str().unwrap()).unwrap_err(); let invalid_format = ArgVals { cgroups: vec!["./root/", "../root"], ..another_good_arg_vals.clone() }; let arg_parser = build_arg_parser(); args = arg_parser.arguments().clone(); 
args.parse(&make_args(&invalid_format)).unwrap(); Env::new(&args, 0, 0, mock_cgroups.proc_mounts_path.to_str().unwrap()).unwrap_err(); // The chroot-base-dir param is not validated by Env::new, but rather in run, when we // actually attempt to create the folder structure (the same goes for netns). } #[test] fn test_dup2() { // Open /dev/kvm since it should be available anyway. let file1 = fs::File::open("/dev/kvm").unwrap(); // We open a second file to make sure its associated fd is not used by something else. let file2 = fs::File::open("/dev/kvm").unwrap(); dup2(file1.as_raw_fd(), file2.as_raw_fd()).unwrap(); } #[test] fn test_validate_exec_file() { // Success case let pseudo_exec_file_path = get_pseudo_exec_file_path(); let pseudo_exec_file_dir = Path::new(&pseudo_exec_file_path).parent().unwrap(); create_dir_all(pseudo_exec_file_dir).unwrap(); File::create(&pseudo_exec_file_path).unwrap(); Env::validate_exec_file(&pseudo_exec_file_path).unwrap(); // Error case 1: No such file exists std::fs::remove_file(&pseudo_exec_file_path).unwrap(); assert_eq!( format!( "{}", Env::validate_exec_file(&pseudo_exec_file_path).unwrap_err() ), format!( "Failed to canonicalize path {}: No such file or directory (os error 2)", pseudo_exec_file_path ) ); // Error case 2: Not a file std::fs::create_dir_all("/tmp/firecracker_test_dir").unwrap(); assert_eq!( format!( "{}", Env::validate_exec_file("/tmp/firecracker_test_dir").unwrap_err() ), "/tmp/firecracker_test_dir is not a file" ); std::fs::remove_dir_all("/tmp/firecracker_test_dir").unwrap(); } #[test] fn test_setup_jailed_folder() { let mut mock_cgroups = MockCgroupFs::new().unwrap(); mock_cgroups.add_v1_mounts().unwrap(); let env = create_env(&mock_cgroups.proc_mounts_path); // Error case: non UTF-8 paths. 
let bad_string_bytes: Vec = vec![0, 102, 111, 111, 0]; // A leading nul followed by 'f', 'o', 'o' let bad_string = String::from_utf8(bad_string_bytes).unwrap(); assert_eq!( format!("{}", env.setup_jailed_folder(bad_string).err().unwrap()), format!( "Failed to create directory \\0foo\\0: file name contained an unexpected NUL byte" ) ); // Error case: inaccessible path - can't be triggered with unit tests running as root. // assert_eq!( // format!("{}", env.setup_jailed_folders(vec!["/foo/bar"]).err().unwrap()), // "Failed to create directory /foo/bar: Permission denied (os error 13)" // ); // Success case. let foo_dir = TempDir::new().unwrap().as_path().to_owned(); env.setup_jailed_folder(foo_dir.as_path()).unwrap(); let metadata = fs::metadata(&foo_dir).unwrap(); // The mode bits will also have S_IFDIR set because the path belongs to a directory. assert_eq!( metadata.permissions().mode(), FOLDER_PERMISSIONS | libc::S_IFDIR ); assert_eq!(metadata.st_uid(), env.uid); assert_eq!(metadata.st_gid(), env.gid); // Can't safely test that permissions remain unchanged by umask settings without affecting // the umask of the whole unit test process. // This crate produces a binary, so Rust integ tests aren't an option either. // And changing the umask in the Python integration tests is unsafe because of pytest's // process management; it can't be isolated from side effects. } fn ensure_mknod_and_own_dev(env: &Env, dev_path: &CStr, major: u32, minor: u32) { use std::os::unix::fs::FileTypeExt; // Create a new device node. env.mknod_and_own_dev(dev_path, major, minor).unwrap(); // Ensure device's properties. let metadata = fs::metadata(dev_path.to_str().unwrap()).unwrap(); assert!(metadata.file_type().is_char_device()); assert_eq!(libc::major(metadata.st_rdev()), major); assert_eq!(libc::minor(metadata.st_rdev()), minor); assert_eq!( metadata.permissions().mode(), libc::S_IFCHR | libc::S_IRUSR | libc::S_IWUSR ); // Trying to create again the same device node is not allowed. 
assert_eq!( format!( "{}", env.mknod_and_own_dev(dev_path, major, minor).unwrap_err() ), format!( "Failed to create {} via mknod inside the jail: File exists (os error 17)", dev_path.to_str().unwrap() ) ); } #[test] fn test_mknod_and_own_dev() { let mut mock_cgroups = MockCgroupFs::new().unwrap(); mock_cgroups.add_v1_mounts().unwrap(); let env = create_env(&mock_cgroups.proc_mounts_path); let mock_dev_dir = TempDir::new().unwrap(); // Ensure device nodes are created with correct major/minor numbers and permissions. let mut dev_infos: Vec<(PathBuf, u32, u32)> = vec![ ( mock_dev_dir.as_path().join("net/tun-test"), DEV_NET_TUN_MAJOR, DEV_NET_TUN_MINOR, ), ( mock_dev_dir.as_path().join("kvm-test"), DEV_KVM_MAJOR, DEV_KVM_MINOR, ), ]; if let Some(uffd_dev_minor) = env.uffd_dev_minor { dev_infos.push(( mock_dev_dir.as_path().join("userfaultfd-test"), DEV_UFFD_MAJOR, uffd_dev_minor, )); } for (dev, major, minor) in dev_infos { // Ensure the folder where we are creating the node exists fs::create_dir_all(dev.parent().unwrap()).unwrap(); let dev_path = dev.to_str().map(CString::new).unwrap().unwrap(); ensure_mknod_and_own_dev(&env, &dev_path, major, minor); } } #[test] fn test_userfaultfd_dev() { let mut mock_cgroups = MockCgroupFs::new().unwrap(); mock_cgroups.add_v1_mounts().unwrap(); let env = create_env(&mock_cgroups.proc_mounts_path); if !Path::new(DEV_UFFD_PATH.to_str().unwrap()).exists() { assert_eq!(env.uffd_dev_minor, None); } else { assert!(env.uffd_dev_minor.is_some()); } } #[test] fn test_copy_exec_to_chroot() { // Create a standard environment. let arg_parser = build_arg_parser(); let mut args = arg_parser.arguments().clone(); let mut mock_cgroups = MockCgroupFs::new().unwrap(); mock_cgroups.add_v1_mounts().unwrap(); // Create tmp resources for `exec_file` and `chroot_base`. 
let exec_file_path = get_pseudo_exec_file_path(); let exec_file_dir = Path::new(&exec_file_path).parent().unwrap(); fs::create_dir_all(exec_file_dir).unwrap(); File::create(&exec_file_path).unwrap(); let some_dir = TempDir::new().unwrap(); let some_dir_path = some_dir.as_path().to_str().unwrap(); fs::write(&exec_file_path, "some_content").unwrap(); let some_arg_vals = ArgVals { id: "bd65600d-8669-4903-8a14-af88203add38", exec_file: exec_file_path.as_str(), uid: "1001", gid: "1002", chroot_base: some_dir_path, netns: Some("zzzns"), daemonize: false, new_pid_ns: false, cgroups: Vec::new(), resource_limits: Vec::new(), parent_cgroup: None, }; let exec_file_name = Path::new(&some_arg_vals.exec_file).file_name().unwrap(); fs::write(some_arg_vals.exec_file, "some_content").unwrap(); args.parse(&make_args(&some_arg_vals)).unwrap(); let mut env = Env::new(&args, 0, 0, mock_cgroups.proc_mounts_path.to_str().unwrap()).unwrap(); // Create the required chroot dir hierarchy. fs::create_dir_all(env.chroot_dir()).expect("Could not create dir hierarchy."); assert_eq!( env.copy_exec_to_chroot().unwrap(), exec_file_name.to_os_string() ); let dest_path = env.chroot_dir.join(exec_file_name); // Check that `fs::copy()` copied src content and permission bits to destination. let metadata_src = fs::metadata(&env.exec_file_path).unwrap(); let metadata_dest = fs::metadata(&dest_path).unwrap(); let content_src = fs::read(&env.exec_file_path).unwrap(); let content_dest = fs::read(&dest_path).unwrap(); assert_eq!(content_src, content_dest); assert_eq!(content_dest, b"some_content"); assert_eq!(metadata_src.permissions(), metadata_dest.permissions()); // Clean up the environment. 
fs::remove_dir_all(env.chroot_dir()).expect("Could not remove dir hierarchy."); } #[test] fn test_join_netns() { let mut path = "invalid_path"; assert_eq!( format!("{}", Env::join_netns(path).unwrap_err()), format!( "Failed to open file {}: No such file or directory (os error 2)", path ) ); let tmp_file = TempFile::new().unwrap(); path = tmp_file.as_path().to_str().unwrap(); assert_eq!( format!("{}", Env::join_netns(path).unwrap_err()), "Failed to join network namespace: netns: Invalid argument (os error 22)" ); // Testing `join_netns()` with a valid network namespace is not that easy // as Rust std library doesn't offer support for creating such namespaces. } #[test] fn test_cgroups_parsing() { let arg_parser = build_arg_parser(); let pseudo_exec_file_path = get_pseudo_exec_file_path(); let good_arg_vals = ArgVals::new(pseudo_exec_file_path.as_str()); let mut mock_cgroups = MockCgroupFs::new().unwrap(); mock_cgroups.add_v1_mounts().unwrap(); // Cases that should fail // Check string without "." 
(no controller) let mut args = arg_parser.arguments().clone(); let invalid_cgroup_arg_vals = ArgVals { cgroups: vec!["cpusetcpus=2"], ..good_arg_vals.clone() }; args.parse(&make_args(&invalid_cgroup_arg_vals)).unwrap(); Env::new(&args, 0, 0, mock_cgroups.proc_mounts_path.to_str().unwrap()).unwrap_err(); // Check empty string let mut args = arg_parser.arguments().clone(); let invalid_cgroup_arg_vals = ArgVals { cgroups: vec![""], ..good_arg_vals.clone() }; args.parse(&make_args(&invalid_cgroup_arg_vals)).unwrap(); Env::new(&args, 0, 0, mock_cgroups.proc_mounts_path.to_str().unwrap()).unwrap_err(); // Check valid file empty value let mut args = arg_parser.arguments().clone(); let invalid_cgroup_arg_vals = ArgVals { cgroups: vec!["cpuset.cpus="], ..good_arg_vals.clone() }; args.parse(&make_args(&invalid_cgroup_arg_vals)).unwrap(); Env::new(&args, 0, 0, mock_cgroups.proc_mounts_path.to_str().unwrap()).unwrap_err(); // Check valid file no value let mut args = arg_parser.arguments().clone(); let invalid_cgroup_arg_vals = ArgVals { cgroups: vec!["cpuset.cpus"], ..good_arg_vals.clone() }; args.parse(&make_args(&invalid_cgroup_arg_vals)).unwrap(); Env::new(&args, 0, 0, mock_cgroups.proc_mounts_path.to_str().unwrap()).unwrap_err(); // Cases that should succeed // Check value with special characters (',', '.', '-') let mut args = arg_parser.arguments().clone(); let invalid_cgroup_arg_vals = ArgVals { cgroups: vec!["cpuset.cpus=2-4,5.3"], ..good_arg_vals.clone() }; args.parse(&make_args(&invalid_cgroup_arg_vals)).unwrap(); Env::new(&args, 0, 0, mock_cgroups.proc_mounts_path.to_str().unwrap()).unwrap(); // Check valid case let mut args = arg_parser.arguments().clone(); let invalid_cgroup_arg_vals = ArgVals { cgroups: vec!["cpuset.cpus=2"], ..good_arg_vals.clone() }; args.parse(&make_args(&invalid_cgroup_arg_vals)).unwrap(); Env::new(&args, 0, 0, mock_cgroups.proc_mounts_path.to_str().unwrap()).unwrap(); // Check file with multiple "." 
let mut args = arg_parser.arguments().clone(); let invalid_cgroup_arg_vals = ArgVals { cgroups: vec!["memory.swap.high=2"], ..good_arg_vals.clone() }; args.parse(&make_args(&invalid_cgroup_arg_vals)).unwrap(); Env::new(&args, 0, 0, mock_cgroups.proc_mounts_path.to_str().unwrap()).unwrap(); } #[test] fn test_parse_resource_limits() { let mut resource_limits = ResourceLimits::default(); // Cases that should fail // Check invalid formats let invalid_formats = ["", "foo"]; for format in invalid_formats.iter() { let arg = vec![format.to_string()]; assert_eq!( format!( "{:?}", Env::parse_resource_limits(&mut resource_limits, &arg) .err() .unwrap() ), format!("{:?}", JailerError::ResLimitFormat(format.to_string())) ); } // Check invalid resource arguments let invalid_resources = ["foo", "", " "]; for res in invalid_resources.iter() { let arg = format!("{}=2", res); assert_eq!( format!( "{:?}", Env::parse_resource_limits(&mut resource_limits, &[arg]) .err() .unwrap() ), format!("{:?}", JailerError::ResLimitArgument(res.to_string())) ); } // Check invalid limit values let invalid_values = ["foo", "2.3", "2-3", " "]; for val in invalid_values.iter() { let arg = format!("fsize={}", val); assert_eq!( format!( "{:?}", Env::parse_resource_limits(&mut resource_limits, &[arg]) .err() .unwrap() ), format!( "{:?}", JailerError::ResLimitValue( val.to_string(), "invalid digit found in string".to_string() ) ) ); } // Check valid cases let resources = [FSIZE_ARG, NO_FILE_ARG]; for resource in resources.iter() { let arg = vec![resource.to_string() + "=4098"]; Env::parse_resource_limits(&mut resource_limits, &arg).unwrap(); } } #[test] #[cfg(target_arch = "aarch64")] fn test_copy_cache_info() { let mut mock_cgroups = MockCgroupFs::new().unwrap(); mock_cgroups.add_v1_mounts().unwrap(); let env = create_env(&mock_cgroups.proc_mounts_path); // Create the required chroot dir hierarchy. 
fs::create_dir_all(env.chroot_dir()).expect("Could not create dir hierarchy."); env.copy_cache_info().unwrap(); // Make sure that the needed files truly exist. const JAILER_CACHE_INFO: &str = "sys/devices/system/cpu/cpu0/cache"; let dest_path = env.chroot_dir.join(JAILER_CACHE_INFO); fs::metadata(&dest_path).unwrap(); let index_dest_path = dest_path.join("index0"); fs::metadata(&index_dest_path).unwrap(); let entries = fs::read_dir(&index_dest_path).unwrap(); assert_eq!(entries.enumerate().count(), 6); } #[test] fn test_save_exec_file_pid() { let exec_file_name = "file"; let pid_file_name = "file.pid"; let pid = 1; let mut mock_cgroups = MockCgroupFs::new().unwrap(); mock_cgroups.add_v1_mounts().unwrap(); let mut env = create_env(&mock_cgroups.proc_mounts_path); env.save_exec_file_pid(pid, PathBuf::from(exec_file_name)) .unwrap(); let stored_pid = fs::read_to_string(pid_file_name); fs::remove_file(pid_file_name).unwrap(); assert_eq!(stored_pid.unwrap(), "1"); } } ================================================ FILE: src/jailer/src/main.rs ================================================ // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
// SPDX-License-Identifier: Apache-2.0 use std::ffi::{CString, NulError, OsString}; use std::fmt::{Debug, Display}; use std::path::{Path, PathBuf}; use std::{env as p_env, fs, io}; use env::PROC_MOUNTS; use utils::arg_parser::{ArgParser, Argument, UtilsArgParserError as ParsingError}; use utils::time::{ClockType, get_time_us}; use utils::validators; use vmm_sys_util::syscall::SyscallReturnCode; use crate::env::Env; mod cgroup; mod chroot; mod env; mod resource_limits; const JAILER_VERSION: &str = env!("CARGO_PKG_VERSION"); #[derive(Debug, thiserror::Error)] pub enum JailerError { #[error("Failed to parse arguments: {0}")] ArgumentParsing(ParsingError), #[error("{}", format!("Failed to canonicalize path {:?}: {}", .0, .1).replace('\"', ""))] Canonicalize(PathBuf, io::Error), #[error("{}", format!("Failed to inherit cgroups configurations from file {} in path {:?}", .1, .0).replace('\"', ""))] CgroupInheritFromParent(PathBuf, String), #[error("{1} configurations not found in {0}")] CgroupLineNotFound(String, String), #[error("Cgroup invalid file: {0}")] CgroupInvalidFile(String), #[error("Invalid format for cgroups: {0}")] CgroupFormat(String), #[error("Hierarchy not found: {0}")] CgroupHierarchyMissing(String), #[error("Controller {0} is unavailable")] CgroupControllerUnavailable(String), #[error("{0} is an invalid cgroup version specifier")] CgroupInvalidVersion(String), #[error("Parent cgroup path is invalid. Path should not be absolute or contain '..' or '.'")] CgroupInvalidParentPath(), #[error( "Failed to move process to cgroup ({0}): {1}.\nHint: If you intended to create a child \ cgroup under {0}, pass any --cgroup parameters." 
)] CgroupMove(PathBuf, io::Error), #[error("Failed to change owner for {0}: {1}")] ChangeFileOwner(PathBuf, io::Error), #[error("Failed to chdir into chroot directory: {0}")] ChdirNewRoot(io::Error), #[error("Failed to change permissions on {0}: {1}")] Chmod(PathBuf, io::Error), #[error("Failed cloning into a new child process: {0}")] Clone(io::Error), #[error("Failed to close netns fd: {0}")] CloseNetNsFd(io::Error), #[error("Failed to close /dev/null fd: {0}")] CloseDevNullFd(io::Error), #[error("Failed to call close range syscall: {0}")] CloseRange(io::Error), #[error("{}", format!("Failed to copy {:?} to {:?}: {}", .0, .1, .2).replace('\"', ""))] Copy(PathBuf, PathBuf, io::Error), #[error("{}", format!("Failed to create directory {:?}: {}", .0, .1).replace('\"', ""))] CreateDir(PathBuf, io::Error), #[error("Encountered interior \\0 while parsing a string")] CStringParsing(NulError), #[error("Failed to daemonize: {0}")] Daemonize(io::Error), #[error("Failed to open directory {0}: {1}")] DirOpen(String, String), #[error("Failed to duplicate fd: {0}")] Dup2(io::Error), #[error("Failed to exec into Firecracker: {0}")] Exec(io::Error), #[error("{}", format!("Failed to extract filename from path {:?}", .0).replace('\"', ""))] ExtractFileName(PathBuf), #[error("{}", format!("Failed to open file {:?}: {}", .0, .1).replace('\"', ""))] FileOpen(PathBuf, io::Error), #[error("Failed to decode string from byte array: {0}")] FromBytesWithNul(std::ffi::FromBytesWithNulError), #[error("Failed to get flags from fd: {0}")] GetOldFdFlags(io::Error), #[error("Failed to get PID (getpid): {0}")] GetPid(io::Error), #[error("Failed to get SID (getsid): {0}")] GetSid(io::Error), #[error("Invalid gid: {0}")] Gid(String), #[error("Detected hard link at: {0}")] HardLink(PathBuf), #[error("Invalid instance ID: {0}")] InvalidInstanceId(validators::ValidatorError), #[error("Cannot get metadata for a file: {0}: {1}")] Metadata(PathBuf, io::Error), #[error("{}", format!("File {:?} doesn't have 
a parent", .0).replace('\"', ""))] MissingParent(PathBuf), #[error("Failed to create the jail root directory before pivoting root: {0}")] MkdirOldRoot(io::Error), #[error("Failed to create {1} via mknod inside the jail: {0}")] MknodDev(io::Error, String), #[error("Failed to bind mount the jail root directory: {0}")] MountBind(io::Error), #[error("Failed to change the propagation type to slave: {0}")] MountPropagationSlave(io::Error), #[error("{}", format!("{:?} is not a file", .0).replace('\"', ""))] NotAFile(PathBuf), #[error("{}", format!("{:?} is not a directory", .0).replace('\"', ""))] NotADirectory(PathBuf), #[error("Failed to open {0}: {1}")] Open(PathBuf, io::Error), #[error("{}", format!("Failed to parse path {:?} into an OsString", .0).replace('\"', ""))] OsStringParsing(PathBuf, OsString), #[error("Failed to pivot root: {0}")] PivotRoot(io::Error), #[error("{}", format!("Failed to read line from {:?}: {}", .0, .1).replace('\"', ""))] ReadLine(PathBuf, io::Error), #[error("{}", format!("Failed to read file {:?} into a string: {}", .0, .1).replace('\"', ""))] ReadToString(PathBuf, io::Error), #[error("Regex failed: {0}")] RegEx(regex::Error), #[error("Invalid resource argument: {0}")] ResLimitArgument(String), #[error("Invalid format for resources limits: {0}")] ResLimitFormat(String), #[error("Invalid limit value for resource: {0}: {1}")] ResLimitValue(String, String), #[error("Failed to remove old jail root directory: {0}")] RmOldRootDir(io::Error), #[error("Failed to change current directory: {0}")] SetCurrentDir(io::Error), #[error("Failed to join network namespace: netns: {0}")] SetNetNs(io::Error), #[error("Failed to set limit for resource: {0}")] Setrlimit(String), #[error("Failed to daemonize: setsid: {0}")] SetSid(io::Error), #[error("Invalid uid: {0}")] Uid(String), #[error("Failed to unmount the old jail root: {0}")] UmountOldRoot(io::Error), #[error("Unexpected value for the socket listener fd: {0}")] UnexpectedListenerFd(i32), #[error("Failed 
to unshare into new mount namespace: {0}")] UnshareNewNs(io::Error), #[error("Failed to unset the O_CLOEXEC flag on the socket fd: {0}")] UnsetCloexec(io::Error), #[error("Slice contains invalid UTF-8 data : {0}")] UTF8Parsing(std::str::Utf8Error), #[error("{}", format!("Failed to write to {:?}: {}", .0, .1).replace('\"', ""))] Write(PathBuf, io::Error), } /// Create an ArgParser object which contains info about the command line argument parser and /// populate it with the expected arguments and their characteristics. pub fn build_arg_parser() -> ArgParser<'static> { ArgParser::new() .arg( Argument::new("id") .required(true) .takes_value(true) .help("Jail ID."), ) .arg( Argument::new("exec-file") .required(true) .takes_value(true) .help("File path to exec into."), ) .arg( Argument::new("uid") .required(true) .takes_value(true) .help("The user identifier the jailer switches to after exec."), ) .arg( Argument::new("gid") .required(true) .takes_value(true) .help("The group identifier the jailer switches to after exec."), ) .arg( Argument::new("chroot-base-dir") .takes_value(true) .default_value("/srv/jailer") .help("The base folder where chroot jails are located."), ) .arg( Argument::new("netns") .takes_value(true) .help("Path to the network namespace this microVM should join."), ) .arg(Argument::new("daemonize").takes_value(false).help( "Daemonize the jailer before exec, by invoking setsid(), and redirecting the standard \ I/O file descriptors to /dev/null.", )) .arg( Argument::new("new-pid-ns") .takes_value(false) .help("Exec into a new PID namespace."), ) .arg(Argument::new("cgroup").allow_multiple(true).help( "Cgroup and value to be set by the jailer. It must follow this format: \ = (e.g cpu.shares=10). This argument can be used multiple times \ to add multiple cgroups.", )) .arg(Argument::new("resource-limit").allow_multiple(true).help( "Resource limit values to be set by the jailer. It must follow this format: \ = (e.g no-file=1024). 
This argument can be used multiple times to \ add multiple resource limits. Current available resource values are:\n\t\tfsize: The \ maximum size in bytes for files created by the process.\n\t\tno-file: Specifies a \ value one greater than the maximum file descriptor number that can be opened by this \ process.", )) .arg( Argument::new("cgroup-version") .takes_value(true) .default_value("1") .help("Select the cgroup version used by the jailer."), ) .arg( Argument::new("parent-cgroup") .takes_value(true) .help("Parent cgroup in which the cgroup of this microvm will be placed."), ) .arg( Argument::new("version") .takes_value(false) .help("Print the binary version number."), ) } // It's called writeln_special because we have to use this rather convoluted way of writing // to special cgroup files, to avoid getting errors. It would be nice to know why that happens :-s pub fn writeln_special(file_path: &T, value: V) -> Result<(), JailerError> where T: AsRef + Debug, V: Display + Debug, { fs::write(file_path, format!("{}\n", value)) .map_err(|err| JailerError::Write(PathBuf::from(file_path.as_ref()), err)) } pub fn readln_special + Debug>(file_path: &T) -> Result { let mut line = fs::read_to_string(file_path) .map_err(|err| JailerError::ReadToString(PathBuf::from(file_path.as_ref()), err))?; // Remove the newline character at the end (if any). line.pop(); Ok(line) } fn close_fds_by_close_range() -> Result<(), JailerError> { // First try using the close_range syscall to close all open FDs in the range of 3..UINT_MAX // SAFETY: if the syscall is not available then ENOSYS will be returned SyscallReturnCode(unsafe { libc::syscall( libc::SYS_close_range, 3, libc::c_uint::MAX, libc::CLOSE_RANGE_UNSHARE, ) }) .into_empty_result() .map_err(JailerError::CloseRange) } // Closes all FDs other than 0 (STDIN), 1 (STDOUT) and 2 (STDERR) fn close_inherited_fds() -> Result<(), JailerError> { // We use the close_range syscall which is available on kernels > 5.9. 
close_fds_by_close_range()?; Ok(()) } fn sanitize_process() -> Result<(), JailerError> { // First thing to do is make sure we don't keep any inherited FDs // other that IN, OUT and ERR. close_inherited_fds()?; // Cleanup environment variables. clean_env_vars(); Ok(()) } fn clean_env_vars() { // Remove environment variables received from // the parent process so there are no leaks // inside the jailer environment for (key, _) in p_env::vars() { // SAFETY: the function is safe to call in a single-threaded program unsafe { p_env::remove_var(key); } } } /// Turns an [`AsRef`] into a [`CString`] (c style string). /// The expect should not fail, since Linux paths only contain valid Unicode chars (do they?), /// and do not contain null bytes (do they?). pub fn to_cstring + Debug>(path: T) -> Result { let path_str = path .as_ref() .to_path_buf() .into_os_string() .into_string() .map_err(|err| JailerError::OsStringParsing(path.as_ref().to_path_buf(), err))?; CString::new(path_str).map_err(JailerError::CStringParsing) } /// We wrap the actual main in order to pretty print an error with Display trait. 
fn main() -> Result<(), JailerError> { let result = main_exec(); if let Err(e) = result { eprintln!("{}", e); Err(e) } else { Ok(()) } } fn main_exec() -> Result<(), JailerError> { sanitize_process() .unwrap_or_else(|err| panic!("Failed to sanitize the Jailer process: {}", err)); let mut arg_parser = build_arg_parser(); arg_parser .parse_from_cmdline() .map_err(JailerError::ArgumentParsing)?; let arguments = arg_parser.arguments(); if arguments.flag_present("help") { println!("Jailer v{}\n", JAILER_VERSION); println!("{}\n", arg_parser.formatted_help()); println!("Any arguments after the -- separator will be supplied to the jailed binary.\n"); return Ok(()); } if arguments.flag_present("version") { println!("Jailer v{}\n", JAILER_VERSION); return Ok(()); } Env::new( arguments, get_time_us(ClockType::Monotonic), get_time_us(ClockType::ProcessCpu), PROC_MOUNTS, ) .and_then(|env| { fs::create_dir_all(env.chroot_dir()) .map_err(|err| JailerError::CreateDir(env.chroot_dir().to_owned(), err))?; env.run() })?; Ok(()) } #[cfg(test)] mod tests { #![allow(clippy::undocumented_unsafe_blocks)] use std::env; use std::ffi::CStr; use std::fs::File; use std::os::unix::io::IntoRawFd; use vmm_sys_util::rand; use super::*; fn run_close_fds_test(test_fn: fn() -> Result<(), JailerError>) { let n = 100; let tmp_dir_path = format!( "/tmp/jailer/tests/close_fds/_{}", rand::rand_alphanumerics(4).into_string().unwrap() ); fs::create_dir_all(&tmp_dir_path).unwrap(); let mut fds = Vec::new(); for i in 0..n { let maybe_file = File::create(format!("{}/{}", &tmp_dir_path, i)); fds.push(maybe_file.unwrap().into_raw_fd()); } test_fn().unwrap(); for fd in fds { let is_fd_opened = unsafe { libc::fcntl(fd, libc::F_GETFD) } == 0; assert!(!is_fd_opened); } fs::remove_dir_all(tmp_dir_path).unwrap(); } #[test] fn test_fds_close_range() { // SAFETY: Always safe let mut n = unsafe { std::mem::zeroed() }; // SAFETY: We check if the uname call succeeded assert_eq!(unsafe { libc::uname(&mut n) }, 0); // 
SAFETY: Always safe let release = unsafe { CStr::from_ptr(n.release.as_ptr()) } .to_string_lossy() .into_owned(); // Parse the major and minor version of the kernel let mut r = release.split('.'); let major: i32 = str::parse(r.next().unwrap()).unwrap(); let minor: i32 = str::parse(r.next().unwrap()).unwrap(); // Skip this test if we're running on a too old kernel if major > 5 || (major == 5 && minor >= 9) { run_close_fds_test(close_fds_by_close_range); } } #[test] fn test_sanitize_process() { run_close_fds_test(sanitize_process); } #[test] fn test_clean_env_vars() { let env_vars: [&str; 5] = ["VAR1", "VAR2", "VAR3", "VAR4", "VAR5"]; // Set environment variables for env_var in env_vars.iter() { // SAFETY: the function is safe to call in a single-threaded program unsafe { env::set_var(env_var, "0"); } } // Cleanup the environment clean_env_vars(); // Assert that the variables set beforehand // do not exist anymore for env_var in env_vars.iter() { assert_eq!(env::var_os(env_var), None); } } #[test] fn test_to_cstring() { let path = Path::new("some_path"); let cstring_path = to_cstring(path).unwrap(); assert_eq!(cstring_path, CString::new("some_path").unwrap()); let path_with_nul = Path::new("some_path\0"); assert_eq!( format!("{}", to_cstring(path_with_nul).unwrap_err()), "Encountered interior \\0 while parsing a string" ); } } ================================================ FILE: src/jailer/src/resource_limits.rs ================================================ // Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 use std::fmt; use std::fmt::{Display, Formatter}; use vmm_sys_util::syscall::SyscallReturnCode; use super::JailerError; // Default limit for the maximum number of file descriptors open at a time. const NO_FILE: u64 = 2048; // File size resource argument name. pub(crate) const FSIZE_ARG: &str = "fsize"; // Number of files resource argument name. 
pub(crate) const NO_FILE_ARG: &str = "no-file"; #[derive(Debug, Clone, Copy)] pub enum Resource { // Size of created files. RlimitFsize, // Number of open file descriptors. RlimitNoFile, } impl From for u32 { fn from(resource: Resource) -> u32 { match resource { #[allow(clippy::unnecessary_cast)] #[allow(clippy::cast_possible_wrap)] // Definition of libc::RLIMIT_FSIZE depends on the target_env: // * when equals to "musl" -> libc::RLIMIT_FSIZE is a c_int (which is an i32) // * when equals to "gnu" -> libc::RLIMIT_FSIZE is __rlimit_resource_t which is a // c_uint (which is an u32) Resource::RlimitFsize => libc::RLIMIT_FSIZE as u32, #[allow(clippy::unnecessary_cast)] #[allow(clippy::cast_possible_wrap)] // Definition of libc::RLIMIT_NOFILE depends on the target_env: // * when equals to "musl" -> libc::RLIMIT_NOFILE is a c_int (which is an i32) // * when equals to "gnu" -> libc::RLIMIT_NOFILE is __rlimit_resource_t which is a // c_uint (which is an u32) Resource::RlimitNoFile => libc::RLIMIT_NOFILE as u32, } } } impl From for i32 { fn from(resource: Resource) -> i32 { match resource { #[allow(clippy::unnecessary_cast)] #[allow(clippy::cast_possible_wrap)] // Definition of libc::RLIMIT_FSIZE depends on the target_env: // * when equals to "musl" -> libc::RLIMIT_FSIZE is a c_int (which is an i32) // * when equals to "gnu" -> libc::RLIMIT_FSIZE is __rlimit_resource_t which is a // c_uint (which is an u32) Resource::RlimitFsize => libc::RLIMIT_FSIZE as i32, #[allow(clippy::unnecessary_cast)] #[allow(clippy::cast_possible_wrap)] // Definition of libc::RLIMIT_NOFILE depends on the target_env: // * when equals to "musl" -> libc::RLIMIT_NOFILE is a c_int (which is an i32) // * when equals to "gnu" -> libc::RLIMIT_NOFILE is __rlimit_resource_t which is a // c_uint (which is an u32) Resource::RlimitNoFile => libc::RLIMIT_NOFILE as i32, } } } impl Display for Resource { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { match self { Resource::RlimitFsize => write!(f, "size of 
file"), Resource::RlimitNoFile => write!(f, "number of file descriptors"), } } } #[derive(Clone, Copy, Debug)] pub struct ResourceLimits { file_size: Option, no_file: u64, } impl Default for ResourceLimits { fn default() -> Self { ResourceLimits { file_size: None, no_file: NO_FILE, } } } impl ResourceLimits { pub fn install(self) -> Result<(), JailerError> { if let Some(file_size) = self.file_size { // Set file size limit. ResourceLimits::set_limit(Resource::RlimitFsize, file_size)?; } // Set limit on number of file descriptors. ResourceLimits::set_limit(Resource::RlimitNoFile, self.no_file)?; Ok(()) } fn set_limit(resource: Resource, target: libc::rlim_t) -> Result<(), JailerError> { let rlim: libc::rlimit = libc::rlimit { rlim_cur: target, rlim_max: target, }; // SAFETY: Safe because `resource` is a known-valid constant, and `&rlim` // is non-dangling. SyscallReturnCode(unsafe { libc::setrlimit(resource.into(), &rlim) }) .into_empty_result() .map_err(|_| JailerError::Setrlimit(resource.to_string())) } pub fn set_file_size(&mut self, file_size: u64) { self.file_size = Some(file_size); } pub fn set_no_file(&mut self, no_file: u64) { self.no_file = no_file; } } #[cfg(test)] mod tests { #![allow(clippy::undocumented_unsafe_blocks)] use super::*; #[test] #[allow(clippy::unnecessary_cast)] fn test_from_resource() { assert_eq!(u32::from(Resource::RlimitFsize), libc::RLIMIT_FSIZE as u32); assert_eq!( u32::from(Resource::RlimitNoFile), libc::RLIMIT_NOFILE as u32 ); } #[test] fn test_display_resource() { assert_eq!( Resource::RlimitFsize.to_string(), "size of file".to_string() ); assert_eq!( Resource::RlimitNoFile.to_string(), "number of file descriptors".to_string() ); } #[test] fn test_default_resource_limits() { let mut rlimits = ResourceLimits::default(); assert!(rlimits.file_size.is_none()); assert_eq!(rlimits.no_file, NO_FILE); rlimits.set_file_size(1); assert_eq!(rlimits.file_size.unwrap(), 1); rlimits.set_no_file(1); assert_eq!(rlimits.no_file, 1); } #[test] fn 
test_set_resource_limits() { let resource = Resource::RlimitNoFile; let new_limit = NO_FILE - 1; // Get current file size limit. let mut rlim: libc::rlimit = libc::rlimit { rlim_cur: 0, rlim_max: 0, }; unsafe { libc::getrlimit(resource.into(), &mut rlim) }; assert_ne!(rlim.rlim_cur, new_limit); assert_ne!(rlim.rlim_max, new_limit); // Set new file size limit. ResourceLimits::set_limit(resource, new_limit).unwrap(); // Verify new limit. let mut rlim: libc::rlimit = libc::rlimit { rlim_cur: 0, rlim_max: 0, }; unsafe { libc::getrlimit(resource.into(), &mut rlim) }; assert_eq!(rlim.rlim_cur, new_limit); assert_eq!(rlim.rlim_max, new_limit); } #[test] fn test_install() { // Setup the resource limits let mut rlimits = ResourceLimits::default(); let new_file_size_limit = 2097151; let new_no_file_limit = 1000; rlimits.set_file_size(new_file_size_limit); rlimits.set_no_file(new_no_file_limit); // Install the new limits to file size and // the number of file descriptors rlimits.install().unwrap(); // Verify the new limit for file size let file_size_resource = Resource::RlimitFsize; let mut file_size_limit: libc::rlimit = libc::rlimit { rlim_cur: 0, rlim_max: 0, }; unsafe { libc::getrlimit(file_size_resource.into(), &mut file_size_limit) }; assert_eq!(file_size_limit.rlim_cur, new_file_size_limit); assert_eq!(file_size_limit.rlim_max, new_file_size_limit); // Verify the new limit for the number of file descriptors let file_descriptor_resource = Resource::RlimitNoFile; let mut file_descriptor_limit: libc::rlimit = libc::rlimit { rlim_cur: 0, rlim_max: 0, }; unsafe { libc::getrlimit(file_descriptor_resource.into(), &mut file_descriptor_limit) }; assert_eq!(file_descriptor_limit.rlim_cur, new_no_file_limit); assert_eq!(file_descriptor_limit.rlim_max, new_no_file_limit); } } ================================================ FILE: src/log-instrument/Cargo.toml ================================================ [package] name = "log-instrument" version = "0.3.0" authors = ["Amazon 
Firecracker team "] edition = "2024" description = "Offers an attribute procedural macro that adds `log::trace!` events at the start and end of attributed functions." license = "Apache-2.0" [lib] bench = false [dependencies] log = "0.4.29" log-instrument-macros = { path = "../log-instrument-macros" } [dev-dependencies] env_logger = "0.11.9" [[example]] name = "one" [[example]] name = "two" [[example]] name = "three" [[example]] name = "four" [[example]] name = "five" [[example]] name = "six" [lints] workspace = true ================================================ FILE: src/log-instrument/README.md ================================================ # log-instrument Offers an attribute procedural macro that adds [`log::trace!`](https://docs.rs/log/latest/log/macro.trace.html) events at the start and end of attributed functions. ## Example ```rust use log::*; fn main() { env_logger::builder() .filter_level(LevelFilter::Trace) .init(); info!("{}", one(2)); info!("{}", one(3)); info!("{}", one(4)); } #[log_instrument::instrument] fn one(x: u32) -> u32 { let cmp = x == 2; debug!("cmp: {cmp}"); if cmp { return 4; } two(x + 3) } #[log_instrument::instrument] fn two(x: u32) -> u32 { let res = x % 2; debug!("res: {res}"); res } ``` Outputs: ``` [2023-10-12T16:38:00Z TRACE log_instrument] ThreadId(1)>>one [2023-10-12T16:38:00Z DEBUG six] cmp: true [2023-10-12T16:38:00Z TRACE log_instrument] ThreadId(1)<>one [2023-10-12T16:38:00Z DEBUG six] cmp: false [2023-10-12T16:38:00Z TRACE log_instrument] ThreadId(1)::one>>two [2023-10-12T16:38:00Z DEBUG six] res: 0 [2023-10-12T16:38:00Z TRACE log_instrument] ThreadId(1)::one<>one [2023-10-12T16:38:00Z DEBUG six] cmp: false [2023-10-12T16:38:00Z TRACE log_instrument] ThreadId(1)::one>>two [2023-10-12T16:38:00Z DEBUG six] res: 1 [2023-10-12T16:38:00Z TRACE log_instrument] ThreadId(1)::one<>); impl MyStruct { #[log_instrument::instrument] fn one(&mut self) -> Option<&mut [String]> { const SOMETHING: u32 = 23; match &mut self.0 { Some(y) => 
{ debug!("{y:?}"); debug!("{SOMETHING}"); Some(y) } _ => None, } } } ================================================ FILE: src/log-instrument/examples/four.rs ================================================ // Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 use log::*; fn main() { env_logger::builder() .filter_level(LevelFilter::Trace) .init(); let mut my_struct = MyStruct(None); info!("{:?}", my_struct.one()); let mut my_struct = MyStruct(Some(vec![String::from("a"), String::from("b")])); info!("{:?}", my_struct.one()); } struct MyStruct(Option>); impl MyStruct { #[log_instrument::instrument] fn one(&mut self) -> Option<&mut [String]> { match &mut self.0 { Some(y) => { debug!("{y:?}"); Some(y) } _ => None, } } } ================================================ FILE: src/log-instrument/examples/one.rs ================================================ // Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 use log::*; fn main() { env_logger::builder() .filter_level(LevelFilter::Trace) .init(); info!("{}", one(2)); info!("{}", one(3)); info!("{}", one(4)); } #[log_instrument::instrument] fn one(x: u32) -> u32 { let cmp = x == 2; debug!("cmp: {cmp}"); if cmp { return 4; } x + 3 } ================================================ FILE: src/log-instrument/examples/six.rs ================================================ // Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
// SPDX-License-Identifier: Apache-2.0 use log::*; fn main() { env_logger::builder() .filter_level(LevelFilter::Trace) .init(); info!("{}", one(2)); info!("{}", one(3)); info!("{}", one(4)); } #[log_instrument::instrument] fn one(x: u32) -> u32 { let cmp = x == 2; debug!("cmp: {cmp}"); if cmp { return 4; } two(x + 3) } #[log_instrument::instrument] fn two(x: u32) -> u32 { let res = x % 2; debug!("res: {res}"); res } ================================================ FILE: src/log-instrument/examples/three.rs ================================================ // Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 use log::*; fn main() { env_logger::builder() .filter_level(LevelFilter::Trace) .init(); info!("{:?}", one(&mut None)); info!( "{:?}", one(&mut Some(vec![String::from("a"), String::from("b")])) ); } #[log_instrument::instrument] fn one(x: &mut Option>) -> Option<&mut [String]> { match x { Some(y) => { debug!("{y:?}"); Some(y) } _ => None, } } ================================================ FILE: src/log-instrument/examples/two.rs ================================================ // Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 use log::*; fn main() { env_logger::builder() .filter_level(LevelFilter::Trace) .init(); info!("{:?}", one(&None)); info!( "{:?}", one(&Some(vec![String::from("a"), String::from("b")])) ); } #[log_instrument::instrument] fn one(x: &Option>) -> Option<&[String]> { match x { Some(y) => { debug!("{y:?}"); Some(y) } _ => None, } } ================================================ FILE: src/log-instrument/src/lib.rs ================================================ // Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
// SPDX-License-Identifier: Apache-2.0 use std::collections::HashMap; use std::fmt::Write; use std::sync::{Mutex, OnceLock}; pub use log_instrument_macros::*; type InnerPath = Mutex>>; static PATH: OnceLock = OnceLock::new(); fn path() -> &'static InnerPath { PATH.get_or_init(InnerPath::default) } #[allow(missing_debug_implementations)] pub struct __Instrument; impl __Instrument { pub fn new(s: &'static str) -> __Instrument { // Get log let mut guard = path().lock().unwrap(); let id = std::thread::current().id(); let prefix = if let Some(spans) = guard.get_mut(&id) { let out = spans.iter().fold(String::new(), |mut s, x| { let _ = write!(s, "::{x}"); s }); spans.push(s); out } else { guard.insert(id, vec![s]); String::new() }; // Write log log::trace!("{id:?}{prefix}>>{s}"); // Return exit struct __Instrument } } impl std::ops::Drop for __Instrument { fn drop(&mut self) { // Get log let mut guard = path().lock().unwrap(); let id = std::thread::current().id(); let spans = guard.get_mut(&id).unwrap(); let s = spans.pop().unwrap(); let out = spans.iter().fold(String::new(), |mut s, x| { let _ = write!(s, "::{x}"); s }); log::trace!("{id:?}{out}<<{s}"); } } ================================================ FILE: src/log-instrument-macros/Cargo.toml ================================================ [package] name = "log-instrument-macros" version = "0.1.0" authors = ["Amazon Firecracker team "] edition = "2024" description = "Offers an attribute procedural macro that adds `log::trace!` events at the start and end of attributed functions." license = "Apache-2.0" [lib] proc-macro = true bench = false [dependencies] proc-macro2 = "1.0.106" quote = "1.0.45" syn = { version = "2.0.117", features = ["full", "extra-traits"] } [lints] workspace = true ================================================ FILE: src/log-instrument-macros/src/lib.rs ================================================ // Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
// SPDX-License-Identifier: Apache-2.0 #![warn(clippy::pedantic)] extern crate proc_macro; use quote::quote; use syn::parse_quote; /// Adds `log::trace!` events at the start and end of an attributed function. /// /// # Panics /// /// When applied to anything other than a function. #[proc_macro_attribute] pub fn instrument( _attr: proc_macro::TokenStream, item: proc_macro::TokenStream, ) -> proc_macro::TokenStream { let input = syn::parse_macro_input!(item as syn::Item); let syn::Item::Fn(mut item_fn) = input else { panic!("Instrument macro can only be on functions.") }; let clippy_attr: syn::Attribute = parse_quote! { #[allow(clippy::items_after_statements)] }; item_fn.attrs.push(clippy_attr); let item_fn_ident = item_fn.sig.ident.to_string(); let new_stmt: syn::Stmt = parse_quote! { let __ = log_instrument::__Instrument::new(#item_fn_ident); }; item_fn.block.stmts.insert(0, new_stmt); let out = quote! { #item_fn }; proc_macro::TokenStream::from(out) } ================================================ FILE: src/pci/Cargo.toml ================================================ [package] authors = ["Samuel Ortiz "] edition = "2021" name = "pci" version = "0.1.0" license = "Apache-2.0 AND BSD-3-Clause" [lib] bench = false [features] default = [] [dependencies] displaydoc = "0.2.5" libc = "0.2.183" log = "0.4.29" serde = { version = "1.0.228", features = ["derive"] } thiserror = "2.0.18" [dev-dependencies] serde_test = "1.0.177" vmm-sys-util = "0.15.0" ================================================ FILE: src/pci/src/lib.rs ================================================ // Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. // Copyright 2018 The Chromium OS Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE-BSD-3-Clause file. // // SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause //! Implements pci devices and busses. 
extern crate log; use std::fmt::{self, Debug, Display}; use std::num::ParseIntError; use std::str::FromStr; use serde::de::Visitor; use serde::{Deserialize, Serialize}; /// PCI has four interrupt pins A->D. #[derive(Copy, Clone)] pub enum PciInterruptPin { IntA, IntB, IntC, IntD, } impl PciInterruptPin { pub fn to_mask(self) -> u32 { self as u32 } } #[derive(Clone, Copy, PartialEq, Eq, PartialOrd)] pub struct PciBdf(u32); struct PciBdfVisitor; impl Visitor<'_> for PciBdfVisitor { type Value = PciBdf; fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { formatter.write_str("struct PciBdf") } fn visit_str(self, v: &str) -> Result where E: serde::de::Error, { PciBdf::from_str(v).map_err(serde::de::Error::custom) } } impl<'de> serde::Deserialize<'de> for PciBdf { fn deserialize(deserializer: D) -> Result where D: serde::Deserializer<'de>, { deserializer.deserialize_str(PciBdfVisitor) } } impl serde::Serialize for PciBdf { fn serialize(&self, serializer: S) -> Result where S: serde::Serializer, { serializer.collect_str(&self.to_string()) } } impl PciBdf { pub fn segment(&self) -> u16 { ((self.0 >> 16) & 0xffff) as u16 } pub fn bus(&self) -> u8 { ((self.0 >> 8) & 0xff) as u8 } pub fn device(&self) -> u8 { ((self.0 >> 3) & 0x1f) as u8 } pub fn function(&self) -> u8 { (self.0 & 0x7) as u8 } pub fn new(segment: u16, bus: u8, device: u8, function: u8) -> Self { Self( ((segment as u32) << 16) | ((bus as u32) << 8) | (((device & 0x1f) as u32) << 3) | (function & 0x7) as u32, ) } } impl From for PciBdf { fn from(bdf: u32) -> Self { Self(bdf) } } impl From for u32 { fn from(bdf: PciBdf) -> Self { bdf.0 } } impl From<&PciBdf> for u32 { fn from(bdf: &PciBdf) -> Self { bdf.0 } } impl From for u16 { fn from(bdf: PciBdf) -> Self { (bdf.0 & 0xffff) as u16 } } impl From<&PciBdf> for u16 { fn from(bdf: &PciBdf) -> Self { (bdf.0 & 0xffff) as u16 } } impl Debug for PciBdf { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!( f, 
"{:04x}:{:02x}:{:02x}.{:01x}", self.segment(), self.bus(), self.device(), self.function() ) } } impl Display for PciBdf { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!( f, "{:04x}:{:02x}:{:02x}.{:01x}", self.segment(), self.bus(), self.device(), self.function() ) } } /// Errors associated with parsing a BDF string. #[derive(Debug, thiserror::Error, displaydoc::Display)] pub enum PciBdfParseError { /// Unable to parse bus/device/function number hex: {0} InvalidHex(#[from] ParseIntError), /// Invalid format: {0} (expected format: 0000:00:00.0) InvalidFormat(String), } impl FromStr for PciBdf { type Err = PciBdfParseError; fn from_str(s: &str) -> Result { let items: Vec<&str> = s.split('.').collect(); if items.len() != 2 { return Err(PciBdfParseError::InvalidFormat(s.to_string())); } let function = u8::from_str_radix(items[1], 16)?; let items: Vec<&str> = items[0].split(':').collect(); if items.len() != 3 { return Err(PciBdfParseError::InvalidFormat(s.to_string())); } let segment = u16::from_str_radix(items[0], 16)?; let bus = u8::from_str_radix(items[1], 16)?; let device = u8::from_str_radix(items[2], 16)?; Ok(PciBdf::new(segment, bus, device, function)) } } /// Represents the types of PCI headers allowed in the configuration registers. #[derive(Debug, Copy, Clone, Eq, PartialEq)] pub enum PciHeaderType { Device, Bridge, } /// Classes of PCI nodes. #[allow(dead_code)] #[derive(Copy, Clone)] pub enum PciClassCode { TooOld, MassStorage, NetworkController, DisplayController, MultimediaController, MemoryController, BridgeDevice, SimpleCommunicationController, BaseSystemPeripheral, InputDevice, DockingStation, Processor, SerialBusController, WirelessController, IntelligentIoController, EncryptionController, DataAcquisitionSignalProcessing, Other = 0xff, } impl PciClassCode { pub fn get_register_value(self) -> u8 { self as u8 } } /// A PCI subclass. Each class in `PciClassCode` can specify a unique set of subclasses. 
This trait /// is implemented by each subclass. It allows use of a trait object to generate configurations. pub trait PciSubclass { /// Convert this subclass to the value used in the PCI specification. fn get_register_value(&self) -> u8; } /// Subclasses of the MultimediaController class. #[allow(dead_code)] #[derive(Copy, Clone)] pub enum PciMultimediaSubclass { VideoController = 0x00, AudioController = 0x01, TelephonyDevice = 0x02, AudioDevice = 0x03, Other = 0x80, } impl PciSubclass for PciMultimediaSubclass { fn get_register_value(&self) -> u8 { *self as u8 } } /// Subclasses of the BridgeDevice #[allow(dead_code)] #[derive(Copy, Clone)] pub enum PciBridgeSubclass { HostBridge = 0x00, IsaBridge = 0x01, EisaBridge = 0x02, McaBridge = 0x03, PciToPciBridge = 0x04, PcmciaBridge = 0x05, NuBusBridge = 0x06, CardBusBridge = 0x07, RacEwayBridge = 0x08, PciToPciSemiTransparentBridge = 0x09, InfiniBrandToPciHostBridge = 0x0a, OtherBridgeDevice = 0x80, } impl PciSubclass for PciBridgeSubclass { fn get_register_value(&self) -> u8 { *self as u8 } } /// Subclass of the SerialBus #[allow(dead_code)] #[derive(Copy, Clone)] pub enum PciSerialBusSubClass { Firewire = 0x00, Accessbus = 0x01, Ssa = 0x02, Usb = 0x03, } impl PciSubclass for PciSerialBusSubClass { fn get_register_value(&self) -> u8 { *self as u8 } } /// Mass Storage Sub Classes #[allow(dead_code)] #[derive(Copy, Clone)] pub enum PciMassStorageSubclass { ScsiStorage = 0x00, IdeInterface = 0x01, FloppyController = 0x02, IpiController = 0x03, RaidController = 0x04, AtaController = 0x05, SataController = 0x06, SerialScsiController = 0x07, NvmController = 0x08, MassStorage = 0x80, } impl PciSubclass for PciMassStorageSubclass { fn get_register_value(&self) -> u8 { *self as u8 } } /// Network Controller Sub Classes #[allow(dead_code)] #[derive(Copy, Clone)] pub enum PciNetworkControllerSubclass { EthernetController = 0x00, TokenRingController = 0x01, FddiController = 0x02, AtmController = 0x03, IsdnController = 0x04, 
WorldFipController = 0x05, PicmgController = 0x06, InfinibandController = 0x07, FabricController = 0x08, NetworkController = 0x80, } impl PciSubclass for PciNetworkControllerSubclass { fn get_register_value(&self) -> u8 { *self as u8 } } /// Types of PCI capabilities. #[derive(Debug, PartialEq, Eq, Copy, Clone)] #[allow(dead_code)] #[allow(non_camel_case_types)] #[repr(u8)] pub enum PciCapabilityId { ListId = 0, PowerManagement = 0x01, AcceleratedGraphicsPort = 0x02, VitalProductData = 0x03, SlotIdentification = 0x04, MessageSignalledInterrupts = 0x05, CompactPciHotSwap = 0x06, PciX = 0x07, HyperTransport = 0x08, VendorSpecific = 0x09, Debugport = 0x0A, CompactPciCentralResourceControl = 0x0B, PciStandardHotPlugController = 0x0C, BridgeSubsystemVendorDeviceId = 0x0D, AgpTargetPciPcibridge = 0x0E, SecureDevice = 0x0F, PciExpress = 0x10, MsiX = 0x11, SataDataIndexConf = 0x12, PciAdvancedFeatures = 0x13, PciEnhancedAllocation = 0x14, } impl From for PciCapabilityId { fn from(c: u8) -> Self { match c { 0 => PciCapabilityId::ListId, 0x01 => PciCapabilityId::PowerManagement, 0x02 => PciCapabilityId::AcceleratedGraphicsPort, 0x03 => PciCapabilityId::VitalProductData, 0x04 => PciCapabilityId::SlotIdentification, 0x05 => PciCapabilityId::MessageSignalledInterrupts, 0x06 => PciCapabilityId::CompactPciHotSwap, 0x07 => PciCapabilityId::PciX, 0x08 => PciCapabilityId::HyperTransport, 0x09 => PciCapabilityId::VendorSpecific, 0x0A => PciCapabilityId::Debugport, 0x0B => PciCapabilityId::CompactPciCentralResourceControl, 0x0C => PciCapabilityId::PciStandardHotPlugController, 0x0D => PciCapabilityId::BridgeSubsystemVendorDeviceId, 0x0E => PciCapabilityId::AgpTargetPciPcibridge, 0x0F => PciCapabilityId::SecureDevice, 0x10 => PciCapabilityId::PciExpress, 0x11 => PciCapabilityId::MsiX, 0x12 => PciCapabilityId::SataDataIndexConf, 0x13 => PciCapabilityId::PciAdvancedFeatures, 0x14 => PciCapabilityId::PciEnhancedAllocation, _ => PciCapabilityId::ListId, } } } /// Types of PCI Express 
capabilities. #[derive(PartialEq, Eq, Copy, Clone, Debug)] #[allow(dead_code)] #[repr(u16)] pub enum PciExpressCapabilityId { NullCapability = 0x0000, AdvancedErrorReporting = 0x0001, VirtualChannelMultiFunctionVirtualChannelNotPresent = 0x0002, DeviceSerialNumber = 0x0003, PowerBudgeting = 0x0004, RootComplexLinkDeclaration = 0x0005, RootComplexInternalLinkControl = 0x0006, RootComplexEventCollectorEndpointAssociation = 0x0007, MultiFunctionVirtualChannel = 0x0008, VirtualChannelMultiFunctionVirtualChannelPresent = 0x0009, RootComplexRegisterBlock = 0x000a, VendorSpecificExtendedCapability = 0x000b, ConfigurationAccessCorrelation = 0x000c, AccessControlServices = 0x000d, AlternativeRoutingIdentificationInterpretation = 0x000e, AddressTranslationServices = 0x000f, SingleRootIoVirtualization = 0x0010, DeprecatedMultiRootIoVirtualization = 0x0011, Multicast = 0x0012, PageRequestInterface = 0x0013, ReservedForAmd = 0x0014, ResizeableBar = 0x0015, DynamicPowerAllocation = 0x0016, ThpRequester = 0x0017, LatencyToleranceReporting = 0x0018, SecondaryPciExpress = 0x0019, ProtocolMultiplexing = 0x001a, ProcessAddressSpaceId = 0x001b, LnRequester = 0x001c, DownstreamPortContainment = 0x001d, L1PmSubstates = 0x001e, PrecisionTimeMeasurement = 0x001f, PciExpressOverMphy = 0x0020, FRSQueueing = 0x0021, ReadinessTimeReporting = 0x0022, DesignatedVendorSpecificExtendedCapability = 0x0023, VfResizeableBar = 0x0024, DataLinkFeature = 0x0025, PhysicalLayerSixteenGts = 0x0026, LaneMarginingAtTheReceiver = 0x0027, HierarchyId = 0x0028, NativePcieEnclosureManagement = 0x0029, PhysicalLayerThirtyTwoGts = 0x002a, AlternateProtocol = 0x002b, SystemFirmwareIntermediary = 0x002c, ShadowFunctions = 0x002d, DataObjectExchange = 0x002e, Reserved = 0x002f, ExtendedCapabilitiesAbsence = 0xffff, } impl From for PciExpressCapabilityId { fn from(c: u16) -> Self { match c { 0x0000 => PciExpressCapabilityId::NullCapability, 0x0001 => PciExpressCapabilityId::AdvancedErrorReporting, 0x0002 => 
PciExpressCapabilityId::VirtualChannelMultiFunctionVirtualChannelNotPresent,
            0x0003 => PciExpressCapabilityId::DeviceSerialNumber,
            0x0004 => PciExpressCapabilityId::PowerBudgeting,
            0x0005 => PciExpressCapabilityId::RootComplexLinkDeclaration,
            0x0006 => PciExpressCapabilityId::RootComplexInternalLinkControl,
            0x0007 => PciExpressCapabilityId::RootComplexEventCollectorEndpointAssociation,
            0x0008 => PciExpressCapabilityId::MultiFunctionVirtualChannel,
            0x0009 => PciExpressCapabilityId::VirtualChannelMultiFunctionVirtualChannelPresent,
            0x000a => PciExpressCapabilityId::RootComplexRegisterBlock,
            0x000b => PciExpressCapabilityId::VendorSpecificExtendedCapability,
            0x000c => PciExpressCapabilityId::ConfigurationAccessCorrelation,
            0x000d => PciExpressCapabilityId::AccessControlServices,
            0x000e => PciExpressCapabilityId::AlternativeRoutingIdentificationInterpretation,
            0x000f => PciExpressCapabilityId::AddressTranslationServices,
            0x0010 => PciExpressCapabilityId::SingleRootIoVirtualization,
            0x0011 => PciExpressCapabilityId::DeprecatedMultiRootIoVirtualization,
            0x0012 => PciExpressCapabilityId::Multicast,
            0x0013 => PciExpressCapabilityId::PageRequestInterface,
            0x0014 => PciExpressCapabilityId::ReservedForAmd,
            0x0015 => PciExpressCapabilityId::ResizeableBar,
            0x0016 => PciExpressCapabilityId::DynamicPowerAllocation,
            0x0017 => PciExpressCapabilityId::ThpRequester,
            0x0018 => PciExpressCapabilityId::LatencyToleranceReporting,
            0x0019 => PciExpressCapabilityId::SecondaryPciExpress,
            0x001a => PciExpressCapabilityId::ProtocolMultiplexing,
            0x001b => PciExpressCapabilityId::ProcessAddressSpaceId,
            0x001c => PciExpressCapabilityId::LnRequester,
            0x001d => PciExpressCapabilityId::DownstreamPortContainment,
            0x001e => PciExpressCapabilityId::L1PmSubstates,
            0x001f => PciExpressCapabilityId::PrecisionTimeMeasurement,
            0x0020 => PciExpressCapabilityId::PciExpressOverMphy,
            0x0021 => PciExpressCapabilityId::FRSQueueing,
            0x0022 => PciExpressCapabilityId::ReadinessTimeReporting,
            0x0023 => PciExpressCapabilityId::DesignatedVendorSpecificExtendedCapability,
            0x0024 => PciExpressCapabilityId::VfResizeableBar,
            0x0025 => PciExpressCapabilityId::DataLinkFeature,
            0x0026 => PciExpressCapabilityId::PhysicalLayerSixteenGts,
            0x0027 => PciExpressCapabilityId::LaneMarginingAtTheReceiver,
            0x0028 => PciExpressCapabilityId::HierarchyId,
            0x0029 => PciExpressCapabilityId::NativePcieEnclosureManagement,
            0x002a => PciExpressCapabilityId::PhysicalLayerThirtyTwoGts,
            0x002b => PciExpressCapabilityId::AlternateProtocol,
            0x002c => PciExpressCapabilityId::SystemFirmwareIntermediary,
            0x002d => PciExpressCapabilityId::ShadowFunctions,
            0x002e => PciExpressCapabilityId::DataObjectExchange,
            0xffff => PciExpressCapabilityId::ExtendedCapabilitiesAbsence,
            _ => PciExpressCapabilityId::Reserved,
        }
    }
}

/// See pci_regs.h in kernel
#[derive(Copy, Clone, PartialEq, Eq, Serialize, Deserialize, Debug)]
pub enum PciBarRegionType {
    Memory32BitRegion = 0,
    IoRegion = 0x01,
    Memory64BitRegion = 0x04,
}

/// Whether a BAR advertises prefetchable memory (bit 3 of the BAR register).
#[derive(Debug, Copy, Clone, Serialize, Deserialize)]
pub enum PciBarPrefetchable {
    NotPrefetchable = 0,
    Prefetchable = 0x08,
}

// NOTE(review): the extracted text read `impl From for bool`; the generic
// argument was lost during extraction and is reconstructed here — verify
// against VCS.
impl From<PciBarPrefetchable> for bool {
    fn from(val: PciBarPrefetchable) -> Self {
        match val {
            PciBarPrefetchable::NotPrefetchable => false,
            PciBarPrefetchable::Prefetchable => true,
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_pci_bdf_new() {
        let bdf = PciBdf::new(0x1234, 0x56, 0x1f, 0x7);
        assert_eq!(bdf.segment(), 0x1234);
        assert_eq!(bdf.bus(), 0x56);
        assert_eq!(bdf.device(), 0x1f);
        assert_eq!(bdf.function(), 0x7);
    }

    #[test]
    fn test_pci_bdf_from_u32() {
        let bdf = PciBdf::from(0x12345678);
        assert_eq!(bdf.segment(), 0x1234);
        assert_eq!(bdf.bus(), 0x56);
        assert_eq!(bdf.device(), 0x0f);
        assert_eq!(bdf.function(), 0x0);
    }

    #[test]
    fn test_pci_bdf_to_u32() {
        let bdf = PciBdf::new(0x1234, 0x56, 0x1f, 0x7);
        let val: u32 = bdf.into();
        assert_eq!(val, 0x123456ff);
    }

    #[test]
    fn test_pci_bdf_to_u16() {
        let bdf = PciBdf::new(0x1234, 0x56, 0x1f, 0x7);
        let val: u16 = bdf.into();
        assert_eq!(val, 0x56ff);
    }

    #[test]
    fn test_pci_bdf_from_str_valid() {
        let bdf = PciBdf::from_str("1234:56:1f.7").unwrap();
        assert_eq!(bdf.segment(), 0x1234);
        assert_eq!(bdf.bus(), 0x56);
        assert_eq!(bdf.device(), 0x1f);
        assert_eq!(bdf.function(), 0x7);
    }

    #[test]
    fn test_pci_bdf_from_str_zero() {
        let bdf = PciBdf::from_str("0000:00:00.0").unwrap();
        assert_eq!(bdf.segment(), 0);
        assert_eq!(bdf.bus(), 0);
        assert_eq!(bdf.device(), 0);
        assert_eq!(bdf.function(), 0);
    }

    #[test]
    fn test_pci_bdf_from_str_invalid_format() {
        assert!(matches!(
            PciBdf::from_str("invalid"),
            Err(PciBdfParseError::InvalidFormat(_))
        ));
        assert!(matches!(
            PciBdf::from_str("1234:56"),
            Err(PciBdfParseError::InvalidFormat(_))
        ));
        assert!(matches!(
            PciBdf::from_str("1234:56:78:9a.b"),
            Err(PciBdfParseError::InvalidFormat(_))
        ));
    }

    #[test]
    fn test_pci_bdf_from_str_invalid_hex() {
        assert!(matches!(
            PciBdf::from_str("xxxx:00:00.0"),
            Err(PciBdfParseError::InvalidHex(_))
        ));
        assert!(matches!(
            PciBdf::from_str("0000:xx:00.0"),
            Err(PciBdfParseError::InvalidHex(_))
        ));
        assert!(matches!(
            PciBdf::from_str("0000:00:xx.0"),
            Err(PciBdfParseError::InvalidHex(_))
        ));
        assert!(matches!(
            PciBdf::from_str("0000:00:00.x"),
            Err(PciBdfParseError::InvalidHex(_))
        ));
    }

    #[test]
    fn test_pci_bdf_display() {
        let bdf = PciBdf::new(0x1234, 0x56, 0x1f, 0x7);
        assert_eq!(format!("{}", bdf), "1234:56:1f.7");
    }

    #[test]
    fn test_pci_bdf_debug() {
        let bdf = PciBdf::new(0x1234, 0x56, 0x1f, 0x7);
        assert_eq!(format!("{:?}", bdf), "1234:56:1f.7");
    }

    #[test]
    fn test_pci_bdf_partial_eq() {
        let bdf1 = PciBdf::new(0x1234, 0x56, 0x1f, 0x7);
        let bdf2 = PciBdf::new(0x1234, 0x56, 0x1f, 0x7);
        let bdf3 = PciBdf::new(0x1234, 0x56, 0x1f, 0x6);
        assert_eq!(bdf1, bdf2);
        assert_ne!(bdf1, bdf3);
    }

    #[test]
    fn test_pci_bdf_partial_ord() {
        let bdf1 = PciBdf::new(0x1234, 0x56, 0x1f, 0x6);
        let bdf2 = PciBdf::new(0x1234, 0x56, 0x1f, 0x7);
        assert!(bdf1 < bdf2);
    }

    #[test]
    fn test_pci_bdf_deserialize_ok() {
        // Test deserializer
        let visitor = PciBdfVisitor;
        // NOTE(review): the turbofish type argument was lost during
        // extraction; reconstructed as `serde::de::value::Error` — verify
        // against VCS.
        let result = visitor
            .visit_str::<serde::de::value::Error>("1234:56:1f.7")
            .unwrap();
        assert_eq!(result, PciBdf::new(0x1234, 0x56, 0x1f, 0x7));
    }

    #[test]
    fn test_pci_bdf_deserialize_invalid() {
        // Test deserializer with invalid input returns error
        let visitor = PciBdfVisitor;
        assert!(
            visitor
                .visit_str::<serde::de::value::Error>("invalid")
                .is_err()
        );
    }

    #[test]
    fn test_pci_bdf_serialize() {
        // Test serializer using serde_test
        let bdf = PciBdf::new(0x1234, 0x56, 0x1f, 0x7);
        serde_test::assert_tokens(&bdf, &[serde_test::Token::Str("1234:56:1f.7")]);
    }
}

================================================
FILE: src/rebase-snap/Cargo.toml
================================================
[package]
name = "rebase-snap"
version = "1.16.0-dev"
authors = ["Amazon Firecracker team <firecracker-devel@amazon.com>"]
edition = "2024"
license = "Apache-2.0"

[[bin]]
name = "rebase-snap"
bench = false

[features]
tracing = ["log-instrument", "utils/tracing"]

[dependencies]
displaydoc = "0.2.5"
libc = "0.2.183"
log-instrument = { path = "../log-instrument", optional = true }
thiserror = "2.0.18"
vmm-sys-util = "0.15.0"

utils = { path = "../utils" }

[lints]
workspace = true

================================================
FILE: src/rebase-snap/src/main.rs
================================================
// Copyright 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

use std::env;
use std::fs::{File, OpenOptions};
use std::io::{Seek, SeekFrom};
use std::os::unix::io::AsRawFd;

use utils::arg_parser::{ArgParser, Argument, Arguments, UtilsArgParserError as ArgError};
use vmm_sys_util::seek_hole::SeekHole;

const REBASE_SNAP_VERSION: &str = env!("CARGO_PKG_VERSION");
const BASE_FILE: &str = "base-file";
const DIFF_FILE: &str = "diff-file";
const DEPRECATION_MSG: &str = "This tool is deprecated and will be removed in the future.
Please \ use 'snapshot-editor' instead.\n"; #[derive(Debug, thiserror::Error, displaydoc::Display)] enum FileError { /// Invalid base file: {0} InvalidBaseFile(std::io::Error), /// Invalid diff file: {0} InvalidDiffFile(std::io::Error), /// Failed to seek data: {0} SeekData(std::io::Error), /// Failed to seek hole: {0} SeekHole(std::io::Error), /// Failed to seek: {0} Seek(std::io::Error), /// Failed to send the file: {0} SendFile(std::io::Error), /// Failed to get metadata: {0} Metadata(std::io::Error), } #[derive(Debug, thiserror::Error, displaydoc::Display)] enum RebaseSnapError { /// Arguments parsing error: {0} \n\nFor more information try --help. ArgParse(ArgError), /// Error parsing the cmd line args: {0} SnapFile(FileError), /// Error merging the files: {0} RebaseFiles(FileError), } fn build_arg_parser<'a>() -> ArgParser<'a> { ArgParser::new() .arg( Argument::new(BASE_FILE) .required(true) .takes_value(true) .help("File path of the base mem snapshot."), ) .arg( Argument::new(DIFF_FILE) .required(true) .takes_value(true) .help("File path of the diff mem snapshot."), ) } fn get_files(args: &Arguments) -> Result<(File, File), FileError> { // Safe to unwrap since the required arguments are checked as part of // `arg_parser.parse_from_cmdline()` let base_file_path = args.single_value(BASE_FILE).unwrap(); let base_file = OpenOptions::new() .write(true) .open(base_file_path) .map_err(FileError::InvalidBaseFile)?; // Safe to unwrap since the required arguments are checked as part of // `arg_parser.parse_from_cmdline()` let diff_file_path = args.single_value(DIFF_FILE).unwrap(); let diff_file = OpenOptions::new() .read(true) .open(diff_file_path) .map_err(FileError::InvalidDiffFile)?; Ok((base_file, diff_file)) } fn rebase(base_file: &mut File, diff_file: &mut File) -> Result<(), FileError> { let mut cursor: u64 = 0; while let Some(block_start) = diff_file.seek_data(cursor).map_err(FileError::SeekData)? 
{ cursor = block_start; let block_end = match diff_file .seek_hole(block_start) .map_err(FileError::SeekHole)? { Some(hole_start) => hole_start, None => diff_file.metadata().map_err(FileError::Metadata)?.len(), }; while cursor < block_end { base_file .seek(SeekFrom::Start(cursor)) .map_err(FileError::Seek)?; // SAFETY: Safe because the parameters are valid. let num_transferred_bytes = unsafe { libc::sendfile64( base_file.as_raw_fd(), diff_file.as_raw_fd(), (&mut cursor as *mut u64).cast::(), usize::try_from(block_end.saturating_sub(cursor)).unwrap(), ) }; if num_transferred_bytes < 0 { return Err(FileError::SendFile(std::io::Error::last_os_error())); } } } Ok(()) } fn main() -> Result<(), RebaseSnapError> { let result = main_exec(); if let Err(e) = result { eprintln!("{}", e); Err(e) } else { Ok(()) } } fn main_exec() -> Result<(), RebaseSnapError> { let mut arg_parser = build_arg_parser(); arg_parser .parse_from_cmdline() .map_err(RebaseSnapError::ArgParse)?; let arguments = arg_parser.arguments(); if arguments.flag_present("help") { println!("Rebase_snap v{}", REBASE_SNAP_VERSION); println!( "Tool that copies all the non-sparse sections from a diff file onto a base file.\n" ); println!("{DEPRECATION_MSG}"); println!("{}", arg_parser.formatted_help()); return Ok(()); } if arguments.flag_present("version") { println!("Rebase_snap v{REBASE_SNAP_VERSION}\n{DEPRECATION_MSG}"); return Ok(()); } println!("{DEPRECATION_MSG}"); let (mut base_file, mut diff_file) = get_files(arguments).map_err(RebaseSnapError::SnapFile)?; rebase(&mut base_file, &mut diff_file).map_err(RebaseSnapError::RebaseFiles)?; Ok(()) } #[cfg(test)] mod tests { use std::io::{Seek, SeekFrom, Write}; use std::os::unix::fs::FileExt; use vmm_sys_util::{rand, tempfile}; use super::*; macro_rules! 
assert_err { ($expression:expr, $($pattern:tt)+) => { match $expression { Err($($pattern)+) => (), ref err => { println!("expected `{}` but got `{:?}`", stringify!($($pattern)+), err); assert!(false) } } } } #[test] fn test_parse_args() { let base_file = tempfile::TempFile::new().unwrap(); let base_file_path = base_file.as_path().to_str().unwrap().to_string(); let diff_file = tempfile::TempFile::new().unwrap(); let diff_file_path = diff_file.as_path().to_str().unwrap().to_string(); let arg_parser = build_arg_parser(); let arguments = &mut arg_parser.arguments().clone(); arguments .parse( vec![ "rebase_snap", "--base-file", "wrong_file", "--diff-file", "diff_file", ] .into_iter() .map(String::from) .collect::>() .as_ref(), ) .unwrap(); assert_err!(get_files(arguments), FileError::InvalidBaseFile(_)); let arguments = &mut arg_parser.arguments().clone(); arguments .parse( vec![ "rebase_snap", "--base-file", &base_file_path, "--diff-file", "diff_file", ] .into_iter() .map(String::from) .collect::>() .as_ref(), ) .unwrap(); assert_err!(get_files(arguments), FileError::InvalidDiffFile(_)); let arguments = &mut arg_parser.arguments().clone(); arguments .parse( vec![ "rebase_snap", "--base-file", &base_file_path, "--diff-file", &diff_file_path, ] .into_iter() .map(String::from) .collect::>() .as_ref(), ) .unwrap(); get_files(arguments).unwrap(); } fn check_file_content(file: &mut File, expected_content: &[u8]) { let mut buf = vec![0u8; expected_content.len()]; file.read_exact_at(buf.as_mut_slice(), 0).unwrap(); assert_eq!(&buf, expected_content); } #[test] fn test_rebase_corner_cases() { let mut base_file = tempfile::TempFile::new().unwrap().into_file(); let mut diff_file = tempfile::TempFile::new().unwrap().into_file(); // 1. 
Empty files rebase(&mut base_file, &mut diff_file).unwrap(); assert_eq!(base_file.metadata().unwrap().len(), 0); let initial_base_file_content = rand::rand_alphanumerics(50000).into_string().unwrap(); base_file .write_all(initial_base_file_content.as_bytes()) .unwrap(); // 2. Diff file that has only holes diff_file .set_len(initial_base_file_content.len() as u64) .unwrap(); rebase(&mut base_file, &mut diff_file).unwrap(); check_file_content(&mut base_file, initial_base_file_content.as_bytes()); // 3. Diff file that has only data let diff_data = rand::rand_alphanumerics(50000).into_string().unwrap(); diff_file.write_all(diff_data.as_bytes()).unwrap(); rebase(&mut base_file, &mut diff_file).unwrap(); check_file_content(&mut base_file, diff_data.as_bytes()); } #[test] fn test_rebase() { // The filesystem punches holes only for blocks >= 4096. // It doesn't make sense to test for smaller ones. let block_sizes: &[usize] = &[4096, 8192]; for &block_size in block_sizes { let mut expected_result = vec![]; let mut base_file = tempfile::TempFile::new().unwrap().into_file(); let mut diff_file = tempfile::TempFile::new().unwrap().into_file(); // 1. Populated block both in base and diff file let base_block = rand::rand_alphanumerics(block_size).into_string().unwrap(); base_file.write_all(base_block.as_bytes()).unwrap(); let diff_block = rand::rand_alphanumerics(block_size).into_string().unwrap(); diff_file.write_all(diff_block.as_bytes()).unwrap(); expected_result.append(&mut diff_block.into_bytes()); // 2. Populated block in base file, hole in diff file let base_block = rand::rand_alphanumerics(block_size).into_string().unwrap(); base_file.write_all(base_block.as_bytes()).unwrap(); diff_file .seek(SeekFrom::Current(i64::try_from(block_size).unwrap())) .unwrap(); expected_result.append(&mut base_block.into_bytes()); // 3. 
Populated block in base file, zeroes block in diff file let base_block = rand::rand_alphanumerics(block_size).into_string().unwrap(); base_file.write_all(base_block.as_bytes()).unwrap(); let mut diff_block = vec![0u8; block_size]; diff_file.write_all(&diff_block).unwrap(); expected_result.append(&mut diff_block); // Rebase and check the result rebase(&mut base_file, &mut diff_file).unwrap(); check_file_content(&mut base_file, &expected_result); // 4. The diff file is bigger let diff_block = rand::rand_alphanumerics(block_size).into_string().unwrap(); diff_file.write_all(diff_block.as_bytes()).unwrap(); expected_result.append(&mut diff_block.into_bytes()); // Rebase and check the result rebase(&mut base_file, &mut diff_file).unwrap(); check_file_content(&mut base_file, &expected_result); // 5. The base file is bigger let base_block = rand::rand_alphanumerics(block_size).into_string().unwrap(); base_file.write_all(base_block.as_bytes()).unwrap(); expected_result.append(&mut base_block.into_bytes()); // Rebase and check the result rebase(&mut base_file, &mut diff_file).unwrap(); check_file_content(&mut base_file, &expected_result); } } } ================================================ FILE: src/seccompiler/Cargo.toml ================================================ [package] name = "seccompiler" version = "1.16.0-dev" authors = ["Amazon Firecracker team "] edition = "2024" description = "Program that compiles multi-threaded seccomp-bpf filters expressed as JSON into raw BPF programs, serializing them and outputting them to a file." 
homepage = "https://firecracker-microvm.github.io/"
license = "Apache-2.0"

[lib]
bench = false

[[bin]]
name = "seccompiler-bin"
path = "src/bin.rs"
bench = false

[dependencies]
bitcode = { version = "0.6.9", features = ["serde"] }
clap = { version = "4.6.0", features = ["derive", "string"] }
displaydoc = "0.2.5"
libc = "0.2.183"
serde = { version = "1.0.228", features = ["derive"] }
serde_json = "1.0.149"
thiserror = "2.0.18"
zerocopy = { version = "0.8.42" }

[lints]
workspace = true

================================================
FILE: src/seccompiler/build.rs
================================================
// Copyright 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

// Build script: link the seccompiler binary against the system libseccomp
// (searched in /usr/local/lib first), which backs the FFI in bindings.rs.
fn main() {
    println!("cargo::rustc-link-search=/usr/local/lib");
    println!("cargo::rustc-link-lib=seccomp");
}

================================================
FILE: src/seccompiler/src/bin.rs
================================================
// Copyright 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

use clap::Parser;
use seccompiler::{CompilationError, compile_bpf};

// Used when the `--output-file` argument is not supplied.
const DEFAULT_OUTPUT_FILENAME: &str = "seccomp_binary_filter.out";

/// Command-line interface of seccompiler-bin (parsed by clap derive).
#[derive(Debug, Parser)]
#[command(version = format!("v{}", env!("CARGO_PKG_VERSION")))]
struct Cli {
    #[arg(
        short,
        long,
        help = "The computer architecture where the BPF program runs. Supported architectures: \
                x86_64, aarch64."
    )]
    target_arch: String,
    #[arg(short, long, help = "File path of the JSON input.")]
    input_file: String,
    #[arg(short, long, help = "Optional path of the output file.", default_value = DEFAULT_OUTPUT_FILENAME)]
    output_file: String,
    #[arg(
        short,
        long,
        help = "Deprecated! Transforms the filters into basic filters. Drops all argument checks \
                and rule-level actions. Not recommended."
    )]
    basic: bool,
    #[arg(
        long,
        help = "Output individual BPF files for each thread instead of a single combined file. \
                Used for testing purposes."
    )]
    split_output: bool,
}

// Parses the CLI arguments and forwards them to the library's `compile_bpf`.
fn main() -> Result<(), CompilationError> {
    let cli = Cli::parse();
    compile_bpf(
        &cli.input_file,
        &cli.target_arch,
        &cli.output_file,
        cli.basic,
        cli.split_output,
    )
}

================================================
FILE: src/seccompiler/src/bindings.rs
================================================
// Copyright 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// Copyright 2021 Sony Group Corporation
//
// SPDX-License-Identifier: Apache-2.0

#![allow(non_camel_case_types)]
#![allow(non_snake_case)]

//! Raw FFI bindings for libseccomp library

use std::os::raw::*;

// `seccomp_arch_add` returns this value when the architecture is already
// present in the filter; callers treat it as success.
pub const MINUS_EEXIST: i32 = -libc::EEXIST;

/// Filter context/handle (`*mut`)
pub type scmp_filter_ctx = *mut c_void;

/// Filter context/handle (`*const`)
pub type const_scmp_filter_ctx = *const c_void;

/// Comparison operators
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[repr(C)]
pub enum scmp_compare {
    _SCMP_CMP_MIN = 0,
    /// not equal
    SCMP_CMP_NE = 1,
    /// less than
    SCMP_CMP_LT = 2,
    /// less than or equal
    SCMP_CMP_LE = 3,
    /// equal
    SCMP_CMP_EQ = 4,
    /// greater than or equal
    SCMP_CMP_GE = 5,
    /// greater than
    SCMP_CMP_GT = 6,
    /// masked equality
    SCMP_CMP_MASKED_EQ = 7,
    _SCMP_CMP_MAX,
}

/// Argument datum
pub type scmp_datum_t = u64;

/// Argument / Value comparison definition
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[repr(C)]
pub struct scmp_arg_cmp {
    /// argument number, starting at 0
    pub arg: c_uint,
    /// the comparison op, e.g.
    /// `SCMP_CMP_*`
    pub op: scmp_compare,
    pub datum_a: scmp_datum_t,
    pub datum_b: scmp_datum_t,
}

// Architecture tokens (AUDIT_ARCH-style values used by libseccomp).
pub const SCMP_ARCH_X86_64: u32 = 0xc000003e;
pub const SCMP_ARCH_AARCH64: u32 = 0xc00000b7;

/// Kill the process
pub const SCMP_ACT_KILL_PROCESS: u32 = 0x80000000;
/// Kill the thread
pub const SCMP_ACT_KILL_THREAD: u32 = 0x00000000;
/// Throw a `SIGSYS` signal
pub const SCMP_ACT_TRAP: u32 = 0x00030000;
/// Notifies userspace
pub const SCMP_ACT_ERRNO_MASK: u32 = 0x00050000;

/// Return the specified error code
#[must_use]
pub const fn SCMP_ACT_ERRNO(x: u16) -> u32 {
    SCMP_ACT_ERRNO_MASK | x as u32
}

pub const SCMP_ACT_TRACE_MASK: u32 = 0x7ff00000;

/// Notify a tracing process with the specified value
#[must_use]
pub const fn SCMP_ACT_TRACE(x: u16) -> u32 {
    SCMP_ACT_TRACE_MASK | x as u32
}

/// Allow the syscall to be executed after the action has been logged
pub const SCMP_ACT_LOG: u32 = 0x7ffc0000;
/// Allow the syscall to be executed
pub const SCMP_ACT_ALLOW: u32 = 0x7fff0000;

#[link(name = "seccomp")]
unsafe extern "C" {
    /// Initialize the filter state
    ///
    /// - `def_action`: the default filter action
    ///
    /// This function initializes the internal seccomp filter state and should
    /// be called before any other functions in this library to ensure the filter
    /// state is initialized. Returns a filter context on success, `ptr::null()` on failure.
    pub safe fn seccomp_init(def_action: u32) -> scmp_filter_ctx;

    /// Adds an architecture to the filter
    ///
    /// - `ctx`: the filter context
    /// - `arch_token`: the architecture token, e.g. `SCMP_ARCH_*`
    ///
    /// This function adds a new architecture to the given seccomp filter context.
    /// Any new rules added after this function successfully returns will be added
    /// to this architecture but existing rules will not be added to this
    /// architecture. If the architecture token is [`SCMP_ARCH_NATIVE`] then the native
    /// architecture will be assumed. Returns zero on success, `-libc::EEXIST` if
    /// specified architecture is already present, other negative values on failure.
    pub fn seccomp_arch_add(ctx: scmp_filter_ctx, arch_token: u32) -> c_int;

    /// Resolve a syscall name to a number
    ///
    /// - `name`: the syscall name
    ///
    /// Resolve the given syscall name to the syscall number. Returns the syscall
    /// number on success, including negative pseudo syscall numbers (e.g. `__PNR_*`);
    /// returns [`__NR_SCMP_ERROR`] on failure.
    pub fn seccomp_syscall_resolve_name(name: *const c_char) -> c_int;

    /// Add a new rule to the filter
    ///
    /// - `ctx`: the filter context
    /// - `action`: the filter action
    /// - `syscall`: the syscall number
    /// - `arg_cnt`: the number of argument filters in the argument filter chain
    /// - `...`: [`scmp_arg_cmp`] structs
    ///
    /// This function adds a series of new argument/value checks to the seccomp
    /// filter for the given syscall; multiple argument/value checks can be
    /// specified and they will be chained together (AND'd together) in the filter.
    /// If the specified rule needs to be adjusted due to architecture specifics it
    /// will be adjusted without notification. Returns zero on success, negative
    /// values on failure.
    pub fn seccomp_rule_add(
        ctx: scmp_filter_ctx,
        action: u32,
        syscall: c_int,
        arg_cnt: c_uint,
        ...
    ) -> c_int;

    /// Add a new rule to the filter
    ///
    /// - `ctx`: the filter context
    /// - `action`: the filter action
    /// - `syscall`: the syscall number
    /// - `arg_cnt`: the number of elements in the arg_array parameter
    /// - `arg_array`: array of [`scmp_arg_cmp`] structs
    ///
    /// This function adds a series of new argument/value checks to the seccomp
    /// filter for the given syscall; multiple argument/value checks can be
    /// specified and they will be chained together (AND'd together) in the filter.
    /// If the specified rule needs to be adjusted due to architecture specifics it
    /// will be adjusted without notification. Returns zero on success, negative
    /// values on failure.
    pub fn seccomp_rule_add_array(
        ctx: scmp_filter_ctx,
        action: u32,
        syscall: c_int,
        arg_cnt: c_uint,
        arg_array: *const scmp_arg_cmp,
    ) -> c_int;

    /// Generate seccomp Berkeley Packet Filter (BPF) code and export it to a file
    ///
    /// - `ctx`: the filter context
    /// - `fd`: the destination fd
    ///
    /// This function generates seccomp Berkeley Packer Filter (BPF) code and writes
    /// it to the given fd. Returns zero on success, negative values on failure.
    pub fn seccomp_export_bpf(ctx: const_scmp_filter_ctx, fd: c_int) -> c_int;
}

/// Negative pseudo syscall number returned by some functions in case of an error
pub const __NR_SCMP_ERROR: c_int = -1;

================================================
FILE: src/seccompiler/src/lib.rs
================================================
// Copyright 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

use std::collections::BTreeMap;
use std::fs::File;
use std::io::{Read, Seek};
use std::os::fd::{AsRawFd, FromRawFd};
use std::os::unix::fs::MetadataExt;
use std::path::Path;
use std::str::FromStr;

mod bindings;
use bindings::*;
pub mod types;
pub use types::*;
use zerocopy::IntoBytes;

// This byte limit is passed to `bitcode` to guard against a potential memory
// allocation DOS caused by binary filters that are too large.
// This limit can be safely determined since the maximum length of a BPF
// filter is 4096 instructions and Firecracker has a finite number of threads.
const DESERIALIZATION_BYTES_LIMIT: usize = 100_000;

/// Binary filter compilation errors.
#[derive(Debug, thiserror::Error, displaydoc::Display)] pub enum CompilationError { /// Cannot open input file: {0} IntputOpen(std::io::Error), /// Cannot read input file: {0} InputRead(std::io::Error), /// Cannot deserialize json: {0} JsonDeserialize(serde_json::Error), /// Cannot parse arch: {0} ArchParse(String), /// Cannot create libseccomp context LibSeccompContext, /// Cannot add libseccomp arch LibSeccompArch, /// Cannot add libseccomp syscall LibSeccompSycall, /// Cannot add libseccomp syscall rule LibSeccompRule, /// Cannot export libseccomp bpf LibSeccompExport, /// Cannot create memfd: {0} MemfdCreate(std::io::Error), /// Cannot rewind memfd: {0} MemfdRewind(std::io::Error), /// Cannot read from memfd: {0} MemfdRead(std::io::Error), /// Cannot create output file: {0} OutputCreate(std::io::Error), /// Cannot serialize bfp: {0} BitcodeSerialize(bitcode::Error), /// Serialized BPF exceeds size limit of {0} bytes SizeLimitExceeded(usize), } pub fn compile_bpf( input_path: &str, arch: &str, out_path: &str, basic: bool, split_output: bool, ) -> Result<(), CompilationError> { let mut file_content = String::new(); File::open(input_path) .map_err(CompilationError::IntputOpen)? .read_to_string(&mut file_content) .map_err(CompilationError::InputRead)?; let bpf_map_json: BpfJson = serde_json::from_str(&file_content).map_err(CompilationError::JsonDeserialize)?; let arch = TargetArch::from_str(arch).map_err(CompilationError::ArchParse)?; // SAFETY: Safe because the parameters are valid. let memfd_fd = unsafe { libc::memfd_create(c"bpf".as_ptr().cast(), 0) }; if memfd_fd < 0 { return Err(CompilationError::MemfdCreate( std::io::Error::last_os_error(), )); } // SAFETY: Safe because the parameters are valid. 
let mut memfd = unsafe { File::from_raw_fd(memfd_fd) }; let mut bpf_map: BTreeMap> = BTreeMap::new(); for (name, filter) in bpf_map_json.0.iter() { let default_action = filter.default_action.to_scmp_type(); let filter_action = filter.filter_action.to_scmp_type(); // SAFETY: Safe as all args are correct. let bpf_filter = { let r = seccomp_init(default_action); if r.is_null() { return Err(CompilationError::LibSeccompContext); } r }; // SAFETY: Safe as all args are correct. unsafe { let r = seccomp_arch_add(bpf_filter, arch.to_scmp_type()); if r != 0 && r != MINUS_EEXIST { return Err(CompilationError::LibSeccompArch); } } for rule in filter.filter.iter() { // SAFETY: Safe as all args are correct. let syscall = unsafe { let r = seccomp_syscall_resolve_name(rule.syscall.as_ptr()); if r == __NR_SCMP_ERROR { return Err(CompilationError::LibSeccompSycall); } r }; // TODO remove when we drop deprecated "basic" arg from cli. // "basic" bpf means it ignores condition checks. if basic { // SAFETY: Safe as all args are correct. unsafe { if seccomp_rule_add(bpf_filter, filter_action, syscall, 0) != 0 { return Err(CompilationError::LibSeccompRule); } } } else if let Some(rules) = &rule.args { let comparators = rules .iter() .map(|rule| rule.to_scmp_type()) .collect::>(); // SAFETY: Safe as all args are correct. // We can assume no one will define u32::MAX // filters for a syscall. #[allow(clippy::cast_possible_truncation)] unsafe { if seccomp_rule_add_array( bpf_filter, filter_action, syscall, comparators.len() as u32, comparators.as_ptr(), ) != 0 { return Err(CompilationError::LibSeccompRule); } } } else { // SAFETY: Safe as all args are correct. unsafe { if seccomp_rule_add(bpf_filter, filter_action, syscall, 0) != 0 { return Err(CompilationError::LibSeccompRule); } } } } // SAFETY: Safe as all args are correect. 
unsafe { if seccomp_export_bpf(bpf_filter, memfd.as_raw_fd()) != 0 { return Err(CompilationError::LibSeccompExport); } } memfd.rewind().map_err(CompilationError::MemfdRewind)?; // Cast is safe because usize == u64 #[allow(clippy::cast_possible_truncation)] let size = memfd.metadata().unwrap().size() as usize; // Bpf instructions are 8 byte values and 4 byte alignment. // We use u64 to satisfy these requirements. let instructions = size / std::mem::size_of::(); let mut bpf = vec![0_u64; instructions]; memfd .read_exact(bpf.as_mut_bytes()) .map_err(CompilationError::MemfdRead)?; memfd.rewind().map_err(CompilationError::MemfdRewind)?; bpf_map.insert(name.clone(), bpf); } if split_output { // Output individual files for each thread (for testing) let base_path = Path::new(out_path); let parent = base_path.parent().unwrap_or_else(|| Path::new(".")); for (thread_name, bpf_data) in &bpf_map { let thread_file_path = parent.join(format!("{}.bpf", thread_name)); let mut thread_file = File::create(&thread_file_path).map_err(CompilationError::OutputCreate)?; // Write raw BPF data as bytes use zerocopy::IntoBytes; std::io::Write::write_all(&mut thread_file, bpf_data.as_bytes()) .map_err(CompilationError::OutputCreate)?; } } else { // Create and write the main bitcode output file let mut output_file = File::create(out_path).map_err(CompilationError::OutputCreate)?; let encoded = bitcode::serialize(&bpf_map).map_err(CompilationError::BitcodeSerialize)?; // Check size limit to prevent DOS attacks if encoded.len() > DESERIALIZATION_BYTES_LIMIT { return Err(CompilationError::SizeLimitExceeded( DESERIALIZATION_BYTES_LIMIT, )); } std::io::Write::write_all(&mut output_file, &encoded) .map_err(CompilationError::OutputCreate)?; } Ok(()) } ================================================ FILE: src/seccompiler/src/types.rs ================================================ // Copyright 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
// SPDX-License-Identifier: Apache-2.0 use std::collections::BTreeMap; use std::ffi::CString; use std::str::FromStr; use serde::*; // use libseccomp::{ScmpAction, ScmpArch, ScmpCompareOp}; use crate::bindings::*; /// Comparison to perform when matching a condition. #[derive(Debug, Deserialize)] #[serde(rename_all = "snake_case")] pub enum SeccompCmpOp { Eq, Ge, Gt, Le, Lt, MaskedEq(u64), Ne, } /// Seccomp argument value length. #[derive(Clone, Debug, Deserialize, PartialEq)] #[serde(rename_all = "lowercase")] pub enum SeccompCmpArgLen { /// Argument value length is 4 bytes. Dword, /// Argument value length is 8 bytes. Qword, } /// Condition that syscall must match in order to satisfy a rule. #[derive(Debug, Deserialize)] pub struct SeccompCondition { pub index: u8, pub op: SeccompCmpOp, pub val: u64, #[serde(rename = "type")] pub val_len: SeccompCmpArgLen, } impl SeccompCondition { pub fn to_scmp_type(&self) -> scmp_arg_cmp { match self.op { SeccompCmpOp::Eq => { // When using EQ libseccomp compares the whole 64 bits. In // general this is not a problem, but for example we have // observed musl `ioctl` to leave garbage in the upper bits of // the `request` argument. There is a GH issue to allow 32bit // comparisons (see // https://github.com/seccomp/libseccomp/issues/383) but is not // merged yet. Until that is available, do a masked comparison // with the upper 32bits set to 0, so we will compare that `hi32 // & 0x0 == 0`, which is always true. This costs one additional // instruction, but will be likely be optimized away by the BPF // JIT. 
match self.val_len { SeccompCmpArgLen::Dword => scmp_arg_cmp { arg: self.index as u32, op: scmp_compare::SCMP_CMP_MASKED_EQ, datum_a: 0x00000000FFFFFFFF, datum_b: self.val, }, SeccompCmpArgLen::Qword => scmp_arg_cmp { arg: self.index as u32, op: scmp_compare::SCMP_CMP_EQ, datum_a: self.val, datum_b: 0, }, } } SeccompCmpOp::Ge => scmp_arg_cmp { arg: self.index as u32, op: scmp_compare::SCMP_CMP_GE, datum_a: self.val, datum_b: 0, }, SeccompCmpOp::Gt => scmp_arg_cmp { arg: self.index as u32, op: scmp_compare::SCMP_CMP_GT, datum_a: self.val, datum_b: 0, }, SeccompCmpOp::Le => scmp_arg_cmp { arg: self.index as u32, op: scmp_compare::SCMP_CMP_LE, datum_a: self.val, datum_b: 0, }, SeccompCmpOp::Lt => scmp_arg_cmp { arg: self.index as u32, op: scmp_compare::SCMP_CMP_LT, datum_a: self.val, datum_b: 0, }, SeccompCmpOp::Ne => scmp_arg_cmp { arg: self.index as u32, op: scmp_compare::SCMP_CMP_NE, datum_a: self.val, datum_b: 0, }, SeccompCmpOp::MaskedEq(m) => scmp_arg_cmp { arg: self.index as u32, op: scmp_compare::SCMP_CMP_MASKED_EQ, datum_a: m, datum_b: self.val, }, } } } /// Actions that `seccomp` can apply to process calling a syscall. #[derive(Debug, Deserialize)] #[serde(rename_all = "snake_case")] pub enum SeccompAction { Allow, Errno(u16), KillThread, KillProcess, Log, Trace(u16), Trap, } impl SeccompAction { pub fn to_scmp_type(&self) -> u32 { match self { SeccompAction::Allow => SCMP_ACT_ALLOW, SeccompAction::Errno(e) => SCMP_ACT_ERRNO(*e), SeccompAction::KillThread => SCMP_ACT_KILL_THREAD, SeccompAction::KillProcess => SCMP_ACT_KILL_PROCESS, SeccompAction::Log => SCMP_ACT_LOG, SeccompAction::Trace(t) => SCMP_ACT_TRACE(*t), SeccompAction::Trap => SCMP_ACT_TRAP, } } } /// Rule that `seccomp` attempts to match for a syscall. /// /// If all conditions match then rule gets matched. /// The action of the first rule that matches will be applied to the calling process. /// If no rule matches the default action is applied. 
#[derive(Debug, Deserialize)] pub struct SyscallRule { pub syscall: CString, pub args: Option>, } /// Filter containing rules assigned to syscall numbers. #[derive(Debug, Deserialize)] pub struct Filter { pub default_action: SeccompAction, pub filter_action: SeccompAction, pub filter: Vec, } /// Deserializable object that represents the Json filter file. #[derive(Debug, Deserialize)] pub struct BpfJson(pub BTreeMap); /// Supported target architectures. #[derive(Debug)] pub enum TargetArch { X86_64, Aarch64, } impl TargetArch { pub fn to_scmp_type(&self) -> u32 { match self { TargetArch::X86_64 => SCMP_ARCH_X86_64, TargetArch::Aarch64 => SCMP_ARCH_AARCH64, } } } impl FromStr for TargetArch { type Err = String; fn from_str(s: &str) -> Result { match s.to_lowercase().as_str() { "x86_64" => Ok(TargetArch::X86_64), "aarch64" => Ok(TargetArch::Aarch64), _ => Err(s.to_string()), } } } ================================================ FILE: src/snapshot-editor/Cargo.toml ================================================ [package] name = "snapshot-editor" version = "1.16.0-dev" authors = ["Amazon Firecracker team "] edition = "2024" license = "Apache-2.0" [[bin]] name = "snapshot-editor" bench = false [features] tracing = ["log-instrument", "fc_utils/tracing", "vmm/tracing"] [dependencies] clap = { version = "4.6.0", features = ["derive", "string"] } displaydoc = "0.2.5" fc_utils = { package = "utils", path = "../utils" } libc = "0.2.183" log-instrument = { path = "../log-instrument", optional = true } semver = "1.0.27" thiserror = "2.0.18" vmm = { path = "../vmm" } vmm-sys-util = "0.15.0" [target.'cfg(target_arch = "aarch64")'.dependencies] clap-num = "1.2.0" [lints] workspace = true ================================================ FILE: src/snapshot-editor/src/edit_memory.rs ================================================ // Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
// SPDX-License-Identifier: Apache-2.0 use std::fs::OpenOptions; use std::io::{Seek, SeekFrom}; use std::os::fd::AsRawFd; use std::path::PathBuf; use clap::Subcommand; use vmm::utils::u64_to_usize; use vmm_sys_util::seek_hole::SeekHole; #[derive(Debug, thiserror::Error, displaydoc::Display)] pub enum EditMemoryError { /// Could not open memory file: {0} OpenMemoryFile(std::io::Error), /// Could not open diff file: {0} OpenDiffFile(std::io::Error), /// Failed to seek data in diff file: {0} SeekDataDiff(std::io::Error), /// Failed to seek hole in diff file: {0} SeekHoleDiff(std::io::Error), /// Failed to get metadata for diff file: {0} MetadataDiff(std::io::Error), /// Failed to seek in memory file: {0} SeekMemory(std::io::Error), /// Failed to send the file: {0} SendFile(std::io::Error), } #[derive(Debug, Subcommand)] pub enum EditMemorySubCommand { /// Apply a diff snapshot on top of a base one Rebase { /// Path to the memory file. #[arg(short, long)] memory_path: PathBuf, /// Path to the diff file. #[arg(short, long)] diff_path: PathBuf, }, } pub fn edit_memory_command(command: EditMemorySubCommand) -> Result<(), EditMemoryError> { match command { EditMemorySubCommand::Rebase { memory_path, diff_path, } => rebase(memory_path, diff_path)?, } Ok(()) } fn rebase(memory_path: PathBuf, diff_path: PathBuf) -> Result<(), EditMemoryError> { let mut base_file = OpenOptions::new() .write(true) .open(memory_path) .map_err(EditMemoryError::OpenMemoryFile)?; let mut diff_file = OpenOptions::new() .read(true) .open(diff_path) .map_err(EditMemoryError::OpenDiffFile)?; let mut cursor: u64 = 0; while let Some(block_start) = diff_file .seek_data(cursor) .map_err(EditMemoryError::SeekDataDiff)? { cursor = block_start; let block_end = match diff_file .seek_hole(block_start) .map_err(EditMemoryError::SeekHoleDiff)? { Some(hole_start) => hole_start, None => diff_file .metadata() .map_err(EditMemoryError::MetadataDiff)? 
.len(), }; while cursor < block_end { base_file .seek(SeekFrom::Start(cursor)) .map_err(EditMemoryError::SeekMemory)?; // SAFETY: Safe because the parameters are valid. let num_transferred_bytes = unsafe { libc::sendfile64( base_file.as_raw_fd(), diff_file.as_raw_fd(), (&mut cursor as *mut u64).cast::(), u64_to_usize(block_end.saturating_sub(cursor)), ) }; if num_transferred_bytes < 0 { return Err(EditMemoryError::SendFile(std::io::Error::last_os_error())); } } } Ok(()) } #[cfg(test)] mod tests { use std::fs::File; use std::io::{Seek, SeekFrom, Write}; use std::os::unix::fs::FileExt; use vmm_sys_util::{rand, tempfile}; use super::*; fn check_file_content(file: &File, expected_content: &[u8]) { assert_eq!( file.metadata().unwrap().len(), expected_content.len() as u64 ); let mut buf = vec![0u8; expected_content.len()]; file.read_exact_at(buf.as_mut_slice(), 0).unwrap(); assert_eq!(&buf, expected_content); } #[test] fn test_rebase_empty_files() { let base = tempfile::TempFile::new().unwrap(); let diff = tempfile::TempFile::new().unwrap(); let base_file = base.as_file(); let base_path = base.as_path().to_path_buf(); let diff_path = diff.as_path().to_path_buf(); // Empty files rebase(base_path, diff_path).unwrap(); assert_eq!(base_file.metadata().unwrap().len(), 0); } #[test] fn test_rebase_empty_diff() { let base = tempfile::TempFile::new().unwrap(); let diff = tempfile::TempFile::new().unwrap(); let mut base_file = base.as_file(); let diff_file = diff.as_file(); let base_path = base.as_path().to_path_buf(); let diff_path = diff.as_path().to_path_buf(); let initial_base_file_content = rand::rand_bytes(50000); base_file.write_all(&initial_base_file_content).unwrap(); // Diff file that has only holes diff_file .set_len(initial_base_file_content.len() as u64) .unwrap(); rebase(base_path, diff_path).unwrap(); check_file_content(base_file, &initial_base_file_content); } #[test] fn test_rebase_full_diff() { let base = tempfile::TempFile::new().unwrap(); let diff = 
tempfile::TempFile::new().unwrap(); let base_file = base.as_file(); let mut diff_file = diff.as_file(); let base_path = base.as_path().to_path_buf(); let diff_path = diff.as_path().to_path_buf(); // Diff file that has only data let diff_data = rand::rand_bytes(50000); diff_file.write_all(&diff_data).unwrap(); rebase(base_path, diff_path).unwrap(); check_file_content(base_file, &diff_data); } #[test] fn test_rebase() { // The filesystem punches holes only for blocks >= 4096. // It doesn't make sense to test for smaller ones. let block_sizes: &[usize] = &[4096, 8192]; for &block_size in block_sizes { let mut expected_result = vec![]; let base = tempfile::TempFile::new().unwrap(); let diff = tempfile::TempFile::new().unwrap(); let mut base_file = base.as_file(); let mut diff_file = diff.as_file(); let base_path = base.as_path().to_path_buf(); let diff_path = diff.as_path().to_path_buf(); // 1. Populated block both in base and diff file // block: [ ] // diff: [ ] // expected: [d] let base_block = rand::rand_bytes(block_size); base_file.write_all(&base_block).unwrap(); let diff_block = rand::rand_bytes(block_size); diff_file.write_all(&diff_block).unwrap(); expected_result.extend(diff_block); // 2. Populated block in base file, hole in diff file // block: [ ] [ ] // diff: [ ] ___ // expected: [d] [b] let base_block = rand::rand_bytes(block_size); base_file.write_all(&base_block).unwrap(); diff_file .seek(SeekFrom::Current(i64::try_from(block_size).unwrap())) .unwrap(); expected_result.extend(base_block); // 3. 
Populated block in base file, zeroes block in diff file // block: [ ] [ ] [ ] // diff: [ ] ___ [0] // expected: [d] [b] [d] let base_block = rand::rand_bytes(block_size); base_file.write_all(&base_block).unwrap(); let diff_block = vec![0u8; block_size]; diff_file.write_all(&diff_block).unwrap(); expected_result.extend(diff_block); // Rebase and check the result rebase(base_path.clone(), diff_path.clone()).unwrap(); check_file_content(base_file, &expected_result); // 4. The diff file is bigger // block: [ ] [ ] [ ] // diff: [ ] ___ [0] [ ] // expected: [d] [b] [d] [d] let diff_block = rand::rand_bytes(block_size); diff_file.write_all(&diff_block).unwrap(); expected_result.extend(diff_block); // Rebase and check the result rebase(base_path.clone(), diff_path.clone()).unwrap(); check_file_content(base_file, &expected_result); // 5. The base file is bigger // block: [ ] [ ] [ ] [ ] [ ] // diff: [ ] ___ [0] [ ] // expected: [d] [b] [d] [d] [b] let base_block = rand::rand_bytes(block_size); // Adding to the base file 2 times because // it is 1 block smaller then diff right now. base_file.write_all(&base_block).unwrap(); base_file.write_all(&base_block).unwrap(); expected_result.extend(base_block); // Rebase and check the result rebase(base_path, diff_path).unwrap(); check_file_content(base_file, &expected_result); } } } ================================================ FILE: src/snapshot-editor/src/edit_vmstate.rs ================================================ // Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
// SPDX-License-Identifier: Apache-2.0 use std::path::PathBuf; use clap::Subcommand; use clap_num::maybe_hex; use vmm::arch::aarch64::regs::Aarch64RegisterVec; use vmm::persist::MicrovmState; use crate::utils::{UtilsError, open_vmstate, save_vmstate}; #[derive(Debug, thiserror::Error, displaydoc::Display)] pub enum EditVmStateError { /// {0} Utils(#[from] UtilsError), } #[derive(Debug, Subcommand)] pub enum EditVmStateSubCommand { /// Remove registers from vcpu states. RemoveRegs { /// Set of registers to remove. /// Values should be registers ids as the are defined in KVM. #[arg(value_parser=maybe_hex::, num_args = 1.., value_delimiter = ' ')] regs: Vec, /// Path to the vmstate file. #[arg(short, long)] vmstate_path: PathBuf, /// Path of output file. #[arg(short, long)] output_path: PathBuf, }, } pub fn edit_vmstate_command(command: EditVmStateSubCommand) -> Result<(), EditVmStateError> { match command { EditVmStateSubCommand::RemoveRegs { regs, vmstate_path, output_path, } => edit(&vmstate_path, &output_path, |state| { remove_regs(state, ®s) })?, } Ok(()) } fn edit( vmstate_path: &PathBuf, output_path: &PathBuf, f: impl Fn(MicrovmState) -> Result, ) -> Result<(), EditVmStateError> { let snapshot = open_vmstate(vmstate_path)?; let microvm_state = f(snapshot.data)?; save_vmstate(microvm_state, output_path)?; Ok(()) } fn remove_regs( mut state: MicrovmState, remove_regs: &[u64], ) -> Result { for (i, vcpu_state) in state.vcpu_states.iter_mut().enumerate() { println!("Modifying state for vCPU {i}"); let mut removed = vec![false; remove_regs.len()]; let mut new_regs = Aarch64RegisterVec::default(); for reg in vcpu_state.regs.iter().filter(|reg| { if let Some(pos) = remove_regs.iter().position(|r| r == ®.id) { removed[pos] = true; false } else { true } }) { new_regs.push(reg); } vcpu_state.regs = new_regs; for (reg, removed) in remove_regs.iter().zip(removed.iter()) { print!("Register {reg:#x}: "); match removed { true => println!("removed"), false => println!("not 
present"), } } } Ok(state) } #[cfg(test)] mod tests { use super::*; #[test] fn test_remove_regs() { const KVM_REG_SIZE_U8: u64 = 0; const KVM_REG_SIZE_U16: u64 = 0x10000000000000; const KVM_REG_SIZE_U32: u64 = 0x20000000000000; use vmm::arch::aarch64::regs::Aarch64RegisterRef; use vmm::arch::aarch64::vcpu::VcpuState; let vcpu_state = VcpuState { regs: { let mut regs = Aarch64RegisterVec::default(); let reg_data: u8 = 69; regs.push(Aarch64RegisterRef::new( KVM_REG_SIZE_U8, ®_data.to_le_bytes(), )); let reg_data: u16 = 69; regs.push(Aarch64RegisterRef::new( KVM_REG_SIZE_U16, ®_data.to_le_bytes(), )); let reg_data: u32 = 69; regs.push(Aarch64RegisterRef::new( KVM_REG_SIZE_U32, ®_data.to_le_bytes(), )); regs }, ..Default::default() }; let state = MicrovmState { vcpu_states: vec![vcpu_state], ..Default::default() }; let new_state = remove_regs(state, &[KVM_REG_SIZE_U32]).unwrap(); let expected_vcpu_state = VcpuState { regs: { let mut regs = Aarch64RegisterVec::default(); let reg_data: u8 = 69; regs.push(Aarch64RegisterRef::new( KVM_REG_SIZE_U8, ®_data.to_le_bytes(), )); let reg_data: u16 = 69; regs.push(Aarch64RegisterRef::new( KVM_REG_SIZE_U16, ®_data.to_le_bytes(), )); regs }, ..Default::default() }; assert_eq!(new_state.vcpu_states[0].regs, expected_vcpu_state.regs); } #[test] fn test_remove_non_existed_regs() { const KVM_REG_SIZE_U8: u64 = 0; const KVM_REG_SIZE_U16: u64 = 0x10000000000000; const KVM_REG_SIZE_U32: u64 = 0x20000000000000; use vmm::arch::aarch64::regs::Aarch64RegisterRef; use vmm::arch::aarch64::vcpu::VcpuState; let vcpu_state = VcpuState { regs: { let mut regs = Aarch64RegisterVec::default(); let reg_data: u8 = 69; regs.push(Aarch64RegisterRef::new( KVM_REG_SIZE_U8, ®_data.to_le_bytes(), )); let reg_data: u16 = 69; regs.push(Aarch64RegisterRef::new( KVM_REG_SIZE_U16, ®_data.to_le_bytes(), )); regs }, ..Default::default() }; let state_clone = MicrovmState { vcpu_states: vec![vcpu_state.clone()], ..Default::default() }; let state = MicrovmState { 
vcpu_states: vec![vcpu_state], ..Default::default() }; let new_state = remove_regs(state_clone, &[KVM_REG_SIZE_U32]).unwrap(); assert_eq!(new_state.vcpu_states[0].regs, state.vcpu_states[0].regs); } } ================================================ FILE: src/snapshot-editor/src/info.rs ================================================ // Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 use std::path::PathBuf; use clap::Subcommand; use vmm::persist::MicrovmState; use vmm::snapshot::Snapshot; use crate::utils::*; #[derive(Debug, thiserror::Error, displaydoc::Display)] pub enum InfoVmStateError { /// {0} Utils(#[from] UtilsError), } #[derive(Debug, Subcommand)] pub enum InfoVmStateSubCommand { /// Print snapshot version. Version { /// Path to the vmstate file. #[arg(short, long)] vmstate_path: PathBuf, }, /// Print info about vcpu states. VcpuStates { /// Path to the vmstate file. #[arg(short, long)] vmstate_path: PathBuf, }, /// Print readable MicroVM state. VmState { /// Path to the vmstate file. #[arg(short, long)] vmstate_path: PathBuf, }, } pub fn info_vmstate_command(command: InfoVmStateSubCommand) -> Result<(), InfoVmStateError> { match command { InfoVmStateSubCommand::Version { vmstate_path } => info(&vmstate_path, info_version)?, InfoVmStateSubCommand::VcpuStates { vmstate_path } => { info(&vmstate_path, info_vcpu_states)? 
} InfoVmStateSubCommand::VmState { vmstate_path } => info(&vmstate_path, info_vmstate)?, } Ok(()) } fn info( vmstate_path: &PathBuf, f: impl Fn(&Snapshot) -> Result<(), InfoVmStateError>, ) -> Result<(), InfoVmStateError> { let snapshot = open_vmstate(vmstate_path)?; f(&snapshot)?; Ok(()) } fn info_version(snapshot: &Snapshot) -> Result<(), InfoVmStateError> { println!("v{}", snapshot.version()); Ok(()) } fn info_vcpu_states(snapshot: &Snapshot) -> Result<(), InfoVmStateError> { for (i, state) in snapshot.data.vcpu_states.iter().enumerate() { println!("vcpu {i}:"); println!("{state:#?}"); } Ok(()) } fn info_vmstate(snapshot: &Snapshot) -> Result<(), InfoVmStateError> { println!("{:#?}", snapshot.data); Ok(()) } ================================================ FILE: src/snapshot-editor/src/main.rs ================================================ // Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 use clap::{Parser, Subcommand}; mod edit_memory; #[cfg(target_arch = "aarch64")] mod edit_vmstate; mod info; mod utils; use edit_memory::{EditMemoryError, EditMemorySubCommand, edit_memory_command}; #[cfg(target_arch = "aarch64")] use edit_vmstate::{EditVmStateError, EditVmStateSubCommand, edit_vmstate_command}; use info::{InfoVmStateError, InfoVmStateSubCommand, info_vmstate_command}; #[derive(Debug, thiserror::Error, displaydoc::Display)] enum SnapEditorError { /// Error during editing memory file: {0} EditMemory(#[from] EditMemoryError), #[cfg(target_arch = "aarch64")] /// Error during editing vmstate file: {0} EditVmState(#[from] EditVmStateError), /// Error during getting info from a vmstate file: {0} InfoVmState(#[from] InfoVmStateError), } #[derive(Debug, Parser)] #[command(version = format!("v{}", env!("CARGO_PKG_VERSION")))] struct Cli { #[command(subcommand)] command: Command, } #[derive(Debug, Subcommand)] enum Command { #[command(subcommand)] EditMemory(EditMemorySubCommand), #[cfg(target_arch = 
"aarch64")] #[command(subcommand)] EditVmstate(EditVmStateSubCommand), #[command(subcommand)] InfoVmstate(InfoVmStateSubCommand), } fn main_exec() -> Result<(), SnapEditorError> { let cli = Cli::parse(); match cli.command { Command::EditMemory(command) => edit_memory_command(command)?, #[cfg(target_arch = "aarch64")] Command::EditVmstate(command) => edit_vmstate_command(command)?, Command::InfoVmstate(command) => info_vmstate_command(command)?, } Ok(()) } fn main() -> Result<(), SnapEditorError> { let result = main_exec(); if let Err(e) = result { eprintln!("{}", e); Err(e) } else { Ok(()) } } ================================================ FILE: src/snapshot-editor/src/utils.rs ================================================ // Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 use std::fs::{File, OpenOptions}; use std::path::PathBuf; use vmm::persist::MicrovmState; use vmm::snapshot::Snapshot; // Some errors are only used in aarch64 code #[allow(unused)] #[derive(Debug, thiserror::Error, displaydoc::Display)] pub enum UtilsError { /// Can not open snapshot file: {0} VmStateFileOpen(std::io::Error), /// Can not load snapshot: {0} VmStateLoad(vmm::snapshot::SnapshotError), /// Can not open output file: {0} OutputFileOpen(std::io::Error), /// Can not save snapshot: {0} VmStateSave(vmm::snapshot::SnapshotError), } #[allow(unused)] pub fn open_vmstate(snapshot_path: &PathBuf) -> Result, UtilsError> { let mut snapshot_reader = File::open(snapshot_path).map_err(UtilsError::VmStateFileOpen)?; Snapshot::load(&mut snapshot_reader).map_err(UtilsError::VmStateLoad) } // This method is used only in aarch64 code so far #[allow(unused)] pub fn save_vmstate(microvm_state: MicrovmState, output_path: &PathBuf) -> Result<(), UtilsError> { let mut output_file = OpenOptions::new() .create(true) .write(true) .truncate(true) .open(output_path) .map_err(UtilsError::OutputFileOpen)?; let mut snapshot = 
Snapshot::new(microvm_state); snapshot .save(&mut output_file) .map_err(UtilsError::VmStateSave)?; Ok(()) } ================================================ FILE: src/utils/Cargo.toml ================================================ [package] name = "utils" version = "0.1.0" authors = ["Amazon Firecracker team "] edition = "2024" license = "Apache-2.0" [lib] bench = false [features] tracing = ["log-instrument"] [dependencies] displaydoc = "0.2.5" libc = "0.2.183" log-instrument = { path = "../log-instrument", optional = true } thiserror = "2.0.18" [lints] workspace = true ================================================ FILE: src/utils/src/arg_parser.rs ================================================ // Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 use std::collections::BTreeMap; use std::{env, fmt, result}; pub type Result = result::Result; const ARG_PREFIX: &str = "--"; const ARG_SEPARATOR: &str = "--"; const HELP_ARG: &str = "--help"; const SHORT_HELP_ARG: &str = "-h"; const VERSION_ARG: &str = "--version"; /// Errors associated with parsing and validating arguments. #[derive(Debug, PartialEq, Eq, thiserror::Error, displaydoc::Display)] pub enum UtilsArgParserError { /// Argument '{1}' cannot be used together with argument '{0}'. ForbiddenArgument(String, String), /// Argument '{0}' required, but not found. MissingArgument(String), /// The argument '{0}' requires a value, but none was supplied. MissingValue(String), /// Found argument '{0}' which wasn't expected, or isn't valid in this context. UnexpectedArgument(String), /// The argument '{0}' was provided more than once. DuplicateArgument(String), } /// Keep information about the argument parser. #[derive(Debug, Clone, Default)] pub struct ArgParser<'a> { arguments: Arguments<'a>, } impl<'a> ArgParser<'a> { /// Create a new ArgParser instance. 
pub fn new() -> Self { ArgParser::default() } /// Add an argument with its associated `Argument` in `arguments`. pub fn arg(mut self, argument: Argument<'a>) -> Self { self.arguments.insert_arg(argument); self } /// Parse the command line arguments. pub fn parse_from_cmdline(&mut self) -> Result<()> { self.arguments.parse_from_cmdline() } /// Concatenate the `help` information of every possible argument /// in a message that represents the correct command line usage /// for the application. pub fn formatted_help(&self) -> String { let mut help_builder = vec![]; let required_arguments = self.format_arguments(true); if !required_arguments.is_empty() { help_builder.push("required arguments:".to_string()); help_builder.push(required_arguments); } let optional_arguments = self.format_arguments(false); if !optional_arguments.is_empty() { // Add line break if `required_arguments` is pushed. if !help_builder.is_empty() { help_builder.push("".to_string()); } help_builder.push("optional arguments:".to_string()); help_builder.push(optional_arguments); } help_builder.join("\n") } /// Return a reference to `arguments` field. pub fn arguments(&self) -> &Arguments<'_> { &self.arguments } // Filter arguments by whether or not it is required. // Align arguments by setting width to length of the longest argument. fn format_arguments(&self, is_required: bool) -> String { let filtered_arguments = self .arguments .args .values() .filter(|arg| is_required == arg.required) .collect::>(); let max_arg_width = filtered_arguments .iter() .map(|arg| arg.format_name().len()) .max() .unwrap_or(0); filtered_arguments .into_iter() .map(|arg| arg.format_help(max_arg_width)) .collect::>() .join("\n") } } /// Stores the characteristics of the `name` command line argument. 
#[derive(Clone, Debug, PartialEq, Eq)] pub struct Argument<'a> { name: &'a str, required: bool, requires: Option<&'a str>, forbids: Vec<&'a str>, takes_value: bool, allow_multiple: bool, default_value: Option, help: Option<&'a str>, user_value: Option, } impl<'a> Argument<'a> { /// Create a new `Argument` that keeps the necessary information for an argument. pub fn new(name: &'a str) -> Argument<'a> { Argument { name, required: false, requires: None, forbids: vec![], takes_value: false, allow_multiple: false, default_value: None, help: None, user_value: None, } } /// Set if the argument *must* be provided by user. pub fn required(mut self, required: bool) -> Self { self.required = required; self } /// Add `other_arg` as a required parameter when `self` is specified. pub fn requires(mut self, other_arg: &'a str) -> Self { self.requires = Some(other_arg); self } /// Add `other_arg` as a forbidden parameter when `self` is specified. pub fn forbids(mut self, args: Vec<&'a str>) -> Self { self.forbids = args; self } /// If `takes_value` is true, then the user *must* provide a value for the /// argument, otherwise that argument is a flag. pub fn takes_value(mut self, takes_value: bool) -> Self { self.takes_value = takes_value; self } /// If `allow_multiple` is true, then the user can provide multiple values for the /// argument (e.g --arg val1 --arg val2). It sets the `takes_value` option to true, /// so the user must provides at least one value. pub fn allow_multiple(mut self, allow_multiple: bool) -> Self { if allow_multiple { self.takes_value = true; } self.allow_multiple = allow_multiple; self } /// Keep a default value which will be used if the user didn't provide a value for /// the argument. pub fn default_value(mut self, default_value: &'a str) -> Self { self.default_value = Some(Value::Single(String::from(default_value))); self } /// Set the information that will be displayed for the argument when user passes /// `--help` flag. 
pub fn help(mut self, help: &'a str) -> Self { self.help = Some(help); self } fn format_help(&self, arg_width: usize) -> String { let mut help_builder = vec![]; let arg = self.format_name(); help_builder.push(format!("{: { help_builder.push(format!("{} [default: {}]", help, default_value)) } (Some(help), None) => help_builder.push(help.to_string()), (None, Some(default_value)) => { help_builder.push(format!("[default: {}]", default_value)) } (None, None) => (), }; help_builder.concat() } fn format_name(&self) -> String { if self.takes_value { format!(" --{name} <{name}>", name = self.name) } else { format!(" --{}", self.name) } } } /// Represents the type of argument, and the values it takes. #[derive(Clone, Debug, PartialEq, Eq)] pub enum Value { Flag, Single(String), Multiple(Vec), } impl Value { fn as_single_value(&self) -> Option<&String> { match self { Value::Single(s) => Some(s), _ => None, } } fn as_flag(&self) -> bool { matches!(self, Value::Flag) } fn as_multiple(&self) -> Option<&[String]> { match self { Value::Multiple(v) => Some(v), _ => None, } } } impl fmt::Display for Value { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { Value::Flag => write!(f, "true"), Value::Single(s) => write!(f, "\"{}\"", s), Value::Multiple(v) => write!(f, "{:?}", v), } } } /// Stores the arguments of the parser. #[derive(Debug, Clone, Default)] pub struct Arguments<'a> { // A BTreeMap in which the key is an argument and the value is its associated `Argument`. args: BTreeMap<&'a str, Argument<'a>>, // The arguments specified after `--` (i.e. end of command options). extra_args: Vec, } impl<'a> Arguments<'a> { /// Add an argument with its associated `Argument` in `args`. fn insert_arg(&mut self, argument: Argument<'a>) { self.args.insert(argument.name, argument); } /// Get the value for the argument specified by `arg_name`. 
fn value_of(&self, arg_name: &'static str) -> Option<&Value> { self.args.get(arg_name).and_then(|argument| { argument .user_value .as_ref() .or(argument.default_value.as_ref()) }) } /// Return the value of an argument if the argument exists and has the type /// String. Otherwise return None. pub fn single_value(&self, arg_name: &'static str) -> Option<&String> { self.value_of(arg_name) .and_then(|arg_value| arg_value.as_single_value()) } /// Return whether an `arg_name` argument of type flag exists. pub fn flag_present(&self, arg_name: &'static str) -> bool { match self.value_of(arg_name) { Some(v) => v.as_flag(), None => false, } } /// Return the value of an argument if the argument exists and has the type /// vector. Otherwise return None. pub fn multiple_values(&self, arg_name: &'static str) -> Option<&[String]> { self.value_of(arg_name) .and_then(|arg_value| arg_value.as_multiple()) } /// Get the extra arguments (all arguments after `--`). pub fn extra_args(&self) -> Vec { self.extra_args.clone() } // Split `args` in two slices: one with the actual arguments of the process and the other with // the extra arguments, meaning all parameters specified after `--`. fn split_args(args: &[String]) -> (&[String], &[String]) { if let Some(index) = args.iter().position(|arg| arg == ARG_SEPARATOR) { return (&args[..index], &args[index + 1..]); } (args, &[]) } /// Collect the command line arguments and the values provided for them. pub fn parse_from_cmdline(&mut self) -> Result<()> { let args: Vec = env::args().collect(); self.parse(&args) } /// Clear split between the actual arguments of the process, the extra arguments if any /// and the `--help` and `--version` arguments if present. pub fn parse(&mut self, args: &[String]) -> Result<()> { // Skipping the first element of `args` as it is the name of the binary. 
let (args, extra_args) = Arguments::split_args(&args[1..]); self.extra_args = extra_args.to_vec(); // If `--help` or `-h`is provided as a parameter, we artificially skip the parsing of other // command line arguments by adding just the help argument to the parsed list and // returning. if args.contains(&HELP_ARG.to_string()) || args.contains(&SHORT_HELP_ARG.to_string()) { let mut help_arg = Argument::new("help").help("Show the help message."); help_arg.user_value = Some(Value::Flag); self.insert_arg(help_arg); return Ok(()); } // If `--version` is provided as a parameter, we artificially skip the parsing of other // command line arguments by adding just the version argument to the parsed list and // returning. if args.contains(&VERSION_ARG.to_string()) { let mut version_arg = Argument::new("version"); version_arg.user_value = Some(Value::Flag); self.insert_arg(version_arg); return Ok(()); } // Otherwise, we continue the parsing of the other arguments. self.populate_args(args) } // Check if `required`, `requires` and `forbids` field rules are indeed followed by every // argument. fn validate_requirements(&self, args: &[String]) -> Result<()> { for argument in self.args.values() { // The arguments that are marked `required` must be provided by user. if argument.required && argument.user_value.is_none() { return Err(UtilsArgParserError::MissingArgument( argument.name.to_string(), )); } if argument.user_value.is_some() { // For the arguments that require a specific argument to be also present in the list // of arguments provided by user, search for that argument. if let Some(arg_name) = argument.requires && !args.contains(&(format!("--{}", arg_name))) { return Err(UtilsArgParserError::MissingArgument(arg_name.to_string())); } // Check the user-provided list for potential forbidden arguments. 
for arg_name in argument.forbids.iter() { if args.contains(&(format!("--{}", arg_name))) { return Err(UtilsArgParserError::ForbiddenArgument( argument.name.to_string(), arg_name.to_string(), )); } } } } Ok(()) } // Does a general validation of `arg` command line argument. fn validate_arg(&self, arg: &str) -> Result<()> { if !arg.starts_with(ARG_PREFIX) { return Err(UtilsArgParserError::UnexpectedArgument(arg.to_string())); } let arg_name = &arg[ARG_PREFIX.len()..]; // Check if the argument is an expected one and, if yes, check that it was not // provided more than once (unless allow_multiple is set). let argument = self .args .get(arg_name) .ok_or_else(|| UtilsArgParserError::UnexpectedArgument(arg_name.to_string()))?; if !argument.allow_multiple && argument.user_value.is_some() { return Err(UtilsArgParserError::DuplicateArgument(arg_name.to_string())); } Ok(()) } /// Validate the arguments provided by user and their values. Insert those /// values in the `Argument` instances of the corresponding arguments. fn populate_args(&mut self, args: &[String]) -> Result<()> { let mut iter = args.iter(); while let Some(arg) = iter.next() { self.validate_arg(arg)?; // If the `arg` argument is indeed an expected one, set the value provided by user // if it's a valid one. let argument = self.args.get_mut(&arg[ARG_PREFIX.len()..]).ok_or_else(|| { UtilsArgParserError::UnexpectedArgument(arg[ARG_PREFIX.len()..].to_string()) })?; let arg_val = if argument.takes_value { let val = iter .next() .filter(|v| !v.starts_with(ARG_PREFIX)) .ok_or_else(|| UtilsArgParserError::MissingValue(argument.name.to_string()))? 
.clone(); if argument.allow_multiple { match argument.user_value.take() { Some(Value::Multiple(mut v)) => { v.push(val); Value::Multiple(v) } None => Value::Multiple(vec![val]), _ => { return Err(UtilsArgParserError::UnexpectedArgument( argument.name.to_string(), )); } } } else { Value::Single(val) } } else { Value::Flag }; argument.user_value = Some(arg_val); } // Check the constraints for the `required`, `requires` and `forbids` fields of all // arguments. self.validate_requirements(args)?; Ok(()) } } #[cfg(test)] mod tests { use super::*; use crate::arg_parser::Value; fn build_arg_parser() -> ArgParser<'static> { ArgParser::new() .arg( Argument::new("exec-file") .required(true) .takes_value(true) .help("'exec-file' info."), ) .arg( Argument::new("no-api") .requires("config-file") .takes_value(false) .help("'no-api' info."), ) .arg( Argument::new("api-sock") .takes_value(true) .default_value("socket") .help("'api-sock' info."), ) .arg( Argument::new("id") .takes_value(true) .default_value("instance") .help("'id' info."), ) .arg( Argument::new("seccomp-filter") .takes_value(true) .help("'seccomp-filter' info.") .forbids(vec!["no-seccomp"]), ) .arg( Argument::new("no-seccomp") .help("'-no-seccomp' info.") .forbids(vec!["seccomp-filter"]), ) .arg( Argument::new("config-file") .takes_value(true) .help("'config-file' info."), ) .arg( Argument::new("describe-snapshot") .takes_value(true) .help("'describe-snapshot' info."), ) } #[test] fn test_arg_help() { // Checks help format for an argument. 
let width = 32; let short_width = 16; let mut argument = Argument::new("exec-file").takes_value(false); assert_eq!( argument.format_help(width), " --exec-file " ); assert_eq!(argument.format_help(short_width), " --exec-file "); argument = Argument::new("exec-file").takes_value(true); assert_eq!( argument.format_help(width), " --exec-file " ); assert_eq!( argument.format_help(short_width), " --exec-file " ); argument = Argument::new("exec-file") .takes_value(true) .help("'exec-file' info."); assert_eq!( argument.format_help(width), " --exec-file 'exec-file' info." ); assert_eq!( argument.format_help(short_width), " --exec-file 'exec-file' info." ); argument = Argument::new("exec-file") .takes_value(true) .default_value("./exec-file"); assert_eq!( argument.format_help(width), " --exec-file [default: \"./exec-file\"]" ); assert_eq!( argument.format_help(short_width), " --exec-file [default: \"./exec-file\"]" ); argument = Argument::new("exec-file") .takes_value(true) .default_value("./exec-file") .help("'exec-file' info."); assert_eq!( argument.format_help(width), " --exec-file 'exec-file' info. [default: \"./exec-file\"]" ); assert_eq!( argument.format_help(short_width), " --exec-file 'exec-file' info. [default: \"./exec-file\"]" ); } #[test] fn test_arg_parser_help() { // Checks help information when user passes `--help` flag. let mut arg_parser = ArgParser::new() .arg( Argument::new("exec-file") .required(true) .takes_value(true) .help("'exec-file' info."), ) .arg( Argument::new("api-sock") .takes_value(true) .help("'api-sock' info."), ); assert_eq!( arg_parser.formatted_help(), "required arguments:\n --exec-file 'exec-file' info.\n\noptional \ arguments:\n --api-sock 'api-sock' info." 
); arg_parser = ArgParser::new() .arg(Argument::new("id").takes_value(true).help("'id' info.")) .arg( Argument::new("seccomp-filter") .takes_value(true) .help("'seccomp-filter' info."), ) .arg( Argument::new("config-file") .takes_value(true) .help("'config-file' info."), ); assert_eq!( arg_parser.formatted_help(), "optional arguments:\n --config-file 'config-file' info.\n \ --id 'id' info.\n --seccomp-filter \ 'seccomp-filter' info." ); } #[test] fn test_value() { // Test `as_string()` and `as_flag()` functions behaviour. let mut value = Value::Flag; assert!(Value::as_single_value(&value).is_none()); value = Value::Single("arg".to_string()); assert_eq!(Value::as_single_value(&value).unwrap(), "arg"); value = Value::Single("arg".to_string()); assert!(!Value::as_flag(&value)); value = Value::Flag; assert!(Value::as_flag(&value)); } #[test] fn test_parse() { let arg_parser = build_arg_parser(); // Test different scenarios for the command line arguments provided by user. let mut arguments = arg_parser.arguments().clone(); let args = vec!["binary-name", "--exec-file", "foo", "--help"] .into_iter() .map(String::from) .collect::>(); arguments.parse(&args).unwrap(); assert!(arguments.args.contains_key("help")); arguments = arg_parser.arguments().clone(); let args = vec!["binary-name", "--exec-file", "foo", "-h"] .into_iter() .map(String::from) .collect::>(); arguments.parse(&args).unwrap(); assert!(arguments.args.contains_key("help")); arguments = arg_parser.arguments().clone(); let args = vec!["binary-name", "--exec-file", "foo", "--version"] .into_iter() .map(String::from) .collect::>(); arguments.parse(&args).unwrap(); assert!(arguments.args.contains_key("version")); arguments = arg_parser.arguments().clone(); let args = vec!["binary-name", "--exec-file", "foo", "--describe-snapshot"] .into_iter() .map(String::from) .collect::>(); assert_eq!( arguments.parse(&args), Err(UtilsArgParserError::MissingValue( "describe-snapshot".to_string() )) ); arguments = 
arg_parser.arguments().clone(); let args = vec![ "binary-name", "--exec-file", "foo", "--describe-snapshot", "--", ] .into_iter() .map(String::from) .collect::>(); assert_eq!( arguments.parse(&args), Err(UtilsArgParserError::MissingValue( "describe-snapshot".to_string() )) ); arguments = arg_parser.arguments().clone(); let args = vec![ "binary-name", "--exec-file", "foo", "--api-sock", "--id", "bar", ] .into_iter() .map(String::from) .collect::>(); assert_eq!( arguments.parse(&args), Err(UtilsArgParserError::MissingValue("api-sock".to_string())) ); arguments = arg_parser.arguments().clone(); let args = vec![ "binary-name", "--exec-file", "foo", "--api-sock", "bar", "--api-sock", "foobar", ] .into_iter() .map(String::from) .collect::>(); assert_eq!( arguments.parse(&args), Err(UtilsArgParserError::DuplicateArgument( "api-sock".to_string() )) ); arguments = arg_parser.arguments().clone(); let args = vec!["binary-name", "--api-sock", "foo"] .into_iter() .map(String::from) .collect::>(); assert_eq!( arguments.parse(&args), Err(UtilsArgParserError::MissingArgument( "exec-file".to_string() )) ); arguments = arg_parser.arguments().clone(); let args = vec![ "binary-name", "--exec-file", "foo", "--api-sock", "bar", "--invalid-arg", ] .into_iter() .map(String::from) .collect::>(); assert_eq!( arguments.parse(&args), Err(UtilsArgParserError::UnexpectedArgument( "invalid-arg".to_string() )) ); arguments = arg_parser.arguments().clone(); let args = vec![ "binary-name", "--exec-file", "foo", "--api-sock", "bar", "--id", "foobar", "--no-api", ] .into_iter() .map(String::from) .collect::>(); assert_eq!( arguments.parse(&args), Err(UtilsArgParserError::MissingArgument( "config-file".to_string() )) ); arguments = arg_parser.arguments().clone(); let args = vec![ "binary-name", "--exec-file", "foo", "--api-sock", "bar", "--id", ] .into_iter() .map(String::from) .collect::>(); assert_eq!( arguments.parse(&args), Err(UtilsArgParserError::MissingValue("id".to_string())) ); arguments = 
arg_parser.arguments().clone(); let args = vec![ "binary-name", "--exec-file", "foo", "--config-file", "bar", "--no-api", "foobar", ] .into_iter() .map(String::from) .collect::>(); assert_eq!( arguments.parse(&args), Err(UtilsArgParserError::UnexpectedArgument( "foobar".to_string() )) ); arguments = arg_parser.arguments().clone(); let args = vec![ "binary-name", "--exec-file", "foo", "--api-sock", "bar", "--id", "foobar", "--seccomp-filter", "0", "--no-seccomp", ] .into_iter() .map(String::from) .collect::>(); assert_eq!( arguments.parse(&args), Err(UtilsArgParserError::ForbiddenArgument( "no-seccomp".to_string(), "seccomp-filter".to_string(), )) ); arguments = arg_parser.arguments().clone(); let args = vec![ "binary-name", "--exec-file", "foo", "--api-sock", "bar", "--id", "foobar", "--no-seccomp", "--seccomp-filter", "0", ] .into_iter() .map(String::from) .collect::>(); assert_eq!( arguments.parse(&args), Err(UtilsArgParserError::ForbiddenArgument( "no-seccomp".to_string(), "seccomp-filter".to_string(), )) ); arguments = arg_parser.arguments().clone(); let args = vec![ "binary-name", "--exec-file", "foo", "--api-sock", "bar", "foobar", ] .into_iter() .map(String::from) .collect::>(); assert_eq!( arguments.parse(&args), Err(UtilsArgParserError::UnexpectedArgument( "foobar".to_string() )) ); arguments = arg_parser.arguments().clone(); let args = vec!["binary-name", "foo"] .into_iter() .map(String::from) .collect::>(); assert_eq!( arguments.parse(&args), Err(UtilsArgParserError::UnexpectedArgument("foo".to_string())) ); arguments = arg_parser.arguments().clone(); let args = vec![ "binary-name", "--exec-file", "foo", "--api-sock", "bar", "--id", "foobar", "--seccomp-filter", "0", "--", "--extra-flag", ] .into_iter() .map(String::from) .collect::>(); arguments.parse(&args).unwrap(); assert!(arguments.extra_args.contains(&"--extra-flag".to_string())); } #[test] fn test_split() { let mut args = vec!["--exec-file", "foo", "--", "--extra-arg-1", "--extra-arg-2"] 
.into_iter() .map(String::from) .collect::>(); let (left, right) = Arguments::split_args(&args); assert_eq!(left.to_vec(), vec!["--exec-file", "foo"]); assert_eq!(right.to_vec(), vec!["--extra-arg-1", "--extra-arg-2"]); args = vec!["--exec-file", "foo", "--"] .into_iter() .map(String::from) .collect::>(); let (left, right) = Arguments::split_args(&args); assert_eq!(left.to_vec(), vec!["--exec-file", "foo"]); assert!(right.is_empty()); args = vec!["--exec-file", "foo"] .into_iter() .map(String::from) .collect::>(); let (left, right) = Arguments::split_args(&args); assert_eq!(left.to_vec(), vec!["--exec-file", "foo"]); assert!(right.is_empty()); } #[test] fn test_error_display() { assert_eq!( format!( "{}", UtilsArgParserError::ForbiddenArgument("foo".to_string(), "bar".to_string()) ), "Argument 'bar' cannot be used together with argument 'foo'." ); assert_eq!( format!( "{}", UtilsArgParserError::MissingArgument("foo".to_string()) ), "Argument 'foo' required, but not found." ); assert_eq!( format!("{}", UtilsArgParserError::MissingValue("foo".to_string())), "The argument 'foo' requires a value, but none was supplied." ); assert_eq!( format!( "{}", UtilsArgParserError::UnexpectedArgument("foo".to_string()) ), "Found argument 'foo' which wasn't expected, or isn't valid in this context." ); assert_eq!( format!( "{}", UtilsArgParserError::DuplicateArgument("foo".to_string()) ), "The argument 'foo' was provided more than once." 
); } #[test] fn test_value_display() { assert_eq!(format!("{}", Value::Flag), "true"); assert_eq!(format!("{}", Value::Single("foo".to_string())), "\"foo\""); } #[test] fn test_allow_multiple() { let arg_parser = ArgParser::new() .arg( Argument::new("no-multiple") .takes_value(true) .help("argument that takes just one value."), ) .arg( Argument::new("multiple") .allow_multiple(true) .help("argument that allows duplication."), ); let mut arguments = arg_parser.arguments().clone(); // Check single value arguments fails when multiple values are provided. let args = vec!["binary-name", "--no-multiple", "1", "--no-multiple", "2"] .into_iter() .map(String::from) .collect::>(); assert_eq!( arguments.parse(&args), Err(UtilsArgParserError::DuplicateArgument( "no-multiple".to_string() )) ); arguments = arg_parser.arguments().clone(); // Check single value arguments works as expected when just one value // is provided for both arguments. let args = vec!["binary-name", "--no-multiple", "1", "--multiple", "2"] .into_iter() .map(String::from) .collect::>(); arguments.parse(&args).unwrap(); arguments = arg_parser.arguments().clone(); // Check multiple arg allow multiple values let args = vec!["binary-name", "--multiple", "1", "--multiple", "2"] .into_iter() .map(String::from) .collect::>(); arguments.parse(&args).unwrap(); // Check dulicates require a value let args = vec!["binary-name", "--multiple", "--multiple", "2"] .into_iter() .map(String::from) .collect::>(); assert_eq!( arguments.parse(&args), Err(UtilsArgParserError::MissingValue("multiple".to_string())) ); } } ================================================ FILE: src/utils/src/lib.rs ================================================ // Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
// SPDX-License-Identifier: Apache-2.0 pub mod arg_parser; pub mod time; pub mod validators; ================================================ FILE: src/utils/src/time.rs ================================================ // Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 use std::fs::File; use std::io::{ErrorKind, Read}; use std::os::unix::io::{AsRawFd, FromRawFd, RawFd}; use std::time::Duration; use std::{fmt, ptr}; /// Constant to convert seconds to nanoseconds. pub const NANOS_PER_SECOND: u64 = 1_000_000_000; /// Constant to convert milliseconds to nanoseconds. pub const NANOS_PER_MILLISECOND: u64 = 1_000_000; /// Wrapper over `libc::clockid_t` to specify Linux Kernel clock source. #[derive(Debug)] pub enum ClockType { /// Equivalent to `libc::CLOCK_MONOTONIC`. Monotonic, /// Equivalent to `libc::CLOCK_REALTIME`. Real, /// Equivalent to `libc::CLOCK_PROCESS_CPUTIME_ID`. ProcessCpu, /// Equivalent to `libc::CLOCK_THREAD_CPUTIME_ID`. ThreadCpu, } impl From for libc::clockid_t { fn from(clock_type: ClockType) -> Self { match clock_type { ClockType::Monotonic => libc::CLOCK_MONOTONIC, ClockType::Real => libc::CLOCK_REALTIME, ClockType::ProcessCpu => libc::CLOCK_PROCESS_CPUTIME_ID, ClockType::ThreadCpu => libc::CLOCK_THREAD_CPUTIME_ID, } } } /// Structure representing the date in local time with nanosecond precision. #[derive(Debug)] pub struct LocalTime { /// Seconds in current minute. sec: i32, /// Minutes in current hour. min: i32, /// Hours in current day, 24H format. hour: i32, /// Days in current month. mday: i32, /// Months in current year. mon: i32, /// Years passed since 1900 BC. year: i32, /// Nanoseconds in current second. nsec: i64, } impl LocalTime { /// Returns the [LocalTime](struct.LocalTime.html) structure for the calling moment. 
pub fn now() -> LocalTime { let mut timespec = libc::timespec { tv_sec: 0, tv_nsec: 0, }; let mut tm: libc::tm = libc::tm { tm_sec: 0, tm_min: 0, tm_hour: 0, tm_mday: 0, tm_mon: 0, tm_year: 0, tm_wday: 0, tm_yday: 0, tm_isdst: 0, tm_gmtoff: 0, tm_zone: std::ptr::null(), }; // SAFETY: Safe because the parameters are valid. unsafe { libc::clock_gettime(libc::CLOCK_REALTIME, &mut timespec); libc::localtime_r(×pec.tv_sec, &mut tm); } LocalTime { sec: tm.tm_sec, min: tm.tm_min, hour: tm.tm_hour, mday: tm.tm_mday, mon: tm.tm_mon, year: tm.tm_year, nsec: timespec.tv_nsec, } } } impl fmt::Display for LocalTime { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!( f, "{}-{:02}-{:02}T{:02}:{:02}:{:02}.{:09}", self.year + 1900, self.mon + 1, self.mday, self.hour, self.min, self.sec, self.nsec ) } } /// Holds a micro-second resolution timestamp with both the real time and cpu time. #[derive(Debug, Clone)] pub struct TimestampUs { /// Real time in microseconds. pub time_us: u64, /// Cpu time in microseconds. pub cputime_us: u64, } impl Default for TimestampUs { fn default() -> TimestampUs { TimestampUs { time_us: get_time_us(ClockType::Monotonic), cputime_us: get_time_us(ClockType::ProcessCpu), } } } /// Returns a timestamp in nanoseconds from a monotonic clock. /// /// Uses `_rdstc` on `x86_64` and [`get_time`](fn.get_time.html) on other architectures. pub fn timestamp_cycles() -> u64 { #[cfg(target_arch = "x86_64")] // SAFETY: Safe because there's nothing that can go wrong with this call. unsafe { std::arch::x86_64::_rdtsc() } #[cfg(not(target_arch = "x86_64"))] { get_time_ns(ClockType::Monotonic) } } /// Returns a timestamp in nanoseconds based on the provided clock type. /// /// # Arguments /// /// * `clock_type` - Identifier of the Linux Kernel clock on which to act. pub fn get_time_ns(clock_type: ClockType) -> u64 { let mut time_struct = libc::timespec { tv_sec: 0, tv_nsec: 0, }; // SAFETY: Safe because the parameters are valid. 
unsafe { libc::clock_gettime(clock_type.into(), &mut time_struct) };

    u64::try_from(seconds_to_nanoseconds(time_struct.tv_sec).expect("Time conversion overflow"))
        .unwrap()
        + u64::try_from(time_struct.tv_nsec).unwrap()
}

/// Returns a timestamp in microseconds based on the provided clock type.
///
/// # Arguments
///
/// * `clock_type` - Identifier of the Linux Kernel clock on which to act.
pub fn get_time_us(clock_type: ClockType) -> u64 {
    get_time_ns(clock_type) / 1000
}

/// Returns a timestamp in milliseconds based on the provided clock type.
///
/// # Arguments
///
/// * `clock_type` - Identifier of the Linux Kernel clock on which to act.
pub fn get_time_ms(clock_type: ClockType) -> u64 {
    get_time_ns(clock_type) / NANOS_PER_MILLISECOND
}

/// Converts a timestamp in seconds to an equivalent one in nanoseconds.
/// Returns `None` if the conversion overflows.
///
/// # Arguments
///
/// * `value` - Timestamp in seconds.
// NOTE(review): the extracted text read `-> Option`; the `<i64>` type argument was
// stripped during extraction and is restored here.
pub fn seconds_to_nanoseconds(value: i64) -> Option<i64> {
    value.checked_mul(i64::try_from(NANOS_PER_SECOND).unwrap())
}

/// Wrapper for timerfd
#[derive(Debug)]
pub struct TimerFd(File);

#[allow(clippy::new_without_default)]
impl TimerFd {
    /// Creates new MONOTONIC and NONBLOCK timerfd
    pub fn new() -> Self {
        // SAFETY: all arguments are valid constants
        let fd = unsafe {
            libc::timerfd_create(
                libc::CLOCK_MONOTONIC,
                libc::TFD_NONBLOCK | libc::TFD_CLOEXEC,
            )
        };
        assert!(
            0 <= fd,
            "TimerFd creation failed: {:#}",
            std::io::Error::last_os_error()
        );
        // SAFETY: we just created valid fd
        TimerFd(unsafe { File::from_raw_fd(fd) })
    }

    /// Arm the timer to be triggered after `duration` and then
    /// at optional `interval`
    // NOTE(review): the extracted text read `interval: Option`; the `<Duration>` type
    // argument was stripped during extraction and is restored here.
    pub fn arm(&mut self, duration: Duration, interval: Option<Duration>) {
        #[allow(clippy::cast_possible_wrap)]
        let spec = libc::itimerspec {
            it_value: libc::timespec {
                tv_sec: duration.as_secs() as i64,
                tv_nsec: duration.subsec_nanos() as i64,
            },
            it_interval: if let Some(interval) = interval {
                libc::timespec {
                    tv_sec: interval.as_secs() as i64,
                    tv_nsec:
interval.subsec_nanos() as i64,
                }
            } else {
                libc::timespec {
                    tv_sec: 0,
                    tv_nsec: 0,
                }
            },
        };
        // SAFETY: Safe because this doesn't modify any memory and we check the return value.
        let ret = unsafe { libc::timerfd_settime(self.as_raw_fd(), 0, &spec, ptr::null_mut()) };
        assert!(
            0 <= ret,
            "TimerFd arm failed: {:#}",
            std::io::Error::last_os_error()
        );
    }

    /// Read the value from the timerfd. Since it is always created with NONBLOCK flag,
    /// this function does not block.
    pub fn read(&mut self) -> u64 {
        // NOTE(review): the extracted text read `size_of::()`; the `<u64>` turbofish
        // was stripped during extraction and is restored here.
        let mut buf = [0u8; size_of::<u64>()];
        match self.0.read(buf.as_mut_slice()) {
            Ok(_) => u64::from_ne_bytes(buf),
            // A non-expired NONBLOCK timerfd reads as WouldBlock; report 0 expirations.
            Err(inner) if inner.kind() == ErrorKind::WouldBlock => 0,
            Err(err) => panic!("TimerFd read failed: {err:#}"),
        }
    }

    /// Tell if the timer is currently armed.
    pub fn is_armed(&self) -> bool {
        // SAFETY: Zero init of a PDO type.
        let mut spec: libc::itimerspec = unsafe { std::mem::zeroed() };
        // SAFETY: Safe because timerfd_gettime is trusted to only modify `spec`.
        let ret = unsafe { libc::timerfd_gettime(self.as_raw_fd(), &mut spec) };
        // Fixed copy-pasted message: this assertion guards timerfd_gettime, not arming.
        assert!(
            0 <= ret,
            "TimerFd gettime failed: {:#}",
            std::io::Error::last_os_error()
        );
        spec.it_value.tv_sec != 0 || spec.it_value.tv_nsec != 0
    }
}

impl AsRawFd for TimerFd {
    fn as_raw_fd(&self) -> RawFd {
        self.0.as_raw_fd()
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_get_time() {
        for _ in 0..1000 {
            assert!(get_time_ns(ClockType::Monotonic) <= get_time_ns(ClockType::Monotonic));
        }

        for _ in 0..1000 {
            assert!(get_time_ns(ClockType::ProcessCpu) <= get_time_ns(ClockType::ProcessCpu));
        }

        for _ in 0..1000 {
            assert!(get_time_ns(ClockType::ThreadCpu) <= get_time_ns(ClockType::ThreadCpu));
        }

        assert_ne!(get_time_ns(ClockType::Real), 0);
        assert_ne!(get_time_us(ClockType::Real), 0);
        assert!(get_time_ns(ClockType::Real) / 1000 <= get_time_us(ClockType::Real));
        assert!(
            get_time_ns(ClockType::Real) / NANOS_PER_MILLISECOND <= get_time_ms(ClockType::Real)
        );
    }

    #[test]
    fn test_local_time_display() {
        let local_time = LocalTime {
            sec: 30,
            min: 15,
hour: 10,
            mday: 4,
            mon: 6,
            year: 119,
            nsec: 123_456_789,
        };
        assert_eq!(
            String::from("2019-07-04T10:15:30.123456789"),
            local_time.to_string()
        );

        let local_time = LocalTime {
            sec: 5,
            min: 5,
            hour: 5,
            mday: 23,
            mon: 7,
            year: 44,
            nsec: 123,
        };
        assert_eq!(
            String::from("1944-08-23T05:05:05.000000123"),
            local_time.to_string()
        );

        // `tm_mon` is 0-based, so a valid month is always in 0..=11.
        let local_time = LocalTime::now();
        assert!(local_time.mon >= 0 && local_time.mon <= 11);
    }

    #[test]
    fn test_seconds_to_nanoseconds() {
        assert_eq!(
            u64::try_from(seconds_to_nanoseconds(100).unwrap()).unwrap(),
            100 * NANOS_PER_SECOND
        );

        // 9_223_372_037 seconds in nanoseconds overflows i64.
        assert!(seconds_to_nanoseconds(9_223_372_037).is_none());
    }
}


================================================
FILE: src/utils/src/validators.rs
================================================
// Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

// Misc data format validations, shared by multiple Firecracker components.

const MAX_INSTANCE_ID_LEN: usize = 64;
const MIN_INSTANCE_ID_LEN: usize = 1;

#[derive(Debug, PartialEq, Eq, thiserror::Error, displaydoc::Display)]
pub enum ValidatorError {
    /// Invalid char ({0}) at position {1}
    InvalidChar(char, usize), // (char, position)
    /// Invalid len ({0}); the length must be between {1} and {2}
    InvalidLen(usize, usize, usize), // (length, min, max)
}

/// Checks that the instance id only contains alphanumeric chars and hyphens
/// and that the size is between 1 and 64 characters.
pub fn validate_instance_id(input: &str) -> Result<(), ValidatorError> {
    // Length check first, so a char error is only reported for a plausible id.
    if input.len() > MAX_INSTANCE_ID_LEN || input.len() < MIN_INSTANCE_ID_LEN {
        return Err(ValidatorError::InvalidLen(
            input.len(),
            MIN_INSTANCE_ID_LEN,
            MAX_INSTANCE_ID_LEN,
        ));
    }
    for (i, c) in input.chars().enumerate() {
        if !(c == '-' || c.is_alphanumeric()) {
            return Err(ValidatorError::InvalidChar(c, i));
        }
    }
    Ok(())
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_validate_instance_id() {
        assert_eq!(
            format!("{}", validate_instance_id("").unwrap_err()),
            "Invalid len (0); the length must be between 1 and 64"
        );
        validate_instance_id("12-3aa").unwrap();
        assert_eq!(
            format!("{}", validate_instance_id("12_3aa").unwrap_err()),
            "Invalid char (_) at position 2"
        );
        assert_eq!(
            validate_instance_id("12:3aa").unwrap_err(),
            ValidatorError::InvalidChar(':', 2)
        );
        assert_eq!(
            validate_instance_id(str::repeat("a", MAX_INSTANCE_ID_LEN + 1).as_str()).unwrap_err(),
            ValidatorError::InvalidLen(
                MAX_INSTANCE_ID_LEN + 1,
                MIN_INSTANCE_ID_LEN,
                MAX_INSTANCE_ID_LEN
            )
        );
    }
}


================================================
FILE: src/vmm/Cargo.toml
================================================
[package]
name = "vmm"
version = "0.1.0"
# NOTE(review): the maintainer email between angle brackets appears to have been
# stripped during extraction — verify against the upstream manifest.
authors = ["Amazon Firecracker team "]
edition = "2024"
license = "Apache-2.0"

[lib]
bench = false

[features]
default = []
tracing = ["log-instrument"]
gdb = ["arrayvec", "gdbstub", "gdbstub_arch"]

[dependencies]
acpi_tables = { path = "../acpi-tables" }
arrayvec = { version = "0.7.6", optional = true }
aws-lc-rs = "1.16.1"
base64 = "0.22.1"
bitcode = { version = "0.6.9", features = ["serde"] }
bitflags = "2.11.0"
bitvec = { version = "1.0.1", features = ["atomic", "serde"] }
byteorder = "1.5.0"
crc64 = "2.0.0"
derive_more = { version = "2.1.1", default-features = false, features = [
  "from",
  "display",
] }
displaydoc = "0.2.5"
event-manager = "0.4.2"
gdbstub = { version = "0.7.10", optional = true }
gdbstub_arch = { version = "0.3.3", optional = true }
kvm-bindings = { version =
"0.14.0", features = ["fam-wrappers", "serde"] } kvm-ioctls = "0.24.0" libc = "0.2.183" linux-loader = "0.13.2" log = { version = "0.4.29", features = ["std", "serde"] } log-instrument = { path = "../log-instrument", optional = true } memfd = "0.6.5" micro_http = { git = "https://github.com/firecracker-microvm/micro-http" } pci = { path = "../pci" } semver = { version = "1.0.27", features = ["serde"] } serde = { version = "1.0.228", features = ["derive", "rc"] } serde_json = "1.0.149" slab = "0.4.12" thiserror = "2.0.18" userfaultfd = "0.9.0" utils = { path = "../utils" } uuid = "1.22.0" vhost = { version = "0.15.0", features = ["vhost-user-frontend"] } vm-allocator = { version = "0.1.3", features = ["serde"] } vm-memory = { version = "0.17.1", features = [ "backend-mmap", "backend-bitmap", ] } vm-superio = "0.8.1" vmm-sys-util = { version = "0.15.0", features = ["with-serde"] } zerocopy = { version = "0.8.42" } [target.'cfg(target_arch = "aarch64")'.dependencies] vm-fdt = "0.3.0" [dev-dependencies] criterion = { version = "0.8.2", default-features = false } device_tree = "1.1.0" itertools = "0.14.0" proptest = { version = "1.10.0", default-features = false, features = ["std"] } [[bench]] name = "cpu_templates" harness = false [[bench]] name = "queue" harness = false [[bench]] name = "block_request" harness = false [[bench]] name = "memory_access" harness = false [lints] workspace = true ================================================ FILE: src/vmm/benches/block_request.rs ================================================ // Copyright 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
// SPDX-License-Identifier: Apache-2.0 // // Benchmarking cases: // * `Queue.pop` // * `Queue.add_used` // * `DescriptorChain.next_descriptor` use criterion::{Criterion, criterion_group, criterion_main}; use vm_memory::GuestAddress; use vmm::devices::virtio::block::virtio::test_utils::RequestDescriptorChain; use vmm::devices::virtio::block::virtio::{Request, RequestHeader, VIRTIO_BLK_T_IN}; use vmm::devices::virtio::test_utils::VirtQueue; use vmm::test_utils::single_region_mem; pub fn block_request_benchmark(c: &mut Criterion) { let mem = single_region_mem(65562); let virt_queue = VirtQueue::new(GuestAddress(0), &mem, 16); // We don't really care about what request is. We just // need it to be valid. let chain = RequestDescriptorChain::new(&virt_queue); let request_header = RequestHeader::new(VIRTIO_BLK_T_IN, 99); chain.set_header(request_header); let mut queue = virt_queue.create_queue(); let desc = queue.pop().unwrap().unwrap(); c.bench_function("request_parse", |b| { b.iter(|| { let desc = std::hint::black_box(&desc); _ = Request::parse(desc, &mem, 1024); }) }); } criterion_group! { name = block_request_benches; config = Criterion::default().sample_size(1000).noise_threshold(0.05); targets = block_request_benchmark } criterion_main! { block_request_benches } ================================================ FILE: src/vmm/benches/cpu_templates.rs ================================================ // Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
// SPDX-License-Identifier: Apache-2.0 // // Benchmarking cases: // * `CustomCpuTemplate` JSON deserialization // * `CustomCpuTemplate` JSON serialization use std::mem::size_of_val; use criterion::{Criterion, criterion_group, criterion_main}; use vmm::cpu_config::templates::CustomCpuTemplate; use vmm::cpu_config::templates::test_utils::{TEST_TEMPLATE_JSON, build_test_template}; #[inline] pub fn bench_serialize_cpu_template(cpu_template: &CustomCpuTemplate) { let _ = serde_json::to_string(cpu_template); } #[inline] pub fn bench_deserialize_cpu_template(cpu_template_str: &str) { let _ = serde_json::from_str::(cpu_template_str); } pub fn cpu_template_benchmark(c: &mut Criterion) { println!( "Deserialization test - Template size (JSON string): [{}] bytes.", TEST_TEMPLATE_JSON.len() ); let test_cpu_template = build_test_template(); println!( "Serialization test - Template size: [{}] bytes.", size_of_val(&test_cpu_template) ); c.bench_function("deserialize_cpu_template", |b| { b.iter(|| bench_deserialize_cpu_template(TEST_TEMPLATE_JSON)) }); c.bench_function("serialize_cpu_template", |b| { b.iter(|| bench_serialize_cpu_template(&test_cpu_template)) }); } criterion_group! { name = cpu_template_benches; config = Criterion::default().sample_size(200).noise_threshold(0.05); targets = cpu_template_benchmark } criterion_main! { cpu_template_benches } ================================================ FILE: src/vmm/benches/memory_access.rs ================================================ // Copyright 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
// SPDX-License-Identifier: Apache-2.0 #![allow(clippy::undocumented_unsafe_blocks)] use criterion::{BatchSize, Criterion, criterion_group, criterion_main}; use vmm::resources::VmResources; use vmm::vmm_config::machine_config::{HugePageConfig, MachineConfig}; fn bench_single_page_fault(c: &mut Criterion, configuration: VmResources) { c.bench_function("page_fault", |b| { b.iter_batched( || { let memory = configuration.allocate_guest_memory().unwrap(); // Get a pointer to the first memory region (cannot do `.get_slice(GuestAddress(0), // 1)`, because on ARM64 guest memory does not start at physical // address 0). let ptr = memory.first().unwrap().as_ptr(); // fine to return both here, because ptr is not a reference into `memory` (e.g. no // self-referential structs are happening here) (memory, ptr) }, |(_, ptr)| unsafe { // Cause a single page fault ptr.write_volatile(1); }, BatchSize::SmallInput, ) }); } pub fn bench_4k_page_fault(c: &mut Criterion) { bench_single_page_fault( c, VmResources { machine_config: MachineConfig { vcpu_count: 1, mem_size_mib: 2, ..Default::default() }, ..Default::default() }, ) } pub fn bench_2m_page_fault(c: &mut Criterion) { bench_single_page_fault( c, VmResources { machine_config: MachineConfig { vcpu_count: 1, mem_size_mib: 2, huge_pages: HugePageConfig::Hugetlbfs2M, ..Default::default() }, ..Default::default() }, ) } criterion_group! { name = memory_access_benches; config = Criterion::default().noise_threshold(0.05); targets = bench_4k_page_fault, bench_2m_page_fault } criterion_main! { memory_access_benches } ================================================ FILE: src/vmm/benches/queue.rs ================================================ // Copyright 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
// SPDX-License-Identifier: Apache-2.0 // // Benchmarking cases: // * `Queue.pop` // * `Queue.add_used` // * `DescriptorChain.next_descriptor` #![allow(clippy::cast_possible_truncation)] use std::num::Wrapping; use criterion::{Criterion, criterion_group, criterion_main}; use vm_memory::GuestAddress; use vmm::devices::virtio::queue::{VIRTQ_DESC_F_NEXT, VIRTQ_DESC_F_WRITE}; use vmm::devices::virtio::test_utils::VirtQueue; use vmm::test_utils::single_region_mem; /// Create one chain with n descriptors /// Descriptor buffers will leave at the offset of 2048 bytes /// to leave some room for queue objects. /// We don't really care about sizes of descriptors, /// so pick 1024. fn set_dtable_one_chain(rxq: &VirtQueue, n: usize) { let desc_size = 1024; for i in 0..n { rxq.dtable[i].set( (2048 + desc_size * i) as u64, desc_size as u32, VIRTQ_DESC_F_WRITE | VIRTQ_DESC_F_NEXT, (i + 1) as u16, ); } rxq.dtable[n - 1].flags.set(VIRTQ_DESC_F_WRITE); rxq.dtable[n - 1].next.set(0); rxq.avail.ring[0].set(0); rxq.avail.idx.set(n as u16); } /// Create n chains with 1 descriptors each /// Descriptor buffers will leave at the offset of 2048 bytes /// to leave some room for queue objects. /// We don't really care about sizes of descriptors, /// so pick 1024. 
fn set_dtable_many_chains(rxq: &VirtQueue, n: usize) {
    let desc_size = 1024;
    for i in 0..n {
        // No NEXT flag: every descriptor is a single-element chain.
        rxq.dtable[i].set(
            (2048 + desc_size * i) as u64,
            desc_size as u32,
            VIRTQ_DESC_F_WRITE,
            0,
        );
        rxq.avail.ring[i].set(i as u16);
    }
    rxq.avail.idx.set(n as u16);
}

pub fn queue_benchmark(c: &mut Criterion) {
    let mem = single_region_mem(65562);
    let rxq = VirtQueue::new(GuestAddress(0), &mem, 256);
    let mut queue = rxq.create_queue();

    // One 16-descriptor chain: measure walking the chain link by link.
    set_dtable_one_chain(&rxq, 16);
    queue.next_avail = Wrapping(0);
    let desc = queue.pop().unwrap().unwrap();
    c.bench_function("next_descriptor_16", |b| {
        b.iter(|| {
            let mut head = Some(desc);
            while let Some(d) = head {
                head = std::hint::black_box(d.next_descriptor());
            }
        })
    });

    // 16 single-descriptor chains: measure popping from the avail ring.
    set_dtable_many_chains(&rxq, 16);
    c.bench_function("queue_pop_16", |b| {
        b.iter(|| {
            queue.next_avail = Wrapping(0);
            while let Some(desc) = queue.pop().unwrap() {
                std::hint::black_box(desc);
            }
        })
    });

    c.bench_function("queue_add_used_16", |b| {
        b.iter(|| {
            // Reset used-ring bookkeeping so each iteration starts clean.
            queue.num_added = Wrapping(0);
            queue.next_used = Wrapping(0);
            for i in 0_u16..16_u16 {
                let index = std::hint::black_box(i);
                let len = std::hint::black_box(i + 1);
                _ = queue.add_used(index, len as u32);
            }
        })
    });

    c.bench_function("queue_add_used_256", |b| {
        b.iter(|| {
            queue.num_added = Wrapping(0);
            queue.next_used = Wrapping(0);
            for i in 0_u16..256_u16 {
                let index = std::hint::black_box(i);
                let len = std::hint::black_box(i + 1);
                _ = queue.add_used(index, len as u32);
            }
        })
    });
}

criterion_group! {
    name = queue_benches;
    config = Criterion::default().sample_size(1000).noise_threshold(0.15);
    targets = queue_benchmark
}
criterion_main! { queue_benches }


================================================
FILE: src/vmm/src/acpi/mod.rs
================================================
// Copyright 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0 use acpi_tables::fadt::{FADT_F_HW_REDUCED_ACPI, FADT_F_PWR_BUTTON, FADT_F_SLP_BUTTON}; use acpi_tables::{Aml, Dsdt, Fadt, Madt, Mcfg, Rsdp, Sdt, Xsdt, aml}; use log::{debug, error}; use vm_allocator::AllocPolicy; use crate::Vcpu; use crate::acpi::x86_64::{ apic_addr, rsdp_addr, setup_arch_dsdt, setup_arch_fadt, setup_interrupt_controllers, }; use crate::arch::x86_64::layout; use crate::device_manager::DeviceManager; use crate::vstate::memory::{GuestAddress, GuestMemoryMmap}; use crate::vstate::resources::ResourceAllocator; mod x86_64; // Our (Original Equipment Manufacturer" (OEM) name. OEM is how ACPI names the manufacturer of the // hardware that is exposed to the OS, through ACPI tables. The OEM name is passed in every ACPI // table, to let the OS know that we are the owner of the table. const OEM_ID: [u8; 6] = *b"FIRECK"; // In reality the OEM revision is per table and it defines the revision of the OEM's implementation // of the particular ACPI table. For our purpose, we can set it to a fixed value for all the tables const OEM_REVISION: u32 = 0; // This is needed for an entry in the FADT table. Populating this entry in FADT is a way to let the // guest know that it runs within a Firecracker microVM. 
const HYPERVISOR_VENDOR_ID: [u8; 8] = *b"FIRECKVM"; #[derive(Debug, thiserror::Error, displaydoc::Display)] /// Error type for ACPI related operations pub enum AcpiError { /// Could not allocate resources: {0} VmAllocator(#[from] vm_allocator::Error), /// ACPI tables error: {0} AcpiTables(#[from] acpi_tables::AcpiError), /// Error creating AML bytecode: {0} AmlError(#[from] aml::AmlError), } /// Helper type that holds the guest memory in which we write the tables in and a resource /// allocator for allocating space for the tables struct AcpiTableWriter<'a> { mem: &'a GuestMemoryMmap, } impl AcpiTableWriter<'_> { /// Write a table in guest memory /// /// This will allocate enough space inside guest memory and write the table in the allocated /// buffer. It returns the address in which it wrote the table. fn write_acpi_table( &mut self, resource_allocator: &mut ResourceAllocator, table: &mut S, ) -> Result where S: Sdt, { let addr = resource_allocator.allocate_system_memory( table.len().try_into().unwrap(), 1, AllocPolicy::FirstMatch, )?; table .write_to_guest(self.mem, GuestAddress(addr)) .inspect_err(|err| error!("acpi: Could not write table in guest memory: {err}"))?; debug!( "acpi: Wrote table ({} bytes) at address: {:#010x}", table.len(), addr ); Ok(addr) } /// Build the DSDT table for the guest fn build_dsdt( &mut self, device_manager: &mut DeviceManager, resource_allocator: &mut ResourceAllocator, ) -> Result { let mut dsdt_data = Vec::new(); // Virtio-devices DSDT data dsdt_data.extend_from_slice(&device_manager.mmio_devices.dsdt_data); // Add GED and VMGenID AML data. 
device_manager .acpi_devices .append_aml_bytes(&mut dsdt_data)?; if let Some(pci_segment) = &device_manager.pci_devices.pci_segment { pci_segment.append_aml_bytes(&mut dsdt_data)?; } // Architecture specific DSDT data setup_arch_dsdt(&mut dsdt_data)?; let mut dsdt = Dsdt::new(OEM_ID, *b"FCVMDSDT", OEM_REVISION, dsdt_data); self.write_acpi_table(resource_allocator, &mut dsdt) } /// Build the FADT table for the guest /// /// This includes a pointer with the location of the DSDT in guest memory fn build_fadt( &mut self, resource_allocator: &mut ResourceAllocator, dsdt_addr: u64, ) -> Result { let mut fadt = Fadt::new(OEM_ID, *b"FCVMFADT", OEM_REVISION); fadt.set_hypervisor_vendor_id(HYPERVISOR_VENDOR_ID); fadt.set_x_dsdt(dsdt_addr); fadt.set_flags( (1 << FADT_F_HW_REDUCED_ACPI) | (1 << FADT_F_PWR_BUTTON) | (1 << FADT_F_SLP_BUTTON), ); setup_arch_fadt(&mut fadt); self.write_acpi_table(resource_allocator, &mut fadt) } /// Build the MADT table for the guest /// /// This includes information about the interrupt controllers supported in the platform fn build_madt( &mut self, resource_allocator: &mut ResourceAllocator, nr_vcpus: u8, ) -> Result { let mut madt = Madt::new( OEM_ID, *b"FCVMMADT", OEM_REVISION, apic_addr(), setup_interrupt_controllers(nr_vcpus), ); self.write_acpi_table(resource_allocator, &mut madt) } /// Build the XSDT table for the guest /// /// Currently, we pass to the guest just FADT and MADT tables. fn build_xsdt( &mut self, resource_allocator: &mut ResourceAllocator, fadt_addr: u64, madt_addr: u64, mcfg_addr: u64, ) -> Result { let mut xsdt = Xsdt::new( OEM_ID, *b"FCMVXSDT", OEM_REVISION, vec![fadt_addr, madt_addr, mcfg_addr], ); self.write_acpi_table(resource_allocator, &mut xsdt) } /// Build the MCFG table for the guest. 
fn build_mcfg( &mut self, resource_allocator: &mut ResourceAllocator, pci_mmio_config_addr: u64, ) -> Result { let mut mcfg = Mcfg::new(OEM_ID, *b"FCMVMCFG", OEM_REVISION, pci_mmio_config_addr); self.write_acpi_table(resource_allocator, &mut mcfg) } /// Build the RSDP pointer for the guest. /// /// This will build the RSDP pointer which points to the XSDT table and write it in guest /// memory. The address in which we write RSDP is pre-determined for every architecture. /// We will not allocate arbitrary memory for it fn build_rsdp(&mut self, xsdt_addr: u64) -> Result<(), AcpiError> { let mut rsdp = Rsdp::new(OEM_ID, xsdt_addr); rsdp.write_to_guest(self.mem, rsdp_addr()) .inspect_err(|err| error!("acpi: Could not write RSDP in guest memory: {err}"))?; debug!( "acpi: Wrote RSDP ({} bytes) at address: {:#010x}", rsdp.len(), rsdp_addr().0 ); Ok(()) } } /// Create ACPI tables for the guest /// /// This will create the ACPI tables needed to describe to the guest OS the available hardware, /// such as interrupt controllers, vCPUs and VirtIO devices. 
pub(crate) fn create_acpi_tables(
    mem: &GuestMemoryMmap,
    device_manager: &mut DeviceManager,
    resource_allocator: &mut ResourceAllocator,
    vcpus: &[Vcpu],
) -> Result<(), AcpiError> {
    let mut writer = AcpiTableWriter { mem };

    // Write order matters: FADT embeds the DSDT address, XSDT points to
    // FADT/MADT/MCFG, and the RSDP points to the XSDT.
    let dsdt_addr = writer.build_dsdt(device_manager, resource_allocator)?;
    let fadt_addr = writer.build_fadt(resource_allocator, dsdt_addr)?;
    let madt_addr = writer.build_madt(resource_allocator, vcpus.len().try_into().unwrap())?;
    let mcfg_addr = writer.build_mcfg(resource_allocator, layout::PCI_MMCONFIG_START)?;
    let xsdt_addr = writer.build_xsdt(resource_allocator, fadt_addr, madt_addr, mcfg_addr)?;
    writer.build_rsdp(xsdt_addr)
}

#[cfg(test)]
mod tests {
    use acpi_tables::Sdt;
    use vm_memory::Bytes;

    use crate::acpi::{AcpiError, AcpiTableWriter};
    use crate::arch::x86_64::layout::{SYSTEM_MEM_SIZE, SYSTEM_MEM_START};
    use crate::builder::tests::default_vmm;
    use crate::utils::u64_to_usize;
    use crate::vstate::resources::ResourceAllocator;
    use crate::vstate::vm::tests::setup_vm_with_memory;

    // A trivial `Sdt` implementation backed by a plain byte buffer.
    // NOTE(review): the `Vec<u8>` payload type and the `<M: GuestMemory>` bound below were
    // stripped by extraction; restored here.
    struct MockSdt(Vec<u8>);

    impl Sdt for MockSdt {
        fn len(&self) -> usize {
            self.0.len()
        }

        fn write_to_guest<M: vm_memory::GuestMemory>(
            &mut self,
            mem: &M,
            address: vm_memory::GuestAddress,
        ) -> acpi_tables::Result<()> {
            mem.write_slice(&self.0, address)?;
            Ok(())
        }
    }

    // Currently we are allocating up to SYSTEM_MEM_SIZE memory for ACPI tables. We are allocating
    // using the FirstMatch policy, with a 1 byte alignment. This test checks that we are able to
    // allocate up to this size, and get back the expected addresses.
    #[test]
    fn test_write_acpi_table_memory_allocation() {
        // A mock Vmm object with 128MBs of memory
        let vmm = default_vmm();
        let mut writer = AcpiTableWriter {
            mem: vmm.vm.guest_memory(),
        };
        let mut resource_allocator = vmm.vm.resource_allocator();

        // This should succeed
        let mut sdt = MockSdt(vec![0; 4096]);
        let addr = writer
            .write_acpi_table(&mut resource_allocator, &mut sdt)
            .unwrap();
        assert_eq!(addr, SYSTEM_MEM_START);

        // Trying to write one byte more than the whole system-memory window must fail.
        let mut sdt = MockSdt(vec![0; usize::try_from(SYSTEM_MEM_SIZE + 1).unwrap()]);
        let err = writer
            .write_acpi_table(&mut resource_allocator, &mut sdt)
            .unwrap_err();
        assert!(
            matches!(
                err,
                AcpiError::VmAllocator(vm_allocator::Error::ResourceNotAvailable)
            ),
            "{:?}",
            err
        );

        // We are allocating memory for tables with alignment of 1 byte. All of these should
        // succeed.
        let mut sdt = MockSdt(vec![0; 5]);
        let addr = writer
            .write_acpi_table(&mut resource_allocator, &mut sdt)
            .unwrap();
        assert_eq!(addr, SYSTEM_MEM_START + 4096);
        let mut sdt = MockSdt(vec![0; 2]);
        let addr = writer
            .write_acpi_table(&mut resource_allocator, &mut sdt)
            .unwrap();
        assert_eq!(addr, SYSTEM_MEM_START + 4101);
        let mut sdt = MockSdt(vec![0; 4]);
        let addr = writer
            .write_acpi_table(&mut resource_allocator, &mut sdt)
            .unwrap();
        assert_eq!(addr, SYSTEM_MEM_START + 4103);
        let mut sdt = MockSdt(vec![0; 8]);
        let addr = writer
            .write_acpi_table(&mut resource_allocator, &mut sdt)
            .unwrap();
        assert_eq!(addr, SYSTEM_MEM_START + 4107);
        let mut sdt = MockSdt(vec![0; 16]);
        let addr = writer
            .write_acpi_table(&mut resource_allocator, &mut sdt)
            .unwrap();
        assert_eq!(addr, SYSTEM_MEM_START + 4115);
    }

    // If, for whatever weird reason, we end up with a microVM that has less memory than the
    // maximum address we allocate for ACPI tables, we would be able to allocate the tables but we
    // would not be able to write them. This is practically impossible in our case.
If we get such a // guest memory, we won't be able to load the guest kernel, but the function does // return an error on this case, so let's just check that in case any of these assumptions // change in the future. #[test] fn test_write_acpi_table_small_memory() { let (_, vm) = setup_vm_with_memory(u64_to_usize(SYSTEM_MEM_START + SYSTEM_MEM_SIZE - 4096)); let mut writer = AcpiTableWriter { mem: vm.guest_memory(), }; let mut resource_allocator = ResourceAllocator::new(); let mut sdt = MockSdt(vec![0; usize::try_from(SYSTEM_MEM_SIZE).unwrap()]); let err = writer .write_acpi_table(&mut resource_allocator, &mut sdt) .unwrap_err(); assert!( matches!( err, AcpiError::AcpiTables(acpi_tables::AcpiError::GuestMemory( vm_memory::GuestMemoryError::PartialBuffer { expected: 263168, // SYSTEM_MEM_SIZE completed: 259072 // SYSTEM_MEM_SIZE - 4096 }, )) ), "{:?}", err ); } } ================================================ FILE: src/vmm/src/acpi/x86_64.rs ================================================ // Copyright 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 use std::mem::size_of; use acpi_tables::fadt::IAPC_BOOT_ARG_FLAGS_VGA_NOT_PRESENT; use acpi_tables::madt::{IoAPIC, LocalAPIC}; use acpi_tables::{Fadt, aml}; use vm_memory::GuestAddress; use zerocopy::IntoBytes; use crate::arch::x86_64::layout; use crate::device_manager::legacy::PortIODeviceManager; #[inline(always)] pub(crate) fn setup_interrupt_controllers(nr_vcpus: u8) -> Vec { let mut ic = Vec::with_capacity(size_of::() + (nr_vcpus as usize) * size_of::()); ic.extend_from_slice(IoAPIC::new(0, layout::IOAPIC_ADDR).as_bytes()); for i in 0..nr_vcpus { ic.extend_from_slice(LocalAPIC::new(i).as_bytes()); } ic } #[inline(always)] pub(crate) fn setup_arch_fadt(fadt: &mut Fadt) { // Let the guest kernel know that there is not VGA hardware present // neither do we support ASPM, or MSI type of interrupts. 
// More info here: // https://uefi.org/specs/ACPI/6.5/05_ACPI_Software_Programming_Model.html?highlight=0a06#ia-pc-boot-architecture-flags fadt.setup_iapc_flags(1 << IAPC_BOOT_ARG_FLAGS_VGA_NOT_PRESENT); } #[inline(always)] pub(crate) fn setup_arch_dsdt(dsdt_data: &mut Vec) -> Result<(), aml::AmlError> { PortIODeviceManager::append_aml_bytes(dsdt_data) } pub(crate) const fn apic_addr() -> u32 { layout::APIC_ADDR } pub(crate) const fn rsdp_addr() -> GuestAddress { GuestAddress(layout::RSDP_ADDR) } ================================================ FILE: src/vmm/src/arch/aarch64/cache_info.rs ================================================ // Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 use std::path::{Path, PathBuf}; use std::{fs, io}; use crate::logger::warn; // Based on https://elixir.free-electrons.com/linux/v4.9.62/source/arch/arm64/kernel/cacheinfo.c#L29. const MAX_CACHE_LEVEL: u8 = 7; #[derive(Debug, thiserror::Error, displaydoc::Display)] pub(crate) enum CacheInfoError { /// Failed to read cache information: {0} FailedToReadCacheInfo(#[from] io::Error), /// Invalid cache configuration found for {0}: {1} InvalidCacheAttr(String, String), /// Cannot read cache level. MissingCacheLevel, /// Cannot read cache type. MissingCacheType, /// {0} MissingOptionalAttr(String, CacheEntry), } struct CacheEngine { store: Box, } trait CacheStore: std::fmt::Debug { fn get_by_key(&self, index: u8, file_name: &str) -> Result; } #[derive(Debug)] pub(crate) struct CacheEntry { // Cache Level: 1, 2, 3.. pub level: u8, // Type of cache: Unified, Data, Instruction. pub type_: CacheType, pub size_: Option, pub number_of_sets: Option, pub line_size: Option, // How many CPUS share this cache. 
pub cpus_per_unit: u16, } #[derive(Debug)] #[cfg_attr(test, allow(dead_code))] struct HostCacheStore { cache_dir: PathBuf, } #[cfg(not(test))] impl Default for CacheEngine { fn default() -> Self { CacheEngine { store: Box::new(HostCacheStore { cache_dir: PathBuf::from("/sys/devices/system/cpu/cpu0/cache"), }), } } } impl CacheStore for HostCacheStore { fn get_by_key(&self, index: u8, file_name: &str) -> Result { readln_special(&PathBuf::from(format!( "{}/index{}/{}", self.cache_dir.as_path().display(), index, file_name ))) } } impl CacheEntry { fn from_index(index: u8, store: &dyn CacheStore) -> Result { let mut err_str = String::new(); let mut cache: CacheEntry = CacheEntry::default(); // If the cache level or the type cannot be retrieved we stop the process // of populating the cache levels. let level_str = store .get_by_key(index, "level") .map_err(|_| CacheInfoError::MissingCacheLevel)?; cache.level = level_str.parse::().map_err(|err| { CacheInfoError::InvalidCacheAttr("level".to_string(), err.to_string()) })?; let cache_type_str = store .get_by_key(index, "type") .map_err(|_| CacheInfoError::MissingCacheType)?; cache.type_ = CacheType::try_from(&cache_type_str)?; if let Ok(shared_cpu_map) = store.get_by_key(index, "shared_cpu_map") { cache.cpus_per_unit = mask_str2bit_count(shared_cpu_map.trim_end())?; } else { err_str += "shared cpu map"; err_str += ", "; } if let Ok(coherency_line_size) = store.get_by_key(index, "coherency_line_size") { cache.line_size = Some(coherency_line_size.parse::().map_err(|err| { CacheInfoError::InvalidCacheAttr("coherency_line_size".to_string(), err.to_string()) })?); } else { err_str += "coherency line size"; err_str += ", "; } if let Ok(mut size) = store.get_by_key(index, "size") { cache.size_ = Some(to_bytes(&mut size)?); } else { err_str += "size"; err_str += ", "; } if let Ok(number_of_sets) = store.get_by_key(index, "number_of_sets") { cache.number_of_sets = Some(number_of_sets.parse::().map_err(|err| { 
CacheInfoError::InvalidCacheAttr("number_of_sets".to_string(), err.to_string()) })?); } else { err_str += "number of sets"; err_str += ", "; } // Pop the last 2 chars if a comma and space are present. // The unwrap is safe since we check that the string actually // ends with those 2 chars. if err_str.ends_with(", ") { err_str.pop().unwrap(); err_str.pop().unwrap(); } if !err_str.is_empty() { return Err(CacheInfoError::MissingOptionalAttr(err_str, cache)); } Ok(cache) } } impl Default for CacheEntry { fn default() -> Self { CacheEntry { level: 0, type_: CacheType::Unified, size_: None, number_of_sets: None, line_size: None, cpus_per_unit: 1, } } } #[derive(Debug)] // Based on https://elixir.free-electrons.com/linux/v4.9.62/source/include/linux/cacheinfo.h#L11. pub(crate) enum CacheType { Instruction, Data, Unified, } impl CacheType { fn try_from(string: &str) -> Result { match string.trim() { "Instruction" => Ok(Self::Instruction), "Data" => Ok(Self::Data), "Unified" => Ok(Self::Unified), cache_type => Err(CacheInfoError::InvalidCacheAttr( "type".to_string(), cache_type.to_string(), )), } } // The below are auxiliary functions used for constructing the FDT. 
pub fn of_cache_size(&self) -> &str { match self { Self::Instruction => "i-cache-size", Self::Data => "d-cache-size", Self::Unified => "cache-size", } } pub fn of_cache_line_size(&self) -> &str { match self { Self::Instruction => "i-cache-line-size", Self::Data => "d-cache-line-size", Self::Unified => "cache-line-size", } } pub fn of_cache_type(&self) -> Option<&'static str> { match self { Self::Instruction => None, Self::Data => None, Self::Unified => Some("cache-unified"), } } pub fn of_cache_sets(&self) -> &str { match self { Self::Instruction => "i-cache-sets", Self::Data => "d-cache-sets", Self::Unified => "cache-sets", } } } #[cfg_attr(test, allow(unused))] fn readln_special>(file_path: &T) -> Result { let line = fs::read_to_string(file_path)?; Ok(line.trim_end().to_string()) } fn to_bytes(cache_size_pretty: &mut String) -> Result { match cache_size_pretty.pop() { Some('K') => Ok(cache_size_pretty.parse::().map_err(|err| { CacheInfoError::InvalidCacheAttr("size".to_string(), err.to_string()) })? * 1024), Some('M') => Ok(cache_size_pretty.parse::().map_err(|err| { CacheInfoError::InvalidCacheAttr("size".to_string(), err.to_string()) })? * 1024 * 1024), Some(letter) => { cache_size_pretty.push(letter); Err(CacheInfoError::InvalidCacheAttr( "size".to_string(), (*cache_size_pretty).to_string(), )) } _ => Err(CacheInfoError::InvalidCacheAttr( "size".to_string(), "Empty string was provided".to_string(), )), } } // Helper function to count the number of set bits from a bitmap // formatted string (see %*pb in the printk formats). // Expected input is a list of 32-bit comma separated hex values, // without the 0x prefix. 
// fn mask_str2bit_count(mask_str: &str) -> Result { let split_mask_iter = mask_str.split(','); let mut bit_count: u16 = 0; for s in split_mask_iter { let mut s_zero_free = s.trim_start_matches('0'); if s_zero_free.is_empty() { s_zero_free = "0"; } bit_count += u16::try_from( u32::from_str_radix(s_zero_free, 16) .map_err(|err| { CacheInfoError::InvalidCacheAttr("shared_cpu_map".to_string(), err.to_string()) })? .count_ones(), ) .unwrap(); // Safe because this is at most 32 } if bit_count == 0 { return Err(CacheInfoError::InvalidCacheAttr( "shared_cpu_map".to_string(), mask_str.to_string(), )); } Ok(bit_count) } fn append_cache_level( cache_l1: &mut Vec, cache_non_l1: &mut Vec, cache: CacheEntry, ) { if cache.level == 1 { cache_l1.push(cache); } else { cache_non_l1.push(cache); } } pub(crate) fn read_cache_config( cache_l1: &mut Vec, cache_non_l1: &mut Vec, ) -> Result<(), CacheInfoError> { // It is used to make sure we log warnings for missing files only for one level because // if an attribute is missing for a level for sure it will be missing for other levels too. // Also without this mechanism we would be logging the warnings for each level which pollutes // a lot the logs. let mut logged_missing_attr = false; let engine = CacheEngine::default(); for index in 0..=MAX_CACHE_LEVEL { match CacheEntry::from_index(index, engine.store.as_ref()) { Ok(cache) => { append_cache_level(cache_l1, cache_non_l1, cache); } // Missing cache level or type means not further search is necessary. Err(CacheInfoError::MissingCacheLevel) | Err(CacheInfoError::MissingCacheType) => break, // Missing cache files is not necessary an error so we // do not propagate it upwards. We were prudent enough to log it. 
Err(CacheInfoError::MissingOptionalAttr(msg, cache)) => { let level = cache.level; append_cache_level(cache_l1, cache_non_l1, cache); if !msg.is_empty() && !logged_missing_attr { warn!("Could not read the {msg} for cache level {level}."); logged_missing_attr = true; } } Err(err) => return Err(err), } } Ok(()) } #[cfg(test)] mod tests { use std::collections::HashMap; use super::*; use crate::arch::aarch64::cache_info::{ CacheEngine, CacheEntry, CacheStore, read_cache_config, }; #[derive(Debug)] struct MockCacheStore { dummy_fs: HashMap, } impl Default for CacheEngine { fn default() -> Self { CacheEngine { store: Box::new(MockCacheStore { dummy_fs: create_default_store(), }), } } } impl CacheEngine { fn new(map: &HashMap) -> Self { CacheEngine { store: Box::new(MockCacheStore { dummy_fs: map.clone(), }), } } } impl CacheStore for MockCacheStore { fn get_by_key(&self, index: u8, file_name: &str) -> Result { let key = format!("index{}/{}", index, file_name); if let Some(val) = self.dummy_fs.get(&key) { Ok(val.to_string()) } else { Err(CacheInfoError::FailedToReadCacheInfo( io::Error::from_raw_os_error(0), )) } } } fn create_default_store() -> HashMap { let mut cache_struct = HashMap::new(); cache_struct.insert("index0/level".to_string(), "1".to_string()); cache_struct.insert("index0/type".to_string(), "Data".to_string()); cache_struct.insert("index1/level".to_string(), "1".to_string()); cache_struct.insert("index1/type".to_string(), "Instruction".to_string()); cache_struct.insert("index2/level".to_string(), "2".to_string()); cache_struct.insert("index2/type".to_string(), "Unified".to_string()); cache_struct } #[test] fn test_mask_str2bit_count() { mask_str2bit_count("00000000,00000001").unwrap(); let res = mask_str2bit_count("00000000,00000000"); assert!( res.is_err() && format!("{}", res.unwrap_err()) == "Invalid cache configuration found for shared_cpu_map: 00000000,00000000" ); let res = mask_str2bit_count("00000000;00000001"); assert!( res.is_err() && format!("{}", 
res.unwrap_err()) == "Invalid cache configuration found for shared_cpu_map: invalid digit found \ in string" ); } #[test] fn test_to_bytes() { to_bytes(&mut "64K".to_string()).unwrap(); to_bytes(&mut "64M".to_string()).unwrap(); match to_bytes(&mut "64KK".to_string()) { Err(err) => assert_eq!( format!("{}", err), "Invalid cache configuration found for size: invalid digit found in string" ), _ => panic!("This should be an error!"), } let res = to_bytes(&mut "64G".to_string()); assert!( res.is_err() && format!("{}", res.unwrap_err()) == "Invalid cache configuration found for size: 64G" ); let res = to_bytes(&mut "".to_string()); assert!( res.is_err() && format!("{}", res.unwrap_err()) == "Invalid cache configuration found for size: Empty string was provided" ); } #[test] fn test_cache_level() { let mut default_map = create_default_store(); let mut map1 = default_map.clone(); map1.remove("index0/type"); let engine = CacheEngine::new(&map1); let res = CacheEntry::from_index(0, engine.store.as_ref()); // We did create the level file but we still do not have the type file. assert!(matches!(res.unwrap_err(), CacheInfoError::MissingCacheType)); let engine = CacheEngine::new(&default_map); let res = CacheEntry::from_index(0, engine.store.as_ref()); assert_eq!( format!("{}", res.unwrap_err()), "shared cpu map, coherency line size, size, number of sets", ); // Now putting some invalid values in the type and level files. 
let mut map2 = default_map.clone(); map2.insert("index0/level".to_string(), "d".to_string()); let engine = CacheEngine::new(&map2); let res = CacheEntry::from_index(0, engine.store.as_ref()); assert_eq!( format!("{}", res.unwrap_err()), "Invalid cache configuration found for level: invalid digit found in string" ); default_map.insert("index0/type".to_string(), "Instructionn".to_string()); let engine = CacheEngine::new(&default_map); let res = CacheEntry::from_index(0, engine.store.as_ref()); assert_eq!( format!("{}", res.unwrap_err()), "Invalid cache configuration found for type: Instructionn" ); } #[test] fn test_cache_shared_cpu_map() { let mut default_map = create_default_store(); default_map.insert( "index0/shared_cpu_map".to_string(), "00000000,00000001".to_string(), ); let engine = CacheEngine::new(&default_map); let res = CacheEntry::from_index(0, engine.store.as_ref()); assert_eq!( format!("{}", res.unwrap_err()), "coherency line size, size, number of sets" ); default_map.insert( "index0/shared_cpu_map".to_string(), "00000000,0000000G".to_string(), ); let engine = CacheEngine::new(&default_map); let res = CacheEntry::from_index(0, engine.store.as_ref()); assert_eq!( format!("{}", res.unwrap_err()), "Invalid cache configuration found for shared_cpu_map: invalid digit found in string" ); default_map.insert("index0/shared_cpu_map".to_string(), "00000000".to_string()); let engine = CacheEngine::new(&default_map); let res = CacheEntry::from_index(0, engine.store.as_ref()); assert_eq!( format!("{}", res.unwrap_err()), "Invalid cache configuration found for shared_cpu_map: 00000000" ); } #[test] fn test_cache_coherency() { let mut default_map = create_default_store(); default_map.insert("index0/coherency_line_size".to_string(), "64".to_string()); let engine = CacheEngine::new(&default_map); let res = CacheEntry::from_index(0, engine.store.as_ref()); assert_eq!( "shared cpu map, size, number of sets", format!("{}", res.unwrap_err()) ); default_map.insert( 
"index0/coherency_line_size".to_string(), "Instruction".to_string(), ); let engine = CacheEngine::new(&default_map); let res = CacheEntry::from_index(0, engine.store.as_ref()); assert_eq!( format!("{}", res.unwrap_err()), "Invalid cache configuration found for coherency_line_size: invalid digit found in \ string" ); } #[test] fn test_cache_size() { let mut default_map = create_default_store(); default_map.insert("index0/size".to_string(), "64K".to_string()); let engine = CacheEngine::new(&default_map); let res = CacheEntry::from_index(0, engine.store.as_ref()); assert_eq!( format!("{}", res.unwrap_err()), "shared cpu map, coherency line size, number of sets", ); default_map.insert("index0/size".to_string(), "64".to_string()); let engine = CacheEngine::new(&default_map); let res = CacheEntry::from_index(0, engine.store.as_ref()); assert_eq!( format!("{}", res.unwrap_err()), "Invalid cache configuration found for size: 64" ); default_map.insert("index0/size".to_string(), "64Z".to_string()); let engine = CacheEngine::new(&default_map); let res = CacheEntry::from_index(0, engine.store.as_ref()); assert_eq!( format!("{}", res.unwrap_err()), "Invalid cache configuration found for size: 64Z" ); } #[test] fn test_cache_no_sets() { let mut default_map = create_default_store(); default_map.insert("index0/number_of_sets".to_string(), "64".to_string()); let engine = CacheEngine::new(&default_map); let res = CacheEntry::from_index(0, engine.store.as_ref()); assert_eq!( "shared cpu map, coherency line size, size", format!("{}", res.unwrap_err()) ); default_map.insert("index0/number_of_sets".to_string(), "64K".to_string()); let engine = CacheEngine::new(&default_map); let res = CacheEntry::from_index(0, engine.store.as_ref()); assert_eq!( format!("{}", res.unwrap_err()), "Invalid cache configuration found for number_of_sets: invalid digit found in string" ); } #[test] fn test_sysfs_read_caches() { let mut l1_caches: Vec = Vec::new(); let mut non_l1_caches: Vec = Vec::new(); // We 
use sysfs for extracting the cache information. read_cache_config(&mut l1_caches, &mut non_l1_caches).unwrap(); assert_eq!(l1_caches.len(), 2); assert_eq!(l1_caches.len(), 2); } } ================================================ FILE: src/vmm/src/arch/aarch64/fdt.rs ================================================ // Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 // // Portions Copyright 2017 The Chromium OS Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the THIRD-PARTY file. use std::ffi::CString; use std::fmt::Debug; use vm_fdt::{Error as VmFdtError, FdtWriter, FdtWriterNode}; use vm_memory::{GuestMemoryError, GuestMemoryRegion}; use super::cache_info::{CacheEntry, read_cache_config}; use super::gic::GICDevice; use crate::arch::{ MEM_32BIT_DEVICES_SIZE, MEM_32BIT_DEVICES_START, MEM_64BIT_DEVICES_SIZE, MEM_64BIT_DEVICES_START, PCI_MMIO_CONFIG_SIZE_PER_SEGMENT, }; use crate::device_manager::DeviceManager; use crate::device_manager::mmio::MMIODeviceInfo; use crate::device_manager::pci_mngr::PciDevices; use crate::devices::acpi::vmclock::{VMCLOCK_SIZE, VmClock}; use crate::devices::acpi::vmgenid::{VMGENID_MEM_SIZE, VmGenId}; use crate::initrd::InitrdConfig; use crate::vstate::memory::{Address, GuestMemory, GuestMemoryMmap, GuestRegionType}; // This is a value for uniquely identifying the FDT node declaring the interrupt controller. const GIC_PHANDLE: u32 = 1; // This is a value for uniquely identifying the FDT node containing the clock definition. const CLOCK_PHANDLE: u32 = 2; // This is a value for uniquely identifying the FDT node declaring the MSI controller. const MSI_PHANDLE: u32 = 3; // You may be wondering why this big value? // This phandle is used to uniquely identify the FDT nodes containing cache information. Each cpu // can have a variable number of caches, some of these caches may be shared with other cpus. 
// So, we start the indexing of the phandles used from a really big number and then subtract from // it as we need more and more phandle for each cache representation. const LAST_CACHE_PHANDLE: u32 = 4000; // Read the documentation specified when appending the root node to the FDT. const ADDRESS_CELLS: u32 = 0x2; const SIZE_CELLS: u32 = 0x2; // As per kvm tool and // https://www.kernel.org/doc/Documentation/devicetree/bindings/interrupt-controller/arm%2Cgic.txt // Look for "The 1st cell..." const GIC_FDT_IRQ_TYPE_SPI: u32 = 0; const GIC_FDT_IRQ_TYPE_PPI: u32 = 1; // From https://elixir.bootlin.com/linux/v4.9.62/source/include/dt-bindings/interrupt-controller/irq.h#L17 const IRQ_TYPE_EDGE_RISING: u32 = 1; const IRQ_TYPE_LEVEL_HI: u32 = 4; /// Errors thrown while configuring the Flattened Device Tree for aarch64. #[derive(Debug, thiserror::Error, displaydoc::Display)] pub enum FdtError { /// Create FDT error: {0} CreateFdt(#[from] VmFdtError), /// Read cache info error: {0} ReadCacheInfo(String), /// Failure in writing FDT in memory. WriteFdtToMemory(#[from] GuestMemoryError), } #[allow(clippy::too_many_arguments)] /// Creates the flattened device tree for this aarch64 microVM. pub fn create_fdt( guest_mem: &GuestMemoryMmap, vcpu_mpidr: Vec, cmdline: CString, device_manager: &DeviceManager, gic_device: &GICDevice, initrd: &Option, ) -> Result, FdtError> { // Allocate stuff necessary for storing the blob. let mut fdt_writer = FdtWriter::new()?; // For an explanation why these nodes were introduced in the blob take a look at // https://github.com/torvalds/linux/blob/master/Documentation/devicetree/booting-without-of.txt#L845 // Look for "Required nodes and properties". // Header or the root node as per above mentioned documentation. let root = fdt_writer.begin_node("")?; fdt_writer.property_string("compatible", "linux,dummy-virt")?; // For info on #address-cells and size-cells read "Note about cells and address representation" // from the above mentioned txt file. 
fdt_writer.property_u32("#address-cells", ADDRESS_CELLS)?; fdt_writer.property_u32("#size-cells", SIZE_CELLS)?; // This is not mandatory but we use it to point the root node to the node // containing description of the interrupt controller for this VM. fdt_writer.property_u32("interrupt-parent", GIC_PHANDLE)?; create_cpu_nodes(&mut fdt_writer, &vcpu_mpidr)?; create_memory_node(&mut fdt_writer, guest_mem)?; create_chosen_node(&mut fdt_writer, cmdline, initrd)?; create_gic_node(&mut fdt_writer, gic_device)?; create_timer_node(&mut fdt_writer)?; create_clock_node(&mut fdt_writer)?; create_psci_node(&mut fdt_writer)?; create_devices_node(&mut fdt_writer, device_manager)?; create_vmgenid_node(&mut fdt_writer, device_manager.acpi_devices.vmgenid())?; create_vmclock_node(&mut fdt_writer, device_manager.acpi_devices.vmclock())?; create_pci_nodes(&mut fdt_writer, &device_manager.pci_devices)?; // End Header node. fdt_writer.end_node(root)?; // Allocate another buffer so we can format and then write fdt to guest. let fdt_final = fdt_writer.finish()?; Ok(fdt_final) } // Following are the auxiliary function for creating the different nodes that we append to our FDT. fn create_cpu_nodes(fdt: &mut FdtWriter, vcpu_mpidr: &[u64]) -> Result<(), FdtError> { // Since the L1 caches are not shareable among CPUs and they are direct attributes of the // cpu in the device tree, we process the L1 and non-L1 caches separately. // We use sysfs for extracting the cache information. let mut l1_caches: Vec = Vec::new(); let mut non_l1_caches: Vec = Vec::new(); // We use sysfs for extracting the cache information. read_cache_config(&mut l1_caches, &mut non_l1_caches) .map_err(|err| FdtError::ReadCacheInfo(err.to_string()))?; // See https://github.com/torvalds/linux/blob/master/Documentation/devicetree/bindings/arm/cpus.yaml. let cpus = fdt.begin_node("cpus")?; // As per documentation, on ARM v8 64-bit systems value should be set to 2. 
fdt.property_u32("#address-cells", 0x02)?; fdt.property_u32("#size-cells", 0x0)?; let num_cpus = vcpu_mpidr.len(); for (cpu_index, mpidr) in vcpu_mpidr.iter().enumerate() { let cpu = fdt.begin_node(&format!("cpu@{:x}", cpu_index))?; fdt.property_string("device_type", "cpu")?; fdt.property_string("compatible", "arm,arm-v8")?; // The power state coordination interface (PSCI) needs to be enabled for // all vcpus. fdt.property_string("enable-method", "psci")?; // Set the field to first 24 bits of the MPIDR - Multiprocessor Affinity Register. // See http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.ddi0488c/BABHBJCI.html. fdt.property_u64("reg", mpidr & 0x7FFFFF)?; for cache in l1_caches.iter() { // Please check out // https://github.com/devicetree-org/devicetree-specification/releases/download/v0.3/devicetree-specification-v0.3.pdf, // section 3.8. if let Some(size) = cache.size_ { fdt.property_u32(cache.type_.of_cache_size(), size)?; } if let Some(line_size) = cache.line_size { fdt.property_u32(cache.type_.of_cache_line_size(), u32::from(line_size))?; } if let Some(number_of_sets) = cache.number_of_sets { fdt.property_u32(cache.type_.of_cache_sets(), number_of_sets)?; } } // Some of the non-l1 caches can be shared amongst CPUs. You can see an example of a shared // scenario in https://github.com/devicetree-org/devicetree-specification/releases/download/v0.3/devicetree-specification-v0.3.pdf, // 3.8.1 Example. let mut prev_level = 1; let mut cache_node: Option = None; for cache in non_l1_caches.iter() { // We append the next-level-cache node (the node that specifies the cache hierarchy) // in the next iteration. For example, // L2-cache { // cache-size = <0x8000> ----> first iteration // next-level-cache = <&l3-cache> ---> second iteration // } // The cpus per unit cannot be 0 since the sysfs will also include the current cpu // in the list of shared cpus so it needs to be at least 1. Firecracker trusts the host. 
// The operation is safe since we already checked when creating cache attributes that // cpus_per_unit is not 0 (.e look for mask_str2bit_count function). let cache_phandle = LAST_CACHE_PHANDLE - u32::try_from( num_cpus * (cache.level - 2) as usize + cpu_index / cache.cpus_per_unit as usize, ) .unwrap(); // Safe because the number of CPUs is bounded if prev_level != cache.level { fdt.property_u32("next-level-cache", cache_phandle)?; if prev_level > 1 && cache_node.is_some() { fdt.end_node(cache_node.take().unwrap())?; } } if cpu_index % cache.cpus_per_unit as usize == 0 { cache_node = Some(fdt.begin_node(&format!( "l{}-{}-cache", cache.level, cpu_index / cache.cpus_per_unit as usize ))?); fdt.property_u32("phandle", cache_phandle)?; fdt.property_string("compatible", "cache")?; fdt.property_u32("cache-level", u32::from(cache.level))?; if let Some(size) = cache.size_ { fdt.property_u32(cache.type_.of_cache_size(), size)?; } if let Some(line_size) = cache.line_size { fdt.property_u32(cache.type_.of_cache_line_size(), u32::from(line_size))?; } if let Some(number_of_sets) = cache.number_of_sets { fdt.property_u32(cache.type_.of_cache_sets(), number_of_sets)?; } if let Some(cache_type) = cache.type_.of_cache_type() { fdt.property_null(cache_type)?; } prev_level = cache.level; } } if let Some(node) = cache_node { fdt.end_node(node)?; } fdt.end_node(cpu)?; } fdt.end_node(cpus)?; Ok(()) } fn create_memory_node(fdt: &mut FdtWriter, guest_mem: &GuestMemoryMmap) -> Result<(), FdtError> { // See https://github.com/torvalds/linux/blob/master/Documentation/devicetree/booting-without-of.txt#L960 // for an explanation of this. // On ARM we reserve some memory so that it can be utilized for devices like VMGenID to send // data to kernel drivers. The range of this memory is: // // [layout::DRAM_MEM_START, layout::DRAM_MEM_START + layout::SYSTEM_MEM_SIZE) // // The reason we do this is that Linux does not allow remapping system memory. 
However, without // remap, kernel drivers cannot get virtual addresses to read data from device memory. Leaving // this memory region out allows Linux kernel modules to remap and thus read this region. // Pick the first (and only) memory region let dram_region = guest_mem .iter() .find(|region| region.region_type == GuestRegionType::Dram) .unwrap(); // Find the start of memory after the system memory region let start_addr = dram_region .start_addr() .unchecked_add(super::layout::SYSTEM_MEM_SIZE); // Size of the memory is the region size minus the system memory size let mem_size = dram_region.len() - super::layout::SYSTEM_MEM_SIZE; let mem_reg_prop = &[start_addr.raw_value(), mem_size]; let mem = fdt.begin_node("memory@ram")?; fdt.property_string("device_type", "memory")?; fdt.property_array_u64("reg", mem_reg_prop)?; fdt.end_node(mem)?; Ok(()) } fn create_chosen_node( fdt: &mut FdtWriter, cmdline: CString, initrd: &Option, ) -> Result<(), FdtError> { let chosen = fdt.begin_node("chosen")?; // Workaround to be able to reuse an existing property_*() method; in property_string() method, // the cmdline is reconverted to a CString to be written in memory as a null terminated string. 
let cmdline_string = cmdline .into_string() .map_err(|_| vm_fdt::Error::InvalidString)?; fdt.property_string("bootargs", cmdline_string.as_str())?; if let Some(initrd_config) = initrd { fdt.property_u64("linux,initrd-start", initrd_config.address.raw_value())?; fdt.property_u64( "linux,initrd-end", initrd_config.address.raw_value() + initrd_config.size as u64, )?; } fdt.end_node(chosen)?; Ok(()) } fn create_vmgenid_node(fdt: &mut FdtWriter, vmgenid: &VmGenId) -> Result<(), FdtError> { let vmgenid_node = fdt.begin_node("vmgenid")?; fdt.property_string("compatible", "microsoft,vmgenid")?; fdt.property_array_u64("reg", &[vmgenid.guest_address.0, VMGENID_MEM_SIZE])?; fdt.property_array_u32( "interrupts", &[GIC_FDT_IRQ_TYPE_SPI, vmgenid.gsi, IRQ_TYPE_EDGE_RISING], )?; fdt.end_node(vmgenid_node)?; Ok(()) } fn create_vmclock_node(fdt: &mut FdtWriter, vmclock: &VmClock) -> Result<(), FdtError> { let vmclock_node = fdt.begin_node(&format!("ptp@{}", vmclock.guest_address.0))?; fdt.property_string("compatible", "amazon,vmclock")?; fdt.property_array_u64("reg", &[vmclock.guest_address.0, VMCLOCK_SIZE as u64])?; fdt.property_array_u32( "interrupts", &[GIC_FDT_IRQ_TYPE_SPI, vmclock.gsi, IRQ_TYPE_EDGE_RISING], )?; fdt.end_node(vmclock_node)?; Ok(()) } fn create_gic_node(fdt: &mut FdtWriter, gic_device: &GICDevice) -> Result<(), FdtError> { let interrupt = fdt.begin_node("intc")?; fdt.property_string("compatible", gic_device.fdt_compatibility())?; fdt.property_null("interrupt-controller")?; // "interrupt-cells" field specifies the number of cells needed to encode an // interrupt source. The type shall be a and the value shall be 3 if no PPI affinity // description is required. 
fdt.property_u32("#interrupt-cells", 3)?; fdt.property_array_u64("reg", gic_device.device_properties())?; fdt.property_u32("phandle", GIC_PHANDLE)?; fdt.property_u32("#address-cells", 2)?; fdt.property_u32("#size-cells", 2)?; fdt.property_null("ranges")?; let gic_intr = [ GIC_FDT_IRQ_TYPE_PPI, gic_device.fdt_maint_irq(), IRQ_TYPE_LEVEL_HI, ]; fdt.property_array_u32("interrupts", &gic_intr)?; if let Some(msi_properties) = gic_device.msi_properties() { let msic_node = fdt.begin_node("msic")?; fdt.property_string("compatible", "arm,gic-v3-its")?; fdt.property_null("msi-controller")?; fdt.property_u32("phandle", MSI_PHANDLE)?; fdt.property_array_u64("reg", msi_properties)?; fdt.end_node(msic_node)?; } fdt.end_node(interrupt)?; Ok(()) } fn create_clock_node(fdt: &mut FdtWriter) -> Result<(), FdtError> { // The Advanced Peripheral Bus (APB) is part of the Advanced Microcontroller Bus Architecture // (AMBA) protocol family. It defines a low-cost interface that is optimized for minimal power // consumption and reduced interface complexity. // PCLK is the clock source and this node defines exactly the clock for the APB. let clock = fdt.begin_node("apb-pclk")?; fdt.property_string("compatible", "fixed-clock")?; fdt.property_u32("#clock-cells", 0x0)?; fdt.property_u32("clock-frequency", 24_000_000)?; fdt.property_string("clock-output-names", "clk24mhz")?; fdt.property_u32("phandle", CLOCK_PHANDLE)?; fdt.end_node(clock)?; Ok(()) } fn create_timer_node(fdt: &mut FdtWriter) -> Result<(), FdtError> { // See // https://github.com/torvalds/linux/blob/master/Documentation/devicetree/bindings/interrupt-controller/arch_timer.txt // These are fixed interrupt numbers for the timer device. 
let irqs = [13, 14, 11, 10]; let compatible = "arm,armv8-timer"; let mut timer_reg_cells: Vec = Vec::new(); for &irq in irqs.iter() { timer_reg_cells.push(GIC_FDT_IRQ_TYPE_PPI); timer_reg_cells.push(irq); timer_reg_cells.push(IRQ_TYPE_LEVEL_HI); } let timer = fdt.begin_node("timer")?; fdt.property_string("compatible", compatible)?; fdt.property_null("always-on")?; fdt.property_array_u32("interrupts", &timer_reg_cells)?; fdt.end_node(timer)?; Ok(()) } fn create_psci_node(fdt: &mut FdtWriter) -> Result<(), FdtError> { let compatible = "arm,psci-0.2"; let psci = fdt.begin_node("psci")?; fdt.property_string("compatible", compatible)?; // Two methods available: hvc and smc. // As per documentation, PSCI calls between a guest and hypervisor may use the HVC conduit // instead of SMC. So, since we are using kvm, we need to use hvc. fdt.property_string("method", "hvc")?; fdt.end_node(psci)?; Ok(()) } fn create_virtio_node(fdt: &mut FdtWriter, dev_info: &MMIODeviceInfo) -> Result<(), FdtError> { let virtio_mmio = fdt.begin_node(&format!("virtio_mmio@{:x}", dev_info.addr))?; // Adding the dma-coherent property ensures that the guest driver allocates the virtio // queue with the Write-Back attribute, maintaining cache coherency with Firecracker's // accesses to the virtio queue. 
fdt.property_null("dma-coherent")?; fdt.property_string("compatible", "virtio,mmio")?; fdt.property_array_u64("reg", &[dev_info.addr, dev_info.len])?; fdt.property_array_u32( "interrupts", &[ GIC_FDT_IRQ_TYPE_SPI, dev_info.gsi.unwrap(), IRQ_TYPE_EDGE_RISING, ], )?; fdt.property_u32("interrupt-parent", GIC_PHANDLE)?; fdt.end_node(virtio_mmio)?; Ok(()) } fn create_serial_node(fdt: &mut FdtWriter, dev_info: &MMIODeviceInfo) -> Result<(), FdtError> { let serial = fdt.begin_node(&format!("uart@{:x}", dev_info.addr))?; fdt.property_string("compatible", "ns16550a")?; fdt.property_array_u64("reg", &[dev_info.addr, dev_info.len])?; fdt.property_u32("clocks", CLOCK_PHANDLE)?; fdt.property_string("clock-names", "apb_pclk")?; fdt.property_array_u32( "interrupts", &[ GIC_FDT_IRQ_TYPE_SPI, dev_info.gsi.unwrap(), IRQ_TYPE_EDGE_RISING, ], )?; fdt.end_node(serial)?; Ok(()) } fn create_rtc_node(fdt: &mut FdtWriter, dev_info: &MMIODeviceInfo) -> Result<(), FdtError> { // Driver requirements: // https://elixir.bootlin.com/linux/latest/source/Documentation/devicetree/bindings/rtc/arm,pl031.yaml // We do not offer the `interrupt` property because the device // does not implement interrupt support. 
let compatible = b"arm,pl031\0arm,primecell\0"; let rtc = fdt.begin_node(&format!("rtc@{:x}", dev_info.addr))?; fdt.property("compatible", compatible)?; fdt.property_array_u64("reg", &[dev_info.addr, dev_info.len])?; fdt.property_u32("clocks", CLOCK_PHANDLE)?; fdt.property_string("clock-names", "apb_pclk")?; fdt.end_node(rtc)?; Ok(()) } fn create_devices_node( fdt: &mut FdtWriter, device_manager: &DeviceManager, ) -> Result<(), FdtError> { if let Some(rtc_info) = device_manager.mmio_devices.rtc_device_info() { create_rtc_node(fdt, rtc_info)?; } if let Some(serial_info) = device_manager.mmio_devices.serial_device_info() { create_serial_node(fdt, serial_info)?; } let mut virtio_mmio = device_manager.mmio_devices.virtio_device_info(); // Sort out virtio devices by address from low to high and insert them into fdt table. virtio_mmio.sort_by_key(|a| a.addr); for ordered_device_info in virtio_mmio.drain(..) { create_virtio_node(fdt, ordered_device_info)?; } Ok(()) } fn create_pci_nodes(fdt: &mut FdtWriter, pci_devices: &PciDevices) -> Result<(), FdtError> { if pci_devices.pci_segment.is_none() { return Ok(()); } // Fine to unwrap here, we just checked it's not `None`. let segment = pci_devices.pci_segment.as_ref().unwrap(); let pci_node_name = format!("pci@{:x}", segment.mmio_config_address); // Each range here is a thruple of `(PCI address, CPU address, PCI size)`. 
// // More info about the format can be found here: // https://elinux.org/Device_Tree_Usage#PCI_Address_Translation let ranges = [ // 32bit addresses 0x200_0000u32, (MEM_32BIT_DEVICES_START >> 32) as u32, // PCI address (MEM_32BIT_DEVICES_START & 0xffff_ffff) as u32, (MEM_32BIT_DEVICES_START >> 32) as u32, // CPU address (MEM_32BIT_DEVICES_START & 0xffff_ffff) as u32, (MEM_32BIT_DEVICES_SIZE >> 32) as u32, // Range size (MEM_32BIT_DEVICES_SIZE & 0xffff_ffff) as u32, // 64bit addresses 0x300_0000u32, // PCI address (MEM_64BIT_DEVICES_START >> 32) as u32, // PCI address (MEM_64BIT_DEVICES_START & 0xffff_ffff) as u32, // CPU address (MEM_64BIT_DEVICES_START >> 32) as u32, // CPU address (MEM_64BIT_DEVICES_START & 0xffff_ffff) as u32, // Range size (MEM_64BIT_DEVICES_SIZE >> 32) as u32, // Range size ((MEM_64BIT_DEVICES_SIZE & 0xffff_ffff) >> 32) as u32, ]; // See kernel document Documentation/devicetree/bindings/pci/pci-msi.txt let msi_map = [ // rid-base: A single cell describing the first RID matched by the entry. 0x0, // msi-controller: A single phandle to an MSI controller. MSI_PHANDLE, // msi-base: An msi-specifier describing the msi-specifier produced for the // first RID matched by the entry. segment.id as u32, // length: A single cell describing how many consecutive RIDs are matched // following the rid-base. 
0x100, ]; let pci_node = fdt.begin_node(&pci_node_name)?; fdt.property_string("compatible", "pci-host-ecam-generic")?; fdt.property_string("device_type", "pci")?; fdt.property_array_u32("ranges", &ranges)?; fdt.property_array_u32("bus-range", &[0, 0])?; fdt.property_u32("linux,pci-domain", segment.id.into())?; fdt.property_u32("#address-cells", 3)?; fdt.property_u32("#size-cells", 2)?; fdt.property_array_u64( "reg", &[ segment.mmio_config_address, PCI_MMIO_CONFIG_SIZE_PER_SEGMENT, ], )?; fdt.property_u32("#interrupt-cells", 1)?; fdt.property_null("interrupt-map")?; fdt.property_null("interrupt-map-mask")?; fdt.property_null("dma-coherent")?; fdt.property_array_u32("msi-map", &msi_map)?; fdt.property_u32("msi-parent", MSI_PHANDLE)?; Ok(fdt.end_node(pci_node)?) } #[cfg(test)] mod tests { use std::ffi::CString; use std::sync::{Arc, Mutex}; use linux_loader::cmdline as kernel_cmdline; use super::*; use crate::arch::aarch64::gic::create_gic; use crate::arch::aarch64::layout; use crate::device_manager::mmio::tests::DummyDevice; use crate::device_manager::tests::default_device_manager; use crate::test_utils::arch_mem; use crate::vstate::memory::GuestAddress; use crate::{EventManager, Kvm, Vm}; // The `load` function from the `device_tree` will mistakenly check the actual size // of the buffer with the allocated size. This works around that. 
fn set_size(buf: &mut [u8], pos: usize, val: u32) { buf[pos] = ((val >> 24) & 0xff) as u8; buf[pos + 1] = ((val >> 16) & 0xff) as u8; buf[pos + 2] = ((val >> 8) & 0xff) as u8; buf[pos + 3] = (val & 0xff) as u8; } #[test] fn test_create_fdt_with_devices() { let mem = arch_mem(layout::FDT_MAX_SIZE + 0x1000); let mut event_manager = EventManager::new().unwrap(); let mut device_manager = default_device_manager(); let kvm = Kvm::new(vec![]).unwrap(); let vm = Vm::new(&kvm).unwrap(); let gic = create_gic(vm.fd(), 1, None).unwrap(); let mut cmdline = kernel_cmdline::Cmdline::new(4096).unwrap(); cmdline.insert("console", "/dev/tty0").unwrap(); device_manager .attach_legacy_devices_aarch64(&vm, &mut event_manager, &mut cmdline, None) .unwrap(); let dummy = Arc::new(Mutex::new(DummyDevice::new())); device_manager .mmio_devices .register_virtio_test_device( &vm, mem.clone(), dummy, &mut event_manager, &mut cmdline, "dummy", ) .unwrap(); create_fdt( &mem, vec![0], cmdline.as_cstring().unwrap(), &device_manager, &gic, &None, ) .unwrap(); } #[test] fn test_create_fdt() { let mem = arch_mem(layout::FDT_MAX_SIZE + 0x1000); let device_manager = default_device_manager(); let kvm = Kvm::new(vec![]).unwrap(); let vm = Vm::new(&kvm).unwrap(); let gic = create_gic(vm.fd(), 1, None).unwrap(); let saved_dtb_bytes = match gic.fdt_compatibility() { "arm,gic-v3" => include_bytes!("output_GICv3.dtb"), "arm,gic-400" => include_bytes!("output_GICv2.dtb"), _ => panic!("Unexpected gic version!"), }; let current_dtb_bytes = create_fdt( &mem, vec![0], CString::new("console=tty0").unwrap(), &device_manager, &gic, &None, ) .unwrap(); // Use this code when wanting to generate a new DTB sample. 
// { // use std::fs; // use std::io::Write; // use std::path::PathBuf; // let path = PathBuf::from(env!("CARGO_MANIFEST_DIR")); // let dtb_path = match gic.fdt_compatibility() { // "arm,gic-v3" => "output_GICv3.dtb", // "arm,gic-400" => ("output_GICv2.dtb"), // _ => panic!("Unexpected gic version!"), // }; // let mut output = fs::OpenOptions::new() // .write(true) // .create(true) // .open(path.join(format!("src/arch/aarch64/{}", dtb_path))) // .unwrap(); // output.write_all(¤t_dtb_bytes).unwrap(); // } let pos = 4; let val = u32::try_from(layout::FDT_MAX_SIZE).unwrap(); let mut buf = vec![]; buf.extend_from_slice(saved_dtb_bytes); set_size(&mut buf, pos, val); let original_fdt = device_tree::DeviceTree::load(&buf).unwrap(); let generated_fdt = device_tree::DeviceTree::load(¤t_dtb_bytes).unwrap(); assert_eq!( format!("{:?}", original_fdt), format!("{:?}", generated_fdt) ); } #[test] fn test_create_fdt_with_initrd() { let mem = arch_mem(layout::FDT_MAX_SIZE + 0x1000); let device_manager = default_device_manager(); let kvm = Kvm::new(vec![]).unwrap(); let vm = Vm::new(&kvm).unwrap(); let gic = create_gic(vm.fd(), 1, None).unwrap(); let saved_dtb_bytes = match gic.fdt_compatibility() { "arm,gic-v3" => include_bytes!("output_initrd_GICv3.dtb"), "arm,gic-400" => include_bytes!("output_initrd_GICv2.dtb"), _ => panic!("Unexpected gic version!"), }; let initrd = InitrdConfig { address: GuestAddress(0x1000_0000), size: 0x1000, }; let current_dtb_bytes = create_fdt( &mem, vec![0], CString::new("console=tty0").unwrap(), &device_manager, &gic, &Some(initrd), ) .unwrap(); // Use this code when wanting to generate a new DTB sample. 
// { // use std::fs; // use std::io::Write; // use std::path::PathBuf; // let path = PathBuf::from(env!("CARGO_MANIFEST_DIR")); // let dtb_path = match gic.fdt_compatibility() { // "arm,gic-v3" => "output_initrd_GICv3.dtb", // "arm,gic-400" => ("output_initrd_GICv2.dtb"), // _ => panic!("Unexpected gic version!"), // }; // let mut output = fs::OpenOptions::new() // .write(true) // .create(true) // .open(path.join(format!("src/arch/aarch64/{}", dtb_path))) // .unwrap(); // output.write_all(¤t_dtb_bytes).unwrap(); // } let pos = 4; let val = u32::try_from(layout::FDT_MAX_SIZE).unwrap(); let mut buf = vec![]; buf.extend_from_slice(saved_dtb_bytes); set_size(&mut buf, pos, val); let original_fdt = device_tree::DeviceTree::load(&buf).unwrap(); let generated_fdt = device_tree::DeviceTree::load(¤t_dtb_bytes).unwrap(); assert_eq!( format!("{:?}", original_fdt), format!("{:?}", generated_fdt) ); } } ================================================ FILE: src/vmm/src/arch/aarch64/gic/gicv2/mod.rs ================================================ // Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 mod regs; use kvm_ioctls::{DeviceFd, VmFd}; use crate::arch::aarch64::gic::{GicError, GicState}; /// Represent a GIC v2 device #[derive(Debug)] pub struct GICv2(super::GIC); impl std::ops::Deref for GICv2 { type Target = super::GIC; fn deref(&self) -> &Self::Target { &self.0 } } impl GICv2 { // Unfortunately bindgen omits defines that are based on other defines. // See arch/arm64/include/uapi/asm/kvm.h file from the linux kernel. const KVM_VGIC_V2_DIST_SIZE: u64 = 0x1000; const KVM_VGIC_V2_CPU_SIZE: u64 = 0x2000; // Device trees specific constants const ARCH_GIC_V2_MAINT_IRQ: u32 = 8; /// Get the address of the GICv2 distributor. const fn get_dist_addr() -> u64 { super::layout::MMIO32_MEM_START - GICv2::KVM_VGIC_V2_DIST_SIZE } /// Get the size of the GIC_v2 distributor. 
const fn get_dist_size() -> u64 { GICv2::KVM_VGIC_V2_DIST_SIZE } /// Get the address of the GIC_v2 CPU. const fn get_cpu_addr() -> u64 { GICv2::get_dist_addr() - GICv2::KVM_VGIC_V2_CPU_SIZE } /// Get the size of the GIC_v2 CPU. const fn get_cpu_size() -> u64 { GICv2::KVM_VGIC_V2_CPU_SIZE } pub const VERSION: u32 = kvm_bindings::kvm_device_type_KVM_DEV_TYPE_ARM_VGIC_V2; pub fn fdt_compatibility(&self) -> &str { "arm,gic-400" } pub fn fdt_maint_irq(&self) -> u32 { GICv2::ARCH_GIC_V2_MAINT_IRQ } /// Create the GIC device object pub fn create_device(fd: DeviceFd, vcpu_count: u64) -> Self { GICv2(super::GIC { fd, properties: [ GICv2::get_dist_addr(), GICv2::get_dist_size(), GICv2::get_cpu_addr(), GICv2::get_cpu_size(), ], msi_properties: None, vcpu_count, its_device: None, }) } pub fn save_device(&self, mpidrs: &[u64]) -> Result { regs::save_state(&self.fd, mpidrs) } pub fn restore_device(&self, mpidrs: &[u64], state: &GicState) -> Result<(), GicError> { regs::restore_state(&self.fd, mpidrs, state) } pub fn init_device_attributes(gic_device: &Self) -> Result<(), GicError> { // Setting up the distributor attribute. // We are placing the GIC below 1GB so we need to subtract the size of the distributor. Self::set_device_attribute( gic_device.device_fd(), kvm_bindings::KVM_DEV_ARM_VGIC_GRP_ADDR, u64::from(kvm_bindings::KVM_VGIC_V2_ADDR_TYPE_DIST), &GICv2::get_dist_addr() as *const u64 as u64, 0, )?; // Setting up the CPU attribute. 
Self::set_device_attribute( gic_device.device_fd(), kvm_bindings::KVM_DEV_ARM_VGIC_GRP_ADDR, u64::from(kvm_bindings::KVM_VGIC_V2_ADDR_TYPE_CPU), &GICv2::get_cpu_addr() as *const u64 as u64, 0, )?; Ok(()) } /// Initialize a GIC device pub fn init_device(vm: &VmFd) -> Result { let mut gic_device = kvm_bindings::kvm_create_device { type_: Self::VERSION, fd: 0, flags: 0, }; vm.create_device(&mut gic_device) .map_err(GicError::CreateGIC) } /// Method to initialize the GIC device pub fn create(vm: &VmFd, vcpu_count: u64) -> Result { let vgic_fd = Self::init_device(vm)?; let device = Self::create_device(vgic_fd, vcpu_count); Self::init_device_attributes(&device)?; Self::finalize_device(&device)?; Ok(device) } /// Finalize the setup of a GIC device pub fn finalize_device(gic_device: &Self) -> Result<(), GicError> { // On arm there are 3 types of interrupts: SGI (0-15), PPI (16-31), SPI (32-1020). // SPIs are used to signal interrupts from various peripherals accessible across // the whole system so these are the ones that we increment when adding a new virtio device. // KVM_DEV_ARM_VGIC_GRP_NR_IRQS sets the number of interrupts (SGI, PPI, and SPI). // Consequently, we need to add 32 to the number of SPIs ("legacy GSI"). let nr_irqs: u32 = crate::arch::GSI_LEGACY_NUM + super::layout::SPI_START; let nr_irqs_ptr = &nr_irqs as *const u32; Self::set_device_attribute( gic_device.device_fd(), kvm_bindings::KVM_DEV_ARM_VGIC_GRP_NR_IRQS, 0, nr_irqs_ptr as u64, 0, )?; // Finalize the GIC. // See https://code.woboq.org/linux/linux/virt/kvm/arm/vgic/vgic-kvm-device.c.html#211. 
Self::set_device_attribute( gic_device.device_fd(), kvm_bindings::KVM_DEV_ARM_VGIC_GRP_CTRL, u64::from(kvm_bindings::KVM_DEV_ARM_VGIC_CTRL_INIT), 0, 0, )?; Ok(()) } /// Set a GIC device attribute pub fn set_device_attribute( fd: &DeviceFd, group: u32, attr: u64, addr: u64, flags: u32, ) -> Result<(), GicError> { let attr = kvm_bindings::kvm_device_attr { flags, group, attr, addr, }; fd.set_device_attr(&attr) .map_err(|err| GicError::DeviceAttribute(err, true, group))?; Ok(()) } } ================================================ FILE: src/vmm/src/arch/aarch64/gic/gicv2/regs/dist_regs.rs ================================================ // Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 use std::ops::Range; use kvm_bindings::KVM_DEV_ARM_VGIC_GRP_DIST_REGS; use kvm_ioctls::DeviceFd; use crate::arch::aarch64::gic::GicError; use crate::arch::aarch64::gic::regs::{GicRegState, MmioReg, SimpleReg, VgicRegEngine}; use crate::arch::{GSI_LEGACY_NUM, SPI_START}; // Distributor registers as detailed at page 75 from // https://developer.arm.com/documentation/ihi0048/latest/. // Address offsets are relative to the Distributor base address defined // by the system memory map. 
const GICD_CTLR: DistReg = DistReg::simple(0x0, 4); const GICD_IGROUPR: DistReg = DistReg::shared_irq(0x0080, 1); const GICD_ISENABLER: DistReg = DistReg::shared_irq(0x0100, 1); const GICD_ICENABLER: DistReg = DistReg::shared_irq(0x0180, 1); const GICD_ISPENDR: DistReg = DistReg::shared_irq(0x0200, 1); const GICD_ICPENDR: DistReg = DistReg::shared_irq(0x0280, 1); const GICD_ISACTIVER: DistReg = DistReg::shared_irq(0x0300, 1); const GICD_ICACTIVER: DistReg = DistReg::shared_irq(0x0380, 1); const GICD_IPRIORITYR: DistReg = DistReg::shared_irq(0x0400, 8); const GICD_ICFGR: DistReg = DistReg::shared_irq(0x0C00, 2); const GICD_CPENDSGIR: DistReg = DistReg::simple(0xF10, 16); const GICD_SPENDSGIR: DistReg = DistReg::simple(0xF20, 16); // List with relevant distributor registers that we will be restoring. // Order is taken from qemu. // Criteria for the present list of registers: only R/W registers, implementation specific registers // are not saved. static VGIC_DIST_REGS: &[DistReg] = &[ GICD_CTLR, GICD_ICENABLER, GICD_ISENABLER, GICD_IGROUPR, GICD_ICFGR, GICD_ICPENDR, GICD_ISPENDR, GICD_ICACTIVER, GICD_ISACTIVER, GICD_IPRIORITYR, GICD_CPENDSGIR, GICD_SPENDSGIR, ]; /// Some registers have variable lengths since they dedicate a specific number of bits to /// each interrupt. So, their length depends on the number of interrupts. /// (i.e the ones that are represented as GICD_REG) in the documentation mentioned above. pub struct SharedIrqReg { /// The offset from the component address. The register is memory mapped here. offset: u64, /// Number of bits per interrupt. bits_per_irq: u8, } impl MmioReg for SharedIrqReg { fn range(&self) -> Range { // The ARM® TrustZone® implements a protection logic which contains a // read-as-zero/write-ignore (RAZ/WI) policy. // The first part of a shared-irq register, the one corresponding to the // SGI and PPI IRQs (0-32) is RAZ/WI, so we skip it. 
let start = self.offset + u64::from(SPI_START) * u64::from(self.bits_per_irq) / 8; let size_in_bits = u64::from(self.bits_per_irq) * u64::from(GSI_LEGACY_NUM); let mut size_in_bytes = size_in_bits / 8; if size_in_bits % 8 > 0 { size_in_bytes += 1; } start..start + size_in_bytes } } enum DistReg { Simple(SimpleReg), SharedIrq(SharedIrqReg), } impl DistReg { const fn simple(offset: u64, size: u16) -> DistReg { DistReg::Simple(SimpleReg::new(offset, size)) } const fn shared_irq(offset: u64, bits_per_irq: u8) -> DistReg { DistReg::SharedIrq(SharedIrqReg { offset, bits_per_irq, }) } } impl MmioReg for DistReg { fn range(&self) -> Range { match self { DistReg::Simple(reg) => reg.range(), DistReg::SharedIrq(reg) => reg.range(), } } } struct DistRegEngine {} impl VgicRegEngine for DistRegEngine { type Reg = DistReg; type RegChunk = u32; fn group() -> u32 { KVM_DEV_ARM_VGIC_GRP_DIST_REGS } fn mpidr_mask() -> u64 { 0 } } pub(crate) fn get_dist_regs(fd: &DeviceFd) -> Result>, GicError> { DistRegEngine::get_regs_data(fd, Box::new(VGIC_DIST_REGS.iter()), 0) } pub(crate) fn set_dist_regs(fd: &DeviceFd, state: &[GicRegState]) -> Result<(), GicError> { DistRegEngine::set_regs_data(fd, Box::new(VGIC_DIST_REGS.iter()), state, 0) } #[cfg(test)] mod tests { #![allow(clippy::undocumented_unsafe_blocks)] use std::os::unix::io::AsRawFd; use kvm_ioctls::Kvm; use super::*; use crate::arch::aarch64::gic::{GICVersion, GicError, create_gic}; #[test] fn test_access_dist_regs() { let kvm = Kvm::new().unwrap(); let vm = kvm.create_vm().unwrap(); let _ = vm.create_vcpu(0).unwrap(); let gic_fd = match create_gic(&vm, 1, Some(GICVersion::GICV2)) { Ok(gic_fd) => gic_fd, Err(GicError::CreateGIC(_)) => return, _ => panic!("Failed to open setup GICv2"), }; let res = get_dist_regs(gic_fd.device_fd()); let state = res.unwrap(); assert_eq!(state.len(), 7); // Check GICD_CTLR size. 
assert_eq!(state[0].chunks.len(), 1); let res = set_dist_regs(gic_fd.device_fd(), &state); res.unwrap(); unsafe { libc::close(gic_fd.device_fd().as_raw_fd()) }; let res = get_dist_regs(gic_fd.device_fd()); assert_eq!( format!("{:?}", res.unwrap_err()), "DeviceAttribute(Error(9), false, 1)" ); // dropping gic_fd would double close the gic fd, so leak it std::mem::forget(gic_fd); } } ================================================ FILE: src/vmm/src/arch/aarch64/gic/gicv2/regs/icc_regs.rs ================================================ // Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 use kvm_bindings::*; use kvm_ioctls::DeviceFd; use crate::arch::aarch64::gic::GicError; use crate::arch::aarch64::gic::regs::{SimpleReg, VgicRegEngine, VgicSysRegsState}; // CPU interface registers as detailed at page 76 from // https://developer.arm.com/documentation/ihi0048/latest/. // Address offsets are relative to the cpu interface base address defined // by the system memory map. // Criteria for the present list of registers: only R/W registers, optional registers are not saved. // GICC_NSAPR are not saved since they are only present in GICv2 implementations that include the // GIC security extensions so it might crash on some systems. 
const GICC_CTLR: SimpleReg = SimpleReg::new(0x0, 4); const GICC_PMR: SimpleReg = SimpleReg::new(0x04, 4); const GICC_BPR: SimpleReg = SimpleReg::new(0x08, 4); const GICC_APBR: SimpleReg = SimpleReg::new(0x001C, 4); const GICC_APR1: SimpleReg = SimpleReg::new(0x00D0, 4); const GICC_APR2: SimpleReg = SimpleReg::new(0x00D4, 4); const GICC_APR3: SimpleReg = SimpleReg::new(0x00D8, 4); const GICC_APR4: SimpleReg = SimpleReg::new(0x00DC, 4); static MAIN_VGIC_ICC_REGS: &[SimpleReg] = &[ GICC_CTLR, GICC_PMR, GICC_BPR, GICC_APBR, GICC_APR1, GICC_APR2, GICC_APR3, GICC_APR4, ]; const KVM_DEV_ARM_VGIC_CPUID_SHIFT: u32 = 32; const KVM_DEV_ARM_VGIC_OFFSET_SHIFT: u32 = 0; struct VgicSysRegEngine {} impl VgicRegEngine for VgicSysRegEngine { type Reg = SimpleReg; type RegChunk = u64; fn group() -> u32 { KVM_DEV_ARM_VGIC_GRP_CPU_REGS } fn kvm_device_attr(offset: u64, val: &mut Self::RegChunk, cpuid: u64) -> kvm_device_attr { kvm_device_attr { group: Self::group(), attr: ((cpuid << KVM_DEV_ARM_VGIC_CPUID_SHIFT) & (0xff << KVM_DEV_ARM_VGIC_CPUID_SHIFT)) | ((offset << KVM_DEV_ARM_VGIC_OFFSET_SHIFT) & (0xffffffff << KVM_DEV_ARM_VGIC_OFFSET_SHIFT)), addr: val as *mut Self::RegChunk as u64, flags: 0, } } } pub(crate) fn get_icc_regs(fd: &DeviceFd, mpidr: u64) -> Result { let main_icc_regs = VgicSysRegEngine::get_regs_data(fd, Box::new(MAIN_VGIC_ICC_REGS.iter()), mpidr)?; Ok(VgicSysRegsState { main_icc_regs, ap_icc_regs: Vec::new(), }) } pub(crate) fn set_icc_regs( fd: &DeviceFd, mpidr: u64, state: &VgicSysRegsState, ) -> Result<(), GicError> { VgicSysRegEngine::set_regs_data( fd, Box::new(MAIN_VGIC_ICC_REGS.iter()), &state.main_icc_regs, mpidr, )?; Ok(()) } #[cfg(test)] mod tests { #![allow(clippy::undocumented_unsafe_blocks)] use std::os::unix::io::AsRawFd; use kvm_ioctls::Kvm; use super::*; use crate::arch::aarch64::gic::{GICVersion, GicError, create_gic}; #[test] fn test_access_icc_regs() { let kvm = Kvm::new().unwrap(); let vm = kvm.create_vm().unwrap(); let _ = 
vm.create_vcpu(0).unwrap(); let gic_fd = match create_gic(&vm, 1, Some(GICVersion::GICV2)) { Ok(gic_fd) => gic_fd, Err(GicError::CreateGIC(_)) => return, _ => panic!("Failed to open setup GICv2"), }; let cpu_id = 0; let res = get_icc_regs(gic_fd.device_fd(), cpu_id); let state = res.unwrap(); assert_eq!(state.main_icc_regs.len(), 8); assert_eq!(state.ap_icc_regs.len(), 0); set_icc_regs(gic_fd.device_fd(), cpu_id, &state).unwrap(); unsafe { libc::close(gic_fd.device_fd().as_raw_fd()) }; let res = set_icc_regs(gic_fd.device_fd(), cpu_id, &state); assert_eq!( format!("{:?}", res.unwrap_err()), "DeviceAttribute(Error(9), true, 2)" ); let res = get_icc_regs(gic_fd.device_fd(), cpu_id); assert_eq!( format!("{:?}", res.unwrap_err()), "DeviceAttribute(Error(9), false, 2)" ); // dropping gic_fd would double close the gic fd, so leak it std::mem::forget(gic_fd); } } ================================================ FILE: src/vmm/src/arch/aarch64/gic/gicv2/regs/mod.rs ================================================ // Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 mod dist_regs; mod icc_regs; use kvm_ioctls::DeviceFd; use crate::arch::aarch64::gic::GicError; use crate::arch::aarch64::gic::regs::{GicState, GicVcpuState}; /// Save the state of the GIC device. pub fn save_state(fd: &DeviceFd, mpidrs: &[u64]) -> Result { let mut vcpu_states = Vec::with_capacity(mpidrs.len()); for mpidr in mpidrs { vcpu_states.push(GicVcpuState { rdist: Vec::new(), icc: icc_regs::get_icc_regs(fd, *mpidr)?, }) } Ok(GicState { dist: dist_regs::get_dist_regs(fd)?, gic_vcpu_states: vcpu_states, ..Default::default() }) } /// Restore the state of the GIC device. 
pub fn restore_state(fd: &DeviceFd, mpidrs: &[u64], state: &GicState) -> Result<(), GicError> {
    dist_regs::set_dist_regs(fd, &state.dist)?;

    // The saved state must describe exactly as many vCPUs as we are restoring.
    if mpidrs.len() != state.gic_vcpu_states.len() {
        return Err(GicError::InconsistentVcpuCount);
    }

    for (mpidr, vcpu_state) in mpidrs.iter().zip(&state.gic_vcpu_states) {
        icc_regs::set_icc_regs(fd, *mpidr, &vcpu_state.icc)?;
    }

    Ok(())
}

#[cfg(test)]
mod tests {
    #![allow(clippy::undocumented_unsafe_blocks)]

    use kvm_ioctls::Kvm;

    use super::*;
    use crate::arch::aarch64::gic::{GICVersion, create_gic};

    #[test]
    fn test_vm_save_restore_state() {
        let kvm = Kvm::new().unwrap();
        let vm = kvm.create_vm().unwrap();
        let gic_fd = match create_gic(&vm, 1, Some(GICVersion::GICV2)) {
            Ok(gic_fd) => gic_fd,
            Err(GicError::CreateGIC(_)) => return,
            _ => panic!("Failed to open setup GICv2"),
        };

        let mpidr = vec![0];
        let res = save_state(gic_fd.device_fd(), &mpidr);
        // We will receive an error if trying to call before creating vcpu.
        assert_eq!(
            format!("{:?}", res.unwrap_err()),
            "DeviceAttribute(Error(22), false, 2)"
        );

        let kvm = Kvm::new().unwrap();
        let vm = kvm.create_vm().unwrap();
        let _vcpu = vm.create_vcpu(0).unwrap();
        let gic = create_gic(&vm, 1, Some(GICVersion::GICV2)).expect("Cannot create gic");
        let gic_fd = gic.device_fd();

        let vm_state = save_state(gic_fd, &mpidr).unwrap();
        let val: u32 = 0;
        let gicd_statusr_off = 0x0010u64;
        let mut gic_dist_attr = kvm_bindings::kvm_device_attr {
            group: kvm_bindings::KVM_DEV_ARM_VGIC_GRP_DIST_REGS,
            attr: gicd_statusr_off,
            addr: &val as *const u32 as u64,
            flags: 0,
        };
        unsafe {
            gic_fd.get_device_attr(&mut gic_dist_attr).unwrap();
        }

        // The second value from the list of distributor registers is the value of the GICD_STATUSR
        // register. We assert that the one saved in the bitmap is the same with the one we
        // obtain with KVM_GET_DEVICE_ATTR.
        let gicd_statusr = &vm_state.dist[1];

        assert_eq!(gicd_statusr.chunks[0], val);
        assert_eq!(vm_state.dist.len(), 7);
        restore_state(gic_fd, &mpidr, &vm_state).unwrap();
    }
}

================================================
FILE: src/vmm/src/arch/aarch64/gic/gicv3/mod.rs
================================================
// Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

pub mod regs;

use kvm_ioctls::{DeviceFd, VmFd};

use crate::arch::aarch64::gic::{GicError, GicState};

#[derive(Debug)]
pub struct GICv3(super::GIC);

impl std::ops::Deref for GICv3 {
    type Target = super::GIC;

    fn deref(&self) -> &Self::Target {
        &self.0
    }
}

impl std::ops::DerefMut for GICv3 {
    fn deref_mut(&mut self) -> &mut Self::Target {
        &mut self.0
    }
}

impl GICv3 {
    // Unfortunately bindgen omits defines that are based on other defines.
    // See arch/arm64/include/uapi/asm/kvm.h file from the linux kernel.
    const SZ_64K: u64 = 0x0001_0000;
    const KVM_VGIC_V3_DIST_SIZE: u64 = GICv3::SZ_64K;
    const KVM_VGIC_V3_REDIST_SIZE: u64 = (2 * GICv3::SZ_64K);
    const GIC_V3_ITS_SIZE: u64 = 0x2_0000;

    // Device trees specific constants
    const ARCH_GIC_V3_MAINT_IRQ: u32 = 9;

    /// Get the address of the GIC distributor.
    fn get_dist_addr() -> u64 {
        super::layout::MMIO32_MEM_START - GICv3::KVM_VGIC_V3_DIST_SIZE
    }

    /// Get the size of the GIC distributor.
    fn get_dist_size() -> u64 {
        GICv3::KVM_VGIC_V3_DIST_SIZE
    }

    /// Get the address of the GIC redistributors.
    fn get_redists_addr(vcpu_count: u64) -> u64 {
        GICv3::get_dist_addr() - GICv3::get_redists_size(vcpu_count)
    }

    /// Get the size of the GIC redistributors.
fn get_redists_size(vcpu_count: u64) -> u64 { vcpu_count * GICv3::KVM_VGIC_V3_REDIST_SIZE } /// Get the MSI address fn get_msi_address(vcpu_count: u64) -> u64 { Self::get_redists_addr(vcpu_count) - GICv3::GIC_V3_ITS_SIZE } /// Get the MSI size const fn get_msi_size() -> u64 { GICv3::GIC_V3_ITS_SIZE } pub const VERSION: u32 = kvm_bindings::kvm_device_type_KVM_DEV_TYPE_ARM_VGIC_V3; pub fn fdt_compatibility(&self) -> &str { "arm,gic-v3" } pub fn fdt_maint_irq(&self) -> u32 { GICv3::ARCH_GIC_V3_MAINT_IRQ } /// Create the GIC device object pub fn create_device(vm: &VmFd, vcpu_count: u64) -> Result { // Create the GIC device let mut gic_device = kvm_bindings::kvm_create_device { type_: Self::VERSION, fd: 0, flags: 0, }; let gic_fd = vm .create_device(&mut gic_device) .map_err(GicError::CreateGIC)?; Ok(GICv3(super::GIC { fd: gic_fd, properties: [ GICv3::get_dist_addr(), GICv3::get_dist_size(), GICv3::get_redists_addr(vcpu_count), GICv3::get_redists_size(vcpu_count), ], msi_properties: Some([GICv3::get_msi_address(vcpu_count), GICv3::get_msi_size()]), vcpu_count, its_device: None, })) } pub fn save_device(&self, mpidrs: &[u64]) -> Result { regs::save_state(&self.fd, self.its_device.as_ref().unwrap(), mpidrs) } pub fn restore_device(&self, mpidrs: &[u64], state: &GicState) -> Result<(), GicError> { regs::restore_state(&self.fd, self.its_device.as_ref().unwrap(), mpidrs, state) } pub fn init_device_attributes(gic_device: &Self) -> Result<(), GicError> { // Setting up the distributor attribute. // We are placing the GIC below 1GB so we need to subtract the size of the distributor. Self::set_device_attribute( gic_device.device_fd(), kvm_bindings::KVM_DEV_ARM_VGIC_GRP_ADDR, u64::from(kvm_bindings::KVM_VGIC_V3_ADDR_TYPE_DIST), &GICv3::get_dist_addr() as *const u64 as u64, 0, )?; // Setting up the redistributors' attribute. // We are calculating here the start of the redistributors address. We have one per CPU. 
Self::set_device_attribute( gic_device.device_fd(), kvm_bindings::KVM_DEV_ARM_VGIC_GRP_ADDR, u64::from(kvm_bindings::KVM_VGIC_V3_ADDR_TYPE_REDIST), &GICv3::get_redists_addr(gic_device.vcpu_count()) as *const u64 as u64, 0, )?; Ok(()) } fn init_its(vm: &VmFd, gic_device: &mut Self) -> Result<(), GicError> { // ITS part attributes let mut its_device = kvm_bindings::kvm_create_device { type_: kvm_bindings::kvm_device_type_KVM_DEV_TYPE_ARM_VGIC_ITS, fd: 0, flags: 0, }; let its_fd = vm .create_device(&mut its_device) .map_err(GicError::CreateGIC)?; // Setting up the ITS attributes Self::set_device_attribute( &its_fd, kvm_bindings::KVM_DEV_ARM_VGIC_GRP_ADDR, u64::from(kvm_bindings::KVM_VGIC_ITS_ADDR_TYPE), &Self::get_msi_address(gic_device.vcpu_count()) as *const u64 as u64, 0, )?; Self::set_device_attribute( &its_fd, kvm_bindings::KVM_DEV_ARM_VGIC_GRP_CTRL, u64::from(kvm_bindings::KVM_DEV_ARM_VGIC_CTRL_INIT), 0, 0, )?; gic_device.its_device = Some(its_fd); Ok(()) } /// Method to initialize the GIC device pub fn create(vm: &VmFd, vcpu_count: u64) -> Result { let mut device = Self::create_device(vm, vcpu_count)?; Self::init_device_attributes(&device)?; Self::init_its(vm, &mut device)?; Self::finalize_device(&device)?; Ok(device) } /// Finalize the setup of a GIC device pub fn finalize_device(gic_device: &Self) -> Result<(), GicError> { // On arm there are 3 types of interrupts: SGI (0-15), PPI (16-31), SPI (32-1020). // SPIs are used to signal interrupts from various peripherals accessible across // the whole system so these are the ones that we increment when adding a new virtio device. // KVM_DEV_ARM_VGIC_GRP_NR_IRQS sets the number of interrupts (SGI, PPI, and SPI). // Consequently, we need to add 32 to the number of SPIs ("legacy GSI"). 
let nr_irqs: u32 = crate::arch::GSI_LEGACY_NUM + super::layout::SPI_START; let nr_irqs_ptr = &nr_irqs as *const u32; Self::set_device_attribute( gic_device.device_fd(), kvm_bindings::KVM_DEV_ARM_VGIC_GRP_NR_IRQS, 0, nr_irqs_ptr as u64, 0, )?; // Finalize the GIC. // See https://code.woboq.org/linux/linux/virt/kvm/arm/vgic/vgic-kvm-device.c.html#211. Self::set_device_attribute( gic_device.device_fd(), kvm_bindings::KVM_DEV_ARM_VGIC_GRP_CTRL, u64::from(kvm_bindings::KVM_DEV_ARM_VGIC_CTRL_INIT), 0, 0, )?; Ok(()) } /// Set a GIC device attribute pub fn set_device_attribute( fd: &DeviceFd, group: u32, attr: u64, addr: u64, flags: u32, ) -> Result<(), GicError> { let attr = kvm_bindings::kvm_device_attr { flags, group, attr, addr, }; fd.set_device_attr(&attr) .map_err(|err| GicError::DeviceAttribute(err, true, group))?; Ok(()) } } /// Function that flushes /// RDIST pending tables into guest RAM. /// /// The tables get flushed to guest RAM whenever the VM gets stopped. fn save_pending_tables(gic_device: &DeviceFd) -> Result<(), GicError> { let init_gic_attr = kvm_bindings::kvm_device_attr { group: kvm_bindings::KVM_DEV_ARM_VGIC_GRP_CTRL, attr: u64::from(kvm_bindings::KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES), addr: 0, flags: 0, }; gic_device.set_device_attr(&init_gic_attr).map_err(|err| { GicError::DeviceAttribute(err, true, kvm_bindings::KVM_DEV_ARM_VGIC_GRP_CTRL) }) } #[cfg(test)] mod tests { #![allow(clippy::undocumented_unsafe_blocks)] use kvm_ioctls::Kvm; use super::*; use crate::arch::aarch64::gic::{GICVersion, create_gic}; #[test] fn test_save_pending_tables() { use std::os::unix::io::AsRawFd; let kvm = Kvm::new().unwrap(); let vm = kvm.create_vm().unwrap(); let gic = create_gic(&vm, 1, Some(GICVersion::GICV3)).expect("Cannot create gic"); save_pending_tables(gic.device_fd()).unwrap(); unsafe { libc::close(gic.device_fd().as_raw_fd()) }; let res = save_pending_tables(gic.device_fd()); assert_eq!( format!("{:?}", res.unwrap_err()), "DeviceAttribute(Error(9), true, 4)" 
        );

        // dropping gic_fd would double close the gic fd, so leak it
        std::mem::forget(gic);
    }
}

================================================
FILE: src/vmm/src/arch/aarch64/gic/gicv3/regs/dist_regs.rs
================================================
// Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

use std::ops::Range;

use kvm_bindings::KVM_DEV_ARM_VGIC_GRP_DIST_REGS;
use kvm_ioctls::DeviceFd;

use crate::arch::aarch64::gic::GicError;
use crate::arch::aarch64::gic::regs::{GicRegState, MmioReg, SimpleReg, VgicRegEngine};
use crate::arch::{GSI_LEGACY_NUM, SPI_START};

// Distributor registers as detailed at page 456 from
// https://static.docs.arm.com/ihi0069/c/IHI0069C_gic_architecture_specification.pdf.
// Address offsets are relative to the Distributor base address defined
// by the system memory map.
const GICD_CTLR: DistReg = DistReg::simple(0x0, 4);
const GICD_STATUSR: DistReg = DistReg::simple(0x0010, 4);
// The registers below dedicate a fixed number of bits to every interrupt, so
// their sizes scale with the number of IRQs (second constructor argument is
// bits-per-IRQ).
const GICD_IGROUPR: DistReg = DistReg::shared_irq(0x0080, 1);
const GICD_ISENABLER: DistReg = DistReg::shared_irq(0x0100, 1);
const GICD_ICENABLER: DistReg = DistReg::shared_irq(0x0180, 1);
const GICD_ISPENDR: DistReg = DistReg::shared_irq(0x0200, 1);
const GICD_ICPENDR: DistReg = DistReg::shared_irq(0x0280, 1);
const GICD_ISACTIVER: DistReg = DistReg::shared_irq(0x0300, 1);
const GICD_ICACTIVER: DistReg = DistReg::shared_irq(0x0380, 1);
const GICD_IPRIORITYR: DistReg = DistReg::shared_irq(0x0400, 8);
const GICD_ICFGR: DistReg = DistReg::shared_irq(0x0C00, 2);
const GICD_IROUTER: DistReg = DistReg::shared_irq(0x6000, 64);

// List with relevant distributor registers that we will be restoring.
// Order is taken from qemu.
// Criteria for the present list of registers: only R/W registers, implementation specific registers
// are not saved. GICD_CPENDSGIR and GICD_SPENDSGIR are not saved since these registers are not used
// when affinity routing is enabled.
Affinity routing GICv3 is enabled by default unless Firecracker // clears the ICD_CTLR.ARE bit which it does not do. static VGIC_DIST_REGS: &[DistReg] = &[ GICD_CTLR, GICD_STATUSR, GICD_ICENABLER, GICD_ISENABLER, GICD_IGROUPR, GICD_IROUTER, GICD_ICFGR, GICD_ICPENDR, GICD_ISPENDR, GICD_ICACTIVER, GICD_ISACTIVER, GICD_IPRIORITYR, ]; /// Some registers have variable lengths since they dedicate a specific number of bits to /// each interrupt. So, their length depends on the number of interrupts. /// (i.e the ones that are represented as GICD_REG) in the documentation mentioned above. pub struct SharedIrqReg { /// The offset from the component address. The register is memory mapped here. offset: u64, /// Number of bits per interrupt. bits_per_irq: u8, } impl MmioReg for SharedIrqReg { fn range(&self) -> Range { // The ARM® TrustZone® implements a protection logic which contains a // read-as-zero/write-ignore (RAZ/WI) policy. // The first part of a shared-irq register, the one corresponding to the // SGI and PPI IRQs (0-32) is RAZ/WI, so we skip it. 
let start = self.offset + u64::from(SPI_START) * u64::from(self.bits_per_irq) / 8; let size_in_bits = u64::from(self.bits_per_irq) * u64::from(GSI_LEGACY_NUM); let mut size_in_bytes = size_in_bits / 8; if size_in_bits % 8 > 0 { size_in_bytes += 1; } start..start + size_in_bytes } } enum DistReg { Simple(SimpleReg), SharedIrq(SharedIrqReg), } impl DistReg { const fn simple(offset: u64, size: u16) -> DistReg { DistReg::Simple(SimpleReg::new(offset, size)) } const fn shared_irq(offset: u64, bits_per_irq: u8) -> DistReg { DistReg::SharedIrq(SharedIrqReg { offset, bits_per_irq, }) } } impl MmioReg for DistReg { fn range(&self) -> Range { match self { DistReg::Simple(reg) => reg.range(), DistReg::SharedIrq(reg) => reg.range(), } } } struct DistRegEngine {} impl VgicRegEngine for DistRegEngine { type Reg = DistReg; type RegChunk = u32; fn group() -> u32 { KVM_DEV_ARM_VGIC_GRP_DIST_REGS } fn mpidr_mask() -> u64 { 0 } } pub(crate) fn get_dist_regs(fd: &DeviceFd) -> Result>, GicError> { DistRegEngine::get_regs_data(fd, Box::new(VGIC_DIST_REGS.iter()), 0) } pub(crate) fn set_dist_regs(fd: &DeviceFd, state: &[GicRegState]) -> Result<(), GicError> { DistRegEngine::set_regs_data(fd, Box::new(VGIC_DIST_REGS.iter()), state, 0) } #[cfg(test)] mod tests { #![allow(clippy::undocumented_unsafe_blocks)] use std::os::unix::io::AsRawFd; use kvm_ioctls::Kvm; use super::*; use crate::arch::aarch64::gic::{GICVersion, create_gic}; #[test] fn test_access_dist_regs() { let kvm = Kvm::new().unwrap(); let vm = kvm.create_vm().unwrap(); let _ = vm.create_vcpu(0).unwrap(); let gic_fd = create_gic(&vm, 1, Some(GICVersion::GICV3)).expect("Cannot create gic"); let res = get_dist_regs(gic_fd.device_fd()); let state = res.unwrap(); assert_eq!(state.len(), 12); // Check GICD_CTLR size. 
        assert_eq!(state[0].chunks.len(), 1);
        let res = set_dist_regs(gic_fd.device_fd(), &state);
        res.unwrap();

        // Closing the fd behind the DeviceFd's back makes further ioctls fail
        // with EBADF (Error(9)).
        unsafe { libc::close(gic_fd.device_fd().as_raw_fd()) };

        let res = get_dist_regs(gic_fd.device_fd());
        assert_eq!(
            format!("{:?}", res.unwrap_err()),
            "DeviceAttribute(Error(9), false, 1)"
        );

        // dropping gic_fd would double close the gic fd, so leak it
        std::mem::forget(gic_fd);
    }

    #[test]
    fn test_dist_constructors() {
        let simple_dist_reg = DistReg::simple(0, 4);
        let shared_dist_reg = DistReg::shared_irq(0x0010, 2);

        assert_eq!(simple_dist_reg.range(), Range { start: 0, end: 4 });
        assert_eq!(shared_dist_reg.range(), Range { start: 24, end: 48 });
    }
}

================================================
FILE: src/vmm/src/arch/aarch64/gic/gicv3/regs/icc_regs.rs
================================================
// Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

use kvm_bindings::*;
use kvm_ioctls::DeviceFd;

use crate::arch::aarch64::gic::GicError;
use crate::arch::aarch64::gic::regs::{SimpleReg, VgicRegEngine, VgicSysRegsState};

// PRIbits field of ICC_CTLR_EL1: number of implemented priority bits.
const ICC_CTLR_EL1_PRIBITS_SHIFT: u64 = 8;
const ICC_CTLR_EL1_PRIBITS_MASK: u64 = 7 << ICC_CTLR_EL1_PRIBITS_SHIFT;

// These registers are taken from the kernel. Look for `gic_v3_icc_reg_descs`.
const SYS_ICC_SRE_EL1: SimpleReg = SimpleReg::vgic_sys_reg(3, 0, 12, 12, 5);
const SYS_ICC_CTLR_EL1: SimpleReg = SimpleReg::vgic_sys_reg(3, 0, 12, 12, 4);
const SYS_ICC_IGRPEN0_EL1: SimpleReg = SimpleReg::vgic_sys_reg(3, 0, 12, 12, 6);
const SYS_ICC_IGRPEN1_EL1: SimpleReg = SimpleReg::vgic_sys_reg(3, 0, 12, 12, 7);
const SYS_ICC_PMR_EL1: SimpleReg = SimpleReg::vgic_sys_reg(3, 0, 4, 6, 0);
const SYS_ICC_BPR0_EL1: SimpleReg = SimpleReg::vgic_sys_reg(3, 0, 12, 8, 3);
const SYS_ICC_BPR1_EL1: SimpleReg = SimpleReg::vgic_sys_reg(3, 0, 12, 12, 3);
const SYS_ICC_AP0R0_EL1: SimpleReg = SimpleReg::sys_icc_ap0rn_el1(0);
const SYS_ICC_AP0R1_EL1: SimpleReg = SimpleReg::sys_icc_ap0rn_el1(1);
const SYS_ICC_AP0R2_EL1: SimpleReg = SimpleReg::sys_icc_ap0rn_el1(2);
const SYS_ICC_AP0R3_EL1: SimpleReg = SimpleReg::sys_icc_ap0rn_el1(3);
const SYS_ICC_AP1R0_EL1: SimpleReg = SimpleReg::sys_icc_ap1rn_el1(0);
const SYS_ICC_AP1R1_EL1: SimpleReg = SimpleReg::sys_icc_ap1rn_el1(1);
const SYS_ICC_AP1R2_EL1: SimpleReg = SimpleReg::sys_icc_ap1rn_el1(2);
const SYS_ICC_AP1R3_EL1: SimpleReg = SimpleReg::sys_icc_ap1rn_el1(3);

// Registers that are always present on a GICv3 CPU interface.
static MAIN_VGIC_ICC_REGS: &[SimpleReg] = &[
    SYS_ICC_SRE_EL1,
    SYS_ICC_CTLR_EL1,
    SYS_ICC_IGRPEN0_EL1,
    SYS_ICC_IGRPEN1_EL1,
    SYS_ICC_PMR_EL1,
    SYS_ICC_BPR0_EL1,
    SYS_ICC_BPR1_EL1,
];

// Active-priority registers; availability depends on the number of
// implemented priority bits (see `is_ap_reg_available`).
static AP_VGIC_ICC_REGS: &[SimpleReg] = &[
    SYS_ICC_AP0R0_EL1,
    SYS_ICC_AP0R1_EL1,
    SYS_ICC_AP0R2_EL1,
    SYS_ICC_AP0R3_EL1,
    SYS_ICC_AP1R0_EL1,
    SYS_ICC_AP1R1_EL1,
    SYS_ICC_AP1R2_EL1,
    SYS_ICC_AP1R3_EL1,
];

impl SimpleReg {
    /// Encode (op0, op1, CRn, CRm, op2) into the KVM system-register id used as
    /// the attribute offset for the CPU-interface register group.
    const fn vgic_sys_reg(op0: u64, op1: u64, crn: u64, crm: u64, op2: u64) -> SimpleReg {
        let offset = ((op0 << KVM_REG_ARM64_SYSREG_OP0_SHIFT)
            & KVM_REG_ARM64_SYSREG_OP0_MASK as u64)
            | ((op1 << KVM_REG_ARM64_SYSREG_OP1_SHIFT) & KVM_REG_ARM64_SYSREG_OP1_MASK as u64)
            | ((crn << KVM_REG_ARM64_SYSREG_CRN_SHIFT) & KVM_REG_ARM64_SYSREG_CRN_MASK as u64)
            | ((crm << KVM_REG_ARM64_SYSREG_CRM_SHIFT) & KVM_REG_ARM64_SYSREG_CRM_MASK as u64)
            | ((op2 << KVM_REG_ARM64_SYSREG_OP2_SHIFT) & KVM_REG_ARM64_SYSREG_OP2_MASK as u64);

        SimpleReg::new(offset, 8)
    }

    const fn sys_icc_ap0rn_el1(n: u64) -> SimpleReg {
        Self::vgic_sys_reg(3, 0, 12, 8, 4 | n)
    }

    const fn sys_icc_ap1rn_el1(n: u64) -> SimpleReg {
        Self::vgic_sys_reg(3, 0, 12, 9, n)
    }
}

struct VgicSysRegEngine {}

impl VgicRegEngine for VgicSysRegEngine {
    type Reg = SimpleReg;
    type RegChunk = u64;

    fn group() -> u32 {
        KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS
    }

    #[allow(clippy::cast_sign_loss)] // bit mask
    fn mpidr_mask() -> u64 {
        KVM_DEV_ARM_VGIC_V3_MPIDR_MASK as u64
    }
}

/// Read ICC_CTLR_EL1 and return the number of implemented priority bits.
///
/// The PRIbits field encodes "number of bits minus one", hence the `+ 1`.
// NOTE: extraction stripped the generic parameters of the return type;
// restored to `Result<u64, GicError>` to match the `?` usage and callers.
fn num_priority_bits(fd: &DeviceFd, mpidr: u64) -> Result<u64, GicError> {
    let reg_val = &VgicSysRegEngine::get_reg_data(fd, &SYS_ICC_CTLR_EL1, mpidr)?.chunks[0];

    Ok(((reg_val & ICC_CTLR_EL1_PRIBITS_MASK) >> ICC_CTLR_EL1_PRIBITS_SHIFT) + 1)
}

fn is_ap_reg_available(reg: &SimpleReg, num_priority_bits: u64) -> bool {
    // As per ARMv8 documentation:
    // https://static.docs.arm.com/ihi0069/c/IHI0069C_gic_architecture_specification.pdf
    // page 178,
    // ICC_AP0R1_EL1 is only implemented in implementations that support 6 or more bits of
    // priority.
    // ICC_AP0R2_EL1 and ICC_AP0R3_EL1 are only implemented in implementations that support
    // 7 bits of priority.
if (reg == &SYS_ICC_AP0R1_EL1 || reg == &SYS_ICC_AP1R1_EL1) && num_priority_bits < 6 { return false; } if (reg == &SYS_ICC_AP0R2_EL1 || reg == &SYS_ICC_AP0R3_EL1 || reg == &SYS_ICC_AP1R2_EL1 || reg == &SYS_ICC_AP1R3_EL1) && num_priority_bits != 7 { return false; } true } pub(crate) fn get_icc_regs(fd: &DeviceFd, mpidr: u64) -> Result { let main_icc_regs = VgicSysRegEngine::get_regs_data(fd, Box::new(MAIN_VGIC_ICC_REGS.iter()), mpidr)?; let num_priority_bits = num_priority_bits(fd, mpidr)?; let mut ap_icc_regs = Vec::with_capacity(AP_VGIC_ICC_REGS.len()); for reg in AP_VGIC_ICC_REGS { if is_ap_reg_available(reg, num_priority_bits) { ap_icc_regs.push(Some(VgicSysRegEngine::get_reg_data(fd, reg, mpidr)?)); } else { ap_icc_regs.push(None); } } Ok(VgicSysRegsState { main_icc_regs, ap_icc_regs, }) } pub(crate) fn set_icc_regs( fd: &DeviceFd, mpidr: u64, state: &VgicSysRegsState, ) -> Result<(), GicError> { VgicSysRegEngine::set_regs_data( fd, Box::new(MAIN_VGIC_ICC_REGS.iter()), &state.main_icc_regs, mpidr, )?; let num_priority_bits = num_priority_bits(fd, mpidr)?; for (reg, maybe_reg_data) in AP_VGIC_ICC_REGS.iter().zip(&state.ap_icc_regs) { if is_ap_reg_available(reg, num_priority_bits) != maybe_reg_data.is_some() { return Err(GicError::InvalidVgicSysRegState); } if let Some(reg_data) = maybe_reg_data { VgicSysRegEngine::set_reg_data(fd, reg, reg_data, mpidr)?; } } Ok(()) } #[cfg(test)] mod tests { #![allow(clippy::undocumented_unsafe_blocks)] use std::os::unix::io::AsRawFd; use kvm_ioctls::Kvm; use super::*; use crate::arch::aarch64::gic::{GICVersion, create_gic}; #[test] fn test_access_icc_regs() { let kvm = Kvm::new().unwrap(); let vm = kvm.create_vm().unwrap(); let _ = vm.create_vcpu(0).unwrap(); let gic_fd = create_gic(&vm, 1, Some(GICVersion::GICV3)).expect("Cannot create gic"); let gicr_typer = 123; let res = get_icc_regs(gic_fd.device_fd(), gicr_typer); let mut state = res.unwrap(); assert_eq!(state.main_icc_regs.len(), 7); assert_eq!(state.ap_icc_regs.len(), 
8);

        set_icc_regs(gic_fd.device_fd(), gicr_typer, &state).unwrap();

        // Clearing all AP registers makes the saved state inconsistent with
        // the host's implemented priority bits.
        for reg in state.ap_icc_regs.iter_mut() {
            *reg = None;
        }
        let res = set_icc_regs(gic_fd.device_fd(), gicr_typer, &state);
        assert_eq!(format!("{:?}", res.unwrap_err()), "InvalidVgicSysRegState");

        // Closing the fd behind the DeviceFd's back makes further ioctls fail
        // with EBADF (Error(9)).
        unsafe { libc::close(gic_fd.device_fd().as_raw_fd()) };

        let res = set_icc_regs(gic_fd.device_fd(), gicr_typer, &state);
        assert_eq!(
            format!("{:?}", res.unwrap_err()),
            "DeviceAttribute(Error(9), true, 6)"
        );

        let res = get_icc_regs(gic_fd.device_fd(), gicr_typer);
        assert_eq!(
            format!("{:?}", res.unwrap_err()),
            "DeviceAttribute(Error(9), false, 6)"
        );

        // dropping gic_fd would double close the gic fd, so leak it
        std::mem::forget(gic_fd);
    }

    #[test]
    fn test_icc_constructors() {
        let sys_reg1 = SimpleReg::vgic_sys_reg(3, 0, 12, 12, 5);
        let sys_reg2 = SimpleReg::sys_icc_ap0rn_el1(1);
        let sys_reg3 = SimpleReg::sys_icc_ap1rn_el1(1);
        assert!(sys_reg1 == SimpleReg::new(50789, 8));
        assert!(sys_reg2 == SimpleReg::new(50757, 8));
        assert!(sys_reg3 == SimpleReg::new(50761, 8));
    }
}

================================================
FILE: src/vmm/src/arch/aarch64/gic/gicv3/regs/its_regs.rs
================================================
// Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0 use kvm_bindings::{ KVM_DEV_ARM_ITS_RESTORE_TABLES, KVM_DEV_ARM_ITS_SAVE_TABLES, KVM_DEV_ARM_VGIC_GRP_CTRL, KVM_DEV_ARM_VGIC_GRP_ITS_REGS, }; use kvm_ioctls::DeviceFd; use serde::{Deserialize, Serialize}; use crate::arch::aarch64::gic::GicError; // ITS registers that we want to preserve across snapshots const GITS_CTLR: u32 = 0x0000; const GITS_IIDR: u32 = 0x0004; const GITS_CBASER: u32 = 0x0080; const GITS_CWRITER: u32 = 0x0088; const GITS_CREADR: u32 = 0x0090; const GITS_BASER: u32 = 0x0100; fn set_device_attribute( its_device: &DeviceFd, group: u32, attr: u32, val: u64, ) -> Result<(), GicError> { let gicv3_its_attr = kvm_bindings::kvm_device_attr { group, attr: attr as u64, addr: &val as *const u64 as u64, flags: 0, }; its_device .set_device_attr(&gicv3_its_attr) .map_err(|err| GicError::DeviceAttribute(err, true, group)) } fn get_device_attribute(its_device: &DeviceFd, group: u32, attr: u32) -> Result { let mut val = 0; let mut gicv3_its_attr = kvm_bindings::kvm_device_attr { group, attr: attr as u64, addr: &mut val as *mut u64 as u64, flags: 0, }; // SAFETY: gicv3_its_attr.addr is safe to write to. 
unsafe { its_device.get_device_attr(&mut gicv3_its_attr) } .map_err(|err| GicError::DeviceAttribute(err, false, group))?; Ok(val) } fn its_read_register(its_fd: &DeviceFd, attr: u32) -> Result { get_device_attribute(its_fd, KVM_DEV_ARM_VGIC_GRP_ITS_REGS, attr) } fn its_set_register(its_fd: &DeviceFd, attr: u32, val: u64) -> Result<(), GicError> { set_device_attribute(its_fd, KVM_DEV_ARM_VGIC_GRP_ITS_REGS, attr, val) } pub fn its_save_tables(its_fd: &DeviceFd) -> Result<(), GicError> { set_device_attribute( its_fd, KVM_DEV_ARM_VGIC_GRP_CTRL, KVM_DEV_ARM_ITS_SAVE_TABLES, 0, ) } pub fn its_restore_tables(its_fd: &DeviceFd) -> Result<(), GicError> { set_device_attribute( its_fd, KVM_DEV_ARM_VGIC_GRP_CTRL, KVM_DEV_ARM_ITS_RESTORE_TABLES, 0, ) } /// ITS registers that we save/restore during snapshot #[derive(Debug, Default, Serialize, Deserialize)] pub struct ItsRegisterState { iidr: u64, cbaser: u64, creadr: u64, cwriter: u64, baser: [u64; 8], ctlr: u64, } impl ItsRegisterState { /// Save ITS state pub fn save(its_fd: &DeviceFd) -> Result { let mut state = ItsRegisterState::default(); for i in 0..8 { state.baser[i as usize] = its_read_register(its_fd, GITS_BASER + i * 8)?; } state.ctlr = its_read_register(its_fd, GITS_CTLR)?; state.cbaser = its_read_register(its_fd, GITS_CBASER)?; state.creadr = its_read_register(its_fd, GITS_CREADR)?; state.cwriter = its_read_register(its_fd, GITS_CWRITER)?; state.iidr = its_read_register(its_fd, GITS_IIDR)?; Ok(state) } /// Restore ITS state /// /// We need to restore ITS registers in a very specific order for things to work. 
Take a look /// at: /// https://elixir.bootlin.com/linux/v6.1.141/source/Documentation/virt/kvm/devices/arm-vgic-its.rst#L60 /// and /// https://elixir.bootlin.com/linux/v6.1.141/source/Documentation/virt/kvm/devices/arm-vgic-its.rst#L123 /// /// for more details, but TL;DR is: /// /// We need to restore GITS_CBASER, GITS_CREADER, GITS_CWRITER, GITS_BASER and GITS_IIDR /// registers before restoring ITS tables from guest memory. We also need to set GITS_CTLR /// last. pub fn restore(&self, its_fd: &DeviceFd) -> Result<(), GicError> { its_set_register(its_fd, GITS_IIDR, self.iidr)?; its_set_register(its_fd, GITS_CBASER, self.cbaser)?; its_set_register(its_fd, GITS_CREADR, self.creadr)?; its_set_register(its_fd, GITS_CWRITER, self.cwriter)?; for i in 0..8 { its_set_register(its_fd, GITS_BASER + i * 8, self.baser[i as usize])?; } // We need to restore saved ITS tables before restoring GITS_CTLR its_restore_tables(its_fd)?; its_set_register(its_fd, GITS_CTLR, self.ctlr) } } ================================================ FILE: src/vmm/src/arch/aarch64/gic/gicv3/regs/mod.rs ================================================ // Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 mod dist_regs; mod icc_regs; pub mod its_regs; mod redist_regs; use its_regs::{ItsRegisterState, its_save_tables}; use kvm_ioctls::DeviceFd; use crate::arch::aarch64::gic::GicError; use crate::arch::aarch64::gic::regs::{GicState, GicVcpuState}; /// Save the state of the GIC device. pub fn save_state( gic_device: &DeviceFd, its_device: &DeviceFd, mpidrs: &[u64], ) -> Result { // Flush redistributors pending tables to guest RAM. super::save_pending_tables(gic_device)?; // Flush ITS tables into guest memory. 
its_save_tables(its_device)?; let mut vcpu_states = Vec::with_capacity(mpidrs.len()); for mpidr in mpidrs { vcpu_states.push(GicVcpuState { rdist: redist_regs::get_redist_regs(gic_device, *mpidr)?, icc: icc_regs::get_icc_regs(gic_device, *mpidr)?, }) } let its_state = ItsRegisterState::save(its_device)?; Ok(GicState { dist: dist_regs::get_dist_regs(gic_device)?, gic_vcpu_states: vcpu_states, its_state: Some(its_state), }) } /// Restore the state of the GIC device. pub fn restore_state( gic_device: &DeviceFd, its_device: &DeviceFd, mpidrs: &[u64], state: &GicState, ) -> Result<(), GicError> { dist_regs::set_dist_regs(gic_device, &state.dist)?; if mpidrs.len() != state.gic_vcpu_states.len() { return Err(GicError::InconsistentVcpuCount); } for (mpidr, vcpu_state) in mpidrs.iter().zip(&state.gic_vcpu_states) { redist_regs::set_redist_regs(gic_device, *mpidr, &vcpu_state.rdist)?; icc_regs::set_icc_regs(gic_device, *mpidr, &vcpu_state.icc)?; } state .its_state .as_ref() .ok_or(GicError::MissingItsState)? .restore(its_device) } #[cfg(test)] mod tests { #![allow(clippy::undocumented_unsafe_blocks)] use kvm_ioctls::Kvm; use super::*; use crate::arch::aarch64::gic::{GICVersion, create_gic}; #[test] fn test_vm_save_restore_state() { let kvm = Kvm::new().unwrap(); let vm = kvm.create_vm().unwrap(); let gic = create_gic(&vm, 1, Some(GICVersion::GICV3)).expect("Cannot create gic"); let gic_fd = gic.device_fd(); let its_fd = gic.its_fd().unwrap(); let mpidr = vec![1]; let res = save_state(gic_fd, its_fd, &mpidr); // We will receive an error if trying to call before creating vcpu. 
        assert_eq!(
            format!("{:?}", res.unwrap_err()),
            "DeviceAttribute(Error(22), false, 5)"
        );

        let kvm = Kvm::new().unwrap();
        let vm = kvm.create_vm().unwrap();
        let _vcpu = vm.create_vcpu(0).unwrap();
        let gic = create_gic(&vm, 1, Some(GICVersion::GICV3)).expect("Cannot create gic");
        let gic_fd = gic.device_fd();
        let its_fd = gic.its_fd().unwrap();

        let vm_state = save_state(gic_fd, its_fd, &mpidr).unwrap();
        let val: u32 = 0;
        let gicd_statusr_off = 0x0010u64;
        let mut gic_dist_attr = kvm_bindings::kvm_device_attr {
            group: kvm_bindings::KVM_DEV_ARM_VGIC_GRP_DIST_REGS,
            attr: gicd_statusr_off,
            addr: &val as *const u32 as u64,
            flags: 0,
        };
        unsafe {
            gic_fd.get_device_attr(&mut gic_dist_attr).unwrap();
        }

        // The second value from the list of distributor registers is the value of the GICD_STATUSR
        // register. We assert that the one saved in the bitmap is the same with the one we
        // obtain with KVM_GET_DEVICE_ATTR.
        let gicd_statusr = &vm_state.dist[1];

        assert_eq!(gicd_statusr.chunks[0], val);
        assert_eq!(vm_state.dist.len(), 12);
        restore_state(gic_fd, its_fd, &mpidr, &vm_state).unwrap();
        // Restoring with a mismatched vCPU count must fail.
        restore_state(gic_fd, its_fd, &[1, 2], &vm_state).unwrap_err();
    }
}

================================================
FILE: src/vmm/src/arch/aarch64/gic/gicv3/regs/redist_regs.rs
================================================
// Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

use kvm_bindings::*;
use kvm_ioctls::DeviceFd;

use crate::arch::aarch64::gic::GicError;
use crate::arch::aarch64::gic::regs::{GicRegState, SimpleReg, VgicRegEngine};

// Relevant PPI redistributor registers that we want to save/restore.
const GICR_CTLR: SimpleReg = SimpleReg::new(0x0000, 4);
const GICR_STATUSR: SimpleReg = SimpleReg::new(0x0010, 4);
const GICR_WAKER: SimpleReg = SimpleReg::new(0x0014, 4);
const GICR_PROPBASER: SimpleReg = SimpleReg::new(0x0070, 8);
const GICR_PENDBASER: SimpleReg = SimpleReg::new(0x0078, 8);

// Relevant SGI redistributor registers that we want to save/restore.
// These live in the second 64K frame of each redistributor (SGI_base).
const GICR_SGI_OFFSET: u64 = 0x0001_0000;
const GICR_IGROUPR0: SimpleReg = SimpleReg::new(GICR_SGI_OFFSET + 0x0080, 4);
const GICR_ISENABLER0: SimpleReg = SimpleReg::new(GICR_SGI_OFFSET + 0x0100, 4);
const GICR_ICENABLER0: SimpleReg = SimpleReg::new(GICR_SGI_OFFSET + 0x0180, 4);
const GICR_ISPENDR0: SimpleReg = SimpleReg::new(GICR_SGI_OFFSET + 0x0200, 4);
const GICR_ICPENDR0: SimpleReg = SimpleReg::new(GICR_SGI_OFFSET + 0x0280, 4);
const GICR_ISACTIVER0: SimpleReg = SimpleReg::new(GICR_SGI_OFFSET + 0x0300, 4);
const GICR_ICACTIVER0: SimpleReg = SimpleReg::new(GICR_SGI_OFFSET + 0x0380, 4);
const GICR_IPRIORITYR0: SimpleReg = SimpleReg::new(GICR_SGI_OFFSET + 0x0400, 32);
const GICR_ICFGR0: SimpleReg = SimpleReg::new(GICR_SGI_OFFSET + 0x0C00, 8);

// List with relevant redistributor registers that we will be restoring.
static VGIC_RDIST_REGS: &[SimpleReg] = &[
    GICR_STATUSR,
    GICR_WAKER,
    GICR_PROPBASER,
    GICR_PENDBASER,
    GICR_CTLR,
];

// List with relevant SGI associated redistributor registers that we will be restoring.
static VGIC_SGI_REGS: &[SimpleReg] = &[ GICR_IGROUPR0, GICR_ICENABLER0, GICR_ISENABLER0, GICR_ICFGR0, GICR_ICPENDR0, GICR_ISPENDR0, GICR_ICACTIVER0, GICR_ISACTIVER0, GICR_IPRIORITYR0, ]; struct RedistRegEngine {} impl VgicRegEngine for RedistRegEngine { type Reg = SimpleReg; type RegChunk = u32; fn group() -> u32 { KVM_DEV_ARM_VGIC_GRP_REDIST_REGS } #[allow(clippy::cast_sign_loss)] // bit mask fn mpidr_mask() -> u64 { KVM_DEV_ARM_VGIC_V3_MPIDR_MASK as u64 } } fn redist_regs() -> Box> { Box::new(VGIC_RDIST_REGS.iter().chain(VGIC_SGI_REGS)) } pub(crate) fn get_redist_regs( fd: &DeviceFd, mpidr: u64, ) -> Result>, GicError> { RedistRegEngine::get_regs_data(fd, redist_regs(), mpidr) } pub(crate) fn set_redist_regs( fd: &DeviceFd, mpidr: u64, data: &[GicRegState], ) -> Result<(), GicError> { RedistRegEngine::set_regs_data(fd, redist_regs(), data, mpidr) } #[cfg(test)] mod tests { #![allow(clippy::undocumented_unsafe_blocks)] use std::os::unix::io::AsRawFd; use kvm_ioctls::Kvm; use super::*; use crate::arch::aarch64::gic::{GICVersion, create_gic}; #[test] fn test_access_redist_regs() { let kvm = Kvm::new().unwrap(); let vm = kvm.create_vm().unwrap(); let _ = vm.create_vcpu(0).unwrap(); let gic_fd = create_gic(&vm, 1, Some(GICVersion::GICV3)).expect("Cannot create gic"); let gicr_typer = 123; let res = get_redist_regs(gic_fd.device_fd(), gicr_typer); let state = res.unwrap(); assert_eq!(state.len(), 14); set_redist_regs(gic_fd.device_fd(), gicr_typer, &state).unwrap(); unsafe { libc::close(gic_fd.device_fd().as_raw_fd()) }; let res = set_redist_regs(gic_fd.device_fd(), gicr_typer, &state); assert_eq!( format!("{:?}", res.unwrap_err()), "DeviceAttribute(Error(9), true, 5)" ); let res = get_redist_regs(gic_fd.device_fd(), gicr_typer); assert_eq!( format!("{:?}", res.unwrap_err()), "DeviceAttribute(Error(9), false, 5)" ); // dropping gic_fd would double close the gic fd, so leak it std::mem::forget(gic_fd); } } ================================================ FILE: 
src/vmm/src/arch/aarch64/gic/mod.rs ================================================ // Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 mod gicv2; mod gicv3; mod regs; use gicv2::GICv2; use gicv3::GICv3; use kvm_ioctls::{DeviceFd, VmFd}; pub use regs::GicState; use super::layout; /// Represent a V2 or V3 GIC device #[derive(Debug)] pub struct GIC { /// The file descriptor for the KVM device fd: DeviceFd, /// GIC device properties, to be used for setting up the fdt entry properties: [u64; 4], /// MSI properties of the GIC device msi_properties: Option<[u64; 2]>, /// Number of CPUs handled by the device vcpu_count: u64, /// ITS device its_device: Option, } impl GIC { /// Returns the file descriptor of the GIC device pub fn device_fd(&self) -> &DeviceFd { &self.fd } /// Returns an array with GIC device properties pub fn device_properties(&self) -> &[u64] { &self.properties } /// Returns the number of vCPUs this GIC handles pub fn vcpu_count(&self) -> u64 { self.vcpu_count } } /// Errors thrown while setting up the GIC. #[derive(Debug, thiserror::Error, displaydoc::Display, PartialEq, Eq)] pub enum GicError { /// Error while calling KVM ioctl for setting up the global interrupt controller: {0} CreateGIC(kvm_ioctls::Error), /// Error while setting or getting device attributes for the GIC: {0}, {1}, {2} DeviceAttribute(kvm_ioctls::Error, bool, u32), /// The number of vCPUs in the GicState doesn't match the number of vCPUs on the system. InconsistentVcpuCount, /// The VgicSysRegsState is invalid. InvalidVgicSysRegState, /// ITS state is missing. MissingItsState, } /// List of implemented GICs. #[derive(Debug)] pub enum GICVersion { /// Legacy version. GICV2, /// GICV3 without ITS. GICV3, } /// Trait for GIC devices. #[derive(Debug)] pub enum GICDevice { /// Legacy version. V2(GICv2), /// GICV3 without ITS. 
V3(GICv3), } impl GICDevice { /// Returns the file descriptor of the GIC device pub fn device_fd(&self) -> &DeviceFd { match self { Self::V2(x) => x.device_fd(), Self::V3(x) => x.device_fd(), } } /// Returns the file descriptor of the ITS device, if any pub fn its_fd(&self) -> Option<&DeviceFd> { match self { Self::V2(_) => None, Self::V3(x) => x.its_device.as_ref(), } } /// Returns an array with GIC device properties pub fn device_properties(&self) -> &[u64] { match self { Self::V2(x) => x.device_properties(), Self::V3(x) => x.device_properties(), } } /// Returns an array with MSI properties if GIC supports it pub fn msi_properties(&self) -> Option<&[u64; 2]> { match self { Self::V2(x) => x.msi_properties.as_ref(), Self::V3(x) => x.msi_properties.as_ref(), } } /// Returns the number of vCPUs this GIC handles pub fn vcpu_count(&self) -> u64 { match self { Self::V2(x) => x.vcpu_count(), Self::V3(x) => x.vcpu_count(), } } /// Returns the fdt compatibility property of the device pub fn fdt_compatibility(&self) -> &str { match self { Self::V2(x) => x.fdt_compatibility(), Self::V3(x) => x.fdt_compatibility(), } } /// Returns the maint_irq fdt property of the device pub fn fdt_maint_irq(&self) -> u32 { match self { Self::V2(x) => x.fdt_maint_irq(), Self::V3(x) => x.fdt_maint_irq(), } } /// Returns the GIC version of the device pub fn version(&self) -> u32 { match self { Self::V2(_) => GICv2::VERSION, Self::V3(_) => GICv3::VERSION, } } /// Setup the device-specific attributes pub fn init_device_attributes(gic_device: &Self) -> Result<(), GicError> { match gic_device { Self::V2(x) => GICv2::init_device_attributes(x), Self::V3(x) => GICv3::init_device_attributes(x), } } /// Method to save the state of the GIC device. pub fn save_device(&self, mpidrs: &[u64]) -> Result { match self { Self::V2(x) => x.save_device(mpidrs), Self::V3(x) => x.save_device(mpidrs), } } /// Method to restore the state of the GIC device. 
pub fn restore_device(&self, mpidrs: &[u64], state: &GicState) -> Result<(), GicError> { match self { Self::V2(x) => x.restore_device(mpidrs, state), Self::V3(x) => x.restore_device(mpidrs, state), } } } /// Create a GIC device. /// /// If "version" parameter is "None" the function will try to create by default a GICv3 device. /// If that fails it will try to fall-back to a GICv2 device. /// If version is Some the function will try to create a device of exactly the specified version. pub fn create_gic( vm: &VmFd, vcpu_count: u64, version: Option, ) -> Result { match version { Some(GICVersion::GICV2) => GICv2::create(vm, vcpu_count).map(GICDevice::V2), Some(GICVersion::GICV3) => GICv3::create(vm, vcpu_count).map(GICDevice::V3), None => GICv3::create(vm, vcpu_count) .map(GICDevice::V3) .or_else(|_| GICv2::create(vm, vcpu_count).map(GICDevice::V2)), } } #[cfg(test)] mod tests { use kvm_ioctls::Kvm; use super::*; #[test] fn test_create_gic() { let kvm = Kvm::new().unwrap(); let vm = kvm.create_vm().unwrap(); create_gic(&vm, 1, None).unwrap(); } } ================================================ FILE: src/vmm/src/arch/aarch64/gic/regs.rs ================================================ // Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 use std::fmt::Debug; use std::iter::StepBy; use std::ops::Range; use kvm_bindings::kvm_device_attr; use kvm_ioctls::DeviceFd; use serde::{Deserialize, Serialize}; use crate::arch::aarch64::gic::GicError; use crate::arch::aarch64::gic::gicv3::regs::its_regs::ItsRegisterState; #[derive(Debug, Serialize, Deserialize)] pub struct GicRegState { pub(crate) chunks: Vec, } /// Structure for serializing the state of the Vgic ICC regs #[derive(Debug, Default, Serialize, Deserialize)] pub struct VgicSysRegsState { pub main_icc_regs: Vec>, pub ap_icc_regs: Vec>>, } /// Structure used for serializing the state of the GIC registers. 
#[derive(Debug, Default, Serialize, Deserialize)] pub struct GicState { /// The state of the distributor registers. pub dist: Vec>, /// The state of the vcpu interfaces. pub gic_vcpu_states: Vec, /// The state of the ITS device. Only present with GICv3. pub its_state: Option, } /// Structure used for serializing the state of the GIC registers for a specific vCPU. #[derive(Debug, Default, Serialize, Deserialize)] pub struct GicVcpuState { pub rdist: Vec>, pub icc: VgicSysRegsState, } pub(crate) trait MmioReg { fn range(&self) -> Range; fn iter(&self) -> StepBy> where Self: Sized, { self.range().step_by(std::mem::size_of::()) } } pub(crate) trait VgicRegEngine { type Reg: MmioReg; type RegChunk: Clone + Default; fn group() -> u32; fn mpidr_mask() -> u64 { 0 } fn kvm_device_attr(offset: u64, val: &mut Self::RegChunk, mpidr: u64) -> kvm_device_attr { kvm_device_attr { group: Self::group(), attr: (mpidr & Self::mpidr_mask()) | offset, addr: val as *mut Self::RegChunk as u64, flags: 0, } } #[inline] fn get_reg_data( fd: &DeviceFd, reg: &Self::Reg, mpidr: u64, ) -> Result, GicError> where Self: Sized, { let mut data = Vec::with_capacity(reg.iter::().count()); for offset in reg.iter::() { let mut val = Self::RegChunk::default(); // SAFETY: `val` is a mutable memory location sized correctly for the attribute we're // requesting unsafe { fd.get_device_attr(&mut Self::kvm_device_attr(offset, &mut val, mpidr)) .map_err(|err| GicError::DeviceAttribute(err, false, Self::group()))?; } data.push(val); } Ok(GicRegState { chunks: data }) } fn get_regs_data( fd: &DeviceFd, regs: Box>, mpidr: u64, ) -> Result>, GicError> where Self: Sized, { let mut data = Vec::new(); for reg in regs { data.push(Self::get_reg_data(fd, reg, mpidr)?); } Ok(data) } #[inline] fn set_reg_data( fd: &DeviceFd, reg: &Self::Reg, data: &GicRegState, mpidr: u64, ) -> Result<(), GicError> where Self: Sized, { for (offset, val) in reg.iter::().zip(&data.chunks) { fd.set_device_attr(&Self::kvm_device_attr(offset, 
&mut val.clone(), mpidr)) .map_err(|err| GicError::DeviceAttribute(err, true, Self::group()))?; } Ok(()) } fn set_regs_data( fd: &DeviceFd, regs: Box>, data: &[GicRegState], mpidr: u64, ) -> Result<(), GicError> where Self: Sized, { for (reg, reg_data) in regs.zip(data) { Self::set_reg_data(fd, reg, reg_data, mpidr)?; } Ok(()) } } /// Structure representing a simple register. #[derive(PartialEq)] pub(crate) struct SimpleReg { /// The offset from the component address. The register is memory mapped here. offset: u64, /// Size in bytes. size: u16, } impl SimpleReg { pub const fn new(offset: u64, size: u16) -> SimpleReg { SimpleReg { offset, size } } } impl MmioReg for SimpleReg { fn range(&self) -> Range { self.offset..self.offset + u64::from(self.size) } } ================================================ FILE: src/vmm/src/arch/aarch64/kvm.rs ================================================ // Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 use std::convert::Infallible; use kvm_ioctls::Kvm as KvmFd; use crate::cpu_config::templates::KvmCapability; /// ['Kvm'] initialization can't fail for Aarch64 pub type KvmArchError = Infallible; /// Optional capabilities. #[derive(Debug, Default)] pub struct OptionalCapabilities { /// KVM_CAP_COUNTER_OFFSET pub counter_offset: bool, } /// Struct with kvm fd and kvm associated parameters. #[derive(Debug)] pub struct Kvm { /// KVM fd. pub fd: KvmFd, /// Additional capabilities that were specified in cpu template. 
pub kvm_cap_modifiers: Vec, } impl Kvm { pub(crate) const DEFAULT_CAPABILITIES: [u32; 7] = [ kvm_bindings::KVM_CAP_IOEVENTFD, kvm_bindings::KVM_CAP_IRQFD, kvm_bindings::KVM_CAP_USER_MEMORY, kvm_bindings::KVM_CAP_ARM_PSCI_0_2, kvm_bindings::KVM_CAP_DEVICE_CTRL, kvm_bindings::KVM_CAP_MP_STATE, kvm_bindings::KVM_CAP_ONE_REG, ]; /// Initialize [`Kvm`] type for Aarch64 architecture pub fn init_arch( fd: KvmFd, kvm_cap_modifiers: Vec, ) -> Result { Ok(Self { fd, kvm_cap_modifiers, }) } /// Returns struct with optional capabilities statuses. pub fn optional_capabilities(&self) -> OptionalCapabilities { OptionalCapabilities { counter_offset: self .fd .check_extension_raw(kvm_bindings::KVM_CAP_COUNTER_OFFSET.into()) != 0, } } } ================================================ FILE: src/vmm/src/arch/aarch64/layout.rs ================================================ // Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 // ==== Address map in use in ARM development systems today ==== // // - 32-bit - - 36-bit - - 40-bit - // 1024GB + + +-------------------+ <- 40-bit // | | DRAM | // ~ ~ ~ ~ // | | | // | | | // | | | // | | | // 544GB + + +-------------------+ // | | Hole or DRAM | // | | | // 512GB + + +-------------------+ // | | Mapped | // | | I/O | // ~ ~ ~ ~ // | | | // 256GB + + +-------------------+ // | | Reserved | // ~ ~ ~ ~ // | | | // 64GB + +-----------------------+-------------------+ <- 36-bit // | | DRAM | // ~ ~ ~ ~ // | | | // | | | // 34GB + +-----------------------+-------------------+ // | | Hole or DRAM | // 32GB + +-----------------------+-------------------+ // | | Mapped I/O | // ~ ~ ~ ~ // | | | // 16GB + +-----------------------+-------------------+ // | | Reserved | // ~ ~ ~ ~ // 4GB +-------------------+-----------------------+-------------------+ <- 32-bit // | 2GB of DRAM | // | | // 2GB +-------------------+-----------------------+-------------------+ // | Mapped I/O | // 1GB 
//           +-------------------+-----------------------+-------------------+
//           |                        ROM & RAM & I/O                        |
// 0GB       +-------------------+-----------------------+-------------------+ 0
//              - 32-bit -       - 36-bit -          - 40-bit -
//
// Taken from (http://infocenter.arm.com/help/topic/com.arm.doc.den0001c/DEN0001C_principles_of_arm_memory_maps.pdf).

use crate::device_manager::mmio::MMIO_LEN;

/// Start of RAM on 64 bit ARM.
pub const DRAM_MEM_START: u64 = 0x8000_0000; // 2 GB.
/// The maximum RAM size.
pub const DRAM_MEM_MAX_SIZE: usize = 0x00FF_8000_0000; // 1024 - 2 = 1022G.

/// Start of RAM on 64 bit ARM.
pub const SYSTEM_MEM_START: u64 = DRAM_MEM_START;

/// This is used by ACPI device manager for acpi tables or devices like vmgenid
/// In reality, 2MBs is an overkill, but immediately after this we write the kernel
/// image, which needs to be 2MB aligned.
pub const SYSTEM_MEM_SIZE: u64 = 0x20_0000;

/// Kernel command line maximum size.
/// As per `arch/arm64/include/uapi/asm/setup.h`.
pub const CMDLINE_MAX_SIZE: usize = 2048;

/// Maximum size of the device tree blob as specified in https://www.kernel.org/doc/Documentation/arm64/booting.txt.
pub const FDT_MAX_SIZE: usize = 0x20_0000;

// As per virt/kvm/arm/vgic/vgic-kvm-device.c we need
// the number of interrupts our GIC will support to be:
// * bigger than 32
// * less than 1023 and
// * a multiple of 32.
// The first 32 SPIs are reserved, but KVM already shifts the gsi we
// pass, so we go from 0 to 95 for legacy gsis ("irq") and the remaining
// we use for MSI.
/// Offset of first SPI in the GIC
pub const SPI_START: u32 = 32;
/// Last possible SPI in the GIC (128 total SPIs)
pub const SPI_END: u32 = 127;
/// First usable GSI id on aarch64 (corresponds to SPI #32).
pub const GSI_LEGACY_START: u32 = 0;
/// There are 128 SPIs available, but the first 32 are reserved
pub const GSI_LEGACY_NUM: u32 = SPI_END - SPI_START + 1;
/// Last available GSI
pub const GSI_LEGACY_END: u32 = GSI_LEGACY_START + GSI_LEGACY_NUM - 1;
/// First GSI used by MSI after legacy GSI
pub const GSI_MSI_START: u32 = GSI_LEGACY_END + 1;
/// The highest available GSI in KVM (KVM_MAX_IRQ_ROUTES=4096)
pub const GSI_MSI_END: u32 = 4095;
/// Number of GSI available for MSI.
pub const GSI_MSI_NUM: u32 = GSI_MSI_END - GSI_MSI_START + 1;

/// The start of the memory area reserved for MMIO 32-bit accesses.
/// Below this address will reside the GIC, above this address will reside the MMIO devices.
pub const MMIO32_MEM_START: u64 = 1 << 30; // 1GiB
/// The size of the memory area reserved for MMIO 32-bit accesses (1GiB).
pub const MMIO32_MEM_SIZE: u64 = DRAM_MEM_START - MMIO32_MEM_START;

// The rest of the MMIO address space (256 MiB) we dedicate to PCIe for memory-mapped access to
// configuration.
/// Size of MMIO region for PCIe configuration accesses.
pub const PCI_MMCONFIG_SIZE: u64 = 256 << 20;
/// Start of MMIO region for PCIe configuration accesses.
pub const PCI_MMCONFIG_START: u64 = DRAM_MEM_START - PCI_MMCONFIG_SIZE;
/// MMIO space per PCIe segment
pub const PCI_MMIO_CONFIG_SIZE_PER_SEGMENT: u64 = 4096 * 256;

// We reserve 768 MiB for devices at the beginning of the MMIO region. This includes space both for
// pure MMIO and PCIe devices.
/// Memory region start for boot device.
pub const BOOT_DEVICE_MEM_START: u64 = MMIO32_MEM_START;
/// Memory region start for RTC device.
pub const RTC_MEM_START: u64 = BOOT_DEVICE_MEM_START + MMIO_LEN;
/// Memory region start for Serial device.
pub const SERIAL_MEM_START: u64 = RTC_MEM_START + MMIO_LEN;

/// Beginning of memory region for device MMIO 32-bit accesses
pub const MEM_32BIT_DEVICES_START: u64 = SERIAL_MEM_START + MMIO_LEN;
/// Size of memory region for device MMIO 32-bit accesses
pub const MEM_32BIT_DEVICES_SIZE: u64 = PCI_MMCONFIG_START - MEM_32BIT_DEVICES_START;

// 64-bits region for MMIO accesses
/// The start of the memory area reserved for MMIO 64-bit accesses.
pub const MMIO64_MEM_START: u64 = 256 << 30;
/// The size of the memory area reserved for MMIO 64-bit accesses.
pub const MMIO64_MEM_SIZE: u64 = 256 << 30;

// At the moment, all of this region goes to devices
/// Beginning of memory region for device MMIO 64-bit accesses
pub const MEM_64BIT_DEVICES_START: u64 = MMIO64_MEM_START;
/// Size of memory region for device MMIO 64-bit accesses
pub const MEM_64BIT_DEVICES_SIZE: u64 = MMIO64_MEM_SIZE;

/// First address past the 64-bit MMIO gap
pub const FIRST_ADDR_PAST_64BITS_MMIO: u64 = MMIO64_MEM_START + MMIO64_MEM_SIZE;
/// Size of the memory past 64-bit MMIO gap
pub const PAST_64BITS_MMIO_SIZE: u64 = 512 << 30;

================================================ FILE: src/vmm/src/arch/aarch64/mod.rs ================================================
// Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

pub(crate) mod cache_info;
mod fdt;
/// Module for the global interrupt controller configuration.
pub mod gic;
/// Architecture specific KVM-related code
pub mod kvm;
/// Layout for this aarch64 system.
pub mod layout;
/// Logic for configuring aarch64 registers.
pub mod regs;
/// Architecture specific vCPU code
pub mod vcpu;
/// Architecture specific VM state code
pub mod vm;

use std::cmp::min;
use std::fmt::Debug;
use std::fs::File;

use linux_loader::loader::pe::PE as Loader;
use linux_loader::loader::{Cmdline, KernelLoader};
use vm_memory::{GuestMemoryError, GuestMemoryRegion};

use crate::arch::{BootProtocol, EntryPoint, arch_memory_regions_with_gap};
use crate::cpu_config::aarch64::{CpuConfiguration, CpuConfigurationError};
use crate::cpu_config::templates::CustomCpuTemplate;
use crate::initrd::InitrdConfig;
use crate::utils::{align_up, u64_to_usize, usize_to_u64};
use crate::vmm_config::machine_config::MachineConfig;
use crate::vstate::memory::{
    Address, Bytes, GuestAddress, GuestMemory, GuestMemoryMmap, GuestRegionType,
};
use crate::vstate::vcpu::KvmVcpuError;
use crate::{DeviceManager, Kvm, Vcpu, VcpuConfig, Vm, logger};

/// Errors thrown while configuring aarch64 system.
// NOTE: the variant doc comments below double as Display strings via
// `displaydoc::Display` — they are user-visible error text, not just docs.
#[derive(Debug, thiserror::Error, displaydoc::Display)]
pub enum ConfigurationError {
    /// Failed to create a Flattened Device Tree for this aarch64 microVM: {0}
    SetupFDT(#[from] fdt::FdtError),
    /// Failed to write to guest memory.
    MemoryError(#[from] GuestMemoryError),
    /// Cannot copy kernel file fd
    KernelFile,
    /// Cannot load kernel due to invalid memory configuration or invalid kernel image: {0}
    KernelLoader(#[from] linux_loader::loader::Error),
    /// Error creating vcpu configuration: {0}
    VcpuConfig(#[from] CpuConfigurationError),
    /// Error configuring the vcpu: {0}
    VcpuConfigure(#[from] KvmVcpuError),
}

/// Returns a Vec of the valid memory addresses for aarch64.
/// See [`layout`](layout) module for a drawing of the specific memory model for this platform.
pub fn arch_memory_regions(size: usize) -> Vec<(GuestAddress, usize)> { assert!(size > 0, "Attempt to allocate guest memory of length 0"); let dram_size = min(size, layout::DRAM_MEM_MAX_SIZE); if dram_size != size { logger::warn!( "Requested memory size {} exceeds architectural maximum (1022GiB). Size has been \ truncated to {}", size, dram_size ); } let mut regions = vec![]; if let Some((offset, remaining)) = arch_memory_regions_with_gap( &mut regions, u64_to_usize(layout::DRAM_MEM_START), dram_size, u64_to_usize(layout::MMIO64_MEM_START), u64_to_usize(layout::MMIO64_MEM_SIZE), ) { regions.push((GuestAddress(offset as u64), remaining)); } regions } /// Configures the system for booting Linux. #[allow(clippy::too_many_arguments)] pub fn configure_system_for_boot( kvm: &Kvm, vm: &Vm, device_manager: &mut DeviceManager, vcpus: &mut [Vcpu], machine_config: &MachineConfig, cpu_template: &CustomCpuTemplate, entry_point: EntryPoint, initrd: &Option, boot_cmdline: Cmdline, ) -> Result<(), ConfigurationError> { // Construct the base CpuConfiguration to apply CPU template onto. let cpu_config = CpuConfiguration::new(cpu_template, vcpus)?; // Apply CPU template to the base CpuConfiguration. let cpu_config = CpuConfiguration::apply_template(cpu_config, cpu_template); let vcpu_config = VcpuConfig { vcpu_count: machine_config.vcpu_count, smt: machine_config.smt, cpu_config, }; let optional_capabilities = kvm.optional_capabilities(); // Configure vCPUs with normalizing and setting the generated CPU configuration. 
for vcpu in vcpus.iter_mut() { vcpu.kvm_vcpu.configure( vm.guest_memory(), entry_point, &vcpu_config, &optional_capabilities, )?; } let vcpu_mpidr = vcpus .iter_mut() .map(|cpu| cpu.kvm_vcpu.get_mpidr()) .collect::, _>>() .map_err(KvmVcpuError::ConfigureRegisters)?; let cmdline = boot_cmdline .as_cstring() .expect("Cannot create cstring from cmdline string"); let fdt = fdt::create_fdt( vm.guest_memory(), vcpu_mpidr, cmdline, device_manager, vm.get_irqchip(), initrd, )?; let fdt_address = GuestAddress(get_fdt_addr(vm.guest_memory())); vm.guest_memory().write_slice(fdt.as_slice(), fdt_address)?; Ok(()) } /// Returns the memory address where the kernel could be loaded. pub fn get_kernel_start() -> u64 { layout::SYSTEM_MEM_START + layout::SYSTEM_MEM_SIZE } /// Returns the memory address where the initrd could be loaded. pub fn initrd_load_addr(guest_mem: &GuestMemoryMmap, initrd_size: usize) -> Option { let rounded_size = align_up( usize_to_u64(initrd_size), usize_to_u64(super::GUEST_PAGE_SIZE), ); GuestAddress(get_fdt_addr(guest_mem)) .checked_sub(rounded_size) .filter(|&addr| guest_mem.address_in_range(addr)) .map(|addr| addr.raw_value()) } // Auxiliary function to get the address where the device tree blob is loaded. fn get_fdt_addr(mem: &GuestMemoryMmap) -> u64 { // Find the first (and only) DRAM region. let dram_region = mem .iter() .find(|region| region.region_type == GuestRegionType::Dram) .unwrap(); // If the memory allocated is smaller than the size allocated for the FDT, // we return the start of the DRAM so that // we allow the code to try and load the FDT. dram_region .last_addr() .checked_sub(layout::FDT_MAX_SIZE as u64 - 1) .filter(|&addr| mem.address_in_range(addr)) .map(|addr| addr.raw_value()) .unwrap_or(layout::DRAM_MEM_START) } /// Load linux kernel into guest memory. pub fn load_kernel( kernel: &File, guest_memory: &GuestMemoryMmap, ) -> Result { // Need to clone the File because reading from it // mutates it. 
let mut kernel_file = kernel .try_clone() .map_err(|_| ConfigurationError::KernelFile)?; let entry_addr = Loader::load( guest_memory, Some(GuestAddress(get_kernel_start())), &mut kernel_file, None, )?; Ok(EntryPoint { entry_addr: entry_addr.kernel_load, protocol: BootProtocol::LinuxBoot, }) } #[cfg(kani)] mod verification { use crate::arch::aarch64::layout::{ DRAM_MEM_MAX_SIZE, DRAM_MEM_START, FIRST_ADDR_PAST_64BITS_MMIO, MMIO64_MEM_START, }; use crate::arch::arch_memory_regions; #[kani::proof] #[kani::unwind(3)] fn verify_arch_memory_regions() { let len: usize = kani::any::(); kani::assume(len > 0); let regions = arch_memory_regions(len); for region in ®ions { println!( "region: [{:x}:{:x})", region.0.0, region.0.0 + region.1 as u64 ); } // On Arm we have one MMIO gap that might fall within addressable ranges, // so we can get either 1 or 2 regions. assert!(regions.len() >= 1); assert!(regions.len() <= 2); // The total length of all regions cannot exceed DRAM_MEM_MAX_SIZE let actual_len = regions.iter().map(|&(_, len)| len).sum::(); assert!(actual_len <= DRAM_MEM_MAX_SIZE); // The total length is smaller or equal to the length we asked assert!(actual_len <= len); // If it's smaller, it's because we asked more than the the maximum possible. if (actual_len) < len { assert!(len > DRAM_MEM_MAX_SIZE); } // No region overlaps the 64-bit MMIO gap assert!( regions .iter() .all(|&(start, len)| start.0 >= FIRST_ADDR_PAST_64BITS_MMIO || start.0 + len as u64 <= MMIO64_MEM_START) ); // All regions start after our DRAM_MEM_START assert!(regions.iter().all(|&(start, _)| start.0 >= DRAM_MEM_START)); // All regions have non-zero length assert!(regions.iter().all(|&(_, len)| len > 0)); // If there's two regions, they perfectly snuggle up the 64bit MMIO gap if regions.len() == 2 { kani::cover!(); // The very first address should be DRAM_MEM_START assert_eq!(regions[0].0.0, DRAM_MEM_START); // The first region ends at the beginning of the 64 bits gap. 
assert_eq!(regions[0].0.0 + regions[0].1 as u64, MMIO64_MEM_START); // The second region starts exactly after the 64 bits gap. assert_eq!(regions[1].0.0, FIRST_ADDR_PAST_64BITS_MMIO); } } } #[cfg(test)] mod tests { use super::*; use crate::arch::aarch64::layout::{ DRAM_MEM_MAX_SIZE, DRAM_MEM_START, FDT_MAX_SIZE, FIRST_ADDR_PAST_64BITS_MMIO, MMIO64_MEM_START, }; use crate::test_utils::arch_mem; #[test] fn test_regions_lt_1024gb() { let regions = arch_memory_regions(1usize << 29); assert_eq!(1, regions.len()); assert_eq!(GuestAddress(DRAM_MEM_START), regions[0].0); assert_eq!(1usize << 29, regions[0].1); } #[test] fn test_regions_gt_1024gb() { let regions = arch_memory_regions(1usize << 41); assert_eq!(2, regions.len()); assert_eq!(GuestAddress(DRAM_MEM_START), regions[0].0); assert_eq!(MMIO64_MEM_START - DRAM_MEM_START, regions[0].1 as u64); assert_eq!(GuestAddress(FIRST_ADDR_PAST_64BITS_MMIO), regions[1].0); assert_eq!( DRAM_MEM_MAX_SIZE as u64 - MMIO64_MEM_START + DRAM_MEM_START, regions[1].1 as u64 ); } #[test] fn test_get_fdt_addr() { let mem = arch_mem(FDT_MAX_SIZE - 0x1000); assert_eq!(get_fdt_addr(&mem), DRAM_MEM_START); let mem = arch_mem(FDT_MAX_SIZE); assert_eq!(get_fdt_addr(&mem), DRAM_MEM_START); let mem = arch_mem(FDT_MAX_SIZE + 0x1000); assert_eq!(get_fdt_addr(&mem), 0x1000 + DRAM_MEM_START); } } ================================================ FILE: src/vmm/src/arch/aarch64/regs.rs ================================================ // Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 // // Portions Copyright 2017 The Chromium OS Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the THIRD-PARTY file. use std::fmt::Write; use std::mem::offset_of; use kvm_bindings::*; use serde::{Deserialize, Deserializer, Serialize, Serializer}; #[allow(non_upper_case_globals)] /// PSR (Processor State Register) bits. 
/// Taken from arch/arm64/include/uapi/asm/ptrace.h. const PSR_MODE_EL1h: u64 = 0x0000_0005; const PSR_F_BIT: u64 = 0x0000_0040; const PSR_I_BIT: u64 = 0x0000_0080; const PSR_A_BIT: u64 = 0x0000_0100; const PSR_D_BIT: u64 = 0x0000_0200; /// Taken from arch/arm64/kvm/inject_fault.c. pub const PSTATE_FAULT_BITS_64: u64 = PSR_MODE_EL1h | PSR_A_BIT | PSR_F_BIT | PSR_I_BIT | PSR_D_BIT; /// Gets a core id. macro_rules! arm64_core_reg_id { ($size: ident, $offset: expr) => { // The core registers of an arm64 machine are represented // in kernel by the `kvm_regs` structure. This structure is a // mix of 32, 64 and 128 bit fields: // struct kvm_regs { // struct user_pt_regs regs; // // __u64 sp_el1; // __u64 elr_el1; // // __u64 spsr[KVM_NR_SPSR]; // // struct user_fpsimd_state fp_regs; // }; // struct user_pt_regs { // __u64 regs[31]; // __u64 sp; // __u64 pc; // __u64 pstate; // }; // The id of a core register can be obtained like this: // offset = id & ~(KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_ARM_CORE). Thus, // id = KVM_REG_ARM64 | KVM_REG_SIZE_U64/KVM_REG_SIZE_U32/KVM_REG_SIZE_U128 | // KVM_REG_ARM_CORE | offset KVM_REG_ARM64 as u64 | KVM_REG_ARM_CORE as u64 | $size | ($offset / std::mem::size_of::()) as u64 }; } pub(crate) use arm64_core_reg_id; /// This macro computes the ID of a specific ARM64 system register similar to how /// the kernel C macro does. /// https://elixir.bootlin.com/linux/v4.20.17/source/arch/arm64/include/uapi/asm/kvm.h#L203 macro_rules! 
arm64_sys_reg { ($name: tt, $op0: tt, $op1: tt, $crn: tt, $crm: tt, $op2: tt) => { /// System register constant pub const $name: u64 = KVM_REG_ARM64 as u64 | KVM_REG_SIZE_U64 as u64 | KVM_REG_ARM64_SYSREG as u64 | ((($op0 as u64) << KVM_REG_ARM64_SYSREG_OP0_SHIFT) & KVM_REG_ARM64_SYSREG_OP0_MASK as u64) | ((($op1 as u64) << KVM_REG_ARM64_SYSREG_OP1_SHIFT) & KVM_REG_ARM64_SYSREG_OP1_MASK as u64) | ((($crn as u64) << KVM_REG_ARM64_SYSREG_CRN_SHIFT) & KVM_REG_ARM64_SYSREG_CRN_MASK as u64) | ((($crm as u64) << KVM_REG_ARM64_SYSREG_CRM_SHIFT) & KVM_REG_ARM64_SYSREG_CRM_MASK as u64) | ((($op2 as u64) << KVM_REG_ARM64_SYSREG_OP2_SHIFT) & KVM_REG_ARM64_SYSREG_OP2_MASK as u64); }; } // Constants imported from the Linux kernel: // https://elixir.bootlin.com/linux/v4.20.17/source/arch/arm64/include/asm/sysreg.h#L135 arm64_sys_reg!(MPIDR_EL1, 3, 0, 0, 0, 5); arm64_sys_reg!(MIDR_EL1, 3, 0, 0, 0, 0); // ID registers that represent cpu capabilities. // Needed for static cpu templates. arm64_sys_reg!(ID_AA64PFR0_EL1, 3, 0, 0, 4, 0); arm64_sys_reg!(ID_AA64ISAR0_EL1, 3, 0, 0, 6, 0); arm64_sys_reg!(ID_AA64ISAR1_EL1, 3, 0, 0, 6, 1); arm64_sys_reg!(ID_AA64MMFR2_EL1, 3, 0, 0, 7, 2); // Counter-timer Virtual Timer CompareValue register. // https://developer.arm.com/documentation/ddi0595/2021-12/AArch64-Registers/CNTV-CVAL-EL0--Counter-timer-Virtual-Timer-CompareValue-register // https://elixir.bootlin.com/linux/v6.8/source/arch/arm64/include/asm/sysreg.h#L468 arm64_sys_reg!(SYS_CNTV_CVAL_EL0, 3, 3, 14, 3, 2); // Counter-timer Physical Count Register // https://developer.arm.com/documentation/ddi0601/2023-12/AArch64-Registers/CNTPCT-EL0--Counter-timer-Physical-Count-Register // https://elixir.bootlin.com/linux/v6.8/source/arch/arm64/include/asm/sysreg.h#L459 arm64_sys_reg!(SYS_CNTPCT_EL0, 3, 3, 14, 0, 1); // Physical Timer EL0 count Register // The id of this register is same as SYS_CNTPCT_EL0, but KVM defines it // separately, so we do as well. 
// https://elixir.bootlin.com/linux/v6.12.6/source/arch/arm64/include/uapi/asm/kvm.h#L259 arm64_sys_reg!(KVM_REG_ARM_PTIMER_CNT, 3, 3, 14, 0, 1); // Translation Table Base Register // https://developer.arm.com/documentation/ddi0595/2021-03/AArch64-Registers/TTBR1-EL1--Translation-Table-Base-Register-1--EL1- arm64_sys_reg!(TTBR1_EL1, 3, 0, 2, 0, 1); // Translation Control Register // https://developer.arm.com/documentation/ddi0601/2024-09/AArch64-Registers/TCR-EL1--Translation-Control-Register--EL1- arm64_sys_reg!(TCR_EL1, 3, 0, 2, 0, 2); // AArch64 Memory Model Feature Register // https://developer.arm.com/documentation/100798/0400/register-descriptions/aarch64-system-registers/id-aa64mmfr0-el1--aarch64-memory-model-feature-register-0--el1 arm64_sys_reg!(ID_AA64MMFR0_EL1, 3, 0, 0, 7, 0); /// Vector lengths pseudo-register /// TODO: this can be removed after https://github.com/rust-vmm/kvm-bindings/pull/89 /// is merged and new version is used in Firecracker. pub const KVM_REG_ARM64_SVE_VLS: u64 = KVM_REG_ARM64 | KVM_REG_ARM64_SVE as u64 | KVM_REG_SIZE_U512 | 0xffff; /// Program Counter /// The offset value (0x100 = 32 * 8) is calcuated as follows: /// - `kvm_regs` includes `regs` field of type `user_pt_regs` at the beginning (i.e., at offset 0). /// - `pc` follows `regs[31]` and `sp` within `user_pt_regs` and they are 8 bytes each (i.e. the /// offset is (31 + 1) * 8 = 256). 
/// /// https://github.com/torvalds/linux/blob/master/Documentation/virt/kvm/api.rst#L2578 /// > 0x6030 0000 0010 0040 PC 64 regs.pc pub const PC: u64 = { let kreg_off = offset_of!(kvm_regs, regs); let pc_off = offset_of!(user_pt_regs, pc); arm64_core_reg_id!(KVM_REG_SIZE_U64, kreg_off + pc_off) }; /// Different aarch64 registers sizes #[derive(Debug)] pub enum RegSize { /// 8 bit register U8, /// 16 bit register U16, /// 32 bit register U32, /// 64 bit register U64, /// 128 bit register U128, /// 256 bit register U256, /// 512 bit register U512, /// 1024 bit register U1024, /// 2048 bit register U2048, } impl RegSize { /// Size of u8 register in bytes pub const U8_SIZE: usize = 1; /// Size of u16 register in bytes pub const U16_SIZE: usize = 2; /// Size of u32 register in bytes pub const U32_SIZE: usize = 4; /// Size of u64 register in bytes pub const U64_SIZE: usize = 8; /// Size of u128 register in bytes pub const U128_SIZE: usize = 16; /// Size of u256 register in bytes pub const U256_SIZE: usize = 32; /// Size of u512 register in bytes pub const U512_SIZE: usize = 64; /// Size of u1024 register in bytes pub const U1024_SIZE: usize = 128; /// Size of u2048 register in bytes pub const U2048_SIZE: usize = 256; } impl From for RegSize { fn from(value: usize) -> Self { match value { RegSize::U8_SIZE => RegSize::U8, RegSize::U16_SIZE => RegSize::U16, RegSize::U32_SIZE => RegSize::U32, RegSize::U64_SIZE => RegSize::U64, RegSize::U128_SIZE => RegSize::U128, RegSize::U256_SIZE => RegSize::U256, RegSize::U512_SIZE => RegSize::U512, RegSize::U1024_SIZE => RegSize::U1024, RegSize::U2048_SIZE => RegSize::U2048, _ => unreachable!("Registers bigger then 2048 bits are not supported"), } } } impl From for usize { fn from(value: RegSize) -> Self { match value { RegSize::U8 => RegSize::U8_SIZE, RegSize::U16 => RegSize::U16_SIZE, RegSize::U32 => RegSize::U32_SIZE, RegSize::U64 => RegSize::U64_SIZE, RegSize::U128 => RegSize::U128_SIZE, RegSize::U256 => RegSize::U256_SIZE, 
RegSize::U512 => RegSize::U512_SIZE, RegSize::U1024 => RegSize::U1024_SIZE, RegSize::U2048 => RegSize::U2048_SIZE, } } } /// Returns register size in bytes pub fn reg_size(reg_id: u64) -> usize { 2_usize.pow(((reg_id & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT) as u32) } /// Storage for aarch64 registers with different sizes. #[derive(Default, Debug, Clone, PartialEq, Eq)] pub struct Aarch64RegisterVec { ids: Vec, data: Vec, } impl Aarch64RegisterVec { /// Returns the number of elements in the vector. pub fn len(&self) -> usize { self.ids.len() } /// Returns true if the vector contains no elements. pub fn is_empty(&self) -> bool { self.ids.is_empty() } /// Appends a register to the vector, copying register data. pub fn push(&mut self, reg: Aarch64RegisterRef<'_>) { self.ids.push(reg.id); self.data.extend_from_slice(reg.data); } /// Returns an iterator over stored registers. pub fn iter(&self) -> impl Iterator> { Aarch64RegisterVecIterator { index: 0, offset: 0, ids: &self.ids, data: &self.data, } } /// Returns an iterator over stored registers that allows register modifications. pub fn iter_mut(&mut self) -> impl Iterator> { Aarch64RegisterVecIteratorMut { index: 0, offset: 0, ids: &self.ids, data: &mut self.data, } } /// Extract the Manufacturer ID from a VCPU state's registers. /// The ID is found between bits 24-31 of MIDR_EL1 register. 
pub fn manifacturer_id(&self) -> Option { self.iter() .find(|reg| reg.id == MIDR_EL1) .map(|reg| ((reg.value::() >> 24) & 0xFF) as u32) } } impl Serialize for Aarch64RegisterVec { fn serialize(&self, serializer: S) -> Result where S: Serializer, { Serialize::serialize(&(&self.ids, &self.data), serializer) } } impl<'de> Deserialize<'de> for Aarch64RegisterVec { fn deserialize(deserializer: D) -> Result where D: Deserializer<'de>, { let (ids, data): (Vec, Vec) = Deserialize::deserialize(deserializer)?; let mut total_size: usize = 0; for id in ids.iter() { let reg_size = reg_size(*id); if reg_size > RegSize::U2048_SIZE { return Err(serde::de::Error::custom( "Failed to deserialize aarch64 registers. Registers bigger than 2048 bits are \ not supported", )); } total_size += reg_size; } if total_size != data.len() { return Err(serde::de::Error::custom( "Failed to deserialize aarch64 registers. Sum of register sizes is not equal to \ registers data length", )); } Ok(Aarch64RegisterVec { ids, data }) } } /// Iterator over `Aarch64RegisterVec`. #[derive(Debug)] pub struct Aarch64RegisterVecIterator<'a> { index: usize, offset: usize, ids: &'a [u64], data: &'a [u8], } impl<'a> Iterator for Aarch64RegisterVecIterator<'a> { type Item = Aarch64RegisterRef<'a>; fn next(&mut self) -> Option { if self.index < self.ids.len() { let id = self.ids[self.index]; let reg_size = reg_size(id); let reg_ref = Aarch64RegisterRef { id, data: &self.data[self.offset..self.offset + reg_size], }; self.index += 1; self.offset += reg_size; Some(reg_ref) } else { None } } } /// Iterator over `Aarch64RegisterVec` with mutable values. 
#[derive(Debug)] pub struct Aarch64RegisterVecIteratorMut<'a> { index: usize, offset: usize, ids: &'a [u64], data: &'a mut [u8], } impl<'a> Iterator for Aarch64RegisterVecIteratorMut<'a> { type Item = Aarch64RegisterRefMut<'a>; fn next(&mut self) -> Option { if self.index < self.ids.len() { let id = self.ids[self.index]; let reg_size = reg_size(id); let data = std::mem::take(&mut self.data); let (head, tail) = data.split_at_mut(reg_size); self.index += 1; self.offset += reg_size; self.data = tail; Some(Aarch64RegisterRefMut { id, data: head }) } else { None } } } /// Reference to the aarch64 register. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub struct Aarch64RegisterRef<'a> { /// ID of the register pub id: u64, data: &'a [u8], } impl<'a> Aarch64RegisterRef<'a> { /// Creates new register reference with provided id and data. /// Register size in `id` should be equal to the /// length of the slice. Otherwise this method /// will panic. pub fn new(id: u64, data: &'a [u8]) -> Self { assert_eq!( reg_size(id), data.len(), "Attempt to create a register reference with incompatible id and data length" ); Self { id, data } } /// Returns register size in bytes pub fn size(&self) -> RegSize { reg_size(self.id).into() } /// Returns a register value. /// Type `T` must be of the same length as an /// underlying data slice. Otherwise this method /// will panic. pub fn value, const N: usize>(&self) -> T { T::from_slice(self.data) } /// Returns a string with hex formatted value of the register. pub fn value_str(&self) -> String { let hex = self.data.iter().rev().fold(String::new(), |mut acc, byte| { write!(&mut acc, "{:02x}", byte).unwrap(); acc }); format!("0x{hex}") } /// Returns register data as a byte slice pub fn as_slice(&self) -> &[u8] { self.data } } /// Reference to the aarch64 register. 
#[derive(Debug, PartialEq, Eq)] pub struct Aarch64RegisterRefMut<'a> { /// ID of the register pub id: u64, data: &'a mut [u8], } impl<'a> Aarch64RegisterRefMut<'a> { /// Creates new register reference with provided id and data. /// Register size in `id` should be equal to the /// length of the slice. Otherwise this method /// will panic. pub fn new(id: u64, data: &'a mut [u8]) -> Self { assert_eq!( reg_size(id), data.len(), "Attempt to create a register reference with incompatible id and data length" ); Self { id, data } } /// Returns register size in bytes pub fn size(&self) -> RegSize { reg_size(self.id).into() } /// Returns a register value. /// Type `T` must be of the same length as an /// underlying data slice. Otherwise this method /// will panic. pub fn value, const N: usize>(&self) -> T { T::from_slice(self.data) } /// Sets the register value. /// Type `T` must be of the same length as an /// underlying data slice. Otherwise this method /// will panic. pub fn set_value, const N: usize>(&mut self, value: T) { self.data.copy_from_slice(&value.to_bytes()) } } /// Trait for data types that can represent aarch64 /// register data. pub trait Aarch64RegisterData { /// Create data type from slice fn from_slice(slice: &[u8]) -> Self; /// Convert data type to array of bytes fn to_bytes(&self) -> [u8; N]; } macro_rules! reg_data { ($t:ty, $bytes: expr) => { impl Aarch64RegisterData<$bytes> for $t { fn from_slice(slice: &[u8]) -> Self { let mut bytes = [0_u8; $bytes]; bytes.copy_from_slice(slice); <$t>::from_le_bytes(bytes) } fn to_bytes(&self) -> [u8; $bytes] { self.to_le_bytes() } } }; } macro_rules! 
reg_data_array { ($t:ty, $bytes: expr) => { impl Aarch64RegisterData<$bytes> for $t { fn from_slice(slice: &[u8]) -> Self { let mut bytes = [0_u8; $bytes]; bytes.copy_from_slice(slice); bytes } fn to_bytes(&self) -> [u8; $bytes] { *self } } }; } reg_data!(u8, 1); reg_data!(u16, 2); reg_data!(u32, 4); reg_data!(u64, 8); reg_data!(u128, 16); // 256 reg_data_array!([u8; 32], 32); // 512 reg_data_array!([u8; 64], 64); // 1024 reg_data_array!([u8; 128], 128); // 2048 reg_data_array!([u8; 256], 256); #[cfg(test)] mod tests { use super::*; #[test] fn test_reg_size() { assert_eq!(reg_size(KVM_REG_SIZE_U32), 4); // ID_AA64PFR0_EL1 is 64 bit register assert_eq!(reg_size(ID_AA64PFR0_EL1), 8); } #[test] fn test_aarch64_register_vec_serde() { let mut v = Aarch64RegisterVec::default(); let reg1_bytes = 1_u8.to_le_bytes(); let reg1 = Aarch64RegisterRef::new(u64::from(KVM_REG_SIZE_U8), ®1_bytes); let reg2_bytes = 2_u16.to_le_bytes(); let reg2 = Aarch64RegisterRef::new(KVM_REG_SIZE_U16, ®2_bytes); v.push(reg1); v.push(reg2); let serialized_data = bitcode::serialize(&v).unwrap(); let restored: Aarch64RegisterVec = bitcode::deserialize(&serialized_data).unwrap(); for (old, new) in v.iter().zip(restored.iter()) { assert_eq!(old, new); } } #[test] fn test_aarch64_register_vec_serde_invalid_regs_size_sum() { let mut v = Aarch64RegisterVec::default(); let reg1_bytes = 1_u8.to_le_bytes(); // Creating invalid register with incompatible ID and reg size. let reg1 = Aarch64RegisterRef { id: KVM_REG_SIZE_U16, data: ®1_bytes, }; let reg2_bytes = 2_u16.to_le_bytes(); let reg2 = Aarch64RegisterRef::new(KVM_REG_SIZE_U16, ®2_bytes); v.push(reg1); v.push(reg2); let serialized_data = bitcode::serialize(&v).unwrap(); // Total size of registers according IDs are 16 + 16 = 32, // but actual data size is 8 + 16 = 24. 
bitcode::deserialize::(&serialized_data).unwrap_err(); } #[test] fn test_aarch64_register_vec_serde_invalid_reg_size() { let mut v = Aarch64RegisterVec::default(); let reg_bytes = [0_u8; 512]; // Creating invalid register with incompatible size. // 512 bytes for 4096 bit wide register. let reg = Aarch64RegisterRef { id: 0x0090000000000000, data: ®_bytes, }; v.push(reg); let serialized_data = bitcode::serialize(&v).unwrap(); // 4096 bit wide registers are not supported. bitcode::deserialize::(&serialized_data).unwrap_err(); } #[test] fn test_aarch64_register_vec() { let mut v = Aarch64RegisterVec::default(); let reg1_bytes = 1_u8.to_le_bytes(); let reg1 = Aarch64RegisterRef::new(u64::from(KVM_REG_SIZE_U8), ®1_bytes); let reg2_bytes = 2_u16.to_le_bytes(); let reg2 = Aarch64RegisterRef::new(KVM_REG_SIZE_U16, ®2_bytes); let reg3_bytes = 3_u32.to_le_bytes(); let reg3 = Aarch64RegisterRef::new(KVM_REG_SIZE_U32, ®3_bytes); let reg4_bytes = 4_u64.to_le_bytes(); let reg4 = Aarch64RegisterRef::new(KVM_REG_SIZE_U64, ®4_bytes); let reg5_bytes = 5_u128.to_le_bytes(); let reg5 = Aarch64RegisterRef::new(KVM_REG_SIZE_U128, ®5_bytes); let reg6 = Aarch64RegisterRef::new(KVM_REG_SIZE_U256, &[6; 32]); let reg7 = Aarch64RegisterRef::new(KVM_REG_SIZE_U512, &[7; 64]); let reg8 = Aarch64RegisterRef::new(KVM_REG_SIZE_U1024, &[8; 128]); let reg9 = Aarch64RegisterRef::new(KVM_REG_SIZE_U2048, &[9; 256]); v.push(reg1); v.push(reg2); v.push(reg3); v.push(reg4); v.push(reg5); v.push(reg6); v.push(reg7); v.push(reg8); v.push(reg9); assert!(!v.is_empty()); assert_eq!(v.len(), 9); // Test iter { macro_rules! 
test_iter { ($iter:expr, $size: expr, $t:ty, $bytes:expr, $value:expr) => { let reg_ref = $iter.next().unwrap(); assert_eq!(reg_ref.id, u64::from($size)); assert_eq!(reg_ref.value::<$t, $bytes>(), $value); }; } let mut regs_iter = v.iter(); test_iter!(regs_iter, KVM_REG_SIZE_U8, u8, 1, 1); test_iter!(regs_iter, KVM_REG_SIZE_U16, u16, 2, 2); test_iter!(regs_iter, KVM_REG_SIZE_U32, u32, 4, 3); test_iter!(regs_iter, KVM_REG_SIZE_U64, u64, 8, 4); test_iter!(regs_iter, KVM_REG_SIZE_U128, u128, 16, 5); test_iter!(regs_iter, KVM_REG_SIZE_U256, [u8; 32], 32, [6; 32]); test_iter!(regs_iter, KVM_REG_SIZE_U512, [u8; 64], 64, [7; 64]); test_iter!(regs_iter, KVM_REG_SIZE_U1024, [u8; 128], 128, [8; 128]); test_iter!(regs_iter, KVM_REG_SIZE_U2048, [u8; 256], 256, [9; 256]); assert!(regs_iter.next().is_none()); } // Test iter mut { { macro_rules! update_value { ($iter:expr, $t:ty, $bytes:expr) => { let mut reg_ref = $iter.next().unwrap(); reg_ref.set_value(reg_ref.value::<$t, $bytes>() - 1); }; } let mut regs_iter_mut = v.iter_mut(); update_value!(regs_iter_mut, u8, 1); update_value!(regs_iter_mut, u16, 2); update_value!(regs_iter_mut, u32, 4); update_value!(regs_iter_mut, u64, 8); update_value!(regs_iter_mut, u128, 16); } { macro_rules! 
test_iter { ($iter:expr, $t:ty, $bytes:expr, $value:expr) => { let reg_ref = $iter.next().unwrap(); assert_eq!(reg_ref.value::<$t, $bytes>(), $value); }; } let mut regs_iter = v.iter(); test_iter!(regs_iter, u8, 1, 0); test_iter!(regs_iter, u16, 2, 1); test_iter!(regs_iter, u32, 4, 2); test_iter!(regs_iter, u64, 8, 3); test_iter!(regs_iter, u128, 16, 4); } } } #[test] fn test_reg_ref() { let bytes = 69_u64.to_le_bytes(); let reg_ref = Aarch64RegisterRef::new(KVM_REG_SIZE_U64, &bytes); assert_eq!(usize::from(reg_ref.size()), 8); assert_eq!(reg_ref.value::(), 69); } #[test] fn test_reg_ref_value_str() { let bytes = 0x10_u8.to_le_bytes(); let reg_ref = Aarch64RegisterRef::new(KVM_REG_SIZE_U8 as u64, &bytes); assert_eq!(reg_ref.value_str(), "0x10"); let bytes = 0x1020_u16.to_le_bytes(); let reg_ref = Aarch64RegisterRef::new(KVM_REG_SIZE_U16, &bytes); assert_eq!(reg_ref.value_str(), "0x1020"); let bytes = 0x10203040_u32.to_le_bytes(); let reg_ref = Aarch64RegisterRef::new(KVM_REG_SIZE_U32, &bytes); assert_eq!(reg_ref.value_str(), "0x10203040"); let bytes = 0x1020304050607080_u64.to_le_bytes(); let reg_ref = Aarch64RegisterRef::new(KVM_REG_SIZE_U64, &bytes); assert_eq!(reg_ref.value_str(), "0x1020304050607080"); let bytes = [ 0x71, 0x61, 0x51, 0x41, 0x31, 0x21, 0x11, 0x90, 0x80, 0x70, 0x60, 0x50, 0x40, 0x30, 0x20, 0x10, ]; let reg_ref = Aarch64RegisterRef::new(KVM_REG_SIZE_U128, &bytes); assert_eq!(reg_ref.value_str(), "0x10203040506070809011213141516171"); } /// Should panic because ID has different size from a slice length. /// - Size in ID: 128 /// - Length of slice: 1 #[test] #[should_panic] fn test_reg_ref_new_must_panic() { let _ = Aarch64RegisterRef::new(KVM_REG_SIZE_U128, &[0; 1]); } /// Should panic because of incorrect cast to value. /// - Reference contains 64 bit register /// - Casting to 128 bits. 
#[test] #[should_panic] fn test_reg_ref_value_must_panic() { let bytes = 69_u64.to_le_bytes(); let reg_ref = Aarch64RegisterRef::new(KVM_REG_SIZE_U64, &bytes); assert_eq!(reg_ref.value::(), 69); } #[test] fn test_reg_ref_mut() { let mut bytes = 69_u64.to_le_bytes(); let mut reg_ref = Aarch64RegisterRefMut::new(KVM_REG_SIZE_U64, &mut bytes); assert_eq!(usize::from(reg_ref.size()), 8); assert_eq!(reg_ref.value::(), 69); reg_ref.set_value(reg_ref.value::() + 1); assert_eq!(reg_ref.value::(), 70); } /// Should panic because ID has different size from a slice length. /// - Size in ID: 128 /// - Length of slice: 1 #[test] #[should_panic] fn test_reg_ref_mut_new_must_panic() { let _ = Aarch64RegisterRefMut::new(KVM_REG_SIZE_U128, &mut [0; 1]); } /// Should panic because of incorrect cast to value. /// - Reference contains 64 bit register /// - Casting to 128 bits. #[test] #[should_panic] fn test_reg_ref_mut_must_panic() { let mut bytes = 69_u64.to_le_bytes(); let reg_ref = Aarch64RegisterRefMut::new(KVM_REG_SIZE_U64, &mut bytes); assert_eq!(reg_ref.value::(), 69); } } ================================================ FILE: src/vmm/src/arch/aarch64/vcpu.rs ================================================ // Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 // // Portions Copyright 2017 The Chromium OS Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the THIRD-PARTY file. 
use std::fmt::{Debug, Write}; use std::mem::offset_of; use std::sync::Arc; use kvm_bindings::*; use kvm_ioctls::{VcpuExit, VcpuFd, VmFd}; use serde::{Deserialize, Serialize}; use vm_memory::GuestAddress; use super::get_fdt_addr; use super::regs::*; use crate::arch::EntryPoint; use crate::arch::aarch64::kvm::OptionalCapabilities; use crate::arch::aarch64::regs::{Aarch64RegisterVec, KVM_REG_ARM64_SVE_VLS}; use crate::cpu_config::aarch64::custom_cpu_template::VcpuFeatures; use crate::cpu_config::templates::CpuConfiguration; use crate::logger::{IncMetric, METRICS, error}; use crate::vcpu::{VcpuConfig, VcpuError}; use crate::vstate::bus::Bus; use crate::vstate::memory::{Address, GuestMemoryMmap}; use crate::vstate::vcpu::VcpuEmulation; use crate::vstate::vm::Vm; /// Errors thrown while setting aarch64 registers. #[derive(Debug, PartialEq, Eq, thiserror::Error, displaydoc::Display)] pub enum VcpuArchError { /// Failed to get register {0}: {1} GetOneReg(u64, kvm_ioctls::Error), /// Failed to set register {0:#x} to value {1}: {2} SetOneReg(u64, String, kvm_ioctls::Error), /// Failed to retrieve list of registers: {0} GetRegList(kvm_ioctls::Error), /// Failed to get multiprocessor state: {0} GetMp(kvm_ioctls::Error), /// Failed to set multiprocessor state: {0} SetMp(kvm_ioctls::Error), /// Failed FamStructWrapper operation: {0} Fam(vmm_sys_util::fam::Error), /// Failed to set/get device attributes for vCPU: {0} DeviceAttribute(kvm_ioctls::Error), } /// Extract the Manufacturer ID from the host. /// The ID is found between bits 24-31 of MIDR_EL1 register. pub fn get_manufacturer_id_from_host() -> Option { let midr_el1_path = "/sys/devices/system/cpu/cpu0/regs/identification/midr_el1"; let midr_el1 = std::fs::read_to_string(midr_el1_path).ok()?; let midr_el1_trimmed = midr_el1.trim_end().trim_start_matches("0x"); let manufacturer_id = u32::from_str_radix(midr_el1_trimmed, 16).ok()?; Some(manufacturer_id >> 24) } /// Saves states of registers into `state`. 
/// /// # Arguments /// /// * `ids` - Slice of registers ids to save. /// * `regs` - Input/Output vector of registers. pub fn get_registers( vcpu_fd: &VcpuFd, ids: &[u64], regs: &mut Aarch64RegisterVec, ) -> Result<(), VcpuArchError> { let mut big_reg = [0_u8; 256]; for id in ids.iter() { let reg_size = vcpu_fd .get_one_reg(*id, &mut big_reg) .map_err(|e| VcpuArchError::GetOneReg(*id, e))?; let reg_ref = Aarch64RegisterRef::new(*id, &big_reg[0..reg_size]); regs.push(reg_ref); } Ok(()) } /// Errors associated with the wrappers over KVM ioctls. #[derive(Debug, PartialEq, Eq, thiserror::Error, displaydoc::Display)] pub enum KvmVcpuError { /// Error configuring the vcpu registers: {0} ConfigureRegisters(VcpuArchError), /// Error creating vcpu: {0} CreateVcpu(kvm_ioctls::Error), /// Failed to dump CPU configuration: {0} DumpCpuConfig(VcpuArchError), /// Error getting the vcpu preferred target: {0} GetPreferredTarget(kvm_ioctls::Error), /// Error initializing the vcpu: {0} Init(kvm_ioctls::Error), /// Error applying template: {0} ApplyCpuTemplate(VcpuArchError), /// Failed to restore the state of the vcpu: {0} RestoreState(VcpuArchError), /// Failed to save the state of the vcpu: {0} SaveState(VcpuArchError), /// Found unsupported KVM_ARM_VCPU_PMU_V3 bit set in vcpu features. UnsupportedPmuV3, } /// Error type for [`KvmVcpu::configure`]. pub type KvmVcpuConfigureError = KvmVcpuError; /// A wrapper around creating and using a kvm aarch64 vcpu. #[derive(Debug)] pub struct KvmVcpu { /// Index of vcpu. pub index: u8, /// KVM vcpu fd. pub fd: VcpuFd, /// Vcpu peripherals, such as buses pub peripherals: Peripherals, kvi: kvm_vcpu_init, /// IPA of steal_time region pub pvtime_ipa: Option, } /// Vcpu peripherals #[derive(Default, Debug)] pub struct Peripherals { /// mmio bus. pub mmio_bus: Option>, } impl KvmVcpu { /// Constructs a new kvm vcpu with arch specific functionality. /// /// # Arguments /// /// * `index` - Represents the 0-based CPU index between [0, max vcpus). 
/// * `vm` - The vm to which this vcpu will get attached. pub fn new(index: u8, vm: &Vm) -> Result { let kvm_vcpu = vm .fd() .create_vcpu(index.into()) .map_err(KvmVcpuError::CreateVcpu)?; let mut kvi = Self::default_kvi(vm.fd())?; // Secondary vcpus must be powered off for boot process. if 0 < index { kvi.features[0] |= 1 << KVM_ARM_VCPU_POWER_OFF; } Ok(KvmVcpu { index, fd: kvm_vcpu, peripherals: Default::default(), kvi, pvtime_ipa: None, }) } /// Read the MPIDR - Multiprocessor Affinity Register. pub fn get_mpidr(&self) -> Result { // MPIDR register is 64 bit wide on aarch64 let mut mpidr = [0_u8; 8]; match self.fd.get_one_reg(MPIDR_EL1, &mut mpidr) { Err(err) => Err(VcpuArchError::GetOneReg(MPIDR_EL1, err)), Ok(_) => Ok(u64::from_le_bytes(mpidr)), } } /// Configures an aarch64 specific vcpu for booting Linux. /// /// # Arguments /// /// * `guest_mem` - The guest memory used by this microvm. /// * `kernel_entry_point` - Specifies the boot protocol and offset from `guest_mem` at which /// the kernel starts. /// * `vcpu_config` - The vCPU configuration. pub fn configure( &mut self, guest_mem: &GuestMemoryMmap, kernel_entry_point: EntryPoint, vcpu_config: &VcpuConfig, optional_capabilities: &OptionalCapabilities, ) -> Result<(), KvmVcpuError> { for reg in vcpu_config.cpu_config.regs.iter() { self.fd.set_one_reg(reg.id, reg.as_slice()).map_err(|err| { KvmVcpuError::ApplyCpuTemplate(VcpuArchError::SetOneReg( reg.id, reg.value_str(), err, )) })?; } self.setup_boot_regs( kernel_entry_point.entry_addr.raw_value(), guest_mem, optional_capabilities, ) .map_err(KvmVcpuError::ConfigureRegisters)?; Ok(()) } /// Initializes an aarch64 specific vcpu for booting Linux. /// /// # Arguments /// /// * `vm_fd` - The kvm `VmFd` for this microvm. 
pub fn init(&mut self, vcpu_features: &[VcpuFeatures]) -> Result<(), KvmVcpuError> { for feature in vcpu_features.iter() { let index = feature.index as usize; self.kvi.features[index] = feature.bitmap.apply(self.kvi.features[index]); } self.init_vcpu()?; self.finalize_vcpu()?; Ok(()) } /// Creates default kvi struct based on vcpu index. pub fn default_kvi(vm_fd: &VmFd) -> Result { let mut kvi = kvm_vcpu_init::default(); // This reads back the kernel's preferred target type. vm_fd .get_preferred_target(&mut kvi) .map_err(KvmVcpuError::GetPreferredTarget)?; // We already checked that the capability is supported. kvi.features[0] |= 1 << KVM_ARM_VCPU_PSCI_0_2; Ok(kvi) } /// Save the KVM internal state. pub fn save_state(&self) -> Result { let mut state = VcpuState { mp_state: self.get_mpstate().map_err(KvmVcpuError::SaveState)?, ..Default::default() }; self.get_all_registers(&mut state.regs) .map_err(KvmVcpuError::SaveState)?; state.mpidr = self.get_mpidr().map_err(KvmVcpuError::SaveState)?; state.kvi = self.kvi; // We don't save power off state in a snapshot, because // it was only needed during uVM boot process. // When uVM is restored, the kernel has already passed // the boot state and turned secondary vcpus on. state.kvi.features[0] &= !(1 << KVM_ARM_VCPU_POWER_OFF); state.pvtime_ipa = self.pvtime_ipa.map(|guest_addr| guest_addr.0); Ok(state) } /// Use provided state to populate KVM internal state. pub fn restore_state(&mut self, state: &VcpuState) -> Result<(), KvmVcpuError> { self.kvi = state.kvi; self.init_vcpu()?; // If KVM_REG_ARM64_SVE_VLS is present it needs to // be set before vcpu is finalized. if let Some(sve_vls_reg) = state .regs .iter() .find(|reg| reg.id == KVM_REG_ARM64_SVE_VLS) { self.set_register(sve_vls_reg) .map_err(KvmVcpuError::RestoreState)?; } self.finalize_vcpu()?; // KVM_REG_ARM64_SVE_VLS needs to be skipped after vcpu is finalized. // If it is present it is handled in the code above. 
for reg in state .regs .iter() .filter(|reg| reg.id != KVM_REG_ARM64_SVE_VLS) { self.set_register(reg).map_err(KvmVcpuError::RestoreState)?; } self.set_mpstate(state.mp_state) .map_err(KvmVcpuError::RestoreState)?; // Assumes that steal time memory region was set up already if let Some(pvtime_ipa) = state.pvtime_ipa { self.enable_pvtime(GuestAddress(pvtime_ipa)) .map_err(KvmVcpuError::RestoreState)?; } Ok(()) } /// Dumps CPU configuration. pub fn dump_cpu_config(&self) -> Result { let mut regs = Aarch64RegisterVec::default(); self.get_all_registers(&mut regs) .map_err(KvmVcpuError::DumpCpuConfig)?; Ok(CpuConfiguration { regs }) } /// Initializes internal vcpufd. fn init_vcpu(&self) -> Result<(), KvmVcpuError> { // Setting KVM_ARM_VCPU_PMU_V3 without initialising the PMU causes KVM // to crash on KVM_RUN with EINVAL. // // To properly initialise the PMU, the KVM_SET_DEVICE_ATTR ioctl must // be made with the flag KVM_ARM_VCPU_PMU_V3_INIT set. Firecracker // currently does not handle this, so we should return an error instead. if (self.kvi.features[0] & (1 << KVM_ARM_VCPU_PMU_V3)) != 0 { return Err(KvmVcpuError::UnsupportedPmuV3); } self.fd.vcpu_init(&self.kvi).map_err(KvmVcpuError::Init)?; Ok(()) } /// Checks for SVE feature and calls `vcpu_finalize` if /// it is enabled. fn finalize_vcpu(&self) -> Result<(), KvmVcpuError> { if (self.kvi.features[0] & (1 << KVM_ARM_VCPU_SVE)) != 0 { // KVM_ARM_VCPU_SVE has value 4 so casting to i32 is safe. #[allow(clippy::cast_possible_wrap)] let feature = KVM_ARM_VCPU_SVE as i32; self.fd.vcpu_finalize(&feature).unwrap(); } Ok(()) } /// Configure relevant boot registers for a given vCPU. /// /// # Arguments /// /// * `boot_ip` - Starting instruction pointer. /// * `mem` - Reserved DRAM for current VM. 
/// + `optional_capabilities` - which optional capabilities are enabled that might influence /// vcpu configuration pub fn setup_boot_regs( &self, boot_ip: u64, mem: &GuestMemoryMmap, optional_capabilities: &OptionalCapabilities, ) -> Result<(), VcpuArchError> { let kreg_off = offset_of!(kvm_regs, regs); // Get the register index of the PSTATE (Processor State) register. let pstate = offset_of!(user_pt_regs, pstate) + kreg_off; let id = arm64_core_reg_id!(KVM_REG_SIZE_U64, pstate); self.fd .set_one_reg(id, &PSTATE_FAULT_BITS_64.to_le_bytes()) .map_err(|err| { VcpuArchError::SetOneReg(id, format!("{PSTATE_FAULT_BITS_64:#x}"), err) })?; // Other vCPUs are powered off initially awaiting PSCI wakeup. if self.index == 0 { // Setting the PC (Processor Counter) to the current program address (kernel address). let pc = offset_of!(user_pt_regs, pc) + kreg_off; let id = arm64_core_reg_id!(KVM_REG_SIZE_U64, pc); self.fd .set_one_reg(id, &boot_ip.to_le_bytes()) .map_err(|err| VcpuArchError::SetOneReg(id, format!("{boot_ip:#x}"), err))?; // Last mandatory thing to set -> the address pointing to the FDT (also called DTB). // "The device tree blob (dtb) must be placed on an 8-byte boundary and must // not exceed 2 megabytes in size." -> https://www.kernel.org/doc/Documentation/arm64/booting.txt. // We are choosing to place it the end of DRAM. See `get_fdt_addr`. let regs0 = offset_of!(user_pt_regs, regs) + kreg_off; let id = arm64_core_reg_id!(KVM_REG_SIZE_U64, regs0); let fdt_addr = get_fdt_addr(mem); self.fd .set_one_reg(id, &fdt_addr.to_le_bytes()) .map_err(|err| VcpuArchError::SetOneReg(id, format!("{fdt_addr:#x}"), err))?; // Reset the physical counter for the guest. This way we avoid guest reading // host physical counter. // Resetting KVM_REG_ARM_PTIMER_CNT for single vcpu is enough because there is only // one timer struct with offsets per VM. 
// Because the access to KVM_REG_ARM_PTIMER_CNT is only present starting 6.4 kernel, // we only do the reset if KVM_CAP_COUNTER_OFFSET is present as it was added // in the same patch series as the ability to set the KVM_REG_ARM_PTIMER_CNT register. // Path series which introduced the needed changes: // https://lore.kernel.org/all/20230330174800.2677007-1-maz@kernel.org/ // Note: the value observed by the guest will still be above 0, because there is a delta // time between this resetting and first call to KVM_RUN. if optional_capabilities.counter_offset { self.fd .set_one_reg(KVM_REG_ARM_PTIMER_CNT, &[0; 8]) .map_err(|err| { VcpuArchError::SetOneReg(id, format!("{KVM_REG_ARM_PTIMER_CNT:#x}"), err) })?; } } Ok(()) } /// Saves the states of the system registers into `state`. /// /// # Arguments /// /// * `regs` - Input/Output vector of registers. pub fn get_all_registers(&self, state: &mut Aarch64RegisterVec) -> Result<(), VcpuArchError> { get_registers(&self.fd, &self.get_all_registers_ids()?, state) } /// Returns all registers ids, including core and system pub fn get_all_registers_ids(&self) -> Result, VcpuArchError> { // Call KVM_GET_REG_LIST to get all registers available to the guest. For ArmV8 there are // less than 500 registers expected, resize to the reported size when necessary. let mut reg_list = RegList::new(500).map_err(VcpuArchError::Fam)?; match self.fd.get_reg_list(&mut reg_list) { Ok(_) => Ok(reg_list.as_slice().to_vec()), Err(e) => match e.errno() { libc::E2BIG => { // resize and retry. let size: usize = reg_list .as_fam_struct_ref() .n .try_into() // Safe to unwrap as Firecracker only targets 64-bit machines. .unwrap(); reg_list = RegList::new(size).map_err(VcpuArchError::Fam)?; self.fd .get_reg_list(&mut reg_list) .map_err(VcpuArchError::GetRegList)?; Ok(reg_list.as_slice().to_vec()) } _ => Err(VcpuArchError::GetRegList(e)), }, } } /// Set the state of one system register. /// /// # Arguments /// /// * `reg` - Register to be set. 
pub fn set_register(&self, reg: Aarch64RegisterRef) -> Result<(), VcpuArchError> { self.fd .set_one_reg(reg.id, reg.as_slice()) .map_err(|e| VcpuArchError::SetOneReg(reg.id, reg.value_str(), e))?; Ok(()) } /// Get the multistate processor. /// /// # Arguments /// /// * `vcpu` - Structure for the VCPU that holds the VCPU's fd. pub fn get_mpstate(&self) -> Result { self.fd.get_mp_state().map_err(VcpuArchError::GetMp) } /// Set the state of the system registers. /// /// # Arguments /// /// * `state` - Structure for returning the state of the system registers. pub fn set_mpstate(&self, state: kvm_mp_state) -> Result<(), VcpuArchError> { self.fd.set_mp_state(state).map_err(VcpuArchError::SetMp) } /// Check if pvtime (steal time on ARM) is supported for vcpu pub fn supports_pvtime(&self) -> bool { let pvtime_device_attr = kvm_bindings::kvm_device_attr { group: kvm_bindings::KVM_ARM_VCPU_PVTIME_CTRL, attr: kvm_bindings::KVM_ARM_VCPU_PVTIME_IPA as u64, addr: 0, flags: 0, }; // Use kvm_has_device_attr to check if PVTime is supported self.fd.has_device_attr(&pvtime_device_attr).is_ok() } /// Enables pvtime for vcpu pub fn enable_pvtime(&mut self, ipa: GuestAddress) -> Result<(), VcpuArchError> { self.pvtime_ipa = Some(ipa); // Use KVM syscall (kvm_set_device_attr) to register the vCPU with the steal_time region let vcpu_device_attr = kvm_bindings::kvm_device_attr { group: KVM_ARM_VCPU_PVTIME_CTRL, attr: KVM_ARM_VCPU_PVTIME_IPA as u64, addr: &ipa.0 as *const u64 as u64, // userspace address of attr data flags: 0, }; self.fd .set_device_attr(&vcpu_device_attr) .map_err(VcpuArchError::DeviceAttribute)?; Ok(()) } } impl Peripherals { /// Runs the vCPU in KVM context and handles the kvm exit reason. /// /// Returns error or enum specifying whether emulation was handled or interrupted. 
pub fn run_arch_emulation(&self, exit: VcpuExit) -> Result { METRICS.vcpu.failures.inc(); // TODO: Are we sure we want to finish running a vcpu upon // receiving a vm exit that is not necessarily an error? error!("Unexpected exit reason on vcpu run: {:?}", exit); Err(VcpuError::UnhandledKvmExit(format!("{:?}", exit))) } } /// Structure holding VCPU kvm state. #[derive(Default, Clone, Serialize, Deserialize)] pub struct VcpuState { /// Multiprocessing state. pub mp_state: kvm_mp_state, /// Vcpu registers. pub regs: Aarch64RegisterVec, /// We will be using the mpidr for passing it to the VmState. /// The VmState will give this away for saving restoring the icc and redistributor /// registers. pub mpidr: u64, /// kvi states for vcpu initialization. pub kvi: kvm_vcpu_init, /// ipa for steal_time region pub pvtime_ipa: Option, } impl Debug for VcpuState { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { writeln!(f, "kvm_mp_state: {:#x}", self.mp_state.mp_state)?; writeln!(f, "mpidr: {:#x}", self.mpidr)?; for reg in self.regs.iter() { writeln!( f, "{:#x} 0x{}", reg.id, reg.as_slice() .iter() .rev() .fold(String::new(), |mut output, b| { let _ = write!(output, "{b:x}"); output }) )?; } Ok(()) } } #[cfg(test)] mod tests { #![allow(clippy::undocumented_unsafe_blocks)] use std::os::unix::io::AsRawFd; use kvm_bindings::{KVM_ARM_VCPU_PSCI_0_2, KVM_REG_SIZE_U64}; use vm_memory::GuestAddress; use super::*; use crate::arch::BootProtocol; use crate::arch::aarch64::layout; use crate::arch::aarch64::regs::Aarch64RegisterRef; use crate::cpu_config::aarch64::CpuConfiguration; use crate::cpu_config::templates::RegisterValueFilter; use crate::test_utils::arch_mem; use crate::vcpu::VcpuConfig; use crate::vstate::kvm::Kvm; use crate::vstate::vm::Vm; use crate::vstate::vm::tests::setup_vm_with_memory; fn setup_vcpu(mem_size: usize) -> (Kvm, Vm, KvmVcpu) { let (kvm, mut vm, mut vcpu) = setup_vcpu_no_init(mem_size); vcpu.init(&[]).unwrap(); vm.setup_irqchip(1).unwrap(); 
(kvm, vm, vcpu) } fn setup_vcpu_no_init(mem_size: usize) -> (Kvm, Vm, KvmVcpu) { let (kvm, vm) = setup_vm_with_memory(mem_size); let vcpu = KvmVcpu::new(0, &vm).unwrap(); (kvm, vm, vcpu) } #[test] fn test_create_vcpu() { let (_, vm) = setup_vm_with_memory(0x1000); unsafe { libc::close(vm.fd().as_raw_fd()) }; let err = KvmVcpu::new(0, &vm); // dropping vm would double close the gic fd, so leak it // do the drop before assertion. Otherwise if assert fails, // we get IO runtime error instead of assert error. std::mem::forget(vm); assert_eq!( err.err().unwrap().to_string(), "Error creating vcpu: Bad file descriptor (os error 9)".to_string() ); } #[test] fn test_configure_vcpu() { let (kvm, vm, mut vcpu) = setup_vcpu(0x10000); let optional_capabilities = kvm.optional_capabilities(); let vcpu_config = VcpuConfig { vcpu_count: 1, smt: false, cpu_config: CpuConfiguration::default(), }; vcpu.configure( vm.guest_memory(), EntryPoint { entry_addr: GuestAddress(crate::arch::get_kernel_start()), protocol: BootProtocol::LinuxBoot, }, &vcpu_config, &optional_capabilities, ) .unwrap(); unsafe { libc::close(vcpu.fd.as_raw_fd()) }; let err = vcpu.configure( vm.guest_memory(), EntryPoint { entry_addr: GuestAddress(crate::arch::get_kernel_start()), protocol: BootProtocol::LinuxBoot, }, &vcpu_config, &optional_capabilities, ); // dropping vcpu would double close the gic fd, so leak it // do the drop before assertion. Otherwise if assert fails, // we get IO runtime error instead of assert error. std::mem::forget(vcpu); assert_eq!( err.unwrap_err(), KvmVcpuError::ConfigureRegisters(VcpuArchError::SetOneReg( 0x6030000000100042, "0x3c5".to_string(), kvm_ioctls::Error::new(9) )) ); } #[test] fn test_init_vcpu() { let (_, mut vm) = setup_vm_with_memory(0x1000); let mut vcpu = KvmVcpu::new(0, &vm).unwrap(); vm.setup_irqchip(1).unwrap(); // KVM_ARM_VCPU_PSCI_0_2 is set by default. // we check if we can remove it. 
let vcpu_features = vec![VcpuFeatures {
    index: 0,
    bitmap: RegisterValueFilter {
        // Filter the PSCI bit with a zero value, i.e. request it cleared.
        filter: 1 << KVM_ARM_VCPU_PSCI_0_2,
        value: 0,
    },
}];
vcpu.init(&vcpu_features).unwrap();
assert!((vcpu.kvi.features[0] & (1 << KVM_ARM_VCPU_PSCI_0_2)) == 0)
}

#[test]
fn test_pmu_v3_feature_invalid() {
    let (_, mut vm) = setup_vm_with_memory(0x1000);
    let mut vcpu = KvmVcpu::new(0, &vm).unwrap();
    vm.setup_irqchip(1).unwrap();
    // Firecracker does not support KVM_ARM_VCPU_PMU_V3. Check that
    // attempting to enable this feature returns an error.
    let vcpu_features = vec![VcpuFeatures {
        index: 0,
        bitmap: RegisterValueFilter {
            filter: 1 << KVM_ARM_VCPU_PMU_V3,
            value: 1 << KVM_ARM_VCPU_PMU_V3,
        },
    }];
    let res = vcpu.init(&vcpu_features);
    assert!(matches!(res.unwrap_err(), KvmVcpuError::UnsupportedPmuV3));
}

#[test]
fn test_vcpu_save_restore_state() {
    let (_, mut vm) = setup_vm_with_memory(0x1000);
    let mut vcpu = KvmVcpu::new(0, &vm).unwrap();
    vm.setup_irqchip(1).unwrap();

    // Calling KVM_GET_REGLIST before KVM_VCPU_INIT will result in error.
    let res = vcpu.save_state();
    assert!(matches!(
        res.unwrap_err(),
        KvmVcpuError::SaveState(VcpuArchError::GetRegList(_))
    ));

    // Try to restore the register using a faulty state.
    let mut faulty_vcpu_state = VcpuState::default();

    // Try faulty kvi state
    let res = vcpu.restore_state(&faulty_vcpu_state);
    assert!(matches!(res.unwrap_err(), KvmVcpuError::Init(_)));

    // Try faulty vcpu regs: a register with id 0 is never valid, so
    // SET_ONE_REG must fail on it.
    faulty_vcpu_state.kvi = KvmVcpu::default_kvi(vm.fd()).unwrap();
    let mut regs = Aarch64RegisterVec::default();
    let mut reg = Aarch64RegisterRef::new(KVM_REG_SIZE_U64, &[0; 8]);
    reg.id = 0;
    regs.push(reg);
    faulty_vcpu_state.regs = regs;
    let res = vcpu.restore_state(&faulty_vcpu_state);
    assert!(matches!(
        res.unwrap_err(),
        KvmVcpuError::RestoreState(VcpuArchError::SetOneReg(0, _, _))
    ));

    // Happy path: init, save, then restore the saved state.
    vcpu.init(&[]).unwrap();
    let state = vcpu.save_state().expect("Cannot save state of vcpu");
    assert!(!state.regs.is_empty());
    vcpu.restore_state(&state)
        .expect("Cannot restore state of vcpu");
}

#[test]
fn test_dump_cpu_config_before_init() {
    // Test `dump_cpu_config()` before `KVM_VCPU_INIT`.
    //
    // This should fail with ENOEXEC.
    // https://elixir.bootlin.com/linux/v5.10.176/source/arch/arm64/kvm/arm.c#L1165
    let (_, mut vm) = setup_vm_with_memory(0x1000);
    let vcpu = KvmVcpu::new(0, &vm).unwrap();
    vm.setup_irqchip(1).unwrap();
    vcpu.dump_cpu_config().unwrap_err();
}

#[test]
fn test_dump_cpu_config_after_init() {
    // Test `dump_cpu_config()` after `KVM_VCPU_INIT`.
    let (_, mut vm) = setup_vm_with_memory(0x1000);
    let mut vcpu = KvmVcpu::new(0, &vm).unwrap();
    vm.setup_irqchip(1).unwrap();
    vcpu.init(&[]).unwrap();
    vcpu.dump_cpu_config().unwrap();
}

#[test]
fn test_setup_non_boot_vcpu() {
    // A second (non-boot) vCPU must initialize cleanly alongside vCPU 0.
    let (_, vm) = setup_vm_with_memory(0x1000);
    let mut vcpu1 = KvmVcpu::new(0, &vm).unwrap();
    vcpu1.init(&[]).unwrap();
    let mut vcpu2 = KvmVcpu::new(1, &vm).unwrap();
    vcpu2.init(&[]).unwrap();
}

#[test]
fn test_get_valid_regs() {
    // Test `get_regs()` with valid register IDs.
// - X0: 0x6030 0000 0010 0000 // - X1: 0x6030 0000 0010 0002 let (_, _, vcpu) = setup_vcpu(0x10000); let reg_list = Vec::::from([0x6030000000100000, 0x6030000000100002]); get_registers(&vcpu.fd, ®_list, &mut Aarch64RegisterVec::default()).unwrap(); } #[test] fn test_get_invalid_regs() { // Test `get_regs()` with invalid register IDs. let (_, _, vcpu) = setup_vcpu(0x10000); let reg_list = Vec::::from([0x6030000000100001, 0x6030000000100003]); get_registers(&vcpu.fd, ®_list, &mut Aarch64RegisterVec::default()).unwrap_err(); } #[test] fn test_setup_regs() { let (kvm, _, vcpu) = setup_vcpu_no_init(0x10000); let mem = arch_mem(layout::FDT_MAX_SIZE + 0x1000); let optional_capabilities = kvm.optional_capabilities(); let res = vcpu.setup_boot_regs(0x0, &mem, &optional_capabilities); assert!(matches!( res.unwrap_err(), VcpuArchError::SetOneReg(0x6030000000100042, _, _) )); vcpu.init_vcpu().unwrap(); vcpu.setup_boot_regs(0x0, &mem, &optional_capabilities) .unwrap(); // Check that the register is reset on compatible kernels. // Because there is a delta in time between we reset the register and time we // read it, we cannot compare with 0. Instead we compare it with meaningfully // small value. if optional_capabilities.counter_offset { let mut reg_bytes = [0_u8; 8]; vcpu.fd.get_one_reg(SYS_CNTPCT_EL0, &mut reg_bytes).unwrap(); let counter_value = u64::from_le_bytes(reg_bytes); // We are reading the SYS_CNTPCT_EL0 right after resetting it. // If reset did happen successfully, the value should be quite small when we read it. // If the reset did not happen, the value will be same as on the host and it surely // will be more that `max_value`. Measurements show that usually value is close // to 1000. Use bigger `max_value` just in case. let max_value = 10_000; assert!(counter_value < max_value); } } #[test] fn test_read_mpidr() { let (_, _, vcpu) = setup_vcpu_no_init(0x10000); // Must fail when vcpu is not initialized yet. 
let res = vcpu.get_mpidr(); assert!(matches!( res.unwrap_err(), VcpuArchError::GetOneReg(MPIDR_EL1, _) )); vcpu.init_vcpu().unwrap(); assert_eq!(vcpu.get_mpidr().unwrap(), 0x8000_0000); } #[test] fn test_get_set_regs() { let (_, _, vcpu) = setup_vcpu_no_init(0x10000); // Must fail when vcpu is not initialized yet. let mut regs = Aarch64RegisterVec::default(); let res = vcpu.get_all_registers(&mut regs); assert!(matches!(res.unwrap_err(), VcpuArchError::GetRegList(_))); vcpu.init_vcpu().unwrap(); vcpu.get_all_registers(&mut regs).unwrap(); for reg in regs.iter() { vcpu.set_register(reg).unwrap(); } } #[test] fn test_mpstate() { use std::os::unix::io::AsRawFd; let (_, _, vcpu) = setup_vcpu(0x10000); let res = vcpu.get_mpstate(); vcpu.set_mpstate(res.unwrap()).unwrap(); unsafe { libc::close(vcpu.fd.as_raw_fd()) }; let res = vcpu.get_mpstate(); assert!(matches!(res, Err(VcpuArchError::GetMp(_))), "{:?}", res); let res = vcpu.set_mpstate(kvm_mp_state::default()); // dropping vcpu would double close the fd, so leak it // do the drop before assertion. Otherwise if assert fails, // we get IO runtime error instead of assert error. std::mem::forget(vcpu); assert!(matches!(res, Err(VcpuArchError::SetMp(_))), "{:?}", res); } } ================================================ FILE: src/vmm/src/arch/aarch64/vm.rs ================================================ // Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 use std::sync::Mutex; use serde::{Deserialize, Serialize}; use crate::Kvm; use crate::arch::aarch64::gic::GicState; use crate::vstate::memory::{GuestMemoryExtension, GuestMemoryState}; use crate::vstate::resources::ResourceAllocator; use crate::vstate::vm::{VmCommon, VmError}; /// Structure representing the current architecture's understand of what a "virtual machine" is. #[derive(Debug)] pub struct ArchVm { /// Architecture independent parts of a vm. 
pub common: VmCommon, // On aarch64 we need to keep around the fd obtained by creating the VGIC device. irqchip_handle: Option, } /// Error type for [`Vm::restore_state`] #[derive(Debug, PartialEq, Eq, thiserror::Error, displaydoc::Display)] pub enum ArchVmError { /// Error creating the global interrupt controller: {0} VmCreateGIC(crate::arch::aarch64::gic::GicError), /// Failed to save the VM's GIC state: {0} SaveGic(crate::arch::aarch64::gic::GicError), /// Failed to restore the VM's GIC state: {0} RestoreGic(crate::arch::aarch64::gic::GicError), } impl ArchVm { /// Create a new `Vm` struct. pub fn new(kvm: &Kvm) -> Result { let common = Self::create_common(kvm)?; Ok(ArchVm { common, irqchip_handle: None, }) } /// Pre-vCPU creation setup. pub fn arch_pre_create_vcpus(&mut self, _: u8) -> Result<(), ArchVmError> { Ok(()) } /// Post-vCPU creation setup. pub fn arch_post_create_vcpus(&mut self, nr_vcpus: u8) -> Result<(), ArchVmError> { // On aarch64, the vCPUs need to be created (i.e call KVM_CREATE_VCPU) before setting up the // IRQ chip because the `KVM_CREATE_VCPU` ioctl will return error if the IRQCHIP // was already initialized. // Search for `kvm_arch_vcpu_create` in arch/arm/kvm/arm.c. self.setup_irqchip(nr_vcpus) } /// Creates the GIC (Global Interrupt Controller). pub fn setup_irqchip(&mut self, vcpu_count: u8) -> Result<(), ArchVmError> { self.irqchip_handle = Some( crate::arch::aarch64::gic::create_gic(self.fd(), vcpu_count.into(), None) .map_err(ArchVmError::VmCreateGIC)?, ); Ok(()) } /// Gets a reference to the irqchip of the VM. pub fn get_irqchip(&self) -> &crate::arch::aarch64::gic::GICDevice { self.irqchip_handle.as_ref().expect("IRQ chip not set") } /// Saves and returns the Kvm Vm state. 
pub fn save_state(&self, mpidrs: &[u64]) -> Result { Ok(VmState { memory: self.common.guest_memory.describe(), gic: self .get_irqchip() .save_device(mpidrs) .map_err(ArchVmError::SaveGic)?, resource_allocator: self.resource_allocator().clone(), }) } /// Restore the KVM VM state /// /// # Errors /// /// When [`crate::arch::aarch64::gic::GICDevice::restore_device`] errors. pub fn restore_state(&mut self, mpidrs: &[u64], state: &VmState) -> Result<(), ArchVmError> { self.get_irqchip() .restore_device(mpidrs, &state.gic) .map_err(ArchVmError::RestoreGic)?; self.common.resource_allocator = Mutex::new(state.resource_allocator.clone()); Ok(()) } } /// Structure holding an general specific VM state. #[derive(Debug, Default, Serialize, Deserialize)] pub struct VmState { /// Guest memory state pub memory: GuestMemoryState, /// GIC state. pub gic: GicState, /// resource allocator pub resource_allocator: ResourceAllocator, } ================================================ FILE: src/vmm/src/arch/mod.rs ================================================ // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 use std::fmt; use std::sync::LazyLock; use log::warn; use serde::{Deserialize, Serialize}; use vm_memory::GuestAddress; /// Module for aarch64 related functionality. #[cfg(target_arch = "aarch64")] pub mod aarch64; #[cfg(target_arch = "aarch64")] pub use aarch64::kvm::{Kvm, KvmArchError, OptionalCapabilities}; #[cfg(target_arch = "aarch64")] pub use aarch64::vcpu::*; #[cfg(target_arch = "aarch64")] pub use aarch64::vm::{ArchVm, ArchVmError, VmState}; #[cfg(target_arch = "aarch64")] pub use aarch64::{ ConfigurationError, arch_memory_regions, configure_system_for_boot, get_kernel_start, initrd_load_addr, layout::*, load_kernel, }; /// Module for x86_64 related functionality. 
#[cfg(target_arch = "x86_64")]
pub mod x86_64;

#[cfg(target_arch = "x86_64")]
pub use x86_64::kvm::{Kvm, KvmArchError};
#[cfg(target_arch = "x86_64")]
pub use x86_64::vcpu::*;
#[cfg(target_arch = "x86_64")]
pub use x86_64::vm::{ArchVm, ArchVmError, VmState};
#[cfg(target_arch = "x86_64")]
pub use crate::arch::x86_64::{
    ConfigurationError, arch_memory_regions, configure_system_for_boot, get_kernel_start,
    initrd_load_addr, layout::*, load_kernel,
};

/// Types of devices that can get attached to this platform.
#[derive(Clone, Debug, PartialEq, Eq, Hash, Copy, Serialize, Deserialize)]
pub enum DeviceType {
    /// Device Type: Virtio.
    Virtio(u32),
    /// Device Type: Serial.
    #[cfg(target_arch = "aarch64")]
    Serial,
    /// Device Type: RTC.
    #[cfg(target_arch = "aarch64")]
    Rtc,
    /// Device Type: BootTimer.
    BootTimer,
}

/// Default page size for the guest OS.
pub const GUEST_PAGE_SIZE: usize = 4096;

/// Get the size of the host page size.
///
/// Falls back to 4 KiB if `sysconf(_SC_PAGESIZE)` fails; the result is cached
/// so the syscall happens at most once per process.
pub fn host_page_size() -> usize {
    /// Default page size for the host OS.
    // FIX(extraction): `LazyLock`'s type parameter was stripped; the cell
    // caches a `usize` (the closure returns one via `usize::try_from`).
    static PAGE_SIZE: LazyLock<usize> = LazyLock::new(|| {
        // # Safety: Value always valid
        let r = unsafe { libc::sysconf(libc::_SC_PAGESIZE) };
        usize::try_from(r).unwrap_or_else(|_| {
            warn!("Could not get host page size with sysconf, assuming default 4K host pages");
            4096
        })
    });
    *PAGE_SIZE
}

impl fmt::Display for DeviceType {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "{:?}", self)
    }
}

/// Supported boot protocols for the guest.
#[derive(Debug, Copy, Clone, PartialEq)]
pub enum BootProtocol {
    /// Linux 64-bit boot protocol
    LinuxBoot,
    #[cfg(target_arch = "x86_64")]
    /// PVH boot protocol (x86/HVM direct boot ABI)
    PvhBoot,
}

impl fmt::Display for BootProtocol {
    fn fmt(&self, f: &mut ::std::fmt::Formatter) -> ::std::fmt::Result {
        match self {
            BootProtocol::LinuxBoot => write!(f, "Linux 64-bit boot protocol"),
            #[cfg(target_arch = "x86_64")]
            BootProtocol::PvhBoot => write!(f, "PVH boot protocol"),
        }
    }
}

#[derive(Debug, Copy, Clone)]
/// Specifies the entry point address where the guest must start
/// executing code, as well as which boot protocol is to be used
/// to configure the guest initial state.
pub struct EntryPoint {
    /// Address in guest memory where the guest must start execution
    pub entry_addr: GuestAddress,
    /// Specifies which boot protocol to use
    pub protocol: BootProtocol,
}

/// Adds in [`regions`] the valid memory regions suitable for RAM taking into account a gap in the
/// available address space and returns the remaining region (if any) past this gap
fn arch_memory_regions_with_gap(
    regions: &mut Vec<(GuestAddress, usize)>,
    region_start: usize,
    region_size: usize,
    gap_start: usize,
    gap_size: usize,
) -> Option<(usize, usize)> {
    // 0-sized gaps don't really make sense. We should never receive such a gap.
assert!(gap_size > 0); let first_addr_past_gap = gap_start + gap_size; match (region_start + region_size).checked_sub(gap_start) { // case0: region fits all before gap None | Some(0) => { regions.push((GuestAddress(region_start as u64), region_size)); None } // case1: region starts before the gap and goes past it Some(remaining) if region_start < gap_start => { regions.push((GuestAddress(region_start as u64), gap_start - region_start)); Some((first_addr_past_gap, remaining)) } // case2: region starts past the gap Some(_) => Some((first_addr_past_gap.max(region_start), region_size)), } } ================================================ FILE: src/vmm/src/arch/x86_64/cpu_model.rs ================================================ // Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 use std::arch::x86_64::__cpuid as host_cpuid; use std::cmp::{Eq, PartialEq}; /// Structure representing x86_64 CPU model. #[derive(Debug, Eq, PartialEq)] pub struct CpuModel { /// Extended family. pub extended_family: u8, /// Extended model. pub extended_model: u8, /// Family. pub family: u8, /// Model. pub model: u8, /// Stepping. pub stepping: u8, } /// Family / Model / Stepping for Intel Skylake pub const SKYLAKE_FMS: CpuModel = CpuModel { extended_family: 0x0, extended_model: 0x5, family: 0x6, model: 0x5, stepping: 0x4, }; /// Family / Model / Stepping for Intel Cascade Lake pub const CASCADE_LAKE_FMS: CpuModel = CpuModel { extended_family: 0x0, extended_model: 0x5, family: 0x6, model: 0x5, stepping: 0x7, }; /// Family / Model / Stepping for Intel Ice Lake pub const ICE_LAKE_FMS: CpuModel = CpuModel { extended_family: 0x0, extended_model: 0x6, family: 0x6, model: 0xa, stepping: 0x6, }; /// Family / Model / Stepping for AMD Milan pub const MILAN_FMS: CpuModel = CpuModel { extended_family: 0xa, extended_model: 0x0, family: 0xf, model: 0x1, stepping: 0x1, }; impl CpuModel { /// Get CPU model from current machine. 
pub fn get_cpu_model() -> Self { // SAFETY: This operation is safe as long as the processor implements this CPUID function. // 0x1 is the defined code for getting the processor version information. let eax = unsafe { host_cpuid(0x1) }.eax; CpuModel::from(&eax) } } impl From<&u32> for CpuModel { fn from(eax: &u32) -> Self { CpuModel { extended_family: ((eax >> 20) & 0xff) as u8, extended_model: ((eax >> 16) & 0xf) as u8, family: ((eax >> 8) & 0xf) as u8, model: ((eax >> 4) & 0xf) as u8, stepping: (eax & 0xf) as u8, } } } #[cfg(test)] mod tests { use super::*; #[test] fn cpu_model_from() { let skylake_eax = 0x00050654; assert_eq!(CpuModel::from(&skylake_eax), SKYLAKE_FMS); let cascade_lake_eax = 0x00050657; assert_eq!(CpuModel::from(&cascade_lake_eax), CASCADE_LAKE_FMS); let ice_lake_eax = 0x000606a6; assert_eq!(CpuModel::from(&ice_lake_eax), ICE_LAKE_FMS); let milan_eax = 0x00a00f11; assert_eq!(CpuModel::from(&milan_eax), MILAN_FMS); } } ================================================ FILE: src/vmm/src/arch/x86_64/gdt.rs ================================================ // Copyright © 2020, Oracle and/or its affiliates. // // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 // // Portions Copyright 2017 The Chromium OS Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the THIRD-PARTY file. // For GDT details see arch/x86/include/asm/segment.h use kvm_bindings::kvm_segment; /// Constructor for a conventional segment GDT (or LDT) entry. Derived from the kernel's segment.h. 
pub fn gdt_entry(flags: u16, base: u32, limit: u32) -> u64 { ((u64::from(base) & 0xff00_0000u64) << (56 - 24)) | ((u64::from(flags) & 0x0000_f0ffu64) << 40) | ((u64::from(limit) & 0x000f_0000u64) << (48 - 16)) | ((u64::from(base) & 0x00ff_ffffu64) << 16) | (u64::from(limit) & 0x0000_ffffu64) } fn get_base(entry: u64) -> u64 { (((entry) & 0xFF00_0000_0000_0000) >> 32) | (((entry) & 0x0000_00FF_0000_0000) >> 16) | (((entry) & 0x0000_0000_FFFF_0000) >> 16) } // Extract the segment limit from the GDT segment descriptor. // // In a segment descriptor, the limit field is 20 bits, so it can directly describe // a range from 0 to 0xFFFFF (1 MB). When G flag is set (4-KByte page granularity) it // scales the value in the limit field by a factor of 2^12 (4 Kbytes), making the effective // limit range from 0xFFF (4 KBytes) to 0xFFFF_FFFF (4 GBytes). // // However, the limit field in the VMCS definition is a 32 bit field, and the limit value is not // automatically scaled using the G flag. This means that for a desired range of 4GB for a // given segment, its limit must be specified as 0xFFFF_FFFF. Therefore the method of obtaining // the limit from the GDT entry is not sufficient, since it only provides 20 bits when 32 bits // are necessary. Fortunately, we can check if the G flag is set when extracting the limit since // the full GDT entry is passed as an argument, and perform the scaling of the limit value to // return the full 32 bit value. // // The scaling mentioned above is required when using PVH boot, since the guest boots in protected // (32-bit) mode and must be able to access the entire 32-bit address space. It does not cause // issues for the case of direct boot to 64-bit (long) mode, since in 64-bit mode the processor does // not perform runtime limit checking on code or data segments. // // (For more information concerning the formats of segment descriptors, VMCS fields, et cetera, // please consult the Intel Software Developer Manual.) 
fn get_limit(entry: u64) -> u32 { #[allow(clippy::cast_possible_truncation)] // clearly, truncation is not possible let limit: u32 = ((((entry) & 0x000F_0000_0000_0000) >> 32) | ((entry) & 0x0000_0000_0000_FFFF)) as u32; // Perform manual limit scaling if G flag is set match get_g(entry) { 0 => limit, _ => (limit << 12) | 0xFFF, // G flag is either 0 or 1 } } fn get_g(entry: u64) -> u8 { ((entry & 0x0080_0000_0000_0000) >> 55) as u8 } fn get_db(entry: u64) -> u8 { ((entry & 0x0040_0000_0000_0000) >> 54) as u8 } fn get_l(entry: u64) -> u8 { ((entry & 0x0020_0000_0000_0000) >> 53) as u8 } fn get_avl(entry: u64) -> u8 { ((entry & 0x0010_0000_0000_0000) >> 52) as u8 } fn get_p(entry: u64) -> u8 { ((entry & 0x0000_8000_0000_0000) >> 47) as u8 } fn get_dpl(entry: u64) -> u8 { ((entry & 0x0000_6000_0000_0000) >> 45) as u8 } fn get_s(entry: u64) -> u8 { ((entry & 0x0000_1000_0000_0000) >> 44) as u8 } fn get_type(entry: u64) -> u8 { ((entry & 0x0000_0F00_0000_0000) >> 40) as u8 } /// Automatically build the kvm struct for SET_SREGS from the kernel bit fields. /// /// # Arguments /// /// * `entry` - The gdt entry. /// * `table_index` - Index of the entry in the gdt table. 
pub fn kvm_segment_from_gdt(entry: u64, table_index: u8) -> kvm_segment { kvm_segment { base: get_base(entry), limit: get_limit(entry), selector: u16::from(table_index * 8), type_: get_type(entry), present: get_p(entry), dpl: get_dpl(entry), db: get_db(entry), s: get_s(entry), l: get_l(entry), g: get_g(entry), avl: get_avl(entry), padding: 0, unusable: match get_p(entry) { 0 => 1, _ => 0, }, } } #[cfg(test)] mod tests { use super::*; #[test] fn field_parse() { let gdt = gdt_entry(0xA09B, 0x10_0000, 0xfffff); let seg = kvm_segment_from_gdt(gdt, 0); // 0xA09B // 'A' assert_eq!(0x1, seg.g); assert_eq!(0x0, seg.db); assert_eq!(0x1, seg.l); assert_eq!(0x0, seg.avl); // '9' assert_eq!(0x1, seg.present); assert_eq!(0x0, seg.dpl); assert_eq!(0x1, seg.s); // 'B' assert_eq!(0xB, seg.type_); // base and limit assert_eq!(0x10_0000, seg.base); assert_eq!(0xffff_ffff, seg.limit); assert_eq!(0x0, seg.unusable); } } ================================================ FILE: src/vmm/src/arch/x86_64/generated/arch_prctl.rs ================================================ // Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
// SPDX-License-Identifier: Apache-2.0 // automatically generated by tools/bindgen.sh #![allow( non_camel_case_types, non_upper_case_globals, dead_code, non_snake_case, clippy::ptr_as_ptr, clippy::undocumented_unsafe_blocks, missing_debug_implementations, clippy::tests_outside_test_module, unsafe_op_in_unsafe_fn, clippy::redundant_static_lifetimes )] pub const ARCH_SET_GS: u32 = 4097; pub const ARCH_SET_FS: u32 = 4098; pub const ARCH_GET_FS: u32 = 4099; pub const ARCH_GET_GS: u32 = 4100; pub const ARCH_GET_CPUID: u32 = 4113; pub const ARCH_SET_CPUID: u32 = 4114; pub const ARCH_GET_XCOMP_SUPP: u32 = 4129; pub const ARCH_GET_XCOMP_PERM: u32 = 4130; pub const ARCH_REQ_XCOMP_PERM: u32 = 4131; pub const ARCH_GET_XCOMP_GUEST_PERM: u32 = 4132; pub const ARCH_REQ_XCOMP_GUEST_PERM: u32 = 4133; pub const ARCH_XCOMP_TILECFG: u32 = 17; pub const ARCH_XCOMP_TILEDATA: u32 = 18; pub const ARCH_MAP_VDSO_X32: u32 = 8193; pub const ARCH_MAP_VDSO_32: u32 = 8194; pub const ARCH_MAP_VDSO_64: u32 = 8195; pub const ARCH_GET_UNTAG_MASK: u32 = 16385; pub const ARCH_ENABLE_TAGGED_ADDR: u32 = 16386; pub const ARCH_GET_MAX_TAG_BITS: u32 = 16387; pub const ARCH_FORCE_TAGGED_SVA: u32 = 16388; pub const ARCH_SHSTK_ENABLE: u32 = 20481; pub const ARCH_SHSTK_DISABLE: u32 = 20482; pub const ARCH_SHSTK_LOCK: u32 = 20483; pub const ARCH_SHSTK_UNLOCK: u32 = 20484; pub const ARCH_SHSTK_STATUS: u32 = 20485; pub const ARCH_SHSTK_SHSTK: u32 = 1; pub const ARCH_SHSTK_WRSS: u32 = 2; ================================================ FILE: src/vmm/src/arch/x86_64/generated/hyperv.rs ================================================ // Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
// SPDX-License-Identifier: Apache-2.0 // automatically generated by tools/bindgen.sh #![allow( non_camel_case_types, non_upper_case_globals, dead_code, non_snake_case, clippy::ptr_as_ptr, clippy::undocumented_unsafe_blocks, missing_debug_implementations, clippy::tests_outside_test_module, unsafe_op_in_unsafe_fn, clippy::redundant_static_lifetimes )] pub const HV_X64_MSR_SYNDBG_CONTROL: u32 = 0x400000f1; pub const HV_X64_MSR_SYNDBG_STATUS: u32 = 0x400000f2; pub const HV_X64_MSR_SYNDBG_SEND_BUFFER: u32 = 0x400000f3; pub const HV_X64_MSR_SYNDBG_RECV_BUFFER: u32 = 0x400000f4; pub const HV_X64_MSR_SYNDBG_PENDING_BUFFER: u32 = 0x400000f5; pub const HV_X64_MSR_SYNDBG_OPTIONS: u32 = 0x400000ff; ================================================ FILE: src/vmm/src/arch/x86_64/generated/hyperv_tlfs.rs ================================================ // Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 // automatically generated by tools/bindgen.sh #![allow( non_camel_case_types, non_upper_case_globals, dead_code, non_snake_case, clippy::ptr_as_ptr, clippy::undocumented_unsafe_blocks, missing_debug_implementations, clippy::tests_outside_test_module, unsafe_op_in_unsafe_fn, clippy::redundant_static_lifetimes )] pub const HV_X64_MSR_GUEST_OS_ID: u32 = 0x40000000; pub const HV_X64_MSR_HYPERCALL: u32 = 0x40000001; pub const HV_X64_MSR_VP_INDEX: u32 = 0x40000002; pub const HV_X64_MSR_RESET: u32 = 0x40000003; pub const HV_X64_MSR_VP_RUNTIME: u32 = 0x40000010; pub const HV_X64_MSR_TIME_REF_COUNT: u32 = 0x40000020; pub const HV_X64_MSR_REFERENCE_TSC: u32 = 0x40000021; pub const HV_X64_MSR_TSC_FREQUENCY: u32 = 0x40000022; pub const HV_X64_MSR_APIC_FREQUENCY: u32 = 0x40000023; pub const HV_X64_MSR_EOI: u32 = 0x40000070; pub const HV_X64_MSR_ICR: u32 = 0x40000071; pub const HV_X64_MSR_TPR: u32 = 0x40000072; pub const HV_X64_MSR_VP_ASSIST_PAGE: u32 = 0x40000073; pub const HV_X64_MSR_SCONTROL: u32 = 0x40000080; pub const 
HV_X64_MSR_SVERSION: u32 = 0x40000081; pub const HV_X64_MSR_SIEFP: u32 = 0x40000082; pub const HV_X64_MSR_SIMP: u32 = 0x40000083; pub const HV_X64_MSR_EOM: u32 = 0x40000084; pub const HV_X64_MSR_SINT0: u32 = 0x40000090; pub const HV_X64_MSR_SINT1: u32 = 0x40000091; pub const HV_X64_MSR_SINT2: u32 = 0x40000092; pub const HV_X64_MSR_SINT3: u32 = 0x40000093; pub const HV_X64_MSR_SINT4: u32 = 0x40000094; pub const HV_X64_MSR_SINT5: u32 = 0x40000095; pub const HV_X64_MSR_SINT6: u32 = 0x40000096; pub const HV_X64_MSR_SINT7: u32 = 0x40000097; pub const HV_X64_MSR_SINT8: u32 = 0x40000098; pub const HV_X64_MSR_SINT9: u32 = 0x40000099; pub const HV_X64_MSR_SINT10: u32 = 0x4000009a; pub const HV_X64_MSR_SINT11: u32 = 0x4000009b; pub const HV_X64_MSR_SINT12: u32 = 0x4000009c; pub const HV_X64_MSR_SINT13: u32 = 0x4000009d; pub const HV_X64_MSR_SINT14: u32 = 0x4000009e; pub const HV_X64_MSR_SINT15: u32 = 0x4000009f; pub const HV_X64_MSR_NESTED_SCONTROL: u32 = 0x40001080; pub const HV_X64_MSR_NESTED_SVERSION: u32 = 0x40001081; pub const HV_X64_MSR_NESTED_SIEFP: u32 = 0x40001082; pub const HV_X64_MSR_NESTED_SIMP: u32 = 0x40001083; pub const HV_X64_MSR_NESTED_EOM: u32 = 0x40001084; pub const HV_X64_MSR_NESTED_SINT0: u32 = 0x40001090; pub const HV_X64_MSR_STIMER0_CONFIG: u32 = 0x400000b0; pub const HV_X64_MSR_STIMER0_COUNT: u32 = 0x400000b1; pub const HV_X64_MSR_STIMER1_CONFIG: u32 = 0x400000b2; pub const HV_X64_MSR_STIMER1_COUNT: u32 = 0x400000b3; pub const HV_X64_MSR_STIMER2_CONFIG: u32 = 0x400000b4; pub const HV_X64_MSR_STIMER2_COUNT: u32 = 0x400000b5; pub const HV_X64_MSR_STIMER3_CONFIG: u32 = 0x400000b6; pub const HV_X64_MSR_STIMER3_COUNT: u32 = 0x400000b7; pub const HV_X64_MSR_GUEST_IDLE: u32 = 0x400000f0; pub const HV_X64_MSR_CRASH_P0: u32 = 0x40000100; pub const HV_X64_MSR_CRASH_P1: u32 = 0x40000101; pub const HV_X64_MSR_CRASH_P2: u32 = 0x40000102; pub const HV_X64_MSR_CRASH_P3: u32 = 0x40000103; pub const HV_X64_MSR_CRASH_P4: u32 = 0x40000104; pub const 
HV_X64_MSR_CRASH_CTL: u32 = 0x40000105; pub const HV_X64_MSR_REENLIGHTENMENT_CONTROL: u32 = 0x40000106; pub const HV_X64_MSR_TSC_EMULATION_CONTROL: u32 = 0x40000107; pub const HV_X64_MSR_TSC_EMULATION_STATUS: u32 = 0x40000108; pub const HV_X64_MSR_TSC_INVARIANT_CONTROL: u32 = 0x40000118; pub const HV_X64_MSR_HYPERCALL_ENABLE: u32 = 0x1; pub const HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT: u32 = 0xc; pub const HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_MASK: i32 = -4096; pub const HV_X64_MSR_CRASH_PARAMS: u32 = 0x5; pub const HV_X64_MSR_VP_ASSIST_PAGE_ENABLE: u32 = 0x1; pub const HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT: u32 = 0xc; pub const HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK: i32 = -4096; pub const HV_X64_MSR_TSC_REFERENCE_ENABLE: u32 = 0x1; pub const HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT: u32 = 0xc; ================================================ FILE: src/vmm/src/arch/x86_64/generated/mod.rs ================================================ // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 // // Portions Copyright 2017 The Chromium OS Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the THIRD-PARTY file. pub mod arch_prctl; pub mod hyperv; pub mod hyperv_tlfs; pub mod mpspec; pub mod msr_index; pub mod perf_event; ================================================ FILE: src/vmm/src/arch/x86_64/generated/mpspec.rs ================================================ // Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
// SPDX-License-Identifier: Apache-2.0 // automatically generated by tools/bindgen.sh #![allow( non_camel_case_types, non_upper_case_globals, dead_code, non_snake_case, clippy::ptr_as_ptr, clippy::undocumented_unsafe_blocks, missing_debug_implementations, clippy::tests_outside_test_module, unsafe_op_in_unsafe_fn, clippy::redundant_static_lifetimes )] pub const MPC_SIGNATURE: &[u8; 5] = b"PCMP\0"; pub const MP_PROCESSOR: u32 = 0; pub const MP_BUS: u32 = 1; pub const MP_IOAPIC: u32 = 2; pub const MP_INTSRC: u32 = 3; pub const MP_LINTSRC: u32 = 4; pub const MP_TRANSLATION: u32 = 192; pub const CPU_ENABLED: u32 = 1; pub const CPU_BOOTPROCESSOR: u32 = 2; pub const CPU_STEPPING_MASK: u32 = 15; pub const CPU_MODEL_MASK: u32 = 240; pub const CPU_FAMILY_MASK: u32 = 3840; pub const BUSTYPE_EISA: &[u8; 5] = b"EISA\0"; pub const BUSTYPE_ISA: &[u8; 4] = b"ISA\0"; pub const BUSTYPE_INTERN: &[u8; 7] = b"INTERN\0"; pub const BUSTYPE_MCA: &[u8; 4] = b"MCA\0"; pub const BUSTYPE_VL: &[u8; 3] = b"VL\0"; pub const BUSTYPE_PCI: &[u8; 4] = b"PCI\0"; pub const BUSTYPE_PCMCIA: &[u8; 7] = b"PCMCIA\0"; pub const BUSTYPE_CBUS: &[u8; 5] = b"CBUS\0"; pub const BUSTYPE_CBUSII: &[u8; 7] = b"CBUSII\0"; pub const BUSTYPE_FUTURE: &[u8; 7] = b"FUTURE\0"; pub const BUSTYPE_MBI: &[u8; 4] = b"MBI\0"; pub const BUSTYPE_MBII: &[u8; 5] = b"MBII\0"; pub const BUSTYPE_MPI: &[u8; 4] = b"MPI\0"; pub const BUSTYPE_MPSA: &[u8; 5] = b"MPSA\0"; pub const BUSTYPE_NUBUS: &[u8; 6] = b"NUBUS\0"; pub const BUSTYPE_TC: &[u8; 3] = b"TC\0"; pub const BUSTYPE_VME: &[u8; 4] = b"VME\0"; pub const BUSTYPE_XPRESS: &[u8; 7] = b"XPRESS\0"; pub const MPC_APIC_USABLE: u32 = 1; pub const MP_IRQPOL_DEFAULT: u32 = 0; pub const MP_IRQPOL_ACTIVE_HIGH: u32 = 1; pub const MP_IRQPOL_RESERVED: u32 = 2; pub const MP_IRQPOL_ACTIVE_LOW: u32 = 3; pub const MP_IRQPOL_MASK: u32 = 3; pub const MP_IRQTRIG_DEFAULT: u32 = 0; pub const MP_IRQTRIG_EDGE: u32 = 4; pub const MP_IRQTRIG_RESERVED: u32 = 8; pub const MP_IRQTRIG_LEVEL: u32 = 12; pub const 
MP_IRQTRIG_MASK: u32 = 12;
pub const MP_APIC_ALL: u32 = 255;
pub const MPC_OEM_SIGNATURE: &[u8; 5] = b"_OEM\0";

/// MP Floating Pointer Structure from the Intel MultiProcessor Specification;
/// field layout mirrors the Linux kernel's `struct mpf_intel`.
#[repr(C)]
#[derive(Debug, Default, Copy, Clone, PartialEq)]
pub struct mpf_intel {
    pub signature: [::std::os::raw::c_char; 4usize],
    pub physptr: ::std::os::raw::c_uint,
    pub length: ::std::os::raw::c_uchar,
    pub specification: ::std::os::raw::c_uchar,
    pub checksum: ::std::os::raw::c_uchar,
    pub feature1: ::std::os::raw::c_uchar,
    pub feature2: ::std::os::raw::c_uchar,
    pub feature3: ::std::os::raw::c_uchar,
    pub feature4: ::std::os::raw::c_uchar,
    pub feature5: ::std::os::raw::c_uchar,
}
// bindgen-emitted compile-time layout checks: indexing a one-element array
// with a non-zero value fails to compile, so each line proves the Rust layout
// matches the C layout. FIX: the generic type arguments of `size_of` /
// `align_of` (e.g. `::<mpf_intel>`) had been stripped, leaving uncompilable
// `size_of::()` calls; they are restored for every struct in this block.
#[allow(clippy::unnecessary_operation, clippy::identity_op)]
const _: () = {
    ["Size of mpf_intel"][::std::mem::size_of::<mpf_intel>() - 16usize];
    ["Alignment of mpf_intel"][::std::mem::align_of::<mpf_intel>() - 4usize];
    ["Offset of field: mpf_intel::signature"]
        [::std::mem::offset_of!(mpf_intel, signature) - 0usize];
    ["Offset of field: mpf_intel::physptr"][::std::mem::offset_of!(mpf_intel, physptr) - 4usize];
    ["Offset of field: mpf_intel::length"][::std::mem::offset_of!(mpf_intel, length) - 8usize];
    ["Offset of field: mpf_intel::specification"]
        [::std::mem::offset_of!(mpf_intel, specification) - 9usize];
    ["Offset of field: mpf_intel::checksum"][::std::mem::offset_of!(mpf_intel, checksum) - 10usize];
    ["Offset of field: mpf_intel::feature1"][::std::mem::offset_of!(mpf_intel, feature1) - 11usize];
    ["Offset of field: mpf_intel::feature2"][::std::mem::offset_of!(mpf_intel, feature2) - 12usize];
    ["Offset of field: mpf_intel::feature3"][::std::mem::offset_of!(mpf_intel, feature3) - 13usize];
    ["Offset of field: mpf_intel::feature4"][::std::mem::offset_of!(mpf_intel, feature4) - 14usize];
    ["Offset of field: mpf_intel::feature5"][::std::mem::offset_of!(mpf_intel, feature5) - 15usize];
};

/// MP Configuration Table header (the "PCMP" table); mirrors Linux's
/// `struct mpc_table`.
#[repr(C)]
#[derive(Debug, Default, Copy, Clone, PartialEq)]
pub struct mpc_table {
    pub signature: [::std::os::raw::c_char; 4usize],
    pub length: ::std::os::raw::c_ushort,
    pub spec: ::std::os::raw::c_char,
    pub checksum: ::std::os::raw::c_char,
    pub oem: [::std::os::raw::c_char; 8usize],
    pub productid: [::std::os::raw::c_char; 12usize],
    pub oemptr: ::std::os::raw::c_uint,
    pub oemsize: ::std::os::raw::c_ushort,
    pub oemcount: ::std::os::raw::c_ushort,
    pub lapic: ::std::os::raw::c_uint,
    pub reserved: ::std::os::raw::c_uint,
}
#[allow(clippy::unnecessary_operation, clippy::identity_op)]
const _: () = {
    ["Size of mpc_table"][::std::mem::size_of::<mpc_table>() - 44usize];
    ["Alignment of mpc_table"][::std::mem::align_of::<mpc_table>() - 4usize];
    ["Offset of field: mpc_table::signature"]
        [::std::mem::offset_of!(mpc_table, signature) - 0usize];
    ["Offset of field: mpc_table::length"][::std::mem::offset_of!(mpc_table, length) - 4usize];
    ["Offset of field: mpc_table::spec"][::std::mem::offset_of!(mpc_table, spec) - 6usize];
    ["Offset of field: mpc_table::checksum"][::std::mem::offset_of!(mpc_table, checksum) - 7usize];
    ["Offset of field: mpc_table::oem"][::std::mem::offset_of!(mpc_table, oem) - 8usize];
    ["Offset of field: mpc_table::productid"]
        [::std::mem::offset_of!(mpc_table, productid) - 16usize];
    ["Offset of field: mpc_table::oemptr"][::std::mem::offset_of!(mpc_table, oemptr) - 28usize];
    ["Offset of field: mpc_table::oemsize"][::std::mem::offset_of!(mpc_table, oemsize) - 32usize];
    ["Offset of field: mpc_table::oemcount"][::std::mem::offset_of!(mpc_table, oemcount) - 34usize];
    ["Offset of field: mpc_table::lapic"][::std::mem::offset_of!(mpc_table, lapic) - 36usize];
    ["Offset of field: mpc_table::reserved"][::std::mem::offset_of!(mpc_table, reserved) - 40usize];
};

/// Processor entry (type MP_PROCESSOR) of the MP configuration table;
/// mirrors Linux's `struct mpc_cpu`.
#[repr(C)]
#[derive(Debug, Default, Copy, Clone, PartialEq)]
pub struct mpc_cpu {
    pub type_: ::std::os::raw::c_uchar,
    pub apicid: ::std::os::raw::c_uchar,
    pub apicver: ::std::os::raw::c_uchar,
    pub cpuflag: ::std::os::raw::c_uchar,
    pub cpufeature: ::std::os::raw::c_uint,
    pub featureflag: ::std::os::raw::c_uint,
    pub reserved: [::std::os::raw::c_uint; 2usize],
}
#[allow(clippy::unnecessary_operation, clippy::identity_op)]
const _: () = {
    ["Size of mpc_cpu"][::std::mem::size_of::<mpc_cpu>() - 20usize];
    ["Alignment of mpc_cpu"][::std::mem::align_of::<mpc_cpu>() - 4usize];
    ["Offset of field: mpc_cpu::type_"][::std::mem::offset_of!(mpc_cpu, type_) - 0usize];
    ["Offset of field: mpc_cpu::apicid"][::std::mem::offset_of!(mpc_cpu, apicid) - 1usize];
    ["Offset of field: mpc_cpu::apicver"][::std::mem::offset_of!(mpc_cpu, apicver) - 2usize];
    ["Offset of field: mpc_cpu::cpuflag"][::std::mem::offset_of!(mpc_cpu, cpuflag) - 3usize];
    ["Offset of field: mpc_cpu::cpufeature"][::std::mem::offset_of!(mpc_cpu, cpufeature) - 4usize];
    ["Offset of field: mpc_cpu::featureflag"]
        [::std::mem::offset_of!(mpc_cpu, featureflag) - 8usize];
    ["Offset of field: mpc_cpu::reserved"][::std::mem::offset_of!(mpc_cpu, reserved) - 12usize];
};

/// Bus entry (type MP_BUS); mirrors Linux's `struct mpc_bus`.
#[repr(C)]
#[derive(Debug, Default, Copy, Clone, PartialEq)]
pub struct mpc_bus {
    pub type_: ::std::os::raw::c_uchar,
    pub busid: ::std::os::raw::c_uchar,
    pub bustype: [::std::os::raw::c_uchar; 6usize],
}
#[allow(clippy::unnecessary_operation, clippy::identity_op)]
const _: () = {
    ["Size of mpc_bus"][::std::mem::size_of::<mpc_bus>() - 8usize];
    ["Alignment of mpc_bus"][::std::mem::align_of::<mpc_bus>() - 1usize];
    ["Offset of field: mpc_bus::type_"][::std::mem::offset_of!(mpc_bus, type_) - 0usize];
    ["Offset of field: mpc_bus::busid"][::std::mem::offset_of!(mpc_bus, busid) - 1usize];
    ["Offset of field: mpc_bus::bustype"][::std::mem::offset_of!(mpc_bus, bustype) - 2usize];
};

/// I/O APIC entry (type MP_IOAPIC); mirrors Linux's `struct mpc_ioapic`.
#[repr(C)]
#[derive(Debug, Default, Copy, Clone, PartialEq)]
pub struct mpc_ioapic {
    pub type_: ::std::os::raw::c_uchar,
    pub apicid: ::std::os::raw::c_uchar,
    pub apicver: ::std::os::raw::c_uchar,
    pub flags: ::std::os::raw::c_uchar,
    pub apicaddr: ::std::os::raw::c_uint,
}
#[allow(clippy::unnecessary_operation, clippy::identity_op)]
const _: () = {
    ["Size of mpc_ioapic"][::std::mem::size_of::<mpc_ioapic>() - 8usize];
    ["Alignment of mpc_ioapic"][::std::mem::align_of::<mpc_ioapic>() - 4usize];
    ["Offset of field: mpc_ioapic::type_"][::std::mem::offset_of!(mpc_ioapic, type_) - 0usize];
    ["Offset of field: mpc_ioapic::apicid"][::std::mem::offset_of!(mpc_ioapic, apicid) - 1usize];
    ["Offset of field: mpc_ioapic::apicver"][::std::mem::offset_of!(mpc_ioapic, apicver) - 2usize];
    ["Offset of field: mpc_ioapic::flags"][::std::mem::offset_of!(mpc_ioapic, flags) - 3usize];
    ["Offset of field: mpc_ioapic::apicaddr"]
        [::std::mem::offset_of!(mpc_ioapic, apicaddr) - 4usize];
};

/// Interrupt-source entry (type MP_INTSRC); mirrors Linux's `struct mpc_intsrc`.
#[repr(C)]
#[derive(Debug, Default, Copy, Clone, PartialEq)]
pub struct mpc_intsrc {
    pub type_: ::std::os::raw::c_uchar,
    pub irqtype: ::std::os::raw::c_uchar,
    pub irqflag: ::std::os::raw::c_ushort,
    pub srcbus: ::std::os::raw::c_uchar,
    pub srcbusirq: ::std::os::raw::c_uchar,
    pub dstapic: ::std::os::raw::c_uchar,
    pub dstirq: ::std::os::raw::c_uchar,
}
#[allow(clippy::unnecessary_operation, clippy::identity_op)]
const _: () = {
    ["Size of mpc_intsrc"][::std::mem::size_of::<mpc_intsrc>() - 8usize];
    ["Alignment of mpc_intsrc"][::std::mem::align_of::<mpc_intsrc>() - 2usize];
    ["Offset of field: mpc_intsrc::type_"][::std::mem::offset_of!(mpc_intsrc, type_) - 0usize];
    ["Offset of field: mpc_intsrc::irqtype"][::std::mem::offset_of!(mpc_intsrc, irqtype) - 1usize];
    ["Offset of field: mpc_intsrc::irqflag"][::std::mem::offset_of!(mpc_intsrc, irqflag) - 2usize];
    ["Offset of field: mpc_intsrc::srcbus"][::std::mem::offset_of!(mpc_intsrc, srcbus) - 4usize];
    ["Offset of field: mpc_intsrc::srcbusirq"]
        [::std::mem::offset_of!(mpc_intsrc, srcbusirq) - 5usize];
    ["Offset of field: mpc_intsrc::dstapic"][::std::mem::offset_of!(mpc_intsrc, dstapic) - 6usize];
    ["Offset of field: mpc_intsrc::dstirq"][::std::mem::offset_of!(mpc_intsrc, dstirq) - 7usize];
};

/// Values for the `irqtype` field of interrupt-source entries.
pub mod mp_irq_source_types {
    pub type Type = ::std::os::raw::c_uint;
    pub const mp_INT: Type = 0;
    pub const mp_NMI: Type = 1;
    pub const mp_SMI: Type = 2;
    pub const mp_ExtINT: Type = 3;
}

/// Local-interrupt entry (type MP_LINTSRC); mirrors Linux's `struct mpc_lintsrc`.
#[repr(C)]
#[derive(Debug, Default, Copy, Clone, PartialEq)]
pub struct mpc_lintsrc {
    pub type_: ::std::os::raw::c_uchar,
    pub irqtype: ::std::os::raw::c_uchar,
    pub irqflag: ::std::os::raw::c_ushort,
    pub srcbusid: ::std::os::raw::c_uchar,
    pub srcbusirq: ::std::os::raw::c_uchar,
    pub destapic: ::std::os::raw::c_uchar,
    pub destapiclint: ::std::os::raw::c_uchar,
}
#[allow(clippy::unnecessary_operation, clippy::identity_op)]
const _: () = {
    ["Size of mpc_lintsrc"][::std::mem::size_of::<mpc_lintsrc>() - 8usize];
    ["Alignment of mpc_lintsrc"][::std::mem::align_of::<mpc_lintsrc>() - 2usize];
    ["Offset of field: mpc_lintsrc::type_"][::std::mem::offset_of!(mpc_lintsrc, type_) - 0usize];
    ["Offset of field: mpc_lintsrc::irqtype"]
        [::std::mem::offset_of!(mpc_lintsrc, irqtype) - 1usize];
    ["Offset of field: mpc_lintsrc::irqflag"]
        [::std::mem::offset_of!(mpc_lintsrc, irqflag) - 2usize];
    ["Offset of field: mpc_lintsrc::srcbusid"]
        [::std::mem::offset_of!(mpc_lintsrc, srcbusid) - 4usize];
    ["Offset of field: mpc_lintsrc::srcbusirq"]
        [::std::mem::offset_of!(mpc_lintsrc, srcbusirq) - 5usize];
    ["Offset of field: mpc_lintsrc::destapic"]
        [::std::mem::offset_of!(mpc_lintsrc, destapic) - 6usize];
    ["Offset of field: mpc_lintsrc::destapiclint"]
        [::std::mem::offset_of!(mpc_lintsrc, destapiclint) - 7usize];
};

/// OEM table header ("_OEM" signature); mirrors Linux's `struct mpc_oemtable`.
#[repr(C)]
#[derive(Debug, Default, Copy, Clone, PartialEq)]
pub struct mpc_oemtable {
    pub signature: [::std::os::raw::c_char; 4usize],
    pub length: ::std::os::raw::c_ushort,
    pub rev: ::std::os::raw::c_char,
    pub checksum: ::std::os::raw::c_char,
    pub mpc: [::std::os::raw::c_char; 8usize],
}
#[allow(clippy::unnecessary_operation, clippy::identity_op)]
const _: () = {
    ["Size of mpc_oemtable"][::std::mem::size_of::<mpc_oemtable>() - 16usize];
    ["Alignment of mpc_oemtable"][::std::mem::align_of::<mpc_oemtable>() - 2usize];
    ["Offset of field: mpc_oemtable::signature"]
        [::std::mem::offset_of!(mpc_oemtable, signature) - 0usize];
    ["Offset of field: mpc_oemtable::length"]
        [::std::mem::offset_of!(mpc_oemtable, length) - 4usize];
    ["Offset of field: mpc_oemtable::rev"][::std::mem::offset_of!(mpc_oemtable, rev) - 6usize];
    ["Offset of field: mpc_oemtable::checksum"]
        [::std::mem::offset_of!(mpc_oemtable, checksum) - 7usize];
    ["Offset of field:
mpc_oemtable::mpc"][::std::mem::offset_of!(mpc_oemtable, mpc) - 8usize]; }; pub mod mp_bustype { pub type Type = ::std::os::raw::c_uint; pub const MP_BUS_ISA: Type = 1; pub const MP_BUS_EISA: Type = 2; pub const MP_BUS_PCI: Type = 3; } ================================================ FILE: src/vmm/src/arch/x86_64/generated/msr_index.rs ================================================ // Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 // automatically generated by tools/bindgen.sh #![allow( non_camel_case_types, non_upper_case_globals, dead_code, non_snake_case, clippy::ptr_as_ptr, clippy::undocumented_unsafe_blocks, missing_debug_implementations, clippy::tests_outside_test_module, unsafe_op_in_unsafe_fn, clippy::redundant_static_lifetimes )] pub const MSR_EFER: u32 = 0xc0000080; pub const MSR_STAR: u32 = 0xc0000081; pub const MSR_LSTAR: u32 = 0xc0000082; pub const MSR_CSTAR: u32 = 0xc0000083; pub const MSR_SYSCALL_MASK: u32 = 0xc0000084; pub const MSR_FS_BASE: u32 = 0xc0000100; pub const MSR_GS_BASE: u32 = 0xc0000101; pub const MSR_KERNEL_GS_BASE: u32 = 0xc0000102; pub const MSR_TSC_AUX: u32 = 0xc0000103; pub const MSR_IA32_FRED_RSP0: u32 = 0x1cc; pub const MSR_IA32_FRED_RSP1: u32 = 0x1cd; pub const MSR_IA32_FRED_RSP2: u32 = 0x1ce; pub const MSR_IA32_FRED_RSP3: u32 = 0x1cf; pub const MSR_IA32_FRED_STKLVLS: u32 = 0x1d0; pub const MSR_IA32_FRED_SSP1: u32 = 0x1d1; pub const MSR_IA32_FRED_SSP2: u32 = 0x1d2; pub const MSR_IA32_FRED_SSP3: u32 = 0x1d3; pub const MSR_IA32_FRED_CONFIG: u32 = 0x1d4; pub const MSR_TEST_CTRL: u32 = 0x33; pub const MSR_TEST_CTRL_SPLIT_LOCK_DETECT_BIT: u32 = 0x1d; pub const MSR_IA32_SPEC_CTRL: u32 = 0x48; pub const MSR_IA32_PRED_CMD: u32 = 0x49; pub const MSR_PPIN_CTL: u32 = 0x4e; pub const MSR_PPIN: u32 = 0x4f; pub const MSR_IA32_PERFCTR0: u32 = 0xc1; pub const MSR_IA32_PERFCTR1: u32 = 0xc2; pub const MSR_FSB_FREQ: u32 = 0xcd; pub const MSR_PLATFORM_INFO: u32 = 0xce; pub const 
// MSR numbers continued (generated from Linux's msr-index.h); the collapsed
// one-line layout of the generated file is preserved below, with group
// markers inserted between lines.
MSR_PLATFORM_INFO_CPUID_FAULT_BIT: u32 = 0x1f; pub const MSR_IA32_UMWAIT_CONTROL: u32 = 0xe1; pub const MSR_IA32_UMWAIT_CONTROL_TIME_MASK: i32 = -4; pub const MSR_IA32_CORE_CAPS: u32 = 0xcf; pub const MSR_IA32_CORE_CAPS_INTEGRITY_CAPS_BIT: u32 = 0x2; pub const MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT_BIT: u32 = 0x5; pub const MSR_PKG_CST_CONFIG_CONTROL: u32 = 0xe2; pub const MSR_MTRRcap: u32 = 0xfe; pub const MSR_IA32_ARCH_CAPABILITIES: u32 = 0x10a; pub const MSR_IA32_FLUSH_CMD: u32 = 0x10b; pub const MSR_IA32_BBL_CR_CTL: u32 = 0x119; pub const MSR_IA32_BBL_CR_CTL3: u32 = 0x11e; pub const MSR_IA32_TSX_CTRL: u32 = 0x122; pub const MSR_IA32_MCU_OPT_CTRL: u32 = 0x123; pub const MSR_IA32_SYSENTER_CS: u32 = 0x174; pub const MSR_IA32_SYSENTER_ESP: u32 = 0x175; pub const MSR_IA32_SYSENTER_EIP: u32 = 0x176; pub const MSR_IA32_MCG_CAP: u32 = 0x179; pub const MSR_IA32_MCG_STATUS: u32 = 0x17a; pub const MSR_IA32_MCG_CTL: u32 = 0x17b; pub const MSR_ERROR_CONTROL: u32 = 0x17f; pub const MSR_IA32_MCG_EXT_CTL: u32 = 0x4d0; pub const MSR_OFFCORE_RSP_0: u32 = 0x1a6; pub const MSR_OFFCORE_RSP_1: u32 = 0x1a7; pub const MSR_TURBO_RATIO_LIMIT: u32 = 0x1ad; pub const MSR_TURBO_RATIO_LIMIT1: u32 = 0x1ae; pub const MSR_TURBO_RATIO_LIMIT2: u32 = 0x1af; pub const MSR_SNOOP_RSP_0: u32 = 0x1328; pub const MSR_SNOOP_RSP_1: u32 = 0x1329; pub const MSR_LBR_SELECT: u32 = 0x1c8; pub const MSR_LBR_TOS: u32 = 0x1c9; pub const MSR_IA32_POWER_CTL: u32 = 0x1fc; pub const MSR_IA32_POWER_CTL_BIT_EE: u32 = 0x13; pub const MSR_INTEGRITY_CAPS: u32 = 0x2d9; pub const MSR_INTEGRITY_CAPS_ARRAY_BIST_BIT: u32 = 0x2; pub const MSR_INTEGRITY_CAPS_PERIODIC_BIST_BIT: u32 = 0x4; pub const MSR_INTEGRITY_CAPS_SBAF_BIT: u32 = 0x8; pub const MSR_LBR_NHM_FROM: u32 = 0x680; pub const MSR_LBR_NHM_TO: u32 = 0x6c0; pub const MSR_LBR_CORE_FROM: u32 = 0x40; pub const MSR_LBR_CORE_TO: u32 = 0x60; pub const MSR_LBR_INFO_0: u32 = 0xdc0; pub const MSR_ARCH_LBR_CTL: u32 = 0x14ce; pub const MSR_ARCH_LBR_DEPTH: u32 = 0x14cf; pub const
// Architectural LBR, PEBS, Processor Trace (RTIT), fixed-range MTRR, PAT and
// last-branch/last-interrupt record MSRs follow.
MSR_ARCH_LBR_FROM_0: u32 = 0x1500; pub const MSR_ARCH_LBR_TO_0: u32 = 0x1600; pub const MSR_ARCH_LBR_INFO_0: u32 = 0x1200; pub const MSR_IA32_PEBS_ENABLE: u32 = 0x3f1; pub const MSR_PEBS_DATA_CFG: u32 = 0x3f2; pub const MSR_IA32_DS_AREA: u32 = 0x600; pub const MSR_IA32_PERF_CAPABILITIES: u32 = 0x345; pub const MSR_PEBS_LD_LAT_THRESHOLD: u32 = 0x3f6; pub const MSR_IA32_RTIT_CTL: u32 = 0x570; pub const MSR_IA32_RTIT_STATUS: u32 = 0x571; pub const MSR_IA32_RTIT_ADDR0_A: u32 = 0x580; pub const MSR_IA32_RTIT_ADDR0_B: u32 = 0x581; pub const MSR_IA32_RTIT_ADDR1_A: u32 = 0x582; pub const MSR_IA32_RTIT_ADDR1_B: u32 = 0x583; pub const MSR_IA32_RTIT_ADDR2_A: u32 = 0x584; pub const MSR_IA32_RTIT_ADDR2_B: u32 = 0x585; pub const MSR_IA32_RTIT_ADDR3_A: u32 = 0x586; pub const MSR_IA32_RTIT_ADDR3_B: u32 = 0x587; pub const MSR_IA32_RTIT_CR3_MATCH: u32 = 0x572; pub const MSR_IA32_RTIT_OUTPUT_BASE: u32 = 0x560; pub const MSR_IA32_RTIT_OUTPUT_MASK: u32 = 0x561; pub const MSR_MTRRfix64K_00000: u32 = 0x250; pub const MSR_MTRRfix16K_80000: u32 = 0x258; pub const MSR_MTRRfix16K_A0000: u32 = 0x259; pub const MSR_MTRRfix4K_C0000: u32 = 0x268; pub const MSR_MTRRfix4K_C8000: u32 = 0x269; pub const MSR_MTRRfix4K_D0000: u32 = 0x26a; pub const MSR_MTRRfix4K_D8000: u32 = 0x26b; pub const MSR_MTRRfix4K_E0000: u32 = 0x26c; pub const MSR_MTRRfix4K_E8000: u32 = 0x26d; pub const MSR_MTRRfix4K_F0000: u32 = 0x26e; pub const MSR_MTRRfix4K_F8000: u32 = 0x26f; pub const MSR_MTRRdefType: u32 = 0x2ff; pub const MSR_IA32_CR_PAT: u32 = 0x277; pub const MSR_IA32_DEBUGCTLMSR: u32 = 0x1d9; pub const MSR_IA32_LASTBRANCHFROMIP: u32 = 0x1db; pub const MSR_IA32_LASTBRANCHTOIP: u32 = 0x1dc; pub const MSR_IA32_LASTINTFROMIP: u32 = 0x1dd; pub const MSR_IA32_LASTINTTOIP: u32 = 0x1de; pub const MSR_IA32_PASID: u32 = 0xd93; pub const MSR_PEBS_FRONTEND: u32 = 0x3f7; pub const MSR_IA32_MC0_CTL: u32 = 0x400; pub const MSR_IA32_MC0_STATUS: u32 = 0x401; pub const MSR_IA32_MC0_ADDR: u32 = 0x402; pub const MSR_IA32_MC0_MISC: u32 =
// Package/core C-state residency, interrupt-response-time limit (IRTL) and
// RAPL energy/power MSRs follow.
0x403; pub const MSR_PKG_C3_RESIDENCY: u32 = 0x3f8; pub const MSR_PKG_C6_RESIDENCY: u32 = 0x3f9; pub const MSR_ATOM_PKG_C6_RESIDENCY: u32 = 0x3fa; pub const MSR_PKG_C7_RESIDENCY: u32 = 0x3fa; pub const MSR_CORE_C3_RESIDENCY: u32 = 0x3fc; pub const MSR_CORE_C6_RESIDENCY: u32 = 0x3fd; pub const MSR_CORE_C7_RESIDENCY: u32 = 0x3fe; pub const MSR_KNL_CORE_C6_RESIDENCY: u32 = 0x3ff; pub const MSR_PKG_C2_RESIDENCY: u32 = 0x60d; pub const MSR_PKG_C8_RESIDENCY: u32 = 0x630; pub const MSR_PKG_C9_RESIDENCY: u32 = 0x631; pub const MSR_PKG_C10_RESIDENCY: u32 = 0x632; pub const MSR_PKGC3_IRTL: u32 = 0x60a; pub const MSR_PKGC6_IRTL: u32 = 0x60b; pub const MSR_PKGC7_IRTL: u32 = 0x60c; pub const MSR_PKGC8_IRTL: u32 = 0x633; pub const MSR_PKGC9_IRTL: u32 = 0x634; pub const MSR_PKGC10_IRTL: u32 = 0x635; pub const MSR_VR_CURRENT_CONFIG: u32 = 0x601; pub const MSR_RAPL_POWER_UNIT: u32 = 0x606; pub const MSR_PKG_POWER_LIMIT: u32 = 0x610; pub const MSR_PKG_ENERGY_STATUS: u32 = 0x611; pub const MSR_PKG_PERF_STATUS: u32 = 0x613; pub const MSR_PKG_POWER_INFO: u32 = 0x614; pub const MSR_DRAM_POWER_LIMIT: u32 = 0x618; pub const MSR_DRAM_ENERGY_STATUS: u32 = 0x619; pub const MSR_DRAM_PERF_STATUS: u32 = 0x61b; pub const MSR_DRAM_POWER_INFO: u32 = 0x61c; pub const MSR_PP0_POWER_LIMIT: u32 = 0x638; pub const MSR_PP0_ENERGY_STATUS: u32 = 0x639; pub const MSR_PP0_POLICY: u32 = 0x63a; pub const MSR_PP0_PERF_STATUS: u32 = 0x63b; pub const MSR_PP1_POWER_LIMIT: u32 = 0x640; pub const MSR_PP1_ENERGY_STATUS: u32 = 0x641; pub const MSR_PP1_POLICY: u32 = 0x642; pub const MSR_AMD_RAPL_POWER_UNIT: u32 = 0xc0010299; pub const MSR_AMD_CORE_ENERGY_STATUS: u32 = 0xc001029a; pub const MSR_AMD_PKG_ENERGY_STATUS: u32 = 0xc001029b; pub const MSR_CONFIG_TDP_NOMINAL: u32 = 0x648; pub const MSR_CONFIG_TDP_LEVEL_1: u32 = 0x649; pub const MSR_CONFIG_TDP_LEVEL_2: u32 = 0x64a; pub const MSR_CONFIG_TDP_CONTROL: u32 = 0x64b; pub const MSR_TURBO_ACTIVATION_RATIO: u32 = 0x64c; pub const MSR_PLATFORM_ENERGY_STATUS: u32 = 0x64d;
// Turbo/C0-residency accounting, CET shadow-stack and HWP (hardware P-state)
// MSRs follow.
pub const MSR_SECONDARY_TURBO_RATIO_LIMIT: u32 = 0x650; pub const MSR_PKG_WEIGHTED_CORE_C0_RES: u32 = 0x658; pub const MSR_PKG_ANY_CORE_C0_RES: u32 = 0x659; pub const MSR_PKG_ANY_GFXE_C0_RES: u32 = 0x65a; pub const MSR_PKG_BOTH_CORE_GFXE_C0_RES: u32 = 0x65b; pub const MSR_CORE_C1_RES: u32 = 0x660; pub const MSR_MODULE_C6_RES_MS: u32 = 0x664; pub const MSR_CC6_DEMOTION_POLICY_CONFIG: u32 = 0x668; pub const MSR_MC6_DEMOTION_POLICY_CONFIG: u32 = 0x669; pub const MSR_ATOM_CORE_RATIOS: u32 = 0x66a; pub const MSR_ATOM_CORE_VIDS: u32 = 0x66b; pub const MSR_ATOM_CORE_TURBO_RATIOS: u32 = 0x66c; pub const MSR_ATOM_CORE_TURBO_VIDS: u32 = 0x66d; pub const MSR_CORE_PERF_LIMIT_REASONS: u32 = 0x690; pub const MSR_GFX_PERF_LIMIT_REASONS: u32 = 0x6b0; pub const MSR_RING_PERF_LIMIT_REASONS: u32 = 0x6b1; pub const MSR_IA32_U_CET: u32 = 0x6a0; pub const MSR_IA32_S_CET: u32 = 0x6a2; pub const MSR_IA32_PL0_SSP: u32 = 0x6a4; pub const MSR_IA32_PL1_SSP: u32 = 0x6a5; pub const MSR_IA32_PL2_SSP: u32 = 0x6a6; pub const MSR_IA32_PL3_SSP: u32 = 0x6a7; pub const MSR_IA32_INT_SSP_TAB: u32 = 0x6a8; pub const MSR_PPERF: u32 = 0x64e; pub const MSR_PERF_LIMIT_REASONS: u32 = 0x64f; pub const MSR_PM_ENABLE: u32 = 0x770; pub const MSR_HWP_CAPABILITIES: u32 = 0x771; pub const MSR_HWP_REQUEST_PKG: u32 = 0x772; pub const MSR_HWP_INTERRUPT: u32 = 0x773; pub const MSR_HWP_REQUEST: u32 = 0x774; pub const MSR_HWP_STATUS: u32 = 0x777; pub const MSR_AMD64_MC0_MASK: u32 = 0xc0010044; pub const MSR_IA32_MC0_CTL2: u32 = 0x280; pub const MSR_P6_PERFCTR0: u32 = 0xc1; pub const MSR_P6_PERFCTR1: u32 = 0xc2; pub const MSR_P6_EVNTSEL0: u32 = 0x186; pub const MSR_P6_EVNTSEL1: u32 = 0x187; pub const MSR_KNC_PERFCTR0: u32 = 0x20; pub const MSR_KNC_PERFCTR1: u32 = 0x21; pub const MSR_KNC_EVNTSEL0: u32 = 0x28; pub const MSR_KNC_EVNTSEL1: u32 = 0x29; pub const MSR_IA32_PMC0: u32 = 0x4c1; pub const MSR_RELOAD_PMC0: u32 = 0x14c1; pub const MSR_RELOAD_FIXED_CTR0: u32 = 0x1309; pub const MSR_IA32_PMC_V6_GP0_CTR: u32 = 0x1900; pub
// AMD64 microcode/config MSRs and IBS (instruction-based sampling) registers
// follow.
const MSR_IA32_PMC_V6_GP0_CFG_A: u32 = 0x1901; pub const MSR_IA32_PMC_V6_FX0_CTR: u32 = 0x1980; pub const MSR_IA32_PMC_V6_STEP: u32 = 0x4; pub const MSR_IA32_MKTME_KEYID_PARTITIONING: u32 = 0x87; pub const MSR_AMD64_PATCH_LEVEL: u32 = 0x8b; pub const MSR_AMD64_TSC_RATIO: u32 = 0xc0000104; pub const MSR_AMD64_NB_CFG: u32 = 0xc001001f; pub const MSR_AMD64_PATCH_LOADER: u32 = 0xc0010020; pub const MSR_AMD_PERF_CTL: u32 = 0xc0010062; pub const MSR_AMD_PERF_STATUS: u32 = 0xc0010063; pub const MSR_AMD_PSTATE_DEF_BASE: u32 = 0xc0010064; pub const MSR_AMD64_OSVW_ID_LENGTH: u32 = 0xc0010140; pub const MSR_AMD64_OSVW_STATUS: u32 = 0xc0010141; pub const MSR_AMD_PPIN_CTL: u32 = 0xc00102f0; pub const MSR_AMD_PPIN: u32 = 0xc00102f1; pub const MSR_AMD64_CPUID_FN_1: u32 = 0xc0011004; pub const MSR_AMD64_LS_CFG: u32 = 0xc0011020; pub const MSR_AMD64_DC_CFG: u32 = 0xc0011022; pub const MSR_AMD64_TW_CFG: u32 = 0xc0011023; pub const MSR_AMD64_DE_CFG: u32 = 0xc0011029; pub const MSR_AMD64_DE_CFG_LFENCE_SERIALIZE_BIT: u32 = 0x1; pub const MSR_AMD64_DE_CFG_ZEN2_FP_BACKUP_FIX_BIT: u32 = 0x9; pub const MSR_AMD64_BU_CFG2: u32 = 0xc001102a; pub const MSR_AMD64_IBSFETCHCTL: u32 = 0xc0011030; pub const MSR_AMD64_IBSFETCHLINAD: u32 = 0xc0011031; pub const MSR_AMD64_IBSFETCHPHYSAD: u32 = 0xc0011032; pub const MSR_AMD64_IBSFETCH_REG_COUNT: u32 = 0x3; pub const MSR_AMD64_IBSFETCH_REG_MASK: u32 = 0x7; pub const MSR_AMD64_IBSOPCTL: u32 = 0xc0011033; pub const MSR_AMD64_IBSOPRIP: u32 = 0xc0011034; pub const MSR_AMD64_IBSOPDATA: u32 = 0xc0011035; pub const MSR_AMD64_IBSOPDATA2: u32 = 0xc0011036; pub const MSR_AMD64_IBSOPDATA3: u32 = 0xc0011037; pub const MSR_AMD64_IBSDCLINAD: u32 = 0xc0011038; pub const MSR_AMD64_IBSDCPHYSAD: u32 = 0xc0011039; pub const MSR_AMD64_IBSOP_REG_COUNT: u32 = 0x7; pub const MSR_AMD64_IBSOP_REG_MASK: u32 = 0x7f; pub const MSR_AMD64_IBSCTL: u32 = 0xc001103a; pub const MSR_AMD64_IBSBRTARGET: u32 = 0xc001103b; pub const MSR_AMD64_ICIBSEXTDCTL: u32 = 0xc001103c; pub const
// AMD SEV/SNP feature bits, virtualized speculation control, and CPPC MSRs
// follow.
MSR_AMD64_IBSOPDATA4: u32 = 0xc001103d; pub const MSR_AMD64_IBS_REG_COUNT_MAX: u32 = 0x8; pub const MSR_AMD64_SVM_AVIC_DOORBELL: u32 = 0xc001011b; pub const MSR_AMD64_VM_PAGE_FLUSH: u32 = 0xc001011e; pub const MSR_AMD64_SEV_ES_GHCB: u32 = 0xc0010130; pub const MSR_AMD64_SEV: u32 = 0xc0010131; pub const MSR_AMD64_SEV_ENABLED_BIT: u32 = 0x0; pub const MSR_AMD64_SEV_ES_ENABLED_BIT: u32 = 0x1; pub const MSR_AMD64_SEV_SNP_ENABLED_BIT: u32 = 0x2; pub const MSR_AMD64_SNP_VTOM_BIT: u32 = 0x3; pub const MSR_AMD64_SNP_REFLECT_VC_BIT: u32 = 0x4; pub const MSR_AMD64_SNP_RESTRICTED_INJ_BIT: u32 = 0x5; pub const MSR_AMD64_SNP_ALT_INJ_BIT: u32 = 0x6; pub const MSR_AMD64_SNP_DEBUG_SWAP_BIT: u32 = 0x7; pub const MSR_AMD64_SNP_PREVENT_HOST_IBS_BIT: u32 = 0x8; pub const MSR_AMD64_SNP_BTB_ISOLATION_BIT: u32 = 0x9; pub const MSR_AMD64_SNP_VMPL_SSS_BIT: u32 = 0xa; pub const MSR_AMD64_SNP_SECURE_TSC_BIT: u32 = 0xb; pub const MSR_AMD64_SNP_VMGEXIT_PARAM_BIT: u32 = 0xc; pub const MSR_AMD64_SNP_IBS_VIRT_BIT: u32 = 0xe; pub const MSR_AMD64_SNP_VMSA_REG_PROT_BIT: u32 = 0x10; pub const MSR_AMD64_SNP_SMT_PROT_BIT: u32 = 0x11; pub const MSR_AMD64_SNP_RESV_BIT: u32 = 0x12; pub const MSR_AMD64_VIRT_SPEC_CTRL: u32 = 0xc001011f; pub const MSR_AMD64_RMP_BASE: u32 = 0xc0010132; pub const MSR_AMD64_RMP_END: u32 = 0xc0010133; pub const MSR_SVSM_CAA: u32 = 0xc001f000; pub const MSR_AMD_CPPC_CAP1: u32 = 0xc00102b0; pub const MSR_AMD_CPPC_ENABLE: u32 = 0xc00102b1; pub const MSR_AMD_CPPC_CAP2: u32 = 0xc00102b2; pub const MSR_AMD_CPPC_REQ: u32 = 0xc00102b3; pub const MSR_AMD_CPPC_STATUS: u32 = 0xc00102b4; pub const MSR_AMD64_PERF_CNTR_GLOBAL_STATUS: u32 = 0xc0000300; pub const MSR_AMD64_PERF_CNTR_GLOBAL_CTL: u32 = 0xc0000301; pub const MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR: u32 = 0xc0000302; pub const MSR_AMD64_LBR_SELECT: u32 = 0xc000010e; pub const MSR_ZEN4_BP_CFG: u32 = 0xc001102e; pub const MSR_ZEN4_BP_CFG_SHARED_BTB_FIX_BIT: u32 = 0x5; pub const MSR_F19H_UMC_PERF_CTL: u32 = 0xc0010800; pub const
// AMD family 15h-19h performance counters, per-family config MSRs and
// debug-register address masks follow.
MSR_F19H_UMC_PERF_CTR: u32 = 0xc0010801; pub const MSR_ZEN2_SPECTRAL_CHICKEN: u32 = 0xc00110e3; pub const MSR_F17H_IRPERF: u32 = 0xc00000e9; pub const MSR_F16H_L2I_PERF_CTL: u32 = 0xc0010230; pub const MSR_F16H_L2I_PERF_CTR: u32 = 0xc0010231; pub const MSR_F16H_DR1_ADDR_MASK: u32 = 0xc0011019; pub const MSR_F16H_DR2_ADDR_MASK: u32 = 0xc001101a; pub const MSR_F16H_DR3_ADDR_MASK: u32 = 0xc001101b; pub const MSR_F16H_DR0_ADDR_MASK: u32 = 0xc0011027; pub const MSR_F15H_CU_PWR_ACCUMULATOR: u32 = 0xc001007a; pub const MSR_F15H_CU_MAX_PWR_ACCUMULATOR: u32 = 0xc001007b; pub const MSR_F15H_PERF_CTL: u32 = 0xc0010200; pub const MSR_F15H_PERF_CTL0: u32 = 0xc0010200; pub const MSR_F15H_PERF_CTL1: u32 = 0xc0010202; pub const MSR_F15H_PERF_CTL2: u32 = 0xc0010204; pub const MSR_F15H_PERF_CTL3: u32 = 0xc0010206; pub const MSR_F15H_PERF_CTL4: u32 = 0xc0010208; pub const MSR_F15H_PERF_CTL5: u32 = 0xc001020a; pub const MSR_F15H_PERF_CTR: u32 = 0xc0010201; pub const MSR_F15H_PERF_CTR0: u32 = 0xc0010201; pub const MSR_F15H_PERF_CTR1: u32 = 0xc0010203; pub const MSR_F15H_PERF_CTR2: u32 = 0xc0010205; pub const MSR_F15H_PERF_CTR3: u32 = 0xc0010207; pub const MSR_F15H_PERF_CTR4: u32 = 0xc0010209; pub const MSR_F15H_PERF_CTR5: u32 = 0xc001020b; pub const MSR_F15H_NB_PERF_CTL: u32 = 0xc0010240; pub const MSR_F15H_NB_PERF_CTR: u32 = 0xc0010241; pub const MSR_F15H_PTSC: u32 = 0xc0010280; pub const MSR_F15H_IC_CFG: u32 = 0xc0011021; pub const MSR_F15H_EX_CFG: u32 = 0xc001102c; pub const MSR_FAM10H_MMIO_CONF_BASE: u32 = 0xc0010058; pub const MSR_FAM10H_NODE_ID: u32 = 0xc001100c; pub const MSR_K8_TOP_MEM1: u32 = 0xc001001a; pub const MSR_K8_TOP_MEM2: u32 = 0xc001001d; pub const MSR_AMD64_SYSCFG: u32 = 0xc0010010; pub const MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT: u32 = 0x17; pub const MSR_AMD64_SYSCFG_SNP_EN_BIT: u32 = 0x18; pub const MSR_AMD64_SYSCFG_SNP_VMPL_EN_BIT: u32 = 0x19; pub const MSR_AMD64_SYSCFG_MFDM_BIT: u32 = 0x13; pub const MSR_K8_INT_PENDING_MSG: u32 = 0xc0010055; pub const
// AMD K8/K7/K6 legacy MSRs, Centaur/IDT and Transmeta vendor ranges follow.
MSR_K8_TSEG_ADDR: u32 = 0xc0010112; pub const MSR_K8_TSEG_MASK: u32 = 0xc0010113; pub const MSR_K7_EVNTSEL0: u32 = 0xc0010000; pub const MSR_K7_PERFCTR0: u32 = 0xc0010004; pub const MSR_K7_EVNTSEL1: u32 = 0xc0010001; pub const MSR_K7_PERFCTR1: u32 = 0xc0010005; pub const MSR_K7_EVNTSEL2: u32 = 0xc0010002; pub const MSR_K7_PERFCTR2: u32 = 0xc0010006; pub const MSR_K7_EVNTSEL3: u32 = 0xc0010003; pub const MSR_K7_PERFCTR3: u32 = 0xc0010007; pub const MSR_K7_CLK_CTL: u32 = 0xc001001b; pub const MSR_K7_HWCR: u32 = 0xc0010015; pub const MSR_K7_HWCR_SMMLOCK_BIT: u32 = 0x0; pub const MSR_K7_HWCR_IRPERF_EN_BIT: u32 = 0x1e; pub const MSR_K7_FID_VID_CTL: u32 = 0xc0010041; pub const MSR_K7_FID_VID_STATUS: u32 = 0xc0010042; pub const MSR_K7_HWCR_CPB_DIS_BIT: u32 = 0x19; pub const MSR_K6_WHCR: u32 = 0xc0000082; pub const MSR_K6_UWCCR: u32 = 0xc0000085; pub const MSR_K6_EPMR: u32 = 0xc0000086; pub const MSR_K6_PSOR: u32 = 0xc0000087; pub const MSR_K6_PFIR: u32 = 0xc0000088; pub const MSR_IDT_FCR1: u32 = 0x107; pub const MSR_IDT_FCR2: u32 = 0x108; pub const MSR_IDT_FCR3: u32 = 0x109; pub const MSR_IDT_FCR4: u32 = 0x10a; pub const MSR_IDT_MCR0: u32 = 0x110; pub const MSR_IDT_MCR1: u32 = 0x111; pub const MSR_IDT_MCR2: u32 = 0x112; pub const MSR_IDT_MCR3: u32 = 0x113; pub const MSR_IDT_MCR4: u32 = 0x114; pub const MSR_IDT_MCR5: u32 = 0x115; pub const MSR_IDT_MCR6: u32 = 0x116; pub const MSR_IDT_MCR7: u32 = 0x117; pub const MSR_IDT_MCR_CTRL: u32 = 0x120; pub const MSR_VIA_FCR: u32 = 0x1107; pub const MSR_VIA_LONGHAUL: u32 = 0x110a; pub const MSR_VIA_RNG: u32 = 0x110b; pub const MSR_VIA_BCR2: u32 = 0x1147; pub const MSR_TMTA_LONGRUN_CTRL: u32 = 0x80868010; pub const MSR_TMTA_LONGRUN_FLAGS: u32 = 0x80868011; pub const MSR_TMTA_LRTI_READOUT: u32 = 0x80868018; pub const MSR_TMTA_LRTI_VOLT_MHZ: u32 = 0x8086801a; pub const MSR_IA32_P5_MC_ADDR: u32 = 0x0; pub const MSR_IA32_P5_MC_TYPE: u32 = 0x1; pub const MSR_IA32_TSC: u32 = 0x10; pub const MSR_IA32_PLATFORM_ID: u32 = 0x17; pub const
// Intel architectural MSRs: feature control, APIC base, SGX launch-enclave
// key hashes, performance/thermal status registers follow.
MSR_IA32_EBL_CR_POWERON: u32 = 0x2a; pub const MSR_EBC_FREQUENCY_ID: u32 = 0x2c; pub const MSR_SMI_COUNT: u32 = 0x34; pub const MSR_IA32_FEAT_CTL: u32 = 0x3a; pub const MSR_IA32_TSC_ADJUST: u32 = 0x3b; pub const MSR_IA32_BNDCFGS: u32 = 0xd90; pub const MSR_IA32_BNDCFGS_RSVD: u32 = 0xffc; pub const MSR_IA32_XFD: u32 = 0x1c4; pub const MSR_IA32_XFD_ERR: u32 = 0x1c5; pub const MSR_IA32_XSS: u32 = 0xda0; pub const MSR_IA32_APICBASE: u32 = 0x1b; pub const MSR_IA32_APICBASE_BSP: u32 = 0x100; pub const MSR_IA32_APICBASE_ENABLE: u32 = 0x800; pub const MSR_IA32_APICBASE_BASE: u32 = 0xfffff000; pub const MSR_IA32_UCODE_WRITE: u32 = 0x79; pub const MSR_IA32_UCODE_REV: u32 = 0x8b; pub const MSR_IA32_SGXLEPUBKEYHASH0: u32 = 0x8c; pub const MSR_IA32_SGXLEPUBKEYHASH1: u32 = 0x8d; pub const MSR_IA32_SGXLEPUBKEYHASH2: u32 = 0x8e; pub const MSR_IA32_SGXLEPUBKEYHASH3: u32 = 0x8f; pub const MSR_IA32_SMM_MONITOR_CTL: u32 = 0x9b; pub const MSR_IA32_SMBASE: u32 = 0x9e; pub const MSR_IA32_PERF_STATUS: u32 = 0x198; pub const MSR_IA32_PERF_CTL: u32 = 0x199; pub const MSR_AMD_DBG_EXTN_CFG: u32 = 0xc000010f; pub const MSR_AMD_SAMP_BR_FROM: u32 = 0xc0010300; pub const MSR_IA32_MPERF: u32 = 0xe7; pub const MSR_IA32_APERF: u32 = 0xe8; pub const MSR_IA32_THERM_CONTROL: u32 = 0x19a; pub const MSR_IA32_THERM_INTERRUPT: u32 = 0x19b; pub const MSR_IA32_THERM_STATUS: u32 = 0x19c; pub const MSR_THERM2_CTL: u32 = 0x19d; pub const MSR_THERM2_CTL_TM_SELECT: u32 = 0x10000; pub const MSR_IA32_MISC_ENABLE: u32 = 0x1a0; pub const MSR_IA32_TEMPERATURE_TARGET: u32 = 0x1a2; pub const MSR_MISC_FEATURE_CONTROL: u32 = 0x1a4; pub const MSR_MISC_PWR_MGMT: u32 = 0x1aa; pub const MSR_IA32_ENERGY_PERF_BIAS: u32 = 0x1b0; pub const MSR_IA32_PACKAGE_THERM_STATUS: u32 = 0x1b1; pub const MSR_IA32_PACKAGE_THERM_INTERRUPT: u32 = 0x1b2; pub const MSR_IA32_MISC_ENABLE_FAST_STRING_BIT: u32 = 0x0; pub const MSR_IA32_MISC_ENABLE_FAST_STRING: u32 = 0x1; pub const MSR_IA32_MISC_ENABLE_TCC_BIT: u32 = 0x1; pub const
// IA32_MISC_ENABLE bit numbers (_BIT suffix) and their mask values follow;
// masks above bit 31 are u64.
MSR_IA32_MISC_ENABLE_TCC: u32 = 0x2; pub const MSR_IA32_MISC_ENABLE_EMON_BIT: u32 = 0x7; pub const MSR_IA32_MISC_ENABLE_EMON: u32 = 0x80; pub const MSR_IA32_MISC_ENABLE_BTS_UNAVAIL_BIT: u32 = 0xb; pub const MSR_IA32_MISC_ENABLE_BTS_UNAVAIL: u32 = 0x800; pub const MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL_BIT: u32 = 0xc; pub const MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL: u32 = 0x1000; pub const MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP_BIT: u32 = 0x10; pub const MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP: u32 = 0x10000; pub const MSR_IA32_MISC_ENABLE_MWAIT_BIT: u32 = 0x12; pub const MSR_IA32_MISC_ENABLE_MWAIT: u32 = 0x40000; pub const MSR_IA32_MISC_ENABLE_LIMIT_CPUID_BIT: u32 = 0x16; pub const MSR_IA32_MISC_ENABLE_LIMIT_CPUID: u32 = 0x400000; pub const MSR_IA32_MISC_ENABLE_XTPR_DISABLE_BIT: u32 = 0x17; pub const MSR_IA32_MISC_ENABLE_XTPR_DISABLE: u32 = 0x800000; pub const MSR_IA32_MISC_ENABLE_XD_DISABLE_BIT: u32 = 0x22; pub const MSR_IA32_MISC_ENABLE_XD_DISABLE: u64 = 0x400000000; pub const MSR_IA32_MISC_ENABLE_X87_COMPAT_BIT: u32 = 0x2; pub const MSR_IA32_MISC_ENABLE_X87_COMPAT: u32 = 0x4; pub const MSR_IA32_MISC_ENABLE_TM1_BIT: u32 = 0x3; pub const MSR_IA32_MISC_ENABLE_TM1: u32 = 0x8; pub const MSR_IA32_MISC_ENABLE_SPLIT_LOCK_DISABLE_BIT: u32 = 0x4; pub const MSR_IA32_MISC_ENABLE_SPLIT_LOCK_DISABLE: u32 = 0x10; pub const MSR_IA32_MISC_ENABLE_L3CACHE_DISABLE_BIT: u32 = 0x6; pub const MSR_IA32_MISC_ENABLE_L3CACHE_DISABLE: u32 = 0x40; pub const MSR_IA32_MISC_ENABLE_SUPPRESS_LOCK_BIT: u32 = 0x8; pub const MSR_IA32_MISC_ENABLE_SUPPRESS_LOCK: u32 = 0x100; pub const MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE_BIT: u32 = 0x9; pub const MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE: u32 = 0x200; pub const MSR_IA32_MISC_ENABLE_FERR_BIT: u32 = 0xa; pub const MSR_IA32_MISC_ENABLE_FERR: u32 = 0x400; pub const MSR_IA32_MISC_ENABLE_FERR_MULTIPLEX_BIT: u32 = 0xa; pub const MSR_IA32_MISC_ENABLE_FERR_MULTIPLEX: u32 = 0x400; pub const MSR_IA32_MISC_ENABLE_TM2_BIT: u32 = 0xd; pub const MSR_IA32_MISC_ENABLE_TM2: u32
// Remaining IA32_MISC_ENABLE bits, TSX force-abort controls and MCG state
// save registers follow.
= 0x2000; pub const MSR_IA32_MISC_ENABLE_ADJ_PREF_DISABLE_BIT: u32 = 0x13; pub const MSR_IA32_MISC_ENABLE_ADJ_PREF_DISABLE: u32 = 0x80000; pub const MSR_IA32_MISC_ENABLE_SPEEDSTEP_LOCK_BIT: u32 = 0x14; pub const MSR_IA32_MISC_ENABLE_SPEEDSTEP_LOCK: u32 = 0x100000; pub const MSR_IA32_MISC_ENABLE_L1D_CONTEXT_BIT: u32 = 0x18; pub const MSR_IA32_MISC_ENABLE_L1D_CONTEXT: u32 = 0x1000000; pub const MSR_IA32_MISC_ENABLE_DCU_PREF_DISABLE_BIT: u32 = 0x25; pub const MSR_IA32_MISC_ENABLE_DCU_PREF_DISABLE: u64 = 0x2000000000; pub const MSR_IA32_MISC_ENABLE_TURBO_DISABLE_BIT: u32 = 0x26; pub const MSR_IA32_MISC_ENABLE_TURBO_DISABLE: u64 = 0x4000000000; pub const MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE_BIT: u32 = 0x27; pub const MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE: u64 = 0x8000000000; pub const MSR_MISC_FEATURES_ENABLES: u32 = 0x140; pub const MSR_MISC_FEATURES_ENABLES_CPUID_FAULT_BIT: u32 = 0x0; pub const MSR_MISC_FEATURES_ENABLES_RING3MWAIT_BIT: u32 = 0x1; pub const MSR_IA32_TSC_DEADLINE: u32 = 0x6e0; pub const MSR_TSX_FORCE_ABORT: u32 = 0x10f; pub const MSR_TFA_RTM_FORCE_ABORT_BIT: u32 = 0x0; pub const MSR_TFA_TSX_CPUID_CLEAR_BIT: u32 = 0x1; pub const MSR_TFA_SDV_ENABLE_RTM_BIT: u32 = 0x2; pub const MSR_IA32_MCG_EAX: u32 = 0x180; pub const MSR_IA32_MCG_EBX: u32 = 0x181; pub const MSR_IA32_MCG_ECX: u32 = 0x182; pub const MSR_IA32_MCG_EDX: u32 = 0x183; pub const MSR_IA32_MCG_ESI: u32 = 0x184; pub const MSR_IA32_MCG_EDI: u32 = 0x185; pub const MSR_IA32_MCG_EBP: u32 = 0x186; pub const MSR_IA32_MCG_ESP: u32 = 0x187; pub const MSR_IA32_MCG_EFLAGS: u32 = 0x188; pub const MSR_IA32_MCG_EIP: u32 = 0x189; pub const MSR_IA32_MCG_RESERVED: u32 = 0x18a; pub const MSR_P4_BPU_PERFCTR0: u32 = 0x300; pub const MSR_P4_BPU_PERFCTR1: u32 = 0x301; pub const MSR_P4_BPU_PERFCTR2: u32 = 0x302; pub const MSR_P4_BPU_PERFCTR3: u32 = 0x303; pub const MSR_P4_MS_PERFCTR0: u32 = 0x304; pub const MSR_P4_MS_PERFCTR1: u32 = 0x305; pub const MSR_P4_MS_PERFCTR2: u32 = 0x306; pub const MSR_P4_MS_PERFCTR3: u32 =
// Pentium 4 performance counters (BPU/MS/FLAME/IQ), counter configuration
// control registers (CCCRs) and event-selection control registers (ESCRs).
// NOTE: the final declaration is cut off at the end of this chunk and
// continues on the next (unseen) line.
0x307; pub const MSR_P4_FLAME_PERFCTR0: u32 = 0x308; pub const MSR_P4_FLAME_PERFCTR1: u32 = 0x309; pub const MSR_P4_FLAME_PERFCTR2: u32 = 0x30a; pub const MSR_P4_FLAME_PERFCTR3: u32 = 0x30b; pub const MSR_P4_IQ_PERFCTR0: u32 = 0x30c; pub const MSR_P4_IQ_PERFCTR1: u32 = 0x30d; pub const MSR_P4_IQ_PERFCTR2: u32 = 0x30e; pub const MSR_P4_IQ_PERFCTR3: u32 = 0x30f; pub const MSR_P4_IQ_PERFCTR4: u32 = 0x310; pub const MSR_P4_IQ_PERFCTR5: u32 = 0x311; pub const MSR_P4_BPU_CCCR0: u32 = 0x360; pub const MSR_P4_BPU_CCCR1: u32 = 0x361; pub const MSR_P4_BPU_CCCR2: u32 = 0x362; pub const MSR_P4_BPU_CCCR3: u32 = 0x363; pub const MSR_P4_MS_CCCR0: u32 = 0x364; pub const MSR_P4_MS_CCCR1: u32 = 0x365; pub const MSR_P4_MS_CCCR2: u32 = 0x366; pub const MSR_P4_MS_CCCR3: u32 = 0x367; pub const MSR_P4_FLAME_CCCR0: u32 = 0x368; pub const MSR_P4_FLAME_CCCR1: u32 = 0x369; pub const MSR_P4_FLAME_CCCR2: u32 = 0x36a; pub const MSR_P4_FLAME_CCCR3: u32 = 0x36b; pub const MSR_P4_IQ_CCCR0: u32 = 0x36c; pub const MSR_P4_IQ_CCCR1: u32 = 0x36d; pub const MSR_P4_IQ_CCCR2: u32 = 0x36e; pub const MSR_P4_IQ_CCCR3: u32 = 0x36f; pub const MSR_P4_IQ_CCCR4: u32 = 0x370; pub const MSR_P4_IQ_CCCR5: u32 = 0x371; pub const MSR_P4_ALF_ESCR0: u32 = 0x3ca; pub const MSR_P4_ALF_ESCR1: u32 = 0x3cb; pub const MSR_P4_BPU_ESCR0: u32 = 0x3b2; pub const MSR_P4_BPU_ESCR1: u32 = 0x3b3; pub const MSR_P4_BSU_ESCR0: u32 = 0x3a0; pub const MSR_P4_BSU_ESCR1: u32 = 0x3a1; pub const MSR_P4_CRU_ESCR0: u32 = 0x3b8; pub const MSR_P4_CRU_ESCR1: u32 = 0x3b9; pub const MSR_P4_CRU_ESCR2: u32 = 0x3cc; pub const MSR_P4_CRU_ESCR3: u32 = 0x3cd; pub const MSR_P4_CRU_ESCR4: u32 = 0x3e0; pub const MSR_P4_CRU_ESCR5: u32 = 0x3e1; pub const MSR_P4_DAC_ESCR0: u32 = 0x3a8; pub const MSR_P4_DAC_ESCR1: u32 = 0x3a9; pub const MSR_P4_FIRM_ESCR0: u32 = 0x3a4; pub const MSR_P4_FIRM_ESCR1: u32 = 0x3a5; pub const MSR_P4_FLAME_ESCR0: u32 = 0x3a6; pub const MSR_P4_FLAME_ESCR1: u32 = 0x3a7; pub const MSR_P4_FSB_ESCR0: u32 = 0x3a2; pub const MSR_P4_FSB_ESCR1:
u32 = 0x3a3; pub const MSR_P4_IQ_ESCR0: u32 = 0x3ba; pub const MSR_P4_IQ_ESCR1: u32 = 0x3bb; pub const MSR_P4_IS_ESCR0: u32 = 0x3b4; pub const MSR_P4_IS_ESCR1: u32 = 0x3b5; pub const MSR_P4_ITLB_ESCR0: u32 = 0x3b6; pub const MSR_P4_ITLB_ESCR1: u32 = 0x3b7; pub const MSR_P4_IX_ESCR0: u32 = 0x3c8; pub const MSR_P4_IX_ESCR1: u32 = 0x3c9; pub const MSR_P4_MOB_ESCR0: u32 = 0x3aa; pub const MSR_P4_MOB_ESCR1: u32 = 0x3ab; pub const MSR_P4_MS_ESCR0: u32 = 0x3c0; pub const MSR_P4_MS_ESCR1: u32 = 0x3c1; pub const MSR_P4_PMH_ESCR0: u32 = 0x3ac; pub const MSR_P4_PMH_ESCR1: u32 = 0x3ad; pub const MSR_P4_RAT_ESCR0: u32 = 0x3bc; pub const MSR_P4_RAT_ESCR1: u32 = 0x3bd; pub const MSR_P4_SAAT_ESCR0: u32 = 0x3ae; pub const MSR_P4_SAAT_ESCR1: u32 = 0x3af; pub const MSR_P4_SSU_ESCR0: u32 = 0x3be; pub const MSR_P4_SSU_ESCR1: u32 = 0x3bf; pub const MSR_P4_TBPU_ESCR0: u32 = 0x3c2; pub const MSR_P4_TBPU_ESCR1: u32 = 0x3c3; pub const MSR_P4_TC_ESCR0: u32 = 0x3c4; pub const MSR_P4_TC_ESCR1: u32 = 0x3c5; pub const MSR_P4_U2L_ESCR0: u32 = 0x3b0; pub const MSR_P4_U2L_ESCR1: u32 = 0x3b1; pub const MSR_P4_PEBS_MATRIX_VERT: u32 = 0x3f2; pub const MSR_CORE_PERF_FIXED_CTR0: u32 = 0x309; pub const MSR_CORE_PERF_FIXED_CTR1: u32 = 0x30a; pub const MSR_CORE_PERF_FIXED_CTR2: u32 = 0x30b; pub const MSR_CORE_PERF_FIXED_CTR3: u32 = 0x30c; pub const MSR_CORE_PERF_FIXED_CTR_CTRL: u32 = 0x38d; pub const MSR_CORE_PERF_GLOBAL_STATUS: u32 = 0x38e; pub const MSR_CORE_PERF_GLOBAL_CTRL: u32 = 0x38f; pub const MSR_CORE_PERF_GLOBAL_OVF_CTRL: u32 = 0x390; pub const MSR_PERF_METRICS: u32 = 0x329; pub const MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI_BIT: u32 = 0x37; pub const MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI: u64 = 0x80000000000000; pub const MSR_CORE_PERF_GLOBAL_OVF_CTRL_OVF_BUF_BIT: u32 = 0x3e; pub const MSR_CORE_PERF_GLOBAL_OVF_CTRL_OVF_BUF: u64 = 0x4000000000000000; pub const MSR_CORE_PERF_GLOBAL_OVF_CTRL_COND_CHGD_BIT: u32 = 0x3f; pub const MSR_CORE_PERF_GLOBAL_OVF_CTRL_COND_CHGD: i64 = 
-9223372036854775808; pub const MSR_GEODE_BUSCONT_CONF0: u32 = 0x1900; pub const MSR_IA32_VMX_BASIC: u32 = 0x480; pub const MSR_IA32_VMX_PINBASED_CTLS: u32 = 0x481; pub const MSR_IA32_VMX_PROCBASED_CTLS: u32 = 0x482; pub const MSR_IA32_VMX_EXIT_CTLS: u32 = 0x483; pub const MSR_IA32_VMX_ENTRY_CTLS: u32 = 0x484; pub const MSR_IA32_VMX_MISC: u32 = 0x485; pub const MSR_IA32_VMX_CR0_FIXED0: u32 = 0x486; pub const MSR_IA32_VMX_CR0_FIXED1: u32 = 0x487; pub const MSR_IA32_VMX_CR4_FIXED0: u32 = 0x488; pub const MSR_IA32_VMX_CR4_FIXED1: u32 = 0x489; pub const MSR_IA32_VMX_VMCS_ENUM: u32 = 0x48a; pub const MSR_IA32_VMX_PROCBASED_CTLS2: u32 = 0x48b; pub const MSR_IA32_VMX_EPT_VPID_CAP: u32 = 0x48c; pub const MSR_IA32_VMX_TRUE_PINBASED_CTLS: u32 = 0x48d; pub const MSR_IA32_VMX_TRUE_PROCBASED_CTLS: u32 = 0x48e; pub const MSR_IA32_VMX_TRUE_EXIT_CTLS: u32 = 0x48f; pub const MSR_IA32_VMX_TRUE_ENTRY_CTLS: u32 = 0x490; pub const MSR_IA32_VMX_VMFUNC: u32 = 0x491; pub const MSR_IA32_VMX_PROCBASED_CTLS3: u32 = 0x492; pub const MSR_IA32_L3_QOS_CFG: u32 = 0xc81; pub const MSR_IA32_L2_QOS_CFG: u32 = 0xc82; pub const MSR_IA32_QM_EVTSEL: u32 = 0xc8d; pub const MSR_IA32_QM_CTR: u32 = 0xc8e; pub const MSR_IA32_PQR_ASSOC: u32 = 0xc8f; pub const MSR_IA32_L3_CBM_BASE: u32 = 0xc90; pub const MSR_RMID_SNC_CONFIG: u32 = 0xca0; pub const MSR_IA32_L2_CBM_BASE: u32 = 0xd10; pub const MSR_IA32_MBA_THRTL_BASE: u32 = 0xd50; pub const MSR_IA32_MBA_BW_BASE: u32 = 0xc0000200; pub const MSR_IA32_SMBA_BW_BASE: u32 = 0xc0000280; pub const MSR_IA32_EVT_CFG_BASE: u32 = 0xc0000400; pub const MSR_VM_CR: u32 = 0xc0010114; pub const MSR_VM_IGNNE: u32 = 0xc0010115; pub const MSR_VM_HSAVE_PA: u32 = 0xc0010117; pub const MSR_IA32_HW_FEEDBACK_PTR: u32 = 0x17d0; pub const MSR_IA32_HW_FEEDBACK_CONFIG: u32 = 0x17d1; pub const MSR_IA32_XAPIC_DISABLE_STATUS: u32 = 0xbd; ================================================ FILE: src/vmm/src/arch/x86_64/generated/perf_event.rs ================================================ // 
Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 // automatically generated by tools/bindgen.sh #![allow( non_camel_case_types, non_upper_case_globals, dead_code, non_snake_case, clippy::ptr_as_ptr, clippy::undocumented_unsafe_blocks, missing_debug_implementations, clippy::tests_outside_test_module, unsafe_op_in_unsafe_fn, clippy::redundant_static_lifetimes )] pub const MSR_ARCH_PERFMON_PERFCTR0: u32 = 0xc1; pub const MSR_ARCH_PERFMON_PERFCTR1: u32 = 0xc2; pub const MSR_ARCH_PERFMON_EVENTSEL0: u32 = 0x186; pub const MSR_ARCH_PERFMON_EVENTSEL1: u32 = 0x187; pub const MSR_ARCH_PERFMON_FIXED_CTR_CTRL: u32 = 0x38d; pub const MSR_ARCH_PERFMON_FIXED_CTR0: u32 = 0x309; pub const MSR_ARCH_PERFMON_FIXED_CTR1: u32 = 0x30a; pub const MSR_ARCH_PERFMON_FIXED_CTR2: u32 = 0x30b; pub const MSR_ARCH_PERFMON_FIXED_CTR3: u32 = 0x30c; ================================================ FILE: src/vmm/src/arch/x86_64/interrupts.rs ================================================ // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 // // Portions Copyright 2017 The Chromium OS Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the THIRD-PARTY file. use kvm_bindings::kvm_lapic_state; use kvm_ioctls::VcpuFd; use zerocopy::IntoBytes; use crate::utils::byte_order; /// Errors thrown while configuring the LAPIC. #[derive(Debug, thiserror::Error, displaydoc::Display, PartialEq, Eq)] pub enum InterruptError { /// Failure in getting the LAPIC configuration: {0} GetLapic(kvm_ioctls::Error), /// Failure in setting the LAPIC configuration: {0} SetLapic(kvm_ioctls::Error), } // Defines poached from apicdef.h kernel header. 
const APIC_LVT0: usize = 0x350; const APIC_LVT1: usize = 0x360; const APIC_MODE_NMI: u32 = 0x4; const APIC_MODE_EXTINT: u32 = 0x7; fn get_klapic_reg(klapic: &kvm_lapic_state, reg_offset: usize) -> u32 { let range = reg_offset..reg_offset + 4; let reg = klapic.regs.get(range).expect("get_klapic_reg range"); byte_order::read_le_u32(reg.as_bytes()) } fn set_klapic_reg(klapic: &mut kvm_lapic_state, reg_offset: usize, value: u32) { let range = reg_offset..reg_offset + 4; let reg = klapic.regs.get_mut(range).expect("set_klapic_reg range"); byte_order::write_le_u32(reg.as_mut_bytes(), value); } fn set_apic_delivery_mode(reg: u32, mode: u32) -> u32 { ((reg) & !0x700) | ((mode) << 8) } /// Configures LAPICs. LAPIC0 is set for external interrupts, LAPIC1 is set for NMI. /// /// # Arguments /// * `vcpu` - The VCPU object to configure. pub fn set_lint(vcpu: &VcpuFd) -> Result<(), InterruptError> { let mut klapic = vcpu.get_lapic().map_err(InterruptError::GetLapic)?; let lvt_lint0 = get_klapic_reg(&klapic, APIC_LVT0); set_klapic_reg( &mut klapic, APIC_LVT0, set_apic_delivery_mode(lvt_lint0, APIC_MODE_EXTINT), ); let lvt_lint1 = get_klapic_reg(&klapic, APIC_LVT1); set_klapic_reg( &mut klapic, APIC_LVT1, set_apic_delivery_mode(lvt_lint1, APIC_MODE_NMI), ); vcpu.set_lapic(&klapic).map_err(InterruptError::SetLapic) } #[cfg(test)] mod tests { use kvm_ioctls::Kvm; use super::*; const KVM_APIC_REG_SIZE: usize = 0x400; #[test] fn test_set_and_get_klapic_reg() { let reg_offset = 0x340; let mut klapic = kvm_lapic_state::default(); set_klapic_reg(&mut klapic, reg_offset, 3); let value = get_klapic_reg(&klapic, reg_offset); assert_eq!(value, 3); } #[test] fn test_set_and_get_klapic_reg_overflow() { let reg_offset = 0x340; let mut klapic = kvm_lapic_state::default(); set_klapic_reg( &mut klapic, reg_offset, u32::try_from(i32::MAX).unwrap() + 1u32, ); let value = get_klapic_reg(&klapic, reg_offset); assert_eq!(value, u32::try_from(i32::MAX).unwrap() + 1u32); } #[test] #[should_panic] fn 
test_set_and_get_klapic_out_of_bounds() { let reg_offset = KVM_APIC_REG_SIZE + 10; let mut klapic = kvm_lapic_state::default(); set_klapic_reg(&mut klapic, reg_offset, 3); } #[test] fn test_apic_delivery_mode() { let mut v: Vec = (0..20) .map(|_| vmm_sys_util::rand::xor_pseudo_rng_u32()) .collect(); v.iter_mut() .for_each(|x| *x = set_apic_delivery_mode(*x, 2)); let after: Vec = v.iter().map(|x| (*x & !0x700) | ((2) << 8)).collect(); assert_eq!(v, after); } #[test] fn test_setlint() { let kvm = Kvm::new().unwrap(); assert!(kvm.check_extension(kvm_ioctls::Cap::Irqchip)); let vm = kvm.create_vm().unwrap(); // the get_lapic ioctl will fail if there is no irqchip created beforehand. vm.create_irq_chip().unwrap(); let vcpu = vm.create_vcpu(0).unwrap(); let klapic_before: kvm_lapic_state = vcpu.get_lapic().unwrap(); // Compute the value that is expected to represent LVT0 and LVT1. let lint0 = get_klapic_reg(&klapic_before, APIC_LVT0); let lint1 = get_klapic_reg(&klapic_before, APIC_LVT1); let lint0_mode_expected = set_apic_delivery_mode(lint0, APIC_MODE_EXTINT); let lint1_mode_expected = set_apic_delivery_mode(lint1, APIC_MODE_NMI); set_lint(&vcpu).unwrap(); // Compute the value that represents LVT0 and LVT1 after set_lint. let klapic_actual: kvm_lapic_state = vcpu.get_lapic().unwrap(); let lint0_mode_actual = get_klapic_reg(&klapic_actual, APIC_LVT0); let lint1_mode_actual = get_klapic_reg(&klapic_actual, APIC_LVT1); assert_eq!(lint0_mode_expected, lint0_mode_actual); assert_eq!(lint1_mode_expected, lint1_mode_actual); } #[test] fn test_setlint_fails() { let kvm = Kvm::new().unwrap(); let vm = kvm.create_vm().unwrap(); let vcpu = vm.create_vcpu(0).unwrap(); // 'get_lapic' ioctl triggered by the 'set_lint' function will fail if there is no // irqchip created beforehand. set_lint(&vcpu).unwrap_err(); } } ================================================ FILE: src/vmm/src/arch/x86_64/kvm.rs ================================================ // Copyright 2025 Amazon.com, Inc. 
or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 use kvm_bindings::{CpuId, KVM_MAX_CPUID_ENTRIES, MsrList}; use kvm_ioctls::Kvm as KvmFd; use crate::arch::x86_64::xstate::{XstateError, request_dynamic_xstate_features}; use crate::cpu_config::templates::KvmCapability; /// Architecture specific error for KVM initialization #[derive(Debug, thiserror::Error, displaydoc::Display)] pub enum KvmArchError { /// Failed to get supported cpuid: {0} GetSupportedCpuId(kvm_ioctls::Error), /// Failed to request permission for dynamic XSTATE features: {0} XstateFeatures(XstateError), } /// Struct with kvm fd and kvm associated parameters. #[derive(Debug)] pub struct Kvm { /// KVM fd. pub fd: KvmFd, /// Additional capabilities that were specified in cpu template. pub kvm_cap_modifiers: Vec, /// Supported CpuIds. pub supported_cpuid: CpuId, } impl Kvm { pub(crate) const DEFAULT_CAPABILITIES: [u32; 14] = [ kvm_bindings::KVM_CAP_IRQCHIP, kvm_bindings::KVM_CAP_IOEVENTFD, kvm_bindings::KVM_CAP_IRQFD, kvm_bindings::KVM_CAP_USER_MEMORY, kvm_bindings::KVM_CAP_SET_TSS_ADDR, kvm_bindings::KVM_CAP_PIT2, kvm_bindings::KVM_CAP_PIT_STATE2, kvm_bindings::KVM_CAP_ADJUST_CLOCK, kvm_bindings::KVM_CAP_DEBUGREGS, kvm_bindings::KVM_CAP_MP_STATE, kvm_bindings::KVM_CAP_VCPU_EVENTS, kvm_bindings::KVM_CAP_XCRS, kvm_bindings::KVM_CAP_XSAVE, kvm_bindings::KVM_CAP_EXT_CPUID, ]; /// Initialize [`Kvm`] type for x86_64 architecture pub fn init_arch( fd: KvmFd, kvm_cap_modifiers: Vec, ) -> Result { request_dynamic_xstate_features().map_err(KvmArchError::XstateFeatures)?; let supported_cpuid = fd .get_supported_cpuid(KVM_MAX_CPUID_ENTRIES) .map_err(KvmArchError::GetSupportedCpuId)?; Ok(Kvm { fd, kvm_cap_modifiers, supported_cpuid, }) } /// Msrs needed to be saved on snapshot creation. 
pub fn msrs_to_save(&self) -> Result { crate::arch::x86_64::msr::get_msrs_to_save(&self.fd) } } ================================================ FILE: src/vmm/src/arch/x86_64/layout.rs ================================================ // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 // // Portions Copyright 2017 The Chromium OS Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the THIRD-PARTY file. //! Magic addresses externally used to lay out x86_64 VMs. use crate::device_manager::mmio::MMIO_LEN; use crate::utils::mib_to_bytes; /// Initial stack for the boot CPU. pub const BOOT_STACK_POINTER: u64 = 0x8ff0; /// Kernel command line start address. pub const CMDLINE_START: u64 = 0x20000; /// Kernel command line maximum size. pub const CMDLINE_MAX_SIZE: usize = 2048; /// Start of the high memory. pub const HIMEM_START: u64 = 0x0010_0000; // 1 MB. // Typically, on x86 systems 24 IRQs are used for legacy devices (0-23). // However, the first 5 are reserved. // We allocate the remaining GSIs to MSIs. /// First usable GSI for legacy interrupts (IRQ) on x86_64. pub const GSI_LEGACY_START: u32 = 5; /// Last usable GSI for legacy interrupts (IRQ) on x86_64. pub const GSI_LEGACY_END: u32 = 23; /// Number of legacy GSI (IRQ) available on x86_64. pub const GSI_LEGACY_NUM: u32 = GSI_LEGACY_END - GSI_LEGACY_START + 1; /// First GSI used by MSI after legacy GSI. pub const GSI_MSI_START: u32 = GSI_LEGACY_END + 1; /// The highest available GSI in KVM (KVM_MAX_IRQ_ROUTES=4096). pub const GSI_MSI_END: u32 = 4095; /// Number of GSI available for MSI. pub const GSI_MSI_NUM: u32 = GSI_MSI_END - GSI_MSI_START + 1; /// Address for the TSS setup. pub const KVM_TSS_ADDRESS: u64 = 0xfffb_d000; /// Address of the hvm_start_info struct used in PVH boot pub const PVH_INFO_START: u64 = 0x6000; /// Starting address of array of modules of hvm_modlist_entry type. 
/// Used to enable initrd support using the PVH boot ABI. pub const MODLIST_START: u64 = 0x6040; /// Address of memory map table used in PVH boot. Can overlap /// with the zero page address since they are mutually exclusive. pub const MEMMAP_START: u64 = 0x7000; /// The 'zero page', a.k.a linux kernel bootparams. pub const ZERO_PAGE_START: u64 = 0x7000; /// APIC address pub const APIC_ADDR: u32 = 0xfee0_0000; /// IOAPIC address pub const IOAPIC_ADDR: u32 = 0xfec0_0000; /// Location of RSDP pointer in x86 machines pub const RSDP_ADDR: u64 = 0x000e_0000; /// Start of memory region we will use for system data (MPTable, ACPI, etc). We are putting its /// start address where EBDA normally starts, i.e. in the last 1 KiB of the first 640KiB of memory pub const SYSTEM_MEM_START: u64 = 0x9fc00; /// Size of memory region for system data. /// /// We reserve the memory between the start of the EBDA up until the location of RSDP pointer, /// [0x9fc00, 0xe0000) for system data. This is 257 KiB of memory we is enough for our needs and /// future proof. /// /// For ACPI we currently need: /// /// FADT size: 276 bytes /// XSDT size: 52 bytes (header: 36 bytes, plus pointers of FADT and MADT) /// MADT size: 2104 bytes (header: 44 bytes, IO-APIC: 12 bytes, LocalAPIC: 8 * #vCPUS) /// DSDT size: 1907 bytes (header: 36 bytes, legacy devices: 345, GED: 161, VMGenID: 87, VirtIO /// devices: 71 bytes per device) /// /// The above assumes a maximum of 256 vCPUs, because that's what ACPI allows, but currently /// we have a hard limit of up to 32 vCPUs. /// /// Moreover, for MPTable we need up to 5304 bytes (284 + 20 * #vCPUS) assuming again /// a maximum number of 256 vCPUs. /// /// 257KiB is more than we need, however we reserve this space for potential future use of /// ACPI features (new tables and/or devices). pub const SYSTEM_MEM_SIZE: u64 = RSDP_ADDR - SYSTEM_MEM_START; /// First address that cannot be addressed using 32 bit anymore. 
pub const FIRST_ADDR_PAST_32BITS: u64 = 1 << 32; /// The size of the memory area reserved for MMIO 32-bit accesses. pub const MMIO32_MEM_SIZE: u64 = mib_to_bytes(1024) as u64; /// The start of the memory area reserved for MMIO 32-bit accesses. pub const MMIO32_MEM_START: u64 = FIRST_ADDR_PAST_32BITS - MMIO32_MEM_SIZE; // We dedicate the last 256 MiB of the 32-bit MMIO address space PCIe for memory-mapped access to // configuration. /// Size of MMIO region for PCIe configuration accesses. pub const PCI_MMCONFIG_SIZE: u64 = 256 << 20; /// Start of MMIO region for PCIe configuration accesses. pub const PCI_MMCONFIG_START: u64 = IOAPIC_ADDR as u64 - PCI_MMCONFIG_SIZE; /// MMIO space per PCIe segment pub const PCI_MMIO_CONFIG_SIZE_PER_SEGMENT: u64 = 4096 * 256; // We reserve 768 MiB for devices at the beginning of the MMIO region. This includes space both for // pure MMIO and PCIe devices. /// Memory region start for boot device. pub const BOOT_DEVICE_MEM_START: u64 = MMIO32_MEM_START; /// Beginning of memory region for device MMIO 32-bit accesses pub const MEM_32BIT_DEVICES_START: u64 = BOOT_DEVICE_MEM_START + MMIO_LEN; /// Size of memory region for device MMIO 32-bit accesses pub const MEM_32BIT_DEVICES_SIZE: u64 = PCI_MMCONFIG_START - MEM_32BIT_DEVICES_START; // 64-bits region for MMIO accesses /// The start of the memory area reserved for MMIO 64-bit accesses. pub const MMIO64_MEM_START: u64 = 256 << 30; /// The size of the memory area reserved for MMIO 64-bit accesses. 
pub const MMIO64_MEM_SIZE: u64 = 256 << 30; // At the moment, all of this region goes to devices /// Beginning of memory region for device MMIO 64-bit accesses pub const MEM_64BIT_DEVICES_START: u64 = MMIO64_MEM_START; /// Size of memory region for device MMIO 32-bit accesses pub const MEM_64BIT_DEVICES_SIZE: u64 = MMIO64_MEM_SIZE; /// First address past the 64-bit MMIO gap pub const FIRST_ADDR_PAST_64BITS_MMIO: u64 = MMIO64_MEM_START + MMIO64_MEM_SIZE; /// Size of the memory past 64-bit MMIO gap pub const PAST_64BITS_MMIO_SIZE: u64 = 512 << 30; ================================================ FILE: src/vmm/src/arch/x86_64/mod.rs ================================================ // Copyright © 2020, Oracle and/or its affiliates. // // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 // // Portions Copyright 2017 The Chromium OS Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the THIRD-PARTY file. /// Logic for handling x86_64 CPU models. pub mod cpu_model; mod gdt; /// Contains logic for setting up Advanced Programmable Interrupt Controller (local version). pub mod interrupts; /// Architecture specific KVM-related code pub mod kvm; /// Layout for the x86_64 system. pub mod layout; mod mptable; /// Logic for configuring x86_64 model specific registers (MSRs). pub mod msr; /// Logic for configuring x86_64 registers. pub mod regs; /// Architecture specific vCPU code pub mod vcpu; /// Architecture specific VM state code pub mod vm; /// Logic for configuring XSTATE features. 
pub mod xstate; #[allow(missing_docs)] pub mod generated; use std::cmp::max; use std::fs::File; use kvm::Kvm; use layout::{ CMDLINE_START, MMIO32_MEM_SIZE, MMIO32_MEM_START, MMIO64_MEM_SIZE, MMIO64_MEM_START, PCI_MMCONFIG_SIZE, PCI_MMCONFIG_START, }; use linux_loader::configurator::linux::LinuxBootConfigurator; use linux_loader::configurator::pvh::PvhBootConfigurator; use linux_loader::configurator::{BootConfigurator, BootParams}; use linux_loader::loader::bootparam::boot_params; use linux_loader::loader::elf::Elf as Loader; use linux_loader::loader::elf::start_info::{ hvm_memmap_table_entry, hvm_modlist_entry, hvm_start_info, }; use linux_loader::loader::{Cmdline, KernelLoader, PvhBootCapability, load_cmdline}; use log::debug; use super::EntryPoint; use crate::acpi::create_acpi_tables; use crate::arch::{BootProtocol, SYSTEM_MEM_SIZE, SYSTEM_MEM_START, arch_memory_regions_with_gap}; use crate::cpu_config::templates::{CustomCpuTemplate, GuestConfigError}; use crate::cpu_config::x86_64::CpuConfiguration; use crate::device_manager::DeviceManager; use crate::initrd::InitrdConfig; use crate::utils::{align_down, u64_to_usize, usize_to_u64}; use crate::vmm_config::machine_config::MachineConfig; use crate::vstate::memory::{ Address, GuestAddress, GuestMemory, GuestMemoryMmap, GuestMemoryRegion, GuestRegionType, }; use crate::vstate::vcpu::KvmVcpuConfigureError; use crate::{Vcpu, VcpuConfig, Vm, logger}; // Value taken from https://elixir.bootlin.com/linux/v5.10.68/source/arch/x86/include/uapi/asm/e820.h#L31 // Usable normal RAM const E820_RAM: u32 = 1; // Reserved area that should be avoided during memory allocations const E820_RESERVED: u32 = 2; const MEMMAP_TYPE_RAM: u32 = 1; /// Errors thrown while configuring x86_64 system. #[derive(Debug, thiserror::Error, displaydoc::Display)] pub enum ConfigurationError { /// Invalid e820 setup params. 
E820Configuration, /// Error writing MP table to memory: {0} MpTableSetup(#[from] mptable::MptableError), /// Error writing the zero page of guest memory. ZeroPageSetup, /// Error writing module entry to guest memory. ModlistSetup, /// Error writing memory map table to guest memory. MemmapTableSetup, /// Error writing hvm_start_info to guest memory. StartInfoSetup, /// Cannot copy kernel file fd KernelFile, /// Cannot load kernel due to invalid memory configuration or invalid kernel image: {0} KernelLoader(linux_loader::loader::Error), /// Cannot load command line string: {0} LoadCommandline(linux_loader::loader::Error), /// Failed to create guest config: {0} CreateGuestConfig(#[from] GuestConfigError), /// Error configuring the vcpu for boot: {0} VcpuConfigure(#[from] KvmVcpuConfigureError), /// Error configuring ACPI: {0} Acpi(#[from] crate::acpi::AcpiError), } /// Returns a Vec of the valid memory addresses. /// These should be used to configure the GuestMemoryMmap structure for the platform. /// For x86_64 all addresses are valid from the start of the kernel except an 1GB /// carve out at the end of 32bit address space and a second 256GB one at the 256GB limit. pub fn arch_memory_regions(size: usize) -> Vec<(GuestAddress, usize)> { // If we get here with size == 0 something has seriously gone wrong. Firecracker should never // try to allocate guest memory of size 0 assert!(size > 0, "Attempt to allocate guest memory of length 0"); let dram_size = std::cmp::min( usize::MAX - u64_to_usize(MMIO32_MEM_SIZE) - u64_to_usize(MMIO64_MEM_SIZE), size, ); if dram_size != size { logger::warn!( "Requested memory size {} exceeds architectural maximum (1022GiB). 
Size has been \ truncated to {}", size, dram_size ); } let mut regions = vec![]; if let Some((start_past_32bit_gap, remaining_past_32bit_gap)) = arch_memory_regions_with_gap( &mut regions, 0, dram_size, u64_to_usize(MMIO32_MEM_START), u64_to_usize(MMIO32_MEM_SIZE), ) && let Some((start_past_64bit_gap, remaining_past_64bit_gap)) = arch_memory_regions_with_gap( &mut regions, start_past_32bit_gap, remaining_past_32bit_gap, u64_to_usize(MMIO64_MEM_START), u64_to_usize(MMIO64_MEM_SIZE), ) { regions.push(( GuestAddress(start_past_64bit_gap as u64), remaining_past_64bit_gap, )); } regions } /// Returns the memory address where the kernel could be loaded. pub fn get_kernel_start() -> u64 { layout::HIMEM_START } /// Returns the memory address where the initrd could be loaded. pub fn initrd_load_addr(guest_mem: &GuestMemoryMmap, initrd_size: usize) -> Option { let first_region = guest_mem.find_region(GuestAddress::new(0))?; let lowmem_size = u64_to_usize(first_region.len()); if lowmem_size < initrd_size { return None; } Some(align_down( usize_to_u64(lowmem_size - initrd_size), usize_to_u64(super::GUEST_PAGE_SIZE), )) } /// Configures the system for booting Linux. #[allow(clippy::too_many_arguments)] pub fn configure_system_for_boot( kvm: &Kvm, vm: &Vm, device_manager: &mut DeviceManager, vcpus: &mut [Vcpu], machine_config: &MachineConfig, cpu_template: &CustomCpuTemplate, entry_point: EntryPoint, initrd: &Option, boot_cmdline: Cmdline, ) -> Result<(), ConfigurationError> { // Construct the base CpuConfiguration to apply CPU template onto. let cpu_config = CpuConfiguration::new(kvm.supported_cpuid.clone(), cpu_template, &vcpus[0])?; // Apply CPU template to the base CpuConfiguration. let cpu_config = CpuConfiguration::apply_template(cpu_config, cpu_template)?; let vcpu_config = VcpuConfig { vcpu_count: machine_config.vcpu_count, smt: machine_config.smt, cpu_config, }; // Configure vCPUs with normalizing and setting the generated CPU configuration. 
for vcpu in vcpus.iter_mut() { vcpu.kvm_vcpu .configure(vm.guest_memory(), entry_point, &vcpu_config)?; } // Write the kernel command line to guest memory. This is x86_64 specific, since on // aarch64 the command line will be specified through the FDT. let cmdline_size = boot_cmdline .as_cstring() .map(|cmdline_cstring| cmdline_cstring.as_bytes_with_nul().len()) .expect("Cannot create cstring from cmdline string"); load_cmdline( vm.guest_memory(), GuestAddress(crate::arch::x86_64::layout::CMDLINE_START), &boot_cmdline, ) .map_err(ConfigurationError::LoadCommandline)?; // Note that this puts the mptable at the last 1k of Linux's 640k base RAM mptable::setup_mptable( vm.guest_memory(), &mut vm.resource_allocator(), vcpu_config.vcpu_count, ) .map_err(ConfigurationError::MpTableSetup)?; match entry_point.protocol { BootProtocol::PvhBoot => { configure_pvh(vm.guest_memory(), GuestAddress(CMDLINE_START), initrd)?; } BootProtocol::LinuxBoot => { configure_64bit_boot( vm.guest_memory(), GuestAddress(CMDLINE_START), cmdline_size, initrd, )?; } } // Create ACPI tables and write them in guest memory // For the time being we only support ACPI in x86_64 create_acpi_tables( vm.guest_memory(), device_manager, &mut vm.resource_allocator(), vcpus, )?; Ok(()) } fn configure_pvh( guest_mem: &GuestMemoryMmap, cmdline_addr: GuestAddress, initrd: &Option, ) -> Result<(), ConfigurationError> { const XEN_HVM_START_MAGIC_VALUE: u32 = 0x336e_c578; let himem_start = GuestAddress(layout::HIMEM_START); // Vector to hold modules (currently either empty or holding initrd). let mut modules: Vec = Vec::new(); if let Some(initrd_config) = initrd { // The initrd has been written to guest memory already, here we just need to // create the module structure that describes it. 
modules.push(hvm_modlist_entry { paddr: initrd_config.address.raw_value(), size: initrd_config.size as u64, ..Default::default() }); } // Vector to hold the memory maps which needs to be written to guest memory // at MEMMAP_START after all of the mappings are recorded. let mut memmap: Vec = Vec::new(); // Create the memory map entries. memmap.push(hvm_memmap_table_entry { addr: 0, size: SYSTEM_MEM_START, type_: MEMMAP_TYPE_RAM, ..Default::default() }); memmap.push(hvm_memmap_table_entry { addr: SYSTEM_MEM_START, size: SYSTEM_MEM_SIZE, type_: E820_RESERVED, ..Default::default() }); memmap.push(hvm_memmap_table_entry { addr: PCI_MMCONFIG_START, size: PCI_MMCONFIG_SIZE, type_: E820_RESERVED, ..Default::default() }); for region in guest_mem .iter() .filter(|region| region.region_type == GuestRegionType::Dram) { // the first 1MB is reserved for the kernel let addr = max(himem_start, region.start_addr()); memmap.push(hvm_memmap_table_entry { addr: addr.raw_value(), size: region.last_addr().unchecked_offset_from(addr) + 1, type_: MEMMAP_TYPE_RAM, ..Default::default() }); } // Construct the hvm_start_info structure and serialize it into // boot_params. This will be stored at PVH_INFO_START address, and %rbx // will be initialized to contain PVH_INFO_START prior to starting the // guest, as required by the PVH ABI. #[allow(clippy::cast_possible_truncation)] // the vec lengths are single digit integers let mut start_info = hvm_start_info { magic: XEN_HVM_START_MAGIC_VALUE, version: 1, cmdline_paddr: cmdline_addr.raw_value(), memmap_paddr: layout::MEMMAP_START, memmap_entries: memmap.len() as u32, nr_modules: modules.len() as u32, ..Default::default() }; if !modules.is_empty() { start_info.modlist_paddr = layout::MODLIST_START; } let mut boot_params = BootParams::new::(&start_info, GuestAddress(layout::PVH_INFO_START)); // Copy the vector with the memmap table to the MEMMAP_START address // which is already saved in the memmap_paddr field of hvm_start_info struct. 
boot_params.set_sections::(&memmap, GuestAddress(layout::MEMMAP_START)); // Copy the vector with the modules list to the MODLIST_START address. // Note that we only set the modlist_paddr address if there is a nonzero // number of modules, but serializing an empty list is harmless. boot_params.set_modules::(&modules, GuestAddress(layout::MODLIST_START)); // Write the hvm_start_info struct to guest memory. PvhBootConfigurator::write_bootparams(&boot_params, guest_mem) .map_err(|_| ConfigurationError::StartInfoSetup) } fn configure_64bit_boot( guest_mem: &GuestMemoryMmap, cmdline_addr: GuestAddress, cmdline_size: usize, initrd: &Option, ) -> Result<(), ConfigurationError> { const KERNEL_BOOT_FLAG_MAGIC: u16 = 0xaa55; const KERNEL_HDR_MAGIC: u32 = 0x5372_6448; const KERNEL_LOADER_OTHER: u8 = 0xff; const KERNEL_MIN_ALIGNMENT_BYTES: u32 = 0x0100_0000; // Must be non-zero. let himem_start = GuestAddress(layout::HIMEM_START); // Set the location of RSDP in Boot Parameters to help the guest kernel find it faster. let mut params = boot_params { acpi_rsdp_addr: layout::RSDP_ADDR, ..Default::default() }; params.hdr.type_of_loader = KERNEL_LOADER_OTHER; params.hdr.boot_flag = KERNEL_BOOT_FLAG_MAGIC; params.hdr.header = KERNEL_HDR_MAGIC; params.hdr.cmd_line_ptr = u32::try_from(cmdline_addr.raw_value()).unwrap(); params.hdr.cmdline_size = u32::try_from(cmdline_size).unwrap(); params.hdr.kernel_alignment = KERNEL_MIN_ALIGNMENT_BYTES; if let Some(initrd_config) = initrd { params.hdr.ramdisk_image = u32::try_from(initrd_config.address.raw_value()).unwrap(); params.hdr.ramdisk_size = u32::try_from(initrd_config.size).unwrap(); } // We mark first [0x0, SYSTEM_MEM_START) region as usable RAM and the subsequent // [SYSTEM_MEM_START, (SYSTEM_MEM_START + SYSTEM_MEM_SIZE)) as reserved (note // SYSTEM_MEM_SIZE + SYSTEM_MEM_SIZE == HIMEM_START). 
add_e820_entry(&mut params, 0, layout::SYSTEM_MEM_START, E820_RAM)?; add_e820_entry( &mut params, layout::SYSTEM_MEM_START, layout::SYSTEM_MEM_SIZE, E820_RESERVED, )?; add_e820_entry( &mut params, PCI_MMCONFIG_START, PCI_MMCONFIG_SIZE, E820_RESERVED, )?; for region in guest_mem .iter() .filter(|region| region.region_type == GuestRegionType::Dram) { // the first 1MB is reserved for the kernel let addr = max(himem_start, region.start_addr()); add_e820_entry( &mut params, addr.raw_value(), region.last_addr().unchecked_offset_from(addr) + 1, E820_RAM, )?; } LinuxBootConfigurator::write_bootparams( &BootParams::new(¶ms, GuestAddress(layout::ZERO_PAGE_START)), guest_mem, ) .map_err(|_| ConfigurationError::ZeroPageSetup) } /// Add an e820 region to the e820 map. /// Returns Ok(()) if successful, or an error if there is no space left in the map. fn add_e820_entry( params: &mut boot_params, addr: u64, size: u64, mem_type: u32, ) -> Result<(), ConfigurationError> { if params.e820_entries as usize >= params.e820_table.len() { return Err(ConfigurationError::E820Configuration); } params.e820_table[params.e820_entries as usize].addr = addr; params.e820_table[params.e820_entries as usize].size = size; params.e820_table[params.e820_entries as usize].type_ = mem_type; params.e820_entries += 1; Ok(()) } /// Load linux kernel into guest memory. pub fn load_kernel( kernel: &File, guest_memory: &GuestMemoryMmap, ) -> Result { // Need to clone the File because reading from it // mutates it. 
let mut kernel_file = kernel .try_clone() .map_err(|_| ConfigurationError::KernelFile)?; let entry_addr = Loader::load( guest_memory, None, &mut kernel_file, Some(GuestAddress(get_kernel_start())), ) .map_err(ConfigurationError::KernelLoader)?; let mut entry_point_addr: GuestAddress = entry_addr.kernel_load; let mut boot_prot: BootProtocol = BootProtocol::LinuxBoot; if let PvhBootCapability::PvhEntryPresent(pvh_entry_addr) = entry_addr.pvh_boot_cap { // Use the PVH kernel entry point to boot the guest entry_point_addr = pvh_entry_addr; boot_prot = BootProtocol::PvhBoot; } debug!("Kernel loaded using {boot_prot}"); Ok(EntryPoint { entry_addr: entry_point_addr, protocol: boot_prot, }) } #[cfg(kani)] mod verification { use crate::arch::arch_memory_regions; use crate::arch::x86_64::layout::{ FIRST_ADDR_PAST_32BITS, FIRST_ADDR_PAST_64BITS_MMIO, MMIO32_MEM_SIZE, MMIO32_MEM_START, MMIO64_MEM_SIZE, MMIO64_MEM_START, }; use crate::utils::u64_to_usize; #[kani::proof] #[kani::unwind(4)] fn verify_arch_memory_regions() { let len: u64 = kani::any::(); kani::assume(len > 0); let regions = arch_memory_regions(len as usize); // There are two MMIO gaps, so we can get either 1, 2 or 3 regions assert!(regions.len() <= 3); assert!(regions.len() >= 1); // The first address is always 0 assert_eq!(regions[0].0.0, 0); // The total length of all regions is what we requested let actual_size = regions.iter().map(|&(_, len)| len).sum::(); assert!(actual_size <= len as usize); if actual_size < u64_to_usize(len) { assert_eq!( actual_size, usize::MAX - u64_to_usize(MMIO32_MEM_SIZE) - u64_to_usize(MMIO64_MEM_SIZE) ); } // No region overlaps the MMIO gap assert!( regions .iter() .all(|&(start, len)| (start.0 >= FIRST_ADDR_PAST_32BITS || start.0 + len as u64 <= MMIO32_MEM_START) && (start.0 >= FIRST_ADDR_PAST_64BITS_MMIO || start.0 + len as u64 <= MMIO64_MEM_START)) ); // All regions have non-zero length assert!(regions.iter().all(|&(_, len)| len > 0)); // If there's at least two regions, they 
perfectly snuggle up to one of the two MMIO gaps if regions.len() >= 2 { kani::cover!(); assert_eq!(regions[0].0.0 + regions[0].1 as u64, MMIO32_MEM_START); assert_eq!(regions[1].0.0, FIRST_ADDR_PAST_32BITS); } // If there are three regions, the last two perfectly snuggle up to the 64bit // MMIO gap if regions.len() == 3 { kani::cover!(); assert_eq!(regions[1].0.0 + regions[1].1 as u64, MMIO64_MEM_START); assert_eq!(regions[2].0.0, FIRST_ADDR_PAST_64BITS_MMIO); } } } #[cfg(test)] mod tests { use linux_loader::loader::bootparam::boot_e820_entry; use super::*; use crate::arch::x86_64::layout::FIRST_ADDR_PAST_32BITS; use crate::test_utils::{arch_mem, single_region_mem}; use crate::utils::mib_to_bytes; use crate::vstate::resources::ResourceAllocator; #[test] fn regions_lt_4gb() { let regions = arch_memory_regions(1usize << 29); assert_eq!(1, regions.len()); assert_eq!(GuestAddress(0), regions[0].0); assert_eq!(1usize << 29, regions[0].1); } #[test] fn regions_gt_4gb() { const MEMORY_SIZE: usize = (1 << 32) + 0x8000; let regions = arch_memory_regions(MEMORY_SIZE); assert_eq!(2, regions.len()); assert_eq!(GuestAddress(0), regions[0].0); assert_eq!(GuestAddress(1u64 << 32), regions[1].0); assert_eq!( regions[1], ( GuestAddress(FIRST_ADDR_PAST_32BITS), MEMORY_SIZE - regions[0].1 ) ) } #[test] fn test_system_configuration() { let no_vcpus = 4; let gm = single_region_mem(0x10000); let mut resource_allocator = ResourceAllocator::new(); let err = mptable::setup_mptable(&gm, &mut resource_allocator, 1); assert!(matches!( err.unwrap_err(), mptable::MptableError::NotEnoughMemory )); // Now assigning some memory that falls before the 32bit memory hole. 
        // 128 MiB: fits entirely below the 32-bit memory hole.
        let mem_size = mib_to_bytes(128);
        let gm = arch_mem(mem_size);
        let mut resource_allocator = ResourceAllocator::new();
        mptable::setup_mptable(&gm, &mut resource_allocator, no_vcpus).unwrap();
        configure_64bit_boot(&gm, GuestAddress(0), 0, &None).unwrap();
        configure_pvh(&gm, GuestAddress(0), &None).unwrap();

        // Now assigning some memory that is equal to the start of the 32bit memory hole.
        let mem_size = mib_to_bytes(3328);
        let gm = arch_mem(mem_size);
        let mut resource_allocator = ResourceAllocator::new();
        mptable::setup_mptable(&gm, &mut resource_allocator, no_vcpus).unwrap();
        configure_64bit_boot(&gm, GuestAddress(0), 0, &None).unwrap();
        configure_pvh(&gm, GuestAddress(0), &None).unwrap();

        // Now assigning some memory that falls after the 32bit memory hole.
        let mem_size = mib_to_bytes(3330);
        let gm = arch_mem(mem_size);
        let mut resource_allocator = ResourceAllocator::new();
        mptable::setup_mptable(&gm, &mut resource_allocator, no_vcpus).unwrap();
        configure_64bit_boot(&gm, GuestAddress(0), 0, &None).unwrap();
        configure_pvh(&gm, GuestAddress(0), &None).unwrap();
    }

    #[test]
    fn test_add_e820_entry() {
        let e820_map = [(boot_e820_entry {
            addr: 0x1,
            size: 4,
            type_: 1,
        }); 128];

        let expected_params = boot_params {
            e820_table: e820_map,
            e820_entries: 1,
            ..Default::default()
        };

        let mut params: boot_params = Default::default();
        add_e820_entry(
            &mut params,
            e820_map[0].addr,
            e820_map[0].size,
            e820_map[0].type_,
        )
        .unwrap();
        // boot_e820_entry does not derive PartialEq, so compare via Debug output.
        assert_eq!(
            format!("{:?}", params.e820_table[0]),
            format!("{:?}", expected_params.e820_table[0])
        );
        assert_eq!(params.e820_entries, expected_params.e820_entries);

        // Exercise the scenario where the field storing the length of the e820 entry table
        // is bigger than the allocated memory.
        params.e820_entries = u8::try_from(params.e820_table.len()).unwrap() + 1;
        assert!(
            add_e820_entry(
                &mut params,
                e820_map[0].addr,
                e820_map[0].size,
                e820_map[0].type_
            )
            .is_err()
        );
    }
}


================================================
FILE: src/vmm/src/arch/x86_64/mptable.rs
================================================
// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the THIRD-PARTY file.

use std::convert::TryFrom;
use std::fmt::Debug;
use std::mem::{self, size_of};

use libc::c_char;
use log::debug;
use vm_allocator::AllocPolicy;

use crate::arch::GSI_LEGACY_END;
use crate::arch::x86_64::generated::mpspec;
use crate::vstate::memory::{
    Address, ByteValued, Bytes, GuestAddress, GuestMemory, GuestMemoryMmap,
};
use crate::vstate::resources::ResourceAllocator;

// These `mpspec` wrapper types are only data, reading them from data is a safe initialization.
// SAFETY: POD
unsafe impl ByteValued for mpspec::mpc_bus {}
// SAFETY: POD
unsafe impl ByteValued for mpspec::mpc_cpu {}
// SAFETY: POD
unsafe impl ByteValued for mpspec::mpc_intsrc {}
// SAFETY: POD
unsafe impl ByteValued for mpspec::mpc_ioapic {}
// SAFETY: POD
unsafe impl ByteValued for mpspec::mpc_table {}
// SAFETY: POD
unsafe impl ByteValued for mpspec::mpc_lintsrc {}
// SAFETY: POD
unsafe impl ByteValued for mpspec::mpf_intel {}

/// Errors that can occur while building the MP table in guest memory.
#[derive(Debug, PartialEq, Eq, thiserror::Error, displaydoc::Display)]
pub enum MptableError {
    /// There was too little guest memory to store the entire MP table.
    NotEnoughMemory,
    /// The MP table has too little address space to be stored.
    AddressOverflow,
    /// Failure while zeroing out the memory for the MP table.
Clear, /// Number of CPUs exceeds the maximum supported CPUs TooManyCpus, /// Number of IRQs exceeds the maximum supported IRQs TooManyIrqs, /// Failure to write the MP floating pointer. WriteMpfIntel, /// Failure to write MP CPU entry. WriteMpcCpu, /// Failure to write MP ioapic entry. WriteMpcIoapic, /// Failure to write MP bus entry. WriteMpcBus, /// Failure to write MP interrupt source entry. WriteMpcIntsrc, /// Failure to write MP local interrupt source entry. WriteMpcLintsrc, /// Failure to write MP table header. WriteMpcTable, /// Failure to allocate memory for MPTable AllocateMemory(#[from] vm_allocator::Error), } // With APIC/xAPIC, there are only 255 APIC IDs available. And IOAPIC occupies // one APIC ID, so only 254 CPUs at maximum may be supported. Actually it's // a large number for FC usecases. pub const MAX_SUPPORTED_CPUS: u8 = 254; // Convenience macro for making arrays of diverse character types. macro_rules! char_array { ($t:ty; $( $c:expr ),*) => ( [ $( $c as $t ),* ] ) } // Most of these variables are sourced from the Intel MP Spec 1.4. 
const SMP_MAGIC_IDENT: [c_char; 4] = char_array!(c_char; '_', 'M', 'P', '_'); const MPC_SIGNATURE: [c_char; 4] = char_array!(c_char; 'P', 'C', 'M', 'P'); const MPC_SPEC: i8 = 4; const MPC_OEM: [c_char; 8] = char_array!(c_char; 'F', 'C', ' ', ' ', ' ', ' ', ' ', ' '); const MPC_PRODUCT_ID: [c_char; 12] = ['0' as c_char; 12]; const BUS_TYPE_ISA: [u8; 6] = [b'I', b'S', b'A', b' ', b' ', b' ']; const IO_APIC_DEFAULT_PHYS_BASE: u32 = 0xfec0_0000; // source: linux/arch/x86/include/asm/apicdef.h const APIC_DEFAULT_PHYS_BASE: u32 = 0xfee0_0000; // source: linux/arch/x86/include/asm/apicdef.h const APIC_VERSION: u8 = 0x14; const CPU_STEPPING: u32 = 0x600; const CPU_FEATURE_APIC: u32 = 0x200; const CPU_FEATURE_FPU: u32 = 0x001; fn compute_checksum(v: &T) -> u8 { let mut checksum: u8 = 0; for i in v.as_slice() { checksum = checksum.wrapping_add(*i); } checksum } fn mpf_intel_compute_checksum(v: &mpspec::mpf_intel) -> u8 { let checksum = compute_checksum(v).wrapping_sub(v.checksum); (!checksum).wrapping_add(1) } fn compute_mp_size(num_cpus: u8) -> usize { mem::size_of::() + mem::size_of::() + mem::size_of::() * (num_cpus as usize) + mem::size_of::() + mem::size_of::() + mem::size_of::() * (GSI_LEGACY_END as usize + 1) + mem::size_of::() * 2 } /// Performs setup of the MP table for the given `num_cpus`. pub fn setup_mptable( mem: &GuestMemoryMmap, resource_allocator: &mut ResourceAllocator, num_cpus: u8, ) -> Result<(), MptableError> { if num_cpus > MAX_SUPPORTED_CPUS { return Err(MptableError::TooManyCpus); } let mp_size = compute_mp_size(num_cpus); let mptable_addr = resource_allocator.allocate_system_memory(mp_size as u64, 1, AllocPolicy::FirstMatch)?; debug!( "mptable: Allocated {mp_size} bytes for MPTable {num_cpus} vCPUs at address {:#010x}", mptable_addr ); // Used to keep track of the next base pointer into the MP table. 
let mut base_mp = GuestAddress(mptable_addr); let mut mp_num_entries: u16 = 0; let mut checksum: u8 = 0; let ioapicid: u8 = num_cpus + 1; // The checked_add here ensures the all of the following base_mp.unchecked_add's will be without // overflow. if let Some(end_mp) = base_mp.checked_add((mp_size - 1) as u64) { if !mem.address_in_range(end_mp) { return Err(MptableError::NotEnoughMemory); } } else { return Err(MptableError::AddressOverflow); } mem.write_slice(&vec![0; mp_size], base_mp) .map_err(|_| MptableError::Clear)?; { let size = mem::size_of::() as u64; let mut mpf_intel = mpspec::mpf_intel { signature: SMP_MAGIC_IDENT, physptr: u32::try_from(base_mp.raw_value() + size).unwrap(), length: 1, specification: 4, ..mpspec::mpf_intel::default() }; mpf_intel.checksum = mpf_intel_compute_checksum(&mpf_intel); mem.write_obj(mpf_intel, base_mp) .map_err(|_| MptableError::WriteMpfIntel)?; base_mp = base_mp.unchecked_add(size); mp_num_entries += 1; } // We set the location of the mpc_table here but we can't fill it out until we have the length // of the entire table later. 
let table_base = base_mp; base_mp = base_mp.unchecked_add(mem::size_of::() as u64); { let size = mem::size_of::() as u64; for cpu_id in 0..num_cpus { let mpc_cpu = mpspec::mpc_cpu { type_: mpspec::MP_PROCESSOR.try_into().unwrap(), apicid: cpu_id, apicver: APIC_VERSION, cpuflag: u8::try_from(mpspec::CPU_ENABLED).unwrap() | if cpu_id == 0 { u8::try_from(mpspec::CPU_BOOTPROCESSOR).unwrap() } else { 0 }, cpufeature: CPU_STEPPING, featureflag: CPU_FEATURE_APIC | CPU_FEATURE_FPU, ..Default::default() }; mem.write_obj(mpc_cpu, base_mp) .map_err(|_| MptableError::WriteMpcCpu)?; base_mp = base_mp.unchecked_add(size); checksum = checksum.wrapping_add(compute_checksum(&mpc_cpu)); mp_num_entries += 1; } } { let size = mem::size_of::() as u64; let mpc_bus = mpspec::mpc_bus { type_: mpspec::MP_BUS.try_into().unwrap(), busid: 0, bustype: BUS_TYPE_ISA, }; mem.write_obj(mpc_bus, base_mp) .map_err(|_| MptableError::WriteMpcBus)?; base_mp = base_mp.unchecked_add(size); checksum = checksum.wrapping_add(compute_checksum(&mpc_bus)); mp_num_entries += 1; } { let size = mem::size_of::() as u64; let mpc_ioapic = mpspec::mpc_ioapic { type_: mpspec::MP_IOAPIC.try_into().unwrap(), apicid: ioapicid, apicver: APIC_VERSION, flags: mpspec::MPC_APIC_USABLE.try_into().unwrap(), apicaddr: IO_APIC_DEFAULT_PHYS_BASE, }; mem.write_obj(mpc_ioapic, base_mp) .map_err(|_| MptableError::WriteMpcIoapic)?; base_mp = base_mp.unchecked_add(size); checksum = checksum.wrapping_add(compute_checksum(&mpc_ioapic)); mp_num_entries += 1; } // Per kvm_setup_default_irq_routing() in kernel for i in 0..=u8::try_from(GSI_LEGACY_END).map_err(|_| MptableError::TooManyIrqs)? 
{ let size = mem::size_of::() as u64; let mpc_intsrc = mpspec::mpc_intsrc { type_: mpspec::MP_INTSRC.try_into().unwrap(), irqtype: mpspec::mp_irq_source_types::mp_INT.try_into().unwrap(), irqflag: mpspec::MP_IRQPOL_DEFAULT.try_into().unwrap(), srcbus: 0, srcbusirq: i, dstapic: ioapicid, dstirq: i, }; mem.write_obj(mpc_intsrc, base_mp) .map_err(|_| MptableError::WriteMpcIntsrc)?; base_mp = base_mp.unchecked_add(size); checksum = checksum.wrapping_add(compute_checksum(&mpc_intsrc)); mp_num_entries += 1; } { let size = mem::size_of::() as u64; let mpc_lintsrc = mpspec::mpc_lintsrc { type_: mpspec::MP_LINTSRC.try_into().unwrap(), irqtype: mpspec::mp_irq_source_types::mp_ExtINT.try_into().unwrap(), irqflag: mpspec::MP_IRQPOL_DEFAULT.try_into().unwrap(), srcbusid: 0, srcbusirq: 0, destapic: 0, destapiclint: 0, }; mem.write_obj(mpc_lintsrc, base_mp) .map_err(|_| MptableError::WriteMpcLintsrc)?; base_mp = base_mp.unchecked_add(size); checksum = checksum.wrapping_add(compute_checksum(&mpc_lintsrc)); mp_num_entries += 1; } { let size = mem::size_of::() as u64; let mpc_lintsrc = mpspec::mpc_lintsrc { type_: mpspec::MP_LINTSRC.try_into().unwrap(), irqtype: mpspec::mp_irq_source_types::mp_NMI.try_into().unwrap(), irqflag: mpspec::MP_IRQPOL_DEFAULT.try_into().unwrap(), srcbusid: 0, srcbusirq: 0, destapic: 0xFF, destapiclint: 1, }; mem.write_obj(mpc_lintsrc, base_mp) .map_err(|_| MptableError::WriteMpcLintsrc)?; base_mp = base_mp.unchecked_add(size); checksum = checksum.wrapping_add(compute_checksum(&mpc_lintsrc)); mp_num_entries += 1; } // At this point we know the size of the mp_table. 
let table_end = base_mp; { let mut mpc_table = mpspec::mpc_table { signature: MPC_SIGNATURE, // it's safe to use unchecked_offset_from because // table_end > table_base length: table_end .unchecked_offset_from(table_base) .try_into() .unwrap(), spec: MPC_SPEC, oem: MPC_OEM, oemcount: mp_num_entries, productid: MPC_PRODUCT_ID, lapic: APIC_DEFAULT_PHYS_BASE, ..Default::default() }; debug_assert_eq!( mpc_table.length as usize + size_of::(), mp_size ); checksum = checksum.wrapping_add(compute_checksum(&mpc_table)); #[allow(clippy::cast_possible_wrap)] let checksum_final = (!checksum).wrapping_add(1) as i8; mpc_table.checksum = checksum_final; mem.write_obj(mpc_table, table_base) .map_err(|_| MptableError::WriteMpcTable)?; } Ok(()) } #[cfg(test)] mod tests { use super::*; use crate::arch::SYSTEM_MEM_START; use crate::test_utils::single_region_mem_at; use crate::vstate::memory::Bytes; fn table_entry_size(type_: u8) -> usize { match u32::from(type_) { mpspec::MP_PROCESSOR => mem::size_of::(), mpspec::MP_BUS => mem::size_of::(), mpspec::MP_IOAPIC => mem::size_of::(), mpspec::MP_INTSRC => mem::size_of::(), mpspec::MP_LINTSRC => mem::size_of::(), _ => panic!("unrecognized mpc table entry type: {}", type_), } } #[test] fn bounds_check() { let num_cpus = 4; let mem = single_region_mem_at(SYSTEM_MEM_START, compute_mp_size(num_cpus)); let mut resource_allocator = ResourceAllocator::new(); setup_mptable(&mem, &mut resource_allocator, num_cpus).unwrap(); } #[test] fn bounds_check_fails() { let num_cpus = 4; let mem = single_region_mem_at(SYSTEM_MEM_START, compute_mp_size(num_cpus) - 1); let mut resource_allocator = ResourceAllocator::new(); setup_mptable(&mem, &mut resource_allocator, num_cpus).unwrap_err(); } #[test] fn mpf_intel_checksum() { let num_cpus = 1; let mem = single_region_mem_at(SYSTEM_MEM_START, compute_mp_size(num_cpus)); let mut resource_allocator = ResourceAllocator::new(); setup_mptable(&mem, &mut resource_allocator, num_cpus).unwrap(); let mpf_intel: 
mpspec::mpf_intel = mem.read_obj(GuestAddress(SYSTEM_MEM_START)).unwrap(); assert_eq!(mpf_intel_compute_checksum(&mpf_intel), mpf_intel.checksum); } #[test] fn mpc_table_checksum() { let num_cpus = 4; let mem = single_region_mem_at(SYSTEM_MEM_START, compute_mp_size(num_cpus)); let mut resource_allocator = ResourceAllocator::new(); setup_mptable(&mem, &mut resource_allocator, num_cpus).unwrap(); let mpf_intel: mpspec::mpf_intel = mem.read_obj(GuestAddress(SYSTEM_MEM_START)).unwrap(); let mpc_offset = GuestAddress(u64::from(mpf_intel.physptr)); let mpc_table: mpspec::mpc_table = mem.read_obj(mpc_offset).unwrap(); let mut buffer = Vec::new(); mem.write_volatile_to(mpc_offset, &mut buffer, mpc_table.length as usize) .unwrap(); assert_eq!( buffer .iter() .fold(0u8, |accum, &item| accum.wrapping_add(item)), 0 ); } #[test] fn mpc_entry_count() { let num_cpus = 1; let mem = single_region_mem_at(SYSTEM_MEM_START, compute_mp_size(num_cpus)); let mut resource_allocator = ResourceAllocator::new(); setup_mptable(&mem, &mut resource_allocator, num_cpus).unwrap(); let mpf_intel: mpspec::mpf_intel = mem.read_obj(GuestAddress(SYSTEM_MEM_START)).unwrap(); let mpc_offset = GuestAddress(u64::from(mpf_intel.physptr)); let mpc_table: mpspec::mpc_table = mem.read_obj(mpc_offset).unwrap(); let expected_entry_count = // Intel floating point 1 // CPU + u16::from(num_cpus) // IOAPIC + 1 // ISA Bus + 1 // IRQ + u16::try_from(GSI_LEGACY_END).unwrap() + 1 // Interrupt source ExtINT + 1 // Interrupt source NMI + 1; assert_eq!(mpc_table.oemcount, expected_entry_count); } #[test] fn cpu_entry_count() { let mem = single_region_mem_at(SYSTEM_MEM_START, compute_mp_size(MAX_SUPPORTED_CPUS)); for i in 0..MAX_SUPPORTED_CPUS { let mut resource_allocator = ResourceAllocator::new(); setup_mptable(&mem, &mut resource_allocator, i).unwrap(); let mpf_intel: mpspec::mpf_intel = mem.read_obj(GuestAddress(SYSTEM_MEM_START)).unwrap(); let mpc_offset = GuestAddress(u64::from(mpf_intel.physptr)); let mpc_table: 
mpspec::mpc_table = mem.read_obj(mpc_offset).unwrap(); let mpc_end = mpc_offset.checked_add(u64::from(mpc_table.length)).unwrap(); let mut entry_offset = mpc_offset .checked_add(mem::size_of::() as u64) .unwrap(); let mut cpu_count = 0; while entry_offset < mpc_end { let entry_type: u8 = mem.read_obj(entry_offset).unwrap(); entry_offset = entry_offset .checked_add(table_entry_size(entry_type) as u64) .unwrap(); assert!(entry_offset <= mpc_end); if u32::from(entry_type) == mpspec::MP_PROCESSOR { cpu_count += 1; } } assert_eq!(cpu_count, i); } } #[test] fn cpu_entry_count_max() { let cpus = MAX_SUPPORTED_CPUS + 1; let mem = single_region_mem_at(SYSTEM_MEM_START, compute_mp_size(cpus)); let mut resource_allocator = ResourceAllocator::new(); let result = setup_mptable(&mem, &mut resource_allocator, cpus).unwrap_err(); assert_eq!(result, MptableError::TooManyCpus); } } ================================================ FILE: src/vmm/src/arch/x86_64/msr.rs ================================================ // Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 /// Model Specific Registers (MSRs) related functionality. use bitflags::bitflags; use kvm_bindings::{MsrList, Msrs, kvm_msr_entry}; use kvm_ioctls::{Kvm, VcpuFd}; use crate::arch::x86_64::generated::hyperv::*; use crate::arch::x86_64::generated::hyperv_tlfs::*; use crate::arch::x86_64::generated::msr_index::*; use crate::arch::x86_64::generated::perf_event::*; use crate::cpu_config::x86_64::cpuid::common::GetCpuidError; #[derive(Debug, PartialEq, Eq, thiserror::Error, displaydoc::Display)] /// MSR related errors. 
pub enum MsrError {
    /// Failed to create `vmm_sys_util::fam::FamStructWrapper` for MSRs
    Fam(#[from] vmm_sys_util::fam::Error),
    /// Failed to get MSR index list: {0}
    GetMsrIndexList(kvm_ioctls::Error),
    /// Invalid CPU vendor: {0}
    InvalidVendor(#[from] GetCpuidError),
    /// Failed to set MSRs: {0}
    SetMsrs(kvm_ioctls::Error),
    /// Not all given MSRs were set.
    SetMsrsIncomplete,
}

/// MSR range
#[derive(Debug)]
pub struct MsrRange {
    /// Base MSR address
    pub base: u32,
    /// Number of MSRs
    pub nmsrs: u32,
}

impl MsrRange {
    /// Returns whether `msr` is contained in this MSR range.
    pub fn contains(&self, msr: u32) -> bool {
        // Half-open range: [base, base + nmsrs).
        self.base <= msr && msr < self.base + self.nmsrs
    }
}

/// Base MSR for APIC
const APIC_BASE_MSR: u32 = 0x800;

/// Number of APIC MSR indexes
const APIC_MSR_INDEXES: u32 = 0x400;

/// Custom MSRs fall in the range 0x4b564d00-0x4b564dff
const MSR_KVM_WALL_CLOCK_NEW: u32 = 0x4b56_4d00;
const MSR_KVM_SYSTEM_TIME_NEW: u32 = 0x4b56_4d01;
const MSR_KVM_ASYNC_PF_EN: u32 = 0x4b56_4d02;
const MSR_KVM_STEAL_TIME: u32 = 0x4b56_4d03;
const MSR_KVM_PV_EOI_EN: u32 = 0x4b56_4d04;
const MSR_KVM_POLL_CONTROL: u32 = 0x4b56_4d05;
const MSR_KVM_ASYNC_PF_INT: u32 = 0x4b56_4d06;

/// Taken from arch/x86/include/asm/msr-index.h
/// Spectre mitigations control MSR
pub const MSR_IA32_SPEC_CTRL: u32 = 0x0000_0048;

/// Architecture capabilities MSR
pub const MSR_IA32_ARCH_CAPABILITIES: u32 = 0x0000_010a;

const MSR_IA32_PRED_CMD: u32 = 0x0000_0049;

bitflags! {
    /// Feature flags enumerated in the IA32_ARCH_CAPABILITIES MSR.
    /// See https://www.intel.com/content/www/us/en/developer/articles/technical/software-security-guidance/technical-documentation/cpuid-enumeration-and-architectural-msrs.html
    #[derive(Default)]
    #[repr(C)]
    pub struct ArchCapaMSRFlags: u64 {
        /// The processor is not susceptible to Rogue Data Cache Load (RDCL).
        const RDCL_NO = 1 << 0;
        /// The processor supports enhanced Indirect Branch Restriction Speculation (IBRS)
        const IBRS_ALL = 1 << 1;
        /// The processor supports RSB Alternate. Alternative branch predictors may be used by RET instructions
        /// when the RSB is empty. Software using retpoline may be affected by this behavior.
        const RSBA = 1 << 2;
        /// A value of 1 indicates the hypervisor need not flush the L1D on VM entry.
        const SKIP_L1DFL_VMENTRY = 1 << 3;
        /// Processor is not susceptible to Speculative Store Bypass (SSB).
        const SSB_NO = 1 << 4;
        /// Processor is not susceptible to Microarchitectural Data Sampling (MDS).
        const MDS_NO = 1 << 5;
        /// The processor is not susceptible to a machine check error due to modifying the size of a code page
        /// without TLB invalidation.
        const IF_PSCHANGE_MC_NO = 1 << 6;
        /// The processor supports RTM_DISABLE and TSX_CPUID_CLEAR.
        const TSX_CTRL = 1 << 7;
        /// Processor is not susceptible to Intel® Transactional Synchronization Extensions
        /// (Intel® TSX) Asynchronous Abort (TAA).
        const TAA_NO = 1 << 8;
        // Bit 9 is reserved
        /// Processor supports IA32_MISC_PACKAGE_CTRLS MSR.
        const MISC_PACKAGE_CTRLS = 1 << 10;
        /// Processor supports setting and reading IA32_MISC_PACKAGE_CTLS[0] (ENERGY_FILTERING_ENABLE) bit.
        const ENERGY_FILTERING_CTL = 1 << 11;
        /// The processor supports data operand independent timing mode.
        const DOITM = 1 << 12;
        /// The processor is not affected by either the Shared Buffers Data Read (SBDR) vulnerability or the
        /// Sideband Stale Data Propagator (SSDP).
        const SBDR_SSDP_NO = 1 << 13;
        /// The processor is not affected by the Fill Buffer Stale Data Propagator (FBSDP).
        const FBSDP_NO = 1 << 14;
        /// The processor is not affected by vulnerabilities involving the Primary Stale Data Propagator (PSDP).
        const PSDP_NO = 1 << 15;
        // Bit 16 is reserved
        /// The processor will overwrite fill buffer values as part of MD_CLEAR operations with the VERW instruction.
        /// On these processors, L1D_FLUSH does not overwrite fill buffer values.
        const FB_CLEAR = 1 << 17;
        /// The processor supports read and write to the IA32_MCU_OPT_CTRL MSR (MSR 123H) and to the FB_CLEAR_DIS bit
        /// in that MSR (bit position 3).
        const FB_CLEAR_CTRL = 1 << 18;
        /// A value of 1 indicates processor may have the RRSBA alternate prediction behavior,
        /// if not disabled by RRSBA_DIS_U or RRSBA_DIS_S.
        const RRSBA = 1 << 19;
        /// A value of 1 indicates BHI_NO branch prediction behavior,
        /// regardless of the value of IA32_SPEC_CTRL[BHI_DIS_S] MSR bit.
        const BHI_NO = 1 << 20;
        // Bits 21:22 are reserved
        /// If set, the IA32_OVERCLOCKING STATUS MSR exists.
        const OVERCLOCKING_STATUS = 1 << 23;
        // Bits 24:63 are reserved
    }
}

/// Macro for generating a MsrRange.
#[macro_export]
macro_rules! MSR_RANGE {
    ($base:expr, $nmsrs:expr) => {
        MsrRange {
            base: $base,
            nmsrs: $nmsrs,
        }
    };
    ($base:expr) => {
        MSR_RANGE!($base, 1)
    };
}

// List of MSRs that can be serialized. List is sorted in ascending order of MSRs addresses.
static SERIALIZABLE_MSR_RANGES: &[MsrRange] = &[ MSR_RANGE!(MSR_IA32_P5_MC_ADDR), MSR_RANGE!(MSR_IA32_P5_MC_TYPE), MSR_RANGE!(MSR_IA32_TSC), MSR_RANGE!(MSR_IA32_PLATFORM_ID), MSR_RANGE!(MSR_IA32_APICBASE), MSR_RANGE!(MSR_IA32_EBL_CR_POWERON), MSR_RANGE!(MSR_EBC_FREQUENCY_ID), MSR_RANGE!(MSR_SMI_COUNT), MSR_RANGE!(MSR_IA32_FEAT_CTL), MSR_RANGE!(MSR_IA32_TSC_ADJUST), MSR_RANGE!(MSR_IA32_SPEC_CTRL), MSR_RANGE!(MSR_IA32_PRED_CMD), MSR_RANGE!(MSR_IA32_UCODE_WRITE), MSR_RANGE!(MSR_IA32_UCODE_REV), MSR_RANGE!(MSR_IA32_SMBASE), MSR_RANGE!(MSR_FSB_FREQ), MSR_RANGE!(MSR_PLATFORM_INFO), MSR_RANGE!(MSR_PKG_CST_CONFIG_CONTROL), MSR_RANGE!(MSR_IA32_MPERF), MSR_RANGE!(MSR_IA32_APERF), MSR_RANGE!(MSR_MTRRcap), MSR_RANGE!(MSR_IA32_BBL_CR_CTL3), MSR_RANGE!(MSR_IA32_SYSENTER_CS), MSR_RANGE!(MSR_IA32_SYSENTER_ESP), MSR_RANGE!(MSR_IA32_SYSENTER_EIP), MSR_RANGE!(MSR_IA32_MCG_CAP), MSR_RANGE!(MSR_IA32_MCG_STATUS), MSR_RANGE!(MSR_IA32_MCG_CTL), MSR_RANGE!(MSR_IA32_PERF_STATUS), MSR_RANGE!(MSR_IA32_MISC_ENABLE), MSR_RANGE!(MSR_MISC_FEATURE_CONTROL), MSR_RANGE!(MSR_MISC_PWR_MGMT), MSR_RANGE!(MSR_TURBO_RATIO_LIMIT), MSR_RANGE!(MSR_TURBO_RATIO_LIMIT1), MSR_RANGE!(MSR_IA32_DEBUGCTLMSR), MSR_RANGE!(MSR_IA32_LASTBRANCHFROMIP), MSR_RANGE!(MSR_IA32_LASTBRANCHTOIP), MSR_RANGE!(MSR_IA32_LASTINTFROMIP), MSR_RANGE!(MSR_IA32_LASTINTTOIP), MSR_RANGE!(MSR_IA32_POWER_CTL), MSR_RANGE!( // IA32_MTRR_PHYSBASE0 0x200, 0x100 ), MSR_RANGE!( // MSR_CORE_C3_RESIDENCY // MSR_CORE_C6_RESIDENCY // MSR_CORE_C7_RESIDENCY MSR_CORE_C3_RESIDENCY, 3 ), MSR_RANGE!(MSR_IA32_MC0_CTL, 0x80), MSR_RANGE!(MSR_RAPL_POWER_UNIT), MSR_RANGE!( // MSR_PKGC3_IRTL // MSR_PKGC6_IRTL // MSR_PKGC7_IRTL MSR_PKGC3_IRTL, 3 ), MSR_RANGE!(MSR_PKG_POWER_LIMIT), MSR_RANGE!(MSR_PKG_ENERGY_STATUS), MSR_RANGE!(MSR_PKG_PERF_STATUS), MSR_RANGE!(MSR_PKG_POWER_INFO), MSR_RANGE!(MSR_DRAM_POWER_LIMIT), MSR_RANGE!(MSR_DRAM_ENERGY_STATUS), MSR_RANGE!(MSR_DRAM_PERF_STATUS), MSR_RANGE!(MSR_DRAM_POWER_INFO), MSR_RANGE!(MSR_CONFIG_TDP_NOMINAL), 
MSR_RANGE!(MSR_CONFIG_TDP_LEVEL_1), MSR_RANGE!(MSR_CONFIG_TDP_LEVEL_2), MSR_RANGE!(MSR_CONFIG_TDP_CONTROL), MSR_RANGE!(MSR_TURBO_ACTIVATION_RATIO), MSR_RANGE!(MSR_IA32_TSC_DEADLINE), MSR_RANGE!(APIC_BASE_MSR, APIC_MSR_INDEXES), MSR_RANGE!(MSR_KVM_WALL_CLOCK_NEW), MSR_RANGE!(MSR_KVM_SYSTEM_TIME_NEW), MSR_RANGE!(MSR_KVM_ASYNC_PF_EN), MSR_RANGE!(MSR_KVM_STEAL_TIME), MSR_RANGE!(MSR_KVM_PV_EOI_EN), MSR_RANGE!(MSR_EFER), MSR_RANGE!(MSR_STAR), MSR_RANGE!(MSR_LSTAR), MSR_RANGE!(MSR_CSTAR), MSR_RANGE!(MSR_SYSCALL_MASK), MSR_RANGE!(MSR_FS_BASE), MSR_RANGE!(MSR_GS_BASE), MSR_RANGE!(MSR_KERNEL_GS_BASE), MSR_RANGE!(MSR_TSC_AUX), MSR_RANGE!(MSR_MISC_FEATURES_ENABLES), MSR_RANGE!(MSR_K7_HWCR), MSR_RANGE!(MSR_KVM_POLL_CONTROL), MSR_RANGE!(MSR_KVM_ASYNC_PF_INT), MSR_RANGE!(MSR_IA32_TSX_CTRL), ]; /// Specifies whether a particular MSR should be included in vcpu serialization. /// /// # Arguments /// /// * `index` - The index of the MSR that is checked whether it's needed for serialization. pub fn msr_should_serialize(index: u32) -> bool { // Denied MSR not exported by Linux: IA32_MCG_CTL if index == MSR_IA32_MCG_CTL { return false; }; SERIALIZABLE_MSR_RANGES .iter() .any(|range| range.contains(index)) } /// Returns the list of serializable MSR indices. /// /// # Arguments /// /// * `kvm_fd` - Ref to `kvm_ioctls::Kvm`. /// /// # Errors /// /// When: /// - [`kvm_ioctls::Kvm::get_msr_index_list()`] errors. pub fn get_msrs_to_save(kvm_fd: &Kvm) -> Result { let mut msr_index_list = kvm_fd .get_msr_index_list() .map_err(MsrError::GetMsrIndexList)?; msr_index_list.retain(|msr_index| msr_should_serialize(*msr_index)); Ok(msr_index_list) } // List of MSRs that cannot be dumped. // // KVM_GET_MSR_INDEX_LIST returns some MSR indices that KVM_GET_MSRS fails to get depending on // configuration. For example, Firecracker disables PMU by default in CPUID normalization for CPUID // leaf 0xA. Due to this, some PMU-related MSRs cannot be retrieved via KVM_GET_MSRS. 
The dependency // on CPUID leaf 0xA can be found in the following link. // https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/tree/arch/x86/kvm/vmx/pmu_intel.c?h=v5.10.176#n325 // // The list of MSR indices returned by KVM_GET_MSR_INDEX_LIST can be found in the following link // (`msrs_to_save_all` + `num_emulated_msrs`). // https://elixir.bootlin.com/linux/v5.10.176/source/arch/x86/kvm/x86.c#L1211 const UNDUMPABLE_MSR_RANGES: [MsrRange; 17] = [ // - MSR_ARCH_PERFMON_FIXED_CTRn (0x309..=0x30C): CPUID.0Ah:EDX[0:4] > 0 MSR_RANGE!(MSR_ARCH_PERFMON_FIXED_CTR0, 4), // - MSR_CORE_PERF_FIXED_CTR_CTRL (0x38D): CPUID:0Ah:EAX[7:0] > 1 // - MSR_CORE_PERF_GLOBAL_STATUS (0x38E): CPUID:0Ah:EAX[7:0] > 0 || // (CPUID.(EAX=07H,ECX=0):EBX[25] = 1 && CPUID.(EAX=014H,ECX=0):ECX[0] = 1) // - MSR_CORE_PERF_GLOBAL_CTRL (0x39F): CPUID.0AH: EAX[7:0] > 0 // - MSR_CORE_PERF_GLOBAL_OVF_CTRL (0x390): CPUID.0AH: EAX[7:0] > 0 && CPUID.0AH: EAX[7:0] <= 3 MSR_RANGE!(MSR_CORE_PERF_FIXED_CTR_CTRL, 4), // - MSR_ARCH_PERFMON_PERFCTRn (0xC1..=0xC8): CPUID.0AH:EAX[15:8] > 0 MSR_RANGE!(MSR_ARCH_PERFMON_PERFCTR0, 8), // - MSR_ARCH_PERFMON_EVENTSELn (0x186..=0x18D): CPUID.0AH:EAX[15:8] > 0 MSR_RANGE!(MSR_ARCH_PERFMON_EVENTSEL0, 8), // On kernel 4.14, IA32_MCG_CTL (0x17B) can be retrieved only if IA32_MCG_CAP.CTL_P[8] = 1 for // vCPU. IA32_MCG_CAP can be set up via KVM_X86_SETUP_MCE API, but Firecracker doesn't use it. // https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/tree/arch/x86/kvm/x86.c?h=v4.14.311#n2553 MSR_RANGE!(MSR_IA32_MCG_CTL), // Firecracker is not tested with nested virtualization. Some CPU templates intentionally // disable nested virtualization. If nested virtualization is disabled, VMX-related MSRs cannot // be dumped. It can be seen in the following link that VMX-related MSRs depend on whether // nested virtualization is allowed. 
// https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/tree/arch/x86/kvm/vmx/vmx.c?h=v5.10.176#n1950 // - MSR_IA32_VMX_BASIC (0x480) // - MSR_IA32_VMX_PINBASED_CTLS (0x481) // - MSR_IA32_VMX_PROCBASED_CTLS (0x482) // - MSR_IA32_VMX_EXIT_CTLS (0x483) // - MSR_IA32_VMX_ENTRY_CTLS (0x484) // - MSR_IA32_VMX_MISC (0x485) // - MSR_IA32_VMX_CR0_FIXED0 (0x486) // - MSR_IA32_VMX_CR0_FIXED1 (0x487) // - MSR_IA32_VMX_CR4_FIXED0 (0x488) // - MSR_IA32_VMX_CR4_FIXED1 (0x489) // - MSR_IA32_VMX_VMCS_ENUM (0x48A) // - MSR_IA32_VMX_PROCBASED_CTLS2 (0x48B) // - MSR_IA32_VMX_EPT_VPID_CAP (0x48C) // - MSR_IA32_VMX_TRUE_PINBASED_CTLS (0x48D) // - MSR_IA32_VMX_TRUE_PROCBASED_CTLS (0x48E) // - MSR_IA32_VMX_TRUE_EXIT_CTLS (0x48F) // - MSR_IA32_VMX_TRUE_ENTRY_CTLS (0x490) // - MSR_IA32_VMX_VMFUNC (0x491) MSR_RANGE!(MSR_IA32_VMX_BASIC, 18), // Firecracker doesn't work with Hyper-V. KVM_GET_MSRS fails on kernel 4.14 because it doesn't // have the following patch. // https://github.com/torvalds/linux/commit/44883f01fe6ae436a8604c47d8435276fef369b0 // - HV_X64_MSR_GUEST_OS_ID (0x40000000) // - HV_X64_MSR_HYPERCALL (0x40000001) // - HV_X64_MSR_VP_INDEX (0x40000002) // - HV_X64_MSR_RESET (0x40000003) // - HV_X64_MSR_VP_RUNTIME (0x40000010) // - HV_X64_MSR_TIME_REF_COUNT (0x40000020) // - HV_X64_MSR_REFERENCE_TSC (0x40000021) // - HV_X64_MSR_TSC_FREQUENCY (0x40000022) // - HV_X64_MSR_APIC_FREQUENCY (0x40000023) // - HV_X64_MSR_VP_ASSIST_PAGE (0x40000073) // - HV_X64_MSR_SCONTROL (0x40000080) // - HV_X64_MSR_STIMER0_CONFIG (0x400000b0) // - HV_X64_MSR_SYNDBG_CONTROL (0x400000f1) // - HV_X64_MSR_SYNDBG_STATUS (0x400000f2) // - HV_X64_MSR_SYNDBG_SEND_BUFFER (0x400000f3) // - HV_X64_MSR_SYNDBG_RECV_BUFFER (0x400000f4) // - HV_X64_MSR_SYNDBG_PENDING_BUFFER (0x400000f5) // - HV_X64_MSR_SYNDBG_OPTIONS (0x400000ff) // - HV_X64_MSR_CRASH_Pn (0x40000100..=0x40000104) // - HV_X64_MSR_CRASH_CTL (0x40000105) // - HV_X64_MSR_REENLIGHTENMENT_CONTROL (0x40000106) // - HV_X64_MSR_TSC_EMULATION_CONTROL 
(0x40000107) // - HV_X64_MSR_TSC_EMULATION_STATUS (0x40000108) // - HV_X64_MSR_TSC_INVARIANT_CONTROL (0x40000118) MSR_RANGE!(HV_X64_MSR_GUEST_OS_ID, 4), MSR_RANGE!(HV_X64_MSR_VP_RUNTIME), MSR_RANGE!(HV_X64_MSR_TIME_REF_COUNT, 4), MSR_RANGE!(HV_X64_MSR_SCONTROL), MSR_RANGE!(HV_X64_MSR_VP_ASSIST_PAGE), MSR_RANGE!(HV_X64_MSR_STIMER0_CONFIG), MSR_RANGE!(HV_X64_MSR_SYNDBG_CONTROL, 5), MSR_RANGE!(HV_X64_MSR_SYNDBG_OPTIONS), MSR_RANGE!(HV_X64_MSR_CRASH_P0, 6), MSR_RANGE!(HV_X64_MSR_REENLIGHTENMENT_CONTROL, 3), MSR_RANGE!(HV_X64_MSR_TSC_INVARIANT_CONTROL), ]; /// Checks whether a particular MSR can be dumped. /// /// # Arguments /// /// * `index` - The index of the MSR that is checked whether it's needed for serialization. pub fn msr_is_dumpable(index: u32) -> bool { !UNDUMPABLE_MSR_RANGES .iter() .any(|range| range.contains(index)) } /// Returns the list of dumpable MSR indices. /// /// # Arguments /// /// * `kvm_fd` - Ref to `Kvm` /// /// # Errors /// /// When: /// - [`kvm_ioctls::Kvm::get_msr_index_list()`] errors. pub fn get_msrs_to_dump(kvm_fd: &Kvm) -> Result { let mut msr_index_list = kvm_fd .get_msr_index_list() .map_err(MsrError::GetMsrIndexList)?; msr_index_list.retain(|msr_index| msr_is_dumpable(*msr_index)); Ok(msr_index_list) } /// Creates and populates required MSR entries for booting Linux on X86_64. pub fn create_boot_msr_entries() -> Vec { let msr_entry_default = |msr| kvm_msr_entry { index: msr, data: 0x0, ..Default::default() }; vec![ msr_entry_default(MSR_IA32_SYSENTER_CS), msr_entry_default(MSR_IA32_SYSENTER_ESP), msr_entry_default(MSR_IA32_SYSENTER_EIP), // x86_64 specific msrs, we only run on x86_64 not x86. 
msr_entry_default(MSR_STAR), msr_entry_default(MSR_CSTAR), msr_entry_default(MSR_KERNEL_GS_BASE), msr_entry_default(MSR_SYSCALL_MASK), msr_entry_default(MSR_LSTAR), // end of x86_64 specific code msr_entry_default(MSR_IA32_TSC), kvm_msr_entry { index: MSR_IA32_MISC_ENABLE, data: u64::from(MSR_IA32_MISC_ENABLE_FAST_STRING), ..Default::default() }, // set default memory type for physical memory outside configured // memory ranges to write-back by setting MTRR enable bit (11) and // setting memory type to write-back (value 6). // https://wiki.osdev.org/MTRR kvm_msr_entry { index: MSR_MTRRdefType, data: (1 << 11) | 0x6, ..Default::default() }, ] } /// Configure Model Specific Registers (MSRs) required to boot Linux for a given x86_64 vCPU. /// /// # Arguments /// /// * `vcpu` - Structure for the VCPU that holds the VCPU's fd. /// /// # Errors /// /// When: /// - Failed to create [`vmm_sys_util::fam::FamStructWrapper`] for MSRs. /// - [`kvm_ioctls::ioctls::vcpu::VcpuFd::set_msrs`] errors. /// - [`kvm_ioctls::ioctls::vcpu::VcpuFd::set_msrs`] fails to write all given MSRs entries. 
pub fn set_msrs(vcpu: &VcpuFd, msr_entries: &[kvm_msr_entry]) -> Result<(), MsrError> { let msrs = Msrs::from_entries(msr_entries)?; vcpu.set_msrs(&msrs) .map_err(MsrError::SetMsrs) .and_then(|msrs_written| { if msrs_written == msrs.as_fam_struct_ref().nmsrs as usize { Ok(()) } else { Err(MsrError::SetMsrsIncomplete) } }) } #[cfg(test)] mod tests { use kvm_ioctls::Kvm; use super::*; fn create_vcpu() -> VcpuFd { let kvm = Kvm::new().unwrap(); let vm = kvm.create_vm().unwrap(); vm.create_vcpu(0).unwrap() } #[test] fn test_msr_list_to_serialize() { for range in SERIALIZABLE_MSR_RANGES.iter() { for msr in range.base..(range.base + range.nmsrs) { let should = !matches!(msr, MSR_IA32_MCG_CTL); assert_eq!(msr_should_serialize(msr), should); } } } #[test] fn test_msr_list_to_dump() { for range in UNDUMPABLE_MSR_RANGES.iter() { for msr in range.base..(range.base + range.nmsrs) { assert!(!msr_is_dumpable(msr)); } } } #[test] #[allow(clippy::cast_ptr_alignment)] fn test_setup_msrs() { let vcpu = create_vcpu(); let msr_boot_entries = create_boot_msr_entries(); set_msrs(&vcpu, &msr_boot_entries).unwrap(); // This test will check against the last MSR entry configured (the tenth one). // See create_msr_entries() for details. let test_kvm_msrs_entry = [kvm_msr_entry { index: MSR_IA32_MISC_ENABLE, ..Default::default() }]; let mut kvm_msrs_wrapper = Msrs::from_entries(&test_kvm_msrs_entry).unwrap(); // Get_msrs() returns the number of msrs that it succeed in reading. // We only want to read one in this test case scenario. let read_nmsrs = vcpu.get_msrs(&mut kvm_msrs_wrapper).unwrap(); // Validate it only read one. assert_eq!(read_nmsrs, 1); // Official entries that were setup when we did setup_msrs. We need to assert that the // tenth one (i.e the one with index MSR_IA32_MISC_ENABLE has the data we // expect. 
let entry_vec = create_boot_msr_entries(); assert_eq!(entry_vec[9], kvm_msrs_wrapper.as_slice()[0]); } #[test] fn test_set_valid_msrs() { // Test `set_msrs()` with a valid MSR entry. It should succeed, as IA32_TSC MSR is listed // in supported MSRs as of now. let vcpu = create_vcpu(); let msr_entries = vec![kvm_msr_entry { index: MSR_IA32_TSC, data: 0, ..Default::default() }]; set_msrs(&vcpu, &msr_entries).unwrap(); } #[test] fn test_set_invalid_msrs() { // Test `set_msrs()` with an invalid MSR entry. It should fail, as MSR index 2 is not // listed in supported MSRs as of now. If hardware vendor adds this MSR index and KVM // supports this MSR, we need to change the index as needed. let vcpu = create_vcpu(); let msr_entries = vec![kvm_msr_entry { index: 2, ..Default::default() }]; assert_eq!( set_msrs(&vcpu, &msr_entries).unwrap_err(), MsrError::SetMsrsIncomplete ); } } ================================================ FILE: src/vmm/src/arch/x86_64/regs.rs ================================================ // Copyright © 2020, Oracle and/or its affiliates. // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 // // Portions Copyright 2017 The Chromium OS Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the THIRD-PARTY file. use std::mem; use kvm_bindings::{kvm_fpu, kvm_regs, kvm_sregs}; use kvm_ioctls::VcpuFd; use super::super::{BootProtocol, EntryPoint}; use super::gdt::{gdt_entry, kvm_segment_from_gdt}; use crate::vstate::memory::{Address, Bytes, GuestAddress, GuestMemory, GuestMemoryMmap}; // Initial pagetables. const PML4_START: u64 = 0x9000; const PDPTE_START: u64 = 0xa000; const PDE_START: u64 = 0xb000; /// Errors thrown while setting up x86_64 registers. 
#[derive(Debug, thiserror::Error, displaydoc::Display, PartialEq, Eq)] pub enum RegsError { /// Failed to get SREGs for this CPU: {0} GetStatusRegisters(kvm_ioctls::Error), /// Failed to set base registers for this CPU: {0} SetBaseRegisters(kvm_ioctls::Error), /// Failed to configure the FPU: {0} SetFPURegisters(kvm_ioctls::Error), /// Failed to set SREGs for this CPU: {0} SetStatusRegisters(kvm_ioctls::Error), /// Writing the GDT to RAM failed. WriteGDT, /// Writing the IDT to RAM failed WriteIDT, /// WritePDPTEAddress WritePDPTEAddress, /// WritePDEAddress WritePDEAddress, /// WritePML4Address WritePML4Address, } /// Error type for [`setup_fpu`]. #[derive(Debug, derive_more::From, PartialEq, Eq, thiserror::Error)] #[error("Failed to setup FPU: {0}")] pub struct SetupFpuError(vmm_sys_util::errno::Error); /// Configure Floating-Point Unit (FPU) registers for a given CPU. /// /// # Arguments /// /// * `vcpu` - Structure for the VCPU that holds the VCPU's fd. /// /// # Errors /// /// When [`kvm_ioctls::ioctls::vcpu::VcpuFd::set_fpu`] errors. pub fn setup_fpu(vcpu: &VcpuFd) -> Result<(), SetupFpuError> { let fpu: kvm_fpu = kvm_fpu { fcw: 0x37f, mxcsr: 0x1f80, ..Default::default() }; vcpu.set_fpu(&fpu).map_err(SetupFpuError) } /// Error type of [`setup_regs`]. #[derive(Debug, derive_more::From, PartialEq, Eq, thiserror::Error)] #[error("Failed to setup registers: {0}")] pub struct SetupRegistersError(vmm_sys_util::errno::Error); /// Configure base registers for a given CPU. /// /// # Arguments /// /// * `vcpu` - Structure for the VCPU that holds the VCPU's fd. /// * `boot_ip` - Starting instruction pointer. /// /// # Errors /// /// When [`kvm_ioctls::ioctls::vcpu::VcpuFd::set_regs`] errors. pub fn setup_regs(vcpu: &VcpuFd, entry_point: EntryPoint) -> Result<(), SetupRegistersError> { let regs: kvm_regs = match entry_point.protocol { BootProtocol::PvhBoot => kvm_regs { // Configure regs as required by PVH boot protocol. 
rflags: 0x0000_0000_0000_0002u64, rbx: super::layout::PVH_INFO_START, rip: entry_point.entry_addr.raw_value(), ..Default::default() }, BootProtocol::LinuxBoot => kvm_regs { // Configure regs as required by Linux 64-bit boot protocol. rflags: 0x0000_0000_0000_0002u64, rip: entry_point.entry_addr.raw_value(), // Frame pointer. It gets a snapshot of the stack pointer (rsp) so that when adjustments // are made to rsp (i.e. reserving space for local variables or pushing // values on to the stack), local variables and function parameters are // still accessible from a constant offset from rbp. rsp: super::layout::BOOT_STACK_POINTER, // Starting stack pointer. rbp: super::layout::BOOT_STACK_POINTER, // Must point to zero page address per Linux ABI. This is x86_64 specific. rsi: super::layout::ZERO_PAGE_START, ..Default::default() }, }; vcpu.set_regs(®s).map_err(SetupRegistersError) } /// Error type for [`setup_sregs`]. #[derive(Debug, thiserror::Error, displaydoc::Display, PartialEq, Eq)] pub enum SetupSpecialRegistersError { /// Failed to get special registers: {0} GetSpecialRegisters(vmm_sys_util::errno::Error), /// Failed to configure segments and special registers: {0} ConfigureSegmentsAndSpecialRegisters(RegsError), /// Failed to setup page tables: {0} SetupPageTables(RegsError), /// Failed to set special registers: {0} SetSpecialRegisters(vmm_sys_util::errno::Error), } /// Configures the special registers and system page tables for a given CPU. /// /// # Arguments /// /// * `mem` - The memory that will be passed to the guest. /// * `vcpu` - Structure for the VCPU that holds the VCPU's fd. /// * `boot_prot` - The boot protocol being used. /// /// # Errors /// /// When: /// - [`kvm_ioctls::ioctls::vcpu::VcpuFd::get_sregs`] errors. /// - [`configure_segments_and_sregs`] errors. /// - [`setup_page_tables`] errors /// - [`kvm_ioctls::ioctls::vcpu::VcpuFd::set_sregs`] errors. 
pub fn setup_sregs( mem: &GuestMemoryMmap, vcpu: &VcpuFd, boot_prot: BootProtocol, ) -> Result<(), SetupSpecialRegistersError> { let mut sregs: kvm_sregs = vcpu .get_sregs() .map_err(SetupSpecialRegistersError::GetSpecialRegisters)?; configure_segments_and_sregs(mem, &mut sregs, boot_prot) .map_err(SetupSpecialRegistersError::ConfigureSegmentsAndSpecialRegisters)?; if let BootProtocol::LinuxBoot = boot_prot { setup_page_tables(mem, &mut sregs).map_err(SetupSpecialRegistersError::SetupPageTables)?; // TODO(dgreid) - Can this be done once per system instead? } vcpu.set_sregs(&sregs) .map_err(SetupSpecialRegistersError::SetSpecialRegisters) } const BOOT_GDT_OFFSET: u64 = 0x500; const BOOT_IDT_OFFSET: u64 = 0x520; const BOOT_GDT_MAX: usize = 4; const EFER_LMA: u64 = 0x400; const EFER_LME: u64 = 0x100; const X86_CR0_PE: u64 = 0x1; const X86_CR0_ET: u64 = 0x10; const X86_CR0_PG: u64 = 0x8000_0000; const X86_CR4_PAE: u64 = 0x20; fn write_gdt_table(table: &[u64], guest_mem: &GuestMemoryMmap) -> Result<(), RegsError> { let boot_gdt_addr = GuestAddress(BOOT_GDT_OFFSET); for (index, entry) in table.iter().enumerate() { let addr = guest_mem .checked_offset(boot_gdt_addr, index * mem::size_of::()) .ok_or(RegsError::WriteGDT)?; guest_mem .write_obj(*entry, addr) .map_err(|_| RegsError::WriteGDT)?; } Ok(()) } fn write_idt_value(val: u64, guest_mem: &GuestMemoryMmap) -> Result<(), RegsError> { let boot_idt_addr = GuestAddress(BOOT_IDT_OFFSET); guest_mem .write_obj(val, boot_idt_addr) .map_err(|_| RegsError::WriteIDT) } fn configure_segments_and_sregs( mem: &GuestMemoryMmap, sregs: &mut kvm_sregs, boot_prot: BootProtocol, ) -> Result<(), RegsError> { let gdt_table: [u64; BOOT_GDT_MAX] = match boot_prot { BootProtocol::PvhBoot => { // Configure GDT entries as specified by PVH boot protocol [ gdt_entry(0, 0, 0), // NULL gdt_entry(0xc09b, 0, 0xffff_ffff), // CODE gdt_entry(0xc093, 0, 0xffff_ffff), // DATA gdt_entry(0x008b, 0, 0x67), // TSS ] } BootProtocol::LinuxBoot => { // Configure 
GDT entries as specified by Linux 64bit boot protocol [ gdt_entry(0, 0, 0), // NULL gdt_entry(0xa09b, 0, 0xfffff), // CODE gdt_entry(0xc093, 0, 0xfffff), // DATA gdt_entry(0x808b, 0, 0xfffff), // TSS ] } }; let code_seg = kvm_segment_from_gdt(gdt_table[1], 1); let data_seg = kvm_segment_from_gdt(gdt_table[2], 2); let tss_seg = kvm_segment_from_gdt(gdt_table[3], 3); // Write segments write_gdt_table(&gdt_table[..], mem)?; sregs.gdt.base = BOOT_GDT_OFFSET; sregs.gdt.limit = u16::try_from(mem::size_of_val(&gdt_table)).unwrap() - 1; write_idt_value(0, mem)?; sregs.idt.base = BOOT_IDT_OFFSET; sregs.idt.limit = u16::try_from(mem::size_of::()).unwrap() - 1; sregs.cs = code_seg; sregs.ds = data_seg; sregs.es = data_seg; sregs.fs = data_seg; sregs.gs = data_seg; sregs.ss = data_seg; sregs.tr = tss_seg; match boot_prot { BootProtocol::PvhBoot => { sregs.cr0 = X86_CR0_PE | X86_CR0_ET; sregs.cr4 = 0; } BootProtocol::LinuxBoot => { // 64-bit protected mode sregs.cr0 |= X86_CR0_PE; sregs.efer |= EFER_LME | EFER_LMA; } } Ok(()) } fn setup_page_tables(mem: &GuestMemoryMmap, sregs: &mut kvm_sregs) -> Result<(), RegsError> { // Puts PML4 right after zero page but aligned to 4k. let boot_pml4_addr = GuestAddress(PML4_START); let boot_pdpte_addr = GuestAddress(PDPTE_START); let boot_pde_addr = GuestAddress(PDE_START); // Entry covering VA [0..512GB) mem.write_obj(boot_pdpte_addr.raw_value() | 0x03, boot_pml4_addr) .map_err(|_| RegsError::WritePML4Address)?; // Entry covering VA [0..1GB) mem.write_obj(boot_pde_addr.raw_value() | 0x03, boot_pdpte_addr) .map_err(|_| RegsError::WritePDPTEAddress)?; // 512 2MB entries together covering VA [0..1GB). Note we are assuming // CPU supports 2MB pages (/proc/cpuinfo has 'pse'). All modern CPUs do. 
for i in 0..512 { mem.write_obj((i << 21) + 0x83u64, boot_pde_addr.unchecked_add(i * 8)) .map_err(|_| RegsError::WritePDEAddress)?; } sregs.cr3 = boot_pml4_addr.raw_value(); sregs.cr4 |= X86_CR4_PAE; sregs.cr0 |= X86_CR0_PG; Ok(()) } #[cfg(test)] mod tests { #![allow(clippy::cast_possible_truncation)] use kvm_ioctls::Kvm; use super::*; use crate::test_utils::single_region_mem; use crate::vstate::memory::{Bytes, GuestAddress, GuestMemoryMmap}; fn read_u64(gm: &GuestMemoryMmap, offset: u64) -> u64 { let read_addr = GuestAddress(offset); gm.read_obj(read_addr).unwrap() } fn validate_segments_and_sregs( gm: &GuestMemoryMmap, sregs: &kvm_sregs, boot_prot: BootProtocol, ) { if let BootProtocol::LinuxBoot = boot_prot { assert_eq!(0xaf_9b00_0000_ffff, read_u64(gm, BOOT_GDT_OFFSET + 8)); assert_eq!(0xcf_9300_0000_ffff, read_u64(gm, BOOT_GDT_OFFSET + 16)); assert_eq!(0x8f_8b00_0000_ffff, read_u64(gm, BOOT_GDT_OFFSET + 24)); assert_eq!(0xffff_ffff, sregs.tr.limit); assert!(sregs.cr0 & X86_CR0_PE != 0); assert!(sregs.efer & EFER_LME != 0 && sregs.efer & EFER_LMA != 0); } else { // Validate values that are specific to PVH boot protocol assert_eq!(0xcf_9b00_0000_ffff, read_u64(gm, BOOT_GDT_OFFSET + 8)); assert_eq!(0xcf_9300_0000_ffff, read_u64(gm, BOOT_GDT_OFFSET + 16)); assert_eq!(0x00_8b00_0000_0067, read_u64(gm, BOOT_GDT_OFFSET + 24)); assert_eq!(0x67, sregs.tr.limit); assert_eq!(0, sregs.tr.g); assert!(sregs.cr0 & X86_CR0_PE != 0 && sregs.cr0 & X86_CR0_ET != 0); assert_eq!(0, sregs.cr4); } // Common settings for both PVH and Linux boot protocol assert_eq!(0x0, read_u64(gm, BOOT_GDT_OFFSET)); assert_eq!(0x0, read_u64(gm, BOOT_IDT_OFFSET)); assert_eq!(0, sregs.cs.base); assert_eq!(0xffff_ffff, sregs.ds.limit); assert_eq!(0x10, sregs.es.selector); assert_eq!(1, sregs.fs.present); assert_eq!(1, sregs.gs.g); assert_eq!(0, sregs.ss.avl); assert_eq!(0, sregs.tr.base); assert_eq!(0, sregs.tr.avl); } fn validate_page_tables(gm: &GuestMemoryMmap, sregs: &kvm_sregs) { 
assert_eq!(0xa003, read_u64(gm, PML4_START)); assert_eq!(0xb003, read_u64(gm, PDPTE_START)); for i in 0..512 { assert_eq!((i << 21) + 0x83u64, read_u64(gm, PDE_START + (i * 8))); } assert_eq!(PML4_START, sregs.cr3); assert!(sregs.cr4 & X86_CR4_PAE != 0); assert!(sregs.cr0 & X86_CR0_PG != 0); } #[test] fn test_setup_fpu() { let kvm = Kvm::new().unwrap(); let vm = kvm.create_vm().unwrap(); let vcpu = vm.create_vcpu(0).unwrap(); setup_fpu(&vcpu).unwrap(); let expected_fpu: kvm_fpu = kvm_fpu { fcw: 0x37f, mxcsr: 0x1f80, ..Default::default() }; let actual_fpu: kvm_fpu = vcpu.get_fpu().unwrap(); // TODO: auto-generate kvm related structures with PartialEq on. assert_eq!(expected_fpu.fcw, actual_fpu.fcw); // Setting the mxcsr register from kvm_fpu inside setup_fpu does not influence anything. // See 'kvm_arch_vcpu_ioctl_set_fpu' from arch/x86/kvm/x86.c. // The mxcsr will stay 0 and the assert below fails. Decide whether or not we should // remove it at all. // assert!(expected_fpu.mxcsr == actual_fpu.mxcsr); } #[test] fn test_setup_regs() { let kvm = Kvm::new().unwrap(); let vm = kvm.create_vm().unwrap(); let vcpu = vm.create_vcpu(0).unwrap(); let expected_regs: kvm_regs = kvm_regs { rflags: 0x0000_0000_0000_0002u64, rip: 1, rsp: super::super::layout::BOOT_STACK_POINTER, rbp: super::super::layout::BOOT_STACK_POINTER, rsi: super::super::layout::ZERO_PAGE_START, ..Default::default() }; let entry_point: EntryPoint = EntryPoint { entry_addr: GuestAddress(expected_regs.rip), protocol: BootProtocol::LinuxBoot, }; setup_regs(&vcpu, entry_point).unwrap(); let actual_regs: kvm_regs = vcpu.get_regs().unwrap(); assert_eq!(actual_regs, expected_regs); } #[test] fn test_setup_sregs() { let kvm = Kvm::new().unwrap(); let vm = kvm.create_vm().unwrap(); let vcpu = vm.create_vcpu(0).unwrap(); let gm = single_region_mem(0x10000); [BootProtocol::LinuxBoot, BootProtocol::PvhBoot] .iter() .for_each(|boot_prot| { vcpu.set_sregs(&Default::default()).unwrap(); setup_sregs(&gm, &vcpu, 
*boot_prot).unwrap(); let mut sregs: kvm_sregs = vcpu.get_sregs().unwrap(); // for AMD KVM_GET_SREGS returns g = 0 for each kvm_segment. // We set it to 1, otherwise the test will fail. sregs.gs.g = 1; validate_segments_and_sregs(&gm, &sregs, *boot_prot); if let BootProtocol::LinuxBoot = *boot_prot { validate_page_tables(&gm, &sregs); } }); } #[test] fn test_write_gdt_table() { // Not enough memory for the gdt table to be written. let gm = single_region_mem(BOOT_GDT_OFFSET as usize); let gdt_table: [u64; BOOT_GDT_MAX] = [ gdt_entry(0, 0, 0), // NULL gdt_entry(0xa09b, 0, 0xfffff), // CODE gdt_entry(0xc093, 0, 0xfffff), // DATA gdt_entry(0x808b, 0, 0xfffff), // TSS ]; write_gdt_table(&gdt_table, &gm).unwrap_err(); // We allocate exactly the amount needed to write four u64 to `BOOT_GDT_OFFSET`. let gm = single_region_mem(BOOT_GDT_OFFSET as usize + (mem::size_of::() * BOOT_GDT_MAX)); let gdt_table: [u64; BOOT_GDT_MAX] = [ gdt_entry(0, 0, 0), // NULL gdt_entry(0xa09b, 0, 0xfffff), // CODE gdt_entry(0xc093, 0, 0xfffff), // DATA gdt_entry(0x808b, 0, 0xfffff), // TSS ]; write_gdt_table(&gdt_table, &gm).unwrap(); } #[test] fn test_write_idt_table() { // Not enough memory for the a u64 value to fit. let gm = single_region_mem(BOOT_IDT_OFFSET as usize); let val = 0x100; write_idt_value(val, &gm).unwrap_err(); let gm = single_region_mem(BOOT_IDT_OFFSET as usize + mem::size_of::()); // We have allocated exactly the amount neded to write an u64 to `BOOT_IDT_OFFSET`. 
write_idt_value(val, &gm).unwrap(); } #[test] fn test_configure_segments_and_sregs() { let mut sregs: kvm_sregs = Default::default(); let gm = single_region_mem(0x10000); configure_segments_and_sregs(&gm, &mut sregs, BootProtocol::LinuxBoot).unwrap(); validate_segments_and_sregs(&gm, &sregs, BootProtocol::LinuxBoot); configure_segments_and_sregs(&gm, &mut sregs, BootProtocol::PvhBoot).unwrap(); validate_segments_and_sregs(&gm, &sregs, BootProtocol::PvhBoot); } #[test] fn test_setup_page_tables() { let mut sregs: kvm_sregs = Default::default(); let gm = single_region_mem(PML4_START as usize); setup_page_tables(&gm, &mut sregs).unwrap_err(); let gm = single_region_mem(PDPTE_START as usize); setup_page_tables(&gm, &mut sregs).unwrap_err(); let gm = single_region_mem(PDE_START as usize); setup_page_tables(&gm, &mut sregs).unwrap_err(); let gm = single_region_mem(0x10000); setup_page_tables(&gm, &mut sregs).unwrap(); validate_page_tables(&gm, &sregs); } } ================================================ FILE: src/vmm/src/arch/x86_64/vcpu.rs ================================================ // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 // // Portions Copyright 2017 The Chromium OS Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the THIRD-PARTY file. 
use std::collections::BTreeMap;
use std::fmt::Debug;
use std::sync::Arc;

use kvm_bindings::{
    CpuId, KVM_MAX_CPUID_ENTRIES, KVM_MAX_MSR_ENTRIES, Msrs, Xsave, kvm_debugregs,
    kvm_lapic_state, kvm_mp_state, kvm_regs, kvm_sregs, kvm_vcpu_events, kvm_xcrs, kvm_xsave,
    kvm_xsave2,
};
use kvm_ioctls::{VcpuExit, VcpuFd};
use log::{error, warn};
use serde::{Deserialize, Serialize};
use vmm_sys_util::fam::{self, FamStruct};

use crate::arch::EntryPoint;
use crate::arch::x86_64::generated::msr_index::{MSR_IA32_TSC, MSR_IA32_TSC_DEADLINE};
use crate::arch::x86_64::interrupts;
use crate::arch::x86_64::msr::{MsrError, create_boot_msr_entries};
use crate::arch::x86_64::regs::{SetupFpuError, SetupRegistersError, SetupSpecialRegistersError};
use crate::cpu_config::x86_64::{CpuConfiguration, cpuid};
use crate::logger::{IncMetric, METRICS};
use crate::vstate::bus::Bus;
use crate::vstate::memory::GuestMemoryMmap;
use crate::vstate::vcpu::{VcpuConfig, VcpuEmulation, VcpuError};
use crate::vstate::vm::Vm;

// Tolerance for TSC frequency expected variation.
// The value of 250 parts per million is based on
// the QEMU approach, more details here:
// https://bugzilla.redhat.com/show_bug.cgi?id=1839095
const TSC_KHZ_TOL_NUMERATOR: i64 = 250;
const TSC_KHZ_TOL_DENOMINATOR: i64 = 1_000_000;

/// A set of MSRs that should be restored separately after all other MSRs have already been restored
const DEFERRED_MSRS: [u32; 1] = [
    // MSR_IA32_TSC_DEADLINE must be restored after MSR_IA32_TSC, otherwise we risk "losing" timer
    // interrupts across the snapshot restore boundary (due to KVM querying MSR_IA32_TSC upon
    // writes to the TSC_DEADLINE MSR to determine whether it needs to prime a timer - if
    // MSR_IA32_TSC is not initialized correctly, it can wrongly assume no timer needs to be
    // primed, or the timer can be initialized with a wrong expiry).
    MSR_IA32_TSC_DEADLINE,
];

/// Errors associated with the wrappers over KVM ioctls.
// NOTE: the `///` variant docs below double as the user-visible Display
// strings (via displaydoc) — do not reword them.
#[derive(Debug, PartialEq, Eq, thiserror::Error, displaydoc::Display)]
pub enum KvmVcpuError {
    /// Failed to convert `kvm_bindings::CpuId` to `Cpuid`: {0}
    ConvertCpuidType(#[from] cpuid::CpuidTryFromKvmCpuid),
    /// Failed FamStructWrapper operation: {0}
    Fam(#[from] vmm_sys_util::fam::Error),
    /// Failed to get dumpable MSR index list: {0}
    GetMsrsToDump(#[from] crate::arch::x86_64::msr::MsrError),
    /// Cannot open the VCPU file descriptor: {0}
    VcpuFd(kvm_ioctls::Error),
    /// Failed to get KVM vcpu debug regs: {0}
    VcpuGetDebugRegs(kvm_ioctls::Error),
    /// Failed to get KVM vcpu lapic: {0}
    VcpuGetLapic(kvm_ioctls::Error),
    /// Failed to get KVM vcpu mp state: {0}
    VcpuGetMpState(kvm_ioctls::Error),
    /// Failed to get KVM vcpu msr: {0:#x}
    VcpuGetMsr(u32),
    /// Failed to get KVM vcpu msrs: {0}
    VcpuGetMsrs(kvm_ioctls::Error),
    /// Failed to get KVM vcpu regs: {0}
    VcpuGetRegs(kvm_ioctls::Error),
    /// Failed to get KVM vcpu sregs: {0}
    VcpuGetSregs(kvm_ioctls::Error),
    /// Failed to get KVM vcpu event: {0}
    VcpuGetVcpuEvents(kvm_ioctls::Error),
    /// Failed to get KVM vcpu xcrs: {0}
    VcpuGetXcrs(kvm_ioctls::Error),
    /// Failed to get KVM vcpu xsave via KVM_GET_XSAVE: {0}
    VcpuGetXsave(kvm_ioctls::Error),
    /// Failed to get KVM vcpu xsave via KVM_GET_XSAVE2: {0}
    VcpuGetXsave2(kvm_ioctls::Error),
    /// Failed to get KVM vcpu cpuid: {0}
    VcpuGetCpuid(kvm_ioctls::Error),
    /// Failed to get KVM TSC frequency: {0}
    VcpuGetTsc(kvm_ioctls::Error),
    /// Failed to set KVM vcpu cpuid: {0}
    VcpuSetCpuid(kvm_ioctls::Error),
    /// Failed to set KVM vcpu debug regs: {0}
    VcpuSetDebugRegs(kvm_ioctls::Error),
    /// Failed to set KVM vcpu lapic: {0}
    VcpuSetLapic(kvm_ioctls::Error),
    /// Failed to set KVM vcpu mp state: {0}
    VcpuSetMpState(kvm_ioctls::Error),
    /// Failed to set KVM vcpu msrs: {0}
    VcpuSetMsrs(kvm_ioctls::Error),
    /// Failed to set all KVM MSRs for this vCPU. Only a partial write was done.
    VcpuSetMsrsIncomplete,
    /// Failed to set KVM vcpu regs: {0}
    VcpuSetRegs(kvm_ioctls::Error),
    /// Failed to set KVM vcpu sregs: {0}
    VcpuSetSregs(kvm_ioctls::Error),
    /// Failed to set KVM vcpu event: {0}
    VcpuSetVcpuEvents(kvm_ioctls::Error),
    /// Failed to set KVM vcpu xcrs: {0}
    VcpuSetXcrs(kvm_ioctls::Error),
    /// Failed to set KVM vcpu xsave: {0}
    VcpuSetXsave(kvm_ioctls::Error),
}

/// Error type for [`KvmVcpu::get_tsc_khz`] and [`KvmVcpu::is_tsc_scaling_required`].
#[derive(Debug, thiserror::Error, derive_more::From, Eq, PartialEq)]
#[error("{0}")]
pub struct GetTscError(vmm_sys_util::errno::Error);

/// Error type for [`KvmVcpu::set_tsc_khz`].
#[derive(Debug, thiserror::Error, Eq, PartialEq)]
#[error("{0}")]
pub struct SetTscError(#[from] kvm_ioctls::Error);

/// Error type for [`KvmVcpu::configure`].
#[derive(Debug, thiserror::Error, displaydoc::Display, Eq, PartialEq)]
pub enum KvmVcpuConfigureError {
    /// Failed to convert `Cpuid` to `kvm_bindings::CpuId`: {0}
    ConvertCpuidType(#[from] vmm_sys_util::fam::Error),
    /// Failed to apply modifications to CPUID: {0}
    NormalizeCpuidError(#[from] cpuid::NormalizeCpuidError),
    /// Failed to set CPUID: {0}
    SetCpuid(#[from] vmm_sys_util::errno::Error),
    /// Failed to set MSRs: {0}
    SetMsrs(#[from] MsrError),
    /// Failed to setup registers: {0}
    SetupRegisters(#[from] SetupRegistersError),
    /// Failed to setup FPU: {0}
    SetupFpu(#[from] SetupFpuError),
    /// Failed to setup special registers: {0}
    SetupSpecialRegisters(#[from] SetupSpecialRegistersError),
    /// Failed to configure LAPICs: {0}
    SetLint(#[from] interrupts::InterruptError),
}

/// A wrapper around creating and using a kvm x86_64 vcpu.
#[derive(Debug)]
pub struct KvmVcpu {
    /// Index of vcpu.
    pub index: u8,
    /// KVM vcpu fd.
pub fd: VcpuFd, /// Vcpu peripherals, such as buses pub peripherals: Peripherals, /// The list of MSRs to include in a VM snapshot, in the same order as KVM returned them /// from KVM_GET_MSR_INDEX_LIST msrs_to_save: Vec, /// Size in bytes requiring to hold the dynamically-sized `kvm_xsave` struct. /// /// `None` if `KVM_CAP_XSAVE2` not supported. xsave2_size: Option, } /// Vcpu peripherals #[derive(Default, Debug)] pub struct Peripherals { /// Pio bus. pub pio_bus: Option>, /// Mmio bus. pub mmio_bus: Option>, } impl KvmVcpu { /// Constructs a new kvm vcpu with arch specific functionality. /// /// # Arguments /// /// * `index` - Represents the 0-based CPU index between [0, max vcpus). /// * `vm` - The vm to which this vcpu will get attached. pub fn new(index: u8, vm: &Vm) -> Result { let kvm_vcpu = vm .fd() .create_vcpu(index.into()) .map_err(KvmVcpuError::VcpuFd)?; Ok(KvmVcpu { index, fd: kvm_vcpu, peripherals: Default::default(), msrs_to_save: vm.msrs_to_save().to_vec(), xsave2_size: vm.xsave2_size(), }) } /// Configures a x86_64 specific vcpu for booting Linux and should be called once per vcpu. /// /// # Arguments /// /// * `guest_mem` - The guest memory used by this microvm. /// * `kernel_entry_point` - Specifies the boot protocol and offset from `guest_mem` at which /// the kernel starts. /// * `vcpu_config` - The vCPU configuration. /// * `cpuid` - The capabilities exposed by this vCPU. pub fn configure( &mut self, guest_mem: &GuestMemoryMmap, kernel_entry_point: EntryPoint, vcpu_config: &VcpuConfig, ) -> Result<(), KvmVcpuConfigureError> { let mut cpuid = vcpu_config.cpu_config.cpuid.clone(); // Apply machine specific changes to CPUID. cpuid.normalize( // The index of the current logical CPU in the range [0..cpu_count]. self.index, // The total number of logical CPUs. vcpu_config.vcpu_count, // The number of bits needed to enumerate logical CPUs per core. u8::from(vcpu_config.vcpu_count > 1 && vcpu_config.smt), )?; // Set CPUID. 
let kvm_cpuid = kvm_bindings::CpuId::try_from(cpuid)?; // Set CPUID in the KVM self.fd .set_cpuid2(&kvm_cpuid) .map_err(KvmVcpuConfigureError::SetCpuid)?; // Clone MSR entries that are modified by CPU template from `VcpuConfig`. let mut msrs = vcpu_config.cpu_config.msrs.clone(); self.msrs_to_save.extend(msrs.keys()); // Apply MSR modification to comply the linux boot protocol. create_boot_msr_entries().into_iter().for_each(|entry| { msrs.insert(entry.index, entry.data); }); // TODO - Add/amend MSRs for vCPUs based on cpu_config // By this point the Guest CPUID is established. Some CPU features require MSRs // to configure and interact with those features. If a MSR is writable from // inside the Guest, or is changed by KVM or Firecracker on behalf of the Guest, // then we will need to save it every time we take a snapshot, and restore its // value when we restore the microVM since the Guest may need that value. // Since CPUID tells us what features are enabled for the Guest, we can infer // the extra MSRs that we need to save based on a dependency map. let extra_msrs = cpuid::common::msrs_to_save_by_cpuid(&kvm_cpuid); self.msrs_to_save.extend(extra_msrs); // TODO: Some MSRs depend on values of other MSRs. This dependency will need to // be implemented. // By this point we know that at snapshot, the list of MSRs we need to // save is `architectural MSRs` + `MSRs inferred through CPUID` + `other // MSRs defined by the template` let kvm_msrs = msrs .into_iter() .map(|entry| kvm_bindings::kvm_msr_entry { index: entry.0, data: entry.1, ..Default::default() }) .collect::>(); crate::arch::x86_64::msr::set_msrs(&self.fd, &kvm_msrs)?; crate::arch::x86_64::regs::setup_regs(&self.fd, kernel_entry_point)?; crate::arch::x86_64::regs::setup_fpu(&self.fd)?; crate::arch::x86_64::regs::setup_sregs(guest_mem, &self.fd, kernel_entry_point.protocol)?; crate::arch::x86_64::interrupts::set_lint(&self.fd)?; Ok(()) } /// Sets a Port Mapped IO bus for this vcpu. 
pub fn set_pio_bus(&mut self, pio_bus: Arc) { self.peripherals.pio_bus = Some(pio_bus); } /// Calls KVM_KVMCLOCK_CTRL to avoid guest soft lockup watchdog panics on resume. /// See https://docs.kernel.org/virt/kvm/api.html . pub fn kvmclock_ctrl(&self) { // We do not want to fail if the call is not successful, because that may be acceptable // depending on the workload. For example, EINVAL is returned if kvm-clock is not // activated (e.g., no-kvmclock is specified in the guest kernel parameter). // https://elixir.bootlin.com/linux/v6.17.5/source/arch/x86/kvm/x86.c#L5736-L5737 if let Err(err) = self.fd.kvmclock_ctrl() { METRICS.vcpu.kvmclock_ctrl_fails.inc(); warn!("KVM_KVMCLOCK_CTRL call failed {}", err); } } /// Get the current XSAVE state for this vCPU. /// /// The C `kvm_xsave` struct was extended by adding a flexible array member (FAM) in the end /// to support variable-sized XSTATE buffer. /// /// https://elixir.bootlin.com/linux/v6.13.6/source/arch/x86/include/uapi/asm/kvm.h#L381 /// ```c /// struct kvm_xsave { /// __u32 region[1024]; /// __u32 extra[]; /// }; /// ``` /// /// As shown above, the C `kvm_xsave` struct does not have any field for the size of itself or /// the length of its FAM. The required size (in bytes) of `kvm_xsave` struct can be retrieved /// via `KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2)`. /// /// kvm-bindings defines `kvm_xsave2` struct that wraps the `kvm_xsave` struct to have `len` /// field that indicates the number of FAM entries (i.e. `extra`), it also defines `Xsave` as /// a `FamStructWrapper` of `kvm_xsave2`. /// /// https://github.com/rust-vmm/kvm/blob/68fff5491703bf32bd35656f7ba994a4cae9ea7d/kvm-bindings/src/x86_64/fam_wrappers.rs#L106 /// ```rs /// pub struct kvm_xsave2 { /// pub len: usize, /// pub xsave: kvm_xsave, /// } /// ``` fn get_xsave(&self) -> Result { match self.xsave2_size { // if `KVM_CAP_XSAVE2` supported Some(xsave2_size) => { // Convert the `kvm_xsave` size in bytes to the length of FAM (i.e. `extra`). 
let fam_len = // Calculate the size of FAM (`extra`) area in bytes. Note that the subtraction // never underflows because `KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2)` always returns // at least 4096 bytes that is the size of `kvm_xsave` without FAM area. (xsave2_size - std::mem::size_of::()) // Divide by the size of FAM (`extra`) entry (i.e. `__u32`). .div_ceil(std::mem::size_of::<::Entry>()); let mut xsave = Xsave::new(fam_len).map_err(KvmVcpuError::Fam)?; // SAFETY: Safe because `xsave` is allocated with enough size to save XSTATE. unsafe { self.fd.get_xsave2(&mut xsave) }.map_err(KvmVcpuError::VcpuGetXsave2)?; Ok(xsave) } // if `KVM_CAP_XSAVE2` not supported None => Ok( // SAFETY: The content is correctly laid out. unsafe { Xsave::from_raw(vec![kvm_xsave2 { // Note that `len` is the number of FAM (`extra`) entries that didn't exist // on older kernels not supporting `KVM_CAP_XSAVE2`. Thus, it's always zero. len: 0, xsave: self.fd.get_xsave().map_err(KvmVcpuError::VcpuGetXsave)?, }]) }, ), } } /// Get the current TSC frequency for this vCPU. /// /// # Errors /// /// When [`kvm_ioctls::VcpuFd::get_tsc_khz`] errors. pub fn get_tsc_khz(&self) -> Result { let res = self.fd.get_tsc_khz()?; Ok(res) } /// Get CPUID for this vCPU. /// /// Opposed to KVM_GET_SUPPORTED_CPUID, KVM_GET_CPUID2 does not update "nent" with valid number /// of entries on success. Thus, when it passes "num_entries" greater than required, zeroed /// entries follow after valid entries. This function removes such zeroed empty entries. /// /// # Errors /// /// * When [`kvm_ioctls::VcpuFd::get_cpuid2`] returns errors. fn get_cpuid(&self) -> Result { let mut cpuid = self .fd .get_cpuid2(KVM_MAX_CPUID_ENTRIES) .map_err(KvmVcpuError::VcpuGetCpuid)?; // As CPUID.0h:EAX should have the largest CPUID standard function, we don't need to check // EBX, ECX and EDX to confirm whether it is a valid entry. 
cpuid.retain(|entry| { !(entry.function == 0 && entry.index == 0 && entry.flags == 0 && entry.eax == 0) }); Ok(cpuid) } /// If the IA32_TSC_DEADLINE MSR value is zero, update it /// with the IA32_TSC value to guarantee that /// the vCPU will continue receiving interrupts after restoring from a snapshot. /// /// Rationale: we observed that sometimes when taking a snapshot, /// the IA32_TSC_DEADLINE MSR is cleared, but the interrupt is not /// delivered to the guest, leading to a situation where one /// of the vCPUs never receives TSC interrupts after restoring, /// until the MSR is updated externally, eg by setting the system time. fn fix_zero_tsc_deadline_msr(msr_chunks: &mut [Msrs]) { // We do not expect more than 1 TSC MSR entry, but if there are multiple, pick the maximum. let max_tsc_value = msr_chunks .iter() .flat_map(|msrs| msrs.as_slice()) .filter(|msr| msr.index == MSR_IA32_TSC) .map(|msr| msr.data) .max(); if let Some(tsc_value) = max_tsc_value { msr_chunks .iter_mut() .flat_map(|msrs| msrs.as_mut_slice()) .filter(|msr| msr.index == MSR_IA32_TSC_DEADLINE && msr.data == 0) .for_each(|msr| { warn!( "MSR_IA32_TSC_DEADLINE is 0, replacing with {:#x}.", tsc_value ); msr.data = tsc_value; }); } } /// Looks for MSRs from the [`DEFERRED_MSRS`] array and removes them from `msr_chunks`. /// Returns a new [`Msrs`] object containing all the removed MSRs. /// /// We use this to capture some causal dependencies between MSRs where the relative order /// of restoration matters (e.g. MSR_IA32_TSC must be restored before MSR_IA32_TSC_DEADLINE). fn extract_deferred_msrs(msr_chunks: &mut [Msrs]) -> Result { // Use 0 here as FamStructWrapper doesn't really give an equivalent of `Vec::with_capacity`, // and if we specify something N != 0 here, then it will create a FamStructWrapper with N // elements pre-allocated and zero'd out. Unless we then actually "fill" all those N values, // KVM will later yell at us about invalid MSRs. 
let mut deferred_msrs = Msrs::new(0)?; for msrs in msr_chunks { msrs.retain(|msr| { if DEFERRED_MSRS.contains(&msr.index) { deferred_msrs .push(*msr) .inspect_err(|err| { error!( "Failed to move MSR {} into later chunk: {:?}", msr.index, err ) }) .is_err() } else { true } }); } Ok(deferred_msrs) } /// Get MSR chunks for the given MSR index list. /// /// KVM only supports getting `KVM_MAX_MSR_ENTRIES` at a time, so we divide /// the list of MSR indices into chunks, call `KVM_GET_MSRS` for each /// chunk, and collect into a [`Vec`]. /// /// # Arguments /// /// * `msr_index_iter`: Iterator over MSR indices. /// /// # Errors /// /// * When [`kvm_bindings::Msrs::new`] returns errors. /// * When [`kvm_ioctls::VcpuFd::get_msrs`] returns errors. /// * When the return value of [`kvm_ioctls::VcpuFd::get_msrs`] (the number of entries that /// could be gotten) is less than expected. fn get_msr_chunks( &self, mut msr_index_iter: impl ExactSizeIterator, ) -> Result, KvmVcpuError> { let num_chunks = msr_index_iter.len().div_ceil(KVM_MAX_MSR_ENTRIES); // + 1 for the chunk of deferred MSRs let mut msr_chunks: Vec = Vec::with_capacity(num_chunks + 1); for _ in 0..num_chunks { let chunk_len = msr_index_iter.len().min(KVM_MAX_MSR_ENTRIES); let chunk = self.get_msr_chunk(&mut msr_index_iter, chunk_len)?; msr_chunks.push(chunk); } Self::fix_zero_tsc_deadline_msr(&mut msr_chunks); let deferred = Self::extract_deferred_msrs(&mut msr_chunks)?; msr_chunks.push(deferred); Ok(msr_chunks) } /// Get single MSR chunk for the given MSR index iterator with /// specified length. Iterator should have enough elements /// to fill the chunk with indices, otherwise KVM will /// return an error when processing half filled chunk. /// /// # Arguments /// /// * `msr_index_iter`: Iterator over MSR indices. /// * `chunk_size`: Length of a chunk. /// /// # Errors /// /// * When [`kvm_bindings::Msrs::new`] returns errors. /// * When [`kvm_ioctls::VcpuFd::get_msrs`] returns errors. 
/// * When the return value of [`kvm_ioctls::VcpuFd::get_msrs`] (the number of entries that /// could be gotten) is less than expected. pub fn get_msr_chunk( &self, msr_index_iter: impl Iterator, chunk_size: usize, ) -> Result { let chunk_iter = msr_index_iter.take(chunk_size); let mut msrs = Msrs::new(chunk_size)?; let msr_entries = msrs.as_mut_slice(); for (pos, msr_index) in chunk_iter.enumerate() { msr_entries[pos].index = msr_index; } let nmsrs = self .fd .get_msrs(&mut msrs) .map_err(KvmVcpuError::VcpuGetMsrs)?; // GET_MSRS returns a number of successfully set msrs. // If number of set msrs is not equal to the length of // `msrs`, then the value returned by GET_MSRS can act // as an index to the problematic msr. if nmsrs != chunk_size { Err(KvmVcpuError::VcpuGetMsr(msrs.as_slice()[nmsrs].index)) } else { Ok(msrs) } } /// Get MSRs for the given MSR index list. /// /// # Arguments /// /// * `msr_index_list`: List of MSR indices /// /// # Errors /// /// * When `KvmVcpu::get_msr_chunks()` returns errors. pub fn get_msrs( &self, msr_index_iter: impl ExactSizeIterator, ) -> Result, KvmVcpuError> { let mut msrs = BTreeMap::new(); self.get_msr_chunks(msr_index_iter)? .iter() .for_each(|msr_chunk| { msr_chunk.as_slice().iter().for_each(|msr| { msrs.insert(msr.index, msr.data); }); }); Ok(msrs) } /// Save the KVM internal state. pub fn save_state(&self) -> Result { // Ordering requirements: // // KVM_GET_MP_STATE calls kvm_apic_accept_events(), which might modify // vCPU/LAPIC state. As such, it must be done before most everything // else, otherwise we cannot restore everything and expect it to work. // // KVM_GET_VCPU_EVENTS/KVM_SET_VCPU_EVENTS is unsafe if other vCPUs are // still running. // // KVM_GET_LAPIC may change state of LAPIC before returning it. // // GET_VCPU_EVENTS should probably be last to save. The code looks as // it might as well be affected by internal state modifications of the // GET ioctls. 
// // SREGS saves/restores a pending interrupt, similar to what // VCPU_EVENTS also does. let mp_state = self .fd .get_mp_state() .map_err(KvmVcpuError::VcpuGetMpState)?; let regs = self.fd.get_regs().map_err(KvmVcpuError::VcpuGetRegs)?; let sregs = self.fd.get_sregs().map_err(KvmVcpuError::VcpuGetSregs)?; let xsave = self.get_xsave()?; let xcrs = self.fd.get_xcrs().map_err(KvmVcpuError::VcpuGetXcrs)?; let debug_regs = self .fd .get_debug_regs() .map_err(KvmVcpuError::VcpuGetDebugRegs)?; let lapic = self.fd.get_lapic().map_err(KvmVcpuError::VcpuGetLapic)?; let tsc_khz = self.get_tsc_khz().ok().or_else(|| { // v0.25 and newer snapshots without TSC will only work on // the same CPU model as the host on which they were taken. // TODO: Add negative test for this warning failure. warn!("TSC freq not available. Snapshot cannot be loaded on a different CPU model."); None }); let cpuid = self.get_cpuid()?; let saved_msrs = self.get_msr_chunks(self.msrs_to_save.iter().copied())?; let vcpu_events = self .fd .get_vcpu_events() .map_err(KvmVcpuError::VcpuGetVcpuEvents)?; Ok(VcpuState { cpuid, saved_msrs, debug_regs, lapic, mp_state, regs, sregs, vcpu_events, xcrs, xsave, tsc_khz, }) } /// Dumps CPU configuration (CPUID and MSRs). /// /// Opposed to `save_state()`, this dumps all the supported and dumpable MSRs not limited to /// serializable ones. pub fn dump_cpu_config(&self) -> Result { let cpuid = cpuid::Cpuid::try_from(self.get_cpuid()?)?; let kvm = kvm_ioctls::Kvm::new().unwrap(); let msr_index_list = crate::arch::x86_64::msr::get_msrs_to_dump(&kvm)?; let msrs = self.get_msrs(msr_index_list.as_slice().iter().copied())?; Ok(CpuConfiguration { cpuid, msrs }) } /// Checks whether the TSC needs scaling when restoring a snapshot. /// /// # Errors /// /// When pub fn is_tsc_scaling_required(&self, state_tsc_freq: u32) -> Result { // Compare the current TSC freq to the one found // in the state. If they are different, we need to // scale the TSC to the freq found in the state. 
// We accept values within a tolerance of 250 parts // per million because it is common for TSC frequency // to differ due to calibration at boot time. let diff = (i64::from(self.get_tsc_khz()?) - i64::from(state_tsc_freq)).abs(); // Cannot overflow since u32::MAX * 250 < i64::MAX Ok(diff > i64::from(state_tsc_freq) * TSC_KHZ_TOL_NUMERATOR / TSC_KHZ_TOL_DENOMINATOR) } /// Scale the TSC frequency of this vCPU to the one provided as a parameter. pub fn set_tsc_khz(&self, tsc_freq: u32) -> Result<(), SetTscError> { self.fd.set_tsc_khz(tsc_freq).map_err(SetTscError) } /// Use provided state to populate KVM internal state. pub fn restore_state(&self, state: &VcpuState) -> Result<(), KvmVcpuError> { // Ordering requirements: // // KVM_GET_VCPU_EVENTS/KVM_SET_VCPU_EVENTS is unsafe if other vCPUs are // still running. // // Some SET ioctls (like set_mp_state) depend on kvm_vcpu_is_bsp(), so // if we ever change the BSP, we have to do that before restoring anything. // The same seems to be true for CPUID stuff. // // SREGS saves/restores a pending interrupt, similar to what // VCPU_EVENTS also does. // // SET_REGS clears pending exceptions unconditionally, thus, it must be // done before SET_VCPU_EVENTS, which restores it. // // SET_LAPIC must come after SET_SREGS, because the latter restores // the apic base msr. // // SET_LAPIC must come before SET_MSRS, because the TSC deadline MSR // only restores successfully, when the LAPIC is correctly configured. self.fd .set_cpuid2(&state.cpuid) .map_err(KvmVcpuError::VcpuSetCpuid)?; self.fd .set_mp_state(state.mp_state) .map_err(KvmVcpuError::VcpuSetMpState)?; self.fd .set_regs(&state.regs) .map_err(KvmVcpuError::VcpuSetRegs)?; self.fd .set_sregs(&state.sregs) .map_err(KvmVcpuError::VcpuSetSregs)?; // SAFETY: Safe unless the snapshot is corrupted. 
unsafe { // kvm-ioctl's `set_xsave2()` can be called even on kernel versions not supporting // `KVM_CAP_XSAVE2`, because it internally calls `KVM_SET_XSAVE` API that was extended // by Linux kernel. Thus, `KVM_SET_XSAVE2` API does not exist as a KVM interface. // However, kvm-ioctl added `set_xsave2()` to allow users to pass `Xsave` instead of the // older `kvm_xsave`. self.fd .set_xsave2(&state.xsave) .map_err(KvmVcpuError::VcpuSetXsave)?; } self.fd .set_xcrs(&state.xcrs) .map_err(KvmVcpuError::VcpuSetXcrs)?; self.fd .set_debug_regs(&state.debug_regs) .map_err(KvmVcpuError::VcpuSetDebugRegs)?; self.fd .set_lapic(&state.lapic) .map_err(KvmVcpuError::VcpuSetLapic)?; for msrs in &state.saved_msrs { let nmsrs = self.fd.set_msrs(msrs).map_err(KvmVcpuError::VcpuSetMsrs)?; if nmsrs < msrs.as_fam_struct_ref().nmsrs as usize { return Err(KvmVcpuError::VcpuSetMsrsIncomplete); } } self.fd .set_vcpu_events(&state.vcpu_events) .map_err(KvmVcpuError::VcpuSetVcpuEvents)?; self.kvmclock_ctrl(); Ok(()) } } impl Peripherals { /// Runs the vCPU in KVM context and handles the kvm exit reason. /// /// Returns error or enum specifying whether emulation was handled or interrupted. 
pub fn run_arch_emulation(&self, exit: VcpuExit) -> Result { match exit { VcpuExit::IoIn(addr, data) => { if let Some(pio_bus) = &self.pio_bus { let _metric = METRICS.vcpu.exit_io_in_agg.record_latency_metrics(); if let Err(err) = pio_bus.read(u64::from(addr), data) { warn!("vcpu: IO read @ {addr:#x}:{:#x} failed: {err}", data.len()); } METRICS.vcpu.exit_io_in.inc(); } Ok(VcpuEmulation::Handled) } VcpuExit::IoOut(addr, data) => { if let Some(pio_bus) = &self.pio_bus { let _metric = METRICS.vcpu.exit_io_out_agg.record_latency_metrics(); if let Err(err) = pio_bus.write(u64::from(addr), data) { warn!("vcpu: IO write @ {addr:#x}:{:#x} failed: {err}", data.len()); } METRICS.vcpu.exit_io_out.inc(); } Ok(VcpuEmulation::Handled) } unexpected_exit => { METRICS.vcpu.failures.inc(); error!("Unexpected exit reason on vcpu run: {:?}", unexpected_exit); Err(VcpuError::UnhandledKvmExit(format!( "{:?}", unexpected_exit ))) } } } } /// Structure holding VCPU kvm state. #[derive(Serialize, Deserialize)] pub struct VcpuState { /// CpuId. pub cpuid: CpuId, /// Saved msrs. pub saved_msrs: Vec, /// Debug regs. pub debug_regs: kvm_debugregs, /// Lapic. pub lapic: kvm_lapic_state, /// Mp state pub mp_state: kvm_mp_state, /// Kvm regs. pub regs: kvm_regs, /// Sregs. pub sregs: kvm_sregs, /// Vcpu events pub vcpu_events: kvm_vcpu_events, /// Xcrs. pub xcrs: kvm_xcrs, /// Xsave. pub xsave: Xsave, /// Tsc khz. 
pub tsc_khz: Option, } impl Debug for VcpuState { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { let mut debug_kvm_regs: Vec = Vec::new(); for kvm_msrs in self.saved_msrs.iter() { debug_kvm_regs = kvm_msrs.clone().into_raw(); debug_kvm_regs.sort_by_key(|msr| (msr.nmsrs, msr.pad)); } f.debug_struct("VcpuState") .field("cpuid", &self.cpuid) .field("saved_msrs", &debug_kvm_regs) .field("debug_regs", &self.debug_regs) .field("lapic", &self.lapic) .field("mp_state", &self.mp_state) .field("regs", &self.regs) .field("sregs", &self.sregs) .field("vcpu_events", &self.vcpu_events) .field("xcrs", &self.xcrs) .field("xsave", &self.xsave) .field("tsc_khz", &self.tsc_khz) .finish() } } #[cfg(test)] mod tests { #![allow(clippy::undocumented_unsafe_blocks)] use kvm_bindings::kvm_msr_entry; use kvm_ioctls::Cap; use vm_memory::GuestAddress; use super::*; use crate::arch::BootProtocol; use crate::arch::x86_64::cpu_model::CpuModel; use crate::cpu_config::templates::{ CpuConfiguration, CpuTemplateType, CustomCpuTemplate, GetCpuTemplate, GuestConfigError, StaticCpuTemplate, }; use crate::cpu_config::x86_64::cpuid::{Cpuid, CpuidEntry, CpuidKey}; use crate::vstate::kvm::Kvm; use crate::vstate::vm::Vm; use crate::vstate::vm::tests::{setup_vm, setup_vm_with_memory}; impl Default for VcpuState { fn default() -> Self { VcpuState { cpuid: CpuId::new(1).unwrap(), saved_msrs: vec![Msrs::new(1).unwrap()], debug_regs: Default::default(), lapic: Default::default(), mp_state: Default::default(), regs: Default::default(), sregs: Default::default(), vcpu_events: Default::default(), xcrs: Default::default(), xsave: Xsave::new(0).unwrap(), tsc_khz: Some(0), } } } fn setup_vcpu(mem_size: usize) -> (Kvm, Vm, KvmVcpu) { let (kvm, vm) = setup_vm_with_memory(mem_size); vm.setup_irqchip().unwrap(); let vcpu = KvmVcpu::new(0, &vm).unwrap(); (kvm, vm, vcpu) } fn create_vcpu_config( kvm: &Kvm, vcpu: &KvmVcpu, template: &CustomCpuTemplate, ) -> Result { let cpuid = 
Cpuid::try_from(kvm.supported_cpuid.clone()) .map_err(GuestConfigError::CpuidFromKvmCpuid)?; let msrs = vcpu .get_msrs(template.msr_index_iter()) .map_err(GuestConfigError::VcpuIoctl)?; let base_cpu_config = CpuConfiguration { cpuid, msrs }; let cpu_config = CpuConfiguration::apply_template(base_cpu_config, template)?; Ok(VcpuConfig { vcpu_count: 1, smt: false, cpu_config, }) } #[test] fn test_configure_vcpu() { let (kvm, vm, mut vcpu) = setup_vcpu(0x10000); let vcpu_config = create_vcpu_config(&kvm, &vcpu, &CustomCpuTemplate::default()).unwrap(); assert_eq!( vcpu.configure( vm.guest_memory(), EntryPoint { entry_addr: GuestAddress(0), protocol: BootProtocol::LinuxBoot, }, &vcpu_config, ), Ok(()) ); let try_configure = |kvm: &Kvm, vcpu: &mut KvmVcpu, template| -> bool { let cpu_template = Some(CpuTemplateType::Static(template)); let template = cpu_template.get_cpu_template(); match template { Ok(template) => match create_vcpu_config(kvm, vcpu, &template) { Ok(config) => vcpu .configure( vm.guest_memory(), EntryPoint { entry_addr: GuestAddress(crate::arch::get_kernel_start()), protocol: BootProtocol::LinuxBoot, }, &config, ) .is_ok(), Err(_) => false, }, Err(_) => false, } }; // Test configure while using the T2 template. let t2_res = try_configure(&kvm, &mut vcpu, StaticCpuTemplate::T2); // Test configure while using the C3 template. let c3_res = try_configure(&kvm, &mut vcpu, StaticCpuTemplate::C3); // Test configure while using the T2S template. let t2s_res = try_configure(&kvm, &mut vcpu, StaticCpuTemplate::T2S); // Test configure while using the T2CL template. let t2cl_res = try_configure(&kvm, &mut vcpu, StaticCpuTemplate::T2CL); // Test configure while using the T2S template. 
let t2a_res = try_configure(&kvm, &mut vcpu, StaticCpuTemplate::T2A); let cpu_model = CpuModel::get_cpu_model(); match &cpuid::common::get_vendor_id_from_host().unwrap() { cpuid::VENDOR_ID_INTEL => { assert_eq!( t2_res, StaticCpuTemplate::T2 .get_supported_cpu_models() .contains(&cpu_model) ); assert_eq!( c3_res, StaticCpuTemplate::C3 .get_supported_cpu_models() .contains(&cpu_model) ); assert_eq!( t2s_res, StaticCpuTemplate::T2S .get_supported_cpu_models() .contains(&cpu_model) ); assert_eq!( t2cl_res, StaticCpuTemplate::T2CL .get_supported_cpu_models() .contains(&cpu_model) ); assert!(!t2a_res); } cpuid::VENDOR_ID_AMD => { assert!(!t2_res); assert!(!c3_res); assert!(!t2s_res); assert!(!t2cl_res); assert_eq!( t2a_res, StaticCpuTemplate::T2A .get_supported_cpu_models() .contains(&cpu_model) ); } _ => { assert!(!t2_res); assert!(!c3_res); assert!(!t2s_res); assert!(!t2cl_res); assert!(!t2a_res); } } } #[test] fn test_vcpu_cpuid_restore() { let (kvm, _, vcpu) = setup_vcpu(0x10000); vcpu.fd.set_cpuid2(&kvm.supported_cpuid).unwrap(); // Mutate the CPUID. // Leaf 0x3 / EAX that is an unused (reserved to be accurate) register, so it's harmless. let mut state = vcpu.save_state().unwrap(); state.cpuid.as_mut_slice().iter_mut().for_each(|entry| { if entry.function == 3 && entry.index == 0 { entry.eax = 0x1234_5678; } }); // Restore the state into the existing vcpu. let result1 = vcpu.restore_state(&state); assert!(result1.is_ok(), "{}", result1.unwrap_err()); drop(vcpu); // Restore the state into a new vcpu. let (_, _vm, vcpu) = setup_vcpu(0x10000); let result2 = vcpu.restore_state(&state); assert!(result2.is_ok(), "{}", result2.unwrap_err()); // Validate the mutated cpuid is restored correctly. 
let state = vcpu.save_state().unwrap(); let cpuid = Cpuid::try_from(state.cpuid).unwrap(); let leaf3 = cpuid .inner() .get(&CpuidKey { leaf: 0x3, subleaf: 0x0, }) .unwrap(); assert!(leaf3.result.eax == 0x1234_5678); } #[test] fn test_empty_cpuid_entries_removed() { // Test that `get_cpuid()` removes zeroed empty entries from the `KVM_GET_CPUID2` result. let (kvm, vm, mut vcpu) = setup_vcpu(0x10000); let vcpu_config = VcpuConfig { vcpu_count: 1, smt: false, cpu_config: CpuConfiguration { cpuid: Cpuid::try_from(kvm.supported_cpuid.clone()).unwrap(), msrs: BTreeMap::new(), }, }; vcpu.configure( vm.guest_memory(), EntryPoint { entry_addr: GuestAddress(0), protocol: BootProtocol::LinuxBoot, }, &vcpu_config, ) .unwrap(); // Invalid entries filled with 0 should not exist. let cpuid = vcpu.get_cpuid().unwrap(); cpuid.as_slice().iter().for_each(|entry| { assert!( !(entry.function == 0 && entry.index == 0 && entry.flags == 0 && entry.eax == 0 && entry.ebx == 0 && entry.ecx == 0 && entry.edx == 0) ); }); // Leaf 0 should have non-zero entry in `Cpuid`. let cpuid = Cpuid::try_from(cpuid).unwrap(); assert_ne!( cpuid .inner() .get(&CpuidKey { leaf: 0, subleaf: 0, }) .unwrap(), &CpuidEntry { ..Default::default() } ); } #[test] fn test_dump_cpu_config_with_non_configured_vcpu() { // Test `dump_cpu_config()` before vcpu configuration. // // `KVM_GET_CPUID2` returns the result of `KVM_SET_CPUID2`. See // https://docs.kernel.org/virt/kvm/api.html#kvm-set-cpuid // Since `KVM_SET_CPUID2` has not been called before vcpu configuration, all leaves should // be filled with zero. Therefore, `KvmVcpu::dump_cpu_config()` should fail with CPUID type // conversion error due to the lack of brand string info in leaf 0x0. 
let (_, _, vcpu) = setup_vcpu(0x10000); match vcpu.dump_cpu_config() { Err(KvmVcpuError::ConvertCpuidType(_)) => (), Err(err) => panic!("Unexpected error: {err}"), Ok(_) => panic!("Dumping CPU configuration should fail before vcpu configuration."), } } #[test] fn test_dump_cpu_config_with_configured_vcpu() { // Test `dump_cpu_config()` after vcpu configuration. let (kvm, vm, mut vcpu) = setup_vcpu(0x10000); let vcpu_config = VcpuConfig { vcpu_count: 1, smt: false, cpu_config: CpuConfiguration { cpuid: Cpuid::try_from(kvm.supported_cpuid.clone()).unwrap(), msrs: BTreeMap::new(), }, }; vcpu.configure( vm.guest_memory(), EntryPoint { entry_addr: GuestAddress(0), protocol: BootProtocol::LinuxBoot, }, &vcpu_config, ) .unwrap(); vcpu.dump_cpu_config().unwrap(); } #[test] #[allow(clippy::redundant_clone)] fn test_is_tsc_scaling_required() { // Test `is_tsc_scaling_required` as if it were on the same // CPU model as the one in the snapshot state. let (_, _, vcpu) = setup_vcpu(0x1000); { // The frequency difference is within tolerance. let mut state = vcpu.save_state().unwrap(); state.tsc_khz = Some( state.tsc_khz.unwrap() + state.tsc_khz.unwrap() * u32::try_from(TSC_KHZ_TOL_NUMERATOR).unwrap() / u32::try_from(TSC_KHZ_TOL_DENOMINATOR).unwrap() / 2, ); assert!( !vcpu .is_tsc_scaling_required(state.tsc_khz.unwrap()) .unwrap() ); } { // The frequency difference is over the tolerance. 
let mut state = vcpu.save_state().unwrap(); state.tsc_khz = Some( state.tsc_khz.unwrap() + state.tsc_khz.unwrap() * u32::try_from(TSC_KHZ_TOL_NUMERATOR).unwrap() / u32::try_from(TSC_KHZ_TOL_DENOMINATOR).unwrap() * 2, ); assert!( vcpu.is_tsc_scaling_required(state.tsc_khz.unwrap()) .unwrap() ); } { // Try a large frequency (30GHz) in the state and check it doesn't // overflow assert!(vcpu.is_tsc_scaling_required(30_000_000).unwrap()); } } #[test] fn test_set_tsc() { let (kvm, _, vcpu) = setup_vcpu(0x1000); let mut state = vcpu.save_state().unwrap(); state.tsc_khz = Some( state.tsc_khz.unwrap() + state.tsc_khz.unwrap() * u32::try_from(TSC_KHZ_TOL_NUMERATOR).unwrap() / u32::try_from(TSC_KHZ_TOL_DENOMINATOR).unwrap() * 2, ); if kvm.fd.check_extension(Cap::TscControl) { vcpu.set_tsc_khz(state.tsc_khz.unwrap()).unwrap(); if kvm.fd.check_extension(Cap::GetTscKhz) { assert_eq!(vcpu.get_tsc_khz().ok(), state.tsc_khz); } else { vcpu.get_tsc_khz().unwrap_err(); } } else { vcpu.set_tsc_khz(state.tsc_khz.unwrap()).unwrap_err(); } } #[test] fn test_get_msrs_with_msrs_to_save() { // Test `get_msrs()` with the MSR indices that should be serialized into snapshots. // The MSR indices should be valid and this test should succeed. let (_, _, vcpu) = setup_vcpu(0x1000); vcpu.get_msrs(vcpu.msrs_to_save.iter().copied()).unwrap(); } #[test] fn test_get_msrs_with_msrs_to_dump() { // Test `get_msrs()` with the MSR indices that should be dumped. // All the MSR indices should be valid and the call should succeed. let (_, _, vcpu) = setup_vcpu(0x1000); let kvm = kvm_ioctls::Kvm::new().unwrap(); let msrs_to_dump = crate::arch::x86_64::msr::get_msrs_to_dump(&kvm).unwrap(); vcpu.get_msrs(msrs_to_dump.as_slice().iter().copied()) .unwrap(); } #[test] fn test_get_msrs_with_invalid_msr_index() { // Test `get_msrs()` with unsupported MSR indices. This should return `VcpuGetMsr` error // that happens when `KVM_GET_MSRS` fails to populate MSR values in the middle and exits. 
// Currently, MSR indices 2..=4 are not listed as supported MSRs. let (_, _, vcpu) = setup_vcpu(0x1000); let msr_index_list: Vec = vec![2, 3, 4]; match vcpu.get_msrs(msr_index_list.iter().copied()) { Err(KvmVcpuError::VcpuGetMsr(_)) => (), Err(err) => panic!("Unexpected error: {err}"), Ok(_) => { panic!("KvmVcpu::get_msrs() for unsupported MSRs should fail with VcpuGetMsr.") } } } fn msrs_from_entries(msr_entries: &[(u32, u64)]) -> Msrs { Msrs::from_entries( &msr_entries .iter() .map(|&(index, data)| kvm_msr_entry { index, data, ..Default::default() }) .collect::>(), ) .unwrap() } fn assert_msrs(msr_chunks: &[Msrs], expected_msr_entries: &[(u32, u64)]) { let flattened_msrs = msr_chunks.iter().flat_map(|msrs| msrs.as_slice()); for (a, b) in flattened_msrs.zip(expected_msr_entries.iter()) { assert_eq!(a.index, b.0); assert_eq!(a.data, b.1); } } #[test] fn test_defer_msrs() { let to_defer = DEFERRED_MSRS[0]; let mut msr_chunks = [msrs_from_entries(&[(to_defer, 0), (MSR_IA32_TSC, 1)])]; let deferred = KvmVcpu::extract_deferred_msrs(&mut msr_chunks).unwrap(); assert_eq!(deferred.as_slice().len(), 1, "did not correctly defer MSR"); assert_eq!( msr_chunks[0].as_slice().len(), 1, "deferred MSR not removed from chunk" ); assert_eq!(deferred.as_slice()[0].index, to_defer); assert_eq!(msr_chunks[0].as_slice()[0].index, MSR_IA32_TSC); } #[test] fn test_fix_zero_tsc_deadline_msr_zero_same_chunk() { // Place both TSC and TSC_DEADLINE MSRs in the same chunk. let mut msr_chunks = [msrs_from_entries(&[ (MSR_IA32_TSC_DEADLINE, 0), (MSR_IA32_TSC, 42), ])]; KvmVcpu::fix_zero_tsc_deadline_msr(&mut msr_chunks); // We expect for the MSR_IA32_TSC_DEADLINE to get updated with the MSR_IA32_TSC value. assert_msrs( &msr_chunks, &[(MSR_IA32_TSC_DEADLINE, 42), (MSR_IA32_TSC, 42)], ); } #[test] fn test_fix_zero_tsc_deadline_msr_zero_separate_chunks() { // Place both TSC and TSC_DEADLINE MSRs in separate chunks. 
let mut msr_chunks = [ msrs_from_entries(&[(MSR_IA32_TSC_DEADLINE, 0)]), msrs_from_entries(&[(MSR_IA32_TSC, 42)]), ]; KvmVcpu::fix_zero_tsc_deadline_msr(&mut msr_chunks); // We expect for the MSR_IA32_TSC_DEADLINE to get updated with the MSR_IA32_TSC value. assert_msrs( &msr_chunks, &[(MSR_IA32_TSC_DEADLINE, 42), (MSR_IA32_TSC, 42)], ); } #[test] fn test_fix_zero_tsc_deadline_msr_non_zero() { let mut msr_chunks = [msrs_from_entries(&[ (MSR_IA32_TSC_DEADLINE, 1), (MSR_IA32_TSC, 2), ])]; KvmVcpu::fix_zero_tsc_deadline_msr(&mut msr_chunks); // We expect that MSR_IA32_TSC_DEADLINE should remain unchanged, because it is non-zero // already. assert_msrs( &msr_chunks, &[(MSR_IA32_TSC_DEADLINE, 1), (MSR_IA32_TSC, 2)], ); } #[test] fn test_get_msr_chunks_preserved_order() { // Regression test for #4666 let (_, vm) = setup_vm(); let vcpu = KvmVcpu::new(0, &vm).unwrap(); // The list of supported MSR indices, in the order they were returned by KVM let msrs_to_save = vm.msrs_to_save(); // The MSRs after processing. The order should be identical to the one returned by KVM, with // the exception of deferred MSRs, which should be moved to the end (but show up in the same // order as they are listed in [`DEFERRED_MSRS`]. let msr_chunks = vcpu .get_msr_chunks(vcpu.msrs_to_save.iter().copied()) .unwrap(); msr_chunks .iter() .flat_map(|chunk| chunk.as_slice().iter()) .zip( msrs_to_save .iter() .filter(|&idx| !DEFERRED_MSRS.contains(idx)) .chain(DEFERRED_MSRS.iter()), ) .for_each(|(left, &right)| assert_eq!(left.index, right)); } } ================================================ FILE: src/vmm/src/arch/x86_64/vm.rs ================================================ // Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
// SPDX-License-Identifier: Apache-2.0 use std::fmt; use std::sync::{Arc, Mutex}; use kvm_bindings::{ KVM_CLOCK_TSC_STABLE, KVM_IRQCHIP_IOAPIC, KVM_IRQCHIP_PIC_MASTER, KVM_IRQCHIP_PIC_SLAVE, KVM_PIT_SPEAKER_DUMMY, MsrList, kvm_clock_data, kvm_irqchip, kvm_pit_config, kvm_pit_state2, }; use kvm_ioctls::Cap; use serde::{Deserialize, Serialize}; use crate::arch::x86_64::msr::MsrError; use crate::snapshot::Persist; use crate::utils::u64_to_usize; use crate::vstate::bus::Bus; use crate::vstate::memory::{GuestMemoryExtension, GuestMemoryState}; use crate::vstate::resources::ResourceAllocator; use crate::vstate::vm::{VmCommon, VmError}; /// Error type for [`Vm::restore_state`] #[allow(missing_docs)] #[cfg(target_arch = "x86_64")] #[derive(Debug, PartialEq, Eq, thiserror::Error, displaydoc::Display)] pub enum ArchVmError { /// Failed to check KVM capability (0): {1} CheckCapability(Cap, kvm_ioctls::Error), /// Set PIT2 error: {0} SetPit2(kvm_ioctls::Error), /// Set clock error: {0} SetClock(kvm_ioctls::Error), /// Set IrqChipPicMaster error: {0} SetIrqChipPicMaster(kvm_ioctls::Error), /// Set IrqChipPicSlave error: {0} SetIrqChipPicSlave(kvm_ioctls::Error), /// Set IrqChipIoAPIC error: {0} SetIrqChipIoAPIC(kvm_ioctls::Error), /// Failed to get KVM vm pit state: {0} VmGetPit2(kvm_ioctls::Error), /// Failed to get KVM vm clock: {0} VmGetClock(kvm_ioctls::Error), /// Failed to get KVM vm irqchip: {0} VmGetIrqChip(kvm_ioctls::Error), /// Failed to set KVM vm irqchip: {0} VmSetIrqChip(kvm_ioctls::Error), /// Failed to get MSR index list to save into snapshots: {0} GetMsrsToSave(MsrError), /// Failed during KVM_SET_TSS_ADDRESS: {0} SetTssAddress(kvm_ioctls::Error), } /// Structure representing the current architecture's understand of what a "virtual machine" is. #[derive(Debug)] pub struct ArchVm { /// Architecture independent parts of a vm pub common: VmCommon, msrs_to_save: MsrList, /// Size in bytes requiring to hold the dynamically-sized `kvm_xsave` struct. 
/// /// `None` if `KVM_CAP_XSAVE2` not supported. xsave2_size: Option, /// Port IO bus pub pio_bus: Arc, } impl ArchVm { /// Create a new `Vm` struct. pub fn new(kvm: &crate::vstate::kvm::Kvm) -> Result { let common = Self::create_common(kvm)?; let msrs_to_save = kvm.msrs_to_save().map_err(ArchVmError::GetMsrsToSave)?; // `KVM_CAP_XSAVE2` was introduced to support dynamically-sized XSTATE buffer in kernel // v5.17. `KVM_GET_EXTENSION(KVM_CAP_XSAVE2)` returns the required size in byte if // supported; otherwise returns 0. // https://github.com/torvalds/linux/commit/be50b2065dfa3d88428fdfdc340d154d96bf6848 // // Cache the value in order not to call it at each vCPU creation. let xsave2_size = match common.fd.check_extension_int(Cap::Xsave2) { // Catch all negative values just in case although the possible negative return value // of ioctl() is only -1. ..=-1 => { return Err(VmError::Arch(ArchVmError::CheckCapability( Cap::Xsave2, vmm_sys_util::errno::Error::last(), ))); } 0 => None, // SAFETY: Safe because negative values are handled above. ret => Some(usize::try_from(ret).unwrap()), }; common .fd .set_tss_address(u64_to_usize(crate::arch::x86_64::layout::KVM_TSS_ADDRESS)) .map_err(ArchVmError::SetTssAddress)?; let pio_bus = Arc::new(Bus::new()); Ok(ArchVm { common, msrs_to_save, xsave2_size, pio_bus, }) } /// Pre-vCPU creation setup. pub fn arch_pre_create_vcpus(&mut self, _: u8) -> Result<(), ArchVmError> { // For x86_64 we need to create the interrupt controller before calling `KVM_CREATE_VCPUS` self.setup_irqchip() } /// Post-vCPU creation setup. pub fn arch_post_create_vcpus(&mut self, _: u8) -> Result<(), ArchVmError> { Ok(()) } /// Restores the KVM VM state. /// /// # Errors /// /// When: /// - [`kvm_ioctls::VmFd::set_pit`] errors. /// - [`kvm_ioctls::VmFd::set_clock`] errors. /// - [`kvm_ioctls::VmFd::set_irqchip`] errors. /// - [`kvm_ioctls::VmFd::set_irqchip`] errors. /// - [`kvm_ioctls::VmFd::set_irqchip`] errors. 
pub fn restore_state(&mut self, state: &VmState) -> Result<(), ArchVmError> { self.fd() .set_pit2(&state.pitstate) .map_err(ArchVmError::SetPit2)?; self.fd() .set_clock(&state.clock) .map_err(ArchVmError::SetClock)?; self.fd() .set_irqchip(&state.pic_master) .map_err(ArchVmError::SetIrqChipPicMaster)?; self.fd() .set_irqchip(&state.pic_slave) .map_err(ArchVmError::SetIrqChipPicSlave)?; self.fd() .set_irqchip(&state.ioapic) .map_err(ArchVmError::SetIrqChipIoAPIC)?; self.common.resource_allocator = Mutex::new(state.resource_allocator.clone()); Ok(()) } /// Creates the irq chip and an in-kernel device model for the PIT. pub fn setup_irqchip(&self) -> Result<(), ArchVmError> { self.fd() .create_irq_chip() .map_err(ArchVmError::VmSetIrqChip)?; // We need to enable the emulation of a dummy speaker port stub so that writing to port 0x61 // (i.e. KVM_SPEAKER_BASE_ADDRESS) does not trigger an exit to user space. let pit_config = kvm_pit_config { flags: KVM_PIT_SPEAKER_DUMMY, ..Default::default() }; self.fd() .create_pit2(pit_config) .map_err(ArchVmError::VmSetIrqChip) } /// Saves and returns the Kvm Vm state. pub fn save_state(&self) -> Result { let pitstate = self.fd().get_pit2().map_err(ArchVmError::VmGetPit2)?; let mut clock = self.fd().get_clock().map_err(ArchVmError::VmGetClock)?; // This bit is not accepted in SET_CLOCK, clear it. 
clock.flags &= !KVM_CLOCK_TSC_STABLE; let mut pic_master = kvm_irqchip { chip_id: KVM_IRQCHIP_PIC_MASTER, ..Default::default() }; self.fd() .get_irqchip(&mut pic_master) .map_err(ArchVmError::VmGetIrqChip)?; let mut pic_slave = kvm_irqchip { chip_id: KVM_IRQCHIP_PIC_SLAVE, ..Default::default() }; self.fd() .get_irqchip(&mut pic_slave) .map_err(ArchVmError::VmGetIrqChip)?; let mut ioapic = kvm_irqchip { chip_id: KVM_IRQCHIP_IOAPIC, ..Default::default() }; self.fd() .get_irqchip(&mut ioapic) .map_err(ArchVmError::VmGetIrqChip)?; Ok(VmState { memory: self.common.guest_memory.describe(), resource_allocator: self.resource_allocator().save(), pitstate, clock, pic_master, pic_slave, ioapic, }) } /// Gets the list of MSRs to save when creating snapshots pub fn msrs_to_save(&self) -> &[u32] { self.msrs_to_save.as_slice() } /// Gets the size (in bytes) of the `kvm_xsave` struct. pub fn xsave2_size(&self) -> Option { self.xsave2_size } } #[derive(Default, Deserialize, Serialize)] /// Structure holding VM kvm state. pub struct VmState { /// guest memory state pub memory: GuestMemoryState, /// resource allocator pub resource_allocator: ResourceAllocator, pitstate: kvm_pit_state2, clock: kvm_clock_data, // TODO: rename this field to adopt inclusive language once Linux updates it, too. pic_master: kvm_irqchip, // TODO: rename this field to adopt inclusive language once Linux updates it, too. 
pic_slave: kvm_irqchip, ioapic: kvm_irqchip, } impl fmt::Debug for VmState { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("VmState") .field("pitstate", &self.pitstate) .field("clock", &self.clock) .field("pic_master", &"?") .field("pic_slave", &"?") .field("ioapic", &"?") .finish() } } #[cfg(test)] mod tests { use kvm_bindings::{ KVM_CLOCK_TSC_STABLE, KVM_IRQCHIP_IOAPIC, KVM_IRQCHIP_PIC_MASTER, KVM_IRQCHIP_PIC_SLAVE, KVM_PIT_SPEAKER_DUMMY, }; use crate::vstate::vm::VmState; use crate::vstate::vm::tests::{setup_vm, setup_vm_with_memory}; #[cfg(target_arch = "x86_64")] #[test] fn test_vm_save_restore_state() { let (_, vm) = setup_vm(); // Irqchips, clock and pitstate are not configured so trying to save state should fail. vm.save_state().unwrap_err(); let (_, vm) = setup_vm_with_memory(0x1000); vm.setup_irqchip().unwrap(); let vm_state = vm.save_state().unwrap(); assert_eq!( vm_state.pitstate.flags | KVM_PIT_SPEAKER_DUMMY, KVM_PIT_SPEAKER_DUMMY ); assert_eq!(vm_state.clock.flags & KVM_CLOCK_TSC_STABLE, 0); assert_eq!(vm_state.pic_master.chip_id, KVM_IRQCHIP_PIC_MASTER); assert_eq!(vm_state.pic_slave.chip_id, KVM_IRQCHIP_PIC_SLAVE); assert_eq!(vm_state.ioapic.chip_id, KVM_IRQCHIP_IOAPIC); let (_, mut vm) = setup_vm_with_memory(0x1000); vm.setup_irqchip().unwrap(); vm.restore_state(&vm_state).unwrap(); } #[cfg(target_arch = "x86_64")] #[test] fn test_vm_save_restore_state_bad_irqchip() { use kvm_bindings::KVM_NR_IRQCHIPS; let (_, vm) = setup_vm_with_memory(0x1000); vm.setup_irqchip().unwrap(); let mut vm_state = vm.save_state().unwrap(); let (_, mut vm) = setup_vm_with_memory(0x1000); vm.setup_irqchip().unwrap(); // Try to restore an invalid PIC Master chip ID let orig_master_chip_id = vm_state.pic_master.chip_id; vm_state.pic_master.chip_id = KVM_NR_IRQCHIPS; vm.restore_state(&vm_state).unwrap_err(); vm_state.pic_master.chip_id = orig_master_chip_id; // Try to restore an invalid PIC Slave chip ID let orig_slave_chip_id = 
vm_state.pic_slave.chip_id; vm_state.pic_slave.chip_id = KVM_NR_IRQCHIPS; vm.restore_state(&vm_state).unwrap_err(); vm_state.pic_slave.chip_id = orig_slave_chip_id; // Try to restore an invalid IOPIC chip ID vm_state.ioapic.chip_id = KVM_NR_IRQCHIPS; vm.restore_state(&vm_state).unwrap_err(); } #[cfg(target_arch = "x86_64")] #[test] fn test_vmstate_serde() { let (_, mut vm) = setup_vm_with_memory(0x1000); vm.setup_irqchip().unwrap(); let state = vm.save_state().unwrap(); // Test direct bitcode serialization let serialized_data = bitcode::serialize(&state).unwrap(); let restored_state: VmState = bitcode::deserialize(&serialized_data).unwrap(); vm.restore_state(&restored_state).unwrap(); } } ================================================ FILE: src/vmm/src/arch/x86_64/xstate.rs ================================================ // Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 use vmm_sys_util::syscall::SyscallReturnCode; use crate::arch::x86_64::generated::arch_prctl; use crate::logger::info; const INTEL_AMX_MASK: u64 = 1u64 << arch_prctl::ARCH_XCOMP_TILEDATA; /// Errors assocaited with x86_64's dynamic XSAVE state features. #[derive(Debug, thiserror::Error, displaydoc::Display)] pub enum XstateError { /// Failed to get supported XSTATE features: {0} GetSupportedFeatures(std::io::Error), /// Failed to request permission for XSTATE feature ({0}): {1} RequestFeaturePermission(u32, std::io::Error), } /// Request permission for all dynamic XSTATE features. /// /// Some XSTATE features are not permitted by default, because they may require a larger area to /// save their states than the tranditional 4096-byte area. Instead, the permission for them can be /// requested via arch_prctl(). 
/// https://github.com/torvalds/linux/blob/master/Documentation/arch/x86/xstate.rst /// /// Firecracker requests permission for them by default if available in order to retrieve the /// full supported feature set via KVM_GET_SUPPORTED_CPUID. /// https://docs.kernel.org/virt/kvm/api.html#kvm-get-supported-cpuid /// /// Note that requested features can be masked by a CPU template. pub fn request_dynamic_xstate_features() -> Result<(), XstateError> { let supported_xfeatures = match get_supported_xfeatures().map_err(XstateError::GetSupportedFeatures)? { Some(supported_xfeatures) => supported_xfeatures, // Exit early if dynamic XSTATE feature enabling is not supported on the kernel. None => return Ok(()), }; // Intel AMX's TILEDATA // // Unless requested, on kernels prior to v6.4, KVM_GET_SUPPORTED_CPUID returns an // inconsistent state where TILECFG is set but TILEDATA isn't. Such a half-enabled state // causes guest crash during boot because a guest calls XSETBV instruction with all // XSAVE feature bits enumerated on CPUID and XSETBV only accepts either of both Intel // AMX bits enabled or disabled; otherwise resulting in general protection fault. // https://lore.kernel.org/all/20230405004520.421768-1-seanjc@google.com/ if supported_xfeatures & INTEL_AMX_MASK == INTEL_AMX_MASK { request_xfeature_permission(arch_prctl::ARCH_XCOMP_TILEDATA).map_err(|err| { XstateError::RequestFeaturePermission(arch_prctl::ARCH_XCOMP_TILEDATA, err) })?; } Ok(()) } /// Get supported XSTATE features /// /// Returns Ok(None) if dynamic XSTATE feature enabling is not supported. fn get_supported_xfeatures() -> Result, std::io::Error> { let mut supported_xfeatures: u64 = 0; // SAFETY: Safe because the third input (`addr`) is a valid `c_ulong` pointer. 
// https://man7.org/linux/man-pages/man2/arch_prctl.2.html match SyscallReturnCode(unsafe { libc::syscall( libc::SYS_arch_prctl, arch_prctl::ARCH_GET_XCOMP_SUPP, &mut supported_xfeatures as *mut libc::c_ulong, ) }) .into_empty_result() { Ok(()) => Ok(Some(supported_xfeatures)), // EINVAL is returned if the dynamic XSTATE feature enabling is not supported (e.g. kernel // version prior to v5.16). // https://github.com/torvalds/linux/commit/db8268df0983adc2bb1fb48c9e5f7bfbb5f617f3 Err(err) if err.raw_os_error() == Some(libc::EINVAL) => { info!("Dynamic XSTATE feature enabling is not supported."); Ok(None) } Err(err) => Err(err), } } /// Request permission for a dynamic XSTATE feature. /// /// This should be called after `get_supported_xfeatures()` that retrieves supported dynamic XSTATE /// features. /// /// Returns Ok(()) if the permission request succeeded or dynamic XSTATE feature enabling for /// "guest" is not supported. fn request_xfeature_permission(xfeature: u32) -> Result<(), std::io::Error> { // SAFETY: Safe because the third input (`addr`) is a valid `c_ulong` value. // https://man7.org/linux/man-pages/man2/arch_prctl.2.html match SyscallReturnCode(unsafe { libc::syscall( libc::SYS_arch_prctl, arch_prctl::ARCH_REQ_XCOMP_GUEST_PERM as libc::c_ulong, xfeature as libc::c_ulong, ) }) .into_empty_result() { Ok(()) => Ok(()), // EINVAL is returned if the dynamic XSTATE feature enabling for "guest" is not supported // although that for "userspace application" is supported (e.g. kernel versions >= 5.16 and // < 5.17). // https://github.com/torvalds/linux/commit/980fe2fddcff21937c93532b4597c8ea450346c1 // // Note that XFEATURE_MASK_XTILE (= XFEATURE_MASK_XTILE_DATA | XFEATURE_MASK_XTILE_CFG) was // also added to KVM_SUPPORTED_XCR0 in kernel v5.17. KVM_SUPPORTED_XCR0 is used to // initialize the guest-supported XCR0. Thus, KVM_GET_SUPPORTED_CPUID doesn't // return AMX-half-enabled state, where XTILE_CFG is set but XTILE_DATA is unset, on such // kernels. 
// https://github.com/torvalds/linux/commit/86aff7a4799286635efd94dab17b513544703cad // https://github.com/torvalds/linux/blame/f443e374ae131c168a065ea1748feac6b2e76613/arch/x86/kvm/x86.c#L8850-L8853 // https://github.com/firecracker-microvm/firecracker/pull/5065 Err(err) if err.raw_os_error() == Some(libc::EINVAL) => { info!("Dynamic XSTATE feature enabling is not supported for guest."); Ok(()) } Err(err) => Err(err), } } #[cfg(test)] mod tests { use super::*; // Get permitted XSTATE features. fn get_permitted_xstate_features() -> Result { let mut permitted_xfeatures: u64 = 0; // SAFETY: Safe because the third input (`addr`) is a valid `c_ulong` pointer. match SyscallReturnCode(unsafe { libc::syscall( libc::SYS_arch_prctl, arch_prctl::ARCH_GET_XCOMP_GUEST_PERM, &mut permitted_xfeatures as *mut libc::c_ulong, ) }) .into_empty_result() { Ok(()) => Ok(permitted_xfeatures), Err(err) => Err(err), } } #[test] fn test_request_xstate_feature_permission() { request_dynamic_xstate_features().unwrap(); let supported_xfeatures = match get_supported_xfeatures().unwrap() { Some(supported_xfeatures) => supported_xfeatures, // Nothing to test if dynamic XSTATE feature enabling is not supported on the kernel. None => return, }; // Check each dynamic feature is enabled. (currently only Intel AMX TILEDATA) if supported_xfeatures & INTEL_AMX_MASK == INTEL_AMX_MASK { let permitted_xfeatures = get_permitted_xstate_features().unwrap(); assert_eq!(permitted_xfeatures & INTEL_AMX_MASK, INTEL_AMX_MASK); } } } ================================================ FILE: src/vmm/src/builder.rs ================================================ // Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 //! Enables pre-boot setup, instantiation and booting of a Firecracker VMM. 
use std::fmt::Debug; use std::io; #[cfg(feature = "gdb")] use std::sync::mpsc; use std::sync::{Arc, Mutex}; use event_manager::SubscriberOps; use linux_loader::cmdline::Cmdline as LoaderKernelCmdline; use userfaultfd::Uffd; use utils::time::TimestampUs; use vm_allocator::AllocPolicy; use vm_memory::GuestAddress; #[cfg(target_arch = "aarch64")] use crate::Vcpu; use crate::arch::{ConfigurationError, configure_system_for_boot, load_kernel}; #[cfg(target_arch = "aarch64")] use crate::construct_kvm_mpidrs; use crate::cpu_config::templates::{GetCpuTemplate, GetCpuTemplateError, GuestConfigError}; #[cfg(target_arch = "x86_64")] use crate::device_manager; use crate::device_manager::pci_mngr::PciManagerError; use crate::device_manager::{ AttachDeviceError, DeviceManager, DeviceManagerCreateError, DeviceManagerPersistError, DeviceRestoreArgs, }; use crate::devices::virtio::balloon::Balloon; use crate::devices::virtio::block::device::Block; use crate::devices::virtio::device::VirtioDevice; use crate::devices::virtio::mem::{VIRTIO_MEM_DEFAULT_SLOT_SIZE_MIB, VirtioMem}; use crate::devices::virtio::net::Net; use crate::devices::virtio::pmem::device::Pmem; use crate::devices::virtio::rng::Entropy; use crate::devices::virtio::vsock::{Vsock, VsockUnixBackend}; #[cfg(feature = "gdb")] use crate::gdb; use crate::initrd::{InitrdConfig, InitrdError}; use crate::logger::debug; use crate::persist::{MicrovmState, MicrovmStateError}; use crate::resources::VmResources; use crate::seccomp::BpfThreadMap; use crate::snapshot::Persist; use crate::utils::mib_to_bytes; use crate::vmm_config::instance_info::InstanceInfo; use crate::vmm_config::machine_config::MachineConfigError; use crate::vmm_config::memory_hotplug::MemoryHotplugConfig; use crate::vstate::kvm::{Kvm, KvmError}; use crate::vstate::memory::GuestRegionMmap; #[cfg(target_arch = "aarch64")] use crate::vstate::resources::ResourceAllocator; use crate::vstate::vcpu::VcpuError; use crate::vstate::vm::{Vm, VmError}; use 
crate::{EventManager, Vmm, VmmError}; /// Errors associated with starting the instance. #[derive(Debug, thiserror::Error, displaydoc::Display)] pub enum StartMicrovmError { /// Unable to attach block device to Vmm: {0} AttachBlockDevice(io::Error), /// Could not attach device: {0} AttachDevice(#[from] AttachDeviceError), /// System configuration error: {0} ConfigureSystem(#[from] ConfigurationError), /// Failed to create device manager: {0} CreateDeviceManager(#[from] DeviceManagerCreateError), /// Failed to create guest config: {0} CreateGuestConfig(#[from] GuestConfigError), /// Cannot create network device: {0} CreateNetDevice(crate::devices::virtio::net::NetError), /// Cannot create pmem device: {0} CreatePmemDevice(#[from] crate::devices::virtio::pmem::device::PmemError), /// Cannot create RateLimiter: {0} CreateRateLimiter(io::Error), /// Error creating legacy device: {0} #[cfg(target_arch = "x86_64")] CreateLegacyDevice(device_manager::legacy::LegacyDeviceError), /// Error enabling PCIe support: {0} EnablePciDevices(#[from] PciManagerError), /// Error enabling pvtime on vcpu: {0} #[cfg(target_arch = "aarch64")] EnablePVTime(crate::arch::VcpuArchError), /// Invalid Memory Configuration: {0} GuestMemory(crate::vstate::memory::MemoryError), /// Error with initrd initialization: {0}. Initrd(#[from] InitrdError), /// Internal error while starting microVM: {0} Internal(#[from] VmmError), /// Failed to get CPU template: {0} GetCpuTemplate(#[from] GetCpuTemplateError), /// Invalid kernel command line: {0} KernelCmdline(String), /// Kvm error: {0} Kvm(#[from] KvmError), /// Cannot load command line string: {0} LoadCommandline(linux_loader::loader::Error), /// Cannot start microvm without kernel configuration. MissingKernelConfig, /// Cannot start microvm without guest mem_size config. MissingMemSizeConfig, /// No seccomp filter for thread category: {0} MissingSeccompFilters(String), /// The net device configuration is missing the tap device. 
NetDeviceNotConfigured, /// Cannot open the block device backing file: {0} OpenBlockDevice(io::Error), /// Cannot restore microvm state: {0} RestoreMicrovmState(MicrovmStateError), /// Cannot set vm resources: {0} SetVmResources(MachineConfigError), /// Cannot create the entropy device: {0} CreateEntropyDevice(crate::devices::virtio::rng::EntropyError), /// Failed to allocate guest resource: {0} AllocateResources(#[from] vm_allocator::Error), /// Error starting GDB debug session: {0} #[cfg(feature = "gdb")] GdbServer(gdb::target::GdbTargetError), /// Error cloning Vcpu fds #[cfg(feature = "gdb")] VcpuFdCloneError(#[from] crate::vstate::vcpu::CopyKvmFdError), /// Error with the Vm object: {0} Vm(#[from] VmError), } /// It's convenient to automatically convert `linux_loader::cmdline::Error`s /// to `StartMicrovmError`s. impl std::convert::From for StartMicrovmError { fn from(err: linux_loader::cmdline::Error) -> StartMicrovmError { StartMicrovmError::KernelCmdline(err.to_string()) } } /// Builds and starts a microVM based on the current Firecracker VmResources configuration. /// /// The built microVM and all the created vCPUs start off in the paused state. /// To boot the microVM and run those vCPUs, `Vmm::resume_vm()` needs to be /// called. pub fn build_microvm_for_boot( instance_info: &InstanceInfo, vm_resources: &super::resources::VmResources, event_manager: &mut EventManager, seccomp_filters: &BpfThreadMap, ) -> Result>, StartMicrovmError> { // Timestamp for measuring microVM boot duration. let request_ts = TimestampUs::default(); let boot_config = vm_resources .boot_source .builder .as_ref() .ok_or(StartMicrovmError::MissingKernelConfig)?; let guest_memory = vm_resources .allocate_guest_memory() .map_err(StartMicrovmError::GuestMemory)?; // Clone the command-line so that a failed boot doesn't pollute the original. 
#[allow(unused_mut)] let mut boot_cmdline = boot_config.cmdline.clone(); let cpu_template = vm_resources .machine_config .cpu_template .get_cpu_template()?; let kvm = Kvm::new(cpu_template.kvm_capabilities.clone())?; // Set up Kvm Vm and register memory regions. // Build custom CPU config if a custom template is provided. let mut vm = Vm::new(&kvm)?; let (mut vcpus, vcpus_exit_evt) = vm.create_vcpus(vm_resources.machine_config.vcpu_count)?; vm.register_dram_memory_regions(guest_memory)?; // Allocate memory as soon as possible to make hotpluggable memory available to all consumers, // before they clone the GuestMemoryMmap object let virtio_mem_addr = if let Some(memory_hotplug) = &vm_resources.memory_hotplug { let addr = allocate_virtio_mem_address(&vm, memory_hotplug.total_size_mib)?; let hotplug_memory_region = vm_resources .allocate_memory_region(addr, mib_to_bytes(memory_hotplug.total_size_mib)) .map_err(StartMicrovmError::GuestMemory)?; vm.register_hotpluggable_memory_region( hotplug_memory_region, mib_to_bytes(memory_hotplug.slot_size_mib), )?; Some(addr) } else { None }; let mut device_manager = DeviceManager::new( event_manager, &vcpus_exit_evt, &vm, vm_resources.serial_out_path.as_ref(), )?; let vm = Arc::new(vm); let entry_point = load_kernel(&boot_config.kernel_file, vm.guest_memory())?; let initrd = InitrdConfig::from_config(boot_config, vm.guest_memory())?; if vm_resources.pci_enabled { device_manager.enable_pci(&vm)?; } else { boot_cmdline.insert("pci", "off")?; } // The boot timer device needs to be the first device attached in order // to maintain the same MMIO address referenced in the documentation // and tests. 
if vm_resources.boot_timer { device_manager.attach_boot_timer_device(&vm, request_ts)?; } if let Some(balloon) = vm_resources.balloon.get() { attach_balloon_device( &mut device_manager, &vm, &mut boot_cmdline, balloon, event_manager, )?; } attach_block_devices( &mut device_manager, &vm, &mut boot_cmdline, vm_resources.block.devices.iter(), event_manager, )?; attach_net_devices( &mut device_manager, &vm, &mut boot_cmdline, vm_resources.net_builder.iter(), event_manager, )?; attach_pmem_devices( &mut device_manager, &vm, &mut boot_cmdline, vm_resources.pmem.devices.iter(), event_manager, )?; if let Some(unix_vsock) = vm_resources.vsock.get() { attach_unixsock_vsock_device( &mut device_manager, &vm, &mut boot_cmdline, unix_vsock, event_manager, )?; } if let Some(entropy) = vm_resources.entropy.get() { attach_entropy_device( &mut device_manager, &vm, &mut boot_cmdline, entropy, event_manager, )?; } // Attach virtio-mem device if configured if let Some(memory_hotplug) = &vm_resources.memory_hotplug { attach_virtio_mem_device( &mut device_manager, &vm, &mut boot_cmdline, memory_hotplug, event_manager, virtio_mem_addr.expect("address should be allocated"), )?; } #[cfg(target_arch = "aarch64")] device_manager.attach_legacy_devices_aarch64( &vm, event_manager, &mut boot_cmdline, vm_resources.serial_out_path.as_ref(), )?; device_manager.attach_vmgenid_device(&vm)?; device_manager.attach_vmclock_device(&vm)?; #[cfg(target_arch = "aarch64")] if vcpus[0].kvm_vcpu.supports_pvtime() { setup_pvtime(&mut vm.resource_allocator(), &mut vcpus)?; } else { log::warn!("Vcpus do not support pvtime, steal time will not be reported to guest"); } configure_system_for_boot( &kvm, &vm, &mut device_manager, vcpus.as_mut(), &vm_resources.machine_config, &cpu_template, entry_point, &initrd, boot_cmdline, )?; let vmm = Vmm { instance_info: instance_info.clone(), machine_config: vm_resources.machine_config.clone(), boot_source_config: vm_resources.boot_source.config.clone(), shutdown_exit_code: 
None, kvm, vm, uffd: None, vcpus_handles: Vec::new(), vcpus_exit_evt, device_manager, }; let vmm = Arc::new(Mutex::new(vmm)); #[cfg(feature = "gdb")] let (gdb_tx, gdb_rx) = mpsc::channel(); #[cfg(feature = "gdb")] vcpus .iter_mut() .for_each(|vcpu| vcpu.attach_debug_info(gdb_tx.clone())); // Move vcpus to their own threads and start their state machine in the 'Paused' state. vmm.lock() .unwrap() .start_vcpus( vcpus, seccomp_filters .get("vcpu") .ok_or_else(|| StartMicrovmError::MissingSeccompFilters("vcpu".to_string()))? .clone(), ) .map_err(VmmError::VcpuStart)?; #[cfg(feature = "gdb")] if let Some(gdb_socket_path) = &vm_resources.machine_config.gdb_socket_path { gdb::gdb_thread(vmm.clone(), gdb_rx, entry_point.entry_addr, gdb_socket_path) .map_err(StartMicrovmError::GdbServer)?; } else { debug!("No GDB socket provided not starting gdb server."); } // Load seccomp filters for the VMM thread. // Execution panics if filters cannot be loaded, use --no-seccomp if skipping filters // altogether is the desired behaviour. // Keep this as the last step before resuming vcpus. crate::seccomp::apply_filter( seccomp_filters .get("vmm") .ok_or_else(|| StartMicrovmError::MissingSeccompFilters("vmm".to_string()))?, ) .map_err(VmmError::SeccompFilters)?; event_manager.add_subscriber(vmm.clone()); Ok(vmm) } /// Builds and boots a microVM based on the current Firecracker VmResources configuration. /// /// This is the default build recipe, one could build other microVM flavors by using the /// independent functions in this module instead of calling this recipe. /// /// An `Arc` reference of the built `Vmm` is also plugged in the `EventManager`, while another /// is returned. 
pub fn build_and_boot_microvm( instance_info: &InstanceInfo, vm_resources: &super::resources::VmResources, event_manager: &mut EventManager, seccomp_filters: &BpfThreadMap, ) -> Result>, StartMicrovmError> { debug!("event_start: build microvm for boot"); let vmm = build_microvm_for_boot(instance_info, vm_resources, event_manager, seccomp_filters)?; debug!("event_end: build microvm for boot"); // The vcpus start off in the `Paused` state, let them run. debug!("event_start: boot microvm"); vmm.lock().unwrap().resume_vm()?; debug!("event_end: boot microvm"); Ok(vmm) } /// Error type for [`build_microvm_from_snapshot`]. #[derive(Debug, thiserror::Error, displaydoc::Display)] pub enum BuildMicrovmFromSnapshotError { /// Failed to create microVM and vCPUs: {0} CreateMicrovmAndVcpus(#[from] StartMicrovmError), /// Could not access KVM: {0} KvmAccess(#[from] vmm_sys_util::errno::Error), /// Error configuring the TSC, frequency not present in the given snapshot. TscFrequencyNotPresent, #[cfg(target_arch = "x86_64")] /// Could not get TSC to check if TSC scaling was required with the snapshot: {0} GetTsc(#[from] crate::arch::GetTscError), #[cfg(target_arch = "x86_64")] /// Could not set TSC scaling within the snapshot: {0} SetTsc(#[from] crate::arch::SetTscError), /// Failed to restore microVM state: {0} RestoreState(#[from] crate::vstate::vm::ArchVmError), /// Failed to update microVM configuration: {0} VmUpdateConfig(#[from] MachineConfigError), /// Failed to restore MMIO device: {0} RestoreMmioDevice(#[from] MicrovmStateError), /// Failed to emulate MMIO serial: {0} EmulateSerialInit(#[from] crate::EmulateSerialInitError), /// Failed to start vCPUs as no vCPU seccomp filter found. MissingVcpuSeccompFilters, /// Failed to start vCPUs: {0} StartVcpus(#[from] crate::StartVcpusError), /// Failed to restore vCPUs: {0} RestoreVcpus(#[from] VcpuError), /// Failed to apply VMM secccomp filter as none found. 
MissingVmmSeccompFilters, /// Failed to apply VMM secccomp filter: {0} SeccompFiltersInternal(#[from] crate::seccomp::InstallationError), /// Failed to restore devices: {0} RestoreDevices(#[from] DeviceManagerPersistError), } /// Builds and starts a microVM based on the provided MicrovmState. /// /// An `Arc` reference of the built `Vmm` is also plugged in the `EventManager`, while another /// is returned. #[allow(clippy::too_many_arguments)] pub fn build_microvm_from_snapshot( instance_info: &InstanceInfo, event_manager: &mut EventManager, microvm_state: MicrovmState, guest_memory: Vec, uffd: Option, seccomp_filters: &BpfThreadMap, vm_resources: &mut VmResources, ) -> Result>, BuildMicrovmFromSnapshotError> { // Build Vmm. debug!("event_start: build microvm from snapshot"); let kvm = Kvm::new(microvm_state.kvm_state.kvm_cap_modifiers.clone()) .map_err(StartMicrovmError::Kvm)?; // Set up Kvm Vm and register memory regions. // Build custom CPU config if a custom template is provided. let mut vm = Vm::new(&kvm).map_err(StartMicrovmError::Vm)?; let (mut vcpus, vcpus_exit_evt) = vm .create_vcpus(vm_resources.machine_config.vcpu_count) .map_err(StartMicrovmError::Vm)?; vm.restore_memory_regions(guest_memory, µvm_state.vm_state.memory) .map_err(StartMicrovmError::Vm)?; #[cfg(target_arch = "x86_64")] { // Scale TSC to match, extract the TSC freq from the state if specified if let Some(state_tsc) = microvm_state.vcpu_states[0].tsc_khz { // Scale the TSC frequency for all VCPUs. If a TSC frequency is not specified in the // snapshot, by default it uses the host frequency. if vcpus[0].kvm_vcpu.is_tsc_scaling_required(state_tsc)? { for vcpu in &vcpus { vcpu.kvm_vcpu.set_tsc_khz(state_tsc)?; } } } } // Restore vcpus kvm state. 
for (vcpu, state) in vcpus.iter_mut().zip(microvm_state.vcpu_states.iter()) { vcpu.kvm_vcpu .restore_state(state) .map_err(VcpuError::VcpuResponse) .map_err(BuildMicrovmFromSnapshotError::RestoreVcpus)?; } #[cfg(target_arch = "aarch64")] { let mpidrs = construct_kvm_mpidrs(µvm_state.vcpu_states); // Restore kvm vm state. vm.restore_state(&mpidrs, µvm_state.vm_state)?; } // Restore kvm vm state. #[cfg(target_arch = "x86_64")] vm.restore_state(µvm_state.vm_state)?; // Restore the boot source config paths. vm_resources.boot_source.config = microvm_state.vm_info.boot_source; let vm = Arc::new(vm); // Restore devices states. // Restoring VMGenID injects an interrupt in the guest to notify it about the new generation // ID. As a result, we need to restore DeviceManager after restoring the KVM state, otherwise // the injected interrupt will be overwritten. let device_ctor_args = DeviceRestoreArgs { mem: vm.guest_memory(), vm: &vm, event_manager, vm_resources, instance_id: &instance_info.id, vcpus_exit_evt: &vcpus_exit_evt, }; #[allow(unused_mut)] let mut device_manager = DeviceManager::restore(device_ctor_args, µvm_state.device_states)?; let mut vmm = Vmm { instance_info: instance_info.clone(), machine_config: vm_resources.machine_config.clone(), boot_source_config: vm_resources.boot_source.config.clone(), shutdown_exit_code: None, kvm, vm, uffd, vcpus_handles: Vec::new(), vcpus_exit_evt, device_manager, }; // Move vcpus to their own threads and start their state machine in the 'Paused' state. vmm.start_vcpus( vcpus, seccomp_filters .get("vcpu") .ok_or(BuildMicrovmFromSnapshotError::MissingVcpuSeccompFilters)? .clone(), )?; let vmm = Arc::new(Mutex::new(vmm)); event_manager.add_subscriber(vmm.clone()); // Load seccomp filters for the VMM thread. // Keep this as the last step of the building process. 
crate::seccomp::apply_filter( seccomp_filters .get("vmm") .ok_or(BuildMicrovmFromSnapshotError::MissingVmmSeccompFilters)?, )?; debug!("event_end: build microvm from snapshot"); Ok(vmm) } /// 64 bytes due to alignment requirement in 3.1 of https://www.kernel.org/doc/html/v5.8/virt/kvm/devices/vcpu.html#attribute-kvm-arm-vcpu-pvtime-ipa #[cfg(target_arch = "aarch64")] const STEALTIME_STRUCT_MEM_SIZE: u64 = 64; /// Helper method to allocate steal time region #[cfg(target_arch = "aarch64")] fn allocate_pvtime_region( resource_allocator: &mut ResourceAllocator, vcpu_count: usize, policy: vm_allocator::AllocPolicy, ) -> Result { let size = STEALTIME_STRUCT_MEM_SIZE * vcpu_count as u64; let addr = resource_allocator .allocate_system_memory(size, STEALTIME_STRUCT_MEM_SIZE, policy) .map_err(StartMicrovmError::AllocateResources)?; Ok(GuestAddress(addr)) } /// Sets up pvtime for all vcpus #[cfg(target_arch = "aarch64")] fn setup_pvtime( resource_allocator: &mut ResourceAllocator, vcpus: &mut [Vcpu], ) -> Result<(), StartMicrovmError> { // Alloc sys mem for steal time region let pvtime_mem: GuestAddress = allocate_pvtime_region( resource_allocator, vcpus.len(), vm_allocator::AllocPolicy::LastMatch, )?; // Register all vcpus with pvtime device for (i, vcpu) in vcpus.iter_mut().enumerate() { vcpu.kvm_vcpu .enable_pvtime(GuestAddress( pvtime_mem.0 + i as u64 * STEALTIME_STRUCT_MEM_SIZE, )) .map_err(StartMicrovmError::EnablePVTime)?; } Ok(()) } fn attach_entropy_device( device_manager: &mut DeviceManager, vm: &Arc, cmdline: &mut LoaderKernelCmdline, entropy_device: &Arc>, event_manager: &mut EventManager, ) -> Result<(), AttachDeviceError> { let id = entropy_device .lock() .expect("Poisoned lock") .id() .to_string(); device_manager.attach_virtio_device( vm, id, entropy_device.clone(), cmdline, event_manager, false, ) } fn allocate_virtio_mem_address( vm: &Vm, total_size_mib: usize, ) -> Result { let addr = vm .resource_allocator() .past_mmio64_memory .allocate( 
mib_to_bytes(total_size_mib) as u64, mib_to_bytes(VIRTIO_MEM_DEFAULT_SLOT_SIZE_MIB) as u64, AllocPolicy::FirstMatch, )? .start(); Ok(GuestAddress(addr)) } fn attach_virtio_mem_device( device_manager: &mut DeviceManager, vm: &Arc, cmdline: &mut LoaderKernelCmdline, config: &MemoryHotplugConfig, event_manager: &mut EventManager, addr: GuestAddress, ) -> Result<(), StartMicrovmError> { let virtio_mem = Arc::new(Mutex::new( VirtioMem::new( Arc::clone(vm), addr, config.total_size_mib, config.block_size_mib, config.slot_size_mib, ) .map_err(|e| StartMicrovmError::Internal(VmmError::VirtioMem(e)))?, )); let id = virtio_mem.lock().expect("Poisoned lock").id().to_string(); device_manager.attach_virtio_device( vm, id, virtio_mem.clone(), cmdline, event_manager, false, )?; Ok(()) } fn attach_block_devices<'a, I: Iterator>> + Debug>( device_manager: &mut DeviceManager, vm: &Arc, cmdline: &mut LoaderKernelCmdline, blocks: I, event_manager: &mut EventManager, ) -> Result<(), StartMicrovmError> { for block in blocks { let (id, is_vhost_user) = { let locked = block.lock().expect("Poisoned lock"); if locked.root_device() { match locked.partuuid() { Some(partuuid) => cmdline.insert_str(format!("root=PARTUUID={}", partuuid))?, None => cmdline.insert_str("root=/dev/vda")?, } match locked.read_only() { true => cmdline.insert_str("ro")?, false => cmdline.insert_str("rw")?, } } (locked.id().to_string(), locked.is_vhost_user()) }; // The device mutex mustn't be locked here otherwise it will deadlock. 
device_manager.attach_virtio_device( vm, id, block.clone(), cmdline, event_manager, is_vhost_user, )?; } Ok(()) } fn attach_net_devices<'a, I: Iterator>> + Debug>( device_manager: &mut DeviceManager, vm: &Arc, cmdline: &mut LoaderKernelCmdline, net_devices: I, event_manager: &mut EventManager, ) -> Result<(), StartMicrovmError> { for net_device in net_devices { let id = net_device.lock().expect("Poisoned lock").id().to_string(); // The device mutex mustn't be locked here otherwise it will deadlock. device_manager.attach_virtio_device( vm, id, net_device.clone(), cmdline, event_manager, false, )?; } Ok(()) } fn attach_pmem_devices<'a, I: Iterator>> + Debug>( device_manager: &mut DeviceManager, vm: &Arc, cmdline: &mut LoaderKernelCmdline, pmem_devices: I, event_manager: &mut EventManager, ) -> Result<(), StartMicrovmError> { for (i, device) in pmem_devices.enumerate() { let id = { let mut locked_dev = device.lock().expect("Poisoned lock"); if locked_dev.config.root_device { cmdline.insert_str(format!("root=/dev/pmem{i}"))?; match locked_dev.config.read_only { true => cmdline.insert_str("ro")?, false => cmdline.insert_str("rw")?, } } locked_dev.alloc_region(vm.as_ref()); locked_dev.set_mem_region(vm.as_ref())?; locked_dev.config.id.to_string() }; device_manager.attach_virtio_device( vm, id, device.clone(), cmdline, event_manager, false, )?; } Ok(()) } fn attach_unixsock_vsock_device( device_manager: &mut DeviceManager, vm: &Arc, cmdline: &mut LoaderKernelCmdline, unix_vsock: &Arc>>, event_manager: &mut EventManager, ) -> Result<(), AttachDeviceError> { let id = String::from(unix_vsock.lock().expect("Poisoned lock").id()); // The device mutex mustn't be locked here otherwise it will deadlock. 
device_manager.attach_virtio_device(vm, id, unix_vsock.clone(), cmdline, event_manager, false) } fn attach_balloon_device( device_manager: &mut DeviceManager, vm: &Arc, cmdline: &mut LoaderKernelCmdline, balloon: &Arc>, event_manager: &mut EventManager, ) -> Result<(), AttachDeviceError> { let id = String::from(balloon.lock().expect("Poisoned lock").id()); // The device mutex mustn't be locked here otherwise it will deadlock. device_manager.attach_virtio_device(vm, id, balloon.clone(), cmdline, event_manager, false) } #[cfg(test)] pub(crate) mod tests { use linux_loader::cmdline::Cmdline; use vmm_sys_util::tempfile::TempFile; use super::*; use crate::device_manager::tests::default_device_manager; use crate::devices::virtio::block::CacheType; use crate::devices::virtio::device::VirtioDeviceType; use crate::devices::virtio::rng::device::ENTROPY_DEV_ID; use crate::devices::virtio::vsock::VSOCK_DEV_ID; use crate::mmds::data_store::{Mmds, MmdsVersion}; use crate::mmds::ns::MmdsNetworkStack; use crate::utils::mib_to_bytes; use crate::vmm_config::balloon::{BALLOON_DEV_ID, BalloonBuilder, BalloonDeviceConfig}; use crate::vmm_config::boot_source::{BootSourceConfig, DEFAULT_KERNEL_CMDLINE}; use crate::vmm_config::drive::{BlockBuilder, BlockDeviceConfig}; use crate::vmm_config::entropy::{EntropyDeviceBuilder, EntropyDeviceConfig}; use crate::vmm_config::machine_config::MachineConfig; use crate::vmm_config::net::{NetBuilder, NetworkInterfaceConfig}; use crate::vmm_config::pmem::{PmemBuilder, PmemConfig}; use crate::vmm_config::vsock::tests::default_config; use crate::vmm_config::vsock::{VsockBuilder, VsockDeviceConfig}; use crate::vstate::vm::tests::setup_vm_with_memory; #[derive(Debug)] pub(crate) struct CustomBlockConfig { drive_id: String, is_root_device: bool, partuuid: Option, is_read_only: bool, cache_type: CacheType, } impl CustomBlockConfig { pub(crate) fn new( drive_id: String, is_root_device: bool, partuuid: Option, is_read_only: bool, cache_type: CacheType, ) -> 
Self { CustomBlockConfig { drive_id, is_root_device, partuuid, is_read_only, cache_type, } } } fn cmdline_contains(cmdline: &Cmdline, slug: &str) -> bool { // The following unwraps can never fail; the only way any of these methods // would return an `Err` is if one of the following conditions is met: // 1. The command line is empty: We just added things to it, and if insertion of an // argument goes wrong, then `Cmdline::insert` would have already returned `Err`. // 2. There's a spurious null character somewhere in the command line: The // `Cmdline::insert` methods verify that this is not the case. // 3. The `CString` is not valid UTF8: It just got created from a `String`, which was // valid UTF8. cmdline .as_cstring() .unwrap() .into_string() .unwrap() .contains(slug) } pub(crate) fn default_kernel_cmdline() -> Cmdline { linux_loader::cmdline::Cmdline::try_from( DEFAULT_KERNEL_CMDLINE, crate::arch::CMDLINE_MAX_SIZE, ) .unwrap() } pub(crate) fn default_vmm() -> Vmm { let (kvm, mut vm) = setup_vm_with_memory(mib_to_bytes(128)); let (_, vcpus_exit_evt) = vm.create_vcpus(1).unwrap(); Vmm { instance_info: InstanceInfo::default(), machine_config: MachineConfig::default(), boot_source_config: BootSourceConfig::default(), shutdown_exit_code: None, kvm, vm: Arc::new(vm), uffd: None, vcpus_handles: Vec::new(), vcpus_exit_evt, device_manager: default_device_manager(), } } pub(crate) fn insert_block_devices( vmm: &mut Vmm, cmdline: &mut Cmdline, event_manager: &mut EventManager, custom_block_cfgs: Vec, ) -> Vec { let mut block_dev_configs = BlockBuilder::new(); let mut block_files = Vec::new(); for custom_block_cfg in custom_block_cfgs { block_files.push(TempFile::new().unwrap()); let block_device_config = BlockDeviceConfig { drive_id: String::from(&custom_block_cfg.drive_id), partuuid: custom_block_cfg.partuuid, is_root_device: custom_block_cfg.is_root_device, cache_type: custom_block_cfg.cache_type, is_read_only: Some(custom_block_cfg.is_read_only), path_on_host: Some( 
block_files .last() .unwrap() .as_path() .to_str() .unwrap() .to_string(), ), rate_limiter: None, file_engine_type: None, socket: None, }; block_dev_configs .insert(block_device_config, false) .unwrap(); } attach_block_devices( &mut vmm.device_manager, &vmm.vm, cmdline, block_dev_configs.devices.iter(), event_manager, ) .unwrap(); block_files } pub(crate) fn insert_net_device( vmm: &mut Vmm, cmdline: &mut Cmdline, event_manager: &mut EventManager, net_config: NetworkInterfaceConfig, ) { let mut net_builder = NetBuilder::new(); net_builder.build(net_config).unwrap(); let res = attach_net_devices( &mut vmm.device_manager, &vmm.vm, cmdline, net_builder.iter(), event_manager, ); res.unwrap(); } pub(crate) fn insert_net_device_with_mmds( vmm: &mut Vmm, cmdline: &mut Cmdline, event_manager: &mut EventManager, net_config: NetworkInterfaceConfig, mmds_version: MmdsVersion, ) { let mut net_builder = NetBuilder::new(); net_builder.build(net_config).unwrap(); let net = net_builder.iter().next().unwrap(); let mut mmds = Mmds::default(); mmds.set_version(mmds_version); net.lock().unwrap().configure_mmds_network_stack( MmdsNetworkStack::default_ipv4_addr(), Arc::new(Mutex::new(mmds)), ); attach_net_devices( &mut vmm.device_manager, &vmm.vm, cmdline, net_builder.iter(), event_manager, ) .unwrap(); } pub(crate) fn insert_vsock_device( vmm: &mut Vmm, cmdline: &mut Cmdline, event_manager: &mut EventManager, vsock_config: VsockDeviceConfig, ) { let vsock_dev_id = VSOCK_DEV_ID.to_owned(); let vsock = VsockBuilder::create_unixsock_vsock(vsock_config).unwrap(); let vsock = Arc::new(Mutex::new(vsock)); attach_unixsock_vsock_device( &mut vmm.device_manager, &vmm.vm, cmdline, &vsock, event_manager, ) .unwrap(); assert!( vmm.device_manager .get_virtio_device(VirtioDeviceType::Vsock, &vsock_dev_id) .is_some() ); } pub(crate) fn insert_entropy_device( vmm: &mut Vmm, cmdline: &mut Cmdline, event_manager: &mut EventManager, entropy_config: EntropyDeviceConfig, ) { let mut builder = 
EntropyDeviceBuilder::new(); let entropy = builder.build(entropy_config).unwrap(); attach_entropy_device( &mut vmm.device_manager, &vmm.vm, cmdline, &entropy, event_manager, ) .unwrap(); assert!( vmm.device_manager .get_virtio_device(VirtioDeviceType::Rng, ENTROPY_DEV_ID) .is_some() ); } pub(crate) fn insert_pmem_devices( vmm: &mut Vmm, cmdline: &mut Cmdline, event_manager: &mut EventManager, configs: Vec, ) -> Vec { let mut builder = PmemBuilder::default(); let mut files = Vec::new(); for mut config in configs { let tmp_file = TempFile::new().unwrap(); tmp_file.as_file().set_len(0x20_0000).unwrap(); let tmp_file_path = tmp_file.as_path().to_str().unwrap().to_string(); files.push(tmp_file); config.path_on_host = tmp_file_path; builder.build(config, false).unwrap(); } attach_pmem_devices( &mut vmm.device_manager, &vmm.vm, cmdline, builder.devices.iter(), event_manager, ) .unwrap(); files } #[cfg(target_arch = "x86_64")] pub(crate) fn insert_vmgenid_device(vmm: &mut Vmm) { vmm.device_manager.attach_vmgenid_device(&vmm.vm).unwrap(); } #[cfg(target_arch = "x86_64")] pub(crate) fn insert_vmclock_device(vmm: &mut Vmm) { vmm.device_manager.attach_vmclock_device(&vmm.vm).unwrap(); } pub(crate) fn insert_balloon_device( vmm: &mut Vmm, cmdline: &mut Cmdline, event_manager: &mut EventManager, balloon_config: BalloonDeviceConfig, ) { let mut builder = BalloonBuilder::new(); builder.set(balloon_config).unwrap(); let balloon = builder.get().unwrap(); attach_balloon_device( &mut vmm.device_manager, &vmm.vm, cmdline, balloon, event_manager, ) .unwrap(); assert!( vmm.device_manager .get_virtio_device(VirtioDeviceType::Balloon, BALLOON_DEV_ID) .is_some() ); } #[test] fn test_attach_net_devices() { let mut event_manager = EventManager::new().expect("Unable to create EventManager"); let mut vmm = default_vmm(); let network_interface = NetworkInterfaceConfig { iface_id: String::from("netif"), host_dev_name: String::from("hostname"), guest_mac: None, rx_rate_limiter: None, 
tx_rate_limiter: None, }; let mut cmdline = default_kernel_cmdline(); insert_net_device( &mut vmm, &mut cmdline, &mut event_manager, network_interface.clone(), ); // We can not attach it once more. let mut net_builder = NetBuilder::new(); net_builder.build(network_interface).unwrap_err(); } #[test] fn test_attach_block_devices() { let mut event_manager = EventManager::new().expect("Unable to create EventManager"); // Use case 1: root block device is not specified through PARTUUID. { let drive_id = String::from("root"); let block_configs = vec![CustomBlockConfig::new( drive_id.clone(), true, None, true, CacheType::Unsafe, )]; let mut vmm = default_vmm(); let mut cmdline = default_kernel_cmdline(); insert_block_devices(&mut vmm, &mut cmdline, &mut event_manager, block_configs); assert!(cmdline_contains(&cmdline, "root=/dev/vda ro")); assert!( vmm.device_manager .get_virtio_device(VirtioDeviceType::Block, drive_id.as_str()) .is_some() ); } // Use case 2: root block device is specified through PARTUUID. { let drive_id = String::from("root"); let block_configs = vec![CustomBlockConfig::new( drive_id.clone(), true, Some("0eaa91a0-01".to_string()), false, CacheType::Unsafe, )]; let mut vmm = default_vmm(); let mut cmdline = default_kernel_cmdline(); insert_block_devices(&mut vmm, &mut cmdline, &mut event_manager, block_configs); assert!(cmdline_contains(&cmdline, "root=PARTUUID=0eaa91a0-01 rw")); assert!( vmm.device_manager .get_virtio_device(VirtioDeviceType::Block, drive_id.as_str()) .is_some() ); } // Use case 3: root block device is not added at all. 
{ let drive_id = String::from("non_root"); let block_configs = vec![CustomBlockConfig::new( drive_id.clone(), false, Some("0eaa91a0-01".to_string()), false, CacheType::Unsafe, )]; let mut vmm = default_vmm(); let mut cmdline = default_kernel_cmdline(); insert_block_devices(&mut vmm, &mut cmdline, &mut event_manager, block_configs); assert!(!cmdline_contains(&cmdline, "root=PARTUUID=")); assert!(!cmdline_contains(&cmdline, "root=/dev/vda")); assert!( vmm.device_manager .get_virtio_device(VirtioDeviceType::Block, drive_id.as_str()) .is_some() ); } // Use case 4: rw root block device and other rw and ro drives. { let block_configs = vec![ CustomBlockConfig::new( String::from("root"), true, Some("0eaa91a0-01".to_string()), false, CacheType::Unsafe, ), CustomBlockConfig::new( String::from("secondary"), false, None, true, CacheType::Unsafe, ), CustomBlockConfig::new( String::from("third"), false, None, false, CacheType::Unsafe, ), ]; let mut vmm = default_vmm(); let mut cmdline = default_kernel_cmdline(); insert_block_devices(&mut vmm, &mut cmdline, &mut event_manager, block_configs); assert!(cmdline_contains(&cmdline, "root=PARTUUID=0eaa91a0-01 rw")); assert!( vmm.device_manager .get_virtio_device(VirtioDeviceType::Block, "root") .is_some() ); assert!( vmm.device_manager .get_virtio_device(VirtioDeviceType::Block, "secondary") .is_some() ); assert!( vmm.device_manager .get_virtio_device(VirtioDeviceType::Block, "third") .is_some() ); // Check if these three block devices are inserted in kernel_cmdline. #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] assert!(cmdline_contains( &cmdline, "virtio_mmio.device=4K@0xc0001000:5 virtio_mmio.device=4K@0xc0002000:6 \ virtio_mmio.device=4K@0xc0003000:7" )); } // Use case 5: root block device is rw. 
{ let drive_id = String::from("root"); let block_configs = vec![CustomBlockConfig::new( drive_id.clone(), true, None, false, CacheType::Unsafe, )]; let mut vmm = default_vmm(); let mut cmdline = default_kernel_cmdline(); insert_block_devices(&mut vmm, &mut cmdline, &mut event_manager, block_configs); assert!(cmdline_contains(&cmdline, "root=/dev/vda rw")); assert!( vmm.device_manager .get_virtio_device(VirtioDeviceType::Block, drive_id.as_str()) .is_some() ); } // Use case 6: root block device is ro, with PARTUUID. { let drive_id = String::from("root"); let block_configs = vec![CustomBlockConfig::new( drive_id.clone(), true, Some("0eaa91a0-01".to_string()), true, CacheType::Unsafe, )]; let mut vmm = default_vmm(); let mut cmdline = default_kernel_cmdline(); insert_block_devices(&mut vmm, &mut cmdline, &mut event_manager, block_configs); assert!(cmdline_contains(&cmdline, "root=PARTUUID=0eaa91a0-01 ro")); assert!( vmm.device_manager .get_virtio_device(VirtioDeviceType::Block, drive_id.as_str()) .is_some() ); } // Use case 7: root block device is rw with flush enabled { let drive_id = String::from("root"); let block_configs = vec![CustomBlockConfig::new( drive_id.clone(), true, None, false, CacheType::Writeback, )]; let mut vmm = default_vmm(); let mut cmdline = default_kernel_cmdline(); insert_block_devices(&mut vmm, &mut cmdline, &mut event_manager, block_configs); assert!(cmdline_contains(&cmdline, "root=/dev/vda rw")); assert!( vmm.device_manager .get_virtio_device(VirtioDeviceType::Block, drive_id.as_str()) .is_some() ); } } #[test] fn test_attach_pmem_devices() { let mut event_manager = EventManager::new().expect("Unable to create EventManager"); let id = String::from("root"); let configs = vec![PmemConfig { id: id.clone(), path_on_host: "".into(), root_device: true, read_only: true, }]; let mut vmm = default_vmm(); let mut cmdline = default_kernel_cmdline(); _ = insert_pmem_devices(&mut vmm, &mut cmdline, &mut event_manager, configs); 
assert!(cmdline_contains(&cmdline, "root=/dev/pmem0 ro")); assert!( vmm.device_manager .get_virtio_device(VirtioDeviceType::Pmem, id.as_str()) .is_some() ); } #[test] fn test_attach_boot_timer_device() { let mut vmm = default_vmm(); let request_ts = TimestampUs::default(); let res = vmm .device_manager .attach_boot_timer_device(&vmm.vm, request_ts); res.unwrap(); assert!(vmm.device_manager.mmio_devices.boot_timer.is_some()); } #[test] fn test_attach_balloon_device() { let mut event_manager = EventManager::new().expect("Unable to create EventManager"); let mut vmm = default_vmm(); let balloon_config = BalloonDeviceConfig { amount_mib: 0, deflate_on_oom: false, stats_polling_interval_s: 0, free_page_hinting: false, free_page_reporting: false, }; let mut cmdline = default_kernel_cmdline(); insert_balloon_device(&mut vmm, &mut cmdline, &mut event_manager, balloon_config); // Check if the vsock device is described in kernel_cmdline. #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] assert!(cmdline_contains( &cmdline, "virtio_mmio.device=4K@0xc0001000:5" )); } #[test] fn test_attach_entropy_device() { let mut event_manager = EventManager::new().expect("Unable to create EventManager"); let mut vmm = default_vmm(); let entropy_config = EntropyDeviceConfig::default(); let mut cmdline = default_kernel_cmdline(); insert_entropy_device(&mut vmm, &mut cmdline, &mut event_manager, entropy_config); // Check if the vsock device is described in kernel_cmdline. 
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] assert!(cmdline_contains( &cmdline, "virtio_mmio.device=4K@0xc0001000:5" )); } #[test] fn test_attach_vsock_device() { let mut event_manager = EventManager::new().expect("Unable to create EventManager"); let mut vmm = default_vmm(); let mut tmp_sock_file = TempFile::new().unwrap(); tmp_sock_file.remove().unwrap(); let vsock_config = default_config(&tmp_sock_file); let mut cmdline = default_kernel_cmdline(); insert_vsock_device(&mut vmm, &mut cmdline, &mut event_manager, vsock_config); // Check if the vsock device is described in kernel_cmdline. #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] assert!(cmdline_contains( &cmdline, "virtio_mmio.device=4K@0xc0001000:5" )); } pub(crate) fn insert_virtio_mem_device( vmm: &mut Vmm, cmdline: &mut Cmdline, event_manager: &mut EventManager, config: MemoryHotplugConfig, ) { attach_virtio_mem_device( &mut vmm.device_manager, &vmm.vm, cmdline, &config, event_manager, GuestAddress(512 << 30), ) .unwrap(); } #[test] fn test_attach_virtio_mem_device() { let mut event_manager = EventManager::new().expect("Unable to create EventManager"); let mut vmm = default_vmm(); let config = MemoryHotplugConfig { total_size_mib: 1024, block_size_mib: 2, slot_size_mib: 128, }; let mut cmdline = default_kernel_cmdline(); insert_virtio_mem_device(&mut vmm, &mut cmdline, &mut event_manager, config); // Check if the vsock device is described in kernel_cmdline. #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] assert!(cmdline_contains( &cmdline, "virtio_mmio.device=4K@0xc0001000:5" )); } } ================================================ FILE: src/vmm/src/cpu_config/aarch64/custom_cpu_template.rs ================================================ // Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 /// Guest config sub-module specifically for /// config templates. 
use std::borrow::Cow;

use serde::de::Error;
use serde::{Deserialize, Serialize};

use crate::arch::aarch64::regs::{RegSize, reg_size};
use crate::cpu_config::aarch64::static_cpu_templates::v1n1;
use crate::cpu_config::templates::{
    CpuTemplateType, GetCpuTemplate, GetCpuTemplateError, KvmCapability, RegisterValueFilter,
    StaticCpuTemplate,
};
use crate::cpu_config::templates_serde::*;

impl GetCpuTemplate for Option<CpuTemplateType> {
    /// Resolve the effective CPU template:
    /// - custom template -> borrowed as-is,
    /// - static V1N1 -> materialized from the hardcoded `v1n1` definition,
    /// - any other static template -> `InvalidStaticCpuTemplate` error,
    /// - `None` -> empty default template (no modifications).
    fn get_cpu_template(&self) -> Result<Cow<'_, CustomCpuTemplate>, GetCpuTemplateError> {
        match self {
            Some(template_type) => match template_type {
                CpuTemplateType::Custom(template) => Ok(Cow::Borrowed(template)),
                CpuTemplateType::Static(template) => match template {
                    // TODO: Check if the CPU model is Neoverse-V1.
                    StaticCpuTemplate::V1N1 => Ok(Cow::Owned(v1n1::v1n1())),
                    other => Err(GetCpuTemplateError::InvalidStaticCpuTemplate(*other)),
                },
            },
            None => Ok(Cow::Owned(CustomCpuTemplate::default())),
        }
    }
}

/// Wrapper type containing aarch64 CPU config modifiers.
#[derive(Debug, Default, Clone, Eq, PartialEq, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct CustomCpuTemplate {
    /// Additional kvm capabilities to check before
    /// configuring vcpus.
    #[serde(default)]
    pub kvm_capabilities: Vec<KvmCapability>,
    /// Modifiers of enabled vcpu features for vcpu.
    #[serde(default)]
    pub vcpu_features: Vec<VcpuFeatures>,
    /// Modifiers for registers on Aarch64 CPUs.
    #[serde(default)]
    pub reg_modifiers: Vec<RegisterModifier>,
}

impl CustomCpuTemplate {
    /// Get a list of register IDs that are modified by the CPU template.
    pub fn reg_list(&self) -> Vec<u64> {
        self.reg_modifiers
            .iter()
            .map(|modifier| modifier.addr)
            .collect()
    }

    /// Validate the correctness of the template.
pub fn validate(&self) -> Result<(), serde_json::Error> { for modifier in self.reg_modifiers.iter() { let reg_size = reg_size(modifier.addr); match RegSize::from(reg_size) { RegSize::U32 | RegSize::U64 => { // Safe to unwrap because the number of bits is limited let limit = 2u128.pow(u32::try_from(reg_size).unwrap() * 8) - 1; if limit < modifier.bitmap.value || limit < modifier.bitmap.filter { return Err(serde_json::Error::custom(format!( "Invalid size of bitmap for register {:#x}, should be <= {} bits", modifier.addr, reg_size * 8 ))); } } RegSize::U128 => {} _ => { return Err(serde_json::Error::custom(format!( "Invalid aarch64 register address: {:#x} - Only 32, 64 and 128 bit wide \ registers are supported", modifier.addr ))); } } } Ok(()) } } /// Struct for defining enabled vcpu features #[derive(Debug, Default, Clone, Eq, PartialEq, Serialize, Deserialize)] pub struct VcpuFeatures { /// Index in the `kvm_bindings::kvm_vcpu_init.features` array. pub index: u32, /// Modifier for the value in the `kvm_bindings::kvm_vcpu_init.features` array. pub bitmap: RegisterValueFilter, } /// Wrapper of a mask defined as a bitmap to apply /// changes to a given register's value. #[derive(Debug, Clone, Copy, Eq, PartialEq, Serialize, Deserialize, Hash)] pub struct RegisterModifier { /// Pointer of the location to be bit mapped. #[serde( deserialize_with = "deserialize_from_str_u64", serialize_with = "serialize_to_hex_str" )] pub addr: u64, /// Bit mapping to be applied as a modifier to the /// register's value at the address provided. pub bitmap: RegisterValueFilter, } #[cfg(test)] mod tests { use serde_json::Value; use super::*; use crate::cpu_config::templates::test_utils::{TEST_TEMPLATE_JSON, build_test_template}; #[test] fn test_get_cpu_template_with_no_template() { // Test `get_cpu_template()` when no template is provided. The empty owned // `CustomCpuTemplate` should be returned. 
let cpu_template = None; assert_eq!( cpu_template.get_cpu_template().unwrap(), Cow::Owned(CustomCpuTemplate::default()), ); } #[test] fn test_get_cpu_template_with_v1n1_static_template() { // Test `get_cpu_template()` when V1N1 static CPU template is specified. The owned // `CustomCpuTemplate` should be returned. let cpu_template = Some(CpuTemplateType::Static(StaticCpuTemplate::V1N1)); assert_eq!( cpu_template.get_cpu_template().unwrap(), Cow::Owned(v1n1::v1n1()) ); } #[test] fn test_get_cpu_tempalte_with_none_static_template() { // Test `get_cpu_template()` when no static CPU template is provided. // `InvalidStaticCpuTemplate` error should be returned because it is no longer valid and // was replaced with `None` of `Option`. let cpu_template = Some(CpuTemplateType::Static(StaticCpuTemplate::None)); assert_eq!( cpu_template.get_cpu_template().unwrap_err(), GetCpuTemplateError::InvalidStaticCpuTemplate(StaticCpuTemplate::None) ); } #[test] fn test_get_cpu_template_with_custom_template() { // Test `get_cpu_template()` when a custom CPU template is provided. The borrowed // `CustomCpuTemplate` should be returned. 
let inner_cpu_template = CustomCpuTemplate::default(); let cpu_template = Some(CpuTemplateType::Custom(inner_cpu_template.clone())); assert_eq!( cpu_template.get_cpu_template().unwrap(), Cow::Borrowed(&inner_cpu_template) ); } #[test] fn test_correct_json() { let cpu_config_result = serde_json::from_str::( r#"{ "kvm_capabilities": ["1", "!2"], "vcpu_features":[{"index":0,"bitmap":"0b1100000"}], "reg_modifiers": [ { "addr": "0x0030000000000000", "bitmap": "0bx00100x0x1xxxx01xxx1xxxxxxxxxxx1" } ] }"#, ); cpu_config_result.unwrap(); } #[test] fn test_malformed_json() { // Malformed kvm capabilities let cpu_config_result = serde_json::from_str::( r#"{ "kvm_capabilities": ["1", "!a2"], "vcpu_features":[{"index":0,"bitmap":"0b1100000"}] }"#, ); cpu_config_result.unwrap_err(); // Malformed vcpu features let cpu_config_result = serde_json::from_str::( r#"{ "kvm_capabilities": ["1", "!2"], "vcpu_features":[{"index":0,"bitmap":"0b11abc00"}] }"#, ); cpu_config_result.unwrap_err(); // Malformed register address let cpu_config_result = serde_json::from_str::( r#"{ "reg_modifiers": [ { "addr": "j", "bitmap": "0bx00100xxx1xxxx00xxx1xxxxxxxxxxx1" } ] }"#, ); let error_msg: String = cpu_config_result.unwrap_err().to_string(); // Formatted error expected clarifying the number system prefix is missing assert!( error_msg.contains("No supported number system prefix found in value"), "{}", error_msg ); // Malformed address as binary let cpu_config_result = serde_json::from_str::( r#"{ "reg_modifiers": [ { "addr": "0bK", "bitmap": "0bx00100xxx1xxxx00xxx1xxxxxxxxxxx1" } ] }"#, ); assert!( cpu_config_result .unwrap_err() .to_string() .contains("Failed to parse string [0bK] as a number for CPU template") ); // Malformed 64-bit bitmap - filter failed let cpu_config_result = serde_json::from_str::( r#"{ "reg_modifiers": [ { "addr": "0x0030000000000000", "bitmap": "0bx0?1_0_0x_?x1xxxx00xxx1xxxxxxxxxxx1" } ] }"#, ); assert!(cpu_config_result.unwrap_err().to_string().contains( "Failed to parse 
string [0bx0?1_0_0x_?x1xxxx00xxx1xxxxxxxxxxx1] as a bitmap" )); // Malformed 64-bit bitmap - value failed let cpu_config_result = serde_json::from_str::( r#"{ "reg_modifiers": [ { "addr": "0x0030000000000000", "bitmap": "0bx00100x0x1xxxx05xxx1xxxxxxxxxxx1" } ] }"#, ); assert!( cpu_config_result.unwrap_err().to_string().contains( "Failed to parse string [0bx00100x0x1xxxx05xxx1xxxxxxxxxxx1] as a bitmap" ) ); } #[test] fn test_deserialization_lifecycle() { let cpu_config = serde_json::from_str::(TEST_TEMPLATE_JSON) .expect("Failed to deserialize custom CPU template."); assert_eq!(2, cpu_config.reg_modifiers.len()); } #[test] fn test_serialization_lifecycle() { let template = build_test_template(); let template_json_str_result = serde_json::to_string_pretty(&template); let template_json = template_json_str_result.unwrap(); let deserialization_result = serde_json::from_str::(&template_json); assert_eq!(template, deserialization_result.unwrap()); } /// Test to confirm that templates for different CPU architectures have /// a size bitmask that is supported by the architecture when serialized to JSON. #[test] fn test_bitmap_width() { let mut checked = false; let template = build_test_template(); let aarch64_template_str = serde_json::to_string(&template).expect("Error serializing aarch64 template"); let json_tree: Value = serde_json::from_str(&aarch64_template_str) .expect("Error deserializing aarch64 template JSON string"); // Check that bitmap for aarch64 masks are serialized to 128-bits if let Some(modifiers_root) = json_tree.get("reg_modifiers") { let mod_node = &modifiers_root.as_array().unwrap()[0]; if let Some(bit_map_str) = mod_node.get("bitmap") { // 128-bit width with a "0b" prefix for binary-formatted numbers assert_eq!(bit_map_str.as_str().unwrap().len(), 130); assert!(bit_map_str.as_str().unwrap().starts_with("0b")); checked = true; } } assert!( checked, "Bitmap width in a aarch64 template was not tested." 
); } #[test] fn test_cpu_template_validate() { // 32, 64 and 128 bit regs with correct filters and values let template = CustomCpuTemplate { reg_modifiers: vec![ RegisterModifier { addr: 0x0020000000000000, bitmap: RegisterValueFilter { filter: 0x1, value: 0x2, }, }, RegisterModifier { addr: 0x0030000000000000, bitmap: RegisterValueFilter { filter: 0x1, value: 0x2, }, }, RegisterModifier { addr: 0x0040000000000000, bitmap: RegisterValueFilter { filter: 0x1, value: 0x2, }, }, ], ..Default::default() }; template.validate().unwrap(); // 32 bit reg with too long filter let template = CustomCpuTemplate { reg_modifiers: vec![RegisterModifier { addr: 0x0020000000000000, bitmap: RegisterValueFilter { filter: 0x100000000, value: 0x2, }, }], ..Default::default() }; template.validate().unwrap_err(); // 32 bit reg with too long value let template = CustomCpuTemplate { reg_modifiers: vec![RegisterModifier { addr: 0x0020000000000000, bitmap: RegisterValueFilter { filter: 0x1, value: 0x100000000, }, }], ..Default::default() }; template.validate().unwrap_err(); // 16 bit unsupporteed reg let template = CustomCpuTemplate { reg_modifiers: vec![RegisterModifier { addr: 0x0010000000000000, bitmap: RegisterValueFilter { filter: 0x1, value: 0x2, }, }], ..Default::default() }; template.validate().unwrap_err(); } } ================================================ FILE: src/vmm/src/cpu_config/aarch64/mod.rs ================================================ // Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
// SPDX-License-Identifier: Apache-2.0

/// Module for custom CPU templates
pub mod custom_cpu_template;
/// Module for static CPU templates
pub mod static_cpu_templates;
/// Module with test utils for custom CPU templates
pub mod test_utils;

use super::templates::CustomCpuTemplate;
use crate::Vcpu;
use crate::arch::aarch64::regs::{Aarch64RegisterVec, RegSize};
use crate::arch::aarch64::vcpu::{VcpuArchError, get_registers};
use crate::vstate::vcpu::KvmVcpuError;

/// Errors thrown while configuring templates.
#[derive(Debug, PartialEq, Eq, thiserror::Error, displaydoc::Display)]
pub enum CpuConfigurationError {
    /// Error initializing the vcpu: {0}
    VcpuInit(#[from] KvmVcpuError),
    /// Error reading vcpu registers: {0}
    VcpuGetRegs(#[from] VcpuArchError),
}

/// CPU configuration for aarch64
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct CpuConfiguration {
    /// Vector of CPU registers
    pub regs: Aarch64RegisterVec,
}

impl CpuConfiguration {
    /// Create new CpuConfiguration.
    ///
    /// Initializes every vcpu with the template's vcpu features, then reads
    /// the template's register list from the first vcpu to use as the
    /// baseline register state.
    pub fn new(
        cpu_template: &CustomCpuTemplate,
        vcpus: &mut [Vcpu],
    ) -> Result<Self, CpuConfigurationError> {
        for vcpu in vcpus.iter_mut() {
            vcpu.kvm_vcpu.init(&cpu_template.vcpu_features)?;
        }
        let mut regs = Aarch64RegisterVec::default();
        // NOTE(review): only vcpus[0] is read here — assumes all vcpus expose
        // identical register state at this point; panics if `vcpus` is empty.
        get_registers(&vcpus[0].kvm_vcpu.fd, &cpu_template.reg_list(), &mut regs)?;
        Ok(CpuConfiguration { regs })
    }

    /// Creates new guest CPU config based on the provided template
    ///
    /// Pairs each register modifier with the register at the same position in
    /// `self.regs` (the two sequences are assumed to be in the same order)
    /// and applies the bitmap filter/value, truncating the 128-bit result
    /// back to the register's actual width.
    pub fn apply_template(mut self, template: &CustomCpuTemplate) -> Self {
        for (modifier, mut reg) in template.reg_modifiers.iter().zip(self.regs.iter_mut()) {
            match reg.size() {
                RegSize::U32 => {
                    reg.set_value(
                        (modifier.bitmap.apply(u128::from(reg.value::<u32>())) & 0xFFFF_FFFF)
                            as u32,
                    );
                }
                RegSize::U64 => {
                    reg.set_value(
                        (modifier.bitmap.apply(u128::from(reg.value::<u64>()))
                            & 0xFFFF_FFFF_FFFF_FFFF) as u64,
                    );
                }
                RegSize::U128 => {
                    reg.set_value(modifier.bitmap.apply(reg.value::<u128>()));
                }
                // Template validation rejects any other register width before
                // this point, so this arm is genuinely unreachable.
                _ => unreachable!("Only 32, 64 and 128 bit wide registers are supported"),
            }
        }
        self
    }

    /// Returns ids of registers that are changed by this template
    pub fn register_ids(&self) -> Vec<u64> {
        self.regs.iter().map(|reg| reg.id).collect()
    }
}


================================================
FILE: src/vmm/src/cpu_config/aarch64/static_cpu_templates/mod.rs
================================================
// Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

use serde::{Deserialize, Serialize};

/// Module with V1N1 CPU template for aarch64
pub mod v1n1;

/// Templates available for configuring the supported ARM CPU types.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum StaticCpuTemplate {
    /// Template to mask Neoverse-V1 as Neoverse-N1
    V1N1,
    /// No CPU template is used.
    #[default]
    None,
}

impl StaticCpuTemplate {
    /// Check if no template specified
    pub fn is_none(&self) -> bool {
        self == &StaticCpuTemplate::None
    }
}

impl std::fmt::Display for StaticCpuTemplate {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        match self {
            StaticCpuTemplate::V1N1 => write!(f, "V1N1"),
            StaticCpuTemplate::None => write!(f, "None"),
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::cpu_config::test_utils::get_json_template;

    #[test]
    fn verify_consistency_with_json_templates() {
        // The hardcoded static templates must stay in sync with the JSON
        // template files shipped in tests/data/custom_cpu_templates.
        let static_templates = [(v1n1::v1n1(), "V1N1.json")];

        for (hardcoded_template, filename) in static_templates {
            let json_template = get_json_template(filename);
            assert_eq!(hardcoded_template, json_template);
        }
    }
}


================================================
FILE: src/vmm/src/cpu_config/aarch64/static_cpu_templates/v1n1.rs
================================================
// Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0 use crate::arch::aarch64::regs::{ ID_AA64ISAR0_EL1, ID_AA64ISAR1_EL1, ID_AA64MMFR2_EL1, ID_AA64PFR0_EL1, }; use crate::cpu_config::aarch64::custom_cpu_template::{CustomCpuTemplate, RegisterModifier}; use crate::cpu_config::templates::RegisterValueFilter; // Arm Armv8-A Architecture Registers documentation // https://developer.arm.com/documentation/ddi0595/2021-12/AArch64-Registers?lang=en /// Template to mask Neoverse-V1 as Neoverse-N1 /// Masks: dgh, asimdfhm, bf16, dcpodp, flagm, i8mm, sha3, sha512, sm3, sm4 /// sve, svebf16, svei8mm, uscat, fcma, jscvt, dit, ilrcpc, rng pub fn v1n1() -> CustomCpuTemplate { CustomCpuTemplate { reg_modifiers: vec![ RegisterModifier { // Disabling sve CPU feature. Setting to 0b0000. // This disables sve, svebf16, svei8mm // sve occupies bits [35:32] in ID_AA64PFR0_EL1. // // Disabling dit CPU feature. Setting to 0b0000. // dit occupies bits [51:48] in ID_AA64PFR0_EL1. addr: ID_AA64PFR0_EL1, bitmap: RegisterValueFilter { filter: 0x000F000F00000000, value: 0x0000000000000000, }, }, RegisterModifier { // Disabling sha3 CPU feature. Setting sha3 to 0b0000. // Disabling sha512 CPU feature. Setting sha2 to 0b0001. // sha3 occupies bits [35:32] in ID_AA64ISAR0_EL1. // sha2 occupies bits [15:12] in ID_AA64ISAR0_EL1. // // Note from the documentation: // If the value of SHA2 field is 0b0010, // ID_AA64ISAR0_EL1. SHA3 must have the value 0b0001 // // Disabling sm3 and sm4 CPU features. Setting to 0b0000. // sm3 occupies bits [39:36] in ID_AA64ISAR0_EL1. // sm4 occupies bits [43:40] in ID_AA64ISAR0_EL1. // // Note from the documentation: // "This field (sm3) must have the same value as ID_AA64ISAR0_EL1.SM4." // // Disabling asimdfhm (fhm) CPU feature. Setting to 0b0000. // fhm occupies bits [51:48] in ID_AA64ISAR0_EL1. // // Disabling flagm (ts) CPU feature. Setting to 0b0000. // ts occupies bits [55:52] in ID_AA64ISAR0_EL1. // // Disabling rnd (rndr) CPU feature. Setting to 0b0000. 
// rndr occupies bits [63:60] in ID_AA64ISAR0_EL1. addr: ID_AA64ISAR0_EL1, bitmap: RegisterValueFilter { filter: 0xF0FF0FFF0000F000, value: 0x0000000000001000, }, }, RegisterModifier { // Disabling dcpodp (dpb) CPU feature. Setting to 0b0001. // dpb occupies bits [3:0] in ID_AA64ISAR1_EL1. // // Disabling jscvt CPU feature. Setting to 0b0000. // jscvt occupies bits [15:12] in ID_AA64ISAR1_EL1. // // Disabling fcma CPU feature. Setting to 0b0000. // fcma occupies bits [19:16] in ID_AA64ISAR1_EL1. // // Disabling ilrcpc CPU feature. Setting to 0b0001. // lrcpc occupies bits [23:20] in ID_AA64ISAR1_EL1. // // Disabling bf16 CPU feature. Setting to 0b0000. // bf16 occupies bits [47:44] in ID_AA64ISAR1_EL1. // // Disabling dgh CPU feature. Setting to 0b0000. // dgh occupies bits [51:48] in ID_AA64ISAR1_EL1. // // Disabling i8mm CPU feature. Setting to 0b0000. // i8mm occupies bits [55:52] in ID_AA64ISAR1_EL1. addr: ID_AA64ISAR1_EL1, bitmap: RegisterValueFilter { filter: 0x00FFF00000FFF00F, value: 0x0000000000100001, }, }, RegisterModifier { // Disable uscat (at) CPU feature. Setting to 0b0000. // at occupies bits [35:28] in ID_AA64MMFR2_EL1. addr: ID_AA64MMFR2_EL1, bitmap: RegisterValueFilter { filter: 0x0000000F00000000, value: 0x0000000000000000, }, }, ], ..Default::default() } } ================================================ FILE: src/vmm/src/cpu_config/aarch64/test_utils.rs ================================================ // Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
// SPDX-License-Identifier: Apache-2.0 use crate::arch::aarch64::regs::{ID_AA64ISAR0_EL1, ID_AA64PFR0_EL1}; use crate::cpu_config::aarch64::custom_cpu_template::RegisterModifier; use crate::cpu_config::templates::{CustomCpuTemplate, RegisterValueFilter}; /// Test CPU template in JSON format pub const TEST_TEMPLATE_JSON: &str = r#"{ "reg_modifiers": [ { "addr": "0x0030000000000011", "bitmap": "0b1xx1" }, { "addr": "0x0030000000000022", "bitmap": "0b1x00" } ] }"#; /// Test CPU template in JSON format but has an invalid field for the architecture. /// "msr_modifiers" is the field name for the model specific registers for /// defined by x86 CPUs. pub const TEST_INVALID_TEMPLATE_JSON: &str = r#"{ "msr_modifiers": [ { "addr": "0x0AAC", "bitmap": "0b1xx1" } ] }"#; /// Builds a sample custom CPU template pub fn build_test_template() -> CustomCpuTemplate { CustomCpuTemplate { reg_modifiers: vec![ RegisterModifier { addr: ID_AA64PFR0_EL1, bitmap: RegisterValueFilter { filter: 0b100010001, value: 0b100000001, }, }, RegisterModifier { addr: ID_AA64ISAR0_EL1, bitmap: RegisterValueFilter { filter: 0b1110, value: 0b0110, }, }, ], ..Default::default() } } ================================================ FILE: src/vmm/src/cpu_config/mod.rs ================================================ // Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
// SPDX-License-Identifier: Apache-2.0

/// Module with types used for custom CPU templates
pub mod templates;
/// Module with ser/de utils for custom CPU templates
pub mod templates_serde;

/// Module containing type implementations needed for x86 CPU configuration
#[cfg(target_arch = "x86_64")]
pub mod x86_64;

/// Module containing type implementations needed for aarch64 (ARM) CPU configuration
#[cfg(target_arch = "aarch64")]
pub mod aarch64;

#[cfg(test)]
pub(crate) mod test_utils;


================================================
FILE: src/vmm/src/cpu_config/templates.rs
================================================
// Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

// Per-architecture aliases: the rest of the crate refers to the current
// architecture's template/configuration types through `common_types`.
#[cfg(target_arch = "x86_64")]
mod common_types {
    pub use crate::cpu_config::x86_64::custom_cpu_template::CustomCpuTemplate;
    pub use crate::cpu_config::x86_64::static_cpu_templates::StaticCpuTemplate;
    pub use crate::cpu_config::x86_64::{
        CpuConfiguration, CpuConfigurationError as GuestConfigError, test_utils,
    };
}

#[cfg(target_arch = "aarch64")]
mod common_types {
    pub use crate::cpu_config::aarch64::custom_cpu_template::CustomCpuTemplate;
    pub use crate::cpu_config::aarch64::static_cpu_templates::StaticCpuTemplate;
    pub use crate::cpu_config::aarch64::{
        CpuConfiguration, CpuConfigurationError as GuestConfigError, test_utils,
    };
}

use std::borrow::Cow;
use std::fmt::Debug;

pub use common_types::*;
use serde::de::Error as SerdeError;
use serde::{Deserialize, Deserializer, Serialize, Serializer};

/// Error for GetCpuTemplate trait.
#[derive(Debug, thiserror::Error, displaydoc::Display, PartialEq, Eq)]
pub enum GetCpuTemplateError {
    #[cfg(target_arch = "x86_64")]
    /// Failed to get CPU vendor information: {0}
    GetCpuVendor(crate::cpu_config::x86_64::cpuid::common::GetCpuidError),
    /// CPU vendor mismatched between actual CPU and CPU template.
    CpuVendorMismatched,
    /// Invalid static CPU template: {0}
    InvalidStaticCpuTemplate(StaticCpuTemplate),
    /// The current CPU model is not permitted to apply the CPU template.
    InvalidCpuModel,
}

/// Trait to unwrap the inner [`CustomCpuTemplate`] from [`Option<CpuTemplateType>`].
///
/// This trait is needed because static CPU template and custom CPU template have different nested
/// structures: `CpuTemplateType::Static(StaticCpuTemplate::<StaticTemplateType>(CustomCpuTemplate))`
/// vs `CpuTemplateType::Custom(CustomCpuTemplate)`. As static CPU templates return owned
/// `CustomCpuTemplate`s, `Cow` is used here to avoid unnecessary clone of `CustomCpuTemplate` for
/// custom CPU templates and handle static CPU template and custom CPU template in a same manner.
pub trait GetCpuTemplate {
    /// Get CPU template
    fn get_cpu_template(&self) -> Result<Cow<CustomCpuTemplate>, GetCpuTemplateError>;
}

/// Enum that represents types of cpu templates available.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum CpuTemplateType {
    /// Custom cpu template
    Custom(CustomCpuTemplate),
    /// Static cpu template
    Static(StaticCpuTemplate),
}

// This conversion is only used for snapshot, but the static CPU template
// information has not been saved into snapshot since v1.1.
impl From<&Option<CpuTemplateType>> for StaticCpuTemplate {
    fn from(value: &Option<CpuTemplateType>) -> Self {
        match value {
            Some(CpuTemplateType::Static(template)) => *template,
            // Custom templates have no static representation.
            Some(CpuTemplateType::Custom(_)) | None => StaticCpuTemplate::None,
        }
    }
}

// This conversion is used when converting `&VmConfig` to `MachineConfig` to
// respond `GET /machine-config` and `GET /vm`.
impl From<&CpuTemplateType> for StaticCpuTemplate {
    fn from(value: &CpuTemplateType) -> Self {
        match value {
            CpuTemplateType::Static(template) => *template,
            CpuTemplateType::Custom(_) => StaticCpuTemplate::None,
        }
    }
}

impl TryFrom<&[u8]> for CustomCpuTemplate {
    type Error = serde_json::Error;

    // Deserializes a JSON byte slice and validates the resulting template
    // (register widths etc.) before handing it to the caller.
    fn try_from(value: &[u8]) -> Result<Self, Self::Error> {
        let template: CustomCpuTemplate = serde_json::from_slice(value)?;
        template.validate()?;
        Ok(template)
    }
}

impl TryFrom<&str> for CustomCpuTemplate {
    type Error = serde_json::Error;

    fn try_from(value: &str) -> Result<Self, Self::Error> {
        CustomCpuTemplate::try_from(value.as_bytes())
    }
}

/// Struct to represent user defined kvm capability.
/// Users can add or remove kvm capabilities to be checked
/// by FC in addition to those FC checks by default.
#[derive(Debug, Clone, Eq, PartialEq)]
pub enum KvmCapability {
    /// Add capability to the check list.
    Add(u32),
    /// Remove capability from the check list.
    Remove(u32),
}

impl Serialize for KvmCapability {
    /// Serialize KvmCapability into a string.
    /// `Add(n)` becomes `"n"`, `Remove(n)` becomes `"!n"`.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        let s = match self {
            KvmCapability::Add(cap) => format!("{cap}"),
            KvmCapability::Remove(cap) => format!("!{cap}"),
        };
        serializer.serialize_str(&s)
    }
}

impl<'de> Deserialize<'de> for KvmCapability {
    /// Deserialize string into a KvmCapability.
    /// A leading '!' marks a capability to remove from the check list.
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: Deserializer<'de>,
    {
        let original_str = String::deserialize(deserializer)?;
        let parse_err = |e| {
            D::Error::custom(format!(
                "Failed to parse string [{}] as a kvm capability - can not convert to numeric: {}",
                original_str, e
            ))
        };
        match original_str.strip_prefix('!') {
            Some(s) => {
                let v = s.parse::<u32>().map_err(parse_err)?;
                Ok(Self::Remove(v))
            }
            None => {
                let v = original_str.parse::<u32>().map_err(parse_err)?;
                Ok(Self::Add(v))
            }
        }
    }
}

/// Bit-mapped value to adjust targeted bits of a register.
#[derive(Debug, Default, Clone, Copy, Eq, PartialEq, Hash)]
pub struct RegisterValueFilter<V>
where
    V: Numeric,
{
    /// Filter to be used when writing the value bits.
    pub filter: V,
    /// Value to be applied.
    pub value: V,
}

impl<V> RegisterValueFilter<V>
where
    V: Numeric + Debug,
{
    /// Applies filter to the value
    ///
    /// Bits selected by `filter` are replaced with the corresponding bits of
    /// `self.value`; bits outside the filter keep the input's value.
    #[inline]
    pub fn apply(&self, value: V) -> V {
        (value & !self.filter) | self.value
    }
}

impl<V> Serialize for RegisterValueFilter<V>
where
    V: Numeric + Debug,
{
    /// Serialize combination of value and filter into a single tri state string
    /// (MSB first): '0'/'1' where the filter selects the bit, 'x' elsewhere,
    /// with a "0b" prefix.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        let mut bitmap_str = Vec::with_capacity(V::BITS as usize + 2);
        bitmap_str.push(b'0');
        bitmap_str.push(b'b');
        for i in (0..V::BITS).rev() {
            match self.filter.bit(i) {
                true => {
                    let val = self.value.bit(i);
                    bitmap_str.push(b'0' + u8::from(val));
                }
                false => bitmap_str.push(b'x'),
            }
        }
        // # Safety:
        // We know that bitmap_str contains only ASCII characters
        let s = unsafe { std::str::from_utf8_unchecked(&bitmap_str) };
        serializer.serialize_str(s)
    }
}

impl<'de, V> Deserialize<'de> for RegisterValueFilter<V>
where
    V: Numeric + Debug,
{
    /// Deserialize a composite bitmap string into a value pair
    /// input string: "010x"
    /// result: {
    ///     filter: 1110
    ///     value: 0100
    /// }
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: Deserializer<'de>,
    {
        let original_str = String::deserialize(deserializer)?;
        let stripped_str = original_str.strip_prefix("0b").unwrap_or(&original_str);

        let (mut filter, mut value) = (V::zero(), V::zero());
        // `i` counts significant (non-'_') characters, starting at the LSB.
        let mut i = 0;
        for s in stripped_str.as_bytes().iter().rev() {
            if V::BITS == i {
                return Err(D::Error::custom(format!(
                    "Failed to parse string [{}] as a bitmap - string is too long",
                    original_str
                )));
            }
            match s {
                // Underscores are visual separators and consume no bit position.
                b'_' => continue,
                b'x' => {}
                b'0' => {
                    filter |= V::one() << i;
                }
                b'1' => {
                    filter |= V::one() << i;
                    value |= V::one() << i;
                }
                c => {
                    return Err(D::Error::custom(format!(
                        "Failed to parse string [{}] as a bitmap - unknown character: {}",
                        original_str, c
                    )));
                }
            }
            i += 1;
        }
        Ok(RegisterValueFilter { filter, value })
    }
}

/// Trait for numeric types
pub trait Numeric:
    Sized
    + Copy
    + PartialEq
    + std::fmt::Binary
    + std::ops::Not<Output = Self>
    + std::ops::BitAnd<Output = Self>
    + std::ops::BitOr<Output = Self>
    + std::ops::BitOrAssign
    + std::ops::BitXor<Output = Self>
    + std::ops::Shl<u32, Output = Self>
    + std::ops::AddAssign
{
    /// Number of bits for type
    const BITS: u32;

    /// Value of bit at pos
    fn bit(&self, pos: u32) -> bool;

    /// Returns 0 of the type
    fn zero() -> Self;
    /// Returns 1 of the type
    fn one() -> Self;
}

macro_rules! impl_numeric {
    ($type:tt) => {
        impl Numeric for $type {
            const BITS: u32 = $type::BITS;

            fn bit(&self, pos: u32) -> bool {
                (self & (Self::one() << pos)) != 0
            }

            fn zero() -> Self {
                0
            }

            fn one() -> Self {
                1
            }
        }
    };
}

impl_numeric!(u8);
impl_numeric!(u16);
impl_numeric!(u32);
impl_numeric!(u64);
impl_numeric!(u128);

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_kvm_capability_serde() {
        let kvm_cap = KvmCapability::Add(69);
        let expected_str = "\"69\"";
        let serialized = serde_json::to_string(&kvm_cap).unwrap();
        assert_eq!(&serialized, expected_str);

        let kvm_cap = KvmCapability::Remove(69);
        let expected_str = "\"!69\"";
        let serialized = serde_json::to_string(&kvm_cap).unwrap();
        assert_eq!(&serialized, expected_str);

        let serialized = "\"69\"";
        let deserialized: KvmCapability = serde_json::from_str(serialized).unwrap();
        assert_eq!(deserialized, KvmCapability::Add(69));

        let serialized = "\"!69\"";
        let deserialized: KvmCapability = serde_json::from_str(serialized).unwrap();
        assert_eq!(deserialized, KvmCapability::Remove(69));
    }

    #[test]
    fn test_register_value_filter_serde() {
        // 'x' marks bits outside the filter; round-trip drops value bits not
        // covered by the filter (0b01010101 -> 0b01010000 under 0b11110000).
        let rvf = RegisterValueFilter::<u8> {
            value: 0b01010101,
            filter: 0b11110000,
        };

        let expected_str = "\"0b0101xxxx\"";
        let serialized = serde_json::to_string(&rvf).unwrap();
        assert_eq!(&serialized, expected_str);

        let expected_rvf = RegisterValueFilter::<u8> {
            value: 0b01010000,
            filter: 0b11110000,
        };
        let deserialized: RegisterValueFilter<u8> = serde_json::from_str(&serialized).unwrap();
        assert_eq!(deserialized, expected_rvf);

        // Underscore separators are accepted and ignored.
        let serialized = "\"0b0_101_xx_xx\"";
        let deserialized: RegisterValueFilter<u8> = serde_json::from_str(serialized).unwrap();
        assert_eq!(deserialized, expected_rvf);

        // Unknown (non-ASCII) characters are rejected.
        let serialized = "\"0b0_xϽ1_xx_xx\"";
        let deserialized: Result<RegisterValueFilter<u8>, _> = serde_json::from_str(serialized);
        deserialized.unwrap_err();

        // Nine significant bit characters do not fit in a u8.
        let serialized = "\"0b0000_0000_0\"";
        let deserialized: Result<RegisterValueFilter<u8>, _> = serde_json::from_str(serialized);
        deserialized.unwrap_err();
    }
}


================================================
FILE: src/vmm/src/cpu_config/templates_serde.rs
================================================
// Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

use std::fmt::Debug;

use serde::de::Error as SerdeError;
use serde::{Deserialize, Deserializer, Serializer};

/// Serializes number to hex
pub fn serialize_to_hex_str<N, S>(number: &N, serializer: S) -> Result<S::Ok, S::Error>
where
    S: Serializer,
    N: std::fmt::LowerHex + Debug,
{
    serializer.serialize_str(format!("{:#x}", number).as_str())
}

macro_rules! deserialize_from_str {
    ($name:ident, $type:tt) => {
        /// Deserializes number from string.
        /// Number must be prefixed with "0b" (binary) or "0x" (hexadecimal);
        /// plain decimal strings are rejected.
        pub fn $name<'de, D>(deserializer: D) -> Result<$type, D::Error>
        where
            D: Deserializer<'de>,
        {
            let number_str = String::deserialize(deserializer)?;
            let deserialized_number = if let Some(s) = number_str.strip_prefix("0b") {
                $type::from_str_radix(s, 2)
            } else if let Some(s) = number_str.strip_prefix("0x") {
                $type::from_str_radix(s, 16)
            } else {
                return Err(D::Error::custom(format!(
                    "No supported number system prefix found in value [{}]. Make sure to prefix \
                     the number with '0x' for hexadecimal numbers or '0b' for binary numbers.",
                    number_str,
                )));
            }
            .map_err(|err| {
                D::Error::custom(format!(
                    "Failed to parse string [{}] as a number for CPU template - {:?}",
                    number_str, err
                ))
            })?;
            Ok(deserialized_number)
        }
    };
}

deserialize_from_str!(deserialize_from_str_u32, u32);
deserialize_from_str!(deserialize_from_str_u64, u64);

#[cfg(test)]
mod tests {
    use serde::de::IntoDeserializer;
    use serde::de::value::{Error, StrDeserializer};

    use super::*;

    #[test]
    fn test_deserialize_from_str() {
        let valid_string = "0b1000101";
        let deserializer: StrDeserializer<Error> = valid_string.into_deserializer();
        let valid_value = deserialize_from_str_u32(deserializer);
        assert_eq!(valid_value.unwrap(), 69);

        let valid_string = "0x0045";
        let deserializer: StrDeserializer<Error> = valid_string.into_deserializer();
        let valid_value = deserialize_from_str_u32(deserializer);
        assert_eq!(valid_value.unwrap(), 69);

        let invalid_string = "xϽ69";
        let deserializer: StrDeserializer<Error> = invalid_string.into_deserializer();
        let invalid_value = deserialize_from_str_u32(deserializer);
        invalid_value.unwrap_err();

        // Plain decimal (no prefix) is deliberately not accepted.
        let invalid_string = "69";
        let deserializer: StrDeserializer<Error> = invalid_string.into_deserializer();
        let invalid_value = deserialize_from_str_u32(deserializer);
        invalid_value.unwrap_err();
    }
}


================================================
FILE: src/vmm/src/cpu_config/test_utils.rs
================================================
// Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

use std::path::PathBuf;

use crate::cpu_config::templates::CustomCpuTemplate;

/// Get a static CPU template stored as a JSON file.
pub fn get_json_template(filename: &str) -> CustomCpuTemplate {
    // Resolve the template file relative to this crate's manifest directory
    // (tests/data lives two levels above src/vmm).
    let json_path = [
        env!("CARGO_MANIFEST_DIR"),
        "../../tests/data/custom_cpu_templates",
        filename,
    ]
    .iter()
    .collect::<PathBuf>();

    serde_json::from_str(&std::fs::read_to_string(json_path).unwrap()).unwrap()
}


================================================
FILE: src/vmm/src/cpu_config/x86_64/cpuid/amd/mod.rs
================================================
// Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

#![allow(clippy::similar_names, clippy::unreadable_literal)]

use super::{CpuidEntry, CpuidKey, CpuidRegisters, CpuidTrait, KvmCpuidFlags};

/// CPUID normalize implementation.
mod normalize;

pub use normalize::{
    ExtendedApicIdError, ExtendedCacheTopologyError, FeatureEntryError, NormalizeCpuidError,
};

/// A structure matching the AMD CPUID specification as described in
/// [AMD64 Architecture Programmer’s Manual Volume 3: General-Purpose and System Instructions](https://www.amd.com/system/files/TechDocs/24594.pdf)
/// .
#[allow(clippy::module_name_repetitions)]
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct AmdCpuid(pub std::collections::BTreeMap<CpuidKey, CpuidEntry>);

impl CpuidTrait for AmdCpuid {
    /// Gets a given sub-leaf.
    #[inline]
    fn get(&self, key: &CpuidKey) -> Option<&CpuidEntry> {
        self.0.get(key)
    }

    /// Gets a given sub-leaf.
    #[inline]
    fn get_mut(&mut self, key: &CpuidKey) -> Option<&mut CpuidEntry> {
        self.0.get_mut(key)
    }
}

impl From<kvm_bindings::CpuId> for AmdCpuid {
    // Re-keys KVM's flat cpuid entry array into a (leaf, subleaf) -> entry map.
    #[inline]
    fn from(kvm_cpuid: kvm_bindings::CpuId) -> Self {
        let map = kvm_cpuid
            .as_slice()
            .iter()
            .map(|entry| {
                (
                    CpuidKey {
                        leaf: entry.function,
                        subleaf: entry.index,
                    },
                    CpuidEntry {
                        flags: KvmCpuidFlags(entry.flags),
                        result: CpuidRegisters {
                            eax: entry.eax,
                            ebx: entry.ebx,
                            ecx: entry.ecx,
                            edx: entry.edx,
                        },
                    },
                )
            })
            .collect();
        Self(map)
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn get() {
        // An empty map yields no entry for any key.
        let cpuid = AmdCpuid(std::collections::BTreeMap::new());
        assert_eq!(
            cpuid.get(&CpuidKey {
                leaf: 0,
                subleaf: 0
            }),
            None
        );
    }

    #[test]
    fn get_mut() {
        let mut cpuid = AmdCpuid(std::collections::BTreeMap::new());
        assert_eq!(
            cpuid.get_mut(&CpuidKey {
                leaf: 0,
                subleaf: 0
            }),
            None
        );
    }
}


================================================
FILE: src/vmm/src/cpu_config/x86_64/cpuid/amd/normalize.rs
================================================
// Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

use crate::cpu_config::x86_64::cpuid::common::{GetCpuidError, get_vendor_id_from_host};
use crate::cpu_config::x86_64::cpuid::normalize::{
    CheckedAssignError, get_range, set_bit, set_range,
};
use crate::cpu_config::x86_64::cpuid::{
    BRAND_STRING_LENGTH, CpuidEntry, CpuidKey, CpuidRegisters, CpuidTrait, KvmCpuidFlags,
    MissingBrandStringLeaves, VENDOR_ID_AMD, cpuid, cpuid_count,
};

/// Error type for [`super::AmdCpuid::normalize`].
#[allow(clippy::module_name_repetitions)]
#[derive(Debug, thiserror::Error, displaydoc::Display, Eq, PartialEq)]
pub enum NormalizeCpuidError {
    /// Provided `cpu_bits` is >=8: {0}.
    CpuBits(u8),
    /// Failed to passthrough cache topology: {0}
    PassthroughCacheTopology(#[from] PassthroughCacheTopologyError),
    /// Missing leaf 0x7 / subleaf 0.
    MissingLeaf0x7Subleaf0,
    /// Missing leaf 0x80000000.
    MissingLeaf0x80000000,
    /// Missing leaf 0x80000001.
    MissingLeaf0x80000001,
    /// Failed to set feature entry leaf: {0}
    FeatureEntry(#[from] FeatureEntryError),
    /// Failed to set extended cache topology leaf: {0}
    ExtendedCacheTopology(#[from] ExtendedCacheTopologyError),
    /// Failed to set extended APIC ID leaf: {0}
    ExtendedApicId(#[from] ExtendedApicIdError),
    /// Failed to set brand string: {0}
    BrandString(MissingBrandStringLeaves),
}

/// Error type for setting cache topology section of [`super::AmdCpuid::normalize`].
#[derive(Debug, thiserror::Error, displaydoc::Display, Eq, PartialEq)]
pub enum PassthroughCacheTopologyError {
    /// Failed to get the host vendor id: {0}
    NoVendorId(GetCpuidError),
    /// The host vendor id does not match AMD.
    BadVendorId,
}

/// Error type for setting leaf 0x80000008 section of [`super::AmdCpuid::normalize`].
#[derive(Debug, thiserror::Error, displaydoc::Display, Eq, PartialEq)]
pub enum FeatureEntryError {
    /// Missing leaf 0x80000008.
    MissingLeaf0x80000008,
    /// Failed to set number of physical threads (CPUID.80000008H:ECX[7:0]): {0}
    NumberOfPhysicalThreads(CheckedAssignError),
    /// Failed to set number of physical threads (CPUID.80000008H:ECX[7:0]) due to overflow.
    NumberOfPhysicalThreadsOverflow,
}

/// Error type for setting leaf 0x8000001d section of [`super::AmdCpuid::normalize`].
#[derive(Debug, thiserror::Error, displaydoc::Display, Eq, PartialEq)]
pub enum ExtendedCacheTopologyError {
    /// Missing leaf 0x8000001d.
    MissingLeaf0x8000001d,
    #[rustfmt::skip]
    /// Failed to set number of logical processors sharing cache(CPUID.(EAX=8000001DH,ECX={0}):EAX[25:14]): {1}
    NumSharingCache(u32, CheckedAssignError),
    #[rustfmt::skip]
    /// Failed to set number of logical processors sharing cache (CPUID.(EAX=8000001DH,ECX={0}):EAX[25:14]) due to overflow.
    NumSharingCacheOverflow(u32),
}

/// Error type for setting leaf 0x8000001e section of [`super::AmdCpuid::normalize`].
#[derive(Debug, thiserror::Error, displaydoc::Display, Eq, PartialEq)]
pub enum ExtendedApicIdError {
    /// Failed to set compute unit ID (CPUID.8000001EH:EBX[7:0]): {0}
    ComputeUnitId(CheckedAssignError),
    /// Failed to set extended APIC ID (CPUID.8000001EH:EAX[31:0]): {0}
    ExtendedApicId(CheckedAssignError),
    /// Missing leaf 0x8000001e.
    MissingLeaf0x8000001e,
    /// Failed to set threads per core unit (CPUID:8000001EH:EBX[15:8]): {0}
    ThreadPerComputeUnit(CheckedAssignError),
}

// We use this 2nd implementation so we can conveniently define functions only used within
// `normalize`.
#[allow(clippy::multiple_inherent_impl)]
impl super::AmdCpuid {
    /// We always use this brand string.
    const DEFAULT_BRAND_STRING: &'static [u8; BRAND_STRING_LENGTH] =
        b"AMD EPYC\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0";

    /// Applies required modifications to CPUID respective of a vCPU.
    ///
    /// # Errors
    ///
    /// When attempting to access missing leaves or set fields within leaves to values that don't
    /// fit.
    #[inline]
    pub fn normalize(
        &mut self,
        // The index of the current logical CPU in the range [0..cpu_count].
        cpu_index: u8,
        // The total number of logical CPUs.
        cpu_count: u8,
        // The number of logical CPUs per core.
        cpus_per_core: u8,
    ) -> Result<(), NormalizeCpuidError> {
        self.passthrough_cache_topology()?;
        self.update_structured_extended_entry()?;
        self.update_extended_feature_fn_entry()?;
        self.update_amd_feature_entry(cpu_count)?;
        self.update_extended_cache_topology_entry(cpu_count, cpus_per_core)?;
        self.update_extended_apic_id_entry(cpu_index, cpus_per_core)?;
        self.update_brand_string_entry()?;

        Ok(())
    }

    /// Passthrough cache topology.
    ///
    /// # Errors
    ///
    /// This function passes through leaves from the host CPUID, if this does not match the AMD
    /// specification it is possible to enter an indefinite loop. To avoid this, this will return an
    /// error when the host CPUID vendor id does not match the AMD CPUID vendor id.
fn passthrough_cache_topology(&mut self) -> Result<(), PassthroughCacheTopologyError> { if get_vendor_id_from_host().map_err(PassthroughCacheTopologyError::NoVendorId)? != *VENDOR_ID_AMD { return Err(PassthroughCacheTopologyError::BadVendorId); } // Pass-through host CPUID for leaves 0x8000001e and 0x8000001d. { // 0x8000001e - Processor Topology Information self.0.insert( CpuidKey::leaf(0x8000001e), CpuidEntry { flags: KvmCpuidFlags::EMPTY, result: CpuidRegisters::from(cpuid(0x8000001e)), }, ); // 0x8000001d - Cache Topology Information for subleaf in 0.. { let result = CpuidRegisters::from(cpuid_count(0x8000001d, subleaf)); // From 'AMD64 Architecture Programmer’s Manual Volume 3: General-Purpose and System // Instructions': // // > To gather information for all cache levels, software must repeatedly execute // > CPUID with 8000_001Dh in EAX and ECX set to increasing values beginning with 0 // > until a value of 00h is returned in the field CacheType (EAX[4:0]) indicating // > no more cache descriptions are available for this processor. If CPUID // > Fn8000_0001_ECX[TopologyExtensions] = 0, then CPUID Fn8000_001Dh is reserved. // // On non-AMD hosts this condition may never be true thus this loop may be // indefinite. // CPUID Fn8000_0001D_EAX_x[4:0] (Field Name: CacheType) // Cache type. Identifies the type of cache. // ```text // Bits Description // 00h Null; no more caches. // 01h Data cache // 02h Instruction cache // 03h Unified cache // 1Fh-04h Reserved. // ``` let cache_type = result.eax & 15; if cache_type == 0 { break; } self.0.insert( CpuidKey::subleaf(0x8000001d, subleaf), CpuidEntry { flags: KvmCpuidFlags::SIGNIFICANT_INDEX, result, }, ); } } Ok(()) } /// Updated extended feature fn entry. 
fn update_extended_feature_fn_entry(&mut self) -> Result<(), NormalizeCpuidError> {
    // set the Topology Extension bit since we use the Extended Cache Topology leaf
    let leaf_80000001 = self
        .get_mut(&CpuidKey::leaf(0x80000001))
        .ok_or(NormalizeCpuidError::MissingLeaf0x80000001)?;

    // CPUID Fn8000_0001_ECX[22] (Field Name: TopologyExtensions)
    // Topology extensions support. Indicates support for CPUID Fn8000_001D_EAX_x[N:0]-CPUID
    // Fn8000_001E_EDX.
    set_bit(&mut leaf_80000001.result.ecx, 22, true);
    Ok(())
}

/// Update structured extended feature entry.
fn update_structured_extended_entry(&mut self) -> Result<(), NormalizeCpuidError> {
    let leaf_7_subleaf_0 = self
        .get_mut(&CpuidKey::subleaf(0x7, 0x0))
        .ok_or(NormalizeCpuidError::MissingLeaf0x7Subleaf0)?;

    // According to AMD64 Architecture Programmer’s Manual, IA32_ARCH_CAPABILITIES MSR is not
    // available on AMD. The availability of IA32_ARCH_CAPABILITIES MSR is controlled via
    // CPUID.07H(ECX=0):EDX[bit 29]. KVM sets this bit no matter what but this feature is not
    // supported by hardware.
    set_bit(&mut leaf_7_subleaf_0.result.edx, 29, false);

    Ok(())
}

/// Update AMD feature entry.
#[allow(clippy::unwrap_used, clippy::unwrap_in_result)]
fn update_amd_feature_entry(&mut self, cpu_count: u8) -> Result<(), FeatureEntryError> {
    /// This value allows at most 64 logical threads within a package.
    const THREAD_ID_MAX_SIZE: u32 = 7;

    // We don't support more then 128 threads right now.
    // It's safe to put them all on the same processor.
    let leaf_80000008 = self
        .get_mut(&CpuidKey::leaf(0x80000008))
        .ok_or(FeatureEntryError::MissingLeaf0x80000008)?;

    // CPUID Fn8000_0008_ECX[15:12] (Field Name: ApicIdSize)
    // APIC ID size. The number of bits in the initial APIC20[ApicId] value that indicate
    // logical processor ID within a package. The size of this field determines the
    // maximum number of logical processors (MNLP) that the package could
    // theoretically support, and not the actual number of logical processors that are
    // implemented or enabled in the package, as indicated by CPUID
    // Fn8000_0008_ECX[NC]. A value of zero indicates that legacy methods must be
    // used to determine the maximum number of logical processors, as indicated by
    // CPUID Fn8000_0008_ECX[NC].
    // SAFETY: `THREAD_ID_MAX_SIZE` (7) always fits in the 4-bit range, so unwrap cannot fail.
    set_range(&mut leaf_80000008.result.ecx, 12..=15, THREAD_ID_MAX_SIZE).unwrap();

    // CPUID Fn8000_0008_ECX[7:0] (Field Name: NC)
    // Number of physical threads - 1. The number of threads in the processor is NT+1
    // (e.g., if NT = 0, then there is one thread). See “Legacy Method” on page 633.
    let sub = cpu_count
        .checked_sub(1)
        .ok_or(FeatureEntryError::NumberOfPhysicalThreadsOverflow)?;
    set_range(&mut leaf_80000008.result.ecx, 0..=7, u32::from(sub))
        .map_err(FeatureEntryError::NumberOfPhysicalThreads)?;

    Ok(())
}

/// Update extended cache topology entry.
#[allow(clippy::unwrap_in_result, clippy::unwrap_used)]
fn update_extended_cache_topology_entry(
    &mut self,
    cpu_count: u8,
    cpus_per_core: u8,
) -> Result<(), ExtendedCacheTopologyError> {
    // Iterate over all present sub-leaves of 0x8000001d; stops at the first absent one.
    for i in 0.. {
        if let Some(subleaf) = self.get_mut(&CpuidKey::subleaf(0x8000001d, i)) {
            // CPUID Fn8000_001D_EAX_x[7:5] (Field Name: CacheLevel)
            // Cache level. Identifies the level of this cache. Note that the enumeration value
            // is not necessarily equal to the cache level.
            // ```text
            // Bits Description
            // 000b Reserved.
            // 001b Level 1
            // 010b Level 2
            // 011b Level 3
            // 111b-100b Reserved.
            // ```
            let cache_level = get_range(subleaf.result.eax, 5..=7);

            // CPUID Fn8000_001D_EAX_x[25:14] (Field Name: NumSharingCache)
            // Specifies the number of logical processors sharing the cache enumerated by N,
            // the value passed to the instruction in ECX. The number of logical processors
            // sharing this cache is the value of this field incremented by 1. To determine
            // which logical processors are sharing a cache, determine a Share
            // Id for each processor as follows:
            //
            // ShareId = LocalApicId >> log2(NumSharingCache+1)
            //
            // Logical processors with the same ShareId then share a cache. If
            // NumSharingCache+1 is not a power of two, round it up to the next power of two.
            match cache_level {
                // L1 & L2 Cache
                // The L1 & L2 cache is shared by at most 2 hyper-threads
                1 | 2 => {
                    // SAFETY: We know `cpus_per_core > 0` therefore this is always safe.
                    let sub = u32::from(cpus_per_core.checked_sub(1).unwrap());
                    set_range(&mut subleaf.result.eax, 14..=25, sub)
                        .map_err(|err| ExtendedCacheTopologyError::NumSharingCache(i, err))?;
                }
                // L3 Cache
                // The L3 cache is shared among all the logical threads
                3 => {
                    let sub = cpu_count
                        .checked_sub(1)
                        .ok_or(ExtendedCacheTopologyError::NumSharingCacheOverflow(i))?;
                    set_range(&mut subleaf.result.eax, 14..=25, u32::from(sub))
                        .map_err(|err| ExtendedCacheTopologyError::NumSharingCache(i, err))?;
                }
                _ => (),
            }
        } else {
            break;
        }
    }
    Ok(())
}

/// Update extended apic id entry.
#[allow(clippy::unwrap_used, clippy::unwrap_in_result)]
fn update_extended_apic_id_entry(
    &mut self,
    cpu_index: u8,
    cpus_per_core: u8,
) -> Result<(), ExtendedApicIdError> {
    /// 1 node per processor.
    const NODES_PER_PROCESSOR: u32 = 0;

    // When hyper-threading is enabled each pair of 2 consecutive logical CPUs
    // will have the same core id since they represent 2 threads in the same core.
    // For Example:
    // logical CPU 0 -> core id: 0
    // logical CPU 1 -> core id: 0
    // logical CPU 2 -> core id: 1
    // logical CPU 3 -> core id: 1
    //
    // SAFETY: We know `cpus_per_core != 0` therefore this is always safe.
    let core_id = u32::from(cpu_index.checked_div(cpus_per_core).unwrap());

    let leaf_8000001e = self
        .get_mut(&CpuidKey::leaf(0x8000001e))
        .ok_or(ExtendedApicIdError::MissingLeaf0x8000001e)?;

    // CPUID Fn8000_001E_EAX[31:0] (Field Name: ExtendedApicId)
    // Extended APIC ID. If MSR0000_001B[ApicEn] = 0, this field is reserved.
    set_range(&mut leaf_8000001e.result.eax, 0..=31, u32::from(cpu_index))
        .map_err(ExtendedApicIdError::ExtendedApicId)?;

    // CPUID Fn8000_001E_EBX[7:0] (Field Name: ComputeUnitId)
    // Compute unit ID. Identifies a Compute Unit, which may be one or more physical cores that
    // each implement one or more logical processors.
    set_range(&mut leaf_8000001e.result.ebx, 0..=7, core_id)
        .map_err(ExtendedApicIdError::ComputeUnitId)?;

    // CPUID Fn8000_001E_EBX[15:8] (Field Name: ThreadsPerComputeUnit)
    // Threads per compute unit (zero-based count). The actual number of threads
    // per compute unit is the value of this field + 1. To determine which logical
    // processors (threads) belong to a given Compute Unit, determine a ShareId
    // for each processor as follows:
    //
    // ShareId = LocalApicId >> log2(ThreadsPerComputeUnit+1)
    //
    // Logical processors with the same ShareId then belong to the same Compute
    // Unit. (If ThreadsPerComputeUnit+1 is not a power of two, round it up to the
    // next power of two).
    //
    // SAFETY: We know `cpus_per_core > 0` therefore this is always safe.
    let sub = u32::from(cpus_per_core.checked_sub(1).unwrap());
    set_range(&mut leaf_8000001e.result.ebx, 8..=15, sub)
        .map_err(ExtendedApicIdError::ThreadPerComputeUnit)?;

    // CPUID Fn8000_001E_ECX[10:8] (Field Name: NodesPerProcessor)
    // Specifies the number of nodes in the package/socket in which this logical
    // processor resides. Node in this context corresponds to a processor die.
    // Encoding is N-1, where N is the number of nodes present in the socket.
    //
    // SAFETY: We know the value always fits within the range and thus is always safe.
    // Set nodes per processor.
    set_range(&mut leaf_8000001e.result.ecx, 8..=10, NODES_PER_PROCESSOR).unwrap();

    // CPUID Fn8000_001E_ECX[7:0] (Field Name: NodeId)
    // Specifies the ID of the node containing the current logical processor. NodeId
    // values are unique across the system.
    //
    // Put all the cpus in the same node.
    set_range(&mut leaf_8000001e.result.ecx, 0..=7, 0).unwrap();

    Ok(())
}

/// Update brand string entry.
fn update_brand_string_entry(&mut self) -> Result<(), NormalizeCpuidError> {
    self.apply_brand_string(Self::DEFAULT_BRAND_STRING)
        .map_err(NormalizeCpuidError::BrandString)?;
    Ok(())
}
}

#[cfg(test)]
mod tests {
    use std::collections::BTreeMap;

    use super::*;
    use crate::cpu_config::x86_64::cpuid::AmdCpuid;

    #[test]
    fn test_update_structured_extended_entry_invalid() {
        // `update_structured_extended_entry()` should exit with MissingLeaf0x7Subleaf0 error for
        // CPUID lacking leaf 0x7 / subleaf 0.
        let mut cpuid = AmdCpuid(BTreeMap::new());
        assert_eq!(
            cpuid.update_structured_extended_entry().unwrap_err(),
            NormalizeCpuidError::MissingLeaf0x7Subleaf0
        );
    }

    #[test]
    fn test_update_structured_extended_entry_valid() {
        // `update_structured_extended_entry()` should succeed for CPUID having leaf 0x7 / subleaf
        // 0, and bit 29 of EDX (IA32_ARCH_CAPABILITIES MSR enumeration) should be disabled.
        let mut cpuid = AmdCpuid(BTreeMap::from([(
            CpuidKey {
                leaf: 0x7,
                subleaf: 0x0,
            },
            CpuidEntry {
                flags: KvmCpuidFlags::SIGNIFICANT_INDEX,
                result: CpuidRegisters {
                    eax: 0,
                    ebx: 0,
                    ecx: 0,
                    edx: u32::MAX,
                },
            },
        )]));
        cpuid.update_structured_extended_entry().unwrap();
        assert_eq!(
            cpuid
                .get(&CpuidKey {
                    leaf: 0x7,
                    subleaf: 0x0
                })
                .unwrap()
                .result
                .edx
                & (1 << 29),
            0
        );
    }
}

================================================
FILE: src/vmm/src/cpu_config/x86_64/cpuid/common.rs
================================================
// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0 #![allow(clippy::restriction)] use crate::arch::x86_64::generated::msr_index::{ MSR_IA32_BNDCFGS, MSR_IA32_CR_PAT, MSR_MTRRdefType, MSR_MTRRfix4K_C0000, MSR_MTRRfix4K_C8000, MSR_MTRRfix4K_D0000, MSR_MTRRfix4K_D8000, MSR_MTRRfix4K_E0000, MSR_MTRRfix4K_E8000, MSR_MTRRfix4K_F0000, MSR_MTRRfix4K_F8000, MSR_MTRRfix16K_80000, MSR_MTRRfix16K_A0000, MSR_MTRRfix64K_00000, }; /// Error type for [`get_cpuid`]. #[derive(Debug, thiserror::Error, displaydoc::Display, PartialEq, Eq)] pub enum GetCpuidError { /// Un-supported leaf: {0} UnsupportedLeaf(u32), /// Invalid subleaf: {0} InvalidSubleaf(u32), } /// Extract entry from the cpuid. /// /// # Errors /// /// - When the given `leaf` is more than `max_leaf` supported by CPUID. /// - When the CPUID leaf `sub-leaf` is invalid (all its register equal 0). pub fn get_cpuid(leaf: u32, subleaf: u32) -> Result { let max_leaf = // JUSTIFICATION: There is no safe alternative. // SAFETY: This is safe because the host supports the `cpuid` instruction unsafe { std::arch::x86_64::__get_cpuid_max(leaf & 0x8000_0000).0 }; if leaf > max_leaf { return Err(GetCpuidError::UnsupportedLeaf(leaf)); } let entry = crate::cpu_config::x86_64::cpuid::cpuid_count(leaf, subleaf); if entry.eax == 0 && entry.ebx == 0 && entry.ecx == 0 && entry.edx == 0 { return Err(GetCpuidError::InvalidSubleaf(subleaf)); } Ok(entry) } /// Extracts the CPU vendor id from leaf 0x0. /// /// # Errors /// /// When CPUID leaf 0 is not supported. pub fn get_vendor_id_from_host() -> Result<[u8; 12], GetCpuidError> { // JUSTIFICATION: There is no safe alternative. // SAFETY: Always safe. get_cpuid(0, 0).map(|vendor_entry| unsafe { // The ordering of the vendor string is ebx,edx,ecx this is not a mistake. std::mem::transmute::<[u32; 3], [u8; 12]>([ vendor_entry.ebx, vendor_entry.edx, vendor_entry.ecx, ]) }) } /// Returns MSRs to be saved based on CPUID features that are enabled. 
// NOTE: the element type of the returned `Vec` was lost in extraction; restored as `u32`,
// which matches the MSR-index constants and the `0x200..0x210` / `0x400..0x480` ranges
// extended into it below.
pub(crate) fn msrs_to_save_by_cpuid(cpuid: &kvm_bindings::CpuId) -> Vec<u32> {
    /// Memory Protection Extensions
    const MPX_BITINDEX: u32 = 14;

    /// Memory Type Range Registers
    const MTRR_BITINDEX: u32 = 12;

    /// Memory Check Exception
    const MCE_BITINDEX: u32 = 7;

    /// Scans through the CPUID and determines if a feature bit is set.
    // TODO: This currently involves a linear search which would be improved
    // when we'll refactor the cpuid crate.
    macro_rules! cpuid_is_feature_set {
        ($cpuid:ident, $leaf:expr, $index:expr, $reg:tt, $feature_bit:expr) => {{
            let mut res = false;
            for entry in $cpuid.as_slice().iter() {
                if entry.function == $leaf && entry.index == $index {
                    if entry.$reg & (1 << $feature_bit) != 0 {
                        res = true;
                        break;
                    }
                }
            }
            res
        }};
    }

    let mut msrs = Vec::new();

    // Macro used for easy definition of CPUID-MSR dependencies.
    macro_rules! cpuid_msr_dep {
        ($leaf:expr, $index:expr, $reg:tt, $feature_bit:expr, $msr:expr) => {
            if cpuid_is_feature_set!(cpuid, $leaf, $index, $reg, $feature_bit) {
                msrs.extend($msr)
            }
        };
    }

    // TODO: Add more dependencies.
    cpuid_msr_dep!(0x7, 0, ebx, MPX_BITINDEX, [MSR_IA32_BNDCFGS]);

    // IA32_MTRR_PHYSBASEn, IA32_MTRR_PHYSMASKn
    cpuid_msr_dep!(0x1, 0, edx, MTRR_BITINDEX, 0x200..0x210);

    // Other MTRR MSRs
    cpuid_msr_dep!(
        0x1,
        0,
        edx,
        MTRR_BITINDEX,
        [
            MSR_MTRRfix64K_00000,
            MSR_MTRRfix16K_80000,
            MSR_MTRRfix16K_A0000,
            MSR_MTRRfix4K_C0000,
            MSR_MTRRfix4K_C8000,
            MSR_MTRRfix4K_D0000,
            MSR_MTRRfix4K_D8000,
            MSR_MTRRfix4K_E0000,
            MSR_MTRRfix4K_E8000,
            MSR_MTRRfix4K_F0000,
            MSR_MTRRfix4K_F8000,
            MSR_IA32_CR_PAT,
            MSR_MTRRdefType,
        ]
    );

    // MCE MSRs
    // We are saving 32 MCE banks here as this is the maximum number supported by KVM
    // and configured by default.
    // The physical number of the MCE banks depends on the CPU.
    // The number of emulated MCE banks can be configured via KVM_X86_SETUP_MCE.
    cpuid_msr_dep!(0x1, 0, edx, MCE_BITINDEX, 0x400..0x480);

    msrs
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn get_cpuid_unsupported_leaf() {
        let max_leaf =
            // JUSTIFICATION: There is no safe alternative.
            // SAFETY: This is safe because the host supports the `cpuid` instruction
            unsafe { std::arch::x86_64::__get_cpuid_max(0).0 };
        let max_leaf_plus_one = max_leaf + 1;
        assert_eq!(
            get_cpuid(max_leaf_plus_one, 0),
            Err(GetCpuidError::UnsupportedLeaf(max_leaf_plus_one))
        );
    }
}

================================================
FILE: src/vmm/src/cpu_config/x86_64/cpuid/intel/mod.rs
================================================
// Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

#![allow(
    clippy::similar_names,
    clippy::module_name_repetitions,
    clippy::unreadable_literal,
    clippy::unsafe_derive_deserialize
)]

/// CPUID normalize implementation.
mod normalize;

pub use normalize::{DeterministicCacheError, NormalizeCpuidError};

use super::{CpuidEntry, CpuidKey, CpuidRegisters, CpuidTrait, KvmCpuidFlags};

/// A structure matching the Intel CPUID specification as described in
/// [Intel® 64 and IA-32 Architectures Software Developer's Manual Combined Volumes 2A, 2B, 2C, and 2D: Instruction Set Reference, A-Z](https://cdrdv2.intel.com/v1/dl/getContent/671110)
/// .
// NOTE: the `BTreeMap` type parameters were lost in extraction; restored to
// `<CpuidKey, CpuidEntry>`, matching the `CpuidTrait` accessors below.
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct IntelCpuid(pub std::collections::BTreeMap<CpuidKey, CpuidEntry>);

impl CpuidTrait for IntelCpuid {
    /// Gets a given sub-leaf.
    #[inline]
    fn get(&self, key: &CpuidKey) -> Option<&CpuidEntry> {
        self.0.get(key)
    }

    /// Gets a given sub-leaf.
#[inline] fn get_mut(&mut self, key: &CpuidKey) -> Option<&mut CpuidEntry> { self.0.get_mut(key) } } impl From for IntelCpuid { #[inline] fn from(kvm_cpuid: kvm_bindings::CpuId) -> Self { let map = kvm_cpuid .as_slice() .iter() .map(|entry| { ( CpuidKey { leaf: entry.function, subleaf: entry.index, }, CpuidEntry { flags: KvmCpuidFlags(entry.flags), result: CpuidRegisters { eax: entry.eax, ebx: entry.ebx, ecx: entry.ecx, edx: entry.edx, }, }, ) }) .collect(); Self(map) } } #[cfg(test)] mod tests { use super::*; #[test] fn get() { let cpuid = IntelCpuid(std::collections::BTreeMap::new()); assert_eq!( cpuid.get(&CpuidKey { leaf: 0, subleaf: 0 }), None ); } #[test] fn get_mut() { let mut cpuid = IntelCpuid(std::collections::BTreeMap::new()); assert_eq!( cpuid.get_mut(&CpuidKey { leaf: 0, subleaf: 0 }), None ); } } ================================================ FILE: src/vmm/src/cpu_config/x86_64/cpuid/intel/normalize.rs ================================================ // Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 use crate::cpu_config::x86_64::cpuid::normalize::{ CheckedAssignError, get_range, set_bit, set_range, }; use crate::cpu_config::x86_64::cpuid::{ BRAND_STRING_LENGTH, CpuidKey, CpuidRegisters, CpuidTrait, MissingBrandStringLeaves, host_brand_string, }; /// Error type for [`super::IntelCpuid::normalize`]. #[derive(Debug, thiserror::Error, displaydoc::Display, Eq, PartialEq)] pub enum NormalizeCpuidError { /// Failed to set deterministic cache leaf: {0} DeterministicCache(#[from] DeterministicCacheError), /// Leaf 0x6 is missing from CPUID. MissingLeaf6, /// Leaf 0x7 / subleaf 0 is missing from CPUID. MissingLeaf7, /// Leaf 0xA is missing from CPUID. 
    MissingLeafA,
    /// Failed to get brand string: {0}
    GetBrandString(DefaultBrandStringError),
    /// Failed to set brand string: {0}
    ApplyBrandString(MissingBrandStringLeaves),
}

/// Error type for setting leaf 4 section of [`super::IntelCpuid::normalize`].
// `displaydoc::Display` does not support multi-line comments, `rustfmt` will format these comments
// across multiple lines, so we skip formatting here. This can be removed when
// https://github.com/yaahc/displaydoc/issues/44 is resolved.
#[rustfmt::skip]
#[allow(clippy::enum_variant_names)]
#[derive(Debug, thiserror::Error, displaydoc::Display, Eq, PartialEq)]
pub enum DeterministicCacheError {
    /// Failed to set max addressable core ID in physical package (CPUID.04H:EAX[31:26]): {0}.
    MaxCorePerPackage(CheckedAssignError),
    /// Failed to set max addressable core ID in physical package (CPUID.04H:EAX[31:26]) due to underflow in cores.
    MaxCorePerPackageUnderflow,
    /// Failed to set max addressable processor ID sharing cache (CPUID.04H:EAX[25:14]): {0}.
    MaxCpusPerCore(CheckedAssignError),
    /// Failed to set max addressable processor ID sharing cache (CPUID.04H:EAX[25:14]) due to underflow in cpu count.
    MaxCpusPerCoreUnderflow,
}

/// We always use this brand string.
// 26 ASCII bytes padded with NUL bytes to `BRAND_STRING_LENGTH` (48) bytes.
pub const DEFAULT_BRAND_STRING: &[u8; BRAND_STRING_LENGTH] =
    b"Intel(R) Xeon(R) Processor\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0";
/// Base of the default brand string, to which the host frequency suffix is appended.
pub const DEFAULT_BRAND_STRING_BASE: &[u8; 28] = b"Intel(R) Xeon(R) Processor @";

// We use this 2nd implementation so we can conveniently define functions only used within
// `normalize`.
#[allow(clippy::multiple_inherent_impl)]
impl super::IntelCpuid {
    /// Applies required modifications to CPUID respective of a vCPU.
    ///
    /// # Errors
    ///
    /// When attempting to access missing leaves or set fields within leaves to values that don't
    /// fit.
    #[inline]
    pub fn normalize(
        &mut self,
        // The index of the current logical CPU in the range [0..cpu_count].
        _cpu_index: u8,
        // The total number of logical CPUs.
        cpu_count: u8,
        // The number of logical CPUs per core.
        cpus_per_core: u8,
    ) -> Result<(), NormalizeCpuidError> {
        self.update_deterministic_cache_entry(cpu_count, cpus_per_core)?;
        self.update_power_management_entry()?;
        self.update_extended_feature_flags_entry()?;
        self.update_performance_monitoring_entry()?;
        self.update_extended_topology_v2_entry();
        self.update_brand_string_entry()?;
        Ok(())
    }

    /// Update deterministic cache entry
    #[allow(clippy::unwrap_in_result)]
    fn update_deterministic_cache_entry(
        &mut self,
        cpu_count: u8,
        cpus_per_core: u8,
    ) -> Result<(), DeterministicCacheError> {
        for i in 0.. {
            if let Some(subleaf) = self.get_mut(&CpuidKey::subleaf(0x4, i)) {
                // If ECX contains an invalid subleaf, EAX/EBX/ECX/EDX return 0 and the
                // normalization should not be applied. Exits when it hits such an invalid subleaf.
                if subleaf.result.eax == 0
                    && subleaf.result.ebx == 0
                    && subleaf.result.ecx == 0
                    && subleaf.result.edx == 0
                {
                    break;
                }

                // CPUID.04H:EAX[7:5]
                // Cache Level (Starts at 1)
                let cache_level = get_range(subleaf.result.eax, 5..=7);

                // CPUID.04H:EAX[25:14]
                // Maximum number of addressable IDs for logical processors sharing this cache.
                // - Add one to the return value to get the result.
                // - The nearest power-of-2 integer that is not smaller than (1 + EAX[25:14]) is the
                //   number of unique initial APIC IDs reserved for addressing different logical
                //   processors sharing this cache.
                // We know `cpus_per_core > 0` therefore `cpus_per_core.checked_sub(1).unwrap()` is
                // always safe.
                #[allow(clippy::unwrap_used)]
                match cache_level {
                    // L1 & L2 Cache
                    // The L1 & L2 cache is shared by at most 2 hyperthreads
                    1 | 2 => {
                        let sub = u32::from(cpus_per_core.checked_sub(1).unwrap());
                        set_range(&mut subleaf.result.eax, 14..=25, sub)
                            .map_err(DeterministicCacheError::MaxCpusPerCore)?;
                    }
                    // L3 Cache
                    // The L3 cache is shared among all the logical threads
                    3 => {
                        let sub = u32::from(
                            cpu_count
                                .checked_sub(1)
                                .ok_or(DeterministicCacheError::MaxCpusPerCoreUnderflow)?,
                        );
                        set_range(&mut subleaf.result.eax, 14..=25, sub)
                            .map_err(DeterministicCacheError::MaxCpusPerCore)?;
                    }
                    _ => (),
                }

                // We know `cpus_per_core !=0` therefore this is always safe.
                #[allow(clippy::unwrap_used)]
                let cores = cpu_count.checked_div(cpus_per_core).unwrap();

                // CPUID.04H:EAX[31:26]
                // Maximum number of addressable IDs for processor cores in the physical package.
                // - Add one to the return value to get the result.
                // - The nearest power-of-2 integer that is not smaller than (1 + EAX[31:26]) is the
                //   number of unique Core_IDs reserved for addressing different processor cores in
                //   a physical package. Core ID is a subset of bits of the initial APIC ID.
                // - The returned value is constant for valid initial values in ECX. Valid ECX
                //   values start from 0.
                // Put all the cores in the same socket
                let sub = u32::from(cores)
                    .checked_sub(1)
                    .ok_or(DeterministicCacheError::MaxCorePerPackageUnderflow)?;
                set_range(&mut subleaf.result.eax, 26..=31, sub)
                    .map_err(DeterministicCacheError::MaxCorePerPackage)?;
            } else {
                break;
            }
        }
        Ok(())
    }

    /// Update power management entry
    fn update_power_management_entry(&mut self) -> Result<(), NormalizeCpuidError> {
        let leaf_6 = self
            .get_mut(&CpuidKey::leaf(0x6))
            .ok_or(NormalizeCpuidError::MissingLeaf6)?;

        // CPUID.06H:EAX[1]
        // Intel Turbo Boost Technology available (see description of IA32_MISC_ENABLE[38]).
        set_bit(&mut leaf_6.result.eax, 1, false);

        // CPUID.06H:ECX[3]
        // The processor supports performance-energy bias preference if CPUID.06H:ECX.SETBH[bit 3]
        // is set and it also implies the presence of a new architectural MSR called
        // IA32_ENERGY_PERF_BIAS (1B0H).
        // Clear X86 EPB feature. No frequency selection in the hypervisor.
        set_bit(&mut leaf_6.result.ecx, 3, false);

        Ok(())
    }

    /// Update structured extended feature flags enumeration leaf
    fn update_extended_feature_flags_entry(&mut self) -> Result<(), NormalizeCpuidError> {
        let leaf_7_0 = self
            .get_mut(&CpuidKey::subleaf(0x7, 0))
            .ok_or(NormalizeCpuidError::MissingLeaf7)?;

        // Set the following bits as recommended in kernel doc. These bits are reserved in AMD.
        // - CPUID.07H:EBX[6] (FDP_EXCPTN_ONLY)
        // - CPUID.07H:EBX[13] (Deprecates FPU CS and FPU DS values)
        // https://lore.kernel.org/all/20220322110712.222449-3-pbonzini@redhat.com/
        // https://github.com/torvalds/linux/commit/45016721de3c714902c6f475b705e10ae0bdd801
        set_bit(&mut leaf_7_0.result.ebx, 6, true);
        set_bit(&mut leaf_7_0.result.ebx, 13, true);

        // CPUID.(EAX=07H,ECX=0):ECX[5] (Mnemonic: WAITPKG)
        //
        // WAITPKG indicates support of user wait instructions (UMONITOR, UMWAIT and TPAUSE).
        // - UMONITOR arms address monitoring hardware that checks for store operations on the
        //   specified address range.
        // - UMWAIT instructs the processor to enter an implementation-dependent optimized state
        //   (either a light-weight power/performance optimized state (C0.1 idle state) or an
        //   improved power/performance optimized state (C0.2 idle state)) while monitoring the
        //   address range specified in UMONITOR. The instruction wakes up when the time-stamp
        //   counter reaches or exceeds the implicit EDX:EAX 64-bit input value.
        // - TPAUSE instructs the processor to enter an implementation-dependent optimized state.
        //   The instruction wakes up when the time-stamp counter reaches or exceeds the implicit
        //   EDX:EAX 64-bit input value.
        //
        // These instructions may be executed at any privilege level. Even when UMWAIT/TPAUSE are
        // executed within a guest, the *physical* processor enters the requested optimized state.
        // See Intel SDM vol.3 for more details of the behavior of these instructions in VMX
        // non-root operation.
        //
        // MONITOR/MWAIT instructions are the privileged variant of UMONITOR/UMWAIT and are
        // unconditionally emulated as NOP by KVM.
        // https://github.com/torvalds/linux/commit/87c00572ba05aa8c9db118da75c608f47eb10b9e
        //
        // When UMONITOR/UMWAIT/TPAUSE were initially introduced, KVM clears the WAITPKG CPUID bit
        // in KVM_GET_SUPPORTED_CPUID by default, and KVM exposed them to guest only when VMM
        // explicitly set the bit via KVM_SET_CPUID2 API.
        // https://github.com/torvalds/linux/commit/e69e72faa3a0709dd23df6a4ca060a15e99168a1
        // However, since v5.8, if the processor supports "enable user wait and pause" in Intel VMX,
        // KVM_GET_SUPPORTED_CPUID sets the bit to 1 to let VMM know that it is available. So if the
        // returned value is passed to KVM_SET_CPUID2 API as it is, guests are able to execute them.
        // https://github.com/torvalds/linux/commit/0abcc8f65cc23b65bc8d1614cc64b02b1641ed7c
        //
        // Similar to MONITOR/MWAIT, we disable the guest's WAITPKG in order to prevent a guest from
        // executing those instructions and putting a physical processor to an idle state which may
        // lead to an overhead of waking it up when scheduling another guest on it. By clearing the
        // WAITPKG bit in KVM_SET_CPUID2 API, KVM does not set the "enable user wait and pause" bit
        // (bit 26) of the secondary processor-based VM-execution control, which makes guests get
        // #UD when attempting to executing those instructions.
        //
        // Note that the WAITPKG bit is reserved on AMD.
        set_bit(&mut leaf_7_0.result.ecx, 5, false);

        Ok(())
    }

    /// Update performance monitoring entry
    // Zeroing leaf 0xA reports no architectural performance monitoring support to the guest.
    fn update_performance_monitoring_entry(&mut self) -> Result<(), NormalizeCpuidError> {
        let leaf_a = self
            .get_mut(&CpuidKey::leaf(0xA))
            .ok_or(NormalizeCpuidError::MissingLeafA)?;
        leaf_a.result = CpuidRegisters {
            eax: 0,
            ebx: 0,
            ecx: 0,
            edx: 0,
        };
        Ok(())
    }

    /// Update extended topology v2 entry
    ///
    /// CPUID leaf 1FH is a preferred superset to leaf 0xB. Intel recommends using leaf 0x1F when
    /// available rather than leaf 0xB.
    ///
    /// Since we don't use any domains than ones supported in leaf 0xB, we just copy contents of
    /// leaf 0xB to leaf 0x1F.
    fn update_extended_topology_v2_entry(&mut self) {
        // Skip if leaf 0x1F does not exist.
        if self.get(&CpuidKey::leaf(0x1F)).is_none() {
            return;
        }

        for index in 0.. {
            if let Some(subleaf) = self.get(&CpuidKey::subleaf(0xB, index)) {
                self.0
                    .insert(CpuidKey::subleaf(0x1F, index), subleaf.clone());
            } else {
                break;
            }
        }
    }

    /// Update brand string entry.
    // Falls back to the generic `DEFAULT_BRAND_STRING` when the host brand string cannot
    // be parsed for a frequency suffix.
    fn update_brand_string_entry(&mut self) -> Result<(), NormalizeCpuidError> {
        // Get host brand string.
        let host_brand_string: [u8; BRAND_STRING_LENGTH] = host_brand_string();

        let default_brand_string =
            default_brand_string(host_brand_string).unwrap_or(*DEFAULT_BRAND_STRING);

        self.apply_brand_string(&default_brand_string)
            .map_err(NormalizeCpuidError::ApplyBrandString)?;
        Ok(())
    }
}

/// Error type for [`default_brand_string`].
#[derive(Debug, Eq, PartialEq, thiserror::Error, displaydoc::Display)]
pub enum DefaultBrandStringError {
    /// Missing frequency: {0:?}.
    MissingFrequency([u8; BRAND_STRING_LENGTH]),
    /// Missing space: {0:?}.
    MissingSpace([u8; BRAND_STRING_LENGTH]),
    /// Insufficient space in brand string.
    Overflow,
}

/// Normalize brand string to a generic Xeon(R) processor, with the actual CPU frequency
///
/// # Errors
///
/// When unable to parse the host brand string.
/// `brand_string.try_into().unwrap()` cannot panic as we know
/// `brand_string.len() == BRAND_STRING_LENGTH`
///
/// # Panics
///
/// Never.
// As we pass through host frequency, we require CPUID and thus `cfg(cpuid)`. // TODO: Use `split_array_ref` // (https://github.com/firecracker-microvm/firecracker/issues/3347) #[allow(clippy::indexing_slicing, clippy::arithmetic_side_effects)] #[inline] fn default_brand_string( // Host brand string. // This could look like "Intel(R) Xeon(R) Platinum 8275CL CPU @ 3.00GHz". // or this could look like "Intel(R) Xeon(R) Platinum 8275CL CPU\0\0\0\0\0\0\0\0\0\0". host_brand_string: [u8; BRAND_STRING_LENGTH], ) -> Result<[u8; BRAND_STRING_LENGTH], DefaultBrandStringError> { // The slice of the host string before the frequency suffix // e.g. b"Intel(R) Xeon(R) Processor Platinum 8275CL CPU @ 3.00" and b"GHz" let (before, after) = 'outer: { for i in 0..host_brand_string.len() { // Find position of b"THz" or b"GHz" or b"MHz" if let [b'T' | b'G' | b'M', b'H', b'z', ..] = host_brand_string[i..] { break 'outer Ok(host_brand_string.split_at(i)); } } Err(DefaultBrandStringError::MissingFrequency(host_brand_string)) }?; debug_assert_eq!( before.len().checked_add(after.len()), Some(BRAND_STRING_LENGTH) ); // We iterate from the end until hitting a space, getting the frequency number // e.g. b"Intel(R) Xeon(R) Processor Platinum 8275CL CPU @ " and b"3.00" let (_, frequency) = 'outer: { for i in (0..before.len()).rev() { let c = before[i]; match c { b' ' => break 'outer Ok(before.split_at(i)), b'0'..=b'9' | b'.' 
=> (), _ => break, } } Err(DefaultBrandStringError::MissingSpace(host_brand_string)) }?; debug_assert!(frequency.len() <= before.len()); debug_assert!( matches!(frequency.len().checked_add(after.len()), Some(x) if x <= BRAND_STRING_LENGTH) ); debug_assert!(DEFAULT_BRAND_STRING_BASE.len() <= BRAND_STRING_LENGTH); debug_assert!(BRAND_STRING_LENGTH.checked_mul(2).is_some()); // As `DEFAULT_BRAND_STRING_BASE.len() + frequency.len() + after.len()` is guaranteed // to be less than or equal to `2*BRAND_STRING_LENGTH` and we know // `2*BRAND_STRING_LENGTH <= usize::MAX` since `BRAND_STRING_LENGTH==48`, this is always // safe. let len = DEFAULT_BRAND_STRING_BASE.len() + frequency.len() + after.len(); let brand_string = DEFAULT_BRAND_STRING_BASE .iter() .copied() // Include frequency e.g. "3.00" .chain(frequency.iter().copied()) // Include frequency suffix e.g. "GHz" .chain(after.iter().copied()) // Pad with 0s to `BRAND_STRING_LENGTH` .chain(std::iter::repeat_n( b'\0', BRAND_STRING_LENGTH .checked_sub(len) .ok_or(DefaultBrandStringError::Overflow)?, )) .collect::>(); debug_assert_eq!(brand_string.len(), BRAND_STRING_LENGTH); // Padding ensures `brand_string.len() == BRAND_STRING_LENGTH` thus // `brand_string.try_into().unwrap()` is safe. 
#[allow(clippy::unwrap_used)]
    Ok(brand_string.try_into().unwrap())
}

#[cfg(test)]
mod tests {
    #![allow(
        clippy::undocumented_unsafe_blocks,
        clippy::unwrap_used,
        clippy::as_conversions
    )]

    use std::collections::BTreeMap;
    use std::ffi::CStr;

    use super::*;
    use crate::cpu_config::x86_64::cpuid::{CpuidEntry, IntelCpuid, KvmCpuidFlags};

    #[test]
    fn default_brand_string_test() {
        // A realistic host brand string should be rewritten to the generic one,
        // keeping the frequency suffix.
        let brand_string = b"Intel(R) Xeon(R) Platinum 8275CL CPU @ 3.00GHz\0\0";
        let actual = default_brand_string(*brand_string);
        let expected = Ok(*b"Intel(R) Xeon(R) Processor @ 3.00GHz\0\0\0\0\0\0\0\0\0\0\0\0");
        assert_eq!(actual, expected);
    }

    #[test]
    fn default_brand_string_test_missing_frequency() {
        // No frequency after the '@' marker must be reported as an error.
        let brand_string = b"Intel(R) Xeon(R) Platinum 8275CL CPU @ \0\0\0\0\0\0\0\0\0";
        let actual = default_brand_string(*brand_string);
        assert_eq!(
            actual,
            Err(DefaultBrandStringError::MissingFrequency(*brand_string))
        );
    }

    #[test]
    fn default_brand_string_test_missing_space() {
        // The '@' must be followed by a space; "@3.00GHz" is malformed.
        let brand_string = b"Intel(R) Xeon(R) Platinum 8275CL CPU @3.00GHz\0\0\0";
        let actual = default_brand_string(*brand_string);
        assert_eq!(
            actual,
            Err(DefaultBrandStringError::MissingSpace(*brand_string))
        );
    }

    #[test]
    fn default_brand_string_test_overflow() {
        // A frequency string too long to fit the default template must overflow.
        let brand_string = b"@ 123456789876543212345678987654321234567898GHz\0";
        let result = default_brand_string(*brand_string);
        assert_eq!(
            result,
            Err(DefaultBrandStringError::Overflow),
            "{:?}",
            result
                .as_ref()
                .map(|s| CStr::from_bytes_until_nul(s).unwrap()),
        );
    }

    #[test]
    fn test_update_extended_feature_flags_entry() {
        let mut cpuid = IntelCpuid(BTreeMap::from([(
            CpuidKey {
                leaf: 0x7,
                subleaf: 0,
            },
            CpuidEntry {
                flags: KvmCpuidFlags::SIGNIFICANT_INDEX,
                ..Default::default()
            },
        )]));

        cpuid.update_extended_feature_flags_entry().unwrap();

        let leaf_7_0 = cpuid
            .get(&CpuidKey {
                leaf: 0x7,
                subleaf: 0,
            })
            .unwrap();
        // FDP_EXCPTN_ONLY (bit 6) and Deprecates FPU CS/DS (bit 13) must be set.
        assert!((leaf_7_0.result.ebx & (1 << 6)) > 0);
        assert!((leaf_7_0.result.ebx & (1 << 13)) > 0);
        // ECX bit 5 must be cleared.
        assert_eq!((leaf_7_0.result.ecx & (1 << 5)), 0);
    }

    #[test]
    fn test_update_extended_topology_v2_entry_no_leaf_0x1f() {
        // Without a host leaf 0x1F, none should be synthesized.
        let mut cpuid = IntelCpuid(BTreeMap::from([(
            CpuidKey {
                leaf: 0xB,
                subleaf: 0,
            },
            CpuidEntry {
                flags: KvmCpuidFlags::SIGNIFICANT_INDEX,
                ..Default::default()
            },
        )]));

        cpuid.update_extended_topology_v2_entry();

        assert!(
            cpuid
                .get(&CpuidKey {
                    leaf: 0x1F,
                    subleaf: 0,
                })
                .is_none()
        );
    }

    #[test]
    fn test_update_extended_topology_v2_entry() {
        let mut cpuid = IntelCpuid(BTreeMap::from([
            (
                CpuidKey {
                    leaf: 0xB,
                    subleaf: 0,
                },
                CpuidEntry {
                    flags: KvmCpuidFlags::SIGNIFICANT_INDEX,
                    result: CpuidRegisters {
                        eax: 0x1,
                        ebx: 0x2,
                        ecx: 0x3,
                        edx: 0x4,
                    },
                },
            ),
            (
                CpuidKey {
                    leaf: 0xB,
                    subleaf: 1,
                },
                CpuidEntry {
                    flags: KvmCpuidFlags::SIGNIFICANT_INDEX,
                    result: CpuidRegisters {
                        eax: 0xa,
                        ebx: 0xb,
                        ecx: 0xc,
                        edx: 0xd,
                    },
                },
            ),
            (
                CpuidKey {
                    leaf: 0x1F,
                    subleaf: 0,
                },
                CpuidEntry {
                    flags: KvmCpuidFlags::SIGNIFICANT_INDEX,
                    result: CpuidRegisters {
                        eax: 0xFFFFFFFF,
                        ebx: 0xFFFFFFFF,
                        ecx: 0xFFFFFFFF,
                        edx: 0xFFFFFFFF,
                    },
                },
            ),
        ]));

        cpuid.update_extended_topology_v2_entry();

        // Check leaf 0x1F, subleaf 0 is updated (overwritten from leaf 0xB).
        let leaf_1f_0 = cpuid
            .get(&CpuidKey {
                leaf: 0x1F,
                subleaf: 0,
            })
            .unwrap();
        assert_eq!(leaf_1f_0.result.eax, 0x1);
        assert_eq!(leaf_1f_0.result.ebx, 0x2);
        assert_eq!(leaf_1f_0.result.ecx, 0x3);
        assert_eq!(leaf_1f_0.result.edx, 0x4);

        // Check leaf 0x1F, subleaf 1 is inserted.
        let leaf_1f_1 = cpuid
            .get(&CpuidKey {
                leaf: 0x1F,
                subleaf: 1,
            })
            .unwrap();
        assert_eq!(leaf_1f_1.result.eax, 0xa);
        assert_eq!(leaf_1f_1.result.ebx, 0xb);
        assert_eq!(leaf_1f_1.result.ecx, 0xc);
        assert_eq!(leaf_1f_1.result.edx, 0xd);
    }
}

================================================
FILE: src/vmm/src/cpu_config/x86_64/cpuid/mod.rs
================================================
// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the THIRD-PARTY file.

#![warn(clippy::pedantic)]
#![allow(
    clippy::blanket_clippy_restriction_lints,
    clippy::implicit_return,
    clippy::pattern_type_mismatch,
    clippy::std_instead_of_alloc,
    clippy::std_instead_of_core,
    clippy::pub_use,
    clippy::non_ascii_literal,
    clippy::single_char_lifetime_names,
    clippy::exhaustive_enums,
    clippy::exhaustive_structs,
    clippy::unseparated_literal_suffix,
    clippy::mod_module_files,
    clippy::missing_trait_methods
)]
// Apply CPUID specific lint adjustments.
#![allow(
    clippy::unreadable_literal,
    clippy::similar_names,
    clippy::same_name_method,
    clippy::doc_markdown,
    clippy::module_name_repetitions
)]

//! Utility for configuring the CPUID (CPU identification) for the guest microVM.

use std::convert::TryFrom;
use std::mem::{size_of, transmute};

/// cpuid utility functions.
pub mod common;
/// AMD CPUID specification handling.
pub mod amd;
pub use amd::AmdCpuid;
/// Intel CPUID specification handling.
pub mod intel;
pub use intel::IntelCpuid;
/// CPUID normalize implementation.
mod normalize;
pub use normalize::{FeatureInformationError, GetMaxCpusPerPackageError, NormalizeCpuidError};

/// Intel brand string.
pub const VENDOR_ID_INTEL: &[u8; 12] = b"GenuineIntel";
/// AMD brand string.
pub const VENDOR_ID_AMD: &[u8; 12] = b"AuthenticAMD";
/// Intel brand string.
#[allow(clippy::undocumented_unsafe_blocks)]
pub const VENDOR_ID_INTEL_STR: &str = unsafe { std::str::from_utf8_unchecked(VENDOR_ID_INTEL) };
/// AMD brand string.
#[allow(clippy::undocumented_unsafe_blocks)]
pub const VENDOR_ID_AMD_STR: &str = unsafe { std::str::from_utf8_unchecked(VENDOR_ID_AMD) };
/// To store the brand string we have 3 leaves, each with 4 registers, each with 4 bytes.
pub const BRAND_STRING_LENGTH: usize = 3 * 4 * 4;

/// Mimic of [`std::arch::x86_64::__cpuid`] that wraps [`cpuid_count`].
fn cpuid(leaf: u32) -> std::arch::x86_64::CpuidResult { cpuid_count(leaf, 0) } /// Safe wrapper around [`std::arch::x86_64::__cpuid_count`]. fn cpuid_count(leaf: u32, subleaf: u32) -> std::arch::x86_64::CpuidResult { // JUSTIFICATION: There is no safe alternative. // SAFETY: The `cfg(cpuid)` wrapping the `cpuid` module guarantees `CPUID` is supported. unsafe { std::arch::x86_64::__cpuid_count(leaf, subleaf) } } /// Gets the Intel default brand. // As we pass through host frequency, we require CPUID and thus `cfg(cpuid)`. /// Gets host brand string. /// /// Its stored in-order with bytes flipped in each register e.g.: /// ```text /// "etnI" | ")4(l" | "oeX " | ")R(n" | /// "orP " | "ssec" | "@ ro" | "0.3 " | /// "zHG0" | null | null | null /// ------------------------------------ /// Intel(R) Xeon(R) Processor @ 3.00Ghz /// ``` #[inline] #[must_use] pub fn host_brand_string() -> [u8; BRAND_STRING_LENGTH] { let leaf_a = cpuid(0x80000002); let leaf_b = cpuid(0x80000003); let leaf_c = cpuid(0x80000004); let arr = [ leaf_a.eax, leaf_a.ebx, leaf_a.ecx, leaf_a.edx, leaf_b.eax, leaf_b.ebx, leaf_b.ecx, leaf_b.edx, leaf_c.eax, leaf_c.ebx, leaf_c.ecx, leaf_c.edx, ]; // JUSTIFICATION: There is no safe alternative. // SAFETY: Transmuting `[u32;12]` to `[u8;BRAND_STRING_LENGTH]` (`[u8;48]`) is always safe. unsafe { std::mem::transmute(arr) } } /// Trait defining shared behaviour between CPUID structures. pub trait CpuidTrait { /// Returns the CPUID manufacturers ID (e.g. `GenuineIntel` or `AuthenticAMD`) or `None` if it /// cannot be found in CPUID (e.g. leaf 0x0 is missing). #[inline] #[must_use] fn vendor_id(&self) -> Option<[u8; 12]> { let leaf_0 = self.get(&CpuidKey::leaf(0x0))?; // The ordering of the vendor string is ebx,edx,ecx this is not a mistake. 
let (ebx, edx, ecx) = ( leaf_0.result.ebx.to_ne_bytes(), leaf_0.result.edx.to_ne_bytes(), leaf_0.result.ecx.to_ne_bytes(), ); let arr: [u8; 12] = [ ebx[0], ebx[1], ebx[2], ebx[3], edx[0], edx[1], edx[2], edx[3], ecx[0], ecx[1], ecx[2], ecx[3], ]; Some(arr) } /// Gets a given sub-leaf. fn get(&self, key: &CpuidKey) -> Option<&CpuidEntry>; /// Gets a given sub-leaf. fn get_mut(&mut self, key: &CpuidKey) -> Option<&mut CpuidEntry>; /// Applies a given brand string to CPUID. /// /// # Errors /// /// When any of the leaves 0x80000002, 0x80000003 or 0x80000004 are not present. #[inline] fn apply_brand_string( &mut self, brand_string: &[u8; BRAND_STRING_LENGTH], ) -> Result<(), MissingBrandStringLeaves> { // 0x80000002 { let leaf: &mut CpuidEntry = self .get_mut(&CpuidKey::leaf(0x80000002)) .ok_or(MissingBrandStringLeaves)?; leaf.result.eax = u32::from_ne_bytes([ brand_string[0], brand_string[1], brand_string[2], brand_string[3], ]); leaf.result.ebx = u32::from_ne_bytes([ brand_string[4], brand_string[5], brand_string[6], brand_string[7], ]); leaf.result.ecx = u32::from_ne_bytes([ brand_string[8], brand_string[9], brand_string[10], brand_string[11], ]); leaf.result.edx = u32::from_ne_bytes([ brand_string[12], brand_string[13], brand_string[14], brand_string[15], ]); } // 0x80000003 { let leaf: &mut CpuidEntry = self .get_mut(&CpuidKey::leaf(0x80000003)) .ok_or(MissingBrandStringLeaves)?; leaf.result.eax = u32::from_ne_bytes([ brand_string[16], brand_string[17], brand_string[18], brand_string[19], ]); leaf.result.ebx = u32::from_ne_bytes([ brand_string[20], brand_string[21], brand_string[22], brand_string[23], ]); leaf.result.ecx = u32::from_ne_bytes([ brand_string[24], brand_string[25], brand_string[26], brand_string[27], ]); leaf.result.edx = u32::from_ne_bytes([ brand_string[28], brand_string[29], brand_string[30], brand_string[31], ]); } // 0x80000004 { let leaf: &mut CpuidEntry = self .get_mut(&CpuidKey::leaf(0x80000004)) .ok_or(MissingBrandStringLeaves)?; 
leaf.result.eax = u32::from_ne_bytes([ brand_string[32], brand_string[33], brand_string[34], brand_string[35], ]); leaf.result.ebx = u32::from_ne_bytes([ brand_string[36], brand_string[37], brand_string[38], brand_string[39], ]); leaf.result.ecx = u32::from_ne_bytes([ brand_string[40], brand_string[41], brand_string[42], brand_string[43], ]); leaf.result.edx = u32::from_ne_bytes([ brand_string[44], brand_string[45], brand_string[46], brand_string[47], ]); } Ok(()) } } impl CpuidTrait for kvm_bindings::CpuId { /// Gets a given sub-leaf. #[allow(clippy::transmute_ptr_to_ptr, clippy::unwrap_used)] #[inline] fn get(&self, CpuidKey { leaf, subleaf }: &CpuidKey) -> Option<&CpuidEntry> { let entry_opt = self .as_slice() .iter() .find(|entry| entry.function == *leaf && entry.index == *subleaf); entry_opt.map(|entry| { // JUSTIFICATION: There is no safe alternative. // SAFETY: The `kvm_cpuid_entry2` and `CpuidEntry` are `repr(C)` with known sizes. unsafe { let arr: &[u8; size_of::()] = transmute(entry); let arr2: &[u8; size_of::()] = arr[8..28].try_into().unwrap(); transmute::<_, &CpuidEntry>(arr2) } }) } /// Gets a given sub-leaf. #[allow(clippy::transmute_ptr_to_ptr, clippy::unwrap_used)] #[inline] fn get_mut(&mut self, CpuidKey { leaf, subleaf }: &CpuidKey) -> Option<&mut CpuidEntry> { let entry_opt = self .as_mut_slice() .iter_mut() .find(|entry| entry.function == *leaf && entry.index == *subleaf); entry_opt.map(|entry| { // JUSTIFICATION: There is no safe alternative. // SAFETY: The `kvm_cpuid_entry2` and `CpuidEntry` are `repr(C)` with known sizes. unsafe { let arr: &mut [u8; size_of::()] = transmute(entry); let arr2: &mut [u8; size_of::()] = (&mut arr[8..28]).try_into().unwrap(); transmute::<_, &mut CpuidEntry>(arr2) } }) } } /// Error type for [`CpuidTrait::apply_brand_string`]. 
#[derive(Debug, thiserror::Error, Eq, PartialEq)] #[error("Missing brand string leaves 0x80000002, 0x80000003 and 0x80000004.")] pub struct MissingBrandStringLeaves; /// Error type for conversion from `kvm_bindings::CpuId` to `Cpuid`. #[rustfmt::skip] #[derive(Debug, thiserror::Error, displaydoc::Display, PartialEq, Eq)] pub enum CpuidTryFromKvmCpuid { /// Leaf 0 not found in the given `kvm_bindings::CpuId`. MissingLeaf0, /// Unsupported CPUID manufacturer id: \"{0:?}\" (only 'GenuineIntel' and 'AuthenticAMD' are supported). UnsupportedVendor([u8; 12]), } /// CPUID information #[derive(Debug, Clone, PartialEq, Eq)] pub enum Cpuid { /// Intel CPUID specific information. Intel(IntelCpuid), /// AMD CPUID specific information. Amd(AmdCpuid), } impl Cpuid { /// Returns `Some(&mut IntelCpuid)` if `Self == Self::Intel(_)` else returns `None`. #[inline] #[must_use] pub fn intel_mut(&mut self) -> Option<&mut IntelCpuid> { match self { Self::Intel(intel) => Some(intel), Self::Amd(_) => None, } } /// Returns `Some(&IntelCpuid)` if `Self == Self::Intel(_)` else returns `None`. #[inline] #[must_use] pub fn intel(&self) -> Option<&IntelCpuid> { match self { Self::Intel(intel) => Some(intel), Self::Amd(_) => None, } } /// Returns `Some(&AmdCpuid)` if `Self == Self::Amd(_)` else returns `None`. #[inline] #[must_use] pub fn amd(&self) -> Option<&AmdCpuid> { match self { Self::Intel(_) => None, Self::Amd(amd) => Some(amd), } } /// Returns `Some(&mut AmdCpuid)` if `Self == Self::Amd(_)` else returns `None`. #[inline] #[must_use] pub fn amd_mut(&mut self) -> Option<&mut AmdCpuid> { match self { Self::Intel(_) => None, Self::Amd(amd) => Some(amd), } } /// Returns imumutable reference to inner BTreeMap. #[inline] #[must_use] pub fn inner(&self) -> &std::collections::BTreeMap { match self { Self::Intel(intel_cpuid) => &intel_cpuid.0, Self::Amd(amd_cpuid) => &amd_cpuid.0, } } /// Returns mutable reference to inner BTreeMap. 
#[inline] #[must_use] pub fn inner_mut(&mut self) -> &mut std::collections::BTreeMap { match self { Self::Intel(intel_cpuid) => &mut intel_cpuid.0, Self::Amd(amd_cpuid) => &mut amd_cpuid.0, } } } impl CpuidTrait for Cpuid { /// Gets a given sub-leaf. #[inline] fn get(&self, key: &CpuidKey) -> Option<&CpuidEntry> { match self { Self::Intel(intel_cpuid) => intel_cpuid.get(key), Self::Amd(amd_cpuid) => amd_cpuid.get(key), } } /// Gets a given sub-leaf. #[inline] fn get_mut(&mut self, key: &CpuidKey) -> Option<&mut CpuidEntry> { match self { Self::Intel(intel_cpuid) => intel_cpuid.get_mut(key), Self::Amd(amd_cpuid) => amd_cpuid.get_mut(key), } } } impl TryFrom for Cpuid { type Error = CpuidTryFromKvmCpuid; #[inline] fn try_from(kvm_cpuid: kvm_bindings::CpuId) -> Result { let vendor_id = kvm_cpuid .vendor_id() .ok_or(CpuidTryFromKvmCpuid::MissingLeaf0)?; match std::str::from_utf8(&vendor_id) { Ok(VENDOR_ID_INTEL_STR) => Ok(Cpuid::Intel(IntelCpuid::from(kvm_cpuid))), Ok(VENDOR_ID_AMD_STR) => Ok(Cpuid::Amd(AmdCpuid::from(kvm_cpuid))), _ => Err(CpuidTryFromKvmCpuid::UnsupportedVendor(vendor_id)), } } } impl TryFrom for kvm_bindings::CpuId { type Error = vmm_sys_util::fam::Error; fn try_from(cpuid: Cpuid) -> Result { let entries = cpuid .inner() .iter() .map(|(key, entry)| kvm_bindings::kvm_cpuid_entry2 { function: key.leaf, index: key.subleaf, flags: entry.flags.0, eax: entry.result.eax, ebx: entry.result.ebx, ecx: entry.result.ecx, edx: entry.result.edx, ..Default::default() }) .collect::>(); kvm_bindings::CpuId::from_entries(&entries) } } /// CPUID index values `leaf` and `subleaf`. #[derive(Debug, Clone, Default, PartialEq, Eq)] pub struct CpuidKey { /// CPUID leaf. pub leaf: u32, /// CPUID subleaf. 
pub subleaf: u32, } impl CpuidKey { /// `CpuidKey { leaf, subleaf: 0 }` #[inline] #[must_use] pub fn leaf(leaf: u32) -> Self { Self { leaf, subleaf: 0 } } /// `CpuidKey { leaf, subleaf }` #[inline] #[must_use] pub fn subleaf(leaf: u32, subleaf: u32) -> Self { Self { leaf, subleaf } } } impl std::cmp::PartialOrd for CpuidKey { #[inline] fn partial_cmp(&self, other: &Self) -> Option { Some(std::cmp::Ord::cmp(self, other)) } } impl std::cmp::Ord for CpuidKey { #[inline] fn cmp(&self, other: &Self) -> std::cmp::Ordering { self.leaf .cmp(&other.leaf) .then(self.subleaf.cmp(&other.subleaf)) } } /// Definitions from `kvm/arch/x86/include/uapi/asm/kvm.h #[derive( Debug, serde::Serialize, serde::Deserialize, PartialEq, Eq, PartialOrd, Ord, Clone, Copy, Hash, )] pub struct KvmCpuidFlags(pub u32); impl KvmCpuidFlags { /// Zero. pub const EMPTY: Self = Self(0); /// Indicates if the `index` field is used for indexing sub-leaves (if false, this CPUID leaf /// has no subleaves). pub const SIGNIFICANT_INDEX: Self = Self(1 << 0); /// Deprecated. pub const STATEFUL_FUNC: Self = Self(1 << 1); /// Deprecated. pub const STATE_READ_NEXT: Self = Self(1 << 2); } #[allow(clippy::derivable_impls)] impl Default for KvmCpuidFlags { #[inline] fn default() -> Self { Self(0) } } /// CPUID entry information stored for each leaf of [`IntelCpuid`]. #[derive(Debug, Default, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)] #[repr(C)] pub struct CpuidEntry { /// The KVM requires a `flags` parameter which indicates if a given CPUID leaf has sub-leaves. /// This does not change at runtime so we can save memory by not storing this under every /// sub-leaf and instead fetching from a map when converting back to the KVM CPUID /// structure. But for robustness we currently do store we do not use this approach. /// /// A map on flags would look like: /// ```ignore /// #[allow(clippy::non_ascii_literal)] /// pub static KVM_CPUID_LEAF_FLAGS: phf::Map = phf::phf_map! 
{ /// 0x00u32 => KvmCpuidFlags::EMPTY, /// 0x01u32 => KvmCpuidFlags::EMPTY, /// 0x02u32 => KvmCpuidFlags::EMPTY, /// 0x03u32 => KvmCpuidFlags::EMPTY, /// 0x04u32 => KvmCpuidFlags::SIGNIFICANT_INDEX, /// 0x05u32 => KvmCpuidFlags::EMPTY, /// 0x06u32 => KvmCpuidFlags::EMPTY, /// 0x07u32 => KvmCpuidFlags::SIGNIFICANT_INDEX, /// 0x09u32 => KvmCpuidFlags::EMPTY, /// 0x0Au32 => KvmCpuidFlags::EMPTY, /// 0x0Bu32 => KvmCpuidFlags::SIGNIFICANT_INDEX, /// 0x0Fu32 => KvmCpuidFlags::SIGNIFICANT_INDEX, /// 0x10u32 => KvmCpuidFlags::SIGNIFICANT_INDEX, /// 0x12u32 => KvmCpuidFlags::SIGNIFICANT_INDEX, /// 0x14u32 => KvmCpuidFlags::SIGNIFICANT_INDEX, /// 0x15u32 => KvmCpuidFlags::EMPTY, /// 0x16u32 => KvmCpuidFlags::EMPTY, /// 0x17u32 => KvmCpuidFlags::SIGNIFICANT_INDEX, /// 0x18u32 => KvmCpuidFlags::SIGNIFICANT_INDEX, /// 0x19u32 => KvmCpuidFlags::EMPTY, /// 0x1Au32 => KvmCpuidFlags::EMPTY, /// 0x1Bu32 => KvmCpuidFlags::EMPTY, /// 0x1Cu32 => KvmCpuidFlags::EMPTY, /// 0x1Fu32 => KvmCpuidFlags::SIGNIFICANT_INDEX, /// 0x20u32 => KvmCpuidFlags::EMPTY, /// 0x80000000u32 => KvmCpuidFlags::EMPTY, /// 0x80000001u32 => KvmCpuidFlags::EMPTY, /// 0x80000002u32 => KvmCpuidFlags::EMPTY, /// 0x80000003u32 => KvmCpuidFlags::EMPTY, /// 0x80000004u32 => KvmCpuidFlags::EMPTY, /// 0x80000005u32 => KvmCpuidFlags::EMPTY, /// 0x80000006u32 => KvmCpuidFlags::EMPTY, /// 0x80000007u32 => KvmCpuidFlags::EMPTY, /// 0x80000008u32 => KvmCpuidFlags::EMPTY, /// }; /// ``` pub flags: KvmCpuidFlags, /// Register values. pub result: CpuidRegisters, } /// To transmute this into leaves such that we can return mutable reference to it with leaf specific /// accessors, requires this to have a consistent member ordering. /// [`core::arch::x86_64::CpuidResult`] is not `repr(C)`. 
#[derive(Debug, Default, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)] #[repr(C)] pub struct CpuidRegisters { /// EAX pub eax: u32, /// EBX pub ebx: u32, /// ECX pub ecx: u32, /// EDX pub edx: u32, } impl From for CpuidRegisters { #[inline] fn from( core::arch::x86_64::CpuidResult { eax, ebx, ecx, edx }: core::arch::x86_64::CpuidResult, ) -> Self { Self { eax, ebx, ecx, edx } } } #[cfg(test)] mod tests { use std::collections::BTreeMap; use super::*; fn build_intel_leaf0_for_cpuid() -> (CpuidKey, CpuidEntry) { ( CpuidKey { leaf: 0x0, subleaf: 0x0, }, CpuidEntry { flags: KvmCpuidFlags::EMPTY, result: CpuidRegisters { eax: 0x1, // GenuineIntel ebx: 0x756E6547, ecx: 0x6C65746E, edx: 0x49656E69, }, }, ) } fn build_intel_leaf0_for_kvmcpuid() -> kvm_bindings::kvm_cpuid_entry2 { kvm_bindings::kvm_cpuid_entry2 { function: 0x0, index: 0x0, flags: 0x0, eax: 0x1, // GenuineIntel ebx: 0x756E6547, ecx: 0x6C65746E, edx: 0x49656E69, ..Default::default() } } fn build_amd_leaf0_for_cpuid() -> (CpuidKey, CpuidEntry) { ( CpuidKey { leaf: 0x0, subleaf: 0x0, }, CpuidEntry { flags: KvmCpuidFlags::EMPTY, result: CpuidRegisters { eax: 0x1, // AuthenticAMD ebx: 0x68747541, ecx: 0x444D4163, edx: 0x69746E65, }, }, ) } fn build_amd_leaf0_for_kvmcpuid() -> kvm_bindings::kvm_cpuid_entry2 { kvm_bindings::kvm_cpuid_entry2 { function: 0x0, index: 0x0, flags: 0x0, eax: 0x1, // AuthenticAMD ebx: 0x68747541, ecx: 0x444D4163, edx: 0x69746E65, ..Default::default() } } fn build_sample_leaf_for_cpuid() -> (CpuidKey, CpuidEntry) { ( CpuidKey { leaf: 0x1, subleaf: 0x2, }, CpuidEntry { flags: KvmCpuidFlags::SIGNIFICANT_INDEX, result: CpuidRegisters { eax: 0x3, ebx: 0x4, ecx: 0x5, edx: 0x6, }, }, ) } fn build_sample_leaf_for_kvmcpuid() -> kvm_bindings::kvm_cpuid_entry2 { kvm_bindings::kvm_cpuid_entry2 { function: 0x1, index: 0x2, flags: 0x1, eax: 0x3, ebx: 0x4, ecx: 0x5, edx: 0x6, ..Default::default() } } fn build_sample_intel_cpuid() -> Cpuid { Cpuid::Intel(IntelCpuid(BTreeMap::from([ 
build_intel_leaf0_for_cpuid(), build_sample_leaf_for_cpuid(), ]))) } fn build_sample_intel_kvmcpuid() -> kvm_bindings::CpuId { kvm_bindings::CpuId::from_entries(&[ build_intel_leaf0_for_kvmcpuid(), build_sample_leaf_for_kvmcpuid(), ]) .unwrap() } fn build_sample_amd_cpuid() -> Cpuid { Cpuid::Amd(AmdCpuid(BTreeMap::from([ build_amd_leaf0_for_cpuid(), build_sample_leaf_for_cpuid(), ]))) } fn build_sample_amd_kvmcpuid() -> kvm_bindings::CpuId { kvm_bindings::CpuId::from_entries(&[ build_amd_leaf0_for_kvmcpuid(), build_sample_leaf_for_kvmcpuid(), ]) .unwrap() } #[test] fn get() { let cpuid = build_sample_intel_cpuid(); assert_eq!( cpuid.get(&CpuidKey { leaf: 0x8888, subleaf: 0x0 }), None ); assert!( cpuid .get(&CpuidKey { leaf: 0x0, subleaf: 0x0, }) .is_some() ); } #[test] fn get_mut() { let mut cpuid = build_sample_intel_cpuid(); assert_eq!( cpuid.get_mut(&CpuidKey { leaf: 0x888, subleaf: 0x0, }), None ); assert!( cpuid .get_mut(&CpuidKey { leaf: 0x0, subleaf: 0x0, }) .is_some() ); } #[test] fn test_kvmcpuid_to_cpuid() { let kvm_cpuid = build_sample_intel_kvmcpuid(); let cpuid = Cpuid::try_from(kvm_cpuid).unwrap(); assert_eq!(cpuid, build_sample_intel_cpuid()); let kvm_cpuid = build_sample_amd_kvmcpuid(); let cpuid = Cpuid::try_from(kvm_cpuid).unwrap(); assert_eq!(cpuid, build_sample_amd_cpuid()); } #[test] fn test_cpuid_to_kvmcpuid() { let cpuid = build_sample_intel_cpuid(); let kvm_cpuid = kvm_bindings::CpuId::try_from(cpuid).unwrap(); assert_eq!(kvm_cpuid, build_sample_intel_kvmcpuid()); let cpuid = build_sample_amd_cpuid(); let kvm_cpuid = kvm_bindings::CpuId::try_from(cpuid).unwrap(); assert_eq!(kvm_cpuid, build_sample_amd_kvmcpuid()); } #[test] fn test_invalid_kvmcpuid_to_cpuid() { // If leaf 0 contains invalid vendor ID, the type conversion should fail. 
let kvm_cpuid =
            kvm_bindings::CpuId::from_entries(&[kvm_bindings::kvm_cpuid_entry2::default()])
                .unwrap();
        let cpuid = Cpuid::try_from(kvm_cpuid);
        // A default (all-zero) leaf 0 carries a vendor ID of 12 zero bytes.
        assert_eq!(cpuid, Err(CpuidTryFromKvmCpuid::UnsupportedVendor([0; 12])));
    }
}

================================================
FILE: src/vmm/src/cpu_config/x86_64/cpuid/normalize.rs
================================================
// Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

use crate::cpu_config::x86_64::cpuid::{
    CpuidEntry, CpuidKey, CpuidRegisters, CpuidTrait, KvmCpuidFlags, cpuid,
};
use crate::logger::warn;
use crate::vmm_config::machine_config::MAX_SUPPORTED_VCPUS;

/// Error type for [`super::Cpuid::normalize`].
#[allow(clippy::module_name_repetitions)]
#[derive(Debug, thiserror::Error, displaydoc::Display, Eq, PartialEq)]
pub enum NormalizeCpuidError {
    /// Provided `cpu_bits` is >=8: {0}.
    CpuBits(u8),
    /// Failed to apply modifications to Intel CPUID: {0}
    Intel(#[from] crate::cpu_config::x86_64::cpuid::intel::NormalizeCpuidError),
    /// Failed to apply modifications to AMD CPUID: {0}
    Amd(#[from] crate::cpu_config::x86_64::cpuid::amd::NormalizeCpuidError),
    /// Failed to set feature information leaf: {0}
    FeatureInformation(#[from] FeatureInformationError),
    /// Failed to set extended topology leaf: {0}
    ExtendedTopology(#[from] ExtendedTopologyError),
    /// Failed to set extended cache features leaf: {0}
    ExtendedCacheFeatures(#[from] ExtendedCacheFeaturesError),
    /// Failed to set vendor ID in leaf 0x0: {0}
    VendorId(#[from] VendorIdError),
}

/// Error type for setting leaf 0 section.
#[derive(Debug, thiserror::Error, displaydoc::Display, Eq, PartialEq)]
pub enum VendorIdError {
    /// Leaf 0x0 is missing from CPUID.
    MissingLeaf0,
}

/// Error type for setting leaf 1 section of `IntelCpuid::normalize`.
#[derive(Debug, thiserror::Error, displaydoc::Display, Eq, PartialEq)]
pub enum FeatureInformationError {
    /// Leaf 0x1 is missing from CPUID.
    MissingLeaf1,
    /// Failed to set `Initial APIC ID`: {0}
    InitialApicId(CheckedAssignError),
    /// Failed to set `CLFLUSH line size`: {0}
    Clflush(CheckedAssignError),
    /// Failed to get max CPUs per package: {0}
    GetMaxCpusPerPackage(GetMaxCpusPerPackageError),
    /// Failed to set max CPUs per package: {0}
    SetMaxCpusPerPackage(CheckedAssignError),
}

/// Error type for `get_max_cpus_per_package`.
#[derive(Debug, thiserror::Error, displaydoc::Display, Eq, PartialEq)]
pub enum GetMaxCpusPerPackageError {
    /// Failed to get max CPUs per package as `cpu_count == 0`
    Underflow,
    /// Failed to get max CPUs per package as `cpu_count > 128`
    Overflow,
}

/// Error type for setting leaf b section of `IntelCpuid::normalize`.
#[rustfmt::skip]
#[derive(Debug, thiserror::Error, displaydoc::Display, Eq, PartialEq)]
pub enum ExtendedTopologyError {
    /// Failed to set domain type (CPUID.(EAX=0xB,ECX={0}):ECX[15:8]): {1}
    DomainType(u32, CheckedAssignError),
    /// Failed to set input ECX (CPUID.(EAX=0xB,ECX={0}):ECX[7:0]): {1}
    InputEcx(u32, CheckedAssignError),
    /// Failed to set number of logical processors (CPUID.(EAX=0xB,ECX={0}):EBX[15:0]): {1}
    NumLogicalProcs(u32, CheckedAssignError),
    /// Failed to set right-shift bits (CPUID.(EAX=0xB,ECX={0}):EAX[4:0]): {1}
    RightShiftBits(u32, CheckedAssignError),
}

/// Error type for setting leaf 0x80000006 of Cpuid::normalize().
#[derive(Debug, thiserror::Error, displaydoc::Display, Eq, PartialEq)]
pub enum ExtendedCacheFeaturesError {
    /// Leaf 0x80000005 is missing from CPUID.
    MissingLeaf0x80000005,
    /// Leaf 0x80000006 is missing from CPUID.
    MissingLeaf0x80000006,
}

/// Error type for setting a bit range.
#[derive(Debug, PartialEq, Eq, thiserror::Error)]
#[error("Given value is greater than maximum storable value in bit range.")]
pub struct CheckedAssignError;

/// Sets a given bit to a true or false (1 or 0).
#[allow(clippy::arithmetic_side_effects)] pub fn set_bit(x: &mut u32, bit: u8, y: bool) { debug_assert!(bit < 32); *x = (*x & !(1 << bit)) | ((u32::from(u8::from(y))) << bit); } /// Sets a given range to a given value. pub fn set_range( x: &mut u32, range: std::ops::RangeInclusive, y: u32, ) -> Result<(), CheckedAssignError> { let start = *range.start(); let end = *range.end(); debug_assert!(end >= start); debug_assert!(end < 32); // Ensure `y` fits within the number of bits in the specified range. // Note that // - 1 <= `num_bits` <= 32 from the above assertion // - if `num_bits` equals to 32, `y` always fits within it since `y` is `u32`. let num_bits = end - start + 1; if num_bits < 32 && y >= (1u32 << num_bits) { return Err(CheckedAssignError); } let mask = get_mask(range); *x = (*x & !mask) | (y << start); Ok(()) } /// Gets a given range within a given value. pub fn get_range(x: u32, range: std::ops::RangeInclusive) -> u32 { let start = *range.start(); let end = *range.end(); debug_assert!(end >= start); debug_assert!(end < 32); let mask = get_mask(range); (x & mask) >> start } /// Returns a mask where the given range is ones. const fn get_mask(range: std::ops::RangeInclusive) -> u32 { let num_bits = *range.end() - *range.start() + 1; let shift = *range.start(); if num_bits == 32 { u32::MAX } else { ((1u32 << num_bits) - 1) << shift } } // We use this 2nd implementation so we can conveniently define functions only used within // `normalize`. #[allow(clippy::multiple_inherent_impl)] impl super::Cpuid { /// Applies required modifications to CPUID respective of a vCPU. /// /// # Errors /// /// When: /// - [`super::IntelCpuid::normalize`] errors. /// - [`super::AmdCpuid::normalize`] errors. // As we pass through host frequency, we require CPUID and thus `cfg(cpuid)`. #[inline] pub fn normalize( &mut self, // The index of the current logical CPU in the range [0..cpu_count]. cpu_index: u8, // The total number of logical CPUs. 
cpu_count: u8, // The number of bits needed to enumerate logical CPUs per core. cpu_bits: u8, ) -> Result<(), NormalizeCpuidError> { let cpus_per_core = 1u8 .checked_shl(u32::from(cpu_bits)) .ok_or(NormalizeCpuidError::CpuBits(cpu_bits))?; self.update_vendor_id()?; self.update_feature_info_entry(cpu_index, cpu_count)?; self.update_extended_topology_entry(cpu_index, cpu_count, cpu_bits, cpus_per_core)?; self.update_extended_cache_features()?; // Apply manufacturer specific modifications. match self { // Apply Intel specific modifications. Self::Intel(intel_cpuid) => { intel_cpuid.normalize(cpu_index, cpu_count, cpus_per_core)?; } // Apply AMD specific modifications. Self::Amd(amd_cpuid) => amd_cpuid.normalize(cpu_index, cpu_count, cpus_per_core)?, } Ok(()) } /// Pass-through the vendor ID from the host. This is used to prevent modification of the vendor /// ID via custom CPU templates. fn update_vendor_id(&mut self) -> Result<(), VendorIdError> { let leaf_0 = self .get_mut(&CpuidKey::leaf(0x0)) .ok_or(VendorIdError::MissingLeaf0)?; let host_leaf_0 = cpuid(0x0); leaf_0.result.ebx = host_leaf_0.ebx; leaf_0.result.ecx = host_leaf_0.ecx; leaf_0.result.edx = host_leaf_0.edx; Ok(()) } // Update feature information entry fn update_feature_info_entry( &mut self, cpu_index: u8, cpu_count: u8, ) -> Result<(), FeatureInformationError> { let leaf_1 = self .get_mut(&CpuidKey::leaf(0x1)) .ok_or(FeatureInformationError::MissingLeaf1)?; // CPUID.01H:EBX[15:08] // CLFLUSH line size (Value * 8 = cache line size in bytes; used also by CLFLUSHOPT). set_range(&mut leaf_1.result.ebx, 8..=15, 8).map_err(FeatureInformationError::Clflush)?; // CPUID.01H:EBX[23:16] // Maximum number of addressable IDs for logical processors in this physical package. // // The nearest power-of-2 integer that is not smaller than EBX[23:16] is the number of // unique initial APIC IDs reserved for addressing different logical processors in a // physical package. 
This field is only valid if CPUID.1.EDX.HTT[bit 28]= 1. let max_cpus_per_package = u32::from( get_max_cpus_per_package(cpu_count) .map_err(FeatureInformationError::GetMaxCpusPerPackage)?, ); set_range(&mut leaf_1.result.ebx, 16..=23, max_cpus_per_package) .map_err(FeatureInformationError::SetMaxCpusPerPackage)?; // CPUID.01H:EBX[31:24] // Initial APIC ID. // // The 8-bit initial APIC ID in EBX[31:24] is replaced by the 32-bit x2APIC ID, available // in Leaf 0BH and Leaf 1FH. set_range(&mut leaf_1.result.ebx, 24..=31, u32::from(cpu_index)) .map_err(FeatureInformationError::InitialApicId)?; // CPUID.01H:ECX[15] (Mnemonic: PDCM) // Performance and Debug Capability: A value of 1 indicates the processor supports the // performance and debug feature indication MSR IA32_PERF_CAPABILITIES. set_bit(&mut leaf_1.result.ecx, 15, false); // CPUID.01H:ECX[24] (Mnemonic: TSC-Deadline) // A value of 1 indicates that the processor’s local APIC timer supports one-shot operation // using a TSC deadline value. set_bit(&mut leaf_1.result.ecx, 24, true); // CPUID.01H:ECX[31] (Mnemonic: Hypervisor) set_bit(&mut leaf_1.result.ecx, 31, true); // CPUID.01H:EDX[28] (Mnemonic: HTT) // Max APIC IDs reserved field is Valid. A value of 0 for HTT indicates there is only a // single logical processor in the package and software should assume only a single APIC ID // is reserved. A value of 1 for HTT indicates the value in CPUID.1.EBX[23:16] (the Maximum // number of addressable IDs for logical processors in this package) is valid for the // package. set_bit(&mut leaf_1.result.edx, 28, cpu_count > 1); Ok(()) } /// Update extended topology entry fn update_extended_topology_entry( &mut self, cpu_index: u8, cpu_count: u8, cpu_bits: u8, cpus_per_core: u8, ) -> Result<(), ExtendedTopologyError> { // The following commit changed the behavior of KVM_GET_SUPPORTED_CPUID to no longer // include CPUID.(EAX=0BH,ECX=1). 
// https://lore.kernel.org/all/20221027092036.2698180-1-pbonzini@redhat.com/ self.inner_mut() .entry(CpuidKey::subleaf(0xB, 0x1)) .or_insert(CpuidEntry { flags: KvmCpuidFlags::SIGNIFICANT_INDEX, result: CpuidRegisters { eax: 0x0, ebx: 0x0, ecx: 0x0, edx: 0x0, }, }); for index in 0.. { if let Some(subleaf) = self.get_mut(&CpuidKey::subleaf(0xB, index)) { // Reset eax, ebx, ecx subleaf.result.eax = 0; subleaf.result.ebx = 0; subleaf.result.ecx = 0; // CPUID.(EAX=0BH,ECX=N).EDX[31:0] // x2APIC ID of the current logical processor. subleaf.result.edx = u32::from(cpu_index); subleaf.flags = KvmCpuidFlags::SIGNIFICANT_INDEX; match index { // CPUID.(EAX=0BH,ECX=N):EAX[4:0] // The number of bits that the x2APIC ID must be shifted to the right to address // instances of the next higher-scoped domain. When logical processor is not // supported by the processor, the value of this field at the Logical Processor // domain sub-leaf may be returned as either 0 (no allocated bits in the x2APIC // ID) or 1 (one allocated bit in the x2APIC ID); software should plan // accordingly. // CPUID.(EAX=0BH,ECX=N):EBX[15:0] // The number of logical processors across all instances of this domain within // the next-higher scoped domain. (For example, in a processor socket/package // comprising "M" dies of "N" cores each, where each core has "L" logical // processors, the "die" domain sub-leaf value of this field would be M*N*L.) // This number reflects configuration as shipped by Intel. Note, software must // not use this field to enumerate processor topology. // CPUID.(EAX=0BH,ECX=N):ECX[7:0] // The input ECX sub-leaf index. // CPUID.(EAX=0BH,ECX=N):ECX[15:8] // Domain Type. This field provides an identification value which indicates the // domain as shown below. Although domains are ordered, their assigned // identification values are not and software should not depend on it. 
// // Hierarchy Domain Domain Type Identification Value // ----------------------------------------------------------------- // Lowest Logical Processor 1 // Highest Core 2 // // (Note that enumeration values of 0 and 3-255 are reserved.) // Logical processor domain 0 => { // To get the next level APIC ID, shift right with at most 1 because we have // maximum 2 logical procerssors per core that can be represented by 1 bit. set_range(&mut subleaf.result.eax, 0..=4, u32::from(cpu_bits)) .map_err(|err| ExtendedTopologyError::RightShiftBits(index, err))?; // When cpu_count == 1 or HT is disabled, there is 1 logical core at this // domain; otherwise there are 2 set_range(&mut subleaf.result.ebx, 0..=15, u32::from(cpus_per_core)) .map_err(|err| ExtendedTopologyError::NumLogicalProcs(index, err))?; // Skip setting 0 to ECX[7:0] since it's already reset to 0. // Set the domain type identification value for logical processor, set_range(&mut subleaf.result.ecx, 8..=15, 1) .map_err(|err| ExtendedTopologyError::DomainType(index, err))?; } // Core domain 1 => { // Configure such that the next higher-scoped domain (i.e. socket) include // all logical processors. // // The CPUID.(EAX=0BH,ECX=1).EAX[4:0] value must be an integer N such that // 2^N is greater than or equal to the maximum number of vCPUs. set_range( &mut subleaf.result.eax, 0..=4, MAX_SUPPORTED_VCPUS.next_power_of_two().ilog2(), ) .map_err(|err| ExtendedTopologyError::RightShiftBits(index, err))?; set_range(&mut subleaf.result.ebx, 0..=15, u32::from(cpu_count)) .map_err(|err| ExtendedTopologyError::NumLogicalProcs(index, err))?; // Setting the input ECX value (i.e. `index`) set_range(&mut subleaf.result.ecx, 0..=7, index) .map_err(|err| ExtendedTopologyError::InputEcx(index, err))?; // Set the domain type identification value for core. set_range(&mut subleaf.result.ecx, 8..=15, 2) .map_err(|err| ExtendedTopologyError::DomainType(index, err))?; } _ => { // KVM no longer returns any subleaves greater than 0. 
The patch was merged // in v6.2 and backported to v5.10. So for all our supported kernels, // subleaves >= 2 should not be included. // https://github.com/torvalds/linux/commit/45e966fcca03ecdcccac7cb236e16eea38cc18af // // However, we intentionally leave Firecracker not fail for unsupported // kernels to keep working. Note that we can detect KVM regression thanks // to the test that compares a fingerprint with its baseline. warn!("Subleaf {index} not expected for CPUID leaf 0xB."); subleaf.result.ecx = index; } } } else { break; } } Ok(()) } // Update extended cache features entry fn update_extended_cache_features(&mut self) -> Result<(), ExtendedCacheFeaturesError> { // Leaf 0x800000005 indicates L1 Cache and TLB Information. let guest_leaf_0x80000005 = self .get_mut(&CpuidKey::leaf(0x80000005)) .ok_or(ExtendedCacheFeaturesError::MissingLeaf0x80000005)?; guest_leaf_0x80000005.result = cpuid(0x80000005).into(); // Leaf 0x80000006 indicates L2 Cache and TLB and L3 Cache Information. let guest_leaf_0x80000006 = self .get_mut(&CpuidKey::leaf(0x80000006)) .ok_or(ExtendedCacheFeaturesError::MissingLeaf0x80000006)?; guest_leaf_0x80000006.result = cpuid(0x80000006).into(); guest_leaf_0x80000006.result.edx &= !0x00030000; // bits [17:16] are reserved Ok(()) } } /// The maximum number of logical processors per package is computed as the closest /// power of 2 higher or equal to the CPU count configured by the user. const fn get_max_cpus_per_package(cpu_count: u8) -> Result { // This match is better than but approximately equivalent to // `2.pow((cpu_count as f32).log2().ceil() as u8)` (`2^ceil(log_2(c))`). match cpu_count { 0 => Err(GetMaxCpusPerPackageError::Underflow), // `0u8.checked_next_power_of_two()` returns `Some(1)`, this is not the desired behaviour so // we use `next_power_of_two()` instead. 
1..=128 => Ok(cpu_count.next_power_of_two()), 129..=u8::MAX => Err(GetMaxCpusPerPackageError::Overflow), } } #[cfg(test)] mod tests { use std::collections::BTreeMap; use super::*; use crate::cpu_config::x86_64::cpuid::{AmdCpuid, Cpuid, IntelCpuid}; #[test] fn get_max_cpus_per_package_test() { assert_eq!( get_max_cpus_per_package(0), Err(GetMaxCpusPerPackageError::Underflow) ); assert_eq!(get_max_cpus_per_package(1), Ok(1)); assert_eq!(get_max_cpus_per_package(2), Ok(2)); assert_eq!(get_max_cpus_per_package(3), Ok(4)); assert_eq!(get_max_cpus_per_package(4), Ok(4)); assert_eq!(get_max_cpus_per_package(5), Ok(8)); assert_eq!(get_max_cpus_per_package(8), Ok(8)); assert_eq!(get_max_cpus_per_package(9), Ok(16)); assert_eq!(get_max_cpus_per_package(16), Ok(16)); assert_eq!(get_max_cpus_per_package(17), Ok(32)); assert_eq!(get_max_cpus_per_package(32), Ok(32)); assert_eq!(get_max_cpus_per_package(33), Ok(64)); assert_eq!(get_max_cpus_per_package(64), Ok(64)); assert_eq!(get_max_cpus_per_package(65), Ok(128)); assert_eq!(get_max_cpus_per_package(128), Ok(128)); assert_eq!( get_max_cpus_per_package(129), Err(GetMaxCpusPerPackageError::Overflow) ); assert_eq!( get_max_cpus_per_package(u8::MAX), Err(GetMaxCpusPerPackageError::Overflow) ); } #[test] fn test_update_vendor_id() { // Check `update_vendor_id()` passes through the vendor ID from the host correctly. // Pseudo CPUID with invalid vendor ID. let mut guest_cpuid = Cpuid::Intel(IntelCpuid(BTreeMap::from([( CpuidKey { leaf: 0x0, subleaf: 0x0, }, CpuidEntry { flags: KvmCpuidFlags::EMPTY, result: CpuidRegisters { eax: 0, ebx: 0x0123_4567, ecx: 0x89ab_cdef, edx: 0x55aa_55aa, }, }, )]))); // Pass through vendor ID from host. guest_cpuid.update_vendor_id().unwrap(); // Check if the guest vendor ID matches the host one. 
let guest_leaf_0 = guest_cpuid .get(&CpuidKey { leaf: 0x0, subleaf: 0x0, }) .unwrap(); let host_leaf_0 = cpuid(0x0); assert_eq!(guest_leaf_0.result.ebx, host_leaf_0.ebx); assert_eq!(guest_leaf_0.result.ecx, host_leaf_0.ecx); assert_eq!(guest_leaf_0.result.edx, host_leaf_0.edx); } #[test] fn check_leaf_0xb_subleaf_0x1_added() { // Check leaf 0xb / subleaf 0x1 is added in `update_extended_topology_entry()` even when it // isn't included. // Pseudo CPU setting let smt = false; let cpu_index = 0; let cpu_count = 2; let cpu_bits = u8::from(cpu_count > 1 && smt); let cpus_per_core = 1u8 .checked_shl(u32::from(cpu_bits)) .ok_or(NormalizeCpuidError::CpuBits(cpu_bits)) .unwrap(); // Case 1: Intel CPUID let mut intel_cpuid = Cpuid::Intel(IntelCpuid(BTreeMap::from([( CpuidKey { leaf: 0xb, subleaf: 0, }, CpuidEntry { flags: KvmCpuidFlags::SIGNIFICANT_INDEX, result: CpuidRegisters { eax: 0, ebx: 0, ecx: 0, edx: 0, }, }, )]))); let result = intel_cpuid.update_extended_topology_entry( cpu_index, cpu_count, cpu_bits, cpus_per_core, ); result.unwrap(); assert!(intel_cpuid.inner().contains_key(&CpuidKey { leaf: 0xb, subleaf: 0x1 })); // Case 2: AMD CPUID let mut amd_cpuid = Cpuid::Amd(AmdCpuid(BTreeMap::from([( CpuidKey { leaf: 0xb, subleaf: 0, }, CpuidEntry { flags: KvmCpuidFlags::SIGNIFICANT_INDEX, result: CpuidRegisters { eax: 0, ebx: 0, ecx: 0, edx: 0, }, }, )]))); let result = amd_cpuid.update_extended_topology_entry(cpu_index, cpu_count, cpu_bits, cpus_per_core); result.unwrap(); assert!(amd_cpuid.inner().contains_key(&CpuidKey { leaf: 0xb, subleaf: 0x1 })); } } ================================================ FILE: src/vmm/src/cpu_config/x86_64/custom_cpu_template.rs ================================================ // Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 /// Guest config sub-module specifically useful for /// config templates. 
use std::borrow::Cow; use serde::de::Error as SerdeError; use serde::{Deserialize, Deserializer, Serialize, Serializer}; use crate::arch::x86_64::cpu_model::{CpuModel, SKYLAKE_FMS}; use crate::cpu_config::templates::{ CpuTemplateType, GetCpuTemplate, GetCpuTemplateError, KvmCapability, RegisterValueFilter, }; use crate::cpu_config::templates_serde::*; use crate::cpu_config::x86_64::cpuid::KvmCpuidFlags; use crate::cpu_config::x86_64::cpuid::common::get_vendor_id_from_host; use crate::cpu_config::x86_64::static_cpu_templates::{StaticCpuTemplate, c3, t2, t2a, t2cl, t2s}; use crate::logger::warn; impl GetCpuTemplate for Option { fn get_cpu_template(&self) -> Result, GetCpuTemplateError> { use GetCpuTemplateError::*; match self { Some(template_type) => match template_type { CpuTemplateType::Custom(template) => Ok(Cow::Borrowed(template)), CpuTemplateType::Static(template) => { // Return early for `None` due to no valid vendor and CPU models. if template == &StaticCpuTemplate::None { return Err(InvalidStaticCpuTemplate(StaticCpuTemplate::None)); } if &get_vendor_id_from_host().map_err(GetCpuVendor)? != template.get_supported_vendor() { return Err(CpuVendorMismatched); } let cpu_model = CpuModel::get_cpu_model(); if !template.get_supported_cpu_models().contains(&cpu_model) { return Err(InvalidCpuModel); } match template { StaticCpuTemplate::C3 => { if cpu_model == SKYLAKE_FMS { warn!( "On processors that do not enumerate FBSDP_NO, PSDP_NO and \ SBDR_SSDP_NO on IA32_ARCH_CAPABILITIES MSR, the guest kernel \ does not apply the mitigation against MMIO stale data \ vulnerability." 
); } Ok(Cow::Owned(c3::c3())) } StaticCpuTemplate::T2 => Ok(Cow::Owned(t2::t2())), StaticCpuTemplate::T2S => Ok(Cow::Owned(t2s::t2s())), StaticCpuTemplate::T2CL => Ok(Cow::Owned(t2cl::t2cl())), StaticCpuTemplate::T2A => Ok(Cow::Owned(t2a::t2a())), StaticCpuTemplate::None => unreachable!(), // Handled earlier } } }, None => Ok(Cow::Owned(CustomCpuTemplate::default())), } } } /// CPUID register enumeration #[allow(missing_docs)] #[derive(Debug, Clone, Eq, PartialEq, Serialize, Deserialize, Hash, Ord, PartialOrd)] pub enum CpuidRegister { Eax, Ebx, Ecx, Edx, } /// Target register to be modified by a bitmap. #[derive(Debug, Clone, Eq, PartialEq, Serialize, Deserialize, Hash)] pub struct CpuidRegisterModifier { /// CPUID register to be modified by the bitmap. #[serde( deserialize_with = "deserialize_cpuid_register", serialize_with = "serialize_cpuid_register" )] pub register: CpuidRegister, /// Bit mapping to be applied as a modifier to the /// register's value at the address provided. pub bitmap: RegisterValueFilter, } /// Composite type that holistically provides /// the location of a specific register being used /// in the context of a CPUID tree. #[derive(Debug, Default, Clone, Eq, PartialEq, Serialize, Deserialize, Hash)] pub struct CpuidLeafModifier { /// Leaf value. #[serde( deserialize_with = "deserialize_from_str_u32", serialize_with = "serialize_to_hex_str" )] pub leaf: u32, /// Sub-Leaf value. #[serde( deserialize_with = "deserialize_from_str_u32", serialize_with = "serialize_to_hex_str" )] pub subleaf: u32, /// KVM feature flags for this leaf-subleaf. #[serde(deserialize_with = "deserialize_kvm_cpuid_flags")] pub flags: KvmCpuidFlags, /// All registers to be modified under the sub-leaf. pub modifiers: Vec, } /// Wrapper type to containing x86_64 CPU config modifiers. 
#[derive(Debug, Default, Clone, Eq, PartialEq, Serialize, Deserialize)] #[serde(deny_unknown_fields)] pub struct CustomCpuTemplate { /// Additional kvm capabilities to check before /// configuring vcpus. #[serde(default)] pub kvm_capabilities: Vec, /// Modifiers for CPUID configuration. #[serde(default)] pub cpuid_modifiers: Vec, /// Modifiers for model specific registers. #[serde(default)] pub msr_modifiers: Vec, } impl CustomCpuTemplate { /// Get an iterator of MSR indices that are modified by the CPU template. pub fn msr_index_iter(&self) -> impl ExactSizeIterator + '_ { self.msr_modifiers.iter().map(|modifier| modifier.addr) } /// Validate the correctness of the template. pub fn validate(&self) -> Result<(), serde_json::Error> { Ok(()) } } /// Wrapper of a mask defined as a bitmap to apply /// changes to a given register's value. #[derive(Debug, Default, Clone, Copy, Eq, PartialEq, Serialize, Deserialize, Hash)] pub struct RegisterModifier { /// Pointer of the location to be bit mapped. #[serde( deserialize_with = "deserialize_from_str_u32", serialize_with = "serialize_to_hex_str" )] pub addr: u32, /// Bit mapping to be applied as a modifier to the /// register's value at the address provided. pub bitmap: RegisterValueFilter, } fn deserialize_kvm_cpuid_flags<'de, D>(deserializer: D) -> Result where D: Deserializer<'de>, { let flag = u32::deserialize(deserializer)?; Ok(KvmCpuidFlags(flag)) } fn deserialize_cpuid_register<'de, D>(deserializer: D) -> Result where D: Deserializer<'de>, { let cpuid_register_str = String::deserialize(deserializer)?; Ok(match cpuid_register_str.as_str() { "eax" => CpuidRegister::Eax, "ebx" => CpuidRegister::Ebx, "ecx" => CpuidRegister::Ecx, "edx" => CpuidRegister::Edx, _ => { return Err(D::Error::custom( "Invalid CPUID register. 
Must be one of [eax, ebx, ecx, edx]", )); } }) } fn serialize_cpuid_register(cpuid_reg: &CpuidRegister, serializer: S) -> Result where S: Serializer, { match cpuid_reg { CpuidRegister::Eax => serializer.serialize_str("eax"), CpuidRegister::Ebx => serializer.serialize_str("ebx"), CpuidRegister::Ecx => serializer.serialize_str("ecx"), CpuidRegister::Edx => serializer.serialize_str("edx"), } } #[cfg(test)] mod tests { use serde_json::Value; use super::*; use crate::cpu_config::x86_64::test_utils::{TEST_TEMPLATE_JSON, build_test_template}; #[test] fn test_get_cpu_template_with_no_template() { // Test `get_cpu_template()` when no template is provided. The empty owned // `CustomCpuTemplate` should be returned. let cpu_template = None; assert_eq!( cpu_template.get_cpu_template().unwrap(), Cow::Owned(CustomCpuTemplate::default()), ); } #[test] fn test_get_cpu_template_with_c3_static_template() { // Test `get_cpu_template()` when C3 static CPU template is specified. The owned // `CustomCpuTemplate` should be returned if CPU vendor is Intel and the CPU model is // supported. Otherwise, it should fail. let c3 = StaticCpuTemplate::C3; let cpu_template = Some(CpuTemplateType::Static(c3)); if &get_vendor_id_from_host().unwrap() == c3.get_supported_vendor() { if c3 .get_supported_cpu_models() .contains(&CpuModel::get_cpu_model()) { assert_eq!( cpu_template.get_cpu_template().unwrap(), Cow::Owned(c3::c3()) ); } else { assert_eq!( cpu_template.get_cpu_template().unwrap_err(), GetCpuTemplateError::InvalidCpuModel, ); } } else { assert_eq!( cpu_template.get_cpu_template().unwrap_err(), GetCpuTemplateError::CpuVendorMismatched, ); } } #[test] fn test_get_cpu_template_with_t2_static_template() { // Test `get_cpu_template()` when T2 static CPU template is specified. The owned // `CustomCpuTemplate` should be returned if CPU vendor is Intel and the CPU model is // supported. Otherwise, it should fail. 
let t2 = StaticCpuTemplate::T2; let cpu_template = Some(CpuTemplateType::Static(t2)); if &get_vendor_id_from_host().unwrap() == t2.get_supported_vendor() { if t2 .get_supported_cpu_models() .contains(&CpuModel::get_cpu_model()) { assert_eq!( cpu_template.get_cpu_template().unwrap(), Cow::Owned(t2::t2()) ); } else { assert_eq!( cpu_template.get_cpu_template().unwrap_err(), GetCpuTemplateError::InvalidCpuModel, ); } } else { assert_eq!( cpu_template.get_cpu_template().unwrap_err(), GetCpuTemplateError::CpuVendorMismatched, ); } } #[test] fn test_get_cpu_template_with_t2s_static_template() { // Test `get_cpu_template()` when T2S static CPU template is specified. The owned // `CustomCpuTemplate` should be returned if CPU vendor is Intel and the CPU model is // supported. Otherwise, it should fail. let t2s = StaticCpuTemplate::T2S; let cpu_template = Some(CpuTemplateType::Static(t2s)); if &get_vendor_id_from_host().unwrap() == t2s.get_supported_vendor() { if t2s .get_supported_cpu_models() .contains(&CpuModel::get_cpu_model()) { assert_eq!( cpu_template.get_cpu_template().unwrap(), Cow::Owned(t2s::t2s()) ); } else { assert_eq!( cpu_template.get_cpu_template().unwrap_err(), GetCpuTemplateError::InvalidCpuModel, ); } } else { assert_eq!( cpu_template.get_cpu_template().unwrap_err(), GetCpuTemplateError::CpuVendorMismatched, ); } } #[test] fn test_t2cl_template_equality() { // For coverage purposes, this test forces usage of T2CL and bypasses // validation that is generally applied which usually enforces that T2CL // can only be used on Cascade Lake (or newer) CPUs. let t2cl_custom_template = CpuTemplateType::Custom(t2cl::t2cl()); // This test also demonstrates the difference in concept between custom and static // templates, while practically T2CL is consistent for the user, in code // the static template of T2CL, and the custom template of T2CL are not equivalent. 
assert_ne!( t2cl_custom_template, CpuTemplateType::Static(StaticCpuTemplate::T2CL) ); } #[test] fn test_get_cpu_template_with_t2cl_static_template() { // Test `get_cpu_template()` when T2CL static CPU template is specified. The owned // `CustomCpuTemplate` should be returned if CPU vendor is Intel and the CPU model is // supported. Otherwise, it should fail. let t2cl = StaticCpuTemplate::T2CL; let cpu_template = Some(CpuTemplateType::Static(t2cl)); if &get_vendor_id_from_host().unwrap() == t2cl.get_supported_vendor() { if t2cl .get_supported_cpu_models() .contains(&CpuModel::get_cpu_model()) { assert_eq!( cpu_template.get_cpu_template().unwrap(), Cow::Owned(t2cl::t2cl()) ); } else { assert_eq!( cpu_template.get_cpu_template().unwrap_err(), GetCpuTemplateError::InvalidCpuModel, ); } } else { assert_eq!( cpu_template.get_cpu_template().unwrap_err(), GetCpuTemplateError::CpuVendorMismatched, ); } } #[test] fn test_get_cpu_template_with_t2a_static_template() { // Test `get_cpu_template()` when T2A static CPU template is specified. The owned // `CustomCpuTemplate` should be returned if CPU vendor is AMD. Otherwise it should fail. let t2a = StaticCpuTemplate::T2A; let cpu_template = Some(CpuTemplateType::Static(t2a)); if &get_vendor_id_from_host().unwrap() == t2a.get_supported_vendor() { if t2a .get_supported_cpu_models() .contains(&CpuModel::get_cpu_model()) { assert_eq!( cpu_template.get_cpu_template().unwrap(), Cow::Owned(t2a::t2a()) ); } else { assert_eq!( cpu_template.get_cpu_template().unwrap_err(), GetCpuTemplateError::InvalidCpuModel, ); } } else { assert_eq!( cpu_template.get_cpu_template().unwrap_err(), GetCpuTemplateError::CpuVendorMismatched, ); } } #[test] fn test_get_cpu_template_with_none_static_template() { // Test `get_cpu_template()` when no static CPU template is provided. // `InvalidStaticCpuTemplate` error should be returned because it is no longer valid and // was replaced with `None` of `Option`. 
let cpu_template = Some(CpuTemplateType::Static(StaticCpuTemplate::None)); assert_eq!( cpu_template.get_cpu_template().unwrap_err(), GetCpuTemplateError::InvalidStaticCpuTemplate(StaticCpuTemplate::None) ); // Test the Display for StaticCpuTemplate assert_eq!(format!("{}", StaticCpuTemplate::None), "None"); } #[test] fn test_get_cpu_template_with_custom_template() { // Test `get_cpu_template()` when a custom CPU template is provided. The borrowed // `CustomCpuTemplate` should be returned. let inner_cpu_template = CustomCpuTemplate::default(); let cpu_template = Some(CpuTemplateType::Custom(inner_cpu_template.clone())); assert_eq!( cpu_template.get_cpu_template().unwrap(), Cow::Borrowed(&inner_cpu_template) ); } #[test] fn test_malformed_json() { // Misspelled field name, register let cpu_template_result = serde_json::from_str::( r#"{ "cpuid_modifiers": [ { "leaf": "0x80000001", "subleaf": "0b000111", "flags": 0, "modifiers": [ { "register": "ekx", "bitmap": "0bx00100xxx1xxxxxxxxxxxxxxxxxxxxx1" } ] }, ], }"#, ); assert!( cpu_template_result .unwrap_err() .to_string() .contains("Invalid CPUID register. 
Must be one of [eax, ebx, ecx, edx]") ); // Malformed MSR register address let cpu_template_result = serde_json::from_str::( r#"{ "msr_modifiers": [ { "addr": "0jj0", "bitmap": "0bx00100xxx1xxxx00xxx1xxxxxxxxxxx1" }, ] }"#, ); let error_msg: String = cpu_template_result.unwrap_err().to_string(); // Formatted error expected clarifying the number system prefix is missing assert!( error_msg.contains("No supported number system prefix found in value"), "{}", error_msg ); // Malformed CPUID leaf address let cpu_template_result = serde_json::from_str::( r#"{ "cpuid_modifiers": [ { "leaf": "k", "subleaf": "0b000111", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0bx00100xxx1xxxxxxxxxxxxxxxxxxxxx1" } ] }, ], }"#, ); let error_msg: String = cpu_template_result.unwrap_err().to_string(); // Formatted error expected clarifying the number system prefix is missing assert!( error_msg.contains("No supported number system prefix found in value"), "{}", error_msg ); // Malformed 64-bit bitmap - filter failed let cpu_template_result = serde_json::from_str::( r#"{ "msr_modifiers": [ { "addr": "0x200", "bitmap": "0bx0?1_0_0x_?x1xxxx00xxx1xxxxxxxxxxx1" }, ] }"#, ); assert!(cpu_template_result.unwrap_err().to_string().contains( "Failed to parse string [0bx0?1_0_0x_?x1xxxx00xxx1xxxxxxxxxxx1] as a bitmap" )); // Malformed 64-bit bitmap - value failed let cpu_template_result = serde_json::from_str::( r#"{ "msr_modifiers": [ { "addr": "0x200", "bitmap": "0bx00100x0x1xxxx05xxx1xxxxxxxxxxx1" }, ] }"#, ); assert!( cpu_template_result.unwrap_err().to_string().contains( "Failed to parse string [0bx00100x0x1xxxx05xxx1xxxxxxxxxxx1] as a bitmap" ) ); } #[test] fn test_deserialization_lifecycle() { let cpu_template = serde_json::from_str::(TEST_TEMPLATE_JSON) .expect("Failed to deserialize custom CPU template."); assert_eq!(5, cpu_template.cpuid_modifiers.len()); assert_eq!(4, cpu_template.msr_modifiers.len()); } #[test] fn test_serialization_lifecycle() { let template = 
build_test_template(); let template_json_str_result = serde_json::to_string_pretty(&template); let template_json = template_json_str_result.unwrap(); let deserialization_result = serde_json::from_str::(&template_json); assert_eq!(template, deserialization_result.unwrap()); } /// Test to confirm that templates for different CPU architectures have /// a size bitmask that is supported by the architecture when serialized to JSON. #[test] fn test_bitmap_width() { let mut cpuid_checked = false; let mut msr_checked = false; let template = build_test_template(); let x86_template_str = serde_json::to_string(&template).expect("Error serializing x86 template"); let json_tree: Value = serde_json::from_str(&x86_template_str) .expect("Error deserializing x86 template JSON string"); // Check that bitmaps for CPUID values are 32-bits in width if let Some(cpuid_modifiers_root) = json_tree.get("cpuid_modifiers") { let cpuid_mod_node = &cpuid_modifiers_root.as_array().unwrap()[0]; if let Some(modifiers_node) = cpuid_mod_node.get("modifiers") { let mod_node = &modifiers_node.as_array().unwrap()[0]; if let Some(bit_map_str) = mod_node.get("bitmap") { // 32-bit width with a "0b" prefix for binary-formatted numbers assert_eq!(bit_map_str.as_str().unwrap().len(), 34); cpuid_checked = true; } } } // Check that bitmaps for MSRs are 64-bits in width if let Some(msr_modifiers_root) = json_tree.get("msr_modifiers") { let msr_mod_node = &msr_modifiers_root.as_array().unwrap()[0]; if let Some(bit_map_str) = msr_mod_node.get("bitmap") { // 64-bit width with a "0b" prefix for binary-formatted numbers assert_eq!(bit_map_str.as_str().unwrap().len(), 66); assert!(bit_map_str.as_str().unwrap().starts_with("0b")); msr_checked = true; } } assert!( cpuid_checked, "CPUID bitmap width in a x86_64 template was not tested." ); assert!( msr_checked, "MSR bitmap width in a x86_64 template was not tested." 
); } } ================================================ FILE: src/vmm/src/cpu_config/x86_64/mod.rs ================================================ // Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 /// Module for CPUID instruction related content pub mod cpuid; /// Module for custom CPU templates pub mod custom_cpu_template; /// Module for static CPU templates pub mod static_cpu_templates; /// Module with test utils for custom CPU templates pub mod test_utils; use std::collections::BTreeMap; use kvm_bindings::CpuId; use self::custom_cpu_template::CpuidRegister; use super::templates::CustomCpuTemplate; use crate::Vcpu; use crate::cpu_config::x86_64::cpuid::{Cpuid, CpuidKey}; /// Errors thrown while configuring templates. #[derive(Debug, PartialEq, Eq, thiserror::Error, displaydoc::Display)] pub enum CpuConfigurationError { /// Template changes a CPUID entry not supported by KVM: Leaf: {0:0x}, Subleaf: {1:0x} CpuidFeatureNotSupported(u32, u32), /// Template changes an MSR entry not supported by KVM: Register Address: {0:0x} MsrNotSupported(u32), /// Can create cpuid from raw: {0} CpuidFromKvmCpuid(#[from] crate::cpu_config::x86_64::cpuid::CpuidTryFromKvmCpuid), /// KVM vcpu ioctl failed: {0} VcpuIoctl(#[from] crate::vstate::vcpu::KvmVcpuError), } /// CPU configuration for x86_64 CPUs #[derive(Debug, Clone, PartialEq)] pub struct CpuConfiguration { /// CPUID configuration pub cpuid: Cpuid, /// Register values as a key pair for model specific registers /// Key: MSR address /// Value: MSR value pub msrs: BTreeMap, } impl CpuConfiguration { /// Create new CpuConfiguration. 
pub fn new( supported_cpuid: CpuId, cpu_template: &CustomCpuTemplate, first_vcpu: &Vcpu, ) -> Result { let cpuid = cpuid::Cpuid::try_from(supported_cpuid)?; let msrs = first_vcpu .kvm_vcpu .get_msrs(cpu_template.msr_index_iter())?; Ok(CpuConfiguration { cpuid, msrs }) } /// Modifies provided config with changes from template pub fn apply_template( self, template: &CustomCpuTemplate, ) -> Result { let Self { mut cpuid, mut msrs, } = self; let guest_cpuid = cpuid.inner_mut(); // Apply CPUID modifiers for mod_leaf in template.cpuid_modifiers.iter() { let cpuid_key = CpuidKey { leaf: mod_leaf.leaf, subleaf: mod_leaf.subleaf, }; if let Some(entry) = guest_cpuid.get_mut(&cpuid_key) { entry.flags = mod_leaf.flags; // Can we modify one reg multiple times???? for mod_reg in &mod_leaf.modifiers { match mod_reg.register { CpuidRegister::Eax => { entry.result.eax = mod_reg.bitmap.apply(entry.result.eax) } CpuidRegister::Ebx => { entry.result.ebx = mod_reg.bitmap.apply(entry.result.ebx) } CpuidRegister::Ecx => { entry.result.ecx = mod_reg.bitmap.apply(entry.result.ecx) } CpuidRegister::Edx => { entry.result.edx = mod_reg.bitmap.apply(entry.result.edx) } } } } else { return Err(CpuConfigurationError::CpuidFeatureNotSupported( cpuid_key.leaf, cpuid_key.subleaf, )); } } for modifier in &template.msr_modifiers { if let Some(reg_value) = msrs.get_mut(&modifier.addr) { *reg_value = modifier.bitmap.apply(*reg_value); } else { return Err(CpuConfigurationError::MsrNotSupported(modifier.addr)); } } Ok(Self { cpuid, msrs }) } } #[cfg(test)] mod tests { use std::collections::BTreeMap; use kvm_bindings::KVM_CPUID_FLAG_STATEFUL_FUNC; use super::custom_cpu_template::{CpuidLeafModifier, CpuidRegisterModifier, RegisterModifier}; use super::*; use crate::cpu_config::templates::RegisterValueFilter; use crate::cpu_config::x86_64::cpuid::{CpuidEntry, IntelCpuid, KvmCpuidFlags}; fn build_test_template() -> CustomCpuTemplate { CustomCpuTemplate { cpuid_modifiers: vec![CpuidLeafModifier { leaf: 0x3, 
subleaf: 0x0, flags: KvmCpuidFlags(KVM_CPUID_FLAG_STATEFUL_FUNC), modifiers: vec![ CpuidRegisterModifier { register: CpuidRegister::Eax, bitmap: RegisterValueFilter { filter: 0b0111, value: 0b0101, }, }, CpuidRegisterModifier { register: CpuidRegister::Ebx, bitmap: RegisterValueFilter { filter: 0b0111, value: 0b0100, }, }, CpuidRegisterModifier { register: CpuidRegister::Ecx, bitmap: RegisterValueFilter { filter: 0b0111, value: 0b0111, }, }, CpuidRegisterModifier { register: CpuidRegister::Edx, bitmap: RegisterValueFilter { filter: 0b0111, value: 0b0001, }, }, ], }], msr_modifiers: vec![ RegisterModifier { addr: 0x9999, bitmap: RegisterValueFilter { filter: 0, value: 0, }, }, RegisterModifier { addr: 0x8000, bitmap: RegisterValueFilter { filter: 0, value: 0, }, }, ], ..Default::default() } } fn build_supported_cpuid() -> Cpuid { Cpuid::Intel(IntelCpuid(BTreeMap::from([( CpuidKey { leaf: 0x3, subleaf: 0x0, }, CpuidEntry::default(), )]))) } fn empty_cpu_config() -> CpuConfiguration { CpuConfiguration { cpuid: Cpuid::Intel(IntelCpuid(BTreeMap::new())), msrs: Default::default(), } } fn supported_cpu_config() -> CpuConfiguration { CpuConfiguration { cpuid: build_supported_cpuid(), msrs: BTreeMap::from([(0x8000, 0b1000), (0x9999, 0b1010)]), } } fn unsupported_cpu_config() -> CpuConfiguration { CpuConfiguration { cpuid: build_supported_cpuid(), msrs: BTreeMap::from([(0x8000, 0b1000), (0x8001, 0b1010)]), } } #[test] fn test_empty_template() { let host_configuration = empty_cpu_config(); let cpu_config_result = host_configuration .clone() .apply_template(&CustomCpuTemplate::default()); assert!( cpu_config_result.is_ok(), "{}", cpu_config_result.unwrap_err() ); // CPUID will be comparable, but not MSRs. // The configuration will be configuration required by the template, // not a holistic view of all registers. 
assert_eq!(cpu_config_result.unwrap().cpuid, host_configuration.cpuid); } #[test] fn test_apply_template() { let host_configuration = supported_cpu_config(); let cpu_config_result = host_configuration .clone() .apply_template(&build_test_template()); assert!( cpu_config_result.is_ok(), "{}", cpu_config_result.unwrap_err() ); assert_ne!(cpu_config_result.unwrap(), host_configuration); } /// Invalid test in this context is when the template /// has modifiers for registers that are not supported. #[test] fn test_invalid_template() { // Test CPUID validation let host_configuration = empty_cpu_config(); let guest_template = build_test_template(); let cpu_config_result = host_configuration.apply_template(&guest_template); assert!( cpu_config_result.is_err(), "Expected an error as template should have failed to modify a CPUID entry that is not \ supported by host configuration", ); assert_eq!( cpu_config_result.unwrap_err(), CpuConfigurationError::CpuidFeatureNotSupported( guest_template.cpuid_modifiers[0].leaf, guest_template.cpuid_modifiers[0].subleaf ) ); // Test MSR validation let host_configuration = unsupported_cpu_config(); let guest_template = build_test_template(); let cpu_config_result = host_configuration.apply_template(&guest_template); assert!( cpu_config_result.is_err(), "Expected an error as template should have failed to modify an MSR value that is not \ supported by host configuration", ); assert_eq!( cpu_config_result.unwrap_err(), CpuConfigurationError::MsrNotSupported(guest_template.msr_modifiers[0].addr) ) } } ================================================ FILE: src/vmm/src/cpu_config/x86_64/static_cpu_templates/c3.rs ================================================ // Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
// SPDX-License-Identifier: Apache-2.0

use crate::cpu_config::templates::{CustomCpuTemplate, RegisterValueFilter};
use crate::cpu_config::x86_64::cpuid::KvmCpuidFlags;
use crate::cpu_config::x86_64::custom_cpu_template::{
    CpuidLeafModifier, CpuidRegister, CpuidRegisterModifier,
};

/// C3 CPU template.
///
/// Mask CPUID to make exposed CPU features as close as possible to AWS C3 instance.
///
/// CPUID dump taken in c3.large on 2023-06-15:
/// =====
/// $ cpuid -1 -r
/// Disclaimer: cpuid may not support decoding of all cpuid registers.
/// CPU:
/// 0x00000000 0x00: eax=0x0000000d ebx=0x756e6547 ecx=0x6c65746e edx=0x49656e69
/// 0x00000001 0x00: eax=0x000306e4 ebx=0x01020800 ecx=0xffba2203 edx=0x178bfbff
/// 0x00000002 0x00: eax=0x76036301 ebx=0x00f0b2ff ecx=0x00000000 edx=0x00ca0000
/// 0x00000003 0x00: eax=0x00000000 ebx=0x00000000 ecx=0x00000000 edx=0x00000000
/// 0x00000004 0x00: eax=0x00004121 ebx=0x01c0003f ecx=0x0000003f edx=0x00000000
/// 0x00000004 0x01: eax=0x00004122 ebx=0x01c0003f ecx=0x0000003f edx=0x00000000
/// 0x00000004 0x02: eax=0x00004143 ebx=0x01c0003f ecx=0x000001ff edx=0x00000000
/// 0x00000004 0x03: eax=0x00004163 ebx=0x04c0003f ecx=0x00004fff edx=0x00000006
/// 0x00000005 0x00: eax=0x00000000 ebx=0x00000000 ecx=0x00000000 edx=0x00000000
/// 0x00000006 0x00: eax=0x00000000 ebx=0x00000000 ecx=0x00000000 edx=0x00000000
/// 0x00000007 0x00: eax=0x00000000 ebx=0x00000281 ecx=0x00000000 edx=0x00000000
/// 0x00000008 0x00: eax=0x00000000 ebx=0x00000000 ecx=0x00000000 edx=0x00000000
/// 0x00000009 0x00: eax=0x00000000 ebx=0x00000000 ecx=0x00000000 edx=0x00000000
/// 0x0000000a 0x00: eax=0x00000000 ebx=0x00000000 ecx=0x00000000 edx=0x00000000
/// 0x0000000b 0x00: eax=0x00000001 ebx=0x00000002 ecx=0x00000100 edx=0x00000000
/// 0x0000000b 0x01: eax=0x00000005 ebx=0x00000001 ecx=0x00000201 edx=0x00000000
/// 0x0000000c 0x00: eax=0x00000000 ebx=0x00000000 ecx=0x00000000 edx=0x00000000
/// 0x0000000d 0x00: eax=0x00000007 ebx=0x00000340 ecx=0x00000340 edx=0x00000000
/// 0x0000000d 0x01: eax=0x00000001 ebx=0x00000000 ecx=0x00000000 edx=0x00000000
/// 0x0000000d 0x02: eax=0x00000100 ebx=0x00000240 ecx=0x00000000 edx=0x00000000
/// 0x40000000 0x00: eax=0x40000005 ebx=0x566e6558 ecx=0x65584d4d edx=0x4d4d566e
/// 0x40000001 0x00: eax=0x0004000b ebx=0x00000000 ecx=0x00000000 edx=0x00000000
/// 0x40000002 0x00: eax=0x00000001 ebx=0x40000000 ecx=0x00000000 edx=0x00000000
/// 0x40000003 0x00: eax=0x00000006 ebx=0x00000002 ecx=0x002a9f50 edx=0x00000001
/// 0x40000003 0x02: eax=0x1387329d ebx=0x00f6b809 ecx=0xb74bc70a edx=0xffffffff
/// 0x40000004 0x00: eax=0x0000001c ebx=0x00000000 ecx=0x00002b86 edx=0x00000000
/// 0x40000005 0x00: eax=0x00000000 ebx=0x00000000 ecx=0x00000000 edx=0x00000000
/// 0x80000000 0x00: eax=0x80000008 ebx=0x00000000 ecx=0x00000000 edx=0x00000000
/// 0x80000001 0x00: eax=0x00000000 ebx=0x00000000 ecx=0x00000001 edx=0x28100800
/// 0x80000002 0x00: eax=0x20202020 ebx=0x6e492020 ecx=0x286c6574 edx=0x58202952
/// 0x80000003 0x00: eax=0x286e6f65 ebx=0x43202952 ecx=0x45205550 edx=0x36322d35
/// 0x80000004 0x00: eax=0x76203038 ebx=0x20402032 ecx=0x30382e32 edx=0x007a4847
/// 0x80000005 0x00: eax=0x00000000 ebx=0x00000000 ecx=0x00000000 edx=0x00000000
/// 0x80000006 0x00: eax=0x00000000 ebx=0x00000000 ecx=0x01006040 edx=0x00000000
/// 0x80000007 0x00: eax=0x00000000 ebx=0x00000000 ecx=0x00000000 edx=0x00000000
/// 0x80000008 0x00: eax=0x0000302e ebx=0x00000000 ecx=0x00000000 edx=0x00000000
/// 0x80860000 0x00: eax=0x00000000 ebx=0x00000000 ecx=0x00000000 edx=0x00000000
/// 0xc0000000 0x00: eax=0x00000000 ebx=0x00000000 ecx=0x00000000 edx=0x00000000
/// =====
///
/// References:
/// - Intel SDM:
#[allow(clippy::unusual_byte_groupings)]
pub fn c3() -> CustomCpuTemplate {
    // NOTE(review): assumed semantics of `RegisterValueFilter`: `filter` marks the
    // bits the template overrides and `value` supplies the forced bit values —
    // confirm against cpu_config::templates. KvmCpuidFlags(1) is assumed to be
    // KVM's KVM_CPUID_FLAG_SIGNIFICANT_INDEX (subleaf number matters), per the
    // KVM API documentation.
    CustomCpuTemplate {
        cpuid_modifiers: vec![
            CpuidLeafModifier {
                leaf: 0x1,
                subleaf: 0x0,
                flags: KvmCpuidFlags(0),
                modifiers: vec![
                    // EAX: Version Information
                    // - Bits 03-00: Stepping ID.
                    // - Bits 07-04: Model.
                    // - Bits 11-08: Family.
                    // - Bits 13-12: Processor Type.
                    // - Bits 19-16: Extended Model ID.
                    // - Bits 27-20: Extended Family ID.
                    CpuidRegisterModifier {
                        register: CpuidRegister::Eax,
                        bitmap: RegisterValueFilter {
                            filter: 0b0000_11111111_1111_00_11_1111_1111_1111,
                            value: 0b0000_00000000_0011_00_00_0110_1110_0100,
                        },
                    },
                    // ECX: Feature Information
                    // - Bit 02: DTES64
                    // - Bit 03: MONITOR
                    // - Bit 04: DS-CPL
                    // - Bit 05: VMX
                    // - Bit 08: TM2
                    // - Bit 10: CNXT-ID
                    // - Bit 11: SDBG
                    // - Bit 12: FMA
                    // - Bit 14: xTPR Update Control
                    // - Bit 15: PDCM
                    // - Bit 22: MOVBE
                    CpuidRegisterModifier {
                        register: CpuidRegister::Ecx,
                        bitmap: RegisterValueFilter {
                            filter: 0b0000_0000_0100_0000_1101_1101_0011_1100,
                            // All listed feature bits are forced to 0.
                            value: 0b0000_0000_0000_0000_0000_0000_0000_0000,
                        },
                    },
                    // EDX: Feature Information
                    // - Bit 07: MCE
                    // - Bit 12: MTRR
                    // - Bit 18: PSN
                    // - Bit 21: DS
                    // - Bit 22: ACPI
                    // - Bit 27: SS
                    // - Bit 29: TM
                    // - Bit 31: PBE
                    CpuidRegisterModifier {
                        register: CpuidRegister::Edx,
                        bitmap: RegisterValueFilter {
                            filter: 0b1010_1000_0110_0100_0001_0000_1000_0000,
                            // MCE (bit 07) and MTRR (bit 12) stay set; the rest are cleared.
                            value: 0b0000_0000_0000_0000_0001_0000_1000_0000,
                        },
                    },
                ],
            },
            CpuidLeafModifier {
                leaf: 0x7,
                subleaf: 0x0,
                flags: KvmCpuidFlags(1),
                modifiers: vec![
                    // EBX:
                    // - Bit 02: SGX
                    // - Bit 03: BMI1
                    // - Bit 04: HLE
                    // - Bit 05: AVX2
                    // - Bit 08: BMI2
                    // - Bit 10: INVPCID
                    // - Bit 11: RTM
                    // - Bit 12: RDT-M
                    // - Bit 14: MPX
                    // - Bit 15: RDT-A
                    // - Bit 16: AVX512F
                    // - Bit 17: AVX512DQ
                    // - Bit 18: RDSEED
                    // - Bit 19: ADX
                    // - Bit 21: AVX512_IFMA
                    // - Bit 23: CLFLUSHOPT
                    // - Bit 24: CLWB
                    // - Bit 25: Intel Processor Trace
                    // - Bit 26: AVX512PF
                    // - Bit 27: AVX512ER
                    // - Bit 28: AVX512CD
                    // - Bit 29: SHA
                    // - Bit 30: AVX512BW
                    // - Bit 31: AVX512VL
                    CpuidRegisterModifier {
                        register: CpuidRegister::Ebx,
                        bitmap: RegisterValueFilter {
                            filter: 0b1111_1111_1010_1111_1101_1101_0011_1100,
                            value: 0b0000_0000_0000_0000_0000_0000_0000_0000,
                        },
                    },
                    // ECX:
                    // - Bit 01: AVX512_VBMI
                    // - Bit 02: UMIP
                    // - Bit 03: PKU
                    // - Bit 04: OSPKE
                    // - Bit 11: AVX512_VNNI
                    // - Bit 14: AVX512_VPOPCNTDQ
                    // - Bit 16: LA57
                    // - Bit 22: RDPID
                    // - Bit 30: SGX_LC
                    CpuidRegisterModifier {
                        register: CpuidRegister::Ecx,
                        bitmap: RegisterValueFilter {
                            filter: 0b0100_0000_0100_0001_0100_1000_0001_1110,
                            value: 0b0000_0000_0000_0000_0000_0000_0000_0000,
                        },
                    },
                    // EDX:
                    // - Bit 02: AVX512_4VNNIW
                    // - Bit 03: AVX512_4FMAPS
                    CpuidRegisterModifier {
                        register: CpuidRegister::Edx,
                        bitmap: RegisterValueFilter {
                            filter: 0b0000_0000_0000_0000_0000_0000_0000_1100,
                            value: 0b0000_0000_0000_0000_0000_0000_0000_0000,
                        },
                    },
                ],
            },
            CpuidLeafModifier {
                leaf: 0xd,
                subleaf: 0x0,
                flags: KvmCpuidFlags(1),
                modifiers: vec![
                    // EAX:
                    // - Bits 04-03: MPX state
                    // - Bits 07-05: AVX-512 state
                    // - Bit 09: PKRU state
                    CpuidRegisterModifier {
                        register: CpuidRegister::Eax,
                        bitmap: RegisterValueFilter {
                            filter: 0b0000_0000_0000_0000_0000_00_1_0_111_11_000,
                            value: 0b0000_0000_0000_0000_0000_00_0_0_000_00_000,
                        },
                    },
                ],
            },
            CpuidLeafModifier {
                leaf: 0xd,
                subleaf: 0x1,
                flags: KvmCpuidFlags(1),
                modifiers: vec![
                    // EAX:
                    // - Bit 01: Supports XSAVEC and the compacted form of XRSTOR
                    // - Bit 02: Supports XGETBV
                    // - Bit 03: Supports XSAVES/XRSTORS and IA32_XSS
                    CpuidRegisterModifier {
                        register: CpuidRegister::Eax,
                        bitmap: RegisterValueFilter {
                            filter: 0b0000_0000_0000_0000_0000_0000_0000_1110,
                            value: 0b0000_0000_0000_0000_0000_0000_0000_0000,
                        },
                    },
                ],
            },
            CpuidLeafModifier {
                leaf: 0x80000001,
                subleaf: 0x0,
                flags: KvmCpuidFlags(0),
                modifiers: vec![
                    // ECX:
                    // - Bit 05: LZCNT
                    // - Bit 08: PREFETCHW
                    CpuidRegisterModifier {
                        register: CpuidRegister::Ecx,
                        bitmap: RegisterValueFilter {
                            filter: 0b0000_0000_0000_0000_0000_0001_0010_0000,
                            value: 0b0000_0000_0000_0000_0000_0000_0000_0000,
                        },
                    },
                    // EDX:
                    // - Bit 26: 1-GByte pages
                    CpuidRegisterModifier {
                        register: CpuidRegister::Edx,
                        bitmap: RegisterValueFilter {
                            filter: 0b0000_0100_0000_0000_0000_0000_0000_0000,
                            value: 0b0000_0000_0000_0000_0000_0000_0000_0000,
                        },
                    },
                ],
            },
        ],
        msr_modifiers: vec![],
        ..Default::default()
    }
}
================================================
FILE: src/vmm/src/cpu_config/x86_64/static_cpu_templates/mod.rs
================================================
// Copyright 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

use derive_more::Display;
use serde::{Deserialize, Serialize};

use crate::arch::x86_64::cpu_model::{
    CASCADE_LAKE_FMS, CpuModel, ICE_LAKE_FMS, MILAN_FMS, SKYLAKE_FMS,
};
use crate::cpu_config::x86_64::cpuid::{VENDOR_ID_AMD, VENDOR_ID_INTEL};

/// Module with C3 CPU template for x86_64
pub mod c3;
/// Module with T2 CPU template for x86_64
pub mod t2;
/// Module with T2A CPU template for x86_64
pub mod t2a;
/// Module with T2CL CPU template for x86_64
pub mod t2cl;
/// Module with T2S CPU template for x86_64
pub mod t2s;

/// Template types available for configuring the x86 CPU features that map
/// to EC2 instances.
// The `#[display(...)]` strings are the user-visible template names; each named
// variant has a matching hardcoded template checked against its JSON counterpart
// by `verify_consistency_with_json_templates` below.
#[derive(Debug, Default, Display, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum StaticCpuTemplate {
    /// C3 Template.
    #[display("C3")]
    C3,
    /// T2 Template.
    #[display("T2")]
    T2,
    /// T2S Template.
    #[display("T2S")]
    T2S,
    /// No CPU template is used.
    #[default]
    #[display("None")]
    None,
    /// T2CL Template.
    #[display("T2CL")]
    T2CL,
    /// T2A Template.
    #[display("T2A")]
    T2A,
}

impl StaticCpuTemplate {
    /// Check if no template specified
    pub fn is_none(&self) -> bool {
        self == &StaticCpuTemplate::None
    }

    /// Return the supported vendor for the CPU template.
    ///
    /// Callers must not pass `StaticCpuTemplate::None`; doing so panics.
    pub fn get_supported_vendor(&self) -> &'static [u8; 12] {
        match self {
            StaticCpuTemplate::C3 => VENDOR_ID_INTEL,
            StaticCpuTemplate::T2 => VENDOR_ID_INTEL,
            StaticCpuTemplate::T2S => VENDOR_ID_INTEL,
            StaticCpuTemplate::T2CL => VENDOR_ID_INTEL,
            StaticCpuTemplate::T2A => VENDOR_ID_AMD,
            StaticCpuTemplate::None => unreachable!(), // Should be handled in advance
        }
    }

    /// Return supported CPU models for the CPU template.
    ///
    /// Callers must not pass `StaticCpuTemplate::None`; doing so panics.
    pub fn get_supported_cpu_models(&self) -> &'static [CpuModel] {
        match self {
            StaticCpuTemplate::C3 => &[SKYLAKE_FMS, CASCADE_LAKE_FMS, ICE_LAKE_FMS],
            StaticCpuTemplate::T2 => &[SKYLAKE_FMS, CASCADE_LAKE_FMS, ICE_LAKE_FMS],
            StaticCpuTemplate::T2S => &[SKYLAKE_FMS, CASCADE_LAKE_FMS],
            StaticCpuTemplate::T2CL => &[CASCADE_LAKE_FMS, ICE_LAKE_FMS],
            StaticCpuTemplate::T2A => &[MILAN_FMS],
            StaticCpuTemplate::None => unreachable!(), // Should be handled in advance
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::cpu_config::test_utils::get_json_template;

    // Every hardcoded static template must be byte-for-byte equivalent to its
    // JSON representation shipped alongside the code.
    #[test]
    fn verify_consistency_with_json_templates() {
        let static_templates = [
            (c3::c3(), "C3.json"),
            (t2::t2(), "T2.json"),
            (t2s::t2s(), "T2S.json"),
            (t2cl::t2cl(), "T2CL.json"),
            (t2a::t2a(), "T2A.json"),
        ];

        for (hardcoded_template, filename) in static_templates {
            let json_template = get_json_template(filename);
            assert_eq!(hardcoded_template, json_template);
        }
    }
}


================================================
FILE: src/vmm/src/cpu_config/x86_64/static_cpu_templates/t2.rs
================================================
// Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

use crate::cpu_config::templates::{CustomCpuTemplate, RegisterValueFilter};
use crate::cpu_config::x86_64::cpuid::KvmCpuidFlags;
use crate::cpu_config::x86_64::custom_cpu_template::{
    CpuidLeafModifier, CpuidRegister, CpuidRegisterModifier,
};

/// T2 template
///
/// Mask CPUID to make exposed CPU features as close as possible to AWS T2 instance.
///
/// CPUID dump taken in t2.micro on 2023-06-15:
/// =====
/// $ cpuid -1 -r
/// Disclaimer: cpuid may not support decoding of all cpuid registers.
/// CPU:
/// 0x00000000 0x00: eax=0x0000000d ebx=0x756e6547 ecx=0x6c65746e edx=0x49656e69
/// 0x00000001 0x00: eax=0x000306f2 ebx=0x00010800 ecx=0xfffa3203 edx=0x178bfbff
/// 0x00000002 0x00: eax=0x76036301 ebx=0x00f0b5ff ecx=0x00000000 edx=0x00c10000
/// 0x00000003 0x00: eax=0x00000000 ebx=0x00000000 ecx=0x00000000 edx=0x00000000
/// 0x00000004 0x00: eax=0x00004121 ebx=0x01c0003f ecx=0x0000003f edx=0x00000000
/// 0x00000004 0x01: eax=0x00004122 ebx=0x01c0003f ecx=0x0000003f edx=0x00000000
/// 0x00000004 0x02: eax=0x00004143 ebx=0x01c0003f ecx=0x000001ff edx=0x00000000
/// 0x00000004 0x03: eax=0x0007c163 ebx=0x04c0003f ecx=0x00005fff edx=0x00000006
/// 0x00000005 0x00: eax=0x00000000 ebx=0x00000000 ecx=0x00000000 edx=0x00000000
/// 0x00000006 0x00: eax=0x00000000 ebx=0x00000000 ecx=0x00000000 edx=0x00000000
/// 0x00000007 0x00: eax=0x00000000 ebx=0x000007a9 ecx=0x00000000 edx=0x00000000
/// 0x00000008 0x00: eax=0x00000000 ebx=0x00000000 ecx=0x00000000 edx=0x00000000
/// 0x00000009 0x00: eax=0x00000000 ebx=0x00000000 ecx=0x00000000 edx=0x00000000
/// 0x0000000a 0x00: eax=0x00000000 ebx=0x00000000 ecx=0x00000000 edx=0x00000000
/// 0x0000000b 0x00: eax=0x00000001 ebx=0x00000001 ecx=0x00000100 edx=0x00000000
/// 0x0000000b 0x01: eax=0x00000005 ebx=0x00000001 ecx=0x00000201 edx=0x00000000
/// 0x0000000c 0x00: eax=0x00000000 ebx=0x00000000 ecx=0x00000000 edx=0x00000000
/// 0x0000000d 0x00: eax=0x00000007 ebx=0x00000340 ecx=0x00000340 edx=0x00000000
/// 0x0000000d 0x01: eax=0x00000001 ebx=0x00000000 ecx=0x00000000 edx=0x00000000
/// 0x0000000d 0x02: eax=0x00000100 ebx=0x00000240 ecx=0x00000000 edx=0x00000000
/// 0x40000000 0x00: eax=0x40000005 ebx=0x566e6558 ecx=0x65584d4d edx=0x4d4d566e
/// 0x40000001 0x00: eax=0x0004000b ebx=0x00000000 ecx=0x00000000 edx=0x00000000
/// 0x40000002 0x00: eax=0x00000001 ebx=0x40000000 ecx=0x00000000 edx=0x00000000
/// 0x40000003 0x00: eax=0x00000006 ebx=0x00000002 ecx=0x00249f0a edx=0x00000001
/// 0x40000003 0x02: eax=0x9b842c23 ebx=0x007c8980 ecx=0xd5551b14 edx=0xffffffff
/// 0x40000004 0x00: eax=0x0000001c ebx=0x00000000 ecx=0x0000762b edx=0x00000000
/// 0x40000005 0x00: eax=0x00000000 ebx=0x00000000 ecx=0x00000000 edx=0x00000000
/// 0x80000000 0x00: eax=0x80000008 ebx=0x00000000 ecx=0x00000000 edx=0x00000000
/// 0x80000001 0x00: eax=0x00000000 ebx=0x00000000 ecx=0x00000021 edx=0x28100800
/// 0x80000002 0x00: eax=0x65746e49 ebx=0x2952286c ecx=0x6f655820 edx=0x2952286e
/// 0x80000003 0x00: eax=0x55504320 ebx=0x2d354520 ecx=0x36373632 edx=0x20337620
/// 0x80000004 0x00: eax=0x2e322040 ebx=0x48473034 ecx=0x0000007a edx=0x00000000
/// 0x80000005 0x00: eax=0x00000000 ebx=0x00000000 ecx=0x00000000 edx=0x00000000
/// 0x80000006 0x00: eax=0x00000000 ebx=0x00000000 ecx=0x01006040 edx=0x00000000
/// 0x80000007 0x00: eax=0x00000000 ebx=0x00000000 ecx=0x00000000 edx=0x00000000
/// 0x80000008 0x00: eax=0x0000302e ebx=0x00000000 ecx=0x00000000 edx=0x00000000
/// 0x80860000 0x00: eax=0x00000000 ebx=0x00000000 ecx=0x00000000 edx=0x00000000
/// 0xc0000000 0x00: eax=0x00000000 ebx=0x00000000 ecx=0x00000000 edx=0x00000000
/// =====
///
/// References:
/// - Intel SDM:
#[allow(clippy::unusual_byte_groupings)]
pub fn t2() -> CustomCpuTemplate {
    CustomCpuTemplate {
        cpuid_modifiers: vec![
            CpuidLeafModifier {
                leaf: 0x1,
                subleaf: 0x0,
                flags: KvmCpuidFlags(0),
                modifiers: vec![
                    // EAX: Version Information
                    // - Bits 03-00: Stepping ID.
                    // - Bits 07-04: Model.
                    // - Bits 11-08: Family.
                    // - Bits 13-12: Processor Type.
                    // - Bits 19-16: Extended Model ID.
                    // - Bits 27-20: Extended Family ID.
                    CpuidRegisterModifier {
                        register: CpuidRegister::Eax,
                        bitmap: RegisterValueFilter {
                            filter: 0b0000_11111111_1111_00_11_1111_1111_1111,
                            value: 0b0000_00000000_0011_00_00_0110_1111_0010,
                        },
                    },
                    // ECX: Feature Information
                    // - Bit 02: DTES64
                    // - Bit 03: MONITOR
                    // - Bit 04: DS-CPL
                    // - Bit 05: VMX
                    // - Bit 06: SMX
                    // - Bit 07: EIST
                    // - Bit 08: TM2
                    // - Bit 10: CNXT-ID
                    // - Bit 11: SDBG
                    // - Bit 14: xTPR Update Control
                    // - Bit 15: PDCM
                    // - Bit 18: DCA
                    CpuidRegisterModifier {
                        register: CpuidRegister::Ecx,
                        bitmap: RegisterValueFilter {
                            filter: 0b0000_0000_0000_0100_1100_1101_1111_1100,
                            value: 0b0000_0000_0000_0000_0000_0000_0000_0000,
                        },
                    },
                    // EDX: Feature Information
                    // - Bit 07: MCE
                    // - Bit 12: MTRR
                    // - Bit 18: PSN
                    // - Bit 21: DS
                    // - Bit 22: ACPI
                    // - Bit 27: SS
                    // - Bit 29: TM
                    // - Bit 30: IA-64 (deprecated) https://www.intel.com/content/dam/www/public/us/en/documents/manuals/itanium-architecture-vol-4-manual.pdf
                    // - Bit 31: PBE
                    CpuidRegisterModifier {
                        register: CpuidRegister::Edx,
                        bitmap: RegisterValueFilter {
                            filter: 0b1110_1000_0110_0100_0001_0000_1000_0000,
                            // MCE (bit 07) and MTRR (bit 12) stay set; the rest are cleared.
                            value: 0b0000_0000_0000_0000_0001_0000_1000_0000,
                        },
                    },
                ],
            },
            CpuidLeafModifier {
                leaf: 0x7,
                subleaf: 0x0,
                flags: KvmCpuidFlags(1),
                modifiers: vec![
                    // EBX:
                    // - Bit 02: SGX
                    // - Bit 04: HLE
                    // - Bit 09: Enhanced REP MOVSB/STOSB
                    // - Bit 11: RTM
                    // - Bit 12: RDT-M
                    // - Bit 14: MPX
                    // - Bit 15: RDT-A
                    // - Bit 16: AVX512F
                    // - Bit 17: AVX512DQ
                    // - Bit 18: RDSEED
                    // - Bit 19: ADX
                    // - Bit 21: AVX512_IFMA
                    // - Bit 22: PCOMMIT (deprecated) https://www.intel.com/content/www/us/en/developer/articles/technical/deprecate-pcommit-instruction.html
                    // - Bit 23: CLFLUSHOPT
                    // - Bit 24: CLWB
                    // - Bit 25: Intel Processor Trace
                    // - Bit 26: AVX512PF
                    // - Bit 27: AVX512ER
                    // - Bit 28: AVX512CD
                    // - Bit 29: SHA
                    // - Bit 30: AVX512BW
                    // - Bit 31: AVX512VL
                    CpuidRegisterModifier {
                        register: CpuidRegister::Ebx,
                        bitmap: RegisterValueFilter {
                            filter: 0b1111_1111_1110_1111_1101_1010_0001_0100,
                            // Only bit 09 (Enhanced REP MOVSB/STOSB) stays set.
                            value: 0b0000_0000_0000_0000_0000_0010_0000_0000,
                        },
                    },
                    // ECX:
                    // - Bit 01: AVX512_VBMI
                    // - Bit 02: UMIP
                    // - Bit 03: PKU
                    // - Bit 04: OSPKE
                    // - Bit 06: AVX512_VBMI2
                    // - Bit 08: GFNI
                    // - Bit 09: VAES
                    // - Bit 10: VPCLMULQDQ
                    // - Bit 11: AVX512_VNNI
                    // - Bit 12: AVX512_BITALG
                    // - Bit 14: AVX512_VPOPCNTDQ
                    // - Bit 16: LA57
                    // - Bit 22: RDPID
                    // - Bit 30: SGX_LC
                    CpuidRegisterModifier {
                        register: CpuidRegister::Ecx,
                        bitmap: RegisterValueFilter {
                            filter: 0b0100_0000_0100_0001_0101_1111_0101_1110,
                            value: 0b0000_0000_0000_0000_0000_0000_0000_0000,
                        },
                    },
                    // EDX:
                    // - Bit 02: AVX512_4VNNIW
                    // - Bit 03: AVX512_4FMAPS
                    // - Bit 04: Fast Short REP MOV
                    // - Bit 08: AVX512_VP2INTERSECT
                    CpuidRegisterModifier {
                        register: CpuidRegister::Edx,
                        bitmap: RegisterValueFilter {
                            filter: 0b0000_0000_0000_0000_0000_0001_0001_1100,
                            value: 0b0000_0000_0000_0000_0000_0000_0000_0000,
                        },
                    },
                ],
            },
            CpuidLeafModifier {
                leaf: 0xd,
                subleaf: 0x0,
                flags: KvmCpuidFlags(1),
                modifiers: vec![
                    // EAX:
                    // - Bits 04-03: MPX state
                    // - Bits 07-05: AVX-512 state
                    // - Bit 09: PKRU state
                    CpuidRegisterModifier {
                        register: CpuidRegister::Eax,
                        bitmap: RegisterValueFilter {
                            filter: 0b0000_0000_0000_0000_0000_00_1_0_111_11_000,
                            value: 0b0000_0000_0000_0000_0000_00_0_0_000_00_000,
                        },
                    },
                ],
            },
            CpuidLeafModifier {
                leaf: 0xd,
                subleaf: 0x1,
                flags: KvmCpuidFlags(1),
                modifiers: vec![
                    // EAX:
                    // - Bit 01: Supports XSAVEC and the compacted form of XRSTOR
                    // - Bit 02: Supports XGETBV
                    // - Bit 03: Supports XSAVES/XRSTORS and IA32_XSS
                    CpuidRegisterModifier {
                        register: CpuidRegister::Eax,
                        bitmap: RegisterValueFilter {
                            filter: 0b0000_0000_0000_0000_0000_0000_0000_1110,
                            value: 0b0000_0000_0000_0000_0000_0000_0000_0000,
                        },
                    },
                ],
            },
            CpuidLeafModifier {
                leaf: 0x80000001,
                subleaf: 0x0,
                flags: KvmCpuidFlags(0),
                modifiers: vec![
                    // ECX:
                    // - Bit 08: PREFETCHW
                    // - Bit 29: MONITORX and MWAITX
                    CpuidRegisterModifier {
                        register: CpuidRegister::Ecx,
                        bitmap: RegisterValueFilter {
                            filter: 0b0010_0000_0000_0000_0000_0001_0000_0000,
                            value: 0b0000_0000_0000_0000_0000_0000_0000_0000,
                        },
                    },
                    // EDX:
                    // - Bit 26: 1-GByte pages
                    CpuidRegisterModifier {
                        register: CpuidRegister::Edx,
                        bitmap: RegisterValueFilter {
                            filter: 0b0000_0100_0000_0000_0000_0000_0000_0000,
                            value: 0b0000_0000_0000_0000_0000_0000_0000_0000,
                        },
                    },
                ],
            },
            CpuidLeafModifier {
                leaf: 0x80000008,
                subleaf: 0x0,
                flags: KvmCpuidFlags(0),
                modifiers: vec![
                    // EBX:
                    // - Bit 09: WBNOINVD
                    CpuidRegisterModifier {
                        register: CpuidRegister::Ebx,
                        bitmap: RegisterValueFilter {
                            filter: 0b0000_0000_0000_0000_0000_0010_0000_0000,
                            value: 0b0000_0000_0000_0000_0000_0000_0000_0000,
                        },
                    },
                ],
            },
        ],
        msr_modifiers: vec![],
        ..Default::default()
    }
}


================================================
FILE: src/vmm/src/cpu_config/x86_64/static_cpu_templates/t2a.rs
================================================
// Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

use crate::cpu_config::templates::{CustomCpuTemplate, RegisterValueFilter};
use crate::cpu_config::x86_64::cpuid::KvmCpuidFlags;
use crate::cpu_config::x86_64::custom_cpu_template::{
    CpuidLeafModifier, CpuidRegister, CpuidRegisterModifier,
};

/// T2A template
///
/// Provide instruction set feature parity with Intel Cascade Lake or later using T2CL template.
///
/// References:
/// - Intel SDM:
/// - AMD APM:
/// - CPUID Enumeration and Architectural MSRs:
#[allow(clippy::unusual_byte_groupings)]
pub fn t2a() -> CustomCpuTemplate {
    CustomCpuTemplate {
        cpuid_modifiers: vec![
            CpuidLeafModifier {
                leaf: 0x1,
                subleaf: 0x0,
                flags: KvmCpuidFlags(0),
                modifiers: vec![
                    // EAX: Version Information
                    // - Bits 03-00: Stepping (AMD APM) / Stepping ID (Intel SDM)
                    // - Bits 07-04: BaseModel (AMD APM) / Model (Intel SDM)
                    // - Bits 11-08: BaseFamily (AMD APM) / Family (Intel SDM)
                    // - Bits 13-12: Reserved (AMD APM) / Processor Type (Intel SDM)
                    // - Bits 19-16: ExtModel (AMD APM) / Extended Model ID (Intel SDM)
                    // - Bits 27-20: ExtFamily (AMD APM) / Extended Family ID (Intel SDM)
                    CpuidRegisterModifier {
                        register: CpuidRegister::Eax,
                        bitmap: RegisterValueFilter {
                            filter: 0b0000_11111111_1111_00_11_1111_1111_1111,
                            value: 0b0000_00000000_0011_00_00_0110_1111_0010,
                        },
                    },
                    // ECX: Feature Information
                    // - Bit 02: Reserved (AMD APM) / DTES64 (Intel SDM)
                    // - Bit 03: MONITOR (AMD APM) / MONITOR (Intel SDM)
                    // - Bit 04: Reserved (AMD APM) / DS-CPL (Intel SDM)
                    // - Bit 05: Reserved (AMD APM) / VMX (Intel SDM)
                    // - Bit 06: Reserved (AMD APM) / SMX (Intel SDM)
                    // - Bit 07: Reserved (AMD APM) / EIST (Intel SDM)
                    // - Bit 08: Reserved (AMD APM) / TM2 (Intel SDM)
                    // - Bit 10: Reserved (AMD APM) / CNXT-ID (Intel SDM)
                    // - Bit 11: Reserved (AMD APM) / SDBG (Intel SDM)
                    // - Bit 14: Reserved (AMD APM) / xTPR Update Control (Intel SDM)
                    // - Bit 15: Reserved (AMD APM) / PDCM (Intel SDM)
                    // - Bit 18: Reserved (AMD APM) / DCA (Intel SDM)
                    CpuidRegisterModifier {
                        register: CpuidRegister::Ecx,
                        bitmap: RegisterValueFilter {
                            filter: 0b0000_0000_0000_0100_1100_1101_1111_1100,
                            value: 0b0000_0000_0000_0000_0000_0000_0000_0000,
                        },
                    },
                    // EDX: Feature Information
                    // - Bit 07: MCE (AMD APM) / MCE (Intel SDM)
                    // - Bit 12: MTRR (AMD APM) / MTRR (Intel SDM)
                    // - Bit 18: Reserved (AMD APM) / PSN (Intel SDM)
                    // - Bit 21: Reserved (AMD APM) / DS (Intel SDM)
                    // - Bit 22: Reserved (AMD APM) / ACPI (Intel SDM)
                    // - Bit 27: Reserved (AMD APM) / SS (Intel SDM)
                    // - Bit 29: Reserved (AMD APM) / TM (Intel SDM)
                    // - Bit 30: Reserved (AMD APM) / IA-64 (deprecated) https://www.intel.com/content/dam/www/public/us/en/documents/manuals/itanium-architecture-vol-4-manual.pdf
                    // - Bit 31: Reserved (AMD APM) / PBE (Intel SDM)
                    CpuidRegisterModifier {
                        register: CpuidRegister::Edx,
                        bitmap: RegisterValueFilter {
                            filter: 0b1110_1000_0110_0100_0001_0000_1000_0000,
                            // MCE (bit 07) and MTRR (bit 12) stay set; the rest are cleared.
                            value: 0b0000_0000_0000_0000_0001_0000_1000_0000,
                        },
                    },
                ],
            },
            CpuidLeafModifier {
                leaf: 0x7,
                subleaf: 0x0,
                flags: KvmCpuidFlags(1),
                modifiers: vec![
                    // EBX:
                    // - Bit 02: Reserved (AMD APM) / SGX (Intel SDM)
                    // - Bit 04: Reserved (AMD APM) / HLE (Intel SDM)
                    // - Bit 09: Reserved (AMD APM) / Enhanced REP MOVSB/STOSB (Intel SDM)
                    // - Bit 11: Reserved (AMD APM) / RTM (Intel SDM)
                    // - Bit 12: PQM (AMD APM) / RDT-M (Intel SDM)
                    // - Bit 14: Reserved (AMD APM) / MPX (Intel SDM)
                    // - Bit 15: PQE (AMD APM) / RDT-A (Intel SDM)
                    // - Bit 16: Reserved (AMD APM) / AVX512F (Intel SDM)
                    // - Bit 17: Reserved (AMD APM) / AVX512DQ (Intel SDM)
                    // - Bit 18: RDSEED (AMD APM) / RDSEED (Intel SDM)
                    // - Bit 19: ADX (AMD APM) / ADX (Intel SDM)
                    // - Bit 21: Reserved (AMD APM) / AVX512_IFMA (Intel SDM)
                    // - Bit 22: RDPID (AMD APM) / Reserved (Intel SDM)
                    //   On kernel codebase and Intel SDM, RDPID is enumerated at
                    //   CPUID.07h:ECX.RDPID[bit 22].
                    //   https://elixir.bootlin.com/linux/v6.3.8/source/arch/x86/include/asm/cpufeatures.h#L389
                    // - Bit 23: CLFLUSHOPT (AMD APM) / CLFLUSHOPT (Intel SDM)
                    // - Bit 24: CLWB (AMD APM) / CLWB (Intel SDM)
                    // - Bit 25: Reserved (AMD APM) / Intel Processor Trace (Intel SDM)
                    // - Bit 26: Reserved (AMD APM) / AVX512PF (Intel SDM)
                    // - Bit 27: Reserved (AMD APM) / AVX512ER (Intel SDM)
                    // - Bit 28: Reserved (AMD APM) / AVX512CD (Intel SDM)
                    // - Bit 29: SHA (AMD APM) / SHA (Intel SDM)
                    // - Bit 30: Reserved (AMD APM) / AVX512BW (Intel SDM)
                    // - Bit 31: Reserved (AMD APM) / AVX512VL (Intel SDM)
                    CpuidRegisterModifier {
                        register: CpuidRegister::Ebx,
                        bitmap: RegisterValueFilter {
                            filter: 0b1111_1111_1110_1111_1101_1010_0001_0100,
                            // Only bit 09 stays set.
                            value: 0b0000_0000_0000_0000_0000_0010_0000_0000,
                        },
                    },
                    // ECX:
                    // - Bit 01: Reserved (AMD APM) / AVX512_VBMI (Intel SDM)
                    // - Bit 02: UMIP (AMD APM) / UMIP (Intel SDM)
                    // - Bit 03: PKU (AMD APM) / PKU (Intel SDM)
                    // - Bit 04: OSPKE (AMD APM) / OSPKE (Intel SDM)
                    // - Bit 06: Reserved (AMD APM) / AVX512_VBMI2 (Intel SDM)
                    // - Bit 08: Reserved (AMD APM) / GFNI (Intel SDM)
                    // - Bit 09: VAES (AMD APM) / VAES (Intel SDM)
                    // - Bit 10: VPCLMULQDQ (AMD APM) / VPCLMULQDQ (Intel SDM)
                    // - Bit 11: Reserved (AMD APM) / AVX512_VNNI (Intel SDM)
                    // - Bit 12: Reserved (AMD APM) / AVX512_BITALG (Intel SDM)
                    // - Bit 14: Reserved (AMD APM) / AVX512_VPOPCNTDQ (Intel SDM)
                    // - Bit 16: LA57 (AMD APM) / LA57 (Intel SDM)
                    // - Bit 22: Reserved (AMD APM) / RDPID and IA32_TSC_AUX (Intel SDM)
                    // - Bit 30: Reserved (AMD APM) / SGX_LC (Intel SDM)
                    CpuidRegisterModifier {
                        register: CpuidRegister::Ecx,
                        bitmap: RegisterValueFilter {
                            filter: 0b0100_0000_0100_0001_0101_1111_0101_1110,
                            value: 0b0000_0000_0000_0000_0000_0000_0000_0000,
                        },
                    },
                    // EDX:
                    // - Bit 02: Reserved (AMD APM) / AVX512_4VNNIW (Intel SDM)
                    // - Bit 03: Reserved (AMD APM) / AVX512_4FMAPS (Intel SDM)
                    // - Bit 04: Reserved (AMD APM) / Fast Short REP MOV (Intel SDM)
                    // - Bit 08: Reserved (AMD APM) / AVX512_VP2INTERSECT (Intel SDM)
                    CpuidRegisterModifier {
                        register: CpuidRegister::Edx,
                        bitmap: RegisterValueFilter {
                            filter: 0b0000_0000_0000_0000_0000_0001_0001_1100,
                            value: 0b0000_0000_0000_0000_0000_0000_0000_0000,
                        },
                    },
                ],
            },
            CpuidLeafModifier {
                leaf: 0xd,
                subleaf: 0x0,
                flags: KvmCpuidFlags(1),
                modifiers: vec![
                    // EAX:
                    // - Bits 04-03: Reserved (AMD APM) / MPX state (Intel SDM)
                    // - Bits 07-05: Reserved (AMD APM) / AVX-512 state (Intel SDM)
                    // - Bit 09: MPK (AMD APM) / PKRU state (Intel SDM)
                    CpuidRegisterModifier {
                        register: CpuidRegister::Eax,
                        bitmap: RegisterValueFilter {
                            filter: 0b0000_0000_0000_0000_0000_00_1_0_111_11_000,
                            value: 0b0000_0000_0000_0000_0000_00_0_0_000_00_000,
                        },
                    },
                ],
            },
            CpuidLeafModifier {
                leaf: 0xd,
                subleaf: 0x1,
                flags: KvmCpuidFlags(1),
                modifiers: vec![
                    // EAX:
                    // - Bit 01: XSAVEC (AMD APM) / Supports XSAVEC and the compacted form of
                    //   XRSTOR (Intel SDM)
                    // - Bit 02: XGETBV (AMD APM) / Supports XGETBV (Intel SDM)
                    // - Bit 03: XSAVES (AMD APM) / Supports XSAVES/XRSTORS and IA32_XSS (Intel
                    //   SDM)
                    CpuidRegisterModifier {
                        register: CpuidRegister::Eax,
                        bitmap: RegisterValueFilter {
                            filter: 0b0000_0000_0000_0000_0000_0000_0000_1110,
                            value: 0b0000_0000_0000_0000_0000_0000_0000_0000,
                        },
                    },
                ],
            },
            CpuidLeafModifier {
                leaf: 0x80000001,
                subleaf: 0x0,
                flags: KvmCpuidFlags(0),
                modifiers: vec![
                    // ECX:
                    // - Bit 02: SVM (AMD APM) / Reserved (Intel SDM)
                    // - Bit 06: SSE4A (AMD APM) / Reserved (Intel SDM)
                    // - Bit 07: MisAlignSse (AMD APM) / Reserved (Intel SDM)
                    // - Bit 08: 3DNowPrefetch (AMD APM) / PREFETCHW (Intel SDM)
                    // - Bit 29: MONITORX (AMD APM) / MONITORX and MWAITX (Intel SDM)
                    CpuidRegisterModifier {
                        register: CpuidRegister::Ecx,
                        bitmap: RegisterValueFilter {
                            filter: 0b0010_0000_0000_0000_0000_0001_1100_0100,
                            value: 0b0000_0000_0000_0000_0000_0000_0000_0000,
                        },
                    },
                    // EDX:
                    // - Bit 22: MmxExt (AMD APM) / Reserved (Intel SDM)
                    // - Bit 25: FFXSR (AMD APM) / Reserved (Intel SDM)
                    // - Bit 26: Page1GB (AMD APM) / 1-GByte pages (Intel SDM)
                    CpuidRegisterModifier {
                        register: CpuidRegister::Edx,
                        bitmap: RegisterValueFilter {
                            filter: 0b0000_0110_0100_0000_0000_0000_0000_0000,
                            value: 0b0000_0000_0000_0000_0000_0000_0000_0000,
                        },
                    },
                ],
            },
            CpuidLeafModifier {
                leaf: 0x80000008,
                subleaf: 0x0,
                flags: KvmCpuidFlags(0),
                modifiers: vec![
                    // EBX:
                    // - Bit 00: CLZERO (AMD APM) / Reserved (Intel SDM)
                    // - Bit 02: RstrFpErrPtrs (AMD APM) / Reserved (Intel SDM)
                    // - Bit 09: WBNOINVD (AMD APM) / WBNOINVD (Intel SDM)
                    // - Bit 18: IbrsPreferred (AMD APM) / Reserved (Intel SDM)
                    // - Bit 19: IbrsSameMode (AMD APM) / Reserved (Intel SDM)
                    // - Bit 20: EferLmsleUnsupported (AMD APM) / Reserved (Intel SDM)
                    CpuidRegisterModifier {
                        register: CpuidRegister::Ebx,
                        bitmap: RegisterValueFilter {
                            filter: 0b0000_0000_0001_1100_0000_0010_0000_0101,
                            // Bits 02, 18, 19, and 20 stay set; bits 00 and 09 are cleared.
                            value: 0b0000_0000_0001_1100_0000_0000_0000_0100,
                        },
                    },
                ],
            },
        ],
        msr_modifiers: vec![],
        ..Default::default()
    }
}


================================================
FILE: src/vmm/src/cpu_config/x86_64/static_cpu_templates/t2cl.rs
================================================
// Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

use crate::cpu_config::templates::{CustomCpuTemplate, RegisterValueFilter};
use crate::cpu_config::x86_64::cpuid::KvmCpuidFlags;
use crate::cpu_config::x86_64::custom_cpu_template::{
    CpuidLeafModifier, CpuidRegister, CpuidRegisterModifier, RegisterModifier,
};

/// T2CL template
///
/// Mask CPUID to make exposed CPU features as close as possible to Intel Cascade Lake and provide
/// instruction set feature parity with AMD Milan using T2A template.
/// /// References: /// - Intel SDM: /// - AMD APM: /// - CPUID Enumeration and Architectural MSRs: #[allow(clippy::unusual_byte_groupings)] pub fn t2cl() -> CustomCpuTemplate { CustomCpuTemplate { cpuid_modifiers: vec![ CpuidLeafModifier { leaf: 0x1, subleaf: 0x0, flags: KvmCpuidFlags(0), modifiers: vec![ // EAX: Version Information // - Bits 03-00: Stepping ID (Intel SDM) / Stepping (AMD APM) // - Bits 07-04: Model (Intel SDM) / BaseModel (AMD APM) // - Bits 11-08: Family (Intel SDM) / BaseFamily (AMD APM) // - Bits 13-12: Processor Type (Intel SDM) / Reserved (AMD APM) // - Bits 19-16: Extended Model ID (Intel SDM) / ExtModel (AMD APM) // - Bits 27-20: Extended Family ID (Intel SDM) / ExtFamily (AMD APM) CpuidRegisterModifier { register: CpuidRegister::Eax, bitmap: RegisterValueFilter { filter: 0b0000_11111111_1111_00_11_1111_1111_1111, value: 0b0000_00000000_0011_00_00_0110_1111_0010, }, }, // ECX: Feature Information // - Bit 02: DTES64 (Intel SDM) / Reserved (AMD APM) // - Bit 03: MONITOR (Intel SDM) / MONITOR (AMD APM) // - Bit 04: DS-CPL (Intel SDM) / Reserved (AMD APM) // - Bit 05: VMX (Intel SDM) / Reserved (AMD APM) // - Bit 06: SMX (Intel SDM) / Reserved (AMD APM) // - Bit 07: EIST (Intel SDM) / Reserved (AMD APM) // - Bit 08: TM2 (Intel SDM) / Reserved (AMD APM) // - Bit 10: CNXT-ID (Intel SDM) / Reserved (AMD APM) // - Bit 11: SDBG (Intel SDM) / Reserved (AMD APM) // - Bit 14: xTPR Update Control (Intel SDM) / Reserved (AMD APM) // - Bit 15: PDCM (Intel SDM) / Reserved (AMD APM) // - Bit 18: DCA (Intel SDM) / Reserevd (AMD APM) CpuidRegisterModifier { register: CpuidRegister::Ecx, bitmap: RegisterValueFilter { filter: 0b0000_0000_0000_0100_1100_1101_1111_1100, value: 0b0000_0000_0000_0000_0000_0000_0000_0000, }, }, // EDX: Feature Information // - Bit 07: MCE (Intel SDM) / MCE (AMD APM) // - Bit 12: MTRR (Intel SDM) / MTRR (AMD APM) // - Bit 18: PSN (Intel SDM) / Reserved (AMD APM) // - Bit 21: DS (Intel SDM) / Reserved (AMD APM)PC // - Bit 22: ACPI 
(Intel SDM) / Reserved (AMD APM) // - Bit 27: SS (Intel SDM) / Reserved (AMD APM) // - Bit 29: TM (Intel SDM) / Reserved (AMD APM) // - Bit 30: IA64 (deprecated) / Reserved (AMD APM) https://www.intel.com/content/dam/www/public/us/en/documents/manuals/itanium-architecture-vol-4-manual.pdf // - Bit 31: PBE (Intel SDM) / Reserved (AMD APM) CpuidRegisterModifier { register: CpuidRegister::Edx, bitmap: RegisterValueFilter { filter: 0b1110_1000_0110_0100_0001_0000_1000_0000, value: 0b0000_0000_0000_0000_0001_0000_1000_0000, }, }, ], }, CpuidLeafModifier { leaf: 0x7, subleaf: 0x0, flags: KvmCpuidFlags(1), modifiers: vec![ // EBX: // - Bit 02: SGX (Intel SDM) / Reserved (AMD APM) // - Bit 04: HLE (Intel SDM) / Reserved (AMD APM) // - Bit 09: Enhanced REP MOVSB/STOSB (Intel SDM) / Reserved (AMD APM) // - Bit 11: RTM (Intel SDM) / Reserved (AMD APM) // - Bit 12: RDT-M (Intel SDM) / PQM (AMD APM) // - Bit 14: MPX (Intel SDM) / Reserved (AMD APM) // - Bit 15: RDT-A (Intel SDM) / PQE (AMD APM) // - Bit 16: AVX512F (Intel SDM) / Reserved (AMD APM) // - Bit 17: AVX512DQ (Intel SDM) / Reserved (AMD APM) // - Bit 18: RDSEED (Intel SDM) / RDSEED (AMD APM) // - Bit 19: ADX (Intel SDM) / ADX (AMD APM) // - Bit 21: AVX512_IFMA (Intel SDM) / Reserved (AMD APM) // - Bit 22: Reserved (Intel SDM) / RDPID (AMD APM) // On kernel codebase and Intel SDM, RDPID is enumerated at CPUID.07h:ECX.RDPID[bit 22]. 
// https://elixir.bootlin.com/linux/v6.3.8/source/arch/x86/include/asm/cpufeatures.h#L389 // - Bit 23: CLFLUSHOPT (Intel SDM) / CLFLUSHOPT (AMD APM) // - Bit 24: CLWB (Intel SDM) / CLWB (AMD APM) // - Bit 25: Intel Processor Trace (Intel SDM) / Reserved (AMD APM) // - Bit 26: AVX512PF (Intel SDM) / Reserved (AMD APM) // - Bit 27: AVX512ER (Intel SDM) / Reserved (AMD APM) // - Bit 28: AVX512CD (Intel SDM) / Reserved (AMD APM) // - Bit 29: SHA (Intel SDM) / SHA (AMD APM) // - Bit 30: AVX512BW (Intel SDM) / Reserved (AMD APM) // - Bit 31: AVX512VL (Intel SDM) / Reserved (AMD APM) CpuidRegisterModifier { register: CpuidRegister::Ebx, bitmap: RegisterValueFilter { filter: 0b1111_1111_1110_1111_1101_1010_0001_0100, value: 0b0000_0000_0000_0000_0000_0010_0000_0000, }, }, // ECX: // - Bit 01: AVX512_VBMI (Intel SDM) / Reserved (AMD APM) // - Bit 02: UMIP (Intel SDM) / UMIP (AMD APM) // - Bit 03: PKU (Intel SDM) / PKU (AMD APM) // - Bit 04: OSPKE (Intel SDM) / OSPKE (AMD APM) // - Bit 06: AVX512_VBMI2 (Intel SDM) / Reserved (AMD APM) // - Bit 08: GFNI (Intel SDM) / Reserved (AMD APM) // - Bit 09: VAES (Intel SDM) / VAES (AMD APM) // - Bit 10: VPCLMULQDQ (Intel SDM) / VPCLMULQDQ (AMD APM) // - Bit 11: AVX512_VNNI (Intel SDM) / Reserved (AMD APM) // - Bit 12: AVX512_BITALG (Intel SDM) / Reserved (AMD APM) // - Bit 14: AVX512_VPOPCNTDQ (Intel SDM) / Reserved (AMD APM) // - Bit 16: LA57 (Intel SDM) / LA57 (AMD APM) // - Bit 22: RDPID and IA32_TSC_AUX (Intel SDM) / Reserved (AMD APM) // - Bit 30: SGX_LC (Intel SDM) / Reserved (AMD APM) CpuidRegisterModifier { register: CpuidRegister::Ecx, bitmap: RegisterValueFilter { filter: 0b0100_0000_0100_0001_0101_1111_0101_1110, value: 0b0000_0000_0000_0000_0000_0000_0000_0000, }, }, // EDX: // - Bit 02: AVX512_4VNNIW (Intel SDM) / Reserved (AMD APM) // - Bit 03: AVX512_4FMAPS (Intel SDM) / Reserved (AMD APM) // - Bit 04: Fast Short REP MOV (Intel SDM) / Reserved (AMD APM) // - Bit 08: AVX512_VP2INTERSECT (Intel SDM) / Reserved (AMD APM) 
CpuidRegisterModifier { register: CpuidRegister::Edx, bitmap: RegisterValueFilter { filter: 0b0000_0000_0000_0000_0000_0001_0001_1100, value: 0b0000_0000_0000_0000_0000_0000_0000_0000, }, }, ], }, CpuidLeafModifier { leaf: 0xd, subleaf: 0x0, flags: KvmCpuidFlags(1), modifiers: vec![ // EAX: // - Bits 04-03: MPX state (Intel SDM) / Reserved (AMD APM) // - Bits 07-05: AVX-512 state (Intel SDM) / Reserved (AMD APM) // - Bit 09: PKRU state (Intel SDM) / MPK (AMD APM) CpuidRegisterModifier { register: CpuidRegister::Eax, bitmap: RegisterValueFilter { filter: 0b0000_0000_0000_0000_0000_00_1_0_111_11_000, value: 0b0000_0000_0000_0000_0000_00_0_0_000_00_000, }, }, ], }, CpuidLeafModifier { leaf: 0xd, subleaf: 0x1, flags: KvmCpuidFlags(1), modifiers: vec![ // EAX: // - Bit 01: Supports XSAVEC and the compacted form of XRSTOR (Intel SDM) / // XSAVEC (AMD APM) // - Bit 02: Supports XGETBV (Intel SDM) / XGETBV (AMD APM) // - Bit 03: Supports XSAVES/XRSTORS and IA32_XSS (Intel SDM) / XSAVES (AMD // APM) CpuidRegisterModifier { register: CpuidRegister::Eax, bitmap: RegisterValueFilter { filter: 0b0000_0000_0000_0000_0000_0000_0000_1110, value: 0b0000_0000_0000_0000_0000_0000_0000_0000, }, }, ], }, CpuidLeafModifier { leaf: 0x80000001, subleaf: 0x0, flags: KvmCpuidFlags(0), modifiers: vec![ // ECX: // - Bit 06: Reserved (Intel SDM) / SSE4A (AMD APM) // - Bit 07: Reserved (Intel SDM) / MisAlignSse (AMD APM) // - Bit 08: PREFETCHW (Intel SDM) / 3DNowPrefetch (AMD APM) // - Bit 29: MONITORX and MWAITX (Intel SDM) / MONITORX (AMD APM) CpuidRegisterModifier { register: CpuidRegister::Ecx, bitmap: RegisterValueFilter { filter: 0b0010_0000_0000_0000_0000_0001_1100_0000, value: 0b0000_0000_0000_0000_0000_0000_0000_0000, }, }, // EDX: // - Bit 22: Reserved (Intel SDM) / MmxExt (AMD APM) // - Bit 23: Reserved (Intel SDM) / MMX (AMD APM) // - Bit 24: Reserved (Intel SDM) / FSXR (AMD APM) // - Bit 25: Reserved (Intel SDM) / FFXSR (AMD APM) // - Bit 26: 1-GByte pages (Intel SDM) / Page1GB 
(AMD APM) CpuidRegisterModifier { register: CpuidRegister::Edx, bitmap: RegisterValueFilter { filter: 0b0000_0111_1100_0000_0000_0000_0000_0000, value: 0b0000_0000_0000_0000_0000_0000_0000_0000, }, }, ], }, CpuidLeafModifier { leaf: 0x80000008, subleaf: 0x0, flags: KvmCpuidFlags(0), modifiers: vec![ // EBX: // - Bit 09: WBNOINVD (Intel SDM) / WBNOINVD (AMD APM) CpuidRegisterModifier { register: CpuidRegister::Ebx, bitmap: RegisterValueFilter { filter: 0b0000_0000_0000_0000_0000_0010_0000_0000, value: 0b0000_0000_0000_0000_0000_0000_0000_0000, }, }, ], }, ], msr_modifiers: vec![ // IA32_ARCH_CAPABILITIES: // - Bit 09: MCU_CONTROL // - Bit 10: MISC_PACKAGE_CTLS // - Bit 11: ENERGY_FILTERING_CTL // - Bit 12: DOITM // - Bit 16: Reserved // - Bit 18: FB_CLEAR_CTRL // - Bit 20: BHI_NO // - Bit 21: XAPIC_DISABLE_STATUS // - Bit 22: Reserved // - Bit 23: OVERCLOCKING_STATUS // - Bit 25: GDS_CTRL // - Bits 63-27: Reserved (Intel SDM) // // As T2CL template does not aim to provide an ability to migrate securely guests across // different processors, there is no need to mask hardware security mitigation bits off // only to make it appear to the guest as if it's running on the most vulnerable of the // supported processors. Guests might be able to benefit from performance improvements // by making the most use of available mitigations on the processor. Thus, T2CL template // passes through security mitigation bits that KVM thinks are able to be passed // through. The list of such bits are found in the following link. 
// https://elixir.bootlin.com/linux/v6.8.2/source/arch/x86/kvm/x86.c#L1621 // - Bit 00: RDCL_NO // - Bit 01: IBRS_ALL // - Bit 02: RSBA // - Bit 03: SKIP_L1DFL_VMENTRY // - Bit 04: SSB_NO // - Bit 05: MDS_NO // - Bit 06: IF_PSCHANGE_MC_NO // - Bit 07: TSX_CTRL // - Bit 08: TAA_NO // - Bit 13: SBDR_SSDP_NO // - Bit 14: FBSDP_NO // - Bit 15: PSDP_NO // - Bit 17: FB_CLEAR // - Bit 19: RRSBA // - Bit 24: PBRSB_NO // - Bit 26: GDS_NO // - Bit 27: RFDS_NO // - Bit 28: RFDS_CLEAR // // Note that this MSR is specific to Intel processors. RegisterModifier { addr: 0x10a, bitmap: RegisterValueFilter { filter: 0b1111_1111_1111_1111_1111_1111_1111_1111_1110_0010_1111_0101_0001_1110_0000_0000, value: 0b0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000, }, }, ], ..Default::default() } } ================================================ FILE: src/vmm/src/cpu_config/x86_64/static_cpu_templates/t2s.rs ================================================ // Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 use crate::cpu_config::templates::{CustomCpuTemplate, RegisterValueFilter}; use crate::cpu_config::x86_64::cpuid::KvmCpuidFlags; use crate::cpu_config::x86_64::custom_cpu_template::{ CpuidLeafModifier, CpuidRegister, CpuidRegisterModifier, RegisterModifier, }; /// T2S template /// /// Mask CPUID to make exposed CPU features as close as possbile to AWS T2 instance and allow /// migrating snapshots between hosts with Intel Skylake and Cascade Lake securely. /// /// Reference: /// - Intel SDM: /// - CPUID Enumeration and Architectural MSRs: #[allow(clippy::unusual_byte_groupings)] pub fn t2s() -> CustomCpuTemplate { CustomCpuTemplate { cpuid_modifiers: vec![ CpuidLeafModifier { leaf: 0x1, subleaf: 0x0, flags: KvmCpuidFlags(0), modifiers: vec![ // EAX: Version Information // - Bits 03-00: Stepping ID. // - Bits 07-04: Model. // - Bits 11-08: Family. // - Bits 13-12: Processor Type. 
// - Bits 19-16: Extended Model ID. // - Bits 27-20: Extended Family ID. CpuidRegisterModifier { register: CpuidRegister::Eax, bitmap: RegisterValueFilter { filter: 0b0000_11111111_1111_00_11_1111_1111_1111, value: 0b0000_00000000_0011_00_00_0110_1111_0010, }, }, // ECX: Feature Information // - Bit 02: DTES64 // - Bit 03: MONITOR // - Bit 04: DS-CPL // - Bit 05: VMX // - Bit 06: SMX // - Bit 07: EIST // - Bit 08: TM2 // - Bit 10: CNXT-ID // - Bit 11: SDBG // - Bit 14: xTPR Update Control // - Bit 15: PDCM // - Bit 18: DCA CpuidRegisterModifier { register: CpuidRegister::Ecx, bitmap: RegisterValueFilter { filter: 0b0000_0000_0000_0100_1100_1101_1111_1100, value: 0b0000_0000_0000_0000_0000_0000_0000_0000, }, }, // EDX: Feature Information // - Bit 07: MCE // - Bit 12: MTRR // - Bit 18: PSN // - Bit 21: DS // - Bit 22: ACPI // - Bit 27: SS // - Bit 29: TM // - Bit 30: IA-64 (deprecated) https://www.intel.com/content/dam/www/public/us/en/documents/manuals/itanium-architecture-vol-4-manual.pdf // - Bit 31: PBE CpuidRegisterModifier { register: CpuidRegister::Edx, bitmap: RegisterValueFilter { filter: 0b1110_1000_0110_0100_0001_0000_1000_0000, value: 0b0000_0000_0000_0000_0001_0000_1000_0000, }, }, ], }, CpuidLeafModifier { leaf: 0x7, subleaf: 0x0, flags: KvmCpuidFlags(1), modifiers: vec![ // EBX: // - Bit 02: SGX // - Bit 04: HLE // - Bit 09: Enhanced REP MOVSB/STOSB // - Bit 11: RTM // - Bit 12: RDT-M // - Bit 14: MPX // - Bit 15: RDT-A // - Bit 16: AVX512F // - Bit 17: AVX512DQ // - Bit 18: RDSEED // - Bit 19: ADX // - Bit 21: AVX512_IFMA // - Bit 22: PCOMMIT (deprecated) https://www.intel.com/content/www/us/en/developer/articles/technical/deprecate-pcommit-instruction.html // - Bit 23: CLFLUSHOPT // - Bit 24: CLWB // - Bit 25: Intel Processor Trace // - Bit 26: AVX512PF // - Bit 27: AVX512ER // - Bit 28: AVX512CD // - Bit 29: SHA // - Bit 30: AVX512BW // - Bit 31: AVX512VL CpuidRegisterModifier { register: CpuidRegister::Ebx, bitmap: RegisterValueFilter { filter: 
0b1111_1111_1110_1111_1101_1010_0001_0100, value: 0b0000_0000_0000_0000_0000_0010_0000_0000, }, }, // ECX: // - Bit 01: AVX512_VBMI // - Bit 02: UMIP // - Bit 03: PKU // - Bit 04: OSPKE // - Bit 06: AVX512_VBMI2 // - Bit 08: GFNI // - Bit 09: VAES // - Bit 10: VPCLMULQDQ // - Bit 11: AVX512_VNNI // - Bit 12: AVX512_BITALG // - Bit 14: AVX512_VPOPCNTDQ // - Bit 16: LA57 // - Bit 22: RDPID // - Bit 30: SGX_LC CpuidRegisterModifier { register: CpuidRegister::Ecx, bitmap: RegisterValueFilter { filter: 0b0100_0000_0100_0001_0101_1111_0101_1110, value: 0b0000_0000_0000_0000_0000_0000_0000_0000, }, }, // EDX: // - Bit 02: AVX512_4VNNIW // - Bit 03: AVX512_4FMAPS // - Bit 04: Fast Short REP MOV // - Bit 08: AVX512_VP2INTERSECT CpuidRegisterModifier { register: CpuidRegister::Edx, bitmap: RegisterValueFilter { filter: 0b0000_0000_0000_0000_0000_0001_0001_1100, value: 0b0000_0000_0000_0000_0000_0000_0000_0000, }, }, ], }, CpuidLeafModifier { leaf: 0xd, subleaf: 0x0, flags: KvmCpuidFlags(1), modifiers: vec![ // EAX: // - Bits 04-03: MPX state // - Bits 07-05: AVX-512 state // - Bit 09: PKRU state CpuidRegisterModifier { register: CpuidRegister::Eax, bitmap: RegisterValueFilter { filter: 0b0000_0000_0000_0000_0000_00_1_0_111_11_000, value: 0b0000_0000_0000_0000_0000_00_0_0_000_00_000, }, }, ], }, CpuidLeafModifier { leaf: 0xd, subleaf: 0x1, flags: KvmCpuidFlags(1), modifiers: vec![ // EAX: // - Bit 01: Supports XSAVEC and the compacted form of XRSTOR // - Bit 02: Supports XGETBV // - Bit 03: Supports XSAVES/XRSTORS and IA32_XSS CpuidRegisterModifier { register: CpuidRegister::Eax, bitmap: RegisterValueFilter { filter: 0b0000_0000_0000_0000_0000_0000_0000_1110, value: 0b0000_0000_0000_0000_0000_0000_0000_0000, }, }, ], }, CpuidLeafModifier { leaf: 0x80000001, subleaf: 0x0, flags: KvmCpuidFlags(0), modifiers: vec![ // ECX: // - Bit 08: PREFETCHW // - Bit 29: MONITORX and MWAITX CpuidRegisterModifier { register: CpuidRegister::Ecx, bitmap: RegisterValueFilter { filter: 
0b0010_0000_0000_0000_0000_0001_0000_0000,
                        value: 0b0000_0000_0000_0000_0000_0000_0000_0000,
                    },
                },
                // EDX:
                // - Bit 26: 1-GByte pages
                CpuidRegisterModifier {
                    register: CpuidRegister::Edx,
                    bitmap: RegisterValueFilter {
                        filter: 0b0000_0100_0000_0000_0000_0000_0000_0000,
                        value: 0b0000_0000_0000_0000_0000_0000_0000_0000,
                    },
                },
            ],
        },
        CpuidLeafModifier {
            leaf: 0x80000008,
            subleaf: 0x0,
            flags: KvmCpuidFlags(0),
            modifiers: vec![
                // EBX:
                // - Bit 09: WBNOINVD
                CpuidRegisterModifier {
                    register: CpuidRegister::Ebx,
                    bitmap: RegisterValueFilter {
                        filter: 0b0000_0000_0000_0000_0000_0010_0000_0000,
                        value: 0b0000_0000_0000_0000_0000_0000_0000_0000,
                    },
                },
            ],
        },
    ],
    msr_modifiers: vec![
        // IA32_ARCH_CAPABILITIES:
        // - Bit 00: RDCL_NO
        // - Bit 01: IBRS_ALL
        // - Bit 02: RSBA
        // - Bit 03: SKIP_L1DFL_VMENTRY
        // - Bit 04: SSB_NO
        // - Bit 05: MDS_NO
        // - Bit 06: IF_PSCHANGE_MC_NO
        // - Bit 07: TSX_CTRL
        // - Bit 08: TAA_NO
        // - Bit 09: MCU_CONTROL
        // - Bit 10: MISC_PACKAGE_CTLS
        // - Bit 11: ENERGY_FILTERING_CTL
        // - Bit 12: DOITM
        // - Bit 13: SBDR_SSDP_NO
        // - Bit 14: FBSDP_NO
        // - Bit 15: PSDP_NO
        // - Bit 16: Reserved
        // - Bit 17: FB_CLEAR
        // - Bit 18: FB_CLEAR_CTRL
        // - Bit 19: RRSBA
        // - Bit 20: BHI_NO
        // - Bit 21: XAPIC_DISABLE_STATUS
        // - Bit 22: Reserved
        // - Bit 23: OVERCLOCKING_STATUS
        // - Bit 24: PBRSB_NO
        // - Bit 25: GDS_CTRL
        // - Bit 26: GDS_NO
        // - Bit 27: RFDS_NO
        // - Bits 63-28: Reserved
        //
        // Unlike T2CL, T2S masks the whole MSR (filter is all ones) and pins it to a fixed
        // value, so snapshots can be migrated between Skylake and Cascade Lake hosts.
        RegisterModifier {
            addr: 0x10a,
            bitmap: RegisterValueFilter {
                filter: 0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111,
                value: 0b0000_0000_0000_0000_0000_0000_0000_0000_0000_1100_0000_1000_0000_1100_0100_1100,
            },
        }],
    ..Default::default()
    }
}


================================================
FILE: src/vmm/src/cpu_config/x86_64/test_utils.rs
================================================
// Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0 use super::custom_cpu_template::{ CpuidLeafModifier, CpuidRegister, CpuidRegisterModifier, RegisterModifier, }; use crate::cpu_config::templates::{CustomCpuTemplate, RegisterValueFilter}; use crate::cpu_config::x86_64::cpuid::KvmCpuidFlags; /// Test CPU template in JSON format pub const TEST_TEMPLATE_JSON: &str = r#"{ "cpuid_modifiers": [ { "leaf": "0x80000001", "subleaf": "0x0007", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0bx00100xxx1xxxxxxxxxxxxxxxxxxxxx1" } ] }, { "leaf": "0x80000002", "subleaf": "0x0004", "flags": 0, "modifiers": [ { "register": "ebx", "bitmap": "0bxxx1xxxxxxxxxxxxxxxxxxxxx1" }, { "register": "ecx", "bitmap": "0bx00100xxx1xxxxxxxxxxx0xxxxx0xxx1" } ] }, { "leaf": "0x80000003", "subleaf": "0x0004", "flags": 0, "modifiers": [ { "register": "edx", "bitmap": "0bx00100xxx1xxxxxxxxxxx0xxxxx0xxx1" } ] }, { "leaf": "0x80000004", "subleaf": "0x0004", "flags": 0, "modifiers": [ { "register": "edx", "bitmap": "0b00100xxx1xxxxxx1xxxxxxxxxxxxxx1" }, { "register": "ecx", "bitmap": "0bx00100xxx1xxxxxxxxxxxxx111xxxxx1" } ] }, { "leaf": "0x80000005", "subleaf": "0x0004", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0bx00100xxx1xxxxx00xxxxxx000xxxxx1" }, { "register": "edx", "bitmap": "0bx10100xxx1xxxxxxxxxxxxx000xxxxx1" } ] } ], "msr_modifiers": [ { "addr": "0x0", "bitmap": "0bx00100xxx1xxxx00xxx1xxxxxxxxxxx1" }, { "addr": "0x1", "bitmap": "0bx00111xxx1xxxx111xxxxx101xxxxxx1" }, { "addr": "0b11", "bitmap": "0bx00100xxx1xxxxxx0000000xxxxxxxx1" }, { "addr": "0xbbca", "bitmap": "0bx00100xxx1xxxxxxxxx1" } ] }"#; /// Test CPU template in JSON format but has an invalid field for the architecture. 
/// "reg_modifiers" is the field name for the registers for aarch64, which is invalid on x86_64.
pub const TEST_INVALID_TEMPLATE_JSON: &str = r#"{
    "reg_modifiers": [
        {
            "addr": "0x0AAC",
            "bitmap": "0b1xx1"
        }
    ]
}"#;

/// Builds a sample custom CPU template with one CPUID leaf modifier (all four registers)
/// and two MSR modifiers, for use in unit tests.
pub fn build_test_template() -> CustomCpuTemplate {
    CustomCpuTemplate {
        cpuid_modifiers: vec![CpuidLeafModifier {
            leaf: 0x3,
            subleaf: 0x0,
            // Leaf 0x3 is marked stateful so the test covers non-zero KVM CPUID flags.
            flags: KvmCpuidFlags(kvm_bindings::KVM_CPUID_FLAG_STATEFUL_FUNC),
            modifiers: vec![
                CpuidRegisterModifier {
                    register: CpuidRegister::Eax,
                    bitmap: RegisterValueFilter {
                        filter: 0b0111,
                        value: 0b0101,
                    },
                },
                CpuidRegisterModifier {
                    register: CpuidRegister::Ebx,
                    bitmap: RegisterValueFilter {
                        filter: 0b0111,
                        value: 0b0100,
                    },
                },
                CpuidRegisterModifier {
                    register: CpuidRegister::Ecx,
                    bitmap: RegisterValueFilter {
                        filter: 0b0111,
                        value: 0b0111,
                    },
                },
                CpuidRegisterModifier {
                    register: CpuidRegister::Edx,
                    bitmap: RegisterValueFilter {
                        filter: 0b0111,
                        value: 0b0001,
                    },
                },
            ],
        }],
        // Two arbitrary MSR addresses with empty (no-op) filters.
        msr_modifiers: vec![
            RegisterModifier {
                addr: 0x9999,
                bitmap: RegisterValueFilter {
                    filter: 0,
                    value: 0,
                },
            },
            RegisterModifier {
                addr: 0x8000,
                bitmap: RegisterValueFilter {
                    filter: 0,
                    value: 0,
                },
            },
        ],
        ..Default::default()
    }
}


================================================
FILE: src/vmm/src/device_manager/acpi.rs
================================================
// Copyright 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0 #[cfg(target_arch = "x86_64")] use acpi_tables::{Aml, aml}; use crate::Vm; use crate::devices::acpi::vmclock::{VmClock, VmClockError}; use crate::devices::acpi::vmgenid::{VmGenId, VmGenIdError}; use crate::vstate::memory::GuestMemoryMmap; #[derive(Debug, thiserror::Error, displaydoc::Display)] pub enum ACPIDeviceError { /// VMGenID: {0} VmGenId(#[from] VmGenIdError), /// VMClock: {0} VmClock(#[from] VmClockError), /// Could not register IRQ with KVM: {0} RegisterIrq(#[from] kvm_ioctls::Error), } // Although both VMGenID and VMClock devices are always present, they should be instantiated when // they are attached to preserve the existing ordering of GSI allocation. #[derive(Debug, Default)] pub struct ACPIDeviceManager { /// VMGenID device vmgenid: Option, /// VMclock device vmclock: Option, } impl ACPIDeviceManager { /// Create a new ACPIDeviceManager object pub fn new(vmgenid: VmGenId, vmclock: VmClock) -> Self { ACPIDeviceManager { vmgenid: Some(vmgenid), vmclock: Some(vmclock), } } pub fn attach_vmgenid(&mut self, vm: &Vm) -> Result<(), ACPIDeviceError> { self.vmgenid = Some(VmGenId::new(&mut vm.resource_allocator())?); Ok(()) } pub fn attach_vmclock(&mut self, vm: &Vm) -> Result<(), ACPIDeviceError> { self.vmclock = Some(VmClock::new(&mut vm.resource_allocator())?); Ok(()) } pub fn vmgenid(&self) -> &VmGenId { self.vmgenid.as_ref().expect("Missing VMGenID device") } pub fn vmclock(&self) -> &VmClock { self.vmclock.as_ref().expect("Missing VMClock device") } pub fn activate_vmgenid(&self, vm: &Vm) -> Result<(), ACPIDeviceError> { vm.register_irq(&self.vmgenid().interrupt_evt, self.vmgenid().gsi)?; self.vmgenid().activate(vm.guest_memory())?; Ok(()) } pub fn activate_vmclock(&self, vm: &Vm) -> Result<(), ACPIDeviceError> { vm.register_irq(&self.vmclock().interrupt_evt, self.vmclock().gsi)?; self.vmclock().activate(vm.guest_memory())?; Ok(()) } pub fn do_post_restore_vmgenid(&self) -> Result<(), ACPIDeviceError> { 
self.vmgenid().do_post_restore()?; Ok(()) } pub fn do_post_restore_vmclock( &mut self, mem: &GuestMemoryMmap, ) -> Result<(), ACPIDeviceError> { self.vmclock .as_mut() .expect("Missing VMClock device") .do_post_restore(mem)?; Ok(()) } } #[cfg(target_arch = "x86_64")] impl Aml for ACPIDeviceManager { fn append_aml_bytes(&self, v: &mut Vec) -> Result<(), aml::AmlError> { // AML for [`VmGenId`] device. self.vmgenid().append_aml_bytes(v)?; // AML for [`VmClock`] device. self.vmclock().append_aml_bytes(v)?; // Create the AML for the GED interrupt handler aml::Device::new( "_SB_.GED_".try_into()?, vec![ &aml::Name::new("_HID".try_into()?, &"ACPI0013")?, &aml::Name::new( "_CRS".try_into()?, &aml::ResourceTemplate::new(vec![ &aml::Interrupt::new(true, true, false, false, self.vmgenid().gsi), &aml::Interrupt::new(true, true, false, false, self.vmclock().gsi), ]), )?, // We know that the maximum IRQ number fits in a u8. We have up to // 32 IRQs in x86 and up to 128 in ARM (look into `vmm::crate::arch::layout::GSI_LEGACY_END`). // Both `vmgenid.gsi` and `vmclock.gsi` can safely be cast to `u8` // without truncation, so we let clippy know. &aml::Method::new( "_EVT".try_into()?, 1, true, vec![ &aml::If::new( #[allow(clippy::cast_possible_truncation)] &aml::Equal::new(&aml::Arg(0), &(self.vmgenid().gsi as u8)), vec![&aml::Notify::new( &aml::Path::new("\\_SB_.VGEN")?, &0x80usize, )], ), &aml::If::new( #[allow(clippy::cast_possible_truncation)] &aml::Equal::new(&aml::Arg(0), &(self.vmclock().gsi as u8)), vec![&aml::Notify::new( &aml::Path::new("\\_SB_.VCLK")?, &0x80usize, )], ), ], ), ], ) .append_aml_bytes(v) } } ================================================ FILE: src/vmm/src/device_manager/legacy.rs ================================================ // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 // // Portions Copyright 2017 The Chromium OS Authors. All rights reserved. 
// Use of this source code is governed by a BSD-style license that can be // found in the THIRD-PARTY file. #![cfg(target_arch = "x86_64")] use std::sync::{Arc, Mutex}; use acpi_tables::aml::AmlError; use acpi_tables::{Aml, aml}; use crate::Vm; use crate::devices::legacy::{I8042Device, SerialDevice}; use crate::vstate::bus::BusError; /// Errors corresponding to the `PortIODeviceManager`. #[derive(Debug, derive_more::From, thiserror::Error, displaydoc::Display)] pub enum LegacyDeviceError { /// Failed to add legacy device to Bus: {0} BusError(BusError), /// Failed to create EventFd: {0} EventFd(std::io::Error), } /// The `PortIODeviceManager` is a wrapper that is used for registering legacy devices /// on an I/O Bus. It currently manages the uart and i8042 devices. #[derive(Debug)] pub struct PortIODeviceManager { // BusDevice::Serial pub stdio_serial: Arc>, // BusDevice::I8042Device pub i8042: Arc>, } impl PortIODeviceManager { /// Serial port 1 const COM1_GSI: u32 = 4; /// x86 global system interrupt for keyboard port. /// See . const KBD_EVT_GSI: u32 = 1; /// Legacy serial port device addresses. See /// . const SERIAL_PORT_ADDRESS: u64 = 0x3f8; /// Size of legacy serial ports. const SERIAL_PORT_SIZE: u64 = 0x8; /// i8042 keyboard data register address. See /// . const I8042_KDB_DATA_REGISTER_ADDRESS: u64 = 0x060; /// i8042 keyboard data register size. const I8042_KDB_DATA_REGISTER_SIZE: u64 = 0x5; /// Register supported legacy devices. 
pub fn register_devices(&mut self, vm: &Vm) -> Result<(), LegacyDeviceError> { let io_bus = &vm.pio_bus; io_bus.insert( self.stdio_serial.clone(), Self::SERIAL_PORT_ADDRESS, Self::SERIAL_PORT_SIZE, )?; io_bus.insert( self.i8042.clone(), Self::I8042_KDB_DATA_REGISTER_ADDRESS, Self::I8042_KDB_DATA_REGISTER_SIZE, )?; vm.register_irq( self.stdio_serial .lock() .expect("Poisoned lock") .serial .interrupt_evt(), Self::COM1_GSI, ) .map_err(|e| LegacyDeviceError::EventFd(std::io::Error::from_raw_os_error(e.errno())))?; vm.register_irq( &self.i8042.lock().expect("Poisoned lock").kbd_interrupt_evt, Self::KBD_EVT_GSI, ) .map_err(|e| LegacyDeviceError::EventFd(std::io::Error::from_raw_os_error(e.errno())))?; Ok(()) } pub(crate) fn append_aml_bytes(bytes: &mut Vec) -> Result<(), AmlError> { // Setup COM1 aml::Device::new( "_SB_.COM1".try_into()?, vec![ &aml::Name::new("_HID".try_into()?, &aml::EisaName::new("PNP0501")?)?, &aml::Name::new("_UID".try_into()?, &0u8)?, &aml::Name::new("_DDN".try_into()?, &"COM1")?, &aml::Name::new( "_CRS".try_into().unwrap(), &aml::ResourceTemplate::new(vec![ &aml::Interrupt::new(true, true, false, false, Self::COM1_GSI), &aml::Io::new( Self::SERIAL_PORT_ADDRESS.try_into().unwrap(), Self::SERIAL_PORT_ADDRESS.try_into().unwrap(), 1, Self::SERIAL_PORT_SIZE.try_into().unwrap(), ), ]), )?, ], ) .append_aml_bytes(bytes)?; // Setup i8042 aml::Device::new( "_SB_.PS2_".try_into()?, vec![ &aml::Name::new("_HID".try_into()?, &aml::EisaName::new("PNP0303")?)?, &aml::Method::new( "_STA".try_into()?, 0, false, vec![&aml::Return::new(&0x0fu8)], ), &aml::Name::new( "_CRS".try_into()?, &aml::ResourceTemplate::new(vec![ &aml::Io::new( PortIODeviceManager::I8042_KDB_DATA_REGISTER_ADDRESS .try_into() .unwrap(), PortIODeviceManager::I8042_KDB_DATA_REGISTER_ADDRESS .try_into() .unwrap(), 1u8, 1u8, ), // Fake a command port so Linux stops complaining &aml::Io::new(0x0064, 0x0064, 1u8, 1u8), &aml::Interrupt::new(true, true, false, false, Self::KBD_EVT_GSI), ]), )?, ], ) 
.append_aml_bytes(bytes) } } #[cfg(test)] mod tests { use libc::EFD_NONBLOCK; use vm_superio::Serial; use vmm_sys_util::eventfd::EventFd; use super::*; use crate::devices::legacy::serial::SerialOut; use crate::devices::legacy::{EventFdTrigger, SerialEventsWrapper}; use crate::vstate::vm::tests::setup_vm_with_memory; #[test] fn test_register_legacy_devices() { let (_, vm) = setup_vm_with_memory(0x1000); vm.setup_irqchip().unwrap(); let mut ldm = PortIODeviceManager { stdio_serial: Arc::new(Mutex::new(SerialDevice { serial: Serial::with_events( EventFdTrigger::new(EventFd::new(EFD_NONBLOCK).unwrap()), SerialEventsWrapper { buffer_ready_event_fd: None, }, SerialOut::Sink, ), input: None, })), i8042: Arc::new(Mutex::new( I8042Device::new(EventFd::new(libc::EFD_NONBLOCK).unwrap()).unwrap(), )), }; ldm.register_devices(&vm).unwrap(); } } ================================================ FILE: src/vmm/src/device_manager/mmio.rs ================================================ // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 // // Portions Copyright 2017 The Chromium OS Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the THIRD-PARTY file. 
use std::collections::HashMap;
use std::fmt::Debug;
use std::sync::{Arc, Mutex};

#[cfg(target_arch = "x86_64")]
use acpi_tables::{Aml, aml};
use event_manager::SubscriberOps;
use kvm_ioctls::IoEventAddress;
use linux_loader::cmdline as kernel_cmdline;
#[cfg(target_arch = "x86_64")]
use log::debug;
use serde::{Deserialize, Serialize};
use vm_allocator::AllocPolicy;

use crate::arch::BOOT_DEVICE_MEM_START;
#[cfg(target_arch = "aarch64")]
use crate::arch::{RTC_MEM_START, SERIAL_MEM_START};
#[cfg(target_arch = "aarch64")]
use crate::devices::legacy::{RTCDevice, SerialDevice};
use crate::devices::pseudo::BootTimer;
use crate::devices::virtio::device::{VirtioDevice, VirtioDeviceType};
use crate::devices::virtio::transport::mmio::MmioTransport;
use crate::vstate::bus::{Bus, BusError};
#[cfg(target_arch = "x86_64")]
use crate::vstate::memory::GuestAddress;
use crate::vstate::resources::ResourceAllocator;
use crate::{EventManager, Vm};

/// Errors for MMIO device manager.
///
/// The doc comments double as the runtime `Display` messages via `displaydoc`.
#[derive(Debug, thiserror::Error, displaydoc::Display)]
pub enum MmioError {
    /// Failed to allocate requested resource: {0}
    Allocator(#[from] vm_allocator::Error),
    /// Failed to insert device on the bus: {0}
    BusInsert(#[from] BusError),
    // Fix: this message was a copy-paste of the Allocator one (with a typo,
    // "resourc"), which made cmdline failures report a bogus allocation error.
    /// Failed to add device to kernel command line: {0}
    Cmdline(#[from] linux_loader::cmdline::Error),
    /// Could not create IRQ for MMIO device: {0}
    CreateIrq(#[from] std::io::Error),
    /// Invalid MMIO IRQ configuration.
    InvalidIrqConfig,
    /// Failed to register IO event: {0}
    RegisterIoEvent(kvm_ioctls::Error),
    /// Failed to register irqfd: {0}
    RegisterIrqFd(kvm_ioctls::Error),
    #[cfg(target_arch = "x86_64")]
    /// Failed to create AML code for device
    AmlError(#[from] aml::AmlError),
}

/// This represents the size of the mmio device specified to the kernel through ACPI and as a
/// command line option.
/// It has to be larger than 0x100 (the offset where the configuration space starts from
/// the beginning of the memory mapped device registers) + the size of the configuration space
/// Currently hardcoded to 4K.
pub const MMIO_LEN: u64 = 0x1000;

/// Stores the address range and irq allocated to this device.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub struct MMIODeviceInfo {
    /// Mmio address at which the device is registered.
    pub addr: u64,
    /// Mmio addr range length.
    pub len: u64,
    /// Used GSI (interrupt line) for the device.
    // Fix: the `<u32>` parameter was stripped during extraction; `u32` is established by the
    // `register_irq`/`aml::Interrupt::new` call sites and `allocate_gsi_legacy` below.
    pub gsi: Option<u32>,
}

/// Emit the ACPI AML description (`_SB_.Vnnn` device node) for one VirtIO MMIO device.
///
/// `dsdt_data` accumulates the DSDT byte stream; `addr`/`len` are the MMIO window and
/// `gsi` the interrupt line assigned to the device.
#[cfg(target_arch = "x86_64")]
fn add_virtio_aml(
    dsdt_data: &mut Vec<u8>,
    addr: u64,
    len: u64,
    gsi: u32,
) -> Result<(), aml::AmlError> {
    // Device IDs are numbered from the first legacy GSI so names stay in allocation order.
    let dev_id = gsi - crate::arch::GSI_LEGACY_START;
    debug!(
        "acpi: Building AML for VirtIO device _SB_.V{:03}. memory range: {:#010x}:{} gsi: {}",
        dev_id, addr, len, gsi
    );
    aml::Device::new(
        format!("V{:03}", dev_id).as_str().try_into()?,
        vec![
            // LNRO0005 is the ACPI HID Linux associates with virtio-mmio.
            &aml::Name::new("_HID".try_into()?, &"LNRO0005")?,
            &aml::Name::new("_UID".try_into()?, &dev_id)?,
            &aml::Name::new("_CCA".try_into()?, &aml::ONE)?,
            &aml::Name::new(
                "_CRS".try_into()?,
                &aml::ResourceTemplate::new(vec![
                    &aml::Memory32Fixed::new(
                        true,
                        addr.try_into().unwrap(),
                        len.try_into().unwrap(),
                    ),
                    &aml::Interrupt::new(true, true, false, false, gsi),
                ]),
            )?,
        ],
    )
    .append_aml_bytes(dsdt_data)
}

#[derive(Debug, Clone)]
/// A descriptor for MMIO devices
pub struct MMIODevice {
    /// MMIO resources allocated to the device
    pub(crate) resources: MMIODeviceInfo,
    /// The actual device
    // Fix: `<Mutex<MmioTransport>>` was stripped during extraction; restored from the
    // `inner.lock()…locked_device()` usage in `register_mmio_virtio`.
    pub(crate) inner: Arc<Mutex<MmioTransport>>,
    /// The subscriber ID returned by the EventManager
    // Fix: generic stripped during extraction; the value assigned comes from
    // `event_manager.add_subscriber`. Qualified path avoids relying on an extra import.
    pub(crate) sub_id: Option<event_manager::SubscriberId>,
}

/// Manages the complexities of registering a MMIO device.
#[derive(Debug, Default)]
pub struct MMIODeviceManager {
    /// VirtIO devices using an MMIO transport layer
    pub(crate) virtio_devices: HashMap<(VirtioDeviceType, String), MMIODevice<MmioTransport>>,
    /// Boot timer device
    pub(crate) boot_timer: Option<MMIODevice<BootTimer>>,
    #[cfg(target_arch = "aarch64")]
    /// Real-Time clock on Aarch64 platforms
    pub(crate) rtc: Option<MMIODevice<RTCDevice>>,
    #[cfg(target_arch = "aarch64")]
    /// Serial device on Aarch64 platforms
    pub(crate) serial: Option<MMIODevice<SerialDevice>>,
    #[cfg(target_arch = "x86_64")]
    // AML byte code is accumulated in device-creation order so that the root
    // block device ends up first in the DSDT and shows up as `/dev/vda` in the
    // guest. Iterating the bus afterwards would lose that ordering, hence the
    // incremental buffer.
    pub(crate) dsdt_data: Vec<u8>,
}

impl MMIODeviceManager {
    /// Create a new DeviceManager handling mmio devices (virtio net, block).
    pub fn new() -> MMIODeviceManager {
        MMIODeviceManager::default()
    }

    /// Allocates resources for a new device to be added.
    ///
    /// Reserves `irq_count` legacy GSIs (0 or 1 are the only supported values)
    /// and one `MMIO_LEN`-sized, `MMIO_LEN`-aligned 32-bit MMIO window.
    fn allocate_mmio_resources(
        &mut self,
        resource_allocator: &mut ResourceAllocator,
        irq_count: u32,
    ) -> Result<MMIODeviceInfo, MmioError> {
        // Reserve the interrupt line(s) first; anything beyond a single GSI is
        // rejected since our devices are hardcoded to at most one IRQ.
        let mut gsis = resource_allocator.allocate_gsi_legacy(irq_count)?;
        let gsi = match gsis.len() {
            0 => None,
            1 => gsis.pop(),
            _ => return Err(MmioError::InvalidIrqConfig),
        };

        let addr = resource_allocator.allocate_32bit_mmio_memory(
            MMIO_LEN,
            MMIO_LEN,
            AllocPolicy::FirstMatch,
        )?;

        Ok(MMIODeviceInfo {
            addr,
            len: MMIO_LEN,
            gsi,
        })
    }

    /// Register a virtio-over-MMIO device to be used via MMIO transport at a specific slot.
    ///
    /// Wires up one ioeventfd per queue (demultiplexed by KVM through the
    /// datamatch value), registers the device interrupt, inserts the device on
    /// the MMIO bus and subscribes it to the event manager.
    pub fn register_mmio_virtio(
        &mut self,
        vm: &Vm,
        device_id: String,
        mut device: MMIODevice<MmioTransport>,
        event_manager: &mut EventManager,
    ) -> Result<(), MmioError> {
        // Our virtio devices are currently hardcoded to use a single IRQ.
        // Validate that requirement.
        let gsi = device.resources.gsi.ok_or(MmioError::InvalidIrqConfig)?;

        let identifier = {
            let transport = device.inner.lock().expect("Poisoned lock");
            let virtio = transport.locked_device();

            // All queues notify through the same MMIO register; KVM tells them
            // apart via the per-queue datamatch value, so the address is
            // loop-invariant.
            let io_addr = IoEventAddress::Mmio(
                device.resources.addr + u64::from(crate::devices::virtio::NOTIFY_REG_OFFSET),
            );
            for (queue_index, queue_evt) in virtio.queue_events().iter().enumerate() {
                vm.fd()
                    .register_ioevent(queue_evt, &io_addr, u32::try_from(queue_index).unwrap())
                    .map_err(MmioError::RegisterIoEvent)?;
            }

            vm.register_irq(&transport.interrupt.irq_evt, gsi)
                .map_err(MmioError::RegisterIrqFd)?;

            (virtio.device_type(), device_id)
        };

        vm.common.mmio_bus.insert(
            device.inner.clone(),
            device.resources.addr,
            device.resources.len,
        )?;

        let sub_id =
            event_manager.add_subscriber(device.inner.lock().expect("Poisoned lock").device());
        device.sub_id = Some(sub_id);

        self.virtio_devices.insert(identifier, device);
        Ok(())
    }

    /// Append a registered virtio-over-MMIO device to the kernel cmdline.
    #[cfg(target_arch = "x86_64")]
    pub fn add_virtio_device_to_cmdline(
        cmdline: &mut kernel_cmdline::Cmdline,
        device_info: &MMIODeviceInfo,
    ) -> Result<(), MmioError> {
        // As per kernel docs, `[virtio_mmio.]device=<size>@<baseaddr>:<irq>`
        // must be appended to the command line for virtio-mmio devices to be
        // recognized; the cmdline helper converts the size to KiB and formats
        // the values in decimal.
        cmdline
            .add_virtio_mmio_device(
                device_info.len,
                GuestAddress(device_info.addr),
                // Registered-for-boot devices always carry one IRQ.
                device_info.gsi.unwrap(),
                None,
            )
            .map_err(MmioError::Cmdline)
    }

    /// Allocate slot and register an already created virtio-over-MMIO device. Also Adds the device
    /// to the boot cmdline.
    pub fn register_mmio_virtio_for_boot(
        &mut self,
        vm: &Vm,
        device_id: String,
        mmio_device: MmioTransport,
        event_manager: &mut EventManager,
        _cmdline: &mut kernel_cmdline::Cmdline,
    ) -> Result<(), MmioError> {
        let resources = self.allocate_mmio_resources(&mut vm.resource_allocator(), 1)?;
        let device = MMIODevice {
            resources,
            inner: Arc::new(Mutex::new(mmio_device)),
            sub_id: None,
        };

        #[cfg(target_arch = "x86_64")]
        {
            Self::add_virtio_device_to_cmdline(_cmdline, &device.resources)?;
            // We asked `allocate_mmio_resources` for exactly one IRQ above, so
            // `gsi` is guaranteed to be `Some`.
            add_virtio_aml(
                &mut self.dsdt_data,
                device.resources.addr,
                device.resources.len,
                device.resources.gsi.unwrap(),
            )?;
        }

        self.register_mmio_virtio(vm, device_id, device, event_manager)?;
        Ok(())
    }

    #[cfg(target_arch = "aarch64")]
    /// Register an early console at the specified MMIO configuration if given as parameter,
    /// otherwise allocate a new MMIO resources for it.
    pub fn register_mmio_serial(
        &mut self,
        vm: &Vm,
        serial: Arc<Mutex<SerialDevice>>,
        device_info_opt: Option<MMIODeviceInfo>,
    ) -> Result<(), MmioError> {
        // Boot path: allocate fresh resources at the fixed serial address.
        // Restore path: reuse the resources saved in the snapshot.
        let device_info = match device_info_opt {
            Some(info) => info,
            None => {
                let gsi = vm.resource_allocator().allocate_gsi_legacy(1)?;
                MMIODeviceInfo {
                    addr: SERIAL_MEM_START,
                    len: MMIO_LEN,
                    gsi: Some(gsi[0]),
                }
            }
        };

        vm.register_irq(
            serial.lock().expect("Poisoned lock").serial.interrupt_evt(),
            device_info.gsi.unwrap(),
        )
        .map_err(MmioError::RegisterIrqFd)?;

        let device = MMIODevice {
            resources: device_info,
            inner: serial,
            sub_id: None,
        };
        vm.common.mmio_bus.insert(
            device.inner.clone(),
            device.resources.addr,
            device.resources.len,
        )?;

        self.serial = Some(device);
        Ok(())
    }

    #[cfg(target_arch = "aarch64")]
    /// Append the registered early console to the kernel cmdline.
    ///
    /// This assumes that the device has been registered with the device manager.
    pub fn add_mmio_serial_to_cmdline(
        &self,
        cmdline: &mut kernel_cmdline::Cmdline,
    ) -> Result<(), MmioError> {
        // Panics if no serial device was registered; callers uphold that
        // invariant (see doc comment above).
        let device = self.serial.as_ref().unwrap();
        cmdline.insert(
            "earlycon",
            &format!("uart,mmio,0x{:08x}", device.resources.addr),
        )?;
        Ok(())
    }

    #[cfg(target_arch = "aarch64")]
    /// Create and register a MMIO RTC device at the specified MMIO configuration if
    /// given as parameter, otherwise allocate a new MMIO resources for it.
    pub fn register_mmio_rtc(
        &mut self,
        vm: &Vm,
        rtc: Arc<Mutex<RTCDevice>>,
        device_info_opt: Option<MMIODeviceInfo>,
    ) -> Result<(), MmioError> {
        // Boot path: allocate fresh resources at the fixed RTC address.
        // Restore path: reuse the resources saved in the snapshot.
        let device_info = match device_info_opt {
            Some(info) => info,
            None => {
                let gsi = vm.resource_allocator().allocate_gsi_legacy(1)?;
                MMIODeviceInfo {
                    addr: RTC_MEM_START,
                    len: MMIO_LEN,
                    gsi: Some(gsi[0]),
                }
            }
        };

        let device = MMIODevice {
            resources: device_info,
            inner: rtc,
            sub_id: None,
        };
        vm.common.mmio_bus.insert(
            device.inner.clone(),
            device.resources.addr,
            device.resources.len,
        )?;

        self.rtc = Some(device);
        Ok(())
    }

    /// Register a boot timer device.
    pub fn register_mmio_boot_timer(
        &mut self,
        mmio_bus: &Bus,
        boot_timer: Arc<Mutex<BootTimer>>,
    ) -> Result<(), MmioError> {
        // The boot timer lives at a fixed address and raises no interrupt.
        let device = MMIODevice {
            resources: MMIODeviceInfo {
                addr: BOOT_DEVICE_MEM_START,
                len: MMIO_LEN,
                gsi: None,
            },
            inner: boot_timer,
            sub_id: None,
        };
        mmio_bus.insert(
            device.inner.clone(),
            device.resources.addr,
            device.resources.len,
        )?;
        self.boot_timer = Some(device);
        Ok(())
    }

    /// Gets the specified device.
    pub fn get_virtio_device(
        &self,
        device_type: VirtioDeviceType,
        device_id: &str,
    ) -> Option<&MMIODevice<MmioTransport>> {
        let key = (device_type, device_id.to_string());
        self.virtio_devices.get(&key)
    }

    /// Run fn for each registered virtio device.
// NOTE(review): several generic-parameter lists in this span were stripped by the
// extraction tooling (e.g. `for_each_virtio_mmio_device` should declare `<F, E>`;
// `Arc>` / `Option>` / `Vec,` / `-> Result {` have lost their type arguments).
// Tokens below are reproduced exactly as found; only comments and line breaks
// were added. Verify against the upstream repository before compiling.
    pub fn for_each_virtio_mmio_device(&self, mut f: F) -> Result<(), E>
    where
        F: FnMut(&VirtioDeviceType, &String, &MMIODevice) -> Result<(), E>,
    {
        for ((device_type, device_id), mmio_device) in &self.virtio_devices {
            f(device_type, device_id, mmio_device)?;
        }
        Ok(())
    }

    // Visits every registered virtio device, handing the callback the device
    // type and a reference to the locked inner VirtioDevice.
    pub fn for_each_virtio_device(&self, mut f: impl FnMut(VirtioDeviceType, &dyn VirtioDevice)) {
        for ((device_type, _), virtio_device) in &self.virtio_devices {
            let device_arc = virtio_device.inner.lock().expect("Poisoned lock").device();
            let virtio_device = device_arc.lock().expect("Poisoned lock");
            f(*device_type, &*virtio_device);
        }
    }

    #[cfg(target_arch = "aarch64")]
    // Collects the MMIO resource descriptors of all registered virtio devices.
    pub fn virtio_device_info(&self) -> Vec<&MMIODeviceInfo> {
        let mut device_info = Vec::new();
        for (_, dev) in self.virtio_devices.iter() {
            device_info.push(&dev.resources);
        }
        device_info
    }

    #[cfg(target_arch = "aarch64")]
    // MMIO resources of the RTC, if one was registered.
    pub fn rtc_device_info(&self) -> Option<&MMIODeviceInfo> {
        self.rtc.as_ref().map(|device| &device.resources)
    }

    #[cfg(target_arch = "aarch64")]
    // MMIO resources of the serial console, if one was registered.
    pub fn serial_device_info(&self) -> Option<&MMIODeviceInfo> {
        self.serial.as_ref().map(|device| &device.resources)
    }
}

#[cfg(test)]
pub(crate) mod tests {
    use std::ops::Deref;
    use std::sync::Arc;

    use event_manager::{EventOps, Events, MutEventSubscriber};
    use vmm_sys_util::eventfd::EventFd;

    use super::*;
    use crate::devices::virtio::ActivateError;
    use crate::devices::virtio::device::{VirtioDevice, VirtioDeviceType};
    use crate::devices::virtio::queue::Queue;
    use crate::devices::virtio::transport::VirtioInterrupt;
    use crate::devices::virtio::transport::mmio::IrqTrigger;
    use crate::test_utils::multi_region_mem_raw;
    use crate::vstate::kvm::Kvm;
    use crate::vstate::memory::{GuestAddress, GuestMemoryMmap};
    use crate::{Vm, arch, impl_device_type};

    const QUEUE_SIZES: &[u16] = &[64];

    impl MMIODeviceManager {
        // Test helper: wraps `device` in an MmioTransport, registers it for
        // boot and returns the MMIO base address it was assigned.
        pub(crate) fn register_virtio_test_device(
            &mut self,
            vm: &Vm,
            guest_mem: GuestMemoryMmap,
            device: Arc>,
            event_manager: &mut EventManager,
            cmdline: &mut kernel_cmdline::Cmdline,
            dev_id: &str,
        ) -> Result {
            let interrupt = Arc::new(IrqTrigger::new());
            let mmio_device = MmioTransport::new(guest_mem, interrupt, device.clone(), false);
            self.register_mmio_virtio_for_boot(
                vm,
                dev_id.to_string(),
                mmio_device,
                event_manager,
                cmdline,
            )?;
            Ok(self
                .get_virtio_device(device.lock().unwrap().device_type(), dev_id)
                .unwrap()
                .resources
                .addr)
        }

        #[cfg(target_arch = "x86_64")]
        /// Gets the number of interrupts used by the devices registered.
        pub fn used_irqs_count(&self) -> usize {
            self.virtio_devices
                .iter()
                .filter(|(_, mmio_dev)| mmio_dev.resources.gsi.is_some())
                .count()
        }
    }

    // Minimal VirtioDevice implementation used to exercise registration paths.
    #[allow(dead_code)]
    #[derive(Debug)]
    pub(crate) struct DummyDevice {
        dummy: u32,
        queues: Vec,
        queue_evts: [EventFd; 1],
        interrupt_trigger: Option>,
    }

    impl DummyDevice {
        pub fn new() -> Self {
            DummyDevice {
                dummy: 0,
                queues: QUEUE_SIZES.iter().map(|&s| Queue::new(s)).collect(),
                queue_evts: [EventFd::new(libc::EFD_NONBLOCK).expect("cannot create eventFD")],
                interrupt_trigger: None,
            }
        }
    }

    impl MutEventSubscriber for DummyDevice {
        fn process(&mut self, _: Events, _: &mut EventOps) {}
        fn init(&mut self, _: &mut EventOps) {}
    }

    impl VirtioDevice for DummyDevice {
        impl_device_type!(VirtioDeviceType::Net);
        fn id(&self) -> &str {
            "dummy"
        }
        fn avail_features(&self) -> u64 {
            0
        }
        fn acked_features(&self) -> u64 {
            0
        }
        fn set_acked_features(&mut self, _: u64) {}
        fn queues(&self) -> &[Queue] {
            &self.queues
        }
        fn queues_mut(&mut self) -> &mut [Queue] {
            &mut self.queues
        }
        fn queue_events(&self) -> &[EventFd] {
            &self.queue_evts
        }
        fn interrupt_trigger(&self) -> &dyn VirtioInterrupt {
            self.interrupt_trigger.as_ref().unwrap().deref()
        }
        fn ack_features_by_page(&mut self, page: u32, value: u32) {
            let _ = page;
            let _ = value;
        }
        fn read_config(&self, offset: u64, data: &mut [u8]) {
            let _ = offset;
            let _ = data;
        }
        fn write_config(&mut self, offset: u64, data: &[u8]) {
            let _ = offset;
            let _ = data;
        }
        fn activate(
            &mut self,
            _: GuestMemoryMmap,
            _: Arc,
        ) -> Result<(), ActivateError> {
            Ok(())
        }
        fn is_activated(&self) -> bool {
            false
        }
    }

    #[test]
    #[cfg_attr(target_arch = "x86_64", allow(unused_mut))]
    fn test_register_virtio_device() {
        let start_addr1 = GuestAddress(0x0);
        let start_addr2 = GuestAddress(0x1000);
        let guest_mem = multi_region_mem_raw(&[(start_addr1, 0x1000), (start_addr2, 0x1000)]);
        let kvm = Kvm::new(vec![]).expect("Cannot create Kvm");
        let mut vm = Vm::new(&kvm).unwrap();
        vm.register_dram_memory_regions(guest_mem).unwrap();
        let mut device_manager = MMIODeviceManager::new();
        let mut cmdline = kernel_cmdline::Cmdline::new(4096).unwrap();
        let dummy = Arc::new(Mutex::new(DummyDevice::new()));
        #[cfg(target_arch = "x86_64")]
        vm.setup_irqchip().unwrap();
        #[cfg(target_arch = "aarch64")]
        vm.setup_irqchip(1).unwrap();
        let mut event_manager = EventManager::new().unwrap();
        device_manager
            .register_virtio_test_device(
                &vm,
                vm.guest_memory().clone(),
                dummy,
                &mut event_manager,
                &mut cmdline,
                "dummy",
            )
            .unwrap();
        assert!(
            device_manager
                .get_virtio_device(VirtioDeviceType::Net, "foo")
                .is_none()
        );
        let dev = device_manager
            .get_virtio_device(VirtioDeviceType::Net, "dummy")
            .unwrap();
        assert_eq!(dev.resources.addr, arch::MEM_32BIT_DEVICES_START);
        assert_eq!(dev.resources.len, MMIO_LEN);
        assert_eq!(dev.resources.gsi, Some(arch::GSI_LEGACY_START));
        device_manager
            .for_each_virtio_mmio_device(|device_type, device_id, mmio_device| {
                assert_eq!(*device_type, VirtioDeviceType::Net);
                assert_eq!(device_id, "dummy");
                assert_eq!(mmio_device.resources.addr, arch::MEM_32BIT_DEVICES_START);
                assert_eq!(mmio_device.resources.len, MMIO_LEN);
                assert_eq!(mmio_device.resources.gsi, Some(arch::GSI_LEGACY_START));
                Ok::<(), ()>(())
            })
            .unwrap();
    }

    #[test]
    #[cfg_attr(target_arch = "x86_64", allow(unused_mut))]
    fn test_register_too_many_devices() {
        let start_addr1 = GuestAddress(0x0);
        let start_addr2 = GuestAddress(0x1000);
        let guest_mem = multi_region_mem_raw(&[(start_addr1, 0x1000), (start_addr2, 0x1000)]);
        let kvm = Kvm::new(vec![]).expect("Cannot create Kvm");
        let mut vm = Vm::new(&kvm).unwrap();
        vm.register_dram_memory_regions(guest_mem).unwrap();
        let mut device_manager = MMIODeviceManager::new();
        let mut cmdline = kernel_cmdline::Cmdline::new(4096).unwrap();
        #[cfg(target_arch = "x86_64")]
        vm.setup_irqchip().unwrap();
        #[cfg(target_arch = "aarch64")]
        vm.setup_irqchip(1).unwrap();
        let mut event_manager = EventManager::new().unwrap();
        // Exhaust the legacy GSI range, then expect the next registration to fail.
        for _i in crate::arch::GSI_LEGACY_START..=crate::arch::GSI_LEGACY_END {
            device_manager
                .register_virtio_test_device(
                    &vm,
                    vm.guest_memory().clone(),
                    Arc::new(Mutex::new(DummyDevice::new())),
                    &mut event_manager,
                    &mut cmdline,
                    "dummy1",
                )
                .unwrap();
        }
        assert_eq!(
            format!(
                "{}",
                device_manager
                    .register_virtio_test_device(
                        &vm,
                        vm.guest_memory().clone(),
                        Arc::new(Mutex::new(DummyDevice::new())),
                        &mut event_manager,
                        &mut cmdline,
                        "dummy2"
                    )
                    .unwrap_err()
            ),
            "Failed to allocate requested resource: The requested resource is not available."
                .to_string()
        );
    }

    #[test]
    fn test_dummy_device() {
        let dummy = DummyDevice::new();
        assert_eq!(dummy.device_type(), VirtioDeviceType::Net);
        assert_eq!(dummy.queues().len(), QUEUE_SIZES.len());
    }

    #[test]
    #[cfg_attr(target_arch = "x86_64", allow(unused_mut))]
    fn test_device_info() {
        let start_addr1 = GuestAddress(0x0);
        let start_addr2 = GuestAddress(0x1000);
        let guest_mem = multi_region_mem_raw(&[(start_addr1, 0x1000), (start_addr2, 0x1000)]);
        let kvm = Kvm::new(vec![]).expect("Cannot create Kvm");
        let mut vm = Vm::new(&kvm).unwrap();
        vm.register_dram_memory_regions(guest_mem).unwrap();
        #[cfg(target_arch = "x86_64")]
        vm.setup_irqchip().unwrap();
        #[cfg(target_arch = "aarch64")]
        vm.setup_irqchip(1).unwrap();
        let mut device_manager = MMIODeviceManager::new();
        let mut cmdline = kernel_cmdline::Cmdline::new(4096).unwrap();
        let dummy = Arc::new(Mutex::new(DummyDevice::new()));
        let type_id = dummy.lock().unwrap().device_type();
        let id = String::from("foo");
        let mut event_manager = EventManager::new().unwrap();
        let addr = device_manager
            .register_virtio_test_device(
                &vm,
                vm.guest_memory().clone(),
                dummy,
                &mut event_manager,
                &mut cmdline,
                &id,
            )
            .unwrap();
        assert!(device_manager.get_virtio_device(type_id, &id).is_some());
        assert_eq!(
            addr,
            device_manager.virtio_devices[&(type_id, id.clone())]
                .resources
                .addr
        );
        assert_eq!(
            crate::arch::GSI_LEGACY_START,
            device_manager.virtio_devices[&(type_id, id)]
                .resources
                .gsi
                .unwrap()
        );
        let id = "bar";
        assert!(device_manager.get_virtio_device(type_id, id).is_none());
        let dummy2 = Arc::new(Mutex::new(DummyDevice::new()));
        let id2 = String::from("foo2");
        device_manager
            .register_virtio_test_device(
                &vm,
                vm.guest_memory().clone(),
                dummy2,
                &mut event_manager,
                &mut cmdline,
                &id2,
            )
            .unwrap();
        let mut count = 0;
        let _: Result<(), MmioError> =
            device_manager.for_each_virtio_mmio_device(|devtype, devid, _| {
                assert_eq!(*devtype, type_id);
                match devid.as_str() {
                    "foo" => count += 1,
                    "foo2" => count += 2,
                    _ => unreachable!(),
                };
                Ok(())
            });
        assert_eq!(count, 3);
        #[cfg(target_arch = "x86_64")]
        assert_eq!(device_manager.used_irqs_count(), 2);
    }

    #[test]
    fn test_no_irq_allocation() {
        let mut device_manager = MMIODeviceManager::new();
        let mut resource_allocator = ResourceAllocator::new();
        let device_info = device_manager
            .allocate_mmio_resources(&mut resource_allocator, 0)
            .unwrap();
        assert!(device_info.gsi.is_none());
    }

    #[test]
    fn test_irq_allocation() {
        let mut device_manager = MMIODeviceManager::new();
        let mut resource_allocator = ResourceAllocator::new();
        let device_info = device_manager
            .allocate_mmio_resources(&mut resource_allocator, 1)
            .unwrap();
        assert_eq!(device_info.gsi.unwrap(), crate::arch::GSI_LEGACY_START);
    }

    #[test]
    fn test_allocation_failure() {
        let mut device_manager = MMIODeviceManager::new();
        let mut resource_allocator = ResourceAllocator::new();
        assert_eq!(
            format!(
                "{}",
                device_manager
                    .allocate_mmio_resources(&mut resource_allocator, 2)
                    .unwrap_err()
            ),
            "Invalid MMIO IRQ configuration.".to_string()
        );
    }
}

================================================
FILE: src/vmm/src/device_manager/mod.rs
================================================
// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the THIRD-PARTY file.

// NOTE(review): the `====`/`FILE:` marker above is a repository-extraction
// artifact, not part of the original source file.

use std::convert::Infallible;
use std::fmt::Debug;
use std::path::PathBuf;
use std::sync::{Arc, Mutex};

use acpi::ACPIDeviceManager;
use event_manager::{MutEventSubscriber, SubscriberOps};
#[cfg(target_arch = "x86_64")]
use legacy::{LegacyDeviceError, PortIODeviceManager};
use linux_loader::loader::Cmdline;
use log::{error, info};
use mmio::{MMIODeviceManager, MmioError};
use pci_mngr::{PciDevices, PciDevicesConstructorArgs, PciManagerError};
use persist::MMIODevManagerConstructorArgs;
use serde::{Deserialize, Serialize};
use utils::time::TimestampUs;
use vmm_sys_util::eventfd::EventFd;

use crate::device_manager::acpi::ACPIDeviceError;
#[cfg(target_arch = "x86_64")]
use crate::devices::legacy::I8042Device;
#[cfg(target_arch = "aarch64")]
use crate::devices::legacy::RTCDevice;
use crate::devices::legacy::serial::SerialOut;
use crate::devices::legacy::{IER_RDA_BIT, IER_RDA_OFFSET, SerialDevice};
use crate::devices::pseudo::BootTimer;
use crate::devices::virtio::ActivateError;
use crate::devices::virtio::balloon::BalloonError;
use crate::devices::virtio::block::BlockError;
use crate::devices::virtio::device::{VirtioDevice, VirtioDeviceType};
use crate::devices::virtio::mem::persist::VirtioMemPersistError;
use crate::devices::virtio::net::persist::NetPersistError;
use crate::devices::virtio::pmem::persist::PmemPersistError;
use crate::devices::virtio::rng::persist::EntropyPersistError;
use crate::devices::virtio::transport::mmio::{IrqTrigger, MmioTransport};
use crate::devices::virtio::vsock::{VsockError, VsockUnixBackendError};
use crate::resources::VmResources;
use crate::snapshot::Persist;
use crate::utils::open_file_nonblock;
use crate::vmm_config::mmds::MmdsConfigError;
use crate::vstate::bus::BusError;
use crate::vstate::memory::GuestMemoryMmap;
use crate::{EmulateSerialInitError, EventManager, Vm};

/// ACPI device manager.
pub mod acpi;
/// Legacy Device Manager.
pub mod legacy;
/// Memory Mapped I/O Manager.
pub mod mmio;
/// PCIe device manager
pub mod pci_mngr;
/// Device managers (de)serialization support.
pub mod persist;

#[derive(Debug, thiserror::Error, displaydoc::Display)]
/// Error while creating a new [`DeviceManager`]
pub enum DeviceManagerCreateError {
    /// Error with EventFd: {0}
    EventFd(#[from] std::io::Error),
    #[cfg(target_arch = "x86_64")]
    /// Legacy device manager error: {0}
    PortIOError(#[from] LegacyDeviceError),
    /// Resource allocator error: {0}
    ResourceAllocator(#[from] vm_allocator::Error),
}

#[derive(Debug, thiserror::Error, displaydoc::Display)]
/// Error while attaching a VirtIO device
pub enum AttachDeviceError {
    /// MMIO transport error: {0}
    MmioTransport(#[from] MmioError),
    /// Error inserting device in bus: {0}
    Bus(#[from] BusError),
    /// Error while registering ACPI with KVM: {0}
    AttachAcpiDevice(#[from] ACPIDeviceError),
    #[cfg(target_arch = "aarch64")]
    /// Cmdline error
    Cmdline,
    #[cfg(target_arch = "aarch64")]
    /// Error creating serial device: {0}
    CreateSerial(#[from] std::io::Error),
    /// Error attach PCI device: {0}
    PciTransport(#[from] PciManagerError),
}

#[derive(Debug, thiserror::Error, displaydoc::Display)]
/// Error while searching for a VirtIO device
pub enum FindDeviceError {
    /// Device not found
    DeviceNotFound,
}

#[derive(Debug)]
/// A manager of all peripheral devices of Firecracker
pub struct DeviceManager {
    /// MMIO devices
    pub mmio_devices: MMIODeviceManager,
    #[cfg(target_arch = "x86_64")]
    /// Legacy devices
    pub legacy_devices: PortIODeviceManager,
    /// ACPI devices
    pub acpi_devices: ACPIDeviceManager,
    /// PCIe devices
    pub pci_devices: PciDevices,
}

impl DeviceManager {
    // Adds `O_NONBLOCK` to the stdout
flags. fn set_stdout_nonblocking() { // SAFETY: Call is safe since parameters are valid. let flags = unsafe { libc::fcntl(libc::STDOUT_FILENO, libc::F_GETFL, 0) }; if flags < 0 { error!("Could not get Firecracker stdout flags."); } // SAFETY: Call is safe since parameters are valid. let rc = unsafe { libc::fcntl(libc::STDOUT_FILENO, libc::F_SETFL, flags | libc::O_NONBLOCK) }; if rc < 0 { error!("Could not set Firecracker stdout to non-blocking."); } } /// Sets up the serial device. fn setup_serial_device( event_manager: &mut EventManager, output: Option<&PathBuf>, ) -> Result>, std::io::Error> { let (serial_in, serial_out) = match output { Some(path) => (None, open_file_nonblock(path).map(SerialOut::File)?), None => { Self::set_stdout_nonblocking(); (Some(std::io::stdin()), SerialOut::Stdout(std::io::stdout())) } }; let serial = Arc::new(Mutex::new(SerialDevice::new(serial_in, serial_out)?)); event_manager.add_subscriber(serial.clone()); Ok(serial) } #[cfg(target_arch = "x86_64")] fn create_legacy_devices( event_manager: &mut EventManager, vcpus_exit_evt: &EventFd, vm: &Vm, serial_output: Option<&PathBuf>, ) -> Result { // Create serial device let serial = Self::setup_serial_device(event_manager, serial_output)?; let reset_evt = vcpus_exit_evt .try_clone() .map_err(DeviceManagerCreateError::EventFd)?; // Create keyboard emulator for reset event let i8042 = Arc::new(Mutex::new(I8042Device::new(reset_evt)?)); // create pio dev manager with legacy devices let mut legacy_devices = PortIODeviceManager { stdio_serial: serial, i8042, }; legacy_devices.register_devices(vm)?; Ok(legacy_devices) } #[cfg_attr(target_arch = "aarch64", allow(unused))] pub fn new( event_manager: &mut EventManager, vcpus_exit_evt: &EventFd, vm: &Vm, serial_output: Option<&PathBuf>, ) -> Result { #[cfg(target_arch = "x86_64")] let legacy_devices = Self::create_legacy_devices(event_manager, vcpus_exit_evt, vm, serial_output)?; Ok(DeviceManager { mmio_devices: MMIODeviceManager::new(), 
#[cfg(target_arch = "x86_64")] legacy_devices, acpi_devices: ACPIDeviceManager::default(), pci_devices: PciDevices::new(), }) } /// Attaches an MMIO VirtioDevice device to the device manager and event manager. pub(crate) fn attach_mmio_virtio_device< T: 'static + VirtioDevice + MutEventSubscriber + Debug, >( &mut self, vm: &Vm, id: String, device: Arc>, cmdline: &mut Cmdline, event_manager: &mut EventManager, is_vhost_user: bool, ) -> Result<(), AttachDeviceError> { let interrupt = Arc::new(IrqTrigger::new()); // The device mutex mustn't be locked here otherwise it will deadlock. let device = MmioTransport::new(vm.guest_memory().clone(), interrupt, device, is_vhost_user); self.mmio_devices .register_mmio_virtio_for_boot(vm, id, device, event_manager, cmdline)?; Ok(()) } /// Attaches a VirtioDevice device to the device manager and event manager. pub(crate) fn attach_virtio_device( &mut self, vm: &Arc, id: String, device: Arc>, cmdline: &mut Cmdline, event_manager: &mut EventManager, is_vhost_user: bool, ) -> Result<(), AttachDeviceError> { if self.is_pci_enabled() { self.pci_devices .attach_pci_virtio_device(vm, id, device, event_manager)?; } else { self.attach_mmio_virtio_device(vm, id, device, cmdline, event_manager, is_vhost_user)?; } Ok(()) } /// Attaches a [`BootTimer`] to the VM pub(crate) fn attach_boot_timer_device( &mut self, vm: &Vm, request_ts: TimestampUs, ) -> Result<(), AttachDeviceError> { let boot_timer = Arc::new(Mutex::new(BootTimer::new(request_ts))); self.mmio_devices .register_mmio_boot_timer(&vm.common.mmio_bus, boot_timer)?; Ok(()) } pub(crate) fn attach_vmgenid_device(&mut self, vm: &Vm) -> Result<(), AttachDeviceError> { self.acpi_devices.attach_vmgenid(vm)?; self.acpi_devices.activate_vmgenid(vm)?; Ok(()) } pub(crate) fn attach_vmclock_device(&mut self, vm: &Vm) -> Result<(), AttachDeviceError> { self.acpi_devices.attach_vmclock(vm)?; self.acpi_devices.activate_vmclock(vm)?; Ok(()) } #[cfg(target_arch = "aarch64")] pub(crate) fn 
// NOTE(review): this span continues the `pub(crate) fn` item started on the previous
// line; several generic-parameter lists below were stripped by the extraction
// tooling (`&Arc`, `Arc>`, `Option>>`, `with_virtio_device ... -> Result`).
// Tokens are reproduced exactly as found; only comments/line breaks were added.
attach_legacy_devices_aarch64(
        &mut self,
        vm: &Vm,
        event_manager: &mut EventManager,
        cmdline: &mut Cmdline,
        serial_out_path: Option<&PathBuf>,
    ) -> Result<(), AttachDeviceError> {
        // Serial device setup.
        // Only attach a serial console if the guest cmdline asks for one.
        let cmdline_contains_console = cmdline
            .as_cstring()
            .map_err(|_| AttachDeviceError::Cmdline)?
            .into_string()
            .map_err(|_| AttachDeviceError::Cmdline)?
            .contains("console=");
        if cmdline_contains_console {
            let serial = Self::setup_serial_device(event_manager, serial_out_path)?;
            self.mmio_devices.register_mmio_serial(vm, serial, None)?;
            self.mmio_devices.add_mmio_serial_to_cmdline(cmdline)?;
        }
        let rtc = Arc::new(Mutex::new(RTCDevice::new()));
        self.mmio_devices.register_mmio_rtc(vm, rtc, None)?;
        Ok(())
    }

    /// Enables PCIe support for Firecracker devices
    pub fn enable_pci(&mut self, vm: &Arc) -> Result<(), PciManagerError> {
        self.pci_devices.attach_pci_segment(vm)
    }

    /// Artificially kick VirtIO devices as if they had external events.
    pub fn kick_virtio_devices(&self) {
        info!("Artificially kick devices");
        // Go through MMIO VirtIO devices
        let _: Result<(), MmioError> =
            self.mmio_devices.for_each_virtio_mmio_device(|_, _, device| {
                let mmio_transport_locked = device.inner.lock().expect("Poisoned lock");
                mmio_transport_locked
                    .device()
                    .lock()
                    .expect("Poisoned lock")
                    .kick();
                Ok(())
            });
        // Go through PCI VirtIO devices
        for virtio_pci_device in self.pci_devices.virtio_devices.values() {
            virtio_pci_device
                .lock()
                .expect("Poisoned lock")
                .virtio_device()
                .lock()
                .expect("Poisoned lock")
                .kick();
        }
    }

    // Shared helper for `mark_virtio_queue_memory_dirty`: marks the queue pages
    // of one device, skipping devices that were never activated.
    fn do_mark_virtio_queue_memory_dirty(
        device: Arc>,
        mem: &GuestMemoryMmap,
    ) {
        // SAFETY:
        // This should never fail as we mark pages only if device has already been activated,
        // and the address validation was already performed on device activation.
        let mut locked_device = device.lock().expect("Poisoned lock");
        if locked_device.is_activated() {
            locked_device.mark_queue_memory_dirty(mem).unwrap()
        }
    }

    /// Mark queue memory dirty for activated VirtIO devices
    pub fn mark_virtio_queue_memory_dirty(&self, mem: &GuestMemoryMmap) {
        // Go through MMIO VirtIO devices
        let _: Result<(), Infallible> =
            self.mmio_devices.for_each_virtio_mmio_device(|_, _, device| {
                let mmio_transport_locked = device.inner.lock().expect("Poisoned locked");
                Self::do_mark_virtio_queue_memory_dirty(mmio_transport_locked.device(), mem);
                Ok(())
            });
        // Go through PCI VirtIO devices
        for device in self.pci_devices.virtio_devices.values() {
            let virtio_device = device.lock().expect("Poisoned lock").virtio_device();
            Self::do_mark_virtio_queue_memory_dirty(virtio_device, mem);
        }
    }

    /// Get a VirtIO device of type `virtio_type` with ID `device_id`
    pub fn get_virtio_device(
        &self,
        device_type: VirtioDeviceType,
        device_id: &str,
    ) -> Option>> {
        if self.is_pci_enabled() {
            let pci_device = self.pci_devices.get_virtio_device(device_type, device_id)?;
            Some(
                pci_device
                    .lock()
                    .expect("Poisoned lock")
                    .virtio_device()
                    .clone(),
            )
        } else {
            let mmio_device = self
                .mmio_devices
                .get_virtio_device(device_type, device_id)?;
            Some(
                mmio_device
                    .inner
                    .lock()
                    .expect("Poisoned lock")
                    .device()
                    .clone(),
            )
        }
    }

    /// Run fn `f()` for the virtio device matching `virtio_type` and `id`.
    pub fn with_virtio_device(&self, id: &str, f: F) -> Result
    where
        T: VirtioDevice + 'static + Debug,
        F: FnOnce(&mut T) -> R,
    {
        if let Some(device) = self.get_virtio_device(T::const_device_type(), id) {
            let mut dev = device.lock().expect("Poisoned lock");
            Ok(f(dev
                .as_mut_any()
                .downcast_mut::()
                .expect("Invalid device for a given device type")))
        } else {
            Err(FindDeviceError::DeviceNotFound)
        }
    }

    /// Run fn `f()` on all virtio devices
    pub fn for_each_virtio_device(&self, mut f: impl FnMut(VirtioDeviceType, &dyn VirtioDevice)) {
        if self.is_pci_enabled() {
            self.pci_devices.for_each_virtio_device(&mut f);
        } else {
            self.mmio_devices.for_each_virtio_device(&mut f);
        }
    }

    // True once a PCI segment has been attached via `enable_pci`.
    pub fn is_pci_enabled(&self) -> bool {
        self.pci_devices.pci_segment.is_some()
    }
}

#[derive(Debug, Default, Clone, Serialize, Deserialize)]
/// State of devices in the system
pub struct DevicesState {
    /// MMIO devices state
    pub mmio_state: persist::DeviceStates,
    /// ACPI devices state
    pub acpi_state: persist::ACPIDeviceManagerState,
    /// PCI devices state
    pub pci_state: pci_mngr::PciDevicesState,
}

/// Errors for (de)serialization of the devices.
// NOTE(review): `&'a Arc` and `-> Result {` below lost their type arguments to
// the extraction tooling; tokens are reproduced exactly as found, only comments
// and line breaks were added.
#[derive(Debug, thiserror::Error, displaydoc::Display)]
pub enum DevicePersistError {
    /// Balloon: {0}
    Balloon(#[from] BalloonError),
    /// Block: {0}
    Block(#[from] BlockError),
    /// MMIO Device manager: {0}
    MmioDeviceManager(#[from] mmio::MmioError),
    /// Mmio transport
    MmioTransport,
    /// PCI Device manager: {0}
    PciDeviceManager(#[from] PciManagerError),
    /// Bus error: {0}
    Bus(#[from] BusError),
    #[cfg(target_arch = "aarch64")]
    /// Legacy: {0}
    Legacy(#[from] std::io::Error),
    /// Net: {0}
    Net(#[from] NetPersistError),
    /// Vsock: {0}
    Vsock(#[from] VsockError),
    /// VsockUnixBackend: {0}
    VsockUnixBackend(#[from] VsockUnixBackendError),
    /// MmdsConfig: {0}
    MmdsConfig(#[from] MmdsConfigError),
    /// Entropy: {0}
    Entropy(#[from] EntropyPersistError),
    /// Pmem: {0}
    Pmem(#[from] PmemPersistError),
    /// virtio-mem: {0}
    VirtioMem(#[from] VirtioMemPersistError),
    /// Could not activate device: {0}
    DeviceActivation(#[from] ActivateError),
}

/// Errors for (de)serialization of the device manager.
#[derive(Debug, thiserror::Error, displaydoc::Display)]
pub enum DeviceManagerPersistError {
    /// Error restoring MMIO devices: {0}
    MmioRestore(DevicePersistError),
    /// Error restoring ACPI devices: {0}
    AcpiRestore(#[from] ACPIDeviceError),
    /// Error restoring PCI devices: {0}
    PciRestore(DevicePersistError),
    /// Error resetting serial console: {0}
    SerialRestore(#[from] EmulateSerialInitError),
    /// Error inserting device in bus: {0}
    Bus(#[from] BusError),
    /// Error creating DeviceManager: {0}
    DeviceManager(#[from] DeviceManagerCreateError),
}

// Everything `DeviceManager::restore` needs besides the serialized state.
pub struct DeviceRestoreArgs<'a> {
    pub mem: &'a GuestMemoryMmap,
    pub vm: &'a Arc,
    pub event_manager: &'a mut EventManager,
    pub vcpus_exit_evt: &'a EventFd,
    pub vm_resources: &'a mut VmResources,
    pub instance_id: &'a str,
}

// Manual Debug impl: `event_manager` and `vcpus_exit_evt` are intentionally
// omitted from the output.
impl std::fmt::Debug for DeviceRestoreArgs<'_> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("DeviceRestoreArgs")
            .field("mem", &self.mem)
            .field("vm", &self.vm)
            .field("vm_resources", &self.vm_resources)
            .field("instance_id", &self.instance_id)
            .finish()
    }
}

impl<'a> Persist<'a> for DeviceManager {
    type State = DevicesState;
    type ConstructorArgs = DeviceRestoreArgs<'a>;
    type Error = DeviceManagerPersistError;

    fn save(&self) -> Self::State {
        DevicesState {
            mmio_state: self.mmio_devices.save(),
            acpi_state: self.acpi_devices.save(),
            pci_state: self.pci_devices.save(),
        }
    }

    fn restore(
        constructor_args: Self::ConstructorArgs,
        state: &Self::State,
    ) -> Result {
        // Setup legacy devices in case of x86
        #[cfg(target_arch = "x86_64")]
        let legacy_devices = Self::create_legacy_devices(
            constructor_args.event_manager,
            constructor_args.vcpus_exit_evt,
            constructor_args.vm,
            constructor_args.vm_resources.serial_out_path.as_ref(),
        )?;
        // Restore MMIO devices
        let mmio_ctor_args = MMIODevManagerConstructorArgs {
            mem: constructor_args.mem,
            vm: constructor_args.vm,
            event_manager: constructor_args.event_manager,
            vm_resources: constructor_args.vm_resources,
            instance_id: constructor_args.instance_id,
        };
        let mmio_devices = MMIODeviceManager::restore(mmio_ctor_args, &state.mmio_state)
            .map_err(DeviceManagerPersistError::MmioRestore)?;
        // Restore ACPI devices
        let acpi_devices = ACPIDeviceManager::restore(constructor_args.vm, &state.acpi_state)?;
        // Restore PCI devices
        let pci_ctor_args = PciDevicesConstructorArgs {
            vm: constructor_args.vm,
            mem: constructor_args.mem,
            vm_resources: constructor_args.vm_resources,
            instance_id: constructor_args.instance_id,
            event_manager: constructor_args.event_manager,
        };
        let pci_devices = PciDevices::restore(pci_ctor_args, &state.pci_state)
            .map_err(DeviceManagerPersistError::PciRestore)?;
        let device_manager = DeviceManager {
            mmio_devices,
            #[cfg(target_arch = "x86_64")]
            legacy_devices,
            acpi_devices,
            pci_devices,
        };
        // Restore serial.
        // We need to do that after we restore mmio devices, otherwise it won't succeed in Aarch64
        device_manager.emulate_serial_init()?;
        Ok(device_manager)
    }
}

impl DeviceManager {
    /// Sets RDA bit in serial console
    pub fn emulate_serial_init(&self) -> Result<(), EmulateSerialInitError> {
        // When restoring from a previously saved state, there is no serial
        // driver initialization, therefore the RDA (Received Data Available)
        // interrupt is not enabled. Because of that, the driver won't get
        // notified of any bytes that we send to the guest. The clean solution
        // would be to save the whole serial device state when we do the vm
        // serialization. For now we set that bit manually

        // On aarch64 the serial device lives on the MMIO bus (and may be absent).
        #[cfg(target_arch = "aarch64")]
        {
            if let Some(device) = &self.mmio_devices.serial {
                let mut device_locked = device.inner.lock().expect("Poisoned lock");
                device_locked
                    .serial
                    .write(IER_RDA_OFFSET, IER_RDA_BIT)
                    .map_err(|_| EmulateSerialInitError(std::io::Error::last_os_error()))?;
            }
            Ok(())
        }

        // On x86_64 the serial device is always present on the port-IO bus.
        #[cfg(target_arch = "x86_64")]
        {
            let mut serial = self
                .legacy_devices
                .stdio_serial
                .lock()
                .expect("Poisoned lock");
            serial
                .serial
                .write(IER_RDA_OFFSET, IER_RDA_BIT)
                .map_err(|_| EmulateSerialInitError(std::io::Error::last_os_error()))?;
            Ok(())
        }
    }
}

#[cfg(test)]
pub(crate) mod tests {
    use super::*;
    #[cfg(target_arch = "aarch64")]
    use crate::builder::tests::default_vmm;
    use crate::devices::acpi::vmclock::VmClock;
    use crate::devices::acpi::vmgenid::VmGenId;
    use crate::vstate::resources::ResourceAllocator;

    // Test fixture: a DeviceManager with empty MMIO/PCI managers, fresh ACPI
    // devices and (on x86_64) sink-backed legacy serial/i8042 devices.
    pub(crate) fn default_device_manager() -> DeviceManager {
        let mut resource_allocator = ResourceAllocator::new();
        let mmio_devices = MMIODeviceManager::new();
        let acpi_devices = ACPIDeviceManager::new(
            VmGenId::new(&mut resource_allocator).unwrap(),
            VmClock::new(&mut resource_allocator).unwrap(),
        );
        let pci_devices = PciDevices::new();
        #[cfg(target_arch = "x86_64")]
        let legacy_devices = PortIODeviceManager {
            stdio_serial: Arc::new(Mutex::new(
                SerialDevice::new(None, SerialOut::Sink).unwrap(),
            )),
            i8042: Arc::new(Mutex::new(
                I8042Device::new(EventFd::new(libc::EFD_NONBLOCK).unwrap()).unwrap(),
            )),
        };
        DeviceManager {
            mmio_devices,
            #[cfg(target_arch = "x86_64")]
            legacy_devices,
            acpi_devices,
            pci_devices,
        }
    }

    // Verifies that the serial device is only attached when the kernel command
    // line requests a console, while the RTC is always attached.
    #[cfg(target_arch = "aarch64")]
    #[test]
    fn test_attach_legacy_serial() {
        let mut vmm = default_vmm();
        assert!(vmm.device_manager.mmio_devices.rtc.is_none());
        assert!(vmm.device_manager.mmio_devices.serial.is_none());
        let mut cmdline = Cmdline::new(4096).unwrap();
        let mut event_manager = EventManager::new().unwrap();
        vmm.device_manager
            .attach_legacy_devices_aarch64(&vmm.vm, &mut event_manager, &mut cmdline, None)
            .unwrap();
        assert!(vmm.device_manager.mmio_devices.rtc.is_some());
        assert!(vmm.device_manager.mmio_devices.serial.is_none());
        // With "console=..." on the command line a serial device is attached
        // and the earlycon address is appended to the command line.
        let mut vmm = default_vmm();
        cmdline.insert("console", "/dev/blah").unwrap();
        vmm.device_manager
            .attach_legacy_devices_aarch64(&vmm.vm, &mut event_manager, &mut cmdline, None)
            .unwrap();
        assert!(vmm.device_manager.mmio_devices.rtc.is_some());
        assert!(vmm.device_manager.mmio_devices.serial.is_some());
        assert!(
            cmdline
                .as_cstring()
                .unwrap()
                .into_string()
                .unwrap()
                .contains(&format!(
                    "earlycon=uart,mmio,0x{:08x}",
                    vmm.device_manager
                        .mmio_devices
                        .serial
                        .as_ref()
                        .unwrap()
                        .resources
                        .addr
                ))
        );
    }
}

================================================ FILE: src/vmm/src/device_manager/pci_mngr.rs ================================================

// Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

use std::collections::HashMap;
use std::fmt::Debug;
use std::ops::DerefMut;
use std::sync::{Arc, Mutex};

use event_manager::{MutEventSubscriber, SubscriberOps};
use log::{debug, warn};
use serde::{Deserialize, Serialize};

use super::persist::MmdsState;
use crate::device_manager::DevicePersistError;
use crate::devices::pci::PciSegment;
use crate::devices::virtio::balloon::Balloon;
use crate::devices::virtio::balloon::persist::{BalloonConstructorArgs, BalloonState};
use crate::devices::virtio::block::device::Block;
use crate::devices::virtio::block::persist::{BlockConstructorArgs, BlockState};
use crate::devices::virtio::device::{VirtioDevice, VirtioDeviceType};
use crate::devices::virtio::mem::VirtioMem;
use crate::devices::virtio::mem::persist::{VirtioMemConstructorArgs, VirtioMemState};
use crate::devices::virtio::net::Net;
use crate::devices::virtio::net::persist::{NetConstructorArgs, NetState};
use crate::devices::virtio::pmem::device::Pmem;
use crate::devices::virtio::pmem::persist::{PmemConstructorArgs, PmemState};
use crate::devices::virtio::rng::Entropy;
use crate::devices::virtio::rng::persist::{EntropyConstructorArgs, EntropyState};
use crate::devices::virtio::transport::pci::device::{
    CAPABILITY_BAR_SIZE, VirtioPciDevice, VirtioPciDeviceError, VirtioPciDeviceState,
};
use crate::devices::virtio::vsock::persist::{
    VsockConstructorArgs, VsockState, VsockUdsConstructorArgs,
};
use crate::devices::virtio::vsock::{Vsock, VsockUnixBackend};
use crate::pci::bus::PciRootError;
use crate::resources::VmResources;
use crate::snapshot::Persist;
use crate::vmm_config::memory_hotplug::MemoryHotplugConfig;
use crate::vstate::bus::BusError;
use crate::vstate::interrupts::InterruptError;
use crate::vstate::memory::GuestMemoryMmap;
use crate::{EventManager, Vm};
use pci::PciBdf;

// Manager of all devices attached over the PCI transport.
#[derive(Debug, Default)]
pub struct PciDevices {
    /// PCIe segment of the VMM, if PCI is enabled. We currently support a single PCIe segment.
    pub pci_segment: Option,
    /// All VirtIO PCI devices of the system
    pub virtio_devices: HashMap<(VirtioDeviceType, String), Arc>>,
}

// Errors returned by PCI device management operations.
// NOTE: variant doc comments are consumed by `displaydoc` for `Display`.
#[derive(Debug, thiserror::Error, displaydoc::Display)]
pub enum PciManagerError {
    /// Resource allocation error: {0}
    ResourceAllocation(#[from] vm_allocator::Error),
    /// Bus error: {0}
    Bus(#[from] BusError),
    /// PCI root error: {0}
    PciRoot(#[from] PciRootError),
    /// MSI error: {0}
    Msi(#[from] InterruptError),
    /// VirtIO PCI device error: {0}
    VirtioPciDevice(#[from] VirtioPciDeviceError),
    /// KVM error: {0}
    Kvm(#[from] vmm_sys_util::errno::Error),
}

impl PciDevices {
    /// Creates an empty manager: no PCI segment attached, no devices.
    pub fn new() -> Self {
        Default::default()
    }

    /// Creates the (single) PCIe segment of the VM and stores it in `self`.
    pub fn attach_pci_segment(&mut self, vm: &Arc) -> Result<(), PciManagerError> {
        // We only support a single PCIe segment. Calling this function twice is a Firecracker
        // internal error.
        assert!(self.pci_segment.is_none());
        // Currently we don't assign any IRQs to PCI devices. We will be using MSI-X interrupts
        // only.
        let pci_segment = PciSegment::new(0, vm, &[0u8; 32])?;
        self.pci_segment = Some(pci_segment);
        Ok(())
    }

    // Registers the device's BAR region with the VM's MMIO bus so guest
    // accesses to the BAR reach the transport.
    fn register_bars_with_bus(
        vm: &Vm,
        virtio_device: &Arc>,
    ) -> Result<(), PciManagerError> {
        let virtio_device_locked = virtio_device.lock().expect("Poisoned lock");
        debug!(
            "Inserting MMIO BAR region: {:#x}:{:#x}",
            virtio_device_locked.bar_address, CAPABILITY_BAR_SIZE
        );
        vm.common.mmio_bus.insert(
            virtio_device.clone(),
            virtio_device_locked.bar_address,
            CAPABILITY_BAR_SIZE,
        )?;
        Ok(())
    }

    // Shared tail of device attachment (used by both fresh attach and
    // restore): adds the device to the PCI bus and our bookkeeping map,
    // registers BARs, ioevents and the event-manager subscriber.
    fn attach_common(
        &mut self,
        vm: &Arc,
        device_type: VirtioDeviceType,
        id: String,
        bdf: PciBdf,
        virtio_device: Arc>,
        event_manager: &mut EventManager,
    ) -> Result<(), PciManagerError> {
        // We should only be reaching this point if PCI is enabled
        let pci_segment = self.pci_segment.as_ref().unwrap();
        pci_segment
            .pci_bus
            .lock()
            .expect("Poisoned lock")
            .add_device(bdf.device() as u32, virtio_device.clone());
        self.virtio_devices
            .insert((device_type, id), virtio_device.clone());
        Self::register_bars_with_bus(vm, &virtio_device)?;
        let mut
        device = virtio_device.lock().expect("Poisoned lock");
        device.register_notification_ioevent(vm)?;
        let sub_id = event_manager.add_subscriber(device.virtio_device());
        device.sub_id = Some(sub_id);
        Ok(())
    }

    /// Wraps a VirtIO device in a PCI transport, allocates its BDF, MSI-X
    /// vectors and BARs, and attaches it to the PCI segment.
    pub(crate) fn attach_pci_virtio_device<
        T: 'static + VirtioDevice + MutEventSubscriber + Debug,
    >(
        &mut self,
        vm: &Arc,
        id: String,
        device: Arc>,
        event_manager: &mut EventManager,
    ) -> Result<(), PciManagerError> {
        // We should only be reaching this point if PCI is enabled
        let pci_segment = self.pci_segment.as_ref().unwrap();
        let pci_device_bdf = pci_segment.next_device_bdf()?;
        debug!("Allocating BDF: {pci_device_bdf:?} for device");
        let mem = vm.guest_memory().clone();
        let device_type = device.lock().expect("Poisoned lock").device_type();
        // Allocate one MSI vector per queue, plus one for configuration
        let msix_num =
            u16::try_from(device.lock().expect("Poisoned lock").queues().len() + 1).unwrap();
        let msix_vectors = Vm::create_msix_group(vm.clone(), msix_num)?;
        // Create the transport
        let mut virtio_device = VirtioPciDevice::new(
            id.clone(),
            mem,
            device,
            Arc::new(msix_vectors),
            pci_device_bdf.into(),
        )?;
        // Allocate bars
        let mut resource_allocator_lock = vm.resource_allocator();
        let resource_allocator = resource_allocator_lock.deref_mut();
        virtio_device.allocate_bars(&mut resource_allocator.mmio64_memory);
        let virtio_device = Arc::new(Mutex::new(virtio_device));
        self.attach_common(
            vm,
            device_type,
            id,
            pci_device_bdf,
            virtio_device,
            event_manager,
        )
    }

    // Re-attaches a device from a snapshot: rebuilds the PCI transport from
    // its saved state (keeping the saved BDF) instead of allocating new
    // resources.
    fn restore_pci_device(
        &mut self,
        vm: &Arc,
        device: Arc>,
        device_id: &str,
        transport_state: &VirtioPciDeviceState,
        event_manager: &mut EventManager,
    ) -> Result<(), PciManagerError> {
        let device_type = device.lock().expect("Poisoned lock").device_type();
        let virtio_device = Arc::new(Mutex::new(VirtioPciDevice::new_from_state(
            device_id.to_string(),
            vm,
            device.clone(),
            transport_state.clone(),
        )?));
        self.attach_common(
            vm,
            device_type,
            device_id.to_string(),
            transport_state.pci_device_bdf,
            virtio_device,
            event_manager,
        )?;
        Ok(())
    }

    /// Gets the specified device.
    pub fn get_virtio_device(
        &self,
        device_type: VirtioDeviceType,
        device_id: &str,
    ) -> Option<&Arc>> {
        self.virtio_devices
            .get(&(device_type, device_id.to_string()))
    }

    /// Calls `f` with the type and (locked) inner VirtIO device of every
    /// PCI-attached device.
    pub fn for_each_virtio_device(&self, mut f: impl FnMut(VirtioDeviceType, &dyn VirtioDevice)) {
        for ((device_type, _), pci_device) in &self.virtio_devices {
            let device_arc = pci_device.lock().expect("Poisoned lock").virtio_device();
            let device = device_arc.lock().expect("Poisoned lock");
            f(*device_type, &*device);
        }
    }
}

// Serialized state of one VirtIO device attached over the PCI transport.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VirtioDeviceState {
    /// Device identifier
    pub device_id: String,
    /// Device BDF
    pub pci_device_bdf: u32,
    /// Device state
    pub device_state: T,
    /// Transport state
    pub transport_state: VirtioPciDeviceState,
}

// Serialized state of the whole PCI device manager.
#[derive(Default, Debug, Clone, Serialize, Deserialize)]
pub struct PciDevicesState {
    /// Whether PCI is enabled
    pub pci_enabled: bool,
    /// Block device states.
    pub block_devices: Vec>,
    /// Net device states.
    pub net_devices: Vec>,
    /// Vsock device state.
    pub vsock_device: Option>,
    /// Balloon device state.
    pub balloon_device: Option>,
    /// Mmds state.
    pub mmds: Option,
    /// Entropy device state.
    pub entropy_device: Option>,
    /// Pmem device states.
    pub pmem_devices: Vec>,
    /// Memory device state.
    pub memory_device: Option>,
}

// Everything `PciDevices::restore` needs to rebuild the PCI device tree.
pub struct PciDevicesConstructorArgs<'a> {
    pub vm: &'a Arc,
    pub mem: &'a GuestMemoryMmap,
    pub vm_resources: &'a mut VmResources,
    pub instance_id: &'a str,
    pub event_manager: &'a mut EventManager,
}

// Manual Debug impl; `event_manager` is deliberately not printed.
impl<'a> Debug for PciDevicesConstructorArgs<'a> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("PciDevicesConstructorArgs")
            .field("vm", &self.vm)
            .field("mem", &self.mem)
            .field("vm_resources", &self.vm_resources)
            .field("instance_id", &self.instance_id)
            .finish()
    }
}

impl<'a> Persist<'a> for PciDevices {
    type State = PciDevicesState;
    type ConstructorArgs = PciDevicesConstructorArgs<'a>;
    type Error = DevicePersistError;

    fn save(&self) -> Self::State {
        let mut state = PciDevicesState::default();
        // If no PCI segment was attached there is nothing more to save.
        if self.pci_segment.is_some() {
            state.pci_enabled = true;
        } else {
            return state;
        }
        // Serialize each attached device, dispatching on its concrete type.
        for pci_dev in self.virtio_devices.values() {
            let locked_pci_dev = pci_dev.lock().expect("Poisoned lock");
            let virtio_dev = locked_pci_dev.virtio_device();
            // We need to call `prepare_save()` on the device before saving the transport
            // so that, if we modify the transport state while preparing the device, e.g. sending
            // an interrupt to the guest, this is correctly captured in the saved transport state.
            let mut locked_virtio_dev = virtio_dev.lock().expect("Poisoned lock");
            locked_virtio_dev.prepare_save();
            let transport_state = locked_pci_dev.state();
            let pci_device_bdf = transport_state.pci_device_bdf.into();
            match locked_virtio_dev.device_type() {
                VirtioDeviceType::Balloon => {
                    let balloon_device = locked_virtio_dev
                        .as_any()
                        .downcast_ref::()
                        .unwrap();
                    let device_state = balloon_device.save();
                    state.balloon_device = Some(VirtioDeviceState {
                        device_id: balloon_device.id().to_string(),
                        pci_device_bdf,
                        device_state,
                        transport_state,
                    });
                }
                VirtioDeviceType::Block => {
                    let block_dev = locked_virtio_dev
                        .as_mut_any()
                        .downcast_mut::()
                        .unwrap();
                    if block_dev.is_vhost_user() {
                        warn!(
                            "Skipping vhost-user-block device. VhostUserBlock does not support \
                             snapshotting yet"
                        );
                    } else {
                        let device_state = block_dev.save();
                        state.block_devices.push(VirtioDeviceState {
                            device_id: block_dev.id().to_string(),
                            pci_device_bdf,
                            device_state,
                            transport_state,
                        });
                    }
                }
                VirtioDeviceType::Net => {
                    let net_dev = locked_virtio_dev
                        .as_mut_any()
                        .downcast_mut::()
                        .unwrap();
                    // The MMDS configuration is captured once, from the first
                    // net device that carries an MMDS network stack.
                    if let (Some(mmds_ns), None) = (net_dev.mmds_ns.as_ref(), state.mmds.as_ref())
                    {
                        let mmds_guard = mmds_ns.mmds.lock().expect("Poisoned lock");
                        state.mmds = Some(MmdsState {
                            version: mmds_guard.version(),
                            imds_compat: mmds_guard.imds_compat(),
                        });
                    }
                    let device_state = net_dev.save();
                    state.net_devices.push(VirtioDeviceState {
                        device_id: net_dev.id().to_string(),
                        pci_device_bdf,
                        device_state,
                        transport_state,
                    })
                }
                VirtioDeviceType::Vsock => {
                    let vsock_dev = locked_virtio_dev
                        .as_mut_any()
                        // Currently, VsockUnixBackend is the only implementation of VsockBackend.
                        .downcast_mut::>()
                        .unwrap();
                    // Save state after potential notification to the guest. This
                    // way we save changes to the queue the notification can cause.
                    let vsock_state = VsockState {
                        backend: vsock_dev.backend().save(),
                        frontend: vsock_dev.save(),
                    };
                    state.vsock_device = Some(VirtioDeviceState {
                        device_id: vsock_dev.id().to_string(),
                        pci_device_bdf,
                        device_state: vsock_state,
                        transport_state,
                    });
                }
                VirtioDeviceType::Rng => {
                    let rng_dev = locked_virtio_dev
                        .as_mut_any()
                        .downcast_mut::()
                        .unwrap();
                    let device_state = rng_dev.save();
                    state.entropy_device = Some(VirtioDeviceState {
                        device_id: rng_dev.id().to_string(),
                        pci_device_bdf,
                        device_state,
                        transport_state,
                    })
                }
                VirtioDeviceType::Pmem => {
                    let pmem_dev = locked_virtio_dev
                        .as_mut_any()
                        .downcast_mut::()
                        .unwrap();
                    let device_state = pmem_dev.save();
                    state.pmem_devices.push(VirtioDeviceState {
                        device_id: pmem_dev.config.id.clone(),
                        pci_device_bdf,
                        device_state,
                        transport_state,
                    });
                }
                VirtioDeviceType::Mem => {
                    let mem_dev = locked_virtio_dev
                        .as_mut_any()
                        .downcast_mut::()
                        .unwrap();
                    let device_state = mem_dev.save();
                    state.memory_device = Some(VirtioDeviceState {
                        device_id: mem_dev.id().to_string(),
                        pci_device_bdf,
                        device_state,
                        transport_state,
                    })
                }
            }
        }
        state
    }

    fn restore(
        constructor_args: Self::ConstructorArgs,
        state: &Self::State,
    ) -> Result {
        let mem = constructor_args.mem;
        let mut pci_devices = PciDevices::new();
        // Nothing to restore if the snapshot was taken without PCI.
        if !state.pci_enabled {
            return Ok(pci_devices);
        }
        pci_devices.attach_pci_segment(constructor_args.vm)?;
        if let Some(balloon_state) = &state.balloon_device {
            let device = Arc::new(Mutex::new(Balloon::restore(
                BalloonConstructorArgs { mem: mem.clone() },
                &balloon_state.device_state,
            )?));
            constructor_args
                .vm_resources
                .balloon
                .set_device(device.clone());
            pci_devices.restore_pci_device(
                constructor_args.vm,
                device,
                &balloon_state.device_id,
                &balloon_state.transport_state,
                constructor_args.event_manager,
            )?
        }
        for block_state in &state.block_devices {
            let device = Arc::new(Mutex::new(Block::restore(
                BlockConstructorArgs { mem: mem.clone() },
                &block_state.device_state,
            )?));
            constructor_args
                .vm_resources
                .block
                .add_virtio_device(device.clone());
            pci_devices.restore_pci_device(
                constructor_args.vm,
                device,
                &block_state.device_id,
                &block_state.transport_state,
                constructor_args.event_manager,
            )?
        }
        // Initialize MMDS if MMDS state is included.
        if let Some(mmds) = &state.mmds {
            constructor_args.vm_resources.set_mmds_basic_config(
                mmds.version,
                mmds.imds_compat,
                constructor_args.instance_id,
            )?;
        } else if state
            .net_devices
            .iter()
            .any(|dev| dev.device_state.mmds_ns.is_some())
        {
            // If there's at least one network device having an mmds_ns, it means
            // that we are restoring from a version that did not persist the `MmdsVersionState`.
            // Init with the default.
            constructor_args.vm_resources.mmds_or_default()?;
        }
        for net_state in &state.net_devices {
            let device = Arc::new(Mutex::new(Net::restore(
                NetConstructorArgs {
                    mem: mem.clone(),
                    mmds: constructor_args
                        .vm_resources
                        .mmds
                        .as_ref()
                        // Clone the Arc reference.
                        .cloned(),
                },
                &net_state.device_state,
            )?));
            constructor_args
                .vm_resources
                .net_builder
                .add_device(device.clone());
            pci_devices.restore_pci_device(
                constructor_args.vm,
                device,
                &net_state.device_id,
                &net_state.transport_state,
                constructor_args.event_manager,
            )?
        }
        if let Some(vsock_state) = &state.vsock_device {
            let ctor_args = VsockUdsConstructorArgs {
                cid: vsock_state.device_state.frontend.cid,
            };
            let backend = VsockUnixBackend::restore(ctor_args, &vsock_state.device_state.backend)?;
            let device = Arc::new(Mutex::new(Vsock::restore(
                VsockConstructorArgs {
                    mem: mem.clone(),
                    backend,
                },
                &vsock_state.device_state.frontend,
            )?));
            constructor_args
                .vm_resources
                .vsock
                .set_device(device.clone());
            pci_devices.restore_pci_device(
                constructor_args.vm,
                device,
                &vsock_state.device_id,
                &vsock_state.transport_state,
                constructor_args.event_manager,
            )?
        }
        if let Some(entropy_state) = &state.entropy_device {
            let ctor_args = EntropyConstructorArgs { mem: mem.clone() };
            let device = Arc::new(Mutex::new(Entropy::restore(
                ctor_args,
                &entropy_state.device_state,
            )?));
            constructor_args
                .vm_resources
                .entropy
                .set_device(device.clone());
            pci_devices.restore_pci_device(
                constructor_args.vm,
                device,
                &entropy_state.device_id,
                &entropy_state.transport_state,
                constructor_args.event_manager,
            )?
        }
        for pmem_state in &state.pmem_devices {
            let device = Arc::new(Mutex::new(Pmem::restore(
                PmemConstructorArgs {
                    mem,
                    vm: constructor_args.vm.as_ref(),
                },
                &pmem_state.device_state,
            )?));
            constructor_args
                .vm_resources
                .pmem
                .add_device(device.clone());
            pci_devices.restore_pci_device(
                constructor_args.vm,
                device,
                &pmem_state.device_id,
                &pmem_state.transport_state,
                constructor_args.event_manager,
            )?
        }
        if let Some(memory_device) = &state.memory_device {
            let ctor_args = VirtioMemConstructorArgs::new(Arc::clone(constructor_args.vm));
            let device = VirtioMem::restore(ctor_args, &memory_device.device_state)?;
            // Rebuild the hotplug config from the restored device's geometry.
            constructor_args.vm_resources.memory_hotplug = Some(MemoryHotplugConfig {
                total_size_mib: device.total_size_mib(),
                block_size_mib: device.block_size_mib(),
                slot_size_mib: device.slot_size_mib(),
            });
            let arcd_device = Arc::new(Mutex::new(device));
            pci_devices.restore_pci_device(
                constructor_args.vm,
                arcd_device,
                &memory_device.device_id,
                &memory_device.transport_state,
                constructor_args.event_manager,
            )?
        }
        Ok(pci_devices)
    }
}

#[cfg(test)]
mod tests {
    use vmm_sys_util::tempfile::TempFile;

    use super::*;
    use crate::builder::tests::*;
    use crate::device_manager;
    use crate::devices::virtio::block::CacheType;
    use crate::mmds::data_store::MmdsVersion;
    use crate::resources::VmmConfig;
    use crate::vmm_config::balloon::BalloonDeviceConfig;
    use crate::vmm_config::entropy::EntropyDeviceConfig;
    use crate::vmm_config::memory_hotplug::MemoryHotplugConfig;
    use crate::vmm_config::net::NetworkInterfaceConfig;
    use crate::vmm_config::pmem::PmemConfig;
    use crate::vmm_config::vsock::VsockDeviceConfig;

    // End-to-end save/restore round trip: attach one of each device type,
    // snapshot, then restore into a fresh PciDevices and check VmResources.
    #[test]
    fn test_device_manager_persistence() {
        // These need to survive so the restored blocks find them.
        let _block_files;
        let _pmem_files;
        let mut tmp_sock_file = TempFile::new().unwrap();
        tmp_sock_file.remove().unwrap();
        let serialized_data;
        // Set up a vmm with one of each device, and get the serialized DeviceStates.
        {
            let mut event_manager = EventManager::new().expect("Unable to create EventManager");
            let mut vmm = default_vmm();
            vmm.device_manager.enable_pci(&vmm.vm).unwrap();
            let mut cmdline = default_kernel_cmdline();
            // Add a balloon device.
            let balloon_cfg = BalloonDeviceConfig {
                amount_mib: 123,
                deflate_on_oom: false,
                stats_polling_interval_s: 1,
                free_page_hinting: false,
                free_page_reporting: false,
            };
            insert_balloon_device(&mut vmm, &mut cmdline, &mut event_manager, balloon_cfg);
            // Add a block device.
            let drive_id = String::from("root");
            let block_configs = vec![CustomBlockConfig::new(
                drive_id,
                true,
                None,
                true,
                CacheType::Unsafe,
            )];
            _block_files =
                insert_block_devices(&mut vmm, &mut cmdline, &mut event_manager, block_configs);
            // Add a net device.
            let network_interface = NetworkInterfaceConfig {
                iface_id: String::from("netif"),
                host_dev_name: String::from("hostname"),
                guest_mac: None,
                rx_rate_limiter: None,
                tx_rate_limiter: None,
            };
            insert_net_device_with_mmds(
                &mut vmm,
                &mut cmdline,
                &mut event_manager,
                network_interface,
                MmdsVersion::V2,
            );
            // Add a vsock device.
            let vsock_dev_id = "vsock";
            let vsock_config = VsockDeviceConfig {
                vsock_id: Some(vsock_dev_id.to_string()),
                guest_cid: 3,
                uds_path: tmp_sock_file.as_path().to_str().unwrap().to_string(),
            };
            insert_vsock_device(&mut vmm, &mut cmdline, &mut event_manager, vsock_config);
            // Add an entropy device.
            let entropy_config = EntropyDeviceConfig::default();
            insert_entropy_device(&mut vmm, &mut cmdline, &mut event_manager, entropy_config);
            // Add a pmem device.
            let pmem_id = String::from("pmem");
            let pmem_configs = vec![PmemConfig {
                id: pmem_id,
                path_on_host: "".into(),
                root_device: true,
                read_only: true,
            }];
            _pmem_files =
                insert_pmem_devices(&mut vmm, &mut cmdline, &mut event_manager, pmem_configs);
            // Add a virtio-mem (memory hotplug) device.
            let memory_hotplug_config = MemoryHotplugConfig {
                total_size_mib: 1024,
                block_size_mib: 2,
                slot_size_mib: 128,
            };
            insert_virtio_mem_device(
                &mut vmm,
                &mut cmdline,
                &mut event_manager,
                memory_hotplug_config,
            );
            let device_state = vmm.device_manager.save();
            serialized_data = bitcode::serialize(&device_state).unwrap();
        }
        tmp_sock_file.remove().unwrap();
        let mut event_manager = EventManager::new().expect("Unable to create EventManager");
        // Keep in mind we are re-creating here an empty DeviceManager. Restoring later on
        // will create a new PciDevices manager different than vmm.pci_devices. We're doing
        // this to avoid restoring the whole Vmm, since what we really need from Vmm is the Vm
        // object and calling default_vmm() is the easiest way to create one.
        let vmm = default_vmm();
        let device_manager_state: device_manager::DevicesState =
            bitcode::deserialize(&serialized_data).unwrap();
        let vm_resources = &mut VmResources::default();
        let restore_args = PciDevicesConstructorArgs {
            vm: &vmm.vm,
            mem: vmm.vm.guest_memory(),
            vm_resources,
            instance_id: "microvm-id",
            event_manager: &mut event_manager,
        };
        let _restored_dev_manager =
            PciDevices::restore(restore_args, &device_manager_state.pci_state).unwrap();
        // Expected JSON rendering of the restored VmResources (paths are
        // substituted via the `{}` placeholders below).
        let expected_vm_resources = format!(
            r#"{{ "balloon": {{ "amount_mib": 123, "deflate_on_oom": false, "stats_polling_interval_s": 1, "free_page_hinting": false, "free_page_reporting": false }}, "drives": [ {{ "drive_id": "root", "partuuid": null, "is_root_device": true, "cache_type": "Unsafe", "is_read_only": true, "path_on_host": "{}", "rate_limiter": null, "io_engine": "Sync", "socket": null }} ], "boot-source": {{ "kernel_image_path": "", "initrd_path": null, "boot_args": null }}, "cpu-config": null, "logger": null, "machine-config": {{ "vcpu_count": 1, "mem_size_mib": 128, "smt": false, "track_dirty_pages": false, "huge_pages": "None" }}, "metrics": null, "mmds-config": {{ "version": "V2", "network_interfaces": [ "netif" ], "ipv4_address": "169.254.169.254", "imds_compat": false }}, "network-interfaces": [ {{ "iface_id": "netif", "host_dev_name": "hostname", "guest_mac": null, "rx_rate_limiter": null, "tx_rate_limiter": null }} ], "vsock": {{ "guest_cid": 3, "uds_path": "{}" }}, "entropy": {{ "rate_limiter": null }}, "pmem": [ {{ "id": "pmem", "path_on_host": "{}", "root_device": true, "read_only": true }} ], "memory-hotplug": {{ "total_size_mib": 1024, "block_size_mib": 2, "slot_size_mib": 128 }} }}"#,
            _block_files.last().unwrap().as_path().to_str().unwrap(),
            tmp_sock_file.as_path().to_str().unwrap(),
            _pmem_files.last().unwrap().as_path().to_str().unwrap(),
        );
        assert_eq!(
            vm_resources
                .mmds
                .as_ref()
                .unwrap()
                .lock()
                .unwrap()
                .version(),
            MmdsVersion::V2
        );
        assert_eq!(
            device_manager_state.pci_state.mmds.unwrap().version,
            MmdsVersion::V2
        );
        assert_eq!(
            expected_vm_resources,
            serde_json::to_string_pretty(&VmmConfig::from(&*vm_resources)).unwrap()
        );
    }
}

================================================ FILE: src/vmm/src/device_manager/persist.rs ================================================

// Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

//! Provides functionality for saving/restoring the MMIO device manager and its devices.

use std::fmt::{self, Debug};
use std::sync::{Arc, Mutex};

use log::warn;
use serde::{Deserialize, Serialize};

use super::acpi::ACPIDeviceManager;
use super::mmio::*;
#[cfg(target_arch = "aarch64")]
use crate::arch::DeviceType;
use crate::device_manager::DevicePersistError;
use crate::device_manager::acpi::ACPIDeviceError;
use crate::devices::acpi::vmclock::{VmClock, VmClockState};
use crate::devices::acpi::vmgenid::{VMGenIDState, VmGenId};
#[cfg(target_arch = "aarch64")]
use crate::devices::legacy::RTCDevice;
use crate::devices::virtio::balloon::Balloon;
use crate::devices::virtio::balloon::persist::{BalloonConstructorArgs, BalloonState};
use crate::devices::virtio::block::device::Block;
use crate::devices::virtio::block::persist::{BlockConstructorArgs, BlockState};
use crate::devices::virtio::device::{VirtioDevice, VirtioDeviceType};
use crate::devices::virtio::mem::VirtioMem;
use crate::devices::virtio::mem::persist::{VirtioMemConstructorArgs, VirtioMemState};
use crate::devices::virtio::net::Net;
use crate::devices::virtio::net::persist::{NetConstructorArgs, NetState};
use crate::devices::virtio::persist::{MmioTransportConstructorArgs, MmioTransportState};
use crate::devices::virtio::pmem::device::Pmem;
use crate::devices::virtio::pmem::persist::{PmemConstructorArgs, PmemState};
use crate::devices::virtio::rng::Entropy;
use crate::devices::virtio::rng::persist::{EntropyConstructorArgs, EntropyState};
use
crate::devices::virtio::transport::mmio::{IrqTrigger, MmioTransport};
use crate::devices::virtio::vsock::persist::{
    VsockConstructorArgs, VsockState, VsockUdsConstructorArgs,
};
use crate::devices::virtio::vsock::{Vsock, VsockUnixBackend};
use crate::mmds::data_store::MmdsVersion;
use crate::resources::VmResources;
use crate::snapshot::Persist;
use crate::vmm_config::memory_hotplug::MemoryHotplugConfig;
use crate::vstate::memory::GuestMemoryMmap;
use crate::{EventManager, Vm};

/// Holds the state of a MMIO VirtIO device
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VirtioDeviceState {
    /// Device identifier.
    pub device_id: String,
    /// Device state.
    pub device_state: T,
    /// Mmio transport state.
    pub transport_state: MmioTransportState,
    /// VmmResources.
    pub device_info: MMIODeviceInfo,
}

/// Holds the state of a legacy device connected to the MMIO space.
#[cfg(target_arch = "aarch64")]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ConnectedLegacyState {
    /// Device identifier.
    pub type_: DeviceType,
    /// VmmResources.
    pub device_info: MMIODeviceInfo,
}

// MMDS configuration captured in a snapshot (version + IMDS compatibility).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MmdsState {
    pub version: MmdsVersion,
    pub imds_compat: bool,
}

/// Holds the device states.
#[derive(Debug, Default, Clone, Serialize, Deserialize)]
pub struct DeviceStates {
    #[cfg(target_arch = "aarch64")]
    // State of legacy devices in MMIO space.
    pub legacy_devices: Vec,
    /// Block device states.
    pub block_devices: Vec>,
    /// Net device states.
    pub net_devices: Vec>,
    /// Vsock device state.
    pub vsock_device: Option>,
    /// Balloon device state.
    pub balloon_device: Option>,
    /// Mmds version.
    pub mmds: Option,
    /// Entropy device state.
    pub entropy_device: Option>,
    /// Pmem device states.
    pub pmem_devices: Vec>,
    /// Memory device state.
    pub memory_device: Option>,
}

// Everything `MMIODeviceManager::restore` needs to rebuild the MMIO devices.
pub struct MMIODevManagerConstructorArgs<'a> {
    pub mem: &'a GuestMemoryMmap,
    pub vm: &'a Arc,
    pub event_manager: &'a mut EventManager,
    pub vm_resources: &'a mut VmResources,
    pub instance_id: &'a str,
}

// Manual Debug impl.
// NOTE(review): this prints a "for_each_restored_device" field that does not
// exist on the struct — looks like a leftover from a removed field; confirm
// and drop it.
impl fmt::Debug for MMIODevManagerConstructorArgs<'_> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.debug_struct("MMIODevManagerConstructorArgs")
            .field("mem", &self.mem)
            .field("vm", &self.vm)
            .field("event_manager", &"?")
            .field("for_each_restored_device", &"?")
            .field("vm_resources", &self.vm_resources)
            .field("instance_id", &self.instance_id)
            .finish()
    }
}

// Serialized state of the ACPI device manager (vmgenid + vmclock).
#[derive(Default, Debug, Clone, Serialize, Deserialize)]
pub struct ACPIDeviceManagerState {
    vmgenid: VMGenIDState,
    vmclock: VmClockState,
}

impl<'a> Persist<'a> for ACPIDeviceManager {
    type State = ACPIDeviceManagerState;
    type ConstructorArgs = &'a Vm;
    type Error = ACPIDeviceError;

    fn save(&self) -> Self::State {
        ACPIDeviceManagerState {
            vmgenid: self.vmgenid().save(),
            vmclock: self.vmclock().save(),
        }
    }

    fn restore(vm: Self::ConstructorArgs, state: &Self::State) -> Result {
        let mut acpi_devices = ACPIDeviceManager::new(
            // Safe to unwrap() here, this will never return an error.
            VmGenId::restore((), &state.vmgenid).unwrap(),
            // Safe to unwrap() here, this will never return an error.
VmClock::restore((), &state.vmclock).unwrap(), ); acpi_devices.activate_vmgenid(vm)?; acpi_devices.do_post_restore_vmgenid()?; acpi_devices.activate_vmclock(vm)?; acpi_devices.do_post_restore_vmclock(vm.guest_memory())?; Ok(acpi_devices) } } impl<'a> Persist<'a> for MMIODeviceManager { type State = DeviceStates; type ConstructorArgs = MMIODevManagerConstructorArgs<'a>; type Error = DevicePersistError; fn save(&self) -> Self::State { let mut states = DeviceStates::default(); #[cfg(target_arch = "aarch64")] { if let Some(device) = &self.serial { states.legacy_devices.push(ConnectedLegacyState { type_: DeviceType::Serial, device_info: device.resources, }); } if let Some(device) = &self.rtc { states.legacy_devices.push(ConnectedLegacyState { type_: DeviceType::Rtc, device_info: device.resources, }); } } let _: Result<(), ()> = self.for_each_virtio_mmio_device(|_, devid, device| { let mmio_transport_locked = device.inner.lock().expect("Poisoned lock"); let mut locked_device = mmio_transport_locked.locked_device(); // We need to call `prepare_save()` on the device before saving the transport // so that, if we modify the transport state while preparing the device, e.g. sending // an interrupt to the guest, this is correctly captured in the saved transport state. locked_device.prepare_save(); let transport_state = mmio_transport_locked.save(); let device_info = device.resources; let device_id = devid.clone(); match locked_device.device_type() { VirtioDeviceType::Balloon => { let device_state = locked_device .as_any() .downcast_ref::() .unwrap() .save(); states.balloon_device = Some(VirtioDeviceState { device_id, device_state, transport_state, device_info, }); } // Both virtio-block and vhost-user-block share same device type. VirtioDeviceType::Block => { let block = locked_device.as_mut_any().downcast_mut::().unwrap(); if block.is_vhost_user() { warn!( "Skipping vhost-user-block device. 
VhostUserBlock does not support \ snapshotting yet" ); } else { let device_state = block.save(); states.block_devices.push(VirtioDeviceState { device_id, device_state, transport_state, device_info, }); } } VirtioDeviceType::Net => { let net = locked_device.as_mut_any().downcast_mut::().unwrap(); if let (Some(mmds_ns), None) = (net.mmds_ns.as_ref(), states.mmds.as_ref()) { let mmds_guard = mmds_ns.mmds.lock().expect("Poisoned lock"); states.mmds = Some(MmdsState { version: mmds_guard.version(), imds_compat: mmds_guard.imds_compat(), }); } let device_state = net.save(); states.net_devices.push(VirtioDeviceState { device_id, device_state, transport_state, device_info, }); } VirtioDeviceType::Vsock => { let vsock = locked_device .as_mut_any() // Currently, VsockUnixBackend is the only implementation of VsockBackend. .downcast_mut::>() .unwrap(); // Save state after potential notification to the guest. This // way we save changes to the queue the notification can cause. let device_state = VsockState { backend: vsock.backend().save(), frontend: vsock.save(), }; states.vsock_device = Some(VirtioDeviceState { device_id, device_state, transport_state, device_info, }); } VirtioDeviceType::Rng => { let entropy = locked_device .as_mut_any() .downcast_mut::() .unwrap(); let device_state = entropy.save(); states.entropy_device = Some(VirtioDeviceState { device_id, device_state, transport_state, device_info, }); } VirtioDeviceType::Pmem => { let pmem = locked_device.as_mut_any().downcast_mut::().unwrap(); let device_state = pmem.save(); states.pmem_devices.push(VirtioDeviceState { device_id, device_state, transport_state, device_info, }) } VirtioDeviceType::Mem => { let mem = locked_device .as_mut_any() .downcast_mut::() .unwrap(); let device_state = mem.save(); states.memory_device = Some(VirtioDeviceState { device_id, device_state, transport_state, device_info, }); } }; Ok(()) }); states } fn restore( constructor_args: Self::ConstructorArgs, state: &Self::State, ) -> Result { 
let mut dev_manager = MMIODeviceManager::new(); let mem = constructor_args.mem; let vm = constructor_args.vm; #[cfg(target_arch = "aarch64")] { for state in &state.legacy_devices { if state.type_ == DeviceType::Serial { let serial = crate::DeviceManager::setup_serial_device( constructor_args.event_manager, constructor_args.vm_resources.serial_out_path.as_ref(), )?; dev_manager.register_mmio_serial(vm, serial, Some(state.device_info))?; } if state.type_ == DeviceType::Rtc { let rtc = Arc::new(Mutex::new(RTCDevice::new())); dev_manager.register_mmio_rtc(vm, rtc, Some(state.device_info))?; } } } let mut restore_helper = |device: Arc>, activated: bool, is_vhost_user: bool, id: &String, state: &MmioTransportState, device_info: &MMIODeviceInfo, event_manager: &mut EventManager| -> Result<(), Self::Error> { let interrupt = Arc::new(IrqTrigger::new()); let restore_args = MmioTransportConstructorArgs { mem: mem.clone(), interrupt: interrupt.clone(), device: device.clone(), is_vhost_user, }; let mmio_transport = Arc::new(Mutex::new( MmioTransport::restore(restore_args, state) .map_err(|()| DevicePersistError::MmioTransport)?, )); dev_manager.register_mmio_virtio( vm, id.clone(), MMIODevice { resources: *device_info, inner: mmio_transport, sub_id: None, }, event_manager, )?; if activated { device .lock() .expect("Poisoned lock") .activate(mem.clone(), interrupt)?; } Ok(()) }; if let Some(balloon_state) = &state.balloon_device { let device = Arc::new(Mutex::new(Balloon::restore( BalloonConstructorArgs { mem: mem.clone() }, &balloon_state.device_state, )?)); constructor_args .vm_resources .balloon .set_device(device.clone()); restore_helper( device, balloon_state.device_state.virtio_state.activated, false, &balloon_state.device_id, &balloon_state.transport_state, &balloon_state.device_info, constructor_args.event_manager, )?; } for block_state in &state.block_devices { let device = Arc::new(Mutex::new(Block::restore( BlockConstructorArgs { mem: mem.clone() }, 
&block_state.device_state, )?)); constructor_args .vm_resources .block .add_virtio_device(device.clone()); restore_helper( device, block_state.device_state.is_activated(), false, &block_state.device_id, &block_state.transport_state, &block_state.device_info, constructor_args.event_manager, )?; } // Initialize MMDS if MMDS state is included. if let Some(mmds) = &state.mmds { constructor_args.vm_resources.set_mmds_basic_config( mmds.version, mmds.imds_compat, constructor_args.instance_id, )?; } for net_state in &state.net_devices { let device = Arc::new(Mutex::new(Net::restore( NetConstructorArgs { mem: mem.clone(), mmds: constructor_args .vm_resources .mmds .as_ref() // Clone the Arc reference. .cloned(), }, &net_state.device_state, )?)); constructor_args .vm_resources .net_builder .add_device(device.clone()); restore_helper( device, net_state.device_state.virtio_state.activated, false, &net_state.device_id, &net_state.transport_state, &net_state.device_info, constructor_args.event_manager, )?; } if let Some(vsock_state) = &state.vsock_device { let ctor_args = VsockUdsConstructorArgs { cid: vsock_state.device_state.frontend.cid, }; let backend = VsockUnixBackend::restore(ctor_args, &vsock_state.device_state.backend)?; let device = Arc::new(Mutex::new(Vsock::restore( VsockConstructorArgs { mem: mem.clone(), backend, }, &vsock_state.device_state.frontend, )?)); constructor_args .vm_resources .vsock .set_device(device.clone()); restore_helper( device, vsock_state.device_state.frontend.virtio_state.activated, false, &vsock_state.device_id, &vsock_state.transport_state, &vsock_state.device_info, constructor_args.event_manager, )?; } if let Some(entropy_state) = &state.entropy_device { let ctor_args = EntropyConstructorArgs { mem: mem.clone() }; let device = Arc::new(Mutex::new(Entropy::restore( ctor_args, &entropy_state.device_state, )?)); constructor_args .vm_resources .entropy .set_device(device.clone()); restore_helper( device, 
entropy_state.device_state.virtio_state.activated, false, &entropy_state.device_id, &entropy_state.transport_state, &entropy_state.device_info, constructor_args.event_manager, )?; } for pmem_state in &state.pmem_devices { let device = Arc::new(Mutex::new(Pmem::restore( PmemConstructorArgs { mem, vm: vm.as_ref(), }, &pmem_state.device_state, )?)); constructor_args .vm_resources .pmem .add_device(device.clone()); restore_helper( device, pmem_state.device_state.virtio_state.activated, false, &pmem_state.device_id, &pmem_state.transport_state, &pmem_state.device_info, constructor_args.event_manager, )?; } if let Some(memory_state) = &state.memory_device { let ctor_args = VirtioMemConstructorArgs::new(Arc::clone(vm)); let device = VirtioMem::restore(ctor_args, &memory_state.device_state)?; constructor_args.vm_resources.memory_hotplug = Some(MemoryHotplugConfig { total_size_mib: device.total_size_mib(), block_size_mib: device.block_size_mib(), slot_size_mib: device.slot_size_mib(), }); let arcd_device = Arc::new(Mutex::new(device)); restore_helper( arcd_device, memory_state.device_state.virtio_state.activated, false, &memory_state.device_id, &memory_state.transport_state, &memory_state.device_info, constructor_args.event_manager, )?; } Ok(dev_manager) } } #[cfg(test)] mod tests { use vmm_sys_util::tempfile::TempFile; use super::*; use crate::builder::tests::*; use crate::device_manager; use crate::devices::virtio::block::CacheType; use crate::resources::VmmConfig; use crate::vmm_config::balloon::BalloonDeviceConfig; use crate::vmm_config::entropy::EntropyDeviceConfig; use crate::vmm_config::memory_hotplug::MemoryHotplugConfig; use crate::vmm_config::net::NetworkInterfaceConfig; use crate::vmm_config::pmem::PmemConfig; use crate::vmm_config::vsock::VsockDeviceConfig; impl PartialEq for VirtioDeviceState { fn eq(&self, other: &VirtioDeviceState) -> bool { // Actual device state equality is checked by the device's tests. 
self.transport_state == other.transport_state && self.device_info == other.device_info } } impl PartialEq for DeviceStates { fn eq(&self, other: &DeviceStates) -> bool { self.balloon_device == other.balloon_device && self.block_devices == other.block_devices && self.net_devices == other.net_devices && self.vsock_device == other.vsock_device && self.entropy_device == other.entropy_device && self.memory_device == other.memory_device } } impl PartialEq for MMIODevice { fn eq(&self, other: &Self) -> bool { self.resources == other.resources } } impl PartialEq for MMIODeviceManager { fn eq(&self, other: &MMIODeviceManager) -> bool { // We only care about the device hashmap. if self.virtio_devices.len() != other.virtio_devices.len() { return false; } for (key, val) in &self.virtio_devices { match other.virtio_devices.get(key) { Some(other_val) if val == other_val => continue, _ => return false, } } self.boot_timer == other.boot_timer } } #[test] fn test_device_manager_persistence() { // These need to survive so the restored blocks find them. let _block_files; let _pmem_files; let mut tmp_sock_file = TempFile::new().unwrap(); tmp_sock_file.remove().unwrap(); let serialized_data; // Set up a vmm with one of each device, and get the serialized DeviceStates. { let mut event_manager = EventManager::new().expect("Unable to create EventManager"); let mut vmm = default_vmm(); let mut cmdline = default_kernel_cmdline(); // Add a balloon device. let balloon_cfg = BalloonDeviceConfig { amount_mib: 123, deflate_on_oom: false, stats_polling_interval_s: 1, free_page_hinting: false, free_page_reporting: false, }; insert_balloon_device(&mut vmm, &mut cmdline, &mut event_manager, balloon_cfg); // Add a block device. let drive_id = String::from("root"); let block_configs = vec![CustomBlockConfig::new( drive_id, true, None, true, CacheType::Unsafe, )]; _block_files = insert_block_devices(&mut vmm, &mut cmdline, &mut event_manager, block_configs); // Add a net device. 
let network_interface = NetworkInterfaceConfig { iface_id: String::from("netif"), host_dev_name: String::from("hostname"), guest_mac: None, rx_rate_limiter: None, tx_rate_limiter: None, }; insert_net_device_with_mmds( &mut vmm, &mut cmdline, &mut event_manager, network_interface, MmdsVersion::V2, ); // Add a vsock device. let vsock_dev_id = "vsock"; let vsock_config = VsockDeviceConfig { vsock_id: Some(vsock_dev_id.to_string()), guest_cid: 3, uds_path: tmp_sock_file.as_path().to_str().unwrap().to_string(), }; insert_vsock_device(&mut vmm, &mut cmdline, &mut event_manager, vsock_config); // Add an entropy device. let entropy_config = EntropyDeviceConfig::default(); insert_entropy_device(&mut vmm, &mut cmdline, &mut event_manager, entropy_config); // Add a pmem device. let pmem_id = String::from("pmem"); let pmem_configs = vec![PmemConfig { id: pmem_id, path_on_host: "".into(), root_device: true, read_only: true, }]; _pmem_files = insert_pmem_devices(&mut vmm, &mut cmdline, &mut event_manager, pmem_configs); let memory_hotplug_config = MemoryHotplugConfig { total_size_mib: 1024, block_size_mib: 2, slot_size_mib: 128, }; insert_virtio_mem_device( &mut vmm, &mut cmdline, &mut event_manager, memory_hotplug_config, ); let device_state = vmm.device_manager.save(); serialized_data = bitcode::serialize(&device_state).unwrap(); } tmp_sock_file.remove().unwrap(); let mut event_manager = EventManager::new().expect("Unable to create EventManager"); let vmm = default_vmm(); let device_manager_state: device_manager::DevicesState = bitcode::deserialize(&serialized_data).unwrap(); let vm_resources = &mut VmResources::default(); let restore_args = MMIODevManagerConstructorArgs { mem: vmm.vm.guest_memory(), vm: &vmm.vm, event_manager: &mut event_manager, vm_resources, instance_id: "microvm-id", }; let _restored_dev_manager = MMIODeviceManager::restore(restore_args, &device_manager_state.mmio_state).unwrap(); let expected_vm_resources = format!( r#"{{ "balloon": {{ "amount_mib": 123, 
"deflate_on_oom": false, "stats_polling_interval_s": 1, "free_page_hinting": false, "free_page_reporting": false }}, "drives": [ {{ "drive_id": "root", "partuuid": null, "is_root_device": true, "cache_type": "Unsafe", "is_read_only": true, "path_on_host": "{}", "rate_limiter": null, "io_engine": "Sync", "socket": null }} ], "boot-source": {{ "kernel_image_path": "", "initrd_path": null, "boot_args": null }}, "cpu-config": null, "logger": null, "machine-config": {{ "vcpu_count": 1, "mem_size_mib": 128, "smt": false, "track_dirty_pages": false, "huge_pages": "None" }}, "metrics": null, "mmds-config": {{ "version": "V2", "network_interfaces": [ "netif" ], "ipv4_address": "169.254.169.254", "imds_compat": false }}, "network-interfaces": [ {{ "iface_id": "netif", "host_dev_name": "hostname", "guest_mac": null, "rx_rate_limiter": null, "tx_rate_limiter": null }} ], "vsock": {{ "guest_cid": 3, "uds_path": "{}" }}, "entropy": {{ "rate_limiter": null }}, "pmem": [ {{ "id": "pmem", "path_on_host": "{}", "root_device": true, "read_only": true }} ], "memory-hotplug": {{ "total_size_mib": 1024, "block_size_mib": 2, "slot_size_mib": 128 }} }}"#, _block_files.last().unwrap().as_path().to_str().unwrap(), tmp_sock_file.as_path().to_str().unwrap(), _pmem_files.last().unwrap().as_path().to_str().unwrap(), ); assert_eq!( vm_resources .mmds .as_ref() .unwrap() .lock() .unwrap() .version(), MmdsVersion::V2 ); assert_eq!( device_manager_state.mmio_state.mmds.unwrap().version, MmdsVersion::V2 ); assert_eq!( expected_vm_resources, serde_json::to_string_pretty(&VmmConfig::from(&*vm_resources)).unwrap() ); } } ================================================ FILE: src/vmm/src/devices/acpi/generated/mod.rs ================================================ // Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
// SPDX-License-Identifier: Apache-2.0 #![allow(clippy::all)] #![allow(non_upper_case_globals)] #![allow(non_camel_case_types)] #![allow(non_snake_case)] pub mod vmclock_abi; ================================================ FILE: src/vmm/src/devices/acpi/generated/vmclock_abi.rs ================================================ // Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 // automatically generated by tools/bindgen.sh #![allow( non_camel_case_types, non_upper_case_globals, dead_code, non_snake_case, clippy::ptr_as_ptr, clippy::undocumented_unsafe_blocks, missing_debug_implementations, clippy::tests_outside_test_module, unsafe_op_in_unsafe_fn, clippy::redundant_static_lifetimes )] use serde::{Deserialize, Serialize}; pub const __BITS_PER_LONG: u32 = 64; pub const __BITS_PER_LONG_LONG: u32 = 64; pub const __FD_SETSIZE: u32 = 1024; pub const VMCLOCK_MAGIC: u32 = 1263289174; pub const VMCLOCK_COUNTER_ARM_VCNT: u8 = 0; pub const VMCLOCK_COUNTER_X86_TSC: u8 = 1; pub const VMCLOCK_COUNTER_INVALID: u8 = 255; pub const VMCLOCK_TIME_UTC: u8 = 0; pub const VMCLOCK_TIME_TAI: u8 = 1; pub const VMCLOCK_TIME_MONOTONIC: u8 = 2; pub const VMCLOCK_TIME_INVALID_SMEARED: u8 = 3; pub const VMCLOCK_TIME_INVALID_MAYBE_SMEARED: u8 = 4; pub const VMCLOCK_FLAG_TAI_OFFSET_VALID: u64 = 1; pub const VMCLOCK_FLAG_DISRUPTION_SOON: u64 = 2; pub const VMCLOCK_FLAG_DISRUPTION_IMMINENT: u64 = 4; pub const VMCLOCK_FLAG_PERIOD_ESTERROR_VALID: u64 = 8; pub const VMCLOCK_FLAG_PERIOD_MAXERROR_VALID: u64 = 16; pub const VMCLOCK_FLAG_TIME_ESTERROR_VALID: u64 = 32; pub const VMCLOCK_FLAG_TIME_MAXERROR_VALID: u64 = 64; pub const VMCLOCK_FLAG_TIME_MONOTONIC: u64 = 128; pub const VMCLOCK_FLAG_VM_GEN_COUNTER_PRESENT: u64 = 256; pub const VMCLOCK_FLAG_NOTIFICATION_PRESENT: u64 = 512; pub const VMCLOCK_STATUS_UNKNOWN: u8 = 0; pub const VMCLOCK_STATUS_INITIALIZING: u8 = 1; pub const VMCLOCK_STATUS_SYNCHRONIZED: u8 = 2; pub const 
VMCLOCK_STATUS_FREERUNNING: u8 = 3; pub const VMCLOCK_STATUS_UNRELIABLE: u8 = 4; pub const VMCLOCK_SMEARING_STRICT: u8 = 0; pub const VMCLOCK_SMEARING_NOON_LINEAR: u8 = 1; pub const VMCLOCK_SMEARING_UTC_SLS: u8 = 2; pub const VMCLOCK_LEAP_NONE: u8 = 0; pub const VMCLOCK_LEAP_PRE_POS: u8 = 1; pub const VMCLOCK_LEAP_PRE_NEG: u8 = 2; pub const VMCLOCK_LEAP_POS: u8 = 3; pub const VMCLOCK_LEAP_POST_POS: u8 = 4; pub const VMCLOCK_LEAP_POST_NEG: u8 = 5; pub type __s8 = ::std::os::raw::c_schar; pub type __u8 = ::std::os::raw::c_uchar; pub type __s16 = ::std::os::raw::c_short; pub type __u16 = ::std::os::raw::c_ushort; pub type __s32 = ::std::os::raw::c_int; pub type __u32 = ::std::os::raw::c_uint; pub type __s64 = ::std::os::raw::c_longlong; pub type __u64 = ::std::os::raw::c_ulonglong; #[repr(C)] #[derive(Debug, Default, Copy, Clone, PartialEq)] pub struct __kernel_fd_set { pub fds_bits: [::std::os::raw::c_ulong; 16usize], } #[allow(clippy::unnecessary_operation, clippy::identity_op)] const _: () = { ["Size of __kernel_fd_set"][::std::mem::size_of::<__kernel_fd_set>() - 128usize]; ["Alignment of __kernel_fd_set"][::std::mem::align_of::<__kernel_fd_set>() - 8usize]; ["Offset of field: __kernel_fd_set::fds_bits"] [::std::mem::offset_of!(__kernel_fd_set, fds_bits) - 0usize]; }; pub type __kernel_sighandler_t = ::std::option::Option; pub type __kernel_key_t = ::std::os::raw::c_int; pub type __kernel_mqd_t = ::std::os::raw::c_int; pub type __kernel_old_uid_t = ::std::os::raw::c_ushort; pub type __kernel_old_gid_t = ::std::os::raw::c_ushort; pub type __kernel_old_dev_t = ::std::os::raw::c_ulong; pub type __kernel_long_t = ::std::os::raw::c_long; pub type __kernel_ulong_t = ::std::os::raw::c_ulong; pub type __kernel_ino_t = __kernel_ulong_t; pub type __kernel_mode_t = ::std::os::raw::c_uint; pub type __kernel_pid_t = ::std::os::raw::c_int; pub type __kernel_ipc_pid_t = ::std::os::raw::c_int; pub type __kernel_uid_t = ::std::os::raw::c_uint; pub type __kernel_gid_t = 
::std::os::raw::c_uint; pub type __kernel_suseconds_t = __kernel_long_t; pub type __kernel_daddr_t = ::std::os::raw::c_int; pub type __kernel_uid32_t = ::std::os::raw::c_uint; pub type __kernel_gid32_t = ::std::os::raw::c_uint; pub type __kernel_size_t = __kernel_ulong_t; pub type __kernel_ssize_t = __kernel_long_t; pub type __kernel_ptrdiff_t = __kernel_long_t; #[repr(C)] #[derive(Debug, Default, Copy, Clone, PartialEq)] pub struct __kernel_fsid_t { pub val: [::std::os::raw::c_int; 2usize], } #[allow(clippy::unnecessary_operation, clippy::identity_op)] const _: () = { ["Size of __kernel_fsid_t"][::std::mem::size_of::<__kernel_fsid_t>() - 8usize]; ["Alignment of __kernel_fsid_t"][::std::mem::align_of::<__kernel_fsid_t>() - 4usize]; ["Offset of field: __kernel_fsid_t::val"] [::std::mem::offset_of!(__kernel_fsid_t, val) - 0usize]; }; pub type __kernel_off_t = __kernel_long_t; pub type __kernel_loff_t = ::std::os::raw::c_longlong; pub type __kernel_old_time_t = __kernel_long_t; pub type __kernel_time_t = __kernel_long_t; pub type __kernel_time64_t = ::std::os::raw::c_longlong; pub type __kernel_clock_t = __kernel_long_t; pub type __kernel_timer_t = ::std::os::raw::c_int; pub type __kernel_clockid_t = ::std::os::raw::c_int; pub type __kernel_caddr_t = *mut ::std::os::raw::c_char; pub type __kernel_uid16_t = ::std::os::raw::c_ushort; pub type __kernel_gid16_t = ::std::os::raw::c_ushort; pub type __s128 = i128; pub type __u128 = u128; pub type __le16 = __u16; pub type __be16 = __u16; pub type __le32 = __u32; pub type __be32 = __u32; pub type __le64 = __u64; pub type __be64 = __u64; pub type __sum16 = __u16; pub type __wsum = __u32; pub type __poll_t = ::std::os::raw::c_uint; #[repr(C)] #[derive(Debug, Default, Copy, Clone, PartialEq, Serialize, Deserialize)] pub struct vmclock_abi { pub magic: __le32, pub size: __le32, pub version: __le16, pub counter_id: __u8, pub time_type: __u8, pub seq_count: __le32, pub disruption_marker: __le64, pub flags: __le64, pub pad: [__u8; 
2usize], pub clock_status: __u8, pub leap_second_smearing_hint: __u8, pub tai_offset_sec: __le16, pub leap_indicator: __u8, pub counter_period_shift: __u8, pub counter_value: __le64, pub counter_period_frac_sec: __le64, pub counter_period_esterror_rate_frac_sec: __le64, pub counter_period_maxerror_rate_frac_sec: __le64, pub time_sec: __le64, pub time_frac_sec: __le64, pub time_esterror_nanosec: __le64, pub time_maxerror_nanosec: __le64, pub vm_generation_counter: __le64, } #[allow(clippy::unnecessary_operation, clippy::identity_op)] const _: () = { ["Size of vmclock_abi"][::std::mem::size_of::() - 112usize]; ["Alignment of vmclock_abi"][::std::mem::align_of::() - 8usize]; ["Offset of field: vmclock_abi::magic"][::std::mem::offset_of!(vmclock_abi, magic) - 0usize]; ["Offset of field: vmclock_abi::size"][::std::mem::offset_of!(vmclock_abi, size) - 4usize]; ["Offset of field: vmclock_abi::version"] [::std::mem::offset_of!(vmclock_abi, version) - 8usize]; ["Offset of field: vmclock_abi::counter_id"] [::std::mem::offset_of!(vmclock_abi, counter_id) - 10usize]; ["Offset of field: vmclock_abi::time_type"] [::std::mem::offset_of!(vmclock_abi, time_type) - 11usize]; ["Offset of field: vmclock_abi::seq_count"] [::std::mem::offset_of!(vmclock_abi, seq_count) - 12usize]; ["Offset of field: vmclock_abi::disruption_marker"] [::std::mem::offset_of!(vmclock_abi, disruption_marker) - 16usize]; ["Offset of field: vmclock_abi::flags"][::std::mem::offset_of!(vmclock_abi, flags) - 24usize]; ["Offset of field: vmclock_abi::pad"][::std::mem::offset_of!(vmclock_abi, pad) - 32usize]; ["Offset of field: vmclock_abi::clock_status"] [::std::mem::offset_of!(vmclock_abi, clock_status) - 34usize]; ["Offset of field: vmclock_abi::leap_second_smearing_hint"] [::std::mem::offset_of!(vmclock_abi, leap_second_smearing_hint) - 35usize]; ["Offset of field: vmclock_abi::tai_offset_sec"] [::std::mem::offset_of!(vmclock_abi, tai_offset_sec) - 36usize]; ["Offset of field: vmclock_abi::leap_indicator"] 
[::std::mem::offset_of!(vmclock_abi, leap_indicator) - 38usize]; ["Offset of field: vmclock_abi::counter_period_shift"] [::std::mem::offset_of!(vmclock_abi, counter_period_shift) - 39usize]; ["Offset of field: vmclock_abi::counter_value"] [::std::mem::offset_of!(vmclock_abi, counter_value) - 40usize]; ["Offset of field: vmclock_abi::counter_period_frac_sec"] [::std::mem::offset_of!(vmclock_abi, counter_period_frac_sec) - 48usize]; ["Offset of field: vmclock_abi::counter_period_esterror_rate_frac_sec"] [::std::mem::offset_of!(vmclock_abi, counter_period_esterror_rate_frac_sec) - 56usize]; ["Offset of field: vmclock_abi::counter_period_maxerror_rate_frac_sec"] [::std::mem::offset_of!(vmclock_abi, counter_period_maxerror_rate_frac_sec) - 64usize]; ["Offset of field: vmclock_abi::time_sec"] [::std::mem::offset_of!(vmclock_abi, time_sec) - 72usize]; ["Offset of field: vmclock_abi::time_frac_sec"] [::std::mem::offset_of!(vmclock_abi, time_frac_sec) - 80usize]; ["Offset of field: vmclock_abi::time_esterror_nanosec"] [::std::mem::offset_of!(vmclock_abi, time_esterror_nanosec) - 88usize]; ["Offset of field: vmclock_abi::time_maxerror_nanosec"] [::std::mem::offset_of!(vmclock_abi, time_maxerror_nanosec) - 96usize]; ["Offset of field: vmclock_abi::vm_generation_counter"] [::std::mem::offset_of!(vmclock_abi, vm_generation_counter) - 104usize]; }; ================================================ FILE: src/vmm/src/devices/acpi/mod.rs ================================================ // Copyright 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 mod generated; pub mod vmclock; pub mod vmgenid; ================================================ FILE: src/vmm/src/devices/acpi/vmclock.rs ================================================ // Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
// SPDX-License-Identifier: Apache-2.0 use std::convert::Infallible; use std::mem::offset_of; use std::sync::atomic::{Ordering, fence}; use acpi_tables::{Aml, aml}; use log::{debug, error}; use serde::{Deserialize, Serialize}; use vm_allocator::AllocPolicy; use vm_memory::{Address, ByteValued, Bytes, GuestAddress, GuestMemoryError}; use vm_superio::Trigger; use vmm_sys_util::eventfd::EventFd; use crate::Vm; use crate::devices::acpi::generated::vmclock_abi::{ VMCLOCK_COUNTER_INVALID, VMCLOCK_FLAG_NOTIFICATION_PRESENT, VMCLOCK_FLAG_VM_GEN_COUNTER_PRESENT, VMCLOCK_MAGIC, VMCLOCK_STATUS_UNKNOWN, vmclock_abi, }; use crate::devices::legacy::EventFdTrigger; use crate::snapshot::Persist; use crate::vstate::memory::GuestMemoryMmap; use crate::vstate::resources::ResourceAllocator; // SAFETY: `vmclock_abi` is a POD unsafe impl ByteValued for vmclock_abi {} // We are reserving a physical page to expose the [`VmClock`] data pub const VMCLOCK_SIZE: u32 = 0x1000; // Write a value in `vmclock_abi` both in the Firecracker-managed state // and inside guest memory address that corresponds to it. macro_rules! 
write_vmclock_field { ($vmclock:expr, $mem:expr, $field:ident, $value:expr) => { $vmclock.inner.$field = $value; $mem.write_obj( $vmclock.inner.$field, $vmclock .guest_address .unchecked_add(offset_of!(vmclock_abi, $field) as u64), ); }; } #[derive(Debug, thiserror::Error, displaydoc::Display)] pub enum VmClockError { /// Could not create EventFd: {0} CreateEventFd(std::io::Error), /// Could not allocate GSI: {0} AllocateGsi(vm_allocator::Error), /// Could not allocate guest memory: {0} AllocateMemory(vm_allocator::Error), /// Could not write VMClock data to guest memory: {0} WriteGuestMemory(#[from] GuestMemoryError), /// Could not notify guest: {0} NotifyGuest(std::io::Error), } /// VMclock device /// /// This device emulates the VMclock device which allows passing information to the guest related /// to the relation of the host CPU to real-time clock as well as information about disruptive /// events, such as live-migration. #[derive(Debug)] pub struct VmClock { /// Guest address in which we will write the VMclock struct pub guest_address: GuestAddress, /// Interrupt line for notifying the device about changes pub interrupt_evt: EventFdTrigger, /// GSI number allocated for the device. 
pub gsi: u32, /// The [`VmClock`] state we are exposing to the guest inner: vmclock_abi, } impl VmClock { /// Create a new [`VmClock`] device for a newly booted VM pub fn new(resource_allocator: &mut ResourceAllocator) -> Result { let addr = resource_allocator .allocate_system_memory( VMCLOCK_SIZE as u64, VMCLOCK_SIZE as u64, AllocPolicy::LastMatch, ) .map_err(VmClockError::AllocateMemory)?; let gsi = resource_allocator .allocate_gsi_legacy(1) .map_err(VmClockError::AllocateGsi)?[0]; let interrupt_evt = EventFdTrigger::new( EventFd::new(libc::EFD_NONBLOCK).map_err(VmClockError::CreateEventFd)?, ); let mut inner = vmclock_abi { magic: VMCLOCK_MAGIC, size: VMCLOCK_SIZE, version: 1, clock_status: VMCLOCK_STATUS_UNKNOWN, counter_id: VMCLOCK_COUNTER_INVALID, flags: VMCLOCK_FLAG_VM_GEN_COUNTER_PRESENT | VMCLOCK_FLAG_NOTIFICATION_PRESENT, ..Default::default() }; Ok(VmClock { guest_address: GuestAddress(addr), interrupt_evt, gsi, inner, }) } /// Activate [`VmClock`] device pub fn activate(&self, mem: &GuestMemoryMmap) -> Result<(), VmClockError> { mem.write_slice(self.inner.as_slice(), self.guest_address)?; Ok(()) } /// Bump the VM generation counter and notify guest after snapshot restore pub fn do_post_restore(&mut self, mem: &GuestMemoryMmap) -> Result<(), VmClockError> { write_vmclock_field!(self, mem, seq_count, self.inner.seq_count | 1); // This fence ensures guest sees all previous writes. It is matched to a // read barrier in the guest. fence(Ordering::Release); write_vmclock_field!( self, mem, disruption_marker, self.inner.disruption_marker.wrapping_add(1) ); write_vmclock_field!( self, mem, vm_generation_counter, self.inner.vm_generation_counter.wrapping_add(1) ); // This fence ensures guest sees the `disruption_marker` and `vm_generation_counter` // updates. It is matched to a read barrier in the guest. 
fence(Ordering::Release); write_vmclock_field!(self, mem, seq_count, self.inner.seq_count.wrapping_add(1)); self.interrupt_evt .trigger() .map_err(VmClockError::NotifyGuest)?; debug!("vmclock: notifying guest about VMClock updates"); Ok(()) } } /// (De)serialize-able state of the [`VmClock`] /// /// We could avoid this and reuse [`VmClock`] itself if `GuestAddress` was `Serialize`/`Deserialize` #[derive(Default, Debug, Clone, Serialize, Deserialize)] pub struct VmClockState { /// Guest address in which we write the [`VmClock`] info pub guest_address: u64, /// GSI used for notifying the guest about device changes pub gsi: u32, /// Data we expose to the guest pub inner: vmclock_abi, } impl<'a> Persist<'a> for VmClock { type State = VmClockState; type ConstructorArgs = (); type Error = VmClockError; fn save(&self) -> Self::State { VmClockState { guest_address: self.guest_address.0, gsi: self.gsi, inner: self.inner, } } fn restore(vm: Self::ConstructorArgs, state: &Self::State) -> Result { let interrupt_evt = EventFdTrigger::new( EventFd::new(libc::EFD_NONBLOCK).map_err(VmClockError::CreateEventFd)?, ); Ok(VmClock { guest_address: GuestAddress(state.guest_address), interrupt_evt, gsi: state.gsi, inner: state.inner, }) } } impl Aml for VmClock { fn append_aml_bytes(&self, v: &mut Vec) -> Result<(), aml::AmlError> { aml::Device::new( "_SB_.VCLK".try_into()?, vec![ &aml::Name::new("_HID".try_into()?, &"AMZNC10C")?, &aml::Name::new("_CID".try_into()?, &"VMCLOCK")?, &aml::Name::new("_DDN".try_into()?, &"VMCLOCK")?, &aml::Method::new( "_STA".try_into()?, 0, false, vec![&aml::Return::new(&0x0fu8)], ), &aml::Name::new( "_CRS".try_into()?, &aml::ResourceTemplate::new(vec![&aml::AddressSpace::new_memory( aml::AddressSpaceCacheable::Cacheable, false, self.guest_address.0, self.guest_address.0 + VMCLOCK_SIZE as u64 - 1, )?]), )?, ], ) .append_aml_bytes(v) } } #[cfg(test)] mod tests { use vm_memory::{Bytes, GuestAddress}; use vmm_sys_util::tempfile::TempFile; use crate::Vm; 
#[cfg(target_arch = "x86_64")] use crate::arch::x86_64::layout; use crate::arch::{self, Kvm}; use crate::devices::acpi::generated::vmclock_abi::vmclock_abi; use crate::devices::acpi::vmclock::{VMCLOCK_SIZE, VmClock}; use crate::devices::virtio::test_utils::default_mem; use crate::snapshot::{Persist, Snapshot}; use crate::test_utils::single_region_mem; use crate::utils::u64_to_usize; use crate::vstate::resources::ResourceAllocator; use crate::vstate::vm::tests::setup_vm_with_memory; // We are allocating memory from the end of the system memory portion const VMCLOCK_TEST_GUEST_ADDR: GuestAddress = GuestAddress(arch::SYSTEM_MEM_START + arch::SYSTEM_MEM_SIZE - VMCLOCK_SIZE as u64); fn default_vmclock() -> VmClock { let mut resource_allocator = ResourceAllocator::new(); VmClock::new(&mut resource_allocator).unwrap() } #[test] fn test_new_device() { let vmclock = default_vmclock(); let mem = single_region_mem( u64_to_usize(arch::SYSTEM_MEM_START) + u64_to_usize(arch::SYSTEM_MEM_SIZE), ); let guest_data: vmclock_abi = mem.read_obj(VMCLOCK_TEST_GUEST_ADDR).unwrap(); assert_ne!(guest_data, vmclock.inner); vmclock.activate(&mem); let guest_data: vmclock_abi = mem.read_obj(VMCLOCK_TEST_GUEST_ADDR).unwrap(); assert_eq!(guest_data, vmclock.inner); } #[test] fn test_device_save_restore() { let vmclock = default_vmclock(); // We're using memory inside the system memory portion of the guest RAM. So we need a // memory region that includes it. 
        let mem = single_region_mem(
            u64_to_usize(arch::SYSTEM_MEM_START) + u64_to_usize(arch::SYSTEM_MEM_SIZE),
        );
        vmclock.activate(&mem).unwrap();

        let state = vmclock.save();
        let mut vmclock_new = VmClock::restore((), &state).unwrap();
        vmclock_new.do_post_restore(&mem);

        let guest_data_new: vmclock_abi = mem.read_obj(VMCLOCK_TEST_GUEST_ADDR).unwrap();
        // Guest memory now reflects the restored device's data, not the old one's.
        assert_ne!(guest_data_new, vmclock.inner);
        assert_eq!(guest_data_new, vmclock_new.inner);
        // Restore must advertise a clock disruption and a new VM generation.
        assert_eq!(
            vmclock.inner.disruption_marker + 1,
            vmclock_new.inner.disruption_marker
        );
        assert_eq!(
            vmclock.inner.vm_generation_counter + 1,
            vmclock_new.inner.vm_generation_counter
        );
    }
}

================================================
FILE: src/vmm/src/devices/acpi/vmgenid.rs
================================================
// Copyright 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

use std::convert::Infallible;

use acpi_tables::{Aml, aml};
use aws_lc_rs::error::Unspecified as RandError;
use aws_lc_rs::rand;
use log::{debug, error};
use serde::{Deserialize, Serialize};
use vm_memory::{GuestAddress, GuestMemoryError};
use vm_superio::Trigger;
use vmm_sys_util::eventfd::EventFd;

use super::super::legacy::EventFdTrigger;
use crate::snapshot::Persist;
use crate::vstate::memory::{Bytes, GuestMemoryMmap};
use crate::vstate::resources::ResourceAllocator;

/// Bytes of memory we allocate for VMGenID device
pub const VMGENID_MEM_SIZE: u64 = 16;

/// Errors that can occur while creating or operating the VMGenID device.
#[derive(Debug, thiserror::Error, displaydoc::Display)]
pub enum VmGenIdError {
    /// Could not create EventFd: {0}
    CreateEventFd(std::io::Error),
    /// Could not allocate GSI: {0}
    AllocateGsi(vm_allocator::Error),
    /// Could not allocate guest memory: {0}
    AllocateMemory(vm_allocator::Error),
    /// Could not write generation ID to guest memory: {0}
    WriteGuestMemory(#[from] GuestMemoryError),
    /// Could not notify guest: {0}
    NotifyGuest(std::io::Error),
}

/// Virtual Machine Generation ID device
///
/// VMGenID is an emulated device which exposes to the guest a 128-bit cryptographically random
/// integer value which will be different every time the virtual machine executes from a different
/// configuration file. In Firecracker terms this translates to a different value every time a new
/// microVM is created, either from scratch or restored from a snapshot.
///
/// The device specification can be found here: https://go.microsoft.com/fwlink/?LinkId=260709
#[derive(Debug)]
pub struct VmGenId {
    /// Current generation ID of guest VM
    pub gen_id: u128,
    /// Interrupt line for notifying the device about generation ID changes
    pub interrupt_evt: EventFdTrigger,
    /// Guest physical address where VMGenID data lives.
    pub guest_address: GuestAddress,
    /// GSI number for the device
    pub gsi: u32,
}

impl VmGenId {
    /// Create a new Vm Generation Id device using an address in the guest for writing the
    /// generation ID and a GSI for sending device notifications.
    // NOTE(review): the return type's generic arguments appear stripped by
    // extraction (presumably `Result<Self, VmGenIdError>`).
    pub fn from_parts(guest_address: GuestAddress, gsi: u32) -> Result {
        debug!(
            "vmgenid: building VMGenID device. Address: {:#010x}. IRQ: {}",
            guest_address.0, gsi
        );
        let interrupt_evt = EventFdTrigger::new(
            EventFd::new(libc::EFD_NONBLOCK).map_err(VmGenIdError::CreateEventFd)?,
        );
        // Every construction draws a fresh random generation ID.
        let gen_id = Self::make_genid();
        Ok(Self {
            gen_id,
            interrupt_evt,
            guest_address,
            gsi,
        })
    }

    /// Create a new VMGenID device
    ///
    /// Allocate memory and a GSI for sending notifications and build the device
    pub fn new(resource_allocator: &mut ResourceAllocator) -> Result {
        let gsi = resource_allocator
            .allocate_gsi_legacy(1)
            .map_err(VmGenIdError::AllocateGsi)?[0];
        // The generation ID needs to live in an 8-byte aligned buffer
        let addr = resource_allocator
            .allocate_system_memory(VMGENID_MEM_SIZE, 8, vm_allocator::AllocPolicy::LastMatch)
            .map_err(VmGenIdError::AllocateMemory)?;
        Self::from_parts(GuestAddress(addr), gsi)
    }

    // Create a 16-bytes random number
    // (cryptographically random, sourced from aws-lc-rs).
    fn make_genid() -> u128 {
        let mut gen_id_bytes = [0u8; 16];
        rand::fill(&mut gen_id_bytes).expect("vmgenid: could not create new generation ID");
        u128::from_le_bytes(gen_id_bytes)
    }

    /// Notify guest after snapshot restore
    ///
    /// This will only have effect if we have updated the generation ID in guest memory, i.e. when
    /// re-creating the device after snapshot resumption.
    pub fn do_post_restore(&self) -> Result<(), VmGenIdError> {
        self.interrupt_evt
            .trigger()
            .map_err(VmGenIdError::NotifyGuest)?;
        debug!("vmgenid: notifying guest about new generation ID");
        Ok(())
    }

    /// Attach the [`VmGenId`] device
    pub fn activate(&self, mem: &GuestMemoryMmap) -> Result<(), VmGenIdError> {
        debug!(
            "vmgenid: writing new generation ID to guest: {:#034x}",
            self.gen_id
        );
        // 16 little-endian bytes written at the 8-byte-aligned address the
        // allocator handed out in `new()`.
        mem.write_slice(&self.gen_id.to_le_bytes(), self.guest_address)
            .map_err(VmGenIdError::WriteGuestMemory)?;
        Ok(())
    }
}

/// Logic to save/restore the state of a VMGenID device
#[derive(Default, Debug, Clone, Serialize, Deserialize)]
pub struct VMGenIDState {
    /// GSI used for VMGenID device
    pub gsi: u32,
    /// memory address of generation ID
    pub addr: u64,
}

impl<'a> Persist<'a> for VmGenId {
    type State = VMGenIDState;
    type ConstructorArgs = ();
    type Error = VmGenIdError;

    fn save(&self) -> Self::State {
        VMGenIDState {
            gsi: self.gsi,
            addr: self.guest_address.0,
        }
    }

    // Restoring rebuilds the device from scratch, which draws a *new*
    // generation ID (see `from_parts`) -- exactly what VMGenID semantics
    // require after snapshot resume.
    fn restore(_: Self::ConstructorArgs, state: &Self::State) -> Result {
        Self::from_parts(GuestAddress(state.addr), state.gsi)
    }
}

impl Aml for VmGenId {
    // Emit the ACPI AML for the VMGenID device (_SB_.VGEN): identity strings
    // plus the ADDR package holding the low/high 32-bit halves of the guest
    // physical address of the generation ID buffer.
    fn append_aml_bytes(&self, v: &mut Vec) -> Result<(), aml::AmlError> {
        #[allow(clippy::cast_possible_truncation)]
        let addr_low = self.guest_address.0 as u32;
        let addr_high = (self.guest_address.0 >> 32) as u32;
        aml::Device::new(
            "_SB_.VGEN".try_into()?,
            vec![
                &aml::Name::new("_HID".try_into()?, &"FCVMGID")?,
                &aml::Name::new("_CID".try_into()?, &"VM_Gen_Counter")?,
                &aml::Name::new("_DDN".try_into()?, &"VM_Gen_Counter")?,
                &aml::Name::new(
                    "ADDR".try_into()?,
                    &aml::Package::new(vec![&addr_low, &addr_high]),
                )?,
            ],
        )
        .append_aml_bytes(v)
    }
}

================================================
FILE: src/vmm/src/devices/legacy/i8042.rs
================================================
// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be // found in the THIRD-PARTY file. use std::io; use std::num::Wrapping; use std::sync::{Arc, Barrier}; use log::warn; use serde::Serialize; use vmm_sys_util::eventfd::EventFd; use crate::logger::{IncMetric, SharedIncMetric, error}; use crate::vstate::bus::BusDevice; /// Errors thrown by the i8042 device. #[derive(Debug, thiserror::Error, displaydoc::Display)] pub enum I8042Error { /// i8042 internal buffer full. InternalBufferFull, /// Keyboard interrupt disabled by guest driver. KbdInterruptDisabled, /// Could not trigger keyboard interrupt: {0}. KbdInterruptFailure(io::Error), } /// Metrics specific to the i8042 device. #[derive(Debug, Serialize)] pub(super) struct I8042DeviceMetrics { /// Errors triggered while using the i8042 device. error_count: SharedIncMetric, /// Number of superfluous read intents on this i8042 device. missed_read_count: SharedIncMetric, /// Number of superfluous write intents on this i8042 device. missed_write_count: SharedIncMetric, /// Bytes read by this device. read_count: SharedIncMetric, /// Number of resets done by this device. reset_count: SharedIncMetric, /// Bytes written by this device. write_count: SharedIncMetric, } impl I8042DeviceMetrics { /// Const default construction. const fn new() -> Self { Self { error_count: SharedIncMetric::new(), missed_read_count: SharedIncMetric::new(), missed_write_count: SharedIncMetric::new(), read_count: SharedIncMetric::new(), reset_count: SharedIncMetric::new(), write_count: SharedIncMetric::new(), } } } /// Stores aggregated metrics pub(super) static METRICS: I8042DeviceMetrics = I8042DeviceMetrics::new(); /// Offset of the status port (port 0x64) const OFS_STATUS: u64 = 4; /// Offset of the data port (port 0x60) const OFS_DATA: u64 = 0; /// i8042 commands /// These values are written by the guest driver to port 0x64. 
const CMD_READ_CTR: u8 = 0x20; // Read control register const CMD_WRITE_CTR: u8 = 0x60; // Write control register const CMD_READ_OUTP: u8 = 0xD0; // Read output port const CMD_WRITE_OUTP: u8 = 0xD1; // Write output port const CMD_RESET_CPU: u8 = 0xFE; // Reset CPU /// i8042 status register bits const SB_OUT_DATA_AVAIL: u8 = 0x0001; // Data available at port 0x60 const SB_I8042_CMD_DATA: u8 = 0x0008; // i8042 expecting command parameter at port 0x60 const SB_KBD_ENABLED: u8 = 0x0010; // 1 = kbd enabled, 0 = kbd locked /// i8042 control register bits const CB_KBD_INT: u8 = 0x0001; // kbd interrupt enabled const CB_POST_OK: u8 = 0x0004; // POST ok (should always be 1) /// Key scan codes const KEY_CTRL: u16 = 0x0014; const KEY_ALT: u16 = 0x0011; const KEY_DEL: u16 = 0xE071; /// Internal i8042 buffer size, in bytes const BUF_SIZE: usize = 16; /// A i8042 PS/2 controller that emulates just enough to shutdown the machine. #[derive(Debug)] pub struct I8042Device { /// CPU reset eventfd. We will set this event when the guest issues CMD_RESET_CPU. reset_evt: EventFd, /// Keyboard interrupt event (IRQ 1). pub kbd_interrupt_evt: EventFd, /// The i8042 status register. status: u8, /// The i8042 control register. control: u8, /// The i8042 output port. outp: u8, /// The last command sent to port 0x64. cmd: u8, /// The internal i8042 data buffer. buf: [u8; BUF_SIZE], bhead: Wrapping, btail: Wrapping, } impl I8042Device { /// Constructs an i8042 device that will signal the given event when the guest requests it. pub fn new(reset_evt: EventFd) -> Result { Ok(I8042Device { reset_evt, kbd_interrupt_evt: EventFd::new(libc::EFD_NONBLOCK)?, control: CB_POST_OK | CB_KBD_INT, cmd: 0, outp: 0, status: SB_KBD_ENABLED, buf: [0; BUF_SIZE], bhead: Wrapping(0), btail: Wrapping(0), }) } /// Signal a ctrl-alt-del (reset) event. #[inline] pub fn trigger_ctrl_alt_del(&mut self) -> Result<(), I8042Error> { // The CTRL+ALT+DEL sequence is 4 bytes in total (1 extended key + 2 normal keys). 
// Fail if we don't have room for the whole sequence. if BUF_SIZE - self.buf_len() < 4 { return Err(I8042Error::InternalBufferFull); } self.trigger_key(KEY_CTRL)?; self.trigger_key(KEY_ALT)?; self.trigger_key(KEY_DEL)?; Ok(()) } fn trigger_kbd_interrupt(&self) -> Result<(), I8042Error> { if (self.control & CB_KBD_INT) == 0 { warn!("Failed to trigger i8042 kbd interrupt (disabled by guest OS)"); return Err(I8042Error::KbdInterruptDisabled); } self.kbd_interrupt_evt .write(1) .map_err(I8042Error::KbdInterruptFailure) } fn trigger_key(&mut self, key: u16) -> Result<(), I8042Error> { if key & 0xff00 != 0 { // Check if there is enough room in the buffer, before pushing an extended (2-byte) key. if BUF_SIZE - self.buf_len() < 2 { return Err(I8042Error::InternalBufferFull); } self.push_byte((key >> 8) as u8)?; } self.push_byte((key & 0xff) as u8)?; match self.trigger_kbd_interrupt() { Ok(_) | Err(I8042Error::KbdInterruptDisabled) => Ok(()), Err(err) => Err(err), } } #[inline] fn push_byte(&mut self, byte: u8) -> Result<(), I8042Error> { self.status |= SB_OUT_DATA_AVAIL; if self.buf_len() == BUF_SIZE { return Err(I8042Error::InternalBufferFull); } self.buf[self.btail.0 % BUF_SIZE] = byte; self.btail += Wrapping(1usize); Ok(()) } #[inline] fn pop_byte(&mut self) -> Option { if self.buf_len() == 0 { return None; } let res = self.buf[self.bhead.0 % BUF_SIZE]; self.bhead += Wrapping(1usize); if self.buf_len() == 0 { self.status &= !SB_OUT_DATA_AVAIL; } Some(res) } #[inline] fn flush_buf(&mut self) { self.bhead = Wrapping(0usize); self.btail = Wrapping(0usize); self.status &= !SB_OUT_DATA_AVAIL; } #[inline] fn buf_len(&self) -> usize { (self.btail - self.bhead).0 } } impl BusDevice for I8042Device { fn read(&mut self, _base: u64, offset: u64, data: &mut [u8]) { // All our ports are byte-wide. We don't know how to handle any wider data. 
        if data.len() != 1 {
            METRICS.missed_read_count.inc();
            return;
        }

        let mut read_ok = true;
        match offset {
            OFS_STATUS => data[0] = self.status,
            OFS_DATA => {
                // The guest wants to read a byte from port 0x60. For the 8042, that means the top
                // byte in the internal buffer. If the buffer is empty, the guest will get a 0.
                data[0] = self.pop_byte().unwrap_or(0);
                // Check if we still have data in the internal buffer. If so, we need to trigger
                // another interrupt, to let the guest know they need to issue another read from
                // port 0x60.
                if (self.status & SB_OUT_DATA_AVAIL) != 0
                    && let Err(I8042Error::KbdInterruptFailure(err)) = self.trigger_kbd_interrupt()
                {
                    warn!("Failed to trigger i8042 kbd interrupt {:?}", err);
                }
            }
            _ => read_ok = false,
        }
        if read_ok {
            METRICS.read_count.add(data.len() as u64);
        } else {
            METRICS.missed_read_count.inc();
        }
    }

    // NOTE(review): return type generics appear stripped by extraction
    // (presumably `Option<Arc<Barrier>>`).
    fn write(&mut self, _base: u64, offset: u64, data: &[u8]) -> Option> {
        // All our ports are byte-wide. We don't know how to handle any wider data.
        if data.len() != 1 {
            METRICS.missed_write_count.inc();
            return None;
        }

        let mut write_ok = true;
        match offset {
            OFS_STATUS if data[0] == CMD_RESET_CPU => {
                // The guest wants to assert the CPU reset line. We handle that by triggering
                // our exit event fd. Meaning Firecracker will be exiting as soon as the VMM
                // thread wakes up to handle this event.
                if let Err(err) = self.reset_evt.write(1) {
                    error!("Failed to trigger i8042 reset event: {:?}", err);
                    METRICS.error_count.inc();
                }
                METRICS.reset_count.inc();
            }
            OFS_STATUS if data[0] == CMD_READ_CTR => {
                // The guest wants to read the control register.
                // Let's make sure only the control register will be available for reading from
                // the data port, for the next inb(0x60).
                self.flush_buf();
                let control = self.control;
                // Buffer is empty, push() will always succeed.
                self.push_byte(control).unwrap();
            }
            OFS_STATUS if data[0] == CMD_WRITE_CTR => {
                // The guest wants to write the control register. This is a two-step command:
                // 1. port 0x64 < CMD_WRITE_CTR
                // 2. port 0x60 < (the control register value)
                // Make sure we'll be expecting the control reg value on port 0x60 for the next
                // write.
                self.flush_buf();
                self.status |= SB_I8042_CMD_DATA;
                self.cmd = data[0];
            }
            OFS_STATUS if data[0] == CMD_READ_OUTP => {
                // The guest wants to read the output port (for lack of a better name - this is
                // just another register on the 8042, that happens to also have its bits connected
                // to some output pins of the 8042).
                self.flush_buf();
                let outp = self.outp;
                // Buffer is empty, push() will always succeed.
                self.push_byte(outp).unwrap();
            }
            OFS_STATUS if data[0] == CMD_WRITE_OUTP => {
                // Similar to writing the control register, this is a two-step command.
                // I.e. write CMD_WRITE_OUTP at port 0x64, then write the actual out port value
                // to port 0x60.
                self.status |= SB_I8042_CMD_DATA;
                self.cmd = data[0];
            }
            OFS_DATA if (self.status & SB_I8042_CMD_DATA) != 0 => {
                // The guest is writing to port 0x60. This byte can either be:
                // 1. the payload byte of a CMD_WRITE_CTR or CMD_WRITE_OUTP command, in which case
                //    the status reg bit SB_I8042_CMD_DATA will be set, or
                // 2. a direct command sent to the keyboard
                // This match arm handles the first option (when the SB_I8042_CMD_DATA bit is set).
                match self.cmd {
                    CMD_WRITE_CTR => self.control = data[0],
                    CMD_WRITE_OUTP => self.outp = data[0],
                    _ => (),
                }
                self.status &= !SB_I8042_CMD_DATA;
            }
            OFS_DATA => {
                // The guest is sending a command straight to the keyboard (so this byte is not
                // addressed to the 8042, but to the keyboard). Since we're emulating a pretty
                // dumb keyboard, we can get away with blindly ack-in anything (byte 0xFA).
                // Something along the lines of "Yeah, uhm-uhm, yeah, okay, honey, that's great."
                self.flush_buf();
                // Buffer is empty, push() will always succeed.
                self.push_byte(0xFA).unwrap();
                if let Err(I8042Error::KbdInterruptFailure(err)) = self.trigger_kbd_interrupt() {
                    warn!("Failed to trigger i8042 kbd interrupt {:?}", err);
                }
            }
            _ => {
                write_ok = false;
            }
        }
        if write_ok {
            METRICS.write_count.inc();
        } else {
            METRICS.missed_write_count.inc();
        }
        None
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    // Compare errors via their Display text; good enough for assertions.
    impl PartialEq for I8042Error {
        fn eq(&self, other: &I8042Error) -> bool {
            self.to_string() == other.to_string()
        }
    }

    #[test]
    fn test_i8042_read_write_and_event() {
        let mut i8042 = I8042Device::new(EventFd::new(libc::EFD_NONBLOCK).unwrap()).unwrap();
        let reset_evt = i8042.reset_evt.try_clone().unwrap();

        // Check if reading in a 2-length array doesn't have side effects.
        let mut data = [1, 2];
        i8042.read(0x0, 0, &mut data);
        assert_eq!(data, [1, 2]);
        i8042.read(0x0, 1, &mut data);
        assert_eq!(data, [1, 2]);

        // Check if reset works.
        // Write 1 to the reset event fd, so that read doesn't block in case the event fd
        // counter doesn't change (for 0 it blocks).
        reset_evt.write(1).unwrap();
        let mut data = [CMD_RESET_CPU];
        i8042.write(0x0, OFS_STATUS, &data);
        assert_eq!(reset_evt.read().unwrap(), 2);

        // Check if reading with offset 1 doesn't have side effects.
        i8042.read(0x0, 1, &mut data);
        assert_eq!(data[0], CMD_RESET_CPU);

        // Check invalid `write`s.
        let before = METRICS.missed_write_count.count();
        // offset != 0.
        i8042.write(0x0, 1, &data);
        // data != CMD_RESET_CPU
        data[0] = CMD_RESET_CPU + 1;
        i8042.write(0x0, 1, &data);
        // data.len() != 1
        let data = [CMD_RESET_CPU; 2];
        i8042.write(0x0, 1, &data);
        assert_eq!(METRICS.missed_write_count.count(), before + 3);
    }

    #[test]
    fn test_i8042_commands() {
        let mut i8042 = I8042Device::new(EventFd::new(libc::EFD_NONBLOCK).unwrap()).unwrap();
        let mut data = [1];

        // Test reading/writing the control register.
        data[0] = CMD_WRITE_CTR;
        i8042.write(0x0, OFS_STATUS, &data);
        assert_ne!(i8042.status & SB_I8042_CMD_DATA, 0);
        data[0] = 0x52;
        i8042.write(0x0, OFS_DATA, &data);
        data[0] = CMD_READ_CTR;
        i8042.write(0x0, OFS_STATUS, &data);
        assert_ne!(i8042.status & SB_OUT_DATA_AVAIL, 0);
        i8042.read(0x0, OFS_DATA, &mut data);
        assert_eq!(data[0], 0x52);

        // Test reading/writing the output port.
        data[0] = CMD_WRITE_OUTP;
        i8042.write(0x0, OFS_STATUS, &data);
        assert_ne!(i8042.status & SB_I8042_CMD_DATA, 0);
        data[0] = 0x52;
        i8042.write(0x0, OFS_DATA, &data);
        data[0] = CMD_READ_OUTP;
        i8042.write(0x0, OFS_STATUS, &data);
        assert_ne!(i8042.status & SB_OUT_DATA_AVAIL, 0);
        i8042.read(0x0, OFS_DATA, &mut data);
        assert_eq!(data[0], 0x52);

        // Test kbd commands. Direct keyboard commands get blindly ACK-ed (0xFA).
        data[0] = 0x52;
        i8042.write(0x0, OFS_DATA, &data);
        assert_ne!(i8042.status & SB_OUT_DATA_AVAIL, 0);
        i8042.read(0x0, OFS_DATA, &mut data);
        assert_eq!(data[0], 0xFA);
    }

    #[test]
    fn test_i8042_buffer() {
        let mut i8042 = I8042Device::new(EventFd::new(libc::EFD_NONBLOCK).unwrap()).unwrap();

        // Test push/pop.
        i8042.push_byte(52).unwrap();
        assert_ne!(i8042.status & SB_OUT_DATA_AVAIL, 0);
        assert_eq!(i8042.pop_byte().unwrap(), 52);
        assert_eq!(i8042.status & SB_OUT_DATA_AVAIL, 0);

        // Test empty buffer pop.
        assert!(i8042.pop_byte().is_none());

        // Test buffer full.
        for i in 0..BUF_SIZE {
            i8042.push_byte(i.try_into().unwrap()).unwrap();
            assert_eq!(i8042.buf_len(), i + 1);
        }
        assert_eq!(
            i8042.push_byte(0).unwrap_err(),
            I8042Error::InternalBufferFull
        );
    }

    #[test]
    fn test_i8042_kbd() {
        let mut i8042 = I8042Device::new(EventFd::new(libc::EFD_NONBLOCK).unwrap()).unwrap();

        // Drain one (possibly extended) key from the device and check every
        // interrupt/status/data step along the way.
        fn expect_key(i8042: &mut I8042Device, key: u16) {
            let mut data = [1];

            // The interrupt line should be on.
            i8042.trigger_kbd_interrupt().unwrap();
            assert!(i8042.kbd_interrupt_evt.read().unwrap() > 1);

            // The "data available" flag should be on.
            i8042.read(0x0, OFS_STATUS, &mut data);

            let mut key_byte: u8;

            if key & 0xFF00 != 0 {
                // For extended keys, we should be able to read the MSB first.
                key_byte = ((key & 0xFF00) >> 8) as u8;
                i8042.read(0x0, OFS_DATA, &mut data);
                assert_eq!(data[0], key_byte);

                // And then do the same for the LSB.
                // The interrupt line should be on.
                i8042.trigger_kbd_interrupt().unwrap();
                assert!(i8042.kbd_interrupt_evt.read().unwrap() > 1);
                // The "data available" flag should be on.
                i8042.read(0x0, OFS_STATUS, &mut data);
            }
            key_byte = (key & 0xFF) as u8;
            i8042.read(0x0, OFS_DATA, &mut data);
            assert_eq!(data[0], key_byte);
        }

        // Test key trigger.
        i8042.trigger_key(KEY_CTRL).unwrap();
        expect_key(&mut i8042, KEY_CTRL);

        // Test extended key trigger.
        i8042.trigger_key(KEY_DEL).unwrap();
        expect_key(&mut i8042, KEY_DEL);

        // Test CTRL+ALT+DEL trigger.
        i8042.trigger_ctrl_alt_del().unwrap();
        expect_key(&mut i8042, KEY_CTRL);
        expect_key(&mut i8042, KEY_ALT);
        expect_key(&mut i8042, KEY_DEL);

        // Almost fill up the buffer, so we can test trigger failures.
        for _i in 0..BUF_SIZE - 1 {
            i8042.push_byte(1).unwrap();
        }

        // Test extended key trigger failure (needs 2 free slots, only 1 left).
        assert_eq!(i8042.buf_len(), BUF_SIZE - 1);
        assert_eq!(
            i8042.trigger_key(KEY_DEL).unwrap_err(),
            I8042Error::InternalBufferFull
        );

        // Test ctrl+alt+del trigger failure (needs 4 free slots, only 3 left).
        i8042.pop_byte().unwrap();
        i8042.pop_byte().unwrap();
        assert_eq!(i8042.buf_len(), BUF_SIZE - 3);
        assert_eq!(
            i8042.trigger_ctrl_alt_del().unwrap_err(),
            I8042Error::InternalBufferFull
        );

        // Test kbd interrupt disable.
        let mut data = [1];
        data[0] = CMD_WRITE_CTR;
        i8042.write(0x0, OFS_STATUS, &data);
        data[0] = i8042.control & !CB_KBD_INT;
        i8042.write(0x0, OFS_DATA, &data);
        i8042.trigger_key(KEY_CTRL).unwrap();
        assert_eq!(
            i8042.trigger_kbd_interrupt().unwrap_err(),
            I8042Error::KbdInterruptDisabled
        )
    }
}

================================================
FILE: src/vmm/src/devices/legacy/mod.rs
================================================
// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be // found in the THIRD-PARTY file. //! Implements legacy devices (UART, RTC etc). mod i8042; #[cfg(target_arch = "aarch64")] pub mod rtc_pl031; pub mod serial; use std::io; use std::ops::Deref; use serde::Serializer; use serde::ser::SerializeMap; use vm_superio::Trigger; use vmm_sys_util::eventfd::EventFd; pub use self::i8042::{I8042Device, I8042Error as I8042DeviceError}; #[cfg(target_arch = "aarch64")] pub use self::rtc_pl031::RTCDevice; pub use self::serial::{ IER_RDA_BIT, IER_RDA_OFFSET, SerialDevice, SerialEventsWrapper, SerialWrapper, }; /// Wrapper for implementing the trigger functionality for `EventFd`. /// /// The trigger is used for handling events in the legacy devices. #[derive(Debug)] pub struct EventFdTrigger(EventFd); impl Trigger for EventFdTrigger { type E = io::Error; fn trigger(&self) -> io::Result<()> { self.write(1) } } impl Deref for EventFdTrigger { type Target = EventFd; fn deref(&self) -> &Self::Target { &self.0 } } impl EventFdTrigger { /// Clone an `EventFdTrigger`. pub fn try_clone(&self) -> io::Result { Ok(EventFdTrigger((**self).try_clone()?)) } /// Create an `EventFdTrigger`. pub fn new(evt: EventFd) -> Self { Self(evt) } /// Get the associated event fd out of an `EventFdTrigger`. pub fn get_event(&self) -> EventFd { self.0.try_clone().unwrap() } } /// Called by METRICS.flush(), this function facilitates serialization of aggregated metrics. pub fn flush_metrics(serializer: S) -> Result { let mut seq = serializer.serialize_map(Some(1))?; seq.serialize_entry("i8042", &i8042::METRICS)?; #[cfg(target_arch = "aarch64")] seq.serialize_entry("rtc", &rtc_pl031::METRICS)?; seq.serialize_entry("uart", &serial::METRICS)?; seq.end() } ================================================ FILE: src/vmm/src/devices/legacy/rtc_pl031.rs ================================================ // Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
// SPDX-License-Identifier: Apache-2.0 use std::convert::TryInto; use serde::Serialize; use vm_superio::Rtc; use vm_superio::rtc_pl031::RtcEvents; use crate::logger::{IncMetric, SharedIncMetric, warn}; /// Metrics specific to the RTC device. #[derive(Debug, Serialize, Default)] pub struct RTCDeviceMetrics { /// Errors triggered while using the RTC device. pub error_count: SharedIncMetric, /// Number of superfluous read intents on this RTC device. pub missed_read_count: SharedIncMetric, /// Number of superfluous write intents on this RTC device. pub missed_write_count: SharedIncMetric, } impl RTCDeviceMetrics { /// Const default construction. pub const fn new() -> Self { Self { error_count: SharedIncMetric::new(), missed_read_count: SharedIncMetric::new(), missed_write_count: SharedIncMetric::new(), } } } impl RtcEvents for RTCDeviceMetrics { fn invalid_read(&self) { self.missed_read_count.inc(); self.error_count.inc(); warn!("Guest read at invalid offset.") } fn invalid_write(&self) { self.missed_write_count.inc(); self.error_count.inc(); warn!("Guest write at invalid offset.") } } impl RtcEvents for &'static RTCDeviceMetrics { fn invalid_read(&self) { RTCDeviceMetrics::invalid_read(self); } fn invalid_write(&self) { RTCDeviceMetrics::invalid_write(self); } } /// Stores aggregated metrics pub static METRICS: RTCDeviceMetrics = RTCDeviceMetrics::new(); /// Wrapper over vm_superio's RTC implementation. 
#[derive(Debug)]
pub struct RTCDevice(vm_superio::Rtc<&'static RTCDeviceMetrics>);

impl Default for RTCDevice {
    fn default() -> Self {
        RTCDevice(Rtc::with_events(&METRICS))
    }
}

impl RTCDevice {
    pub fn new() -> RTCDevice {
        Default::default()
    }
}

impl std::ops::Deref for RTCDevice {
    type Target = vm_superio::Rtc<&'static RTCDeviceMetrics>;

    fn deref(&self) -> &Self::Target {
        &self.0
    }
}

impl std::ops::DerefMut for RTCDevice {
    fn deref_mut(&mut self) -> &mut Self::Target {
        &mut self.0
    }
}

// Implements Bus functions for AMBA PL031 RTC device
impl RTCDevice {
    // Forward a bus read to the PL031; only u16-sized offsets with exactly
    // 4-byte buffers are valid, anything else is counted as an error.
    pub fn bus_read(&mut self, offset: u64, data: &mut [u8]) {
        if let (Ok(offset), 4) = (u16::try_from(offset), data.len()) {
            // read() function from RTC implementation expects a slice of
            // len 4, and we just validated that this is the data length
            self.read(offset, data.try_into().unwrap())
        } else {
            warn!(
                "Found invalid data offset/length while trying to read from the RTC: {}, {}",
                offset,
                data.len()
            );
            METRICS.error_count.inc();
        }
    }

    // Forward a bus write to the PL031 under the same validity conditions as
    // `bus_read`.
    pub fn bus_write(&mut self, offset: u64, data: &[u8]) {
        if let (Ok(offset), 4) = (u16::try_from(offset), data.len()) {
            // write() function from RTC implementation expects a slice of
            // len 4, and we just validated that this is the data length
            self.write(offset, data.try_into().unwrap())
        } else {
            warn!(
                "Found invalid data offset/length while trying to write to the RTC: {}, {}",
                offset,
                data.len()
            );
            METRICS.error_count.inc();
        }
    }
}

#[cfg(target_arch = "aarch64")]
impl crate::vstate::bus::BusDevice for RTCDevice {
    fn read(&mut self, _base: u64, offset: u64, data: &mut [u8]) {
        self.bus_read(offset, data)
    }

    // NOTE(review): return type generics appear stripped by extraction
    // (presumably `Option<Arc<Barrier>>`).
    fn write(
        &mut self,
        _base: u64,
        offset: u64,
        data: &[u8],
    ) -> Option> {
        self.bus_write(offset, data);
        None
    }
}

#[cfg(test)]
mod tests {
    use vm_superio::Rtc;

    use super::*;
    use crate::logger::IncMetric;

    #[test]
    fn test_rtc_device() {
        static TEST_RTC_DEVICE_METRICS: RTCDeviceMetrics = RTCDeviceMetrics::new();
        let mut rtc_pl031 = RTCDevice(Rtc::with_events(&TEST_RTC_DEVICE_METRICS));
        let data = [0; 4];

        // Write to the DR register. Since this is a RO register, the write
        // function should fail.
        let invalid_writes_before = TEST_RTC_DEVICE_METRICS.missed_write_count.count();
        let error_count_before = TEST_RTC_DEVICE_METRICS.error_count.count();
        rtc_pl031.bus_write(0x000, &data);
        let invalid_writes_after = TEST_RTC_DEVICE_METRICS.missed_write_count.count();
        let error_count_after = TEST_RTC_DEVICE_METRICS.error_count.count();
        assert_eq!(invalid_writes_after - invalid_writes_before, 1);
        assert_eq!(error_count_after - error_count_before, 1);
    }

    #[test]
    fn test_rtc_invalid_buf_len() {
        static TEST_RTC_INVALID_BUF_LEN_METRICS: RTCDeviceMetrics = RTCDeviceMetrics::new();
        let mut rtc_pl031 = RTCDevice(Rtc::with_events(&TEST_RTC_INVALID_BUF_LEN_METRICS));
        let write_data_good = 123u32.to_le_bytes();
        let mut data_bad = [0; 2];
        let mut read_data_good = [0; 4];

        rtc_pl031.bus_write(0x008, &write_data_good);
        rtc_pl031.bus_write(0x008, &data_bad);
        rtc_pl031.bus_read(0x008, &mut read_data_good);
        rtc_pl031.bus_read(0x008, &mut data_bad);
        // Only the well-sized (4-byte) accesses go through; the 2-byte ones
        // are rejected and leave their buffers untouched.
        assert_eq!(u32::from_le_bytes(read_data_good), 123);
        assert_eq!(u16::from_le_bytes(data_bad), 0);
    }
}

================================================
FILE: src/vmm/src/devices/legacy/serial.rs
================================================
// Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the THIRD-PARTY file.

//! Implements a wrapper over an UART serial device.
use std::fmt::Debug;
use std::fs::File;
use std::io::{self, Read, Stdin, Write};
use std::os::unix::io::{AsRawFd, RawFd};
use std::sync::{Arc, Barrier};

use event_manager::{EventOps, Events, MutEventSubscriber};
use libc::EFD_NONBLOCK;
use log::{error, warn};
use serde::Serialize;
use vm_superio::serial::{Error as SerialError, SerialEvents};
use vm_superio::{Serial, Trigger};
use vmm_sys_util::epoll::EventSet;
use vmm_sys_util::eventfd::EventFd;

use crate::devices::legacy::EventFdTrigger;
use crate::logger::{IncMetric, SharedIncMetric};
use crate::vstate::bus::BusDevice;

/// Received Data Available interrupt - for letting the driver know that
/// there is some pending data to be processed.
pub const IER_RDA_BIT: u8 = 0b0000_0001;
/// Received Data Available interrupt offset
pub const IER_RDA_OFFSET: u8 = 1;

/// Metrics specific to the UART device.
#[derive(Debug, Serialize, Default)]
pub struct SerialDeviceMetrics {
    /// Errors triggered while using the UART device.
    pub error_count: SharedIncMetric,
    /// Number of flush operations.
    pub flush_count: SharedIncMetric,
    /// Number of read calls that did not trigger a read.
    pub missed_read_count: SharedIncMetric,
    /// Number of write calls that did not trigger a write.
    pub missed_write_count: SharedIncMetric,
    /// Number of succeeded read calls.
    pub read_count: SharedIncMetric,
    /// Number of succeeded write calls.
    pub write_count: SharedIncMetric,
}

impl SerialDeviceMetrics {
    /// Const default construction.
    pub const fn new() -> Self {
        Self {
            error_count: SharedIncMetric::new(),
            flush_count: SharedIncMetric::new(),
            missed_read_count: SharedIncMetric::new(),
            missed_write_count: SharedIncMetric::new(),
            read_count: SharedIncMetric::new(),
            write_count: SharedIncMetric::new(),
        }
    }
}

/// Stores aggregated metrics
pub(super) static METRICS: SerialDeviceMetrics = SerialDeviceMetrics::new();

/// Errors produced when feeding raw input to the emulated serial device.
#[derive(Debug, thiserror::Error, displaydoc::Display)]
pub enum RawIOError {
    /// Serial error: {0:?}
    Serial(SerialError),
}

pub trait RawIOHandler {
    /// Send raw input to this emulated device.
    fn raw_input(&mut self, _data: &[u8]) -> Result<(), RawIOError>;
}

// NOTE(review): the generic arguments on `Serial` appear stripped by
// extraction (upstream this is a generic impl over the Serial type params).
impl RawIOHandler for Serial {
    // This is not used for anything and is basically just a dummy implementation for `raw_input`.
    fn raw_input(&mut self, data: &[u8]) -> Result<(), RawIOError> {
        // Fail fast if the serial is serviced with more data than it can buffer.
        if data.len() > self.fifo_capacity() {
            return Err(RawIOError::Serial(SerialError::FullFifo));
        }
        // Before enqueuing bytes we first check if there is enough free space
        // in the FIFO.
        // NOTE(review): given the early return above, this condition is always
        // true at this point -- looks redundant; confirm before simplifying.
        if self.fifo_capacity() >= data.len() {
            self.enqueue_raw_bytes(data).map_err(RawIOError::Serial)?;
        }
        Ok(())
    }
}

/// Wrapper over available events (i.e metrics, buffer ready etc).
#[derive(Debug)]
pub struct SerialEventsWrapper {
    /// Buffer ready event.
    // NOTE(review): the generic argument appears stripped by extraction
    // (presumably `Option<EventFdTrigger>`).
    pub buffer_ready_event_fd: Option,
}

impl SerialEvents for SerialEventsWrapper {
    fn buffer_read(&self) {
        METRICS.read_count.inc();
    }

    fn out_byte(&self) {
        METRICS.write_count.inc();
    }

    fn tx_lost_byte(&self) {
        METRICS.missed_write_count.inc();
    }

    // Signal (via the optional buffer-ready eventfd) that the device can
    // accept more input; a missing eventfd is treated as success.
    fn in_buffer_empty(&self) {
        match self
            .buffer_ready_event_fd
            .as_ref()
            .map_or(Ok(()), |buf_ready| buf_ready.write(1))
        {
            Ok(_) => (),
            Err(err) => error!(
                "Could not signal that serial device buffer is ready: {:?}",
                err
            ),
        }
    }
}

/// Destination for serial output: discarded, stdout, or a file.
#[derive(Debug)]
pub enum SerialOut {
    Sink,
    Stdout(std::io::Stdout),
    File(File),
}

impl std::io::Write for SerialOut {
    // `Sink` reports the whole buffer as written so callers never block on it.
    fn write(&mut self, buf: &[u8]) -> std::io::Result {
        match self {
            Self::Sink => Ok(buf.len()),
            Self::Stdout(stdout) => stdout.write(buf),
            Self::File(file) => file.write(buf),
        }
    }

    fn flush(&mut self) -> std::io::Result<()> {
        match self {
            Self::Sink => Ok(()),
            Self::Stdout(stdout) => stdout.flush(),
            Self::File(file) => file.flush(),
        }
    }
}

/// Wrapper over the imported serial device.
// NOTE(review): generic parameters appear stripped by extraction on this
// struct and its fields.
#[derive(Debug)]
pub struct SerialWrapper {
    /// Serial device object.
    pub serial: Serial,
    /// Input to the serial device (needs to be readable).
    pub input: Option,
}

impl SerialWrapper {
    // Called when the input returned EWOULDBLOCK: (re-)register the input fd
    // with the event manager, then drain any bytes that arrived while it was
    // unregistered.
    fn handle_ewouldblock(&self, ops: &mut EventOps) {
        let buffer_ready_fd = self.buffer_ready_evt_fd();
        let input_fd = self.serial_input_fd();
        if input_fd < 0 || buffer_ready_fd < 0 {
            error!("Serial does not have a configured input source.");
            return;
        }
        match ops.add(Events::new(&input_fd, EventSet::IN)) {
            Err(event_manager::Error::FdAlreadyRegistered) => (),
            Err(err) => {
                error!(
                    "Could not register the serial input to the event manager: {:?}",
                    err
                );
            }
            Ok(()) => {
                // Bytes might had come on the unregistered stdin. Try to consume any.
                self.serial.events().in_buffer_empty()
            }
        };
    }

    // Pull as many bytes from the input as the UART FIFO can currently hold
    // and feed them to the device. Errors are reported as raw OS errnos:
    // ENOBUFS = FIFO full, ENOTTY = no input source configured.
    fn recv_bytes(&mut self) -> io::Result {
        let avail_cap = self.serial.fifo_capacity();
        if avail_cap == 0 {
            return Err(io::Error::from_raw_os_error(libc::ENOBUFS));
        }

        if let Some(input) = self.input.as_mut() {
            let mut out = vec![0u8; avail_cap];
            let count = input.read(&mut out)?;
            if count > 0 {
                self.serial
                    .raw_input(&out[..count])
                    .map_err(|_| io::Error::from_raw_os_error(libc::ENOBUFS))?;
            }

            return Ok(count);
        }

        Err(io::Error::from_raw_os_error(libc::ENOTTY))
    }

    // -1 marks "no buffer-ready eventfd configured".
    #[inline]
    fn buffer_ready_evt_fd(&self) -> RawFd {
        self.serial
            .events()
            .buffer_ready_event_fd
            .as_ref()
            .map_or(-1, |buf_ready| buf_ready.as_raw_fd())
    }

    // -1 marks "no input source configured".
    #[inline]
    fn serial_input_fd(&self) -> RawFd {
        self.input.as_ref().map_or(-1, |input| input.as_raw_fd())
    }

    // Acknowledge the buffer-ready notification by reading the eventfd;
    // returns Ok(0) when no eventfd is configured.
    fn consume_buffer_ready_event(&self) -> io::Result {
        self.serial
            .events()
            .buffer_ready_event_fd
            .as_ref()
            .map_or(Ok(0), |buf_ready| buf_ready.read())
    }
}

/// Type for representing a serial device.
// NOTE(review): the generic argument appears stripped by extraction
// (presumably `SerialWrapper<std::io::Stdin>`).
pub type SerialDevice = SerialWrapper;

impl SerialDevice {
    pub fn new(serial_in: Option, serial_out: SerialOut) -> Result {
        let interrupt_evt = EventFdTrigger::new(EventFd::new(EFD_NONBLOCK)?);
        let buffer_read_event_fd = EventFdTrigger::new(EventFd::new(EFD_NONBLOCK)?);

        let serial = Serial::with_events(
            interrupt_evt,
            SerialEventsWrapper {
                buffer_ready_event_fd: Some(buffer_read_event_fd),
            },
            serial_out,
        );

        Ok(SerialDevice {
            serial,
            input: serial_in,
        })
    }
}

impl MutEventSubscriber for SerialWrapper {
    /// Handle events on the serial input fd.
fn process(&mut self, event: Events, ops: &mut EventOps) { #[inline] fn unregister_source(ops: &mut EventOps, source: &T) { match ops.remove(Events::new(source, EventSet::IN)) { Ok(_) => (), Err(_) => error!("Could not unregister source fd: {}", source.as_raw_fd()), } } let input_fd = self.serial_input_fd(); let buffer_ready_fd = self.buffer_ready_evt_fd(); if input_fd < 0 || buffer_ready_fd < 0 { error!("Serial does not have a configured input source."); return; } if buffer_ready_fd == event.fd() { match self.consume_buffer_ready_event() { Ok(_) => (), Err(err) => { error!( "Detach serial device input source due to error in consuming the buffer \ ready event: {:?}", err ); unregister_source(ops, &input_fd); unregister_source(ops, &buffer_ready_fd); return; } } } // We expect to receive: `EventSet::IN`, `EventSet::HANG_UP` or // `EventSet::ERROR`. To process all these events we just have to // read from the serial input. match self.recv_bytes() { Ok(count) => { // Handle EOF if the event came from the input source. if input_fd == event.fd() && count == 0 { unregister_source(ops, &input_fd); unregister_source(ops, &buffer_ready_fd); warn!("Detached the serial input due to peer close/error."); } } Err(err) => { match err.raw_os_error() { Some(errno) if errno == libc::ENOBUFS => { unregister_source(ops, &input_fd); } Some(errno) if errno == libc::EWOULDBLOCK => { self.handle_ewouldblock(ops); } Some(errno) if errno == libc::ENOTTY => { error!("The serial device does not have the input source attached."); unregister_source(ops, &input_fd); unregister_source(ops, &buffer_ready_fd); } Some(_) | None => { // Unknown error, detach the serial input source. unregister_source(ops, &input_fd); unregister_source(ops, &buffer_ready_fd); warn!("Detached the serial input due to peer close/error."); } } } } } /// Initial registration of pollable objects. /// If serial input is present, register the serial input FD as readable. 
fn init(&mut self, ops: &mut EventOps) { if self.input.is_some() && self.serial.events().buffer_ready_event_fd.is_some() { let serial_fd = self.serial_input_fd(); let buf_ready_evt = self.buffer_ready_evt_fd(); // If the jailer is instructed to daemonize before exec-ing into firecracker, we set // stdin, stdout and stderr to be open('/dev/null'). However, if stdin is redirected // from /dev/null then trying to register FILENO_STDIN to epoll will fail with EPERM. // Therefore, only try to register stdin to epoll if it is a terminal or a FIFO pipe. // SAFETY: isatty has no invariants that need to be upheld. If serial_fd is an invalid // argument, it will return 0 and set errno to EBADF. if (unsafe { libc::isatty(serial_fd) } == 1 || is_fifo(serial_fd)) && let Err(err) = ops.add(Events::new(&serial_fd, EventSet::IN)) { warn!("Failed to register serial input fd: {}", err); } if let Err(err) = ops.add(Events::new(&buf_ready_evt, EventSet::IN)) { warn!("Failed to register serial buffer ready event: {}", err); } } } } /// Checks whether the given file descriptor is a FIFO pipe. fn is_fifo(fd: RawFd) -> bool { let mut stat = std::mem::MaybeUninit::::uninit(); // SAFETY: No unsafety can be introduced by passing in an invalid file descriptor to fstat, // it will return -1 and set errno to EBADF. The pointer passed to fstat is valid for writing // a libc::stat structure. if unsafe { libc::fstat(fd, stat.as_mut_ptr()) } < 0 { return false; } // SAFETY: We can safely assume the libc::stat structure to be initialized, as libc::fstat // returning 0 guarantees that the memory is now initialized with the requested file metadata. 
let stat = unsafe { stat.assume_init() }; (stat.st_mode & libc::S_IFIFO) != 0 } impl BusDevice for SerialWrapper where I: Read + AsRawFd + Send, { fn read(&mut self, _base: u64, offset: u64, data: &mut [u8]) { if let (Ok(offset), 1) = (u8::try_from(offset), data.len()) { data[0] = self.serial.read(offset); } else { METRICS.missed_read_count.inc(); } } fn write(&mut self, _base: u64, offset: u64, data: &[u8]) -> Option> { if let (Ok(offset), 1) = (u8::try_from(offset), data.len()) { if let Err(err) = self.serial.write(offset, data[0]) { // Counter incremented for any handle_write() error. error!("Failed the write to serial: {:?}", err); METRICS.error_count.inc(); } } else { METRICS.missed_write_count.inc(); } None } } #[cfg(test)] mod tests { #![allow(clippy::undocumented_unsafe_blocks)] use vmm_sys_util::eventfd::EventFd; use super::*; use crate::logger::IncMetric; #[test] fn test_serial_bus_read() { let intr_evt = EventFdTrigger::new(EventFd::new(libc::EFD_NONBLOCK).unwrap()); let metrics = &METRICS; let mut serial = SerialDevice { serial: Serial::with_events( intr_evt, SerialEventsWrapper { buffer_ready_event_fd: None, }, SerialOut::Sink, ), input: None::, }; serial.serial.raw_input(b"abc").unwrap(); let invalid_reads_before = metrics.missed_read_count.count(); let mut v = [0x00; 2]; serial.read(0x0, 0u64, &mut v); let invalid_reads_after = metrics.missed_read_count.count(); assert_eq!(invalid_reads_before + 1, invalid_reads_after); let mut v = [0x00; 1]; serial.read(0x0, 0u64, &mut v); assert_eq!(v[0], b'a'); let invalid_reads_after_2 = metrics.missed_read_count.count(); // The `invalid_read_count` metric should be the same as before the one-byte reads. 
assert_eq!(invalid_reads_after_2, invalid_reads_after); } #[test] fn test_is_fifo() { // invalid file descriptors arent fifos let invalid = -1; assert!(!is_fifo(invalid)); // Fifos are fifos let mut fds: [libc::c_int; 2] = [0; 2]; let rc = unsafe { libc::pipe(fds.as_mut_ptr()) }; assert!(rc == 0); assert!(is_fifo(fds[0])); assert!(is_fifo(fds[1])); // Files arent fifos let tmp_file = vmm_sys_util::tempfile::TempFile::new().unwrap(); assert!(!is_fifo(tmp_file.as_file().as_raw_fd())); } #[test] fn test_serial_dev_metrics() { let serial_metrics: SerialDeviceMetrics = SerialDeviceMetrics::new(); let serial_metrics_local: String = serde_json::to_string(&serial_metrics).unwrap(); // the 1st serialize flushes the metrics and resets values to 0 so that // we can compare the values with local metrics. serde_json::to_string(&METRICS).unwrap(); let serial_metrics_global: String = serde_json::to_string(&METRICS).unwrap(); assert_eq!(serial_metrics_local, serial_metrics_global); serial_metrics.read_count.inc(); assert_eq!(serial_metrics.read_count.count(), 1); } } ================================================ FILE: src/vmm/src/devices/mod.rs ================================================ // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 // // Portions Copyright 2017 The Chromium OS Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the THIRD-PARTY file. //! Emulates virtual and hardware devices. 
#![allow(unused)] use std::io; pub mod acpi; pub mod legacy; pub mod pci; pub mod pseudo; pub mod virtio; use log::error; use crate::devices::virtio::net::metrics::NetDeviceMetrics; use crate::devices::virtio::queue::{InvalidAvailIdx, QueueError}; use crate::devices::virtio::vsock::VsockError; use crate::logger::IncMetric; use crate::vstate::interrupts::InterruptError; // Function used for reporting error in terms of logging // but also in terms of metrics of net event fails. // network metrics is reported per device so we need a handle to each net device's // metrics `net_iface_metrics` to report metrics for that device. pub(crate) fn report_net_event_fail(net_iface_metrics: &NetDeviceMetrics, err: DeviceError) { if let DeviceError::InvalidAvailIdx(err) = err { panic!("{}", err); } error!("{:?}", err); net_iface_metrics.event_fails.inc(); } #[derive(Debug, thiserror::Error, displaydoc::Display)] pub enum DeviceError { /// Failed to read from the TAP device. FailedReadTap, /// Failed to signal irq: {0} FailedSignalingIrq(#[from] InterruptError), /// IO error: {0} IoError(io::Error), /// Device received malformed payload. MalformedPayload, /// Device received malformed descriptor. MalformedDescriptor, /// Error during queue processing: {0} QueueError(#[from] QueueError), /// {0} InvalidAvailIdx(#[from] InvalidAvailIdx), /// Vsock device error: {0} VsockError(#[from] VsockError), } ================================================ FILE: src/vmm/src/devices/pci/mod.rs ================================================ // Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 pub mod pci_segment; pub use pci_segment::*; ================================================ FILE: src/vmm/src/devices/pci/pci_segment.rs ================================================ // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. // // Portions Copyright 2017 The Chromium OS Authors. All rights reserved. 
// Use of this source code is governed by a BSD-style license that can be // found in the LICENSE-BSD-3-Clause file. // // Copyright © 2019 - 2021 Intel Corporation // // SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause // use std::sync::{Arc, Mutex}; #[cfg(target_arch = "x86_64")] use acpi_tables::{Aml, aml}; use log::info; use pci::PciBdf; #[cfg(target_arch = "x86_64")] use uuid::Uuid; use vm_allocator::AddressAllocator; use crate::arch::{ArchVm as Vm, PCI_MMCONFIG_START, PCI_MMIO_CONFIG_SIZE_PER_SEGMENT}; #[cfg(target_arch = "x86_64")] use crate::pci::bus::{PCI_CONFIG_IO_PORT, PCI_CONFIG_IO_PORT_SIZE}; use crate::pci::bus::{PciBus, PciConfigIo, PciConfigMmio, PciRoot, PciRootError}; use crate::vstate::bus::{BusDeviceSync, BusError}; use crate::vstate::resources::ResourceAllocator; pub struct PciSegment { pub(crate) id: u16, pub(crate) pci_bus: Arc>, pub(crate) pci_config_mmio: Arc>, pub(crate) mmio_config_address: u64, pub(crate) proximity_domain: u32, #[cfg(target_arch = "x86_64")] pub(crate) pci_config_io: Option>>, // Bitmap of PCI devices to hotplug. pub(crate) pci_devices_up: u32, // Bitmap of PCI devices to hotunplug. pub(crate) pci_devices_down: u32, // List of allocated IRQs for each PCI slot. 
pub(crate) pci_irq_slots: [u8; 32], // Device memory covered by this segment pub(crate) start_of_mem32_area: u64, pub(crate) end_of_mem32_area: u64, pub(crate) start_of_mem64_area: u64, pub(crate) end_of_mem64_area: u64, } impl std::fmt::Debug for PciSegment { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("PciSegment") .field("id", &self.id) .field("mmio_config_address", &self.mmio_config_address) .field("proximity_domain", &self.proximity_domain) .field("pci_devices_up", &self.pci_devices_up) .field("pci_devices_down", &self.pci_devices_down) .field("pci_irq_slots", &self.pci_irq_slots) .field("start_of_mem32_area", &self.start_of_mem32_area) .field("end_of_mem32_area", &self.end_of_mem32_area) .field("start_of_mem64_area", &self.start_of_mem64_area) .field("end_of_mem64_area", &self.end_of_mem64_area) .finish() } } impl PciSegment { fn build(id: u16, vm: &Arc, pci_irq_slots: &[u8; 32]) -> Result { let pci_root = PciRoot::new(None); let pci_bus = Arc::new(Mutex::new(PciBus::new(pci_root, vm.clone()))); let pci_config_mmio = Arc::new(Mutex::new(PciConfigMmio::new(Arc::clone(&pci_bus)))); let mmio_config_address = PCI_MMCONFIG_START + PCI_MMIO_CONFIG_SIZE_PER_SEGMENT * id as u64; vm.common.mmio_bus.insert( Arc::clone(&pci_config_mmio) as Arc, mmio_config_address, PCI_MMIO_CONFIG_SIZE_PER_SEGMENT, )?; let resource_allocator = vm.resource_allocator(); let start_of_mem32_area = resource_allocator.mmio32_memory.base(); let end_of_mem32_area = resource_allocator.mmio32_memory.end(); let start_of_mem64_area = resource_allocator.mmio64_memory.base(); let end_of_mem64_area = resource_allocator.mmio64_memory.end(); let segment = PciSegment { id, pci_bus, pci_config_mmio, mmio_config_address, proximity_domain: 0, pci_devices_up: 0, pci_devices_down: 0, #[cfg(target_arch = "x86_64")] pci_config_io: None, start_of_mem32_area, end_of_mem32_area, start_of_mem64_area, end_of_mem64_area, pci_irq_slots: *pci_irq_slots, }; Ok(segment) } 
#[cfg(target_arch = "x86_64")] pub(crate) fn new( id: u16, vm: &Arc, pci_irq_slots: &[u8; 32], ) -> Result { use crate::Vm; let mut segment = Self::build(id, vm, pci_irq_slots)?; let pci_config_io = Arc::new(Mutex::new(PciConfigIo::new(Arc::clone(&segment.pci_bus)))); vm.pio_bus.insert( pci_config_io.clone(), PCI_CONFIG_IO_PORT, PCI_CONFIG_IO_PORT_SIZE, )?; segment.pci_config_io = Some(pci_config_io); info!( "pci: adding PCI segment: id={:#x}, PCI MMIO config address: {:#x}, mem32 area: \ [{:#x}-{:#x}], mem64 area: [{:#x}-{:#x}] IO area: [{PCI_CONFIG_IO_PORT:#x}-{:#x}]", segment.id, segment.mmio_config_address, segment.start_of_mem32_area, segment.end_of_mem32_area, segment.start_of_mem64_area, segment.end_of_mem64_area, PCI_CONFIG_IO_PORT + PCI_CONFIG_IO_PORT_SIZE - 1 ); Ok(segment) } #[cfg(target_arch = "aarch64")] pub(crate) fn new( id: u16, vm: &Arc, pci_irq_slots: &[u8; 32], ) -> Result { let segment = Self::build(id, vm, pci_irq_slots)?; info!( "pci: adding PCI segment: id={:#x}, PCI MMIO config address: {:#x}, mem32 area: \ [{:#x}-{:#x}], mem64 area: [{:#x}-{:#x}]", segment.id, segment.mmio_config_address, segment.start_of_mem32_area, segment.end_of_mem32_area, segment.start_of_mem64_area, segment.end_of_mem64_area, ); Ok(segment) } pub(crate) fn next_device_bdf(&self) -> Result { Ok(PciBdf::new( self.id, 0, self.pci_bus .lock() .unwrap() .next_device_id()? 
.try_into() .unwrap(), 0, )) } } #[cfg(target_arch = "x86_64")] struct PciDevSlot { device_id: u8, } #[cfg(target_arch = "x86_64")] impl Aml for PciDevSlot { fn append_aml_bytes(&self, v: &mut Vec) -> Result<(), aml::AmlError> { let sun = self.device_id; let adr: u32 = (self.device_id as u32) << 16; aml::Device::new( format!("S{:03}", self.device_id).as_str().try_into()?, vec![ &aml::Name::new("_SUN".try_into()?, &sun)?, &aml::Name::new("_ADR".try_into()?, &adr)?, &aml::Method::new( "_EJ0".try_into()?, 1, true, vec![&aml::MethodCall::new( "\\_SB_.PHPR.PCEJ".try_into()?, vec![&aml::Path::new("_SUN")?, &aml::Path::new("_SEG")?], )], ), ], ) .append_aml_bytes(v) } } #[cfg(target_arch = "x86_64")] struct PciDevSlotNotify { device_id: u8, } #[cfg(target_arch = "x86_64")] impl Aml for PciDevSlotNotify { fn append_aml_bytes(&self, v: &mut Vec) -> Result<(), aml::AmlError> { let device_id_mask: u32 = 1 << self.device_id; let object = aml::Path::new(&format!("S{:03}", self.device_id))?; aml::And::new(&aml::Local(0), &aml::Arg(0), &device_id_mask).append_aml_bytes(v)?; aml::If::new( &aml::Equal::new(&aml::Local(0), &device_id_mask), vec![&aml::Notify::new(&object, &aml::Arg(1))], ) .append_aml_bytes(v) } } #[cfg(target_arch = "x86_64")] struct PciDevSlotMethods {} #[cfg(target_arch = "x86_64")] impl Aml for PciDevSlotMethods { fn append_aml_bytes(&self, v: &mut Vec) -> Result<(), aml::AmlError> { let mut device_notifies = Vec::new(); for device_id in 0..32 { device_notifies.push(PciDevSlotNotify { device_id }); } let mut device_notifies_refs: Vec<&dyn Aml> = Vec::new(); for device_notify in device_notifies.iter() { device_notifies_refs.push(device_notify); } aml::Method::new("DVNT".try_into()?, 2, true, device_notifies_refs).append_aml_bytes(v)?; aml::Method::new( "PCNT".try_into()?, 0, true, vec![ &aml::Acquire::new("\\_SB_.PHPR.BLCK".try_into()?, 0xffff), &aml::Store::new( &aml::Path::new("\\_SB_.PHPR.PSEG")?, &aml::Path::new("_SEG")?, ), &aml::MethodCall::new( 
"DVNT".try_into()?, vec![&aml::Path::new("\\_SB_.PHPR.PCIU")?, &aml::ONE], ), &aml::MethodCall::new( "DVNT".try_into()?, vec![&aml::Path::new("\\_SB_.PHPR.PCID")?, &3usize], ), &aml::Release::new("\\_SB_.PHPR.BLCK".try_into()?), ], ) .append_aml_bytes(v) } } #[cfg(target_arch = "x86_64")] struct PciDsmMethod {} #[cfg(target_arch = "x86_64")] impl Aml for PciDsmMethod { fn append_aml_bytes(&self, v: &mut Vec) -> Result<(), aml::AmlError> { // Refer to ACPI spec v6.3 Ch 9.1.1 and PCI Firmware spec v3.3 Ch 4.6.1 // _DSM (Device Specific Method), the following is the implementation in ASL. // Method (_DSM, 4, NotSerialized) // _DSM: Device-Specific Method // { // If ((Arg0 == ToUUID ("e5c937d0-3553-4d7a-9117-ea4d19c3434d") /* Device Labeling // Interface */)) { // If ((Arg2 == Zero)) // { // Return (Buffer (One) { 0x21 }) // } // If ((Arg2 == 0x05)) // { // Return (Zero) // } // } // // Return (Buffer (One) { 0x00 }) // } // // As per ACPI v6.3 Ch 19.6.142, the UUID is required to be in mixed endian: // Among the fields of a UUID: // {d1 (8 digits)} - {d2 (4 digits)} - {d3 (4 digits)} - {d4 (16 digits)} // d1 ~ d3 need to be little endian, d4 be big endian. // See https://en.wikipedia.org/wiki/Universally_unique_identifier#Encoding . 
let uuid = Uuid::parse_str("E5C937D0-3553-4D7A-9117-EA4D19C3434D").unwrap(); let (uuid_d1, uuid_d2, uuid_d3, uuid_d4) = uuid.as_fields(); let mut uuid_buf = vec![]; uuid_buf.extend(uuid_d1.to_le_bytes()); uuid_buf.extend(uuid_d2.to_le_bytes()); uuid_buf.extend(uuid_d3.to_le_bytes()); uuid_buf.extend(uuid_d4); aml::Method::new( "_DSM".try_into()?, 4, false, vec![ &aml::If::new( &aml::Equal::new(&aml::Arg(0), &aml::Buffer::new(uuid_buf)), vec![ &aml::If::new( &aml::Equal::new(&aml::Arg(2), &aml::ZERO), vec![&aml::Return::new(&aml::Buffer::new(vec![0x21]))], ), &aml::If::new( &aml::Equal::new(&aml::Arg(2), &0x05u8), vec![&aml::Return::new(&aml::ZERO)], ), ], ), &aml::Return::new(&aml::Buffer::new(vec![0])), ], ) .append_aml_bytes(v) } } #[cfg(target_arch = "x86_64")] impl Aml for PciSegment { fn append_aml_bytes(&self, v: &mut Vec) -> Result<(), aml::AmlError> { let mut pci_dsdt_inner_data: Vec<&dyn Aml> = Vec::new(); let hid = aml::Name::new("_HID".try_into()?, &aml::EisaName::new("PNP0A08")?)?; pci_dsdt_inner_data.push(&hid); let cid = aml::Name::new("_CID".try_into()?, &aml::EisaName::new("PNP0A03")?)?; pci_dsdt_inner_data.push(&cid); let adr = aml::Name::new("_ADR".try_into()?, &aml::ZERO)?; pci_dsdt_inner_data.push(&adr); let seg = aml::Name::new("_SEG".try_into()?, &self.id)?; pci_dsdt_inner_data.push(&seg); let uid = aml::Name::new("_UID".try_into()?, &aml::ZERO)?; pci_dsdt_inner_data.push(&uid); let cca = aml::Name::new("_CCA".try_into()?, &aml::ONE)?; pci_dsdt_inner_data.push(&cca); let supp = aml::Name::new("SUPP".try_into()?, &aml::ZERO)?; pci_dsdt_inner_data.push(&supp); let proximity_domain = self.proximity_domain; let pxm_return = aml::Return::new(&proximity_domain); let pxm = aml::Method::new("_PXM".try_into()?, 0, false, vec![&pxm_return]); pci_dsdt_inner_data.push(&pxm); let pci_dsm = PciDsmMethod {}; pci_dsdt_inner_data.push(&pci_dsm); #[allow(clippy::if_same_then_else)] let crs = if self.id == 0 { aml::Name::new( "_CRS".try_into()?, 
&aml::ResourceTemplate::new(vec![ &aml::AddressSpace::new_bus_number(0x0u16, 0x0u16)?, &aml::Io::new(0xcf8, 0xcf8, 1, 0x8), &aml::Memory32Fixed::new( true, self.mmio_config_address.try_into().unwrap(), PCI_MMIO_CONFIG_SIZE_PER_SEGMENT.try_into().unwrap(), ), &aml::AddressSpace::new_memory( aml::AddressSpaceCacheable::NotCacheable, true, self.start_of_mem32_area, self.end_of_mem32_area, )?, &aml::AddressSpace::new_memory( aml::AddressSpaceCacheable::NotCacheable, true, self.start_of_mem64_area, self.end_of_mem64_area, )?, &aml::AddressSpace::new_io(0u16, 0x0cf7u16)?, &aml::AddressSpace::new_io(0x0d00u16, 0xffffu16)?, ]), )? } else { aml::Name::new( "_CRS".try_into()?, &aml::ResourceTemplate::new(vec![ &aml::AddressSpace::new_bus_number(0x0u16, 0x0u16)?, &aml::Memory32Fixed::new( true, self.mmio_config_address.try_into().unwrap(), PCI_MMIO_CONFIG_SIZE_PER_SEGMENT.try_into().unwrap(), ), &aml::AddressSpace::new_memory( aml::AddressSpaceCacheable::NotCacheable, true, self.start_of_mem32_area, self.end_of_mem32_area, )?, &aml::AddressSpace::new_memory( aml::AddressSpaceCacheable::NotCacheable, true, self.start_of_mem64_area, self.end_of_mem64_area, )?, ]), )? }; pci_dsdt_inner_data.push(&crs); let mut pci_devices = Vec::new(); for device_id in 0..32 { let pci_device = PciDevSlot { device_id }; pci_devices.push(pci_device); } for pci_device in pci_devices.iter() { pci_dsdt_inner_data.push(pci_device); } let pci_device_methods = PciDevSlotMethods {}; pci_dsdt_inner_data.push(&pci_device_methods); // Build PCI routing table, listing IRQs assigned to PCI devices. 
let prt_package_list: Vec<(u32, u32)> = self .pci_irq_slots .iter() .enumerate() .map(|(i, irq)| { ( ((((u32::try_from(i).unwrap()) & 0x1fu32) << 16) | 0xffffu32), *irq as u32, ) }) .collect(); let prt_package_list: Vec = prt_package_list .iter() .map(|(bdf, irq)| aml::Package::new(vec![bdf, &0u8, &0u8, irq])) .collect(); let prt_package_list: Vec<&dyn Aml> = prt_package_list .iter() .map(|item| item as &dyn Aml) .collect(); let prt = aml::Name::new("_PRT".try_into()?, &aml::Package::new(prt_package_list))?; pci_dsdt_inner_data.push(&prt); aml::Device::new( format!("_SB_.PC{:02X}", self.id).as_str().try_into()?, pci_dsdt_inner_data, ) .append_aml_bytes(v) } } #[cfg(test)] mod tests { use super::*; use crate::arch; use crate::builder::tests::default_vmm; use crate::utils::u64_to_usize; #[test] fn test_pci_segment_build() { let vmm = default_vmm(); let pci_irq_slots = &[0u8; 32]; let pci_segment = PciSegment::new(0, &vmm.vm, pci_irq_slots).unwrap(); assert_eq!(pci_segment.id, 0); assert_eq!( pci_segment.start_of_mem32_area, arch::MEM_32BIT_DEVICES_START ); assert_eq!( pci_segment.end_of_mem32_area, arch::MEM_32BIT_DEVICES_START + arch::MEM_32BIT_DEVICES_SIZE - 1 ); assert_eq!( pci_segment.start_of_mem64_area, arch::MEM_64BIT_DEVICES_START ); assert_eq!( pci_segment.end_of_mem64_area, arch::MEM_64BIT_DEVICES_START + arch::MEM_64BIT_DEVICES_SIZE - 1 ); assert_eq!(pci_segment.mmio_config_address, arch::PCI_MMCONFIG_START); assert_eq!(pci_segment.proximity_domain, 0); assert_eq!(pci_segment.pci_devices_up, 0); assert_eq!(pci_segment.pci_devices_down, 0); assert_eq!(pci_segment.pci_irq_slots, [0u8; 32]); } #[cfg(target_arch = "x86_64")] #[test] fn test_io_bus() { let vmm = default_vmm(); let pci_irq_slots = &[0u8; 32]; let pci_segment = PciSegment::new(0, &vmm.vm, pci_irq_slots).unwrap(); let mut data = [0u8; u64_to_usize(PCI_CONFIG_IO_PORT_SIZE)]; vmm.vm.pio_bus.read(PCI_CONFIG_IO_PORT, &mut data).unwrap(); vmm.vm .pio_bus .read(PCI_CONFIG_IO_PORT + 
PCI_CONFIG_IO_PORT_SIZE, &mut data) .unwrap_err(); } #[test] fn test_mmio_bus() { let vmm = default_vmm(); let pci_irq_slots = &[0u8; 32]; let pci_segment = PciSegment::new(0, &vmm.vm, pci_irq_slots).unwrap(); let mut data = [0u8; u64_to_usize(PCI_MMIO_CONFIG_SIZE_PER_SEGMENT)]; vmm.vm .common .mmio_bus .read(pci_segment.mmio_config_address, &mut data) .unwrap(); vmm.vm .common .mmio_bus .read( pci_segment.mmio_config_address + PCI_MMIO_CONFIG_SIZE_PER_SEGMENT, &mut data, ) .unwrap_err(); } #[test] fn test_next_device_bdf() { let vmm = default_vmm(); let pci_irq_slots = &[0u8; 32]; let pci_segment = PciSegment::new(0, &vmm.vm, pci_irq_slots).unwrap(); // Start checking from device id 1, since 0 is allocated to the Root port. for dev_id in 1..32 { let bdf = pci_segment.next_device_bdf().unwrap(); // In our case we have a single Segment with id 0, which has // a single bus with id 0. Also, each device of ours has a // single function. assert_eq!(bdf, PciBdf::new(0, 0, dev_id, 0)); } // We can only have 32 devices on a segment pci_segment.next_device_bdf().unwrap_err(); } } ================================================ FILE: src/vmm/src/devices/pseudo/boot_timer.rs ================================================ // Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 use std::sync::{Arc, Barrier}; use utils::time::TimestampUs; use crate::logger::info; use crate::vstate::bus::BusDevice; const MAGIC_VALUE_SIGNAL_GUEST_BOOT_COMPLETE: u8 = 123; /// Pseudo device to record the kernel boot time. #[derive(Debug, Clone)] pub struct BootTimer { start_ts: TimestampUs, } impl BusDevice for BootTimer { fn write(&mut self, _base: u64, offset: u64, data: &[u8]) -> Option> { // Only handle byte length instructions at a zero offset. 
if data.len() != 1 || offset != 0 { return None; } if data[0] == MAGIC_VALUE_SIGNAL_GUEST_BOOT_COMPLETE { let now_tm_us = TimestampUs::default(); let boot_time_us = now_tm_us.time_us - self.start_ts.time_us; let boot_time_cpu_us = now_tm_us.cputime_us - self.start_ts.cputime_us; info!( "Guest-boot-time = {:>6} us {} ms, {:>6} CPU us {} CPU ms", boot_time_us, boot_time_us / 1000, boot_time_cpu_us, boot_time_cpu_us / 1000 ); } None } fn read(&mut self, _base: u64, _offset: u64, _data: &mut [u8]) {} } impl BootTimer { /// Create a device at a certain point in time. pub fn new(start_ts: TimestampUs) -> BootTimer { BootTimer { start_ts } } } ================================================ FILE: src/vmm/src/devices/pseudo/mod.rs ================================================ // Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 //! Implements Firecracker specific devices (e.g. signal when boot is completed). mod boot_timer; pub use self::boot_timer::BootTimer; ================================================ FILE: src/vmm/src/devices/virtio/balloon/device.rs ================================================ // Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
// SPDX-License-Identifier: Apache-2.0 use std::ops::Deref; use std::sync::Arc; use std::time::Duration; use log::{debug, error, info, warn}; use serde::{Deserialize, Serialize}; use utils::time::TimerFd; use vmm_sys_util::eventfd::EventFd; use super::super::ActivateError; use super::super::device::{DeviceState, VirtioDevice}; use super::super::queue::Queue; use super::metrics::METRICS; use super::util::compact_page_frame_numbers; use super::{ BALLOON_DEV_ID, BALLOON_MIN_NUM_QUEUES, BALLOON_QUEUE_SIZE, DEFLATE_INDEX, FREE_PAGE_HINT_DONE, FREE_PAGE_HINT_STOP, INFLATE_INDEX, MAX_PAGE_COMPACT_BUFFER, MAX_PAGES_IN_DESC, MIB_TO_4K_PAGES, STATS_INDEX, VIRTIO_BALLOON_F_DEFLATE_ON_OOM, VIRTIO_BALLOON_F_FREE_PAGE_HINTING, VIRTIO_BALLOON_F_FREE_PAGE_REPORTING, VIRTIO_BALLOON_F_STATS_VQ, VIRTIO_BALLOON_PFN_SHIFT, VIRTIO_BALLOON_S_ALLOC_STALL, VIRTIO_BALLOON_S_ASYNC_RECLAIM, VIRTIO_BALLOON_S_ASYNC_SCAN, VIRTIO_BALLOON_S_AVAIL, VIRTIO_BALLOON_S_CACHES, VIRTIO_BALLOON_S_DIRECT_RECLAIM, VIRTIO_BALLOON_S_DIRECT_SCAN, VIRTIO_BALLOON_S_HTLB_PGALLOC, VIRTIO_BALLOON_S_HTLB_PGFAIL, VIRTIO_BALLOON_S_MAJFLT, VIRTIO_BALLOON_S_MEMFREE, VIRTIO_BALLOON_S_MEMTOT, VIRTIO_BALLOON_S_MINFLT, VIRTIO_BALLOON_S_OOM_KILL, VIRTIO_BALLOON_S_SWAP_IN, VIRTIO_BALLOON_S_SWAP_OUT, }; use crate::devices::virtio::balloon::BalloonError; use crate::devices::virtio::device::{ActiveState, VirtioDeviceType}; use crate::devices::virtio::generated::virtio_config::VIRTIO_F_VERSION_1; use crate::devices::virtio::queue::InvalidAvailIdx; use crate::devices::virtio::transport::{VirtioInterrupt, VirtioInterruptType}; use crate::logger::{IncMetric, log_dev_preview_warning}; use crate::utils::u64_to_usize; use crate::vstate::memory::{ Address, ByteValued, Bytes, GuestAddress, GuestMemoryExtension, GuestMemoryMmap, }; use crate::{impl_device_type, mem_size_mib}; const SIZE_OF_U32: usize = std::mem::size_of::(); const SIZE_OF_STAT: usize = std::mem::size_of::(); fn mib_to_pages(amount_mib: u32) -> Result { amount_mib 
.checked_mul(MIB_TO_4K_PAGES) .ok_or(BalloonError::TooMuchMemoryRequested( u32::MAX / MIB_TO_4K_PAGES, )) } fn pages_to_mib(amount_pages: u32) -> u32 { amount_pages / MIB_TO_4K_PAGES } #[repr(C)] #[derive(Clone, Copy, Debug, Default, PartialEq)] pub(crate) struct ConfigSpace { pub num_pages: u32, pub actual_pages: u32, pub free_page_hint_cmd_id: u32, } // SAFETY: Safe because ConfigSpace only contains plain data. unsafe impl ByteValued for ConfigSpace {} /// Holds state of the free page hinting run #[derive(Copy, Clone, Debug, Default, Serialize, Deserialize)] pub(crate) struct HintingState { /// The command requested by us. Set to STOP by default. pub host_cmd: u32, /// The last command supplied by guest. pub last_cmd_id: u32, /// The command supplied by guest. pub guest_cmd: Option, /// Whether or not to automatically ack on STOP. pub acknowledge_on_finish: bool, } /// By default hinting will ack on stop fn default_ack_on_stop() -> bool { true } /// Command recieved from the API to start a hinting run #[derive(Copy, Clone, Debug, Eq, PartialEq, Deserialize)] pub struct StartHintingCmd { /// If we should automatically acknowledge end of the run after stop. #[serde(default = "default_ack_on_stop")] pub acknowledge_on_stop: bool, } impl Default for StartHintingCmd { fn default() -> Self { Self { acknowledge_on_stop: true, } } } /// Returned to the API for get hinting status #[derive(Copy, Clone, Debug, Eq, PartialEq, Default, Serialize)] pub struct HintingStatus { /// The command requested by us. Set to STOP by default. pub host_cmd: u32, /// The command supplied by guest. pub guest_cmd: Option, } // This structure needs the `packed` attribute, otherwise Rust will assume // the size to be 16 bytes. #[derive(Copy, Clone, Debug, Default)] #[repr(C, packed)] struct BalloonStat { pub tag: u16, pub val: u64, } // SAFETY: Safe because BalloonStat only contains plain data. unsafe impl ByteValued for BalloonStat {} /// Holds configuration details for the balloon device. 
// User-facing configuration of the virtio balloon device, as accepted and
// returned over the API (serde-serialized).
#[derive(Clone, Default, Debug, PartialEq, Eq, Serialize)] pub struct BalloonConfig { /// Target size. pub amount_mib: u32, /// Whether or not to ask for pages back. pub deflate_on_oom: bool, /// Interval of time in seconds at which the balloon statistics are updated. pub stats_polling_interval_s: u16, /// Free page hinting enabled #[serde(default)] pub free_page_hinting: bool, /// Free page reporting enabled #[serde(default)] pub free_page_reporting: bool, }

/// BalloonStats holds statistics returned from the stats_queue.
// NOTE(review): the `Option` fields below appear to have had their generic
// parameters stripped by text extraction (presumably `Option<u64>`) —
// confirm against the upstream source tree before relying on this text.
#[derive(Clone, Copy, Default, Debug, PartialEq, Eq, Serialize)] #[serde(deny_unknown_fields)] pub struct BalloonStats { /// The target size of the balloon, in 4K pages. pub target_pages: u32, /// The number of 4K pages the device is currently holding. pub actual_pages: u32, /// The target size of the balloon, in MiB. pub target_mib: u32, /// The number of MiB the device is currently holding. pub actual_mib: u32, /// Amount of memory swapped in. #[serde(skip_serializing_if = "Option::is_none")] pub swap_in: Option, /// Amount of memory swapped out. #[serde(skip_serializing_if = "Option::is_none")] pub swap_out: Option, /// Number of major faults. #[serde(skip_serializing_if = "Option::is_none")] pub major_faults: Option, /// Number of minor faults. #[serde(skip_serializing_if = "Option::is_none")] pub minor_faults: Option, /// The amount of memory not being used for any /// purpose (in bytes). #[serde(skip_serializing_if = "Option::is_none")] pub free_memory: Option, /// Total amount of memory available (in bytes). #[serde(skip_serializing_if = "Option::is_none")] pub total_memory: Option, /// An estimate of how much memory is available (in /// bytes) for starting new applications, without pushing the system to swap. #[serde(skip_serializing_if = "Option::is_none")] pub available_memory: Option, /// The amount of memory, in bytes, that can be /// quickly reclaimed without additional I/O. /// Typically these pages are used for /// caching files from disk. #[serde(skip_serializing_if = "Option::is_none")] pub disk_caches: Option, /// The number of successful hugetlb page /// allocations in the guest. #[serde(skip_serializing_if = "Option::is_none")] pub hugetlb_allocations: Option, /// The number of failed hugetlb page allocations /// in the guest. #[serde(skip_serializing_if = "Option::is_none")] pub hugetlb_failures: Option, /// OOM killer invocations. Since Linux v6.12. #[serde(skip_serializing_if = "Option::is_none")] pub oom_kill: Option, /// Stall count of memory allocation. Since Linux v6.12. #[serde(skip_serializing_if = "Option::is_none")] pub alloc_stall: Option, /// Amount of memory scanned asynchronously. Since Linux v6.12. #[serde(skip_serializing_if = "Option::is_none")] pub async_scan: Option, /// Amount of memory scanned directly. Since Linux v6.12. #[serde(skip_serializing_if = "Option::is_none")] pub direct_scan: Option, /// Amount of memory reclaimed asynchronously. Since Linux v6.12. #[serde(skip_serializing_if = "Option::is_none")] pub async_reclaim: Option, /// Amount of memory reclaimed directly. Since Linux v6.12. #[serde(skip_serializing_if = "Option::is_none")] pub direct_reclaim: Option, }

impl BalloonStats {
    // Fold one `BalloonStat` entry read from the stats queue into this cached
    // snapshot. Unknown tags are counted in METRICS and logged at debug level
    // rather than treated as errors, so newer guests cannot break stats
    // collection.
    fn update_with_stat(&mut self, stat: &BalloonStat) {
        let val = Some(stat.val);
        match stat.tag {
            VIRTIO_BALLOON_S_SWAP_IN => self.swap_in = val,
            VIRTIO_BALLOON_S_SWAP_OUT => self.swap_out = val,
            VIRTIO_BALLOON_S_MAJFLT => self.major_faults = val,
            VIRTIO_BALLOON_S_MINFLT => self.minor_faults = val,
            VIRTIO_BALLOON_S_MEMFREE => self.free_memory = val,
            VIRTIO_BALLOON_S_MEMTOT => self.total_memory = val,
            VIRTIO_BALLOON_S_AVAIL => self.available_memory = val,
            VIRTIO_BALLOON_S_CACHES => self.disk_caches = val,
            VIRTIO_BALLOON_S_HTLB_PGALLOC => self.hugetlb_allocations = val,
            VIRTIO_BALLOON_S_HTLB_PGFAIL => self.hugetlb_failures = val,
            VIRTIO_BALLOON_S_OOM_KILL => self.oom_kill = val,
            VIRTIO_BALLOON_S_ALLOC_STALL => self.alloc_stall = val,
            VIRTIO_BALLOON_S_ASYNC_SCAN => self.async_scan = val,
            VIRTIO_BALLOON_S_DIRECT_SCAN => self.direct_scan = val,
            VIRTIO_BALLOON_S_ASYNC_RECLAIM => self.async_reclaim = val,
            VIRTIO_BALLOON_S_DIRECT_RECLAIM => self.direct_reclaim = val,
            tag => {
                METRICS.stats_update_fails.inc();
                debug!("balloon: unknown stats update tag: {tag}");
            }
        }
    }
}

/// Virtio balloon device.
#[derive(Debug)]
pub struct Balloon {
    // Virtio fields.
    pub(crate) avail_features: u64,
    pub(crate) acked_features: u64,
    pub(crate) config_space: ConfigSpace,
    pub(crate) activate_evt: EventFd,
    // Transport related fields.
    pub(crate) queues: Vec,
    pub(crate) queue_evts: Vec,
    pub(crate) device_state: DeviceState,
    // Implementation specific fields.
    pub(crate) stats_polling_interval_s: u16,
    pub(crate) stats_timer: TimerFd,
    // The index of the previous stats descriptor is saved because
    // it is acknowledged after the stats queue is processed.
    pub(crate) stats_desc_index: Option,
    pub(crate) latest_stats: BalloonStats,
    // A buffer used as pfn accumulator during descriptor processing.
    pub(crate) pfn_buffer: [u32; MAX_PAGE_COMPACT_BUFFER],
    // Holds state for free page hinting
    pub(crate) hinting_state: HintingState,
}

impl Balloon {
    /// Instantiate a new balloon device.
    ///
    /// Advertises only the features matching the requested options, and
    /// allocates one queue (plus eventfd) per enabled optional virtqueue on
    /// top of the mandatory inflate/deflate pair.
    // NOTE(review): generic parameters (e.g. on `Result`, `Vec`, `collect`)
    // appear stripped by extraction in this chunk — confirm upstream.
    pub fn new(
        amount_mib: u32,
        deflate_on_oom: bool,
        stats_polling_interval_s: u16,
        free_page_hinting: bool,
        free_page_reporting: bool,
    ) -> Result {
        let mut avail_features = 1u64 << VIRTIO_F_VERSION_1;

        if deflate_on_oom {
            avail_features |= 1u64 << VIRTIO_BALLOON_F_DEFLATE_ON_OOM;
        };

        // The VirtIO specification states that the statistics queue should
        // not be present at all if the statistics are not enabled.
        let mut queue_count = BALLOON_MIN_NUM_QUEUES;
        if stats_polling_interval_s > 0 {
            avail_features |= 1u64 << VIRTIO_BALLOON_F_STATS_VQ;
            queue_count += 1;
        }
        if free_page_hinting {
            // Hinting is still a developer-preview feature; warn on use.
            log_dev_preview_warning("Free Page Hinting", None);
            avail_features |= 1u64 << VIRTIO_BALLOON_F_FREE_PAGE_HINTING;
            queue_count += 1;
        }
        if free_page_reporting {
            avail_features |= 1u64 << VIRTIO_BALLOON_F_FREE_PAGE_REPORTING;
            queue_count += 1;
        }

        let queues: Vec = (0..queue_count)
            .map(|_| Queue::new(BALLOON_QUEUE_SIZE))
            .collect();
        let queue_evts = (0..queue_count)
            .map(|_| EventFd::new(libc::EFD_NONBLOCK).map_err(BalloonError::EventFd))
            .collect::, _>>()?;
        let stats_timer = TimerFd::new();

        Ok(Balloon {
            avail_features,
            acked_features: 0u64,
            config_space: ConfigSpace {
                num_pages: mib_to_pages(amount_mib)?,
                actual_pages: 0,
                free_page_hint_cmd_id: FREE_PAGE_HINT_STOP,
            },
            queue_evts,
            queues,
            device_state: DeviceState::Inactive,
            activate_evt: EventFd::new(libc::EFD_NONBLOCK).map_err(BalloonError::EventFd)?,
            stats_polling_interval_s,
            stats_timer,
            stats_desc_index: None,
            latest_stats: BalloonStats::default(),
            pfn_buffer: [0u32; MAX_PAGE_COMPACT_BUFFER],
            hinting_state: Default::default(),
        })
    }

    // Drain the inflate queue's eventfd, then process pending inflate descriptors.
    pub(crate) fn process_inflate_queue_event(&mut self) -> Result<(), BalloonError> {
        self.queue_evts[INFLATE_INDEX]
            .read()
            .map_err(BalloonError::EventFd)?;
        self.process_inflate()
    }

    // Drain the deflate queue's eventfd, then process pending deflate descriptors.
    pub(crate) fn process_deflate_queue_event(&mut self) -> Result<(), BalloonError> {
        self.queue_evts[DEFLATE_INDEX]
            .read()
            .map_err(BalloonError::EventFd)?;
        self.process_deflate_queue()
    }

    // Drain the stats queue's eventfd, then consume the stats buffer from the driver.
    pub(crate) fn process_stats_queue_event(&mut self) -> Result<(), BalloonError> {
        self.queue_evts[STATS_INDEX]
            .read()
            .map_err(BalloonError::EventFd)?;
        self.process_stats_queue()
    }

    // Fires on stats_timer expiry: return the held stats descriptor to the
    // driver so it refills it with fresh statistics.
    pub(crate) fn process_stats_timer_event(&mut self) -> Result<(), BalloonError> {
        _ = self.stats_timer.read();
        self.trigger_stats_update()
    }

    pub(crate) fn process_free_page_hinting_queue_event(&mut self) -> Result<(), BalloonError> {
        self.queue_evts[self.free_page_hinting_idx()]
            .read()
            .map_err(BalloonError::EventFd)?;
        self.process_free_page_hinting_queue()
    }

    pub(crate) fn process_free_page_reporting_queue_event(&mut self) -> Result<(), BalloonError> {
        self.queue_evts[self.free_page_reporting_idx()]
            .read()
            .map_err(BalloonError::EventFd)?;
        self.process_free_page_reporting_queue()
    }

    // Reclaim the guest pages named by pfn in every available inflate
    // descriptor: pfns are accumulated in `pfn_buffer`, compacted into
    // contiguous ranges, and each range is discarded from guest memory.
    pub(crate) fn process_inflate(&mut self) -> Result<(), BalloonError> {
        // This is safe since we checked in the event handler that the device is activated.
        let mem = &self
            .device_state
            .active_state()
            .ok_or(BalloonError::DeviceNotActive)?
            .mem;
        METRICS.inflate_count.inc();

        let queue = &mut self.queues[INFLATE_INDEX];
        // The pfn buffer index used during descriptor processing.
        let mut pfn_buffer_idx = 0;
        let mut needs_interrupt = false;
        let mut valid_descs_found = true;

        // Loop until there are no more valid DescriptorChains.
        while valid_descs_found {
            valid_descs_found = false;
            // Internal loop processes descriptors and accumulates the pfns in `pfn_buffer`.
            // Breaks out when there is not enough space in `pfn_buffer` to completely process
            // the next descriptor.
            while let Some(head) = queue.pop()? {
                let len = head.len as usize;
                let max_len = MAX_PAGES_IN_DESC * SIZE_OF_U32;
                valid_descs_found = true;

                // Only device-readable descriptors whose length is a whole
                // number of u32 pfns are processed; others are acked untouched.
                if !head.is_write_only() && len.is_multiple_of(SIZE_OF_U32) {
                    // Check descriptor pfn count.
                    if len > max_len {
                        error!(
                            "Inflate descriptor has bogus page count {} > {}, skipping.",
                            len / SIZE_OF_U32,
                            MAX_PAGES_IN_DESC
                        );
                        // Skip descriptor.
                        continue;
                    }
                    // Break loop if `pfn_buffer` will be overrun by adding all pfns from current
                    // desc.
                    if MAX_PAGE_COMPACT_BUFFER - pfn_buffer_idx < len / SIZE_OF_U32 {
                        queue.undo_pop();
                        break;
                    }

                    // This is safe, `len` was validated above.
                    for index in (0..len).step_by(SIZE_OF_U32) {
                        let addr = head
                            .addr
                            .checked_add(index as u64)
                            .ok_or(BalloonError::MalformedDescriptor)?;
                        let page_frame_number = mem
                            .read_obj::(addr)
                            .map_err(|_| BalloonError::MalformedDescriptor)?;

                        self.pfn_buffer[pfn_buffer_idx] = page_frame_number;
                        pfn_buffer_idx += 1;
                    }
                }

                // Acknowledge the receipt of the descriptor.
                // 0 is number of bytes the device has written to memory.
                queue.add_used(head.index, 0)?;
                needs_interrupt = true;
            }

            // Compact pages into ranges.
            let page_ranges = compact_page_frame_numbers(&mut self.pfn_buffer[..pfn_buffer_idx]);
            pfn_buffer_idx = 0;

            // Remove the page ranges.
            for (page_frame_number, range_len) in page_ranges {
                let guest_addr =
                    GuestAddress(u64::from(page_frame_number) << VIRTIO_BALLOON_PFN_SHIFT);

                // Failure to discard a range is logged but does not abort the
                // remaining ranges.
                if let Err(err) = mem.discard_range(
                    guest_addr,
                    usize::try_from(range_len).unwrap() << VIRTIO_BALLOON_PFN_SHIFT,
                ) {
                    error!("Error removing memory range: {:?}", err);
                }
            }
        }
        queue.advance_used_ring_idx();

        if needs_interrupt {
            self.signal_used_queue(INFLATE_INDEX)?;
        }

        Ok(())
    }

    // Deflate requests need no per-page work on the host side: the guest is
    // reclaiming pages itself, so we only acknowledge the descriptors.
    pub(crate) fn process_deflate_queue(&mut self) -> Result<(), BalloonError> {
        METRICS.deflate_count.inc();

        let queue = &mut self.queues[DEFLATE_INDEX];
        let mut needs_interrupt = false;

        while let Some(head) = queue.pop()? {
            queue.add_used(head.index, 0)?;
            needs_interrupt = true;
        }
        queue.advance_used_ring_idx();

        if needs_interrupt {
            self.signal_used_queue(DEFLATE_INDEX)
        } else {
            Ok(())
        }
    }

    pub(crate) fn process_stats_queue(&mut self) -> Result<(), BalloonError> {
        // This is safe since we checked in the event handler that the device is activated.
let mem = &self.device_state.active_state().unwrap().mem;
        METRICS.stats_updates_count.inc();

        while let Some(head) = self.queues[STATS_INDEX].pop()? {
            if let Some(prev_stats_desc) = self.stats_desc_index {
                // We shouldn't ever have an extra buffer if the driver follows
                // the protocol, but return it if we find one.
                error!("balloon: driver is not compliant, more than one stats buffer received");
                self.queues[STATS_INDEX].add_used(prev_stats_desc, 0)?;
            }
            for index in (0..head.len).step_by(SIZE_OF_STAT) {
                // Read the address at position `index`. The only case
                // in which this fails is if there is overflow,
                // in which case this descriptor is malformed,
                // so we ignore the rest of it.
                let addr = head
                    .addr
                    .checked_add(u64::from(index))
                    .ok_or(BalloonError::MalformedDescriptor)?;
                let stat = mem
                    .read_obj::(addr)
                    .map_err(|_| BalloonError::MalformedDescriptor)?;
                self.latest_stats.update_with_stat(&stat);
            }

            // Hold on to the descriptor; it is only returned to the driver
            // when the polling timer fires (see `trigger_stats_update`).
            self.stats_desc_index = Some(head.index);
        }
        Ok(())
    }

    // Walk the free-page-hinting queue. 4-byte descriptors carry a cmd_id
    // update from the driver; longer ones carry page ranges that may be
    // discarded if they belong to the currently running hinting command.
    pub(crate) fn process_free_page_hinting_queue(&mut self) -> Result<(), BalloonError> {
        let mem = &self
            .device_state
            .active_state()
            .ok_or(BalloonError::DeviceNotActive)?
            .mem;

        let idx = self.free_page_hinting_idx();
        let queue = &mut self.queues[idx];
        let host_cmd = self.hinting_state.host_cmd;
        let mut needs_interrupt = false;
        let mut complete = false;
        while let Some(head) = queue.pop()? {
            // NOTE(review): `head_index` is bound but unused below
            // (`head.index` is used directly) — confirm against upstream
            // whether `add_used(head_index, ...)` was intended.
            let head_index = head.index;
            let mut last_desc = Some(head);
            while let Some(desc) = last_desc {
                last_desc = desc.next_descriptor();
                // Updated cmd_ids are always of length 4
                if desc.len == 4 {
                    complete = false;
                    let cmd = mem
                        .read_obj::(desc.addr)
                        .map_err(|_| BalloonError::MalformedDescriptor)?;
                    self.hinting_state.guest_cmd = Some(cmd);
                    if cmd == FREE_PAGE_HINT_STOP {
                        complete = true;
                    }
                    // We don't expect this from the driver, but let's treat it as a stop
                    if cmd == FREE_PAGE_HINT_DONE {
                        warn!("balloon hinting: Unexpected cmd from guest: {cmd}");
                        complete = true;
                    }
                    continue;
                }

                // If we've requested done we have to discard any in-flight hints
                if host_cmd == FREE_PAGE_HINT_DONE || host_cmd == FREE_PAGE_HINT_STOP {
                    continue;
                }

                let Some(chain_cmd) = self.hinting_state.guest_cmd else {
                    warn!("balloon hinting: received range with no command id.");
                    continue;
                };

                // Ranges hinted under a stale cmd_id are ignored.
                if chain_cmd != host_cmd {
                    info!("balloon hinting: Received chain from previous command ignoring.");
                    continue;
                }

                METRICS.free_page_hint_count.inc();
                if let Err(err) = mem.discard_range(desc.addr, desc.len as usize) {
                    METRICS.free_page_hint_fails.inc();
                    error!("balloon hinting: failed to remove range: {err:?}");
                } else {
                    METRICS.free_page_hint_freed.add(desc.len as u64);
                }
            }

            queue.add_used(head.index, 0)?;
            needs_interrupt = true;
        }
        queue.advance_used_ring_idx();

        if needs_interrupt {
            self.signal_used_queue(idx)?;
        }

        // If the guest reported STOP and the API requested auto-ack, move the
        // device-side command to DONE so the guest may reuse the hinted pages.
        // NOTE(review): the returned Result is discarded here — confirm this
        // is intentional best-effort behavior.
        if complete && self.hinting_state.acknowledge_on_finish {
            self.update_free_page_hint_cmd(FREE_PAGE_HINT_DONE);
        }

        Ok(())
    }

    // Walk the free-page-reporting queue: every descriptor in each chain names
    // a guest range that is free and may be discarded from host memory.
    pub(crate) fn process_free_page_reporting_queue(&mut self) -> Result<(), BalloonError> {
        let mem = &self
            .device_state
            .active_state()
            .ok_or(BalloonError::DeviceNotActive)?
            .mem;

        let idx = self.free_page_reporting_idx();
        let queue = &mut self.queues[idx];
        let mut needs_interrupt = false;
        while let Some(head) = queue.pop()? {
            // NOTE(review): `head_index` bound but unused — see the same
            // pattern in the hinting queue above; confirm upstream.
            let head_index = head.index;
            let mut last_desc = Some(head);
            while let Some(desc) = last_desc {
                METRICS.free_page_report_count.inc();
                if let Err(err) = mem.discard_range(desc.addr, desc.len as usize) {
                    METRICS.free_page_report_fails.inc();
                    error!("balloon: failed to remove range: {err:?}");
                } else {
                    METRICS.free_page_report_freed.add(desc.len as u64);
                }
                last_desc = desc.next_descriptor();
            }

            queue.add_used(head.index, 0)?;
            needs_interrupt = true;
        }
        queue.advance_used_ring_idx();

        if needs_interrupt {
            self.signal_used_queue(idx)?;
        }

        Ok(())
    }

    // Raise a used-buffer notification for queue `qidx` towards the guest.
    pub(crate) fn signal_used_queue(&self, qidx: usize) -> Result<(), BalloonError> {
        self.interrupt_trigger()
            .trigger(VirtioInterruptType::Queue(
                qidx.try_into()
                    .unwrap_or_else(|_| panic!("balloon: invalid queue id: {qidx}")),
            ))
            .map_err(|err| {
                METRICS.event_fails.inc();
                BalloonError::InterruptError(err)
            })
    }

    /// Process device virtio queue(s).
    // Only `InvalidAvailIdx` is propagated; other per-queue errors have
    // already been logged/counted by the individual processing functions.
    pub fn process_virtio_queues(&mut self) -> Result<(), InvalidAvailIdx> {
        if let Err(BalloonError::InvalidAvailIdx(err)) = self.process_inflate() {
            return Err(err);
        }

        if let Err(BalloonError::InvalidAvailIdx(err)) = self.process_deflate_queue() {
            return Err(err);
        }

        if self.free_page_hinting()
            && let Err(BalloonError::InvalidAvailIdx(err)) = self.process_free_page_hinting_queue()
        {
            return Err(err);
        }

        if self.free_page_reporting()
            && let Err(BalloonError::InvalidAvailIdx(err)) =
                self.process_free_page_reporting_queue()
        {
            return Err(err);
        }
        Ok(())
    }

    // Return the stats descriptor to the driver, asking it to refill the
    // buffer with fresh statistics. A missing descriptor is logged, not fatal.
    fn trigger_stats_update(&mut self) -> Result<(), BalloonError> {
        // The communication is driven by the device by using the buffer
        // and sending a used buffer notification
        if let Some(index) = self.stats_desc_index.take() {
            self.queues[STATS_INDEX].add_used(index, 0)?;
            self.queues[STATS_INDEX].advance_used_ring_idx();
            self.signal_used_queue(STATS_INDEX)
        } else {
            error!("Failed to update balloon stats, missing descriptor.");
            Ok(())
        }
    }

    /// Update the target size of the balloon.
    // Fails if the device is inactive or if `amount_mib` exceeds guest memory;
    // on success the new target is written to config space and a config-change
    // interrupt is injected.
    pub fn update_size(&mut self, amount_mib: u32) -> Result<(), BalloonError> {
        if self.is_activated() {
            let mem = &self.device_state.active_state().unwrap().mem;
            // The balloon cannot have a target size greater than the size of
            // the guest memory.
            if u64::from(amount_mib) > mem_size_mib(mem) {
                return Err(BalloonError::TooMuchMemoryRequested(amount_mib));
            }
            self.config_space.num_pages = mib_to_pages(amount_mib)?;
            self.interrupt_trigger()
                .trigger(VirtioInterruptType::Config)
                .map_err(BalloonError::InterruptError)
        } else {
            Err(BalloonError::DeviceNotActive)
        }
    }

    // Whether the free-page-hinting feature was enabled at construction.
    pub fn free_page_hinting(&self) -> bool {
        self.avail_features & (1u64 << VIRTIO_BALLOON_F_FREE_PAGE_HINTING) != 0
    }

    // Queue index of the hinting queue: it comes after the mandatory
    // inflate/deflate pair and the (optional) stats queue.
    pub fn free_page_hinting_idx(&self) -> usize {
        let mut idx = BALLOON_MIN_NUM_QUEUES;
        if self.stats_polling_interval_s > 0 {
            idx += 1;
        }
        idx
    }

    // Whether the free-page-reporting feature was enabled at construction.
    pub fn free_page_reporting(&self) -> bool {
        self.avail_features & (1u64 << VIRTIO_BALLOON_F_FREE_PAGE_REPORTING) != 0
    }

    // Queue index of the reporting queue: last of the optional queues,
    // after stats and hinting if those are present.
    pub fn free_page_reporting_idx(&self) -> usize {
        let mut idx = BALLOON_MIN_NUM_QUEUES;
        if self.stats_polling_interval_s > 0 {
            idx += 1;
        }
        if self.free_page_hinting() {
            idx += 1;
        }
        idx
    }

    /// Update the statistics polling interval.
    // Statistics can only be re-timed, not turned on/off after boot, because
    // the stats queue's existence is negotiated at feature-negotiation time.
    pub fn update_stats_polling_interval(&mut self, interval_s: u16) -> Result<(), BalloonError> {
        if self.stats_polling_interval_s == interval_s {
            return Ok(());
        }

        if self.stats_polling_interval_s == 0 || interval_s == 0 {
            return Err(BalloonError::StatisticsStateChange);
        }

        self.trigger_stats_update()?;

        self.stats_polling_interval_s = interval_s;
        self.update_timer_state();
        Ok(())
    }

    // (Re)arm the periodic stats timer from the current polling interval.
    pub fn update_timer_state(&mut self) {
        let duration = Duration::from_secs(self.stats_polling_interval_s as u64);
        self.stats_timer.arm(duration, Some(duration));
    }

    /// Obtain the number of 4K pages the device is currently holding.
    pub fn num_pages(&self) -> u32 {
        self.config_space.num_pages
    }

    /// Obtain the size of 4K pages the device is currently holding in MIB.
    pub fn size_mb(&self) -> u32 {
        pages_to_mib(self.config_space.num_pages)
    }

    pub fn deflate_on_oom(&self) -> bool {
        self.avail_features & (1u64 << VIRTIO_BALLOON_F_DEFLATE_ON_OOM) != 0
    }

    pub fn stats_polling_interval_s(&self) -> u16 {
        self.stats_polling_interval_s
    }

    /// Retrieve latest stats for the balloon device.
    // Refreshes the target/actual page and MiB counters from config space
    // before returning the cached snapshot.
    pub fn latest_stats(&mut self) -> Result {
        if self.stats_enabled() {
            self.latest_stats.target_pages = self.config_space.num_pages;
            self.latest_stats.actual_pages = self.config_space.actual_pages;
            self.latest_stats.target_mib = pages_to_mib(self.latest_stats.target_pages);
            self.latest_stats.actual_mib = pages_to_mib(self.latest_stats.actual_pages);
            Ok(self.latest_stats)
        } else {
            Err(BalloonError::StatisticsDisabled)
        }
    }

    /// Update the free page hinting cmd
    // Writes the new cmd_id to device state and config space, then injects a
    // config-change interrupt so the guest re-reads it.
    pub fn update_free_page_hint_cmd(&mut self, cmd_id: u32) -> Result<(), BalloonError> {
        if !self.is_activated() {
            return Err(BalloonError::DeviceNotActive);
        }
        self.hinting_state.host_cmd = cmd_id;
        self.config_space.free_page_hint_cmd_id = cmd_id;
        self.interrupt_trigger()
            .trigger(VirtioInterruptType::Config)
            .map_err(BalloonError::InterruptError)
    }

    /// Starts a hinting run by setting the cmd_id to a new value.
    pub(crate) fn start_hinting(&mut self, cmd: StartHintingCmd) -> Result<(), BalloonError> {
        if !self.free_page_hinting() {
            return Err(BalloonError::HintingNotEnabled);
        }
        let mut cmd_id = self.hinting_state.last_cmd_id.wrapping_add(1);
        // 0 and 1 are reserved and cannot be used to start a hinting run
        if cmd_id <= 1 {
            cmd_id = 2;
        }
        self.hinting_state.acknowledge_on_finish = cmd.acknowledge_on_stop;
        self.hinting_state.last_cmd_id = cmd_id;
        self.update_free_page_hint_cmd(cmd_id)
    }

    /// Return the status of the hinting including the last command we sent to the driver
    /// and the last cmd sent from the driver
    pub(crate) fn get_hinting_status(&self) -> Result {
        if !self.free_page_hinting() {
            return Err(BalloonError::HintingNotEnabled);
        }
        Ok(HintingStatus {
            host_cmd: self.hinting_state.host_cmd,
            guest_cmd: self.hinting_state.guest_cmd,
        })
    }

    /// Stops the hinting run allowing the guest to reclaim hinted pages
    pub(crate) fn stop_hinting(&mut self) -> Result<(), BalloonError> {
        if !self.free_page_hinting() {
            Err(BalloonError::HintingNotEnabled)
        } else {
            self.update_free_page_hint_cmd(FREE_PAGE_HINT_DONE)
        }
    }

    /// Return the config of the balloon device.
    pub fn config(&self) -> BalloonConfig {
        BalloonConfig {
            amount_mib: self.size_mb(),
            deflate_on_oom: self.deflate_on_oom(),
            stats_polling_interval_s: self.stats_polling_interval_s(),
            free_page_hinting: self.free_page_hinting(),
            free_page_reporting: self.free_page_reporting(),
        }
    }

    pub(crate) fn stats_enabled(&self) -> bool {
        self.stats_polling_interval_s > 0
    }

    // Test/snapshot helper: overwrite the held stats descriptor index.
    pub(crate) fn set_stats_desc_index(&mut self, stats_desc_index: Option) {
        self.stats_desc_index = stats_desc_index;
    }
}

impl VirtioDevice for Balloon {
    impl_device_type!(VirtioDeviceType::Balloon);

    fn id(&self) -> &str {
        BALLOON_DEV_ID
    }

    fn avail_features(&self) -> u64 {
        self.avail_features
    }

    fn acked_features(&self) -> u64 {
        self.acked_features
    }

    fn set_acked_features(&mut self, acked_features: u64) {
        self.acked_features = acked_features;
    }

    fn queues(&self) -> &[Queue] {
        &self.queues
    }

    fn queues_mut(&mut self) -> &mut [Queue] {
        &mut self.queues
    }

    fn queue_events(&self) -> &[EventFd] {
        &self.queue_evts
    }

    // Panics if called before `activate` — callers must check activation first.
    fn interrupt_trigger(&self) -> &dyn VirtioInterrupt {
        self.device_state
            .active_state()
            .expect("Device is not activated")
            .interrupt
            .deref()
    }

    // Copy config-space bytes starting at `offset` into `data`; out-of-range
    // offsets are logged and leave `data` untouched.
    fn read_config(&self, offset: u64, data: &mut [u8]) {
        if let Some(config_space_bytes) = self.config_space.as_slice().get(u64_to_usize(offset)..)
{ let len = config_space_bytes.len().min(data.len()); data[..len].copy_from_slice(&config_space_bytes[..len]); } else { error!("Failed to read config space"); } } fn write_config(&mut self, offset: u64, data: &[u8]) { let config_space_bytes = self.config_space.as_mut_slice(); let start = usize::try_from(offset).ok(); let end = start.and_then(|s| s.checked_add(data.len())); let Some(dst) = start .zip(end) .and_then(|(start, end)| config_space_bytes.get_mut(start..end)) else { error!("Failed to write config space"); return; }; dst.copy_from_slice(data); } fn activate( &mut self, mem: GuestMemoryMmap, interrupt: Arc, ) -> Result<(), ActivateError> { for q in self.queues.iter_mut() { q.initialize(&mem) .map_err(ActivateError::QueueMemoryError)?; } self.device_state = DeviceState::Activated(ActiveState { mem, interrupt }); if self.activate_evt.write(1).is_err() { METRICS.activate_fails.inc(); self.device_state = DeviceState::Inactive; return Err(ActivateError::EventFd); } if self.stats_enabled() { self.update_timer_state(); } Ok(()) } fn is_activated(&self) -> bool { self.device_state.is_activated() } fn kick(&mut self) { if self.is_activated() { if self.free_page_hinting() { info!( "[{:?}:{}] resetting free page hinting to DONE", self.device_type(), self.id() ); self.update_free_page_hint_cmd(FREE_PAGE_HINT_DONE); } self.notify_queue_events(); } } } #[cfg(test)] pub(crate) mod tests { use itertools::iproduct; use super::super::BALLOON_CONFIG_SPACE_SIZE; use super::*; use crate::arch::host_page_size; use crate::check_metric_after_block; use crate::devices::virtio::balloon::report_balloon_event_fail; use crate::devices::virtio::balloon::test_utils::{ check_request_completion, invoke_handler_for_queue_event, set_request, }; use crate::devices::virtio::queue::{VIRTQ_DESC_F_NEXT, VIRTQ_DESC_F_WRITE}; use crate::devices::virtio::test_utils::test::{ VirtioTestDevice, VirtioTestHelper, create_virtio_mem, }; use crate::devices::virtio::test_utils::{VirtQueue, default_interrupt, 
default_mem};
    use crate::test_utils::single_region_mem;
    use crate::utils::align_up;
    use crate::vstate::memory::GuestAddress;

    // Test adapter: lets the generic virtio test helper drive a Balloon.
    impl VirtioTestDevice for Balloon {
        fn set_queues(&mut self, queues: Vec) {
            self.queues = queues;
        }

        // Number of queues for the current feature set (mirrors `Balloon::new`).
        fn num_queues(&self) -> usize {
            let mut idx = STATS_INDEX;
            if self.stats_polling_interval_s > 0 {
                idx += 1;
            }
            if self.free_page_hinting() {
                idx += 1;
            }
            if self.free_page_reporting() {
                idx += 1;
            }
            idx
        }
    }

    // Test-only accessors/mutators for internals not exposed in production.
    impl Balloon {
        pub(crate) fn set_queue(&mut self, idx: usize, q: Queue) {
            self.queues[idx] = q;
        }

        pub(crate) fn actual_pages(&self) -> u32 {
            self.config_space.actual_pages
        }

        pub fn update_num_pages(&mut self, num_pages: u32) {
            self.config_space.num_pages = num_pages;
        }

        pub fn update_actual_pages(&mut self, actual_pages: u32) {
            self.config_space.actual_pages = actual_pages;
        }
    }

    #[test]
    fn test_balloon_stat_size() {
        // virtio balloon stat entries are a packed u16 tag + u64 value.
        assert_eq!(SIZE_OF_STAT, 10);
    }

    #[test]
    fn test_update_balloon_stats() {
        // Exercise `update_with_stat` for every known stat tag.
        let mut stats = BalloonStats {
            target_pages: 5120,
            actual_pages: 2560,
            target_mib: 20,
            actual_mib: 10,
            swap_in: Some(0),
            swap_out: Some(0),
            major_faults: Some(0),
            minor_faults: Some(0),
            free_memory: Some(0),
            total_memory: Some(0),
            available_memory: Some(0),
            disk_caches: Some(0),
            hugetlb_allocations: Some(0),
            hugetlb_failures: Some(0),
            oom_kill: None,
            alloc_stall: None,
            async_scan: None,
            direct_scan: None,
            async_reclaim: None,
            direct_reclaim: None,
        };
        let mut stat = BalloonStat {
            tag: VIRTIO_BALLOON_S_SWAP_IN,
            val: 1,
        };
        stats.update_with_stat(&stat);
        assert_eq!(stats.swap_in, Some(1));
        stat.tag = VIRTIO_BALLOON_S_SWAP_OUT;
        stats.update_with_stat(&stat);
        assert_eq!(stats.swap_out, Some(1));
        stat.tag = VIRTIO_BALLOON_S_MAJFLT;
        stats.update_with_stat(&stat);
        assert_eq!(stats.major_faults, Some(1));
        stat.tag = VIRTIO_BALLOON_S_MINFLT;
        stats.update_with_stat(&stat);
        assert_eq!(stats.minor_faults, Some(1));
        stat.tag = VIRTIO_BALLOON_S_MEMFREE;
        stats.update_with_stat(&stat);
        assert_eq!(stats.free_memory, Some(1));
        stat.tag = VIRTIO_BALLOON_S_MEMTOT;
        stats.update_with_stat(&stat);
        assert_eq!(stats.total_memory, Some(1));
        stat.tag = VIRTIO_BALLOON_S_AVAIL;
        stats.update_with_stat(&stat);
        assert_eq!(stats.available_memory, Some(1));
        stat.tag = VIRTIO_BALLOON_S_CACHES;
        stats.update_with_stat(&stat);
        assert_eq!(stats.disk_caches, Some(1));
        stat.tag = VIRTIO_BALLOON_S_HTLB_PGALLOC;
        stats.update_with_stat(&stat);
        assert_eq!(stats.hugetlb_allocations, Some(1));
        stat.tag = VIRTIO_BALLOON_S_HTLB_PGFAIL;
        stats.update_with_stat(&stat);
        assert_eq!(stats.hugetlb_failures, Some(1));
        stat.tag = VIRTIO_BALLOON_S_OOM_KILL;
        stats.update_with_stat(&stat);
        assert_eq!(stats.oom_kill, Some(1));
        stat.tag = VIRTIO_BALLOON_S_ALLOC_STALL;
        stats.update_with_stat(&stat);
        assert_eq!(stats.alloc_stall, Some(1));
        stat.tag = VIRTIO_BALLOON_S_ASYNC_SCAN;
        stats.update_with_stat(&stat);
        assert_eq!(stats.async_scan, Some(1));
        stat.tag = VIRTIO_BALLOON_S_DIRECT_SCAN;
        stats.update_with_stat(&stat);
        assert_eq!(stats.direct_scan, Some(1));
        stat.tag = VIRTIO_BALLOON_S_ASYNC_RECLAIM;
        stats.update_with_stat(&stat);
        assert_eq!(stats.async_reclaim, Some(1));
        stat.tag = VIRTIO_BALLOON_S_DIRECT_RECLAIM;
        stats.update_with_stat(&stat);
        assert_eq!(stats.direct_reclaim, Some(1));
    }

    #[test]
    fn test_virtio_features() {
        // Test all feature combinations.
let combinations = iproduct!(
            &[true, false], // Reporting
            &[true, false], // Hinting
            &[true, false], // Deflate
            &[0, 1]         // Interval
        );
        for (reporting, hinting, deflate_on_oom, stats_interval) in combinations {
            let mut balloon =
                Balloon::new(0, *deflate_on_oom, *stats_interval, *hinting, *reporting).unwrap();
            assert_eq!(balloon.device_type(), VirtioDeviceType::Balloon);

            // Expected advertised feature bits for this combination.
            let features: u64 = (1u64 << VIRTIO_F_VERSION_1)
                | (u64::from(*deflate_on_oom) << VIRTIO_BALLOON_F_DEFLATE_ON_OOM)
                | ((u64::from(*reporting)) << VIRTIO_BALLOON_F_FREE_PAGE_REPORTING)
                | ((u64::from(*hinting)) << VIRTIO_BALLOON_F_FREE_PAGE_HINTING)
                | ((u64::from(*stats_interval)) << VIRTIO_BALLOON_F_STATS_VQ);

            assert_eq!(
                balloon.avail_features_by_page(0),
                (features & 0xFFFFFFFF) as u32
            );
            assert_eq!(balloon.avail_features_by_page(1), (features >> 32) as u32);
            for i in 2..10 {
                assert_eq!(balloon.avail_features_by_page(i), 0u32);
            }

            for i in 0..10 {
                balloon.ack_features_by_page(i, u32::MAX);
            }
            // Only present features should be acknowledged.
            assert_eq!(balloon.acked_features, features);
        }
    }

    #[test]
    fn test_virtio_read_config() {
        let balloon = Balloon::new(0x10, true, 0, false, false).unwrap();

        let cfg = BalloonConfig {
            amount_mib: 16,
            deflate_on_oom: true,
            stats_polling_interval_s: 0,
            free_page_hinting: false,
            free_page_reporting: false,
        };
        assert_eq!(balloon.config(), cfg);

        let mut actual_config_space = [0u8; BALLOON_CONFIG_SPACE_SIZE];
        balloon.read_config(0, &mut actual_config_space);
        // The first 4 bytes are num_pages, the last 4 bytes are actual_pages.
        // The config space is little endian.
        // 0x10 MB in the constructor corresponds to 0x1000 pages in the
        // config space.
        let expected_config_space: [u8; BALLOON_CONFIG_SPACE_SIZE] = [
            0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        ];
        assert_eq!(actual_config_space, expected_config_space);

        // Invalid read.
        let expected_config_space: [u8; BALLOON_CONFIG_SPACE_SIZE] = [
            0xd, 0xe, 0xa, 0xd, 0xb, 0xe, 0xe, 0xf, 0x00, 0x00, 0x00, 0x00,
        ];
        actual_config_space = expected_config_space;
        balloon.read_config(
            BALLOON_CONFIG_SPACE_SIZE as u64 + 1,
            &mut actual_config_space,
        );

        // Validate read failed (the config space was not updated).
        assert_eq!(actual_config_space, expected_config_space);
    }

    #[test]
    fn test_virtio_write_config() {
        let mut balloon = Balloon::new(0, true, 0, false, false).unwrap();

        let expected_config_space: [u8; BALLOON_CONFIG_SPACE_SIZE] = [
            0x00, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        ];
        balloon.write_config(0, &expected_config_space);

        let mut actual_config_space = [0u8; BALLOON_CONFIG_SPACE_SIZE];
        balloon.read_config(0, &mut actual_config_space);
        assert_eq!(actual_config_space, expected_config_space);

        // Invalid write.
        let new_config_space = [
            0xd, 0xe, 0xa, 0xd, 0xb, 0xe, 0xe, 0xf, 0x00, 0x00, 0x00, 0x00,
        ];
        balloon.write_config(5, &new_config_space);
        // Make sure nothing got written.
        balloon.read_config(0, &mut actual_config_space);
        assert_eq!(actual_config_space, expected_config_space);

        // Large offset that may cause an overflow.
        balloon.write_config(u64::MAX, &new_config_space);
        // Make sure nothing got written.
        balloon.read_config(0, &mut actual_config_space);
        assert_eq!(actual_config_space, expected_config_space);
    }

    #[test]
    fn test_free_page_hinting_config() {
        let mut balloon = Balloon::new(0, true, 0, true, false).unwrap();
        let mem = default_mem();
        let interrupt = default_interrupt();
        let infq = VirtQueue::new(GuestAddress(0), &mem, 16);
        balloon.set_queue(INFLATE_INDEX, infq.create_queue());
        balloon.set_queue(DEFLATE_INDEX, infq.create_queue());
        balloon.set_queue(balloon.free_page_hinting_idx(), infq.create_queue());
        balloon.activate(mem.clone(), interrupt).unwrap();

        let expected_config_space: [u8; BALLOON_CONFIG_SPACE_SIZE] = [
            0x00, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        ];
        balloon.write_config(0, &expected_config_space);

        let mut actual_config_space = [0u8; BALLOON_CONFIG_SPACE_SIZE];
        balloon.read_config(0, &mut actual_config_space);
        assert_eq!(actual_config_space, expected_config_space);

        // We expect the cmd_id to be set to 2 now
        balloon.start_hinting(Default::default()).unwrap();
        let expected_config_space: [u8; BALLOON_CONFIG_SPACE_SIZE] = [
            0x00, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
        ];
        let mut actual_config_space = [0u8; BALLOON_CONFIG_SPACE_SIZE];
        balloon.read_config(0, &mut actual_config_space);
        assert_eq!(actual_config_space, expected_config_space);

        // We expect the cmd_id to be set to 1
        balloon.stop_hinting().unwrap();
        let expected_config_space: [u8; BALLOON_CONFIG_SPACE_SIZE] = [
            0x00, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
        ];
        let mut actual_config_space = [0u8; BALLOON_CONFIG_SPACE_SIZE];
        balloon.read_config(0, &mut actual_config_space);
        assert_eq!(actual_config_space, expected_config_space);

        // We expect the cmd_id to be bumped up to 3 now
        balloon.start_hinting(Default::default()).unwrap();
        let expected_config_space: [u8; BALLOON_CONFIG_SPACE_SIZE] = [
            0x00, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
        ];
        let mut actual_config_space = [0u8; BALLOON_CONFIG_SPACE_SIZE];
        balloon.read_config(0, &mut actual_config_space);
        assert_eq!(actual_config_space, expected_config_space);
    }

    #[test]
    fn test_invalid_request() {
        let mut balloon = Balloon::new(0, true, 0, false, false).unwrap();
        let mem = default_mem();
        let interrupt = default_interrupt();
        // Only initialize the inflate queue to demonstrate invalid request handling.
        let infq = VirtQueue::new(GuestAddress(0), &mem, 16);
        balloon.set_queue(INFLATE_INDEX, infq.create_queue());
        balloon.set_queue(DEFLATE_INDEX, infq.create_queue());
        balloon.activate(mem.clone(), interrupt).unwrap();

        // Fill the second page (page index 1) with non-zero bytes.
        for i in 0..0x1000 {
            mem.write_obj::(1, GuestAddress((1 << 12) + i)).unwrap();
        }

        // Will write the page frame number of the affected frame at this
        // arbitrary address in memory.
        let page_addr = 0x10;

        // Invalid case: the descriptor is write-only.
        {
            mem.write_obj::(0x1, GuestAddress(page_addr)).unwrap();
            set_request(
                &infq,
                0,
                page_addr,
                SIZE_OF_U32.try_into().unwrap(),
                VIRTQ_DESC_F_NEXT | VIRTQ_DESC_F_WRITE,
            );

            invoke_handler_for_queue_event(&mut balloon, INFLATE_INDEX);
            check_request_completion(&infq, 0);

            // Check that the page was not zeroed.
            for i in 0..0x1000 {
                assert_eq!(mem.read_obj::(GuestAddress((1 << 12) + i)).unwrap(), 1);
            }
        }

        // Invalid case: descriptor len is not a multiple of 'SIZE_OF_U32'.
        {
            mem.write_obj::(0x1, GuestAddress(page_addr)).unwrap();
            set_request(
                &infq,
                1,
                page_addr,
                u32::try_from(SIZE_OF_U32).unwrap() + 1,
                VIRTQ_DESC_F_NEXT,
            );

            invoke_handler_for_queue_event(&mut balloon, INFLATE_INDEX);
            check_request_completion(&infq, 1);

            // Check that the page was not zeroed.
            for i in 0..0x1000 {
                assert_eq!(mem.read_obj::(GuestAddress((1 << 12) + i)).unwrap(), 1);
            }
        }
    }

    #[test]
    fn test_inflate() {
        let mut balloon = Balloon::new(0, true, 0, false, false).unwrap();
        let mem = default_mem();
        let interrupt = default_interrupt();
        let infq = VirtQueue::new(GuestAddress(0), &mem, 16);
        balloon.set_queue(INFLATE_INDEX, infq.create_queue());
        balloon.set_queue(DEFLATE_INDEX, infq.create_queue());
        balloon.activate(mem.clone(), interrupt).unwrap();

        // Fill the second page (page index 1) with non-zero bytes.
        for i in 0..0x1000 {
            mem.write_obj::(1, GuestAddress((1 << 12) + i)).unwrap();
        }

        // Will write the page frame number of the affected frame at this
        // arbitrary address in memory.
        let page_addr = 0x10;

        // Error case: the request is well-formed, but we forgot
        // to trigger the inflate event queue.
        {
            mem.write_obj::(0x1, GuestAddress(page_addr)).unwrap();
            set_request(
                &infq,
                0,
                page_addr,
                SIZE_OF_U32.try_into().unwrap(),
                VIRTQ_DESC_F_NEXT,
            );
            check_metric_after_block!(
                METRICS.event_fails,
                1,
                balloon
                    .process_inflate_queue_event()
                    .unwrap_or_else(report_balloon_event_fail)
            );
            // Verify that nothing got processed.
            assert_eq!(infq.used.idx.get(), 0);

            // Check that the page was not zeroed.
            for i in 0..0x1000 {
                assert_eq!(mem.read_obj::(GuestAddress((1 << 12) + i)).unwrap(), 1);
            }
        }

        // Test the happy case.
        {
            mem.write_obj::(0x1, GuestAddress(page_addr)).unwrap();
            set_request(
                &infq,
                0,
                page_addr,
                SIZE_OF_U32.try_into().unwrap(),
                VIRTQ_DESC_F_NEXT,
            );
            check_metric_after_block!(
                METRICS.inflate_count,
                1,
                invoke_handler_for_queue_event(&mut balloon, INFLATE_INDEX)
            );
            check_request_completion(&infq, 0);

            // Check that the page was zeroed.
            for i in 0..0x1000 {
                assert_eq!(mem.read_obj::(GuestAddress((1 << 12) + i)).unwrap(), 0);
            }
        }
    }

    #[test]
    fn test_deflate() {
        let mut balloon = Balloon::new(0, true, 0, false, false).unwrap();
        let mem = default_mem();
        let interrupt = default_interrupt();
        let defq = VirtQueue::new(GuestAddress(0), &mem, 16);
        balloon.set_queue(INFLATE_INDEX, defq.create_queue());
        balloon.set_queue(DEFLATE_INDEX, defq.create_queue());
        balloon.activate(mem.clone(), interrupt).unwrap();

        let page_addr = 0x10;

        // Error case: forgot to trigger deflate event queue.
        {
            set_request(
                &defq,
                0,
                page_addr,
                SIZE_OF_U32.try_into().unwrap(),
                VIRTQ_DESC_F_NEXT,
            );
            check_metric_after_block!(
                METRICS.event_fails,
                1,
                balloon
                    .process_deflate_queue_event()
                    .unwrap_or_else(report_balloon_event_fail)
            );
            // Verify that nothing got processed.
            assert_eq!(defq.used.idx.get(), 0);
        }

        // Happy case.
        {
            set_request(
                &defq,
                1,
                page_addr,
                SIZE_OF_U32.try_into().unwrap(),
                VIRTQ_DESC_F_NEXT,
            );
            check_metric_after_block!(
                METRICS.deflate_count,
                1,
                invoke_handler_for_queue_event(&mut balloon, DEFLATE_INDEX)
            );
            check_request_completion(&defq, 1);
        }
    }

    #[test]
    fn test_stats() {
        let mut balloon = Balloon::new(0, true, 1, false, false).unwrap();
        let mem = default_mem();
        let interrupt = default_interrupt();
        let statsq = VirtQueue::new(GuestAddress(0), &mem, 16);
        balloon.set_queue(INFLATE_INDEX, statsq.create_queue());
        balloon.set_queue(DEFLATE_INDEX, statsq.create_queue());
        balloon.set_queue(STATS_INDEX, statsq.create_queue());
        balloon.activate(mem.clone(), interrupt).unwrap();

        // Error case: forgot to trigger stats event queue.
        {
            set_request(
                &statsq,
                0,
                0x1000,
                SIZE_OF_STAT.try_into().unwrap(),
                VIRTQ_DESC_F_NEXT,
            );

            check_metric_after_block!(
                METRICS.event_fails,
                1,
                balloon
                    .process_stats_queue_event()
                    .unwrap_or_else(report_balloon_event_fail)
            );
            // Verify that nothing got processed.
            assert_eq!(statsq.used.idx.get(), 0);
        }

        // Happy case.
{ let swap_out_stat = BalloonStat { tag: VIRTIO_BALLOON_S_SWAP_OUT, val: 0x1, }; let mem_free_stat = BalloonStat { tag: VIRTIO_BALLOON_S_MEMFREE, val: 0x5678, }; // Write the stats in memory. mem.write_obj::(swap_out_stat, GuestAddress(page_addr)) .unwrap(); mem.write_obj::( mem_free_stat, GuestAddress(page_addr + SIZE_OF_STAT as u64), ) .unwrap(); set_request( &statsq, 0, page_addr, 2 * u32::try_from(SIZE_OF_STAT).unwrap(), VIRTQ_DESC_F_NEXT, ); check_metric_after_block!(METRICS.stats_updates_count, 1, { // Trigger the queue event. balloon.queue_events()[STATS_INDEX].write(1).unwrap(); balloon.process_stats_queue_event().unwrap(); // Don't check for completion yet. }); let stats = balloon.latest_stats().unwrap(); let expected_stats = BalloonStats { swap_out: Some(0x1), free_memory: Some(0x5678), ..BalloonStats::default() }; assert_eq!(stats, expected_stats); // Wait for the timer to expire, although as it is non-blocking // we could just process the timer event and it would not // return an error. std::thread::sleep(Duration::from_secs(1)); check_metric_after_block!(METRICS.event_fails, 0, { // Trigger the timer event, which consumes the stats // descriptor index and signals the used queue. 
assert!(balloon.stats_desc_index.is_some()); balloon.process_stats_timer_event().unwrap(); assert!(balloon.stats_desc_index.is_none()); assert!(balloon.interrupt_trigger().has_pending_interrupt( VirtioInterruptType::Queue(STATS_INDEX.try_into().unwrap()) )); }); } } #[test] fn test_process_reporting() { let mem = create_virtio_mem(); let mut th = VirtioTestHelper::::new(&mem, Balloon::new(0, true, 0, false, true).unwrap()); th.activate_device(&mem); let page_size = host_page_size() as u64; // This has to be u32 for the scatter gather #[allow(clippy::cast_possible_truncation)] let page_size_chain = page_size as u32; let reporting_idx = th.device().free_page_reporting_idx(); let safe_addr = align_up(th.data_address(), page_size); th.add_scatter_gather(reporting_idx, 0, &[(0, safe_addr, page_size_chain, 0)]); check_metric_after_block!( METRICS.free_page_report_freed, page_size, invoke_handler_for_queue_event(&mut th.device(), reporting_idx) ); // Test with multiple items th.add_scatter_gather( reporting_idx, 0, &[ (0, safe_addr, page_size_chain, 0), (1, safe_addr + page_size, page_size_chain, 0), (2, safe_addr + (page_size * 2), page_size_chain, 0), ], ); check_metric_after_block!( METRICS.free_page_report_freed, page_size * 3, invoke_handler_for_queue_event(&mut th.device(), reporting_idx) ); // Test with unaligned length th.add_scatter_gather(reporting_idx, 0, &[(1, safe_addr + 1, page_size_chain, 0)]); check_metric_after_block!( METRICS.free_page_report_fails, 1, invoke_handler_for_queue_event(&mut th.device(), reporting_idx) ); } struct HintingTestHelper<'a> { mem: &'a GuestMemoryMmap, th: VirtioTestHelper<'a, Balloon>, page_size: u64, page_size_chain: u32, hinting_idx: usize, safe_addr: u64, } impl<'a> HintingTestHelper<'a> { fn new(mem: &'a GuestMemoryMmap) -> Self { let mut th = VirtioTestHelper::::new( mem, Balloon::new(0, true, 0, true, false).unwrap(), ); th.activate_device(mem); let page_size = host_page_size() as u64; let hinting_idx = 
th.device().free_page_hinting_idx(); let safe_addr = align_up(th.data_address(), page_size); // Ack the config set on start th.device() .interrupt_trigger() .ack_interrupt(VirtioInterruptType::Config); Self { mem, th, page_size, hinting_idx, // This has to be u32 for the scatter gather #[allow(clippy::cast_possible_truncation)] page_size_chain: page_size as u32, safe_addr, } } fn start_hinting(&mut self, cmd: Option) { let cmd = cmd.unwrap_or_default(); self.th.device().start_hinting(cmd).unwrap(); assert!( self.th .device() .interrupt_trigger() .has_pending_interrupt(VirtioInterruptType::Config) ); self.th .device() .interrupt_trigger() .ack_interrupt(VirtioInterruptType::Config); } fn send_stop(&mut self, cmd: Option) { let cmd = cmd.unwrap_or(FREE_PAGE_HINT_STOP); self.mem .write_obj(cmd, GuestAddress::new(self.safe_addr)) .unwrap(); self.th.add_scatter_gather( self.hinting_idx, 0, &[ (0, self.safe_addr, 4, VIRTQ_DESC_F_WRITE), ( 1, self.safe_addr + self.page_size, self.page_size_chain, VIRTQ_DESC_F_WRITE, ), ], ); check_metric_after_block!( METRICS.free_page_hint_freed, 0, self.th.device().process_free_page_hinting_queue() ); self.th .device() .interrupt_trigger() .ack_interrupt(VirtioInterruptType::Queue( self.hinting_idx.try_into().unwrap(), )); self.th .device() .interrupt_trigger() .ack_interrupt(VirtioInterruptType::Config); } fn test_hinting(&mut self, cmd: Option, expected: u64) { let payload = match cmd { Some(c) => { self.mem .write_obj(c, GuestAddress::new(self.safe_addr)) .unwrap(); vec![ (0, self.safe_addr, 4, VIRTQ_DESC_F_WRITE), ( 1, self.safe_addr + self.page_size, self.page_size_chain, VIRTQ_DESC_F_WRITE, ), ] } None => { vec![( 0, self.safe_addr + self.page_size, self.page_size_chain, VIRTQ_DESC_F_WRITE, )] } }; self.th.add_scatter_gather(self.hinting_idx, 0, &payload); check_metric_after_block!( METRICS.free_page_hint_freed, expected, invoke_handler_for_queue_event(&mut self.th.device(), self.hinting_idx) ); } } #[test] fn 
test_hinting_no_cmd_set() { let mem = create_virtio_mem(); let mut ht = HintingTestHelper::new(&mem); // Report a page before a cmd_id has even been negotiated ht.test_hinting(Some(2), 0); } #[test] fn test_hinting_normal_path() { let mem = create_virtio_mem(); let mut ht = HintingTestHelper::new(&mem); // Test the good case ht.start_hinting(None); let host_cmd = ht.th.device().get_hinting_status().unwrap().host_cmd; // Ack the start of the hinting run and send a single page ht.test_hinting(Some(host_cmd), ht.page_size); } #[test] fn test_hinting_invalid_cmd() { let mem = create_virtio_mem(); let mut ht = HintingTestHelper::new(&mem); // Test the good case ht.start_hinting(None); let host_cmd = ht.th.device().get_hinting_status().unwrap().host_cmd; // Report pages for an invalid cmd ht.test_hinting(Some(host_cmd + 1), 0); // If correct cmd is again used continue again ht.test_hinting(Some(host_cmd), ht.page_size); } #[test] fn test_hinting_stale_inflight_requests() { let mem = create_virtio_mem(); let mut ht = HintingTestHelper::new(&mem); // Test the good case ht.start_hinting(None); let mut host_cmd = ht.th.device().get_hinting_status().unwrap().host_cmd; ht.test_hinting(Some(host_cmd), ht.page_size); // Trigger another hinting run this will bump the cmd id // so we should ignore any inflight requests ht.start_hinting(None); ht.test_hinting(None, 0); // Update to our new host cmd and check this now works host_cmd = ht.th.device().get_hinting_status().unwrap().host_cmd; ht.test_hinting(Some(host_cmd), ht.page_size); ht.test_hinting(None, ht.page_size); } #[test] fn test_hinting_stale_post_stop() { let mem = create_virtio_mem(); let mut ht = HintingTestHelper::new(&mem); // Test the good case ht.start_hinting(None); let mut host_cmd = ht.th.device().get_hinting_status().unwrap().host_cmd; // Simulate the driver finishing a run. 
Any reported values after // should be ignored ht.send_stop(None); // Test we handle invalid cmd from driver ht.send_stop(Some(FREE_PAGE_HINT_DONE)); ht.test_hinting(None, 0); // As we had auto ack on finish the host cmd should be set to done host_cmd = ht.th.device().get_hinting_status().unwrap().host_cmd; assert_eq!(host_cmd, FREE_PAGE_HINT_DONE); } #[test] fn test_hinting_no_ack_on_stop() { let mem = create_virtio_mem(); let mut ht = HintingTestHelper::new(&mem); // Test the good case ht.start_hinting(None); let mut host_cmd = ht.th.device().get_hinting_status().unwrap().host_cmd; // Test no ack on stop behaviour ht.start_hinting(Some(StartHintingCmd { acknowledge_on_stop: false, })); host_cmd = ht.th.device().get_hinting_status().unwrap().host_cmd; ht.test_hinting(Some(host_cmd), ht.page_size); ht.test_hinting(None, ht.page_size); ht.send_stop(None); let new_host_cmd = ht.th.device().get_hinting_status().unwrap().host_cmd; assert_eq!(host_cmd, new_host_cmd); } #[test] fn test_hinting_misaligned_value() { let mem = create_virtio_mem(); let mut ht = HintingTestHelper::new(&mem); // Test the good case ht.start_hinting(None); let mut host_cmd = ht.th.device().get_hinting_status().unwrap().host_cmd; ht.test_hinting(Some(host_cmd), ht.page_size); ht.test_hinting(None, ht.page_size); ht.th.add_scatter_gather( ht.hinting_idx, 0, &[(0, ht.safe_addr + ht.page_size + 1, ht.page_size_chain, 0)], ); check_metric_after_block!( METRICS.free_page_hint_fails, 1, ht.th.device().process_free_page_hinting_queue().unwrap() ); } #[test] fn test_process_balloon_queues() { let mut balloon = Balloon::new(0x10, true, 0, true, true).unwrap(); let mem = default_mem(); let interrupt = default_interrupt(); let infq = VirtQueue::new(GuestAddress(0), &mem, 16); let defq = VirtQueue::new(GuestAddress(0), &mem, 16); let hintq = VirtQueue::new(GuestAddress(0), &mem, 16); let reportq = VirtQueue::new(GuestAddress(0), &mem, 16); balloon.set_queue(INFLATE_INDEX, infq.create_queue()); 
balloon.set_queue(DEFLATE_INDEX, defq.create_queue()); balloon.set_queue(balloon.free_page_hinting_idx(), hintq.create_queue()); balloon.set_queue(balloon.free_page_reporting_idx(), reportq.create_queue()); balloon.activate(mem, interrupt).unwrap(); balloon.process_virtio_queues().unwrap(); } #[test] fn test_update_stats_interval() { let mut balloon = Balloon::new(0, true, 0, false, false).unwrap(); let mem = default_mem(); let q = VirtQueue::new(GuestAddress(0), &mem, 16); balloon.set_queue(INFLATE_INDEX, q.create_queue()); balloon.set_queue(DEFLATE_INDEX, q.create_queue()); let interrupt = default_interrupt(); balloon.activate(mem, interrupt).unwrap(); assert_eq!( format!("{:?}", balloon.update_stats_polling_interval(1)), "Err(StatisticsStateChange)" ); balloon.update_stats_polling_interval(0).unwrap(); let mut balloon = Balloon::new(0, true, 1, false, false).unwrap(); let mem = default_mem(); let q = VirtQueue::new(GuestAddress(0), &mem, 16); balloon.set_queue(INFLATE_INDEX, q.create_queue()); balloon.set_queue(DEFLATE_INDEX, q.create_queue()); balloon.set_queue(STATS_INDEX, q.create_queue()); let interrupt = default_interrupt(); balloon.activate(mem, interrupt).unwrap(); assert_eq!( format!("{:?}", balloon.update_stats_polling_interval(0)), "Err(StatisticsStateChange)" ); balloon.update_stats_polling_interval(1).unwrap(); balloon.update_stats_polling_interval(2).unwrap(); } #[test] fn test_cannot_update_inactive_device() { let mut balloon = Balloon::new(0, true, 0, false, false).unwrap(); // Assert that we can't update an inactive device. balloon.update_size(1).unwrap_err(); balloon.start_hinting(Default::default()).unwrap_err(); balloon.get_hinting_status().unwrap_err(); balloon.stop_hinting().unwrap_err(); } #[test] fn test_num_pages() { let mut balloon = Balloon::new(0, true, 0, false, false).unwrap(); // Switch the state to active. 
balloon.device_state = DeviceState::Activated(ActiveState { mem: single_region_mem(32 << 20), interrupt: default_interrupt(), }); assert_eq!(balloon.num_pages(), 0); assert_eq!(balloon.actual_pages(), 0); // Update fields through the API. balloon.update_actual_pages(0x1234); balloon.update_num_pages(0x100); assert_eq!(balloon.num_pages(), 0x100); balloon.update_size(16).unwrap(); let mut actual_config = vec![0; BALLOON_CONFIG_SPACE_SIZE]; balloon.read_config(0, &mut actual_config); assert_eq!( actual_config, vec![0x0, 0x10, 0x0, 0x0, 0x34, 0x12, 0, 0, 0, 0, 0, 0] ); assert_eq!(balloon.num_pages(), 0x1000); assert_eq!(balloon.actual_pages(), 0x1234); assert_eq!(balloon.size_mb(), 16); // Update fields through the config space. let expected_config = vec![0x44, 0x33, 0x22, 0x11, 0x78, 0x56, 0x34, 0x12, 0, 0, 0, 0]; balloon.write_config(0, &expected_config); assert_eq!(balloon.num_pages(), 0x1122_3344); assert_eq!(balloon.actual_pages(), 0x1234_5678); } } ================================================ FILE: src/vmm/src/devices/virtio/balloon/event_handler.rs ================================================ // Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
// SPDX-License-Identifier: Apache-2.0

use event_manager::{EventOps, Events, MutEventSubscriber};
use vmm_sys_util::epoll::EventSet;

use super::{DEFLATE_INDEX, INFLATE_INDEX, STATS_INDEX, report_balloon_event_fail};
use crate::devices::virtio::balloon::device::Balloon;
use crate::devices::virtio::device::VirtioDevice;
use crate::logger::{error, warn};

impl Balloon {
    // Identifiers attached to the registered epoll events; `process()` reads
    // them back from `event.data()` to route each event to its handler.
    const PROCESS_ACTIVATE: u32 = 0;
    const PROCESS_VIRTQ_INFLATE: u32 = 1;
    const PROCESS_VIRTQ_DEFLATE: u32 = 2;
    const PROCESS_VIRTQ_STATS: u32 = 3;
    const PROCESS_STATS_TIMER: u32 = 4;
    const PROCESS_VIRTQ_FREE_PAGE_HINTING: u32 = 5;
    const PROCESS_VIRTQ_FREE_PAGE_REPORTING: u32 = 6;

    // Registers the queue (and stats timer) events that are only meaningful
    // once the device is activated. Registration failures are logged and
    // otherwise ignored rather than propagated.
    fn register_runtime_events(&self, ops: &mut EventOps) {
        if let Err(err) = ops.add(Events::with_data(
            &self.queue_evts[INFLATE_INDEX],
            Self::PROCESS_VIRTQ_INFLATE,
            EventSet::IN,
        )) {
            error!("Failed to register inflate queue event: {}", err);
        }
        if let Err(err) = ops.add(Events::with_data(
            &self.queue_evts[DEFLATE_INDEX],
            Self::PROCESS_VIRTQ_DEFLATE,
            EventSet::IN,
        )) {
            error!("Failed to register deflate queue event: {}", err);
        }
        // The stats queue and its polling timer only exist when statistics
        // were enabled at construction time.
        if self.stats_enabled() {
            if let Err(err) = ops.add(Events::with_data(
                &self.queue_evts[STATS_INDEX],
                Self::PROCESS_VIRTQ_STATS,
                EventSet::IN,
            )) {
                error!("Failed to register stats queue event: {}", err);
            }
            if let Err(err) = ops.add(Events::with_data(
                &self.stats_timer,
                Self::PROCESS_STATS_TIMER,
                EventSet::IN,
            )) {
                error!("Failed to register stats timerfd event: {}", err);
            }
        }
        // Hinting/reporting queue indices are dynamic (they depend on which
        // features are enabled), hence the accessor calls.
        if self.free_page_hinting()
            && let Err(err) = ops.add(Events::with_data(
                &self.queue_evts[self.free_page_hinting_idx()],
                Self::PROCESS_VIRTQ_FREE_PAGE_HINTING,
                EventSet::IN,
            ))
        {
            error!("Failed to register free page hinting queue event: {}", err);
        }
        if self.free_page_reporting()
            && let Err(err) = ops.add(Events::with_data(
                &self.queue_evts[self.free_page_reporting_idx()],
                Self::PROCESS_VIRTQ_FREE_PAGE_REPORTING,
                EventSet::IN,
            ))
        {
            error!(
                "Failed to register free page reporting queue event: {}",
                err
            );
        }
    }

    // Registers only the activation event; runtime events are added later by
    // `process_activate_event()`.
    fn register_activate_event(&self, ops: &mut EventOps) {
        if let Err(err) = ops.add(Events::with_data(
            &self.activate_evt,
            Self::PROCESS_ACTIVATE,
            EventSet::IN,
        )) {
            error!("Failed to register activate event: {}", err);
        }
    }

    // Consumes the activate eventfd, swaps in the runtime event registrations
    // and removes the (now obsolete) activate registration.
    fn process_activate_event(&self, ops: &mut EventOps) {
        if let Err(err) = self.activate_evt.read() {
            error!("Failed to consume balloon activate event: {:?}", err);
        }
        self.register_runtime_events(ops);
        if let Err(err) = ops.remove(Events::with_data(
            &self.activate_evt,
            Self::PROCESS_ACTIVATE,
            EventSet::IN,
        )) {
            error!("Failed to un-register activate event: {}", err);
        }
    }
}

impl MutEventSubscriber for Balloon {
    // Dispatches an epoll event to the matching queue/timer handler. Events
    // arriving before activation are logged and dropped.
    fn process(&mut self, event: Events, ops: &mut EventOps) {
        let source = event.data();
        let event_set = event.event_set();

        // TODO: also check for errors. Pending high level discussions on how
        // to handle errors in devices.
        let supported_events = EventSet::IN;
        if !supported_events.contains(event_set) {
            warn!(
                "Received unknown event: {:?} from source: {:?}",
                event_set, source
            );
            return;
        }

        if self.is_activated() {
            match source {
                Self::PROCESS_ACTIVATE => self.process_activate_event(ops),
                Self::PROCESS_VIRTQ_INFLATE => self
                    .process_inflate_queue_event()
                    .unwrap_or_else(report_balloon_event_fail),
                Self::PROCESS_VIRTQ_DEFLATE => self
                    .process_deflate_queue_event()
                    .unwrap_or_else(report_balloon_event_fail),
                Self::PROCESS_VIRTQ_STATS => self
                    .process_stats_queue_event()
                    .unwrap_or_else(report_balloon_event_fail),
                Self::PROCESS_STATS_TIMER => self
                    .process_stats_timer_event()
                    .unwrap_or_else(report_balloon_event_fail),
                Self::PROCESS_VIRTQ_FREE_PAGE_HINTING => self
                    .process_free_page_hinting_queue_event()
                    .unwrap_or_else(report_balloon_event_fail),
                Self::PROCESS_VIRTQ_FREE_PAGE_REPORTING => self
                    .process_free_page_reporting_queue_event()
                    .unwrap_or_else(report_balloon_event_fail),
                _ => {
                    warn!("Balloon: Spurious event received: {:?}", source);
                }
            };
        } else {
            warn!(
                "Balloon: The device is not yet activated. Spurious event received: {:?}",
                source
            );
        }
    }

    fn init(&mut self, ops: &mut EventOps) {
        // This function can be called during different points in the device lifetime:
        //  - shortly after device creation,
        //  - on device activation (is-activated already true at this point),
        //  - on device restore from snapshot.
        if self.is_activated() {
            self.register_runtime_events(ops);
        } else {
            self.register_activate_event(ops);
        }
    }
}

#[cfg(test)]
pub mod tests {
    use std::sync::{Arc, Mutex};

    use event_manager::{EventManager, SubscriberOps};

    use super::*;
    use crate::devices::virtio::balloon::test_utils::set_request;
    use crate::devices::virtio::test_utils::{VirtQueue, default_interrupt, default_mem};
    use crate::vstate::memory::GuestAddress;

    #[test]
    fn test_event_handler() {
        let mut event_manager = EventManager::new().unwrap();
        let mut balloon = Balloon::new(0, true, 10, false, false).unwrap();
        let mem = default_mem();
        let interrupt = default_interrupt();
        let infq = VirtQueue::new(GuestAddress(0), &mem, 16);
        balloon.set_queue(INFLATE_INDEX, infq.create_queue());
        balloon.set_queue(DEFLATE_INDEX, infq.create_queue());
        balloon.set_queue(STATS_INDEX, infq.create_queue());

        let balloon = Arc::new(Mutex::new(balloon));
        let _id = event_manager.add_subscriber(balloon.clone());

        // Push a queue event, use the inflate queue in this test.
        {
            let addr = 0x100;
            set_request(&infq, 0, addr, 4, 0);
            balloon.lock().unwrap().queue_evts[INFLATE_INDEX]
                .write(1)
                .unwrap();
        }

        // EventManager should report no events since balloon has only registered
        // its activation event so far (even though there is also a queue event pending).
        let ev_count = event_manager.run_with_timeout(50).unwrap();
        assert_eq!(ev_count, 0);

        // Manually force a queue event and check it's ignored pre-activation.
        {
            let b = balloon.lock().unwrap();
            // Artificially push event.
            b.queue_evts[INFLATE_INDEX].write(1).unwrap();
            // Process the pushed event.
            let ev_count = event_manager.run_with_timeout(50).unwrap();
            // Validate there was no queue operation.
            assert_eq!(ev_count, 0);
            assert_eq!(infq.used.idx.get(), 0);
        }

        // Now activate the device.
        balloon
            .lock()
            .unwrap()
            .activate(mem.clone(), interrupt)
            .unwrap();
        // Process the activate event.
        let ev_count = event_manager.run_with_timeout(50).unwrap();
        assert_eq!(ev_count, 1);

        // Handle the previously pushed queue event through EventManager.
        event_manager
            .run_with_timeout(100)
            .expect("Metrics event timeout or error.");
        // Make sure the data queue advanced.
        assert_eq!(infq.used.idx.get(), 1);
    }
}


================================================
FILE: src/vmm/src/devices/virtio/balloon/metrics.rs
================================================
// Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

//! Defines the metrics system for balloon devices.
//!
//! # Metrics format
//! The metrics are flushed in JSON when requested by vmm::logger::metrics::METRICS.write().
//!
//! ## JSON example with metrics:
//! ```json
//! "balloon": {
//!     "activate_fails": "SharedIncMetric",
//!     "inflate_count": "SharedIncMetric",
//!     "stats_updates_count": "SharedIncMetric",
//!     ...
//! }
//! }
//! ```
//! Each `balloon` field in the example above is a serializable `BalloonDeviceMetrics` structure
//! collecting metrics such as `activate_fails`, `inflate_count` etc. for the balloon device.
//! Since balloon doesn't support multiple devices, there is no per device metrics and
//! `balloon` represents the aggregate balloon metrics.
//!
//! # Design
//! The main design goals of this system are:
//! * Have a consistent approach of keeping device related metrics in the individual devices
//!   modules.
//! * To decouple balloon device metrics from logger module by moving BalloonDeviceMetrics out of
//!   FirecrackerDeviceMetrics.
//! * Rely on `serde` to provide the actual serialization for writing the metrics.
//!
//!
The system implements 1 type of metrics: //! * Shared Incremental Metrics (SharedIncMetrics) - dedicated for the metrics which need a counter //! (i.e the number of times an API request failed). These metrics are reset upon flush. use serde::ser::SerializeMap; use serde::{Serialize, Serializer}; use crate::logger::SharedIncMetric; /// Stores aggregated balloon metrics pub(super) static METRICS: BalloonDeviceMetrics = BalloonDeviceMetrics::new(); /// Called by METRICS.flush(), this function facilitates serialization of balloon device metrics. pub fn flush_metrics(serializer: S) -> Result { let mut seq = serializer.serialize_map(Some(1))?; seq.serialize_entry("balloon", &METRICS)?; seq.end() } /// Balloon Device associated metrics. #[derive(Debug, Serialize)] pub(super) struct BalloonDeviceMetrics { /// Number of times when activate failed on a balloon device. pub activate_fails: SharedIncMetric, /// Number of balloon device inflations. pub inflate_count: SharedIncMetric, // Number of balloon statistics updates from the driver. pub stats_updates_count: SharedIncMetric, // Number of balloon statistics update failures. pub stats_update_fails: SharedIncMetric, /// Number of balloon device deflations. pub deflate_count: SharedIncMetric, /// Number of times when handling events on a balloon device failed. pub event_fails: SharedIncMetric, /// Number of times when free page repoting was triggered pub free_page_report_count: SharedIncMetric, /// Total memory freed by the reporting driver pub free_page_report_freed: SharedIncMetric, /// Number of errors occurred while reporting pub free_page_report_fails: SharedIncMetric, /// Number of times when free page hinting was triggered pub free_page_hint_count: SharedIncMetric, /// Total memory freed by the hinting driver pub free_page_hint_freed: SharedIncMetric, /// Number of errors occurred while hinting pub free_page_hint_fails: SharedIncMetric, } impl BalloonDeviceMetrics { /// Const default construction. 
const fn new() -> Self { Self { activate_fails: SharedIncMetric::new(), inflate_count: SharedIncMetric::new(), stats_updates_count: SharedIncMetric::new(), stats_update_fails: SharedIncMetric::new(), deflate_count: SharedIncMetric::new(), event_fails: SharedIncMetric::new(), free_page_report_count: SharedIncMetric::new(), free_page_report_freed: SharedIncMetric::new(), free_page_report_fails: SharedIncMetric::new(), free_page_hint_count: SharedIncMetric::new(), free_page_hint_freed: SharedIncMetric::new(), free_page_hint_fails: SharedIncMetric::new(), } } } #[cfg(test)] pub mod tests { use super::*; use crate::logger::IncMetric; #[test] fn test_balloon_dev_metrics() { let balloon_metrics: BalloonDeviceMetrics = BalloonDeviceMetrics::new(); let balloon_metrics_local: String = serde_json::to_string(&balloon_metrics).unwrap(); // the 1st serialize flushes the metrics and resets values to 0 so that // we can compare the values with local metrics. serde_json::to_string(&METRICS).unwrap(); let balloon_metrics_global: String = serde_json::to_string(&METRICS).unwrap(); assert_eq!(balloon_metrics_local, balloon_metrics_global); balloon_metrics.inflate_count.inc(); assert_eq!(balloon_metrics.inflate_count.count(), 1); } } ================================================ FILE: src/vmm/src/devices/virtio/balloon/mod.rs ================================================ // Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 //! Implements a virtio balloon device. 
pub mod device;
mod event_handler;
pub mod metrics;
pub mod persist;
pub mod test_utils;
mod util;

use log::error;

pub use self::device::{Balloon, BalloonConfig, BalloonStats};
use super::queue::{InvalidAvailIdx, QueueError};
use crate::devices::virtio::balloon::metrics::METRICS;
use crate::devices::virtio::queue::FIRECRACKER_MAX_QUEUE_SIZE;
use crate::logger::IncMetric;
use crate::vstate::interrupts::InterruptError;

/// Device ID used in MMIO device identification.
/// Because Balloon is unique per-vm, this ID can be hardcoded.
pub const BALLOON_DEV_ID: &str = "balloon";
/// The size of the config space.
pub const BALLOON_CONFIG_SPACE_SIZE: usize = 12;
/// Min number of virtio queues.
pub const BALLOON_MIN_NUM_QUEUES: usize = 2;
/// Virtio queue size, in number of descriptor chain heads.
pub const BALLOON_QUEUE_SIZE: u16 = FIRECRACKER_MAX_QUEUE_SIZE;
// Number of 4K pages in a MiB.
pub const MIB_TO_4K_PAGES: u32 = 256;
/// The maximum number of pages that can be received in a single descriptor.
pub const MAX_PAGES_IN_DESC: usize = 256;
/// The maximum number of pages that can be compacted into ranges during process_inflate().
/// Needs to be a multiple of MAX_PAGES_IN_DESC.
pub const MAX_PAGE_COMPACT_BUFFER: usize = 2048;
/// The addresses given by the driver are divided by 4096.
pub const VIRTIO_BALLOON_PFN_SHIFT: u32 = 12;
/// The index of the inflate queue from Balloon device queues/queues_evts vector.
pub const INFLATE_INDEX: usize = 0;
/// The index of the deflate queue from Balloon device queues/queues_evts vector.
pub const DEFLATE_INDEX: usize = 1;
/// The index of the stats queue from Balloon device queues/queues_evts vector.
pub const STATS_INDEX: usize = 2;
/// Command used in free page hinting to indicate the guest has finished
pub const FREE_PAGE_HINT_STOP: u32 = 0;
/// Command used in free page hinting to indicate to the guest to release pages
pub const FREE_PAGE_HINT_DONE: u32 = 1;

// The feature bitmap for virtio balloon.
const VIRTIO_BALLOON_F_STATS_VQ: u32 = 1; // Enable statistics.
const VIRTIO_BALLOON_F_DEFLATE_ON_OOM: u32 = 2; // Deflate balloon on OOM.
const VIRTIO_BALLOON_F_FREE_PAGE_HINTING: u32 = 3; // Enable free page hinting
const VIRTIO_BALLOON_F_FREE_PAGE_REPORTING: u32 = 5; // Enable free page reporting

// The statistics tags. defined in linux "include/uapi/linux/virtio_balloon.h".
const VIRTIO_BALLOON_S_SWAP_IN: u16 = 0;
const VIRTIO_BALLOON_S_SWAP_OUT: u16 = 1;
const VIRTIO_BALLOON_S_MAJFLT: u16 = 2;
const VIRTIO_BALLOON_S_MINFLT: u16 = 3;
const VIRTIO_BALLOON_S_MEMFREE: u16 = 4;
const VIRTIO_BALLOON_S_MEMTOT: u16 = 5;
const VIRTIO_BALLOON_S_AVAIL: u16 = 6;
const VIRTIO_BALLOON_S_CACHES: u16 = 7;
const VIRTIO_BALLOON_S_HTLB_PGALLOC: u16 = 8;
const VIRTIO_BALLOON_S_HTLB_PGFAIL: u16 = 9;
const VIRTIO_BALLOON_S_OOM_KILL: u16 = 10;
const VIRTIO_BALLOON_S_ALLOC_STALL: u16 = 11;
const VIRTIO_BALLOON_S_ASYNC_SCAN: u16 = 12;
const VIRTIO_BALLOON_S_DIRECT_SCAN: u16 = 13;
const VIRTIO_BALLOON_S_ASYNC_RECLAIM: u16 = 14;
const VIRTIO_BALLOON_S_DIRECT_RECLAIM: u16 = 15;

/// Balloon device related errors.
#[derive(Debug, thiserror::Error, displaydoc::Display)]
pub enum BalloonError {
    /// Device not activated yet.
    DeviceNotActive,
    /// Attempting to use hinting when not enabled
    HintingNotEnabled,
    /// EventFd error: {0}
    EventFd(std::io::Error),
    /// Received error while sending an interrupt: {0}
    InterruptError(InterruptError),
    /// Guest gave us a malformed descriptor.
    MalformedDescriptor,
    /// Guest gave us a malformed payload.
    MalformedPayload,
    /// Error restoring the balloon device queues.
    QueueRestoreError,
    /// Received stats query when stats are disabled.
    StatisticsDisabled,
    /// Statistics cannot be enabled/disabled after activation.
    StatisticsStateChange,
    /// Requested memory should be less than {0}MiB
    TooMuchMemoryRequested(u32),
    /// Error while processing the virt queues: {0}
    Queue(#[from] QueueError),
    /// {0}
    InvalidAvailIdx(#[from] InvalidAvailIdx),
}

/// Errors that can occur while removing (releasing) a guest memory region.
#[derive(Debug, thiserror::Error, displaydoc::Display)]
pub enum RemoveRegionError {
    /// Address translation error.
    AddressTranslation,
    /// Malformed guest address range.
    MalformedRange,
    /// Error calling madvise: {0}
    MadviseFail(std::io::Error),
    /// Error calling mmap: {0}
    MmapFail(std::io::Error),
    /// Region not found.
    RegionNotFound,
}

// Logs and counts a failed balloon event. `InvalidAvailIdx` indicates a
// corrupted available ring index and is treated as fatal (panics); all other
// errors only bump the `event_fails` metric.
pub(super) fn report_balloon_event_fail(err: BalloonError) {
    if let BalloonError::InvalidAvailIdx(err) = err {
        panic!("{}", err);
    }
    error!("{:?}", err);
    METRICS.event_fails.inc();
}


================================================
FILE: src/vmm/src/devices/virtio/balloon/persist.rs
================================================
// Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

//! Defines the structures needed for saving/restoring balloon devices.

use std::sync::Arc;
use std::time::Duration;

use serde::{Deserialize, Serialize};

use super::*;
use crate::devices::virtio::balloon::device::{BalloonStats, ConfigSpace, HintingState};
use crate::devices::virtio::device::{ActiveState, DeviceState, VirtioDeviceType};
use crate::devices::virtio::persist::VirtioDeviceState;
use crate::devices::virtio::queue::FIRECRACKER_MAX_QUEUE_SIZE;
use crate::devices::virtio::transport::VirtioInterrupt;
use crate::snapshot::Persist;
use crate::vstate::memory::GuestMemoryMmap;

/// Information about the balloon config's that are saved
/// at snapshot.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BalloonConfigSpaceState {
    num_pages: u32,
    actual_pages: u32,
}

/// Information about the balloon stats that are saved
/// at snapshot.
#[derive(Debug, Clone, Serialize, Deserialize)] pub struct BalloonStatsState { swap_in: Option, swap_out: Option, major_faults: Option, minor_faults: Option, free_memory: Option, total_memory: Option, available_memory: Option, disk_caches: Option, hugetlb_allocations: Option, hugetlb_failures: Option, oom_kill: Option, alloc_stall: Option, async_scan: Option, direct_scan: Option, async_reclaim: Option, direct_reclaim: Option, } impl BalloonStatsState { fn from_stats(stats: &BalloonStats) -> Self { Self { swap_in: stats.swap_in, swap_out: stats.swap_out, major_faults: stats.major_faults, minor_faults: stats.minor_faults, free_memory: stats.free_memory, total_memory: stats.total_memory, available_memory: stats.available_memory, disk_caches: stats.disk_caches, hugetlb_allocations: stats.hugetlb_allocations, hugetlb_failures: stats.hugetlb_failures, oom_kill: stats.oom_kill, alloc_stall: stats.alloc_stall, async_scan: stats.async_scan, direct_scan: stats.direct_scan, async_reclaim: stats.async_reclaim, direct_reclaim: stats.direct_reclaim, } } fn create_stats(&self) -> BalloonStats { BalloonStats { target_pages: 0, actual_pages: 0, target_mib: 0, actual_mib: 0, swap_in: self.swap_in, swap_out: self.swap_out, major_faults: self.major_faults, minor_faults: self.minor_faults, free_memory: self.free_memory, total_memory: self.total_memory, available_memory: self.available_memory, disk_caches: self.disk_caches, hugetlb_allocations: self.hugetlb_allocations, hugetlb_failures: self.hugetlb_failures, oom_kill: self.oom_kill, alloc_stall: self.alloc_stall, async_scan: self.async_scan, direct_scan: self.direct_scan, async_reclaim: self.async_reclaim, direct_reclaim: self.direct_reclaim, } } } /// Information about the balloon that are saved /// at snapshot. 
#[derive(Debug, Clone, Serialize, Deserialize)] pub struct BalloonState { stats_polling_interval_s: u16, stats_desc_index: Option, latest_stats: BalloonStatsState, config_space: BalloonConfigSpaceState, hinting_state: HintingState, pub virtio_state: VirtioDeviceState, } /// Auxiliary structure for creating a device when resuming from a snapshot. #[derive(Debug)] pub struct BalloonConstructorArgs { /// Pointer to guest memory. pub mem: GuestMemoryMmap, } impl Persist<'_> for Balloon { type State = BalloonState; type ConstructorArgs = BalloonConstructorArgs; type Error = super::BalloonError; fn save(&self) -> Self::State { BalloonState { stats_polling_interval_s: self.stats_polling_interval_s, stats_desc_index: self.stats_desc_index, latest_stats: BalloonStatsState::from_stats(&self.latest_stats), hinting_state: self.hinting_state, config_space: BalloonConfigSpaceState { num_pages: self.config_space.num_pages, actual_pages: self.config_space.actual_pages, }, virtio_state: VirtioDeviceState::from_device(self), } } fn restore( constructor_args: Self::ConstructorArgs, state: &Self::State, ) -> Result { let free_page_hinting = state.virtio_state.avail_features & (1u64 << VIRTIO_BALLOON_F_FREE_PAGE_HINTING) != 0; let free_page_reporting = state.virtio_state.avail_features & (1u64 << VIRTIO_BALLOON_F_FREE_PAGE_REPORTING) != 0; // We can safely create the balloon with arbitrary flags and // num_pages because we will overwrite them after. let mut balloon = Balloon::new( 0, false, state.stats_polling_interval_s, free_page_hinting, free_page_reporting, )?; let mut num_queues = BALLOON_MIN_NUM_QUEUES; // As per the virtio 1.1 specification, the statistics queue // should not exist if the statistics are not enabled. 
if state.stats_polling_interval_s > 0 { num_queues += 1; } if free_page_hinting { num_queues += 1; } if free_page_reporting { num_queues += 1; } balloon.queues = state .virtio_state .build_queues_checked( &constructor_args.mem, VirtioDeviceType::Balloon, num_queues, FIRECRACKER_MAX_QUEUE_SIZE, ) .map_err(|_| Self::Error::QueueRestoreError)?; balloon.avail_features = state.virtio_state.avail_features; balloon.acked_features = state.virtio_state.acked_features; balloon.latest_stats = state.latest_stats.create_stats(); balloon.config_space = ConfigSpace { num_pages: state.config_space.num_pages, actual_pages: state.config_space.actual_pages, // On restore allow the guest to reclaim pages free_page_hint_cmd_id: FREE_PAGE_HINT_DONE, }; balloon.hinting_state = state.hinting_state; if state.virtio_state.activated && balloon.stats_enabled() { // Restore the stats descriptor. balloon.set_stats_desc_index(state.stats_desc_index); // Restart timer if needed. let duration = Duration::from_secs(state.stats_polling_interval_s as u64); balloon.stats_timer.arm(duration, Some(duration)); } Ok(balloon) } } #[cfg(test)] mod tests { use super::*; use crate::devices::virtio::device::VirtioDevice; use crate::devices::virtio::test_utils::{default_interrupt, default_mem}; #[test] fn test_persistence() { let guest_mem = default_mem(); // Create and save the balloon device. let balloon = Balloon::new(0x42, false, 2, false, false).unwrap(); let balloon_state = balloon.save(); let serialized_data = bitcode::serialize(&balloon_state).unwrap(); // Deserialize and restore the balloon device. 
let restored_state = bitcode::deserialize(&serialized_data).unwrap(); let restored_balloon = Balloon::restore(BalloonConstructorArgs { mem: guest_mem }, &restored_state).unwrap(); assert_eq!(restored_balloon.device_type(), VirtioDeviceType::Balloon); assert_eq!(restored_balloon.acked_features, balloon.acked_features); assert_eq!(restored_balloon.avail_features, balloon.avail_features); assert_eq!( restored_balloon.config_space.num_pages, balloon.config_space.num_pages ); assert_eq!( restored_balloon.config_space.actual_pages, balloon.config_space.actual_pages ); assert_eq!( restored_balloon.config_space.free_page_hint_cmd_id, FREE_PAGE_HINT_DONE ); assert_eq!(restored_balloon.queues(), balloon.queues()); assert!(!restored_balloon.is_activated()); assert!(!balloon.is_activated()); assert_eq!( restored_balloon.stats_polling_interval_s, balloon.stats_polling_interval_s ); assert_eq!(restored_balloon.stats_desc_index, balloon.stats_desc_index); assert_eq!(restored_balloon.latest_stats, balloon.latest_stats); } } ================================================ FILE: src/vmm/src/devices/virtio/balloon/test_utils.rs ================================================ // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 #![doc(hidden)] #[cfg(test)] use crate::devices::virtio::balloon::Balloon; #[cfg(test)] use crate::devices::virtio::device::VirtioDevice; use crate::devices::virtio::test_utils::VirtQueue; #[cfg(test)] /// Max number of virtio queues. const BALLOON_MAX_NUM_QUEUES: usize = 5; #[cfg(test)] pub fn invoke_handler_for_queue_event(b: &mut Balloon, queue_index: usize) { use crate::devices::virtio::balloon::{DEFLATE_INDEX, INFLATE_INDEX, STATS_INDEX}; use crate::devices::virtio::transport::VirtioInterruptType; let hinting_idx = b.free_page_hinting_idx(); let reporting_idx = b.free_page_reporting_idx(); assert!(queue_index < BALLOON_MAX_NUM_QUEUES); // Trigger the queue event. 
b.queue_evts[queue_index].write(1).unwrap(); // Handle event. // Reporting -> hinting -> stats ordering is important as they will change // depending on enabled features match queue_index { INFLATE_INDEX => b.process_inflate_queue_event().unwrap(), DEFLATE_INDEX => b.process_deflate_queue_event().unwrap(), reporting_idx if b.free_page_reporting() => { b.process_free_page_reporting_queue_event().unwrap() } hinting_idx if b.free_page_hinting() => b.process_free_page_hinting_queue_event().unwrap(), STATS_INDEX => b.process_stats_queue_event().unwrap(), _ => unreachable!(), }; // Validate the queue operation finished successfully. let interrupt = b.interrupt_trigger(); assert!( interrupt .has_pending_interrupt(VirtioInterruptType::Queue(queue_index.try_into().unwrap())) ); interrupt.ack_interrupt(VirtioInterruptType::Queue(queue_index.try_into().unwrap())); } pub fn set_request(queue: &VirtQueue, idx: u16, addr: u64, len: u32, flags: u16) { // Set the index of the next request. queue.avail.idx.set(idx + 1); // Set the current descriptor table entry index. queue.avail.ring[idx as usize].set(idx); // Set the current descriptor table entry. queue.dtable[idx as usize].set(addr, len, flags, 1); } pub fn check_request_completion(queue: &VirtQueue, idx: usize) { // Check that the next used will be idx + 1. assert_eq!(queue.used.idx.get() as usize, idx + 1); // Check that the current used is idx. assert_eq!(queue.used.ring[idx].get().id as usize, idx); // The length of the completed request is 0. assert_eq!(queue.used.ring[idx].get().len, 0); } ================================================ FILE: src/vmm/src/devices/virtio/balloon/util.rs ================================================ // Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
// SPDX-License-Identifier: Apache-2.0 use std::io; use super::{MAX_PAGE_COMPACT_BUFFER, RemoveRegionError}; use crate::logger::error; use crate::utils::u64_to_usize; use crate::vstate::memory::{GuestAddress, GuestMemory, GuestMemoryMmap, GuestMemoryRegion}; /// This takes a vector of page frame numbers, and compacts them /// into ranges of consecutive pages. The result is a vector /// of (start_page_frame_number, range_length) pairs. pub(crate) fn compact_page_frame_numbers(v: &mut [u32]) -> Vec<(u32, u32)> { if v.is_empty() { return vec![]; } // Since the total number of pages that can be // received at once is `MAX_PAGE_COMPACT_BUFFER`, // this sort does not change the complexity of handling // an inflation. v.sort_unstable(); // Since there are at most `MAX_PAGE_COMPACT_BUFFER` pages, setting the // capacity of `result` to this makes sense. let mut result = Vec::with_capacity(MAX_PAGE_COMPACT_BUFFER); // The most recent range of pages is [previous..previous + length). let mut previous = 0; let mut length = 1; for pfn_index in 1..v.len() { let page_frame_number = v[pfn_index]; // Skip duplicate pages. This will ensure we only consider // distinct PFNs. if page_frame_number == v[pfn_index - 1] { error!("Skipping duplicate PFN {}.", page_frame_number); continue; } // Check if the current page frame number is adjacent to the most recent page range. // This operation will never overflow because for whatever value `v[previous]` // has in the u32 range, we know there are at least `length` consecutive numbers // greater than it in the array (the greatest so far being `page_frame_number`), // since `v[previous]` is before all of them in the sorted array and `length` // was incremented for each consecutive one. This is true only because we skip // duplicates. if page_frame_number == v[previous] + length { // If so, extend that range. length += 1; } else { // Otherwise, push (previous, length) to the result vector. 
result.push((v[previous], length)); // And update the most recent range of pages. previous = pfn_index; length = 1; } } // Don't forget to push the last range to the result. result.push((v[previous], length)); result } #[cfg(test)] mod tests { use std::fmt::Debug; use super::*; use crate::vstate::memory::Bytes; /// This asserts that $lhs matches $rhs. macro_rules! assert_match { ($lhs:expr, $rhs:pat) => {{ assert!(matches!($lhs, $rhs)) }}; } #[test] fn test_compact_page_indices() { // Test empty input. assert!(compact_page_frame_numbers(&mut []).is_empty()); // Test single compact range. assert_eq!( compact_page_frame_numbers((0_u32..100_u32).collect::>().as_mut_slice()), vec![(0, 100)] ); // `compact_page_frame_numbers` works even when given out of order input. assert_eq!( compact_page_frame_numbers((0_u32..100_u32).rev().collect::>().as_mut_slice()), vec![(0, 100)] ); // Test with 100 distinct ranges. assert_eq!( compact_page_frame_numbers( &mut (0_u32..10000_u32) .step_by(100) .flat_map(|x| (x..x + 10).rev()) .collect::>() ), (0_u32..10000_u32) .step_by(100) .map(|x| (x, 10_u32)) .collect::>() ); // Test range with duplicates. assert_eq!( compact_page_frame_numbers( &mut (0_u32..10000_u32).map(|x| x / 2).collect::>() ), vec![(0, 5000)] ); // Test there is no overflow when there are duplicate max values. assert_eq!( compact_page_frame_numbers(&mut [u32::MAX, u32::MAX]), vec![(u32::MAX, 1)] ); } /// ------------------------------------- /// BEGIN PROPERTY BASED TESTING use proptest::prelude::*; use crate::test_utils::single_region_mem; #[allow(clippy::let_with_type_underscore)] fn random_pfn_u32_max() -> impl Strategy> { // Create a randomly sized vec (max MAX_PAGE_COMPACT_BUFFER elements) filled with random u32 // elements. 
prop::collection::vec(0..u32::MAX, 0..MAX_PAGE_COMPACT_BUFFER) } #[allow(clippy::let_with_type_underscore)] fn random_pfn_100() -> impl Strategy> { // Create a randomly sized vec (max MAX_PAGE_COMPACT_BUFFER/8) filled with random u32 // elements (0 - 100). prop::collection::vec(0..100u32, 0..MAX_PAGE_COMPACT_BUFFER / 8) } // The uncompactor will output deduplicated and sorted elements as compaction algorithm // guarantees it. fn uncompact(compacted: Vec<(u32, u32)>) -> Vec { let mut result = Vec::new(); for (start, len) in compacted { result.extend(start..start + len); } result } fn sort_and_dedup(v: &[T]) -> Vec { let mut sorted_v = v.to_vec(); sorted_v.sort_unstable(); sorted_v.dedup(); sorted_v } // The below prop tests will validate the following output properties: // - vec elements are sorted by first tuple value // - no pfn duplicates are present // - no pfn is lost #[test] fn test_pfn_compact() { let cfg = ProptestConfig::with_cases(1500); proptest!(cfg, |(mut input1 in random_pfn_u32_max(), mut input2 in random_pfn_100())| { // The uncompactor will output sorted elements. prop_assert!( uncompact(compact_page_frame_numbers(input1.as_mut_slice())) == sort_and_dedup(input1.as_slice()) ); // Input2 will ensure duplicate PFN cases are also covered. prop_assert!( uncompact(compact_page_frame_numbers(input2.as_mut_slice())) == sort_and_dedup(input2.as_slice()) ); }); } } ================================================ FILE: src/vmm/src/devices/virtio/block/device.rs ================================================ // Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
// SPDX-License-Identifier: Apache-2.0 use std::sync::Arc; use event_manager::{EventOps, Events, MutEventSubscriber}; use log::info; use vmm_sys_util::eventfd::EventFd; use super::BlockError; use super::persist::{BlockConstructorArgs, BlockState}; use super::vhost_user::device::{VhostUserBlock, VhostUserBlockConfig}; use super::virtio::device::{VirtioBlock, VirtioBlockConfig}; use crate::devices::virtio::ActivateError; use crate::devices::virtio::device::{VirtioDevice, VirtioDeviceType}; use crate::devices::virtio::queue::{InvalidAvailIdx, Queue}; use crate::devices::virtio::transport::VirtioInterrupt; use crate::impl_device_type; use crate::rate_limiter::BucketUpdate; use crate::snapshot::Persist; use crate::vmm_config::drive::BlockDeviceConfig; use crate::vstate::memory::GuestMemoryMmap; // Clippy thinks that values of the enum are too different in size. #[allow(clippy::large_enum_variant)] #[derive(Debug)] pub enum Block { Virtio(VirtioBlock), VhostUser(VhostUserBlock), } impl Block { pub fn new(config: BlockDeviceConfig) -> Result { if let Ok(config) = VirtioBlockConfig::try_from(&config) { Ok(Self::Virtio( VirtioBlock::new(config).map_err(BlockError::VirtioBackend)?, )) } else if let Ok(config) = VhostUserBlockConfig::try_from(&config) { Ok(Self::VhostUser( VhostUserBlock::new(config).map_err(BlockError::VhostUserBackend)?, )) } else { Err(BlockError::InvalidBlockConfig) } } pub fn config(&self) -> BlockDeviceConfig { match self { Self::Virtio(b) => b.config().into(), Self::VhostUser(b) => b.config().into(), } } pub fn update_disk_image(&mut self, disk_image_path: String) -> Result<(), BlockError> { match self { Self::Virtio(b) => b .update_disk_image(disk_image_path) .map_err(BlockError::VirtioBackend), Self::VhostUser(_) => Err(BlockError::InvalidBlockBackend), } } pub fn update_rate_limiter( &mut self, bytes: BucketUpdate, ops: BucketUpdate, ) -> Result<(), BlockError> { match self { Self::Virtio(b) => { b.update_rate_limiter(bytes, ops); Ok(()) } 
Self::VhostUser(_) => Err(BlockError::InvalidBlockBackend), } } pub fn update_config(&mut self) -> Result<(), BlockError> { match self { Self::Virtio(_) => Err(BlockError::InvalidBlockBackend), Self::VhostUser(b) => b.config_update().map_err(BlockError::VhostUserBackend), } } pub fn process_virtio_queues(&mut self) -> Result<(), InvalidAvailIdx> { match self { Self::Virtio(b) => b.process_virtio_queues(), Self::VhostUser(_) => Ok(()), } } pub fn root_device(&self) -> bool { match self { Self::Virtio(b) => b.root_device, Self::VhostUser(b) => b.root_device, } } pub fn read_only(&self) -> bool { match self { Self::Virtio(b) => b.read_only, Self::VhostUser(b) => b.read_only, } } pub fn partuuid(&self) -> &Option { match self { Self::Virtio(b) => &b.partuuid, Self::VhostUser(b) => &b.partuuid, } } pub fn is_vhost_user(&self) -> bool { match self { Self::Virtio(_) => false, Self::VhostUser(_) => true, } } } impl VirtioDevice for Block { impl_device_type!(VirtioDeviceType::Block); fn id(&self) -> &str { match self { Self::Virtio(b) => b.id(), Self::VhostUser(b) => b.id(), } } fn avail_features(&self) -> u64 { match self { Self::Virtio(b) => b.avail_features, Self::VhostUser(b) => b.avail_features, } } fn acked_features(&self) -> u64 { match self { Self::Virtio(b) => b.acked_features, Self::VhostUser(b) => b.acked_features, } } fn set_acked_features(&mut self, acked_features: u64) { match self { Self::Virtio(b) => b.acked_features = acked_features, Self::VhostUser(b) => b.acked_features = acked_features, } } fn queues(&self) -> &[Queue] { match self { Self::Virtio(b) => &b.queues, Self::VhostUser(b) => &b.queues, } } fn queues_mut(&mut self) -> &mut [Queue] { match self { Self::Virtio(b) => &mut b.queues, Self::VhostUser(b) => &mut b.queues, } } fn queue_events(&self) -> &[EventFd] { match self { Self::Virtio(b) => &b.queue_evts, Self::VhostUser(b) => &b.queue_evts, } } fn interrupt_trigger(&self) -> &dyn VirtioInterrupt { match self { Self::Virtio(b) => 
b.interrupt_trigger(), Self::VhostUser(b) => b.interrupt_trigger(), } } fn read_config(&self, offset: u64, data: &mut [u8]) { match self { Self::Virtio(b) => b.read_config(offset, data), Self::VhostUser(b) => b.read_config(offset, data), } } fn write_config(&mut self, offset: u64, data: &[u8]) { match self { Self::Virtio(b) => b.write_config(offset, data), Self::VhostUser(b) => b.write_config(offset, data), } } fn activate( &mut self, mem: GuestMemoryMmap, interrupt: Arc, ) -> Result<(), ActivateError> { match self { Self::Virtio(b) => b.activate(mem, interrupt), Self::VhostUser(b) => b.activate(mem, interrupt), } } fn is_activated(&self) -> bool { match self { Self::Virtio(b) => b.device_state.is_activated(), Self::VhostUser(b) => b.device_state.is_activated(), } } fn prepare_save(&mut self) { match self { Self::Virtio(b) => b.prepare_save(), Self::VhostUser(b) => b.prepare_save(), } } } impl MutEventSubscriber for Block { fn process(&mut self, event: Events, ops: &mut EventOps) { match self { Self::Virtio(b) => b.process(event, ops), Self::VhostUser(b) => b.process(event, ops), } } fn init(&mut self, ops: &mut EventOps) { match self { Self::Virtio(b) => b.init(ops), Self::VhostUser(b) => b.init(ops), } } } impl Persist<'_> for Block { type State = BlockState; type ConstructorArgs = BlockConstructorArgs; type Error = BlockError; fn save(&self) -> Self::State { match self { Self::Virtio(b) => BlockState::Virtio(b.save()), Self::VhostUser(b) => BlockState::VhostUser(b.save()), } } fn restore( constructor_args: Self::ConstructorArgs, state: &Self::State, ) -> Result { match state { BlockState::Virtio(s) => Ok(Self::Virtio( VirtioBlock::restore(constructor_args, s).map_err(BlockError::VirtioBackend)?, )), BlockState::VhostUser(s) => Ok(Self::VhostUser( VhostUserBlock::restore(constructor_args, s) .map_err(BlockError::VhostUserBackend)?, )), } } } ================================================ FILE: src/vmm/src/devices/virtio/block/mod.rs 
================================================ // Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 use serde::{Deserialize, Serialize}; use self::vhost_user::VhostUserBlockError; use self::virtio::VirtioBlockError; pub mod device; pub mod persist; pub mod vhost_user; pub mod virtio; /// Configuration options for disk caching. #[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Deserialize, Serialize)] pub enum CacheType { /// Flushing mechanic not will be advertised to the guest driver #[default] Unsafe, /// Flushing mechanic will be advertised to the guest driver and /// flush requests coming from the guest will be performed using /// `fsync`. Writeback, } /// Errors the block device can trigger. #[derive(Debug, thiserror::Error, displaydoc::Display)] pub enum BlockError { /// Invalid block config. InvalidBlockConfig, /// Running method expected different backend. InvalidBlockBackend, /// Can not restore any backend. BackendRestore, /// Virtio backend error: {0} VirtioBackend(VirtioBlockError), /// Vhost user backend error: {0} VhostUserBackend(VhostUserBlockError), } ================================================ FILE: src/vmm/src/devices/virtio/block/persist.rs ================================================ // Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 use std::sync::Arc; use serde::{Deserialize, Serialize}; use super::vhost_user::persist::VhostUserBlockState; use super::virtio::persist::VirtioBlockState; use crate::devices::virtio::transport::VirtioInterrupt; use crate::vstate::memory::GuestMemoryMmap; /// Block device state. 
#[derive(Debug, Clone, Serialize, Deserialize)] pub enum BlockState { Virtio(VirtioBlockState), VhostUser(VhostUserBlockState), } impl BlockState { pub fn is_activated(&self) -> bool { match self { BlockState::Virtio(virtio_block_state) => virtio_block_state.virtio_state.activated, BlockState::VhostUser(vhost_user_block_state) => false, } } } /// Auxiliary structure for creating a device when resuming from a snapshot. #[derive(Debug)] pub struct BlockConstructorArgs { pub mem: GuestMemoryMmap, } ================================================ FILE: src/vmm/src/devices/virtio/block/vhost_user/device.rs ================================================ // Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 // Portions Copyright 2019 Intel Corporation. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 use std::ops::Deref; use std::sync::Arc; use log::error; use utils::time::{ClockType, get_time_us}; use vhost::vhost_user::Frontend; use vhost::vhost_user::message::*; use vmm_sys_util::eventfd::EventFd; use super::{NUM_QUEUES, QUEUE_SIZE, VhostUserBlockError}; use crate::MutEventSubscriber; use crate::devices::virtio::ActivateError; use crate::devices::virtio::block::CacheType; use crate::devices::virtio::device::{ActiveState, DeviceState, VirtioDevice, VirtioDeviceType}; use crate::devices::virtio::generated::virtio_blk::{VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_RO}; use crate::devices::virtio::generated::virtio_config::VIRTIO_F_VERSION_1; use crate::devices::virtio::generated::virtio_ring::VIRTIO_RING_F_EVENT_IDX; use crate::devices::virtio::queue::Queue; use crate::devices::virtio::transport::{VirtioInterrupt, VirtioInterruptType}; use crate::devices::virtio::vhost_user::{VhostUserHandleBackend, VhostUserHandleImpl}; use crate::devices::virtio::vhost_user_metrics::{ VhostUserDeviceMetrics, VhostUserMetricsPerDevice, }; use crate::impl_device_type; use crate::logger::{IncMetric, StoreMetric, 
log_dev_preview_warning}; use crate::utils::u64_to_usize; use crate::vmm_config::drive::BlockDeviceConfig; use crate::vstate::memory::GuestMemoryMmap; /// Block device config space size in bytes. const BLOCK_CONFIG_SPACE_SIZE: u32 = 60; const AVAILABLE_FEATURES: u64 = (1 << VIRTIO_F_VERSION_1) | (1 << VIRTIO_RING_F_EVENT_IDX) // vhost-user specific bit. Not defined in standard virtio spec. // Specifies ability of frontend to negotiate protocol features. | VhostUserVirtioFeatures::PROTOCOL_FEATURES.bits() // We always try to negotiate readonly with the backend. // If the backend is configured as readonly, we will accept it. | (1 << VIRTIO_BLK_F_RO); /// Use this structure to set up the Block Device before booting the kernel. #[derive(Debug, PartialEq, Eq)] pub struct VhostUserBlockConfig { /// Unique identifier of the drive. pub drive_id: String, /// Part-UUID. Represents the unique id of the boot partition of this device. It is /// optional and it will be used only if the `is_root_device` field is true. pub partuuid: Option, /// If set to true, it makes the current device the root block device. /// Setting this flag to true will mount the block device in the /// guest under /dev/vda unless the partuuid is present. pub is_root_device: bool, /// If set to true, the drive will ignore flush requests coming from /// the guest driver. 
pub cache_type: CacheType, /// Socket path of the vhost-user process pub socket: String, } impl TryFrom<&BlockDeviceConfig> for VhostUserBlockConfig { type Error = VhostUserBlockError; fn try_from(value: &BlockDeviceConfig) -> Result { if let (Some(socket), None, None, None, None) = ( &value.socket, &value.is_read_only, &value.path_on_host, &value.rate_limiter, &value.file_engine_type, ) { Ok(Self { drive_id: value.drive_id.clone(), partuuid: value.partuuid.clone(), is_root_device: value.is_root_device, cache_type: value.cache_type, socket: socket.clone(), }) } else { Err(VhostUserBlockError::Config) } } } impl From for BlockDeviceConfig { fn from(value: VhostUserBlockConfig) -> Self { Self { drive_id: value.drive_id, partuuid: value.partuuid, is_root_device: value.is_root_device, cache_type: value.cache_type, is_read_only: None, path_on_host: None, rate_limiter: None, file_engine_type: None, socket: Some(value.socket), } } } pub type VhostUserBlock = VhostUserBlockImpl; /// vhost-user block device. pub struct VhostUserBlockImpl { // Virtio fields. pub avail_features: u64, pub acked_features: u64, pub config_space: Vec, pub activate_evt: EventFd, // Transport related fields. pub queues: Vec, pub queue_evts: [EventFd; u64_to_usize(NUM_QUEUES)], pub device_state: DeviceState, // Implementation specific fields. 
pub id: String, pub partuuid: Option, pub cache_type: CacheType, pub root_device: bool, pub read_only: bool, // Vhost user protocol handle pub vu_handle: VhostUserHandleImpl, pub vu_acked_protocol_features: u64, pub metrics: Arc, } // Need custom implementation because otherwise `Debug` is required for `vhost::Master` impl std::fmt::Debug for VhostUserBlockImpl { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("VhostUserBlockImpl") .field("avail_features", &self.avail_features) .field("acked_features", &self.acked_features) .field("config_space", &self.config_space) .field("activate_evt", &self.activate_evt) .field("queues", &self.queues) .field("queue_evts", &self.queue_evts) .field("device_state", &self.device_state) .field("id", &self.id) .field("partuuid", &self.partuuid) .field("cache_type", &self.cache_type) .field("root_device", &self.root_device) .field("read_only", &self.read_only) .field("vu_handle", &self.vu_handle) .field( "vu_acked_protocol_features", &self.vu_acked_protocol_features, ) .field("metrics", &self.metrics) .finish() } } impl VhostUserBlockImpl { pub fn new(config: VhostUserBlockConfig) -> Result { log_dev_preview_warning("vhost-user-blk device", Option::None); let start_time = get_time_us(ClockType::Monotonic); let mut requested_features = AVAILABLE_FEATURES; if config.cache_type == CacheType::Writeback { requested_features |= 1 << VIRTIO_BLK_F_FLUSH; } let requested_protocol_features = VhostUserProtocolFeatures::CONFIG; let mut vu_handle = VhostUserHandleImpl::::new(&config.socket, NUM_QUEUES) .map_err(VhostUserBlockError::VhostUser)?; let (acked_features, acked_protocol_features) = vu_handle .negotiate_features(requested_features, requested_protocol_features) .map_err(VhostUserBlockError::VhostUser)?; // Get config from backend if CONFIG is acked or use empty buffer. 
let config_space = if acked_protocol_features & VhostUserProtocolFeatures::CONFIG.bits() != 0 { // This buffer is used for config size check in vhost crate. let buffer = [0u8; BLOCK_CONFIG_SPACE_SIZE as usize]; let (_, new_config_space) = vu_handle .vu .get_config( 0, BLOCK_CONFIG_SPACE_SIZE, VhostUserConfigFlags::WRITABLE, &buffer, ) .map_err(VhostUserBlockError::Vhost)?; new_config_space } else { vec![] }; let activate_evt = EventFd::new(libc::EFD_NONBLOCK).map_err(VhostUserBlockError::EventFd)?; let queues = vec![Queue::new(QUEUE_SIZE)]; let queue_evts = [EventFd::new(libc::EFD_NONBLOCK).map_err(VhostUserBlockError::EventFd)?; u64_to_usize(NUM_QUEUES)]; let device_state = DeviceState::Inactive; // We negotiated features with backend. Now these acked_features // are available for guest driver to choose from. let avail_features = acked_features; let acked_features = acked_features & VhostUserVirtioFeatures::PROTOCOL_FEATURES.bits(); let read_only = acked_features & (1 << VIRTIO_BLK_F_RO) != 0; let vhost_user_block_metrics_name = format!("block_{}", config.drive_id); let metrics = VhostUserMetricsPerDevice::alloc(vhost_user_block_metrics_name); let delta_us = get_time_us(ClockType::Monotonic) - start_time; metrics.init_time_us.store(delta_us); Ok(Self { avail_features, acked_features, config_space, activate_evt, queues, queue_evts, device_state, id: config.drive_id, partuuid: config.partuuid, cache_type: config.cache_type, read_only, root_device: config.is_root_device, vu_handle, vu_acked_protocol_features: acked_protocol_features, metrics, }) } /// Prepare device for being snapshotted. 
pub fn prepare_save(&mut self) { unimplemented!("VhostUserBlock does not support snapshotting yet"); } pub fn config(&self) -> VhostUserBlockConfig { VhostUserBlockConfig { drive_id: self.id.clone(), partuuid: self.partuuid.clone(), is_root_device: self.root_device, cache_type: self.cache_type, socket: self.vu_handle.socket_path.clone(), } } pub fn config_update(&mut self) -> Result<(), VhostUserBlockError> { let start_time = get_time_us(ClockType::Monotonic); let interrupt = self .device_state .active_state() .expect("Device is not initialized") .interrupt .clone(); // This buffer is used for config size check in vhost crate. let buffer = [0u8; BLOCK_CONFIG_SPACE_SIZE as usize]; let (_, new_config_space) = self .vu_handle .vu .get_config( 0, BLOCK_CONFIG_SPACE_SIZE, VhostUserConfigFlags::WRITABLE, &buffer, ) .map_err(VhostUserBlockError::Vhost)?; self.config_space = new_config_space; interrupt .trigger(VirtioInterruptType::Config) .map_err(VhostUserBlockError::Interrupt)?; let delta_us = get_time_us(ClockType::Monotonic) - start_time; self.metrics.config_change_time_us.store(delta_us); Ok(()) } } impl VirtioDevice for VhostUserBlockImpl where VhostUserBlockImpl: MutEventSubscriber, { impl_device_type!(VirtioDeviceType::Block); fn id(&self) -> &str { &self.id } fn avail_features(&self) -> u64 { self.avail_features } fn acked_features(&self) -> u64 { self.acked_features } fn set_acked_features(&mut self, acked_features: u64) { self.acked_features = acked_features; } fn queues(&self) -> &[Queue] { &self.queues } fn queues_mut(&mut self) -> &mut [Queue] { &mut self.queues } fn queue_events(&self) -> &[EventFd] { &self.queue_evts } fn interrupt_trigger(&self) -> &dyn VirtioInterrupt { self.device_state .active_state() .expect("Device is not initialized") .interrupt .deref() } fn read_config(&self, offset: u64, data: &mut [u8]) { if let Some(config_space_bytes) = self.config_space.as_slice().get(u64_to_usize(offset)..) 
{ let len = config_space_bytes.len().min(data.len()); data[..len].copy_from_slice(&config_space_bytes[..len]); } else { error!("Failed to read config space"); self.metrics.cfg_fails.inc(); } } fn write_config(&mut self, _offset: u64, _data: &[u8]) { // We do not advertise VIRTIO_BLK_F_CONFIG_WCE // that would allow configuring the "writeback" field. // Other block config fields are immutable. } fn activate( &mut self, mem: GuestMemoryMmap, interrupt: Arc, ) -> Result<(), ActivateError> { for q in self.queues.iter_mut() { q.initialize(&mem) .map_err(ActivateError::QueueMemoryError)?; } let start_time = get_time_us(ClockType::Monotonic); // Setting features again, because now we negotiated them // with guest driver as well. self.vu_handle .set_features(self.acked_features) .and_then(|()| { self.vu_handle.setup_backend( &mem, &[(0, &self.queues[0], &self.queue_evts[0])], interrupt.clone(), ) }) .map_err(|err| { self.metrics.activate_fails.inc(); ActivateError::VhostUser(err) })?; self.device_state = DeviceState::Activated(ActiveState { mem, interrupt }); let delta_us = get_time_us(ClockType::Monotonic) - start_time; self.metrics.activate_time_us.store(delta_us); Ok(()) } fn is_activated(&self) -> bool { self.device_state.is_activated() } } #[cfg(test)] mod tests { #![allow(clippy::undocumented_unsafe_blocks)] use std::os::unix::net::UnixStream; use std::sync::atomic::Ordering; use event_manager::{EventOps, Events, MutEventSubscriber}; use vhost::{VhostUserMemoryRegionInfo, VringConfigData}; use vmm_sys_util::tempfile::TempFile; use super::*; use crate::devices::virtio::block::virtio::device::FileEngineType; use crate::devices::virtio::test_utils::{VirtQueue, default_interrupt, default_mem}; use crate::devices::virtio::transport::mmio::VIRTIO_MMIO_INT_CONFIG; use crate::devices::virtio::vhost_user::tests::create_mem; use crate::test_utils::create_tmp_socket; use crate::vstate::memory::GuestAddress; #[test] fn test_from_config() { let block_config = BlockDeviceConfig { 
drive_id: "".to_string(),
            partuuid: None,
            is_root_device: false,
            cache_type: CacheType::Unsafe,

            is_read_only: None,
            path_on_host: None,
            rate_limiter: None,
            file_engine_type: None,

            socket: Some("sock".to_string()),
        };
        VhostUserBlockConfig::try_from(&block_config).unwrap();

        let block_config = BlockDeviceConfig {
            drive_id: "".to_string(),
            partuuid: None,
            is_root_device: false,
            cache_type: CacheType::Unsafe,

            is_read_only: Some(true),
            path_on_host: Some("path".to_string()),
            rate_limiter: None,
            file_engine_type: Some(FileEngineType::Sync),

            socket: None,
        };
        VhostUserBlockConfig::try_from(&block_config).unwrap_err();

        let block_config = BlockDeviceConfig {
            drive_id: "".to_string(),
            partuuid: None,
            is_root_device: false,
            cache_type: CacheType::Unsafe,

            is_read_only: Some(true),
            path_on_host: Some("path".to_string()),
            rate_limiter: None,
            file_engine_type: Some(FileEngineType::Sync),

            socket: Some("sock".to_string()),
        };
        VhostUserBlockConfig::try_from(&block_config).unwrap_err();
    }

    #[test]
    fn test_new_no_features() {
        // Minimal mock backend: advertises no features and records the
        // ownership / header-flag calls made during construction.
        struct MockMaster {
            sock: UnixStream,
            max_queue_num: u64,
            is_owner: std::cell::UnsafeCell<bool>,
            features: u64,
            protocol_features: VhostUserProtocolFeatures,
            hdr_flags: std::cell::UnsafeCell<VhostUserHeaderFlag>,
        }

        impl VhostUserHandleBackend for MockMaster {
            fn from_stream(sock: UnixStream, max_queue_num: u64) -> Self {
                Self {
                    sock,
                    max_queue_num,
                    is_owner: std::cell::UnsafeCell::new(false),
                    features: 0,
                    protocol_features: VhostUserProtocolFeatures::empty(),
                    hdr_flags: std::cell::UnsafeCell::new(VhostUserHeaderFlag::empty()),
                }
            }

            fn set_owner(&self) -> Result<(), vhost::Error> {
                unsafe { *self.is_owner.get() = true };
                Ok(())
            }

            fn set_hdr_flags(&self, flags: VhostUserHeaderFlag) {
                unsafe { *self.hdr_flags.get() = flags };
            }

            fn get_features(&self) -> Result<u64, vhost::Error> {
                Ok(self.features)
            }

            fn get_protocol_features(
                &mut self,
            ) -> Result<VhostUserProtocolFeatures, vhost::Error> {
                Ok(self.protocol_features)
            }

            fn set_protocol_features(
                &mut self,
                features: VhostUserProtocolFeatures,
            ) -> Result<(), vhost::Error> {
                self.protocol_features = features;
                Ok(())
            }
        }

        impl MutEventSubscriber for VhostUserBlockImpl<MockMaster> {
            fn process(&mut self, _: Events, _: &mut EventOps) {}
            fn init(&mut self, _: &mut EventOps) {}
        }

        let (_tmp_dir, tmp_socket_path) = create_tmp_socket();

        let vhost_block_config = VhostUserBlockConfig {
            drive_id: "test_drive".to_string(),
            partuuid: None,
            is_root_device: false,
            cache_type: CacheType::Unsafe,
            socket: tmp_socket_path.clone(),
        };
        let vhost_block = VhostUserBlockImpl::<MockMaster>::new(vhost_block_config).unwrap();

        // If backend has no features, nothing should be negotiated and
        // no flags should be set.
        assert_eq!(
            vhost_block
                .vu_handle
                .vu
                .sock
                .peer_addr()
                .unwrap()
                .as_pathname()
                .unwrap()
                .to_str()
                .unwrap(),
            &tmp_socket_path,
        );
        assert_eq!(vhost_block.vu_handle.vu.max_queue_num, NUM_QUEUES);
        assert!(unsafe { *vhost_block.vu_handle.vu.is_owner.get() });
        assert_eq!(vhost_block.avail_features, 0);
        assert_eq!(vhost_block.acked_features, 0);
        assert_eq!(vhost_block.vu_acked_protocol_features, 0);
        assert_eq!(
            unsafe { &*vhost_block.vu_handle.vu.hdr_flags.get() }.bits(),
            VhostUserHeaderFlag::empty().bits()
        );
        assert!(!vhost_block.root_device);
        assert!(!vhost_block.read_only);
        assert_eq!(vhost_block.config_space, Vec::<u8>::new());
    }

    #[test]
    fn test_new_all_features() {
        // Mock backend advertising every feature plus a 3-byte config space.
        struct MockMaster {
            sock: UnixStream,
            max_queue_num: u64,
            is_owner: std::cell::UnsafeCell<bool>,
            features: u64,
            protocol_features: VhostUserProtocolFeatures,
            hdr_flags: std::cell::UnsafeCell<VhostUserHeaderFlag>,
        }

        impl VhostUserHandleBackend for MockMaster {
            fn from_stream(sock: UnixStream, max_queue_num: u64) -> Self {
                Self {
                    sock,
                    max_queue_num,
                    is_owner: std::cell::UnsafeCell::new(false),
                    features: AVAILABLE_FEATURES | (1 << VIRTIO_BLK_F_FLUSH),
                    protocol_features: VhostUserProtocolFeatures::all(),
                    hdr_flags: std::cell::UnsafeCell::new(VhostUserHeaderFlag::empty()),
                }
            }

            fn set_owner(&self) -> Result<(), vhost::Error> {
                unsafe { *self.is_owner.get() = true };
                Ok(())
            }

            fn set_hdr_flags(&self, flags: VhostUserHeaderFlag) {
                unsafe { *self.hdr_flags.get() = flags };
            }

            fn get_features(&self) -> Result<u64, vhost::Error> {
                Ok(self.features)
            }

            fn get_protocol_features(
                &mut self,
            ) -> Result<VhostUserProtocolFeatures, vhost::Error> {
                Ok(self.protocol_features)
            }

            fn set_protocol_features(
                &mut self,
                features: VhostUserProtocolFeatures,
            ) -> Result<(), vhost::Error> {
                self.protocol_features = features;
                Ok(())
            }

            fn get_config(
                &mut self,
                _offset: u32,
                _size: u32,
                _flags: VhostUserConfigFlags,
                _buf: &[u8],
            ) -> Result<(VhostUserConfig, VhostUserConfigPayload), vhost::Error> {
                Ok((VhostUserConfig::default(), vec![0x69, 0x69, 0x69]))
            }
        }

        impl MutEventSubscriber for VhostUserBlockImpl<MockMaster> {
            fn process(&mut self, _: Events, _: &mut EventOps) {}
            fn init(&mut self, _: &mut EventOps) {}
        }

        let (_tmp_dir, tmp_socket_path) = create_tmp_socket();

        let vhost_block_config = VhostUserBlockConfig {
            drive_id: "test_drive".to_string(),
            partuuid: None,
            is_root_device: false,
            cache_type: CacheType::Writeback,
            socket: tmp_socket_path.clone(),
        };
        let mut vhost_block = VhostUserBlockImpl::<MockMaster>::new(vhost_block_config).unwrap();

        // If backend has all features, features offered by block device
        // should be negotiated and header flags should be set.
        assert_eq!(
            vhost_block
                .vu_handle
                .vu
                .sock
                .peer_addr()
                .unwrap()
                .as_pathname()
                .unwrap()
                .to_str()
                .unwrap(),
            &tmp_socket_path,
        );
        assert_eq!(vhost_block.vu_handle.vu.max_queue_num, NUM_QUEUES);
        assert!(unsafe { *vhost_block.vu_handle.vu.is_owner.get() });
        assert_eq!(
            vhost_block.avail_features,
            AVAILABLE_FEATURES | (1 << VIRTIO_BLK_F_FLUSH)
        );
        assert_eq!(
            vhost_block.acked_features,
            VhostUserVirtioFeatures::PROTOCOL_FEATURES.bits()
        );
        assert_eq!(
            vhost_block.vu_acked_protocol_features,
            VhostUserProtocolFeatures::CONFIG.bits()
        );
        assert_eq!(
            unsafe { &*vhost_block.vu_handle.vu.hdr_flags.get() }.bits(),
            VhostUserHeaderFlag::empty().bits()
        );
        assert!(!vhost_block.root_device);
        assert!(!vhost_block.read_only);
        assert_eq!(vhost_block.config_space, vec![0x69, 0x69, 0x69]);

        // Test some `VirtioDevice` methods
        assert_eq!(
            vhost_block.avail_features(),
            AVAILABLE_FEATURES | (1 << VIRTIO_BLK_F_FLUSH)
        );
        assert_eq!(
            vhost_block.acked_features(),
            VhostUserVirtioFeatures::PROTOCOL_FEATURES.bits()
        );

        // Valid read
        let mut read_config = vec![0, 0, 0];
        vhost_block.read_config(0, &mut read_config);
        assert_eq!(read_config, vec![0x69, 0x69, 0x69]);

        // Invalid offset
        let mut read_config = vec![0, 0, 0];
        vhost_block.read_config(0x69, &mut read_config);
        assert_eq!(read_config, vec![0, 0, 0]);

        // Writing to the config does nothing
        vhost_block.write_config(0x69, &[0]);
        assert_eq!(vhost_block.config_space, vec![0x69, 0x69, 0x69]);

        // Testing [`config_update`]
        vhost_block.device_state = DeviceState::Activated(ActiveState {
            mem: default_mem(),
            interrupt: default_interrupt(),
        });
        vhost_block.config_space = vec![];
        vhost_block.config_update().unwrap();
        assert_eq!(vhost_block.config_space, vec![0x69, 0x69, 0x69]);
        assert_eq!(
            vhost_block.interrupt_status().load(Ordering::SeqCst),
            VIRTIO_MMIO_INT_CONFIG
        );
    }

    #[test]
    fn test_activate() {
        // Mock backend recording which setup calls `activate` performs.
        struct MockMaster {
            features_are_set: std::cell::UnsafeCell<bool>,
            memory_is_set: std::cell::UnsafeCell<bool>,
            vring_enabled: std::cell::UnsafeCell<bool>,
        }

        impl
VhostUserHandleBackend for MockMaster { fn from_stream(_sock: UnixStream, _max_queue_num: u64) -> Self { Self { features_are_set: std::cell::UnsafeCell::new(false), memory_is_set: std::cell::UnsafeCell::new(false), vring_enabled: std::cell::UnsafeCell::new(false), } } fn set_owner(&self) -> Result<(), vhost::Error> { Ok(()) } fn set_hdr_flags(&self, _flags: VhostUserHeaderFlag) {} fn get_features(&self) -> Result { Ok(0) } fn get_protocol_features(&mut self) -> Result { Ok(VhostUserProtocolFeatures::empty()) } fn set_protocol_features( &mut self, _features: VhostUserProtocolFeatures, ) -> Result<(), vhost::Error> { Ok(()) } fn get_config( &mut self, _offset: u32, _size: u32, _flags: VhostUserConfigFlags, _buf: &[u8], ) -> Result<(VhostUserConfig, VhostUserConfigPayload), vhost::Error> { Ok((VhostUserConfig::default(), vec![])) } fn set_features(&self, _features: u64) -> Result<(), vhost::Error> { unsafe { (*self.features_are_set.get()) = true }; Ok(()) } fn set_mem_table( &self, _regions: &[VhostUserMemoryRegionInfo], ) -> Result<(), vhost::Error> { unsafe { (*self.memory_is_set.get()) = true }; Ok(()) } fn set_vring_num(&self, _queue_index: usize, _num: u16) -> Result<(), vhost::Error> { Ok(()) } fn set_vring_addr( &self, _queue_index: usize, _config_data: &VringConfigData, ) -> Result<(), vhost::Error> { Ok(()) } fn set_vring_base(&self, _queue_index: usize, _base: u16) -> Result<(), vhost::Error> { Ok(()) } fn set_vring_call( &self, _queue_index: usize, _fd: &EventFd, ) -> Result<(), vhost::Error> { Ok(()) } fn set_vring_kick( &self, _queue_index: usize, _fd: &EventFd, ) -> Result<(), vhost::Error> { Ok(()) } fn set_vring_enable( &mut self, _queue_index: usize, _enable: bool, ) -> Result<(), vhost::Error> { unsafe { (*self.vring_enabled.get()) = true }; Ok(()) } } impl MutEventSubscriber for VhostUserBlockImpl { fn process(&mut self, _: Events, _: &mut EventOps) {} fn init(&mut self, _: &mut EventOps) {} } // Block creation let (_tmp_dir, tmp_socket_path) = 
create_tmp_socket(); let vhost_block_config = VhostUserBlockConfig { drive_id: "test_drive".to_string(), partuuid: None, is_root_device: false, cache_type: CacheType::Writeback, socket: tmp_socket_path, }; let mut vhost_block = VhostUserBlockImpl::::new(vhost_block_config).unwrap(); // Memory creation let region_size = 0x10000; let file = TempFile::new().unwrap().into_file(); file.set_len(region_size as u64).unwrap(); let regions = vec![(GuestAddress(0x0), region_size)]; let guest_memory = create_mem(file, ®ions); let q = VirtQueue::new(GuestAddress(0), &guest_memory, 16); vhost_block.queues[0] = q.create_queue(); let interrupt = default_interrupt(); // During actiavion of the device features, memory and queues should be set and activated. vhost_block.activate(guest_memory, interrupt).unwrap(); assert!(unsafe { *vhost_block.vu_handle.vu.features_are_set.get() }); assert!(unsafe { *vhost_block.vu_handle.vu.memory_is_set.get() }); assert!(unsafe { *vhost_block.vu_handle.vu.vring_enabled.get() }); assert!(vhost_block.is_activated()); } } ================================================ FILE: src/vmm/src/devices/virtio/block/vhost_user/event_handler.rs ================================================ // Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
// SPDX-License-Identifier: Apache-2.0

use event_manager::{EventOps, Events, MutEventSubscriber};
use vmm_sys_util::epoll::EventSet;

use super::VhostUserBlock;
use crate::devices::virtio::device::VirtioDevice;
use crate::logger::{error, warn};

impl VhostUserBlock {
    // Event-source id for the activate eventfd; the only event this device
    // handles itself (I/O is processed by the vhost-user backend).
    const PROCESS_ACTIVATE: u32 = 0;

    // Subscribe the activate eventfd with the event manager. Registration
    // failure is only logged: the device keeps working, it just won't see
    // its activation notification.
    fn register_activate_event(&self, ops: &mut EventOps) {
        if let Err(err) = ops.add(Events::with_data(
            &self.activate_evt,
            Self::PROCESS_ACTIVATE,
            EventSet::IN,
        )) {
            error!("Failed to register activate event: {}", err);
        }
    }

    // Consume the activate notification and unsubscribe the eventfd —
    // activation happens once, so the event is no longer needed afterwards.
    fn process_activate_event(&self, ops: &mut EventOps) {
        if let Err(err) = self.activate_evt.read() {
            error!("Failed to consume block activate event: {:?}", err);
        }
        if let Err(err) = ops.remove(Events::with_data(
            &self.activate_evt,
            Self::PROCESS_ACTIVATE,
            EventSet::IN,
        )) {
            error!("Failed to un-register activate event: {}", err);
        }
    }
}

impl MutEventSubscriber for VhostUserBlock {
    // Handle an event for queue or rate limiter.
    fn process(&mut self, event: Events, ops: &mut EventOps) {
        let source = event.data();
        let event_set = event.event_set();

        // This device only ever registers for readable (IN) events.
        let supported_events = EventSet::IN;
        if !supported_events.contains(event_set) {
            warn!(
                "Received unknown event: {:?} from source: {:?}",
                event_set, source
            );
            return;
        }

        if self.is_activated() {
            if Self::PROCESS_ACTIVATE == source {
                self.process_activate_event(ops)
            } else {
                warn!("BlockVhost: Spurious event received: {:?}", source)
            }
        } else {
            warn!(
                "BlockVhost: The device is not yet activated. Spurious event received: {:?}",
                source
            );
        }
    }

    fn init(&mut self, ops: &mut EventOps) {
        // This function can be called during different points in the device lifetime:
        //  - shortly after device creation,
        //  - on device activation (is-activated already true at this point),
        //  - on device restore from snapshot.
        if self.is_activated() {
            warn!("Vhost-user block: unexpected init event");
        } else {
            self.register_activate_event(ops);
        }
    }
}


================================================
FILE: src/vmm/src/devices/virtio/block/vhost_user/mod.rs
================================================
// Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

pub mod device;
pub mod event_handler;
pub mod persist;

use self::device::VhostUserBlock;
use crate::devices::virtio::vhost_user::VhostUserError;
use crate::vstate::interrupts::InterruptError;

/// Number of queues for the vhost-user block device.
pub const NUM_QUEUES: u64 = 1;

/// Queue size for the vhost-user block device.
pub const QUEUE_SIZE: u16 = 256;

/// Vhost-user block device error.
// displaydoc derives Display from these doc comments.
#[derive(Debug, thiserror::Error, displaydoc::Display)]
pub enum VhostUserBlockError {
    /// Cannot create config
    Config,
    /// Snapshotting of vhost-user-blk devices is not supported
    SnapshottingNotSupported,
    /// Vhost-user error: {0}
    VhostUser(VhostUserError),
    /// Vhost error: {0}
    Vhost(vhost::Error),
    /// Error opening eventfd: {0}
    EventFd(std::io::Error),
    /// Error creating irqfd: {0}
    Interrupt(InterruptError),
}


================================================
FILE: src/vmm/src/devices/virtio/block/vhost_user/persist.rs
================================================
// Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

//! Defines the structures needed for saving/restoring block devices.

use serde::{Deserialize, Serialize};

use super::VhostUserBlockError;
use super::device::VhostUserBlock;
use crate::devices::virtio::block::CacheType;
use crate::devices::virtio::block::persist::BlockConstructorArgs;
use crate::devices::virtio::persist::VirtioDeviceState;
use crate::snapshot::Persist;

/// vhost-user block device state.
#[derive(Debug, Clone, Serialize, Deserialize)] pub struct VhostUserBlockState { id: String, partuuid: Option, cache_type: CacheType, root_device: bool, socket_path: String, vu_acked_protocol_features: u64, config_space: Vec, virtio_state: VirtioDeviceState, } impl Persist<'_> for VhostUserBlock { type State = VhostUserBlockState; type ConstructorArgs = BlockConstructorArgs; type Error = VhostUserBlockError; fn save(&self) -> Self::State { unimplemented!("VhostUserBlock does not support snapshotting yet"); } fn restore( _constructor_args: Self::ConstructorArgs, _state: &Self::State, ) -> Result { Err(VhostUserBlockError::SnapshottingNotSupported) } } ================================================ FILE: src/vmm/src/devices/virtio/block/virtio/device.rs ================================================ // Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 // // Portions Copyright 2017 The Chromium OS Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the THIRD-PARTY file. 
use std::cmp; use std::convert::From; use std::fs::{File, OpenOptions}; use std::io::{Seek, SeekFrom}; use std::ops::Deref; use std::os::linux::fs::MetadataExt; use std::path::PathBuf; use std::sync::Arc; use block_io::FileEngine; use serde::{Deserialize, Serialize}; use vm_memory::ByteValued; use vmm_sys_util::eventfd::EventFd; use super::io::async_io; use super::request::*; use super::{BLOCK_QUEUE_SIZES, SECTOR_SHIFT, SECTOR_SIZE, VirtioBlockError, io as block_io}; use crate::devices::virtio::ActivateError; use crate::devices::virtio::block::CacheType; use crate::devices::virtio::block::virtio::metrics::{BlockDeviceMetrics, BlockMetricsPerDevice}; use crate::devices::virtio::device::{ActiveState, DeviceState, VirtioDevice, VirtioDeviceType}; use crate::devices::virtio::generated::virtio_blk::{ VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_RO, VIRTIO_BLK_ID_BYTES, }; use crate::devices::virtio::generated::virtio_config::VIRTIO_F_VERSION_1; use crate::devices::virtio::generated::virtio_ring::VIRTIO_RING_F_EVENT_IDX; use crate::devices::virtio::queue::{InvalidAvailIdx, Queue}; use crate::devices::virtio::transport::{VirtioInterrupt, VirtioInterruptType}; use crate::impl_device_type; use crate::logger::{IncMetric, error, warn}; use crate::rate_limiter::{BucketUpdate, RateLimiter}; use crate::utils::u64_to_usize; use crate::vmm_config::RateLimiterConfig; use crate::vmm_config::drive::BlockDeviceConfig; use crate::vstate::memory::GuestMemoryMmap; /// The engine file type, either Sync or Async (through io_uring). #[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Deserialize, Serialize)] pub enum FileEngineType { /// Use an Async engine, based on io_uring. Async, /// Use a Sync engine, based on blocking system calls. #[default] Sync, } /// Helper object for setting up all `Block` fields derived from its backing file. 
#[derive(Debug)] pub struct DiskProperties { pub file_path: String, pub file_engine: FileEngine, pub nsectors: u64, pub image_id: [u8; VIRTIO_BLK_ID_BYTES as usize], } impl DiskProperties { // Helper function that opens the file with the proper access permissions fn open_file(disk_image_path: &str, is_disk_read_only: bool) -> Result { OpenOptions::new() .read(true) .write(!is_disk_read_only) .open(PathBuf::from(&disk_image_path)) .map_err(|x| VirtioBlockError::BackingFile(x, disk_image_path.to_string())) } // Helper function that gets the size of the file fn file_size(disk_image_path: &str, disk_image: &mut File) -> Result { let disk_size = disk_image .seek(SeekFrom::End(0)) .map_err(|x| VirtioBlockError::BackingFile(x, disk_image_path.to_string()))?; // We only support disk size, which uses the first two words of the configuration space. // If the image is not a multiple of the sector size, the tail bits are not exposed. if disk_size % u64::from(SECTOR_SIZE) != 0 { warn!( "Disk size {} is not a multiple of sector size {}; the remainder will not be \ visible to the guest.", disk_size, SECTOR_SIZE ); } Ok(disk_size) } /// Create a new file for the block device using a FileEngine pub fn new( disk_image_path: String, is_disk_read_only: bool, file_engine_type: FileEngineType, ) -> Result { let mut disk_image = Self::open_file(&disk_image_path, is_disk_read_only)?; let disk_size = Self::file_size(&disk_image_path, &mut disk_image)?; let image_id = Self::build_disk_image_id(&disk_image); Ok(Self { file_path: disk_image_path, file_engine: FileEngine::from_file(disk_image, file_engine_type) .map_err(VirtioBlockError::FileEngine)?, nsectors: disk_size >> SECTOR_SHIFT, image_id, }) } /// Update the path to the file backing the block device pub fn update( &mut self, disk_image_path: String, is_disk_read_only: bool, ) -> Result<(), VirtioBlockError> { let mut disk_image = Self::open_file(&disk_image_path, is_disk_read_only)?; let disk_size = Self::file_size(&disk_image_path, 
&mut disk_image)?; self.image_id = Self::build_disk_image_id(&disk_image); self.file_engine .update_file_path(disk_image) .map_err(VirtioBlockError::FileEngine)?; self.nsectors = disk_size >> SECTOR_SHIFT; self.file_path = disk_image_path; Ok(()) } fn build_device_id(disk_file: &File) -> Result { let blk_metadata = disk_file .metadata() .map_err(VirtioBlockError::GetFileMetadata)?; // This is how kvmtool does it. let device_id = format!( "{}{}{}", blk_metadata.st_dev(), blk_metadata.st_rdev(), blk_metadata.st_ino() ); Ok(device_id) } fn build_disk_image_id(disk_file: &File) -> [u8; VIRTIO_BLK_ID_BYTES as usize] { let mut default_id = [0; VIRTIO_BLK_ID_BYTES as usize]; match Self::build_device_id(disk_file) { Err(_) => { warn!("Could not generate device id. We'll use a default."); } Ok(disk_id_string) => { // The kernel only knows to read a maximum of VIRTIO_BLK_ID_BYTES. // This will also zero out any leftover bytes. let disk_id = disk_id_string.as_bytes(); let bytes_to_copy = cmp::min(disk_id.len(), VIRTIO_BLK_ID_BYTES as usize); default_id[..bytes_to_copy].copy_from_slice(&disk_id[..bytes_to_copy]); } } default_id } } #[derive(Debug, Default, Clone, Copy, Eq, PartialEq)] #[repr(C)] pub struct ConfigSpace { pub capacity: u64, } // SAFETY: `ConfigSpace` contains only PODs in `repr(C)` or `repr(transparent)`, without padding. unsafe impl ByteValued for ConfigSpace {} /// Use this structure to set up the Block Device before booting the kernel. #[derive(Debug, PartialEq, Eq, Deserialize, Serialize)] #[serde(deny_unknown_fields)] pub struct VirtioBlockConfig { /// Unique identifier of the drive. pub drive_id: String, /// Part-UUID. Represents the unique id of the boot partition of this device. It is /// optional and it will be used only if the `is_root_device` field is true. pub partuuid: Option, /// If set to true, it makes the current device the root block device. 
/// Setting this flag to true will mount the block device in the /// guest under /dev/vda unless the partuuid is present. pub is_root_device: bool, /// If set to true, the drive will ignore flush requests coming from /// the guest driver. #[serde(default)] pub cache_type: CacheType, /// If set to true, the drive is opened in read-only mode. Otherwise, the /// drive is opened as read-write. pub is_read_only: bool, /// Path of the backing file on the host pub path_on_host: String, /// Rate Limiter for I/O operations. pub rate_limiter: Option, /// The type of IO engine used by the device. #[serde(default)] #[serde(rename = "io_engine")] pub file_engine_type: FileEngineType, } impl TryFrom<&BlockDeviceConfig> for VirtioBlockConfig { type Error = VirtioBlockError; fn try_from(value: &BlockDeviceConfig) -> Result { if let (Some(path_on_host), None) = (&value.path_on_host, &value.socket) { Ok(Self { drive_id: value.drive_id.clone(), partuuid: value.partuuid.clone(), is_root_device: value.is_root_device, cache_type: value.cache_type, is_read_only: value.is_read_only.unwrap_or(false), path_on_host: path_on_host.clone(), rate_limiter: value.rate_limiter, file_engine_type: value.file_engine_type.unwrap_or_default(), }) } else { Err(VirtioBlockError::Config) } } } impl From for BlockDeviceConfig { fn from(value: VirtioBlockConfig) -> Self { Self { drive_id: value.drive_id, partuuid: value.partuuid, is_root_device: value.is_root_device, cache_type: value.cache_type, is_read_only: Some(value.is_read_only), path_on_host: Some(value.path_on_host), rate_limiter: value.rate_limiter, file_engine_type: Some(value.file_engine_type), socket: None, } } } /// Virtio device for exposing block level read/write operations on a host file. #[derive(Debug)] pub struct VirtioBlock { // Virtio fields. pub avail_features: u64, pub acked_features: u64, pub config_space: ConfigSpace, pub activate_evt: EventFd, // Transport related fields. 
pub queues: Vec, pub queue_evts: [EventFd; 1], pub device_state: DeviceState, // Implementation specific fields. pub id: String, pub partuuid: Option, pub cache_type: CacheType, pub root_device: bool, pub read_only: bool, // Host file and properties. pub disk: DiskProperties, pub rate_limiter: RateLimiter, pub is_io_engine_throttled: bool, pub metrics: Arc, } macro_rules! unwrap_async_file_engine_or_return { ($file_engine: expr) => { match $file_engine { FileEngine::Async(engine) => engine, FileEngine::Sync(_) => { error!("The block device doesn't use an async IO engine"); return; } } }; } impl VirtioBlock { /// Create a new virtio block device that operates on the given file. /// /// The given file must be seekable and sizable. pub fn new(config: VirtioBlockConfig) -> Result { let disk_properties = DiskProperties::new( config.path_on_host, config.is_read_only, config.file_engine_type, )?; let rate_limiter = config .rate_limiter .map(RateLimiterConfig::try_into) .transpose() .map_err(VirtioBlockError::RateLimiter)? 
.unwrap_or_default(); let mut avail_features = (1u64 << VIRTIO_F_VERSION_1) | (1u64 << VIRTIO_RING_F_EVENT_IDX); if config.cache_type == CacheType::Writeback { avail_features |= 1u64 << VIRTIO_BLK_F_FLUSH; } if config.is_read_only { avail_features |= 1u64 << VIRTIO_BLK_F_RO; }; let queue_evts = [EventFd::new(libc::EFD_NONBLOCK).map_err(VirtioBlockError::EventFd)?]; let queues = BLOCK_QUEUE_SIZES.iter().map(|&s| Queue::new(s)).collect(); let config_space = ConfigSpace { capacity: disk_properties.nsectors.to_le(), }; Ok(VirtioBlock { avail_features, acked_features: 0u64, config_space, activate_evt: EventFd::new(libc::EFD_NONBLOCK).map_err(VirtioBlockError::EventFd)?, queues, queue_evts, device_state: DeviceState::Inactive, id: config.drive_id.clone(), partuuid: config.partuuid, cache_type: config.cache_type, root_device: config.is_root_device, read_only: config.is_read_only, disk: disk_properties, rate_limiter, is_io_engine_throttled: false, metrics: BlockMetricsPerDevice::alloc(config.drive_id), }) } /// Returns a copy of a device config pub fn config(&self) -> VirtioBlockConfig { let rl: RateLimiterConfig = (&self.rate_limiter).into(); VirtioBlockConfig { drive_id: self.id.clone(), path_on_host: self.disk.file_path.clone(), is_root_device: self.root_device, partuuid: self.partuuid.clone(), is_read_only: self.read_only, cache_type: self.cache_type, rate_limiter: rl.into_option(), file_engine_type: self.file_engine_type(), } } /// Process a single event in the Virtio queue. /// /// This function is called by the event manager when the guest notifies us /// about new buffers in the queue. 
pub(crate) fn process_queue_event(&mut self) { self.metrics.queue_event_count.inc(); if let Err(err) = self.queue_evts[0].read() { error!("Failed to get queue event: {:?}", err); self.metrics.event_fails.inc(); } else if self.rate_limiter.is_blocked() { self.metrics.rate_limiter_throttled_events.inc(); } else if self.is_io_engine_throttled { self.metrics.io_engine_throttled_events.inc(); } else { self.process_virtio_queues().unwrap() } } /// Process device virtio queue(s). pub fn process_virtio_queues(&mut self) -> Result<(), InvalidAvailIdx> { self.process_queue(0) } pub(crate) fn process_rate_limiter_event(&mut self) { self.metrics.rate_limiter_event_count.inc(); // Upon rate limiter event, call the rate limiter handler // and restart processing the queue. if self.rate_limiter.event_handler().is_ok() { self.process_queue(0).unwrap() } } /// Device specific function for peaking inside a queue and processing descriptors. pub fn process_queue(&mut self, queue_index: usize) -> Result<(), InvalidAvailIdx> { // This is safe since we checked in the event handler that the device is activated. let active_state = self.device_state.active_state().unwrap(); let queue = &mut self.queues[queue_index]; let mut used_any = false; while let Some(head) = queue.pop_or_enable_notification()? { self.metrics.remaining_reqs_count.add(queue.len().into()); let processing_result = match Request::parse(&head, &active_state.mem, self.disk.nsectors) { Ok(request) => { if request.rate_limit(&mut self.rate_limiter) { // Stop processing the queue and return this descriptor chain to the // avail ring, for later processing. 
queue.undo_pop(); self.metrics.rate_limiter_throttled_events.inc(); break; } request.process( &mut self.disk, head.index, &active_state.mem, &self.metrics, ) } Err(err) => { error!("Failed to parse available descriptor chain: {:?}", err); self.metrics.execute_fails.inc(); ProcessingResult::Executed(FinishedRequest { num_bytes_to_mem: 0, desc_idx: head.index, }) } }; match processing_result { ProcessingResult::Submitted => {} ProcessingResult::Throttled => { queue.undo_pop(); self.is_io_engine_throttled = true; break; } ProcessingResult::Executed(finished) => { used_any = true; queue .add_used(head.index, finished.num_bytes_to_mem) .unwrap_or_else(|err| { error!( "Failed to add available descriptor head {}: {}", head.index, err ) }); } } } queue.advance_used_ring_idx(); if used_any && queue.prepare_kick() { active_state .interrupt .trigger(VirtioInterruptType::Queue(0)) .unwrap_or_else(|_| { self.metrics.event_fails.inc(); }); } if let FileEngine::Async(ref mut engine) = self.disk.file_engine && let Err(err) = engine.kick_submission_queue() { error!("BlockError submitting pending block requests: {:?}", err); } if !used_any { self.metrics.no_avail_buffer.inc(); } Ok(()) } fn process_async_completion_queue(&mut self) { let engine = unwrap_async_file_engine_or_return!(&mut self.disk.file_engine); // This is safe since we checked in the event handler that the device is activated. 
let active_state = self.device_state.active_state().unwrap(); let queue = &mut self.queues[0]; loop { match engine.pop(&active_state.mem) { Err(error) => { error!("Failed to read completed io_uring entry: {:?}", error); break; } Ok(None) => break, Ok(Some(cqe)) => { let res = cqe.result(); let user_data = cqe.user_data(); let (pending, res) = match res { Ok(count) => (user_data, Ok(count)), Err(error) => ( user_data, Err(IoErr::FileEngine(block_io::BlockIoError::Async( async_io::AsyncIoError::IO(error), ))), ), }; let finished = pending.finish(&active_state.mem, res, &self.metrics); queue .add_used(finished.desc_idx, finished.num_bytes_to_mem) .unwrap_or_else(|err| { error!( "Failed to add available descriptor head {}: {}", finished.desc_idx, err ) }); } } } queue.advance_used_ring_idx(); if queue.prepare_kick() { active_state .interrupt .trigger(VirtioInterruptType::Queue(0)) .unwrap_or_else(|_| { self.metrics.event_fails.inc(); }); } } pub fn process_async_completion_event(&mut self) { let engine = unwrap_async_file_engine_or_return!(&mut self.disk.file_engine); if let Err(err) = engine.completion_evt().read() { error!("Failed to get async completion event: {:?}", err); } else { self.process_async_completion_queue(); if self.is_io_engine_throttled { self.is_io_engine_throttled = false; self.process_queue(0).unwrap() } } } /// Update the backing file and the config space of the block device. pub fn update_disk_image(&mut self, disk_image_path: String) -> Result<(), VirtioBlockError> { self.disk.update(disk_image_path, self.read_only)?; self.config_space.capacity = self.disk.nsectors.to_le(); // virtio_block_config_space(); // Kick the driver to pick up the changes. (But only if the device is already activated). 
if self.is_activated() { self.interrupt_trigger() .trigger(VirtioInterruptType::Config) .unwrap(); } self.metrics.update_count.inc(); Ok(()) } /// Updates the parameters for the rate limiter pub fn update_rate_limiter(&mut self, bytes: BucketUpdate, ops: BucketUpdate) { self.rate_limiter.update_buckets(bytes, ops); } /// Retrieve the file engine type. pub fn file_engine_type(&self) -> FileEngineType { match self.disk.file_engine { FileEngine::Sync(_) => FileEngineType::Sync, FileEngine::Async(_) => FileEngineType::Async, } } fn drain_and_flush(&mut self, discard: bool) { if let Err(err) = self.disk.file_engine.drain_and_flush(discard) { error!("Failed to drain ops and flush block data: {:?}", err); } } /// Prepare device for being snapshotted. pub fn prepare_save(&mut self) { if !self.is_activated() { return; } self.drain_and_flush(false); if let FileEngine::Async(ref _engine) = self.disk.file_engine { self.process_async_completion_queue(); } } } impl VirtioDevice for VirtioBlock { impl_device_type!(VirtioDeviceType::Block); fn id(&self) -> &str { &self.id } fn avail_features(&self) -> u64 { self.avail_features } fn acked_features(&self) -> u64 { self.acked_features } fn set_acked_features(&mut self, acked_features: u64) { self.acked_features = acked_features; } fn queues(&self) -> &[Queue] { &self.queues } fn queues_mut(&mut self) -> &mut [Queue] { &mut self.queues } fn queue_events(&self) -> &[EventFd] { &self.queue_evts } fn interrupt_trigger(&self) -> &dyn VirtioInterrupt { self.device_state .active_state() .expect("Device is not initialized") .interrupt .deref() } fn read_config(&self, offset: u64, data: &mut [u8]) { if let Some(config_space_bytes) = self.config_space.as_slice().get(u64_to_usize(offset)..) 
{ let len = config_space_bytes.len().min(data.len()); data[..len].copy_from_slice(&config_space_bytes[..len]); } else { error!("Failed to read config space"); self.metrics.cfg_fails.inc(); } } fn write_config(&mut self, offset: u64, data: &[u8]) { let config_space_bytes = self.config_space.as_mut_slice(); let start = usize::try_from(offset).ok(); let end = start.and_then(|s| s.checked_add(data.len())); let Some(dst) = start .zip(end) .and_then(|(start, end)| config_space_bytes.get_mut(start..end)) else { error!("Failed to write config space"); self.metrics.cfg_fails.inc(); return; }; dst.copy_from_slice(data); } fn activate( &mut self, mem: GuestMemoryMmap, interrupt: Arc, ) -> Result<(), ActivateError> { for q in self.queues.iter_mut() { q.initialize(&mem) .map_err(ActivateError::QueueMemoryError)?; } let event_idx = self.has_feature(u64::from(VIRTIO_RING_F_EVENT_IDX)); if event_idx { for queue in &mut self.queues { queue.enable_notif_suppression(); } } if self.activate_evt.write(1).is_err() { self.metrics.activate_fails.inc(); return Err(ActivateError::EventFd); } self.device_state = DeviceState::Activated(ActiveState { mem, interrupt }); Ok(()) } fn is_activated(&self) -> bool { self.device_state.is_activated() } } impl Drop for VirtioBlock { fn drop(&mut self) { match self.cache_type { CacheType::Unsafe => { if let Err(err) = self.disk.file_engine.drain(true) { error!("Failed to drain ops on drop: {:?}", err); } } CacheType::Writeback => { self.drain_and_flush(true); } }; } } #[cfg(test)] mod tests { use std::fs::metadata; use std::io::{Read, Write}; use std::os::unix::ffi::OsStrExt; use std::thread; use std::time::Duration; use vmm_sys_util::tempfile::TempFile; use super::*; use crate::check_metric_after_block; use crate::devices::virtio::block::virtio::IO_URING_NUM_ENTRIES; use crate::devices::virtio::block::virtio::test_utils::{ default_block, read_blk_req_descriptors, set_queue, set_rate_limiter, simulate_async_completion_event, 
simulate_queue_and_async_completion_events, simulate_queue_event, }; use crate::devices::virtio::queue::{VIRTQ_DESC_F_NEXT, VIRTQ_DESC_F_WRITE}; use crate::devices::virtio::test_utils::{VirtQueue, default_interrupt, default_mem}; use crate::rate_limiter::TokenType; use crate::vstate::memory::{Address, Bytes, GuestAddress}; #[test] fn test_from_config() { let block_config = BlockDeviceConfig { drive_id: "".to_string(), partuuid: None, is_root_device: false, cache_type: CacheType::Unsafe, is_read_only: Some(true), path_on_host: Some("path".to_string()), rate_limiter: None, file_engine_type: Default::default(), socket: None, }; VirtioBlockConfig::try_from(&block_config).unwrap(); let block_config = BlockDeviceConfig { drive_id: "".to_string(), partuuid: None, is_root_device: false, cache_type: CacheType::Unsafe, is_read_only: None, path_on_host: None, rate_limiter: None, file_engine_type: Default::default(), socket: Some("sock".to_string()), }; VirtioBlockConfig::try_from(&block_config).unwrap_err(); let block_config = BlockDeviceConfig { drive_id: "".to_string(), partuuid: None, is_root_device: false, cache_type: CacheType::Unsafe, is_read_only: Some(true), path_on_host: Some("path".to_string()), rate_limiter: None, file_engine_type: Default::default(), socket: Some("sock".to_string()), }; VirtioBlockConfig::try_from(&block_config).unwrap_err(); } #[test] fn test_disk_backing_file_helper() { let num_sectors = 2; let f = TempFile::new().unwrap(); let size = u64::from(SECTOR_SIZE) * num_sectors; f.as_file().set_len(size).unwrap(); for engine in [FileEngineType::Sync, FileEngineType::Async] { let disk_properties = DiskProperties::new(String::from(f.as_path().to_str().unwrap()), true, engine) .unwrap(); assert_eq!(size, u64::from(SECTOR_SIZE) * num_sectors); assert_eq!(disk_properties.nsectors, num_sectors); // Testing `backing_file.virtio_block_disk_image_id()` implies // duplicating that logic in tests, so skipping it. 
let res = DiskProperties::new("invalid-disk-path".to_string(), true, engine); assert!( matches!(res, Err(VirtioBlockError::BackingFile(_, _))), "{:?}", res ); } } #[test] fn test_virtio_features() { for engine in [FileEngineType::Sync, FileEngineType::Async] { let mut block = default_block(engine); assert_eq!(block.device_type(), VirtioDeviceType::Block); let features: u64 = (1u64 << VIRTIO_F_VERSION_1) | (1u64 << VIRTIO_RING_F_EVENT_IDX); assert_eq!( block.avail_features_by_page(0), (features & 0xffffffff) as u32, ); assert_eq!(block.avail_features_by_page(1), (features >> 32) as u32); for i in 2..10 { assert_eq!(block.avail_features_by_page(i), 0u32); } for i in 0..10 { block.ack_features_by_page(i, u32::MAX); } assert_eq!(block.acked_features, features); } } #[test] fn test_virtio_read_config() { for engine in [FileEngineType::Sync, FileEngineType::Async] { let block = default_block(engine); let mut actual_config_space = ConfigSpace::default(); block.read_config(0, actual_config_space.as_mut_slice()); // This will read the number of sectors. // The block's backing file size is 0x1000, so there are 8 (4096/512) sectors. // The config space is little endian. let expected_config_space = ConfigSpace { capacity: 8 }; assert_eq!(actual_config_space, expected_config_space); // Invalid read. let expected_config_space = ConfigSpace { capacity: 696969 }; actual_config_space = expected_config_space; block.read_config( std::mem::size_of::() as u64 + 1, actual_config_space.as_mut_slice(), ); // Validate read failed (the config space was not updated). 
assert_eq!(actual_config_space, expected_config_space); } } #[test] fn test_virtio_write_config() { for engine in [FileEngineType::Sync, FileEngineType::Async] { let mut block = default_block(engine); let expected_config_space = ConfigSpace { capacity: 696969 }; block.write_config(0, expected_config_space.as_slice()); let mut actual_config_space = ConfigSpace::default(); block.read_config(0, actual_config_space.as_mut_slice()); assert_eq!(actual_config_space, expected_config_space); // If privileged user writes to `/dev/mem`, in block config space - byte by byte. let expected_config_space = ConfigSpace { capacity: 0x1122334455667788, }; let expected_config_space_slice = expected_config_space.as_slice(); for (i, b) in expected_config_space_slice.iter().enumerate() { block.write_config(i as u64, &[*b]); } block.read_config(0, actual_config_space.as_mut_slice()); assert_eq!(actual_config_space, expected_config_space); // Invalid write. let new_config_space = ConfigSpace { capacity: 0xDEADBEEF, }; block.write_config(5, new_config_space.as_slice()); // Make sure nothing got written. block.read_config(0, actual_config_space.as_mut_slice()); assert_eq!(actual_config_space, expected_config_space); // Large offset that may cause an overflow. block.write_config(u64::MAX, new_config_space.as_slice()); // Make sure nothing got written. block.read_config(0, actual_config_space.as_mut_slice()); assert_eq!(actual_config_space, expected_config_space); } } #[test] fn test_invalid_request() { for engine in [FileEngineType::Sync, FileEngineType::Async] { let mut block = default_block(engine); let mem = default_mem(); let interrupt = default_interrupt(); let vq = VirtQueue::new(GuestAddress(0), &mem, 16); set_queue(&mut block, 0, vq.create_queue()); block.activate(mem.clone(), interrupt).unwrap(); read_blk_req_descriptors(&vq); let request_type_addr = GuestAddress(vq.dtable[0].addr.get()); // Request is invalid because the first descriptor is write-only. 
vq.dtable[0] .flags .set(VIRTQ_DESC_F_NEXT | VIRTQ_DESC_F_WRITE); mem.write_obj::(VIRTIO_BLK_T_IN, request_type_addr) .unwrap(); simulate_queue_event(&mut block, Some(true)); assert_eq!(vq.used.idx.get(), 1); assert_eq!(vq.used.ring[0].get().id, 0); assert_eq!(vq.used.ring[0].get().len, 0); } } #[test] fn test_addr_out_of_bounds() { for engine in [FileEngineType::Sync, FileEngineType::Async] { let mut block = default_block(engine); // Default mem size is 0x10000 let mem = default_mem(); let interrupt = default_interrupt(); let vq = VirtQueue::new(GuestAddress(0), &mem, 16); set_queue(&mut block, 0, vq.create_queue()); block.activate(mem.clone(), interrupt).unwrap(); read_blk_req_descriptors(&vq); let request_type_addr = GuestAddress(vq.dtable[0].addr.get()); // Read at out of bounds address. { vq.used.idx.set(0); set_queue(&mut block, 0, vq.create_queue()); // Mark the next available descriptor. vq.avail.idx.set(1); vq.dtable[1].set(0x20000, 0x1000, VIRTQ_DESC_F_NEXT | VIRTQ_DESC_F_WRITE, 2); mem.write_obj::(VIRTIO_BLK_T_IN, request_type_addr) .unwrap(); simulate_queue_and_async_completion_events(&mut block, true); assert_eq!(vq.used.idx.get(), 1); let used = vq.used.ring[0].get(); let status_addr = GuestAddress(vq.dtable[2].addr.get()); assert_eq!(used.len, 1); assert_eq!( u32::from(mem.read_obj::(status_addr).unwrap()), VIRTIO_BLK_S_IOERR ); } // Write at out of bounds address. { vq.used.idx.set(0); set_queue(&mut block, 0, vq.create_queue()); // Mark the next available descriptor. 
vq.avail.idx.set(1); vq.dtable[1].set(0x20000, 0x1000, VIRTQ_DESC_F_NEXT, 2); mem.write_obj::(VIRTIO_BLK_T_OUT, request_type_addr) .unwrap(); simulate_queue_and_async_completion_events(&mut block, true); assert_eq!(vq.used.idx.get(), 1); let used = vq.used.ring[0].get(); let status_addr = GuestAddress(vq.dtable[2].addr.get()); assert_eq!(used.len, 1); assert_eq!( u32::from(mem.read_obj::(status_addr).unwrap()), VIRTIO_BLK_S_IOERR ); } } } #[test] fn test_request_parse_failures() { for engine in [FileEngineType::Sync, FileEngineType::Async] { let mut block = default_block(engine); let mem = default_mem(); let interrupt = default_interrupt(); let vq = VirtQueue::new(GuestAddress(0), &mem, 16); set_queue(&mut block, 0, vq.create_queue()); block.activate(mem.clone(), interrupt).unwrap(); read_blk_req_descriptors(&vq); let request_type_addr = GuestAddress(vq.dtable[0].addr.get()); { // First descriptor no longer writable. vq.dtable[0].flags.set(VIRTQ_DESC_F_NEXT); vq.dtable[1].flags.set(VIRTQ_DESC_F_NEXT); // Generate a seek execute error caused by a very large sector number. let request_header = RequestHeader::new(VIRTIO_BLK_T_OUT, 0x000f_ffff_ffff); mem.write_obj::(request_header, request_type_addr) .unwrap(); simulate_queue_event(&mut block, Some(true)); assert_eq!(vq.used.idx.get(), 1); assert_eq!(vq.used.ring[0].get().id, 0); assert_eq!(vq.used.ring[0].get().len, 0); } { // Reset the queue to reuse descriptors and memory. vq.used.idx.set(0); set_queue(&mut block, 0, vq.create_queue()); vq.dtable[1] .flags .set(VIRTQ_DESC_F_NEXT | VIRTQ_DESC_F_WRITE); // Set sector to a valid number large enough that the full 0x1000 read will fail. 
let request_header = RequestHeader::new(VIRTIO_BLK_T_IN, 10); mem.write_obj::(request_header, request_type_addr) .unwrap(); simulate_queue_event(&mut block, Some(true)); assert_eq!(vq.used.idx.get(), 1); assert_eq!(vq.used.ring[0].get().id, 0); assert_eq!(vq.used.ring[0].get().len, 0); } } } #[test] fn test_unsupported_request_type() { for engine in [FileEngineType::Sync, FileEngineType::Async] { let mut block = default_block(engine); let mem = default_mem(); let interrupt = default_interrupt(); let vq = VirtQueue::new(GuestAddress(0), &mem, 16); set_queue(&mut block, 0, vq.create_queue()); block.activate(mem.clone(), interrupt).unwrap(); read_blk_req_descriptors(&vq); let request_type_addr = GuestAddress(vq.dtable[0].addr.get()); let status_addr = GuestAddress(vq.dtable[2].addr.get()); // Currently only VIRTIO_BLK_T_IN, VIRTIO_BLK_T_OUT, // VIRTIO_BLK_T_FLUSH and VIRTIO_BLK_T_GET_ID are supported. // Generate an unsupported request. let request_header = RequestHeader::new(42, 0); mem.write_obj::(request_header, request_type_addr) .unwrap(); simulate_queue_event(&mut block, Some(true)); assert_eq!(vq.used.idx.get(), 1); assert_eq!(vq.used.ring[0].get().id, 0); assert_eq!(vq.used.ring[0].get().len, 1); assert_eq!( mem.read_obj::(status_addr).unwrap(), VIRTIO_BLK_S_UNSUPP ); } } #[test] fn test_end_of_region() { for engine in [FileEngineType::Sync, FileEngineType::Async] { let mut block = default_block(engine); let mem = default_mem(); let interrupt = default_interrupt(); let vq = VirtQueue::new(GuestAddress(0), &mem, 16); set_queue(&mut block, 0, vq.create_queue()); block.activate(mem.clone(), interrupt).unwrap(); read_blk_req_descriptors(&vq); vq.dtable[1].set(0xf000, 0x1000, VIRTQ_DESC_F_NEXT | VIRTQ_DESC_F_WRITE, 2); let request_type_addr = GuestAddress(vq.dtable[0].addr.get()); let status_addr = GuestAddress(vq.dtable[2].addr.get()); vq.used.idx.set(0); mem.write_obj::(VIRTIO_BLK_T_IN, request_type_addr) .unwrap(); vq.dtable[1] .flags .set(VIRTQ_DESC_F_NEXT | 
VIRTQ_DESC_F_WRITE); check_metric_after_block!( &block.metrics.read_count, 1, simulate_queue_and_async_completion_events(&mut block, true) ); assert_eq!(vq.used.idx.get(), 1); assert_eq!(vq.used.ring[0].get().id, 0); // Added status byte length. assert_eq!(vq.used.ring[0].get().len, vq.dtable[1].len.get() + 1); assert_eq!(mem.read_obj::(status_addr).unwrap(), VIRTIO_BLK_S_OK); } } #[test] fn test_read_write() { for engine in [FileEngineType::Sync, FileEngineType::Async] { let mut block = default_block(engine); let mem = default_mem(); let interrupt = default_interrupt(); let vq = VirtQueue::new(GuestAddress(0), &mem, 16); set_queue(&mut block, 0, vq.create_queue()); block.activate(mem.clone(), interrupt).unwrap(); read_blk_req_descriptors(&vq); let request_type_addr = GuestAddress(vq.dtable[0].addr.get()); let data_addr = GuestAddress(vq.dtable[1].addr.get()); let status_addr = GuestAddress(vq.dtable[2].addr.get()); let empty_data = vec![0; 512]; let rand_data = vmm_sys_util::rand::rand_alphanumerics(1024) .as_bytes() .to_vec(); // Write with invalid data len (not a multiple of 512). { mem.write_obj::(VIRTIO_BLK_T_OUT, request_type_addr) .unwrap(); // Make data read only, 512 bytes in len, and set the actual value to be written. vq.dtable[1].flags.set(VIRTQ_DESC_F_NEXT); vq.dtable[1].len.set(511); mem.write_slice(&rand_data[..511], data_addr).unwrap(); simulate_queue_and_async_completion_events(&mut block, true); assert_eq!(vq.used.idx.get(), 1); assert_eq!(vq.used.ring[0].get().id, 0); assert_eq!(vq.used.ring[0].get().len, 0); // Check that the data wasn't written to the file let mut buf = [0u8; 512]; block .disk .file_engine .file() .seek(SeekFrom::Start(0)) .unwrap(); block.disk.file_engine.file().read_exact(&mut buf).unwrap(); assert_eq!(buf, empty_data.as_slice()); } // Write from valid address, with an overflowing length. 
{ let mut block = default_block(engine); // Default mem size is 0x10000 let mem = default_mem(); let interrupt = default_interrupt(); let vq = VirtQueue::new(GuestAddress(0), &mem, 16); set_queue(&mut block, 0, vq.create_queue()); block.activate(mem.clone(), interrupt).unwrap(); read_blk_req_descriptors(&vq); let request_type_addr = GuestAddress(vq.dtable[0].addr.get()); vq.dtable[1].set(0xff00, 0x1000, VIRTQ_DESC_F_NEXT, 2); mem.write_obj::(VIRTIO_BLK_T_OUT, request_type_addr) .unwrap(); // Mark the next available descriptor. vq.avail.idx.set(1); vq.used.idx.set(0); check_metric_after_block!( &block.metrics.invalid_reqs_count, 1, simulate_queue_and_async_completion_events(&mut block, true) ); let used_idx = vq.used.idx.get(); assert_eq!(used_idx, 1); let status_addr = GuestAddress(vq.dtable[2].addr.get()); assert_eq!( u32::from(mem.read_obj::(status_addr).unwrap()), VIRTIO_BLK_S_IOERR ); } // Write. { vq.used.idx.set(0); set_queue(&mut block, 0, vq.create_queue()); mem.write_obj::(VIRTIO_BLK_T_OUT, request_type_addr) .unwrap(); // Make data read only, 512 bytes in len, and set the actual value to be written. vq.dtable[1].flags.set(VIRTQ_DESC_F_NEXT); vq.dtable[1].len.set(512); mem.write_slice(&rand_data[..512], data_addr).unwrap(); check_metric_after_block!( &block.metrics.write_count, 1, simulate_queue_and_async_completion_events(&mut block, true) ); assert_eq!(vq.used.idx.get(), 1); assert_eq!(vq.used.ring[0].get().id, 0); assert_eq!(vq.used.ring[0].get().len, 1); assert_eq!(mem.read_obj::(status_addr).unwrap(), VIRTIO_BLK_S_OK); } // Read with invalid data len (not a multiple of 512). 
{ vq.used.idx.set(0); set_queue(&mut block, 0, vq.create_queue()); mem.write_obj::(VIRTIO_BLK_T_IN, request_type_addr) .unwrap(); vq.dtable[1] .flags .set(VIRTQ_DESC_F_NEXT | VIRTQ_DESC_F_WRITE); vq.dtable[1].len.set(511); mem.write_slice(empty_data.as_slice(), data_addr).unwrap(); simulate_queue_and_async_completion_events(&mut block, true); assert_eq!(vq.used.idx.get(), 1); assert_eq!(vq.used.ring[0].get().id, 0); // The descriptor should have been discarded. assert_eq!(vq.used.ring[0].get().len, 0); // Check that no data was read. let mut buf = [0u8; 512]; mem.read_slice(&mut buf, data_addr).unwrap(); assert_eq!(buf, empty_data.as_slice()); } // Read. { vq.used.idx.set(0); set_queue(&mut block, 0, vq.create_queue()); mem.write_obj::(VIRTIO_BLK_T_IN, request_type_addr) .unwrap(); vq.dtable[1] .flags .set(VIRTQ_DESC_F_NEXT | VIRTQ_DESC_F_WRITE); vq.dtable[1].len.set(512); mem.write_slice(empty_data.as_slice(), data_addr).unwrap(); check_metric_after_block!( &block.metrics.read_count, 1, simulate_queue_and_async_completion_events(&mut block, true) ); assert_eq!(vq.used.idx.get(), 1); assert_eq!(vq.used.ring[0].get().id, 0); // Added status byte length. assert_eq!(vq.used.ring[0].get().len, vq.dtable[1].len.get() + 1); assert_eq!(mem.read_obj::(status_addr).unwrap(), VIRTIO_BLK_S_OK); // Check that the data is the same that we wrote before let mut buf = [0u8; 512]; mem.read_slice(&mut buf, data_addr).unwrap(); assert_eq!(buf, &rand_data[..512]); } // Read with error. 
{ vq.used.idx.set(0); set_queue(&mut block, 0, vq.create_queue()); mem.write_obj::(VIRTIO_BLK_T_IN, request_type_addr) .unwrap(); vq.dtable[1] .flags .set(VIRTQ_DESC_F_NEXT | VIRTQ_DESC_F_WRITE); mem.write_slice(empty_data.as_slice(), data_addr).unwrap(); let size = block .disk .file_engine .file() .seek(SeekFrom::End(0)) .unwrap(); block.disk.file_engine.file().set_len(size / 2).unwrap(); mem.write_obj(10, GuestAddress(request_type_addr.0 + 8)) .unwrap(); simulate_queue_and_async_completion_events(&mut block, true); assert_eq!(vq.used.idx.get(), 1); assert_eq!(vq.used.ring[0].get().id, 0); // The descriptor should have been discarded. assert_eq!(vq.used.ring[0].get().len, 0); // Check that no data was read. let mut buf = [0u8; 512]; mem.read_slice(&mut buf, data_addr).unwrap(); assert_eq!(buf, empty_data.as_slice()); } // Partial buffer error on read. { vq.used.idx.set(0); set_queue(&mut block, 0, vq.create_queue()); mem.write_obj::(VIRTIO_BLK_T_IN, request_type_addr) .unwrap(); vq.dtable[1] .flags .set(VIRTQ_DESC_F_NEXT | VIRTQ_DESC_F_WRITE); let size = block .disk .file_engine .file() .seek(SeekFrom::End(0)) .unwrap(); block.disk.file_engine.file().set_len(size / 2).unwrap(); // Update sector number: stored at `request_type_addr.0 + 8` mem.write_obj(5, GuestAddress(request_type_addr.0 + 8)) .unwrap(); // This will attempt to read past end of file. simulate_queue_and_async_completion_events(&mut block, true); assert_eq!(vq.used.idx.get(), 1); assert_eq!(vq.used.ring[0].get().id, 0); // No data since can't read past end of file, only status byte length. assert_eq!(vq.used.ring[0].get().len, 1); assert_eq!( mem.read_obj::(status_addr).unwrap(), VIRTIO_BLK_S_IOERR ); // Check that no data was read since we can't read past the end of the file. 
let mut buf = [0u8; 512]; mem.read_slice(&mut buf, data_addr).unwrap(); assert_eq!(buf, empty_data.as_slice()); } { // Note: this test case only works because when we truncated the file above (with // set_len), we did not update the sector count stored in the block device // itself (is still 8, even though the file length is 1024 now, e.g. has 2 sectors). // Normally, requests that reach past the final sector are rejected by // Request::parse. vq.used.idx.set(0); set_queue(&mut block, 0, vq.create_queue()); mem.write_obj::(VIRTIO_BLK_T_IN, request_type_addr) .unwrap(); vq.dtable[1] .flags .set(VIRTQ_DESC_F_NEXT | VIRTQ_DESC_F_WRITE); vq.dtable[1].len.set(1024); mem.write_obj(1, GuestAddress(request_type_addr.0 + 8)) .unwrap(); block .disk .file_engine .file() .seek(SeekFrom::Start(512)) .unwrap(); block .disk .file_engine .file() .write_all(&rand_data[512..]) .unwrap(); simulate_queue_and_async_completion_events(&mut block, true); assert_eq!(vq.used.idx.get(), 1); assert_eq!(vq.used.ring[0].get().id, 0); assert_eq!( mem.read_obj::(status_addr).unwrap(), VIRTIO_BLK_S_IOERR ); // Check that we correctly read the second file sector. let mut buf = [0u8; 512]; mem.read_slice(&mut buf, data_addr).unwrap(); assert_eq!(buf, rand_data[512..]); } // Read at valid address, with an overflowing length. { // Default mem size is 0x10000 let mem = default_mem(); let interrupt = default_interrupt(); let vq = VirtQueue::new(GuestAddress(0), &mem, 16); set_queue(&mut block, 0, vq.create_queue()); block.activate(mem.clone(), interrupt).unwrap(); read_blk_req_descriptors(&vq); vq.dtable[1].set(0xff00, 0x1000, VIRTQ_DESC_F_NEXT | VIRTQ_DESC_F_WRITE, 2); let request_type_addr = GuestAddress(vq.dtable[0].addr.get()); // Mark the next available descriptor. 
vq.avail.idx.set(1); vq.used.idx.set(0); mem.write_obj::(VIRTIO_BLK_T_IN, request_type_addr) .unwrap(); vq.dtable[1] .flags .set(VIRTQ_DESC_F_NEXT | VIRTQ_DESC_F_WRITE); check_metric_after_block!( &block.metrics.invalid_reqs_count, 1, simulate_queue_and_async_completion_events(&mut block, true) ); let used_idx = vq.used.idx.get(); assert_eq!(used_idx, 1); let status_addr = GuestAddress(vq.dtable[2].addr.get()); assert_eq!( u32::from(mem.read_obj::(status_addr).unwrap()), VIRTIO_BLK_S_IOERR ); } } } #[test] fn test_flush() { for engine in [FileEngineType::Sync, FileEngineType::Async] { let mut block = default_block(engine); let mem = default_mem(); let interrupt = default_interrupt(); let vq = VirtQueue::new(GuestAddress(0), &mem, 16); set_queue(&mut block, 0, vq.create_queue()); block.activate(mem.clone(), interrupt).unwrap(); read_blk_req_descriptors(&vq); let request_type_addr = GuestAddress(vq.dtable[0].addr.get()); let status_addr = GuestAddress(vq.dtable[2].addr.get()); // Flush completes successfully without a data descriptor. { vq.dtable[0].next.set(2); mem.write_obj::(VIRTIO_BLK_T_FLUSH, request_type_addr) .unwrap(); simulate_queue_and_async_completion_events(&mut block, true); assert_eq!(vq.used.idx.get(), 1); assert_eq!(vq.used.ring[0].get().id, 0); assert_eq!(vq.used.ring[0].get().len, 1); assert_eq!(mem.read_obj::(status_addr).unwrap(), VIRTIO_BLK_S_OK); } // Flush completes successfully even with a data descriptor. { vq.used.idx.set(0); set_queue(&mut block, 0, vq.create_queue()); vq.dtable[0].next.set(1); mem.write_obj::(VIRTIO_BLK_T_FLUSH, request_type_addr) .unwrap(); simulate_queue_and_async_completion_events(&mut block, true); assert_eq!(vq.used.idx.get(), 1); assert_eq!(vq.used.ring[0].get().id, 0); // status byte length. 
assert_eq!(vq.used.ring[0].get().len, 1); assert_eq!(mem.read_obj::(status_addr).unwrap(), VIRTIO_BLK_S_OK); } } } #[test] fn test_get_device_id() { for engine in [FileEngineType::Sync, FileEngineType::Async] { let mut block = default_block(engine); let mem = default_mem(); let interrupt = default_interrupt(); let vq = VirtQueue::new(GuestAddress(0), &mem, 16); set_queue(&mut block, 0, vq.create_queue()); block.activate(mem.clone(), interrupt).unwrap(); read_blk_req_descriptors(&vq); let request_type_addr = GuestAddress(vq.dtable[0].addr.get()); let data_addr = GuestAddress(vq.dtable[1].addr.get()); let status_addr = GuestAddress(vq.dtable[2].addr.get()); let blk_metadata = block.disk.file_engine.file().metadata(); // Test that the driver receives the correct device id. { vq.dtable[1].len.set(VIRTIO_BLK_ID_BYTES); mem.write_obj::(VIRTIO_BLK_T_GET_ID, request_type_addr) .unwrap(); simulate_queue_event(&mut block, Some(true)); assert_eq!(vq.used.idx.get(), 1); assert_eq!(vq.used.ring[0].get().id, 0); assert_eq!(vq.used.ring[0].get().len, 21); assert_eq!(mem.read_obj::(status_addr).unwrap(), VIRTIO_BLK_S_OK); let blk_meta = blk_metadata.unwrap(); let expected_device_id = format!( "{}{}{}", blk_meta.st_dev(), blk_meta.st_rdev(), blk_meta.st_ino() ); let mut buf = [0; VIRTIO_BLK_ID_BYTES as usize]; mem.read_slice(&mut buf, data_addr).unwrap(); let chars_to_trim: &[char] = &['\u{0}']; let received_device_id = String::from_utf8(buf.to_ascii_lowercase()) .unwrap() .trim_matches(chars_to_trim) .to_string(); assert_eq!(received_device_id, expected_device_id); } // Test that a device ID request will be discarded, if it fails to provide enough buffer // space. 
{
            // A GET_ID request whose data buffer is one byte too small must
            // be discarded: used length 0, nothing written to the buffer.
            vq.used.idx.set(0);
            set_queue(&mut block, 0, vq.create_queue());
            vq.dtable[1].len.set(VIRTIO_BLK_ID_BYTES - 1);
            // NOTE(review): turbofish type arguments (`write_obj::(..)`)
            // were stripped by extraction throughout this file; presumably
            // `::<u32>` here — confirm against the original source.
            mem.write_obj::(VIRTIO_BLK_T_GET_ID, request_type_addr)
                .unwrap();
            simulate_queue_event(&mut block, Some(true));

            assert_eq!(vq.used.idx.get(), 1);
            assert_eq!(vq.used.ring[0].get().id, 0);
            assert_eq!(vq.used.ring[0].get().len, 0);
        }
    }
}

/// Queues `count` flush requests on `vq`, two descriptors each
/// (header + status byte). All requests share a single request header
/// written just past the end of the queue's own tables; each request gets
/// its own 4-byte-aligned status slot.
fn add_flush_requests_batch(block: &mut VirtioBlock, vq: &VirtQueue, count: u16) {
    let mem = vq.memory();
    vq.avail.idx.set(0);
    vq.used.idx.set(0);
    set_queue(block, 0, vq.create_queue());

    // NOTE(review): `align_of::()` / `size_of::()` lost their type
    // arguments in this extract (presumably `RequestHeader`).
    let hdr_addr = vq
        .end()
        .checked_align_up(std::mem::align_of::() as u64)
        .unwrap();
    // Write request header. All requests will use the same header.
    mem.write_obj(RequestHeader::new(VIRTIO_BLK_T_FLUSH, 0), hdr_addr)
        .unwrap();
    let mut status_addr = hdr_addr
        .checked_add(std::mem::size_of::() as u64)
        .unwrap()
        .checked_align_up(4)
        .unwrap();
    for i in 0..count {
        // Descriptors 2*i (header, chained) and 2*i+1 (writable status).
        let idx = i * 2;
        let hdr_desc = &vq.dtable[idx as usize];
        hdr_desc.addr.set(hdr_addr.0);
        hdr_desc.flags.set(VIRTQ_DESC_F_NEXT);
        hdr_desc.next.set(idx + 1);
        let status_desc = &vq.dtable[idx as usize + 1];
        status_desc.addr.set(status_addr.0);
        status_desc.flags.set(VIRTQ_DESC_F_WRITE);
        status_desc.len.set(4);
        status_addr = status_addr.checked_add(4).unwrap();
        vq.avail.ring[i as usize].set(idx);
        vq.avail.idx.set(i + 1);
    }
}

/// Asserts that exactly `count` requests completed, each with status
/// VIRTIO_BLK_S_OK and a used length of 1 (the status byte only).
fn check_flush_requests_batch(count: u16, vq: &VirtQueue) {
    let used_idx = vq.used.idx.get();
    assert_eq!(used_idx, count);

    for i in 0..count {
        let used = vq.used.ring[i as usize].get();
        // Status descriptor is the one right after the request's header.
        let status_addr = vq.dtable[used.id as usize + 1].addr.get();
        assert_eq!(used.len, 1);
        assert_eq!(
            u32::from(
                vq.memory()
                    .read_obj::(GuestAddress(status_addr))
                    .unwrap(),
            ),
            VIRTIO_BLK_S_OK
        );
    }
}

#[test]
fn test_io_engine_throttling() {
    // FullSQueue BlockError
    {
        let mut block = default_block(FileEngineType::Async);
        let mem = default_mem();
        let interrupt = default_interrupt();
        let vq = VirtQueue::new(GuestAddress(0), &mem, IO_URING_NUM_ENTRIES * 4);
        block.queues[0] =
vq.create_queue(); block.activate(mem.clone(), interrupt).unwrap(); // Run scenario that doesn't trigger FullSq BlockError: Add sq_size flush requests. add_flush_requests_batch(&mut block, &vq, IO_URING_NUM_ENTRIES); simulate_queue_event(&mut block, Some(false)); assert!(!block.is_io_engine_throttled); simulate_async_completion_event(&mut block, true); check_flush_requests_batch(IO_URING_NUM_ENTRIES, &vq); // Run scenario that triggers FullSqError : Add sq_size + 10 flush requests. add_flush_requests_batch(&mut block, &vq, IO_URING_NUM_ENTRIES + 10); simulate_queue_event(&mut block, Some(false)); assert!(block.is_io_engine_throttled); // When the async_completion_event is triggered: // 1. sq_size requests should be processed processed. // 2. is_io_engine_throttled should be set back to false. // 3. process_queue() should be called again. simulate_async_completion_event(&mut block, true); assert!(!block.is_io_engine_throttled); check_flush_requests_batch(IO_URING_NUM_ENTRIES, &vq); // check that process_queue() was called again resulting in the processing of the // remaining 10 ops. simulate_async_completion_event(&mut block, true); assert!(!block.is_io_engine_throttled); check_flush_requests_batch(IO_URING_NUM_ENTRIES + 10, &vq); } // FullCQueue BlockError { let mut block = default_block(FileEngineType::Async); let mem = default_mem(); let interrupt = default_interrupt(); let vq = VirtQueue::new(GuestAddress(0), &mem, IO_URING_NUM_ENTRIES * 4); block.queues[0] = vq.create_queue(); block.activate(mem.clone(), interrupt).unwrap(); // Run scenario that triggers FullCqError. Push 2 * IO_URING_NUM_ENTRIES and wait for // completion. Then try to push another entry. 
add_flush_requests_batch(&mut block, &vq, IO_URING_NUM_ENTRIES); simulate_queue_event(&mut block, Some(false)); assert!(!block.is_io_engine_throttled); thread::sleep(Duration::from_millis(150)); add_flush_requests_batch(&mut block, &vq, IO_URING_NUM_ENTRIES); simulate_queue_event(&mut block, Some(false)); assert!(!block.is_io_engine_throttled); thread::sleep(Duration::from_millis(150)); add_flush_requests_batch(&mut block, &vq, 1); simulate_queue_event(&mut block, Some(false)); assert!(block.is_io_engine_throttled); simulate_async_completion_event(&mut block, true); assert!(!block.is_io_engine_throttled); check_flush_requests_batch(IO_URING_NUM_ENTRIES * 2, &vq); } } #[test] fn test_prepare_save() { for engine in [FileEngineType::Sync, FileEngineType::Async] { let mut block = default_block(engine); let mem = default_mem(); let interrupt = default_interrupt(); let vq = VirtQueue::new(GuestAddress(0), &mem, 16); block.queues[0] = vq.create_queue(); block.activate(mem.clone(), interrupt).unwrap(); // Add a batch of flush requests. add_flush_requests_batch(&mut block, &vq, 5); simulate_queue_event(&mut block, None); block.prepare_save(); // Check that all the pending flush requests were processed during `prepare_save()`. check_flush_requests_batch(5, &vq); } } #[test] fn test_bandwidth_rate_limiter() { for engine in [FileEngineType::Sync, FileEngineType::Async] { let mut block = default_block(engine); let mem = default_mem(); let interrupt = default_interrupt(); let vq = VirtQueue::new(GuestAddress(0), &mem, 16); set_queue(&mut block, 0, vq.create_queue()); block.activate(mem.clone(), interrupt).unwrap(); read_blk_req_descriptors(&vq); let request_type_addr = GuestAddress(vq.dtable[0].addr.get()); let data_addr = GuestAddress(vq.dtable[1].addr.get()); let status_addr = GuestAddress(vq.dtable[2].addr.get()); // Create bandwidth rate limiter that allows only 5120 bytes/s with bucket size of 8 // bytes. 
let mut rl = RateLimiter::new(512, 0, 100, 0, 0, 0).unwrap(); // Use up the budget. assert!(rl.consume(512, TokenType::Bytes)); set_rate_limiter(&mut block, rl); mem.write_obj::(VIRTIO_BLK_T_OUT, request_type_addr) .unwrap(); // Make data read only, 512 bytes in len, and set the actual value to be written vq.dtable[1].flags.set(VIRTQ_DESC_F_NEXT); vq.dtable[1].len.set(512); mem.write_obj::(123_456_789, data_addr).unwrap(); // Following write procedure should fail because of bandwidth rate limiting. { // Trigger the attempt to write. check_metric_after_block!( &block.metrics.rate_limiter_throttled_events, 1, simulate_queue_event(&mut block, Some(false)) ); // Assert that limiter is blocked. assert!(block.rate_limiter.is_blocked()); // Make sure the data is still queued for processing. assert_eq!(vq.used.idx.get(), 0); } // Wait for 100ms to give the rate-limiter timer a chance to replenish. // Wait for an extra 50ms to make sure the timerfd event makes its way from the kernel. thread::sleep(Duration::from_millis(150)); // Following write procedure should succeed because bandwidth should now be available. { check_metric_after_block!( &block.metrics.rate_limiter_throttled_events, 0, block.process_rate_limiter_event() ); // Validate the rate_limiter is no longer blocked. assert!(!block.rate_limiter.is_blocked()); // Complete async IO ops if needed simulate_async_completion_event(&mut block, true); // Make sure the data queue advanced. 
assert_eq!(vq.used.idx.get(), 1); assert_eq!(vq.used.ring[0].get().id, 0); assert_eq!(vq.used.ring[0].get().len, 1); assert_eq!(mem.read_obj::(status_addr).unwrap(), VIRTIO_BLK_S_OK); } } } #[test] fn test_ops_rate_limiter() { for engine in [FileEngineType::Sync, FileEngineType::Async] { let mut block = default_block(engine); let mem = default_mem(); let interrupt = default_interrupt(); let vq = VirtQueue::new(GuestAddress(0), &mem, 16); set_queue(&mut block, 0, vq.create_queue()); block.activate(mem.clone(), interrupt).unwrap(); read_blk_req_descriptors(&vq); let request_type_addr = GuestAddress(vq.dtable[0].addr.get()); let data_addr = GuestAddress(vq.dtable[1].addr.get()); let status_addr = GuestAddress(vq.dtable[2].addr.get()); // Create ops rate limiter that allows only 10 ops/s with bucket size of 1 ops. let mut rl = RateLimiter::new(0, 0, 0, 1, 0, 100).unwrap(); // Use up the budget. assert!(rl.consume(1, TokenType::Ops)); set_rate_limiter(&mut block, rl); mem.write_obj::(VIRTIO_BLK_T_OUT, request_type_addr) .unwrap(); // Make data read only, 512 bytes in len, and set the actual value to be written. vq.dtable[1].flags.set(VIRTQ_DESC_F_NEXT); vq.dtable[1].len.set(512); mem.write_obj::(123_456_789, data_addr).unwrap(); // Following write procedure should fail because of ops rate limiting. { // Trigger the attempt to write. check_metric_after_block!( &block.metrics.rate_limiter_throttled_events, 1, simulate_queue_event(&mut block, Some(false)) ); // Assert that limiter is blocked. assert!(block.rate_limiter.is_blocked()); // Make sure the data is still queued for processing. assert_eq!(vq.used.idx.get(), 0); } // Do a second write that still fails but this time on the fast path. { // Trigger the attempt to write. check_metric_after_block!( &block.metrics.rate_limiter_throttled_events, 1, simulate_queue_event(&mut block, Some(false)) ); // Assert that limiter is blocked. 
assert!(block.rate_limiter.is_blocked()); // Make sure the data is still queued for processing. assert_eq!(vq.used.idx.get(), 0); } // Wait for 100ms to give the rate-limiter timer a chance to replenish. // Wait for an extra 50ms to make sure the timerfd event makes its way from the kernel. thread::sleep(Duration::from_millis(150)); // Following write procedure should succeed because ops budget should now be available. { check_metric_after_block!( &block.metrics.rate_limiter_throttled_events, 0, block.process_rate_limiter_event() ); // Validate the rate_limiter is no longer blocked. assert!(!block.rate_limiter.is_blocked()); // Complete async IO ops if needed simulate_async_completion_event(&mut block, true); // Make sure the data queue advanced. assert_eq!(vq.used.idx.get(), 1); assert_eq!(vq.used.ring[0].get().id, 0); assert_eq!(vq.used.ring[0].get().len, 1); assert_eq!(mem.read_obj::(status_addr).unwrap(), VIRTIO_BLK_S_OK); } } } #[test] fn test_update_disk_image() { for engine in [FileEngineType::Sync, FileEngineType::Async] { let mut block = default_block(engine); let mem = default_mem(); let interrupt = default_interrupt(); let vq = VirtQueue::new(GuestAddress(0), &mem, 16); set_queue(&mut block, 0, vq.create_queue()); block.activate(mem, interrupt).unwrap(); let f = TempFile::new().unwrap(); let path = f.as_path(); let mdata = metadata(path).unwrap(); let mut id = vec![0; VIRTIO_BLK_ID_BYTES as usize]; let str_id = format!("{}{}{}", mdata.st_dev(), mdata.st_rdev(), mdata.st_ino()); let part_id = str_id.as_bytes(); id[..cmp::min(part_id.len(), VIRTIO_BLK_ID_BYTES as usize)].clone_from_slice( &part_id[..cmp::min(part_id.len(), VIRTIO_BLK_ID_BYTES as usize)], ); block .update_disk_image(String::from(path.to_str().unwrap())) .unwrap(); assert_eq!( block.disk.file_engine.file().metadata().unwrap().st_ino(), mdata.st_ino() ); assert_eq!(block.disk.image_id, id.as_slice()); } } } ================================================ FILE: 
src/vmm/src/devices/virtio/block/virtio/event_handler.rs
================================================
// Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

use event_manager::{EventOps, Events, MutEventSubscriber};
use vmm_sys_util::epoll::EventSet;

use super::io::FileEngine;
use crate::devices::virtio::block::virtio::device::VirtioBlock;
use crate::devices::virtio::device::VirtioDevice;
use crate::logger::{error, warn};

impl VirtioBlock {
    // Event-source identifiers carried in the `data` field of registered
    // epoll events; `process()` dispatches on them.
    const PROCESS_ACTIVATE: u32 = 0;
    const PROCESS_QUEUE: u32 = 1;
    const PROCESS_RATE_LIMITER: u32 = 2;
    const PROCESS_ASYNC_COMPLETION: u32 = 3;

    /// Registers the events serviced while the device is active: the first
    /// queue's eventfd, the rate limiter, and — for the async file engine
    /// only — the io_uring completion eventfd. Registration failures are
    /// logged but not propagated.
    fn register_runtime_events(&self, ops: &mut EventOps) {
        if let Err(err) = ops.add(Events::with_data(
            &self.queue_evts[0],
            Self::PROCESS_QUEUE,
            EventSet::IN,
        )) {
            error!("Failed to register queue event: {}", err);
        }
        if let Err(err) = ops.add(Events::with_data(
            &self.rate_limiter,
            Self::PROCESS_RATE_LIMITER,
            EventSet::IN,
        )) {
            error!("Failed to register ratelimiter event: {}", err);
        }
        if let FileEngine::Async(ref engine) = self.disk.file_engine
            && let Err(err) = ops.add(Events::with_data(
                engine.completion_evt(),
                Self::PROCESS_ASYNC_COMPLETION,
                EventSet::IN,
            ))
        {
            error!("Failed to register IO engine completion event: {}", err);
        }
    }

    /// Registers the single event watched before activation.
    fn register_activate_event(&self, ops: &mut EventOps) {
        if let Err(err) = ops.add(Events::with_data(
            &self.activate_evt,
            Self::PROCESS_ACTIVATE,
            EventSet::IN,
        )) {
            error!("Failed to register activate event: {}", err);
        }
    }

    /// Handles the activation notification: drains the activate eventfd,
    /// then swaps the activate event out for the runtime event set.
    fn process_activate_event(&self, ops: &mut EventOps) {
        if let Err(err) = self.activate_evt.read() {
            error!("Failed to consume block activate event: {:?}", err);
        }
        self.register_runtime_events(ops);
        if let Err(err) = ops.remove(Events::with_data(
            &self.activate_evt,
            Self::PROCESS_ACTIVATE,
            EventSet::IN,
        )) {
            error!("Failed to un-register activate event: {}", err);
        }
    }
}

impl MutEventSubscriber for VirtioBlock {
    // Handle an event for queue or rate limiter.
fn process(&mut self, event: Events, ops: &mut EventOps) { let source = event.data(); let event_set = event.event_set(); // TODO: also check for errors. Pending high level discussions on how we want // to handle errors in devices. let supported_events = EventSet::IN; if !supported_events.contains(event_set) { warn!( "Block: Received unknown event: {:?} from source: {:?}", event_set, source ); return; } if self.is_activated() { match source { Self::PROCESS_ACTIVATE => self.process_activate_event(ops), Self::PROCESS_QUEUE => self.process_queue_event(), Self::PROCESS_RATE_LIMITER => self.process_rate_limiter_event(), Self::PROCESS_ASYNC_COMPLETION => self.process_async_completion_event(), _ => warn!("Block: Spurious event received: {:?}", source), } } else { warn!( "Block: The device is not yet activated. Spurious event received: {:?}", source ); } } fn init(&mut self, ops: &mut EventOps) { // This function can be called during different points in the device lifetime: // - shortly after device creation, // - on device activation (is-activated already true at this point), // - on device restore from snapshot. 
if self.is_activated() { self.register_runtime_events(ops); } else { self.register_activate_event(ops); } } } #[cfg(test)] mod tests { use std::sync::{Arc, Mutex}; use event_manager::{EventManager, SubscriberOps}; use super::*; use crate::devices::virtio::block::virtio::device::FileEngineType; use crate::devices::virtio::block::virtio::test_utils::{ default_block, read_blk_req_descriptors, set_queue, simulate_async_completion_event, }; use crate::devices::virtio::block::virtio::{VIRTIO_BLK_S_OK, VIRTIO_BLK_T_OUT}; use crate::devices::virtio::queue::VIRTQ_DESC_F_NEXT; use crate::devices::virtio::test_utils::{VirtQueue, default_interrupt, default_mem}; use crate::vstate::memory::{Bytes, GuestAddress}; #[test] fn test_event_handler() { let mut event_manager = EventManager::new().unwrap(); let mut block = default_block(FileEngineType::default()); let mem = default_mem(); let interrupt = default_interrupt(); let vq = VirtQueue::new(GuestAddress(0), &mem, 16); set_queue(&mut block, 0, vq.create_queue()); read_blk_req_descriptors(&vq); let block = Arc::new(Mutex::new(block)); let _id = event_manager.add_subscriber(block.clone()); let request_type_addr = GuestAddress(vq.dtable[0].addr.get()); let data_addr = GuestAddress(vq.dtable[1].addr.get()); let status_addr = GuestAddress(vq.dtable[2].addr.get()); // Push a 'Write' operation. { mem.write_obj::(VIRTIO_BLK_T_OUT, request_type_addr) .unwrap(); // Make data read only, 512 bytes in len, and set the actual value to be written. vq.dtable[1].flags.set(VIRTQ_DESC_F_NEXT); vq.dtable[1].len.set(512); mem.write_obj::(123_456_789, data_addr).unwrap(); // Trigger the queue event. block.lock().unwrap().queue_evts[0].write(1).unwrap(); } // EventManager should report no events since block has only registered // its activation event so far (even though queue event is pending). let ev_count = event_manager.run_with_timeout(50).unwrap(); assert_eq!(ev_count, 0); // Now activate the device. 
block .lock() .unwrap() .activate(mem.clone(), interrupt) .unwrap(); // Process the activate event. let ev_count = event_manager.run_with_timeout(50).unwrap(); assert_eq!(ev_count, 1); // Handle the pending queue event through EventManager. event_manager .run_with_timeout(100) .expect("Metrics event timeout or error."); // Complete async IO ops if needed simulate_async_completion_event(&mut block.lock().unwrap(), true); assert_eq!(vq.used.idx.get(), 1); assert_eq!(vq.used.ring[0].get().id, 0); assert_eq!(vq.used.ring[0].get().len, 1); assert_eq!(mem.read_obj::(status_addr).unwrap(), VIRTIO_BLK_S_OK); } } ================================================ FILE: src/vmm/src/devices/virtio/block/virtio/io/async_io.rs ================================================ // Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 use std::fmt::Debug; use std::fs::File; use std::os::fd::RawFd; use std::os::unix::io::AsRawFd; use vm_memory::GuestMemoryError; use vmm_sys_util::eventfd::EventFd; use crate::devices::virtio::block::virtio::io::RequestError; use crate::devices::virtio::block::virtio::{IO_URING_NUM_ENTRIES, PendingRequest}; use crate::io_uring::operation::{Cqe, OpCode, Operation}; use crate::io_uring::restriction::Restriction; use crate::io_uring::{IoUring, IoUringError}; use crate::logger::log_dev_preview_warning; use crate::vstate::memory::{GuestAddress, GuestMemory, GuestMemoryExtension, GuestMemoryMmap}; #[derive(Debug, thiserror::Error, displaydoc::Display)] pub enum AsyncIoError { /// IO: {0} IO(std::io::Error), /// IoUring: {0} IoUring(IoUringError), /// Submit: {0} Submit(std::io::Error), /// SyncAll: {0} SyncAll(std::io::Error), /// EventFd: {0} EventFd(std::io::Error), /// GuestMemory: {0} GuestMemory(GuestMemoryError), } #[derive(Debug)] pub struct AsyncFileEngine { file: File, ring: IoUring, completion_evt: EventFd, } #[derive(Debug)] pub struct WrappedRequest { addr: Option, req: PendingRequest, } 
impl WrappedRequest { fn new(req: PendingRequest) -> Self { WrappedRequest { addr: None, req } } fn new_with_dirty_tracking(addr: GuestAddress, req: PendingRequest) -> Self { WrappedRequest { addr: Some(addr), req, } } fn mark_dirty_mem_and_unwrap(self, mem: &GuestMemoryMmap, count: u32) -> PendingRequest { if let Some(addr) = self.addr { mem.mark_dirty(addr, count as usize) } self.req } } impl AsyncFileEngine { fn new_ring( file: &File, completion_fd: RawFd, ) -> Result, IoUringError> { IoUring::new( u32::from(IO_URING_NUM_ENTRIES), vec![file], vec![ // Make sure we only allow operations on pre-registered fds. Restriction::RequireFixedFds, // Allowlist of opcodes. Restriction::AllowOpCode(OpCode::Read), Restriction::AllowOpCode(OpCode::Write), Restriction::AllowOpCode(OpCode::Fsync), ], Some(completion_fd), ) } pub fn from_file(file: File) -> Result { log_dev_preview_warning("Async file IO", Option::None); let completion_evt = EventFd::new(libc::EFD_NONBLOCK).map_err(AsyncIoError::EventFd)?; let ring = Self::new_ring(&file, completion_evt.as_raw_fd()).map_err(AsyncIoError::IoUring)?; Ok(AsyncFileEngine { file, ring, completion_evt, }) } pub fn update_file(&mut self, file: File) -> Result<(), AsyncIoError> { let ring = Self::new_ring(&file, self.completion_evt.as_raw_fd()) .map_err(AsyncIoError::IoUring)?; self.file = file; self.ring = ring; Ok(()) } #[cfg(test)] pub fn file(&self) -> &File { &self.file } pub fn completion_evt(&self) -> &EventFd { &self.completion_evt } pub fn push_read( &mut self, offset: u64, mem: &GuestMemoryMmap, addr: GuestAddress, count: u32, req: PendingRequest, ) -> Result<(), RequestError> { let buf = match mem.get_slice(addr, count as usize) { Ok(slice) => slice.ptr_guard_mut().as_ptr(), Err(err) => { return Err(RequestError { req, error: AsyncIoError::GuestMemory(err), }); } }; let wrapped_user_data = WrappedRequest::new_with_dirty_tracking(addr, req); self.ring .push(Operation::read( 0, buf as usize, count, offset, wrapped_user_data, )) 
.map_err(|(io_uring_error, data)| RequestError { req: data.req, error: AsyncIoError::IoUring(io_uring_error), }) } pub fn push_write( &mut self, offset: u64, mem: &GuestMemoryMmap, addr: GuestAddress, count: u32, req: PendingRequest, ) -> Result<(), RequestError> { let buf = match mem.get_slice(addr, count as usize) { Ok(slice) => slice.ptr_guard_mut().as_ptr(), Err(err) => { return Err(RequestError { req, error: AsyncIoError::GuestMemory(err), }); } }; let wrapped_user_data = WrappedRequest::new(req); self.ring .push(Operation::write( 0, buf as usize, count, offset, wrapped_user_data, )) .map_err(|(io_uring_error, data)| RequestError { req: data.req, error: AsyncIoError::IoUring(io_uring_error), }) } pub fn push_flush(&mut self, req: PendingRequest) -> Result<(), RequestError> { let wrapped_user_data = WrappedRequest::new(req); self.ring .push(Operation::fsync(0, wrapped_user_data)) .map_err(|(io_uring_error, data)| RequestError { req: data.req, error: AsyncIoError::IoUring(io_uring_error), }) } pub fn kick_submission_queue(&mut self) -> Result<(), AsyncIoError> { self.ring .submit() .map(|_| ()) .map_err(AsyncIoError::IoUring) } pub fn drain(&mut self, discard_cqes: bool) -> Result<(), AsyncIoError> { self.ring .submit_and_wait_all() .map(|_| ()) .map_err(AsyncIoError::IoUring)?; if discard_cqes { // Drain the completion queue so that we may deallocate the user_data fields. while self.do_pop()?.is_some() {} } Ok(()) } pub fn drain_and_flush(&mut self, discard_cqes: bool) -> Result<(), AsyncIoError> { self.drain(discard_cqes)?; // Sync data out to physical media on host. // We don't need to call flush first since all the ops are performed through io_uring // and Rust shouldn't manage any data in its internal buffers. 
self.file.sync_all().map_err(AsyncIoError::SyncAll)?; Ok(()) } fn do_pop(&mut self) -> Result>, AsyncIoError> { self.ring.pop().map_err(AsyncIoError::IoUring) } pub fn pop( &mut self, mem: &GuestMemoryMmap, ) -> Result>, AsyncIoError> { let cqe = self.do_pop()?.map(|cqe| { let count = cqe.count(); cqe.map_user_data(|wrapped_user_data| { wrapped_user_data.mark_dirty_mem_and_unwrap(mem, count) }) }); Ok(cqe) } } ================================================ FILE: src/vmm/src/devices/virtio/block/virtio/io/mod.rs ================================================ // Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 pub mod async_io; pub mod sync_io; use std::fmt::Debug; use std::fs::File; pub use self::async_io::{AsyncFileEngine, AsyncIoError}; pub use self::sync_io::{SyncFileEngine, SyncIoError}; use crate::devices::virtio::block::virtio::PendingRequest; use crate::devices::virtio::block::virtio::device::FileEngineType; use crate::vstate::memory::{GuestAddress, GuestMemoryMmap}; #[derive(Debug)] pub struct RequestOk { pub req: PendingRequest, pub count: u32, } #[derive(Debug)] pub enum FileEngineOk { Submitted, Executed(RequestOk), } #[derive(Debug, thiserror::Error, displaydoc::Display)] pub enum BlockIoError { /// Sync error: {0} Sync(SyncIoError), /// Async error: {0} Async(AsyncIoError), } impl BlockIoError { pub fn is_throttling_err(&self) -> bool { match self { BlockIoError::Async(AsyncIoError::IoUring(err)) => err.is_throttling_err(), _ => false, } } } #[derive(Debug)] pub struct RequestError { pub req: PendingRequest, pub error: E, } #[allow(clippy::large_enum_variant)] #[derive(Debug)] pub enum FileEngine { #[allow(unused)] Async(AsyncFileEngine), Sync(SyncFileEngine), } impl FileEngine { pub fn from_file(file: File, engine_type: FileEngineType) -> Result { match engine_type { FileEngineType::Async => Ok(FileEngine::Async( AsyncFileEngine::from_file(file).map_err(BlockIoError::Async)?, )), 
FileEngineType::Sync => Ok(FileEngine::Sync(SyncFileEngine::from_file(file))), } } pub fn update_file_path(&mut self, file: File) -> Result<(), BlockIoError> { match self { FileEngine::Async(engine) => engine.update_file(file).map_err(BlockIoError::Async)?, FileEngine::Sync(engine) => engine.update_file(file), }; Ok(()) } #[cfg(test)] pub fn file(&self) -> &File { match self { FileEngine::Async(engine) => engine.file(), FileEngine::Sync(engine) => engine.file(), } } pub fn read( &mut self, offset: u64, mem: &GuestMemoryMmap, addr: GuestAddress, count: u32, req: PendingRequest, ) -> Result> { match self { FileEngine::Async(engine) => match engine.push_read(offset, mem, addr, count, req) { Ok(_) => Ok(FileEngineOk::Submitted), Err(err) => Err(RequestError { req: err.req, error: BlockIoError::Async(err.error), }), }, FileEngine::Sync(engine) => match engine.read(offset, mem, addr, count) { Ok(count) => Ok(FileEngineOk::Executed(RequestOk { req, count })), Err(err) => Err(RequestError { req, error: BlockIoError::Sync(err), }), }, } } pub fn write( &mut self, offset: u64, mem: &GuestMemoryMmap, addr: GuestAddress, count: u32, req: PendingRequest, ) -> Result> { match self { FileEngine::Async(engine) => match engine.push_write(offset, mem, addr, count, req) { Ok(_) => Ok(FileEngineOk::Submitted), Err(err) => Err(RequestError { req: err.req, error: BlockIoError::Async(err.error), }), }, FileEngine::Sync(engine) => match engine.write(offset, mem, addr, count) { Ok(count) => Ok(FileEngineOk::Executed(RequestOk { req, count })), Err(err) => Err(RequestError { req, error: BlockIoError::Sync(err), }), }, } } pub fn flush( &mut self, req: PendingRequest, ) -> Result> { match self { FileEngine::Async(engine) => match engine.push_flush(req) { Ok(_) => Ok(FileEngineOk::Submitted), Err(err) => Err(RequestError { req: err.req, error: BlockIoError::Async(err.error), }), }, FileEngine::Sync(engine) => match engine.flush() { Ok(_) => Ok(FileEngineOk::Executed(RequestOk { req, count: 0 
})), Err(err) => Err(RequestError { req, error: BlockIoError::Sync(err), }), }, } } pub fn drain(&mut self, discard: bool) -> Result<(), BlockIoError> { match self { FileEngine::Async(engine) => engine.drain(discard).map_err(BlockIoError::Async), FileEngine::Sync(_engine) => Ok(()), } } pub fn drain_and_flush(&mut self, discard: bool) -> Result<(), BlockIoError> { match self { FileEngine::Async(engine) => { engine.drain_and_flush(discard).map_err(BlockIoError::Async) } FileEngine::Sync(engine) => engine.flush().map_err(BlockIoError::Sync), } } } #[cfg(test)] pub mod tests { #![allow(clippy::undocumented_unsafe_blocks)] use std::os::unix::ffi::OsStrExt; use vm_memory::GuestMemoryRegion; use vmm_sys_util::tempfile::TempFile; use super::*; use crate::devices::virtio::block::virtio::device::FileEngineType; use crate::utils::u64_to_usize; use crate::vmm_config::machine_config::HugePageConfig; use crate::vstate::memory; use crate::vstate::memory::{Bitmap, Bytes, GuestMemory, GuestRegionMmapExt}; const FILE_LEN: u32 = 1024; // 2 pages of memory should be enough to test read/write ops and also dirty tracking. const MEM_LEN: usize = 8192; macro_rules! assert_sync_execution { ($expression:expr, $count:expr) => { match $expression { Ok(FileEngineOk::Executed(RequestOk { req: _, count })) => { assert_eq!(count, $count) } other => panic!( "Expected: Ok(FileEngineOk::Executed(UserDataOk {{ user_data: _, count: {} \ }})), got: {:?}", $count, other ), } }; } macro_rules! 
assert_queued { ($expression:expr) => { assert!(matches!($expression, Ok(FileEngineOk::Submitted))) }; } fn assert_async_execution(mem: &GuestMemoryMmap, engine: &mut FileEngine, count: u32) { if let FileEngine::Async(engine) = engine { engine.drain(false).unwrap(); assert_eq!(engine.pop(mem).unwrap().unwrap().result().unwrap(), count); } } fn create_mem() -> GuestMemoryMmap { GuestMemoryMmap::from_regions( memory::anonymous( [(GuestAddress(0), MEM_LEN)].into_iter(), true, HugePageConfig::None, ) .unwrap() .into_iter() .map(|region| GuestRegionMmapExt::dram_from_mmap_region(region, 0)) .collect(), ) .unwrap() } fn check_dirty_mem(mem: &GuestMemoryMmap, addr: GuestAddress, len: u32) { let bitmap = mem.find_region(addr).unwrap().bitmap(); for offset in addr.0..addr.0 + u64::from(len) { assert!(bitmap.dirty_at(u64_to_usize(offset))); } } fn check_clean_mem(mem: &GuestMemoryMmap, addr: GuestAddress, len: u32) { let bitmap = mem.find_region(addr).unwrap().bitmap(); for offset in addr.0..addr.0 + u64::from(len) { assert!(!bitmap.dirty_at(u64_to_usize(offset))); } } #[test] fn test_sync() { let mem = create_mem(); // Create backing file. 
let file = TempFile::new().unwrap().into_file(); let mut engine = FileEngine::from_file(file, FileEngineType::Sync).unwrap(); let data = vmm_sys_util::rand::rand_alphanumerics(FILE_LEN as usize) .as_bytes() .to_vec(); // Partial write let partial_len = 50; let addr = GuestAddress(MEM_LEN as u64 - u64::from(partial_len)); mem.write(&data, addr).unwrap(); assert_sync_execution!( engine.write(0, &mem, addr, partial_len, PendingRequest::default()), partial_len ); // Partial read let mem = create_mem(); assert_sync_execution!( engine.read(0, &mem, addr, partial_len, PendingRequest::default()), partial_len ); // Check data let mut buf = vec![0u8; partial_len as usize]; mem.read_slice(&mut buf, addr).unwrap(); assert_eq!(buf, data[..partial_len as usize]); // Offset write let offset = 100; let partial_len = 50; let addr = GuestAddress(0); mem.write(&data, addr).unwrap(); assert_sync_execution!( engine.write(offset, &mem, addr, partial_len, PendingRequest::default()), partial_len ); // Offset read let mem = create_mem(); assert_sync_execution!( engine.read(offset, &mem, addr, partial_len, PendingRequest::default()), partial_len ); // Check data let mut buf = vec![0u8; partial_len as usize]; mem.read_slice(&mut buf, addr).unwrap(); assert_eq!(buf, data[..partial_len as usize]); // Full write mem.write(&data, GuestAddress(0)).unwrap(); assert_sync_execution!( engine.write( 0, &mem, GuestAddress(0), FILE_LEN, PendingRequest::default() ), FILE_LEN ); // Full read let mem = create_mem(); assert_sync_execution!( engine.read( 0, &mem, GuestAddress(0), FILE_LEN, PendingRequest::default() ), FILE_LEN ); // Check data let mut buf = vec![0u8; FILE_LEN as usize]; mem.read_slice(&mut buf, GuestAddress(0)).unwrap(); assert_eq!(buf, data.as_slice()); // Check other ops engine.flush(PendingRequest::default()).unwrap(); engine.drain(true).unwrap(); engine.drain_and_flush(true).unwrap(); } #[test] fn test_async() { // Create backing file. 
let file = TempFile::new().unwrap().into_file(); let mut engine = FileEngine::from_file(file, FileEngineType::Async).unwrap(); let data = vmm_sys_util::rand::rand_alphanumerics(FILE_LEN as usize) .as_bytes() .to_vec(); // Partial reads and writes cannot really be tested because io_uring will return an error // code for trying to write to unmapped memory. // Offset write let mem = create_mem(); let offset = 100; let partial_len = 50; let addr = GuestAddress(0); mem.write(&data, addr).unwrap(); assert_queued!(engine.write(offset, &mem, addr, partial_len, PendingRequest::default())); assert_async_execution(&mem, &mut engine, partial_len); // Offset read let mem = create_mem(); assert_queued!(engine.read(offset, &mem, addr, partial_len, PendingRequest::default())); assert_async_execution(&mem, &mut engine, partial_len); // Check data let mut buf = vec![0u8; partial_len as usize]; mem.read_slice(&mut buf, addr).unwrap(); assert_eq!(buf, data[..partial_len as usize]); // check dirty mem check_dirty_mem(&mem, addr, partial_len); check_clean_mem(&mem, GuestAddress(4096), 4096); // Full write mem.write(&data, GuestAddress(0)).unwrap(); assert_queued!(engine.write(0, &mem, addr, FILE_LEN, PendingRequest::default())); assert_async_execution(&mem, &mut engine, FILE_LEN); // Full read let mem = create_mem(); assert_queued!(engine.read(0, &mem, addr, FILE_LEN, PendingRequest::default())); assert_async_execution(&mem, &mut engine, FILE_LEN); // Check data let mut buf = vec![0u8; FILE_LEN as usize]; mem.read_slice(&mut buf, GuestAddress(0)).unwrap(); assert_eq!(buf, data.as_slice()); // check dirty mem check_dirty_mem(&mem, addr, FILE_LEN); check_clean_mem(&mem, GuestAddress(4096), 4096); // Check other ops assert_queued!(engine.flush(PendingRequest::default())); assert_async_execution(&mem, &mut engine, 0); engine.drain(true).unwrap(); engine.drain_and_flush(true).unwrap(); } } ================================================ FILE: 
src/vmm/src/devices/virtio/block/virtio/io/sync_io.rs ================================================ // Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 use std::fs::File; use std::io::{Seek, SeekFrom, Write}; use vm_memory::{GuestMemoryError, ReadVolatile, WriteVolatile}; use crate::vstate::memory::{GuestAddress, GuestMemory, GuestMemoryMmap}; #[derive(Debug, thiserror::Error, displaydoc::Display)] pub enum SyncIoError { /// Flush: {0} Flush(std::io::Error), /// Seek: {0} Seek(std::io::Error), /// SyncAll: {0} SyncAll(std::io::Error), /// Transfer: {0} Transfer(GuestMemoryError), } #[derive(Debug)] pub struct SyncFileEngine { file: File, } // SAFETY: `File` is send and ultimately a POD. unsafe impl Send for SyncFileEngine {} impl SyncFileEngine { pub fn from_file(file: File) -> SyncFileEngine { SyncFileEngine { file } } #[cfg(test)] pub fn file(&self) -> &File { &self.file } /// Update the backing file of the engine pub fn update_file(&mut self, file: File) { self.file = file } pub fn read( &mut self, offset: u64, mem: &GuestMemoryMmap, addr: GuestAddress, count: u32, ) -> Result { self.file .seek(SeekFrom::Start(offset)) .map_err(SyncIoError::Seek)?; mem.get_slice(addr, count as usize) .and_then(|mut slice| Ok(self.file.read_exact_volatile(&mut slice)?)) .map_err(SyncIoError::Transfer)?; Ok(count) } pub fn write( &mut self, offset: u64, mem: &GuestMemoryMmap, addr: GuestAddress, count: u32, ) -> Result { self.file .seek(SeekFrom::Start(offset)) .map_err(SyncIoError::Seek)?; mem.get_slice(addr, count as usize) .and_then(|slice| Ok(self.file.write_all_volatile(&slice)?)) .map_err(SyncIoError::Transfer)?; Ok(count) } pub fn flush(&mut self) -> Result<(), SyncIoError> { // flush() first to force any cached data out of rust buffers. self.file.flush().map_err(SyncIoError::Flush)?; // Sync data out to physical media on host. 
self.file.sync_all().map_err(SyncIoError::SyncAll) } } ================================================ FILE: src/vmm/src/devices/virtio/block/virtio/metrics.rs ================================================ // Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 //! Defines the metrics system for block devices. //! //! # Metrics format //! The metrics are flushed in JSON when requested by vmm::logger::metrics::METRICS.write(). //! //! ## JSON example with metrics: //! ```json //! { //! "block_drv0": { //! "activate_fails": "SharedIncMetric", //! "cfg_fails": "SharedIncMetric", //! "no_avail_buffer": "SharedIncMetric", //! "event_fails": "SharedIncMetric", //! "execute_fails": "SharedIncMetric", //! ... //! } //! "block_drv1": { //! "activate_fails": "SharedIncMetric", //! "cfg_fails": "SharedIncMetric", //! "no_avail_buffer": "SharedIncMetric", //! "event_fails": "SharedIncMetric", //! "execute_fails": "SharedIncMetric", //! ... //! } //! ... //! "block_drive_id": { //! "activate_fails": "SharedIncMetric", //! "cfg_fails": "SharedIncMetric", //! "no_avail_buffer": "SharedIncMetric", //! "event_fails": "SharedIncMetric", //! "execute_fails": "SharedIncMetric", //! ... //! } //! "block": { //! "activate_fails": "SharedIncMetric", //! "cfg_fails": "SharedIncMetric", //! "no_avail_buffer": "SharedIncMetric", //! "event_fails": "SharedIncMetric", //! "execute_fails": "SharedIncMetric", //! ... //! } //! } //! ``` //! Each `block` field in the example above is a serializable `BlockDeviceMetrics` structure //! collecting metrics such as `activate_fails`, `cfg_fails`, etc. for the block device. //! `block_drv0` represent metrics for the endpoint "/drives/drv0", //! `block_drv1` represent metrics for the endpoint "/drives/drv1", and //! `block_drive_id` represent metrics for the endpoint "/drives/{drive_id}" //! block device respectively and `block` is the aggregate of all the per device metrics. //! //! 
# Limitations //! block device currently do not have `vmm::logger::metrics::StoreMetrics` so aggregate //! doesn't consider them. //! //! # Design //! The main design goals of this system are: //! * To improve block device metrics by logging them at per device granularity. //! * Continue to provide aggregate block metrics to maintain backward compatibility. //! * Move BlockDeviceMetrics out of from logger and decouple it. //! * Rely on `serde` to provide the actual serialization for writing the metrics. //! * Since all metrics start at 0, we implement the `Default` trait via derive for all of them, to //! avoid having to initialize everything by hand. //! //! * Devices could be created in any order i.e. the first device created could either be drv0 or //! drv1 so if we use a vector for BlockDeviceMetrics and call 1st device as block0, then block0 //! could sometimes point to drv0 and sometimes to drv1 which doesn't help with analysing the //! metrics. So, use Map instead of Vec to help understand which drive the metrics actually //! belongs to. //! //! The system implements 1 type of metrics: //! * Shared Incremental Metrics (SharedIncMetrics) - dedicated for the metrics which need a counter //! (i.e the number of times an API request failed). These metrics are reset upon flush. //! //! We add BlockDeviceMetrics entries from block::metrics::METRICS into Block device instead of //! Block device having individual separate BlockDeviceMetrics entries because Block device is not //! accessible from signal handlers to flush metrics and block::metrics::METRICS is. use std::collections::BTreeMap; use std::sync::{Arc, RwLock}; use serde::ser::SerializeMap; use serde::{Serialize, Serializer}; use crate::logger::{IncMetric, LatencyAggregateMetrics, SharedIncMetric}; /// map of block drive id and metrics /// this should be protected by a lock before accessing. 
#[derive(Debug)] pub struct BlockMetricsPerDevice { /// used to access per block device metrics pub metrics: BTreeMap>, } impl BlockMetricsPerDevice { /// Allocate `BlockDeviceMetrics` for block device having /// id `drive_id`. Also, allocate only if it doesn't /// exist to avoid overwriting previously allocated data. /// lock is always initialized so it is safe the unwrap /// the lock without a check. pub fn alloc(drive_id: String) -> Arc { Arc::clone( METRICS .write() .unwrap() .metrics .entry(drive_id) .or_insert_with(|| Arc::new(BlockDeviceMetrics::default())), ) } } /// Pool of block-related metrics per device behind a lock to /// keep things thread safe. Since the lock is initialized here /// it is safe to unwrap it without any check. static METRICS: RwLock = RwLock::new(BlockMetricsPerDevice { metrics: BTreeMap::new(), }); /// This function facilitates aggregation and serialization of /// per block device metrics. pub fn flush_metrics(serializer: S) -> Result { let block_metrics = METRICS.read().unwrap(); let metrics_len = block_metrics.metrics.len(); // +1 to accommodate aggregate block metrics let mut seq = serializer.serialize_map(Some(1 + metrics_len))?; let mut block_aggregated: BlockDeviceMetrics = BlockDeviceMetrics::default(); for (name, metrics) in block_metrics.metrics.iter() { let devn = format!("block_{}", name); // serialization will flush the metrics so aggregate before it. let m: &BlockDeviceMetrics = metrics; block_aggregated.aggregate(m); seq.serialize_entry(&devn, m)?; } seq.serialize_entry("block", &block_aggregated)?; seq.end() } /// Block Device associated metrics. #[derive(Debug, Default, Serialize)] pub struct BlockDeviceMetrics { /// Number of times when activate failed on a block device. pub activate_fails: SharedIncMetric, /// Number of times when interacting with the space config of a block device failed. pub cfg_fails: SharedIncMetric, /// No available buffer for the block queue. 
pub no_avail_buffer: SharedIncMetric, /// Number of times when handling events on a block device failed. pub event_fails: SharedIncMetric, /// Number of failures in executing a request on a block device. pub execute_fails: SharedIncMetric, /// Number of invalid requests received for this block device. pub invalid_reqs_count: SharedIncMetric, /// Number of flushes operation triggered on this block device. pub flush_count: SharedIncMetric, /// Number of events triggered on the queue of this block device. pub queue_event_count: SharedIncMetric, /// Number of events ratelimiter-related. pub rate_limiter_event_count: SharedIncMetric, /// Number of update operation triggered on this block device. pub update_count: SharedIncMetric, /// Number of failures while doing update on this block device. pub update_fails: SharedIncMetric, /// Number of bytes read by this block device. pub read_bytes: SharedIncMetric, /// Number of bytes written by this block device. pub write_bytes: SharedIncMetric, /// Number of successful read operations. pub read_count: SharedIncMetric, /// Number of successful write operations. pub write_count: SharedIncMetric, /// Duration of all read operations. pub read_agg: LatencyAggregateMetrics, /// Duration of all write operations. pub write_agg: LatencyAggregateMetrics, /// Number of rate limiter throttling events. pub rate_limiter_throttled_events: SharedIncMetric, /// Number of virtio events throttled because of the IO engine. /// This happens when the io_uring submission queue is full. pub io_engine_throttled_events: SharedIncMetric, /// Number of remaining requests in the queue. pub remaining_reqs_count: SharedIncMetric, } impl BlockDeviceMetrics { /// Const default construction. pub fn new() -> Self { Self { read_agg: LatencyAggregateMetrics::new(), write_agg: LatencyAggregateMetrics::new(), ..Default::default() } } /// block metrics are SharedIncMetric where the diff of current vs /// old is serialized i.e. serialize_u64(current-old). 
/// So to have the aggregate serialized in same way we need to /// fetch the diff of current vs old metrics and add it to the /// aggregate. pub fn aggregate(&mut self, other: &Self) { self.activate_fails.add(other.activate_fails.fetch_diff()); self.cfg_fails.add(other.cfg_fails.fetch_diff()); self.no_avail_buffer.add(other.no_avail_buffer.fetch_diff()); self.event_fails.add(other.event_fails.fetch_diff()); self.execute_fails.add(other.execute_fails.fetch_diff()); self.invalid_reqs_count .add(other.invalid_reqs_count.fetch_diff()); self.flush_count.add(other.flush_count.fetch_diff()); self.queue_event_count .add(other.queue_event_count.fetch_diff()); self.rate_limiter_event_count .add(other.rate_limiter_event_count.fetch_diff()); self.update_count.add(other.update_count.fetch_diff()); self.update_fails.add(other.update_fails.fetch_diff()); self.read_bytes.add(other.read_bytes.fetch_diff()); self.write_bytes.add(other.write_bytes.fetch_diff()); self.read_count.add(other.read_count.fetch_diff()); self.write_count.add(other.write_count.fetch_diff()); self.read_agg.sum_us.add(other.read_agg.sum_us.fetch_diff()); self.write_agg .sum_us .add(other.write_agg.sum_us.fetch_diff()); self.rate_limiter_throttled_events .add(other.rate_limiter_throttled_events.fetch_diff()); self.io_engine_throttled_events .add(other.io_engine_throttled_events.fetch_diff()); self.remaining_reqs_count .add(other.remaining_reqs_count.fetch_diff()); } } #[cfg(test)] pub mod tests { use super::*; #[test] fn test_max_block_dev_metrics() { // Note: this test has nothing to do with // block structure or IRQs, this is just to allocate // metrics for max number of devices that system can have. // We have 5-23 IRQ for block devices on x86_64 so, there // are 19 block devices at max. And, even though we have more // devices on aarch64 but we stick to 19 to keep test common. const MAX_BLOCK_DEVICES: usize = 19; // This is to make sure that RwLock for block::metrics::METRICS is good. 
drop(METRICS.read().unwrap()); drop(METRICS.write().unwrap()); // block::metrics::METRICS is in short RwLock on Vec of BlockDeviceMetrics. // Normally, pointer to unique entries of block::metrics::METRICS are stored // in Block device so that Block device can do self.metrics.* to // update a metric. We try to do something similar here without // using Block device by allocating max number of // BlockDeviceMetrics in block::metrics::METRICS and store pointer to // each entry in the local `metrics` vec. // We then update 1 IncMetric and 2 SharedMetric for each metrics // and validate if the metrics for per device was updated as // expected. let mut metrics: Vec> = Vec::new(); for i in 0..MAX_BLOCK_DEVICES { let devn: String = format!("drv{}", i); metrics.push(BlockMetricsPerDevice::alloc(devn.clone())); // update IncMetric metrics[i].activate_fails.inc(); // update SharedMetric metrics[i].read_bytes.add(10); metrics[i].write_bytes.add(5); if i == 0 { // Unit tests run in parallel and we have // `test_single_block_dev_metrics` that also increases // the IncMetric count of drv0 by 1 (intentional to check // thread safety) so we check if the count is >=1. assert!(metrics[i].activate_fails.count() >= 1); // For the same reason as above since we have // another unit test running in parallel which updates // drv0 metrics we check if count is >=10. assert!(metrics[i].read_bytes.count() >= 10); } else { assert!(metrics[i].activate_fails.count() == 1); assert!(metrics[i].read_bytes.count() == 10); } assert_eq!(metrics[i].write_bytes.count(), 5); } } #[test] fn test_single_block_dev_metrics() { // Use drv0 so that we can check thread safety with the // `test_max_block_dev_metrics` which also uses the same name. let devn = "drv0"; // This is to make sure that RwLock for block::metrics::METRICS is good. 
drop(METRICS.read().unwrap()); drop(METRICS.write().unwrap()); let test_metrics = BlockMetricsPerDevice::alloc(String::from(devn)); // Test to update IncMetrics test_metrics.activate_fails.inc(); assert!( test_metrics.activate_fails.count() > 0, "{}", test_metrics.activate_fails.count() ); // We expect only 2 tests (this and test_max_block_dev_metrics) // to update activate_fails count for drv0. assert!( test_metrics.activate_fails.count() <= 2, "{}", test_metrics.activate_fails.count() ); // Test to update SharedMetrics test_metrics.read_bytes.add(5); // We expect only 2 tests (this and test_max_block_dev_metrics) // to update read_bytes count for drv0 by 5. assert!(test_metrics.read_bytes.count() >= 5); assert!(test_metrics.read_bytes.count() <= 15); } } ================================================ FILE: src/vmm/src/devices/virtio/block/virtio/mod.rs ================================================ // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 //! Implements a virtio block device. pub mod device; mod event_handler; mod io; pub mod metrics; pub mod persist; pub mod request; pub mod test_utils; use vm_memory::GuestMemoryError; pub use self::device::VirtioBlock; pub use self::request::*; pub use crate::devices::virtio::block::CacheType; use crate::devices::virtio::queue::FIRECRACKER_MAX_QUEUE_SIZE; /// Sector shift for block device. pub const SECTOR_SHIFT: u8 = 9; /// Size of block sector. pub const SECTOR_SIZE: u32 = (0x01_u32) << SECTOR_SHIFT; /// The number of queues of block device. pub const BLOCK_NUM_QUEUES: usize = 1; pub const BLOCK_QUEUE_SIZES: [u16; BLOCK_NUM_QUEUES] = [FIRECRACKER_MAX_QUEUE_SIZE]; // The virtio queue can hold up to 256 descriptors, but 1 request spreads across 2-3 descriptors. // So we can use 128 IO_URING entries without ever triggering a FullSq Error. /// Maximum number of io uring entries we allow in the queue. 
pub const IO_URING_NUM_ENTRIES: u16 = 128;

/// Errors the block device can trigger.
// NOTE: `displaydoc::Display` derives the `Display` impl from the doc comment
// on each variant, so the variant docs below double as the user-visible error
// messages ({0}/{1} interpolate the variant's payload fields). Do not edit them
// without considering the rendered error text.
#[derive(Debug, thiserror::Error, displaydoc::Display)]
pub enum VirtioBlockError {
    /// Cannot create config
    Config,
    /// Guest gave us too few descriptors in a descriptor chain.
    DescriptorChainTooShort,
    /// Guest gave us a descriptor that was too short to use.
    DescriptorLengthTooSmall,
    /// Getting a block's metadata fails for any reason.
    GetFileMetadata(std::io::Error),
    /// Guest gave us bad memory addresses.
    GuestMemory(GuestMemoryError),
    /// The data length is invalid.
    InvalidDataLength,
    /// The requested operation would cause a seek beyond disk end.
    InvalidOffset,
    /// Guest gave us a read only descriptor that protocol says to write to.
    UnexpectedReadOnlyDescriptor,
    /// Guest gave us a write only descriptor that protocol says to read from.
    UnexpectedWriteOnlyDescriptor,
    /// Error coming from the IO engine: {0}
    FileEngine(io::BlockIoError),
    /// Error manipulating the backing file: {0} {1}
    BackingFile(std::io::Error, String),
    /// Error opening eventfd: {0}
    EventFd(std::io::Error),
    /// Error creating an interrupt: {0}
    Interrupt(std::io::Error),
    /// Error coming from the rate limiter: {0}
    RateLimiter(std::io::Error),
    /// Persistence error: {0}
    Persist(crate::devices::virtio::persist::PersistError),
}

================================================
FILE: src/vmm/src/devices/virtio/block/virtio/persist.rs
================================================

// Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

//! Defines the structures needed for saving/restoring block devices.
use device::ConfigSpace;
use serde::{Deserialize, Serialize};
use vmm_sys_util::eventfd::EventFd;

use super::device::DiskProperties;
use super::*;
use crate::devices::virtio::block::persist::BlockConstructorArgs;
use crate::devices::virtio::block::virtio::device::FileEngineType;
use crate::devices::virtio::block::virtio::metrics::BlockMetricsPerDevice;
use crate::devices::virtio::device::{ActiveState, DeviceState, VirtioDeviceType};
use crate::devices::virtio::generated::virtio_blk::VIRTIO_BLK_F_RO;
use crate::devices::virtio::persist::VirtioDeviceState;
use crate::rate_limiter::RateLimiter;
use crate::rate_limiter::persist::RateLimiterState;
use crate::snapshot::Persist;

/// Holds info about block's file engine type. Gets saved in snapshot.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Serialize, Deserialize)]
pub enum FileEngineTypeState {
    /// Sync File Engine.
    // If the snap version does not contain the `FileEngineType`, it must have been snapshotted
    // on a VM using the Sync backend.
    #[default]
    Sync,
    /// Async File Engine.
    Async,
}

// Conversion from the runtime engine type into its snapshot representation.
impl From<FileEngineType> for FileEngineTypeState {
    fn from(file_engine_type: FileEngineType) -> Self {
        match file_engine_type {
            FileEngineType::Sync => FileEngineTypeState::Sync,
            FileEngineType::Async => FileEngineTypeState::Async,
        }
    }
}

// Conversion back from the snapshot representation into the runtime engine type.
impl From<FileEngineTypeState> for FileEngineType {
    fn from(file_engine_type_state: FileEngineTypeState) -> Self {
        match file_engine_type_state {
            FileEngineTypeState::Sync => FileEngineType::Sync,
            FileEngineTypeState::Async => FileEngineType::Async,
        }
    }
}

/// Holds info about the block device. Gets saved in snapshot.
#[derive(Debug, Clone, Serialize, Deserialize)] pub struct VirtioBlockState { id: String, partuuid: Option, cache_type: CacheType, root_device: bool, disk_path: String, pub virtio_state: VirtioDeviceState, rate_limiter_state: RateLimiterState, file_engine_type: FileEngineTypeState, } impl Persist<'_> for VirtioBlock { type State = VirtioBlockState; type ConstructorArgs = BlockConstructorArgs; type Error = VirtioBlockError; fn save(&self) -> Self::State { // Save device state. VirtioBlockState { id: self.id.clone(), partuuid: self.partuuid.clone(), cache_type: self.cache_type, root_device: self.root_device, disk_path: self.disk.file_path.clone(), virtio_state: VirtioDeviceState::from_device(self), rate_limiter_state: self.rate_limiter.save(), file_engine_type: FileEngineTypeState::from(self.file_engine_type()), } } fn restore( constructor_args: Self::ConstructorArgs, state: &Self::State, ) -> Result { let is_read_only = state.virtio_state.avail_features & (1u64 << VIRTIO_BLK_F_RO) != 0; let rate_limiter = RateLimiter::restore((), &state.rate_limiter_state) .map_err(VirtioBlockError::RateLimiter)?; let disk_properties = DiskProperties::new( state.disk_path.clone(), is_read_only, state.file_engine_type.into(), )?; let queue_evts = [EventFd::new(libc::EFD_NONBLOCK).map_err(VirtioBlockError::EventFd)?]; let queues = state .virtio_state .build_queues_checked( &constructor_args.mem, VirtioDeviceType::Block, BLOCK_NUM_QUEUES, FIRECRACKER_MAX_QUEUE_SIZE, ) .map_err(VirtioBlockError::Persist)?; let avail_features = state.virtio_state.avail_features; let acked_features = state.virtio_state.acked_features; let config_space = ConfigSpace { capacity: disk_properties.nsectors.to_le(), }; Ok(VirtioBlock { avail_features, acked_features, config_space, activate_evt: EventFd::new(libc::EFD_NONBLOCK).map_err(VirtioBlockError::EventFd)?, queues, queue_evts, device_state: DeviceState::Inactive, id: state.id.clone(), partuuid: state.partuuid.clone(), cache_type: state.cache_type, 
root_device: state.root_device, read_only: is_read_only, disk: disk_properties, rate_limiter, is_io_engine_throttled: false, metrics: BlockMetricsPerDevice::alloc(state.id.clone()), }) } } #[cfg(test)] mod tests { use vmm_sys_util::tempfile::TempFile; use super::*; use crate::devices::virtio::block::virtio::device::VirtioBlockConfig; use crate::devices::virtio::device::VirtioDevice; use crate::devices::virtio::test_utils::{default_interrupt, default_mem}; #[test] fn test_cache_semantic_ser() { // We create the backing file here so that it exists for the whole lifetime of the test. let f = TempFile::new().unwrap(); f.as_file().set_len(0x1000).unwrap(); let config = VirtioBlockConfig { drive_id: "test".to_string(), path_on_host: f.as_path().to_str().unwrap().to_string(), is_root_device: false, partuuid: None, is_read_only: false, cache_type: CacheType::Writeback, rate_limiter: None, file_engine_type: FileEngineType::default(), }; let block = VirtioBlock::new(config).unwrap(); // Save the block device. let block_state = block.save(); let _serialized_data = bitcode::serialize(&block_state).unwrap(); } #[test] fn test_file_engine_type() { // Test conversions between FileEngineType and FileEngineTypeState. assert_eq!( FileEngineTypeState::Async, FileEngineTypeState::from(FileEngineType::Async) ); assert_eq!( FileEngineTypeState::Sync, FileEngineTypeState::from(FileEngineType::Sync) ); assert_eq!(FileEngineType::Async, FileEngineTypeState::Async.into()); assert_eq!(FileEngineType::Sync, FileEngineTypeState::Sync.into()); // Test default impl. assert_eq!(FileEngineTypeState::default(), FileEngineTypeState::Sync); } #[test] fn test_persistence() { // We create the backing file here so that it exists for the whole lifetime of the test. 
let f = TempFile::new().unwrap(); f.as_file().set_len(0x1000).unwrap(); let config = VirtioBlockConfig { drive_id: "test".to_string(), path_on_host: f.as_path().to_str().unwrap().to_string(), is_root_device: false, partuuid: None, is_read_only: false, cache_type: CacheType::Unsafe, rate_limiter: None, file_engine_type: FileEngineType::default(), }; let block = VirtioBlock::new(config).unwrap(); let guest_mem = default_mem(); // Save the block device. let block_state = block.save(); let serialized_data = bitcode::serialize(&block_state).unwrap(); // Restore the block device. let restored_state = bitcode::deserialize(&serialized_data).unwrap(); let restored_block = VirtioBlock::restore(BlockConstructorArgs { mem: guest_mem }, &restored_state).unwrap(); // Test that virtio specific fields are the same. assert_eq!(restored_block.device_type(), VirtioDeviceType::Block); assert_eq!(restored_block.avail_features(), block.avail_features()); assert_eq!(restored_block.acked_features(), block.acked_features()); assert_eq!(restored_block.queues(), block.queues()); assert!(!block.is_activated()); assert!(!restored_block.is_activated()); // Test that block specific fields are the same. assert_eq!(restored_block.disk.file_path, block.disk.file_path); } } ================================================ FILE: src/vmm/src/devices/virtio/block/virtio/request.rs ================================================ // Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 // // Portions Copyright 2017 The Chromium OS Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the THIRD-PARTY file. 
use std::convert::From; use vm_memory::GuestMemoryError; use super::{SECTOR_SHIFT, SECTOR_SIZE, VirtioBlockError, io as block_io}; use crate::devices::virtio::block::virtio::device::DiskProperties; use crate::devices::virtio::block::virtio::metrics::BlockDeviceMetrics; pub use crate::devices::virtio::generated::virtio_blk::{ VIRTIO_BLK_ID_BYTES, VIRTIO_BLK_S_IOERR, VIRTIO_BLK_S_OK, VIRTIO_BLK_S_UNSUPP, VIRTIO_BLK_T_FLUSH, VIRTIO_BLK_T_GET_ID, VIRTIO_BLK_T_IN, VIRTIO_BLK_T_OUT, }; use crate::devices::virtio::queue::DescriptorChain; use crate::logger::{IncMetric, error}; use crate::rate_limiter::{RateLimiter, TokenType}; use crate::vstate::memory::{ByteValued, Bytes, GuestAddress, GuestMemoryMmap}; #[derive(Debug, derive_more::From)] pub enum IoErr { GetId(GuestMemoryError), PartialTransfer { completed: u32, expected: u32 }, FileEngine(block_io::BlockIoError), } #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub enum RequestType { In, Out, Flush, GetDeviceID, Unsupported(u32), } impl From for RequestType { fn from(value: u32) -> Self { match value { VIRTIO_BLK_T_IN => RequestType::In, VIRTIO_BLK_T_OUT => RequestType::Out, VIRTIO_BLK_T_FLUSH => RequestType::Flush, VIRTIO_BLK_T_GET_ID => RequestType::GetDeviceID, t => RequestType::Unsupported(t), } } } #[derive(Debug)] pub enum ProcessingResult { Submitted, Throttled, Executed(FinishedRequest), } #[derive(Debug)] pub struct FinishedRequest { pub num_bytes_to_mem: u32, pub desc_idx: u16, } #[derive(Debug)] enum Status { Ok { num_bytes_to_mem: u32 }, IoErr { num_bytes_to_mem: u32, err: IoErr }, Unsupported { op: u32 }, } impl Status { fn from_data(data_len: u32, transferred_data_len: u32, data_to_mem: bool) -> Status { let num_bytes_to_mem = match data_to_mem { true => transferred_data_len, false => 0, }; match transferred_data_len == data_len { true => Status::Ok { num_bytes_to_mem }, false => Status::IoErr { num_bytes_to_mem, err: IoErr::PartialTransfer { completed: transferred_data_len, expected: data_len, }, }, } } } 
#[derive(Debug)] pub struct PendingRequest { r#type: RequestType, data_len: u32, status_addr: GuestAddress, desc_idx: u16, } impl PendingRequest { fn write_status_and_finish( self, status: &Status, mem: &GuestMemoryMmap, block_metrics: &BlockDeviceMetrics, ) -> FinishedRequest { let (num_bytes_to_mem, status_code) = match status { Status::Ok { num_bytes_to_mem } => { (*num_bytes_to_mem, u8::try_from(VIRTIO_BLK_S_OK).unwrap()) } Status::IoErr { num_bytes_to_mem, err, } => { block_metrics.invalid_reqs_count.inc(); error!( "Failed to execute {:?} virtio block request: {:?}", self.r#type, err ); (*num_bytes_to_mem, u8::try_from(VIRTIO_BLK_S_IOERR).unwrap()) } Status::Unsupported { op } => { block_metrics.invalid_reqs_count.inc(); error!("Received unsupported virtio block request: {}", op); (0, u8::try_from(VIRTIO_BLK_S_UNSUPP).unwrap()) } }; let num_bytes_to_mem = mem .write_obj(status_code, self.status_addr) .map(|_| { // Account for the status byte num_bytes_to_mem + 1 }) .unwrap_or_else(|err| { error!("Failed to write virtio block status: {:?}", err); // If we can't write the status, discard the virtio descriptor 0 }); FinishedRequest { num_bytes_to_mem, desc_idx: self.desc_idx, } } pub fn finish( self, mem: &GuestMemoryMmap, res: Result, block_metrics: &BlockDeviceMetrics, ) -> FinishedRequest { let status = match (res, self.r#type) { (Ok(transferred_data_len), RequestType::In) => { let status = Status::from_data(self.data_len, transferred_data_len, true); block_metrics.read_bytes.add(transferred_data_len.into()); if let Status::Ok { .. } = status { block_metrics.read_count.inc(); } status } (Ok(transferred_data_len), RequestType::Out) => { let status = Status::from_data(self.data_len, transferred_data_len, false); block_metrics.write_bytes.add(transferred_data_len.into()); if let Status::Ok { .. 
} = status { block_metrics.write_count.inc(); } status } (Ok(_), RequestType::Flush) => { block_metrics.flush_count.inc(); Status::Ok { num_bytes_to_mem: 0, } } (Ok(transferred_data_len), RequestType::GetDeviceID) => { Status::from_data(self.data_len, transferred_data_len, true) } (_, RequestType::Unsupported(op)) => Status::Unsupported { op }, (Err(err), _) => Status::IoErr { num_bytes_to_mem: 0, err, }, }; self.write_status_and_finish(&status, mem, block_metrics) } } /// The request header represents the mandatory fields of each block device request. /// /// A request header contains the following fields: /// * request_type: an u32 value mapping to a read, write or flush operation. /// * reserved: 32 bits are reserved for future extensions of the Virtio Spec. /// * sector: an u64 value representing the offset where a read/write is to occur. /// /// The header simplifies reading the request from memory as all request follow /// the same memory layout. #[derive(Debug, Copy, Clone, Default)] #[repr(C)] pub struct RequestHeader { request_type: u32, _reserved: u32, sector: u64, } // SAFETY: Safe because RequestHeader only contains plain data. unsafe impl ByteValued for RequestHeader {} impl RequestHeader { pub fn new(request_type: u32, sector: u64) -> RequestHeader { RequestHeader { request_type, _reserved: 0, sector, } } /// Reads the request header from GuestMemoryMmap starting at `addr`. /// /// Virtio 1.0 specifies that the data is transmitted by the driver in little-endian /// format. Firecracker currently runs only on little endian platforms so we don't /// need to do an explicit little endian read as all reads are little endian by default. /// When running on a big endian platform, this code should not compile, and support /// for explicit little endian reads is required. 
#[cfg(target_endian = "little")] fn read_from(memory: &GuestMemoryMmap, addr: GuestAddress) -> Result { let request_header: RequestHeader = memory .read_obj(addr) .map_err(VirtioBlockError::GuestMemory)?; Ok(request_header) } } #[derive(Debug, PartialEq, Eq)] pub struct Request { pub r#type: RequestType, pub data_len: u32, pub status_addr: GuestAddress, sector: u64, data_addr: GuestAddress, } impl Request { pub fn parse( avail_desc: &DescriptorChain, mem: &GuestMemoryMmap, num_disk_sectors: u64, ) -> Result { // The head contains the request type which MUST be readable. if avail_desc.is_write_only() { return Err(VirtioBlockError::UnexpectedWriteOnlyDescriptor); } let request_header = RequestHeader::read_from(mem, avail_desc.addr)?; let mut req = Request { r#type: RequestType::from(request_header.request_type), sector: request_header.sector, data_addr: GuestAddress(0), data_len: 0, status_addr: GuestAddress(0), }; let data_desc; let status_desc; let desc = avail_desc .next_descriptor() .ok_or(VirtioBlockError::DescriptorChainTooShort)?; if !desc.has_next() { status_desc = desc; // Only flush requests are allowed to skip the data descriptor. 
if req.r#type != RequestType::Flush { return Err(VirtioBlockError::DescriptorChainTooShort); } } else { data_desc = desc; status_desc = data_desc .next_descriptor() .ok_or(VirtioBlockError::DescriptorChainTooShort)?; if data_desc.is_write_only() && req.r#type == RequestType::Out { return Err(VirtioBlockError::UnexpectedWriteOnlyDescriptor); } if !data_desc.is_write_only() && req.r#type == RequestType::In { return Err(VirtioBlockError::UnexpectedReadOnlyDescriptor); } if !data_desc.is_write_only() && req.r#type == RequestType::GetDeviceID { return Err(VirtioBlockError::UnexpectedReadOnlyDescriptor); } req.data_addr = data_desc.addr; req.data_len = data_desc.len; } // check request validity match req.r#type { RequestType::In | RequestType::Out => { // Check that the data length is a multiple of 512 as specified in the virtio // standard. if !req.data_len.is_multiple_of(SECTOR_SIZE) { return Err(VirtioBlockError::InvalidDataLength); } let top_sector = req .sector .checked_add(u64::from(req.data_len) >> SECTOR_SHIFT) .ok_or(VirtioBlockError::InvalidOffset)?; if top_sector > num_disk_sectors { return Err(VirtioBlockError::InvalidOffset); } } RequestType::GetDeviceID => { if req.data_len < VIRTIO_BLK_ID_BYTES { return Err(VirtioBlockError::InvalidDataLength); } } _ => {} } // The status MUST always be writable. if !status_desc.is_write_only() { return Err(VirtioBlockError::UnexpectedReadOnlyDescriptor); } if status_desc.len < 1 { return Err(VirtioBlockError::DescriptorLengthTooSmall); } req.status_addr = status_desc.addr; Ok(req) } pub(crate) fn rate_limit(&self, rate_limiter: &mut RateLimiter) -> bool { // If limiter.consume() fails it means there is no more TokenType::Ops // budget and rate limiting is in effect. if !rate_limiter.consume(1, TokenType::Ops) { return true; } // Exercise the rate limiter only if this request is of data transfer type. 
if self.r#type == RequestType::In || self.r#type == RequestType::Out { // If limiter.consume() fails it means there is no more TokenType::Bytes // budget and rate limiting is in effect. if !rate_limiter.consume(u64::from(self.data_len), TokenType::Bytes) { // Revert the OPS consume(). rate_limiter.manual_replenish(1, TokenType::Ops); return true; } } false } fn offset(&self) -> u64 { self.sector << SECTOR_SHIFT } fn to_pending_request(&self, desc_idx: u16) -> PendingRequest { PendingRequest { r#type: self.r#type, data_len: self.data_len, status_addr: self.status_addr, desc_idx, } } pub(crate) fn process( self, disk: &mut DiskProperties, desc_idx: u16, mem: &GuestMemoryMmap, block_metrics: &BlockDeviceMetrics, ) -> ProcessingResult { let pending = self.to_pending_request(desc_idx); let res = match self.r#type { RequestType::In => { let _metric = block_metrics.read_agg.record_latency_metrics(); disk.file_engine .read(self.offset(), mem, self.data_addr, self.data_len, pending) } RequestType::Out => { let _metric = block_metrics.write_agg.record_latency_metrics(); disk.file_engine .write(self.offset(), mem, self.data_addr, self.data_len, pending) } RequestType::Flush => disk.file_engine.flush(pending), RequestType::GetDeviceID => { let res = mem .write_slice(&disk.image_id, self.data_addr) .map(|_| VIRTIO_BLK_ID_BYTES) .map_err(IoErr::GetId); return ProcessingResult::Executed(pending.finish(mem, res, block_metrics)); } RequestType::Unsupported(_) => { return ProcessingResult::Executed(pending.finish(mem, Ok(0), block_metrics)); } }; match res { Ok(block_io::FileEngineOk::Submitted) => ProcessingResult::Submitted, Ok(block_io::FileEngineOk::Executed(res)) => { ProcessingResult::Executed(res.req.finish(mem, Ok(res.count), block_metrics)) } Err(err) => { if err.error.is_throttling_err() { ProcessingResult::Throttled } else { ProcessingResult::Executed(err.req.finish( mem, Err(IoErr::FileEngine(err.error)), block_metrics, )) } } } } } #[cfg(test)] mod tests { 
#![allow(clippy::undocumented_unsafe_blocks)] use super::*; use crate::devices::virtio::queue::{Queue, VIRTQ_DESC_F_NEXT, VIRTQ_DESC_F_WRITE}; use crate::devices::virtio::test_utils::{VirtQueue, default_mem}; use crate::vstate::memory::{Address, GuestAddress, GuestMemory}; const NUM_DISK_SECTORS: u64 = 1024; impl Default for PendingRequest { fn default() -> Self { PendingRequest { r#type: RequestType::In, data_len: 0, status_addr: Default::default(), desc_idx: 0, } } } #[test] fn test_read_request_header() { let mem = single_region_mem(0x1000); let addr = GuestAddress(0); let sector = 123_454_321; // Test that all supported request types are read correctly from memory. let supported_request_types = vec![ VIRTIO_BLK_T_IN, VIRTIO_BLK_T_OUT, VIRTIO_BLK_T_FLUSH, VIRTIO_BLK_T_GET_ID, ]; for request_type in supported_request_types { let expected_header = RequestHeader::new(request_type, sector); mem.write_obj::(expected_header, addr) .unwrap(); let actual_header = RequestHeader::read_from(&mem, addr).unwrap(); assert_eq!(actual_header.request_type, expected_header.request_type); assert_eq!(actual_header.sector, expected_header.sector); } // Test that trying to read a request header that goes outside of the // memory boundary fails. 
RequestHeader::read_from(&mem, GuestAddress(0x1000)).unwrap_err(); } #[test] fn test_request_type_from() { assert_eq!(RequestType::from(VIRTIO_BLK_T_IN), RequestType::In); assert_eq!(RequestType::from(VIRTIO_BLK_T_OUT), RequestType::Out); assert_eq!(RequestType::from(VIRTIO_BLK_T_FLUSH), RequestType::Flush); assert_eq!( RequestType::from(VIRTIO_BLK_T_GET_ID), RequestType::GetDeviceID ); assert_eq!(RequestType::from(42), RequestType::Unsupported(42)); } impl RequestDescriptorChain<'_, '_> { fn check_parse_err(&self, _e: VirtioBlockError) { let mut q = self.driver_queue.create_queue(); let memory = self.driver_queue.memory(); assert!(matches!( Request::parse(&q.pop().unwrap().unwrap(), memory, NUM_DISK_SECTORS), Err(_e) )); } fn check_parse(&self, check_data: bool) { let mut q = self.driver_queue.create_queue(); let memory = self.driver_queue.memory(); let request = Request::parse(&q.pop().unwrap().unwrap(), memory, NUM_DISK_SECTORS).unwrap(); let expected_header = self.header(); assert_eq!( request.r#type, RequestType::from(expected_header.request_type) ); assert_eq!(request.sector, expected_header.sector); if check_data { assert_eq!(request.data_addr.raw_value(), self.data_desc.addr.get()); assert_eq!(request.data_len, self.data_desc.len.get()); } assert_eq!(request.status_addr.raw_value(), self.status_desc.addr.get()); } } #[test] fn test_parse_generic() { let mem = &default_mem(); let queue = VirtQueue::new(GuestAddress(0), mem, 16); let chain = RequestDescriptorChain::new(&queue); let request_header = RequestHeader::new(100, 114); chain.set_header(request_header); // Write only request type descriptor. chain.header_desc.flags.set(VIRTQ_DESC_F_WRITE); chain.check_parse_err(VirtioBlockError::UnexpectedWriteOnlyDescriptor); // Chain too short: no DATA_DESCRIPTOR. chain.header_desc.flags.set(0); chain.check_parse_err(VirtioBlockError::DescriptorChainTooShort); // Chain too short: no status descriptor. 
chain.header_desc.flags.set(VIRTQ_DESC_F_NEXT); chain.data_desc.flags.set(0); chain.check_parse_err(VirtioBlockError::DescriptorChainTooShort); // Status descriptor not writable. chain.data_desc.flags.set(VIRTQ_DESC_F_NEXT); chain.status_desc.flags.set(0); chain.check_parse_err(VirtioBlockError::UnexpectedReadOnlyDescriptor); // Status descriptor too small. chain.status_desc.flags.set(VIRTQ_DESC_F_WRITE); chain.status_desc.len.set(0); chain.check_parse_err(VirtioBlockError::DescriptorLengthTooSmall); // Fix status descriptor length. chain.status_desc.len.set(0x1000); // Invalid guest address for the status descriptor. Parsing will still succeed // as the operation that will fail happens when executing the request. chain.status_desc.addr.set(mem.last_addr().raw_value()); chain.check_parse(true); // Fix status descriptor addr. chain.status_desc.addr.set(0x3000); // Invalid guest address for the data descriptor. Parsing will still succeed // as the operation that will fail happens when executing the request. chain.data_desc.addr.set(mem.last_addr().raw_value()); chain.check_parse(true); // Fix data descriptor addr. chain.data_desc.addr.set(0x2000); chain.check_parse(true); } #[test] fn test_parse_in() { let mem = &default_mem(); let queue = VirtQueue::new(GuestAddress(0), mem, 16); let chain = RequestDescriptorChain::new(&queue); let mut request_header = RequestHeader::new(VIRTIO_BLK_T_IN, 99); chain.set_header(request_header); // Read only data descriptor for IN. chain.data_desc.flags.set(VIRTQ_DESC_F_NEXT); chain.check_parse_err(VirtioBlockError::UnexpectedReadOnlyDescriptor); // data_len is not multiple of 512 for IN. chain .data_desc .flags .set(VIRTQ_DESC_F_NEXT | VIRTQ_DESC_F_WRITE); chain.data_desc.len.set(513); chain.check_parse_err(VirtioBlockError::InvalidDataLength); // sector is to big. 
request_header.sector = NUM_DISK_SECTORS; chain.data_desc.len.set(512); chain.set_header(request_header); chain.check_parse_err(VirtioBlockError::InvalidOffset); // Fix data descriptor. request_header.sector = NUM_DISK_SECTORS - 1; chain.set_header(request_header); chain.check_parse(true); } #[test] fn test_parse_out() { let mem = &default_mem(); let queue = VirtQueue::new(GuestAddress(0), mem, 16); let chain = RequestDescriptorChain::new(&queue); let mut request_header = RequestHeader::new(VIRTIO_BLK_T_OUT, 100); chain.set_header(request_header); // Write only data descriptor for OUT. chain .data_desc .flags .set(VIRTQ_DESC_F_NEXT | VIRTQ_DESC_F_WRITE); chain.check_parse_err(VirtioBlockError::UnexpectedWriteOnlyDescriptor); // data_len is not multiple of 512 for IN. chain.data_desc.flags.set(VIRTQ_DESC_F_NEXT); chain.data_desc.len.set(1000); chain.check_parse_err(VirtioBlockError::InvalidDataLength); // sector is to big. request_header.sector = NUM_DISK_SECTORS - 1; chain.data_desc.len.set(1024); chain.set_header(request_header); chain.check_parse_err(VirtioBlockError::InvalidOffset); // Fix header descriptor. request_header.sector = NUM_DISK_SECTORS - 2; chain.set_header(request_header); chain.check_parse(true); } #[test] fn test_parse_flush() { let mem = &default_mem(); let queue = VirtQueue::new(GuestAddress(0), mem, 16); let chain = RequestDescriptorChain::new(&queue); // Flush request with a data descriptor. let request_header = RequestHeader::new(VIRTIO_BLK_T_FLUSH, 50); chain.set_header(request_header); chain.check_parse(true); // Flush request without a data descriptor. chain.header_desc.next.set(2); chain.check_parse(false); } #[test] fn test_parse_get_id() { let mem = &default_mem(); let queue = VirtQueue::new(GuestAddress(0), mem, 16); let chain = RequestDescriptorChain::new(&queue); let request_header = RequestHeader::new(VIRTIO_BLK_T_GET_ID, 15); chain.set_header(request_header); // Read only data descriptor for GetDeviceId. 
chain.data_desc.flags.set(VIRTQ_DESC_F_NEXT); chain.check_parse_err(VirtioBlockError::UnexpectedReadOnlyDescriptor); // data_len is < VIRTIO_BLK_ID_BYTES for GetDeviceID. chain .data_desc .flags .set(VIRTQ_DESC_F_NEXT | VIRTQ_DESC_F_WRITE); chain.data_desc.len.set(VIRTIO_BLK_ID_BYTES - 1); chain.check_parse_err(VirtioBlockError::InvalidDataLength); chain.data_desc.len.set(VIRTIO_BLK_ID_BYTES); chain.check_parse(true); } use std::convert::TryInto; /// ------------------------------------- /// BEGIN PROPERTY BASED TESTING use proptest::arbitrary::Arbitrary; use proptest::prelude::*; use proptest::strategy::{Map, Strategy, TupleUnion}; use crate::devices::virtio::block::virtio::test_utils::RequestDescriptorChain; use crate::test_utils::{multi_region_mem, single_region_mem}; // Implements a "strategy" for producing arbitrary values of RequestType. // This can also be generated by a derive macro from `proptest_derive`, but the crate // is currently experimental. // Since we are dealing with a very complex type we need to turn off the clippy // warning. #[allow(clippy::type_complexity)] impl Arbitrary for RequestType { type Parameters = ::Parameters; // Tuple union will hold the strategies that we use to generate the request type. // The first element is the weight of the strategy, the second is a function that // returns the strategy value. type Strategy = TupleUnion<( (u32, std::sync::Arc Self>), (u32, std::sync::Arc Self>), (u32, std::sync::Arc Self>), (u32, std::sync::Arc Self>), ( u32, std::sync::Arc::Strategy, fn(u32) -> Self>>, ), )>; fn arbitrary_with(_: Self::Parameters) -> Self::Strategy { // All strategies have the same weight, there is no reson currently to skew // the rations to increase the odds of a specific request type. 
TupleUnion::new(( (1u32, std::sync::Arc::new(|| RequestType::In {})), (1u32, std::sync::Arc::new(|| RequestType::Out {})), (1u32, std::sync::Arc::new(|| RequestType::Flush {})), (1u32, std::sync::Arc::new(|| RequestType::GetDeviceID {})), ( 1u32, std::sync::Arc::new(Strategy::prop_map(any::(), |id| { // Random unsupported requests for our implementation start at // VIRTIO_BLK_T_GET_ID + 1 = 9. // This can be further refined to include unsupported requests ids < 9. RequestType::Unsupported(id.checked_add(9).unwrap_or(9)) })), ), )) } } impl From for u32 { fn from(request_type: RequestType) -> u32 { match request_type { RequestType::In => VIRTIO_BLK_T_IN, RequestType::Out => VIRTIO_BLK_T_OUT, RequestType::Flush => VIRTIO_BLK_T_FLUSH, RequestType::GetDeviceID => VIRTIO_BLK_T_GET_ID, RequestType::Unsupported(id) => id, } } } // Returns flags based on the request type. fn request_type_flags(request_type: RequestType) -> u16 { match request_type { RequestType::In => VIRTQ_DESC_F_NEXT | VIRTQ_DESC_F_WRITE, RequestType::Out => VIRTQ_DESC_F_NEXT, RequestType::Flush => VIRTQ_DESC_F_NEXT, RequestType::GetDeviceID => VIRTQ_DESC_F_NEXT | VIRTQ_DESC_F_WRITE, RequestType::Unsupported(_) => VIRTQ_DESC_F_NEXT, } } #[allow(clippy::let_with_type_underscore)] fn random_request_parse() -> impl Strategy, GuestMemoryMmap, Queue)> { // In this strategy we are going to generate random Requests/Errors and map them // to an input descriptor chain. // // We will check that Request::parse() arrives at the same result after // parsing the descriptor chain. Input properties are validated and commented below. 
( any::(), // random data buffer sparsity factor any::(), // data_len any::(), // sector any::(), // request type any::<[bool; 10]>(), // coin ) .prop_map(|(sparsity, data_len, sector, request_type, coins)| { ( sparsity, data_len, sector, request_type, request_type.into(), coins, ) }) .prop_map( |(sparsity, data_len, sector, request_type, virtio_request_id, coins)| { do_random_request_parse( sparsity, data_len, sector, request_type, virtio_request_id, &coins, ) }, ) } fn do_random_request_parse( sparsity: u64, data_len: u32, sector: u64, request_type: RequestType, virtio_request_id: u32, coins_arr: &[bool], ) -> (Result, GuestMemoryMmap, Queue) { let coins = &mut coins_arr.iter(); // Randomize descriptor addresses. Assumed page size as max buffer len. let base_addr = sparsity & 0x0000_FFFF_FFFF_F000; // 48 bit base, page aligned. let max_desc_len: u32 = 0x1000; // First addr starts at page base + 1. let req_type_addr = GuestAddress(base_addr).checked_add(0x1000).unwrap(); // Use first 4 bits of randomness to shift the gap size between this descriptor // and the next one. let mut next_desc_dist = u64::from(max_desc_len) + (0x1000 << (sparsity & 0xF)); let data_addr = req_type_addr.checked_add(next_desc_dist).unwrap(); // Use next 4 bits of randomness to shift gap size between this descriptor // and the next one. next_desc_dist = u64::from(max_desc_len) + (0x1000 << ((sparsity & 0xF0) >> 4)); let status_addr = data_addr.checked_add(next_desc_dist).unwrap(); let mem_end = status_addr.checked_add(u64::from(max_desc_len)).unwrap(); let mem = multi_region_mem(&[( GuestAddress(base_addr), (mem_end.0 - base_addr).try_into().unwrap(), )]); let vq = VirtQueue::new(GuestAddress(base_addr), &mem, 16); let chain = RequestDescriptorChain::new(&vq); let q = vq.create_queue(); // Make sure that data_len is a multiple of 512 // and that 512 <= data_len <= (4096 + 512). 
let valid_data_len = ((data_len & 4096) | (SECTOR_SIZE - 1)) + 1; let sectors_len = u64::from(valid_data_len / SECTOR_SIZE); // Craft a random request with the randomized parameters. let mut request = Request { r#type: request_type, data_len: valid_data_len, status_addr, sector: sector & (NUM_DISK_SECTORS - sectors_len), data_addr, }; let mut request_header = RequestHeader::new(virtio_request_id, request.sector); chain.header_desc.addr.set(req_type_addr.0); chain.header_desc.len.set(max_desc_len); chain.set_header(request_header); // Flush requests have no data desc. if request.r#type == RequestType::Flush { request.data_addr = GuestAddress(0); request.data_len = 0; chain.header_desc.next.set(2); } else { chain.data_desc.set( request.data_addr.0, request.data_len, request_type_flags(request.r#type), 2, ); } chain .status_desc .set(request.status_addr.0, 1, VIRTQ_DESC_F_WRITE, 0); // Flip a coin - should we generate a valid request or an error. if *coins.next().unwrap() { return (Ok(request), mem, q); } // This is the initial correct value. let data_desc_flags = &chain.data_desc.flags; // Flip coin - corrupt the status desc len. if *coins.next().unwrap() { chain.status_desc.len.set(0); return (Err(VirtioBlockError::DescriptorLengthTooSmall), mem, q); } // Flip coin - corrupt data desc next flag. // Exception: flush requests do not have data desc. if *coins.next().unwrap() && request.r#type != RequestType::Flush { data_desc_flags.set(data_desc_flags.get() & !VIRTQ_DESC_F_NEXT); return (Err(VirtioBlockError::DescriptorChainTooShort), mem, q); } // Flip coin - req type desc is write only. if *coins.next().unwrap() { let hdr_desc_flags = &chain.header_desc.flags; hdr_desc_flags.set(hdr_desc_flags.get() | VIRTQ_DESC_F_WRITE); return (Err(VirtioBlockError::UnexpectedWriteOnlyDescriptor), mem, q); } // Corrupt data desc accessibility if *coins.next().unwrap() { match request.r#type { // Readonly buffer is writable. 
RequestType::Out => { data_desc_flags.set(data_desc_flags.get() | VIRTQ_DESC_F_WRITE); return (Err(VirtioBlockError::UnexpectedWriteOnlyDescriptor), mem, q); } // Writeable buffer is readonly. RequestType::In | RequestType::GetDeviceID => { data_desc_flags.set(data_desc_flags.get() & !VIRTQ_DESC_F_WRITE); return (Err(VirtioBlockError::UnexpectedReadOnlyDescriptor), mem, q); } _ => {} }; } // Flip coin - Corrupt data_len if *coins.next().unwrap() { match request.r#type { RequestType::In | RequestType::Out => { // data_len is not a multiple of 512 chain .data_desc .len .set(valid_data_len + (data_len % 511) + 1); return (Err(VirtioBlockError::InvalidDataLength), mem, q); } RequestType::GetDeviceID => { // data_len is < VIRTIO_BLK_ID_BYTES chain .data_desc .len .set(data_len & (VIRTIO_BLK_ID_BYTES - 1)); return (Err(VirtioBlockError::InvalidDataLength), mem, q); } _ => {} }; } // Flip coin - Corrupt sector if *coins.next().unwrap() { match request.r#type { RequestType::In | RequestType::Out => { request_header.sector = (sector | NUM_DISK_SECTORS) + 1; chain.set_header(request_header); return (Err(VirtioBlockError::InvalidOffset), mem, q); } _ => {} }; } // Simulate no status descriptor. chain.header_desc.flags.set(0); (Err(VirtioBlockError::DescriptorChainTooShort), mem, q) } macro_rules! assert_err { ($expression:expr, $($pattern:tt)+) => { match $expression { $($pattern)+ => (), ref err => { println!("expected `{}` but got `{:?}`", stringify!($($pattern)+), err); prop_assert!(false) } } } } #[test] fn parse_random_requests() { let cfg = ProptestConfig::with_cases(1000); proptest!(cfg, |(mut request in random_request_parse())| { let result = Request::parse(&request.2.pop().unwrap().unwrap(), &request.1, NUM_DISK_SECTORS); match result { Ok(r) => prop_assert!(r == request.0.unwrap()), Err(err) => { // Avoiding implementation of PartialEq which requires that even more types like // GuestMemoryError implement it. 
match request.0.unwrap_err() { VirtioBlockError::DescriptorChainTooShort => assert_err!(err, VirtioBlockError::DescriptorChainTooShort), VirtioBlockError::DescriptorLengthTooSmall => assert_err!(err, VirtioBlockError::DescriptorLengthTooSmall), VirtioBlockError::InvalidDataLength => assert_err!(err, VirtioBlockError::InvalidDataLength), VirtioBlockError::InvalidOffset => assert_err!(err, VirtioBlockError::InvalidOffset), VirtioBlockError::UnexpectedWriteOnlyDescriptor => assert_err!(err, VirtioBlockError::UnexpectedWriteOnlyDescriptor), VirtioBlockError::UnexpectedReadOnlyDescriptor => assert_err!(err, VirtioBlockError::UnexpectedReadOnlyDescriptor), _ => unreachable!() } } } }); } } ================================================ FILE: src/vmm/src/devices/virtio/block/virtio/test_utils.rs ================================================ // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 #![doc(hidden)] #[cfg(test)] use std::thread; #[cfg(test)] use std::time::Duration; use vmm_sys_util::tempfile::TempFile; use super::RequestHeader; use super::device::VirtioBlockConfig; use crate::devices::virtio::block::virtio::device::FileEngineType; #[cfg(test)] use crate::devices::virtio::block::virtio::io::FileEngine; use crate::devices::virtio::block::virtio::{CacheType, VirtioBlock}; #[cfg(test)] use crate::devices::virtio::device::VirtioDevice; use crate::devices::virtio::queue::{Queue, VIRTQ_DESC_F_NEXT, VIRTQ_DESC_F_WRITE}; use crate::devices::virtio::test_utils::{VirtQueue, VirtqDesc}; #[cfg(test)] use crate::devices::virtio::transport::VirtioInterruptType; use crate::rate_limiter::RateLimiter; use crate::vmm_config::{RateLimiterConfig, TokenBucketConfig}; use crate::vstate::memory::{Bytes, GuestAddress}; /// Create a default Block instance to be used in tests. pub fn default_block(file_engine_type: FileEngineType) -> VirtioBlock { // Create backing file. 
let f = TempFile::new().unwrap(); f.as_file().set_len(0x1000).unwrap(); default_block_with_path(f.as_path().to_str().unwrap().to_string(), file_engine_type) } /// Create a default Block instance using file at the specified path to be used in tests. pub fn default_block_with_path(path: String, file_engine_type: FileEngineType) -> VirtioBlock { let config = VirtioBlockConfig { drive_id: "test".to_string(), path_on_host: path, is_root_device: false, partuuid: None, is_read_only: false, cache_type: CacheType::Unsafe, // Rate limiting is enabled but with a high operation rate (10 million ops/s). rate_limiter: Some(RateLimiterConfig { bandwidth: Some(TokenBucketConfig { size: 0, one_time_burst: Some(0), refill_time: 0, }), ops: Some(TokenBucketConfig { size: 100_000, one_time_burst: Some(0), refill_time: 10, }), }), file_engine_type, }; // The default block device is read-write and non-root. VirtioBlock::new(config).unwrap() } pub fn set_queue(blk: &mut VirtioBlock, idx: usize, q: Queue) { blk.queues[idx] = q; } pub fn set_rate_limiter(blk: &mut VirtioBlock, rl: RateLimiter) { blk.rate_limiter = rl; } pub fn rate_limiter(blk: &mut VirtioBlock) -> &RateLimiter { &blk.rate_limiter } #[cfg(test)] pub fn simulate_queue_event(b: &mut VirtioBlock, maybe_expected_irq: Option) { // Trigger the queue event. b.queue_evts[0].write(1).unwrap(); // Handle event. b.process_queue_event(); // Validate the queue operation finished successfully. if let Some(expected_irq) = maybe_expected_irq { assert_eq!( b.interrupt_trigger() .has_pending_interrupt(VirtioInterruptType::Queue(0)), expected_irq ); } } #[cfg(test)] pub fn simulate_async_completion_event(b: &mut VirtioBlock, expected_irq: bool) { if let FileEngine::Async(ref mut engine) = b.disk.file_engine { // Wait for all the async operations to complete. engine.drain(false).unwrap(); // Wait for the async completion event to be sent. thread::sleep(Duration::from_millis(150)); // Handle event. 
b.process_async_completion_event(); } // Validate if there are pending IRQs. assert_eq!( b.interrupt_trigger() .has_pending_interrupt(VirtioInterruptType::Queue(0)), expected_irq ); } #[cfg(test)] pub fn simulate_queue_and_async_completion_events(b: &mut VirtioBlock, expected_irq: bool) { match b.disk.file_engine { FileEngine::Async(_) => { simulate_queue_event(b, None); simulate_async_completion_event(b, expected_irq); } FileEngine::Sync(_) => { simulate_queue_event(b, Some(expected_irq)); } } } /// Structure encapsulating the virtq descriptors of a single request to the block device #[derive(Debug)] pub struct RequestDescriptorChain<'a, 'b> { pub driver_queue: &'b VirtQueue<'a>, pub header_desc: &'b VirtqDesc<'a>, pub data_desc: &'b VirtqDesc<'a>, pub status_desc: &'b VirtqDesc<'a>, } impl<'a, 'b> RequestDescriptorChain<'a, 'b> { /// Creates a new [`RequestDescriptor´] chain in the given [`VirtQueue`] /// /// The header, data and status descriptors are put into the first three indices in /// the queue's descriptor table. They point to address 0x1000, 0x2000 and 0x3000 in guest /// memory, respectively, and each have their `len` set to 0x1000. /// /// The data descriptor is initialized to be write_only pub fn new(vq: &'b VirtQueue<'a>) -> Self { read_blk_req_descriptors(vq); RequestDescriptorChain { driver_queue: vq, header_desc: &vq.dtable[0], data_desc: &vq.dtable[1], status_desc: &vq.dtable[2], } } pub fn header(&self) -> RequestHeader { self.header_desc .memory() .read_obj(GuestAddress(self.header_desc.addr.get())) .unwrap() } pub fn set_header(&self, header: RequestHeader) { self.header_desc .memory() .write_obj(header, GuestAddress(self.header_desc.addr.get())) .unwrap() } } /// Puts a descriptor chain of length three into the given [`VirtQueue`]. /// /// This chain follows the skeleton of a block device request, e.g. 
the first
/// descriptor offers space for the header (readonly), the second descriptor offers space
/// for the data (set to writeonly, if you want a write request, update to readonly),
/// and the last descriptor for the device-written status field (writeonly).
///
/// The head of the chain is made available as the first descriptor to be processed, by
/// setting avail_idx to 1.
pub fn read_blk_req_descriptors(vq: &VirtQueue) {
    // Descriptor table indices of the three-part request chain.
    let request_type_desc: u16 = 0;
    let data_desc: u16 = 1;
    let status_desc: u16 = 2;

    // Guest-physical addresses backing each descriptor; each buffer spans one 0x1000 page.
    let request_addr: u64 = 0x1000;
    let data_addr: u64 = 0x2000;
    let status_addr: u64 = 0x3000;
    let len = 0x1000;

    // Set the request type descriptor: device-readable, chained to the data descriptor.
    vq.avail.ring[request_type_desc as usize].set(request_type_desc);
    vq.dtable[request_type_desc as usize].set(request_addr, len, VIRTQ_DESC_F_NEXT, data_desc);

    // Set the data descriptor: device-writable by default, chained to the status descriptor.
    vq.avail.ring[data_desc as usize].set(data_desc);
    vq.dtable[data_desc as usize].set(
        data_addr,
        len,
        VIRTQ_DESC_F_NEXT | VIRTQ_DESC_F_WRITE,
        status_desc,
    );

    // Set the status descriptor. Its `next` field is ignored because VIRTQ_DESC_F_NEXT
    // is not set, so this descriptor terminates the chain.
    vq.avail.ring[status_desc as usize].set(status_desc);
    vq.dtable[status_desc as usize].set(status_addr, len, VIRTQ_DESC_F_WRITE, status_desc + 1);

    // Mark the next available descriptor.
    vq.avail.idx.set(1);
}


================================================
FILE: src/vmm/src/devices/virtio/device.rs
================================================
// Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the THIRD-PARTY file.
use std::fmt; use std::sync::Arc; use std::sync::atomic::AtomicU32; use serde::{Deserialize, Serialize}; use vmm_sys_util::eventfd::EventFd; use super::ActivateError; use super::queue::{Queue, QueueError}; use super::transport::VirtioInterrupt; use crate::MutEventSubscriber; use crate::devices::virtio::AsAny; use crate::devices::virtio::generated::virtio_ids; use crate::logger::{error, info, warn}; use crate::vstate::memory::GuestMemoryMmap; /// State of an active VirtIO device #[derive(Debug, Clone)] pub struct ActiveState { pub mem: GuestMemoryMmap, pub interrupt: Arc, } /// Enum that indicates if a VirtioDevice is inactive or has been activated /// and memory attached to it. #[derive(Debug)] pub enum DeviceState { Inactive, Activated(ActiveState), } impl DeviceState { /// Checks if the device is activated. pub fn is_activated(&self) -> bool { match self { DeviceState::Inactive => false, DeviceState::Activated(_) => true, } } /// Gets the memory and interrupt attached to the device if it is activated. pub fn active_state(&self) -> Option<&ActiveState> { match self { DeviceState::Activated(state) => Some(state), DeviceState::Inactive => None, } } } /// Type of a virtio device /// Represent it as u8 to give it a known size. /// All used types fit in u8. #[allow(clippy::cast_possible_truncation)] #[repr(u8)] #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] pub enum VirtioDeviceType { Net = virtio_ids::VIRTIO_ID_NET as u8, Block = virtio_ids::VIRTIO_ID_BLOCK as u8, Rng = virtio_ids::VIRTIO_ID_RNG as u8, Balloon = virtio_ids::VIRTIO_ID_BALLOON as u8, Vsock = virtio_ids::VIRTIO_ID_VSOCK as u8, Mem = virtio_ids::VIRTIO_ID_MEM as u8, Pmem = virtio_ids::VIRTIO_ID_PMEM as u8, } /// Trait for virtio devices to be driven by a virtio transport. /// /// The lifecycle of a virtio device is to be moved to a virtio transport, which will then query the /// device. 
The virtio devices needs to create queues, events and event fds for interrupts and /// expose them to the transport via get_queues/get_queue_events/get_interrupt/get_interrupt_status /// fns. pub trait VirtioDevice: AsAny + MutEventSubscriber + Send { /// Get the available features offered by device. fn avail_features(&self) -> u64; /// Get acknowledged features of the driver. fn acked_features(&self) -> u64; /// Set acknowledged features of the driver. /// This function must maintain the following invariant: /// - self.avail_features() & self.acked_features() = self.get_acked_features() fn set_acked_features(&mut self, acked_features: u64); /// Check if virtio device has negotiated given feature. fn has_feature(&self, feature: u64) -> bool { (self.acked_features() & (1 << feature)) != 0 } /// The virtio device type (as a constant of the struct). fn const_device_type() -> VirtioDeviceType where Self: Sized; /// The virtio device type. /// /// It should be the same as returned by Self::const_device_type(). fn device_type(&self) -> VirtioDeviceType; /// Returns unique device id fn id(&self) -> &str; /// Returns the device queues. fn queues(&self) -> &[Queue]; /// Returns a mutable reference to the device queues. fn queues_mut(&mut self) -> &mut [Queue]; /// Returns the device queues event fds. fn queue_events(&self) -> &[EventFd]; /// Returns the current device interrupt status. fn interrupt_status(&self) -> Arc { self.interrupt_trigger().status() } fn interrupt_trigger(&self) -> &dyn VirtioInterrupt; /// The set of feature bits shifted by `page * 32`. fn avail_features_by_page(&self, page: u32) -> u32 { let avail_features = self.avail_features(); match page { // Get the lower 32-bits of the features bitfield. 0 => (avail_features & 0xFFFFFFFF) as u32, // Get the upper 32-bits of the features bitfield. 
1 => (avail_features >> 32) as u32, _ => { warn!("Received request for unknown features page."); 0u32 } } } /// Acknowledges that this set of features should be enabled. fn ack_features_by_page(&mut self, page: u32, value: u32) { let mut v = match page { 0 => u64::from(value), 1 => u64::from(value) << 32, _ => { warn!("Cannot acknowledge unknown features page: {}", page); 0u64 } }; // Check if the guest is ACK'ing a feature that we didn't claim to have. let avail_features = self.avail_features(); let unrequested_features = v & !avail_features; if unrequested_features != 0 { warn!("Received acknowledge request for unknown feature: {:#x}", v); // Don't count these features as acked. v &= !unrequested_features; } self.set_acked_features(self.acked_features() | v); } /// Reads this device configuration space at `offset`. fn read_config(&self, offset: u64, data: &mut [u8]); /// Writes to this device configuration space at `offset`. fn write_config(&mut self, offset: u64, data: &[u8]); /// Performs the formal activation for a device, which can be verified also with `is_activated`. fn activate( &mut self, mem: GuestMemoryMmap, interrupt: Arc, ) -> Result<(), ActivateError>; /// Checks if the resources of this device are activated. fn is_activated(&self) -> bool; /// Optionally deactivates this device and returns ownership of the guest memory map, interrupt /// event, and queue events. fn reset(&mut self) -> Option<(Arc, Vec)> { None } /// Mark pages used by queues as dirty. fn mark_queue_memory_dirty(&mut self, mem: &GuestMemoryMmap) -> Result<(), QueueError> { for queue in self.queues_mut() { queue.initialize(mem)? } Ok(()) } /// Notify all queues by writing to the eventfds. 
fn notify_queue_events(&mut self) { info!("[{:?}:{}] notifying queues", self.device_type(), self.id()); for (i, eventfd) in self.queue_events().iter().enumerate() { if let Err(err) = eventfd.write(1) { error!( "[{:?}:{}] error notifying queue {}: {}", self.device_type(), self.id(), i, err ); } } } /// Kick the device, as if it had received external events. fn kick(&mut self) { if self.is_activated() { self.notify_queue_events(); } } /// Prepare the device for saving its state fn prepare_save(&mut self) {} } impl fmt::Debug for dyn VirtioDevice { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "VirtioDevice type {:?}", self.device_type()) } } /// Utility to define both const_device_type and device_type with a u32 constant #[macro_export] macro_rules! impl_device_type { ($const_type:expr) => { fn const_device_type() -> VirtioDeviceType { $const_type } fn device_type(&self) -> VirtioDeviceType { Self::const_device_type() } }; } #[cfg(test)] pub(crate) mod tests { use event_manager::{EventOps, Events, MutEventSubscriber}; use super::*; #[derive(Debug)] struct MockVirtioDevice { avail_features: u64, acked_features: u64, } impl MutEventSubscriber for MockVirtioDevice { fn process(&mut self, _: Events, _: &mut EventOps) {} fn init(&mut self, _: &mut EventOps) {} } impl VirtioDevice for MockVirtioDevice { impl_device_type!(VirtioDeviceType::Net); fn id(&self) -> &str { "mock" } fn avail_features(&self) -> u64 { self.avail_features } fn acked_features(&self) -> u64 { self.acked_features } fn set_acked_features(&mut self, acked_features: u64) { self.acked_features = acked_features } fn queues(&self) -> &[Queue] { todo!() } fn queues_mut(&mut self) -> &mut [Queue] { todo!() } fn queue_events(&self) -> &[EventFd] { todo!() } fn interrupt_trigger(&self) -> &dyn VirtioInterrupt { todo!() } fn read_config(&self, _offset: u64, _data: &mut [u8]) { todo!() } fn write_config(&mut self, _offset: u64, _data: &[u8]) { todo!() } fn activate( &mut self, _mem: 
GuestMemoryMmap,
            _interrupt: Arc,
        ) -> Result<(), ActivateError> {
            todo!()
        }

        fn is_activated(&self) -> bool {
            todo!()
        }
    }

    #[test]
    fn test_has_feature() {
        let mut device = MockVirtioDevice {
            avail_features: 0,
            acked_features: 0,
        };

        let mock_feature_1 = 1u64;
        // Feature is not reported before the corresponding bit is acked.
        assert!(!device.has_feature(mock_feature_1));
        device.acked_features = 1 << mock_feature_1;
        assert!(device.has_feature(mock_feature_1));

        let mock_feature_2 = 2u64;
        assert!(!device.has_feature(mock_feature_2));
        // Multiple acked feature bits are reported independently of each other.
        device.acked_features = (1 << mock_feature_1) | (1 << mock_feature_2);
        assert!(device.has_feature(mock_feature_1));
        assert!(device.has_feature(mock_feature_2));
    }

    #[test]
    fn test_features() {
        let features: u64 = 0x11223344_55667788;
        let mut device = MockVirtioDevice {
            avail_features: features,
            acked_features: 0,
        };

        // Page 0 exposes the low 32 bits of the feature word, page 1 the high 32 bits.
        assert_eq!(
            device.avail_features_by_page(0),
            (features & 0xFFFFFFFF) as u32,
        );
        assert_eq!(device.avail_features_by_page(1), (features >> 32) as u32);
        // Any other page must report no features.
        for i in 2..10 {
            assert_eq!(device.avail_features_by_page(i), 0u32);
        }

        // Acking everything on every page must ack exactly the advertised feature set:
        // bits not offered by the device are discarded.
        for i in 0..10 {
            device.ack_features_by_page(i, u32::MAX);
        }
        assert_eq!(device.acked_features, features);
    }
}


================================================
FILE: src/vmm/src/devices/virtio/generated/mod.rs
================================================
// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the THIRD-PARTY file.

#![allow(clippy::all)]
#![allow(non_upper_case_globals)]
#![allow(non_camel_case_types)]
#![allow(non_snake_case)]

pub mod virtio_blk;
pub mod virtio_config;
pub mod virtio_ids;
pub mod virtio_mem;
pub mod virtio_net;
pub mod virtio_ring;


================================================
FILE: src/vmm/src/devices/virtio/generated/virtio_blk.rs
================================================
// Copyright 2025 Amazon.com, Inc.
or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 // automatically generated by tools/bindgen.sh #![allow( non_camel_case_types, non_upper_case_globals, dead_code, non_snake_case, clippy::ptr_as_ptr, clippy::undocumented_unsafe_blocks, missing_debug_implementations, clippy::tests_outside_test_module, unsafe_op_in_unsafe_fn, clippy::redundant_static_lifetimes )] pub const VIRTIO_BLK_F_SIZE_MAX: u32 = 1; pub const VIRTIO_BLK_F_SEG_MAX: u32 = 2; pub const VIRTIO_BLK_F_GEOMETRY: u32 = 4; pub const VIRTIO_BLK_F_RO: u32 = 5; pub const VIRTIO_BLK_F_BLK_SIZE: u32 = 6; pub const VIRTIO_BLK_F_TOPOLOGY: u32 = 10; pub const VIRTIO_BLK_F_MQ: u32 = 12; pub const VIRTIO_BLK_F_DISCARD: u32 = 13; pub const VIRTIO_BLK_F_WRITE_ZEROES: u32 = 14; pub const VIRTIO_BLK_F_SECURE_ERASE: u32 = 16; pub const VIRTIO_BLK_F_ZONED: u32 = 17; pub const VIRTIO_BLK_F_BARRIER: u32 = 0; pub const VIRTIO_BLK_F_SCSI: u32 = 7; pub const VIRTIO_BLK_F_FLUSH: u32 = 9; pub const VIRTIO_BLK_F_CONFIG_WCE: u32 = 11; pub const VIRTIO_BLK_F_WCE: u32 = 9; pub const VIRTIO_BLK_ID_BYTES: u32 = 20; pub const VIRTIO_BLK_T_IN: u32 = 0; pub const VIRTIO_BLK_T_OUT: u32 = 1; pub const VIRTIO_BLK_T_SCSI_CMD: u32 = 2; pub const VIRTIO_BLK_T_FLUSH: u32 = 4; pub const VIRTIO_BLK_T_GET_ID: u32 = 8; pub const VIRTIO_BLK_T_DISCARD: u32 = 11; pub const VIRTIO_BLK_T_WRITE_ZEROES: u32 = 13; pub const VIRTIO_BLK_T_SECURE_ERASE: u32 = 14; pub const VIRTIO_BLK_T_ZONE_APPEND: u32 = 15; pub const VIRTIO_BLK_T_ZONE_REPORT: u32 = 16; pub const VIRTIO_BLK_T_ZONE_OPEN: u32 = 18; pub const VIRTIO_BLK_T_ZONE_CLOSE: u32 = 20; pub const VIRTIO_BLK_T_ZONE_FINISH: u32 = 22; pub const VIRTIO_BLK_T_ZONE_RESET: u32 = 24; pub const VIRTIO_BLK_T_ZONE_RESET_ALL: u32 = 26; pub const VIRTIO_BLK_T_BARRIER: u32 = 2147483648; pub const VIRTIO_BLK_Z_NONE: u32 = 0; pub const VIRTIO_BLK_Z_HM: u32 = 1; pub const VIRTIO_BLK_Z_HA: u32 = 2; pub const VIRTIO_BLK_ZT_CONV: u32 = 1; pub const VIRTIO_BLK_ZT_SWR: u32 = 2; pub const 
VIRTIO_BLK_ZT_SWP: u32 = 3; pub const VIRTIO_BLK_ZS_NOT_WP: u32 = 0; pub const VIRTIO_BLK_ZS_EMPTY: u32 = 1; pub const VIRTIO_BLK_ZS_IOPEN: u32 = 2; pub const VIRTIO_BLK_ZS_EOPEN: u32 = 3; pub const VIRTIO_BLK_ZS_CLOSED: u32 = 4; pub const VIRTIO_BLK_ZS_RDONLY: u32 = 13; pub const VIRTIO_BLK_ZS_FULL: u32 = 14; pub const VIRTIO_BLK_ZS_OFFLINE: u32 = 15; pub const VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP: u32 = 1; pub const VIRTIO_BLK_S_OK: u32 = 0; pub const VIRTIO_BLK_S_IOERR: u32 = 1; pub const VIRTIO_BLK_S_UNSUPP: u32 = 2; pub const VIRTIO_BLK_S_ZONE_INVALID_CMD: u32 = 3; pub const VIRTIO_BLK_S_ZONE_UNALIGNED_WP: u32 = 4; pub const VIRTIO_BLK_S_ZONE_OPEN_RESOURCE: u32 = 5; pub const VIRTIO_BLK_S_ZONE_ACTIVE_RESOURCE: u32 = 6; ================================================ FILE: src/vmm/src/devices/virtio/generated/virtio_config.rs ================================================ // Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 // automatically generated by tools/bindgen.sh #![allow( non_camel_case_types, non_upper_case_globals, dead_code, non_snake_case, clippy::ptr_as_ptr, clippy::undocumented_unsafe_blocks, missing_debug_implementations, clippy::tests_outside_test_module, unsafe_op_in_unsafe_fn, clippy::redundant_static_lifetimes )] pub const VIRTIO_F_NOTIFY_ON_EMPTY: u32 = 24; pub const VIRTIO_F_ANY_LAYOUT: u32 = 27; pub const VIRTIO_F_VERSION_1: u32 = 32; pub const VIRTIO_F_ACCESS_PLATFORM: u32 = 33; pub const VIRTIO_F_IOMMU_PLATFORM: u32 = 33; pub const VIRTIO_F_RING_PACKED: u32 = 34; pub const VIRTIO_F_IN_ORDER: u32 = 35; pub const VIRTIO_F_ORDER_PLATFORM: u32 = 36; pub const VIRTIO_F_SR_IOV: u32 = 37; pub const VIRTIO_F_NOTIFICATION_DATA: u32 = 38; pub const VIRTIO_F_NOTIF_CONFIG_DATA: u32 = 39; pub const VIRTIO_F_RING_RESET: u32 = 40; pub const VIRTIO_F_ADMIN_VQ: u32 = 41; ================================================ FILE: src/vmm/src/devices/virtio/generated/virtio_ids.rs 
================================================ // Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 // automatically generated by tools/bindgen.sh #![allow( non_camel_case_types, non_upper_case_globals, dead_code, non_snake_case, clippy::ptr_as_ptr, clippy::undocumented_unsafe_blocks, missing_debug_implementations, clippy::tests_outside_test_module, unsafe_op_in_unsafe_fn, clippy::redundant_static_lifetimes )] pub const VIRTIO_ID_NET: u32 = 1; pub const VIRTIO_ID_BLOCK: u32 = 2; pub const VIRTIO_ID_CONSOLE: u32 = 3; pub const VIRTIO_ID_RNG: u32 = 4; pub const VIRTIO_ID_BALLOON: u32 = 5; pub const VIRTIO_ID_IOMEM: u32 = 6; pub const VIRTIO_ID_RPMSG: u32 = 7; pub const VIRTIO_ID_SCSI: u32 = 8; pub const VIRTIO_ID_9P: u32 = 9; pub const VIRTIO_ID_MAC80211_WLAN: u32 = 10; pub const VIRTIO_ID_RPROC_SERIAL: u32 = 11; pub const VIRTIO_ID_CAIF: u32 = 12; pub const VIRTIO_ID_MEMORY_BALLOON: u32 = 13; pub const VIRTIO_ID_GPU: u32 = 16; pub const VIRTIO_ID_CLOCK: u32 = 17; pub const VIRTIO_ID_INPUT: u32 = 18; pub const VIRTIO_ID_VSOCK: u32 = 19; pub const VIRTIO_ID_CRYPTO: u32 = 20; pub const VIRTIO_ID_SIGNAL_DIST: u32 = 21; pub const VIRTIO_ID_PSTORE: u32 = 22; pub const VIRTIO_ID_IOMMU: u32 = 23; pub const VIRTIO_ID_MEM: u32 = 24; pub const VIRTIO_ID_SOUND: u32 = 25; pub const VIRTIO_ID_FS: u32 = 26; pub const VIRTIO_ID_PMEM: u32 = 27; pub const VIRTIO_ID_RPMB: u32 = 28; pub const VIRTIO_ID_MAC80211_HWSIM: u32 = 29; pub const VIRTIO_ID_VIDEO_ENCODER: u32 = 30; pub const VIRTIO_ID_VIDEO_DECODER: u32 = 31; pub const VIRTIO_ID_SCMI: u32 = 32; pub const VIRTIO_ID_NITRO_SEC_MOD: u32 = 33; pub const VIRTIO_ID_I2C_ADAPTER: u32 = 34; pub const VIRTIO_ID_WATCHDOG: u32 = 35; pub const VIRTIO_ID_CAN: u32 = 36; pub const VIRTIO_ID_DMABUF: u32 = 37; pub const VIRTIO_ID_PARAM_SERV: u32 = 38; pub const VIRTIO_ID_AUDIO_POLICY: u32 = 39; pub const VIRTIO_ID_BT: u32 = 40; pub const VIRTIO_ID_GPIO: u32 = 41; 
================================================ FILE: src/vmm/src/devices/virtio/generated/virtio_mem.rs ================================================ // Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 // automatically generated by tools/bindgen.sh #![allow( non_camel_case_types, non_upper_case_globals, dead_code, non_snake_case, clippy::ptr_as_ptr, clippy::undocumented_unsafe_blocks, missing_debug_implementations, clippy::tests_outside_test_module, unsafe_op_in_unsafe_fn, clippy::redundant_static_lifetimes )] pub const VIRTIO_MEM_F_ACPI_PXM: u32 = 0; pub const VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE: u32 = 1; pub const VIRTIO_MEM_F_PERSISTENT_SUSPEND: u32 = 2; pub const VIRTIO_MEM_REQ_PLUG: u32 = 0; pub const VIRTIO_MEM_REQ_UNPLUG: u32 = 1; pub const VIRTIO_MEM_REQ_UNPLUG_ALL: u32 = 2; pub const VIRTIO_MEM_REQ_STATE: u32 = 3; pub const VIRTIO_MEM_RESP_ACK: u32 = 0; pub const VIRTIO_MEM_RESP_NACK: u32 = 1; pub const VIRTIO_MEM_RESP_BUSY: u32 = 2; pub const VIRTIO_MEM_RESP_ERROR: u32 = 3; pub const VIRTIO_MEM_STATE_PLUGGED: u32 = 0; pub const VIRTIO_MEM_STATE_UNPLUGGED: u32 = 1; pub const VIRTIO_MEM_STATE_MIXED: u32 = 2; pub type __u8 = ::std::os::raw::c_uchar; pub type __u16 = ::std::os::raw::c_ushort; pub type __u64 = ::std::os::raw::c_ulonglong; pub type __le16 = __u16; pub type __le64 = __u64; pub type __virtio16 = __u16; pub type __virtio64 = __u64; #[repr(C)] #[derive(Debug, Default, Copy, Clone, PartialEq)] pub struct virtio_mem_req_plug { pub addr: __virtio64, pub nb_blocks: __virtio16, pub padding: [__virtio16; 3usize], } #[allow(clippy::unnecessary_operation, clippy::identity_op)] const _: () = { ["Size of virtio_mem_req_plug"][::std::mem::size_of::() - 16usize]; ["Alignment of virtio_mem_req_plug"][::std::mem::align_of::() - 8usize]; ["Offset of field: virtio_mem_req_plug::addr"] [::std::mem::offset_of!(virtio_mem_req_plug, addr) - 0usize]; ["Offset of field: virtio_mem_req_plug::nb_blocks"] 
[::std::mem::offset_of!(virtio_mem_req_plug, nb_blocks) - 8usize]; ["Offset of field: virtio_mem_req_plug::padding"] [::std::mem::offset_of!(virtio_mem_req_plug, padding) - 10usize]; }; #[repr(C)] #[derive(Debug, Default, Copy, Clone, PartialEq)] pub struct virtio_mem_req_unplug { pub addr: __virtio64, pub nb_blocks: __virtio16, pub padding: [__virtio16; 3usize], } #[allow(clippy::unnecessary_operation, clippy::identity_op)] const _: () = { ["Size of virtio_mem_req_unplug"][::std::mem::size_of::() - 16usize]; ["Alignment of virtio_mem_req_unplug"] [::std::mem::align_of::() - 8usize]; ["Offset of field: virtio_mem_req_unplug::addr"] [::std::mem::offset_of!(virtio_mem_req_unplug, addr) - 0usize]; ["Offset of field: virtio_mem_req_unplug::nb_blocks"] [::std::mem::offset_of!(virtio_mem_req_unplug, nb_blocks) - 8usize]; ["Offset of field: virtio_mem_req_unplug::padding"] [::std::mem::offset_of!(virtio_mem_req_unplug, padding) - 10usize]; }; #[repr(C)] #[derive(Debug, Default, Copy, Clone, PartialEq)] pub struct virtio_mem_req_state { pub addr: __virtio64, pub nb_blocks: __virtio16, pub padding: [__virtio16; 3usize], } #[allow(clippy::unnecessary_operation, clippy::identity_op)] const _: () = { ["Size of virtio_mem_req_state"][::std::mem::size_of::() - 16usize]; ["Alignment of virtio_mem_req_state"][::std::mem::align_of::() - 8usize]; ["Offset of field: virtio_mem_req_state::addr"] [::std::mem::offset_of!(virtio_mem_req_state, addr) - 0usize]; ["Offset of field: virtio_mem_req_state::nb_blocks"] [::std::mem::offset_of!(virtio_mem_req_state, nb_blocks) - 8usize]; ["Offset of field: virtio_mem_req_state::padding"] [::std::mem::offset_of!(virtio_mem_req_state, padding) - 10usize]; }; #[repr(C)] #[derive(Copy, Clone)] pub struct virtio_mem_req { pub type_: __virtio16, pub padding: [__virtio16; 3usize], pub u: virtio_mem_req__bindgen_ty_1, } #[repr(C)] #[derive(Copy, Clone)] pub union virtio_mem_req__bindgen_ty_1 { pub plug: virtio_mem_req_plug, pub unplug: 
virtio_mem_req_unplug, pub state: virtio_mem_req_state, } #[allow(clippy::unnecessary_operation, clippy::identity_op)] const _: () = { ["Size of virtio_mem_req__bindgen_ty_1"] [::std::mem::size_of::() - 16usize]; ["Alignment of virtio_mem_req__bindgen_ty_1"] [::std::mem::align_of::() - 8usize]; ["Offset of field: virtio_mem_req__bindgen_ty_1::plug"] [::std::mem::offset_of!(virtio_mem_req__bindgen_ty_1, plug) - 0usize]; ["Offset of field: virtio_mem_req__bindgen_ty_1::unplug"] [::std::mem::offset_of!(virtio_mem_req__bindgen_ty_1, unplug) - 0usize]; ["Offset of field: virtio_mem_req__bindgen_ty_1::state"] [::std::mem::offset_of!(virtio_mem_req__bindgen_ty_1, state) - 0usize]; }; impl Default for virtio_mem_req__bindgen_ty_1 { fn default() -> Self { let mut s = ::std::mem::MaybeUninit::::uninit(); unsafe { ::std::ptr::write_bytes(s.as_mut_ptr(), 0, 1); s.assume_init() } } } #[allow(clippy::unnecessary_operation, clippy::identity_op)] const _: () = { ["Size of virtio_mem_req"][::std::mem::size_of::() - 24usize]; ["Alignment of virtio_mem_req"][::std::mem::align_of::() - 8usize]; ["Offset of field: virtio_mem_req::type_"] [::std::mem::offset_of!(virtio_mem_req, type_) - 0usize]; ["Offset of field: virtio_mem_req::padding"] [::std::mem::offset_of!(virtio_mem_req, padding) - 2usize]; ["Offset of field: virtio_mem_req::u"][::std::mem::offset_of!(virtio_mem_req, u) - 8usize]; }; impl Default for virtio_mem_req { fn default() -> Self { let mut s = ::std::mem::MaybeUninit::::uninit(); unsafe { ::std::ptr::write_bytes(s.as_mut_ptr(), 0, 1); s.assume_init() } } } #[repr(C)] #[derive(Debug, Default, Copy, Clone, PartialEq)] pub struct virtio_mem_resp_state { pub state: __virtio16, } #[allow(clippy::unnecessary_operation, clippy::identity_op)] const _: () = { ["Size of virtio_mem_resp_state"][::std::mem::size_of::() - 2usize]; ["Alignment of virtio_mem_resp_state"] [::std::mem::align_of::() - 2usize]; ["Offset of field: virtio_mem_resp_state::state"] 
[::std::mem::offset_of!(virtio_mem_resp_state, state) - 0usize]; }; #[repr(C)] #[derive(Copy, Clone)] pub struct virtio_mem_resp { pub type_: __virtio16, pub padding: [__virtio16; 3usize], pub u: virtio_mem_resp__bindgen_ty_1, } #[repr(C)] #[derive(Copy, Clone)] pub union virtio_mem_resp__bindgen_ty_1 { pub state: virtio_mem_resp_state, } #[allow(clippy::unnecessary_operation, clippy::identity_op)] const _: () = { ["Size of virtio_mem_resp__bindgen_ty_1"] [::std::mem::size_of::() - 2usize]; ["Alignment of virtio_mem_resp__bindgen_ty_1"] [::std::mem::align_of::() - 2usize]; ["Offset of field: virtio_mem_resp__bindgen_ty_1::state"] [::std::mem::offset_of!(virtio_mem_resp__bindgen_ty_1, state) - 0usize]; }; impl Default for virtio_mem_resp__bindgen_ty_1 { fn default() -> Self { let mut s = ::std::mem::MaybeUninit::::uninit(); unsafe { ::std::ptr::write_bytes(s.as_mut_ptr(), 0, 1); s.assume_init() } } } #[allow(clippy::unnecessary_operation, clippy::identity_op)] const _: () = { ["Size of virtio_mem_resp"][::std::mem::size_of::() - 10usize]; ["Alignment of virtio_mem_resp"][::std::mem::align_of::() - 2usize]; ["Offset of field: virtio_mem_resp::type_"] [::std::mem::offset_of!(virtio_mem_resp, type_) - 0usize]; ["Offset of field: virtio_mem_resp::padding"] [::std::mem::offset_of!(virtio_mem_resp, padding) - 2usize]; ["Offset of field: virtio_mem_resp::u"][::std::mem::offset_of!(virtio_mem_resp, u) - 8usize]; }; impl Default for virtio_mem_resp { fn default() -> Self { let mut s = ::std::mem::MaybeUninit::::uninit(); unsafe { ::std::ptr::write_bytes(s.as_mut_ptr(), 0, 1); s.assume_init() } } } #[repr(C)] #[derive(Debug, Default, Copy, Clone, PartialEq)] pub struct virtio_mem_config { pub block_size: __le64, pub node_id: __le16, pub padding: [__u8; 6usize], pub addr: __le64, pub region_size: __le64, pub usable_region_size: __le64, pub plugged_size: __le64, pub requested_size: __le64, } #[allow(clippy::unnecessary_operation, clippy::identity_op)] const _: () = { ["Size of 
virtio_mem_config"][::std::mem::size_of::() - 56usize]; ["Alignment of virtio_mem_config"][::std::mem::align_of::() - 8usize]; ["Offset of field: virtio_mem_config::block_size"] [::std::mem::offset_of!(virtio_mem_config, block_size) - 0usize]; ["Offset of field: virtio_mem_config::node_id"] [::std::mem::offset_of!(virtio_mem_config, node_id) - 8usize]; ["Offset of field: virtio_mem_config::padding"] [::std::mem::offset_of!(virtio_mem_config, padding) - 10usize]; ["Offset of field: virtio_mem_config::addr"] [::std::mem::offset_of!(virtio_mem_config, addr) - 16usize]; ["Offset of field: virtio_mem_config::region_size"] [::std::mem::offset_of!(virtio_mem_config, region_size) - 24usize]; ["Offset of field: virtio_mem_config::usable_region_size"] [::std::mem::offset_of!(virtio_mem_config, usable_region_size) - 32usize]; ["Offset of field: virtio_mem_config::plugged_size"] [::std::mem::offset_of!(virtio_mem_config, plugged_size) - 40usize]; ["Offset of field: virtio_mem_config::requested_size"] [::std::mem::offset_of!(virtio_mem_config, requested_size) - 48usize]; }; ================================================ FILE: src/vmm/src/devices/virtio/generated/virtio_net.rs ================================================ // Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
// SPDX-License-Identifier: Apache-2.0 // automatically generated by tools/bindgen.sh #![allow( non_camel_case_types, non_upper_case_globals, dead_code, non_snake_case, clippy::ptr_as_ptr, clippy::undocumented_unsafe_blocks, missing_debug_implementations, clippy::tests_outside_test_module, unsafe_op_in_unsafe_fn, clippy::redundant_static_lifetimes )] pub const VIRTIO_NET_F_CSUM: u32 = 0; pub const VIRTIO_NET_F_GUEST_CSUM: u32 = 1; pub const VIRTIO_NET_F_CTRL_GUEST_OFFLOADS: u32 = 2; pub const VIRTIO_NET_F_MTU: u32 = 3; pub const VIRTIO_NET_F_MAC: u32 = 5; pub const VIRTIO_NET_F_GUEST_TSO4: u32 = 7; pub const VIRTIO_NET_F_GUEST_TSO6: u32 = 8; pub const VIRTIO_NET_F_GUEST_ECN: u32 = 9; pub const VIRTIO_NET_F_GUEST_UFO: u32 = 10; pub const VIRTIO_NET_F_HOST_TSO4: u32 = 11; pub const VIRTIO_NET_F_HOST_TSO6: u32 = 12; pub const VIRTIO_NET_F_HOST_ECN: u32 = 13; pub const VIRTIO_NET_F_HOST_UFO: u32 = 14; pub const VIRTIO_NET_F_MRG_RXBUF: u32 = 15; pub const VIRTIO_NET_F_STATUS: u32 = 16; pub const VIRTIO_NET_F_CTRL_VQ: u32 = 17; pub const VIRTIO_NET_F_CTRL_RX: u32 = 18; pub const VIRTIO_NET_F_CTRL_VLAN: u32 = 19; pub const VIRTIO_NET_F_CTRL_RX_EXTRA: u32 = 20; pub const VIRTIO_NET_F_GUEST_ANNOUNCE: u32 = 21; pub const VIRTIO_NET_F_MQ: u32 = 22; pub const VIRTIO_NET_F_CTRL_MAC_ADDR: u32 = 23; pub const VIRTIO_NET_F_DEVICE_STATS: u32 = 50; pub const VIRTIO_NET_F_VQ_NOTF_COAL: u32 = 52; pub const VIRTIO_NET_F_NOTF_COAL: u32 = 53; pub const VIRTIO_NET_F_GUEST_USO4: u32 = 54; pub const VIRTIO_NET_F_GUEST_USO6: u32 = 55; pub const VIRTIO_NET_F_HOST_USO: u32 = 56; pub const VIRTIO_NET_F_HASH_REPORT: u32 = 57; pub const VIRTIO_NET_F_GUEST_HDRLEN: u32 = 59; pub const VIRTIO_NET_F_RSS: u32 = 60; pub const VIRTIO_NET_F_RSC_EXT: u32 = 61; pub const VIRTIO_NET_F_STANDBY: u32 = 62; pub const VIRTIO_NET_F_SPEED_DUPLEX: u32 = 63; pub const VIRTIO_NET_F_GSO: u32 = 6; pub type __u8 = ::std::os::raw::c_uchar; pub type __u16 = ::std::os::raw::c_ushort; pub type __le16 = __u16; pub type 
__virtio16 = __u16; #[repr(C)] #[derive(Copy, Clone)] pub struct virtio_net_hdr_v1 { pub flags: __u8, pub gso_type: __u8, pub hdr_len: __virtio16, pub gso_size: __virtio16, pub __bindgen_anon_1: virtio_net_hdr_v1__bindgen_ty_1, pub num_buffers: __virtio16, } #[repr(C)] #[derive(Copy, Clone)] pub union virtio_net_hdr_v1__bindgen_ty_1 { pub __bindgen_anon_1: virtio_net_hdr_v1__bindgen_ty_1__bindgen_ty_1, pub csum: virtio_net_hdr_v1__bindgen_ty_1__bindgen_ty_2, pub rsc: virtio_net_hdr_v1__bindgen_ty_1__bindgen_ty_3, } #[repr(C)] #[derive(Debug, Default, Copy, Clone, PartialEq)] pub struct virtio_net_hdr_v1__bindgen_ty_1__bindgen_ty_1 { pub csum_start: __virtio16, pub csum_offset: __virtio16, } #[allow(clippy::unnecessary_operation, clippy::identity_op)] const _: () = { ["Size of virtio_net_hdr_v1__bindgen_ty_1__bindgen_ty_1"] [::std::mem::size_of::() - 4usize]; ["Alignment of virtio_net_hdr_v1__bindgen_ty_1__bindgen_ty_1"] [::std::mem::align_of::() - 2usize]; ["Offset of field: virtio_net_hdr_v1__bindgen_ty_1__bindgen_ty_1::csum_start"][::std::mem::offset_of!( virtio_net_hdr_v1__bindgen_ty_1__bindgen_ty_1, csum_start ) - 0usize]; ["Offset of field: virtio_net_hdr_v1__bindgen_ty_1__bindgen_ty_1::csum_offset"][::std::mem::offset_of!( virtio_net_hdr_v1__bindgen_ty_1__bindgen_ty_1, csum_offset ) - 2usize]; }; #[repr(C)] #[derive(Debug, Default, Copy, Clone, PartialEq)] pub struct virtio_net_hdr_v1__bindgen_ty_1__bindgen_ty_2 { pub start: __virtio16, pub offset: __virtio16, } #[allow(clippy::unnecessary_operation, clippy::identity_op)] const _: () = { ["Size of virtio_net_hdr_v1__bindgen_ty_1__bindgen_ty_2"] [::std::mem::size_of::() - 4usize]; ["Alignment of virtio_net_hdr_v1__bindgen_ty_1__bindgen_ty_2"] [::std::mem::align_of::() - 2usize]; ["Offset of field: virtio_net_hdr_v1__bindgen_ty_1__bindgen_ty_2::start"] [::std::mem::offset_of!(virtio_net_hdr_v1__bindgen_ty_1__bindgen_ty_2, start) - 0usize]; ["Offset of field: 
virtio_net_hdr_v1__bindgen_ty_1__bindgen_ty_2::offset"] [::std::mem::offset_of!(virtio_net_hdr_v1__bindgen_ty_1__bindgen_ty_2, offset) - 2usize]; }; #[repr(C)] #[derive(Debug, Default, Copy, Clone, PartialEq)] pub struct virtio_net_hdr_v1__bindgen_ty_1__bindgen_ty_3 { pub segments: __le16, pub dup_acks: __le16, } #[allow(clippy::unnecessary_operation, clippy::identity_op)] const _: () = { ["Size of virtio_net_hdr_v1__bindgen_ty_1__bindgen_ty_3"] [::std::mem::size_of::() - 4usize]; ["Alignment of virtio_net_hdr_v1__bindgen_ty_1__bindgen_ty_3"] [::std::mem::align_of::() - 2usize]; ["Offset of field: virtio_net_hdr_v1__bindgen_ty_1__bindgen_ty_3::segments"] [::std::mem::offset_of!(virtio_net_hdr_v1__bindgen_ty_1__bindgen_ty_3, segments) - 0usize]; ["Offset of field: virtio_net_hdr_v1__bindgen_ty_1__bindgen_ty_3::dup_acks"] [::std::mem::offset_of!(virtio_net_hdr_v1__bindgen_ty_1__bindgen_ty_3, dup_acks) - 2usize]; }; #[allow(clippy::unnecessary_operation, clippy::identity_op)] const _: () = { ["Size of virtio_net_hdr_v1__bindgen_ty_1"] [::std::mem::size_of::() - 4usize]; ["Alignment of virtio_net_hdr_v1__bindgen_ty_1"] [::std::mem::align_of::() - 2usize]; ["Offset of field: virtio_net_hdr_v1__bindgen_ty_1::csum"] [::std::mem::offset_of!(virtio_net_hdr_v1__bindgen_ty_1, csum) - 0usize]; ["Offset of field: virtio_net_hdr_v1__bindgen_ty_1::rsc"] [::std::mem::offset_of!(virtio_net_hdr_v1__bindgen_ty_1, rsc) - 0usize]; }; impl Default for virtio_net_hdr_v1__bindgen_ty_1 { fn default() -> Self { let mut s = ::std::mem::MaybeUninit::::uninit(); unsafe { ::std::ptr::write_bytes(s.as_mut_ptr(), 0, 1); s.assume_init() } } } #[allow(clippy::unnecessary_operation, clippy::identity_op)] const _: () = { ["Size of virtio_net_hdr_v1"][::std::mem::size_of::() - 12usize]; ["Alignment of virtio_net_hdr_v1"][::std::mem::align_of::() - 2usize]; ["Offset of field: virtio_net_hdr_v1::flags"] [::std::mem::offset_of!(virtio_net_hdr_v1, flags) - 0usize]; ["Offset of field: 
virtio_net_hdr_v1::gso_type"] [::std::mem::offset_of!(virtio_net_hdr_v1, gso_type) - 1usize]; ["Offset of field: virtio_net_hdr_v1::hdr_len"] [::std::mem::offset_of!(virtio_net_hdr_v1, hdr_len) - 2usize]; ["Offset of field: virtio_net_hdr_v1::gso_size"] [::std::mem::offset_of!(virtio_net_hdr_v1, gso_size) - 4usize]; ["Offset of field: virtio_net_hdr_v1::num_buffers"] [::std::mem::offset_of!(virtio_net_hdr_v1, num_buffers) - 10usize]; }; impl Default for virtio_net_hdr_v1 { fn default() -> Self { let mut s = ::std::mem::MaybeUninit::::uninit(); unsafe { ::std::ptr::write_bytes(s.as_mut_ptr(), 0, 1); s.assume_init() } } } ================================================ FILE: src/vmm/src/devices/virtio/generated/virtio_ring.rs ================================================ // Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 // automatically generated by tools/bindgen.sh #![allow( non_camel_case_types, non_upper_case_globals, dead_code, non_snake_case, clippy::ptr_as_ptr, clippy::undocumented_unsafe_blocks, missing_debug_implementations, clippy::tests_outside_test_module, unsafe_op_in_unsafe_fn, clippy::redundant_static_lifetimes )] pub const VIRTIO_RING_F_EVENT_IDX: u32 = 29; ================================================ FILE: src/vmm/src/devices/virtio/iov_deque.rs ================================================ // Copyright 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 use std::os::fd::AsRawFd; use libc::{c_int, c_void, iovec, off_t, size_t}; use memfd; use crate::arch::host_page_size; #[derive(Debug, thiserror::Error, displaydoc::Display)] pub enum IovDequeError { /// Error with memfd: {0} Memfd(#[from] memfd::Error), /// Error while resizing memfd: {0} MemfdResize(std::io::Error), /// Error calling mmap: {0} Mmap(std::io::Error), } /// ['IovDeque'] is a ring buffer tailored for `struct iovec` objects. 
/// /// From the point of view of API, [`IovDeque`] is a typical ring buffer that allows us to push /// `struct iovec` objects at the end of the buffer and pop them from its beginning. /// /// It is tailored to store `struct iovec` objects that described memory that was passed to us from /// the guest via a VirtIO queue. This allows us to assume the maximum size of a ring buffer (the /// negotiated size of the queue). // An important feature of the data structure is that it can give us a slice of all `struct iovec` // objects in the queue, so that we can use this `&mut [iovec]` to perform operations such as // `readv`. A typical implementation of a ring buffer allows for entries to wrap around the end of // the underlying buffer. For example, a ring buffer with a capacity of 10 elements which // currently holds 4 elements can look like this: // // tail head // | | // v v // +---+---+---+---+---+---+---+---+---+---+ // ring buffer: | C | D | | | | | | | A | B | // +---+---+---+---+---+---+---+---+---+---+ // // When getting a slice for this data we should get something like that: &[A, B, C, D], which // would require copies in order to make the elements continuous in memory. // // In order to avoid that and make the operation of getting a slice more efficient, we implement // the optimization described in the "Optimization" section of the "Circular buffer" wikipedia // entry: https://en.wikipedia.org/wiki/Circular_buffer. The optimization consists of allocating // double the size of the virtual memory required for the buffer and map both parts on the same // physical address. 
Looking at the same example as before, we should get, this picture: // // head | tail // | | | // v | v // +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+ // | C | D | | | | | | | A | B | C | D | | | | | | | A | B | // +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+ // First virtual page | Second virtual page // | // | // // Virtual memory // --------------------------------------------------------------------------------------- // Physical memory // // +---+---+---+---+---+---+---+---+---+---+ // | C | D | | | | | | | A | B | // +---+---+---+---+---+---+---+---+---+---+ // // Like that, the elements stored in the buffer are always laid out in contiguous virtual memory, // so making a slice out of them does not require any copies. // // The `L` const generic determines the maximum number of `iovec` elements the queue should hold // at any point in time. The actual capacity of the queue may differ and will depend on the host // page size. // // ```Rust // pub struct iovec { // pub iov_base: *mut ::c_void, // pub iov_len: ::size_t, // } // ``` #[derive(Debug)] pub struct IovDeque { pub iov: *mut libc::iovec, pub start: u16, pub len: u16, pub capacity: u16, } // SAFETY: This is `Send`. We hold sole ownership of the underlying buffer. unsafe impl Send for IovDeque {} impl IovDeque { /// Create a [`memfd`] object that represents a single physical page fn create_memfd(pages_bytes: usize) -> Result { // Create a sealable memfd. let opts = memfd::MemfdOptions::default().allow_sealing(true); let mfd = opts.create("iov_deque")?; // Resize to system page size. mfd.as_file() .set_len(pages_bytes.try_into().unwrap()) .map_err(IovDequeError::MemfdResize)?; // Add seals to prevent further resizing. mfd.add_seals(&[memfd::FileSeal::SealShrink, memfd::FileSeal::SealGrow])?; // Prevent further sealing changes. 
mfd.add_seal(memfd::FileSeal::SealSeal)?; Ok(mfd) } /// A safe wrapper on top of libc's `mmap` system call /// /// # Safety: Callers need to make sure that the arguments to `mmap` are valid unsafe fn mmap( addr: *mut c_void, len: size_t, prot: c_int, flags: c_int, fd: c_int, offset: off_t, ) -> Result<*mut c_void, IovDequeError> { // SAFETY: caller should ensure the parameters are valid let ptr = unsafe { libc::mmap(addr, len, prot, flags, fd, offset) }; if ptr == libc::MAP_FAILED { return Err(IovDequeError::Mmap(std::io::Error::last_os_error())); } Ok(ptr) } /// Allocate memory for our ring buffer /// /// This will allocate 2 * `pages_bytes` bytes of virtual memory. fn allocate_ring_buffer_memory(pages_bytes: usize) -> Result<*mut c_void, IovDequeError> { // SAFETY: We are calling the system call with valid arguments unsafe { Self::mmap( std::ptr::null_mut(), pages_bytes * 2, libc::PROT_NONE, libc::MAP_PRIVATE | libc::MAP_ANONYMOUS, -1, 0, ) } } /// Calculate a number of bytes in full pages required for /// the type to operate. fn pages_bytes() -> usize { let host_page_size = host_page_size(); let bytes = L as usize * std::mem::size_of::(); let num_host_pages = bytes.div_ceil(host_page_size); num_host_pages * host_page_size } /// Create a new [`IovDeque`] that can hold memory described by a single VirtIO queue. 
pub fn new() -> Result { let pages_bytes = Self::pages_bytes(); let capacity = pages_bytes / std::mem::size_of::(); let capacity: u16 = capacity.try_into().unwrap(); assert!( L <= capacity, "Actual capacity {} is smaller than requested capacity {}", capacity, L ); let memfd = Self::create_memfd(pages_bytes)?; let raw_memfd = memfd.as_file().as_raw_fd(); let buffer = Self::allocate_ring_buffer_memory(pages_bytes)?; // Map the first page of virtual memory to the physical page described by the memfd object // SAFETY: We are calling the system call with valid arguments let _ = unsafe { Self::mmap( buffer, pages_bytes, libc::PROT_READ | libc::PROT_WRITE, libc::MAP_SHARED | libc::MAP_FIXED, raw_memfd, 0, ) }?; // Map the second page of virtual memory to the physical page described by the memfd object // // SAFETY: This is safe because: // * Both `buffer` and the result of `buffer.add(pages_bytes)` are within bounds of the // allocation we got from `Self::allocate_ring_buffer_memory`. // * The resulting pointer is the beginning of the second page of our allocation, so it // doesn't wrap around the address space. let next_page = unsafe { buffer.add(pages_bytes) }; // SAFETY: We are calling the system call with valid arguments let _ = unsafe { Self::mmap( next_page, pages_bytes, libc::PROT_READ | libc::PROT_WRITE, libc::MAP_SHARED | libc::MAP_FIXED, raw_memfd, 0, ) }?; Ok(Self { iov: buffer.cast(), start: 0, len: 0, capacity, }) } /// Returns the number of `iovec` objects currently in the [`IovDeque`] #[inline(always)] pub fn len(&self) -> u16 { self.len } /// Returns `true` if the [`IovDeque`] is full, `false` otherwise #[inline(always)] pub fn is_full(&self) -> bool { self.len() == L } /// Resets the queue, dropping all its elements. #[inline(always)] pub fn clear(&mut self) { self.start = 0; self.len = 0; } /// Adds an `iovec` in the ring buffer. /// /// Returns an `IovDequeError::Full` error if the buffer is full. 
pub fn push_back(&mut self, iov: iovec) { // This should NEVER happen, since our ring buffer is as big as the maximum queue size. // We also check for the sanity of the VirtIO queues, in queue.rs, which means that if we // ever try to add something in a full ring buffer, there is an internal bug in the device // emulation logic. Panic here because the device is hopelessly broken. assert!( !self.is_full(), "The number of `iovec` objects is bigger than the available space" ); // SAFETY: self.iov is a valid pointer and `self.start + self.len` is within range (we // asserted before that the buffer is not full). unsafe { self.iov .add((self.start + self.len) as usize) .write_volatile(iov) }; self.len += 1; } /// Pops the first `nr_iovecs` iovecs from the front of the buffer. /// /// This will panic if we are asked /// to pop more iovecs than what is currently available in the buffer. pub fn pop_front(&mut self, nr_iovecs: u16) { assert!( self.len() >= nr_iovecs, "Internal bug! Trying to drop more iovec objects than what is available" ); self.start += nr_iovecs; self.len -= nr_iovecs; if self.capacity <= self.start { self.start -= self.capacity; } } /// Pops the first `nr_iovecs` iovecs from the back of the buffer. /// /// This will panic if we are asked /// to pop more iovecs than what is currently available in the buffer. pub fn pop_back(&mut self, nr_iovecs: u16) { assert!( self.len() >= nr_iovecs, "Internal bug! Trying to drop more iovec objects than what is available" ); self.len -= nr_iovecs; } /// Get a slice of the iovec objects currently in the buffer. pub fn as_slice(&self) -> &[iovec] { // SAFETY: Here we create a slice out of the existing elements in the buffer (not the whole // allocated memory). That means that we can: // * We can read `self.len * mem::size_of::()` bytes out of the memory range we are // returning. // * `self.iov.add(self.start.into())` is a non-null pointer and aligned. // * The underlying memory comes from a single allocation. 
// * The returning pointer points to `self.len` consecutive initialized `iovec` objects. // * We are only accessing the underlying memory through the returned slice. Since we are // returning a slice of only the existing pushed elements the slice does not contain any // aliasing references. // * The slice can be up to 1 page long which is smaller than `isize::MAX`. unsafe { let slice_start = self.iov.add(self.start.into()); std::slice::from_raw_parts(slice_start, self.len.into()) } } /// Get a mutable slice of the iovec objects currently in the buffer. pub fn as_mut_slice(&mut self) -> &mut [iovec] { // SAFETY: Here we create a slice out of the existing elements in the buffer (not the whole // allocated memory). That means that we can: // * We can read/write `self.len * mem::size_of::()` bytes out of the memory range we // are returning. // * The underlying memory comes from a single allocation. // * `self.iov.add(self.start.into())` is a non-null pointer and aligned // * The returning pointer points to `self.len` consecutive initialized `iovec` objects. // * We are only accessing the underlying memory through the returned slice. Since we are // returning a slice of only the existing pushed elements the slice does not contain any // aliasing references. // * The slice can be up to 1 page long which is smaller than `isize::MAX`. unsafe { let slice_start = self.iov.add(self.start.into()); std::slice::from_raw_parts_mut(slice_start, self.len.into()) } } } impl Drop for IovDeque { fn drop(&mut self) { let pages_bytes = Self::pages_bytes(); // SAFETY: We are passing an address that we got from a previous allocation of `2 * // pages_bytes` by calling mmap let _ = unsafe { libc::munmap(self.iov.cast(), 2 * pages_bytes) }; } } #[cfg(test)] mod tests { use libc::iovec; // Redefine `IovDeque` with specific length. Otherwise // Rust will not know what to do. 
type IovDeque = super::IovDeque<256>; #[test] fn test_new() { let deque = IovDeque::new().unwrap(); assert_eq!(deque.len(), 0); } #[test] fn test_new_less_than_page() { let deque = super::IovDeque::<128>::new().unwrap(); assert_eq!(deque.len(), 0); } #[test] fn test_new_more_than_page() { let deque = super::IovDeque::<512>::new().unwrap(); assert_eq!(deque.len(), 0); } fn make_iovec(id: u16, len: u16) -> iovec { iovec { iov_base: id as *mut libc::c_void, iov_len: len as usize, } } #[test] #[should_panic] fn test_push_back_too_many() { let mut deque = IovDeque::new().unwrap(); assert_eq!(deque.len(), 0); for i in 0u16..256 { deque.push_back(make_iovec(i, i)); assert_eq!(deque.len(), i + 1); } deque.push_back(make_iovec(0, 0)); } #[test] #[should_panic] fn test_pop_front_from_empty() { let mut deque = IovDeque::new().unwrap(); deque.pop_front(1); } #[test] #[should_panic] fn test_pop_front_too_many() { let mut deque = IovDeque::new().unwrap(); deque.push_back(make_iovec(42, 42)); deque.pop_front(2); } #[test] fn test_pop_font() { let mut deque = IovDeque::new().unwrap(); assert_eq!(deque.len(), 0); assert!(!deque.is_full()); deque.pop_front(0); let iovs: Vec<_> = (0..4).map(|i| make_iovec(i, i)).collect(); for iov in iovs.iter() { deque.push_back(*iov); } assert_eq!(deque.as_slice(), &iovs); assert_eq!(deque.as_mut_slice(), &iovs); deque.pop_front(1); assert_eq!(deque.as_slice(), &iovs[1..]); assert_eq!(deque.as_mut_slice(), &iovs[1..]); deque.pop_front(1); assert_eq!(deque.as_slice(), &iovs[2..]); assert_eq!(deque.as_mut_slice(), &iovs[2..]); deque.pop_front(1); assert_eq!(deque.as_slice(), &iovs[3..]); assert_eq!(deque.as_mut_slice(), &iovs[3..]); deque.pop_front(1); assert_eq!(deque.as_slice(), &iovs[4..]); assert_eq!(deque.as_mut_slice(), &iovs[4..]); for i in 0u16..256 { deque.push_back(make_iovec(i, i)); assert_eq!(deque.len(), i + 1); } assert!(deque.is_full()); assert!(deque.len() != 0); for i in 0u16..256 { deque.pop_front(1); assert_eq!(deque.len(), 256 - i 
- 1); } } #[test] fn test_pop_back() { let mut deque = IovDeque::new().unwrap(); assert_eq!(deque.len(), 0); assert!(!deque.is_full()); deque.pop_back(0); let iovs: Vec<_> = (0..4).map(|i| make_iovec(i, i)).collect(); for iov in iovs.iter() { deque.push_back(*iov); } assert_eq!(deque.as_slice(), &iovs); assert_eq!(deque.as_mut_slice(), &iovs); deque.pop_back(1); assert_eq!(deque.as_slice(), &iovs[..iovs.len() - 1]); assert_eq!(deque.as_mut_slice(), &iovs[..iovs.len() - 1]); deque.pop_back(1); assert_eq!(deque.as_slice(), &iovs[..iovs.len() - 2]); assert_eq!(deque.as_mut_slice(), &iovs[..iovs.len() - 2]); deque.pop_back(1); assert_eq!(deque.as_slice(), &iovs[..iovs.len() - 3]); assert_eq!(deque.as_mut_slice(), &iovs[..iovs.len() - 3]); deque.pop_back(1); assert_eq!(deque.as_slice(), &iovs[..iovs.len() - 4]); assert_eq!(deque.as_mut_slice(), &iovs[..iovs.len() - 4]); for i in 0u16..256 { deque.push_back(make_iovec(i, i)); assert_eq!(deque.len(), i + 1); } assert!(deque.is_full()); assert!(deque.len() != 0); for i in 0u16..256 { deque.pop_back(1); assert_eq!(deque.len(), 256 - i - 1); } } #[test] fn test_pop_many() { let mut deque = IovDeque::new().unwrap(); for i in 0u16..256 { deque.push_back(make_iovec(i, i)); } deque.pop_front(1); assert_eq!(deque.len(), 255); deque.pop_front(2); assert_eq!(deque.len(), 253); deque.pop_front(4); assert_eq!(deque.len(), 249); deque.pop_front(8); assert_eq!(deque.len(), 241); deque.pop_front(16); assert_eq!(deque.len(), 225); deque.pop_front(32); assert_eq!(deque.len(), 193); deque.pop_front(64); assert_eq!(deque.len(), 129); deque.pop_front(128); assert_eq!(deque.len(), 1); } #[test] fn test_as_slice() { let mut deque = IovDeque::new().unwrap(); assert!(deque.as_slice().is_empty()); for i in 0..256 { deque.push_back(make_iovec(i, 100)); assert_eq!(deque.as_slice().len(), (i + 1) as usize); } let copy: Vec = deque.as_slice().to_vec(); assert_eq!(copy.len(), deque.len() as usize); for (i, iov) in deque.as_slice().iter().enumerate() { 
assert_eq!(iov.iov_len, copy[i].iov_len); } } #[test] fn test_as_mut_slice() { let mut deque = IovDeque::new().unwrap(); assert!(deque.as_mut_slice().is_empty()); for i in 0..256 { deque.push_back(make_iovec(i, 100)); assert_eq!(deque.as_mut_slice().len(), (i + 1) as usize); } let copy: Vec = deque.as_mut_slice().to_vec(); deque .as_mut_slice() .iter_mut() .for_each(|iov| iov.iov_len *= 2); assert_eq!(copy.len(), deque.len() as usize); for (i, iov) in deque.as_slice().iter().enumerate() { assert_eq!(iov.iov_len, 2 * copy[i].iov_len); } } #[test] fn test_size_less_than_capacity() { // Usually we have a queue size of 256 which is a perfect fit // for 4K pages. But with 16K or bigger pages the `perfect fit` // is not perfect anymore. Need to ensure the wraparound logic // remains valid in such cases. const L: u16 = 16; let mut deque = super::IovDeque::::new().unwrap(); assert!(deque.as_mut_slice().is_empty()); // Number of times need to fill/empty the queue to reach the // wraparound point. let fills = deque.capacity / L; // Almost reach the wraparound. for _ in 0..(fills - 1) { for _ in 0..L { deque.push_back(make_iovec(0, 100)); } deque.pop_front(L); } // 1 element away from the wraparound for _ in 0..(L - 1) { deque.push_back(make_iovec(0, 100)); } deque.pop_front(L - 1); // Start filling the 'second' page // First element will be put at the end of the // first page, while the rest will be in `second` // page. for _ in 0..L { deque.push_back(make_iovec(1, 100)); } // Pop one element to trigger the wraparound. deque.pop_front(1); // Now the slice should be pointing to the memory of the `first` page // which should have the same content as the `second` page. assert_eq!(deque.as_slice(), vec![make_iovec(1, 100); L as usize - 1]); } } ================================================ FILE: src/vmm/src/devices/virtio/iovec.rs ================================================ // Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
// SPDX-License-Identifier: Apache-2.0 use std::io::ErrorKind; use libc::{c_void, iovec, size_t}; use serde::{Deserialize, Serialize}; use vm_memory::bitmap::Bitmap; use vm_memory::{ GuestMemory, GuestMemoryError, ReadVolatile, VolatileMemoryError, VolatileSlice, WriteVolatile, }; use super::iov_deque::{IovDeque, IovDequeError}; use super::queue::FIRECRACKER_MAX_QUEUE_SIZE; use crate::devices::virtio::queue::DescriptorChain; use crate::vstate::memory::GuestMemoryMmap; #[derive(Debug, thiserror::Error, displaydoc::Display)] pub enum IoVecError { /// Tried to create an `IoVec` from a write-only descriptor chain WriteOnlyDescriptor, /// Tried to create an 'IoVecMut` from a read-only descriptor chain ReadOnlyDescriptor, /// Tried to create an `IoVec` or `IoVecMut` from a descriptor chain that was too large OverflowedDescriptor, /// Tried to push to full IovDeque. IovDequeOverflow, /// Guest memory error: {0} GuestMemory(#[from] GuestMemoryError), /// Error with underlying `IovDeque`: {0} IovDeque(#[from] IovDequeError), } /// This is essentially a wrapper of a `Vec` which can be passed to `libc::writev`. /// /// It describes a buffer passed to us by the guest that is scattered across multiple /// memory regions. Additionally, this wrapper provides methods that allow reading arbitrary ranges /// of data from that buffer. 
#[derive(Debug, Default)] pub struct IoVecBuffer { // container of the memory regions included in this IO vector vecs: Vec, // Total length of the IoVecBuffer len: u32, } // SAFETY: `IoVecBuffer` doesn't allow for interior mutability and no shared ownership is possible // as it doesn't implement clone unsafe impl Send for IoVecBuffer {} impl IoVecBuffer { /// Create an `IoVecBuffer` from a `DescriptorChain` /// /// # Safety /// /// The descriptor chain cannot be referencing the same memory location as another chain pub unsafe fn load_descriptor_chain( &mut self, mem: &GuestMemoryMmap, head: DescriptorChain, ) -> Result<(), IoVecError> { self.clear(); let mut next_descriptor = Some(head); while let Some(desc) = next_descriptor { if desc.is_write_only() { return Err(IoVecError::WriteOnlyDescriptor); } // We use get_slice instead of `get_host_address` here in order to have the whole // range of the descriptor chain checked, i.e. [addr, addr + len) is a valid memory // region in the GuestMemoryMmap. let iov_base = mem .get_slice(desc.addr, desc.len as usize)? 
.ptr_guard_mut() .as_ptr() .cast::(); self.vecs.push(iovec { iov_base, iov_len: desc.len as size_t, }); self.len = self .len .checked_add(desc.len) .ok_or(IoVecError::OverflowedDescriptor)?; next_descriptor = desc.next_descriptor(); } Ok(()) } /// Create an `IoVecBuffer` from a `DescriptorChain` /// /// # Safety /// /// The descriptor chain cannot be referencing the same memory location as another chain pub unsafe fn from_descriptor_chain( mem: &GuestMemoryMmap, head: DescriptorChain, ) -> Result { let mut new_buffer = Self::default(); // SAFETY: descriptor chain cannot be referencing the same memory location as another chain unsafe { new_buffer.load_descriptor_chain(mem, head)?; } Ok(new_buffer) } /// Get the total length of the memory regions covered by this `IoVecBuffer` pub(crate) fn len(&self) -> u32 { self.len } /// Returns a pointer to the memory keeping the `iovec` structs pub fn as_iovec_ptr(&self) -> *const iovec { self.vecs.as_ptr() } /// Returns the length of the `iovec` array. pub fn iovec_count(&self) -> usize { self.vecs.len() } /// Clears the `iovec` array pub fn clear(&mut self) { self.vecs.clear(); self.len = 0u32; } /// Reads a number of bytes from the `IoVecBuffer` starting at a given offset. /// /// This will try to fill `buf` reading bytes from the `IoVecBuffer` starting from /// the given offset. /// /// # Returns /// /// `Ok(())` if `buf` was filled by reading from this [`IoVecBuffer`], /// `Err(VolatileMemoryError::PartialBuffer)` if only part of `buf` could not be filled, and /// `Err(VolatileMemoryError::OutOfBounds)` if `offset >= self.len()`. 
pub fn read_exact_volatile_at( &self, mut buf: &mut [u8], offset: usize, ) -> Result<(), VolatileMemoryError> { if offset < self.len() as usize { let expected = buf.len(); let bytes_read = self.read_volatile_at(&mut buf, offset, expected)?; if bytes_read != expected { return Err(VolatileMemoryError::PartialBuffer { expected, completed: bytes_read, }); } Ok(()) } else { // If `offset` is past size, there's nothing to read. Err(VolatileMemoryError::OutOfBounds { addr: offset }) } } /// Reads up to `len` bytes from the `IoVecBuffer` starting at the given offset. /// /// This will try to write to the given [`WriteVolatile`]. pub fn read_volatile_at( &self, dst: &mut W, mut offset: usize, mut len: usize, ) -> Result { let mut total_bytes_read = 0; for iov in &self.vecs { if len == 0 { break; } if offset >= iov.iov_len { offset -= iov.iov_len; continue; } let mut slice = // SAFETY: the constructor IoVecBufferMut::from_descriptor_chain ensures that // all iovecs contained point towards valid ranges of guest memory unsafe { VolatileSlice::new(iov.iov_base.cast(), iov.iov_len).offset(offset)? }; offset = 0; if slice.len() > len { slice = slice.subslice(0, len)?; } match loop { match dst.write_volatile(&slice) { Err(VolatileMemoryError::IOError(err)) if err.kind() == ErrorKind::Interrupted => {} result => break result, } } { Ok(bytes_read) => { total_bytes_read += bytes_read; if bytes_read < slice.len() { break; } len -= bytes_read; } // exit successfully if we previously managed to write some bytes Err(_) if total_bytes_read > 0 => break, Err(err) => return Err(err), } } Ok(total_bytes_read) } } #[derive(Debug, Clone, Serialize, Deserialize)] pub struct ParsedDescriptorChain { pub head_index: u16, pub length: u32, pub nr_iovecs: u16, } /// This is essentially a wrapper of a `Vec` which can be passed to `libc::readv`. /// /// It describes a write-only buffer passed to us by the guest that is scattered across multiple /// memory regions. 
Additionally, this wrapper provides methods that allow reading arbitrary ranges /// of data from that buffer. /// `L` const generic value must be a multiple of 256 as required by the `IovDeque` requirements. #[derive(Debug)] pub struct IoVecBufferMut { // container of the memory regions included in this IO vector pub vecs: IovDeque, // Total length of the IoVecBufferMut // We use `u32` here because we use this type in devices which // should not give us huge buffers. In any case this // value will not overflow as we explicitly check for this case. pub len: u32, } // SAFETY: `IoVecBufferMut` doesn't allow for interior mutability and no shared ownership is // possible as it doesn't implement clone unsafe impl Send for IoVecBufferMut {} impl IoVecBufferMut { /// Append a `DescriptorChain` in this `IoVecBufferMut` /// /// # Safety /// /// The descriptor chain cannot be referencing the same memory location as another chain pub unsafe fn append_descriptor_chain( &mut self, mem: &GuestMemoryMmap, head: DescriptorChain, ) -> Result { let head_index = head.index; let mut next_descriptor = Some(head); let mut length = 0u32; let mut nr_iovecs = 0u16; while let Some(desc) = next_descriptor { if !desc.is_write_only() { self.vecs.pop_back(nr_iovecs); return Err(IoVecError::ReadOnlyDescriptor); } // We use get_slice instead of `get_host_address` here in order to have the whole // range of the descriptor chain checked, i.e. [addr, addr + len) is a valid memory // region in the GuestMemoryMmap. let slice = mem .get_slice(desc.addr, desc.len as usize) .inspect_err(|_| { self.vecs.pop_back(nr_iovecs); })?; // We need to mark the area of guest memory that will be mutated through this // IoVecBufferMut as dirty ahead of time, as we loose access to all // vm-memory related information after converting down to iovecs. 
slice.bitmap().mark_dirty(0, desc.len as usize); let iov_base = slice.ptr_guard_mut().as_ptr().cast::(); if self.vecs.is_full() { self.vecs.pop_back(nr_iovecs); return Err(IoVecError::IovDequeOverflow); } self.vecs.push_back(iovec { iov_base, iov_len: desc.len as size_t, }); nr_iovecs += 1; length = length .checked_add(desc.len) .ok_or(IoVecError::OverflowedDescriptor) .inspect_err(|_| { self.vecs.pop_back(nr_iovecs); })?; next_descriptor = desc.next_descriptor(); } self.len = self.len.checked_add(length).ok_or_else(|| { self.vecs.pop_back(nr_iovecs); IoVecError::OverflowedDescriptor })?; Ok(ParsedDescriptorChain { head_index, length, nr_iovecs, }) } /// Create an empty `IoVecBufferMut`. pub fn new() -> Result { let vecs = IovDeque::new()?; Ok(Self { vecs, len: 0 }) } /// Create an `IoVecBufferMut` from a `DescriptorChain` /// /// This will clear any previous `iovec` objects in the buffer and load the new /// [`DescriptorChain`]. /// /// # Safety /// /// The descriptor chain cannot be referencing the same memory location as another chain pub unsafe fn load_descriptor_chain( &mut self, mem: &GuestMemoryMmap, head: DescriptorChain, ) -> Result<(), IoVecError> { self.clear(); // SAFETY: descriptor chain cannot be referencing the same memory location as another chain let _ = unsafe { self.append_descriptor_chain(mem, head)? }; Ok(()) } /// Drop descriptor chain from the `IoVecBufferMut` front /// /// This will drop memory described by the `IoVecBufferMut` from the beginning. pub fn drop_chain_front(&mut self, parse_descriptor: &ParsedDescriptorChain) { self.vecs.pop_front(parse_descriptor.nr_iovecs); self.len -= parse_descriptor.length; } /// Drop descriptor chain from the `IoVecBufferMut` back /// /// This will drop memory described by the `IoVecBufferMut` from the beginning. 
pub fn drop_chain_back(&mut self, parse_descriptor: &ParsedDescriptorChain) { self.vecs.pop_back(parse_descriptor.nr_iovecs); self.len -= parse_descriptor.length; } /// Create an `IoVecBuffer` from a `DescriptorChain` /// /// # Safety /// /// The descriptor chain cannot be referencing the same memory location as another chain pub unsafe fn from_descriptor_chain( mem: &GuestMemoryMmap, head: DescriptorChain, ) -> Result { let mut new_buffer = Self::new()?; // SAFETY: descriptor chain cannot be referencing the same memory location as another chain unsafe { new_buffer.load_descriptor_chain(mem, head)?; } Ok(new_buffer) } /// Get the total length of the memory regions covered by this `IoVecBuffer` #[inline(always)] pub fn len(&self) -> u32 { self.len } /// Returns true if buffer is empty. #[inline(always)] pub fn is_empty(&self) -> bool { self.len == 0 } /// Returns a pointer to the memory keeping the `iovec` structs pub fn as_iovec_mut_slice(&mut self) -> &mut [iovec] { self.vecs.as_mut_slice() } /// Clears the `iovec` array pub fn clear(&mut self) { self.vecs.clear(); self.len = 0; } /// Writes a number of bytes into the `IoVecBufferMut` starting at a given offset. /// /// This will try to fill `IoVecBufferMut` writing bytes from the `buf` starting from /// the given offset. It will write as many bytes from `buf` as they fit inside the /// `IoVecBufferMut` starting from `offset`. /// /// # Returns /// /// `Ok(())` if the entire contents of `buf` could be written to this [`IoVecBufferMut`], /// `Err(VolatileMemoryError::PartialBuffer)` if only part of `buf` could be transferred, and /// `Err(VolatileMemoryError::OutOfBounds)` if `offset >= self.len()`. 
pub fn write_all_volatile_at( &mut self, mut buf: &[u8], offset: usize, ) -> Result<(), VolatileMemoryError> { if offset < self.len() as usize { let expected = buf.len(); let bytes_written = self.write_volatile_at(&mut buf, offset, expected)?; if bytes_written != expected { return Err(VolatileMemoryError::PartialBuffer { expected, completed: bytes_written, }); } Ok(()) } else { // We cannot write past the end of the `IoVecBufferMut`. Err(VolatileMemoryError::OutOfBounds { addr: offset }) } } /// Writes up to `len` bytes into the `IoVecBuffer` starting at the given offset. /// /// This will try to write to the given [`WriteVolatile`]. pub fn write_volatile_at( &mut self, src: &mut W, mut offset: usize, mut len: usize, ) -> Result { let mut total_bytes_read = 0; for iov in self.vecs.as_slice() { if len == 0 { break; } if offset >= iov.iov_len { offset -= iov.iov_len; continue; } let mut slice = // SAFETY: the constructor IoVecBufferMut::from_descriptor_chain ensures that // all iovecs contained point towards valid ranges of guest memory unsafe { VolatileSlice::new(iov.iov_base.cast(), iov.iov_len).offset(offset)? }; offset = 0; if slice.len() > len { slice = slice.subslice(0, len)?; } match loop { match src.read_volatile(&mut slice) { Err(VolatileMemoryError::IOError(err)) if err.kind() == ErrorKind::Interrupted => {} result => break result, } } { Ok(bytes_read) => { total_bytes_read += bytes_read; if bytes_read < slice.len() { break; } len -= bytes_read; } // exit successfully if we previously managed to read some bytes Err(_) if total_bytes_read > 0 => break, Err(err) => return Err(err), } } Ok(total_bytes_read) } } #[cfg(test)] #[allow(clippy::cast_possible_truncation)] mod tests { use libc::{c_void, iovec}; use vm_memory::VolatileMemoryError; use super::IoVecBuffer; // Redefine `IoVecBufferMut` with specific length. Otherwise // Rust will not know what to do. 
type IoVecBufferMutDefault = super::IoVecBufferMut; use crate::devices::virtio::iov_deque::IovDeque; use crate::devices::virtio::queue::{ FIRECRACKER_MAX_QUEUE_SIZE, Queue, VIRTQ_DESC_F_NEXT, VIRTQ_DESC_F_WRITE, }; use crate::devices::virtio::test_utils::VirtQueue; use crate::test_utils::multi_region_mem; use crate::vstate::memory::{Bytes, GuestAddress, GuestMemoryMmap}; impl<'a> From<&'a [u8]> for IoVecBuffer { fn from(buf: &'a [u8]) -> Self { Self { vecs: vec![iovec { iov_base: buf.as_ptr() as *mut c_void, iov_len: buf.len(), }], len: buf.len().try_into().unwrap(), } } } impl<'a> From> for IoVecBuffer { fn from(buffer: Vec<&'a [u8]>) -> Self { let mut len = 0_u32; let vecs = buffer .into_iter() .map(|slice| { len += TryInto::::try_into(slice.len()).unwrap(); iovec { iov_base: slice.as_ptr() as *mut c_void, iov_len: slice.len(), } }) .collect(); Self { vecs, len } } } impl From<&mut [u8]> for super::IoVecBufferMut { fn from(buf: &mut [u8]) -> Self { let mut vecs = IovDeque::new().unwrap(); vecs.push_back(iovec { iov_base: buf.as_mut_ptr().cast::(), iov_len: buf.len(), }); Self { vecs, len: buf.len() as u32, } } } impl From> for super::IoVecBufferMut { fn from(buffer: Vec<&mut [u8]>) -> Self { let mut len = 0; let mut vecs = IovDeque::new().unwrap(); for slice in buffer { len += slice.len() as u32; vecs.push_back(iovec { iov_base: slice.as_ptr() as *mut c_void, iov_len: slice.len(), }); } Self { vecs, len } } } fn default_mem() -> GuestMemoryMmap { multi_region_mem(&[ (GuestAddress(0), 0x10000), (GuestAddress(0x20000), 0x10000), (GuestAddress(0x40000), 0x10000), ]) } fn chain(m: &GuestMemoryMmap, is_write_only: bool) -> (Queue, VirtQueue<'_>) { let vq = VirtQueue::new(GuestAddress(0), m, 16); let mut q = vq.create_queue(); q.ready = true; let flags = if is_write_only { VIRTQ_DESC_F_NEXT | VIRTQ_DESC_F_WRITE } else { VIRTQ_DESC_F_NEXT }; for j in 0..4 { vq.dtable[j as usize].set(0x20000 + 64 * u64::from(j), 64, flags, j + 1); } // one chain: (0, 1, 2, 3) 
vq.dtable[3].flags.set(flags & !VIRTQ_DESC_F_NEXT); vq.avail.ring[0].set(0); vq.avail.idx.set(1); (q, vq) } fn read_only_chain(mem: &GuestMemoryMmap) -> (Queue, VirtQueue<'_>) { let v: Vec = (0..=255).collect(); mem.write_slice(&v, GuestAddress(0x20000)).unwrap(); chain(mem, false) } fn write_only_chain(mem: &GuestMemoryMmap) -> (Queue, VirtQueue<'_>) { let v = vec![0; 256]; mem.write_slice(&v, GuestAddress(0x20000)).unwrap(); chain(mem, true) } #[test] fn test_access_mode() { let mem = default_mem(); let (mut q, _) = read_only_chain(&mem); let head = q.pop().unwrap().unwrap(); // SAFETY: This descriptor chain is only loaded into one buffer unsafe { IoVecBuffer::from_descriptor_chain(&mem, head).unwrap() }; let (mut q, _) = write_only_chain(&mem); let head = q.pop().unwrap().unwrap(); // SAFETY: This descriptor chain is only loaded into one buffer unsafe { IoVecBuffer::from_descriptor_chain(&mem, head).unwrap_err() }; let (mut q, _) = read_only_chain(&mem); let head = q.pop().unwrap().unwrap(); // SAFETY: This descriptor chain is only loaded into one buffer unsafe { IoVecBufferMutDefault::from_descriptor_chain(&mem, head).unwrap_err() }; let (mut q, _) = write_only_chain(&mem); let head = q.pop().unwrap().unwrap(); // SAFETY: This descriptor chain is only loaded into one buffer unsafe { IoVecBufferMutDefault::from_descriptor_chain(&mem, head).unwrap() }; } #[test] fn test_iovec_length() { let mem = default_mem(); let (mut q, _) = read_only_chain(&mem); let head = q.pop().unwrap().unwrap(); // SAFETY: This descriptor chain is only loaded once in this test let iovec = unsafe { IoVecBuffer::from_descriptor_chain(&mem, head).unwrap() }; assert_eq!(iovec.len(), 4 * 64); } #[test] fn test_iovec_mut_length() { let mem = default_mem(); let (mut q, _) = write_only_chain(&mem); let head = q.pop().unwrap().unwrap(); // SAFETY: This descriptor chain is only loaded once in this test let mut iovec = unsafe { IoVecBufferMutDefault::from_descriptor_chain(&mem, head).unwrap() }; 
assert_eq!(iovec.len(), 4 * 64); // We are creating a new queue where we can get descriptors from. Probably, this is not // something that we will ever want to do, as `IoVecBufferMut`s are typically // (concpetually) associated with a single `Queue`. We just do this here to be able to test // the appending logic. let (mut q, _) = write_only_chain(&mem); let head = q.pop().unwrap().unwrap(); // SAFETY: it is actually unsafe, but we just want to check the length of the // `IoVecBufferMut` after appending. let _ = unsafe { iovec.append_descriptor_chain(&mem, head).unwrap() }; assert_eq!(iovec.len(), 8 * 64); } #[test] fn test_iovec_read_at() { let mem = default_mem(); let (mut q, _) = read_only_chain(&mem); let head = q.pop().unwrap().unwrap(); // SAFETY: This descriptor chain is only loaded once in this test let iovec = unsafe { IoVecBuffer::from_descriptor_chain(&mem, head).unwrap() }; let mut buf = vec![0u8; 257]; assert_eq!( iovec .read_volatile_at(&mut buf.as_mut_slice(), 0, 257) .unwrap(), 256 ); assert_eq!(buf[0..256], (0..=255).collect::>()); assert_eq!(buf[256], 0); let mut buf = vec![0; 5]; iovec.read_exact_volatile_at(&mut buf[..4], 0).unwrap(); assert_eq!(buf, vec![0u8, 1, 2, 3, 0]); iovec.read_exact_volatile_at(&mut buf, 0).unwrap(); assert_eq!(buf, vec![0u8, 1, 2, 3, 4]); iovec.read_exact_volatile_at(&mut buf, 1).unwrap(); assert_eq!(buf, vec![1u8, 2, 3, 4, 5]); iovec.read_exact_volatile_at(&mut buf, 60).unwrap(); assert_eq!(buf, vec![60u8, 61, 62, 63, 64]); assert_eq!( iovec .read_volatile_at(&mut buf.as_mut_slice(), 252, 5) .unwrap(), 4 ); assert_eq!(buf[0..4], vec![252u8, 253, 254, 255]); assert!(matches!( iovec.read_exact_volatile_at(&mut buf, 252), Err(VolatileMemoryError::PartialBuffer { expected: 5, completed: 4 }) )); assert!(matches!( iovec.read_exact_volatile_at(&mut buf, 256), Err(VolatileMemoryError::OutOfBounds { addr: 256 }) )); } #[test] fn test_iovec_mut_write_at() { let mem = default_mem(); let (mut q, vq) = write_only_chain(&mem); // 
This is a descriptor chain with 4 elements 64 bytes long each. let head = q.pop().unwrap().unwrap(); // SAFETY: This descriptor chain is only loaded into one buffer let mut iovec = unsafe { IoVecBufferMutDefault::from_descriptor_chain(&mem, head).unwrap() }; let buf = vec![0u8, 1, 2, 3, 4]; // One test vector for each part of the chain let mut test_vec1 = vec![0u8; 64]; let mut test_vec2 = vec![0u8; 64]; let test_vec3 = vec![0u8; 64]; let mut test_vec4 = vec![0u8; 64]; // Control test: Initially all three regions should be zero iovec.write_all_volatile_at(&test_vec1, 0).unwrap(); iovec.write_all_volatile_at(&test_vec2, 64).unwrap(); iovec.write_all_volatile_at(&test_vec3, 128).unwrap(); iovec.write_all_volatile_at(&test_vec4, 192).unwrap(); vq.dtable[0].check_data(&test_vec1); vq.dtable[1].check_data(&test_vec2); vq.dtable[2].check_data(&test_vec3); vq.dtable[3].check_data(&test_vec4); // Let's initialize test_vec1 with our buffer. test_vec1[..buf.len()].copy_from_slice(&buf); // And write just a part of it iovec.write_all_volatile_at(&buf[..3], 0).unwrap(); // Not all 5 bytes from buf should be written in memory, // just 3 of them. vq.dtable[0].check_data(&[0u8, 1, 2, 0, 0]); vq.dtable[1].check_data(&test_vec2); vq.dtable[2].check_data(&test_vec3); vq.dtable[3].check_data(&test_vec4); // But if we write the whole `buf` in memory then all // of it should be observable. iovec.write_all_volatile_at(&buf, 0).unwrap(); vq.dtable[0].check_data(&test_vec1); vq.dtable[1].check_data(&test_vec2); vq.dtable[2].check_data(&test_vec3); vq.dtable[3].check_data(&test_vec4); // We are now writing with an offset of 1. So, initialize // the corresponding part of `test_vec1` test_vec1[1..buf.len() + 1].copy_from_slice(&buf); iovec.write_all_volatile_at(&buf, 1).unwrap(); vq.dtable[0].check_data(&test_vec1); vq.dtable[1].check_data(&test_vec2); vq.dtable[2].check_data(&test_vec3); vq.dtable[3].check_data(&test_vec4); // Perform a write that traverses two of the underlying // regions. 
Writing at offset 60 should write 4 bytes on the // first region and one byte on the second test_vec1[60..64].copy_from_slice(&buf[0..4]); test_vec2[0] = 4; iovec.write_all_volatile_at(&buf, 60).unwrap(); vq.dtable[0].check_data(&test_vec1); vq.dtable[1].check_data(&test_vec2); vq.dtable[2].check_data(&test_vec3); vq.dtable[3].check_data(&test_vec4); test_vec4[63] = 3; test_vec4[62] = 2; test_vec4[61] = 1; // Now perform a write that does not fit in the buffer. Try writing // 5 bytes at offset 252 (only 4 bytes left). test_vec4[60..64].copy_from_slice(&buf[0..4]); assert_eq!( iovec.write_volatile_at(&mut &*buf, 252, buf.len()).unwrap(), 4 ); vq.dtable[0].check_data(&test_vec1); vq.dtable[1].check_data(&test_vec2); vq.dtable[2].check_data(&test_vec3); vq.dtable[3].check_data(&test_vec4); // Trying to add past the end of the buffer should not write anything assert!(matches!( iovec.write_all_volatile_at(&buf, 256), Err(VolatileMemoryError::OutOfBounds { addr: 256 }) )); vq.dtable[0].check_data(&test_vec1); vq.dtable[1].check_data(&test_vec2); vq.dtable[2].check_data(&test_vec3); vq.dtable[3].check_data(&test_vec4); } } #[cfg(kani)] #[allow(dead_code)] // Avoid warning when using stubs mod verification { use std::mem::ManuallyDrop; use libc::{c_void, iovec}; use vm_memory::VolatileSlice; use vm_memory::bitmap::BitmapSlice; use super::IoVecBuffer; use crate::arch::GUEST_PAGE_SIZE; use crate::devices::virtio::iov_deque::IovDeque; // Redefine `IoVecBufferMut` and `IovDeque` with specific length. Otherwise // Rust will not know what to do. type IoVecBufferMutDefault = super::IoVecBufferMut; type IovDequeDefault = IovDeque; use crate::devices::virtio::queue::FIRECRACKER_MAX_QUEUE_SIZE; // Maximum memory size to use for our buffers. For the time being 1KB. const GUEST_MEMORY_SIZE: usize = 1 << 10; // Maximum number of descriptors in a chain to use in our proofs. The value is selected upon // experimenting with the execution time. 
Typically, in our virtio devices we use queues of up // to 256 entries which is the theoretical maximum length of a `DescriptorChain`, but in reality // our code does not make any assumption about the length of the chain, apart from it being // >= 1. const MAX_DESC_LENGTH: usize = 4; mod stubs { use super::*; /// This is a stub for the `IovDeque::push_back` method. /// /// `IovDeque` relies on a special allocation of two pages of virtual memory, where both of /// these point to the same underlying physical page. This way, the contents of the first /// page of virtual memory are automatically mirrored in the second virtual page. We do /// that in order to always have the elements that are currently in the ring buffer in /// consecutive (virtual) memory. /// /// To build this particular memory layout we create a new `memfd` object, allocate memory /// with `mmap` and call `mmap` again to make sure both pages point to the page allocated /// via the `memfd` object. These ffi calls make kani complain, so here we mock the /// `IovDeque` object memory with a normal memory allocation of two pages worth of data. /// /// This stub helps imitate the effect of mirroring without all the elaborate memory /// allocation trick. pub fn push_back(deque: &mut IovDeque, iov: iovec) { // This should NEVER happen, since our ring buffer is as big as the maximum queue size. // We also check for the sanity of the VirtIO queues, in queue.rs, which means that if // we ever try to add something in a full ring buffer, there is an internal // bug in the device emulation logic. Panic here because the device is // hopelessly broken. 
assert!( !deque.is_full(), "The number of `iovec` objects is bigger than the available space" ); let offset = (deque.start + deque.len) as usize; let mirror = if offset >= L as usize { offset - L as usize } else { offset + L as usize }; // SAFETY: self.iov is a valid pointer and `self.start + self.len` is within range (we // asserted before that the buffer is not full). unsafe { deque.iov.add(offset).write_volatile(iov) }; unsafe { deque.iov.add(mirror).write_volatile(iov) }; deque.len += 1; } } fn create_iovecs(mem: *mut u8, size: usize, nr_descs: usize) -> (Vec, u32) { let mut vecs: Vec = Vec::with_capacity(nr_descs); let mut len = 0u32; for _ in 0..nr_descs { // The `IoVecBuffer` constructors ensure that the memory region described by every // `Descriptor` in the chain is a valid, i.e. it is memory with then guest's memory // mmap. The assumption, here, that the last address is within the memory object's // bound substitutes these checks that `IoVecBuffer::new() performs.` let addr: usize = kani::any(); let iov_len: usize = kani::any_where(|&len| matches!(addr.checked_add(len), Some(x) if x <= size)); let iov_base = unsafe { mem.offset(addr.try_into().unwrap()) } as *mut c_void; vecs.push(iovec { iov_base, iov_len }); len += u32::try_from(iov_len).unwrap(); } (vecs, len) } impl IoVecBuffer { fn any_of_length(nr_descs: usize) -> Self { // We only read from `IoVecBuffer`, so create here a guest memory object, with arbitrary // contents and size up to GUEST_MEMORY_SIZE. 
let mut mem = ManuallyDrop::new(kani::vec::exact_vec::()); let (vecs, len) = create_iovecs(mem.as_mut_ptr(), mem.len(), nr_descs); Self { vecs, len } } } fn create_iov_deque() -> IovDequeDefault { // SAFETY: safe because the layout has non-zero size let mem = unsafe { std::alloc::alloc(std::alloc::Layout::from_size_align_unchecked( 2 * GUEST_PAGE_SIZE, GUEST_PAGE_SIZE, )) }; IovDequeDefault { iov: mem.cast(), start: kani::any_where(|&start| start < FIRECRACKER_MAX_QUEUE_SIZE), len: 0, capacity: FIRECRACKER_MAX_QUEUE_SIZE, } } fn create_iovecs_mut(mem: *mut u8, size: usize, nr_descs: usize) -> (IovDequeDefault, u32) { let mut vecs = create_iov_deque(); let mut len = 0u32; for _ in 0..nr_descs { // The `IoVecBufferMut` constructors ensure that the memory region described by every // `Descriptor` in the chain is a valid, i.e. it is memory with then guest's memory // mmap. The assumption, here, that the last address is within the memory object's // bound substitutes these checks that `IoVecBufferMut::new() performs.` let addr: usize = kani::any(); let iov_len: usize = kani::any_where(|&len| matches!(addr.checked_add(len), Some(x) if x <= size)); let iov_base = unsafe { mem.offset(addr.try_into().unwrap()) } as *mut c_void; vecs.push_back(iovec { iov_base, iov_len }); len += u32::try_from(iov_len).unwrap(); } (vecs, len) } impl IoVecBufferMutDefault { fn any_of_length(nr_descs: usize) -> Self { // We only write into `IoVecBufferMut` objects, so we can simply create a guest memory // object initialized to zeroes, trying to be nice to Kani. 
let mem = unsafe { std::alloc::alloc_zeroed(std::alloc::Layout::from_size_align_unchecked( GUEST_MEMORY_SIZE, 16, )) }; let (vecs, len) = create_iovecs_mut(mem, GUEST_MEMORY_SIZE, nr_descs); Self { vecs, len: len.try_into().unwrap(), } } } // A mock for the Read-/WriteVolatile implementation for u8 slices that does // not go through rust-vmm's machinery (which would cause kani get stuck during post processing) struct KaniBuffer<'a>(&'a mut [u8]); impl vm_memory::ReadVolatile for KaniBuffer<'_> { fn read_volatile( &mut self, buf: &mut VolatileSlice, ) -> Result { let count = buf.len().min(self.0.len()); unsafe { std::ptr::copy_nonoverlapping(self.0.as_ptr(), buf.ptr_guard_mut().as_ptr(), count); } self.0 = std::mem::take(&mut self.0).split_at_mut(count).1; Ok(count) } } impl vm_memory::WriteVolatile for KaniBuffer<'_> { fn write_volatile( &mut self, buf: &VolatileSlice, ) -> Result { let count = buf.len().min(self.0.len()); unsafe { std::ptr::copy_nonoverlapping( buf.ptr_guard_mut().as_ptr(), self.0.as_mut_ptr(), count, ); } self.0 = std::mem::take(&mut self.0).split_at_mut(count).1; Ok(count) } } #[kani::proof] #[kani::unwind(5)] #[kani::solver(cadical)] fn verify_read_from_iovec() { for nr_descs in 0..MAX_DESC_LENGTH { let iov = IoVecBuffer::any_of_length(nr_descs); let mut buf = vec![0; GUEST_MEMORY_SIZE]; let offset: u32 = kani::any(); // We can't really check the contents that the operation here writes into `buf`, because // our `IoVecBuffer` being completely arbitrary can contain overlapping memory regions, // so checking the data copied is not exactly trivial. // // What we can verify is the bytes that we read out from guest memory: // - `buf.len()`, if `offset + buf.len() < iov.len()`; // - `iov.len() - offset`, otherwise. // Furthermore, we know our Read-/WriteVolatile implementation above is infallible, so // provided that the logic inside read_volatile_at is correct, we should always get // Ok(...) 
assert_eq!( iov.read_volatile_at( &mut KaniBuffer(&mut buf), offset as usize, GUEST_MEMORY_SIZE ) .unwrap(), buf.len().min(iov.len().saturating_sub(offset) as usize) ); } } #[kani::proof] #[kani::unwind(5)] #[kani::solver(cadical)] #[kani::stub(IovDeque::push_back, stubs::push_back)] fn verify_write_to_iovec() { for nr_descs in 0..MAX_DESC_LENGTH { let mut iov_mut = IoVecBufferMutDefault::any_of_length(nr_descs); let mut buf = kani::vec::any_vec::(); let offset: u32 = kani::any(); // We can't really check the contents that the operation here writes into // `IoVecBufferMut`, because our `IoVecBufferMut` being completely arbitrary // can contain overlapping memory regions, so checking the data copied is // not exactly trivial. // // What we can verify is the bytes that we write into guest memory: // - `buf.len()`, if `offset + buf.len() < iov.len()`; // - `iov.len() - offset`, otherwise. // Furthermore, we know our Read-/WriteVolatile implementation above is infallible, so // provided that the logic inside write_volatile_at is correct, we should always get // Ok(...) assert_eq!( iov_mut .write_volatile_at( &mut KaniBuffer(&mut buf), offset as usize, GUEST_MEMORY_SIZE ) .unwrap(), buf.len().min(iov_mut.len().saturating_sub(offset) as usize) ); std::mem::forget(iov_mut.vecs); } } } ================================================ FILE: src/vmm/src/devices/virtio/mem/device.rs ================================================ // Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
// SPDX-License-Identifier: Apache-2.0 use std::io; use std::ops::{Deref, Range}; use std::sync::Arc; use std::sync::atomic::AtomicU32; use bitvec::vec::BitVec; use log::info; use serde::{Deserialize, Serialize}; use vm_memory::{ Address, Bytes, GuestAddress, GuestMemory, GuestMemoryError, GuestMemoryRegion, GuestUsize, }; use vmm_sys_util::eventfd::EventFd; use super::{MEM_NUM_QUEUES, MEM_QUEUE}; use crate::devices::virtio::ActivateError; use crate::devices::virtio::device::{ActiveState, DeviceState, VirtioDevice, VirtioDeviceType}; use crate::devices::virtio::generated::virtio_config::VIRTIO_F_VERSION_1; use crate::devices::virtio::generated::virtio_mem::{ self, VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE, virtio_mem_config, }; use crate::devices::virtio::iov_deque::IovDequeError; use crate::devices::virtio::mem::VIRTIO_MEM_DEV_ID; use crate::devices::virtio::mem::metrics::METRICS; use crate::devices::virtio::mem::request::{BlockRangeState, Request, RequestedRange, Response}; use crate::devices::virtio::queue::{ DescriptorChain, FIRECRACKER_MAX_QUEUE_SIZE, InvalidAvailIdx, Queue, QueueError, }; use crate::devices::virtio::transport::{VirtioInterrupt, VirtioInterruptType}; use crate::logger::{IncMetric, debug, error}; use crate::utils::{bytes_to_mib, mib_to_bytes, u64_to_usize, usize_to_u64}; use crate::vstate::interrupts::InterruptError; use crate::vstate::memory::{ ByteValued, GuestMemoryExtension, GuestMemoryMmap, GuestRegionMmap, GuestRegionType, }; use crate::vstate::vm::VmError; use crate::{Vm, impl_device_type}; // SAFETY: virtio_mem_config only contains plain data types unsafe impl ByteValued for virtio_mem_config {} #[derive(Debug, thiserror::Error, displaydoc::Display)] pub enum VirtioMemError { /// Error while handling an Event file descriptor: {0} EventFd(#[from] io::Error), /// Received error while sending an interrupt: {0} InterruptError(#[from] InterruptError), /// Size {0} is invalid: it must be a multiple of block size and less than the total size 
InvalidSize(u64), /// Device is not active DeviceNotActive, /// Descriptor is write-only UnexpectedWriteOnlyDescriptor, /// Error reading virtio descriptor DescriptorWriteFailed, /// Error writing virtio descriptor DescriptorReadFailed, /// Unknown request type: {0} UnknownRequestType(u32), /// Descriptor chain is too short DescriptorChainTooShort, /// Descriptor is too small DescriptorLengthTooSmall, /// Descriptor is read-only UnexpectedReadOnlyDescriptor, /// Error popping from virtio queue: {0} InvalidAvailIdx(#[from] InvalidAvailIdx), /// Error adding used queue: {0} QueueError(#[from] QueueError), /// Invalid requested range: {0:?}. InvalidRange(RequestedRange), /// The requested range cannot be plugged because it's {0:?}. PlugRequestBlockStateInvalid(BlockRangeState), /// Plug request rejected as plugged_size would be greater than requested_size PlugRequestIsTooBig, /// The requested range cannot be unplugged because it's {0:?}. UnplugRequestBlockStateInvalid(BlockRangeState), /// There was an error updating the KVM slot. UpdateKvmSlot(VmError), } #[derive(Debug)] pub struct VirtioMem { // VirtIO fields avail_features: u64, acked_features: u64, activate_event: EventFd, // Transport fields device_state: DeviceState, pub(crate) queues: Vec, queue_events: Vec, // Device specific fields pub(crate) config: virtio_mem_config, pub(crate) slot_size: usize, // Bitmap to track which blocks are plugged pub(crate) plugged_blocks: BitVec, vm: Arc, } /// Memory hotplug device status information. #[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize)] #[serde(deny_unknown_fields)] pub struct VirtioMemStatus { /// Block size in MiB. pub block_size_mib: usize, /// Total memory size in MiB that can be hotplugged. pub total_size_mib: usize, /// Size of the KVM slots in MiB. pub slot_size_mib: usize, /// Currently plugged memory size in MiB. pub plugged_size_mib: usize, /// Requested memory size in MiB. 
    pub requested_size_mib: usize,
}

impl VirtioMem {
    /// Creates a virtio-mem device exposing `total_size_mib` MiB of hotpluggable
    /// memory at guest address `addr`, managed in blocks of `block_size_mib` MiB
    /// and mapped into KVM in slots of `slot_size_mib` MiB.
    pub fn new(
        vm: Arc,
        addr: GuestAddress,
        total_size_mib: usize,
        block_size_mib: usize,
        slot_size_mib: usize,
    ) -> Result {
        let queues = vec![Queue::new(FIRECRACKER_MAX_QUEUE_SIZE); MEM_NUM_QUEUES];
        let config = virtio_mem_config {
            addr: addr.raw_value(),
            region_size: mib_to_bytes(total_size_mib) as u64,
            block_size: mib_to_bytes(block_size_mib) as u64,
            ..Default::default()
        };
        // One bit per block, all initially unplugged.
        let plugged_blocks = BitVec::repeat(false, total_size_mib / block_size_mib);
        Self::from_state(
            vm,
            queues,
            config,
            mib_to_bytes(slot_size_mib),
            plugged_blocks,
        )
    }

    /// Builds the device from pre-existing state (presumably used on snapshot
    /// restore — confirm against callers). Creates fresh, non-blocking eventfds.
    pub fn from_state(
        vm: Arc,
        queues: Vec,
        config: virtio_mem_config,
        slot_size: usize,
        plugged_blocks: BitVec,
    ) -> Result {
        let activate_event = EventFd::new(libc::EFD_NONBLOCK)?;
        let queue_events = (0..MEM_NUM_QUEUES)
            .map(|_| EventFd::new(libc::EFD_NONBLOCK))
            .collect::, io::Error>>()?;
        Ok(Self {
            avail_features: (1 << VIRTIO_F_VERSION_1)
                | (1 << VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE),
            acked_features: 0u64,
            activate_event,
            device_state: DeviceState::Inactive,
            queues,
            queue_events,
            config,
            vm,
            slot_size,
            plugged_blocks,
        })
    }

    /// Gets the guest base address of the hotpluggable region.
    pub fn guest_address(&self) -> GuestAddress {
        GuestAddress(self.config.addr)
    }

    /// Gets the total hotpluggable size.
    pub fn total_size_mib(&self) -> usize {
        bytes_to_mib(u64_to_usize(self.config.region_size))
    }

    /// Gets the block size.
    pub fn block_size_mib(&self) -> usize {
        bytes_to_mib(u64_to_usize(self.config.block_size))
    }

    /// Gets the KVM slot size.
    pub fn slot_size_mib(&self) -> usize {
        bytes_to_mib(self.slot_size)
    }

    /// Gets the total size of the plugged memory blocks.
    pub fn plugged_size_mib(&self) -> usize {
        bytes_to_mib(u64_to_usize(self.config.plugged_size))
    }

    /// Gets the requested size
    pub fn requested_size_mib(&self) -> usize {
        bytes_to_mib(u64_to_usize(self.config.requested_size))
    }

    /// Returns a snapshot of the device's sizes for the status API.
    pub fn status(&self) -> VirtioMemStatus {
        VirtioMemStatus {
            block_size_mib: self.block_size_mib(),
            total_size_mib: self.total_size_mib(),
            slot_size_mib: self.slot_size_mib(),
            plugged_size_mib: self.plugged_size_mib(),
            requested_size_mib: self.requested_size_mib(),
        }
    }

    /// Triggers the used-queue interrupt for the single mem queue.
    fn signal_used_queue(&self) -> Result<(), VirtioMemError> {
        self.interrupt_trigger()
            .trigger(VirtioInterruptType::Queue(MEM_QUEUE.try_into().unwrap()))
            .map_err(VirtioMemError::InterruptError)
    }

    // Panics if the device is not activated (all callers run post-activation).
    fn guest_memory(&self) -> &GuestMemoryMmap {
        &self.device_state.active_state().unwrap().mem
    }

    // Converts a block count into a byte length.
    fn nb_blocks_to_len(&self, nb_blocks: usize) -> usize {
        nb_blocks * u64_to_usize(self.config.block_size)
    }

    /// Returns the state of all the blocks in the given range.
    ///
    /// Note: the range passed to this function must be within the device memory to avoid
    /// out-of-bound panics.
    fn range_state(&self, range: &RequestedRange) -> BlockRangeState {
        let plugged_count = self.plugged_blocks[self.unchecked_block_range(range)].count_ones();
        match plugged_count {
            nb_blocks if nb_blocks == range.nb_blocks => BlockRangeState::Plugged,
            0 => BlockRangeState::Unplugged,
            _ => BlockRangeState::Mixed,
        }
    }

    /// Parses a guest request from a descriptor chain, returning the request,
    /// the guest address to write the response to, and the head index for the
    /// used ring.
    fn parse_request(
        &self,
        avail_desc: &DescriptorChain,
    ) -> Result<(Request, GuestAddress, u16), VirtioMemError> {
        // The head contains the request type which MUST be readable.
        if avail_desc.is_write_only() {
            return Err(VirtioMemError::UnexpectedWriteOnlyDescriptor);
        }
        if (avail_desc.len as usize) < size_of::() {
            return Err(VirtioMemError::DescriptorLengthTooSmall);
        }
        let request: virtio_mem::virtio_mem_req = self
            .guest_memory()
            .read_obj(avail_desc.addr)
            .map_err(|_| VirtioMemError::DescriptorReadFailed)?;
        // Second descriptor in the chain receives the response.
        let resp_desc = avail_desc
            .next_descriptor()
            .ok_or(VirtioMemError::DescriptorChainTooShort)?;
        // The response MUST always be writable.
        if !resp_desc.is_write_only() {
            return Err(VirtioMemError::UnexpectedReadOnlyDescriptor);
        }
        if (resp_desc.len as usize) < std::mem::size_of::() {
            return Err(VirtioMemError::DescriptorLengthTooSmall);
        }
        Ok((request.into(), resp_desc.addr, avail_desc.index))
    }

    /// Writes `resp` into guest memory at `resp_addr` and publishes the
    /// descriptor chain `used_idx` on the used ring.
    fn write_response(
        &mut self,
        resp: Response,
        resp_addr: GuestAddress,
        used_idx: u16,
    ) -> Result<(), VirtioMemError> {
        debug!("virtio-mem: Response: {:?}", resp);
        // NOTE(review): the value produced by `.map(...)` is discarded by `?`;
        // presumably a leftover from a length-returning API — confirm.
        self.guest_memory()
            .write_obj(virtio_mem::virtio_mem_resp::from(resp), resp_addr)
            .map_err(|_| VirtioMemError::DescriptorWriteFailed)
            .map(|_| size_of::())?;
        self.queues[MEM_QUEUE]
            .add_used(
                used_idx,
                u32::try_from(std::mem::size_of::()).unwrap(),
            )
            .map_err(VirtioMemError::QueueError)
    }

    /// Checks that the range provided by the driver is within the usable memory region
    fn validate_range(&self, range: &RequestedRange) -> Result<(), VirtioMemError> {
        // Ensure the range is aligned
        if !range
            .addr
            .raw_value()
            .is_multiple_of(self.config.block_size)
        {
            return Err(VirtioMemError::InvalidRange(*range));
        }
        if range.nb_blocks == 0 {
            return Err(VirtioMemError::InvalidRange(*range));
        }
        // Ensure the start addr is within the usable region
        let start_off = range
            .addr
            .checked_offset_from(self.guest_address())
            .filter(|&off| off < self.config.usable_region_size)
            .ok_or(VirtioMemError::InvalidRange(*range))?;
        // Ensure the end offset (exclusive) is within the usable region
        let end_off = start_off
            .checked_add(usize_to_u64(self.nb_blocks_to_len(range.nb_blocks)))
            .filter(|&end_off| end_off <= self.config.usable_region_size)
            .ok_or(VirtioMemError::InvalidRange(*range))?;
        Ok(())
    }

    // Maps a (validated) guest range onto indices into `plugged_blocks`.
    // Panics on out-of-device ranges, hence "unchecked".
    fn unchecked_block_range(&self, range: &RequestedRange) -> Range {
        let start_block = u64_to_usize((range.addr.0 - self.config.addr) / self.config.block_size);
        start_block..(start_block + range.nb_blocks)
    }

    /// Validates and applies a PLUG request; rejects it if it would exceed
    /// `requested_size` or if any block in the range is already plugged.
    fn process_plug_request(&mut self, range: &RequestedRange) -> Result<(), VirtioMemError> {
        self.validate_range(range)?;
        if self.config.plugged_size + usize_to_u64(self.nb_blocks_to_len(range.nb_blocks))
            > self.config.requested_size
        {
            return Err(VirtioMemError::PlugRequestIsTooBig);
        }
        match self.range_state(range) {
            // the range was validated
            BlockRangeState::Unplugged => self.update_range(range, true),
            state => Err(VirtioMemError::PlugRequestBlockStateInvalid(state)),
        }
    }

    /// Handles a PLUG request end-to-end: bookkeeping, metrics, and response.
    /// Errors are reported to the driver as a NACK, not propagated.
    fn handle_plug_request(
        &mut self,
        range: &RequestedRange,
        resp_addr: GuestAddress,
        used_idx: u16,
    ) -> Result<(), VirtioMemError> {
        METRICS.plug_count.inc();
        let _metric = METRICS.plug_agg.record_latency_metrics();
        let response = match self.process_plug_request(range) {
            Err(err) => {
                METRICS.plug_fails.inc();
                error!("virtio-mem: Failed to plug range: {}", err);
                Response::error()
            }
            Ok(_) => {
                METRICS
                    .plug_bytes
                    .add(usize_to_u64(self.nb_blocks_to_len(range.nb_blocks)));
                Response::ack()
            }
        };
        self.write_response(response, resp_addr, used_idx)
    }

    /// Validates and applies an UNPLUG request; the whole range must be plugged.
    fn process_unplug_request(&mut self, range: &RequestedRange) -> Result<(), VirtioMemError> {
        self.validate_range(range)?;
        match self.range_state(range) {
            // the range was validated
            BlockRangeState::Plugged => self.update_range(range, false),
            state => Err(VirtioMemError::UnplugRequestBlockStateInvalid(state)),
        }
    }

    /// Handles an UNPLUG request end-to-end, mirroring `handle_plug_request`.
    fn handle_unplug_request(
        &mut self,
        range: &RequestedRange,
        resp_addr: GuestAddress,
        used_idx: u16,
    ) -> Result<(), VirtioMemError> {
        METRICS.unplug_count.inc();
        let _metric = METRICS.unplug_agg.record_latency_metrics();
        let response = match self.process_unplug_request(range) {
            Err(err) => {
                METRICS.unplug_fails.inc();
                error!("virtio-mem: Failed to unplug range: {}", err);
                Response::error()
            }
            Ok(_) => {
                METRICS
                    .unplug_bytes
                    .add(usize_to_u64(self.nb_blocks_to_len(range.nb_blocks)));
                Response::ack()
            }
        };
        self.write_response(response, resp_addr, used_idx)
    }

    /// Handles UNPLUG_ALL: unplugs every block and resets the usable region
    /// size to 0 on success.
    fn handle_unplug_all_request(
        &mut self,
        resp_addr: GuestAddress,
        used_idx: u16,
    ) -> Result<(), VirtioMemError> {
        METRICS.unplug_all_count.inc();
        let _metric = METRICS.unplug_all_agg.record_latency_metrics();
        let range = RequestedRange {
            addr: self.guest_address(),
            nb_blocks: self.plugged_blocks.len(),
        };
        let response = match self.update_range(&range, false) {
            Err(err) => {
                METRICS.unplug_all_fails.inc();
                error!("virtio-mem: Failed to unplug all: {}", err);
                Response::error()
            }
            Ok(_) => {
                self.config.usable_region_size = 0;
                Response::ack()
            }
        };
        self.write_response(response, resp_addr, used_idx)
    }

    /// Handles a STATE request: reports whether a range is plugged, unplugged,
    /// or mixed.
    fn handle_state_request(
        &mut self,
        range: &RequestedRange,
        resp_addr: GuestAddress,
        used_idx: u16,
    ) -> Result<(), VirtioMemError> {
        METRICS.state_count.inc();
        let _metric = METRICS.state_agg.record_latency_metrics();
        let response = match self.validate_range(range) {
            Err(err) => {
                METRICS.state_fails.inc();
                error!("virtio-mem: Failed to retrieve state of range: {}", err);
                Response::error()
            }
            // the range was validated
            Ok(_) => Response::ack_with_state(self.range_state(range)),
        };
        self.write_response(response, resp_addr, used_idx)
    }

    /// Drains the mem queue, dispatching each request to its handler, then
    /// publishes the used ring and signals the guest once.
    fn process_mem_queue(&mut self) -> Result<(), VirtioMemError> {
        while let Some(desc) = self.queues[MEM_QUEUE].pop()?
        {
            // NOTE(review): `index` is unused; `used_idx` returned by
            // `parse_request` is what gets reported on the used ring.
            let index = desc.index;
            let (req, resp_addr, used_idx) = self.parse_request(&desc)?;
            debug!("virtio-mem: Request: {:?}", req);
            // Handle request and write response
            match req {
                Request::State(ref range) => self.handle_state_request(range, resp_addr, used_idx),
                Request::Plug(ref range) => self.handle_plug_request(range, resp_addr, used_idx),
                Request::Unplug(ref range) => {
                    self.handle_unplug_request(range, resp_addr, used_idx)
                }
                Request::UnplugAll => self.handle_unplug_all_request(resp_addr, used_idx),
                Request::Unsupported(t) => Err(VirtioMemError::UnknownRequestType(t)),
            }?;
        }
        self.queues[MEM_QUEUE].advance_used_ring_idx();
        self.signal_used_queue()?;
        Ok(())
    }

    /// Event-loop entry: consumes the queue eventfd and processes the queue,
    /// counting (but not propagating) failures.
    pub(crate) fn process_mem_queue_event(&mut self) {
        METRICS.queue_event_count.inc();
        if let Err(err) = self.queue_events[MEM_QUEUE].read() {
            METRICS.queue_event_fails.inc();
            error!("Failed to read mem queue event: {err}");
            return;
        }
        if let Err(err) = self.process_mem_queue() {
            METRICS.queue_event_fails.inc();
            error!("virtio-mem: Failed to process queue: {err}");
        }
    }

    /// Processes all of the device's queues (it only has one).
    pub fn process_virtio_queues(&mut self) -> Result<(), VirtioMemError> {
        self.process_mem_queue()
    }

    pub(crate) fn set_avail_features(&mut self, features: u64) {
        self.avail_features = features;
    }

    pub(crate) fn set_acked_features(&mut self, features: u64) {
        self.acked_features = features;
    }

    pub(crate) fn activate_event(&self) -> &EventFd {
        &self.activate_event
    }

    /// Syncs KVM memory slots with the plugged-block bitmap for every slot
    /// intersecting `updated_range`: a slot with any plugged block is mapped,
    /// a fully unplugged slot is unmapped.
    fn update_kvm_slots(&self, updated_range: &RequestedRange) -> Result<(), VirtioMemError> {
        let hp_region = self
            .guest_memory()
            .iter()
            .find(|r| r.region_type == GuestRegionType::Hotpluggable)
            .expect("there should be one and only one hotpluggable region");
        hp_region
            .slots_intersecting_range(
                updated_range.addr,
                self.nb_blocks_to_len(updated_range.nb_blocks),
            )
            .try_for_each(|slot| {
                let slot_range = RequestedRange {
                    addr: slot.guest_addr,
                    nb_blocks: slot.slice.len() / u64_to_usize(self.config.block_size),
                };
                match self.range_state(&slot_range) {
                    BlockRangeState::Mixed | BlockRangeState::Plugged => {
                        hp_region.update_slot(&self.vm, &slot, true)
                    }
                    BlockRangeState::Unplugged => hp_region.update_slot(&self.vm, &slot, false),
                }
                .map_err(VirtioMemError::UpdateKvmSlot)
            })
    }

    /// Plugs/unplugs the given range
    ///
    /// Note: the range passed to this function must be within the device memory to avoid
    /// out-of-bound panics.
    fn update_range(&mut self, range: &RequestedRange, plug: bool) -> Result<(), VirtioMemError> {
        // Update internal state
        let block_range = self.unchecked_block_range(range);
        let plugged_blocks_slice = &mut self.plugged_blocks[block_range];
        // before/after delta keeps `plugged_size` correct even if part of the
        // range was already in the target state (e.g. UNPLUG_ALL over a mix).
        let plugged_before = plugged_blocks_slice.count_ones();
        plugged_blocks_slice.fill(plug);
        let plugged_after = plugged_blocks_slice.count_ones();
        self.config.plugged_size -= usize_to_u64(self.nb_blocks_to_len(plugged_before));
        self.config.plugged_size += usize_to_u64(self.nb_blocks_to_len(plugged_after));

        // If unplugging, discard the range
        if !plug {
            self.guest_memory()
                .discard_range(range.addr, self.nb_blocks_to_len(range.nb_blocks))
                .inspect_err(|err| {
                    // Failure to discard is not fatal and is not reported to the driver. It only
                    // gets logged.
                    METRICS.unplug_discard_fails.inc();
                    error!("virtio-mem: Failed to discard memory range: {}", err);
                });
        }

        self.update_kvm_slots(range)
    }

    /// Updates the requested size of the virtio-mem device.
    pub fn update_requested_size(
        &mut self,
        requested_size_mib: usize,
    ) -> Result<(), VirtioMemError> {
        let requested_size = usize_to_u64(mib_to_bytes(requested_size_mib));
        if !self.is_activated() {
            return Err(VirtioMemError::DeviceNotActive);
        }
        if !requested_size.is_multiple_of(self.config.block_size) {
            return Err(VirtioMemError::InvalidSize(requested_size));
        }
        if requested_size > self.config.region_size {
            return Err(VirtioMemError::InvalidSize(requested_size));
        }
        // Increase the usable_region_size if it's not enough for the guest to plug new
        // memory blocks.
        // The device cannot decrease the usable_region_size unless the guest requests
        // to reset it with an UNPLUG_ALL request.
        if self.config.usable_region_size < requested_size {
            // Round up to a whole KVM slot.
            self.config.usable_region_size =
                requested_size.next_multiple_of(usize_to_u64(self.slot_size));
            debug!(
                "virtio-mem: Updated usable size to {} bytes",
                self.config.usable_region_size
            );
        }
        self.config.requested_size = requested_size;
        debug!(
            "virtio-mem: Updated requested size to {} bytes",
            requested_size
        );
        // Notify the guest of the config change so it starts (un)plugging.
        self.interrupt_trigger()
            .trigger(VirtioInterruptType::Config)
            .map_err(VirtioMemError::InterruptError)
    }
}

impl VirtioDevice for VirtioMem {
    impl_device_type!(VirtioDeviceType::Mem);

    fn id(&self) -> &str {
        VIRTIO_MEM_DEV_ID
    }

    fn queues(&self) -> &[Queue] {
        &self.queues
    }

    fn queues_mut(&mut self) -> &mut [Queue] {
        &mut self.queues
    }

    fn queue_events(&self) -> &[EventFd] {
        &self.queue_events
    }

    // Panics if called before activation.
    fn interrupt_trigger(&self) -> &dyn VirtioInterrupt {
        self.device_state
            .active_state()
            .expect("Device is not activated")
            .interrupt
            .deref()
    }

    fn avail_features(&self) -> u64 {
        self.avail_features
    }

    fn acked_features(&self) -> u64 {
        self.acked_features
    }

    fn set_acked_features(&mut self, acked_features: u64) {
        self.acked_features = acked_features;
    }

    // Out-of-bounds reads are logged and leave `data` untouched.
    fn read_config(&self, offset: u64, data: &mut [u8]) {
        let offset = u64_to_usize(offset);
        self.config
            .as_slice()
            .get(offset..offset + data.len())
            .map(|s| data.copy_from_slice(s))
            .unwrap_or_else(|| {
                error!(
                    "virtio-mem: Config read offset+length {offset}+{} out of bounds",
                    data.len()
                )
            })
    }

    // The whole config space is read-only for the driver.
    fn write_config(&mut self, offset: u64, _data: &[u8]) {
        error!("virtio-mem: Attempted write to read-only config space at offset {offset}");
    }

    fn is_activated(&self) -> bool {
        self.device_state.is_activated()
    }

    // Activation requires the guest to have acked
    // VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE, since unplugged blocks are discarded.
    fn activate(
        &mut self,
        mem: GuestMemoryMmap,
        interrupt: Arc,
    ) -> Result<(), ActivateError> {
        if (self.acked_features & (1 << VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE)) == 0 {
            error!(
                "virtio-mem: VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE feature not acknowledged by guest"
            );
            METRICS.activate_fails.inc();
            return Err(ActivateError::RequiredFeatureNotAcked(
                "VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE",
            ));
        }
        for q in self.queues.iter_mut() {
            q.initialize(&mem)
                .map_err(ActivateError::QueueMemoryError)?;
        }
        self.device_state = DeviceState::Activated(ActiveState { mem, interrupt });
        if self.activate_event.write(1).is_err() {
            METRICS.activate_fails.inc();
            // Roll back so the device is not left half-activated.
            self.device_state = DeviceState::Inactive;
            return Err(ActivateError::EventFd);
        }
        Ok(())
    }

    // NOTE(review): the Result of process_virtio_queues() is discarded here;
    // presumably intentional best-effort on kick — confirm.
    fn kick(&mut self) {
        if self.is_activated() {
            info!("kick mem {}.", self.id());
            self.process_virtio_queues();
        }
    }
}

#[cfg(test)]
pub(crate) mod test_utils {
    use super::*;
    use crate::devices::virtio::test_utils::test::VirtioTestDevice;
    use crate::test_utils::single_region_mem;
    use crate::vmm_config::machine_config::HugePageConfig;
    use crate::vstate::memory;
    use crate::vstate::vm::tests::setup_vm_with_memory;

    impl VirtioTestDevice for VirtioMem {
        fn set_queues(&mut self, queues: Vec) {
            self.queues = queues;
        }

        fn num_queues(&self) -> usize {
            MEM_NUM_QUEUES
        }
    }

    /// Builds a device with 1024 MiB hotpluggable at 512 GiB, 2 MiB blocks and
    /// 128 MiB slots, on a VM with a small anonymous boot region.
    pub(crate) fn default_virtio_mem() -> VirtioMem {
        let (_, mut vm) = setup_vm_with_memory(0x1000);
        let addr = GuestAddress(512 << 30);
        vm.register_hotpluggable_memory_region(
            memory::anonymous(
                std::iter::once((addr, mib_to_bytes(1024))),
                false,
                HugePageConfig::None,
            )
            .unwrap()
            .pop()
            .unwrap(),
            mib_to_bytes(128),
        );
        let vm = Arc::new(vm);
        VirtioMem::new(vm, addr, 1024, 2, 128).unwrap()
    }
}

#[cfg(test)]
mod tests {
    use std::ptr::null_mut;

    use serde_json::de;
    use vm_memory::guest_memory;
    use vm_memory::mmap::MmapRegionBuilder;

    use super::*;
    use crate::devices::virtio::device::{VirtioDevice, VirtioDeviceType};
    use crate::devices::virtio::mem::device::test_utils::default_virtio_mem;
    use crate::devices::virtio::queue::VIRTQ_DESC_F_WRITE;
    use crate::devices::virtio::test_utils::test::VirtioTestHelper;
    use crate::vstate::vm::tests::setup_vm_with_memory;

    #[test]
    fn test_new() {
        let mem = default_virtio_mem();
        assert_eq!(mem.total_size_mib(), 1024);
assert_eq!(mem.block_size_mib(), 2); assert_eq!(mem.plugged_size_mib(), 0); assert_eq!(mem.id(), VIRTIO_MEM_DEV_ID); assert_eq!(mem.device_type(), VirtioDeviceType::Mem); let features = (1 << VIRTIO_F_VERSION_1) | (1 << VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE); assert_eq!(mem.avail_features(), features); assert_eq!(mem.acked_features(), 0); assert!(!mem.is_activated()); assert_eq!(mem.queues().len(), MEM_NUM_QUEUES); assert_eq!(mem.queue_events().len(), MEM_NUM_QUEUES); } #[test] fn test_from_state() { let (_, vm) = setup_vm_with_memory(0x1000); let vm = Arc::new(vm); let queues = vec![Queue::new(FIRECRACKER_MAX_QUEUE_SIZE); MEM_NUM_QUEUES]; let addr = 512 << 30; let region_size_mib = 2048; let block_size_mib = 2; let slot_size_mib = 128; let plugged_size_mib = 512; let usable_region_size = mib_to_bytes(1024) as u64; let config = virtio_mem_config { addr, region_size: mib_to_bytes(region_size_mib) as u64, block_size: mib_to_bytes(block_size_mib) as u64, plugged_size: mib_to_bytes(plugged_size_mib) as u64, usable_region_size, ..Default::default() }; let plugged_blocks = BitVec::repeat( false, mib_to_bytes(region_size_mib) / mib_to_bytes(block_size_mib), ); let mem = VirtioMem::from_state( vm, queues, config, mib_to_bytes(slot_size_mib), plugged_blocks, ) .unwrap(); assert_eq!(mem.config.addr, addr); assert_eq!(mem.total_size_mib(), region_size_mib); assert_eq!(mem.block_size_mib(), block_size_mib); assert_eq!(mem.slot_size_mib(), slot_size_mib); assert_eq!(mem.plugged_size_mib(), plugged_size_mib); assert_eq!(mem.config.usable_region_size, usable_region_size); } #[test] fn test_read_config() { let mem = default_virtio_mem(); let mut data = [0u8; 8]; mem.read_config(0, &mut data); assert_eq!( u64::from_le_bytes(data), mib_to_bytes(mem.block_size_mib()) as u64 ); mem.read_config(16, &mut data); assert_eq!(u64::from_le_bytes(data), 512 << 30); mem.read_config(24, &mut data); assert_eq!( u64::from_le_bytes(data), mib_to_bytes(mem.total_size_mib()) as u64 ); } #[test] fn 
test_read_config_out_of_bounds() { let mem = default_virtio_mem(); let mut data = [0u8; 8]; let config_size = std::mem::size_of::(); mem.read_config(config_size as u64, &mut data); assert_eq!(data, [0u8; 8]); // Should remain unchanged let mut data = vec![0u8; config_size]; mem.read_config(8, &mut data); assert_eq!(data, vec![0u8; config_size]); // Should remain unchanged } #[test] fn test_write_config() { let mut mem = default_virtio_mem(); let data = [1u8; 8]; mem.write_config(0, &data); // Should log error but not crash // should not change config let mut data = [0u8; 8]; mem.read_config(0, &mut data); let block_size = u64::from_le_bytes(data); assert_eq!(block_size, mib_to_bytes(2) as u64); } #[test] fn test_set_features() { let mut mem = default_virtio_mem(); mem.set_avail_features(123); assert_eq!(mem.avail_features(), 123); mem.set_acked_features(456); assert_eq!(mem.acked_features(), 456); } #[test] fn test_status() { let mut mem = default_virtio_mem(); let status = mem.status(); assert_eq!( status, VirtioMemStatus { block_size_mib: 2, total_size_mib: 1024, slot_size_mib: 128, plugged_size_mib: 0, requested_size_mib: 0, } ); } #[allow(clippy::cast_possible_truncation)] const REQ_SIZE: u32 = std::mem::size_of::() as u32; #[allow(clippy::cast_possible_truncation)] const RESP_SIZE: u32 = std::mem::size_of::() as u32; fn test_helper<'a>( mut dev: VirtioMem, mem: &'a GuestMemoryMmap, ) -> VirtioTestHelper<'a, VirtioMem> { dev.set_acked_features(dev.avail_features); let mut th = VirtioTestHelper::::new(mem, dev); th.activate_device(mem); th } fn emulate_request( th: &mut VirtioTestHelper, mem: &GuestMemoryMmap, req: Request, ) -> Response { th.add_desc_chain( MEM_QUEUE, 0, &[(0, REQ_SIZE, 0), (1, RESP_SIZE, VIRTQ_DESC_F_WRITE)], ); mem.write_obj( virtio_mem::virtio_mem_req::from(req), th.desc_address(MEM_QUEUE, 0), ) .unwrap(); assert_eq!(th.emulate_for_msec(100).unwrap(), 1); mem.read_obj::(th.desc_address(MEM_QUEUE, 1)) .unwrap() .into() } #[test] fn 
test_event_fail_descriptor_chain_too_short() { let mut mem_dev = default_virtio_mem(); let guest_mem = mem_dev.vm.guest_memory().clone(); let mut th = test_helper(mem_dev, &guest_mem); let queue_event_count = METRICS.queue_event_count.count(); let queue_event_fails = METRICS.queue_event_fails.count(); th.add_desc_chain(MEM_QUEUE, 0, &[(0, REQ_SIZE, 0)]); assert_eq!(th.emulate_for_msec(100).unwrap(), 1); assert_eq!(METRICS.queue_event_count.count(), queue_event_count + 1); assert_eq!(METRICS.queue_event_fails.count(), queue_event_fails + 1); } #[test] fn test_event_fail_descriptor_length_too_small() { let mut mem_dev = default_virtio_mem(); let guest_mem = mem_dev.vm.guest_memory().clone(); let mut th = test_helper(mem_dev, &guest_mem); let queue_event_count = METRICS.queue_event_count.count(); let queue_event_fails = METRICS.queue_event_fails.count(); th.add_desc_chain(MEM_QUEUE, 0, &[(0, 1, 0)]); assert_eq!(th.emulate_for_msec(100).unwrap(), 1); assert_eq!(METRICS.queue_event_count.count(), queue_event_count + 1); assert_eq!(METRICS.queue_event_fails.count(), queue_event_fails + 1); } #[test] fn test_event_fail_unexpected_writeonly_descriptor() { let mut mem_dev = default_virtio_mem(); let guest_mem = mem_dev.vm.guest_memory().clone(); let mut th = test_helper(mem_dev, &guest_mem); let queue_event_count = METRICS.queue_event_count.count(); let queue_event_fails = METRICS.queue_event_fails.count(); th.add_desc_chain(MEM_QUEUE, 0, &[(0, REQ_SIZE, VIRTQ_DESC_F_WRITE)]); assert_eq!(th.emulate_for_msec(100).unwrap(), 1); assert_eq!(METRICS.queue_event_count.count(), queue_event_count + 1); assert_eq!(METRICS.queue_event_fails.count(), queue_event_fails + 1); } #[test] fn test_event_fail_unexpected_readonly_descriptor() { let mut mem_dev = default_virtio_mem(); let guest_mem = mem_dev.vm.guest_memory().clone(); let mut th = test_helper(mem_dev, &guest_mem); let queue_event_count = METRICS.queue_event_count.count(); let queue_event_fails = 
METRICS.queue_event_fails.count(); th.add_desc_chain(MEM_QUEUE, 0, &[(0, REQ_SIZE, 0), (1, RESP_SIZE, 0)]); assert_eq!(th.emulate_for_msec(100).unwrap(), 1); assert_eq!(METRICS.queue_event_count.count(), queue_event_count + 1); assert_eq!(METRICS.queue_event_fails.count(), queue_event_fails + 1); } #[test] fn test_event_fail_response_descriptor_length_too_small() { let mut mem_dev = default_virtio_mem(); let guest_mem = mem_dev.vm.guest_memory().clone(); let mut th = test_helper(mem_dev, &guest_mem); let queue_event_count = METRICS.queue_event_count.count(); let queue_event_fails = METRICS.queue_event_fails.count(); th.add_desc_chain( MEM_QUEUE, 0, &[(0, REQ_SIZE, 0), (1, 1, VIRTQ_DESC_F_WRITE)], ); assert_eq!(th.emulate_for_msec(100).unwrap(), 1); assert_eq!(METRICS.queue_event_count.count(), queue_event_count + 1); assert_eq!(METRICS.queue_event_fails.count(), queue_event_fails + 1); } #[test] fn test_update_requested_size_device_not_active() { let mut mem_dev = default_virtio_mem(); let result = mem_dev.update_requested_size(512); assert!(matches!(result, Err(VirtioMemError::DeviceNotActive))); } #[test] fn test_update_requested_size_invalid_size() { let mut mem_dev = default_virtio_mem(); let guest_mem = mem_dev.vm.guest_memory().clone(); let mut th = test_helper(mem_dev, &guest_mem); // Size not multiple of block size let result = th.device().update_requested_size(3); assert!(matches!(result, Err(VirtioMemError::InvalidSize(_)))); // Size too large let result = th.device().update_requested_size(2048); assert!(matches!(result, Err(VirtioMemError::InvalidSize(_)))); } #[test] fn test_update_requested_size_success() { let mut mem_dev = default_virtio_mem(); let guest_mem = mem_dev.vm.guest_memory().clone(); let mut th = test_helper(mem_dev, &guest_mem); th.device().update_requested_size(512).unwrap(); assert_eq!(th.device().requested_size_mib(), 512); } #[test] fn test_plug_request_success() { let mut mem_dev = default_virtio_mem(); let guest_mem = 
mem_dev.vm.guest_memory().clone();
        let mut th = test_helper(mem_dev, &guest_mem);
        th.device().update_requested_size(1024);
        let addr = th.device().guest_address();
        // Snapshot the process-wide METRICS counters first, so the assertions
        // below are robust to other tests having already bumped them.
        let queue_event_count = METRICS.queue_event_count.count();
        let queue_event_fails = METRICS.queue_event_fails.count();
        let plug_count = METRICS.plug_count.count();
        let plug_bytes = METRICS.plug_bytes.count();
        let plug_fails = METRICS.plug_fails.count();
        let resp = emulate_request(
            &mut th,
            &guest_mem,
            Request::Plug(RequestedRange { addr, nb_blocks: 1 }),
        );
        assert!(resp.is_ack());
        assert_eq!(th.device().plugged_size_mib(), 2);
        assert_eq!(METRICS.queue_event_count.count(), queue_event_count + 1);
        assert_eq!(METRICS.queue_event_fails.count(), queue_event_fails);
        assert_eq!(METRICS.plug_count.count(), plug_count + 1);
        // One block was plugged; blocks are 2 MiB (2 << 20 bytes).
        assert_eq!(METRICS.plug_bytes.count(), plug_bytes + (2 << 20));
        assert_eq!(METRICS.plug_fails.count(), plug_fails);
    }

    // A Plug request beyond the driver-visible requested size must be rejected
    // and accounted as a failure.
    #[test]
    fn test_plug_request_too_big() {
        let mut mem_dev = default_virtio_mem();
        let guest_mem = mem_dev.vm.guest_memory().clone();
        let mut th = test_helper(mem_dev, &guest_mem);
        th.device().update_requested_size(2);
        let addr = th.device().guest_address();
        let plug_count = METRICS.plug_count.count();
        let plug_bytes = METRICS.plug_bytes.count();
        let plug_fails = METRICS.plug_fails.count();
        let resp = emulate_request(
            &mut th,
            &guest_mem,
            Request::Plug(RequestedRange { addr, nb_blocks: 2 }),
        );
        assert!(resp.is_error());
        assert_eq!(METRICS.plug_count.count(), plug_count + 1);
        assert_eq!(METRICS.plug_bytes.count(), plug_bytes);
        assert_eq!(METRICS.plug_fails.count(), plug_fails + 1);
    }

    // Plugging the same block twice must fail on the second attempt.
    #[test]
    fn test_plug_request_already_plugged() {
        let mut mem_dev = default_virtio_mem();
        let guest_mem = mem_dev.vm.guest_memory().clone();
        let mut th = test_helper(mem_dev, &guest_mem);
        th.device().update_requested_size(1024);
        let addr = th.device().guest_address();
        // First plug succeeds
        let resp = emulate_request(
            &mut th,
            &guest_mem,
            Request::Plug(RequestedRange { addr, nb_blocks: 1 }),
        );
        assert!(resp.is_ack());
        // Second plug fails
        let resp = emulate_request(
            &mut th,
            &guest_mem,
            Request::Plug(RequestedRange { addr, nb_blocks: 1 }),
        );
        assert!(resp.is_error());
    }

    // Plug then Unplug the same block; plugged size returns to 0 and the
    // unplug metrics are updated.
    #[test]
    fn test_unplug_request_success() {
        let mut mem_dev = default_virtio_mem();
        let guest_mem = mem_dev.vm.guest_memory().clone();
        let mut th = test_helper(mem_dev, &guest_mem);
        th.device().update_requested_size(1024);
        let addr = th.device().guest_address();
        let unplug_count = METRICS.unplug_count.count();
        let unplug_bytes = METRICS.unplug_bytes.count();
        let unplug_fails = METRICS.unplug_fails.count();
        // First plug
        let resp = emulate_request(
            &mut th,
            &guest_mem,
            Request::Plug(RequestedRange { addr, nb_blocks: 1 }),
        );
        assert!(resp.is_ack());
        assert_eq!(th.device().plugged_size_mib(), 2);
        // Then unplug
        let resp = emulate_request(
            &mut th,
            &guest_mem,
            Request::Unplug(RequestedRange { addr, nb_blocks: 1 }),
        );
        assert!(resp.is_ack());
        assert_eq!(th.device().plugged_size_mib(), 0);
        assert_eq!(METRICS.unplug_count.count(), unplug_count + 1);
        assert_eq!(METRICS.unplug_bytes.count(), unplug_bytes + (2 << 20));
        assert_eq!(METRICS.unplug_fails.count(), unplug_fails);
    }

    // Unplugging a block that was never plugged is an error.
    #[test]
    fn test_unplug_request_not_plugged() {
        let mut mem_dev = default_virtio_mem();
        let guest_mem = mem_dev.vm.guest_memory().clone();
        let mut th = test_helper(mem_dev, &guest_mem);
        th.device().update_requested_size(1024);
        let addr = th.device().guest_address();
        let unplug_count = METRICS.unplug_count.count();
        let unplug_bytes = METRICS.unplug_bytes.count();
        let unplug_fails = METRICS.unplug_fails.count();
        let resp = emulate_request(
            &mut th,
            &guest_mem,
            Request::Unplug(RequestedRange { addr, nb_blocks: 1 }),
        );
        assert!(resp.is_error());
        assert_eq!(METRICS.unplug_count.count(), unplug_count + 1);
        assert_eq!(METRICS.unplug_bytes.count(), unplug_bytes);
        assert_eq!(METRICS.unplug_fails.count(), unplug_fails + 1);
    }

    // UnplugAll releases every plugged block in one request.
    #[test]
    fn test_unplug_all_request() {
        let mut mem_dev = default_virtio_mem();
        let guest_mem = mem_dev.vm.guest_memory().clone();
        let mut th = test_helper(mem_dev, &guest_mem);
        th.device().update_requested_size(1024);
        let addr = th.device().guest_address();
        let unplug_all_count = METRICS.unplug_all_count.count();
        let unplug_all_fails = METRICS.unplug_all_fails.count();
        // Plug some blocks
        let resp = emulate_request(
            &mut th,
            &guest_mem,
            Request::Plug(RequestedRange { addr, nb_blocks: 2 }),
        );
        assert!(resp.is_ack());
        assert_eq!(th.device().plugged_size_mib(), 4);
        // Unplug all
        let resp = emulate_request(&mut th, &guest_mem, Request::UnplugAll);
        assert!(resp.is_ack());
        assert_eq!(th.device().plugged_size_mib(), 0);
        assert_eq!(METRICS.unplug_all_count.count(), unplug_all_count + 1);
        assert_eq!(METRICS.unplug_all_fails.count(), unplug_all_fails);
    }

    // State query over an untouched range reports Unplugged.
    #[test]
    fn test_state_request_unplugged() {
        let mut mem_dev = default_virtio_mem();
        let guest_mem = mem_dev.vm.guest_memory().clone();
        let mut th = test_helper(mem_dev, &guest_mem);
        th.device().update_requested_size(1024);
        let addr = th.device().guest_address();
        let state_count = METRICS.state_count.count();
        let state_fails = METRICS.state_fails.count();
        let resp = emulate_request(
            &mut th,
            &guest_mem,
            Request::State(RequestedRange { addr, nb_blocks: 1 }),
        );
        assert_eq!(resp, Response::ack_with_state(BlockRangeState::Unplugged));
        assert_eq!(METRICS.state_count.count(), state_count + 1);
        assert_eq!(METRICS.state_fails.count(), state_fails);
    }

    // State query over a plugged range reports Plugged.
    #[test]
    fn test_state_request_plugged() {
        let mut mem_dev = default_virtio_mem();
        let guest_mem = mem_dev.vm.guest_memory().clone();
        let mut th = test_helper(mem_dev, &guest_mem);
        th.device().update_requested_size(1024);
        let addr = th.device().guest_address();
        // Plug first
        let resp = emulate_request(
            &mut th,
            &guest_mem,
            Request::Plug(RequestedRange { addr, nb_blocks: 1 }),
        );
        assert!(resp.is_ack());
        // Check state
        let resp = emulate_request(
            &mut th,
            &guest_mem,
            Request::State(RequestedRange { addr, nb_blocks: 1 }),
        );
        assert_eq!(resp, Response::ack_with_state(BlockRangeState::Plugged));
    }

    // A range covering one plugged and one unplugged block reports Mixed.
    #[test]
    fn test_state_request_mixed() {
        let mut mem_dev = default_virtio_mem();
        let guest_mem = mem_dev.vm.guest_memory().clone();
        let mut th = test_helper(mem_dev, &guest_mem);
        th.device().update_requested_size(1024);
        let addr = th.device().guest_address();
        // Plug first block only
        let resp = emulate_request(
            &mut th,
            &guest_mem,
            Request::Plug(RequestedRange { addr, nb_blocks: 1 }),
        );
        assert!(resp.is_ack());
        // Check state of 2 blocks (one plugged, one unplugged)
        let resp = emulate_request(
            &mut th,
            &guest_mem,
            Request::State(RequestedRange { addr, nb_blocks: 2 }),
        );
        assert_eq!(resp, Response::ack_with_state(BlockRangeState::Mixed));
    }

    // A request address that is not block-aligned must be rejected.
    #[test]
    fn test_invalid_range_unaligned() {
        let mut mem_dev = default_virtio_mem();
        let guest_mem = mem_dev.vm.guest_memory().clone();
        let mut th = test_helper(mem_dev, &guest_mem);
        th.device().update_requested_size(1024);
        // Off-by-one from the block-aligned device base address.
        let addr = th.device().guest_address().unchecked_add(1);
        let state_count = METRICS.state_count.count();
        let state_fails = METRICS.state_fails.count();
        let resp = emulate_request(
            &mut th,
            &guest_mem,
            Request::State(RequestedRange { addr, nb_blocks: 1 }),
        );
        assert!(resp.is_error());
        assert_eq!(METRICS.state_count.count(), state_count + 1);
        assert_eq!(METRICS.state_fails.count(), state_fails + 1);
    }

    // A zero-length range is invalid.
    #[test]
    fn test_invalid_range_zero_blocks() {
        let mut mem_dev = default_virtio_mem();
        let guest_mem = mem_dev.vm.guest_memory().clone();
        let mut th = test_helper(mem_dev, &guest_mem);
        th.device().update_requested_size(1024);
        let addr = th.device().guest_address();
        let resp = emulate_request(
            &mut th,
            &guest_mem,
            Request::State(RequestedRange { addr, nb_blocks: 0 }),
        );
        assert!(resp.is_error());
    }

    // A range extending past the device region is invalid.
    #[test]
    fn test_invalid_range_out_of_bounds() {
        let mut mem_dev = default_virtio_mem();
        let guest_mem = mem_dev.vm.guest_memory().clone();
        let mut th = test_helper(mem_dev, &guest_mem);
        th.device().update_requested_size(4);
        let addr = th.device().guest_address();
        let resp = emulate_request(
            &mut th,
            &guest_mem,
            Request::State(RequestedRange {
                addr,
                nb_blocks: 1024,
            }),
        );
        assert!(resp.is_error());
    }

    // An unknown request type is consumed from the queue but counted as a
    // queue-event failure.
    #[test]
    fn test_unsupported_request() {
        let mut mem_dev = default_virtio_mem();
        let guest_mem = mem_dev.vm.guest_memory().clone();
        let mut th = test_helper(mem_dev, &guest_mem);
        let queue_event_count = METRICS.queue_event_count.count();
        let queue_event_fails = METRICS.queue_event_fails.count();
        th.add_desc_chain(
            MEM_QUEUE,
            0,
            &[(0, REQ_SIZE, 0), (1, RESP_SIZE, VIRTQ_DESC_F_WRITE)],
        );
        guest_mem
            .write_obj(
                virtio_mem::virtio_mem_req::from(Request::Unsupported(999)),
                th.desc_address(MEM_QUEUE, 0),
            )
            .unwrap();
        assert_eq!(th.emulate_for_msec(100).unwrap(), 1);
        assert_eq!(METRICS.queue_event_count.count(), queue_event_count + 1);
        assert_eq!(METRICS.queue_event_fails.count(), queue_event_fails + 1);
    }
}



================================================
FILE: src/vmm/src/devices/virtio/mem/event_handler.rs
================================================
// Copyright 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

use event_manager::{EventOps, Events, MutEventSubscriber};
use vmm_sys_util::epoll::EventSet;

use crate::devices::virtio::device::VirtioDevice;
use crate::devices::virtio::mem::MEM_QUEUE;
use crate::devices::virtio::mem::device::VirtioMem;
use crate::logger::{error, warn};

impl VirtioMem {
    // Event-source identifiers used to dispatch in `process()`.
    const PROCESS_ACTIVATE: u32 = 0;
    const PROCESS_MEM_QUEUE: u32 = 1;

    /// Registers the queue notification event, handled once the device is active.
    fn register_runtime_events(&self, ops: &mut EventOps) {
        if let Err(err) = ops.add(Events::with_data(
            &self.queue_events()[MEM_QUEUE],
            Self::PROCESS_MEM_QUEUE,
            EventSet::IN,
        )) {
            error!("virtio-mem: Failed to register queue event: {err}");
        }
    }

    /// Registers the activate event, handled while the device is still inactive.
    fn register_activate_event(&self, ops: &mut EventOps) {
        if let Err(err) = ops.add(Events::with_data(
            self.activate_event(),
            Self::PROCESS_ACTIVATE,
            EventSet::IN,
        )) {
            error!("virtio-mem: Failed to register activate event: {err}");
        }
    }

    /// Consumes the activate event, then swaps the registration set from
    /// activation-time to runtime events.
    fn process_activate_event(&self, ops: &mut EventOps) {
        if let Err(err) = self.activate_event().read() {
            error!("virtio-mem: Failed to consume activate event: {err}");
        }
        // Register runtime events
        self.register_runtime_events(ops);
        // Remove activate event
        if let Err(err) = ops.remove(Events::with_data(
            self.activate_event(),
            Self::PROCESS_ACTIVATE,
            EventSet::IN,
        )) {
            error!("virtio-mem: Failed to un-register activate event: {err}");
        }
    }
}

impl MutEventSubscriber for VirtioMem {
    fn init(&mut self, ops: &mut event_manager::EventOps) {
        // This function can be called during different points in the device lifetime:
        //  - shortly after device creation,
        //  - on device activation (is-activated already true at this point),
        //  - on device restore from snapshot.
        if self.is_activated() {
            self.register_runtime_events(ops);
        } else {
            self.register_activate_event(ops);
        }
    }

    fn process(&mut self, events: event_manager::Events, ops: &mut event_manager::EventOps) {
        let event_set = events.event_set();
        let source = events.data();

        // Only EventSet::IN is ever registered; anything else is unexpected.
        if !event_set.contains(EventSet::IN) {
            warn!("virtio-mem: Received unknown event: {event_set:?} from source {source}");
            return;
        }

        if !self.is_activated() {
            warn!("virtio-mem: The device is not activated yet. Spurious event received: {source}");
            return;
        }

        match source {
            Self::PROCESS_ACTIVATE => self.process_activate_event(ops),
            Self::PROCESS_MEM_QUEUE => self.process_mem_queue_event(),
            _ => {
                warn!("virtio-mem: Unknown event received: {source}");
            }
        }
    }
}

#[cfg(test)]
mod tests {
    use std::sync::{Arc, Mutex};

    use event_manager::{EventManager, SubscriberOps};
    use vmm_sys_util::epoll::EventSet;

    use super::*;
    use crate::devices::virtio::ActivateError;
    use crate::devices::virtio::generated::virtio_mem::VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE;
    use crate::devices::virtio::mem::device::test_utils::default_virtio_mem;
    use crate::devices::virtio::test_utils::{VirtQueue, default_interrupt, default_mem};
    use crate::vstate::memory::GuestAddress;

    // Activation requires the driver to ack VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE;
    // once activated, the activate event is processed by the event manager.
    #[test]
    fn test_event_handler_activation() {
        let mut event_manager = EventManager::new().unwrap();
        let mut mem_device = default_virtio_mem();
        let mem = default_mem();
        let interrupt = default_interrupt();

        // Set up queue
        let virtq = VirtQueue::new(GuestAddress(0), &mem, 16);
        mem_device.queues_mut()[MEM_QUEUE] = virtq.create_queue();

        let mem_device = Arc::new(Mutex::new(mem_device));
        let _id = event_manager.add_subscriber(mem_device.clone());

        // Device should register activate event when inactive
        assert!(!mem_device.lock().unwrap().is_activated());

        // Device should prevent activation before features are acked
        let err = mem_device
            .lock()
            .unwrap()
            .activate(mem.clone(), interrupt.clone())
            .unwrap_err();
        assert!(matches!(err, ActivateError::RequiredFeatureNotAcked(_)));

        // Ack the feature and activate the device
        mem_device
            .lock()
            .unwrap()
            .set_acked_features(1 << VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE);
        mem_device.lock().unwrap().activate(mem, interrupt).unwrap();

        // Process activation event
        let ev_count = event_manager.run_with_timeout(50).unwrap();
        assert_eq!(ev_count, 1);
        assert!(mem_device.lock().unwrap().is_activated());
    }

    // After activation, writing to the queue eventfd wakes the subscriber once.
    #[test]
    fn test_process_mem_queue_event() {
        let mut event_manager = EventManager::new().unwrap();
        let mut mem_device = default_virtio_mem();
        let mem = default_mem();
        let interrupt = default_interrupt();

        // Set up queue
        let virtq = VirtQueue::new(GuestAddress(0), &mem, 16);
        mem_device.queues_mut()[MEM_QUEUE] = virtq.create_queue();
        mem_device.set_acked_features(mem_device.avail_features());

        let mem_device = Arc::new(Mutex::new(mem_device));
        let _id = event_manager.add_subscriber(mem_device.clone());

        // Activate device first
        mem_device.lock().unwrap().activate(mem, interrupt).unwrap();
        event_manager.run_with_timeout(50).unwrap(); // Process activation

        // Trigger queue event
        mem_device.lock().unwrap().queue_events()[MEM_QUEUE]
            .write(1)
            .unwrap();

        // Process queue event
        let ev_count = event_manager.run_with_timeout(50).unwrap();
        assert_eq!(ev_count, 1);
    }

    // Queue events fired before activation must be ignored (only the activate
    // event is registered at that point).
    #[test]
    fn test_spurious_event_before_activation() {
        let mut event_manager = EventManager::new().unwrap();
        let mem_device = default_virtio_mem();

        let mem_device = Arc::new(Mutex::new(mem_device));
        let _id = event_manager.add_subscriber(mem_device.clone());

        // Try to trigger queue event before activation
        mem_device.lock().unwrap().queue_events()[MEM_QUEUE]
            .write(1)
            .unwrap();

        // Should not process queue events before activation
        let ev_count = event_manager.run_with_timeout(50).unwrap();
        assert_eq!(ev_count, 0);
    }
}



================================================
FILE: src/vmm/src/devices/virtio/mem/metrics.rs
================================================
// Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0 //! Defines the metrics system for memory devices. //! //! # Metrics format //! The metrics are flushed in JSON when requested by vmm::logger::metrics::METRICS.write(). //! //! ## JSON example with metrics: //! ```json //! "memory_hotplug": { //! "activate_fails": "SharedIncMetric", //! "queue_event_fails": "SharedIncMetric", //! "queue_event_count": "SharedIncMetric", //! ... //! } //! } //! ``` //! Each `memory` field in the example above is a serializable `VirtioMemDeviceMetrics` structure //! collecting metrics such as `activate_fails`, `queue_event_fails` etc. for the memoty hotplug //! device. //! Since Firecrakcer only supports one virtio-mem device, there is no per device metrics and //! `memory_hotplug` represents the aggregate entropy metrics. use serde::ser::SerializeMap; use serde::{Serialize, Serializer}; use crate::logger::{LatencyAggregateMetrics, SharedIncMetric}; /// Stores aggregated virtio-mem metrics pub(super) static METRICS: VirtioMemDeviceMetrics = VirtioMemDeviceMetrics::new(); /// Called by METRICS.flush(), this function facilitates serialization of virtio-mem device metrics. 
pub fn flush_metrics(serializer: S) -> Result { let mut seq = serializer.serialize_map(Some(1))?; seq.serialize_entry("memory_hotplug", &METRICS)?; seq.end() } #[derive(Debug, Serialize)] pub(super) struct VirtioMemDeviceMetrics { /// Number of device activation failures pub activate_fails: SharedIncMetric, /// Number of queue event handling failures pub queue_event_fails: SharedIncMetric, /// Number of queue events handled pub queue_event_count: SharedIncMetric, /// Latency of Plug operations pub plug_agg: LatencyAggregateMetrics, /// Number of Plug operations pub plug_count: SharedIncMetric, /// Number of plugged bytes pub plug_bytes: SharedIncMetric, /// Number of Plug operations failed pub plug_fails: SharedIncMetric, /// Latency of Unplug operations pub unplug_agg: LatencyAggregateMetrics, /// Number of Unplug operations pub unplug_count: SharedIncMetric, /// Number of unplugged bytes pub unplug_bytes: SharedIncMetric, /// Number of Unplug operations failed pub unplug_fails: SharedIncMetric, /// Number of discards failed for an Unplug or UnplugAll operation pub unplug_discard_fails: SharedIncMetric, /// Latency of UnplugAll operations pub unplug_all_agg: LatencyAggregateMetrics, /// Number of UnplugAll operations pub unplug_all_count: SharedIncMetric, /// Number of UnplugAll operations failed pub unplug_all_fails: SharedIncMetric, /// Latency of State operations pub state_agg: LatencyAggregateMetrics, /// Number of State operations pub state_count: SharedIncMetric, /// Number of State operations failed pub state_fails: SharedIncMetric, } impl VirtioMemDeviceMetrics { /// Const default construction. 
const fn new() -> Self { Self { activate_fails: SharedIncMetric::new(), queue_event_fails: SharedIncMetric::new(), queue_event_count: SharedIncMetric::new(), plug_agg: LatencyAggregateMetrics::new(), plug_count: SharedIncMetric::new(), plug_bytes: SharedIncMetric::new(), plug_fails: SharedIncMetric::new(), unplug_agg: LatencyAggregateMetrics::new(), unplug_count: SharedIncMetric::new(), unplug_bytes: SharedIncMetric::new(), unplug_fails: SharedIncMetric::new(), unplug_discard_fails: SharedIncMetric::new(), unplug_all_agg: LatencyAggregateMetrics::new(), unplug_all_count: SharedIncMetric::new(), unplug_all_fails: SharedIncMetric::new(), state_agg: LatencyAggregateMetrics::new(), state_count: SharedIncMetric::new(), state_fails: SharedIncMetric::new(), } } } #[cfg(test)] pub mod tests { use super::*; use crate::logger::IncMetric; #[test] fn test_memory_hotplug_metrics() { let mem_metrics: VirtioMemDeviceMetrics = VirtioMemDeviceMetrics::new(); mem_metrics.queue_event_count.inc(); assert_eq!(mem_metrics.queue_event_count.count(), 1); let _ = serde_json::to_string(&mem_metrics).unwrap(); } } ================================================ FILE: src/vmm/src/devices/virtio/mem/mod.rs ================================================ // Copyright 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
// SPDX-License-Identifier: Apache-2.0 pub mod device; mod event_handler; pub mod metrics; pub mod persist; mod request; use vm_memory::GuestAddress; pub use self::device::{VirtioMem, VirtioMemError, VirtioMemStatus}; use crate::arch::FIRST_ADDR_PAST_64BITS_MMIO; pub(crate) const MEM_NUM_QUEUES: usize = 1; pub(crate) const MEM_QUEUE: usize = 0; pub const VIRTIO_MEM_DEFAULT_BLOCK_SIZE_MIB: usize = 2; pub const VIRTIO_MEM_DEFAULT_SLOT_SIZE_MIB: usize = 128; pub const VIRTIO_MEM_DEV_ID: &str = "mem"; ================================================ FILE: src/vmm/src/devices/virtio/mem/persist.rs ================================================ // Copyright 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 //! Defines the structures needed for saving/restoring virtio-mem devices. use std::sync::Arc; use bitvec::vec::BitVec; use serde::{Deserialize, Serialize}; use vm_memory::Address; use crate::Vm; use crate::devices::virtio::device::VirtioDeviceType; use crate::devices::virtio::generated::virtio_ids::VIRTIO_ID_MEM; use crate::devices::virtio::generated::virtio_mem::virtio_mem_config; use crate::devices::virtio::mem::{MEM_NUM_QUEUES, VirtioMem, VirtioMemError}; use crate::devices::virtio::persist::{PersistError as VirtioStateError, VirtioDeviceState}; use crate::devices::virtio::queue::FIRECRACKER_MAX_QUEUE_SIZE; use crate::snapshot::Persist; use crate::utils::usize_to_u64; use crate::vstate::memory::{GuestMemoryMmap, GuestRegionMmap}; #[derive(Debug, Clone, Serialize, Deserialize)] pub struct VirtioMemState { pub virtio_state: VirtioDeviceState, addr: u64, region_size: u64, block_size: u64, usable_region_size: u64, requested_size: u64, slot_size: usize, plugged_blocks: Vec, } #[derive(Debug)] pub struct VirtioMemConstructorArgs { vm: Arc, } impl VirtioMemConstructorArgs { pub fn new(vm: Arc) -> Self { Self { vm } } } #[derive(Debug, thiserror::Error, displaydoc::Display)] pub enum VirtioMemPersistError { /// Create 
virtio-mem: {0} CreateVirtioMem(#[from] VirtioMemError), /// Virtio state: {0} VirtioState(#[from] VirtioStateError), } impl Persist<'_> for VirtioMem { type State = VirtioMemState; type ConstructorArgs = VirtioMemConstructorArgs; type Error = VirtioMemPersistError; fn save(&self) -> Self::State { VirtioMemState { virtio_state: VirtioDeviceState::from_device(self), addr: self.config.addr, region_size: self.config.region_size, block_size: self.config.block_size, usable_region_size: self.config.usable_region_size, plugged_blocks: self.plugged_blocks.iter().by_vals().collect(), requested_size: self.config.requested_size, slot_size: self.slot_size, } } fn restore( constructor_args: Self::ConstructorArgs, state: &Self::State, ) -> Result { let queues = state.virtio_state.build_queues_checked( constructor_args.vm.guest_memory(), VirtioDeviceType::Mem, MEM_NUM_QUEUES, FIRECRACKER_MAX_QUEUE_SIZE, )?; let plugged_blocks = BitVec::from_iter(state.plugged_blocks.iter()); let config = virtio_mem_config { addr: state.addr, region_size: state.region_size, block_size: state.block_size, usable_region_size: state.usable_region_size, plugged_size: usize_to_u64(plugged_blocks.count_ones()) * state.block_size, requested_size: state.requested_size, ..Default::default() }; let mut virtio_mem = VirtioMem::from_state( constructor_args.vm, queues, config, state.slot_size, plugged_blocks, )?; virtio_mem.set_avail_features(state.virtio_state.avail_features); virtio_mem.set_acked_features(state.virtio_state.acked_features); Ok(virtio_mem) } } #[cfg(test)] mod tests { use super::*; use crate::devices::virtio::device::VirtioDevice; use crate::devices::virtio::mem::device::test_utils::default_virtio_mem; use crate::vstate::vm::tests::setup_vm_with_memory; #[test] fn test_save_state() { let dev = default_virtio_mem(); let state = dev.save(); assert_eq!(state.addr, dev.config.addr); assert_eq!(state.region_size, dev.config.region_size); assert_eq!(state.block_size, dev.config.block_size); 
assert_eq!(state.usable_region_size, dev.config.usable_region_size); assert_eq!( state.plugged_blocks.iter().collect::(), dev.plugged_blocks ); assert_eq!(state.requested_size, dev.config.requested_size); assert_eq!(state.slot_size, dev.slot_size); } #[test] fn test_save_restore_state() { let mut original_dev = default_virtio_mem(); original_dev.set_acked_features(original_dev.avail_features()); let state = original_dev.save(); // Create a "new" VM for restore let (_, vm) = setup_vm_with_memory(0x1000); let vm = Arc::new(vm); let constructor_args = VirtioMemConstructorArgs::new(vm); let restored_dev = VirtioMem::restore(constructor_args, &state).unwrap(); assert_eq!(original_dev.config, restored_dev.config); assert_eq!(original_dev.slot_size, restored_dev.slot_size); assert_eq!(original_dev.avail_features(), restored_dev.avail_features()); assert_eq!(original_dev.acked_features(), restored_dev.acked_features()); } } ================================================ FILE: src/vmm/src/devices/virtio/mem/request.rs ================================================ // Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
// SPDX-License-Identifier: Apache-2.0 use vm_memory::{Address, ByteValued, GuestAddress}; use crate::devices::virtio::generated::virtio_mem; #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub struct RequestedRange { pub(crate) addr: GuestAddress, pub(crate) nb_blocks: usize, } #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub(crate) enum Request { Plug(RequestedRange), Unplug(RequestedRange), UnplugAll, State(RequestedRange), Unsupported(u32), } // SAFETY: this is safe, trust me bro unsafe impl ByteValued for virtio_mem::virtio_mem_req {} impl From for Request { fn from(req: virtio_mem::virtio_mem_req) -> Self { match req.type_.into() { // SAFETY: union type is checked in the match virtio_mem::VIRTIO_MEM_REQ_PLUG => unsafe { Request::Plug(RequestedRange { addr: GuestAddress(req.u.plug.addr), nb_blocks: req.u.plug.nb_blocks.into(), }) }, // SAFETY: union type is checked in the match virtio_mem::VIRTIO_MEM_REQ_UNPLUG => unsafe { Request::Unplug(RequestedRange { addr: GuestAddress(req.u.unplug.addr), nb_blocks: req.u.unplug.nb_blocks.into(), }) }, virtio_mem::VIRTIO_MEM_REQ_UNPLUG_ALL => Request::UnplugAll, // SAFETY: union type is checked in the match virtio_mem::VIRTIO_MEM_REQ_STATE => unsafe { Request::State(RequestedRange { addr: GuestAddress(req.u.state.addr), nb_blocks: req.u.state.nb_blocks.into(), }) }, t => Request::Unsupported(t), } } } #[repr(u16)] #[derive(Debug, Clone, Copy, Eq, PartialEq)] #[allow(clippy::cast_possible_truncation)] pub enum ResponseType { Ack = virtio_mem::VIRTIO_MEM_RESP_ACK as u16, Nack = virtio_mem::VIRTIO_MEM_RESP_NACK as u16, Busy = virtio_mem::VIRTIO_MEM_RESP_BUSY as u16, Error = virtio_mem::VIRTIO_MEM_RESP_ERROR as u16, } #[repr(u16)] #[derive(Debug, Clone, Copy, Eq, PartialEq)] #[allow(clippy::cast_possible_truncation)] pub enum BlockRangeState { Plugged = virtio_mem::VIRTIO_MEM_STATE_PLUGGED as u16, Unplugged = virtio_mem::VIRTIO_MEM_STATE_UNPLUGGED as u16, Mixed = virtio_mem::VIRTIO_MEM_STATE_MIXED as u16, } #[derive(Debug, 
Clone, Eq, PartialEq)] pub struct Response { pub resp_type: ResponseType, // Only for State requests pub state: Option, } impl Response { pub(crate) fn error() -> Self { Response { resp_type: ResponseType::Error, state: None, } } pub(crate) fn ack() -> Self { Response { resp_type: ResponseType::Ack, state: None, } } pub(crate) fn ack_with_state(state: BlockRangeState) -> Self { Response { resp_type: ResponseType::Ack, state: Some(state), } } pub(crate) fn is_ack(&self) -> bool { self.resp_type == ResponseType::Ack } pub(crate) fn is_error(&self) -> bool { self.resp_type == ResponseType::Error } } // SAFETY: Plain data structures unsafe impl ByteValued for virtio_mem::virtio_mem_resp {} impl From for virtio_mem::virtio_mem_resp { fn from(resp: Response) -> Self { let mut out = virtio_mem::virtio_mem_resp { type_: resp.resp_type as u16, ..Default::default() }; if let Some(state) = resp.state { out.u.state.state = state as u16; } out } } #[cfg(test)] mod test_util { use super::*; // Implement the reverse conversions to use in test code. 
impl From for virtio_mem::virtio_mem_req { fn from(req: Request) -> virtio_mem::virtio_mem_req { match req { Request::Plug(r) => virtio_mem::virtio_mem_req { type_: virtio_mem::VIRTIO_MEM_REQ_PLUG.try_into().unwrap(), u: virtio_mem::virtio_mem_req__bindgen_ty_1 { plug: virtio_mem::virtio_mem_req_plug { addr: r.addr.raw_value(), nb_blocks: r.nb_blocks.try_into().unwrap(), ..Default::default() }, }, ..Default::default() }, Request::Unplug(r) => virtio_mem::virtio_mem_req { type_: virtio_mem::VIRTIO_MEM_REQ_UNPLUG.try_into().unwrap(), u: virtio_mem::virtio_mem_req__bindgen_ty_1 { unplug: virtio_mem::virtio_mem_req_unplug { addr: r.addr.raw_value(), nb_blocks: r.nb_blocks.try_into().unwrap(), ..Default::default() }, }, ..Default::default() }, Request::UnplugAll => virtio_mem::virtio_mem_req { type_: virtio_mem::VIRTIO_MEM_REQ_UNPLUG_ALL.try_into().unwrap(), ..Default::default() }, Request::State(r) => virtio_mem::virtio_mem_req { type_: virtio_mem::VIRTIO_MEM_REQ_STATE.try_into().unwrap(), u: virtio_mem::virtio_mem_req__bindgen_ty_1 { state: virtio_mem::virtio_mem_req_state { addr: r.addr.raw_value(), nb_blocks: r.nb_blocks.try_into().unwrap(), ..Default::default() }, }, ..Default::default() }, Request::Unsupported(t) => virtio_mem::virtio_mem_req { type_: t.try_into().unwrap(), ..Default::default() }, } } } impl From for Response { fn from(resp: virtio_mem::virtio_mem_resp) -> Self { Response { resp_type: match resp.type_.into() { virtio_mem::VIRTIO_MEM_RESP_ACK => ResponseType::Ack, virtio_mem::VIRTIO_MEM_RESP_NACK => ResponseType::Nack, virtio_mem::VIRTIO_MEM_RESP_BUSY => ResponseType::Busy, virtio_mem::VIRTIO_MEM_RESP_ERROR => ResponseType::Error, t => panic!("Invalid response type: {:?}", t), }, // There is no way to know whether this is present or not as it depends on the // request types. Callers should ignore this value if the request wasn't STATE /// SAFETY: test code only. Uninitialized values are 0 and recognized as PLUGGED. 
state: Some(unsafe { match resp.u.state.state.into() { virtio_mem::VIRTIO_MEM_STATE_PLUGGED => BlockRangeState::Plugged, virtio_mem::VIRTIO_MEM_STATE_UNPLUGGED => BlockRangeState::Unplugged, virtio_mem::VIRTIO_MEM_STATE_MIXED => BlockRangeState::Mixed, t => panic!("Invalid state: {:?}", t), } }), } } } } ================================================ FILE: src/vmm/src/devices/virtio/mod.rs ================================================ // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 // // Portions Copyright 2017 The Chromium OS Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the THIRD-PARTY file. //! Implements virtio devices, queues, and transport mechanisms. use std::any::Any; use self::queue::QueueError; use crate::devices::virtio::net::TapError; pub mod balloon; pub mod block; pub mod device; pub mod generated; mod iov_deque; pub mod iovec; pub mod mem; pub mod net; pub mod persist; pub mod pmem; pub mod queue; pub mod rng; pub mod test_utils; pub mod transport; pub mod vhost_user; pub mod vhost_user_metrics; pub mod vsock; /// When the driver initializes the device, it lets the device know about the /// completed stages using the Device Status Field. /// /// These following consts are defined in the order in which the bits would /// typically be set by the driver. INIT -> ACKNOWLEDGE -> DRIVER and so on. /// /// This module is a 1:1 mapping for the Device Status Field in the virtio 1.0 /// specification, section 2.1. mod device_status { pub const INIT: u32 = 0; pub const ACKNOWLEDGE: u32 = 1; pub const DRIVER: u32 = 2; pub const FAILED: u32 = 128; pub const FEATURES_OK: u32 = 8; pub const DRIVER_OK: u32 = 4; pub const DEVICE_NEEDS_RESET: u32 = 64; } /// Offset from the base MMIO address of a virtio device used by the guest to notify the device of /// queue events. 
pub const NOTIFY_REG_OFFSET: u32 = 0x50; /// Errors triggered when activating a VirtioDevice. #[derive(Debug, thiserror::Error, displaydoc::Display)] pub enum ActivateError { /// Wrong number of queue for virtio device: expected {expected}, got {got} QueueMismatch { expected: usize, got: usize }, /// Failed to write to activate eventfd EventFd, /// Vhost user: {0} VhostUser(vhost_user::VhostUserError), /// Setting tap interface offload flags failed: {0} TapSetOffload(TapError), /// Error setting pointers in the queue: (0) QueueMemoryError(QueueError), /// The driver didn't acknowledge a required feature: {0} RequiredFeatureNotAcked(&'static str), } /// Trait that helps in upcasting an object to Any pub trait AsAny { /// Return the immutable any encapsulated object. fn as_any(&self) -> &dyn Any; /// Return the mutable encapsulated any object. fn as_mut_any(&mut self) -> &mut dyn Any; } impl AsAny for T { fn as_any(&self) -> &dyn Any { self } fn as_mut_any(&mut self) -> &mut dyn Any { self } } ================================================ FILE: src/vmm/src/devices/virtio/net/device.rs ================================================ // Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 // // Portions Copyright 2017 The Chromium OS Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the THIRD-PARTY file. 
use std::collections::VecDeque;
use std::mem::{self};
use std::net::Ipv4Addr;
use std::num::Wrapping;
use std::ops::Deref;
use std::sync::{Arc, Mutex};

use libc::{EAGAIN, iovec};
use log::{error, info};
use vmm_sys_util::eventfd::EventFd;

use super::NET_QUEUE_MAX_SIZE;
use crate::devices::virtio::ActivateError;
use crate::devices::virtio::device::{ActiveState, DeviceState, VirtioDevice, VirtioDeviceType};
use crate::devices::virtio::generated::virtio_config::VIRTIO_F_VERSION_1;
use crate::devices::virtio::generated::virtio_net::{
    VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6,
    VIRTIO_NET_F_GUEST_UFO, VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_TSO6, VIRTIO_NET_F_HOST_UFO,
    VIRTIO_NET_F_MAC, VIRTIO_NET_F_MRG_RXBUF, virtio_net_hdr_v1,
};
use crate::devices::virtio::generated::virtio_ring::VIRTIO_RING_F_EVENT_IDX;
use crate::devices::virtio::iovec::{
    IoVecBuffer, IoVecBufferMut, IoVecError, ParsedDescriptorChain,
};
use crate::devices::virtio::net::metrics::{NetDeviceMetrics, NetMetricsPerDevice};
use crate::devices::virtio::net::tap::Tap;
use crate::devices::virtio::net::{
    MAX_BUFFER_SIZE, NET_QUEUE_SIZES, NetError, NetQueue, RX_INDEX, TX_INDEX, generated,
};
use crate::devices::virtio::queue::{DescriptorChain, InvalidAvailIdx, Queue};
use crate::devices::virtio::transport::{VirtioInterrupt, VirtioInterruptType};
use crate::devices::{DeviceError, report_net_event_fail};
use crate::dumbo::pdu::arp::ETH_IPV4_FRAME_LEN;
use crate::dumbo::pdu::ethernet::{EthernetFrame, PAYLOAD_OFFSET};
use crate::impl_device_type;
use crate::logger::{IncMetric, METRICS};
use crate::mmds::data_store::Mmds;
use crate::mmds::ns::MmdsNetworkStack;
use crate::rate_limiter::{BucketUpdate, RateLimiter, TokenType};
use crate::utils::net::mac::MacAddr;
use crate::utils::u64_to_usize;
use crate::vstate::memory::{ByteValued, GuestMemoryMmap};

// Maximum length of the L2 frame header we care about: ethernet payload offset plus
// the IPv4 ARP frame length (28 bytes).
const FRAME_HEADER_MAX_LEN: usize = PAYLOAD_OFFSET + ETH_IPV4_FRAME_LEN;

// Size in bytes of the VNET header prepended to every frame exchanged with the TAP.
// (Presumably `size_of::<virtio_net_hdr_v1>` — the type parameter was lost in extraction;
// TODO confirm against upstream.)
pub(crate) const fn vnet_hdr_len() -> usize {
    mem::size_of::()
}

// This returns the maximum frame header length. This includes the VNET header plus
// the maximum L2 frame header bytes which includes the ethernet frame header plus
// the header IPv4 ARP header which is 28 bytes long.
const fn frame_hdr_len() -> usize {
    vnet_hdr_len() + FRAME_HEADER_MAX_LEN
}

// Frames being sent/received through the network device model have a VNET header. This
// function returns a slice which holds the L2 frame bytes without this header.
// Errors with `VnetHeaderMissing` if the buffer is too short to even contain the header.
fn frame_bytes_from_buf(buf: &[u8]) -> Result<&[u8], NetError> {
    if buf.len() < vnet_hdr_len() {
        Err(NetError::VnetHeaderMissing)
    } else {
        Ok(&buf[vnet_hdr_len()..])
    }
}

// Mutable variant of `frame_bytes_from_buf`.
fn frame_bytes_from_buf_mut(buf: &mut [u8]) -> Result<&mut [u8], NetError> {
    if buf.len() < vnet_hdr_len() {
        Err(NetError::VnetHeaderMissing)
    } else {
        Ok(&mut buf[vnet_hdr_len()..])
    }
}

// This initializes to all 0 the VNET hdr part of a buf.
fn init_vnet_hdr(buf: &mut [u8]) {
    // The buffer should be larger than vnet_hdr_len.
    buf[0..vnet_hdr_len()].fill(0);
}

/// Virtio-net device config space; currently holds only the guest MAC address.
#[derive(Debug, Default, Clone, Copy)]
#[repr(C)]
pub struct ConfigSpace {
    pub guest_mac: MacAddr,
}

// SAFETY: `ConfigSpace` contains only PODs in `repr(C)` or `repr(transparent)`, without padding.
unsafe impl ByteValued for ConfigSpace {}

/// Errors that can occur while adding a guest-provided RX buffer.
#[derive(Debug, thiserror::Error, displaydoc::Display)]
enum AddRxBufferError {
    /// Error while parsing new buffer: {0}
    Parsing(#[from] IoVecError),
    /// RX buffer is too small
    BufferTooSmall,
}

/// A map of all the memory the guest has provided us with for performing RX
#[derive(Debug)]
pub struct RxBuffers {
    // minimum size of a usable buffer for doing RX
    pub min_buffer_size: u32,
    // An [`IoVecBufferMut`] covering all the memory we have available for receiving network
    // frames.
    pub iovec: IoVecBufferMut,
    // A map of which part of the memory belongs to which `DescriptorChain` object
    pub parsed_descriptors: VecDeque,
    // Buffers that we have used and they are ready to be given back to the guest.
    pub used_descriptors: u16,
    pub used_bytes: u32,
}

impl RxBuffers {
    /// Create a new [`RxBuffers`] object for storing guest memory for performing RX
    fn new() -> Result {
        Ok(Self {
            min_buffer_size: 0,
            iovec: IoVecBufferMut::new()?,
            parsed_descriptors: VecDeque::with_capacity(NET_QUEUE_MAX_SIZE.into()),
            used_descriptors: 0,
            used_bytes: 0,
        })
    }

    /// Add a new `DescriptorChain` that we received from the RX queue in the buffer.
    ///
    /// Rejects (and drops from `iovec`) chains shorter than `min_buffer_size`.
    ///
    /// SAFETY: The `DescriptorChain` cannot be referencing the same memory location as any other
    /// `DescriptorChain`. (See also related comment in
    /// [`IoVecBufferMut::append_descriptor_chain`]).
    unsafe fn add_buffer(
        &mut self,
        mem: &GuestMemoryMmap,
        head: DescriptorChain,
    ) -> Result<(), AddRxBufferError> {
        // SAFETY: descriptor chain cannot be referencing the same memory location as another chain
        let parsed_dc = unsafe { self.iovec.append_descriptor_chain(mem, head)? };
        if parsed_dc.length < self.min_buffer_size {
            self.iovec.drop_chain_back(&parsed_dc);
            return Err(AddRxBufferError::BufferTooSmall);
        }
        self.parsed_descriptors.push_back(parsed_dc);
        Ok(())
    }

    /// Returns the total size of available space in the buffer.
    #[inline(always)]
    fn capacity(&self) -> u32 {
        self.iovec.len()
    }

    /// Mark the first `size` bytes of available memory as used.
    ///
    /// # Safety:
    ///
    /// * The `RxBuffers` should include at least one parsed `DescriptorChain`.
    /// * `size` needs to be smaller or equal to total length of the first `DescriptorChain` stored
    /// in the `RxBuffers`.
    unsafe fn mark_used(&mut self, mut bytes_written: u32, rx_queue: &mut Queue) {
        self.used_bytes = bytes_written;
        let mut used_heads: u16 = 0;
        // Spread `bytes_written` over as many parsed chains as needed, writing one
        // used-ring element per chain consumed.
        for parsed_dc in self.parsed_descriptors.iter() {
            let used_bytes = bytes_written.min(parsed_dc.length);
            // Safe because we know head_index isn't out of bounds
            rx_queue
                .write_used_element(self.used_descriptors, parsed_dc.head_index, used_bytes)
                .unwrap();
            bytes_written -= used_bytes;
            self.used_descriptors += 1;
            used_heads += 1;
            if bytes_written == 0 {
                break;
            }
        }
        // We need to set num_buffers before dropping chains from `self.iovec`. Otherwise
        // when we set headers, we will iterate over new, yet unused chains instead of the ones
        // we need.
        self.header_set_num_buffers(used_heads);
        for _ in 0..used_heads {
            let parsed_dc = self
                .parsed_descriptors
                .pop_front()
                .expect("This should never happen if write to the buffer succeeded.");
            self.iovec.drop_chain_front(&parsed_dc);
        }
    }

    /// Write the number of descriptors used in VirtIO header
    fn header_set_num_buffers(&mut self, nr_descs: u16) {
        // We can unwrap here, because we have checked before that the `IoVecBufferMut` holds at
        // least one buffer with the proper size, depending on the feature negotiation. In any
        // case, the buffer holds memory of at least `std::mem::size_of::()`
        // bytes.
        self.iovec
            .write_all_volatile_at(
                &nr_descs.to_le_bytes(),
                std::mem::offset_of!(virtio_net_hdr_v1, num_buffers),
            )
            .unwrap()
    }

    /// Let the guest know about all the `DescriptorChain` objects that have been
    /// used to receive a frame from the TAP.
    fn finish_frame(&mut self, rx_queue: &mut Queue) {
        rx_queue.advance_next_used(self.used_descriptors);
        self.used_descriptors = 0;
        self.used_bytes = 0;
    }

    /// Return a slice of iovecs for the first slice in the buffer.
    /// Panics if there are no parsed descriptors.
    fn single_chain_slice_mut(&mut self) -> &mut [iovec] {
        let nr_iovecs = self.parsed_descriptors[0].nr_iovecs as usize;
        &mut self.iovec.as_iovec_mut_slice()[..nr_iovecs]
    }

    /// Return a slice of iovecs for all descriptor chains in the buffer.
    fn all_chains_slice_mut(&mut self) -> &mut [iovec] {
        self.iovec.as_iovec_mut_slice()
    }
}

/// VirtIO network device.
///
/// It emulates a network device able to exchange L2 frames between the guest
/// and a host-side tap device.
#[derive(Debug)]
pub struct Net {
    pub(crate) id: String,
    /// The backend for this device: a tap.
    pub tap: Tap,
    // Features the device offers; features the driver acknowledged.
    pub(crate) avail_features: u64,
    pub(crate) acked_features: u64,
    // RX/TX virtqueues and their kick eventfds.
    pub(crate) queues: Vec,
    pub(crate) queue_evts: Vec,
    pub(crate) rx_rate_limiter: RateLimiter,
    pub(crate) tx_rate_limiter: RateLimiter,
    // Scratch buffer for frames coming from MMDS towards the guest.
    rx_frame_buf: [u8; MAX_BUFFER_SIZE],
    // Scratch buffer for the headers of frames the guest transmits.
    tx_frame_headers: [u8; frame_hdr_len()],
    pub(crate) config_space: ConfigSpace,
    pub(crate) guest_mac: Option,
    pub(crate) device_state: DeviceState,
    pub(crate) activate_evt: EventFd,
    /// The MMDS stack corresponding to this interface.
    /// Only if MMDS transport has been associated with it.
    pub mmds_ns: Option,
    pub(crate) metrics: Arc,
    // Parsed TX descriptor chain currently being transmitted.
    tx_buffer: IoVecBuffer,
    pub(crate) rx_buffer: RxBuffers,
}

impl Net {
    /// Create a new virtio network device with the given TAP interface.
    pub fn new_with_tap(
        id: String,
        tap: Tap,
        guest_mac: Option,
        rx_rate_limiter: RateLimiter,
        tx_rate_limiter: RateLimiter,
    ) -> Result {
        // Advertise checksum/TSO/UFO offloads in both directions, virtio 1.0 compliance,
        // mergeable RX buffers, and used-ring event index suppression.
        let mut avail_features = (1 << VIRTIO_NET_F_GUEST_CSUM)
            | (1 << VIRTIO_NET_F_CSUM)
            | (1 << VIRTIO_NET_F_GUEST_TSO4)
            | (1 << VIRTIO_NET_F_GUEST_TSO6)
            | (1 << VIRTIO_NET_F_GUEST_UFO)
            | (1 << VIRTIO_NET_F_HOST_TSO4)
            | (1 << VIRTIO_NET_F_HOST_TSO6)
            | (1 << VIRTIO_NET_F_HOST_UFO)
            | (1 << VIRTIO_F_VERSION_1)
            | (1 << VIRTIO_NET_F_MRG_RXBUF)
            | (1 << VIRTIO_RING_F_EVENT_IDX);
        let mut config_space = ConfigSpace::default();
        if let Some(mac) = guest_mac {
            config_space.guest_mac = mac;
            // Enabling feature for MAC address configuration
            // If not set, the driver will generate a random MAC address
            avail_features |= 1 << VIRTIO_NET_F_MAC;
        }
        let mut queue_evts = Vec::new();
        let mut queues = Vec::new();
        for size in NET_QUEUE_SIZES {
            queue_evts.push(EventFd::new(libc::EFD_NONBLOCK).map_err(NetError::EventFd)?);
            queues.push(Queue::new(size));
        }
        Ok(Net {
            id: id.clone(),
            tap,
            avail_features,
            acked_features: 0u64,
            queues,
            queue_evts,
            rx_rate_limiter,
            tx_rate_limiter,
            rx_frame_buf: [0u8; MAX_BUFFER_SIZE],
            tx_frame_headers: [0u8; frame_hdr_len()],
            config_space,
            guest_mac,
            device_state: DeviceState::Inactive,
            activate_evt: EventFd::new(libc::EFD_NONBLOCK).map_err(NetError::EventFd)?,
            mmds_ns: None,
            metrics: NetMetricsPerDevice::alloc(id),
            tx_buffer: Default::default(),
            rx_buffer: RxBuffers::new()?,
        })
    }

    /// Create a new virtio network device given the interface name.
    pub fn new(
        id: String,
        tap_if_name: &str,
        guest_mac: Option,
        rx_rate_limiter: RateLimiter,
        tx_rate_limiter: RateLimiter,
    ) -> Result {
        let tap = Tap::open_named(tap_if_name).map_err(NetError::TapOpen)?;
        // Tell the kernel how large a VNET header it should prepend/expect on this tap.
        let vnet_hdr_size = i32::try_from(vnet_hdr_len()).unwrap();
        tap.set_vnet_hdr_size(vnet_hdr_size)
            .map_err(NetError::TapSetVnetHdrSize)?;
        Self::new_with_tap(id, tap, guest_mac, rx_rate_limiter, tx_rate_limiter)
    }

    /// Provides the MAC of this net device.
    pub fn guest_mac(&self) -> Option<&MacAddr> {
        self.guest_mac.as_ref()
    }

    /// Provides the host IFACE name of this net device.
    pub fn iface_name(&self) -> String {
        self.tap.if_name_as_str().to_string()
    }

    /// Provides the MmdsNetworkStack of this net device.
    pub fn mmds_ns(&self) -> Option<&MmdsNetworkStack> {
        self.mmds_ns.as_ref()
    }

    /// Configures the `MmdsNetworkStack` to allow device to forward MMDS requests.
    /// If the device already supports MMDS, updates the IPv4 address.
    pub fn configure_mmds_network_stack(&mut self, ipv4_addr: Ipv4Addr, mmds: Arc>) {
        if let Some(mmds_ns) = self.mmds_ns.as_mut() {
            mmds_ns.set_ipv4_addr(ipv4_addr);
        } else {
            self.mmds_ns = Some(MmdsNetworkStack::new_with_defaults(Some(ipv4_addr), mmds))
        }
    }

    /// Disables the `MmdsNetworkStack` to prevent device to forward MMDS requests.
    pub fn disable_mmds_network_stack(&mut self) {
        self.mmds_ns = None
    }

    /// Provides a reference to the configured RX rate limiter.
    pub fn rx_rate_limiter(&self) -> &RateLimiter {
        &self.rx_rate_limiter
    }

    /// Provides a reference to the configured TX rate limiter.
    pub fn tx_rate_limiter(&self) -> &RateLimiter {
        &self.tx_rate_limiter
    }

    /// Trigger queue notification for the guest if we used enough descriptors
    /// for the notification to be enabled.
    /// https://docs.oasis-open.org/virtio/virtio/v1.1/csprd01/virtio-v1.1-csprd01.html#x1-320005
    /// 2.6.7.1 Driver Requirements: Used Buffer Notification Suppression
    fn try_signal_queue(&mut self, queue_type: NetQueue) -> Result<(), DeviceError> {
        let qidx = match queue_type {
            NetQueue::Rx => RX_INDEX,
            NetQueue::Tx => TX_INDEX,
        };
        // Publish pending used elements before deciding whether a kick is needed.
        self.queues[qidx].advance_used_ring_idx();
        if self.queues[qidx].prepare_kick() {
            self.interrupt_trigger()
                .trigger(VirtioInterruptType::Queue(qidx.try_into().unwrap()))
                .map_err(|err| {
                    self.metrics.event_fails.inc();
                    DeviceError::FailedSignalingIrq(err)
                })?;
        }
        Ok(())
    }

    // Helper function to consume one op with `size` bytes from a rate limiter
    fn rate_limiter_consume_op(rate_limiter: &mut RateLimiter, size: u64) -> bool {
        if !rate_limiter.consume(1, TokenType::Ops) {
            return false;
        }
        if !rate_limiter.consume(size, TokenType::Bytes) {
            // Byte budget exhausted: give the already-taken op token back so the
            // limiter stays consistent.
            rate_limiter.manual_replenish(1, TokenType::Ops);
            return false;
        }
        true
    }

    // Helper function to replenish one operation with `size` bytes from a rate limiter
    fn rate_limiter_replenish_op(rate_limiter: &mut RateLimiter, size: u64) {
        rate_limiter.manual_replenish(1, TokenType::Ops);
        rate_limiter.manual_replenish(size, TokenType::Bytes);
    }

    // Attempts to copy a single frame into the guest if there is enough
    // rate limiting budget.
    // Returns true on successful frame delivery.
    pub fn rate_limited_rx_single_frame(&mut self, frame_size: u32) -> bool {
        let rx_queue = &mut self.queues[RX_INDEX];
        if !Self::rate_limiter_consume_op(&mut self.rx_rate_limiter, frame_size as u64) {
            self.metrics.rx_rate_limiter_throttled.inc();
            return false;
        }
        self.rx_buffer.finish_frame(rx_queue);
        true
    }

    /// Returns the minimum size of buffer we expect the guest to provide us depending on the
    /// features we have negotiated with it
    fn minimum_rx_buffer_size(&self) -> u32 {
        if !self.has_feature(VIRTIO_NET_F_MRG_RXBUF as u64) {
            if self.has_feature(VIRTIO_NET_F_GUEST_TSO4 as u64)
                || self.has_feature(VIRTIO_NET_F_GUEST_TSO6 as u64)
                || self.has_feature(VIRTIO_NET_F_GUEST_UFO as u64)
            {
                // GSO frames may be up to the maximum buffer size.
                MAX_BUFFER_SIZE.try_into().unwrap()
            } else {
                // NOTE(review): 1526 presumably = 1500 (MTU) + 14 (ethernet header)
                // + 12 (VNET header) — the smallest buffer holding a full non-GSO
                // frame; confirm against the virtio-net spec.
                1526
            }
        } else {
            // With mergeable RX buffers a chain only needs to fit the VNET header.
            vnet_hdr_len().try_into().unwrap()
        }
    }

    /// Parse available RX `DescriptorChains` from the queue
    pub fn parse_rx_descriptors(&mut self) -> Result<(), InvalidAvailIdx> {
        // This is safe since we checked in the event handler that the device is activated.
        let mem = &self.device_state.active_state().unwrap().mem;
        let queue = &mut self.queues[RX_INDEX];
        while let Some(head) = queue.pop_or_enable_notification()? {
            let index = head.index;
            // SAFETY: we are only using this `DescriptorChain` here.
            if let Err(err) = unsafe { self.rx_buffer.add_buffer(mem, head) } {
                self.metrics.rx_fails.inc();
                // If guest uses dirty tricks to make us add more descriptors than
                // we can hold, just stop processing.
                if matches!(err, AddRxBufferError::Parsing(IoVecError::IovDequeOverflow)) {
                    error!("net: Could not add an RX descriptor: {err}");
                    queue.undo_pop();
                    break;
                }
                error!("net: Could not parse an RX descriptor: {err}");
                // Add this broken chain to the used_ring. It will be
                // reported to the guest on the next `rx_buffer.finish_frame` call.
                // SAFETY:
                // index is verified on `DescriptorChain` creation.
                queue
                    .write_used_element(self.rx_buffer.used_descriptors, index, 0)
                    .unwrap();
                self.rx_buffer.used_descriptors += 1;
            }
        }
        Ok(())
    }

    // Tries to detour the frame to MMDS and if MMDS doesn't accept it, sends it on the host TAP.
    //
    // Returns whether MMDS consumed the frame.
    fn write_to_mmds_or_tap(
        mmds_ns: Option<&mut MmdsNetworkStack>,
        rate_limiter: &mut RateLimiter,
        headers: &mut [u8],
        frame_iovec: &IoVecBuffer,
        tap: &mut Tap,
        guest_mac: Option,
        net_metrics: &NetDeviceMetrics,
    ) -> Result {
        // Read the frame headers from the IoVecBuffer
        let max_header_len = headers.len();
        let header_len = frame_iovec
            .read_volatile_at(&mut &mut *headers, 0, max_header_len)
            .map_err(|err| {
                error!("Received malformed TX buffer: {:?}", err);
                net_metrics.tx_malformed_frames.inc();
                NetError::VnetHeaderMissing
            })?;
        let headers = frame_bytes_from_buf(&headers[..header_len]).inspect_err(|_| {
            error!("VNET headers missing in TX frame");
            net_metrics.tx_malformed_frames.inc();
        })?;
        if let Some(ns) = mmds_ns
            && ns.is_mmds_frame(headers)
        {
            let mut frame = vec![0u8; frame_iovec.len() as usize - vnet_hdr_len()];
            // Ok to unwrap here, because we are passing a buffer that has the exact size
            // of the `IoVecBuffer` minus the VNET headers.
            frame_iovec
                .read_exact_volatile_at(&mut frame, vnet_hdr_len())
                .unwrap();
            let _ = ns.detour_frame(&frame);
            METRICS.mmds.rx_accepted.inc();
            // MMDS frames are not accounted by the rate limiter.
            Self::rate_limiter_replenish_op(rate_limiter, u64::from(frame_iovec.len()));
            // MMDS consumed the frame.
            return Ok(true);
        }
        // This frame goes to the TAP.
        // Check for guest MAC spoofing.
        if let Some(guest_mac) = guest_mac {
            // Only count the spoof attempt; the frame is still forwarded.
            let _ = EthernetFrame::from_bytes(headers).map(|eth_frame| {
                if guest_mac != eth_frame.src_mac() {
                    net_metrics.tx_spoofed_mac_count.inc();
                }
            });
        }
        let _metric = net_metrics.tap_write_agg.record_latency_metrics();
        match Self::write_tap(tap, frame_iovec) {
            Ok(_) => {
                let len = u64::from(frame_iovec.len());
                net_metrics.tx_bytes_count.add(len);
                net_metrics.tx_packets_count.inc();
                net_metrics.tx_count.inc();
            }
            Err(err) => {
                // Best effort: a failed tap write is counted but not propagated.
                error!("Failed to write to tap: {:?}", err);
                net_metrics.tap_write_fails.inc();
            }
        };
        Ok(false)
    }

    // We currently prioritize packets from the MMDS over regular network packets.
    fn read_from_mmds_or_tap(&mut self) -> Result, NetError> {
        // We only want to read from TAP (or mmds) if we have at least 64K of available capacity as
        // this is the max size of 1 packet.
        // SAFETY:
        // * MAX_BUFFER_SIZE is constant and fits into u32
        #[allow(clippy::cast_possible_truncation)]
        if self.rx_buffer.capacity() < MAX_BUFFER_SIZE as u32 {
            self.parse_rx_descriptors()?;
            // If after parsing the RX queue we still don't have enough capacity, stop processing RX
            // frames.
            if self.rx_buffer.capacity() < MAX_BUFFER_SIZE as u32 {
                return Ok(None);
            }
        }
        if let Some(ns) = self.mmds_ns.as_mut()
            && let Some(len) = ns.write_next_frame(frame_bytes_from_buf_mut(&mut self.rx_frame_buf)?)
        {
            let len = len.get();
            METRICS.mmds.tx_frames.inc();
            METRICS.mmds.tx_bytes.add(len as u64);
            init_vnet_hdr(&mut self.rx_frame_buf);
            self.rx_buffer
                .iovec
                .write_all_volatile_at(&self.rx_frame_buf[..vnet_hdr_len() + len], 0)?;
            // SAFETY:
            // * len will never be bigger that u32::MAX because mmds is bound
            // by the size of `self.rx_frame_buf` which is MAX_BUFFER_SIZE size.
            let len: u32 = (vnet_hdr_len() + len).try_into().unwrap();
            // SAFETY:
            // * We checked that `rx_buffer` includes at least one `DescriptorChain`
            // * `rx_frame_buf` has size of `MAX_BUFFER_SIZE` and all `DescriptorChain` objects are
            // at least that big.
            unsafe {
                self.rx_buffer.mark_used(len, &mut self.queues[RX_INDEX]);
            }
            return Ok(Some(len));
        }
        // SAFETY:
        // * We ensured that `self.rx_buffer` has at least one DescriptorChain parsed in it.
        let len = unsafe { self.read_tap().map_err(NetError::IO) }?;
        // SAFETY:
        // * len will never be bigger that u32::MAX
        let len: u32 = len.try_into().unwrap();
        // SAFETY:
        // * `rx_buffer` has at least one `DescriptorChain`
        // * `read_tap` passes the first `DescriptorChain` to `readv` so we can't have read more
        // bytes than its capacity.
        unsafe {
            self.rx_buffer.mark_used(len, &mut self.queues[RX_INDEX]);
        }
        Ok(Some(len))
    }

    /// Read as many frames as possible.
    fn process_rx(&mut self) -> Result<(), DeviceError> {
        loop {
            match self.read_from_mmds_or_tap() {
                Ok(None) => {
                    self.metrics.no_rx_avail_buffer.inc();
                    break;
                }
                Ok(Some(bytes)) => {
                    self.metrics.rx_count.inc();
                    self.metrics.rx_bytes_count.add(bytes as u64);
                    self.metrics.rx_packets_count.inc();
                    // Stop once the rate limiter refuses to deliver the frame.
                    if !self.rate_limited_rx_single_frame(bytes) {
                        break;
                    }
                }
                Err(NetError::IO(err)) => {
                    // The tap device is non-blocking, so any error aside from EAGAIN is
                    // unexpected.
                    match err.raw_os_error() {
                        Some(err) if err == EAGAIN => (),
                        _ => {
                            error!("Failed to read tap: {:?}", err);
                            self.metrics.tap_read_fails.inc();
                            return Err(DeviceError::FailedReadTap);
                        }
                    };
                    break;
                }
                Err(NetError::InvalidAvailIdx(err)) => {
                    return Err(DeviceError::InvalidAvailIdx(err));
                }
                Err(err) => {
                    error!("Spurious error in network RX: {:?}", err);
                }
            }
        }
        self.try_signal_queue(NetQueue::Rx)
    }

    fn resume_rx(&mut self) -> Result<(), DeviceError> {
        // First try to handle any deferred frame
        if self.rx_buffer.used_bytes != 0 {
            // If can't finish sending this frame, re-set it as deferred and return; we can't
            // process any more frames from the TAP.
            if !self.rate_limited_rx_single_frame(self.rx_buffer.used_bytes) {
                return Ok(());
            }
        }
        self.process_rx()
    }

    /// Drain the TX queue: forward each guest frame to MMDS or the TAP.
    fn process_tx(&mut self) -> Result<(), DeviceError> {
        // This is safe since we checked in the event handler that the device is activated.
        let mem = &self.device_state.active_state().unwrap().mem;
        // The MMDS network stack works like a state machine, based on synchronous calls, and
        // without being added to any event loop. If any frame is accepted by the MMDS, we also
        // trigger a process_rx() which checks if there are any new frames to be sent, starting
        // with the MMDS network stack.
        let mut process_rx_for_mmds = false;
        let mut used_any = false;
        let tx_queue = &mut self.queues[TX_INDEX];
        while let Some(head) = tx_queue.pop_or_enable_notification()? {
            self.metrics
                .tx_remaining_reqs_count
                .add(tx_queue.len().into());
            let head_index = head.index;
            // Parse IoVecBuffer from descriptor head
            // SAFETY: This descriptor chain is only loaded once
            // virtio requests are handled sequentially so no two IoVecBuffers
            // are live at the same time, meaning this has exclusive ownership over the memory
            if unsafe { self.tx_buffer.load_descriptor_chain(mem, head).is_err() } {
                self.metrics.tx_fails.inc();
                tx_queue.add_used(head_index, 0)?;
                continue;
            };
            // We only handle frames that are up to MAX_BUFFER_SIZE
            if self.tx_buffer.len() as usize > MAX_BUFFER_SIZE {
                error!("net: received too big frame from driver");
                self.metrics.tx_malformed_frames.inc();
                tx_queue.add_used(head_index, 0)?;
                continue;
            }
            if !Self::rate_limiter_consume_op(
                &mut self.tx_rate_limiter,
                u64::from(self.tx_buffer.len()),
            ) {
                // Out of budget: put the chain back and retry on the next limiter event.
                tx_queue.undo_pop();
                self.metrics.tx_rate_limiter_throttled.inc();
                break;
            }
            let frame_consumed_by_mmds = Self::write_to_mmds_or_tap(
                self.mmds_ns.as_mut(),
                &mut self.tx_rate_limiter,
                &mut self.tx_frame_headers,
                &self.tx_buffer,
                &mut self.tap,
                self.guest_mac,
                &self.metrics,
            )
            .unwrap_or(false);
            if frame_consumed_by_mmds && self.rx_buffer.used_bytes == 0 {
                // MMDS consumed this frame/request, let's also try to process the response.
                process_rx_for_mmds = true;
            }
            tx_queue.add_used(head_index, 0)?;
            used_any = true;
        }
        if !used_any {
            self.metrics.no_tx_avail_buffer.inc();
        }
        // Cleanup tx_buffer to ensure no two buffers point at the same memory
        self.tx_buffer.clear();
        self.try_signal_queue(NetQueue::Tx)?;
        // An incoming frame for the MMDS may trigger the transmission of a new message.
        if process_rx_for_mmds {
            self.process_rx()
        } else {
            Ok(())
        }
    }

    /// Builds the offload features we will setup on the TAP device based on the features that the
    /// guest supports.
    pub fn build_tap_offload_features(guest_supported_features: u64) -> u32 {
        // Map each negotiated VIRTIO_NET_F_GUEST_* bit onto the matching TUN_F_* flag.
        let add_if_supported =
            |tap_features: &mut u32, supported_features: u64, tap_flag: u32, virtio_flag: u32| {
                if supported_features & (1 << virtio_flag) != 0 {
                    *tap_features |= tap_flag;
                }
            };
        let mut tap_features: u32 = 0;
        add_if_supported(
            &mut tap_features,
            guest_supported_features,
            generated::TUN_F_CSUM,
            VIRTIO_NET_F_GUEST_CSUM,
        );
        add_if_supported(
            &mut tap_features,
            guest_supported_features,
            generated::TUN_F_UFO,
            VIRTIO_NET_F_GUEST_UFO,
        );
        add_if_supported(
            &mut tap_features,
            guest_supported_features,
            generated::TUN_F_TSO4,
            VIRTIO_NET_F_GUEST_TSO4,
        );
        add_if_supported(
            &mut tap_features,
            guest_supported_features,
            generated::TUN_F_TSO6,
            VIRTIO_NET_F_GUEST_TSO6,
        );
        tap_features
    }

    /// Updates the parameters for the rate limiters
    pub fn patch_rate_limiters(
        &mut self,
        rx_bytes: BucketUpdate,
        rx_ops: BucketUpdate,
        tx_bytes: BucketUpdate,
        tx_ops: BucketUpdate,
    ) {
        self.rx_rate_limiter.update_buckets(rx_bytes, rx_ops);
        self.tx_rate_limiter.update_buckets(tx_bytes, tx_ops);
    }

    /// Reads a frame from the TAP device inside the first descriptor held by `self.rx_buffer`.
    ///
    /// # Safety
    ///
    /// `self.rx_buffer` needs to have at least one descriptor chain parsed
    pub unsafe fn read_tap(&mut self) -> std::io::Result {
        // With mergeable RX buffers a frame may span several chains; otherwise it
        // must fit in the first chain alone.
        let slice = if self.has_feature(VIRTIO_NET_F_MRG_RXBUF as u64) {
            self.rx_buffer.all_chains_slice_mut()
        } else {
            self.rx_buffer.single_chain_slice_mut()
        };
        self.tap.read_iovec(slice)
    }

    fn write_tap(tap: &mut Tap, buf: &IoVecBuffer) -> std::io::Result {
        tap.write_iovec(buf)
    }

    /// Process a single RX queue event.
    ///
    /// This is called by the event manager responding to the guest adding a new
    /// buffer in the RX queue.
    pub fn process_rx_queue_event(&mut self) {
        self.metrics.rx_queue_event_count.inc();
        if let Err(err) = self.queue_evts[RX_INDEX].read() {
            error!("Failed to get rx queue event: {:?}", err);
            self.metrics.event_fails.inc();
            return;
        } else {
            self.parse_rx_descriptors().unwrap();
        }
        if self.rx_rate_limiter.is_blocked() {
            self.metrics.rx_rate_limiter_throttled.inc();
        } else {
            // If the limiter is not blocked, resume the receiving of bytes.
            self.resume_rx()
                .unwrap_or_else(|err| report_net_event_fail(&self.metrics, err));
        }
    }

    /// Process a TAP-readable event: the host side has frames for the guest.
    pub fn process_tap_rx_event(&mut self) {
        // This is safe since we checked in the event handler that the device is activated.
        self.metrics.rx_tap_event_count.inc();
        // While limiter is blocked, don't process any more incoming.
        if self.rx_rate_limiter.is_blocked() {
            self.metrics.rx_rate_limiter_throttled.inc();
            return;
        }
        self.resume_rx()
            .unwrap_or_else(|err| report_net_event_fail(&self.metrics, err));
    }

    /// Process a single TX queue event.
    ///
    /// This is called by the event manager responding to the guest adding a new
    /// buffer in the TX queue.
    pub fn process_tx_queue_event(&mut self) {
        self.metrics.tx_queue_event_count.inc();
        if let Err(err) = self.queue_evts[TX_INDEX].read() {
            error!("Failed to get tx queue event: {:?}", err);
            self.metrics.event_fails.inc();
        } else if !self.tx_rate_limiter.is_blocked()
        // If the limiter is not blocked, continue transmitting bytes.
        {
            self.process_tx()
                .unwrap_or_else(|err| report_net_event_fail(&self.metrics, err));
        } else {
            self.metrics.tx_rate_limiter_throttled.inc();
        }
    }

    /// Process an RX rate-limiter budget-replenished event.
    pub fn process_rx_rate_limiter_event(&mut self) {
        self.metrics.rx_event_rate_limiter_count.inc();
        // Upon rate limiter event, call the rate limiter handler
        // and restart processing the queue.
        match self.rx_rate_limiter.event_handler() {
            Ok(_) => {
                // There might be enough budget now to receive the frame.
                self.resume_rx()
                    .unwrap_or_else(|err| report_net_event_fail(&self.metrics, err));
            }
            Err(err) => {
                error!("Failed to get rx rate-limiter event: {:?}", err);
                self.metrics.event_fails.inc();
            }
        }
    }

    /// Process a TX rate-limiter budget-replenished event.
    pub fn process_tx_rate_limiter_event(&mut self) {
        self.metrics.tx_rate_limiter_event_count.inc();
        // Upon rate limiter event, call the rate limiter handler
        // and restart processing the queue.
        match self.tx_rate_limiter.event_handler() {
            Ok(_) => {
                // There might be enough budget now to send the frame.
                self.process_tx()
                    .unwrap_or_else(|err| report_net_event_fail(&self.metrics, err));
            }
            Err(err) => {
                error!("Failed to get tx rate-limiter event: {:?}", err);
                self.metrics.event_fails.inc();
            }
        }
    }

    /// Process device virtio queue(s).
pub fn process_virtio_queues(&mut self) -> Result<(), InvalidAvailIdx> { if let Err(DeviceError::InvalidAvailIdx(err)) = self.resume_rx() { return Err(err); } if let Err(DeviceError::InvalidAvailIdx(err)) = self.process_tx() { return Err(err); } Ok(()) } } impl VirtioDevice for Net { impl_device_type!(VirtioDeviceType::Net); fn id(&self) -> &str { &self.id } fn avail_features(&self) -> u64 { self.avail_features } fn acked_features(&self) -> u64 { self.acked_features } fn set_acked_features(&mut self, acked_features: u64) { self.acked_features = acked_features; } fn queues(&self) -> &[Queue] { &self.queues } fn queues_mut(&mut self) -> &mut [Queue] { &mut self.queues } fn queue_events(&self) -> &[EventFd] { &self.queue_evts } fn interrupt_trigger(&self) -> &dyn VirtioInterrupt { self.device_state .active_state() .expect("Device is not implemented") .interrupt .deref() } fn read_config(&self, offset: u64, data: &mut [u8]) { if let Some(config_space_bytes) = self.config_space.as_slice().get(u64_to_usize(offset)..) 
{ let len = config_space_bytes.len().min(data.len()); data[..len].copy_from_slice(&config_space_bytes[..len]); } else { error!("Failed to read config space"); self.metrics.cfg_fails.inc(); } } fn write_config(&mut self, offset: u64, data: &[u8]) { let config_space_bytes = self.config_space.as_mut_slice(); let start = usize::try_from(offset).ok(); let end = start.and_then(|s| s.checked_add(data.len())); let Some(dst) = start .zip(end) .and_then(|(start, end)| config_space_bytes.get_mut(start..end)) else { error!("Failed to write config space"); self.metrics.cfg_fails.inc(); return; }; dst.copy_from_slice(data); self.guest_mac = Some(self.config_space.guest_mac); self.metrics.mac_address_updates.inc(); } fn activate( &mut self, mem: GuestMemoryMmap, interrupt: Arc, ) -> Result<(), ActivateError> { for q in self.queues.iter_mut() { q.initialize(&mem) .map_err(ActivateError::QueueMemoryError)?; } let event_idx = self.has_feature(u64::from(VIRTIO_RING_F_EVENT_IDX)); if event_idx { for queue in &mut self.queues { queue.enable_notif_suppression(); } } let supported_flags: u32 = Net::build_tap_offload_features(self.acked_features); self.tap .set_offload(supported_flags) .map_err(super::super::ActivateError::TapSetOffload)?; self.rx_buffer.min_buffer_size = self.minimum_rx_buffer_size(); if self.activate_evt.write(1).is_err() { self.metrics.activate_fails.inc(); return Err(ActivateError::EventFd); } self.device_state = DeviceState::Activated(ActiveState { mem, interrupt }); Ok(()) } fn is_activated(&self) -> bool { self.device_state.is_activated() } /// Prepare saving state fn prepare_save(&mut self) { // We shouldn't be messing with the queue if the device is not activated. // Anyways, if it isn't there's nothing to prepare; we haven't parsed any // descriptors yet from it and we can't have a deferred frame. 
if !self.is_activated() { return; } // Give potential deferred RX frame to guest self.rx_buffer.finish_frame(&mut self.queues[RX_INDEX]); // Reset the parsed available descriptors, so we will re-parse them self.queues[RX_INDEX].next_avail -= Wrapping(u16::try_from(self.rx_buffer.parsed_descriptors.len()).unwrap()); self.rx_buffer.parsed_descriptors.clear(); self.rx_buffer.iovec.clear(); self.rx_buffer.used_bytes = 0; self.rx_buffer.used_descriptors = 0; } } #[cfg(test)] #[macro_use] #[allow(clippy::cast_possible_truncation)] pub mod tests { use std::net::Ipv4Addr; use std::os::fd::AsRawFd; use std::str::FromStr; use std::time::Duration; use std::{mem, thread}; use vm_memory::GuestAddress; use super::*; use crate::check_metric_after_block; use crate::devices::virtio::generated::virtio_ring::VIRTIO_RING_F_EVENT_IDX; use crate::devices::virtio::iovec::IoVecBuffer; use crate::devices::virtio::net::NET_QUEUE_SIZES; use crate::devices::virtio::net::device::{ frame_bytes_from_buf, frame_bytes_from_buf_mut, frame_hdr_len, init_vnet_hdr, vnet_hdr_len, }; use crate::devices::virtio::net::test_utils::test::TestHelper; use crate::devices::virtio::net::test_utils::{ NetEvent, NetQueue, TapTrafficSimulator, default_net, if_index, inject_tap_tx_frame, set_mac, }; use crate::devices::virtio::queue::VIRTQ_DESC_F_WRITE; use crate::devices::virtio::test_utils::VirtQueue; use crate::dumbo::EthernetFrame; use crate::dumbo::pdu::arp::{ETH_IPV4_FRAME_LEN, EthIPv4ArpFrame}; use crate::dumbo::pdu::ethernet::ETHERTYPE_ARP; use crate::logger::IncMetric; use crate::rate_limiter::{BucketUpdate, RateLimiter, TokenBucket, TokenType}; use crate::test_utils::single_region_mem; use crate::utils::net::mac::{MAC_ADDR_LEN, MacAddr}; use crate::vstate::memory::{Address, GuestMemory}; impl Net { pub fn finish_frame(&mut self) { self.rx_buffer.finish_frame(&mut self.queues[RX_INDEX]); self.queues[RX_INDEX].advance_used_ring_idx(); } } /// Write the number of descriptors used in VirtIO header fn 
header_set_num_buffers(frame: &mut [u8], nr_descs: u16) { let bytes = nr_descs.to_le_bytes(); let offset = std::mem::offset_of!(virtio_net_hdr_v1, num_buffers); frame[offset] = bytes[0]; frame[offset + 1] = bytes[1]; } #[test] fn test_vnet_helpers() { let mut frame_buf = vec![42u8; vnet_hdr_len() - 1]; assert_eq!( format!("{:?}", frame_bytes_from_buf_mut(&mut frame_buf)), "Err(VnetHeaderMissing)" ); let mut frame_buf: [u8; MAX_BUFFER_SIZE] = [42u8; MAX_BUFFER_SIZE]; let vnet_hdr_len_ = mem::size_of::(); assert_eq!(vnet_hdr_len_, vnet_hdr_len()); init_vnet_hdr(&mut frame_buf); let zero_vnet_hdr = vec![0u8; vnet_hdr_len_]; assert_eq!(zero_vnet_hdr, &frame_buf[..vnet_hdr_len_]); let payload = vec![42u8; MAX_BUFFER_SIZE - vnet_hdr_len_]; assert_eq!(payload, frame_bytes_from_buf(&frame_buf).unwrap()); { let payload = frame_bytes_from_buf_mut(&mut frame_buf).unwrap(); payload[0] = 15; } assert_eq!(frame_buf[vnet_hdr_len_], 15); } #[test] fn test_virtio_device_type() { let mut net = default_net(); set_mac(&mut net, MacAddr::from_str("11:22:33:44:55:66").unwrap()); assert_eq!(net.device_type(), VirtioDeviceType::Net); } #[test] // Test that `Net::build_tap_offload_features` creates the TAP offload features that we expect // it to do, based on the available guest features fn test_build_tap_offload_features_all() { let supported_features = (1 << VIRTIO_NET_F_GUEST_CSUM) | (1 << VIRTIO_NET_F_GUEST_UFO) | (1 << VIRTIO_NET_F_GUEST_TSO4) | (1 << VIRTIO_NET_F_GUEST_TSO6); let expected_tap_features = generated::TUN_F_CSUM | generated::TUN_F_UFO | generated::TUN_F_TSO4 | generated::TUN_F_TSO6; let supported_flags = Net::build_tap_offload_features(supported_features); assert_eq!(supported_flags, expected_tap_features); } #[test] // Same as before, however, using each supported feature one by one. 
fn test_build_tap_offload_features_one_by_one() {
    // Each guest offload feature must map to exactly its TAP counterpart.
    // TSO6 was missing from this table although `Net::build_tap_offload_features`
    // supports it and the "all features" test above exercises it; added for parity.
    let features = [
        (1 << VIRTIO_NET_F_GUEST_CSUM, generated::TUN_F_CSUM),
        (1 << VIRTIO_NET_F_GUEST_UFO, generated::TUN_F_UFO),
        (1 << VIRTIO_NET_F_GUEST_TSO4, generated::TUN_F_TSO4),
        (1 << VIRTIO_NET_F_GUEST_TSO6, generated::TUN_F_TSO6),
    ];
    for (virtio_flag, tap_flag) in features {
        let supported_flags = Net::build_tap_offload_features(virtio_flag);
        assert_eq!(supported_flags, tap_flag);
    }
}

#[test]
fn test_virtio_device_read_config() {
    let mut net = default_net();
    set_mac(&mut net, MacAddr::from_str("11:22:33:44:55:66").unwrap());
    // Test `read_config()`. This also validates the MAC was properly configured.
    let mac = MacAddr::from_str("11:22:33:44:55:66").unwrap();
    let mut config_mac = [0u8; MAC_ADDR_LEN as usize];
    net.read_config(0, &mut config_mac);
    assert_eq!(&config_mac, mac.get_bytes());
    // Invalid read: an out-of-bounds offset must leave the buffer untouched.
    config_mac = [0u8; MAC_ADDR_LEN as usize];
    net.read_config(u64::from(MAC_ADDR_LEN), &mut config_mac);
    assert_eq!(config_mac, [0u8, 0u8, 0u8, 0u8, 0u8, 0u8]);
}

#[test]
fn test_virtio_device_rewrite_config() {
    let mut net = default_net();
    set_mac(&mut net, MacAddr::from_str("11:22:33:44:55:66").unwrap());
    let new_config: [u8; MAC_ADDR_LEN as usize] = [0x66, 0x55, 0x44, 0x33, 0x22, 0x11];
    net.write_config(0, &new_config);
    let mut new_config_read = [0u8; MAC_ADDR_LEN as usize];
    net.read_config(0, &mut new_config_read);
    assert_eq!(new_config, new_config_read);
    // Check that the guest MAC was updated.
    let expected_guest_mac = MacAddr::from_bytes_unchecked(&new_config);
    assert_eq!(expected_guest_mac, net.guest_mac.unwrap());
    assert_eq!(net.metrics.mac_address_updates.count(), 1);
    // Partial write (this is how the kernel sets a new mac address) - byte by byte.
    let new_config = [0x11, 0x22, 0x33, 0x44, 0x55, 0x66];
    for i in 0..new_config.len() {
        net.write_config(i as u64, &new_config[i..=i]);
    }
    net.read_config(0, &mut new_config_read);
    assert_eq!(new_config, new_config_read);
    // Invalid write.
    net.write_config(5, &new_config);
    // Verify old config was untouched.
new_config_read = [0u8; MAC_ADDR_LEN as usize]; net.read_config(0, &mut new_config_read); assert_eq!(new_config, new_config_read); // Large offset that may cause an overflow. net.write_config(u64::MAX, &new_config); // Verify old config was untouched. new_config_read = [0u8; MAC_ADDR_LEN as usize]; net.read_config(0, &mut new_config_read); assert_eq!(new_config, new_config_read); } #[test] fn test_rx_missing_queue_signal() { let mem = single_region_mem(2 * MAX_BUFFER_SIZE); let mut th = TestHelper::get_default(&mem); th.activate_net(); th.add_desc_chain(NetQueue::Rx, 0, &[(0, 4096, VIRTQ_DESC_F_WRITE)]); th.net().queue_evts[RX_INDEX].read().unwrap(); check_metric_after_block!( th.net().metrics.event_fails, 1, th.simulate_event(NetEvent::RxQueue) ); // Check that the used queue didn't advance. assert_eq!(th.rxq.used.idx.get(), 0); } fn rx_read_only_descriptor(mut th: TestHelper) { th.activate_net(); th.add_desc_chain( NetQueue::Rx, 0, &[ (0, 100, VIRTQ_DESC_F_WRITE), (1, 100, 0), (2, 1000, VIRTQ_DESC_F_WRITE), ], ); let mut frame = inject_tap_tx_frame(&th.net(), 1000); check_metric_after_block!( th.net().metrics.rx_fails, 1, th.event_manager.run_with_timeout(100).unwrap() ); th.rxq.check_used_elem(0, 0, 0); header_set_num_buffers(frame.as_mut_slice(), 1); th.check_rx_queue_resume(&frame); } #[test] fn test_rx_read_only_descriptor() { let mem = single_region_mem(2 * MAX_BUFFER_SIZE); let th = TestHelper::get_default(&mem); rx_read_only_descriptor(th); } #[test] fn test_rx_read_only_descriptor_mrg() { let mem = single_region_mem(2 * MAX_BUFFER_SIZE); let mut th = TestHelper::get_default(&mem); // VIRTIO_NET_F_MRG_RXBUF is not enabled by default th.net().acked_features = 1 << VIRTIO_NET_F_MRG_RXBUF; rx_read_only_descriptor(th); } fn rx_short_descriptor(mut th: TestHelper) { th.activate_net(); th.add_desc_chain(NetQueue::Rx, 0, &[(0, 10, VIRTQ_DESC_F_WRITE)]); let mut frame = th.check_rx_discarded_buffer(1000); th.rxq.check_used_elem(0, 0, 0); 
header_set_num_buffers(frame.as_mut_slice(), 1); th.check_rx_queue_resume(&frame); } #[test] fn test_rx_short_descriptor() { let mem = single_region_mem(2 * MAX_BUFFER_SIZE); let th = TestHelper::get_default(&mem); rx_short_descriptor(th); } #[test] fn test_rx_short_descriptor_mrg() { let mem = single_region_mem(2 * MAX_BUFFER_SIZE); let mut th = TestHelper::get_default(&mem); // VIRTIO_NET_F_MRG_RXBUF is not enabled by default th.net().acked_features = 1 << VIRTIO_NET_F_MRG_RXBUF; rx_short_descriptor(th); } fn rx_invalid_descriptor(mut th: TestHelper) { th.activate_net(); // The descriptor chain is created so that the last descriptor doesn't fit in the // guest memory. let offset = th.mem.last_addr().raw_value() - th.data_addr() - 300; th.add_desc_chain( NetQueue::Rx, offset, &[ (0, 100, VIRTQ_DESC_F_WRITE), (1, 50, VIRTQ_DESC_F_WRITE), (2, 4096, VIRTQ_DESC_F_WRITE), ], ); let mut frame = th.check_rx_discarded_buffer(1000); th.rxq.check_used_elem(0, 0, 0); header_set_num_buffers(frame.as_mut_slice(), 1); th.check_rx_queue_resume(&frame); } #[test] fn test_rx_invalid_descriptor() { let mem = single_region_mem(2 * MAX_BUFFER_SIZE); let th = TestHelper::get_default(&mem); rx_invalid_descriptor(th); } #[test] fn test_rx_invalid_descriptor_mrg() { let mem = single_region_mem(2 * MAX_BUFFER_SIZE); let mut th = TestHelper::get_default(&mem); // VIRTIO_NET_F_MRG_RXBUF is not enabled by default th.net().acked_features = 1 << VIRTIO_NET_F_MRG_RXBUF; rx_invalid_descriptor(th); } fn rx_retry(mut th: TestHelper) { th.activate_net(); // Add invalid descriptor chain - read only descriptor. th.add_desc_chain( NetQueue::Rx, 0, &[ (0, 100, VIRTQ_DESC_F_WRITE), (1, 100, 0), (2, 1000, VIRTQ_DESC_F_WRITE), ], ); // Add invalid descriptor chain - too short. th.add_desc_chain(NetQueue::Rx, 1200, &[(3, 10, VIRTQ_DESC_F_WRITE)]); // Add invalid descriptor chain - invalid memory offset. 
th.add_desc_chain( NetQueue::Rx, th.mem.last_addr().raw_value(), &[(4, 1000, VIRTQ_DESC_F_WRITE)], ); // Add valid descriptor chain. TestHelper does not negotiate any feature offloading so the // buffers need to be at least 1526 bytes long. th.add_desc_chain( NetQueue::Rx, 1300, &[(5, MAX_BUFFER_SIZE as u32, VIRTQ_DESC_F_WRITE)], ); // Inject frame to tap and run epoll. let mut frame = inject_tap_tx_frame(&th.net(), 1000); check_metric_after_block!( th.net().metrics.rx_packets_count, 1, th.event_manager.run_with_timeout(100).unwrap() ); // Check that the used queue has advanced. assert_eq!(th.rxq.used.idx.get(), 4); assert!( th.net() .interrupt_trigger() .has_pending_interrupt(VirtioInterruptType::Queue(RX_INDEX as u16)) ); // Check that the invalid descriptor chains have been discarded th.rxq.check_used_elem(0, 0, 0); th.rxq.check_used_elem(1, 3, 0); th.rxq.check_used_elem(2, 4, 0); // Check that the frame wasn't deferred. assert!(th.net().rx_buffer.used_descriptors == 0); // Check that the frame has been written successfully to the valid Rx descriptor chain. th.rxq .check_used_elem(3, 5, frame.len().try_into().unwrap()); header_set_num_buffers(frame.as_mut_slice(), 1); th.rxq.dtable[5].check_data(&frame); } #[test] fn test_rx_retry() { let mem = single_region_mem(2 * MAX_BUFFER_SIZE); let th = TestHelper::get_default(&mem); rx_retry(th); } #[test] fn test_rx_retry_mrg() { let mem = single_region_mem(2 * MAX_BUFFER_SIZE); let mut th = TestHelper::get_default(&mem); // VIRTIO_NET_F_MRG_RXBUF is not enabled by default th.net().acked_features = 1 << VIRTIO_NET_F_MRG_RXBUF; rx_retry(th); } fn rx_complex_desc_chain(mut th: TestHelper) { th.activate_net(); // Create a valid Rx avail descriptor chain with multiple descriptors. th.add_desc_chain( NetQueue::Rx, 0, // Add gaps between the descriptor ids in order to ensure that we follow // the `next` field. 
&[ (3, 100, VIRTQ_DESC_F_WRITE), (5, 50, VIRTQ_DESC_F_WRITE), (11, MAX_BUFFER_SIZE as u32 - 100 - 50, VIRTQ_DESC_F_WRITE), ], ); // Inject frame to tap and run epoll. let mut frame = inject_tap_tx_frame(&th.net(), 1000); check_metric_after_block!( th.net().metrics.rx_packets_count, 1, th.event_manager.run_with_timeout(100).unwrap() ); // Check that the frame wasn't deferred. assert!(th.net().rx_buffer.used_descriptors == 0); // Check that the used queue has advanced. assert_eq!(th.rxq.used.idx.get(), 1); assert!( th.net() .interrupt_trigger() .has_pending_interrupt(VirtioInterruptType::Queue(RX_INDEX as u16)) ); // Check that the frame has been written successfully to the Rx descriptor chain. header_set_num_buffers(frame.as_mut_slice(), 1); th.rxq .check_used_elem(0, 3, frame.len().try_into().unwrap()); th.rxq.dtable[3].check_data(&frame[..100]); th.rxq.dtable[5].check_data(&frame[100..150]); th.rxq.dtable[11].check_data(&frame[150..]); } #[test] fn test_rx_complex_desc_chain() { let mem = single_region_mem(2 * MAX_BUFFER_SIZE); let th = TestHelper::get_default(&mem); rx_complex_desc_chain(th); } #[test] fn test_rx_complex_desc_chain_mrg() { let mem = single_region_mem(2 * MAX_BUFFER_SIZE); let mut th = TestHelper::get_default(&mem); // VIRTIO_NET_F_MRG_RXBUF is not enabled by default th.net().acked_features = 1 << VIRTIO_NET_F_MRG_RXBUF; rx_complex_desc_chain(th); } fn rx_multiple_frames(mut th: TestHelper) { th.activate_net(); // Create 2 valid Rx avail descriptor chains. Each one has enough space to fit the // following 2 frames. But only 1 frame has to be written to each chain. 
th.add_desc_chain( NetQueue::Rx, 0, &[ (0, 500, VIRTQ_DESC_F_WRITE), (1, 500, VIRTQ_DESC_F_WRITE), (2, MAX_BUFFER_SIZE as u32 - 1000, VIRTQ_DESC_F_WRITE), ], ); // Second chain needs at least MAX_BUFFER_SIZE offset th.add_desc_chain( NetQueue::Rx, MAX_BUFFER_SIZE as u64 + 1000, &[ (3, 500, VIRTQ_DESC_F_WRITE), (4, 500, VIRTQ_DESC_F_WRITE), (5, MAX_BUFFER_SIZE as u32 - 1000, VIRTQ_DESC_F_WRITE), ], ); // Inject 2 frames to tap and run epoll. let mut frame_1 = inject_tap_tx_frame(&th.net(), 200); let mut frame_2 = inject_tap_tx_frame(&th.net(), 300); check_metric_after_block!( th.net().metrics.rx_packets_count, 2, th.event_manager.run_with_timeout(100).unwrap() ); // Check that the frames weren't deferred. assert!(th.net().rx_buffer.used_bytes == 0); // Check that the used queue has advanced. assert_eq!(th.rxq.used.idx.get(), 2); assert!( th.net() .interrupt_trigger() .has_pending_interrupt(VirtioInterruptType::Queue(RX_INDEX as u16)) ); // Check that the 1st frame was written successfully to the 1st Rx descriptor chain. header_set_num_buffers(frame_1.as_mut_slice(), 1); th.rxq .check_used_elem(0, 0, frame_1.len().try_into().unwrap()); th.rxq.dtable[0].check_data(&frame_1); th.rxq.dtable[1].check_data(&[0; 500]); th.rxq.dtable[2].check_data(&[0; MAX_BUFFER_SIZE - 1000]); // Check that the 2nd frame was written successfully to the 2nd Rx descriptor chain. 
header_set_num_buffers(frame_2.as_mut_slice(), 1); th.rxq .check_used_elem(1, 3, frame_2.len().try_into().unwrap()); th.rxq.dtable[3].check_data(&frame_2); th.rxq.dtable[4].check_data(&[0; 500]); th.rxq.dtable[5].check_data(&[0; MAX_BUFFER_SIZE - 1000]); } #[test] fn test_rx_multiple_frames() { let mem = single_region_mem(3 * MAX_BUFFER_SIZE); let th = TestHelper::get_default(&mem); rx_multiple_frames(th); } #[test] fn test_rx_multiple_frames_mrg() { let mem = single_region_mem(3 * MAX_BUFFER_SIZE); let mut th = TestHelper::get_default(&mem); // VIRTIO_NET_F_MRG_RXBUF is not enabled by default th.net().acked_features = 1 << VIRTIO_NET_F_MRG_RXBUF; rx_multiple_frames(th); } fn rx_mrg_rxbuf_only(mut th: TestHelper) { th.activate_net(); // Create 2 valid Rx avail descriptor chains. The total size should // be at least 64K to pass the capacity check for rx_buffers. // First chain is intentionally small, so non VIRTIO_NET_F_MRG_RXBUF // version will skip it. th.add_desc_chain(NetQueue::Rx, 0, &[(0, 500, VIRTQ_DESC_F_WRITE)]); th.add_desc_chain( NetQueue::Rx, 1000, &[(1, MAX_BUFFER_SIZE as u32, VIRTQ_DESC_F_WRITE)], ); // Inject frame to tap and run epoll. let mut frame = inject_tap_tx_frame(&th.net(), 1000); check_metric_after_block!( th.net().metrics.rx_packets_count, 1, th.event_manager.run_with_timeout(100).unwrap() ); // Check that the frame wasn't deferred. assert!(th.net().rx_buffer.used_bytes == 0); // Check that the used queue has advanced. assert_eq!(th.rxq.used.idx.get(), 2); assert!( th.net() .interrupt_trigger() .has_pending_interrupt(VirtioInterruptType::Queue(RX_INDEX as u16)) ); // 2 chains should be used for the packet. header_set_num_buffers(frame.as_mut_slice(), 2); // Here non VIRTIO_NET_F_MRG_RXBUF version should panic as // first descriptor will be discarded by it. 
th.rxq.check_used_elem(0, 0, 500); th.rxq.check_used_elem(1, 1, 500); th.rxq.dtable[0].check_data(&frame[0..500]); th.rxq.dtable[1].check_data(&frame[500..]); } #[test] #[should_panic] fn test_rx_mrg_rxbuf_only() { let mem = single_region_mem(3 * MAX_BUFFER_SIZE); let th = TestHelper::get_default(&mem); rx_mrg_rxbuf_only(th); } #[test] fn test_rx_mrg_rxbuf_only_mrg() { let mem = single_region_mem(3 * MAX_BUFFER_SIZE); let mut th = TestHelper::get_default(&mem); // VIRTIO_NET_F_MRG_RXBUF is not enabled by default th.net().acked_features = 1 << VIRTIO_NET_F_MRG_RXBUF; rx_mrg_rxbuf_only(th); } #[test] fn test_tx_missing_queue_signal() { let mem = single_region_mem(2 * MAX_BUFFER_SIZE); let mut th = TestHelper::get_default(&mem); th.activate_net(); let tap_traffic_simulator = TapTrafficSimulator::new(if_index(&th.net().tap)); th.add_desc_chain(NetQueue::Tx, 0, &[(0, 4096, 0)]); th.net().queue_evts[TX_INDEX].read().unwrap(); check_metric_after_block!( th.net().metrics.event_fails, 1, th.simulate_event(NetEvent::TxQueue) ); // Check that the used queue didn't advance. assert_eq!(th.txq.used.idx.get(), 0); // Check that the frame wasn't sent to the tap. assert!(!tap_traffic_simulator.pop_rx_packet(&mut [0; 1000])); } #[test] fn test_tx_writeable_descriptor() { let mem = single_region_mem(2 * MAX_BUFFER_SIZE); let mut th = TestHelper::get_default(&mem); th.activate_net(); let tap_traffic_simulator = TapTrafficSimulator::new(if_index(&th.net().tap)); let desc_list = [(0, 100, 0), (1, 100, VIRTQ_DESC_F_WRITE), (2, 500, 0)]; th.add_desc_chain(NetQueue::Tx, 0, &desc_list); th.write_tx_frame(&desc_list, 700); th.event_manager.run_with_timeout(100).unwrap(); // Check that the used queue advanced. assert_eq!(th.txq.used.idx.get(), 1); assert!( th.net() .interrupt_trigger() .has_pending_interrupt(VirtioInterruptType::Queue(TX_INDEX as u16)) ); th.txq.check_used_elem(0, 0, 0); // Check that the frame was skipped. 
    assert!(!tap_traffic_simulator.pop_rx_packet(&mut []));
}

// A TX chain too small to hold even the VNET header must be consumed,
// counted as malformed, and never reach the tap.
#[test]
fn test_tx_short_frame() {
    let mem = single_region_mem(2 * MAX_BUFFER_SIZE);
    let mut th = TestHelper::get_default(&mem);
    th.activate_net();
    let tap_traffic_simulator = TapTrafficSimulator::new(if_index(&th.net().tap));
    // Send an invalid frame (too small, VNET header missing).
    th.add_desc_chain(NetQueue::Tx, 0, &[(0, 1, 0)]);
    check_metric_after_block!(
        th.net().metrics.tx_malformed_frames,
        1,
        th.event_manager.run_with_timeout(100)
    );
    // Check that the used queue advanced.
    assert_eq!(th.txq.used.idx.get(), 1);
    assert!(
        th.net()
            .interrupt_trigger()
            .has_pending_interrupt(VirtioInterruptType::Queue(TX_INDEX as u16))
    );
    // The malformed chain is returned to the guest with 0 bytes used.
    th.txq.check_used_elem(0, 0, 0);
    // Check that the frame was skipped.
    assert!(!tap_traffic_simulator.pop_rx_packet(&mut []));
}

// A TX chain larger than MAX_BUFFER_SIZE must likewise be dropped as malformed.
#[test]
fn test_tx_big_frame() {
    let mem = single_region_mem(2 * MAX_BUFFER_SIZE);
    let mut th = TestHelper::get_default(&mem);
    th.activate_net();
    let tap_traffic_simulator = TapTrafficSimulator::new(if_index(&th.net().tap));
    // Send an invalid frame (too big, maximum buffer is MAX_BUFFER_SIZE).
    th.add_desc_chain(
        NetQueue::Tx,
        0,
        &[(0, (MAX_BUFFER_SIZE + 1).try_into().unwrap(), 0)],
    );
    check_metric_after_block!(
        th.net().metrics.tx_malformed_frames,
        1,
        th.event_manager.run_with_timeout(100)
    );
    // Check that the used queue advanced.
    assert_eq!(th.txq.used.idx.get(), 1);
    assert!(
        th.net()
            .interrupt_trigger()
            .has_pending_interrupt(VirtioInterruptType::Queue(TX_INDEX as u16))
    );
    th.txq.check_used_elem(0, 0, 0);
    // Check that the frame was skipped.
    assert!(!tap_traffic_simulator.pop_rx_packet(&mut []));
}

// A zero-length TX chain is also malformed (no room for the VNET header).
#[test]
fn test_tx_empty_frame() {
    let mem = single_region_mem(2 * MAX_BUFFER_SIZE);
    let mut th = TestHelper::get_default(&mem);
    th.activate_net();
    let tap_traffic_simulator = TapTrafficSimulator::new(if_index(&th.net().tap));
    // Send an invalid frame (too small, VNET header missing).
th.add_desc_chain(NetQueue::Tx, 0, &[(0, 0, 0)]); check_metric_after_block!( th.net().metrics.tx_malformed_frames, 1, th.event_manager.run_with_timeout(100) ); // Check that the used queue advanced. assert_eq!(th.txq.used.idx.get(), 1); assert!( th.net() .interrupt_trigger() .has_pending_interrupt(VirtioInterruptType::Queue(TX_INDEX as u16)) ); th.txq.check_used_elem(0, 0, 0); // Check that the frame was skipped. assert!(!tap_traffic_simulator.pop_rx_packet(&mut [])); } #[test] fn test_tx_retry() { let mem = single_region_mem(2 * MAX_BUFFER_SIZE); let mut th = TestHelper::get_default(&mem); th.activate_net(); let tap_traffic_simulator = TapTrafficSimulator::new(if_index(&th.net().tap)); // Add invalid descriptor chain - writeable descriptor. th.add_desc_chain( NetQueue::Tx, 0, &[(0, 100, 0), (1, 100, VIRTQ_DESC_F_WRITE), (2, 500, 0)], ); // Add invalid descriptor chain - invalid memory. th.add_desc_chain(NetQueue::Tx, th.mem.last_addr().raw_value(), &[(3, 100, 0)]); // Add invalid descriptor chain - too short. th.add_desc_chain(NetQueue::Tx, 700, &[(0, 1, 0)]); // Add valid descriptor chain let desc_list = [(4, 1000, 0)]; th.add_desc_chain(NetQueue::Tx, 0, &desc_list); let frame = th.write_tx_frame(&desc_list, 1000); // One frame is valid, one will not be handled because it includes write-only memory // so that leaves us with 2 malformed (no vnet header) frames. check_metric_after_block!( th.net().metrics.tx_malformed_frames, 2, th.event_manager.run_with_timeout(100) ); // Check that the used queue advanced. assert_eq!(th.txq.used.idx.get(), 4); assert!( th.net() .interrupt_trigger() .has_pending_interrupt(VirtioInterruptType::Queue(TX_INDEX as u16)) ); th.txq.check_used_elem(3, 4, 0); // Check that the valid frame was sent to the tap. let mut buf = vec![0; 1000]; assert!(tap_traffic_simulator.pop_rx_packet(&mut buf[vnet_hdr_len()..])); assert_eq!(&buf, &frame); // Check that no other frame was sent to the tap. 
assert!(!tap_traffic_simulator.pop_rx_packet(&mut [])); } #[test] fn test_tx_complex_descriptor() { let mem = single_region_mem(2 * MAX_BUFFER_SIZE); let mut th = TestHelper::get_default(&mem); th.activate_net(); let tap_traffic_simulator = TapTrafficSimulator::new(if_index(&th.net().tap)); // Add gaps between the descriptor ids in order to ensure that we follow // the `next` field. let desc_list = [(3, 100, 0), (5, 50, 0), (11, 850, 0)]; th.add_desc_chain(NetQueue::Tx, 0, &desc_list); let frame = th.write_tx_frame(&desc_list, 1000); check_metric_after_block!( th.net().metrics.tx_packets_count, 1, th.event_manager.run_with_timeout(100).unwrap() ); // Check that the used queue advanced. assert_eq!(th.txq.used.idx.get(), 1); assert!( th.net() .interrupt_trigger() .has_pending_interrupt(VirtioInterruptType::Queue(TX_INDEX as u16)) ); th.txq.check_used_elem(0, 3, 0); // Check that the frame was sent to the tap. let mut buf = vec![0; 1000]; assert!(tap_traffic_simulator.pop_rx_packet(&mut buf[vnet_hdr_len()..])); assert_eq!(&buf[..1000], &frame[..1000]); } #[test] fn test_tx_tap_failure() { let mem = single_region_mem(2 * MAX_BUFFER_SIZE); let mut th = TestHelper::get_default(&mem); th.activate_net(); // force the next write to the tap to return an error by simply closing the fd // SAFETY: its a valid fd unsafe { libc::close(th.net.lock().unwrap().tap.as_raw_fd()) }; let desc_list = [(0, 1000, 0)]; th.add_desc_chain(NetQueue::Tx, 0, &desc_list); let _ = th.write_tx_frame(&desc_list, 1000); check_metric_after_block!( th.net().metrics.tap_write_fails, 1, th.event_manager.run_with_timeout(100).unwrap() ); // Check that the used queue advanced. 
    assert_eq!(th.txq.used.idx.get(), 1);
    assert!(
        th.net()
            .interrupt_trigger()
            .has_pending_interrupt(VirtioInterruptType::Queue(TX_INDEX as u16))
    );
    th.txq.check_used_elem(0, 0, 0);
    // dropping th would double close the tap fd, so leak it
    std::mem::forget(th);
}

// Two back-to-back TX frames must each be forwarded to the tap, in order,
// and both chains returned on the used ring.
#[test]
fn test_tx_multiple_frame() {
    let mem = single_region_mem(2 * MAX_BUFFER_SIZE);
    let mut th = TestHelper::get_default(&mem);
    th.activate_net();
    let tap_traffic_simulator = TapTrafficSimulator::new(if_index(&th.net().tap));
    // Write the first frame to the Tx queue
    let desc_list = [(0, 50, 0), (1, 100, 0), (2, 150, 0)];
    th.add_desc_chain(NetQueue::Tx, 0, &desc_list);
    let frame_1 = th.write_tx_frame(&desc_list, 300);
    // Write the second frame to the Tx queue
    let desc_list = [(3, 100, 0), (4, 200, 0), (5, 300, 0)];
    th.add_desc_chain(NetQueue::Tx, 500, &desc_list);
    let frame_2 = th.write_tx_frame(&desc_list, 600);
    check_metric_after_block!(
        th.net().metrics.tx_packets_count,
        2,
        th.event_manager.run_with_timeout(100).unwrap()
    );
    // Check that the used queue advanced.
    assert_eq!(th.txq.used.idx.get(), 2);
    assert!(
        th.net()
            .interrupt_trigger()
            .has_pending_interrupt(VirtioInterruptType::Queue(TX_INDEX as u16))
    );
    th.txq.check_used_elem(0, 0, 0);
    th.txq.check_used_elem(1, 3, 0);
    // Check that the first frame was sent to the tap.
    let mut buf = vec![0; 300];
    assert!(tap_traffic_simulator.pop_rx_packet(&mut buf[vnet_hdr_len()..]));
    assert_eq!(&buf[..300], &frame_1[..300]);
    // Check that the second frame was sent to the tap.
    let mut buf = vec![0; 600];
    assert!(tap_traffic_simulator.pop_rx_packet(&mut buf[vnet_hdr_len()..]));
    assert_eq!(&buf[..600], &frame_2[..600]);
}

/// Build an ARP request Ethernet frame from `src` to `dst` inside a
/// MAX_BUFFER_SIZE buffer (VNET header space included); returns the buffer
/// and the total frame length (header + ethernet + ARP payload).
fn create_arp_request(
    src_mac: MacAddr,
    src_ip: Ipv4Addr,
    dst_mac: MacAddr,
    dst_ip: Ipv4Addr,
) -> ([u8; MAX_BUFFER_SIZE], usize) {
    let mut frame_buf = [b'\0'; MAX_BUFFER_SIZE];
    // Create an ethernet frame.
let incomplete_frame = EthernetFrame::write_incomplete( frame_bytes_from_buf_mut(&mut frame_buf).unwrap(), dst_mac, src_mac, ETHERTYPE_ARP, ) .ok() .unwrap(); // Set its length to hold an ARP request. let mut frame = incomplete_frame.with_payload_len_unchecked(ETH_IPV4_FRAME_LEN); // Save the total frame length. let frame_len = vnet_hdr_len() + frame.payload_offset() + ETH_IPV4_FRAME_LEN; // Create the ARP request. let arp_request = EthIPv4ArpFrame::write_request(frame.payload_mut(), src_mac, src_ip, dst_mac, dst_ip); // Validate success. arp_request.unwrap(); (frame_buf, frame_len) } #[test] fn test_mmds_detour_and_injection() { let mut net = default_net(); let mem = single_region_mem(2 * MAX_BUFFER_SIZE); let rxq = VirtQueue::new(GuestAddress(0), &mem, 16); net.queues[RX_INDEX] = rxq.create_queue(); // Inject a fake buffer in the devices buffers, otherwise we won't be able to receive the // MMDS frame. One iovec will be just fine. let mut fake_buffer = vec![0u8; MAX_BUFFER_SIZE]; let iov_buffer = IoVecBufferMut::from(fake_buffer.as_mut_slice()); net.rx_buffer.iovec = iov_buffer; net.rx_buffer .parsed_descriptors .push_back(ParsedDescriptorChain { head_index: 1, length: 1024, nr_iovecs: 1, }); let src_mac = MacAddr::from_str("11:11:11:11:11:11").unwrap(); let src_ip = Ipv4Addr::new(10, 1, 2, 3); let dst_mac = MacAddr::from_str("22:22:22:22:22:22").unwrap(); let dst_ip = Ipv4Addr::new(169, 254, 169, 254); let (frame_buf, frame_len) = create_arp_request(src_mac, src_ip, dst_mac, dst_ip); let buffer = IoVecBuffer::from(&frame_buf[..frame_len]); let mut headers = vec![0; frame_hdr_len()]; buffer.read_exact_volatile_at(&mut headers, 0).unwrap(); // Call the code which sends the packet to the host or MMDS. // Validate the frame was consumed by MMDS and that the metrics reflect that. 
check_metric_after_block!( &METRICS.mmds.rx_accepted, 1, assert!( Net::write_to_mmds_or_tap( net.mmds_ns.as_mut(), &mut net.tx_rate_limiter, &mut headers, &buffer, &mut net.tap, Some(src_mac), &net.metrics, ) .unwrap() ) ); // Validate that MMDS has a response and we can retrieve it. check_metric_after_block!( &METRICS.mmds.tx_frames, 1, net.read_from_mmds_or_tap().unwrap() ); } #[test] fn test_mac_spoofing_detection() { let mut net = default_net(); let guest_mac = MacAddr::from_str("11:11:11:11:11:11").unwrap(); let not_guest_mac = MacAddr::from_str("33:33:33:33:33:33").unwrap(); let guest_ip = Ipv4Addr::new(10, 1, 2, 3); let dst_mac = MacAddr::from_str("22:22:22:22:22:22").unwrap(); let dst_ip = Ipv4Addr::new(10, 1, 1, 1); let (frame_buf, frame_len) = create_arp_request(guest_mac, guest_ip, dst_mac, dst_ip); let buffer = IoVecBuffer::from(&frame_buf[..frame_len]); let mut headers = vec![0; frame_hdr_len()]; // Check that a legit MAC doesn't affect the spoofed MAC metric. check_metric_after_block!( net.metrics.tx_spoofed_mac_count, 0, Net::write_to_mmds_or_tap( net.mmds_ns.as_mut(), &mut net.tx_rate_limiter, &mut headers, &buffer, &mut net.tap, Some(guest_mac), &net.metrics, ) ); // Check that a spoofed MAC increases our spoofed MAC metric. check_metric_after_block!( net.metrics.tx_spoofed_mac_count, 1, Net::write_to_mmds_or_tap( net.mmds_ns.as_mut(), &mut net.tx_rate_limiter, &mut headers, &buffer, &mut net.tap, Some(not_guest_mac), &net.metrics, ) ); } #[test] fn test_process_error_cases() { let mem = single_region_mem(2 * MAX_BUFFER_SIZE); let mut th = TestHelper::get_default(&mem); th.activate_net(); // RX rate limiter events should error since the limiter is not blocked. // Validate that the event failed and failure was properly accounted for. check_metric_after_block!( th.net().metrics.event_fails, 1, th.simulate_event(NetEvent::RxRateLimiter) ); // TX rate limiter events should error since the limiter is not blocked. 
// Validate that the event failed and failure was properly accounted for. check_metric_after_block!( th.net().metrics.event_fails, 1, th.simulate_event(NetEvent::TxRateLimiter) ); } // Cannot easily test failures for: // * queue_evt.read (rx and tx) // * interrupt_evt.write #[test] fn test_read_tap_fail_event_handler() { let mem = single_region_mem(2 * MAX_BUFFER_SIZE); let mut th = TestHelper::get_default(&mem); th.activate_net(); // force the next write to the tap to return an error by simply closing the fd // SAFETY: its a valid fd unsafe { libc::close(th.net.lock().unwrap().tap.as_raw_fd()) }; // The RX queue is empty and there is a deferred frame. th.net().rx_buffer.used_descriptors = 1; th.net().rx_buffer.used_bytes = 100; check_metric_after_block!( th.net().metrics.no_rx_avail_buffer, 1, th.simulate_event(NetEvent::Tap) ); // We need to set this here to false, otherwise the device will try to // handle a deferred frame, it will fail and will never try to read from // the tap. th.net().rx_buffer.used_descriptors = 0; th.net().rx_buffer.used_bytes = 0; th.add_desc_chain( NetQueue::Rx, 0, &[(0, MAX_BUFFER_SIZE as u32, VIRTQ_DESC_F_WRITE)], ); check_metric_after_block!( th.net().metrics.tap_read_fails, 1, th.simulate_event(NetEvent::Tap) ); // dropping th would double close the tap fd, so leak it std::mem::forget(th); } #[test] fn test_rx_rate_limiter_handling() { let mem = single_region_mem(2 * MAX_BUFFER_SIZE); let mut th = TestHelper::get_default(&mem); th.activate_net(); th.net().rx_rate_limiter = RateLimiter::new(0, 0, 0, 0, 0, 0).unwrap(); // There is no actual event on the rate limiter's timerfd. 
check_metric_after_block!( th.net().metrics.event_fails, 1, th.simulate_event(NetEvent::RxRateLimiter) ); } #[test] fn test_tx_rate_limiter_handling() { let mem = single_region_mem(2 * MAX_BUFFER_SIZE); let mut th = TestHelper::get_default(&mem); th.activate_net(); th.net().tx_rate_limiter = RateLimiter::new(0, 0, 0, 0, 0, 0).unwrap(); th.simulate_event(NetEvent::TxRateLimiter); // There is no actual event on the rate limiter's timerfd. check_metric_after_block!( th.net().metrics.event_fails, 1, th.simulate_event(NetEvent::TxRateLimiter) ); } #[test] fn test_bandwidth_rate_limiter() { let mem = single_region_mem(2 * MAX_BUFFER_SIZE); let mut th = TestHelper::get_default(&mem); th.activate_net(); // Test TX bandwidth rate limiting { // create bandwidth rate limiter that allows 40960 bytes/s with bucket size 4096 bytes let mut rl = RateLimiter::new(0x1000, 0, 100, 0, 0, 0).unwrap(); // use up the budget assert!(rl.consume(0x1000, TokenType::Bytes)); // set this tx rate limiter to be used th.net().tx_rate_limiter = rl; // try doing TX // following TX procedure should fail because of bandwidth rate limiting { // trigger the TX handler th.add_desc_chain(NetQueue::Tx, 0, &[(0, 4096, 0)]); th.simulate_event(NetEvent::TxQueue); // assert that limiter is blocked assert!(th.net().tx_rate_limiter.is_blocked()); assert_eq!(th.net().metrics.tx_rate_limiter_throttled.count(), 1); // make sure the data is still queued for processing assert_eq!(th.txq.used.idx.get(), 0); } // A second TX queue event should be throttled too { th.add_desc_chain(NetQueue::Tx, 0, &[(1, 1024, 0)]); // trigger the RX queue event handler th.simulate_event(NetEvent::TxQueue); assert_eq!(th.net().metrics.tx_rate_limiter_throttled.count(), 2); } // wait for 100ms to give the rate-limiter timer a chance to replenish // wait for an extra 100ms to make sure the timerfd event makes its way from the kernel thread::sleep(Duration::from_millis(200)); // following TX procedure should succeed because bandwidth 
should now be available { // tx_count increments 1 from write_to_mmds_or_tap() check_metric_after_block!( th.net().metrics.tx_count, 1, th.simulate_event(NetEvent::TxRateLimiter) ); // This should be still blocked. We managed to send the first frame, but // not enough budget for the second assert!(th.net().tx_rate_limiter.is_blocked()); // make sure the data queue advanced assert_eq!(th.txq.used.idx.get(), 1); } thread::sleep(Duration::from_millis(200)); // following TX procedure should succeed to handle the second frame as well { // tx_count increments 1 from write_to_mmds_or_tap() check_metric_after_block!( th.net().metrics.tx_count, 1, th.simulate_event(NetEvent::TxRateLimiter) ); // validate the rate_limiter is no longer blocked assert!(!th.net().tx_rate_limiter.is_blocked()); // make sure the data queue advance one more place assert_eq!(th.txq.used.idx.get(), 2); } } // Test RX bandwidth rate limiting { // create bandwidth rate limiter that allows 2000 bytes/s with bucket size 1000 bytes let mut rl = RateLimiter::new(1000, 0, 1000, 0, 0, 0).unwrap(); // set up RX assert!(th.net().rx_buffer.used_descriptors == 0); th.add_desc_chain( NetQueue::Rx, 0, &[(0, MAX_BUFFER_SIZE as u32, VIRTQ_DESC_F_WRITE)], ); let mut frame = inject_tap_tx_frame(&th.net(), 1000); // use up the budget (do it after injecting the tx frame, as socket communication is // slow enough that the ratelimiter could replenish in the meantime). 
assert!(rl.consume(1000, TokenType::Bytes));
// set this rx rate limiter to be used
th.net().rx_rate_limiter = rl;
// following RX procedure should fail because of bandwidth rate limiting
{
    // trigger the RX handler
    th.simulate_event(NetEvent::Tap);
    // assert that limiter is blocked
    assert!(th.net().rx_rate_limiter.is_blocked());
    assert_eq!(th.net().metrics.rx_rate_limiter_throttled.count(), 1);
    assert!(th.net().rx_buffer.used_descriptors != 0);
    // assert that no operation actually completed (limiter blocked it)
    // NOTE(review): this asserts an interrupt IS pending even though no frame was
    // delivered — presumably raised when the available RX buffers were parsed;
    // confirm against the device's RX path.
    assert!(
        th.net()
            .interrupt_trigger()
            .has_pending_interrupt(VirtioInterruptType::Queue(RX_INDEX as u16))
    );
    // make sure the data is still queued for processing
    assert_eq!(th.rxq.used.idx.get(), 0);
}
// An RX queue event should be throttled too
{
    // trigger the RX queue event handler
    th.simulate_event(NetEvent::RxQueue);
    assert_eq!(th.net().metrics.rx_rate_limiter_throttled.count(), 2);
}
// wait for 1000ms to give the rate-limiter timer a chance to replenish
// wait for an extra 1000ms to make sure the timerfd event makes its way from the kernel
thread::sleep(Duration::from_millis(2000));
// following RX procedure should succeed because bandwidth should now be available
{
    // no longer throttled
    check_metric_after_block!(
        th.net().metrics.rx_rate_limiter_throttled,
        0,
        th.simulate_event(NetEvent::RxRateLimiter)
    );
    // validate the rate_limiter is no longer blocked
    assert!(!th.net().rx_rate_limiter.is_blocked());
    // make sure the virtio queue operation completed this time
    assert!(
        th.net()
            .interrupt_trigger()
            .has_pending_interrupt(VirtioInterruptType::Queue(RX_INDEX as u16))
    );
    // make sure the data queue advanced
    assert_eq!(th.rxq.used.idx.get(), 1);
    th.rxq
        .check_used_elem(0, 0, frame.len().try_into().unwrap());
    header_set_num_buffers(frame.as_mut_slice(), 1);
    th.rxq.dtable[0].check_data(&frame);
}
}
}

// Exercises the ops (operations/second) token buckets on both the TX and RX
// paths: exhausts the budget, verifies the handler throttles and defers work,
// then sleeps past the refill time and verifies the deferred work completes.
#[test]
fn test_ops_rate_limiter() {
    let mem = single_region_mem(2 * MAX_BUFFER_SIZE);
    let mut th = TestHelper::get_default(&mem);
    th.activate_net();

    // Test TX ops rate limiting
    {
        // create ops rate limiter that allows 10 ops/s with bucket size 1 ops
        let mut rl = RateLimiter::new(0, 0, 0, 1, 0, 100).unwrap();
        // use up the budget
        assert!(rl.consume(1, TokenType::Ops));

        // set this tx rate limiter to be used
        th.net().tx_rate_limiter = rl;

        // try doing TX
        // following TX procedure should fail because of ops rate limiting
        {
            // trigger the TX handler
            th.add_desc_chain(NetQueue::Tx, 0, &[(0, 4096, 0)]);
            check_metric_after_block!(
                th.net().metrics.tx_rate_limiter_throttled,
                1,
                th.simulate_event(NetEvent::TxQueue)
            );

            // assert that limiter is blocked
            assert!(th.net().tx_rate_limiter.is_blocked());
            // make sure the data is still queued for processing
            assert_eq!(th.txq.used.idx.get(), 0);
        }

        // wait for 100ms to give the rate-limiter timer a chance to replenish
        // wait for an extra 100ms to make sure the timerfd event makes its way from the kernel
        thread::sleep(Duration::from_millis(200));

        // following TX procedure should succeed because ops should now be available
        {
            // no longer throttled
            check_metric_after_block!(
                th.net().metrics.tx_rate_limiter_throttled,
                0,
                th.simulate_event(NetEvent::TxRateLimiter)
            );
            // validate the rate_limiter is no longer blocked
            assert!(!th.net().tx_rate_limiter.is_blocked());
            // make sure the data queue advanced
            assert_eq!(th.txq.used.idx.get(), 1);
        }
    }

    // Test RX ops rate limiting
    {
        // create ops rate limiter that allows 1 op/s: bucket size 1 ops,
        // complete refill every 1000 ms
        let mut rl = RateLimiter::new(0, 0, 0, 1, 0, 1000).unwrap();

        // set up RX
        assert!(th.net().rx_buffer.used_descriptors == 0);
        th.add_desc_chain(
            NetQueue::Rx,
            0,
            &[(0, MAX_BUFFER_SIZE as u32, VIRTQ_DESC_F_WRITE)],
        );
        let mut frame = inject_tap_tx_frame(&th.net(), 1234);

        // use up the initial budget
        assert!(rl.consume(1, TokenType::Ops));

        // set this rx rate limiter to be used
        th.net().rx_rate_limiter = rl;

        // following RX procedure should fail because of ops rate limiting
        {
            // trigger the RX handler
            check_metric_after_block!(
                th.net().metrics.rx_rate_limiter_throttled,
                1,
                th.simulate_event(NetEvent::Tap)
            );

            // assert that limiter is blocked
            assert!(th.net().rx_rate_limiter.is_blocked());
            assert!(th.net().metrics.rx_rate_limiter_throttled.count() >= 1);
            assert!(th.net().rx_buffer.used_descriptors != 0);
            // assert that no operation actually completed (limiter blocked it)
            assert!(
                th.net()
                    .interrupt_trigger()
                    .has_pending_interrupt(VirtioInterruptType::Queue(RX_INDEX as u16))
            );
            // make sure the data is still queued for processing
            assert_eq!(th.rxq.used.idx.get(), 0);

            // trigger the RX handler again, this time it should do the limiter fast path exit
            th.simulate_event(NetEvent::Tap);
            // assert that no operation actually completed, that the limiter blocked it
            assert!(
                !th.net()
                    .interrupt_trigger()
                    .has_pending_interrupt(VirtioInterruptType::Queue(RX_INDEX as u16))
            );
            // make sure the data is still queued for processing
            assert_eq!(th.rxq.used.idx.get(), 0);
        }

        // wait for 1000ms to give the rate-limiter timer a chance to replenish
        // wait for an extra 1000ms to make sure the timerfd event makes its way from the kernel
        thread::sleep(Duration::from_millis(2000));

        // following RX procedure should succeed because ops should now be available
        {
            th.simulate_event(NetEvent::RxRateLimiter);
            // make sure the virtio queue operation completed this time
            assert!(
                th.net()
                    .interrupt_trigger()
                    .has_pending_interrupt(VirtioInterruptType::Queue(RX_INDEX as u16))
            );
            // make sure the data queue advanced
            assert_eq!(th.rxq.used.idx.get(), 1);
            th.rxq
                .check_used_elem(0, 0, frame.len().try_into().unwrap());
            header_set_num_buffers(frame.as_mut_slice(), 1);
            th.rxq.dtable[0].check_data(&frame);
        }
    }
}

// Verifies that patch_rate_limiters replaces all four token buckets
// (rx bytes/ops, tx bytes/ops) and that BucketUpdate::Disabled removes them.
#[test]
fn test_patch_rate_limiters() {
    let mem = single_region_mem(2 * MAX_BUFFER_SIZE);
    let mut th = TestHelper::get_default(&mem);
    th.activate_net();
    th.net().rx_rate_limiter = RateLimiter::new(10, 0, 10, 2, 0, 2).unwrap();
    th.net().tx_rate_limiter = RateLimiter::new(10, 0, 10, 2, 0, 2).unwrap();

    // Distinct (capacity, one_time_burst, refill_time) triples per bucket so a
    // mixed-up assignment inside patch_rate_limiters would be caught below.
    let rx_bytes = TokenBucket::new(1000, 1001, 1002).unwrap();
    let rx_ops = TokenBucket::new(1003, 1004, 1005).unwrap();
    let tx_bytes = TokenBucket::new(1006, 1007, 1008).unwrap();
    let tx_ops = TokenBucket::new(1009, 1010, 1011).unwrap();

    th.net().patch_rate_limiters(
        BucketUpdate::Update(rx_bytes.clone()),
        BucketUpdate::Update(rx_ops.clone()),
        BucketUpdate::Update(tx_bytes.clone()),
        BucketUpdate::Update(tx_ops.clone()),
    );
    let compare_buckets = |a: &TokenBucket, b: &TokenBucket| {
        assert_eq!(a.capacity(), b.capacity());
        assert_eq!(a.one_time_burst(), b.one_time_burst());
        assert_eq!(a.refill_time_ms(), b.refill_time_ms());
    };
    compare_buckets(th.net().rx_rate_limiter.bandwidth().unwrap(), &rx_bytes);
    compare_buckets(th.net().rx_rate_limiter.ops().unwrap(), &rx_ops);
    compare_buckets(th.net().tx_rate_limiter.bandwidth().unwrap(), &tx_bytes);
    compare_buckets(th.net().tx_rate_limiter.ops().unwrap(), &tx_ops);

    th.net().patch_rate_limiters(
        BucketUpdate::Disabled,
        BucketUpdate::Disabled,
        BucketUpdate::Disabled,
        BucketUpdate::Disabled,
    );
    assert!(th.net().rx_rate_limiter.bandwidth().is_none());
    assert!(th.net().rx_rate_limiter.ops().is_none());
    assert!(th.net().tx_rate_limiter.bandwidth().is_none());
    assert!(th.net().tx_rate_limiter.ops().is_none());
}

// Sanity checks of the VirtioDevice trait surface of Net: queue count/sizes,
// queue event count, and that no interrupts are pending right after activation.
#[test]
fn test_virtio_device() {
    let mem = single_region_mem(2 * MAX_BUFFER_SIZE);
    let mut th = TestHelper::get_default(&mem);
    th.activate_net();
    let net = th.net.lock().unwrap();

    // Test queues count (TX and RX).
    let queues = net.queues();
    assert_eq!(queues.len(), NET_QUEUE_SIZES.len());
    assert_eq!(queues[RX_INDEX].size, th.rxq.size());
    assert_eq!(queues[TX_INDEX].size, th.txq.size());

    // Test corresponding queues events.
    assert_eq!(net.queue_events().len(), NET_QUEUE_SIZES.len());

    // Test interrupts.
assert!( !net.interrupt_trigger() .has_pending_interrupt(VirtioInterruptType::Queue(RX_INDEX as u16)) ); assert!( !net.interrupt_trigger() .has_pending_interrupt(VirtioInterruptType::Queue(TX_INDEX as u16)) ); } #[test] fn test_queues_notification_suppression() { let features = 1 << VIRTIO_RING_F_EVENT_IDX; let mem = single_region_mem(2 * MAX_BUFFER_SIZE); let mut th = TestHelper::get_default(&mem); th.net().set_acked_features(features); th.activate_net(); let net = th.net(); let queues = net.queues(); assert!(queues[RX_INDEX].uses_notif_suppression); assert!(queues[TX_INDEX].uses_notif_suppression); } } ================================================ FILE: src/vmm/src/devices/virtio/net/event_handler.rs ================================================ // Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 use event_manager::{EventOps, Events, MutEventSubscriber}; use vmm_sys_util::epoll::EventSet; use crate::devices::virtio::device::VirtioDevice; use crate::devices::virtio::net::device::Net; use crate::devices::virtio::net::{RX_INDEX, TX_INDEX}; use crate::logger::{IncMetric, error, warn}; impl Net { const PROCESS_ACTIVATE: u32 = 0; const PROCESS_VIRTQ_RX: u32 = 1; const PROCESS_VIRTQ_TX: u32 = 2; const PROCESS_TAP_RX: u32 = 3; const PROCESS_RX_RATE_LIMITER: u32 = 4; const PROCESS_TX_RATE_LIMITER: u32 = 5; fn register_runtime_events(&self, ops: &mut EventOps) { if let Err(err) = ops.add(Events::with_data( &self.queue_evts[RX_INDEX], Self::PROCESS_VIRTQ_RX, EventSet::IN, )) { error!("Failed to register rx queue event: {}", err); } if let Err(err) = ops.add(Events::with_data( &self.queue_evts[TX_INDEX], Self::PROCESS_VIRTQ_TX, EventSet::IN, )) { error!("Failed to register tx queue event: {}", err); } if let Err(err) = ops.add(Events::with_data( &self.rx_rate_limiter, Self::PROCESS_RX_RATE_LIMITER, EventSet::IN, )) { error!("Failed to register rx queue event: {}", err); } if let Err(err) = 
ops.add(Events::with_data( &self.tx_rate_limiter, Self::PROCESS_TX_RATE_LIMITER, EventSet::IN, )) { error!("Failed to register tx queue event: {}", err); } if let Err(err) = ops.add(Events::with_data( &self.tap, Self::PROCESS_TAP_RX, EventSet::IN | EventSet::EDGE_TRIGGERED, )) { error!("Failed to register tap event: {}", err); } } fn register_activate_event(&self, ops: &mut EventOps) { if let Err(err) = ops.add(Events::with_data( &self.activate_evt, Self::PROCESS_ACTIVATE, EventSet::IN, )) { error!("Failed to register activate event: {}", err); } } fn process_activate_event(&self, ops: &mut EventOps) { if let Err(err) = self.activate_evt.read() { error!("Failed to consume net activate event: {:?}", err); } self.register_runtime_events(ops); if let Err(err) = ops.remove(Events::with_data( &self.activate_evt, Self::PROCESS_ACTIVATE, EventSet::IN, )) { error!("Failed to un-register activate event: {}", err); } } } impl MutEventSubscriber for Net { fn process(&mut self, event: Events, ops: &mut EventOps) { let source = event.data(); let event_set = event.event_set(); // TODO: also check for errors. Pending high level discussions on how we want // to handle errors in devices. let supported_events = EventSet::IN; if !supported_events.contains(event_set) { warn!( "Received unknown event: {:?} from source: {:?}", event_set, source ); return; } if self.is_activated() { match source { Self::PROCESS_ACTIVATE => self.process_activate_event(ops), Self::PROCESS_VIRTQ_RX => self.process_rx_queue_event(), Self::PROCESS_VIRTQ_TX => self.process_tx_queue_event(), Self::PROCESS_TAP_RX => self.process_tap_rx_event(), Self::PROCESS_RX_RATE_LIMITER => self.process_rx_rate_limiter_event(), Self::PROCESS_TX_RATE_LIMITER => self.process_tx_rate_limiter_event(), _ => { warn!("Net: Spurious event received: {:?}", source); self.metrics.event_fails.inc(); } } } else { warn!( "Net: The device is not yet activated. 
Spurious event received: {:?}", source ); } } fn init(&mut self, ops: &mut EventOps) { // This function can be called during different points in the device lifetime: // - shortly after device creation, // - on device activation (is-activated already true at this point), // - on device restore from snapshot. if self.is_activated() { self.register_runtime_events(ops); } else { self.register_activate_event(ops); } } } #[cfg(test)] pub mod tests { use crate::devices::virtio::net::test_utils::NetQueue; use crate::devices::virtio::net::test_utils::test::TestHelper; use crate::devices::virtio::net::{MAX_BUFFER_SIZE, TX_INDEX}; use crate::test_utils::single_region_mem; #[test] fn test_event_handler() { let mem = single_region_mem(2 * MAX_BUFFER_SIZE); let mut th = TestHelper::get_default(&mem); // Push a queue event, use the TX_QUEUE_EVENT in this test. th.add_desc_chain(NetQueue::Tx, 0, &[(0, 4096, 0)]); // EventManager should report no events since net has only registered // its activation event so far (even though there is also a queue event pending). let ev_count = th.event_manager.run_with_timeout(50).unwrap(); assert_eq!(ev_count, 0); // Manually force a queue event and check it's ignored pre-activation. th.net().queue_evts[TX_INDEX].write(1).unwrap(); let ev_count = th.event_manager.run_with_timeout(50).unwrap(); assert_eq!(ev_count, 0); // Validate there was no queue operation. assert_eq!(th.txq.used.idx.get(), 0); // Now activate the device. th.activate_net(); // Handle the previously pushed queue event through EventManager. th.event_manager .run_with_timeout(50) .expect("Metrics event timeout or error."); // Make sure the data queue advanced. assert_eq!(th.txq.used.idx.get(), 1); } } ================================================ FILE: src/vmm/src/devices/virtio/net/generated/if_tun.rs ================================================ // Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
// SPDX-License-Identifier: Apache-2.0 // automatically generated by tools/bindgen.sh #![allow( non_camel_case_types, non_upper_case_globals, dead_code, non_snake_case, clippy::ptr_as_ptr, clippy::undocumented_unsafe_blocks, missing_debug_implementations, clippy::tests_outside_test_module, unsafe_op_in_unsafe_fn, clippy::redundant_static_lifetimes )] pub const ETH_ALEN: u32 = 6; pub const ETH_TLEN: u32 = 2; pub const ETH_HLEN: u32 = 14; pub const ETH_ZLEN: u32 = 60; pub const ETH_DATA_LEN: u32 = 1500; pub const ETH_FRAME_LEN: u32 = 1514; pub const ETH_FCS_LEN: u32 = 4; pub const ETH_MIN_MTU: u32 = 68; pub const ETH_MAX_MTU: u32 = 65535; pub const ETH_P_LOOP: u32 = 96; pub const ETH_P_PUP: u32 = 512; pub const ETH_P_PUPAT: u32 = 513; pub const ETH_P_TSN: u32 = 8944; pub const ETH_P_ERSPAN2: u32 = 8939; pub const ETH_P_IP: u32 = 2048; pub const ETH_P_X25: u32 = 2053; pub const ETH_P_ARP: u32 = 2054; pub const ETH_P_BPQ: u32 = 2303; pub const ETH_P_IEEEPUP: u32 = 2560; pub const ETH_P_IEEEPUPAT: u32 = 2561; pub const ETH_P_BATMAN: u32 = 17157; pub const ETH_P_DEC: u32 = 24576; pub const ETH_P_DNA_DL: u32 = 24577; pub const ETH_P_DNA_RC: u32 = 24578; pub const ETH_P_DNA_RT: u32 = 24579; pub const ETH_P_LAT: u32 = 24580; pub const ETH_P_DIAG: u32 = 24581; pub const ETH_P_CUST: u32 = 24582; pub const ETH_P_SCA: u32 = 24583; pub const ETH_P_TEB: u32 = 25944; pub const ETH_P_RARP: u32 = 32821; pub const ETH_P_ATALK: u32 = 32923; pub const ETH_P_AARP: u32 = 33011; pub const ETH_P_8021Q: u32 = 33024; pub const ETH_P_ERSPAN: u32 = 35006; pub const ETH_P_IPX: u32 = 33079; pub const ETH_P_IPV6: u32 = 34525; pub const ETH_P_PAUSE: u32 = 34824; pub const ETH_P_SLOW: u32 = 34825; pub const ETH_P_WCCP: u32 = 34878; pub const ETH_P_MPLS_UC: u32 = 34887; pub const ETH_P_MPLS_MC: u32 = 34888; pub const ETH_P_ATMMPOA: u32 = 34892; pub const ETH_P_PPP_DISC: u32 = 34915; pub const ETH_P_PPP_SES: u32 = 34916; pub const ETH_P_LINK_CTL: u32 = 34924; pub const ETH_P_ATMFATE: u32 = 34948; pub 
const ETH_P_PAE: u32 = 34958; pub const ETH_P_PROFINET: u32 = 34962; pub const ETH_P_REALTEK: u32 = 34969; pub const ETH_P_AOE: u32 = 34978; pub const ETH_P_ETHERCAT: u32 = 34980; pub const ETH_P_8021AD: u32 = 34984; pub const ETH_P_802_EX1: u32 = 34997; pub const ETH_P_PREAUTH: u32 = 35015; pub const ETH_P_TIPC: u32 = 35018; pub const ETH_P_LLDP: u32 = 35020; pub const ETH_P_MRP: u32 = 35043; pub const ETH_P_MACSEC: u32 = 35045; pub const ETH_P_8021AH: u32 = 35047; pub const ETH_P_MVRP: u32 = 35061; pub const ETH_P_1588: u32 = 35063; pub const ETH_P_NCSI: u32 = 35064; pub const ETH_P_PRP: u32 = 35067; pub const ETH_P_CFM: u32 = 35074; pub const ETH_P_FCOE: u32 = 35078; pub const ETH_P_IBOE: u32 = 35093; pub const ETH_P_TDLS: u32 = 35085; pub const ETH_P_FIP: u32 = 35092; pub const ETH_P_80221: u32 = 35095; pub const ETH_P_HSR: u32 = 35119; pub const ETH_P_NSH: u32 = 35151; pub const ETH_P_LOOPBACK: u32 = 36864; pub const ETH_P_QINQ1: u32 = 37120; pub const ETH_P_QINQ2: u32 = 37376; pub const ETH_P_QINQ3: u32 = 37632; pub const ETH_P_EDSA: u32 = 56026; pub const ETH_P_DSA_8021Q: u32 = 56027; pub const ETH_P_DSA_A5PSW: u32 = 57345; pub const ETH_P_IFE: u32 = 60734; pub const ETH_P_AF_IUCV: u32 = 64507; pub const ETH_P_802_3_MIN: u32 = 1536; pub const ETH_P_802_3: u32 = 1; pub const ETH_P_AX25: u32 = 2; pub const ETH_P_ALL: u32 = 3; pub const ETH_P_802_2: u32 = 4; pub const ETH_P_SNAP: u32 = 5; pub const ETH_P_DDCMP: u32 = 6; pub const ETH_P_WAN_PPP: u32 = 7; pub const ETH_P_PPP_MP: u32 = 8; pub const ETH_P_LOCALTALK: u32 = 9; pub const ETH_P_CAN: u32 = 12; pub const ETH_P_CANFD: u32 = 13; pub const ETH_P_CANXL: u32 = 14; pub const ETH_P_PPPTALK: u32 = 16; pub const ETH_P_TR_802_2: u32 = 17; pub const ETH_P_MOBITEX: u32 = 21; pub const ETH_P_CONTROL: u32 = 22; pub const ETH_P_IRDA: u32 = 23; pub const ETH_P_ECONET: u32 = 24; pub const ETH_P_HDLC: u32 = 25; pub const ETH_P_ARCNET: u32 = 26; pub const ETH_P_DSA: u32 = 27; pub const ETH_P_TRAILER: u32 = 28; pub const 
ETH_P_PHONET: u32 = 245; pub const ETH_P_IEEE802154: u32 = 246; pub const ETH_P_CAIF: u32 = 247; pub const ETH_P_XDSA: u32 = 248; pub const ETH_P_MAP: u32 = 249; pub const ETH_P_MCTP: u32 = 250; pub const TUN_READQ_SIZE: u32 = 500; pub const TUN_TYPE_MASK: u32 = 15; pub const IFF_TAP: u32 = 2; pub const IFF_NO_PI: u32 = 4096; pub const IFF_VNET_HDR: u32 = 16384; pub const IFF_MULTI_QUEUE: u32 = 256; pub const TUN_TX_TIMESTAMP: u32 = 1; pub const TUN_F_CSUM: u32 = 1; pub const TUN_F_TSO4: u32 = 2; pub const TUN_F_TSO6: u32 = 4; pub const TUN_F_TSO_ECN: u32 = 8; pub const TUN_F_UFO: u32 = 16; pub const TUN_F_USO4: u32 = 32; pub const TUN_F_USO6: u32 = 64; pub const TUN_PKT_STRIP: u32 = 1; pub const TUN_FLT_ALLMULTI: u32 = 1; pub type __u8 = ::std::os::raw::c_uchar; pub type __u16 = ::std::os::raw::c_ushort; pub type __u32 = ::std::os::raw::c_uint; #[repr(C)] #[derive(Debug, Default, Copy, Clone, PartialEq)] pub struct sock_filter { pub code: __u16, pub jt: __u8, pub jf: __u8, pub k: __u32, } #[allow(clippy::unnecessary_operation, clippy::identity_op)] const _: () = { ["Size of sock_filter"][::std::mem::size_of::() - 8usize]; ["Alignment of sock_filter"][::std::mem::align_of::() - 4usize]; ["Offset of field: sock_filter::code"][::std::mem::offset_of!(sock_filter, code) - 0usize]; ["Offset of field: sock_filter::jt"][::std::mem::offset_of!(sock_filter, jt) - 2usize]; ["Offset of field: sock_filter::jf"][::std::mem::offset_of!(sock_filter, jf) - 3usize]; ["Offset of field: sock_filter::k"][::std::mem::offset_of!(sock_filter, k) - 4usize]; }; #[repr(C)] #[derive(Debug, Copy, Clone, PartialEq)] pub struct sock_fprog { pub len: ::std::os::raw::c_ushort, pub filter: *mut sock_filter, } #[allow(clippy::unnecessary_operation, clippy::identity_op)] const _: () = { ["Size of sock_fprog"][::std::mem::size_of::() - 16usize]; ["Alignment of sock_fprog"][::std::mem::align_of::() - 8usize]; ["Offset of field: sock_fprog::len"][::std::mem::offset_of!(sock_fprog, len) - 0usize]; 
["Offset of field: sock_fprog::filter"][::std::mem::offset_of!(sock_fprog, filter) - 8usize]; }; impl Default for sock_fprog { fn default() -> Self { let mut s = ::std::mem::MaybeUninit::::uninit(); unsafe { ::std::ptr::write_bytes(s.as_mut_ptr(), 0, 1); s.assume_init() } } } ================================================ FILE: src/vmm/src/devices/virtio/net/generated/iff.rs ================================================ // Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 // automatically generated by tools/bindgen.sh #![allow( non_camel_case_types, non_upper_case_globals, dead_code, non_snake_case, clippy::ptr_as_ptr, clippy::undocumented_unsafe_blocks, missing_debug_implementations, clippy::tests_outside_test_module, unsafe_op_in_unsafe_fn, clippy::redundant_static_lifetimes )] pub const IFNAMSIZ: u32 = 16; pub const IFALIASZ: u32 = 256; pub const IF_GET_IFACE: u32 = 1; pub const IF_GET_PROTO: u32 = 2; pub const IF_IFACE_V35: u32 = 4096; pub const IF_IFACE_V24: u32 = 4097; pub const IF_IFACE_X21: u32 = 4098; pub const IF_IFACE_T1: u32 = 4099; pub const IF_IFACE_E1: u32 = 4100; pub const IF_IFACE_SYNC_SERIAL: u32 = 4101; pub const IF_IFACE_X21D: u32 = 4102; pub const IF_PROTO_HDLC: u32 = 8192; pub const IF_PROTO_PPP: u32 = 8193; pub const IF_PROTO_CISCO: u32 = 8194; pub const IF_PROTO_FR: u32 = 8195; pub const IF_PROTO_FR_ADD_PVC: u32 = 8196; pub const IF_PROTO_FR_DEL_PVC: u32 = 8197; pub const IF_PROTO_X25: u32 = 8198; pub const IF_PROTO_HDLC_ETH: u32 = 8199; pub const IF_PROTO_FR_ADD_ETH_PVC: u32 = 8200; pub const IF_PROTO_FR_DEL_ETH_PVC: u32 = 8201; pub const IF_PROTO_FR_PVC: u32 = 8202; pub const IF_PROTO_FR_ETH_PVC: u32 = 8203; pub const IF_PROTO_RAW: u32 = 8204; pub const IFHWADDRLEN: u32 = 6; pub type sa_family_t = ::std::os::raw::c_ushort; #[repr(C)] #[derive(Debug, Default, Copy, Clone, PartialEq)] pub struct sockaddr { pub sa_family: sa_family_t, pub sa_data: [::std::os::raw::c_char; 
14usize],
}
#[allow(clippy::unnecessary_operation, clippy::identity_op)]
const _: () = {
    ["Size of sockaddr"][::std::mem::size_of::<sockaddr>() - 16usize];
    ["Alignment of sockaddr"][::std::mem::align_of::<sockaddr>() - 2usize];
    ["Offset of field: sockaddr::sa_family"][::std::mem::offset_of!(sockaddr, sa_family) - 0usize];
    ["Offset of field: sockaddr::sa_data"][::std::mem::offset_of!(sockaddr, sa_data) - 2usize];
};
#[repr(C)]
#[derive(Debug, Default, Copy, Clone, PartialEq)]
pub struct sync_serial_settings {
    pub clock_rate: ::std::os::raw::c_uint,
    pub clock_type: ::std::os::raw::c_uint,
    pub loopback: ::std::os::raw::c_ushort,
}
#[allow(clippy::unnecessary_operation, clippy::identity_op)]
const _: () = {
    ["Size of sync_serial_settings"][::std::mem::size_of::<sync_serial_settings>() - 12usize];
    ["Alignment of sync_serial_settings"][::std::mem::align_of::<sync_serial_settings>() - 4usize];
    ["Offset of field: sync_serial_settings::clock_rate"]
        [::std::mem::offset_of!(sync_serial_settings, clock_rate) - 0usize];
    ["Offset of field: sync_serial_settings::clock_type"]
        [::std::mem::offset_of!(sync_serial_settings, clock_type) - 4usize];
    ["Offset of field: sync_serial_settings::loopback"]
        [::std::mem::offset_of!(sync_serial_settings, loopback) - 8usize];
};
#[repr(C)]
#[derive(Debug, Default, Copy, Clone, PartialEq)]
pub struct te1_settings {
    pub clock_rate: ::std::os::raw::c_uint,
    pub clock_type: ::std::os::raw::c_uint,
    pub loopback: ::std::os::raw::c_ushort,
    pub slot_map: ::std::os::raw::c_uint,
}
#[allow(clippy::unnecessary_operation, clippy::identity_op)]
const _: () = {
    ["Size of te1_settings"][::std::mem::size_of::<te1_settings>() - 16usize];
    ["Alignment of te1_settings"][::std::mem::align_of::<te1_settings>() - 4usize];
    ["Offset of field: te1_settings::clock_rate"]
        [::std::mem::offset_of!(te1_settings, clock_rate) - 0usize];
    ["Offset of field: te1_settings::clock_type"]
        [::std::mem::offset_of!(te1_settings, clock_type) - 4usize];
    ["Offset of field: te1_settings::loopback"]
        [::std::mem::offset_of!(te1_settings, loopback) - 8usize];
    ["Offset of field: te1_settings::slot_map"]
        [::std::mem::offset_of!(te1_settings, slot_map) - 12usize];
};
#[repr(C)]
#[derive(Debug, Default, Copy, Clone, PartialEq)]
pub struct raw_hdlc_proto {
    pub encoding: ::std::os::raw::c_ushort,
    pub parity: ::std::os::raw::c_ushort,
}
#[allow(clippy::unnecessary_operation, clippy::identity_op)]
const _: () = {
    ["Size of raw_hdlc_proto"][::std::mem::size_of::<raw_hdlc_proto>() - 4usize];
    ["Alignment of raw_hdlc_proto"][::std::mem::align_of::<raw_hdlc_proto>() - 2usize];
    ["Offset of field: raw_hdlc_proto::encoding"]
        [::std::mem::offset_of!(raw_hdlc_proto, encoding) - 0usize];
    ["Offset of field: raw_hdlc_proto::parity"]
        [::std::mem::offset_of!(raw_hdlc_proto, parity) - 2usize];
};
#[repr(C)]
#[derive(Debug, Default, Copy, Clone, PartialEq)]
pub struct fr_proto {
    pub t391: ::std::os::raw::c_uint,
    pub t392: ::std::os::raw::c_uint,
    pub n391: ::std::os::raw::c_uint,
    pub n392: ::std::os::raw::c_uint,
    pub n393: ::std::os::raw::c_uint,
    pub lmi: ::std::os::raw::c_ushort,
    pub dce: ::std::os::raw::c_ushort,
}
#[allow(clippy::unnecessary_operation, clippy::identity_op)]
const _: () = {
    ["Size of fr_proto"][::std::mem::size_of::<fr_proto>() - 24usize];
    ["Alignment of fr_proto"][::std::mem::align_of::<fr_proto>() - 4usize];
    ["Offset of field: fr_proto::t391"][::std::mem::offset_of!(fr_proto, t391) - 0usize];
    ["Offset of field: fr_proto::t392"][::std::mem::offset_of!(fr_proto, t392) - 4usize];
    ["Offset of field: fr_proto::n391"][::std::mem::offset_of!(fr_proto, n391) - 8usize];
    ["Offset of field: fr_proto::n392"][::std::mem::offset_of!(fr_proto, n392) - 12usize];
    ["Offset of field: fr_proto::n393"][::std::mem::offset_of!(fr_proto, n393) - 16usize];
    ["Offset of field: fr_proto::lmi"][::std::mem::offset_of!(fr_proto, lmi) - 20usize];
    ["Offset of field: fr_proto::dce"][::std::mem::offset_of!(fr_proto, dce) - 22usize];
};
#[repr(C)]
#[derive(Debug, Default, Copy, Clone, PartialEq)]
pub struct fr_proto_pvc {
    pub dlci: ::std::os::raw::c_uint,
}
#[allow(clippy::unnecessary_operation, clippy::identity_op)]
const _: () = {
    ["Size of fr_proto_pvc"][::std::mem::size_of::<fr_proto_pvc>() - 4usize];
    ["Alignment of fr_proto_pvc"][::std::mem::align_of::<fr_proto_pvc>() - 4usize];
    ["Offset of field: fr_proto_pvc::dlci"][::std::mem::offset_of!(fr_proto_pvc, dlci) - 0usize];
};
#[repr(C)]
#[derive(Debug, Default, Copy, Clone, PartialEq)]
pub struct fr_proto_pvc_info {
    pub dlci: ::std::os::raw::c_uint,
    pub master: [::std::os::raw::c_char; 16usize],
}
#[allow(clippy::unnecessary_operation, clippy::identity_op)]
const _: () = {
    ["Size of fr_proto_pvc_info"][::std::mem::size_of::<fr_proto_pvc_info>() - 20usize];
    ["Alignment of fr_proto_pvc_info"][::std::mem::align_of::<fr_proto_pvc_info>() - 4usize];
    ["Offset of field: fr_proto_pvc_info::dlci"]
        [::std::mem::offset_of!(fr_proto_pvc_info, dlci) - 0usize];
    ["Offset of field: fr_proto_pvc_info::master"]
        [::std::mem::offset_of!(fr_proto_pvc_info, master) - 4usize];
};
#[repr(C)]
#[derive(Debug, Default, Copy, Clone, PartialEq)]
pub struct cisco_proto {
    pub interval: ::std::os::raw::c_uint,
    pub timeout: ::std::os::raw::c_uint,
}
#[allow(clippy::unnecessary_operation, clippy::identity_op)]
const _: () = {
    ["Size of cisco_proto"][::std::mem::size_of::<cisco_proto>() - 8usize];
    ["Alignment of cisco_proto"][::std::mem::align_of::<cisco_proto>() - 4usize];
    ["Offset of field: cisco_proto::interval"]
        [::std::mem::offset_of!(cisco_proto, interval) - 0usize];
    ["Offset of field: cisco_proto::timeout"]
        [::std::mem::offset_of!(cisco_proto, timeout) - 4usize];
};
#[repr(C)]
#[derive(Debug, Default, Copy, Clone, PartialEq)]
pub struct x25_hdlc_proto {
    pub dce: ::std::os::raw::c_ushort,
    pub modulo: ::std::os::raw::c_uint,
    pub window: ::std::os::raw::c_uint,
    pub t1: ::std::os::raw::c_uint,
    pub t2: ::std::os::raw::c_uint,
    pub n2: ::std::os::raw::c_uint,
}
#[allow(clippy::unnecessary_operation, clippy::identity_op)]
const _: () = {
    ["Size of x25_hdlc_proto"][::std::mem::size_of::<x25_hdlc_proto>() - 24usize];
    ["Alignment of x25_hdlc_proto"][::std::mem::align_of::<x25_hdlc_proto>() - 4usize];
    ["Offset of field: x25_hdlc_proto::dce"][::std::mem::offset_of!(x25_hdlc_proto, dce) - 0usize];
    ["Offset of field: x25_hdlc_proto::modulo"]
        [::std::mem::offset_of!(x25_hdlc_proto, modulo) - 4usize];
    ["Offset of field: x25_hdlc_proto::window"]
        [::std::mem::offset_of!(x25_hdlc_proto, window) - 8usize];
    ["Offset of field: x25_hdlc_proto::t1"][::std::mem::offset_of!(x25_hdlc_proto, t1) - 12usize];
    ["Offset of field: x25_hdlc_proto::t2"][::std::mem::offset_of!(x25_hdlc_proto, t2) - 16usize];
    ["Offset of field: x25_hdlc_proto::n2"][::std::mem::offset_of!(x25_hdlc_proto, n2) - 20usize];
};
pub mod net_device_flags {
    pub type Type = ::std::os::raw::c_uint;
    pub const IFF_UP: Type = 1;
    pub const IFF_BROADCAST: Type = 2;
    pub const IFF_DEBUG: Type = 4;
    pub const IFF_LOOPBACK: Type = 8;
    pub const IFF_POINTOPOINT: Type = 16;
    pub const IFF_NOTRAILERS: Type = 32;
    pub const IFF_RUNNING: Type = 64;
    pub const IFF_NOARP: Type = 128;
    pub const IFF_PROMISC: Type = 256;
    pub const IFF_ALLMULTI: Type = 512;
    pub const IFF_MASTER: Type = 1024;
    pub const IFF_SLAVE: Type = 2048;
    pub const IFF_MULTICAST: Type = 4096;
    pub const IFF_PORTSEL: Type = 8192;
    pub const IFF_AUTOMEDIA: Type = 16384;
    pub const IFF_DYNAMIC: Type = 32768;
    pub const IFF_LOWER_UP: Type = 65536;
    pub const IFF_DORMANT: Type = 131072;
    pub const IFF_ECHO: Type = 262144;
}
pub mod _bindgen_ty_4 {
    pub type Type = ::std::os::raw::c_uint;
    pub const IF_OPER_UNKNOWN: Type = 0;
    pub const IF_OPER_NOTPRESENT: Type = 1;
    pub const IF_OPER_DOWN: Type = 2;
    pub const IF_OPER_LOWERLAYERDOWN: Type = 3;
    pub const IF_OPER_TESTING: Type = 4;
    pub const IF_OPER_DORMANT: Type = 5;
    pub const IF_OPER_UP: Type = 6;
}
pub mod _bindgen_ty_5 {
    pub type Type = ::std::os::raw::c_uint;
    pub const IF_LINK_MODE_DEFAULT: Type = 0;
    pub const IF_LINK_MODE_DORMANT: Type = 1;
    pub const IF_LINK_MODE_TESTING: Type = 2;
}
#[repr(C)]
#[derive(Debug, Default, Copy, Clone, PartialEq)]
pub struct ifmap {
    pub mem_start: ::std::os::raw::c_ulong,
    pub mem_end: ::std::os::raw::c_ulong,
    pub base_addr: ::std::os::raw::c_ushort,
    pub irq: ::std::os::raw::c_uchar,
    pub dma: ::std::os::raw::c_uchar,
    pub port: ::std::os::raw::c_uchar,
}
#[allow(clippy::unnecessary_operation, clippy::identity_op)]
const _: () = {
    ["Size of ifmap"][::std::mem::size_of::<ifmap>() - 24usize];
    ["Alignment of ifmap"][::std::mem::align_of::<ifmap>() - 8usize];
    ["Offset of field: ifmap::mem_start"][::std::mem::offset_of!(ifmap, mem_start) - 0usize];
    ["Offset of field: ifmap::mem_end"][::std::mem::offset_of!(ifmap, mem_end) - 8usize];
    ["Offset of field: ifmap::base_addr"][::std::mem::offset_of!(ifmap, base_addr) - 16usize];
    ["Offset of field: ifmap::irq"][::std::mem::offset_of!(ifmap, irq) - 18usize];
    ["Offset of field: ifmap::dma"][::std::mem::offset_of!(ifmap, dma) - 19usize];
    ["Offset of field: ifmap::port"][::std::mem::offset_of!(ifmap, port) - 20usize];
};
#[repr(C)]
#[derive(Copy, Clone)]
pub struct if_settings {
    pub type_: ::std::os::raw::c_uint,
    pub size: ::std::os::raw::c_uint,
    pub ifs_ifsu: if_settings__bindgen_ty_1,
}
#[repr(C)]
#[derive(Copy, Clone)]
pub union if_settings__bindgen_ty_1 {
    pub raw_hdlc: *mut raw_hdlc_proto,
    pub cisco: *mut cisco_proto,
    pub fr: *mut fr_proto,
    pub fr_pvc: *mut fr_proto_pvc,
    pub fr_pvc_info: *mut fr_proto_pvc_info,
    pub x25: *mut x25_hdlc_proto,
    pub sync: *mut sync_serial_settings,
    pub te1: *mut te1_settings,
}
#[allow(clippy::unnecessary_operation, clippy::identity_op)]
const _: () = {
    ["Size of if_settings__bindgen_ty_1"]
        [::std::mem::size_of::<if_settings__bindgen_ty_1>() - 8usize];
    ["Alignment of if_settings__bindgen_ty_1"]
        [::std::mem::align_of::<if_settings__bindgen_ty_1>() - 8usize];
    ["Offset of field: if_settings__bindgen_ty_1::raw_hdlc"]
        [::std::mem::offset_of!(if_settings__bindgen_ty_1, raw_hdlc) - 0usize];
    ["Offset of field: if_settings__bindgen_ty_1::cisco"]
        [::std::mem::offset_of!(if_settings__bindgen_ty_1, cisco) - 0usize];
    ["Offset of field: if_settings__bindgen_ty_1::fr"]
        [::std::mem::offset_of!(if_settings__bindgen_ty_1, fr) - 0usize];
    ["Offset of field: if_settings__bindgen_ty_1::fr_pvc"]
        [::std::mem::offset_of!(if_settings__bindgen_ty_1, fr_pvc) - 0usize];
    ["Offset of field: if_settings__bindgen_ty_1::fr_pvc_info"]
        [::std::mem::offset_of!(if_settings__bindgen_ty_1, fr_pvc_info) - 0usize];
    ["Offset of field: if_settings__bindgen_ty_1::x25"]
        [::std::mem::offset_of!(if_settings__bindgen_ty_1, x25) - 0usize];
    ["Offset of field: if_settings__bindgen_ty_1::sync"]
        [::std::mem::offset_of!(if_settings__bindgen_ty_1, sync) - 0usize];
    ["Offset of field: if_settings__bindgen_ty_1::te1"]
        [::std::mem::offset_of!(if_settings__bindgen_ty_1, te1) - 0usize];
};
impl Default for if_settings__bindgen_ty_1 {
    fn default() -> Self {
        let mut s = ::std::mem::MaybeUninit::<Self>::uninit();
        unsafe {
            ::std::ptr::write_bytes(s.as_mut_ptr(), 0, 1);
            s.assume_init()
        }
    }
}
#[allow(clippy::unnecessary_operation, clippy::identity_op)]
const _: () = {
    ["Size of if_settings"][::std::mem::size_of::<if_settings>() - 16usize];
    ["Alignment of if_settings"][::std::mem::align_of::<if_settings>() - 8usize];
    ["Offset of field: if_settings::type_"][::std::mem::offset_of!(if_settings, type_) - 0usize];
    ["Offset of field: if_settings::size"][::std::mem::offset_of!(if_settings, size) - 4usize];
    ["Offset of field: if_settings::ifs_ifsu"]
        [::std::mem::offset_of!(if_settings, ifs_ifsu) - 8usize];
};
impl Default for if_settings {
    fn default() -> Self {
        let mut s = ::std::mem::MaybeUninit::<Self>::uninit();
        unsafe {
            ::std::ptr::write_bytes(s.as_mut_ptr(), 0, 1);
            s.assume_init()
        }
    }
}
#[repr(C)]
#[derive(Copy, Clone)]
pub struct ifreq {
    pub ifr_ifrn: ifreq__bindgen_ty_1,
    pub ifr_ifru: ifreq__bindgen_ty_2,
}
#[repr(C)]
#[derive(Copy, Clone)]
pub union ifreq__bindgen_ty_1 {
    pub ifrn_name: [::std::os::raw::c_uchar; 16usize],
}
#[allow(clippy::unnecessary_operation, clippy::identity_op)]
const _: () = {
    ["Size of ifreq__bindgen_ty_1"][::std::mem::size_of::<ifreq__bindgen_ty_1>() - 16usize];
    ["Alignment of ifreq__bindgen_ty_1"][::std::mem::align_of::<ifreq__bindgen_ty_1>() - 1usize];
    ["Offset of field: ifreq__bindgen_ty_1::ifrn_name"]
        [::std::mem::offset_of!(ifreq__bindgen_ty_1, ifrn_name) - 0usize];
};
impl Default for ifreq__bindgen_ty_1 {
    fn default() -> Self {
        let mut s = ::std::mem::MaybeUninit::<Self>::uninit();
        unsafe {
            ::std::ptr::write_bytes(s.as_mut_ptr(), 0, 1);
            s.assume_init()
        }
    }
}
#[repr(C)]
#[derive(Copy, Clone)]
pub union ifreq__bindgen_ty_2 {
    pub ifru_addr: sockaddr,
    pub ifru_dstaddr: sockaddr,
    pub ifru_broadaddr: sockaddr,
    pub ifru_netmask: sockaddr,
    pub ifru_hwaddr: sockaddr,
    pub ifru_flags: ::std::os::raw::c_short,
    pub ifru_ivalue: ::std::os::raw::c_int,
    pub ifru_mtu: ::std::os::raw::c_int,
    pub ifru_map: ifmap,
    pub ifru_slave: [::std::os::raw::c_char; 16usize],
    pub ifru_newname: [::std::os::raw::c_char; 16usize],
    pub ifru_data: *mut ::std::os::raw::c_void,
    pub ifru_settings: if_settings,
}
#[allow(clippy::unnecessary_operation, clippy::identity_op)]
const _: () = {
    ["Size of ifreq__bindgen_ty_2"][::std::mem::size_of::<ifreq__bindgen_ty_2>() - 24usize];
    ["Alignment of ifreq__bindgen_ty_2"][::std::mem::align_of::<ifreq__bindgen_ty_2>() - 8usize];
    ["Offset of field: ifreq__bindgen_ty_2::ifru_addr"]
        [::std::mem::offset_of!(ifreq__bindgen_ty_2, ifru_addr) - 0usize];
    ["Offset of field: ifreq__bindgen_ty_2::ifru_dstaddr"]
        [::std::mem::offset_of!(ifreq__bindgen_ty_2, ifru_dstaddr) - 0usize];
    ["Offset of field: ifreq__bindgen_ty_2::ifru_broadaddr"]
        [::std::mem::offset_of!(ifreq__bindgen_ty_2, ifru_broadaddr) - 0usize];
    ["Offset of field: ifreq__bindgen_ty_2::ifru_netmask"]
        [::std::mem::offset_of!(ifreq__bindgen_ty_2, ifru_netmask) - 0usize];
    ["Offset of field: ifreq__bindgen_ty_2::ifru_hwaddr"]
        [::std::mem::offset_of!(ifreq__bindgen_ty_2, ifru_hwaddr) - 0usize];
    ["Offset of field: ifreq__bindgen_ty_2::ifru_flags"]
        [::std::mem::offset_of!(ifreq__bindgen_ty_2, ifru_flags) - 0usize];
    ["Offset of field: ifreq__bindgen_ty_2::ifru_ivalue"]
        [::std::mem::offset_of!(ifreq__bindgen_ty_2, ifru_ivalue) - 0usize];
    ["Offset of field: ifreq__bindgen_ty_2::ifru_mtu"]
        [::std::mem::offset_of!(ifreq__bindgen_ty_2, ifru_mtu) - 0usize];
    ["Offset of field: ifreq__bindgen_ty_2::ifru_map"]
        [::std::mem::offset_of!(ifreq__bindgen_ty_2, ifru_map) - 0usize];
    ["Offset of field: ifreq__bindgen_ty_2::ifru_slave"]
        [::std::mem::offset_of!(ifreq__bindgen_ty_2, ifru_slave) - 0usize];
    ["Offset of field: ifreq__bindgen_ty_2::ifru_newname"]
        [::std::mem::offset_of!(ifreq__bindgen_ty_2, ifru_newname) - 0usize];
    ["Offset of field: ifreq__bindgen_ty_2::ifru_data"]
        [::std::mem::offset_of!(ifreq__bindgen_ty_2, ifru_data) - 0usize];
    ["Offset of field: ifreq__bindgen_ty_2::ifru_settings"]
        [::std::mem::offset_of!(ifreq__bindgen_ty_2, ifru_settings) - 0usize];
};
impl Default for ifreq__bindgen_ty_2 {
    fn default() -> Self {
        let mut s = ::std::mem::MaybeUninit::<Self>::uninit();
        unsafe {
            ::std::ptr::write_bytes(s.as_mut_ptr(), 0, 1);
            s.assume_init()
        }
    }
}
#[allow(clippy::unnecessary_operation, clippy::identity_op)]
const _: () = {
    ["Size of ifreq"][::std::mem::size_of::<ifreq>() - 40usize];
    ["Alignment of ifreq"][::std::mem::align_of::<ifreq>() - 8usize];
    ["Offset of field: ifreq::ifr_ifrn"][::std::mem::offset_of!(ifreq, ifr_ifrn) - 0usize];
    ["Offset of field: ifreq::ifr_ifru"][::std::mem::offset_of!(ifreq, ifr_ifru) - 16usize];
};
impl Default for ifreq {
    fn default() -> Self {
        let mut s = ::std::mem::MaybeUninit::<Self>::uninit();
        unsafe {
            ::std::ptr::write_bytes(s.as_mut_ptr(), 0, 1);
            s.assume_init()
        }
    }
}
#[repr(C)]
#[derive(Copy, Clone)]
pub struct ifconf {
    pub ifc_len: ::std::os::raw::c_int,
    pub ifc_ifcu: ifconf__bindgen_ty_1,
}
#[repr(C)]
#[derive(Copy, Clone)]
pub union ifconf__bindgen_ty_1 {
    pub ifcu_buf: *mut ::std::os::raw::c_char,
    pub ifcu_req: *mut ifreq,
}
#[allow(clippy::unnecessary_operation, clippy::identity_op)]
const _: () = {
    ["Size of ifconf__bindgen_ty_1"][::std::mem::size_of::<ifconf__bindgen_ty_1>() - 8usize];
    ["Alignment of ifconf__bindgen_ty_1"][::std::mem::align_of::<ifconf__bindgen_ty_1>() - 8usize];
    ["Offset of field: ifconf__bindgen_ty_1::ifcu_buf"]
        [::std::mem::offset_of!(ifconf__bindgen_ty_1, ifcu_buf) - 0usize];
    ["Offset of field: ifconf__bindgen_ty_1::ifcu_req"]
        [::std::mem::offset_of!(ifconf__bindgen_ty_1, ifcu_req) - 0usize];
};
impl Default for ifconf__bindgen_ty_1 {
    fn default() -> Self {
let mut s = ::std::mem::MaybeUninit::::uninit(); unsafe { ::std::ptr::write_bytes(s.as_mut_ptr(), 0, 1); s.assume_init() } } } #[allow(clippy::unnecessary_operation, clippy::identity_op)] const _: () = { ["Size of ifconf"][::std::mem::size_of::() - 16usize]; ["Alignment of ifconf"][::std::mem::align_of::() - 8usize]; ["Offset of field: ifconf::ifc_len"][::std::mem::offset_of!(ifconf, ifc_len) - 0usize]; ["Offset of field: ifconf::ifc_ifcu"][::std::mem::offset_of!(ifconf, ifc_ifcu) - 8usize]; }; impl Default for ifconf { fn default() -> Self { let mut s = ::std::mem::MaybeUninit::::uninit(); unsafe { ::std::ptr::write_bytes(s.as_mut_ptr(), 0, 1); s.assume_init() } } } ================================================ FILE: src/vmm/src/devices/virtio/net/generated/mod.rs ================================================ // Copyright TUNTAP, 2017 The Chromium OS Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the THIRD-PARTY file. #![allow(clippy::all)] #![allow(non_upper_case_globals)] #![allow(non_camel_case_types)] #![allow(non_snake_case)] // generated with bindgen /usr/include/linux/if.h --no-unstable-rust // --constified-enum '*' --with-derive-default -- -D __UAPI_DEF_IF_IFNAMSIZ -D // __UAPI_DEF_IF_NET_DEVICE_FLAGS -D __UAPI_DEF_IF_IFREQ -D __UAPI_DEF_IF_IFMAP // Name is "iff" to avoid conflicting with "if" keyword. // Generated against Linux 4.11 to include fix "uapi: fix linux/if.h userspace // compilation errors". // Manual fixup of ifrn_name to be of type c_uchar instead of c_char. 
pub mod iff; // generated with bindgen /usr/include/linux/if_tun.h --no-unstable-rust // --constified-enum '*' --with-derive-default pub mod if_tun; // generated with bindgen /usr/include/linux/sockios.h --no-unstable-rust // --constified-enum '*' --with-derive-default pub mod sockios; pub use if_tun::*; pub use iff::*; ================================================ FILE: src/vmm/src/devices/virtio/net/generated/sockios.rs ================================================ // Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 // automatically generated by tools/bindgen.sh #![allow( non_camel_case_types, non_upper_case_globals, dead_code, non_snake_case, clippy::ptr_as_ptr, clippy::undocumented_unsafe_blocks, missing_debug_implementations, clippy::tests_outside_test_module, unsafe_op_in_unsafe_fn, clippy::redundant_static_lifetimes )] pub const __BITS_PER_LONG: u32 = 64; pub const __BITS_PER_LONG_LONG: u32 = 64; pub const FIOSETOWN: u32 = 35073; pub const SIOCSPGRP: u32 = 35074; pub const FIOGETOWN: u32 = 35075; pub const SIOCGPGRP: u32 = 35076; pub const SIOCATMARK: u32 = 35077; pub const SIOCGSTAMP_OLD: u32 = 35078; pub const SIOCGSTAMPNS_OLD: u32 = 35079; pub const SOCK_IOC_TYPE: u32 = 137; pub const SIOCGSTAMP: u32 = 35078; pub const SIOCGSTAMPNS: u32 = 35079; pub const SIOCADDRT: u32 = 35083; pub const SIOCDELRT: u32 = 35084; pub const SIOCRTMSG: u32 = 35085; pub const SIOCGIFNAME: u32 = 35088; pub const SIOCSIFLINK: u32 = 35089; pub const SIOCGIFCONF: u32 = 35090; pub const SIOCGIFFLAGS: u32 = 35091; pub const SIOCSIFFLAGS: u32 = 35092; pub const SIOCGIFADDR: u32 = 35093; pub const SIOCSIFADDR: u32 = 35094; pub const SIOCGIFDSTADDR: u32 = 35095; pub const SIOCSIFDSTADDR: u32 = 35096; pub const SIOCGIFBRDADDR: u32 = 35097; pub const SIOCSIFBRDADDR: u32 = 35098; pub const SIOCGIFNETMASK: u32 = 35099; pub const SIOCSIFNETMASK: u32 = 35100; pub const SIOCGIFMETRIC: u32 = 35101; pub const SIOCSIFMETRIC: 
u32 = 35102; pub const SIOCGIFMEM: u32 = 35103; pub const SIOCSIFMEM: u32 = 35104; pub const SIOCGIFMTU: u32 = 35105; pub const SIOCSIFMTU: u32 = 35106; pub const SIOCSIFNAME: u32 = 35107; pub const SIOCSIFHWADDR: u32 = 35108; pub const SIOCGIFENCAP: u32 = 35109; pub const SIOCSIFENCAP: u32 = 35110; pub const SIOCGIFHWADDR: u32 = 35111; pub const SIOCGIFSLAVE: u32 = 35113; pub const SIOCSIFSLAVE: u32 = 35120; pub const SIOCADDMULTI: u32 = 35121; pub const SIOCDELMULTI: u32 = 35122; pub const SIOCGIFINDEX: u32 = 35123; pub const SIOGIFINDEX: u32 = 35123; pub const SIOCSIFPFLAGS: u32 = 35124; pub const SIOCGIFPFLAGS: u32 = 35125; pub const SIOCDIFADDR: u32 = 35126; pub const SIOCSIFHWBROADCAST: u32 = 35127; pub const SIOCGIFCOUNT: u32 = 35128; pub const SIOCGIFBR: u32 = 35136; pub const SIOCSIFBR: u32 = 35137; pub const SIOCGIFTXQLEN: u32 = 35138; pub const SIOCSIFTXQLEN: u32 = 35139; pub const SIOCETHTOOL: u32 = 35142; pub const SIOCGMIIPHY: u32 = 35143; pub const SIOCGMIIREG: u32 = 35144; pub const SIOCSMIIREG: u32 = 35145; pub const SIOCWANDEV: u32 = 35146; pub const SIOCOUTQNSD: u32 = 35147; pub const SIOCGSKNS: u32 = 35148; pub const SIOCDARP: u32 = 35155; pub const SIOCGARP: u32 = 35156; pub const SIOCSARP: u32 = 35157; pub const SIOCDRARP: u32 = 35168; pub const SIOCGRARP: u32 = 35169; pub const SIOCSRARP: u32 = 35170; pub const SIOCGIFMAP: u32 = 35184; pub const SIOCSIFMAP: u32 = 35185; pub const SIOCADDDLCI: u32 = 35200; pub const SIOCDELDLCI: u32 = 35201; pub const SIOCGIFVLAN: u32 = 35202; pub const SIOCSIFVLAN: u32 = 35203; pub const SIOCBONDENSLAVE: u32 = 35216; pub const SIOCBONDRELEASE: u32 = 35217; pub const SIOCBONDSETHWADDR: u32 = 35218; pub const SIOCBONDSLAVEINFOQUERY: u32 = 35219; pub const SIOCBONDINFOQUERY: u32 = 35220; pub const SIOCBONDCHANGEACTIVE: u32 = 35221; pub const SIOCBRADDBR: u32 = 35232; pub const SIOCBRDELBR: u32 = 35233; pub const SIOCBRADDIF: u32 = 35234; pub const SIOCBRDELIF: u32 = 35235; pub const SIOCSHWTSTAMP: u32 = 35248; 
pub const SIOCGHWTSTAMP: u32 = 35249; pub const SIOCDEVPRIVATE: u32 = 35312; pub const SIOCPROTOPRIVATE: u32 = 35296; ================================================ FILE: src/vmm/src/devices/virtio/net/metrics.rs ================================================ // Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 //! Defines the metrics system for Network devices. //! //! # Metrics format //! The metrics are flushed in JSON when requested by vmm::logger::metrics::METRICS.write(). //! //! ## JSON example with metrics: //! ```json //! { //! "net_eth0": { //! "activate_fails": "SharedIncMetric", //! "cfg_fails": "SharedIncMetric", //! "mac_address_updates": "SharedIncMetric", //! "no_rx_avail_buffer": "SharedIncMetric", //! "no_tx_avail_buffer": "SharedIncMetric", //! ... //! } //! "net_eth1": { //! "activate_fails": "SharedIncMetric", //! "cfg_fails": "SharedIncMetric", //! "mac_address_updates": "SharedIncMetric", //! "no_rx_avail_buffer": "SharedIncMetric", //! "no_tx_avail_buffer": "SharedIncMetric", //! ... //! } //! ... //! "net_iface_id": { //! "activate_fails": "SharedIncMetric", //! "cfg_fails": "SharedIncMetric", //! "mac_address_updates": "SharedIncMetric", //! "no_rx_avail_buffer": "SharedIncMetric", //! "no_tx_avail_buffer": "SharedIncMetric", //! ... //! } //! "net": { //! "activate_fails": "SharedIncMetric", //! "cfg_fails": "SharedIncMetric", //! "mac_address_updates": "SharedIncMetric", //! "no_rx_avail_buffer": "SharedIncMetric", //! "no_tx_avail_buffer": "SharedIncMetric", //! ... //! } //! } //! ``` //! Each `net` field in the example above is a serializable `NetDeviceMetrics` structure //! collecting metrics such as `activate_fails`, `cfg_fails`, etc. for the network device. //! `net_eth0` represent metrics for the endpoint "/network-interfaces/eth0", //! `net_eth1` represent metrics for the endpoint "/network-interfaces/eth1", and //! 
`net_iface_id` represent metrics for the endpoint "/network-interfaces/{iface_id}"
//! network device respectively and `net` is the aggregate of all the per device metrics.
//!
//! # Limitations
//! Network devices currently do not have `vmm::logger::metrics::StoreMetrics` so the aggregate
//! doesn't consider them.
//!
//! # Design
//! The main design goals of this system are:
//! * To improve network device metrics by logging them at per device granularity.
//! * Continue to provide aggregate net metrics to maintain backward compatibility.
//! * Move NetDeviceMetrics out of logger and decouple it.
//! * Use lockless operations, preferably ones that don't require anything other than simple
//!   reads/writes being atomic.
//! * Rely on `serde` to provide the actual serialization for writing the metrics.
//! * Since all metrics start at 0, we implement the `Default` trait via derive for all of them, to
//!   avoid having to initialize everything by hand.
//!
//! * Devices could be created in any order i.e. the first device created could either be eth0 or
//!   eth1 so if we use a vector for NetDeviceMetrics and call 1st device as net0, then net0 could
//!   sometimes point to eth0 and sometimes to eth1 which doesn't help with analysing the metrics.
//!   So, use a Map instead of a Vec to help understand which interface the metrics actually
//!   belong to.
//! * We use "net_$iface_id" for the metrics name instead of "net_$tap_name" to be consistent with
//!   the net endpoint "/network-interfaces/{iface_id}".
//!
//! The system implements 1 type of metrics:
//! * Shared Incremental Metrics (SharedIncMetrics) - dedicated for the metrics which need a counter
//!   (i.e the number of times an API request failed). These metrics are reset upon flush.
//!
//! We use net::metrics::METRICS instead of adding an entry of NetDeviceMetrics
//! in Net so that metrics are accessible to be flushed even from signal handlers.
use std::collections::BTreeMap;
use std::sync::{Arc, RwLock};

use serde::ser::SerializeMap;
use serde::{Serialize, Serializer};

use crate::logger::{IncMetric, LatencyAggregateMetrics, SharedIncMetric};

/// map of network interface id and metrics
/// this should be protected by a lock before accessing.
#[derive(Debug)]
pub struct NetMetricsPerDevice {
    /// used to access per net device metrics
    // NOTE: the generic parameters below were stripped by extraction
    // (text read `BTreeMap>`); restored from the field's usage in
    // `alloc()`/`flush_metrics()`.
    pub metrics: BTreeMap<String, Arc<NetDeviceMetrics>>,
}

impl NetMetricsPerDevice {
    /// Allocate `NetDeviceMetrics` for net device having
    /// id `iface_id`. Also, allocate only if it doesn't
    /// exist to avoid overwriting previously allocated data.
    /// lock is always initialized so it is safe the unwrap
    /// the lock without a check.
    pub fn alloc(iface_id: String) -> Arc<NetDeviceMetrics> {
        Arc::clone(
            METRICS
                .write()
                .unwrap()
                .metrics
                .entry(iface_id)
                .or_insert_with(|| Arc::new(NetDeviceMetrics::default())),
        )
    }
}

/// Pool of Network-related metrics per device behind a lock to
/// keep things thread safe. Since the lock is initialized here
/// it is safe to unwrap it without any check.
static METRICS: RwLock<NetMetricsPerDevice> = RwLock::new(NetMetricsPerDevice {
    metrics: BTreeMap::new(),
});

/// This function facilitates aggregation and serialization of
/// per net device metrics.
pub fn flush_metrics<S: Serializer>(serializer: S) -> Result<S::Ok, S::Error> {
    let net_metrics = METRICS.read().unwrap();
    let metrics_len = net_metrics.metrics.len();
    // +1 to accommodate aggregate net metrics
    let mut seq = serializer.serialize_map(Some(1 + metrics_len))?;

    let mut net_aggregated: NetDeviceMetrics = NetDeviceMetrics::default();

    for (name, metrics) in net_metrics.metrics.iter() {
        let devn = format!("net_{}", name);
        // serialization will flush the metrics so aggregate before it.
        let m: &NetDeviceMetrics = metrics;
        net_aggregated.aggregate(m);
        seq.serialize_entry(&devn, m)?;
    }
    seq.serialize_entry("net", &net_aggregated)?;
    seq.end()
}

/// Network-related metrics.
#[derive(Default, Debug, Serialize)]
pub struct NetDeviceMetrics {
    /// Number of times when activate failed on a network device.
    pub activate_fails: SharedIncMetric,
    /// Number of times when interacting with the space config of a network device failed.
    pub cfg_fails: SharedIncMetric,
    /// Number of times the mac address was updated through the config space.
    pub mac_address_updates: SharedIncMetric,
    /// No available buffer for the net device rx queue.
    pub no_rx_avail_buffer: SharedIncMetric,
    /// No available buffer for the net device tx queue.
    pub no_tx_avail_buffer: SharedIncMetric,
    /// Number of times when handling events on a network device failed.
    pub event_fails: SharedIncMetric,
    /// Number of events associated with the receiving queue.
    pub rx_queue_event_count: SharedIncMetric,
    /// Number of events associated with the rate limiter installed on the receiving path.
    pub rx_event_rate_limiter_count: SharedIncMetric,
    /// Number of RX rate limiter throttling events.
    pub rx_rate_limiter_throttled: SharedIncMetric,
    /// Number of events received on the associated tap.
    pub rx_tap_event_count: SharedIncMetric,
    /// Number of bytes received.
    pub rx_bytes_count: SharedIncMetric,
    /// Number of packets received.
    pub rx_packets_count: SharedIncMetric,
    /// Number of errors while receiving data.
    pub rx_fails: SharedIncMetric,
    /// Number of successful read operations while receiving data.
    pub rx_count: SharedIncMetric,
    /// Number of times reading from TAP failed.
    pub tap_read_fails: SharedIncMetric,
    /// Number of times writing to TAP failed.
    pub tap_write_fails: SharedIncMetric,
    /// Duration of all tap write operations.
    pub tap_write_agg: LatencyAggregateMetrics,
    /// Number of transmitted bytes.
    pub tx_bytes_count: SharedIncMetric,
    /// Number of malformed TX frames.
    pub tx_malformed_frames: SharedIncMetric,
    /// Number of errors while transmitting data.
    pub tx_fails: SharedIncMetric,
    /// Number of successful write operations while transmitting data.
    pub tx_count: SharedIncMetric,
    /// Number of transmitted packets.
    pub tx_packets_count: SharedIncMetric,
    /// Number of events associated with the transmitting queue.
    pub tx_queue_event_count: SharedIncMetric,
    /// Number of events associated with the rate limiter installed on the transmitting path.
    pub tx_rate_limiter_event_count: SharedIncMetric,
    /// Number of TX rate limiter throttling events.
    pub tx_rate_limiter_throttled: SharedIncMetric,
    /// Number of packets with a spoofed mac, sent by the guest.
    pub tx_spoofed_mac_count: SharedIncMetric,
    /// Number of remaining requests in the TX queue.
    pub tx_remaining_reqs_count: SharedIncMetric,
}

impl NetDeviceMetrics {
    /// Const default construction. Only `tap_write_agg` needs an explicit
    /// constructor; every counter field starts from its zero `Default`.
    pub fn new() -> Self {
        Self {
            tap_write_agg: LatencyAggregateMetrics::new(),
            ..Default::default()
        }
    }

    /// Net metrics are SharedIncMetric where the diff of current vs
    /// old is serialized i.e. serialize_u64(current-old).
    /// So to have the aggregate serialized in same way we need to
    /// fetch the diff of current vs old metrics and add it to the
    /// aggregate.
    // Adds the not-yet-flushed delta of every counter in `other` into `self`.
    // Must run before `other` is serialized, because serialization resets the
    // per-device deltas.
    pub fn aggregate(&mut self, other: &Self) {
        self.activate_fails.add(other.activate_fails.fetch_diff());
        self.cfg_fails.add(other.cfg_fails.fetch_diff());
        self.mac_address_updates
            .add(other.mac_address_updates.fetch_diff());
        self.no_rx_avail_buffer
            .add(other.no_rx_avail_buffer.fetch_diff());
        self.no_tx_avail_buffer
            .add(other.no_tx_avail_buffer.fetch_diff());
        self.event_fails.add(other.event_fails.fetch_diff());
        self.rx_queue_event_count
            .add(other.rx_queue_event_count.fetch_diff());
        self.rx_event_rate_limiter_count
            .add(other.rx_event_rate_limiter_count.fetch_diff());
        self.rx_rate_limiter_throttled
            .add(other.rx_rate_limiter_throttled.fetch_diff());
        self.rx_tap_event_count
            .add(other.rx_tap_event_count.fetch_diff());
        self.rx_bytes_count.add(other.rx_bytes_count.fetch_diff());
        self.rx_packets_count
            .add(other.rx_packets_count.fetch_diff());
        self.rx_fails.add(other.rx_fails.fetch_diff());
        self.rx_count.add(other.rx_count.fetch_diff());
        self.tap_read_fails.add(other.tap_read_fails.fetch_diff());
        self.tap_write_fails.add(other.tap_write_fails.fetch_diff());
        self.tap_write_agg
            .sum_us
            .add(other.tap_write_agg.sum_us.fetch_diff());
        self.tx_bytes_count.add(other.tx_bytes_count.fetch_diff());
        self.tx_malformed_frames
            .add(other.tx_malformed_frames.fetch_diff());
        self.tx_fails.add(other.tx_fails.fetch_diff());
        self.tx_count.add(other.tx_count.fetch_diff());
        self.tx_packets_count
            .add(other.tx_packets_count.fetch_diff());
        self.tx_queue_event_count
            .add(other.tx_queue_event_count.fetch_diff());
        self.tx_rate_limiter_event_count
            .add(other.tx_rate_limiter_event_count.fetch_diff());
        self.tx_rate_limiter_throttled
            .add(other.tx_rate_limiter_throttled.fetch_diff());
        self.tx_spoofed_mac_count
            .add(other.tx_spoofed_mac_count.fetch_diff());
        self.tx_remaining_reqs_count
            .add(other.tx_remaining_reqs_count.fetch_diff());
    }
}

#[cfg(test)]
pub mod tests {
    use super::*;

    #[test]
    fn test_max_net_dev_metrics() {
        // Note: this test has nothing to do with
        // Net structure or IRQs, this is
just to allocate // metrics for max number of devices that system can have. // we have 5-23 irq for net devices so max 19 net devices. const MAX_NET_DEVICES: usize = 19; drop(METRICS.read().unwrap()); drop(METRICS.write().unwrap()); for i in 0..MAX_NET_DEVICES { let devn: String = format!("eth{}", i); NetMetricsPerDevice::alloc(devn.clone()); METRICS .read() .unwrap() .metrics .get(&devn) .unwrap() .activate_fails .inc(); METRICS .read() .unwrap() .metrics .get(&devn) .unwrap() .rx_bytes_count .add(10); METRICS .read() .unwrap() .metrics .get(&devn) .unwrap() .tx_bytes_count .add(5); } for i in 0..MAX_NET_DEVICES { let devn: String = format!("eth{}", i); assert!( METRICS .read() .unwrap() .metrics .get(&devn) .unwrap() .activate_fails .count() >= 1 ); assert!( METRICS .read() .unwrap() .metrics .get(&devn) .unwrap() .rx_bytes_count .count() >= 10 ); assert_eq!( METRICS .read() .unwrap() .metrics .get(&devn) .unwrap() .tx_bytes_count .count(), 5 ); } } #[test] fn test_signle_net_dev_metrics() { // Use eth0 so that we can check thread safety with the // `test_net_dev_metrics` which also uses the same name. let devn = "eth0"; drop(METRICS.read().unwrap()); drop(METRICS.write().unwrap()); NetMetricsPerDevice::alloc(String::from(devn)); METRICS.read().unwrap().metrics.get(devn).unwrap(); METRICS .read() .unwrap() .metrics .get(devn) .unwrap() .activate_fails .inc(); assert!( METRICS .read() .unwrap() .metrics .get(devn) .unwrap() .activate_fails .count() > 0, "{}", METRICS .read() .unwrap() .metrics .get(devn) .unwrap() .activate_fails .count() ); // we expect only 2 tests (this and test_max_net_dev_metrics) // to update activate_fails count for eth0. 
assert!( METRICS .read() .unwrap() .metrics .get(devn) .unwrap() .activate_fails .count() <= 2, "{}", METRICS .read() .unwrap() .metrics .get(devn) .unwrap() .activate_fails .count() ); METRICS .read() .unwrap() .metrics .get(devn) .unwrap() .activate_fails .inc(); METRICS .read() .unwrap() .metrics .get(devn) .unwrap() .rx_bytes_count .add(5); assert!( METRICS .read() .unwrap() .metrics .get(devn) .unwrap() .rx_bytes_count .count() >= 5 ); } } ================================================ FILE: src/vmm/src/devices/virtio/net/mod.rs ================================================ // Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 //! Implements a virtio network device. use std::io; /// Maximum size of the queue for network device. pub const NET_QUEUE_MAX_SIZE: u16 = 256; /// Maximum size of the frame buffers handled by this device. pub const MAX_BUFFER_SIZE: usize = 65562; /// The number of queues of the network device. pub const NET_NUM_QUEUES: usize = 2; pub const NET_QUEUE_SIZES: [u16; NET_NUM_QUEUES] = [NET_QUEUE_MAX_SIZE; NET_NUM_QUEUES]; /// The index of the rx queue from Net device queues/queues_evts vector. pub const RX_INDEX: usize = 0; /// The index of the tx queue from Net device queues/queues_evts vector. pub const TX_INDEX: usize = 1; pub mod device; mod event_handler; pub mod metrics; pub mod persist; mod tap; pub mod test_utils; mod generated; pub use tap::{Tap, TapError}; use vm_memory::VolatileMemoryError; pub use self::device::Net; use super::iovec::IoVecError; use crate::devices::virtio::queue::{InvalidAvailIdx, QueueError}; /// Enum representing the Net device queue types #[derive(Debug)] pub enum NetQueue { /// The RX queue Rx, /// The TX queue Tx, } /// Errors the network device can trigger. 
#[derive(Debug, thiserror::Error, displaydoc::Display)] pub enum NetError { /// Open tap device failed: {0} TapOpen(TapError), /// Setting vnet header size failed: {0} TapSetVnetHdrSize(TapError), /// EventFd error: {0} EventFd(io::Error), /// IO error: {0} IO(io::Error), /// Error writing in guest memory: {0} GuestMemoryError(#[from] VolatileMemoryError), /// The VNET header is missing from the frame VnetHeaderMissing, /// IoVecBuffer(Mut) error: {0} IoVecError(#[from] IoVecError), /// virtio queue error: {0} QueueError(#[from] QueueError), /// {0} InvalidAvailIdx(#[from] InvalidAvailIdx), } ================================================ FILE: src/vmm/src/devices/virtio/net/persist.rs ================================================ // Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 //! Defines the structures needed for saving/restoring net devices. use std::io; use std::sync::{Arc, Mutex}; use serde::{Deserialize, Serialize}; use super::device::{Net, RxBuffers}; use super::{NET_NUM_QUEUES, NET_QUEUE_MAX_SIZE, RX_INDEX, TapError}; use crate::devices::virtio::device::{ActiveState, DeviceState, VirtioDeviceType}; use crate::devices::virtio::persist::{PersistError as VirtioStateError, VirtioDeviceState}; use crate::devices::virtio::transport::VirtioInterrupt; use crate::mmds::data_store::Mmds; use crate::mmds::ns::MmdsNetworkStack; use crate::mmds::persist::MmdsNetworkStackState; use crate::rate_limiter::RateLimiter; use crate::rate_limiter::persist::RateLimiterState; use crate::snapshot::Persist; use crate::utils::net::mac::MacAddr; use crate::vstate::memory::GuestMemoryMmap; /// Information about the network config's that are saved /// at snapshot. #[derive(Debug, Default, Clone, Serialize, Deserialize)] pub struct NetConfigSpaceState { guest_mac: Option, } /// Information about the network device that are saved /// at snapshot. 
#[derive(Debug, Clone, Serialize, Deserialize)] pub struct NetState { pub id: String, pub tap_if_name: String, rx_rate_limiter_state: RateLimiterState, tx_rate_limiter_state: RateLimiterState, /// The associated MMDS network stack. pub mmds_ns: Option, config_space: NetConfigSpaceState, pub virtio_state: VirtioDeviceState, } /// Auxiliary structure for creating a device when resuming from a snapshot. #[derive(Debug)] pub struct NetConstructorArgs { /// Pointer to guest memory. pub mem: GuestMemoryMmap, /// Pointer to the MMDS data store. pub mmds: Option>>, } /// Errors triggered when trying to construct a network device at resume time. #[derive(Debug, thiserror::Error, displaydoc::Display)] pub enum NetPersistError { /// Failed to create a network device: {0} CreateNet(#[from] super::NetError), /// Failed to create a rate limiter: {0} CreateRateLimiter(#[from] io::Error), /// Failed to re-create the virtio state (i.e queues etc): {0} VirtioState(#[from] VirtioStateError), /// Indicator that no MMDS is associated with this device. NoMmdsDataStore, /// Setting tap interface offload flags failed: {0} TapSetOffload(TapError), } impl Persist<'_> for Net { type State = NetState; type ConstructorArgs = NetConstructorArgs; type Error = NetPersistError; fn save(&self) -> Self::State { NetState { id: self.id.clone(), tap_if_name: self.iface_name(), rx_rate_limiter_state: self.rx_rate_limiter.save(), tx_rate_limiter_state: self.tx_rate_limiter.save(), mmds_ns: self.mmds_ns.as_ref().map(|mmds| mmds.save()), config_space: NetConfigSpaceState { guest_mac: self.guest_mac, }, virtio_state: VirtioDeviceState::from_device(self), } } fn restore( constructor_args: Self::ConstructorArgs, state: &Self::State, ) -> Result { // RateLimiter::restore() can fail at creating a timerfd. 
let rx_rate_limiter = RateLimiter::restore((), &state.rx_rate_limiter_state)?; let tx_rate_limiter = RateLimiter::restore((), &state.tx_rate_limiter_state)?; let mut net = Net::new( state.id.clone(), &state.tap_if_name, state.config_space.guest_mac, rx_rate_limiter, tx_rate_limiter, )?; // We trust the MMIODeviceManager::restore to pass us an MMDS data store reference if // there is at least one net device having the MMDS NS present and/or the mmds version was // persisted in the snapshot. if let Some(mmds_ns) = &state.mmds_ns { // We're safe calling unwrap() to discard the error, as MmdsNetworkStack::restore() // always returns Ok. net.mmds_ns = Some( MmdsNetworkStack::restore( constructor_args .mmds .map_or_else(|| Err(NetPersistError::NoMmdsDataStore), Ok)?, mmds_ns, ) .unwrap(), ); } net.queues = state.virtio_state.build_queues_checked( &constructor_args.mem, VirtioDeviceType::Net, NET_NUM_QUEUES, NET_QUEUE_MAX_SIZE, )?; net.avail_features = state.virtio_state.avail_features; net.acked_features = state.virtio_state.acked_features; Ok(net) } } #[cfg(test)] mod tests { use super::*; use crate::devices::virtio::device::VirtioDevice; use crate::devices::virtio::net::test_utils::{default_net, default_net_no_mmds}; use crate::devices::virtio::test_utils::{default_interrupt, default_mem}; fn validate_save_and_restore(net: Net, mmds_ds: Option>>) { let guest_mem = default_mem(); let id; let tap_if_name; let has_mmds_ns; let allow_mmds_requests; let virtio_state; let serialized_data; // Create and save the net device. { let net_state = net.save(); serialized_data = bitcode::serialize(&net_state).unwrap(); // Save some fields that we want to check later. id = net.id.clone(); tap_if_name = net.iface_name(); has_mmds_ns = net.mmds_ns.is_some(); allow_mmds_requests = has_mmds_ns && mmds_ds.is_some(); virtio_state = VirtioDeviceState::from_device(&net); } // Drop the initial net device so that we don't get an error when trying to recreate the // TAP device. 
drop(net); { // Deserialize and restore the net device. let restored_state = bitcode::deserialize(&serialized_data).unwrap(); match Net::restore( NetConstructorArgs { mem: guest_mem, mmds: mmds_ds, }, &restored_state, ) { Ok(restored_net) => { // Test that virtio specific fields are the same. assert_eq!(restored_net.device_type(), VirtioDeviceType::Net); assert_eq!(restored_net.avail_features(), virtio_state.avail_features); assert_eq!(restored_net.acked_features(), virtio_state.acked_features); assert_eq!(restored_net.is_activated(), virtio_state.activated); // Test that net specific fields are the same. assert_eq!(&restored_net.id, &id); assert_eq!(&restored_net.iface_name(), &tap_if_name); assert_eq!(restored_net.mmds_ns.is_some(), allow_mmds_requests); assert_eq!(restored_net.rx_rate_limiter, RateLimiter::default()); assert_eq!(restored_net.tx_rate_limiter, RateLimiter::default()); } Err(NetPersistError::NoMmdsDataStore) => { assert!(has_mmds_ns && !allow_mmds_requests) } _ => unreachable!(), } } } #[test] fn test_persistence() { let mmds = Some(Arc::new(Mutex::new(Mmds::default()))); validate_save_and_restore(default_net(), mmds.as_ref().cloned()); validate_save_and_restore(default_net_no_mmds(), None); // Check what happens if the MMIODeviceManager gives us the reference to the MMDS // data store even if this device does not have mmds ns configured. // The restore should be conservative and not configure the mmds ns. validate_save_and_restore(default_net_no_mmds(), mmds); // Check what happens if the MMIODeviceManager does not give us the reference to the MMDS // data store. This will return an error. validate_save_and_restore(default_net(), None); } } ================================================ FILE: src/vmm/src/devices/virtio/net/tap.rs ================================================ // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 // // Portions Copyright 2017 The Chromium OS Authors. 
All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the THIRD-PARTY file.

use std::fmt::{self, Debug};
use std::fs::File;
use std::io::Error as IoError;
use std::os::raw::*;
use std::os::unix::io::{AsRawFd, FromRawFd, RawFd};

use vmm_sys_util::ioctl::{ioctl_with_mut_ref, ioctl_with_ref, ioctl_with_val};
use vmm_sys_util::ioctl_iow_nr;

use crate::devices::virtio::iovec::IoVecBuffer;
use crate::devices::virtio::net::generated;

// As defined in the Linux UAPI:
// https://elixir.bootlin.com/linux/v4.17/source/include/uapi/linux/if.h#L33
const IFACE_NAME_MAX_LEN: usize = 16;

/// List of errors the tap implementation can throw.
#[rustfmt::skip]
#[derive(Debug, thiserror::Error, displaydoc::Display)]
pub enum TapError {
    /// Couldn't open /dev/net/tun: {0}
    OpenTun(IoError),
    /// Invalid interface name
    InvalidIfname,
    /// Error while creating ifreq structure: {0}. Invalid TUN/TAP Backend provided by {1}. Check our documentation on setting up the network devices.
    IfreqExecuteError(IoError, String),
    /// Error while setting the offload flags: {0}
    SetOffloadFlags(IoError),
    /// Error while setting size of the vnet header: {0}
    SetSizeOfVnetHdr(IoError),
}

// 'T' — the TUN/TAP ioctl type byte used by the request macros below.
const TUNTAP: ::std::os::raw::c_uint = 84;
ioctl_iow_nr!(TUNSETIFF, TUNTAP, 202, ::std::os::raw::c_int);
ioctl_iow_nr!(TUNSETOFFLOAD, TUNTAP, 208, ::std::os::raw::c_uint);
ioctl_iow_nr!(TUNSETVNETHDRSZ, TUNTAP, 216, ::std::os::raw::c_int);

/// Handle for a network tap interface.
///
/// For now, this simply wraps the file descriptor for the tap device so methods
/// can run ioctls on the interface. The tap interface fd will be closed when
/// Tap goes out of scope, and the kernel will clean up the interface automatically.
#[derive(Debug)]
pub struct Tap {
    tap_file: File,
    // Null-padded interface name as returned by the TUNSETIFF ioctl.
    pub(crate) if_name: [u8; IFACE_NAME_MAX_LEN],
}

// Returns a byte vector representing the contents of a null terminated C string which
// contains if_name.
/// Builds a NUL-terminated, fixed-size interface-name buffer from `if_name`.
///
/// Fails with [`TapError::InvalidIfname`] when the name does not leave room
/// for the terminating NUL (i.e. it is `IFACE_NAME_MAX_LEN` bytes or longer).
fn build_terminated_if_name(if_name: &str) -> Result<[u8; IFACE_NAME_MAX_LEN], TapError> {
    // Convert the string slice to bytes, and shadow the variable,
    // since we no longer need the &str version.
    let if_name = if_name.as_bytes();

    // `>=` (not `>`): one byte must remain for the NUL terminator.
    if if_name.len() >= IFACE_NAME_MAX_LEN {
        return Err(TapError::InvalidIfname);
    }

    let mut terminated_if_name = [b'\0'; IFACE_NAME_MAX_LEN];
    terminated_if_name[..if_name.len()].copy_from_slice(if_name);

    Ok(terminated_if_name)
}

/// Builder for the `ifreq` structure handed to network-related ioctls.
#[derive(Copy, Clone)]
pub struct IfReqBuilder(generated::ifreq);

impl fmt::Debug for IfReqBuilder {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "IfReqBuilder {{ .. }}")
    }
}

impl IfReqBuilder {
    /// Creates a builder wrapping a zeroed `ifreq`.
    pub fn new() -> Self {
        Self(Default::default())
    }

    /// Sets the interface name field (`ifrn_name`) of the request.
    pub fn if_name(mut self, if_name: &[u8; IFACE_NAME_MAX_LEN]) -> Self {
        // SAFETY: Since we don't call as_mut on the same union field more than once, this block is
        // safe.
        let ifrn_name = unsafe { self.0.ifr_ifrn.ifrn_name.as_mut() };
        ifrn_name.copy_from_slice(if_name.as_ref());

        self
    }

    /// Sets the interface flags field (`ifru_flags`) of the request.
    pub(crate) fn flags(mut self, flags: i16) -> Self {
        self.0.ifr_ifru.ifru_flags = flags;

        self
    }

    /// Issues `ioctl` on `socket` with the built request, returning the
    /// (possibly kernel-updated) `ifreq` on success.
    pub(crate) fn execute<F: AsRawFd + Debug>(
        mut self,
        socket: &F,
        ioctl: u64,
    ) -> std::io::Result<generated::ifreq> {
        // SAFETY: ioctl is safe. Called with a valid socket fd, and we check the return.
        if unsafe { ioctl_with_mut_ref(socket, ioctl, &mut self.0) } < 0 {
            return Err(IoError::last_os_error());
        }

        Ok(self.0)
    }
}

impl Tap {
    /// Create a TUN/TAP device given the interface name.
    /// # Arguments
    ///
    /// * `if_name` - the name of the interface.
    pub fn open_named(if_name: &str) -> Result<Tap, TapError> {
        // SAFETY: Open calls are safe because we give a constant null-terminated
        // string and verify the result.
        let fd = unsafe {
            libc::open(
                c"/dev/net/tun".as_ptr(),
                libc::O_RDWR | libc::O_NONBLOCK | libc::O_CLOEXEC,
            )
        };
        if fd < 0 {
            return Err(TapError::OpenTun(IoError::last_os_error()));
        }
        // SAFETY: We just checked that the fd is valid.
        let tuntap = unsafe { File::from_raw_fd(fd) };

        let terminated_if_name = build_terminated_if_name(if_name)?;
        let ifreq = IfReqBuilder::new()
            .if_name(&terminated_if_name)
            .flags(
                i16::try_from(generated::IFF_TAP | generated::IFF_NO_PI | generated::IFF_VNET_HDR)
                    .unwrap(),
            )
            .execute(&tuntap, TUNSETIFF())
            .map_err(|io_error| TapError::IfreqExecuteError(io_error, if_name.to_owned()))?;

        Ok(Tap {
            tap_file: tuntap,
            // SAFETY: Safe since only the name is accessed, and it's cloned out.
            // The kernel may have filled in a generated name (e.g. for "tap%d").
            if_name: unsafe { ifreq.ifr_ifrn.ifrn_name },
        })
    }

    /// Retrieve the interface's name as a str.
    pub fn if_name_as_str(&self) -> &str {
        // Stop at the first NUL; a completely full buffer (no NUL) uses the
        // whole array.
        let len = self
            .if_name
            .iter()
            .position(|x| *x == 0)
            .unwrap_or(IFACE_NAME_MAX_LEN);
        std::str::from_utf8(&self.if_name[..len]).unwrap_or("")
    }

    /// Set the offload flags for the tap interface.
    pub fn set_offload(&self, flags: c_uint) -> Result<(), TapError> {
        // SAFETY: ioctl is safe. Called with a valid tap fd, and we check the return.
        if unsafe { ioctl_with_val(&self.tap_file, TUNSETOFFLOAD(), c_ulong::from(flags)) } < 0 {
            return Err(TapError::SetOffloadFlags(IoError::last_os_error()));
        }

        Ok(())
    }

    /// Set the size of the vnet hdr.
    pub fn set_vnet_hdr_size(&self, size: c_int) -> Result<(), TapError> {
        // SAFETY: ioctl is safe. Called with a valid tap fd, and we check the return.
        if unsafe { ioctl_with_ref(&self.tap_file, TUNSETVNETHDRSZ(), &size) } < 0 {
            return Err(TapError::SetSizeOfVnetHdr(IoError::last_os_error()));
        }

        Ok(())
    }

    /// Write an `IoVecBuffer` to tap
    pub(crate) fn write_iovec(&mut self, buffer: &IoVecBuffer) -> Result<usize, IoError> {
        let iovcnt = i32::try_from(buffer.iovec_count()).unwrap();
        let iov = buffer.as_iovec_ptr();

        // SAFETY: `writev` is safe. Called with a valid tap fd, the iovec pointer and length
        // is provided by the `IoVecBuffer` implementation and we check the return value.
        let ret = unsafe { libc::writev(self.tap_file.as_raw_fd(), iov, iovcnt) };
        if ret == -1 {
            return Err(IoError::last_os_error());
        }
        Ok(usize::try_from(ret).unwrap())
    }

    /// Read from tap to an `IoVecBufferMut`
    pub(crate) fn read_iovec(&mut self, buffer: &mut [libc::iovec]) -> Result<usize, IoError> {
        let iov = buffer.as_mut_ptr();
        let iovcnt = buffer.len().try_into().unwrap();

        // SAFETY: `readv` is safe. Called with a valid tap fd, the iovec pointer and length
        // is provided by the `IoVecBufferMut` implementation and we check the return value.
        let ret = unsafe { libc::readv(self.tap_file.as_raw_fd(), iov, iovcnt) };
        if ret == -1 {
            return Err(IoError::last_os_error());
        }
        Ok(usize::try_from(ret).unwrap())
    }
}

impl AsRawFd for Tap {
    fn as_raw_fd(&self) -> RawFd {
        self.tap_file.as_raw_fd()
    }
}

#[cfg(test)]
pub mod tests {
    #![allow(clippy::undocumented_unsafe_blocks)]
    use std::os::unix::ffi::OsStrExt;

    use super::*;
    use crate::devices::virtio::net::generated;
    use crate::devices::virtio::net::test_utils::{TapTrafficSimulator, enable, if_index};

    // Redefine `IoVecBufferMut` with specific length. Otherwise
    // Rust will not know what to do.
    type IoVecBufferMut = crate::devices::virtio::iovec::IoVecBufferMut<256>;

    // The size of the virtio net header
    const VNET_HDR_SIZE: usize = 10;
    const PAYLOAD_SIZE: usize = 512;

    #[test]
    fn test_tap_name() {
        // Sanity check that the assumed max iface name length is correct.
        assert_eq!(IFACE_NAME_MAX_LEN, unsafe {
            generated::ifreq__bindgen_ty_1::default().ifrn_name.len()
        });

        // Empty name - The tap should be named "tapN" by default, where N is some number
        // assigned by the kernel (e.g. "tap0", "tap1", etc.). We cannot assert a specific
        // number because other tests running in parallel may have already created tap devices.
let tap = Tap::open_named("").unwrap(); let name = tap.if_name_as_str(); assert!( name.starts_with("tap") && name.len() > 3 && name[3..].chars().all(|c| c.is_ascii_digit()), "Expected tap name matching 'tapN', got '{name}'" ); // Test using '%d' to have the kernel assign an unused name, // and that we correctly copy back that generated name let tap = Tap::open_named("tap%d").unwrap(); // '%d' should be replaced with _some_ number, although we don't know what was the next // available one. Just assert that '%d' definitely isn't there anymore. assert_ne!(b"tap%d", &tap.if_name[..5]); // 16 characters - too long. let name = "a123456789abcdef"; match Tap::open_named(name) { Err(TapError::InvalidIfname) => (), _ => panic!("Expected Error::InvalidIfname"), }; // 15 characters - OK. let name = "a123456789abcde"; let tap = Tap::open_named(name).unwrap(); assert_eq!(&format!("{}\0", name).as_bytes(), &tap.if_name); assert_eq!(name, tap.if_name_as_str()); } #[test] fn test_tap_exclusive_open() { let _tap1 = Tap::open_named("exclusivetap").unwrap(); // Opening same tap device a second time should not be permitted. Tap::open_named("exclusivetap").unwrap_err(); } #[test] fn test_set_options() { // This line will fail to provide an initialized FD if the test is not run as root. 
let tap = Tap::open_named("").unwrap(); tap.set_vnet_hdr_size(16).unwrap(); tap.set_offload(0).unwrap(); } #[test] fn test_raw_fd() { let tap = Tap::open_named("").unwrap(); assert_eq!(tap.as_raw_fd(), tap.tap_file.as_raw_fd()); } #[test] fn test_write_iovec() { let mut tap = Tap::open_named("").unwrap(); enable(&tap); let tap_traffic_simulator = TapTrafficSimulator::new(if_index(&tap)); let mut fragment1 = vmm_sys_util::rand::rand_bytes(PAYLOAD_SIZE); fragment1.as_mut_slice()[..generated::ETH_HLEN as usize] .copy_from_slice(&[0; generated::ETH_HLEN as usize]); let fragment2 = vmm_sys_util::rand::rand_bytes(PAYLOAD_SIZE); let fragment3 = vmm_sys_util::rand::rand_bytes(PAYLOAD_SIZE); let scattered = IoVecBuffer::from(vec![ fragment1.as_slice(), fragment2.as_slice(), fragment3.as_slice(), ]); let num_bytes = tap.write_iovec(&scattered).unwrap(); assert_eq!(num_bytes, scattered.len() as usize); let mut read_buf = vec![0u8; scattered.len() as usize]; assert!(tap_traffic_simulator.pop_rx_packet(&mut read_buf)); assert_eq!( &read_buf[..PAYLOAD_SIZE - VNET_HDR_SIZE], &fragment1[VNET_HDR_SIZE..] 
); assert_eq!( &read_buf[PAYLOAD_SIZE - VNET_HDR_SIZE..2 * PAYLOAD_SIZE - VNET_HDR_SIZE], fragment2 ); assert_eq!( &read_buf[2 * PAYLOAD_SIZE - VNET_HDR_SIZE..3 * PAYLOAD_SIZE - VNET_HDR_SIZE], fragment3 ); } #[test] fn test_read_iovec() { let mut tap = Tap::open_named("").unwrap(); enable(&tap); let tap_traffic_simulator = TapTrafficSimulator::new(if_index(&tap)); let mut buff1 = vec![0; PAYLOAD_SIZE + VNET_HDR_SIZE]; let mut buff2 = vec![0; 2 * PAYLOAD_SIZE]; let mut rx_buffers = IoVecBufferMut::from(vec![buff1.as_mut_slice(), buff2.as_mut_slice()]); let packet = vmm_sys_util::rand::rand_alphanumerics(2 * PAYLOAD_SIZE); tap_traffic_simulator.push_tx_packet(packet.as_bytes()); assert_eq!( tap.read_iovec(rx_buffers.as_iovec_mut_slice()).unwrap(), 2 * PAYLOAD_SIZE + VNET_HDR_SIZE ); assert_eq!(&buff1[VNET_HDR_SIZE..], &packet.as_bytes()[..PAYLOAD_SIZE]); assert_eq!(&buff2[..PAYLOAD_SIZE], &packet.as_bytes()[PAYLOAD_SIZE..]); assert_eq!(&buff2[PAYLOAD_SIZE..], &vec![0; PAYLOAD_SIZE]) } } ================================================ FILE: src/vmm/src/devices/virtio/net/test_utils.rs ================================================ // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
// SPDX-License-Identifier: Apache-2.0 #![doc(hidden)] use std::fs::File; use std::mem; use std::os::raw::c_ulong; use std::os::unix::io::{AsRawFd, FromRawFd}; use std::process::Command; use std::str::FromStr; use std::sync::atomic::{AtomicUsize, Ordering}; use std::sync::{Arc, Mutex}; use crate::devices::virtio::net::Net; #[cfg(test)] use crate::devices::virtio::net::device::vnet_hdr_len; use crate::devices::virtio::net::generated::net_device_flags; use crate::devices::virtio::net::tap::{IfReqBuilder, Tap}; use crate::devices::virtio::queue::Queue; use crate::devices::virtio::test_utils::VirtQueue; use crate::mmds::data_store::Mmds; use crate::mmds::ns::MmdsNetworkStack; use crate::rate_limiter::RateLimiter; use crate::utils::net::mac::MacAddr; use crate::vstate::memory::{GuestAddress, GuestMemoryMmap}; static NEXT_INDEX: AtomicUsize = AtomicUsize::new(1); pub fn default_net() -> Net { let next_tap = NEXT_INDEX.fetch_add(1, Ordering::SeqCst); // Id is the firecracker-facing identifier, e.g. local to the FC process. We thus do not need to // make sure it is globally unique let tap_device_id = format!("net-device{}", next_tap); // This is the device name on the host, and thus needs to be unique between all firecracker // processes. We cannot use the above counter to ensure this uniqueness (as it is // per-process). Thus, ask the kernel to assign us a number. 
let tap_if_name = "net-device%d"; let guest_mac = default_guest_mac(); let mut net = Net::new( tap_device_id, tap_if_name, Some(guest_mac), RateLimiter::default(), RateLimiter::default(), ) .unwrap(); net.configure_mmds_network_stack( MmdsNetworkStack::default_ipv4_addr(), Arc::new(Mutex::new(Mmds::default())), ); enable(&net.tap); net } pub fn default_net_no_mmds() -> Net { let next_tap = NEXT_INDEX.fetch_add(1, Ordering::SeqCst); let tap_device_id = format!("net-device{}", next_tap); let guest_mac = default_guest_mac(); let net = Net::new( tap_device_id, "net-device%d", Some(guest_mac), RateLimiter::default(), RateLimiter::default(), ) .unwrap(); enable(&net.tap); net } #[derive(Debug)] pub enum NetQueue { Rx, Tx, } #[derive(Debug)] pub enum NetEvent { RxQueue, RxRateLimiter, Tap, TxQueue, TxRateLimiter, } #[derive(Debug)] pub struct TapTrafficSimulator { socket: File, send_addr: libc::sockaddr_ll, } impl TapTrafficSimulator { pub fn new(tap_index: i32) -> Self { // Create sockaddr_ll struct. // SAFETY: sockaddr_storage has no invariants and can be safely zeroed. let mut storage: libc::sockaddr_storage = unsafe { mem::zeroed() }; let send_addr_ptr = &mut storage as *mut libc::sockaddr_storage; // SAFETY: `sock_addr` is a valid pointer and safe to dereference. unsafe { let sock_addr: *mut libc::sockaddr_ll = send_addr_ptr.cast::(); (*sock_addr).sll_family = libc::sa_family_t::try_from(libc::AF_PACKET).unwrap(); (*sock_addr).sll_protocol = u16::try_from(libc::ETH_P_ALL).unwrap().to_be(); (*sock_addr).sll_halen = u8::try_from(libc::ETH_ALEN).unwrap(); (*sock_addr).sll_ifindex = tap_index; } // Bind socket to tap interface. let socket = create_socket(); // SAFETY: Call is safe because parameters are valid. let ret = unsafe { libc::bind( socket.as_raw_fd(), send_addr_ptr.cast(), libc::socklen_t::try_from(mem::size_of::()).unwrap(), ) }; if ret == -1 { panic!("Can't create TapChannel"); } // Enable nonblocking // SAFETY: Call is safe because parameters are valid. 
let ret = unsafe { libc::fcntl(socket.as_raw_fd(), libc::F_SETFL, libc::O_NONBLOCK) }; if ret == -1 { panic!("Couldn't make TapChannel non-blocking"); } Self { socket, // SAFETY: size_of::() is greater than // sizeof::(), so to return an owned value of sockaddr_ll // from the stack-local libc::sockaddr_storage that we have, we need to // 1. Create a zeroed out libc::sockaddr_ll, // 2. Copy over the first size_of::() bytes into the struct we want // to return // We cannot simply return "*(send_addr_ptr as *const libc::sockaddr_ll)", as this // would return a reference to a variable that lives in the stack frame of the current // function, and which will no longer be valid after returning. // transmute_copy does all this for us. // Note that this is how these structures are intended to be used in C. send_addr: unsafe { mem::transmute_copy(&storage) }, } } pub fn push_tx_packet(&self, buf: &[u8]) { // SAFETY: The call is safe since the parameters are valid. let res = unsafe { libc::sendto( self.socket.as_raw_fd(), buf.as_ptr().cast(), buf.len(), 0, (&self.send_addr as *const libc::sockaddr_ll).cast(), libc::socklen_t::try_from(mem::size_of::()).unwrap(), ) }; if res == -1 { panic!("Can't inject tx_packet"); } } pub fn pop_rx_packet(&self, buf: &mut [u8]) -> bool { // SAFETY: The call is safe since the parameters are valid. let ret = unsafe { libc::recvfrom( self.socket.as_raw_fd(), buf.as_ptr() as *mut _, buf.len(), 0, (&mut mem::zeroed() as *mut libc::sockaddr_storage).cast(), &mut libc::socklen_t::try_from(mem::size_of::()).unwrap(), ) }; if ret == -1 { return false; } true } } pub fn create_socket() -> File { // SAFETY: This is safe since we check the return value. let socket = unsafe { libc::socket(libc::AF_PACKET, libc::SOCK_RAW, libc::ETH_P_ALL.to_be()) }; if socket < 0 { panic!("Unable to create tap socket"); } // SAFETY: This is safe; nothing else will use or hold onto the raw socket fd. 
unsafe { File::from_raw_fd(socket) } } // Returns handles to virtio queues creation/activation and manipulation. pub fn virtqueues(mem: &GuestMemoryMmap) -> (VirtQueue<'_>, VirtQueue<'_>) { let rxq = VirtQueue::new(GuestAddress(0), mem, 16); let txq = VirtQueue::new(GuestAddress(0x1000), mem, 16); assert!(rxq.end().0 < txq.start().0); (rxq, txq) } pub fn if_index(tap: &Tap) -> i32 { let sock = create_socket(); let ifreq = IfReqBuilder::new() .if_name(&tap.if_name) .execute( &sock, c_ulong::from(super::generated::sockios::SIOCGIFINDEX), ) .unwrap(); // SAFETY: Using this union variant is safe since `SIOCGIFINDEX` returns an integer. unsafe { ifreq.ifr_ifru.ifru_ivalue } } /// Enable the tap interface. pub fn enable(tap: &Tap) { // Disable IPv6 router advertisement requests Command::new("sh") .arg("-c") .arg(format!( "echo 0 > /proc/sys/net/ipv6/conf/{}/accept_ra", tap.if_name_as_str() )) .output() .unwrap(); let sock = create_socket(); IfReqBuilder::new() .if_name(&tap.if_name) .flags( (net_device_flags::IFF_UP | net_device_flags::IFF_RUNNING | net_device_flags::IFF_NOARP) .try_into() .unwrap(), ) .execute( &sock, c_ulong::from(super::generated::sockios::SIOCSIFFLAGS), ) .unwrap(); } #[cfg(test)] pub(crate) fn inject_tap_tx_frame(net: &Net, len: usize) -> Vec { use std::os::unix::ffi::OsStrExt; assert!(len >= vnet_hdr_len()); let tap_traffic_simulator = TapTrafficSimulator::new(if_index(&net.tap)); let mut frame = vmm_sys_util::rand::rand_alphanumerics(len - vnet_hdr_len()) .as_bytes() .to_vec(); tap_traffic_simulator.push_tx_packet(&frame); frame.splice(0..0, vec![b'\0'; vnet_hdr_len()]); frame } pub fn default_guest_mac() -> MacAddr { MacAddr::from_str("11:22:33:44:55:66").unwrap() } pub fn set_mac(net: &mut Net, mac: MacAddr) { net.guest_mac = Some(mac); net.config_space.guest_mac = mac; } // Assigns "guest virtio driver" activated queues to the net device. 
pub fn assign_queues(net: &mut Net, rxq: Queue, txq: Queue) { net.queues.clear(); net.queues.push(rxq); net.queues.push(txq); } #[cfg(test)] #[allow(clippy::cast_possible_truncation)] #[allow(clippy::undocumented_unsafe_blocks)] pub mod test { use std::os::unix::ffi::OsStrExt; use std::sync::{Arc, Mutex, MutexGuard}; use std::{cmp, fmt}; use event_manager::{EventManager, SubscriberId, SubscriberOps}; use crate::check_metric_after_block; use crate::devices::virtio::device::VirtioDevice; use crate::devices::virtio::net::device::vnet_hdr_len; use crate::devices::virtio::net::generated::ETH_HLEN; use crate::devices::virtio::net::test_utils::{ NetEvent, NetQueue, assign_queues, default_net, inject_tap_tx_frame, }; use crate::devices::virtio::net::{MAX_BUFFER_SIZE, Net, RX_INDEX, TX_INDEX}; use crate::devices::virtio::queue::{VIRTQ_DESC_F_NEXT, VIRTQ_DESC_F_WRITE}; use crate::devices::virtio::test_utils::{VirtQueue, VirtqDesc, default_interrupt}; use crate::devices::virtio::transport::VirtioInterruptType; use crate::logger::IncMetric; use crate::vstate::memory::{Address, Bytes, GuestAddress, GuestMemoryMmap}; pub struct TestHelper<'a> { pub event_manager: EventManager>>, pub subscriber_id: SubscriberId, pub net: Arc>, pub mem: &'a GuestMemoryMmap, pub rxq: VirtQueue<'a>, pub txq: VirtQueue<'a>, } impl fmt::Debug for TestHelper<'_> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("TestHelper") .field("event_manager", &"?") .field("subscriber_id", &self.subscriber_id) .field("net", &self.net) .field("mem", &self.mem) .field("rxq", &self.rxq) .field("txq", &self.txq) .finish() } } impl<'a> TestHelper<'a> { const QUEUE_SIZE: u16 = 16; pub fn get_default(mem: &'a GuestMemoryMmap) -> TestHelper<'a> { let mut event_manager = EventManager::new().unwrap(); let mut net = default_net(); let rxq = VirtQueue::new(GuestAddress(0), mem, Self::QUEUE_SIZE); let txq = VirtQueue::new( rxq.end().unchecked_align_up(VirtqDesc::ALIGNMENT), mem, Self::QUEUE_SIZE, ); 
assign_queues(&mut net, rxq.create_queue(), txq.create_queue()); let net = Arc::new(Mutex::new(net)); let subscriber_id = event_manager.add_subscriber(net.clone()); Self { event_manager, subscriber_id, net, mem, rxq, txq, } } pub fn net(&mut self) -> MutexGuard<'_, Net> { self.net.lock().unwrap() } pub fn activate_net(&mut self) { let interrupt = default_interrupt(); self.net .lock() .unwrap() .activate(self.mem.clone(), interrupt) .unwrap(); // Process the activate event. let ev_count = self.event_manager.run_with_timeout(100).unwrap(); assert_eq!(ev_count, 1); } pub fn simulate_event(&mut self, event: NetEvent) { match event { NetEvent::RxQueue => self.net().process_rx_queue_event(), NetEvent::RxRateLimiter => self.net().process_rx_rate_limiter_event(), NetEvent::Tap => self.net().process_tap_rx_event(), NetEvent::TxQueue => self.net().process_tx_queue_event(), NetEvent::TxRateLimiter => self.net().process_tx_rate_limiter_event(), }; } pub fn data_addr(&self) -> u64 { self.txq.end().raw_value() } pub fn add_desc_chain( &mut self, queue: NetQueue, addr_offset: u64, desc_list: &[(u16, u32, u16)], ) { // Get queue and event_fd. let net = self.net.lock().unwrap(); let (queue, event_fd) = match queue { NetQueue::Rx => (&self.rxq, &net.queue_evts[RX_INDEX]), NetQueue::Tx => (&self.txq, &net.queue_evts[TX_INDEX]), }; // Create the descriptor chain. let mut iter = desc_list.iter().peekable(); let mut addr = self.data_addr() + addr_offset; while let Some(&(index, len, flags)) = iter.next() { let desc = &queue.dtable[index as usize]; desc.set(addr, len, flags, 0); if let Some(&&(next_index, _, _)) = iter.peek() { desc.flags.set(flags | VIRTQ_DESC_F_NEXT); desc.next.set(next_index); } addr += u64::from(len); // Add small random gaps between descriptor addresses in order to make sure we // don't blindly read contiguous memory. addr += u64::from(vmm_sys_util::rand::xor_pseudo_rng_u32()) % 10; } // Mark the chain as available. 
if let Some(&(index, _, _)) = desc_list.first() { let ring_index = queue.avail.idx.get(); queue.avail.ring[ring_index as usize].set(index); queue.avail.idx.set(ring_index + 1); } event_fd.write(1).unwrap(); } /// Generate a tap frame of `frame_len` and check that it is not read and /// the descriptor chain has been discarded pub fn check_rx_discarded_buffer(&mut self, frame_len: usize) -> Vec { let old_used_descriptors = self.net().rx_buffer.used_descriptors; // Inject frame to tap and run epoll. let frame = inject_tap_tx_frame(&self.net(), frame_len); check_metric_after_block!( self.net().metrics.rx_packets_count, 0, self.event_manager.run_with_timeout(100).unwrap() ); // Check that the descriptor chain has been discarded. assert_eq!( self.net().rx_buffer.used_descriptors, old_used_descriptors + 1 ); assert!( self.net() .interrupt_trigger() .has_pending_interrupt(VirtioInterruptType::Queue(RX_INDEX as u16)) ); frame } /// Check that after adding a valid Rx queue descriptor chain a previously deferred frame /// is eventually received by the guest pub fn check_rx_queue_resume(&mut self, expected_frame: &[u8]) { // Need to call this to flush all previous frame // and advance RX queue. self.net().finish_frame(); let used_idx = self.rxq.used.idx.get(); // Add a valid Rx avail descriptor chain and run epoll. self.add_desc_chain( NetQueue::Rx, 0, &[(0, MAX_BUFFER_SIZE as u32, VIRTQ_DESC_F_WRITE)], ); check_metric_after_block!( self.net().metrics.rx_packets_count, 1, self.event_manager.run_with_timeout(100).unwrap() ); // Check that the expected frame was sent to the Rx queue eventually. assert_eq!(self.rxq.used.idx.get(), used_idx + 1); assert!( self.net() .interrupt_trigger() .has_pending_interrupt(VirtioInterruptType::Queue(RX_INDEX as u16)) ); self.rxq .check_used_elem(used_idx, 0, expected_frame.len().try_into().unwrap()); self.rxq.dtable[0].check_data(expected_frame); } // Generates a frame of `frame_len` and writes it to the provided descriptor chain. 
// Doesn't generate an error if the descriptor chain is longer than `frame_len`. pub fn write_tx_frame(&self, desc_list: &[(u16, u32, u16)], frame_len: usize) -> Vec { let mut frame = vmm_sys_util::rand::rand_alphanumerics(frame_len) .as_bytes() .to_vec(); let prefix_len = vnet_hdr_len() + ETH_HLEN as usize; frame.splice(..prefix_len, vec![0; prefix_len]); let mut frame_slice = frame.as_slice(); for &(index, len, _) in desc_list { let chunk_size = cmp::min(frame_slice.len(), len as usize); self.mem .write_slice( &frame_slice[..chunk_size], GuestAddress::new(self.txq.dtable[index as usize].addr.get()), ) .unwrap(); frame_slice = &frame_slice[chunk_size..]; } frame } } } ================================================ FILE: src/vmm/src/devices/virtio/persist.rs ================================================ // Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 //! Defines the structures needed for saving/restoring Virtio primitives. use std::num::Wrapping; use std::sync::atomic::Ordering; use std::sync::{Arc, Mutex}; use serde::{Deserialize, Serialize}; use super::queue::{InvalidAvailIdx, QueueError}; use super::transport::mmio::IrqTrigger; use crate::devices::virtio::device::{VirtioDevice, VirtioDeviceType}; use crate::devices::virtio::generated::virtio_ring::VIRTIO_RING_F_EVENT_IDX; use crate::devices::virtio::queue::Queue; use crate::devices::virtio::transport::mmio::MmioTransport; use crate::snapshot::Persist; use crate::vstate::memory::{GuestAddress, GuestMemoryMmap}; /// Errors thrown during restoring virtio state. #[derive(Debug, thiserror::Error, displaydoc::Display)] pub enum PersistError { /// Snapshot state contains invalid queue info. InvalidInput, /// Could not restore queue: {0} QueueConstruction(QueueError), /// {0} InvalidAvailIdx(#[from] InvalidAvailIdx), } /// Queue information saved in snapshot. 
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub struct QueueState {
    /// The maximal size in elements offered by the device
    max_size: u16,
    /// The queue size in elements the driver selected
    size: u16,
    /// Indicates if the queue is finished with configuration
    ready: bool,
    /// Guest physical address of the descriptor table
    desc_table: u64,
    /// Guest physical address of the available ring
    avail_ring: u64,
    /// Guest physical address of the used ring
    used_ring: u64,
    // Ring indices as free-running counters; `Wrapping` gives the virtio
    // modulo-2^16 arithmetic.
    next_avail: Wrapping<u16>,
    next_used: Wrapping<u16>,
    /// The number of added used buffers since last guest kick
    num_added: Wrapping<u16>,
}

/// Auxiliary structure for restoring queues.
#[derive(Debug, Clone)]
pub struct QueueConstructorArgs {
    /// Pointer to guest memory.
    pub mem: GuestMemoryMmap,
    /// Is device this queue belong to activated
    pub is_activated: bool,
}

impl Persist<'_> for Queue {
    type State = QueueState;
    type ConstructorArgs = QueueConstructorArgs;
    type Error = QueueError;

    /// Snapshots the queue. Only guest-physical addresses and counters are
    /// saved; the host-side ring pointers are re-derived on restore.
    fn save(&self) -> Self::State {
        QueueState {
            max_size: self.max_size,
            size: self.size,
            ready: self.ready,
            desc_table: self.desc_table_address.0,
            avail_ring: self.avail_ring_address.0,
            used_ring: self.used_ring_address.0,
            next_avail: self.next_avail,
            next_used: self.next_used,
            num_added: self.num_added,
        }
    }

    /// Rebuilds a queue from `state`, re-resolving the raw ring pointers from
    /// guest memory (via `initialize`) only when the owning device was
    /// activated at snapshot time.
    fn restore(
        constructor_args: Self::ConstructorArgs,
        state: &Self::State,
    ) -> Result<Self, Self::Error> {
        let mut queue = Queue {
            max_size: state.max_size,
            size: state.size,
            ready: state.ready,
            desc_table_address: GuestAddress(state.desc_table),
            avail_ring_address: GuestAddress(state.avail_ring),
            used_ring_address: GuestAddress(state.used_ring),
            // Host pointers are not part of the snapshot; start null and let
            // `initialize` fill them in below if the device is activated.
            desc_table_ptr: std::ptr::null(),
            avail_ring_ptr: std::ptr::null_mut(),
            used_ring_ptr: std::ptr::null_mut(),
            next_avail: state.next_avail,
            next_used: state.next_used,
            // Notification suppression is re-enabled by the device restore
            // path (see `VirtioDeviceState::build_queues_checked`), not here.
            uses_notif_suppression: false,
            num_added: state.num_added,
        };

        if constructor_args.is_activated {
            queue.initialize(&constructor_args.mem)?;
        }

        Ok(queue)
    }
}

/// State of a VirtioDevice.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] pub struct VirtioDeviceState { /// Device type. pub device_type: VirtioDeviceType, /// Available virtio features. pub avail_features: u64, /// Negotiated virtio features. pub acked_features: u64, /// List of queues. pub queues: Vec, /// Flag for activated status. pub activated: bool, } impl VirtioDeviceState { /// Construct the virtio state of a device. pub fn from_device(device: &dyn VirtioDevice) -> Self { VirtioDeviceState { device_type: device.device_type(), avail_features: device.avail_features(), acked_features: device.acked_features(), queues: device.queues().iter().map(Persist::save).collect(), activated: device.is_activated(), } } /// Does sanity checking on the `self` state against expected values /// and builds queues from state. pub fn build_queues_checked( &self, mem: &GuestMemoryMmap, expected_device_type: VirtioDeviceType, expected_num_queues: usize, expected_queue_max_size: u16, ) -> Result, PersistError> { // Sanity check: // - right device type, // - acked features is a subset of available ones, // - right number of queues, if self.device_type != expected_device_type || (self.acked_features & !self.avail_features) != 0 || self.queues.len() != expected_num_queues { return Err(PersistError::InvalidInput); } let uses_notif_suppression = (self.acked_features & (1u64 << VIRTIO_RING_F_EVENT_IDX)) != 0; let queue_construction_args = QueueConstructorArgs { mem: mem.clone(), is_activated: self.activated, }; let queues: Vec = self .queues .iter() .map(|queue_state| { Queue::restore(queue_construction_args.clone(), queue_state) .map(|mut queue| { if uses_notif_suppression { queue.enable_notif_suppression(); } queue }) .map_err(PersistError::QueueConstruction) }) .collect::>()?; for q in &queues { // Sanity check queue size and queue max size. if q.max_size != expected_queue_max_size { return Err(PersistError::InvalidInput); } } Ok(queues) } } /// Transport information saved in snapshot. 
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] pub struct MmioTransportState { // The register where feature bits are stored. features_select: u32, // The register where features page is selected. acked_features_select: u32, queue_select: u32, device_status: u32, config_generation: u32, interrupt_status: u32, } /// Auxiliary structure for initializing the transport when resuming from a snapshot. #[derive(Debug)] pub struct MmioTransportConstructorArgs { /// Pointer to guest memory. pub mem: GuestMemoryMmap, /// Interrupt to use for the device pub interrupt: Arc, /// Device associated with the current MMIO state. pub device: Arc>, /// Is device backed by vhost-user. pub is_vhost_user: bool, } impl Persist<'_> for MmioTransport { type State = MmioTransportState; type ConstructorArgs = MmioTransportConstructorArgs; type Error = (); fn save(&self) -> Self::State { MmioTransportState { features_select: self.features_select, acked_features_select: self.acked_features_select, queue_select: self.queue_select, device_status: self.device_status, config_generation: self.config_generation, interrupt_status: self.interrupt.irq_status.load(Ordering::SeqCst), } } fn restore( constructor_args: Self::ConstructorArgs, state: &Self::State, ) -> Result { let mut transport = MmioTransport::new( constructor_args.mem, constructor_args.interrupt, constructor_args.device, constructor_args.is_vhost_user, ); transport.features_select = state.features_select; transport.acked_features_select = state.acked_features_select; transport.queue_select = state.queue_select; transport.device_status = state.device_status; transport.config_generation = state.config_generation; transport .interrupt .irq_status .store(state.interrupt_status, Ordering::SeqCst); Ok(transport) } } #[cfg(test)] mod tests { use vmm_sys_util::tempfile::TempFile; use super::*; use crate::devices::virtio::block::virtio::VirtioBlock; use crate::devices::virtio::block::virtio::device::FileEngineType; use 
crate::devices::virtio::block::virtio::test_utils::default_block_with_path; use crate::devices::virtio::net::Net; use crate::devices::virtio::net::test_utils::default_net; use crate::devices::virtio::test_utils::default_mem; use crate::devices::virtio::transport::mmio::tests::DummyDevice; use crate::devices::virtio::vsock::{Vsock, VsockUnixBackend}; const DEFAULT_QUEUE_MAX_SIZE: u16 = 256; impl Default for QueueState { fn default() -> QueueState { QueueState { max_size: DEFAULT_QUEUE_MAX_SIZE, size: DEFAULT_QUEUE_MAX_SIZE, ready: false, desc_table: 0, avail_ring: 0, used_ring: 0, next_avail: Wrapping(0), next_used: Wrapping(0), num_added: Wrapping(0), } } } #[test] fn test_virtiodev_sanity_checks() { let max_size = DEFAULT_QUEUE_MAX_SIZE; let mut state = VirtioDeviceState { device_type: VirtioDeviceType::Net, avail_features: 0, acked_features: 0, queues: vec![], activated: false, }; let mem = default_mem(); // Valid checks. state .build_queues_checked(&mem, VirtioDeviceType::Net, 0, max_size) .unwrap(); // Invalid dev-type. state .build_queues_checked(&mem, VirtioDeviceType::Block, 0, max_size) .unwrap_err(); // Invalid num-queues. state .build_queues_checked(&mem, VirtioDeviceType::Net, 1, max_size) .unwrap_err(); // Unavailable features acked. state.acked_features = 1; state .build_queues_checked(&mem, VirtioDeviceType::Net, 0, max_size) .unwrap_err(); // Validate queue sanity checks. let mut state = VirtioDeviceState { device_type: VirtioDeviceType::Net, avail_features: 0, acked_features: 0, queues: vec![], activated: false, }; let good_q = QueueState::default(); state.queues = vec![good_q]; // Valid. state .build_queues_checked(&mem, VirtioDeviceType::Net, state.queues.len(), max_size) .unwrap(); // Invalid max queue size. let bad_q = QueueState { max_size: max_size + 1, ..Default::default() }; state.queues = vec![bad_q]; state .build_queues_checked(&mem, VirtioDeviceType::Net, state.queues.len(), max_size) .unwrap_err(); // Invalid: size > max. 
let bad_q = QueueState { size: max_size + 1, ..Default::default() }; state.queues = vec![bad_q]; state.activated = true; state .build_queues_checked(&mem, VirtioDeviceType::Net, state.queues.len(), max_size) .unwrap_err(); // activated && !q.is_valid() let bad_q = QueueState::default(); state.queues = vec![bad_q]; state.activated = true; state .build_queues_checked(&mem, VirtioDeviceType::Net, state.queues.len(), max_size) .unwrap_err(); } #[test] fn test_queue_persistence() { let mem = default_mem(); let mut queue = Queue::new(128); queue.ready = true; queue.size = queue.max_size; queue.initialize(&mem).unwrap(); let queue_state = queue.save(); let serialized_data = bitcode::serialize(&queue_state).unwrap(); let ca = QueueConstructorArgs { mem, is_activated: true, }; let restored_state = bitcode::deserialize(&serialized_data).unwrap(); let restored_queue = Queue::restore(ca, &restored_state).unwrap(); assert_eq!(restored_queue, queue); } #[test] fn test_virtio_device_state_serde() { let dummy = DummyDevice::new(); let state = VirtioDeviceState::from_device(&dummy); let serialized_data = bitcode::serialize(&state).unwrap(); let restored_state: VirtioDeviceState = bitcode::deserialize(&serialized_data).unwrap(); assert_eq!(restored_state, state); } impl PartialEq for MmioTransport { fn eq(&self, other: &MmioTransport) -> bool { let self_dev_type = self.device().lock().unwrap().device_type(); self.acked_features_select == other.acked_features_select && self.features_select == other.features_select && self.queue_select == other.queue_select && self.device_status == other.device_status && self.config_generation == other.config_generation && self.interrupt.irq_status.load(Ordering::SeqCst) == other.interrupt.irq_status.load(Ordering::SeqCst) && // Only checking equality of device type, actual device (de)ser is tested by that // device's tests. 
self_dev_type == other.device().lock().unwrap().device_type() } } fn generic_mmiotransport_persistence_test( mmio_transport: MmioTransport, interrupt: Arc, mem: GuestMemoryMmap, device: Arc>, ) { let transport_state = mmio_transport.save(); let serialized_data = bitcode::serialize(&transport_state).unwrap(); let restore_args = MmioTransportConstructorArgs { mem, interrupt, device, is_vhost_user: false, }; let restored_state = bitcode::deserialize(&serialized_data).unwrap(); let restored_mmio_transport = MmioTransport::restore(restore_args, &restored_state).unwrap(); assert_eq!(restored_mmio_transport, mmio_transport); } fn create_default_block() -> ( MmioTransport, Arc, GuestMemoryMmap, Arc>, ) { let mem = default_mem(); let interrupt = Arc::new(IrqTrigger::new()); // Create backing file. let f = TempFile::new().unwrap(); f.as_file().set_len(0x1000).unwrap(); let block = default_block_with_path( f.as_path().to_str().unwrap().to_string(), FileEngineType::default(), ); let block = Arc::new(Mutex::new(block)); let mmio_transport = MmioTransport::new(mem.clone(), interrupt.clone(), block.clone(), false); (mmio_transport, interrupt, mem, block) } fn create_default_net() -> ( MmioTransport, Arc, GuestMemoryMmap, Arc>, ) { let mem = default_mem(); let interrupt = Arc::new(IrqTrigger::new()); let net = Arc::new(Mutex::new(default_net())); let mmio_transport = MmioTransport::new(mem.clone(), interrupt.clone(), net.clone(), false); (mmio_transport, interrupt, mem, net) } fn default_vsock() -> ( MmioTransport, Arc, GuestMemoryMmap, Arc>>, ) { let mem = default_mem(); let interrupt = Arc::new(IrqTrigger::new()); let guest_cid = 52; let mut temp_uds_path = TempFile::new().unwrap(); // Remove the file so the path can be used by the socket. 
temp_uds_path.remove().unwrap(); let uds_path = String::from(temp_uds_path.as_path().to_str().unwrap()); let backend = VsockUnixBackend::new(guest_cid, uds_path).unwrap(); let vsock = Vsock::new(guest_cid, backend).unwrap(); let vsock = Arc::new(Mutex::new(vsock)); let mmio_transport = MmioTransport::new(mem.clone(), interrupt.clone(), vsock.clone(), false); (mmio_transport, interrupt, mem, vsock) } #[test] fn test_block_over_mmiotransport_persistence() { let (mmio_transport, interrupt, mem, block) = create_default_block(); generic_mmiotransport_persistence_test(mmio_transport, interrupt, mem, block); } #[test] fn test_net_over_mmiotransport_persistence() { let (mmio_transport, interrupt, mem, net) = create_default_net(); generic_mmiotransport_persistence_test(mmio_transport, interrupt, mem, net); } #[test] fn test_vsock_over_mmiotransport_persistence() { let (mmio_transport, interrupt, mem, vsock) = default_vsock(); generic_mmiotransport_persistence_test(mmio_transport, interrupt, mem, vsock); } } ================================================ FILE: src/vmm/src/devices/virtio/pmem/device.rs ================================================ // Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
// SPDX-License-Identifier: Apache-2.0 use std::fs::{File, OpenOptions}; use std::ops::{Deref, DerefMut}; use std::os::fd::AsRawFd; use std::sync::{Arc, Mutex}; use kvm_bindings::{KVM_MEM_READONLY, kvm_userspace_memory_region}; use kvm_ioctls::VmFd; use serde::{Deserialize, Serialize}; use vm_allocator::AllocPolicy; use vm_memory::mmap::{MmapRegionBuilder, MmapRegionError}; use vm_memory::{GuestAddress, GuestMemoryError}; use vmm_sys_util::eventfd::EventFd; use crate::devices::virtio::ActivateError; use crate::devices::virtio::device::{ActiveState, DeviceState, VirtioDevice, VirtioDeviceType}; use crate::devices::virtio::generated::virtio_config::VIRTIO_F_VERSION_1; use crate::devices::virtio::pmem::PMEM_QUEUE_SIZE; use crate::devices::virtio::pmem::metrics::{PmemMetrics, PmemMetricsPerDevice}; use crate::devices::virtio::queue::{DescriptorChain, InvalidAvailIdx, Queue, QueueError}; use crate::devices::virtio::transport::{VirtioInterrupt, VirtioInterruptType}; use crate::logger::{IncMetric, error, info}; use crate::utils::{align_up, u64_to_usize}; use crate::vmm_config::pmem::PmemConfig; use crate::vstate::memory::{ByteValued, Bytes, GuestMemoryMmap, GuestMmapRegion}; use crate::vstate::vm::VmError; use crate::{Vm, impl_device_type}; #[derive(Debug, thiserror::Error, displaydoc::Display)] pub enum PmemError { /// Cannot set the memory regions: {0} SetUserMemoryRegion(VmError), /// Unablet to allocate a KVM slot for the device NoKvmSlotAvailable, /// Error accessing backing file: {0} BackingFile(std::io::Error), /// Error backing file size is 0 BackingFileZeroSize, /// Error with EventFd: {0} EventFd(std::io::Error), /// Unexpected read-only descriptor ReadOnlyDescriptor, /// Unexpected write-only descriptor WriteOnlyDescriptor, /// UnknownRequestType: {0} UnknownRequestType(u32), /// Descriptor chain too short DescriptorChainTooShort, /// Guest memory error: {0} GuestMemory(#[from] GuestMemoryError), /// Error handling the VirtIO queue: {0} Queue(#[from] 
QueueError), /// Error during obtaining the descriptor from the queue: {0} QueuePop(#[from] InvalidAvailIdx), } const VIRTIO_PMEM_REQ_TYPE_FLUSH: u32 = 0; const SUCCESS: i32 = 0; const FAILURE: i32 = -1; #[derive(Debug, Default, Copy, Clone, Serialize, Deserialize)] #[repr(C)] pub struct ConfigSpace { // Physical address of the first byte of the persistent memory region. pub start: u64, // Length of the address range pub size: u64, } // SAFETY: `ConfigSpace` contains only PODs in `repr(c)`, without padding. unsafe impl ByteValued for ConfigSpace {} #[derive(Debug)] pub struct Pmem { // VirtIO fields pub avail_features: u64, pub acked_features: u64, pub activate_event: EventFd, // Transport fields pub device_state: DeviceState, pub queues: Vec, pub queue_events: Vec, // Pmem specific fields pub config_space: ConfigSpace, pub file: File, pub file_len: u64, pub mmap_ptr: u64, pub metrics: Arc, pub config: PmemConfig, } impl Drop for Pmem { fn drop(&mut self) { let mmap_len = align_up(self.file_len, Self::ALIGNMENT); // SAFETY: `mmap_ptr` is a valid pointer since Pmem can only be created with `new*` methods. // Mapping size calculation is same for original mmap call. unsafe { _ = libc::munmap(self.mmap_ptr as *mut libc::c_void, u64_to_usize(mmap_len)); } } } impl Pmem { // Pmem devices need to have address and size to be // a multiple of 2MB pub const ALIGNMENT: u64 = 2 * 1024 * 1024; /// Create a new Pmem device with a backing file at `disk_image_path` path. pub fn new(config: PmemConfig) -> Result { Self::new_with_queues(config, vec![Queue::new(PMEM_QUEUE_SIZE)]) } /// Create a new Pmem device with a backing file at `disk_image_path` path using a pre-created /// set of queues. 
pub fn new_with_queues(config: PmemConfig, queues: Vec) -> Result { let (file, file_len, mmap_ptr, mmap_len) = Self::mmap_backing_file(&config.path_on_host, config.read_only)?; Ok(Self { avail_features: 1u64 << VIRTIO_F_VERSION_1, acked_features: 0u64, activate_event: EventFd::new(libc::EFD_NONBLOCK).map_err(PmemError::EventFd)?, device_state: DeviceState::Inactive, queues, queue_events: vec![EventFd::new(libc::EFD_NONBLOCK).map_err(PmemError::EventFd)?], config_space: ConfigSpace { start: 0, size: mmap_len, }, file, file_len, mmap_ptr, metrics: PmemMetricsPerDevice::alloc(config.id.clone()), config, }) } fn mmap_backing_file(path: &str, read_only: bool) -> Result<(File, u64, u64, u64), PmemError> { let file = OpenOptions::new() .read(true) .write(!read_only) .open(path) .map_err(PmemError::BackingFile)?; let file_len = file.metadata().unwrap().len(); if (file_len == 0) { return Err(PmemError::BackingFileZeroSize); } let mut prot = libc::PROT_READ; if !read_only { prot |= libc::PROT_WRITE; } let mmap_len = align_up(file_len, Self::ALIGNMENT); let mmap_ptr = if (mmap_len == file_len) { // SAFETY: We are calling the system call with valid arguments and checking the returned // value unsafe { let r = libc::mmap( std::ptr::null_mut(), u64_to_usize(file_len), prot, libc::MAP_SHARED | libc::MAP_NORESERVE, file.as_raw_fd(), 0, ); if r == libc::MAP_FAILED { return Err(PmemError::BackingFile(std::io::Error::last_os_error())); } r } } else { // SAFETY: We are calling system calls with valid arguments and checking returned // values // // The double mapping is done to ensure the underlying memory has the size of // `mmap_len` (wich is 2MB aligned as per `virtio-pmem` specification) // First mmap creates a mapping of `mmap_len` while second mmaps the actual // file on top. The remaining gap between the end of the mmaped file and // the actual end of the memory region is backed by PRIVATE | ANONYMOUS memory. 
unsafe { let mmap_ptr = libc::mmap( std::ptr::null_mut(), u64_to_usize(mmap_len), prot, libc::MAP_PRIVATE | libc::MAP_NORESERVE | libc::MAP_ANONYMOUS, -1, 0, ); if mmap_ptr == libc::MAP_FAILED { return Err(PmemError::BackingFile(std::io::Error::last_os_error())); } let r = libc::mmap( mmap_ptr, u64_to_usize(file_len), prot, libc::MAP_SHARED | libc::MAP_NORESERVE | libc::MAP_FIXED, file.as_raw_fd(), 0, ); if r == libc::MAP_FAILED { return Err(PmemError::BackingFile(std::io::Error::last_os_error())); } mmap_ptr } }; Ok((file, file_len, mmap_ptr as u64, mmap_len)) } /// Allocate memory in past_mmio64 memory region pub fn alloc_region(&mut self, vm: &Vm) { let mut resource_allocator_lock = vm.resource_allocator(); let resource_allocator = resource_allocator_lock.deref_mut(); let addr = resource_allocator .past_mmio64_memory .allocate( self.config_space.size, Pmem::ALIGNMENT, AllocPolicy::FirstMatch, ) .unwrap(); self.config_space.start = addr.start(); } /// Set user memory region in KVM pub fn set_mem_region(&mut self, vm: &Vm) -> Result<(), PmemError> { let next_slot = vm.next_kvm_slot(1).ok_or(PmemError::NoKvmSlotAvailable)?; let memory_region = kvm_userspace_memory_region { slot: next_slot, guest_phys_addr: self.config_space.start, memory_size: self.config_space.size, userspace_addr: self.mmap_ptr, flags: if self.config.read_only { KVM_MEM_READONLY } else { 0 }, }; vm.set_user_memory_region(memory_region) .map_err(PmemError::SetUserMemoryRegion) } pub fn handle_queue(&mut self) -> Result<(), PmemError> { // This is safe since we checked in the event handler that the device is activated. let active_state = self.device_state.active_state().unwrap(); while let Some(head) = self.queues[0].pop()? 
{ let add_result = match self.process_chain(head) { Ok(()) => self.queues[0].add_used(head.index, 4), Err(err) => { error!("pmem: {err}"); self.metrics.event_fails.inc(); self.queues[0].add_used(head.index, 0) } }; if let Err(err) = add_result { error!("pmem: {err}"); self.metrics.event_fails.inc(); break; } } self.queues[0].advance_used_ring_idx(); if self.queues[0].prepare_kick() { active_state .interrupt .trigger(VirtioInterruptType::Queue(0)) .unwrap_or_else(|err| { error!("pmem: {err}"); self.metrics.event_fails.inc(); }); } Ok(()) } fn process_chain(&self, head: DescriptorChain) -> Result<(), PmemError> { // This is safe since we checked in the event handler that the device is activated. let active_state = self.device_state.active_state().unwrap(); if head.is_write_only() { return Err(PmemError::WriteOnlyDescriptor); } let request: u32 = active_state.mem.read_obj(head.addr)?; if request != VIRTIO_PMEM_REQ_TYPE_FLUSH { return Err(PmemError::UnknownRequestType(request)); } let Some(status_descriptor) = head.next_descriptor() else { return Err(PmemError::DescriptorChainTooShort); }; if !status_descriptor.is_write_only() { return Err(PmemError::ReadOnlyDescriptor); } let mut result = SUCCESS; // SAFETY: We are calling the system call with valid arguments and checking the returned // value unsafe { let ret = libc::msync( self.mmap_ptr as *mut libc::c_void, u64_to_usize(self.file_len), libc::MS_SYNC, ); if ret < 0 { error!("pmem: Unable to msync the file. 
Error: {}", ret); result = FAILURE; } } active_state.mem.write_obj(result, status_descriptor.addr)?; Ok(()) } pub fn process_queue(&mut self) { self.metrics.queue_event_count.inc(); if let Err(err) = self.queue_events[0].read() { error!("pmem: Failed to get queue event: {err:?}"); self.metrics.event_fails.inc(); return; } self.handle_queue().unwrap_or_else(|err| { error!("pmem: {err:?}"); self.metrics.event_fails.inc(); }); } } impl VirtioDevice for Pmem { impl_device_type!(VirtioDeviceType::Pmem); fn id(&self) -> &str { &self.config.id } fn avail_features(&self) -> u64 { self.avail_features } fn acked_features(&self) -> u64 { self.acked_features } fn set_acked_features(&mut self, acked_features: u64) { self.acked_features = acked_features; } fn queues(&self) -> &[Queue] { &self.queues } fn queues_mut(&mut self) -> &mut [Queue] { &mut self.queues } fn queue_events(&self) -> &[EventFd] { &self.queue_events } fn interrupt_trigger(&self) -> &dyn VirtioInterrupt { self.device_state .active_state() .expect("Device not activated") .interrupt .deref() } fn read_config(&self, offset: u64, data: &mut [u8]) { if let Some(config_space_bytes) = self.config_space.as_slice().get(u64_to_usize(offset)..) 
{ let len = config_space_bytes.len().min(data.len()); data[..len].copy_from_slice(&config_space_bytes[..len]); } else { error!("Failed to read config space"); self.metrics.cfg_fails.inc(); } } fn write_config(&mut self, _offset: u64, _data: &[u8]) {} fn activate( &mut self, mem: GuestMemoryMmap, interrupt: Arc, ) -> Result<(), ActivateError> { for q in self.queues.iter_mut() { q.initialize(&mem) .map_err(ActivateError::QueueMemoryError)?; } if self.activate_event.write(1).is_err() { self.metrics.activate_fails.inc(); return Err(ActivateError::EventFd); } self.device_state = DeviceState::Activated(ActiveState { mem, interrupt }); Ok(()) } fn is_activated(&self) -> bool { self.device_state.is_activated() } fn kick(&mut self) { if self.is_activated() { info!("kick pmem {}.", self.config.id); self.handle_queue(); } } } #[cfg(test)] mod tests { use vmm_sys_util::tempfile::TempFile; use super::*; use crate::devices::virtio::queue::{VIRTQ_DESC_F_NEXT, VIRTQ_DESC_F_WRITE}; use crate::devices::virtio::test_utils::{VirtQueue, default_interrupt, default_mem}; #[test] fn test_from_config() { let config = PmemConfig { id: "1".into(), path_on_host: "not_a_path".into(), root_device: true, read_only: false, }; assert!(matches!( Pmem::new(config).unwrap_err(), PmemError::BackingFile(_), )); let dummy_file = TempFile::new().unwrap(); let dummy_path = dummy_file.as_path().to_str().unwrap().to_string(); let config = PmemConfig { id: "1".into(), path_on_host: dummy_path.clone(), root_device: true, read_only: false, }; assert!(matches!( Pmem::new(config).unwrap_err(), PmemError::BackingFileZeroSize, )); dummy_file.as_file().set_len(0x20_0000); let config = PmemConfig { id: "1".into(), path_on_host: dummy_path, root_device: true, read_only: false, }; Pmem::new(config).unwrap(); } #[test] fn test_process_chain() { let dummy_file = TempFile::new().unwrap(); dummy_file.as_file().set_len(0x20_0000); let dummy_path = dummy_file.as_path().to_str().unwrap().to_string(); let config = PmemConfig 
{ id: "1".into(), path_on_host: dummy_path, root_device: true, read_only: false, }; let mut pmem = Pmem::new(config).unwrap(); let mem = default_mem(); let interrupt = default_interrupt(); let vq = VirtQueue::new(GuestAddress(0), &mem, 16); pmem.queues[0] = vq.create_queue(); pmem.activate(mem.clone(), interrupt).unwrap(); // Valid request { vq.avail.ring[0].set(0); vq.dtable[0].set(0x1000, 4, VIRTQ_DESC_F_NEXT, 1); vq.avail.ring[1].set(1); vq.dtable[1].set(0x2000, 4, VIRTQ_DESC_F_WRITE, 0); mem.write_obj::(0, GuestAddress(0x1000)).unwrap(); mem.write_obj::(0x69, GuestAddress(0x2000)).unwrap(); vq.used.idx.set(0); vq.avail.idx.set(1); let head = pmem.queues[0].pop().unwrap().unwrap(); pmem.process_chain(head).unwrap(); assert_eq!(mem.read_obj::(GuestAddress(0x2000)).unwrap(), 0); } // Invalid request type { vq.avail.ring[0].set(0); vq.dtable[0].set(0x1000, 4, VIRTQ_DESC_F_NEXT, 1); mem.write_obj::(0x69, GuestAddress(0x1000)).unwrap(); pmem.queues[0] = vq.create_queue(); vq.used.idx.set(0); vq.avail.idx.set(1); let head = pmem.queues[0].pop().unwrap().unwrap(); assert!(matches!( pmem.process_chain(head).unwrap_err(), PmemError::UnknownRequestType(0x69), )); } // Short chain request { vq.avail.ring[0].set(0); vq.dtable[0].set(0x1000, 4, 0, 1); mem.write_obj::(0, GuestAddress(0x1000)).unwrap(); pmem.queues[0] = vq.create_queue(); vq.used.idx.set(0); vq.avail.idx.set(1); let head = pmem.queues[0].pop().unwrap().unwrap(); assert!(matches!( pmem.process_chain(head).unwrap_err(), PmemError::DescriptorChainTooShort, )); } // Write only first descriptor { vq.avail.ring[0].set(0); vq.dtable[0].set(0x1000, 4, VIRTQ_DESC_F_WRITE | VIRTQ_DESC_F_NEXT, 1); vq.avail.ring[1].set(1); vq.dtable[1].set(0x2000, 4, VIRTQ_DESC_F_WRITE, 0); mem.write_obj::(0, GuestAddress(0x1000)).unwrap(); pmem.queues[0] = vq.create_queue(); vq.used.idx.set(0); vq.avail.idx.set(1); let head = pmem.queues[0].pop().unwrap().unwrap(); assert!(matches!( pmem.process_chain(head).unwrap_err(), 
PmemError::WriteOnlyDescriptor, )); } // Read only second descriptor { vq.avail.ring[0].set(0); vq.dtable[0].set(0x1000, 4, VIRTQ_DESC_F_NEXT, 1); vq.avail.ring[1].set(1); vq.dtable[1].set(0x2000, 4, 0, 0); mem.write_obj::(0, GuestAddress(0x1000)).unwrap(); pmem.queues[0] = vq.create_queue(); vq.used.idx.set(0); vq.avail.idx.set(1); let head = pmem.queues[0].pop().unwrap().unwrap(); assert!(matches!( pmem.process_chain(head).unwrap_err(), PmemError::ReadOnlyDescriptor, )); } } } ================================================ FILE: src/vmm/src/devices/virtio/pmem/event_handler.rs ================================================ // Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 use event_manager::{EventOps, EventSet, Events, MutEventSubscriber}; use log::{error, warn}; use super::device::Pmem; use crate::devices::virtio::device::VirtioDevice; impl Pmem { const PROCESS_ACTIVATE: u32 = 0; const PROCESS_PMEM_QUEUE: u32 = 1; fn register_runtime_events(&self, ops: &mut EventOps) { if let Err(err) = ops.add(Events::with_data( &self.queue_events[0], Self::PROCESS_PMEM_QUEUE, EventSet::IN, )) { error!("pmem: Failed to register queue event: {err}"); } } fn register_activate_event(&self, ops: &mut EventOps) { if let Err(err) = ops.add(Events::with_data( &self.activate_event, Self::PROCESS_ACTIVATE, EventSet::IN, )) { error!("pmem: Failed to register activate event: {err}"); } } fn process_activate_event(&self, ops: &mut EventOps) { if let Err(err) = self.activate_event.read() { error!("pmem: Failed to consume activate event: {err}"); } // Register runtime events self.register_runtime_events(ops); // Remove activate event if let Err(err) = ops.remove(Events::with_data( &self.activate_event, Self::PROCESS_ACTIVATE, EventSet::IN, )) { error!("pmem: Failed to unregister activate event: {err}"); } } } impl MutEventSubscriber for Pmem { fn init(&mut self, ops: &mut EventOps) { if self.is_activated() { 
self.register_runtime_events(ops) } else { self.register_activate_event(ops) } } fn process(&mut self, events: Events, ops: &mut EventOps) { let event_set = events.event_set(); let source = events.data(); if !event_set.contains(EventSet::IN) { warn!("pmem: Received unknown event: {event_set:#?} from source {source}"); return; } if !self.is_activated() { warn!("pmem: The device is not activated yet. Spurious event received from {source}"); return; } match source { Self::PROCESS_ACTIVATE => self.process_activate_event(ops), Self::PROCESS_PMEM_QUEUE => self.process_queue(), _ => { warn!("pmem: Unknown event received: {source}"); } } } } ================================================ FILE: src/vmm/src/devices/virtio/pmem/metrics.rs ================================================ // Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 //! Defines the metrics system for pmem devices. //! //! # Metrics format //! The metrics are flushed in JSON when requested by vmm::logger::metrics::METRICS.write(). //! //! ## JSON example with metrics: //! ```json //! { //! "pmem_drv0": { //! "activate_fails": "SharedIncMetric", //! "cfg_fails": "SharedIncMetric", //! "no_avail_buffer": "SharedIncMetric", //! "event_fails": "SharedIncMetric", //! "execute_fails": "SharedIncMetric", //! ... //! } //! "pmem_drv1": { //! "activate_fails": "SharedIncMetric", //! "cfg_fails": "SharedIncMetric", //! "no_avail_buffer": "SharedIncMetric", //! "event_fails": "SharedIncMetric", //! "execute_fails": "SharedIncMetric", //! ... //! } //! ... //! "pmem_drive_id": { //! "activate_fails": "SharedIncMetric", //! "cfg_fails": "SharedIncMetric", //! "no_avail_buffer": "SharedIncMetric", //! "event_fails": "SharedIncMetric", //! "execute_fails": "SharedIncMetric", //! ... //! } //! "pmem": { //! "activate_fails": "SharedIncMetric", //! "cfg_fails": "SharedIncMetric", //! "no_avail_buffer": "SharedIncMetric", //! 
"event_fails": "SharedIncMetric", //! "execute_fails": "SharedIncMetric", //! ... //! } //! } //! ``` //! Each `pmem` field in the example above is a serializable `PmemDeviceMetrics` structure //! collecting metrics such as `activate_fails`, `cfg_fails`, etc. for the pmem device. //! `pmem_drv0` represent metrics for the endpoint "/pmem/drv0", //! `pmem_drv1` represent metrics for the endpoint "/pmem/drv1", and //! `pmem_drive_id` represent metrics for the endpoint "/pmem/{drive_id}" //! pmem device respectively and `pmem` is the aggregate of all the per device metrics. //! //! # Limitations //! pmem device currently do not have `vmm::logger::metrics::StoreMetrics` so aggregate //! doesn't consider them. //! //! # Design //! The main design goals of this system are: //! * To improve pmem device metrics by logging them at per device granularity. //! * Continue to provide aggregate pmem metrics to maintain backward compatibility. //! * Move PmemDeviceMetrics out of from logger and decouple it. //! * Rely on `serde` to provide the actual serialization for writing the metrics. //! * Since all metrics start at 0, we implement the `Default` trait via derive for all of them, to //! avoid having to initialize everything by hand. //! //! * Devices could be created in any order i.e. the first device created could either be drv0 or //! drv1 so if we use a vector for PmemDeviceMetrics and call 1st device as pmem0, then pmem0 //! could sometimes point to drv0 and sometimes to drv1 which doesn't help with analysing the //! metrics. So, use Map instead of Vec to help understand which drive the metrics actually //! belongs to. //! //! The system implements 1 type of metrics: //! * Shared Incremental Metrics (SharedIncMetrics) - dedicated for the metrics which need a counter //! (i.e the number of times an API request failed). These metrics are reset upon flush. //! //! We add PmemDeviceMetrics entries from pmem::metrics::METRICS into Pmem device instead of //! 
Pmem device having individual separate PmemDeviceMetrics entries because Pmem device is not //! accessible from signal handlers to flush metrics and pmem::metrics::METRICS is. use std::collections::BTreeMap; use std::sync::{Arc, RwLock}; use serde::ser::SerializeMap; use serde::{Serialize, Serializer}; use crate::logger::{IncMetric, LatencyAggregateMetrics, SharedIncMetric}; /// map of pmem drive id and metrics /// this should be protected by a lock before accessing. #[derive(Debug)] pub struct PmemMetricsPerDevice { /// used to access per pmem device metrics pub metrics: BTreeMap>, } impl PmemMetricsPerDevice { /// Allocate `PmemDeviceMetrics` for pmem device having /// id `drive_id`. Also, allocate only if it doesn't /// exist to avoid overwriting previously allocated data. /// lock is always initialized so it is safe the unwrap /// the lock without a check. pub fn alloc(drive_id: String) -> Arc { Arc::clone( METRICS .write() .unwrap() .metrics .entry(drive_id) .or_insert_with(|| Arc::new(PmemMetrics::default())), ) } } /// Pool of pmem-related metrics per device behind a lock to /// keep things thread safe. Since the lock is initialized here /// it is safe to unwrap it without any check. static METRICS: RwLock = RwLock::new(PmemMetricsPerDevice { metrics: BTreeMap::new(), }); /// This function facilitates aggregation and serialization of /// per pmem device metrics. pub fn flush_metrics(serializer: S) -> Result { let pmem_metrics = METRICS.read().unwrap(); let metrics_len = pmem_metrics.metrics.len(); // +1 to accommodate aggregate pmem metrics let mut seq = serializer.serialize_map(Some(1 + metrics_len))?; let mut pmem_aggregated: PmemMetrics = PmemMetrics::default(); for (name, metrics) in pmem_metrics.metrics.iter() { let devn = format!("pmem_{}", name); // serialization will flush the metrics so aggregate before it. 
let m: &PmemMetrics = metrics; pmem_aggregated.aggregate(m); seq.serialize_entry(&devn, m)?; } seq.serialize_entry("pmem", &pmem_aggregated)?; seq.end() } /// Pmem Device associated metrics. #[derive(Debug, Default, Serialize)] pub struct PmemMetrics { /// Number of times when activate failed on a pmem device. pub activate_fails: SharedIncMetric, /// Number of times when interacting with the space config of a pmem device failed. pub cfg_fails: SharedIncMetric, /// Number of times when handling events on a pmem device failed. pub event_fails: SharedIncMetric, /// Number of events triggered on the queue of this pmem device. pub queue_event_count: SharedIncMetric, } impl PmemMetrics { /// Const default construction. pub fn new() -> Self { Self { ..Default::default() } } /// pmem metrics are SharedIncMetric where the diff of current vs /// old is serialized i.e. serialize_u64(current-old). /// So to have the aggregate serialized in same way we need to /// fetch the diff of current vs old metrics and add it to the /// aggregate. pub fn aggregate(&mut self, other: &Self) { self.activate_fails.add(other.activate_fails.fetch_diff()); self.cfg_fails.add(other.cfg_fails.fetch_diff()); self.event_fails.add(other.event_fails.fetch_diff()); self.queue_event_count .add(other.queue_event_count.fetch_diff()); } } #[cfg(test)] pub mod tests { use super::*; #[test] fn test_max_pmem_dev_metrics() { // Note: this test has nothing to do with // pmem structure or IRQs, this is just to allocate // metrics for max number of devices that system can have. // We have 5-23 IRQ for pmem devices on x86_64 so, there // are 19 pmem devices at max. And, even though we have more // devices on aarch64 but we stick to 19 to keep test common. const MAX_PMEM_DEVICES: usize = 19; // This is to make sure that RwLock for pmem::metrics::METRICS is good. drop(METRICS.read().unwrap()); drop(METRICS.write().unwrap()); // pmem::metrics::METRICS is in short RwLock on Vec of PmemDeviceMetrics. 
// Normally, pointer to unique entries of pmem::metrics::METRICS are stored // in Pmem device so that Pmem device can do self.metrics.* to // update a metric. We try to do something similar here without // using Pmem device by allocating max number of // PmemDeviceMetrics in pmem::metrics::METRICS and store pointer to // each entry in the local `metrics` vec. // We then update 1 IncMetric and 2 SharedMetric for each metrics // and validate if the metrics for per device was updated as // expected. let mut metrics: Vec> = Vec::new(); for i in 0..MAX_PMEM_DEVICES { let pmem_name: String = format!("pmem{}", i); metrics.push(PmemMetricsPerDevice::alloc(pmem_name.clone())); // update IncMetric metrics[i].activate_fails.inc(); if i == 0 { // Unit tests run in parallel and we have // `test_single_pmem_dev_metrics` that also increases // the IncMetric count of drv0 by 1 (intentional to check // thread safety) so we check if the count is >=1. assert!(metrics[i].activate_fails.count() >= 1); } else { assert!(metrics[i].activate_fails.count() == 1); } } } #[test] fn test_single_pmem_dev_metrics() { let test_metrics = PmemMetricsPerDevice::alloc(String::from("pmem0")); // Test to update IncMetrics test_metrics.activate_fails.inc(); assert!( test_metrics.activate_fails.count() > 0, "{}", test_metrics.activate_fails.count() ); // We expect only 2 tests (this and test_max_pmem_dev_metrics) // to update activate_fails count for pmem0. assert!( test_metrics.activate_fails.count() <= 2, "{}", test_metrics.activate_fails.count() ); } } ================================================ FILE: src/vmm/src/devices/virtio/pmem/mod.rs ================================================ // Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
// SPDX-License-Identifier: Apache-2.0 pub mod device; pub mod event_handler; pub mod metrics; pub mod persist; pub const PMEM_NUM_QUEUES: usize = 1; pub const PMEM_QUEUE_SIZE: u16 = 256; ================================================ FILE: src/vmm/src/devices/virtio/pmem/persist.rs ================================================ // Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 use serde::{Deserialize, Serialize}; use vm_memory::GuestAddress; use super::device::{ConfigSpace, Pmem, PmemError}; use crate::Vm; use crate::devices::virtio::device::{DeviceState, VirtioDeviceType}; use crate::devices::virtio::persist::{PersistError as VirtioStateError, VirtioDeviceState}; use crate::devices::virtio::pmem::{PMEM_NUM_QUEUES, PMEM_QUEUE_SIZE}; use crate::snapshot::Persist; use crate::vmm_config::pmem::PmemConfig; use crate::vstate::memory::{GuestMemoryMmap, GuestRegionMmap}; use crate::vstate::vm::VmError; #[derive(Debug, Clone, Serialize, Deserialize)] pub struct PmemState { pub virtio_state: VirtioDeviceState, pub config_space: ConfigSpace, pub config: PmemConfig, } #[derive(Debug)] pub struct PmemConstructorArgs<'a> { pub mem: &'a GuestMemoryMmap, pub vm: &'a Vm, } #[derive(Debug, thiserror::Error, displaydoc::Display)] pub enum PmemPersistError { /// Error resetting VirtIO state: {0} VirtioState(#[from] VirtioStateError), /// Error creating Pmem devie: {0} Pmem(#[from] PmemError), /// Error registering memory region: {0} Vm(#[from] VmError), } impl<'a> Persist<'a> for Pmem { type State = PmemState; type ConstructorArgs = PmemConstructorArgs<'a>; type Error = PmemPersistError; fn save(&self) -> Self::State { PmemState { virtio_state: VirtioDeviceState::from_device(self), config_space: self.config_space, config: self.config.clone(), } } fn restore( constructor_args: Self::ConstructorArgs, state: &Self::State, ) -> Result { let queues = state.virtio_state.build_queues_checked( constructor_args.mem, 
VirtioDeviceType::Pmem, PMEM_NUM_QUEUES, PMEM_QUEUE_SIZE, )?; let mut pmem = Pmem::new_with_queues(state.config.clone(), queues)?; pmem.config_space = state.config_space; pmem.avail_features = state.virtio_state.avail_features; pmem.acked_features = state.virtio_state.acked_features; pmem.set_mem_region(constructor_args.vm)?; Ok(pmem) } } #[cfg(test)] mod tests { use vmm_sys_util::tempfile::TempFile; use super::*; use crate::arch::Kvm; use crate::devices::virtio::device::VirtioDevice; use crate::devices::virtio::test_utils::default_mem; #[test] fn test_persistence() { // We create the backing file here so that it exists for the whole lifetime of the test. let dummy_file = TempFile::new().unwrap(); dummy_file.as_file().set_len(0x20_0000); let dummy_path = dummy_file.as_path().to_str().unwrap().to_string(); let config = PmemConfig { id: "1".into(), path_on_host: dummy_path, root_device: true, read_only: false, }; let pmem = Pmem::new(config).unwrap(); let guest_mem = default_mem(); let kvm = Kvm::new(vec![]).unwrap(); let vm = Vm::new(&kvm).unwrap(); // Save the block device. let pmem_state = pmem.save(); let serialized_data = bitcode::serialize(&pmem_state).unwrap(); // Restore the block device. let restored_state = bitcode::deserialize(&serialized_data).unwrap(); let restored_pmem = Pmem::restore( PmemConstructorArgs { mem: &guest_mem, vm: &vm, }, &restored_state, ) .unwrap(); // Test that virtio specific fields are the same. assert_eq!(restored_pmem.device_type(), VirtioDeviceType::Pmem); assert_eq!(restored_pmem.avail_features(), pmem.avail_features()); assert_eq!(restored_pmem.acked_features(), pmem.acked_features()); assert_eq!(restored_pmem.queues(), pmem.queues()); assert!(!pmem.is_activated()); assert!(!restored_pmem.is_activated()); assert_eq!(restored_pmem.config, pmem.config); } } ================================================ FILE: src/vmm/src/devices/virtio/queue.rs ================================================ // Copyright 2018 Amazon.com, Inc. 
or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 // // Portions Copyright 2017 The Chromium OS Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the THIRD-PARTY file. use std::num::Wrapping; use std::sync::atomic::{Ordering, fence}; use crate::logger::error; use crate::utils::u64_to_usize; use crate::vstate::memory::{Bitmap, ByteValued, GuestAddress, GuestMemory}; pub const VIRTQ_DESC_F_NEXT: u16 = 0x1; pub const VIRTQ_DESC_F_WRITE: u16 = 0x2; /// Max size of virtio queues offered by firecracker's virtio devices. pub(super) const FIRECRACKER_MAX_QUEUE_SIZE: u16 = 256; // GuestMemoryMmap::read_obj_from_addr() will be used to fetch the descriptor, // which has an explicit constraint that the entire descriptor doesn't // cross the page boundary. Otherwise the descriptor may be split into // two mmap regions which causes failure of GuestMemoryMmap::read_obj_from_addr(). // // The Virtio Spec 1.0 defines the alignment of VirtIO descriptor is 16 bytes, // which fulfills the explicit constraint of GuestMemoryMmap::read_obj_from_addr(). #[derive(Debug, thiserror::Error, displaydoc::Display)] pub enum QueueError { /// Descriptor index out of bounds: {0}. DescIndexOutOfBounds(u16), /// Failed to write value into the virtio queue used ring: {0} MemoryError(#[from] vm_memory::GuestMemoryError), /// Pointer is not aligned properly: {0:#x} not {1}-byte aligned. PointerNotAligned(usize, usize), /// Attempt to use virtio queue that is not marked ready NotReady, /// Virtio queue with invalid size: {0} InvalidSize(u16), } /// Error type indicating the guest configured a virtio queue such that the avail_idx field would /// indicate there are more descriptors to process than the queue actually has space for. /// /// Should this error bubble up to the event loop, we exit Firecracker, since this could be a /// potential malicious driver scenario. 
This way we also eliminate the risk of repeatedly /// logging and potentially clogging the microVM through the log system. #[derive(Debug, thiserror::Error, PartialEq, Eq)] #[error( "The number of available virtio descriptors {reported_len} is greater than queue size: \ {queue_size}!" )] pub struct InvalidAvailIdx { queue_size: u16, reported_len: u16, } /// A virtio descriptor constraints with C representative. /// Taken from Virtio spec: /// https://docs.oasis-open.org/virtio/virtio/v1.1/csprd01/virtio-v1.1-csprd01.html#x1-430008 /// 2.6.5 The Virtqueue Descriptor Table #[repr(C)] #[derive(Debug, Default, Clone, Copy)] pub struct Descriptor { pub addr: u64, pub len: u32, pub flags: u16, pub next: u16, } // SAFETY: `Descriptor` is a POD and contains no padding. unsafe impl ByteValued for Descriptor {} /// A virtio used element in the used ring. /// Taken from Virtio spec: /// https://docs.oasis-open.org/virtio/virtio/v1.1/csprd01/virtio-v1.1-csprd01.html#x1-430008 /// 2.6.8 The Virtqueue Used Ring #[repr(C)] #[derive(Debug, Default, Clone, Copy)] pub struct UsedElement { pub id: u32, pub len: u32, } // SAFETY: `UsedElement` is a POD and contains no padding. unsafe impl ByteValued for UsedElement {} /// A virtio descriptor chain. #[derive(Debug, Copy, Clone)] pub struct DescriptorChain { desc_table_ptr: *const Descriptor, queue_size: u16, ttl: u16, // used to prevent infinite chain cycles /// Index into the descriptor table pub index: u16, /// Guest physical address of device specific data pub addr: GuestAddress, /// Length of device specific data pub len: u32, /// Includes next, write, and indirect bits pub flags: u16, /// Index into the descriptor table of the next descriptor if flags has /// the next bit set pub next: u16, } impl DescriptorChain { /// Creates a new `DescriptorChain` from the given memory and descriptor table. /// /// Note that the desc_table and queue_size are assumed to be validated by the caller. 
fn checked_new(desc_table_ptr: *const Descriptor, queue_size: u16, index: u16) -> Option { if queue_size <= index { return None; } // SAFETY: // index is in 0..queue_size bounds let desc = unsafe { desc_table_ptr.add(usize::from(index)).read_volatile() }; let chain = DescriptorChain { desc_table_ptr, queue_size, ttl: queue_size, index, addr: GuestAddress(desc.addr), len: desc.len, flags: desc.flags, next: desc.next, }; if chain.is_valid() { Some(chain) } else { None } } fn is_valid(&self) -> bool { !self.has_next() || self.next < self.queue_size } /// Gets if this descriptor chain has another descriptor chain linked after it. pub fn has_next(&self) -> bool { self.flags & VIRTQ_DESC_F_NEXT != 0 && self.ttl > 1 } /// If the driver designated this as a write only descriptor. /// /// If this is false, this descriptor is read only. /// Write only means the emulated device can write and the driver can read. pub fn is_write_only(&self) -> bool { self.flags & VIRTQ_DESC_F_WRITE != 0 } /// Gets the next descriptor in this descriptor chain, if there is one. /// /// Note that this is distinct from the next descriptor chain returned by `AvailIter`, which is /// the head of the next _available_ descriptor chain. pub fn next_descriptor(&self) -> Option { if self.has_next() { DescriptorChain::checked_new(self.desc_table_ptr, self.queue_size, self.next).map( |mut c| { c.ttl = self.ttl - 1; c }, ) } else { None } } } #[derive(Debug)] pub struct DescriptorIterator(Option); impl IntoIterator for DescriptorChain { type Item = DescriptorChain; type IntoIter = DescriptorIterator; fn into_iter(self) -> Self::IntoIter { DescriptorIterator(Some(self)) } } impl Iterator for DescriptorIterator { type Item = DescriptorChain; fn next(&mut self) -> Option { self.0.take().inspect(|desc| { self.0 = desc.next_descriptor(); }) } } #[derive(Clone, Debug, PartialEq, Eq)] /// A virtio queue's parameters. 
pub struct Queue { /// The maximal size in elements offered by the device pub max_size: u16, /// The queue size in elements the driver selected pub size: u16, /// Indicates if the queue is finished with configuration pub ready: bool, /// Guest physical address of the descriptor table pub desc_table_address: GuestAddress, /// Guest physical address of the available ring pub avail_ring_address: GuestAddress, /// Guest physical address of the used ring pub used_ring_address: GuestAddress, /// Host virtual address pointer to the descriptor table /// in the guest memory . /// Getting access to the underling /// data structure should only occur after the /// struct is initialized with `new`. /// Representation of in memory struct layout. /// struct DescriptorTable = [Descriptor; ] pub desc_table_ptr: *const Descriptor, /// Host virtual address pointer to the available ring /// in the guest memory . /// Getting access to the underling /// data structure should only occur after the /// struct is initialized with `new`. /// /// Representation of in memory struct layout. /// struct AvailRing { /// flags: u16, /// idx: u16, /// ring: [u16; ], /// used_event: u16, /// } /// /// Because all types in the AvailRing are u16, /// we store pointer as *mut u16 for simplicity. pub avail_ring_ptr: *mut u16, /// Host virtual address pointer to the used ring /// in the guest memory . /// Getting access to the underling /// data structure should only occur after the /// struct is initialized with `new`. /// /// Representation of in memory struct layout. // struct UsedRing { // flags: u16, // idx: u16, // ring: [UsedElement; ], // avail_event: u16, // } /// Because types in the UsedRing are different (u16 and u32) /// store pointer as *mut u8. 
pub used_ring_ptr: *mut u8, pub next_avail: Wrapping, pub next_used: Wrapping, /// VIRTIO_F_RING_EVENT_IDX negotiated (notification suppression enabled) pub uses_notif_suppression: bool, /// The number of added used buffers since last guest kick pub num_added: Wrapping, } /// SAFETY: Queue is Send, because we use volatile memory accesses when /// working with pointers. These pointers are not copied or store anywhere /// else. We assume guest will not give different queues same guest memory /// addresses. unsafe impl Send for Queue {} #[allow(clippy::len_without_is_empty)] impl Queue { /// Constructs an empty virtio queue with the given `max_size`. pub fn new(max_size: u16) -> Queue { Queue { max_size, size: max_size, ready: false, desc_table_address: GuestAddress(0), avail_ring_address: GuestAddress(0), used_ring_address: GuestAddress(0), desc_table_ptr: std::ptr::null(), avail_ring_ptr: std::ptr::null_mut(), used_ring_ptr: std::ptr::null_mut(), next_avail: Wrapping(0), next_used: Wrapping(0), uses_notif_suppression: false, num_added: Wrapping(0), } } fn desc_table_size(&self) -> usize { std::mem::size_of::() * usize::from(self.size) } fn avail_ring_size(&self) -> usize { std::mem::size_of::() + std::mem::size_of::() + std::mem::size_of::() * usize::from(self.size) + std::mem::size_of::() } fn used_ring_size(&self) -> usize { std::mem::size_of::() + std::mem::size_of::() + std::mem::size_of::() * usize::from(self.size) + std::mem::size_of::() } fn get_aligned_slice_ptr( &self, mem: &M, addr: GuestAddress, len: usize, alignment: usize, ) -> Result<*mut T, QueueError> { // Guest memory base address is page aligned, so as long as alignment divides page size, // It suffices to check that the GPA is properly aligned (e.g. we don't need to recheck // the HVA). 
if addr.0 & (alignment as u64 - 1) != 0 { return Err(QueueError::PointerNotAligned( u64_to_usize(addr.0), alignment, )); } let slice = mem.get_slice(addr, len).map_err(QueueError::MemoryError)?; slice.bitmap().mark_dirty(0, len); Ok(slice.ptr_guard_mut().as_ptr().cast()) } /// Set up pointers to the queue objects in the guest memory /// and mark memory dirty for those objects pub fn initialize(&mut self, mem: &M) -> Result<(), QueueError> { if !self.ready { return Err(QueueError::NotReady); } if self.size > self.max_size || self.size == 0 || (self.size & (self.size - 1)) != 0 { return Err(QueueError::InvalidSize(self.size)); } // All the below pointers are verified to be aligned properly; otherwise some methods (e.g. // `read_volatile()`) will panic. Such an unalignment is possible when restored from a // broken/fuzzed snapshot. // // Specification of those pointers' alignments // https://docs.oasis-open.org/virtio/virtio/v1.2/csd01/virtio-v1.2-csd01.html#x1-350007 // > ================ ========== // > Virtqueue Part Alignment // > ================ ========== // > Descriptor Table 16 // > Available Ring 2 // > Used Ring 4 // > ================ ========== self.desc_table_ptr = self.get_aligned_slice_ptr(mem, self.desc_table_address, self.desc_table_size(), 16)?; self.avail_ring_ptr = self.get_aligned_slice_ptr(mem, self.avail_ring_address, self.avail_ring_size(), 2)?; self.used_ring_ptr = self.get_aligned_slice_ptr(mem, self.used_ring_address, self.used_ring_size(), 4)?; Ok(()) } /// Get AvailRing.idx #[inline(always)] pub fn avail_ring_idx_get(&self) -> u16 { // SAFETY: `idx` is 1 u16 away from the start unsafe { self.avail_ring_ptr.add(1).read_volatile() } } /// Get element from AvailRing.ring at index /// # Safety /// The `index` parameter should be in 0..queue_size bounds #[inline(always)] unsafe fn avail_ring_ring_get(&self, index: usize) -> u16 { // SAFETY: `ring` is 2 u16 away from the start unsafe { self.avail_ring_ptr.add(2).add(index).read_volatile() } } 
/// Get AvailRing.used_event #[inline(always)] pub fn avail_ring_used_event_get(&self) -> u16 { // SAFETY: `used_event` is 2 + self.len u16 away from the start unsafe { self.avail_ring_ptr .add(2_usize.unchecked_add(usize::from(self.size))) .read_volatile() } } /// Set UsedRing.idx #[inline(always)] pub fn used_ring_idx_set(&mut self, val: u16) { // SAFETY: `idx` is 1 u16 away from the start unsafe { self.used_ring_ptr .add(std::mem::size_of::()) .cast::() .write_volatile(val) } } /// Get element from UsedRing.ring at index /// # Safety /// The `index` parameter should be in 0..queue_size bounds #[inline(always)] unsafe fn used_ring_ring_set(&mut self, index: usize, val: UsedElement) { // SAFETY: `ring` is 2 u16 away from the start unsafe { self.used_ring_ptr .add(std::mem::size_of::().unchecked_mul(2)) .cast::() .add(index) .write_volatile(val) } } #[cfg(any(test, kani))] #[inline(always)] pub fn used_ring_avail_event_get(&mut self) -> u16 { // SAFETY: `avail_event` is 2 * u16 and self.len * UsedElement away from the start unsafe { self.used_ring_ptr .add( std::mem::size_of::().unchecked_mul(2) + std::mem::size_of::().unchecked_mul(usize::from(self.size)), ) .cast::() .read_volatile() } } /// Set UsedRing.avail_event #[inline(always)] pub fn used_ring_avail_event_set(&mut self, val: u16) { // SAFETY: `avail_event` is 2 * u16 and self.len * UsedElement away from the start unsafe { self.used_ring_ptr .add( std::mem::size_of::().unchecked_mul(2) + std::mem::size_of::().unchecked_mul(usize::from(self.size)), ) .cast::() .write_volatile(val) } } /// Returns the number of yet-to-be-popped descriptor chains in the avail ring. pub fn len(&self) -> u16 { (Wrapping(self.avail_ring_idx_get()) - self.next_avail).0 } /// Checks if the driver has made any descriptor chains available in the avail ring. pub fn is_empty(&self) -> bool { self.len() == 0 } /// Pop the first available descriptor chain from the avail ring. 
///
    /// If this function returns an error at runtime, then the guest has requested Firecracker
    /// to process more virtio descriptors than there can possibly be given the queue's size.
    /// This can be a malicious guest driver scenario, and hence a DoS attempt. If encountered
    /// at runtime, correct handling is to panic!
    ///
    /// This function however is also called on paths that can (and should) just report
    /// the error to the user (e.g. loading a corrupt snapshot file), and hence cannot panic on
    /// its own.
    pub fn pop(&mut self) -> Result<Option<DescriptorChain>, InvalidAvailIdx> {
        let len = self.len();
        // The number of descriptor chain heads to process should always
        // be smaller or equal to the queue size, as the driver should
        // never ask the VMM to process a available ring entry more than
        // once. Checking and reporting such incorrect driver behavior
        // can prevent potential hanging and Denial-of-Service from
        // happening on the VMM side.
        if self.size < len {
            return Err(InvalidAvailIdx {
                queue_size: self.size,
                reported_len: len,
            });
        }
        if len == 0 {
            return Ok(None);
        }

        Ok(self.pop_unchecked())
    }

    /// Try to pop the first available descriptor chain from the avail ring.
    /// If no descriptor is available, enable notifications.
    ///
    /// If this function returns an error at runtime, then the guest has requested Firecracker
    /// to process more virtio descriptors than there can possibly be given the queue's size.
    /// This can be a malicious guest driver scenario, and hence a DoS attempt. If encountered
    /// at runtime, correct handling is to panic!
    ///
    /// This function however is also called on paths that can (and should) just report
    /// the error to the user (e.g. loading a corrupt snapshot file), and hence cannot panic on
    /// its own.
    pub fn pop_or_enable_notification(
        &mut self,
    ) -> Result<Option<DescriptorChain>, InvalidAvailIdx> {
        if !self.uses_notif_suppression {
            return self.pop();
        }

        if self.try_enable_notification()? {
            return Ok(None);
        }

        Ok(self.pop_unchecked())
    }

    /// Pop the first available descriptor chain from the avail ring.
    ///
    /// # Important
    /// This is an internal method that ASSUMES THAT THERE ARE AVAILABLE DESCRIPTORS. Otherwise it
    /// will retrieve a descriptor that contains garbage data (obsolete/empty).
    fn pop_unchecked(&mut self) -> Option<DescriptorChain> {
        // This fence ensures all subsequent reads see the updated driver writes.
        fence(Ordering::Acquire);

        // We'll need to find the first available descriptor, that we haven't yet popped.
        // In a naive notation, that would be:
        // `descriptor_table[avail_ring[next_avail]]`.
        //
        // We use `self.next_avail` to store the position, in `ring`, of the next available
        // descriptor index, with a twist: we always only increment `self.next_avail`, so the
        // actual position will be `self.next_avail % self.size`.
        let idx = self.next_avail.0 % self.size;
        // SAFETY:
        // index is bound by the queue size
        let desc_index = unsafe { self.avail_ring_ring_get(usize::from(idx)) };

        DescriptorChain::checked_new(self.desc_table_ptr, self.size, desc_index).inspect(|_| {
            self.next_avail += Wrapping(1);
        })
    }

    /// Undo the effects of the last `self.pop()` call.
    /// The caller can use this, if it was unable to consume the last popped descriptor chain.
    pub fn undo_pop(&mut self) {
        self.next_avail -= Wrapping(1);
    }

    /// Write used element into used_ring ring.
    /// - [`ring_index_offset`] is an offset added to the current [`self.next_used`] to obtain
    ///   actual index into used_ring.
    pub fn write_used_element(
        &mut self,
        ring_index_offset: u16,
        desc_index: u16,
        len: u32,
    ) -> Result<(), QueueError> {
        if self.size <= desc_index {
            error!(
                "attempted to add out of bounds descriptor to used ring: {}",
                desc_index
            );
            return Err(QueueError::DescIndexOutOfBounds(desc_index));
        }

        let next_used = (self.next_used + Wrapping(ring_index_offset)).0 % self.size;
        let used_element = UsedElement {
            id: u32::from(desc_index),
            len,
        };
        // SAFETY:
        // index is bound by the queue size
        unsafe {
            self.used_ring_ring_set(usize::from(next_used), used_element);
        }
        Ok(())
    }

    /// Advance queue and used ring by `n` elements.
    pub fn advance_next_used(&mut self, n: u16) {
        self.num_added += Wrapping(n);
        self.next_used += Wrapping(n);
    }

    /// Set the used ring index to the current `next_used` value.
    /// Should be called once after number of `add_used` calls.
    pub fn advance_used_ring_idx(&mut self) {
        // This fence ensures all descriptor writes are visible before the index update is.
        fence(Ordering::Release);
        self.used_ring_idx_set(self.next_used.0);
    }

    /// Puts an available descriptor head into the used ring for use by the guest.
    pub fn add_used(&mut self, desc_index: u16, len: u32) -> Result<(), QueueError> {
        self.write_used_element(0, desc_index, len)?;
        self.advance_next_used(1);
        Ok(())
    }

    /// Try to enable notification events from the guest driver. Returns true if notifications were
    /// successfully enabled. Otherwise it means that one or more descriptors can still be consumed
    /// from the available ring and we can't guarantee that there will be a notification. In this
    /// case the caller might want to consume the mentioned descriptors and call this method again.
    fn try_enable_notification(&mut self) -> Result<bool, InvalidAvailIdx> {
        // If the device doesn't use notification suppression, we'll continue to get notifications
        // no matter what.
        if !self.uses_notif_suppression {
            return Ok(true);
        }

        let len = self.len();
        if len != 0 {
            // The number of descriptor chain heads to process should always
            // be smaller or equal to the queue size.
            if len > self.size {
                return Err(InvalidAvailIdx {
                    queue_size: self.size,
                    reported_len: len,
                });
            }
            return Ok(false);
        }

        // Set the next expected avail_idx as avail_event.
        self.used_ring_avail_event_set(self.next_avail.0);

        // Make sure all subsequent reads are performed after we set avail_event.
        fence(Ordering::SeqCst);

        // If the actual avail_idx is different than next_avail one or more descriptors can still
        // be consumed from the available ring.
        Ok(self.next_avail.0 == self.avail_ring_idx_get())
    }

    /// Enable notification suppression.
    pub fn enable_notif_suppression(&mut self) {
        self.uses_notif_suppression = true;
    }

    /// Check if we need to kick the guest.
    ///
    /// Please note this method has side effects: once it returns `true`, it considers the
    /// driver will actually be notified, and won't return `true` again until the driver
    /// updates `used_event` and/or the notification conditions hold once more.
    ///
    /// This is similar to the `vring_need_event()` method implemented by the Linux kernel.
    pub fn prepare_kick(&mut self) -> bool {
        // If the device doesn't use notification suppression, always return true
        if !self.uses_notif_suppression {
            return true;
        }

        // We need to expose used array entries before checking the used_event.
        fence(Ordering::SeqCst);

        let new = self.next_used;
        let old = self.next_used - self.num_added;
        let used_event = Wrapping(self.avail_ring_used_event_get());

        self.num_added = Wrapping(0);

        new - used_event - Wrapping(1) < new - old
    }

    /// Resets the Virtio Queue
    pub(crate) fn reset(&mut self) {
        self.ready = false;
        self.size = self.max_size;
        self.desc_table_address = GuestAddress(0);
        self.avail_ring_address = GuestAddress(0);
        self.used_ring_address = GuestAddress(0);
        self.next_avail = Wrapping(0);
        self.next_used = Wrapping(0);
        self.num_added = Wrapping(0);
        self.uses_notif_suppression = false;
    }
}

#[cfg(kani)]
#[allow(dead_code)]
mod verification {
    use std::mem::ManuallyDrop;
    use std::num::Wrapping;

    use vm_memory::{GuestMemoryRegion, MemoryRegionAddress};

    use super::*;
    use crate::vstate::memory::{Bytes, FileOffset, GuestAddress, GuestMemory, MmapRegion};

    /// A made-for-kani version of `vm_memory::GuestMemoryMmap`. Unlike the real
    /// `GuestMemoryMmap`, which manages a list of regions and then does a binary
    /// search to determine which region a specific read or write request goes to,
    /// this only uses a single region. Eliminating this binary search significantly
    /// speeds up all queue proofs, because it eliminates the only loop contained herein,
    /// meaning we can use `kani::unwind(0)` instead of `kani::unwind(2)`. Functionally,
    /// it works identically to `GuestMemoryMmap` with only a single contained region.
pub struct ProofGuestMemory { the_region: vm_memory::GuestRegionMmap, } impl GuestMemory for ProofGuestMemory { type R = vm_memory::GuestRegionMmap; fn num_regions(&self) -> usize { 1 } fn find_region(&self, addr: GuestAddress) -> Option<&Self::R> { self.the_region .to_region_addr(addr) .map(|_| &self.the_region) } fn iter(&self) -> impl Iterator { std::iter::once(&self.the_region) } fn try_access( &self, count: usize, addr: GuestAddress, mut f: F, ) -> vm_memory::guest_memory::Result where F: FnMut( usize, usize, MemoryRegionAddress, &Self::R, ) -> vm_memory::guest_memory::Result, { // We only have a single region, meaning a lot of the complications of the default // try_access implementation for dealing with reads/writes across multiple // regions does not apply. let region_addr = self .the_region .to_region_addr(addr) .ok_or(vm_memory::guest_memory::Error::InvalidGuestAddress(addr))?; self.the_region .checked_offset(region_addr, count) .ok_or(vm_memory::guest_memory::Error::InvalidGuestAddress(addr))?; f(0, count, region_addr, &self.the_region) } } pub struct ProofContext(pub Queue, pub ProofGuestMemory); pub struct MmapRegionStub { addr: *mut u8, size: usize, bitmap: (), file_offset: Option, prot: i32, flags: i32, owned: bool, hugetlbfs: Option, } /// We start the first guest memory region at an offset so that harnesses using /// Queue::any() will be exposed to queue segments both before and after valid guest memory. const GUEST_MEMORY_BASE: u64 = 512; // We size our guest memory to fit a properly aligned queue, plus some wiggles bytes // to make sure we not only test queues where all segments are consecutively aligned (at least // for those proofs that use a completely arbitrary queue structure). // We need to give at least 16 bytes of buffer space for the descriptor table to be // able to change its address, as it is 16-byte aligned. 
const GUEST_MEMORY_SIZE: usize = (QUEUE_END - QUEUE_BASE_ADDRESS) as usize + 30; fn guest_memory(memory: *mut u8) -> ProofGuestMemory { // Ideally, we'd want to do // let region = unsafe {MmapRegionBuilder::new(GUEST_MEMORY_SIZE) // .with_raw_mmap_pointer(bytes.as_mut_ptr()) // .build() // .unwrap()}; // However, .build() calls to .build_raw(), which contains a call to libc::sysconf. // Since kani 0.34.0, stubbing out foreign functions is supported, but due to the rust // standard library using a special version of the libc crate, it runs into some problems // [1] Even if we work around those problems, we run into performance problems [2]. // Therefore, for now we stick to this ugly transmute hack (which only works because // the kani compiler will never re-order fields, so we can treat repr(Rust) as repr(C)). // // [1]: https://github.com/model-checking/kani/issues/2673 // [2]: https://github.com/model-checking/kani/issues/2538 let region_stub = MmapRegionStub { addr: memory, size: GUEST_MEMORY_SIZE, bitmap: Default::default(), file_offset: None, prot: 0, flags: libc::MAP_ANONYMOUS | libc::MAP_PRIVATE, owned: false, hugetlbfs: None, }; let region: MmapRegion<()> = unsafe { std::mem::transmute(region_stub) }; let guest_region = vm_memory::GuestRegionMmap::new(region, GuestAddress(GUEST_MEMORY_BASE)).unwrap(); // Use a single memory region, just as firecracker does for guests of size < 2GB // For largest guests, firecracker uses two regions (due to the MMIO gap being // at the top of 32-bit address space) ProofGuestMemory { the_region: guest_region, } } // can't implement kani::Arbitrary for the relevant types due to orphan rules fn setup_kani_guest_memory() -> ProofGuestMemory { // Non-deterministic Vec that will be used as the guest memory. We use `exact_vec` for now // as `any_vec` will likely result in worse performance. 
We do not loose much from // `exact_vec`, as our proofs do not make any assumptions about "filling" guest // memory: Since everything is placed at non-deterministic addresses with // non-deterministic lengths, we still cover all scenarios that would be covered by // smaller guest memory closely. We leak the memory allocated here, so that it // doesnt get deallocated at the end of this function. We do not explicitly // de-allocate, but since this is a kani proof, that does not matter. guest_memory( ManuallyDrop::new(kani::vec::exact_vec::()).as_mut_ptr(), ) } // Constants describing the in-memory layout of a queue of size FIRECRACKER_MAX_SIZE starting // at the beginning of guest memory. These are based on Section 2.6 of the VirtIO 1.1 // specification. const QUEUE_BASE_ADDRESS: u64 = GUEST_MEMORY_BASE; /// descriptor table has 16 bytes per entry, avail ring starts right after const AVAIL_RING_BASE_ADDRESS: u64 = QUEUE_BASE_ADDRESS + FIRECRACKER_MAX_QUEUE_SIZE as u64 * 16; /// Used ring starts after avail ring (which has size 6 + 2 * FIRECRACKER_MAX_QUEUE_SIZE), /// and needs 2 bytes of padding const USED_RING_BASE_ADDRESS: u64 = AVAIL_RING_BASE_ADDRESS + 6 + 2 * FIRECRACKER_MAX_QUEUE_SIZE as u64 + 2; /// The address of the first byte after the queue (which starts at QUEUE_BASE_ADDRESS). 
/// Note that the used ring structure has size 6 + 8 * FIRECRACKER_MAX_QUEUE_SIZE
const QUEUE_END: u64 = USED_RING_BASE_ADDRESS + 6 + 8 * FIRECRACKER_MAX_QUEUE_SIZE as u64;

/// Builds a queue whose ring addresses are fixed to the known-valid layout above,
/// but whose indices and notification-suppression flag are still non-deterministic.
fn less_arbitrary_queue() -> Queue {
    let mut queue = Queue::new(FIRECRACKER_MAX_QUEUE_SIZE);

    queue.size = FIRECRACKER_MAX_QUEUE_SIZE;
    queue.ready = true;
    queue.desc_table_address = GuestAddress(QUEUE_BASE_ADDRESS);
    queue.avail_ring_address = GuestAddress(AVAIL_RING_BASE_ADDRESS);
    queue.used_ring_address = GuestAddress(USED_RING_BASE_ADDRESS);
    queue.next_avail = Wrapping(kani::any());
    queue.next_used = Wrapping(kani::any());
    queue.uses_notif_suppression = kani::any();
    queue.num_added = Wrapping(kani::any());

    queue
}

impl ProofContext {
    /// Creates a [`ProofContext`] where the queue layout is not arbitrary and instead
    /// fixed to a known valid one
    pub fn bounded_queue() -> Self {
        let mem = setup_kani_guest_memory();
        let mut queue = less_arbitrary_queue();
        queue.initialize(&mem).unwrap();

        ProofContext(queue, mem)
    }
}

impl kani::Arbitrary for ProofContext {
    fn any() -> Self {
        let mem = setup_kani_guest_memory();
        let mut queue: Queue = kani::any();

        // Only explore queues whose layout passes the device's own validation.
        kani::assume(queue.initialize(&mem).is_ok());

        ProofContext(queue, mem)
    }
}

impl kani::Arbitrary for Queue {
    fn any() -> Queue {
        // firecracker statically sets the maximal queue size to 256
        let mut queue = Queue::new(FIRECRACKER_MAX_QUEUE_SIZE);

        queue.size = kani::any();
        queue.ready = true;
        queue.desc_table_address = GuestAddress(kani::any());
        queue.avail_ring_address = GuestAddress(kani::any());
        queue.used_ring_address = GuestAddress(kani::any());
        queue.next_avail = Wrapping(kani::any());
        queue.next_used = Wrapping(kani::any());
        queue.uses_notif_suppression = kani::any();
        queue.num_added = Wrapping(kani::any());

        queue
    }
}

impl kani::Arbitrary for Descriptor {
    fn any() -> Descriptor {
        Descriptor {
            addr: kani::any(),
            len: kani::any(),
            flags: kani::any(),
            next: kani::any(),
        }
    }
}

#[kani::proof]
#[kani::unwind(0)]
// There are no loops anywhere, but kani really enjoys getting stuck in
// std::ptr::drop_in_place. This is a compiler intrinsic that has a "dummy"
// implementation in stdlib that just recursively calls itself. Kani will generally
// unwind this recursion infinitely.
fn verify_spec_2_6_7_2() {
    // Section 2.6.7.2 deals with device-to-driver notification suppression.
    // It describes a mechanism by which the driver can tell the device that it does not
    // want notifications (IRQs) about the device finishing processing individual buffers
    // (descriptor chain heads) from the avail ring until a specific number of descriptors
    // has been processed. This is done by the driver
    // defining a "used_event" index, which tells the device "please do not notify me until
    // used.ring[used_event] has been written to by you".
    let ProofContext(mut queue, _) = kani::any();

    let num_added_old = queue.num_added.0;
    let needs_notification = queue.prepare_kick();

    // uses_notif_suppression equivalent to VIRTIO_F_EVENT_IDX negotiated
    if !queue.uses_notif_suppression {
        // The specification here says
        // After the device writes a descriptor index into the used ring:
        // - If flags is 1, the device SHOULD NOT send a notification.
        // - If flags is 0, the device MUST send a notification
        // flags is the first field in the avail_ring_address, which we completely ignore. We
        // always send a notification, and as there only is a SHOULD NOT, that is okay
        assert!(needs_notification);
    } else {
        // next_used - 1 is where the previous descriptor was placed
        if Wrapping(queue.avail_ring_used_event_get()) == queue.next_used - Wrapping(1)
            && num_added_old > 0
        {
            // If the idx field in the used ring (which determined where that descriptor index
            // was placed) was equal to used_event, the device MUST send a
            // notification.
            assert!(needs_notification);

            kani::cover!();
        }

        // The other case is handled by a "SHOULD NOT send a notification" in the spec.
        // So we do not care
    }
}

#[kani::proof]
#[kani::unwind(0)]
fn verify_prepare_kick() {
    // Firecracker's virtio queue implementation is not completely spec conform:
    // According to the spec, we have to check whether to notify the driver after every call
    // to add_used. We don't do that. Instead, we call add_used a bunch of times (with the
    // number of added descriptors being counted in Queue.num_added), and then use
    // "prepare_kick" to check if any of those descriptors should have triggered a
    // notification.
    let ProofContext(mut queue, _) = kani::any();

    queue.enable_notif_suppression();
    assert!(queue.uses_notif_suppression);

    // With firecracker's batching of used IRQs, we need to check if addition of the last
    // queue.num_added buffers is what caused us to cross the used_event index (e.g. if the
    // index used_event was written to since the last call to prepare_kick). We have to
    // take various ring-wrapping behavior into consideration here. This is the case if
    // used_event in [next_used - num_added, next_used - 1]. However, intervals
    // in modular arithmetic are a finicky thing, as we do not have a notion of order
    // (consider for example u16::MAX + 1 = 0. Clearly, x + 1 > x, but that would imply 0 >
    // u16::MAX) This gives us some interesting corner cases: What if our "interval" is
    // "[u16::MAX - 1, 1]"? For these "wrapped" intervals, we can instead consider
    // [next_used - num_added - 1, u16::MAX] ∪ [0, next_used - 1]. Since queue size is at most
    // 2^15, intervals can only wrap at most once. This gives us the following logic:
    let used_event = Wrapping(queue.avail_ring_used_event_get());
    let interval_start = queue.next_used - queue.num_added;
    let interval_end = queue.next_used - Wrapping(1);

    // Reference model of the notification condition; the proof asserts prepare_kick
    // agrees with it for all reachable queue states.
    let needs_notification = if queue.num_added.0 == 0 {
        false
    } else if interval_start > interval_end {
        // "wrapped" interval: membership in either half
        used_event <= interval_end || used_event >= interval_start
    } else {
        used_event >= interval_start && used_event <= interval_end
    };

    assert_eq!(queue.prepare_kick(), needs_notification);
}

#[kani::proof]
#[kani::unwind(0)]
fn verify_add_used() {
    let ProofContext(mut queue, _) = kani::any();

    // The spec here says (2.6.8.2):
    //
    // The device MUST set len prior to updating the used idx.
    // The device MUST write at least len bytes to descriptor, beginning at the first
    // device-writable buffer, prior to updating the used idx.
    // The device MAY write more than len bytes to descriptor.
    //
    // We can't really verify any of these. We can verify that guest memory is updated correctly
    // though

    // index into used ring at which the index of the descriptor to which
    // the device wrote.
    let used_idx = queue.next_used;

    let used_desc_table_index = kani::any();
    if queue.add_used(used_desc_table_index, kani::any()).is_ok() {
        assert_eq!(queue.next_used, used_idx + Wrapping(1));
    } else {
        assert_eq!(queue.next_used, used_idx);

        // Ideally, here we would want to actually read the relevant values from memory and
        // assert they are unchanged. However, kani will run out of memory if we try to do so,
        // so we instead verify the following "proxy property": If an error happened, then
        // it happened at the very beginning of add_used, meaning no memory accesses were
        // done. This is relying on implementation details of add_used, namely that
        // the check for out-of-bounds descriptor index happens at the very beginning of the
        // function.
        assert!(used_desc_table_index >= queue.size);
    }
}

#[kani::proof]
#[kani::unwind(0)]
fn verify_is_empty() {
    let ProofContext(queue, _) = kani::any();

    // is_empty must agree with len() == 0 for every reachable queue state.
    assert_eq!(queue.len() == 0, queue.is_empty());
}

#[kani::proof]
#[kani::unwind(0)]
#[kani::solver(cadical)]
fn verify_initialize() {
    let ProofContext(mut queue, mem) = kani::any();

    if queue.initialize(&mem).is_ok() {
        // Section 2.6: Alignment of descriptor table, available ring and used ring; size of
        // queue
        // Returns the largest power of two dividing `val` (u64::MAX for 0).
        fn alignment_of(val: u64) -> u64 {
            if val == 0 { u64::MAX } else { val & (!val + 1) }
        }
        assert!(alignment_of(queue.desc_table_address.0) >= 16);
        assert!(alignment_of(queue.avail_ring_address.0) >= 2);
        assert!(alignment_of(queue.used_ring_address.0) >= 4);

        // length of queue must be power-of-two, and at most 2^15
        assert_eq!(queue.size.count_ones(), 1);
        assert!(queue.size <= 1u16 << 15);
    }
}

// The following proofs check that the raw ring accessors never fault on any queue
// that passed initialize() (panic-freedom; return values are discarded).

#[kani::proof]
#[kani::unwind(0)]
fn verify_avail_ring_idx_get() {
    let ProofContext(queue, _) = kani::any();
    _ = queue.avail_ring_idx_get();
}

#[kani::proof]
#[kani::unwind(0)]
fn verify_avail_ring_ring_get() {
    let ProofContext(queue, _) = kani::any();
    let x: usize = kani::any_where(|x| *x < usize::from(queue.size));
    unsafe { _ = queue.avail_ring_ring_get(x) };
}

#[kani::proof]
#[kani::unwind(0)]
fn verify_avail_ring_used_event_get() {
    let ProofContext(queue, _) = kani::any();
    _ = queue.avail_ring_used_event_get();
}

#[kani::proof]
#[kani::unwind(0)]
fn verify_used_ring_idx_set() {
    let ProofContext(mut queue, _) = kani::any();
    queue.used_ring_idx_set(kani::any());
}

#[kani::proof]
#[kani::unwind(0)]
fn verify_used_ring_ring_set() {
    let ProofContext(mut queue, _) = kani::any();
    let x: usize = kani::any_where(|x| *x < usize::from(queue.size));
    let used_element = UsedElement {
        id: kani::any(),
        len: kani::any(),
    };
    unsafe { queue.used_ring_ring_set(x, used_element) };
}

#[kani::proof]
#[kani::unwind(0)]
fn verify_used_ring_avail_event() {
    let ProofContext(mut queue, _) = kani::any();
    let x = kani::any();
    // Round-trip: a value written via the setter must be read back unchanged.
    queue.used_ring_avail_event_set(x);
    assert_eq!(x, queue.used_ring_avail_event_get());
}

#[kani::proof]
#[kani::unwind(0)]
#[kani::solver(cadical)]
fn verify_pop() {
    let ProofContext(mut queue, _) = kani::any();

    // This is an assertion in pop which we use to abort firecracker in a ddos scenario
    // This condition being false means that the guest is asking us to process every element
    // in the queue multiple times. It cannot be checked by initialize, as that function
    // is called when the queue is being initialized, e.g. empty. We compute it using
    // local variables here to make things easier on kani: One less roundtrip through vm-memory.
    let queue_len = queue.len();
    kani::assume(queue_len <= queue.size);

    let next_avail = queue.next_avail;

    if let Some(_) = queue.pop().unwrap() {
        // Can't get anything out of an empty queue, assert queue_len != 0
        assert_ne!(queue_len, 0);
        assert_eq!(queue.next_avail, next_avail + Wrapping(1));
    }
}

#[kani::proof]
#[kani::unwind(0)]
#[kani::solver(cadical)]
fn verify_undo_pop() {
    let ProofContext(mut queue, _) = kani::any();

    // See verify_pop for explanation
    kani::assume(queue.len() <= queue.size);

    let queue_clone = queue.clone();

    if let Some(_) = queue.pop().unwrap() {
        // pop() followed by undo_pop() must restore the exact queue state.
        queue.undo_pop();

        assert_eq!(queue, queue_clone);

        // TODO: can we somehow check that guest memory wasn't touched?
    }
}

#[kani::proof]
#[kani::unwind(0)]
fn verify_try_enable_notification() {
    let ProofContext(mut queue, _) = ProofContext::bounded_queue();

    kani::assume(queue.len() <= queue.size);

    if queue.try_enable_notification().unwrap() && queue.uses_notif_suppression {
        // We only require new notifications if the queue is empty (e.g. we've processed
        // everything we've been notified about), or if suppression is disabled.
assert!(queue.is_empty()); assert_eq!(Wrapping(queue.avail_ring_idx_get()), queue.next_avail) } } #[kani::proof] #[kani::unwind(0)] #[kani::solver(cadical)] fn verify_checked_new() { let ProofContext(queue, mem) = kani::any(); let index = kani::any(); let maybe_chain = DescriptorChain::checked_new(queue.desc_table_ptr, queue.size, index); if index >= queue.size { assert!(maybe_chain.is_none()) } else { // If the index was in-bounds for the descriptor table, we at least should be // able to compute the address of the descriptor table entry without going out // of bounds anywhere, and also read from that address. let desc_head = mem .checked_offset(queue.desc_table_address, (index as usize) * 16) .unwrap(); mem.checked_offset(desc_head, 16).unwrap(); let desc = mem.read_obj::(desc_head).unwrap(); match maybe_chain { None => { // This assert is the negation of the "is_valid" check in checked_new assert!(desc.flags & VIRTQ_DESC_F_NEXT == 1 && desc.next >= queue.size) } Some(head) => { assert!(head.is_valid()) } } } } } #[cfg(test)] mod tests { use vm_memory::{Address, Bytes}; pub use super::*; use crate::devices::virtio::queue::QueueError::DescIndexOutOfBounds; use crate::devices::virtio::test_utils::{VirtQueue, default_mem}; use crate::test_utils::{multi_region_mem, single_region_mem}; use crate::vstate::memory::GuestAddress; #[test] fn test_checked_new_descriptor_chain() { let m = &multi_region_mem(&[(GuestAddress(0), 0x10000), (GuestAddress(0x20000), 0x2000)]); let vq = VirtQueue::new(GuestAddress(0), m, 16); let mut q = vq.create_queue(); q.initialize(m).unwrap(); assert!(vq.end().0 < 0x1000); // index >= queue_size assert!(DescriptorChain::checked_new(q.desc_table_ptr, 16, 16).is_none()); // Let's create an invalid chain. { // The first desc has a normal len, and the next_descriptor flag is set. vq.dtable[0].addr.set(0x1000); vq.dtable[0].len.set(0x1000); vq.dtable[0].flags.set(VIRTQ_DESC_F_NEXT); // .. 
but the index of the next descriptor is too large vq.dtable[0].next.set(16); assert!(DescriptorChain::checked_new(q.desc_table_ptr, 16, 0).is_none()); } // Finally, let's test an ok chain. { vq.dtable[0].next.set(1); vq.dtable[1].set(0x2000, 0x1000, 0, 0); let c = DescriptorChain::checked_new(q.desc_table_ptr, 16, 0).unwrap(); assert_eq!(c.desc_table_ptr, q.desc_table_ptr); assert_eq!(c.queue_size, 16); assert_eq!(c.ttl, c.queue_size); assert_eq!(c.index, 0); assert_eq!(c.addr, GuestAddress(0x1000)); assert_eq!(c.len, 0x1000); assert_eq!(c.flags, VIRTQ_DESC_F_NEXT); assert_eq!(c.next, 1); assert!(c.next_descriptor().unwrap().next_descriptor().is_none()); } } #[test] fn test_queue_validation() { let m = &default_mem(); let vq = VirtQueue::new(GuestAddress(0), m, 16); let mut q = vq.create_queue(); // q is currently valid q.initialize(m).unwrap(); // shouldn't be valid when not marked as ready q.ready = false; assert!(matches!(q.initialize(m).unwrap_err(), QueueError::NotReady)); q.ready = true; // or when size > max_size q.size = q.max_size << 1; assert!(matches!( q.initialize(m).unwrap_err(), QueueError::InvalidSize(_) )); q.size = q.max_size; // or when size is 0 q.size = 0; assert!(matches!( q.initialize(m).unwrap_err(), QueueError::InvalidSize(_) )); q.size = q.max_size; // or when size is not a power of 2 q.size = 11; assert!(matches!( q.initialize(m).unwrap_err(), QueueError::InvalidSize(_) )); q.size = q.max_size; // reset dirtied values q.max_size = 16; q.next_avail = Wrapping(0); m.write_obj::(0, q.avail_ring_address.unchecked_add(2)) .unwrap(); // or if the various addresses are off q.desc_table_address = GuestAddress(0xffff_ff00); assert!(matches!( q.initialize(m).unwrap_err(), QueueError::MemoryError(_) )); q.desc_table_address = GuestAddress(0x1001); assert!(matches!( q.initialize(m).unwrap_err(), QueueError::PointerNotAligned(_, _) )); q.desc_table_address = vq.dtable_start(); q.avail_ring_address = GuestAddress(0xffff_ff00); assert!(matches!( 
q.initialize(m).unwrap_err(), QueueError::MemoryError(_) )); q.avail_ring_address = GuestAddress(0x1001); assert!(matches!( q.initialize(m).unwrap_err(), QueueError::PointerNotAligned(_, _) )); q.avail_ring_address = vq.avail_start(); q.used_ring_address = GuestAddress(0xffff_ff00); assert!(matches!( q.initialize(m).unwrap_err(), QueueError::MemoryError(_) )); q.used_ring_address = GuestAddress(0x1001); assert!(matches!( q.initialize(m).unwrap_err(), QueueError::PointerNotAligned(_, _) )); q.used_ring_address = vq.used_start(); } #[test] fn test_queue_processing() { let m = &default_mem(); let vq = VirtQueue::new(GuestAddress(0), m, 16); let mut q = vq.create_queue(); q.ready = true; // Let's create two simple descriptor chains. for j in 0..5 { vq.dtable[j as usize].set(0x1000 * u64::from(j + 1), 0x1000, VIRTQ_DESC_F_NEXT, j + 1); } // the chains are (0, 1) and (2, 3, 4) vq.dtable[1].flags.set(0); vq.dtable[4].flags.set(0); vq.avail.ring[0].set(0); vq.avail.ring[1].set(2); vq.avail.idx.set(2); // We've just set up two chains. assert_eq!(q.len(), 2); // The first chain should hold exactly two descriptors. let d = q.pop().unwrap().unwrap().next_descriptor().unwrap(); assert!(!d.has_next()); assert!(d.next_descriptor().is_none()); // We popped one chain, so there should be only one left. assert_eq!(q.len(), 1); // The next chain holds three descriptors. let d = q .pop() .unwrap() .unwrap() .next_descriptor() .unwrap() .next_descriptor() .unwrap(); assert!(!d.has_next()); assert!(d.next_descriptor().is_none()); // We've popped both chains, so the queue should be empty. assert!(q.is_empty()); assert!(q.pop().unwrap().is_none()); // Undoing the last pop should let us walk the last chain again. q.undo_pop(); assert_eq!(q.len(), 1); // Walk the last chain again (three descriptors). 
        let d = q
            .pop()
            .unwrap()
            .unwrap()
            .next_descriptor()
            .unwrap()
            .next_descriptor()
            .unwrap();
        assert!(!d.has_next());
        assert!(d.next_descriptor().is_none());

        // Undoing the last pop should let us walk the last chain again.
        q.undo_pop();
        assert_eq!(q.len(), 1);

        // Walk the last chain again (three descriptors) using pop_or_enable_notification().
        let d = q
            .pop_or_enable_notification()
            .unwrap()
            .unwrap()
            .next_descriptor()
            .unwrap()
            .next_descriptor()
            .unwrap();
        assert!(!d.has_next());
        assert!(d.next_descriptor().is_none());

        // There are no more descriptors, but notification suppression is disabled.
        // Calling pop_or_enable_notification() should simply return None.
        assert_eq!(q.used_ring_avail_event_get(), 0);
        assert!(q.pop_or_enable_notification().unwrap().is_none());
        assert_eq!(q.used_ring_avail_event_get(), 0);

        // There are no more descriptors and notification suppression is enabled. Calling
        // pop_or_enable_notification() should enable notifications.
        q.enable_notif_suppression();
        assert!(q.pop_or_enable_notification().unwrap().is_none());
        assert_eq!(q.used_ring_avail_event_get(), 2);
    }

    #[test]
    fn test_invalid_avail_idx_no_notification() {
        // This test ensures constructing a descriptor chain succeeds
        // with valid available ring indexes while it produces an error with invalid
        // indexes.
        // No notification suppression enabled.
        let m = &single_region_mem(0x6000);

        // We set up a queue of size 4.
        let vq = VirtQueue::new(GuestAddress(0), m, 4);
        let mut q = vq.create_queue();

        for j in 0..4 {
            vq.dtable[j as usize].set(0x1000 * u64::from(j + 1), 0x1000, VIRTQ_DESC_F_NEXT, j + 1);
        }

        // Create 2 descriptor chains.
        // the chains are (0, 1) and (2, 3)
        vq.dtable[1].flags.set(0);
        vq.dtable[3].flags.set(0);
        vq.avail.ring[0].set(0);
        vq.avail.ring[1].set(2);
        vq.avail.idx.set(2);

        // We've just set up two chains.
        assert_eq!(q.len(), 2);

        // We process the first descriptor.
        let d = q.pop().unwrap().unwrap().next_descriptor();
        assert!(matches!(d, Some(x) if !x.has_next()));

        // We confuse the device and set the available index as being 6.
        vq.avail.idx.set(6);
        // We've actually just popped a descriptor so 6 - 1 = 5.
        assert_eq!(q.len(), 5);

        // However, since the apparent length set by the driver is more than the queue size,
        // we would be running the risk of going through some descriptors more than once.
        // As such, we expect to panic.
        assert_eq!(
            q.pop().unwrap_err(),
            InvalidAvailIdx {
                reported_len: 5,
                queue_size: 4
            }
        );
    }

    #[test]
    fn test_invalid_avail_idx_with_notification() {
        // This test ensures constructing a descriptor chain succeeds
        // with valid available ring indexes while it produces an error with invalid
        // indexes.
        // Notification suppression is enabled.
        let m = &single_region_mem(0x6000);

        // We set up a queue of size 4.
        let vq = VirtQueue::new(GuestAddress(0), m, 4);
        let mut q = vq.create_queue();
        q.uses_notif_suppression = true;

        // Create 1 descriptor chain of 4.
        for j in 0..4 {
            vq.dtable[j as usize].set(0x1000 * u64::from(j + 1), 0x1000, VIRTQ_DESC_F_NEXT, j + 1);
        }
        // We need to clear the VIRTQ_DESC_F_NEXT for the last descriptor.
        vq.dtable[3].flags.set(0);
        vq.avail.ring[0].set(0);

        // driver sets available index to suspicious value.
        vq.avail.idx.set(6);

        assert_eq!(
            q.pop_or_enable_notification().unwrap_err(),
            InvalidAvailIdx {
                queue_size: 4,
                reported_len: 6
            }
        );
    }

    #[test]
    fn test_add_used() {
        let m = &default_mem();
        let vq = VirtQueue::new(GuestAddress(0), m, 16);

        let mut q = vq.create_queue();
        assert_eq!(vq.used.idx.get(), 0);

        // Valid queue addresses configuration
        {
            // index too large
            match q.add_used(16, 0x1000) {
                Err(DescIndexOutOfBounds(16)) => (),
                _ => unreachable!(),
            }

            // should be ok
            q.add_used(1, 0x1000).unwrap();
            q.advance_used_ring_idx();
            assert_eq!(vq.used.idx.get(), 1);
            let x = vq.used.ring[0].get();
            assert_eq!(x.id, 1);
            assert_eq!(x.len, 0x1000);
        }
    }

    #[test]
    fn test_used_event() {
        let m = &default_mem();
        let vq = VirtQueue::new(GuestAddress(0), m, 16);
        let q = vq.create_queue();

        // Values written by the "driver" (vq) must be visible through the queue getter.
        assert_eq!(q.avail_ring_used_event_get(), 0);

        vq.avail.event.set(10);
        assert_eq!(q.avail_ring_used_event_get(), 10);

        vq.avail.event.set(u16::MAX);
        assert_eq!(q.avail_ring_used_event_get(), u16::MAX);
    }

    #[test]
    fn test_set_used_ring_avail_event() {
        let m = &default_mem();
        let vq = VirtQueue::new(GuestAddress(0), m, 16);
        let mut q = vq.create_queue();

        // Values written by the device must be visible to the "driver" (vq).
        assert_eq!(vq.used.event.get(), 0);

        q.used_ring_avail_event_set(10);
        assert_eq!(vq.used.event.get(), 10);

        q.used_ring_avail_event_set(u16::MAX);
        assert_eq!(vq.used.event.get(), u16::MAX);
    }

    #[test]
    fn test_needs_kick() {
        let m = &default_mem();
        let vq = VirtQueue::new(GuestAddress(0), m, 16);
        let mut q = vq.create_queue();

        {
            // If the device doesn't have notification suppression support,
            // `needs_notification()` should always return true.
            q.uses_notif_suppression = false;
            for used_idx in 0..10 {
                for used_event in 0..10 {
                    for num_added in 0..10 {
                        q.next_used = Wrapping(used_idx);
                        vq.avail.event.set(used_event);
                        q.num_added = Wrapping(num_added);
                        assert!(q.prepare_kick());
                    }
                }
            }
        }

        q.enable_notif_suppression();
        {
            // old used idx < used_event < next_used
            q.next_used = Wrapping(10);
            vq.avail.event.set(6);
            q.num_added = Wrapping(5);
            assert!(q.prepare_kick());
        }
        {
            // old used idx = used_event < next_used
            q.next_used = Wrapping(10);
            vq.avail.event.set(6);
            q.num_added = Wrapping(4);
            assert!(q.prepare_kick());
        }
        {
            // used_event < old used idx < next_used
            q.next_used = Wrapping(10);
            vq.avail.event.set(6);
            q.num_added = Wrapping(3);
            assert!(!q.prepare_kick());
        }
    }

    #[test]
    fn test_try_enable_notification() {
        let m = &default_mem();
        let vq = VirtQueue::new(GuestAddress(0), m, 16);
        let mut q = vq.create_queue();

        q.ready = true;

        // We create a simple descriptor chain
        vq.dtable[0].set(0x1000_u64, 0x1000, 0, 0);
        vq.avail.ring[0].set(0);
        vq.avail.idx.set(1);
        assert_eq!(q.len(), 1);

        // Notification suppression is disabled. try_enable_notification shouldn't do anything.
        assert!(q.try_enable_notification().unwrap());
        assert_eq!(q.used_ring_avail_event_get(), 0);

        // Enable notification suppression and check again. There is 1 available descriptor chain.
        // Again nothing should happen.
        q.enable_notif_suppression();
        assert!(!q.try_enable_notification().unwrap());
        assert_eq!(q.used_ring_avail_event_get(), 0);

        // Consume the descriptor. avail_event should be modified
        assert!(q.pop().unwrap().is_some());
        assert!(q.try_enable_notification().unwrap());
        assert_eq!(q.used_ring_avail_event_get(), 1);
    }

    #[test]
    fn test_initialize_with_aligned_pointer() {
        let mut q = Queue::new(FIRECRACKER_MAX_QUEUE_SIZE);
        q.ready = true;
        q.size = q.max_size;

        // Descriptor table must be 16-byte aligned.
        q.desc_table_address = GuestAddress(16);
        // Available ring must be 2-byte aligned.
q.avail_ring_address = GuestAddress(2); // Used ring must be 4-byte aligned. q.avail_ring_address = GuestAddress(4); let mem = single_region_mem(0x10000); q.initialize(&mem).unwrap(); } #[test] fn test_initialize_with_misaligned_pointer() { let mut q = Queue::new(FIRECRACKER_MAX_QUEUE_SIZE); q.ready = true; q.size = q.max_size; let mem = single_region_mem(0x1000); // Descriptor table must be 16-byte aligned. q.desc_table_address = GuestAddress(0xb); match q.initialize(&mem) { Ok(_) => panic!("Unexpected success"), Err(QueueError::PointerNotAligned(addr, alignment)) => { assert_eq!(addr % 16, 0xb); assert_eq!(alignment, 16); } Err(e) => panic!("Unexpected error {e:#?}"), } q.desc_table_address = GuestAddress(0x0); // Available ring must be 2-byte aligned. q.avail_ring_address = GuestAddress(0x1); match q.initialize(&mem) { Ok(_) => panic!("Unexpected success"), Err(QueueError::PointerNotAligned(addr, alignment)) => { assert_eq!(addr % 2, 0x1); assert_eq!(alignment, 2); } Err(e) => panic!("Unexpected error {e:#?}"), } q.avail_ring_address = GuestAddress(0x0); // Used ring must be 4-byte aligned. q.used_ring_address = GuestAddress(0x3); match q.initialize(&mem) { Ok(_) => panic!("unexpected success"), Err(QueueError::PointerNotAligned(addr, alignment)) => { assert_eq!(addr % 4, 0x3); assert_eq!(alignment, 4); } Err(e) => panic!("Unexpected error {e:#?}"), } } #[test] fn test_queue_error_display() { let err = QueueError::MemoryError(vm_memory::GuestMemoryError::InvalidGuestAddress( GuestAddress(0), )); let _ = format!("{}{:?}", err, err); let err = DescIndexOutOfBounds(1); let _ = format!("{}{:?}", err, err); } } ================================================ FILE: src/vmm/src/devices/virtio/rng/device.rs ================================================ // Copyright 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
// SPDX-License-Identifier: Apache-2.0 use std::io; use std::ops::Deref; use std::sync::Arc; use aws_lc_rs::rand; use log::info; use vm_memory::GuestMemoryError; use vmm_sys_util::eventfd::EventFd; use super::metrics::METRICS; use super::{RNG_NUM_QUEUES, RNG_QUEUE}; use crate::devices::DeviceError; use crate::devices::virtio::ActivateError; use crate::devices::virtio::device::{ActiveState, DeviceState, VirtioDevice, VirtioDeviceType}; use crate::devices::virtio::generated::virtio_config::VIRTIO_F_VERSION_1; use crate::devices::virtio::iov_deque::IovDequeError; use crate::devices::virtio::iovec::IoVecBufferMut; use crate::devices::virtio::queue::{FIRECRACKER_MAX_QUEUE_SIZE, InvalidAvailIdx, Queue}; use crate::devices::virtio::transport::{VirtioInterrupt, VirtioInterruptType}; use crate::impl_device_type; use crate::logger::{IncMetric, debug, error}; use crate::rate_limiter::{RateLimiter, TokenType}; use crate::vstate::memory::GuestMemoryMmap; pub const ENTROPY_DEV_ID: &str = "rng"; /// Maximum number of bytes `handle_one()` will serve per request. /// /// Overlapping descriptors within a single chain can cause `buffer.len()` to /// exceed the amount of distinct guest memory actually backing the request. /// Capping the per-request allocation to 64 KiB keeps host memory usage /// bounded regardless of how the descriptor chain is constructed. 
const MAX_ENTROPY_BYTES: u32 = 64 * 1024;

#[derive(Debug, thiserror::Error, displaydoc::Display)]
pub enum EntropyError {
    /// Error while handling an Event file descriptor: {0}
    EventFd(#[from] io::Error),
    /// Bad guest memory buffer: {0}
    GuestMemory(#[from] GuestMemoryError),
    /// Could not get random bytes: {0}
    Random(#[from] aws_lc_rs::error::Unspecified),
    /// Underlying IovDeque error: {0}
    IovDeque(#[from] IovDequeError),
}

// NOTE(review): several generic argument lists in this item were stripped during
// extraction (`Vec<...>`, `Result<...>`); restored to the types the surrounding
// code requires.
#[derive(Debug)]
pub struct Entropy {
    // VirtIO fields
    avail_features: u64,
    acked_features: u64,
    activate_event: EventFd,

    // Transport fields
    device_state: DeviceState,
    pub(crate) queues: Vec<Queue>,
    queue_events: Vec<EventFd>,

    // Device specific fields
    rate_limiter: RateLimiter,
    buffer: IoVecBufferMut,
}

impl Entropy {
    /// Creates a new entropy device with default-size queues and the given rate limiter.
    pub fn new(rate_limiter: RateLimiter) -> Result<Entropy, EntropyError> {
        let queues = vec![Queue::new(FIRECRACKER_MAX_QUEUE_SIZE); RNG_NUM_QUEUES];
        Self::new_with_queues(queues, rate_limiter)
    }

    /// Creates a new entropy device using the caller-provided queues (used when
    /// restoring from a snapshot).
    pub fn new_with_queues(
        queues: Vec<Queue>,
        rate_limiter: RateLimiter,
    ) -> Result<Entropy, EntropyError> {
        let activate_event = EventFd::new(libc::EFD_NONBLOCK)?;
        let queue_events = (0..RNG_NUM_QUEUES)
            .map(|_| EventFd::new(libc::EFD_NONBLOCK))
            .collect::<Result<Vec<EventFd>, io::Error>>()?;

        Ok(Self {
            avail_features: 1 << VIRTIO_F_VERSION_1,
            acked_features: 0u64,
            activate_event,
            device_state: DeviceState::Inactive,
            queues,
            queue_events,
            rate_limiter,
            buffer: IoVecBufferMut::new()?,
        })
    }

    /// Raises a used-queue interrupt towards the guest.
    fn signal_used_queue(&self) -> Result<(), DeviceError> {
        self.interrupt_trigger()
            .trigger(VirtioInterruptType::Queue(RNG_QUEUE.try_into().unwrap()))
            .map_err(DeviceError::FailedSignalingIrq)
    }

    /// Tries to consume one op and `bytes` bytes of budget; returns false (and
    /// leaves the budget untouched) if either bucket is exhausted.
    fn rate_limit_request(&mut self, bytes: u64) -> bool {
        if !self.rate_limiter.consume(1, TokenType::Ops) {
            return false;
        }

        if !self.rate_limiter.consume(bytes, TokenType::Bytes) {
            // Roll back the op token we already took so the request stays atomic.
            self.rate_limiter.manual_replenish(1, TokenType::Ops);
            return false;
        }

        true
    }

    /// Returns previously consumed budget after a request ultimately failed.
    fn rate_limit_replenish_request(rate_limiter: &mut RateLimiter, bytes: u64) {
        rate_limiter.manual_replenish(1, TokenType::Ops);
        rate_limiter.manual_replenish(bytes, TokenType::Bytes);
    }

    /// Fills the currently loaded descriptor chain with random bytes.
    ///
    /// Returns the number of bytes written (0 for an empty buffer).
    fn handle_one(&mut self) -> Result<u32, EntropyError> {
        // If guest provided us with an empty buffer just return directly
        if self.buffer.is_empty() {
            return Ok(0);
        }

        // Cap the number of bytes we actually generate so that the host-side
        // allocation stays bounded even when buffer.len() is inflated by
        // overlapping descriptors in the chain.
        let len = std::cmp::min(self.buffer.len(), MAX_ENTROPY_BYTES);
        let mut rand_bytes = vec![0; len as usize];
        rand::fill(&mut rand_bytes).inspect_err(|_| {
            METRICS.host_rng_fails.inc();
        })?;

        // It is ok to unwrap here. We are writing `len` bytes at offset 0.
        self.buffer.write_all_volatile_at(&rand_bytes, 0).unwrap();
        Ok(len)
    }

    /// Drains the entropy queue, serving each request subject to rate limiting.
    fn process_entropy_queue(&mut self) -> Result<(), InvalidAvailIdx> {
        let mut used_any = false;

        while let Some(desc) = self.queues[RNG_QUEUE].pop()? {
            // This is safe since we checked in the event handler that the device is activated.
            let mem = &self.device_state.active_state().unwrap().mem;

            let index = desc.index;
            METRICS.entropy_event_count.inc();

            // SAFETY: This descriptor chain points to a single `DescriptorChain` memory buffer,
            // no other `IoVecBufferMut` object points to the same `DescriptorChain` at the same
            // time and we clear the `iovec` after we process the request.
            let bytes = match unsafe { self.buffer.load_descriptor_chain(mem, desc) } {
                Ok(()) => {
                    debug!(
                        "entropy: guest request for {} bytes of entropy",
                        self.buffer.len()
                    );

                    // Check for available rate limiting budget.
                    // If not enough budget is available, leave the request descriptor in the queue
                    // to handle once we do have budget.
if !self.rate_limit_request(u64::from(self.buffer.len())) {
                        debug!("entropy: throttling entropy queue");
                        METRICS.entropy_rate_limiter_throttled.inc();
                        self.queues[RNG_QUEUE].undo_pop();
                        break;
                    }

                    self.handle_one().unwrap_or_else(|err| {
                        error!("entropy: {err}");
                        METRICS.entropy_event_fails.inc();
                        0
                    })
                }
                Err(err) => {
                    error!("entropy: Could not parse descriptor chain: {err}");
                    METRICS.entropy_event_fails.inc();
                    0
                }
            };

            match self.queues[RNG_QUEUE].add_used(index, bytes) {
                Ok(_) => {
                    used_any = true;
                    METRICS.entropy_bytes.add(bytes.into());
                }
                Err(err) => {
                    error!("entropy: Could not add used descriptor to queue: {err}");
                    // Give back the budget we charged for this request, since we did not
                    // actually deliver it to the guest.
                    Self::rate_limit_replenish_request(&mut self.rate_limiter, bytes.into());
                    METRICS.entropy_event_fails.inc();
                    // If we are not able to add a buffer to the used queue, something
                    // is probably seriously wrong, so just stop processing additional
                    // buffers
                    break;
                }
            }
        }
        self.queues[RNG_QUEUE].advance_used_ring_idx();

        if used_any {
            self.signal_used_queue().unwrap_or_else(|err| {
                error!("entropy: {err:?}");
                METRICS.entropy_event_fails.inc()
            });
        }

        Ok(())
    }

    /// Handle a notification on the entropy queue's `EventFd`.
    ///
    /// Consumes the event and processes the queue, unless the rate limiter is
    /// currently blocked, in which case processing is deferred to the
    /// rate-limiter timer event.
    pub(crate) fn process_entropy_queue_event(&mut self) {
        if let Err(err) = self.queue_events[RNG_QUEUE].read() {
            error!("Failed to read entropy queue event: {err}");
            METRICS.entropy_event_fails.inc();
        } else if !self.rate_limiter.is_blocked() {
            // We are not throttled, handle the entropy queue
            self.process_entropy_queue().unwrap()
        } else {
            METRICS.rate_limiter_event_count.inc();
        }
    }

    /// Handle a rate-limiter timer event by retrying the entropy queue.
    pub(crate) fn process_rate_limiter_event(&mut self) {
        METRICS.rate_limiter_event_count.inc();
        match self.rate_limiter.event_handler() {
            Ok(_) => {
                // There might be enough budget now to process entropy requests.
                self.process_entropy_queue().unwrap()
            }
            Err(err) => {
                error!("entropy: Failed to handle rate-limiter event: {err:?}");
                METRICS.entropy_event_fails.inc();
            }
        }
    }

    /// Process all VirtIO queues of the device (the entropy device has a single queue).
    pub fn process_virtio_queues(&mut self) -> Result<(), InvalidAvailIdx> {
        self.process_entropy_queue()
    }

    /// Access the device's rate limiter.
    pub fn rate_limiter(&self) -> &RateLimiter {
        &self.rate_limiter
    }

    pub(crate) fn set_avail_features(&mut self, features: u64) {
        self.avail_features = features;
    }

    pub(crate) fn set_acked_features(&mut self, features: u64) {
        self.acked_features = features;
    }

    // NOTE(review): the generic argument of `Arc` was lost in extraction; restored as
    // `Arc<dyn VirtioInterrupt>` to match `activate()` and `interrupt_trigger()` below —
    // confirm against upstream.
    pub(crate) fn set_activated(
        &mut self,
        mem: GuestMemoryMmap,
        interrupt: Arc<dyn VirtioInterrupt>,
    ) {
        self.device_state = DeviceState::Activated(ActiveState { mem, interrupt });
    }

    pub(crate) fn activate_event(&self) -> &EventFd {
        &self.activate_event
    }
}

impl VirtioDevice for Entropy {
    impl_device_type!(VirtioDeviceType::Rng);

    fn id(&self) -> &str {
        ENTROPY_DEV_ID
    }

    fn queues(&self) -> &[Queue] {
        &self.queues
    }

    fn queues_mut(&mut self) -> &mut [Queue] {
        &mut self.queues
    }

    fn queue_events(&self) -> &[EventFd] {
        &self.queue_events
    }

    fn interrupt_trigger(&self) -> &dyn VirtioInterrupt {
        self.device_state
            .active_state()
            .expect("Device is not initialized")
            .interrupt
            .deref()
    }

    fn avail_features(&self) -> u64 {
        self.avail_features
    }

    fn acked_features(&self) -> u64 {
        self.acked_features
    }

    fn set_acked_features(&mut self, acked_features: u64) {
        self.acked_features = acked_features;
    }

    // The virtio-rng device has no configuration space, so reads and writes are no-ops.
    fn read_config(&self, _offset: u64, mut _data: &mut [u8]) {}

    fn write_config(&mut self, _offset: u64, _data: &[u8]) {}

    fn is_activated(&self) -> bool {
        self.device_state.is_activated()
    }

    fn activate(
        &mut self,
        mem: GuestMemoryMmap,
        interrupt: Arc<dyn VirtioInterrupt>,
    ) -> Result<(), ActivateError> {
        for q in self.queues.iter_mut() {
            q.initialize(&mem)
                .map_err(ActivateError::QueueMemoryError)?;
        }
        // Notify the event handler thread so it swaps the activate event for the
        // runtime (queue + rate-limiter) events.
        self.activate_event.write(1).map_err(|_| {
            METRICS.activate_fails.inc();
            ActivateError::EventFd
        })?;
        self.device_state = DeviceState::Activated(ActiveState { mem, interrupt });
        Ok(())
    }
}

#[cfg(test)]
mod tests {
    use std::time::Duration;

    use super::*;
    use crate::check_metric_after_block;
    use crate::devices::virtio::device::{VirtioDevice, VirtioDeviceType};
    use crate::devices::virtio::queue::VIRTQ_DESC_F_WRITE;
    use crate::devices::virtio::test_utils::test::{
        VirtioTestDevice, VirtioTestHelper, create_virtio_mem,
    };

    impl VirtioTestDevice for Entropy {
        fn set_queues(&mut self, queues: Vec<Queue>) {
            self.queues = queues;
        }

        fn num_queues(&self) -> usize {
            RNG_NUM_QUEUES
        }
    }

    fn default_entropy() -> Entropy {
        Entropy::new(RateLimiter::default()).unwrap()
    }

    #[test]
    fn test_new() {
        let entropy_dev = default_entropy();

        assert_eq!(entropy_dev.avail_features(), 1 << VIRTIO_F_VERSION_1);
        assert_eq!(entropy_dev.acked_features(), 0);
        assert!(!entropy_dev.is_activated());
    }

    #[test]
    fn test_id() {
        let entropy_dev = default_entropy();
        assert_eq!(entropy_dev.id(), ENTROPY_DEV_ID);
    }

    #[test]
    fn test_device_type() {
        let entropy_dev = default_entropy();
        assert_eq!(entropy_dev.device_type(), VirtioDeviceType::Rng);
    }

    #[test]
    fn test_read_config() {
        let entropy_dev = default_entropy();

        let mut config = vec![0; 10];
        entropy_dev.read_config(0, &mut config);
        assert_eq!(config, vec![0; 10]);
        entropy_dev.read_config(1, &mut config);
        assert_eq!(config, vec![0; 10]);
        entropy_dev.read_config(2, &mut config);
        assert_eq!(config, vec![0; 10]);
        // Offsets past any plausible config space are ignored as well.
        entropy_dev.read_config(1024, &mut config);
        assert_eq!(config, vec![0; 10]);
    }

    #[test]
    fn test_write_config() {
        let mut entropy_dev = default_entropy();
        let mut read_config = vec![0; 10];
        let write_config = vec![42; 10];

        entropy_dev.write_config(0, &write_config);
        entropy_dev.read_config(0, &mut read_config);
        assert_eq!(read_config, vec![0; 10]);
        entropy_dev.write_config(1, &write_config);
        entropy_dev.read_config(1, &mut read_config);
        assert_eq!(read_config, vec![0; 10]);
        entropy_dev.write_config(2, &write_config);
        entropy_dev.read_config(2, &mut read_config);
        assert_eq!(read_config, vec![0; 10]);
        entropy_dev.write_config(1024, &write_config);
        entropy_dev.read_config(1024, &mut read_config);
        assert_eq!(read_config, vec![0; 10]);
    }

    #[test]
    fn test_handle_one() {
        let mem = create_virtio_mem();
        let mut th = VirtioTestHelper::<Entropy>::new(&mem, default_entropy());

        // Checks that device activation works
        th.activate_device(&mem);

        // Add a read-only descriptor (this should fail)
        th.add_desc_chain(RNG_QUEUE, 0, &[(0, 64, 0)]);
        // Add a write-only descriptor with 10 bytes
        th.add_desc_chain(RNG_QUEUE, 0, &[(1, 10, VIRTQ_DESC_F_WRITE)]);
        // Add a write-only descriptor with 0 bytes. This should not fail.
        th.add_desc_chain(RNG_QUEUE, 0, &[(2, 0, VIRTQ_DESC_F_WRITE)]);

        let mut entropy_dev = th.device();

        // This should succeed, we just added two descriptors
        let desc = entropy_dev.queues_mut()[RNG_QUEUE].pop().unwrap().unwrap();
        assert!(matches!(
            // SAFETY: This descriptor chain is only loaded into one buffer
            unsafe { IoVecBufferMut::<256>::from_descriptor_chain(&mem, desc) },
            Err(crate::devices::virtio::iovec::IoVecError::ReadOnlyDescriptor)
        ));

        // This should succeed, we should have one more descriptor
        let desc = entropy_dev.queues_mut()[RNG_QUEUE].pop().unwrap().unwrap();
        // SAFETY: This descriptor chain is only loaded into one buffer
        entropy_dev.buffer = unsafe { IoVecBufferMut::from_descriptor_chain(&mem, desc).unwrap() };
        entropy_dev.handle_one().unwrap();
    }

    #[test]
    fn test_entropy_event() {
        let mem = create_virtio_mem();
        let mut th = VirtioTestHelper::<Entropy>::new(&mem, default_entropy());

        th.activate_device(&mem);

        // Add a read-only descriptor (this should fail)
        th.add_desc_chain(RNG_QUEUE, 0, &[(0, 64, 0)]);

        let entropy_event_fails = METRICS.entropy_event_fails.count();
        let entropy_event_count = METRICS.entropy_event_count.count();
        let entropy_bytes = METRICS.entropy_bytes.count();
        let host_rng_fails = METRICS.host_rng_fails.count();
        assert_eq!(th.emulate_for_msec(100).unwrap(), 1);
        assert_eq!(METRICS.entropy_event_fails.count(), entropy_event_fails + 1);
        assert_eq!(METRICS.entropy_event_count.count(), entropy_event_count + 1);
        assert_eq!(METRICS.entropy_bytes.count(), entropy_bytes);
        assert_eq!(METRICS.host_rng_fails.count(), host_rng_fails);

        // Add two good descriptors
        th.add_desc_chain(RNG_QUEUE, 0, &[(1, 10, VIRTQ_DESC_F_WRITE)]);
        th.add_desc_chain(RNG_QUEUE, 100, &[(2, 20, VIRTQ_DESC_F_WRITE)]);

        let entropy_event_fails = METRICS.entropy_event_fails.count();
        let entropy_event_count = METRICS.entropy_event_count.count();
        let entropy_bytes = METRICS.entropy_bytes.count();
        let host_rng_fails = METRICS.host_rng_fails.count();
        assert_eq!(th.emulate_for_msec(100).unwrap(), 1);
        assert_eq!(METRICS.entropy_event_fails.count(), entropy_event_fails);
        assert_eq!(METRICS.entropy_event_count.count(), entropy_event_count + 2);
        assert_eq!(METRICS.entropy_bytes.count(), entropy_bytes + 30);
        assert_eq!(METRICS.host_rng_fails.count(), host_rng_fails);

        // A single chain of three write-only descriptors counts as one request.
        th.add_desc_chain(
            RNG_QUEUE,
            0,
            &[
                (3, 128, VIRTQ_DESC_F_WRITE),
                (4, 128, VIRTQ_DESC_F_WRITE),
                (5, 256, VIRTQ_DESC_F_WRITE),
            ],
        );
        let entropy_event_fails = METRICS.entropy_event_fails.count();
        let entropy_event_count = METRICS.entropy_event_count.count();
        let entropy_bytes = METRICS.entropy_bytes.count();
        let host_rng_fails = METRICS.host_rng_fails.count();
        assert_eq!(th.emulate_for_msec(100).unwrap(), 1);
        assert_eq!(METRICS.entropy_event_fails.count(), entropy_event_fails);
        assert_eq!(METRICS.entropy_event_count.count(), entropy_event_count + 1);
        assert_eq!(METRICS.entropy_bytes.count(), entropy_bytes + 512);
        assert_eq!(METRICS.host_rng_fails.count(), host_rng_fails);
    }

    #[test]
    fn test_bad_rate_limiter_event() {
        let mem = create_virtio_mem();
        let mut th = VirtioTestHelper::<Entropy>::new(&mem, default_entropy());

        th.activate_device(&mem);
        let mut dev = th.device();

        // A rate-limiter event without a pending timer must be counted as a failure.
        check_metric_after_block!(
            &METRICS.entropy_event_fails,
            1,
            dev.process_rate_limiter_event()
        );
    }

    #[test]
    fn test_bandwidth_rate_limiter() {
        let mem = create_virtio_mem();
        // Rate Limiter with 4000 bytes / sec allowance and no initial burst allowance
        let device = Entropy::new(RateLimiter::new(4000, 0, 1000, 0, 0, 0).unwrap()).unwrap();
        let mut th = VirtioTestHelper::<Entropy>::new(&mem, device);
        th.activate_device(&mem);

        // We are asking for 4000 bytes which should be available, so the
        // buffer should be processed normally
        th.add_desc_chain(RNG_QUEUE, 0, &[(0, 4000, VIRTQ_DESC_F_WRITE)]);
        check_metric_after_block!(
            METRICS.entropy_bytes,
            4000,
            th.device().process_entropy_queue()
        );
        assert!(!th.device().rate_limiter.is_blocked());

        // Completely replenish the rate limiter
        th.device()
            .rate_limiter
            .manual_replenish(4000, TokenType::Bytes);

        // Add two descriptors. The first one should drain the available budget,
        // so the next one should be throttled.
        th.add_desc_chain(RNG_QUEUE, 0, &[(0, 4000, VIRTQ_DESC_F_WRITE)]);
        th.add_desc_chain(RNG_QUEUE, 1, &[(1, 1000, VIRTQ_DESC_F_WRITE)]);
        check_metric_after_block!(
            METRICS.entropy_bytes,
            4000,
            th.device().process_entropy_queue()
        );
        check_metric_after_block!(
            METRICS.entropy_rate_limiter_throttled,
            1,
            th.device().process_entropy_queue()
        );
        assert!(th.device().rate_limiter().is_blocked());

        // 250 msec should give enough time for replenishing 1000 bytes worth of tokens.
        // Give it an extra 100 ms just to be sure the timer event reaches us from the kernel.
        std::thread::sleep(Duration::from_millis(350));
        check_metric_after_block!(METRICS.entropy_bytes, 1000, th.emulate_for_msec(100));
        assert!(!th.device().rate_limiter().is_blocked());
    }

    #[test]
    fn test_ops_rate_limiter() {
        let mem = create_virtio_mem();
        // Rate Limiter with unlimited bandwidth and allowance for 1 operation every 100 msec,
        // (10 ops/sec), without initial burst.
        let device = Entropy::new(RateLimiter::new(0, 0, 0, 1, 0, 100).unwrap()).unwrap();
        let mut th = VirtioTestHelper::<Entropy>::new(&mem, device);
        th.activate_device(&mem);

        // We don't have a bandwidth limit and we can do 10 requests per sec
        // so this should succeed.
        th.add_desc_chain(RNG_QUEUE, 0, &[(0, 4000, VIRTQ_DESC_F_WRITE)]);
        check_metric_after_block!(
            METRICS.entropy_bytes,
            4000,
            th.device().process_entropy_queue()
        );
        assert!(!th.device().rate_limiter.is_blocked());

        // Sleep for 1 second to completely replenish the rate limiter
        std::thread::sleep(Duration::from_millis(1000));

        // First one should succeed
        let entropy_bytes = METRICS.entropy_bytes.count();
        th.add_desc_chain(RNG_QUEUE, 0, &[(0, 64, VIRTQ_DESC_F_WRITE)]);
        check_metric_after_block!(METRICS.entropy_bytes, 64, th.emulate_for_msec(100));
        assert_eq!(METRICS.entropy_bytes.count(), entropy_bytes + 64);
        // The rate limiter is not blocked yet.
        assert!(!th.device().rate_limiter().is_blocked());
        // But immediately asking another operation should block it because we have 1 op every 100
        // msec.
        th.add_desc_chain(RNG_QUEUE, 0, &[(0, 64, VIRTQ_DESC_F_WRITE)]);
        check_metric_after_block!(
            METRICS.entropy_rate_limiter_throttled,
            1,
            th.emulate_for_msec(50)
        );
        // Entropy bytes count should not have increased.
        assert_eq!(METRICS.entropy_bytes.count(), entropy_bytes + 64);

        // After 100 msec (plus 50 msec for ensuring the event reaches us from the kernel), the
        // timer of the rate limiter should fire saying that there's now more tokens available
        check_metric_after_block!(
            METRICS.rate_limiter_event_count,
            1,
            th.emulate_for_msec(150)
        );
        // The rate limiter event should have processed the pending buffer as well
        assert_eq!(METRICS.entropy_bytes.count(), entropy_bytes + 128);
    }

    /// Verify that handle_one() caps the host allocation to MAX_ENTROPY_BYTES
    /// when overlapping descriptors inflate buffer.len() beyond the limit.
    #[test]
    fn test_handle_one_caps_overlapping_descriptors() {
        use crate::devices::virtio::queue::VIRTQ_DESC_F_NEXT;
        use crate::devices::virtio::test_utils::VirtQueue;
        use crate::test_utils::single_region_mem;
        use crate::vstate::memory::GuestAddress;

        // 32 descriptors × 4 KiB = 128 KiB claimed, which exceeds MAX_ENTROPY_BYTES (64 KiB).
        const N_DESC: u16 = 32;
        const CHUNK: u32 = 4096;

        let mem = single_region_mem(0x20000);
        let vq = VirtQueue::new(GuestAddress(0), &mem, 256);
        let mut queue = vq.create_queue();

        // All descriptors alias the same guest-physical region, so the chain's
        // claimed length is inflated without needing more guest memory.
        let target: u64 = 0x10000;
        for i in 0..N_DESC {
            let flags = VIRTQ_DESC_F_WRITE
                | if i < N_DESC - 1 {
                    VIRTQ_DESC_F_NEXT
                } else {
                    0
                };
            vq.dtable[i as usize].set(target, CHUNK, flags, i + 1);
        }
        vq.avail.ring[0].set(0);
        vq.avail.idx.set(1);

        let head = queue.pop().unwrap().unwrap();
        // SAFETY: `mem` is a valid guest memory region and `head` is a descriptor chain
        // obtained from the virtqueue backed by that memory.
        let buf = unsafe { IoVecBufferMut::<256>::from_descriptor_chain(&mem, head).unwrap() };
        // buffer.len() is inflated well past the cap.
        assert_eq!(buf.len(), u32::from(N_DESC) * CHUNK); // 128 KiB

        let mut dev = default_entropy();
        dev.buffer = buf;
        let bytes = dev.handle_one().unwrap();
        assert_eq!(
            bytes,
            MAX_ENTROPY_BYTES,
            "handle_one() must cap at MAX_ENTROPY_BYTES ({MAX_ENTROPY_BYTES}), \
             got {bytes} for inflated buffer.len() = {}",
            u32::from(N_DESC) * CHUNK
        );
    }

    /// Verify that handle_one() caps a large inflated buffer (~4 GiB from
    /// 255 overlapping descriptors) to MAX_ENTROPY_BYTES.
    #[test]
    fn test_handle_one_caps_large_inflated_buffer() {
        use crate::devices::virtio::queue::VIRTQ_DESC_F_NEXT;
        use crate::devices::virtio::test_utils::VirtQueue;
        use crate::test_utils::single_region_mem;
        use crate::vstate::memory::GuestAddress;

        const N_DESC: u16 = 255;
        const CHUNK: u32 = 16 * 1024 * 1024; // 16 MiB
        const TOTAL: u64 = (N_DESC as u64) * (CHUNK as u64); // ~4 GiB

        let mem = single_region_mem((CHUNK as usize) + 0x100000);
        let vq = VirtQueue::new(GuestAddress(0), &mem, 256);
        let mut queue = vq.create_queue();

        let target: u64 = 0x80000;
        for i in 0..N_DESC {
            let flags = VIRTQ_DESC_F_WRITE
                | if i < N_DESC - 1 {
                    VIRTQ_DESC_F_NEXT
                } else {
                    0
                };
            vq.dtable[i as usize].set(target, CHUNK, flags, i + 1);
        }
        vq.avail.ring[0].set(0);
        vq.avail.idx.set(1);

        let head = queue.pop().unwrap().unwrap();
        // SAFETY: `mem` is a valid guest memory region and `head` is a descriptor chain
        // obtained from the virtqueue backed by that memory.
        let buf = unsafe { IoVecBufferMut::<256>::from_descriptor_chain(&mem, head).unwrap() };
        assert_eq!(buf.len() as u64, TOTAL);

        let mut dev = default_entropy();
        dev.buffer = buf;
        let bytes = dev.handle_one().unwrap();
        assert_eq!(
            bytes, MAX_ENTROPY_BYTES,
            "handle_one() must cap at MAX_ENTROPY_BYTES, not allocate {} bytes",
            TOTAL
        );
    }

    /// Verify that a request within MAX_ENTROPY_BYTES is served in full
    /// (the cap does not truncate legitimate small requests).
    #[test]
    fn test_handle_one_serves_small_request_in_full() {
        use crate::devices::virtio::test_utils::VirtQueue;
        use crate::test_utils::single_region_mem;
        use crate::vstate::memory::GuestAddress;

        const SIZE: u32 = 256;

        let mem = single_region_mem(0x20000);
        let vq = VirtQueue::new(GuestAddress(0), &mem, 256);
        let mut queue = vq.create_queue();

        vq.dtable[0].set(0x10000, SIZE, VIRTQ_DESC_F_WRITE, 0);
        vq.avail.ring[0].set(0);
        vq.avail.idx.set(1);

        let head = queue.pop().unwrap().unwrap();
        // SAFETY: `mem` is a valid guest memory region and `head` is a descriptor chain
        // obtained from the virtqueue backed by that memory.
        let buf = unsafe { IoVecBufferMut::<256>::from_descriptor_chain(&mem, head).unwrap() };
        assert_eq!(buf.len(), SIZE);

        let mut dev = default_entropy();
        dev.buffer = buf;
        let bytes = dev.handle_one().unwrap();
        assert_eq!(
            bytes, SIZE,
            "small request ({SIZE} bytes) should be served in full, got {bytes}"
        );
    }
}



================================================
FILE: src/vmm/src/devices/virtio/rng/event_handler.rs
================================================
// Copyright 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0 use event_manager::{EventOps, Events, MutEventSubscriber}; use vmm_sys_util::epoll::EventSet; use super::{Entropy, RNG_QUEUE}; use crate::devices::virtio::device::VirtioDevice; use crate::logger::{error, warn}; impl Entropy { const PROCESS_ACTIVATE: u32 = 0; const PROCESS_ENTROPY_QUEUE: u32 = 1; const PROCESS_RATE_LIMITER: u32 = 2; fn register_runtime_events(&self, ops: &mut EventOps) { if let Err(err) = ops.add(Events::with_data( &self.queue_events()[RNG_QUEUE], Self::PROCESS_ENTROPY_QUEUE, EventSet::IN, )) { error!("entropy: Failed to register queue event: {err}"); } if let Err(err) = ops.add(Events::with_data( self.rate_limiter(), Self::PROCESS_RATE_LIMITER, EventSet::IN, )) { error!("entropy: Failed to register rate-limiter event: {err}"); } } fn register_activate_event(&self, ops: &mut EventOps) { if let Err(err) = ops.add(Events::with_data( self.activate_event(), Self::PROCESS_ACTIVATE, EventSet::IN, )) { error!("entropy: Failed to register activate event: {err}"); } } fn process_activate_event(&self, ops: &mut EventOps) { if let Err(err) = self.activate_event().read() { error!("entropy: Failed to consume activate event: {err}"); } // Register runtime events self.register_runtime_events(ops); // Remove activate event if let Err(err) = ops.remove(Events::with_data( self.activate_event(), Self::PROCESS_ACTIVATE, EventSet::IN, )) { error!("entropy: Failed to un-register activate event: {err}"); } } } impl MutEventSubscriber for Entropy { fn init(&mut self, ops: &mut event_manager::EventOps) { // This function can be called during different points in the device lifetime: // - shortly after device creation, // - on device activation (is-activated already true at this point), // - on device restore from snapshot. 
if self.is_activated() { self.register_runtime_events(ops); } else { self.register_activate_event(ops); } } fn process(&mut self, events: event_manager::Events, ops: &mut event_manager::EventOps) { let event_set = events.event_set(); let source = events.data(); if !event_set.contains(EventSet::IN) { warn!("entropy: Received unknown event: {event_set:?} from source {source}"); return; } if !self.is_activated() { warn!("entropy: The device is not activated yet. Spurious event received: {source}"); return; } match source { Self::PROCESS_ACTIVATE => self.process_activate_event(ops), Self::PROCESS_ENTROPY_QUEUE => self.process_entropy_queue_event(), Self::PROCESS_RATE_LIMITER => self.process_rate_limiter_event(), _ => { warn!("entropy: Unknown event received: {source}"); } } } } ================================================ FILE: src/vmm/src/devices/virtio/rng/metrics.rs ================================================ // Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 //! Defines the metrics system for entropy devices. //! //! # Metrics format //! The metrics are flushed in JSON when requested by vmm::logger::metrics::METRICS.write(). //! //! ## JSON example with metrics: //! ```json //! "entropy": { //! "activate_fails": "SharedIncMetric", //! "entropy_event_fails": "SharedIncMetric", //! "entropy_event_count": "SharedIncMetric", //! ... //! } //! } //! ``` //! Each `entropy` field in the example above is a serializable `EntropyDeviceMetrics` structure //! collecting metrics such as `activate_fails`, `entropy_event_fails` etc. for the entropy device. //! Since entropy doesn't support multiple devices, there is no per device metrics and //! `entropy` represents the aggregate entropy metrics. //! //! # Design //! The main design goals of this system are: //! * Have a consistent approach of keeping device related metrics in the individual devices //! modules. //! 
* To decouple entropy device metrics from logger module by moving EntropyDeviceMetrics out of //! FirecrackerDeviceMetrics. //! * Rely on `serde` to provide the actual serialization for writing the metrics. //! //! The system implements 1 type of metrics: //! * Shared Incremental Metrics (SharedIncMetrics) - dedicated for the metrics which need a counter //! (i.e the number of times an API request failed). These metrics are reset upon flush. use serde::ser::SerializeMap; use serde::{Serialize, Serializer}; use crate::logger::SharedIncMetric; /// Stores aggregated entropy metrics pub(super) static METRICS: EntropyDeviceMetrics = EntropyDeviceMetrics::new(); /// Called by METRICS.flush(), this function facilitates serialization of entropy device metrics. pub fn flush_metrics(serializer: S) -> Result { let mut seq = serializer.serialize_map(Some(1))?; seq.serialize_entry("entropy", &METRICS)?; seq.end() } #[derive(Debug, Serialize)] pub(super) struct EntropyDeviceMetrics { /// Number of device activation failures pub activate_fails: SharedIncMetric, /// Number of entropy queue event handling failures pub entropy_event_fails: SharedIncMetric, /// Number of entropy requests handled pub entropy_event_count: SharedIncMetric, /// Number of entropy bytes provided to guest pub entropy_bytes: SharedIncMetric, /// Number of errors while getting random bytes on host pub host_rng_fails: SharedIncMetric, /// Number of times an entropy request was rate limited pub entropy_rate_limiter_throttled: SharedIncMetric, /// Number of events associated with the rate limiter pub rate_limiter_event_count: SharedIncMetric, } impl EntropyDeviceMetrics { /// Const default construction. 
const fn new() -> Self {
        Self {
            activate_fails: SharedIncMetric::new(),
            entropy_event_fails: SharedIncMetric::new(),
            entropy_event_count: SharedIncMetric::new(),
            entropy_bytes: SharedIncMetric::new(),
            host_rng_fails: SharedIncMetric::new(),
            entropy_rate_limiter_throttled: SharedIncMetric::new(),
            rate_limiter_event_count: SharedIncMetric::new(),
        }
    }
}

#[cfg(test)]
pub mod tests {
    use super::*;
    use crate::logger::IncMetric;

    #[test]
    fn test_entropy_dev_metrics() {
        let entropy_metrics: EntropyDeviceMetrics = EntropyDeviceMetrics::new();
        let entropy_metrics_local: String = serde_json::to_string(&entropy_metrics).unwrap();
        // the 1st serialize flushes the metrics and resets values to 0 so that
        // we can compare the values with local metrics.
        serde_json::to_string(&METRICS).unwrap();
        let entropy_metrics_global: String = serde_json::to_string(&METRICS).unwrap();
        assert_eq!(entropy_metrics_local, entropy_metrics_global);
        entropy_metrics.entropy_event_count.inc();
        assert_eq!(entropy_metrics.entropy_event_count.count(), 1);
    }
}



================================================
FILE: src/vmm/src/devices/virtio/rng/mod.rs
================================================
// Copyright 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

pub mod device;
mod event_handler;
pub mod metrics;
pub mod persist;

pub use self::device::{Entropy, EntropyError};

// The virtio-rng device uses a single request virtqueue.
pub(crate) const RNG_NUM_QUEUES: usize = 1;
pub(crate) const RNG_QUEUE: usize = 0;



================================================
FILE: src/vmm/src/devices/virtio/rng/persist.rs
================================================
// Copyright 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

//! Defines the structures needed for saving/restoring entropy devices.

use std::sync::Arc;

use serde::{Deserialize, Serialize};

use crate::devices::virtio::device::VirtioDeviceType;
use crate::devices::virtio::persist::{PersistError as VirtioStateError, VirtioDeviceState};
use crate::devices::virtio::queue::FIRECRACKER_MAX_QUEUE_SIZE;
use crate::devices::virtio::rng::{Entropy, EntropyError, RNG_NUM_QUEUES};
use crate::devices::virtio::transport::VirtioInterrupt;
use crate::rate_limiter::RateLimiter;
use crate::rate_limiter::persist::RateLimiterState;
use crate::snapshot::Persist;
use crate::vstate::memory::GuestMemoryMmap;

/// Snapshot state of an entropy device.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EntropyState {
    pub virtio_state: VirtioDeviceState,
    rate_limiter_state: RateLimiterState,
}

/// Auxiliary structure for restoring an entropy device.
#[derive(Debug)]
pub struct EntropyConstructorArgs {
    pub mem: GuestMemoryMmap,
}

#[derive(Debug, thiserror::Error, displaydoc::Display)]
pub enum EntropyPersistError {
    /// Create entropy: {0}
    CreateEntropy(#[from] EntropyError),
    /// Virtio state: {0}
    VirtioState(#[from] VirtioStateError),
    /// Restore rate limiter: {0}
    RestoreRateLimiter(#[from] std::io::Error),
}

impl Persist<'_> for Entropy {
    type State = EntropyState;
    type ConstructorArgs = EntropyConstructorArgs;
    type Error = EntropyPersistError;

    fn save(&self) -> Self::State {
        EntropyState {
            virtio_state: VirtioDeviceState::from_device(self),
            rate_limiter_state: self.rate_limiter().save(),
        }
    }

    fn restore(
        constructor_args: Self::ConstructorArgs,
        state: &Self::State,
    ) -> Result<Self, Self::Error> {
        // Rebuild the queues from the snapshot, validating them against the
        // restored guest memory before handing them to the device.
        let queues = state.virtio_state.build_queues_checked(
            &constructor_args.mem,
            VirtioDeviceType::Rng,
            RNG_NUM_QUEUES,
            FIRECRACKER_MAX_QUEUE_SIZE,
        )?;
        let rate_limiter = RateLimiter::restore((), &state.rate_limiter_state)?;
        let mut entropy = Entropy::new_with_queues(queues, rate_limiter)?;
        entropy.set_avail_features(state.virtio_state.avail_features);
        entropy.set_acked_features(state.virtio_state.acked_features);

        Ok(entropy)
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::devices::virtio::device::VirtioDevice;
    use crate::devices::virtio::rng::device::ENTROPY_DEV_ID;
    use crate::devices::virtio::test_utils::default_interrupt;
    use crate::devices::virtio::test_utils::test::create_virtio_mem;

    #[test]
    fn test_persistence() {
        let entropy = Entropy::new(RateLimiter::default()).unwrap();

        let entropy_state = entropy.save();
        let serialized_data = bitcode::serialize(&entropy_state).unwrap();

        let guest_mem = create_virtio_mem();
        let restored_state = bitcode::deserialize(&serialized_data).unwrap();
        let restored =
            Entropy::restore(EntropyConstructorArgs { mem: guest_mem }, &restored_state).unwrap();

        assert_eq!(restored.device_type(), VirtioDeviceType::Rng);
        assert_eq!(restored.id(), ENTROPY_DEV_ID);
        assert!(!restored.is_activated());
        assert!(!entropy.is_activated());
        assert_eq!(restored.avail_features(), entropy.avail_features());
        assert_eq!(restored.acked_features(), entropy.acked_features());
    }
}



================================================
FILE: src/vmm/src/devices/virtio/test_utils.rs
================================================
// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

#![doc(hidden)]

use std::fmt::Debug;
use std::marker::PhantomData;
use std::mem;
use std::sync::Arc;
use std::sync::atomic::{AtomicUsize, Ordering};

use crate::devices::virtio::queue::Queue;
use crate::devices::virtio::transport::VirtioInterrupt;
use crate::devices::virtio::transport::mmio::IrqTrigger;
use crate::test_utils::single_region_mem;
use crate::utils::{align_up, u64_to_usize};
use crate::vstate::memory::{Address, Bytes, GuestAddress, GuestMemoryMmap};

#[macro_export]
macro_rules!
check_metric_after_block { ($metric:expr, $delta:expr, $block:expr) => {{ let before = $metric.count(); let _ = $block; assert_eq!($metric.count() - before, $delta, "unexpected metric value"); }}; } /// Creates a [`GuestMemoryMmap`] with a single region of size 65536 (= 0x10000 hex) starting at /// guest physical address 0 pub fn default_mem() -> GuestMemoryMmap { single_region_mem(0x10000) } /// Creates a default ['IrqTrigger'] interrupt for a VirtIO device. pub fn default_interrupt() -> Arc { Arc::new(IrqTrigger::new()) } #[derive(Debug)] pub struct InputData { pub data: Vec, pub read_pos: AtomicUsize, } impl InputData { pub fn get_slice(&self, len: usize) -> &[u8] { let old_pos = self.read_pos.fetch_add(len, Ordering::AcqRel); &self.data[old_pos..old_pos + len] } } // Represents a location in GuestMemoryMmap which holds a given type. #[derive(Debug)] pub struct SomeplaceInMemory<'a, T> { pub location: GuestAddress, mem: &'a GuestMemoryMmap, phantom: PhantomData<*const T>, } // The ByteValued trait is required to use mem.read_obj_from_addr and write_obj_at_addr. impl<'a, T> SomeplaceInMemory<'a, T> where T: Debug + crate::vstate::memory::ByteValued, { fn new(location: GuestAddress, mem: &'a GuestMemoryMmap) -> Self { SomeplaceInMemory { location, mem, phantom: PhantomData, } } // Reads from the actual memory location. pub fn get(&self) -> T { self.mem.read_obj(self.location).unwrap() } // Writes to the actual memory location. pub fn set(&self, val: T) { self.mem.write_obj(val, self.location).unwrap() } // This function returns a place in memory which holds a value of type U, and starts // offset bytes after the current location. 
fn map_offset(&self, offset: usize) -> SomeplaceInMemory<'a, U> { SomeplaceInMemory { location: self.location.checked_add(offset as u64).unwrap(), mem: self.mem, phantom: PhantomData, } } // This function returns a place in memory which holds a value of type U, and starts // immediately after the end of self (which is location + sizeof(T)). fn next_place(&self) -> SomeplaceInMemory<'a, U> { self.map_offset::(mem::size_of::()) } fn end(&self) -> GuestAddress { self.location .checked_add(mem::size_of::() as u64) .unwrap() } } // Represents a virtio descriptor in guest memory. #[derive(Debug)] pub struct VirtqDesc<'a> { pub addr: SomeplaceInMemory<'a, u64>, pub len: SomeplaceInMemory<'a, u32>, pub flags: SomeplaceInMemory<'a, u16>, pub next: SomeplaceInMemory<'a, u16>, } impl<'a> VirtqDesc<'a> { pub const ALIGNMENT: u64 = 16; fn new(start: GuestAddress, mem: &'a GuestMemoryMmap) -> Self { assert_eq!(start.0 & (Self::ALIGNMENT - 1), 0); let addr = SomeplaceInMemory::new(start, mem); let len = addr.next_place(); let flags = len.next_place(); let next = flags.next_place(); VirtqDesc { addr, len, flags, next, } } fn start(&self) -> GuestAddress { self.addr.location } fn end(&self) -> GuestAddress { self.next.end() } pub fn set(&self, addr: u64, len: u32, flags: u16, next: u16) { self.addr.set(addr); self.len.set(len); self.flags.set(flags); self.next.set(next); } pub fn memory(&self) -> &'a GuestMemoryMmap { self.addr.mem } pub fn set_data(&mut self, data: &[u8]) { assert!(self.len.get() as usize >= data.len()); let mem = self.addr.mem; mem.write_slice(data, GuestAddress::new(self.addr.get())) .unwrap(); } pub fn check_data(&self, expected_data: &[u8]) { assert!(self.len.get() as usize >= expected_data.len()); let mem = self.addr.mem; let mut buf = vec![0; expected_data.len()]; mem.read_slice(&mut buf, GuestAddress::new(self.addr.get())) .unwrap(); assert_eq!(buf.as_slice(), expected_data); } } // Represents a virtio queue ring. 
The only difference between the used and available rings, // is the ring element type. #[derive(Debug)] pub struct VirtqRing<'a, T> { pub flags: SomeplaceInMemory<'a, u16>, pub idx: SomeplaceInMemory<'a, u16>, pub ring: Vec>, pub event: SomeplaceInMemory<'a, u16>, } impl<'a, T> VirtqRing<'a, T> where T: Debug + crate::vstate::memory::ByteValued, { fn new(start: GuestAddress, mem: &'a GuestMemoryMmap, qsize: u16, alignment: usize) -> Self { assert_eq!(start.0 & (alignment as u64 - 1), 0); let flags = SomeplaceInMemory::new(start, mem); let idx = flags.next_place(); let mut ring = Vec::with_capacity(qsize as usize); ring.push(idx.next_place()); for _ in 1..qsize as usize { let x = ring.last().unwrap().next_place(); ring.push(x) } let event = ring.last().unwrap().next_place(); flags.set(0); idx.set(0); event.set(0); VirtqRing { flags, idx, ring, event, } } pub fn end(&self) -> GuestAddress { self.event.end() } } #[repr(C)] #[derive(Debug, Clone, Copy, Default)] pub struct VirtqUsedElem { pub id: u32, pub len: u32, } // SAFETY: `VirtqUsedElem` is a POD and contains no padding. unsafe impl crate::vstate::memory::ByteValued for VirtqUsedElem {} pub type VirtqAvail<'a> = VirtqRing<'a, u16>; pub type VirtqUsed<'a> = VirtqRing<'a, VirtqUsedElem>; #[derive(Debug)] pub struct VirtQueue<'a> { pub dtable: Vec>, pub avail: VirtqAvail<'a>, pub used: VirtqUsed<'a>, } impl<'a> VirtQueue<'a> { // We try to make sure things are aligned properly :-s pub fn new(start: GuestAddress, mem: &'a GuestMemoryMmap, qsize: u16) -> Self { // power of 2? 
assert!(qsize > 0 && qsize & (qsize - 1) == 0); let mut dtable = Vec::with_capacity(qsize as usize); let mut end = start; for _ in 0..qsize { let d = VirtqDesc::new(end, mem); end = d.end(); dtable.push(d); } const AVAIL_ALIGN: usize = 2; let avail = VirtqAvail::new(end, mem, qsize, AVAIL_ALIGN); const USED_ALIGN: u64 = 4; let mut x = avail.end().0; x = align_up(x, USED_ALIGN); let used = VirtqUsed::new(GuestAddress(x), mem, qsize, u64_to_usize(USED_ALIGN)); VirtQueue { dtable, avail, used, } } pub fn memory(&self) -> &'a GuestMemoryMmap { self.used.flags.mem } pub fn size(&self) -> u16 { // Safe to unwrap because the size is specified as a u16 when the table is first created. self.dtable.len().try_into().unwrap() } pub fn dtable_start(&self) -> GuestAddress { self.dtable.first().unwrap().start() } pub fn avail_start(&self) -> GuestAddress { self.avail.flags.location } pub fn used_start(&self) -> GuestAddress { self.used.flags.location } // Creates a new Queue, using the underlying memory regions represented by the VirtQueue. 
pub fn create_queue(&self) -> Queue { let mut q = Queue::new(self.size()); q.size = self.size(); q.ready = true; q.desc_table_address = self.dtable_start(); q.avail_ring_address = self.avail_start(); q.used_ring_address = self.used_start(); q.initialize(self.memory()).unwrap(); q } pub fn start(&self) -> GuestAddress { self.dtable_start() } pub fn end(&self) -> GuestAddress { self.used.end() } pub fn check_used_elem(&self, used_index: u16, expected_id: u16, expected_len: u32) { let used_elem = self.used.ring[used_index as usize].get(); assert_eq!(used_elem.id, u32::from(expected_id)); assert_eq!(used_elem.len, expected_len); } } #[cfg(test)] pub(crate) mod test { use std::fmt::{self, Debug}; use std::sync::{Arc, Mutex, MutexGuard}; use event_manager::{EventManager, MutEventSubscriber, SubscriberId, SubscriberOps}; use crate::devices::virtio::device::VirtioDevice; use crate::devices::virtio::net::MAX_BUFFER_SIZE; use crate::devices::virtio::queue::{Queue, VIRTQ_DESC_F_NEXT}; use crate::devices::virtio::test_utils::{VirtQueue, VirtqDesc, default_interrupt}; use crate::test_utils::single_region_mem; use crate::vstate::memory::{Address, GuestAddress, GuestMemoryMmap}; pub fn create_virtio_mem() -> GuestMemoryMmap { single_region_mem(MAX_BUFFER_SIZE) } /// Provides functionality necessary for testing a VirtIO device with /// [`VirtioTestHelper`](VirtioTestHelper) pub trait VirtioTestDevice: VirtioDevice { /// Replace the queues used by the device fn set_queues(&mut self, queues: Vec); /// Number of queues this device supports fn num_queues(&self) -> usize; } /// A helper type to allow testing VirtIO devices /// /// `VirtioTestHelper` provides functionality to allow testing a VirtIO device by /// 1. Emulating the guest size of things (essentially the handling of Virtqueues) and /// 2. 
Emulating an event loop that handles device specific events
///
/// It creates and handles a guest memory address space, which it uses for keeping the
/// Virtqueues of the device and storing data, i.e. storing data described by DescriptorChains
/// that the guest would pass to the device during normal operation
pub struct VirtioTestHelper<'a, T>
where
    T: VirtioTestDevice + MutEventSubscriber,
{
    // Event loop used to drive the device's event handlers in tests.
    event_manager: EventManager<Arc<Mutex<T>>>,
    // Kept so the subscription stays alive for the helper's lifetime.
    _subscriber_id: SubscriberId,
    // The device under test, shared with the event manager.
    device: Arc<Mutex<T>>,
    // Guest-memory backed virtqueues wired into the device.
    virtqueues: Vec<VirtQueue<'a>>,
}

impl<T: VirtioTestDevice + MutEventSubscriber + Debug> fmt::Debug for VirtioTestHelper<'_, T> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // `EventManager` does not implement `Debug`, so print a placeholder for it.
        f.debug_struct("VirtioTestHelper")
            .field("event_manager", &"?")
            .field("_subscriber_id", &self._subscriber_id)
            .field("device", &self.device)
            .field("virtqueues", &self.virtqueues)
            .finish()
    }
}

impl<'a, T> VirtioTestHelper<'a, T>
where
    T: VirtioTestDevice + MutEventSubscriber + Debug,
{
    const QUEUE_SIZE: u16 = 16;

    // Helper function to create a set of Virtqueues for the device.
    // Queues are laid out back-to-back in guest memory starting at address 0,
    // each one aligned to the descriptor table alignment.
    fn create_virtqueues(mem: &'a GuestMemoryMmap, num_queues: usize) -> Vec<VirtQueue<'a>> {
        (0..num_queues)
            .scan(GuestAddress(0), |next_addr, _| {
                let vqueue = VirtQueue::new(*next_addr, mem, Self::QUEUE_SIZE);
                // Address for the next virt queue will be the first aligned address after
                // the end of this one.
                *next_addr = vqueue.end().unchecked_align_up(VirtqDesc::ALIGNMENT);
                Some(vqueue)
            })
            .collect::<Vec<_>>()
    }

    /// Create a new Virtio Device test helper
    ///
    /// Builds the virtqueues in `mem`, installs them in `device` and registers the
    /// device with a fresh `EventManager`.
    pub fn new(mem: &'a GuestMemoryMmap, mut device: T) -> VirtioTestHelper<'a, T> {
        let mut event_manager = EventManager::new().unwrap();

        let virtqueues = Self::create_virtqueues(mem, device.num_queues());
        let queues = virtqueues.iter().map(|vq| vq.create_queue()).collect();
        device.set_queues(queues);
        let device = Arc::new(Mutex::new(device));
        let _subscriber_id = event_manager.add_subscriber(device.clone());

        Self {
            event_manager,
            _subscriber_id,
            device,
            virtqueues,
        }
    }

    /// Get a (locked) reference to the device
    pub fn device(&mut self) -> MutexGuard<'_, T> {
        self.device.lock().unwrap()
    }

    /// Activate the device
    pub fn activate_device(&mut self, mem: &'a GuestMemoryMmap) {
        let interrupt = default_interrupt();
        self.device
            .lock()
            .unwrap()
            .activate(mem.clone(), interrupt)
            .unwrap();
        // Process the activate event
        let ev_count = self.event_manager.run_with_timeout(100).unwrap();
        assert_eq!(ev_count, 1);
    }

    /// Get the start of the data region
    ///
    /// The first address that can be used for data in the guest memory mmap
    /// is the first address after the memory occupied by the last Virtqueue
    /// used by the device
    pub fn data_address(&self) -> u64 {
        self.virtqueues.last().unwrap().end().raw_value()
    }

    /// Add a new Descriptor in one of the device's queues in the form of scatter gather
    ///
    /// This function adds in one of the queues of the device a DescriptorChain at some offset
    /// in the "data range" of the guest memory. The number of descriptors to create is passed
    /// as a list of descriptors (a tuple of (index, addr, length, flags)).
    ///
    /// The total size of the buffer is the sum of all lengths of this list of descriptors.
    /// The first descriptor will be stored at `self.data_address() + addr_offset`. Subsequent
    /// descriptors will be placed at random addresses after that.
    ///
    /// # Arguments
    ///
    /// * `queue` - The index of the device queue to use
    /// * `addr_offset` - Offset within the data region where to put the first descriptor
    /// * `desc_list` - List of descriptors to create in the chain
    pub fn add_scatter_gather(
        &mut self,
        queue: usize,
        // NOTE(review): `addr_offset` is currently unused here — descriptor addresses come
        // straight from `desc_list`; confirm against callers whether this is intentional.
        addr_offset: u64,
        desc_list: &[(u16, u64, u32, u16)],
    ) {
        let device = self.device.lock().unwrap();
        let event_fd = &device.queue_events()[queue];
        let vq = &self.virtqueues[queue];

        // Create the descriptor chain
        let mut iter = desc_list.iter().peekable();
        while let Some(&(index, addr, len, flags)) = iter.next() {
            let desc = &vq.dtable[index as usize];
            desc.set(addr, len, flags, 0);
            if let Some(&&(next_index, _, _, _)) = iter.peek() {
                // Chain this descriptor to the next one in the list.
                desc.flags.set(flags | VIRTQ_DESC_F_NEXT);
                desc.next.set(next_index);
            }
        }

        // Mark the chain as available.
        if let Some(&(index, _, _, _)) = desc_list.first() {
            let ring_index = vq.avail.idx.get();
            vq.avail.ring[ring_index as usize].set(index);
            vq.avail.idx.set(ring_index + 1);
        }
        // Notify the device that a new chain is available on this queue.
        event_fd.write(1).unwrap();
    }

    /// Get the address of a descriptor
    pub fn desc_address(&self, queue: usize, index: usize) -> GuestAddress {
        GuestAddress(self.virtqueues[queue].dtable[index].addr.get())
    }

    /// Add a new Descriptor in one of the device's queues
    ///
    /// This function adds in one of the queues of the device a DescriptorChain at some offset
    /// in the "data range" of the guest memory. The number of descriptors to create is passed
    /// as a list of descriptors (a triple of (index, length, flags)).
    ///
    /// The total size of the buffer is the sum of all lengths of this list of descriptors.
    /// The first descriptor will be stored at `self.data_address() + addr_offset`. Subsequent
    /// descriptors will be placed at random addresses after that.
    ///
    /// # Arguments
    ///
    /// * `queue` - The index of the device queue to use
    /// * `addr_offset` - Offset within the data region where to put the first descriptor
    /// * `desc_list` - List of descriptors to create in the chain
    pub fn add_desc_chain(
        &mut self,
        queue: usize,
        addr_offset: u64,
        desc_list: &[(u16, u32, u16)],
    ) {
        let device = self.device.lock().unwrap();
        let event_fd = &device.queue_events()[queue];
        let vq = &self.virtqueues[queue];

        // Create the descriptor chain
        let mut iter = desc_list.iter().peekable();
        let mut addr = self.data_address() + addr_offset;
        while let Some(&(index, len, flags)) = iter.next() {
            let desc = &vq.dtable[index as usize];
            desc.set(addr, len, flags, 0);
            if let Some(&&(next_index, _, _)) = iter.peek() {
                desc.flags.set(flags | VIRTQ_DESC_F_NEXT);
                desc.next.set(next_index);
            }

            addr += u64::from(len);
            // Add small random gaps between descriptor addresses in order to make sure we
            // don't blindly read contiguous memory.
            addr += u64::from(vmm_sys_util::rand::xor_pseudo_rng_u32()) % 10;
        }

        // Mark the chain as available.
        if let Some(&(index, _, _)) = desc_list.first() {
            let ring_index = vq.avail.idx.get();
            vq.avail.ring[ring_index as usize].set(index);
            vq.avail.idx.set(ring_index + 1);
        }
        event_fd.write(1).unwrap();
    }

    /// Emulate the device for a period of time
    ///
    /// # Arguments
    ///
    /// * `msec` - The amount of time in milliseconds for which to emulate
    pub fn emulate_for_msec(&mut self, msec: i32) -> Result<usize, EventManagerError> {
        self.event_manager.run_with_timeout(msec)
    }
}
}


================================================
FILE: src/vmm/src/devices/virtio/transport/mmio.rs
================================================
// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the THIRD-PARTY file.
use std::fmt::Debug; use std::sync::atomic::{AtomicU32, Ordering}; use std::sync::{Arc, Barrier, Mutex, MutexGuard}; use vmm_sys_util::eventfd::EventFd; use super::{VirtioInterrupt, VirtioInterruptType}; use crate::devices::virtio::device::VirtioDevice; use crate::devices::virtio::device_status; use crate::devices::virtio::queue::Queue; use crate::logger::{IncMetric, METRICS, error, warn}; use crate::utils::byte_order; use crate::vstate::bus::BusDevice; use crate::vstate::interrupts::InterruptError; use crate::vstate::memory::{GuestAddress, GuestMemoryMmap}; // TODO crosvm uses 0 here, but IIRC virtio specified some other vendor id that should be used const VENDOR_ID: u32 = 0; /// Interrupt flags (re: interrupt status & acknowledge registers). /// See linux/virtio_mmio.h. pub const VIRTIO_MMIO_INT_VRING: u32 = 0x01; pub const VIRTIO_MMIO_INT_CONFIG: u32 = 0x02; // required by the virtio mmio device register layout at offset 0 from base const MMIO_MAGIC_VALUE: u32 = 0x7472_6976; // current version specified by the mmio standard (legacy devices used 1 here) const MMIO_VERSION: u32 = 2; /// Implements the /// [MMIO](http://docs.oasis-open.org/virtio/virtio/v1.0/cs04/virtio-v1.0-cs04.html#x1-1090002) /// transport for virtio devices. /// /// This requires 3 points of installation to work with a VM: /// /// 1. Mmio reads and writes must be sent to this device at what is referred to here as MMIO base. /// 1. `Mmio::queue_evts` must be installed at `virtio::NOTIFY_REG_OFFSET` offset from the MMIO /// base. Each event in the array must be signaled if the index is written at that offset. /// 1. `Mmio::interrupt_evt` must signal an interrupt that the guest driver is listening to when it /// is written to. /// /// Typically one page (4096 bytes) of MMIO address space is sufficient to handle this transport /// and inner virtio device. #[derive(Debug, Clone)] pub struct MmioTransport { device: Arc>, // The register where feature bits are stored. 
pub(crate) features_select: u32, // The register where features page is selected. pub(crate) acked_features_select: u32, pub(crate) queue_select: u32, pub(crate) device_status: u32, pub(crate) config_generation: u32, mem: GuestMemoryMmap, pub(crate) interrupt: Arc, pub is_vhost_user: bool, } impl MmioTransport { /// Constructs a new MMIO transport for the given virtio device. pub fn new( mem: GuestMemoryMmap, interrupt: Arc, device: Arc>, is_vhost_user: bool, ) -> MmioTransport { MmioTransport { device, features_select: 0, acked_features_select: 0, queue_select: 0, device_status: device_status::INIT, config_generation: 0, mem, interrupt, is_vhost_user, } } /// Gets the encapsulated locked VirtioDevice. pub fn locked_device(&self) -> MutexGuard<'_, dyn VirtioDevice + 'static> { self.device.lock().expect("Poisoned lock") } /// Gets the encapsulated VirtioDevice. pub fn device(&self) -> Arc> { self.device.clone() } fn check_device_status(&self, set: u32, clr: u32) -> bool { self.device_status & (set | clr) == set } fn with_queue(&self, d: U, f: F) -> U where F: FnOnce(&Queue) -> U, U: Debug, { match self .locked_device() .queues() .get(self.queue_select as usize) { Some(queue) => f(queue), None => d, } } fn with_queue_mut(&mut self, f: F) -> bool { if let Some(queue) = self .locked_device() .queues_mut() .get_mut(self.queue_select as usize) { f(queue); true } else { false } } fn update_queue_field(&mut self, f: F) { if self.check_device_status( device_status::FEATURES_OK, device_status::DRIVER_OK | device_status::FAILED, ) { self.with_queue_mut(f); } else { warn!( "update virtio queue in invalid state {:#x}", self.device_status ); } } fn reset(&mut self) { if self.locked_device().is_activated() { warn!("reset device while it's still in active state"); } self.features_select = 0; self.acked_features_select = 0; self.queue_select = 0; self.interrupt.irq_status.store(0, Ordering::SeqCst); self.device_status = device_status::INIT; // . 
Keep interrupt_evt and queue_evts as is. There may be pending notifications in those // eventfds, but nothing will happen other than supurious wakeups. // . Do not reset config_generation and keep it monotonically increasing for queue in self.locked_device().queues_mut() { *queue = Queue::new(queue.max_size); } } /// Update device status according to the state machine defined by VirtIO Spec 1.0. /// Please refer to VirtIO Spec 1.0, section 2.1.1 and 3.1.1. /// /// The driver MUST update device status, setting bits to indicate the completed steps /// of the driver initialization sequence specified in 3.1. The driver MUST NOT clear /// a device status bit. If the driver sets the FAILED bit, the driver MUST later reset /// the device before attempting to re-initialize. #[allow(unused_assignments)] fn set_device_status(&mut self, status: u32) { use device_status::*; // match changed bits match !self.device_status & status { ACKNOWLEDGE if self.device_status == INIT => { self.device_status = status; } DRIVER if self.device_status == ACKNOWLEDGE => { self.device_status = status; } FEATURES_OK if self.device_status == (ACKNOWLEDGE | DRIVER) => { self.device_status = status; } DRIVER_OK if self.device_status == (ACKNOWLEDGE | DRIVER | FEATURES_OK) => { self.device_status = status; let mut locked_device = self.device.lock().expect("Poisoned lock"); let device_activated = locked_device.is_activated(); if !device_activated { // temporary variable needed for borrow checker let activate_result = locked_device.activate(self.mem.clone(), self.interrupt.clone()); if let Err(err) = activate_result { self.device_status |= DEVICE_NEEDS_RESET; // Section 2.1.2 of the specification states that we need to send a device // configuration change interrupt let _ = self.interrupt.trigger(VirtioInterruptType::Config); error!("Failed to activate virtio device: {}", err) } } } _ if (status & FAILED) != 0 => { // TODO: notify backend driver to stop the device self.device_status |= FAILED; } _ if 
status == 0 => { { let mut locked_device = self.device.lock().expect("Poisoned lock"); if locked_device.is_activated() { let mut device_status = self.device_status; let reset_result = locked_device.reset(); match reset_result { Some((_interrupt_evt, mut _queue_evts)) => {} None => { device_status |= FAILED; } } self.device_status = device_status; } } // If the backend device driver doesn't support reset, // just leave the device marked as FAILED. if self.device_status & FAILED == 0 { self.reset(); } } _ => { warn!( "invalid virtio driver status transition: {:#x} -> {:#x}", self.device_status, status ); } } } } impl BusDevice for MmioTransport { fn read(&mut self, base: u64, offset: u64, data: &mut [u8]) { match offset { 0x00..=0xff if data.len() == 4 => { let v = match offset { 0x0 => MMIO_MAGIC_VALUE, 0x04 => MMIO_VERSION, 0x08 => self.locked_device().device_type() as u32, 0x0c => VENDOR_ID, // vendor id 0x10 => { let mut features = self .locked_device() .avail_features_by_page(self.features_select); if self.features_select == 1 { features |= 0x1; // enable support of VirtIO Version 1 } features } 0x34 => self.with_queue(0, |q| u32::from(q.max_size)), 0x44 => self.with_queue(0, |q| u32::from(q.ready)), 0x60 => { // For vhost-user backed devices we need some additional // logic to differentiate between `VIRTIO_MMIO_INT_VRING` // and `VIRTIO_MMIO_INT_CONFIG` statuses. // Because backend cannot propagate any interrupt status // changes to the FC we always try to serve the `VIRTIO_MMIO_INT_VRING` // status. But in case when backend changes the configuration and // user triggers the manual notification, FC needs to send // `VIRTIO_MMIO_INT_CONFIG`. We know that for vhost-user devices the // interrupt status can only be 0 (no one set any bits) or // `VIRTIO_MMIO_INT_CONFIG`. 
Based on this knowledge we can simply // check if the current interrupt_status is equal to the // `VIRTIO_MMIO_INT_CONFIG` or not to understand if we need to send // `VIRTIO_MMIO_INT_CONFIG` or // `VIRTIO_MMIO_INT_VRING`. let is = self.interrupt.irq_status.load(Ordering::SeqCst); if !self.is_vhost_user { is } else if is == VIRTIO_MMIO_INT_CONFIG { VIRTIO_MMIO_INT_CONFIG } else { VIRTIO_MMIO_INT_VRING } } 0x70 => self.device_status, 0xfc => self.config_generation, _ => { warn!("unknown virtio mmio register read: {:#x}", offset); return; } }; byte_order::write_le_u32(data, v); } 0x100..=0xfff => self.locked_device().read_config(offset - 0x100, data), _ => { warn!( "invalid virtio mmio read: {base:#x}:{offset:#x}:{:#x}", data.len() ); } }; } fn write(&mut self, base: u64, offset: u64, data: &[u8]) -> Option> { fn hi(v: &mut GuestAddress, x: u32) { *v = (*v & 0xffff_ffff) | (u64::from(x) << 32) } fn lo(v: &mut GuestAddress, x: u32) { *v = (*v & !0xffff_ffff) | u64::from(x) } match offset { 0x00..=0xff if data.len() == 4 => { let v = byte_order::read_le_u32(data); match offset { 0x14 => self.features_select = v, 0x20 => { if self.check_device_status( device_status::DRIVER, device_status::FEATURES_OK | device_status::FAILED | device_status::DEVICE_NEEDS_RESET, ) { self.locked_device() .ack_features_by_page(self.acked_features_select, v); } else { warn!( "ack virtio features in invalid state {:#x}", self.device_status ); } } 0x24 => self.acked_features_select = v, 0x30 => self.queue_select = v, 0x38 => self.update_queue_field(|q| q.size = (v & 0xffff) as u16), 0x44 => self.update_queue_field(|q| q.ready = v == 1), 0x64 => { if self.check_device_status(device_status::DRIVER_OK, 0) { self.interrupt.irq_status.fetch_and(!v, Ordering::SeqCst); } } 0x70 => self.set_device_status(v), 0x80 => self.update_queue_field(|q| lo(&mut q.desc_table_address, v)), 0x84 => self.update_queue_field(|q| hi(&mut q.desc_table_address, v)), 0x90 => self.update_queue_field(|q| lo(&mut 
q.avail_ring_address, v)), 0x94 => self.update_queue_field(|q| hi(&mut q.avail_ring_address, v)), 0xa0 => self.update_queue_field(|q| lo(&mut q.used_ring_address, v)), 0xa4 => self.update_queue_field(|q| hi(&mut q.used_ring_address, v)), _ => { warn!("unknown virtio mmio register write: {:#x}", offset); } } } 0x100..=0xfff => { if self.check_device_status( device_status::DRIVER, device_status::FAILED | device_status::DEVICE_NEEDS_RESET, ) { self.locked_device().write_config(offset - 0x100, data) } else { warn!("can not write to device config data area before driver is ready"); } } _ => { warn!( "invalid virtio mmio write: {base:#x}:{offset:#x}:{:#x}", data.len() ); } } None } } /// The 2 types of interrupt sources in MMIO transport. #[derive(Debug)] pub enum IrqType { /// Interrupt triggered by change in config. Config, /// Interrupt triggered by used vring buffers. Vring, } impl From for IrqType { fn from(interrupt_type: VirtioInterruptType) -> Self { match interrupt_type { VirtioInterruptType::Config => IrqType::Config, VirtioInterruptType::Queue(_) => IrqType::Vring, } } } /// Helper struct that is responsible for triggering guest IRQs #[derive(Debug)] pub struct IrqTrigger { pub(crate) irq_status: Arc, pub(crate) irq_evt: EventFd, } impl Default for IrqTrigger { fn default() -> Self { Self::new() } } impl VirtioInterrupt for IrqTrigger { fn trigger(&self, interrupt_type: VirtioInterruptType) -> Result<(), InterruptError> { METRICS.interrupts.triggers.inc(); match interrupt_type { VirtioInterruptType::Config => self.trigger_irq(IrqType::Config), VirtioInterruptType::Queue(_) => self.trigger_irq(IrqType::Vring), } } fn trigger_queues(&self, queues: &[u16]) -> Result<(), InterruptError> { if queues.is_empty() { Ok(()) } else { METRICS.interrupts.triggers.inc(); self.trigger_irq(IrqType::Vring) } } fn notifier(&self, _interrupt_type: VirtioInterruptType) -> Option<&EventFd> { Some(&self.irq_evt) } fn status(&self) -> Arc { self.irq_status.clone() } #[cfg(test)] fn 
has_pending_interrupt(&self, interrupt_type: VirtioInterruptType) -> bool {
        if let Ok(num_irqs) = self.irq_evt.read() {
            if num_irqs == 0 {
                return false;
            }

            let irq_status = self.irq_status.load(Ordering::SeqCst);
            return matches!(
                (irq_status, interrupt_type.into()),
                (VIRTIO_MMIO_INT_CONFIG, IrqType::Config)
                    | (VIRTIO_MMIO_INT_VRING, IrqType::Vring)
            );
        }
        false
    }

    #[cfg(test)]
    fn ack_interrupt(&self, interrupt_type: VirtioInterruptType) {
        let irq = match interrupt_type {
            VirtioInterruptType::Config => VIRTIO_MMIO_INT_CONFIG,
            VirtioInterruptType::Queue(_) => VIRTIO_MMIO_INT_VRING,
        };
        self.irq_status.fetch_and(!irq, Ordering::SeqCst);
    }
}

impl IrqTrigger {
    pub fn new() -> Self {
        Self {
            irq_status: Arc::new(AtomicU32::new(0)),
            irq_evt: EventFd::new(libc::EFD_NONBLOCK)
                .expect("Could not create EventFd for IrqTrigger"),
        }
    }

    // Sets the matching status bit and signals the guest via the eventfd.
    fn trigger_irq(&self, irq_type: IrqType) -> Result<(), InterruptError> {
        let irq = match irq_type {
            IrqType::Config => VIRTIO_MMIO_INT_CONFIG,
            IrqType::Vring => VIRTIO_MMIO_INT_VRING,
        };
        self.irq_status.fetch_or(irq, Ordering::SeqCst);

        self.irq_evt.write(1).map_err(|err| {
            error!("Failed to send irq to the guest: {:?}", err);
            err
        })?;

        Ok(())
    }
}

#[cfg(test)]
pub(crate) mod tests {
    use std::ops::Deref;

    use event_manager::{EventOps, Events, MutEventSubscriber};
    use vmm_sys_util::eventfd::EventFd;

    use super::*;
    use crate::devices::virtio::ActivateError;
    use crate::devices::virtio::device::{VirtioDevice, VirtioDeviceType};
    use crate::devices::virtio::device_status::DEVICE_NEEDS_RESET;
    use crate::impl_device_type;
    use crate::test_utils::single_region_mem;
    use crate::utils::byte_order::{read_le_u32, write_le_u32};
    use crate::utils::u64_to_usize;
    use crate::vstate::memory::GuestMemoryMmap;

    // Minimal VirtioDevice implementation used to exercise the MMIO transport.
    #[derive(Debug)]
    pub(crate) struct DummyDevice {
        acked_features: u64,
        avail_features: u64,
        interrupt_trigger: Option<Arc<dyn VirtioInterrupt>>,
        queue_evts: Vec<EventFd>,
        queues: Vec<Queue>,
        device_activated: bool,
        config_bytes: [u8; 0xeff],
        // When true, `activate()` fails, letting tests exercise the error path.
        activate_should_error: bool,
    }

    impl DummyDevice {
        pub(crate) fn new() -> Self {
            DummyDevice {
                acked_features: 0,
                avail_features: 0,
                interrupt_trigger: None,
                queue_evts: vec![
                    EventFd::new(libc::EFD_NONBLOCK).unwrap(),
                    EventFd::new(libc::EFD_NONBLOCK).unwrap(),
                ],
                queues: vec![Queue::new(16), Queue::new(32)],
                device_activated: false,
                config_bytes: [0; 0xeff],
                activate_should_error: false,
            }
        }

        pub fn set_avail_features(&mut self, avail_features: u64) {
            self.avail_features = avail_features;
        }
    }

    impl MutEventSubscriber for DummyDevice {
        fn process(&mut self, _: Events, _: &mut EventOps) {}
        fn init(&mut self, _: &mut EventOps) {}
    }

    impl VirtioDevice for DummyDevice {
        impl_device_type!(VirtioDeviceType::Rng);

        fn id(&self) -> &str {
            "dummy"
        }

        fn avail_features(&self) -> u64 {
            self.avail_features
        }

        fn acked_features(&self) -> u64 {
            self.acked_features
        }

        fn set_acked_features(&mut self, acked_features: u64) {
            self.acked_features = acked_features;
        }

        fn queues(&self) -> &[Queue] {
            &self.queues
        }

        fn queues_mut(&mut self) -> &mut [Queue] {
            &mut self.queues
        }

        fn queue_events(&self) -> &[EventFd] {
            &self.queue_evts
        }

        fn interrupt_trigger(&self) -> &dyn VirtioInterrupt {
            self.interrupt_trigger
                .as_ref()
                .expect("Device is not activated")
                .deref()
        }

        fn read_config(&self, offset: u64, data: &mut [u8]) {
            data.copy_from_slice(&self.config_bytes[u64_to_usize(offset)..]);
        }

        fn write_config(&mut self, offset: u64, data: &[u8]) {
            for (i, item) in data.iter().enumerate() {
                self.config_bytes[u64_to_usize(offset) + i] = *item;
            }
        }

        fn activate(
            &mut self,
            _: GuestMemoryMmap,
            interrupt: Arc<dyn VirtioInterrupt>,
        ) -> Result<(), ActivateError> {
            self.device_activated = true;
            self.interrupt_trigger = Some(interrupt);
            if self.activate_should_error {
                Err(ActivateError::EventFd)
            } else {
                Ok(())
            }
        }

        fn is_activated(&self) -> bool {
            self.device_activated
        }
    }

    // Writes `status` to the device status register (offset 0x70) through the bus.
    fn set_device_status(d: &mut MmioTransport, status: u32) {
        let mut buf = [0; 4];
        write_le_u32(&mut buf[..], status);
        d.write(0x0, 0x70, &buf[..]);
    }

    #[test]
    fn test_new() {
        let m = single_region_mem(0x1000);
        let interrupt = Arc::new(IrqTrigger::new());
        let mut dummy = DummyDevice::new();
        // Validate reset is no-op.
        assert!(dummy.reset().is_none());
        let mut d = MmioTransport::new(m, interrupt, Arc::new(Mutex::new(dummy)), false);

        // We just make sure here that the implementation of a mmio device behaves as we expect,
        // given a known virtio device implementation (the dummy device).
        assert_eq!(d.locked_device().queue_events().len(), 2);

        d.queue_select = 0;
        assert_eq!(d.with_queue(0, |q| q.max_size), 16);
        assert!(d.with_queue_mut(|q| q.size = 16));
        assert_eq!(d.locked_device().queues()[d.queue_select as usize].size, 16);

        d.queue_select = 1;
        assert_eq!(d.with_queue(0, |q| q.max_size), 32);
        assert!(d.with_queue_mut(|q| q.size = 16));
        assert_eq!(d.locked_device().queues()[d.queue_select as usize].size, 16);

        d.queue_select = 2;
        assert_eq!(d.with_queue(0, |q| q.max_size), 0);
        assert!(!d.with_queue_mut(|q| q.size = 16));
    }

    #[test]
    fn test_bus_device_read() {
        let m = single_region_mem(0x1000);
        let interrupt = Arc::new(IrqTrigger::new());
        let mut d = MmioTransport::new(
            m,
            interrupt,
            Arc::new(Mutex::new(DummyDevice::new())),
            false,
        );

        let mut buf = vec![0xff, 0, 0xfe, 0];
        let buf_copy = buf.to_vec();

        // The following read shouldn't be valid, because the length of the buf is not 4.
        buf.push(0);
        d.read(0x0, 0, &mut buf[..]);
        assert_eq!(buf[..4], buf_copy[..]);

        // the length is ok again
        buf.pop();

        // Now we test that reading at various predefined offsets works as intended.
        d.read(0x0, 0, &mut buf[..]);
        assert_eq!(read_le_u32(&buf[..]), MMIO_MAGIC_VALUE);
        d.read(0x0, 0x04, &mut buf[..]);
        assert_eq!(read_le_u32(&buf[..]), MMIO_VERSION);
        d.read(0x0, 0x08, &mut buf[..]);
        assert_eq!(
            read_le_u32(&buf[..]),
            d.locked_device().device_type() as u32,
        );
        d.read(0x0, 0x0c, &mut buf[..]);
        assert_eq!(read_le_u32(&buf[..]), VENDOR_ID);
        d.features_select = 0;
        d.read(0x0, 0x10, &mut buf[..]);
        assert_eq!(
            read_le_u32(&buf[..]),
            d.locked_device().avail_features_by_page(0)
        );
        d.features_select = 1;
        d.read(0x0, 0x10, &mut buf[..]);
        assert_eq!(
            read_le_u32(&buf[..]),
            d.locked_device().avail_features_by_page(0) | 0x1
        );
        d.read(0x0, 0x34, &mut buf[..]);
        assert_eq!(read_le_u32(&buf[..]), 16);
        d.read(0x0, 0x44, &mut buf[..]);
        assert_eq!(read_le_u32(&buf[..]), u32::from(false));
        d.interrupt.irq_status.store(111, Ordering::SeqCst);
        d.read(0x0, 0x60, &mut buf[..]);
        assert_eq!(read_le_u32(&buf[..]), 111);

        // vhost-user devices report VRING when no CONFIG interrupt is pending...
        d.is_vhost_user = true;
        d.interrupt.status().store(0, Ordering::SeqCst);
        d.read(0x0, 0x60, &mut buf[..]);
        assert_eq!(read_le_u32(&buf[..]), VIRTIO_MMIO_INT_VRING);

        // ...and CONFIG when it is.
        d.is_vhost_user = true;
        d.interrupt
            .irq_status
            .store(VIRTIO_MMIO_INT_CONFIG, Ordering::SeqCst);
        d.read(0x0, 0x60, &mut buf[..]);
        assert_eq!(read_le_u32(&buf[..]), VIRTIO_MMIO_INT_CONFIG);

        d.read(0x0, 0x70, &mut buf[..]);
        assert_eq!(read_le_u32(&buf[..]), 0);
        d.config_generation = 5;
        d.read(0x0, 0xfc, &mut buf[..]);
        assert_eq!(read_le_u32(&buf[..]), 5);

        // This read shouldn't do anything, as it's past the readable generic registers, and
        // before the device specific configuration space. Btw, reads from the device specific
        // conf space are going to be tested a bit later, alongside writes.
        buf = buf_copy.to_vec();
        d.read(0x0, 0xfd, &mut buf[..]);
        assert_eq!(buf[..], buf_copy[..]);

        // Read from an invalid address in generic register range.
        d.read(0x0, 0xfb, &mut buf[..]);
        assert_eq!(buf[..], buf_copy[..]);

        // Read from an invalid length in generic register range.
        d.read(0x0, 0xfc, &mut buf[..3]);
        assert_eq!(buf[..], buf_copy[..]);
    }

    #[test]
    #[allow(clippy::cognitive_complexity)]
    fn test_bus_device_write() {
        let m = single_region_mem(0x1000);
        let interrupt = Arc::new(IrqTrigger::new());
        let dummy_dev = Arc::new(Mutex::new(DummyDevice::new()));
        let mut d = MmioTransport::new(m, interrupt, dummy_dev.clone(), false);
        let mut buf = vec![0; 5];
        write_le_u32(&mut buf[..4], 1);

        // Nothing should happen, because the slice len > 4.
        d.features_select = 0;
        d.write(0x0, 0x14, &buf[..]);
        assert_eq!(d.features_select, 0);

        buf.pop();

        assert_eq!(d.device_status, device_status::INIT);
        set_device_status(&mut d, device_status::ACKNOWLEDGE);

        // Acking features in invalid state shouldn't take effect.
        assert_eq!(d.locked_device().acked_features(), 0x0);
        d.acked_features_select = 0x0;
        write_le_u32(&mut buf[..], 1);
        d.write(0x0, 0x20, &buf[..]);
        assert_eq!(d.locked_device().acked_features(), 0x0);

        // Write to device specific configuration space should be ignored before setting
        // device_status::DRIVER
        let buf1 = vec![1; 0xeff];
        for i in (0..0xeff).rev() {
            let mut buf2 = vec![0; 0xeff];
            d.write(0x0, 0x100 + i as u64, &buf1[i..]);
            d.read(0x0, 0x100, &mut buf2[..]);
            for item in buf2.iter().take(0xeff) {
                assert_eq!(*item, 0);
            }
        }

        set_device_status(&mut d, device_status::ACKNOWLEDGE | device_status::DRIVER);
        assert_eq!(
            d.device_status,
            device_status::ACKNOWLEDGE | device_status::DRIVER
        );

        // now writes should work
        d.features_select = 0;
        write_le_u32(&mut buf[..], 1);
        d.write(0x0, 0x14, &buf[..]);
        assert_eq!(d.features_select, 1);

        // Test acknowledging features on bus.
        d.acked_features_select = 0;
        write_le_u32(&mut buf[..], 0x124);
        // Set the device available features in order to make acknowledging possible.
        dummy_dev.lock().unwrap().set_avail_features(0x124);
        d.write(0x0, 0x20, &buf[..]);
        assert_eq!(d.locked_device().acked_features(), 0x124);

        d.acked_features_select = 0;
        write_le_u32(&mut buf[..], 2);
        d.write(0x0, 0x24, &buf[..]);
        assert_eq!(d.acked_features_select, 2);

        set_device_status(
            &mut d,
            device_status::ACKNOWLEDGE | device_status::DRIVER | device_status::FEATURES_OK,
        );

        // Acking features in invalid state shouldn't take effect.
        assert_eq!(d.locked_device().acked_features(), 0x124);
        d.acked_features_select = 0x0;
        write_le_u32(&mut buf[..], 1);
        d.write(0x0, 0x20, &buf[..]);
        assert_eq!(d.locked_device().acked_features(), 0x124);

        // Setup queues
        d.queue_select = 0;
        write_le_u32(&mut buf[..], 3);
        d.write(0x0, 0x30, &buf[..]);
        assert_eq!(d.queue_select, 3);

        d.queue_select = 0;
        assert_eq!(d.locked_device().queues()[0].size, 16);
        write_le_u32(&mut buf[..], 16);
        d.write(0x0, 0x38, &buf[..]);
        assert_eq!(d.locked_device().queues()[0].size, 16);

        assert!(!d.locked_device().queues()[0].ready);
        write_le_u32(&mut buf[..], 1);
        d.write(0x0, 0x44, &buf[..]);
        assert!(d.locked_device().queues()[0].ready);

        assert_eq!(d.locked_device().queues()[0].desc_table_address.0, 0);
        write_le_u32(&mut buf[..], 123);
        d.write(0x0, 0x80, &buf[..]);
        assert_eq!(d.locked_device().queues()[0].desc_table_address.0, 123);
        d.write(0x0, 0x84, &buf[..]);
        assert_eq!(
            d.locked_device().queues()[0].desc_table_address.0,
            123 + (123 << 32)
        );

        assert_eq!(d.locked_device().queues()[0].avail_ring_address.0, 0);
        write_le_u32(&mut buf[..], 124);
        d.write(0x0, 0x90, &buf[..]);
        assert_eq!(d.locked_device().queues()[0].avail_ring_address.0, 124);
        d.write(0x0, 0x94, &buf[..]);
        assert_eq!(
            d.locked_device().queues()[0].avail_ring_address.0,
            124 + (124 << 32)
        );

        assert_eq!(d.locked_device().queues()[0].used_ring_address.0, 0);
        write_le_u32(&mut buf[..], 125);
        d.write(0x0, 0xa0, &buf[..]);
        assert_eq!(d.locked_device().queues()[0].used_ring_address.0, 125);
        d.write(0x0, 0xa4, &buf[..]);
        assert_eq!(
            d.locked_device().queues()[0].used_ring_address.0,
            125 + (125 << 32)
        );

        set_device_status(
            &mut d,
            device_status::ACKNOWLEDGE
                | device_status::DRIVER
                | device_status::FEATURES_OK
                | device_status::DRIVER_OK,
        );

        // Interrupt ACK register clears only the acknowledged status bits.
        d.interrupt.irq_status.store(0b10_1010, Ordering::Relaxed);
        write_le_u32(&mut buf[..], 0b111);
        d.write(0x0, 0x64, &buf[..]);
        assert_eq!(d.interrupt.irq_status.load(Ordering::Relaxed), 0b10_1000);

        // Write to an invalid address in generic register range.
        write_le_u32(&mut buf[..], 0xf);
        d.config_generation = 0;
        d.write(0x0, 0xfb, &buf[..]);
        assert_eq!(d.config_generation, 0);

        // Write to an invalid length in generic register range.
        d.write(0x0, 0xfc, &buf[..2]);
        assert_eq!(d.config_generation, 0);

        // Here we test writes/read into/from the device specific configuration space.
        let buf1 = vec![1; 0xeff];
        for i in (0..0xeff).rev() {
            let mut buf2 = vec![0; 0xeff];
            d.write(0x0, 0x100 + i as u64, &buf1[i..]);
            d.read(0x0, 0x100, &mut buf2[..]);
            for item in buf2.iter().take(i) {
                assert_eq!(*item, 0);
            }
            assert_eq!(buf1[i..], buf2[i..]);
        }
    }

    #[test]
    fn test_bus_device_activate() {
        let m = single_region_mem(0x1000);
        let interrupt = Arc::new(IrqTrigger::new());
        let mut d = MmioTransport::new(
            m,
            interrupt,
            Arc::new(Mutex::new(DummyDevice::new())),
            false,
        );

        assert!(!d.locked_device().is_activated());
        assert_eq!(d.device_status, device_status::INIT);
        set_device_status(&mut d, device_status::ACKNOWLEDGE);
        set_device_status(&mut d, device_status::ACKNOWLEDGE | device_status::DRIVER);
        assert_eq!(
            d.device_status,
            device_status::ACKNOWLEDGE | device_status::DRIVER
        );

        // invalid state transition should have no effect
        set_device_status(
            &mut d,
            device_status::ACKNOWLEDGE | device_status::DRIVER | device_status::DRIVER_OK,
        );
        assert_eq!(
            d.device_status,
            device_status::ACKNOWLEDGE | device_status::DRIVER
        );

        set_device_status(
            &mut d,
            device_status::ACKNOWLEDGE | device_status::DRIVER | device_status::FEATURES_OK,
        );
        assert_eq!(
            d.device_status,
            device_status::ACKNOWLEDGE | device_status::DRIVER | device_status::FEATURES_OK
        );

        let mut buf = [0; 4];
        let queue_len = d.locked_device().queues().len();
        for q in 0..queue_len {
            d.queue_select = q.try_into().unwrap();
            write_le_u32(&mut buf[..], 16);
            d.write(0x0, 0x38, &buf[..]);
            write_le_u32(&mut buf[..], 1);
            d.write(0x0, 0x44, &buf[..]);
        }
        assert!(!d.locked_device().is_activated());

        // Device should be ready for activation now.

        // A couple of invalid writes; will trigger warnings; shouldn't activate the device.
        d.write(0x0, 0xa8, &buf[..]);
        d.write(0x0, 0x1000, &buf[..]);
        assert!(!d.locked_device().is_activated());

        set_device_status(
            &mut d,
            device_status::ACKNOWLEDGE
                | device_status::DRIVER
                | device_status::FEATURES_OK
                | device_status::DRIVER_OK,
        );
        assert_eq!(
            d.device_status,
            device_status::ACKNOWLEDGE
                | device_status::DRIVER
                | device_status::FEATURES_OK
                | device_status::DRIVER_OK
        );
        assert!(d.locked_device().is_activated());

        // A write which changes the size of a queue after activation; currently only triggers
        // a warning path and have no effect on queue state.
        write_le_u32(&mut buf[..], 0);
        d.queue_select = 0;
        d.write(0x0, 0x44, &buf[..]);
        d.read(0x0, 0x44, &mut buf[..]);
        assert_eq!(read_le_u32(&buf[..]), 1);
    }

    #[test]
    fn test_bus_device_activate_failure() {
        let m = single_region_mem(0x1000);
        let interrupt = Arc::new(IrqTrigger::new());
        let device = DummyDevice {
            activate_should_error: true,
            ..DummyDevice::new()
        };
        let mut d = MmioTransport::new(m, interrupt, Arc::new(Mutex::new(device)), false);

        set_device_status(&mut d, device_status::ACKNOWLEDGE);
        set_device_status(&mut d, device_status::ACKNOWLEDGE | device_status::DRIVER);
        set_device_status(
            &mut d,
            device_status::ACKNOWLEDGE | device_status::DRIVER | device_status::FEATURES_OK,
        );

        let mut buf = [0; 4];
        let queue_len = d.locked_device().queues().len();
        for q in 0..queue_len {
            d.queue_select = q.try_into().unwrap();
            write_le_u32(&mut buf[..], 16);
            d.write(0x0, 0x38, &buf[..]);
            write_le_u32(&mut buf[..], 1);
            d.write(0x0, 0x44, &buf[..]);
        }
        assert!(!d.locked_device().is_activated());

        set_device_status(
            &mut d,
            device_status::ACKNOWLEDGE
                | device_status::DRIVER
                | device_status::FEATURES_OK
                | device_status::DRIVER_OK,
        );

        // Failure in activate results in `DEVICE_NEEDS_RESET` status being set
        assert_ne!(d.device_status & DEVICE_NEEDS_RESET, 0);
        // We injected an interrupt of type "configuration change"
        assert_eq!(
            d.locked_device().interrupt_status().load(Ordering::SeqCst),
            VIRTIO_MMIO_INT_CONFIG
        );
        // We actually wrote to the eventfd
        assert_eq!(
            d.locked_device()
                .interrupt_trigger()
                .notifier(VirtioInterruptType::Config)
                .unwrap()
                .read()
                .unwrap(),
            1
        );
    }

    // Drives `d` through the full driver init sequence up to a successful activation.
    fn activate_device(d: &mut MmioTransport) {
        set_device_status(d, device_status::ACKNOWLEDGE);
        set_device_status(d, device_status::ACKNOWLEDGE | device_status::DRIVER);
        set_device_status(
            d,
            device_status::ACKNOWLEDGE | device_status::DRIVER | device_status::FEATURES_OK,
        );

        // Setup queue data structures
        let mut buf = [0; 4];
        let queues_count = d.locked_device().queues().len();
        for q in 0..queues_count {
            d.queue_select = q.try_into().unwrap();
            write_le_u32(&mut buf[..], 16);
            d.write(0x0, 0x38, &buf[..]);
            write_le_u32(&mut buf[..], 1);
            d.write(0x0, 0x44, &buf[..]);
        }
        assert!(!d.locked_device().is_activated());

        // Device should be ready for activation now.
        set_device_status(
            d,
            device_status::ACKNOWLEDGE
                | device_status::DRIVER
                | device_status::FEATURES_OK
                | device_status::DRIVER_OK,
        );
        assert_eq!(
            d.device_status,
            device_status::ACKNOWLEDGE
                | device_status::DRIVER
                | device_status::FEATURES_OK
                | device_status::DRIVER_OK
        );
        assert!(d.locked_device().is_activated());
    }

    #[test]
    fn test_bus_device_reset() {
        let m = single_region_mem(0x1000);
        let interrupt = Arc::new(IrqTrigger::new());
        let mut d = MmioTransport::new(
            m,
            interrupt,
            Arc::new(Mutex::new(DummyDevice::new())),
            false,
        );
        let mut buf = [0; 4];

        assert!(!d.locked_device().is_activated());
        assert_eq!(d.device_status, 0);
        activate_device(&mut d);

        // Marking device as FAILED should not affect device_activated state
        write_le_u32(&mut buf[..], 0x8f);
        d.write(0x0, 0x70, &buf[..]);
        assert_eq!(d.device_status, 0x8f);
        assert!(d.locked_device().is_activated());

        // Nothing happens when backend driver doesn't support reset
        write_le_u32(&mut buf[..], 0x0);
        d.write(0x0, 0x70, &buf[..]);
        assert_eq!(d.device_status, 0x8f);
        assert!(d.locked_device().is_activated());
    }

    #[test]
    fn test_get_avail_features() {
        let dummy_dev = DummyDevice::new();
        assert_eq!(dummy_dev.avail_features(), dummy_dev.avail_features);
    }

    #[test]
    fn test_get_acked_features() {
        let dummy_dev = DummyDevice::new();
        assert_eq!(dummy_dev.acked_features(), dummy_dev.acked_features);
    }

    #[test]
    fn test_set_acked_features() {
        let mut dummy_dev = DummyDevice::new();
        assert_eq!(dummy_dev.acked_features(), 0);
        dummy_dev.set_acked_features(16);
        assert_eq!(dummy_dev.acked_features(), dummy_dev.acked_features);
    }

    #[test]
    fn test_ack_features_by_page() {
        let mut dummy_dev = DummyDevice::new();
        dummy_dev.set_acked_features(16);
        dummy_dev.set_avail_features(8);
        dummy_dev.ack_features_by_page(0, 8);
        assert_eq!(dummy_dev.acked_features(), 24);
    }

    #[test]
    fn irq_trigger() {
        let irq_trigger = IrqTrigger::new();
        assert_eq!(irq_trigger.irq_status.load(Ordering::SeqCst), 0);

        // Check that there are no pending irqs.
        assert!(!irq_trigger.has_pending_interrupt(VirtioInterruptType::Config));
        assert!(!irq_trigger.has_pending_interrupt(VirtioInterruptType::Queue(0)));

        // Check that trigger_irq() correctly generates irqs.
        irq_trigger.trigger(VirtioInterruptType::Config).unwrap();
        assert!(irq_trigger.has_pending_interrupt(VirtioInterruptType::Config));
        irq_trigger.irq_status.store(0, Ordering::SeqCst);
        irq_trigger.trigger(VirtioInterruptType::Queue(0)).unwrap();
        assert!(irq_trigger.has_pending_interrupt(VirtioInterruptType::Queue(0)));

        // Check trigger_irq() failure case (irq_evt is full).
        irq_trigger.irq_evt.write(u64::MAX - 1).unwrap();
        irq_trigger
            .trigger(VirtioInterruptType::Config)
            .unwrap_err();
        irq_trigger
            .trigger(VirtioInterruptType::Queue(0))
            .unwrap_err();
    }
}


================================================
FILE: src/vmm/src/devices/virtio/transport/mod.rs
================================================
// Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

use std::sync::Arc;
use std::sync::atomic::AtomicU32;

use vmm_sys_util::eventfd::EventFd;

use crate::vstate::interrupts::InterruptError;

/// MMIO transport for VirtIO devices
pub mod mmio;
/// PCI transport for VirtIO devices
pub mod pci;

/// Represents the types of interrupts used by VirtIO devices
#[derive(Debug, Clone)]
pub enum VirtioInterruptType {
    /// Interrupt for VirtIO configuration changes
    Config,
    /// Interrupts for new events in a queue.
    Queue(u16),
}

/// API of interrupt types used by VirtIO devices
pub trait VirtioInterrupt: std::fmt::Debug + Send + Sync {
    /// Trigger a VirtIO interrupt.
    fn trigger(&self, interrupt_type: VirtioInterruptType) -> Result<(), InterruptError>;

    /// Trigger multiple Virtio interrupts for selected queues.
    /// The caller needs to ensure that [`queues`] does not include duplicate entries to
    /// avoid sending multiple interrupts for the same queue.
    /// This is to allow sending a single interrupt for implementations that don't
    /// distinguish different queues, like IrqTrigger, instead of sending multiple same
    /// interrupts.
    fn trigger_queues(&self, queues: &[u16]) -> Result<(), InterruptError> {
        queues
            .iter()
            .try_for_each(|&qidx| self.trigger(VirtioInterruptType::Queue(qidx)))
    }

    /// Get the `EventFd` (if any) that backs the underlying interrupt.
    fn notifier(&self, _interrupt_type: VirtioInterruptType) -> Option<&EventFd> {
        None
    }

    /// Get the current device interrupt status.
    fn status(&self) -> Arc<AtomicU32>;

    /// Returns true if there is any pending interrupt
    #[cfg(test)]
    fn has_pending_interrupt(&self, interrupt_type: VirtioInterruptType) -> bool;

    /// Used to acknowledge an interrupt
    #[cfg(test)]
    fn ack_interrupt(&self, interrupt_type: VirtioInterruptType);
}


================================================
FILE: src/vmm/src/devices/virtio/transport/pci/common_config.rs
================================================
// Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// Copyright 2018 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE-BSD-3-Clause file.
// // Copyright © 2019 Intel Corporation // // SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause use std::sync::atomic::{AtomicU16, Ordering}; use std::sync::{Arc, Mutex}; use byteorder::{ByteOrder, LittleEndian}; use serde::{Deserialize, Serialize}; use vm_memory::GuestAddress; use crate::devices::virtio::device::VirtioDevice; use crate::devices::virtio::queue::Queue; use crate::devices::virtio::transport::pci::device::VIRTQ_MSI_NO_VECTOR; use crate::logger::warn; pub const VIRTIO_PCI_COMMON_CONFIG_ID: &str = "virtio_pci_common_config"; #[derive(Debug, Clone, Serialize, Deserialize)] pub struct VirtioPciCommonConfigState { pub driver_status: u8, pub config_generation: u8, pub device_feature_select: u32, pub driver_feature_select: u32, pub queue_select: u16, pub msix_config: u16, pub msix_queues: Vec, } /// Contains the data for reading and writing the common configuration structure of a virtio PCI /// device. /// /// * Registers: /// /// ** About the whole device. /// le32 device_feature_select; // 0x00 // read-write /// le32 device_feature; // 0x04 // read-only for driver /// le32 driver_feature_select; // 0x08 // read-write /// le32 driver_feature; // 0x0C // read-write /// le16 msix_config; // 0x10 // read-write /// le16 num_queues; // 0x12 // read-only for driver /// u8 device_status; // 0x14 // read-write (driver_status) /// u8 config_generation; // 0x15 // read-only for driver /// /// ** About a specific virtqueue. /// le16 queue_select; // 0x16 // read-write /// le16 queue_size; // 0x18 // read-write, power of 2, or 0. 
/// le16 queue_msix_vector; // 0x1A // read-write /// le16 queue_enable; // 0x1C // read-write (Ready) /// le16 queue_notify_off; // 0x1E // read-only for driver /// le64 queue_desc; // 0x20 // read-write /// le64 queue_avail; // 0x28 // read-write /// le64 queue_used; // 0x30 // read-write #[derive(Debug)] pub struct VirtioPciCommonConfig { pub driver_status: u8, pub config_generation: u8, pub device_feature_select: u32, pub driver_feature_select: u32, pub queue_select: u16, pub msix_config: Arc, pub msix_queues: Arc>>, } impl VirtioPciCommonConfig { pub fn new(state: VirtioPciCommonConfigState) -> Self { VirtioPciCommonConfig { driver_status: state.driver_status, config_generation: state.config_generation, device_feature_select: state.device_feature_select, driver_feature_select: state.driver_feature_select, queue_select: state.queue_select, msix_config: Arc::new(AtomicU16::new(state.msix_config)), msix_queues: Arc::new(Mutex::new(state.msix_queues)), } } pub fn state(&self) -> VirtioPciCommonConfigState { VirtioPciCommonConfigState { driver_status: self.driver_status, config_generation: self.config_generation, device_feature_select: self.device_feature_select, driver_feature_select: self.driver_feature_select, queue_select: self.queue_select, msix_config: self.msix_config.load(Ordering::Acquire), msix_queues: self.msix_queues.lock().unwrap().clone(), } } pub fn read(&mut self, offset: u64, data: &mut [u8], device: Arc>) { assert!(data.len() <= 8); match data.len() { 1 => { let v = self.read_common_config_byte(offset); data[0] = v; } 2 => { let v = self.read_common_config_word(offset, device.lock().unwrap().queues()); LittleEndian::write_u16(data, v); } 4 => { let v = self.read_common_config_dword(offset, device); LittleEndian::write_u32(data, v); } _ => warn!( "pci: invalid data length for virtio read: len {}", data.len() ), } } pub fn write(&mut self, offset: u64, data: &[u8], device: Arc>) { assert!(data.len() <= 8); match data.len() { 1 => 
self.write_common_config_byte(offset, data[0]), 2 => self.write_common_config_word( offset, LittleEndian::read_u16(data), device.lock().unwrap().queues_mut(), ), 4 => self.write_common_config_dword(offset, LittleEndian::read_u32(data), device), _ => warn!( "pci: invalid data length for virtio write: len {}", data.len() ), } } fn read_common_config_byte(&self, offset: u64) -> u8 { // The driver is only allowed to do aligned, properly sized access. match offset { 0x14 => self.driver_status, 0x15 => self.config_generation, _ => { warn!("pci: invalid virtio config byte read: 0x{:x}", offset); 0 } } } fn write_common_config_byte(&mut self, offset: u64, value: u8) { match offset { 0x14 => self.driver_status = value, _ => { warn!("pci: invalid virtio config byte write: 0x{:x}", offset); } } } fn read_common_config_word(&self, offset: u64, queues: &[Queue]) -> u16 { match offset { 0x10 => self.msix_config.load(Ordering::Acquire), 0x12 => queues.len().try_into().unwrap(), // num_queues 0x16 => self.queue_select, 0x18 => self.with_queue(queues, |q| q.size).unwrap_or(0), // If `queue_select` points to an invalid queue we should return NO_VECTOR. // Reading from here // https://docs.oasis-open.org/virtio/virtio/v1.1/csprd01/virtio-v1.1-csprd01.html#x1-1280005: // // > The device MUST return vector mapped to a given event, (NO_VECTOR if unmapped) on // > read of config_msix_vector/queue_msix_vector. 0x1a => self .msix_queues .lock() .unwrap() .get(self.queue_select as usize) .copied() .unwrap_or(VIRTQ_MSI_NO_VECTOR), 0x1c => u16::from(self.with_queue(queues, |q| q.ready).unwrap_or(false)), 0x1e => self.queue_select, // notify_off _ => { warn!("pci: invalid virtio register word read: 0x{:x}", offset); 0 } } } fn write_common_config_word(&mut self, offset: u64, value: u16, queues: &mut [Queue]) { match offset { 0x10 => { // Make sure that the guest doesn't select an invalid vector. We are offering // `num_queues + 1` vectors (plus one for configuration updates). 
If an invalid // vector has been selected, we just store the `NO_VECTOR` value. let mut msix_queues = self.msix_queues.lock().expect("Poisoned lock"); let nr_vectors = msix_queues.len() + 1; if (value as usize) < nr_vectors { self.msix_config.store(value, Ordering::Release); } else { self.msix_config .store(VIRTQ_MSI_NO_VECTOR, Ordering::Release); } } 0x16 => self.queue_select = value, 0x18 => self.with_queue_mut(queues, |q| q.size = value), 0x1a => { let mut msix_queues = self.msix_queues.lock().expect("Poisoned lock"); let nr_vectors = msix_queues.len() + 1; // Make sure that `queue_select` points to a valid queue. If not, we won't do // anything here and subsequent reads at 0x1a will return `NO_VECTOR`. if let Some(queue) = msix_queues.get_mut(self.queue_select as usize) { // Make sure that the guest doesn't select an invalid vector. We are offering // `num_queues + 1` vectors (plus one for configuration updates). If an invalid // vector has been selected, we just store the `NO_VECTOR` value. if (value as usize) < nr_vectors { *queue = value; } else { *queue = VIRTQ_MSI_NO_VECTOR; } } } 0x1c => self.with_queue_mut(queues, |q| { if value != 0 { q.ready = value == 1; } }), _ => { warn!("pci: invalid virtio register word write: 0x{:x}", offset); } } } fn read_common_config_dword(&self, offset: u64, device: Arc>) -> u32 { match offset { 0x00 => self.device_feature_select, 0x04 => { let locked_device = device.lock().unwrap(); // Only 64 bits of features (2 pages) are defined for now, so limit // device_feature_select to avoid shifting by 64 or more bits. 
if self.device_feature_select < 2 { ((locked_device.avail_features() >> (self.device_feature_select * 32)) & 0xffff_ffff) as u32 } else { 0 } } 0x08 => self.driver_feature_select, 0x20 => { let locked_device = device.lock().unwrap(); self.with_queue(locked_device.queues(), |q| { (q.desc_table_address.0 & 0xffff_ffff) as u32 }) .unwrap_or_default() } 0x24 => { let locked_device = device.lock().unwrap(); self.with_queue(locked_device.queues(), |q| { (q.desc_table_address.0 >> 32) as u32 }) .unwrap_or_default() } 0x28 => { let locked_device = device.lock().unwrap(); self.with_queue(locked_device.queues(), |q| { (q.avail_ring_address.0 & 0xffff_ffff) as u32 }) .unwrap_or_default() } 0x2c => { let locked_device = device.lock().unwrap(); self.with_queue(locked_device.queues(), |q| { (q.avail_ring_address.0 >> 32) as u32 }) .unwrap_or_default() } 0x30 => { let locked_device = device.lock().unwrap(); self.with_queue(locked_device.queues(), |q| { (q.used_ring_address.0 & 0xffff_ffff) as u32 }) .unwrap_or_default() } 0x34 => { let locked_device = device.lock().unwrap(); self.with_queue(locked_device.queues(), |q| { (q.used_ring_address.0 >> 32) as u32 }) .unwrap_or_default() } _ => { warn!("pci: invalid virtio register dword read: 0x{:x}", offset); 0 } } } fn write_common_config_dword( &mut self, offset: u64, value: u32, device: Arc>, ) { fn hi(v: &mut GuestAddress, x: u32) { *v = (*v & 0xffff_ffff) | (u64::from(x) << 32) } fn lo(v: &mut GuestAddress, x: u32) { *v = (*v & !0xffff_ffff) | u64::from(x) } let mut locked_device = device.lock().unwrap(); match offset { 0x00 => self.device_feature_select = value, 0x08 => self.driver_feature_select = value, 0x0c => locked_device.ack_features_by_page(self.driver_feature_select, value), 0x20 => self.with_queue_mut(locked_device.queues_mut(), |q| { lo(&mut q.desc_table_address, value) }), 0x24 => self.with_queue_mut(locked_device.queues_mut(), |q| { hi(&mut q.desc_table_address, value) }), 0x28 => 
self.with_queue_mut(locked_device.queues_mut(), |q| { lo(&mut q.avail_ring_address, value) }), 0x2c => self.with_queue_mut(locked_device.queues_mut(), |q| { hi(&mut q.avail_ring_address, value) }), 0x30 => self.with_queue_mut(locked_device.queues_mut(), |q| { lo(&mut q.used_ring_address, value) }), 0x34 => self.with_queue_mut(locked_device.queues_mut(), |q| { hi(&mut q.used_ring_address, value) }), _ => { warn!("pci: invalid virtio register dword write: 0x{:x}", offset); } } } fn with_queue(&self, queues: &[Queue], f: F) -> Option where F: FnOnce(&Queue) -> U, { queues.get(self.queue_select as usize).map(f) } fn with_queue_mut(&self, queues: &mut [Queue], f: F) { if let Some(queue) = queues.get_mut(self.queue_select as usize) { f(queue); } } } #[cfg(test)] mod tests { use vm_memory::ByteValued; use super::*; use crate::devices::virtio::transport::mmio::tests::DummyDevice; fn default_device() -> Arc> { Arc::new(Mutex::new(DummyDevice::new())) } fn default_pci_common_config() -> VirtioPciCommonConfig { VirtioPciCommonConfig { driver_status: 0, config_generation: 0, device_feature_select: 0, driver_feature_select: 0, queue_select: 0, msix_config: Arc::new(AtomicU16::new(0)), msix_queues: Arc::new(Mutex::new(vec![0u16; 2])), } } #[test] fn write_base_regs() { let mut regs = VirtioPciCommonConfig { driver_status: 0xaa, config_generation: 0x55, device_feature_select: 0x0, driver_feature_select: 0x0, queue_select: 0xff, msix_config: Arc::new(AtomicU16::new(0)), msix_queues: Arc::new(Mutex::new(vec![0; 3])), }; let dev = Arc::new(Mutex::new(DummyDevice::new())); // Can set all bits of driver_status. regs.write(0x14, &[0x55], dev.clone()); let mut read_back = vec![0x00]; regs.read(0x14, &mut read_back, dev.clone()); assert_eq!(read_back[0], 0x55); // The config generation register is read only. 
regs.write(0x15, &[0xaa], dev.clone());
        let mut read_back = vec![0x00];
        regs.read(0x15, &mut read_back, dev.clone());
        assert_eq!(read_back[0], 0x55);

        // Device features is read-only and passed through from the device.
        regs.write(0x04, &[0, 0, 0, 0], dev.clone());
        let mut read_back = vec![0, 0, 0, 0];
        regs.read(0x04, &mut read_back, dev.clone());
        assert_eq!(LittleEndian::read_u32(&read_back), 0u32);

        // Feature select registers are read/write.
        regs.write(0x00, &[1, 2, 3, 4], dev.clone());
        let mut read_back = vec![0, 0, 0, 0];
        regs.read(0x00, &mut read_back, dev.clone());
        assert_eq!(LittleEndian::read_u32(&read_back), 0x0403_0201);
        regs.write(0x08, &[1, 2, 3, 4], dev.clone());
        let mut read_back = vec![0, 0, 0, 0];
        regs.read(0x08, &mut read_back, dev.clone());
        assert_eq!(LittleEndian::read_u32(&read_back), 0x0403_0201);

        // 'queue_select' can be read and written.
        regs.write(0x16, &[0xaa, 0x55], dev.clone());
        let mut read_back = vec![0x00, 0x00];
        regs.read(0x16, &mut read_back, dev.clone());
        assert_eq!(read_back[0], 0xaa);
        assert_eq!(read_back[1], 0x55);

        // Getting the MSI vector when `queue_select` points to an invalid queue should return
        // NO_VECTOR (0xffff)
        regs.read(0x1a, &mut read_back, dev.clone());
        assert_eq!(read_back, [0xff, 0xff]);

        // Writing the MSI vector of an invalid `queue_select` does not have any effect.
        regs.write(0x1a, &[0x12, 0x13], dev.clone());
        assert_eq!(read_back, [0xff, 0xff]);

        // Valid `queue_select` though should setup the corresponding MSI-X queue.
        regs.write(0x16, &[0x1, 0x0], dev.clone());
        assert_eq!(regs.queue_select, 1);
        regs.write(0x1a, &[0x1, 0x0], dev.clone());
        regs.read(0x1a, &mut read_back, dev);
        assert_eq!(LittleEndian::read_u16(&read_back[..2]), 0x1);
    }

    /// Reads at 0x04 return the feature page selected through 0x00; pages
    /// beyond the two defined ones read as zero.
    #[test]
    fn test_device_feature() {
        let mut config = default_pci_common_config();
        let mut device = default_device();

        let mut features = 0u32;
        device
            .lock()
            .unwrap()
            .set_avail_features(0x0000_1312_0000_1110);
        config.read(0x04, features.as_mut_slice(), device.clone());
        assert_eq!(features, 0x1110);

        // select second page
        config.write(0x0, 1u32.as_slice(), device.clone());
        config.read(0x04, features.as_mut_slice(), device.clone());
        assert_eq!(features, 0x1312);

        // Try a third page. It doesn't exist so we should get all 0s
        config.write(0x0, 2u32.as_slice(), device.clone());
        config.read(0x04, features.as_mut_slice(), device.clone());
        assert_eq!(features, 0x0);
    }

    /// Writes at 0x0c ack features on the page selected through 0x08.
    #[test]
    fn test_driver_feature() {
        let mut config = default_pci_common_config();
        let mut device = default_device();

        device
            .lock()
            .unwrap()
            .set_avail_features(0x0000_1312_0000_1110);

        // ACK some features of the first page
        config.write(0x0c, 0x1100u32.as_slice(), device.clone());
        assert_eq!(device.lock().unwrap().acked_features(), 0x1100);

        // ACK some features of the second page
        config.write(0x08, 1u32.as_slice(), device.clone());
        config.write(0x0c, 0x0000_1310u32.as_slice(), device.clone());
        assert_eq!(
            device.lock().unwrap().acked_features(),
            0x0000_1310_0000_1100
        );
    }

    #[test]
    fn test_num_queues() {
        let mut config = default_pci_common_config();
        let mut device = default_device();

        let mut num_queues = 0u16;
        config.read(0x12, num_queues.as_mut_slice(), device.clone());
        assert_eq!(num_queues, 2);

        // `num_queues` is read-only
        config.write(0x12, 4u16.as_slice(), device.clone());
        config.read(0x12, num_queues.as_mut_slice(), device.clone());
        assert_eq!(num_queues, 2);
    }

    #[test]
    fn test_device_status() {
        let mut config = default_pci_common_config();
        let mut device = default_device();

        let mut status = 0u8;
        config.read(0x14, status.as_mut_slice(), device.clone());
        assert_eq!(status, 0);

        config.write(0x14, 0x42u8.as_slice(), device.clone());
        config.read(0x14, status.as_mut_slice(), device.clone());
        assert_eq!(status, 0x42);
    }

    #[test]
    fn test_config_msix_vector() {
        let mut config = default_pci_common_config();
        let device = default_device();

        let mut vector: u16 = 0;

        // Our device has 2 queues, so we should be using 3 vectors in total.
        // Trying to set a vector bigger than that should fail. Observing the
        // failure happens through a subsequent read that should return NO_VECTOR.
        config.write(0x10, 3u16.as_slice(), device.clone());
        config.read(0x10, vector.as_mut_slice(), device.clone());
        assert_eq!(vector, VIRTQ_MSI_NO_VECTOR);

        // Any of the 3 valid values should work
        for i in 0u16..3 {
            config.write(0x10, i.as_slice(), device.clone());
            config.read(0x10, vector.as_mut_slice(), device.clone());
            assert_eq!(vector, i);
        }
    }

    #[test]
    fn test_queue_size() {
        let mut config = default_pci_common_config();
        let device = default_device();

        let mut len = 0u16;
        let mut max_size = [0u16; 2];
        for queue_id in 0u16..2 {
            config.write(0x16, queue_id.as_slice(), device.clone());
            config.read(0x18, len.as_mut_slice(), device.clone());
            assert_eq!(
                len,
                device.lock().unwrap().queues()[queue_id as usize].max_size
            );
            max_size[queue_id as usize] = len;
        }

        // An invalid queue selector reads a size of 0.
        config.write(0x16, 2u16.as_slice(), device.clone());
        config.read(0x18, len.as_mut_slice(), device.clone());
        assert_eq!(len, 0);

        // Setup size smaller than what is the maximum offered
        for queue_id in 0u16..2 {
            config.write(0x16, queue_id.as_slice(), device.clone());
            config.write(
                0x18,
                (max_size[queue_id as usize] - 1).as_slice(),
                device.clone(),
            );
            config.read(0x18, len.as_mut_slice(), device.clone());
            assert_eq!(len, max_size[queue_id as usize] - 1);
        }
    }

    #[test]
    fn test_queue_msix_vector() {
        let mut config = default_pci_common_config();
        let device = default_device();

        let mut vector = 0u16;
        // Our device has 2 queues, so we should be using 3 vectors in total.
        // Trying to set a vector bigger than that should fail. Observing the
        // failure happens through a subsequent read that should return NO_VECTOR.
        for queue_id in 0u16..2 {
            // Select queue
            config.write(0x16, queue_id.as_slice(), device.clone());
            config.write(0x1a, 3u16.as_slice(), device.clone());
            config.read(0x1a, vector.as_mut_slice(), device.clone());
            assert_eq!(vector, VIRTQ_MSI_NO_VECTOR);

            // Any of the 3 valid values should work
            for vector_id in 0u16..3 {
                config.write(0x1a, vector_id.as_slice(), device.clone());
                config.read(0x1a, vector.as_mut_slice(), device.clone());
                assert_eq!(vector, vector_id);
            }
        }
    }

    #[test]
    fn test_queue_enable() {
        let mut config = default_pci_common_config();
        let device = default_device();

        let mut enabled = 0u16;
        for queue_id in 0u16..2 {
            config.write(0x16, queue_id.as_slice(), device.clone());
            // Initially queue should be disabled
            config.read(0x1c, enabled.as_mut_slice(), device.clone());
            assert_eq!(enabled, 0);

            // Enable queue
            config.write(0x1c, 1u16.as_slice(), device.clone());
            config.read(0x1c, enabled.as_mut_slice(), device.clone());
            assert_eq!(enabled, 1);

            // According to the specification "The driver MUST NOT write a 0 to queue_enable."
            config.write(0x1c, 0u16.as_slice(), device.clone());
            config.read(0x1c, enabled.as_mut_slice(), device.clone());
            assert_eq!(enabled, 1);
        }
    }

    #[test]
    fn test_queue_notify_off() {
        let mut config = default_pci_common_config();
        let device = default_device();

        let mut offset = 0u16;
        // `queue_notify_off` is an offset (index not bytes) from the notification structure
        // that helps locate the address of the queue notify within the device's BAR. This is
        // a field setup by the device and should be read-only for the driver
        for queue_id in 0u16..2 {
            config.write(0x16, queue_id.as_slice(), device.clone());
            config.read(0x1e, offset.as_mut_slice(), device.clone());
            assert_eq!(offset, queue_id);

            // Writing to it should not have any effect
            config.write(0x1e, 0x42.as_slice(), device.clone());
            config.read(0x1e, offset.as_mut_slice(), device.clone());
            assert_eq!(offset, queue_id);
        }
    }

    // Write a 64-bit register as two aligned 32-bit accesses (lo, then hi).
    // NOTE(review): the `device` generic parameter was stripped by extraction;
    // reconstructed as `Arc<Mutex<DummyDevice>>` to match `default_device()`.
    fn write_64bit_field(
        config: &mut VirtioPciCommonConfig,
        device: Arc<Mutex<DummyDevice>>,
        offset: u64,
        value: u64,
    ) {
        let lo32 = (value & 0xffff_ffff) as u32;
        let hi32 = (value >> 32) as u32;

        config.write(offset, lo32.as_slice(), device.clone());
        config.write(offset + 4, hi32.as_slice(), device.clone());
    }

    // Read a 64-bit register as two aligned 32-bit accesses (lo, then hi).
    fn read_64bit_field(
        config: &mut VirtioPciCommonConfig,
        device: Arc<Mutex<DummyDevice>>,
        offset: u64,
    ) -> u64 {
        let mut lo32 = 0u32;
        let mut hi32 = 0u32;

        config.read(offset, lo32.as_mut_slice(), device.clone());
        config.read(offset + 4, hi32.as_mut_slice(), device.clone());

        (lo32 as u64) | ((hi32 as u64) << 32)
    }

    #[test]
    fn test_queue_addresses() {
        let mut config = default_pci_common_config();
        let device = default_device();

        for queue_id in 0u16..2 {
            config.write(0x16, queue_id.as_slice(), device.clone());
            // 0x20: descriptor table, 0x28: available ring, 0x30: used ring.
            for offset in [0x20, 0x28, 0x30] {
                write_64bit_field(&mut config, device.clone(), offset, 0x0000_1312_0000_1110);
                assert_eq!(
                    read_64bit_field(&mut config, device.clone(), offset),
                    0x0000_1312_0000_1110
                );
            }
        }
    }

    #[test]
    fn test_bad_width_reads() {
        let mut config = default_pci_common_config();
        let mut device = default_device();

        // According to the VirtIO specification (section 4.1.3.1)
        //
        // > For device configuration access, the driver MUST use 8-bit wide accesses for 8-bit
        // > wide fields, 16-bit wide and aligned accesses for 16-bit wide fields and 32-bit wide
        // > and aligned accesses for 32-bit and 64-bit wide fields. For 64-bit fields, the driver
        // > MAY access each of the high and low 32-bit parts of the field independently.

        // 64-bit fields
        device.lock().unwrap().queues_mut()[0].desc_table_address =
            GuestAddress(0x0000_1312_0000_1110);
        let mut buffer = [0u8; 8];
        config.read(0x20, &mut buffer[..1], device.clone());
        assert_eq!(buffer, [0u8; 8]);
        config.read(0x20, &mut buffer[..2], device.clone());
        assert_eq!(buffer, [0u8; 8]);
        config.read(0x20, &mut buffer[..8], device.clone());
        assert_eq!(buffer, [0u8; 8]);
        config.read(0x20, &mut buffer[..4], device.clone());
        assert_eq!(LittleEndian::read_u32(&buffer[..4]), 0x1110);
        config.read(0x24, &mut buffer[..4], device.clone());
        assert_eq!(LittleEndian::read_u32(&buffer[..4]), 0x1312);

        // 32-bit fields
        config.device_feature_select = 0x42;
        let mut buffer = [0u8; 8];
        config.read(0, &mut buffer[..1], device.clone());
        assert_eq!(buffer, [0u8; 8]);
        config.read(0, &mut buffer[..2], device.clone());
        assert_eq!(buffer, [0u8; 8]);
        config.read(0, &mut buffer[..8], device.clone());
        assert_eq!(buffer, [0u8; 8]);
        config.read(0, &mut buffer[..4], device.clone());
        assert_eq!(LittleEndian::read_u32(&buffer[..4]), 0x42);

        // 16-bit fields
        let mut buffer = [0u8; 8];
        config.queue_select = 0x42;
        config.read(0x16, &mut buffer[..1], device.clone());
        assert_eq!(buffer, [0u8; 8]);
        config.read(0x16, &mut buffer[..4], device.clone());
        assert_eq!(buffer, [0u8; 8]);
        config.read(0x16, &mut buffer[..8], device.clone());
        assert_eq!(buffer, [0u8; 8]);
        config.read(0x16, &mut buffer[..2], device.clone());
        assert_eq!(LittleEndian::read_u16(&buffer[..2]), 0x42);

        // 8-bit fields
        let mut buffer = [0u8; 8];
        config.driver_status = 0x42;
        config.read(0x14, &mut buffer[..2], device.clone());
        assert_eq!(buffer, [0u8; 8]);
        config.read(0x14, &mut buffer[..4], device.clone());
        assert_eq!(buffer, [0u8; 8]);
        config.read(0x14, &mut buffer[..8], device.clone());
        assert_eq!(buffer, [0u8; 8]);
        config.read(0x14, &mut buffer[..1], device.clone());
        assert_eq!(buffer[0], 0x42);
    }
}

================================================
FILE: src/vmm/src/devices/virtio/transport/pci/device.rs
================================================ // Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. // Copyright 2018 The Chromium OS Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE-BSD-3-Clause file. // // Copyright © 2019 Intel Corporation // // SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause use std::cmp; use std::collections::HashMap; use std::fmt::{Debug, Formatter}; use std::io::{ErrorKind, Write}; use std::sync::atomic::{AtomicBool, AtomicU16, AtomicU32, AtomicUsize, Ordering}; use std::sync::{Arc, Barrier, Mutex}; use kvm_ioctls::{IoEventAddress, NoDatamatch}; use log::warn; use pci::{ PciBdf, PciCapabilityId, PciClassCode, PciMassStorageSubclass, PciNetworkControllerSubclass, PciSubclass, }; use serde::{Deserialize, Serialize}; use thiserror::Error; use vm_allocator::{AddressAllocator, AllocPolicy, RangeInclusive}; use vm_memory::{Address, ByteValued, GuestAddress, Le32}; use vmm_sys_util::errno; use vmm_sys_util::eventfd::EventFd; use crate::Vm; use crate::devices::virtio::device::{VirtioDevice, VirtioDeviceType}; use crate::devices::virtio::generated::virtio_ids; use crate::devices::virtio::queue::Queue; use crate::devices::virtio::transport::pci::common_config::{ VirtioPciCommonConfig, VirtioPciCommonConfigState, }; use crate::devices::virtio::transport::{VirtioInterrupt, VirtioInterruptType}; use crate::logger::{debug, error}; use crate::pci::configuration::{PciCapability, PciConfiguration, PciConfigurationState}; use crate::pci::msix::{MsixCap, MsixConfig, MsixConfigState}; use crate::pci::{BarReprogrammingParams, DeviceRelocationError, PciDevice}; use crate::snapshot::Persist; use crate::utils::u64_to_usize; use crate::vstate::bus::BusDevice; use crate::vstate::interrupts::{InterruptError, MsixVectorGroup}; use crate::vstate::memory::GuestMemoryMmap; use crate::vstate::resources::ResourceAllocator; const DEVICE_INIT: u8 = 0x00; const 
DEVICE_ACKNOWLEDGE: u8 = 0x01; const DEVICE_DRIVER: u8 = 0x02; const DEVICE_DRIVER_OK: u8 = 0x04; const DEVICE_FEATURES_OK: u8 = 0x08; const DEVICE_FAILED: u8 = 0x80; /// Vector value used to disable MSI for a queue. pub const VIRTQ_MSI_NO_VECTOR: u16 = 0xffff; /// BAR index we are using for VirtIO configuration const VIRTIO_BAR_INDEX: u8 = 0; enum PciCapabilityType { Common = 1, Notify = 2, Isr = 3, Device = 4, Pci = 5, SharedMemory = 8, } // This offset represents the 2 bytes omitted from the VirtioPciCap structure // as they are already handled through add_capability(). These 2 bytes are the // fields cap_vndr (1 byte) and cap_next (1 byte) defined in the virtio spec. const VIRTIO_PCI_CAP_OFFSET: usize = 2; #[repr(C, packed)] #[derive(Debug, Clone, Copy, Default, PartialEq, Eq)] struct VirtioPciCap { cap_len: u8, // Generic PCI field: capability length cfg_type: u8, // Identifies the structure. pci_bar: u8, // Where to find it. id: u8, // Multiple capabilities of the same type. padding: [u8; 2], // Pad to full dword. offset: Le32, // Offset within bar. length: Le32, // Length of the structure, in bytes. } // SAFETY: All members are simple numbers and any value is valid. unsafe impl ByteValued for VirtioPciCap {} impl PciCapability for VirtioPciCap { fn bytes(&self) -> &[u8] { self.as_slice() } fn id(&self) -> PciCapabilityId { PciCapabilityId::VendorSpecific } } const VIRTIO_PCI_CAP_LEN_OFFSET: u8 = 2; impl VirtioPciCap { pub fn new(cfg_type: PciCapabilityType, offset: u32, length: u32) -> Self { VirtioPciCap { cap_len: u8::try_from(std::mem::size_of::()).unwrap() + VIRTIO_PCI_CAP_LEN_OFFSET, cfg_type: cfg_type as u8, pci_bar: VIRTIO_BAR_INDEX, id: 0, padding: [0; 2], offset: Le32::from(offset), length: Le32::from(length), } } } #[repr(C, packed)] #[derive(Clone, Copy, Default)] struct VirtioPciNotifyCap { cap: VirtioPciCap, notify_off_multiplier: Le32, } // SAFETY: All members are simple numbers and any value is valid. 
unsafe impl ByteValued for VirtioPciNotifyCap {} impl PciCapability for VirtioPciNotifyCap { fn bytes(&self) -> &[u8] { self.as_slice() } fn id(&self) -> PciCapabilityId { PciCapabilityId::VendorSpecific } } impl VirtioPciNotifyCap { pub fn new(cfg_type: PciCapabilityType, offset: u32, length: u32, multiplier: Le32) -> Self { VirtioPciNotifyCap { cap: VirtioPciCap { cap_len: u8::try_from(std::mem::size_of::()).unwrap() + VIRTIO_PCI_CAP_LEN_OFFSET, cfg_type: cfg_type as u8, pci_bar: VIRTIO_BAR_INDEX, id: 0, padding: [0; 2], offset: Le32::from(offset), length: Le32::from(length), }, notify_off_multiplier: multiplier, } } } #[repr(C, packed)] #[derive(Debug, Clone, Copy, Default, PartialEq, Eq)] struct VirtioPciCfgCap { cap: VirtioPciCap, pci_cfg_data: [u8; 4], } // SAFETY: All members are simple numbers and any value is valid. unsafe impl ByteValued for VirtioPciCfgCap {} impl PciCapability for VirtioPciCfgCap { fn bytes(&self) -> &[u8] { self.as_slice() } fn id(&self) -> PciCapabilityId { PciCapabilityId::VendorSpecific } } impl VirtioPciCfgCap { fn new() -> Self { VirtioPciCfgCap { cap: VirtioPciCap { cap_len: u8::try_from(size_of::()).unwrap() + VIRTIO_PCI_CAP_LEN_OFFSET, cfg_type: PciCapabilityType::Pci as u8, pci_bar: VIRTIO_BAR_INDEX, id: 0, padding: [0; 2], offset: Le32::from(0), length: Le32::from(0), }, ..Default::default() } } } #[derive(Debug, Clone, Copy, Default)] struct VirtioPciCfgCapInfo { offset: usize, cap: VirtioPciCfgCap, } #[derive(Debug, Copy, Clone)] pub enum PciVirtioSubclass { NonTransitionalBase = 0xff, } impl PciSubclass for PciVirtioSubclass { fn get_register_value(&self) -> u8 { *self as u8 } } // Allocate one bar for the structs pointed to by the capability structures. // As per the PCI specification, because the same BAR shares MSI-X and non // MSI-X structures, it is recommended to use 8KiB alignment for all those // structures. 
const COMMON_CONFIG_BAR_OFFSET: u64 = 0x0000; const COMMON_CONFIG_SIZE: u64 = 56; const ISR_CONFIG_BAR_OFFSET: u64 = 0x2000; const ISR_CONFIG_SIZE: u64 = 1; const DEVICE_CONFIG_BAR_OFFSET: u64 = 0x4000; const DEVICE_CONFIG_SIZE: u64 = 0x1000; const NOTIFICATION_BAR_OFFSET: u64 = 0x6000; const NOTIFICATION_SIZE: u64 = 0x1000; const MSIX_TABLE_BAR_OFFSET: u64 = 0x8000; // The size is 256KiB because the table can hold up to 2048 entries, with each // entry being 128 bits (4 DWORDS). const MSIX_TABLE_SIZE: u64 = 0x40000; const MSIX_PBA_BAR_OFFSET: u64 = 0x48000; // The size is 2KiB because the Pending Bit Array has one bit per vector and it // can support up to 2048 vectors. const MSIX_PBA_SIZE: u64 = 0x800; /// The BAR size must be a power of 2. pub const CAPABILITY_BAR_SIZE: u64 = 0x80000; const VIRTIO_COMMON_BAR_INDEX: usize = 0; const VIRTIO_SHM_BAR_INDEX: usize = 2; const NOTIFY_OFF_MULTIPLIER: u32 = 4; // A dword per notification address. const VIRTIO_PCI_VENDOR_ID: u16 = 0x1af4; const VIRTIO_PCI_DEVICE_ID_BASE: u16 = 0x1040; // Add to device type to get device ID. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct VirtioPciDeviceState { pub pci_device_bdf: PciBdf, pub device_activated: bool, pub cap_pci_cfg_offset: usize, pub cap_pci_cfg: Vec, pub pci_configuration_state: PciConfigurationState, pub pci_dev_state: VirtioPciCommonConfigState, pub msix_state: MsixConfigState, pub bar_address: u64, } #[derive(Debug, thiserror::Error, displaydoc::Display)] pub enum VirtioPciDeviceError { /// Failed creating VirtioPciDevice: {0} CreateVirtioPciDevice(#[from] DeviceRelocationError), /// Error creating MSI configuration: {0} Msi(#[from] InterruptError), } pub struct VirtioPciDevice { id: String, // The subscriber ID returned by the EventManager pub sub_id: Option, // BDF assigned to the device pci_device_bdf: PciBdf, // PCI configuration registers. 
    configuration: PciConfiguration,
    // virtio PCI common configuration
    common_config: VirtioPciCommonConfig,
    // Virtio device reference and status
    device: Arc<Mutex<dyn VirtioDevice>>,
    device_activated: Arc<AtomicBool>,
    // PCI interrupts.
    virtio_interrupt: Option<Arc<VirtioInterruptMsix>>,
    // Guest memory
    memory: GuestMemoryMmap,
    // Add a dedicated structure to hold information about the very specific
    // virtio-pci capability VIRTIO_PCI_CAP_PCI_CFG. This is needed to support
    // the legacy/backward compatible mechanism of letting the guest access the
    // other virtio capabilities without mapping the PCI BARs. This can be
    // needed when the guest tries to early access the virtio configuration of
    // a device.
    cap_pci_cfg_info: VirtioPciCfgCapInfo,
    // Allocated address for the BAR
    pub bar_address: u64,
}

impl Debug for VirtioPciDevice {
    fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
        f.debug_struct("VirtioPciDevice")
            .field("id", &self.id)
            .finish()
    }
}

impl VirtioPciDevice {
    /// Build the type-0 PCI configuration space for a virtio device of
    /// `device_type`, choosing class/subclass codes that match the device.
    fn pci_configuration(
        device_type: VirtioDeviceType,
        msix_config: &Arc<Mutex<MsixConfig>>,
    ) -> PciConfiguration {
        // Modern (non-transitional) virtio device IDs start at the base.
        let pci_device_id = VIRTIO_PCI_DEVICE_ID_BASE + device_type as u16;
        let (class, subclass) = match device_type {
            VirtioDeviceType::Net => (
                PciClassCode::NetworkController,
                &PciNetworkControllerSubclass::EthernetController as &dyn PciSubclass,
            ),
            VirtioDeviceType::Block => (
                PciClassCode::MassStorage,
                &PciMassStorageSubclass::MassStorage as &dyn PciSubclass,
            ),
            _ => (
                PciClassCode::Other,
                &PciVirtioSubclass::NonTransitionalBase as &dyn PciSubclass,
            ),
        };

        PciConfiguration::new_type0(
            VIRTIO_PCI_VENDOR_ID,
            pci_device_id,
            0x1, // For modern virtio-PCI devices
            class,
            subclass,
            VIRTIO_PCI_VENDOR_ID,
            pci_device_id,
            Some(msix_config.clone()),
        )
    }

    /// Allocate the PCI BAR for the VirtIO device and its associated capabilities.
    ///
    /// This must happen only during the creation of a brand new VM. When a VM is restored from a
    /// known state, the BARs are already created with the right content, therefore we don't need
    /// to go through this codepath.
    pub fn allocate_bars(&mut self, mmio64_allocator: &mut AddressAllocator) {
        // NOTE(review): `device` is not read below; presumably the lock is
        // taken to keep the device quiescent while the BAR is set up — confirm.
        let device_clone = self.device.clone();
        let device = device_clone.lock().unwrap();

        // Allocate the virtio-pci capability BAR.
        // See http://docs.oasis-open.org/virtio/virtio/v1.0/cs04/virtio-v1.0-cs04.html#x1-740004
        let virtio_pci_bar_addr = mmio64_allocator
            .allocate(
                CAPABILITY_BAR_SIZE,
                CAPABILITY_BAR_SIZE,
                AllocPolicy::FirstMatch,
            )
            .unwrap()
            .start();
        self.configuration.add_pci_bar(
            VIRTIO_COMMON_BAR_INDEX,
            virtio_pci_bar_addr,
            CAPABILITY_BAR_SIZE,
        );

        // Once the BARs are allocated, the capabilities can be added to the PCI configuration.
        self.add_pci_capabilities();
        self.bar_address = virtio_pci_bar_addr;
    }

    /// Constructs a new PCI transport for the given virtio device.
    pub fn new(
        id: String,
        memory: GuestMemoryMmap,
        device: Arc<Mutex<dyn VirtioDevice>>,
        msix_vectors: Arc<MsixVectorGroup>,
        pci_device_bdf: u32,
    ) -> Result<VirtioPciDevice, VirtioPciDeviceError> {
        let num_queues = device.lock().expect("Poisoned lock").queues().len();

        let msix_config = Arc::new(Mutex::new(MsixConfig::new(
            msix_vectors.clone(),
            pci_device_bdf,
        )));
        let pci_config = Self::pci_configuration(
            device.lock().expect("Poisoned lock").device_type(),
            &msix_config,
        );

        // Fresh common config: nothing selected yet, MSI-X disabled for the
        // config change interrupt and for every queue.
        let virtio_common_config = VirtioPciCommonConfig::new(VirtioPciCommonConfigState {
            driver_status: 0,
            config_generation: 0,
            device_feature_select: 0,
            driver_feature_select: 0,
            queue_select: 0,
            msix_config: VIRTQ_MSI_NO_VECTOR,
            msix_queues: vec![VIRTQ_MSI_NO_VECTOR; num_queues],
        });

        let interrupt = Arc::new(VirtioInterruptMsix::new(
            msix_config.clone(),
            virtio_common_config.msix_config.clone(),
            virtio_common_config.msix_queues.clone(),
            msix_vectors,
        ));

        let virtio_pci_device = VirtioPciDevice {
            id,
            sub_id: None,
            pci_device_bdf: pci_device_bdf.into(),
            configuration: pci_config,
            common_config: virtio_common_config,
            device,
            device_activated: Arc::new(AtomicBool::new(false)),
            virtio_interrupt: Some(interrupt),
            memory,
            cap_pci_cfg_info: VirtioPciCfgCapInfo::default(),
            bar_address: 0,
        };

        Ok(virtio_pci_device)
    }

    /// Restore a PCI transport (and re-activate its device if it was active)
    /// from a saved `VirtioPciDeviceState`.
    pub fn new_from_state(
        id:
        String,
        vm: &Arc<Vm>,
        device: Arc<Mutex<dyn VirtioDevice>>,
        state: VirtioPciDeviceState,
    ) -> Result<VirtioPciDevice, VirtioPciDeviceError> {
        let msix_config =
            MsixConfig::from_state(state.msix_state, vm.clone(), state.pci_device_bdf.into())?;
        let vectors = msix_config.vectors.clone();
        let msix_config = Arc::new(Mutex::new(msix_config));
        let pci_config = PciConfiguration::type0_from_state(
            state.pci_configuration_state,
            Some(msix_config.clone()),
        );

        let virtio_common_config = VirtioPciCommonConfig::new(state.pci_dev_state);

        // Restore the VIRTIO_PCI_CAP_PCI_CFG capability exactly as saved.
        let cap_pci_cfg_info = VirtioPciCfgCapInfo {
            offset: state.cap_pci_cfg_offset,
            cap: *VirtioPciCfgCap::from_slice(&state.cap_pci_cfg).unwrap(),
        };

        let interrupt = Arc::new(VirtioInterruptMsix::new(
            msix_config.clone(),
            virtio_common_config.msix_config.clone(),
            virtio_common_config.msix_queues.clone(),
            vectors,
        ));

        let virtio_pci_device = VirtioPciDevice {
            id,
            sub_id: None,
            pci_device_bdf: state.pci_device_bdf,
            configuration: pci_config,
            common_config: virtio_common_config,
            device,
            device_activated: Arc::new(AtomicBool::new(state.device_activated)),
            virtio_interrupt: Some(interrupt),
            memory: vm.guest_memory().clone(),
            cap_pci_cfg_info,
            bar_address: state.bar_address,
        };

        // The device was active when snapshotted; re-activate it now so the
        // guest can keep using it without re-initialization.
        if state.device_activated {
            virtio_pci_device
                .device
                .lock()
                .expect("Poisoned lock")
                .activate(
                    virtio_pci_device.memory.clone(),
                    virtio_pci_device.virtio_interrupt.as_ref().unwrap().clone(),
                );
        }

        Ok(virtio_pci_device)
    }

    // True once the driver has set ACKNOWLEDGE, DRIVER, FEATURES_OK and
    // DRIVER_OK (and only those), without the FAILED bit.
    fn is_driver_ready(&self) -> bool {
        let ready_bits =
            (DEVICE_ACKNOWLEDGE | DEVICE_DRIVER | DEVICE_DRIVER_OK | DEVICE_FEATURES_OK);
        self.common_config.driver_status == ready_bits
            && self.common_config.driver_status & DEVICE_FAILED == 0
    }

    /// Determines if the driver has requested the device (re)init / reset itself
    fn is_driver_init(&self) -> bool {
        self.common_config.driver_status == DEVICE_INIT
    }

    /// Current guest-physical address of the virtio capability BAR.
    pub fn config_bar_addr(&self) -> u64 {
        self.configuration.get_bar_addr(VIRTIO_BAR_INDEX as usize)
    }

    fn add_pci_capabilities(&mut self) {
        // Add pointers to the different configuration structures from the PCI capabilities.
        let common_cap = VirtioPciCap::new(
            PciCapabilityType::Common,
            COMMON_CONFIG_BAR_OFFSET.try_into().unwrap(),
            COMMON_CONFIG_SIZE.try_into().unwrap(),
        );
        self.configuration.add_capability(&common_cap);

        let isr_cap = VirtioPciCap::new(
            PciCapabilityType::Isr,
            ISR_CONFIG_BAR_OFFSET.try_into().unwrap(),
            ISR_CONFIG_SIZE.try_into().unwrap(),
        );
        self.configuration.add_capability(&isr_cap);

        // TODO(dgreid) - set based on device's configuration size?
        let device_cap = VirtioPciCap::new(
            PciCapabilityType::Device,
            DEVICE_CONFIG_BAR_OFFSET.try_into().unwrap(),
            DEVICE_CONFIG_SIZE.try_into().unwrap(),
        );
        self.configuration.add_capability(&device_cap);

        let notify_cap = VirtioPciNotifyCap::new(
            PciCapabilityType::Notify,
            NOTIFICATION_BAR_OFFSET.try_into().unwrap(),
            NOTIFICATION_SIZE.try_into().unwrap(),
            Le32::from(NOTIFY_OFF_MULTIPLIER),
        );
        self.configuration.add_capability(&notify_cap);

        let configuration_cap = VirtioPciCfgCap::new();
        // Remember where the capability landed in config space; skip the
        // 2-byte cap_vndr/cap_next header that add_capability() manages.
        self.cap_pci_cfg_info.offset =
            self.configuration.add_capability(&configuration_cap) + VIRTIO_PCI_CAP_OFFSET;
        self.cap_pci_cfg_info.cap = configuration_cap;

        if let Some(interrupt) = &self.virtio_interrupt {
            let msix_cap = MsixCap::new(
                VIRTIO_BAR_INDEX,
                interrupt
                    .msix_config
                    .lock()
                    .expect("Poisoned lock")
                    .vectors
                    .num_vectors(),
                MSIX_TABLE_BAR_OFFSET.try_into().unwrap(),
                VIRTIO_BAR_INDEX,
                MSIX_PBA_BAR_OFFSET.try_into().unwrap(),
            );
            self.configuration.add_capability(&msix_cap);
        }
    }

    // Handle a read of the VIRTIO_PCI_CAP_PCI_CFG capability: reads within
    // the capability header come from our cached copy, reads of the
    // pci_cfg_data window are redirected into the BAR.
    fn read_cap_pci_cfg(&mut self, offset: usize, mut data: &mut [u8]) {
        let cap_slice = self.cap_pci_cfg_info.cap.as_slice();
        let data_len = data.len();
        let cap_len = cap_slice.len();
        if offset + data_len > cap_len {
            error!("Failed to read cap_pci_cfg from config space");
            return;
        }

        if offset < std::mem::size_of::<VirtioPciCap>() {
            if let Some(end) = offset.checked_add(data_len) {
                // This write can't fail, offset and end are checked against config_len.
                data.write_all(&cap_slice[offset..cmp::min(end, cap_len)])
                    .unwrap();
            }
        } else {
            // Access falls inside pci_cfg_data: forward it to the BAR region
            // selected by the guest-programmed offset/length fields.
            let bar_offset: u32 = self.cap_pci_cfg_info.cap.cap.offset.into();
            let len = u32::from(self.cap_pci_cfg_info.cap.cap.length) as usize;
            // BAR reads expect that the buffer has the exact size of the field that
            // offset is pointing to. So, do some check that the `length` has a meaningful value
            // and only use the part of the buffer we actually need.
            if len <= 4 {
                self.read_bar(0, bar_offset as u64, &mut data[..len]);
            }
        }
    }

    // Handle a write to the VIRTIO_PCI_CAP_PCI_CFG capability: writes within
    // the capability header update our cached copy (programming the window),
    // writes to pci_cfg_data are redirected into the BAR.
    fn write_cap_pci_cfg(&mut self, offset: usize, data: &[u8]) -> Option<Arc<Barrier>> {
        let cap_slice = self.cap_pci_cfg_info.cap.as_mut_slice();
        let data_len = data.len();
        let cap_len = cap_slice.len();
        if offset + data_len > cap_len {
            error!("Failed to write cap_pci_cfg to config space");
            return None;
        }

        if offset < std::mem::size_of::<VirtioPciCap>() {
            let (_, right) = cap_slice.split_at_mut(offset);
            right[..data_len].copy_from_slice(data);
            None
        } else {
            let bar_offset: u32 = self.cap_pci_cfg_info.cap.cap.offset.into();
            let len = u32::from(self.cap_pci_cfg_info.cap.cap.length) as usize;
            // BAR writes expect that the buffer has the exact size of the field that
            // offset is pointing to. So, do some check that the `length` has a meaningful value
            // and only use the part of the buffer we actually need.
            if len <= 4 {
                let len = len.min(data.len());
                self.write_bar(0, bar_offset as u64, &data[..len])
            } else {
                None
            }
        }
    }

    /// Shared handle to the backing virtio device.
    pub fn virtio_device(&self) -> Arc<Mutex<dyn VirtioDevice>> {
        self.device.clone()
    }

    // The device should be activated once the driver reports ready and we
    // haven't activated it yet.
    fn needs_activation(&self) -> bool {
        !self.device_activated.load(Ordering::SeqCst) && self.is_driver_ready()
    }

    /// Register the IoEvent notification for a VirtIO device
    pub fn register_notification_ioevent(&self, vm: &Vm) -> Result<(), errno::Error> {
        let bar_addr = self.config_bar_addr();
        for (i, queue_evt) in self
            .device
            .lock()
            .expect("Poisoned lock")
            .queue_events()
            .iter()
            .enumerate()
        {
            // One doorbell address per queue, NOTIFY_OFF_MULTIPLIER bytes apart.
            let notify_base = bar_addr + NOTIFICATION_BAR_OFFSET;
            let io_addr =
                IoEventAddress::Mmio(notify_base + i as u64 * NOTIFY_OFF_MULTIPLIER as u64);
            vm.fd().register_ioevent(queue_evt, &io_addr, NoDatamatch)?;
        }
        Ok(())
    }

    /// Snapshot the transport state for save/restore.
    pub fn state(&self) -> VirtioPciDeviceState {
        VirtioPciDeviceState {
            pci_device_bdf: self.pci_device_bdf,
            device_activated: self.device_activated.load(Ordering::Acquire),
            cap_pci_cfg_offset: self.cap_pci_cfg_info.offset,
            cap_pci_cfg: self.cap_pci_cfg_info.cap.bytes().to_vec(),
            pci_configuration_state: self.configuration.state(),
            pci_dev_state: self.common_config.state(),
            msix_state: self
                .virtio_interrupt
                .as_ref()
                .unwrap()
                .msix_config
                .lock()
                .expect("Poisoned lock")
                .state(),
            bar_address: self.bar_address,
        }
    }
}

/// MSI-X backed implementation of `VirtioInterrupt`: maps virtio interrupt
/// sources (config change / queues) to MSI-X vectors.
pub struct VirtioInterruptMsix {
    msix_config: Arc<Mutex<MsixConfig>>,
    // Vector the driver selected for configuration change notifications.
    config_vector: Arc<AtomicU16>,
    // Per-queue vector selections, indexed by queue number.
    queues_vectors: Arc<Mutex<Vec<u16>>>,
    vectors: Arc<MsixVectorGroup>,
}

impl std::fmt::Debug for VirtioInterruptMsix {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("VirtioInterruptMsix")
            .field("msix_config", &self.msix_config)
            .field("config_vector", &self.config_vector)
            .field("queues_vectors", &self.queues_vectors)
            .finish()
    }
}

impl VirtioInterruptMsix {
    pub fn new(
        msix_config: Arc<Mutex<MsixConfig>>,
        config_vector: Arc<AtomicU16>,
        queues_vectors: Arc<Mutex<Vec<u16>>>,
        vectors: Arc<MsixVectorGroup>,
    ) -> Self {
        VirtioInterruptMsix {
            msix_config,
            config_vector,
            queues_vectors,
            vectors,
        }
    }
}

impl VirtioInterrupt for VirtioInterruptMsix {
    fn trigger(&self, int_type:
    // Inject the MSI-X interrupt for `int_type`, honoring per-entry and
    // function-wide masking.
    VirtioInterruptType) -> Result<(), InterruptError> {
        let vector = match int_type {
            VirtioInterruptType::Config => self.config_vector.load(Ordering::Acquire),
            VirtioInterruptType::Queue(queue_index) => *self
                .queues_vectors
                .lock()
                .unwrap()
                .get(queue_index as usize)
                .ok_or(InterruptError::InvalidVectorIndex(queue_index as usize))?,
        };

        // Driver disabled interrupts for this source: silently drop.
        if vector == VIRTQ_MSI_NO_VECTOR {
            return Ok(());
        }

        let config = &mut self.msix_config.lock().unwrap();
        let entry = &config.table_entries[vector as usize];
        // In case the vector control register associated with the entry
        // has its first bit set, this means the vector is masked and the
        // device should not inject the interrupt.
        // Instead, the Pending Bit Array table is updated to reflect there
        // is a pending interrupt for this specific vector.
        if config.masked || entry.masked() {
            config.set_pba_bit(vector, false);
            return Ok(());
        }

        self.vectors.trigger(vector as usize)
    }

    // EventFd backing the vector for `int_type`, if one is assigned.
    fn notifier(&self, int_type: VirtioInterruptType) -> Option<&EventFd> {
        let vector = match int_type {
            VirtioInterruptType::Config => self.config_vector.load(Ordering::Acquire),
            VirtioInterruptType::Queue(queue_index) => *self
                .queues_vectors
                .lock()
                .unwrap()
                .get(queue_index as usize)?,
        };

        self.vectors.notifier(vector as usize)
    }

    // MSI-X has no ISR status register; return a fresh, always-zero status.
    fn status(&self) -> Arc<AtomicU32> {
        Arc::new(AtomicU32::new(0))
    }

    #[cfg(test)]
    fn has_pending_interrupt(&self, interrupt_type: VirtioInterruptType) -> bool {
        false
    }

    #[cfg(test)]
    fn ack_interrupt(&self, interrupt_type: VirtioInterruptType) {
        // Do nothing here
    }
}

impl PciDevice for VirtioPciDevice {
    fn write_config_register(
        &mut self,
        reg_idx: usize,
        offset: u64,
        data: &[u8],
    ) -> Option<Arc<Barrier>> {
        // Handle the special case where the capability VIRTIO_PCI_CAP_PCI_CFG
        // is accessed. This capability has a special meaning as it allows the
        // guest to access other capabilities without mapping the PCI BAR.
        let base = reg_idx * 4;
        if base + u64_to_usize(offset) >= self.cap_pci_cfg_info.offset
            && base + u64_to_usize(offset) + data.len()
                <= self.cap_pci_cfg_info.offset + self.cap_pci_cfg_info.cap.bytes().len()
        {
            // Access entirely within the VIRTIO_PCI_CAP_PCI_CFG capability:
            // rebase onto the capability and handle it specially.
            let offset = base + u64_to_usize(offset) - self.cap_pci_cfg_info.offset;
            self.write_cap_pci_cfg(offset, data)
        } else {
            self.configuration
                .write_config_register(reg_idx, offset, data);
            None
        }
    }

    fn read_config_register(&mut self, reg_idx: usize) -> u32 {
        // Handle the special case where the capability VIRTIO_PCI_CAP_PCI_CFG
        // is accessed. This capability has a special meaning as it allows the
        // guest to access other capabilities without mapping the PCI BAR.
        let base = reg_idx * 4;
        if base >= self.cap_pci_cfg_info.offset
            && base + 4 <= self.cap_pci_cfg_info.offset + self.cap_pci_cfg_info.cap.bytes().len()
        {
            let offset = base - self.cap_pci_cfg_info.offset;
            let mut data = [0u8; 4];
            let len = u32::from(self.cap_pci_cfg_info.cap.cap.length) as usize;
            // Only the window's programmed `length` bytes are meaningful.
            if len <= 4 {
                self.read_cap_pci_cfg(offset, &mut data[..len]);
                u32::from_le_bytes(data)
            } else {
                0
            }
        } else {
            self.configuration.read_reg(reg_idx)
        }
    }

    fn detect_bar_reprogramming(
        &mut self,
        reg_idx: usize,
        data: &[u8],
    ) -> Option<BarReprogrammingParams> {
        self.configuration.detect_bar_reprogramming(reg_idx, data)
    }

    fn move_bar(&mut self, old_base: u64, new_base: u64) -> Result<(), DeviceRelocationError> {
        // We only update our idea of the bar in order to support free_bars() above.
        // The majority of the reallocation is done inside DeviceManager.
        if self.bar_address == old_base {
            self.bar_address = new_base;
        }
        Ok(())
    }

    // Dispatch a BAR read to the virtio structure the offset falls into
    // (see the *_BAR_OFFSET/_SIZE layout constants).
    fn read_bar(&mut self, _base: u64, offset: u64, data: &mut [u8]) {
        match offset {
            o if o < COMMON_CONFIG_BAR_OFFSET + COMMON_CONFIG_SIZE => {
                self.common_config
                    .read(o - COMMON_CONFIG_BAR_OFFSET, data, self.device.clone())
            }
            o if (ISR_CONFIG_BAR_OFFSET..ISR_CONFIG_BAR_OFFSET + ISR_CONFIG_SIZE).contains(&o) => {
                // We don't actually support legacy INT#x interrupts for VirtIO PCI devices
                warn!("pci: read access to unsupported ISR status field");
                data.fill(0);
            }
            o if (DEVICE_CONFIG_BAR_OFFSET..DEVICE_CONFIG_BAR_OFFSET + DEVICE_CONFIG_SIZE)
                .contains(&o) =>
            {
                let device = self.device.lock().unwrap();
                device.read_config(o - DEVICE_CONFIG_BAR_OFFSET, data);
            }
            o if (NOTIFICATION_BAR_OFFSET..NOTIFICATION_BAR_OFFSET + NOTIFICATION_SIZE)
                .contains(&o) =>
            {
                // Handled with ioeventfds.
                warn!("pci: unexpected read to notification BAR. Offset {o:#x}");
            }
            o if (MSIX_TABLE_BAR_OFFSET..MSIX_TABLE_BAR_OFFSET + MSIX_TABLE_SIZE).contains(&o) => {
                if let Some(interrupt) = &self.virtio_interrupt {
                    interrupt
                        .msix_config
                        .lock()
                        .unwrap()
                        .read_table(o - MSIX_TABLE_BAR_OFFSET, data);
                }
            }
            o if (MSIX_PBA_BAR_OFFSET..MSIX_PBA_BAR_OFFSET + MSIX_PBA_SIZE).contains(&o) => {
                if let Some(interrupt) = &self.virtio_interrupt {
                    interrupt
                        .msix_config
                        .lock()
                        .unwrap()
                        .read_pba(o - MSIX_PBA_BAR_OFFSET, data);
                }
            }
            // Reads outside any known region are ignored.
            _ => (),
        }
    }

    // Dispatch a BAR write, then run the activation/reset state machine
    // (the driver drives both through writes to the common config).
    fn write_bar(&mut self, _base: u64, offset: u64, data: &[u8]) -> Option<Arc<Barrier>> {
        match offset {
            o if o < COMMON_CONFIG_BAR_OFFSET + COMMON_CONFIG_SIZE => {
                self.common_config
                    .write(o - COMMON_CONFIG_BAR_OFFSET, data, self.device.clone())
            }
            o if (ISR_CONFIG_BAR_OFFSET..ISR_CONFIG_BAR_OFFSET + ISR_CONFIG_SIZE).contains(&o) => {
                // We don't actually support legacy INT#x interrupts for VirtIO PCI devices
                warn!("pci: access to unsupported ISR status field");
            }
            o if (DEVICE_CONFIG_BAR_OFFSET..DEVICE_CONFIG_BAR_OFFSET + DEVICE_CONFIG_SIZE)
                .contains(&o) =>
            {
                let mut device = self.device.lock().unwrap();
                device.write_config(o - DEVICE_CONFIG_BAR_OFFSET, data);
            }
            o if (NOTIFICATION_BAR_OFFSET..NOTIFICATION_BAR_OFFSET + NOTIFICATION_SIZE)
                .contains(&o) =>
            {
                // Handled with ioeventfds.
                warn!("pci: unexpected write to notification BAR. Offset {o:#x}");
            }
            o if (MSIX_TABLE_BAR_OFFSET..MSIX_TABLE_BAR_OFFSET + MSIX_TABLE_SIZE).contains(&o) => {
                if let Some(interrupt) = &self.virtio_interrupt {
                    interrupt
                        .msix_config
                        .lock()
                        .unwrap()
                        .write_table(o - MSIX_TABLE_BAR_OFFSET, data);
                }
            }
            o if (MSIX_PBA_BAR_OFFSET..MSIX_PBA_BAR_OFFSET + MSIX_PBA_SIZE).contains(&o) => {
                if let Some(interrupt) = &self.virtio_interrupt {
                    interrupt
                        .msix_config
                        .lock()
                        .unwrap()
                        .write_pba(o - MSIX_PBA_BAR_OFFSET, data);
                }
            }
            _ => (),
        };

        // Try and activate the device if the driver status has changed
        if self.needs_activation() {
            debug!("Activating device");
            let interrupt = Arc::clone(self.virtio_interrupt.as_ref().unwrap());
            match self
                .virtio_device()
                .lock()
                .unwrap()
                .activate(self.memory.clone(), interrupt.clone())
            {
                Ok(()) => self.device_activated.store(true, Ordering::SeqCst),
                Err(err) => {
                    error!("Error activating device: {err:?}");
                    // Section 2.1.2 of the specification states that we need to send a device
                    // configuration change interrupt
                    let _ = interrupt.trigger(VirtioInterruptType::Config);
                }
            }
        }

        // Device has been reset by the driver
        if self.device_activated.load(Ordering::SeqCst) && self.is_driver_init() {
            let mut device = self.device.lock().unwrap();
            let reset_result = device.reset();
            match reset_result {
                Some(_) => {
                    // Upon reset the device returns its interrupt EventFD
                    self.virtio_interrupt = None;
                    self.device_activated.store(false, Ordering::SeqCst);

                    // Reset queue readiness (changes queue_enable), queue sizes
                    // and selected_queue as per spec for reset
                    self.virtio_device()
                        .lock()
                        .unwrap()
                        .queues_mut()
                        .iter_mut()
                        .for_each(Queue::reset);
                    self.common_config.queue_select = 0;
                }
                None => {
                    error!("Attempt to reset device when not implemented in underlying device");
                    // TODO:
                    // currently we don't support device resetting, but we still
                    // follow the spec and set the status field to 0.
                    self.common_config.driver_status = DEVICE_INIT;
                }
            }
        }

        None
    }
}

// Forward raw bus accesses straight to the BAR read/write handlers.
impl BusDevice for VirtioPciDevice {
    fn read(&mut self, base: u64, offset: u64, data: &mut [u8]) {
        self.read_bar(base, offset, data)
    }

    fn write(&mut self, base: u64, offset: u64, data: &[u8]) -> Option<Arc<Barrier>> {
        self.write_bar(base, offset, data)
    }
}

#[cfg(test)]
mod tests {
    use std::sync::{Arc, Mutex};

    use event_manager::MutEventSubscriber;
    use linux_loader::loader::Cmdline;
    use pci::{PciCapabilityId, PciClassCode, PciSubclass};
    use vm_memory::{ByteValued, Le32};

    use super::{PciCapabilityType, VirtioPciDevice};
    use crate::arch::MEM_64BIT_DEVICES_START;
    use crate::builder::tests::default_vmm;
    use crate::devices::virtio::device::{VirtioDevice, VirtioDeviceType};
    use crate::devices::virtio::device_status::{ACKNOWLEDGE, DRIVER, DRIVER_OK, FEATURES_OK};
    use crate::devices::virtio::generated::virtio_config::VIRTIO_F_VERSION_1;
    use crate::devices::virtio::rng::Entropy;
    use crate::devices::virtio::transport::pci::device::{
        COMMON_CONFIG_BAR_OFFSET, COMMON_CONFIG_SIZE, DEVICE_CONFIG_BAR_OFFSET, DEVICE_CONFIG_SIZE,
        ISR_CONFIG_BAR_OFFSET, ISR_CONFIG_SIZE, NOTIFICATION_BAR_OFFSET, NOTIFICATION_SIZE,
        NOTIFY_OFF_MULTIPLIER, PciVirtioSubclass, VirtioPciCap, VirtioPciCfgCap,
        VirtioPciNotifyCap,
    };
    use crate::pci::PciDevice;
    use crate::pci::msix::MsixCap;
    use crate::rate_limiter::RateLimiter;
    use crate::utils::u64_to_usize;
    use crate::{Vm, Vmm};

    // Build a minimal VMM with PCI enabled and a virtio-rng device attached
    // over the PCI transport.
    fn create_vmm_with_virtio_pci_device() -> Vmm {
        let mut vmm = default_vmm();
        vmm.device_manager.enable_pci(&vmm.vm);

        let entropy = Arc::new(Mutex::new(Entropy::new(RateLimiter::default()).unwrap()));
        let mut event_manager = crate::EventManager::new().unwrap();

        vmm.device_manager
            .attach_virtio_device(
                &vmm.vm,
                "rng".to_string(),
                entropy.clone(),
                &mut Cmdline::new(1024).unwrap(),
                &mut event_manager,
                false,
            )
            .unwrap();
        vmm
    }

    // Fetch the PCI transport wrapping the "rng" device attached above.
    fn get_virtio_device(vmm: &Vmm) -> Arc<Mutex<VirtioPciDevice>> {
        vmm.device_manager
            .pci_devices
            .get_virtio_device(VirtioDeviceType::Rng, "rng")
            .unwrap()
            .clone()
    }

    #[test]
    fn test_pci_device_config() {
        let mut vmm = create_vmm_with_virtio_pci_device();
        let device = get_virtio_device(&vmm);
        let mut locked_virtio_pci_device = device.lock().unwrap();

        // For more information for the values we are checking here look into the VirtIO spec here:
        // https://docs.oasis-open.org/virtio/virtio/v1.1/csprd01/virtio-v1.1-csprd01.html#x1-1220007
        // and PCI Header type 0 layout here: https://wiki.osdev.org/PCI#Configuration_Space

        // |  16 bits  |  16 bits  |
        // |-----------|-----------|
        // register 0x0: | Device ID | Vendor ID |
        //
        // Vendor ID of VirtIO devices is 0x1af4
        let reg0 = locked_virtio_pci_device.read_config_register(0);
        assert_eq!(reg0 & 0xffff, 0x1af4);
        // VirtIO PCI device IDs are in the range [0x1000, 0x107f]. (We are not using transitional
        // device IDs).
        let devid = reg0 >> 16;
        assert!(
            (0x1000..=0x107f).contains(&devid),
            "Device ID check: {:#x} >= 0x1000 && {:#x} <= 0x107f",
            devid,
            devid
        );

        // |  16 bits   |  16 bits  |
        // |------------|-----------|
        // register 0x1: |   Status   |  Command  |
        // We offer the capabilities list (bit 4 of status register) at offset 0x34
        let reg1 = locked_virtio_pci_device.read_config_register(1);
        assert_eq!(reg1, 0x0010_0000);

        // |   8 bits   |  8 bits  | 8 bits  |   8 bits    |
        // register 0x2: | Class code | Subclass | Prog IF | Revision ID |
        //
        // Class code: VIRTIO_PCI_VENDOR_ID for all VirtIO devices
        // Subclass: PciClassCode::NetworkController for net, PciClassCode::MassStore for block
        //           PciClassCode::Other for everything else
        // Prog IF: A register defining some programmable interface register.
        // 0 for VirtIO devices
        // Revision ID: 0x1 for modern VirtIO devices
        let reg2 = locked_virtio_pci_device.read_config_register(2);
        assert_eq!(reg2, 0xffff_0001);
        let class_code = ((reg2 >> 24) & 0xff) as u8;
        assert_eq!(class_code, PciClassCode::Other.get_register_value());
        let subclass = ((reg2 >> 16) & 0xff) as u8;
        assert_eq!(
            subclass,
            PciVirtioSubclass::NonTransitionalBase.get_register_value()
        );
        let prog_if = ((reg2 >> 8) & 0xff) as u8;
        assert_eq!(prog_if, 0);
        let revision_id = reg2 & 0xff;
        assert_eq!(revision_id, 0x1);

        // |  8 bits |   8 bits    |    8 bits     |     8 bits      |
        // register 0x3: |  BIST   | Header Type | Latency timer | Cache line size |
        //
        // BIST: status and control for self test of PCI devices. Always 0 for VirtIO devices
        // HeaderType: 0x0 for general devices
        // LatencyTimer: Latency timer in units of PCI bus clocks, 0 for VirtIO
        // Cache Line size: 0 for VirtIO devices
        let reg3 = locked_virtio_pci_device.read_config_register(3);
        assert_eq!(reg3, 0x0);

        // register 0xa: Cardbus CIS pointer
        //
        // We don't emulate CardBus
        let reg10 = locked_virtio_pci_device.read_config_register(0xa);
        assert_eq!(reg10, 0);

        // |    16 bits   |       16 bits      |
        // register 0xb: | Subsystem ID | Subsystem vendor ID|
        //
        // For us Subsystem ID is same as device ID and subsystem vendor ID is same as vendor ID
        // (reg 0x0)
        let reg11 = locked_virtio_pci_device.read_config_register(0xb);
        assert_eq!(reg11, reg0);

        // register 0xc: Expansion ROM base address: 0x0 for us
        let reg12 = locked_virtio_pci_device.read_config_register(0xc);
        assert_eq!(reg12, 0);

        // |    24 bits   |        8 bits        |
        // register 0xd: |   Reserved   | Capabilities pointer |
        let reg13 = locked_virtio_pci_device.read_config_register(0xd);
        assert_eq!(reg13 >> 24, 0);

        // register 0xe: Reserved
        let reg14 = locked_virtio_pci_device.read_config_register(0xe);
        assert_eq!(reg14, 0);

        // |    8 bits   |   8 bits  |    8 bits     |     8 bits     |
        // register 0xf: | max latency | min grant | Interrupt pin | Interrupt line |
        //
        // We don't specify any of those
        let reg15 =
            locked_virtio_pci_device.read_config_register(0xf);
        assert_eq!(reg15, 0);
    }

    #[test]
    fn test_reading_bars() {
        let mut vmm = create_vmm_with_virtio_pci_device();
        let device = get_virtio_device(&vmm);
        let mut locked_virtio_pci_device = device.lock().unwrap();

        // According to OSdev wiki (https://wiki.osdev.org/PCI#Configuration_Space):
        //
        // When you want to retrieve the actual base address of a BAR, be sure to mask the lower
        // bits. For 16-bit Memory Space BARs, you calculate (BAR[x] & 0xFFF0). For 32-bit Memory
        // Space BARs, you calculate (BAR[x] & 0xFFFFFFF0). For 64-bit Memory Space BARs, you
        // calculate ((BAR[x] & 0xFFFFFFF0) + ((BAR[x + 1] & 0xFFFFFFFF) << 32)) For I/O Space
        // BARs, you calculate (BAR[x] & 0xFFFFFFFC).

        // We are allocating a single 64-bit MMIO bar for VirtIO capabilities list. As a result, we
        // are using the first two BAR registers from the configuration space.
        //
        // The BAR address layout is as follows:
        //
        // |          Bits 31-4           |     Bit 3    | Bits 2-1 |  Bit 0   |
        // | 16-Byte Aligned Base Address | Prefetchable |   Type   | Always 0 |
        //
        // For 64-bit addresses though a second BAR is used to hold the upper 32 bits
        // of the address. Prefetchable and type will be help in the lower bits of the
        // first bar along with the lower 32-bits of the address which is always 16-bytes
        // aligned.
        let bar_addr_lo = locked_virtio_pci_device.read_config_register(0x4);
        let bar_addr_hi = locked_virtio_pci_device.read_config_register(0x5);
        let bar_addr = bar_addr_lo as u64 + ((bar_addr_hi as u64) << 32);
        // Bit 0 always 0
        assert_eq!(bar_addr & 0x1, 0);
        // Type is 0x2 meaning 64-bit BAR
        assert_eq!((bar_addr & 0x6) >> 1, 2);
        // The actual address of the BAR should be the first available address of our 64-bit MMIO
        // region
        assert_eq!(bar_addr & 0xffff_ffff_ffff_fff0, MEM_64BIT_DEVICES_START);

        // Reading the BAR size is a bit more convoluted.
        // According to OSDev wiki:
        //
        // To determine the amount of address space needed by a PCI device, you must save the
        // original value of the BAR, write a value of all 1's to the register, then read it back.
        // The amount of memory can then be determined by masking the information bits, performing
        // a bitwise NOT ('~' in C), and incrementing the value by 1.
        locked_virtio_pci_device.write_config_register(0x4, 0, &[0xff, 0xff, 0xff, 0xff]);
        // Read the lower size bits and mask out the last 4 bits include Prefetchable, Type and
        // hardwired-0
        let bar_size_lo = locked_virtio_pci_device.read_config_register(0x4) as u64 & 0xfffffff0;
        locked_virtio_pci_device.write_config_register(0x5, 0, &[0xff, 0xff, 0xff, 0xff]);
        let bar_size_hi = locked_virtio_pci_device.read_config_register(0x5) as u64;
        let bar_size = !((bar_size_hi << 32) | bar_size_lo) + 1;
        // We create a capabilities BAR region of 0x80000 bytes
        assert_eq!(bar_size, 0x80000);
    }

    // Read a VirtioPciCap at `offset` in config space, returning the PCI
    // capability ID, the next-capability pointer and the decoded capability.
    fn read_virtio_pci_cap(
        device: &mut VirtioPciDevice,
        offset: u32,
    ) -> (PciCapabilityId, u8, VirtioPciCap) {
        let word1 = device.read_config_register((offset >> 2) as usize);
        let word2 = device.read_config_register((offset >> 2) as usize + 1);
        let word3 = device.read_config_register((offset >> 2) as usize + 2);
        let word4 = device.read_config_register((offset >> 2) as usize + 3);

        let id = PciCapabilityId::from((word1 & 0xff) as u8);
        let next = ((word1 >> 8) & 0xff) as u8;

        let cap = VirtioPciCap {
            cap_len: ((word1 >> 16) & 0xff) as u8,
            cfg_type: ((word1 >> 24) & 0xff) as u8,
            pci_bar: (word2 & 0xff) as u8,
            id: ((word2 >> 8) & 0xff) as u8,
            padding: [0u8; 2],
            offset: Le32::from(word3),
            length: Le32::from(word4),
        };

        // We only ever set a single capability of a type. It's ID is 0.
        assert_eq!(cap.id, 0);

        (id, next, cap)
    }

    // Read a notification capability: the generic cap plus its
    // notify_off_multiplier dword.
    fn read_virtio_notification_cap(
        device: &mut VirtioPciDevice,
        offset: u32,
    ) -> (PciCapabilityId, u8, VirtioPciNotifyCap) {
        let (id, next, cap) = read_virtio_pci_cap(device, offset);
        let word5 = device.read_config_register((offset >> 2) as usize + 4);
        let notification_cap = VirtioPciNotifyCap {
            cap,
            notify_off_multiplier: Le32::from(word5),
        };

        (id, next, notification_cap)
    }

    // Read the VIRTIO_PCI_CAP_PCI_CFG capability: the generic cap plus its
    // 4-byte pci_cfg_data window.
    fn read_virtio_pci_config_cap(
        device: &mut VirtioPciDevice,
        offset: u32,
    ) -> (PciCapabilityId, u8, VirtioPciCfgCap) {
        let (id, next, cap) = read_virtio_pci_cap(device, offset);
        let word5 = device.read_config_register((offset >> 2) as usize + 4);
        let pci_cfg_cap = VirtioPciCfgCap {
            cap,
            pci_cfg_data: word5.as_slice().try_into().unwrap(),
        };

        (id, next, pci_cfg_cap)
    }

    fn read_msix_cap(device: &mut VirtioPciDevice, offset: u32) -> (PciCapabilityId, u8, MsixCap) {
        let word1 = device.read_config_register((offset >> 2) as usize);
        let table = device.read_config_register((offset >> 2) as usize + 1);
        let pba = device.read_config_register((offset >> 2) as usize + 2);

        let id = PciCapabilityId::from((word1 & 0xff) as u8);
        let next = ((word1 >> 8) & 0xff) as u8;

        let cap = MsixCap {
            // NOTE(review): this keeps the low half of word1 (the id/next
            // bytes) rather than bits 16..32 where Message Control lives.
            // Harmless while no test asserts on msg_ctl, but looks off —
            // confirm against the MSI-X capability layout.
            msg_ctl: (word1 & 0xffff) as u16,
            table,
            pba,
        };

        (id, next, cap)
    }

    // Capability list head: low byte of register 0xd, dword-aligned.
    fn capabilities_start(device: &mut VirtioPciDevice) -> u32 {
        device.read_config_register(0xd) & 0xfc
    }

    #[test]
    fn test_capabilities() {
        let mut vmm = create_vmm_with_virtio_pci_device();
        let device = get_virtio_device(&vmm);
        let mut locked_virtio_pci_device = device.lock().unwrap();

        // VirtIO devices need to expose a set of mandatory capabilities:
        // * Common configuration
        // * Notifications
        // * ISR status
        // * PCI configuration access
        //
        // and, optionally, a device-specific configuration area for those devices that need it.
// // We always expose all 5 capabilities, so check that the capabilities are present // Common config let common_config_cap_offset = capabilities_start(&mut locked_virtio_pci_device); let (id, next, cap) = read_virtio_pci_cap(&mut locked_virtio_pci_device, common_config_cap_offset); assert_eq!(id, PciCapabilityId::VendorSpecific); assert_eq!(cap.cap_len as usize, size_of::() + 2); assert_eq!(cap.cfg_type, PciCapabilityType::Common as u8); assert_eq!(cap.pci_bar, 0); assert_eq!(u32::from(cap.offset) as u64, COMMON_CONFIG_BAR_OFFSET); assert_eq!(u32::from(cap.length) as u64, COMMON_CONFIG_SIZE); assert_eq!(next as u32, common_config_cap_offset + cap.cap_len as u32); // ISR let isr_cap_offset = next as u32; let (id, next, cap) = read_virtio_pci_cap(&mut locked_virtio_pci_device, isr_cap_offset); assert_eq!(id, PciCapabilityId::VendorSpecific); assert_eq!(cap.cap_len as usize, size_of::() + 2); assert_eq!(cap.cfg_type, PciCapabilityType::Isr as u8); assert_eq!(cap.pci_bar, 0); assert_eq!(u32::from(cap.offset) as u64, ISR_CONFIG_BAR_OFFSET); assert_eq!(u32::from(cap.length) as u64, ISR_CONFIG_SIZE); assert_eq!(next as u32, isr_cap_offset + cap.cap_len as u32); // Device config let device_config_cap_offset = next as u32; let (id, next, cap) = read_virtio_pci_cap(&mut locked_virtio_pci_device, device_config_cap_offset); assert_eq!(id, PciCapabilityId::VendorSpecific); assert_eq!(cap.cap_len as usize, size_of::() + 2); assert_eq!(cap.cfg_type, PciCapabilityType::Device as u8); assert_eq!(cap.pci_bar, 0); assert_eq!(u32::from(cap.offset) as u64, DEVICE_CONFIG_BAR_OFFSET); assert_eq!(u32::from(cap.length) as u64, DEVICE_CONFIG_SIZE); assert_eq!(next as u32, device_config_cap_offset + cap.cap_len as u32); let notification_cap_offset = next as u32; let (id, next, cap) = read_virtio_notification_cap(&mut locked_virtio_pci_device, notification_cap_offset); assert_eq!(id, PciCapabilityId::VendorSpecific); assert_eq!( cap.cap.cap_len as usize, size_of::() + 2 ); 
assert_eq!(cap.cap.cfg_type, PciCapabilityType::Notify as u8); assert_eq!(cap.cap.pci_bar, 0); assert_eq!(u32::from(cap.cap.offset) as u64, NOTIFICATION_BAR_OFFSET); assert_eq!(u32::from(cap.cap.length) as u64, NOTIFICATION_SIZE); assert_eq!( next as u32, notification_cap_offset + cap.cap.cap_len as u32 ); assert_eq!(u32::from(cap.notify_off_multiplier), NOTIFY_OFF_MULTIPLIER); let pci_config_cap_offset = next as u32; let (id, next, cap) = read_virtio_pci_config_cap(&mut locked_virtio_pci_device, pci_config_cap_offset); assert_eq!(id, PciCapabilityId::VendorSpecific); assert_eq!(cap.cap.cap_len as usize, size_of::() + 2); assert_eq!(cap.cap.cfg_type, PciCapabilityType::Pci as u8); assert_eq!(cap.cap.pci_bar, 0); assert_eq!(u32::from(cap.cap.offset) as u64, 0); assert_eq!(u32::from(cap.cap.length) as u64, 0); assert_eq!( locked_virtio_pci_device.cap_pci_cfg_info.offset, pci_config_cap_offset as usize + 2 ); assert_eq!(locked_virtio_pci_device.cap_pci_cfg_info.cap, cap); assert_eq!(next as u32, pci_config_cap_offset + cap.cap.cap_len as u32); let msix_cap_offset = next as u32; let (id, next, cap) = read_msix_cap(&mut locked_virtio_pci_device, msix_cap_offset); assert_eq!(id, PciCapabilityId::MsiX); assert_eq!(next, 0); } fn cap_pci_cfg_read(device: &mut VirtioPciDevice, bar_offset: u32, length: u32) -> u32 { let pci_config_cap_offset = capabilities_start(device) as usize + 3 * (size_of::() + 2) + (size_of::() + 2); // To program the access through the PCI config capability mechanism, we need to write the // bar offset and read length in the `VirtioPciCfgCap::cap.offset` and // `VirtioPciCfgCap::length` fields. These are the third and fourth word respectively // within the capability. 
The fifth word of the capability should contain the data let offset_register = (pci_config_cap_offset + 8) >> 2; let length_register = (pci_config_cap_offset + 12) >> 2; let data_register = (pci_config_cap_offset + 16) >> 2; device.write_config_register(offset_register, 0, bar_offset.as_slice()); device.write_config_register(length_register, 0, length.as_slice()); device.read_config_register(data_register) } fn cap_pci_cfg_write(device: &mut VirtioPciDevice, bar_offset: u32, length: u32, data: &[u8]) { let pci_config_cap_offset = capabilities_start(device) as usize + 3 * (size_of::() + 2) + (size_of::() + 2); // To program the access through the PCI config capability mechanism, we need to write the // bar offset and read length in the `VirtioPciCfgCap::cap.offset` and // `VirtioPciCfgCap::length` fields. These are the third and fourth word respectively // within the capability. The fifth word of the capability should contain the data let offset_register = (pci_config_cap_offset + 8) >> 2; let length_register = (pci_config_cap_offset + 12) >> 2; let data_register = (pci_config_cap_offset + 16) >> 2; device.write_config_register(offset_register, 0, bar_offset.as_slice()); device.write_config_register(length_register, 0, length.as_slice()); device.write_config_register(data_register, 0, data); } #[test] fn test_pci_configuration_cap() { let mut vmm = create_vmm_with_virtio_pci_device(); let device = get_virtio_device(&vmm); let mut locked_virtio_pci_device = device.lock().unwrap(); // Let's read the number of queues of the entropy device // That information is located at offset 0x12 past the BAR region belonging to the common // config capability. 
let bar_offset = u32::try_from(COMMON_CONFIG_BAR_OFFSET).unwrap() + 0x12; let len = 2u32; let num_queues = cap_pci_cfg_read(&mut locked_virtio_pci_device, bar_offset, len); assert_eq!(num_queues, 1); // Let's update the driver features and see if that takes effect let bar_offset = u32::try_from(COMMON_CONFIG_BAR_OFFSET).unwrap() + 0x14; let len = 1u32; let device_status = cap_pci_cfg_read(&mut locked_virtio_pci_device, bar_offset, len); assert_eq!(device_status, 0); cap_pci_cfg_write( &mut locked_virtio_pci_device, bar_offset, len, 0x42u32.as_slice(), ); let device_status = cap_pci_cfg_read(&mut locked_virtio_pci_device, bar_offset, len); assert_eq!(device_status, 0x42); // reads with out-of-bounds lengths should return 0s assert_eq!( cap_pci_cfg_read(&mut locked_virtio_pci_device, bar_offset, 8), 0 ); // writes out-of-bounds lengths should have no effect cap_pci_cfg_write( &mut locked_virtio_pci_device, bar_offset, 8, 0x84u32.as_slice(), ); assert_eq!( cap_pci_cfg_read(&mut locked_virtio_pci_device, bar_offset, 1), 0x42 ); // Make sure that we handle properly from/to a BAR where the access length doesn't match // what we've set in the capability's length cap_pci_cfg_write( &mut locked_virtio_pci_device, bar_offset, 2, 0x42u8.as_slice(), ); } fn isr_status_read(device: &mut VirtioPciDevice) -> u32 { let mut data = 0u32; device.read_bar(0, ISR_CONFIG_BAR_OFFSET, data.as_mut_slice()); data } fn isr_status_write(device: &mut VirtioPciDevice, data: u32) { device.write_bar(0, ISR_CONFIG_BAR_OFFSET, data.as_slice()); } #[test] fn test_isr_capability() { let mut vmm = create_vmm_with_virtio_pci_device(); let device = get_virtio_device(&vmm); let mut locked_virtio_pci_device = device.lock().unwrap(); // We don't support legacy interrupts so reads to ISR BAR should always return 0s and // writes to it should not have any effect assert_eq!(isr_status_read(&mut locked_virtio_pci_device), 0); isr_status_write(&mut locked_virtio_pci_device, 0x1312); 
assert_eq!(isr_status_read(&mut locked_virtio_pci_device), 0); } #[test] fn test_notification_capability() { let mut vmm = create_vmm_with_virtio_pci_device(); let device = get_virtio_device(&vmm); let mut locked_virtio_pci_device = device.lock().unwrap(); let notification_cap_offset = (capabilities_start(&mut locked_virtio_pci_device) as usize + 3 * (size_of::() + 2)) .try_into() .unwrap(); let (_, _, notify_cap) = read_virtio_notification_cap(&mut locked_virtio_pci_device, notification_cap_offset); // We do not offer `VIRTIO_F_NOTIFICATION_DATA` so: // * `cap.offset` MUST by 2-byte aligned assert_eq!(u32::from(notify_cap.cap.offset) & 0x3, 0); // * The device MUST either present notify_off_multiplier as an even power of 2, or present // notify_off_multiplier as 0. let multiplier = u32::from(notify_cap.notify_off_multiplier); assert!(multiplier.is_power_of_two() && multiplier.trailing_zeros() % 2 == 0); // * For all queues, the value cap.length presented by the device MUST satisfy: // // `cap.length >= queue_notify_off * notify_off_multiplier + 2` // // The spec allows for up to 65536 queues, but in reality the device we are using with most // queues is vsock (3). Let's check here for 16, projecting for future devices and // use-cases such as multiple queue pairs in network devices assert!(u32::from(notify_cap.cap.length) >= 15 * multiplier + 2); // Reads and writes to the notification region of the BAR are handled by IoEvent file // descriptors. Any such accesses should have no effects. 
let data = [0x42u8; u64_to_usize(NOTIFICATION_SIZE)]; locked_virtio_pci_device.write_bar(0, NOTIFICATION_BAR_OFFSET, &data); let mut buffer = [0x0; u64_to_usize(NOTIFICATION_SIZE)]; locked_virtio_pci_device.read_bar(0, NOTIFICATION_BAR_OFFSET, &mut buffer); assert_eq!(buffer, [0u8; u64_to_usize(NOTIFICATION_SIZE)]); } fn write_driver_status(device: &mut VirtioPciDevice, status: u8) { device.write_bar(0, COMMON_CONFIG_BAR_OFFSET + 0x14, status.as_slice()); } fn read_driver_status(device: &mut VirtioPciDevice) -> u8 { let mut status = 0u8; device.read_bar(0, COMMON_CONFIG_BAR_OFFSET + 0x14, status.as_mut_slice()); status } fn read_device_features(device: &mut VirtioPciDevice) -> u64 { let mut features_lo = 0u32; device.write_bar(0, COMMON_CONFIG_BAR_OFFSET, 0u32.as_slice()); device.read_bar( 0, COMMON_CONFIG_BAR_OFFSET + 0x4, features_lo.as_mut_slice(), ); let mut features_hi = 0u32; device.write_bar(0, COMMON_CONFIG_BAR_OFFSET, 1u32.as_slice()); device.read_bar( 0, COMMON_CONFIG_BAR_OFFSET + 0x4, features_hi.as_mut_slice(), ); features_lo as u64 | ((features_hi as u64) << 32) } fn write_driver_features(device: &mut VirtioPciDevice, features: u64) { device.write_bar(0, COMMON_CONFIG_BAR_OFFSET + 0x8, 0u32.as_slice()); device.write_bar( 0, COMMON_CONFIG_BAR_OFFSET + 0xc, ((features & 0xffff_ffff) as u32).as_slice(), ); device.write_bar(0, COMMON_CONFIG_BAR_OFFSET + 0x8, 1u32.as_slice()); device.write_bar( 0, COMMON_CONFIG_BAR_OFFSET + 0xc, (((features >> 32) & 0xffff_ffff) as u32).as_slice(), ); } fn setup_queues(device: &mut VirtioPciDevice) { device.write_bar( 0, COMMON_CONFIG_BAR_OFFSET + 0x20, 0x8000_0000u64.as_slice(), ); device.write_bar( 0, COMMON_CONFIG_BAR_OFFSET + 0x28, 0x8000_1000u64.as_slice(), ); device.write_bar( 0, COMMON_CONFIG_BAR_OFFSET + 0x30, 0x8000_2000u64.as_slice(), ); device.write_bar(0, COMMON_CONFIG_BAR_OFFSET + 0x1c, 1u16.as_slice()); } #[test] fn test_device_initialization() { let mut vmm = create_vmm_with_virtio_pci_device(); let device = 
get_virtio_device(&vmm); let mut locked_virtio_pci_device = device.lock().unwrap(); assert!(locked_virtio_pci_device.is_driver_init()); assert!(!locked_virtio_pci_device.is_driver_ready()); assert!( !locked_virtio_pci_device .device_activated .load(std::sync::atomic::Ordering::SeqCst) ); write_driver_status( &mut locked_virtio_pci_device, ACKNOWLEDGE.try_into().unwrap(), ); write_driver_status( &mut locked_virtio_pci_device, (ACKNOWLEDGE | DRIVER).try_into().unwrap(), ); assert!(!locked_virtio_pci_device.is_driver_init()); assert!(!locked_virtio_pci_device.is_driver_ready()); assert!( !locked_virtio_pci_device .device_activated .load(std::sync::atomic::Ordering::SeqCst) ); let status = read_driver_status(&mut locked_virtio_pci_device); assert_eq!(status as u32, ACKNOWLEDGE | DRIVER); // Entropy device just offers VIRTIO_F_VERSION_1 let offered_features = read_device_features(&mut locked_virtio_pci_device); assert_eq!(offered_features, 1 << VIRTIO_F_VERSION_1); // ACK features write_driver_features(&mut locked_virtio_pci_device, offered_features); write_driver_status( &mut locked_virtio_pci_device, (ACKNOWLEDGE | DRIVER | FEATURES_OK).try_into().unwrap(), ); let status = read_driver_status(&mut locked_virtio_pci_device); assert!((status & u8::try_from(FEATURES_OK).unwrap()) != 0); assert!(!locked_virtio_pci_device.is_driver_init()); assert!(!locked_virtio_pci_device.is_driver_ready()); assert!( !locked_virtio_pci_device .device_activated .load(std::sync::atomic::Ordering::SeqCst) ); setup_queues(&mut locked_virtio_pci_device); write_driver_status( &mut locked_virtio_pci_device, (ACKNOWLEDGE | DRIVER | FEATURES_OK | DRIVER_OK) .try_into() .unwrap(), ); assert!(!locked_virtio_pci_device.is_driver_init()); assert!(locked_virtio_pci_device.is_driver_ready()); assert!( locked_virtio_pci_device .device_activated .load(std::sync::atomic::Ordering::SeqCst) ); } } ================================================ FILE: src/vmm/src/devices/virtio/transport/pci/mod.rs 
================================================
// Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

pub mod common_config;
pub mod device;

================================================
FILE: src/vmm/src/devices/virtio/vhost_user.rs
================================================
// Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0
// Portions Copyright 2019 Intel Corporation. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

// NOTE(review): generic type parameters appear stripped by text extraction in this file
// (e.g. `Result` missing `<u64, vhost::Error>`, bare `Vec`/`Arc`, `::get_features(self)`
// missing its `<Self as ...>` qualifier, `impl VhostUserHandleImpl` missing `<T: ...>`).
// Tokens are preserved as found; restore generics from the upstream repository.

use std::os::fd::AsRawFd;
use std::os::unix::net::UnixStream;
use std::sync::Arc;

use vhost::vhost_user::message::*;
use vhost::vhost_user::{Frontend, VhostUserFrontend};
use vhost::{Error as VhostError, VhostBackend, VhostUserMemoryRegionInfo, VringConfigData};
use vm_memory::{Address, GuestMemory, GuestMemoryError, GuestMemoryRegion};
use vmm_sys_util::eventfd::EventFd;

use crate::devices::virtio::queue::Queue;
use crate::devices::virtio::transport::{VirtioInterrupt, VirtioInterruptType};
use crate::vstate::memory::GuestMemoryMmap;

/// vhost-user error.
#[derive(Debug, thiserror::Error, displaydoc::Display)]
pub enum VhostUserError {
    /// Invalid available address
    AvailAddress(GuestMemoryError),
    /// Failed to connect to UDS Unix stream: {0}
    Connect(#[from] std::io::Error),
    /// Invalid descriptor table address
    DescriptorTableAddress(GuestMemoryError),
    /// Get features failed: {0}
    VhostUserGetFeatures(VhostError),
    /// Get protocol features failed: {0}
    VhostUserGetProtocolFeatures(VhostError),
    /// Set owner failed: {0}
    VhostUserSetOwner(VhostError),
    /// Set features failed: {0}
    VhostUserSetFeatures(VhostError),
    /// Set protocol features failed: {0}
    VhostUserSetProtocolFeatures(VhostError),
    /// Set mem table failed: {0}
    VhostUserSetMemTable(VhostError),
    /// Set vring num failed: {0}
    VhostUserSetVringNum(VhostError),
    /// Set vring addr failed: {0}
    VhostUserSetVringAddr(VhostError),
    /// Set vring base failed: {0}
    VhostUserSetVringBase(VhostError),
    /// Set vring call failed: {0}
    VhostUserSetVringCall(VhostError),
    /// Set vring kick failed: {0}
    VhostUserSetVringKick(VhostError),
    /// Set vring enable failed: {0}
    VhostUserSetVringEnable(VhostError),
    /// Failed to read vhost eventfd: No memory region found
    VhostUserNoMemoryRegion,
    /// Invalid used address
    UsedAddress(GuestMemoryError),
}

// Trait with all methods we use from `Frontend` from vhost crate.
// It allows us to create a mock implementation of the `Frontend`
// to verify calls to the backend.
// All methods have default impl in order to simplify mock impls.
pub trait VhostUserHandleBackend: Sized {
    /// Constructor of `Frontend`
    fn from_stream(_sock: UnixStream, _max_queue_num: u64) -> Self {
        unimplemented!()
    }

    fn set_hdr_flags(&self, _flags: VhostUserHeaderFlag) {
        unimplemented!()
    }

    /// Get from the underlying vhost implementation the feature bitmask.
    fn get_features(&self) -> Result {
        unimplemented!()
    }

    /// Enable features in the underlying vhost implementation using a bitmask.
    fn set_features(&self, _features: u64) -> Result<(), vhost::Error> {
        unimplemented!()
    }

    /// Set the current Frontend as an owner of the session.
    fn set_owner(&self) -> Result<(), vhost::Error> {
        unimplemented!()
    }

    /// Set the memory map regions on the slave so it can translate the vring
    /// addresses. In the ancillary data there is an array of file descriptors
    fn set_mem_table(&self, _regions: &[VhostUserMemoryRegionInfo]) -> Result<(), vhost::Error> {
        unimplemented!()
    }

    /// Set the size of the queue.
    fn set_vring_num(&self, _queue_index: usize, _num: u16) -> Result<(), vhost::Error> {
        unimplemented!()
    }

    /// Sets the addresses of the different aspects of the vring.
    fn set_vring_addr(
        &self,
        _queue_index: usize,
        _config_data: &VringConfigData,
    ) -> Result<(), vhost::Error> {
        unimplemented!()
    }

    /// Sets the base offset in the available vring.
    fn set_vring_base(&self, _queue_index: usize, _base: u16) -> Result<(), vhost::Error> {
        unimplemented!()
    }

    /// Set the event file descriptor to signal when buffers are used.
    /// Bits (0-7) of the payload contain the vring index. Bit 8 is the invalid FD flag. This flag
    /// is set when there is no file descriptor in the ancillary data. This signals that polling
    /// will be used instead of waiting for the call.
    fn set_vring_call(&self, _queue_index: usize, _fd: &EventFd) -> Result<(), vhost::Error> {
        unimplemented!()
    }

    /// Set the event file descriptor for adding buffers to the vring.
    /// Bits (0-7) of the payload contain the vring index. Bit 8 is the invalid FD flag. This flag
    /// is set when there is no file descriptor in the ancillary data. This signals that polling
    /// should be used instead of waiting for a kick.
    fn set_vring_kick(&self, _queue_index: usize, _fd: &EventFd) -> Result<(), vhost::Error> {
        unimplemented!()
    }

    fn get_protocol_features(&mut self) -> Result {
        unimplemented!()
    }

    fn set_protocol_features(
        &mut self,
        _features: VhostUserProtocolFeatures,
    ) -> Result<(), vhost::Error> {
        unimplemented!()
    }

    fn set_vring_enable(&mut self, _queue_index: usize, _enable: bool) -> Result<(), vhost::Error> {
        unimplemented!()
    }

    fn get_config(
        &mut self,
        _offset: u32,
        _size: u32,
        _flags: VhostUserConfigFlags,
        _buf: &[u8],
    ) -> Result<(VhostUserConfig, VhostUserConfigPayload), vhost::Error> {
        unimplemented!()
    }

    fn set_config(
        &mut self,
        _offset: u32,
        _flags: VhostUserConfigFlags,
        _buf: &[u8],
    ) -> Result<(), vhost::Error> {
        unimplemented!()
    }
}

// Forward every trait method to the real `Frontend` implementation from the vhost crate.
impl VhostUserHandleBackend for Frontend {
    fn from_stream(sock: UnixStream, max_queue_num: u64) -> Self {
        Frontend::from_stream(sock, max_queue_num)
    }

    fn set_hdr_flags(&self, flags: VhostUserHeaderFlag) {
        self.set_hdr_flags(flags)
    }

    /// Get from the underlying vhost implementation the feature bitmask.
    fn get_features(&self) -> Result {
        ::get_features(self)
    }

    /// Enable features in the underlying vhost implementation using a bitmask.
    fn set_features(&self, features: u64) -> Result<(), vhost::Error> {
        ::set_features(self, features)
    }

    /// Set the current Frontend as an owner of the session.
    fn set_owner(&self) -> Result<(), vhost::Error> {
        ::set_owner(self)
    }

    /// Set the memory map regions on the slave so it can translate the vring
    /// addresses. In the ancillary data there is an array of file descriptors
    fn set_mem_table(&self, regions: &[VhostUserMemoryRegionInfo]) -> Result<(), vhost::Error> {
        ::set_mem_table(self, regions)
    }

    /// Set the size of the queue.
    fn set_vring_num(&self, queue_index: usize, num: u16) -> Result<(), vhost::Error> {
        ::set_vring_num(self, queue_index, num)
    }

    /// Sets the addresses of the different aspects of the vring.
    fn set_vring_addr(
        &self,
        queue_index: usize,
        config_data: &VringConfigData,
    ) -> Result<(), vhost::Error> {
        ::set_vring_addr(self, queue_index, config_data)
    }

    /// Sets the base offset in the available vring.
    fn set_vring_base(&self, queue_index: usize, base: u16) -> Result<(), vhost::Error> {
        ::set_vring_base(self, queue_index, base)
    }

    /// Set the event file descriptor to signal when buffers are used.
    /// Bits (0-7) of the payload contain the vring index. Bit 8 is the invalid FD flag. This flag
    /// is set when there is no file descriptor in the ancillary data. This signals that polling
    /// will be used instead of waiting for the call.
    fn set_vring_call(&self, queue_index: usize, fd: &EventFd) -> Result<(), vhost::Error> {
        ::set_vring_call(self, queue_index, fd)
    }

    /// Set the event file descriptor for adding buffers to the vring.
    /// Bits (0-7) of the payload contain the vring index. Bit 8 is the invalid FD flag. This flag
    /// is set when there is no file descriptor in the ancillary data. This signals that polling
    /// should be used instead of waiting for a kick.
    fn set_vring_kick(&self, queue_index: usize, fd: &EventFd) -> Result<(), vhost::Error> {
        ::set_vring_kick(self, queue_index, fd)
    }

    fn get_protocol_features(&mut self) -> Result {
        ::get_protocol_features(self)
    }

    fn set_protocol_features(
        &mut self,
        features: VhostUserProtocolFeatures,
    ) -> Result<(), vhost::Error> {
        ::set_protocol_features(self, features)
    }

    fn set_vring_enable(&mut self, queue_index: usize, enable: bool) -> Result<(), vhost::Error> {
        ::set_vring_enable(self, queue_index, enable)
    }

    fn get_config(
        &mut self,
        offset: u32,
        size: u32,
        flags: VhostUserConfigFlags,
        buf: &[u8],
    ) -> Result<(VhostUserConfig, VhostUserConfigPayload), vhost::Error> {
        ::get_config(self, offset, size, flags, buf)
    }

    fn set_config(
        &mut self,
        offset: u32,
        flags: VhostUserConfigFlags,
        buf: &[u8],
    ) -> Result<(), vhost::Error> {
        ::set_config(self, offset, flags, buf)
    }
}

// Production alias: the handle backed by the real vhost `Frontend`.
pub type VhostUserHandle = VhostUserHandleImpl;

/// vhost-user socket handle
#[derive(Clone)]
pub struct VhostUserHandleImpl {
    // The backend implementation (real `Frontend` or a test mock).
    pub vu: T,
    // Path of the backend's Unix domain socket; kept for Debug/snapshot purposes.
    pub socket_path: String,
}

impl std::fmt::Debug for VhostUserHandleImpl {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // `vu` is deliberately omitted — it is not Debug and carries no useful state to print.
        f.debug_struct("VhostUserHandle")
            .field("socket_path", &self.socket_path)
            .finish()
    }
}

impl VhostUserHandleImpl {
    /// Connect to the vhost-user backend socket and mark self as an
    /// owner of the session.
    pub fn new(socket_path: &str, num_queues: u64) -> Result {
        let stream = UnixStream::connect(socket_path).map_err(VhostUserError::Connect)?;
        let vu = T::from_stream(stream, num_queues);
        vu.set_owner().map_err(VhostUserError::VhostUserSetOwner)?;
        Ok(Self {
            vu,
            socket_path: socket_path.to_string(),
        })
    }

    /// Set vhost-user features to the backend.
    pub fn set_features(&self, features: u64) -> Result<(), VhostUserError> {
        self.vu
            .set_features(features)
            .map_err(VhostUserError::VhostUserSetFeatures)
    }

    /// Set vhost-user protocol features to the backend.
    /// No-op unless `acked_features` carries the PROTOCOL_FEATURES bit and
    /// `acked_protocol_features` is a valid protocol-feature bitmask.
    pub fn set_protocol_features(
        &mut self,
        acked_features: u64,
        acked_protocol_features: u64,
    ) -> Result<(), VhostUserError> {
        if acked_features & VhostUserVirtioFeatures::PROTOCOL_FEATURES.bits() != 0
            && let Some(acked_protocol_features) =
                VhostUserProtocolFeatures::from_bits(acked_protocol_features)
        {
            self.vu
                .set_protocol_features(acked_protocol_features)
                .map_err(VhostUserError::VhostUserSetProtocolFeatures)?;

            if acked_protocol_features.contains(VhostUserProtocolFeatures::REPLY_ACK) {
                self.vu.set_hdr_flags(VhostUserHeaderFlag::NEED_REPLY);
            }
        }
        Ok(())
    }

    /// Negotiate virtio and protocol features with the backend.
    pub fn negotiate_features(
        &mut self,
        avail_features: u64,
        avail_protocol_features: VhostUserProtocolFeatures,
    ) -> Result<(u64, u64), VhostUserError> {
        // Get features from backend, do negotiation to get a feature collection which
        // both VMM and backend support.
        let backend_features = self
            .vu
            .get_features()
            .map_err(VhostUserError::VhostUserGetFeatures)?;
        let acked_features = avail_features & backend_features;

        let acked_protocol_features =
            // If frontend can negotiate protocol features.
            if acked_features & VhostUserVirtioFeatures::PROTOCOL_FEATURES.bits() != 0 {
                let backend_protocol_features = self
                    .vu
                    .get_protocol_features()
                    .map_err(VhostUserError::VhostUserGetProtocolFeatures)?;

                let acked_protocol_features = avail_protocol_features & backend_protocol_features;

                self.vu
                    .set_protocol_features(acked_protocol_features)
                    .map_err(VhostUserError::VhostUserSetProtocolFeatures)?;

                acked_protocol_features
            } else {
                VhostUserProtocolFeatures::empty()
            };

        if acked_protocol_features.contains(VhostUserProtocolFeatures::REPLY_ACK) {
            self.vu.set_hdr_flags(VhostUserHeaderFlag::NEED_REPLY);
        }

        Ok((acked_features, acked_protocol_features.bits()))
    }

    /// Update guest memory table to the backend.
    /// Errors with `VhostUserNoMemoryRegion` if any region is not file-backed,
    /// since the backend must receive an fd + offset per region.
    fn update_mem_table(&self, mem: &GuestMemoryMmap) -> Result<(), VhostUserError> {
        let mut regions: Vec = Vec::new();
        for region in mem.iter() {
            let (mmap_handle, mmap_offset) = match region.file_offset() {
                Some(_file_offset) => (_file_offset.file().as_raw_fd(), _file_offset.start()),
                None => {
                    return Err(VhostUserError::VhostUserNoMemoryRegion);
                }
            };

            let vhost_user_net_reg = VhostUserMemoryRegionInfo {
                guest_phys_addr: region.start_addr().raw_value(),
                memory_size: region.len(),
                userspace_addr: region.inner.as_ptr() as u64,
                mmap_offset,
                mmap_handle,
            };

            regions.push(vhost_user_net_reg);
        }

        self.vu
            .set_mem_table(regions.as_slice())
            .map_err(VhostUserError::VhostUserSetMemTable)?;

        Ok(())
    }

    /// Set up vhost-user backend. This includes updating memory table,
    /// sending information about virtio rings and enabling them.
    pub fn setup_backend(
        &mut self,
        mem: &GuestMemoryMmap,
        queues: &[(usize, &Queue, &EventFd)],
        interrupt: Arc,
    ) -> Result<(), VhostUserError> {
        // Provide the memory table to the backend.
        self.update_mem_table(mem)?;

        // Send set_vring_num here, since it could tell backends, like SPDK,
        // how many virt queues to be handled, which backend required to know
        // at early stage.
        for (queue_index, queue, _) in queues.iter() {
            self.vu
                .set_vring_num(*queue_index, queue.size)
                .map_err(VhostUserError::VhostUserSetVringNum)?;
        }

        for (queue_index, queue, queue_evt) in queues.iter() {
            // Translate the guest-physical ring addresses to host virtual addresses,
            // which is what the backend process needs.
            let config_data = VringConfigData {
                queue_max_size: queue.max_size,
                queue_size: queue.size,
                flags: 0u32,
                desc_table_addr: mem
                    .get_host_address(queue.desc_table_address)
                    .map_err(VhostUserError::DescriptorTableAddress)?
                    as u64,
                used_ring_addr: mem
                    .get_host_address(queue.used_ring_address)
                    .map_err(VhostUserError::UsedAddress)? as u64,
                avail_ring_addr: mem
                    .get_host_address(queue.avail_ring_address)
                    .map_err(VhostUserError::AvailAddress)? as u64,
                log_addr: None,
            };

            self.vu
                .set_vring_addr(*queue_index, &config_data)
                .map_err(VhostUserError::VhostUserSetVringAddr)?;
            self.vu
                .set_vring_base(*queue_index, queue.avail_ring_idx_get())
                .map_err(VhostUserError::VhostUserSetVringBase)?;

            // No matter the queue, we set irq_evt for signaling the guest that buffers were
            // consumed.
            self.vu
                .set_vring_call(
                    *queue_index,
                    interrupt
                        .notifier(VirtioInterruptType::Queue(
                            (*queue_index).try_into().unwrap_or_else(|_| {
                                panic!("vhost-user: invalid queue index: {}", *queue_index)
                            }),
                        ))
                        .as_ref()
                        .unwrap(),
                )
                .map_err(VhostUserError::VhostUserSetVringCall)?;

            self.vu
                .set_vring_kick(*queue_index, queue_evt)
                .map_err(VhostUserError::VhostUserSetVringKick)?;

            self.vu
                .set_vring_enable(*queue_index, true)
                .map_err(VhostUserError::VhostUserSetVringEnable)?;
        }

        Ok(())
    }
}

#[cfg(test)]
pub(crate) mod tests {
    #![allow(clippy::undocumented_unsafe_blocks)]

    use std::fs::File;

    use vmm_sys_util::tempfile::TempFile;

    use super::*;
    use crate::devices::virtio::test_utils::default_interrupt;
    use crate::test_utils::create_tmp_socket;
    use crate::vstate::memory;
    use crate::vstate::memory::{GuestAddress, GuestRegionMmapExt};

    /// Build a file-backed guest memory from `(start, size)` region descriptors, so that
    /// `update_mem_table` has fd-backed regions to hand to the backend.
    pub(crate) fn create_mem(file: File, regions: &[(GuestAddress, usize)]) -> GuestMemoryMmap {
        GuestMemoryMmap::from_regions(
            memory::create(
                regions.iter().copied(),
                libc::MAP_PRIVATE,
                Some(file),
                false,
            )
            .unwrap()
            .into_iter()
            .map(|region| GuestRegionMmapExt::dram_from_mmap_region(region, 0))
            .collect(),
        )
        .unwrap()
    }

    #[test]
    fn test_new() {
        // Mock that records the socket, queue count and whether set_owner was called.
        struct MockFrontend {
            sock: UnixStream,
            max_queue_num: u64,
            is_owner: std::cell::UnsafeCell,
        }

        impl VhostUserHandleBackend for MockFrontend {
            fn from_stream(sock: UnixStream, max_queue_num: u64) -> Self {
                Self {
                    sock,
                    max_queue_num,
                    is_owner: std::cell::UnsafeCell::new(false),
                }
            }

            fn set_owner(&self) -> Result<(), vhost::Error> {
                unsafe { *self.is_owner.get() = true };
                Ok(())
            }
        }

        let max_queue_num = 69;

        let (_tmp_dir, tmp_socket_path) = create_tmp_socket();

        // Creation of the VhostUserHandleImpl correctly connects to the socket, sets the maximum
        // number of queues and sets itself as an owner of the session.
        let vuh = VhostUserHandleImpl::::new(&tmp_socket_path, max_queue_num).unwrap();
        assert_eq!(
            vuh.vu
                .sock
                .peer_addr()
                .unwrap()
                .as_pathname()
                .unwrap()
                .to_str()
                .unwrap(),
            &tmp_socket_path,
        );
        assert_eq!(vuh.vu.max_queue_num, max_queue_num);
        assert!(unsafe { *vuh.vu.is_owner.get() });
    }

    #[test]
    fn test_set_features() {
        // Mock that records the feature bitmask passed to set_features.
        struct MockFrontend {
            features: std::cell::UnsafeCell,
        }

        impl VhostUserHandleBackend for MockFrontend {
            fn set_features(&self, features: u64) -> Result<(), vhost::Error> {
                unsafe { *self.features.get() = features };
                Ok(())
            }
        }

        // VhostUserHandleImpl can correctly set backend features.
        let vuh = VhostUserHandleImpl {
            vu: MockFrontend { features: 0.into() },
            socket_path: "".to_string(),
        };

        vuh.set_features(0x69).unwrap();
        assert_eq!(unsafe { *vuh.vu.features.get() }, 0x69);
    }

    #[test]
    fn test_set_protocol_features() {
        // Mock that records the protocol features and header flags it receives.
        struct MockFrontend {
            protocol_features: VhostUserProtocolFeatures,
            hdr_flags: std::cell::UnsafeCell,
        }

        impl VhostUserHandleBackend for MockFrontend {
            fn set_hdr_flags(&self, flags: VhostUserHeaderFlag) {
                unsafe { *self.hdr_flags.get() = flags };
            }

            fn set_protocol_features(
                &mut self,
                features: VhostUserProtocolFeatures,
            ) -> Result<(), vhost::Error> {
                self.protocol_features = features;
                Ok(())
            }
        }

        let mut vuh = VhostUserHandleImpl {
            vu: MockFrontend {
                protocol_features: VhostUserProtocolFeatures::empty(),
                hdr_flags: std::cell::UnsafeCell::new(VhostUserHeaderFlag::empty()),
            },
            socket_path: "".to_string(),
        };

        // No protocol features are set if acked_features do not have PROTOCOL_FEATURES bit
        let acked_features = 0;
        let acked_protocol_features = VhostUserProtocolFeatures::empty();
        vuh.set_protocol_features(acked_features, acked_protocol_features.bits())
            .unwrap();
        assert_eq!(vuh.vu.protocol_features, VhostUserProtocolFeatures::empty());
        assert_eq!(
            unsafe { &*vuh.vu.hdr_flags.get() }.bits(),
            VhostUserHeaderFlag::empty().bits()
        );

        // No protocol features are set if acked_features do not have PROTOCOL_FEATURES bit
        let acked_features = 0;
        let acked_protocol_features = VhostUserProtocolFeatures::all();
        vuh.set_protocol_features(acked_features, acked_protocol_features.bits())
            .unwrap();
        assert_eq!(vuh.vu.protocol_features, VhostUserProtocolFeatures::empty());
        assert_eq!(
            unsafe { &*vuh.vu.hdr_flags.get() }.bits(),
            VhostUserHeaderFlag::empty().bits()
        );

        // If not REPLY_ACK present, no header is set
        let acked_features = VhostUserVirtioFeatures::PROTOCOL_FEATURES.bits();
        let mut acked_protocol_features = VhostUserProtocolFeatures::all();
        acked_protocol_features.set(VhostUserProtocolFeatures::REPLY_ACK, false);
        vuh.set_protocol_features(acked_features, acked_protocol_features.bits())
            .unwrap();
        assert_eq!(vuh.vu.protocol_features, acked_protocol_features);
        assert_eq!(
            unsafe { &*vuh.vu.hdr_flags.get() }.bits(),
            VhostUserHeaderFlag::empty().bits()
        );

        // If REPLY_ACK present, header is set
        let acked_features = VhostUserVirtioFeatures::PROTOCOL_FEATURES.bits();
        let acked_protocol_features = VhostUserProtocolFeatures::all();
        vuh.set_protocol_features(acked_features, acked_protocol_features.bits())
            .unwrap();
        assert_eq!(vuh.vu.protocol_features, acked_protocol_features);
        assert_eq!(
            unsafe { &*vuh.vu.hdr_flags.get() }.bits(),
            VhostUserHeaderFlag::NEED_REPLY.bits()
        );
    }

    #[test]
    fn test_negotiate_features() {
        // Mock backend advertising configurable virtio/protocol features.
        struct MockFrontend {
            features: u64,
            protocol_features: VhostUserProtocolFeatures,
            hdr_flags: std::cell::UnsafeCell,
        }

        impl VhostUserHandleBackend for MockFrontend {
            fn set_hdr_flags(&self, flags: VhostUserHeaderFlag) {
                unsafe { *self.hdr_flags.get() = flags };
            }

            fn get_features(&self) -> Result {
                Ok(self.features)
            }

            fn get_protocol_features(&mut self) -> Result {
                Ok(self.protocol_features)
            }

            fn set_protocol_features(
                &mut self,
                features: VhostUserProtocolFeatures,
            ) -> Result<(), vhost::Error> {
                self.protocol_features = features;
                Ok(())
            }
        }

        let mut vuh = VhostUserHandleImpl {
            vu: MockFrontend {
                features: 0,
                protocol_features: VhostUserProtocolFeatures::empty(),
                hdr_flags: std::cell::UnsafeCell::new(VhostUserHeaderFlag::empty()),
            },
            socket_path: "".to_string(),
        };

        // If nothing is available, nothing is negotiated
        let avail_features = 0;
        let avail_protocol_features = VhostUserProtocolFeatures::empty();
        let (acked_features, acked_protocol_features) = vuh
            .negotiate_features(avail_features, avail_protocol_features)
            .unwrap();
        assert_eq!(acked_features, avail_features);
        assert_eq!(acked_protocol_features, avail_protocol_features.bits());
        assert_eq!(vuh.vu.protocol_features, VhostUserProtocolFeatures::empty());
        assert_eq!(
            unsafe { &*vuh.vu.hdr_flags.get() }.bits(),
            VhostUserHeaderFlag::empty().bits()
        );

        // If neither frontend avail_features nor backend avail_features contain PROTOCOL_FEATURES
        // bit, only features are negotiated
        let mut avail_features = VhostUserVirtioFeatures::all();
        avail_features.set(VhostUserVirtioFeatures::PROTOCOL_FEATURES, false);
        // Pretend backend has same features as frontend
        vuh.vu.features = avail_features.bits();
        let avail_protocol_features = VhostUserProtocolFeatures::empty();
        let (acked_features, acked_protocol_features) = vuh
            .negotiate_features(avail_features.bits(), avail_protocol_features)
            .unwrap();
        assert_eq!(acked_features, avail_features.bits());
        assert_eq!(acked_protocol_features, avail_protocol_features.bits());
        assert_eq!(vuh.vu.protocol_features, VhostUserProtocolFeatures::empty());
        assert_eq!(
            unsafe { &*vuh.vu.hdr_flags.get() }.bits(),
            VhostUserHeaderFlag::empty().bits()
        );

        // If PROTOCOL_FEATURES is negotiated, but REPLY_ACK is not, headers are not set
        let avail_features = VhostUserVirtioFeatures::all();
        // Pretend backend has same features as frontend
        vuh.vu.features = avail_features.bits();
        let mut avail_protocol_features = VhostUserProtocolFeatures::empty();
        avail_protocol_features.set(VhostUserProtocolFeatures::CONFIG, true);
        let mut backend_protocol_features = VhostUserProtocolFeatures::empty();
        backend_protocol_features.set(VhostUserProtocolFeatures::CONFIG, true);
        backend_protocol_features.set(VhostUserProtocolFeatures::PAGEFAULT, true);
        vuh.vu.protocol_features = backend_protocol_features;
        let (acked_features, acked_protocol_features) = vuh
            .negotiate_features(avail_features.bits(), avail_protocol_features)
            .unwrap();
        assert_eq!(acked_features, avail_features.bits());
        assert_eq!(acked_protocol_features, avail_protocol_features.bits());
        assert_eq!(vuh.vu.protocol_features, avail_protocol_features);
        assert_eq!(
            unsafe { &*vuh.vu.hdr_flags.get() }.bits(),
            VhostUserHeaderFlag::empty().bits()
        );

        // If PROTOCOL_FEATURES and REPLY_ACK are negotiated
        let avail_features = VhostUserVirtioFeatures::all();
        // Pretend backend has same features as frontend
        vuh.vu.features = avail_features.bits();
        let mut avail_protocol_features = VhostUserProtocolFeatures::empty();
        avail_protocol_features.set(VhostUserProtocolFeatures::REPLY_ACK, true);
        // Pretend backend has same features as frontend
        vuh.vu.protocol_features = avail_protocol_features;
        let (acked_features, acked_protocol_features) = vuh
            .negotiate_features(avail_features.bits(), avail_protocol_features)
            .unwrap();
        assert_eq!(acked_features, avail_features.bits());
        assert_eq!(acked_protocol_features, avail_protocol_features.bits());
        assert_eq!(vuh.vu.protocol_features, avail_protocol_features);
        assert_eq!(
            unsafe { &*vuh.vu.hdr_flags.get() }.bits(),
            VhostUserHeaderFlag::NEED_REPLY.bits(),
        );
    }

    #[test]
    fn test_update_mem_table() {
        // Mock that captures the memory regions handed to set_mem_table.
        struct MockFrontend {
            regions: std::cell::UnsafeCell>,
        }

        impl VhostUserHandleBackend for MockFrontend {
            fn set_mem_table(
                &self,
                regions: &[VhostUserMemoryRegionInfo],
            ) -> Result<(), vhost::Error> {
                unsafe { (*self.regions.get()).extend_from_slice(regions) }
                Ok(())
            }
        }

        let vuh = VhostUserHandleImpl {
            vu: MockFrontend {
                regions: std::cell::UnsafeCell::new(vec![]),
            },
            socket_path: "".to_string(),
        };

        let region_size = 0x10000;
        // NOTE(review): the chunk is truncated here, mid-statement.
        let file =
TempFile::new().unwrap().into_file(); let file_size = 2 * region_size; file.set_len(file_size as u64).unwrap(); let regions = vec![ (GuestAddress(0x0), region_size), (GuestAddress(0x10000), region_size), ]; let guest_memory = create_mem(file, ®ions); vuh.update_mem_table(&guest_memory).unwrap(); // VhostUserMemoryRegionInfo should be correctly set by the VhostUserHandleImpl let expected_regions = guest_memory .iter() .map(|region| VhostUserMemoryRegionInfo { guest_phys_addr: region.start_addr().raw_value(), memory_size: region.len(), userspace_addr: region.inner.as_ptr() as u64, mmap_offset: region.file_offset().unwrap().start(), mmap_handle: region.file_offset().unwrap().file().as_raw_fd(), }) .collect::>(); for (region, expected) in (unsafe { &*vuh.vu.regions.get() }) .iter() .zip(expected_regions) { // VhostUserMemoryRegionInfo does not implement Eq. assert_eq!(region.guest_phys_addr, expected.guest_phys_addr); assert_eq!(region.memory_size, expected.memory_size); assert_eq!(region.userspace_addr, expected.userspace_addr); assert_eq!(region.mmap_offset, expected.mmap_offset); assert_eq!(region.mmap_handle, expected.mmap_handle); } } #[test] fn test_setup_backend() { #[derive(Default)] struct VringData { index: usize, size: u16, config: VringConfigData, base: u16, call: i32, kick: i32, enable: bool, } struct MockFrontend { vrings: std::cell::UnsafeCell>, } impl VhostUserHandleBackend for MockFrontend { fn set_mem_table( &self, _regions: &[VhostUserMemoryRegionInfo], ) -> Result<(), vhost::Error> { Ok(()) } fn set_vring_num(&self, queue_index: usize, num: u16) -> Result<(), vhost::Error> { unsafe { (*self.vrings.get()).push(VringData { index: queue_index, size: num, ..Default::default() }) }; Ok(()) } fn set_vring_addr( &self, queue_index: usize, config_data: &VringConfigData, ) -> Result<(), vhost::Error> { unsafe { (&mut (*self.vrings.get()))[queue_index].config = *config_data }; Ok(()) } fn set_vring_base(&self, queue_index: usize, base: u16) -> Result<(), 
vhost::Error> { unsafe { (&mut (*self.vrings.get()))[queue_index].base = base }; Ok(()) } fn set_vring_call(&self, queue_index: usize, fd: &EventFd) -> Result<(), vhost::Error> { unsafe { (&mut (*self.vrings.get()))[queue_index].call = fd.as_raw_fd() }; Ok(()) } fn set_vring_kick(&self, queue_index: usize, fd: &EventFd) -> Result<(), vhost::Error> { unsafe { (&mut (*self.vrings.get()))[queue_index].kick = fd.as_raw_fd() }; Ok(()) } fn set_vring_enable( &mut self, queue_index: usize, enable: bool, ) -> Result<(), vhost::Error> { unsafe { &mut *self.vrings.get() } .get_mut(queue_index) .unwrap() .enable = enable; Ok(()) } } let mut vuh = VhostUserHandleImpl { vu: MockFrontend { vrings: std::cell::UnsafeCell::new(vec![]), }, socket_path: "".to_string(), }; let region_size = 0x10000; let file = TempFile::new().unwrap().into_file(); file.set_len(region_size as u64).unwrap(); let regions = vec![(GuestAddress(0x0), region_size)]; let guest_memory = create_mem(file, ®ions); let mut queue = Queue::new(128); queue.ready = true; queue.size = queue.max_size; queue.initialize(&guest_memory).unwrap(); let event_fd = EventFd::new(0).unwrap(); let queues = [(0, &queue, &event_fd)]; let interrupt = default_interrupt(); vuh.setup_backend(&guest_memory, &queues, interrupt.clone()) .unwrap(); // VhostUserHandleImpl should correctly send memory and queues information to // the backend. 
let expected_config = VringData { index: 0, size: 128, config: VringConfigData { queue_max_size: 128, queue_size: 128, flags: 0, desc_table_addr: guest_memory .get_host_address(queue.desc_table_address) .unwrap() as u64, used_ring_addr: guest_memory .get_host_address(queue.used_ring_address) .unwrap() as u64, avail_ring_addr: guest_memory .get_host_address(queue.avail_ring_address) .unwrap() as u64, log_addr: None, }, base: queue.avail_ring_idx_get(), call: interrupt .notifier(VirtioInterruptType::Queue(0u16)) .as_ref() .unwrap() .as_raw_fd(), kick: event_fd.as_raw_fd(), enable: true, }; let result = unsafe { &*vuh.vu.vrings.get() }; assert_eq!(result.len(), 1); assert_eq!(result[0].index, expected_config.index); assert_eq!(result[0].size, expected_config.size); // VringConfigData does not implement Eq. assert_eq!( result[0].config.queue_max_size, expected_config.config.queue_max_size ); assert_eq!( result[0].config.queue_size, expected_config.config.queue_size ); assert_eq!(result[0].config.flags, expected_config.config.flags); assert_eq!( result[0].config.desc_table_addr, expected_config.config.desc_table_addr ); assert_eq!( result[0].config.used_ring_addr, expected_config.config.used_ring_addr ); assert_eq!( result[0].config.avail_ring_addr, expected_config.config.avail_ring_addr ); assert_eq!(result[0].config.log_addr, expected_config.config.log_addr); assert_eq!(result[0].base, expected_config.base); assert_eq!(result[0].call, expected_config.call); assert_eq!(result[0].kick, expected_config.kick); assert_eq!(result[0].enable, expected_config.enable); } } ================================================ FILE: src/vmm/src/devices/virtio/vhost_user_metrics.rs ================================================ // Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 //! Defines the metrics system for vhost-user devices. //! //! # Metrics format //! 
The metrics are flushed in JSON when requested by vmm::logger::metrics::METRICS.write().
//!
//! ## JSON example with metrics:
//! ```json
//! {
//!   "vhost_user_{mod}_id0": {
//!     "activate_fails": "SharedIncMetric",
//!     "cfg_fails": "SharedIncMetric",
//!     "init_time_us": "SharedStoreMetric",
//!     "activate_time_us": "SharedStoreMetric",
//!     "config_change_time_us": "SharedStoreMetric",
//!   }
//!   "vhost_user_{mod}_id1": {
//!     "activate_fails": "SharedIncMetric",
//!     "cfg_fails": "SharedIncMetric",
//!     "init_time_us": "SharedStoreMetric",
//!     "activate_time_us": "SharedStoreMetric",
//!     "config_change_time_us": "SharedStoreMetric",
//!   }
//!   ...
//!   "vhost_user_{mod}_idN": {
//!     "activate_fails": "SharedIncMetric",
//!     "cfg_fails": "SharedIncMetric",
//!     "init_time_us": "SharedStoreMetric",
//!     "activate_time_us": "SharedStoreMetric",
//!     "config_change_time_us": "SharedStoreMetric",
//!   }
//! }
//! ```
//! Each `vhost_user` field in the example above is a serializable `VhostUserDeviceMetrics`
//! structure collecting metrics such as `activate_fails`, `cfg_fails`, `init_time_us`,
//! `activate_time_us` and `config_change_time_us` for the vhost_user device.
//! For a vhost-user block device having endpoint "/drives/drv0" the emitted metrics would be
//! `vhost_user_block_drv0`.
//! For a vhost-user block device having endpoint "/drives/drvN" the emitted metrics would be
//! `vhost_user_block_drvN`.
//! An aggregate metric for `vhost_user` is `not` emitted, as it can be easily obtained in
//! typical observability tools.
//!
//! # Design
//! The main design goals of this system are:
//! * To improve vhost_user device metrics by logging them at per device granularity.
//! * `vhost_user` is a new device with no metrics emitted before, so backward compatibility
//!   doesn't come into the picture like it did in the case of block/net devices. And since
//!   metrics can be easily aggregated using typical observability tools, we chose not to
//!   provide aggregate vhost_user metrics.
//!
* Rely on `serde` to provide the actual serialization for writing the metrics. //! * Since all metrics start at 0, we implement the `Default` trait via derive for all of them, to //! avoid having to initialize everything by hand. //! //! * Follow the design of Block and Net device metrics and use a map of vhost_user device name and //! corresponding metrics. //! * Metrics are flushed with key `vhost_user_{module_specific_name}` and each module sets an //! appropriate `module_specific_name` in the format `{mod}_{id}`. e.g. vhost-user block device in //! this commit set this as `format!("{}_{}", "block_", config.drive_id.clone());` This way //! vhost_user_metrics stay generic while the specific vhost_user devices can have their unique //! metrics. //! //! The system implements 2 type of metrics: //! * Shared Incremental Metrics (SharedIncMetrics) - dedicated for the metrics which need a counter //! (i.e the number of times activating a device failed). These metrics are reset upon flush. //! * Shared Store Metrics (SharedStoreMetrics) - are targeted at keeping a persistent value, it is //! `not` intended to act as a counter (i.e for measure the process start up time for example). //! //! We add VhostUserDeviceMetrics entries from vhost_user_metrics::METRICS into vhost_user device //! instead of vhost_user device having individual separate VhostUserDeviceMetrics entries because //! vhost_user device is not accessible from signal handlers to flush metrics and //! vhost_user_metrics::METRICS is. use std::collections::BTreeMap; use std::sync::{Arc, RwLock}; use serde::ser::SerializeMap; use serde::{Serialize, Serializer}; use crate::logger::{SharedIncMetric, SharedStoreMetric}; /// map of vhost_user drive id and metrics /// this should be protected by a lock before accessing. 
#[allow(missing_debug_implementations)]
pub struct VhostUserMetricsPerDevice {
    /// Per-device metrics, keyed by vhost_user drive id.
    /// Access only while holding the `METRICS` lock.
    pub metrics: BTreeMap<String, Arc<VhostUserDeviceMetrics>>,
}

impl VhostUserMetricsPerDevice {
    /// Allocate `VhostUserDeviceMetrics` for the vhost_user device with
    /// id `drive_id`. Allocates only if the entry doesn't already exist,
    /// to avoid overwriting previously recorded data.
    ///
    /// The `METRICS` lock is statically initialized, so it is safe to
    /// unwrap it without a check (barring lock poisoning).
    pub fn alloc(drive_id: String) -> Arc<VhostUserDeviceMetrics> {
        Arc::clone(
            METRICS
                .write()
                .unwrap()
                .metrics
                .entry(drive_id)
                .or_insert_with(|| Arc::new(VhostUserDeviceMetrics::default())),
        )
    }
}

/// Pool of vhost_user-related metrics per device behind a lock to
/// keep things thread safe. Since the lock is initialized here
/// it is safe to unwrap it without any check.
static METRICS: RwLock<VhostUserMetricsPerDevice> = RwLock::new(VhostUserMetricsPerDevice {
    metrics: BTreeMap::new(),
});

/// Serialize all vhost_user device metrics as a map whose keys are
/// `vhost_user_{device_name}`.
pub fn flush_metrics<S: Serializer>(serializer: S) -> Result<S::Ok, S::Error> {
    let vhost_user_metrics = METRICS.read().unwrap();
    let metrics_len = vhost_user_metrics.metrics.len();
    let mut seq = serializer.serialize_map(Some(metrics_len))?;
    for (name, metrics) in vhost_user_metrics.metrics.iter() {
        // Prefix each device id so the flushed key is globally unique,
        // e.g. drive "drv0" flushes as "vhost_user_block_drv0".
        let devn = format!("vhost_user_{}", name);
        seq.serialize_entry(&devn, metrics)?;
    }
    seq.end()
}

/// vhost_user Device associated metrics.
#[derive(Debug, Default, Serialize)]
pub struct VhostUserDeviceMetrics {
    /// Number of times when activate failed on a vhost_user device.
    pub activate_fails: SharedIncMetric,
    /// Number of times when interacting with the space config of a vhost-user device failed.
    pub cfg_fails: SharedIncMetric,
    // Vhost-user init time in microseconds.
    pub init_time_us: SharedStoreMetric,
    // Vhost-user activate time in microseconds.
    pub activate_time_us: SharedStoreMetric,
    // Vhost-user config change time in microseconds.
pub config_change_time_us: SharedStoreMetric, } #[cfg(test)] pub mod tests { use utils::time::{ClockType, get_time_us}; use super::*; use crate::logger::{IncMetric, StoreMetric}; // vhost-user metrics has both SharedIncMetrics and SharedStoreMetrics // In this test we try to test one field for each type by creating a // dummy vhost_user_block metric named `vhost_user_block_drvN`. // There is no specific reason to storing the measured time taken vs a // random number in `init_time_us`. // We add an additional test to confirm that `vhost_user_metrics::METRICS` // actually has an entry for `vhost_user_block_drvN` and compare it. // We chose serde_json to compare because that seemed easiest to compare // the entire struct format and serialization of VhostUserDeviceMetrics. #[test] fn test_vhost_user_basic_metrics() { let vhost_user_dev_name: String = String::from("vhost_user_block_drvN"); let start_time = get_time_us(ClockType::Monotonic); let vhost_user_metrics: Arc = VhostUserMetricsPerDevice::alloc(vhost_user_dev_name.clone()); let delta_us = get_time_us(ClockType::Monotonic) - start_time; vhost_user_metrics.activate_fails.inc(); assert_eq!(vhost_user_metrics.activate_fails.count(), 1); vhost_user_metrics.init_time_us.store(delta_us); assert_eq!(vhost_user_metrics.init_time_us.fetch(), delta_us); // fill another local variable with the same data and use it to compare with the METRICS // entry let vhost_user_metrics_backup: VhostUserDeviceMetrics = VhostUserDeviceMetrics::default(); vhost_user_metrics_backup.activate_fails.inc(); vhost_user_metrics_backup.init_time_us.store(delta_us); // serializing METRICS also flushes the SharedIncMetric data so we have to use _backup // variable for comparison. 
let vhost_user_metrics_global: String = serde_json::to_string(&METRICS.read().unwrap().metrics.get(&vhost_user_dev_name)) .unwrap(); let vhost_user_metrics_local: String = serde_json::to_string(&vhost_user_metrics_backup).unwrap(); assert_eq!(vhost_user_metrics_local, vhost_user_metrics_global); } } ================================================ FILE: src/vmm/src/devices/virtio/vsock/csm/connection.rs ================================================ // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 // use std::fmt::Debug; /// The main job of `VsockConnection` is to forward data traffic, back and forth, between a /// guest-side AF_VSOCK socket and a host-side generic `Read + Write + AsRawFd` stream, while /// also managing its internal state. /// To that end, `VsockConnection` implements: /// - `VsockChannel` for: /// - moving data from the host stream to a guest-provided RX buffer, via `recv_pkt()`; and /// - moving data from a guest-provided TX buffer to the host stream, via `send_pkt()`; and /// - updating its internal state, by absorbing control packets (anything other than /// VSOCK_OP_RW). /// - `VsockEpollListener` for getting notified about the availability of data or free buffer /// space at the host stream. /// /// Note: there is a certain asymmetry to the RX and TX data flows: /// - RX transfers do not need any data buffering, since data is read straight from the /// host stream and into the guest-provided RX buffer; /// - TX transfers may require some data to be buffered by `VsockConnection`, if the host /// peer can't keep up with reading the data that we're writing. This is because, once /// the guest driver provides some data in a virtio TX buffer, the vsock device must /// consume it. If that data can't be forwarded straight to the host stream, we'll /// have to store it in a buffer (and flush it at a later time). Vsock flow control /// ensures that our TX buffer doesn't overflow. 
// The code in this file is best read with a fresh memory of the vsock protocol inner-workings. // To help with that, here is a // // Short primer on the vsock protocol // ---------------------------------- // // 1. Establishing a connection A vsock connection is considered established after a two-way // handshake: // - the initiating peer sends a connection request packet (`hdr.op` == VSOCK_OP_REQUEST); // then // - the listening peer sends back a connection response packet (`hdr.op` == // VSOCK_OP_RESPONSE). // // 2. Terminating a connection When a peer wants to shut down an established connection, it // sends a VSOCK_OP_SHUTDOWN packet. Two header flags are used with VSOCK_OP_SHUTDOWN, // indicating the sender's intention: // - VSOCK_FLAGS_SHUTDOWN_RCV: the sender will receive no more data for this connection; and // - VSOCK_FLAGS_SHUTDOWN_SEND: the sender will send no more data for this connection. // After a shutdown packet, the receiving peer will have some protocol-undefined time to // flush its buffers, and then forcefully terminate the connection by sending back an RST // packet. If the shutdown-initiating peer doesn't receive this RST packet during a timeout // period, it will send one itself, thus terminating the connection. // Note: a peer can send more than one VSOCK_OP_SHUTDOWN packets. However, read/write // indications cannot be undone. E.g. once a "no-more-sending" promise was made, it // cannot be taken back. That is, `hdr.flags` will be ORed between subsequent // VSOCK_OP_SHUTDOWN packets. // // 3. Flow control Before sending a data packet (VSOCK_OP_RW), the sender must make sure that // the receiver has enough free buffer space to store that data. If this condition is not // respected, the receiving peer's behaviour is undefined. In this implementation, we // forcefully terminate the connection by sending back a VSOCK_OP_RST packet. Note: all // buffer space information is computed and stored on a per-connection basis. 
Peers keep // each other informed about the free buffer space they have by filling in two packet header // members with each packet they send: // - `hdr.buf_alloc`: the total buffer space the peer has allocated for receiving data; and // - `hdr.fwd_cnt`: the total number of bytes the peer has successfully flushed out of its // buffer. // One can figure out how much space its peer has available in its buffer by inspecting the // difference between how much it has sent to the peer and how much the peer has flushed out // (i.e. "forwarded", in the vsock spec terminology): // `peer_free = peer_buf_alloc - (total_bytes_sent_to_peer - peer_fwd_cnt)`. // Note: the above requires that peers constantly keep each other informed on their buffer // space situation. However, since there are no receipt acknowledgement packets // defined for the vsock protocol, packet flow can often be unidirectional (just one // peer sending data to another), so the sender's information about the receiver's // buffer space can get quickly outdated. The vsock protocol defines two solutions to // this problem: // 1. The sender can explicitly ask for a buffer space (i.e. "credit") update from its // peer, via a VSOCK_OP_CREDIT_REQUEST packet, to which it will get a // VSOCK_OP_CREDIT_UPDATE response (or any response will do, really, since credit // information must be included in any packet); // 2. The receiver can be proactive, and send VSOCK_OP_CREDIT_UPDATE packet, whenever // it thinks its peer's information is out of date. // Our implementation uses the proactive approach. 
use std::io::{ErrorKind, Write}; use std::num::Wrapping; use std::os::unix::io::{AsRawFd, RawFd}; use std::time::{Duration, Instant}; use log::{debug, error, info, warn}; use vm_memory::GuestMemoryError; use vm_memory::io::{ReadVolatile, WriteVolatile}; use vmm_sys_util::epoll::EventSet; use super::super::defs::uapi; use super::super::{VsockChannel, VsockEpollListener, VsockError}; use super::txbuf::TxBuf; use super::{ConnState, PendingRx, PendingRxSet, VsockCsmError, defs}; use crate::devices::virtio::vsock::metrics::METRICS; use crate::devices::virtio::vsock::packet::{VsockPacketHeader, VsockPacketRx, VsockPacketTx}; use crate::logger::IncMetric; use crate::utils::wrap_usize_to_u32; /// Trait that vsock connection backends need to implement. /// /// Used as an alias for `ReadVolatile + Write + WriteVolatile + AsRawFd` /// (sadly, trait aliases are not supported, /// ). pub trait VsockConnectionBackend: ReadVolatile + Write + WriteVolatile + AsRawFd {} /// A self-managing connection object, that handles communication between a guest-side AF_VSOCK /// socket and a host-side `ReadVolatile + Write + WriteVolatile + AsRawFd` stream. #[derive(Debug)] pub struct VsockConnection { /// The current connection state. state: ConnState, /// The local CID. Most of the time this will be the constant `2` (the vsock host CID). local_cid: u64, /// The peer (guest) CID. peer_cid: u64, /// The local (host) port. local_port: u32, /// The peer (guest) port. peer_port: u32, /// The (connected) host-side stream. stream: S, /// The TX buffer for this connection. tx_buf: TxBuf, /// Total number of bytes that have been successfully written to `self.stream`, either /// directly, or flushed from `self.tx_buf`. fwd_cnt: Wrapping, /// The amount of buffer space that the peer (guest) has allocated for this connection. peer_buf_alloc: u32, /// The total number of bytes that the peer has forwarded away. 
peer_fwd_cnt: Wrapping, /// The total number of bytes sent to the peer (guest vsock driver) rx_cnt: Wrapping, /// Our `self.fwd_cnt`, as last sent to the peer. This is used to provide proactive credit /// updates, and let the peer know it's OK to send more data. last_fwd_cnt_to_peer: Wrapping, /// The set of pending RX packet indications that `recv_pkt()` will use to fill in a /// packet for the peer (guest). pending_rx: PendingRxSet, /// Instant when this connection should be scheduled for immediate termination, due to some /// timeout condition having been fulfilled. expiry: Option, } impl VsockChannel for VsockConnection where S: VsockConnectionBackend + Debug, { /// Fill in a vsock packet, to be delivered to our peer (the guest driver). /// /// As per the `VsockChannel` trait, this should only be called when there is data to be /// fetched from the channel (i.e. `has_pending_rx()` is true). Otherwise, it will error /// out with `VsockError::NoData`. /// Pending RX indications are set by other mutable actions performed on the channel. For /// instance, `send_pkt()` could set an Rst indication, if called with a VSOCK_OP_SHUTDOWN /// packet, or `notify()` could set a Rw indication (a data packet can be fetched from the /// channel), if data was ready to be read from the host stream. /// /// Returns: /// - `Ok(())`: the packet has been successfully filled in and is ready for delivery; /// - `Err(VsockError::NoData)`: there was no data available with which to fill in the packet; /// - `Err(VsockError::PktBufMissing)`: the packet would've been filled in with data, but it is /// missing the data buffer. fn recv_pkt(&mut self, pkt: &mut VsockPacketRx) -> Result<(), VsockError> { // Perform some generic initialization that is the same for any packet operation (e.g. // source, destination, credit, etc). self.init_pkt_hdr(&mut pkt.hdr); METRICS.rx_packets_count.inc(); // If forceful termination is pending, there's no point in checking for anything else. // It's dead, Jim. 
if self.pending_rx.remove(PendingRx::Rst) { pkt.hdr.set_op(uapi::VSOCK_OP_RST); return Ok(()); } // Next up: if we're due a connection confirmation, that's all we need to know to fill // in this packet. if self.pending_rx.remove(PendingRx::Response) { self.state = ConnState::Established; pkt.hdr.set_op(uapi::VSOCK_OP_RESPONSE); return Ok(()); } // Same thing goes for locally-initiated connections that need to yield a connection // request. if self.pending_rx.remove(PendingRx::Request) { self.expiry = Some(Instant::now() + Duration::from_millis(defs::CONN_REQUEST_TIMEOUT_MS)); pkt.hdr.set_op(uapi::VSOCK_OP_REQUEST); return Ok(()); } if self.pending_rx.remove(PendingRx::Rw) { // We're due to produce a data packet, by reading the data from the host-side // Unix socket. match self.state { // A data packet is only valid for established connections, and connections for // which our peer has initiated a graceful shutdown, but can still receive data. ConnState::Established | ConnState::PeerClosed(false, _) => (), _ => { // Any other connection state is invalid at this point, and we need to kill it // with fire. pkt.hdr.set_op(uapi::VSOCK_OP_RST); return Ok(()); } } // Oh wait, before we start bringing in the big data, can our peer handle receiving so // much bytey goodness? if self.need_credit_update_from_peer() { self.last_fwd_cnt_to_peer = self.fwd_cnt; pkt.hdr.set_op(uapi::VSOCK_OP_CREDIT_REQUEST); return Ok(()); } // The maximum amount of data we can read in is limited by both the RX buffer size and // the peer available buffer space. let max_len = std::cmp::min(pkt.buf_size(), self.peer_avail_credit()); // Read data from the stream straight to the RX buffer, for maximum throughput. match pkt.read_at_offset_from(&mut self.stream, 0, max_len) { Ok(read_cnt) => { if read_cnt == 0 { // A 0-length read means the host stream was closed down. In that case, // we'll ask our peer to shut down the connection. We can neither send nor // receive any more data. 
self.state = ConnState::LocalClosed; self.expiry = Some( Instant::now() + Duration::from_millis(defs::CONN_SHUTDOWN_TIMEOUT_MS), ); pkt.hdr .set_op(uapi::VSOCK_OP_SHUTDOWN) .set_flag(uapi::VSOCK_FLAGS_SHUTDOWN_RCV) .set_flag(uapi::VSOCK_FLAGS_SHUTDOWN_SEND); } else { // On a successful data read, we fill in the packet with the RW op, and // length of the read data. // Safe to unwrap because read_cnt is no more than max_len, which is bounded // by self.peer_avail_credit(), a u32 internally. pkt.hdr.set_op(uapi::VSOCK_OP_RW).set_len(read_cnt); METRICS.rx_bytes_count.add(read_cnt as u64); } self.rx_cnt += Wrapping(pkt.hdr.len()); self.last_fwd_cnt_to_peer = self.fwd_cnt; return Ok(()); } Err(VsockError::GuestMemoryMmap(GuestMemoryError::IOError(err))) if err.kind() == ErrorKind::WouldBlock => { // This shouldn't actually happen (receiving EWOULDBLOCK after EPOLLIN), but // apparently it does, so we need to handle it gracefully. warn!( "vsock: unexpected EWOULDBLOCK while reading from backing stream: lp={}, \ pp={}, err={:?}", self.local_port, self.peer_port, err ); } Err(err) => { // We are not expecting any other errors when reading from the underlying // stream. If any show up, we'll immediately kill this connection. METRICS.rx_read_fails.inc(); error!( "vsock: error reading from backing stream: lp={}, pp={}, err={:?}", self.local_port, self.peer_port, err ); pkt.hdr.set_op(uapi::VSOCK_OP_RST); self.last_fwd_cnt_to_peer = self.fwd_cnt; return Ok(()); } }; } // A credit update is basically a no-op, so we should only waste a perfectly fine RX // buffer on it if we really have nothing else to say, hence we check for this RX // indication last. if self.pending_rx.remove(PendingRx::CreditUpdate) && !self.has_pending_rx() { pkt.hdr.set_op(uapi::VSOCK_OP_CREDIT_UPDATE); self.last_fwd_cnt_to_peer = self.fwd_cnt; return Ok(()); } // We've already checked for all conditions that would have produced a packet, so // if we got to here, we don't know how to yield one. 
Err(VsockError::NoData)
    }

    /// Deliver a guest-generated packet to this connection.
    ///
    /// This forwards the data in RW packets to the host stream, and absorbs control packets,
    /// using them to manage the internal connection state.
    ///
    /// Returns:
    /// always `Ok(())`: the packet has been consumed;
    fn send_pkt(&mut self, pkt: &VsockPacketTx) -> Result<(), VsockError> {
        // Update the peer credit information. Every packet carries the sender's current
        // buffer allocation and forwarded-bytes counter, regardless of its op.
        self.peer_buf_alloc = pkt.hdr.buf_alloc();
        self.peer_fwd_cnt = Wrapping(pkt.hdr.fwd_cnt());
        METRICS.tx_packets_count.inc();

        match self.state {
            // Most frequent case: this is an established connection that needs to forward some
            // data to the host stream. Also works for a connection that has begun shutting
            // down, but the peer still has some data to send.
            ConnState::Established | ConnState::PeerClosed(_, false)
                if pkt.hdr.op() == uapi::VSOCK_OP_RW =>
            {
                if pkt.buf_size() == 0 {
                    info!(
                        "vsock: dropping empty data packet from guest (lp={}, pp={})",
                        self.local_port, self.peer_port
                    );
                    return Ok(());
                }

                // Unwrapping here is safe, since we just checked `pkt.buf()` above.
                if let Err(err) = self.send_bytes(pkt) {
                    // If we can't write to the host stream, that's an unrecoverable error, so
                    // we'll terminate this connection.
                    warn!(
                        "vsock: error writing to local stream (lp={}, pp={}): {:?}",
                        self.local_port, self.peer_port, err
                    );
                    self.kill();
                    return Ok(());
                }

                // We might've just consumed some data. If that's the case, we might need to
                // update the peer on our buffer space situation, so that it can keep sending
                // data packets our way.
                if self.peer_needs_credit_update() {
                    self.pending_rx.insert(PendingRx::CreditUpdate);
                }
            }

            // Next up: receiving a response / confirmation for a host-initiated connection.
            // We'll move to an Established state, and pass on the good news through the host
            // stream.
            ConnState::LocalInit if pkt.hdr.op() == uapi::VSOCK_OP_RESPONSE => {
                // Handshake completed in time: disarm the request-timeout kill timer.
                self.expiry = None;
                self.state = ConnState::Established;
            }

            // The peer wants to shut down an established connection. If they have nothing
            // more to send nor receive, and we don't have to wait to drain our TX buffer, we
            // can schedule an RST packet (to terminate the connection on the next recv call).
            // Otherwise, we'll arm the kill timer.
            ConnState::Established if pkt.hdr.op() == uapi::VSOCK_OP_SHUTDOWN => {
                let recv_off = pkt.hdr.flags() & uapi::VSOCK_FLAGS_SHUTDOWN_RCV != 0;
                let send_off = pkt.hdr.flags() & uapi::VSOCK_FLAGS_SHUTDOWN_SEND != 0;
                self.state = ConnState::PeerClosed(recv_off, send_off);
                if recv_off && send_off {
                    if self.tx_buf.is_empty() {
                        self.pending_rx.insert(PendingRx::Rst);
                    } else {
                        self.expiry = Some(
                            Instant::now()
                                + Duration::from_millis(defs::CONN_SHUTDOWN_TIMEOUT_MS),
                        );
                    }
                }
            }

            // The peer wants to update a shutdown request, with more receive/send indications.
            // The same logic as above applies. Note: indications can only be set, never reset.
            ConnState::PeerClosed(ref mut recv_off, ref mut send_off)
                if pkt.hdr.op() == uapi::VSOCK_OP_SHUTDOWN =>
            {
                *recv_off = *recv_off || (pkt.hdr.flags() & uapi::VSOCK_FLAGS_SHUTDOWN_RCV != 0);
                *send_off = *send_off || (pkt.hdr.flags() & uapi::VSOCK_FLAGS_SHUTDOWN_SEND != 0);
                if *recv_off && *send_off && self.tx_buf.is_empty() {
                    self.pending_rx.insert(PendingRx::Rst);
                }
            }

            // A credit update from our peer is valid only in a state which allows data
            // transfer towards the peer.
            ConnState::Established | ConnState::PeerInit | ConnState::PeerClosed(false, _)
                if pkt.hdr.op() == uapi::VSOCK_OP_CREDIT_UPDATE =>
            {
                // Nothing to do here; we've already updated peer credit.
            }

            // A credit request from our peer is valid only in a state which allows data
            // transfer from the peer. We'll respond with a credit update packet.
            ConnState::Established | ConnState::PeerInit | ConnState::PeerClosed(_, false)
                if pkt.hdr.op() == uapi::VSOCK_OP_CREDIT_REQUEST =>
            {
                self.pending_rx.insert(PendingRx::CreditUpdate);
            }

            _ => {
                debug!(
                    "vsock: dropping invalid TX pkt for connection: state={:?}, pkt.hdr={:?}",
                    self.state, pkt.hdr
                );
            }
        };

        Ok(())
    }

    /// Check if the connection has any pending packet addressed to the peer.
    fn has_pending_rx(&self) -> bool {
        !self.pending_rx.is_empty()
    }
}

impl<S> AsRawFd for VsockConnection<S>
where
    S: VsockConnectionBackend + Debug,
{
    /// Get the file descriptor that this connection wants polled.
    ///
    /// The connection is interested in being notified about EPOLLIN / EPOLLOUT events on the
    /// host stream.
    fn as_raw_fd(&self) -> RawFd {
        self.stream.as_raw_fd()
    }
}

impl<S> VsockEpollListener for VsockConnection<S>
where
    S: VsockConnectionBackend + Debug,
{
    /// Get the event set that this connection is interested in.
    ///
    /// A connection will want to be notified when:
    /// - data is available to be read from the host stream, so that it can store an RW pending
    ///   RX indication; and
    /// - data can be written to the host stream, and the TX buffer needs to be flushed.
    fn get_polled_evset(&self) -> EventSet {
        let mut evset = EventSet::empty();
        if !self.tx_buf.is_empty() {
            // There's data waiting in the TX buffer, so we are interested in being notified
            // when writing to the host stream wouldn't block.
            evset.insert(EventSet::OUT);
        }
        // We're generally interested in being notified when data can be read from the host
        // stream, unless we're in a state which doesn't allow moving data from host to guest.
        match self.state {
            ConnState::Killed | ConnState::LocalClosed | ConnState::PeerClosed(true, _) => (),
            _ if self.need_credit_update_from_peer() => (),
            _ => evset.insert(EventSet::IN),
        }
        evset
    }

    /// Notify the connection about an event (or set of events) that it was interested in.
    fn notify(&mut self, evset: EventSet) {
        if evset.contains(EventSet::IN) {
            // Data can be read from the host stream. Setting a Rw pending indication, so that
            // the muxer will know to call `recv_pkt()` later.
            self.pending_rx.insert(PendingRx::Rw);
        }

        if evset.contains(EventSet::OUT) {
            // Data can be written to the host stream. Time to flush out the TX buffer.
            //
            if self.tx_buf.is_empty() {
                METRICS.conn_event_fails.inc();
                info!("vsock: connection received unexpected EPOLLOUT event");
                return;
            }
            let flushed = self
                .tx_buf
                .flush_to(&mut self.stream)
                .unwrap_or_else(|err| {
                    METRICS.tx_flush_fails.inc();
                    warn!(
                        "vsock: error flushing TX buf for (lp={}, pp={}): {:?}",
                        self.local_port, self.peer_port, err
                    );
                    match err {
                        VsockCsmError::TxBufFlush(inner)
                            if inner.kind() == ErrorKind::WouldBlock =>
                        {
                            // This should never happen (EWOULDBLOCK after EPOLLOUT), but
                            // it does, so let's absorb it.
                        }
                        _ => self.kill(),
                    };
                    0
                });
            self.fwd_cnt += wrap_usize_to_u32(flushed);
            METRICS.tx_bytes_count.add(flushed as u64);

            // If this connection was shutting down, but is waiting to drain the TX buffer
            // before forceful termination, the wait might be over.
            if self.state == ConnState::PeerClosed(true, true) && self.tx_buf.is_empty() {
                self.pending_rx.insert(PendingRx::Rst);
            } else if self.peer_needs_credit_update() {
                // If we've freed up some more buffer space, we may need to let the peer know it
                // can safely send more data our way.
                self.pending_rx.insert(PendingRx::CreditUpdate);
            }
        }
    }
}

impl<S> VsockConnection<S>
where
    S: VsockConnectionBackend + Debug,
{
    /// Create a new guest-initiated connection object.
    pub fn new_peer_init(
        stream: S,
        local_cid: u64,
        peer_cid: u64,
        local_port: u32,
        peer_port: u32,
        peer_buf_alloc: u32,
    ) -> Self {
        Self {
            local_cid,
            peer_cid,
            local_port,
            peer_port,
            stream,
            state: ConnState::PeerInit,
            tx_buf: TxBuf::new(),
            fwd_cnt: Wrapping(0),
            peer_buf_alloc,
            peer_fwd_cnt: Wrapping(0),
            rx_cnt: Wrapping(0),
            last_fwd_cnt_to_peer: Wrapping(0),
            // A guest-initiated connection must first be answered with VSOCK_OP_RESPONSE.
            pending_rx: PendingRxSet::from(PendingRx::Response),
            expiry: None,
        }
    }

    /// Create a new host-initiated connection object.
pub fn new_local_init( stream: S, local_cid: u64, peer_cid: u64, local_port: u32, peer_port: u32, ) -> Self { Self { local_cid, peer_cid, local_port, peer_port, stream, state: ConnState::LocalInit, tx_buf: TxBuf::new(), fwd_cnt: Wrapping(0), peer_buf_alloc: 0, peer_fwd_cnt: Wrapping(0), rx_cnt: Wrapping(0), last_fwd_cnt_to_peer: Wrapping(0), pending_rx: PendingRxSet::from(PendingRx::Request), expiry: None, } } /// Check if there is an expiry (kill) timer set for this connection, sometime in the /// future. pub fn will_expire(&self) -> bool { match self.expiry { None => false, Some(t) => t > Instant::now(), } } /// Check if this connection needs to be scheduled for forceful termination, due to its /// kill timer having expired. pub fn has_expired(&self) -> bool { match self.expiry { None => false, Some(t) => t <= Instant::now(), } } /// Get the kill timer value, if one is set. pub fn expiry(&self) -> Option { self.expiry } /// Schedule the connection to be forcefully terminated ASAP (i.e. the next time the /// connection is asked to yield a packet, via `recv_pkt()`). pub fn kill(&mut self) { self.state = ConnState::Killed; self.pending_rx.insert(PendingRx::Rst); } /// Return the connections state. pub fn state(&self) -> ConnState { self.state } /// Send some raw, untracked, data straight to the underlying connected stream. /// Returns: number of bytes written, or the error describing the write failure. /// /// Warning: this will bypass the connection state machine and write directly to the /// underlying stream. No account of this write is kept, which includes bypassing /// vsock flow control. pub fn send_bytes_raw(&mut self, buf: &[u8]) -> Result { self.stream.write(buf).map_err(VsockCsmError::StreamWrite) } /// Send some raw data (a byte-slice) to the host stream. /// /// Raw data can either be sent straight to the host stream, or to our TX buffer, if the /// former fails. 
fn send_bytes(&mut self, pkt: &VsockPacketTx) -> Result<(), VsockError> { let len = pkt.hdr.len(); // If there is data in the TX buffer, that means we're already registered for EPOLLOUT // events on the underlying stream. Therefore, there's no point in attempting a write // at this point. `self.notify()` will get called when EPOLLOUT arrives, and it will // attempt to drain the TX buffer then. if !self.tx_buf.is_empty() { return pkt .write_from_offset_to(&mut self.tx_buf, 0, len) .map(|_| ()); } // The TX buffer is empty, so we can try to write straight to the host stream. let written = match pkt.write_from_offset_to(&mut self.stream, 0, len) { Ok(cnt) => cnt, Err(VsockError::GuestMemoryMmap(GuestMemoryError::IOError(err))) if err.kind() == ErrorKind::WouldBlock => { // Absorb any would-block errors, since we can always try again later. 0 } Err(err) => { // We don't know how to handle any other write error, so we'll send it up // the call chain. METRICS.tx_write_fails.inc(); return Err(err); } }; // Move the "forwarded bytes" counter ahead by how much we were able to send out. // Safe to unwrap because the maximum value is pkt.len(), which is a u32. self.fwd_cnt += written; METRICS.tx_bytes_count.add(written as u64); // If we couldn't write the whole slice, we'll need to push the remaining data to our // buffer. if written < len { pkt.write_from_offset_to(&mut self.tx_buf, written, len - written)?; } Ok(()) } /// Check if the credit information the peer has last received from us is outdated. fn peer_needs_credit_update(&self) -> bool { let peer_seen_free_buf = Wrapping(defs::CONN_TX_BUF_SIZE) - (self.fwd_cnt - self.last_fwd_cnt_to_peer); peer_seen_free_buf < Wrapping(defs::CONN_CREDIT_UPDATE_THRESHOLD) } /// Check if we need to ask the peer for a credit update before sending any more data its /// way. 
fn need_credit_update_from_peer(&self) -> bool { self.peer_avail_credit() == 0 } /// Get the maximum number of bytes that we can send to our peer, without overflowing its /// buffer. fn peer_avail_credit(&self) -> u32 { (Wrapping(self.peer_buf_alloc) - (self.rx_cnt - self.peer_fwd_cnt)).0 } /// Prepare a packet header for transmission to our peer. fn init_pkt_hdr(&self, hdr: &mut VsockPacketHeader) { hdr.set_src_cid(self.local_cid) .set_dst_cid(self.peer_cid) .set_src_port(self.local_port) .set_dst_port(self.peer_port) .set_type(uapi::VSOCK_TYPE_STREAM) .set_buf_alloc(defs::CONN_TX_BUF_SIZE) .set_fwd_cnt(self.fwd_cnt.0); } } #[cfg(test)] mod tests { use std::io::{Error as IoError, ErrorKind, Write}; use std::os::unix::io::RawFd; use std::time::{Duration, Instant}; use vm_memory::{VolatileMemoryError, VolatileSlice}; use vmm_sys_util::eventfd::EventFd; use super::super::super::defs::uapi; use super::super::defs as csm_defs; use super::*; use crate::devices::virtio::vsock::device::{RXQ_INDEX, TXQ_INDEX}; use crate::devices::virtio::vsock::test_utils; use crate::devices::virtio::vsock::test_utils::TestContext; use crate::vstate::memory::BitmapSlice; const LOCAL_CID: u64 = 2; const PEER_CID: u64 = 3; const LOCAL_PORT: u32 = 1002; const PEER_PORT: u32 = 1003; const PEER_BUF_ALLOC: u32 = 64 * 1024; #[derive(Debug)] enum StreamState { Closed, Error(ErrorKind), Ready, WouldBlock, } #[derive(Debug)] struct TestStream { fd: EventFd, read_buf: Vec, read_state: StreamState, write_buf: Vec, write_state: StreamState, } impl TestStream { fn new() -> Self { Self { fd: EventFd::new(libc::EFD_NONBLOCK).unwrap(), read_state: StreamState::Ready, write_state: StreamState::Ready, read_buf: Vec::new(), write_buf: Vec::new(), } } fn new_with_read_buf(buf: &[u8]) -> Self { let mut stream = Self::new(); stream.read_buf = buf.to_vec(); stream } } impl AsRawFd for TestStream { fn as_raw_fd(&self) -> RawFd { self.fd.as_raw_fd() } } impl ReadVolatile for TestStream { fn read_volatile( &mut 
self, buf: &mut VolatileSlice, ) -> Result { match self.read_state { StreamState::Closed => Ok(0), StreamState::Error(kind) => Err(vm_memory::VolatileMemoryError::IOError( IoError::new(kind, "whatevs"), )), StreamState::Ready => { if self.read_buf.is_empty() { return Err(vm_memory::VolatileMemoryError::IOError(IoError::new( ErrorKind::WouldBlock, "EAGAIN", ))); } let len = std::cmp::min(buf.len(), self.read_buf.len()); assert_ne!(len, 0); buf.copy_from(&self.read_buf[..len]); self.read_buf = self.read_buf.split_off(len); Ok(len) } StreamState::WouldBlock => Err(vm_memory::VolatileMemoryError::IOError( IoError::new(ErrorKind::WouldBlock, "EAGAIN"), )), } } } impl Write for TestStream { fn write(&mut self, data: &[u8]) -> Result { self.write_volatile(&VolatileSlice::from(data.to_vec().as_mut_slice())) .map_err(|err| match err { vm_memory::VolatileMemoryError::IOError(io_err) => io_err, _ => unreachable!(), }) } fn flush(&mut self) -> Result<(), IoError> { Ok(()) } } impl WriteVolatile for TestStream { fn write_volatile( &mut self, buf: &VolatileSlice, ) -> Result { match self.write_state { StreamState::Closed => Err(VolatileMemoryError::IOError(IoError::new( ErrorKind::BrokenPipe, "EPIPE", ))), StreamState::Error(kind) => { Err(VolatileMemoryError::IOError(IoError::new(kind, "whatevs"))) } StreamState::Ready => self.write_buf.write_volatile(buf), StreamState::WouldBlock => Err(VolatileMemoryError::IOError(IoError::new( ErrorKind::WouldBlock, "EAGAIN", ))), } } } impl VsockConnectionBackend for TestStream {} impl VsockConnection where S: VsockConnectionBackend + Debug, { /// Get the fwd_cnt value from the connection. pub(crate) fn fwd_cnt(&self) -> Wrapping { self.fwd_cnt } /// Forcefully insert a credit update flag. 
pub(crate) fn insert_credit_update(&mut self) { self.pending_rx.insert(PendingRx::CreditUpdate); } } fn init_pkt_hdr(hdr: &mut VsockPacketHeader, op: u16, len: u32) { hdr.set_src_cid(PEER_CID) .set_dst_cid(LOCAL_CID) .set_src_port(PEER_PORT) .set_dst_port(LOCAL_PORT) .set_type(uapi::VSOCK_TYPE_STREAM) .set_buf_alloc(PEER_BUF_ALLOC) .set_op(op) .set_len(len); } // This is the connection state machine test context: a helper struct to provide CSM testing // primitives. A single `VsockPacket` object will be enough for our testing needs. We'll be // using it for simulating both packet sends and packet receives. We need to keep the vsock // testing context alive, since `VsockPacket` is just a pointer-wrapper over some data that // resides in guest memory. The vsock test context owns the `GuestMemoryMmap` object, so we'll // make it a member here, in order to make sure that guest memory outlives our testing // packet. A single `VsockConnection` object will also suffice for our testing needs. We'll // be using a specially crafted `Read + Write + AsRawFd` object as a backing stream, so that // we can control the various error conditions that might arise. #[derive(Debug)] struct CsmTestContext { _vsock_test_ctx: TestContext, // Two views of the same in-memory packet. 
rx-view for writing, tx-view for reading rx_pkt: VsockPacketRx, tx_pkt: VsockPacketTx, conn: VsockConnection, } impl CsmTestContext { fn new_established() -> Self { Self::new(ConnState::Established) } fn new(conn_state: ConnState) -> Self { let vsock_test_ctx = TestContext::new(); let mut handler_ctx = vsock_test_ctx.create_event_handler_context(); let stream = TestStream::new(); let mut rx_pkt = VsockPacketRx::new().unwrap(); rx_pkt .parse( &vsock_test_ctx.mem, handler_ctx.device.queues[RXQ_INDEX].pop().unwrap().unwrap(), ) .unwrap(); let mut tx_pkt = VsockPacketTx::default(); tx_pkt .parse( &vsock_test_ctx.mem, handler_ctx.device.queues[TXQ_INDEX].pop().unwrap().unwrap(), ) .unwrap(); let conn = match conn_state { ConnState::PeerInit => VsockConnection::::new_peer_init( stream, LOCAL_CID, PEER_CID, LOCAL_PORT, PEER_PORT, PEER_BUF_ALLOC, ), ConnState::LocalInit => VsockConnection::::new_local_init( stream, LOCAL_CID, PEER_CID, LOCAL_PORT, PEER_PORT, ), ConnState::Established => { let mut conn = VsockConnection::::new_peer_init( stream, LOCAL_CID, PEER_CID, LOCAL_PORT, PEER_PORT, PEER_BUF_ALLOC, ); assert!(conn.has_pending_rx()); conn.recv_pkt(&mut rx_pkt).unwrap(); assert_eq!(rx_pkt.hdr.op(), uapi::VSOCK_OP_RESPONSE); conn } other => panic!("invalid ctx state: {:?}", other), }; assert_eq!(conn.state, conn_state); Self { _vsock_test_ctx: vsock_test_ctx, rx_pkt, tx_pkt, conn, } } fn set_stream(&mut self, stream: TestStream) { self.conn.stream = stream; } fn set_peer_credit(&mut self, credit: u32) { assert!(credit < self.conn.peer_buf_alloc); self.conn.peer_fwd_cnt = Wrapping(0); self.conn.rx_cnt = Wrapping(self.conn.peer_buf_alloc - credit); assert_eq!(self.conn.peer_avail_credit(), credit); } fn send(&mut self) { self.conn.send_pkt(&self.tx_pkt).unwrap(); } fn recv(&mut self) { self.conn.recv_pkt(&mut self.rx_pkt).unwrap(); } fn notify_epollin(&mut self) { self.conn.notify(EventSet::IN); assert!(self.conn.has_pending_rx()); } fn notify_epollout(&mut self) { 
self.conn.notify(EventSet::OUT); } fn init_tx_pkt(&mut self, op: u16, len: u32) -> &mut VsockPacketTx { init_pkt_hdr(&mut self.tx_pkt.hdr, op, len); &mut self.tx_pkt } fn init_data_tx_pkt(&mut self, mut data: &[u8]) -> &VsockPacketTx { assert!(data.len() <= self.tx_pkt.buf_size() as usize); self.init_tx_pkt(uapi::VSOCK_OP_RW, u32::try_from(data.len()).unwrap()); let len = data.len(); self.rx_pkt .read_at_offset_from(&mut data, 0, len.try_into().unwrap()) .unwrap(); &self.tx_pkt } } #[test] fn test_peer_request() { let mut ctx = CsmTestContext::new(ConnState::PeerInit); assert!(ctx.conn.has_pending_rx()); ctx.recv(); // For peer-initiated requests, our connection should always yield a vsock reponse packet, // in order to establish the connection. assert_eq!(ctx.rx_pkt.hdr.op(), uapi::VSOCK_OP_RESPONSE); assert_eq!(ctx.rx_pkt.hdr.src_cid(), LOCAL_CID); assert_eq!(ctx.rx_pkt.hdr.dst_cid(), PEER_CID); assert_eq!(ctx.rx_pkt.hdr.src_port(), LOCAL_PORT); assert_eq!(ctx.rx_pkt.hdr.dst_port(), PEER_PORT); assert_eq!(ctx.rx_pkt.hdr.type_(), uapi::VSOCK_TYPE_STREAM); assert_eq!(ctx.rx_pkt.hdr.len(), 0); // After yielding the response packet, the connection should have transitioned to the // established state. assert_eq!(ctx.conn.state, ConnState::Established); } #[test] fn test_local_request() { let mut ctx = CsmTestContext::new(ConnState::LocalInit); // Host-initiated connections should first yield a connection request packet. assert!(ctx.conn.has_pending_rx()); // Before yielding the connection request packet, the timeout kill timer shouldn't be // armed. assert!(!ctx.conn.will_expire()); ctx.recv(); assert_eq!(ctx.rx_pkt.hdr.op(), uapi::VSOCK_OP_REQUEST); // Since the request might time-out, the kill timer should now be armed. 
        assert!(ctx.conn.will_expire());
        assert!(!ctx.conn.has_expired());
        ctx.init_tx_pkt(uapi::VSOCK_OP_RESPONSE, 0);
        ctx.send();
        // Upon receiving a connection response, the connection should have transitioned to the
        // established state, and the kill timer should've been disarmed.
        assert_eq!(ctx.conn.state, ConnState::Established);
        assert!(!ctx.conn.will_expire());
    }

    #[test]
    fn test_local_request_timeout() {
        let mut ctx = CsmTestContext::new(ConnState::LocalInit);
        ctx.recv();
        assert_eq!(ctx.rx_pkt.hdr.op(), uapi::VSOCK_OP_REQUEST);
        assert!(ctx.conn.will_expire());
        assert!(!ctx.conn.has_expired());
        // Sleep past the request timeout; the kill timer should then have expired.
        std::thread::sleep(std::time::Duration::from_millis(
            defs::CONN_REQUEST_TIMEOUT_MS,
        ));
        assert!(ctx.conn.has_expired());
    }

    #[test]
    fn test_rx_data() {
        let mut ctx = CsmTestContext::new_established();
        let data = &[1, 2, 3, 4];
        ctx.set_stream(TestStream::new_with_read_buf(data));
        assert_eq!(ctx.conn.as_raw_fd(), ctx.conn.stream.as_raw_fd());
        ctx.notify_epollin();
        ctx.recv();
        assert_eq!(ctx.rx_pkt.hdr.op(), uapi::VSOCK_OP_RW);
        assert_eq!(ctx.rx_pkt.hdr.len() as usize, data.len());
        let buf = test_utils::read_packet_data(&ctx.tx_pkt, 4);
        assert_eq!(&buf, data);

        // There's no more data in the stream, so `recv_pkt` should yield `VsockError::NoData`.
        // match ctx.conn.recv_pkt(&mut ctx.tx_pkt) {
        match ctx.conn.recv_pkt(&mut ctx.rx_pkt) {
            Err(VsockError::NoData) => (),
            other => panic!("{:?}", other),
        }

        // A recv attempt in an invalid state should yield an instant reset packet.
        ctx.conn.state = ConnState::LocalClosed;
        ctx.notify_epollin();
        ctx.recv();
        assert_eq!(ctx.rx_pkt.hdr.op(), uapi::VSOCK_OP_RST);
    }

    #[test]
    fn test_local_close() {
        let mut ctx = CsmTestContext::new_established();
        let mut stream = TestStream::new();
        stream.read_state = StreamState::Closed;
        ctx.set_stream(stream);
        ctx.notify_epollin();
        ctx.recv();
        // When the host-side stream is closed, we can neither send nor receive any more data.
        // Therefore, the vsock shutdown packet that we'll deliver to the guest must contain both
        // the no-more-send and the no-more-recv indications.
        assert_eq!(ctx.rx_pkt.hdr.op(), uapi::VSOCK_OP_SHUTDOWN);
        assert_ne!(ctx.rx_pkt.hdr.flags() & uapi::VSOCK_FLAGS_SHUTDOWN_SEND, 0);
        assert_ne!(ctx.rx_pkt.hdr.flags() & uapi::VSOCK_FLAGS_SHUTDOWN_RCV, 0);
        // The kill timer should now be armed.
        assert!(ctx.conn.will_expire());
        assert!(
            ctx.conn.expiry().unwrap()
                < Instant::now() + Duration::from_millis(defs::CONN_SHUTDOWN_TIMEOUT_MS)
        );
    }

    #[test]
    fn test_peer_close() {
        // Test that send/recv shutdown indications are handled correctly.
        // I.e. once set, an indication cannot be reset.
        {
            let mut ctx = CsmTestContext::new_established();
            let tx_pkt = ctx.init_tx_pkt(uapi::VSOCK_OP_SHUTDOWN, 0);
            tx_pkt.hdr.set_flags(uapi::VSOCK_FLAGS_SHUTDOWN_RCV);
            ctx.send();
            assert_eq!(ctx.conn.state, ConnState::PeerClosed(true, false));

            // Attempting to reset the no-more-recv indication should not work
            // (we are only setting the no-more-send indication here).
            ctx.tx_pkt.hdr.set_flags(uapi::VSOCK_FLAGS_SHUTDOWN_SEND);
            ctx.send();
            assert_eq!(ctx.conn.state, ConnState::PeerClosed(true, true));
        }

        // Test case:
        // - reading data from a no-more-send connection should work; and
        // - writing data should have no effect.
        {
            let data = &[1, 2, 3, 4];
            let mut ctx = CsmTestContext::new_established();
            ctx.set_stream(TestStream::new_with_read_buf(data));
            let tx_pkt = ctx.init_tx_pkt(uapi::VSOCK_OP_SHUTDOWN, 0);
            tx_pkt.hdr.set_flags(uapi::VSOCK_FLAGS_SHUTDOWN_SEND);
            ctx.send();
            ctx.notify_epollin();
            ctx.recv();
            assert_eq!(ctx.rx_pkt.hdr.op(), uapi::VSOCK_OP_RW);
            let buf = test_utils::read_packet_data(&ctx.tx_pkt, 4);
            assert_eq!(&buf, data);

            ctx.init_data_tx_pkt(data);
            ctx.send();
            assert_eq!(ctx.conn.stream.write_buf.len(), 0);
            assert!(ctx.conn.tx_buf.is_empty());
        }

        // Test case:
        // - writing data to a no-more-recv connection should work; and
        // - attempting to read data from it should yield an RST packet.
        {
            let mut ctx = CsmTestContext::new_established();
            let tx_pkt = ctx.init_tx_pkt(uapi::VSOCK_OP_SHUTDOWN, 0);
            tx_pkt.hdr.set_flags(uapi::VSOCK_FLAGS_SHUTDOWN_RCV);
            ctx.send();
            let data = &[1, 2, 3, 4];
            ctx.init_data_tx_pkt(data);
            ctx.send();
            assert_eq!(ctx.conn.stream.write_buf, data.to_vec());

            ctx.notify_epollin();
            ctx.recv();
            assert_eq!(ctx.rx_pkt.hdr.op(), uapi::VSOCK_OP_RST);
        }

        // Test case: setting both no-more-send and no-more-recv indications should have the
        // connection confirm termination (i.e. yield an RST).
        {
            let mut ctx = CsmTestContext::new_established();
            let tx_pkt = ctx.init_tx_pkt(uapi::VSOCK_OP_SHUTDOWN, 0);
            tx_pkt
                .hdr
                .set_flags(uapi::VSOCK_FLAGS_SHUTDOWN_RCV | uapi::VSOCK_FLAGS_SHUTDOWN_SEND);
            ctx.send();
            assert!(ctx.conn.has_pending_rx());
            ctx.recv();
            assert_eq!(ctx.rx_pkt.hdr.op(), uapi::VSOCK_OP_RST);
        }
    }

    #[test]
    fn test_local_read_error() {
        let mut ctx = CsmTestContext::new_established();
        let mut stream = TestStream::new();
        stream.read_state = StreamState::Error(ErrorKind::PermissionDenied);
        ctx.set_stream(stream);
        ctx.notify_epollin();
        ctx.recv();
        // A read error on the host stream terminates the connection with an RST.
        assert_eq!(ctx.rx_pkt.hdr.op(), uapi::VSOCK_OP_RST);
    }

    #[test]
    fn test_credit_request_to_peer() {
        let mut ctx = CsmTestContext::new_established();
        // With zero peer credit, the connection must ask the peer for a credit update before
        // forwarding any host data.
        ctx.set_peer_credit(0);
        ctx.notify_epollin();
        ctx.recv();
        assert_eq!(ctx.rx_pkt.hdr.op(), uapi::VSOCK_OP_CREDIT_REQUEST);
    }

    #[test]
    fn test_credit_request_from_peer() {
        let mut ctx = CsmTestContext::new_established();
        ctx.init_tx_pkt(uapi::VSOCK_OP_CREDIT_REQUEST, 0);
        ctx.send();
        assert!(ctx.conn.has_pending_rx());
        ctx.recv();
        assert_eq!(ctx.rx_pkt.hdr.op(), uapi::VSOCK_OP_CREDIT_UPDATE);
        assert_eq!(ctx.rx_pkt.hdr.buf_alloc(), csm_defs::CONN_TX_BUF_SIZE);
        assert_eq!(ctx.rx_pkt.hdr.fwd_cnt(), ctx.conn.fwd_cnt.0);
    }

    #[test]
    fn test_credit_update_to_peer() {
        let mut ctx = CsmTestContext::new_established();

        // Force a stale state, where the peer hasn't been updated on our credit situation.
        ctx.conn.last_fwd_cnt_to_peer = Wrapping(0);

        // Since a credit update token is sent when the fwd_cnt value exceeds
        // CONN_TX_BUF_SIZE - CONN_CREDIT_UPDATE_THRESHOLD, we initialize
        // fwd_cnt at 6 bytes below the threshold.
        let initial_fwd_cnt =
            csm_defs::CONN_TX_BUF_SIZE - csm_defs::CONN_CREDIT_UPDATE_THRESHOLD - 6;
        ctx.conn.fwd_cnt = Wrapping(initial_fwd_cnt);

        // Use a 4-byte packet for triggering the credit update threshold.
        let data = &[1, 2, 3, 4];

        // Check that there is no pending RX.
        ctx.init_data_tx_pkt(data);
        ctx.send();
        assert!(!ctx.conn.has_pending_rx());

        // Send a packet again.
        ctx.init_data_tx_pkt(data);
        ctx.send();

        // The CSM should now have a credit update available for the peer.
        assert!(ctx.conn.has_pending_rx());
        ctx.recv();
        assert_eq!(ctx.rx_pkt.hdr.op(), uapi::VSOCK_OP_CREDIT_UPDATE);
        assert_eq!(
            ctx.rx_pkt.hdr.fwd_cnt() as usize,
            initial_fwd_cnt as usize + data.len() * 2,
        );
        assert_eq!(ctx.conn.fwd_cnt, ctx.conn.last_fwd_cnt_to_peer);
    }

    #[test]
    fn test_tx_buffering() {
        // Test case:
        // - when writing to the backing stream would block, TX data should end up in the TX buf
        // - when the CSM is notified that it can write to the backing stream, it should flush
        //   the TX buf.
        {
            let mut ctx = CsmTestContext::new_established();

            let mut stream = TestStream::new();
            stream.write_state = StreamState::WouldBlock;
            ctx.set_stream(stream);

            // Send some data through the connection. The backing stream is set to reject writes,
            // so the data should end up in the TX buffer.
            let data = &[1, 2, 3, 4];
            ctx.init_data_tx_pkt(data);
            ctx.send();

            // When there's data in the TX buffer, the connection should ask to be notified when
            // it can write to its backing stream.
            assert!(ctx.conn.get_polled_evset().contains(EventSet::OUT));
            assert_eq!(ctx.conn.tx_buf.len(), data.len());

            // Unlock the write stream and notify the connection it can now write its bufferred
            // data.
            ctx.set_stream(TestStream::new());
            ctx.conn.notify(EventSet::OUT);
            assert!(ctx.conn.tx_buf.is_empty());
            assert_eq!(ctx.conn.stream.write_buf, data);
        }
    }

    #[test]
    fn test_stream_write_error() {
        // Test case: sending a data packet to a broken / closed backing stream should kill it.
        {
            let mut ctx = CsmTestContext::new_established();
            let mut stream = TestStream::new();
            stream.write_state = StreamState::Closed;
            ctx.set_stream(stream);

            let data = &[1, 2, 3, 4];
            ctx.init_data_tx_pkt(data);
            ctx.send();
            assert_eq!(ctx.conn.state, ConnState::Killed);
            assert!(ctx.conn.has_pending_rx());
            ctx.recv();
            assert_eq!(ctx.rx_pkt.hdr.op(), uapi::VSOCK_OP_RST);
        }

        // Test case: notifying a connection that it can flush its TX buffer to a broken stream
        // should kill the connection.
        {
            let mut ctx = CsmTestContext::new_established();
            let mut stream = TestStream::new();
            stream.write_state = StreamState::WouldBlock;
            ctx.set_stream(stream);

            // Send some data through the connection. The backing stream is set to reject writes,
            // so the data should end up in the TX buffer.
            let data = &[1, 2, 3, 4];
            ctx.init_data_tx_pkt(data);
            ctx.send();

            // Set the backing stream to error out on write.
            let mut stream = TestStream::new();
            stream.write_state = StreamState::Closed;
            ctx.set_stream(stream);
            assert!(ctx.conn.get_polled_evset().contains(EventSet::OUT));
            ctx.notify_epollout();
            assert_eq!(ctx.conn.state, ConnState::Killed);
        }
    }

    #[test]
    fn test_peer_credit_misbehavior() {
        let mut ctx = CsmTestContext::new_established();

        let mut stream = TestStream::new();
        stream.write_state = StreamState::WouldBlock;
        ctx.set_stream(stream);

        // Fill up the TX buffer.
        let data = vec![0u8; ctx.tx_pkt.buf_size() as usize];
        ctx.init_data_tx_pkt(data.as_slice());
        for _i in 0..(csm_defs::CONN_TX_BUF_SIZE as usize / data.len()) {
            ctx.send();
        }

        // Then try to send more data.
        ctx.send();

        // The connection should've committed suicide.
        assert_eq!(ctx.conn.state, ConnState::Killed);
        assert!(ctx.conn.has_pending_rx());
        ctx.recv();
        assert_eq!(ctx.rx_pkt.hdr.op(), uapi::VSOCK_OP_RST);
    }
}



================================================
FILE: src/vmm/src/devices/virtio/vsock/csm/mod.rs
================================================
// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0
//

/// This module implements our vsock connection state machine. The heavy lifting is done by
/// `connection::VsockConnection`, while this file only defines some constants and helper structs.
mod connection;
mod txbuf;

pub use connection::{VsockConnection, VsockConnectionBackend};

pub mod defs {
    /// Vsock connection TX buffer capacity.
    pub const CONN_TX_BUF_SIZE: u32 = 64 * 1024;
    /// When the guest thinks we have less than this amount of free buffer space,
    /// we will send them a credit update packet.
    pub const CONN_CREDIT_UPDATE_THRESHOLD: u32 = 4 * 1024;
    /// Connection request timeout, in millis.
    pub const CONN_REQUEST_TIMEOUT_MS: u64 = 2000;
    /// Connection graceful shutdown timeout, in millis.
    pub const CONN_SHUTDOWN_TIMEOUT_MS: u64 = 2000;
}

// Error display strings double as the user-visible messages, via displaydoc.
#[derive(Debug, thiserror::Error, displaydoc::Display)]
pub enum VsockCsmError {
    /// Attempted to push data to a full TX buffer
    TxBufFull,
    /// An I/O error occurred, when attempting to flush the connection TX buffer: {0}
    TxBufFlush(std::io::Error),
    /// An I/O error occurred, when attempting to write data to the host-side stream: {0}
    StreamWrite(std::io::Error),
}

/// A vsock connection state.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ConnState {
    /// The connection has been initiated by the host end, but is yet to be confirmed by the guest.
    LocalInit,
    /// The connection has been initiated by the guest, but we are yet to confirm it, by sending
    /// a response packet (VSOCK_OP_RESPONSE).
    PeerInit,
    /// The connection handshake has been performed successfully, and data can now be exchanged.
    Established,
    /// The host (AF_UNIX) socket was closed.
    LocalClosed,
    /// A VSOCK_OP_SHUTDOWN packet was received from the guest. The tuple represents the guest R/W
    /// indication: (will_not_recv_anymore_data, will_not_send_anymore_data).
    PeerClosed(bool, bool),
    /// The connection is scheduled to be forcefully terminated as soon as possible.
    Killed,
}

/// An RX indication, used by `VsockConnection` to schedule future `recv_pkt()` responses.
/// For instance, after being notified that there is available data to be read from the host stream
/// (via `notify()`), the connection will store a `PendingRx::Rw` to be later inspected by
/// `recv_pkt()`.
#[derive(Debug, Clone, Copy, PartialEq)]
enum PendingRx {
    /// We need to yield a connection request packet (VSOCK_OP_REQUEST).
    Request = 0,
    /// We need to yield a connection response packet (VSOCK_OP_RESPONSE).
    Response = 1,
    /// We need to yield a forceful connection termination packet (VSOCK_OP_RST).
    Rst = 2,
    /// We need to yield a data packet (VSOCK_OP_RW), by reading from the AF_UNIX socket.
    Rw = 3,
    /// We need to yield a credit update packet (VSOCK_OP_CREDIT_UPDATE).
    CreditUpdate = 4,
}

impl PendingRx {
    /// Transform the enum value into a bitmask, that can be used for set operations.
    fn into_mask(self) -> u16 {
        1u16 << (self as u16)
    }
}

/// A set of RX indications (`PendingRx` items), stored as a bitmask.
#[derive(Debug)]
struct PendingRxSet {
    data: u16,
}

impl PendingRxSet {
    /// Insert an item into the set.
    fn insert(&mut self, it: PendingRx) {
        self.data |= it.into_mask();
    }

    /// Remove an item from the set and return:
    /// - true, if the item was in the set; or
    /// - false, if the item wasn't in the set.
    fn remove(&mut self, it: PendingRx) -> bool {
        let ret = self.contains(it);
        self.data &= !it.into_mask();
        ret
    }

    /// Check if an item is present in this set.
    fn contains(&self, it: PendingRx) -> bool {
        self.data & it.into_mask() != 0
    }

    /// Check if the set is empty.
fn is_empty(&self) -> bool { self.data == 0 } } /// Create a set containing only one item. impl From for PendingRxSet { fn from(it: PendingRx) -> Self { Self { data: it.into_mask(), } } } #[cfg(test)] mod tests { use super::*; #[test] fn test_display_error() { assert_eq!( format!("{}", VsockCsmError::TxBufFull), "Attempted to push data to a full TX buffer" ); assert_eq!( VsockCsmError::TxBufFlush(std::io::Error::from(std::io::ErrorKind::Other)).to_string(), "An I/O error occurred, when attempting to flush the connection TX buffer: other error" ); assert_eq!( VsockCsmError::StreamWrite(std::io::Error::from(std::io::ErrorKind::Other)).to_string(), "An I/O error occurred, when attempting to write data to the host-side stream: other \ error" ); } } ================================================ FILE: src/vmm/src/devices/virtio/vsock/csm/txbuf.rs ================================================ // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 // use std::fmt::Debug; use std::io::Write; use std::num::Wrapping; use vm_memory::{VolatileMemoryError, VolatileSlice, WriteVolatile}; use super::{VsockCsmError, defs}; use crate::utils::wrap_usize_to_u32; use crate::vstate::memory::{BitmapSlice, Bytes}; /// A simple ring-buffer implementation, used by vsock connections to buffer TX (guest -> host) /// data. Memory for this buffer is allocated lazily, since buffering will only be needed when /// the host can't read fast enough. #[derive(Debug)] pub struct TxBuf { /// The actual u8 buffer - only allocated after the first push. data: Option>, /// Ring-buffer head offset - where new data is pushed to. head: Wrapping, /// Ring-buffer tail offset - where data is flushed from. tail: Wrapping, } impl TxBuf { /// Total buffer size, in bytes. const SIZE: usize = defs::CONN_TX_BUF_SIZE as usize; /// Ring-buffer constructor. 
pub fn new() -> Self { Self { data: None, head: Wrapping(0), tail: Wrapping(0), } } /// Get the used length of this buffer - number of bytes that have been pushed in, but not /// yet flushed out. pub fn len(&self) -> usize { (self.head - self.tail).0 as usize } /// Push a byte slice onto the ring-buffer. /// /// Either the entire source slice will be pushed to the ring-buffer, or none of it, if /// there isn't enough room, in which case `Err(Error::TxBufFull)` is returned. pub fn push(&mut self, src: &VolatileSlice) -> Result<(), VsockCsmError> { // Error out if there's no room to push the entire slice. if self.len() + src.len() > Self::SIZE { return Err(VsockCsmError::TxBufFull); } let data = self .data .get_or_insert_with(|| vec![0u8; Self::SIZE].into_boxed_slice()); // Buffer head, as an offset into the data slice. let head_ofs = self.head.0 as usize % Self::SIZE; // Pushing a slice to this buffer can take either one or two slice copies: - one copy, // if the slice fits between `head_ofs` and `Self::SIZE`; or - two copies, if the // ring-buffer head wraps around. // First copy length: we can only go from the head offset up to the total buffer size. let len = std::cmp::min(Self::SIZE - head_ofs, src.len()); let _ = src.read(&mut data[head_ofs..(head_ofs + len)], 0); // If the slice didn't fit, the buffer head will wrap around, and pushing continues // from the start of the buffer (`&self.data[0]`). if len < src.len() { let _ = src.read(&mut data[..(src.len() - len)], len); } // Either way, we've just pushed exactly `src.len()` bytes, so that's the amount by // which the (wrapping) buffer head needs to move forward. self.head += wrap_usize_to_u32(src.len()); Ok(()) } /// Flush the contents of the ring-buffer to a writable stream. /// /// Return the number of bytes that have been transferred out of the ring-buffer and into /// the writable stream. pub fn flush_to(&mut self, sink: &mut W) -> Result { // Nothing to do, if this buffer holds no data. 
if self.is_empty() { return Ok(0); } // Buffer tail, as an offset into the buffer data slice. let tail_ofs = self.tail.0 as usize % Self::SIZE; // Flushing the buffer can take either one or two writes: // - one write, if the tail doesn't need to wrap around to reach the head; or // - two writes, if the tail would wrap around: tail to slice end, then slice end to head. // First write length: the lesser of tail to slice end, or tail to head. let len_to_write = std::cmp::min(Self::SIZE - tail_ofs, self.len()); // It's safe to unwrap here, since we've already checked if the buffer was empty. let data = self.data.as_ref().unwrap(); // Issue the first write and absorb any `WouldBlock` error (we can just try again // later). let written = sink .write(&data[tail_ofs..(tail_ofs + len_to_write)]) .map_err(VsockCsmError::TxBufFlush)?; // Move the buffer tail ahead by the amount (of bytes) we were able to flush out. self.tail += wrap_usize_to_u32(written); // If we weren't able to flush out as much as we tried, there's no point in attempting // our second write. if written < len_to_write { return Ok(written); } // Attempt our second write. This will return immediately if a second write isn't // needed, since checking for an empty buffer is the first thing we do in this // function. // // Interesting corner case: if we've already written some data in the first pass, // and then the second write fails, we will consider the flush action a success // and return the number of bytes written in the first pass. Ok(written + self.flush_to(sink).unwrap_or(0)) } /// Check if the buffer holds any data that hasn't yet been flushed out. 
pub fn is_empty(&self) -> bool { self.len() == 0 } } impl WriteVolatile for TxBuf { fn write_volatile( &mut self, buf: &VolatileSlice, ) -> Result { self.push(buf) .map(|()| buf.len()) .map_err(|err| VolatileMemoryError::IOError(std::io::Error::other(err))) } } #[cfg(test)] mod tests { use std::io::{Error as IoError, ErrorKind, Write}; use super::*; #[derive(Debug)] struct TestSink { data: Vec, err: Option, capacity: usize, } impl TestSink { const DEFAULT_CAPACITY: usize = 2 * TxBuf::SIZE; fn new() -> Self { Self { data: Vec::with_capacity(Self::DEFAULT_CAPACITY), err: None, capacity: Self::DEFAULT_CAPACITY, } } } impl Write for TestSink { fn write(&mut self, src: &[u8]) -> Result { if self.err.is_some() { return Err(self.err.take().unwrap()); } let len_to_push = std::cmp::min(self.capacity - self.data.len(), src.len()); self.data.extend_from_slice(&src[..len_to_push]); Ok(len_to_push) } fn flush(&mut self) -> Result<(), IoError> { Ok(()) } } impl TestSink { fn clear(&mut self) { self.data = Vec::with_capacity(self.capacity); self.err = None; } fn set_err(&mut self, err: IoError) { self.err = Some(err); } fn set_capacity(&mut self, capacity: usize) { self.capacity = capacity; if self.data.len() > self.capacity { self.data.resize(self.capacity, 0); } } } #[test] fn test_push_nowrap() { let mut txbuf = TxBuf::new(); let mut sink = TestSink::new(); assert!(txbuf.is_empty()); assert!(txbuf.data.is_none()); txbuf .push(&VolatileSlice::from([1, 2, 3, 4].as_mut_slice())) .unwrap(); txbuf .push(&VolatileSlice::from([5, 6, 7, 8].as_mut_slice())) .unwrap(); txbuf.flush_to(&mut sink).unwrap(); assert_eq!(sink.data, [1, 2, 3, 4, 5, 6, 7, 8]); sink.clear(); txbuf .write_all_volatile(&VolatileSlice::from([10, 11, 12, 13].as_mut_slice())) .unwrap(); txbuf .write_all_volatile(&VolatileSlice::from([14, 15, 16, 17].as_mut_slice())) .unwrap(); txbuf.flush_to(&mut sink).unwrap(); assert_eq!(sink.data, [10, 11, 12, 13, 14, 15, 16, 17]); sink.clear(); } #[test] fn test_push_wrap() { let 
mut txbuf = TxBuf::new(); let mut sink = TestSink::new(); let mut tmp: Vec = vec![0; TxBuf::SIZE - 2]; txbuf .push(&VolatileSlice::from(tmp.as_mut_slice())) .unwrap(); txbuf.flush_to(&mut sink).unwrap(); sink.clear(); txbuf .push(&VolatileSlice::from([1, 2, 3, 4].as_mut_slice())) .unwrap(); assert_eq!(txbuf.flush_to(&mut sink).unwrap(), 4); assert_eq!(sink.data, [1, 2, 3, 4]); sink.clear(); txbuf .write_all_volatile(&VolatileSlice::from([5, 6, 7, 8].as_mut_slice())) .unwrap(); assert_eq!(txbuf.flush_to(&mut sink).unwrap(), 4); assert_eq!(sink.data, [5, 6, 7, 8]); } #[test] fn test_push_error() { let mut txbuf = TxBuf::new(); let mut tmp = Vec::with_capacity(TxBuf::SIZE); tmp.resize(TxBuf::SIZE - 1, 0); txbuf .push(&VolatileSlice::from(tmp.as_mut_slice())) .unwrap(); match txbuf.push(&VolatileSlice::from([1, 2].as_mut_slice())) { Err(VsockCsmError::TxBufFull) => (), other => panic!("Unexpected result: {:?}", other), } match txbuf.write_volatile(&VolatileSlice::from([1, 2].as_mut_slice())) { Err(err) => { assert_eq!( format!("{}", err), "Attempted to push data to a full TX buffer" ); } other => panic!("Unexpected result: {:?}", other), } } #[test] fn test_incomplete_flush() { let mut txbuf = TxBuf::new(); let mut sink = TestSink::new(); sink.set_capacity(2); txbuf .push(&VolatileSlice::from([1, 2, 3, 4].as_mut_slice())) .unwrap(); assert_eq!(txbuf.flush_to(&mut sink).unwrap(), 2); assert_eq!(txbuf.len(), 2); assert_eq!(sink.data, [1, 2]); sink.set_capacity(4); assert_eq!(txbuf.flush_to(&mut sink).unwrap(), 2); assert!(txbuf.is_empty()); assert_eq!(sink.data, [1, 2, 3, 4]); } #[test] fn test_flush_error() { const EACCESS: i32 = 13; let mut txbuf = TxBuf::new(); let mut sink = TestSink::new(); txbuf .push(&VolatileSlice::from([1, 2, 3, 4].as_mut_slice())) .unwrap(); let io_err = IoError::from_raw_os_error(EACCESS); sink.set_err(io_err); match txbuf.flush_to(&mut sink) { Err(VsockCsmError::TxBufFlush(ref err)) if err.kind() == ErrorKind::PermissionDenied => {} other => 
panic!("Unexpected result: {:?}", other), } } } ================================================ FILE: src/vmm/src/devices/virtio/vsock/device.rs ================================================ // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 // // Portions Copyright 2017 The Chromium OS Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the THIRD-PARTY file. //! This is the `VirtioDevice` implementation for our vsock device. It handles the virtio-level //! device logic: feature negotiation, device configuration, and device activation. //! //! We aim to conform to the VirtIO v1.1 spec: //! https://docs.oasis-open.org/virtio/virtio/v1.1/virtio-v1.1.html //! //! The vsock device has two input parameters: a CID to identify the device, and a //! `VsockBackend` to use for offloading vsock traffic. //! //! Upon its activation, the vsock device registers handlers for the following events/FDs: //! - an RX queue FD; //! - a TX queue FD; //! - an event queue FD; and //! - a backend FD. 
use std::fmt::Debug; use std::ops::Deref; use std::sync::Arc; use log::{error, info, warn}; use vmm_sys_util::eventfd::EventFd; use super::super::super::DeviceError; use super::defs::uapi; use super::packet::{VSOCK_PKT_HDR_SIZE, VsockPacketRx, VsockPacketTx}; use super::{VsockBackend, defs}; use crate::devices::virtio::ActivateError; use crate::devices::virtio::device::{ActiveState, DeviceState, VirtioDevice, VirtioDeviceType}; use crate::devices::virtio::generated::virtio_config::{VIRTIO_F_IN_ORDER, VIRTIO_F_VERSION_1}; use crate::devices::virtio::queue::{InvalidAvailIdx, Queue as VirtQueue}; use crate::devices::virtio::transport::{VirtioInterrupt, VirtioInterruptType}; use crate::devices::virtio::vsock::VsockError; use crate::devices::virtio::vsock::metrics::METRICS; use crate::impl_device_type; use crate::logger::IncMetric; use crate::utils::byte_order; use crate::vstate::memory::{Bytes, GuestMemoryMmap}; pub(crate) const RXQ_INDEX: usize = 0; pub(crate) const TXQ_INDEX: usize = 1; pub(crate) const EVQ_INDEX: usize = 2; pub(crate) const VIRTIO_VSOCK_EVENT_TRANSPORT_RESET: u32 = 0; /// The virtio features supported by our vsock device: /// - VIRTIO_F_VERSION_1: the device conforms to at least version 1.0 of the VirtIO spec. /// - VIRTIO_F_IN_ORDER: the device returns used buffers in the same order that the driver makes /// them available. pub(crate) const AVAIL_FEATURES: u64 = (1 << VIRTIO_F_VERSION_1 as u64) | (1 << VIRTIO_F_IN_ORDER as u64); /// Structure representing the vsock device. #[derive(Debug)] pub struct Vsock { cid: u64, pub(crate) queues: Vec, pub(crate) queue_events: Vec, pub(crate) backend: B, pub(crate) avail_features: u64, pub(crate) acked_features: u64, // This EventFd is the only one initially registered for a vsock device, and is used to convert // a VirtioDevice::activate call into an EventHandler read event which allows the other events // (queue and backend related) to be registered post virtio device activation. 
That's // mostly something we wanted to happen for the backend events, to prevent (potentially) // continuous triggers from happening before the device gets activated. pub(crate) activate_evt: EventFd, pub(crate) device_state: DeviceState, pub rx_packet: VsockPacketRx, pub tx_packet: VsockPacketTx, } // TODO: Detect / handle queue deadlock: // 1. If the driver halts RX queue processing, we'll need to notify `self.backend`, so that it can // unregister any EPOLLIN listeners, since otherwise it will keep spinning, unable to consume its // EPOLLIN events. impl Vsock where B: VsockBackend + Debug, { /// Auxiliary function for creating a new virtio-vsock device with the given VM CID, vsock /// backend and empty virtio queues. pub fn with_queues( cid: u64, backend: B, queues: Vec, ) -> Result, VsockError> { let mut queue_events = Vec::new(); for _ in 0..queues.len() { queue_events.push(EventFd::new(libc::EFD_NONBLOCK).map_err(VsockError::EventFd)?); } Ok(Vsock { cid, queues, queue_events, backend, avail_features: AVAIL_FEATURES, acked_features: 0, activate_evt: EventFd::new(libc::EFD_NONBLOCK).map_err(VsockError::EventFd)?, device_state: DeviceState::Inactive, rx_packet: VsockPacketRx::new()?, tx_packet: VsockPacketTx::default(), }) } /// Create a new virtio-vsock device with the given VM CID and vsock backend. pub fn new(cid: u64, backend: B) -> Result, VsockError> { let queues: Vec = defs::VSOCK_QUEUE_SIZES .iter() .map(|&max_size| VirtQueue::new(max_size)) .collect(); Self::with_queues(cid, backend, queues) } /// Retrieve the cid associated with this vsock device. pub fn cid(&self) -> u64 { self.cid } /// Access the backend behind the device. pub fn backend(&self) -> &B { &self.backend } /// Signal the guest driver that we've used some virtio buffers that it had previously made /// available. 
pub fn signal_used_queue(&self, qidx: usize) -> Result<(), DeviceError> { self.device_state .active_state() .expect("Device is not initialized") .interrupt .trigger(VirtioInterruptType::Queue(qidx.try_into().unwrap_or_else( |_| panic!("vsock: invalid queue index: {qidx}"), ))) .map_err(DeviceError::FailedSignalingIrq) } /// Signal the guest which queues are ready to be consumed pub fn signal_used_queues(&self, used_queues: &[u16]) -> Result<(), DeviceError> { self.device_state .active_state() .expect("Device is not initialized") .interrupt .trigger_queues(used_queues) .map_err(DeviceError::FailedSignalingIrq) } /// Walk the driver-provided RX queue buffers and attempt to fill them up with any data that we /// have pending. Return `true` if descriptors have been added to the used ring, and `false` /// otherwise. pub fn process_rx(&mut self) -> Result { // This is safe since we checked in the event handler that the device is activated. let mem = &self.device_state.active_state().unwrap().mem; let queue = &mut self.queues[RXQ_INDEX]; let mut have_used = false; while let Some(head) = queue.pop()? { let index = head.index; let used_len = match self.rx_packet.parse(mem, head) { Ok(()) => { if self.backend.recv_pkt(&mut self.rx_packet).is_ok() { match self.rx_packet.commit_hdr() { // This addition cannot overflow, because packet length // is previously validated against `MAX_PKT_BUF_SIZE` // bound as part of `commit_hdr()`. Ok(()) => VSOCK_PKT_HDR_SIZE + self.rx_packet.hdr.len(), Err(err) => { warn!( "vsock: Error writing packet header to guest memory: \ {:?}.Discarding the package.", err ); 0 } } } else { // We are using a consuming iterator over the virtio buffers, so, if we // can't fill in this buffer, we'll need to undo the // last iterator step. queue.undo_pop(); break; } } Err(err) => { warn!("vsock: RX queue error: {:?}. 
Discarding the package.", err); 0 } }; have_used = true; queue.add_used(index, used_len).unwrap_or_else(|err| { error!("Failed to add available descriptor {}: {}", index, err) }); } queue.advance_used_ring_idx(); Ok(have_used) } /// Walk the driver-provided TX queue buffers, package them up as vsock packets, and send them /// to the backend for processing. Return `true` if descriptors have been added to the used /// ring, and `false` otherwise. pub fn process_tx(&mut self) -> Result { // This is safe since we checked in the event handler that the device is activated. let mem = &self.device_state.active_state().unwrap().mem; let queue = &mut self.queues[TXQ_INDEX]; let mut have_used = false; while let Some(head) = queue.pop()? { let index = head.index; // let pkt = match VsockPacket::from_tx_virtq_head(mem, head) { match self.tx_packet.parse(mem, head) { Ok(()) => (), Err(err) => { error!("vsock: error reading TX packet: {:?}", err); have_used = true; queue.add_used(index, 0).unwrap_or_else(|err| { error!("Failed to add available descriptor {}: {}", index, err); }); continue; } }; if self.backend.send_pkt(&self.tx_packet).is_err() { queue.undo_pop(); break; } have_used = true; queue.add_used(index, 0).unwrap_or_else(|err| { error!("Failed to add available descriptor {}: {}", index, err); }); } queue.advance_used_ring_idx(); Ok(have_used) } // Send TRANSPORT_RESET_EVENT to driver. According to specs, the driver shuts down established // connections and the guest_cid configuration field is fetched again. Existing listen sockets // remain but their CID is updated to reflect the current guest_cid. pub fn send_transport_reset_event(&mut self) -> Result<(), DeviceError> { // This is safe since we checked in the caller function that the device is activated. 
let mem = &self.device_state.active_state().unwrap().mem; let queue = &mut self.queues[EVQ_INDEX]; let head = queue.pop()?.ok_or_else(|| { METRICS.ev_queue_event_fails.inc(); DeviceError::VsockError(VsockError::EmptyQueue) })?; mem.write_obj::(VIRTIO_VSOCK_EVENT_TRANSPORT_RESET, head.addr) .unwrap_or_else(|err| error!("Failed to write virtio vsock reset event: {:?}", err)); queue.add_used(head.index, head.len).unwrap_or_else(|err| { error!("Failed to add used descriptor {}: {}", head.index, err); }); queue.advance_used_ring_idx(); // NOTE: kick() will be called on resume and it will trigger the interrupt again. As calling // it multiple times should not cause any harm, it would be safer to call it here as well // as part of the sequence of actions that signal the reset event, prior to saving the // transport state. self.signal_used_queue(EVQ_INDEX)?; Ok(()) } } impl VirtioDevice for Vsock where B: VsockBackend + Debug + 'static, { impl_device_type!(VirtioDeviceType::Vsock); fn id(&self) -> &str { defs::VSOCK_DEV_ID } fn avail_features(&self) -> u64 { self.avail_features } fn acked_features(&self) -> u64 { self.acked_features } fn set_acked_features(&mut self, acked_features: u64) { self.acked_features = acked_features } fn queues(&self) -> &[VirtQueue] { &self.queues } fn queues_mut(&mut self) -> &mut [VirtQueue] { &mut self.queues } fn queue_events(&self) -> &[EventFd] { &self.queue_events } fn interrupt_trigger(&self) -> &dyn VirtioInterrupt { self.device_state .active_state() .expect("Device is not initialized") .interrupt .deref() } fn read_config(&self, offset: u64, data: &mut [u8]) { match offset { 0 if data.len() == 8 => byte_order::write_le_u64(data, self.cid()), 0 if data.len() == 4 => { byte_order::write_le_u32(data, (self.cid() & 0xffff_ffff) as u32) } 4 if data.len() == 4 => { byte_order::write_le_u32(data, ((self.cid() >> 32) & 0xffff_ffff) as u32) } _ => { METRICS.cfg_fails.inc(); warn!( "vsock: virtio-vsock received invalid read request of {} bytes 
at offset {}", data.len(), offset ) } } } fn write_config(&mut self, offset: u64, data: &[u8]) { METRICS.cfg_fails.inc(); warn!( "vsock: guest driver attempted to write device config (offset={:#x}, len={:#x})", offset, data.len() ); } fn activate( &mut self, mem: GuestMemoryMmap, interrupt: Arc, ) -> Result<(), ActivateError> { for q in self.queues.iter_mut() { q.initialize(&mem) .map_err(ActivateError::QueueMemoryError)?; } if self.queues.len() != defs::VSOCK_NUM_QUEUES { METRICS.activate_fails.inc(); return Err(ActivateError::QueueMismatch { expected: defs::VSOCK_NUM_QUEUES, got: self.queues.len(), }); } if self.activate_evt.write(1).is_err() { METRICS.activate_fails.inc(); return Err(ActivateError::EventFd); } self.device_state = DeviceState::Activated(ActiveState { mem, interrupt }); Ok(()) } fn is_activated(&self) -> bool { self.device_state.is_activated() } fn kick(&mut self) { // Vsock has complicated protocol that isn't resilient to any packet loss, // so for Vsock we don't support connection persistence through snapshot. // Any in-flight packets or events are simply lost. // Vsock is restored 'empty'. // The only reason we still `kick` it is to make guest process // `TRANSPORT_RESET_EVENT` event we sent during snapshot creation. if self.is_activated() { info!( "[{:?}:{}] signaling event queue", self.device_type(), self.id() ); self.signal_used_queue(EVQ_INDEX).unwrap(); } } fn prepare_save(&mut self) { // Send Transport event to reset connections if device // is activated. 
if self.is_activated() { self.send_transport_reset_event().unwrap_or_else(|err| { error!("Failed to send reset transport event: {:?}", err); }); } } } #[cfg(test)] mod tests { use super::*; use crate::devices::virtio::vsock::defs::uapi; use crate::devices::virtio::vsock::test_utils::TestContext; #[test] fn test_virtio_device() { let mut ctx = TestContext::new(); let device_features = AVAIL_FEATURES; let driver_features: u64 = AVAIL_FEATURES | 1 | (1 << 32); let device_pages = [ (device_features & 0xffff_ffff) as u32, (device_features >> 32) as u32, ]; let driver_pages = [ (driver_features & 0xffff_ffff) as u32, (driver_features >> 32) as u32, ]; assert_eq!(ctx.device.device_type(), VirtioDeviceType::Vsock); assert_eq!(ctx.device.avail_features_by_page(0), device_pages[0]); assert_eq!(ctx.device.avail_features_by_page(1), device_pages[1]); assert_eq!(ctx.device.avail_features_by_page(2), 0); // Ack device features, page 0. ctx.device.ack_features_by_page(0, driver_pages[0]); // Ack device features, page 1. ctx.device.ack_features_by_page(1, driver_pages[1]); // Ack some bogus page (i.e. 2). This should have no side effect. ctx.device.ack_features_by_page(2, 0); // Attempt to un-ack the first feature page. This should have no side effect. ctx.device.ack_features_by_page(0, !driver_pages[0]); // Check that no side effect are present, and that the acked features are exactly the same // as the device features. assert_eq!(ctx.device.acked_features, device_features & driver_features); // Test reading 32-bit chunks. let mut data = [0u8; 8]; ctx.device.read_config(0, &mut data[..4]); assert_eq!( u64::from(byte_order::read_le_u32(&data[..])), ctx.cid & 0xffff_ffff ); ctx.device.read_config(4, &mut data[4..]); assert_eq!( u64::from(byte_order::read_le_u32(&data[4..])), (ctx.cid >> 32) & 0xffff_ffff ); // Test reading 64-bit. 
let mut data = [0u8; 8]; ctx.device.read_config(0, &mut data); assert_eq!(byte_order::read_le_u64(&data), ctx.cid); // Check that out-of-bounds reading doesn't mutate the destination buffer. let mut data = [0u8, 1, 2, 3, 4, 5, 6, 7]; ctx.device.read_config(2, &mut data); assert_eq!(data, [0u8, 1, 2, 3, 4, 5, 6, 7]); // Just covering lines here, since the vsock device has no writable config. // A warning is, however, logged, if the guest driver attempts to write any config data. ctx.device.write_config(0, &data[..4]); // Test a bad activation. // let bad_activate = ctx.device.activate( // ctx.mem.clone(), // ); // match bad_activate { // Err(ActivateError::BadActivate) => (), // other => panic!("{:?}", other), // } // Test a correct activation. ctx.device .activate(ctx.mem.clone(), ctx.interrupt.clone()) .unwrap(); } } ================================================ FILE: src/vmm/src/devices/virtio/vsock/event_handler.rs ================================================ // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 // // Portions Copyright 2017 The Chromium OS Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the THIRD-PARTY file. use std::fmt::Debug; /// The vsock object implements the runtime logic of our vsock device: /// 1. Respond to TX queue events by wrapping virtio buffers into `VsockPacket`s, then sending /// those packets to the `VsockBackend`; /// 2. Forward backend FD event notifications to the `VsockBackend`; /// 3. Fetch incoming packets from the `VsockBackend` and place them into the virtio RX queue; /// 4. Whenever we have processed some virtio buffers (either TX or RX), let the driver know by /// raising our assigned IRQ. 
/// /// In a nutshell, the logic looks like this: /// - on TX queue event: /// - fetch all packets from the TX queue and send them to the backend; then /// - if the backend has queued up any incoming packets, fetch them into any available RX /// buffers. /// - on RX queue event: /// - fetch any incoming packets, queued up by the backend, into newly available RX buffers. /// - on backend event: /// - forward the event to the backend; then /// - again, attempt to fetch any incoming packets queued by the backend into virtio RX /// buffers. use event_manager::{EventOps, Events, MutEventSubscriber}; use log::{error, warn}; use vmm_sys_util::epoll::EventSet; use super::VsockBackend; use super::device::{EVQ_INDEX, RXQ_INDEX, TXQ_INDEX, Vsock}; use crate::devices::virtio::device::VirtioDevice; use crate::devices::virtio::queue::InvalidAvailIdx; use crate::devices::virtio::vsock::defs::VSOCK_NUM_QUEUES; use crate::devices::virtio::vsock::metrics::METRICS; use crate::logger::IncMetric; impl Vsock where B: Debug + VsockBackend + 'static, { const PROCESS_ACTIVATE: u32 = 0; const PROCESS_RXQ: u32 = 1; const PROCESS_TXQ: u32 = 2; const PROCESS_EVQ: u32 = 3; const PROCESS_NOTIFY_BACKEND: u32 = 4; pub fn handle_rxq_event(&mut self, evset: EventSet) -> Vec { let mut used_queues = Vec::new(); if evset != EventSet::IN { warn!("vsock: rxq unexpected event {:?}", evset); METRICS.rx_queue_event_fails.inc(); return used_queues; } if let Err(err) = self.queue_events[RXQ_INDEX].read() { error!("Failed to get vsock rx queue event: {:?}", err); METRICS.rx_queue_event_fails.inc(); } else if self.backend.has_pending_rx() { if self.process_rx().unwrap() { used_queues.push(RXQ_INDEX.try_into().unwrap()); } METRICS.rx_queue_event_count.inc(); } used_queues } pub fn handle_txq_event(&mut self, evset: EventSet) -> Vec { let mut used_queues = Vec::new(); if evset != EventSet::IN { warn!("vsock: txq unexpected event {:?}", evset); METRICS.tx_queue_event_fails.inc(); return used_queues; } if let 
Err(err) = self.queue_events[TXQ_INDEX].read() { error!("Failed to get vsock tx queue event: {:?}", err); METRICS.tx_queue_event_fails.inc(); } else { if self.process_tx().unwrap() { used_queues.push(TXQ_INDEX.try_into().unwrap()); } METRICS.tx_queue_event_count.inc(); // The backend may have queued up responses to the packets we sent during // TX queue processing. If that happened, we need to fetch those responses // and place them into RX buffers. if self.backend.has_pending_rx() && self.process_rx().unwrap() { used_queues.push(RXQ_INDEX.try_into().unwrap()); } } used_queues } pub fn handle_evq_event(&mut self, evset: EventSet) { if evset != EventSet::IN { warn!("vsock: evq unexpected event {:?}", evset); METRICS.ev_queue_event_fails.inc(); return; } if let Err(err) = self.queue_events[EVQ_INDEX].read() { error!("Failed to consume vsock evq event: {:?}", err); METRICS.ev_queue_event_fails.inc(); } } /// Notify backend of new events. pub fn notify_backend(&mut self, evset: EventSet) -> Result, InvalidAvailIdx> { let mut used_queues = Vec::new(); self.backend.notify(evset); // After the backend has been kicked, it might've freed up some resources, so we // can attempt to send it more data to process. // In particular, if `self.backend.send_pkt()` halted the TX queue processing (by // returning an error) at some point in the past, now is the time to try walking the // TX queue again. if self.process_tx()? { used_queues.push(TXQ_INDEX.try_into().unwrap()); } if self.backend.has_pending_rx() && self.process_rx()? 
{ used_queues.push(RXQ_INDEX.try_into().unwrap()) } Ok(used_queues) } fn register_runtime_events(&self, ops: &mut EventOps) { if let Err(err) = ops.add(Events::with_data( &self.queue_events[RXQ_INDEX], Self::PROCESS_RXQ, EventSet::IN, )) { error!("Failed to register rx queue event: {}", err); } if let Err(err) = ops.add(Events::with_data( &self.queue_events[TXQ_INDEX], Self::PROCESS_TXQ, EventSet::IN, )) { error!("Failed to register tx queue event: {}", err); } if let Err(err) = ops.add(Events::with_data( &self.queue_events[EVQ_INDEX], Self::PROCESS_EVQ, EventSet::IN, )) { error!("Failed to register ev queue event: {}", err); } if let Err(err) = ops.add(Events::with_data( &self.backend, Self::PROCESS_NOTIFY_BACKEND, self.backend.get_polled_evset(), )) { error!("Failed to register vsock backend event: {}", err); } } fn register_activate_event(&self, ops: &mut EventOps) { if let Err(err) = ops.add(Events::with_data( &self.activate_evt, Self::PROCESS_ACTIVATE, EventSet::IN, )) { error!("Failed to register activate event: {}", err); } } fn handle_activate_event(&self, ops: &mut EventOps) { if let Err(err) = self.activate_evt.read() { error!("Failed to consume net activate event: {:?}", err); } self.register_runtime_events(ops); if let Err(err) = ops.remove(Events::with_data( &self.activate_evt, Self::PROCESS_ACTIVATE, EventSet::IN, )) { error!("Failed to un-register activate event: {}", err); } } } impl MutEventSubscriber for Vsock where B: Debug + VsockBackend + 'static, { fn process(&mut self, event: Events, ops: &mut EventOps) { let source = event.data(); let evset = event.event_set(); if self.is_activated() { let used_queues = match source { Self::PROCESS_ACTIVATE => { self.handle_activate_event(ops); Vec::new() } Self::PROCESS_RXQ => self.handle_rxq_event(evset), Self::PROCESS_TXQ => self.handle_txq_event(evset), Self::PROCESS_EVQ => { self.handle_evq_event(evset); Vec::new() } Self::PROCESS_NOTIFY_BACKEND => self.notify_backend(evset).unwrap(), _ => { 
warn!("Unexpected vsock event received: {:?}", source); Vec::new() } }; self.signal_used_queues(&used_queues) .expect("vsock: Could not trigger device interrupt"); } else { warn!( "Vsock: The device is not yet activated. Spurious event received: {:?}", source ); } } fn init(&mut self, ops: &mut EventOps) { // This function can be called during different points in the device lifetime: // - shortly after device creation, // - on device activation (is-activated already true at this point), // - on device restore from snapshot. if self.is_activated() { self.register_runtime_events(ops); } else { self.register_activate_event(ops); } } } #[cfg(test)] mod tests { use std::sync::{Arc, Mutex}; use event_manager::{EventManager, SubscriberOps}; use super::super::*; use super::*; use crate::devices::virtio::vsock::test_utils::{EventHandlerContext, TestContext}; #[test] fn test_txq_event() { // Test case: // - the driver has something to send (there's data in the TX queue); and // - the backend has no pending RX data. { let test_ctx = TestContext::new(); let mut ctx = test_ctx.create_event_handler_context(); ctx.mock_activate(test_ctx.mem.clone(), test_ctx.interrupt.clone()); ctx.device.backend.set_pending_rx(false); ctx.signal_txq_event(); // The available TX descriptor should have been used. assert_eq!(ctx.guest_txvq.used.idx.get(), 1); // The available RX descriptor should be untouched. assert_eq!(ctx.guest_rxvq.used.idx.get(), 0); } // Test case: // - the driver has something to send (there's data in the TX queue); and // - the backend also has some pending RX data. { let test_ctx = TestContext::new(); let mut ctx = test_ctx.create_event_handler_context(); ctx.mock_activate(test_ctx.mem.clone(), test_ctx.interrupt.clone()); ctx.device.backend.set_pending_rx(true); ctx.signal_txq_event(); // Both available RX and TX descriptors should have been used. 
            assert_eq!(ctx.guest_txvq.used.idx.get(), 1);
            assert_eq!(ctx.guest_rxvq.used.idx.get(), 1);
        }

        // Test case:
        // - the driver has something to send (there's data in the TX queue); and
        // - the backend errors out and cannot process the TX queue.
        {
            let test_ctx = TestContext::new();
            let mut ctx = test_ctx.create_event_handler_context();
            ctx.mock_activate(test_ctx.mem.clone(), test_ctx.interrupt.clone());
            ctx.device.backend.set_pending_rx(false);
            ctx.device.backend.set_tx_err(Some(VsockError::NoData));
            ctx.signal_txq_event();

            // Both RX and TX queues should be untouched.
            assert_eq!(ctx.guest_txvq.used.idx.get(), 0);
            assert_eq!(ctx.guest_rxvq.used.idx.get(), 0);
        }

        // Test case:
        // - the driver supplied a malformed TX buffer.
        {
            let test_ctx = TestContext::new();
            let mut ctx = test_ctx.create_event_handler_context();
            ctx.mock_activate(test_ctx.mem.clone(), test_ctx.interrupt.clone());

            // Invalidate the descriptor chain, by setting its length to 0.
            ctx.guest_txvq.dtable[0].len.set(0);
            ctx.guest_txvq.dtable[1].len.set(0);
            ctx.signal_txq_event();

            // The available descriptor should have been consumed, but no packet should have
            // reached the backend.
            assert_eq!(ctx.guest_txvq.used.idx.get(), 1);
            assert_eq!(ctx.device.backend.tx_ok_cnt, 0);
        }

        // Test case: spurious TXQ_EVENT (the queue eventfd was never signalled,
        // so reading it fails and the failure metric is bumped).
        {
            let test_ctx = TestContext::new();
            let mut ctx = test_ctx.create_event_handler_context();
            ctx.mock_activate(test_ctx.mem.clone(), test_ctx.interrupt.clone());

            let metric_before = METRICS.tx_queue_event_fails.count();
            ctx.device.handle_txq_event(EventSet::IN);
            assert_eq!(metric_before + 1, METRICS.tx_queue_event_fails.count());
        }
    }

    #[test]
    fn test_rxq_event() {
        // Test case:
        // - there is pending RX data in the backend; and
        // - the driver makes RX buffers available; and
        // - the backend errors out, when attempting to receive data.
        {
            let test_ctx = TestContext::new();
            let mut ctx = test_ctx.create_event_handler_context();
            ctx.mock_activate(test_ctx.mem.clone(), test_ctx.interrupt.clone());

            ctx.device.backend.set_pending_rx(true);
            ctx.device.backend.set_rx_err(Some(VsockError::NoData));
            ctx.signal_rxq_event();

            // The available RX buffer should've been left untouched.
            assert_eq!(ctx.guest_rxvq.used.idx.get(), 0);
        }

        // Test case:
        // - there is pending RX data in the backend; and
        // - the driver makes RX buffers available; and
        // - the backend successfully places its RX data into the queue.
        {
            let test_ctx = TestContext::new();
            let mut ctx = test_ctx.create_event_handler_context();
            ctx.mock_activate(test_ctx.mem.clone(), test_ctx.interrupt.clone());

            ctx.device.backend.set_pending_rx(true);
            ctx.signal_rxq_event();

            // The available RX buffer should have been used.
            assert_eq!(ctx.guest_rxvq.used.idx.get(), 1);
        }

        // Test case: the driver provided a malformed RX descriptor chain.
        {
            let test_ctx = TestContext::new();
            let mut ctx = test_ctx.create_event_handler_context();
            ctx.mock_activate(test_ctx.mem.clone(), test_ctx.interrupt.clone());

            // Invalidate the descriptor chain, by setting its length to 0.
            ctx.guest_rxvq.dtable[0].len.set(0);
            ctx.guest_rxvq.dtable[1].len.set(0);

            // The chain should've been processed, without employing the backend.
            assert!(ctx.device.process_rx().unwrap());
            assert_eq!(ctx.guest_rxvq.used.idx.get(), 1);
            assert_eq!(ctx.device.backend.rx_ok_cnt, 0);
        }

        // Test case: spurious RXQ_EVENT (no pending RX and the queue eventfd
        // was never signalled, so the read fails and the metric is bumped).
        {
            let test_ctx = TestContext::new();
            let mut ctx = test_ctx.create_event_handler_context();
            ctx.mock_activate(test_ctx.mem.clone(), test_ctx.interrupt.clone());
            ctx.device.backend.set_pending_rx(false);

            let metric_before = METRICS.rx_queue_event_fails.count();
            ctx.device.handle_rxq_event(EventSet::IN);
            assert_eq!(metric_before + 1, METRICS.rx_queue_event_fails.count());
        }
    }

    #[test]
    fn test_evq_event() {
        // Test case: spurious EVQ_EVENT.
        {
            let test_ctx = TestContext::new();
            let mut ctx = test_ctx.create_event_handler_context();
            ctx.device.backend.set_pending_rx(false);

            let metric_before = METRICS.ev_queue_event_fails.count();
            ctx.device.handle_evq_event(EventSet::IN);
            assert_eq!(metric_before + 1, METRICS.ev_queue_event_fails.count());
        }
    }

    #[test]
    fn test_backend_event() {
        // Test case:
        // - a backend event is received; and
        // - the backend has pending RX data.
        {
            let test_ctx = TestContext::new();
            let mut ctx = test_ctx.create_event_handler_context();
            ctx.mock_activate(test_ctx.mem.clone(), test_ctx.interrupt.clone());

            ctx.device.backend.set_pending_rx(true);
            ctx.device.notify_backend(EventSet::IN).unwrap();

            // The backend should've received this event.
            assert_eq!(ctx.device.backend.evset, Some(EventSet::IN));
            // TX queue processing should've been triggered.
            assert_eq!(ctx.guest_txvq.used.idx.get(), 1);
            // RX queue processing should've been triggered.
            assert_eq!(ctx.guest_rxvq.used.idx.get(), 1);
        }

        // Test case:
        // - a backend event is received; and
        // - the backend doesn't have any pending RX data.
        {
            let test_ctx = TestContext::new();
            let mut ctx = test_ctx.create_event_handler_context();
            ctx.mock_activate(test_ctx.mem.clone(), test_ctx.interrupt.clone());

            ctx.device.backend.set_pending_rx(false);
            ctx.device.notify_backend(EventSet::IN).unwrap();

            // The backend should've received this event.
            assert_eq!(ctx.device.backend.evset, Some(EventSet::IN));
            // TX queue processing should've been triggered.
            assert_eq!(ctx.guest_txvq.used.idx.get(), 1);
            // The RX queue should've been left untouched.
            assert_eq!(ctx.guest_rxvq.used.idx.get(), 0);
        }
    }

    // Creates an epoll handler context and attempts to assemble a VsockPkt from the descriptor
    // chains available on the rx and tx virtqueues, but first it will set the addr and len
    // of the descriptor specified by desc_idx to the provided values. We are only using this
    // function for testing error cases, so the asserts always expect is_err() to be true.
    // When
    // desc_idx = 0 we are altering the header (first descriptor in the chain), and when
    // desc_idx = 1 we are altering the packet buffer.
    #[cfg(target_arch = "x86_64")]
    fn vsock_bof_helper(test_ctx: &mut TestContext, desc_idx: usize, addr: u64, len: u32) {
        use crate::vstate::memory::{Bytes, GuestAddress};

        // Only the header (0) or the buffer (1) descriptor may be targeted.
        assert!(desc_idx <= 1);

        {
            let mut ctx = test_ctx.create_event_handler_context();
            ctx.guest_rxvq.dtable[desc_idx].addr.set(addr);
            ctx.guest_rxvq.dtable[desc_idx].len.set(len);
            // If the descriptor chain is already declared invalid, there's no reason to assemble
            // a packet.
            if let Some(rx_desc) = ctx.device.queues[RXQ_INDEX].pop().unwrap() {
                VsockPacketRx::new()
                    .unwrap()
                    .parse(&test_ctx.mem, rx_desc)
                    .unwrap_err();
            }
        }

        {
            let mut ctx = test_ctx.create_event_handler_context();

            // When modifying the buffer descriptor, make sure the len field is altered in the
            // vsock packet header descriptor as well.
            if desc_idx == 1 {
                // The vsock packet len field has offset 24 in the header.
                let hdr_len_addr = GuestAddress(ctx.guest_txvq.dtable[0].addr.get() + 24);
                test_ctx
                    .mem
                    .write_obj(len.to_le_bytes(), hdr_len_addr)
                    .unwrap();
            }

            ctx.guest_txvq.dtable[desc_idx].addr.set(addr);
            ctx.guest_txvq.dtable[desc_idx].len.set(len);

            if let Some(tx_desc) = ctx.device.queues[TXQ_INDEX].pop().unwrap() {
                VsockPacketTx::default()
                    .parse(&test_ctx.mem, tx_desc)
                    .unwrap_err();
            }
        }
    }

    #[test]
    #[cfg(target_arch = "x86_64")]
    #[allow(clippy::cast_possible_truncation)] /* casting of constants we know fit into u32 */
    fn test_vsock_bof() {
        use crate::arch::x86_64::layout::FIRST_ADDR_PAST_32BITS;
        use crate::arch::{MMIO32_MEM_SIZE, MMIO32_MEM_START};
        use crate::devices::virtio::vsock::packet::VSOCK_PKT_HDR_SIZE;
        use crate::test_utils::multi_region_mem;
        use crate::utils::mib_to_bytes;
        use crate::vstate::memory::GuestAddress;

        const MIB: usize = mib_to_bytes(1);

        // Guest memory deliberately leaves the 32-bit MMIO range unmapped, so
        // that descriptors pointing into/across the gap fail bounds checks.
        let mut test_ctx = TestContext::new();
        test_ctx.mem = multi_region_mem(&[
            (GuestAddress(0), 8 * MIB),
            (GuestAddress(MMIO32_MEM_START - MIB as u64), MIB),
            (GuestAddress(FIRST_ADDR_PAST_32BITS), MIB),
        ]);

        // The default configured descriptor chains are valid.
        {
            let mut ctx = test_ctx.create_event_handler_context();
            let rx_desc = ctx.device.queues[RXQ_INDEX].pop().unwrap().unwrap();
            VsockPacketRx::new()
                .unwrap()
                .parse(&test_ctx.mem, rx_desc)
                .unwrap();
        }

        {
            let mut ctx = test_ctx.create_event_handler_context();
            let tx_desc = ctx.device.queues[TXQ_INDEX].pop().unwrap().unwrap();
            VsockPacketTx::default()
                .parse(&test_ctx.mem, tx_desc)
                .unwrap();
        }

        // Let's check what happens when the header descriptor is right before the gap.
        vsock_bof_helper(&mut test_ctx, 0, MMIO32_MEM_START - 1, VSOCK_PKT_HDR_SIZE);

        // Let's check what happens when the buffer descriptor crosses into the gap, but does
        // not go past its right edge.
        vsock_bof_helper(
            &mut test_ctx,
            1,
            MMIO32_MEM_START - 4,
            MMIO32_MEM_SIZE as u32 + 4,
        );

        // Let's modify the buffer descriptor addr and len such that it crosses over the MMIO gap,
        // and check we cannot assemble the VsockPkts.
        vsock_bof_helper(
            &mut test_ctx,
            1,
            MMIO32_MEM_START - 4,
            MMIO32_MEM_SIZE as u32 + 100,
        );
    }

    #[test]
    fn test_event_handler() {
        let mut event_manager = EventManager::new().unwrap();
        let test_ctx = TestContext::new();
        let EventHandlerContext {
            device,
            guest_rxvq,
            guest_txvq,
            ..
        } = test_ctx.create_event_handler_context();
        let vsock = Arc::new(Mutex::new(device));
        let _id = event_manager.add_subscriber(vsock.clone());

        // Push a queue event
        // - the driver has something to send (there's data in the TX queue); and
        // - the backend also has some pending RX data.
        {
            let mut device = vsock.lock().unwrap();
            device.backend.set_pending_rx(true);
            device.queue_events[TXQ_INDEX].write(1).unwrap();
        }

        // EventManager should report no events since vsock has only registered
        // its activation event so far (even though there is also a queue event pending).
        let ev_count = event_manager.run_with_timeout(50).unwrap();
        assert_eq!(ev_count, 0);

        // Manually force a queue event and check it's ignored pre-activation.
        {
            let device = vsock.lock().unwrap();
            // Artificially push event.
            device.queue_events[TXQ_INDEX].write(1).unwrap();
            let ev_count = event_manager.run_with_timeout(50).unwrap();
            assert_eq!(ev_count, 0);
            // Both available RX and TX descriptors should be untouched.
            assert_eq!(guest_rxvq.used.idx.get(), 0);
            assert_eq!(guest_txvq.used.idx.get(), 0);
        }

        // Now activate the device.
        vsock
            .lock()
            .unwrap()
            .activate(test_ctx.mem.clone(), test_ctx.interrupt.clone())
            .unwrap();
        // Process the activate event.
        let ev_count = event_manager.run_with_timeout(50).unwrap();
        assert_eq!(ev_count, 1);

        // Handle the previously pushed queue event through EventManager.
        {
            let ev_count = event_manager
                .run_with_timeout(100)
                .expect("Metrics event timeout or error.");
            assert_eq!(ev_count, 1);
            // Both available RX and TX descriptors should have been used.
            assert_eq!(guest_rxvq.used.idx.get(), 1);
            assert_eq!(guest_txvq.used.idx.get(), 1);
        }
    }
}


================================================
FILE: src/vmm/src/devices/virtio/vsock/metrics.rs
================================================
// Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

//! Defines the metrics system for vsock devices.
//!
//! # Metrics format
//! The metrics are flushed in JSON when requested by vmm::logger::metrics::METRICS.write().
//!
//! ## JSON example with metrics:
//! ```json
//! "vsock": {
//!     "activate_fails": "SharedIncMetric",
//!     "cfg_fails": "SharedIncMetric",
//!     "rx_queue_event_fails": "SharedIncMetric",
//!     "tx_queue_event_fails": "SharedIncMetric",
//!     "ev_queue_event_fails": "SharedIncMetric",
//!     "muxer_event_fails": "SharedIncMetric",
//!     ...
//! }
//! }
//! ```
//! Each `vsock` field in the example above is a serializable `VsockDeviceMetrics` structure
//!
collecting metrics such as `activate_fails`, `cfg_fails`, etc. for the Vsock device. //! Since vsock doesn't support multiple devices, there is no per device metrics and //! `vsock` represents the aggregate metrics for all vsock connections. //! //! # Design //! The main design goals of this system are: //! * Have a consistent approach of keeping device related metrics in the individual devices //! modules. //! * To decouple vsock device metrics from logger module by moving VsockDeviceMetrics out of //! FirecrackerDeviceMetrics. //! * Rely on `serde` to provide the actual serialization for writing the metrics. //! //! The system implements 1 type of metrics: //! * Shared Incremental Metrics (SharedIncMetrics) - dedicated for the metrics which need a counter //! (i.e the number of times an API request failed). These metrics are reset upon flush. use serde::ser::SerializeMap; use serde::{Serialize, Serializer}; use crate::logger::SharedIncMetric; /// Stores aggregate metrics of all Vsock connections/actions pub(super) static METRICS: VsockDeviceMetrics = VsockDeviceMetrics::new(); /// Called by METRICS.flush(), this function facilitates serialization of vsock device metrics. pub fn flush_metrics(serializer: S) -> Result { let mut seq = serializer.serialize_map(Some(1))?; seq.serialize_entry("vsock", &METRICS)?; seq.end() } /// Vsock-related metrics. #[derive(Debug, Serialize)] pub(super) struct VsockDeviceMetrics { /// Number of times when activate failed on a vsock device. pub activate_fails: SharedIncMetric, /// Number of times when interacting with the space config of a vsock device failed. pub cfg_fails: SharedIncMetric, /// Number of times when handling RX queue events on a vsock device failed. pub rx_queue_event_fails: SharedIncMetric, /// Number of times when handling TX queue events on a vsock device failed. pub tx_queue_event_fails: SharedIncMetric, /// Number of times when handling event queue events on a vsock device failed. 
pub ev_queue_event_fails: SharedIncMetric, /// Number of times when handling muxer events on a vsock device failed. pub muxer_event_fails: SharedIncMetric, /// Number of times when handling connection events on a vsock device failed. pub conn_event_fails: SharedIncMetric, /// Number of events associated with the receiving queue. pub rx_queue_event_count: SharedIncMetric, /// Number of events associated with the transmitting queue. pub tx_queue_event_count: SharedIncMetric, /// Number of bytes received. pub rx_bytes_count: SharedIncMetric, /// Number of transmitted bytes. pub tx_bytes_count: SharedIncMetric, /// Number of packets received. pub rx_packets_count: SharedIncMetric, /// Number of transmitted packets. pub tx_packets_count: SharedIncMetric, /// Number of added connections. pub conns_added: SharedIncMetric, /// Number of killed connections. pub conns_killed: SharedIncMetric, /// Number of removed connections. pub conns_removed: SharedIncMetric, /// How many times the killq has been resynced. pub killq_resync: SharedIncMetric, /// How many flush fails have been seen. pub tx_flush_fails: SharedIncMetric, /// How many write fails have been seen. pub tx_write_fails: SharedIncMetric, /// Number of times read() has failed. 
pub rx_read_fails: SharedIncMetric, } impl VsockDeviceMetrics { // We need this because vsock::metrics::METRICS does not accept // VsockDeviceMetrics::default() const fn new() -> Self { Self { activate_fails: SharedIncMetric::new(), cfg_fails: SharedIncMetric::new(), rx_queue_event_fails: SharedIncMetric::new(), tx_queue_event_fails: SharedIncMetric::new(), ev_queue_event_fails: SharedIncMetric::new(), muxer_event_fails: SharedIncMetric::new(), conn_event_fails: SharedIncMetric::new(), rx_queue_event_count: SharedIncMetric::new(), tx_queue_event_count: SharedIncMetric::new(), rx_bytes_count: SharedIncMetric::new(), tx_bytes_count: SharedIncMetric::new(), rx_packets_count: SharedIncMetric::new(), tx_packets_count: SharedIncMetric::new(), conns_added: SharedIncMetric::new(), conns_killed: SharedIncMetric::new(), conns_removed: SharedIncMetric::new(), killq_resync: SharedIncMetric::new(), tx_flush_fails: SharedIncMetric::new(), tx_write_fails: SharedIncMetric::new(), rx_read_fails: SharedIncMetric::new(), } } } #[cfg(test)] pub mod tests { use super::*; use crate::logger::IncMetric; #[test] fn test_vsock_dev_metrics() { let vsock_metrics: VsockDeviceMetrics = VsockDeviceMetrics::new(); let vsock_metrics_local: String = serde_json::to_string(&vsock_metrics).unwrap(); // the 1st serialize flushes the metrics and resets values to 0 so that // we can compare the values with local metrics. serde_json::to_string(&METRICS).unwrap(); let vsock_metrics_global: String = serde_json::to_string(&METRICS).unwrap(); assert_eq!(vsock_metrics_local, vsock_metrics_global); vsock_metrics.conns_added.inc(); assert_eq!(vsock_metrics.conns_added.count(), 1); } } ================================================ FILE: src/vmm/src/devices/virtio/vsock/mod.rs ================================================ // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 // // Portions Copyright 2017 The Chromium OS Authors. All rights reserved. 
// Use of this source code is governed by a BSD-style license that can be // found in the THIRD-PARTY file. //! The Firecracker vsock device aims to provide full virtio-vsock support to //! software running inside the guest VM, while bypassing vhost kernel code on the //! host. To that end, Firecracker implements the virtio-vsock device model, and //! mediates communication between AF_UNIX sockets (on the host end) and AF_VSOCK //! sockets (on the guest end). mod csm; mod device; mod event_handler; pub mod metrics; mod packet; pub mod persist; pub mod test_utils; mod unix; use std::os::unix::io::AsRawFd; use vm_memory::GuestMemoryError; use vmm_sys_util::epoll::EventSet; pub use self::defs::VSOCK_DEV_ID; pub use self::device::Vsock; use self::packet::{VsockPacketRx, VsockPacketTx}; pub use self::unix::{VsockUnixBackend, VsockUnixBackendError}; use super::iov_deque::IovDequeError; use crate::devices::virtio::iovec::IoVecError; use crate::devices::virtio::persist::PersistError as VirtioStateError; mod defs { use crate::devices::virtio::queue::FIRECRACKER_MAX_QUEUE_SIZE; /// Device ID used in MMIO device identification. /// Because Vsock is unique per-vm, this ID can be hardcoded. pub const VSOCK_DEV_ID: &str = "vsock"; /// Number of virtio queues. pub const VSOCK_NUM_QUEUES: usize = 3; /// Virtio queue sizes, in number of descriptor chain heads. /// There are 3 queues for a virtio device (in this order): RX, TX, Event pub const VSOCK_QUEUE_SIZES: [u16; VSOCK_NUM_QUEUES] = [ FIRECRACKER_MAX_QUEUE_SIZE, FIRECRACKER_MAX_QUEUE_SIZE, FIRECRACKER_MAX_QUEUE_SIZE, ]; /// Max vsock packet data/buffer size. pub const MAX_PKT_BUF_SIZE: u32 = 64 * 1024; pub mod uapi { /// Vsock packet operation IDs. /// Defined in `/include/uapi/linux/virtio_vsock.h`. /// /// Connection request. pub const VSOCK_OP_REQUEST: u16 = 1; /// Connection response. pub const VSOCK_OP_RESPONSE: u16 = 2; /// Connection reset. pub const VSOCK_OP_RST: u16 = 3; /// Connection clean shutdown. 
pub const VSOCK_OP_SHUTDOWN: u16 = 4; /// Connection data (read/write). pub const VSOCK_OP_RW: u16 = 5; /// Flow control credit update. pub const VSOCK_OP_CREDIT_UPDATE: u16 = 6; /// Flow control credit update request. pub const VSOCK_OP_CREDIT_REQUEST: u16 = 7; /// Vsock packet flags. /// Defined in `/include/uapi/linux/virtio_vsock.h`. /// /// Valid with a VSOCK_OP_SHUTDOWN packet: the packet sender will receive no more data. pub const VSOCK_FLAGS_SHUTDOWN_RCV: u32 = 1; /// Valid with a VSOCK_OP_SHUTDOWN packet: the packet sender will send no more data. pub const VSOCK_FLAGS_SHUTDOWN_SEND: u32 = 2; /// Vsock packet type. /// Defined in `/include/uapi/linux/virtio_vsock.h`. /// /// Stream / connection-oriented packet (the only currently valid type). pub const VSOCK_TYPE_STREAM: u16 = 1; pub const VSOCK_HOST_CID: u64 = 2; } } /// Vsock device related errors. #[derive(Debug, thiserror::Error, displaydoc::Display)] #[rustfmt::skip] pub enum VsockError { /** The total length of the descriptor chain ({0}) is too short to hold a packet of length {1} + header */ DescChainTooShortForPacket(u32, u32), /// Empty queue EmptyQueue, /// EventFd error: {0} EventFd(std::io::Error), /// Chained GuestMemoryMmap error: {0} GuestMemoryMmap(GuestMemoryError), /// Bounds check failed on guest memory pointer. GuestMemoryBounds, /** The total length of the descriptor chain ({0}) is less than the number of bytes required\ to hold a vsock packet header.*/ DescChainTooShortForHeader(usize), /// The descriptor chain length was greater than the max ([u32::MAX]) DescChainOverflow, /// The vsock header `len` field holds an invalid value: {0} InvalidPktLen(u32), /// A data fetch was attempted when no data was available. NoData, /// A data buffer was expected for the provided packet, but it is missing. PktBufMissing, /// Encountered an unexpected write-only virtio descriptor. UnreadableDescriptor, /// Encountered an unexpected read-only virtio descriptor. 
UnwritableDescriptor, /// Invalid virtio configuration: {0} VirtioState(VirtioStateError), /// Vsock uds backend error: {0} VsockUdsBackend(VsockUnixBackendError), /// Underlying IovDeque error: {0} IovDeque(IovDequeError), /// Tried to push to full IovDeque. IovDequeOverflow, } impl From for VsockError { fn from(value: IoVecError) -> Self { match value { IoVecError::WriteOnlyDescriptor => VsockError::UnreadableDescriptor, IoVecError::ReadOnlyDescriptor => VsockError::UnwritableDescriptor, IoVecError::GuestMemory(err) => VsockError::GuestMemoryMmap(err), IoVecError::OverflowedDescriptor => VsockError::DescChainOverflow, IoVecError::IovDeque(err) => VsockError::IovDeque(err), IoVecError::IovDequeOverflow => VsockError::IovDequeOverflow, } } } /// A passive, event-driven object, that needs to be notified whenever an epoll-able event occurs. /// An event-polling control loop will use `as_raw_fd()` and `get_polled_evset()` to query /// the listener for the file descriptor and the set of events it's interested in. When such an /// event occurs, the control loop will route the event to the listener via `notify()`. pub trait VsockEpollListener: AsRawFd { /// Get the set of events for which the listener wants to be notified. fn get_polled_evset(&self) -> EventSet; /// Notify the listener that one ore more events have occurred. fn notify(&mut self, evset: EventSet); } /// Any channel that handles vsock packet traffic: sending and receiving packets. Since we're /// implementing the device model here, our responsibility is to always process the sending of /// packets (i.e. the TX queue). So, any locally generated data, addressed to the driver (e.g. /// a connection response or RST), will have to be queued, until we get to processing the RX queue. /// /// Note: `recv_pkt()` and `send_pkt()` are named analogous to `Read::read()` and `Write::write()`, /// respectively. I.e. 
/// - `recv_pkt(&mut pkt)` will read data from the channel, and place it into `pkt`; and /// - `send_pkt(&pkt)` will fetch data from `pkt`, and place it into the channel. pub trait VsockChannel { /// Read/receive an incoming packet from the channel. fn recv_pkt(&mut self, pkt: &mut VsockPacketRx) -> Result<(), VsockError>; /// Write/send a packet through the channel. fn send_pkt(&mut self, pkt: &VsockPacketTx) -> Result<(), VsockError>; /// Checks whether there is pending incoming data inside the channel, meaning that a subsequent /// call to `recv_pkt()` won't fail. fn has_pending_rx(&self) -> bool; } /// The vsock backend, which is basically an epoll-event-driven vsock channel. /// Currently, the only implementation we have is `crate::devices::virtio::unix::muxer::VsockMuxer`, /// which translates guest-side vsock connections to host-side Unix domain socket connections. pub trait VsockBackend: VsockChannel + VsockEpollListener + Send {} ================================================ FILE: src/vmm/src/devices/virtio/vsock/packet.rs ================================================ // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 // //! `VsockPacket` provides a thin wrapper over the buffers exchanged via virtio queues. //! There are two components to a vsock packet, each using its own descriptor in a //! virtio queue: //! - the packet header; and //! - the packet data/buffer. //! //! There is a 1:1 relation between descriptor chains and packets: the first (chain head) holds //! the header, and an optional second descriptor holds the data. The second descriptor is only //! present for data packets (VSOCK_OP_RW). //! //! `VsockPacket` wraps these two buffers and provides direct access to the data stored //! in guest memory. This is done to avoid unnecessarily copying data from guest memory //! to temporary buffers, before passing it on to the vsock backend. 
use std::fmt::Debug;

use vm_memory::volatile_memory::Error;
use vm_memory::{GuestMemoryError, ReadVolatile, WriteVolatile};

use super::{VsockError, defs};
use crate::devices::virtio::iovec::{IoVecBuffer, IoVecBufferMut};
use crate::devices::virtio::queue::DescriptorChain;
use crate::vstate::memory::{ByteValued, GuestMemoryMmap};

// The vsock packet header is defined by the C struct:
//
// ```C
//     le64 src_cid;
//     le64 dst_cid;
//     le32 src_port;
//     le32 dst_port;
//     le32 len;
//     le16 type;
//     le16 op;
//     le32 flags;
//     le32 buf_alloc;
//     le32 fwd_cnt;
// } __attribute__((packed));
// ```
// We create a rust structure that mirrors it.
// The mirroring struct is only used privately by `VsockPacket`, that offers getter and setter
// methods, for each struct field, that will also handle the correct endianess.
// NOTE: all fields are stored in little-endian byte order (as mandated by virtio);
// the accessors below convert to/from host order.
#[repr(C, packed)]
#[derive(Copy, Clone, Debug, Default)]
pub struct VsockPacketHeader {
    // Source CID.
    src_cid: u64,
    // Destination CID.
    dst_cid: u64,
    // Source port.
    src_port: u32,
    // Destination port.
    dst_port: u32,
    // Data length (in bytes) - may be 0, if there is no data buffer.
    len: u32,
    // Socket type. Currently, only connection-oriented streams are defined by the vsock protocol.
    type_: u16,
    // Operation ID - one of the VSOCK_OP_* values; e.g.
    //  - VSOCK_OP_RW: a data packet;
    //  - VSOCK_OP_REQUEST: connection request;
    //  - VSOCK_OP_RST: forcefull connection termination;
    // etc (see `super::defs::uapi` for the full list).
    op: u16,
    // Additional options (flags) associated with the current operation (`op`).
    // Currently, only used with shutdown requests (VSOCK_OP_SHUTDOWN).
    flags: u32,
    // Size (in bytes) of the packet sender receive buffer (for the connection to which this packet
    // belongs).
    buf_alloc: u32,
    // Number of bytes the sender has received and consumed (for the connection to which this
    // packet belongs). For instance, for our Unix backend, this counter would be the total
    // number of bytes we have successfully written to a backing Unix socket.
    fwd_cnt: u32,
}

// Getters read the LE-stored field and convert to host order; setters take a
// host-order value and store it as LE. Setters return `&mut Self` to allow
// chaining when building a header.
impl VsockPacketHeader {
    /// Returns the source CID (host byte order).
    pub fn src_cid(&self) -> u64 {
        u64::from_le(self.src_cid)
    }

    /// Sets the source CID.
    pub fn set_src_cid(&mut self, cid: u64) -> &mut Self {
        self.src_cid = cid.to_le();
        self
    }

    /// Returns the destination CID (host byte order).
    pub fn dst_cid(&self) -> u64 {
        u64::from_le(self.dst_cid)
    }

    /// Sets the destination CID.
    pub fn set_dst_cid(&mut self, cid: u64) -> &mut Self {
        self.dst_cid = cid.to_le();
        self
    }

    /// Returns the source port (host byte order).
    pub fn src_port(&self) -> u32 {
        u32::from_le(self.src_port)
    }

    /// Sets the source port.
    pub fn set_src_port(&mut self, port: u32) -> &mut Self {
        self.src_port = port.to_le();
        self
    }

    /// Returns the destination port (host byte order).
    pub fn dst_port(&self) -> u32 {
        u32::from_le(self.dst_port)
    }

    /// Sets the destination port.
    pub fn set_dst_port(&mut self, port: u32) -> &mut Self {
        self.dst_port = port.to_le();
        self
    }

    /// Returns the payload length in bytes (host byte order).
    pub fn len(&self) -> u32 {
        u32::from_le(self.len)
    }

    /// Sets the payload length.
    pub fn set_len(&mut self, len: u32) -> &mut Self {
        self.len = len.to_le();
        self
    }

    /// Returns the socket type (see `defs::uapi::VSOCK_TYPE_*`).
    pub fn type_(&self) -> u16 {
        u16::from_le(self.type_)
    }

    /// Sets the socket type.
    pub fn set_type(&mut self, type_: u16) -> &mut Self {
        self.type_ = type_.to_le();
        self
    }

    /// Returns the operation ID (see `defs::uapi::VSOCK_OP_*`).
    pub fn op(&self) -> u16 {
        u16::from_le(self.op)
    }

    /// Sets the operation ID.
    pub fn set_op(&mut self, op: u16) -> &mut Self {
        self.op = op.to_le();
        self
    }

    /// Returns the operation flags (see `defs::uapi::VSOCK_FLAGS_*`).
    pub fn flags(&self) -> u32 {
        u32::from_le(self.flags)
    }

    /// Replaces the whole flags field.
    pub fn set_flags(&mut self, flags: u32) -> &mut Self {
        self.flags = flags.to_le();
        self
    }

    /// ORs a single flag into the flags field.
    pub fn set_flag(&mut self, flag: u32) -> &mut Self {
        self.set_flags(self.flags() | flag);
        self
    }

    /// Returns the sender's receive buffer size (flow control).
    pub fn buf_alloc(&self) -> u32 {
        u32::from_le(self.buf_alloc)
    }

    /// Sets the sender's receive buffer size.
    pub fn set_buf_alloc(&mut self, buf_alloc: u32) -> &mut Self {
        self.buf_alloc = buf_alloc.to_le();
        self
    }

    /// Returns the sender's forwarded-byte counter (flow control).
    pub fn fwd_cnt(&self) -> u32 {
        u32::from_le(self.fwd_cnt)
    }

    /// Sets the sender's forwarded-byte counter.
    pub fn set_fwd_cnt(&mut self, fwd_cnt: u32) -> &mut Self {
        self.fwd_cnt = fwd_cnt.to_le();
        self
    }
}

/// The vsock packet header struct size (the struct is packed).
pub const VSOCK_PKT_HDR_SIZE: u32 = 44;

// SAFETY: `VsockPacketHeader` is a POD and contains no padding.
unsafe impl ByteValued for VsockPacketHeader {}

// /// Struct describing a single vsock packet.
// /// // /// Encapsulates the virtio descriptor chain containing the packet through the `IoVecBuffer[Mut]` // /// abstractions. #[derive(Debug, Default)] pub struct VsockPacketTx { /// A copy of the vsock packet's 44-byte header, held in hypervisor memory /// to minimize the number of accesses to guest memory. Can be written back /// to geust memory using [`VsockPacket::commit_hdr`] (only for RX buffers). pub hdr: VsockPacketHeader, /// The raw buffer, as it is contained in guest memory (containing both /// header and payload) buffer: IoVecBuffer, } impl VsockPacketTx { /// Create the packet wrapper from a TX virtq chain head. /// /// ## Errors /// Returns /// - [`VsockError::UnreadableDescriptor`] if the provided descriptor chain contains any /// descriptor not marked as writable. /// - [`VsockError::DescChainTooShortForHeader`] if the descriptor chain's total buffer length /// is insufficient to hold the 44 byte vsock header /// - [`VsockError::InvalidPktLen`] if the contained vsock header describes a vsock packet whose /// length would exceed [`defs::MAX_PKT_BUR_SIZE`]. /// - [`VsockError::DescChainTooShortForPacket`] if the contained vsock header describes a vsock /// packet whose length exceeds the descriptor chain's actual total buffer length. pub fn parse( &mut self, mem: &GuestMemoryMmap, chain: DescriptorChain, ) -> Result<(), VsockError> { // SAFETY: This descriptor chain is only loaded once // virtio requests are handled sequentially so no two IoVecBuffers // are live at the same time, meaning this has exclusive ownership over the memory unsafe { self.buffer.load_descriptor_chain(mem, chain)? }; let mut hdr = VsockPacketHeader::default(); match self.buffer.read_exact_volatile_at(hdr.as_mut_slice(), 0) { Ok(()) => (), Err(Error::PartialBuffer { completed, .. 
}) => { return Err(VsockError::DescChainTooShortForHeader(completed)); } Err(err) => return Err(VsockError::GuestMemoryMmap(err.into())), } if hdr.len > defs::MAX_PKT_BUF_SIZE { return Err(VsockError::InvalidPktLen(hdr.len)); } if hdr.len > self.buffer.len() - VSOCK_PKT_HDR_SIZE { return Err(VsockError::DescChainTooShortForPacket( self.buffer.len(), hdr.len, )); } self.hdr = hdr; Ok(()) } pub fn write_from_offset_to( &self, dst: &mut T, offset: u32, count: u32, ) -> Result { if count > self .buffer .len() .saturating_sub(VSOCK_PKT_HDR_SIZE) .saturating_sub(offset) { return Err(VsockError::GuestMemoryBounds); } self.buffer .read_volatile_at(dst, (offset + VSOCK_PKT_HDR_SIZE) as usize, count as usize) .map_err(|err| VsockError::GuestMemoryMmap(GuestMemoryError::from(err))) .and_then(|read| read.try_into().map_err(|_| VsockError::DescChainOverflow)) } /// Returns the total length of this [`VsockPacket`]'s buffer (e.g. the amount of data bytes /// contained in this packet). /// /// Return value will equal the total length of the underlying descriptor chain's buffers, /// minus the length of the vsock header. pub fn buf_size(&self) -> u32 { self.buffer.len() - VSOCK_PKT_HDR_SIZE } } /// Struct describing a single vsock packet. /// /// Encapsulates the virtio descriptor chain containing the packet through the `IoVecBuffer[Mut]` /// abstractions. #[derive(Debug)] pub struct VsockPacketRx { /// A copy of the vsock packet's 44-byte header, held in hypervisor memory /// to minimize the number of accesses to guest memory. Can be written back /// to geust memory using [`VsockPacket::commit_hdr`] (only for RX buffers). pub hdr: VsockPacketHeader, /// The raw buffer, as it is contained in guest memory (containing both /// header and payload) buffer: IoVecBufferMut, } impl VsockPacketRx { /// Creates new VsockPacketRx. 
pub fn new() -> Result { let buffer = IoVecBufferMut::new().map_err(VsockError::IovDeque)?; Ok(Self { hdr: Default::default(), buffer, }) } /// Create the packet wrapper from an RX virtq chain head. /// /// ## Errors /// Returns [`VsockError::DescChainTooShortForHeader`] if the descriptor chain's total buffer /// length is insufficient to hold the 44 byte vsock header pub fn parse( &mut self, mem: &GuestMemoryMmap, chain: DescriptorChain, ) -> Result<(), VsockError> { // SAFETY: This descriptor chain is only loaded once // virtio requests are handled sequentially so no two IoVecBuffers // are live at the same time, meaning this has exclusive ownership over the memory unsafe { self.buffer.load_descriptor_chain(mem, chain)? }; if self.buffer.len() < VSOCK_PKT_HDR_SIZE { return Err(VsockError::DescChainTooShortForHeader( self.buffer.len() as usize )); } self.hdr = VsockPacketHeader::default(); Ok(()) } /// Writes the local copy of the packet header to the guest memory. /// /// ## Errors /// The function returns [`VsockError::UnwritableDescriptor`] if this [`VsockPacket`] /// contains a guest-to-host (TX) packet. It returned [`VsockError::InvalidPktLen`] if the /// packet's payload as described by this [`VsockPacket`] would exceed /// [`defs::MAX_PKT_BUF_SIZE`]. pub fn commit_hdr(&mut self) -> Result<(), VsockError> { if self.hdr.len > defs::MAX_PKT_BUF_SIZE { return Err(VsockError::InvalidPktLen(self.hdr.len)); } self.buffer .write_all_volatile_at(self.hdr.as_slice(), 0) .map_err(GuestMemoryError::from) .map_err(VsockError::GuestMemoryMmap) } /// Returns the total length of this [`VsockPacket`]'s buffer (e.g. the amount of data bytes /// contained in this packet). /// /// Return value will equal the total length of the underlying descriptor chain's buffers, /// minus the length of the vsock header. 
pub fn buf_size(&self) -> u32 { self.buffer.len() - VSOCK_PKT_HDR_SIZE } pub fn read_at_offset_from( &mut self, src: &mut T, offset: u32, count: u32, ) -> Result { if count > self .buffer .len() .saturating_sub(VSOCK_PKT_HDR_SIZE) .saturating_sub(offset) { return Err(VsockError::GuestMemoryBounds); } self.buffer .write_volatile_at(src, (offset + VSOCK_PKT_HDR_SIZE) as usize, count as usize) .map_err(|err| VsockError::GuestMemoryMmap(GuestMemoryError::from(err))) .and_then(|read| read.try_into().map_err(|_| VsockError::DescChainOverflow)) } } #[cfg(test)] mod tests { use vm_memory::Bytes; use super::*; use crate::devices::virtio::queue::VIRTQ_DESC_F_WRITE; use crate::devices::virtio::test_utils::VirtqDesc as GuestQDesc; use crate::devices::virtio::vsock::defs::MAX_PKT_BUF_SIZE; use crate::devices::virtio::vsock::device::{RXQ_INDEX, TXQ_INDEX}; use crate::devices::virtio::vsock::test_utils::TestContext; use crate::vstate::memory::{GuestAddress, GuestMemoryMmap}; macro_rules! create_context { ($test_ctx:ident, $handler_ctx:ident) => { let $test_ctx = TestContext::new(); let mut $handler_ctx = $test_ctx.create_event_handler_context(); // For TX packets, hdr.len should be set to a valid value. set_pkt_len(4096, &$handler_ctx.guest_txvq.dtable[0], &$test_ctx.mem); }; } fn set_pkt_len(len: u32, guest_desc: &GuestQDesc, mem: &GuestMemoryMmap) { let hdr_addr = GuestAddress(guest_desc.addr.get()); let mut hdr: VsockPacketHeader = mem.read_obj(hdr_addr).unwrap(); hdr.len = len.to_le(); mem.write_obj(hdr, hdr_addr).unwrap(); } #[test] fn test_packet_hdr_size() { assert_eq!( VSOCK_PKT_HDR_SIZE as usize, std::mem::size_of::(), ); } #[test] #[allow(clippy::cognitive_complexity)] fn test_tx_packet_assembly() { // Test case: successful TX packet assembly as linux < 6.1 would build them. 
{ create_context!(test_ctx, handler_ctx); let mut pkt = VsockPacketTx::default(); pkt.parse( &test_ctx.mem, handler_ctx.device.queues[TXQ_INDEX].pop().unwrap().unwrap(), ) .unwrap(); assert_eq!( TryInto::::try_into(pkt.buf_size()).unwrap(), handler_ctx.guest_txvq.dtable[1].len.get() ); } // Test case: error on write-only hdr descriptor. { create_context!(test_ctx, handler_ctx); handler_ctx.guest_txvq.dtable[0] .flags .set(VIRTQ_DESC_F_WRITE); assert!(matches!( VsockPacketTx::default().parse( &test_ctx.mem, handler_ctx.device.queues[TXQ_INDEX].pop().unwrap().unwrap(), ), Err(VsockError::UnreadableDescriptor) )) } // Test case: header descriptor has insufficient space to hold the packet header. { create_context!(test_ctx, handler_ctx); handler_ctx.guest_txvq.dtable[0] .len .set(VSOCK_PKT_HDR_SIZE - 1); handler_ctx.guest_txvq.dtable[1].len.set(0); assert!(matches!( VsockPacketTx::default().parse( &test_ctx.mem, handler_ctx.device.queues[TXQ_INDEX].pop().unwrap().unwrap(), ), Err(VsockError::DescChainTooShortForHeader(_)) )) } // Test case: zero-length TX packet. { create_context!(test_ctx, handler_ctx); set_pkt_len(0, &handler_ctx.guest_txvq.dtable[0], &test_ctx.mem); VsockPacketTx::default() .parse( &test_ctx.mem, handler_ctx.device.queues[TXQ_INDEX].pop().unwrap().unwrap(), ) .unwrap(); } // Test case: TX packet has more data than we can handle. { create_context!(test_ctx, handler_ctx); set_pkt_len( MAX_PKT_BUF_SIZE + 1, &handler_ctx.guest_txvq.dtable[0], &test_ctx.mem, ); assert!(matches!( VsockPacketTx::default().parse( &test_ctx.mem, handler_ctx.device.queues[TXQ_INDEX].pop().unwrap().unwrap(), ), Err(VsockError::InvalidPktLen(_)) )) } // Test case: // - packet header advertises some data length; and // - the data descriptor is missing. 
{ create_context!(test_ctx, handler_ctx); set_pkt_len(1024, &handler_ctx.guest_txvq.dtable[0], &test_ctx.mem); handler_ctx.guest_txvq.dtable[0].flags.set(0); assert!(matches!( VsockPacketTx::default().parse( &test_ctx.mem, handler_ctx.device.queues[TXQ_INDEX].pop().unwrap().unwrap(), ), Err(VsockError::DescChainTooShortForPacket(44, 1024)) )) } // Test case: error on write-only buf descriptor. { create_context!(test_ctx, handler_ctx); handler_ctx.guest_txvq.dtable[1] .flags .set(VIRTQ_DESC_F_WRITE); assert!(matches!( VsockPacketTx::default().parse( &test_ctx.mem, handler_ctx.device.queues[TXQ_INDEX].pop().unwrap().unwrap(), ), Err(VsockError::UnreadableDescriptor) )) } // Test case: the buffer descriptor cannot fit all the data advertised by the // packet header `len` field. { create_context!(test_ctx, handler_ctx); set_pkt_len(8 * 1024, &handler_ctx.guest_txvq.dtable[0], &test_ctx.mem); handler_ctx.guest_txvq.dtable[1].len.set(4 * 1024); assert!(matches!( VsockPacketTx::default().parse( &test_ctx.mem, handler_ctx.device.queues[TXQ_INDEX].pop().unwrap().unwrap(), ), Err(VsockError::DescChainTooShortForPacket(4140, 8192)) )) } } #[test] fn test_rx_packet_assembly() { // Test case: successful RX packet assembly. { create_context!(test_ctx, handler_ctx); let mut pkt = VsockPacketRx::new().unwrap(); pkt.parse( &test_ctx.mem, handler_ctx.device.queues[RXQ_INDEX].pop().unwrap().unwrap(), ) .unwrap(); assert_eq!(pkt.buf_size(), handler_ctx.guest_rxvq.dtable[1].len.get()); } // Test case: read-only RX packet header. 
{ create_context!(test_ctx, handler_ctx); handler_ctx.guest_rxvq.dtable[0].flags.set(0); assert!(matches!( VsockPacketRx::new().unwrap().parse( &test_ctx.mem, handler_ctx.device.queues[RXQ_INDEX].pop().unwrap().unwrap(), ), Err(VsockError::UnwritableDescriptor) )) } // Test case: RX descriptor chain cannot fit packet header { create_context!(test_ctx, handler_ctx); handler_ctx.guest_rxvq.dtable[0] .len .set(VSOCK_PKT_HDR_SIZE - 1); handler_ctx.guest_rxvq.dtable[1].len.set(0); assert!(matches!( VsockPacketRx::new().unwrap().parse( &test_ctx.mem, handler_ctx.device.queues[RXQ_INDEX].pop().unwrap().unwrap(), ), Err(VsockError::DescChainTooShortForHeader(_)) )) } } #[test] #[allow(clippy::cognitive_complexity)] fn test_packet_hdr_accessors() { const SRC_CID: u64 = 1; const DST_CID: u64 = 2; const SRC_PORT: u32 = 3; const DST_PORT: u32 = 4; const LEN: u32 = 5; const TYPE: u16 = 6; const OP: u16 = 7; const FLAGS: u32 = 8; const BUF_ALLOC: u32 = 9; const FWD_CNT: u32 = 10; let mut hdr = VsockPacketHeader::default(); assert_eq!(hdr.src_cid(), 0); assert_eq!(hdr.dst_cid(), 0); assert_eq!(hdr.src_port(), 0); assert_eq!(hdr.dst_port(), 0); assert_eq!(hdr.len(), 0); assert_eq!(hdr.type_(), 0); assert_eq!(hdr.op(), 0); assert_eq!(hdr.flags(), 0); assert_eq!(hdr.buf_alloc(), 0); assert_eq!(hdr.fwd_cnt(), 0); // Test field accessors. hdr.set_src_cid(SRC_CID) .set_dst_cid(DST_CID) .set_src_port(SRC_PORT) .set_dst_port(DST_PORT) .set_len(LEN) .set_type(TYPE) .set_op(OP) .set_flags(FLAGS) .set_buf_alloc(BUF_ALLOC) .set_fwd_cnt(FWD_CNT); assert_eq!(hdr.src_cid(), SRC_CID); assert_eq!(hdr.dst_cid(), DST_CID); assert_eq!(hdr.src_port(), SRC_PORT); assert_eq!(hdr.dst_port(), DST_PORT); assert_eq!(hdr.len(), LEN); assert_eq!(hdr.type_(), TYPE); assert_eq!(hdr.op(), OP); assert_eq!(hdr.flags(), FLAGS); assert_eq!(hdr.buf_alloc(), BUF_ALLOC); assert_eq!(hdr.fwd_cnt(), FWD_CNT); // Test individual flag setting. 
let flags = hdr.flags() | 0b1000; hdr.set_flag(0b1000); assert_eq!(hdr.flags(), flags); } #[test] fn test_packet_buf() { create_context!(test_ctx, handler_ctx); // create_context gives us an rx descriptor chain and a tx descriptor chain pointing to the // same area of memory. We need both a rx-view and a tx-view into the packet, as tx-queue // buffers are read only, while rx queue buffers are write-only let mut pkt = VsockPacketRx::new().unwrap(); pkt.parse( &test_ctx.mem, handler_ctx.device.queues[RXQ_INDEX].pop().unwrap().unwrap(), ) .unwrap(); let mut pkt2 = VsockPacketTx::default(); pkt2.parse( &test_ctx.mem, handler_ctx.device.queues[TXQ_INDEX].pop().unwrap().unwrap(), ) .unwrap(); let buf_desc = &mut handler_ctx.guest_rxvq.dtable[1]; assert_eq!(pkt.buf_size(), buf_desc.len.get()); let zeros = vec![0_u8; pkt.buf_size() as usize]; let data: Vec = (0..pkt.buf_size()) .map(|i| ((i as u64) & 0xff) as u8) .collect(); for offset in 0..pkt.buf_size() { buf_desc.set_data(&zeros); let mut expected_data = zeros[..offset as usize].to_vec(); expected_data.extend_from_slice(&data[..(pkt.buf_size() - offset) as usize]); pkt.read_at_offset_from(&mut data.as_slice(), offset, pkt.buf_size() - offset) .unwrap(); buf_desc.check_data(&expected_data); let mut buf = vec![0; pkt.buf_size() as usize]; pkt2.write_from_offset_to(&mut buf.as_mut_slice(), offset, pkt.buf_size() - offset) .unwrap(); assert_eq!( &buf[..(pkt.buf_size() - offset) as usize], &expected_data[offset as usize..] 
); } let oob_cases = vec![ (1, pkt.buf_size()), (pkt.buf_size(), 1), (u32::MAX, 1), (1, u32::MAX), ]; let mut buf = vec![0; pkt.buf_size() as usize]; for (offset, count) in oob_cases { let res = pkt.read_at_offset_from(&mut data.as_slice(), offset, count); assert!(matches!(res, Err(VsockError::GuestMemoryBounds))); let res = pkt2.write_from_offset_to(&mut buf.as_mut_slice(), offset, count); assert!(matches!(res, Err(VsockError::GuestMemoryBounds))); } } } ================================================ FILE: src/vmm/src/devices/virtio/vsock/persist.rs ================================================ // Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 //! Defines state and support structures for persisting Vsock devices and backends. use std::fmt::Debug; use std::sync::Arc; use serde::{Deserialize, Serialize}; use super::*; use crate::devices::virtio::device::{ActiveState, DeviceState, VirtioDeviceType}; use crate::devices::virtio::persist::VirtioDeviceState; use crate::devices::virtio::queue::FIRECRACKER_MAX_QUEUE_SIZE; use crate::devices::virtio::transport::VirtioInterrupt; use crate::snapshot::Persist; use crate::vstate::memory::GuestMemoryMmap; /// The Vsock serializable state. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct VsockState { /// The vsock backend state. pub backend: VsockBackendState, /// The vsock frontend state. pub frontend: VsockFrontendState, } /// The Vsock frontend serializable state. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct VsockFrontendState { /// Context Identifier. pub cid: u64, pub virtio_state: VirtioDeviceState, } /// The Vsock Unix Backend serializable state. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct VsockBackendState { /// The path for the UDS socket. pub uds_path: String, /// The last used host-side port. 
pub local_port_last: u32, } /// A helper structure that holds the constructor arguments for VsockUnixBackend #[derive(Debug)] pub struct VsockConstructorArgs { /// Pointer to guest memory. pub mem: GuestMemoryMmap, /// The vsock Unix Backend. pub backend: B, } /// A helper structure that holds the constructor arguments for VsockUnixBackend #[derive(Debug)] pub struct VsockUdsConstructorArgs { /// cid available in VsockFrontendState. pub cid: u64, } impl Persist<'_> for VsockUnixBackend { type State = VsockBackendState; type ConstructorArgs = VsockUdsConstructorArgs; type Error = VsockUnixBackendError; fn save(&self) -> Self::State { VsockBackendState { uds_path: self.host_sock_path.clone(), local_port_last: self.local_port_last, } } fn restore( constructor_args: Self::ConstructorArgs, state: &Self::State, ) -> Result { let mut backend = Self::new(constructor_args.cid, state.uds_path.clone())?; backend.local_port_last = state.local_port_last; Ok(backend) } } impl Persist<'_> for Vsock where B: VsockBackend + 'static + Debug, { type State = VsockFrontendState; type ConstructorArgs = VsockConstructorArgs; type Error = VsockError; fn save(&self) -> Self::State { VsockFrontendState { cid: self.cid(), virtio_state: VirtioDeviceState::from_device(self), } } fn restore( constructor_args: Self::ConstructorArgs, state: &Self::State, ) -> Result { // Restore queues. 
let queues = state .virtio_state .build_queues_checked( &constructor_args.mem, VirtioDeviceType::Vsock, defs::VSOCK_NUM_QUEUES, FIRECRACKER_MAX_QUEUE_SIZE, ) .map_err(VsockError::VirtioState)?; let mut vsock = Self::with_queues(state.cid, constructor_args.backend, queues)?; vsock.acked_features = state.virtio_state.acked_features; vsock.avail_features = state.virtio_state.avail_features; vsock.device_state = DeviceState::Inactive; Ok(vsock) } } #[cfg(test)] pub(crate) mod tests { use super::device::AVAIL_FEATURES; use super::*; use crate::devices::virtio::device::VirtioDevice; use crate::devices::virtio::test_utils::default_interrupt; use crate::devices::virtio::vsock::defs::uapi; use crate::devices::virtio::vsock::test_utils::{TestBackend, TestContext}; use crate::utils::byte_order; impl Persist<'_> for TestBackend { type State = VsockBackendState; type ConstructorArgs = VsockUdsConstructorArgs; type Error = VsockUnixBackendError; fn save(&self) -> Self::State { VsockBackendState { uds_path: "test".to_owned(), local_port_last: 0xdeadbeef, } } fn restore(_: Self::ConstructorArgs, state: &Self::State) -> Result { Ok(TestBackend::new()) } } #[test] fn test_persist_uds_backend() { let ctx = TestContext::new(); let device_features = AVAIL_FEATURES; let driver_features: u64 = AVAIL_FEATURES | 1 | (1 << 32); let device_pages = [ (device_features & 0xffff_ffff) as u32, (device_features >> 32) as u32, ]; let driver_pages = [ (driver_features & 0xffff_ffff) as u32, (driver_features >> 32) as u32, ]; // Test serialization // Save backend and device state separately. 
let state = VsockState { backend: ctx.device.backend().save(), frontend: ctx.device.save(), }; let serialized_data = bitcode::serialize(&state).unwrap(); let restored_state: VsockState = bitcode::deserialize(&serialized_data).unwrap(); let mut restored_device = Vsock::restore( VsockConstructorArgs { mem: ctx.mem.clone(), backend: { assert_eq!(restored_state.backend.uds_path, "test".to_owned()); assert_eq!(restored_state.backend.local_port_last, 0xdeadbeef); TestBackend::new() }, }, &restored_state.frontend, ) .unwrap(); assert_eq!(restored_device.device_type(), VirtioDeviceType::Vsock); assert_eq!(restored_device.avail_features_by_page(0), device_pages[0]); assert_eq!(restored_device.avail_features_by_page(1), device_pages[1]); assert_eq!(restored_device.avail_features_by_page(2), 0); restored_device.ack_features_by_page(0, driver_pages[0]); restored_device.ack_features_by_page(1, driver_pages[1]); restored_device.ack_features_by_page(2, 0); restored_device.ack_features_by_page(0, !driver_pages[0]); assert_eq!( restored_device.acked_features(), device_features & driver_features ); // Test reading 32-bit chunks. let mut data = [0u8; 8]; restored_device.read_config(0, &mut data[..4]); assert_eq!( u64::from(byte_order::read_le_u32(&data[..])), ctx.cid & 0xffff_ffff ); restored_device.read_config(4, &mut data[4..]); assert_eq!( u64::from(byte_order::read_le_u32(&data[4..])), (ctx.cid >> 32) & 0xffff_ffff ); // Test reading 64-bit. let mut data = [0u8; 8]; restored_device.read_config(0, &mut data); assert_eq!(byte_order::read_le_u64(&data), ctx.cid); // Check that out-of-bounds reading doesn't mutate the destination buffer. let mut data = [0u8, 1, 2, 3, 4, 5, 6, 7]; restored_device.read_config(2, &mut data); assert_eq!(data, [0u8, 1, 2, 3, 4, 5, 6, 7]); } } ================================================ FILE: src/vmm/src/devices/virtio/vsock/test_utils.rs ================================================ // Copyright 2018 Amazon.com, Inc. or its affiliates. 
All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 #![cfg(test)] #![doc(hidden)] use std::os::unix::io::{AsRawFd, RawFd}; use std::sync::Arc; use vmm_sys_util::epoll::EventSet; use vmm_sys_util::eventfd::EventFd; use super::packet::{VsockPacketRx, VsockPacketTx}; use crate::devices::virtio::device::VirtioDevice; use crate::devices::virtio::queue::{VIRTQ_DESC_F_NEXT, VIRTQ_DESC_F_WRITE}; use crate::devices::virtio::test_utils::{VirtQueue as GuestQ, default_interrupt}; use crate::devices::virtio::transport::VirtioInterrupt; use crate::devices::virtio::vsock::device::{RXQ_INDEX, TXQ_INDEX}; use crate::devices::virtio::vsock::packet::VSOCK_PKT_HDR_SIZE; use crate::devices::virtio::vsock::{ Vsock, VsockBackend, VsockChannel, VsockEpollListener, VsockError, }; use crate::test_utils::single_region_mem; use crate::vstate::memory::{GuestAddress, GuestMemoryMmap}; #[derive(Debug)] pub struct TestBackend { pub evfd: EventFd, pub rx_err: Option, pub tx_err: Option, pub pending_rx: bool, pub rx_ok_cnt: usize, pub tx_ok_cnt: usize, pub evset: Option, } impl TestBackend { pub fn new() -> Self { Self { evfd: EventFd::new(libc::EFD_NONBLOCK).unwrap(), rx_err: None, tx_err: None, pending_rx: false, rx_ok_cnt: 0, tx_ok_cnt: 0, evset: None, } } pub fn set_rx_err(&mut self, err: Option) { self.rx_err = err; } pub fn set_tx_err(&mut self, err: Option) { self.tx_err = err; } pub fn set_pending_rx(&mut self, prx: bool) { self.pending_rx = prx; } } impl Default for TestBackend { fn default() -> Self { Self::new() } } impl VsockChannel for TestBackend { fn recv_pkt(&mut self, pkt: &mut VsockPacketRx) -> Result<(), VsockError> { let cool_buf = [0xDu8, 0xE, 0xA, 0xD, 0xB, 0xE, 0xE, 0xF]; match self.rx_err.take() { None => { let buf_size = pkt.buf_size(); if buf_size > 0 { let buf: Vec = (0..buf_size) .map(|i| cool_buf[i as usize % cool_buf.len()]) .collect(); pkt.read_at_offset_from(&mut buf.as_slice(), 0, buf_size) .unwrap(); } self.rx_ok_cnt += 1; Ok(()) } Some(err) => Err(err), } 
} fn send_pkt(&mut self, _pkt: &VsockPacketTx) -> Result<(), VsockError> { match self.tx_err.take() { None => { self.tx_ok_cnt += 1; Ok(()) } Some(err) => Err(err), } } fn has_pending_rx(&self) -> bool { self.pending_rx } } impl AsRawFd for TestBackend { fn as_raw_fd(&self) -> RawFd { self.evfd.as_raw_fd() } } impl VsockEpollListener for TestBackend { fn get_polled_evset(&self) -> EventSet { EventSet::IN } fn notify(&mut self, evset: EventSet) { self.evset = Some(evset); } } impl VsockBackend for TestBackend {} #[derive(Debug)] pub struct TestContext { pub cid: u64, pub mem: GuestMemoryMmap, pub interrupt: Arc, pub mem_size: usize, pub device: Vsock, } impl TestContext { pub fn new() -> Self { const CID: u64 = 52; const MEM_SIZE: usize = 1024 * 1024 * 128; let mem = single_region_mem(MEM_SIZE); let mut device = Vsock::new(CID, TestBackend::new()).unwrap(); for q in device.queues_mut() { q.ready = true; q.size = q.max_size; } Self { cid: CID, mem, interrupt: default_interrupt(), mem_size: MEM_SIZE, device, } } pub fn create_event_handler_context(&self) -> EventHandlerContext<'_> { const QSIZE: u16 = 256; let guest_rxvq = GuestQ::new(GuestAddress(0x0010_0000), &self.mem, QSIZE); let guest_txvq = GuestQ::new(GuestAddress(0x0020_0000), &self.mem, QSIZE); let guest_evvq = GuestQ::new(GuestAddress(0x0030_0000), &self.mem, QSIZE); let rxvq = guest_rxvq.create_queue(); let txvq = guest_txvq.create_queue(); let evvq = guest_evvq.create_queue(); // Set up one available descriptor in the RX queue. guest_rxvq.dtable[0].set( 0x0040_0000, VSOCK_PKT_HDR_SIZE, VIRTQ_DESC_F_WRITE | VIRTQ_DESC_F_NEXT, 1, ); guest_rxvq.dtable[1].set(0x0040_1000, 4096, VIRTQ_DESC_F_WRITE, 0); guest_rxvq.avail.ring[0].set(0); guest_rxvq.avail.idx.set(1); // Set up one available descriptor in the TX queue. 
guest_txvq.dtable[0].set(0x0040_0000, VSOCK_PKT_HDR_SIZE, VIRTQ_DESC_F_NEXT, 1); guest_txvq.dtable[1].set(0x0040_1000, 4096, 0, 0); guest_txvq.avail.ring[0].set(0); guest_txvq.avail.idx.set(1); // Both descriptors above point to the same area of guest memory, to work around // the fact that through the TX queue, the memory is read-only, and through the RX queue, // the memory is write-only. let queues = vec![rxvq, txvq, evvq]; EventHandlerContext { guest_rxvq, guest_txvq, guest_evvq, device: Vsock::with_queues(self.cid, TestBackend::new(), queues).unwrap(), } } } impl Default for TestContext { fn default() -> Self { Self::new() } } #[derive(Debug)] pub struct EventHandlerContext<'a> { pub device: Vsock, pub guest_rxvq: GuestQ<'a>, pub guest_txvq: GuestQ<'a>, pub guest_evvq: GuestQ<'a>, } impl EventHandlerContext<'_> { pub fn mock_activate(&mut self, mem: GuestMemoryMmap, interrupt: Arc) { // Artificially activate the device. self.device.activate(mem, interrupt).unwrap(); } pub fn signal_txq_event(&mut self) { self.device.queue_events[TXQ_INDEX].write(1).unwrap(); self.device.handle_txq_event(EventSet::IN); } pub fn signal_rxq_event(&mut self) { self.device.queue_events[RXQ_INDEX].write(1).unwrap(); self.device.handle_rxq_event(EventSet::IN); } } #[cfg(test)] pub fn read_packet_data(pkt: &VsockPacketTx, how_much: u32) -> Vec { let mut buf = vec![0; how_much as usize]; pkt.write_from_offset_to(&mut buf.as_mut_slice(), 0, how_much) .unwrap(); buf } impl Vsock where B: VsockBackend, { pub fn write_element_in_queue(vsock: &Vsock, idx: usize, val: u64) { if idx > vsock.queue_events.len() - 1 { panic!("Index bigger than the number of queues of this device"); } vsock.queue_events[idx].write(val).unwrap(); } pub fn get_element_from_interest_list(vsock: &Vsock, idx: usize) -> u64 { match idx { 0..=2 => u64::try_from(vsock.queue_events[idx].as_raw_fd()).unwrap(), 3 => u64::try_from(vsock.backend.as_raw_fd()).unwrap(), 4 => 
u64::try_from(vsock.activate_evt.as_raw_fd()).unwrap(), _ => panic!("Index bigger than interest list"), } } } ================================================ FILE: src/vmm/src/devices/virtio/vsock/unix/mod.rs ================================================ // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 // /// This module implements the Unix Domain Sockets backend for vsock - a mediator between /// guest-side AF_VSOCK sockets and host-side AF_UNIX sockets. The heavy lifting is performed by /// `muxer::VsockMuxer`, a connection multiplexer that uses `super::csm::VsockConnection` for /// handling vsock connection states. /// Check out `muxer.rs` for a more detailed explanation of the inner workings of this backend. mod muxer; mod muxer_killq; mod muxer_rxq; pub use muxer::VsockMuxer as VsockUnixBackend; use crate::devices::virtio::vsock::csm::VsockConnectionBackend; mod defs { /// Maximum number of established connections that we can handle. pub const MAX_CONNECTIONS: usize = 1023; /// Size of the muxer RX packet queue. pub const MUXER_RXQ_SIZE: u32 = 256; /// Size of the muxer connection kill queue. pub const MUXER_KILLQ_SIZE: u32 = 128; } /// Vsock backend related errors. #[derive(Debug, thiserror::Error, displaydoc::Display)] pub enum VsockUnixBackendError { /// Error registering a new epoll-listening FD: {0} EpollAdd(std::io::Error), /// Error creating an epoll FD: {0} EpollFdCreate(std::io::Error), /// The host made an invalid vsock port connection request. InvalidPortRequest, /// Error accepting a new connection from the host-side Unix socket: {0} UnixAccept(std::io::Error), /// Error binding to the host-side Unix socket: {0} UnixBind(std::io::Error), /// Error connecting to a host-side Unix socket: {0} UnixConnect(std::io::Error), /// Error reading from host-side Unix socket: {0} UnixRead(std::io::Error), /// Muxer connection limit reached. 
TooManyConnections, } type MuxerConnection = super::csm::VsockConnection; impl VsockConnectionBackend for std::os::unix::net::UnixStream {} ================================================ FILE: src/vmm/src/devices/virtio/vsock/unix/muxer.rs ================================================ // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 // /// `VsockMuxer` is the device-facing component of the Unix domain sockets vsock backend. I.e. /// by implementing the `VsockBackend` trait, it abstracts away the gory details of translating /// between AF_VSOCK and AF_UNIX, and presents a clean interface to the rest of the vsock /// device model. /// /// The vsock muxer has two main roles: /// 1. Vsock connection multiplexer: It's the muxer's job to create, manage, and terminate /// `VsockConnection` objects. The muxer also routes packets to their owning connections. It /// does so via a connection `HashMap`, keyed by what is basically a (host_port, guest_port) /// tuple. Vsock packet traffic needs to be inspected, in order to detect connection request /// packets (leading to the creation of a new connection), and connection reset packets /// (leading to the termination of an existing connection). All other packets, though, must /// belong to an existing connection and, as such, the muxer simply forwards them. /// 2. Event dispatcher There are three event categories that the vsock backend is interested /// it: /// 1. A new host-initiated connection is ready to be accepted from the listening host Unix /// socket; /// 2. Data is available for reading from a newly-accepted host-initiated connection (i.e. /// the host is ready to issue a vsock connection request, informing us of the /// destination port to which it wants to connect); /// 3. Some event was triggered for a connected Unix socket, that belongs to a /// `VsockConnection`. 
/// /// The muxer gets notified about all of these events, because, as a `VsockEpollListener` /// implementor, it gets to register a nested epoll FD into the main VMM epolling loop. All /// other pollable FDs are then registered under this nested epoll FD. /// To route all these events to their handlers, the muxer uses another `HashMap` object, /// mapping `RawFd`s to `EpollListener`s. use std::collections::{HashMap, HashSet}; use std::fmt::Debug; use std::io::Read; use std::os::unix::io::{AsRawFd, RawFd}; use std::os::unix::net::{UnixListener, UnixStream}; use log::{debug, error, info, warn}; use vmm_sys_util::epoll::{ControlOperation, Epoll, EpollEvent, EventSet}; use super::super::csm::ConnState; use super::super::defs::uapi; use super::super::{VsockBackend, VsockChannel, VsockEpollListener, VsockError}; use super::muxer_killq::MuxerKillQ; use super::muxer_rxq::MuxerRxQ; use super::{MuxerConnection, VsockUnixBackendError, defs}; use crate::devices::virtio::vsock::metrics::METRICS; use crate::devices::virtio::vsock::packet::{VsockPacketRx, VsockPacketTx}; use crate::logger::IncMetric; /// A unique identifier of a `MuxerConnection` object. Connections are stored in a hash map, /// keyed by a `ConnMapKey` object. #[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] pub struct ConnMapKey { local_port: u32, peer_port: u32, } /// A muxer RX queue item. #[derive(Clone, Copy, Debug)] pub enum MuxerRx { /// The packet must be fetched from the connection identified by `ConnMapKey`. ConnRx(ConnMapKey), /// The muxer must produce an RST packet. RstPkt { local_port: u32, peer_port: u32 }, } /// An epoll listener, registered under the muxer's nested epoll FD. #[derive(Debug)] enum EpollListener { /// The listener is a `MuxerConnection`, identified by `key`, and interested in the events /// in `evset`. Since `MuxerConnection` implements `VsockEpollListener`, notifications will /// be forwarded to the listener via `VsockEpollListener::notify()`. 
Connection { key: ConnMapKey, evset: EventSet }, /// A listener interested in new host-initiated connections. HostSock, /// A listener interested in reading host `connect ` commands from a freshly /// connected host socket. LocalStream(UnixStream), } /// The vsock connection multiplexer. #[derive(Debug)] pub struct VsockMuxer { /// Guest CID. cid: u64, /// A hash map used to store the active connections. conn_map: HashMap, /// A hash map used to store epoll event listeners / handlers. listener_map: HashMap, /// The RX queue. Items in this queue are consumed by `VsockMuxer::recv_pkt()`, and /// produced /// - by `VsockMuxer::send_pkt()` (e.g. RST in response to a connection request packet); and /// - in response to EPOLLIN events (e.g. data available to be read from an AF_UNIX socket). rxq: MuxerRxQ, /// A queue used for terminating connections that are taking too long to shut down. killq: MuxerKillQ, /// The Unix socket, through which host-initiated connections are accepted. host_sock: UnixListener, /// The file system path of the host-side Unix socket. This is used to figure out the path /// to Unix sockets listening on specific ports. I.e. `"_"`. pub(crate) host_sock_path: String, /// The nested epoll event set, used to register epoll listeners. epoll: Epoll, /// A hash set used to keep track of used host-side (local) ports, in order to assign local /// ports to host-initiated connections. local_port_set: HashSet, /// The last used host-side port. /// /// Local ports are allocated in a round-robin fashion within the range [1 << 30, 1 << 31). /// There should be no inherent technical requirement for this specific range. But the range /// provides 1 billion available ports, making port collisions unlikely. In addition, the /// most significant bits are fixed to 01, which may facilitate debugging and identification. /// This appears to have been a design decision dating back to the initial introduction of the /// vsock implementation. 
    pub(crate) local_port_last: u32,
}

impl VsockChannel for VsockMuxer {
    /// Deliver a vsock packet to the guest vsock driver.
    ///
    /// Returns:
    /// - `Ok(())`: `pkt` has been successfully filled in; or
    /// - `Err(VsockError::NoData)`: there was no available data with which to fill in the
    ///   packet.
    fn recv_pkt(&mut self, pkt: &mut VsockPacketRx) -> Result<(), VsockError> {
        // We'll look for instructions on how to build the RX packet in the RX queue. If the
        // queue is empty, that doesn't necessarily mean we don't have any pending RX, since
        // the queue might be out-of-sync. If that's the case, we'll attempt to sync it first,
        // and then try to pop something out again.
        if self.rxq.is_empty() && !self.rxq.is_synced() {
            self.rxq = MuxerRxQ::from_conn_map(&self.conn_map);
        }

        while let Some(rx) = self.rxq.peek() {
            let res = match rx {
                // We need to build an RST packet, going from `local_port` to `peer_port`.
                MuxerRx::RstPkt {
                    local_port,
                    peer_port,
                } => {
                    pkt.hdr
                        .set_op(uapi::VSOCK_OP_RST)
                        .set_src_cid(uapi::VSOCK_HOST_CID)
                        .set_dst_cid(self.cid)
                        .set_src_port(local_port)
                        .set_dst_port(peer_port)
                        .set_len(0)
                        .set_type(uapi::VSOCK_TYPE_STREAM)
                        .set_flags(0)
                        .set_buf_alloc(0)
                        .set_fwd_cnt(0);
                    // An RST item is fully consumed here, so it is popped unconditionally.
                    self.rxq.pop().unwrap();
                    return Ok(());
                }

                // We'll defer building the packet to this connection, since it has something
                // to say.
                MuxerRx::ConnRx(key) => {
                    let mut conn_res = Err(VsockError::NoData);
                    let mut do_pop = true;
                    self.apply_conn_mutation(key, |conn| {
                        conn_res = conn.recv_pkt(pkt);
                        // Keep the item queued while the connection still has pending RX.
                        do_pop = !conn.has_pending_rx();
                    });
                    if do_pop {
                        self.rxq.pop().unwrap();
                    }
                    conn_res
                }
            };

            if res.is_ok() {
                // Inspect traffic, looking for RST packets, since that means we have to
                // terminate and remove this connection from the active connection pool.
                //
                if pkt.hdr.op() == uapi::VSOCK_OP_RST {
                    self.remove_connection(ConnMapKey {
                        local_port: pkt.hdr.src_port(),
                        peer_port: pkt.hdr.dst_port(),
                    });
                }

                debug!("vsock muxer: RX pkt: {:?}", pkt.hdr);
                return Ok(());
            }
        }

        Err(VsockError::NoData)
    }

    /// Deliver a guest-generated packet to its destination in the vsock backend.
    ///
    /// This absorbs unexpected packets, handles RSTs (by dropping connections), and forwards
    /// all the rest to their owning `MuxerConnection`.
    ///
    /// Returns:
    /// always `Ok(())` - the packet has been consumed, and its virtio TX buffers can be
    /// returned to the guest vsock driver.
    fn send_pkt(&mut self, pkt: &VsockPacketTx) -> Result<(), VsockError> {
        let conn_key = ConnMapKey {
            local_port: pkt.hdr.dst_port(),
            peer_port: pkt.hdr.src_port(),
        };

        debug!(
            "vsock: muxer.send[rxq.len={}]: {:?}",
            self.rxq.len(),
            pkt.hdr
        );

        // If this packet has an unsupported type (!=stream), we must send back an RST.
        //
        if pkt.hdr.type_() != uapi::VSOCK_TYPE_STREAM {
            self.enq_rst(pkt.hdr.dst_port(), pkt.hdr.src_port());
            return Ok(());
        }

        // We don't know how to handle packets addressed to other CIDs. We only handle the host
        // part of the guest - host communication here.
        if pkt.hdr.dst_cid() != uapi::VSOCK_HOST_CID {
            info!(
                "vsock: dropping guest packet for unknown CID: {:?}",
                pkt.hdr
            );
            return Ok(());
        }

        if !self.conn_map.contains_key(&conn_key) {
            // This packet can't be routed to any active connection (based on its src and dst
            // ports). The only orphan / unroutable packets we know how to handle are
            // connection requests.
            if pkt.hdr.op() == uapi::VSOCK_OP_REQUEST {
                // Oh, this is a connection request!
                self.handle_peer_request_pkt(pkt);
            } else {
                // Send back an RST, to let the driver know we weren't expecting this packet.
                self.enq_rst(pkt.hdr.dst_port(), pkt.hdr.src_port());
            }
            return Ok(());
        }

        // Right, we know where to send this packet, then (to `conn_key`).
        // However, if this is an RST, we have to forcefully terminate the connection, so
        // there's no point in forwarding it the packet.
        if pkt.hdr.op() == uapi::VSOCK_OP_RST {
            self.remove_connection(conn_key);
            return Ok(());
        }

        // Alright, everything looks in order - forward this packet to its owning connection.
        let mut res: Result<(), VsockError> = Ok(());
        self.apply_conn_mutation(conn_key, |conn| {
            res = conn.send_pkt(pkt);
        });

        res
    }

    /// Check if the muxer has any pending RX data, with which to fill a guest-provided RX
    /// buffer.
    // An out-of-sync queue may still hide pending RX, hence the `is_synced()` check.
    fn has_pending_rx(&self) -> bool {
        !self.rxq.is_empty() || !self.rxq.is_synced()
    }
}

impl AsRawFd for VsockMuxer {
    /// Get the FD to be registered for polling upstream (in the main VMM epoll loop, in this
    /// case).
    ///
    /// This will be the muxer's nested epoll FD.
    fn as_raw_fd(&self) -> RawFd {
        self.epoll.as_raw_fd()
    }
}

impl VsockEpollListener for VsockMuxer {
    /// Get the epoll events to be polled upstream.
    ///
    /// Since the polled FD is a nested epoll FD, we're only interested in EPOLLIN events (i.e.
    /// some event occurred on one of the FDs registered under our epoll FD).
    fn get_polled_evset(&self) -> EventSet {
        EventSet::IN
    }

    /// Notify the muxer about a pending event having occurred under its nested epoll FD.
    fn notify(&mut self, _: EventSet) {
        let mut epoll_events = vec![EpollEvent::new(EventSet::empty(), 0); 32];
        match self.epoll.wait(0, epoll_events.as_mut_slice()) {
            Ok(ev_cnt) => {
                for ev in &epoll_events[0..ev_cnt] {
                    self.handle_event(
                        ev.fd(),
                        // It's ok to unwrap here, since the `epoll_events[i].events` is filled
                        // in by `epoll::wait()`, and therefore contains only valid epoll
                        // flags.
                        EventSet::from_bits(ev.events).unwrap(),
                    );
                }
            }
            Err(err) => {
                warn!("vsock: failed to consume muxer epoll event: {}", err);
                METRICS.muxer_event_fails.inc();
            }
        }
    }
}

impl VsockBackend for VsockMuxer {}

impl VsockMuxer {
    /// Muxer constructor.
pub fn new(cid: u64, host_sock_path: String) -> Result { // Open/bind on the host Unix socket, so we can accept host-initiated // connections. let host_sock = UnixListener::bind(&host_sock_path) .and_then(|sock| sock.set_nonblocking(true).map(|_| sock)) .map_err(VsockUnixBackendError::UnixBind)?; let mut muxer = Self { cid, host_sock, host_sock_path, epoll: Epoll::new().map_err(VsockUnixBackendError::EpollFdCreate)?, rxq: MuxerRxQ::new(), conn_map: HashMap::with_capacity(defs::MAX_CONNECTIONS), listener_map: HashMap::with_capacity(defs::MAX_CONNECTIONS + 1), killq: MuxerKillQ::new(), local_port_last: (1u32 << 30) - 1, local_port_set: HashSet::with_capacity(defs::MAX_CONNECTIONS), }; // Listen on the host initiated socket, for incoming connections. muxer.add_listener(muxer.host_sock.as_raw_fd(), EpollListener::HostSock)?; Ok(muxer) } /// Return the file system path of the host-side Unix socket. pub fn host_sock_path(&self) -> &str { &self.host_sock_path } /// Handle/dispatch an epoll event to its listener. fn handle_event(&mut self, fd: RawFd, event_set: EventSet) { debug!( "vsock: muxer processing event: fd={}, evset={:?}", fd, event_set ); match self.listener_map.get_mut(&fd) { // This event needs to be forwarded to a `MuxerConnection` that is listening for // it. Some(EpollListener::Connection { key, evset: _ }) => { let key_copy = *key; // The handling of this event will most probably mutate the state of the // receiving connection. We'll need to check for new pending RX, event set // mutation, and all that, so we're wrapping the event delivery inside those // checks. self.apply_conn_mutation(key_copy, |conn| { conn.notify(event_set); }); } // A new host-initiated connection is ready to be accepted. Some(EpollListener::HostSock) => { if self.conn_map.len() == defs::MAX_CONNECTIONS { // If we're already maxed-out on connections, we'll just accept and // immediately discard this potentially new one. 
warn!("vsock: connection limit reached; refusing new host connection"); self.host_sock.accept().map(|_| 0).unwrap_or(0); return; } self.host_sock .accept() .map_err(VsockUnixBackendError::UnixAccept) .and_then(|(stream, _)| { stream .set_nonblocking(true) .map(|_| stream) .map_err(VsockUnixBackendError::UnixAccept) }) .and_then(|stream| { // Before forwarding this connection to a listening AF_VSOCK socket on // the guest side, we need to know the destination port. We'll read // that port from a "connect" command received on this socket, so the // next step is to ask to be notified the moment we can read from it. self.add_listener(stream.as_raw_fd(), EpollListener::LocalStream(stream)) }) .unwrap_or_else(|err| { warn!("vsock: unable to accept local connection: {:?}", err); }); } // Data is ready to be read from a host-initiated connection. That would be the // "connect" command that we're expecting. Some(EpollListener::LocalStream(_)) => { if let Some(EpollListener::LocalStream(mut stream)) = self.remove_listener(fd) { Self::read_local_stream_port(&mut stream) .map(|peer_port| (self.allocate_local_port(), peer_port)) .and_then(|(local_port, peer_port)| { self.add_connection( ConnMapKey { local_port, peer_port, }, MuxerConnection::new_local_init( stream, uapi::VSOCK_HOST_CID, self.cid, local_port, peer_port, ), ) }) .unwrap_or_else(|err| { info!("vsock: error adding local-init connection: {:?}", err); }) } } _ => { info!( "vsock: unexpected event: fd={:?}, evset={:?}", fd, event_set ); METRICS.muxer_event_fails.inc(); } } } /// Parse a host "connect" command, and extract the destination vsock port. fn read_local_stream_port(stream: &mut UnixStream) -> Result { let mut buf = [0u8; 32]; // This is the minimum number of bytes that we should be able to read, when parsing a // valid connection request. I.e. `b"connect 0\n".len()`. const MIN_READ_LEN: usize = 10; // Bring in the minimum number of bytes that we should be able to read. 
stream .read_exact(&mut buf[..MIN_READ_LEN]) .map_err(VsockUnixBackendError::UnixRead)?; // Now, finish reading the destination port number, by bringing in one byte at a time, // until we reach an EOL terminator (or our buffer space runs out). Yeah, not // particularly proud of this approach, but it will have to do for now. let mut blen = MIN_READ_LEN; while buf[blen - 1] != b'\n' && blen < buf.len() { stream .read_exact(&mut buf[blen..=blen]) .map_err(VsockUnixBackendError::UnixRead)?; blen += 1; } let mut word_iter = std::str::from_utf8(&buf[..blen]) .map_err(|_| VsockUnixBackendError::InvalidPortRequest)? .split_whitespace(); word_iter .next() .ok_or(VsockUnixBackendError::InvalidPortRequest) .and_then(|word| { if word.to_lowercase() == "connect" { Ok(()) } else { Err(VsockUnixBackendError::InvalidPortRequest) } }) .and_then(|_| { word_iter .next() .ok_or(VsockUnixBackendError::InvalidPortRequest) }) .and_then(|word| { word.parse::() .map_err(|_| VsockUnixBackendError::InvalidPortRequest) }) .map_err(|_| VsockUnixBackendError::InvalidPortRequest) } /// Add a new connection to the active connection pool. fn add_connection( &mut self, key: ConnMapKey, conn: MuxerConnection, ) -> Result<(), VsockUnixBackendError> { // We might need to make room for this new connection, so let's sweep the kill queue // first. It's fine to do this here because: // - unless the kill queue is out of sync, this is a pretty inexpensive operation; and // - we are under no pressure to respect any accurate timing for connection termination. self.sweep_killq(); if self.conn_map.len() >= defs::MAX_CONNECTIONS { info!( "vsock: muxer connection limit reached ({})", defs::MAX_CONNECTIONS ); return Err(VsockUnixBackendError::TooManyConnections); } self.add_listener( conn.as_raw_fd(), EpollListener::Connection { key, evset: conn.get_polled_evset(), }, ) .map(|_| { if conn.has_pending_rx() { // We can safely ignore any error in adding a connection RX indication. 
Worst // case scenario, the RX queue will get desynchronized, but we'll handle that // the next time we need to yield an RX packet. self.rxq.push(MuxerRx::ConnRx(key)); } self.conn_map.insert(key, conn); METRICS.conns_added.inc(); }) } /// Remove a connection from the active connection poll. fn remove_connection(&mut self, key: ConnMapKey) { if let Some(conn) = self.conn_map.remove(&key) { self.remove_listener(conn.as_raw_fd()); METRICS.conns_removed.inc(); } self.free_local_port(key.local_port); } /// Schedule a connection for immediate termination. /// I.e. as soon as we can also let our peer know we're dropping the connection, by sending /// it an RST packet. fn kill_connection(&mut self, key: ConnMapKey) { let mut had_rx = false; METRICS.conns_killed.inc(); self.conn_map.entry(key).and_modify(|conn| { had_rx = conn.has_pending_rx(); conn.kill(); }); // This connection will now have an RST packet to yield, so we need to add it to the RX // queue. However, there's no point in doing that if it was already in the queue. if !had_rx { // We can safely ignore any error in adding a connection RX indication. Worst case // scenario, the RX queue will get desynchronized, but we'll handle that the next // time we need to yield an RX packet. self.rxq.push(MuxerRx::ConnRx(key)); } } /// Register a new epoll listener under the muxer's nested epoll FD. fn add_listener( &mut self, fd: RawFd, listener: EpollListener, ) -> Result<(), VsockUnixBackendError> { let evset = match listener { EpollListener::Connection { evset, .. } => evset, EpollListener::LocalStream(_) => EventSet::IN, EpollListener::HostSock => EventSet::IN, }; self.epoll .ctl( ControlOperation::Add, fd, EpollEvent::new(evset, u64::try_from(fd).unwrap()), ) .map(|_| { self.listener_map.insert(fd, listener); }) .map_err(VsockUnixBackendError::EpollAdd)?; Ok(()) } /// Remove (and return) a previously registered epoll listener. 
fn remove_listener(&mut self, fd: RawFd) -> Option { let maybe_listener = self.listener_map.remove(&fd); if maybe_listener.is_some() { self.epoll .ctl(ControlOperation::Delete, fd, EpollEvent::default()) .unwrap_or_else(|err| { warn!( "vosck muxer: error removing epoll listener for fd {:?}: {:?}", fd, err ); }); } maybe_listener } /// Allocate a host-side port to be assigned to a new host-initiated connection. fn allocate_local_port(&mut self) -> u32 { // TODO: this doesn't seem very space-efficient. // Mybe rewrite this to limit port range and use a bitmap? // loop { self.local_port_last = (self.local_port_last + 1) & !(1 << 31) | (1 << 30); if self.local_port_set.insert(self.local_port_last) { break; } } self.local_port_last } /// Mark a previously used host-side port as free. fn free_local_port(&mut self, port: u32) { self.local_port_set.remove(&port); } /// Handle a new connection request comming from our peer (the guest vsock driver). /// /// This will attempt to connect to a host-side Unix socket, expected to be listening at /// the file system path corresponing to the destination port. If successful, a new /// connection object will be created and added to the connection pool. On failure, a new /// RST packet will be scheduled for delivery to the guest. fn handle_peer_request_pkt(&mut self, pkt: &VsockPacketTx) { let port_path = format!("{}_{}", self.host_sock_path, pkt.hdr.dst_port()); UnixStream::connect(port_path) .and_then(|stream| stream.set_nonblocking(true).map(|_| stream)) .map_err(VsockUnixBackendError::UnixConnect) .and_then(|stream| { self.add_connection( ConnMapKey { local_port: pkt.hdr.dst_port(), peer_port: pkt.hdr.src_port(), }, MuxerConnection::new_peer_init( stream, uapi::VSOCK_HOST_CID, self.cid, pkt.hdr.dst_port(), pkt.hdr.src_port(), pkt.hdr.buf_alloc(), ), ) }) .unwrap_or_else(|_| self.enq_rst(pkt.hdr.dst_port(), pkt.hdr.src_port())); } /// Perform an action that might mutate a connection's state. 
/// /// This is used as shorthand for repetitive tasks that need to be performed after a /// connection object mutates. E.g. /// - update the connection's epoll listener; /// - schedule the connection to be queried for RX data; /// - kill the connection if an unrecoverable error occurs. fn apply_conn_mutation(&mut self, key: ConnMapKey, mut_fn: F) where F: FnOnce(&mut MuxerConnection), { if let Some(conn) = self.conn_map.get_mut(&key) { let had_rx = conn.has_pending_rx(); let was_expiring = conn.will_expire(); let prev_state = conn.state(); mut_fn(conn); // If this is a host-initiated connection that has just become established, we'll have // to send an ack message to the host end. if prev_state == ConnState::LocalInit && conn.state() == ConnState::Established { let msg = format!("OK {}\n", key.local_port); match conn.send_bytes_raw(msg.as_bytes()) { Ok(written) if written == msg.len() => (), Ok(_) => { // If we can't write a dozen bytes to a pristine connection something // must be really wrong. Killing it. conn.kill(); warn!("vsock: unable to fully write connection ack msg."); } Err(err) => { conn.kill(); warn!("vsock: unable to ack host connection: {:?}", err); } }; } // If the connection wasn't previously scheduled for RX, add it to our RX queue. if !had_rx && conn.has_pending_rx() { self.rxq.push(MuxerRx::ConnRx(key)); } // If the connection wasn't previously scheduled for termination, add it to the // kill queue. if !was_expiring && conn.will_expire() { // It's safe to unwrap here, since `conn.will_expire()` already guaranteed that // an `conn.expiry` is available. self.killq.push(key, conn.expiry().unwrap()); } let fd = conn.as_raw_fd(); let new_evset = conn.get_polled_evset(); if new_evset.is_empty() { // If the connection no longer needs epoll notifications, remove its listener // from our list. self.remove_listener(fd); return; } if let Some(EpollListener::Connection { evset, .. 
            // (continuation of `apply_conn_mutation`: refresh this connection's epoll
            // registration to match its newly requested event set)
            }) = self.listener_map.get_mut(&fd)
            {
                if *evset != new_evset {
                    // If the set of events that the connection is interested in has changed,
                    // we need to update its epoll listener.
                    debug!(
                        "vsock: updating listener for (lp={}, pp={}): old={:?}, new={:?}",
                        key.local_port, key.peer_port, *evset, new_evset
                    );

                    *evset = new_evset;
                    self.epoll
                        .ctl(
                            ControlOperation::Modify,
                            fd,
                            EpollEvent::new(new_evset, u64::try_from(fd).unwrap()),
                        )
                        .unwrap_or_else(|err| {
                            // This really shouldn't happen, like, ever. However, "famous last
                            // words" and all that, so let's just kill it with fire, and walk
                            // away.
                            self.kill_connection(key);
                            error!(
                                "vsock: error updating epoll listener for (lp={}, pp={}): \
                                 {:?}",
                                key.local_port, key.peer_port, err
                            );
                            METRICS.muxer_event_fails.inc();
                        });
                }
            } else {
                // The connection had previously asked to be removed from the listener map (by
                // returning an empty event set via `get_polled_fd()`), but now wants back in.
                self.add_listener(
                    fd,
                    EpollListener::Connection {
                        key,
                        evset: new_evset,
                    },
                )
                .unwrap_or_else(|err| {
                    self.kill_connection(key);
                    error!(
                        "vsock: error updating epoll listener for (lp={}, pp={}): {:?}",
                        key.local_port, key.peer_port, err
                    );
                    METRICS.muxer_event_fails.inc();
                });
            }
        }
    }

    /// Check if any connections have timed out, and if so, schedule them for immediate
    /// termination.
    fn sweep_killq(&mut self) {
        while let Some(key) = self.killq.pop() {
            // Connections don't get removed from the kill queue when their kill timer is
            // disarmed, since that would be a costly operation. This means we must check if
            // the connection has indeed expired, prior to killing it.
            let mut kill = false;
            self.conn_map
                .entry(key)
                .and_modify(|conn| kill = conn.has_expired());
            if kill {
                self.kill_connection(key);
            }
        }

        if self.killq.is_empty() && !self.killq.is_synced() {
            self.killq = MuxerKillQ::from_conn_map(&self.conn_map);
            METRICS.killq_resync.inc();
            // If we've just re-created the kill queue, we can sweep it again; maybe there's
            // more to kill.
            self.sweep_killq();
        }
    }

    /// Enqueue an RST packet into `self.rxq`.
    ///
    /// Enqueue errors aren't propagated up the call chain, since there is nothing we can do to
    /// handle them. We do, however, log a warning, since not being able to enqueue an RST
    /// packet means we have to drop it, which is not normal operation.
    fn enq_rst(&mut self, local_port: u32, peer_port: u32) {
        let pushed = self.rxq.push(MuxerRx::RstPkt {
            local_port,
            peer_port,
        });
        if !pushed {
            warn!(
                "vsock: muxer.rxq full; dropping RST packet for lp={}, pp={}",
                local_port, peer_port
            );
        }
    }
}

#[cfg(test)]
mod tests {
    use std::io::{Read, Write};
    use std::ops::Drop;
    use std::os::unix::net::{UnixListener, UnixStream};
    use std::path::{Path, PathBuf};

    use vmm_sys_util::tempfile::TempFile;

    use super::super::super::csm::defs as csm_defs;
    use super::*;
    use crate::devices::virtio::vsock::device::{RXQ_INDEX, TXQ_INDEX};
    use crate::devices::virtio::vsock::test_utils;
    use crate::devices::virtio::vsock::test_utils::TestContext as VsockTestContext;

    const PEER_CID: u64 = 3;
    const PEER_BUF_ALLOC: u32 = 64 * 1024;

    /// Shared fixture for muxer tests: a vsock device context plus a muxer bound to a
    /// temporary Unix socket path.
    #[derive(Debug)]
    struct MuxerTestContext {
        _vsock_test_ctx: VsockTestContext,
        // Two views of the same in-memory packet.
        // rx-view for writing, tx-view for reading
        rx_pkt: VsockPacketRx,
        tx_pkt: VsockPacketTx,
        muxer: VsockMuxer,
    }

    impl Drop for MuxerTestContext {
        fn drop(&mut self) {
            // Clean up the Unix socket file the muxer bound on creation.
            std::fs::remove_file(self.muxer.host_sock_path.as_str()).unwrap();
        }
    }

    // Create a TempFile with a given prefix and return it as a nice String
    fn get_file(fprefix: &str) -> String {
        let listener_path = TempFile::new_with_prefix(fprefix).unwrap();
        listener_path
            .as_path()
            .as_os_str()
            .to_str()
            .unwrap()
            .to_owned()
    }

    impl MuxerTestContext {
        /// Build a fresh device context, parse one RX and one TX packet view out of its
        /// queues, and create a muxer bound to a temp-file path derived from `name`.
        fn new(name: &str) -> Self {
            let vsock_test_ctx = VsockTestContext::new();
            let mut handler_ctx = vsock_test_ctx.create_event_handler_context();
            let mut rx_pkt = VsockPacketRx::new().unwrap();
            rx_pkt
                .parse(
                    &vsock_test_ctx.mem,
                    handler_ctx.device.queues[RXQ_INDEX].pop().unwrap().unwrap(),
                )
                .unwrap();
            let mut tx_pkt = VsockPacketTx::default();
            tx_pkt
                .parse(
                    &vsock_test_ctx.mem,
                    handler_ctx.device.queues[TXQ_INDEX].pop().unwrap().unwrap(),
                )
                .unwrap();
            let muxer = VsockMuxer::new(PEER_CID, get_file(name)).unwrap();
            Self {
                _vsock_test_ctx: vsock_test_ctx,
                rx_pkt,
                tx_pkt,
                muxer,
            }
        }

        /// Fill in the TX packet header for a guest->host packet with the given ports and op.
        fn init_tx_pkt(&mut self, local_port: u32, peer_port: u32, op: u16) -> &mut VsockPacketTx {
            self.tx_pkt
                .hdr
                .set_type(uapi::VSOCK_TYPE_STREAM)
                .set_src_cid(PEER_CID)
                .set_dst_cid(uapi::VSOCK_HOST_CID)
                .set_src_port(peer_port)
                .set_dst_port(local_port)
                .set_op(op)
                .set_buf_alloc(PEER_BUF_ALLOC);
            &mut self.tx_pkt
        }

        /// Like `init_tx_pkt`, but for a data (VSOCK_OP_RW) packet carrying `data`.
        fn init_data_tx_pkt(
            &mut self,
            local_port: u32,
            peer_port: u32,
            mut data: &[u8],
        ) -> &mut VsockPacketTx {
            assert!(data.len() <= self.tx_pkt.buf_size() as usize);
            let tx_pkt = self.init_tx_pkt(local_port, peer_port, uapi::VSOCK_OP_RW);
            tx_pkt.hdr.set_len(u32::try_from(data.len()).unwrap());

            let data_len = data.len().try_into().unwrap(); // store in tmp var to make borrow checker happy.
            self.rx_pkt
                .read_at_offset_from(&mut data, 0, data_len)
                .unwrap();
            &mut self.tx_pkt
        }

        fn send(&mut self) {
            self.muxer.send_pkt(&self.tx_pkt).unwrap();
        }

        fn recv(&mut self) {
            self.muxer.recv_pkt(&mut self.rx_pkt).unwrap();
        }

        fn notify_muxer(&mut self) {
            self.muxer.notify(EventSet::IN);
        }

        /// Count the muxer's registered epoll listeners: (LocalStream count, Connection
        /// count).
        fn count_epoll_listeners(&self) -> (usize, usize) {
            let mut local_lsn_count = 0usize;
            let mut conn_lsn_count = 0usize;
            for key in self.muxer.listener_map.values() {
                match key {
                    EpollListener::LocalStream(_) => local_lsn_count += 1,
                    EpollListener::Connection { .. } => conn_lsn_count += 1,
                    _ => (),
                };
            }
            (local_lsn_count, conn_lsn_count)
        }

        fn create_local_listener(&self, port: u32) -> LocalListener {
            LocalListener::new(format!("{}_{}", self.muxer.host_sock_path, port))
        }

        /// Drive a full host-initiated ("local") connection handshake against the muxer,
        /// returning the connected stream and the local port the muxer allocated for it.
        fn local_connect(&mut self, peer_port: u32) -> (UnixStream, u32) {
            let (init_local_lsn_count, init_conn_lsn_count) = self.count_epoll_listeners();

            let mut stream = UnixStream::connect(self.muxer.host_sock_path.clone()).unwrap();
            stream.set_nonblocking(true).unwrap();
            // The muxer would now get notified of a new connection having arrived at its Unix
            // socket, so it can accept it.
            self.notify_muxer();

            // Just after having accepted a new local connection, the muxer should've added a
            // new `LocalStream` listener to its `listener_map`.
            let (local_lsn_count, _) = self.count_epoll_listeners();
            assert_eq!(local_lsn_count, init_local_lsn_count + 1);

            let buf = format!("CONNECT {}\n", peer_port);
            stream.write_all(buf.as_bytes()).unwrap();
            // The muxer would now get notified that data is available for reading from the
            // locally initiated connection.
            self.notify_muxer();

            // Successfully reading and parsing the connection request should have removed the
            // LocalStream epoll listener and added a Connection epoll listener.
let (local_lsn_count, conn_lsn_count) = self.count_epoll_listeners(); assert_eq!(local_lsn_count, init_local_lsn_count); assert_eq!(conn_lsn_count, init_conn_lsn_count + 1); // A LocalInit connection should've been added to the muxer connection map. A new // local port should also have been allocated for the new LocalInit connection. let local_port = self.muxer.local_port_last; let key = ConnMapKey { local_port, peer_port, }; assert!(self.muxer.conn_map.contains_key(&key)); assert!(self.muxer.local_port_set.contains(&local_port)); // A connection request for the peer should now be available from the muxer. assert!(self.muxer.has_pending_rx()); self.recv(); assert_eq!(self.rx_pkt.hdr.op(), uapi::VSOCK_OP_REQUEST); assert_eq!(self.rx_pkt.hdr.dst_port(), peer_port); assert_eq!(self.rx_pkt.hdr.src_port(), local_port); self.init_tx_pkt(local_port, peer_port, uapi::VSOCK_OP_RESPONSE); self.send(); let mut buf = [0u8; 32]; let len = stream.read(&mut buf[..]).unwrap(); assert_eq!(&buf[..len], format!("OK {}\n", local_port).as_bytes()); (stream, local_port) } } #[derive(Debug)] struct LocalListener { path: PathBuf, sock: UnixListener, } impl LocalListener { fn new + Clone + Debug>(path: P) -> Self { let path_buf = path.as_ref().to_path_buf(); let sock = UnixListener::bind(path).unwrap(); sock.set_nonblocking(true).unwrap(); Self { path: path_buf, sock, } } fn accept(&mut self) -> UnixStream { let (stream, _) = self.sock.accept().unwrap(); stream.set_nonblocking(true).unwrap(); stream } } impl Drop for LocalListener { fn drop(&mut self) { std::fs::remove_file(&self.path).unwrap(); } } #[test] fn test_muxer_epoll_listener() { let ctx = MuxerTestContext::new("muxer_epoll_listener"); assert_eq!(ctx.muxer.as_raw_fd(), ctx.muxer.epoll.as_raw_fd()); assert_eq!(ctx.muxer.get_polled_evset(), EventSet::IN); } #[test] fn test_muxer_epoll_listener_regression() { let mut ctx = MuxerTestContext::new("muxer_epoll_listener"); ctx.local_connect(1025); let (_, conn) = 
ctx.muxer.conn_map.iter().next().unwrap(); assert_eq!(conn.get_polled_evset(), EventSet::IN); assert_eq!(METRICS.conn_event_fails.count(), 0); let conn_eventfd = conn.as_raw_fd(); ctx.muxer.handle_event(conn_eventfd, EventSet::OUT); assert_eq!(METRICS.conn_event_fails.count(), 1); } #[test] fn test_bad_peer_pkt() { const LOCAL_PORT: u32 = 1026; const PEER_PORT: u32 = 1025; const SOCK_DGRAM: u16 = 2; let mut ctx = MuxerTestContext::new("bad_peer_pkt"); let tx_pkt = ctx.init_tx_pkt(LOCAL_PORT, PEER_PORT, uapi::VSOCK_OP_REQUEST); tx_pkt.hdr.set_type(SOCK_DGRAM); ctx.send(); // The guest sent a SOCK_DGRAM packet. Per the vsock spec, we need to reply with an RST // packet, since vsock only supports stream sockets. assert!(ctx.muxer.has_pending_rx()); ctx.recv(); assert_eq!(ctx.rx_pkt.hdr.op(), uapi::VSOCK_OP_RST); assert_eq!(ctx.rx_pkt.hdr.src_cid(), uapi::VSOCK_HOST_CID); assert_eq!(ctx.rx_pkt.hdr.dst_cid(), PEER_CID); assert_eq!(ctx.rx_pkt.hdr.src_port(), LOCAL_PORT); assert_eq!(ctx.rx_pkt.hdr.dst_port(), PEER_PORT); // Any orphan (i.e. without a connection), non-RST packet, should be replied to with an // RST. let bad_ops = [ uapi::VSOCK_OP_RESPONSE, uapi::VSOCK_OP_CREDIT_REQUEST, uapi::VSOCK_OP_CREDIT_UPDATE, uapi::VSOCK_OP_SHUTDOWN, uapi::VSOCK_OP_RW, ]; for op in bad_ops.iter() { ctx.init_tx_pkt(LOCAL_PORT, PEER_PORT, *op); ctx.send(); assert!(ctx.muxer.has_pending_rx()); ctx.recv(); assert_eq!(ctx.rx_pkt.hdr.op(), uapi::VSOCK_OP_RST); assert_eq!(ctx.rx_pkt.hdr.src_port(), LOCAL_PORT); assert_eq!(ctx.rx_pkt.hdr.dst_port(), PEER_PORT); } // Any packet addressed to anything other than VSOCK_VHOST_CID should get dropped. 
assert!(!ctx.muxer.has_pending_rx()); let tx_pkt = ctx.init_tx_pkt(LOCAL_PORT, PEER_PORT, uapi::VSOCK_OP_REQUEST); tx_pkt.hdr.set_dst_cid(uapi::VSOCK_HOST_CID + 1); ctx.send(); assert!(!ctx.muxer.has_pending_rx()); } #[test] fn test_peer_connection() { const LOCAL_PORT: u32 = 1026; const PEER_PORT: u32 = 1025; let mut ctx = MuxerTestContext::new("peer_connection"); // Test peer connection refused. ctx.init_tx_pkt(LOCAL_PORT, PEER_PORT, uapi::VSOCK_OP_REQUEST); ctx.send(); ctx.recv(); assert_eq!(ctx.rx_pkt.hdr.op(), uapi::VSOCK_OP_RST); assert_eq!(ctx.rx_pkt.hdr.len(), 0); assert_eq!(ctx.rx_pkt.hdr.src_cid(), uapi::VSOCK_HOST_CID); assert_eq!(ctx.rx_pkt.hdr.dst_cid(), PEER_CID); assert_eq!(ctx.rx_pkt.hdr.src_port(), LOCAL_PORT); assert_eq!(ctx.rx_pkt.hdr.dst_port(), PEER_PORT); // Test peer connection accepted. let mut listener = ctx.create_local_listener(LOCAL_PORT); ctx.init_tx_pkt(LOCAL_PORT, PEER_PORT, uapi::VSOCK_OP_REQUEST); ctx.send(); assert_eq!(ctx.muxer.conn_map.len(), 1); let mut stream = listener.accept(); ctx.recv(); assert_eq!(ctx.rx_pkt.hdr.op(), uapi::VSOCK_OP_RESPONSE); assert_eq!(ctx.rx_pkt.hdr.len(), 0); assert_eq!(ctx.rx_pkt.hdr.src_cid(), uapi::VSOCK_HOST_CID); assert_eq!(ctx.rx_pkt.hdr.dst_cid(), PEER_CID); assert_eq!(ctx.rx_pkt.hdr.src_port(), LOCAL_PORT); assert_eq!(ctx.rx_pkt.hdr.dst_port(), PEER_PORT); let key = ConnMapKey { local_port: LOCAL_PORT, peer_port: PEER_PORT, }; assert!(ctx.muxer.conn_map.contains_key(&key)); // Test guest -> host data flow. let data = [1, 2, 3, 4]; ctx.init_data_tx_pkt(LOCAL_PORT, PEER_PORT, &data); ctx.send(); let mut buf = vec![0; data.len()]; stream.read_exact(buf.as_mut_slice()).unwrap(); assert_eq!(buf.as_slice(), data); // Test host -> guest data flow. let data = [5u8, 6, 7, 8]; stream.write_all(&data).unwrap(); // When data is available on the local stream, an EPOLLIN event would normally be delivered // to the muxer's nested epoll FD. For testing only, we can fake that event notification // here. 
ctx.notify_muxer(); // After being notified, the muxer should've figured out that RX data was available for one // of its connections, so it should now be reporting that it can fill in an RX packet. assert!(ctx.muxer.has_pending_rx()); ctx.recv(); assert_eq!(ctx.rx_pkt.hdr.op(), uapi::VSOCK_OP_RW); assert_eq!(ctx.rx_pkt.hdr.src_port(), LOCAL_PORT); assert_eq!(ctx.rx_pkt.hdr.dst_port(), PEER_PORT); let buf = test_utils::read_packet_data(&ctx.tx_pkt, 4); assert_eq!(&buf, &data); assert!(!ctx.muxer.has_pending_rx()); } #[test] fn test_local_connection() { // Test guest -> host data flow. let mut ctx = MuxerTestContext::new("local_connection"); let peer_port = 1025; let (mut stream, local_port) = ctx.local_connect(peer_port); let data = [1, 2, 3, 4]; ctx.init_data_tx_pkt(local_port, peer_port, &data); ctx.send(); let mut buf = vec![0u8; data.len()]; stream.read_exact(buf.as_mut_slice()).unwrap(); assert_eq!(buf.as_slice(), &data); // Test host -> guest data flow. let mut ctx = MuxerTestContext::new("local_connection"); let peer_port = 1025; let (mut stream, local_port) = ctx.local_connect(peer_port); let data = [5, 6, 7, 8]; stream.write_all(&data).unwrap(); ctx.notify_muxer(); assert!(ctx.muxer.has_pending_rx()); ctx.recv(); assert_eq!(ctx.rx_pkt.hdr.op(), uapi::VSOCK_OP_RW); assert_eq!(ctx.rx_pkt.hdr.src_port(), local_port); assert_eq!(ctx.rx_pkt.hdr.dst_port(), peer_port); let buf = test_utils::read_packet_data(&ctx.tx_pkt, 4); assert_eq!(&buf, &data); } #[test] fn test_local_close() { let peer_port = 1025; let mut ctx = MuxerTestContext::new("local_close"); let local_port; { let (_stream, local_port_) = ctx.local_connect(peer_port); local_port = local_port_; } // Local var `_stream` was now dropped, thus closing the local stream. After the muxer gets // notified via EPOLLIN, it should attempt to gracefully shutdown the connection, issuing a // VSOCK_OP_SHUTDOWN with both no-more-send and no-more-recv indications set. 
ctx.notify_muxer(); assert!(ctx.muxer.has_pending_rx()); ctx.recv(); assert_eq!(ctx.rx_pkt.hdr.op(), uapi::VSOCK_OP_SHUTDOWN); assert_ne!(ctx.rx_pkt.hdr.flags() & uapi::VSOCK_FLAGS_SHUTDOWN_SEND, 0); assert_ne!(ctx.rx_pkt.hdr.flags() & uapi::VSOCK_FLAGS_SHUTDOWN_RCV, 0); assert_eq!(ctx.rx_pkt.hdr.src_port(), local_port); assert_eq!(ctx.rx_pkt.hdr.dst_port(), peer_port); // The connection should get removed (and its local port freed), after the peer replies // with an RST. ctx.init_tx_pkt(local_port, peer_port, uapi::VSOCK_OP_RST); ctx.send(); let key = ConnMapKey { local_port, peer_port, }; assert!(!ctx.muxer.conn_map.contains_key(&key)); assert!(!ctx.muxer.local_port_set.contains(&local_port)); } #[test] fn test_peer_close() { let peer_port = 1025; let local_port = 1026; let mut ctx = MuxerTestContext::new("peer_close"); let mut sock = ctx.create_local_listener(local_port); ctx.init_tx_pkt(local_port, peer_port, uapi::VSOCK_OP_REQUEST); ctx.send(); let mut stream = sock.accept(); assert!(ctx.muxer.has_pending_rx()); ctx.recv(); assert_eq!(ctx.rx_pkt.hdr.op(), uapi::VSOCK_OP_RESPONSE); assert_eq!(ctx.rx_pkt.hdr.src_port(), local_port); assert_eq!(ctx.rx_pkt.hdr.dst_port(), peer_port); let key = ConnMapKey { local_port, peer_port, }; assert!(ctx.muxer.conn_map.contains_key(&key)); // Emulate a full shutdown from the peer (no-more-send + no-more-recv). let tx_pkt = ctx.init_tx_pkt(local_port, peer_port, uapi::VSOCK_OP_SHUTDOWN); tx_pkt.hdr.set_flag(uapi::VSOCK_FLAGS_SHUTDOWN_SEND); tx_pkt.hdr.set_flag(uapi::VSOCK_FLAGS_SHUTDOWN_RCV); ctx.send(); // Now, the muxer should remove the connection from its map, and reply with an RST. 
assert!(ctx.muxer.has_pending_rx()); ctx.recv(); assert_eq!(ctx.rx_pkt.hdr.op(), uapi::VSOCK_OP_RST); assert_eq!(ctx.rx_pkt.hdr.src_port(), local_port); assert_eq!(ctx.rx_pkt.hdr.dst_port(), peer_port); let key = ConnMapKey { local_port, peer_port, }; assert!(!ctx.muxer.conn_map.contains_key(&key)); // The muxer should also drop / close the local Unix socket for this connection. let mut buf = vec![0u8; 16]; assert_eq!(stream.read(buf.as_mut_slice()).unwrap(), 0); } #[test] fn test_muxer_rxq() { let mut ctx = MuxerTestContext::new("muxer_rxq"); let local_port = 1026; let peer_port_first = 1025; let mut listener = ctx.create_local_listener(local_port); let mut streams: Vec = Vec::new(); for peer_port in peer_port_first..peer_port_first + defs::MUXER_RXQ_SIZE { ctx.init_tx_pkt(local_port, peer_port, uapi::VSOCK_OP_REQUEST); ctx.send(); streams.push(listener.accept()); } // The muxer RX queue should now be full (with connection reponses), but still // synchronized. assert!(ctx.muxer.rxq.is_synced()); // One more queued reply should desync the RX queue. ctx.init_tx_pkt( local_port, peer_port_first + defs::MUXER_RXQ_SIZE, uapi::VSOCK_OP_REQUEST, ); ctx.send(); assert!(!ctx.muxer.rxq.is_synced()); // With an out-of-sync queue, an RST should evict any non-RST packet from the queue, and // take its place. We'll check that by making sure that the last packet popped from the // queue is an RST. ctx.init_tx_pkt(local_port + 1, peer_port_first, uapi::VSOCK_OP_REQUEST); ctx.send(); for peer_port in peer_port_first..peer_port_first + defs::MUXER_RXQ_SIZE - 1 { ctx.recv(); assert_eq!(ctx.rx_pkt.hdr.op(), uapi::VSOCK_OP_RESPONSE); // The response order should hold. The evicted response should have been the last // enqueued. assert_eq!(ctx.rx_pkt.hdr.dst_port(), peer_port); } // There should be one more packet in the queue: the RST. 
assert_eq!(ctx.muxer.rxq.len(), 1); ctx.recv(); assert_eq!(ctx.rx_pkt.hdr.op(), uapi::VSOCK_OP_RST); // The queue should now be empty, but out-of-sync, so the muxer should report it has some // pending RX. assert!(ctx.muxer.rxq.is_empty()); assert!(!ctx.muxer.rxq.is_synced()); assert!(ctx.muxer.has_pending_rx()); // The next recv should sync the queue back up. It should also yield one of the two // responses that are still left: // - the one that desynchronized the queue; and // - the one that got evicted by the RST. ctx.recv(); assert!(ctx.muxer.rxq.is_synced()); assert_eq!(ctx.rx_pkt.hdr.op(), uapi::VSOCK_OP_RESPONSE); assert!(ctx.muxer.has_pending_rx()); ctx.recv(); assert_eq!(ctx.rx_pkt.hdr.op(), uapi::VSOCK_OP_RESPONSE); } #[test] fn test_muxer_killq() { let mut ctx = MuxerTestContext::new("muxer_killq"); let local_port = 1026; let peer_port_first = 1025; let peer_port_last = peer_port_first + defs::MUXER_KILLQ_SIZE; let mut listener = ctx.create_local_listener(local_port); // Save metrics relevant for this test. let conns_added = METRICS.conns_added.count(); let conns_killed = METRICS.conns_killed.count(); let conns_removed = METRICS.conns_removed.count(); let killq_resync = METRICS.killq_resync.count(); for peer_port in peer_port_first..=peer_port_last { ctx.init_tx_pkt(local_port, peer_port, uapi::VSOCK_OP_REQUEST); ctx.send(); ctx.notify_muxer(); ctx.recv(); assert_eq!(ctx.rx_pkt.hdr.op(), uapi::VSOCK_OP_RESPONSE); assert_eq!(ctx.rx_pkt.hdr.src_port(), local_port); assert_eq!(ctx.rx_pkt.hdr.dst_port(), peer_port); { let _stream = listener.accept(); } ctx.notify_muxer(); ctx.recv(); assert_eq!(ctx.rx_pkt.hdr.op(), uapi::VSOCK_OP_SHUTDOWN); assert_eq!(ctx.rx_pkt.hdr.src_port(), local_port); assert_eq!(ctx.rx_pkt.hdr.dst_port(), peer_port); // The kill queue should be synchronized, up until the `defs::MUXER_KILLQ_SIZE`th // connection we schedule for termination. 
assert_eq!( ctx.muxer.killq.is_synced(), peer_port < peer_port_first + defs::MUXER_KILLQ_SIZE ); } assert!(!ctx.muxer.killq.is_synced()); assert!(!ctx.muxer.has_pending_rx()); // Wait for the kill timers to expire. std::thread::sleep(std::time::Duration::from_millis( csm_defs::CONN_SHUTDOWN_TIMEOUT_MS, )); // Trigger a kill queue sweep, by requesting a new connection. ctx.init_tx_pkt(local_port, peer_port_last + 1, uapi::VSOCK_OP_REQUEST); ctx.send(); // Check that MUXER_KILLQ_SIZE + 2 connections were added // We count +2, because there are two extra connections being // done outside of the loop. assert_eq!( METRICS.conns_added.count(), conns_added + u64::from(defs::MUXER_KILLQ_SIZE) + 2 ); // Check that MUXER_KILLQ_SIZE connections were killed assert_eq!( METRICS.conns_killed.count(), conns_killed + u64::from(defs::MUXER_KILLQ_SIZE) ); // No connections should be removed at this point. assert_eq!(METRICS.conns_removed.count(), conns_removed); assert_eq!(METRICS.killq_resync.count(), killq_resync + 1); // After sweeping the kill queue, it should now be synced (assuming the RX queue is larger // than the kill queue, since an RST packet will be queued for each killed connection). assert!(ctx.muxer.killq.is_synced()); assert!(ctx.muxer.has_pending_rx()); // There should be `defs::MUXER_KILLQ_SIZE` RSTs in the RX queue, from terminating the // dying connections in the recent killq sweep. for _p in peer_port_first..peer_port_last { ctx.recv(); assert_eq!(ctx.rx_pkt.hdr.op(), uapi::VSOCK_OP_RST); assert_eq!(ctx.rx_pkt.hdr.src_port(), local_port); } // The connections should have been removed here. assert_eq!( METRICS.conns_removed.count(), conns_removed + u64::from(defs::MUXER_KILLQ_SIZE) ); // There should be one more packet in the RX queue: the connection response our request // that triggered the kill queue sweep. 
ctx.recv(); assert_eq!(ctx.rx_pkt.hdr.op(), uapi::VSOCK_OP_RESPONSE); assert_eq!(ctx.rx_pkt.hdr.dst_port(), peer_port_last + 1); assert!(!ctx.muxer.has_pending_rx()); } #[test] fn test_regression_handshake() { // Address one of the issues found while fixing the following issue: // https://github.com/firecracker-microvm/firecracker/issues/1751 // This test checks that the handshake message is not accounted for let mut ctx = MuxerTestContext::new("regression_handshake"); let peer_port = 1025; // Create a local connection. let (_, local_port) = ctx.local_connect(peer_port); // Get the connection from the connection map. let key = ConnMapKey { local_port, peer_port, }; let conn = ctx.muxer.conn_map.get_mut(&key).unwrap(); // Check that fwd_cnt is 0 - "OK ..." was not accounted for. assert_eq!(conn.fwd_cnt().0, 0); } #[test] fn test_regression_rxq_pop() { // Address one of the issues found while fixing the following issue: // https://github.com/firecracker-microvm/firecracker/issues/1751 // This test checks that a connection is not popped out of the muxer // rxq when multiple flags are set let mut ctx = MuxerTestContext::new("regression_rxq_pop"); let peer_port = 1025; let (mut stream, local_port) = ctx.local_connect(peer_port); // Send some data. let data = [5u8, 6, 7, 8]; stream.write_all(&data).unwrap(); ctx.notify_muxer(); // Get the connection from the connection map. let key = ConnMapKey { local_port, peer_port, }; let conn = ctx.muxer.conn_map.get_mut(&key).unwrap(); // Forcefully insert another flag. conn.insert_credit_update(); // Call recv twice in order to check that the connection is still // in the rxq. assert!(ctx.muxer.has_pending_rx()); ctx.recv(); assert!(ctx.muxer.has_pending_rx()); ctx.recv(); // Since initially the connection had two flags set, now there should // not be any pending RX in the muxer. assert!(!ctx.muxer.has_pending_rx()); } #[test] fn test_vsock_basic_metrics() { // Save the metrics values that we need tested. 
let mut tx_packets_count = METRICS.tx_packets_count.count(); let mut rx_packets_count = METRICS.rx_packets_count.count(); let tx_bytes_count = METRICS.tx_bytes_count.count(); let rx_bytes_count = METRICS.rx_bytes_count.count(); let conns_added = METRICS.conns_added.count(); let conns_removed = METRICS.conns_removed.count(); // Create a basic connection. let mut ctx = MuxerTestContext::new("vsock_basic_metrics"); let peer_port = 1025; let (mut stream, local_port) = ctx.local_connect(peer_port); // Once the handshake is done, we check that the TX bytes count has // not been increased. assert_eq!(METRICS.tx_bytes_count.count(), tx_bytes_count); // Check that one packet was sent through the handshake. assert_eq!(METRICS.tx_packets_count.count(), tx_packets_count + 1); tx_packets_count = METRICS.tx_packets_count.count(); // Check that one packet was received through the handshake. assert_eq!(METRICS.rx_packets_count.count(), rx_packets_count + 1); rx_packets_count = METRICS.rx_packets_count.count(); // Check that a new connection was added. assert_eq!(METRICS.conns_added.count(), conns_added + 1); // Send some data from guest to host. let data = [1, 2, 3, 4]; ctx.init_data_tx_pkt(local_port, peer_port, &data); ctx.send(); // Check that tx_bytes was incremented. assert_eq!( METRICS.tx_bytes_count.count(), tx_bytes_count + data.len() as u64 ); // Check that one packet was accounted for. assert_eq!(METRICS.tx_packets_count.count(), tx_packets_count + 1); // Send some data from the host to the guest. let data = [1, 2, 3, 4, 5, 6]; stream.write_all(&data).unwrap(); ctx.notify_muxer(); ctx.recv(); // Check that a packet was received. assert_eq!(METRICS.rx_packets_count.count(), rx_packets_count + 1); // Check that the 6 bytes have been received. assert_eq!( METRICS.rx_bytes_count.count(), rx_bytes_count + data.len() as u64 ); // Send a connection reset. ctx.init_tx_pkt(local_port, peer_port, uapi::VSOCK_OP_RST); ctx.send(); // Check that the connection was removed. 
assert_eq!(METRICS.conns_removed.count(), conns_removed + 1); } } ================================================ FILE: src/vmm/src/devices/virtio/vsock/unix/muxer_killq.rs ================================================ // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 // /// `MuxerKillQ` implements a helper object that `VsockMuxer` can use for scheduling forced /// connection termination. I.e. after one peer issues a clean shutdown request /// (VSOCK_OP_SHUTDOWN), the concerned connection is queued for termination (VSOCK_OP_RST) in /// the near future (herein implemented via an expiring timer). /// /// Whenever the muxer needs to schedule a connection for termination, it pushes it (or rather /// an identifier - the connection key) to this queue. A subsequent pop() operation will /// succeed if and only if the first connection in the queue is ready to be terminated (i.e. /// its kill timer expired). /// /// Without using this queue, the muxer would have to walk its entire connection pool /// (hashmap), whenever it needs to check for expired kill timers. With this queue, both /// scheduling and termination are performed in constant time. However, since we don't want to /// waste space on a kill queue that's as big as the connection hashmap itself, it is possible /// that this queue may become full at times. We call this kill queue "synchronized" if we are /// certain that all connections that are awaiting termination are present in the queue. This /// means a simple constant-time pop() operation is enough to check whether any connections /// need to be terminated. When the kill queue becomes full, though, pushing fails, so /// connections that should be terminated are left out. The queue is not synchronized anymore. 
/// When that happens, the muxer will first drain the queue, and then replace it with a new /// queue, created by walking the connection pool, looking for connections that will be /// expiring in the future. use std::collections::{HashMap, VecDeque}; use std::time::Instant; use super::muxer::ConnMapKey; use super::{MuxerConnection, defs}; /// A kill queue item, holding the connection key and the scheduled time for termination. #[derive(Debug, Clone, Copy)] struct MuxerKillQItem { key: ConnMapKey, kill_time: Instant, } /// The connection kill queue: a FIFO structure, storing the connections that are scheduled for /// termination. #[derive(Debug)] pub struct MuxerKillQ { /// The kill queue contents. q: VecDeque, /// The kill queue sync status: /// - when true, all connections that are awaiting termination are guaranteed to be in this /// queue; /// - when false, some connections may have been left out. synced: bool, } impl MuxerKillQ { const SIZE: usize = defs::MUXER_KILLQ_SIZE as usize; /// Trivial kill queue constructor. pub fn new() -> Self { Self { q: VecDeque::with_capacity(Self::SIZE), synced: true, } } /// Create a kill queue by walking the connection pool, looking for connections that are /// set to expire at some point in the future. /// Note: if more than `Self::SIZE` connections are found, the queue will be created in an /// out-of-sync state, and will be discarded after it is emptied. pub fn from_conn_map(conn_map: &HashMap) -> Self { let mut q_buf: Vec = Vec::with_capacity(Self::SIZE); let mut synced = true; for (key, conn) in conn_map.iter() { if !conn.will_expire() { continue; } if q_buf.len() >= Self::SIZE { synced = false; break; } q_buf.push(MuxerKillQItem { key: *key, kill_time: conn.expiry().unwrap(), }); } q_buf.sort_unstable_by_key(|it| it.kill_time); Self { q: q_buf.into(), synced, } } /// Push a connection key to the queue, scheduling it for termination at /// `CONN_SHUTDOWN_TIMEOUT_MS` from now (the push time). 
pub fn push(&mut self, key: ConnMapKey, kill_time: Instant) { if !self.is_synced() || self.is_full() { self.synced = false; return; } self.q.push_back(MuxerKillQItem { key, kill_time }); } /// Attempt to pop an expired connection from the kill queue. /// /// This will succeed and return a connection key, only if the connection at the front of /// the queue has expired. Otherwise, `None` is returned. pub fn pop(&mut self) -> Option { if let Some(item) = self.q.front() && Instant::now() > item.kill_time { return self.q.pop_front().map(|entry| entry.key); } None } /// Check if the kill queue is synchronized with the connection pool. pub fn is_synced(&self) -> bool { self.synced } /// Check if the kill queue is empty, obviously. pub fn is_empty(&self) -> bool { self.q.len() == 0 } /// Check if the kill queue is full. pub fn is_full(&self) -> bool { self.q.len() == Self::SIZE } } ================================================ FILE: src/vmm/src/devices/virtio/vsock/unix/muxer_rxq.rs ================================================ // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 // /// `MuxerRxQ` implements a helper object that `VsockMuxer` can use for queuing RX (host -> /// guest) packets (or rather instructions on how to build said packets). /// /// Under ideal operation, every connection, that has pending RX data, will be present in the /// muxer RX queue. However, since the RX queue is smaller than the connection pool, it may, /// under some conditions, become full, meaning that it can no longer account for all the /// connections that can yield RX data. When that happens, we say that it is no longer /// "synchronized" (i.e. with the connection pool). A desynchronized RX queue still holds /// valid data, and the muxer will continue to pop packets from it. 
However, when a
/// desynchronized queue is drained, additional data may still be available, so the muxer will
/// have to perform a more costly walk of the entire connection pool to find it. This walk is
/// performed here, as part of building an RX queue from the connection pool. When an
/// out-of-sync is drained, the muxer will discard it, and attempt to rebuild a synced one.
use std::collections::{HashMap, VecDeque};

use super::super::VsockChannel;
use super::muxer::{ConnMapKey, MuxerRx};
use super::{MuxerConnection, defs};

/// The muxer RX queue.
#[derive(Debug)]
pub struct MuxerRxQ {
    /// The RX queue data.
    // NOTE(review): extraction stripped `<MuxerRx>`; restored.
    q: VecDeque<MuxerRx>,
    /// The RX queue sync status.
    synced: bool,
}

impl MuxerRxQ {
    const SIZE: usize = defs::MUXER_RXQ_SIZE as usize;

    /// Trivial RX queue constructor.
    pub fn new() -> Self {
        Self {
            q: VecDeque::with_capacity(Self::SIZE),
            synced: true,
        }
    }

    /// Attempt to build an RX queue, that is synchronized to the connection pool.
    /// Note: the resulting queue may still be desynchronized, if there are too many connections
    /// that have pending RX data. In that case, the muxer will first drain this queue, and
    /// then try again to build a synchronized one.
    pub fn from_conn_map(conn_map: &HashMap<ConnMapKey, MuxerConnection>) -> Self {
        let mut q = VecDeque::new();
        let mut synced = true;
        for (key, conn) in conn_map.iter() {
            if !conn.has_pending_rx() {
                continue;
            }
            if q.len() >= Self::SIZE {
                // Too many RX-ready connections to track: the rebuilt queue starts
                // out-of-sync and will be discarded once drained.
                synced = false;
                break;
            }
            q.push_back(MuxerRx::ConnRx(*key));
        }
        Self { q, synced }
    }

    /// Push a new RX item to the queue.
    ///
    /// A push will fail when:
    /// - trying to push a connection key onto an out-of-sync, or full queue; or
    /// - trying to push an RST onto a queue already full of RSTs.
    ///
    /// RSTs take precedence over connections, because connections can always be queried for
    /// pending RX data later. Aside from this queue, there is no other storage for RSTs, so
    /// failing to push one means that we have to drop the packet.
    ///
    /// Returns:
    /// - `true` if the new item has been successfully queued; or
    /// - `false` if there was no room left in the queue.
    pub fn push(&mut self, rx: MuxerRx) -> bool {
        // Pushing to a non-full, synchronized queue will always succeed.
        if self.is_synced() && !self.is_full() {
            self.q.push_back(rx);
            return true;
        }
        match rx {
            MuxerRx::RstPkt { .. } => {
                // If we just failed to push an RST packet, we'll look through the queue, trying to
                // find a connection key that we could evict. This way, the queue does lose sync,
                // but we don't drop any packets.
                for qi in self.q.iter_mut().rev() {
                    if let MuxerRx::ConnRx(_) = qi {
                        *qi = rx;
                        self.synced = false;
                        return true;
                    }
                }
            }
            MuxerRx::ConnRx(_) => {
                self.synced = false;
            }
        };
        false
    }

    /// Peek into the front of the queue.
    pub fn peek(&self) -> Option<MuxerRx> {
        self.q.front().copied()
    }

    /// Pop an RX item from the front of the queue.
    pub fn pop(&mut self) -> Option<MuxerRx> {
        self.q.pop_front()
    }

    /// Check if the RX queue is synchronized with the connection pool.
    pub fn is_synced(&self) -> bool {
        self.synced
    }

    /// Get the total number of items in the queue.
    pub fn len(&self) -> usize {
        self.q.len()
    }

    /// Check if the queue is empty.
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Check if the queue is full.
    pub fn is_full(&self) -> bool {
        self.len() == Self::SIZE
    }
}


================================================
FILE: src/vmm/src/dumbo/mod.rs
================================================
// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

//! Provides helper logic for parsing and writing protocol data units, and minimalist
//! implementations of a TCP listener, a TCP connection, and an HTTP/1.1 server.
pub mod pdu; pub mod tcp; use std::ops::Index; pub use crate::dumbo::pdu::arp::{ETH_IPV4_FRAME_LEN, EthIPv4ArpFrame}; pub use crate::dumbo::pdu::ethernet::{ ETHERTYPE_ARP, ETHERTYPE_IPV4, EthernetFrame, PAYLOAD_OFFSET as ETHERNET_PAYLOAD_OFFSET, }; pub use crate::dumbo::pdu::ipv4::{IPv4Packet, PROTOCOL_TCP, PROTOCOL_UDP}; use crate::utils::net::mac::MacAddr; /// Represents a generalization of a borrowed `[u8]` slice. #[allow(clippy::len_without_is_empty)] pub trait ByteBuffer: Index { /// Returns the length of the buffer. fn len(&self) -> usize; /// Reads `buf.len()` bytes from `self` into `buf`, starting at `offset`. /// /// # Panics /// /// Panics if `offset + buf.len()` > `self.len()`. fn read_to_slice(&self, offset: usize, buf: &mut [u8]); } impl ByteBuffer for [u8] { #[inline] fn len(&self) -> usize { self.len() } #[inline] fn read_to_slice(&self, offset: usize, buf: &mut [u8]) { let buf_len = buf.len(); buf.copy_from_slice(&self[offset..offset + buf_len]); } } #[cfg(test)] mod tests { use std::fmt::Debug; use super::*; fn bb_len(buf: &T) -> usize { buf.len() } fn bb_is_empty(buf: &T) -> bool { buf.len() == 0 } fn bb_read_from_1(src: &T, dst: &mut [u8]) { src.read_to_slice(1, dst); } #[test] fn test_u8_byte_buffer() { let a = [1u8, 2, 3]; let mut b = [0u8; 2]; assert_eq!(bb_len(a.as_ref()), a.len()); assert!(!bb_is_empty(a.as_ref())); bb_read_from_1(a.as_ref(), b.as_mut()); assert_eq!(b, [2, 3]); } } ================================================ FILE: src/vmm/src/dumbo/pdu/arp.rs ================================================ // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 //! Contains logic that helps with handling ARP frames over Ethernet, which encapsulate requests //! or replies related to IPv4 addresses. //! //! A more detailed view of an ARP frame can be found [here]. //! //! 
[here]: https://en.wikipedia.org/wiki/Address_Resolution_Protocol use std::convert::From; use std::fmt::Debug; use std::net::Ipv4Addr; use super::bytes::{InnerBytes, NetworkBytes, NetworkBytesMut}; use super::ethernet::{self, ETHERTYPE_IPV4}; use crate::utils::net::mac::{MAC_ADDR_LEN, MacAddr}; /// ARP Request operation pub const OPER_REQUEST: u16 = 0x0001; /// ARP Reply operation pub const OPER_REPLY: u16 = 0x0002; /// ARP is for Ethernet hardware pub const HTYPE_ETHERNET: u16 = 0x0001; /// The length of an ARP frame for IPv4 over Ethernet. pub const ETH_IPV4_FRAME_LEN: usize = 28; const HTYPE_OFFSET: usize = 0; const PTYPE_OFFSET: usize = 2; const HLEN_OFFSET: usize = 4; const PLEN_OFFSET: usize = 5; const OPER_OFFSET: usize = 6; const SHA_OFFSET: usize = 8; // The following constants are specific to ARP requests/responses // associated with IPv4 over Ethernet. const ETH_IPV4_SPA_OFFSET: usize = 14; const ETH_IPV4_THA_OFFSET: usize = 18; const ETH_IPV4_TPA_OFFSET: usize = 24; const IPV4_ADDR_LEN: u8 = 4; /// Represents errors which may occur while parsing or writing a frame. #[derive(Debug, PartialEq, Eq, thiserror::Error, displaydoc::Display)] pub enum ArpError { /// Invalid hardware address length. HLen, /// Invalid hardware type. HType, /// Invalid operation. Operation, /// Invalid protocol address length. PLen, /// Invalid protocol type. PType, /// The provided slice does not fit the size of a frame. SliceExactLen, } /// The inner bytes will be interpreted as an ARP frame. /// /// ARP is a generic protocol as far as data /// link layer and network layer protocols go, but this particular implementation is concerned with /// ARP frames related to IPv4 over Ethernet. #[derive(Debug)] pub struct EthIPv4ArpFrame<'a, T: 'a> { bytes: InnerBytes<'a, T>, } #[allow(clippy::len_without_is_empty)] impl EthIPv4ArpFrame<'_, T> { /// Interprets the given bytes as an ARP frame, without doing any validity checks beforehand. 
/// /// # Panics /// /// This method does not panic, but further method calls on the resulting object may panic if /// `bytes` contains invalid input. #[inline] pub fn from_bytes_unchecked(bytes: T) -> Self { EthIPv4ArpFrame { bytes: InnerBytes::new(bytes), } } /// Tries to interpret a byte slice as a valid IPv4 over Ethernet ARP request. /// /// If no error occurs, it guarantees accessor methods (which make use of various `_unchecked` /// functions) are safe to call on the result, because all predefined offsets will be valid. pub fn request_from_bytes(bytes: T) -> Result { // This kind of frame has a fixed length, so we know what to expect. if bytes.len() != ETH_IPV4_FRAME_LEN { return Err(ArpError::SliceExactLen); } let maybe = EthIPv4ArpFrame::from_bytes_unchecked(bytes); if maybe.htype() != HTYPE_ETHERNET { return Err(ArpError::HType); } if maybe.ptype() != ETHERTYPE_IPV4 { return Err(ArpError::PType); } // We could theoretically skip the hlen and plen checks, since they are kinda implicit. if maybe.hlen() != MAC_ADDR_LEN { return Err(ArpError::HLen); } if maybe.plen() != IPV4_ADDR_LEN { return Err(ArpError::PLen); } if maybe.operation() != OPER_REQUEST { return Err(ArpError::Operation); } Ok(maybe) } /// Returns the hardware type of the frame. #[inline] pub fn htype(&self) -> u16 { self.bytes.ntohs_unchecked(HTYPE_OFFSET) } /// Returns the protocol type of the frame. #[inline] pub fn ptype(&self) -> u16 { self.bytes.ntohs_unchecked(PTYPE_OFFSET) } /// Returns the hardware address length of the frame. #[inline] pub fn hlen(&self) -> u8 { self.bytes[HLEN_OFFSET] } /// Returns the protocol address length of the frame. #[inline] pub fn plen(&self) -> u8 { self.bytes[PLEN_OFFSET] } /// Returns the type of operation within the frame. #[inline] pub fn operation(&self) -> u16 { self.bytes.ntohs_unchecked(OPER_OFFSET) } /// Returns the sender hardware address. 
#[inline] pub fn sha(&self) -> MacAddr { MacAddr::from_bytes_unchecked(&self.bytes[SHA_OFFSET..ETH_IPV4_SPA_OFFSET]) } /// Returns the sender protocol address. #[inline] pub fn spa(&self) -> Ipv4Addr { Ipv4Addr::from(self.bytes.ntohl_unchecked(ETH_IPV4_SPA_OFFSET)) } /// Returns the target hardware address. #[inline] pub fn tha(&self) -> MacAddr { MacAddr::from_bytes_unchecked(&self.bytes[ETH_IPV4_THA_OFFSET..ETH_IPV4_TPA_OFFSET]) } /// Returns the target protocol address. #[inline] pub fn tpa(&self) -> Ipv4Addr { Ipv4Addr::from(self.bytes.ntohl_unchecked(ETH_IPV4_TPA_OFFSET)) } /// Returns the length of the frame. #[inline] pub fn len(&self) -> usize { // This might as well return ETH_IPV4_FRAME_LEN directly, since we check this is the actual // length in request_from_bytes(). For some reason it seems nicer leaving it as is. self.bytes.len() } } impl EthIPv4ArpFrame<'_, T> { #[allow(clippy::too_many_arguments)] fn write_raw( buf: T, htype: u16, ptype: u16, hlen: u8, plen: u8, operation: u16, sha: MacAddr, spa: Ipv4Addr, tha: MacAddr, tpa: Ipv4Addr, ) -> Result { if buf.len() != ETH_IPV4_FRAME_LEN { return Err(ArpError::SliceExactLen); } // This is ok, because we've checked the length of the slice. let mut frame = EthIPv4ArpFrame::from_bytes_unchecked(buf); frame.set_htype(htype); frame.set_ptype(ptype); frame.set_hlen(hlen); frame.set_plen(plen); frame.set_operation(operation); frame.set_sha(sha); frame.set_spa(spa); frame.set_tha(tha); frame.set_tpa(tpa); Ok(frame) } /// Attempts to write an ARP request to `buf`, based on the specified hardware and protocol /// addresses. #[inline] pub fn write_request( buf: T, sha: MacAddr, spa: Ipv4Addr, tha: MacAddr, tpa: Ipv4Addr, ) -> Result { Self::write_raw( buf, HTYPE_ETHERNET, ETHERTYPE_IPV4, MAC_ADDR_LEN, IPV4_ADDR_LEN, OPER_REQUEST, sha, spa, tha, tpa, ) } /// Attempts to write an ARP reply to `buf`, based on the specified hardware and protocol /// addresses. 
#[inline] pub fn write_reply( buf: T, sha: MacAddr, spa: Ipv4Addr, tha: MacAddr, tpa: Ipv4Addr, ) -> Result { Self::write_raw( buf, HTYPE_ETHERNET, ETHERTYPE_IPV4, MAC_ADDR_LEN, IPV4_ADDR_LEN, OPER_REPLY, sha, spa, tha, tpa, ) } /// Sets the hardware type of the frame. #[inline] pub fn set_htype(&mut self, value: u16) { self.bytes.htons_unchecked(HTYPE_OFFSET, value); } /// Sets the protocol type of the frame. #[inline] pub fn set_ptype(&mut self, value: u16) { self.bytes.htons_unchecked(PTYPE_OFFSET, value); } /// Sets the hardware address length of the frame. #[inline] pub fn set_hlen(&mut self, value: u8) { self.bytes[HLEN_OFFSET] = value; } /// Sets the protocol address length of the frame. #[inline] pub fn set_plen(&mut self, value: u8) { self.bytes[PLEN_OFFSET] = value; } /// Sets the operation within the frame. #[inline] pub fn set_operation(&mut self, value: u16) { self.bytes.htons_unchecked(OPER_OFFSET, value); } /// Sets the sender hardware address. #[inline] pub fn set_sha(&mut self, addr: MacAddr) { self.bytes[SHA_OFFSET..ETH_IPV4_SPA_OFFSET].copy_from_slice(addr.get_bytes()); } /// Sets the sender protocol address. #[inline] pub fn set_spa(&mut self, addr: Ipv4Addr) { self.bytes .htonl_unchecked(ETH_IPV4_SPA_OFFSET, u32::from(addr)); } /// Sets the target hardware address. #[inline] pub fn set_tha(&mut self, addr: MacAddr) { self.bytes[ETH_IPV4_THA_OFFSET..ETH_IPV4_TPA_OFFSET].copy_from_slice(addr.get_bytes()); } /// Sets the target protocol address. #[inline] pub fn set_tpa(&mut self, addr: Ipv4Addr) { self.bytes .htonl_unchecked(ETH_IPV4_TPA_OFFSET, u32::from(addr)); } } /// This function checks if `buf` may hold an Ethernet frame which encapsulates an /// `EthIPv4ArpRequest` for the given address. Cannot produce false negatives. #[inline] pub fn test_speculative_tpa(buf: &[u8], addr: Ipv4Addr) -> bool { // The unchecked methods are safe because we actually check the buffer length beforehand. 
if buf.len() >= ethernet::PAYLOAD_OFFSET + ETH_IPV4_FRAME_LEN { let bytes = &buf[ethernet::PAYLOAD_OFFSET..]; if EthIPv4ArpFrame::from_bytes_unchecked(bytes).tpa() == addr { return true; } } false } #[cfg(test)] mod tests { use std::str::FromStr; use super::*; #[test] fn test_eth_ipv4_arp_frame() { let mut a = [0u8; 1000]; let mut bad_array = [0u8; 1]; let sha = MacAddr::from_str("01:23:45:67:89:ab").unwrap(); let tha = MacAddr::from_str("cd:ef:01:23:45:67").unwrap(); let spa = Ipv4Addr::new(10, 1, 2, 3); let tpa = Ipv4Addr::new(10, 4, 5, 6); // Slice is too short. assert_eq!( EthIPv4ArpFrame::request_from_bytes(bad_array.as_ref()).unwrap_err(), ArpError::SliceExactLen ); // Slice is too short. assert_eq!( EthIPv4ArpFrame::write_reply(bad_array.as_mut(), sha, spa, tha, tpa).unwrap_err(), ArpError::SliceExactLen ); // Slice is too long. assert_eq!( EthIPv4ArpFrame::write_reply(a.as_mut(), sha, spa, tha, tpa).unwrap_err(), ArpError::SliceExactLen ); // We write a valid ARP reply to the specified slice. { let f = EthIPv4ArpFrame::write_reply(&mut a[..ETH_IPV4_FRAME_LEN], sha, spa, tha, tpa) .unwrap(); // This is a bit redundant given the following tests, but assert away! assert_eq!(f.htype(), HTYPE_ETHERNET); assert_eq!(f.ptype(), ETHERTYPE_IPV4); assert_eq!(f.hlen(), MAC_ADDR_LEN); assert_eq!(f.plen(), IPV4_ADDR_LEN); assert_eq!(f.operation(), OPER_REPLY); assert_eq!(f.sha(), sha); assert_eq!(f.spa(), spa); assert_eq!(f.tha(), tha); assert_eq!(f.tpa(), tpa); } // Now let's try to parse a request. // Slice is too long. assert_eq!( EthIPv4ArpFrame::request_from_bytes(a.as_ref()).unwrap_err(), ArpError::SliceExactLen ); // The length is fine now, but the operation is a reply instead of request. 
assert_eq!( EthIPv4ArpFrame::request_from_bytes(&a[..ETH_IPV4_FRAME_LEN]).unwrap_err(), ArpError::Operation ); // Various requests let requests = [ ( HTYPE_ETHERNET, ETHERTYPE_IPV4, MAC_ADDR_LEN, IPV4_ADDR_LEN, None, ), // Valid request ( HTYPE_ETHERNET + 1, ETHERTYPE_IPV4, MAC_ADDR_LEN, IPV4_ADDR_LEN, Some(ArpError::HType), ), // Invalid htype ( HTYPE_ETHERNET, ETHERTYPE_IPV4 + 1, MAC_ADDR_LEN, IPV4_ADDR_LEN, Some(ArpError::PType), ), // Invalid ptype ( HTYPE_ETHERNET, ETHERTYPE_IPV4, MAC_ADDR_LEN + 1, IPV4_ADDR_LEN, Some(ArpError::HLen), ), // Invalid hlen ( HTYPE_ETHERNET, ETHERTYPE_IPV4, MAC_ADDR_LEN, IPV4_ADDR_LEN + 1, Some(ArpError::PLen), ), // Invalid plen ]; for (htype, ptype, hlen, plen, err) in requests.iter() { EthIPv4ArpFrame::write_raw( &mut a[..ETH_IPV4_FRAME_LEN], *htype, *ptype, *hlen, *plen, OPER_REQUEST, sha, spa, tha, tpa, ) .unwrap(); match err { None => { EthIPv4ArpFrame::request_from_bytes(&a[..ETH_IPV4_FRAME_LEN]).unwrap(); } Some(arp_error) => assert_eq!( EthIPv4ArpFrame::request_from_bytes(&a[..ETH_IPV4_FRAME_LEN]).unwrap_err(), *arp_error ), } } } #[test] fn test_speculative() { let mut a = [0u8; 1000]; let addr = Ipv4Addr::new(1, 2, 3, 4); assert!(!test_speculative_tpa(a.as_ref(), addr)); { let mac = MacAddr::from_bytes_unchecked(&[0; 6]); let mut eth = crate::dumbo::pdu::ethernet::EthernetFrame::write_incomplete( a.as_mut(), mac, mac, 0, ) .unwrap(); let mut arp = EthIPv4ArpFrame::from_bytes_unchecked(eth.inner_mut().payload_mut()); arp.set_tpa(addr); } assert!(test_speculative_tpa(a.as_ref(), addr)); // Let's also test for a very small buffer. let small = [0u8; 1]; assert!(!test_speculative_tpa(small.as_ref(), addr)); } } ================================================ FILE: src/vmm/src/dumbo/pdu/bytes.rs ================================================ // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 //! 
Defines traits which allow byte slices to be interpreted as sequences of bytes that stand for //! different values packed together using network byte ordering (such as network packets). //! //! The main use of these traits is reading and writing numerical values at a given offset in the //! underlying slice. Why are they needed? Given a byte slice, there are two approaches to //! reading/writing packet data that come to mind: //! //! (1) Have structs which represent the potential contents of each packet type, unsafely cast the //! bytes slice to a struct pointer/reference (after doing the required checks), and then use the //! newly obtained pointer/reference to access the data. //! //! (2) Access fields by reading bytes at the appropriate offset from the original slice. //! //! The first solution looks more appealing at first, but it requires some unsafe code. Moreover, //! de-referencing unaligned pointers or references is considered undefined behaviour in Rust, and //! it's not clear whether this undermines the approach or not. Until any further developments, //! the second option is used, based on the `NetworkBytes` implementation. //! //! What's with the `T: Deref`? Is there really a need to be that generic? //! Not really. The logic in this crate currently expects to work with byte slices (`&[u8]` and //! `&mut [u8]`), but there's a significant inconvenience. Consider `NetworkBytes` is defined as: //! //! ``` //! struct NetworkBytes<'a> { //! bytes: &'a [u8], //! } //! ``` //! //! This is perfectly fine for reading values from immutable slices, but what about writing values? //! Implementing methods such as `fn write_something(&mut self)`, is not really possible, because //! even with a mutable reference to `self`, `self.bytes` is still an immutable slice. On the other //! hand, `NetworkBytes` can be defined as: //! //! ``` //! struct NetworkBytes<'a> { //! bytes: &'a mut [u8], //! } //! ``` //! //! 
This allows both reads and writes, but requires a mutable reference at all times (and it looks //! weird to use one for immutable operations). This is where one interesting feature of Rust //! comes in handy; given a type `Something`, it's possible to implement different features //! depending on trait bounds on `T`. For `NetworkBytes`, if `T` implements `Deref` //! (which `&[u8]` does), read operations are possible to define. If `T` implements //! `DerefMut`, write operations are also a possibility. Since //! `DerefMut` implies `Deref`, `NetworkBytes<&mut [u8]>` implements //! both read and write operations. //! //! This can theoretically lead to code bloat when using both `&[u8]` and `&mut [u8]` (as opposed //! to just `&mut [u8]`), but most calls should be inlined anyway, so it probably doesn't matter //! in the end. `NetworkBytes` itself implements `Deref` (and `DerefMut` when `T: DerefMut`), so //! this line of reasoning can be extended to structs which represent different kinds of protocol //! data units (such as IPv4 packets, Ethernet frames, etc.). //! //! Finally, why `Deref` and not something like `AsRef`? The answer is `Deref` coercion, which in //! this case means that a `NetworkBytes` value will automatically coerce to `&[u8]` //! (or `&mut [u8]`), without having to go through an explicit `as_ref()` call, which makes the //! code easier to work with. //! //! Method names have the **unchecked** suffix as a reminder they do not check whether the //! read/write goes beyond the boundaries of a slice. Callers must take the necessary precautions //! to avoid panics. use std::fmt::Debug; use std::marker::PhantomData; use std::ops::{Deref, DerefMut}; use crate::utils::byte_order; /// Represents an immutable view into a sequence of bytes which stands for different values packed /// together using network byte ordering. pub trait NetworkBytes: Deref { /// Reads an `u16` value from the specified offset, converting it to host byte ordering. 
/// /// # Panics /// /// This method will panic if `offset` is invalid. #[inline] fn ntohs_unchecked(&self, offset: usize) -> u16 { // The unwrap() can fail when the offset is invalid, or there aren't enough bytes (2 in this // case) left until the end of the slice. The caller must ensure this doesn't happen (hence // the `unchecked` suffix). byte_order::read_be_u16(&self[offset..]) } /// Reads an `u32` value from the specified offset, converting it to host byte ordering. /// /// # Panics /// /// This method will panic if `offset` is invalid. #[inline] fn ntohl_unchecked(&self, offset: usize) -> u32 { byte_order::read_be_u32(&self[offset..]) } /// Shrinks the current slice to the given `len`. /// /// Does not check whether `len` is actually smaller than `self.len()`. /// /// # Panics /// /// This method will panic if `len` is greater than `self.len()`. fn shrink_unchecked(&mut self, len: usize); } /// Offers mutable access to a sequence of bytes which stands for different values packed /// together using network byte ordering. pub trait NetworkBytesMut: NetworkBytes + DerefMut { /// Writes the given `u16` value at the specified `offset` using network byte ordering. /// /// # Panics /// /// If `value` cannot be written into `self` at the given `offset` /// (e.g. if `offset > self.len() - size_of::()`). #[inline] fn htons_unchecked(&mut self, offset: usize, value: u16) { assert!(offset <= self.len() - std::mem::size_of::()); byte_order::write_be_u16(&mut self[offset..], value) } /// Writes the given `u32` value at the specified `offset` using network byte ordering. /// /// # Panics /// /// If `value` cannot be written into `self` at the given `offset` /// (e.g. if `offset > self.len() - size_of::()`). 
#[inline] fn htonl_unchecked(&mut self, offset: usize, value: u32) { assert!(offset <= self.len() - std::mem::size_of::()); byte_order::write_be_u32(&mut self[offset..], value) } } impl NetworkBytes for &[u8] { #[inline] fn shrink_unchecked(&mut self, len: usize) { *self = &self[..len]; } } impl NetworkBytes for &mut [u8] { #[inline] fn shrink_unchecked(&mut self, len: usize) { *self = &mut std::mem::take(self)[..len]; } } impl NetworkBytesMut for &mut [u8] {} // This struct is used as a convenience for any type which contains a generic member implementing // NetworkBytes with a lifetime, so we don't have to also add the PhantomData member each time. We // use pub(super) here because we only want this to be usable by the child modules of `pdu`. #[derive(Debug)] pub(super) struct InnerBytes<'a, T: 'a> { bytes: T, phantom: PhantomData<&'a T>, } impl InnerBytes<'_, T> { /// Creates a new instance as a wrapper around `bytes`. #[inline] pub fn new(bytes: T) -> Self { InnerBytes { bytes, phantom: PhantomData, } } } impl + Debug> Deref for InnerBytes<'_, T> { type Target = [u8]; #[inline] fn deref(&self) -> &[u8] { self.bytes.deref() } } impl + Debug> DerefMut for InnerBytes<'_, T> { #[inline] fn deref_mut(&mut self) -> &mut [u8] { self.bytes.deref_mut() } } impl NetworkBytes for InnerBytes<'_, T> { #[inline] fn shrink_unchecked(&mut self, len: usize) { self.bytes.shrink_unchecked(len); } } impl NetworkBytesMut for InnerBytes<'_, T> {} #[cfg(test)] mod tests { use super::*; #[test] #[should_panic] fn test_htons_unchecked() { let mut buf = [u8::default(); std::mem::size_of::()]; let mut a = buf.as_mut(); a.htons_unchecked(1, u16::default()); } #[test] #[should_panic] fn test_htonl_unchecked() { let mut buf = [u8::default(); std::mem::size_of::()]; let mut a = buf.as_mut(); a.htonl_unchecked(1, u32::default()); } #[test] fn test_network_bytes() { let mut buf = [0u8; 1000]; { let mut a = buf.as_mut(); a.htons_unchecked(1, 123); a.htonl_unchecked(100, 1234); 
assert_eq!(a.ntohs_unchecked(1), 123); assert_eq!(a.ntohl_unchecked(100), 1234); a.shrink_unchecked(500); assert_eq!(a.len(), 500); assert_eq!(a.ntohs_unchecked(1), 123); assert_eq!(a.ntohl_unchecked(100), 1234); } { let mut b = buf.as_ref(); b.shrink_unchecked(500); assert_eq!(b.len(), 500); assert_eq!(b.ntohs_unchecked(1), 123); assert_eq!(b.ntohl_unchecked(100), 1234); } } } ================================================ FILE: src/vmm/src/dumbo/pdu/ethernet.rs ================================================ // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 //! Contains support for parsing and writing Ethernet frames. Does not currently offer support for //! 802.1Q tags. use std::fmt::Debug; use super::Incomplete; use super::bytes::{InnerBytes, NetworkBytes, NetworkBytesMut}; use crate::dumbo::MacAddr; const DST_MAC_OFFSET: usize = 0; const SRC_MAC_OFFSET: usize = 6; const ETHERTYPE_OFFSET: usize = 12; // We don't support 802.1Q tags. // TODO: support 802.1Q tags?! If so, don't forget to change the speculative_test_* functions // for ARP and IPv4. /// Payload offset in an ethernet frame pub const PAYLOAD_OFFSET: usize = 14; /// Ethertype value for ARP frames. pub const ETHERTYPE_ARP: u16 = 0x0806; /// Ethertype value for IPv4 packets. pub const ETHERTYPE_IPV4: u16 = 0x0800; /// Describes the errors which may occur when handling Ethernet frames. #[derive(Debug, PartialEq, Eq, thiserror::Error, displaydoc::Display)] pub enum EthernetError { /// The specified byte sequence is shorter than the Ethernet header length. SliceTooShort, } /// Interprets the inner bytes as an Ethernet frame. #[derive(Debug)] pub struct EthernetFrame<'a, T: 'a> { bytes: InnerBytes<'a, T>, } #[allow(clippy::len_without_is_empty)] impl EthernetFrame<'_, T> { /// Interprets `bytes` as an Ethernet frame without any validity checks. 
/// /// # Panics /// /// This method does not panic, but further method calls on the resulting object may panic if /// `bytes` contains invalid input. #[inline] pub fn from_bytes_unchecked(bytes: T) -> Self { EthernetFrame { bytes: InnerBytes::new(bytes), } } /// Checks whether the specified byte sequence can be interpreted as an Ethernet frame. #[inline] pub fn from_bytes(bytes: T) -> Result { if bytes.len() < PAYLOAD_OFFSET { return Err(EthernetError::SliceTooShort); } Ok(EthernetFrame::from_bytes_unchecked(bytes)) } /// Returns the destination MAC address. #[inline] pub fn dst_mac(&self) -> MacAddr { MacAddr::from_bytes_unchecked(&self.bytes[DST_MAC_OFFSET..SRC_MAC_OFFSET]) } /// Returns the source MAC address. #[inline] pub fn src_mac(&self) -> MacAddr { MacAddr::from_bytes_unchecked(&self.bytes[SRC_MAC_OFFSET..ETHERTYPE_OFFSET]) } /// Returns the ethertype of the frame. #[inline] pub fn ethertype(&self) -> u16 { self.bytes.ntohs_unchecked(ETHERTYPE_OFFSET) } /// Returns the offset of the payload within the frame. #[inline] pub fn payload_offset(&self) -> usize { PAYLOAD_OFFSET } /// Returns the payload of the frame as an `[&u8]` slice. #[inline] pub fn payload(&self) -> &[u8] { self.bytes.split_at(self.payload_offset()).1 } /// Returns the length of the frame. #[inline] pub fn len(&self) -> usize { self.bytes.len() } } impl EthernetFrame<'_, T> { /// Attempts to write an Ethernet frame using the given header fields to `buf`. fn new_with_header( buf: T, dst_mac: MacAddr, src_mac: MacAddr, ethertype: u16, ) -> Result { if buf.len() < PAYLOAD_OFFSET { return Err(EthernetError::SliceTooShort); } let mut frame = EthernetFrame::from_bytes_unchecked(buf); frame .set_dst_mac(dst_mac) .set_src_mac(src_mac) .set_ethertype(ethertype); Ok(frame) } /// Attempts to write an incomplete Ethernet frame (whose length is currently unknown) to `buf`, /// using the specified header fields. 
#[inline] pub fn write_incomplete( buf: T, dst_mac: MacAddr, src_mac: MacAddr, ethertype: u16, ) -> Result, EthernetError> { Ok(Incomplete::new(Self::new_with_header( buf, dst_mac, src_mac, ethertype, )?)) } /// Sets the destination MAC address. #[inline] pub fn set_dst_mac(&mut self, addr: MacAddr) -> &mut Self { self.bytes[DST_MAC_OFFSET..SRC_MAC_OFFSET].copy_from_slice(addr.get_bytes()); self } /// Sets the source MAC address. #[inline] pub fn set_src_mac(&mut self, addr: MacAddr) -> &mut Self { self.bytes[SRC_MAC_OFFSET..ETHERTYPE_OFFSET].copy_from_slice(addr.get_bytes()); self } /// Sets the ethertype of the frame. #[inline] pub fn set_ethertype(&mut self, value: u16) -> &mut Self { self.bytes.htons_unchecked(ETHERTYPE_OFFSET, value); self } /// Returns the payload of the frame as a `&mut [u8]` slice. #[inline] pub fn payload_mut(&mut self) -> &mut [u8] { // We need this let to avoid confusing the borrow checker. let offset = self.payload_offset(); self.bytes.split_at_mut(offset).1 } } impl<'a, T: NetworkBytes + Debug> Incomplete> { /// Completes the inner frame by shrinking it to its actual length. /// /// # Panics /// /// This method panics if `len` is greater than the length of the inner byte sequence. 
#[inline] pub fn with_payload_len_unchecked(mut self, payload_len: usize) -> EthernetFrame<'a, T> { let payload_offset = self.inner.payload_offset(); self.inner .bytes .shrink_unchecked(payload_offset + payload_len); self.inner } } #[cfg(test)] mod tests { use std::str::FromStr; use super::*; #[test] fn test_ethernet_frame() { let mut a = [0u8; 10000]; let mut bad_array = [0u8; 1]; let dst_mac = MacAddr::from_str("01:23:45:67:89:ab").unwrap(); let src_mac = MacAddr::from_str("cd:ef:01:23:45:67").unwrap(); let ethertype = 1289; assert_eq!( EthernetFrame::from_bytes(bad_array.as_ref()).unwrap_err(), EthernetError::SliceTooShort ); assert_eq!( EthernetFrame::new_with_header(bad_array.as_mut(), dst_mac, src_mac, ethertype) .unwrap_err(), EthernetError::SliceTooShort ); { let mut f1 = EthernetFrame::new_with_header(a.as_mut(), dst_mac, src_mac, ethertype).unwrap(); assert_eq!(f1.dst_mac(), dst_mac); assert_eq!(f1.src_mac(), src_mac); assert_eq!(f1.ethertype(), ethertype); f1.payload_mut()[1] = 132; } { let f2 = EthernetFrame::from_bytes(a.as_ref()).unwrap(); assert_eq!(f2.dst_mac(), dst_mac); assert_eq!(f2.src_mac(), src_mac); assert_eq!(f2.ethertype(), ethertype); assert_eq!(f2.payload()[1], 132); assert_eq!(f2.len(), f2.bytes.len()); } { let f3 = EthernetFrame::write_incomplete(a.as_mut(), dst_mac, src_mac, ethertype).unwrap(); let f3_complete = f3.with_payload_len_unchecked(123); assert_eq!(f3_complete.len(), f3_complete.payload_offset() + 123); } } } #[cfg(kani)] #[allow(dead_code)] // Avoid warning when using stubs. mod kani_proofs { use super::*; use crate::utils::net::mac::MAC_ADDR_LEN; // See the Virtual I/O Device (VIRTIO) specification, Sec. 5.1.6.2. 
// https://docs.oasis-open.org/virtio/virtio/v1.2/csd01/virtio-v1.2-csd01.pdf pub const MAX_FRAME_SIZE: usize = 1514; const MAC_ADDR_LEN_USIZE: usize = MAC_ADDR_LEN as usize; impl<'a, T: NetworkBytesMut + Debug> EthernetFrame<'a, T> { fn is_valid(&self) -> bool { self.len() >= PAYLOAD_OFFSET } } // We consider the MMDS Network Stack spec for all postconditions in the harnesses. // See https://github.com/firecracker-microvm/firecracker/blob/main/docs/mmds/mmds-design.md#mmds-network-stack #[kani::proof] fn verify_from_bytes_unchecked() { // Create non-deterministic stream of bytes up to MAX_FRAME_SIZE let mut bytes: [u8; MAX_FRAME_SIZE] = kani::Arbitrary::any_array::(); let slice_length = bytes.len(); // Verify from_bytes_unchecked let ethernet = EthernetFrame::from_bytes_unchecked(bytes.as_mut()); // Check for post-conditions assert_eq!(ethernet.len(), slice_length); assert!( !(ethernet.is_valid()) || (ethernet.payload().len() == slice_length - PAYLOAD_OFFSET) ); } #[kani::proof] fn verify_from_bytes() { // Create non-deterministic stream of bytes up to MAX_FRAME_SIZE let mut bytes: [u8; MAX_FRAME_SIZE] = kani::Arbitrary::any_array::(); let slice_length = bytes.len(); // Verify from_bytes let ethernet = EthernetFrame::from_bytes(bytes.as_mut()); // Check for post-conditions if slice_length >= PAYLOAD_OFFSET { let ethernet = ethernet.unwrap(); assert!(ethernet.is_valid()); assert_eq!(ethernet.len(), slice_length); assert_eq!(ethernet.payload().len(), slice_length - PAYLOAD_OFFSET); } else { ethernet.unwrap_err(); } } #[kani::proof] fn verify_dst_mac() { // Create non-deterministic stream of bytes up to MAX_FRAME_SIZE let mut bytes: [u8; MAX_FRAME_SIZE] = kani::Arbitrary::any_array::(); // Create valid non-deterministic ethernet let ethernet = EthernetFrame::from_bytes(bytes.as_mut()); kani::assume(ethernet.is_ok()); let mut ethernet = ethernet.unwrap(); // Verify set_dst_mac let mac_bytes: [u8; MAC_ADDR_LEN as usize] = kani::any(); let dst_mac = 
MacAddr::from(mac_bytes); ethernet.set_dst_mac(dst_mac); // Verify dst_mac let dst_addr = EthernetFrame::dst_mac(ðernet); // Check for post-conditions // MAC addresses should always have 48 bits assert_eq!(dst_addr.get_bytes().len(), MAC_ADDR_LEN as usize); // Check duality between set_dst_mac and dst_mac operations let i: usize = kani::any(); kani::assume(i < mac_bytes.len()); assert_eq!(mac_bytes[i], dst_addr.get_bytes()[i]); } #[kani::proof] fn verify_src_mac() { // Create non-deterministic stream of bytes up to MAX_FRAME_SIZE let mut bytes: [u8; MAX_FRAME_SIZE] = kani::Arbitrary::any_array::(); // Create valid non-deterministic ethernet let ethernet = EthernetFrame::from_bytes(bytes.as_mut()); kani::assume(ethernet.is_ok()); let mut ethernet = ethernet.unwrap(); // Verify set_src_mac let mac_bytes: [u8; MAC_ADDR_LEN as usize] = kani::any(); let src_mac = MacAddr::from(mac_bytes); ethernet.set_src_mac(src_mac); // Verify src_mac let src_addr = EthernetFrame::src_mac(ðernet); // Check for post-conditions // MAC addresses should always have 48 bits assert_eq!(src_addr.get_bytes().len(), MAC_ADDR_LEN as usize); // Check duality between set_src_mac and src_mac operations let i: usize = kani::any(); kani::assume(i < mac_bytes.len()); assert_eq!(mac_bytes[i], src_addr.get_bytes()[i]); } #[kani::proof] fn verify_src_mac_isolation() { // Create non-deterministic stream of bytes up to MAX_FRAME_SIZE let mut bytes: [u8; MAX_FRAME_SIZE] = kani::Arbitrary::any_array::(); // Create valid non-deterministic ethernet let ethernet = EthernetFrame::from_bytes(bytes.as_mut()); kani::assume(ethernet.is_ok()); let mut ethernet = ethernet.unwrap(); // Verify set_src_mac let mac_bytes: [u8; MAC_ADDR_LEN as usize] = kani::any(); let src_mac = MacAddr::from(mac_bytes); ethernet.set_src_mac(src_mac); let payload_offset = ethernet.payload_offset(); if kani::any() { let dst_mac_bytes: [u8; MAC_ADDR_LEN as usize] = kani::any(); let dst_mac = MacAddr::from(dst_mac_bytes); 
ethernet.set_dst_mac(dst_mac); } if kani::any() { let ethertype_in: u16 = kani::any(); ethernet.set_ethertype(ethertype_in); } // Payload info doesn't change assert_eq!(ethernet.payload_offset(), payload_offset); // Verify src_mac let src_addr = EthernetFrame::src_mac(ðernet); // Check for post-conditions // MAC addresses should always have 48 bits assert_eq!(src_addr.get_bytes().len(), MAC_ADDR_LEN as usize); // Check duality between set_src_mac and src_mac operations let i: usize = kani::any(); kani::assume(i < mac_bytes.len()); assert_eq!(mac_bytes[i], src_addr.get_bytes()[i]); } #[kani::proof] fn verify_ethertype() { // Create non-deterministic stream of bytes up to MAX_FRAME_SIZE let mut bytes: [u8; MAX_FRAME_SIZE] = kani::Arbitrary::any_array::(); // Create valid non-deterministic ethernet let ethernet = EthernetFrame::from_bytes(bytes.as_mut()); kani::assume(ethernet.is_ok()); let mut ethernet = ethernet.unwrap(); // Verify set_ethertype let ethertype_in: u16 = kani::any(); ethernet.set_ethertype(ethertype_in); // Verify ethertype let ethertype_out = ethernet.ethertype(); // Check for post-conditions // Check duality between set_ethertype and ethertype operations assert_eq!(ethertype_in, ethertype_out); } #[kani::proof] #[kani::unwind(1515)] fn verify_payload() { // Create non-deterministic stream of bytes up to MAX_FRAME_SIZE let mut bytes: [u8; MAX_FRAME_SIZE] = kani::Arbitrary::any_array::(); // Create valid non-deterministic ethernet let ethernet = EthernetFrame::from_bytes(bytes.as_mut()); kani::assume(ethernet.is_ok()); let ethernet = ethernet.unwrap(); // Verify payload_offset let payload_offset = ethernet.payload_offset(); // Verify payload() let payload = ethernet.payload(); // Verify payload_mut let payload_mut = ethernet.payload(); // Check for post-conditions // Check payload_offset value assert_eq!(payload_offset, PAYLOAD_OFFSET); // Check equivalence assert_eq!(payload, payload_mut); } #[kani::proof] fn verify_new_with_header() { // Create 
non-deterministic stream of bytes up to MAX_FRAME_SIZE let mut bytes: [u8; MAX_FRAME_SIZE] = kani::Arbitrary::any_array::(); let bytes_length = bytes.len(); // Create valid non-deterministic dst_mac let dst_mac_bytes: [u8; MAC_ADDR_LEN as usize] = kani::Arbitrary::any_array::(); let dst_mac = MacAddr::from(dst_mac_bytes); // Create valid non-deterministic src_mac let src_mac_bytes: [u8; MAC_ADDR_LEN as usize] = kani::Arbitrary::any_array::(); let src_mac = MacAddr::from(src_mac_bytes); // Create valid non-deterministic ethertype let ethertype: u16 = kani::any(); // Verify new_with_header let frame = EthernetFrame::new_with_header(bytes.as_mut(), dst_mac, src_mac, ethertype).unwrap(); // Check for post-conditions assert_eq!(frame.dst_mac(), dst_mac); assert_eq!(frame.src_mac(), src_mac); assert_eq!(frame.ethertype(), ethertype); assert_eq!(frame.len(), bytes_length); assert!(frame.is_valid() && (frame.payload().len() == bytes_length - PAYLOAD_OFFSET)); } #[kani::proof] fn verify_write_incomplete() { // Create non-deterministic stream of bytes up to MAX_FRAME_SIZE let mut bytes: [u8; MAX_FRAME_SIZE] = kani::Arbitrary::any_array::(); // Create valid non-deterministic dst_mac let dst_mac_bytes: [u8; MAC_ADDR_LEN as usize] = kani::Arbitrary::any_array::(); let dst_mac = MacAddr::from(dst_mac_bytes); // Create valid non-deterministic src_mac let src_mac_bytes: [u8; MAC_ADDR_LEN as usize] = kani::Arbitrary::any_array::(); let src_mac = MacAddr::from(src_mac_bytes); // Create valid non-deterministic ethertype let ethertype: u16 = kani::any(); // Verify write_incomplete let incomplete_frame = EthernetFrame::write_incomplete(bytes.as_mut(), dst_mac, src_mac, ethertype).unwrap(); // Check for post-conditions assert_eq!(incomplete_frame.inner.dst_mac(), dst_mac); assert_eq!(incomplete_frame.inner.src_mac(), src_mac); assert_eq!(incomplete_frame.inner.ethertype(), ethertype); } #[kani::proof] #[kani::solver(cadical)] fn verify_with_payload_len_unchecked() { // Create 
non-deterministic stream of bytes up to MAX_FRAME_SIZE let mut bytes: [u8; MAX_FRAME_SIZE] = kani::Arbitrary::any_array::(); // Create valid non-deterministic dst_mac let dst_mac_bytes: [u8; MAC_ADDR_LEN as usize] = kani::Arbitrary::any_array::(); let dst_mac = MacAddr::from(dst_mac_bytes); // Create valid non-deterministic src_mac let src_mac_bytes: [u8; MAC_ADDR_LEN as usize] = kani::Arbitrary::any_array::(); let src_mac = MacAddr::from(src_mac_bytes); // Create valid non-deterministic ethertype let ethertype: u16 = kani::any(); // Create a non-deterministic incomplete frame let incomplete_frame = EthernetFrame::write_incomplete(bytes.as_mut(), dst_mac, src_mac, ethertype).unwrap(); let incomplete_frame_payload_offset = incomplete_frame.inner.payload_offset(); let incomplete_frame_len = incomplete_frame.inner.len(); // Create a non-deterministic payload_len let payload_len: usize = kani::any(); kani::assume(payload_len <= incomplete_frame_len - incomplete_frame_payload_offset); // Verify with_payload_len_unchecked let unchecked_frame = incomplete_frame.with_payload_len_unchecked(payload_len); // Check for post-conditions assert!(unchecked_frame.is_valid()); assert_eq!(unchecked_frame.dst_mac(), dst_mac); assert_eq!(unchecked_frame.src_mac(), src_mac); assert_eq!(unchecked_frame.ethertype(), ethertype); } } ================================================ FILE: src/vmm/src/dumbo/pdu/ipv4.rs ================================================ // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 //! Contains support for parsing and writing IPv4 packets. //! //! A picture of the IPv4 packet header can be found [here] (watch out for the MSB 0 bit numbering). //! //! 
[here]: https://en.wikipedia.org/wiki/IPv4#Packet_structure use std::convert::From; use std::fmt::Debug; use std::net::Ipv4Addr; use crate::dumbo::pdu::bytes::{InnerBytes, NetworkBytes, NetworkBytesMut}; use crate::dumbo::pdu::{Incomplete, ethernet}; const VERSION_AND_IHL_OFFSET: usize = 0; const DSCP_AND_ECN_OFFSET: usize = 1; const TOTAL_LEN_OFFSET: usize = 2; const IDENTIFICATION_OFFSET: usize = 4; const FLAGS_AND_FRAGMENTOFF_OFFSET: usize = 6; const TTL_OFFSET: usize = 8; const PROTOCOL_OFFSET: usize = 9; const HEADER_CHECKSUM_OFFSET: usize = 10; const SOURCE_ADDRESS_OFFSET: usize = 12; const DESTINATION_ADDRESS_OFFSET: usize = 16; const OPTIONS_OFFSET: u8 = 20; /// Indicates version 4 of the IP protocol pub const IPV4_VERSION: u8 = 0x04; /// Default TTL value pub const DEFAULT_TTL: u8 = 1; /// The IP protocol number associated with TCP. pub const PROTOCOL_TCP: u8 = 0x06; /// The IP protocol number associated with UDP. pub const PROTOCOL_UDP: u8 = 0x11; /// Describes the errors which may occur while handling IPv4 packets. #[derive(Debug, PartialEq, Eq, thiserror::Error, displaydoc::Display)] pub enum Ipv4Error { /// The header checksum is invalid. Checksum, /// The header length is invalid. HeaderLen, /// The total length of the packet is invalid. InvalidTotalLen, /// The length of the given slice does not match the length of the packet. SliceExactLen, /// The length of the given slice is less than the IPv4 header length. SliceTooShort, /// The version header field is invalid. Version, } /// Interprets the inner bytes as an IPv4 packet. #[derive(Debug)] pub struct IPv4Packet<'a, T: 'a> { bytes: InnerBytes<'a, T>, } #[allow(clippy::len_without_is_empty)] impl IPv4Packet<'_, T> { /// Interpret `bytes` as an IPv4Packet without checking the validity of the header fields, and /// the length of the inner byte sequence. /// /// # Panics /// /// This method does not panic, but further method calls on the resulting object may panic if /// `bytes` contains invalid input. 
#[inline] pub fn from_bytes_unchecked(bytes: T) -> Self { IPv4Packet { bytes: InnerBytes::new(bytes), } } /// Attempts to interpret `bytes` as an IPv4 packet, checking the validity of the header fields /// and the length of the inner byte sequence. pub fn from_bytes(bytes: T, verify_checksum: bool) -> Result { let bytes_len = bytes.len(); if bytes_len < usize::from(OPTIONS_OFFSET) { return Err(Ipv4Error::SliceTooShort); } let packet = IPv4Packet::from_bytes_unchecked(bytes); let (version, header_len) = packet.version_and_header_len(); if version != IPV4_VERSION { return Err(Ipv4Error::Version); } let total_len = packet.total_len() as usize; if total_len < header_len.into() { return Err(Ipv4Error::InvalidTotalLen); } if total_len != bytes_len { return Err(Ipv4Error::SliceExactLen); } if header_len < OPTIONS_OFFSET { return Err(Ipv4Error::HeaderLen); } // We ignore the TTL field since only routers should care about it. An end host has no // reason really to discard an otherwise valid packet. if verify_checksum && packet.compute_checksum_unchecked(header_len.into()) != 0 { return Err(Ipv4Error::Checksum); } Ok(packet) } /// Returns the value of the `version` header field, and the header length. /// /// This method returns the actual length (in bytes) of the header, and not the value of the /// `ihl` header field). #[inline] pub fn version_and_header_len(&self) -> (u8, u8) { let x = self.bytes[VERSION_AND_IHL_OFFSET]; let ihl = x & 0x0f; let header_len = ihl << 2; (x >> 4, header_len) } /// Returns the packet header length (in bytes). #[inline] pub fn header_len(&self) -> u8 { let (_, header_len) = self.version_and_header_len(); header_len } /// Returns the values of the `dscp` and `ecn` header fields. #[inline] pub fn dscp_and_ecn(&self) -> (u8, u8) { let x = self.bytes[DSCP_AND_ECN_OFFSET]; (x >> 2, x & 0b11) } /// Returns the value of the 'total length' header field. 
#[inline] pub fn total_len(&self) -> u16 { self.bytes.ntohs_unchecked(TOTAL_LEN_OFFSET) } /// Returns the value of the `identification` header field. #[inline] pub fn identification(&self) -> u16 { self.bytes.ntohs_unchecked(IDENTIFICATION_OFFSET) } /// Returns the values of the `flags` and `fragment offset` header fields. #[inline] pub fn flags_and_fragment_offset(&self) -> (u8, u16) { let x = self.bytes.ntohs_unchecked(FLAGS_AND_FRAGMENTOFF_OFFSET); ((x >> 13) as u8, x & 0x1fff) } /// Returns the value of the `ttl` header field. #[inline] pub fn ttl(&self) -> u8 { self.bytes[TTL_OFFSET] } /// Returns the value of the `protocol` header field. #[inline] pub fn protocol(&self) -> u8 { self.bytes[PROTOCOL_OFFSET] } /// Returns the value of the `header checksum` header field. #[inline] pub fn header_checksum(&self) -> u16 { self.bytes.ntohs_unchecked(HEADER_CHECKSUM_OFFSET) } /// Returns the source IPv4 address of the packet. #[inline] pub fn source_address(&self) -> Ipv4Addr { Ipv4Addr::from(self.bytes.ntohl_unchecked(SOURCE_ADDRESS_OFFSET)) } /// Returns the destination IPv4 address of the packet. #[inline] pub fn destination_address(&self) -> Ipv4Addr { Ipv4Addr::from(self.bytes.ntohl_unchecked(DESTINATION_ADDRESS_OFFSET)) } /// Returns a byte slice containing the payload, using the given header length value to compute /// the payload offset. /// /// # Panics /// /// This method may panic if the value of `header_len` is invalid. #[inline] pub fn payload_unchecked(&self, header_len: usize) -> &[u8] { self.bytes.split_at(header_len).1 } /// Returns a byte slice that contains the payload of the packet. #[inline] pub fn payload(&self) -> &[u8] { self.payload_unchecked(self.header_len().into()) } /// Returns the length of the inner byte sequence. /// /// This is equal to the output of the `total_len()` method for properly constructed instances /// of `IPv4Packet`. 
#[inline] pub fn len(&self) -> usize { self.bytes.len() } /// Computes and returns the packet header checksum using the provided header length. /// /// A nice description of how this works can be found [here]. May panic for invalid values of /// `header_len`. /// /// # Panics /// /// This method may panic if the value of `header_len` is invalid. /// /// [here]: https://en.wikipedia.org/wiki/IPv4_header_checksum pub fn compute_checksum_unchecked(&self, header_len: usize) -> u16 { let mut sum = 0u32; for i in 0..header_len / 2 { sum += u32::from(self.bytes.ntohs_unchecked(i * 2)); } while sum >> 16 != 0 { sum = (sum & 0xffff) + (sum >> 16); } // Safe to unwrap due to the while loop. !u16::try_from(sum).unwrap() } /// Computes and returns the packet header checksum. #[inline] pub fn compute_checksum(&self) -> u16 { self.compute_checksum_unchecked(self.header_len().into()) } } impl IPv4Packet<'_, T> { /// Attempts to write an IPv4 packet header to `buf`, making sure there is enough space. /// /// This method returns an incomplete packet, because the size of the payload might be unknown /// at this point. IP options are not allowed, which means `header_len == OPTIONS_OFFSET`. The /// `dscp`, `ecn`, `identification`, `flags`, and `fragment_offset` fields are set to 0. The /// `ttl` is set to a default value. The `total_len` and `checksum` fields will be set when /// the length of the incomplete packet is determined. 
pub fn write_header( buf: T, protocol: u8, src_addr: Ipv4Addr, dst_addr: Ipv4Addr, ) -> Result, Ipv4Error> { if buf.len() < usize::from(OPTIONS_OFFSET) { return Err(Ipv4Error::SliceTooShort); } let mut packet = IPv4Packet::from_bytes_unchecked(buf); packet .set_version_and_header_len(IPV4_VERSION, OPTIONS_OFFSET) .set_dscp_and_ecn(0, 0) .set_identification(0) .set_flags_and_fragment_offset(0, 0) .set_ttl(DEFAULT_TTL) .set_protocol(protocol) .set_source_address(src_addr) .set_destination_address(dst_addr); Ok(Incomplete::new(packet)) } /// Sets the values of the `version` and `ihl` header fields (the latter is computed from the /// value of `header_len`). #[inline] pub fn set_version_and_header_len(&mut self, version: u8, header_len: u8) -> &mut Self { let version = version << 4; let ihl = (header_len >> 2) & 0xf; self.bytes[VERSION_AND_IHL_OFFSET] = version | ihl; self } /// Sets the values of the `dscp` and `ecn` header fields. #[inline] pub fn set_dscp_and_ecn(&mut self, dscp: u8, ecn: u8) -> &mut Self { self.bytes[DSCP_AND_ECN_OFFSET] = (dscp << 2) | ecn; self } /// Sets the value of the `total length` header field. #[inline] pub fn set_total_len(&mut self, value: u16) -> &mut Self { self.bytes.htons_unchecked(TOTAL_LEN_OFFSET, value); self } /// Sets the value of the `identification` header field. #[inline] pub fn set_identification(&mut self, value: u16) -> &mut Self { self.bytes.htons_unchecked(IDENTIFICATION_OFFSET, value); self } /// Sets the values of the `flags` and `fragment offset` header fields. #[inline] pub fn set_flags_and_fragment_offset(&mut self, flags: u8, fragment_offset: u16) -> &mut Self { let value = (u16::from(flags) << 13) | fragment_offset; self.bytes .htons_unchecked(FLAGS_AND_FRAGMENTOFF_OFFSET, value); self } /// Sets the value of the `ttl` header field. #[inline] pub fn set_ttl(&mut self, value: u8) -> &mut Self { self.bytes[TTL_OFFSET] = value; self } /// Sets the value of the `protocol` header field. 
#[inline] pub fn set_protocol(&mut self, value: u8) -> &mut Self { self.bytes[PROTOCOL_OFFSET] = value; self } /// Sets the value of the `header checksum` header field. #[inline] pub fn set_header_checksum(&mut self, value: u16) -> &mut Self { self.bytes.htons_unchecked(HEADER_CHECKSUM_OFFSET, value); self } /// Sets the source address of the packet. #[inline] pub fn set_source_address(&mut self, addr: Ipv4Addr) -> &mut Self { self.bytes .htonl_unchecked(SOURCE_ADDRESS_OFFSET, u32::from(addr)); self } /// Sets the destination address of the packet. #[inline] pub fn set_destination_address(&mut self, addr: Ipv4Addr) -> &mut Self { self.bytes .htonl_unchecked(DESTINATION_ADDRESS_OFFSET, u32::from(addr)); self } /// Returns a mutable byte slice representing the payload of the packet, using the provided /// header length to compute the payload offset. /// /// # Panics /// /// This method may panic if the value of `header_len` is invalid. #[inline] pub fn payload_mut_unchecked(&mut self, header_len: usize) -> &mut [u8] { self.bytes.split_at_mut(header_len).1 } /// Returns a mutable byte slice representing the payload of the packet. #[inline] pub fn payload_mut(&mut self) -> &mut [u8] { // Can't use self.header_len() as a fn parameter on the following line, because // the borrow checker complains. This may change when it becomes smarter. let header_len = self.header_len(); self.payload_mut_unchecked(header_len.into()) } } /// An incomplete packet is one where the payload length has not been determined yet. /// /// It can be transformed into an `IPv4Packet` by specifying the size of the payload, and /// shrinking the inner byte sequence to be as large as the packet itself (this includes setting /// the `total length` header field). impl<'a, T: NetworkBytesMut + Debug> Incomplete> { /// Transforms `self` into an `IPv4Packet` based on the supplied header and payload length. May /// panic for invalid values of the input parameters. 
/// /// # Panics /// /// This method may panic if the combination of `header_len` and `payload_len` is invalid, /// or any of the individual values are invalid. #[inline] pub fn with_header_and_payload_len_unchecked( mut self, header_len: u8, payload_len: u16, compute_checksum: bool, ) -> IPv4Packet<'a, T> { let total_len = u16::from(header_len) + payload_len; { let packet = &mut self.inner; // This unchecked is fine as long as total_len is smaller than the length of the // original slice, which should be the case if our code is not wrong. packet.bytes.shrink_unchecked(total_len.into()); // Set the total_len. packet.set_total_len(total_len); if compute_checksum { // Ensure this is set to 0 first. packet.set_header_checksum(0); // Now compute the actual checksum. let checksum = packet.compute_checksum_unchecked(header_len.into()); packet.set_header_checksum(checksum); } } self.inner } /// Transforms `self` into an `IPv4Packet` based on the supplied options and payload length. /// /// # Panics /// /// This method may panic if the combination of `options_len` and `payload_len` is invalid, /// or any of the individual values are invalid. #[inline] pub fn with_options_and_payload_len_unchecked( self, options_len: u8, payload_len: u16, compute_checksum: bool, ) -> IPv4Packet<'a, T> { let header_len = OPTIONS_OFFSET + options_len; self.with_header_and_payload_len_unchecked(header_len, payload_len, compute_checksum) } /// Transforms `self` into an `IPv4Packet` based on the supplied payload length. May panic for /// invalid values of the input parameters. /// /// # Panics /// /// This method may panic if the value of `header_len` is invalid. 
#[inline] pub fn with_payload_len_unchecked( self, payload_len: u16, compute_checksum: bool, ) -> IPv4Packet<'a, T> { let header_len = self.inner().header_len(); self.with_header_and_payload_len_unchecked(header_len, payload_len, compute_checksum) } } /// This function checks if `buf` may hold an IPv4Packet heading towards the given address. Cannot /// produce false negatives. #[inline] pub fn test_speculative_dst_addr(buf: &[u8], addr: Ipv4Addr) -> bool { // The unchecked methods are safe because we actually check the buffer length beforehand. if buf.len() >= ethernet::PAYLOAD_OFFSET + usize::from(OPTIONS_OFFSET) { let bytes = &buf[ethernet::PAYLOAD_OFFSET..]; if IPv4Packet::from_bytes_unchecked(bytes).destination_address() == addr { return true; } } false } #[cfg(test)] mod tests { use super::*; use crate::dumbo::MacAddr; const MAX_HEADER_LEN: u8 = 60; #[test] fn test_set_get() { let mut a = [0u8; 100]; let mut p = IPv4Packet::from_bytes_unchecked(a.as_mut()); assert_eq!(p.version_and_header_len(), (0, 0)); p.set_version_and_header_len(IPV4_VERSION, 24); assert_eq!(p.version_and_header_len(), (IPV4_VERSION, 24)); assert_eq!(p.dscp_and_ecn(), (0, 0)); p.set_dscp_and_ecn(3, 2); assert_eq!(p.dscp_and_ecn(), (3, 2)); assert_eq!(p.total_len(), 0); p.set_total_len(123); assert_eq!(p.total_len(), 123); assert_eq!(p.identification(), 0); p.set_identification(1112); assert_eq!(p.identification(), 1112); assert_eq!(p.flags_and_fragment_offset(), (0, 0)); p.set_flags_and_fragment_offset(7, 1000); assert_eq!(p.flags_and_fragment_offset(), (7, 1000)); assert_eq!(p.ttl(), 0); p.set_ttl(123); assert_eq!(p.ttl(), 123); assert_eq!(p.protocol(), 0); p.set_protocol(114); assert_eq!(p.protocol(), 114); assert_eq!(p.header_checksum(), 0); p.set_header_checksum(1234); assert_eq!(p.header_checksum(), 1234); let addr = Ipv4Addr::new(10, 11, 12, 13); assert_eq!(p.source_address(), Ipv4Addr::from(0)); p.set_source_address(addr); assert_eq!(p.source_address(), addr); 
assert_eq!(p.destination_address(), Ipv4Addr::from(0)); p.set_destination_address(addr); assert_eq!(p.destination_address(), addr); } #[test] fn test_constructors() { // We fill this with 1 to notice if the appropriate values get zeroed out. let mut buf = [1u8; 100]; let src = Ipv4Addr::new(10, 100, 11, 21); let dst = Ipv4Addr::new(192, 168, 121, 35); let buf_len = u16::try_from(buf.len()).unwrap(); // No IPv4 option support for now. let header_len = OPTIONS_OFFSET; let payload_len = buf_len - u16::from(OPTIONS_OFFSET); { let mut p = IPv4Packet::write_header(buf.as_mut(), PROTOCOL_TCP, src, dst) .unwrap() .with_header_and_payload_len_unchecked(header_len, payload_len, true); assert_eq!(p.version_and_header_len(), (IPV4_VERSION, header_len)); assert_eq!(p.dscp_and_ecn(), (0, 0)); assert_eq!(p.total_len(), buf_len); assert_eq!(p.identification(), 0); assert_eq!(p.flags_and_fragment_offset(), (0, 0)); assert_eq!(p.ttl(), DEFAULT_TTL); assert_eq!(p.protocol(), PROTOCOL_TCP); let checksum = p.header_checksum(); p.set_header_checksum(0); let computed_checksum = p.compute_checksum(); assert_eq!(computed_checksum, checksum); p.set_header_checksum(computed_checksum); assert_eq!(p.compute_checksum(), 0); assert_eq!(p.source_address(), src); assert_eq!(p.destination_address(), dst); // The mutable borrow of buf will end here. } IPv4Packet::from_bytes(buf.as_ref(), true).unwrap(); // Now let's check some error conditions. // Using a helper function here instead of a closure because it's hard (impossible?) to // specify lifetime bounds for closure arguments. fn p(buf: &mut [u8]) -> IPv4Packet<'_, &mut [u8]> { IPv4Packet::from_bytes_unchecked(buf) } // Just a helper closure. let look_for_error = |buf: &[u8], err: Ipv4Error| { assert_eq!(IPv4Packet::from_bytes(buf, true).unwrap_err(), err); }; // Invalid version. p(buf.as_mut()).set_version_and_header_len(IPV4_VERSION + 1, header_len); look_for_error(buf.as_ref(), Ipv4Error::Version); // Short header length. 
p(buf.as_mut()).set_version_and_header_len(IPV4_VERSION, OPTIONS_OFFSET - 1); look_for_error(buf.as_ref(), Ipv4Error::HeaderLen); // Header length too large. We have to add at least 4 here, because the setter converts // header_len into the ihl field via division by 4, so anything less will lead to a valid // result (the ihl corresponding to IPV4_MAX_HEADER_LEN). When decoding the header_len back // from the packet, we'll get a smaller value than OPTIONS_OFFSET, because it wraps around // modulo 60, since the ihl field is only four bits wide, and then gets multiplied with 4. p(buf.as_mut()).set_version_and_header_len(IPV4_VERSION, MAX_HEADER_LEN + 4); look_for_error(buf.as_ref(), Ipv4Error::HeaderLen); // Total length smaller than header length. p(buf.as_mut()) .set_version_and_header_len(IPV4_VERSION, OPTIONS_OFFSET) .set_total_len(u16::from(OPTIONS_OFFSET) - 1); look_for_error(buf.as_ref(), Ipv4Error::InvalidTotalLen); // Total len not matching slice length. p(buf.as_mut()).set_total_len(buf_len - 1); look_for_error(buf.as_ref(), Ipv4Error::SliceExactLen); // The original packet header should contain a valid checksum. assert_eq!(p(buf.as_mut()).set_total_len(buf_len).compute_checksum(), 0); // Let's make it invalid. let checksum = p(buf.as_mut()).header_checksum(); p(buf.as_mut()).set_header_checksum(checksum.wrapping_add(1)); look_for_error(buf.as_ref(), Ipv4Error::Checksum); // Finally, a couple of tests for a small buffer. let mut small_buf = [0u8; 1]; look_for_error(small_buf.as_ref(), Ipv4Error::SliceTooShort); assert_eq!( IPv4Packet::write_header(small_buf.as_mut(), PROTOCOL_TCP, src, dst).unwrap_err(), Ipv4Error::SliceTooShort ); } #[test] fn test_incomplete() { let mut buf = [0u8; 100]; let src = Ipv4Addr::new(10, 100, 11, 21); let dst = Ipv4Addr::new(192, 168, 121, 35); let payload_len = 30; // This is kinda mandatory, since we don't implement options support yet. 
let options_len = 0; let header_len = OPTIONS_OFFSET + options_len; { let p = IPv4Packet::write_header(buf.as_mut(), PROTOCOL_TCP, src, dst) .unwrap() .with_payload_len_unchecked(payload_len, true); assert_eq!(p.compute_checksum(), 0); assert_eq!(p.total_len() as usize, p.len()); assert_eq!(p.len(), usize::from(header_len) + usize::from(payload_len)); } { let p = IPv4Packet::write_header(buf.as_mut(), PROTOCOL_TCP, src, dst) .unwrap() .with_options_and_payload_len_unchecked(options_len, payload_len, true); assert_eq!(p.compute_checksum(), 0); assert_eq!(p.total_len() as usize, p.len()); assert_eq!(p.len(), usize::from(header_len) + usize::from(payload_len)); } } #[test] fn test_speculative() { let mut buf = [0u8; 1000]; let mac = MacAddr::from_bytes_unchecked(&[0; 6]); let ip = Ipv4Addr::new(1, 2, 3, 4); let other_ip = Ipv4Addr::new(5, 6, 7, 8); { let mut eth = crate::dumbo::pdu::ethernet::EthernetFrame::write_incomplete( buf.as_mut(), mac, mac, 0, ) .unwrap(); IPv4Packet::from_bytes_unchecked(eth.inner_mut().payload_mut()) .set_destination_address(ip); } assert!(test_speculative_dst_addr(buf.as_ref(), ip)); { let mut eth = crate::dumbo::pdu::ethernet::EthernetFrame::write_incomplete( buf.as_mut(), mac, mac, 0, ) .unwrap(); IPv4Packet::from_bytes_unchecked(eth.inner_mut().payload_mut()) .set_destination_address(other_ip); } assert!(!test_speculative_dst_addr(buf.as_ref(), ip)); let small = [0u8; 1]; assert!(!test_speculative_dst_addr(small.as_ref(), ip)); } } ================================================ FILE: src/vmm/src/dumbo/pdu/mod.rs ================================================ // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 //! A module for interpreting byte slices as protocol data units (PDUs). //! //! A PDU represents data transmitted as a single unit during communication using a specific //! protocol. Ethernet frames, IP packets, and TCP segments are all examples of protocol data //! 
units. use std::fmt::Debug; use std::net::Ipv4Addr; use crate::dumbo::pdu::bytes::NetworkBytes; use crate::dumbo::pdu::ipv4::{PROTOCOL_TCP, PROTOCOL_UDP}; pub mod arp; pub mod bytes; pub mod ethernet; pub mod ipv4; pub mod tcp; /// This is the baseline definition of the `Incomplete` struct, which wraps a PDU that does is /// still missing some values or content. /// /// It's mostly important when writing PDUs, because fields like checksum /// can only be computed after the payload becomes known. Also, the length of the underlying slice /// should be equal to the actual size for a complete PDU. To that end, whenever a variable-length /// payload is involved, the slice is shrunk to an exact fit. The particular ways of completing an /// `Incomplete` are implemented for each specific PDU. #[derive(Debug)] pub struct Incomplete { inner: T, } impl Incomplete { #[inline] fn new(inner: T) -> Self { Incomplete { inner } } /// Returns a reference to the wrapped object. #[inline] pub fn inner(&self) -> &T { &self.inner } /// Returns a mutable reference to the wrapped object. #[inline] pub fn inner_mut(&mut self) -> &mut T { &mut self.inner } } #[repr(u8)] #[derive(Debug, Copy, Clone, PartialEq)] enum ChecksumProto { Tcp = PROTOCOL_TCP, Udp = PROTOCOL_UDP, } /// Computes the checksum of a TCP/UDP packet. Since both protocols use /// the same algorithm to compute the checksum. /// /// # Arguments /// * `bytes` - Raw bytes of a TCP packet or a UDP datagram /// * `src_addr` - IPv4 source address /// * `dst_addr` - IPv4 destination address /// * `protocol` - **must** be either `PROTOCOL_TCP` or `PROTOCOL_UDP` defined in `ipv4` module /// /// More details about TCP checksum computation can be found [here]. 
/// /// [here]: https://en.wikipedia.org/wiki/Transmission_Control_Protocol#Checksum_computation #[inline] fn compute_checksum( bytes: &T, src_addr: Ipv4Addr, dst_addr: Ipv4Addr, protocol: ChecksumProto, ) -> u16 { let mut sum = 0usize; let a = u32::from(src_addr) as usize; sum += a & 0xffff; sum += a >> 16; let b = u32::from(dst_addr) as usize; sum += b & 0xffff; sum += b >> 16; let len = bytes.len(); sum += protocol as usize; sum += len; for i in 0..len / 2 { sum += usize::from(bytes.ntohs_unchecked(i * 2)); } if !len.is_multiple_of(2) { sum += usize::from(bytes[len - 1]) << 8; } while sum >> 16 != 0 { sum = (sum & 0xffff) + (sum >> 16); } // Safe to unwrap due to the while loop above let mut csum = !u16::try_from(sum).unwrap(); // If a UDP packet checksum is 0, an all ones value is transmitted if protocol == ChecksumProto::Udp && csum == 0x0 { csum = !csum; } csum } ================================================ FILE: src/vmm/src/dumbo/pdu/tcp.rs ================================================ // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 //! Contains support for parsing and writing TCP segments. //! //! [Here]'s a useful depiction of the TCP header layout (watch out for the MSB 0 bit numbering.) //! //! 
[Here]: https://en.wikipedia.org/wiki/Transmission_Control_Protocol#TCP_segment_structure use std::cmp::min; use std::fmt::Debug; use std::net::Ipv4Addr; use std::num::NonZeroU16; use bitflags::bitflags; use super::Incomplete; use super::bytes::{InnerBytes, NetworkBytes, NetworkBytesMut}; use crate::dumbo::ByteBuffer; use crate::dumbo::pdu::ChecksumProto; const SOURCE_PORT_OFFSET: usize = 0; const DESTINATION_PORT_OFFSET: usize = 2; const SEQ_NUMBER_OFFSET: usize = 4; const ACK_NUMBER_OFFSET: usize = 8; const DATAOFF_RSVD_NS_OFFSET: usize = 12; const FLAGS_AFTER_NS_OFFSET: usize = 13; const WINDOW_SIZE_OFFSET: usize = 14; const CHECKSUM_OFFSET: usize = 16; const URG_POINTER_OFFSET: usize = 18; const OPTIONS_OFFSET: u8 = 20; const MAX_HEADER_LEN: u8 = 60; const OPTION_KIND_EOL: u8 = 0x00; const OPTION_KIND_NOP: u8 = 0x01; const OPTION_KIND_MSS: u8 = 0x02; const OPTION_LEN_MSS: u8 = 0x04; // An arbitrarily chosen value, used for sanity checks. const MSS_MIN: u16 = 100; bitflags! { /// Represents the TCP header flags, with the exception of `NS`. /// /// These values are only valid in conjunction with the [`flags_after_ns()`] method (and its /// associated setter method), which operates on the header byte containing every other flag /// besides `NS`. /// /// [`flags_after_ns()`]: struct.TcpSegment.html#method.flags_after_ns #[derive(Debug, Copy, Clone, PartialEq)] pub struct Flags: u8 { /// Congestion window reduced. const CWR = 1 << 7; /// ECN-echo. const ECE = 1 << 6; /// Urgent pointer. const URG = 1 << 5; /// The acknowledgement number field is valid. const ACK = 1 << 4; /// Push flag. const PSH = 1 << 3; /// Reset the connection. const RST = 1 << 2; /// SYN flag. const SYN = 1 << 1; /// FIN flag. const FIN = 1; } } /// Describes the errors which may occur while handling TCP segments. #[derive(Debug, PartialEq, Eq, thiserror::Error, displaydoc::Display)] pub enum TcpError { /// Invalid checksum. 
Checksum, /// A payload has been specified for the segment, but the maximum readable length is 0. EmptyPayload, /// Invalid header length. HeaderLen, /// The MSS option contains an invalid value. MssOption, /// The remaining segment length cannot accommodate the MSS option. MssRemaining, /// The specified slice is shorter than the header length. SliceTooShort, } // TODO: The implementation of TcpSegment is IPv4 specific in regard to checksum computation. Maybe // make it more generic at some point. /// Interprets the inner bytes as a TCP segment. #[derive(Debug)] pub struct TcpSegment<'a, T: 'a> { bytes: InnerBytes<'a, T>, } #[allow(clippy::len_without_is_empty)] impl TcpSegment<'_, T> { /// Returns the source port. #[inline] pub fn source_port(&self) -> u16 { self.bytes.ntohs_unchecked(SOURCE_PORT_OFFSET) } /// Returns the destination port. #[inline] pub fn destination_port(&self) -> u16 { self.bytes.ntohs_unchecked(DESTINATION_PORT_OFFSET) } /// Returns the sequence number. #[inline] pub fn sequence_number(&self) -> u32 { self.bytes.ntohl_unchecked(SEQ_NUMBER_OFFSET) } /// Returns the acknowledgement number (only valid if the `ACK` flag is set). #[inline] pub fn ack_number(&self) -> u32 { self.bytes.ntohl_unchecked(ACK_NUMBER_OFFSET) } /// Returns the header length, the value of the reserved bits, and whether the `NS` flag /// is set or not. #[inline] pub fn header_len_rsvd_ns(&self) -> (u8, u8, bool) { let value = self.bytes[DATAOFF_RSVD_NS_OFFSET]; let data_offset = value >> 4; let header_len = data_offset * 4; let rsvd = value & 0x0e; let ns = (value & 1) != 0; (header_len, rsvd, ns) } /// Returns the length of the header. #[inline] pub fn header_len(&self) -> u8 { self.header_len_rsvd_ns().0 } /// Returns the TCP header flags, with the exception of `NS`. #[inline] pub fn flags_after_ns(&self) -> Flags { Flags::from_bits_truncate(self.bytes[FLAGS_AFTER_NS_OFFSET]) } /// Returns the value of the `window size` header field. 
#[inline]
pub fn window_size(&self) -> u16 {
    self.bytes.ntohs_unchecked(WINDOW_SIZE_OFFSET)
}

/// Returns the value of the `checksum` header field.
#[inline]
pub fn checksum(&self) -> u16 {
    self.bytes.ntohs_unchecked(CHECKSUM_OFFSET)
}

/// Returns the value of the `urgent pointer` header field (only valid if the
/// `URG` flag is set).
#[inline]
pub fn urgent_pointer(&self) -> u16 {
    self.bytes.ntohs_unchecked(URG_POINTER_OFFSET)
}

/// Returns the TCP header options as an `[&u8]` slice.
///
/// # Panics
///
/// This method may panic if the value of `header_len` is invalid.
#[inline]
pub fn options_unchecked(&self, header_len: usize) -> &[u8] {
    // Options occupy the bytes between the fixed header and `header_len`.
    &self.bytes[usize::from(OPTIONS_OFFSET)..header_len]
}

/// Returns a slice which contains the payload of the segment. May panic if the value of
/// `header_len` is invalid.
///
/// # Panics
///
/// This method may panic if the value of `header_len` is invalid.
#[inline]
pub fn payload_unchecked(&self, header_len: usize) -> &[u8] {
    self.bytes.split_at(header_len).1
}

/// Returns the length of the segment.
#[inline]
pub fn len(&self) -> u16 {
    // NOTE: This appears to be a safe conversion in all current cases.
    // Packets are always set up in the context of an Ipv4Packet, which is
    // capped at a u16 size. However, I'd rather be safe here.
    u16::try_from(self.bytes.len()).unwrap_or(u16::MAX)
}

/// Returns a slice which contains the payload of the segment.
#[inline]
pub fn payload(&self) -> &[u8] {
    self.payload_unchecked(self.header_len().into())
}

/// Returns the length of the payload.
#[inline]
pub fn payload_len(&self) -> u16 {
    self.len() - u16::from(self.header_len())
}

/// Computes the TCP checksum of the segment. More details about TCP checksum computation can
/// be found [here].
///
/// [here]: https://en.wikipedia.org/wiki/Transmission_Control_Protocol#Checksum_computation
pub fn compute_checksum(&self, src_addr: Ipv4Addr, dst_addr: Ipv4Addr) -> u16 {
    crate::dumbo::pdu::compute_checksum(&self.bytes, src_addr, dst_addr, ChecksumProto::Tcp)
}

/// Parses TCP header options (only `MSS` is supported for now).
///
/// If no error is encountered, returns the `MSS` value, or `None` if the option is not
/// present.
///
/// # Panics
///
/// This method may panic if the value of `header_len` is invalid.
// NOTE(review): the return type reads `Result, TcpError>` — a generic argument (presumably
// `Option<NonZeroU16>`, per the `Ok(Some(NonZeroU16::new(..)))` below) appears to have been
// stripped by the extraction; the text is preserved verbatim. Confirm against upstream source.
pub fn parse_mss_option_unchecked(
    &self,
    header_len: usize,
) -> Result, TcpError> {
    let b = self.options_unchecked(header_len);
    let mut i = 0;

    // All TCP options (except EOL and NOP) are encoded using x bytes (x >= 2), where the first
    // byte represents the option kind, the second is the option length (including these first
    // two bytes), and finally the next x - 2 bytes represent option data. The length of
    // the MSS option is 4, so the option data encodes an u16 in network order.

    // The MSS option is 4 bytes wide, so we need at least 4 more bytes to look for it.
    while i + 3 < b.len() {
        match b[i] {
            OPTION_KIND_EOL => break,
            OPTION_KIND_NOP => {
                i += 1;
                continue;
            }
            OPTION_KIND_MSS => {
                // Read from option data (we skip checking if the len is valid).
                // TODO: To be super strict, we should make sure there aren't additional MSS
                // options present (which would be super wrong). Should we be super strict?
                let mss = b.ntohs_unchecked(i + 2);
                if mss < MSS_MIN {
                    return Err(TcpError::MssOption);
                }
                // The unwrap() is safe because mss >= MSS_MIN at this point.
                return Ok(Some(NonZeroU16::new(mss).unwrap()));
            }
            _ => {
                // Some other option; just skip opt_len bytes in total.
                // Per RFC 9293 (MUST-7), opt_len includes the kind and
                // length bytes so the minimum valid value is 2.
                let opt_len = b[i + 1] as usize;
                if opt_len < 2 {
                    return Err(TcpError::MssOption);
                }
                i += opt_len;
                continue;
            }
        }
    }
    Ok(None)
}

/// Interprets `bytes` as a TCP segment without any validity checks.
///
/// # Panics
///
/// This method does not panic, but further method calls on the resulting object may panic if
/// `bytes` contains invalid input.
#[inline]
pub fn from_bytes_unchecked(bytes: T) -> Self {
    TcpSegment {
        bytes: InnerBytes::new(bytes),
    }
}

/// Attempts to interpret `bytes` as a TCP segment, checking the validity of the header fields.
///
/// The `verify_checksum` parameter must contain the source and destination addresses from the
/// enclosing IPv4 packet if the TCP checksum must be validated.
// NOTE(review): `-> Result {` below is missing its generic arguments (presumably
// `Result<Self, TcpError>`); preserved verbatim from the extraction.
#[inline]
pub fn from_bytes(
    bytes: T,
    verify_checksum: Option<(Ipv4Addr, Ipv4Addr)>,
) -> Result {
    if bytes.len() < usize::from(OPTIONS_OFFSET) {
        return Err(TcpError::SliceTooShort);
    }
    let segment = Self::from_bytes_unchecked(bytes);
    // We skip checking if the reserved bits are 0b000 (and a couple of other things).
    let header_len = segment.header_len();
    if header_len < OPTIONS_OFFSET
        || u16::from(header_len) > min(u16::from(MAX_HEADER_LEN), segment.len())
    {
        return Err(TcpError::HeaderLen);
    }
    // A valid TCP checksum (over pseudo-header + segment) sums to zero.
    if let Some((src_addr, dst_addr)) = verify_checksum
        && segment.compute_checksum(src_addr, dst_addr) != 0
    {
        return Err(TcpError::Checksum);
    }
    Ok(segment)
}
}

// NOTE(review): this impl header reads `impl TcpSegment<'_, T> {` with no generic parameter
// declaration for `T`; the bound list (presumably `impl<'a, T: NetworkBytesMut + Debug>`)
// appears stripped by the extraction. Preserved verbatim.
impl TcpSegment<'_, T> {
/// Sets the source port.
#[inline]
pub fn set_source_port(&mut self, value: u16) -> &mut Self {
    self.bytes.htons_unchecked(SOURCE_PORT_OFFSET, value);
    self
}

/// Sets the destination port.
#[inline]
pub fn set_destination_port(&mut self, value: u16) -> &mut Self {
    self.bytes.htons_unchecked(DESTINATION_PORT_OFFSET, value);
    self
}

/// Sets the value of the sequence number field.
#[inline]
pub fn set_sequence_number(&mut self, value: u32) -> &mut Self {
    self.bytes.htonl_unchecked(SEQ_NUMBER_OFFSET, value);
    self
}

/// Sets the value of the acknowledgement number field.
#[inline]
pub fn set_ack_number(&mut self, value: u32) -> &mut Self {
    self.bytes.htonl_unchecked(ACK_NUMBER_OFFSET, value);
    self
}

/// Sets the value of the `ihl` header field based on `header_len` (which should be a multiple
/// of 4), clears the reserved bits, and sets the `NS` flag according to the last parameter.
// TODO: Check that header_len | 0b11 == 0 and the resulting data_offset is valid?
#[inline]
pub fn set_header_len_rsvd_ns(&mut self, header_len: u8, ns: bool) -> &mut Self {
    // The data offset is header_len / 4, stored in the top 4 bits of this byte, so
    // `header_len << 2` both divides by 4 and shifts into place; the reserved bits become 0.
    let mut value = header_len << 2;
    if ns {
        value |= 1;
    }
    self.bytes[DATAOFF_RSVD_NS_OFFSET] = value;
    self
}

/// Sets the value of the header byte containing every TCP flag except `NS`.
#[inline]
pub fn set_flags_after_ns(&mut self, flags: Flags) -> &mut Self {
    self.bytes[FLAGS_AFTER_NS_OFFSET] = flags.bits();
    self
}

/// Sets the value of the `window size` field.
#[inline]
pub fn set_window_size(&mut self, value: u16) -> &mut Self {
    self.bytes.htons_unchecked(WINDOW_SIZE_OFFSET, value);
    self
}

/// Sets the value of the `checksum` field.
#[inline]
pub fn set_checksum(&mut self, value: u16) -> &mut Self {
    self.bytes.htons_unchecked(CHECKSUM_OFFSET, value);
    self
}

/// Sets the value of the `urgent pointer` field.
#[inline]
pub fn set_urgent_pointer(&mut self, value: u16) -> &mut Self {
    self.bytes.htons_unchecked(URG_POINTER_OFFSET, value);
    self
}

/// Returns a mutable slice containing the segment payload.
///
/// # Panics
///
/// This method may panic if the value of `header_len` is invalid.
#[inline]
pub fn payload_mut_unchecked(&mut self, header_len: usize) -> &mut [u8] {
    self.bytes.split_at_mut(header_len).1
}

/// Returns a mutable slice containing the segment payload.
#[inline]
pub fn payload_mut(&mut self) -> &mut [u8] {
    let header_len = self.header_len();
    self.payload_mut_unchecked(header_len.into())
}

/// Writes a complete TCP segment.
///
/// # Arguments
///
/// * `buf` - Write the segment to this buffer.
/// * `src_port` - Source port.
/// * `dst_port` - Destination port.
/// * `seq_number` - Sequence number.
/// * `ack_number` - Acknowledgement number.
/// * `flags_after_ns` - TCP flags to set (except `NS`, which is always set to 0).
/// * `window_size` - Value to write in the `window size` field.
/// * `mss_option` - When a value is specified, use it to add a TCP MSS option to the header.
/// * `mss_remaining` - Represents an upper bound on the payload length (the number of bytes
///   used up by things like IP options have to be subtracted from the MSS). There is some
///   redundancy looking at this argument and the next one, so we might end up removing or
///   changing something.
/// * `payload` - May contain a buffer which holds payload data and the maximum amount of bytes
///   we should read from that buffer. When `None`, the TCP segment will carry no payload.
/// * `compute_checksum` - May contain the pair addresses from the enclosing IPv4 packet, which
///   are required for TCP checksum computation. Skip the checksum altogether when `None`.
// NOTE(review): generic arguments on this signature (`mss_option: Option`, the undeclared
// payload-buffer type `R`, and the bare `Result` return) appear stripped by the extraction;
// preserved verbatim. Confirm against upstream source.
#[allow(clippy::too_many_arguments)]
#[inline]
pub fn write_segment(
    buf: T,
    src_port: u16,
    dst_port: u16,
    seq_number: u32,
    ack_number: u32,
    flags_after_ns: Flags,
    window_size: u16,
    mss_option: Option,
    mss_remaining: u16,
    payload: Option<(&R, usize)>,
    compute_checksum: Option<(Ipv4Addr, Ipv4Addr)>,
) -> Result {
    // Write everything except ports/checksum, then fill those in via finalize().
    Ok(Self::write_incomplete_segment(
        buf,
        seq_number,
        ack_number,
        flags_after_ns,
        window_size,
        mss_option,
        mss_remaining,
        payload,
    )?
    .finalize(src_port, dst_port, compute_checksum))
}

/// Writes an incomplete TCP segment, which is missing the `source port`, `destination port`,
/// and `checksum` fields.
///
/// This method writes the rest of the segment, including data (when available). Only the `MSS`
/// option is supported for now. The `NS` flag, `URG` flag, and `urgent pointer` field are set
/// to 0.
///
/// # Arguments
///
/// * `buf` - Write the segment to this buffer.
/// * `seq_number` - Sequence number.
/// * `ack_number` - Acknowledgement number.
/// * `flags_after_ns` - TCP flags to set (except `NS`, which is always set to 0).
/// * `window_size` - Value to write in the `window size` field.
/// * `mss_option` - When a value is specified, use it to add a TCP MSS option to the header.
/// * `mss_remaining` - Represents an upper bound on the payload length (the number of bytes
///   used up by things like IP options have to be subtracted from the MSS). There is some
///   redundancy looking at this argument and the next one, so we might end up removing or
///   changing something.
/// * `payload` - May contain a buffer which holds payload data and the maximum amount of bytes
///   we should read from that buffer. When `None`, the TCP segment will carry no payload.
// Marked inline because a lot of code vanishes after constant folding when
// we don't add TCP options, or when mss_remaining is actually a constant, etc.
#[allow(clippy::too_many_arguments)]
#[inline]
pub fn write_incomplete_segment(
    buf: T,
    seq_number: u32,
    ack_number: u32,
    flags_after_ns: Flags,
    window_size: u16,
    mss_option: Option,
    mss_remaining: u16,
    payload: Option<(&R, usize)>,
) -> Result, TcpError> {
    let mut mss_left = mss_remaining;

    // We're going to need at least this many bytes.
    let mut segment_len = u16::from(OPTIONS_OFFSET);

    // The TCP options will require this much more bytes.
    let options_len = if mss_option.is_some() {
        // The MSS option itself consumes part of the MSS budget.
        mss_left = mss_left
            .checked_sub(OPTION_LEN_MSS.into())
            .ok_or(TcpError::MssRemaining)?;
        OPTION_LEN_MSS
    } else {
        0
    };

    segment_len += u16::from(options_len);
    if buf.len() < usize::from(segment_len) {
        return Err(TcpError::SliceTooShort);
    }

    // The unchecked call is safe because buf.len() >= segment_len.
    let mut segment = Self::from_bytes_unchecked(buf);

    segment
        .set_sequence_number(seq_number)
        .set_ack_number(ack_number)
        .set_header_len_rsvd_ns(OPTIONS_OFFSET + options_len, false)
        .set_flags_after_ns(flags_after_ns)
        .set_window_size(window_size)
        .set_urgent_pointer(0);

    // Let's write the MSS option if we have to.
    if let Some(value) = mss_option {
        segment.bytes[usize::from(OPTIONS_OFFSET)] = OPTION_KIND_MSS;
        segment.bytes[usize::from(OPTIONS_OFFSET) + 1] = OPTION_LEN_MSS;
        segment
            .bytes
            .htons_unchecked(usize::from(OPTIONS_OFFSET) + 2, value);
    }

    let payload_bytes_count = if let Some((payload_buf, max_payload_bytes)) = payload {
        let left_to_read = min(payload_buf.len(), max_payload_bytes);
        // The subtraction makes sense because we previously checked that
        // buf.len() >= segment_len.
        let mut room_for_payload = min(segment.len() - segment_len, mss_left);
        // The unwrap is safe because room_for_payload is a u16.
        room_for_payload =
            u16::try_from(min(usize::from(room_for_payload), left_to_read)).unwrap();
        if room_for_payload == 0 {
            return Err(TcpError::EmptyPayload);
        }
        // Copy `room_for_payload` bytes into `payload_buf` using `offset=0`.
        // Guaranteed not to panic since we checked above that:
        // `offset + room_for_payload <= payload_buf.len()`.
        payload_buf.read_to_slice(
            0,
            &mut segment.bytes
                [usize::from(segment_len)..usize::from(segment_len + room_for_payload)],
        );
        room_for_payload
    } else {
        0
    };
    segment_len += payload_bytes_count;

    // This is ok because segment_len <= buf.len().
    segment.bytes.shrink_unchecked(segment_len.into());
    // Shrink the resulting segment to a slice of exact size, so using self.len() makes sense.
    Ok(Incomplete::new(segment))
}
}

// NOTE(review): `Incomplete>` below is missing its generic argument (presumably
// `Incomplete<TcpSegment<'a, T>>`); preserved verbatim from the extraction.
impl<'a, T: NetworkBytesMut + Debug> Incomplete> {
/// Transforms `self` into a `TcpSegment` by specifying values for the `source port`,
/// `destination port`, and (optionally) the information required to compute the TCP checksum.
#[inline]
pub fn finalize(
    mut self,
    src_port: u16,
    dst_port: u16,
    compute_checksum: Option<(Ipv4Addr, Ipv4Addr)>,
) -> TcpSegment<'a, T> {
    self.inner.set_source_port(src_port);
    self.inner.set_destination_port(dst_port);
    if let Some((src_addr, dst_addr)) = compute_checksum {
        // Set this to 0 first.
self.inner.set_checksum(0);
        // The checksum must be computed over a segment whose checksum field is zero.
        let checksum = self.inner.compute_checksum(src_addr, dst_addr);
        self.inner.set_checksum(checksum);
    }
    self.inner
}
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_set_get() {
        let mut a = [0u8; 100];
        let mut p = TcpSegment::from_bytes_unchecked(a.as_mut());

        assert_eq!(p.source_port(), 0);
        p.set_source_port(123);
        assert_eq!(p.source_port(), 123);

        assert_eq!(p.destination_port(), 0);
        p.set_destination_port(322);
        assert_eq!(p.destination_port(), 322);

        assert_eq!(p.sequence_number(), 0);
        p.set_sequence_number(1_234_567);
        assert_eq!(p.sequence_number(), 1_234_567);

        assert_eq!(p.ack_number(), 0);
        p.set_ack_number(345_234);
        assert_eq!(p.ack_number(), 345_234);

        assert_eq!(p.header_len_rsvd_ns(), (0, 0, false));
        assert_eq!(p.header_len(), 0);
        // Header_len must be a multiple of 4 here to be valid.
        let header_len = 60;
        p.set_header_len_rsvd_ns(header_len, true);
        assert_eq!(p.header_len_rsvd_ns(), (header_len, 0, true));
        assert_eq!(p.header_len(), header_len);

        assert_eq!(p.flags_after_ns().bits(), 0);
        p.set_flags_after_ns(Flags::SYN | Flags::URG);
        assert_eq!(p.flags_after_ns(), Flags::SYN | Flags::URG);

        assert_eq!(p.window_size(), 0);
        p.set_window_size(60000);
        assert_eq!(p.window_size(), 60000);

        assert_eq!(p.checksum(), 0);
        p.set_checksum(4321);
        assert_eq!(p.checksum(), 4321);

        assert_eq!(p.urgent_pointer(), 0);
        p.set_urgent_pointer(5554);
        assert_eq!(p.urgent_pointer(), 5554);
    }

    #[test]
    fn test_constructors() {
        let mut a = [1u8; 1460];
        let b = [2u8; 1000];
        let c = [3u8; 2000];

        let src_addr = Ipv4Addr::new(10, 1, 2, 3);
        let dst_addr = Ipv4Addr::new(192, 168, 44, 77);
        let src_port = 1234;
        let dst_port = 5678;
        let seq_number = 11_111_222;
        let ack_number = 34_566_543;
        let flags_after_ns = Flags::SYN | Flags::RST;
        let window_size = 19999;
        let mss_left = 1460;
        let mss_option = Some(mss_left);
        let payload = Some((b.as_ref(), b.len()));

        let header_len = OPTIONS_OFFSET + OPTION_LEN_MSS;

        let segment_len = {
            let mut segment = TcpSegment::write_segment(
                a.as_mut(),
                src_port,
                dst_port,
                seq_number,
                ack_number,
                flags_after_ns,
                window_size,
                mss_option,
                mss_left,
                payload,
                Some((src_addr, dst_addr)),
            )
            .unwrap();
            assert_eq!(segment.source_port(), src_port);
            assert_eq!(segment.destination_port(), dst_port);
            assert_eq!(segment.sequence_number(), seq_number);
            assert_eq!(segment.ack_number(), ack_number);
            assert_eq!(segment.header_len_rsvd_ns(), (header_len, 0, false));
            assert_eq!(segment.flags_after_ns(), flags_after_ns);
            assert_eq!(segment.window_size(), window_size);
            // Round-trip the checksum: recomputing over a zeroed checksum field must
            // reproduce it, and verifying a correct segment must sum to 0.
            let checksum = segment.checksum();
            segment.set_checksum(0);
            let computed_checksum = segment.compute_checksum(src_addr, dst_addr);
            assert_eq!(checksum, computed_checksum);
            segment.set_checksum(checksum);
            assert_eq!(segment.compute_checksum(src_addr, dst_addr), 0);
            assert_eq!(segment.urgent_pointer(), 0);
            {
                let options = segment.options_unchecked(header_len.into());
                assert_eq!(options.len(), usize::from(OPTION_LEN_MSS));
                assert_eq!(options[0], OPTION_KIND_MSS);
                assert_eq!(options[1], OPTION_LEN_MSS);
                assert_eq!(options.ntohs_unchecked(2), mss_left);
            }
            // Payload was smaller than mss_left after options.
            assert_eq!(
                usize::from(segment.len()),
                usize::from(header_len) + b.len(),
            );
            segment.len()
            // Mutable borrow of a goes out of scope.
        };

        {
            let segment =
                TcpSegment::from_bytes(&a[..segment_len.into()], Some((src_addr, dst_addr)))
                    .unwrap();
            assert_eq!(
                segment.parse_mss_option_unchecked(header_len.into()),
                Ok(Some(NonZeroU16::new(mss_left).unwrap()))
            );
        }

        // Let's quickly see what happens when the payload buf is larger than our mutable slice.
        {
            let segment_len = TcpSegment::write_segment(
                a.as_mut(),
                src_port,
                dst_port,
                seq_number,
                ack_number,
                flags_after_ns,
                window_size,
                mss_option,
                mss_left,
                Some((c.as_ref(), c.len())),
                Some((src_addr, dst_addr)),
            )
            .unwrap()
            .len();
            assert_eq!(segment_len, mss_left);
        }

        // Now let's test the error value for from_bytes().

        // Using a helper function here instead of a closure because it's hard (impossible?) to
        // specify lifetime bounds for closure arguments.
        fn p(buf: &mut [u8]) -> TcpSegment<'_, &mut [u8]> {
            TcpSegment::from_bytes_unchecked(buf)
        }

        // Just a helper closure.
        let look_for_error = |buf: &[u8], err: TcpError| {
            assert_eq!(
                TcpSegment::from_bytes(buf, Some((src_addr, dst_addr))).unwrap_err(),
                err
            );
        };

        // Header length too short.
        p(a.as_mut()).set_header_len_rsvd_ns(OPTIONS_OFFSET.checked_sub(1).unwrap(), false);
        look_for_error(a.as_ref(), TcpError::HeaderLen);

        // Header length too large.
        p(a.as_mut()).set_header_len_rsvd_ns(MAX_HEADER_LEN.checked_add(4).unwrap(), false);
        look_for_error(a.as_ref(), TcpError::HeaderLen);

        // The previously set checksum should be valid.
        assert_eq!(
            p(a.as_mut())
                .set_header_len_rsvd_ns(header_len, false)
                .compute_checksum(src_addr, dst_addr),
            0
        );

        // Let's make it invalid.
        let checksum = p(a.as_mut()).checksum();
        p(a.as_mut()).set_checksum(checksum.wrapping_add(1));
        look_for_error(a.as_ref(), TcpError::Checksum);

        // Now we use a very small buffer.
        let mut small_buf = [0u8; 1];
        look_for_error(small_buf.as_ref(), TcpError::SliceTooShort);

        assert_eq!(
            TcpSegment::write_segment(
                small_buf.as_mut(),
                src_port,
                dst_port,
                seq_number,
                ack_number,
                flags_after_ns,
                window_size,
                mss_option,
                mss_left,
                payload,
                Some((src_addr, dst_addr)),
            )
            .unwrap_err(),
            TcpError::SliceTooShort
        );

        // Make sure we get the proper error for an insufficient value of mss_remaining.
        assert_eq!(
            TcpSegment::write_segment(
                small_buf.as_mut(),
                src_port,
                dst_port,
                seq_number,
                ack_number,
                flags_after_ns,
                window_size,
                mss_option,
                0,
                payload,
                Some((src_addr, dst_addr)),
            )
            .unwrap_err(),
            TcpError::MssRemaining
        );
    }

    #[test]
    fn test_invalid_tcp_option_len() {
        // Build a minimal segment with header_len = 24 (OPTIONS_OFFSET + 4 bytes of options).
        let mut buf = [0u8; 100];
        let header_len: u8 = OPTIONS_OFFSET + 4;
        {
            let mut seg = TcpSegment::from_bytes_unchecked(buf.as_mut());
            seg.set_header_len_rsvd_ns(header_len, false);
        }
        // Write an unknown option kind (0xFF) with opt_len = 0 (invalid, < 2).
        let opts_start = usize::from(OPTIONS_OFFSET);
        buf[opts_start] = 0xFF;
        buf[opts_start + 1] = 0;

        let seg = TcpSegment::from_bytes_unchecked(buf.as_ref());
        assert_eq!(
            seg.parse_mss_option_unchecked(header_len.into()),
            Err(TcpError::MssOption)
        );
    }
}


================================================
FILE: src/vmm/src/dumbo/tcp/connection.rs
================================================
// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

//! This module contains a minimalist TCP [`Connection`] implementation, which only supports
//! passive open scenarios, and some auxiliary logic and data structures.
//!
//! [`Connection`]: struct.Connection.html

use std::fmt::Debug;
use std::num::{NonZeroU16, NonZeroU64, NonZeroUsize, Wrapping};

use bitflags::bitflags;
use vmm_sys_util::rand::xor_pseudo_rng_u32;

use crate::dumbo::ByteBuffer;
use crate::dumbo::pdu::Incomplete;
use crate::dumbo::pdu::bytes::NetworkBytes;
use crate::dumbo::pdu::tcp::{Flags as TcpFlags, TcpError as TcpSegmentError, TcpSegment};
use crate::dumbo::tcp::{
    MAX_WINDOW_SIZE, MSS_DEFAULT, NextSegmentStatus, RstConfig, seq_after, seq_at_or_after,
};

bitflags! {
    // We use a set of flags, instead of a state machine, to represent the connection status. Some
    // parts of the status information are reflected in other fields of the Connection struct, such
    // as Connection::fin_received.
    #[derive(Debug, Clone, PartialEq)]
    struct ConnStatusFlags: u8 {
        const SYN_RECEIVED = 1;
        const SYNACK_SENT = 1 << 1;
        const ESTABLISHED = 1 << 2;
        // We signal the end of the TX half by setting Connection.send_fin to Some(sequence_number),
        // and use this flag to record that at least one FIN segment has been sent.
const FIN_SENT = 1 << 3;
        // The other endpoint has ACKed our FIN.
        const FIN_ACKED = 1 << 4;
        // The connection is reset, because we either sent, or received a RST segment.
        const RESET = 1 << 5;
    }
}

bitflags! {
    /// Represents any unusual conditions which may occur when receiving a TCP segment.
    #[derive(Debug, Clone, Copy, PartialEq)]
    pub struct RecvStatusFlags: u16 {
        /// The acknowledgement number is invalid.
        const INVALID_ACK = 1;
        /// The connection received a duplicate ACK.
        const DUP_ACK = 1 << 1;
        /// The connection received a data segment which does not fall within the limits of the
        /// current receive window.
        const SEGMENT_BEYOND_RWND = 1 << 2;
        /// The connection received a data segment, but the sequence number does not match the
        /// next expected sequence number.
        const UNEXPECTED_SEQ = 1 << 3;
        /// The other endpoint advertised a receive window edge which has been moved to the left.
        const REMOTE_RWND_EDGE = 1 << 4;
        /// The other endpoint transmitted additional data after sending a `FIN`.
        const DATA_BEYOND_FIN = 1 << 5;
        /// The connection received a valid `RST` segment.
        const RESET_RECEIVED = 1 << 6;
        /// The connection received an invalid `RST` segment.
        const INVALID_RST = 1 << 7;
        /// The connection received an invalid segment for its current state.
        const INVALID_SEGMENT = 1 << 8;
        /// The connection is resetting, and will switch to being reset after getting the
        /// chance to transmit a `RST` segment.
        const CONN_RESETTING = 1 << 9;
        /// The connection received a `FIN` whose sequence number does not match the next
        /// expected sequence number.
        const INVALID_FIN = 1 << 10;
    }
}

/// Defines a segment payload source.
///
/// When not `None`, it contains a [`ByteBuffer`] which holds the actual data, and the sequence
/// number associated with the first byte from the buffer.
///
/// [`ByteBuffer`]: ../../trait.ByteBuffer.html
// R should have the trait bound R: ByteBuffer, but bounds are ignored on type aliases.
// NOTE(review): `Wrapping` below appears without its generic argument (presumably
// `Wrapping<u32>`, matching the sequence-number fields of `Connection`); text preserved
// verbatim from the extraction.
pub type PayloadSource<'a, R> = Option<(&'a R, Wrapping)>;

/// Describes errors which may occur during a passive open.
#[derive(Debug, PartialEq, Eq, thiserror::Error, displaydoc::Display)]
pub enum PassiveOpenError {
    /// The incoming segment is not a valid `SYN`.
    InvalidSyn,
    /// The `SYN` segment carries an invalid `MSS` option.
    MssOption,
}

/// Describes errors which may occur when an existing connection receives a TCP segment.
#[derive(Debug, PartialEq, Eq, thiserror::Error, displaydoc::Display)]
pub enum RecvError {
    /// The payload length is larger than the receive buffer size.
    BufferTooSmall,
    /// The connection cannot receive the segment because it has been previously reset.
    ConnectionReset,
}

/// Describes errors which may occur when a connection attempts to write a segment.
/// Needs `rustfmt::skip` to make multiline comments work
#[rustfmt::skip]
#[derive(Debug, PartialEq, Eq, thiserror::Error, displaydoc::Display)]
pub enum WriteNextError {
    /// The connection cannot write the segment because it has been previously reset.
    ConnectionReset,
    /// The write sends additional data after a `FIN` has been transmitted.
    DataAfterFin,
    /** The remaining MSS (which can be reduced by IP and/or TCP options) is not large enough to \
    write the segment. */
    MssRemaining,
    // The payload source specifies a buffer larger than [`MAX_WINDOW_SIZE`].
    //
    // [`MAX_WINDOW_SIZE`]: ../constant.MAX_WINDOW_SIZE.html
    /// The payload source is too large.
    PayloadBufTooLarge,
    /// The payload source does not contain the first sequence number that should be sent.
    PayloadMissingSeq,
    /// An error occurred during the actual write to the buffer: {0}
    TcpSegment(#[from] TcpSegmentError),
}

/// Contains the state information and implements the logic for a minimalist TCP connection.
///
/// One particular thing is that whenever the connection sends a `RST` segment, it will also stop
/// working itself. This is just a design decision for our envisioned use cases;
/// improvements/changes may happen in the future (this also goes for other aspects of the
/// current implementation).
///
/// A `Connection` object can only be created via passive open, and will not recognize/use any TCP
/// options except `MSS` during the handshake. The associated state machine is similar to how
/// TCP normally functions, but there are some differences:
///
/// * Since only passive opens are supported, a `Connection` can only be instantiated in response to
///   an incoming `SYN` segment. If the segment is valid, it will start directly in a state called
///   `SYN_RECEIVED`. The valid events at this point are receiving a retransmission of the previous
///   `SYN` (which does nothing), and getting the chance to write a `SYNACK`, which also moves the
///   connection to the `SYNACK_SENT` state. Any incoming segment which is not a copy of the
///   previous `SYN` will reset the connection.
/// * In the `SYNACK_SENT` state, the connection awaits an `ACK` for the `SYNACK`. A retransmission
///   of the original `SYN` moves the state back to `SYN_RECEIVED`. A valid `ACK` advances the state
///   to `ESTABLISHED`. Any unexpected/invalid segment resets the connection.
/// * While `ESTABLISHED`, the connection will only reset if it receives a `RST` or a `SYN`. Invalid
///   segments are simply ignored. `FIN` handling is simplifed: when [`close`] is invoked the
///   connection records the `FIN` sequence number, and starts setting the `FIN` flag (when
///   possible) on outgoing segments. A `FIN` from the other endpoint is only taken into
///   consideration if it has the next expected sequence number. When the connection has both sent
///   and received a `FIN`, it marks itself as being done. There's no equivalent for the `TIME_WAIT`
///   TCP state.
///
/// The current implementation does not do any kind of congestion control, expects segments to
/// arrive in order, triggers a retransmission after the first duplicate `ACK`, and relies on the
/// user to supply an opaque `u64` timestamp value when invoking send or receive functionality. The
/// timestamps must be non-decreasing, and are mainly used for retransmission timeouts.
///
/// See [mmds-design](https://github.com/firecracker-microvm/firecracker/blob/main/docs/mmds/mmds-design.md#dumbo)
/// for why we are able to make these simplifications. Specifically, we want to stress that no
/// traffic handled by dumbo ever leaves a microVM.
///
/// [`close`]: #method.close
// NOTE(review): several field types below (`Wrapping`, `Option>`, `Option`) appear with their
// generic arguments stripped by the extraction (presumably `Wrapping<u32>`,
// `Option<Wrapping<u32>>`, `Option<RstConfig>`); text preserved verbatim.
#[derive(Debug, Clone)]
pub struct Connection {
    // The sequence number to ACK at the next opportunity. This is 1 + the highest received
    // in-order sequence number.
    ack_to_send: Wrapping,
    // The highest ACK we received from the other end of the connection.
    highest_ack_received: Wrapping,
    // The sequence number of the first byte which has NOT yet been sent to the other endpoint.
    first_not_sent: Wrapping,
    // The right edge of the local receive window. We shouldn't receive any data past this point.
    local_rwnd_edge: Wrapping,
    // The right edge of the remote receive window. We shouldn't send any data past this point.
    remote_rwnd_edge: Wrapping,
    // The last time we received an ACK which advanced the receive window. Only makes sense as
    // long as we seq_after(first_not_sent, highest_ack_received), and if we sent something that
    // takes up sequence number space.
    rto_start: u64,
    // How much time can pass after rto_start, without making progress in the ACK space, before a
    // retransmission is triggered.
    rto_period: u64,
    // How many retransmissions triggered before receiving a valid ACK from the other endpoint.
    rto_count: u16,
    // When rto_count reaches this value, the next retransmission will actually reset the
    // connection.
    rto_count_max: u16,
    // Set to the FIN sequence number received from the other endpoint.
    fin_received: Option>,
    // When set, it represents the sequence number of the FIN byte which closes our end of the
    // connection. No data may be sent past that point.
    send_fin: Option>,
    // If some, send a RST segment with the specified sequence and ACK numbers, and mark the
    // connection as reset afterwards. The second option determines whether we set the ACK flag
    // on the RST segment.
    send_rst: Option,
    // The MSS used when sending data segments.
    mss: u16,
    // If true, send an ACK segment at the first opportunity. ACKs can piggyback data segments, so
    // we'll only send an empty ACK segment if we can't transmit any data.
    pending_ack: bool,
    // We've got a duplicate ACK, so we'll retransmit the highest ACKed sequence number at the
    // first opportunity. Unlike regular TCP, we retransmit after the first duplicate ACK.
    dup_ack: bool,
    status_flags: ConnStatusFlags,
}

// Extracts the MSS from the segment's options, falling back to MSS_DEFAULT when the
// option is absent and mapping parse failures to PassiveOpenError::MssOption.
// NOTE(review): `&TcpSegment` and the bare `Result` here are missing generic arguments
// (stripped by the extraction); preserved verbatim.
fn parse_mss_option(
    segment: &TcpSegment,
) -> Result {
    match segment.parse_mss_option_unchecked(segment.header_len().into()) {
        Ok(Some(value)) => Ok(value.get()),
        Ok(None) => Ok(MSS_DEFAULT),
        Err(_) => Err(PassiveOpenError::MssOption),
    }
}

// A valid SYN for our purposes carries exactly the SYN flag and no payload.
fn is_valid_syn(segment: &TcpSegment) -> bool {
    segment.flags_after_ns() == TcpFlags::SYN && segment.payload_len() == 0
}

impl Connection {
    /// Attempts to create a new `Connection` in response to an incoming `SYN` segment.
    ///
    /// # Arguments
    ///
    /// * `segment` - The incoming `SYN`.
    /// * `local_rwnd_size` - Initial size of the local receive window.
    /// * `rto_period` - How long the connection waits before a retransmission timeout fires for the
    ///   first segment which has not been acknowledged yet. This uses an opaque time unit.
    /// * `rto_count_max` - How many consecutive timeout-based retransmission may occur before the
    ///   connection resets itself.
pub fn passive_open( segment: &TcpSegment, local_rwnd_size: u32, rto_period: NonZeroU64, rto_count_max: NonZeroU16, ) -> Result { // We don't accepting anything other than a SYN segment here. if !is_valid_syn(segment) { return Err(PassiveOpenError::InvalidSyn); } // TODO: If we ever implement window scaling, change the part that computes // remote_rwnd_edge below. // We only care about the MSS option for now. let mss = parse_mss_option(segment)?; // This is going to get sent on the SYNACK. let ack_to_send = Wrapping(segment.sequence_number()) + Wrapping(1); // Let's pick the initial sequence number. let isn = Wrapping(xor_pseudo_rng_u32()); let first_not_sent = isn + Wrapping(1); let remote_rwnd_edge = first_not_sent + Wrapping(u32::from(segment.window_size())); Ok(Connection { ack_to_send, highest_ack_received: isn, // The ISN is sent over the SYNACK, and this is the next sequence number. first_not_sent, local_rwnd_edge: ack_to_send + Wrapping(local_rwnd_size), // We have no information about this yet. It will get updated as the connection reaches // the ESTABLISHED state. 
remote_rwnd_edge, rto_start: 0, rto_period: rto_period.get(), rto_count: 0, rto_count_max: rto_count_max.get(), fin_received: None, send_fin: None, send_rst: None, mss, pending_ack: false, dup_ack: false, status_flags: ConnStatusFlags::SYN_RECEIVED, }) } fn flags_intersect(&self, flags: ConnStatusFlags) -> bool { self.status_flags.intersects(flags) } fn set_flags(&mut self, flags: ConnStatusFlags) { self.status_flags.insert(flags); } fn clear_flags(&mut self, flags: ConnStatusFlags) { self.status_flags.remove(flags); } fn syn_received(&self) -> bool { self.flags_intersect(ConnStatusFlags::SYN_RECEIVED) } fn synack_pending(&self) -> bool { self.syn_received() && !self.synack_sent() } fn synack_sent(&self) -> bool { self.flags_intersect(ConnStatusFlags::SYNACK_SENT) } fn is_reset(&self) -> bool { self.flags_intersect(ConnStatusFlags::RESET) } fn fin_sent(&self) -> bool { self.flags_intersect(ConnStatusFlags::FIN_SENT) } fn fin_acked(&self) -> bool { self.flags_intersect(ConnStatusFlags::FIN_ACKED) } fn is_same_syn(&self, segment: &TcpSegment) -> bool { // This only really makes sense before getting into ESTABLISHED, but that's fine // because we only use it before that point. if !is_valid_syn(segment) || self.ack_to_send.0 != segment.sequence_number().wrapping_add(1) { return false; } matches!(parse_mss_option(segment), Ok(mss) if mss == self.mss) } fn reset_for_segment(&mut self, s: &TcpSegment) { if !self.rst_pending() { self.send_rst = Some(RstConfig::new(s)); } } fn rst_pending(&self) -> bool { self.send_rst.is_some() } fn rto_expired(&self, now: u64) -> bool { now - self.rto_start >= self.rto_period } // We send a FIN control segment if every data byte up to the self.send_fin sequence number // has been ACKed by the other endpoint, and no FIN has been previously sent. 
fn can_send_first_fin(&self) -> bool {
    !self.fin_sent()
        && matches!(self.send_fin, Some(fin_seq) if fin_seq == self.highest_ack_received)
}

// Returns the window size which should be written to an outgoing segment. This is going to be
// even more useful when we'll support window scaling.
fn local_rwnd(&self) -> u16 {
    let rwnd = (self.local_rwnd_edge - self.ack_to_send).0;
    // Without window scaling the advertised window caps out at u16::MAX.
    u16::try_from(rwnd).unwrap_or(u16::MAX)
}

// Will actually become meaningful when/if we implement window scaling.
fn remote_window_size(&self, window_size: u16) -> u32 {
    u32::from(window_size)
}

// Computes the remote rwnd edge given the ACK number and window size from an incoming segment.
fn compute_remote_rwnd_edge(&self, ack: Wrapping, window_size: u16) -> Wrapping {
    ack + Wrapping(self.remote_window_size(window_size))
}

// Has this name just in case the pending_ack status will be more than just some boolean at
// some point in the future.
fn enqueue_ack(&mut self) {
    self.pending_ack = true;
}

/// Closes this half of the connection.
///
/// Subsequent calls after the first one do not have any effect. The sequence number of the
/// `FIN` is the first sequence number not yet sent at this point.
#[inline]
pub fn close(&mut self) {
    if self.send_fin.is_none() {
        self.send_fin = Some(self.first_not_sent);
    }
}

/// Returns a valid configuration for a `RST` segment, which can be sent to the other
/// endpoint to signal the connection should be reset.
#[inline]
pub fn make_rst_config(&self) -> RstConfig {
    if self.is_established() {
        RstConfig::Seq(self.first_not_sent.0)
    } else {
        RstConfig::Ack(self.ack_to_send.0)
    }
}

/// Specifies that a `RST` segment should be sent to the other endpoint, and then the
/// connection should be destroyed.
#[inline]
pub fn reset(&mut self) {
    if !self.rst_pending() {
        self.send_rst = Some(self.make_rst_config());
    }
}

/// Returns `true` if the connection is past the `ESTABLISHED` point.
#[inline]
pub fn is_established(&self) -> bool {
    self.flags_intersect(ConnStatusFlags::ESTABLISHED)
}

/// Returns `true` if a `FIN` has been received.
#[inline]
pub fn fin_received(&self) -> bool {
    self.fin_received.is_some()
}

// TODO: The description of this method is also a TODO in disguise.
/// Returns `true` if the connection is done communicating with the other endpoint.
///
/// Maybe it would be a good idea to return true only after our FIN has also been ACKed?
/// Otherwise, when using the TCP handler there's pretty much always going to be an ACK for the
/// FIN that's going to trigger a gratuitous RST (best case), or can even be considered valid if
/// a new connection is created meanwhile using the same tuple and we get very unlucky (worst
/// case, extremely unlikely though).
#[inline]
pub fn is_done(&self) -> bool {
    self.is_reset() || (self.fin_received() && self.flags_intersect(ConnStatusFlags::FIN_SENT))
}

/// Returns the first sequence number which has not been sent yet for the current window.
#[inline]
pub fn first_not_sent(&self) -> Wrapping {
    self.first_not_sent
}

/// Returns the highest acknowledgement number received for the current window.
#[inline]
pub fn highest_ack_received(&self) -> Wrapping {
    self.highest_ack_received
}

/// Advances the right edge of the local receive window.
///
/// This is effectively allowing the other endpoint to send more data, because no byte can be
/// sent unless its sequence number falls into the receive window.
// TODO: return the actual advance value here
#[inline]
pub fn advance_local_rwnd_edge(&mut self, value: u32) {
    let v = Wrapping(value);
    let max_w = Wrapping(MAX_WINDOW_SIZE);
    // Current window size: distance between the right edge and the next expected byte.
    let current_w = self.local_rwnd_edge - self.ack_to_send;
    // Enqueue an ACK if we have to let the other endpoint know the window is opening.
if current_w.0 == 0 { self.enqueue_ack(); } if v + current_w > max_w { self.local_rwnd_edge = self.ack_to_send + max_w; } else { self.local_rwnd_edge += v; } } /// Returns the right edge of the receive window advertised by the other endpoint. #[inline] pub fn remote_rwnd_edge(&self) -> Wrapping { self.remote_rwnd_edge } /// Returns `true` if a retransmission caused by the reception of a duplicate `ACK` is pending. #[inline] pub fn dup_ack_pending(&self) -> bool { self.dup_ack } /// Describes whether a control segment can be sent immediately, a retransmission is pending, /// or there's nothing to transmit until more segments are received. /// /// This function does not tell whether any data segments can/will be sent, because the /// Connection itself does not control the send buffer. Thus the information returned here /// only pertains to control segments and timeout expiry. Data segment related status will /// be reported by higher level components, which also manage the contents of the send buffer. #[inline] pub fn control_segment_or_timeout_status(&self) -> NextSegmentStatus { if self.synack_pending() || self.rst_pending() || self.can_send_first_fin() || self.pending_ack { NextSegmentStatus::Available } else if self.highest_ack_received != self.first_not_sent { NextSegmentStatus::Timeout(self.rto_start + self.rto_period) } else { NextSegmentStatus::Nothing } } // We use this helper method to set up self.send_rst and prepare a return value in one go. It's // only used by the receive_segment() method. fn reset_for_segment_helper( &mut self, s: &TcpSegment, flags: RecvStatusFlags, ) -> Result<(Option, RecvStatusFlags), RecvError> { self.reset_for_segment(s); Ok((None, RecvStatusFlags::CONN_RESETTING | flags)) } /// Handles an incoming segment. /// /// When no errors occur, returns a pair consisting of how many /// bytes (if any) were received, and whether any unusual conditions arose while processing the /// segment. 
/// Since a `Connection` does not have its own internal buffer, `buf` is required to
/// store any data carried by incoming segments.
///
/// # Arguments
///
/// * `s` - The incoming segment.
/// * `buf` - The receive buffer where payload data (if any) from `s` is going to be written.
/// * `now` - An opaque timestamp representing the current moment in time.
pub fn receive_segment(
    &mut self,
    s: &TcpSegment,
    buf: &mut [u8],
    now: u64,
) -> Result<(Option, RecvStatusFlags), RecvError> {
    // A reset (pending or completed) connection no longer accepts segments.
    if self.rst_pending() || self.is_reset() {
        return Err(RecvError::ConnectionReset);
    }

    // TODO: The following logic fully makes sense only for a passive open (which is what we
    // currently support). Things must change a bit if/when we also implement active opens.

    let segment_flags = s.flags_after_ns();

    if segment_flags.intersects(TcpFlags::RST) {
        let seq = Wrapping(s.sequence_number());
        // We accept the RST only if it carries an in-window sequence number.
        // TODO: If/when we support active opens, we'll also have to accept RST/SYN segments,
        // which must acknowledge our SYN to be valid.
        if seq_at_or_after(seq, self.ack_to_send) && seq_after(self.local_rwnd_edge, seq) {
            self.set_flags(ConnStatusFlags::RESET);
            return Ok((None, RecvStatusFlags::RESET_RECEIVED));
        } else {
            return Ok((None, RecvStatusFlags::INVALID_RST));
        }
    }

    // Payload length is total segment length minus the header (options included).
    let payload_len = s.len() - u16::from(s.header_len());
    let mut recv_status_flags = RecvStatusFlags::empty();

    if !self.synack_sent() {
        // We received another segment before getting the chance to send a SYNACK. It's either
        // a retransmitted SYN, or something that does not make sense.
        if self.is_same_syn(s) {
            return Ok((None, recv_status_flags));
        } else {
            return self.reset_for_segment_helper(s, RecvStatusFlags::INVALID_SEGMENT);
        }
    } else if !self.is_established() {
        // So at this point we've sent at least one SYNACK, but the connection is not
        // ESTABLISHED yet. We only accept SYN retransmissions and ACKs. I'm not sure that
        // it's completely forbidden to send an ACK + data in response to a SYNACK, so we don't
        // complain about non-pure ACKs (or even data + ACK + FIN segments).
        if self.is_same_syn(s) {
            // Maybe our previous SYNACK got lost or smt, so clear SYN_ACK_SENT to resend it.
            self.clear_flags(ConnStatusFlags::SYNACK_SENT);
            return Ok((None, recv_status_flags));
        } else if segment_flags.intersects(TcpFlags::SYN) {
            // So we basically freak out over SYN segments which are not valid SYN
            // retransmission.
            return self.reset_for_segment_helper(s, RecvStatusFlags::INVALID_SEGMENT);
        }
    } else {
        // Reaching this branch means the connection is ESTABLISHED. The only thing we want to
        // do right now is reset if we get segments which carry the SYN flag, because they are
        // obviously invalid, and something must be really wrong.
        // TODO: Is it an overreaction to reset here?
        if s.flags_after_ns().intersects(TcpFlags::SYN) {
            return self.reset_for_segment_helper(s, RecvStatusFlags::INVALID_SEGMENT);
        }
    }

    // The ACK number can only be valid when ACK flag is set. The following logic applies to
    // pretty much all connection states which can reach this point.
    if segment_flags.intersects(TcpFlags::ACK) {
        let ack = Wrapping(s.ack_number());
        // A valid ACK falls in [highest_ack_received, first_not_sent] (wrap-aware).
        if seq_at_or_after(ack, self.highest_ack_received)
            && seq_at_or_after(self.first_not_sent, ack)
        {
            // This is a valid ACK. Reset rto_count, since this means the other side is still
            // alive and kicking (or ACking).
            self.rto_count = 0;
            if ack == self.highest_ack_received && ack != self.first_not_sent {
                if !self.is_established() {
                    // Just kidding, a DUPACK is not valid before the connection is ESTABLISHED.
                    return self.reset_for_segment_helper(s, RecvStatusFlags::INVALID_ACK);
                }
                // Duplicate ACKs can only increase in sequence number, so there's no need
                // to check if this one is older than self.dup_ack.
                self.dup_ack = true;
                recv_status_flags |= RecvStatusFlags::DUP_ACK;
            } else {
                // We're making progress. We should also reset rto_start in this case.
                self.highest_ack_received = ack;
                self.rto_start = now;
                if !self.is_established() && self.synack_sent() {
                    // The connection becomes ESTABLISHED.
                    self.set_flags(ConnStatusFlags::ESTABLISHED);
                }
                if self.fin_sent() && ack == self.first_not_sent {
                    self.set_flags(ConnStatusFlags::FIN_ACKED);
                }
            }

            // Look for remote rwnd updates.
            if self.is_established() {
                let edge = self.compute_remote_rwnd_edge(ack, s.window_size());
                if seq_after(edge, self.remote_rwnd_edge) {
                    self.remote_rwnd_edge = edge;
                } else if edge != self.remote_rwnd_edge {
                    // The right edge of the remote receive window has been moved to the left,
                    // or has been set to an invalid value. Both cases represent erroneous TCP
                    // behaviour.
                    recv_status_flags |= RecvStatusFlags::REMOTE_RWND_EDGE;
                }
            }
        } else {
            recv_status_flags |= RecvStatusFlags::INVALID_ACK;
            if !self.is_established() {
                // Reset the connection if we receive an invalid ACK before reaching the
                // ESTABLISHED state.
                return self.reset_for_segment_helper(s, recv_status_flags);
            }
        }
    }

    // We start looking at the payload and/or FIN next. This makes sense only if the
    // connection is established.
    if !self.is_established() {
        return Ok((None, recv_status_flags));
    }

    let seq = Wrapping(s.sequence_number());
    let wrapping_payload_len = Wrapping(u32::from(payload_len));

    if usize::from(payload_len) > buf.len() {
        return Err(RecvError::BufferTooSmall);
    }

    let mut enqueue_ack = if payload_len > 0 {
        let data_end_seq = seq + wrapping_payload_len;
        if let Some(fin_seq) = self.fin_received
            && !seq_at_or_after(fin_seq, data_end_seq)
        {
            // TODO: This is a strange situation, because the other endpoint is sending data
            // after it initially closed its half of the connection. We simply ignore the
            // segment for now.
            return Ok((None, recv_status_flags | RecvStatusFlags::DATA_BEYOND_FIN));
        }

        if !seq_at_or_after(self.local_rwnd_edge, data_end_seq) {
            // TODO: This is another strange (and potentially dangerous) situation, because
            // either we or the other endpoint broke receive window semantics. We simply ignore
            // the segment for now.
            return Ok((
                None,
                recv_status_flags | RecvStatusFlags::SEGMENT_BEYOND_RWND,
            ));
        }

        // We currently assume segments are seldom lost or reordered, and only accept those with
        // the exact next sequence number we're waiting for.
        if seq != self.ack_to_send {
            // TODO: Maybe we should enqueue multiple ACKs here (after making such a thing
            // possible in the first place), just so we're more likely to trigger a
            // retransmission.
            self.enqueue_ack();
            return Ok((None, recv_status_flags | RecvStatusFlags::UNEXPECTED_SEQ));
        }
        self.ack_to_send = data_end_seq;
        true
    } else {
        false
    };

    // We assume the sequence number of the FIN does not change via conflicting FIN carrying
    // segments (as it should be the case during TCP normal operation). If the other endpoint
    // breaks this convention, it will have to deal with potentially hanging (until timing out)
    // connections and/or RST segments.
    if segment_flags.intersects(TcpFlags::FIN) && !self.fin_received() {
        let fin_seq = seq + wrapping_payload_len;
        // In order to avoid some complexity on our side, we only accept an incoming FIN if its
        // sequence number matches that of the first byte yet to be received (this is similar to
        // what we do for data segments right now).
        if fin_seq == self.ack_to_send {
            self.fin_received = Some(fin_seq);
            // Increase this to also ACK the FIN.
            self.ack_to_send += Wrapping(1);
            enqueue_ack = true;
        } else {
            recv_status_flags |= RecvStatusFlags::INVALID_FIN;
        }
    }

    if enqueue_ack {
        self.enqueue_ack();
        // We check this here because if a valid payload has been received, then we must have
        // set enqueue_ack = true earlier.
        if let Some(payload_len) = NonZeroUsize::new(payload_len.into()) {
            buf[..payload_len.into()].copy_from_slice(s.payload());
            return Ok((Some(payload_len), recv_status_flags));
        }
    }

    Ok((None, recv_status_flags))
}

// The write helper functions return incomplete segments because &self does not have information
// regarding the identity of the endpoints, such as source and destination ports, or source and
// destination L3 addresses (which are required for checksum computation). We need this stupid
// ?Sized trait bound, because otherwise Sized would be implied, and we can have unsized types
// which implement ByteBuffer (such as [u8]), since payload expects a reference to some R.
fn write_segment<'a, R: ByteBuffer + ?Sized + Debug>(
    &mut self,
    buf: &'a mut [u8],
    mss_reserved: u16,
    seq: Wrapping,
    ack: Wrapping,
    flags_after_ns: TcpFlags,
    payload: Option<(&R, usize)>,
) -> Result>, WriteNextError> {
    // Write the MSS option on SYNACK segments.
    let mss_option = if flags_after_ns == TcpFlags::SYN | TcpFlags::ACK {
        Some(self.mss)
    } else {
        None
    };

    let segment = TcpSegment::write_incomplete_segment(
        buf,
        seq.0,
        ack.0,
        flags_after_ns,
        self.local_rwnd(),
        mss_option,
        self.mss
            .checked_sub(mss_reserved)
            .ok_or(WriteNextError::MssRemaining)?,
        payload,
    )?;

    // Writing any ACK-carrying segment satisfies a pending ACK.
    if flags_after_ns.intersects(TcpFlags::ACK) {
        self.pending_ack = false;
    }

    Ok(segment)
}

// Control segments are segments with no payload (at least I like to use this name).
fn write_control_segment<'a, R: ByteBuffer + ?Sized + Debug>(
    &mut self,
    buf: &'a mut [u8],
    mss_reserved: u16,
) -> Result>, WriteNextError> {
    let mut seq = self.highest_ack_received;
    let mut ack = self.ack_to_send;
    let mut flags_after_ns = TcpFlags::empty();
    if let Some(cfg) = self.send_rst {
        // A pending RST dictates the exact seq/ack/flags to use.
        let t = cfg.seq_ack_tcp_flags();
        seq = Wrapping(t.0);
        ack = Wrapping(t.1);
        flags_after_ns = t.2;
    } else if !self.is_established() {
        // We can only send SYNACKs on this branch. The ISN should be right before
        // self.first_not_sent.
flags_after_ns |= TcpFlags::SYN | TcpFlags::ACK;
        seq = self.first_not_sent - Wrapping(1);
    } else {
        // If we got to this point, the connection is ESTABLISHED, and we're not sending a RST.
        // We always want to enable the ACK flag.
        flags_after_ns = TcpFlags::ACK;

        if let Some(fin_seq) = self.send_fin {
            // When all outgoing data segments have been acked, we place the FIN flag and the
            // appropriate sequence number on outgoing control segments, unless we received an
            // ACK for the FIN.
            if !self.fin_acked() && seq_at_or_after(seq, fin_seq) {
                flags_after_ns |= TcpFlags::FIN;
                seq = fin_seq;
            }
        }
    }

    self.write_segment::(buf, mss_reserved, seq, ack, flags_after_ns, None)
}

/// Writes a new segment (if available) to the specified buffer.
///
/// The `payload_src` argument is required because the `Connection` does not have an internal
/// send buffer. If the payload source is present, the data referenced therein must not amount
/// to more than [`MAX_WINDOW_SIZE`].
///
/// # Arguments
///
/// * `buf` - The buffer where the segment is written.
/// * `mss_reserved` - How much (if anything) of the MSS value has been already used at the
///   lower layers (by IP options, for example). This will be zero most of the time.
/// * `payload_src` - References a buffer which contains data to send, and also specifies the
///   sequence number associated with the first byte from that buffer.
/// * `now` - An opaque timestamp representing the current moment in time.
///
/// [`MAX_WINDOW_SIZE`]: ../constant.MAX_WINDOW_SIZE.html
pub fn write_next_segment<'a, R: ByteBuffer + ?Sized + Debug>(
    &mut self,
    buf: &'a mut [u8],
    mss_reserved: u16,
    payload_src: PayloadSource,
    now: u64,
) -> Result>>, WriteNextError> {
    // TODO: like receive_segment(), this function is specific in some ways to Connections
    // created via passive open. When/if we also implement active opens, some things will
    // have to change.
    if self.is_reset() {
        return Err(WriteNextError::ConnectionReset);
    }

    if self.send_rst.is_some() {
        // A RST is pending. Try to write it, and change the state of the connection to reset
        // if successful.
        let segment = self.write_control_segment::(buf, mss_reserved)?;
        self.set_flags(ConnStatusFlags::RESET);
        return Ok(Some(segment));
    }

    // The first thing we have to do is reply with a SYNACK if needed.
    if self.synack_pending() {
        let segment = self.write_control_segment::(buf, mss_reserved)?;
        self.set_flags(ConnStatusFlags::SYNACK_SENT);
        // Start the retransmission timer for the SYNACK.
        self.rto_start = now;
        return Ok(Some(segment));
    }

    // Resend a SYNACK if the RTO expired. Otherwise, no reason to continue until the connection
    // becomes ESTABLISHED.
    if !self.is_established() {
        if self.rto_expired(now) {
            // If we exceeded the maximum retransmission count, reset the connection and call
            // write_next_segment one more time to generate the RST.
            self.rto_count += 1;
            if self.rto_count >= self.rto_count_max {
                self.reset();
                return self.write_next_segment(buf, mss_reserved, payload_src, now);
            }
            let segment = self.write_control_segment::(buf, mss_reserved)?;
            self.rto_start = now;
            return Ok(Some(segment));
        }
        return Ok(None);
    }

    // First, try sending a data segment, because we can piggy back ACKs and FINs on top of it.
    if let Some((read_buf, payload_seq)) = payload_src {
        // Limit the size of read_buf so it doesn't mess up later calculations (as usual, I take
        // the easy way out).
        let len = match u32::try_from(read_buf.len()) {
            Ok(len) if len <= MAX_WINDOW_SIZE => len,
            _ => return Err(WriteNextError::PayloadBufTooLarge),
        };
        let payload_end = payload_seq + Wrapping(len);

        // Set when the segment we write below is an RTO-triggered retransmission.
        let mut rto_triggered = false;

        // Decide what sequence number to send next. Check out if a timeout expired first.
let seq_to_send = if self.highest_ack_received != self.first_not_sent
            && self.rto_expired(now)
        {
            self.rto_count += 1;
            if self.rto_count >= self.rto_count_max {
                // Too many retransmissions: reset, and recurse once to emit the RST.
                self.reset();
                return self.write_next_segment(buf, mss_reserved, payload_src, now);
            }

            if let Some(fin_seq) = self.send_fin
                && self.highest_ack_received == fin_seq
            {
                // We're in the relatively unlikely situation where our FIN got lost.
                // Simply calling write_control_segment() will retransmit it.
                let segment = self.write_control_segment::(buf, mss_reserved)?;
                self.rto_start = now;
                return Ok(Some(segment));
            }

            // We have to remember this is a retransmission for later.
            rto_triggered = true;
            self.highest_ack_received
        } else if self.dup_ack {
            // We retransmit an older segment if a DUPACK is recorded. We'll clear
            // self.dup_ack after we make sure the segment has been successfully written.
            self.highest_ack_received
        } else {
            // Otherwise, we send some data (if possible) starting with the first byte not
            // yet sent.
            self.first_not_sent
        };

        // The payload buffer begins after the first sequence number we are trying to send
        // (or the payload_seq is totally off).
        if !seq_at_or_after(seq_to_send, payload_seq) {
            return Err(WriteNextError::PayloadMissingSeq);
        }

        // We can only send data if it's within both the send buffer and the remote rwnd, and
        // before the sequence number of the local FIN (if the connection is closing).
        let actual_end = if seq_at_or_after(self.remote_rwnd_edge, payload_end) {
            payload_end
        } else {
            self.remote_rwnd_edge
        };

        // Make sure we're not trying to send data past the FIN sequence we previously
        // announced.
        if let Some(fin_seq) = self.send_fin
            && seq_after(actual_end, fin_seq)
        {
            return Err(WriteNextError::DataAfterFin);
        }

        // We only proceed with writing a data segment if the previously computed bounds
        // delimit a valid sequence number interval.
        if seq_after(actual_end, seq_to_send) {
            let max_payload_len = (actual_end - seq_to_send).0 as usize;
            // We always set the ACK flag for data segments.
            let tcp_flags = TcpFlags::ACK;
            let ack_to_send = self.ack_to_send;
            let mut segment = self.write_segment(
                buf,
                mss_reserved,
                seq_to_send,
                ack_to_send,
                tcp_flags,
                Some((read_buf, max_payload_len)),
            )?;

            // If self.dup_ack was set, we've just written the retransmission segment,
            // either directly or via the RTO timer expiring.
            self.dup_ack = false;

            let payload_len = segment.inner().payload_len();
            let mut first_seq_after = seq_to_send + Wrapping(u32::from(payload_len));

            if let Some(fin_seq) = self.send_fin
                && first_seq_after == fin_seq
            {
                // This segment contains the last bytes of data we're going to send, so
                // we should also set the FIN flag.
                segment
                    .inner_mut()
                    .set_flags_after_ns(tcp_flags | TcpFlags::FIN);
                // The FIN takes up 1 sequence number.
                first_seq_after += Wrapping(1);
                // The main purpose of knowing we sent at least one FIN is to signal that
                // we already added 1 to self.first_not_sent, to account for its sequence
                // number.
                self.set_flags(ConnStatusFlags::FIN_SENT);
            }

            if rto_triggered || self.first_not_sent == self.highest_ack_received {
                // Reset the RTO "timer" after each retransmission, or after sending the first
                // unacknowledged segment in the current window.
                self.rto_start = now;
            }

            if seq_after(first_seq_after, self.first_not_sent) {
                self.first_not_sent = first_seq_after;
            }

            return Ok(Some(segment));
        }
    }

    // At this point, we only send a control segment if there's a pending ACK, or we didn't send
    // a FIN segment before and we would be sending the first one.
    // The FIN flag will be automatically added to the segment when necessary by the
    // write_control_segment() method.
    let send_first_fin = self.can_send_first_fin();
    if self.pending_ack || send_first_fin {
        let segment = self.write_control_segment::(buf, mss_reserved)?;
        if send_first_fin {
            // Account for the sequence number consumed by the FIN we just wrote.
            self.first_not_sent += Wrapping(1);
            self.set_flags(ConnStatusFlags::FIN_SENT);
        }
        return Ok(Some(segment));
    }

    Ok(None)
}
}

// TODO: I'll be honest: the tests here cover the situations most likely to be encountered, but are
// not even close to being exhaustive. Something like that might be worth pursuing after polishing
// the rougher edges around the current implementation, and deciding its scope relative to an
// actual TCP implementation.
#[cfg(test)]
pub(crate) mod tests {
    use super::*;

    // A segment without options or a payload is 20 bytes long.
    const BASIC_SEGMENT_SIZE: usize = 20;

    // Bundles the buffers and default parameters used to drive a Connection through
    // the tests below.
    #[derive(Debug)]
    pub struct ConnectionTester {
        // Scratch buffer used for both incoming payload storage and outgoing segments.
        buf: [u8; 2000],
        src_port: u16,
        dst_port: u16,
        // Window size advertised by the simulated remote endpoint.
        remote_window_size: u16,
        pub mss: u16,
        pub mss_reserved: u16,
        local_rwnd_size: u32,
        // ISN used by the simulated remote endpoint.
        remote_isn: u32,
        pub rto_period: u64,
        rto_count_max: u16,
        // Simulated current time, advanced manually by the tests.
        now: u64,
    }

    impl ConnectionTester {
        pub fn new() -> Self {
            ConnectionTester {
                buf: [0u8; 2000],
                src_port: 1000,
                dst_port: 80,
                remote_window_size: 11000,
                mss: 1100,
                mss_reserved: 0,
                local_rwnd_size: 10000,
                remote_isn: 12_345_678,
                rto_period: 100_000,
                rto_count_max: 3,
                now: 0,
            }
        }

        // Convenience wrapper: performs a passive open using the tester's defaults.
        fn passive_open(
            &self,
            s: &TcpSegment,
        ) -> Result {
            Connection::passive_open(
                s,
                self.local_rwnd_size,
                NonZeroU64::new(self.rto_period).unwrap(),
                NonZeroU16::new(self.rto_count_max).unwrap(),
            )
        }

        // This helps write segments; it uses a lot of default values, and sets the ACK and SEQ
        // numbers to 0, and self.remote_isn respectively.
fn write_segment_helper<'a>(
            &self,
            buf: &'a mut [u8],
            add_mss_option: bool,
            payload: Option<(&[u8], usize)>,
        ) -> TcpSegment<'a, &'a mut [u8]> {
            let mss_option = if add_mss_option { Some(self.mss) } else { None };

            TcpSegment::write_segment(
                buf,
                self.src_port,
                self.dst_port,
                self.remote_isn,
                0,
                TcpFlags::empty(),
                self.remote_window_size,
                mss_option,
                self.mss.checked_sub(self.mss_reserved).unwrap(),
                payload,
                None,
            )
            .unwrap()
        }

        // Writes a segment carrying the MSS option, as a SYN from the remote endpoint would.
        pub fn write_syn<'a>(&self, buf: &'a mut [u8]) -> TcpSegment<'a, &'a mut [u8]> {
            self.write_segment_helper(buf, true, None)
        }

        // Writes a payload-less segment without options.
        pub fn write_ctrl<'a>(&self, buf: &'a mut [u8]) -> TcpSegment<'a, &'a mut [u8]> {
            self.write_segment_helper(buf, false, None)
        }

        // Writes a segment carrying the full contents of data_buf as payload.
        pub fn write_data<'a>(
            &self,
            buf: &'a mut [u8],
            data_buf: &[u8],
        ) -> TcpSegment<'a, &'a mut [u8]> {
            let segment = self.write_segment_helper(buf, false, Some((data_buf, data_buf.len())));
            assert_eq!(usize::from(segment.payload_len()), data_buf.len());
            segment
        }

        // Feeds a segment to the connection, using the tester's buffer and current time.
        fn receive_segment(
            &mut self,
            c: &mut Connection,
            s: &TcpSegment,
        ) -> Result<(Option, RecvStatusFlags), RecvError> {
            c.receive_segment(s, self.buf.as_mut(), self.now)
        }

        // Asks the connection for its next outgoing segment and finalizes it (fills in ports
        // and checksum) so the tests can inspect a complete segment.
        fn write_next_segment(
            &mut self,
            c: &mut Connection,
            payload_src: Option<(&[u8], Wrapping)>,
        ) -> Result>, WriteNextError> {
            let src_port = self.src_port;
            let dst_port = self.dst_port;
            c.write_next_segment(self.buf.as_mut(), self.mss_reserved, payload_src, self.now)
                .map(|o| o.map(|incomplete| incomplete.finalize(src_port, dst_port, None)))
        }

        // Checks if the specified connection will reset after receiving the provided segment, and
        // that the receive_segment() method also returns the specified RecvStatusFlags. We
        // also make sure the outgoing RST segment has additional_segment_flags set besides
        // TcpFlags::RST.
        fn should_reset_after(
            &mut self,
            c: &mut Connection,
            s: &TcpSegment,
            recv_flags: RecvStatusFlags,
            additional_segment_flags: TcpFlags,
        ) {
            assert_eq!(self.receive_segment(c, s).unwrap(), (None, recv_flags));

            // We add a payload also, to see that sending a RST has precedence over everything.
            let send_buf = [0u8; 2000];
            let payload_src = Some((send_buf.as_ref(), c.highest_ack_received));

            if !recv_flags.intersects(RecvStatusFlags::RESET_RECEIVED) {
                // If the connection initiated the reset, the next segment to write should be a RST.
                // The first unwrap is for the Result, and the second for the Option.
                check_control_segment(
                    &self.write_next_segment(c, payload_src).unwrap().unwrap(),
                    0,
                    additional_segment_flags | TcpFlags::RST,
                );
            }

            // Calling write_next_segment again should result in a ConnectionReset error.
            assert_eq!(
                self.write_next_segment(c, payload_src).unwrap_err(),
                WriteNextError::ConnectionReset
            );
            // Receive should also no longer work.
            assert_eq!(
                self.receive_segment(c, s).unwrap_err(),
                RecvError::ConnectionReset
            );
            assert!(c.is_done());
        }

        // Checks that the next segment sent by c is a SYNACK.
        fn check_synack_is_next(&mut self, c: &mut Connection) {
            let send_buf = [0u8; 2000];
            let payload_src = Some((send_buf.as_ref(), c.highest_ack_received));

            let remote_isn = self.remote_isn;
            let conn_isn = c.first_not_sent.0.wrapping_sub(1);
            let mss = self.mss;

            let s = self.write_next_segment(c, payload_src).unwrap().unwrap();
            // The MSS option is 4 bytes long.
            check_control_segment(&s, 4, TcpFlags::SYN | TcpFlags::ACK);
            assert_eq!(s.sequence_number(), conn_isn);
            assert_eq!(s.ack_number(), remote_isn.wrapping_add(1));
            // Our implementation mirrors the received value of the MSS option.
            assert_eq!(parse_mss_option(&s).unwrap(), mss);
            check_synack_sent(c);
        }
    }

    // Verifies whether we are dealing with a control segment with the specified flags.
    fn check_control_segment(
        s: &TcpSegment,
        options_len: usize,
        flags_after_ns: TcpFlags,
    ) {
        assert_eq!(usize::from(s.len()), BASIC_SEGMENT_SIZE + options_len);
        assert_eq!(s.flags_after_ns(), flags_after_ns);
    }

    // Checks if the segment ACKs the specified sequence number, and whether the additional_flags
    // are set (besides ACK).
    fn check_acks(
        s: &TcpSegment,
        ack_number: u32,
        additional_flags: TcpFlags,
    ) {
        assert_eq!(s.flags_after_ns(), TcpFlags::ACK | additional_flags);
        assert_eq!(s.ack_number(), ack_number);
    }

    // The following "check_" helper functions ensure a Connection in a certain state does not have
    // any unwarranted status flags set. We wouldn't need to look at this if we used a state enum
    // instead of a status flags set.
    fn check_syn_received(c: &Connection) {
        assert_eq!(c.status_flags, ConnStatusFlags::SYN_RECEIVED);
    }

    fn check_synack_sent(c: &Connection) {
        assert_eq!(
            c.status_flags,
            ConnStatusFlags::SYN_RECEIVED | ConnStatusFlags::SYNACK_SENT
        );
    }

    fn check_established(c: &Connection) {
        assert_eq!(
            c.status_flags,
            ConnStatusFlags::SYN_RECEIVED
                | ConnStatusFlags::SYNACK_SENT
                | ConnStatusFlags::ESTABLISHED
        );
    }

    fn check_fin_received_but_not_sent(c: &Connection) {
        assert_eq!(
            c.status_flags,
            ConnStatusFlags::SYN_RECEIVED
                | ConnStatusFlags::SYNACK_SENT
                | ConnStatusFlags::ESTABLISHED
        );
        assert!(c.fin_received());
    }

    #[test]
    #[allow(clippy::cognitive_complexity)]
    fn test_connection() {
        // These are used to support some segments we play around with.
        let mut buf1 = [0u8; 100];
        let mut buf2 = [0u8; 100];
        let mut buf3 = [0u8; 1500];

        // Buffer containing the payload of the incoming data segment.
        let data_buf = [2u8; 1000];
        // Buffer containing the data which the connection sends on outgoing segments.
let send_buf = [11u8; 20000]; let mut t = ConnectionTester::new(); let mut syn = t.write_syn(buf1.as_mut()); let mut ctrl = t.write_ctrl(buf2.as_mut()); let mut data = t.write_data(buf3.as_mut(), data_buf.as_ref()); // Test creating a new connection based on invalid SYN segments. // Invalid flags. syn.set_flags_after_ns(TcpFlags::SYN | TcpFlags::ACK); assert_eq!( t.passive_open(&syn).unwrap_err(), PassiveOpenError::InvalidSyn ); // SYN segment with payload. data.set_flags_after_ns(TcpFlags::SYN); assert_eq!( t.passive_open(&data).unwrap_err(), PassiveOpenError::InvalidSyn ); // Ok, now let's test with connections created using valid SYN segments. // Set valid flags. syn.set_flags_after_ns(TcpFlags::SYN); let mut c = t.passive_open(&syn).unwrap(); assert_eq!(c.ack_to_send.0, t.remote_isn.wrapping_add(1)); assert_eq!(c.first_not_sent, c.highest_ack_received + Wrapping(1)); assert_eq!( c.local_rwnd_edge.0, t.remote_isn.wrapping_add(1 + t.local_rwnd_size) ); assert_eq!( c.remote_rwnd_edge, c.first_not_sent + Wrapping(u32::from(t.remote_window_size)) ); check_syn_received(&c); // There's a SYNACK to send. assert_eq!( c.control_segment_or_timeout_status(), NextSegmentStatus::Available ); let mut c_clone = c.clone(); // While the connection is in this state, we send another SYN, with a different ISN. syn.set_sequence_number(t.remote_isn.wrapping_add(1)); t.should_reset_after( &mut c, &syn, RecvStatusFlags::INVALID_SEGMENT | RecvStatusFlags::CONN_RESETTING, TcpFlags::ACK, ); // Let's restore the connection. c = c_clone; let mut payload_src = Some((send_buf.as_ref(), c.highest_ack_received)); // Sending the exact same SYN again should be fine. syn.set_sequence_number(t.remote_isn); assert_eq!( t.receive_segment(&mut c, &syn).unwrap(), (None, RecvStatusFlags::empty()) ); // The connection should send a SYNACK at the next opportunity. t.check_synack_is_next(&mut c); // Calling write_next_segment again should not send anything else. 
assert!(t.write_next_segment(&mut c, payload_src).unwrap().is_none()); // Also, we now have a RTO pending. assert_eq!( c.control_segment_or_timeout_status(), NextSegmentStatus::Timeout(t.rto_period) ); // However, if we advance the time until just after the RTO, a SYNACK is retransmitted. t.now += t.rto_period; t.check_synack_is_next(&mut c); assert_eq!( c.control_segment_or_timeout_status(), NextSegmentStatus::Timeout(2 * t.rto_period) ); // Re-receiving a valid SYN moves the connection back to SYN_RECEIVED. assert_eq!( t.receive_segment(&mut c, &syn).unwrap(), (None, RecvStatusFlags::empty()) ); check_syn_received(&c); // And thus, a SYNACK will be the next segment to be transmitted once again. t.check_synack_is_next(&mut c); // Now is a time as good as any to see what happens if we receive a RST. First, let's try // with an invalid RST (its sequence number is out of window). ctrl.set_sequence_number(c.ack_to_send.0.wrapping_sub(1)) .set_flags_after_ns(TcpFlags::RST); assert_eq!( t.receive_segment(&mut c, &ctrl).unwrap(), (None, RecvStatusFlags::INVALID_RST) ); // Let's back up c, because the next segment will be a valid RST. c_clone = c.clone(); ctrl.set_sequence_number(c.ack_to_send.0); t.should_reset_after( &mut c, &ctrl, RecvStatusFlags::RESET_RECEIVED, TcpFlags::ACK, ); // Cool, let's restore c and continue. c = c_clone.clone(); let conn_isn = c.first_not_sent.0.wrapping_sub(1); // Ok so we're waiting for the SYNACK to be acked. Any incoming segment which is not a // retransmitted SYN, but has the SYN flag set will cause a reset. data.set_flags_after_ns(TcpFlags::ACK | TcpFlags::SYN) .set_ack_number(conn_isn.wrapping_add(1)) .set_sequence_number(t.remote_isn.wrapping_add(1)); t.should_reset_after( &mut c, &data, RecvStatusFlags::INVALID_SEGMENT | RecvStatusFlags::CONN_RESETTING, // The RST emitted in response won't have the ACK flag set because we can infer a // sequence number from the ACK carried by the data segment. 
TcpFlags::empty(), ); c = c_clone.clone(); // A valid ACK should move the connection into ESTABLISHED. Also, since we allow more than // just pure ACKs at this point, any valid data should be received as well. data.set_flags_after_ns(TcpFlags::ACK); assert_eq!( t.receive_segment(&mut c, &data).unwrap(), ( Some(NonZeroUsize::new(data_buf.len()).unwrap()), RecvStatusFlags::empty() ) ); assert!(c.is_established()); c = c_clone.clone(); // In fact, since we're so like whatever about the segments we receive here, let's see what // happens if data also carries the FIN flag (spoiler: it works). data.set_flags_after_ns(TcpFlags::ACK | TcpFlags::FIN); assert_eq!( t.receive_segment(&mut c, &data).unwrap(), ( Some(NonZeroUsize::new(data_buf.len()).unwrap()), RecvStatusFlags::empty() ) ); assert!(c.is_established()); assert!(c.fin_received()); c = c_clone.clone(); // That being said, let's move into established via a pure ACK. ctrl.set_flags_after_ns(TcpFlags::ACK) .set_ack_number(conn_isn.wrapping_add(1)); assert_eq!( t.receive_segment(&mut c, &ctrl).unwrap(), (None, RecvStatusFlags::empty()) ); check_established(&c); // Cool, let's back c up. c_clone = c.clone(); // We still get spooked if we get something with a SYN. data.set_flags_after_ns(TcpFlags::SYN | TcpFlags::ACK); t.should_reset_after( &mut c, &data, RecvStatusFlags::INVALID_SEGMENT | RecvStatusFlags::CONN_RESETTING, // The RST emitted in response won't have the ACK flag set because we can infer a // sequence number from the ACK carried by the data segment. TcpFlags::empty(), ); c = c_clone.clone(); // Ok so back to ESTABLISHED, let's make sure we only accept the exact sequence // number we expect (which is t.remote_isn + 1 at this point). The following segment should // not be accepted. 
data.set_flags_after_ns(TcpFlags::ACK) .set_sequence_number(t.remote_isn); assert_eq!( t.receive_segment(&mut c, &data).unwrap(), (None, RecvStatusFlags::UNEXPECTED_SEQ) ); // However, if we set the expected sequence everything should be ok. data.set_sequence_number(t.remote_isn + 1); assert_eq!( t.receive_segment(&mut c, &data).unwrap(), ( Some(NonZeroUsize::new(data.payload_len().into()).unwrap()), RecvStatusFlags::empty() ) ); // This is the ack number that should be set/sent. let expected_ack = t.remote_isn.wrapping_add(u32::from(data.payload_len()) + 1); // Check that internal state gets updated properly. assert_eq!(c.ack_to_send.0, expected_ack); { // We should get a pure ACK here, because we don't provide a payload source. let s = t.write_next_segment(&mut c, None).unwrap().unwrap(); check_acks(&s, expected_ack, TcpFlags::empty()); } // Calling write_next_segment (without a payload source) again should not send // anything else. assert!(t.write_next_segment(&mut c, None).unwrap().is_none()); { let payload_len = u32::from(data.payload_len()); // Assuming no one changed the code, the local window size of the connection was 10000, // so we should be able to successfully receive 9 more segments with 1000 byte payloads. let max = 9; for i in 1u32..=max { // The 1 we add is because the SYN consumes a sequence number. data.set_sequence_number(t.remote_isn.wrapping_add(1 + i * payload_len)); assert_eq!( t.receive_segment(&mut c, &data).unwrap(), ( Some(NonZeroUsize::new(data.payload_len().into()).unwrap()), RecvStatusFlags::empty() ) ); } let expected_ack = t.remote_isn.wrapping_add(1 + (max + 1) * payload_len); // The connection should send a single cumulative ACK, and no other segment afterward // (if we don't also provide a payload source, which we don't). 
{ { let s = t.write_next_segment(&mut c, None).unwrap().unwrap(); check_acks(&s, expected_ack, TcpFlags::empty()); } assert!(t.write_next_segment(&mut c, None).unwrap().is_none()); } // Sending any more new data should be outside of the receive window of the connection. data.set_sequence_number(expected_ack); assert_eq!( t.receive_segment(&mut c, &data).unwrap(), (None, RecvStatusFlags::SEGMENT_BEYOND_RWND) ); } // Restore connection state to just after ESTABLISHED, and make it send some data. c = c_clone.clone(); // This should send anything, as the payload source does not contain the next sequence // number to be sent. // Should contain conn_isn + 1 to be fine, but we make it start just after. payload_src.as_mut().unwrap().1 = Wrapping(conn_isn) + Wrapping(2); assert_eq!( t.write_next_segment(&mut c, payload_src).unwrap_err(), WriteNextError::PayloadMissingSeq ); // Let's fix it. payload_src.as_mut().unwrap().1 = Wrapping(conn_isn) + Wrapping(1); // The mss is 1100, and the remote window is 11000, so we can send 10 data packets. let max = 10; let remote_isn = t.remote_isn; let mss = t.mss; let (payload_buf, mut response_seq) = payload_src.unwrap(); let mut payload_offset = 0; for i in 0..max { // Using the expects to get the value of i if there's an error. let s = t .write_next_segment(&mut c, Some((&payload_buf[payload_offset..], response_seq))) .unwrap_or_else(|_| panic!("{}", i)) .unwrap_or_else(|| panic!("{}", i)); payload_offset += usize::from(s.payload_len()); response_seq += Wrapping(u32::from(s.payload_len())); // Again, the 1 accounts for the sequence number taken up by the SYN. assert_eq!( s.sequence_number(), conn_isn.wrapping_add(1 + i * u32::from(mss)), ); assert_eq!(s.ack_number(), remote_isn.wrapping_add(1)); assert_eq!(s.flags_after_ns(), TcpFlags::ACK); assert_eq!(s.payload_len(), mss); } // No more new data can be sent until the window advances, even though data_buf // contains 20_000 bytes. 
assert!(t.write_next_segment(&mut c, payload_src).unwrap().is_none()); // Let's ACK the first segment previously sent. ctrl.set_ack_number(conn_isn.wrapping_add(1 + u32::from(mss))) .set_flags_after_ns(TcpFlags::ACK); assert_eq!( t.receive_segment(&mut c, &ctrl).unwrap(), (None, RecvStatusFlags::empty()) ); // We should be able to send one more segment now. { let s = t.write_next_segment(&mut c, payload_src).unwrap().unwrap(); assert_eq!( s.sequence_number(), conn_isn.wrapping_add(1 + max * u32::from(mss)), ); assert_eq!(s.payload_len(), mss); } assert!(t.write_next_segment(&mut c, payload_src).unwrap().is_none()); // We have to wait for the window to open again in order to send new data, but we can // have retransmissions. For example, receiving the previous ACK again will cause a // DUPACK, which will trigger a retransmission. assert_eq!( t.receive_segment(&mut c, &ctrl).unwrap(), (None, RecvStatusFlags::DUP_ACK) ); assert!(c.dup_ack_pending()); // Let's check that we indeed get a single retransmitted segment. { let s = t.write_next_segment(&mut c, payload_src).unwrap().unwrap(); assert_eq!(s.sequence_number(), ctrl.ack_number()); assert_eq!(s.payload_len(), mss); } assert!(t.write_next_segment(&mut c, payload_src).unwrap().is_none()); // Retransmissions also trigger after time-out. t.now += t.rto_period; { let s = t.write_next_segment(&mut c, payload_src).unwrap().unwrap(); assert_eq!(s.sequence_number(), ctrl.ack_number()); assert_eq!(s.payload_len(), mss); } assert!(t.write_next_segment(&mut c, payload_src).unwrap().is_none()); // Btw, let's also make sure another retransmission will happen after another time-out, // but not earlier. 
t.now += t.rto_period - 1; assert!(t.write_next_segment(&mut c, payload_src).unwrap().is_none()); t.now += 1; { let s = t.write_next_segment(&mut c, payload_src).unwrap().unwrap(); assert_eq!(s.sequence_number(), ctrl.ack_number()); assert_eq!(s.payload_len(), mss); } assert!(t.write_next_segment(&mut c, payload_src).unwrap().is_none()); c_clone = c.clone(); // Triggering another timeout should reset the connection, because t.rto_count_max == 3. t.now += t.rto_period; { let s = t.write_next_segment(&mut c, payload_src).unwrap().unwrap(); assert!(s.flags_after_ns().intersects(TcpFlags::RST)); assert!(c.is_reset()); } // Let's undo the reset. t.now -= t.rto_period; c = c_clone; // Also, time-outs should stop happening if we got ACKs for all outgoing segments. This // ACK also closes the remote receive window so we can't send any new data. ctrl.set_ack_number(c.first_not_sent.0).set_window_size(0); assert_eq!( t.receive_segment(&mut c, &ctrl).unwrap(), (None, RecvStatusFlags::empty()) ); assert!(t.write_next_segment(&mut c, payload_src).unwrap().is_none()); t.now += t.rto_period; assert!(t.write_next_segment(&mut c, payload_src).unwrap().is_none()); // Let's open the window a bit, to see that the next transmitted segment fits that // exact size. ctrl.set_window_size(123); assert_eq!( t.receive_segment(&mut c, &ctrl).unwrap(), (None, RecvStatusFlags::empty()) ); { let s = t.write_next_segment(&mut c, payload_src).unwrap().unwrap(); assert_eq!(s.sequence_number(), ctrl.ack_number()); assert_eq!(s.payload_len(), 123); } // And let's do one more retransmission timing check. 
assert!(t.write_next_segment(&mut c, payload_src).unwrap().is_none()); t.now += t.rto_period - 1; assert!(t.write_next_segment(&mut c, payload_src).unwrap().is_none()); t.now += 1; { let s = t.write_next_segment(&mut c, payload_src).unwrap().unwrap(); assert_eq!(s.sequence_number(), ctrl.ack_number()); assert_eq!(s.payload_len(), 123); } // This looks like a good time to check what happens for some invalid ACKs. First, let's // make sure we properly detect an invalid window_size advertisement (where the remote rwnd // edge decreases compared to previously received info). ctrl.set_window_size(100); assert_eq!( t.receive_segment(&mut c, &ctrl).unwrap(), ( None, RecvStatusFlags::DUP_ACK | RecvStatusFlags::REMOTE_RWND_EDGE ) ); // Let's clear the DUP_ACK related state. c.dup_ack = false; // Now let try some invalid ACKs. This one is an older ACK. ctrl.set_ack_number(c.highest_ack_received.0.wrapping_sub(1)); assert_eq!( t.receive_segment(&mut c, &ctrl).unwrap(), (None, RecvStatusFlags::INVALID_ACK) ); // Another example of invalid ACK is one that tries to acknowledge a sequence number yet // to be sent. ctrl.set_ack_number(c.first_not_sent.0.wrapping_add(1)); assert_eq!( t.receive_segment(&mut c, &ctrl).unwrap(), (None, RecvStatusFlags::INVALID_ACK) ); // FIN time! As usual let's begin with receiving an invalid FIN, one that does not match // the sequence number we expect. ctrl.set_flags_after_ns(TcpFlags::FIN) .set_sequence_number(c.ack_to_send.0.wrapping_sub(1)); assert_eq!( t.receive_segment(&mut c, &ctrl).unwrap(), (None, RecvStatusFlags::INVALID_FIN) ); // Ok now let's use a valid FIN. ctrl.set_sequence_number(c.ack_to_send.0); assert_eq!( t.receive_segment(&mut c, &ctrl).unwrap(), (None, RecvStatusFlags::empty()) ); check_fin_received_but_not_sent(&c); // The next segment right now should be a pure ACK for the FIN. 
{ let s = t.write_next_segment(&mut c, payload_src).unwrap().unwrap(); check_control_segment(&s, 0, TcpFlags::ACK); assert_eq!(s.ack_number(), ctrl.sequence_number().wrapping_add(1),); } // Receiving data after the FIN is an error. We increase the rwnd edge for c, because the // window was full after the earlier reception tests. c.advance_local_rwnd_edge(10_000); // We'll also get the INVALID_ACK RecvStausFlag here because the ACK number is old. data.set_sequence_number(c.ack_to_send.0); assert_eq!( t.receive_segment(&mut c, &data).unwrap(), ( None, RecvStatusFlags::DATA_BEYOND_FIN | RecvStatusFlags::INVALID_ACK ) ); assert!(t.write_next_segment(&mut c, payload_src).unwrap().is_none()); // c = c_clone.clone(); // We change payload_src to only include those parts of send_buf that were already sent, // so it makes sense to close the connection as if we're done transmitting data. let bytes_sent_by_c = c.first_not_sent.0.wrapping_sub(conn_isn + 1) as usize; payload_src.as_mut().unwrap().0 = &send_buf[..bytes_sent_by_c]; // We artifically increase the remote rwnd for c, so we can verify we sent everything, and // we're not just rwnd bound. We also make it so everything is ACKed, so we can sent a FIN // right after calling close() below (this is needed because we didn't ACK the last // segment sent by c). c.remote_rwnd_edge += Wrapping(50_000); c.highest_ack_received = c.first_not_sent; // Save the state. // c_clone = c.clone(); // Close the connection. c.close(); // We shouldn't be done yet. Even though we got a FIN, we didn't send our own yet. assert!(!c.is_done()); // If we call write_next at this point, the next outgoing segment should be a pure FIN/ACK. 
        {
            let s = t.write_next_segment(&mut c, payload_src).unwrap().unwrap();
            check_control_segment(&s, 0, TcpFlags::FIN | TcpFlags::ACK);
            // The FIN's sequence number comes right after all the data bytes sent (plus 1
            // for the sequence number consumed by the SYN).
            assert_eq!(
                s.sequence_number(),
                conn_isn.wrapping_add(1 + u32::try_from(bytes_sent_by_c).unwrap())
            );
        }
        // At this point, the connection should be done, because we both sent and received a FIN,
        // and we don't wait for our FIN to be ACKed.
        assert!(c.is_done());
    }
}


================================================
FILE: src/vmm/src/dumbo/tcp/endpoint.rs
================================================
// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

// When designing the MMDS, we thought about a split in functionality, where we have some basic
// building blocks (such as the simplified TCP implementation, and the micro HTTP server) which can
// even be exported as libraries at some point, and then we have things built on top of those.
// That's why the Connection struct (and the upcoming TcpHandler) do not log things, or increase
// metrics, but rather express status via return values. The Endpoint struct implements our HTTP
// based interaction with the MMDS, making use of the aforementioned building blocks, and is
// totally specific to Firecracker. Ideally, the current crate should only contain the generic
// components, but since the separation/interface is not very well defined yet, we keep the
// Endpoint in here too for the time being.
use std::fmt::Debug;
use std::num::{NonZeroU16, NonZeroU64, Wrapping};

use micro_http::{Body, Request, RequestError, Response, StatusCode, Version};
use utils::time::timestamp_cycles;

use crate::dumbo::pdu::Incomplete;
use crate::dumbo::pdu::bytes::NetworkBytes;
use crate::dumbo::pdu::tcp::TcpSegment;
use crate::dumbo::tcp::connection::{Connection, PassiveOpenError, RecvStatusFlags};
use crate::dumbo::tcp::{MAX_WINDOW_SIZE, NextSegmentStatus, seq_after};
use crate::logger::{IncMetric, METRICS};

// TODO: These are currently expressed in cycles. Normally, they would be the equivalent of a
// certain duration, depending on the frequency of the CPU, but we still have a bit to go until
// that functionality is available, so we just use some conservative-ish values. Even on a fast
// 4GHz CPU, the first is roughly equal to 10 seconds, and the other is ~300 ms.
const EVICTION_THRESHOLD: u64 = 40_000_000_000;
const CONNECTION_RTO_PERIOD: u64 = 1_200_000_000;
// Maximum consecutive retransmission timeouts before the connection resets itself.
const CONNECTION_RTO_COUNT_MAX: u16 = 15;

// This is one plus the size of the largest bytestream carrying an HTTP request we are willing to
// accept. It's limited in order to have a bound on memory usage. This value should be plenty for
// imaginable regular MMDS requests.
// TODO: Maybe at some point include this in the checks we do when populating the MMDS via the API,
// since it effectively limits the size of the keys (URIs) we're willing to use.
const RCV_BUF_MAX_SIZE: u32 = 2500;

// Represents the local endpoint of a HTTP over TCP connection which carries GET requests
// to the MMDS.
#[derive(Debug)]
pub struct Endpoint {
    // A fixed size buffer used to store bytes received via TCP. If the current request does not
    // fit within, we reset the connection, since we see this as a hard memory bound.
    receive_buf: [u8; RCV_BUF_MAX_SIZE as usize],
    // Represents the next available position in the buffer.
    receive_buf_left: usize,
    // This is filled with the HTTP response bytes after we parse a request and generate the reply.
    // NOTE(review): element type appears stripped by extraction — presumably `Vec<u8>`; confirm
    // against upstream.
    response_buf: Vec,
    // Initial response sequence, used to track if the entire `response_buf` was sent.
    // NOTE(review): type parameter stripped — presumably `Wrapping<u32>`.
    initial_response_seq: Wrapping,
    // Represents the sequence number associated with the first byte from response_buf.
    response_seq: Wrapping,
    // The TCP connection that does all the receiving/sending work.
    connection: Connection,
    // Timestamp (in cycles) associated with the most recent reception of a segment.
    last_segment_received_timestamp: u64,
    // These many time units have to pass since receiving the last segment to make the current
    // Endpoint evictable.
    eviction_threshold: u64,
    // We ignore incoming segments when this is set, and that happens when we decide to reset
    // the connection (or it decides to reset itself).
    stop_receiving: bool,
}

// The "contract" for the Endpoint (if it implemented a trait or something) is something along
// these lines:
// - Incoming segments are passed by calling receive_segment().
// - To check whether the Endpoint has something to transmit, we must call write_next_segment()
// (the buf parameter should point to where the TCP segment begins). This function will return
// None if there's nothing to write (or there was an error writing, in which case it also
// increases a metric).
// - After calling either of the previous functions, the user should also call is_done() to see
// if the Endpoint is finished.
// - The is_evictable() function returns true if the Endpoint can be destroyed as far as its
// internal logic is concerned. It's going to be used by the connection handler when trying to
// find a new slot for incoming connections if none are free (when replacing an existing connection
// is the only option).
impl Endpoint {
    /// Creates a new Endpoint from a [`crate::tcp::connection::Connection`]
    /// ## Arguments:
    /// - `segment`: The incoming `SYN`.
    /// - `eviction_threshold`: CPU cycles that must elapse before this Endpoint is evictable
    /// - `connection_rto_period`: How long the connection waits before a retransmission timeout
    ///   fires for the first segment which has not been acknowledged yet. This uses an opaque time
    ///   unit.
    /// - `connection_rto_count_max`: How many consecutive timeout-based retransmission may occur
    ///   before the connection resets itself.
    /// ## Panics:
    /// - `assert!(RCV_BUF_MAX_SIZE <= MAX_WINDOW_SIZE as usize);`
    pub fn new(
        segment: &TcpSegment,
        eviction_threshold: NonZeroU64,
        connection_rto_period: NonZeroU64,
        connection_rto_count_max: NonZeroU16,
    ) -> Result {
        // This simplifies things, and is a very reasonable assumption.
        #[allow(clippy::assertions_on_constants)]
        {
            assert!(RCV_BUF_MAX_SIZE <= MAX_WINDOW_SIZE);
        }
        // The receive window advertised to the peer equals the local buffer size, so the
        // connection can never hand us more bytes than receive_buf can hold.
        let connection = Connection::passive_open(
            segment,
            RCV_BUF_MAX_SIZE,
            connection_rto_period,
            connection_rto_count_max,
        )?;
        Ok(Endpoint {
            receive_buf: [0u8; RCV_BUF_MAX_SIZE as usize],
            receive_buf_left: 0,
            response_buf: Vec::new(),
            // TODO: Using first_not_sent() makes sense here because a connection is currently
            // created via passive open only, so this points to the sequence number right after
            // the SYNACK. It might stop working like that if/when the implementation changes.
            response_seq: connection.first_not_sent(),
            initial_response_seq: connection.first_not_sent(),
            connection,
            last_segment_received_timestamp: timestamp_cycles(),
            eviction_threshold: eviction_threshold.get(),
            stop_receiving: false,
        })
    }

    // Convenience constructor wiring in the module-level default thresholds/periods.
    pub fn new_with_defaults(
        segment: &TcpSegment,
    ) -> Result {
        // The unwraps are safe because the constants are greater than 0.
        Self::new(
            segment,
            NonZeroU64::new(EVICTION_THRESHOLD).unwrap(),
            NonZeroU64::new(CONNECTION_RTO_PERIOD).unwrap(),
            NonZeroU16::new(CONNECTION_RTO_COUNT_MAX).unwrap(),
        )
    }

    // Feeds an incoming TCP segment to the endpoint. Payload bytes are accumulated in
    // receive_buf; once a full HTTP request (terminated by a double newline) is found,
    // `callback` produces the Response whose bytes are queued in response_buf.
    // NOTE(review): the generic parameter list looks garbled by extraction — presumably
    // `<F: FnOnce(Request) -> Response>`; confirm against upstream.
    pub fn receive_segment Response>(
        &mut self,
        s: &TcpSegment,
        callback: F,
    ) {
        // After a reset decision we deliberately drop everything on the floor.
        if self.stop_receiving {
            return;
        }
        let now = timestamp_cycles();
        self.last_segment_received_timestamp = now;
        // As long as new segments arrive, we save data in the buffer. We don't have to worry
        // about writing out of bounds because we set the receive window of the connection to
        // match the size of the buffer. When space frees up, we'll advance the window
        // accordingly.
        let (value, status) = match self.connection.receive_segment(
            s,
            &mut self.receive_buf[self.receive_buf_left..],
            now,
        ) {
            Ok(pair) => pair,
            Err(_) => {
                METRICS.mmds.rx_accepted_err.inc();
                return;
            }
        };
        if !status.is_empty() {
            METRICS.mmds.rx_accepted_unusual.inc();
            if status.intersects(RecvStatusFlags::CONN_RESETTING) {
                self.stop_receiving = true;
                return;
            }
        }
        // Advance receive_buf_left by how many bytes were actually written.
        if let Some(len) = value {
            self.receive_buf_left += len.get();
        };
        // The unwrap here should be safe because we assert the size whenever we append to
        // response_buf.
        if !self.response_buf.is_empty()
            && self.connection.highest_ack_received()
                == self.initial_response_seq
                    + Wrapping(u32::try_from(self.response_buf.len()).unwrap())
        {
            // If we got here, then we still have some response bytes to send (which are
            // stored in self.response_buf).
            // It seems we just received the last ACK we were waiting for, so the entire
            // response has been successfully received. Set the new response_seq and clear
            // the response_buf.
            self.response_seq = self.connection.highest_ack_received();
            self.initial_response_seq = self.response_seq;
            self.response_buf.clear();
        }
        if self.response_buf.is_empty() {
            // There's no pending response currently, so we're back to waiting for a request to be
            // available in self.receive_buf.
            // The following is some ugly but workable code that attempts to find the end of an
            // HTTP 1.x request in receive_buf. We need to do this for now because
            // parse_request_bytes() expects the entire request contents as parameter.
            if self.receive_buf_left > 2 {
                let b = self.receive_buf.as_mut();
                for i in 0..self.receive_buf_left - 1 {
                    // We're basically looking for a double new line, which can only appear at the
                    // end of a valid request.
                    if b[i] == b'\n' {
                        // Accept both "\n\n" and "\n\r\n" as the request terminator.
                        let end = if b[i + 1] == b'\n' {
                            i + 2
                        } else if i + 3 <= self.receive_buf_left && &b[i + 1..i + 3] == b"\r\n" {
                            i + 3
                        } else {
                            continue;
                        };
                        // We found a potential request, let's parse it.
                        let response = parse_request_bytes(&b[..end], callback);
                        // The unwrap is safe because a Vec will allocate more space until all the
                        // writes succeed.
                        response.write_all(&mut self.response_buf).unwrap();
                        // Sanity check because the current logic operates under this assumption.
                        assert!(self.response_buf.len() < u32::MAX as usize);
                        // We have to remove the bytes up to end from receive_buf, by shifting the
                        // others to the beginning of the buffer, and updating receive_buf_left.
                        // Also, advance the rwnd edge of the inner connection.
                        b.copy_within(end.., 0);
                        self.receive_buf_left -= end;
                        // Safe to unwrap because we assert that the response buffer is small
                        // enough.
                        self.connection
                            .advance_local_rwnd_edge(u32::try_from(end).unwrap());
                        break;
                    }
                }
            }
            if self.receive_buf_left == self.receive_buf.len() {
                // If we get here the buffer is full, but we still couldn't identify the end of a
                // request, so we reset because we are over the maximum request size.
                self.connection.reset();
                self.stop_receiving = true;
                return;
            }
        }
        // We close the connection after receiving a FIN, and making sure there are no more
        // responses to send.
        if self.connection.fin_received() && self.response_buf.is_empty() {
            self.connection.close();
        }
    }

    // Writes the next outgoing segment into `buf`, if any. Pending response bytes (if any)
    // are offered to the inner connection as the payload source; response_seq is advanced
    // by however much the connection actually consumed.
    // NOTE(review): the return type appears garbled by extraction — presumably
    // `Option<Incomplete<TcpSegment<'a, &'a mut [u8]>>>`; confirm against upstream.
    pub fn write_next_segment<'a>(
        &mut self,
        buf: &'a mut [u8],
        mss_reserved: u16,
    ) -> Option>> {
        let tcp_payload_src = if !self.response_buf.is_empty() {
            // Resume sending from wherever response_seq points inside response_buf.
            let offset = self.response_seq - self.initial_response_seq;
            Some((
                self.response_buf.split_at(offset.0 as usize).1,
                self.response_seq,
            ))
        } else {
            None
        };
        match self.connection.write_next_segment(
            buf,
            mss_reserved,
            tcp_payload_src,
            timestamp_cycles(),
        ) {
            Ok(write_result) => write_result.inspect(|segment| {
                self.response_seq += Wrapping(u32::from(segment.inner().payload_len()));
            }),
            Err(_) => {
                // Errors are surfaced only as a metric; the caller just sees "nothing to send".
                METRICS.mmds.tx_errors.inc();
                None
            }
        }
    }

    #[inline]
    pub fn is_done(&self) -> bool {
        self.connection.is_done()
    }

    // True once eviction_threshold cycles have passed since the last received segment.
    #[inline]
    pub fn is_evictable(&self) -> bool {
        timestamp_cycles().wrapping_sub(self.last_segment_received_timestamp)
            > self.eviction_threshold
    }

    // Reports whether a segment is ready to transmit right now, or when the next
    // retransmission timeout fires (delegated to the inner connection otherwise).
    pub fn next_segment_status(&self) -> NextSegmentStatus {
        let can_send_new_data = !self.response_buf.is_empty()
            && seq_after(
                self.connection.remote_rwnd_edge(),
                self.connection.first_not_sent(),
            );
        if can_send_new_data || self.connection.dup_ack_pending() {
            NextSegmentStatus::Available
        } else {
            self.connection.control_segment_or_timeout_status()
        }
    }

    #[inline]
    pub fn connection(&self) -> &Connection {
        &self.connection
    }
}

// Small helper assembling a Response with the given status code and body.
fn build_response(status_code: StatusCode, body: Body) -> Response {
    let mut response = Response::new(Version::default(), status_code);
    response.set_body(body);
    response
}

/// Parses the request bytes and builds a `micro_http::Response` by the given callback function.
// Maps each parse failure to an appropriate HTTP status: malformed input -> 400,
// unsupported version/method -> 501, oversized request -> 413.
// NOTE(review): the generic parameter list looks garbled by extraction — presumably
// `<F: FnOnce(Request) -> Response>`; confirm against upstream.
fn parse_request_bytes Response>(
    byte_stream: &[u8],
    callback: F,
) -> Response {
    let request = Request::try_from(byte_stream, None);
    match request {
        Ok(request) => callback(request),
        Err(err) => match err {
            RequestError::BodyWithoutPendingRequest
            | RequestError::HeadersWithoutPendingRequest
            | RequestError::Overflow
            | RequestError::Underflow => {
                build_response(StatusCode::BadRequest, Body::new(err.to_string()))
            }
            RequestError::InvalidUri(err_msg) => {
                build_response(StatusCode::BadRequest, Body::new(err_msg.to_string()))
            }
            RequestError::InvalidHttpVersion(err_msg)
            | RequestError::InvalidHttpMethod(err_msg) => {
                build_response(StatusCode::NotImplemented, Body::new(err_msg.to_string()))
            }
            RequestError::HeaderError(err_msg) => {
                build_response(StatusCode::BadRequest, Body::new(err_msg.to_string()))
            }
            RequestError::InvalidRequest => build_response(
                StatusCode::BadRequest,
                Body::new("Invalid request.".to_string()),
            ),
            RequestError::SizeLimitExceeded(_, _) => {
                build_response(StatusCode::PayloadTooLarge, Body::new(err.to_string()))
            }
        },
    }
}

#[cfg(test)]
mod tests {
    use std::str::from_utf8;

    use super::*;
    use crate::dumbo::pdu::tcp::Flags as TcpFlags;
    use crate::dumbo::tcp::connection::tests::ConnectionTester;
    use crate::dumbo::tcp::tests::mock_callback;

    // Test-only hook so test_endpoint can force immediate evictability.
    impl Endpoint {
        pub fn set_eviction_threshold(&mut self, value: u64) {
            self.eviction_threshold = value;
        }
    }

    // End-to-end exercise of the Endpoint: handshake, request framing across segments,
    // repeated requests, evictability, and reset on buffer overflow.
    #[test]
    #[allow(clippy::cognitive_complexity)]
    fn test_endpoint() {
        let mut buf1 = [0u8; 500];
        let mut buf2 = [0u8; 500];
        let mut write_buf = [0u8; RCV_BUF_MAX_SIZE as usize + 100];
        let mut t = ConnectionTester::new();
        let mut syn = t.write_syn(buf1.as_mut());
        // Put another flag on the SYN so it becomes invalid.
        syn.set_flags_after_ns(TcpFlags::ACK);
        assert_eq!(
            Endpoint::new_with_defaults(&syn).unwrap_err(),
            PassiveOpenError::InvalidSyn
        );
        // Fix the SYN and create an endpoint.
syn.set_flags_after_ns(TcpFlags::SYN); let remote_isn = syn.sequence_number(); let mut endpoint = Endpoint::new_with_defaults(&syn).unwrap(); // Let's complete the three-way handshake. The next segment sent by the endpoint should // be a SYNACK. assert_eq!(endpoint.next_segment_status(), NextSegmentStatus::Available); let endpoint_isn = { // We need this block to delimit the mut borrow of write_buf. let s = endpoint .write_next_segment(write_buf.as_mut(), t.mss_reserved) .unwrap(); assert_eq!(s.inner().flags_after_ns(), TcpFlags::SYN | TcpFlags::ACK); s.inner().sequence_number() }; // A RTO should be pending until the SYNACK is ACKed. if let NextSegmentStatus::Timeout(_) = endpoint.next_segment_status() { assert_eq!( endpoint.next_segment_status(), endpoint.connection().control_segment_or_timeout_status() ); } else { panic!("missing expected timeout."); } // And now we ACK the SYNACK. let mut ctrl = t.write_ctrl(buf2.as_mut()); ctrl.set_flags_after_ns(TcpFlags::ACK); ctrl.set_ack_number(endpoint_isn.wrapping_add(1)); assert!(!endpoint.connection.is_established()); endpoint.receive_segment(&ctrl, mock_callback); assert!(endpoint.connection.is_established()); // Also, there should be nothing to send now anymore, nor any timeout pending. assert_eq!(endpoint.next_segment_status(), NextSegmentStatus::Nothing); // Incomplete because it's missing the newlines at the end. let incomplete_request = b"GET http://169.254.169.255/asdfghjkl HTTP/1.1"; { let mut data = t.write_data(write_buf.as_mut(), incomplete_request.as_ref()); data.set_flags_after_ns(TcpFlags::ACK); data.set_sequence_number(remote_isn.wrapping_add(1)); data.set_ack_number(endpoint_isn.wrapping_add(1)); endpoint.receive_segment(&data, mock_callback); } assert_eq!(endpoint.receive_buf_left, incomplete_request.len()); // 1 for the SYN. let mut remote_first_not_sent = remote_isn.wrapping_add(1 + u32::try_from(incomplete_request.len()).unwrap()); // The endpoint should write an ACK at this point. 
{ assert_eq!(endpoint.next_segment_status(), NextSegmentStatus::Available); let s = endpoint .write_next_segment(write_buf.as_mut(), t.mss_reserved) .unwrap(); assert_eq!(s.inner().flags_after_ns(), TcpFlags::ACK); assert_eq!(s.inner().ack_number(), remote_first_not_sent); } // There should be nothing else to send. assert_eq!(endpoint.next_segment_status(), NextSegmentStatus::Nothing); let rest_of_the_request = b"\r\n\r\n"; // Let's also send the newlines. { let mut data = t.write_data(write_buf.as_mut(), rest_of_the_request.as_ref()); data.set_flags_after_ns(TcpFlags::ACK); data.set_sequence_number(remote_first_not_sent); data.set_ack_number(endpoint_isn + 1); endpoint.receive_segment(&data, mock_callback); } remote_first_not_sent = remote_first_not_sent.wrapping_add(rest_of_the_request.len().try_into().unwrap()); let mut endpoint_first_not_sent; // We should get a data segment that also ACKs the latest bytes received. { assert_eq!(endpoint.next_segment_status(), NextSegmentStatus::Available); let s = endpoint .write_next_segment(write_buf.as_mut(), t.mss_reserved) .unwrap(); assert_eq!(s.inner().flags_after_ns(), TcpFlags::ACK); assert_eq!(s.inner().ack_number(), remote_first_not_sent); let response = from_utf8(s.inner().payload()).unwrap(); // The response should contain "200" because the HTTP request is correct. assert!(response.contains("200")); endpoint_first_not_sent = s .inner() .sequence_number() .wrapping_add(u32::from(s.inner().payload_len())); } // Cool, now let's check that even though receive_buf is limited to some value, we can // respond to any number of requests, as long as each fits individually inside the buffer. // We're going to use the simple approach where we send the same request over and over // again, for a relatively large number of iterations. 
let complete_request = b"GET http://169.254.169.255/asdfghjkl HTTP/1.1\r\n\r\n"; let last_request = b"GET http://169.254.169.255/asdfghjkl HTTP/1.1\r\n\r\n123"; // Send one request for each byte in receive_buf, just to be sure. let max_iter = endpoint.receive_buf.len(); for i in 1..=max_iter { // We want to use last_request for the last request. let request = if i == max_iter { last_request.as_ref() } else { complete_request.as_ref() }; // Send request. { let mut data = t.write_data(write_buf.as_mut(), request); data.set_flags_after_ns(TcpFlags::ACK); data.set_sequence_number(remote_first_not_sent); data.set_ack_number(endpoint_first_not_sent); endpoint.receive_segment(&data, mock_callback); } remote_first_not_sent = remote_first_not_sent.wrapping_add(request.len().try_into().unwrap()); // Check response. { let s = endpoint .write_next_segment(write_buf.as_mut(), t.mss_reserved) .unwrap(); assert_eq!(s.inner().flags_after_ns(), TcpFlags::ACK); assert_eq!(s.inner().ack_number(), remote_first_not_sent); let response = from_utf8(s.inner().payload()).unwrap(); assert!(response.contains("200")); endpoint_first_not_sent = endpoint_first_not_sent.wrapping_add(u32::from(s.inner().payload_len())); } } // The value of receive_buf_left should be 3 right now, because of the trailing chars from // last_request. assert_eq!(endpoint.receive_buf_left, 3); // Unless the machine running the tests is super slow for some reason, we should be nowhere // near the expiry of the eviction timer. assert!(!endpoint.is_evictable()); // Let's hack this a bit and change the eviction_threshold to 0. endpoint.set_eviction_threshold(0); // The endpoint should be evictable now. assert!(endpoint.is_evictable()); // Finally, let's fill self.receive_buf with the following request, and see if we get the // reset we expect on the next segment. 
        // Fill the remaining receive buffer exactly to capacity with a junk request.
        let request_to_fill = vec![0u8; RCV_BUF_MAX_SIZE as usize - endpoint.receive_buf_left];
        {
            // Hack: have to artificially increase t.mss to create this segment which is 2k+.
            t.mss = RCV_BUF_MAX_SIZE.try_into().unwrap();
            let mut data = t.write_data(write_buf.as_mut(), request_to_fill.as_ref());
            data.set_flags_after_ns(TcpFlags::ACK);
            data.set_sequence_number(remote_first_not_sent);
            data.set_ack_number(endpoint_first_not_sent);
            endpoint.receive_segment(&data, mock_callback);
        }
        {
            // The buffer was filled without forming a parseable request, so the endpoint
            // responds with a RST.
            let s = endpoint
                .write_next_segment(write_buf.as_mut(), t.mss_reserved)
                .unwrap();
            assert_eq!(s.inner().flags_after_ns(), TcpFlags::RST);
        }
    }

    /// Exercises `parse_request_bytes` error paths: unsupported version, empty URI,
    /// unsupported/supported methods, malformed requests, and invalid header values.
    #[test]
    fn test_parse_request_bytes_error() {
        // Test unsupported HTTP version.
        let request_bytes = b"GET http://169.254.169.255/ HTTP/2.0\r\n\r\n";
        let mut expected_response = Response::new(Version::Http11, StatusCode::NotImplemented);
        expected_response.set_body(Body::new("Unsupported HTTP version.".to_string()));
        let actual_response = parse_request_bytes(request_bytes, mock_callback);
        assert_eq!(actual_response, expected_response);

        // Test invalid URI (empty URI).
        let request_bytes = b"GET HTTP/1.0\r\n\r\n";
        let mut expected_response = Response::new(Version::Http11, StatusCode::BadRequest);
        expected_response.set_body(Body::new("Empty URI not allowed.".to_string()));
        let actual_response = parse_request_bytes(request_bytes, mock_callback);
        assert_eq!(actual_response, expected_response);

        // Test invalid HTTP methods.
        let invalid_methods = ["POST", "HEAD", "DELETE", "CONNECT", "OPTIONS", "TRACE"];
        for method in invalid_methods.iter() {
            let request_bytes = format!("{} http://169.254.169.255/ HTTP/1.0\r\n\r\n", method);
            let mut expected_response = Response::new(Version::Http11, StatusCode::NotImplemented);
            expected_response.set_body(Body::new("Unsupported HTTP method.".to_string()));
            let actual_response = parse_request_bytes(request_bytes.as_bytes(), mock_callback);
            assert_eq!(actual_response, expected_response);
        }

        // Test valid methods.
        let valid_methods = ["PUT", "PATCH", "GET"];
        for method in valid_methods.iter() {
            let request_bytes = format!("{} http://169.254.169.255/ HTTP/1.0\r\n\r\n", method);
            let expected_response = Response::new(Version::Http11, StatusCode::OK);
            let actual_response = parse_request_bytes(request_bytes.as_bytes(), mock_callback);
            assert_eq!(actual_response, expected_response);
        }

        // Test invalid HTTP format.
        let request_bytes = b"GET / HTTP/1.1\r\n";
        let mut expected_response = Response::new(Version::Http11, StatusCode::BadRequest);
        expected_response.set_body(Body::new("Invalid request.".to_string()));
        let actual_response = parse_request_bytes(request_bytes, mock_callback);
        assert_eq!(actual_response, expected_response);

        // Test invalid HTTP headers.
        let request_bytes = b"PATCH http://localhost/home HTTP/1.1\r\n\
            Expect: 100-continue\r\n\
            Transfer-Encoding: identity; q=0\r\n\
            Content-Length: 26\r\n\r\nthis is not\n\r\na json \nbody";
        assert!(
            parse_request_bytes(request_bytes, mock_callback)
                .body()
                .is_none()
        );

        // A non-numeric Content-Length value must be rejected with a BadRequest.
        let request_bytes = b"PATCH http://localhost/home HTTP/1.1\r\n\
            Expect: 100-continue\r\n\
            Transfer-Encoding: identity; q=0\r\n\
            Content-Length: alpha\r\n\r\nthis is not\n\r\na json \nbody";
        let mut expected_response = Response::new(Version::Http11, StatusCode::BadRequest);
        expected_response.set_body(Body::new(
            "Invalid value. Key:Content-Length; Value: alpha".to_string(),
        ));
        let actual_response = parse_request_bytes(request_bytes, mock_callback);
        assert_eq!(actual_response, expected_response);

        // An unsupported Accept-Encoding value must be rejected with a BadRequest.
        let request_bytes = b"PATCH http://localhost/home HTTP/1.1\r\n\
            Expect: 100-continue\r\n\
            Transfer-Encoding: identity; q=0\r\n\
            Content-Length: 67\r\n\
            Accept-Encoding: deflate, compress, *;q=0\r\n\r\nthis is not\n\r\na json \nbody";
        let mut expected_response = Response::new(Version::Http11, StatusCode::BadRequest);
        expected_response.set_body(Body::new(
            "Invalid value. Key:Accept-Encoding; Value: *;q=0".to_string(),
        ));
        let actual_response = parse_request_bytes(request_bytes, mock_callback);
        assert_eq!(actual_response, expected_response);
    }
}


================================================
FILE: src/vmm/src/dumbo/tcp/handler.rs
================================================
// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

//! Exposes simple TCP over IPv4 listener functionality via the [`TcpIPv4Handler`] structure.
//!
//! [`TcpIPv4Handler`]: struct.TcpIPv4Handler.html

use std::collections::{HashMap, HashSet};
use std::fmt::Debug;
use std::net::Ipv4Addr;
use std::num::NonZeroUsize;

use micro_http::{Request, Response};

use crate::dumbo::pdu::bytes::NetworkBytes;
use crate::dumbo::pdu::ipv4::{IPv4Packet, Ipv4Error as IPv4PacketError, PROTOCOL_TCP};
use crate::dumbo::pdu::tcp::{Flags as TcpFlags, TcpError as TcpSegmentError, TcpSegment};
use crate::dumbo::tcp::endpoint::Endpoint;
use crate::dumbo::tcp::{NextSegmentStatus, RstConfig};

// TODO: This is currently IPv4 specific. Maybe change it to a more generic implementation.

/// Describes events which may occur when the handler receives packets.
#[derive(Debug, PartialEq, Eq)]
pub enum RecvEvent {
    /// The local endpoint is done communicating, and has been removed.
    EndpointDone,
    /// An error occurred while trying to create a new `Endpoint` object, based on an incoming
    /// `SYN` segment.
    FailedNewConnection,
    /// A new local `Endpoint` has been successfully created.
    NewConnectionSuccessful,
    /// Failed to add a local `Endpoint` because the handler is already at the maximum number of
    /// concurrent connections, and there are no evictable Endpoints.
    NewConnectionDropped,
    /// A new local `Endpoint` has been successfully created, but the handler had to make room by
    /// evicting an older `Endpoint`.
    NewConnectionReplacing,
    /// Nothing interesting happened regarding the state of the handler.
Nothing, /// The handler received a non-`SYN` segment which does not belong to any existing /// connection. UnexpectedSegment, } /// Describes events which may occur when the handler writes packets. #[derive(Debug, PartialEq, Eq)] pub enum WriteEvent { /// The local `Endpoint` transitioned to being done after this segment was written. EndpointDone, /// Nothing interesting happened. Nothing, } /// Describes errors which may be encountered by the [`receive_packet`] method from /// [`TcpIPv4Handler`]. /// /// [`receive_packet`]: struct.TcpIPv4Handler.html#method.receive_packet /// [`TcpIPv4Handler`]: struct.TcpIPv4Handler.html #[derive(Debug, PartialEq, Eq, thiserror::Error, displaydoc::Display)] pub enum RecvError { /// The inner segment has an invalid destination port. InvalidPort, /// The handler encountered an error while parsing the inner TCP segment: {0} TcpSegment(#[from] TcpSegmentError), } /// Describes errors which may be encountered by the [`write_next_packet`] method from /// [`TcpIPv4Handler`]. /// /// [`write_next_packet`]: struct.TcpIPv4Handler.html#method.write_next_packet /// [`TcpIPv4Handler`]: struct.TcpIPv4Handler.html #[derive(Debug, PartialEq, Eq, thiserror::Error, displaydoc::Display)] pub enum WriteNextError { /// There was an error while writing the contents of the IPv4 packet: {0} IPv4Packet(#[from] IPv4PacketError), /// There was an error while writing the contents of the inner TCP segment: {0} TcpSegment(#[from] TcpSegmentError), } // Generally speaking, a TCP/IPv4 connection is identified using the four-tuple (src_addr, src_port, // dst_addr, dst_port). However, the IPv4 address and TCP port of the MMDS endpoint are fixed, so // we can get away with uniquely identifying connections using just the remote address and port. 
#[derive(Debug, Clone, Copy, Eq, Hash, PartialEq)] struct ConnectionTuple { remote_addr: Ipv4Addr, remote_port: u16, } impl ConnectionTuple { fn new(remote_addr: Ipv4Addr, remote_port: u16) -> Self { ConnectionTuple { remote_addr, remote_port, } } } /// Implements a minimalist TCP over IPv4 listener. /// /// Forwards incoming TCP segments to the appropriate connection object, based on the associated /// tuple, or attempts to establish new connections (when receiving `SYN` segments). Aside from /// constructors, the handler operation is based on three methods: /// /// * [`receive_packet`] examines an incoming IPv4 packet. It checks whether the destination address /// is correct, the attempts examine the inner TCP segment, making sure the destination port /// number is also correct. Then, it steers valid segments towards exiting connections, creates /// new connections for incoming `SYN` segments, and enqueues `RST` replies in response to any /// segments which cannot be associated with a connection (except other `RST` segments). On /// success, also describes any internal status changes triggered by the reception of the packet. /// * [`write_next_packet`] writes the next IPv4 packet (if available) that would be sent by the /// handler itself (right now it can only mean an enqueued `RST`), or one of the existing /// connections. On success, also describes any internal status changes triggered as the packet /// gets transmitted. /// * [`next_segment_status`] describes whether the handler can send a packet immediately, or after /// some retransmission timeout associated with a connection fires, or if there's nothing to send /// for the moment. This is used to determine whether it's appropriate to call /// [`write_next_packet`]. 
/// /// [`receive_packet`]: ../handler/struct.TcpIPv4Handler.html#method.receive_packet /// [`write_next_packet`]: ../handler/struct.TcpIPv4Handler.html#method.write_next_packet /// [`next_segment_status`]: ../handler/struct.TcpIPv4Handler.html#method.next_segment_status #[derive(Debug)] pub struct TcpIPv4Handler { // Handler IPv4 address used for every connection. local_ipv4_addr: Ipv4Addr, // Handler TCP port used for every connection. local_port: u16, // This map holds the currently active endpoints, identified by their connection tuple. connections: HashMap, // Maximum number of concurrent connections we are willing to handle. max_connections: NonZeroUsize, // Holds connections which are able to send segments immediately. active_connections: HashSet, // Remembers the closest timestamp into the future when one of the connections has to deal // with an RTO trigger. next_timeout: Option<(u64, ConnectionTuple)>, // RST segments awaiting to be sent. rst_queue: Vec<(ConnectionTuple, RstConfig)>, // Maximum size of the RST queue. max_pending_resets: NonZeroUsize, } // Only used locally, in the receive_packet method, to differentiate between different outcomes // associated with processing incoming packets. #[derive(Debug)] enum RecvSegmentOutcome { EndpointDone, EndpointRunning(NextSegmentStatus), NewConnection, UnexpectedSegment(bool), } impl TcpIPv4Handler { /// Creates a new `TcpIPv4Handler`. /// /// The handler acts as if bound to `local_addr`:`local_port`, and will accept at most /// `max_connections` concurrent connections. `RST` segments generated by unexpected incoming /// segments are placed in a queue which is at most `max_pending_resets` long. 
#[inline] pub fn new( local_ipv4_addr: Ipv4Addr, local_port: u16, max_connections: NonZeroUsize, max_pending_resets: NonZeroUsize, ) -> Self { TcpIPv4Handler { local_ipv4_addr, local_port, connections: HashMap::with_capacity(max_connections.get()), max_connections, active_connections: HashSet::with_capacity(max_connections.get()), next_timeout: None, rst_queue: Vec::with_capacity(max_pending_resets.get()), max_pending_resets, } } /// Setter for the local IPv4 address of this TCP handler. pub fn set_local_ipv4_addr(&mut self, ipv4_addr: Ipv4Addr) { self.local_ipv4_addr = ipv4_addr; } /// Returns the local IPv4 address of this TCP handler. pub fn local_ipv4_addr(&self) -> Ipv4Addr { self.local_ipv4_addr } /// Returns the local port of this TCP handler. pub fn local_port(&self) -> u16 { self.local_port } /// Returns the max connections of this TCP handler. pub fn max_connections(&self) -> NonZeroUsize { self.max_connections } /// Returns the max pending resets of this TCP handler. pub fn max_pending_resets(&self) -> NonZeroUsize { self.max_pending_resets } /// Contains logic for handling incoming segments. /// /// Any changes to the state of the handler are communicated through an `Ok(RecvEvent)`. pub fn receive_packet Response>( &mut self, packet: &IPv4Packet, callback: F, ) -> Result { // TODO: We skip verifying the checksum, just in case the device model relies on offloading // checksum computation from the guest to some other entity. Clear this up at some point! 
// (Issue #520) let segment = TcpSegment::from_bytes(packet.payload(), None)?; if segment.destination_port() != self.local_port { return Err(RecvError::InvalidPort); } let tuple = ConnectionTuple::new(packet.source_address(), segment.source_port()); let outcome = if let Some(endpoint) = self.connections.get_mut(&tuple) { endpoint.receive_segment(&segment, callback); if endpoint.is_done() { RecvSegmentOutcome::EndpointDone } else { RecvSegmentOutcome::EndpointRunning(endpoint.next_segment_status()) } } else if segment.flags_after_ns() == TcpFlags::SYN { RecvSegmentOutcome::NewConnection } else { // We should send a RST for every non-RST unexpected segment we receive. RecvSegmentOutcome::UnexpectedSegment( !segment.flags_after_ns().intersects(TcpFlags::RST), ) }; match outcome { RecvSegmentOutcome::EndpointDone => { self.remove_connection(tuple); Ok(RecvEvent::EndpointDone) } RecvSegmentOutcome::EndpointRunning(status) => { if !self.check_next_segment_status(tuple, status) { // The connection may not have been a member of active_connection, but it's // more straightforward to cover both cases this way. self.active_connections.remove(&tuple); } Ok(RecvEvent::Nothing) } RecvSegmentOutcome::NewConnection => { let endpoint = match Endpoint::new_with_defaults(&segment) { Ok(endpoint) => endpoint, Err(_) => return Ok(RecvEvent::FailedNewConnection), }; if self.connections.len() >= self.max_connections.get() { if let Some(evict_tuple) = self.find_evictable_connection() { let rst_config = self.connections[&evict_tuple] .connection() .make_rst_config(); self.enqueue_rst_config(evict_tuple, rst_config); self.remove_connection(evict_tuple); self.add_connection(tuple, endpoint); Ok(RecvEvent::NewConnectionReplacing) } else { // No room to accept the new connection. Try to enqueue a RST, and forget // about it. 
self.enqueue_rst(tuple, &segment); Ok(RecvEvent::NewConnectionDropped) } } else { self.add_connection(tuple, endpoint); Ok(RecvEvent::NewConnectionSuccessful) } } RecvSegmentOutcome::UnexpectedSegment(enqueue_rst) => { if enqueue_rst { self.enqueue_rst(tuple, &segment); } Ok(RecvEvent::UnexpectedSegment) } } } fn check_timeout(&mut self, value: u64, tuple: ConnectionTuple) { match self.next_timeout { Some((t, _)) if t > value => self.next_timeout = Some((value, tuple)), None => self.next_timeout = Some((value, tuple)), _ => (), }; } fn find_next_timeout(&mut self) { let mut next_timeout = None; for (tuple, endpoint) in self.connections.iter() { if let NextSegmentStatus::Timeout(value) = endpoint.next_segment_status() { if let Some((t, _)) = next_timeout { if t > value { next_timeout = Some((value, *tuple)); } } else { next_timeout = Some((value, *tuple)); } } } self.next_timeout = next_timeout; } // Returns true if the endpoint has been added to the set of active connections (it may have // been there already). fn check_next_segment_status( &mut self, tuple: ConnectionTuple, status: NextSegmentStatus, ) -> bool { if let Some((_, timeout_tuple)) = self.next_timeout && tuple == timeout_tuple { self.find_next_timeout(); } match status { NextSegmentStatus::Available => { self.active_connections.insert(tuple); return true; } NextSegmentStatus::Timeout(value) => self.check_timeout(value, tuple), NextSegmentStatus::Nothing => (), }; false } fn add_connection(&mut self, tuple: ConnectionTuple, endpoint: Endpoint) { self.check_next_segment_status(tuple, endpoint.next_segment_status()); self.connections.insert(tuple, endpoint); } fn remove_connection(&mut self, tuple: ConnectionTuple) { // Just in case it's in there somewhere. 
self.active_connections.remove(&tuple); self.connections.remove(&tuple); if let Some((_, timeout_tuple)) = self.next_timeout && timeout_tuple == tuple { self.find_next_timeout(); } } // TODO: I guess this should be refactored at some point to also remove the endpoint if found. fn find_evictable_connection(&self) -> Option { for (tuple, endpoint) in self.connections.iter() { if endpoint.is_evictable() { return Some(*tuple); } } None } fn enqueue_rst_config(&mut self, tuple: ConnectionTuple, cfg: RstConfig) { // We simply forgo sending any RSTs if the queue is already full. if self.rst_queue.len() < self.max_pending_resets.get() { self.rst_queue.push((tuple, cfg)); } } fn enqueue_rst(&mut self, tuple: ConnectionTuple, s: &TcpSegment) { self.enqueue_rst_config(tuple, RstConfig::new(s)); } /// Attempts to write one packet, from either the `RST` queue or one of the existing endpoints, /// to `buf`. /// /// On success, the function returns a pair containing an `Option` and a /// `WriteEvent`. The options represents how many bytes have been written to `buf`, or /// that no packet can be send presently (when equal to `None`). The `WriteEvent` describes /// whether any noteworthy state changes are associated with the write. pub fn write_next_packet( &mut self, buf: &mut [u8], ) -> Result<(Option, WriteEvent), WriteNextError> { let mut len = None; let mut writer_status = None; let mut event = WriteEvent::Nothing; // Write an incomplete Ipv4 packet and complete it afterwards with missing information. let mut packet = IPv4Packet::write_header(buf, PROTOCOL_TCP, Ipv4Addr::LOCALHOST, Ipv4Addr::LOCALHOST)?; // We set mss_used to 0, because we don't add any IP options. // TODO: Maybe get this nicely from packet at some point. let mss_reserved = 0; // We prioritize sending RSTs for now. The 10000 value for window size is just an arbitrary // number, and using mss_remaining = 0 is perfectly fine in this case, because we don't add // any TCP options, or a payload. 
if let Some((tuple, rst_cfg)) = self.rst_queue.pop() { let (seq, ack, flags_after_ns) = rst_cfg.seq_ack_tcp_flags(); let segment_len = TcpSegment::write_incomplete_segment::<[u8]>( packet.inner_mut().payload_mut(), seq, ack, flags_after_ns, 10000, None, 0, None, )? .finalize( self.local_port, tuple.remote_port, Some((self.local_ipv4_addr, tuple.remote_addr)), ) .len(); packet .inner_mut() .set_source_address(self.local_ipv4_addr) .set_destination_address(tuple.remote_addr); let packet_len = packet.with_payload_len_unchecked(segment_len, true).len(); // The unwrap() is safe because packet_len > 0. return Ok(( Some(NonZeroUsize::new(packet_len).unwrap()), WriteEvent::Nothing, )); } for tuple in self .active_connections .iter() .chain(self.next_timeout.as_ref().map(|(_, x)| x)) { // Tuples in self.active_connection or self.next_timeout should also appear as keys // in self.connections. let endpoint = self.connections.get_mut(tuple).unwrap(); // We need this block to clearly delimit the lifetime of the mutable borrow started by // the following packet.inner_mut(). let segment_len = { let maybe_segment = endpoint.write_next_segment(packet.inner_mut().payload_mut(), mss_reserved); match maybe_segment { Some(segment) => segment .finalize( self.local_port, tuple.remote_port, Some((self.local_ipv4_addr, tuple.remote_addr)), ) .len(), None => continue, } }; packet .inner_mut() .set_source_address(self.local_ipv4_addr) .set_destination_address(tuple.remote_addr); let ip_len = packet.with_payload_len_unchecked(segment_len, true).len(); // The unwrap is safe because ip_len > 0. len = Some(NonZeroUsize::new(ip_len).unwrap()); writer_status = Some((*tuple, endpoint.is_done())); break; } if let Some((tuple, is_done)) = writer_status { if is_done { self.remove_connection(tuple); event = WriteEvent::EndpointDone; } else { // The unwrap is safe because tuple is present as a key in self.connections if we // got here. 
let status = self.connections[&tuple].next_segment_status(); if !self.check_next_segment_status(tuple, status) { self.active_connections.remove(&tuple); } } } Ok((len, event)) } /// Describes the status of the next segment to be sent by the handler. #[inline] pub fn next_segment_status(&self) -> NextSegmentStatus { if !self.active_connections.is_empty() || !self.rst_queue.is_empty() { return NextSegmentStatus::Available; } if let Some((value, _)) = self.next_timeout { return NextSegmentStatus::Timeout(value); } NextSegmentStatus::Nothing } } #[cfg(test)] mod tests { use std::fmt::Debug; use super::*; use crate::dumbo::pdu::bytes::NetworkBytesMut; use crate::dumbo::tcp::tests::mock_callback; fn inner_tcp_mut<'a, T: NetworkBytesMut + Debug>( p: &'a mut IPv4Packet<'_, T>, ) -> TcpSegment<'a, &'a mut [u8]> { TcpSegment::from_bytes(p.payload_mut(), None).unwrap() } #[allow(clippy::type_complexity)] fn write_next<'a>( h: &mut TcpIPv4Handler, buf: &'a mut [u8], ) -> Result<(Option>, WriteEvent), WriteNextError> { h.write_next_packet(buf).map(|(o, err)| { ( o.map(move |len| { let len = len.get(); IPv4Packet::from_bytes(buf.split_at_mut(len).0, true).unwrap() }), err, ) }) } fn next_written_segment<'a>( h: &mut TcpIPv4Handler, buf: &'a mut [u8], expected_event: WriteEvent, ) -> TcpSegment<'a, &'a mut [u8]> { let (segment_start, segment_end) = { let (o, event) = write_next(h, buf).unwrap(); assert_eq!(event, expected_event); let p = o.unwrap(); (p.header_len(), p.len()) }; TcpSegment::from_bytes(&mut buf[segment_start.into()..segment_end], None).unwrap() } // Calls write_next_packet until either an error occurs, or there's nothing left to send. // When successful, returns how many packets were written. The remote_addr argument is used // to check the packets are sent to the appropriate destination. 
fn drain_packets( h: &mut TcpIPv4Handler, src_addr: Ipv4Addr, remote_addr: Ipv4Addr, ) -> Result { let mut buf = [0u8; 2000]; let mut count: usize = 0; loop { let (o, _) = write_next(h, buf.as_mut())?; if let Some(packet) = o { count += 1; assert_eq!(packet.source_address(), src_addr); assert_eq!(packet.destination_address(), remote_addr); } else { break; } } Ok(count) } #[test] #[allow(clippy::cognitive_complexity)] fn test_handler() { let mut buf = [0u8; 100]; let mut buf2 = [0u8; 2000]; let wrong_local_addr = Ipv4Addr::new(123, 123, 123, 123); let local_addr = Ipv4Addr::new(169, 254, 169, 254); let local_port = 80; let remote_addr = Ipv4Addr::new(10, 0, 0, 1); let remote_port = 1012; let max_connections = 2; let max_pending_resets = 2; let mut h = TcpIPv4Handler::new( local_addr, local_port, NonZeroUsize::new(max_connections).unwrap(), NonZeroUsize::new(max_pending_resets).unwrap(), ); // We start with a wrong destination address and destination port to check those error // conditions first. let mut p = IPv4Packet::write_header(buf.as_mut(), PROTOCOL_TCP, remote_addr, wrong_local_addr) .unwrap(); let seq_number = 123; let s_len = { // We're going to use this simple segment to test stuff. let s = TcpSegment::write_segment::<[u8]>( p.inner_mut().payload_mut(), remote_port, // We use the wrong port here initially, to trigger an error. local_port + 1, seq_number, 456, TcpFlags::empty(), 10000, None, 100, None, None, ) .unwrap(); s.len() }; // The handler should have nothing to send at this point. assert_eq!(h.next_segment_status(), NextSegmentStatus::Nothing); assert_eq!(drain_packets(&mut h, local_addr, remote_addr), Ok(0)); let mut p = p.with_payload_len_unchecked(s_len, false); p.set_destination_address(local_addr); assert_eq!( h.receive_packet(&p, mock_callback).unwrap_err(), RecvError::InvalidPort ); // Let's fix the port. However, the segment is not a valid SYN, so we should get an // UnexpectedSegment status, and the handler should write a RST. 
assert_eq!(h.rst_queue.len(), 0); inner_tcp_mut(&mut p).set_destination_port(local_port); assert_eq!( h.receive_packet(&p, mock_callback), Ok(RecvEvent::UnexpectedSegment) ); assert_eq!(h.rst_queue.len(), 1); assert_eq!(h.next_segment_status(), NextSegmentStatus::Available); { let s = next_written_segment(&mut h, buf2.as_mut(), WriteEvent::Nothing); assert!(s.flags_after_ns().intersects(TcpFlags::RST)); assert_eq!(s.source_port(), local_port); assert_eq!(s.destination_port(), remote_port); } assert_eq!(h.rst_queue.len(), 0); assert_eq!(h.next_segment_status(), NextSegmentStatus::Nothing); // Let's check we can only enqueue max_pending_resets resets. assert_eq!( h.receive_packet(&p, mock_callback), Ok(RecvEvent::UnexpectedSegment) ); assert_eq!(h.rst_queue.len(), 1); assert_eq!( h.receive_packet(&p, mock_callback), Ok(RecvEvent::UnexpectedSegment) ); assert_eq!(h.rst_queue.len(), 2); assert_eq!( h.receive_packet(&p, mock_callback), Ok(RecvEvent::UnexpectedSegment) ); assert_eq!(h.rst_queue.len(), 2); // Drain the resets. assert_eq!(h.next_segment_status(), NextSegmentStatus::Available); assert_eq!(drain_packets(&mut h, local_addr, remote_addr), Ok(2)); assert_eq!(h.next_segment_status(), NextSegmentStatus::Nothing); // Ok now let's send a valid SYN. assert_eq!(h.connections.len(), 0); inner_tcp_mut(&mut p).set_flags_after_ns(TcpFlags::SYN); assert_eq!( h.receive_packet(&p, mock_callback), Ok(RecvEvent::NewConnectionSuccessful) ); assert_eq!(h.connections.len(), 1); assert_eq!(h.active_connections.len(), 1); // Let's immediately send a RST to the newly initiated connection. This should // terminate it. inner_tcp_mut(&mut p) .set_flags_after_ns(TcpFlags::RST) .set_sequence_number(seq_number.wrapping_add(1)); assert_eq!( h.receive_packet(&p, mock_callback), Ok(RecvEvent::EndpointDone) ); assert_eq!(h.connections.len(), 0); assert_eq!(h.active_connections.len(), 0); // Now, let's restore the previous SYN, and resend it to initiate a connection. 
inner_tcp_mut(&mut p) .set_flags_after_ns(TcpFlags::SYN) .set_sequence_number(seq_number); assert_eq!( h.receive_packet(&p, mock_callback), Ok(RecvEvent::NewConnectionSuccessful) ); assert_eq!(h.connections.len(), 1); assert_eq!(h.active_connections.len(), 1); // There will be a SYNACK in response. assert_eq!(h.next_segment_status(), NextSegmentStatus::Available); assert_eq!(drain_packets(&mut h, local_addr, remote_addr), Ok(1)); let remote_tuple = ConnectionTuple::new(remote_addr, remote_port); let remote_tuple2 = ConnectionTuple::new(remote_addr, remote_port + 1); // Also, there should be a retransmission timer associated with the previous SYNACK now. assert_eq!(h.active_connections.len(), 0); let old_timeout_value = if let Some((t, tuple)) = h.next_timeout { assert_eq!(tuple, remote_tuple); t } else { panic!("missing first expected timeout"); }; // Using the same SYN again will route the packet to the previous connection, and not // create a new one. assert_eq!(h.receive_packet(&p, mock_callback), Ok(RecvEvent::Nothing)); assert_eq!(h.connections.len(), 1); // SYNACK retransmission. assert_eq!(drain_packets(&mut h, local_addr, remote_addr), Ok(1)); // The timeout value should've gotten updated. assert_eq!(h.active_connections.len(), 0); if let Some((t, tuple)) = h.next_timeout { assert_eq!(tuple, remote_tuple); // The current Endpoint implementation gets timestamps using timestamp_cycles(), which // increases VERY fast so the following inequality is guaranteed to be true. If the // timestamp source gets coarser at some point, we might need an explicit wait before // the previous h.receive_packet() :-s assert!(t > old_timeout_value); } else { panic!("missing second expected timeout"); }; // Let's ACK the SYNACK. 
{ let seq = h.connections[&remote_tuple].connection().first_not_sent().0; inner_tcp_mut(&mut p) .set_flags_after_ns(TcpFlags::ACK) .set_ack_number(seq); assert_eq!(h.receive_packet(&p, mock_callback), Ok(RecvEvent::Nothing)); } // There should be no more active connections now, and also no pending timeout. assert_eq!(h.active_connections.len(), 0); assert_eq!(h.next_timeout, None); // Make p a SYN packet again. inner_tcp_mut(&mut p).set_flags_after_ns(TcpFlags::SYN); // Create a new connection, from a different remote_port. inner_tcp_mut(&mut p).set_source_port(remote_port + 1); assert_eq!( h.receive_packet(&p, mock_callback), Ok(RecvEvent::NewConnectionSuccessful) ); assert_eq!(h.connections.len(), 2); assert_eq!(h.active_connections.len(), 1); // SYNACK assert_eq!(drain_packets(&mut h, local_addr, remote_addr), Ok(1)); // The timeout associated with the SYNACK of the second connection should be next. assert_eq!(h.active_connections.len(), 0); if let Some((_, tuple)) = h.next_timeout { assert_ne!(tuple, ConnectionTuple::new(remote_addr, remote_port)); } else { panic!("missing third expected timeout"); } // No more room for another one. { let port = remote_port + 2; inner_tcp_mut(&mut p).set_source_port(port); assert_eq!( h.receive_packet(&p, mock_callback), Ok(RecvEvent::NewConnectionDropped) ); assert_eq!(h.connections.len(), 2); // We should get a RST. assert_eq!(h.rst_queue.len(), 1); let s = next_written_segment(&mut h, buf2.as_mut(), WriteEvent::Nothing); assert!(s.flags_after_ns().intersects(TcpFlags::RST)); assert_eq!(s.destination_port(), port); } // Let's make the second endpoint evictable. h.connections .get_mut(&remote_tuple2) .unwrap() .set_eviction_threshold(0); // The new connection will replace the old one. assert_eq!( h.receive_packet(&p, mock_callback), Ok(RecvEvent::NewConnectionReplacing) ); assert_eq!(h.connections.len(), 2); assert_eq!(h.active_connections.len(), 1); // One SYNACK for the new connection, and one RST for the old one. 
assert_eq!(h.rst_queue.len(), 1); assert_eq!(drain_packets(&mut h, local_addr, remote_addr), Ok(2)); assert_eq!(h.rst_queue.len(), 0); assert_eq!(h.active_connections.len(), 0); // Let's send another SYN to the first connection. This should make it reappear among the // active connections (because it will have a RST to send), and then cause it to be removed // altogether after sending the RST (because is_done() will be true). inner_tcp_mut(&mut p).set_source_port(remote_port); assert_eq!(h.receive_packet(&p, mock_callback), Ok(RecvEvent::Nothing)); assert_eq!(h.active_connections.len(), 1); assert_eq!(drain_packets(&mut h, local_addr, remote_addr), Ok(1)); assert_eq!(h.connections.len(), 1); assert_eq!(h.active_connections.len(), 0); } } ================================================ FILE: src/vmm/src/dumbo/tcp/mod.rs ================================================ // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 //! Provides functionality for handling incoming TCP connections. pub mod connection; mod endpoint; pub mod handler; use std::fmt::Debug; use std::num::Wrapping; use crate::dumbo::pdu::bytes::NetworkBytes; use crate::dumbo::pdu::tcp::{Flags as TcpFlags, TcpSegment}; /// The largest possible window size (requires the window scaling option). pub const MAX_WINDOW_SIZE: u32 = 1_073_725_440; /// The default maximum segment size (MSS) value, used when no MSS information is carried /// over the initial handshake. pub const MSS_DEFAULT: u16 = 536; /// Describes whether a particular entity (a [`Connection`] for example) has segments to send. /// /// [`Connection`]: connection/struct.Connection.html #[derive(Debug, PartialEq, Eq)] pub enum NextSegmentStatus { /// At least one segment is available immediately. Available, /// There's nothing to send. Nothing, /// A retransmission timeout (RTO) will trigger after the specified point in time. 
Timeout(u64), } /// Represents the configuration of the sequence number and `ACK` number fields for outgoing /// `RST` segments. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum RstConfig { /// The `RST` segment will carry the specified sequence number, and will not have /// the `ACK` flag set. Seq(u32), /// The `RST` segment will carry 0 as the sequence number, will have the `ACK` flag enabled, /// and the `ACK` number will be set to the specified value. Ack(u32), } impl RstConfig { /// Creates a `RstConfig` in response to the given segment. pub fn new(s: &TcpSegment) -> Self { if s.flags_after_ns().intersects(TcpFlags::ACK) { // If s contains an ACK number, we use that as the sequence number of the RST. RstConfig::Seq(s.ack_number()) } else { // Otherwise we try to guess a valid ACK number for the RST like this. RstConfig::Ack(s.sequence_number().wrapping_add(s.payload_len().into())) } } /// Returns the sequence number, acknowledgement number, and TCP flags (not counting `NS`) that /// must be set on the outgoing `RST` segment. pub fn seq_ack_tcp_flags(self) -> (u32, u32, TcpFlags) { match self { RstConfig::Seq(seq) => (seq, 0, TcpFlags::RST), RstConfig::Ack(ack) => (0, ack, TcpFlags::RST | TcpFlags::ACK), } } } /// Returns true if `a` comes after `b` in the sequence number space, relative to the maximum /// possible window size. /// /// Please note this is not a connex binary relation; in other words, given two sequence numbers, /// it's sometimes possible that `seq_after(a, b) || seq_after(b, a) == false`. This is why /// `seq_after(a, b)` can't be defined as simply `!seq_at_or_after(b, a)`. #[inline] pub fn seq_after(a: Wrapping, b: Wrapping) -> bool { a != b && (a - b).0 < MAX_WINDOW_SIZE } /// Returns true if `a` comes after, or is at `b` in the sequence number space, relative to /// the maximum possible window size. 
///
/// Please note this is not a connex binary relation; in other words, given two sequence numbers,
/// it's sometimes possible that `seq_at_or_after(a, b) || seq_at_or_after(b, a) == false`. This
/// is why `seq_at_or_after(a, b)` can't be defined as simply `!seq_after(b, a)`.
#[inline]
pub fn seq_at_or_after(a: Wrapping, b: Wrapping) -> bool {
    (a - b).0 < MAX_WINDOW_SIZE
}

#[cfg(test)]
mod tests {
    use micro_http::{Request, Response, StatusCode, Version};

    use super::*;

    // In tcp tests, some of the functions require a callback parameter. Since we do not care,
    // for the purpose of those tests, what that callback does, we need to provide a dummy one.
    pub fn mock_callback(_request: Request) -> Response {
        Response::new(Version::Http11, StatusCode::OK)
    }

    #[test]
    fn test_rst_config() {
        let mut buf = [0u8; 100];
        let seq = 1234;
        let ack = 5678;

        let mut s = TcpSegment::write_segment::<[u8]>(
            buf.as_mut(),
            0,
            0,
            seq,
            ack,
            TcpFlags::empty(),
            0,
            None,
            100,
            None,
            None,
        )
        .unwrap();

        // The ACK flag isn't set, and the payload length is 0, so the RST acknowledges `seq`.
        let cfg = RstConfig::new(&s);
        assert_eq!(cfg, RstConfig::Ack(seq));
        assert_eq!(
            cfg.seq_ack_tcp_flags(),
            (0, seq, TcpFlags::RST | TcpFlags::ACK)
        );

        // Let's set the ACK flag; now the RST reuses the segment's ACK number as its seq.
        s.set_flags_after_ns(TcpFlags::ACK);
        let cfg = RstConfig::new(&s);
        assert_eq!(cfg, RstConfig::Seq(ack));
        assert_eq!(cfg.seq_ack_tcp_flags(), (ack, 0, TcpFlags::RST));
    }

    #[test]
    fn test_seq_at_or_after() {
        let a = Wrapping(123);
        let b = a + Wrapping(100);
        // `c` is exactly MAX_WINDOW_SIZE past `a`, i.e. just outside the "after" window of `a`.
        let c = a + Wrapping(MAX_WINDOW_SIZE);

        assert!(seq_at_or_after(a, a));
        assert!(!seq_after(a, a));
        assert!(seq_at_or_after(b, a));
        assert!(seq_after(b, a));
        assert!(!seq_at_or_after(a, b));
        assert!(!seq_after(a, b));
        assert!(!seq_at_or_after(c, a));
        assert!(!seq_after(c, a));
        assert!(seq_at_or_after(c, b));
        assert!(seq_after(c, b));
    }
}

================================================ FILE: src/vmm/src/gdb/arch/aarch64.rs ================================================

// Copyright 2024 Amazon.com, Inc.
// or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

use std::mem::offset_of;

use gdbstub_arch::aarch64::reg::AArch64CoreRegs as CoreRegs;
use kvm_bindings::{
    KVM_GUESTDBG_ENABLE, KVM_GUESTDBG_SINGLESTEP, KVM_GUESTDBG_USE_HW, KVM_GUESTDBG_USE_SW_BP,
    KVM_REG_ARM_CORE, KVM_REG_ARM64, KVM_REG_SIZE_U64, kvm_guest_debug, kvm_regs, user_pt_regs,
};
use kvm_ioctls::VcpuFd;
use vm_memory::{Bytes, GuestAddress};

use crate::Vmm;
use crate::arch::aarch64::regs::{
    Aarch64RegisterVec, ID_AA64MMFR0_EL1, TCR_EL1, TTBR1_EL1, arm64_core_reg_id,
};
use crate::arch::aarch64::vcpu::get_registers;
use crate::gdb::target::GdbTargetError;

/// Configures the number of bytes required for a software breakpoint.
///
/// The breakpoint instruction operation also includes the immediate argument which we 0 hence the
/// size.
pub const SW_BP_SIZE: usize = 4;

/// The bytes stored for a software breakpoint.
///
/// This is the BRK instruction with a 0 immediate argument.
/// https://developer.arm.com/documentation/ddi0602/2024-09/Base-Instructions/BRK--Breakpoint-instruction-
pub const SW_BP: [u8; SW_BP_SIZE] = [0, 0, 32, 212];

/// Register id for the program counter
const PC_REG_ID: u64 = arm64_core_reg_id!(KVM_REG_SIZE_U64, offset_of!(user_pt_regs, pc));

/// Retrieve a single register from a Vcpu
// NOTE(review): the bare `Result` return types throughout this file appear without their type
// parameters (presumably e.g. `Result<u64, GdbTargetError>`) -- likely lost in extraction;
// confirm against upstream before relying on these signatures.
fn get_sys_reg(reg: u64, vcpu_fd: &VcpuFd) -> Result {
    let mut register_vec = Aarch64RegisterVec::default();
    get_registers(vcpu_fd, &[reg], &mut register_vec)?;
    let register = register_vec
        .iter()
        .next()
        .ok_or(GdbTargetError::ReadRegisterVecError)?;

    Ok(register.value())
}

/// Gets the PC value for a Vcpu
pub fn get_instruction_pointer(vcpu_fd: &VcpuFd) -> Result {
    get_sys_reg(PC_REG_ID, vcpu_fd)
}

/// Helper to extract a specific number of bits at an offset from a u64
macro_rules! extract_bits_64 {
    ($value: tt, $offset: tt, $length: tt) => {
        ($value >> $offset) & (!0u64 >> (64 - $length))
    };
}

/// Mask to clear the last 3 bits from the page table entry
const PTE_ADDRESS_MASK: u64 = !0b111u64;

/// Read a u64 value from a guest memory address
fn read_address(vmm: &Vmm, address: u64) -> Result {
    let mut buf = [0; 8];
    vmm.vm
        .guest_memory()
        .read(&mut buf, GuestAddress(address))?;
    Ok(u64::from_le_bytes(buf))
}

/// The grainsize used with 4KB paging
const GRAIN_SIZE: usize = 9;

/// Translates a virtual address according to the Vcpu's current address translation mode.
/// Returns the GPA (guest physical address)
///
/// To simplify the implementation we've made some assumptions about the paging setup
/// (kernel-space address, 4KB granule). Here we just assert firstly paging is setup and these
/// assumptions are correct, bailing out with `GvaTranslateError` otherwise.
pub fn translate_gva(vcpu_fd: &VcpuFd, gva: u64, vmm: &Vmm) -> Result {
    // Check this virtual address is in kernel space (bit 55 selects TTBR1 upper range)
    if extract_bits_64!(gva, 55, 1) == 0 {
        return Err(GdbTargetError::GvaTranslateError);
    }

    // Translation control register
    let tcr_el1: u64 = get_sys_reg(TCR_EL1, vcpu_fd)?;

    // If this is 0 then translation is not yet ready -- identity-map the address
    if extract_bits_64!(tcr_el1, 16, 6) == 0 {
        return Ok(gva);
    }

    // Check 4KB pages are being used
    if extract_bits_64!(tcr_el1, 30, 2) != 2 {
        return Err(GdbTargetError::GvaTranslateError);
    }

    // ID_AA64MMFR0_EL1 provides information about the implemented memory model and memory
    // management. Check this is a physical address size we support
    let pa_size = match get_sys_reg(ID_AA64MMFR0_EL1, vcpu_fd)? & 0b1111 {
        0 => 32,
        1 => 36,
        2 => 40,
        3 => 42,
        4 => 44,
        5 => 48,
        _ => return Err(GdbTargetError::GvaTranslateError),
    };

    // A mask of the physical address size for a virtual address
    let pa_address_mask: u64 = !0u64 >> (64 - pa_size);

    // A mask used to take the bottom 12 bits of a value this is as we have a grainsize of 9
    // asserted with our 4kb page, plus the offset of 3
    let lower_mask: u64 = 0xFFF;

    // A mask for a physical address mask with the lower 12 bits cleared
    let desc_mask: u64 = pa_address_mask & !lower_mask;

    // Table index at each of the four lookup levels, most-significant first.
    let page_indices = [
        (gva >> (GRAIN_SIZE * 4)) & lower_mask,
        (gva >> (GRAIN_SIZE * 3)) & lower_mask,
        (gva >> (GRAIN_SIZE * 2)) & lower_mask,
        (gva >> GRAIN_SIZE) & lower_mask,
    ];

    // Transition table base register used for initial table lookup.
    // Take the bottom 48 bits from the register value.
    let mut address: u64 = get_sys_reg(TTBR1_EL1, vcpu_fd)? & pa_address_mask;
    let mut level = 0;

    while level < 4 {
        // Clear the bottom 3 bits from this address
        let pte = read_address(vmm, (address + page_indices[level]) & PTE_ADDRESS_MASK)?;
        address = pte & desc_mask;

        // If this is a valid table entry and we aren't at the end of the page tables
        // then loop again and check next level
        if (pte & 2 != 0) && (level < 3) {
            level += 1;
            continue;
        }
        break;
    }

    // Generate a mask to split between the page table entry and the GVA. The split point is
    // dependent on which level we terminate at. This is calculated by taking the level we
    // hit multiplied by the grainsize then adding the 3 offset
    let page_size = 1u64 << ((GRAIN_SIZE * (4 - level)) + 3);

    // Clear bottom bits of page size, then splice in the page offset from the GVA
    address &= !(page_size - 1);
    address |= gva & (page_size - 1);
    Ok(address)
}

/// Configures the kvm guest debug regs to register the hardware breakpoints
fn set_kvm_debug(
    control: u32,
    vcpu_fd: &VcpuFd,
    addrs: &[GuestAddress],
) -> Result<(), GdbTargetError> {
    let mut dbg = kvm_guest_debug {
        control,
        ..Default::default()
    };

    for (i, addr) in addrs.iter().enumerate() {
        // DBGBCR_EL1 (Debug Breakpoint Control Registers, D13.3.2):
        // bit 0: 1 (Enabled)
        // bit 1~2: 0b11 (PMC = EL1/EL0)
        // bit 5~8: 0b1111 (BAS = AArch64)
        // others: 0
        dbg.arch.dbg_bcr[i] = 0b1 | (0b11 << 1) | (0b1111 << 5);
        // DBGBVR_EL1 (Debug Breakpoint Value Registers, D13.3.3):
        // bit 2~52: VA[2:52]
        dbg.arch.dbg_bvr[i] = (!0u64 >> 11) & addr.0;
    }

    vcpu_fd.set_guest_debug(&dbg)?;

    Ok(())
}

/// Bits in a Vcpu pstate for IRQ (the DAIF I and F mask bits)
const IRQ_ENABLE_FLAGS: u64 = 0x80 | 0x40;
/// Register id for pstate
const PSTATE_ID: u64 = arm64_core_reg_id!(KVM_REG_SIZE_U64, offset_of!(user_pt_regs, pstate));

/// Disable IRQ interrupts to avoid getting stuck in a loop while single stepping
///
/// When GDB hits a single breakpoint and resumes it will follow the steps:
/// - Clear SW breakpoint we've hit
/// - Single step
/// - Re-insert the SW breakpoint
/// - Resume
///
/// However, with IRQ enabled the single step takes us into the IRQ handler so when we resume we
/// immediately hit the SW breakpoint we just re-inserted getting stuck in a loop.
fn toggle_interrupts(vcpu_fd: &VcpuFd, enable: bool) -> Result<(), GdbTargetError> {
    let mut pstate = get_sys_reg(PSTATE_ID, vcpu_fd)?;

    // Despite the name, `enable == true` *sets* the DAIF mask bits, i.e. masks IRQ/FIQ.
    if enable {
        pstate |= IRQ_ENABLE_FLAGS;
    } else {
        pstate &= !IRQ_ENABLE_FLAGS;
    }

    vcpu_fd.set_one_reg(PSTATE_ID, &pstate.to_le_bytes())?;

    Ok(())
}

/// Configures the Vcpu for debugging and sets the hardware breakpoints on the Vcpu
pub fn vcpu_set_debug(
    vcpu_fd: &VcpuFd,
    addrs: &[GuestAddress],
    step: bool,
) -> Result<(), GdbTargetError> {
    let mut control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW | KVM_GUESTDBG_USE_SW_BP;
    if step {
        control |= KVM_GUESTDBG_SINGLESTEP;
    }

    // Interrupts are masked only while single stepping (see `toggle_interrupts`).
    toggle_interrupts(vcpu_fd, step)?;
    set_kvm_debug(control, vcpu_fd, addrs)
}

/// KVM does not support injecting breakpoints on aarch64 so this is a no-op
pub fn vcpu_inject_bp(
    _vcpu_fd: &VcpuFd,
    _addrs: &[GuestAddress],
    _step: bool,
) -> Result<(), GdbTargetError> {
    Ok(())
}

/// The number of general purpose registers
const GENERAL_PURPOSE_REG_COUNT: usize = 31;
/// The number of core registers we read from the Vcpu (x0-x30 plus sp and pc)
const CORE_REG_COUNT: usize = 33;

/// Stores the register ids of registers to be read from the Vcpu
const CORE_REG_IDS: [u64; CORE_REG_COUNT] = {
    let mut regs = [0; CORE_REG_COUNT];
    let mut idx = 0;

    let reg_offset = offset_of!(kvm_regs, regs);
    let mut off = reg_offset;
    while idx < GENERAL_PURPOSE_REG_COUNT {
        regs[idx] = arm64_core_reg_id!(KVM_REG_SIZE_U64, off);
        idx += 1;
        // NOTE(review): `size_of` appears without its type argument (presumably `u64`) here and
        // in `write_registers` below -- likely lost in extraction; confirm upstream.
        off += std::mem::size_of::();
    }

    regs[idx] = arm64_core_reg_id!(KVM_REG_SIZE_U64, offset_of!(user_pt_regs, sp));
    idx += 1;

    regs[idx] = arm64_core_reg_id!(KVM_REG_SIZE_U64, offset_of!(user_pt_regs, pc));

    regs
};

/// Reads the registers for the Vcpu
pub fn read_registers(vcpu_fd: &VcpuFd, regs: &mut CoreRegs) -> Result<(), GdbTargetError> {
    let mut register_vec = Aarch64RegisterVec::default();
    get_registers(vcpu_fd, &CORE_REG_IDS, &mut register_vec)?;

    let mut registers = register_vec.iter();

    // Values come back in the same order as CORE_REG_IDS: x0-x30, then sp, then pc.
    for i in 0..GENERAL_PURPOSE_REG_COUNT {
        regs.x[i] = registers
            .next()
            .ok_or(GdbTargetError::ReadRegisterVecError)?
            .value();
    }

    regs.sp = registers
        .next()
        .ok_or(GdbTargetError::ReadRegisterVecError)?
        .value();

    regs.pc = registers
        .next()
        .ok_or(GdbTargetError::ReadRegisterVecError)?
        .value();

    Ok(())
}

/// Writes to the registers for the Vcpu
// NOTE(review): the `&regs.` expressions below appeared as "®s." mojibake in the extracted
// text (an "&reg;" HTML-entity artifact); restored to `&regs.` -- confirm against upstream.
pub fn write_registers(vcpu_fd: &VcpuFd, regs: &CoreRegs) -> Result<(), GdbTargetError> {
    let kreg_off = offset_of!(kvm_regs, regs);
    let mut off = kreg_off;
    for i in 0..GENERAL_PURPOSE_REG_COUNT {
        vcpu_fd.set_one_reg(
            arm64_core_reg_id!(KVM_REG_SIZE_U64, off),
            &regs.x[i].to_le_bytes(),
        )?;
        off += std::mem::size_of::();
    }

    let off = offset_of!(user_pt_regs, sp);
    vcpu_fd.set_one_reg(
        arm64_core_reg_id!(KVM_REG_SIZE_U64, off + kreg_off),
        &regs.sp.to_le_bytes(),
    )?;

    let off = offset_of!(user_pt_regs, pc);
    vcpu_fd.set_one_reg(
        arm64_core_reg_id!(KVM_REG_SIZE_U64, off + kreg_off),
        &regs.pc.to_le_bytes(),
    )?;

    Ok(())
}

================================================ FILE: src/vmm/src/gdb/arch/mod.rs ================================================

// Copyright 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0
//

#[cfg(target_arch = "aarch64")]
mod aarch64;
#[cfg(target_arch = "aarch64")]
pub use aarch64::*;

#[cfg(target_arch = "x86_64")]
mod x86;
#[cfg(target_arch = "x86_64")]
pub use x86::*;

================================================ FILE: src/vmm/src/gdb/arch/x86.rs ================================================

// Copyright 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

use gdbstub_arch::x86::reg::X86_64CoreRegs as CoreRegs;
use kvm_bindings::*;
use kvm_ioctls::VcpuFd;
use vm_memory::GuestAddress;

use crate::Vmm;
use crate::gdb::target::GdbTargetError;
use crate::logger::error;

/// Sets the 9th (Global Exact Breakpoint enable) and the 10th (always 1) bits for the DR7 debug
/// control register
const X86_GLOBAL_DEBUG_ENABLE: u64 = 0b11 << 9;

/// Op code to trigger a software breakpoint in x86 (INT3)
const X86_SW_BP_OP: u8 = 0xCC;

/// Configures the number of bytes required for a software breakpoint
pub const SW_BP_SIZE: usize = 1;

/// The bytes stored for an x86 software breakpoint
pub const SW_BP: [u8; SW_BP_SIZE] = [X86_SW_BP_OP];

/// Gets the RIP value for a Vcpu
// NOTE(review): the bare `Result` return types in this file appear without their type parameters
// (presumably e.g. `Result<u64, GdbTargetError>`) -- likely lost in extraction; confirm upstream.
pub fn get_instruction_pointer(vcpu_fd: &VcpuFd) -> Result {
    let regs = vcpu_fd.get_regs()?;

    Ok(regs.rip)
}

/// Translates a virtual address according to the vCPU's current address translation mode.
///
/// On x86 KVM performs the page walk for us via the `translate_gva` ioctl.
pub fn translate_gva(vcpu_fd: &VcpuFd, gva: u64, _vmm: &Vmm) -> Result {
    let tr = vcpu_fd.translate_gva(gva)?;

    if tr.valid == 0 {
        return Err(GdbTargetError::GvaTranslateError);
    }

    Ok(tr.physical_address)
}

/// Configures the kvm guest debug regs to register the hardware breakpoints, the `arch.debugreg`
/// attribute is used to store the location of the hardware breakpoints, with the 8th slot being
/// used as a bitfield to track which registers are enabled and setting the
/// `X86_GLOBAL_DEBUG_ENABLE` flags. Further reading on the DR7 register can be found here:
/// https://en.wikipedia.org/wiki/X86_debug_register#DR7_-_Debug_control
fn set_kvm_debug(
    control: u32,
    vcpu_fd: &VcpuFd,
    addrs: &[GuestAddress],
) -> Result<(), GdbTargetError> {
    let mut dbg = kvm_guest_debug {
        control,
        ..Default::default()
    };

    dbg.arch.debugreg[7] = X86_GLOBAL_DEBUG_ENABLE;

    for (i, addr) in addrs.iter().enumerate() {
        dbg.arch.debugreg[i] = addr.0;
        // Set global breakpoint enable flag for the specific breakpoint number by setting the bit
        // (the Gn bits of DR7 are the odd bits 1, 3, 5, 7)
        dbg.arch.debugreg[7] |= 2 << (i * 2);
    }

    vcpu_fd.set_guest_debug(&dbg)?;

    Ok(())
}

/// Configures the Vcpu for debugging and sets the hardware breakpoints on the Vcpu
pub fn vcpu_set_debug(
    vcpu_fd: &VcpuFd,
    addrs: &[GuestAddress],
    step: bool,
) -> Result<(), GdbTargetError> {
    let mut control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP | KVM_GUESTDBG_USE_SW_BP;
    if step {
        control |= KVM_GUESTDBG_SINGLESTEP;
    }

    set_kvm_debug(control, vcpu_fd, addrs)
}

/// Injects a BP back into the guest kernel for it to handle, this is particularly useful for the
/// kernels selftesting which can happen during boot.
pub fn vcpu_inject_bp(
    vcpu_fd: &VcpuFd,
    addrs: &[GuestAddress],
    step: bool,
) -> Result<(), GdbTargetError> {
    let mut control = KVM_GUESTDBG_ENABLE
        | KVM_GUESTDBG_USE_HW_BP
        | KVM_GUESTDBG_USE_SW_BP
        | KVM_GUESTDBG_INJECT_BP;

    if step {
        control |= KVM_GUESTDBG_SINGLESTEP;
    }

    set_kvm_debug(control, vcpu_fd, addrs)
}

/// Reads the registers for the Vcpu
pub fn read_registers(vcpu_fd: &VcpuFd, regs: &mut CoreRegs) -> Result<(), GdbTargetError> {
    let cpu_regs = vcpu_fd.get_regs()?;

    regs.regs[0] = cpu_regs.rax;
    regs.regs[1] = cpu_regs.rbx;
    regs.regs[2] = cpu_regs.rcx;
    regs.regs[3] = cpu_regs.rdx;
    regs.regs[4] = cpu_regs.rsi;
    regs.regs[5] = cpu_regs.rdi;
    regs.regs[6] = cpu_regs.rbp;
    regs.regs[7] = cpu_regs.rsp;

    regs.regs[8] = cpu_regs.r8;
    regs.regs[9] = cpu_regs.r9;
    regs.regs[10] = cpu_regs.r10;
    regs.regs[11] = cpu_regs.r11;
    regs.regs[12] = cpu_regs.r12;
    regs.regs[13] = cpu_regs.r13;
    regs.regs[14] = cpu_regs.r14;
    regs.regs[15] = cpu_regs.r15;

    regs.rip = cpu_regs.rip;
    // GDB's x86-64 core register layout stores eflags as 32 bits.
    regs.eflags = u32::try_from(cpu_regs.rflags).map_err(|e| {
        error!("Error {e:?} converting rflags to u32");
        GdbTargetError::RegFlagConversionError
    })?;

    Ok(())
}

/// Writes to the registers for the Vcpu
pub fn write_registers(vcpu_fd: &VcpuFd, regs: &CoreRegs) -> Result<(), GdbTargetError> {
    let new_regs = kvm_regs {
        rax: regs.regs[0],
        rbx: regs.regs[1],
        rcx: regs.regs[2],
        rdx: regs.regs[3],
        rsi: regs.regs[4],
        rdi: regs.regs[5],
        rbp: regs.regs[6],
        rsp: regs.regs[7],
        r8: regs.regs[8],
        r9: regs.regs[9],
        r10: regs.regs[10],
        r11: regs.regs[11],
        r12: regs.regs[12],
        r13: regs.regs[13],
        r14: regs.regs[14],
        r15: regs.regs[15],
        rip: regs.rip,
        rflags: regs.eflags as u64,
    };

    Ok(vcpu_fd.set_regs(&new_regs)?)
}

================================================ FILE: src/vmm/src/gdb/event_loop.rs ================================================

// Copyright 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

use std::os::unix::net::UnixStream;
use std::sync::mpsc::Receiver;
use std::sync::mpsc::TryRecvError::Empty;
use std::sync::{Arc, Mutex};

use gdbstub::common::{Signal, Tid};
use gdbstub::conn::{Connection, ConnectionExt};
use gdbstub::stub::run_blocking::{self, WaitForStopReasonError};
use gdbstub::stub::{DisconnectReason, GdbStub, MultiThreadStopReason};
use gdbstub::target::Target;
use vm_memory::GuestAddress;

use super::target::{FirecrackerTarget, GdbTargetError, vcpuid_to_tid};
use crate::Vmm;
use crate::logger::{error, trace};

/// Starts the GDB event loop which acts as a proxy between the Vcpus and GDB
// NOTE(review): several generic parameters in this file (e.g. `Arc<Mutex<Vmm>>`,
// `Receiver<usize>`, `Box<dyn ConnectionExt<...>>`, `run_blocking::<GdbBlockingEventLoop>`)
// appear stripped -- presumably lost in extraction; confirm against upstream before relying on
// these signatures.
pub fn event_loop(
    connection: UnixStream,
    vmm: Arc>,
    gdb_event_receiver: Receiver,
    entry_addr: GuestAddress,
) {
    let target = FirecrackerTarget::new(vmm, gdb_event_receiver, entry_addr);
    let connection: Box> = { Box::new(connection) };
    let debugger = GdbStub::new(connection);

    // We wait for the VM to reach the inital breakpoint we inserted before starting the event loop
    target
        .gdb_event
        .recv()
        .expect("Error getting initial gdb event");

    gdb_event_loop_thread(debugger, target);
}

// Marker type implementing the gdbstub blocking event loop over our target.
struct GdbBlockingEventLoop {}

impl run_blocking::BlockingEventLoop for GdbBlockingEventLoop {
    type Target = FirecrackerTarget;
    type Connection = Box>;
    type StopReason = MultiThreadStopReason;

    /// Poll for events from either Vcpu's or packets from the GDB connection
    fn wait_for_stop_reason(
        target: &mut FirecrackerTarget,
        conn: &mut Self::Connection,
    ) -> Result<
        run_blocking::Event>,
        run_blocking::WaitForStopReasonError<
            ::Error,
            ::Error,
        >,
    > {
        // Busy-poll: alternate between draining Vcpu debug-exit events and peeking the
        // GDB connection for incoming bytes.
        loop {
            match target.gdb_event.try_recv() {
                Ok(cpu_id) => {
                    // The Vcpu reports it's id from raw_id so we straight convert here
                    let tid = Tid::new(cpu_id).expect("Error converting cpu id to Tid");
                    // If notify paused returns false this means we were already debugging a single
                    // core, the target will track this for us to pick up later
                    target.set_paused_vcpu(tid);

                    trace!("Vcpu: {tid:?} paused from debug exit");

                    let stop_reason = target
                        .get_stop_reason(tid)
                        .map_err(WaitForStopReasonError::Target)?;

                    let Some(stop_response) = stop_reason else {
                        // If we returned None this is a break which should be handled by
                        // the guest kernel (e.g. kernel int3 self testing) so we won't notify
                        // GDB and instead inject this back into the guest
                        target
                            .inject_bp_to_guest(tid)
                            .map_err(WaitForStopReasonError::Target)?;
                        target
                            .resume_vcpu(tid)
                            .map_err(WaitForStopReasonError::Target)?;

                        trace!("Injected BP into guest early exit");
                        continue;
                    };

                    trace!("Returned stop reason to gdb: {stop_response:?}");

                    return Ok(run_blocking::Event::TargetStopped(stop_response));
                }
                // No pending Vcpu event; fall through to polling the connection.
                Err(Empty) => (),
                // The sending side hung up -- the queue is unrecoverable.
                Err(_) => {
                    return Err(WaitForStopReasonError::Target(
                        GdbTargetError::GdbQueueError,
                    ));
                }
            }

            if conn.peek().map(|b| b.is_some()).unwrap_or(false) {
                let byte = conn
                    .read()
                    .map_err(run_blocking::WaitForStopReasonError::Connection)?;
                return Ok(run_blocking::Event::IncomingData(byte));
            }
        }
    }

    /// Invoked when the GDB client sends a Ctrl-C interrupt.
    fn on_interrupt(
        target: &mut FirecrackerTarget,
    ) -> Result>, ::Error> {
        // notify the target that a ctrl-c interrupt has occurred.
        let main_core = vcpuid_to_tid(0)?;

        target.pause_vcpu(main_core)?;
        target.set_paused_vcpu(main_core);

        let exit_reason = MultiThreadStopReason::SignalWithThread {
            tid: main_core,
            signal: Signal::SIGINT,
        };

        Ok(Some(exit_reason))
    }
}

/// Runs while communication with GDB is in progress, after GDB disconnects we
/// shutdown firecracker
fn gdb_event_loop_thread(
    debugger: GdbStub>>,
    mut target: FirecrackerTarget,
) {
    match debugger.run_blocking::(&mut target) {
        Ok(disconnect_reason) => match disconnect_reason {
            DisconnectReason::Disconnect => {
                trace!("Client disconnected")
            }
            DisconnectReason::TargetExited(code) => {
                trace!("Target exited with code {}", code)
            }
            DisconnectReason::TargetTerminated(sig) => {
                trace!("Target terminated with signal {}", sig)
            }
            DisconnectReason::Kill => trace!("GDB sent a kill command"),
        },
        Err(e) => {
            if e.is_target_error() {
                error!("target encountered a fatal error: {e:?}")
            } else if e.is_connection_error() {
                error!("connection error: {e:?}")
            } else {
                error!("gdbstub encountered a fatal error {e:?}")
            }
        }
    }

    // Whatever the disconnect reason, the VM does not outlive the debug session.
    target.shutdown_vmm();
}

================================================ FILE: src/vmm/src/gdb/mod.rs ================================================

// Copyright 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

/// Arch specific implementations
mod arch;
/// Event loop for connection to GDB server
mod event_loop;
/// Target for gdb
pub mod target;

use std::os::unix::net::UnixListener;
use std::path::Path;
use std::sync::mpsc::Receiver;
use std::sync::{Arc, Mutex};

use arch::vcpu_set_debug;
use event_loop::event_loop;
use target::GdbTargetError;
use vm_memory::GuestAddress;

use crate::Vmm;
use crate::logger::trace;

/// Kickstarts the GDB debugging process, it takes in the VMM object, a slice of
/// the paused Vcpu's, the GDB event queue which is used as a mechanism for the Vcpu's to notify
/// our GDB thread that they've been paused, then finally the entry address of the kernel.
///
/// Firstly the function will start by configuring the Vcpus with KVM for debugging
///
/// This will then create the GDB socket which will be used for communication to the GDB process.
/// After creating this, the function will block while waiting for GDB to connect.
///
/// After the connection has been established the function will start a new thread for handling
/// communication to the GDB server
pub fn gdb_thread(
    vmm: Arc>,
    gdb_event_receiver: Receiver,
    entry_addr: GuestAddress,
    socket_addr: &str,
) -> Result<(), GdbTargetError> {
    // We register a hw breakpoint at the entry point as GDB expects the application
    // to be stopped as it connects. This also allows us to set breakpoints before kernel starts.
    // This entry address is automatically used as it is not tracked inside the target state, so
    // when resumed will be removed
    {
        let vmm = vmm.lock().unwrap();
        vcpu_set_debug(&vmm.vcpus_handles[0].vcpu_fd, &[entry_addr], false)?;

        // The remaining Vcpus get debug enabled but no breakpoints.
        for handle in &vmm.vcpus_handles[1..] {
            vcpu_set_debug(&handle.vcpu_fd, &[], false)?;
        }
    }

    let path = Path::new(socket_addr);
    let listener = UnixListener::bind(path).map_err(GdbTargetError::ServerSocketError)?;
    trace!("Waiting for GDB server connection on {}...", path.display());
    // Blocks until the GDB client connects.
    let (connection, _addr) = listener
        .accept()
        .map_err(GdbTargetError::ServerSocketError)?;

    std::thread::Builder::new()
        .name("gdb".into())
        .spawn(move || event_loop(connection, vmm, gdb_event_receiver, entry_addr))
        .map_err(|_| GdbTargetError::GdbThreadError)?;

    Ok(())
}

================================================ FILE: src/vmm/src/gdb/target.rs ================================================

// Copyright 2024 Amazon.com, Inc.
// SPDX-License-Identifier: Apache-2.0

use std::collections::HashMap;
use std::sync::mpsc::{Receiver, RecvError};
use std::sync::{Arc, Mutex, PoisonError};

use arrayvec::ArrayVec;
use gdbstub::arch::Arch;
use gdbstub::common::{Signal, Tid};
use gdbstub::stub::{BaseStopReason, MultiThreadStopReason};
use gdbstub::target::ext::base::BaseOps;
use gdbstub::target::ext::base::multithread::{
    MultiThreadBase, MultiThreadResume, MultiThreadResumeOps, MultiThreadSchedulerLocking,
    MultiThreadSchedulerLockingOps, MultiThreadSingleStep, MultiThreadSingleStepOps,
};
use gdbstub::target::ext::breakpoints::{
    Breakpoints, BreakpointsOps, HwBreakpoint, HwBreakpointOps, SwBreakpoint, SwBreakpointOps,
};
use gdbstub::target::ext::thread_extra_info::{ThreadExtraInfo, ThreadExtraInfoOps};
use gdbstub::target::{Target, TargetError, TargetResult};
#[cfg(target_arch = "aarch64")]
use gdbstub_arch::aarch64::AArch64 as GdbArch;
#[cfg(target_arch = "aarch64")]
use gdbstub_arch::aarch64::reg::AArch64CoreRegs as CoreRegs;
#[cfg(target_arch = "x86_64")]
use gdbstub_arch::x86::X86_64_SSE as GdbArch;
#[cfg(target_arch = "x86_64")]
use gdbstub_arch::x86::reg::X86_64CoreRegs as CoreRegs;
use vm_memory::{Bytes, GuestAddress, GuestMemoryError};

use super::arch;
use crate::arch::GUEST_PAGE_SIZE;
#[cfg(target_arch = "aarch64")]
use crate::arch::aarch64::vcpu::VcpuArchError as AarchVcpuError;
use crate::logger::{error, info};
use crate::utils::u64_to_usize;
use crate::vstate::vcpu::VcpuSendEventError;
use crate::{FcExitCode, VcpuEvent, VcpuResponse, Vmm};

#[derive(Debug, Default, Clone, Copy)]
/// Stores the current state of a Vcpu with a copy of the Vcpu file descriptor
struct VcpuState {
    // Set while the Vcpu is single stepping.
    single_step: bool,
    // Set while the Vcpu is paused under the debugger.
    paused: bool,
    // Set when GDB has queued an explicit resume action for this Vcpu.
    has_resume_action: bool,
}

impl VcpuState {
    /// Disables single stepping on the Vcpu state
    fn reset_vcpu_state(&mut self) {
        self.single_step = false;
        self.has_resume_action = false;
    }
}

/// Errors from interactions between GDB and the VMM
// NOTE(review): the `///` doc comments on the variants below double as the runtime `Display`
// messages via `displaydoc::Display` -- the "Recieve" spelling (and the `VcuRequestError`
// variant name) must not be "fixed" without accepting a user-visible / API change.
#[derive(Debug, thiserror::Error, displaydoc::Display)]
pub enum GdbTargetError {
    /// An error during a GDB request
    GdbRequest,
    /// An error with the queue between the target and the Vcpus
    GdbQueueError,
    /// The response from the Vcpu was not allowed
    VcuRequestError,
    /// No currently paused Vcpu error
    NoPausedVcpu,
    /// Error when setting Vcpu debug flags
    VcpuKvmError,
    /// Server socket Error: {0}
    ServerSocketError(std::io::Error),
    /// Error with creating GDB thread
    GdbThreadError,
    /// VMM locking error
    VmmLockError,
    /// Vcpu send event error
    VcpuSendEventError(#[from] VcpuSendEventError),
    /// Recieve error from Vcpu channel
    VcpuRecvError(#[from] RecvError),
    /// TID Conversion error
    TidConversionError,
    /// KVM set guest debug error
    KvmIoctlsError(#[from] kvm_ioctls::Error),
    /// Gva no translation available
    GvaTranslateError,
    /// Conversion error with cpu rflags
    RegFlagConversionError,
    #[cfg(target_arch = "aarch64")]
    /// Error retrieving registers from a Vcpu
    ReadRegisterError(#[from] AarchVcpuError),
    #[cfg(target_arch = "aarch64")]
    /// Error retrieving registers from a register vec.
    ReadRegisterVecError,
    /// Error while reading/writing to guest memory
    GuestMemoryError(#[from] GuestMemoryError),
}

// NOTE(review): the generic parameters on these two `From` impls (presumably
// `From<GdbTargetError> for TargetError<GdbTargetError>` and `From<PoisonError<T>>`) appear
// stripped -- likely lost in extraction; confirm upstream.
impl From for TargetError {
    fn from(error: GdbTargetError) -> Self {
        match error {
            // A poisoned VMM lock is unrecoverable; everything else is reported as non-fatal.
            GdbTargetError::VmmLockError => TargetError::Fatal(GdbTargetError::VmmLockError),
            _ => TargetError::NonFatal,
        }
    }
}

impl From> for GdbTargetError {
    fn from(_value: PoisonError) -> Self {
        GdbTargetError::VmmLockError
    }
}

/// Debug Target for firecracker.
///
/// This is used to manage the debug implementation and handle requests sent via GDB
#[derive(Debug)]
pub struct FirecrackerTarget {
    /// A mutex around the VMM to allow communication to the Vcpus
    vmm: Arc>,

    /// Store the guest entry point
    entry_addr: GuestAddress,

    /// Listener for events sent from the Vcpu
    pub gdb_event: Receiver,

    /// Used to track the currently configured hardware breakpoints.
/// Limited to 4 in x86 see:
    /// https://elixir.bootlin.com/linux/v6.1/source/arch/x86/include/asm/kvm_host.h#L210
    hw_breakpoints: ArrayVec,

    /// Used to track the currently configured software breakpoints and store the op-code
    /// which was swapped out
    sw_breakpoints: HashMap<::Usize, [u8; arch::SW_BP_SIZE]>,

    /// Stores the current state of each Vcpu
    vcpu_state: Vec,

    /// Stores the current paused thread id, GDB can enact commands without providing us a Tid to
    /// run on and expects us to use the last paused thread.
    paused_vcpu: Option,

    /// Whether scheduler locking is enabled for the current resume cycle
    scheduler_locking: bool,
}

/// Convert the 1 indexed Tid to the 0 indexed Vcpuid
fn tid_to_vcpuid(tid: Tid) -> usize {
    tid.get() - 1
}

/// Converts the internal index of a Vcpu to
/// the Tid required by GDB
// NOTE(review): bare `Result`/`Option` types in this impl appear without their parameters
// (e.g. `Result<Tid, GdbTargetError>`) -- likely lost in extraction; confirm upstream.
pub fn vcpuid_to_tid(cpu_id: usize) -> Result {
    Tid::new(get_raw_tid(cpu_id)).ok_or(GdbTargetError::TidConversionError)
}

/// Converts the internal index of a Vcpu to
/// the 1 indexed value for GDB
pub fn get_raw_tid(cpu_id: usize) -> usize {
    cpu_id + 1
}

impl FirecrackerTarget {
    /// Creates a new Target for GDB stub. This is used as the layer between GDB and the VMM it
    /// will handle requests from GDB and perform the appropriate actions, while also updating GDB
    /// with the state of the VMM / Vcpu's as we hit debug events
    pub fn new(vmm: Arc>, gdb_event: Receiver, entry_addr: GuestAddress) -> Self {
        let mut vcpu_state = vec![VcpuState::default(); vmm.lock().unwrap().vcpus_handles.len()];
        // By default vcpu 1 will be paused at the entry point
        vcpu_state[0].paused = true;

        Self {
            vmm,
            entry_addr,
            gdb_event,
            // We only support 4 hw breakpoints on x86 this will need to be configurable on arm
            hw_breakpoints: Default::default(),
            sw_breakpoints: HashMap::new(),
            vcpu_state,

            paused_vcpu: Tid::new(1),
            scheduler_locking: false,
        }
    }

    // Update KVM debug info for a specific vcpu index.
    fn update_vcpu_kvm_debug(
        &self,
        vcpu_idx: usize,
        hw_breakpoints: &[GuestAddress],
    ) -> Result<(), GdbTargetError> {
        let state = &self.vcpu_state[vcpu_idx];

        // KVM debug state can only be safely updated while the Vcpu is not running.
        if !state.paused {
            info!("Attempted to update kvm debug on a non paused Vcpu");
            return Ok(());
        }

        let vcpu_fd = &self.vmm.lock().unwrap().vcpus_handles[vcpu_idx].vcpu_fd;
        arch::vcpu_set_debug(vcpu_fd, hw_breakpoints, state.single_step)
    }

    /// Translate guest virtual address to guest physical address.
    fn translate_gva(&self, vcpu_idx: usize, addr: u64) -> Result {
        let vmm = self.vmm.lock().unwrap();
        let vcpu_fd = &vmm.vcpus_handles[vcpu_idx].vcpu_fd;

        arch::translate_gva(vcpu_fd, addr, &vmm)
    }

    /// Retrieves the currently paused Vcpu id returns an error if there is no currently paused Vcpu
    fn get_paused_vcpu_id(&self) -> Result {
        self.paused_vcpu.ok_or(GdbTargetError::NoPausedVcpu)
    }

    /// Returns the index of the paused vcpu.
    fn get_paused_vcpu_idx(&self) -> Result {
        Ok(tid_to_vcpuid(self.get_paused_vcpu_id()?))
    }

    /// Updates state to reference the currently paused Vcpu and store that the cpu is currently
    /// paused
    pub fn set_paused_vcpu(&mut self, tid: Tid) {
        self.vcpu_state[tid_to_vcpuid(tid)].paused = true;
        self.paused_vcpu = Some(tid);
    }

    /// Resumes execution of all paused Vcpus, update them with current kvm debug info
    /// and resumes
    fn resume_all_vcpus(&mut self) -> Result<(), GdbTargetError> {
        // First push the current breakpoint configuration to every (paused) Vcpu...
        for idx in 0..self.vcpu_state.len() {
            self.update_vcpu_kvm_debug(idx, &self.hw_breakpoints)?;
        }

        // ...then resume them, honouring scheduler locking: with it enabled only Vcpus that
        // GDB explicitly asked to resume are restarted.
        for cpu_id in 0..self.vcpu_state.len() {
            if self.scheduler_locking && !self.vcpu_state[cpu_id].has_resume_action {
                continue;
            }
            let tid = vcpuid_to_tid(cpu_id)?;
            self.resume_vcpu(tid)?;
        }

        self.paused_vcpu = None;

        Ok(())
    }

    /// Resets all Vcpus to their base state
    fn reset_all_vcpu_states(&mut self) {
        for value in self.vcpu_state.iter_mut() {
            value.reset_vcpu_state();
        }
    }

    /// Shuts down the VMM
    pub fn shutdown_vmm(&self) {
        self.vmm
            .lock()
            .expect("error unlocking vmm")
            .stop(FcExitCode::Ok)
    }

    /// Pauses the requested Vcpu
    pub fn
pause_vcpu(&mut self, tid: Tid) -> Result<(), GdbTargetError> { let vcpu_state = &mut self.vcpu_state[tid_to_vcpuid(tid)]; if vcpu_state.paused { info!("Attempted to pause a vcpu already paused."); // Pausing an already paused vcpu is not considered an error case from GDB return Ok(()); } let cpu_handle = &mut self.vmm.lock()?.vcpus_handles[tid_to_vcpuid(tid)]; cpu_handle.send_event(VcpuEvent::Pause)?; let _ = cpu_handle.response_receiver().recv()?; vcpu_state.paused = true; Ok(()) } /// A helper function to allow the event loop to inject this breakpoint back into the Vcpu pub fn inject_bp_to_guest(&mut self, tid: Tid) -> Result<(), GdbTargetError> { let vmm = self.vmm.lock().unwrap(); let vcpu_idx = tid_to_vcpuid(tid); let vcpu_fd = &vmm.vcpus_handles[vcpu_idx].vcpu_fd; arch::vcpu_inject_bp(vcpu_fd, &self.hw_breakpoints, false) } /// Resumes the Vcpu, will return early if the Vcpu is already running pub fn resume_vcpu(&mut self, tid: Tid) -> Result<(), GdbTargetError> { let vcpu_state = &mut self.vcpu_state[tid_to_vcpuid(tid)]; if !vcpu_state.paused { info!("Attempted to resume a vcpu already running."); // Resuming an already running Vcpu is not considered an error case from GDB return Ok(()); } let cpu_handle = &mut self.vmm.lock()?.vcpus_handles[tid_to_vcpuid(tid)]; cpu_handle.send_event(VcpuEvent::Resume)?; let response = cpu_handle.response_receiver().recv()?; if let VcpuResponse::NotAllowed(message) = response { error!("Response resume : {message}"); return Err(GdbTargetError::VcuRequestError); } vcpu_state.paused = false; Ok(()) } /// Identifies why the specific core was paused to be returned to GDB if None is returned this /// indicates to handle this internally and don't notify GDB pub fn get_stop_reason( &self, tid: Tid, ) -> Result>, GdbTargetError> { let vcpu_idx = tid_to_vcpuid(tid); let vcpu_state = &self.vcpu_state[vcpu_idx]; if vcpu_state.single_step { return Ok(Some(MultiThreadStopReason::SignalWithThread { tid, signal: Signal::SIGTRAP, })); } let 
vmm = self.vmm.lock().unwrap(); let vcpu_fd = &vmm.vcpus_handles[vcpu_idx].vcpu_fd; let Ok(ip) = arch::get_instruction_pointer(vcpu_fd) else { // If we error here we return an arbitrary Software Breakpoint, GDB will handle // this gracefully return Ok(Some(MultiThreadStopReason::SwBreak(tid))); }; let gpa = arch::translate_gva(vcpu_fd, ip, &vmm)?; if self.sw_breakpoints.contains_key(&gpa) { return Ok(Some(MultiThreadStopReason::SwBreak(tid))); } if self.hw_breakpoints.contains(&GuestAddress(ip)) { return Ok(Some(MultiThreadStopReason::HwBreak(tid))); } if ip == self.entry_addr.0 { return Ok(Some(MultiThreadStopReason::HwBreak(tid))); } // This is not a breakpoint we've set, likely one set by the guest Ok(None) } } impl Target for FirecrackerTarget { type Error = GdbTargetError; type Arch = GdbArch; #[inline(always)] fn base_ops(&mut self) -> BaseOps<'_, Self::Arch, Self::Error> { BaseOps::MultiThread(self) } #[inline(always)] fn support_breakpoints(&mut self) -> Option> { Some(self) } /// We disable implicit sw breakpoints as we want to manage these internally so we can inject /// breakpoints back into the guest if we didn't create them #[inline(always)] fn guard_rail_implicit_sw_breakpoints(&self) -> bool { false } } impl MultiThreadBase for FirecrackerTarget { /// Reads the registers for the Vcpu fn read_registers(&mut self, regs: &mut CoreRegs, tid: Tid) -> TargetResult<(), Self> { let vmm = self.vmm.lock().unwrap(); let vcpu_idx = tid_to_vcpuid(tid); let vcpu_fd = &vmm.vcpus_handles[vcpu_idx].vcpu_fd; arch::read_registers(vcpu_fd, regs)?; Ok(()) } /// Writes to the registers for the Vcpu fn write_registers(&mut self, regs: &CoreRegs, tid: Tid) -> TargetResult<(), Self> { let vmm = self.vmm.lock().unwrap(); let vcpu_idx = tid_to_vcpuid(tid); let vcpu_fd = &vmm.vcpus_handles[vcpu_idx].vcpu_fd; arch::write_registers(vcpu_fd, regs)?; Ok(()) } /// Writes data to a guest virtual address for the Vcpu fn read_addrs( &mut self, mut gva: ::Usize, mut data: &mut [u8], 
tid: Tid, ) -> TargetResult { let data_len = data.len(); let vmm = self.vmm.lock().unwrap(); let vcpu_idx = tid_to_vcpuid(tid); let vcpu_fd = &vmm.vcpus_handles[vcpu_idx].vcpu_fd; while !data.is_empty() { let gpa = arch::translate_gva(vcpu_fd, gva, &vmm).map_err(|e| { error!("Error {e:?} translating gva on read address: {gva:#X}"); })?; // Compute the amount space left in the page after the gpa let read_len = std::cmp::min( data.len(), GUEST_PAGE_SIZE - (u64_to_usize(gpa) & (GUEST_PAGE_SIZE - 1)), ); vmm.vm .guest_memory() .read(&mut data[..read_len], GuestAddress(gpa as u64)) .map_err(|e| { error!("Error reading memory {e:?} gpa is {gpa}"); })?; data = &mut data[read_len..]; gva += read_len as u64; } Ok(data_len) } /// Writes data at a guest virtual address for the Vcpu fn write_addrs( &mut self, mut gva: ::Usize, mut data: &[u8], tid: Tid, ) -> TargetResult<(), Self> { let vmm = self.vmm.lock().unwrap(); let vcpu_idx = tid_to_vcpuid(tid); let vcpu_fd = &vmm.vcpus_handles[vcpu_idx].vcpu_fd; while !data.is_empty() { let gpa = arch::translate_gva(vcpu_fd, gva, &vmm).map_err(|e| { error!("Error {e:?} translating gva on read address: {gva:#X}"); })?; // Compute the amount space left in the page after the gpa let write_len = std::cmp::min( data.len(), GUEST_PAGE_SIZE - (u64_to_usize(gpa) & (GUEST_PAGE_SIZE - 1)), ); vmm.vm .guest_memory() .write(&data[..write_len], GuestAddress(gpa)) .map_err(|e| { error!("Error {e:?} writing memory at {gpa:#X}"); })?; data = &data[write_len..]; gva += write_len as u64; } Ok(()) } #[inline(always)] /// Makes the callback provided with each Vcpu /// GDB expects us to return all threads currently running with this command, for firecracker /// this is all Vcpus fn list_active_threads( &mut self, thread_is_active: &mut dyn FnMut(Tid), ) -> Result<(), Self::Error> { for id in 0..self.vcpu_state.len() { thread_is_active(vcpuid_to_tid(id)?) 
} Ok(()) } #[inline(always)] fn support_resume(&mut self) -> Option> { Some(self) } #[inline(always)] fn support_thread_extra_info(&mut self) -> Option> { Some(self) } } impl MultiThreadResume for FirecrackerTarget { /// Disables single step on the Vcpu fn set_resume_action_continue( &mut self, tid: Tid, _signal: Option, ) -> Result<(), Self::Error> { let state = &mut self.vcpu_state[tid_to_vcpuid(tid)]; state.single_step = false; state.has_resume_action = true; Ok(()) } /// Resumes the execution of all currently paused Vcpus fn resume(&mut self) -> Result<(), Self::Error> { self.resume_all_vcpus() } /// Clears the state of all Vcpus setting it back to base config fn clear_resume_actions(&mut self) -> Result<(), Self::Error> { self.reset_all_vcpu_states(); self.scheduler_locking = false; Ok(()) } #[inline(always)] fn support_single_step(&mut self) -> Option> { Some(self) } #[inline(always)] fn support_scheduler_locking(&mut self) -> Option> { Some(self) } } impl MultiThreadSingleStep for FirecrackerTarget { /// Enabled single step on the Vcpu fn set_resume_action_step( &mut self, tid: Tid, _signal: Option, ) -> Result<(), Self::Error> { let state = &mut self.vcpu_state[tid_to_vcpuid(tid)]; state.single_step = true; state.has_resume_action = true; Ok(()) } } impl MultiThreadSchedulerLocking for FirecrackerTarget { fn set_resume_action_scheduler_lock(&mut self) -> Result<(), Self::Error> { self.scheduler_locking = true; Ok(()) } } impl Breakpoints for FirecrackerTarget { #[inline(always)] fn support_hw_breakpoint(&mut self) -> Option> { Some(self) } #[inline(always)] fn support_sw_breakpoint(&mut self) -> Option> { Some(self) } } impl HwBreakpoint for FirecrackerTarget { /// Adds a hardware breakpoint The breakpoint addresses are /// stored in state so we can track the reason for an exit. 
fn add_hw_breakpoint( &mut self, gva: ::Usize, _kind: ::BreakpointKind, ) -> TargetResult { let ga = GuestAddress(gva); if self.hw_breakpoints.contains(&ga) { return Ok(true); } if self.hw_breakpoints.try_push(ga).is_err() { return Ok(false); } let vcpu_idx = self.get_paused_vcpu_idx()?; self.update_vcpu_kvm_debug(vcpu_idx, &self.hw_breakpoints)?; Ok(true) } /// Removes a hardware breakpoint. fn remove_hw_breakpoint( &mut self, gva: ::Usize, _kind: ::BreakpointKind, ) -> TargetResult { match self.hw_breakpoints.iter().position(|&b| b.0 == gva) { None => return Ok(false), Some(pos) => self.hw_breakpoints.remove(pos), }; let vcpu_idx = self.get_paused_vcpu_idx()?; self.update_vcpu_kvm_debug(vcpu_idx, &self.hw_breakpoints)?; Ok(true) } } impl SwBreakpoint for FirecrackerTarget { /// Inserts a software breakpoint. /// We initially translate the guest virtual address to a guest physical address and then check /// if this is already present, if so we return early. Otherwise we store the opcode at the /// specified guest physical address in our store and replace it with the `X86_SW_BP_OP` fn add_sw_breakpoint( &mut self, addr: ::Usize, _kind: ::BreakpointKind, ) -> TargetResult { let vcpu_idx = self.get_paused_vcpu_idx()?; let gpa = self.translate_gva(vcpu_idx, addr)?; if self.sw_breakpoints.contains_key(&gpa) { return Ok(true); } let paused_vcpu_id = self.get_paused_vcpu_id()?; let mut saved_register = [0; arch::SW_BP_SIZE]; self.read_addrs(addr, &mut saved_register, paused_vcpu_id)?; self.sw_breakpoints.insert(gpa, saved_register); self.write_addrs(addr, &arch::SW_BP, paused_vcpu_id)?; Ok(true) } /// Removes a software breakpoint. 
/// We firstly translate the guest virtual address to a guest physical address, we then check if /// the resulting gpa is in our store, if so we load the stored opcode and write this back fn remove_sw_breakpoint( &mut self, addr: ::Usize, _kind: ::BreakpointKind, ) -> TargetResult { let vcpu_idx = self.get_paused_vcpu_idx()?; let gpa = self.translate_gva(vcpu_idx, addr)?; if let Some(removed) = self.sw_breakpoints.remove(&gpa) { self.write_addrs(addr, &removed, self.get_paused_vcpu_id()?)?; return Ok(true); } Ok(false) } } impl ThreadExtraInfo for FirecrackerTarget { /// Allows us to configure the formatting of the thread information, we just return the ID of /// the Vcpu fn thread_extra_info(&self, tid: Tid, buf: &mut [u8]) -> Result { let info = format!("Vcpu ID: {}", tid_to_vcpuid(tid)); let size = buf.len().min(info.len()); buf[..size].copy_from_slice(&info.as_bytes()[..size]); Ok(size) } } ================================================ FILE: src/vmm/src/initrd.rs ================================================ // Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 use std::fs::File; use std::os::unix::fs::MetadataExt; use vm_memory::{GuestAddress, GuestMemory, ReadVolatile, VolatileMemoryError}; use crate::arch::initrd_load_addr; use crate::utils::u64_to_usize; use crate::vmm_config::boot_source::BootConfig; use crate::vstate::memory::GuestMemoryMmap; /// Errors associated with initrd loading. #[derive(Debug, thiserror::Error, displaydoc::Display)] pub enum InitrdError { /// Failed to compute the initrd address. Address, /// Cannot load initrd due to an invalid memory configuration. Load, /// Cannot image metadata: {0} Metadata(std::io::Error), /// Cannot copy initrd file fd: {0} CloneFd(std::io::Error), /// Cannot load initrd due to an invalid image: {0} Read(VolatileMemoryError), } /// Type for passing information about the initrd in the guest memory. 
#[derive(Debug)] pub struct InitrdConfig { /// Load address of initrd in guest memory pub address: GuestAddress, /// Size of initrd in guest memory pub size: usize, } impl InitrdConfig { /// Load initrd into guest memory based on the boot config. pub fn from_config( boot_cfg: &BootConfig, vm_memory: &GuestMemoryMmap, ) -> Result, InitrdError> { Ok(match &boot_cfg.initrd_file { Some(f) => { let f = f.try_clone().map_err(InitrdError::CloneFd)?; Some(Self::from_file(vm_memory, f)?) } None => None, }) } /// Loads the initrd from a file into guest memory. pub fn from_file(vm_memory: &GuestMemoryMmap, mut file: File) -> Result { let size = file.metadata().map_err(InitrdError::Metadata)?.size(); let size = u64_to_usize(size); let Some(address) = initrd_load_addr(vm_memory, size) else { return Err(InitrdError::Address); }; let mut slice = vm_memory .get_slice(GuestAddress(address), size) .map_err(|_| InitrdError::Load)?; file.read_exact_volatile(&mut slice) .map_err(InitrdError::Read)?; Ok(InitrdConfig { address: GuestAddress(address), size, }) } } #[cfg(test)] mod tests { use std::io::{Seek, SeekFrom, Write}; use vmm_sys_util::tempfile::TempFile; use super::*; use crate::arch::GUEST_PAGE_SIZE; use crate::test_utils::{single_region_mem, single_region_mem_at}; fn make_test_bin() -> Vec { let mut fake_bin = Vec::new(); fake_bin.resize(1_000_000, 0xAA); fake_bin } #[test] // Test that loading the initrd is successful on different archs. fn test_load_initrd() { let image = make_test_bin(); let mem_size: usize = image.len() * 2 + GUEST_PAGE_SIZE; let tempfile = TempFile::new().unwrap(); let mut tempfile = tempfile.into_file(); tempfile.write_all(&image).unwrap(); #[cfg(target_arch = "x86_64")] let gm = single_region_mem(mem_size); #[cfg(target_arch = "aarch64")] let gm = single_region_mem(mem_size + crate::arch::aarch64::layout::FDT_MAX_SIZE); // Need to reset the cursor to read initrd properly. 
tempfile.seek(SeekFrom::Start(0)).unwrap(); let initrd = InitrdConfig::from_file(&gm, tempfile).unwrap(); assert!(gm.address_in_range(initrd.address)); assert_eq!(initrd.size, image.len()); } #[test] fn test_load_initrd_no_memory() { let gm = single_region_mem(79); let image = make_test_bin(); let tempfile = TempFile::new().unwrap(); let mut tempfile = tempfile.into_file(); tempfile.write_all(&image).unwrap(); // Need to reset the cursor to read initrd properly. tempfile.seek(SeekFrom::Start(0)).unwrap(); let res = InitrdConfig::from_file(&gm, tempfile); assert!(matches!(res, Err(InitrdError::Address)), "{:?}", res); } #[test] fn test_load_initrd_unaligned() { let image = vec![1, 2, 3, 4]; let tempfile = TempFile::new().unwrap(); let mut tempfile = tempfile.into_file(); tempfile.write_all(&image).unwrap(); let gm = single_region_mem_at(GUEST_PAGE_SIZE as u64 + 1, image.len() * 2); // Need to reset the cursor to read initrd properly. tempfile.seek(SeekFrom::Start(0)).unwrap(); let res = InitrdConfig::from_file(&gm, tempfile); assert!(matches!(res, Err(InitrdError::Address)), "{:?}", res); } } ================================================ FILE: src/vmm/src/io_uring/generated.rs ================================================ // Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
// SPDX-License-Identifier: Apache-2.0 // automatically generated by tools/bindgen.sh #![allow( non_camel_case_types, non_upper_case_globals, dead_code, non_snake_case, clippy::ptr_as_ptr, clippy::undocumented_unsafe_blocks, missing_debug_implementations, clippy::tests_outside_test_module, unsafe_op_in_unsafe_fn, clippy::redundant_static_lifetimes )] #[repr(C)] #[derive(Default)] pub struct __IncompleteArrayField(::std::marker::PhantomData, [T; 0]); impl __IncompleteArrayField { #[inline] pub const fn new() -> Self { __IncompleteArrayField(::std::marker::PhantomData, []) } #[inline] pub fn as_ptr(&self) -> *const T { self as *const _ as *const T } #[inline] pub fn as_mut_ptr(&mut self) -> *mut T { self as *mut _ as *mut T } #[inline] pub unsafe fn as_slice(&self, len: usize) -> &[T] { ::std::slice::from_raw_parts(self.as_ptr(), len) } #[inline] pub unsafe fn as_mut_slice(&mut self, len: usize) -> &mut [T] { ::std::slice::from_raw_parts_mut(self.as_mut_ptr(), len) } } impl ::std::fmt::Debug for __IncompleteArrayField { fn fmt(&self, fmt: &mut ::std::fmt::Formatter<'_>) -> ::std::fmt::Result { fmt.write_str("__IncompleteArrayField") } } #[repr(C)] pub struct __BindgenUnionField(::std::marker::PhantomData); impl __BindgenUnionField { #[inline] pub const fn new() -> Self { __BindgenUnionField(::std::marker::PhantomData) } #[inline] pub unsafe fn as_ref(&self) -> &T { ::std::mem::transmute(self) } #[inline] pub unsafe fn as_mut(&mut self) -> &mut T { ::std::mem::transmute(self) } } impl ::std::default::Default for __BindgenUnionField { #[inline] fn default() -> Self { Self::new() } } impl ::std::clone::Clone for __BindgenUnionField { #[inline] fn clone(&self) -> Self { *self } } impl ::std::marker::Copy for __BindgenUnionField {} impl ::std::fmt::Debug for __BindgenUnionField { fn fmt(&self, fmt: &mut ::std::fmt::Formatter<'_>) -> ::std::fmt::Result { fmt.write_str("__BindgenUnionField") } } impl ::std::hash::Hash for __BindgenUnionField { fn hash(&self, _state: &mut H) 
{} } impl ::std::cmp::PartialEq for __BindgenUnionField { fn eq(&self, _other: &__BindgenUnionField) -> bool { true } } impl ::std::cmp::Eq for __BindgenUnionField {} pub const IORING_FILE_INDEX_ALLOC: i32 = -1; pub const IORING_SETUP_IOPOLL: u32 = 1; pub const IORING_SETUP_SQPOLL: u32 = 2; pub const IORING_SETUP_SQ_AFF: u32 = 4; pub const IORING_SETUP_CQSIZE: u32 = 8; pub const IORING_SETUP_CLAMP: u32 = 16; pub const IORING_SETUP_ATTACH_WQ: u32 = 32; pub const IORING_SETUP_R_DISABLED: u32 = 64; pub const IORING_SETUP_SUBMIT_ALL: u32 = 128; pub const IORING_SETUP_COOP_TASKRUN: u32 = 256; pub const IORING_SETUP_TASKRUN_FLAG: u32 = 512; pub const IORING_SETUP_SQE128: u32 = 1024; pub const IORING_SETUP_CQE32: u32 = 2048; pub const IORING_SETUP_SINGLE_ISSUER: u32 = 4096; pub const IORING_SETUP_DEFER_TASKRUN: u32 = 8192; pub const IORING_SETUP_NO_MMAP: u32 = 16384; pub const IORING_SETUP_REGISTERED_FD_ONLY: u32 = 32768; pub const IORING_SETUP_NO_SQARRAY: u32 = 65536; pub const IORING_SETUP_HYBRID_IOPOLL: u32 = 131072; pub const IORING_URING_CMD_FIXED: u32 = 1; pub const IORING_URING_CMD_MASK: u32 = 1; pub const IORING_FSYNC_DATASYNC: u32 = 1; pub const IORING_TIMEOUT_ABS: u32 = 1; pub const IORING_TIMEOUT_UPDATE: u32 = 2; pub const IORING_TIMEOUT_BOOTTIME: u32 = 4; pub const IORING_TIMEOUT_REALTIME: u32 = 8; pub const IORING_LINK_TIMEOUT_UPDATE: u32 = 16; pub const IORING_TIMEOUT_ETIME_SUCCESS: u32 = 32; pub const IORING_TIMEOUT_MULTISHOT: u32 = 64; pub const IORING_TIMEOUT_CLOCK_MASK: u32 = 12; pub const IORING_TIMEOUT_UPDATE_MASK: u32 = 18; pub const IORING_POLL_ADD_MULTI: u32 = 1; pub const IORING_POLL_UPDATE_EVENTS: u32 = 2; pub const IORING_POLL_UPDATE_USER_DATA: u32 = 4; pub const IORING_POLL_ADD_LEVEL: u32 = 8; pub const IORING_ASYNC_CANCEL_ALL: u32 = 1; pub const IORING_ASYNC_CANCEL_FD: u32 = 2; pub const IORING_ASYNC_CANCEL_ANY: u32 = 4; pub const IORING_ASYNC_CANCEL_FD_FIXED: u32 = 8; pub const IORING_ASYNC_CANCEL_USERDATA: u32 = 16; pub const 
IORING_ASYNC_CANCEL_OP: u32 = 32; pub const IORING_RECVSEND_POLL_FIRST: u32 = 1; pub const IORING_RECV_MULTISHOT: u32 = 2; pub const IORING_RECVSEND_FIXED_BUF: u32 = 4; pub const IORING_SEND_ZC_REPORT_USAGE: u32 = 8; pub const IORING_RECVSEND_BUNDLE: u32 = 16; pub const IORING_NOTIF_USAGE_ZC_COPIED: u32 = 2147483648; pub const IORING_ACCEPT_MULTISHOT: u32 = 1; pub const IORING_ACCEPT_DONTWAIT: u32 = 2; pub const IORING_ACCEPT_POLL_FIRST: u32 = 4; pub const IORING_MSG_RING_CQE_SKIP: u32 = 1; pub const IORING_MSG_RING_FLAGS_PASS: u32 = 2; pub const IORING_FIXED_FD_NO_CLOEXEC: u32 = 1; pub const IORING_NOP_INJECT_RESULT: u32 = 1; pub const IORING_NOP_FILE: u32 = 2; pub const IORING_NOP_FIXED_FILE: u32 = 4; pub const IORING_NOP_FIXED_BUFFER: u32 = 8; pub const IORING_CQE_F_BUFFER: u32 = 1; pub const IORING_CQE_F_MORE: u32 = 2; pub const IORING_CQE_F_SOCK_NONEMPTY: u32 = 4; pub const IORING_CQE_F_NOTIF: u32 = 8; pub const IORING_CQE_F_BUF_MORE: u32 = 16; pub const IORING_CQE_BUFFER_SHIFT: u32 = 16; pub const IORING_OFF_SQ_RING: u32 = 0; pub const IORING_OFF_CQ_RING: u32 = 134217728; pub const IORING_OFF_SQES: u32 = 268435456; pub const IORING_OFF_PBUF_RING: u32 = 2147483648; pub const IORING_OFF_PBUF_SHIFT: u32 = 16; pub const IORING_OFF_MMAP_MASK: u32 = 4160749568; pub const IORING_SQ_NEED_WAKEUP: u32 = 1; pub const IORING_SQ_CQ_OVERFLOW: u32 = 2; pub const IORING_SQ_TASKRUN: u32 = 4; pub const IORING_CQ_EVENTFD_DISABLED: u32 = 1; pub const IORING_ENTER_GETEVENTS: u32 = 1; pub const IORING_ENTER_SQ_WAKEUP: u32 = 2; pub const IORING_ENTER_SQ_WAIT: u32 = 4; pub const IORING_ENTER_EXT_ARG: u32 = 8; pub const IORING_ENTER_REGISTERED_RING: u32 = 16; pub const IORING_ENTER_ABS_TIMER: u32 = 32; pub const IORING_ENTER_EXT_ARG_REG: u32 = 64; pub const IORING_FEAT_SINGLE_MMAP: u32 = 1; pub const IORING_FEAT_NODROP: u32 = 2; pub const IORING_FEAT_SUBMIT_STABLE: u32 = 4; pub const IORING_FEAT_RW_CUR_POS: u32 = 8; pub const IORING_FEAT_CUR_PERSONALITY: u32 = 16; pub const 
IORING_FEAT_FAST_POLL: u32 = 32; pub const IORING_FEAT_POLL_32BITS: u32 = 64; pub const IORING_FEAT_SQPOLL_NONFIXED: u32 = 128; pub const IORING_FEAT_EXT_ARG: u32 = 256; pub const IORING_FEAT_NATIVE_WORKERS: u32 = 512; pub const IORING_FEAT_RSRC_TAGS: u32 = 1024; pub const IORING_FEAT_CQE_SKIP: u32 = 2048; pub const IORING_FEAT_LINKED_FILE: u32 = 4096; pub const IORING_FEAT_REG_REG_RING: u32 = 8192; pub const IORING_FEAT_RECVSEND_BUNDLE: u32 = 16384; pub const IORING_FEAT_MIN_TIMEOUT: u32 = 32768; pub const IORING_RSRC_REGISTER_SPARSE: u32 = 1; pub const IORING_REGISTER_FILES_SKIP: i32 = -2; pub const IO_URING_OP_SUPPORTED: u32 = 1; pub type __u8 = ::std::os::raw::c_uchar; pub type __u16 = ::std::os::raw::c_ushort; pub type __s32 = ::std::os::raw::c_int; pub type __u32 = ::std::os::raw::c_uint; pub type __u64 = ::std::os::raw::c_ulonglong; pub type __kernel_time64_t = ::std::os::raw::c_longlong; pub type __kernel_rwf_t = ::std::os::raw::c_int; #[repr(C)] #[derive(Debug, Default, Copy, Clone, PartialEq)] pub struct __kernel_timespec { pub tv_sec: __kernel_time64_t, pub tv_nsec: ::std::os::raw::c_longlong, } #[allow(clippy::unnecessary_operation, clippy::identity_op)] const _: () = { ["Size of __kernel_timespec"][::std::mem::size_of::<__kernel_timespec>() - 16usize]; ["Alignment of __kernel_timespec"][::std::mem::align_of::<__kernel_timespec>() - 8usize]; ["Offset of field: __kernel_timespec::tv_sec"] [::std::mem::offset_of!(__kernel_timespec, tv_sec) - 0usize]; ["Offset of field: __kernel_timespec::tv_nsec"] [::std::mem::offset_of!(__kernel_timespec, tv_nsec) - 8usize]; }; #[repr(C)] #[derive(Copy, Clone)] pub struct io_uring_sqe { pub opcode: __u8, pub flags: __u8, pub ioprio: __u16, pub fd: __s32, pub __bindgen_anon_1: io_uring_sqe__bindgen_ty_1, pub __bindgen_anon_2: io_uring_sqe__bindgen_ty_2, pub len: __u32, pub __bindgen_anon_3: io_uring_sqe__bindgen_ty_3, pub user_data: __u64, pub __bindgen_anon_4: io_uring_sqe__bindgen_ty_4, pub personality: __u16, pub 
__bindgen_anon_5: io_uring_sqe__bindgen_ty_5, pub __bindgen_anon_6: io_uring_sqe__bindgen_ty_6, } #[repr(C)] #[derive(Copy, Clone)] pub union io_uring_sqe__bindgen_ty_1 { pub off: __u64, pub addr2: __u64, pub __bindgen_anon_1: io_uring_sqe__bindgen_ty_1__bindgen_ty_1, } #[repr(C)] #[derive(Debug, Default, Copy, Clone, PartialEq)] pub struct io_uring_sqe__bindgen_ty_1__bindgen_ty_1 { pub cmd_op: __u32, pub __pad1: __u32, } #[allow(clippy::unnecessary_operation, clippy::identity_op)] const _: () = { ["Size of io_uring_sqe__bindgen_ty_1__bindgen_ty_1"] [::std::mem::size_of::() - 8usize]; ["Alignment of io_uring_sqe__bindgen_ty_1__bindgen_ty_1"] [::std::mem::align_of::() - 4usize]; ["Offset of field: io_uring_sqe__bindgen_ty_1__bindgen_ty_1::cmd_op"] [::std::mem::offset_of!(io_uring_sqe__bindgen_ty_1__bindgen_ty_1, cmd_op) - 0usize]; ["Offset of field: io_uring_sqe__bindgen_ty_1__bindgen_ty_1::__pad1"] [::std::mem::offset_of!(io_uring_sqe__bindgen_ty_1__bindgen_ty_1, __pad1) - 4usize]; }; #[allow(clippy::unnecessary_operation, clippy::identity_op)] const _: () = { ["Size of io_uring_sqe__bindgen_ty_1"] [::std::mem::size_of::() - 8usize]; ["Alignment of io_uring_sqe__bindgen_ty_1"] [::std::mem::align_of::() - 8usize]; ["Offset of field: io_uring_sqe__bindgen_ty_1::off"] [::std::mem::offset_of!(io_uring_sqe__bindgen_ty_1, off) - 0usize]; ["Offset of field: io_uring_sqe__bindgen_ty_1::addr2"] [::std::mem::offset_of!(io_uring_sqe__bindgen_ty_1, addr2) - 0usize]; }; impl Default for io_uring_sqe__bindgen_ty_1 { fn default() -> Self { let mut s = ::std::mem::MaybeUninit::::uninit(); unsafe { ::std::ptr::write_bytes(s.as_mut_ptr(), 0, 1); s.assume_init() } } } #[repr(C)] #[derive(Copy, Clone)] pub union io_uring_sqe__bindgen_ty_2 { pub addr: __u64, pub splice_off_in: __u64, pub __bindgen_anon_1: io_uring_sqe__bindgen_ty_2__bindgen_ty_1, } #[repr(C)] #[derive(Debug, Default, Copy, Clone, PartialEq)] pub struct io_uring_sqe__bindgen_ty_2__bindgen_ty_1 { pub level: __u32, pub 
optname: __u32, } #[allow(clippy::unnecessary_operation, clippy::identity_op)] const _: () = { ["Size of io_uring_sqe__bindgen_ty_2__bindgen_ty_1"] [::std::mem::size_of::() - 8usize]; ["Alignment of io_uring_sqe__bindgen_ty_2__bindgen_ty_1"] [::std::mem::align_of::() - 4usize]; ["Offset of field: io_uring_sqe__bindgen_ty_2__bindgen_ty_1::level"] [::std::mem::offset_of!(io_uring_sqe__bindgen_ty_2__bindgen_ty_1, level) - 0usize]; ["Offset of field: io_uring_sqe__bindgen_ty_2__bindgen_ty_1::optname"] [::std::mem::offset_of!(io_uring_sqe__bindgen_ty_2__bindgen_ty_1, optname) - 4usize]; }; #[allow(clippy::unnecessary_operation, clippy::identity_op)] const _: () = { ["Size of io_uring_sqe__bindgen_ty_2"] [::std::mem::size_of::() - 8usize]; ["Alignment of io_uring_sqe__bindgen_ty_2"] [::std::mem::align_of::() - 8usize]; ["Offset of field: io_uring_sqe__bindgen_ty_2::addr"] [::std::mem::offset_of!(io_uring_sqe__bindgen_ty_2, addr) - 0usize]; ["Offset of field: io_uring_sqe__bindgen_ty_2::splice_off_in"] [::std::mem::offset_of!(io_uring_sqe__bindgen_ty_2, splice_off_in) - 0usize]; }; impl Default for io_uring_sqe__bindgen_ty_2 { fn default() -> Self { let mut s = ::std::mem::MaybeUninit::::uninit(); unsafe { ::std::ptr::write_bytes(s.as_mut_ptr(), 0, 1); s.assume_init() } } } #[repr(C)] #[derive(Copy, Clone)] pub union io_uring_sqe__bindgen_ty_3 { pub rw_flags: __kernel_rwf_t, pub fsync_flags: __u32, pub poll_events: __u16, pub poll32_events: __u32, pub sync_range_flags: __u32, pub msg_flags: __u32, pub timeout_flags: __u32, pub accept_flags: __u32, pub cancel_flags: __u32, pub open_flags: __u32, pub statx_flags: __u32, pub fadvise_advice: __u32, pub splice_flags: __u32, pub rename_flags: __u32, pub unlink_flags: __u32, pub hardlink_flags: __u32, pub xattr_flags: __u32, pub msg_ring_flags: __u32, pub uring_cmd_flags: __u32, pub waitid_flags: __u32, pub futex_flags: __u32, pub install_fd_flags: __u32, pub nop_flags: __u32, } #[allow(clippy::unnecessary_operation, 
clippy::identity_op)] const _: () = { ["Size of io_uring_sqe__bindgen_ty_3"] [::std::mem::size_of::() - 4usize]; ["Alignment of io_uring_sqe__bindgen_ty_3"] [::std::mem::align_of::() - 4usize]; ["Offset of field: io_uring_sqe__bindgen_ty_3::rw_flags"] [::std::mem::offset_of!(io_uring_sqe__bindgen_ty_3, rw_flags) - 0usize]; ["Offset of field: io_uring_sqe__bindgen_ty_3::fsync_flags"] [::std::mem::offset_of!(io_uring_sqe__bindgen_ty_3, fsync_flags) - 0usize]; ["Offset of field: io_uring_sqe__bindgen_ty_3::poll_events"] [::std::mem::offset_of!(io_uring_sqe__bindgen_ty_3, poll_events) - 0usize]; ["Offset of field: io_uring_sqe__bindgen_ty_3::poll32_events"] [::std::mem::offset_of!(io_uring_sqe__bindgen_ty_3, poll32_events) - 0usize]; ["Offset of field: io_uring_sqe__bindgen_ty_3::sync_range_flags"] [::std::mem::offset_of!(io_uring_sqe__bindgen_ty_3, sync_range_flags) - 0usize]; ["Offset of field: io_uring_sqe__bindgen_ty_3::msg_flags"] [::std::mem::offset_of!(io_uring_sqe__bindgen_ty_3, msg_flags) - 0usize]; ["Offset of field: io_uring_sqe__bindgen_ty_3::timeout_flags"] [::std::mem::offset_of!(io_uring_sqe__bindgen_ty_3, timeout_flags) - 0usize]; ["Offset of field: io_uring_sqe__bindgen_ty_3::accept_flags"] [::std::mem::offset_of!(io_uring_sqe__bindgen_ty_3, accept_flags) - 0usize]; ["Offset of field: io_uring_sqe__bindgen_ty_3::cancel_flags"] [::std::mem::offset_of!(io_uring_sqe__bindgen_ty_3, cancel_flags) - 0usize]; ["Offset of field: io_uring_sqe__bindgen_ty_3::open_flags"] [::std::mem::offset_of!(io_uring_sqe__bindgen_ty_3, open_flags) - 0usize]; ["Offset of field: io_uring_sqe__bindgen_ty_3::statx_flags"] [::std::mem::offset_of!(io_uring_sqe__bindgen_ty_3, statx_flags) - 0usize]; ["Offset of field: io_uring_sqe__bindgen_ty_3::fadvise_advice"] [::std::mem::offset_of!(io_uring_sqe__bindgen_ty_3, fadvise_advice) - 0usize]; ["Offset of field: io_uring_sqe__bindgen_ty_3::splice_flags"] [::std::mem::offset_of!(io_uring_sqe__bindgen_ty_3, splice_flags) - 0usize]; 
["Offset of field: io_uring_sqe__bindgen_ty_3::rename_flags"] [::std::mem::offset_of!(io_uring_sqe__bindgen_ty_3, rename_flags) - 0usize]; ["Offset of field: io_uring_sqe__bindgen_ty_3::unlink_flags"] [::std::mem::offset_of!(io_uring_sqe__bindgen_ty_3, unlink_flags) - 0usize]; ["Offset of field: io_uring_sqe__bindgen_ty_3::hardlink_flags"] [::std::mem::offset_of!(io_uring_sqe__bindgen_ty_3, hardlink_flags) - 0usize]; ["Offset of field: io_uring_sqe__bindgen_ty_3::xattr_flags"] [::std::mem::offset_of!(io_uring_sqe__bindgen_ty_3, xattr_flags) - 0usize]; ["Offset of field: io_uring_sqe__bindgen_ty_3::msg_ring_flags"] [::std::mem::offset_of!(io_uring_sqe__bindgen_ty_3, msg_ring_flags) - 0usize]; ["Offset of field: io_uring_sqe__bindgen_ty_3::uring_cmd_flags"] [::std::mem::offset_of!(io_uring_sqe__bindgen_ty_3, uring_cmd_flags) - 0usize]; ["Offset of field: io_uring_sqe__bindgen_ty_3::waitid_flags"] [::std::mem::offset_of!(io_uring_sqe__bindgen_ty_3, waitid_flags) - 0usize]; ["Offset of field: io_uring_sqe__bindgen_ty_3::futex_flags"] [::std::mem::offset_of!(io_uring_sqe__bindgen_ty_3, futex_flags) - 0usize]; ["Offset of field: io_uring_sqe__bindgen_ty_3::install_fd_flags"] [::std::mem::offset_of!(io_uring_sqe__bindgen_ty_3, install_fd_flags) - 0usize]; ["Offset of field: io_uring_sqe__bindgen_ty_3::nop_flags"] [::std::mem::offset_of!(io_uring_sqe__bindgen_ty_3, nop_flags) - 0usize]; }; impl Default for io_uring_sqe__bindgen_ty_3 { fn default() -> Self { let mut s = ::std::mem::MaybeUninit::::uninit(); unsafe { ::std::ptr::write_bytes(s.as_mut_ptr(), 0, 1); s.assume_init() } } } #[repr(C, packed)] #[derive(Copy, Clone)] pub union io_uring_sqe__bindgen_ty_4 { pub buf_index: __u16, pub buf_group: __u16, } #[allow(clippy::unnecessary_operation, clippy::identity_op)] const _: () = { ["Size of io_uring_sqe__bindgen_ty_4"] [::std::mem::size_of::() - 2usize]; ["Alignment of io_uring_sqe__bindgen_ty_4"] [::std::mem::align_of::() - 1usize]; ["Offset of field: 
io_uring_sqe__bindgen_ty_4::buf_index"] [::std::mem::offset_of!(io_uring_sqe__bindgen_ty_4, buf_index) - 0usize]; ["Offset of field: io_uring_sqe__bindgen_ty_4::buf_group"] [::std::mem::offset_of!(io_uring_sqe__bindgen_ty_4, buf_group) - 0usize]; }; impl Default for io_uring_sqe__bindgen_ty_4 { fn default() -> Self { let mut s = ::std::mem::MaybeUninit::::uninit(); unsafe { ::std::ptr::write_bytes(s.as_mut_ptr(), 0, 1); s.assume_init() } } } #[repr(C)] #[derive(Copy, Clone)] pub union io_uring_sqe__bindgen_ty_5 { pub splice_fd_in: __s32, pub file_index: __u32, pub optlen: __u32, pub __bindgen_anon_1: io_uring_sqe__bindgen_ty_5__bindgen_ty_1, } #[repr(C)] #[derive(Debug, Default, Copy, Clone, PartialEq)] pub struct io_uring_sqe__bindgen_ty_5__bindgen_ty_1 { pub addr_len: __u16, pub __pad3: [__u16; 1usize], } #[allow(clippy::unnecessary_operation, clippy::identity_op)] const _: () = { ["Size of io_uring_sqe__bindgen_ty_5__bindgen_ty_1"] [::std::mem::size_of::() - 4usize]; ["Alignment of io_uring_sqe__bindgen_ty_5__bindgen_ty_1"] [::std::mem::align_of::() - 2usize]; ["Offset of field: io_uring_sqe__bindgen_ty_5__bindgen_ty_1::addr_len"] [::std::mem::offset_of!(io_uring_sqe__bindgen_ty_5__bindgen_ty_1, addr_len) - 0usize]; ["Offset of field: io_uring_sqe__bindgen_ty_5__bindgen_ty_1::__pad3"] [::std::mem::offset_of!(io_uring_sqe__bindgen_ty_5__bindgen_ty_1, __pad3) - 2usize]; }; #[allow(clippy::unnecessary_operation, clippy::identity_op)] const _: () = { ["Size of io_uring_sqe__bindgen_ty_5"] [::std::mem::size_of::() - 4usize]; ["Alignment of io_uring_sqe__bindgen_ty_5"] [::std::mem::align_of::() - 4usize]; ["Offset of field: io_uring_sqe__bindgen_ty_5::splice_fd_in"] [::std::mem::offset_of!(io_uring_sqe__bindgen_ty_5, splice_fd_in) - 0usize]; ["Offset of field: io_uring_sqe__bindgen_ty_5::file_index"] [::std::mem::offset_of!(io_uring_sqe__bindgen_ty_5, file_index) - 0usize]; ["Offset of field: io_uring_sqe__bindgen_ty_5::optlen"] 
[::std::mem::offset_of!(io_uring_sqe__bindgen_ty_5, optlen) - 0usize]; }; impl Default for io_uring_sqe__bindgen_ty_5 { fn default() -> Self { let mut s = ::std::mem::MaybeUninit::::uninit(); unsafe { ::std::ptr::write_bytes(s.as_mut_ptr(), 0, 1); s.assume_init() } } } #[repr(C)] #[derive(Copy, Clone)] pub struct io_uring_sqe__bindgen_ty_6 { pub __bindgen_anon_1: __BindgenUnionField, pub optval: __BindgenUnionField<__u64>, pub cmd: __BindgenUnionField<[__u8; 0usize]>, pub bindgen_union_field: [u64; 2usize], } #[repr(C)] #[derive(Debug, Default, Copy, Clone, PartialEq)] pub struct io_uring_sqe__bindgen_ty_6__bindgen_ty_1 { pub addr3: __u64, pub __pad2: [__u64; 1usize], } #[allow(clippy::unnecessary_operation, clippy::identity_op)] const _: () = { ["Size of io_uring_sqe__bindgen_ty_6__bindgen_ty_1"] [::std::mem::size_of::() - 16usize]; ["Alignment of io_uring_sqe__bindgen_ty_6__bindgen_ty_1"] [::std::mem::align_of::() - 8usize]; ["Offset of field: io_uring_sqe__bindgen_ty_6__bindgen_ty_1::addr3"] [::std::mem::offset_of!(io_uring_sqe__bindgen_ty_6__bindgen_ty_1, addr3) - 0usize]; ["Offset of field: io_uring_sqe__bindgen_ty_6__bindgen_ty_1::__pad2"] [::std::mem::offset_of!(io_uring_sqe__bindgen_ty_6__bindgen_ty_1, __pad2) - 8usize]; }; #[allow(clippy::unnecessary_operation, clippy::identity_op)] const _: () = { ["Size of io_uring_sqe__bindgen_ty_6"] [::std::mem::size_of::() - 16usize]; ["Alignment of io_uring_sqe__bindgen_ty_6"] [::std::mem::align_of::() - 8usize]; ["Offset of field: io_uring_sqe__bindgen_ty_6::optval"] [::std::mem::offset_of!(io_uring_sqe__bindgen_ty_6, optval) - 0usize]; ["Offset of field: io_uring_sqe__bindgen_ty_6::cmd"] [::std::mem::offset_of!(io_uring_sqe__bindgen_ty_6, cmd) - 0usize]; }; impl Default for io_uring_sqe__bindgen_ty_6 { fn default() -> Self { let mut s = ::std::mem::MaybeUninit::::uninit(); unsafe { ::std::ptr::write_bytes(s.as_mut_ptr(), 0, 1); s.assume_init() } } } #[allow(clippy::unnecessary_operation, clippy::identity_op)] const 
_: () = {
    // Layout checks for the top-level submission queue entry: 64 bytes, 8-aligned,
    // with field offsets matching the kernel's `struct io_uring_sqe`.
    // NOTE(review): turbofish arguments reconstructed from the label strings.
    ["Size of io_uring_sqe"][::std::mem::size_of::<io_uring_sqe>() - 64usize];
    ["Alignment of io_uring_sqe"][::std::mem::align_of::<io_uring_sqe>() - 8usize];
    ["Offset of field: io_uring_sqe::opcode"][::std::mem::offset_of!(io_uring_sqe, opcode) - 0usize];
    ["Offset of field: io_uring_sqe::flags"][::std::mem::offset_of!(io_uring_sqe, flags) - 1usize];
    ["Offset of field: io_uring_sqe::ioprio"][::std::mem::offset_of!(io_uring_sqe, ioprio) - 2usize];
    ["Offset of field: io_uring_sqe::fd"][::std::mem::offset_of!(io_uring_sqe, fd) - 4usize];
    ["Offset of field: io_uring_sqe::len"][::std::mem::offset_of!(io_uring_sqe, len) - 24usize];
    ["Offset of field: io_uring_sqe::user_data"][::std::mem::offset_of!(io_uring_sqe, user_data) - 32usize];
    ["Offset of field: io_uring_sqe::personality"][::std::mem::offset_of!(io_uring_sqe, personality) - 42usize];
};
impl Default for io_uring_sqe {
    fn default() -> Self {
        let mut s = ::std::mem::MaybeUninit::<Self>::uninit();
        unsafe {
            ::std::ptr::write_bytes(s.as_mut_ptr(), 0, 1);
            s.assume_init()
        }
    }
}
/// Bit positions of the `IOSQE_*` submission flags.
pub mod io_uring_sqe_flags_bit {
    pub type Type = ::std::os::raw::c_uint;
    pub const IOSQE_FIXED_FILE_BIT: Type = 0;
    pub const IOSQE_IO_DRAIN_BIT: Type = 1;
    pub const IOSQE_IO_LINK_BIT: Type = 2;
    pub const IOSQE_IO_HARDLINK_BIT: Type = 3;
    pub const IOSQE_ASYNC_BIT: Type = 4;
    pub const IOSQE_BUFFER_SELECT_BIT: Type = 5;
    pub const IOSQE_CQE_SKIP_SUCCESS_BIT: Type = 6;
}
/// io_uring operation codes (`IORING_OP_*`), one per SQE opcode.
pub mod io_uring_op {
    pub type Type = ::std::os::raw::c_uint;
    pub const IORING_OP_NOP: Type = 0;
    pub const IORING_OP_READV: Type = 1;
    pub const IORING_OP_WRITEV: Type = 2;
    pub const IORING_OP_FSYNC: Type = 3;
    pub const IORING_OP_READ_FIXED: Type = 4;
    pub const IORING_OP_WRITE_FIXED: Type = 5;
    pub const IORING_OP_POLL_ADD: Type = 6;
    pub const IORING_OP_POLL_REMOVE: Type = 7;
    pub const IORING_OP_SYNC_FILE_RANGE: Type = 8;
    pub const IORING_OP_SENDMSG: Type = 9;
    pub const IORING_OP_RECVMSG: Type = 10;
    pub const IORING_OP_TIMEOUT: Type = 11;
    pub const IORING_OP_TIMEOUT_REMOVE: Type = 12;
    pub const IORING_OP_ACCEPT: Type = 13;
    pub const IORING_OP_ASYNC_CANCEL: Type = 14;
    pub const IORING_OP_LINK_TIMEOUT: Type = 15;
    pub const IORING_OP_CONNECT: Type = 16;
    pub const IORING_OP_FALLOCATE: Type = 17;
    pub const IORING_OP_OPENAT: Type = 18;
    pub const IORING_OP_CLOSE: Type = 19;
    pub const IORING_OP_FILES_UPDATE: Type = 20;
    pub const IORING_OP_STATX: Type = 21;
    pub const IORING_OP_READ: Type = 22;
    pub const IORING_OP_WRITE: Type = 23;
    pub const IORING_OP_FADVISE: Type = 24;
    pub const IORING_OP_MADVISE: Type = 25;
    pub const IORING_OP_SEND: Type = 26;
    pub const IORING_OP_RECV: Type = 27;
    pub const IORING_OP_OPENAT2: Type = 28;
    pub const IORING_OP_EPOLL_CTL: Type = 29;
    pub const IORING_OP_SPLICE: Type = 30;
    pub const IORING_OP_PROVIDE_BUFFERS: Type = 31;
    pub const IORING_OP_REMOVE_BUFFERS: Type = 32;
    pub const IORING_OP_TEE: Type = 33;
    pub const IORING_OP_SHUTDOWN: Type = 34;
    pub const IORING_OP_RENAMEAT: Type = 35;
    pub const IORING_OP_UNLINKAT: Type = 36;
    pub const IORING_OP_MKDIRAT: Type = 37;
    pub const IORING_OP_SYMLINKAT: Type = 38;
    pub const IORING_OP_LINKAT: Type = 39;
    pub const IORING_OP_MSG_RING: Type = 40;
    pub const IORING_OP_FSETXATTR: Type = 41;
    pub const IORING_OP_SETXATTR: Type = 42;
    pub const IORING_OP_FGETXATTR: Type = 43;
    pub const IORING_OP_GETXATTR: Type = 44;
    pub const IORING_OP_SOCKET: Type = 45;
    pub const IORING_OP_URING_CMD: Type = 46;
    pub const IORING_OP_SEND_ZC: Type = 47;
    pub const IORING_OP_SENDMSG_ZC: Type = 48;
    pub const IORING_OP_READ_MULTISHOT: Type = 49;
    pub const IORING_OP_WAITID: Type = 50;
    pub const IORING_OP_FUTEX_WAIT: Type = 51;
    pub const IORING_OP_FUTEX_WAKE: Type = 52;
    pub const IORING_OP_FUTEX_WAITV: Type = 53;
    pub const IORING_OP_FIXED_FD_INSTALL: Type = 54;
    pub const IORING_OP_FTRUNCATE: Type = 55;
    pub const IORING_OP_BIND: Type = 56;
    pub const IORING_OP_LISTEN: Type = 57;
    pub const IORING_OP_LAST: Type = 58;
}
/// Flags for `IORING_OP_MSG_RING`.
pub mod io_uring_msg_ring_flags {
    pub type Type = ::std::os::raw::c_uint;
    pub const IORING_MSG_DATA: Type = 0;
    pub const
IORING_MSG_SEND_FD: Type = 1;
}
/// Completion queue entry: `user_data` echoes the SQE, `res` is the syscall-style
/// result (negative errno on failure), `flags` carries `IORING_CQE_F_*` bits.
#[repr(C)]
#[derive(Clone, Copy, Debug, Default)]
pub struct io_uring_cqe {
    pub user_data: __u64,
    pub res: __s32,
    pub flags: __u32,
}
// NOTE(review): turbofish arguments in the layout checks below were reconstructed
// from the adjacent label strings (the extracted text had them stripped).
#[allow(clippy::unnecessary_operation, clippy::identity_op)]
const _: () = {
    ["Size of io_uring_cqe"][::std::mem::size_of::<io_uring_cqe>() - 16usize];
    ["Alignment of io_uring_cqe"][::std::mem::align_of::<io_uring_cqe>() - 8usize];
    ["Offset of field: io_uring_cqe::user_data"][::std::mem::offset_of!(io_uring_cqe, user_data) - 0usize];
    ["Offset of field: io_uring_cqe::res"][::std::mem::offset_of!(io_uring_cqe, res) - 8usize];
    ["Offset of field: io_uring_cqe::flags"][::std::mem::offset_of!(io_uring_cqe, flags) - 12usize];
};
/// Kernel-reported offsets into the SQ ring mmap region.
#[repr(C)]
#[derive(Debug, Default, Copy, Clone, PartialEq)]
pub struct io_sqring_offsets {
    pub head: __u32,
    pub tail: __u32,
    pub ring_mask: __u32,
    pub ring_entries: __u32,
    pub flags: __u32,
    pub dropped: __u32,
    pub array: __u32,
    pub resv1: __u32,
    pub user_addr: __u64,
}
#[allow(clippy::unnecessary_operation, clippy::identity_op)]
const _: () = {
    ["Size of io_sqring_offsets"][::std::mem::size_of::<io_sqring_offsets>() - 40usize];
    ["Alignment of io_sqring_offsets"][::std::mem::align_of::<io_sqring_offsets>() - 8usize];
    ["Offset of field: io_sqring_offsets::head"][::std::mem::offset_of!(io_sqring_offsets, head) - 0usize];
    ["Offset of field: io_sqring_offsets::tail"][::std::mem::offset_of!(io_sqring_offsets, tail) - 4usize];
    ["Offset of field: io_sqring_offsets::ring_mask"][::std::mem::offset_of!(io_sqring_offsets, ring_mask) - 8usize];
    ["Offset of field: io_sqring_offsets::ring_entries"][::std::mem::offset_of!(io_sqring_offsets, ring_entries) - 12usize];
    ["Offset of field: io_sqring_offsets::flags"][::std::mem::offset_of!(io_sqring_offsets, flags) - 16usize];
    ["Offset of field: io_sqring_offsets::dropped"][::std::mem::offset_of!(io_sqring_offsets, dropped) - 20usize];
    ["Offset of field: io_sqring_offsets::array"][::std::mem::offset_of!(io_sqring_offsets, array) - 24usize];
    ["Offset of field: io_sqring_offsets::resv1"][::std::mem::offset_of!(io_sqring_offsets, resv1) - 28usize];
    ["Offset of field: io_sqring_offsets::user_addr"][::std::mem::offset_of!(io_sqring_offsets, user_addr) - 32usize];
};
/// Kernel-reported offsets into the CQ ring mmap region.
#[repr(C)]
#[derive(Debug, Default, Copy, Clone, PartialEq)]
pub struct io_cqring_offsets {
    pub head: __u32,
    pub tail: __u32,
    pub ring_mask: __u32,
    pub ring_entries: __u32,
    pub overflow: __u32,
    pub cqes: __u32,
    pub flags: __u32,
    pub resv1: __u32,
    pub user_addr: __u64,
}
#[allow(clippy::unnecessary_operation, clippy::identity_op)]
const _: () = {
    ["Size of io_cqring_offsets"][::std::mem::size_of::<io_cqring_offsets>() - 40usize];
    ["Alignment of io_cqring_offsets"][::std::mem::align_of::<io_cqring_offsets>() - 8usize];
    ["Offset of field: io_cqring_offsets::head"][::std::mem::offset_of!(io_cqring_offsets, head) - 0usize];
    ["Offset of field: io_cqring_offsets::tail"][::std::mem::offset_of!(io_cqring_offsets, tail) - 4usize];
    ["Offset of field: io_cqring_offsets::ring_mask"][::std::mem::offset_of!(io_cqring_offsets, ring_mask) - 8usize];
    ["Offset of field: io_cqring_offsets::ring_entries"][::std::mem::offset_of!(io_cqring_offsets, ring_entries) - 12usize];
    ["Offset of field: io_cqring_offsets::overflow"][::std::mem::offset_of!(io_cqring_offsets, overflow) - 16usize];
    ["Offset of field: io_cqring_offsets::cqes"][::std::mem::offset_of!(io_cqring_offsets, cqes) - 20usize];
    ["Offset of field: io_cqring_offsets::flags"][::std::mem::offset_of!(io_cqring_offsets, flags) - 24usize];
    ["Offset of field: io_cqring_offsets::resv1"][::std::mem::offset_of!(io_cqring_offsets, resv1) - 28usize];
    ["Offset of field: io_cqring_offsets::user_addr"][::std::mem::offset_of!(io_cqring_offsets, user_addr) - 32usize];
};
/// Parameter block exchanged with `io_uring_setup(2)`.
#[repr(C)]
#[derive(Debug, Default, Copy, Clone, PartialEq)]
pub struct io_uring_params {
    pub sq_entries: __u32,
    pub cq_entries: __u32,
    pub flags: __u32,
    pub sq_thread_cpu: __u32,
    pub sq_thread_idle: __u32,
    pub features: __u32,
    pub wq_fd: __u32,
    pub resv: [__u32; 3usize],
    pub sq_off: io_sqring_offsets,
    pub cq_off: io_cqring_offsets,
}
#[allow(clippy::unnecessary_operation, clippy::identity_op)]
const _: () = {
    ["Size of io_uring_params"][::std::mem::size_of::<io_uring_params>() - 120usize];
    ["Alignment of io_uring_params"][::std::mem::align_of::<io_uring_params>() - 8usize];
    ["Offset of field: io_uring_params::sq_entries"][::std::mem::offset_of!(io_uring_params, sq_entries) - 0usize];
    ["Offset of field: io_uring_params::cq_entries"][::std::mem::offset_of!(io_uring_params, cq_entries) - 4usize];
    ["Offset of field: io_uring_params::flags"][::std::mem::offset_of!(io_uring_params, flags) - 8usize];
    ["Offset of field: io_uring_params::sq_thread_cpu"][::std::mem::offset_of!(io_uring_params, sq_thread_cpu) - 12usize];
    ["Offset of field: io_uring_params::sq_thread_idle"][::std::mem::offset_of!(io_uring_params, sq_thread_idle) - 16usize];
    ["Offset of field: io_uring_params::features"][::std::mem::offset_of!(io_uring_params, features) - 20usize];
    ["Offset of field: io_uring_params::wq_fd"][::std::mem::offset_of!(io_uring_params, wq_fd) - 24usize];
    ["Offset of field: io_uring_params::resv"][::std::mem::offset_of!(io_uring_params, resv) - 28usize];
    ["Offset of field: io_uring_params::sq_off"][::std::mem::offset_of!(io_uring_params, sq_off) - 40usize];
    ["Offset of field: io_uring_params::cq_off"][::std::mem::offset_of!(io_uring_params, cq_off) - 80usize];
};
/// Opcodes for `io_uring_register(2)`.
pub mod io_uring_register_op {
    pub type Type = ::std::os::raw::c_uint;
    pub const IORING_REGISTER_BUFFERS: Type = 0;
    pub const IORING_UNREGISTER_BUFFERS: Type = 1;
    pub const IORING_REGISTER_FILES: Type = 2;
    pub const IORING_UNREGISTER_FILES: Type = 3;
    pub const IORING_REGISTER_EVENTFD: Type = 4;
    pub const IORING_UNREGISTER_EVENTFD: Type = 5;
    pub const IORING_REGISTER_FILES_UPDATE: Type = 6;
    pub const IORING_REGISTER_EVENTFD_ASYNC: Type = 7;
    pub const IORING_REGISTER_PROBE: Type = 8;
    pub const IORING_REGISTER_PERSONALITY: Type = 9;
    pub const IORING_UNREGISTER_PERSONALITY: Type = 10;
    pub const IORING_REGISTER_RESTRICTIONS: Type = 11;
    pub const IORING_REGISTER_ENABLE_RINGS: Type = 12;
    pub const IORING_REGISTER_FILES2: Type = 13;
    pub const IORING_REGISTER_FILES_UPDATE2: Type = 14;
    pub const IORING_REGISTER_BUFFERS2: Type = 15;
    pub const IORING_REGISTER_BUFFERS_UPDATE: Type = 16;
    pub const IORING_REGISTER_IOWQ_AFF: Type = 17;
    pub const IORING_UNREGISTER_IOWQ_AFF: Type = 18;
    pub const IORING_REGISTER_IOWQ_MAX_WORKERS: Type = 19;
    pub const IORING_REGISTER_RING_FDS: Type = 20;
    pub const IORING_UNREGISTER_RING_FDS: Type = 21;
    pub const IORING_REGISTER_PBUF_RING: Type = 22;
    pub const IORING_UNREGISTER_PBUF_RING: Type = 23;
    pub const IORING_REGISTER_SYNC_CANCEL: Type = 24;
    pub const IORING_REGISTER_FILE_ALLOC_RANGE: Type = 25;
    pub const IORING_REGISTER_PBUF_STATUS: Type = 26;
    pub const IORING_REGISTER_NAPI: Type = 27;
    pub const IORING_UNREGISTER_NAPI: Type = 28;
    pub const IORING_REGISTER_CLOCK: Type = 29;
    pub const IORING_REGISTER_CLONE_BUFFERS: Type = 30;
    pub const IORING_REGISTER_SEND_MSG_RING: Type = 31;
    pub const IORING_REGISTER_RESIZE_RINGS: Type = 33;
    pub const IORING_REGISTER_MEM_REGION: Type = 34;
    pub const IORING_REGISTER_LAST: Type = 35;
    pub const IORING_REGISTER_USE_REGISTERED_RING: Type = 2147483648;
}
/// Argument for `IORING_REGISTER_FILES_UPDATE`.
#[repr(C)]
#[derive(Debug, Default, Copy, Clone, PartialEq)]
pub struct io_uring_files_update {
    pub offset: __u32,
    pub resv: __u32,
    pub fds: __u64,
}
#[allow(clippy::unnecessary_operation, clippy::identity_op)]
const _: () = {
    ["Size of io_uring_files_update"][::std::mem::size_of::<io_uring_files_update>() - 16usize];
    ["Alignment of io_uring_files_update"][::std::mem::align_of::<io_uring_files_update>() - 8usize];
    ["Offset of field: io_uring_files_update::offset"][::std::mem::offset_of!(io_uring_files_update, offset) - 0usize];
    ["Offset of field: io_uring_files_update::resv"][::std::mem::offset_of!(io_uring_files_update, resv) - 4usize];
    ["Offset of field: io_uring_files_update::fds"][::std::mem::offset_of!(io_uring_files_update, fds) - 8usize];
};
pub mod _bindgen_ty_1 {
    pub type Type = ::std::os::raw::c_uint;
    pub const
IORING_MEM_REGION_TYPE_USER: Type = 1;
}
/// Describes a user memory region handed to `IORING_REGISTER_MEM_REGION`.
/// NOTE(review): turbofish/generic arguments in this region were reconstructed
/// from the adjacent label strings (the extracted text had them stripped).
#[repr(C)]
#[derive(Debug, Default, Copy, Clone, PartialEq)]
pub struct io_uring_region_desc {
    pub user_addr: __u64,
    pub size: __u64,
    pub flags: __u32,
    pub id: __u32,
    pub mmap_offset: __u64,
    pub __resv: [__u64; 4usize],
}
#[allow(clippy::unnecessary_operation, clippy::identity_op)]
const _: () = {
    ["Size of io_uring_region_desc"][::std::mem::size_of::<io_uring_region_desc>() - 64usize];
    ["Alignment of io_uring_region_desc"][::std::mem::align_of::<io_uring_region_desc>() - 8usize];
    ["Offset of field: io_uring_region_desc::user_addr"][::std::mem::offset_of!(io_uring_region_desc, user_addr) - 0usize];
    ["Offset of field: io_uring_region_desc::size"][::std::mem::offset_of!(io_uring_region_desc, size) - 8usize];
    ["Offset of field: io_uring_region_desc::flags"][::std::mem::offset_of!(io_uring_region_desc, flags) - 16usize];
    ["Offset of field: io_uring_region_desc::id"][::std::mem::offset_of!(io_uring_region_desc, id) - 20usize];
    ["Offset of field: io_uring_region_desc::mmap_offset"][::std::mem::offset_of!(io_uring_region_desc, mmap_offset) - 24usize];
    ["Offset of field: io_uring_region_desc::__resv"][::std::mem::offset_of!(io_uring_region_desc, __resv) - 32usize];
};
pub mod _bindgen_ty_2 {
    pub type Type = ::std::os::raw::c_uint;
    pub const IORING_MEM_REGION_REG_WAIT_ARG: Type = 1;
}
/// Argument for `IORING_REGISTER_MEM_REGION`.
#[repr(C)]
#[derive(Debug, Default, Copy, Clone, PartialEq)]
pub struct io_uring_mem_region_reg {
    pub region_uptr: __u64,
    pub flags: __u64,
    pub __resv: [__u64; 2usize],
}
#[allow(clippy::unnecessary_operation, clippy::identity_op)]
const _: () = {
    ["Size of io_uring_mem_region_reg"][::std::mem::size_of::<io_uring_mem_region_reg>() - 32usize];
    ["Alignment of io_uring_mem_region_reg"][::std::mem::align_of::<io_uring_mem_region_reg>() - 8usize];
    ["Offset of field: io_uring_mem_region_reg::region_uptr"][::std::mem::offset_of!(io_uring_mem_region_reg, region_uptr) - 0usize];
    ["Offset of field: io_uring_mem_region_reg::flags"][::std::mem::offset_of!(io_uring_mem_region_reg, flags) - 8usize];
    ["Offset of field: io_uring_mem_region_reg::__resv"][::std::mem::offset_of!(io_uring_mem_region_reg, __resv) - 16usize];
};
/// Argument for `IORING_REGISTER_BUFFERS2` / `IORING_REGISTER_FILES2`.
#[repr(C)]
#[derive(Debug, Default, Copy, Clone, PartialEq)]
pub struct io_uring_rsrc_register {
    pub nr: __u32,
    pub flags: __u32,
    pub resv2: __u64,
    pub data: __u64,
    pub tags: __u64,
}
#[allow(clippy::unnecessary_operation, clippy::identity_op)]
const _: () = {
    ["Size of io_uring_rsrc_register"][::std::mem::size_of::<io_uring_rsrc_register>() - 32usize];
    ["Alignment of io_uring_rsrc_register"][::std::mem::align_of::<io_uring_rsrc_register>() - 8usize];
    ["Offset of field: io_uring_rsrc_register::nr"][::std::mem::offset_of!(io_uring_rsrc_register, nr) - 0usize];
    ["Offset of field: io_uring_rsrc_register::flags"][::std::mem::offset_of!(io_uring_rsrc_register, flags) - 4usize];
    ["Offset of field: io_uring_rsrc_register::resv2"][::std::mem::offset_of!(io_uring_rsrc_register, resv2) - 8usize];
    ["Offset of field: io_uring_rsrc_register::data"][::std::mem::offset_of!(io_uring_rsrc_register, data) - 16usize];
    ["Offset of field: io_uring_rsrc_register::tags"][::std::mem::offset_of!(io_uring_rsrc_register, tags) - 24usize];
};
/// Argument for resource-update register operations.
#[repr(C)]
#[derive(Debug, Default, Copy, Clone, PartialEq)]
pub struct io_uring_rsrc_update {
    pub offset: __u32,
    pub resv: __u32,
    pub data: __u64,
}
#[allow(clippy::unnecessary_operation, clippy::identity_op)]
const _: () = {
    ["Size of io_uring_rsrc_update"][::std::mem::size_of::<io_uring_rsrc_update>() - 16usize];
    ["Alignment of io_uring_rsrc_update"][::std::mem::align_of::<io_uring_rsrc_update>() - 8usize];
    ["Offset of field: io_uring_rsrc_update::offset"][::std::mem::offset_of!(io_uring_rsrc_update, offset) - 0usize];
    ["Offset of field: io_uring_rsrc_update::resv"][::std::mem::offset_of!(io_uring_rsrc_update, resv) - 4usize];
    ["Offset of field: io_uring_rsrc_update::data"][::std::mem::offset_of!(io_uring_rsrc_update, data) - 8usize];
};
/// Extended resource update (`IORING_REGISTER_FILES_UPDATE2`, `BUFFERS_UPDATE`).
#[repr(C)]
#[derive(Debug, Default, Copy, Clone, PartialEq)]
pub struct io_uring_rsrc_update2 {
    pub offset: __u32,
    pub resv: __u32,
    pub data: __u64,
    pub tags: __u64,
    pub nr: __u32,
    pub resv2: __u32,
}
#[allow(clippy::unnecessary_operation, clippy::identity_op)]
const _: () = {
    ["Size of io_uring_rsrc_update2"][::std::mem::size_of::<io_uring_rsrc_update2>() - 32usize];
    ["Alignment of io_uring_rsrc_update2"][::std::mem::align_of::<io_uring_rsrc_update2>() - 8usize];
    ["Offset of field: io_uring_rsrc_update2::offset"][::std::mem::offset_of!(io_uring_rsrc_update2, offset) - 0usize];
    ["Offset of field: io_uring_rsrc_update2::resv"][::std::mem::offset_of!(io_uring_rsrc_update2, resv) - 4usize];
    ["Offset of field: io_uring_rsrc_update2::data"][::std::mem::offset_of!(io_uring_rsrc_update2, data) - 8usize];
    ["Offset of field: io_uring_rsrc_update2::tags"][::std::mem::offset_of!(io_uring_rsrc_update2, tags) - 16usize];
    ["Offset of field: io_uring_rsrc_update2::nr"][::std::mem::offset_of!(io_uring_rsrc_update2, nr) - 24usize];
    ["Offset of field: io_uring_rsrc_update2::resv2"][::std::mem::offset_of!(io_uring_rsrc_update2, resv2) - 28usize];
};
/// One per-opcode entry reported by `IORING_REGISTER_PROBE`.
#[repr(C)]
#[derive(Debug, Default, Copy, Clone, PartialEq)]
pub struct io_uring_probe_op {
    pub op: __u8,
    pub resv: __u8,
    pub flags: __u16,
    pub resv2: __u32,
}
#[allow(clippy::unnecessary_operation, clippy::identity_op)]
const _: () = {
    ["Size of io_uring_probe_op"][::std::mem::size_of::<io_uring_probe_op>() - 8usize];
    ["Alignment of io_uring_probe_op"][::std::mem::align_of::<io_uring_probe_op>() - 4usize];
    ["Offset of field: io_uring_probe_op::op"][::std::mem::offset_of!(io_uring_probe_op, op) - 0usize];
    ["Offset of field: io_uring_probe_op::resv"][::std::mem::offset_of!(io_uring_probe_op, resv) - 1usize];
    ["Offset of field: io_uring_probe_op::flags"][::std::mem::offset_of!(io_uring_probe_op, flags) - 2usize];
    ["Offset of field: io_uring_probe_op::resv2"][::std::mem::offset_of!(io_uring_probe_op, resv2) - 4usize];
};
/// Header of the probe result; `ops` is a C flexible array member.
#[repr(C)]
#[derive(Debug, Default)]
pub struct io_uring_probe {
    pub last_op: __u8,
    pub ops_len: __u8,
    pub resv: __u16,
    pub resv2: [__u32; 3usize],
    pub ops: __IncompleteArrayField<io_uring_probe_op>,
}
#[allow(clippy::unnecessary_operation, clippy::identity_op)]
const _: () = {
    ["Size of io_uring_probe"][::std::mem::size_of::<io_uring_probe>() - 16usize];
    ["Alignment of io_uring_probe"][::std::mem::align_of::<io_uring_probe>() - 4usize];
    ["Offset of field: io_uring_probe::last_op"][::std::mem::offset_of!(io_uring_probe, last_op) - 0usize];
    ["Offset of field: io_uring_probe::ops_len"][::std::mem::offset_of!(io_uring_probe, ops_len) - 1usize];
    ["Offset of field: io_uring_probe::resv"][::std::mem::offset_of!(io_uring_probe, resv) - 2usize];
    ["Offset of field: io_uring_probe::resv2"][::std::mem::offset_of!(io_uring_probe, resv2) - 4usize];
    ["Offset of field: io_uring_probe::ops"][::std::mem::offset_of!(io_uring_probe, ops) - 16usize];
};
/// One entry for `IORING_REGISTER_RESTRICTIONS`.
#[repr(C)]
#[derive(Copy, Clone)]
pub struct io_uring_restriction {
    pub opcode: __u16,
    pub __bindgen_anon_1: io_uring_restriction__bindgen_ty_1,
    pub resv: __u8,
    pub resv2: [__u32; 3usize],
}
#[repr(C)]
#[derive(Copy, Clone)]
pub union io_uring_restriction__bindgen_ty_1 {
    pub register_op: __u8,
    pub sqe_op: __u8,
    pub sqe_flags: __u8,
}
#[allow(clippy::unnecessary_operation, clippy::identity_op)]
const _: () = {
    ["Size of io_uring_restriction__bindgen_ty_1"][::std::mem::size_of::<io_uring_restriction__bindgen_ty_1>() - 1usize];
    ["Alignment of io_uring_restriction__bindgen_ty_1"][::std::mem::align_of::<io_uring_restriction__bindgen_ty_1>() - 1usize];
    ["Offset of field: io_uring_restriction__bindgen_ty_1::register_op"][::std::mem::offset_of!(io_uring_restriction__bindgen_ty_1, register_op) - 0usize];
    ["Offset of field: io_uring_restriction__bindgen_ty_1::sqe_op"][::std::mem::offset_of!(io_uring_restriction__bindgen_ty_1, sqe_op) - 0usize];
    ["Offset of field: io_uring_restriction__bindgen_ty_1::sqe_flags"][::std::mem::offset_of!(io_uring_restriction__bindgen_ty_1, sqe_flags) - 0usize];
};
impl Default for io_uring_restriction__bindgen_ty_1 {
    fn default() -> Self {
        let mut s = ::std::mem::MaybeUninit::<Self>::uninit();
        unsafe {
            ::std::ptr::write_bytes(s.as_mut_ptr(), 0, 1);
            s.assume_init()
        }
    }
}
#[allow(clippy::unnecessary_operation, clippy::identity_op)]
const _: () = {
    ["Size of io_uring_restriction"][::std::mem::size_of::<io_uring_restriction>() - 16usize];
    ["Alignment of io_uring_restriction"][::std::mem::align_of::<io_uring_restriction>() - 4usize];
    ["Offset of field: io_uring_restriction::opcode"][::std::mem::offset_of!(io_uring_restriction, opcode) - 0usize];
    ["Offset of field: io_uring_restriction::resv"][::std::mem::offset_of!(io_uring_restriction, resv) - 3usize];
    ["Offset of field: io_uring_restriction::resv2"][::std::mem::offset_of!(io_uring_restriction, resv2) - 4usize];
};
impl Default for io_uring_restriction {
    fn default() -> Self {
        let mut s = ::std::mem::MaybeUninit::<Self>::uninit();
        unsafe {
            ::std::ptr::write_bytes(s.as_mut_ptr(), 0, 1);
            s.assume_init()
        }
    }
}
/// Argument for `IORING_REGISTER_CLOCK`.
#[repr(C)]
#[derive(Debug, Default, Copy, Clone, PartialEq)]
pub struct io_uring_clock_register {
    pub clockid: __u32,
    pub __resv: [__u32; 3usize],
}
#[allow(clippy::unnecessary_operation, clippy::identity_op)]
const _: () = {
    ["Size of io_uring_clock_register"][::std::mem::size_of::<io_uring_clock_register>() - 16usize];
    ["Alignment of io_uring_clock_register"][::std::mem::align_of::<io_uring_clock_register>() - 4usize];
    ["Offset of field: io_uring_clock_register::clockid"][::std::mem::offset_of!(io_uring_clock_register, clockid) - 0usize];
    ["Offset of field: io_uring_clock_register::__resv"][::std::mem::offset_of!(io_uring_clock_register, __resv) - 4usize];
};
pub mod _bindgen_ty_3 {
    pub type Type = ::std::os::raw::c_uint;
    pub const IORING_REGISTER_SRC_REGISTERED: Type = 1;
    pub const IORING_REGISTER_DST_REPLACE: Type = 2;
}
/// Argument for `IORING_REGISTER_CLONE_BUFFERS`.
#[repr(C)]
#[derive(Debug, Default, Copy, Clone, PartialEq)]
pub struct io_uring_clone_buffers {
    pub src_fd: __u32,
    pub flags: __u32,
    pub src_off: __u32,
    pub dst_off: __u32,
    pub nr: __u32,
    pub pad: [__u32; 3usize],
}
#[allow(clippy::unnecessary_operation, clippy::identity_op)]
const _: () = {
    ["Size of io_uring_clone_buffers"][::std::mem::size_of::<io_uring_clone_buffers>() - 32usize];
    ["Alignment of io_uring_clone_buffers"][::std::mem::align_of::<io_uring_clone_buffers>() - 4usize];
    ["Offset of field: io_uring_clone_buffers::src_fd"][::std::mem::offset_of!(io_uring_clone_buffers,
src_fd) - 0usize];
    // Remainder of the io_uring_clone_buffers layout checks.
    // NOTE(review): turbofish/generic arguments in this region were reconstructed
    // from the adjacent label strings (the extracted text had them stripped).
    ["Offset of field: io_uring_clone_buffers::flags"][::std::mem::offset_of!(io_uring_clone_buffers, flags) - 4usize];
    ["Offset of field: io_uring_clone_buffers::src_off"][::std::mem::offset_of!(io_uring_clone_buffers, src_off) - 8usize];
    ["Offset of field: io_uring_clone_buffers::dst_off"][::std::mem::offset_of!(io_uring_clone_buffers, dst_off) - 12usize];
    ["Offset of field: io_uring_clone_buffers::nr"][::std::mem::offset_of!(io_uring_clone_buffers, nr) - 16usize];
    ["Offset of field: io_uring_clone_buffers::pad"][::std::mem::offset_of!(io_uring_clone_buffers, pad) - 20usize];
};
/// One provided-buffer descriptor in a buffer ring.
#[repr(C)]
#[derive(Debug, Default, Copy, Clone, PartialEq)]
pub struct io_uring_buf {
    pub addr: __u64,
    pub len: __u32,
    pub bid: __u16,
    pub resv: __u16,
}
#[allow(clippy::unnecessary_operation, clippy::identity_op)]
const _: () = {
    ["Size of io_uring_buf"][::std::mem::size_of::<io_uring_buf>() - 16usize];
    ["Alignment of io_uring_buf"][::std::mem::align_of::<io_uring_buf>() - 8usize];
    ["Offset of field: io_uring_buf::addr"][::std::mem::offset_of!(io_uring_buf, addr) - 0usize];
    ["Offset of field: io_uring_buf::len"][::std::mem::offset_of!(io_uring_buf, len) - 8usize];
    ["Offset of field: io_uring_buf::bid"][::std::mem::offset_of!(io_uring_buf, bid) - 12usize];
    ["Offset of field: io_uring_buf::resv"][::std::mem::offset_of!(io_uring_buf, resv) - 14usize];
};
/// Shared buffer-ring header; the anonymous union overlays the reserved header
/// words with the flexible `bufs` array.
#[repr(C)]
pub struct io_uring_buf_ring {
    pub __bindgen_anon_1: io_uring_buf_ring__bindgen_ty_1,
}
#[repr(C)]
pub struct io_uring_buf_ring__bindgen_ty_1 {
    pub __bindgen_anon_1: __BindgenUnionField<io_uring_buf_ring__bindgen_ty_1__bindgen_ty_1>,
    pub __bindgen_anon_2: __BindgenUnionField<io_uring_buf_ring__bindgen_ty_1__bindgen_ty_2>,
    pub bindgen_union_field: [u64; 2usize],
}
#[repr(C)]
#[derive(Debug, Default, Copy, Clone, PartialEq)]
pub struct io_uring_buf_ring__bindgen_ty_1__bindgen_ty_1 {
    pub resv1: __u64,
    pub resv2: __u32,
    pub resv3: __u16,
    pub tail: __u16,
}
#[allow(clippy::unnecessary_operation, clippy::identity_op)]
const _: () = {
    ["Size of io_uring_buf_ring__bindgen_ty_1__bindgen_ty_1"][::std::mem::size_of::<io_uring_buf_ring__bindgen_ty_1__bindgen_ty_1>() - 16usize];
    ["Alignment of io_uring_buf_ring__bindgen_ty_1__bindgen_ty_1"][::std::mem::align_of::<io_uring_buf_ring__bindgen_ty_1__bindgen_ty_1>() - 8usize];
    ["Offset of field: io_uring_buf_ring__bindgen_ty_1__bindgen_ty_1::resv1"][::std::mem::offset_of!(io_uring_buf_ring__bindgen_ty_1__bindgen_ty_1, resv1) - 0usize];
    ["Offset of field: io_uring_buf_ring__bindgen_ty_1__bindgen_ty_1::resv2"][::std::mem::offset_of!(io_uring_buf_ring__bindgen_ty_1__bindgen_ty_1, resv2) - 8usize];
    ["Offset of field: io_uring_buf_ring__bindgen_ty_1__bindgen_ty_1::resv3"][::std::mem::offset_of!(io_uring_buf_ring__bindgen_ty_1__bindgen_ty_1, resv3) - 12usize];
    ["Offset of field: io_uring_buf_ring__bindgen_ty_1__bindgen_ty_1::tail"][::std::mem::offset_of!(io_uring_buf_ring__bindgen_ty_1__bindgen_ty_1, tail) - 14usize];
};
#[repr(C)]
#[derive(Debug, Default)]
pub struct io_uring_buf_ring__bindgen_ty_1__bindgen_ty_2 {
    pub __empty_bufs: io_uring_buf_ring__bindgen_ty_1__bindgen_ty_2__bindgen_ty_1,
    pub bufs: __IncompleteArrayField<io_uring_buf>,
}
#[repr(C)]
#[derive(Debug, Default, Copy, Clone, PartialEq)]
pub struct io_uring_buf_ring__bindgen_ty_1__bindgen_ty_2__bindgen_ty_1 {}
#[allow(clippy::unnecessary_operation, clippy::identity_op)]
const _: () = {
    ["Size of io_uring_buf_ring__bindgen_ty_1__bindgen_ty_2__bindgen_ty_1"][::std::mem::size_of::<
        io_uring_buf_ring__bindgen_ty_1__bindgen_ty_2__bindgen_ty_1,
    >() - 0usize];
    ["Alignment of io_uring_buf_ring__bindgen_ty_1__bindgen_ty_2__bindgen_ty_1"][::std::mem::align_of::<io_uring_buf_ring__bindgen_ty_1__bindgen_ty_2__bindgen_ty_1>() - 1usize];
};
#[allow(clippy::unnecessary_operation, clippy::identity_op)]
const _: () = {
    ["Size of io_uring_buf_ring__bindgen_ty_1__bindgen_ty_2"][::std::mem::size_of::<io_uring_buf_ring__bindgen_ty_1__bindgen_ty_2>() - 0usize];
    ["Alignment of io_uring_buf_ring__bindgen_ty_1__bindgen_ty_2"][::std::mem::align_of::<io_uring_buf_ring__bindgen_ty_1__bindgen_ty_2>() - 8usize];
    ["Offset of field: io_uring_buf_ring__bindgen_ty_1__bindgen_ty_2::__empty_bufs"][::std::mem::offset_of!(
        io_uring_buf_ring__bindgen_ty_1__bindgen_ty_2,
        __empty_bufs
    ) - 0usize];
    ["Offset of field: io_uring_buf_ring__bindgen_ty_1__bindgen_ty_2::bufs"][::std::mem::offset_of!(io_uring_buf_ring__bindgen_ty_1__bindgen_ty_2, bufs) - 0usize];
};
#[allow(clippy::unnecessary_operation, clippy::identity_op)]
const _: () = {
    ["Size of io_uring_buf_ring__bindgen_ty_1"][::std::mem::size_of::<io_uring_buf_ring__bindgen_ty_1>() - 16usize];
    ["Alignment of io_uring_buf_ring__bindgen_ty_1"][::std::mem::align_of::<io_uring_buf_ring__bindgen_ty_1>() - 8usize];
};
impl Default for io_uring_buf_ring__bindgen_ty_1 {
    fn default() -> Self {
        let mut s = ::std::mem::MaybeUninit::<Self>::uninit();
        unsafe {
            ::std::ptr::write_bytes(s.as_mut_ptr(), 0, 1);
            s.assume_init()
        }
    }
}
#[allow(clippy::unnecessary_operation, clippy::identity_op)]
const _: () = {
    ["Size of io_uring_buf_ring"][::std::mem::size_of::<io_uring_buf_ring>() - 16usize];
    ["Alignment of io_uring_buf_ring"][::std::mem::align_of::<io_uring_buf_ring>() - 8usize];
};
impl Default for io_uring_buf_ring {
    fn default() -> Self {
        let mut s = ::std::mem::MaybeUninit::<Self>::uninit();
        unsafe {
            ::std::ptr::write_bytes(s.as_mut_ptr(), 0, 1);
            s.assume_init()
        }
    }
}
/// Flags for `IORING_REGISTER_PBUF_RING`.
pub mod io_uring_register_pbuf_ring_flags {
    pub type Type = ::std::os::raw::c_uint;
    pub const IOU_PBUF_RING_MMAP: Type = 1;
    pub const IOU_PBUF_RING_INC: Type = 2;
}
/// Argument for `IORING_REGISTER_PBUF_RING`.
#[repr(C)]
#[derive(Debug, Default, Copy, Clone, PartialEq)]
pub struct io_uring_buf_reg {
    pub ring_addr: __u64,
    pub ring_entries: __u32,
    pub bgid: __u16,
    pub flags: __u16,
    pub resv: [__u64; 3usize],
}
#[allow(clippy::unnecessary_operation, clippy::identity_op)]
const _: () = {
    ["Size of io_uring_buf_reg"][::std::mem::size_of::<io_uring_buf_reg>() - 40usize];
    ["Alignment of io_uring_buf_reg"][::std::mem::align_of::<io_uring_buf_reg>() - 8usize];
    ["Offset of field: io_uring_buf_reg::ring_addr"][::std::mem::offset_of!(io_uring_buf_reg, ring_addr) - 0usize];
    ["Offset of field: io_uring_buf_reg::ring_entries"][::std::mem::offset_of!(io_uring_buf_reg, ring_entries) - 8usize];
    ["Offset of field: io_uring_buf_reg::bgid"][::std::mem::offset_of!(io_uring_buf_reg, bgid) - 12usize];
    ["Offset of field: io_uring_buf_reg::flags"][::std::mem::offset_of!(io_uring_buf_reg, flags) - 14usize];
    ["Offset of field: io_uring_buf_reg::resv"][::std::mem::offset_of!(io_uring_buf_reg, resv) - 16usize];
};
/// Result of `IORING_REGISTER_PBUF_STATUS`.
#[repr(C)]
#[derive(Debug, Default, Copy, Clone, PartialEq)]
pub struct io_uring_buf_status {
    pub buf_group: __u32,
    pub head: __u32,
    pub resv: [__u32; 8usize],
}
#[allow(clippy::unnecessary_operation, clippy::identity_op)]
const _: () = {
    ["Size of io_uring_buf_status"][::std::mem::size_of::<io_uring_buf_status>() - 40usize];
    ["Alignment of io_uring_buf_status"][::std::mem::align_of::<io_uring_buf_status>() - 4usize];
    ["Offset of field: io_uring_buf_status::buf_group"][::std::mem::offset_of!(io_uring_buf_status, buf_group) - 0usize];
    ["Offset of field: io_uring_buf_status::head"][::std::mem::offset_of!(io_uring_buf_status, head) - 4usize];
    ["Offset of field: io_uring_buf_status::resv"][::std::mem::offset_of!(io_uring_buf_status, resv) - 8usize];
};
pub mod io_uring_napi_op {
    pub type Type = ::std::os::raw::c_uint;
    pub const IO_URING_NAPI_REGISTER_OP: Type = 0;
    pub const IO_URING_NAPI_STATIC_ADD_ID: Type = 1;
    pub const IO_URING_NAPI_STATIC_DEL_ID: Type = 2;
}
pub mod io_uring_napi_tracking_strategy {
    pub type Type = ::std::os::raw::c_uint;
    pub const IO_URING_NAPI_TRACKING_DYNAMIC: Type = 0;
    pub const IO_URING_NAPI_TRACKING_STATIC: Type = 1;
    pub const IO_URING_NAPI_TRACKING_INACTIVE: Type = 255;
}
/// Argument for `IORING_REGISTER_NAPI` / `IORING_UNREGISTER_NAPI`.
#[repr(C)]
#[derive(Debug, Default, Copy, Clone, PartialEq)]
pub struct io_uring_napi {
    pub busy_poll_to: __u32,
    pub prefer_busy_poll: __u8,
    pub opcode: __u8,
    pub pad: [__u8; 2usize],
    pub op_param: __u32,
    pub resv: __u32,
}
#[allow(clippy::unnecessary_operation, clippy::identity_op)]
const _: () = {
    ["Size of io_uring_napi"][::std::mem::size_of::<io_uring_napi>() - 16usize];
    ["Alignment of io_uring_napi"][::std::mem::align_of::<io_uring_napi>() - 4usize];
    ["Offset of field: io_uring_napi::busy_poll_to"][::std::mem::offset_of!(io_uring_napi, busy_poll_to) - 0usize];
    ["Offset of field: io_uring_napi::prefer_busy_poll"][::std::mem::offset_of!(io_uring_napi, prefer_busy_poll) - 4usize];
    ["Offset of field: io_uring_napi::opcode"][::std::mem::offset_of!(io_uring_napi, opcode) - 5usize];
    ["Offset of field: io_uring_napi::pad"][::std::mem::offset_of!(io_uring_napi, pad) - 6usize];
    ["Offset of field: io_uring_napi::op_param"][::std::mem::offset_of!(io_uring_napi, op_param) - 8usize];
    ["Offset of field: io_uring_napi::resv"][::std::mem::offset_of!(io_uring_napi, resv) - 12usize];
};
/// Kinds of restriction entries for `IORING_REGISTER_RESTRICTIONS`.
pub mod io_uring_register_restriction_op {
    pub type Type = ::std::os::raw::c_uint;
    pub const IORING_RESTRICTION_REGISTER_OP: Type = 0;
    pub const IORING_RESTRICTION_SQE_OP: Type = 1;
    pub const IORING_RESTRICTION_SQE_FLAGS_ALLOWED: Type = 2;
    pub const IORING_RESTRICTION_SQE_FLAGS_REQUIRED: Type = 3;
    pub const IORING_RESTRICTION_LAST: Type = 4;
}
pub mod _bindgen_ty_4 {
    pub type Type = ::std::os::raw::c_uint;
    pub const IORING_REG_WAIT_TS: Type = 1;
}
/// Registered wait-argument region entry (`IORING_REGISTER_MEM_REGION` +
/// `IORING_MEM_REGION_REG_WAIT_ARG`).
#[repr(C)]
#[derive(Debug, Default, Copy, Clone, PartialEq)]
pub struct io_uring_reg_wait {
    pub ts: __kernel_timespec,
    pub min_wait_usec: __u32,
    pub flags: __u32,
    pub sigmask: __u64,
    pub sigmask_sz: __u32,
    pub pad: [__u32; 3usize],
    pub pad2: [__u64; 2usize],
}
#[allow(clippy::unnecessary_operation, clippy::identity_op)]
const _: () = {
    ["Size of io_uring_reg_wait"][::std::mem::size_of::<io_uring_reg_wait>() - 64usize];
    ["Alignment of io_uring_reg_wait"][::std::mem::align_of::<io_uring_reg_wait>() - 8usize];
    ["Offset of field: io_uring_reg_wait::ts"][::std::mem::offset_of!(io_uring_reg_wait, ts) - 0usize];
    ["Offset of field: io_uring_reg_wait::min_wait_usec"][::std::mem::offset_of!(io_uring_reg_wait, min_wait_usec) - 16usize];
    ["Offset of field: io_uring_reg_wait::flags"][::std::mem::offset_of!(io_uring_reg_wait, flags) - 20usize];
    ["Offset of field: io_uring_reg_wait::sigmask"][::std::mem::offset_of!(io_uring_reg_wait, sigmask) - 24usize];
    ["Offset of field: io_uring_reg_wait::sigmask_sz"][::std::mem::offset_of!(io_uring_reg_wait, sigmask_sz) - 32usize];
    ["Offset of field: io_uring_reg_wait::pad"][::std::mem::offset_of!(io_uring_reg_wait, pad) - 36usize];
    ["Offset of field: io_uring_reg_wait::pad2"]
[::std::mem::offset_of!(io_uring_reg_wait, pad2) - 48usize]; }; #[repr(C)] #[derive(Debug, Default, Copy, Clone, PartialEq)] pub struct io_uring_getevents_arg { pub sigmask: __u64, pub sigmask_sz: __u32, pub min_wait_usec: __u32, pub ts: __u64, } #[allow(clippy::unnecessary_operation, clippy::identity_op)] const _: () = { ["Size of io_uring_getevents_arg"][::std::mem::size_of::() - 24usize]; ["Alignment of io_uring_getevents_arg"] [::std::mem::align_of::() - 8usize]; ["Offset of field: io_uring_getevents_arg::sigmask"] [::std::mem::offset_of!(io_uring_getevents_arg, sigmask) - 0usize]; ["Offset of field: io_uring_getevents_arg::sigmask_sz"] [::std::mem::offset_of!(io_uring_getevents_arg, sigmask_sz) - 8usize]; ["Offset of field: io_uring_getevents_arg::min_wait_usec"] [::std::mem::offset_of!(io_uring_getevents_arg, min_wait_usec) - 12usize]; ["Offset of field: io_uring_getevents_arg::ts"] [::std::mem::offset_of!(io_uring_getevents_arg, ts) - 16usize]; }; #[repr(C)] #[derive(Debug, Default, Copy, Clone, PartialEq)] pub struct io_uring_sync_cancel_reg { pub addr: __u64, pub fd: __s32, pub flags: __u32, pub timeout: __kernel_timespec, pub opcode: __u8, pub pad: [__u8; 7usize], pub pad2: [__u64; 3usize], } #[allow(clippy::unnecessary_operation, clippy::identity_op)] const _: () = { ["Size of io_uring_sync_cancel_reg"] [::std::mem::size_of::() - 64usize]; ["Alignment of io_uring_sync_cancel_reg"] [::std::mem::align_of::() - 8usize]; ["Offset of field: io_uring_sync_cancel_reg::addr"] [::std::mem::offset_of!(io_uring_sync_cancel_reg, addr) - 0usize]; ["Offset of field: io_uring_sync_cancel_reg::fd"] [::std::mem::offset_of!(io_uring_sync_cancel_reg, fd) - 8usize]; ["Offset of field: io_uring_sync_cancel_reg::flags"] [::std::mem::offset_of!(io_uring_sync_cancel_reg, flags) - 12usize]; ["Offset of field: io_uring_sync_cancel_reg::timeout"] [::std::mem::offset_of!(io_uring_sync_cancel_reg, timeout) - 16usize]; ["Offset of field: io_uring_sync_cancel_reg::opcode"] 
[::std::mem::offset_of!(io_uring_sync_cancel_reg, opcode) - 32usize]; ["Offset of field: io_uring_sync_cancel_reg::pad"] [::std::mem::offset_of!(io_uring_sync_cancel_reg, pad) - 33usize]; ["Offset of field: io_uring_sync_cancel_reg::pad2"] [::std::mem::offset_of!(io_uring_sync_cancel_reg, pad2) - 40usize]; }; #[repr(C)] #[derive(Debug, Default, Copy, Clone, PartialEq)] pub struct io_uring_file_index_range { pub off: __u32, pub len: __u32, pub resv: __u64, } #[allow(clippy::unnecessary_operation, clippy::identity_op)] const _: () = { ["Size of io_uring_file_index_range"] [::std::mem::size_of::() - 16usize]; ["Alignment of io_uring_file_index_range"] [::std::mem::align_of::() - 8usize]; ["Offset of field: io_uring_file_index_range::off"] [::std::mem::offset_of!(io_uring_file_index_range, off) - 0usize]; ["Offset of field: io_uring_file_index_range::len"] [::std::mem::offset_of!(io_uring_file_index_range, len) - 4usize]; ["Offset of field: io_uring_file_index_range::resv"] [::std::mem::offset_of!(io_uring_file_index_range, resv) - 8usize]; }; #[repr(C)] #[derive(Debug, Default, Copy, Clone, PartialEq)] pub struct io_uring_recvmsg_out { pub namelen: __u32, pub controllen: __u32, pub payloadlen: __u32, pub flags: __u32, } #[allow(clippy::unnecessary_operation, clippy::identity_op)] const _: () = { ["Size of io_uring_recvmsg_out"][::std::mem::size_of::() - 16usize]; ["Alignment of io_uring_recvmsg_out"][::std::mem::align_of::() - 4usize]; ["Offset of field: io_uring_recvmsg_out::namelen"] [::std::mem::offset_of!(io_uring_recvmsg_out, namelen) - 0usize]; ["Offset of field: io_uring_recvmsg_out::controllen"] [::std::mem::offset_of!(io_uring_recvmsg_out, controllen) - 4usize]; ["Offset of field: io_uring_recvmsg_out::payloadlen"] [::std::mem::offset_of!(io_uring_recvmsg_out, payloadlen) - 8usize]; ["Offset of field: io_uring_recvmsg_out::flags"] [::std::mem::offset_of!(io_uring_recvmsg_out, flags) - 12usize]; }; pub mod io_uring_socket_op { pub type Type = 
::std::os::raw::c_uint;
    pub const SOCKET_URING_OP_SIOCINQ: Type = 0;
    pub const SOCKET_URING_OP_SIOCOUTQ: Type = 1;
    pub const SOCKET_URING_OP_GETSOCKOPT: Type = 2;
    pub const SOCKET_URING_OP_SETSOCKOPT: Type = 3;
}

================================================
FILE: src/vmm/src/io_uring/mod.rs
================================================
// Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

mod generated;
pub mod operation;
mod probe;
mod queue;
pub mod restriction;

use std::collections::HashSet;
use std::fmt::Debug;
use std::fs::File;
use std::io::Error as IOError;
use std::os::unix::io::{AsRawFd, FromRawFd, RawFd};

use generated::io_uring_params;
use operation::{Cqe, FixedFd, OpCode, Operation};
use probe::{PROBE_LEN, ProbeWrapper};
pub use queue::completion::CQueueError;
use queue::completion::CompletionQueue;
pub use queue::submission::SQueueError;
use queue::submission::SubmissionQueue;
use restriction::Restriction;
use vmm_sys_util::syscall::SyscallReturnCode;

use crate::io_uring::generated::io_uring_register_op;

// IO_uring operations that we require to be supported by the host kernel.
const REQUIRED_OPS: [OpCode; 2] = [OpCode::Read, OpCode::Write];
// Taken from linux/fs/io_uring.c
const IORING_MAX_FIXED_FILES: usize = 1 << 15;

#[derive(Debug, thiserror::Error, displaydoc::Display)]
/// IoUring Error.
pub enum IoUringError {
    /// Error originating in the completion queue: {0}
    CQueue(CQueueError),
    /// Could not enable the ring: {0}
    Enable(IOError),
    /// A FamStructWrapper operation has failed: {0}
    Fam(vmm_sys_util::fam::Error),
    /// The number of ops in the ring is >= CQ::count
    FullCQueue,
    /// Fd was not registered: {0}
    InvalidFixedFd(FixedFd),
    /// There are no registered fds.
    NoRegisteredFds,
    /// Error probing the io_uring subsystem: {0}
    Probe(IOError),
    /// Could not register eventfd: {0}
    RegisterEventfd(IOError),
    /// Could not register file: {0}
    RegisterFile(IOError),
    /// Attempted to register too many files.
    RegisterFileLimitExceeded,
    /// Could not register restrictions: {0}
    RegisterRestrictions(IOError),
    /// Error calling io_uring_setup: {0}
    Setup(IOError),
    /// Error originating in the submission queue: {0}
    SQueue(SQueueError),
    /// Required feature is not supported on the host kernel: {0}
    UnsupportedFeature(&'static str),
    /// Required operation is not supported on the host kernel: {0}
    UnsupportedOperation(&'static str),
}

impl IoUringError {
    /// Return true if this error is caused by a full submission or completion queue.
    pub fn is_throttling_err(&self) -> bool {
        matches!(
            self,
            Self::FullCQueue | Self::SQueue(SQueueError::FullQueue)
        )
    }
}

/// Main object representing an io_uring instance.
///
/// `T` is the caller-chosen `user_data` payload type attached to each operation;
/// it is stored in an internal slab and keyed by the kernel's 64-bit cookie.
#[derive(Debug)]
pub struct IoUring<T> {
    registered_fds_count: u32,
    squeue: SubmissionQueue,
    cqueue: CompletionQueue,
    // Make sure the fd is declared after the queues, so that it isn't dropped before them.
    // If we drop the queues after the File, the associated kernel mem will never be freed.
    // The correct cleanup order is munmap(rings) -> close(fd).
    // We don't need to manually drop the fields in order, since Rust has a well defined drop
    // order.
    fd: File,
    // The total number of ops. These includes the ops on the submission queue, the in-flight ops
    // and the ops that are in the CQ, but haven't been popped yet.
    num_ops: u32,
    // Maps the kernel `user_data` cookie back to the caller's `T` payload.
    slab: slab::Slab<T>,
}

impl<T: Debug> IoUring<T> {
    /// Create a new instance.
    ///
    /// # Arguments
    ///
    /// * `num_entries` - Requested number of entries in the ring. Will be rounded up to the
    ///   nearest power of two.
    /// * `files` - Files to be registered for IO.
    /// * `restrictions` - Vector of [`Restriction`](restriction/enum.Restriction.html)s
    /// * `eventfd` - Optional eventfd for receiving completion notifications.
pub fn new( num_entries: u32, files: Vec<&File>, restrictions: Vec, eventfd: Option, ) -> Result { let mut params = io_uring_params { // Create the ring as disabled, so that we may register restrictions. flags: generated::IORING_SETUP_R_DISABLED, ..Default::default() }; // SAFETY: Safe because values are valid and we check the return value. let fd = SyscallReturnCode(unsafe { libc::syscall( libc::SYS_io_uring_setup, num_entries, &mut params as *mut io_uring_params, ) }) .into_result() .map_err(IoUringError::Setup)?; // Safe to unwrap because the fd is valid. let fd = RawFd::try_from(fd).unwrap(); // SAFETY: Safe because the fd is valid and because this struct owns the fd. let file = unsafe { File::from_raw_fd(fd) }; Self::check_features(params)?; let squeue = SubmissionQueue::new(fd, ¶ms).map_err(IoUringError::SQueue)?; let cqueue = CompletionQueue::new(fd, ¶ms).map_err(IoUringError::CQueue)?; let slab = slab::Slab::with_capacity(params.sq_entries as usize + params.cq_entries as usize); let mut instance = Self { squeue, cqueue, fd: file, registered_fds_count: 0, num_ops: 0, slab, }; instance.check_operations()?; if let Some(eventfd) = eventfd { instance.register_eventfd(eventfd)?; } instance.register_restrictions(restrictions)?; instance.register_files(files)?; instance.enable()?; Ok(instance) } /// Push an [`Operation`](operation/struct.Operation.html) onto the submission queue. 
pub fn push(&mut self, op: Operation) -> Result<(), (IoUringError, T)> { // validate that we actually did register fds let fd = op.fd(); match self.registered_fds_count { 0 => Err((IoUringError::NoRegisteredFds, op.user_data)), len if fd >= len => Err((IoUringError::InvalidFixedFd(fd), op.user_data)), _ => { if self.num_ops >= self.cqueue.count() { return Err((IoUringError::FullCQueue, op.user_data)); } self.squeue .push(op.into_sqe(&mut self.slab)) .inspect(|_| { // This is safe since self.num_ops < IORING_MAX_CQ_ENTRIES (65536) self.num_ops += 1; }) .map_err(|(sqe_err, user_data_key)| -> (IoUringError, T) { ( IoUringError::SQueue(sqe_err), // We don't use slab.try_remove here for 2 reasons: // 1. user_data was inserted in slab with step `op.into_sqe` just // before the push op so the user_data key should be valid and if // key is valid then `slab.remove()` will not fail. // 2. If we use `slab.try_remove()` we'll have to find a way to return // a default value for the generic type T which is difficult because // it expands to more crates which don't make it easy to define a // default/clone type for type T. // So believing that `slab.remove` won't fail we don't use // the `slab.try_remove` method. #[allow(clippy::cast_possible_truncation)] self.slab.remove(user_data_key as usize), ) }) } } } /// Pop a completed entry off the completion queue. Returns `Ok(None)` if there are no entries. /// The type `T` must be the same as the `user_data` type used for `push`-ing the operation. pub fn pop(&mut self) -> Result>, IoUringError> { self.cqueue .pop(&mut self.slab) .map(|maybe_cqe| { maybe_cqe.inspect(|_| { // This is safe since the pop-ed CQEs have been previously pushed. However // we use a saturating_sub for extra safety. 
self.num_ops = self.num_ops.saturating_sub(1); }) }) .map_err(IoUringError::CQueue) } fn do_submit(&mut self, min_complete: u32) -> Result { self.squeue .submit(min_complete) .map_err(IoUringError::SQueue) } /// Submit all operations but don't wait for any completions. pub fn submit(&mut self) -> Result { self.do_submit(0) } /// Submit all operations and wait for their completion. pub fn submit_and_wait_all(&mut self) -> Result { self.do_submit(self.num_ops) } /// Return the number of operations currently on the submission queue. pub fn pending_sqes(&self) -> Result { self.squeue.pending().map_err(IoUringError::SQueue) } /// A total of the number of ops in the submission and completion queues, as well as the /// in-flight ops. pub fn num_ops(&self) -> u32 { self.num_ops } fn enable(&mut self) -> Result<(), IoUringError> { // SAFETY: Safe because values are valid and we check the return value. SyscallReturnCode(unsafe { libc::syscall( libc::SYS_io_uring_register, self.fd.as_raw_fd(), io_uring_register_op::IORING_REGISTER_ENABLE_RINGS, std::ptr::null::(), 0, ) }) .into_empty_result() .map_err(IoUringError::Enable) } fn register_files(&mut self, files: Vec<&File>) -> Result<(), IoUringError> { if files.is_empty() { // No-op. return Ok(()); } if (self.registered_fds_count as usize).saturating_add(files.len()) > IORING_MAX_FIXED_FILES { return Err(IoUringError::RegisterFileLimitExceeded); } // SAFETY: Safe because values are valid and we check the return value. 
SyscallReturnCode(unsafe { libc::syscall( libc::SYS_io_uring_register, self.fd.as_raw_fd(), io_uring_register_op::IORING_REGISTER_FILES, files .iter() .map(|f| f.as_raw_fd()) .collect::>() .as_mut_slice() .as_mut_ptr() as *const _, files.len(), ) }) .into_empty_result() .map_err(IoUringError::RegisterFile)?; // Safe to truncate since files.len() < IORING_MAX_FIXED_FILES self.registered_fds_count += u32::try_from(files.len()).unwrap(); Ok(()) } fn register_eventfd(&self, fd: RawFd) -> Result<(), IoUringError> { // SAFETY: Safe because values are valid and we check the return value. SyscallReturnCode(unsafe { libc::syscall( libc::SYS_io_uring_register, self.fd.as_raw_fd(), io_uring_register_op::IORING_REGISTER_EVENTFD, (&fd) as *const _, 1, ) }) .into_empty_result() .map_err(IoUringError::RegisterEventfd) } fn register_restrictions(&self, restrictions: Vec) -> Result<(), IoUringError> { if restrictions.is_empty() { // No-op. return Ok(()); } // SAFETY: Safe because values are valid and we check the return value. SyscallReturnCode(unsafe { libc::syscall( libc::SYS_io_uring_register, self.fd.as_raw_fd(), io_uring_register_op::IORING_REGISTER_RESTRICTIONS, restrictions .iter() .map(generated::io_uring_restriction::from) .collect::>() .as_mut_slice() .as_mut_ptr(), restrictions.len(), ) }) .into_empty_result() .map_err(IoUringError::RegisterRestrictions) } fn check_features(params: io_uring_params) -> Result<(), IoUringError> { // We require that the host kernel will never drop completed entries due to an (unlikely) // overflow in the completion queue. // This feature is supported for kernels greater than 5.7. // An alternative fix would be to keep an internal counter that tracks the number of // submitted entries that haven't been completed and makes sure it doesn't exceed // (2 * num_entries). 
if (params.features & generated::IORING_FEAT_NODROP) == 0 { return Err(IoUringError::UnsupportedFeature("IORING_FEAT_NODROP")); } Ok(()) } fn check_operations(&self) -> Result<(), IoUringError> { let mut probes = ProbeWrapper::new(PROBE_LEN).map_err(IoUringError::Fam)?; // SAFETY: Safe because values are valid and we check the return value. SyscallReturnCode(unsafe { libc::syscall( libc::SYS_io_uring_register, self.fd.as_raw_fd(), io_uring_register_op::IORING_REGISTER_PROBE, probes.as_mut_fam_struct_ptr(), PROBE_LEN, ) }) .into_empty_result() .map_err(IoUringError::Probe)?; let supported_opcodes: HashSet = probes .as_slice() .iter() .filter(|op| ((u32::from(op.flags)) & generated::IO_URING_OP_SUPPORTED) != 0) .map(|op| op.op) .collect(); for opcode in REQUIRED_OPS.iter() { if !supported_opcodes.contains(&(*opcode as u8)) { return Err(IoUringError::UnsupportedOperation((*opcode).into())); } } Ok(()) } } #[cfg(test)] mod tests { #![allow(clippy::undocumented_unsafe_blocks)] use std::os::unix::fs::FileExt; use proptest::prelude::*; use proptest::strategy::Strategy; use proptest::test_runner::{Config, TestRunner}; use vm_memory::VolatileMemory; use vmm_sys_util::syscall::SyscallReturnCode; use vmm_sys_util::tempfile::TempFile; /// ------------------------------------- /// BEGIN PROPERTY BASED TESTING use super::*; use crate::vstate::memory::{Bytes, MmapRegion}; fn drain_cqueue(ring: &mut IoUring) { while let Some(entry) = ring.pop().unwrap() { entry.result().unwrap(); // Assert that there were no partial writes. 
let count = entry.result().unwrap(); let user_data = entry.user_data(); assert_eq!(count, user_data); } } fn setup_mem_region(len: usize) -> MmapRegion { const PROT: i32 = libc::PROT_READ | libc::PROT_WRITE; const FLAGS: i32 = libc::MAP_ANONYMOUS | libc::MAP_PRIVATE; let ptr = unsafe { libc::mmap(std::ptr::null_mut(), len, PROT, FLAGS, -1, 0) }; if (ptr as isize) < 0 { panic!("Mmap failed with {}", std::io::Error::last_os_error()); } unsafe { // Use the raw version because we want to unmap memory ourselves. MmapRegion::build_raw(ptr.cast::(), len, PROT, FLAGS).unwrap() } } fn free_mem_region(region: MmapRegion) { unsafe { libc::munmap(region.as_ptr().cast::(), region.len()) }; } fn read_entire_mem_region(region: &MmapRegion) -> Vec { let mut result = vec![0u8; region.len()]; let count = region.as_volatile_slice().read(&mut result[..], 0).unwrap(); assert_eq!(count, region.len()); result } #[allow(clippy::let_with_type_underscore)] fn arbitrary_rw_operation(file_len: u32) -> impl Strategy> { ( // OpCode: 0 -> Write, 1 -> Read. 0..2, // Length of the operation. 0u32..file_len, ) .prop_flat_map(move |(op, len)| { ( // op Just(op), // len Just(len), // offset (0u32..(file_len - len)), // mem region offset (0u32..(file_len - len)), ) }) .prop_map(move |(op, len, off, mem_off)| { // We actually use an offset instead of an address, because we later need to modify // the memory region on which the operation is performed, based on the opcode. let mut operation = match op { 0 => Operation::write(0, mem_off as usize, len, off.into(), len), _ => Operation::read(0, mem_off as usize, len, off.into(), len), }; // Make sure the operations are executed in-order, so that they are equivalent to // their sync counterparts. operation.set_linked(); operation }) } #[test] fn proptest_read_write_correctness() { // Performs a sequence of random read and write operations on two files, with sync and // async IO, respectively. 
// Verifies that the files are identical afterwards and that the read operations returned // the same values. const FILE_LEN: u32 = 1024; // The number of arbitrary operations in a testrun. const OPS_COUNT: usize = 2000; const RING_SIZE: u32 = 128; // Allocate and init memory for holding the data that will be written into the file. let write_mem_region = setup_mem_region(FILE_LEN as usize); let sync_read_mem_region = setup_mem_region(FILE_LEN as usize); let async_read_mem_region = setup_mem_region(FILE_LEN as usize); // Init the write buffers with 0,1,2,... for i in 0..FILE_LEN { write_mem_region .as_volatile_slice() .write_obj(u8::try_from(i % u32::from(u8::MAX)).unwrap(), i as usize) .unwrap(); } // Create two files and init their contents to zeros. let init_contents = [0u8; FILE_LEN as usize]; let file_async = TempFile::new().unwrap().into_file(); file_async.write_all_at(&init_contents, 0).unwrap(); let file_sync = TempFile::new().unwrap().into_file(); file_sync.write_all_at(&init_contents, 0).unwrap(); // Create a custom test runner since we had to add some state buildup to the test. // (Referring to the above initializations). let mut runner = TestRunner::new(Config { #[cfg(target_arch = "x86_64")] cases: 1000, // Should run for about a minute. // Lower the cases on ARM since they take longer and cause coverage test timeouts. #[cfg(target_arch = "aarch64")] cases: 500, ..Config::default() }); runner .run( &proptest::collection::vec(arbitrary_rw_operation(FILE_LEN), OPS_COUNT), |set| { let mut ring = IoUring::new(RING_SIZE, vec![&file_async], vec![], None).unwrap(); for mut operation in set { // Perform the sync op. 
let count = match operation.opcode { OpCode::Write => u32::try_from( SyscallReturnCode(unsafe { libc::pwrite( file_sync.as_raw_fd(), write_mem_region.as_ptr().add(operation.addr.unwrap()) as *const libc::c_void, operation.len.unwrap() as usize, i64::try_from(operation.offset.unwrap()).unwrap(), ) }) .into_result() .unwrap(), ) .unwrap(), OpCode::Read => u32::try_from( SyscallReturnCode(unsafe { libc::pread( file_sync.as_raw_fd(), sync_read_mem_region .as_ptr() .add(operation.addr.unwrap()) .cast::(), operation.len.unwrap() as usize, i64::try_from(operation.offset.unwrap()).unwrap(), ) }) .into_result() .unwrap(), ) .unwrap(), _ => unreachable!(), }; if count < operation.len.unwrap() { panic!("Synchronous partial operation: {:?}", operation); } // Perform the async op. // Modify the operation address based on the opcode. match operation.opcode { OpCode::Write => { operation.addr = Some(unsafe { write_mem_region.as_ptr().add(operation.addr.unwrap()) as usize }) } OpCode::Read => { operation.addr = Some(unsafe { async_read_mem_region.as_ptr().add(operation.addr.unwrap()) as usize }) } _ => unreachable!(), }; // If the ring is full, submit and wait. if ring.pending_sqes().unwrap() == RING_SIZE { ring.submit_and_wait_all().unwrap(); drain_cqueue(&mut ring); } ring.push(operation).unwrap(); } // Submit any left async ops and wait. ring.submit_and_wait_all().unwrap(); drain_cqueue(&mut ring); // Get the write result for async IO. let mut async_result = [0u8; FILE_LEN as usize]; file_async.read_exact_at(&mut async_result, 0).unwrap(); // Get the write result for sync IO. let mut sync_result = [0u8; FILE_LEN as usize]; file_sync.read_exact_at(&mut sync_result, 0).unwrap(); // Now compare the write results. assert_eq!(sync_result, async_result); // Now compare the read results for sync and async IO. assert_eq!( read_entire_mem_region(&sync_read_mem_region), read_entire_mem_region(&async_read_mem_region) ); Ok(()) }, ) .unwrap(); // Clean up the memory. 
free_mem_region(write_mem_region); free_mem_region(sync_read_mem_region); free_mem_region(async_read_mem_region); } } ================================================ FILE: src/vmm/src/io_uring/operation/cqe.rs ================================================ // Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 use std::fmt::Debug; use crate::io_uring::generated::io_uring_cqe; use crate::vstate::memory::ByteValued; // SAFETY: Struct is POD and contains no references or niches. unsafe impl ByteValued for io_uring_cqe {} /// Wrapper over a completed operation. #[derive(Debug)] pub struct Cqe { res: i32, user_data: T, } impl Cqe { /// Construct a Cqe object. pub fn new(res: i32, user_data: T) -> Self { Self { res, user_data } } /// Return the number of bytes successfully transferred by this operation. pub fn count(&self) -> u32 { u32::try_from(self.res).unwrap_or(0) } /// Return the result associated to the IO operation. pub fn result(&self) -> Result { let res = self.res; if res < 0 { Err(std::io::Error::from_raw_os_error(res)) } else { Ok(u32::try_from(self.res).unwrap()) } } /// Create a new Cqe, applying the passed function to the user_data. pub fn map_user_data U>(self, op: F) -> Cqe { Cqe { res: self.res, user_data: op(self.user_data()), } } /// Consume the object and return the user_data. pub fn user_data(self) -> T { self.user_data } } #[cfg(test)] mod tests { #![allow(clippy::undocumented_unsafe_blocks)] use super::*; #[test] fn test_result() { // Check that `result()` returns an `Error` when `res` is negative. { let user_data = 10_u8; let cqe: Cqe = Cqe::new(-22, user_data); assert_eq!( cqe.result().unwrap_err().kind(), std::io::Error::from_raw_os_error(-22).kind() ); } // Check that `result()` returns Ok() when `res` is positive. 
{ let user_data = 10_u8; let cqe: Cqe = Cqe::new(128, user_data); assert_eq!(cqe.result().unwrap(), 128); } } #[test] fn test_user_data() { let user_data = 10_u8; let cqe: Cqe = Cqe::new(0, user_data); assert_eq!(cqe.user_data(), 10); } #[test] fn test_map_user_data() { let user_data = 10_u8; let cqe: Cqe = Cqe::new(0, user_data); let cqe = cqe.map_user_data(|x| x + 1); assert_eq!(cqe.user_data(), 11); } } ================================================ FILE: src/vmm/src/io_uring/operation/mod.rs ================================================ // Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 //! Module exposing data structures for working with io_uring operations. mod cqe; mod sqe; use std::convert::From; use std::fmt::{self, Debug}; pub use cqe::Cqe; pub(crate) use sqe::Sqe; use crate::io_uring::generated::{io_uring_op, io_uring_sqe, io_uring_sqe_flags_bit}; /// The index of a registered fd. pub type FixedFd = u32; #[repr(u8)] #[derive(Debug, Clone, Copy)] // These constants are generated as u32, but we use u8; const try_from() is unstable #[allow(clippy::cast_possible_truncation)] /// Supported operation types. pub enum OpCode { /// Read operation. Read = io_uring_op::IORING_OP_READ as u8, /// Write operation. Write = io_uring_op::IORING_OP_WRITE as u8, /// Fsync operation. Fsync = io_uring_op::IORING_OP_FSYNC as u8, } // Useful for outputting errors. impl From for &'static str { fn from(opcode: OpCode) -> Self { match opcode { OpCode::Read => "read", OpCode::Write => "write", OpCode::Fsync => "fsync", } } } /// Operation type for populating the submission queue, parametrised with the `user_data` type `T`. /// The `user_data` is used for identifying the operation once completed. pub struct Operation { fd: FixedFd, pub(crate) opcode: OpCode, pub(crate) addr: Option, pub(crate) len: Option, flags: u8, pub(crate) offset: Option, pub(crate) user_data: T, } // Needed for proptesting. 
impl fmt::Debug for Operation { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!( f, " Operation {{ opcode: {:?}, addr: {:?}, len: {:?}, offset: {:?}, }} ", self.opcode, self.addr, self.len, self.offset ) } } #[allow(clippy::len_without_is_empty)] impl Operation { /// Construct a read operation. pub fn read(fd: FixedFd, addr: usize, len: u32, offset: u64, user_data: T) -> Self { Self { fd, opcode: OpCode::Read, addr: Some(addr), len: Some(len), flags: 0, offset: Some(offset), user_data, } } /// Construct a write operation. pub fn write(fd: FixedFd, addr: usize, len: u32, offset: u64, user_data: T) -> Self { Self { fd, opcode: OpCode::Write, addr: Some(addr), len: Some(len), flags: 0, offset: Some(offset), user_data, } } /// Construct a fsync operation. pub fn fsync(fd: FixedFd, user_data: T) -> Self { Self { fd, opcode: OpCode::Fsync, addr: None, len: None, flags: 0, offset: None, user_data, } } pub(crate) fn fd(&self) -> FixedFd { self.fd } // Needed for proptesting. #[cfg(test)] pub(crate) fn set_linked(&mut self) { self.flags |= 1 << io_uring_sqe_flags_bit::IOSQE_IO_LINK_BIT; } /// Transform the operation into an `Sqe`. /// Note: remember remove user_data from slab or it will leak. pub(crate) fn into_sqe(self, slab: &mut slab::Slab) -> Sqe { // SAFETY: // Safe because all-zero value is valid. The sqe is made up of integers and raw pointers. let mut inner: io_uring_sqe = unsafe { std::mem::zeroed() }; inner.opcode = self.opcode as u8; inner.fd = i32::try_from(self.fd).unwrap(); // Simplifying assumption that we only used pre-registered FDs. 
inner.flags = self.flags | (1 << io_uring_sqe_flags_bit::IOSQE_FIXED_FILE_BIT); if let Some(addr) = self.addr { inner.__bindgen_anon_2.addr = addr as u64; } if let Some(len) = self.len { inner.len = len; } if let Some(offset) = self.offset { inner.__bindgen_anon_1.off = offset; } inner.user_data = slab.insert(self.user_data) as u64; Sqe::new(inner) } } ================================================ FILE: src/vmm/src/io_uring/operation/sqe.rs ================================================ // Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 use std::fmt::{self}; use crate::io_uring::generated::io_uring_sqe; use crate::vstate::memory::ByteValued; // SAFETY: Struct is POD and contains no references or niches. unsafe impl ByteValued for io_uring_sqe {} /// Newtype wrapper over a raw sqe. pub(crate) struct Sqe(pub(crate) io_uring_sqe); impl fmt::Debug for Sqe { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("Sqe").finish() } } impl Sqe { /// Construct a new sqe. pub(crate) fn new(inner: io_uring_sqe) -> Self { Self(inner) } /// Return the key to the `user_data` stored in slab. pub(crate) fn user_data(&self) -> u64 { self.0.user_data } } #[cfg(test)] mod tests { #![allow(clippy::undocumented_unsafe_blocks)] use super::*; #[test] fn test_user_data() { let user_data = 10_u64; let mut inner: io_uring_sqe = unsafe { std::mem::zeroed() }; inner.user_data = user_data; let sqe: Sqe = Sqe::new(inner); assert_eq!(sqe.user_data(), 10); } } ================================================ FILE: src/vmm/src/io_uring/probe.rs ================================================ // Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
// SPDX-License-Identifier: Apache-2.0 use vmm_sys_util::fam::{FamStruct, FamStructWrapper}; use vmm_sys_util::generate_fam_struct_impl; use crate::io_uring::generated::{io_uring_probe, io_uring_probe_op}; // There is no max for the number of operations returned by probing. So we fallback to using the // number of values representable in a u8; pub(crate) const PROBE_LEN: usize = u8::MAX as usize + 1; generate_fam_struct_impl!( io_uring_probe, io_uring_probe_op, ops, u8, ops_len, PROBE_LEN ); pub(crate) type ProbeWrapper = FamStructWrapper; ================================================ FILE: src/vmm/src/io_uring/queue/completion.rs ================================================ // Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 use std::fmt::Debug; use std::num::Wrapping; use std::os::unix::io::RawFd; use std::sync::atomic::Ordering; use vm_memory::{Bytes, VolatileMemory, VolatileMemoryError}; use super::mmap::{MmapError, mmap}; use crate::io_uring::generated; use crate::io_uring::operation::Cqe; use crate::vstate::memory::MmapRegion; #[derive(Debug, thiserror::Error, displaydoc::Display)] /// CQueue Error. pub enum CQueueError { /// Error mapping the ring: {0} Mmap(#[from] MmapError), /// Error reading/writing volatile memory: {0} VolatileMemory(#[from] VolatileMemoryError), /// Error in removing data from the slab SlabRemoveFailed, } #[derive(Debug)] pub(crate) struct CompletionQueue { // Offsets. head_off: usize, tail_off: usize, cqes_off: usize, // Cached values. unmasked_head: Wrapping, count: u32, ring_mask: u32, // Mmap-ed cqes ring. cqes: MmapRegion, } impl CompletionQueue { pub(crate) fn new( io_uring_fd: RawFd, params: &generated::io_uring_params, ) -> Result { let offsets = params.cq_off; // Map the CQ_ring. The actual size of the ring is `num_entries * size_of(entry_type)`. // To this we add an offset as per the io_uring specifications. 
// NOTE(review): turbofish type arguments in this chunk appear stripped by
// extraction (e.g. `size_of::<generated::io_uring_cqe>()`) — confirm upstream.
        let ring_size = (params.cq_off.cqes as usize)
            + (params.cq_entries as usize) * std::mem::size_of::();
        let cqes = mmap(ring_size, io_uring_fd, generated::IORING_OFF_CQ_RING.into())?;
        let ring = cqes.as_volatile_slice();
        let ring_mask = ring.read_obj(offsets.ring_mask as usize)?;
        Ok(Self {
            // safe because it's an u32 offset
            head_off: offsets.head as usize,
            // safe because it's an u32 offset
            tail_off: offsets.tail as usize,
            // safe because it's an u32 offset
            cqes_off: offsets.cqes as usize,
            // We can init this to 0 and cache it because we are the only ones modifying it.
            unmasked_head: Wrapping(0),
            count: params.cq_entries,
            ring_mask,
            cqes,
        })
    }
    /// Number of entries the completion ring can hold.
    pub(crate) fn count(&self) -> u32 {
        self.count
    }
    /// Pop the next completed entry, resolving its `user_data` through the slab.
    pub(crate) fn pop(
        &mut self,
        slab: &mut slab::Slab,
    ) -> Result>, CQueueError> {
        let ring = self.cqes.as_volatile_slice();
        // get the head & tail
        let head = self.unmasked_head.0 & self.ring_mask;
        let unmasked_tail = ring.load::(self.tail_off, Ordering::Acquire)?;
        // validate that we have smth to fetch
        if Wrapping(unmasked_tail) - self.unmasked_head > Wrapping(0) {
            let cqe: generated::io_uring_cqe = ring.read_obj(
                self.cqes_off + (head as usize) * std::mem::size_of::(),
            )?;
            // increase the head
            self.unmasked_head += Wrapping(1u32);
            // Release-store publishes the consumed head back to the kernel.
            ring.store(self.unmasked_head.0, self.head_off, Ordering::Release)?;
            let res = cqe.res;
            #[allow(clippy::cast_possible_truncation)]
            let index = cqe.user_data as usize;
            // `user_data` carried the slab key; a missing key is an invariant breach.
            match slab.try_remove(index) {
                Some(user_data) => Ok(Some(Cqe::new(res, user_data))),
                None => Err(CQueueError::SlabRemoveFailed),
            }
        } else {
            Ok(None)
        }
    }
}
impl Drop for CompletionQueue {
    fn drop(&mut self) {
        // SAFETY: Safe because parameters are valid.
        unsafe { libc::munmap(self.cqes.as_ptr().cast::(), self.cqes.size()) };
    }
}
================================================ FILE: src/vmm/src/io_uring/queue/mmap.rs ================================================
// Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0 use std::io::Error as IOError; use std::os::unix::io::RawFd; use vm_memory::mmap::MmapRegionError; use crate::vstate::memory::MmapRegion; #[derive(Debug, thiserror::Error, displaydoc::Display)] pub enum MmapError { /// Os: {0} Os(IOError), /// BuildMmapRegion: {0} BuildMmapRegion(MmapRegionError), } pub(crate) fn mmap(size: usize, fd: RawFd, offset: i64) -> Result { let prot = libc::PROT_READ | libc::PROT_WRITE; let flags = libc::MAP_SHARED | libc::MAP_POPULATE; // SAFETY: Safe because values are valid and we check the return value. let ptr = unsafe { libc::mmap(std::ptr::null_mut(), size, prot, flags, fd, offset) }; if (ptr as isize) < 0 { return Err(MmapError::Os(IOError::last_os_error())); } // SAFETY: Safe because the mmap did not return error. unsafe { MmapRegion::build_raw(ptr.cast::(), size, prot, flags) .map_err(MmapError::BuildMmapRegion) } } ================================================ FILE: src/vmm/src/io_uring/queue/mod.rs ================================================ // Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 pub mod completion; mod mmap; pub mod submission; ================================================ FILE: src/vmm/src/io_uring/queue/submission.rs ================================================ // Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 use std::fmt::Debug; use std::io::Error as IOError; use std::mem; use std::num::Wrapping; use std::os::unix::io::RawFd; use std::sync::atomic::Ordering; use vm_memory::{VolatileMemory, VolatileMemoryError}; use vmm_sys_util::syscall::SyscallReturnCode; use super::mmap::{MmapError, mmap}; use crate::io_uring::generated; use crate::io_uring::operation::Sqe; use crate::vstate::memory::{Bytes, MmapRegion}; #[derive(Debug, thiserror::Error, displaydoc::Display)] /// SQueue Error. pub enum SQueueError { /// The queue is full. 
FullQueue,
    /// Error mapping the ring: {0}
    Mmap(#[from] MmapError),
    /// Error reading/writing volatile memory: {0}
    VolatileMemory(#[from] VolatileMemoryError),
    /// Error returned by `io_uring_enter`: {0}
    Submit(#[from] IOError),
}
/// Submission side of an io_uring: writes sqes into the kernel-shared ring.
#[derive(Debug)]
pub(crate) struct SubmissionQueue {
    io_uring_fd: RawFd,
    // Offsets.
    head_off: usize,
    tail_off: usize,
    // Cached values.
    ring_mask: u32,
    count: u32,
    // NOTE(review): type parameter appears stripped by extraction
    // (presumably `Wrapping<u32>`) — confirm against upstream.
    unmasked_tail: Wrapping,
    // Mmap-ed ring.
    ring: MmapRegion,
    // Mmap-ed sqes.
    sqes: MmapRegion,
    // Number of ops yet to be submitted.
    to_submit: u32,
}
impl SubmissionQueue {
    pub(crate) fn new(
        io_uring_fd: RawFd,
        params: &generated::io_uring_params,
    ) -> Result {
        let (ring, sqes) = Self::mmap(io_uring_fd, params)?;
        let ring_slice = ring.as_volatile_slice();
        // since we don't need the extra layer of indirection, we can simply map the index array
        // to be array[i] = i;
        let sq_array = ring_slice.offset(params.sq_off.array as usize)?;
        for i in 0..params.sq_entries {
            sq_array.write_obj(i, mem::size_of::() * (i as usize))?;
        }
        let ring_mask = ring_slice.read_obj(params.sq_off.ring_mask as usize)?;
        Ok(Self {
            io_uring_fd,
            head_off: params.sq_off.head as usize,
            tail_off: params.sq_off.tail as usize,
            ring_mask,
            count: params.sq_entries,
            // We can init this to 0 and cache it because we are the only ones modifying it.
unmasked_tail: Wrapping(0),
            ring,
            sqes,
            to_submit: 0,
        })
    }
    /// Push one sqe into the ring; on failure the caller gets back the `user_data` key.
    pub(crate) fn push(&mut self, sqe: Sqe) -> Result<(), (SQueueError, u64)> {
        let ring_slice = self.ring.as_volatile_slice();
        // get the sqe tail
        let tail = self.unmasked_tail.0 & self.ring_mask;
        // get the pending sqes
        let pending = match self.pending() {
            Ok(n) => n,
            Err(err) => return Err((err, sqe.user_data())),
        };
        if pending >= self.count {
            return Err((SQueueError::FullQueue, sqe.user_data()));
        }
        // retrieve and populate the sqe
        if let Err(err) = self.sqes.as_volatile_slice().write_obj(
            sqe.0,
            (tail as usize) * mem::size_of::(),
        ) {
            return Err((SQueueError::VolatileMemory(err), sqe.user_data()));
        }
        // increment the sqe tail
        self.unmasked_tail += Wrapping(1u32);
        // Release-store publishes the new tail to the kernel.
        if let Err(err) = ring_slice.store(self.unmasked_tail.0, self.tail_off, Ordering::Release) {
            return Err((SQueueError::VolatileMemory(err), sqe.user_data()));
        }
        // This is safe since we already checked if there is enough space in the queue;
        self.to_submit += 1;
        Ok(())
    }
    /// Submit all pushed sqes, optionally waiting for `min_complete` completions.
    pub(crate) fn submit(&mut self, min_complete: u32) -> Result {
        if self.to_submit == 0 && min_complete == 0 {
            // Nothing to submit and nothing to wait for.
            return Ok(0);
        }
        let mut flags = 0;
        if min_complete > 0 {
            flags |= generated::IORING_ENTER_GETEVENTS;
        }
        // SAFETY: Safe because values are valid and we check the return value.
        let submitted = SyscallReturnCode(unsafe {
            libc::syscall(
                libc::SYS_io_uring_enter,
                self.io_uring_fd,
                self.to_submit,
                min_complete,
                flags,
                std::ptr::null::(),
            )
        })
        .into_result()?;
        // It's safe to convert to u32 since the syscall didn't return an error.
        let submitted = u32::try_from(submitted).unwrap();
        // This is safe since submitted <= self.to_submit. However we use a saturating_sub
        // for extra safety.
        self.to_submit = self.to_submit.saturating_sub(submitted);
        Ok(submitted)
    }
    fn mmap(
        io_uring_fd: RawFd,
        params: &generated::io_uring_params,
    ) -> Result<(MmapRegion, MmapRegion), SQueueError> {
        // map the SQ_ring.
// The actual size of the ring is `num_entries * size_of(entry_type)`.
        // To this we add an offset as per the io_uring specifications.
        let sqe_ring_size = (params.sq_off.array as usize)
            + (params.sq_entries as usize) * mem::size_of::();
        let sqe_ring = mmap(
            sqe_ring_size,
            io_uring_fd,
            generated::IORING_OFF_SQ_RING.into(),
        )?;
        // map the SQEs.
        let sqes_array_size = (params.sq_entries as usize) * mem::size_of::();
        let sqes = mmap(
            sqes_array_size,
            io_uring_fd,
            generated::IORING_OFF_SQES.into(),
        )?;
        Ok((sqe_ring, sqes))
    }
    /// Number of pushed-but-unconsumed sqes (tail minus the kernel's head).
    pub(crate) fn pending(&self) -> Result {
        let ring_slice = self.ring.as_volatile_slice();
        // get the sqe head
        let unmasked_head = ring_slice.load::(self.head_off, Ordering::Acquire)?;
        Ok((self.unmasked_tail - Wrapping(unmasked_head)).0)
    }
}
impl Drop for SubmissionQueue {
    fn drop(&mut self) {
        // SAFETY: Safe because parameters are valid.
        unsafe { libc::munmap(self.ring.as_ptr().cast::(), self.ring.size()) };
        // SAFETY: Safe because parameters are valid.
        unsafe { libc::munmap(self.sqes.as_ptr().cast::(), self.sqes.size()) };
    }
}
================================================ FILE: src/vmm/src/io_uring/restriction.rs ================================================
// Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0
//! Seccomp-like restrictions for the allowed operations on an IoUring instance.
//!
//! One can configure the restrictions to only allow certain operations and/or allow only ops on
//! registered files.
//! If passed to the [`IoUring`] constructor, they take effect immediately and can never be
//! deactivated.
//!
//! [`IoUring`]: ../struct.IoUring.html
use std::convert::From;
use crate::io_uring::generated::{
    io_uring_register_restriction_op, io_uring_restriction, io_uring_sqe_flags_bit,
};
use crate::io_uring::operation::OpCode;
/// Adds support for restricting the operations allowed by io_uring.
#[derive(Debug)]
pub enum Restriction {
    /// Allow an operation.
AllowOpCode(OpCode),
    /// Only allow operations on pre-registered fds.
    RequireFixedFds,
}
impl From<&Restriction> for io_uring_restriction {
    // Translate the high-level restriction into the raw register struct the
    // kernel expects for IORING_REGISTER_RESTRICTIONS.
    fn from(restriction: &Restriction) -> Self {
        use Restriction::*;
        // SAFETY: Safe because it only contains integer values.
        let mut instance: Self = unsafe { std::mem::zeroed() };
        match restriction {
            AllowOpCode(opcode) => {
                instance.opcode =
                    u16::try_from(io_uring_register_restriction_op::IORING_RESTRICTION_SQE_OP)
                        .unwrap();
                instance.__bindgen_anon_1.sqe_op = *opcode as u8;
            }
            RequireFixedFds => {
                instance.opcode = u16::try_from(
                    io_uring_register_restriction_op::IORING_RESTRICTION_SQE_FLAGS_REQUIRED,
                )
                .unwrap();
                instance.__bindgen_anon_1.sqe_flags =
                    1 << io_uring_sqe_flags_bit::IOSQE_FIXED_FILE_BIT;
            }
        };
        instance
    }
}
================================================ FILE: src/vmm/src/lib.rs ================================================
// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Portions Copyright 2017 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the THIRD-PARTY file.
//! Virtual Machine Monitor that leverages the Linux Kernel-based Virtual Machine (KVM),
//! and other virtualization features to run a single lightweight micro-virtual
//! machine (microVM).
#![warn(missing_docs)]
#![warn(clippy::undocumented_unsafe_blocks)]
#![allow(clippy::blanket_clippy_restriction_lints)]
/// Implements platform specific functionality.
/// Supported platforms: x86_64 and aarch64.
pub mod arch;
/// High-level interface over Linux io_uring.
///
/// Aims to provide an easy-to-use interface, while making some Firecracker-specific simplifying
/// assumptions. The crate does not currently aim at supporting all io_uring features and use
/// cases. For example, it only works with pre-registered fds and read/write/fsync requests.
///
/// Requires at least kernel version 5.10.51.
/// For more information on io_uring, refer to the man pages.
/// [This pdf](https://kernel.dk/io_uring.pdf) is also very useful, though outdated at times.
pub mod io_uring;
/// # Rate Limiter
///
/// Provides a rate limiter written in Rust useful for IO operations that need to
/// be throttled.
///
/// ## Behavior
///
/// The rate limiter starts off as 'unblocked' with two token buckets configured
/// with the values passed in the `RateLimiter::new()` constructor.
/// All subsequent accounting is done independently for each token bucket based
/// on the `TokenType` used. If any of the buckets runs out of budget, the limiter
/// goes in the 'blocked' state. At this point an internal timer is set up which
/// will later 'wake up' the user in order to retry sending data. The 'wake up'
/// notification will be dispatched as an event on the FD provided by the `AsRawFD`
/// trait implementation.
///
/// The contract is that the user shall also call the `event_handler()` method on
/// receipt of such an event.
///
/// The token buckets are replenished when a called `consume()` doesn't find enough
/// tokens in the bucket. The amount of tokens replenished is automatically calculated
/// to respect the `complete_refill_time` configuration parameter provided by the user.
/// The token buckets will never replenish above their respective `size`.
///
/// Each token bucket can start off with a `one_time_burst` initial extra capacity
/// on top of their `size`. This initial extra credit does not replenish and
/// can be used for an initial burst of data.
///
/// The granularity for 'wake up' events when the rate limiter is blocked is
/// currently hardcoded to `100 milliseconds`.
///
/// ## Limitations
///
/// This rate limiter implementation relies on the *Linux kernel's timerfd* so its
/// usage is limited to Linux systems.
///
/// Another particularity of this implementation is that it is not self-driving.
/// It is meant to be used in an external event loop and thus implements the `AsRawFd`
/// trait and provides an *event-handler* as part of its API. This *event-handler*
/// needs to be called by the user on every event on the rate limiter's `AsRawFd` FD.
pub mod rate_limiter;
/// Module for handling ACPI tables.
/// Currently, we only use ACPI on x86 microVMs.
#[cfg(target_arch = "x86_64")]
pub mod acpi;
/// Handles setup and initialization a `Vmm` object.
pub mod builder;
/// Types for guest configuration.
pub mod cpu_config;
// Device manager wiring is crate-internal; not part of the public API.
pub(crate) mod device_manager;
/// Emulates virtual and hardware devices.
#[allow(missing_docs)]
pub mod devices;
/// minimalist HTTP/TCP/IPv4 stack named DUMBO
pub mod dumbo;
/// Support for GDB debugging the guest
#[cfg(feature = "gdb")]
pub mod gdb;
/// Logger
pub mod logger;
/// microVM Metadata Service MMDS
pub mod mmds;
/// PCI specific emulation code.
pub mod pci;
/// Save/restore utilities.
pub mod persist;
/// Resource store for configured microVM resources.
pub mod resources;
/// microVM RPC API adapters.
pub mod rpc_interface;
/// Seccomp filter utilities.
pub mod seccomp;
/// Signal handling utilities.
pub mod signal_handler;
/// Serialization and deserialization facilities
pub mod snapshot;
/// Utility functions for integration and benchmark testing
pub mod test_utils;
/// Utility functions and struct
pub mod utils;
/// Wrappers over structures used to configure the VMM.
pub mod vmm_config;
/// Module with virtual state structs.
pub mod vstate;
/// Module with initrd.
pub mod initrd;
use std::collections::HashMap;
use std::io;
use std::os::unix::io::AsRawFd;
use std::sync::mpsc::RecvTimeoutError;
use std::sync::{Arc, Barrier, Mutex};
use std::time::Duration;
use device_manager::DeviceManager;
use event_manager::{EventManager as BaseEventManager, EventOps, Events, MutEventSubscriber};
use seccomp::BpfProgram;
use snapshot::Persist;
use userfaultfd::Uffd;
use vmm_sys_util::epoll::EventSet;
use vmm_sys_util::eventfd::EventFd;
use vmm_sys_util::terminal::Terminal;
use vstate::kvm::Kvm;
use vstate::vcpu::{self, StartThreadedError, VcpuSendEventError};
use crate::cpu_config::templates::CpuConfiguration;
use crate::devices::virtio::balloon::device::{HintingStatus, StartHintingCmd};
use crate::devices::virtio::balloon::{
    BALLOON_DEV_ID, Balloon, BalloonConfig, BalloonError, BalloonStats,
};
use crate::devices::virtio::block::BlockError;
use crate::devices::virtio::block::device::Block;
use crate::devices::virtio::device::VirtioDeviceType;
use crate::devices::virtio::mem::device::VirtioMem;
use crate::devices::virtio::mem::{VIRTIO_MEM_DEV_ID, VirtioMemError, VirtioMemStatus};
use crate::devices::virtio::net::Net;
use crate::devices::virtio::pmem::device::Pmem;
use crate::devices::virtio::rng::Entropy;
use crate::devices::virtio::vsock::{Vsock, VsockUnixBackend};
use crate::logger::{METRICS, MetricsError, error, info, warn};
use crate::mmds::data_store::Mmds;
use crate::persist::{MicrovmState, MicrovmStateError, VmInfo};
use crate::rate_limiter::BucketUpdate;
use crate::resources::VmmConfig;
use crate::vmm_config::balloon::BalloonDeviceConfig;
use crate::vmm_config::boot_source::BootSourceConfig;
use crate::vmm_config::entropy::EntropyDeviceConfig;
use crate::vmm_config::instance_info::{InstanceInfo, VmState};
use crate::vmm_config::machine_config::MachineConfig;
use crate::vmm_config::memory_hotplug::MemoryHotplugConfig;
use crate::vmm_config::mmds::MmdsConfig;
use crate::vmm_config::net::NetworkInterfaceConfig;
use crate::vmm_config::vsock::VsockDeviceConfig;
use crate::vstate::memory::{GuestMemory, GuestMemoryMmap, GuestMemoryRegion};
use crate::vstate::vcpu::VcpuState;
pub use crate::vstate::vcpu::{Vcpu, VcpuConfig, VcpuEvent, VcpuHandle, VcpuResponse};
pub use crate::vstate::vm::Vm;
/// Shorthand type for the EventManager flavour used by Firecracker.
// NOTE(review): generic arguments appear stripped by extraction (presumably
// `BaseEventManager<Arc<Mutex<dyn MutEventSubscriber>>>`) — confirm upstream.
pub type EventManager = BaseEventManager>>;
// Since the exit code names e.g. `SIGBUS` are most appropriate yet trigger a test error with the
// clippy lint `upper_case_acronyms` we have disabled this lint for this enum.
/// Vmm exit-code type.
#[allow(clippy::upper_case_acronyms)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FcExitCode {
    /// Success exit code.
    Ok = 0,
    /// Generic error exit code.
    GenericError = 1,
    /// Generic exit code error; not possible to occur if the program logic is sound.
    UnexpectedError = 2,
    /// Firecracker was shut down after intercepting a restricted system call.
    BadSyscall = 148,
    /// Firecracker was shut down after intercepting `SIGBUS`.
    SIGBUS = 149,
    /// Firecracker was shut down after intercepting `SIGSEGV`.
    SIGSEGV = 150,
    /// Firecracker was shut down after intercepting `SIGXFSZ`.
    SIGXFSZ = 151,
    /// Firecracker was shut down after intercepting `SIGXCPU`.
    SIGXCPU = 154,
    /// Firecracker was shut down after intercepting `SIGPIPE`.
    SIGPIPE = 155,
    /// Firecracker was shut down after intercepting `SIGHUP`.
    SIGHUP = 156,
    /// Firecracker was shut down after intercepting `SIGILL`.
    SIGILL = 157,
    /// Bad configuration for microvm's resources, when using a single json.
    BadConfiguration = 152,
    /// Command line arguments parsing error.
    ArgParsing = 153,
}
/// Timeout used in recv_timeout, when waiting for a vcpu response on
/// Pause/Resume/Save/Restore. A high enough limit that should not be reached during normal usage,
/// used to detect a potential vcpu deadlock.
pub const RECV_TIMEOUT_SEC: Duration = Duration::from_secs(30);
/// Default byte limit of accepted http requests on API and MMDS servers.
pub const HTTP_MAX_PAYLOAD_SIZE: usize = 51200;
/// Errors associated with the VMM internal logic. These errors cannot be generated by direct user
/// input, but can result from bad configuration of the host (for example if Firecracker doesn't
/// have permissions to open the KVM fd).
#[derive(Debug, thiserror::Error, displaydoc::Display)]
pub enum VmmError {
    #[cfg(target_arch = "aarch64")]
    /// Invalid command line error.
    Cmdline,
    /// Device manager error: {0}
    DeviceManager(#[from] device_manager::DeviceManagerCreateError),
    /// MMIO Device manager error: {0}
    MmioDeviceManager(device_manager::mmio::MmioError),
    /// Error getting the KVM dirty bitmap. {0}
    DirtyBitmap(kvm_ioctls::Error),
    /// I8042 error: {0}
    I8042Error(devices::legacy::I8042DeviceError),
    #[cfg(target_arch = "x86_64")]
    /// Cannot add devices to the legacy I/O Bus. {0}
    LegacyIOBus(device_manager::legacy::LegacyDeviceError),
    /// Metrics error: {0}
    Metrics(MetricsError),
    /// Cannot add a device to the MMIO Bus. {0}
    RegisterMMIODevice(device_manager::mmio::MmioError),
    /// Cannot install seccomp filters: {0}
    SeccompFilters(seccomp::InstallationError),
    /// Error writing to the serial console: {0}
    Serial(io::Error),
    /// Error creating the vcpu: {0}
    VcpuCreate(vstate::vcpu::VcpuError),
    /// Cannot send event to vCPU. {0}
    VcpuEvent(vstate::vcpu::VcpuError),
    /// Cannot create a vCPU handle. {0}
    VcpuHandle(vstate::vcpu::VcpuError),
    /// Failed to start vCPUs
    VcpuStart(StartVcpusError),
    /// Failed to pause the vCPUs.
    VcpuPause,
    /// Failed to exit the vCPUs.
    VcpuExit,
    /// Failed to resume the vCPUs.
    VcpuResume,
    /// Failed to message the vCPUs.
    VcpuMessage,
    /// Cannot spawn Vcpu thread: {0}
    VcpuSpawn(io::Error),
    /// Vm error: {0}
    Vm(#[from] vstate::vm::VmError),
    /// Kvm error: {0}
    Kvm(#[from] vstate::kvm::KvmError),
    /// Failed perform action on device: {0}
    FindDeviceError(#[from] device_manager::FindDeviceError),
    /// Block: {0}
    Block(#[from] BlockError),
    /// Balloon: {0}
    Balloon(#[from] BalloonError),
    /// Failed to create memory hotplug device: {0}
    VirtioMem(#[from] VirtioMemError),
}
/// Shorthand type for KVM dirty page bitmap.
// NOTE(review): generic arguments appear stripped by extraction (presumably
// `HashMap<u32, Vec<u64>>`) — confirm upstream.
pub type DirtyBitmap = HashMap>;
/// Returns the size of guest memory, in MiB.
pub(crate) fn mem_size_mib(guest_memory: &GuestMemoryMmap) -> u64 {
    // Sum region lengths in bytes, then shift by 20 to convert to MiB.
    guest_memory.iter().map(|region| region.len()).sum::() >> 20
}
// Error type for [`Vmm::emulate_serial_init`].
/// Emulate serial init error: {0}
#[derive(Debug, thiserror::Error, displaydoc::Display)]
pub struct EmulateSerialInitError(#[from] std::io::Error);
/// Error type for [`Vmm::start_vcpus`].
#[derive(Debug, thiserror::Error, displaydoc::Display)]
pub enum StartVcpusError {
    /// VMM observer init error: {0}
    VmmObserverInit(#[from] vmm_sys_util::errno::Error),
    /// Vcpu handle error: {0}
    VcpuHandle(#[from] StartThreadedError),
}
/// Error type for [`Vmm::dump_cpu_config()`]
#[derive(Debug, thiserror::Error, displaydoc::Display)]
pub enum DumpCpuConfigError {
    /// Failed to send event to vcpu thread: {0}
    SendEvent(#[from] VcpuSendEventError),
    /// Got unexpected response from vcpu thread.
    UnexpectedResponse,
    /// Failed to dump CPU config: {0}
    DumpCpuConfig(#[from] vcpu::VcpuError),
    /// Operation not allowed: {0}
    NotAllowed(String),
}
/// Contains the state and associated methods required for the Firecracker VMM.
#[derive(Debug)]
pub struct Vmm {
    /// The [`InstanceInfo`] state of this [`Vmm`].
    pub instance_info: InstanceInfo,
    /// Machine config
    pub machine_config: MachineConfig,
    boot_source_config: BootSourceConfig,
    shutdown_exit_code: Option,
    // Guest VM core resources.
kvm: Kvm,
    /// VM object
    // NOTE(review): generic argument appears stripped by extraction
    // (presumably `Arc<Vm>`) — confirm upstream.
    pub vm: Arc,
    // Save UFFD in order to keep it open in the Firecracker process, as well.
    #[allow(unused)]
    uffd: Option,
    /// Handles to the vcpu threads with vcpu_fds inside them.
    pub vcpus_handles: Vec,
    // Used by Vcpus and devices to initiate teardown; Vmm should never write here.
    vcpus_exit_evt: EventFd,
    // Device manager
    device_manager: DeviceManager,
}
impl Vmm {
    /// Gets Vmm version.
    pub fn version(&self) -> String {
        self.instance_info.vmm_version.clone()
    }
    /// Gets Vmm instance info.
    pub fn instance_info(&self) -> InstanceInfo {
        self.instance_info.clone()
    }
    /// Gets MMDS reference, if any.
    pub fn get_mmds(&self) -> Option>> {
        let mut mmds = None;
        // MMDS lives behind a net device's namespace; take it from the first
        // net device that carries one.
        self.device_manager
            .for_each_virtio_device(|device_type, device| {
                if device_type == VirtioDeviceType::Net
                    && let Some(net) = device.as_any().downcast_ref::()
                    && let Some(mmds_ns) = &net.mmds_ns
                {
                    mmds = Some(mmds_ns.mmds.clone());
                }
            });
        mmds
    }
    /// Provides the Vmm shutdown exit code if there is one.
    pub fn shutdown_exit_code(&self) -> Option {
        self.shutdown_exit_code
    }
    /// Builds a FullVmConfig from the current Vmm state.
pub fn full_config(&self) -> VmmConfig {
        let mut block = Vec::new();
        let mut net = Vec::new();
        let mut net_with_mmds = Vec::new();
        let mut pmem = Vec::new();
        let mut balloon = None;
        let mut vsock = None;
        let mut entropy = None;
        let mut memory_hotplug = None;
        let mut mmds_ipv4_address = None;
        let mut mmds_ref = None;
        self.device_manager
            .for_each_virtio_device(|device_type, device| match device_type {
                VirtioDeviceType::Block => {
                    if let Some(b) = device.as_any().downcast_ref::() {
                        block.push(b.config());
                    }
                }
                VirtioDeviceType::Net => {
                    if let Some(n) = device.as_any().downcast_ref::() {
                        net.push(NetworkInterfaceConfig::from(n));
                        if let Some(mmds_ns) = &n.mmds_ns {
                            net_with_mmds.push(n.id.clone());
                            // Only the first net device with an MMDS namespace
                            // supplies the store reference and IPv4 address.
                            if mmds_ref.is_none() {
                                mmds_ref = Some(mmds_ns.mmds.clone());
                                mmds_ipv4_address = Some(mmds_ns.ipv4_addr());
                            }
                        }
                    }
                }
                VirtioDeviceType::Pmem => {
                    if let Some(p) = device.as_any().downcast_ref::() {
                        pmem.push(p.config.clone());
                    }
                }
                VirtioDeviceType::Balloon => {
                    if let Some(b) = device.as_any().downcast_ref::() {
                        balloon = Some(BalloonDeviceConfig::from(b.config()));
                    }
                }
                VirtioDeviceType::Vsock => {
                    if let Some(v) = device.as_any().downcast_ref::>() {
                        vsock = Some(VsockDeviceConfig::from(v));
                    }
                }
                VirtioDeviceType::Rng => {
                    if let Some(e) = device.as_any().downcast_ref::() {
                        entropy = Some(EntropyDeviceConfig::from(e));
                    }
                }
                VirtioDeviceType::Mem => {
                    if let Some(m) = device.as_any().downcast_ref::() {
                        memory_hotplug = Some(MemoryHotplugConfig::from(m));
                    }
                }
            });
        let mmds_config = mmds_ref.map(|mmds| {
            let mmds = mmds.lock().expect("Poisoned lock");
            MmdsConfig {
                version: mmds.version(),
                ipv4_address: mmds_ipv4_address,
                network_interfaces: net_with_mmds,
                imds_compat: mmds.imds_compat(),
            }
        });
        // This must match the From<&VmResources> for VmmConfig implementation
        // in resources.rs which is used to retrieve the config before the VM
        // is started.
        VmmConfig {
            balloon,
            drives: block,
            boot_source: self.boot_source_config.clone(),
            cpu_config: None,
            logger: None,
            machine_config: Some(self.machine_config.clone()),
            metrics: None,
            mmds_config,
            network_interfaces: net,
            vsock,
            entropy,
            pmem_devices: pmem,
            // serial_config is marked serde(skip) so that it doesnt end up in snapshots
            serial_config: None,
            memory_hotplug,
        }
    }
    /// Starts the microVM vcpus.
    ///
    /// # Errors
    ///
    /// When:
    /// - [`vmm::VmmEventsObserver::on_vmm_boot`] errors.
    /// - [`vmm::vstate::vcpu::Vcpu::start_threaded`] errors.
    pub fn start_vcpus(
        &mut self,
        mut vcpus: Vec,
        vcpu_seccomp_filter: Arc,
    ) -> Result<(), StartVcpusError> {
        let vcpu_count = vcpus.len();
        // +1 so this thread can also rendezvous with all vcpu threads below.
        let barrier = Arc::new(Barrier::new(vcpu_count + 1));
        let stdin = std::io::stdin().lock();
        // Set raw mode for stdin.
        stdin.set_raw_mode().inspect_err(|&err| {
            warn!("Cannot set raw mode for the terminal. {:?}", err);
        })?;
        // Set non blocking stdin.
        stdin.set_non_block(true).inspect_err(|&err| {
            warn!("Cannot set non block for the terminal. {:?}", err);
        })?;
        self.vcpus_handles.reserve(vcpu_count);
        for mut vcpu in vcpus.drain(..) {
            vcpu.set_mmio_bus(self.vm.common.mmio_bus.clone());
            #[cfg(target_arch = "x86_64")]
            vcpu.kvm_vcpu.set_pio_bus(self.vm.pio_bus.clone());
            self.vcpus_handles.push(vcpu.start_threaded(
                &self.vm,
                vcpu_seccomp_filter.clone(),
                barrier.clone(),
            )?);
        }
        self.instance_info.state = VmState::Paused;
        // Wait for vCPUs to initialize their TLS before moving forward.
        barrier.wait();
        Ok(())
    }
    /// Sends a resume command to the vCPUs.
    pub fn resume_vm(&mut self) -> Result<(), VmmError> {
        self.device_manager.kick_virtio_devices();
        // Send the events.
        self.vcpus_handles
            .iter_mut()
            .try_for_each(|handle| handle.send_event(VcpuEvent::Resume))
            .map_err(|_| VmmError::VcpuMessage)?;
        // Check the responses.
if self
            .vcpus_handles
            .iter()
            // A bounded wait (RECV_TIMEOUT_SEC) guards against a deadlocked vcpu.
            .map(|handle| handle.response_receiver().recv_timeout(RECV_TIMEOUT_SEC))
            .any(|response| !matches!(response, Ok(VcpuResponse::Resumed)))
        {
            return Err(VmmError::VcpuMessage);
        }
        self.instance_info.state = VmState::Running;
        Ok(())
    }
    /// Sends a pause command to the vCPUs.
    pub fn pause_vm(&mut self) -> Result<(), VmmError> {
        // Send the events.
        self.vcpus_handles
            .iter_mut()
            .try_for_each(|handle| handle.send_event(VcpuEvent::Pause))
            .map_err(|_| VmmError::VcpuMessage)?;
        // Check the responses.
        if self
            .vcpus_handles
            .iter()
            .map(|handle| handle.response_receiver().recv_timeout(RECV_TIMEOUT_SEC))
            .any(|response| !matches!(response, Ok(VcpuResponse::Paused)))
        {
            return Err(VmmError::VcpuMessage);
        }
        self.instance_info.state = VmState::Paused;
        Ok(())
    }
    /// Injects CTRL+ALT+DEL keystroke combo in the i8042 device.
    #[cfg(target_arch = "x86_64")]
    pub fn send_ctrl_alt_del(&mut self) -> Result<(), VmmError> {
        self.device_manager
            .legacy_devices
            .i8042
            .lock()
            .expect("i8042 lock was poisoned")
            .trigger_ctrl_alt_del()
            .map_err(VmmError::I8042Error)
    }
    /// Saves the state of a paused Microvm.
    pub fn save_state(&mut self, vm_info: &VmInfo) -> Result {
        use self::MicrovmStateError::SaveVmState;
        // We need to save device state before saving KVM state.
        // Some devices, (at the time of writing this comment block device with async engine)
        // might modify the VirtIO transport and send an interrupt to the guest. If we save KVM
        // state before we save device state, that interrupt will never be delivered to the guest
        // upon resuming from the snapshot.
        let device_states = self.device_manager.save();
        let vcpu_states = self.save_vcpu_states()?;
        let kvm_state = self.kvm.save_state();
        let vm_state = {
            #[cfg(target_arch = "x86_64")]
            {
                self.vm.save_state().map_err(SaveVmState)?
            }
            #[cfg(target_arch = "aarch64")]
            {
                // aarch64 needs per-vcpu MPIDR values to save GIC state.
                let mpidrs = construct_kvm_mpidrs(&vcpu_states);
                self.vm.save_state(&mpidrs).map_err(SaveVmState)?
            }
        };
        Ok(MicrovmState {
            vm_info: vm_info.clone(),
            kvm_state,
            vm_state,
            vcpu_states,
            device_states,
        })
    }
    // Ask every vcpu thread to serialize its state and collect the results.
    fn save_vcpu_states(&mut self) -> Result, MicrovmStateError> {
        for handle in self.vcpus_handles.iter_mut() {
            handle
                .send_event(VcpuEvent::SaveState)
                .map_err(MicrovmStateError::SignalVcpu)?;
        }
        let vcpu_responses = self
            .vcpus_handles
            .iter()
            // `Iterator::collect` can transform a `Vec` into a `Result`.
            .map(|handle| handle.response_receiver().recv_timeout(RECV_TIMEOUT_SEC))
            .collect::, RecvTimeoutError>>()
            .map_err(|_| MicrovmStateError::UnexpectedVcpuResponse)?;
        let vcpu_states = vcpu_responses
            .into_iter()
            .map(|response| match response {
                VcpuResponse::SavedState(state) => Ok(*state),
                VcpuResponse::Error(err) => Err(MicrovmStateError::SaveVcpuState(err)),
                VcpuResponse::NotAllowed(reason) => Err(MicrovmStateError::NotAllowed(reason)),
                _ => Err(MicrovmStateError::UnexpectedVcpuResponse),
            })
            .collect::, MicrovmStateError>>()?;
        Ok(vcpu_states)
    }
    /// Dumps CPU configuration.
    pub fn dump_cpu_config(&mut self) -> Result, DumpCpuConfigError> {
        for handle in self.vcpus_handles.iter_mut() {
            handle
                .send_event(VcpuEvent::DumpCpuConfig)
                .map_err(DumpCpuConfigError::SendEvent)?;
        }
        let vcpu_responses = self
            .vcpus_handles
            .iter()
            .map(|handle| handle.response_receiver().recv_timeout(RECV_TIMEOUT_SEC))
            .collect::, RecvTimeoutError>>()
            .map_err(|_| DumpCpuConfigError::UnexpectedResponse)?;
        let cpu_configs = vcpu_responses
            .into_iter()
            .map(|response| match response {
                VcpuResponse::DumpedCpuConfig(cpu_config) => Ok(*cpu_config),
                VcpuResponse::Error(err) => Err(DumpCpuConfigError::DumpCpuConfig(err)),
                VcpuResponse::NotAllowed(reason) => Err(DumpCpuConfigError::NotAllowed(reason)),
                _ => Err(DumpCpuConfigError::UnexpectedResponse),
            })
            .collect::, DumpCpuConfigError>>()?;
        Ok(cpu_configs)
    }
    /// Updates the path of the host file backing the emulated block device with id `drive_id`.
    /// We update the disk image on the device and its virtio configuration.
pub fn update_block_device_path( &mut self, drive_id: &str, path_on_host: String, ) -> Result<(), VmmError> { self.device_manager .with_virtio_device(drive_id, |block: &mut Block| { block.update_disk_image(path_on_host) })??; Ok(()) } /// Updates the rate limiter parameters for block device with `drive_id` id. pub fn update_block_rate_limiter( &mut self, drive_id: &str, rl_bytes: BucketUpdate, rl_ops: BucketUpdate, ) -> Result<(), VmmError> { self.device_manager .with_virtio_device(drive_id, |block: &mut Block| { block.update_rate_limiter(rl_bytes, rl_ops) })??; Ok(()) } /// Updates the rate limiter parameters for block device with `drive_id` id. pub fn update_vhost_user_block_config(&mut self, drive_id: &str) -> Result<(), VmmError> { self.device_manager .with_virtio_device(drive_id, |block: &mut Block| block.update_config())??; Ok(()) } /// Updates the rate limiter parameters for net device with `net_id` id. pub fn update_net_rate_limiters( &mut self, net_id: &str, rx_bytes: BucketUpdate, rx_ops: BucketUpdate, tx_bytes: BucketUpdate, tx_ops: BucketUpdate, ) -> Result<(), VmmError> { self.device_manager .with_virtio_device(net_id, |net: &mut Net| { net.patch_rate_limiters(rx_bytes, rx_ops, tx_bytes, tx_ops) })?; Ok(()) } /// Returns a reference to the balloon device if present. pub fn balloon_config(&self) -> Result { let config = self .device_manager .with_virtio_device(BALLOON_DEV_ID, |dev: &mut Balloon| dev.config())?; Ok(config) } /// Returns the latest balloon statistics if they are enabled. pub fn latest_balloon_stats(&self) -> Result { let stats = self .device_manager .with_virtio_device(BALLOON_DEV_ID, |dev: &mut Balloon| dev.latest_stats())??; Ok(stats) } /// Updates configuration for the balloon device target size. 
    pub fn update_balloon_config(&mut self, amount_mib: u32) -> Result<(), VmmError> {
        self.device_manager
            .with_virtio_device(BALLOON_DEV_ID, |dev: &mut Balloon| {
                dev.update_size(amount_mib)
            })??;
        Ok(())
    }

    /// Updates configuration for the balloon device as described in `balloon_stats_update`.
    pub fn update_balloon_stats_config(
        &mut self,
        stats_polling_interval_s: u16,
    ) -> Result<(), VmmError> {
        self.device_manager
            .with_virtio_device(BALLOON_DEV_ID, |dev: &mut Balloon| {
                dev.update_stats_polling_interval(stats_polling_interval_s)
            })??;
        Ok(())
    }

    /// Returns the current state of the memory hotplug device.
    pub fn memory_hotplug_status(&self) -> Result {
        self.device_manager
            .with_virtio_device(VIRTIO_MEM_DEV_ID, |dev: &mut VirtioMem| dev.status())
            .map_err(VmmError::FindDeviceError)
    }

    /// Requests a new size for the hotpluggable memory region.
    // NOTE(review): the previous doc comment ("Returns the current state of the
    // memory hotplug device") looked like a copy-paste from
    // `memory_hotplug_status`; the body forwards `requested_size_mib` to the
    // device instead.
    pub fn update_memory_hotplug_size(&self, requested_size_mib: usize) -> Result<(), VmmError> {
        self.device_manager
            .with_virtio_device(VIRTIO_MEM_DEV_ID, |dev: &mut VirtioMem| {
                dev.update_requested_size(requested_size_mib)
            })
            .map_err(VmmError::FindDeviceError)??;
        Ok(())
    }

    /// Starts the balloon free page hinting run
    pub fn start_balloon_hinting(&mut self, cmd: StartHintingCmd) -> Result<(), VmmError> {
        self.device_manager
            .with_virtio_device(BALLOON_DEV_ID, |dev: &mut Balloon| dev.start_hinting(cmd))??;
        Ok(())
    }

    /// Retrieves the status of the balloon hinting run
    pub fn get_balloon_hinting_status(&mut self) -> Result {
        let status = self
            .device_manager
            .with_virtio_device(BALLOON_DEV_ID, |dev: &mut Balloon| dev.get_hinting_status())??;
        Ok(status)
    }

    /// Stops the balloon free page hinting run
    pub fn stop_balloon_hinting(&mut self) -> Result<(), VmmError> {
        self.device_manager
            .with_virtio_device(BALLOON_DEV_ID, |dev: &mut Balloon| dev.stop_hinting())??;
        Ok(())
    }

    /// Signals Vmm to stop and exit.
    pub fn stop(&mut self, exit_code: FcExitCode) {
        info!("Vmm is stopping.");

        // Break the main event loop, propagating the Vmm exit-code.
        self.shutdown_exit_code = Some(exit_code);
    }

    /// Gets a reference to kvm-ioctls Vm
    #[cfg(feature = "gdb")]
    pub fn vm(&self) -> &Vm {
        &self.vm
    }
}

/// Process the content of the MPIDR_EL1 register in order to be able to pass it to KVM
///
/// The kernel expects to find the four affinity levels of the MPIDR in the first 32 bits of the
/// VGIC register attribute:
/// https://elixir.free-electrons.com/linux/v4.14.203/source/virt/kvm/arm/vgic/vgic-kvm-device.c#L445.
///
/// The format of the MPIDR_EL1 register is:
/// | 39 .... 32 | 31 .... 24 | 23 .... 16 | 15 .... 8 | 7 .... 0 |
/// |    Aff3    |   Other    |    Aff2    |   Aff1    |   Aff0   |
///
/// The KVM mpidr format is:
/// | 63 .... 56 | 55 .... 48 | 47 .... 40 | 39 .... 32 |
/// |    Aff3    |    Aff2    |    Aff1    |    Aff0    |
/// As specified in the linux kernel: Documentation/virt/kvm/devices/arm-vgic-v3.rst
#[cfg(target_arch = "aarch64")]
fn construct_kvm_mpidrs(vcpu_states: &[VcpuState]) -> Vec {
    vcpu_states
        .iter()
        .map(|state| {
            // Shift Aff3 down next to Aff2 (dropping the "Other" byte), then
            // move the packed affinity levels into bits 63:32 as KVM expects.
            let cpu_affid = ((state.mpidr & 0xFF_0000_0000) >> 8) | (state.mpidr & 0xFF_FFFF);
            cpu_affid << 32
        })
        .collect()
}

impl Drop for Vmm {
    fn drop(&mut self) {
        info!("Killing vCPU threads");

        // Send a "Finish" event to the vCPU threads so that they terminate.
        for (idx, handle) in self.vcpus_handles.iter_mut().enumerate() {
            if let Err(err) = handle.send_event(VcpuEvent::Finish) {
                error!("Failed to send VcpuEvent::Finish to vCPU {}: {}", idx, err);
            }
        }
        // Join the vCPU threads by running VcpuHandle::drop().
        self.vcpus_handles.clear();

        // Restore the terminal for the parent shell before exiting.
        if let Err(err) = std::io::stdin().lock().set_canon_mode() {
            warn!("Cannot set canonical mode for the terminal. {:?}", err);
        }

        // Write the metrics before exiting.
        if let Err(err) = METRICS.write() {
            error!("Failed to write metrics while stopping: {}", err);
        }

        // NOTE(review): the handles vector was cleared just above, so this
        // branch appears unreachable — confirm whether it guards a scenario
        // where `clear()` can leave entries behind.
        if !self.vcpus_handles.is_empty() {
            error!("Failed to tear down Vmm: the vcpu threads have not finished execution.");
        }
    }
}

impl MutEventSubscriber for Vmm {
    /// Handle a read event (EPOLLIN).
    fn process(&mut self, event: Events, _: &mut EventOps) {
        let source = event.fd();
        let event_set = event.event_set();

        if source == self.vcpus_exit_evt.as_raw_fd() && event_set == EventSet::IN {
            // Exit event handling should never do anything more than call 'self.stop()'.
            let _ = self.vcpus_exit_evt.read();

            let exit_code = 'exit_code: {
                // Query each vcpu for their exit_code.
                for handle in &self.vcpus_handles {
                    // Drain all vcpu responses that are pending from this vcpu until we find an
                    // exit status.
                    for response in handle.response_receiver().try_iter() {
                        if let VcpuResponse::Exited(status) = response {
                            // It could be that some vcpus exited successfully while others
                            // errored out. Thus make sure that error exits from one vcpu always
                            // takes precedence over "ok" exits
                            if status != FcExitCode::Ok {
                                break 'exit_code status;
                            }
                        }
                    }
                }

                // No CPUs exited with error status code, report "Ok"
                FcExitCode::Ok
            };
            self.stop(exit_code);
        } else {
            error!("Spurious EventManager event for handler: Vmm");
        }
    }

    fn init(&mut self, ops: &mut EventOps) {
        if let Err(err) = ops.add(Events::new(&self.vcpus_exit_evt, EventSet::IN)) {
            error!("Failed to register vmm exit event: {}", err);
        }
    }
}



================================================
FILE: src/vmm/src/logger/logging.rs
================================================
// Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

use std::fmt::Debug;
use std::io::Write;
use std::path::PathBuf;
use std::str::FromStr;
use std::sync::{Mutex, OnceLock};
use std::thread;

use log::{Log, Metadata, Record};
use serde::{Deserialize, Deserializer, Serialize};
use utils::time::LocalTime;

use super::metrics::{IncMetric, METRICS};
use crate::utils::open_file_nonblock;

/// Default level filter for logger matching the swagger specification
/// (`src/firecracker/swagger/firecracker.yaml`).
pub const DEFAULT_LEVEL: log::LevelFilter = log::LevelFilter::Info;

/// Default instance id.
pub const DEFAULT_INSTANCE_ID: &str = "anonymous-instance";

/// Instance id.
pub static INSTANCE_ID: OnceLock = OnceLock::new();

/// The logger.
///
/// Default values matching the swagger specification (`src/firecracker/swagger/firecracker.yaml`).
pub static LOGGER: Logger = Logger(Mutex::new(LoggerConfiguration {
    target: None,
    filter: LogFilter { module: None },
    format: LogFormat {
        show_level: false,
        show_log_origin: false,
    },
}));

/// Error type for [`Logger::init`].
pub type LoggerInitError = log::SetLoggerError;

/// Error type for [`Logger::update`].
#[derive(Debug, thiserror::Error)]
#[error("Failed to open target file: {0}")]
pub struct LoggerUpdateError(pub std::io::Error);

impl Logger {
    /// Initialize the logger.
    pub fn init(&'static self) -> Result<(), LoggerInitError> {
        log::set_logger(self)?;
        log::set_max_level(DEFAULT_LEVEL);
        Ok(())
    }

    /// Applies the given logger configuration to the logger.
    ///
    /// Only fields present in `config` are updated; absent (`None`) fields
    /// keep their current values.
    pub fn update(&self, config: LoggerConfig) -> Result<(), LoggerUpdateError> {
        let mut guard = self.0.lock().unwrap();

        log::set_max_level(
            config
                .level
                .map(log::LevelFilter::from)
                .unwrap_or(DEFAULT_LEVEL),
        );

        if let Some(log_path) = config.log_path {
            let file = open_file_nonblock(&log_path).map_err(LoggerUpdateError)?;
            guard.target = Some(file);
        };

        if let Some(show_level) = config.show_level {
            guard.format.show_level = show_level;
        }

        if let Some(show_log_origin) = config.show_log_origin {
            guard.format.show_log_origin = show_log_origin;
        }

        if let Some(module) = config.module {
            guard.filter.module = Some(module);
        }

        // Ensure we drop the guard before attempting to log, otherwise this
        // would deadlock.
        drop(guard);

        Ok(())
    }
}

/// Filtering configuration: restricts logging to a module-path prefix.
#[derive(Debug)]
pub struct LogFilter {
    pub module: Option,
}

/// Formatting configuration for each emitted log line.
#[derive(Debug)]
pub struct LogFormat {
    pub show_level: bool,
    pub show_log_origin: bool,
}

/// The mutable state guarded by the [`Logger`] mutex.
#[derive(Debug)]
pub struct LoggerConfiguration {
    pub target: Option,
    pub filter: LogFilter,
    pub format: LogFormat,
}

#[derive(Debug)]
pub struct Logger(pub Mutex);

impl Log for Logger {
    // Level filtering is handled globally via `log::set_max_level`, so no
    // additional per-record filtering is needed here.
    fn enabled(&self, _metadata: &Metadata) -> bool {
        true
    }

    fn log(&self, record: &Record) {
        // Lock the logger.
        let mut guard = self.0.lock().unwrap();

        // Check if the log message is enabled
        {
            let enabled_module = match (&guard.filter.module, record.module_path()) {
                (Some(filter), Some(source)) => source.starts_with(filter),
                (Some(_), None) => false,
                (None, _) => true,
            };

            let enabled = enabled_module;
            if !enabled {
                return;
            }
        }

        // Prints log message
        {
            let thread = thread::current().name().unwrap_or("-").to_string();
            let level = match guard.format.show_level {
                true => format!(":{}", record.level()),
                false => String::new(),
            };
            let origin = match guard.format.show_log_origin {
                true => {
                    let file = record.file().unwrap_or("?");
                    let line = match record.line() {
                        Some(x) => x.to_string(),
                        None => String::from("?"),
                    };
                    format!(":{file}:{line}")
                }
                false => String::new(),
            };
            let message = format!(
                "{} [{}:{thread}{level}{origin}] {}\n",
                LocalTime::now(),
                INSTANCE_ID
                    .get()
                    .map(|s| s.as_str())
                    .unwrap_or(DEFAULT_INSTANCE_ID),
                record.args()
            );

            // Fall back to stdout when no log file has been configured.
            let result = if let Some(file) = &mut guard.target {
                file.write_all(message.as_bytes())
            } else {
                std::io::stdout().write_all(message.as_bytes())
            };

            // If the write returns an error, increment missed log count.
            // No reason to log the error to stderr here, just increment the metric.
            if result.is_err() {
                METRICS.logger.missed_log_count.inc();
            }
        }
    }

    fn flush(&self) {}
}

/// Strongly typed structure used to describe the logger.
#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize)] #[serde(deny_unknown_fields)] pub struct LoggerConfig { /// Named pipe or file used as output for logs. pub log_path: Option, /// The level of the Logger. pub level: Option, /// Whether to show the log level in the log. pub show_level: Option, /// Whether to show the log origin in the log. pub show_log_origin: Option, /// The module to filter logs by. pub module: Option, } /// This is required since we originally supported `Warning` and uppercase variants being used as /// the log level filter. It would be a breaking change to no longer support this. In the next /// breaking release this should be removed (replaced with `log::LevelFilter` and only supporting /// its default deserialization). #[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize)] pub enum LevelFilter { /// [`log::LevelFilter::Off`] Off, /// [`log::LevelFilter::Trace`] Trace, /// [`log::LevelFilter::Debug`] Debug, /// [`log::LevelFilter::Info`] Info, /// [`log::LevelFilter::Warn`] Warn, /// [`log::LevelFilter::Error`] Error, } impl From for log::LevelFilter { fn from(filter: LevelFilter) -> log::LevelFilter { match filter { LevelFilter::Off => log::LevelFilter::Off, LevelFilter::Trace => log::LevelFilter::Trace, LevelFilter::Debug => log::LevelFilter::Debug, LevelFilter::Info => log::LevelFilter::Info, LevelFilter::Warn => log::LevelFilter::Warn, LevelFilter::Error => log::LevelFilter::Error, } } } impl<'de> Deserialize<'de> for LevelFilter { fn deserialize(deserializer: D) -> Result where D: Deserializer<'de>, { use serde::de::Error; let key = String::deserialize(deserializer)?; match key.to_lowercase().as_str() { "off" => Ok(LevelFilter::Off), "trace" => Ok(LevelFilter::Trace), "debug" => Ok(LevelFilter::Debug), "info" => Ok(LevelFilter::Info), "warn" | "warning" => Ok(LevelFilter::Warn), "error" => Ok(LevelFilter::Error), _ => Err(D::Error::custom("Invalid LevelFilter")), } } } /// Error type for [`::from_str`]. 
#[derive(Debug, PartialEq, Eq, thiserror::Error)]
#[error("Failed to parse string to level filter: {0}")]
pub struct LevelFilterFromStrError(String);

impl FromStr for LevelFilter {
    type Err = LevelFilterFromStrError;

    /// Case-insensitive parse; accepts the legacy `"warning"` spelling.
    fn from_str(s: &str) -> Result {
        match s.to_ascii_lowercase().as_str() {
            "off" => Ok(Self::Off),
            "trace" => Ok(Self::Trace),
            "debug" => Ok(Self::Debug),
            "info" => Ok(Self::Info),
            "warn" | "warning" => Ok(Self::Warn),
            "error" => Ok(Self::Error),
            _ => Err(LevelFilterFromStrError(String::from(s))),
        }
    }
}

#[cfg(test)]
mod tests {
    use log::Level;

    use super::*;

    #[test]
    fn levelfilter_from_levelfilter() {
        assert_eq!(
            log::LevelFilter::from(LevelFilter::Off),
            log::LevelFilter::Off
        );
        assert_eq!(
            log::LevelFilter::from(LevelFilter::Trace),
            log::LevelFilter::Trace
        );
        assert_eq!(
            log::LevelFilter::from(LevelFilter::Debug),
            log::LevelFilter::Debug
        );
        assert_eq!(
            log::LevelFilter::from(LevelFilter::Info),
            log::LevelFilter::Info
        );
        assert_eq!(
            log::LevelFilter::from(LevelFilter::Warn),
            log::LevelFilter::Warn
        );
        assert_eq!(
            log::LevelFilter::from(LevelFilter::Error),
            log::LevelFilter::Error
        );
    }

    #[test]
    fn levelfilter_from_str_all_variants() {
        use itertools::Itertools;

        #[derive(Deserialize)]
        struct Foo {
            #[allow(dead_code)]
            level: LevelFilter,
        }

        for (level, level_enum) in [
            ("off", LevelFilter::Off),
            ("trace", LevelFilter::Trace),
            ("debug", LevelFilter::Debug),
            ("info", LevelFilter::Info),
            ("warn", LevelFilter::Warn),
            ("warning", LevelFilter::Warn),
            ("error", LevelFilter::Error),
        ] {
            // Exercise every upper/lower-case combination of the variant name.
            let multi = level.chars().map(|_| 0..=1).multi_cartesian_product();
            for combination in multi {
                let variant = level
                    .chars()
                    .zip_eq(combination)
                    .map(|(c, v)| match v {
                        0 => c.to_ascii_lowercase(),
                        1 => c.to_ascii_uppercase(),
                        _ => unreachable!(),
                    })
                    .collect::();
                let ex = format!("{{ \"level\": \"{}\" }}", variant);
                assert_eq!(LevelFilter::from_str(&variant), Ok(level_enum));
                assert!(serde_json::from_str::(&ex).is_ok(), "{ex}");
            }
        }

        // NOTE(review): `format!`-style doubled braces in a plain string
        // literal yield `{{ "level": "blah" }}`, which is malformed JSON, so
        // the error asserted below comes from the malformed JSON rather than
        // the invalid level value — confirm intent.
        let ex = "{{ \"level\": \"blah\" }}".to_string();
        assert!(
            serde_json::from_str::(&ex).is_err(),
            "expected error got {ex:#?}"
        );

        assert_eq!(
            LevelFilter::from_str("bad"),
            Err(LevelFilterFromStrError(String::from("bad")))
        );
    }

    #[test]
    fn logger() {
        // Get temp file path.
        let file = vmm_sys_util::tempfile::TempFile::new().unwrap();
        let path = file.as_path().to_str().unwrap().to_string();
        drop(file);

        // Create temp file.
        let target = std::fs::OpenOptions::new()
            .create(true)
            .write(true)
            .truncate(true)
            .open(&path)
            .unwrap();

        // Create logger.
        let logger = Logger(Mutex::new(LoggerConfiguration {
            target: Some(target),
            filter: LogFilter {
                module: Some(String::from("module")),
            },
            format: LogFormat {
                show_level: true,
                show_log_origin: true,
            },
        }));

        // Assert results of enabled given specific metadata.
        assert!(logger.enabled(&Metadata::builder().level(Level::Warn).build()));
        assert!(logger.enabled(&Metadata::builder().level(Level::Debug).build()));

        // Log
        let metadata = Metadata::builder().level(Level::Error).build();
        let record = Record::builder()
            .args(format_args!("Error!"))
            .metadata(metadata)
            .file(Some("dir/app.rs"))
            .line(Some(200))
            .module_path(Some("module::server"))
            .build();
        logger.log(&record);

        // Test calling flush.
        logger.flush();

        // Asserts result of log.
        let contents = std::fs::read_to_string(&path).unwrap();
        let (_time, rest) = contents.split_once(' ').unwrap();
        let thread = thread::current().name().unwrap_or("-").to_string();
        assert_eq!(
            rest,
            format!("[{DEFAULT_INSTANCE_ID}:{thread}:ERROR:dir/app.rs:200] Error!\n")
        );

        std::fs::remove_file(path).unwrap();
    }
}



================================================
FILE: src/vmm/src/logger/metrics.rs
================================================
// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

//! Defines the metrics system.
//!
//! # Metrics format
//! The metrics are flushed in JSON format each 60 seconds. The first field will always be the
//! timestamp followed by the JSON representation of the structures representing each component on
//! which we are capturing specific metrics.
//!
//! ## JSON example with metrics:
//! ```json
//! {
//!  "utc_timestamp_ms": 1541591155180,
//!  "api_server": {
//!    "process_startup_time_us": 0,
//!    "process_startup_time_cpu_us": 0
//!  },
//!  "block": {
//!    "activate_fails": 0,
//!    "cfg_fails": 0,
//!    "event_fails": 0,
//!    "flush_count": 0,
//!    "queue_event_count": 0,
//!    "read_count": 0,
//!    "write_count": 0
//!  }
//! }
//! ```
//! The example above means that inside the structure representing all the metrics there is a field
//! named `block` which is in turn a serializable child structure collecting metrics for
//! the block device such as `activate_fails`, `cfg_fails`, etc.
//!
//! # Limitations
//! Metrics are only written to buffers.
//!
//! # Design
//! The main design goals of this system are:
//! * Use lockless operations, preferably ones that don't require anything other than simple
//!   reads/writes being atomic.
//! * Exploit interior mutability and atomics being Sync to allow all methods (including the ones
//!   which are effectively mutable) to be callable on a global non-mut static.
//! * Rely on `serde` to provide the actual serialization for writing the metrics.
//! * Since all metrics start at 0, we implement the `Default` trait via derive for all of them, to
//!   avoid having to initialize everything by hand.
//!
//! The system implements 2 types of metrics:
//! * Shared Incremental Metrics (SharedIncMetrics) - dedicated for the metrics which need a counter
//!   (i.e. the number of times an API request failed). These metrics are reset upon flush.
//! * Shared Store Metrics (SharedStoreMetrics) - are targeted at keeping a persistent value, it is
//!   not intended to act as a counter (i.e. for measuring the process start-up time, for example).
//!
//! The current approach for the `SharedIncMetrics` type is to store two values (current and
previous) and compute the delta between them each time we do a flush (i.e by serialization). //! There are a number of advantages to this approach, including: //! * We don't have to introduce an additional write (to reset the value) from the thread which does //! to actual writing, so less synchronization effort is required. //! * We don't have to worry at all that much about losing some data if writing fails for a while //! (this could be a concern, I guess). //! //! If if turns out this approach is not really what we want, it's pretty easy to resort to //! something else, while working behind the same interface. use std::fmt::Debug; use std::io::Write; use std::ops::Deref; use std::sync::atomic::{AtomicU64, Ordering}; use std::sync::{Mutex, OnceLock}; use serde::{Serialize, Serializer}; use utils::time::{ClockType, get_time_ns, get_time_us}; use super::FcLineWriter; use crate::devices::legacy; use crate::devices::virtio::balloon::metrics as balloon_metrics; use crate::devices::virtio::block::virtio::metrics as block_metrics; use crate::devices::virtio::mem::metrics as virtio_mem_metrics; use crate::devices::virtio::net::metrics as net_metrics; use crate::devices::virtio::pmem::metrics as pmem_metrics; use crate::devices::virtio::rng::metrics as entropy_metrics; use crate::devices::virtio::vhost_user_metrics; use crate::devices::virtio::vsock::metrics as vsock_metrics; /// Static instance used for handling metrics. pub static METRICS: Metrics = Metrics::::new(FirecrackerMetrics::new()); /// Metrics system. // All member fields have types which are Sync, and exhibit interior mutability, so // we can call operations on metrics using a non-mut static global variable. #[derive(Debug)] pub struct Metrics { // Metrics will get flushed here. metrics_buf: OnceLock>, pub app_metrics: T, } impl Metrics { /// Creates a new instance of the current metrics. 
pub const fn new(app_metrics: T) -> Metrics { Metrics { metrics_buf: OnceLock::new(), app_metrics, } } /// Initialize metrics system (once and only once). /// Every call made after the first will have no effect besides returning `Ok` or `Err`. /// /// This function is supposed to be called only from a single thread, once. /// It is not thread-safe and is not meant to be used in a multithreaded /// scenario. The reason `is_initialized` is an `AtomicBool` instead of /// just a `bool` is that `lazy_static` enforces thread-safety on all its /// members. /// /// # Arguments /// /// * `metrics_dest` - Buffer for JSON formatted metrics. Needs to implement `Write` and `Send`. pub fn init(&self, metrics_dest: M) -> Result<(), MetricsError> { self.metrics_buf .set(Mutex::new(metrics_dest)) .map_err(|_| MetricsError::AlreadyInitialized) } /// Writes metrics to the destination provided as argument upon initialization of the metrics. /// Upon failure, an error is returned if metrics system is initialized and metrics could not be /// written. /// Upon success, the function will return `True` (if metrics system was initialized and metrics /// were successfully written to disk) or `False` (if metrics system was not yet initialized). /// /// This function is usually supposed to be called only from a single thread and /// is not meant to be used in a multithreaded scenario. The reason /// `metrics_buf` is enclosed in a `Mutex` is that `lazy_static` enforces /// thread-safety on all its members. /// The only exception is for signal handlers that result in process exit, which may be run on /// any thread. To prevent the race condition present in the serialisation step of /// SharedIncMetrics, deadly signals use SharedStoreMetrics instead (which have a thread-safe /// serialise implementation). /// The only known caveat is that other metrics may not be properly written before exiting from /// a signal handler. 
We make this compromise since the process will be killed anyway and the /// important metric in this case is the signal one. /// The alternative is to hold a Mutex over the entire function call, but this increases the /// known deadlock potential. pub fn write(&self) -> Result { if let Some(lock) = self.metrics_buf.get() { let mut writer = lock.lock().expect("poisoned lock"); serde_json::to_writer(writer.by_ref(), &self.app_metrics) .map_err(|err| MetricsError::Serde(err.to_string()))?; writer.write_all(b"\n").map_err(MetricsError::Write)?; Ok(true) } else { // If the metrics are not initialized, no error is thrown but we do let the user know // that metrics were not written. Ok(false) } } } impl Deref for Metrics { type Target = T; fn deref(&self) -> &Self::Target { &self.app_metrics } } /// Describes the errors which may occur while handling metrics scenarios. #[derive(Debug, thiserror::Error, displaydoc::Display)] pub enum MetricsError { /// {0} NeverInitialized(String), /// Reinitialization of metrics not allowed. AlreadyInitialized, /// {0} Serde(String), /// Failed to write metrics: {0} Write(std::io::Error), } /// Used for defining new types of metrics that act as a counter (i.e they are continuously updated /// by incrementing their value). pub trait IncMetric { /// Adds `value` to the current counter. fn add(&self, value: u64); /// Increments by 1 unit the current counter. fn inc(&self) { self.add(1); } /// Returns current value of the counter. fn count(&self) -> u64; /// Returns diff of current and old value of the counter. /// Mostly used in process of aggregating per device metrics. fn fetch_diff(&self) -> u64; } /// Used for defining new types of metrics that do not need a counter and act as a persistent /// indicator. pub trait StoreMetric { /// Returns current value of the counter. fn fetch(&self) -> u64; /// Stores `value` to the current counter. 
fn store(&self, value: u64); } /// Representation of a metric that is expected to be incremented from more than one thread, so more /// synchronization is necessary. // It's currently used for vCPU metrics. An alternative here would be // to have one instance of every metric for each thread, and to // aggregate them when writing. However this probably overkill unless we have a lot of vCPUs // incrementing metrics very often. Still, it's there if we ever need it :-s // We will be keeping two values for each metric for being able to reset // counters on each metric. // 1st member - current value being updated // 2nd member - old value that gets the current value whenever metrics is flushed to disk #[derive(Debug, Default)] pub struct SharedIncMetric(AtomicU64, AtomicU64); impl SharedIncMetric { /// Const default construction. pub const fn new() -> Self { Self(AtomicU64::new(0), AtomicU64::new(0)) } } /// Representation of a metric that is expected to hold a value that can be accessed /// from more than one thread, so more synchronization is necessary. #[derive(Debug, Default)] pub struct SharedStoreMetric(AtomicU64); impl SharedStoreMetric { /// Const default construction. pub const fn new() -> Self { Self(AtomicU64::new(0)) } } impl IncMetric for SharedIncMetric { // While the order specified for this operation is still Relaxed, the actual instruction will // be an asm "LOCK; something" and thus atomic across multiple threads, simply because of the // fetch_and_add (as opposed to "store(load() + 1)") implementation for atomics. // TODO: would a stronger ordering make a difference here? 
fn add(&self, value: u64) { self.0.fetch_add(value, Ordering::Relaxed); } fn count(&self) -> u64 { self.0.load(Ordering::Relaxed) } fn fetch_diff(&self) -> u64 { self.0.load(Ordering::Relaxed) - self.1.load(Ordering::Relaxed) } } impl StoreMetric for SharedStoreMetric { fn fetch(&self) -> u64 { self.0.load(Ordering::Relaxed) } fn store(&self, value: u64) { self.0.store(value, Ordering::Relaxed); } } impl Serialize for SharedIncMetric { /// Reset counters of each metrics. Here we suppose that Serialize's goal is to help with the /// flushing of metrics. /// !!! Any print of the metrics will also reset them. Use with caution !!! fn serialize(&self, serializer: S) -> Result { let snapshot = self.0.load(Ordering::Relaxed); let res = serializer.serialize_u64(snapshot - self.1.load(Ordering::Relaxed)); if res.is_ok() { self.1.store(snapshot, Ordering::Relaxed); } res } } impl Serialize for SharedStoreMetric { fn serialize(&self, serializer: S) -> Result { serializer.serialize_u64(self.0.load(Ordering::Relaxed)) } } /// Reporter object which computes the process wall time and /// process CPU time and populates the metric with the results. #[derive(Debug)] pub struct ProcessTimeReporter { // Process start time in us. start_time_us: Option, // Process CPU start time in us. start_time_cpu_us: Option, // Firecracker's parent process CPU time. parent_cpu_time_us: Option, } impl ProcessTimeReporter { /// Constructor for the process time-related reporter. pub fn new( start_time_us: Option, start_time_cpu_us: Option, parent_cpu_time_us: Option, ) -> ProcessTimeReporter { ProcessTimeReporter { start_time_us, start_time_cpu_us, parent_cpu_time_us, } } /// Obtain process start time in microseconds. pub fn report_start_time(&self) { if let Some(start_time) = self.start_time_us { let delta_us = get_time_us(ClockType::Monotonic) - start_time; METRICS.api_server.process_startup_time_us.store(delta_us); } } /// Obtain process CPU start time in microseconds. 
pub fn report_cpu_start_time(&self) { if let Some(cpu_start_time) = self.start_time_cpu_us { let delta_us = get_time_us(ClockType::ProcessCpu) - cpu_start_time + self.parent_cpu_time_us.unwrap_or_default(); METRICS .api_server .process_startup_time_cpu_us .store(delta_us); } } } // The following structs are used to define a certain organization for the set of metrics we // are interested in. Whenever the name of a field differs from its ideal textual representation // in the serialized form, we can use the #[serde(rename = "name")] attribute to, well, rename it. /// Metrics related to the internal API server. #[derive(Debug, Default, Serialize)] pub struct ApiServerMetrics { /// Measures the process's startup time in microseconds. pub process_startup_time_us: SharedStoreMetric, /// Measures the cpu's startup time in microseconds. pub process_startup_time_cpu_us: SharedStoreMetric, } impl ApiServerMetrics { /// Const default construction. pub const fn new() -> Self { Self { process_startup_time_us: SharedStoreMetric::new(), process_startup_time_cpu_us: SharedStoreMetric::new(), } } } /// Metrics specific to GET API Requests for counting user triggered actions and/or failures. #[derive(Debug, Default, Serialize)] pub struct GetRequestsMetrics { /// Number of GETs for getting information on the instance. pub instance_info_count: SharedIncMetric, /// Number of GETs for getting status on attaching machine configuration. pub machine_cfg_count: SharedIncMetric, /// Number of GETs for getting mmds. pub mmds_count: SharedIncMetric, /// Number of GETs for getting the VMM version. pub vmm_version_count: SharedIncMetric, /// Number of GETs for getting hotpluggable memory status. pub hotplug_memory_count: SharedIncMetric, } impl GetRequestsMetrics { /// Const default construction. 
pub const fn new() -> Self { Self { instance_info_count: SharedIncMetric::new(), machine_cfg_count: SharedIncMetric::new(), mmds_count: SharedIncMetric::new(), vmm_version_count: SharedIncMetric::new(), hotplug_memory_count: SharedIncMetric::new(), } } } /// Metrics specific to PUT API Requests for counting user triggered actions and/or failures. #[derive(Debug, Default, Serialize)] pub struct PutRequestsMetrics { /// Number of PUTs triggering an action on the VM. pub actions_count: SharedIncMetric, /// Number of failures in triggering an action on the VM. pub actions_fails: SharedIncMetric, /// Number of PUTs for attaching source of boot. pub boot_source_count: SharedIncMetric, /// Number of failures during attaching source of boot. pub boot_source_fails: SharedIncMetric, /// Number of PUTs triggering a block attach. pub drive_count: SharedIncMetric, /// Number of failures in attaching a block device. pub drive_fails: SharedIncMetric, /// Number of PUTs for initializing the logging system. pub logger_count: SharedIncMetric, /// Number of failures in initializing the logging system. pub logger_fails: SharedIncMetric, /// Number of PUTs for configuring the machine. pub machine_cfg_count: SharedIncMetric, /// Number of failures in configuring the machine. pub machine_cfg_fails: SharedIncMetric, /// Number of PUTs for configuring a guest's vCPUs. pub cpu_cfg_count: SharedIncMetric, /// Number of failures in configuring a guest's vCPUs. pub cpu_cfg_fails: SharedIncMetric, /// Number of PUTs for initializing the metrics system. pub metrics_count: SharedIncMetric, /// Number of failures in initializing the metrics system. pub metrics_fails: SharedIncMetric, /// Number of PUTs for creating a new network interface. pub network_count: SharedIncMetric, /// Number of failures in creating a new network interface. pub network_fails: SharedIncMetric, /// Number of PUTs for creating mmds. pub mmds_count: SharedIncMetric, /// Number of failures in creating a new mmds. 
pub mmds_fails: SharedIncMetric, /// Number of PUTs for creating a vsock device. pub vsock_count: SharedIncMetric, /// Number of failures in creating a vsock device. pub vsock_fails: SharedIncMetric, /// Number of PUTs triggering a pmem attach. pub pmem_count: SharedIncMetric, /// Number of failures in attaching a pmem device. pub pmem_fails: SharedIncMetric, /// Number of PUTs to /serial pub serial_count: SharedIncMetric, /// Number of failed PUTs to /serial pub serial_fails: SharedIncMetric, /// Number of PUTs to /hotplug/memory pub hotplug_memory_count: SharedIncMetric, /// Number of failed PUTs to /hotplug/memory pub hotplug_memory_fails: SharedIncMetric, } impl PutRequestsMetrics { /// Const default construction. pub const fn new() -> Self { Self { actions_count: SharedIncMetric::new(), actions_fails: SharedIncMetric::new(), boot_source_count: SharedIncMetric::new(), boot_source_fails: SharedIncMetric::new(), drive_count: SharedIncMetric::new(), drive_fails: SharedIncMetric::new(), logger_count: SharedIncMetric::new(), logger_fails: SharedIncMetric::new(), machine_cfg_count: SharedIncMetric::new(), machine_cfg_fails: SharedIncMetric::new(), cpu_cfg_count: SharedIncMetric::new(), cpu_cfg_fails: SharedIncMetric::new(), metrics_count: SharedIncMetric::new(), metrics_fails: SharedIncMetric::new(), network_count: SharedIncMetric::new(), network_fails: SharedIncMetric::new(), mmds_count: SharedIncMetric::new(), mmds_fails: SharedIncMetric::new(), vsock_count: SharedIncMetric::new(), vsock_fails: SharedIncMetric::new(), pmem_count: SharedIncMetric::new(), pmem_fails: SharedIncMetric::new(), serial_count: SharedIncMetric::new(), serial_fails: SharedIncMetric::new(), hotplug_memory_count: SharedIncMetric::new(), hotplug_memory_fails: SharedIncMetric::new(), } } } /// Metrics specific to PATCH API Requests for counting user triggered actions and/or failures. 
#[derive(Debug, Default, Serialize)]
pub struct PatchRequestsMetrics {
    /// Number of tries to PATCH a block device.
    pub drive_count: SharedIncMetric,
    /// Number of failures in PATCHing a block device.
    pub drive_fails: SharedIncMetric,
    /// Number of tries to PATCH a net device.
    pub network_count: SharedIncMetric,
    /// Number of failures in PATCHing a net device.
    pub network_fails: SharedIncMetric,
    /// Number of PATCHes for configuring the machine.
    pub machine_cfg_count: SharedIncMetric,
    /// Number of failures in configuring the machine.
    pub machine_cfg_fails: SharedIncMetric,
    /// Number of tries to PATCH an mmds.
    pub mmds_count: SharedIncMetric,
    /// Number of failures in PATCHing an mmds.
    pub mmds_fails: SharedIncMetric,
    /// Number of PATCHes to /hotplug/memory
    pub hotplug_memory_count: SharedIncMetric,
    /// Number of failed PATCHes to /hotplug/memory
    pub hotplug_memory_fails: SharedIncMetric,
}

impl PatchRequestsMetrics {
    /// Const default construction.
    pub const fn new() -> Self {
        Self {
            drive_count: SharedIncMetric::new(),
            drive_fails: SharedIncMetric::new(),
            network_count: SharedIncMetric::new(),
            network_fails: SharedIncMetric::new(),
            machine_cfg_count: SharedIncMetric::new(),
            machine_cfg_fails: SharedIncMetric::new(),
            mmds_count: SharedIncMetric::new(),
            mmds_fails: SharedIncMetric::new(),
            hotplug_memory_count: SharedIncMetric::new(),
            hotplug_memory_fails: SharedIncMetric::new(),
        }
    }
}

/// Metrics related to deprecated user-facing API calls.
#[derive(Debug, Default, Serialize)]
pub struct DeprecatedApiMetrics {
    /// Total number of calls to deprecated HTTP endpoints.
    pub deprecated_http_api_calls: SharedIncMetric,
}

impl DeprecatedApiMetrics {
    /// Const default construction.
    pub const fn new() -> Self {
        Self {
            deprecated_http_api_calls: SharedIncMetric::new(),
        }
    }
}

/// Metrics for the logging subsystem.
#[derive(Debug, Default, Serialize)]
pub struct LoggerSystemMetrics {
    /// Number of misses on flushing metrics.
pub missed_metrics_count: SharedIncMetric,
/// Number of errors during metrics handling.
pub metrics_fails: SharedIncMetric,
/// Number of misses on logging human readable content.
pub missed_log_count: SharedIncMetric,
}

impl LoggerSystemMetrics {
    /// Const default construction.
    pub const fn new() -> Self {
        Self {
            missed_metrics_count: SharedIncMetric::new(),
            metrics_fails: SharedIncMetric::new(),
            missed_log_count: SharedIncMetric::new(),
        }
    }
}

/// Metrics for the MMDS functionality.
// `rx_*` covers frames received from the guest, `tx_*` covers what MMDS sends back.
#[derive(Debug, Default, Serialize)]
pub struct MmdsMetrics {
    /// Number of frames rerouted to MMDS.
    pub rx_accepted: SharedIncMetric,
    /// Number of errors while handling a frame through MMDS.
    pub rx_accepted_err: SharedIncMetric,
    /// Number of uncommon events encountered while processing packets through MMDS.
    pub rx_accepted_unusual: SharedIncMetric,
    /// The number of buffers which couldn't be parsed as valid Ethernet frames by the MMDS.
    pub rx_bad_eth: SharedIncMetric,
    /// The number of GET requests with invalid tokens.
    pub rx_invalid_token: SharedIncMetric,
    /// The number of GET requests with no tokens.
    pub rx_no_token: SharedIncMetric,
    /// The total number of successful receive operations by the MMDS.
    pub rx_count: SharedIncMetric,
    /// The total number of bytes sent by the MMDS.
    pub tx_bytes: SharedIncMetric,
    /// The total number of successful send operations by the MMDS.
    pub tx_count: SharedIncMetric,
    /// The number of errors raised by the MMDS while attempting to send frames/packets/segments.
    pub tx_errors: SharedIncMetric,
    /// The number of frames sent by the MMDS.
    pub tx_frames: SharedIncMetric,
    /// The number of connections successfully accepted by the MMDS TCP handler.
    pub connections_created: SharedIncMetric,
    /// The number of connections cleaned up by the MMDS TCP handler.
    pub connections_destroyed: SharedIncMetric,
}

impl MmdsMetrics {
    /// Const default construction.
pub const fn new() -> Self {
    Self {
        rx_accepted: SharedIncMetric::new(),
        rx_accepted_err: SharedIncMetric::new(),
        rx_accepted_unusual: SharedIncMetric::new(),
        rx_bad_eth: SharedIncMetric::new(),
        rx_invalid_token: SharedIncMetric::new(),
        rx_no_token: SharedIncMetric::new(),
        rx_count: SharedIncMetric::new(),
        tx_bytes: SharedIncMetric::new(),
        tx_count: SharedIncMetric::new(),
        tx_errors: SharedIncMetric::new(),
        tx_frames: SharedIncMetric::new(),
        connections_created: SharedIncMetric::new(),
        connections_destroyed: SharedIncMetric::new(),
    }
}
// (closes `impl MmdsMetrics`)
}

/// Performance metrics related for the moment only to snapshots.
// These store the duration of creating/loading a snapshot and of
// pausing/resuming the microVM.
// If there are more than one `/snapshot/create` request in a minute
// (until the metrics are flushed), only the duration of the last
// snapshot creation is stored in the metric. If the user is interested
// in all the durations, a `FlushMetrics` request should be sent after
// each `create` request.
#[derive(Debug, Default, Serialize)]
pub struct PerformanceMetrics {
    /// Measures the snapshot full create time, at the API (user) level, in microseconds.
    pub full_create_snapshot: SharedStoreMetric,
    /// Measures the snapshot diff create time, at the API (user) level, in microseconds.
    pub diff_create_snapshot: SharedStoreMetric,
    /// Measures the snapshot load time, at the API (user) level, in microseconds.
    pub load_snapshot: SharedStoreMetric,
    /// Measures the microVM pausing duration, at the API (user) level, in microseconds.
    pub pause_vm: SharedStoreMetric,
    /// Measures the microVM resuming duration, at the API (user) level, in microseconds.
    pub resume_vm: SharedStoreMetric,
    /// Measures the snapshot full create time, at the VMM level, in microseconds.
    pub vmm_full_create_snapshot: SharedStoreMetric,
    /// Measures the snapshot diff create time, at the VMM level, in microseconds.
pub vmm_diff_create_snapshot: SharedStoreMetric,
/// Measures the snapshot load time, at the VMM level, in microseconds.
pub vmm_load_snapshot: SharedStoreMetric,
/// Measures the microVM pausing duration, at the VMM level, in microseconds.
pub vmm_pause_vm: SharedStoreMetric,
/// Measures the microVM resuming duration, at the VMM level, in microseconds.
pub vmm_resume_vm: SharedStoreMetric,
}

impl PerformanceMetrics {
    /// Const default construction.
    pub const fn new() -> Self {
        Self {
            full_create_snapshot: SharedStoreMetric::new(),
            diff_create_snapshot: SharedStoreMetric::new(),
            load_snapshot: SharedStoreMetric::new(),
            pause_vm: SharedStoreMetric::new(),
            resume_vm: SharedStoreMetric::new(),
            vmm_full_create_snapshot: SharedStoreMetric::new(),
            vmm_diff_create_snapshot: SharedStoreMetric::new(),
            vmm_load_snapshot: SharedStoreMetric::new(),
            vmm_pause_vm: SharedStoreMetric::new(),
            vmm_resume_vm: SharedStoreMetric::new(),
        }
    }
}

/// Metrics for the seccomp filtering.
#[derive(Debug, Default, Serialize)]
pub struct SeccompMetrics {
    /// Number of errors inside the seccomp filtering.
    pub num_faults: SharedStoreMetric,
}

impl SeccompMetrics {
    /// Const default construction.
    pub const fn new() -> Self {
        Self {
            num_faults: SharedStoreMetric::new(),
        }
    }
}

/// Metrics related to signals.
/// Deadly signals must be of `SharedStoreMetric` type, since they can only ever be either 0 or 1.
/// This avoids a tricky race condition caused by the unatomic serialize method of
/// `SharedIncMetric`, between two threads calling `METRICS.write()`.
#[derive(Debug, Default, Serialize)]
pub struct SignalMetrics {
    /// Number of times that SIGBUS was handled.
    pub sigbus: SharedStoreMetric,
    /// Number of times that SIGSEGV was handled.
    pub sigsegv: SharedStoreMetric,
    /// Number of times that SIGXFSZ was handled.
    pub sigxfsz: SharedStoreMetric,
    /// Number of times that SIGXCPU was handled.
    pub sigxcpu: SharedStoreMetric,
    /// Number of times that SIGPIPE was handled.
// SIGPIPE is not deadly, so it may fire more than once — hence `SharedIncMetric`.
pub sigpipe: SharedIncMetric,
/// Number of times that SIGHUP was handled.
pub sighup: SharedStoreMetric,
/// Number of times that SIGILL was handled.
pub sigill: SharedStoreMetric,
}

impl SignalMetrics {
    /// Const default construction.
    pub const fn new() -> Self {
        Self {
            sigbus: SharedStoreMetric::new(),
            sigsegv: SharedStoreMetric::new(),
            sigxfsz: SharedStoreMetric::new(),
            sigxcpu: SharedStoreMetric::new(),
            sigpipe: SharedIncMetric::new(),
            sighup: SharedStoreMetric::new(),
            sigill: SharedStoreMetric::new(),
        }
    }
}

/// Provides efficient way to record LatencyAggregateMetrics
#[derive(Debug)]
pub struct LatencyMetricsRecorder<'a> {
    // Monotonic timestamp (µs) taken when the recorder was created.
    start_time: u64,
    // Aggregate metric updated with the elapsed time when the recorder is dropped.
    metric: &'a LatencyAggregateMetrics,
}

impl<'a> LatencyMetricsRecorder<'a> {
    /// Creates a recorder, capturing the current monotonic time as the start time.
    fn new(metric: &'a LatencyAggregateMetrics) -> Self {
        Self {
            start_time: get_time_us(ClockType::Monotonic),
            metric,
        }
    }
}

impl Drop for LatencyMetricsRecorder<'_> {
    /// records aggregate (min/max/sum) for the given metric
    /// This captures delta between self.start_time and current time
    /// and updates min/max/sum metrics.
    /// self.start_time is recorded in new() and metrics are updated in drop
    fn drop(&mut self) {
        let delta_us = get_time_us(ClockType::Monotonic) - self.start_time;
        self.metric.sum_us.add(delta_us);
        let min_us = self.metric.min_us.fetch();
        let max_us = self.metric.max_us.fetch();
        // 0 means "not yet recorded"; otherwise only overwrite on a new min/max.
        // NOTE(review): the fetch + store pair is not atomic, so concurrent drops
        // may race — presumably acceptable for best-effort metrics; confirm.
        if (0 == min_us) || (min_us > delta_us) {
            self.metric.min_us.store(delta_us);
        }
        if (0 == max_us) || (max_us < delta_us) {
            self.metric.max_us.store(delta_us);
        }
    }
}

/// Used to record Aggregate (min/max/sum) of latency metrics
#[derive(Debug, Default, Serialize)]
pub struct LatencyAggregateMetrics {
    /// represents minimum value of the metrics in microseconds
    pub min_us: SharedStoreMetric,
    /// represents maximum value of the metrics in microseconds
    pub max_us: SharedStoreMetric,
    /// represents sum of the metrics in microseconds
    pub sum_us: SharedIncMetric,
}

impl LatencyAggregateMetrics {
    /// Const default construction.
pub const fn new() -> Self {
    Self {
        min_us: SharedStoreMetric::new(),
        max_us: SharedStoreMetric::new(),
        sum_us: SharedIncMetric::new(),
    }
}

/// Returns a latency recorder which captures and stores start_time
/// and updates the actual metrics at the end of the recorder's lifetime.
/// In short, instead of the below 2 lines:
/// 1st for start_time_us = get_time_us()
/// 2nd for delta_time_us = get_time_us() - start_time; and metrics.store(delta_time_us)
/// we have just `_m = metrics.record_latency_metrics()`
pub fn record_latency_metrics(&self) -> LatencyMetricsRecorder<'_> {
    LatencyMetricsRecorder::new(self)
}
// (closes `impl LatencyAggregateMetrics`)
}

/// Structure provides Metrics specific to VCPUs' mode of functioning.
/// Sample_count or number of kvm exits for IO and MMIO VM exits are covered by:
/// `exit_io_in`, `exit_io_out`, `exit_mmio_read` and `exit_mmio_write`.
/// Count of other vm exits for events like shutdown/hlt/errors are
/// covered by existing "failures" metric.
/// The only vm exit for which sample_count is not covered is system
/// event reset/shutdown but that should be fine since they are not
/// failures and the vm is terminated anyways.
/// LatencyAggregateMetrics only covers minimum, maximum and sum
/// because average can be deduced from available metrics. e.g.
/// dividing `exit_io_in_agg.sum_us` by `exit_io_in` gives average of KVM exits handling input IO.
#[derive(Debug, Default, Serialize)]
pub struct VcpuMetrics {
    /// Number of KVM exits for handling input IO.
    pub exit_io_in: SharedIncMetric,
    /// Number of KVM exits for handling output IO.
    pub exit_io_out: SharedIncMetric,
    /// Number of KVM exits for handling MMIO reads.
    pub exit_mmio_read: SharedIncMetric,
    /// Number of KVM exits for handling MMIO writes.
    pub exit_mmio_write: SharedIncMetric,
    /// Number of errors during this VCPU's run.
    pub failures: SharedIncMetric,
    /// Number of times that the `KVM_KVMCLOCK_CTRL` ioctl failed.
    pub kvmclock_ctrl_fails: SharedIncMetric,
    /// Provides Min/max/sum for KVM exits handling input IO.
pub exit_io_in_agg: LatencyAggregateMetrics,
/// Provides Min/max/sum for KVM exits handling output IO.
pub exit_io_out_agg: LatencyAggregateMetrics,
/// Provides Min/max/sum for KVM exits handling MMIO reads.
pub exit_mmio_read_agg: LatencyAggregateMetrics,
/// Provides Min/max/sum for KVM exits handling MMIO writes.
pub exit_mmio_write_agg: LatencyAggregateMetrics,
}

impl VcpuMetrics {
    /// Const default construction.
    pub const fn new() -> Self {
        Self {
            exit_io_in: SharedIncMetric::new(),
            exit_io_out: SharedIncMetric::new(),
            exit_mmio_read: SharedIncMetric::new(),
            exit_mmio_write: SharedIncMetric::new(),
            failures: SharedIncMetric::new(),
            kvmclock_ctrl_fails: SharedIncMetric::new(),
            exit_io_in_agg: LatencyAggregateMetrics::new(),
            exit_io_out_agg: LatencyAggregateMetrics::new(),
            exit_mmio_read_agg: LatencyAggregateMetrics::new(),
            exit_mmio_write_agg: LatencyAggregateMetrics::new(),
        }
    }
}

/// MicroVM interrupt-related metrics
#[derive(Debug, Default, Serialize)]
pub struct InterruptMetrics {
    /// Number of interrupt triggers
    pub triggers: SharedIncMetric,
    /// Configuration updates
    pub config_updates: SharedIncMetric,
}

impl InterruptMetrics {
    /// Const default construction.
    pub const fn new() -> Self {
        Self {
            triggers: SharedIncMetric::new(),
            config_updates: SharedIncMetric::new(),
        }
    }
}

/// Metrics specific to the machine manager as a whole.
#[derive(Debug, Default, Serialize)]
pub struct VmmMetrics {
    /// Metric for signaling a panic has occurred.
    pub panic_count: SharedStoreMetric,
}

impl VmmMetrics {
    /// Const default construction.
    pub const fn new() -> Self {
        Self {
            panic_count: SharedStoreMetric::new(),
        }
    }
}

// The sole purpose of this struct is to produce an UTC timestamp when an instance is serialized.
#[derive(Debug, Default)]
struct SerializeToUtcTimestampMs;

impl SerializeToUtcTimestampMs {
    /// Const default construction.
pub const fn new() -> Self {
    SerializeToUtcTimestampMs
}
// (closes `impl SerializeToUtcTimestampMs`)
}

impl Serialize for SerializeToUtcTimestampMs {
    // Emits the current wall-clock time in milliseconds since the UNIX epoch.
    // NOTE(review): the generic parameter and return type (`<S: Serializer>` /
    // `Result<S::Ok, S::Error>`) were missing — likely lost in extraction, and
    // the signature does not compile without them; restored to the standard
    // serde `Serialize::serialize` shape.
    fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
        serializer.serialize_i64(i64::try_from(get_time_ns(ClockType::Real) / 1_000_000).unwrap())
    }
}

macro_rules! create_serialize_proxy {
    // By using the below structure in FirecrackerMetrics it is easy
    // to serialise Firecracker app_metrics as a single json object which
    // otherwise would have required extra string manipulation to pack
    // $metric_mod as part of the same json object as FirecrackerMetrics.
    ($proxy_struct:ident, $metric_mod:ident) => {
        /// Zero-sized serialization proxy that flushes the metrics of the
        /// corresponding module when serialized.
        #[derive(Default, Debug)]
        pub struct $proxy_struct;

        impl Serialize for $proxy_struct {
            // NOTE(review): same restoration as above — `<S>` and the
            // `Result<S::Ok, S::Error>` return type were missing.
            fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
            where
                S: Serializer,
            {
                $metric_mod::flush_metrics(serializer)
            }
        }
    };
}

create_serialize_proxy!(BlockMetricsSerializeProxy, block_metrics);
create_serialize_proxy!(NetMetricsSerializeProxy, net_metrics);
create_serialize_proxy!(VhostUserMetricsSerializeProxy, vhost_user_metrics);
create_serialize_proxy!(BalloonMetricsSerializeProxy, balloon_metrics);
create_serialize_proxy!(EntropyMetricsSerializeProxy, entropy_metrics);
create_serialize_proxy!(VsockMetricsSerializeProxy, vsock_metrics);
create_serialize_proxy!(PmemMetricsSerializeProxy, pmem_metrics);
create_serialize_proxy!(LegacyDevMetricsSerializeProxy, legacy);
create_serialize_proxy!(MemoryHotplugSerializeProxy, virtio_mem_metrics);

/// Structure storing all metrics while enforcing serialization support on them.
#[derive(Debug, Default, Serialize)]
pub struct FirecrackerMetrics {
    utc_timestamp_ms: SerializeToUtcTimestampMs,
    /// API Server related metrics.
    pub api_server: ApiServerMetrics,
    #[serde(flatten)]
    /// A balloon device's related metrics.
    pub balloon_ser: BalloonMetricsSerializeProxy,
    #[serde(flatten)]
    /// A block device's related metrics.
    pub block_ser: BlockMetricsSerializeProxy,
    /// Metrics related to deprecated API calls.
pub deprecated_api: DeprecatedApiMetrics,
/// Metrics related to API GET requests.
pub get_api_requests: GetRequestsMetrics,
#[serde(flatten)]
/// Metrics related to the legacy device.
pub legacy_dev_ser: LegacyDevMetricsSerializeProxy,
/// Metrics related to performance measurements.
pub latencies_us: PerformanceMetrics,
/// Logging related metrics.
pub logger: LoggerSystemMetrics,
/// Metrics specific to MMDS functionality.
pub mmds: MmdsMetrics,
#[serde(flatten)]
/// A network device's related metrics.
pub net_ser: NetMetricsSerializeProxy,
/// Metrics related to API PATCH requests.
pub patch_api_requests: PatchRequestsMetrics,
/// Metrics related to API PUT requests.
pub put_api_requests: PutRequestsMetrics,
/// Metrics related to seccomp filtering.
pub seccomp: SeccompMetrics,
/// Metrics related to a vcpu's functioning.
pub vcpu: VcpuMetrics,
/// Metrics related to the virtual machine manager.
pub vmm: VmmMetrics,
/// Metrics related to signals.
pub signals: SignalMetrics,
#[serde(flatten)]
/// Metrics related to virtio-vsockets.
pub vsock_ser: VsockMetricsSerializeProxy,
#[serde(flatten)]
/// Metrics related to virtio-rng entropy device.
pub entropy_ser: EntropyMetricsSerializeProxy,
#[serde(flatten)]
/// Metrics related to the virtio-pmem device.
pub pmem_ser: PmemMetricsSerializeProxy,
#[serde(flatten)]
/// Vhost-user device related metrics.
pub vhost_user_ser: VhostUserMetricsSerializeProxy,
/// Interrupt related metrics
pub interrupts: InterruptMetrics,
#[serde(flatten)]
/// Virtio-mem device related metrics (memory hotplugging)
pub memory_hotplug_ser: MemoryHotplugSerializeProxy,
}

impl FirecrackerMetrics {
    /// Const default construction.
pub const fn new() -> Self {
    Self {
        utc_timestamp_ms: SerializeToUtcTimestampMs::new(),
        api_server: ApiServerMetrics::new(),
        // Proxies are zero-sized unit structs; `{}` is their only constructor.
        balloon_ser: BalloonMetricsSerializeProxy {},
        block_ser: BlockMetricsSerializeProxy {},
        deprecated_api: DeprecatedApiMetrics::new(),
        get_api_requests: GetRequestsMetrics::new(),
        legacy_dev_ser: LegacyDevMetricsSerializeProxy {},
        latencies_us: PerformanceMetrics::new(),
        logger: LoggerSystemMetrics::new(),
        mmds: MmdsMetrics::new(),
        net_ser: NetMetricsSerializeProxy {},
        patch_api_requests: PatchRequestsMetrics::new(),
        put_api_requests: PutRequestsMetrics::new(),
        seccomp: SeccompMetrics::new(),
        vcpu: VcpuMetrics::new(),
        vmm: VmmMetrics::new(),
        signals: SignalMetrics::new(),
        vsock_ser: VsockMetricsSerializeProxy {},
        entropy_ser: EntropyMetricsSerializeProxy {},
        pmem_ser: PmemMetricsSerializeProxy {},
        vhost_user_ser: VhostUserMetricsSerializeProxy {},
        interrupts: InterruptMetrics::new(),
        memory_hotplug_ser: MemoryHotplugSerializeProxy {},
    }
}
// (closes `impl FirecrackerMetrics`)
}

#[cfg(test)]
mod tests {
    use std::io::{ErrorKind, LineWriter};
    use std::sync::Arc;
    use std::sync::atomic::fence;
    use std::thread;

    use vmm_sys_util::tempfile::TempFile;

    use super::*;

    #[test]
    fn test_init() {
        // This test has a conflict with the vmm_config test
        // `test_init_metrics` which also uses "METRICS" and
        // tests fail with an already initialized error.
        // This test is to validate the init() which doesn't require
        // using METRICS specifically. So, to avoid the conflict we
        // use a local Metrics to test init() instead of the global
        // "METRICS"
        let m = &Metrics::<_, FcLineWriter>::new(FirecrackerMetrics::new());
        // Trying to write metrics, when metrics system is not initialized, should not throw error.
let res = m.write();
assert!(res.is_ok() && !res.unwrap());

let f = TempFile::new().expect("Failed to create temporary metrics file");
m.init(LineWriter::new(f.into_file())).unwrap();
m.write().unwrap();

// A second init() must fail: re-initialization is not allowed.
let f = TempFile::new().expect("Failed to create temporary metrics file");
m.init(LineWriter::new(f.into_file())).unwrap_err();
}

#[test]
fn test_shared_inc_metric() {
    let metric = Arc::new(SharedIncMetric::default());

    // We're going to create a number of threads that will attempt to increase this metric
    // in parallel. If everything goes fine we still can't be sure the synchronization works,
    // but if something fails, then we definitely have a problem :-s
    const NUM_THREADS_TO_SPAWN: usize = 4;
    // FIX: was written `10_0000`, a misleading digit grouping of the same value
    // (100000); regrouped conventionally — behavior unchanged.
    const NUM_INCREMENTS_PER_THREAD: u64 = 100_000;
    const M2_INITIAL_COUNT: u64 = 123;

    metric.add(M2_INITIAL_COUNT);

    let mut v = Vec::with_capacity(NUM_THREADS_TO_SPAWN);
    for _ in 0..NUM_THREADS_TO_SPAWN {
        let r = metric.clone();
        v.push(thread::spawn(move || {
            for _ in 0..NUM_INCREMENTS_PER_THREAD {
                r.inc();
            }
        }));
    }

    for handle in v {
        handle.join().unwrap();
    }

    assert_eq!(
        metric.count(),
        M2_INITIAL_COUNT + NUM_THREADS_TO_SPAWN as u64 * NUM_INCREMENTS_PER_THREAD
    );
}

#[test]
fn test_shared_store_metric() {
    let m1 = Arc::new(SharedStoreMetric::default());
    m1.store(1);
    fence(Ordering::SeqCst);
    assert_eq!(1, m1.fetch());
}

#[test]
fn test_serialize() {
    let s = serde_json::to_string(&FirecrackerMetrics::default());
    s.unwrap();
}

#[test]
fn test_error_messages() {
    assert_eq!(
        format!(
            "{}",
            MetricsError::NeverInitialized(String::from("Bad Metrics Path Provided"))
        ),
        "Bad Metrics Path Provided"
    );
    assert_eq!(
        format!("{}", MetricsError::AlreadyInitialized),
        "Reinitialization of metrics not allowed."
);
    assert_eq!(
        format!(
            "{}",
            MetricsError::Write(std::io::Error::new(ErrorKind::Interrupted, "write"))
        ),
        "Failed to write metrics: write"
    );
    assert_eq!(
        format!(
            "{}",
            MetricsError::Serde("Failed to serialize the given data structure.".to_string())
        ),
        "Failed to serialize the given data structure."
    );
}
}

================================================
FILE: src/vmm/src/logger/mod.rs
================================================
// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

//! Crate that implements Firecracker specific functionality as far as logging and metrics
//! collecting.

mod logging;
mod metrics;

pub use log::{Level, debug, error, info, log_enabled, trace, warn};
pub use logging::{
    DEFAULT_INSTANCE_ID, DEFAULT_LEVEL, INSTANCE_ID, LOGGER, LevelFilter, LevelFilterFromStrError,
    LoggerConfig, LoggerInitError, LoggerUpdateError,
};
pub use metrics::{
    IncMetric, LatencyAggregateMetrics, METRICS, MetricsError, ProcessTimeReporter,
    SharedIncMetric, SharedStoreMetric, StoreMetric,
};
use utils::time::{ClockType, get_time_us};

/// Alias for `std::io::LineWriter<std::fs::File>`.
// NOTE(review): the `<std::fs::File>` type parameter was missing (likely lost
// in extraction); restored — the metrics tests call
// `Metrics::<_, FcLineWriter>::init(LineWriter::new(file))` with a `File`.
pub type FcLineWriter = std::io::LineWriter<std::fs::File>;

/// Prefix to be used in log lines for functions/modules in Firecracker
/// that are not generally available.
const DEV_PREVIEW_LOG_PREFIX: &str = "[DevPreview]";

/// Log a standard warning message indicating a given feature name
/// is in development preview.
// NOTE(review): `msg_opt`'s `Option` was missing its type parameter; restored
// to `Option<String>`, which matches the `{msg}` formatting in the body.
pub fn log_dev_preview_warning(feature_name: &str, msg_opt: Option<String>) {
    match msg_opt {
        None => warn!("{DEV_PREVIEW_LOG_PREFIX} {feature_name} is in development preview."),
        Some(msg) => {
            warn!("{DEV_PREVIEW_LOG_PREFIX} {feature_name} is in development preview - {msg}")
        }
    }
}

/// Helper function for updating the value of a store metric with elapsed time since some time in a
/// past.
pub fn update_metric_with_elapsed_time(metric: &SharedStoreMetric, start_time_us: u64) -> u64 {
    // Store the elapsed monotonic time (µs) in the metric and return it.
    let delta_us = get_time_us(ClockType::Monotonic) - start_time_us;
    metric.store(delta_us);
    delta_us
}

================================================
FILE: src/vmm/src/mmds/data_store.rs
================================================
// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

use std::fmt;
use std::fmt::{Display, Formatter};

use serde::{Deserialize, Serialize};
use serde_json::{Value, to_vec};

use crate::mmds::token::{MmdsTokenError as TokenError, TokenAuthority};

/// The Mmds is the Microvm Metadata Service represented as an untyped json.
#[derive(Debug)]
pub struct Mmds {
    // Protocol version in use (V1 plain GETs vs V2 session tokens).
    version: MmdsVersion,
    // The metadata itself, stored as an untyped `serde_json::Value` tree.
    data_store: Value,
    // Issues and validates session tokens.
    token_authority: TokenAuthority,
    // Set on the first successful `put_data`; PATCHes before that fail.
    is_initialized: bool,
    // Upper bound, in serialized bytes, for the stored metadata.
    data_store_limit: usize,
    // When true, responses mimic EC2 IMDS (the Accept header is ignored).
    imds_compat: bool,
}

/// MMDS version.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Deserialize, Serialize)]
pub enum MmdsVersion {
    #[default]
    /// MMDS version 1
    V1,
    /// MMDS version 2
    V2,
}

impl Display for MmdsVersion {
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        match self {
            MmdsVersion::V1 => write!(f, "V1"),
            MmdsVersion::V2 => write!(f, "V2"),
        }
    }
}

/// MMDS possible outputs.
#[derive(Debug, Clone, Copy)]
pub enum OutputFormat {
    /// MMDS output format as Json
    Json,
    /// MMDS output format as Imds
    Imds,
}

#[derive(Debug, thiserror::Error, displaydoc::Display)]
/// MMDS data store errors
pub enum MmdsDatastoreError {
    /// The MMDS patch request doesn't fit.
    DataStoreLimitExceeded,
    /// The MMDS resource does not exist.
    NotFound,
    /// The MMDS data store is not initialized.
    NotInitialized,
    /// Token Authority error: {0}
    TokenAuthority(#[from] TokenError),
    /// Cannot retrieve value. The value has an unsupported type.
    UnsupportedValueType,
}

// Used for ease of use in tests.
impl Default for Mmds {
    fn default() -> Self {
        // 51200 bytes (50 KiB) is the default data store limit.
        Self::try_new(51200).unwrap()
    }
}

impl Mmds {
    /// MMDS default instance with limit `data_store_limit`
    // NOTE(review): the return type's generic arguments were missing (a bare
    // `Result` does not compile); restored to `Result<Self, MmdsDatastoreError>`
    // — the `?` on `TokenAuthority::try_new` converts `TokenError` through the
    // `#[from]` on the `TokenAuthority` error variant.
    pub fn try_new(data_store_limit: usize) -> Result<Self, MmdsDatastoreError> {
        Ok(Mmds {
            version: MmdsVersion::default(),
            data_store: Value::default(),
            token_authority: TokenAuthority::try_new()?,
            is_initialized: false,
            data_store_limit,
            imds_compat: false,
        })
    }

    /// This method is needed to check if data store is initialized.
    /// When a PATCH request is made on an uninitialized Mmds structure this method
    /// should return a NotFound error.
    fn check_data_store_initialized(&self) -> Result<(), MmdsDatastoreError> {
        if self.is_initialized {
            Ok(())
        } else {
            Err(MmdsDatastoreError::NotInitialized)
        }
    }

    /// Set the MMDS version.
    pub fn set_version(&mut self, version: MmdsVersion) {
        self.version = version;
    }

    /// Get the MMDS version.
    pub fn version(&self) -> MmdsVersion {
        self.version
    }

    /// Set the compatibility with EC2 IMDS.
    pub fn set_imds_compat(&mut self, imds_compat: bool) {
        self.imds_compat = imds_compat;
    }

    /// Get the compatibility with EC2 IMDS.
    pub fn imds_compat(&self) -> bool {
        self.imds_compat
    }

    /// Sets the Additional Authenticated Data to be used for encryption and
    /// decryption of the session token.
    pub fn set_aad(&mut self, instance_id: &str) {
        self.token_authority.set_aad(instance_id);
    }

    /// Checks if the provided token has not expired.
    pub fn is_valid_token(&self, token: &str) -> bool {
        self.token_authority.is_valid(token)
    }

    /// Generate a new Mmds token using the token authority.
// NOTE(review): the return type's generic arguments were missing. Since the
// body returns `generate_token_secret`'s result unchanged (no `?`/`map_err`),
// the error type must be `TokenError` — confirm against
// `TokenAuthority::generate_token_secret`.
pub fn generate_token(&mut self, ttl_seconds: u32) -> Result<String, TokenError> {
    self.token_authority.generate_token_secret(ttl_seconds)
}

/// set MMDS data store limit to `data_store_limit`
pub fn set_data_store_limit(&mut self, data_store_limit: usize) {
    self.data_store_limit = data_store_limit;
}

/// put `data` in MMDS data store
pub fn put_data(&mut self, data: Value) -> Result<(), MmdsDatastoreError> {
    // It is safe to unwrap because any map keys are all strings and
    // we are using default serializer which does not return error.
    if to_vec(&data).unwrap().len() > self.data_store_limit {
        Err(MmdsDatastoreError::DataStoreLimitExceeded)
    } else {
        self.data_store = data;
        self.is_initialized = true;
        Ok(())
    }
}

/// patch update MMDS data store with `patch_data`
pub fn patch_data(&mut self, patch_data: Value) -> Result<(), MmdsDatastoreError> {
    self.check_data_store_initialized()?;
    // Patch a clone first so the store is left untouched if the result
    // would exceed the size limit.
    let mut data_store_clone = self.data_store.clone();

    super::json_patch(&mut data_store_clone, &patch_data);
    // It is safe to unwrap because our data store keys are all strings and
    // we are using default serializer which does not return error.
    if to_vec(&data_store_clone).unwrap().len() > self.data_store_limit {
        return Err(MmdsDatastoreError::DataStoreLimitExceeded);
    }
    self.data_store = data_store_clone;
    Ok(())
}

/// return MMDS data store value
/// We do not check size of data_store before returning a result because due
/// to limit from put/patch the data_store can not be bigger than the limit
/// imposed by the server.
pub fn data_store_value(&self) -> Value {
    self.data_store.clone()
}

/// Returns the serde::Value in IMDS format plaintext.
/// Currently, only JSON objects and strings can be IMDS formatted.
/// /// See the docs for detailed description of the IMDS format: /// /// /// # Examples /// /// ```json /// { /// "key1" : { /// "key11": "value11" /// "key12": "value12" /// } /// "key2" : "value3" /// "key3" : "value3" /// } /// ``` /// /// IMDS formatted JSON object: /// ```text /// key1/ /// key2 /// key3 /// ``` /// /// JSON string: /// ```json /// "value" /// ``` /// /// IMDS formatted string: /// ```text /// value /// ``` /// /// If the `serde_json::Value` is not supported, an `UnsupportedValueType` error is returned. fn format_imds(json: &Value) -> Result { // If the `dict` is Value::Null, Error::NotFound is thrown. // If the `dict` is not a dictionary, a Vec with the value corresponding to // the key is returned. match json.as_object() { Some(map) => { let mut ret = Vec::new(); // When the object is a map, push all the keys in the Vec. for key in map.keys() { let mut key = key.clone(); // If the key corresponds to a dictionary, a "/" is appended // to the key name. if map[&key].is_object() { key.push('/'); } ret.push(key); } Ok(ret.join("\n")) } None => { // When the object is not a map, return the value. // Support only `Value::String`. match json.as_str() { Some(str_val) => Ok(str_val.to_string()), None => Err(MmdsDatastoreError::UnsupportedValueType), } } } } /// Returns the subtree located at path. When the path corresponds to a leaf, it returns the /// value. Returns Error::NotFound when the path is invalid. pub fn get_value( &self, path: String, format: OutputFormat, ) -> Result { // The pointer function splits the input by "/". With a trailing "/", pointer does not // know how to get the object. let value = if path.ends_with('/') { self.data_store.pointer(&path.as_str()[..(path.len() - 1)]) } else { self.data_store.pointer(path.as_str()) }; if let Some(json) = value { match self.imds_compat { // EC2 IMDS ignores the Accept header. 
true => Mmds::format_imds(json),
            false => match format {
                OutputFormat::Json => Ok(json.to_string()),
                OutputFormat::Imds => Mmds::format_imds(json),
            },
        }
    } else {
        Err(MmdsDatastoreError::NotFound)
    }
}
// (closes `impl Mmds`)
}

#[cfg(test)]
mod tests {
    use super::*;

    impl Mmds {
        // Test helper: serialized data store contents ("{}" while still unset/null).
        fn get_data_str(&self) -> String {
            if self.data_store.is_null() {
                return String::from("{}");
            }
            self.data_store.to_string()
        }
    }

    #[test]
    fn test_display_mmds_version() {
        assert_eq!(MmdsVersion::V1.to_string(), "V1");
        assert_eq!(MmdsVersion::V2.to_string(), "V2");
        assert_eq!(MmdsVersion::default().to_string(), "V1");
    }

    #[test]
    fn test_mmds_version() {
        let mut mmds = Mmds::default();

        // Test default MMDS version.
        assert_eq!(mmds.version(), MmdsVersion::V1);

        // Test setting MMDS version to v2.
        mmds.set_version(MmdsVersion::V2);
        assert_eq!(mmds.version(), MmdsVersion::V2);

        // Test setting MMDS version back to v1.
        mmds.set_version(MmdsVersion::V1);
        assert_eq!(mmds.version(), MmdsVersion::V1);
    }

    #[test]
    fn test_mmds() {
        let mut mmds = Mmds::default();

        assert_eq!(
            mmds.check_data_store_initialized().unwrap_err().to_string(),
            "The MMDS data store is not initialized.".to_string(),
        );

        let mut mmds_json = "{\"meta-data\":{\"iam\":\"dummy\"},\"user-data\":\"1522850095\"}";

        mmds.put_data(serde_json::from_str(mmds_json).unwrap())
            .unwrap();
        mmds.check_data_store_initialized().unwrap();

        assert_eq!(mmds.get_data_str(), mmds_json);

        // update the user-data field add test that patch works as expected
        let patch_json = "{\"user-data\":\"10\"}";
        mmds.patch_data(serde_json::from_str(patch_json).unwrap())
            .unwrap();
        mmds_json = "{\"meta-data\":{\"iam\":\"dummy\"},\"user-data\":\"10\"}";
        assert_eq!(mmds.get_data_str(), mmds_json);
    }

    #[test]
    fn test_get_value() {
        // Exercise both plain MMDS and EC2-IMDS-compatible modes.
        for imds_compat in [false, true] {
            let mut mmds = Mmds::default();
            mmds.set_imds_compat(imds_compat);
            let data = r#"{ "name": { "first": "John", "second": "Doe" }, "age": 43, "phones": [ "+401234567", "+441234567" ], "member": false, "shares_percentage": 12.12, "balance": -24,
"json_string": "{\n \"hello\": \"world\"\n}" }"#; let data_store: Value = serde_json::from_str(data).unwrap(); mmds.put_data(data_store).unwrap(); for format in [OutputFormat::Imds, OutputFormat::Json] { // Test invalid path. assert_eq!( mmds.get_value("/invalid_path".to_string(), format) .unwrap_err() .to_string(), MmdsDatastoreError::NotFound.to_string() ); // Retrieve an object. let expected = match (imds_compat, format) { (false, OutputFormat::Imds) | (true, _) => "first\nsecond", (false, OutputFormat::Json) => r#"{"first":"John","second":"Doe"}"#, }; assert_eq!( mmds.get_value("/name".to_string(), format).unwrap(), expected ); // Retrieve an integer. match (imds_compat, format) { (false, OutputFormat::Imds) | (true, _) => assert_eq!( mmds.get_value("/age".to_string(), format) .err() .unwrap() .to_string(), MmdsDatastoreError::UnsupportedValueType.to_string() ), (false, OutputFormat::Json) => { assert_eq!(mmds.get_value("/age".to_string(), format).unwrap(), "43") } }; // Test path ends with /; Value is a dictionary. // Retrieve an array. match (imds_compat, format) { (false, OutputFormat::Imds) | (true, _) => assert_eq!( mmds.get_value("/phones/".to_string(), format) .err() .unwrap() .to_string(), MmdsDatastoreError::UnsupportedValueType.to_string() ), (false, OutputFormat::Json) => assert_eq!( mmds.get_value("/phones/".to_string(), format).unwrap(), r#"["+401234567","+441234567"]"# ), } // Test path does NOT end with /; Value is a dictionary. match (imds_compat, format) { (false, OutputFormat::Imds) | (true, _) => assert_eq!( mmds.get_value("/phones".to_string(), format) .err() .unwrap() .to_string(), MmdsDatastoreError::UnsupportedValueType.to_string() ), (false, OutputFormat::Json) => assert_eq!( mmds.get_value("/phones".to_string(), format).unwrap(), r#"["+401234567","+441234567"]"# ), } // Retrieve the first element of an array. 
let expected = match (imds_compat, format) { (false, OutputFormat::Imds) | (true, _) => "+401234567", (false, OutputFormat::Json) => "\"+401234567\"", }; assert_eq!( mmds.get_value("/phones/0/".to_string(), format).unwrap(), expected ); // Retrieve a boolean. match (imds_compat, format) { (false, OutputFormat::Imds) | (true, _) => assert_eq!( mmds.get_value("/member".to_string(), format) .err() .unwrap() .to_string(), MmdsDatastoreError::UnsupportedValueType.to_string() ), (false, OutputFormat::Json) => assert_eq!( mmds.get_value("/member".to_string(), format).unwrap(), "false" ), } // Retrieve a float. match (imds_compat, format) { (false, OutputFormat::Imds) | (true, _) => assert_eq!( mmds.get_value("/shares_percentage".to_string(), format) .err() .unwrap() .to_string(), MmdsDatastoreError::UnsupportedValueType.to_string() ), (false, OutputFormat::Json) => assert_eq!( mmds.get_value("/shares_percentage".to_string(), format) .unwrap(), "12.12" ), } // Retrieve a negative integer. match (imds_compat, format) { (false, OutputFormat::Imds) | (true, _) => assert_eq!( mmds.get_value("/balance".to_string(), format) .err() .unwrap() .to_string(), MmdsDatastoreError::UnsupportedValueType.to_string(), ), (false, OutputFormat::Json) => assert_eq!( mmds.get_value("/balance".to_string(), format).unwrap(), "-24" ), } // Retrieve a string including escapes. 
let expected = match (imds_compat, format) { (false, OutputFormat::Imds) | (true, _) => "{\n \"hello\": \"world\"\n}", (false, OutputFormat::Json) => r#""{\n \"hello\": \"world\"\n}""#, }; assert_eq!( mmds.get_value("/json_string".to_string(), format).unwrap(), expected ); } } } #[test] fn test_update_data_store() { let mut mmds = Mmds::default(); let data = r#"{ "name": { "first": "John", "second": "Doe" }, "age": "43" }"#; let data_store: Value = serde_json::from_str(data).unwrap(); mmds.put_data(data_store).unwrap(); let data = r#"{ "name": { "first": "John", "second": "Doe" }, "age": "100" }"#; let data_store: Value = serde_json::from_str(data).unwrap(); mmds.patch_data(data_store).unwrap(); let data = r#"{ "name": { "first": "John", "second": "Doe" }, "age": 43 }"#; let data_store: Value = serde_json::from_str(data).unwrap(); mmds.put_data(data_store).unwrap(); let data = r#"{ "name": { "first": "John", "second": null }, "age": "43" }"#; let data_store: Value = serde_json::from_str(data).unwrap(); mmds.patch_data(data_store).unwrap(); let filling = (0..51151).map(|_| "X").collect::(); let data = "{\"new_key\": \"".to_string() + &filling + "\"}"; let data_store: Value = serde_json::from_str(&data).unwrap(); mmds.patch_data(data_store).unwrap(); let data = "{\"new_key2\" : \"smth\"}"; let data_store: Value = serde_json::from_str(data).unwrap(); assert_eq!( mmds.patch_data(data_store).unwrap_err().to_string(), MmdsDatastoreError::DataStoreLimitExceeded.to_string() ); assert!(!mmds.get_data_str().contains("smth")); let data = "{\"new_key\" : \"smth\"}"; let data_store: Value = serde_json::from_str(data).unwrap(); mmds.patch_data(data_store).unwrap(); assert!(mmds.get_data_str().contains("smth")); assert_eq!(mmds.get_data_str().len(), 53); let data = "{\"new_key2\" : \"smth2\"}"; let data_store: Value = serde_json::from_str(data).unwrap(); mmds.patch_data(data_store).unwrap(); assert!(mmds.get_data_str().contains("smth2")); assert_eq!(mmds.get_data_str().len(), 
72);
    }

    #[test]
    fn test_put_size_limit() {
        let mut mmds = Mmds::default();
        // Build a payload that exceeds the data store size limit so the PUT
        // is rejected outright.
        let filling = (0..51300).map(|_| "X").collect::<String>();
        let data = "{\"key\": \"".to_string() + &filling + "\"}";
        let data_store: Value = serde_json::from_str(&data).unwrap();
        assert_eq!(
            mmds.put_data(data_store).unwrap_err().to_string(),
            MmdsDatastoreError::DataStoreLimitExceeded.to_string()
        );
        // A failed PUT must not leave partial data behind: the store still
        // serializes to "{}" (length 2).
        assert_eq!(mmds.get_data_str().len(), 2);
    }
}

================================================
FILE: src/vmm/src/mmds/mod.rs
================================================
// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

/// MMDS data store
pub mod data_store;
/// MMDS network stack
pub mod ns;
/// Defines the structures needed for saving/restoring MmdsNetworkStack.
pub mod persist;
mod token;
/// MMDS token headers
pub mod token_headers;

use std::sync::{Arc, Mutex};

use micro_http::{
    Body, HttpHeaderError, MediaType, Method, Request, RequestError, Response, StatusCode, Version,
};
use serde_json::{Map, Value};

use crate::logger::{IncMetric, METRICS};
use crate::mmds::data_store::{Mmds, MmdsDatastoreError as MmdsError, MmdsVersion, OutputFormat};
use crate::mmds::token::PATH_TO_TOKEN;
use crate::mmds::token_headers::{
    X_AWS_EC2_METADATA_TOKEN_HEADER, X_AWS_EC2_METADATA_TOKEN_SSL_SECONDS_HEADER,
    X_FORWARDED_FOR_HEADER, X_METADATA_TOKEN_HEADER, X_METADATA_TOKEN_TTL_SECONDS_HEADER,
    get_header_value_pair,
};

#[rustfmt::skip]
#[derive(Debug, thiserror::Error, displaydoc::Display)]
/// MMDS token errors
pub enum VmmMmdsError {
    /// MMDS token not valid.
    InvalidToken,
    /// Invalid URI.
    InvalidURI,
    /// Not allowed HTTP method.
    MethodNotAllowed,
    /// No MMDS token provided. Use `X-metadata-token` or `X-aws-ec2-metadata-token` header to specify the session token.
    NoTokenProvided,
    /// Token time to live value not found. Use `X-metadata-token-ttl-seconds` or `X-aws-ec2-metadata-token-ttl-seconds` header to specify the token's lifetime.
NoTtlProvided, /// Resource not found: {0}. ResourceNotFound(String), } impl From for OutputFormat { fn from(media_type: MediaType) -> Self { match media_type { MediaType::ApplicationJson => OutputFormat::Json, MediaType::PlainText => OutputFormat::Imds, } } } // Builds the `micro_http::Response` with a given HTTP version, status code, and body. fn build_response( http_version: Version, status_code: StatusCode, content_type: MediaType, body: Body, ) -> Response { let mut response = Response::new(http_version, status_code); response.set_content_type(content_type); response.set_body(body); response } /// Patch provided JSON document (given as `serde_json::Value`) in-place with JSON Merge Patch /// [RFC 7396](https://tools.ietf.org/html/rfc7396). pub fn json_patch(target: &mut Value, patch: &Value) { if patch.is_object() { if !target.is_object() { // Replace target with a serde_json object so we can recursively copy patch values. *target = Value::Object(Map::new()); } // This is safe since we make sure patch and target are objects beforehand. let doc = target.as_object_mut().unwrap(); for (key, value) in patch.as_object().unwrap() { if value.is_null() { // If the value in the patch is null we remove the entry. doc.remove(key.as_str()); } else { // Recursive call to update target document. // If `key` is not in the target document (it's a new field defined in `patch`) // insert a null placeholder and pass it as the new target // so we can insert new values recursively. json_patch(doc.entry(key.as_str()).or_insert(Value::Null), value); } } } else { *target = patch.clone(); } } // Make the URI a correct JSON pointer value. fn sanitize_uri(mut uri: String) -> String { let mut len = u32::MAX as usize; // Loop while the deduping decreases the sanitized len. // Each iteration will attempt to dedup "//". 
while uri.len() < len {
        len = uri.len();
        uri = uri.replace("//", "/");
    }
    uri
}

/// Build a response for `request` and return response based on MMDS version
pub fn convert_to_response(mmds: Arc<Mutex<Mmds>>, request: Request) -> Response {
    // Check URI is not empty
    let uri = request.uri().get_abs_path();
    if uri.is_empty() {
        return build_response(
            request.http_version(),
            StatusCode::BadRequest,
            MediaType::PlainText,
            Body::new(VmmMmdsError::InvalidURI.to_string()),
        );
    }

    let mut mmds_guard = mmds.lock().expect("Poisoned lock");

    // Allow only GET and PUT requests
    match request.method() {
        Method::Get => match mmds_guard.version() {
            MmdsVersion::V1 => respond_to_get_request_v1(&mmds_guard, request),
            MmdsVersion::V2 => respond_to_get_request_v2(&mmds_guard, request),
        },
        Method::Put => respond_to_put_request(&mut mmds_guard, request),
        _ => {
            let mut response = build_response(
                request.http_version(),
                StatusCode::MethodNotAllowed,
                MediaType::PlainText,
                Body::new(VmmMmdsError::MethodNotAllowed.to_string()),
            );
            response.allow_method(Method::Get);
            response.allow_method(Method::Put);
            response
        }
    }
}

// MMDS v1: tokens are optional. Their presence/validity is only recorded in
// metrics; the request is always served.
fn respond_to_get_request_v1(mmds: &Mmds, request: Request) -> Response {
    match get_header_value_pair(
        request.headers.custom_entries(),
        &[X_METADATA_TOKEN_HEADER, X_AWS_EC2_METADATA_TOKEN_HEADER],
    ) {
        Some((_, token)) => {
            if !mmds.is_valid_token(token) {
                METRICS.mmds.rx_invalid_token.inc();
            }
        }
        None => {
            METRICS.mmds.rx_no_token.inc();
        }
    }
    respond_to_get_request(mmds, request)
}

// MMDS v2: a valid session token is mandatory; missing or invalid tokens
// yield `401 Unauthorized`.
fn respond_to_get_request_v2(mmds: &Mmds, request: Request) -> Response {
    // Check whether a token exists.
    let token = match get_header_value_pair(
        request.headers.custom_entries(),
        &[X_METADATA_TOKEN_HEADER, X_AWS_EC2_METADATA_TOKEN_HEADER],
    ) {
        Some((_, token)) => token,
        None => {
            METRICS.mmds.rx_no_token.inc();
            let error_msg = VmmMmdsError::NoTokenProvided.to_string();
            return build_response(
                request.http_version(),
                StatusCode::Unauthorized,
                MediaType::PlainText,
                Body::new(error_msg),
            );
        }
    };

    // Validate the token.
match mmds.is_valid_token(token) {
        true => respond_to_get_request(mmds, request),
        false => {
            // Token header was present but does not match a live session token.
            METRICS.mmds.rx_invalid_token.inc();
            build_response(
                request.http_version(),
                StatusCode::Unauthorized,
                MediaType::PlainText,
                Body::new(VmmMmdsError::InvalidToken.to_string()),
            )
        }
    }
}

// Serve a GET request from the data store; shared by the v1 and v2 paths.
fn respond_to_get_request(mmds: &Mmds, request: Request) -> Response {
    let uri = request.uri().get_abs_path();
    // The data store expects a strict json path, so we need to
    // sanitize the URI.
    let json_path = sanitize_uri(uri.to_string());

    // The `Accept` header picks the output format (JSON vs IMDS plain text).
    let content_type = request.headers.accept();
    match mmds.get_value(json_path, content_type.into()) {
        Ok(response_body) => build_response(
            request.http_version(),
            StatusCode::OK,
            content_type,
            Body::new(response_body),
        ),
        Err(err) => match err {
            MmdsError::NotFound => {
                let error_msg = VmmMmdsError::ResourceNotFound(String::from(uri)).to_string();
                build_response(
                    request.http_version(),
                    StatusCode::NotFound,
                    MediaType::PlainText,
                    Body::new(error_msg),
                )
            }
            MmdsError::UnsupportedValueType => build_response(
                request.http_version(),
                StatusCode::NotImplemented,
                MediaType::PlainText,
                Body::new(err.to_string()),
            ),
            MmdsError::DataStoreLimitExceeded => build_response(
                request.http_version(),
                StatusCode::PayloadTooLarge,
                MediaType::PlainText,
                Body::new(err.to_string()),
            ),
            // `get_value` only produces the three errors handled above.
            _ => unreachable!(),
        },
    }
}

// Handle PUT requests; only token generation (PUT on the token path) is
// supported.
fn respond_to_put_request(mmds: &mut Mmds, request: Request) -> Response {
    let custom_headers = request.headers.custom_entries();

    // Reject `PUT` requests that contain `X-Forwarded-For` header.
    if let Some((header, _)) = get_header_value_pair(custom_headers, &[X_FORWARDED_FOR_HEADER]) {
        let error_msg =
            RequestError::HeaderError(HttpHeaderError::UnsupportedName(header.to_string()))
                .to_string();
        return build_response(
            request.http_version(),
            StatusCode::BadRequest,
            MediaType::PlainText,
            Body::new(error_msg),
        );
    }

    let uri = request.uri().get_abs_path();
    // Sanitize the URI into a strict json path.
let json_path = sanitize_uri(uri.to_string()); // Only accept PUT requests towards TOKEN_PATH. if json_path != PATH_TO_TOKEN { let error_msg = VmmMmdsError::ResourceNotFound(String::from(uri)).to_string(); return build_response( request.http_version(), StatusCode::NotFound, MediaType::PlainText, Body::new(error_msg), ); } // Get token lifetime value. let (header, ttl_seconds) = match get_header_value_pair( custom_headers, &[ X_METADATA_TOKEN_TTL_SECONDS_HEADER, X_AWS_EC2_METADATA_TOKEN_SSL_SECONDS_HEADER, ], ) { // Header found Some((header, value)) => match value.parse::() { Ok(ttl_seconds) => (header, ttl_seconds), Err(_) => { return build_response( request.http_version(), StatusCode::BadRequest, MediaType::PlainText, Body::new( RequestError::HeaderError(HttpHeaderError::InvalidValue( header.into(), value.into(), )) .to_string(), ), ); } }, // Header not found None => { return build_response( request.http_version(), StatusCode::BadRequest, MediaType::PlainText, Body::new(VmmMmdsError::NoTtlProvided.to_string()), ); } }; // Generate token. let result = mmds.generate_token(ttl_seconds); match result { Ok(token) => { let mut response = build_response( request.http_version(), StatusCode::OK, MediaType::PlainText, Body::new(token), ); let custom_headers = [(header.into(), ttl_seconds.to_string())].into(); // Safe to unwrap because the header name and the value are valid as US-ASCII. // - `header` is either `X_METADATA_TOKEN_TTL_SECONDS_HEADER` or // `X_AWS_EC2_METADATA_TOKEN_SSL_SECONDS_HEADER`. // - `ttl_seconds` is a decimal number between `MIN_TOKEN_TTL_SECONDS` and // `MAX_TOKEN_TTL_SECONDS`. 
response.set_custom_headers(&custom_headers).unwrap();
            response
        }
        Err(err) => build_response(
            request.http_version(),
            StatusCode::BadRequest,
            MediaType::PlainText,
            Body::new(err.to_string()),
        ),
    }
}

#[cfg(test)]
mod tests {
    use std::time::Duration;

    use super::*;
    use crate::mmds::token::{MAX_TOKEN_TTL_SECONDS, MIN_TOKEN_TTL_SECONDS};

    // Build a shared data store pre-populated with a small JSON document used
    // by the tests below.
    fn populate_mmds() -> Arc<Mutex<Mmds>> {
        let data = r#"{ "name": { "first": "John", "second": "Doe" }, "age": 43, "phones": { "home": { "RO": "+401234567", "UK": "+441234567" }, "mobile": "+442345678" } }"#;
        let mmds = Arc::new(Mutex::new(Mmds::default()));
        mmds.lock()
            .expect("Poisoned lock")
            .put_data(serde_json::from_str(data).unwrap())
            .unwrap();
        mmds
    }

    // Same document as `populate_mmds`, with keys in serialization order.
    fn get_json_data() -> &'static str {
        r#"{ "age": 43, "name": { "first": "John", "second": "Doe" }, "phones": { "home": { "RO": "+401234567", "UK": "+441234567" }, "mobile": "+442345678" } }"#
    }

    // Expected IMDS plain-text listing of the document's top-level keys.
    fn get_plain_text_data() -> &'static str {
        "age\nname/\nphones/"
    }

    // Parse `request_bytes` and pair it with the 200 OK response expected for
    // the given media type.
    fn generate_request_and_expected_response(
        request_bytes: &[u8],
        media_type: MediaType,
    ) -> (Request, Response) {
        let request = Request::try_from(request_bytes, None).unwrap();
        let mut response = Response::new(Version::Http10, StatusCode::OK);
        response.set_content_type(media_type);
        let body = match media_type {
            MediaType::ApplicationJson => {
                // Compare against compact JSON: drop all whitespace.
                let mut body = get_json_data().to_string();
                body.retain(|c| !c.is_whitespace());
                body
            }
            MediaType::PlainText => get_plain_text_data().to_string(),
        };
        response.set_body(Body::new(body));
        (request, response)
    }

    #[test]
    fn test_sanitize_uri() {
        let sanitized = "/a/b/c/d";
        assert_eq!(sanitize_uri("/a/b/c/d".to_owned()), sanitized);
        assert_eq!(sanitize_uri("/a////b/c//d".to_owned()), sanitized);
        assert_eq!(sanitize_uri("/a///b/c///d".to_owned()), sanitized);
        assert_eq!(sanitize_uri("/a//b/c////d".to_owned()), sanitized);
        assert_eq!(sanitize_uri("///////a//b///c//d".to_owned()), sanitized);
        assert_eq!(sanitize_uri("a".to_owned()), "a");
        assert_eq!(sanitize_uri("a/".to_owned()), "a/");
assert_eq!(sanitize_uri("aa//".to_owned()), "aa/"); assert_eq!(sanitize_uri("aa".to_owned()), "aa"); assert_eq!(sanitize_uri("/".to_owned()), "/"); assert_eq!(sanitize_uri("".to_owned()), ""); assert_eq!(sanitize_uri("////".to_owned()), "/"); assert_eq!(sanitize_uri("aa//bb///cc//d".to_owned()), "aa/bb/cc/d"); assert_eq!(sanitize_uri("//aa//bb///cc//d".to_owned()), "/aa/bb/cc/d"); } #[test] fn test_request_accept_header() { // This test validates the response `Content-Type` header and the response content for // various request `Accept` headers. // Populate MMDS with data. let mmds = populate_mmds(); // Test without `Accept` header. micro-http defaults to `Accept: text/plain`. let (request, expected_response) = generate_request_and_expected_response( b"GET http://169.254.169.254/ HTTP/1.0\r\n\r\n", MediaType::PlainText, ); assert_eq!( convert_to_response(mmds.clone(), request), expected_response ); // Test with empty `Accept` header. micro-http defaults to `Accept: text/plain`. let (request, expected_response) = generate_request_and_expected_response( b"GET http://169.254.169.254/ HTTP/1.0\r\n\" Accept:\r\n\r\n", MediaType::PlainText, ); assert_eq!( convert_to_response(mmds.clone(), request), expected_response ); // Test with `Accept: */*` header. let (request, expected_response) = generate_request_and_expected_response( b"GET http://169.254.169.254/ HTTP/1.0\r\n\" Accept: */*\r\n\r\n", MediaType::PlainText, ); assert_eq!( convert_to_response(mmds.clone(), request), expected_response ); // Test with `Accept: text/plain`. let (request, expected_response) = generate_request_and_expected_response( b"GET http://169.254.169.254/ HTTP/1.0\r\n\ Accept: text/plain\r\n\r\n", MediaType::PlainText, ); assert_eq!( convert_to_response(mmds.clone(), request), expected_response ); // Test with `Accept: application/json`. 
let (request, expected_response) = generate_request_and_expected_response( b"GET http://169.254.169.254/ HTTP/1.0\r\n\ Accept: application/json\r\n\r\n", MediaType::ApplicationJson, ); assert_eq!(convert_to_response(mmds, request), expected_response); } // Test the version-independent error paths of `convert_to_response()`. #[test] fn test_convert_to_response_negative() { for version in [MmdsVersion::V1, MmdsVersion::V2] { let mmds = populate_mmds(); mmds.lock().expect("Poisoned lock").set_version(version); // Test InvalidURI (empty absolute path). let request = Request::try_from(b"GET http:// HTTP/1.0\r\n\r\n", None).unwrap(); let mut expected_response = Response::new(Version::Http10, StatusCode::BadRequest); expected_response.set_content_type(MediaType::PlainText); expected_response.set_body(Body::new(VmmMmdsError::InvalidURI.to_string())); let actual_response = convert_to_response(mmds.clone(), request); assert_eq!(actual_response, expected_response); // Test MethodNotAllowed (PATCH method). let request = Request::try_from(b"PATCH http://169.254.169.255/ HTTP/1.0\r\n\r\n", None).unwrap(); let mut expected_response = Response::new(Version::Http10, StatusCode::MethodNotAllowed); expected_response.set_content_type(MediaType::PlainText); expected_response.set_body(Body::new(VmmMmdsError::MethodNotAllowed.to_string())); expected_response.allow_method(Method::Get); expected_response.allow_method(Method::Put); let actual_response = convert_to_response(mmds.clone(), request); assert_eq!(actual_response, expected_response); } } #[test] fn test_respond_to_request_mmdsv1() { let mmds = populate_mmds(); mmds.lock() .expect("Poisoned lock") .set_version(MmdsVersion::V1); // Test valid v1 GET request. 
let (request, expected_response) = generate_request_and_expected_response( b"GET http://169.254.169.254/ HTTP/1.0\r\n\ Accept: application/json\r\n\r\n", MediaType::ApplicationJson, ); let prev_rx_invalid_token = METRICS.mmds.rx_invalid_token.count(); let prev_rx_no_token = METRICS.mmds.rx_no_token.count(); let actual_response = convert_to_response(mmds.clone(), request); assert_eq!(actual_response, expected_response); assert_eq!(prev_rx_invalid_token, METRICS.mmds.rx_invalid_token.count()); assert_eq!(prev_rx_no_token + 1, METRICS.mmds.rx_no_token.count()); // Test valid PUT request to generate a valid token. let request = Request::try_from( b"PUT http://169.254.169.254/latest/api/token HTTP/1.0\r\n\ X-metadata-token-ttl-seconds: 60\r\n\r\n", None, ) .unwrap(); let actual_response = convert_to_response(mmds.clone(), request); assert_eq!(actual_response.status(), StatusCode::OK); assert_eq!(actual_response.content_type(), MediaType::PlainText); let valid_token = String::from_utf8(actual_response.body().unwrap().body).unwrap(); // Test valid v2 GET request. #[rustfmt::skip] let (request, expected_response) = generate_request_and_expected_response( format!( "GET http://169.254.169.254/ HTTP/1.0\r\n\ Accept: application/json\r\n\ X-metadata-token: {valid_token}\r\n\r\n", ) .as_bytes(), MediaType::ApplicationJson, ); let prev_rx_invalid_token = METRICS.mmds.rx_invalid_token.count(); let prev_rx_no_token = METRICS.mmds.rx_no_token.count(); let actual_response = convert_to_response(mmds.clone(), request); assert_eq!(actual_response, expected_response); assert_eq!(prev_rx_invalid_token, METRICS.mmds.rx_invalid_token.count()); assert_eq!(prev_rx_no_token, METRICS.mmds.rx_no_token.count()); // Test GET request with invalid token is accepted when v1 is configured. 
let (request, expected_response) = generate_request_and_expected_response( b"GET http://169.254.169.254/ HTTP/1.0\r\n\ Accept: application/json\r\n\ X-metadata-token: INVALID_TOKEN\r\n\r\n", MediaType::ApplicationJson, ); let prev_rx_invalid_token = METRICS.mmds.rx_invalid_token.count(); let prev_rx_no_token = METRICS.mmds.rx_no_token.count(); let actual_response = convert_to_response(mmds, request); assert_eq!(actual_response, expected_response); assert_eq!( prev_rx_invalid_token + 1, METRICS.mmds.rx_invalid_token.count() ); assert_eq!(prev_rx_no_token, METRICS.mmds.rx_no_token.count()); } #[test] fn test_respond_to_request_mmdsv2() { let mmds = populate_mmds(); mmds.lock() .expect("Poisoned lock") .set_version(MmdsVersion::V2); // Test valid PUT to generate a valid token. let request = Request::try_from( b"PUT http://169.254.169.254/latest/api/token HTTP/1.0\r\n\ X-metadata-token-ttl-seconds: 60\r\n\r\n", None, ) .unwrap(); let actual_response = convert_to_response(mmds.clone(), request); assert_eq!(actual_response.status(), StatusCode::OK); assert_eq!(actual_response.content_type(), MediaType::PlainText); let valid_token = String::from_utf8(actual_response.body().unwrap().body).unwrap(); // Test valid GET. #[rustfmt::skip] let (request, expected_response) = generate_request_and_expected_response( format!( "GET http://169.254.169.254/ HTTP/1.0\r\n\ Accept: application/json\r\n\ X-metadata-token: {valid_token}\r\n\r\n", ) .as_bytes(), MediaType::ApplicationJson, ); let prev_rx_invalid_token = METRICS.mmds.rx_invalid_token.count(); let prev_rx_no_token = METRICS.mmds.rx_no_token.count(); let actual_response = convert_to_response(mmds.clone(), request); assert_eq!(actual_response, expected_response); assert_eq!(prev_rx_invalid_token, METRICS.mmds.rx_invalid_token.count()); assert_eq!(prev_rx_no_token, METRICS.mmds.rx_no_token.count()); // Test GET request without token should return Unauthorized status code. 
let request = Request::try_from(b"GET http://169.254.169.254/ HTTP/1.0\r\n\r\n", None).unwrap(); let mut expected_response = Response::new(Version::Http10, StatusCode::Unauthorized); expected_response.set_content_type(MediaType::PlainText); expected_response.set_body(Body::new(VmmMmdsError::NoTokenProvided.to_string())); let prev_rx_no_token = METRICS.mmds.rx_no_token.count(); let actual_response = convert_to_response(mmds.clone(), request); assert_eq!(actual_response, expected_response); assert_eq!(prev_rx_no_token + 1, METRICS.mmds.rx_no_token.count()); // Create an expired token. let request = Request::try_from( b"PUT http://169.254.169.254/latest/api/token HTTP/1.0\r\n\ X-metadata-token-ttl-seconds: 1\r\n\r\n", None, ) .unwrap(); let actual_response = convert_to_response(mmds.clone(), request); assert_eq!(actual_response.status(), StatusCode::OK); assert_eq!(actual_response.content_type(), MediaType::PlainText); let expired_token = String::from_utf8(actual_response.body().unwrap().body).unwrap(); std::thread::sleep(Duration::from_secs(1)); // Test GET request with invalid tokens. 
let tokens = ["INVALID_TOKEN", &expired_token]; for token in tokens.iter() { #[rustfmt::skip] let request = Request::try_from( format!( "GET http://169.254.169.254/ HTTP/1.0\r\n\ X-metadata-token: {token}\r\n\r\n", ) .as_bytes(), None, ) .unwrap(); let mut expected_response = Response::new(Version::Http10, StatusCode::Unauthorized); expected_response.set_content_type(MediaType::PlainText); expected_response.set_body(Body::new(VmmMmdsError::InvalidToken.to_string())); let prev_rx_invalid_token = METRICS.mmds.rx_invalid_token.count(); let prev_rx_no_token = METRICS.mmds.rx_no_token.count(); let actual_response = convert_to_response(mmds.clone(), request); assert_eq!(actual_response, expected_response); assert_eq!( prev_rx_invalid_token + 1, METRICS.mmds.rx_invalid_token.count() ); assert_eq!(prev_rx_no_token, METRICS.mmds.rx_no_token.count()); } } // Test the version-independent parts of GET request #[test] fn test_respond_to_get_request() { for version in [MmdsVersion::V1, MmdsVersion::V2] { let mmds = populate_mmds(); mmds.lock().expect("Poisoned lock").set_version(version); // Generate a token let request = Request::try_from( b"PUT http://169.254.169.254/latest/api/token HTTP/1.0\r\n\ X-metadata-token-ttl-seconds: 60\r\n\r\n", None, ) .unwrap(); let actual_response = convert_to_response(mmds.clone(), request); assert_eq!(actual_response.status(), StatusCode::OK); assert_eq!(actual_response.content_type(), MediaType::PlainText); let valid_token = String::from_utf8(actual_response.body().unwrap().body).unwrap(); // Test invalid path #[rustfmt::skip] let request = Request::try_from( format!( "GET http://169.254.169.254/invalid HTTP/1.0\r\n\ X-metadata-token: {valid_token}\r\n\r\n", ) .as_bytes(), None, ) .unwrap(); let mut expected_response = Response::new(Version::Http10, StatusCode::NotFound); expected_response.set_content_type(MediaType::PlainText); expected_response.set_body(Body::new( VmmMmdsError::ResourceNotFound(String::from("/invalid")).to_string(), )); let 
actual_response = convert_to_response(mmds.clone(), request); assert_eq!(actual_response, expected_response); // Test unsupported type #[rustfmt::skip] let request = Request::try_from( format!( "GET /age HTTP/1.1\r\n\ X-metadata-token: {valid_token}\r\n\r\n", ) .as_bytes(), None, ) .unwrap(); let mut expected_response = Response::new(Version::Http11, StatusCode::NotImplemented); expected_response.set_content_type(MediaType::PlainText); let body = "Cannot retrieve value. The value has an unsupported type.".to_string(); expected_response.set_body(Body::new(body)); let actual_response = convert_to_response(mmds.clone(), request); assert_eq!(actual_response, expected_response); // Test invalid `X-metadata-token-ttl-seconds` value is ignored if not PUT request. #[rustfmt::skip] let (request, expected_response) = generate_request_and_expected_response( format!( "GET http://169.254.169.254/ HTTP/1.0\r\n\ X-metadata-token: {valid_token}\r\n\ X-metadata-token-ttl-seconds: application/json\r\n\r\n", ) .as_bytes(), MediaType::PlainText, ); let actual_response = convert_to_response(mmds.clone(), request); assert_eq!(actual_response, expected_response); } } // Test PUT request (version-independent) #[test] fn test_respond_to_put_request() { for version in [MmdsVersion::V1, MmdsVersion::V2] { let mmds = populate_mmds(); mmds.lock().expect("Poisoned lock").set_version(version); // Test valid PUT let request = Request::try_from( b"PUT http://169.254.169.254/latest/api/token HTTP/1.0\r\n\ X-metadata-token-ttl-seconds: 60\r\n\r\n", None, ) .unwrap(); let actual_response = convert_to_response(mmds.clone(), request); assert_eq!(actual_response.status(), StatusCode::OK); assert_eq!(actual_response.content_type(), MediaType::PlainText); assert_eq!( actual_response .custom_headers() .get("X-metadata-token-ttl-seconds") .unwrap(), "60" ); // Test unsupported `X-Forwarded-For` header for header in ["X-Forwarded-For", "x-forwarded-for", "X-fOrWaRdEd-FoR"] { #[rustfmt::skip] let request = 
Request::try_from( format!( "PUT http://169.254.169.254/latest/api/token HTTP/1.0\r\n\ {header}: 203.0.113.195\r\n\r\n" ) .as_bytes(), None, ) .unwrap(); let mut expected_response = Response::new(Version::Http10, StatusCode::BadRequest); expected_response.set_content_type(MediaType::PlainText); expected_response.set_body(Body::new(format!( "Invalid header. Reason: Unsupported header name. Key: {header}" ))); let actual_response = convert_to_response(mmds.clone(), request); assert_eq!(actual_response, expected_response); } // Test invalid path let request = Request::try_from( b"PUT http://169.254.169.254/token HTTP/1.0\r\n\ X-metadata-token-ttl-seconds: 60\r\n\r\n", None, ) .unwrap(); let mut expected_response = Response::new(Version::Http10, StatusCode::NotFound); expected_response.set_content_type(MediaType::PlainText); expected_response.set_body(Body::new( VmmMmdsError::ResourceNotFound(String::from("/token")).to_string(), )); let actual_response = convert_to_response(mmds.clone(), request); assert_eq!(actual_response, expected_response); // Test non-numeric `X-metadata-token-ttl-seconds` value let request = Request::try_from( b"PUT http://169.254.169.254/latest/api/token HTTP/1.0\r\n\ X-metadata-token-ttl-seconds: application/json\r\n\r\n", None, ) .unwrap(); let mut expected_response = Response::new(Version::Http10, StatusCode::BadRequest); expected_response.set_content_type(MediaType::PlainText); #[rustfmt::skip] expected_response.set_body(Body::new( "Invalid header. Reason: Invalid value. 
\ Key:X-metadata-token-ttl-seconds; Value:application/json" .to_string(), )); let actual_response = convert_to_response(mmds.clone(), request); assert_eq!(actual_response, expected_response); // Test out-of-range `X-metadata-token-ttl-seconds` value let invalid_values = [MIN_TOKEN_TTL_SECONDS - 1, MAX_TOKEN_TTL_SECONDS + 1]; for invalid_value in invalid_values.iter() { #[rustfmt::skip] let request = Request::try_from( format!( "PUT http://169.254.169.254/latest/api/token HTTP/1.0\r\n\ X-metadata-token-ttl-seconds: {invalid_value}\r\n\r\n", ) .as_bytes(), None, ) .unwrap(); let mut expected_response = Response::new(Version::Http10, StatusCode::BadRequest); expected_response.set_content_type(MediaType::PlainText); #[rustfmt::skip] let error_msg = format!( "Invalid time to live value provided for token: {invalid_value}. \ Please provide a value between {MIN_TOKEN_TTL_SECONDS} and {MAX_TOKEN_TTL_SECONDS}.", ); expected_response.set_body(Body::new(error_msg)); let actual_response = convert_to_response(mmds.clone(), request); assert_eq!(actual_response, expected_response); } // Test lack of `X-metadata-token-ttl-seconds` header let request = Request::try_from( b"PUT http://169.254.169.254/latest/api/token HTTP/1.0\r\n\r\n", None, ) .unwrap(); let mut expected_response = Response::new(Version::Http10, StatusCode::BadRequest); expected_response.set_content_type(MediaType::PlainText); expected_response.set_body(Body::new(VmmMmdsError::NoTtlProvided.to_string())); let actual_response = convert_to_response(mmds.clone(), request); assert_eq!(actual_response, expected_response); } } #[test] fn test_json_patch() { let mut data = serde_json::json!({ "name": { "first": "John", "second": "Doe" }, "age": "43", "phones": { "home": { "RO": "+40 1234567", "UK": "+44 1234567" }, "mobile": "+44 2345678" } }); let patch = serde_json::json!({ "name": { "second": null, "last": "Kennedy" }, "age": "44", "phones": { "home": "+44 1234567", "mobile": { "RO": "+40 2345678", "UK": "+44 2345678" } 
} }); json_patch(&mut data, &patch); // Test value replacement in target document. assert_eq!(data["age"], patch["age"]); // Test null value removal from target document. assert_eq!(data["name"]["second"], Value::Null); // Test add value to target document. assert_eq!(data["name"]["last"], patch["name"]["last"]); assert!(!data["phones"]["home"].is_object()); assert_eq!(data["phones"]["home"], patch["phones"]["home"]); assert!(data["phones"]["mobile"].is_object()); assert_eq!( data["phones"]["mobile"]["RO"], patch["phones"]["mobile"]["RO"] ); assert_eq!( data["phones"]["mobile"]["UK"], patch["phones"]["mobile"]["UK"] ); } #[test] fn test_error_display() { assert_eq!( VmmMmdsError::InvalidToken.to_string(), "MMDS token not valid." ); assert_eq!(VmmMmdsError::InvalidURI.to_string(), "Invalid URI."); assert_eq!( VmmMmdsError::MethodNotAllowed.to_string(), "Not allowed HTTP method." ); assert_eq!( VmmMmdsError::NoTokenProvided.to_string(), "No MMDS token provided. Use `X-metadata-token` or `X-aws-ec2-metadata-token` header \ to specify the session token." ); assert_eq!( VmmMmdsError::NoTtlProvided.to_string(), "Token time to live value not found. Use `X-metadata-token-ttl-seconds` or \ `X-aws-ec2-metadata-token-ttl-seconds` header to specify the token's lifetime." ); assert_eq!( VmmMmdsError::ResourceNotFound(String::from("invalid/")).to_string(), "Resource not found: invalid/." ) } } ================================================ FILE: src/vmm/src/mmds/ns.rs ================================================ // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 // TODO: get rid of this when splitting dumbo into public and internal parts. 
#![allow(missing_docs)] use std::convert::From; use std::net::Ipv4Addr; use std::num::NonZeroUsize; use std::str::FromStr; use std::sync::{Arc, Mutex}; use utils::time::timestamp_cycles; use crate::dumbo::pdu::Incomplete; use crate::dumbo::pdu::arp::{ ArpError as ArpFrameError, ETH_IPV4_FRAME_LEN, EthIPv4ArpFrame, test_speculative_tpa, }; use crate::dumbo::pdu::ethernet::{ ETHERTYPE_ARP, ETHERTYPE_IPV4, EthernetError as EthernetFrameError, EthernetFrame, }; use crate::dumbo::pdu::ipv4::{ IPv4Packet, Ipv4Error as IPv4PacketError, PROTOCOL_TCP, test_speculative_dst_addr, }; use crate::dumbo::pdu::tcp::TcpError as TcpSegmentError; use crate::dumbo::tcp::NextSegmentStatus; use crate::dumbo::tcp::handler::{RecvEvent, TcpIPv4Handler, WriteEvent, WriteNextError}; use crate::logger::{IncMetric, METRICS}; use crate::mmds::data_store::Mmds; use crate::utils::net::mac::MacAddr; const DEFAULT_MAC_ADDR: &str = "06:01:23:45:67:01"; const DEFAULT_IPV4_ADDR: [u8; 4] = [169, 254, 169, 254]; const DEFAULT_TCP_PORT: u16 = 80; const DEFAULT_MAX_CONNECTIONS: usize = 30; const DEFAULT_MAX_PENDING_RESETS: usize = 100; #[derive(Debug, PartialEq, thiserror::Error, displaydoc::Display)] enum WriteArpFrameError { /// NoPendingArpReply NoPendingArpReply, /// ARP error: {0} Arp(#[from] ArpFrameError), /// Ethernet error: {0} Ethernet(#[from] EthernetFrameError), } #[derive(Debug, PartialEq, thiserror::Error, displaydoc::Display)] enum WritePacketError { /// IPv4Packet error: {0} IPv4Packet(#[from] IPv4PacketError), /// Ethernet error: {0} Ethernet(#[from] EthernetFrameError), /// TcpSegment error: {0} TcpSegment(#[from] TcpSegmentError), /// WriteNext error: {0} WriteNext(#[from] WriteNextError), } #[derive(Debug)] pub struct MmdsNetworkStack { // Network interface MAC address used by frames/packets heading to MMDS server. remote_mac_addr: MacAddr, // The Ethernet MAC address of the MMDS server. pub(crate) mac_addr: MacAddr, // MMDS server IPv4 address. 
pub ipv4_addr: Ipv4Addr, // ARP reply destination IPv4 address (requester of address resolution reply). // It is the Ipv4Addr of the network interface for which the MmdsNetworkStack // routes the packets. pending_arp_reply_dest: Option, // This handles MMDS<->guest interaction at the TCP level. pub(crate) tcp_handler: TcpIPv4Handler, // Data store reference shared across all MmdsNetworkStack instances. pub mmds: Arc>, } impl MmdsNetworkStack { pub fn new( mac_addr: MacAddr, ipv4_addr: Ipv4Addr, tcp_port: u16, mmds: Arc>, ) -> Self { MmdsNetworkStack { remote_mac_addr: mac_addr, mac_addr, ipv4_addr, pending_arp_reply_dest: None, tcp_handler: TcpIPv4Handler::new( ipv4_addr, tcp_port, NonZeroUsize::new(DEFAULT_MAX_CONNECTIONS).unwrap(), NonZeroUsize::new(DEFAULT_MAX_PENDING_RESETS).unwrap(), ), mmds, } } pub fn new_with_defaults(mmds_ipv4_addr: Option, mmds: Arc>) -> Self { let mac_addr = MacAddr::from_str(DEFAULT_MAC_ADDR).unwrap(); let ipv4_addr = mmds_ipv4_addr.unwrap_or_else(|| Ipv4Addr::from(DEFAULT_IPV4_ADDR)); // The unwrap()s are safe because the given literals are greater than 0. Self::new(mac_addr, ipv4_addr, DEFAULT_TCP_PORT, mmds) } pub fn set_ipv4_addr(&mut self, ipv4_addr: Ipv4Addr) { self.ipv4_addr = ipv4_addr; self.tcp_handler.set_local_ipv4_addr(ipv4_addr); } pub fn ipv4_addr(&self) -> Ipv4Addr { self.ipv4_addr } pub fn default_ipv4_addr() -> Ipv4Addr { Ipv4Addr::from(DEFAULT_IPV4_ADDR) } /// Check if a frame is destined for `mmds` /// /// This returns `true` if the frame is an ARP or IPv4 frame destined for /// the `mmds` service, or `false` otherwise. It does not consume the frame. 
pub fn is_mmds_frame(&self, src: &[u8]) -> bool {
        if let Ok(eth) = EthernetFrame::from_bytes(src) {
            // Only ARP and IPv4 frames can be MMDS-bound; the `test_speculative_*`
            // helpers peek at the target/destination address without fully
            // parsing the inner payload.
            match eth.ethertype() {
                ETHERTYPE_ARP => test_speculative_tpa(src, self.ipv4_addr),
                ETHERTYPE_IPV4 => test_speculative_dst_addr(src, self.ipv4_addr),
                _ => false,
            }
        } else {
            false
        }
    }

    /// Handles a frame destined for `mmds`
    ///
    /// It assumes that the frame is indeed destined for `mmds`, so the caller
    /// must make a call to `is_mmds_frame` to ensure that.
    ///
    /// # Returns
    ///
    /// `true` if the frame was consumed by `mmds` or `false` if an error occurred
    pub fn detour_frame(&mut self, src: &[u8]) -> bool {
        if let Ok(eth) = EthernetFrame::from_bytes(src) {
            match eth.ethertype() {
                ETHERTYPE_ARP => return self.detour_arp(eth),
                ETHERTYPE_IPV4 => return self.detour_ipv4(eth),
                _ => (),
            }
        } else {
            // Count frames that did not even parse as Ethernet.
            METRICS.mmds.rx_bad_eth.inc();
        }
        false
    }

    // Consumes an ARP request frame: records the sender's MAC and IPv4 address
    // so that a reply can later be emitted by `write_arp_reply`. Returns `true`
    // only for well-formed ARP requests.
    fn detour_arp(&mut self, eth: EthernetFrame<&[u8]>) -> bool {
        if let Ok(arp) = EthIPv4ArpFrame::request_from_bytes(eth.payload()) {
            self.remote_mac_addr = arp.sha();
            self.pending_arp_reply_dest = Some(arp.spa());
            return true;
        }
        false
    }

    // Consumes an IPv4 frame: TCP packets are handed to the TCP handler, other
    // protocols only bump a metric. Returns `true` iff the payload parsed as IPv4.
    fn detour_ipv4(&mut self, eth: EthernetFrame<&[u8]>) -> bool {
        // TODO: We skip verifying the checksum, just in case the device model relies on offloading
        // checksum computation from the guest driver to some other entity. Clear up this entire
        // context at some point!
        if let Ok(ip) = IPv4Packet::from_bytes(eth.payload(), false) {
            if ip.protocol() == PROTOCOL_TCP {
                // Note-1: `remote_mac_address` is actually the network device mac address, where
                // this TCP segment came from.
                // Note-2: For every routed packet we will have a single source MAC address, because
                // each MmdsNetworkStack routes packets for only one network device.
self.remote_mac_addr = eth.src_mac();
                let mmds_instance = self.mmds.clone();

                match &mut self.tcp_handler.receive_packet(&ip, move |request| {
                    super::convert_to_response(mmds_instance, request)
                }) {
                    Ok(event) => {
                        METRICS.mmds.rx_count.inc();
                        match event {
                            RecvEvent::NewConnectionSuccessful => {
                                METRICS.mmds.connections_created.inc()
                            }
                            RecvEvent::NewConnectionReplacing => {
                                // An old connection was evicted to make room for this one.
                                METRICS.mmds.connections_created.inc();
                                METRICS.mmds.connections_destroyed.inc();
                            }
                            RecvEvent::EndpointDone => {
                                METRICS.mmds.connections_destroyed.inc();
                            }
                            _ => (),
                        }
                    }
                    Err(_) => METRICS.mmds.rx_accepted_err.inc(),
                }
            } else {
                // A non-TCP IPv4 packet heading towards the MMDS; we consider it unusual.
                METRICS.mmds.rx_accepted_unusual.inc();
            }
            return true;
        }
        false
    }

    // Allows the MMDS network stack to write a frame to the specified buffer. Will return:
    // - None, if the MMDS network stack has no frame to send at this point. The buffer can be
    //   used for something else by the device model.
    // - Some(len), if a frame of the given length has been written to the specified buffer.
    pub fn write_next_frame(&mut self, buf: &mut [u8]) -> Option<NonZeroUsize> {
        // We try to send ARP replies first.
        if self.pending_arp_reply_dest.is_some() {
            return match self.write_arp_reply(buf) {
                Ok(something) => {
                    METRICS.mmds.tx_count.inc();
                    self.pending_arp_reply_dest = None;
                    something
                }
                Err(_) => {
                    METRICS.mmds.tx_errors.inc();
                    None
                }
            };
        } else {
            let call_write = match self.tcp_handler.next_segment_status() {
                NextSegmentStatus::Available => true,
                // Retransmission timers are expressed in timestamp cycles.
                NextSegmentStatus::Timeout(value) => timestamp_cycles() >= value,
                NextSegmentStatus::Nothing => false,
            };
            if call_write {
                return match self.write_packet(buf) {
                    Ok(something) => {
                        METRICS.mmds.tx_count.inc();
                        something
                    }
                    Err(_) => {
                        METRICS.mmds.tx_errors.inc();
                        None
                    }
                };
            }
        }
        None
    }

    // Writes an (incomplete) Ethernet header into `buf` — remote MAC as
    // destination, our MAC as source — for the caller to fill with a payload.
    fn prepare_eth_unsized<'a>(
        &self,
        buf: &'a mut [u8],
        ethertype: u16,
    ) -> Result<Incomplete<EthernetFrame<'a, &'a mut [u8]>>, EthernetFrameError> {
        EthernetFrame::write_incomplete(buf, self.remote_mac_addr, self.mac_addr, ethertype)
    }

    // Writes the pending ARP reply into `buf` and returns the full frame length.
    fn write_arp_reply(&self, buf: &mut [u8]) -> Result<Option<NonZeroUsize>, WriteArpFrameError> {
        let arp_reply_dest = self
            .pending_arp_reply_dest
            .ok_or(WriteArpFrameError::NoPendingArpReply)?;

        let mut eth_unsized = self.prepare_eth_unsized(buf, ETHERTYPE_ARP)?;

        let arp_len = EthIPv4ArpFrame::write_reply(
            eth_unsized
                .inner_mut()
                .payload_mut()
                .split_at_mut(ETH_IPV4_FRAME_LEN)
                .0,
            self.mac_addr,
            self.ipv4_addr,
            self.remote_mac_addr,
            arp_reply_dest,
        )?
        .len();

        Ok(Some(
            // The unwrap() is safe because arp_len > 0.
            NonZeroUsize::new(eth_unsized.with_payload_len_unchecked(arp_len).len()).unwrap(),
        ))
    }

    // Writes the next pending TCP segment into `buf` and returns the full
    // frame length (None if the handler produced no packet).
    fn write_packet(&mut self, buf: &mut [u8]) -> Result<Option<NonZeroUsize>, WritePacketError> {
        let mut eth_unsized = self.prepare_eth_unsized(buf, ETHERTYPE_IPV4)?;

        let (maybe_len, event) = self
            .tcp_handler
            .write_next_packet(eth_unsized.inner_mut().payload_mut())?;

        if let WriteEvent::EndpointDone = event {
            METRICS.mmds.connections_destroyed.inc()
        }

        if let Some(packet_len) = maybe_len {
            return Ok(Some(
                // The unwrap() is safe because packet_len > 0.
NonZeroUsize::new( eth_unsized .with_payload_len_unchecked(packet_len.get()) .len(), ) .unwrap(), )); } Ok(None) } } #[cfg(test)] mod tests { use std::str::FromStr; use super::*; use crate::dumbo::pdu::tcp::{Flags as TcpFlags, TcpSegment}; // We use LOCALHOST here because const new() is not stable yet, so just reuse this const, since // all we're interested in is having some address different from the MMDS one. const REMOTE_ADDR: Ipv4Addr = Ipv4Addr::LOCALHOST; const REMOTE_MAC_STR: &str = "11:11:11:22:22:22"; const MMDS_PORT: u16 = 80; const REMOTE_PORT: u16 = 1235; const SEQ_NUMBER: u32 = 123; // Helper methods which only make sense for testing. impl MmdsNetworkStack { fn write_arp_request(&mut self, buf: &mut [u8], for_mmds: bool) -> usize { // Write a reply and then modify it into a request. self.pending_arp_reply_dest = Some(REMOTE_ADDR); let len = self.write_arp_reply(buf).unwrap().unwrap().get(); self.pending_arp_reply_dest = None; let mut eth = EthernetFrame::from_bytes_unchecked(&mut buf[..len]); let mut arp = EthIPv4ArpFrame::from_bytes_unchecked(eth.payload_mut()); // Set the operation to REQUEST. arp.set_operation(1); arp.set_sha(MacAddr::from_str(REMOTE_MAC_STR).unwrap()); arp.set_spa(REMOTE_ADDR); // The tpa remains REMOTE_ADDR otherwise, and is thus invalid for the MMDS. 
if for_mmds { arp.set_tpa(self.ipv4_addr); } len } fn write_incoming_tcp_segment( &self, buf: &mut [u8], addr: Ipv4Addr, flags: TcpFlags, ) -> usize { let mut eth_unsized = self.prepare_eth_unsized(buf, ETHERTYPE_IPV4).unwrap(); let packet_len = { let mut packet = IPv4Packet::write_header( eth_unsized.inner_mut().payload_mut(), PROTOCOL_TCP, REMOTE_ADDR, addr, ) .unwrap(); let segment_len = TcpSegment::write_incomplete_segment::<[u8]>( packet.inner_mut().payload_mut(), SEQ_NUMBER, 1234, flags, 10000, None, 0, None, ) .unwrap() .finalize(REMOTE_PORT, MMDS_PORT, Some((REMOTE_ADDR, addr))) .len(); packet.with_payload_len_unchecked(segment_len, true).len() }; eth_unsized.with_payload_len_unchecked(packet_len).len() } fn next_frame_as_ipv4_packet<'a>(&mut self, buf: &'a mut [u8]) -> IPv4Packet<'_, &'a [u8]> { let len = self.write_next_frame(buf).unwrap().get(); let eth = EthernetFrame::from_bytes(&buf[..len]).unwrap(); IPv4Packet::from_bytes(&buf[eth.payload_offset()..len], true).unwrap() } } #[test] fn test_ns_new_with_defaults() { let ns = MmdsNetworkStack::new_with_defaults(None, Arc::new(Mutex::new(Mmds::default()))); assert_eq!(ns.mac_addr, MacAddr::from_str(DEFAULT_MAC_ADDR).unwrap()); assert_eq!(ns.ipv4_addr, Ipv4Addr::from(DEFAULT_IPV4_ADDR)); let ns = MmdsNetworkStack::new_with_defaults( Some(Ipv4Addr::LOCALHOST), Arc::new(Mutex::new(Mmds::default())), ); assert_eq!(ns.mac_addr, MacAddr::from_str(DEFAULT_MAC_ADDR).unwrap()); assert_eq!(ns.ipv4_addr, Ipv4Addr::LOCALHOST); } #[test] #[allow(clippy::cognitive_complexity)] fn test_ns() { let mut ns = MmdsNetworkStack::new_with_defaults(None, Arc::new(Mutex::new(Mmds::default()))); let mut buf = [0u8; 2000]; let mut bad_buf = [0u8; 1]; let remote_mac = MacAddr::from_str(REMOTE_MAC_STR).unwrap(); let mmds_addr = ns.ipv4_addr; let bad_mmds_addr = Ipv4Addr::from_str("1.2.3.4").unwrap(); // Buffer is too small. 
assert!(!ns.is_mmds_frame(&bad_buf)); assert!(!ns.detour_frame(bad_buf.as_ref())); // There's nothing to send right now. assert!(ns.write_next_frame(buf.as_mut()).is_none()); { let len = ns.write_arp_request(buf.as_mut(), false); // Not asking for MMDS MAC address. assert!(!ns.is_mmds_frame(&buf[..len])); // There's still nothing to send. assert!(ns.write_next_frame(buf.as_mut()).is_none()); } { let len = ns.write_arp_request(buf.as_mut(), true); // Asking for MMDS MAC address. assert!(ns.detour_frame(&buf[..len])); assert_eq!(ns.remote_mac_addr, remote_mac); } // There should be an ARP reply to send. { // Buffer is too small. assert!(ns.write_next_frame(bad_buf.as_mut()).is_none()); let curr_tx_count = METRICS.mmds.tx_count.count(); let len = ns.write_next_frame(buf.as_mut()).unwrap().get(); assert_eq!(curr_tx_count + 1, METRICS.mmds.tx_count.count()); let eth = EthernetFrame::from_bytes(&buf[..len]).unwrap(); let arp_reply = EthIPv4ArpFrame::from_bytes_unchecked(eth.payload()); // REPLY = 2 assert_eq!(arp_reply.operation(), 2); assert_eq!(arp_reply.sha(), ns.mac_addr); assert_eq!(arp_reply.spa(), ns.ipv4_addr); assert_eq!(arp_reply.tha(), ns.remote_mac_addr); assert_eq!(arp_reply.tpa(), REMOTE_ADDR); } // Nothing to send anymore. assert!(ns.write_next_frame(buf.as_mut()).is_none()); // Let's send a TCP segment which will be rejected, because it's heading to the wrong // address. { let len = ns.write_incoming_tcp_segment(buf.as_mut(), bad_mmds_addr, TcpFlags::ACK); assert!(!ns.is_mmds_frame(&buf[..len])); // Nothing to send in response. assert!(ns.write_next_frame(buf.as_mut()).is_none()); } // Let's send a TCP segment which will cause a RST to come out of the inner TCP handler. 
{ let len = ns.write_incoming_tcp_segment(buf.as_mut(), mmds_addr, TcpFlags::ACK); let curr_rx_count = METRICS.mmds.rx_count.count(); assert!(ns.detour_frame(&buf[..len])); assert_eq!(curr_rx_count + 1, METRICS.mmds.rx_count.count()); } // Let's check we actually get a RST when writing the next frame. { assert!(ns.write_next_frame(bad_buf.as_mut()).is_none()); let ip = ns.next_frame_as_ipv4_packet(buf.as_mut()); assert_eq!(ip.source_address(), mmds_addr); assert_eq!(ip.destination_address(), REMOTE_ADDR); let s = TcpSegment::from_bytes( ip.payload(), Some((ip.source_address(), ip.destination_address())), ) .unwrap(); assert_eq!(s.flags_after_ns(), TcpFlags::RST); assert_eq!(s.source_port(), MMDS_PORT); assert_eq!(s.destination_port(), REMOTE_PORT); } // Nothing else to send. assert!(ns.write_next_frame(buf.as_mut()).is_none()); // Let's send a TCP SYN into the ns. { let len = ns.write_incoming_tcp_segment(buf.as_mut(), mmds_addr, TcpFlags::SYN); assert!(ns.detour_frame(&buf[..len])); } // We should be getting a SYNACK out of the ns in response. { let ip = ns.next_frame_as_ipv4_packet(buf.as_mut()); assert_eq!(ip.source_address(), mmds_addr); assert_eq!(ip.destination_address(), REMOTE_ADDR); let s = TcpSegment::from_bytes( ip.payload(), Some((ip.source_address(), ip.destination_address())), ) .unwrap(); assert_eq!(s.flags_after_ns(), TcpFlags::SYN | TcpFlags::ACK); assert_eq!(s.source_port(), MMDS_PORT); assert_eq!(s.destination_port(), REMOTE_PORT); assert_eq!(s.ack_number(), SEQ_NUMBER.wrapping_add(1)); } // Nothing else to send. 
assert!(ns.write_next_frame(buf.as_mut()).is_none()); } #[test] fn test_set_ipv4_addr() { let mut ns = MmdsNetworkStack::new_with_defaults(None, Arc::new(Mutex::new(Mmds::default()))); assert_ne!(ns.ipv4_addr, Ipv4Addr::LOCALHOST); assert_ne!(ns.tcp_handler.local_ipv4_addr(), Ipv4Addr::LOCALHOST); ns.set_ipv4_addr(Ipv4Addr::LOCALHOST); assert_eq!(ns.ipv4_addr, Ipv4Addr::LOCALHOST); assert_eq!(ns.tcp_handler.local_ipv4_addr(), Ipv4Addr::LOCALHOST); } #[test] fn test_default_ipv4_addr() { let actual = MmdsNetworkStack::default_ipv4_addr(); let expected = Ipv4Addr::from(DEFAULT_IPV4_ADDR); assert_eq!(actual, expected); } #[test] fn test_break_speculative_check_detour_arp() { let mut buf = [0u8; 2000]; let ip = Ipv4Addr::from(DEFAULT_IPV4_ADDR); let other_ip = Ipv4Addr::new(5, 6, 7, 8); let mac = MacAddr::from_bytes_unchecked(&[0; 6]); let mut ns = MmdsNetworkStack::new_with_defaults(Some(ip), Arc::new(Mutex::new(Mmds::default()))); let mut eth = EthernetFrame::write_incomplete(buf.as_mut(), mac, mac, ETHERTYPE_ARP).unwrap(); let mut arp = EthIPv4ArpFrame::from_bytes_unchecked(eth.inner_mut().payload_mut()); arp.set_tpa(other_ip); let len = ns.write_arp_request(buf.as_mut(), false); eth = EthernetFrame::write_incomplete(buf.as_mut(), mac, mac, ETHERTYPE_ARP).unwrap(); IPv4Packet::from_bytes_unchecked(eth.inner_mut().payload_mut()).set_destination_address(ip); assert!(!ns.is_mmds_frame(&buf[..len])); } #[test] fn test_break_speculative_check_detour_ipv4() { let mut buf = [0u8; 2000]; let ip = Ipv4Addr::from(DEFAULT_IPV4_ADDR); let other_ip = Ipv4Addr::new(5, 6, 7, 8); let mac = MacAddr::from_bytes_unchecked(&[0; 6]); let ns = MmdsNetworkStack::new_with_defaults(Some(ip), Arc::new(Mutex::new(Mmds::default()))); let mut eth = EthernetFrame::write_incomplete(buf.as_mut(), mac, mac, ETHERTYPE_IPV4).unwrap(); IPv4Packet::from_bytes_unchecked(eth.inner_mut().payload_mut()) .set_destination_address(other_ip); let len = ns.write_incoming_tcp_segment(buf.as_mut(), other_ip, 
TcpFlags::SYN); eth = EthernetFrame::write_incomplete(buf.as_mut(), mac, mac, ETHERTYPE_IPV4).unwrap(); let mut arp = EthIPv4ArpFrame::from_bytes_unchecked(eth.inner_mut().payload_mut()); arp.set_tpa(ip); assert!(!ns.is_mmds_frame(&buf[..len])); } #[test] fn test_wrong_ethertype() { let mut buf = [0u8; 2000]; let ip = Ipv4Addr::from(DEFAULT_IPV4_ADDR); let other_ip = Ipv4Addr::new(5, 6, 7, 8); let mac = MacAddr::from_bytes_unchecked(&[0; 6]); let mut ns = MmdsNetworkStack::new_with_defaults(Some(ip), Arc::new(Mutex::new(Mmds::default()))); // try IPv4 with detour_arp let mut eth = EthernetFrame::write_incomplete(buf.as_mut(), mac, mac, ETHERTYPE_IPV4).unwrap(); IPv4Packet::from_bytes_unchecked(eth.inner_mut().payload_mut()) .set_destination_address(other_ip); let len = ns.write_incoming_tcp_segment(buf.as_mut(), other_ip, TcpFlags::SYN); eth = EthernetFrame::write_incomplete(buf.as_mut(), mac, mac, ETHERTYPE_IPV4).unwrap(); let mut arp = EthIPv4ArpFrame::from_bytes_unchecked(eth.inner_mut().payload_mut()); arp.set_tpa(ip); assert!(ns.detour_ipv4(EthernetFrame::from_bytes(&buf[..len]).unwrap())); assert!(!ns.detour_arp(EthernetFrame::from_bytes(&buf[..len]).unwrap())); // try IPv4 with detour_arp let mut eth = EthernetFrame::write_incomplete(buf.as_mut(), mac, mac, ETHERTYPE_ARP).unwrap(); let mut arp = EthIPv4ArpFrame::from_bytes_unchecked(eth.inner_mut().payload_mut()); arp.set_tpa(other_ip); let len = ns.write_arp_request(buf.as_mut(), false); eth = EthernetFrame::write_incomplete(buf.as_mut(), mac, mac, ETHERTYPE_ARP).unwrap(); IPv4Packet::from_bytes_unchecked(eth.inner_mut().payload_mut()).set_destination_address(ip); assert!(ns.detour_arp(EthernetFrame::from_bytes(&buf[..len]).unwrap())); assert!(!ns.detour_ipv4(EthernetFrame::from_bytes(&buf[..len]).unwrap())); } } ================================================ FILE: src/vmm/src/mmds/persist.rs ================================================ // Copyright 2020 Amazon.com, Inc. or its affiliates. 
All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 //! Defines the structures needed for saving/restoring MmdsNetworkStack. use std::net::Ipv4Addr; use std::sync::{Arc, Mutex}; use serde::{Deserialize, Serialize}; use super::ns::MmdsNetworkStack; use crate::mmds::data_store::Mmds; use crate::snapshot::Persist; use crate::utils::net::mac::{MAC_ADDR_LEN, MacAddr}; /// State of a MmdsNetworkStack. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct MmdsNetworkStackState { mac_addr: [u8; MAC_ADDR_LEN as usize], ipv4_addr: u32, tcp_port: u16, } impl Persist<'_> for MmdsNetworkStack { type State = MmdsNetworkStackState; type ConstructorArgs = Arc>; type Error = (); fn save(&self) -> Self::State { let mut mac_addr = [0; MAC_ADDR_LEN as usize]; mac_addr.copy_from_slice(self.mac_addr.get_bytes()); MmdsNetworkStackState { mac_addr, ipv4_addr: self.ipv4_addr.into(), tcp_port: self.tcp_handler.local_port(), } } fn restore(mmds: Self::ConstructorArgs, state: &Self::State) -> Result { Ok(MmdsNetworkStack::new( MacAddr::from_bytes_unchecked(&state.mac_addr), Ipv4Addr::from(state.ipv4_addr), state.tcp_port, mmds, )) } } #[cfg(test)] mod tests { use super::*; #[test] fn test_persistence() { let ns = MmdsNetworkStack::new_with_defaults(None, Arc::new(Mutex::new(Mmds::default()))); let ns_state = ns.save(); let serialized_data = bitcode::serialize(&ns_state).unwrap(); let restored_state = bitcode::deserialize(&serialized_data).unwrap(); let restored_ns = MmdsNetworkStack::restore(Arc::new(Mutex::new(Mmds::default())), &restored_state) .unwrap(); assert_eq!(restored_ns.mac_addr, ns.mac_addr); assert_eq!(restored_ns.ipv4_addr, ns.ipv4_addr); assert_eq!( restored_ns.tcp_handler.local_port(), ns.tcp_handler.local_port() ); } } ================================================ FILE: src/vmm/src/mmds/token.rs ================================================ // Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
// SPDX-License-Identifier: Apache-2.0 use std::convert::TryInto; use std::fmt; use std::ops::Add; use aws_lc_rs::aead::{AES_256_GCM, Aad, Nonce, RandomizedNonceKey}; use base64::Engine; use utils::time::{ClockType, get_time_ms}; use zerocopy::{FromBytes, Immutable, IntoBytes}; /// Length of initialization vector. pub const IV_LEN: usize = 12; /// Length of the key used for encryption. pub const KEY_LEN: usize = 32; /// Length of encryption payload. pub const PAYLOAD_LEN: usize = std::mem::size_of::(); /// Length of encryption tag. pub const TAG_LEN: usize = 16; /// Constant to convert seconds to milliseconds. pub const MILLISECONDS_PER_SECOND: u64 = 1_000; /// Minimum lifetime of token. pub const MIN_TOKEN_TTL_SECONDS: u32 = 1; /// Maximum lifetime of token. pub const MAX_TOKEN_TTL_SECONDS: u32 = 21600; /// Path to token. pub const PATH_TO_TOKEN: &str = "/latest/api/token"; /// Token length limit to ensure we don't bother decrypting huge character /// sequences. Tokens larger than this are automatically rejected. The value /// is computed based on the expected length of the base64 encoded Token struct /// including a small deviation. const TOKEN_LENGTH_LIMIT: usize = 70; #[rustfmt::skip] #[derive(Debug, thiserror::Error, displaydoc::Display)] pub enum MmdsTokenError { /// Failed to generate a key KeyGeneration, /// Failed to extract expiry value from token. ExpiryExtraction, /// Invalid time to live value provided for token: {0}. Please provide a value between {MIN_TOKEN_TTL_SECONDS:} and {MAX_TOKEN_TTL_SECONDS:}. InvalidTtlValue(u32), /// Failed to encrypt token. TokenEncryption, } pub struct TokenAuthority { cipher: RandomizedNonceKey, // Number of tokens encrypted under the current key. num_encrypted_tokens: u32, // Additional Authentication Data used for encryption and decryption. aad: String, } // TODO When https://github.com/RustCrypto/AEADs/pull/532 is merged replace these manual // implementation with `#[derive(Debug)]`. 
impl fmt::Debug for TokenAuthority {
    // Manual impl because `RandomizedNonceKey` does not implement Debug; the
    // key material is deliberately not printed.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.debug_struct("TokenAuthority")
            .field("num_encrypted_tokens", &self.num_encrypted_tokens)
            .field("aad", &self.aad)
            .finish()
    }
}

impl TokenAuthority {
    /// Create a new token authority entity.
    pub fn try_new() -> Result<TokenAuthority, MmdsTokenError> {
        Ok(TokenAuthority {
            cipher: TokenAuthority::create_cipher()?,
            num_encrypted_tokens: 0,
            aad: "".to_string(),
        })
    }

    /// Set Additional Authenticated Data to be used for
    /// encryption and decryption of the session token.
    pub fn set_aad(&mut self, instance_id: &str) {
        self.aad = format!("microvmid={}", instance_id);
    }

    /// Generate encoded token string using the token time to live provided.
    pub fn generate_token_secret(&mut self, ttl_seconds: u32) -> Result<String, MmdsTokenError> {
        // Check number of tokens encrypted under the current key. We need to
        // make sure no more than 2^32 tokens are encrypted with the same key.
        // If this number is reached, we need to reinitialize the cipher entity.
        self.check_encryption_count()?;
        // Create token structure containing the encrypted expiry value.
        let token = self.create_token(ttl_seconds)?;
        // Encode struct into base64 in order to obtain token string.
        let encoded_token = token.base64_encode();
        // Increase the count of encrypted tokens.
        self.num_encrypted_tokens += 1;

        Ok(encoded_token)
    }

    /// Create a new Token structure to encrypt.
    fn create_token(&mut self, ttl_seconds: u32) -> Result<Token, MmdsTokenError> {
        // Validate token time to live against bounds.
        if !TokenAuthority::check_ttl(ttl_seconds) {
            return Err(MmdsTokenError::InvalidTtlValue(ttl_seconds));
        }

        // Compute expiration time in milliseconds from ttl.
        let expiry = TokenAuthority::compute_expiry(ttl_seconds);
        // Encrypt expiry (RandomizedNonceKey generates nonce automatically).
        self.encrypt_expiry(expiry)
    }

    /// Encrypt expiry using AES-GCM block cipher and return token obtained.
    fn encrypt_expiry(&self, expiry: u64) -> Result<Token, MmdsTokenError> {
        // Convert expiry u64 value into bytes.
let mut expiry_as_bytes = expiry.to_le_bytes(); let aad = Aad::from(self.aad.as_bytes()); let (nonce, tag) = self .cipher .seal_in_place_separate_tag(aad, &mut expiry_as_bytes) .map_err(|_| MmdsTokenError::TokenEncryption)?; // Tag must be of size `TAG_LEN`. let tag_as_bytes: [u8; TAG_LEN] = tag .as_ref() .try_into() .map_err(|_| MmdsTokenError::TokenEncryption)?; Ok(Token::new(*nonce.as_ref(), expiry_as_bytes, tag_as_bytes)) } /// Attempts to decrypt expiry value within token sequence. Returns false if expiry /// cannot be decrypted. If decryption succeeds, returns true if token has not expired /// (i.e. current time is greater than expiry) and false otherwise. pub fn is_valid(&self, encoded_token: &str) -> bool { // Check size of encoded token struct. if encoded_token.len() > TOKEN_LENGTH_LIMIT { return false; } // Decode token struct from base64. let token = match Token::base64_decode(encoded_token) { Ok(token) => token, Err(_) => return false, }; // Decrypt ttl using AES-GCM block cipher. let expiry = match self.decrypt_expiry(&token) { Ok(expiry) => expiry, Err(_) => return false, }; // Compare expiry (in ms) with current time in milliseconds. expiry > get_time_ms(ClockType::Monotonic) } /// Decrypt ciphertext composed of payload and tag to obtain the expiry value. fn decrypt_expiry(&self, token: &Token) -> Result { // Create Nonce object from initialization vector. 
let nonce = Nonce::assume_unique_for_key(token.iv); let aad = Aad::from(self.aad.as_bytes()); // Combine payload and tag for aws-lc-rs let mut ciphertext_and_tag = [0; PAYLOAD_LEN + TAG_LEN]; ciphertext_and_tag[..PAYLOAD_LEN].copy_from_slice(&token.payload); ciphertext_and_tag[PAYLOAD_LEN..].copy_from_slice(&token.tag); // Decrypt in place let plaintext = self .cipher .open_in_place(nonce, aad, &mut ciphertext_and_tag) .map_err(|_| MmdsTokenError::ExpiryExtraction)?; let expiry_as_bytes: [u8; PAYLOAD_LEN] = plaintext .try_into() .map_err(|_| MmdsTokenError::ExpiryExtraction)?; // Return expiry value in seconds. Ok(u64::from_le_bytes(expiry_as_bytes)) } /// Create a new AES-GCM cipher entity. fn create_cipher() -> Result { // Randomly generate a 256-bit key to be used for encryption/decryption purposes. let mut key = [0u8; KEY_LEN]; aws_lc_rs::rand::fill(&mut key).map_err(|_| MmdsTokenError::KeyGeneration)?; // Create cipher entity to handle encryption/decryption. RandomizedNonceKey::new(&AES_256_GCM, &key).map_err(|_| MmdsTokenError::KeyGeneration) } /// Make sure to reinitialize the cipher under a new key before reaching /// a count of 2^32 encrypted tokens under the same cipher entity. fn check_encryption_count(&mut self) -> Result<(), MmdsTokenError> { // Make sure no more than 2^32 - 1 tokens are encrypted under // the same encryption key. if self.num_encrypted_tokens == u32::MAX { // Reinitialize the cipher entity under a new key when limit is exceeded. // As a result, all valid tokens created under the previous key are invalidated. // By design, we don't retain the cipher used to encrypt previous tokens, // because reaching the limit is very unlikely and should not happen under // healthy interactions with MMDS. However, if it happens, we expect the // customer code to have a retry mechanism in place and regenerate the // session token if the previous ones become invalid. self.cipher = TokenAuthority::create_cipher()?; // Reset encrypted tokens count. 
self.num_encrypted_tokens = 0;
            crate::logger::warn!(
                "The limit of tokens generated under current MMDS token authority has been reached. MMDS's token authority entity has been reseeded and all previously created tokens are now invalid."
            );
        }

        Ok(())
    }

    /// Validate the token time to live against bounds.
    fn check_ttl(ttl_seconds: u32) -> bool {
        // Accept only values in the inclusive [MIN, MAX] window.
        MIN_TOKEN_TTL_SECONDS <= ttl_seconds && ttl_seconds <= MAX_TOKEN_TTL_SECONDS
    }

    /// Compute the token expiry timestamp, in milliseconds, by adding the
    /// provided time to live to the current monotonic clock value.
    fn compute_expiry(ttl_as_seconds: u32) -> u64 {
        // Current monotonic time, in milliseconds.
        let now_as_milliseconds = get_time_ms(ClockType::Monotonic);
        // A u32 ttl converted to milliseconds fits comfortably in a u64 (and
        // callers validate it to at most 6h = 21_600_000 ms), so the addition
        // below cannot overflow.
        let ttl_as_milliseconds = u64::from(ttl_as_seconds) * MILLISECONDS_PER_SECOND;
        now_as_milliseconds + ttl_as_milliseconds
    }
}

/// Structure for token information.
#[derive(Clone, Debug, FromBytes, Immutable, IntoBytes, PartialEq)]
#[repr(C)]
struct Token {
    // Nonce or Initialization Vector.
    iv: [u8; IV_LEN],
    // Encrypted expire time.
    payload: [u8; PAYLOAD_LEN],
    // Tag returned after encryption.
    tag: [u8; TAG_LEN],
}

impl Token {
    /// Create a new token struct.
    fn new(iv: [u8; IV_LEN], payload: [u8; PAYLOAD_LEN], tag: [u8; TAG_LEN]) -> Self {
        Self { iv, payload, tag }
    }

    /// Encode token structure into a string using base64 encoding.
    fn base64_encode(&self) -> String {
        let engine = &base64::engine::general_purpose::STANDARD;
        engine.encode(self.as_bytes())
    }

    /// Decode token structure from base64 string.
fn base64_decode(encoded_token: &str) -> Result<Token, MmdsTokenError> {
        let bytes = base64::engine::general_purpose::STANDARD
            .decode(encoded_token)
            .map_err(|_| MmdsTokenError::ExpiryExtraction)?;

        // Reinterpret the raw bytes as a Token (zerocopy); fails on a size mismatch.
        Self::read_from_bytes(&bytes).map_err(|_| MmdsTokenError::ExpiryExtraction)
    }
}

#[cfg(test)]
mod tests {
    use std::thread::sleep;
    use std::time::Duration;

    use super::*;

    #[test]
    fn test_check_ttl() {
        // Test invalid time to live values.
        assert!(!TokenAuthority::check_ttl(MIN_TOKEN_TTL_SECONDS - 1));
        assert!(!TokenAuthority::check_ttl(MAX_TOKEN_TTL_SECONDS + 1));

        // Test time to live value within bounds.
        assert!(TokenAuthority::check_ttl(MIN_TOKEN_TTL_SECONDS));
        assert!(TokenAuthority::check_ttl(MAX_TOKEN_TTL_SECONDS / 2));
        assert!(TokenAuthority::check_ttl(MAX_TOKEN_TTL_SECONDS));
    }

    #[test]
    fn test_set_aad() {
        let mut token_authority = TokenAuthority::try_new().unwrap();
        assert_eq!(token_authority.aad, "".to_string());

        token_authority.set_aad("foo");
        assert_eq!(token_authority.aad, "microvmid=foo".to_string());
    }

    #[test]
    fn test_create_token() {
        let mut token_authority = TokenAuthority::try_new().unwrap();

        // Test invalid time to live value.
        assert_eq!(
            token_authority.create_token(0).unwrap_err().to_string(),
            format!(
                "Invalid time to live value provided for token: 0. Please provide a value between \
                 {} and {}.",
                MIN_TOKEN_TTL_SECONDS, MAX_TOKEN_TTL_SECONDS
            )
        );

        // Test valid time to live value.
        let token = token_authority.create_token(1).unwrap();
        assert_eq!(token.iv.len(), IV_LEN);
        assert_eq!(token.payload.len(), PAYLOAD_LEN);
        assert_eq!(token.tag.len(), TAG_LEN);
    }

    #[test]
    fn test_compute_expiry() {
        let time_now = get_time_ms(ClockType::Monotonic);
        let expiry = TokenAuthority::compute_expiry(1);
        let ttl = expiry - time_now;
        // We allow a deviation of 20ms to account for the gap
        // between the two calls to `get_time_ms()`.
let deviation = 20; assert!( ttl >= MILLISECONDS_PER_SECOND && ttl <= MILLISECONDS_PER_SECOND + deviation, "ttl={ttl} not within [{MILLISECONDS_PER_SECOND}, \ {MILLISECONDS_PER_SECOND}+{deviation}]", ); let time_now = get_time_ms(ClockType::Monotonic); let expiry = TokenAuthority::compute_expiry(0); let ttl = expiry - time_now; assert!(ttl <= deviation, "ttl={ttl} is greater than {deviation}"); } #[test] fn test_encrypt_decrypt() { let mut token_authority = TokenAuthority::try_new().unwrap(); let expiry = TokenAuthority::compute_expiry(10); // Test valid ciphertext. let token = token_authority.encrypt_expiry(expiry).unwrap(); let decrypted_expiry = token_authority.decrypt_expiry(&token).unwrap(); assert_eq!(expiry, decrypted_expiry); // Test ciphertext with corrupted payload. let mut bad_token = token.clone(); bad_token.payload[0] = u8::MAX - bad_token.payload[0]; assert!(matches!( token_authority.decrypt_expiry(&bad_token).unwrap_err(), MmdsTokenError::ExpiryExtraction )); // Test ciphertext with corrupted tag. let mut bad_token = token.clone(); bad_token.tag[0] = u8::MAX - bad_token.tag[0]; assert!(matches!( token_authority.decrypt_expiry(&bad_token).unwrap_err(), MmdsTokenError::ExpiryExtraction )); // Test decrypting expiry under a different AAD than it was encrypted with. token_authority.set_aad("foo"); assert!(matches!( token_authority.decrypt_expiry(&token).unwrap_err(), MmdsTokenError::ExpiryExtraction )); } #[test] fn test_encode_decode() { let expected_token = Token::new([0u8; IV_LEN], [0u8; PAYLOAD_LEN], [0u8; TAG_LEN]); let mut encoded_token = expected_token.base64_encode(); let actual_token = Token::base64_decode(&encoded_token).unwrap(); assert_eq!(actual_token, expected_token); // Decode invalid base64 bytes sequence. encoded_token.push('x'); Token::base64_decode(&encoded_token).unwrap_err(); } #[test] fn test_generate_token_secret() { let mut token_authority = TokenAuthority::try_new().unwrap(); // Test time to live value too small. 
assert_eq!( token_authority .generate_token_secret(MIN_TOKEN_TTL_SECONDS - 1) .unwrap_err() .to_string(), format!( "Invalid time to live value provided for token: {}. Please provide a value \ between {} and {}.", MIN_TOKEN_TTL_SECONDS - 1, MIN_TOKEN_TTL_SECONDS, MAX_TOKEN_TTL_SECONDS ) ); // Test time to live value too big. assert_eq!( token_authority .generate_token_secret(MAX_TOKEN_TTL_SECONDS + 1) .unwrap_err() .to_string(), format!( "Invalid time to live value provided for token: {}. Please provide a value \ between {} and {}.", MAX_TOKEN_TTL_SECONDS + 1, MIN_TOKEN_TTL_SECONDS, MAX_TOKEN_TTL_SECONDS ) ); // Generate token with lifespan of 60 seconds. let _ = token_authority.generate_token_secret(60).unwrap(); assert_eq!(token_authority.num_encrypted_tokens, 1); } #[test] fn test_is_valid() { let mut token_authority = TokenAuthority::try_new().unwrap(); // Test token with size bigger than expected. assert!(!token_authority.is_valid(str::repeat("a", TOKEN_LENGTH_LIMIT + 1).as_str())); // Test valid token. let token0 = token_authority.generate_token_secret(1).unwrap(); assert!(token_authority.is_valid(&token0)); } #[test] fn test_token_authority() { let mut token_authority = TokenAuthority::try_new().unwrap(); // Generate token with lifespan of 60 seconds. let token0 = token_authority.generate_token_secret(60).unwrap(); assert!(token_authority.is_valid(&token0)); // Generate token with lifespan of one second. let token1 = token_authority.generate_token_secret(1).unwrap(); assert_eq!(token_authority.num_encrypted_tokens, 2); assert!(token_authority.is_valid(&token1)); // Wait for `token1` to expire. sleep(Duration::new(1, 0)); assert!(!token_authority.is_valid(&token1)); // The first token should still be valid. assert!(token_authority.is_valid(&token0)); // Simulate reaching to a count of 2^32 encrypted tokens. // The cipher and count should reset at this point and previous // tokens should become invalid. 
token_authority.num_encrypted_tokens = u32::MAX; let token2 = token_authority.generate_token_secret(60).unwrap(); assert_eq!(token_authority.num_encrypted_tokens, 1); assert!(token_authority.is_valid(&token2)); assert!(!token_authority.is_valid(&token0)); assert!(!token_authority.is_valid(&token1)); } } ================================================ FILE: src/vmm/src/mmds/token_headers.rs ================================================ // Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 use std::collections::HashMap; // `X-Forwarded-For` pub(crate) const X_FORWARDED_FOR_HEADER: &str = "x-forwarded-for"; // `X-metadata-token` pub(crate) const X_METADATA_TOKEN_HEADER: &str = "x-metadata-token"; // `X-aws-ec2-metadata-token` pub(crate) const X_AWS_EC2_METADATA_TOKEN_HEADER: &str = "x-aws-ec2-metadata-token"; // `X-metadata-token-ttl-seconds` pub(crate) const X_METADATA_TOKEN_TTL_SECONDS_HEADER: &str = "x-metadata-token-ttl-seconds"; // `X-aws-ec2-metadata-token-ttl-seconds` pub(crate) const X_AWS_EC2_METADATA_TOKEN_SSL_SECONDS_HEADER: &str = "x-aws-ec2-metadata-token-ttl-seconds"; pub(crate) fn get_header_value_pair<'a>( custom_headers: &'a HashMap, headers: &'a [&'static str], ) -> Option<(&'a String, &'a String)> { custom_headers .iter() .find(|(k, _)| headers.iter().any(|header| k.eq_ignore_ascii_case(header))) } #[cfg(test)] mod tests { use super::*; fn to_mixed_case(s: &str) -> String { s.chars() .enumerate() .map(|(i, c)| { if i % 2 == 0 { c.to_ascii_lowercase() } else { c.to_ascii_uppercase() } }) .collect() } #[test] fn test_get_header_value_pair() { let headers = [X_METADATA_TOKEN_HEADER, X_AWS_EC2_METADATA_TOKEN_HEADER]; // No custom headers let custom_headers = HashMap::default(); let token = get_header_value_pair(&custom_headers, &headers); assert!(token.is_none()); // Unrelated custom headers let custom_headers = HashMap::from([ ("Some-Header".into(), "10".into()), ("Another-Header".into(), 
"value".into()), ]); let token = get_header_value_pair(&custom_headers, &headers); assert!(token.is_none()); for header in headers { // Valid header let expected = "THIS_IS_TOKEN"; let custom_headers = HashMap::from([(header.into(), expected.into())]); let token = get_header_value_pair(&custom_headers, &headers).unwrap(); assert_eq!(token, (&header.into(), &expected.into())); // Valid header in unrelated custom headers let custom_headers = HashMap::from([ ("Some-Header".into(), "10".into()), ("Another-Header".into(), "value".into()), (header.into(), expected.into()), ]); let token = get_header_value_pair(&custom_headers, &headers).unwrap(); assert_eq!(token, (&header.into(), &expected.into())); // Test case-insensitiveness let header = to_mixed_case(header); let custom_headers = HashMap::from([(header.clone(), expected.into())]); let token = get_header_value_pair(&custom_headers, &headers).unwrap(); assert_eq!(token, (&header, &expected.into())); } } } ================================================ FILE: src/vmm/src/pci/bus.rs ================================================ // Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. // Copyright 2018 The Chromium OS Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE-BSD-3-Clause file. // // SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause use std::collections::HashMap; use std::fmt::Debug; use std::ops::DerefMut; use std::sync::{Arc, Barrier, Mutex}; use byteorder::{ByteOrder, LittleEndian}; use pci::{PciBridgeSubclass, PciClassCode}; use crate::logger::error; use crate::pci::configuration::PciConfiguration; use crate::pci::{DeviceRelocation, PciDevice}; use crate::utils::u64_to_usize; use crate::vstate::bus::BusDevice; /// Errors for device manager. #[derive(Debug, thiserror::Error, displaydoc::Display)] pub enum PciRootError { /// Could not find an available device slot on the PCI bus. 
NoPciDeviceSlotAvailable,
}

// Vendor/device IDs advertised by the emulated Intel virtual PCIe host bridge.
const VENDOR_ID_INTEL: u16 = 0x8086;
const DEVICE_ID_INTEL_VIRT_PCIE_HOST: u16 = 0x0d57;

// Number of device slots available on the (single) emulated PCI bus.
const NUM_DEVICE_IDS: usize = 32;

#[derive(Debug)]
/// Emulates the PCI Root bridge device.
pub struct PciRoot {
    /// Configuration space.
    config: PciConfiguration,
}

impl PciRoot {
    /// Create an empty PCI root bridge.
    ///
    /// If `config` is provided it is used as-is (e.g. when restoring from a
    /// snapshot); otherwise a default type-0 configuration space advertising
    /// an Intel virtual PCIe host bridge is built.
    pub fn new(config: Option) -> Self {
        if let Some(config) = config {
            PciRoot { config }
        } else {
            PciRoot {
                config: PciConfiguration::new_type0(
                    VENDOR_ID_INTEL,
                    DEVICE_ID_INTEL_VIRT_PCIE_HOST,
                    0,
                    PciClassCode::BridgeDevice,
                    &PciBridgeSubclass::HostBridge,
                    0,
                    0,
                    None,
                ),
            }
        }
    }
}

impl BusDevice for PciRoot {}

impl PciDevice for PciRoot {
    // Writes are delegated to the configuration space; the root bridge never
    // requires the caller to synchronize, hence `None`.
    fn write_config_register(
        &mut self,
        reg_idx: usize,
        offset: u64,
        data: &[u8],
    ) -> Option> {
        self.config.write_config_register(reg_idx, offset, data);
        None
    }

    fn read_config_register(&mut self, reg_idx: usize) -> u32 {
        self.config.read_reg(reg_idx)
    }
}

/// A PCI bus definition
pub struct PciBus {
    /// Devices attached to this bus.
    /// Device 0 is host bridge.
pub devices: HashMap>>,
    // VM handle used to relocate device BARs on reprogramming.
    vm: Arc,
    // Slot allocation bitmap: `true` marks a device ID already in use.
    device_ids: Vec,
}

impl Debug for PciBus {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("Root Firecracker PCI Bus")
            .field("device_ids", &self.device_ids)
            .finish()
    }
}

impl PciBus {
    /// Create a new PCI bus
    ///
    /// The root bridge is installed at device slot 0, which is marked used.
    pub fn new(pci_root: PciRoot, vm: Arc) -> Self {
        let mut devices: HashMap>> = HashMap::new();
        let mut device_ids: Vec = vec![false; NUM_DEVICE_IDS];

        devices.insert(0, Arc::new(Mutex::new(pci_root)));
        device_ids[0] = true;

        PciBus {
            devices,
            vm,
            device_ids,
        }
    }

    /// Insert a device in the bus
    pub fn add_device(&mut self, device_id: u32, device: Arc>) {
        self.devices.insert(device_id, device);
    }

    /// Get a new device ID
    ///
    /// Claims the first free slot in the allocation bitmap; errors with
    /// `NoPciDeviceSlotAvailable` once all slots are taken.
    pub fn next_device_id(&mut self) -> Result {
        for (idx, device_id) in self.device_ids.iter_mut().enumerate() {
            if !(*device_id) {
                *device_id = true;
                return Ok(idx.try_into().unwrap());
            }
        }

        Err(PciRootError::NoPciDeviceSlotAvailable)
    }
}

#[cfg(target_arch = "x86_64")]
/// IO port used for configuring PCI over the legacy bus
pub const PCI_CONFIG_IO_PORT: u64 = 0xcf8;

#[cfg(target_arch = "x86_64")]
/// Size of IO ports we are using to configure PCI over the legacy bus. We have two ports, 0xcf8
/// and 0xcfc 32bits long.
pub const PCI_CONFIG_IO_PORT_SIZE: u64 = 0x8;

/// Wrapper that allows handling PCI configuration over the legacy Bus
#[derive(Debug)]
pub struct PciConfigIo {
    /// Config space register.
    // Latched CONFIG_ADDRESS value; bit 31 is the enable bit.
    config_address: u32,
    pci_bus: Arc>,
}

impl PciConfigIo {
    /// New Port IO configuration handler
    pub fn new(pci_bus: Arc>) -> Self {
        PciConfigIo {
            config_address: 0,
            pci_bus,
        }
    }

    /// Handle a configuration space read over Port IO
    ///
    /// Returns all 1s (the PCI "no device" pattern) when configuration access
    /// is disabled or the address targets an unsupported bus/function/device.
    pub fn config_space_read(&self) -> u32 {
        // Bit 31 of CONFIG_ADDRESS enables configuration cycles.
        let enabled = (self.config_address & 0x8000_0000) != 0;
        if !enabled {
            return 0xffff_ffff;
        }

        let (bus, device, function, register) =
            parse_io_config_address(self.config_address & !0x8000_0000);

        // Only support one bus.
        if bus != 0 {
            return 0xffff_ffff;
        }

        // Don't support multi-function devices.
if function > 0 {
            return 0xffff_ffff;
        }

        // NOTE: Potential contention among vCPU threads on this lock. This should not
        // be a problem currently, since we mainly access this when we are setting up devices.
        // We might want to do some profiling to ensure this does not become a bottleneck.
        self.pci_bus
            .as_ref()
            .lock()
            .unwrap()
            .devices
            .get(&(device.try_into().unwrap()))
            .map_or(0xffff_ffff, |d| {
                d.lock().unwrap().read_config_register(register)
            })
    }

    /// Handle a configuration space write over Port IO
    ///
    /// Returns `None` when the write is dropped (crosses the register
    /// boundary, configuration access is disabled, or the address targets an
    /// unsupported bus/function or an absent device) or when the device does
    /// not require the caller to synchronize.
    pub fn config_space_write(&mut self, offset: u64, data: &[u8]) -> Option> {
        // Writes must not cross the 4-byte register boundary.
        if u64_to_usize(offset) + data.len() > 4 {
            return None;
        }

        // Bit 31 of CONFIG_ADDRESS enables configuration cycles.
        let enabled = (self.config_address & 0x8000_0000) != 0;
        if !enabled {
            return None;
        }

        let (bus, device, function, register) =
            parse_io_config_address(self.config_address & !0x8000_0000);

        // Only support one bus.
        if bus != 0 {
            return None;
        }

        // Don't support multi-function devices.
        if function > 0 {
            return None;
        }

        // NOTE: Potential contention among vCPU threads on this lock. This should not
        // be a problem currently, since we mainly access this when we are setting up devices.
        // We might want to do some profiling to ensure this does not become a bottleneck.
        let pci_bus = self.pci_bus.as_ref().lock().unwrap();
        if let Some(d) = pci_bus.devices.get(&(device.try_into().unwrap())) {
            let mut device = d.lock().unwrap();

            // Find out if one of the device's BAR is being reprogrammed, and
            // reprogram it if needed.
if let Some(params) = device.detect_bar_reprogramming(register, data) && let Err(e) = pci_bus.vm.move_bar( params.old_base, params.new_base, params.len, device.deref_mut(), ) { error!( "Failed moving device BAR: {}: 0x{:x}->0x{:x}(0x{:x})", e, params.old_base, params.new_base, params.len ); } // Update the register value device.write_config_register(register, offset, data) } else { None } } fn set_config_address(&mut self, offset: u64, data: &[u8]) { if u64_to_usize(offset) + data.len() > 4 { return; } let (mask, value): (u32, u32) = match data.len() { 1 => ( 0x0000_00ff << (offset * 8), u32::from(data[0]) << (offset * 8), ), 2 => ( 0x0000_ffff << (offset * 8), ((u32::from(data[1]) << 8) | u32::from(data[0])) << (offset * 8), ), 4 => (0xffff_ffff, LittleEndian::read_u32(data)), _ => return, }; self.config_address = (self.config_address & !mask) | value; } } impl BusDevice for PciConfigIo { fn read(&mut self, _base: u64, offset: u64, data: &mut [u8]) { // Only allow reads to the register boundary. let start = u64_to_usize(offset) % 4; let end = start + data.len(); if end > 4 { for d in data.iter_mut() { *d = 0xff; } return; } // `offset` is relative to 0xcf8 let value = match offset { 0..=3 => self.config_address, 4..=7 => self.config_space_read(), _ => 0xffff_ffff, }; for i in start..end { data[i - start] = ((value >> (i * 8)) & 0xff) as u8; } } fn write(&mut self, _base: u64, offset: u64, data: &[u8]) -> Option> { // `offset` is relative to 0xcf8 match offset { o @ 0..=3 => { self.set_config_address(o, data); None } o @ 4..=7 => self.config_space_write(o - 4, data), _ => None, } } } #[derive(Debug)] /// Emulates PCI memory-mapped configuration access mechanism. 
pub struct PciConfigMmio { pci_bus: Arc>, } impl PciConfigMmio { /// New MMIO configuration handler object pub fn new(pci_bus: Arc>) -> Self { PciConfigMmio { pci_bus } } fn config_space_read(&self, config_address: u32) -> u32 { let (bus, device, function, register) = parse_mmio_config_address(config_address); // Only support one bus. if bus != 0 { return 0xffff_ffff; } // Don't support multi-function devices. if function > 0 { return 0xffff_ffff; } self.pci_bus .lock() .unwrap() .devices .get(&(device.try_into().unwrap())) .map_or(0xffff_ffff, |d| { d.lock().unwrap().read_config_register(register) }) } fn config_space_write(&mut self, config_address: u32, offset: u64, data: &[u8]) { if u64_to_usize(offset) + data.len() > 4 { return; } let (bus, device, function, register) = parse_mmio_config_address(config_address); // Only support one bus. if bus != 0 { return; } // Don't support multi-function devices. if function > 0 { return; } let pci_bus = self.pci_bus.lock().unwrap(); if let Some(d) = pci_bus.devices.get(&(device.try_into().unwrap())) { let mut device = d.lock().unwrap(); // Find out if one of the device's BAR is being reprogrammed, and // reprogram it if needed. if let Some(params) = device.detect_bar_reprogramming(register, data) && let Err(e) = pci_bus.vm.move_bar( params.old_base, params.new_base, params.len, device.deref_mut(), ) { error!( "Failed moving device BAR: {}: 0x{:x}->0x{:x}(0x{:x})", e, params.old_base, params.new_base, params.len ); } // Update the register value device.write_config_register(register, offset, data); } } } impl BusDevice for PciConfigMmio { fn read(&mut self, _base: u64, offset: u64, data: &mut [u8]) { // Only allow reads to the register boundary. 
let start = u64_to_usize(offset) % 4;
        let end = start + data.len();
        // Reads crossing the 4-byte register boundary, or whose offset does
        // not fit in a 32bit ECAM address, return all 1s ("no device").
        if end > 4 || offset > u64::from(u32::MAX) {
            for d in data {
                *d = 0xff;
            }
            return;
        }

        let value = self.config_space_read(offset.try_into().unwrap());
        // Copy out only the requested bytes of the 32bit register.
        for i in start..end {
            data[i - start] = ((value >> (i * 8)) & 0xff) as u8;
        }
    }

    fn write(&mut self, _base: u64, offset: u64, data: &[u8]) -> Option> {
        // ECAM addresses are 32bit wide; larger offsets cannot be valid.
        if offset > u64::from(u32::MAX) {
            return None;
        }

        self.config_space_write(offset.try_into().unwrap(), offset % 4, data);
        None
    }
}

/// Extract the bit field of `value` selected by `mask` after shifting right
/// by `offset` bits.
fn shift_and_mask(value: u32, offset: usize, mask: u32) -> usize {
    ((value >> offset) & mask) as usize
}

// Parse the MMIO address offset to a (bus, device, function, register) tuple.
// See section 7.2.2 PCI Express Enhanced Configuration Access Mechanism (ECAM)
// from the Pci Express Base Specification Revision 5.0 Version 1.0.
fn parse_mmio_config_address(config_address: u32) -> (usize, usize, usize, usize) {
    const BUS_NUMBER_OFFSET: usize = 20;
    const BUS_NUMBER_MASK: u32 = 0x00ff;
    const DEVICE_NUMBER_OFFSET: usize = 15;
    const DEVICE_NUMBER_MASK: u32 = 0x1f;
    const FUNCTION_NUMBER_OFFSET: usize = 12;
    const FUNCTION_NUMBER_MASK: u32 = 0x07;
    const REGISTER_NUMBER_OFFSET: usize = 2;
    const REGISTER_NUMBER_MASK: u32 = 0x3ff;

    (
        shift_and_mask(config_address, BUS_NUMBER_OFFSET, BUS_NUMBER_MASK),
        shift_and_mask(config_address, DEVICE_NUMBER_OFFSET, DEVICE_NUMBER_MASK),
        shift_and_mask(config_address, FUNCTION_NUMBER_OFFSET, FUNCTION_NUMBER_MASK),
        shift_and_mask(config_address, REGISTER_NUMBER_OFFSET, REGISTER_NUMBER_MASK),
    )
}

// Parse the CONFIG_ADDRESS register to a (bus, device, function, register) tuple.
fn parse_io_config_address(config_address: u32) -> (usize, usize, usize, usize) { const BUS_NUMBER_OFFSET: usize = 16; const BUS_NUMBER_MASK: u32 = 0x00ff; const DEVICE_NUMBER_OFFSET: usize = 11; const DEVICE_NUMBER_MASK: u32 = 0x1f; const FUNCTION_NUMBER_OFFSET: usize = 8; const FUNCTION_NUMBER_MASK: u32 = 0x07; const REGISTER_NUMBER_OFFSET: usize = 2; const REGISTER_NUMBER_MASK: u32 = 0x3f; ( shift_and_mask(config_address, BUS_NUMBER_OFFSET, BUS_NUMBER_MASK), shift_and_mask(config_address, DEVICE_NUMBER_OFFSET, DEVICE_NUMBER_MASK), shift_and_mask(config_address, FUNCTION_NUMBER_OFFSET, FUNCTION_NUMBER_MASK), shift_and_mask(config_address, REGISTER_NUMBER_OFFSET, REGISTER_NUMBER_MASK), ) } #[cfg(test)] mod tests { use std::sync::atomic::AtomicUsize; use std::sync::{Arc, Mutex}; use pci::{PciClassCode, PciMassStorageSubclass}; use super::{PciBus, PciConfigIo, PciConfigMmio, PciRoot}; use crate::pci::bus::{DEVICE_ID_INTEL_VIRT_PCIE_HOST, VENDOR_ID_INTEL}; use crate::pci::configuration::PciConfiguration; use crate::pci::{BarReprogrammingParams, DeviceRelocation, DeviceRelocationError, PciDevice}; use crate::vstate::bus::BusDevice; #[derive(Debug, Default)] struct RelocationMock { reloc_cnt: AtomicUsize, } impl RelocationMock { fn cnt(&self) -> usize { self.reloc_cnt.load(std::sync::atomic::Ordering::SeqCst) } } impl DeviceRelocation for RelocationMock { fn move_bar( &self, _old_base: u64, _new_base: u64, _len: u64, _pci_dev: &mut dyn PciDevice, ) -> Result<(), DeviceRelocationError> { self.reloc_cnt .fetch_add(1, std::sync::atomic::Ordering::SeqCst); Ok(()) } } struct PciDevMock(PciConfiguration); impl PciDevMock { fn new() -> Self { let mut config = PciConfiguration::new_type0( 0x42, 0x0, 0x0, PciClassCode::MassStorage, &PciMassStorageSubclass::SerialScsiController, 0x13, 0x12, None, ); config.add_pci_bar(0, 0x1000, 0x1000); PciDevMock(config) } } impl PciDevice for PciDevMock { fn write_config_register( &mut self, reg_idx: usize, offset: u64, data: &[u8], ) -> 
Option> { self.0.write_config_register(reg_idx, offset, data); None } fn read_config_register(&mut self, reg_idx: usize) -> u32 { self.0.read_reg(reg_idx) } fn detect_bar_reprogramming( &mut self, reg_idx: usize, data: &[u8], ) -> Option { self.0.detect_bar_reprogramming(reg_idx, data) } } #[test] fn test_writing_io_config_address() { let mock = Arc::new(RelocationMock::default()); let root = PciRoot::new(None); let mut bus = PciConfigIo::new(Arc::new(Mutex::new(PciBus::new(root, mock)))); assert_eq!(bus.config_address, 0); // Writing more than 32 bits will should fail bus.write(0, 0, &[0x42; 8]); assert_eq!(bus.config_address, 0); // Write all the address at once bus.write(0, 0, &[0x13, 0x12, 0x11, 0x10]); assert_eq!(bus.config_address, 0x10111213); // Not writing 32bits at offset 0 should have no effect bus.write(0, 1, &[0x0; 4]); assert_eq!(bus.config_address, 0x10111213); // Write two bytes at a time bus.write(0, 0, &[0x42, 0x42]); assert_eq!(bus.config_address, 0x10114242); bus.write(0, 1, &[0x43, 0x43]); assert_eq!(bus.config_address, 0x10434342); bus.write(0, 2, &[0x44, 0x44]); assert_eq!(bus.config_address, 0x44444342); // Writing two bytes at offset 3 should overflow, so it shouldn't have any effect bus.write(0, 3, &[0x45, 0x45]); assert_eq!(bus.config_address, 0x44444342); // Write one byte at a time bus.write(0, 0, &[0x0]); assert_eq!(bus.config_address, 0x44444300); bus.write(0, 1, &[0x0]); assert_eq!(bus.config_address, 0x44440000); bus.write(0, 2, &[0x0]); assert_eq!(bus.config_address, 0x44000000); bus.write(0, 3, &[0x0]); assert_eq!(bus.config_address, 0x00000000); // Writing past 4 bytes should have no effect bus.write(0, 4, &[0x13]); assert_eq!(bus.config_address, 0x0); } #[test] fn test_reading_io_config_address() { let mock = Arc::new(RelocationMock::default()); let root = PciRoot::new(None); let mut bus = PciConfigIo::new(Arc::new(Mutex::new(PciBus::new(root, mock)))); let mut buffer = [0u8; 4]; bus.config_address = 0x13121110; // First 4 bytes 
are the config address // Next 4 bytes are the values read from the configuration space. // // Reading past offset 7 should not return nothing (all 1s) bus.read(0, 8, &mut buffer); assert_eq!(buffer, [0xff; 4]); // offset + buffer.len() needs to be smaller or equal than 4 bus.read(0, 1, &mut buffer); assert_eq!(buffer, [0xff; 4]); bus.read(0, 2, &mut buffer[..3]); assert_eq!(buffer, [0xff; 4]); bus.read(0, 3, &mut buffer[..2]); assert_eq!(buffer, [0xff; 4]); // reading one byte at a time bus.read(0, 0, &mut buffer[0..1]); assert_eq!(buffer, [0x10, 0xff, 0xff, 0xff]); bus.read(0, 1, &mut buffer[1..2]); assert_eq!(buffer, [0x10, 0x11, 0xff, 0xff]); bus.read(0, 2, &mut buffer[2..3]); assert_eq!(buffer, [0x10, 0x11, 0x12, 0xff]); bus.read(0, 3, &mut buffer[3..4]); assert_eq!(buffer, [0x10, 0x11, 0x12, 0x13]); // reading two bytes at a time bus.config_address = 0x42434445; bus.read(0, 0, &mut buffer[..2]); assert_eq!(buffer, [0x45, 0x44, 0x12, 0x13]); bus.read(0, 1, &mut buffer[..2]); assert_eq!(buffer, [0x44, 0x43, 0x12, 0x13]); bus.read(0, 2, &mut buffer[..2]); assert_eq!(buffer, [0x43, 0x42, 0x12, 0x13]); // reading all of it at once bus.read(0, 0, &mut buffer); assert_eq!(buffer, [0x45, 0x44, 0x43, 0x42]); } fn initialize_bus() -> (PciConfigMmio, PciConfigIo, Arc) { let mock = Arc::new(RelocationMock::default()); let root = PciRoot::new(None); let mut bus = PciBus::new(root, mock.clone()); bus.add_device(1, Arc::new(Mutex::new(PciDevMock::new()))); let bus = Arc::new(Mutex::new(bus)); (PciConfigMmio::new(bus.clone()), PciConfigIo::new(bus), mock) } #[test] fn test_invalid_register_boundary_reads() { let (mut mmio_config, mut io_config, _) = initialize_bus(); // Read crossing register boundaries let mut buffer = [0u8; 4]; mmio_config.read(0, 1, &mut buffer); assert_eq!(0xffff_ffff, u32::from_le_bytes(buffer)); let mut buffer = [0u8; 4]; io_config.read(0, 1, &mut buffer); assert_eq!(0xffff_ffff, u32::from_le_bytes(buffer)); // As well in the config space let mut 
buffer = [0u8; 4]; io_config.read(0, 5, &mut buffer); assert_eq!(0xffff_ffff, u32::from_le_bytes(buffer)); } // MMIO config addresses are of the form // // | Base address upper bits | Bus Number | Device Number | Function Number | Register number | Byte offset | // | 31-28 | 27-20 | 19-15 | 14-12 | 11-2 | 0-1 | // // Meaning that the offset is built using: // // `bus << 20 | device << 15 | function << 12 | register << 2 | byte` fn mmio_offset(bus: u8, device: u8, function: u8, register: u16, byte: u8) -> u32 { assert!(device < 32); assert!(function < 8); assert!(register < 1024); assert!(byte < 4); (bus as u32) << 20 | (device as u32) << 15 | (function as u32) << 12 | (register as u32) << 2 | (byte as u32) } fn read_mmio_config( config: &mut PciConfigMmio, bus: u8, device: u8, function: u8, register: u16, byte: u8, data: &mut [u8], ) { config.read( 0, mmio_offset(bus, device, function, register, byte) as u64, data, ); } fn write_mmio_config( config: &mut PciConfigMmio, bus: u8, device: u8, function: u8, register: u16, byte: u8, data: &[u8], ) { config.write( 0, mmio_offset(bus, device, function, register, byte) as u64, data, ); } // Similarly, when using the IO mechanism the config addresses have the following format // // | Enabled | zeros | Bus Number | Device Number | Function Number | Register number | zeros | // | 31 | 30-24 | 23-16 | 15-11 | 10-8 | 7-2 | 1-0 | // // // Meaning that the address is built using: // // 0x8000_0000 | bus << 16 | device << 11 | function << 8 | register << 2; // // Only 32-bit aligned accesses are allowed here. 
fn pio_offset(enabled: bool, bus: u8, device: u8, function: u8, register: u8) -> u32 { assert!(device < 32); assert!(function < 8); assert!(register < 64); let offset = if enabled { 0x8000_0000 } else { 0u32 }; offset | (bus as u32) << 16 | (device as u32) << 11 | (function as u32) << 8 | (register as u32) << 2 } fn set_io_address( config: &mut PciConfigIo, enabled: bool, bus: u8, device: u8, function: u8, register: u8, ) { let address = u32::to_le_bytes(pio_offset(enabled, bus, device, function, register)); config.write(0, 0, &address); } fn read_io_config( config: &mut PciConfigIo, enabled: bool, bus: u8, device: u8, function: u8, register: u8, data: &mut [u8], ) { set_io_address(config, enabled, bus, device, function, register); config.read(0, 4, data); } fn write_io_config( config: &mut PciConfigIo, enabled: bool, bus: u8, device: u8, function: u8, register: u8, data: &[u8], ) { set_io_address(config, enabled, bus, device, function, register); config.write(0, 4, data); } #[test] fn test_mmio_invalid_bus_number() { let (mut mmio_config, _, _) = initialize_bus(); let mut buffer = [0u8; 4]; // Asking for Bus 1 should return all 1s read_mmio_config(&mut mmio_config, 1, 0, 0, 0, 0, &mut buffer); assert_eq!(buffer, u32::to_le_bytes(0xffff_ffff)); // Writing the same buffer[0] = 0x42; write_mmio_config(&mut mmio_config, 1, 0, 0, 15, 0, &buffer); read_mmio_config(&mut mmio_config, 1, 0, 0, 15, 0, &mut buffer); assert_eq!(buffer, u32::to_le_bytes(0xffff_ffff)); read_mmio_config(&mut mmio_config, 0, 0, 0, 15, 0, &mut buffer); assert_eq!(buffer, u32::to_le_bytes(0x0)); // Asking for Bus 0 should work read_mmio_config(&mut mmio_config, 0, 0, 0, 0, 0, &mut buffer); assert_eq!(&buffer[..2], &u16::to_le_bytes(VENDOR_ID_INTEL)); assert_eq!( &buffer[2..], &u16::to_le_bytes(DEVICE_ID_INTEL_VIRT_PCIE_HOST) ); } #[test] fn test_io_invalid_bus_number() { let (_, mut pio_config, _) = initialize_bus(); let mut buffer = [0u8; 4]; // Asking for Bus 1 should return all 1s 
read_io_config(&mut pio_config, true, 1, 0, 0, 0, &mut buffer); assert_eq!(buffer, u32::to_le_bytes(0xffff_ffff)); // Asking for Bus 0 should work read_io_config(&mut pio_config, true, 0, 0, 0, 0, &mut buffer); assert_eq!(&buffer[..2], &u16::to_le_bytes(VENDOR_ID_INTEL)); assert_eq!( &buffer[2..], &u16::to_le_bytes(DEVICE_ID_INTEL_VIRT_PCIE_HOST) ); } #[test] fn test_mmio_invalid_function() { let (mut mmio_config, _, _) = initialize_bus(); let mut buffer = [0u8; 4]; // Asking for Bus 1 should return all 1s read_mmio_config(&mut mmio_config, 0, 0, 1, 0, 0, &mut buffer); assert_eq!(buffer, u32::to_le_bytes(0xffff_ffff)); // Writing the same buffer[0] = 0x42; write_mmio_config(&mut mmio_config, 0, 0, 1, 15, 0, &buffer); read_mmio_config(&mut mmio_config, 0, 0, 1, 15, 0, &mut buffer); assert_eq!(buffer, u32::to_le_bytes(0xffff_ffff)); read_mmio_config(&mut mmio_config, 0, 0, 0, 15, 0, &mut buffer); assert_eq!(buffer, u32::to_le_bytes(0x0)); // Asking for Bus 0 should work read_mmio_config(&mut mmio_config, 0, 0, 0, 0, 0, &mut buffer); assert_eq!(&buffer[..2], &u16::to_le_bytes(VENDOR_ID_INTEL)); assert_eq!( &buffer[2..], &u16::to_le_bytes(DEVICE_ID_INTEL_VIRT_PCIE_HOST) ); } #[test] fn test_io_invalid_function() { let (_, mut pio_config, _) = initialize_bus(); let mut buffer = [0u8; 4]; // Asking for Bus 1 should return all 1s read_io_config(&mut pio_config, true, 0, 0, 1, 0, &mut buffer); assert_eq!(buffer, u32::to_le_bytes(0xffff_ffff)); // Asking for Bus 0 should work read_io_config(&mut pio_config, true, 0, 0, 0, 0, &mut buffer); assert_eq!(&buffer[..2], &u16::to_le_bytes(VENDOR_ID_INTEL)); assert_eq!( &buffer[2..], &u16::to_le_bytes(DEVICE_ID_INTEL_VIRT_PCIE_HOST) ); } #[test] fn test_io_disabled_reads() { let (_, mut pio_config, _) = initialize_bus(); let mut buffer = [0u8; 4]; // Trying to read without enabling should return all 1s read_io_config(&mut pio_config, false, 0, 0, 0, 0, &mut buffer); assert_eq!(buffer, u32::to_le_bytes(0xffff_ffff)); // Asking for 
Bus 0 should work read_io_config(&mut pio_config, true, 0, 0, 0, 0, &mut buffer); assert_eq!(&buffer[..2], &u16::to_le_bytes(VENDOR_ID_INTEL)); assert_eq!( &buffer[2..], &u16::to_le_bytes(DEVICE_ID_INTEL_VIRT_PCIE_HOST) ); } #[test] fn test_io_disabled_writes() { let (_, mut pio_config, _) = initialize_bus(); // Try to write the IRQ line used for the root port. let mut buffer = [0u8; 4]; // First read the current value (use `enabled` bit) read_io_config(&mut pio_config, true, 0, 0, 0, 15, &mut buffer); let irq_line = buffer[0]; // Write without setting the `enabled` bit. buffer[0] = 0x42; write_io_config(&mut pio_config, false, 0, 0, 0, 15, &buffer); // IRQ line shouldn't have changed read_io_config(&mut pio_config, true, 0, 0, 0, 15, &mut buffer); assert_eq!(buffer[0], irq_line); // Write with `enabled` bit set. buffer[0] = 0x42; write_io_config(&mut pio_config, true, 0, 0, 0, 15, &buffer); // IRQ line should change read_io_config(&mut pio_config, true, 0, 0, 0, 15, &mut buffer); assert_eq!(buffer[0], 0x42); } #[test] fn test_mmio_writes() { let (mut mmio_config, _, _) = initialize_bus(); let mut buffer = [0u8; 4]; read_mmio_config(&mut mmio_config, 0, 0, 0, 15, 0, &mut buffer); assert_eq!(buffer[0], 0x0); write_mmio_config(&mut mmio_config, 0, 0, 0, 15, 0, &[0x42]); read_mmio_config(&mut mmio_config, 0, 0, 0, 15, 0, &mut buffer); assert_eq!(buffer[0], 0x42); } #[test] fn test_bar_reprogramming() { let (mut mmio_config, _, mock) = initialize_bus(); let mut buffer = [0u8; 4]; assert_eq!(mock.cnt(), 0); read_mmio_config(&mut mmio_config, 0, 1, 0, 0x4, 0, &mut buffer); let old_addr = u32::from_le_bytes(buffer) & 0xffff_fff0; assert_eq!(old_addr, 0x1000); // Writing the lower 32bits first should not trigger any reprogramming write_mmio_config( &mut mmio_config, 0, 1, 0, 0x4, 0, &u32::to_le_bytes(0x1312_0000), ); read_mmio_config(&mut mmio_config, 0, 1, 0, 0x4, 0, &mut buffer); let new_addr = u32::from_le_bytes(buffer) & 0xffff_fff0; assert_eq!(new_addr, 0x1312_0000); 
assert_eq!(mock.cnt(), 0); // Writing the upper 32bits first should now trigger the reprogramming logic write_mmio_config(&mut mmio_config, 0, 1, 0, 0x5, 0, &u32::to_le_bytes(0x1110)); read_mmio_config(&mut mmio_config, 0, 1, 0, 0x5, 0, &mut buffer); let new_addr = u32::from_le_bytes(buffer); assert_eq!(new_addr, 0x1110); assert_eq!(mock.cnt(), 1); // BAR2 should not be used, so reading its address should return all 0s read_mmio_config(&mut mmio_config, 0, 1, 0, 0x6, 0, &mut buffer); assert_eq!(buffer, [0x0, 0x0, 0x0, 0x0]); // and reprogramming shouldn't have any effect write_mmio_config( &mut mmio_config, 0, 1, 0, 0x5, 0, &u32::to_le_bytes(0x1312_1110), ); read_mmio_config(&mut mmio_config, 0, 1, 0, 0x6, 0, &mut buffer); assert_eq!(buffer, [0x0, 0x0, 0x0, 0x0]); } } ================================================ FILE: src/vmm/src/pci/configuration.rs ================================================ // Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. // Copyright 2018 The Chromium OS Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE-BSD-3-Clause file. // // SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause use std::sync::{Arc, Mutex}; use byteorder::{ByteOrder, LittleEndian}; use pci::{PciCapabilityId, PciClassCode, PciSubclass}; use serde::{Deserialize, Serialize}; use super::BarReprogrammingParams; use super::msix::MsixConfig; use crate::logger::{info, warn}; use crate::utils::u64_to_usize; // The number of 32bit registers in the config space, 4096 bytes. 
const NUM_CONFIGURATION_REGISTERS: usize = 1024; const STATUS_REG: usize = 1; const STATUS_REG_CAPABILITIES_USED_MASK: u32 = 0x0010_0000; const BAR0_REG: usize = 4; const ROM_BAR_REG: usize = 12; const BAR_MEM_ADDR_MASK: u32 = 0xffff_fff0; const ROM_BAR_ADDR_MASK: u32 = 0xffff_f800; const MSI_CAPABILITY_REGISTER_MASK: u32 = 0x0071_0000; const MSIX_CAPABILITY_REGISTER_MASK: u32 = 0xc000_0000; const NUM_BAR_REGS: usize = 6; const CAPABILITY_LIST_HEAD_OFFSET: usize = 0x34; const FIRST_CAPABILITY_OFFSET: usize = 0x40; const CAPABILITY_MAX_OFFSET: usize = 192; /// A PCI capability list. Devices can optionally specify capabilities in their configuration space. pub trait PciCapability { /// Bytes of the PCI capability fn bytes(&self) -> &[u8]; /// Id of the PCI capability fn id(&self) -> PciCapabilityId; } // This encodes the BAR size as expected by the software running inside the guest. // It assumes that bar_size is not 0 fn encode_64_bits_bar_size(bar_size: u64) -> (u32, u32) { assert_ne!(bar_size, 0); let result = !(bar_size - 1); let result_hi = (result >> 32) as u32; let result_lo = (result & 0xffff_ffff) as u32; (result_hi, result_lo) } // This decoes the BAR size from the value stored in the BAR registers. fn decode_64_bits_bar_size(bar_size_hi: u32, bar_size_lo: u32) -> u64 { let bar_size: u64 = ((bar_size_hi as u64) << 32) | (bar_size_lo as u64); let size = !bar_size + 1; assert_ne!(size, 0); size } #[derive(Debug, Default, Clone, Copy, Serialize, Deserialize)] struct PciBar { addr: u32, size: u32, used: bool, } /// PCI configuration space state for (de)serialization #[derive(Debug, Clone, Serialize, Deserialize)] pub struct PciConfigurationState { registers: Vec, writable_bits: Vec, bars: Vec, last_capability: Option<(usize, usize)>, msix_cap_reg_idx: Option, } #[derive(Debug)] /// Contains the configuration space of a PCI node. /// /// See the [specification](https://en.wikipedia.org/wiki/PCI_configuration_space). 
/// The configuration space is accessed with DWORD reads and writes from the guest. pub struct PciConfiguration { registers: [u32; NUM_CONFIGURATION_REGISTERS], writable_bits: [u32; NUM_CONFIGURATION_REGISTERS], // writable bits for each register. bars: [PciBar; NUM_BAR_REGS], // Contains the byte offset and size of the last capability. last_capability: Option<(usize, usize)>, msix_cap_reg_idx: Option, msix_config: Option>>, } impl PciConfiguration { #[allow(clippy::too_many_arguments)] /// Create a new type 0 PCI configuration pub fn new_type0( vendor_id: u16, device_id: u16, revision_id: u8, class_code: PciClassCode, subclass: &dyn PciSubclass, subsystem_vendor_id: u16, subsystem_id: u16, msix_config: Option>>, ) -> Self { let mut registers = [0u32; NUM_CONFIGURATION_REGISTERS]; let mut writable_bits = [0u32; NUM_CONFIGURATION_REGISTERS]; registers[0] = (u32::from(device_id) << 16) | u32::from(vendor_id); // TODO(dverkamp): Status should be write-1-to-clear writable_bits[1] = 0x0000_ffff; // Status (r/o), command (r/w) registers[2] = (u32::from(class_code.get_register_value()) << 24) | (u32::from(subclass.get_register_value()) << 16) | u32::from(revision_id); writable_bits[3] = 0x0000_00ff; // Cacheline size (r/w) registers[3] = 0x0000_0000; // Header type 0 (device) writable_bits[15] = 0x0000_00ff; // IRQ line (r/w) registers[11] = (u32::from(subsystem_id) << 16) | u32::from(subsystem_vendor_id); PciConfiguration { registers, writable_bits, bars: [PciBar::default(); NUM_BAR_REGS], last_capability: None, msix_cap_reg_idx: None, msix_config, } } /// Create a type 0 PCI configuration from snapshot state pub fn type0_from_state( state: PciConfigurationState, msix_config: Option>>, ) -> Self { PciConfiguration { registers: state.registers.try_into().unwrap(), writable_bits: state.writable_bits.try_into().unwrap(), bars: state.bars.try_into().unwrap(), last_capability: state.last_capability, msix_cap_reg_idx: state.msix_cap_reg_idx, msix_config, } } /// Create PCI 
configuration space state pub fn state(&self) -> PciConfigurationState { PciConfigurationState { registers: self.registers.to_vec(), writable_bits: self.writable_bits.to_vec(), bars: self.bars.to_vec(), last_capability: self.last_capability, msix_cap_reg_idx: self.msix_cap_reg_idx, } } /// Reads a 32bit register from `reg_idx` in the register map. pub fn read_reg(&self, reg_idx: usize) -> u32 { *(self.registers.get(reg_idx).unwrap_or(&0xffff_ffff)) } /// Writes a 32bit register to `reg_idx` in the register map. pub fn write_reg(&mut self, reg_idx: usize, value: u32) { let mut mask = self.writable_bits[reg_idx]; if (BAR0_REG..BAR0_REG + NUM_BAR_REGS).contains(®_idx) { // Handle very specific case where the BAR is being written with // all 1's to retrieve the BAR size during next BAR reading. if value == 0xffff_ffff { mask &= self.bars[reg_idx - 4].size; } } else if reg_idx == ROM_BAR_REG { // Handle very specific case where the BAR is being written with // all 1's on bits 31-11 to retrieve the BAR size during next BAR // reading. if value & ROM_BAR_ADDR_MASK == ROM_BAR_ADDR_MASK { mask = 0; } } if let Some(r) = self.registers.get_mut(reg_idx) { *r = (*r & !self.writable_bits[reg_idx]) | (value & mask); } else { warn!("bad PCI register write {}", reg_idx); } } /// Writes a 16bit word to `offset`. `offset` must be 16bit aligned. pub fn write_word(&mut self, offset: usize, value: u16) { let shift = match offset % 4 { 0 => 0, 2 => 16, _ => { warn!("bad PCI config write offset {}", offset); return; } }; let reg_idx = offset / 4; if let Some(r) = self.registers.get_mut(reg_idx) { let writable_mask = self.writable_bits[reg_idx]; let mask = (0xffffu32 << shift) & writable_mask; let shifted_value = (u32::from(value) << shift) & writable_mask; *r = *r & !mask | shifted_value; } else { warn!("bad PCI config write offset {}", offset); } } /// Writes a byte to `offset`. 
pub fn write_byte(&mut self, offset: usize, value: u8) { self.write_byte_internal(offset, value, true); } /// Writes a byte to `offset`, optionally enforcing read-only bits. fn write_byte_internal(&mut self, offset: usize, value: u8, apply_writable_mask: bool) { let shift = (offset % 4) * 8; let reg_idx = offset / 4; if let Some(r) = self.registers.get_mut(reg_idx) { let writable_mask = if apply_writable_mask { self.writable_bits[reg_idx] } else { 0xffff_ffff }; let mask = (0xffu32 << shift) & writable_mask; let shifted_value = (u32::from(value) << shift) & writable_mask; *r = *r & !mask | shifted_value; } else { warn!("bad PCI config write offset {}", offset); } } /// Add the [addr, addr + size) BAR region. /// /// Configures the specified BAR to report this region and size to the guest kernel. /// Enforces a few constraints (i.e, region size must be power of two, register not already /// used). pub fn add_pci_bar(&mut self, bar_idx: usize, addr: u64, size: u64) { let reg_idx = BAR0_REG + bar_idx; // These are a few constraints that are imposed due to the fact // that only VirtIO devices are actually allocating a BAR. Moreover, this is // a single 64-bit BAR. Not conforming to these requirements is an internal // Firecracker bug. // We are only using BAR 0 assert_eq!(bar_idx, 0); // We shouldn't be trying to use the same BAR twice assert!(!self.bars[0].used); assert!(!self.bars[1].used); // We can't have a size of 0 assert_ne!(size, 0); // BAR size needs to be a power of two assert!(size.is_power_of_two()); // We should not be overflowing the address space addr.checked_add(size - 1).unwrap(); // Encode the BAR size as expected by the software running in // the guest. 
let (bar_size_hi, bar_size_lo) = encode_64_bits_bar_size(size); self.registers[reg_idx + 1] = (addr >> 32) as u32; self.writable_bits[reg_idx + 1] = 0xffff_ffff; self.bars[bar_idx + 1].addr = self.registers[reg_idx + 1]; self.bars[bar_idx].size = bar_size_lo; self.bars[bar_idx + 1].size = bar_size_hi; self.bars[bar_idx + 1].used = true; // Addresses of memory BARs are 16-byte aligned so the lower 4 bits are always 0. Within // the register we use this 4 bits to encode extra information about the BAR. The meaning // of these bits is: // // | Bit 3 | Bits 2-1 | Bit 0 | // | Prefetchable | type | Always 0 | // // Non-prefetchable, 64 bits BAR region self.registers[reg_idx] = (((addr & 0xffff_ffff) as u32) & BAR_MEM_ADDR_MASK) | 4u32; self.writable_bits[reg_idx] = BAR_MEM_ADDR_MASK; self.bars[bar_idx].addr = self.registers[reg_idx]; self.bars[bar_idx].used = true; } /// Returns the address of the given BAR region. /// /// This assumes that `bar_idx` is a valid BAR register. pub fn get_bar_addr(&self, bar_idx: usize) -> u64 { assert!(bar_idx < NUM_BAR_REGS); let reg_idx = BAR0_REG + bar_idx; (u64::from(self.bars[bar_idx].addr & self.writable_bits[reg_idx])) | (u64::from(self.bars[bar_idx + 1].addr) << 32) } /// Adds the capability `cap_data` to the list of capabilities. /// /// `cap_data` should not include the two-byte PCI capability header (type, next). /// Correct values will be generated automatically based on `cap_data.id()` and /// `cap_data.len()`. pub fn add_capability(&mut self, cap_data: &dyn PciCapability) -> usize { let total_len = cap_data.bytes().len() + 2; let (cap_offset, tail_offset) = match self.last_capability { Some((offset, len)) => (Self::next_dword(offset, len), offset + 1), None => (FIRST_CAPABILITY_OFFSET, CAPABILITY_LIST_HEAD_OFFSET), }; // We know that the capabilities we are using have a valid size (doesn't overflow) and that // we add capabilities that fit in the available space. 
If any of these requirements don't // hold, this is due to a Firecracker bug. let end_offset = cap_offset.checked_add(total_len).unwrap(); assert!(end_offset <= CAPABILITY_MAX_OFFSET); self.registers[STATUS_REG] |= STATUS_REG_CAPABILITIES_USED_MASK; self.write_byte_internal(tail_offset, cap_offset.try_into().unwrap(), false); self.write_byte_internal(cap_offset, cap_data.id() as u8, false); self.write_byte_internal(cap_offset + 1, 0, false); // Next pointer. for (i, byte) in cap_data.bytes().iter().enumerate() { self.write_byte_internal(cap_offset + i + 2, *byte, false); } self.last_capability = Some((cap_offset, total_len)); match cap_data.id() { PciCapabilityId::MessageSignalledInterrupts => { self.writable_bits[cap_offset / 4] = MSI_CAPABILITY_REGISTER_MASK; } PciCapabilityId::MsiX => { self.msix_cap_reg_idx = Some(cap_offset / 4); self.writable_bits[self.msix_cap_reg_idx.unwrap()] = MSIX_CAPABILITY_REGISTER_MASK; } _ => {} } cap_offset } // Find the next aligned offset after the one given. fn next_dword(offset: usize, len: usize) -> usize { let next = offset + len; (next + 3) & !3 } /// Write a PCI configuration register pub fn write_config_register(&mut self, reg_idx: usize, offset: u64, data: &[u8]) { if reg_idx >= NUM_CONFIGURATION_REGISTERS { return; } if u64_to_usize(offset) + data.len() > 4 { return; } // Handle potential write to MSI-X message control register if let Some(msix_cap_reg_idx) = self.msix_cap_reg_idx && let Some(msix_config) = &self.msix_config { if msix_cap_reg_idx == reg_idx && offset == 2 && data.len() == 2 { // 2-bytes write in the Message Control field msix_config .lock() .unwrap() .set_msg_ctl(LittleEndian::read_u16(data)); } else if msix_cap_reg_idx == reg_idx && offset == 0 && data.len() == 4 { // 4 bytes write at the beginning. 
Ignore the first 2 bytes which are the // capability id and next capability pointer msix_config .lock() .unwrap() .set_msg_ctl((LittleEndian::read_u32(data) >> 16) as u16); } } match data.len() { 1 => self.write_byte(reg_idx * 4 + u64_to_usize(offset), data[0]), 2 => self.write_word( reg_idx * 4 + u64_to_usize(offset), u16::from(data[0]) | (u16::from(data[1]) << 8), ), 4 => self.write_reg(reg_idx, LittleEndian::read_u32(data)), _ => (), } } /// Detect whether the guest wants to reprogram the address of a BAR pub fn detect_bar_reprogramming( &mut self, reg_idx: usize, data: &[u8], ) -> Option { if data.len() != 4 { return None; } let value = LittleEndian::read_u32(data); let mask = self.writable_bits[reg_idx]; if !(BAR0_REG..BAR0_REG + NUM_BAR_REGS).contains(®_idx) { return None; } // Ignore the case where the BAR size is being asked for. if value == 0xffff_ffff { return None; } let bar_idx = reg_idx - 4; // Do not reprogram BARs we are not using if !self.bars[bar_idx].used { return None; } // We are always using 64bit BARs, so two BAR registers. We don't do anything until // the upper BAR is modified, otherwise we would be moving the BAR to a wrong // location in memory. 
if bar_idx == 0 { return None; } // The lower BAR (of this 64bit BAR) has been reprogrammed to a different value // than it used to be if (self.registers[reg_idx - 1] & self.writable_bits[reg_idx - 1]) != (self.bars[bar_idx - 1].addr & self.writable_bits[reg_idx - 1]) || // Or the lower BAR hasn't been changed but the upper one is being reprogrammed // now to a different value (value & mask) != (self.bars[bar_idx].addr & mask) { info!( "Detected BAR reprogramming: (BAR {}) 0x{:x}->0x{:x}", reg_idx, self.registers[reg_idx], value ); let old_base = (u64::from(self.bars[bar_idx].addr & mask) << 32) | u64::from(self.bars[bar_idx - 1].addr & self.writable_bits[reg_idx - 1]); let new_base = (u64::from(value & mask) << 32) | u64::from(self.registers[reg_idx - 1] & self.writable_bits[reg_idx - 1]); let len = decode_64_bits_bar_size(self.bars[bar_idx].size, self.bars[bar_idx - 1].size); self.bars[bar_idx].addr = value; self.bars[bar_idx - 1].addr = self.registers[reg_idx - 1]; return Some(BarReprogrammingParams { old_base, new_base, len, }); } None } } #[cfg(test)] mod tests { use pci::PciMultimediaSubclass; use vm_memory::ByteValued; use super::*; use crate::pci::msix::MsixCap; #[repr(C, packed)] #[derive(Clone, Copy, Default)] #[allow(dead_code)] struct TestCap { len: u8, foo: u8, } // SAFETY: All members are simple numbers and any value is valid. 
unsafe impl ByteValued for TestCap {} impl PciCapability for TestCap { fn bytes(&self) -> &[u8] { self.as_slice() } fn id(&self) -> PciCapabilityId { PciCapabilityId::VendorSpecific } } struct BadCap { data: Vec, } impl BadCap { fn new(len: u8) -> Self { Self { data: (0..len).collect(), } } } impl PciCapability for BadCap { fn bytes(&self) -> &[u8] { &self.data } fn id(&self) -> PciCapabilityId { PciCapabilityId::VendorSpecific } } #[test] #[should_panic] fn test_too_big_capability() { let mut cfg = default_pci_config(); cfg.add_capability(&BadCap::new(127)); } #[test] #[should_panic] fn test_capability_space_overflow() { let mut cfg = default_pci_config(); cfg.add_capability(&BadCap::new(62)); cfg.add_capability(&BadCap::new(62)); cfg.add_capability(&BadCap::new(0)); } #[test] fn test_add_capability() { let mut cfg = default_pci_config(); // Reset capabilities cfg.last_capability = None; // Add two capabilities with different contents. let cap1 = TestCap { len: 4, foo: 0xAA }; let cap1_offset = cfg.add_capability(&cap1); assert_eq!(cap1_offset % 4, 0); let cap2 = TestCap { len: 0x04, foo: 0x55, }; let cap2_offset = cfg.add_capability(&cap2); assert_eq!(cap2_offset % 4, 0); // The capability list head should be pointing to cap1. let cap_ptr = cfg.read_reg(CAPABILITY_LIST_HEAD_OFFSET / 4) & 0xFF; assert_eq!(cap1_offset, cap_ptr as usize); // Verify the contents of the capabilities. 
let cap1_data = cfg.read_reg(cap1_offset / 4); assert_eq!(cap1_data & 0xFF, 0x09); // capability ID assert_eq!((cap1_data >> 8) & 0xFF, u32::try_from(cap2_offset).unwrap()); // next capability pointer assert_eq!((cap1_data >> 16) & 0xFF, 0x04); // cap1.len assert_eq!((cap1_data >> 24) & 0xFF, 0xAA); // cap1.foo let cap2_data = cfg.read_reg(cap2_offset / 4); assert_eq!(cap2_data & 0xFF, 0x09); // capability ID assert_eq!((cap2_data >> 8) & 0xFF, 0x00); // next capability pointer assert_eq!((cap2_data >> 16) & 0xFF, 0x04); // cap2.len assert_eq!((cap2_data >> 24) & 0xFF, 0x55); // cap2.foo } #[test] fn test_msix_capability() { let mut cfg = default_pci_config(); // Information about the MSI-X capability layout: https://wiki.osdev.org/PCI#Enabling_MSI-X let msix_cap = MsixCap::new( 3, // Using BAR3 for message control table 1024, // 1024 MSI-X vectors 0x4000, // Offset of message control table inside the BAR 4, // BAR4 used for pending control bit 0x420, // Offset of pending bit array (PBA) inside BAR ); cfg.add_capability(&msix_cap); let cap_reg = FIRST_CAPABILITY_OFFSET / 4; let reg = cfg.read_reg(cap_reg); // Capability ID is MSI-X assert_eq!( PciCapabilityId::from((reg & 0xff) as u8), PciCapabilityId::MsiX ); // We only have one capability, so `next` should be 0 assert_eq!(((reg >> 8) & 0xff) as u8, 0); let msg_ctl = (reg >> 16) as u16; // MSI-X is enabled assert_eq!(msg_ctl & 0x8000, 0x8000); // Vectors are not masked assert_eq!(msg_ctl & 0x4000, 0x0); // Reserved bits are 0 assert_eq!(msg_ctl & 0x3800, 0x0); // We've got 1024 vectors (Table size is N-1 encoded) assert_eq!((msg_ctl & 0x7ff) + 1, 1024); let reg = cfg.read_reg(cap_reg + 1); // We are using BAR3 assert_eq!(reg & 0x7, 3); // Message Control Table is located in offset 0x4000 inside the BAR // We don't need to shift. Offset needs to be 8-byte aligned - so BIR // is stored in its last 3 bits (which we need to mask out). 
assert_eq!(reg & 0xffff_fff8, 0x4000); let reg = cfg.read_reg(cap_reg + 2); // PBA is 0x420 bytes inside BAR4 assert_eq!(reg & 0x7, 4); assert_eq!(reg & 0xffff_fff8, 0x420); // Check read/write mask // Capability Id of MSI-X is 0x11 cfg.write_config_register(cap_reg, 0, &[0x0]); assert_eq!( PciCapabilityId::from((cfg.read_reg(cap_reg) & 0xff) as u8), PciCapabilityId::MsiX ); // Cannot override next capability pointer cfg.write_config_register(cap_reg, 1, &[0x42]); assert_eq!((cfg.read_reg(cap_reg) >> 8) & 0xff, 0); // We are writing this: // // meaning: | MSI enabled | Vectors Masked | Reserved | Table size | // bit: | 15 | 14 | 13 - 11 | 0 - 10 | // R/W: | R/W | R/W | R | R | let msg_ctl = (cfg.read_reg(cap_reg) >> 16) as u16; // Try to flip all bits cfg.write_config_register(cap_reg, 2, &u16::to_le_bytes(!msg_ctl)); let msg_ctl = (cfg.read_reg(cap_reg) >> 16) as u16; // MSI enabled and Vectors masked should be flipped (MSI disabled and vectors masked) assert_eq!(msg_ctl & 0xc000, 0x4000); // Reserved bits should still be 0 assert_eq!(msg_ctl & 0x3800, 0); // Table size should not have changed assert_eq!((msg_ctl & 0x07ff) + 1, 1024); // Table offset is read only let table_offset = cfg.read_reg(cap_reg + 1); // Try to flip all bits cfg.write_config_register(cap_reg + 1, 0, &u32::to_le_bytes(!table_offset)); // None should be flipped assert_eq!(cfg.read_reg(cap_reg + 1), table_offset); // PBA offset also let pba_offset = cfg.read_reg(cap_reg + 2); // Try to flip all bits cfg.write_config_register(cap_reg + 2, 0, &u32::to_le_bytes(!pba_offset)); // None should be flipped assert_eq!(cfg.read_reg(cap_reg + 2), pba_offset); } fn default_pci_config() -> PciConfiguration { PciConfiguration::new_type0( 0x1234, 0x5678, 0x1, PciClassCode::MultimediaController, &PciMultimediaSubclass::AudioController, 0xABCD, 0x2468, None, ) } #[test] fn class_code() { let cfg = default_pci_config(); let class_reg = cfg.read_reg(2); let class_code = (class_reg >> 24) & 0xFF; let subclass = 
(class_reg >> 16) & 0xFF; let prog_if = (class_reg >> 8) & 0xFF; assert_eq!(class_code, 0x04); assert_eq!(subclass, 0x01); assert_eq!(prog_if, 0x0); } #[test] #[should_panic] fn test_encode_zero_sized_bar() { encode_64_bits_bar_size(0); } #[test] #[should_panic] fn test_decode_zero_sized_bar() { decode_64_bits_bar_size(0, 0); } #[test] fn test_bar_size_encoding() { // According to OSDev wiki (https://wiki.osdev.org/PCI#Address_and_size_of_the_BAR): // // > To determine the amount of address space needed by a PCI device, you must save the // > original value of the BAR, write a value of all 1's to the register, then read it back. // > The amount of memory can then be determined by masking the information bits, performing // > a bitwise NOT ('~' in C), and incrementing the value by 1. The original value of the // BAR > should then be restored. The BAR register is naturally aligned and as such you can // only > modify the bits that are set. For example, if a device utilizes 16 MB it will // have BAR0 > filled with 0xFF000000 (0x1000000 after decoding) and you can only modify // the upper > 8-bits. // // So, we encode a 64 bits size and then store it as a 2 32bit addresses (we use // two BARs). 
let (hi, lo) = encode_64_bits_bar_size(0xffff_ffff_ffff_fff0); assert_eq!(hi, 0); assert_eq!(lo, 0x0000_0010); assert_eq!(decode_64_bits_bar_size(hi, lo), 0xffff_ffff_ffff_fff0); } #[test] #[should_panic] fn test_bar_size_no_power_of_two() { let mut pci_config = default_pci_config(); pci_config.add_pci_bar(0, 0x1000, 0x1001); } #[test] #[should_panic] fn test_bad_bar_index() { let mut pci_config = default_pci_config(); pci_config.add_pci_bar(NUM_BAR_REGS, 0x1000, 0x1000); } #[test] #[should_panic] fn test_bad_64bit_bar_index() { let mut pci_config = default_pci_config(); pci_config.add_pci_bar(NUM_BAR_REGS - 1, 0x1000, 0x1000); } #[test] #[should_panic] fn test_bar_size_overflows() { let mut pci_config = default_pci_config(); pci_config.add_pci_bar(0, u64::MAX, 0x2); } #[test] #[should_panic] fn test_lower_bar_free_upper_used() { let mut pci_config = default_pci_config(); pci_config.add_pci_bar(1, 0x1000, 0x1000); pci_config.add_pci_bar(0, 0x1000, 0x1000); } #[test] #[should_panic] fn test_lower_bar_used() { let mut pci_config = default_pci_config(); pci_config.add_pci_bar(0, 0x1000, 0x1000); pci_config.add_pci_bar(0, 0x1000, 0x1000); } #[test] #[should_panic] fn test_upper_bar_used() { let mut pci_config = default_pci_config(); pci_config.add_pci_bar(0, 0x1000, 0x1000); pci_config.add_pci_bar(1, 0x1000, 0x1000); } #[test] fn test_add_pci_bar() { let mut pci_config = default_pci_config(); pci_config.add_pci_bar(0, 0x1_0000_0000, 0x1000); assert_eq!(pci_config.get_bar_addr(0), 0x1_0000_0000); assert_eq!(pci_config.read_reg(BAR0_REG) & 0xffff_fff0, 0x0); assert!(pci_config.bars[0].used); assert_eq!(pci_config.read_reg(BAR0_REG + 1), 1); assert!(pci_config.bars[0].used); } #[test] fn test_access_invalid_reg() { let mut pci_config = default_pci_config(); // Can't read past the end of the configuration space assert_eq!( pci_config.read_reg(NUM_CONFIGURATION_REGISTERS), 0xffff_ffff ); // Read out all of configuration space let config_space: Vec = 
(0..NUM_CONFIGURATION_REGISTERS) .map(|reg_idx| pci_config.read_reg(reg_idx)) .collect(); // Various invalid write accesses // Past the end of config space pci_config.write_config_register(NUM_CONFIGURATION_REGISTERS, 0, &[0x42]); pci_config.write_config_register(NUM_CONFIGURATION_REGISTERS, 0, &[0x42, 0x42]); pci_config.write_config_register(NUM_CONFIGURATION_REGISTERS, 0, &[0x42, 0x42, 0x42, 0x42]); // Past register boundaries pci_config.write_config_register(NUM_CONFIGURATION_REGISTERS, 1, &[0x42, 0x42, 0x42, 0x42]); pci_config.write_config_register(NUM_CONFIGURATION_REGISTERS, 2, &[0x42, 0x42, 0x42]); pci_config.write_config_register(NUM_CONFIGURATION_REGISTERS, 3, &[0x42, 0x42]); pci_config.write_config_register(NUM_CONFIGURATION_REGISTERS, 4, &[0x42]); pci_config.write_config_register(NUM_CONFIGURATION_REGISTERS, 5, &[]); for (reg_idx, reg) in config_space.iter().enumerate() { assert_eq!(*reg, pci_config.read_reg(reg_idx)); } } #[test] fn test_detect_bar_reprogramming() { let mut pci_config = default_pci_config(); // Trying to reprogram with something less than 4 bytes (length of the address) should fail assert!( pci_config .detect_bar_reprogramming(BAR0_REG, &[0x13]) .is_none() ); assert!( pci_config .detect_bar_reprogramming(BAR0_REG, &[0x13, 0x12]) .is_none() ); assert!( pci_config .detect_bar_reprogramming(BAR0_REG, &[0x13, 0x12]) .is_none() ); assert!( pci_config .detect_bar_reprogramming(BAR0_REG, &[0x13, 0x12, 0x16]) .is_none() ); // Writing all 1s is a special case where we're actually asking for the size of the BAR assert!( pci_config .detect_bar_reprogramming(BAR0_REG, &u32::to_le_bytes(0xffff_ffff)) .is_none() ); // Trying to reprogram a BAR that hasn't be initialized does nothing for reg_idx in BAR0_REG..BAR0_REG + NUM_BAR_REGS { assert!( pci_config .detect_bar_reprogramming(reg_idx, &u32::to_le_bytes(0x1312_4243)) .is_none() ); } // Reprogramming of a 64bit BAR pci_config.add_pci_bar(0, 0x13_1200_0000, 0x8000); // First we write the lower 32 bits 
and this shouldn't cause any reprogramming assert!( pci_config .detect_bar_reprogramming(BAR0_REG, &u32::to_le_bytes(0x4200_0000)) .is_none() ); pci_config.write_config_register(BAR0_REG, 0, &u32::to_le_bytes(0x4200_0000)); // Writing the upper 32 bits should trigger the reprogramming assert_eq!( pci_config.detect_bar_reprogramming(BAR0_REG + 1, &u32::to_le_bytes(0x84)), Some(BarReprogrammingParams { old_base: 0x13_1200_0000, new_base: 0x84_4200_0000, len: 0x8000, }) ); pci_config.write_config_register(BAR0_REG + 1, 0, &u32::to_le_bytes(0x84)); // Trying to reprogram the upper bits directly (without first touching the lower bits) // should trigger a reprogramming assert_eq!( pci_config.detect_bar_reprogramming(BAR0_REG + 1, &u32::to_le_bytes(0x1312)), Some(BarReprogrammingParams { old_base: 0x84_4200_0000, new_base: 0x1312_4200_0000, len: 0x8000, }) ); pci_config.write_config_register(BAR0_REG + 1, 0, &u32::to_le_bytes(0x1312)); // Attempting to reprogram the BAR with the same address should not have any effect assert!( pci_config .detect_bar_reprogramming(BAR0_REG, &u32::to_le_bytes(0x4200_0000)) .is_none() ); assert!( pci_config .detect_bar_reprogramming(BAR0_REG + 1, &u32::to_le_bytes(0x1312)) .is_none() ); } #[test] fn test_rom_bar() { let mut pci_config = default_pci_config(); // ROM BAR address should always be 0 and writes to it shouldn't do anything assert_eq!(pci_config.read_reg(ROM_BAR_REG), 0); pci_config.write_reg(ROM_BAR_REG, 0x42); assert_eq!(pci_config.read_reg(ROM_BAR_REG), 0); // Reading the size of the BAR should always return 0 as well pci_config.write_reg(ROM_BAR_REG, 0xffff_ffff); assert_eq!(pci_config.read_reg(ROM_BAR_REG), 0); } } ================================================ FILE: src/vmm/src/pci/mod.rs ================================================ // Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. // Copyright 2018 The Chromium OS Authors. All rights reserved. 
// Use of this source code is governed by a BSD-style license that can be // found in the LICENSE-BSD-3-Clause file. // // SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause /// PCI bus logic pub mod bus; /// PCI configuration space handling pub mod configuration; /// MSI-X logic pub mod msix; use std::fmt::Debug; use std::sync::{Arc, Barrier}; #[derive(Clone, Copy, Debug, PartialEq, Eq)] /// Parameters for performing a BAR reprogramming operation pub struct BarReprogrammingParams { /// Previous address of the BAR pub old_base: u64, /// New address of the BAR pub new_base: u64, /// Size of the BAR pub len: u64, } /// Common logic of all PCI devices pub trait PciDevice: Send { /// Sets a register in the configuration space. /// * `reg_idx` - The index of the config register to modify. /// * `offset` - Offset into the register. fn write_config_register( &mut self, reg_idx: usize, offset: u64, data: &[u8], ) -> Option>; /// Gets a register from the configuration space. /// * `reg_idx` - The index of the config register to read. fn read_config_register(&mut self, reg_idx: usize) -> u32; /// Detects if a BAR is being reprogrammed. fn detect_bar_reprogramming( &mut self, _reg_idx: usize, _data: &[u8], ) -> Option { None } /// Reads from a BAR region mapped into the device. /// * `addr` - The guest address inside the BAR. /// * `data` - Filled with the data from `addr`. fn read_bar(&mut self, _base: u64, _offset: u64, _data: &mut [u8]) {} /// Writes to a BAR region mapped into the device. /// * `addr` - The guest address inside the BAR. /// * `data` - The data to write. fn write_bar(&mut self, _base: u64, _offset: u64, _data: &[u8]) -> Option> { None } /// Relocates the BAR to a different address in guest address space. fn move_bar(&mut self, _old_base: u64, _new_base: u64) -> Result<(), DeviceRelocationError> { Ok(()) } } /// Errors for device manager. 
#[derive(Debug, thiserror::Error, displaydoc::Display)]
pub enum DeviceRelocationError {
    /// Device relocation not supported.
    NotSupported,
}

/// This trait defines a set of functions which can be triggered whenever a
/// PCI device is modified in any way.
pub trait DeviceRelocation: Send + Sync {
    /// The BAR needs to be moved to a different location in the guest address
    /// space. This follows a decision from the software running in the guest.
    fn move_bar(
        &self,
        old_base: u64,
        new_base: u64,
        len: u64,
        pci_dev: &mut dyn PciDevice,
    ) -> Result<(), DeviceRelocationError>;
}


================================================
FILE: src/vmm/src/pci/msix.rs
================================================
// Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// Copyright © 2019 Intel Corporation
//
// SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause
//

use std::sync::Arc;

use byteorder::{ByteOrder, LittleEndian};
use pci::PciCapabilityId;
use serde::{Deserialize, Serialize};
use vm_memory::ByteValued;

use crate::Vm;
use crate::logger::{debug, error, warn};
use crate::pci::configuration::PciCapability;
use crate::snapshot::Persist;
use crate::vstate::interrupts::{InterruptError, MsixVectorConfig, MsixVectorGroup};

// Hard cap on vectors per device (the PCI MSI-X table size field is 11 bits).
const MAX_MSIX_VECTORS_PER_DEVICE: u16 = 2048;
// Each MSI-X table entry occupies 16 bytes.
const MSIX_TABLE_ENTRIES_MODULO: u64 = 16;
// Each Pending Bit Array entry occupies 8 bytes.
const MSIX_PBA_ENTRIES_MODULO: u64 = 8;
const BITS_PER_PBA_ENTRY: usize = 64;
// Bit positions inside the Message Control register.
const FUNCTION_MASK_BIT: u8 = 14;
const MSIX_ENABLE_BIT: u8 = 15;

#[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq)]
/// MSI-X table entries
pub struct MsixTableEntry {
    /// Lower 32 bits of the vector address
    pub msg_addr_lo: u32,
    /// Upper 32 bits of the vector address
    pub msg_addr_hi: u32,
    /// Vector data
    pub msg_data: u32,
    /// Enable/Disable and (un)masking control
    pub vector_ctl: u32,
}

impl MsixTableEntry {
    /// Returns `true` if the vector is masked (bit 0 of vector control)
    pub fn masked(&self) -> bool {
        self.vector_ctl & 0x1 == 0x1
    }
}

impl Default for MsixTableEntry {
    // Entries start out masked (vector_ctl bit 0 set), matching reset state.
    fn default() -> Self {
        MsixTableEntry {
            msg_addr_lo: 0,
            msg_addr_hi: 0,
            msg_data: 0,
            vector_ctl: 0x1,
        }
    }
}

#[derive(Debug, Clone, Serialize, Deserialize)]
/// State for (de)serializing MSI-X configuration
// NOTE(review): the element types of these containers (`Vec<...>`) were lost
// during extraction of this file — confirm against the upstream sources.
pub struct MsixConfigState {
    table_entries: Vec,
    pba_entries: Vec,
    masked: bool,
    enabled: bool,
    vectors: Vec,
}

/// MSI-X configuration
// NOTE(review): generic parameters of `Vec`/`Arc` below were lost in extraction.
pub struct MsixConfig {
    /// Vector table entries
    pub table_entries: Vec,
    /// Pending bit array
    pub pba_entries: Vec,
    /// Id of the device using this set of vectors
    pub devid: u32,
    /// Interrupts vectors used
    pub vectors: Arc,
    /// Whether vectors are masked
    pub masked: bool,
    /// Whether vectors are enabled
    pub enabled: bool,
}

// Manual Debug impl; note that `vectors` is deliberately omitted from the output.
impl std::fmt::Debug for MsixConfig {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("MsixConfig")
            .field("table_entries", &self.table_entries)
            .field("pba_entries", &self.pba_entries)
            .field("devid", &self.devid)
            .field("masked", &self.masked)
            .field("enabled", &self.enabled)
            .finish()
    }
}

impl MsixConfig {
    /// Create a new MSI-X configuration
    ///
    /// All table entries start out masked and the whole function starts out
    /// masked and disabled.
    pub fn new(vectors: Arc, devid: u32) -> Self {
        assert!(vectors.num_vectors() <= MAX_MSIX_VECTORS_PER_DEVICE);

        let mut table_entries: Vec = Vec::new();
        table_entries.resize_with(vectors.num_vectors() as usize, Default::default);
        let mut pba_entries: Vec = Vec::new();
        // One pending bit per vector, 64 bits per PBA entry (rounded up).
        let num_pba_entries: usize = (vectors.num_vectors() as usize).div_ceil(BITS_PER_PBA_ENTRY);
        pba_entries.resize_with(num_pba_entries, Default::default);

        MsixConfig {
            table_entries,
            pba_entries,
            devid,
            vectors,
            masked: true,
            enabled: false,
        }
    }

    /// Create an MSI-X configuration from snapshot state
    pub fn from_state(
        state: MsixConfigState,
        vm: Arc,
        devid: u32,
    ) -> Result {
        let vectors = Arc::new(MsixVectorGroup::restore(vm, &state.vectors)?);
        // Re-register interrupt routes only when MSI-X was live (enabled and
        // not function-masked) at snapshot time.
        if state.enabled && !state.masked {
            for (idx, table_entry) in state.table_entries.iter().enumerate() {
                if table_entry.masked() {
                    continue;
                }

                let config = MsixVectorConfig {
                    high_addr: table_entry.msg_addr_hi,
                    low_addr: table_entry.msg_addr_lo,
                    data: table_entry.msg_data,
                    devid,
                };

                vectors.update(idx, config, state.masked, true)?;
                // NOTE(review): enable() is called once per unmasked entry;
                // presumably idempotent — confirm, otherwise hoist out of the loop.
                vectors.enable()?;
            }
        }

        Ok(MsixConfig {
            table_entries: state.table_entries,
            pba_entries: state.pba_entries,
            devid,
            vectors,
            masked: state.masked,
            enabled: state.enabled,
        })
    }

    /// Create the state object for serializing MSI-X vectors
    pub fn state(&self) -> MsixConfigState {
        MsixConfigState {
            table_entries: self.table_entries.clone(),
            pba_entries: self.pba_entries.clone(),
            masked: self.masked,
            enabled: self.enabled,
            vectors: self.vectors.save(),
        }
    }

    /// Set the MSI-X control message (enable/disable, (un)mask)
    pub fn set_msg_ctl(&mut self, reg: u16) {
        let old_masked = self.masked;
        let old_enabled = self.enabled;

        self.masked = ((reg >> FUNCTION_MASK_BIT) & 1u16) == 1u16;
        self.enabled = ((reg >> MSIX_ENABLE_BIT) & 1u16) == 1u16;

        // Update interrupt routing
        if old_masked != self.masked || old_enabled != self.enabled {
            if self.enabled && !self.masked {
                debug!("MSI-X enabled for device 0x{:x}", self.devid);
                for (idx, table_entry) in self.table_entries.iter().enumerate() {
                    let config = MsixVectorConfig {
                        high_addr: table_entry.msg_addr_hi,
                        low_addr: table_entry.msg_addr_lo,
                        data: table_entry.msg_data,
                        devid: self.devid,
                    };

                    if let Err(e) = self.vectors.update(idx, config, table_entry.masked(), true) {
                        error!("Failed updating vector: {:?}", e);
                    }
                }
            } else if old_enabled || !old_masked {
                debug!("MSI-X disabled for device 0x{:x}", self.devid);
                if let Err(e) = self.vectors.disable() {
                    error!("Failed disabling irq_fd: {:?}", e);
                }
            }
        }

        // If the Function Mask bit was set, and has just been cleared, it's
        // important to go through the entire PBA to check if there was any
        // pending MSI-X message to inject, given that the vector is not
        // masked.
        if old_masked && !self.masked {
            // Clone so `&mut self` helpers can be called while iterating.
            for (index, entry) in self.table_entries.clone().iter().enumerate() {
                if !entry.masked() && self.get_pba_bit(index.try_into().unwrap()) == 1 {
                    self.inject_msix_and_clear_pba(index);
                }
            }
        }
    }

    /// Read an MSI-X table entry
    ///
    /// Only 4- and 8-byte accesses at valid entry offsets are honored; any
    /// other access returns all 1s.
    pub fn read_table(&self, offset: u64, data: &mut [u8]) {
        assert!(data.len() <= 8);

        let index: usize = (offset / MSIX_TABLE_ENTRIES_MODULO) as usize;
        let modulo_offset = offset % MSIX_TABLE_ENTRIES_MODULO;

        if index >= self.table_entries.len() {
            warn!("Invalid MSI-X table entry index {index}");
            data.fill(0xff);
            return;
        }

        match data.len() {
            4 => {
                let value = match modulo_offset {
                    0x0 => self.table_entries[index].msg_addr_lo,
                    0x4 => self.table_entries[index].msg_addr_hi,
                    0x8 => self.table_entries[index].msg_data,
                    0xc => self.table_entries[index].vector_ctl,
                    off => {
                        warn!("msi-x: invalid offset in table entry read: {off}");
                        0xffff_ffff
                    }
                };

                LittleEndian::write_u32(data, value);
            }
            8 => {
                let value = match modulo_offset {
                    // 8-byte reads return two consecutive 32-bit fields.
                    0x0 => {
                        (u64::from(self.table_entries[index].msg_addr_hi) << 32)
                            | u64::from(self.table_entries[index].msg_addr_lo)
                    }
                    0x8 => {
                        (u64::from(self.table_entries[index].vector_ctl) << 32)
                            | u64::from(self.table_entries[index].msg_data)
                    }
                    off => {
                        warn!("msi-x: invalid offset in table entry read: {off}");
                        0xffff_ffff_ffff_ffff
                    }
                };

                LittleEndian::write_u64(data, value);
            }
            len => {
                warn!("msi-x: invalid length in table entry read: {len}");
                data.fill(0xff);
            }
        }
    }

    /// Write an MSI-X table entry
    pub fn write_table(&mut self, offset: u64, data: &[u8]) {
        assert!(data.len() <= 8);

        let index: usize = (offset / MSIX_TABLE_ENTRIES_MODULO) as usize;
        let modulo_offset = offset % MSIX_TABLE_ENTRIES_MODULO;

        if index >= self.table_entries.len() {
            warn!("msi-x: invalid table entry index {index}");
            return;
        }

        // Store the value of the entry before modification
        let old_entry = self.table_entries[index].clone();

        match data.len() {
            4 => {
                let value = LittleEndian::read_u32(data);
                match modulo_offset {
                    0x0 => self.table_entries[index].msg_addr_lo = value,
                    0x4 => self.table_entries[index].msg_addr_hi = value,
                    0x8 => self.table_entries[index].msg_data = value,
                    0xc => {
                        self.table_entries[index].vector_ctl = value;
                    }
                    off => warn!("msi-x: invalid offset in table entry write: {off}"),
                };
            }
            8 => {
                let value = LittleEndian::read_u64(data);
                match modulo_offset {
                    0x0 => {
                        self.table_entries[index].msg_addr_lo = (value & 0xffff_ffffu64) as u32;
                        self.table_entries[index].msg_addr_hi = (value >> 32) as u32;
                    }
                    0x8 => {
                        self.table_entries[index].msg_data = (value & 0xffff_ffffu64) as u32;
                        self.table_entries[index].vector_ctl = (value >> 32) as u32;
                    }
                    off => warn!("msi-x: invalid offset in table entry write: {off}"),
                };
            }
            len => warn!("msi-x: invalid length in table entry write: {len}"),
        };

        let table_entry = &self.table_entries[index];

        // Optimisation to avoid excessive updates
        if &old_entry == table_entry {
            return;
        }

        // Update interrupt routes
        // Optimisation: only update routes if the entry is not masked;
        // this is safe because if the entry is masked (starts masked as per spec)
        // in the table then it won't be triggered.
        if self.enabled && !self.masked && !table_entry.masked() {
            let config = MsixVectorConfig {
                high_addr: table_entry.msg_addr_hi,
                low_addr: table_entry.msg_addr_lo,
                data: table_entry.msg_data,
                devid: self.devid,
            };

            if let Err(e) = self
                .vectors
                .update(index, config, table_entry.masked(), true)
            {
                error!("Failed updating vector: {:?}", e);
            }
        }

        // After the MSI-X table entry has been updated, it is necessary to
        // check if the vector control masking bit has changed. In case the
        // bit has been flipped from 1 to 0, we need to inject a MSI message
        // if the corresponding pending bit from the PBA is set. Once the MSI
        // has been injected, the pending bit in the PBA needs to be cleared.
        // All of this is valid only if MSI-X has not been masked for the whole
        // device.

        // Check if bit has been flipped
        if !self.masked
            && self.enabled
            && old_entry.masked()
            && !table_entry.masked()
            && self.get_pba_bit(index.try_into().unwrap()) == 1
        {
            self.inject_msix_and_clear_pba(index);
        }
    }

    /// Read a pending bit array entry
    ///
    /// Only 4- and 8-byte aligned accesses are honored; anything else
    /// returns all 1s.
    pub fn read_pba(&self, offset: u64, data: &mut [u8]) {
        let index: usize = (offset / MSIX_PBA_ENTRIES_MODULO) as usize;
        let modulo_offset = offset % MSIX_PBA_ENTRIES_MODULO;

        if index >= self.pba_entries.len() {
            warn!("msi-x: invalid PBA entry index {index}");
            data.fill(0xff);
            return;
        }

        match data.len() {
            4 => {
                let value: u32 = match modulo_offset {
                    0x0 => (self.pba_entries[index] & 0xffff_ffffu64) as u32,
                    0x4 => (self.pba_entries[index] >> 32) as u32,
                    off => {
                        warn!("msi-x: invalid offset in pba entry read: {off}");
                        0xffff_ffff
                    }
                };

                LittleEndian::write_u32(data, value);
            }
            8 => {
                let value: u64 = match modulo_offset {
                    0x0 => self.pba_entries[index],
                    off => {
                        warn!("msi-x: invalid offset in pba entry read: {off}");
                        0xffff_ffff_ffff_ffff
                    }
                };

                LittleEndian::write_u64(data, value);
            }
            len => {
                warn!("msi-x: invalid length in table entry read: {len}");
                data.fill(0xff);
            }
        }
    }

    /// Write a pending bit array entry
    // The PBA is read-only for the guest; writes are logged and dropped.
    pub fn write_pba(&mut self, _offset: u64, _data: &[u8]) {
        error!("Pending Bit Array is read only");
    }

    /// Set PBA bit for a vector
    ///
    /// `reset == true` clears the bit, otherwise it is set. Vectors past the
    /// configured table are silently ignored.
    pub fn set_pba_bit(&mut self, vector: u16, reset: bool) {
        assert!(vector < MAX_MSIX_VECTORS_PER_DEVICE);

        if (vector as usize) >= self.table_entries.len() {
            return;
        }

        let index: usize = (vector as usize) / BITS_PER_PBA_ENTRY;
        let shift: usize = (vector as usize) % BITS_PER_PBA_ENTRY;
        let mut mask: u64 = 1u64 << shift;

        if reset {
            mask = !mask;
            self.pba_entries[index] &= mask;
        } else {
            self.pba_entries[index] |= mask;
        }
    }

    /// Get the PBA bit for a vector
    // Returns 0 or 1 for valid vectors, 0xff for vectors past the table end.
    fn get_pba_bit(&self, vector: u16) -> u8 {
        assert!(vector < MAX_MSIX_VECTORS_PER_DEVICE);

        if (vector as usize) >= self.table_entries.len() {
            return 0xff;
        }

        let index: usize = (vector as usize) / BITS_PER_PBA_ENTRY;
        let shift: usize = (vector as usize) % BITS_PER_PBA_ENTRY;

        ((self.pba_entries[index] >> shift) & 0x0000_0001u64) as u8
    }

    /// Inject an MSI-X interrupt and clear the PBA bit for a vector
    fn inject_msix_and_clear_pba(&mut self, vector: usize) {
        // Inject the MSI message
        match self.vectors.trigger(vector) {
            Ok(_) => debug!("MSI-X injected on vector control flip"),
            Err(e) => error!("failed to inject MSI-X: {}", e),
        }

        // Clear the bit from PBA
        self.set_pba_bit(vector.try_into().unwrap(), true);
    }
}

#[repr(C, packed)]
#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize)]
/// MSI-X PCI capability
pub struct MsixCap {
    /// Message Control Register
    /// 10-0: MSI-X Table size
    /// 13-11: Reserved
    /// 14: Mask. Mask all MSI-X when set.
    /// 15: Enable. Enable all MSI-X when set.
    pub msg_ctl: u16,
    /// Table. Contains the offset and the BAR indicator (BIR)
    /// 2-0: Table BAR indicator (BIR). Can be 0 to 5.
    /// 31-3: Table offset in the BAR pointed by the BIR.
    pub table: u32,
    /// Pending Bit Array. Contains the offset and the BAR indicator (BIR)
    /// 2-0: PBA BAR indicator (BIR). Can be 0 to 5.
    /// 31-3: PBA offset in the BAR pointed by the BIR.
    pub pba: u32,
}

// SAFETY: All members are simple numbers and any value is valid.
unsafe impl ByteValued for MsixCap {}

impl PciCapability for MsixCap {
    fn bytes(&self) -> &[u8] {
        self.as_slice()
    }

    fn id(&self) -> PciCapabilityId {
        PciCapabilityId::MsiX
    }
}

impl MsixCap {
    /// Create a new MSI-X capability object
    pub fn new(
        table_pci_bar: u8,
        table_size: u16,
        table_off: u32,
        pba_pci_bar: u8,
        pba_off: u32,
    ) -> Self {
        assert!(table_size < MAX_MSIX_VECTORS_PER_DEVICE);

        // Set the table size and enable MSI-X.
        // Table size field is N-1 encoded; 0x8000 sets the Enable bit.
        let msg_ctl: u16 = 0x8000u16 + table_size - 1;

        MsixCap {
            msg_ctl,
            table: (table_off & 0xffff_fff8u32) | u32::from(table_pci_bar & 0x7u8),
            pba: (pba_off & 0xffff_fff8u32) | u32::from(pba_pci_bar & 0x7u8),
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::builder::tests::default_vmm;
    use crate::logger::{IncMetric, METRICS};
    use crate::{Vm, check_metric_after_block};

    // Helper: build a vector group of `nr_vectors` backed by a fresh VMM.
    // NOTE(review): the `Arc` element type was lost in extraction.
    fn msix_vector_group(nr_vectors: u16) -> Arc {
        let vmm = default_vmm();
        Arc::new(Vm::create_msix_group(vmm.vm.clone(), nr_vectors).unwrap())
    }

    #[test]
    #[should_panic]
    fn test_too_many_vectors() {
        // 2049 exceeds MAX_MSIX_VECTORS_PER_DEVICE.
        MsixConfig::new(msix_vector_group(2049), 0x42);
    }

    #[test]
    fn test_new_msix_config() {
        let config = MsixConfig::new(msix_vector_group(2), 0x42);
        assert_eq!(config.devid, 0x42);
        assert!(config.masked);
        assert!(!config.enabled);
        assert_eq!(config.table_entries.len(), 2);
        assert_eq!(config.pba_entries.len(), 1);
    }

    #[test]
    fn test_enable_msix_vectors() {
        let mut config = MsixConfig::new(msix_vector_group(2), 0x42);
        assert!(!config.enabled);
        assert!(config.masked);

        // Bit 15 marks whether MSI-X is enabled
        // Bit 14 marks whether vectors are masked
        config.set_msg_ctl(0x8000);
        assert!(config.enabled);
        assert!(!config.masked);

        config.set_msg_ctl(0x4000);
        assert!(!config.enabled);
        assert!(config.masked);

        config.set_msg_ctl(0xC000);
        assert!(config.enabled);
        assert!(config.masked);

        config.set_msg_ctl(0x0);
        assert!(!config.enabled);
        assert!(!config.masked);
    }

    #[test]
    #[should_panic]
    fn test_table_access_read_too_big() {
        let config = MsixConfig::new(msix_vector_group(2), 0x42);
        let mut buffer = [0u8; 16];
        config.read_table(0, &mut buffer);
    }

    #[test]
    fn test_read_table_past_end() {
        let config = MsixConfig::new(msix_vector_group(2), 0x42);
        let mut buffer = [0u8; 8];

        // We have 2 vectors (16 bytes each), so we should be able to read up to 32 bytes.
        // Past that the device should respond with all 1s
        config.read_table(32, &mut buffer);
        assert_eq!(buffer, [0xff; 8]);
    }

    #[test]
    fn test_read_table_bad_length() {
        let config = MsixConfig::new(msix_vector_group(2), 0x42);
        let mut buffer = [0u8; 8];

        // We can either read 4 or 8 bytes
        config.read_table(0, &mut buffer[..0]);
        assert_eq!(buffer, [0x0; 8]);
        config.read_table(0, &mut buffer[..1]);
        assert_eq!(buffer[..1], [0xff; 1]);
        config.read_table(0, &mut buffer[..2]);
        assert_eq!(buffer[..2], [0xff; 2]);
        config.read_table(0, &mut buffer[..3]);
        assert_eq!(buffer[..3], [0xff; 3]);
        config.read_table(0, &mut buffer[..5]);
        assert_eq!(buffer[..5], [0xff; 5]);
        config.read_table(0, &mut buffer[..6]);
        assert_eq!(buffer[..6], [0xff; 6]);
        config.read_table(0, &mut buffer[..7]);
        assert_eq!(buffer[..7], [0xff; 7]);
        config.read_table(0, &mut buffer[..4]);
        assert_eq!(buffer, u64::to_le_bytes(0x00ff_ffff_0000_0000));
        config.read_table(0, &mut buffer);
        assert_eq!(buffer, u64::to_le_bytes(0));
    }

    #[test]
    fn test_access_table() {
        let mut config = MsixConfig::new(msix_vector_group(2), 0x42);

        // enabled and not masked
        check_metric_after_block!(
            METRICS.interrupts.config_updates,
            2,
            config.set_msg_ctl(0x8000)
        );
        let mut buffer = [0u8; 8];

        // Write first vector's address with a single 8-byte write
        // It's still masked so shouldn't be updated
        check_metric_after_block!(
            METRICS.interrupts.config_updates,
            0,
            config.write_table(0, &u64::to_le_bytes(0x0000_1312_0000_1110))
        );

        // Same for control and message data
        // Now, we enabled it, so we should see an update
        check_metric_after_block!(
            METRICS.interrupts.config_updates,
            1,
            config.write_table(8, &u64::to_le_bytes(0x0_0000_0020))
        );

        // Write second vector's fields with 4-byte writes
        // low 32 bits of the address (still masked)
        check_metric_after_block!(
            METRICS.interrupts.config_updates,
            0,
            config.write_table(16, &u32::to_le_bytes(0x4241))
        );
        // high 32 bits of the address (still masked)
        check_metric_after_block!(
            METRICS.interrupts.config_updates,
            0,
            config.write_table(20, &u32::to_le_bytes(0x4443))
        );
        // message data (still masked)
        check_metric_after_block!(
            METRICS.interrupts.config_updates,
            0,
            config.write_table(24, &u32::to_le_bytes(0x21))
        );
        // vector control (now unmasked)
        check_metric_after_block!(
            METRICS.interrupts.config_updates,
            1,
            config.write_table(28, &u32::to_le_bytes(0x0))
        );

        assert_eq!(config.table_entries[0].msg_addr_hi, 0x1312);
        assert_eq!(config.table_entries[0].msg_addr_lo, 0x1110);
        assert_eq!(config.table_entries[0].msg_data, 0x20);
        assert_eq!(config.table_entries[0].vector_ctl, 0);
        assert_eq!(config.table_entries[1].msg_addr_hi, 0x4443);
        assert_eq!(config.table_entries[1].msg_addr_lo, 0x4241);
        assert_eq!(config.table_entries[1].msg_data, 0x21);
        assert_eq!(config.table_entries[1].vector_ctl, 0);
        assert_eq!(config.table_entries.len(), 2);
        assert_eq!(config.pba_entries.len(), 1);

        // reading at a bad offset should return all 1s
        config.read_table(1, &mut buffer[..4]);
        assert_eq!(buffer[..4], [0xff; 4]);

        // read low address for first vector
        config.read_table(0, &mut buffer[..4]);
        assert_eq!(
            buffer[..4],
            u32::to_le_bytes(config.table_entries[0].msg_addr_lo)
        );
        // read the high address for first vector
        config.read_table(4, &mut buffer[4..]);
        assert_eq!(0x0000_1312_0000_1110, u64::from_le_bytes(buffer));

        // read msg_data from second vector
        config.read_table(24, &mut buffer[..4]);
        assert_eq!(u32::to_le_bytes(0x21), &buffer[..4]);
        // read vector control for second vector
        config.read_table(28, &mut buffer[..4]);
        assert_eq!(u32::to_le_bytes(0x0), &buffer[..4]);

        // reading with 8 bytes at bad offset should also return all 1s
        config.read_table(19, &mut buffer);
        assert_eq!(buffer, [0xff; 8]);

        // Read the second vector's address using an 8 byte read
        config.read_table(16, &mut buffer);
        assert_eq!(0x0000_4443_0000_4241, u64::from_le_bytes(buffer));

        // Read the first vector's ctrl and data with a single 8 byte read
        config.read_table(8, &mut buffer);
        assert_eq!(0x0_0000_0020, u64::from_le_bytes(buffer));

        // If we mask the interrupts we shouldn't see any update
        check_metric_after_block!(METRICS.interrupts.config_updates, 0, {
            config.write_table(12, &u32::to_le_bytes(0x1));
            config.write_table(28, &u32::to_le_bytes(0x1));
        });

        // Un-masking them should update them
        check_metric_after_block!(METRICS.interrupts.config_updates, 2, {
            config.write_table(12, &u32::to_le_bytes(0x0));
            config.write_table(28, &u32::to_le_bytes(0x0));
        });

        // Setting up the same config should have no effect
        check_metric_after_block!(METRICS.interrupts.config_updates, 0, {
            config.write_table(12, &u32::to_le_bytes(0x0));
            config.write_table(28, &u32::to_le_bytes(0x0));
        });
    }

    #[test]
    #[should_panic]
    fn test_table_access_write_too_big() {
        let mut config = MsixConfig::new(msix_vector_group(2), 0x42);
        let buffer = [0u8; 16];
        config.write_table(0, &buffer);
    }

    #[test]
    fn test_pba_read_too_big() {
        let config = MsixConfig::new(msix_vector_group(2), 0x42);
        let mut buffer = [0u8; 16];
        config.read_pba(0, &mut buffer);
        assert_eq!(buffer, [0xff; 16]);
    }

    #[test]
    fn test_pba_invalid_offset() {
        let config = MsixConfig::new(msix_vector_group(2), 0x42);
        let mut buffer = [0u8; 8];

        // Past the end of the PBA array
        config.read_pba(128, &mut buffer);
        assert_eq!(buffer, [0xffu8; 8]);

        // Invalid offset within a valid entry
        let mut buffer = [0u8; 8];
        config.read_pba(3, &mut buffer[..4]);
        assert_eq!(buffer[..4], [0xffu8; 4]);

        config.read_pba(3, &mut buffer);
        assert_eq!(buffer, [0xffu8; 8]);
    }

    #[test]
    #[should_panic]
    fn test_set_pba_bit_vector_too_big() {
        let mut config = MsixConfig::new(msix_vector_group(2), 0x42);
        config.set_pba_bit(2048, false);
    }

    #[test]
    #[should_panic]
    fn test_get_pba_bit_vector_too_big() {
        let config = MsixConfig::new(msix_vector_group(2), 0x42);
        config.get_pba_bit(2048);
    }

    #[test]
    fn test_pba_bit_invalid_vector() {
        let mut config = MsixConfig::new(msix_vector_group(2), 0x42);

        // We have two vectors, so setting the pending bit for the third one
        // should be ignored
        config.set_pba_bit(2, false);
        assert_eq!(config.pba_entries[0], 0);

        // Same for getting the bit
        assert_eq!(config.get_pba_bit(2), 0xff);
    }

    #[test]
    fn test_pba_read() {
        let mut config = MsixConfig::new(msix_vector_group(128), 0x42);
        let mut buffer = [0u8; 8];

        config.set_pba_bit(1, false);
        assert_eq!(config.pba_entries[0], 2);
        assert_eq!(config.pba_entries[1], 0);
        config.read_pba(0, &mut buffer);
        assert_eq!(0x2, u64::from_le_bytes(buffer));

        let mut buffer = [0u8; 4];
        config.set_pba_bit(96, false);
        assert_eq!(config.pba_entries[0], 2);
        assert_eq!(config.pba_entries[1], 0x1_0000_0000);
        config.read_pba(8, &mut buffer);
        assert_eq!(0x0, u32::from_le_bytes(buffer));
        config.read_pba(12, &mut buffer);
        assert_eq!(0x1, u32::from_le_bytes(buffer));
    }

    #[test]
    fn test_pending_interrupt() {
        let mut config = MsixConfig::new(msix_vector_group(2), 0x42);
        config.set_pba_bit(1, false);
        assert_eq!(config.get_pba_bit(1), 1);

        // Enable MSI-X vector and unmask interrupts
        // Individual vectors are still masked, so no change
        check_metric_after_block!(METRICS.interrupts.triggers, 0, config.set_msg_ctl(0x8000));

        // Enable all vectors
        // Vector one had a pending bit, so we must have triggered an interrupt for it
        // and cleared the pending bit
        check_metric_after_block!(METRICS.interrupts.triggers, 1, {
            config.write_table(8, &u64::to_le_bytes(0x0_0000_0020));
            config.write_table(24, &u64::to_le_bytes(0x0_0000_0020));
        });
        assert_eq!(config.get_pba_bit(1), 0);

        // Check that interrupt is sent as well for enabled vectors once we unmask from
        // Message Control

        // Mask vectors and set pending bit for vector 0
        check_metric_after_block!(METRICS.interrupts.triggers, 0, {
            config.set_msg_ctl(0xc000);
            config.set_pba_bit(0, false);
        });

        // Unmask them
        check_metric_after_block!(METRICS.interrupts.triggers, 1, config.set_msg_ctl(0x8000));
        assert_eq!(config.get_pba_bit(0), 0);
    }
}


================================================
FILE: src/vmm/src/persist.rs
================================================
// Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

//! Defines state structures for saving/restoring a Firecracker microVM.

use std::fmt::Debug;
use std::fs::{File, OpenOptions};
use std::io::{self, Write};
use std::mem::forget;
use std::os::unix::io::AsRawFd;
use std::os::unix::net::UnixStream;
use std::path::Path;
use std::sync::{Arc, Mutex};

use semver::Version;
use serde::{Deserialize, Serialize};
use userfaultfd::{FeatureFlags, Uffd, UffdBuilder};
use vmm_sys_util::sock_ctrl_msg::ScmSocket;

#[cfg(target_arch = "aarch64")]
use crate::arch::aarch64::vcpu::get_manufacturer_id_from_host;
use crate::builder::{self, BuildMicrovmFromSnapshotError};
use crate::cpu_config::templates::StaticCpuTemplate;
#[cfg(target_arch = "x86_64")]
use crate::cpu_config::x86_64::cpuid::CpuidTrait;
#[cfg(target_arch = "x86_64")]
use crate::cpu_config::x86_64::cpuid::common::get_vendor_id_from_host;
use crate::device_manager::{DevicePersistError, DevicesState};
use crate::logger::{info, warn};
use crate::resources::VmResources;
use crate::seccomp::BpfThreadMap;
use crate::snapshot::Snapshot;
use crate::utils::u64_to_usize;
use crate::vmm_config::boot_source::BootSourceConfig;
use crate::vmm_config::instance_info::InstanceInfo;
use crate::vmm_config::machine_config::{HugePageConfig, MachineConfigError, MachineConfigUpdate};
use crate::vmm_config::snapshot::{CreateSnapshotParams, LoadSnapshotParams, MemBackendType};
use crate::vstate::kvm::KvmState;
use crate::vstate::memory::{
    self, GuestMemoryState, GuestRegionMmap, GuestRegionType, MemoryError,
};
use crate::vstate::vcpu::{VcpuSendEventError, VcpuState};
use crate::vstate::vm::{VmError, VmState};
use crate::{EventManager, Vmm, vstate};

/// Holds information related to the VM that is not part of VmState.
#[derive(Clone, Debug, Default, Deserialize, PartialEq, Eq, Serialize)]
pub struct VmInfo {
    /// Guest memory size.
    pub mem_size_mib: u64,
    /// smt information
    pub smt: bool,
    /// CPU template type
    pub cpu_template: StaticCpuTemplate,
    /// Boot source information.
    pub boot_source: BootSourceConfig,
    /// Huge page configuration
    pub huge_pages: HugePageConfig,
}

impl From<&VmResources> for VmInfo {
    fn from(value: &VmResources) -> Self {
        Self {
            mem_size_mib: value.machine_config.mem_size_mib as u64,
            smt: value.machine_config.smt,
            cpu_template: StaticCpuTemplate::from(&value.machine_config.cpu_template),
            boot_source: value.boot_source.config.clone(),
            huge_pages: value.machine_config.huge_pages,
        }
    }
}

impl From<&Vmm> for VmInfo {
    fn from(value: &Vmm) -> Self {
        let machine_config = &value.machine_config;
        Self {
            mem_size_mib: machine_config.mem_size_mib as u64,
            smt: machine_config.smt,
            cpu_template: StaticCpuTemplate::from(&machine_config.cpu_template),
            boot_source: value.boot_source_config.clone(),
            huge_pages: machine_config.huge_pages,
        }
    }
}

/// Contains the necessary state for saving/restoring a microVM.
#[derive(Debug, Default, Serialize, Deserialize)]
pub struct MicrovmState {
    /// Miscellaneous VM info.
    pub vm_info: VmInfo,
    /// KVM KVM state.
    pub kvm_state: KvmState,
    /// VM KVM state.
    pub vm_state: VmState,
    /// Vcpu states.
    // NOTE(review): element type (`Vec<...>`) was lost in extraction; confirm upstream.
    pub vcpu_states: Vec,
    /// Device states.
    pub device_states: DevicesState,
}

/// This describes the mapping between Firecracker base virtual address and
/// offset in the buffer or file backend for a guest memory region. It is used
/// to tell an external process/thread where to populate the guest memory data
/// for this range.
///
/// E.g. Guest memory contents for a region of `size` bytes can be found in the
/// backend at `offset` bytes from the beginning, and should be copied/populated
/// into `base_host_address`.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
pub struct GuestRegionUffdMapping {
    /// Base host virtual address where the guest memory contents for this
    /// region should be copied/populated.
    pub base_host_virt_addr: u64,
    /// Region size.
    pub size: usize,
    /// Offset in the backend file/buffer where the region contents are.
    pub offset: u64,
    /// The configured page size for this memory region.
    pub page_size: usize,
    /// The configured page size **in bytes** for this memory region. The name is
    /// wrong but cannot be changed due to being API, so this field is deprecated,
    /// to be removed in 2.0.
    #[deprecated]
    pub page_size_kib: usize,
}

/// Errors related to saving and restoring Microvm state.
#[derive(Debug, thiserror::Error, displaydoc::Display)]
pub enum MicrovmStateError {
    /// Operation not allowed: {0}
    NotAllowed(String),
    /// Cannot restore devices: {0}
    RestoreDevices(#[from] DevicePersistError),
    /// Cannot save Vcpu state: {0}
    SaveVcpuState(vstate::vcpu::VcpuError),
    /// Cannot save Vm state: {0}
    SaveVmState(vstate::vm::ArchVmError),
    /// Cannot signal Vcpu: {0}
    SignalVcpu(VcpuSendEventError),
    /// Vcpu is in unexpected state.
    UnexpectedVcpuResponse,
}

/// Errors associated with creating a snapshot.
#[rustfmt::skip]
#[derive(Debug, thiserror::Error, displaydoc::Display)]
pub enum CreateSnapshotError {
    /// Cannot get dirty bitmap: {0}
    DirtyBitmap(#[from] VmError),
    /// Cannot write memory file: {0}
    Memory(#[from] MemoryError),
    /// Cannot perform {0} on the memory backing file: {1}
    MemoryBackingFile(&'static str, io::Error),
    /// Cannot save the microVM state: {0}
    MicrovmState(MicrovmStateError),
    /// Cannot serialize the microVM state: {0}
    SerializeMicrovmState(#[from] crate::snapshot::SnapshotError),
    /// Cannot perform {0} on the snapshot backing file: {1}
    SnapshotBackingFile(&'static str, io::Error),
}

/// Snapshot version
pub const SNAPSHOT_VERSION: Version = Version::new(9, 0, 0);

/// Creates a Microvm snapshot.
pub fn create_snapshot( vmm: &mut Vmm, vm_info: &VmInfo, params: &CreateSnapshotParams, ) -> Result<(), CreateSnapshotError> { let microvm_state = vmm .save_state(vm_info) .map_err(CreateSnapshotError::MicrovmState)?; snapshot_state_to_file(µvm_state, ¶ms.snapshot_path)?; vmm.vm .snapshot_memory_to_file(¶ms.mem_file_path, params.snapshot_type)?; // We need to mark queues as dirty again for all activated devices. The reason we // do it here is that we don't mark pages as dirty during runtime // for queue objects. vmm.device_manager .mark_virtio_queue_memory_dirty(vmm.vm.guest_memory()); Ok(()) } fn snapshot_state_to_file( microvm_state: &MicrovmState, snapshot_path: &Path, ) -> Result<(), CreateSnapshotError> { use self::CreateSnapshotError::*; let mut snapshot_file = OpenOptions::new() .create(true) .write(true) .truncate(true) .open(snapshot_path) .map_err(|err| SnapshotBackingFile("open", err))?; let snapshot = Snapshot::new(microvm_state); snapshot.save(&mut snapshot_file)?; snapshot_file .flush() .map_err(|err| SnapshotBackingFile("flush", err))?; snapshot_file .sync_all() .map_err(|err| SnapshotBackingFile("sync_all", err)) } /// Validates that snapshot CPU vendor matches the host CPU vendor. /// /// # Errors /// /// When: /// - Failed to read host vendor. /// - Failed to read snapshot vendor. 
#[cfg(target_arch = "x86_64")] pub fn validate_cpu_vendor(microvm_state: &MicrovmState) { let host_vendor_id = get_vendor_id_from_host(); let snapshot_vendor_id = microvm_state.vcpu_states[0].cpuid.vendor_id(); match (host_vendor_id, snapshot_vendor_id) { (Ok(host_id), Some(snapshot_id)) => { info!("Host CPU vendor ID: {host_id:?}"); info!("Snapshot CPU vendor ID: {snapshot_id:?}"); if host_id != snapshot_id { warn!("Host CPU vendor ID differs from the snapshotted one",); } } (Ok(host_id), None) => { info!("Host CPU vendor ID: {host_id:?}"); warn!("Snapshot CPU vendor ID: couldn't get from the snapshot"); } (Err(_), Some(snapshot_id)) => { warn!("Host CPU vendor ID: couldn't get from the host"); info!("Snapshot CPU vendor ID: {snapshot_id:?}"); } (Err(_), None) => { warn!("Host CPU vendor ID: couldn't get from the host"); warn!("Snapshot CPU vendor ID: couldn't get from the snapshot"); } } } /// Validate that Snapshot Manufacturer ID matches /// the one from the Host /// /// The manufacturer ID for the Snapshot is taken from each VCPU state. /// # Errors /// /// When: /// - Failed to read host vendor. /// - Failed to read snapshot vendor. 
#[cfg(target_arch = "aarch64")] pub fn validate_cpu_manufacturer_id(microvm_state: &MicrovmState) { let host_cpu_id = get_manufacturer_id_from_host(); let snapshot_cpu_id = microvm_state.vcpu_states[0].regs.manifacturer_id(); match (host_cpu_id, snapshot_cpu_id) { (Some(host_id), Some(snapshot_id)) => { info!("Host CPU manufacturer ID: {host_id:?}"); info!("Snapshot CPU manufacturer ID: {snapshot_id:?}"); if host_id != snapshot_id { warn!("Host CPU manufacturer ID differs from the snapshotted one",); } } (Some(host_id), None) => { info!("Host CPU manufacturer ID: {host_id:?}"); warn!("Snapshot CPU manufacturer ID: couldn't get from the snapshot"); } (None, Some(snapshot_id)) => { warn!("Host CPU manufacturer ID: couldn't get from the host"); info!("Snapshot CPU manufacturer ID: {snapshot_id:?}"); } (None, None) => { warn!("Host CPU manufacturer ID: couldn't get from the host"); warn!("Snapshot CPU manufacturer ID: couldn't get from the snapshot"); } } } /// Error type for [`snapshot_state_sanity_check`]. #[derive(Debug, thiserror::Error, displaydoc::Display, PartialEq, Eq)] pub enum SnapShotStateSanityCheckError { /// No memory region defined. NoMemory, /// No DRAM memory region defined. NoDramMemory, /// DRAM memory has more than a single slot. DramMemoryTooManySlots, /// DRAM memory is unplugged. DramMemoryUnplugged, } /// Performs sanity checks against the state file and returns specific errors. pub fn snapshot_state_sanity_check( microvm_state: &MicrovmState, ) -> Result<(), SnapShotStateSanityCheckError> { // Check that the snapshot contains at least 1 mem region, that at least one is Dram, // and that Dram region contains a single plugged slot. // Upper bound check will be done when creating guest memory by comparing against // KVM max supported value kvm_context.max_memslots(). 
let regions = µvm_state.vm_state.memory.regions; if regions.is_empty() { return Err(SnapShotStateSanityCheckError::NoMemory); } if !regions .iter() .any(|r| r.region_type == GuestRegionType::Dram) { return Err(SnapShotStateSanityCheckError::NoDramMemory); } for dram_region in regions .iter() .filter(|r| r.region_type == GuestRegionType::Dram) { if dram_region.plugged.len() != 1 { return Err(SnapShotStateSanityCheckError::DramMemoryTooManySlots); } if !dram_region.plugged[0] { return Err(SnapShotStateSanityCheckError::DramMemoryUnplugged); } } #[cfg(target_arch = "x86_64")] validate_cpu_vendor(microvm_state); #[cfg(target_arch = "aarch64")] validate_cpu_manufacturer_id(microvm_state); Ok(()) } /// Error type for [`restore_from_snapshot`]. #[derive(Debug, thiserror::Error, displaydoc::Display)] pub enum RestoreFromSnapshotError { /// Failed to get snapshot state from file: {0} File(#[from] SnapshotStateFromFileError), /// Invalid snapshot state: {0} Invalid(#[from] SnapShotStateSanityCheckError), /// Failed to load guest memory: {0} GuestMemory(#[from] RestoreFromSnapshotGuestMemoryError), /// Failed to build microVM from snapshot: {0} Build(#[from] BuildMicrovmFromSnapshotError), } /// Sub-Error type for [`restore_from_snapshot`] to contain either [`GuestMemoryFromFileError`] or /// [`GuestMemoryFromUffdError`] within [`RestoreFromSnapshotError`]. #[derive(Debug, thiserror::Error, displaydoc::Display)] pub enum RestoreFromSnapshotGuestMemoryError { /// Error creating guest memory from file: {0} File(#[from] GuestMemoryFromFileError), /// Error creating guest memory from uffd: {0} Uffd(#[from] GuestMemoryFromUffdError), } /// Loads a Microvm snapshot producing a 'paused' Microvm. 
pub fn restore_from_snapshot( instance_info: &InstanceInfo, event_manager: &mut EventManager, seccomp_filters: &BpfThreadMap, params: &LoadSnapshotParams, vm_resources: &mut VmResources, ) -> Result>, RestoreFromSnapshotError> { let mut microvm_state = snapshot_state_from_file(¶ms.snapshot_path)?; for entry in ¶ms.network_overrides { microvm_state .device_states .mmio_state .net_devices .iter_mut() .map(|device| &mut device.device_state) .chain( microvm_state .device_states .pci_state .net_devices .iter_mut() .map(|device| &mut device.device_state), ) .find(|x| x.id == entry.iface_id) .map(|device_state| device_state.tap_if_name.clone_from(&entry.host_dev_name)) .ok_or(SnapshotStateFromFileError::UnknownNetworkDevice)?; } if let Some(vsock_override) = ¶ms.vsock_override { // There should only ever be at most one vsock device, therefore this // should correctly find it and modify the path if such a device exists. let device_state = microvm_state .device_states .mmio_state .vsock_device .as_mut() .map(|device| &mut device.device_state) .or_else(|| { microvm_state .device_states .pci_state .vsock_device .as_mut() .map(|device| &mut device.device_state) }) .ok_or(SnapshotStateFromFileError::UnknownVsockDevice)?; device_state .backend .uds_path .clone_from(&vsock_override.uds_path); } let track_dirty_pages = params.track_dirty_pages; let vcpu_count = microvm_state .vcpu_states .len() .try_into() .map_err(|_| MachineConfigError::InvalidVcpuCount) .map_err(BuildMicrovmFromSnapshotError::VmUpdateConfig)?; vm_resources .update_machine_config(&MachineConfigUpdate { vcpu_count: Some(vcpu_count), mem_size_mib: Some(u64_to_usize(microvm_state.vm_info.mem_size_mib)), smt: Some(microvm_state.vm_info.smt), cpu_template: Some(microvm_state.vm_info.cpu_template), track_dirty_pages: Some(track_dirty_pages), huge_pages: Some(microvm_state.vm_info.huge_pages), #[cfg(feature = "gdb")] gdb_socket_path: None, }) .map_err(BuildMicrovmFromSnapshotError::VmUpdateConfig)?; // Some sanity 
checks before building the microvm. snapshot_state_sanity_check(µvm_state)?; let mem_backend_path = ¶ms.mem_backend.backend_path; let mem_state = µvm_state.vm_state.memory; let (guest_memory, uffd) = match params.mem_backend.backend_type { MemBackendType::File => { if vm_resources.machine_config.huge_pages.is_hugetlbfs() { return Err(RestoreFromSnapshotGuestMemoryError::File( GuestMemoryFromFileError::HugetlbfsSnapshot, ) .into()); } ( guest_memory_from_file(mem_backend_path, mem_state, track_dirty_pages) .map_err(RestoreFromSnapshotGuestMemoryError::File)?, None, ) } MemBackendType::Uffd => guest_memory_from_uffd( mem_backend_path, mem_state, track_dirty_pages, vm_resources.machine_config.huge_pages, ) .map_err(RestoreFromSnapshotGuestMemoryError::Uffd)?, }; builder::build_microvm_from_snapshot( instance_info, event_manager, microvm_state, guest_memory, uffd, seccomp_filters, vm_resources, ) .map_err(RestoreFromSnapshotError::Build) } /// Error type for [`snapshot_state_from_file`] #[derive(Debug, thiserror::Error, displaydoc::Display)] pub enum SnapshotStateFromFileError { /// Failed to open snapshot file: {0} Open(#[from] std::io::Error), /// Failed to load snapshot state from file: {0} Load(#[from] crate::snapshot::SnapshotError), /// Unknown Network Device. UnknownNetworkDevice, /// Unknown Vsock Device. UnknownVsockDevice, } fn snapshot_state_from_file( snapshot_path: &Path, ) -> Result { let mut snapshot_reader = File::open(snapshot_path)?; let snapshot = Snapshot::load(&mut snapshot_reader)?; Ok(snapshot.data) } /// Error type for [`guest_memory_from_file`]. #[derive(Debug, thiserror::Error, displaydoc::Display)] pub enum GuestMemoryFromFileError { /// Failed to load guest memory: {0} File(#[from] std::io::Error), /// Failed to restore guest memory: {0} Restore(#[from] MemoryError), /// Cannot restore hugetlbfs backed snapshot by mapping the memory file. Please use uffd. 
HugetlbfsSnapshot, } fn guest_memory_from_file( mem_file_path: &Path, mem_state: &GuestMemoryState, track_dirty_pages: bool, ) -> Result, GuestMemoryFromFileError> { let mem_file = File::open(mem_file_path)?; let guest_mem = memory::snapshot_file(mem_file, mem_state.regions(), track_dirty_pages)?; Ok(guest_mem) } /// Error type for [`guest_memory_from_uffd`] #[derive(Debug, thiserror::Error, displaydoc::Display)] pub enum GuestMemoryFromUffdError { /// Failed to restore guest memory: {0} Restore(#[from] MemoryError), /// Failed to UFFD object: {0} Create(userfaultfd::Error), /// Failed to register memory address range with the userfaultfd object: {0} Register(userfaultfd::Error), /// Failed to connect to UDS Unix stream: {0} Connect(#[from] std::io::Error), /// Failed to sends file descriptor: {0} Send(#[from] vmm_sys_util::errno::Error), } fn guest_memory_from_uffd( mem_uds_path: &Path, mem_state: &GuestMemoryState, track_dirty_pages: bool, huge_pages: HugePageConfig, ) -> Result<(Vec, Option), GuestMemoryFromUffdError> { let (guest_memory, backend_mappings) = create_guest_memory(mem_state, track_dirty_pages, huge_pages)?; let mut uffd_builder = UffdBuilder::new(); // We only make use of this if balloon devices are present, but we can enable it unconditionally // because the only place the kernel checks this is in a hook from madvise, e.g. it doesn't // actively change the behavior of UFFD, only passively. Without balloon devices // we never call madvise anyway, so no need to put this into a conditional. 
uffd_builder.require_features(FeatureFlags::EVENT_REMOVE); let uffd = uffd_builder .close_on_exec(true) .non_blocking(true) .user_mode_only(false) .create() .map_err(GuestMemoryFromUffdError::Create)?; for mem_region in guest_memory.iter() { uffd.register(mem_region.as_ptr().cast(), mem_region.size() as _) .map_err(GuestMemoryFromUffdError::Register)?; } send_uffd_handshake(mem_uds_path, &backend_mappings, &uffd)?; Ok((guest_memory, Some(uffd))) } fn create_guest_memory( mem_state: &GuestMemoryState, track_dirty_pages: bool, huge_pages: HugePageConfig, ) -> Result<(Vec, Vec), GuestMemoryFromUffdError> { let guest_memory = memory::anonymous(mem_state.regions(), track_dirty_pages, huge_pages)?; let mut backend_mappings = Vec::with_capacity(guest_memory.len()); let mut offset = 0; for mem_region in guest_memory.iter() { #[allow(deprecated)] backend_mappings.push(GuestRegionUffdMapping { base_host_virt_addr: mem_region.as_ptr() as u64, size: mem_region.size(), offset, page_size: huge_pages.page_size(), page_size_kib: huge_pages.page_size(), }); offset += mem_region.size() as u64; } Ok((guest_memory, backend_mappings)) } fn send_uffd_handshake( mem_uds_path: &Path, backend_mappings: &[GuestRegionUffdMapping], uffd: &impl AsRawFd, ) -> Result<(), GuestMemoryFromUffdError> { // This is safe to unwrap() because we control the contents of the vector // (i.e GuestRegionUffdMapping entries). let backend_mappings = serde_json::to_string(backend_mappings).unwrap(); let socket = UnixStream::connect(mem_uds_path)?; socket.send_with_fd( backend_mappings.as_bytes(), // In the happy case we can close the fd since the other process has it open and is // using it to serve us pages. // // The problem is that if other process crashes/exits, firecracker guest memory // will simply revert to anon-mem behavior which would lead to silent errors and // undefined behavior. // // To tackle this scenario, the page fault handler can notify Firecracker of any // crashes/exits. 
There is no need for Firecracker to explicitly send its process ID. // The external process can obtain Firecracker's PID by calling `getsockopt` with // `libc::SO_PEERCRED` option like so: // // let mut val = libc::ucred { pid: 0, gid: 0, uid: 0 }; // let mut ucred_size: u32 = mem::size_of::() as u32; // libc::getsockopt( // socket.as_raw_fd(), // libc::SOL_SOCKET, // libc::SO_PEERCRED, // &mut val as *mut _ as *mut _, // &mut ucred_size as *mut libc::socklen_t, // ); // // Per this linux man page: https://man7.org/linux/man-pages/man7/unix.7.html, // `SO_PEERCRED` returns the credentials (PID, UID and GID) of the peer process // connected to this socket. The returned credentials are those that were in effect // at the time of the `connect` call. // // Moreover, Firecracker holds a copy of the UFFD fd as well, so that even if the // page fault handler process does not tear down Firecracker when necessary, the // uffd will still be alive but with no one to serve faults, leading to guest freeze. uffd.as_raw_fd(), )?; // We prevent Rust from closing the socket file descriptor to avoid a potential race condition // between the mappings message and the connection shutdown. If the latter arrives at the UFFD // handler first, the handler never sees the mappings. 
forget(socket); Ok(()) } #[cfg(test)] mod tests { use std::os::unix::net::UnixListener; use vmm_sys_util::tempfile::TempFile; use super::*; use crate::Vmm; #[cfg(target_arch = "x86_64")] use crate::builder::tests::insert_vmclock_device; #[cfg(target_arch = "x86_64")] use crate::builder::tests::insert_vmgenid_device; use crate::builder::tests::{ CustomBlockConfig, default_kernel_cmdline, default_vmm, insert_balloon_device, insert_block_devices, insert_net_device, insert_vsock_device, }; #[cfg(target_arch = "aarch64")] use crate::construct_kvm_mpidrs; use crate::devices::virtio::block::CacheType; use crate::snapshot::Persist; use crate::vmm_config::balloon::BalloonDeviceConfig; use crate::vmm_config::net::NetworkInterfaceConfig; use crate::vmm_config::vsock::tests::default_config; use crate::vstate::memory::{GuestMemoryRegionState, GuestRegionType}; fn default_vmm_with_devices() -> Vmm { let mut event_manager = EventManager::new().expect("Cannot create EventManager"); let mut vmm = default_vmm(); let mut cmdline = default_kernel_cmdline(); // Add a balloon device. let balloon_config = BalloonDeviceConfig { amount_mib: 0, deflate_on_oom: false, stats_polling_interval_s: 0, free_page_hinting: false, free_page_reporting: false, }; insert_balloon_device(&mut vmm, &mut cmdline, &mut event_manager, balloon_config); // Add a block device. let drive_id = String::from("root"); let block_configs = vec![CustomBlockConfig::new( drive_id, true, None, true, CacheType::Unsafe, )]; insert_block_devices(&mut vmm, &mut cmdline, &mut event_manager, block_configs); // Add net device. let network_interface = NetworkInterfaceConfig { iface_id: String::from("netif"), host_dev_name: String::from("hostname"), guest_mac: None, rx_rate_limiter: None, tx_rate_limiter: None, }; insert_net_device( &mut vmm, &mut cmdline, &mut event_manager, network_interface, ); // Add vsock device. 
let mut tmp_sock_file = TempFile::new().unwrap(); tmp_sock_file.remove().unwrap(); let vsock_config = default_config(&tmp_sock_file); insert_vsock_device(&mut vmm, &mut cmdline, &mut event_manager, vsock_config); #[cfg(target_arch = "x86_64")] insert_vmgenid_device(&mut vmm); #[cfg(target_arch = "x86_64")] insert_vmclock_device(&mut vmm); vmm } #[test] fn test_microvm_state_snapshot() { let vmm = default_vmm_with_devices(); let states = vmm.device_manager.save(); // Only checking that all devices are saved, actual device state // is tested by that device's tests. assert_eq!(states.mmio_state.block_devices.len(), 1); assert_eq!(states.mmio_state.net_devices.len(), 1); assert!(states.mmio_state.vsock_device.is_some()); assert!(states.mmio_state.balloon_device.is_some()); let vcpu_states = vec![VcpuState::default()]; #[cfg(target_arch = "aarch64")] let mpidrs = construct_kvm_mpidrs(&vcpu_states); let microvm_state = MicrovmState { device_states: states, vcpu_states, kvm_state: Default::default(), vm_info: VmInfo { mem_size_mib: 1u64, ..Default::default() }, #[cfg(target_arch = "aarch64")] vm_state: vmm.vm.save_state(&mpidrs).unwrap(), #[cfg(target_arch = "x86_64")] vm_state: vmm.vm.save_state().unwrap(), }; let serialized_data = bitcode::serialize(µvm_state).unwrap(); let restored_microvm_state: MicrovmState = bitcode::deserialize(&serialized_data).unwrap(); assert_eq!(restored_microvm_state.vm_info, microvm_state.vm_info); assert_eq!( restored_microvm_state.device_states.mmio_state, microvm_state.device_states.mmio_state ) } #[test] fn test_create_guest_memory() { let mem_state = GuestMemoryState { regions: vec![GuestMemoryRegionState { base_address: 0, size: 0x20000, region_type: GuestRegionType::Dram, plugged: vec![true], }], }; let (_, uffd_regions) = create_guest_memory(&mem_state, false, HugePageConfig::None).unwrap(); assert_eq!(uffd_regions.len(), 1); assert_eq!(uffd_regions[0].size, 0x20000); assert_eq!(uffd_regions[0].offset, 0); 
assert_eq!(uffd_regions[0].page_size, HugePageConfig::None.page_size()); } #[test] fn test_send_uffd_handshake() { #[allow(deprecated)] let uffd_regions = vec![ GuestRegionUffdMapping { base_host_virt_addr: 0, size: 0x100000, offset: 0, page_size: HugePageConfig::None.page_size(), page_size_kib: HugePageConfig::None.page_size(), }, GuestRegionUffdMapping { base_host_virt_addr: 0x100000, size: 0x200000, offset: 0, page_size: HugePageConfig::Hugetlbfs2M.page_size(), page_size_kib: HugePageConfig::Hugetlbfs2M.page_size(), }, ]; let uds_path = TempFile::new().unwrap(); let uds_path = uds_path.as_path(); std::fs::remove_file(uds_path).unwrap(); let listener = UnixListener::bind(uds_path).expect("Cannot bind to socket path"); send_uffd_handshake(uds_path, &uffd_regions, &std::io::stdin()).unwrap(); let (stream, _) = listener.accept().expect("Cannot listen on UDS socket"); let mut message_buf = vec![0u8; 1024]; let (bytes_read, _) = stream .recv_with_fd(&mut message_buf[..]) .expect("Cannot recv_with_fd"); message_buf.resize(bytes_read, 0); let deserialized: Vec = serde_json::from_slice(&message_buf).unwrap(); assert_eq!(uffd_regions, deserialized); } } ================================================ FILE: src/vmm/src/rate_limiter/mod.rs ================================================ // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 use std::os::unix::io::{AsRawFd, RawFd}; use std::time::{Duration, Instant}; use std::{fmt, io}; use utils::time::TimerFd; pub mod persist; #[derive(Debug, thiserror::Error, displaydoc::Display)] /// Describes the errors that may occur while handling rate limiter events. pub enum RateLimiterError { /// Rate limiter event handler called without a present timer SpuriousRateLimiterEvent, } // Interval at which the refill timer will run when limiter is at capacity. 
const REFILL_TIMER_DURATION: Duration = Duration::from_millis(100);
const NANOSEC_IN_ONE_MILLISEC: u64 = 1_000_000;

// Euclid's two-thousand-year-old algorithm for finding the greatest common divisor.
#[cfg_attr(kani, kani::requires(x > 0 && y > 0))]
#[cfg_attr(kani, kani::ensures(
    |&result| result != 0 && x % result == 0 && y % result == 0
))]
fn gcd(x: u64, y: u64) -> u64 {
    let mut x = x;
    let mut y = y;
    while y != 0 {
        let t = y;
        y = x % y;
        x = t;
    }
    x
}

/// Enum describing the outcomes of a `reduce()` call on a `TokenBucket`.
#[derive(Clone, Debug, PartialEq)]
pub enum BucketReduction {
    /// There are not enough tokens to complete the operation.
    Failure,
    /// A part of the available tokens have been consumed.
    Success,
    /// A number of tokens `inner` times larger than the bucket size have been consumed.
    OverConsumption(f64),
}

/// TokenBucket provides a lower level interface to rate limiting with a
/// configurable capacity, refill-rate and initial burst.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct TokenBucket {
    // Bucket defining traits.
    size: u64,
    // Initial burst size.
    initial_one_time_burst: u64,
    // Complete refill time in milliseconds.
    refill_time: u64,

    // Internal state descriptors.

    // Number of free initial tokens, that can be consumed at no cost.
    one_time_burst: u64,
    // Current token budget.
    budget: u64,
    // Last time this token bucket saw activity.
    last_update: Instant,

    // Fields used for pre-processing optimizations.
    processed_capacity: u64,
    processed_refill_time: u64,
}

impl TokenBucket {
    /// Creates a `TokenBucket` wrapped in an `Option`.
    ///
    /// TokenBucket created is of `size` total capacity and takes `complete_refill_time_ms`
    /// milliseconds to go from zero tokens to total capacity. The `one_time_burst` is initial
    /// extra credit on top of total capacity, that does not replenish and which can be used
    /// for an initial burst of data.
    ///
    /// If the `size` or the `complete refill time` are zero, then `None` is returned.
    pub fn new(size: u64, one_time_burst: u64, complete_refill_time_ms: u64) -> Option<TokenBucket> {
        // If either token bucket capacity or refill time is 0, disable limiting.
        if size == 0 || complete_refill_time_ms == 0 {
            return None;
        }
        // Formula for computing current refill amount:
        // refill_token_count = (delta_time * size) / (complete_refill_time_ms * 1_000_000)
        // In order to avoid overflows, simplify the fractions by computing greatest common divisor.

        let complete_refill_time_ns =
            complete_refill_time_ms.checked_mul(NANOSEC_IN_ONE_MILLISEC)?;
        // Get the greatest common factor between `size` and `complete_refill_time_ns`.
        let common_factor = gcd(size, complete_refill_time_ns);
        // The division will be exact since `common_factor` is a factor of `size`.
        let processed_capacity: u64 = size / common_factor;
        // The division will be exact since `common_factor` is a factor of
        // `complete_refill_time_ns`.
        let processed_refill_time: u64 = complete_refill_time_ns / common_factor;

        Some(TokenBucket {
            size,
            one_time_burst,
            initial_one_time_burst: one_time_burst,
            refill_time: complete_refill_time_ms,
            // Start off full.
            budget: size,
            // Last updated is now.
            last_update: Instant::now(),
            processed_capacity,
            processed_refill_time,
        })
    }

    // Replenishes token bucket based on elapsed time. Should only be called internally by `Self`.
    #[allow(clippy::cast_possible_truncation)]
    fn auto_replenish(&mut self) {
        // Compute time passed since last refill/update.
        let now = Instant::now();
        let time_delta = (now - self.last_update).as_nanos();

        if time_delta >= u128::from(self.refill_time * NANOSEC_IN_ONE_MILLISEC) {
            self.budget = self.size;
            self.last_update = now;
        } else {
            // At each 'time_delta' nanoseconds the bucket should refill with:
            // refill_amount = (time_delta * size) / (complete_refill_time_ms * 1_000_000)
            // `processed_capacity` and `processed_refill_time` are the result of simplifying above
            // fraction formula with their greatest-common-factor.

            // In the constructor, we assured that (self.refill_time * NANOSEC_IN_ONE_MILLISEC)
            // fits into a u64 That means, at this point we know that time_delta <
            // u64::MAX. Since all other values here are u64, this assures that u128
            // multiplication cannot overflow.
            let processed_capacity = u128::from(self.processed_capacity);
            let processed_refill_time = u128::from(self.processed_refill_time);

            let tokens = (time_delta * processed_capacity) / processed_refill_time;

            // We increment `self.last_update` by the minimum time required to generate `tokens`, in
            // the case where we have the time to generate `1.8` tokens but only
            // generate `x` tokens due to integer arithmetic this will carry the time
            // required to generate 0.8th of a token over to the next call, such that if
            // the next call where to generate `2.3` tokens it would instead
            // generate `3.1` tokens. This minimizes dropping tokens at high frequencies.
            // We want the integer division here to round up instead of down (as if we round down,
            // we would allow some fraction of a nano second to be used twice, allowing
            // for the generation of one extra token in extreme circumstances).
            let mut time_adjustment = tokens * processed_refill_time / processed_capacity;
            if !(tokens * processed_refill_time).is_multiple_of(processed_capacity) {
                time_adjustment += 1;
            }

            // Ensure that we always generate as many tokens as we can: assert that the "unused"
            // part of time_delta is less than the time it would take to generate a
            // single token (= processed_refill_time / processed_capacity)
            debug_assert!(time_adjustment <= time_delta);
            debug_assert!(
                (time_delta - time_adjustment) * processed_capacity <= processed_refill_time
            );

            // time_adjustment is at most time_delta, and since time_delta <= u64::MAX, this cast is
            // fine
            self.last_update += Duration::from_nanos(time_adjustment as u64);
            self.budget = std::cmp::min(self.budget.saturating_add(tokens as u64), self.size);
        }
    }

    /// Attempts to consume `tokens` from the bucket and returns whether the action succeeded.
    pub fn reduce(&mut self, mut tokens: u64) -> BucketReduction {
        // First things first: consume the one-time-burst budget.
        if self.one_time_burst > 0 {
            // We still have burst budget for *all* tokens requests.
            if self.one_time_burst >= tokens {
                self.one_time_burst -= tokens;
                self.last_update = Instant::now();
                // No need to continue to the refill process, we still have burst budget to consume
                // from.
                return BucketReduction::Success;
            } else {
                // We still have burst budget for *some* of the tokens requests.
                // The tokens left unfulfilled will be consumed from current `self.budget`.
                tokens -= self.one_time_burst;
                self.one_time_burst = 0;
            }
        }

        if tokens > self.budget {
            // Hit the bucket bottom, let's auto-replenish and try again.
            self.auto_replenish();

            // This operation requests a bandwidth higher than the bucket size
            if tokens > self.size {
                crate::logger::error!(
                    "Consumed {} tokens from bucket of size {}",
                    tokens,
                    self.size
                );
                // Empty the bucket and report an overconsumption of
                // (remaining tokens / size) times larger than the bucket size
                tokens -= self.budget;
                self.budget = 0;
                return BucketReduction::OverConsumption(tokens as f64 / self.size as f64);
            }

            if tokens > self.budget {
                // Still not enough tokens, consume() fails, return false.
                return BucketReduction::Failure;
            }
        }

        self.budget -= tokens;
        BucketReduction::Success
    }

    /// "Manually" adds tokens to bucket.
    pub fn force_replenish(&mut self, tokens: u64) {
        // This means we are still during the burst interval.
        // Of course there is a very small chance that the last reduce() also used up burst
        // budget which should now be replenished, but for performance and code-complexity
        // reasons we're just gonna let that slide since it's practically inconsequential.
        if self.one_time_burst > 0 {
            self.one_time_burst = std::cmp::min(
                self.one_time_burst.saturating_add(tokens),
                self.initial_one_time_burst,
            );
            return;
        }
        self.budget = std::cmp::min(self.budget.saturating_add(tokens), self.size);
    }

    /// Returns the capacity of the token bucket.
    pub fn capacity(&self) -> u64 {
        self.size
    }

    /// Returns the remaining one time burst budget.
    pub fn one_time_burst(&self) -> u64 {
        self.one_time_burst
    }

    /// Returns the time in milliseconds required to to completely fill the bucket.
    pub fn refill_time_ms(&self) -> u64 {
        self.refill_time
    }

    /// Returns the current budget (one time burst allowance notwithstanding).
    pub fn budget(&self) -> u64 {
        self.budget
    }

    /// Returns the initially configured one time burst budget.
    pub fn initial_one_time_burst(&self) -> u64 {
        self.initial_one_time_burst
    }
}

/// Enum that describes the type of token used.
#[derive(Debug)]
pub enum TokenType {
    /// Token type used for bandwidth limiting.
    Bytes,
    /// Token type used for operations/second limiting.
    Ops,
}

/// Enum that describes the type of token bucket update.
#[derive(Debug)]
pub enum BucketUpdate {
    /// No Update - same as before.
    None,
    /// Rate Limiting is disabled on this bucket.
    Disabled,
    /// Rate Limiting enabled with updated bucket.
    Update(TokenBucket),
}

/// Rate Limiter that works on both bandwidth and ops/s limiting.
///
/// Bandwidth (bytes/s) and ops/s limiting can be used at the same time or individually.
///
/// Implementation uses a single timer through TimerFd to refresh either or
/// both token buckets.
///
/// Its internal buckets are 'passively' replenished as they're being used (as
/// part of `consume()` operations).
/// A timer is enabled and used to 'actively' replenish the token buckets when
/// limiting is in effect and `consume()` operations are disabled.
///
/// RateLimiters will generate events on the FDs provided by their `AsRawFd` trait
/// implementation. These events are meant to be consumed by the user of this struct.
/// On each such event, the user must call the `event_handler()` method.
pub struct RateLimiter {
    bandwidth: Option<TokenBucket>,
    ops: Option<TokenBucket>,

    timer_fd: TimerFd,
    // Internal flag that quickly determines timer state.
    timer_active: bool,
}

impl PartialEq for RateLimiter {
    fn eq(&self, other: &RateLimiter) -> bool {
        self.bandwidth == other.bandwidth && self.ops == other.ops
    }
}

impl fmt::Debug for RateLimiter {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(
            f,
            "RateLimiter {{ bandwidth: {:?}, ops: {:?} }}",
            self.bandwidth, self.ops
        )
    }
}

impl RateLimiter {
    /// Creates a new Rate Limiter that can limit on both bytes/s and ops/s.
    ///
    /// # Arguments
    ///
    /// * `bytes_total_capacity` - the total capacity of the `TokenType::Bytes` token bucket.
    /// * `bytes_one_time_burst` - initial extra credit on top of `bytes_total_capacity`, that does
    ///   not replenish and which can be used for an initial burst of data.
/// * `bytes_complete_refill_time_ms` - number of milliseconds for the `TokenType::Bytes` token /// bucket to go from zero Bytes to `bytes_total_capacity` Bytes. /// * `ops_total_capacity` - the total capacity of the `TokenType::Ops` token bucket. /// * `ops_one_time_burst` - initial extra credit on top of `ops_total_capacity`, that does not /// replenish and which can be used for an initial burst of data. /// * `ops_complete_refill_time_ms` - number of milliseconds for the `TokenType::Ops` token /// bucket to go from zero Ops to `ops_total_capacity` Ops. /// /// If either bytes/ops *size* or *refill_time* are **zero**, the limiter /// is **disabled** for that respective token type. /// /// # Errors /// /// If the timerfd creation fails, an error is returned. pub fn new( bytes_total_capacity: u64, bytes_one_time_burst: u64, bytes_complete_refill_time_ms: u64, ops_total_capacity: u64, ops_one_time_burst: u64, ops_complete_refill_time_ms: u64, ) -> io::Result { let bytes_token_bucket = TokenBucket::new( bytes_total_capacity, bytes_one_time_burst, bytes_complete_refill_time_ms, ); let ops_token_bucket = TokenBucket::new( ops_total_capacity, ops_one_time_burst, ops_complete_refill_time_ms, ); // We'll need a timer_fd, even if our current config effectively disables rate limiting, // because `Self::update_buckets()` might re-enable it later, and we might be // seccomp-blocked from creating the timer_fd at that time. let timer_fd = TimerFd::new(); Ok(RateLimiter { bandwidth: bytes_token_bucket, ops: ops_token_bucket, timer_fd, timer_active: false, }) } // Arm the timer of the rate limiter with the provided `TimerState`. fn activate_timer(&mut self, one_shot_duration: Duration) { // Register the timer; don't care about its previous state self.timer_fd.arm(one_shot_duration, None); self.timer_active = true; } /// Attempts to consume tokens and returns whether that is possible. /// /// If rate limiting is disabled on provided `token_type`, this function will always succeed. 
pub fn consume(&mut self, tokens: u64, token_type: TokenType) -> bool { // If the timer is active, we can't consume tokens from any bucket and the function fails. if self.timer_active { return false; } // Identify the required token bucket. let token_bucket = match token_type { TokenType::Bytes => self.bandwidth.as_mut(), TokenType::Ops => self.ops.as_mut(), }; // Try to consume from the token bucket. if let Some(bucket) = token_bucket { let refill_time = bucket.refill_time_ms(); match bucket.reduce(tokens) { // When we report budget is over, there will be no further calls here, // register a timer to replenish the bucket and resume processing; // make sure there is only one running timer for this limiter. BucketReduction::Failure => { if !self.timer_active { self.activate_timer(REFILL_TIMER_DURATION); } false } // The operation succeeded and further calls can be made. BucketReduction::Success => true, // The operation succeeded as the tokens have been consumed // but the timer still needs to be armed. BucketReduction::OverConsumption(ratio) => { // The operation "borrowed" a number of tokens `ratio` times // greater than the size of the bucket, and since it takes // `refill_time` milliseconds to fill an empty bucket, in // order to enforce the bandwidth limit we need to prevent // further calls to the rate limiter for // `ratio * refill_time` milliseconds. // The conversion should be safe because the ratio is positive. #[allow(clippy::cast_sign_loss, clippy::cast_possible_truncation)] self.activate_timer(Duration::from_millis((ratio * refill_time as f64) as u64)); true } } } else { // If bucket is not present rate limiting is disabled on token type, // consume() will always succeed. true } } /// Adds tokens of `token_type` to their respective bucket. /// /// Can be used to *manually* add tokens to a bucket. Useful for reverting a /// `consume()` if needed. 
pub fn manual_replenish(&mut self, tokens: u64, token_type: TokenType) { // Identify the required token bucket. let token_bucket = match token_type { TokenType::Bytes => self.bandwidth.as_mut(), TokenType::Ops => self.ops.as_mut(), }; // Add tokens to the token bucket. if let Some(bucket) = token_bucket { bucket.force_replenish(tokens); } } /// Returns whether this rate limiter is blocked. /// /// The limiter 'blocks' when a `consume()` operation fails because there was not enough /// budget for it. /// An event will be generated on the exported FD when the limiter 'unblocks'. pub fn is_blocked(&self) -> bool { self.timer_active } /// This function needs to be called every time there is an event on the /// FD provided by this object's `AsRawFd` trait implementation. /// /// # Errors /// /// If the rate limiter is disabled or is not blocked, an error is returned. pub fn event_handler(&mut self) -> Result<(), RateLimiterError> { match self.timer_fd.read() { 0 => Err(RateLimiterError::SpuriousRateLimiterEvent), _ => { self.timer_active = false; Ok(()) } } } /// Updates the parameters of the token buckets associated with this RateLimiter. // TODO: Please note that, right now, the buckets become full after being updated. pub fn update_buckets(&mut self, bytes: BucketUpdate, ops: BucketUpdate) { match bytes { BucketUpdate::Disabled => self.bandwidth = None, BucketUpdate::Update(tb) => self.bandwidth = Some(tb), BucketUpdate::None => (), }; match ops { BucketUpdate::Disabled => self.ops = None, BucketUpdate::Update(tb) => self.ops = Some(tb), BucketUpdate::None => (), }; } /// Returns an immutable view of the inner bandwidth token bucket. pub fn bandwidth(&self) -> Option<&TokenBucket> { self.bandwidth.as_ref() } /// Returns an immutable view of the inner ops token bucket. pub fn ops(&self) -> Option<&TokenBucket> { self.ops.as_ref() } } impl AsRawFd for RateLimiter { /// Provides a FD which needs to be monitored for POLLIN events. 
    ///
    /// This object's `event_handler()` method must be called on such events.
    ///
    /// Will return a negative value if rate limiting is disabled on both
    /// token types.
    fn as_raw_fd(&self) -> RawFd {
        self.timer_fd.as_raw_fd()
    }
}

impl Default for RateLimiter {
    /// Default RateLimiter is a no-op limiter with infinite budget.
    fn default() -> Self {
        // Safe to unwrap since this will not attempt to create timer_fd.
        RateLimiter::new(0, 0, 0, 0, 0, 0).expect("Failed to build default RateLimiter")
    }
}

#[cfg(kani)]
#[allow(dead_code)] // Avoid warning when using stubs.
mod verification {
    use std::time::Instant;

    use super::*;

    mod stubs {
        use std::time::Instant;

        use crate::rate_limiter::TokenBucket;

        // On Unix, the Rust Standard Library defines Instants as
        //
        // struct Instant(struct inner::Instant {
        //     t: struct Timespec {
        //         tv_sec: i64,
        //         tv_nsec: struct Nanoseconds(u32),
        //     }
        // }
        //
        // This is not really repr-compatible with the below, as the structs (apart from
        // `Nanoseconds`) are repr(Rust), but currently this seems to work.
        #[repr(C)]
        struct InstantStub {
            tv_sec: i64,
            tv_nsec: u32,
        }

        // The last value returned by this stub, split into seconds/nanoseconds. We keep these
        // variables separately for Kani performance reasons (just counting nanos and then doing
        // division/modulo to get seconds/nanos is slow as those operations are very difficult
        // for Kani's underlying SAT solvers).
        //
        // NOTE: mutating these `static mut`s is only sound here because Kani harnesses execute
        // single-threaded.
        static mut LAST_SECONDS: i64 = 0;
        static mut LAST_NANOS: u32 = 0;

        /// Stubs out `std::time::Instant::now` to return non-deterministic instances that are
        /// non-decreasing. The first value produced by this stub will always be 0. This is
        /// because generally harnesses only care about the delta between instants i1 and i2,
        /// which is arbitrary as long as at least one of i1, i2 is non-deterministic. Therefore,
        /// hardcoding one of the instances to be 0 brings a performance improvement. Should a
        /// harness lose generality due to the first Instant::now() call returning 0, add a
        /// dummy call to Instant::now() to the top of the harness to consume the 0 value. All
        /// subsequent calls will then result in non-deterministic values.
        fn instant_now() -> Instant {
            // Instants are non-decreasing.
            // See https://doc.rust-lang.org/std/time/struct.Instant.html.
            // Upper bound on seconds to prevent scenarios involving clock overflow.
            let next_seconds = kani::any_where(|n| *n >= unsafe { LAST_SECONDS });
            let next_nanos = kani::any_where(|n| *n < 1_000_000_000); // rustc intrinsic bound
            // Within the same second, nanoseconds must not go backwards either.
            if next_seconds == unsafe { LAST_SECONDS } {
                kani::assume(next_nanos >= unsafe { LAST_NANOS });
            }

            // Build the Instant from the *previous* state, then advance the state, so the
            // first call observes (0, 0).
            let to_return = next_instant_now();

            unsafe {
                LAST_SECONDS = next_seconds;
                LAST_NANOS = next_nanos;
            }

            to_return
        }

        /// Materializes an `Instant` from the current (LAST_SECONDS, LAST_NANOS) state.
        pub(super) fn next_instant_now() -> Instant {
            let stub = InstantStub {
                tv_sec: unsafe { LAST_SECONDS },
                tv_nsec: unsafe { LAST_NANOS },
            };
            // In normal rust code, this would not be safe, as the compiler can re-order the
            // fields. However, kani will never run any transformations on the code, so this is
            // safe. This is because kani doesn't use rustc/llvm to compile down to bytecode,
            // but instead transpiles unoptimized rust MIR to goto-programs, which are then fed
            // to CBMC.
            unsafe { std::mem::transmute(stub) }
        }

        /// Stubs out `TokenBucket::auto_replenish` by simply filling up the bucket by a
        /// non-deterministic amount.
fn token_bucket_auto_replenish(this: &mut TokenBucket) { this.budget += kani::any_where::(|&n| n <= this.size - this.budget); } } impl TokenBucket { /// Functions checking that the general invariants of a TokenBucket are upheld fn is_valid(&self) -> bool { self.size != 0 && self.refill_time != 0 // The token budget can never exceed the bucket's size && self.budget <= self.size // The burst budget never exceeds its initial value && self.one_time_burst <= self.initial_one_time_burst // While burst budget is available, no tokens from the normal budget are consumed. && (self.one_time_burst == 0 || self.budget == self.size) } } impl kani::Arbitrary for TokenBucket { fn any() -> TokenBucket { let bucket = TokenBucket::new(kani::any(), kani::any(), kani::any()); kani::assume(bucket.is_some()); let mut bucket = bucket.unwrap(); // Adjust the budgets non-deterministically to simulate that the bucket has been "in // use" already bucket.budget = kani::any(); bucket.one_time_burst = kani::any(); kani::assume(bucket.is_valid()); bucket } } #[kani::proof] #[kani::stub(std::time::Instant::now, stubs::instant_now)] fn verify_instant_stub_non_decreasing() { let early = Instant::now(); let late = Instant::now(); assert!(early <= late); } // Euclid algorithm has runtime O(log(min(x,y))) -> kani::unwind(log(MAX)) should be enough. #[kani::proof_for_contract(gcd)] #[kani::unwind(64)] #[kani::solver(cadical)] fn gcd_contract_harness() { const MAX: u64 = 64; let x = kani::any_where(|&x| x < MAX); let y = kani::any_where(|&y| y < MAX); let gcd = super::gcd(x, y); // Most assertions are unnecessary as they are proved as part of the // contract. However for simplification the contract only enforces that // the result is *a* divisor, not necessarily the smallest one, so we // check that here manually. 
if gcd != 0 { let w = kani::any_where(|&w| w > 0 && x % w == 0 && y % w == 0); assert!(gcd >= w); } } #[kani::proof] #[kani::stub(std::time::Instant::now, stubs::instant_now)] #[kani::stub_verified(gcd)] #[kani::solver(cadical)] fn verify_token_bucket_new() { let size = kani::any(); let one_time_burst = kani::any(); let complete_refill_time_ms = kani::any(); // Checks if the `TokenBucket` is created with invalid inputs, the result is always `None`. match TokenBucket::new(size, one_time_burst, complete_refill_time_ms) { None => assert!( size == 0 || complete_refill_time_ms == 0 || complete_refill_time_ms > u64::MAX / NANOSEC_IN_ONE_MILLISEC ), Some(bucket) => assert!(bucket.is_valid()), } } #[kani::proof] #[kani::unwind(1)] // enough to unwind the recursion at `Timespec::sub_timespec` #[kani::stub(std::time::Instant::now, stubs::instant_now)] #[kani::stub_verified(gcd)] fn verify_token_bucket_auto_replenish() { const MAX_BUCKET_SIZE: u64 = 15; const MAX_REFILL_TIME: u64 = 15; // Create a non-deterministic `TokenBucket`. This internally calls `Instant::now()`, which // is stubbed to always return 0 on its first call. We can make this simplification // here, as `auto_replenish` only cares about the time delta between two consecutive // calls. This speeds up the verification significantly. 
let size = kani::any_where(|n| *n < MAX_BUCKET_SIZE && *n != 0); let complete_refill_time_ms = kani::any_where(|n| *n < MAX_REFILL_TIME && *n != 0); // `auto_replenish` doesn't use `one_time_burst` let mut bucket: TokenBucket = TokenBucket::new(size, 0, complete_refill_time_ms).unwrap(); bucket.auto_replenish(); assert!(bucket.is_valid()); } #[kani::proof] #[kani::stub(std::time::Instant::now, stubs::instant_now)] #[kani::stub(TokenBucket::auto_replenish, stubs::token_bucket_auto_replenish)] #[kani::stub_verified(gcd)] #[kani::solver(cadical)] fn verify_token_bucket_reduce() { let mut token_bucket: TokenBucket = kani::any(); let old_token_bucket = token_bucket.clone(); let tokens = kani::any(); let result = token_bucket.reduce(tokens); assert!(token_bucket.is_valid()); assert!(token_bucket.one_time_burst <= old_token_bucket.one_time_burst); // Initial burst always gets used up before budget. Read assertion as implication, i.e., // `token_bucket.budget != old_token_bucket.budget => token_bucket.one_time_burst == 0`. assert!(token_bucket.budget == old_token_bucket.budget || token_bucket.one_time_burst == 0); // If reduction failed, bucket state should not change. if result == BucketReduction::Failure { // In case of a failure, no budget should have been consumed. However, since `reduce` // attempts to call `auto_replenish`, the budget could actually have // increased. assert!(token_bucket.budget >= old_token_bucket.budget); assert!(token_bucket.one_time_burst == old_token_bucket.one_time_burst); // Ensure that it is possible to trigger the BucketReduction::Failure case at all. // kani::cover makes verification fail if no possible execution path reaches // this line. 
kani::cover!(); } } #[kani::proof] #[kani::stub(std::time::Instant::now, stubs::instant_now)] #[kani::stub_verified(gcd)] #[kani::stub(TokenBucket::auto_replenish, stubs::token_bucket_auto_replenish)] fn verify_token_bucket_force_replenish() { let mut token_bucket: TokenBucket = kani::any(); token_bucket.reduce(kani::any()); let reduced_budget = token_bucket.budget; let reduced_burst = token_bucket.one_time_burst; let to_replenish = kani::any(); token_bucket.force_replenish(to_replenish); assert!(token_bucket.is_valid()); assert!(token_bucket.budget >= reduced_budget); assert!(token_bucket.one_time_burst >= reduced_burst); } } #[cfg(test)] pub(crate) mod tests { use std::thread; use std::time::Duration; use super::*; // Define custom refill interval to be a bit bigger. This will help // in tests which wait for a limiter refill in 2 stages. This will make it so // second wait will always result in the limiter being refilled. Otherwise // there is a chance for a race condition between limiter refilling and limiter // checking. const TEST_REFILL_TIMER_DURATION: Duration = Duration::from_millis(110); impl TokenBucket { // Resets the token bucket: budget set to max capacity and last-updated set to now. fn reset(&mut self) { self.budget = self.size; self.last_update = Instant::now(); } fn get_last_update(&self) -> &Instant { &self.last_update } fn get_processed_capacity(&self) -> u64 { self.processed_capacity } fn get_processed_refill_time(&self) -> u64 { self.processed_refill_time } // After a restore, we cannot be certain that the last_update field has the same value. 
        pub(crate) fn partial_eq(&self, other: &TokenBucket) -> bool {
            (other.capacity() == self.capacity())
                && (other.one_time_burst() == self.one_time_burst())
                && (other.refill_time_ms() == self.refill_time_ms())
                && (other.budget() == self.budget())
        }
    }

    impl RateLimiter {
        // Test helper: read-only access to the bucket selected by `token_type`.
        fn get_token_bucket(&self, token_type: TokenType) -> Option<&TokenBucket> {
            match token_type {
                TokenType::Bytes => self.bandwidth.as_ref(),
                TokenType::Ops => self.ops.as_ref(),
            }
        }
    }

    #[test]
    fn test_token_bucket_auto_replenish_one() {
        // These values will give 1 token every 100 milliseconds
        const SIZE: u64 = 10;
        const TIME: u64 = 1000;
        let mut tb = TokenBucket::new(SIZE, 0, TIME).unwrap();
        tb.reduce(SIZE);
        assert_eq!(tb.budget(), 0);

        // Auto-replenishing after 10 milliseconds should not yield any tokens
        thread::sleep(Duration::from_millis(10));
        tb.auto_replenish();
        assert_eq!(tb.budget(), 0);

        // Neither after 20.
        thread::sleep(Duration::from_millis(10));
        tb.auto_replenish();
        assert_eq!(tb.budget(), 0);

        // We should get 1 token after 100 millis
        thread::sleep(Duration::from_millis(80));
        tb.auto_replenish();
        assert_eq!(tb.budget(), 1);

        // So, 5 after 500 millis
        thread::sleep(Duration::from_millis(400));
        tb.auto_replenish();
        assert_eq!(tb.budget(), 5);

        // And be fully replenished after 1 second.
        // Wait more here to make sure we do not overshoot
        thread::sleep(Duration::from_millis(1000));
        tb.auto_replenish();
        assert_eq!(tb.budget(), 10);
    }

    #[test]
    fn test_token_bucket_auto_replenish_two() {
        const SIZE: u64 = 1000;
        const TIME: u64 = 1000;
        let time = Duration::from_millis(TIME);

        let mut tb = TokenBucket::new(SIZE, 0, TIME).unwrap();
        tb.reduce(SIZE);
        assert_eq!(tb.budget(), 0);

        // Spin for the full refill interval; by the end the bucket must be full again.
        let now = Instant::now();
        while now.elapsed() < time {
            tb.auto_replenish();
        }
        tb.auto_replenish();
        assert_eq!(tb.budget(), SIZE);
    }

    #[test]
    fn test_token_bucket_create() {
        let before = Instant::now();
        let tb = TokenBucket::new(1000, 0, 1000).unwrap();
        assert_eq!(tb.capacity(), 1000);
        assert_eq!(tb.budget(), 1000);
        // last_update must be sampled between `before` and `after`.
        assert!(*tb.get_last_update() >= before);
        let after = Instant::now();
        assert!(*tb.get_last_update() <= after);
        assert_eq!(tb.get_processed_capacity(), 1);
        assert_eq!(tb.get_processed_refill_time(), 1_000_000);

        // Verify invalid bucket configurations result in `None`.
        assert!(TokenBucket::new(0, 1234, 1000).is_none());
        assert!(TokenBucket::new(100, 1234, 0).is_none());
        assert!(TokenBucket::new(0, 1234, 0).is_none());
    }

    #[test]
    fn test_token_bucket_preprocess() {
        let tb = TokenBucket::new(1000, 0, 1000).unwrap();
        assert_eq!(tb.get_processed_capacity(), 1);
        assert_eq!(tb.get_processed_refill_time(), NANOSEC_IN_ONE_MILLISEC);

        // Capacity and refill time share common factors (7 * 11); the processed
        // values must be reduced by their greatest common divisor.
        let thousand = 1000;
        let tb = TokenBucket::new(3 * 7 * 11 * 19 * thousand, 0, 7 * 11 * 13 * 17).unwrap();
        assert_eq!(tb.get_processed_capacity(), 3 * 19);
        assert_eq!(
            tb.get_processed_refill_time(),
            13 * 17 * (NANOSEC_IN_ONE_MILLISEC / thousand)
        );
    }

    #[test]
    fn test_token_bucket_reduce() {
        // token bucket with capacity 1000 and refill time of 1000 milliseconds
        // allowing rate of 1 token/ms.
        let capacity = 1000;
        let refill_ms = 1000;
        let mut tb = TokenBucket::new(capacity, 0, refill_ms).unwrap();

        assert_eq!(tb.reduce(123), BucketReduction::Success);
        assert_eq!(tb.budget(), capacity - 123);
        assert_eq!(tb.reduce(capacity), BucketReduction::Failure);

        // token bucket with capacity 1000 and refill time of 1000 milliseconds
        let mut tb = TokenBucket::new(1000, 1100, 1000).unwrap();
        // safely assuming the thread can run these 3 commands in less than 500ms
        assert_eq!(tb.reduce(1000), BucketReduction::Success);
        assert_eq!(tb.one_time_burst(), 100);
        assert_eq!(tb.reduce(500), BucketReduction::Success);
        assert_eq!(tb.one_time_burst(), 0);
        assert_eq!(tb.reduce(500), BucketReduction::Success);
        assert_eq!(tb.reduce(500), BucketReduction::Failure);
        thread::sleep(Duration::from_millis(500));
        assert_eq!(tb.reduce(500), BucketReduction::Success);
        thread::sleep(Duration::from_millis(1000));
        assert_eq!(tb.reduce(2500), BucketReduction::OverConsumption(1.5));

        let before = Instant::now();
        tb.reset();
        assert_eq!(tb.capacity(), 1000);
        assert_eq!(tb.budget(), 1000);
        assert!(*tb.get_last_update() >= before);
        let after = Instant::now();
        assert!(*tb.get_last_update() <= after);
    }

    #[test]
    fn test_rate_limiter_default() {
        let mut l = RateLimiter::default();

        // limiter should not be blocked
        assert!(!l.is_blocked());
        // limiter should be disabled so consume(whatever) should work
        assert!(l.consume(u64::MAX, TokenType::Ops));
        assert!(l.consume(u64::MAX, TokenType::Bytes));
        // calling the handler without there having been an event should error
        let err = l.event_handler().unwrap_err();
        assert!(
            matches!(err, RateLimiterError::SpuriousRateLimiterEvent),
            "{:?}",
            err
        );
    }

    #[test]
    fn test_rate_limiter_new() {
        let l = RateLimiter::new(1000, 1001, 1002, 1003, 1004, 1005).unwrap();

        let bw = l.bandwidth.unwrap();
        assert_eq!(bw.capacity(), 1000);
        assert_eq!(bw.one_time_burst(), 1001);
        assert_eq!(bw.refill_time_ms(), 1002);
        assert_eq!(bw.budget(), 1000);

        let ops = l.ops.unwrap();
        assert_eq!(ops.capacity(), 1003);
        assert_eq!(ops.one_time_burst(), 1004);
        assert_eq!(ops.refill_time_ms(), 1005);
        assert_eq!(ops.budget(), 1003);
    }

    #[test]
    fn test_rate_limiter_manual_replenish() {
        // rate limiter with limit of 1000 bytes/s and 1000 ops/s
        let mut l = RateLimiter::new(1000, 0, 1000, 1000, 0, 1000).unwrap();

        // consume 123 bytes
        assert!(l.consume(123, TokenType::Bytes));
        l.manual_replenish(23, TokenType::Bytes);
        {
            let bytes_tb = l.get_token_bucket(TokenType::Bytes).unwrap();
            assert_eq!(bytes_tb.budget(), 900);
        }
        // consume 123 ops
        assert!(l.consume(123, TokenType::Ops));
        l.manual_replenish(23, TokenType::Ops);
        {
            let bytes_tb = l.get_token_bucket(TokenType::Ops).unwrap();
            assert_eq!(bytes_tb.budget(), 900);
        }
    }

    #[test]
    fn test_rate_limiter_bandwidth() {
        // rate limiter with limit of 1000 bytes/s
        let mut l = RateLimiter::new(1000, 0, 1000, 0, 0, 0).unwrap();

        // limiter should not be blocked
        assert!(!l.is_blocked());
        // raw FD for this disabled should be valid
        assert!(l.as_raw_fd() > 0);

        // ops/s limiter should be disabled so consume(whatever) should work
        assert!(l.consume(u64::MAX, TokenType::Ops));

        // do full 1000 bytes
        assert!(l.consume(1000, TokenType::Bytes));
        // try and fail on another 100
        assert!(!l.consume(100, TokenType::Bytes));
        // since consume failed, limiter should be blocked now
        assert!(l.is_blocked());
        // wait half the timer period
        thread::sleep(TEST_REFILL_TIMER_DURATION / 2);
        // limiter should still be blocked
        assert!(l.is_blocked());
        // wait the other half of the timer period
        thread::sleep(TEST_REFILL_TIMER_DURATION / 2);
        // the timer_fd should have an event on it by now
        l.event_handler().unwrap();
        // limiter should now be unblocked
        assert!(!l.is_blocked());
        // try and succeed on another 100 bytes this time
        assert!(l.consume(100, TokenType::Bytes));
    }

    #[test]
    fn test_rate_limiter_ops() {
        // rate limiter with limit of 1000 ops/s
        let mut l = RateLimiter::new(0, 0, 0, 1000, 0, 1000).unwrap();

        // limiter should not be blocked
        assert!(!l.is_blocked());
        // raw FD for this disabled should be valid
        assert!(l.as_raw_fd() > 0);

        // bytes/s limiter should be disabled so consume(whatever) should work
        assert!(l.consume(u64::MAX, TokenType::Bytes));

        // do full 1000 ops
        assert!(l.consume(1000, TokenType::Ops));
        // try and fail on another 100
        assert!(!l.consume(100, TokenType::Ops));
        // since consume failed, limiter should be blocked now
        assert!(l.is_blocked());
        // wait half the timer period
        thread::sleep(TEST_REFILL_TIMER_DURATION / 2);
        // limiter should still be blocked
        assert!(l.is_blocked());
        // wait the other half of the timer period
        thread::sleep(TEST_REFILL_TIMER_DURATION / 2);
        // the timer_fd should have an event on it by now
        l.event_handler().unwrap();
        // limiter should now be unblocked
        assert!(!l.is_blocked());
        // try and succeed on another 100 ops this time
        assert!(l.consume(100, TokenType::Ops));
    }

    #[test]
    fn test_rate_limiter_full() {
        // rate limiter with limit of 1000 bytes/s and 1000 ops/s
        let mut l = RateLimiter::new(1000, 0, 1000, 1000, 0, 1000).unwrap();

        // limiter should not be blocked
        assert!(!l.is_blocked());
        // raw FD for this disabled should be valid
        assert!(l.as_raw_fd() > 0);

        // do full 1000 bytes
        assert!(l.consume(1000, TokenType::Ops));
        // do full 1000 bytes
        assert!(l.consume(1000, TokenType::Bytes));
        // try and fail on another 100 ops
        assert!(!l.consume(100, TokenType::Ops));
        // try and fail on another 100 bytes
        assert!(!l.consume(100, TokenType::Bytes));
        // since consume failed, limiter should be blocked now
        assert!(l.is_blocked());
        // wait half the timer period
        thread::sleep(TEST_REFILL_TIMER_DURATION / 2);
        // limiter should still be blocked
        assert!(l.is_blocked());
        // wait the other half of the timer period
        thread::sleep(TEST_REFILL_TIMER_DURATION / 2);
        // the timer_fd should have an event on it by now
        l.event_handler().unwrap();
        // limiter should now be unblocked
        assert!(!l.is_blocked());
        // try and succeed on another 100 ops this time
        assert!(l.consume(100, TokenType::Ops));
        // try and succeed on another 100 bytes this time
        assert!(l.consume(100, TokenType::Bytes));
    }

    #[test]
    fn test_rate_limiter_overconsumption() {
        // initialize the rate limiter
        let mut l = RateLimiter::new(1000, 0, 1000, 1000, 0, 1000).unwrap();
        // try to consume 2.5x the bucket size
        // we are "borrowing" 1.5x the bucket size in tokens since
        // the bucket is full
        assert!(l.consume(2500, TokenType::Bytes));

        // check that even after a whole second passes, the rate limiter
        // is still blocked
        thread::sleep(Duration::from_millis(1000));
        l.event_handler().unwrap_err();
        assert!(l.is_blocked());

        // after 1.5x the replenish time has passed, the rate limiter
        // is available again
        thread::sleep(Duration::from_millis(500));
        l.event_handler().unwrap();
        assert!(!l.is_blocked());

        // reset the rate limiter
        let mut l = RateLimiter::new(1000, 0, 1000, 1000, 0, 1000).unwrap();
        // try to consume 1.5x the bucket size
        // we are "borrowing" 1.5x the bucket size in tokens since
        // the bucket is full, should arm the timer to 0.5x replenish
        // time, which is 500 ms
        assert!(l.consume(1500, TokenType::Bytes));

        // check that after more than the minimum refill time,
        // the rate limiter is still blocked
        thread::sleep(Duration::from_millis(200));
        l.event_handler().unwrap_err();
        assert!(l.is_blocked());

        // try to consume some tokens, which should fail as the timer
        // is still active
        assert!(!l.consume(100, TokenType::Bytes));
        l.event_handler().unwrap_err();
        assert!(l.is_blocked());

        // check that after the minimum refill time, the timer was not
        // overwritten and the rate limiter is still blocked from the
        // borrowing we performed earlier
        thread::sleep(Duration::from_millis(100));
        l.event_handler().unwrap_err();
        assert!(l.is_blocked());
        assert!(!l.consume(100, TokenType::Bytes));

        // after waiting out the full duration, rate limiter should be
        // available again
        thread::sleep(Duration::from_millis(200));
        l.event_handler().unwrap();
        assert!(!l.is_blocked());
        assert!(l.consume(100, TokenType::Bytes));
    }

    #[test]
    fn test_update_buckets() {
        let mut x = RateLimiter::new(1000, 2000, 1000, 10, 20, 1000).unwrap();

        let initial_bw = x.bandwidth.clone();
        let initial_ops = x.ops.clone();

        x.update_buckets(BucketUpdate::None, BucketUpdate::None);
        assert_eq!(x.bandwidth, initial_bw);
        assert_eq!(x.ops, initial_ops);

        let new_bw = TokenBucket::new(123, 0, 57).unwrap();
        let new_ops = TokenBucket::new(321, 12346, 89).unwrap();
        x.update_buckets(
            BucketUpdate::Update(new_bw.clone()),
            BucketUpdate::Update(new_ops.clone()),
        );

        // We have to manually adjust the last_update field, because it changes when
        // update_buckets() constructs new buckets (and thus gets a different value for
        // last_update). We do this so it makes sense to test the following assertions.
        x.bandwidth.as_mut().unwrap().last_update = new_bw.last_update;
        x.ops.as_mut().unwrap().last_update = new_ops.last_update;

        assert_eq!(x.bandwidth, Some(new_bw));
        assert_eq!(x.ops, Some(new_ops));

        x.update_buckets(BucketUpdate::Disabled, BucketUpdate::Disabled);
        assert_eq!(x.bandwidth, None);
        assert_eq!(x.ops, None);
    }

    #[test]
    fn test_rate_limiter_debug() {
        let l = RateLimiter::new(1, 2, 3, 4, 5, 6).unwrap();
        assert_eq!(
            format!("{:?}", l),
            format!(
                "RateLimiter {{ bandwidth: {:?}, ops: {:?} }}",
                l.bandwidth(),
                l.ops()
            ),
        );
    }
}

================================================
FILE: src/vmm/src/rate_limiter/persist.rs
================================================
// Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

//! Defines the structures needed for saving/restoring a RateLimiter.

use serde::{Deserialize, Serialize};
use utils::time::TimerFd;

use super::*;
use crate::snapshot::Persist;

/// State for saving a TokenBucket.
#[derive(Debug, Clone, Serialize, Deserialize)] pub struct TokenBucketState { size: u64, one_time_burst: u64, refill_time: u64, budget: u64, elapsed_ns: u64, } impl Persist<'_> for TokenBucket { type State = TokenBucketState; type ConstructorArgs = (); type Error = io::Error; fn save(&self) -> Self::State { TokenBucketState { size: self.size, one_time_burst: self.one_time_burst, refill_time: self.refill_time, budget: self.budget, // This should be safe for a duration of about 584 years. elapsed_ns: u64::try_from(self.last_update.elapsed().as_nanos()).unwrap(), } } fn restore(_: Self::ConstructorArgs, state: &Self::State) -> Result { let now = Instant::now(); let last_update = now .checked_sub(Duration::from_nanos(state.elapsed_ns)) .unwrap_or(now); let mut token_bucket = TokenBucket::new(state.size, state.one_time_burst, state.refill_time) .ok_or_else(|| io::Error::from(io::ErrorKind::InvalidInput))?; token_bucket.budget = state.budget; token_bucket.last_update = last_update; Ok(token_bucket) } } /// State for saving a RateLimiter. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct RateLimiterState { ops: Option, bandwidth: Option, } impl Persist<'_> for RateLimiter { type State = RateLimiterState; type ConstructorArgs = (); type Error = io::Error; fn save(&self) -> Self::State { RateLimiterState { ops: self.ops.as_ref().map(|ops| ops.save()), bandwidth: self.bandwidth.as_ref().map(|bw| bw.save()), } } fn restore(_: Self::ConstructorArgs, state: &Self::State) -> Result { let rate_limiter = RateLimiter { ops: if let Some(ops) = state.ops.as_ref() { Some(TokenBucket::restore((), ops)?) } else { None }, bandwidth: if let Some(bw) = state.bandwidth.as_ref() { Some(TokenBucket::restore((), bw)?) 
} else { None }, timer_fd: TimerFd::new(), timer_active: false, }; Ok(rate_limiter) } } #[cfg(test)] mod tests { use super::*; #[test] fn test_token_bucket_persistence() { let mut tb = TokenBucket::new(1000, 2000, 3000).unwrap(); // Check that TokenBucket restores correctly if untouched. let restored_tb = TokenBucket::restore((), &tb.save()).unwrap(); assert!(tb.partial_eq(&restored_tb)); // Check that TokenBucket restores correctly after partially consuming tokens. tb.reduce(100); let restored_tb = TokenBucket::restore((), &tb.save()).unwrap(); assert!(tb.partial_eq(&restored_tb)); // Check that TokenBucket restores correctly after replenishing tokens. tb.force_replenish(100); let restored_tb = TokenBucket::restore((), &tb.save()).unwrap(); assert!(tb.partial_eq(&restored_tb)); // Test serialization. let tb_state = tb.save(); let serialized_data = bitcode::serialize(&tb_state).unwrap(); let restored_state = bitcode::deserialize(&serialized_data).unwrap(); let restored_tb = TokenBucket::restore((), &restored_state).unwrap(); assert!(tb.partial_eq(&restored_tb)); } #[test] fn test_rate_limiter_persistence() { let refill_time = 100_000; let mut rate_limiter = RateLimiter::new(100, 0, refill_time, 10, 0, refill_time).unwrap(); // Check that RateLimiter restores correctly if untouched. let restored_rate_limiter = RateLimiter::restore((), &rate_limiter.save()).expect("Unable to restore rate limiter"); assert!( rate_limiter .ops() .unwrap() .partial_eq(restored_rate_limiter.ops().unwrap()) ); assert!( rate_limiter .bandwidth() .unwrap() .partial_eq(restored_rate_limiter.bandwidth().unwrap()) ); assert!(!restored_rate_limiter.timer_fd.is_armed()); // Check that RateLimiter restores correctly after partially consuming tokens. 
rate_limiter.consume(10, TokenType::Bytes); rate_limiter.consume(10, TokenType::Ops); let restored_rate_limiter = RateLimiter::restore((), &rate_limiter.save()).expect("Unable to restore rate limiter"); assert!( rate_limiter .ops() .unwrap() .partial_eq(restored_rate_limiter.ops().unwrap()) ); assert!( rate_limiter .bandwidth() .unwrap() .partial_eq(restored_rate_limiter.bandwidth().unwrap()) ); assert!(!restored_rate_limiter.timer_fd.is_armed()); // Check that RateLimiter restores correctly after totally consuming tokens. rate_limiter.consume(1000, TokenType::Bytes); let restored_rate_limiter = RateLimiter::restore((), &rate_limiter.save()).expect("Unable to restore rate limiter"); assert!( rate_limiter .ops() .unwrap() .partial_eq(restored_rate_limiter.ops().unwrap()) ); assert!( rate_limiter .bandwidth() .unwrap() .partial_eq(restored_rate_limiter.bandwidth().unwrap()) ); // Test serialization. let rate_limiter_state = rate_limiter.save(); let serialized_data = bitcode::serialize(&rate_limiter_state).unwrap(); let restored_state = bitcode::deserialize(&serialized_data).unwrap(); let restored_rate_limiter = RateLimiter::restore((), &restored_state).unwrap(); assert!( rate_limiter .ops() .unwrap() .partial_eq(restored_rate_limiter.ops().unwrap()) ); assert!( rate_limiter .bandwidth() .unwrap() .partial_eq(restored_rate_limiter.bandwidth().unwrap()) ); } } ================================================ FILE: src/vmm/src/resources.rs ================================================ // Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
// SPDX-License-Identifier: Apache-2.0 use std::convert::From; use std::path::PathBuf; use std::sync::{Arc, Mutex, MutexGuard}; use serde::{Deserialize, Serialize}; use vm_memory::GuestAddress; use crate::cpu_config::templates::CustomCpuTemplate; use crate::devices::virtio::device::VirtioDevice; use crate::logger::{LoggerConfig, info}; use crate::mmds; use crate::mmds::data_store::{Mmds, MmdsVersion}; use crate::mmds::ns::MmdsNetworkStack; use crate::utils::mib_to_bytes; use crate::utils::net::ipv4addr::is_link_local_valid; use crate::vmm_config::balloon::*; use crate::vmm_config::boot_source::{ BootConfig, BootSource, BootSourceConfig, BootSourceConfigError, }; use crate::vmm_config::drive::*; use crate::vmm_config::entropy::*; use crate::vmm_config::instance_info::InstanceInfo; use crate::vmm_config::machine_config::{MachineConfig, MachineConfigError, MachineConfigUpdate}; use crate::vmm_config::memory_hotplug::{MemoryHotplugConfig, MemoryHotplugConfigError}; use crate::vmm_config::metrics::{MetricsConfig, MetricsConfigError, init_metrics}; use crate::vmm_config::mmds::{MmdsConfig, MmdsConfigError}; use crate::vmm_config::net::*; use crate::vmm_config::pmem::{PmemBuilder, PmemConfig, PmemConfigError}; use crate::vmm_config::serial::SerialConfig; use crate::vmm_config::vsock::*; use crate::vstate::memory; use crate::vstate::memory::{GuestRegionMmap, MemoryError}; /// Errors encountered when configuring microVM resources. 
#[derive(Debug, thiserror::Error, displaydoc::Display)] pub enum ResourcesError { /// Balloon device error: {0} BalloonDevice(#[from] BalloonConfigError), /// Block device error: {0} BlockDevice(#[from] DriveError), /// Boot source error: {0} BootSource(#[from] BootSourceConfigError), /// File operation error: {0} File(#[from] std::io::Error), /// Invalid JSON: {0} InvalidJson(#[from] serde_json::Error), /// Logger error: {0} Logger(#[from] crate::logger::LoggerUpdateError), /// Metrics error: {0} Metrics(#[from] MetricsConfigError), /// MMDS error: {0} Mmds(#[from] mmds::data_store::MmdsDatastoreError), /// MMDS config error: {0} MmdsConfig(#[from] MmdsConfigError), /// Network device error: {0} NetDevice(#[from] NetworkInterfaceError), /// VM config error: {0} MachineConfig(#[from] MachineConfigError), /// Vsock device error: {0} VsockDevice(#[from] VsockConfigError), /// Entropy device error: {0} EntropyDevice(#[from] EntropyDeviceError), /// Pmem device error: {0} PmemDevice(#[from] PmemConfigError), /// Memory hotplug config error: {0} MemoryHotplugConfig(#[from] MemoryHotplugConfigError), } #[derive(Serialize, Deserialize, PartialEq, Eq, Debug)] #[serde(untagged)] #[allow(missing_docs)] pub enum CustomCpuTemplateOrPath { Path(PathBuf), Template(CustomCpuTemplate), } /// Used for configuring a vmm from one single json passed to the Firecracker process. 
#[derive(Debug, Default, PartialEq, Eq, Deserialize, Serialize)] #[serde(rename_all = "kebab-case")] #[allow(missing_docs)] pub struct VmmConfig { pub balloon: Option, pub drives: Vec, pub boot_source: BootSourceConfig, pub cpu_config: Option, pub logger: Option, pub machine_config: Option, pub metrics: Option, pub mmds_config: Option, #[serde(default)] pub network_interfaces: Vec, pub vsock: Option, pub entropy: Option, #[serde(default, rename = "pmem")] pub pmem_devices: Vec, #[serde(skip)] pub serial_config: Option, pub memory_hotplug: Option, } /// A data structure that encapsulates the device configurations /// held in the Vmm. #[derive(Debug, Default)] pub struct VmResources { /// The vCpu and memory configuration for this microVM. pub machine_config: MachineConfig, /// The boot source spec (contains both config and builder) for this microVM. pub boot_source: BootSource, /// The block devices. pub block: BlockBuilder, /// The vsock device. pub vsock: VsockBuilder, /// The balloon device. pub balloon: BalloonBuilder, /// The network devices builder. pub net_builder: NetBuilder, /// The entropy device builder. pub entropy: EntropyDeviceBuilder, /// The pmem devices. pub pmem: PmemBuilder, /// The memory hotplug configuration. pub memory_hotplug: Option, /// The optional Mmds data store. // This is initialised on demand (if ever used), so that we don't allocate it unless it's // actually used. pub mmds: Option>>, /// Data store limit for the mmds. pub mmds_size_limit: usize, /// Whether or not to load boot timer device. pub boot_timer: bool, /// Whether or not to use PCIe transport for VirtIO devices. pub pci_enabled: bool, /// Where serial console output should be written to pub serial_out_path: Option, } impl VmResources { /// Configures Vmm resources as described by the `config_json` param. 
pub fn from_json(
        config_json: &str,
        instance_info: &InstanceInfo,
        mmds_size_limit: usize,
        metadata_json: Option<&str>,
    ) -> Result {
        // Parse the whole one-shot configuration first; any serde failure maps
        // to `ResourcesError::InvalidJson` via `#[from]`.
        let vmm_config = serde_json::from_str::(config_json)?;
        // Logger/metrics are process-global side effects and are applied
        // before any resource is built.
        if let Some(logger_config) = vmm_config.logger {
            crate::logger::LOGGER.update(logger_config)?;
        }
        if let Some(metrics) = vmm_config.metrics {
            init_metrics(metrics)?;
        }
        let mut resources: Self = Self {
            mmds_size_limit,
            ..Default::default()
        };
        if let Some(machine_config) = vmm_config.machine_config {
            let machine_config = MachineConfigUpdate::from(machine_config);
            resources.update_machine_config(&machine_config)?;
        }
        // `cpu-config` is either a path to a template file or an inline template.
        if let Some(either) = vmm_config.cpu_config {
            match either {
                CustomCpuTemplateOrPath::Path(path) => {
                    let cpu_config_json =
                        std::fs::read_to_string(path).map_err(ResourcesError::File)?;
                    let cpu_template = CustomCpuTemplate::try_from(cpu_config_json.as_str())?;
                    resources.set_custom_cpu_template(cpu_template);
                }
                CustomCpuTemplateOrPath::Template(template) => {
                    resources.set_custom_cpu_template(template)
                }
            }
        }
        resources.build_boot_source(vmm_config.boot_source)?;
        for drive_config in vmm_config.drives.into_iter() {
            resources.set_block_device(drive_config)?;
        }
        for net_config in vmm_config.network_interfaces.into_iter() {
            resources.build_net_device(net_config)?;
        }
        if let Some(vsock_config) = vmm_config.vsock {
            resources.set_vsock_device(vsock_config)?;
        }
        if let Some(balloon_config) = vmm_config.balloon {
            resources.set_balloon_device(balloon_config)?;
        }
        // Init the data store from file, if present.
        if let Some(data) = metadata_json {
            resources.locked_mmds_or_default()?.put_data(
                serde_json::from_str(data).expect("MMDS error: metadata provided not valid json"),
            )?;
            info!("Successfully added metadata to mmds from file");
        }
        if let Some(mmds_config) = vmm_config.mmds_config {
            resources.set_mmds_config(mmds_config, &instance_info.id)?;
        }
        if let Some(entropy_device_config) = vmm_config.entropy {
            resources.build_entropy_device(entropy_device_config)?;
        }
        for pmem_config in vmm_config.pmem_devices.into_iter() {
            resources.build_pmem_device(pmem_config)?;
        }
        if let Some(serial_cfg) = vmm_config.serial_config {
            resources.serial_out_path = serial_cfg.serial_out_path;
        }
        if let Some(memory_hotplug_config) = vmm_config.memory_hotplug {
            resources.set_memory_hotplug_config(memory_hotplug_config)?;
        }
        Ok(resources)
    }

    /// If not initialised, create the mmds data store with the default config.
    pub fn mmds_or_default(&mut self) -> Result<&Arc>, MmdsConfigError> {
        // Lazy init: the store is only allocated on first use.
        Ok(self
            .mmds
            .get_or_insert(Arc::new(Mutex::new(Mmds::try_new(self.mmds_size_limit)?))))
    }

    /// If not initialised, create the mmds data store with the default config.
    /// Convenience wrapper that also takes the lock.
    pub fn locked_mmds_or_default(&mut self) -> Result, MmdsConfigError> {
        let mmds = self.mmds_or_default()?;
        Ok(mmds.lock().expect("Poisoned lock"))
    }

    /// Add a custom CPU template to the VM resources
    /// to configure vCPUs.
    pub fn set_custom_cpu_template(&mut self, cpu_template: CustomCpuTemplate) {
        self.machine_config.set_custom_cpu_template(cpu_template);
    }

    /// Updates the configuration of the microVM.
    pub fn update_machine_config(
        &mut self,
        update: &MachineConfigUpdate,
    ) -> Result<(), MachineConfigError> {
        // Validate/merge first; only commit to `self` after the cross-check below.
        let updated = self.machine_config.update(update)?;
        // The VM cannot have a memory size smaller than the target size
        // of the balloon device, if present.
        if self.balloon.get().is_some()
            && updated.mem_size_mib
                < self
                    .balloon
                    .get_config()
                    .map_err(|_| MachineConfigError::InvalidVmState)?
                    .amount_mib as usize
        {
            return Err(MachineConfigError::IncompatibleBalloonSize);
        }
        self.machine_config = updated;
        Ok(())
    }

    // Repopulate the MmdsConfig based on information from the data store
    // and the associated net devices.
    fn mmds_config(&self) -> Option {
        // If the data store is not initialised, we can be sure that the user did not configure
        // mmds.
        let mmds = self.mmds.as_ref()?;
        let mut mmds_config = None;
        let net_devs_with_mmds: Vec<_> = self
            .net_builder
            .iter()
            .filter(|net| net.lock().expect("Poisoned lock").mmds_ns().is_some())
            .collect();
        if !net_devs_with_mmds.is_empty() {
            let mmds_guard = mmds.lock().expect("Poisoned lock");
            let mut inner_mmds_config = MmdsConfig {
                version: mmds_guard.version(),
                network_interfaces: vec![],
                ipv4_address: None,
                imds_compat: mmds_guard.imds_compat(),
            };
            for net_dev in net_devs_with_mmds {
                let net = net_dev.lock().unwrap();
                inner_mmds_config
                    .network_interfaces
                    .push(net.id().to_string());
                // Only need to get one ip address, as they will all be equal.
                if inner_mmds_config.ipv4_address.is_none() {
                    // Safe to unwrap the mmds_ns as the filter() explicitly checks for
                    // its existence.
                    inner_mmds_config.ipv4_address = Some(net.mmds_ns().unwrap().ipv4_addr());
                }
            }
            mmds_config = Some(inner_mmds_config);
        }
        mmds_config
    }

    /// Sets a balloon device to be attached when the VM starts.
    pub fn set_balloon_device(
        &mut self,
        config: BalloonDeviceConfig,
    ) -> Result<(), BalloonConfigError> {
        // The balloon cannot have a target size greater than the size of
        // the guest memory.
        if config.amount_mib as usize > self.machine_config.mem_size_mib {
            return Err(BalloonConfigError::TooManyPagesRequested);
        }
        self.balloon.set(config)
    }

    /// Obtains the boot source hooks (kernel fd, command line creation and validation).
pub fn build_boot_source( &mut self, boot_source_cfg: BootSourceConfig, ) -> Result<(), BootSourceConfigError> { self.boot_source = BootSource { builder: Some(BootConfig::new(&boot_source_cfg)?), config: boot_source_cfg, }; Ok(()) } /// Inserts a block to be attached when the VM starts. // Only call this function as part of user configuration. // If the drive_id does not exist, a new Block Device Config is added to the list. pub fn set_block_device( &mut self, block_device_config: BlockDeviceConfig, ) -> Result<(), DriveError> { let has_pmem_root = self.pmem.has_root_device(); self.block.insert(block_device_config, has_pmem_root) } /// Builds a network device to be attached when the VM starts. pub fn build_net_device( &mut self, body: NetworkInterfaceConfig, ) -> Result<(), NetworkInterfaceError> { let _ = self.net_builder.build(body)?; Ok(()) } /// Sets a vsock device to be attached when the VM starts. pub fn set_vsock_device(&mut self, config: VsockDeviceConfig) -> Result<(), VsockConfigError> { self.vsock.insert(config) } /// Builds an entropy device to be attached when the VM starts. pub fn build_entropy_device( &mut self, body: EntropyDeviceConfig, ) -> Result<(), EntropyDeviceError> { self.entropy.insert(body) } /// Builds a pmem device to be attached when the VM starts. pub fn build_pmem_device(&mut self, body: PmemConfig) -> Result<(), PmemConfigError> { let has_block_root = self.block.has_root_device(); self.pmem.build(body, has_block_root) } /// Sets the memory hotplug configuration. pub fn set_memory_hotplug_config( &mut self, config: MemoryHotplugConfig, ) -> Result<(), MemoryHotplugConfigError> { config.validate()?; self.memory_hotplug = Some(config); Ok(()) } /// Setter for mmds config. 
pub fn set_mmds_config(
        &mut self,
        config: MmdsConfig,
        instance_id: &str,
    ) -> Result<(), MmdsConfigError> {
        // Configure the per-interface network stack first, then the data
        // store itself (version, IMDS compatibility, AAD).
        self.set_mmds_network_stack_config(&config)?;
        self.set_mmds_basic_config(config.version, config.imds_compat, instance_id)?;
        Ok(())
    }

    /// Updates MMDS-related config other than MMDS network stack.
    pub fn set_mmds_basic_config(
        &mut self,
        version: MmdsVersion,
        imds_compat: bool,
        instance_id: &str,
    ) -> Result<(), MmdsConfigError> {
        let mut mmds_guard = self.locked_mmds_or_default()?;
        mmds_guard.set_version(version);
        mmds_guard.set_imds_compat(imds_compat);
        mmds_guard.set_aad(instance_id);
        Ok(())
    }

    // Updates MMDS Network Stack for network interfaces to allow forwarding
    // requests to MMDS (or not).
    fn set_mmds_network_stack_config(
        &mut self,
        config: &MmdsConfig,
    ) -> Result<(), MmdsConfigError> {
        // Check IPv4 address validity; absent address falls back to the
        // default link-local MMDS address.
        let ipv4_addr = match config.ipv4_addr() {
            Some(ipv4_addr) if is_link_local_valid(ipv4_addr) => Ok(ipv4_addr),
            None => Ok(MmdsNetworkStack::default_ipv4_addr()),
            _ => Err(MmdsConfigError::InvalidIpv4Addr),
        }?;
        let network_interfaces = config.network_interfaces();
        // Ensure that at least one network ID is specified.
        if network_interfaces.is_empty() {
            return Err(MmdsConfigError::EmptyNetworkIfaceList);
        }
        // Ensure all interface IDs specified correspond to existing net devices.
        if !network_interfaces.iter().all(|id| {
            self.net_builder
                .iter()
                .any(|device| device.lock().expect("Poisoned lock").id() == id)
        }) {
            return Err(MmdsConfigError::InvalidNetworkInterfaceId);
        }
        // Safe to unwrap because we've just made sure that it's initialised.
        let mmds = self.mmds_or_default()?.clone();
        // Create `MmdsNetworkStack` and configure the IPv4 address for
        // existing built network devices whose names are defined in the
        // network interface ID list.
        for net_device in self.net_builder.iter() {
            let mut net_device_lock = net_device.lock().expect("Poisoned lock");
            if network_interfaces.contains(&net_device_lock.id) {
                net_device_lock.configure_mmds_network_stack(ipv4_addr, mmds.clone());
            } else {
                // Interfaces not in the list must have any previous MMDS
                // stack removed so PATCHed configs take full effect.
                net_device_lock.disable_mmds_network_stack();
            }
        }
        Ok(())
    }

    /// Allocates the given guest memory regions.
    ///
    /// If vhost-user-blk devices are in use, allocates memfd-backed shared memory, otherwise
    /// prefers anonymous memory for performance reasons.
    // NOTE(review): return type lost a generic parameter to extraction
    // (`Result, MemoryError>`); restore from upstream.
    fn allocate_memory_regions(
        &self,
        regions: &[(GuestAddress, usize)],
    ) -> Result, MemoryError> {
        let vhost_user_device_used = self
            .block
            .devices
            .iter()
            .any(|b| b.lock().expect("Poisoned lock").is_vhost_user());
        // Page faults are more expensive for shared memory mapping, including memfd.
        // For this reason, we only back guest memory with a memfd
        // if a vhost-user-blk device is configured in the VM, otherwise we fall back to
        // an anonymous private memory.
        //
        // The vhost-user-blk branch is not currently covered by integration tests in Rust,
        // because that would require running a backend process. If in the future we converge to
        // a single way of backing guest memory for vhost-user and non-vhost-user cases,
        // that would not be worth the effort.
        if vhost_user_device_used {
            memory::memfd_backed(
                regions,
                self.machine_config.track_dirty_pages,
                self.machine_config.huge_pages,
            )
        } else {
            memory::anonymous(
                regions.iter().copied(),
                self.machine_config.track_dirty_pages,
                self.machine_config.huge_pages,
            )
        }
    }

    /// Allocates guest memory in a configuration most appropriate for these [`VmResources`].
    pub fn allocate_guest_memory(&self) -> Result, MemoryError> {
        let regions =
            crate::arch::arch_memory_regions(mib_to_bytes(self.machine_config.mem_size_mib));
        self.allocate_memory_regions(&regions)
    }

    /// Allocates a single guest memory region.
    pub fn allocate_memory_region(
        &self,
        start: GuestAddress,
        size: usize,
    ) -> Result {
        // Exactly one region was requested, so `pop` cannot fail.
        Ok(self
            .allocate_memory_regions(&[(start, size)])?
            .pop()
            .unwrap())
    }
}

// Reverse mapping used when exporting the running VM's configuration (e.g.
// for GET /vm/config): rebuild a `VmmConfig` snapshot from live resources.
impl From<&VmResources> for VmmConfig {
    fn from(resources: &VmResources) -> Self {
        VmmConfig {
            balloon: resources.balloon.get_config().ok(),
            drives: resources.block.configs(),
            boot_source: resources.boot_source.config.clone(),
            // One-shot-only fields (cpu_config, logger, metrics) are not
            // reconstructed.
            cpu_config: None,
            logger: None,
            machine_config: Some(resources.machine_config.clone()),
            metrics: None,
            mmds_config: resources.mmds_config(),
            network_interfaces: resources.net_builder.configs(),
            vsock: resources.vsock.config(),
            entropy: resources.entropy.config(),
            pmem_devices: resources.pmem.configs(),
            // serial_config is marked serde(skip) so that it doesnt end up in snapshots.
            serial_config: None,
            memory_hotplug: resources.memory_hotplug.clone(),
        }
    }
}

#[cfg(test)]
mod tests {
    use std::fs::File;
    use std::io::Write;
    use std::os::linux::fs::MetadataExt;
    use std::str::FromStr;

    use serde_json::{Map, Value};
    use vmm_sys_util::tempfile::TempFile;

    use super::*;
    use crate::HTTP_MAX_PAYLOAD_SIZE;
    use crate::cpu_config::templates::test_utils::TEST_TEMPLATE_JSON;
    use crate::cpu_config::templates::{CpuTemplateType, StaticCpuTemplate};
    use crate::devices::virtio::block::virtio::VirtioBlockError;
    use crate::devices::virtio::block::{BlockError, CacheType};
    use crate::devices::virtio::device::VirtioDevice;
    use crate::devices::virtio::vsock::VSOCK_DEV_ID;
    use crate::resources::VmResources;
    use crate::utils::net::mac::MacAddr;
    use crate::vmm_config::RateLimiterConfig;
    use crate::vmm_config::boot_source::{
        BootConfig, BootSource, BootSourceConfig, DEFAULT_KERNEL_CMDLINE,
    };
    use crate::vmm_config::drive::{BlockBuilder, BlockDeviceConfig};
    use crate::vmm_config::machine_config::{HugePageConfig, MachineConfig, MachineConfigError};
    use crate::vmm_config::net::{NetBuilder, NetworkInterfaceConfig};
    use crate::vmm_config::vsock::tests::default_config;

    /// Minimal valid network interface config used as a test fixture.
    fn default_net_cfg() -> NetworkInterfaceConfig {
        NetworkInterfaceConfig {
            iface_id: "net_if1".to_string(),
            // TempFile::new_with_prefix("") generates a random file name used as random net_if
name. host_dev_name: TempFile::new_with_prefix("") .unwrap() .as_path() .to_str() .unwrap() .to_string(), guest_mac: Some(MacAddr::from_str("01:23:45:67:89:0a").unwrap()), rx_rate_limiter: Some(RateLimiterConfig::default()), tx_rate_limiter: Some(RateLimiterConfig::default()), } } fn default_net_builder() -> NetBuilder { let mut net_builder = NetBuilder::new(); net_builder.build(default_net_cfg()).unwrap(); net_builder } fn default_block_cfg() -> (BlockDeviceConfig, TempFile) { let tmp_file = TempFile::new().unwrap(); ( BlockDeviceConfig { drive_id: "block1".to_string(), partuuid: Some("0eaa91a0-01".to_string()), is_root_device: false, cache_type: CacheType::Unsafe, is_read_only: Some(false), path_on_host: Some(tmp_file.as_path().to_str().unwrap().to_string()), rate_limiter: Some(RateLimiterConfig::default()), file_engine_type: None, socket: None, }, tmp_file, ) } fn default_blocks() -> BlockBuilder { let mut blocks = BlockBuilder::new(); let (cfg, _file) = default_block_cfg(); blocks.insert(cfg, false).unwrap(); blocks } fn default_boot_cfg() -> BootSource { let kernel_cmdline = linux_loader::cmdline::Cmdline::try_from(DEFAULT_KERNEL_CMDLINE, 4096).unwrap(); let tmp_file = TempFile::new().unwrap(); BootSource { config: BootSourceConfig::default(), builder: Some(BootConfig { cmdline: kernel_cmdline, kernel_file: File::open(tmp_file.as_path()).unwrap(), initrd_file: Some(File::open(tmp_file.as_path()).unwrap()), }), } } fn default_vm_resources() -> VmResources { VmResources { machine_config: MachineConfig::default(), boot_source: default_boot_cfg(), block: default_blocks(), vsock: Default::default(), balloon: Default::default(), net_builder: default_net_builder(), mmds: None, boot_timer: false, mmds_size_limit: HTTP_MAX_PAYLOAD_SIZE, entropy: Default::default(), pmem: Default::default(), pci_enabled: false, serial_out_path: None, memory_hotplug: Default::default(), } } #[test] fn test_from_json() { let kernel_file = TempFile::new().unwrap(); let rootfs_file = 
TempFile::new().unwrap(); let scratch_file = TempFile::new().unwrap(); scratch_file.as_file().set_len(0x1000).unwrap(); let default_instance_info = InstanceInfo::default(); // We will test different scenarios with invalid resources configuration and // check the expected errors. We include configuration for the kernel and rootfs // in every json because they are mandatory fields. If we don't configure // these resources, it is considered an invalid json and the test will crash. // Invalid JSON string must yield a `serde_json` error. let error = VmResources::from_json(r#"}"#, &default_instance_info, HTTP_MAX_PAYLOAD_SIZE, None) .unwrap_err(); assert!( matches!(error, ResourcesError::InvalidJson(_)), "{:?}", error ); // Valid JSON string without the configuration for kernel or rootfs // result in an invalid JSON error. let error = VmResources::from_json(r#"{}"#, &default_instance_info, HTTP_MAX_PAYLOAD_SIZE, None) .unwrap_err(); assert!( matches!(error, ResourcesError::InvalidJson(_)), "{:?}", error ); // Invalid kernel path. let mut json = format!( r#"{{ "boot-source": {{ "kernel_image_path": "/invalid/path", "boot_args": "console=ttyS0 reboot=k panic=1 pci=off" }}, "drives": [ {{ "drive_id": "rootfs", "path_on_host": "{}", "is_root_device": true, "is_read_only": false }} ] }}"#, rootfs_file.as_path().to_str().unwrap() ); let error = VmResources::from_json( json.as_str(), &default_instance_info, HTTP_MAX_PAYLOAD_SIZE, None, ) .unwrap_err(); assert!( matches!( error, ResourcesError::BootSource(BootSourceConfigError::InvalidKernelPath(_)) ), "{:?}", error ); // Invalid rootfs path. 
json = format!( r#"{{ "boot-source": {{ "kernel_image_path": "{}", "boot_args": "console=ttyS0 reboot=k panic=1 pci=off" }}, "drives": [ {{ "drive_id": "rootfs", "path_on_host": "/invalid/path", "is_root_device": true, "is_read_only": false }} ] }}"#, kernel_file.as_path().to_str().unwrap() ); let error = VmResources::from_json( json.as_str(), &default_instance_info, HTTP_MAX_PAYLOAD_SIZE, None, ) .unwrap_err(); assert!( matches!( error, ResourcesError::BlockDevice(DriveError::CreateBlockDevice( BlockError::VirtioBackend(VirtioBlockError::BackingFile(_, _)), )) ), "{:?}", error ); // Valid config for x86 but invalid on aarch64 since it uses cpu_template. json = format!( r#"{{ "boot-source": {{ "kernel_image_path": "{}", "boot_args": "console=ttyS0 reboot=k panic=1 pci=off" }}, "drives": [ {{ "drive_id": "rootfs", "path_on_host": "{}", "is_root_device": true, "is_read_only": false }} ], "machine-config": {{ "vcpu_count": 2, "mem_size_mib": 1024, "cpu_template": "C3" }} }}"#, kernel_file.as_path().to_str().unwrap(), rootfs_file.as_path().to_str().unwrap() ); #[cfg(target_arch = "x86_64")] VmResources::from_json( json.as_str(), &default_instance_info, HTTP_MAX_PAYLOAD_SIZE, None, ) .unwrap(); #[cfg(target_arch = "aarch64")] VmResources::from_json( json.as_str(), &default_instance_info, HTTP_MAX_PAYLOAD_SIZE, None, ) .unwrap_err(); // Invalid memory size. 
json = format!( r#"{{ "boot-source": {{ "kernel_image_path": "{}", "boot_args": "console=ttyS0 reboot=k panic=1 pci=off" }}, "drives": [ {{ "drive_id": "rootfs", "path_on_host": "{}", "is_root_device": true, "is_read_only": false }} ], "machine-config": {{ "vcpu_count": 2, "mem_size_mib": 0 }} }}"#, kernel_file.as_path().to_str().unwrap(), rootfs_file.as_path().to_str().unwrap() ); let error = VmResources::from_json( json.as_str(), &default_instance_info, HTTP_MAX_PAYLOAD_SIZE, None, ) .unwrap_err(); assert!( matches!( error, ResourcesError::MachineConfig(MachineConfigError::InvalidMemorySize) ), "{:?}", error ); // Invalid path for logger pipe. json = format!( r#"{{ "boot-source": {{ "kernel_image_path": "{}", "boot_args": "console=ttyS0 reboot=k panic=1 pci=off" }}, "drives": [ {{ "drive_id": "rootfs", "path_on_host": "{}", "is_root_device": true, "is_read_only": false }} ], "logger": {{ "log_path": "/invalid/path" }} }}"#, kernel_file.as_path().to_str().unwrap(), rootfs_file.as_path().to_str().unwrap() ); let error = VmResources::from_json( json.as_str(), &default_instance_info, HTTP_MAX_PAYLOAD_SIZE, None, ) .unwrap_err(); assert!( matches!( error, ResourcesError::Logger(crate::logger::LoggerUpdateError(_)) ), "{:?}", error ); // Invalid path for metrics pipe. json = format!( r#"{{ "boot-source": {{ "kernel_image_path": "{}", "boot_args": "console=ttyS0 reboot=k panic=1 pci=off" }}, "drives": [ {{ "drive_id": "rootfs", "path_on_host": "{}", "is_root_device": true, "is_read_only": false }} ], "metrics": {{ "metrics_path": "/invalid/path" }} }}"#, kernel_file.as_path().to_str().unwrap(), rootfs_file.as_path().to_str().unwrap() ); let error = VmResources::from_json( json.as_str(), &default_instance_info, HTTP_MAX_PAYLOAD_SIZE, None, ) .unwrap_err(); assert!( matches!( error, ResourcesError::Metrics(MetricsConfigError::InitializationFailure { .. }) ), "{:?}", error ); // Reuse of a host name. 
json = format!( r#"{{ "boot-source": {{ "kernel_image_path": "{}", "boot_args": "console=ttyS0 reboot=k panic=1 pci=off" }}, "drives": [ {{ "drive_id": "rootfs", "path_on_host": "{}", "is_root_device": true, "is_read_only": false }} ], "network-interfaces": [ {{ "iface_id": "netif1", "host_dev_name": "hostname7" }}, {{ "iface_id": "netif2", "host_dev_name": "hostname7" }} ] }}"#, kernel_file.as_path().to_str().unwrap(), rootfs_file.as_path().to_str().unwrap() ); let error = VmResources::from_json( json.as_str(), &default_instance_info, HTTP_MAX_PAYLOAD_SIZE, None, ) .unwrap_err(); assert!( matches!( error, ResourcesError::NetDevice(NetworkInterfaceError::CreateNetworkDevice( crate::devices::virtio::net::NetError::TapOpen { .. }, )) ), "{:?}", error ); // Let's try now passing a valid configuration. We won't include any logger // or metrics configuration because these were already initialized in other // tests of this module and the reinitialization of them will cause crashing. json = format!( r#"{{ "boot-source": {{ "kernel_image_path": "{}", "boot_args": "console=ttyS0 reboot=k panic=1 pci=off" }}, "drives": [ {{ "drive_id": "rootfs", "path_on_host": "{}", "is_root_device": true, "is_read_only": false }} ], "network-interfaces": [ {{ "iface_id": "netif", "host_dev_name": "hostname8" }} ], "machine-config": {{ "vcpu_count": 2, "mem_size_mib": 1024, "smt": false }}, "mmds-config": {{ "version": "V2", "ipv4_address": "169.254.170.2", "network_interfaces": ["netif"] }} }}"#, kernel_file.as_path().to_str().unwrap(), rootfs_file.as_path().to_str().unwrap(), ); VmResources::from_json( json.as_str(), &default_instance_info, HTTP_MAX_PAYLOAD_SIZE, None, ) .unwrap(); // Test all configuration, this time trying to set default configuration // for version and IPv4 address. 
let kernel_file = TempFile::new().unwrap(); json = format!( r#"{{ "balloon": {{ "amount_mib": 0, "deflate_on_oom": false, "stats_polling_interval_s": 0 }}, "boot-source": {{ "kernel_image_path": "{}", "boot_args": "console=ttyS0 reboot=k panic=1 pci=off" }}, "drives": [ {{ "drive_id": "rootfs", "path_on_host": "{}", "is_root_device": true, "is_read_only": false }} ], "pmem": [ {{ "id": "pmem", "path_on_host": "{}", "root_device": false, "read_only": false }} ], "network-interfaces": [ {{ "iface_id": "netif", "host_dev_name": "hostname9" }} ], "machine-config": {{ "vcpu_count": 2, "mem_size_mib": 1024, "smt": false }}, "mmds-config": {{ "network_interfaces": ["netif"], "ipv4_address": "169.254.1.1" }} }}"#, kernel_file.as_path().to_str().unwrap(), rootfs_file.as_path().to_str().unwrap(), scratch_file.as_path().to_str().unwrap(), ); let resources = VmResources::from_json( json.as_str(), &default_instance_info, 1200, Some(r#"{"key": "value"}"#), ) .unwrap(); let mut map = Map::new(); map.insert("key".to_string(), Value::String("value".to_string())); assert_eq!( resources.mmds.unwrap().lock().unwrap().data_store_value(), Value::Object(map) ); } #[test] fn test_cpu_config_from_invalid_json() { // Invalid cpu config file path. // `VmResources::from_json()` should fail with `Error::File`. 
let kernel_file = TempFile::new().unwrap(); let rootfs_file = TempFile::new().unwrap(); let default_instance_info = InstanceInfo::default(); let json = format!( r#"{{ "boot-source": {{ "kernel_image_path": "{}", "boot_args": "console=ttyS0 reboot=k panic=1 pci=off" }}, "cpu-config": "/invalid/path", "drives": [ {{ "drive_id": "rootfs", "path_on_host": "{}", "is_root_device": true, "is_read_only": false }} ] }}"#, kernel_file.as_path().to_str().unwrap(), rootfs_file.as_path().to_str().unwrap(), ); let error = VmResources::from_json( json.as_str(), &default_instance_info, HTTP_MAX_PAYLOAD_SIZE, None, ) .unwrap_err(); assert!(matches!(error, ResourcesError::File(_)), "{:?}", error); } #[test] fn test_cpu_config_inline() { // Include custom cpu template directly inline in config json let kernel_file = TempFile::new().unwrap(); let rootfs_file = TempFile::new().unwrap(); let default_instance_info = InstanceInfo::default(); let json = format!( r#"{{ "boot-source": {{ "kernel_image_path": "{}", "boot_args": "console=ttyS0 reboot=k panic=1 pci=off" }}, "cpu-config": {}, "drives": [ {{ "drive_id": "rootfs", "path_on_host": "{}", "is_root_device": true, "is_read_only": false }} ] }}"#, kernel_file.as_path().to_str().unwrap(), TEST_TEMPLATE_JSON, rootfs_file.as_path().to_str().unwrap(), ); VmResources::from_json( json.as_str(), &default_instance_info, HTTP_MAX_PAYLOAD_SIZE, None, ) .unwrap(); } #[test] fn test_cpu_config_from_valid_json() { // Valid cpu config file path. // `VmResources::from_json()` should succeed and it should have a custom CPU template. 
let kernel_file = TempFile::new().unwrap(); let rootfs_file = TempFile::new().unwrap(); let default_instance_info = InstanceInfo::default(); let cpu_config_file = TempFile::new().unwrap(); cpu_config_file .as_file() .write_all("{}".as_bytes()) .unwrap(); let json = format!( r#"{{ "boot-source": {{ "kernel_image_path": "{}", "boot_args": "console=ttyS0 reboot=k panic=1 pci=off" }}, "cpu-config": "{}", "drives": [ {{ "drive_id": "rootfs", "path_on_host": "{}", "is_root_device": true, "is_read_only": false }} ] }}"#, kernel_file.as_path().to_str().unwrap(), cpu_config_file.as_path().to_str().unwrap(), rootfs_file.as_path().to_str().unwrap(), ); let vm_resources = VmResources::from_json( json.as_str(), &default_instance_info, HTTP_MAX_PAYLOAD_SIZE, None, ) .unwrap(); assert_eq!( vm_resources.machine_config.cpu_template, Some(CpuTemplateType::Custom(CustomCpuTemplate::default())) ); } #[test] fn test_cast_to_vmm_config() { // No mmds config. { let kernel_file = TempFile::new().unwrap(); let rootfs_file = TempFile::new().unwrap(); let json = format!( r#"{{ "balloon": {{ "amount_mib": 0, "deflate_on_oom": false, "stats_polling_interval_s": 0 }}, "boot-source": {{ "kernel_image_path": "{}", "boot_args": "console=ttyS0 reboot=k panic=1 pci=off" }}, "drives": [ {{ "drive_id": "rootfs", "path_on_host": "{}", "is_root_device": true, "is_read_only": false, "io_engine": "Sync" }} ], "network-interfaces": [ {{ "iface_id": "netif1", "host_dev_name": "hostname9" }}, {{ "iface_id": "netif2", "host_dev_name": "hostname10" }} ], "machine-config": {{ "vcpu_count": 2, "mem_size_mib": 1024, "smt": false }}, "entropy": {{}} }}"#, kernel_file.as_path().to_str().unwrap(), rootfs_file.as_path().to_str().unwrap(), ); { let resources = VmResources::from_json( json.as_str(), &InstanceInfo::default(), HTTP_MAX_PAYLOAD_SIZE, None, ) .unwrap(); let initial_vmm_config = serde_json::from_str::(&json).unwrap(); let vmm_config: VmmConfig = (&resources).into(); assert_eq!(initial_vmm_config, 
vmm_config); } { // In this case the mmds data store will be initialised but the config still None. let resources = VmResources::from_json( json.as_str(), &InstanceInfo::default(), HTTP_MAX_PAYLOAD_SIZE, Some(r#"{"key": "value"}"#), ) .unwrap(); let initial_vmm_config = serde_json::from_str::(&json).unwrap(); let vmm_config: VmmConfig = (&resources).into(); assert_eq!(initial_vmm_config, vmm_config); } } // Single interface for MMDS. { let kernel_file = TempFile::new().unwrap(); let rootfs_file = TempFile::new().unwrap(); let json = format!( r#"{{ "balloon": {{ "amount_mib": 0, "deflate_on_oom": false, "stats_polling_interval_s": 0 }}, "boot-source": {{ "kernel_image_path": "{}", "boot_args": "console=ttyS0 reboot=k panic=1 pci=off" }}, "drives": [ {{ "drive_id": "rootfs", "path_on_host": "{}", "is_root_device": true, "is_read_only": false, "io_engine": "Sync" }} ], "network-interfaces": [ {{ "iface_id": "netif1", "host_dev_name": "hostname9" }}, {{ "iface_id": "netif2", "host_dev_name": "hostname10" }} ], "machine-config": {{ "vcpu_count": 2, "mem_size_mib": 1024, "smt": false }}, "mmds-config": {{ "network_interfaces": ["netif1"], "ipv4_address": "169.254.1.1" }} }}"#, kernel_file.as_path().to_str().unwrap(), rootfs_file.as_path().to_str().unwrap(), ); let resources = VmResources::from_json( json.as_str(), &InstanceInfo::default(), HTTP_MAX_PAYLOAD_SIZE, None, ) .unwrap(); let initial_vmm_config = serde_json::from_str::(&json).unwrap(); let vmm_config: VmmConfig = (&resources).into(); assert_eq!(initial_vmm_config, vmm_config); } // Multiple interfaces configured for MMDS. 
{ let kernel_file = TempFile::new().unwrap(); let rootfs_file = TempFile::new().unwrap(); let json = format!( r#"{{ "balloon": {{ "amount_mib": 0, "deflate_on_oom": false, "stats_polling_interval_s": 0 }}, "boot-source": {{ "kernel_image_path": "{}", "boot_args": "console=ttyS0 reboot=k panic=1 pci=off" }}, "drives": [ {{ "drive_id": "rootfs", "path_on_host": "{}", "is_root_device": true, "is_read_only": false, "io_engine": "Sync" }} ], "network-interfaces": [ {{ "iface_id": "netif1", "host_dev_name": "hostname9" }}, {{ "iface_id": "netif2", "host_dev_name": "hostname10" }} ], "machine-config": {{ "vcpu_count": 2, "mem_size_mib": 1024, "smt": false }}, "mmds-config": {{ "network_interfaces": ["netif1", "netif2"], "ipv4_address": "169.254.1.1" }} }}"#, kernel_file.as_path().to_str().unwrap(), rootfs_file.as_path().to_str().unwrap(), ); let resources = VmResources::from_json( json.as_str(), &InstanceInfo::default(), HTTP_MAX_PAYLOAD_SIZE, None, ) .unwrap(); let initial_vmm_config = serde_json::from_str::(&json).unwrap(); let vmm_config: VmmConfig = (&resources).into(); assert_eq!(initial_vmm_config, vmm_config); } } #[test] fn test_update_machine_config() { let mut vm_resources = default_vm_resources(); let mut aux_vm_config = MachineConfigUpdate { vcpu_count: Some(32), mem_size_mib: Some(512), smt: Some(false), #[cfg(target_arch = "x86_64")] cpu_template: Some(StaticCpuTemplate::T2), #[cfg(target_arch = "aarch64")] cpu_template: Some(StaticCpuTemplate::V1N1), track_dirty_pages: Some(false), huge_pages: Some(HugePageConfig::None), #[cfg(feature = "gdb")] gdb_socket_path: None, }; assert_ne!( MachineConfigUpdate::from(vm_resources.machine_config.clone()), aux_vm_config ); vm_resources.update_machine_config(&aux_vm_config).unwrap(); assert_eq!( MachineConfigUpdate::from(vm_resources.machine_config.clone()), aux_vm_config ); // Invalid vcpu count. 
aux_vm_config.vcpu_count = Some(0); assert_eq!( vm_resources.update_machine_config(&aux_vm_config), Err(MachineConfigError::InvalidVcpuCount) ); aux_vm_config.vcpu_count = Some(33); assert_eq!( vm_resources.update_machine_config(&aux_vm_config), Err(MachineConfigError::InvalidVcpuCount) ); // Check that SMT is not supported on aarch64, and that on x86_64 enabling it requires vcpu // count to be even. aux_vm_config.smt = Some(true); #[cfg(target_arch = "aarch64")] assert_eq!( vm_resources.update_machine_config(&aux_vm_config), Err(MachineConfigError::SmtNotSupported) ); aux_vm_config.vcpu_count = Some(3); #[cfg(target_arch = "x86_64")] assert_eq!( vm_resources.update_machine_config(&aux_vm_config), Err(MachineConfigError::InvalidVcpuCount) ); aux_vm_config.vcpu_count = Some(32); #[cfg(target_arch = "x86_64")] vm_resources.update_machine_config(&aux_vm_config).unwrap(); aux_vm_config.smt = Some(false); // Invalid mem_size_mib. aux_vm_config.mem_size_mib = Some(0); assert_eq!( vm_resources.update_machine_config(&aux_vm_config), Err(MachineConfigError::InvalidMemorySize) ); // Incompatible mem_size_mib with balloon size. vm_resources.machine_config.mem_size_mib = 128; vm_resources .set_balloon_device(BalloonDeviceConfig { amount_mib: 100, deflate_on_oom: false, stats_polling_interval_s: 0, free_page_hinting: false, free_page_reporting: false, }) .unwrap(); aux_vm_config.mem_size_mib = Some(90); assert_eq!( vm_resources.update_machine_config(&aux_vm_config), Err(MachineConfigError::IncompatibleBalloonSize) ); // mem_size_mib compatible with balloon size. 
aux_vm_config.mem_size_mib = Some(256); vm_resources.update_machine_config(&aux_vm_config).unwrap(); // mem_size_mib incompatible with huge pages configuration aux_vm_config.mem_size_mib = Some(129); aux_vm_config.huge_pages = Some(HugePageConfig::Hugetlbfs2M); assert_eq!( vm_resources .update_machine_config(&aux_vm_config) .unwrap_err(), MachineConfigError::InvalidMemorySize ); // mem_size_mib compatible with huge page configuration aux_vm_config.mem_size_mib = Some(2048); // Remove the balloon device config that's added by `default_vm_resources` as it would // trigger the "ballooning incompatible with huge pages" check. vm_resources.balloon = BalloonBuilder::new(); vm_resources.update_machine_config(&aux_vm_config).unwrap(); } #[test] fn test_set_balloon_device() { let mut vm_resources = default_vm_resources(); vm_resources.balloon = BalloonBuilder::new(); let mut new_balloon_cfg = BalloonDeviceConfig { amount_mib: 100, deflate_on_oom: false, stats_polling_interval_s: 0, free_page_hinting: false, free_page_reporting: false, }; assert!(vm_resources.balloon.get().is_none()); vm_resources .set_balloon_device(new_balloon_cfg.clone()) .unwrap(); let actual_balloon_cfg = vm_resources.balloon.get_config().unwrap(); assert_eq!(actual_balloon_cfg.amount_mib, new_balloon_cfg.amount_mib); assert_eq!( actual_balloon_cfg.deflate_on_oom, new_balloon_cfg.deflate_on_oom ); assert_eq!( actual_balloon_cfg.stats_polling_interval_s, new_balloon_cfg.stats_polling_interval_s ); let mut vm_resources = default_vm_resources(); vm_resources.balloon = BalloonBuilder::new(); new_balloon_cfg.amount_mib = 256; vm_resources .set_balloon_device(new_balloon_cfg) .unwrap_err(); } #[test] fn test_set_entropy_device() { let mut vm_resources = default_vm_resources(); vm_resources.entropy = EntropyDeviceBuilder::new(); let entropy_device_cfg = EntropyDeviceConfig::default(); assert!(vm_resources.entropy.get().is_none()); vm_resources .build_entropy_device(entropy_device_cfg.clone()) .unwrap(); let 
actual_entropy_cfg = vm_resources.entropy.config().unwrap(); assert_eq!(actual_entropy_cfg, entropy_device_cfg); } #[test] fn test_set_boot_source() { let tmp_file = TempFile::new().unwrap(); let cmdline = "reboot=k panic=1 pci=off nomodule 8250.nr_uarts=0"; let expected_boot_cfg = BootSourceConfig { kernel_image_path: String::from(tmp_file.as_path().to_str().unwrap()), initrd_path: Some(String::from(tmp_file.as_path().to_str().unwrap())), boot_args: Some(cmdline.to_string()), }; let mut vm_resources = default_vm_resources(); let boot_builder = vm_resources.boot_source.builder.as_ref().unwrap(); let tmp_ino = tmp_file.as_file().metadata().unwrap().st_ino(); assert_ne!( boot_builder .cmdline .as_cstring() .unwrap() .as_bytes_with_nul(), [cmdline.as_bytes(), b"\0"].concat() ); assert_ne!( boot_builder.kernel_file.metadata().unwrap().st_ino(), tmp_ino ); assert_ne!( boot_builder .initrd_file .as_ref() .unwrap() .metadata() .unwrap() .st_ino(), tmp_ino ); vm_resources.build_boot_source(expected_boot_cfg).unwrap(); let boot_source_builder = vm_resources.boot_source.builder.unwrap(); assert_eq!( boot_source_builder .cmdline .as_cstring() .unwrap() .as_bytes_with_nul(), [cmdline.as_bytes(), b"\0"].concat() ); assert_eq!( boot_source_builder.kernel_file.metadata().unwrap().st_ino(), tmp_ino ); assert_eq!( boot_source_builder .initrd_file .as_ref() .unwrap() .metadata() .unwrap() .st_ino(), tmp_ino ); } #[test] fn test_set_block_device() { let mut vm_resources = default_vm_resources(); let (mut new_block_device_cfg, _file) = default_block_cfg(); let tmp_file = TempFile::new().unwrap(); new_block_device_cfg.drive_id = "block2".to_string(); new_block_device_cfg.path_on_host = Some(tmp_file.as_path().to_str().unwrap().to_string()); assert_eq!(vm_resources.block.devices.len(), 1); vm_resources.set_block_device(new_block_device_cfg).unwrap(); assert_eq!(vm_resources.block.devices.len(), 2); } #[test] fn test_set_vsock_device() { let mut vm_resources = default_vm_resources(); let 
mut tmp_sock_file = TempFile::new().unwrap(); tmp_sock_file.remove().unwrap(); let new_vsock_cfg = default_config(&tmp_sock_file); assert!(vm_resources.vsock.get().is_none()); vm_resources.set_vsock_device(new_vsock_cfg).unwrap(); let actual_vsock_cfg = vm_resources.vsock.get().unwrap(); assert_eq!(actual_vsock_cfg.lock().unwrap().id(), VSOCK_DEV_ID); } #[test] fn test_set_net_device() { let mut vm_resources = default_vm_resources(); // Clone the existing net config in order to obtain a new one. let mut new_net_device_cfg = default_net_cfg(); new_net_device_cfg.iface_id = "new_net_if".to_string(); new_net_device_cfg.guest_mac = Some(MacAddr::from_str("01:23:45:67:89:0c").unwrap()); new_net_device_cfg.host_dev_name = "dummy_path2".to_string(); assert_eq!(vm_resources.net_builder.len(), 1); vm_resources.build_net_device(new_net_device_cfg).unwrap(); assert_eq!(vm_resources.net_builder.len(), 2); } #[test] fn test_set_pmem_device() { let mut vm_resources = default_vm_resources(); let tmp_file = TempFile::new().unwrap(); tmp_file.as_file().set_len(0x1000).unwrap(); let cfg = PmemConfig { id: "pmem".to_string(), path_on_host: tmp_file.as_path().to_str().unwrap().to_string(), ..Default::default() }; assert_eq!(vm_resources.pmem.devices.len(), 0); vm_resources.build_pmem_device(cfg).unwrap(); assert_eq!(vm_resources.pmem.devices.len(), 1); } } ================================================ FILE: src/vmm/src/rpc_interface.rs ================================================ // Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
// SPDX-License-Identifier: Apache-2.0 use std::fmt::{self, Debug}; use std::sync::{Arc, Mutex, MutexGuard}; use serde_json::Value; use utils::time::{ClockType, get_time_us}; use super::builder::build_and_boot_microvm; use super::persist::{create_snapshot, restore_from_snapshot}; use super::resources::VmResources; use super::{Vmm, VmmError}; use crate::EventManager; use crate::builder::StartMicrovmError; use crate::cpu_config::templates::{CustomCpuTemplate, GuestConfigError}; use crate::devices::virtio::balloon::device::{HintingStatus, StartHintingCmd}; use crate::devices::virtio::mem::VirtioMemStatus; use crate::logger::{LoggerConfig, info, warn, *}; use crate::mmds::data_store::{self, Mmds, MmdsDatastoreError}; use crate::persist::{CreateSnapshotError, RestoreFromSnapshotError, VmInfo}; use crate::resources::VmmConfig; use crate::seccomp::BpfThreadMap; use crate::vmm_config::balloon::{ BalloonConfigError, BalloonDeviceConfig, BalloonStats, BalloonUpdateConfig, BalloonUpdateStatsConfig, }; use crate::vmm_config::boot_source::{BootSourceConfig, BootSourceConfigError}; use crate::vmm_config::drive::{BlockDeviceConfig, BlockDeviceUpdateConfig, DriveError}; use crate::vmm_config::entropy::{EntropyDeviceConfig, EntropyDeviceError}; use crate::vmm_config::instance_info::InstanceInfo; use crate::vmm_config::machine_config::{MachineConfig, MachineConfigError, MachineConfigUpdate}; use crate::vmm_config::memory_hotplug::{ MemoryHotplugConfig, MemoryHotplugConfigError, MemoryHotplugSizeUpdate, }; use crate::vmm_config::metrics::{MetricsConfig, MetricsConfigError}; use crate::vmm_config::mmds::{MmdsConfig, MmdsConfigError}; use crate::vmm_config::net::{ NetworkInterfaceConfig, NetworkInterfaceError, NetworkInterfaceUpdateConfig, }; use crate::vmm_config::pmem::{PmemConfig, PmemConfigError}; use crate::vmm_config::serial::SerialConfig; use crate::vmm_config::snapshot::{CreateSnapshotParams, LoadSnapshotParams, SnapshotType}; use crate::vmm_config::vsock::{VsockConfigError, 
VsockDeviceConfig}; use crate::vmm_config::{self, RateLimiterUpdate}; /// This enum represents the public interface of the VMM. Each action contains various /// bits of information (ids, paths, etc.). #[derive(Debug, PartialEq, Eq)] pub enum VmmAction { /// Configure the boot source of the microVM using as input the `ConfigureBootSource`. This /// action can only be called before the microVM has booted. ConfigureBootSource(BootSourceConfig), /// Configure the logger using as input the `LoggerConfig`. This action can only be called /// before the microVM has booted. ConfigureLogger(LoggerConfig), /// Configure the metrics using as input the `MetricsConfig`. This action can only be called /// before the microVM has booted. ConfigureMetrics(MetricsConfig), /// Configure the serial device. This action can only be called before the microVM has booted. ConfigureSerial(SerialConfig), /// Create a snapshot using as input the `CreateSnapshotParams`. This action can only be called /// after the microVM has booted and only when the microVM is in `Paused` state. CreateSnapshot(CreateSnapshotParams), /// Get the balloon device configuration. GetBalloonConfig, /// Get the ballon device latest statistics. GetBalloonStats, /// Get complete microVM configuration in JSON format. GetFullVmConfig, /// Get MMDS contents. GetMMDS, /// Get the machine configuration of the microVM. GetVmMachineConfig, /// Get microVM instance information. GetVmInstanceInfo, /// Get microVM version. GetVmmVersion, /// Flush the metrics. This action can only be called after the logger has been configured. FlushMetrics, /// Add a new block device or update one that already exists using the `BlockDeviceConfig` as /// input. This action can only be called before the microVM has booted. InsertBlockDevice(BlockDeviceConfig), /// Add a virtio-pmem device. InsertPmemDevice(PmemConfig), /// Add a new network interface config or update one that already exists using the /// `NetworkInterfaceConfig` as input. 
This action can only be called before the microVM has /// booted. InsertNetworkDevice(NetworkInterfaceConfig), /// Load the microVM state using as input the `LoadSnapshotParams`. This action can only be /// called before the microVM has booted. If this action is successful, the loaded microVM will /// be in `Paused` state. Should change this state to `Resumed` for the microVM to run. LoadSnapshot(LoadSnapshotParams), /// Partial update of the MMDS contents. PatchMMDS(Value), /// Pause the guest, by pausing the microVM VCPUs. Pause, /// Repopulate the MMDS contents. PutMMDS(Value), /// Configure the guest vCPU features. PutCpuConfiguration(CustomCpuTemplate), /// Resume the guest, by resuming the microVM VCPUs. Resume, /// Set the balloon device or update the one that already exists using the /// `BalloonDeviceConfig` as input. This action can only be called before the microVM /// has booted. SetBalloonDevice(BalloonDeviceConfig), /// Set the MMDS configuration. SetMmdsConfiguration(MmdsConfig), /// Set the vsock device or update the one that already exists using the /// `VsockDeviceConfig` as input. This action can only be called before the microVM has /// booted. SetVsockDevice(VsockDeviceConfig), /// Set the entropy device using `EntropyDeviceConfig` as input. This action can only be called /// before the microVM has booted. SetEntropyDevice(EntropyDeviceConfig), /// Get the memory hotplug device configuration and status. GetMemoryHotplugStatus, /// Set the memory hotplug device using `MemoryHotplugConfig` as input. This action can only be /// called before the microVM has booted. SetMemoryHotplugDevice(MemoryHotplugConfig), /// Updates the memory hotplug device using `MemoryHotplugConfigUpdate` as input. This action /// can only be called after the microVM has booted. UpdateMemoryHotplugSize(MemoryHotplugSizeUpdate), /// Launch the microVM. This action can only be called before the microVM has booted. 
StartMicroVm, /// Send CTRL+ALT+DEL to the microVM, using the i8042 keyboard function. If an AT-keyboard /// driver is listening on the guest end, this can be used to shut down the microVM gracefully. #[cfg(target_arch = "x86_64")] SendCtrlAltDel, /// Update the balloon size, after microVM start. UpdateBalloon(BalloonUpdateConfig), /// Update the balloon statistics polling interval, after microVM start. UpdateBalloonStatistics(BalloonUpdateStatsConfig), /// Start a free page hinting run StartFreePageHinting(StartHintingCmd), /// Retrieve the status of the hinting run GetFreePageHintingStatus, /// Stops a free page hinting run StopFreePageHinting, /// Update existing block device properties such as `path_on_host` or `rate_limiter`. UpdateBlockDevice(BlockDeviceUpdateConfig), /// Update a network interface, after microVM start. Currently, the only updatable properties /// are the RX and TX rate limiters. UpdateNetworkInterface(NetworkInterfaceUpdateConfig), /// Update the microVM configuration (memory & vcpu) using `VmUpdateConfig` as input. This /// action can only be called before the microVM has booted. UpdateMachineConfiguration(MachineConfigUpdate), } /// Wrapper for all errors associated with VMM actions. 
#[derive(Debug, thiserror::Error, displaydoc::Display)] pub enum VmmActionError { /// Balloon config error: {0} BalloonConfig(#[from] BalloonConfigError), /// Balloon update error: {0} BalloonUpdate(VmmError), /// Boot source error: {0} BootSource(#[from] BootSourceConfigError), /// Create snapshot error: {0} CreateSnapshot(#[from] CreateSnapshotError), /// Configure CPU error: {0} ConfigureCpu(#[from] GuestConfigError), /// Drive config error: {0} DriveConfig(#[from] DriveError), /// Entropy device error: {0} EntropyDevice(#[from] EntropyDeviceError), /// Pmem device error: {0} PmemDevice(#[from] PmemConfigError), /// Memory hotplug config error: {0} MemoryHotplugConfig(#[from] MemoryHotplugConfigError), /// Memory hotplug update error: {0} MemoryHotplugUpdate(VmmError), /// Internal VMM error: {0} InternalVmm(#[from] VmmError), /// Load snapshot error: {0} LoadSnapshot(#[from] LoadSnapshotError), /// Logger error: {0} Logger(#[from] crate::logger::LoggerUpdateError), /// Machine config error: {0} MachineConfig(#[from] MachineConfigError), /// Metrics error: {0} Metrics(#[from] MetricsConfigError), #[from(ignore)] /// MMDS error: {0} Mmds(#[from] data_store::MmdsDatastoreError), /// MMMDS config error: {0} MmdsConfig(#[from] MmdsConfigError), #[from(ignore)] /// MMDS limit exceeded error: {0} MmdsLimitExceeded(data_store::MmdsDatastoreError), /// Network config error: {0} NetworkConfig(#[from] NetworkInterfaceError), /// The requested operation is not supported: {0} NotSupported(String), /// The requested operation is not supported after starting the microVM. OperationNotSupportedPostBoot, /// The requested operation is not supported before starting the microVM. OperationNotSupportedPreBoot, /// Start microvm error: {0} StartMicrovm(#[from] StartMicrovmError), /// Vsock config error: {0} VsockConfig(#[from] VsockConfigError), } /// The enum represents the response sent by the VMM in case of success. 
The response is either /// empty, when no data needs to be sent, or an internal VMM structure. #[allow(clippy::large_enum_variant)] #[derive(Debug, PartialEq, Eq)] pub enum VmmData { /// The balloon device configuration. BalloonConfig(BalloonDeviceConfig), /// The latest balloon device statistics. BalloonStats(BalloonStats), /// No data is sent on the channel. Empty, /// The complete microVM configuration in JSON format. FullVmConfig(VmmConfig), /// The microVM configuration represented by `VmConfig`. MachineConfiguration(MachineConfig), /// Mmds contents. MmdsValue(serde_json::Value), /// The microVM instance information. InstanceInformation(InstanceInfo), /// The microVM version. VmmVersion(String), /// The status of the memory hotplug device. VirtioMemStatus(VirtioMemStatus), /// The status of the virtio-balloon hinting run HintingStatus(HintingStatus), } fn mmds_patch_data( mut mmds: MutexGuard<'_, Mmds>, value: serde_json::Value, ) -> Result { mmds.patch_data(value) .map(|()| VmmData::Empty) .map_err(|err| match err { data_store::MmdsDatastoreError::DataStoreLimitExceeded => { VmmActionError::MmdsLimitExceeded( data_store::MmdsDatastoreError::DataStoreLimitExceeded, ) } _ => VmmActionError::Mmds(err), }) } fn mmds_put_data( mut mmds: MutexGuard<'_, Mmds>, value: serde_json::Value, ) -> Result { mmds.put_data(value) .map(|()| VmmData::Empty) .map_err(|err| match err { data_store::MmdsDatastoreError::DataStoreLimitExceeded => { VmmActionError::MmdsLimitExceeded( data_store::MmdsDatastoreError::DataStoreLimitExceeded, ) } _ => VmmActionError::Mmds(err), }) } /// Enables pre-boot setup and instantiation of a Firecracker VMM. pub struct PrebootApiController<'a> { seccomp_filters: &'a BpfThreadMap, instance_info: InstanceInfo, vm_resources: &'a mut VmResources, event_manager: &'a mut EventManager, /// The [`Vmm`] object constructed through requests pub built_vmm: Option>>, // Configuring boot specific resources will set this to true. 
// Loading from snapshot will not be allowed once this is true. boot_path: bool, // Some PrebootApiRequest errors are irrecoverable and Firecracker // should cleanly teardown if they occur. fatal_error: Option, } // TODO Remove when `EventManager` implements `std::fmt::Debug`. impl fmt::Debug for PrebootApiController<'_> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("PrebootApiController") .field("seccomp_filters", &self.seccomp_filters) .field("instance_info", &self.instance_info) .field("vm_resources", &self.vm_resources) .field("event_manager", &"?") .field("built_vmm", &self.built_vmm) .field("boot_path", &self.boot_path) .field("fatal_error", &self.fatal_error) .finish() } } /// Error type for [`PrebootApiController::load_snapshot`] #[derive(Debug, thiserror::Error, displaydoc::Display)] pub enum LoadSnapshotError { /// Loading a microVM snapshot not allowed after configuring boot-specific resources. LoadSnapshotNotAllowed, /// Failed to restore from snapshot: {0} RestoreFromSnapshot(#[from] RestoreFromSnapshotError), /// Failed to resume microVM: {0} ResumeMicrovm(#[from] VmmError), } /// Shorthand type for a request containing a boxed VmmAction. pub type ApiRequest = Box; /// Shorthand type for a response containing a boxed Result. pub type ApiResponse = Box>; /// Error type for `PrebootApiController::build_microvm_from_requests`. #[derive(Debug, thiserror::Error, displaydoc::Display)] pub enum BuildMicrovmFromRequestsError { /// Configuring MMDS failed: {0}. ConfigureMmds(#[from] MmdsConfigError), /// Populating MMDS from file failed: {0}. PopulateMmds(#[from] data_store::MmdsDatastoreError), /// Loading snapshot failed. Restore, /// Resuming MicroVM after loading snapshot failed. Resume, } impl<'a> PrebootApiController<'a> { /// Constructor for the PrebootApiController. 
pub fn new( seccomp_filters: &'a BpfThreadMap, instance_info: InstanceInfo, vm_resources: &'a mut VmResources, event_manager: &'a mut EventManager, ) -> Self { Self { seccomp_filters, instance_info, vm_resources, event_manager, built_vmm: None, boot_path: false, fatal_error: None, } } /// Default implementation for the function that builds and starts a microVM. /// /// Returns a populated `VmResources` object and a running `Vmm` object. #[allow(clippy::too_many_arguments)] pub fn build_microvm_from_requests( seccomp_filters: &BpfThreadMap, event_manager: &mut EventManager, instance_info: InstanceInfo, from_api: &std::sync::mpsc::Receiver, to_api: &std::sync::mpsc::Sender, api_event_fd: &vmm_sys_util::eventfd::EventFd, boot_timer_enabled: bool, pci_enabled: bool, mmds_size_limit: usize, metadata_json: Option<&str>, ) -> Result>, BuildMicrovmFromRequestsError> { let mut vm_resources = VmResources { boot_timer: boot_timer_enabled, mmds_size_limit, pci_enabled, ..Default::default() }; // Init the data store from file, if present. if let Some(data) = metadata_json { vm_resources.locked_mmds_or_default()?.put_data( serde_json::from_str(data).expect("MMDS error: metadata provided not valid json"), )?; info!("Successfully added metadata to mmds from file"); } let mut preboot_controller = PrebootApiController::new( seccomp_filters, instance_info, &mut vm_resources, event_manager, ); // Configure and start microVM through successive API calls. // Iterate through API calls to configure microVm. // The loop breaks when a microVM is successfully started, and a running Vmm is built. while preboot_controller.built_vmm.is_none() { // Get request let req = from_api .recv() .expect("The channel's sending half was disconnected. Cannot receive data."); // Also consume the API event along with the message. It is safe to unwrap() // because this event_fd is blocking. api_event_fd .read() .expect("VMM: Failed to read the API event_fd"); // Process the request. 
let res = preboot_controller.handle_preboot_request(*req); // Send back the response. to_api.send(Box::new(res)).expect("one-shot channel closed"); // If any fatal errors were encountered, break the loop. if let Some(preboot_error) = preboot_controller.fatal_error { return Err(preboot_error); } } // Safe to unwrap because previous loop cannot end on None. let vmm = preboot_controller.built_vmm.unwrap(); Ok(vmm) } /// Handles the incoming preboot request and provides a response for it. /// Returns a built/running `Vmm` after handling a successful `StartMicroVm` request. pub fn handle_preboot_request( &mut self, request: VmmAction, ) -> Result { use self::VmmAction::*; match request { // Supported operations allowed pre-boot. ConfigureBootSource(config) => self.set_boot_source(config), ConfigureLogger(logger_cfg) => crate::logger::LOGGER .update(logger_cfg) .map(|()| VmmData::Empty) .map_err(VmmActionError::Logger), ConfigureMetrics(metrics_cfg) => vmm_config::metrics::init_metrics(metrics_cfg) .map(|()| VmmData::Empty) .map_err(VmmActionError::Metrics), ConfigureSerial(serial_cfg) => { self.vm_resources.serial_out_path = serial_cfg.serial_out_path; Ok(VmmData::Empty) } GetBalloonConfig => self.balloon_config(), GetFullVmConfig => { warn!( "If the VM was restored from snapshot, boot-source, machine-config.smt, and \ machine-config.cpu_template will all be empty." ); Ok(VmmData::FullVmConfig((&*self.vm_resources).into())) } GetMMDS => Ok(VmmData::MmdsValue( self.vm_resources .locked_mmds_or_default() .map_err(VmmActionError::MmdsConfig)? 
                    .data_store_value(),
            )),
            GetVmMachineConfig => Ok(VmmData::MachineConfiguration(
                self.vm_resources.machine_config.clone(),
            )),
            GetVmInstanceInfo => Ok(VmmData::InstanceInformation(self.instance_info.clone())),
            GetVmmVersion => Ok(VmmData::VmmVersion(self.instance_info.vmm_version.clone())),
            InsertBlockDevice(config) => self.insert_block_device(config),
            InsertPmemDevice(config) => self.insert_pmem_device(config),
            InsertNetworkDevice(config) => self.insert_net_device(config),
            LoadSnapshot(config) => self
                .load_snapshot(&config)
                .map_err(VmmActionError::LoadSnapshot),
            PatchMMDS(value) => mmds_patch_data(
                self.vm_resources
                    .locked_mmds_or_default()
                    .map_err(VmmActionError::MmdsConfig)?,
                value,
            ),
            PutCpuConfiguration(custom_cpu_template) => {
                self.set_custom_cpu_template(custom_cpu_template)
            }
            PutMMDS(value) => mmds_put_data(
                self.vm_resources
                    .locked_mmds_or_default()
                    .map_err(VmmActionError::MmdsConfig)?,
                value,
            ),
            SetBalloonDevice(config) => self.set_balloon_device(config),
            SetVsockDevice(config) => self.set_vsock_device(config),
            SetMmdsConfiguration(config) => self.set_mmds_config(config),
            // On success this arm stores the built VMM in `self.built_vmm`, which ends
            // the pre-boot request loop.
            StartMicroVm => self.start_microvm(),
            UpdateMachineConfiguration(config) => self.update_machine_config(config),
            SetEntropyDevice(config) => self.set_entropy_device(config),
            SetMemoryHotplugDevice(config) => self.set_memory_hotplug_device(config),

            // Operations not allowed pre-boot.
CreateSnapshot(_) | FlushMetrics | Pause | Resume | GetBalloonStats | GetMemoryHotplugStatus | UpdateBalloon(_) | UpdateBalloonStatistics(_) | UpdateBlockDevice(_) | UpdateMemoryHotplugSize(_) | UpdateNetworkInterface(_) | StartFreePageHinting(_) | GetFreePageHintingStatus | StopFreePageHinting => Err(VmmActionError::OperationNotSupportedPreBoot), #[cfg(target_arch = "x86_64")] SendCtrlAltDel => Err(VmmActionError::OperationNotSupportedPreBoot), } } fn balloon_config(&mut self) -> Result { self.vm_resources .balloon .get_config() .map(VmmData::BalloonConfig) .map_err(VmmActionError::BalloonConfig) } fn insert_block_device(&mut self, cfg: BlockDeviceConfig) -> Result { self.boot_path = true; self.vm_resources .set_block_device(cfg) .map(|()| VmmData::Empty) .map_err(VmmActionError::DriveConfig) } fn insert_net_device( &mut self, cfg: NetworkInterfaceConfig, ) -> Result { self.boot_path = true; self.vm_resources .build_net_device(cfg) .map(|()| VmmData::Empty) .map_err(VmmActionError::NetworkConfig) } fn insert_pmem_device(&mut self, cfg: PmemConfig) -> Result { self.boot_path = true; self.vm_resources .build_pmem_device(cfg) .map(|()| VmmData::Empty) .map_err(VmmActionError::PmemDevice) } fn set_balloon_device(&mut self, cfg: BalloonDeviceConfig) -> Result { self.boot_path = true; self.vm_resources .set_balloon_device(cfg) .map(|()| VmmData::Empty) .map_err(VmmActionError::BalloonConfig) } fn set_boot_source(&mut self, cfg: BootSourceConfig) -> Result { self.boot_path = true; self.vm_resources .build_boot_source(cfg) .map(|()| VmmData::Empty) .map_err(VmmActionError::BootSource) } fn set_mmds_config(&mut self, cfg: MmdsConfig) -> Result { self.boot_path = true; self.vm_resources .set_mmds_config(cfg, &self.instance_info.id) .map(|()| VmmData::Empty) .map_err(VmmActionError::MmdsConfig) } fn update_machine_config( &mut self, cfg: MachineConfigUpdate, ) -> Result { self.boot_path = true; self.vm_resources .update_machine_config(&cfg) .map(|()| VmmData::Empty) 
.map_err(VmmActionError::MachineConfig) } fn set_custom_cpu_template( &mut self, cpu_template: CustomCpuTemplate, ) -> Result { self.vm_resources.set_custom_cpu_template(cpu_template); Ok(VmmData::Empty) } fn set_vsock_device(&mut self, cfg: VsockDeviceConfig) -> Result { self.boot_path = true; self.vm_resources .set_vsock_device(cfg) .map(|()| VmmData::Empty) .map_err(VmmActionError::VsockConfig) } fn set_entropy_device(&mut self, cfg: EntropyDeviceConfig) -> Result { self.boot_path = true; self.vm_resources.build_entropy_device(cfg)?; Ok(VmmData::Empty) } fn set_memory_hotplug_device( &mut self, cfg: MemoryHotplugConfig, ) -> Result { self.boot_path = true; self.vm_resources.set_memory_hotplug_config(cfg)?; Ok(VmmData::Empty) } // On success, this command will end the pre-boot stage and this controller // will be replaced by a runtime controller. fn start_microvm(&mut self) -> Result { build_and_boot_microvm( &self.instance_info, self.vm_resources, self.event_manager, self.seccomp_filters, ) .map(|vmm| { self.built_vmm = Some(vmm); VmmData::Empty }) .map_err(VmmActionError::StartMicrovm) } // On success, this command will end the pre-boot stage and this controller // will be replaced by a runtime controller. fn load_snapshot( &mut self, load_params: &LoadSnapshotParams, ) -> Result { let load_start_us = get_time_us(ClockType::Monotonic); if self.boot_path { let err = LoadSnapshotError::LoadSnapshotNotAllowed; info!("{}", err); return Err(err); } // Restore VM from snapshot let vmm = restore_from_snapshot( &self.instance_info, self.event_manager, self.seccomp_filters, load_params, self.vm_resources, ) .inspect_err(|_| { // If restore fails, we consider the process is too dirty to recover. self.fatal_error = Some(BuildMicrovmFromRequestsError::Restore); })?; // Resume VM if load_params.resume_vm { vmm.lock() .expect("Poisoned lock") .resume_vm() .inspect_err(|_| { // If resume fails, we consider the process is too dirty to recover. 
self.fatal_error = Some(BuildMicrovmFromRequestsError::Resume); })?; } // Set the VM self.built_vmm = Some(vmm); debug!( "'load snapshot' VMM action took {} us.", update_metric_with_elapsed_time(&METRICS.latencies_us.vmm_load_snapshot, load_start_us) ); Ok(VmmData::Empty) } } /// Enables RPC interaction with a running Firecracker VMM. #[derive(Debug)] pub struct RuntimeApiController { vmm: Arc>, } impl RuntimeApiController { /// Handles the incoming runtime `VmmAction` request and provides a response for it. pub fn handle_request(&mut self, request: VmmAction) -> Result { use self::VmmAction::*; match request { // Supported operations allowed post-boot. CreateSnapshot(snapshot_create_cfg) => self.create_snapshot(&snapshot_create_cfg), FlushMetrics => self.flush_metrics(), GetBalloonConfig => self .vmm .lock() .expect("Poisoned lock") .balloon_config() .map(|state| VmmData::BalloonConfig(BalloonDeviceConfig::from(state))) .map_err(VmmActionError::InternalVmm), GetBalloonStats => self .vmm .lock() .expect("Poisoned lock") .latest_balloon_stats() .map(VmmData::BalloonStats) .map_err(VmmActionError::InternalVmm), GetFullVmConfig => Ok(VmmData::FullVmConfig( self.vmm.lock().expect("Poisoned lock").full_config(), )), GetMemoryHotplugStatus => self .vmm .lock() .expect("Poisoned lock") .memory_hotplug_status() .map(VmmData::VirtioMemStatus) .map_err(VmmActionError::InternalVmm), GetMMDS => Ok(VmmData::MmdsValue( self.vmm .lock() .expect("Poisoned lock") .get_mmds() .ok_or(VmmActionError::Mmds(MmdsDatastoreError::NotInitialized))? 
                    .lock()
                    .expect("Poisoned lock")
                    .data_store_value(),
            )),
            GetVmMachineConfig => Ok(VmmData::MachineConfiguration(
                self.vmm
                    .lock()
                    .expect("Poisoned lock")
                    .machine_config
                    .clone(),
            )),
            GetVmInstanceInfo => Ok(VmmData::InstanceInformation(
                self.vmm.lock().expect("Poisoned lock").instance_info(),
            )),
            GetVmmVersion => Ok(VmmData::VmmVersion(
                self.vmm.lock().expect("Poisoned lock").version(),
            )),
            PatchMMDS(value) => mmds_patch_data(
                self.vmm
                    .lock()
                    .expect("Poisoned lock")
                    .get_mmds()
                    .ok_or(VmmActionError::Mmds(MmdsDatastoreError::NotInitialized))?
                    .lock()
                    .expect("Poisoned lock"),
                value,
            ),
            Pause => self.pause(),
            PutMMDS(value) => mmds_put_data(
                self.vmm
                    .lock()
                    .expect("Poisoned lock")
                    .get_mmds()
                    .ok_or(VmmActionError::Mmds(MmdsDatastoreError::NotInitialized))?
                    .lock()
                    .expect("Poisoned lock"),
                value,
            ),
            Resume => self.resume(),
            #[cfg(target_arch = "x86_64")]
            SendCtrlAltDel => self.send_ctrl_alt_del(),
            // Balloon device runtime updates; each takes the VMM lock for the duration.
            UpdateBalloon(balloon_update) => self
                .vmm
                .lock()
                .expect("Poisoned lock")
                .update_balloon_config(balloon_update.amount_mib)
                .map(|_| VmmData::Empty)
                .map_err(VmmActionError::BalloonUpdate),
            UpdateBalloonStatistics(balloon_stats_update) => self
                .vmm
                .lock()
                .expect("Poisoned lock")
                .update_balloon_stats_config(balloon_stats_update.stats_polling_interval_s)
                .map(|_| VmmData::Empty)
                .map_err(VmmActionError::BalloonUpdate),
            StartFreePageHinting(cmd) => self
                .vmm
                .lock()
                .expect("Poisoned lock")
                .start_balloon_hinting(cmd)
                .map(|_| VmmData::Empty)
                .map_err(VmmActionError::BalloonUpdate),
            GetFreePageHintingStatus => self
                .vmm
                .lock()
                .expect("Poisoned lock")
                .get_balloon_hinting_status()
                .map(VmmData::HintingStatus)
                .map_err(VmmActionError::BalloonUpdate),
            StopFreePageHinting => self
                .vmm
                .lock()
                .expect("Poisoned lock")
                .stop_balloon_hinting()
                .map(|_| VmmData::Empty)
                .map_err(VmmActionError::BalloonUpdate),
            UpdateBlockDevice(new_cfg) => self.update_block_device(new_cfg),
            UpdateNetworkInterface(netif_update) => self.update_net_rate_limiters(netif_update),
UpdateMemoryHotplugSize(cfg) => self .vmm .lock() .expect("Poisoned lock") .update_memory_hotplug_size(cfg.requested_size_mib) .map(|_| VmmData::Empty) .map_err(VmmActionError::MemoryHotplugUpdate), // Operations not allowed post-boot. ConfigureBootSource(_) | ConfigureLogger(_) | ConfigureMetrics(_) | ConfigureSerial(_) | InsertBlockDevice(_) | InsertPmemDevice(_) | InsertNetworkDevice(_) | LoadSnapshot(_) | PutCpuConfiguration(_) | SetBalloonDevice(_) | SetVsockDevice(_) | SetMmdsConfiguration(_) | SetEntropyDevice(_) | SetMemoryHotplugDevice(_) | StartMicroVm | UpdateMachineConfiguration(_) => Err(VmmActionError::OperationNotSupportedPostBoot), } } /// Creates a new `RuntimeApiController`. pub fn new(vmm: Arc>) -> Self { Self { vmm } } /// Pauses the microVM by pausing the vCPUs. pub fn pause(&mut self) -> Result { let pause_start_us = get_time_us(ClockType::Monotonic); self.vmm.lock().expect("Poisoned lock").pause_vm()?; let elapsed_time_us = update_metric_with_elapsed_time(&METRICS.latencies_us.vmm_pause_vm, pause_start_us); info!("'pause vm' VMM action took {} us.", elapsed_time_us); Ok(VmmData::Empty) } /// Resumes the microVM by resuming the vCPUs. pub fn resume(&mut self) -> Result { let resume_start_us = get_time_us(ClockType::Monotonic); self.vmm.lock().expect("Poisoned lock").resume_vm()?; let elapsed_time_us = update_metric_with_elapsed_time(&METRICS.latencies_us.vmm_resume_vm, resume_start_us); info!("'resume vm' VMM action took {} us.", elapsed_time_us); Ok(VmmData::Empty) } /// Write the metrics on user demand (flush). We use the word `flush` here to highlight the fact /// that the metrics will be written immediately. /// Defer to inner Vmm. We'll move to a variant where the Vmm simply exposes functionality like /// getting the dirty pages, and then we'll have the metrics flushing logic entirely on the /// outside. fn flush_metrics(&mut self) -> Result { // FIXME: we're losing the bool saying whether metrics were actually written. 
METRICS .write() .map(|_| VmmData::Empty) .map_err(super::VmmError::Metrics) .map_err(VmmActionError::InternalVmm) } /// Injects CTRL+ALT+DEL keystroke combo to the inner Vmm (if present). #[cfg(target_arch = "x86_64")] fn send_ctrl_alt_del(&mut self) -> Result { self.vmm .lock() .expect("Poisoned lock") .send_ctrl_alt_del() .map(|()| VmmData::Empty) .map_err(VmmActionError::InternalVmm) } fn create_snapshot( &mut self, create_params: &CreateSnapshotParams, ) -> Result { if create_params.snapshot_type == SnapshotType::Diff { log_dev_preview_warning("Virtual machine diff snapshots", None); } let mut locked_vmm = self.vmm.lock().unwrap(); let vm_info = VmInfo::from(&*locked_vmm); let create_start_us = get_time_us(ClockType::Monotonic); create_snapshot(&mut locked_vmm, &vm_info, create_params)?; match create_params.snapshot_type { SnapshotType::Full => { let elapsed_time_us = update_metric_with_elapsed_time( &METRICS.latencies_us.vmm_full_create_snapshot, create_start_us, ); info!( "'create full snapshot' VMM action took {} us.", elapsed_time_us ); } SnapshotType::Diff => { let elapsed_time_us = update_metric_with_elapsed_time( &METRICS.latencies_us.vmm_diff_create_snapshot, create_start_us, ); info!( "'create diff snapshot' VMM action took {} us.", elapsed_time_us ); } } Ok(VmmData::Empty) } /// Updates block device properties: /// - path of the host file backing the emulated block device, update the disk image on the /// device and its virtio configuration /// - rate limiter configuration. 
fn update_block_device( &mut self, new_cfg: BlockDeviceUpdateConfig, ) -> Result { let mut vmm = self.vmm.lock().expect("Poisoned lock"); // vhost-user-block updates if new_cfg.path_on_host.is_none() && new_cfg.rate_limiter.is_none() { vmm.update_vhost_user_block_config(&new_cfg.drive_id) .map_err(DriveError::DeviceUpdate)?; } // virtio-block updates if let Some(new_path) = new_cfg.path_on_host { vmm.update_block_device_path(&new_cfg.drive_id, new_path) .map_err(DriveError::DeviceUpdate)?; } if new_cfg.rate_limiter.is_some() { vmm.update_block_rate_limiter( &new_cfg.drive_id, RateLimiterUpdate::from(new_cfg.rate_limiter).bandwidth, RateLimiterUpdate::from(new_cfg.rate_limiter).ops, ) .map_err(DriveError::DeviceUpdate)?; } Ok(VmmData::Empty) } /// Updates configuration for an emulated net device as described in `new_cfg`. fn update_net_rate_limiters( &mut self, new_cfg: NetworkInterfaceUpdateConfig, ) -> Result { self.vmm .lock() .expect("Poisoned lock") .update_net_rate_limiters( &new_cfg.iface_id, RateLimiterUpdate::from(new_cfg.rx_rate_limiter).bandwidth, RateLimiterUpdate::from(new_cfg.rx_rate_limiter).ops, RateLimiterUpdate::from(new_cfg.tx_rate_limiter).bandwidth, RateLimiterUpdate::from(new_cfg.tx_rate_limiter).ops, ) .map(|()| VmmData::Empty) .map_err(NetworkInterfaceError::DeviceUpdate) .map_err(VmmActionError::NetworkConfig) } } #[cfg(test)] mod tests { use std::path::PathBuf; use super::*; use crate::HTTP_MAX_PAYLOAD_SIZE; use crate::builder::tests::default_vmm; use crate::devices::virtio::block::CacheType; use crate::mmds::data_store::MmdsVersion; use crate::seccomp::BpfThreadMap; use crate::vmm_config::snapshot::{MemBackendConfig, MemBackendType}; fn default_preboot<'a>( vm_resources: &'a mut VmResources, event_manager: &'a mut EventManager, seccomp_filters: &'a BpfThreadMap, ) -> PrebootApiController<'a> { let instance_info = InstanceInfo::default(); PrebootApiController::new(seccomp_filters, instance_info, vm_resources, event_manager) } fn 
preboot_request(request: VmmAction) -> Result { let mut vm_resources = VmResources::default(); let mut evmgr = EventManager::new().unwrap(); let seccomp_filters = BpfThreadMap::new(); let mut preboot = default_preboot(&mut vm_resources, &mut evmgr, &seccomp_filters); preboot.handle_preboot_request(request) } fn preboot_request_with_mmds( request: VmmAction, mmds: Arc>, ) -> Result { let mut vm_resources = VmResources { mmds: Some(mmds), mmds_size_limit: HTTP_MAX_PAYLOAD_SIZE, ..Default::default() }; let mut evmgr = EventManager::new().unwrap(); let seccomp_filters = BpfThreadMap::new(); let mut preboot = default_preboot(&mut vm_resources, &mut evmgr, &seccomp_filters); preboot.handle_preboot_request(request) } #[test] fn test_preboot_get_vm_config() { assert_eq!( preboot_request(VmmAction::GetVmMachineConfig).unwrap(), VmmData::MachineConfiguration(MachineConfig::default()) ); } #[test] fn test_preboot_get_mmds() { assert_eq!( preboot_request(VmmAction::GetMMDS).unwrap(), VmmData::MmdsValue(Value::Null) ); } #[test] fn test_runtime_get_mmds() { assert!(matches!( runtime_request(VmmAction::GetMMDS), Err(VmmActionError::Mmds( data_store::MmdsDatastoreError::NotInitialized )) )); } #[test] fn test_preboot_put_mmds() { let mmds = Arc::new(Mutex::new(Mmds::default())); assert_eq!( preboot_request_with_mmds( VmmAction::PutMMDS(Value::String("string".to_string())), mmds.clone() ) .unwrap(), VmmData::Empty ); assert_eq!( preboot_request_with_mmds(VmmAction::GetMMDS, mmds.clone()).unwrap(), VmmData::MmdsValue(Value::String("string".to_string())) ); let filling = (0..51300).map(|_| "X").collect::(); let data = "{\"key\": \"".to_string() + &filling + "\"}"; assert!(matches!( preboot_request_with_mmds( VmmAction::PutMMDS(serde_json::from_str(&data).unwrap()), mmds.clone() ), Err(VmmActionError::MmdsLimitExceeded(_)) )); assert_eq!( preboot_request_with_mmds(VmmAction::GetMMDS, mmds).unwrap(), VmmData::MmdsValue(Value::String("string".to_string())) ); } #[test] fn 
test_preboot_patch_mmds() { let mmds = Arc::new(Mutex::new(Mmds::default())); // MMDS data store is not yet initialized. let res = preboot_request(VmmAction::PatchMMDS(Value::String("string".to_string()))); assert!( matches!( res, Err(VmmActionError::Mmds( data_store::MmdsDatastoreError::NotInitialized )) ), "{:?}", res ); assert_eq!( preboot_request_with_mmds( VmmAction::PutMMDS( serde_json::from_str(r#"{"key1": "value1", "key2": "val2"}"#).unwrap(), ), mmds.clone() ) .unwrap(), VmmData::Empty ); assert_eq!( preboot_request_with_mmds(VmmAction::GetMMDS, mmds.clone()).unwrap(), VmmData::MmdsValue( serde_json::from_str(r#"{"key1": "value1", "key2": "val2"}"#).unwrap() ) ); assert_eq!( preboot_request_with_mmds( VmmAction::PatchMMDS( serde_json::from_str(r#"{"key1": null, "key2": "value2"}"#).unwrap(), ), mmds.clone() ) .unwrap(), VmmData::Empty ); assert_eq!( preboot_request_with_mmds(VmmAction::GetMMDS, mmds.clone()).unwrap(), VmmData::MmdsValue(serde_json::from_str(r#"{"key2": "value2"}"#).unwrap()) ); let filling = (0..HTTP_MAX_PAYLOAD_SIZE).map(|_| "X").collect::(); let data = "{\"key\": \"".to_string() + &filling + "\"}"; assert!(matches!( preboot_request_with_mmds( VmmAction::PatchMMDS(serde_json::from_str(&data).unwrap()), mmds.clone() ), Err(VmmActionError::MmdsLimitExceeded(_)) )); assert_eq!( preboot_request_with_mmds(VmmAction::GetMMDS, mmds).unwrap(), VmmData::MmdsValue(serde_json::from_str(r#"{"key2": "value2"}"#).unwrap()) ); } #[test] fn test_preboot_disallowed() { fn check_unsupported(res: Result) { assert!( matches!(res, Err(VmmActionError::OperationNotSupportedPreBoot)), "{:?}", res ); } check_unsupported(preboot_request(VmmAction::FlushMetrics)); check_unsupported(preboot_request(VmmAction::Pause)); check_unsupported(preboot_request(VmmAction::Resume)); check_unsupported(preboot_request(VmmAction::GetBalloonStats)); check_unsupported(preboot_request(VmmAction::UpdateBalloon( BalloonUpdateConfig { amount_mib: 0 }, ))); 
check_unsupported(preboot_request(VmmAction::StartFreePageHinting( Default::default(), ))); check_unsupported(preboot_request(VmmAction::GetFreePageHintingStatus)); check_unsupported(preboot_request(VmmAction::StopFreePageHinting)); check_unsupported(preboot_request(VmmAction::UpdateBalloonStatistics( BalloonUpdateStatsConfig { stats_polling_interval_s: 0, }, ))); check_unsupported(preboot_request(VmmAction::UpdateBlockDevice( BlockDeviceUpdateConfig::default(), ))); check_unsupported(preboot_request(VmmAction::UpdateNetworkInterface( NetworkInterfaceUpdateConfig { iface_id: String::new(), rx_rate_limiter: None, tx_rate_limiter: None, }, ))); check_unsupported(preboot_request(VmmAction::CreateSnapshot( CreateSnapshotParams { snapshot_type: SnapshotType::Full, snapshot_path: PathBuf::new(), mem_file_path: PathBuf::new(), }, ))); #[cfg(target_arch = "x86_64")] check_unsupported(preboot_request(VmmAction::SendCtrlAltDel)); check_unsupported(preboot_request(VmmAction::UpdateMemoryHotplugSize( MemoryHotplugSizeUpdate { requested_size_mib: 0, }, ))); } fn runtime_request(request: VmmAction) -> Result { let vmm = Arc::new(Mutex::new(default_vmm())); let mut runtime = RuntimeApiController::new(vmm.clone()); runtime.handle_request(request) } #[test] fn test_runtime_get_vm_config() { assert_eq!( runtime_request(VmmAction::GetVmMachineConfig).unwrap(), VmmData::MachineConfiguration(MachineConfig::default()) ); } #[test] fn test_runtime_disallowed() { fn check_unsupported(res: Result) { assert!( matches!(res, Err(VmmActionError::OperationNotSupportedPostBoot)), "{:?}", res ); } check_unsupported(runtime_request(VmmAction::ConfigureBootSource( BootSourceConfig::default(), ))); check_unsupported(runtime_request(VmmAction::ConfigureLogger(LoggerConfig { log_path: Some(PathBuf::new()), level: Some(crate::logger::LevelFilter::Debug), show_level: Some(false), show_log_origin: Some(false), module: None, }))); check_unsupported(runtime_request(VmmAction::ConfigureMetrics( 
            MetricsConfig {
                metrics_path: PathBuf::new(),
            },
        )));
        check_unsupported(runtime_request(VmmAction::InsertBlockDevice(
            BlockDeviceConfig {
                drive_id: String::new(),
                partuuid: None,
                is_root_device: false,
                cache_type: CacheType::Unsafe,
                is_read_only: Some(false),
                path_on_host: Some(String::new()),
                rate_limiter: None,
                file_engine_type: None,
                socket: None,
            },
        )));
        check_unsupported(runtime_request(VmmAction::InsertNetworkDevice(
            NetworkInterfaceConfig {
                iface_id: String::new(),
                host_dev_name: String::new(),
                guest_mac: None,
                rx_rate_limiter: None,
                tx_rate_limiter: None,
            },
        )));
        check_unsupported(runtime_request(VmmAction::SetVsockDevice(
            VsockDeviceConfig {
                vsock_id: Some(String::new()),
                guest_cid: 0,
                uds_path: String::new(),
            },
        )));
        check_unsupported(runtime_request(VmmAction::SetBalloonDevice(
            BalloonDeviceConfig::default(),
        )));
        // NOTE(review): this `SetVsockDevice` check duplicates the one above — kept as-is.
        check_unsupported(runtime_request(VmmAction::SetVsockDevice(
            VsockDeviceConfig {
                vsock_id: Some(String::new()),
                guest_cid: 0,
                uds_path: String::new(),
            },
        )));
        check_unsupported(runtime_request(VmmAction::SetMmdsConfiguration(
            MmdsConfig {
                ipv4_address: None,
                version: MmdsVersion::default(),
                network_interfaces: Vec::new(),
                imds_compat: false,
            },
        )));
        check_unsupported(runtime_request(VmmAction::UpdateMachineConfiguration(
            MachineConfigUpdate::from(MachineConfig::default()),
        )));
        check_unsupported(runtime_request(VmmAction::LoadSnapshot(
            LoadSnapshotParams {
                snapshot_path: PathBuf::new(),
                mem_backend: MemBackendConfig {
                    backend_type: MemBackendType::File,
                    backend_path: PathBuf::new(),
                },
                track_dirty_pages: false,
                resume_vm: false,
                network_overrides: vec![],
                vsock_override: None,
            },
        )));
        check_unsupported(runtime_request(VmmAction::SetEntropyDevice(
            EntropyDeviceConfig::default(),
        )));
        check_unsupported(runtime_request(VmmAction::InsertPmemDevice(PmemConfig {
            id: String::new(),
            path_on_host: String::new(),
            root_device: false,
            read_only: false,
        })));
        check_unsupported(runtime_request(VmmAction::SetMemoryHotplugDevice(
MemoryHotplugConfig::default(), ))); } } ================================================ FILE: src/vmm/src/seccomp.rs ================================================ // Copyright 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 use std::collections::HashMap; use std::io::Read; use std::sync::Arc; // This byte limit is passed to `bitcode` to guard against a potential memory // allocation DOS caused by binary filters that are too large. // This limit can be safely determined since the maximum length of a BPF // filter is 4096 instructions and Firecracker has a finite number of threads. const DESERIALIZATION_BYTES_LIMIT: usize = 100_000; /// Each BPF instruction is 8 bytes long and 4 byte aligned. /// This alignment needs to be satisfied in order for a BPF code to be accepted /// by the syscalls. Using u64 here is is safe as it has same size and even bigger alignment. pub type BpfInstruction = u64; /// Program made up of a sequence of BPF instructions. pub type BpfProgram = Vec; /// Reference to program made up of a sequence of BPF instructions. pub type BpfProgramRef<'a> = &'a [BpfInstruction]; /// Type that associates a thread category to a BPF program. pub type BpfThreadMap = HashMap>; /// Binary filter deserialization errors. #[derive(Debug, thiserror::Error, displaydoc::Display)] pub enum DeserializationError { /// Failed to read input: {0} InputRead(std::io::Error), /// Input size {0} exceeds limit of {1} bytes SizeLimitExceeded(usize, usize), /// Bitcode deserialization failed: {0} Bitcode(#[from] bitcode::Error), } /// Retrieve empty seccomp filters. 
pub fn get_empty_filters() -> BpfThreadMap { let mut map = BpfThreadMap::new(); map.insert("vmm".to_string(), Arc::new(vec![])); map.insert("api".to_string(), Arc::new(vec![])); map.insert("vcpu".to_string(), Arc::new(vec![])); map } /// Deserialize binary with bpf filters pub fn deserialize_binary(reader: R) -> Result { // Check size limit before reading the full file to prevent DOS attacks let mut buf = Vec::new(); let bytes_read = reader .take((DESERIALIZATION_BYTES_LIMIT + 1) as u64) .read_to_end(&mut buf) .map_err(DeserializationError::InputRead)?; if bytes_read > DESERIALIZATION_BYTES_LIMIT { return Err(DeserializationError::SizeLimitExceeded( bytes_read, DESERIALIZATION_BYTES_LIMIT, )); } let result: HashMap = bitcode::deserialize(&buf)?; Ok(result .into_iter() .map(|(k, v)| (k.to_lowercase(), Arc::new(v))) .collect()) } /// Filter installation errors. #[derive(Debug, thiserror::Error, displaydoc::Display)] pub enum InstallationError { /// Filter length exceeds the maximum size of {BPF_MAX_LEN:} instructions FilterTooLarge, /// prctl` syscall failed with error code: {0} Prctl(std::io::Error), } /// The maximum seccomp-BPF program length allowed by the linux kernel. pub const BPF_MAX_LEN: usize = 4096; /// BPF structure definition for filter array. /// See /usr/include/linux/filter.h . #[repr(C)] #[derive(Debug)] struct SockFprog { len: u16, filter: *const BpfInstruction, } /// Apply bpf filter. pub fn apply_filter(bpf_filter: BpfProgramRef) -> Result<(), InstallationError> { // If the program is empty, don't install the filter. if bpf_filter.is_empty() { return Ok(()); } // If the program length is greater than the limit allowed by the kernel, // fail quickly. Otherwise, `prctl` will give a more cryptic error code. if BPF_MAX_LEN < bpf_filter.len() { return Err(InstallationError::FilterTooLarge); } let bpf_filter_len = u16::try_from(bpf_filter.len()).map_err(|_| InstallationError::FilterTooLarge)?; // SAFETY: Safe because the parameters are valid. 
unsafe { { let rc = libc::prctl(libc::PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); if rc != 0 { return Err(InstallationError::Prctl(std::io::Error::last_os_error())); } } let bpf_prog = SockFprog { len: bpf_filter_len, filter: bpf_filter.as_ptr(), }; let bpf_prog_ptr = &bpf_prog as *const SockFprog; { let rc = libc::syscall( libc::SYS_seccomp, libc::SECCOMP_SET_MODE_FILTER, 0, bpf_prog_ptr, ); if rc != 0 { return Err(InstallationError::Prctl(std::io::Error::last_os_error())); } } } Ok(()) } #[cfg(test)] mod tests { #![allow(clippy::undocumented_unsafe_blocks)] use std::collections::HashMap; use std::sync::Arc; use std::thread; use super::*; #[test] fn test_deserialize_binary() { // Malformed bitcode binary. let data = "adassafvc".to_string(); deserialize_binary(data.as_bytes()).unwrap_err(); // Test that the binary deserialization is correct, and that the thread keys // have been lowercased. let bpf_prog = vec![0; 2]; let mut filter_map: HashMap = HashMap::new(); filter_map.insert("VcpU".to_string(), bpf_prog.clone()); let bytes = bitcode::serialize(&filter_map).unwrap(); let mut expected_res = BpfThreadMap::new(); expected_res.insert("vcpu".to_string(), Arc::new(bpf_prog)); assert_eq!(deserialize_binary(&bytes[..]).unwrap(), expected_res); // Test filter too large - create data that will exceed the deserialization limit // Create a large buffer that when serialized will exceed DESERIALIZATION_BYTES_LIMIT let large_data = vec![0u8; DESERIALIZATION_BYTES_LIMIT + 1000]; deserialize_binary(&large_data[..]).unwrap_err(); } #[test] fn test_filter_apply() { // Test filter too large. thread::spawn(|| { let filter: BpfProgram = vec![0; 5000]; // Apply seccomp filter. assert!(matches!( apply_filter(&filter).unwrap_err(), InstallationError::FilterTooLarge )); }) .join() .unwrap(); // Test empty filter. 
        thread::spawn(|| {
            let filter: BpfProgram = vec![];
            assert_eq!(filter.len(), 0);

            let seccomp_level = unsafe { libc::prctl(libc::PR_GET_SECCOMP) };
            assert_eq!(seccomp_level, 0);

            // An empty filter must be a no-op: nothing is installed.
            apply_filter(&filter).unwrap();

            // test that seccomp level remains 0 on failure.
            let seccomp_level = unsafe { libc::prctl(libc::PR_GET_SECCOMP) };
            assert_eq!(seccomp_level, 0);
        })
        .join()
        .unwrap();

        // Test invalid BPF code.
        thread::spawn(|| {
            let filter = vec![0xFF; 1];
            let seccomp_level = unsafe { libc::prctl(libc::PR_GET_SECCOMP) };
            assert_eq!(seccomp_level, 0);
            assert!(matches!(
                apply_filter(&filter).unwrap_err(),
                InstallationError::Prctl(_)
            ));

            // test that seccomp level remains 0 on failure.
            let seccomp_level = unsafe { libc::prctl(libc::PR_GET_SECCOMP) };
            assert_eq!(seccomp_level, 0);
        })
        .join()
        .unwrap();
    }
}


================================================
FILE: src/vmm/src/signal_handler.rs
================================================

// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

use libc::{
    SIGBUS, SIGHUP, SIGILL, SIGPIPE, SIGSEGV, SIGSYS, SIGXCPU, SIGXFSZ, c_int, c_void, siginfo_t,
};
use log::error;

use crate::FcExitCode;
use crate::logger::{IncMetric, METRICS, StoreMetric};
use crate::utils::signal::register_signal_handler;

// The offset of `si_syscall` (offending syscall identifier) within the siginfo structure
// expressed as an `(u)int*`.
// Offset `6` for an `i32` field means that the needed information is located at `6 * sizeof(i32)`.
// See /usr/include/linux/signal.h for the C struct definition.
// See https://github.com/rust-lang/libc/issues/716 for why the offset is different in Rust.
const SI_OFF_SYSCALL: isize = 6;

// `si_code` value the kernel sets for a seccomp-triggered SIGSYS (SYS_SECCOMP).
const SYS_SECCOMP_CODE: i32 = 1;

// Flushes metrics and terminates the process with the given exit code.
#[inline]
fn exit_with_code(exit_code: FcExitCode) {
    // Write the metrics before exiting.
    if let Err(err) = METRICS.write() {
        error!("Failed to write metrics while stopping: {}", err);
    }
    // SAFETY: Safe because we're terminating the process anyway.
    unsafe { libc::_exit(exit_code as i32) };
}

// Expands to an `extern "C"` signal handler that records a metric, logs, runs
// `$body`, and exits the process with `$exit_code`.
macro_rules! generate_handler {
    ($fn_name:ident, $signal_name:ident, $exit_code:ident, $signal_metric:expr, $body:ident) => {
        #[inline(always)]
        extern "C" fn $fn_name(num: c_int, info: *mut siginfo_t, _unused: *mut c_void) {
            // SAFETY: Safe because we're just reading some fields from a supposedly valid argument.
            let si_signo = unsafe { (*info).si_signo };
            // SAFETY: Safe because we're just reading some fields from a supposedly valid argument.
            let si_code = unsafe { (*info).si_code };

            if num != si_signo || num != $signal_name {
                exit_with_code(FcExitCode::UnexpectedError);
            }
            $signal_metric.store(1);
            error!(
                "Shutting down VM after intercepting signal {}, code {}.",
                si_signo, si_code
            );

            $body(si_code, info);

            match si_signo {
                $signal_name => exit_with_code(crate::FcExitCode::$exit_code),
                _ => exit_with_code(FcExitCode::UnexpectedError),
            };
        }
    };
}

// Logs the offending syscall number for seccomp-triggered SIGSYS signals.
fn log_sigsys_err(si_code: c_int, info: *mut siginfo_t) {
    if si_code != SYS_SECCOMP_CODE {
        // We received a SIGSYS for a reason other than `bad syscall`.
        exit_with_code(FcExitCode::UnexpectedError);
    }
    // SAFETY: Other signals which might do async unsafe things incompatible with the rest of this
    // function are blocked due to the sa_mask used when registering the signal handler.
let syscall = unsafe { *(info as *const i32).offset(SI_OFF_SYSCALL) }; error!( "Shutting down VM after intercepting a bad syscall ({}).", syscall ); } fn empty_fn(_si_code: c_int, _info: *mut siginfo_t) {} generate_handler!( sigxfsz_handler, SIGXFSZ, SIGXFSZ, METRICS.signals.sigxfsz, empty_fn ); generate_handler!( sigxcpu_handler, SIGXCPU, SIGXCPU, METRICS.signals.sigxcpu, empty_fn ); generate_handler!( sigbus_handler, SIGBUS, SIGBUS, METRICS.signals.sigbus, empty_fn ); generate_handler!( sigsegv_handler, SIGSEGV, SIGSEGV, METRICS.signals.sigsegv, empty_fn ); generate_handler!( sigsys_handler, SIGSYS, BadSyscall, METRICS.seccomp.num_faults, log_sigsys_err ); generate_handler!( sighup_handler, SIGHUP, SIGHUP, METRICS.signals.sighup, empty_fn ); generate_handler!( sigill_handler, SIGILL, SIGILL, METRICS.signals.sigill, empty_fn ); #[inline(always)] extern "C" fn sigpipe_handler(num: c_int, info: *mut siginfo_t, _unused: *mut c_void) { // Just record the metric and allow the process to continue, the EPIPE error needs // to be handled at caller level. // SAFETY: Safe because we're just reading some fields from a supposedly valid argument. let si_signo = unsafe { (*info).si_signo }; // SAFETY: Safe because we're just reading some fields from a supposedly valid argument. let si_code = unsafe { (*info).si_code }; if num != si_signo || num != SIGPIPE { error!("Received invalid signal {}, code {}.", si_signo, si_code); return; } METRICS.signals.sigpipe.inc(); error!("Received signal {}, code {}.", si_signo, si_code); } /// Registers all the required signal handlers. /// /// Custom handlers are installed for: `SIGBUS`, `SIGSEGV`, `SIGSYS` /// `SIGXFSZ` `SIGXCPU` `SIGPIPE` `SIGHUP` and `SIGILL`. 
pub fn register_signal_handlers() -> vmm_sys_util::errno::Result<()> { // Call to unsafe register_signal_handler which is considered unsafe because it will // register a signal handler which will be called in the current thread and will interrupt // whatever work is done on the current thread, so we have to keep in mind that the registered // signal handler must only do async-signal-safe operations. register_signal_handler(SIGSYS, sigsys_handler)?; register_signal_handler(SIGBUS, sigbus_handler)?; register_signal_handler(SIGSEGV, sigsegv_handler)?; register_signal_handler(SIGXFSZ, sigxfsz_handler)?; register_signal_handler(SIGXCPU, sigxcpu_handler)?; register_signal_handler(SIGPIPE, sigpipe_handler)?; register_signal_handler(SIGHUP, sighup_handler)?; register_signal_handler(SIGILL, sigill_handler)?; Ok(()) } ================================================ FILE: src/vmm/src/snapshot/crc.rs ================================================ // Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 //! Implements readers and writers that compute the CRC64 checksum of the bytes //! read/written. use std::io::Write; use crc64::crc64; /// Computes the CRC64 checksum of the written bytes. /// /// ``` /// use std::io::Write; /// /// use vmm::snapshot::crc::CRC64Writer; /// /// let mut buf = vec![0; 16]; /// let write_buf = vec![123; 16]; /// let mut slice = buf.as_mut_slice(); /// /// // Create a new writer from slice. /// let mut crc_writer = CRC64Writer::new(&mut slice); /// /// crc_writer.write_all(&write_buf.as_slice()).unwrap(); /// assert_eq!(crc_writer.checksum(), 0x29D5_3572_1632_6566); /// assert_eq!(write_buf, buf); /// ``` #[derive(Debug)] pub struct CRC64Writer { /// The underlying raw writer. Using this directly will bypass CRC computation! pub writer: T, crc64: u64, } impl CRC64Writer where T: Write, { /// Create a new writer. 
pub fn new(writer: T) -> Self { CRC64Writer { crc64: 0, writer } } /// Returns the current checksum value. pub fn checksum(&self) -> u64 { self.crc64 } } impl Write for CRC64Writer where T: Write, { fn write(&mut self, buf: &[u8]) -> std::io::Result { let bytes_written = self.writer.write(buf)?; self.crc64 = crc64(self.crc64, &buf[..bytes_written]); Ok(bytes_written) } fn flush(&mut self) -> std::io::Result<()> { self.writer.flush() } } #[cfg(test)] mod tests { use super::{CRC64Writer, Write}; #[test] fn test_crc_new() { let mut buf = vec![0; 5]; let mut slice = buf.as_mut_slice(); let crc_writer = CRC64Writer::new(&mut slice); assert_eq!(crc_writer.crc64, 0); assert_eq!(crc_writer.writer, &[0; 5]); assert_eq!(crc_writer.checksum(), 0); } #[test] fn test_crc_write() { let mut buf = vec![0; 16]; let write_buf = vec![123; 16]; let mut slice = buf.as_mut_slice(); let mut crc_writer = CRC64Writer::new(&mut slice); crc_writer.write_all(write_buf.as_slice()).unwrap(); crc_writer.flush().unwrap(); assert_eq!(crc_writer.checksum(), 0x29D5_3572_1632_6566); assert_eq!(crc_writer.checksum(), crc_writer.crc64); } } ================================================ FILE: src/vmm/src/snapshot/mod.rs ================================================ // Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 //! Provides serialization and deserialization facilities and implements a persistent storage //! format for Firecracker state snapshots. //! //! The `Snapshot` API manages serialization and deserialization of collections of objects //! that implement the `serde` `Serialize`, `Deserialize` trait. Currently, we use //! [`bitcode`](https://docs.rs/bitcode/latest/bitcode/) for performing the serialization. //! //! The snapshot format uses the following layout: //! //! |-----------------------------| //! | 64 bit magic_id | //! |-----------------------------| //! | version string | //! |-----------------------------| //! 
| State | //! |-----------------------------| //! | optional CRC64 | //! |-----------------------------| //! //! //! The snapshot format uses a version value in the form of `MAJOR.MINOR.PATCH`. The version is //! provided by the library clients (it is not tied to this crate). pub mod crc; mod persist; use std::fmt::Debug; use std::io::{Read, Write}; use crc64::crc64; use semver::Version; use serde::de::DeserializeOwned; use serde::{Deserialize, Serialize}; use crate::persist::SNAPSHOT_VERSION; use crate::snapshot::crc::CRC64Writer; pub use crate::snapshot::persist::Persist; #[cfg(target_arch = "x86_64")] const SNAPSHOT_MAGIC_ID: u64 = 0x0710_1984_8664_0000u64; #[cfg(target_arch = "aarch64")] const SNAPSHOT_MAGIC_ID: u64 = 0x0710_1984_AAAA_0000u64; /// Maximum size in bytes for snapshot deserialization to prevent DOS attacks. /// Snapshots contain VM state which can be large, but we set a reasonable upper bound. /// This limit is 10MB which should be sufficient for any legitimate snapshot. const SNAPSHOT_DESERIALIZATION_BYTES_LIMIT: usize = 10_000_000; /// Error definitions for the Snapshot API. 
#[derive(Debug, thiserror::Error, displaydoc::Display)]
pub enum SnapshotError {
    /// CRC64 validation failed
    Crc64,
    /// Invalid data version: {0}
    InvalidFormatVersion(Version),
    /// Magic value does not match arch: {0}
    InvalidMagic(u64),
    /// An error occurred during bitcode serialization: {0}
    // ^ fixed typo "occured" -> "occurred" in the displaydoc-generated Display message.
    Bitcode(#[from] bitcode::Error),
    /// IO Error: {0}
    Io(#[from] std::io::Error),
    /// Snapshot size exceeds limit of {0} bytes
    SizeLimitExceeded(usize),
}

/// Bitcode-serializes `data` and writes the resulting bytes to `write`.
fn serialize<S: Serialize, W: Write>(data: &S, write: &mut W) -> Result<(), SnapshotError> {
    let encoded = bitcode::serialize(data)?;
    write.write_all(&encoded).map_err(SnapshotError::Io)
}

/// Firecracker snapshot header
#[derive(Debug, Serialize, Deserialize)]
struct SnapshotHdr {
    /// magic value
    magic: u64,
    /// Snapshot data version
    version: Version,
}

/// Assumes the raw bytes stream read from the given [`Read`] instance is a snapshot file,
/// and returns the version of it.
///
/// # Errors
/// - [`SnapshotError::SizeLimitExceeded`] if the input exceeds the deserialization limit.
/// - [`SnapshotError::Io`] if reading fails or the file is too short to contain a CRC.
/// - [`SnapshotError::Bitcode`] if the payload cannot be decoded.
pub fn get_format_version<R: Read>(reader: &mut R) -> Result<Version, SnapshotError> {
    // Check size limit before reading the full file to prevent DOS attacks.
    let mut buf = Vec::new();
    let bytes_read = reader
        .take((SNAPSHOT_DESERIALIZATION_BYTES_LIMIT + 1) as u64)
        .read_to_end(&mut buf)?;
    if bytes_read > SNAPSHOT_DESERIALIZATION_BYTES_LIMIT {
        return Err(SnapshotError::SizeLimitExceeded(
            SNAPSHOT_DESERIALIZATION_BYTES_LIMIT,
        ));
    }

    // The last 8 bytes are the CRC, so we need to separate them for deserialization.
    if buf.len() < 8 {
        return Err(SnapshotError::Io(std::io::Error::new(
            std::io::ErrorKind::UnexpectedEof,
            "File too short to contain CRC",
        )));
    }
    let (data_buf, _crc_buf) = buf.split_at(buf.len() - 8);

    // Since bitcode requires exact type matching, we need to try deserializing
    // as the specific snapshot type we know about. In practice, all snapshots
    // in Firecracker use MicrovmState as the data type.
    use crate::persist::MicrovmState;
    match bitcode::deserialize::<Snapshot<MicrovmState>>(data_buf) {
        Ok(snapshot) => Ok(snapshot.header.version),
        Err(e) => {
            // If deserialization fails, it could be due to:
            // 1. The snapshot was created with bincode (older versions)
            // 2. The MicrovmState structure has changed and is incompatible
            // 3. The snapshot file is corrupted
            // Since supporting bincode is out of scope, we return a descriptive error.
            Err(SnapshotError::Bitcode(e))
        }
    }
}

/// Firecracker snapshot type
///
/// A type used to store and load Firecracker snapshots of a particular version
#[derive(Debug, Serialize, Deserialize)]
pub struct Snapshot<Data> {
    header: SnapshotHdr,
    /// The data stored in this [`Snapshot`]
    pub data: Data,
}

impl<Data> Snapshot<Data> {
    /// Constructs a new snapshot with the given `data`.
    pub fn new(data: Data) -> Self {
        Self {
            header: SnapshotHdr {
                magic: SNAPSHOT_MAGIC_ID,
                version: SNAPSHOT_VERSION.clone(),
            },
            data,
        }
    }

    /// Gets the version of this snapshot
    pub fn version(&self) -> &Version {
        &self.header.version
    }
}

impl<Data: DeserializeOwned> Snapshot<Data> {
    /// Deserializes a snapshot from `buf` and validates its header (magic and version),
    /// without verifying any CRC.
    pub(crate) fn load_without_crc_check(buf: &[u8]) -> Result<Self, SnapshotError> {
        // Check size limit to prevent DOS attacks.
        if buf.len() > SNAPSHOT_DESERIALIZATION_BYTES_LIMIT {
            return Err(SnapshotError::SizeLimitExceeded(
                SNAPSHOT_DESERIALIZATION_BYTES_LIMIT,
            ));
        }

        let snapshot: Self = bitcode::deserialize(buf)?;

        // Validate the header: magic must match this architecture, and the version must
        // share our major version with a minor version no newer than ours.
        if snapshot.header.magic != SNAPSHOT_MAGIC_ID {
            return Err(SnapshotError::InvalidMagic(snapshot.header.magic));
        }
        if snapshot.header.version.major != SNAPSHOT_VERSION.major
            || snapshot.header.version.minor > SNAPSHOT_VERSION.minor
        {
            return Err(SnapshotError::InvalidFormatVersion(
                snapshot.header.version.clone(),
            ));
        }

        Ok(snapshot)
    }

    /// Loads a snapshot from the given [`Read`] instance, performing all validations
    /// (CRC, snapshot magic value, snapshot version).
pub fn load(reader: &mut R) -> Result { // Check size limit before reading the full file to prevent DOS attacks let mut buf = Vec::new(); let bytes_read = reader .take((SNAPSHOT_DESERIALIZATION_BYTES_LIMIT + 1) as u64) .read_to_end(&mut buf)?; if bytes_read > SNAPSHOT_DESERIALIZATION_BYTES_LIMIT { return Err(SnapshotError::SizeLimitExceeded( SNAPSHOT_DESERIALIZATION_BYTES_LIMIT, )); } // The last 8 bytes are the CRC, so we need to separate them if buf.len() < 8 { return Err(SnapshotError::Io(std::io::Error::new( std::io::ErrorKind::UnexpectedEof, "File too short to contain CRC", ))); } let (data_buf, _crc_buf) = buf.split_at(buf.len() - 8); let snapshot = Self::load_without_crc_check(data_buf)?; let computed_checksum = crc64(0, buf.as_slice()); // When we read the entire file, we also read the checksum into the buffer. The CRC has the // property that crc(0, buf.as_slice()) == 0 iff the last 8 bytes of buf are the checksum // of all the preceeding bytes, and this is the property we are using here. if computed_checksum != 0 { return Err(SnapshotError::Crc64); } Ok(snapshot) } } impl Snapshot { /// Saves `self` to the given [`Write`] instance, computing the CRC of the written data, /// and then writing the CRC into the `Write` instance, too. 
pub fn save(&self, writer: &mut W) -> Result<(), SnapshotError> { let mut crc_writer = CRC64Writer::new(writer); serialize(self, &mut crc_writer)?; // Write the CRC as raw bytes, not bitcode-serialized crc_writer .writer .write_all(&crc_writer.checksum().to_le_bytes()) .map_err(SnapshotError::Io) } } #[cfg(test)] mod tests { use super::*; use crate::persist::MicrovmState; #[test] fn test_snapshot_restore() { let state = MicrovmState::default(); let mut buf = Vec::new(); Snapshot::new(state).save(&mut buf).unwrap(); Snapshot::::load(&mut buf.as_slice()).unwrap(); } #[test] fn test_parse_version_from_file() { use crate::persist::MicrovmState; let snapshot = Snapshot::new(MicrovmState::default()); // Use a Vec that can grow as needed let mut snapshot_data = Vec::new(); snapshot.save(&mut snapshot_data).unwrap(); // Debug: print the length to understand what's happening println!("Snapshot data length: {}", snapshot_data.len()); assert_eq!( get_format_version(&mut std::io::Cursor::new(&snapshot_data)).unwrap(), SNAPSHOT_VERSION ); } #[test] fn test_bad_reader() { #[derive(Debug)] struct BadReader; impl Read for BadReader { fn read(&mut self, _buf: &mut [u8]) -> std::io::Result { Err(std::io::ErrorKind::InvalidInput.into()) } } let mut reader = BadReader {}; assert!( matches!(Snapshot::<()>::load(&mut reader), Err(SnapshotError::Io(inner)) if inner.kind() == std::io::ErrorKind::InvalidInput) ); } #[test] fn test_bad_magic() { // Create a snapshot with corrupted magic and serialize it properly let mut bad_snapshot = Snapshot::new(()); bad_snapshot.header.magic = 0xDEADBEEF; // Serialize the bad snapshot (without CRC for load_without_crc_check) let corrupted_data = bitcode::serialize(&bad_snapshot).unwrap(); assert!(matches!( Snapshot::<()>::load_without_crc_check(&corrupted_data), Err(SnapshotError::InvalidMagic(_)) )); } #[test] fn test_bad_crc() { let snapshot = Snapshot::new(()); // Use a Vec that can grow as needed let mut valid_data = Vec::new(); snapshot.save(&mut 
valid_data).unwrap(); // Corrupt the CRC by changing the last 8 bytes (where CRC is stored) if valid_data.len() >= 8 { for i in (valid_data.len() - 8)..valid_data.len() { valid_data[i] ^= 0xFF; // Corrupt the CRC by flipping bits } } assert!(matches!( Snapshot::<()>::load(&mut std::io::Cursor::new(&valid_data)), Err(SnapshotError::Crc64) )); } #[test] fn test_bad_version() { // Different major version: shouldn't work let mut bad_snapshot = Snapshot::new(()); bad_snapshot.header.version.major = SNAPSHOT_VERSION.major + 1; let data = bitcode::serialize(&bad_snapshot).unwrap(); assert!(matches!( Snapshot::<()>::load_without_crc_check(&data), Err(SnapshotError::InvalidFormatVersion(v)) if v.major == SNAPSHOT_VERSION.major + 1 )); // minor > SNAPSHOT_VERSION.minor: shouldn't work let mut bad_snapshot = Snapshot::new(()); bad_snapshot.header.version.minor = SNAPSHOT_VERSION.minor + 1; let data = bitcode::serialize(&bad_snapshot).unwrap(); assert!(matches!( Snapshot::<()>::load_without_crc_check(&data), Err(SnapshotError::InvalidFormatVersion(v)) if v.minor == SNAPSHOT_VERSION.minor + 1 )); // But we can support minor versions smaller or equal to ours. We also support // all patch versions within our supported major.minor version. 
let snapshot = Snapshot::new(()); let data = bitcode::serialize(&snapshot).unwrap(); Snapshot::<()>::load_without_crc_check(&data).unwrap(); if SNAPSHOT_VERSION.minor != 0 { let mut snapshot = Snapshot::new(()); snapshot.header.version.minor = SNAPSHOT_VERSION.minor - 1; let data = bitcode::serialize(&snapshot).unwrap(); Snapshot::<()>::load_without_crc_check(&data).unwrap(); } let mut snapshot = Snapshot::new(()); snapshot.header.version.patch = 0; let data = bitcode::serialize(&snapshot).unwrap(); Snapshot::<()>::load_without_crc_check(&data).unwrap(); let mut snapshot = Snapshot::new(()); snapshot.header.version.patch = SNAPSHOT_VERSION.patch + 1; let data = bitcode::serialize(&snapshot).unwrap(); Snapshot::<()>::load_without_crc_check(&data).unwrap(); let mut snapshot = Snapshot::new(()); snapshot.header.version.patch = 1024; let data = bitcode::serialize(&snapshot).unwrap(); Snapshot::<()>::load_without_crc_check(&data).unwrap(); } } ================================================ FILE: src/vmm/src/snapshot/persist.rs ================================================ // Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 //! Defines an abstract interface for saving/restoring a component from state. /// An abstract interface for saving/restoring a component using a specific state. pub trait Persist<'a> where Self: Sized, { /// The type of the object representing the state of the component. type State; /// The type of the object holding the constructor arguments. type ConstructorArgs; /// The type of the error that can occur while constructing the object. type Error; /// Returns the current state of the component. fn save(&self) -> Self::State; /// Constructs a component from a specified state. 
fn restore( constructor_args: Self::ConstructorArgs, state: &Self::State, ) -> Result; } ================================================ FILE: src/vmm/src/test_utils/mock_resources/make_noisy_kernel.sh ================================================ #!/bin/bash # Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. # SPDX-License-Identifier: Apache-2.0 # This script illustrates the build steps for `test_noisy_elf.bin`. set -e WORKDIR="/tmp/noisy_kernel" SOURCE=$(readlink -f "$0") TEST_RESOURCE_DIR="$(dirname "$SOURCE")" FC_DIR="$TEST_RESOURCE_DIR/../../../.." KERNEL="linux-4.14.176" KERNEL_ARCHIVE="$KERNEL.tar.xz" KERNEL_URL="https://cdn.kernel.org/pub/linux/kernel/v4.x/$KERNEL_ARCHIVE" INIT_PROJ="dirtying_init" INIT_ARCHIVE="$INIT_PROJ.tgz" rm -rf "$WORKDIR" && mkdir -p "$WORKDIR" # Prepare dirtying init. echo "Preparing init..." cp "$INIT_ARCHIVE" "$WORKDIR" cd "$WORKDIR" tar xzf "$INIT_ARCHIVE" pushd "$INIT_PROJ" &>/dev/null cargo build --release popd &>/dev/null # Download kernel sources. echo "Downloading kernel..." curl "$KERNEL_URL" > "$KERNEL_ARCHIVE" echo "Extracting kernel sources..." tar xf "$KERNEL_ARCHIVE" cd "$KERNEL" # Copy base kernel config from Firecracker resources. cp "$FC_DIR/resources/guest_configs/microvm-kernel-x86_64-4.14.config" .config # Prepare initramfs. echo "Preparing initramfs..." mkdir -p initramfs cp "../$INIT_PROJ/target/x86_64-unknown-linux-musl/release/dirtying_init" initramfs/init pushd initramfs &>/dev/null fakeroot mkdir -p dev fakeroot mknod dev/console c 5 1 fakeroot chown root init find . | cpio -H newc -o > ../initramfs.cpio fakeroot chown root ../initramfs.cpio popd &>/dev/null # Update kernel config with initramfs settings. echo "Writing initramfs settings in kernel config..." sed -i 's/CONFIG_INITRAMFS_SOURCE=""/CONFIG_INITRAMFS_SOURCE="initramfs.cpio"/' .config echo "CONFIG_INITRAMFS_ROOT_GID=0" >> .config echo "CONFIG_INITRAMFS_ROOT_UID=0" >> .config # Build kernel. echo "Building kernel..." 
make vmlinux cp vmlinux "$TEST_RESOURCE_DIR/test_noisy_elf.bin" echo "Done!" exit 0 ================================================ FILE: src/vmm/src/test_utils/mock_resources/mod.rs ================================================ // Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 #![allow(missing_docs)] use std::path::PathBuf; use crate::cpu_config::templates::CustomCpuTemplate; use crate::resources::VmResources; use crate::vmm_config::boot_source::BootSourceConfig; use crate::vmm_config::machine_config::{MachineConfig, MachineConfigUpdate}; pub const DEFAULT_BOOT_ARGS: &str = "reboot=k panic=1 pci=off"; #[cfg(target_arch = "x86_64")] pub const DEFAULT_KERNEL_IMAGE: &str = "test_elf.bin"; #[cfg(target_arch = "aarch64")] pub const DEFAULT_KERNEL_IMAGE: &str = "test_pe.bin"; #[cfg(target_arch = "x86_64")] pub const NOISY_KERNEL_IMAGE: &str = "test_noisy_elf.bin"; #[cfg(target_arch = "aarch64")] pub const NOISY_KERNEL_IMAGE: &str = "test_pe.bin"; pub fn kernel_image_path(kernel_image: Option<&str>) -> String { let mut path = PathBuf::from(env!("CARGO_MANIFEST_DIR")); path.push("src/test_utils/mock_resources"); path.push(kernel_image.unwrap_or(DEFAULT_KERNEL_IMAGE)); path.as_os_str().to_str().unwrap().to_string() } macro_rules! 
generate_from { ($src_type: ty, $dst_type: ty) => { impl From<$src_type> for $dst_type { fn from(src: $src_type) -> $dst_type { src.0 } } }; } #[derive(Debug)] pub struct MockBootSourceConfig(BootSourceConfig); impl MockBootSourceConfig { pub fn new() -> MockBootSourceConfig { MockBootSourceConfig(BootSourceConfig { kernel_image_path: kernel_image_path(None), initrd_path: None, boot_args: None, }) } pub fn with_default_boot_args(mut self) -> Self { self.0.boot_args = Some(DEFAULT_BOOT_ARGS.to_string()); self } #[cfg(target_arch = "x86_64")] pub fn with_kernel(mut self, kernel_image: &str) -> Self { self.0.kernel_image_path = kernel_image_path(Some(kernel_image)); self } } impl Default for MockBootSourceConfig { fn default() -> Self { Self::new() } } #[derive(Debug, Default)] pub struct MockVmResources(VmResources); impl MockVmResources { pub fn new() -> MockVmResources { MockVmResources::default() } pub fn with_boot_source(mut self, boot_source_cfg: BootSourceConfig) -> Self { self.0.build_boot_source(boot_source_cfg).unwrap(); self } pub fn with_vm_config(mut self, vm_config: MachineConfig) -> Self { let machine_config = MachineConfigUpdate::from(vm_config); self.0.update_machine_config(&machine_config).unwrap(); self } pub fn set_cpu_template(&mut self, cpu_template: CustomCpuTemplate) { self.0.machine_config.set_custom_cpu_template(cpu_template); } } #[derive(Debug, Default)] pub struct MockVmConfig(MachineConfig); impl MockVmConfig { pub fn new() -> MockVmConfig { MockVmConfig::default() } pub fn with_dirty_page_tracking(mut self) -> Self { self.0.track_dirty_pages = true; self } } generate_from!(MockBootSourceConfig, BootSourceConfig); generate_from!(MockVmResources, VmResources); generate_from!(MockVmConfig, MachineConfig); ================================================ FILE: src/vmm/src/test_utils/mod.rs ================================================ // Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
// SPDX-License-Identifier: Apache-2.0 #![allow(missing_docs)] use std::sync::{Arc, Mutex}; use vm_memory::{GuestAddress, GuestRegionCollection}; use vmm_sys_util::tempdir::TempDir; use crate::builder::build_microvm_for_boot; use crate::resources::VmResources; use crate::seccomp::get_empty_filters; use crate::test_utils::mock_resources::{MockBootSourceConfig, MockVmConfig, MockVmResources}; use crate::vmm_config::boot_source::BootSourceConfig; use crate::vmm_config::instance_info::InstanceInfo; use crate::vmm_config::machine_config::HugePageConfig; use crate::vmm_config::memory_hotplug::MemoryHotplugConfig; use crate::vstate::memory::{self, GuestMemoryMmap, GuestRegionMmap, GuestRegionMmapExt}; use crate::{EventManager, Vmm}; pub mod mock_resources; /// Creates a [`GuestMemoryMmap`] with a single region of the given size starting at guest /// physical address 0 and without dirty tracking. pub fn single_region_mem(region_size: usize) -> GuestMemoryMmap { single_region_mem_at(0, region_size) } pub fn single_region_mem_raw(region_size: usize) -> Vec { single_region_mem_at_raw(0, region_size) } /// Creates a [`GuestMemoryMmap`] with a single region of the given size starting at the given /// guest physical address `at` and without dirty tracking. pub fn single_region_mem_at(at: u64, size: usize) -> GuestMemoryMmap { multi_region_mem(&[(GuestAddress(at), size)]) } pub fn single_region_mem_at_raw(at: u64, size: usize) -> Vec { multi_region_mem_raw(&[(GuestAddress(at), size)]) } /// Creates a [`GuestMemoryMmap`] with multiple regions and without dirty page tracking. 
pub fn multi_region_mem(regions: &[(GuestAddress, usize)]) -> GuestMemoryMmap { GuestRegionCollection::from_regions( memory::anonymous(regions.iter().copied(), false, HugePageConfig::None) .expect("Cannot initialize memory") .into_iter() .map(|region| GuestRegionMmapExt::dram_from_mmap_region(region, 0)) .collect(), ) .unwrap() } pub fn multi_region_mem_raw(regions: &[(GuestAddress, usize)]) -> Vec { memory::anonymous(regions.iter().copied(), false, HugePageConfig::None) .expect("Cannot initialize memory") } /// Creates a [`GuestMemoryMmap`] of the given size with the contained regions laid out in /// accordance with the requirements of the architecture on which the tests are being run. pub fn arch_mem(mem_size_bytes: usize) -> GuestMemoryMmap { multi_region_mem(&crate::arch::arch_memory_regions(mem_size_bytes)) } pub fn arch_mem_raw(mem_size_bytes: usize) -> Vec { multi_region_mem_raw(&crate::arch::arch_memory_regions(mem_size_bytes)) } pub fn create_vmm( _kernel_image: Option<&str>, is_diff: bool, boot_microvm: bool, pci_enabled: bool, memory_hotplug_enabled: bool, ) -> (Arc>, EventManager) { let mut event_manager = EventManager::new().unwrap(); let empty_seccomp_filters = get_empty_filters(); let boot_source_cfg = MockBootSourceConfig::new().with_default_boot_args(); #[cfg(target_arch = "aarch64")] let boot_source_cfg: BootSourceConfig = boot_source_cfg.into(); #[cfg(target_arch = "x86_64")] let boot_source_cfg: BootSourceConfig = match _kernel_image { Some(kernel) => boot_source_cfg.with_kernel(kernel).into(), None => boot_source_cfg.into(), }; let mock_vm_res = MockVmResources::new().with_boot_source(boot_source_cfg); let mut resources: VmResources = if is_diff { mock_vm_res .with_vm_config(MockVmConfig::new().with_dirty_page_tracking().into()) .into() } else { mock_vm_res.into() }; resources.pci_enabled = pci_enabled; if memory_hotplug_enabled { resources.memory_hotplug = Some(MemoryHotplugConfig { total_size_mib: 1024, block_size_mib: 2, slot_size_mib: 128, 
}); } let vmm = build_microvm_for_boot( &InstanceInfo::default(), &resources, &mut event_manager, &empty_seccomp_filters, ) .unwrap(); if boot_microvm { vmm.lock().unwrap().resume_vm().unwrap(); } (vmm, event_manager) } pub fn default_vmm(kernel_image: Option<&str>) -> (Arc>, EventManager) { create_vmm(kernel_image, false, true, false, false) } pub fn default_vmm_no_boot(kernel_image: Option<&str>) -> (Arc>, EventManager) { create_vmm(kernel_image, false, false, false, false) } pub fn dirty_tracking_vmm(kernel_image: Option<&str>) -> (Arc>, EventManager) { create_vmm(kernel_image, true, true, false, false) } #[allow(clippy::undocumented_unsafe_blocks)] #[allow(clippy::cast_possible_truncation)] pub fn create_tmp_socket() -> (TempDir, String) { let tmp_dir = TempDir::new().unwrap(); let tmp_dir_path_str = tmp_dir.as_path().to_str().unwrap(); let tmp_socket_path = format!("{tmp_dir_path_str}/tmp_socket"); unsafe { let socketfd = libc::socket(libc::AF_UNIX, libc::SOCK_STREAM, 0); if socketfd < 0 { panic!("Cannot create socket"); } let mut socket_addr = libc::sockaddr_un { sun_family: libc::AF_UNIX as u16, sun_path: [0; 108], }; std::ptr::copy( tmp_socket_path.as_ptr().cast(), socket_addr.sun_path.as_mut_ptr(), tmp_socket_path.len(), ); let bind = libc::bind( socketfd, (&socket_addr as *const libc::sockaddr_un).cast(), std::mem::size_of::() as u32, ); if bind < 0 { panic!("Cannot bind socket"); } let listen = libc::listen(socketfd, 1); if listen < 0 { panic!("Cannot listen on socket"); } } (tmp_dir, tmp_socket_path) } ================================================ FILE: src/vmm/src/utils/byte_order.rs ================================================ // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 macro_rules! 
generate_read_fn { ($fn_name: ident, $data_type: ty, $byte_type: ty, $type_size: expr, $endian_type: ident) => { /// Read bytes from the slice pub fn $fn_name(input: &[$byte_type]) -> $data_type { let mut array = [0u8; std::mem::size_of::<$data_type>()]; let how_many = input.len().min(std::mem::size_of::<$data_type>()); array[..how_many].copy_from_slice(&input[..how_many]); <$data_type>::$endian_type(array) } }; } macro_rules! generate_write_fn { ($fn_name: ident, $data_type: ty, $byte_type: ty, $endian_type: ident) => { /// Write bytes to the slice pub fn $fn_name(buf: &mut [$byte_type], n: $data_type) { let bytes = n.$endian_type(); let how_much = buf.len().min(bytes.len()); buf[..how_much].copy_from_slice(&bytes[..how_much]); } }; } generate_read_fn!(read_le_u32, u32, u8, 4, from_le_bytes); generate_read_fn!(read_le_u64, u64, u8, 8, from_le_bytes); generate_read_fn!(read_be_u16, u16, u8, 2, from_be_bytes); generate_read_fn!(read_be_u32, u32, u8, 4, from_be_bytes); generate_write_fn!(write_le_u32, u32, u8, to_le_bytes); generate_write_fn!(write_le_u64, u64, u8, to_le_bytes); generate_write_fn!(write_be_u16, u16, u8, to_be_bytes); generate_write_fn!(write_be_u32, u32, u8, to_be_bytes); #[cfg(test)] mod tests { use super::*; macro_rules! 
byte_order_test_read_write { ($test_name: ident, $write_fn_name: ident, $read_fn_name: ident, $is_be: expr, $data_type: ty) => { #[test] fn $test_name() { #[allow(overflowing_literals)] let test_cases = [ ( 0x0123_4567_89AB_CDEF as u64, [0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef], ), ( 0x0000_0000_0000_0000 as u64, [0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00], ), ( 0x1923_2345_ABF3_CCD4 as u64, [0x19, 0x23, 0x23, 0x45, 0xAB, 0xF3, 0xCC, 0xD4], ), ( 0x0FF0_0FF0_0FF0_0FF0 as u64, [0x0F, 0xF0, 0x0F, 0xF0, 0x0F, 0xF0, 0x0F, 0xF0], ), ( 0xFFFF_FFFF_FFFF_FFFF as u64, [0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF], ), ( 0x89AB_12D4_C2D2_09BB as u64, [0x89, 0xAB, 0x12, 0xD4, 0xC2, 0xD2, 0x09, 0xBB], ), ]; let type_size = std::mem::size_of::<$data_type>(); #[allow(clippy::cast_possible_truncation)] #[allow(clippy::cast_sign_loss)] for (test_val, v_arr) in &test_cases { let v = *test_val as $data_type; let cmp_iter: Box> = if $is_be { Box::new(v_arr[(8 - type_size)..].iter()) } else { Box::new(v_arr.iter().rev()) }; // test write let mut write_arr = vec![Default::default(); type_size]; $write_fn_name(&mut write_arr, v); for (cmp, cur) in cmp_iter.zip(write_arr.iter()) { assert_eq!(*cmp, *cur as u8) } // test read let read_val = $read_fn_name(&write_arr); assert_eq!(v, read_val); } } }; } byte_order_test_read_write!(test_le_u32, write_le_u32, read_le_u32, false, u32); byte_order_test_read_write!(test_le_u64, write_le_u64, read_le_u64, false, u64); byte_order_test_read_write!(test_be_u16, write_be_u16, read_be_u16, true, u16); byte_order_test_read_write!(test_be_u32, write_be_u32, read_be_u32, true, u32); } ================================================ FILE: src/vmm/src/utils/mod.rs ================================================ // Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
// SPDX-License-Identifier: Apache-2.0 /// Module with helpers to read/write bytes into slices pub mod byte_order; /// Module with network related helpers pub mod net; /// Module with external libc functions pub mod signal; /// Module with state machine pub mod sm; use std::fs::{File, OpenOptions}; use std::num::Wrapping; use std::os::unix::fs::OpenOptionsExt; use std::path::Path; use libc::O_NONBLOCK; /// How many bits to left-shift by to convert MiB to bytes const MIB_TO_BYTES_SHIFT: usize = 20; /// Return the default page size of the platform, in bytes. pub fn get_page_size() -> Result { // SAFETY: Safe because the parameters are valid. match unsafe { libc::sysconf(libc::_SC_PAGESIZE) } { -1 => Err(vmm_sys_util::errno::Error::last()), ps => Ok(usize::try_from(ps).unwrap()), } } /// Safely converts a u64 value to a usize value. /// This bypasses the Clippy lint check because we only support 64-bit platforms. #[cfg(target_pointer_width = "64")] #[inline] #[allow(clippy::cast_possible_truncation)] pub const fn u64_to_usize(num: u64) -> usize { num as usize } /// Safely converts a usize value to a u64 value. /// This bypasses the Clippy lint check because we only support 64-bit platforms. #[cfg(target_pointer_width = "64")] #[inline] #[allow(clippy::cast_possible_truncation)] pub const fn usize_to_u64(num: usize) -> u64 { num as u64 } /// Converts a usize into a wrapping u32. #[inline] pub const fn wrap_usize_to_u32(num: usize) -> Wrapping { Wrapping(((num as u64) & 0xFFFFFFFF) as u32) } /// Converts MiB to Bytes pub const fn mib_to_bytes(mib: usize) -> usize { mib << MIB_TO_BYTES_SHIFT } /// Converts Bytes to MiB, truncating any remainder pub const fn bytes_to_mib(bytes: usize) -> usize { bytes >> MIB_TO_BYTES_SHIFT } /// Align address up to the aligment. pub const fn align_up(addr: u64, align: u64) -> u64 { debug_assert!(align != 0); (addr + align - 1) & !(align - 1) } /// Align address down to the aligment. 
pub const fn align_down(addr: u64, align: u64) -> u64 { debug_assert!(align != 0); addr & !(align - 1) } /// Create and open a file for both reading and writing to it with a O_NONBLOCK flag. /// In case we open a FIFO, we need all READ, WRITE and O_NONBLOCK in order to not block the process /// if nobody is consuming the message. Otherwise opening the FIFO with only WRITE and O_NONBLOCK /// will fail with ENXIO if there is no readier already attached to it. /// NOTE: writing to a pipe will start failing when reaching 64K of unconsumed content. pub fn open_file_nonblock(path: &Path) -> Result { OpenOptions::new() .custom_flags(O_NONBLOCK) .create(true) .read(true) .write(true) .open(path) } ================================================ FILE: src/vmm/src/utils/net/ipv4addr.rs ================================================ // Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 use std::net::Ipv4Addr; /// Checks if an IPv4 address is RFC 3927 compliant. pub fn is_link_local_valid(ipv4_addr: Ipv4Addr) -> bool { match ipv4_addr.octets() { [169, 254, 0, _] => false, [169, 254, 255, _] => false, [169, 254, _, _] => true, _ => false, } } #[cfg(test)] mod tests { use std::net::Ipv4Addr; use super::*; #[test] fn test_is_link_local_valid() { // Outside link-local IPv4 address range (169.254.0.0/16 - 169.254.255.255/16). let mut ipv4_addr = Ipv4Addr::new(1, 1, 1, 1); assert!(!is_link_local_valid(ipv4_addr)); // First 256 addresses can not be used, per RFC 3927. ipv4_addr = Ipv4Addr::new(169, 254, 0, 0); assert!(!is_link_local_valid(ipv4_addr)); ipv4_addr = Ipv4Addr::new(169, 254, 0, 10); assert!(!is_link_local_valid(ipv4_addr)); ipv4_addr = Ipv4Addr::new(169, 254, 0, 255); assert!(!is_link_local_valid(ipv4_addr)); // Last 256 addresses can not be used, per RFC 3927. 
ipv4_addr = Ipv4Addr::new(169, 254, 255, 0); assert!(!is_link_local_valid(ipv4_addr)); ipv4_addr = Ipv4Addr::new(169, 254, 255, 194); assert!(!is_link_local_valid(ipv4_addr)); ipv4_addr = Ipv4Addr::new(169, 254, 255, 255); assert!(!is_link_local_valid(ipv4_addr)); // First valid IPv4 link-local address. ipv4_addr = Ipv4Addr::new(169, 254, 1, 0); assert!(is_link_local_valid(ipv4_addr)); // Last valid IPv4 link-local address. ipv4_addr = Ipv4Addr::new(169, 254, 254, 255); assert!(is_link_local_valid(ipv4_addr)); // In between valid IPv4 link-local address. ipv4_addr = Ipv4Addr::new(169, 254, 170, 2); assert!(is_link_local_valid(ipv4_addr)); } } ================================================ FILE: src/vmm/src/utils/net/mac.rs ================================================ // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 // // Portions Copyright 2017 The Chromium OS Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the THIRD-PARTY file. //! Contains support for parsing and constructing MAC addresses //! More information about MAC addresses can be found [here] //! //! [here]: https://en.wikipedia.org/wiki/MAC_address use std::fmt; use std::str::FromStr; use serde::de::{Deserialize, Deserializer, Error}; use serde::ser::{Serialize, Serializer}; /// The number of tuples (the ones separated by ":") contained in a MAC address. pub const MAC_ADDR_LEN: u8 = 6; /// Represents a MAC address #[derive(Clone, Copy, Debug, Default, PartialEq, Eq)] #[repr(transparent)] /// Representation of a MAC address. 
pub struct MacAddr { bytes: [u8; MAC_ADDR_LEN as usize], } impl fmt::Display for MacAddr { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { let b = &self.bytes; write!( f, "{:02x}:{:02x}:{:02x}:{:02x}:{:02x}:{:02x}", b[0], b[1], b[2], b[3], b[4], b[5] ) } } impl From<[u8; 6]> for MacAddr { fn from(bytes: [u8; 6]) -> Self { Self { bytes } } } impl From for [u8; 6] { fn from(mac: MacAddr) -> Self { mac.bytes } } impl FromStr for MacAddr { type Err = String; /// Try to turn a `&str` into a `MacAddr` object. The method will return the `str` that failed /// to be parsed. /// # Arguments /// /// * `s` - reference that can be converted to &str. fn from_str(s: &str) -> Result { let v: Vec<&str> = s.split(':').collect(); let mut bytes = [0u8; MAC_ADDR_LEN as usize]; if v.len() != MAC_ADDR_LEN as usize { return Err(String::from(s)); } for i in 0..MAC_ADDR_LEN as usize { if v[i].len() != 2 { return Err(String::from(s)); } bytes[i] = u8::from_str_radix(v[i], 16).map_err(|_| String::from(s))?; } Ok(MacAddr { bytes }) } } impl MacAddr { /// Create a `MacAddr` from a slice. /// Does not check whether `src.len()` == `MAC_ADDR_LEN`. /// # Arguments /// /// * `src` - slice from which to copy MAC address content. #[inline] pub fn from_bytes_unchecked(src: &[u8]) -> MacAddr { let mut bytes = [0u8; MAC_ADDR_LEN as usize]; bytes[..].copy_from_slice(src); MacAddr { bytes } } /// Return the underlying content of this `MacAddr` in bytes. 
#[inline] pub fn get_bytes(&self) -> &[u8] { &self.bytes } } impl Serialize for MacAddr { fn serialize(&self, serializer: S) -> Result where S: Serializer, { Serialize::serialize(&self.to_string(), serializer) } } impl<'de> Deserialize<'de> for MacAddr { fn deserialize(deserializer: D) -> Result where D: Deserializer<'de>, { let s = ::deserialize(deserializer)?; MacAddr::from_str(&s).map_err(|_| D::Error::custom("The provided MAC address is invalid.")) } } #[cfg(test)] mod tests { use super::*; #[test] fn test_mac_addr() { // too long MacAddr::from_str("aa:aa:aa:aa:aa:aa:aa").unwrap_err(); // invalid hex MacAddr::from_str("aa:aa:aa:aa:aa:ax").unwrap_err(); // single digit mac address component should be invalid MacAddr::from_str("aa:aa:aa:aa:aa:b").unwrap_err(); // components with more than two digits should also be invalid MacAddr::from_str("aa:aa:aa:aa:aa:bbb").unwrap_err(); let mac = MacAddr::from_str("12:34:56:78:9a:BC").unwrap(); println!("parsed MAC address: {}", mac); let bytes = mac.get_bytes(); assert_eq!(bytes, [0x12u8, 0x34, 0x56, 0x78, 0x9a, 0xbc]); } #[test] fn test_mac_addr_serialization_and_deserialization() { let mac: MacAddr = serde_json::from_str("\"12:34:56:78:9a:bc\"").expect("MacAddr deserialization failed."); let bytes = mac.get_bytes(); assert_eq!(bytes, [0x12u8, 0x34, 0x56, 0x78, 0x9a, 0xbc]); let s = serde_json::to_string(&mac).expect("MacAddr serialization failed."); assert_eq!(s, "\"12:34:56:78:9a:bc\""); } } ================================================ FILE: src/vmm/src/utils/net/mod.rs ================================================ // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 // // Portions Copyright 2017 The Chromium OS Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the THIRD-PARTY file. #![warn(missing_docs)] //! # Network-related utilities //! //! 
Provides tools for representing and handling network related concepts like MAC addresses and //! network interfaces. /// Provides IPv4 address utility methods. pub mod ipv4addr; pub mod mac; ================================================ FILE: src/vmm/src/utils/signal.rs ================================================ // Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 use libc::c_int; pub use vmm_sys_util::signal::*; // SAFETY: these are valid libc functions unsafe extern "C" { // SAFETY: Function has no invariants that can be broken. safe fn __libc_current_sigrtmin() -> c_int; // SAFETY: Function has no invariants that can be broken. safe fn __libc_current_sigrtmax() -> c_int; } /// Sigrtmin pub fn sigrtmin() -> c_int { __libc_current_sigrtmin() } /// Sigrtmax pub fn sigrtmax() -> c_int { __libc_current_sigrtmax() } ================================================ FILE: src/vmm/src/utils/sm.rs ================================================ // Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 use std::fmt::Debug; /// Simple abstraction of a state machine. /// /// `StateMachine` is a wrapper over `T` that also encodes state information for `T`. /// /// Each state for `T` is represented by a `StateFn` which is a function that acts as /// the state handler for that particular state of `T`. /// /// `StateFn` returns exactly one other `StateMachine` thus each state gets clearly /// defined transitions to other states. pub struct StateMachine { function: Option>, } impl Debug for StateMachine { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("StateMachine") .field("function", &self.function.map(|f| f as usize)) .finish() } } /// Type representing a state handler of a `StateMachine` machine. Each state handler /// is a function from `T` that handles a specific state of `T`. 
// NOTE: restored garbled generic parameter `<T>` throughout this impl.
type StateFn<T> = fn(&mut T) -> StateMachine<T>;

impl<T> StateMachine<T> {
    /// Creates a new state wrapper.
    ///
    /// # Arguments
    ///
    /// `function` - the state handler for this state, or `None` for a final state.
    pub fn new(function: Option<StateFn<T>>) -> StateMachine<T> {
        StateMachine { function }
    }

    /// Creates a new state wrapper that has further possible transitions.
    ///
    /// # Arguments
    ///
    /// `function` - the state handler for this state.
    pub fn next(function: StateFn<T>) -> StateMachine<T> {
        StateMachine::new(Some(function))
    }

    /// Creates a new state wrapper that has no further transitions. The state machine
    /// will finish after running this handler.
    pub fn finish() -> StateMachine<T> {
        StateMachine::new(None)
    }

    /// Runs a state machine for `T` starting from the provided state.
    ///
    /// # Arguments
    ///
    /// `machine` - a mutable reference to the object running through the various states.
    /// `starting_state_fn` - a `fn(&mut T) -> StateMachine<T>` that should be the handler for
    /// the initial state.
    pub fn run(machine: &mut T, starting_state_fn: StateFn<T>) {
        // Start off in the `starting_state` state.
        let mut state_machine = StateMachine::new(Some(starting_state_fn));
        // While current state is not a final/end state, keep churning.
        while let Some(state_fn) = state_machine.function {
            // Run the current state handler, and get the next one.
            state_machine = state_fn(machine);
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    // DummyMachine with states `s1`, `s2` and `s3`.
    #[derive(Debug)]
    struct DummyMachine {
        private_data_s1: bool,
        private_data_s2: bool,
        private_data_s3: bool,
    }

    impl DummyMachine {
        fn new() -> Self {
            DummyMachine {
                private_data_s1: false,
                private_data_s2: false,
                private_data_s3: false,
            }
        }

        // DummyMachine functions here.

        // Simple state-machine: start->s1->s2->s3->done.
        fn run(&mut self) {
            // Verify the machine has not run yet.
            assert!(!self.private_data_s1);
            assert!(!self.private_data_s2);
            assert!(!self.private_data_s3);

            // Run the state-machine.
            StateMachine::run(self, Self::s1);

            // Verify the machine went through all states.
            assert!(self.private_data_s1);
            assert!(self.private_data_s2);
            assert!(self.private_data_s3);
        }

        fn s1(&mut self) -> StateMachine<Self> {
            // Verify private data mutates along with the states.
            assert!(!self.private_data_s1);
            self.private_data_s1 = true;
            StateMachine::next(Self::s2)
        }

        fn s2(&mut self) -> StateMachine<Self> {
            // Verify private data mutates along with the states.
            assert!(!self.private_data_s2);
            self.private_data_s2 = true;
            StateMachine::next(Self::s3)
        }

        fn s3(&mut self) -> StateMachine<Self> {
            // Verify private data mutates along with the states.
            assert!(!self.private_data_s3);
            self.private_data_s3 = true;
            // The machine ends here: `finish()` returns a state with no handler.
            StateMachine::finish()
        }
    }

    #[test]
    fn test_sm() {
        let mut machine = DummyMachine::new();
        machine.run();
    }
}


================================================
FILE: src/vmm/src/vmm_config/balloon.rs
================================================
// Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

use std::sync::{Arc, Mutex};

use serde::{Deserialize, Serialize};

pub use crate::devices::virtio::balloon::BALLOON_DEV_ID;
pub use crate::devices::virtio::balloon::device::BalloonStats;
use crate::devices::virtio::balloon::{Balloon, BalloonConfig};

// NOTE: restored garbled type `Arc<Mutex<Balloon>>`.
type MutexBalloon = Arc<Mutex<Balloon>>;

/// Errors associated with the operations allowed on the balloon.
#[derive(Debug, derive_more::From, thiserror::Error, displaydoc::Display)]
pub enum BalloonConfigError {
    /// No balloon device found.
    DeviceNotFound,
    /// Amount of pages requested is too large.
TooManyPagesRequested, /// Error creating the balloon device: {0} CreateFailure(crate::devices::virtio::balloon::BalloonError), } /// This struct represents the strongly typed equivalent of the json body /// from balloon related requests. #[derive(Clone, Debug, Default, PartialEq, Eq, Deserialize, Serialize)] #[serde(deny_unknown_fields)] pub struct BalloonDeviceConfig { /// Target balloon size in MiB. pub amount_mib: u32, /// Option to deflate the balloon in case the guest is out of memory. pub deflate_on_oom: bool, /// Interval in seconds between refreshing statistics. #[serde(default)] pub stats_polling_interval_s: u16, /// Free page hinting enabled #[serde(default)] pub free_page_hinting: bool, /// Free page reporting enabled #[serde(default)] pub free_page_reporting: bool, } impl From for BalloonDeviceConfig { fn from(state: BalloonConfig) -> Self { BalloonDeviceConfig { amount_mib: state.amount_mib, deflate_on_oom: state.deflate_on_oom, stats_polling_interval_s: state.stats_polling_interval_s, free_page_hinting: state.free_page_hinting, free_page_reporting: state.free_page_reporting, } } } /// The data fed into a balloon update request. Currently, only the number /// of pages and the stats polling interval can be updated. #[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize)] #[serde(deny_unknown_fields)] pub struct BalloonUpdateConfig { /// Target balloon size in MiB. pub amount_mib: u32, } /// The data fed into a balloon statistics interval update request. /// Note that the state of the statistics cannot be changed from ON to OFF /// or vice versa after boot, only the interval of polling can be changed /// if the statistics were activated in the device configuration. #[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize)] #[serde(deny_unknown_fields)] pub struct BalloonUpdateStatsConfig { /// Interval in seconds between refreshing statistics. pub stats_polling_interval_s: u16, } /// A builder for `Balloon` devices from 'BalloonDeviceConfig'. 
#[cfg_attr(not(test), derive(Default))] #[derive(Debug)] pub struct BalloonBuilder { inner: Option, } impl BalloonBuilder { /// Creates an empty Balloon Store. pub fn new() -> Self { Self { inner: None } } /// Inserts a Balloon device in the store. /// If an entry already exists, it will overwrite it. pub fn set(&mut self, cfg: BalloonDeviceConfig) -> Result<(), BalloonConfigError> { self.inner = Some(Arc::new(Mutex::new(Balloon::new( cfg.amount_mib, cfg.deflate_on_oom, cfg.stats_polling_interval_s, cfg.free_page_hinting, cfg.free_page_reporting, )?))); Ok(()) } /// Inserts an existing balloon device. pub fn set_device(&mut self, balloon: MutexBalloon) { self.inner = Some(balloon); } /// Provides a reference to the Balloon if present. pub fn get(&self) -> Option<&MutexBalloon> { self.inner.as_ref() } /// Returns the same structure that was used to configure the device. pub fn get_config(&self) -> Result { self.get() .ok_or(BalloonConfigError::DeviceNotFound) .map(|balloon_mutex| balloon_mutex.lock().expect("Poisoned lock").config()) .map(BalloonDeviceConfig::from) } } #[cfg(test)] impl Default for BalloonBuilder { fn default() -> BalloonBuilder { let mut balloon = BalloonBuilder::new(); balloon.set(BalloonDeviceConfig::default()).unwrap(); balloon } } #[cfg(test)] pub(crate) mod tests { use super::*; pub(crate) fn default_config() -> BalloonDeviceConfig { BalloonDeviceConfig { amount_mib: 0, deflate_on_oom: false, stats_polling_interval_s: 0, free_page_hinting: false, free_page_reporting: false, } } #[test] fn test_balloon_create() { let default_balloon_config = default_config(); let balloon_config = BalloonDeviceConfig { amount_mib: 0, deflate_on_oom: false, stats_polling_interval_s: 0, free_page_hinting: false, free_page_reporting: false, }; assert_eq!(default_balloon_config, balloon_config); let mut builder = BalloonBuilder::new(); assert!(builder.get().is_none()); builder.set(balloon_config).unwrap(); 
assert_eq!(builder.get().unwrap().lock().unwrap().num_pages(), 0); assert_eq!(builder.get_config().unwrap(), default_balloon_config); let _update_config = BalloonUpdateConfig { amount_mib: 5 }; let _stats_update_config = BalloonUpdateStatsConfig { stats_polling_interval_s: 5, }; } #[test] fn test_from_balloon_state() { let expected_balloon_config = BalloonDeviceConfig { amount_mib: 5, deflate_on_oom: false, stats_polling_interval_s: 3, free_page_hinting: false, free_page_reporting: false, }; let actual_balloon_config = BalloonDeviceConfig::from(BalloonConfig { amount_mib: 5, deflate_on_oom: false, stats_polling_interval_s: 3, free_page_hinting: false, free_page_reporting: false, }); assert_eq!(expected_balloon_config, actual_balloon_config); } #[test] fn test_set_device() { let mut builder = BalloonBuilder::new(); let balloon = Balloon::new(0, true, 0, false, false).unwrap(); builder.set_device(Arc::new(Mutex::new(balloon))); assert!(builder.inner.is_some()); } } ================================================ FILE: src/vmm/src/vmm_config/boot_source.rs ================================================ // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 use std::fs::File; use std::io; use serde::{Deserialize, Serialize}; /// Default guest kernel command line: /// - `reboot=k` shut down the guest on reboot, instead of well... rebooting; /// - `panic=1` on panic, reboot after 1 second; /// - `nomodule` disable loadable kernel module support; /// - `8250.nr_uarts=0` disable 8250 serial interface; /// - `i8042.noaux` do not probe the i8042 controller for an attached mouse (save boot time); /// - `i8042.nomux` do not probe i8042 for a multiplexing controller (save boot time); /// - `i8042.dumbkbd` do not attempt to control kbd state via the i8042 (save boot time). 
/// - `swiotlb=noforce` disable software bounce buffers (SWIOTLB) pub const DEFAULT_KERNEL_CMDLINE: &str = "reboot=k panic=1 nomodule 8250.nr_uarts=0 i8042.noaux \ i8042.nomux i8042.dumbkbd swiotlb=noforce"; /// Strongly typed data structure used to configure the boot source of the /// microvm. #[derive(Clone, Debug, Default, Deserialize, Eq, PartialEq, Serialize)] #[serde(deny_unknown_fields)] pub struct BootSourceConfig { /// Path of the kernel image. pub kernel_image_path: String, /// Path of the initrd, if there is one. pub initrd_path: Option, /// The boot arguments to pass to the kernel. If this field is uninitialized, /// DEFAULT_KERNEL_CMDLINE is used. pub boot_args: Option, } /// Errors associated with actions on `BootSourceConfig`. #[derive(Debug, thiserror::Error, displaydoc::Display)] pub enum BootSourceConfigError { /// The kernel file cannot be opened: {0} InvalidKernelPath(io::Error), /// The initrd file cannot be opened due to invalid path or invalid permissions. {0} InvalidInitrdPath(io::Error), /// The kernel command line is invalid: {0} InvalidKernelCommandLine(String), } /// Holds the kernel specification (both configuration as well as runtime details). #[derive(Debug, Default)] pub struct BootSource { /// The boot source configuration. pub config: BootSourceConfig, /// The boot source builder (a boot source allocated and validated). /// It is an option cause a resumed microVM does not need it. pub builder: Option, } /// Holds the kernel builder (created and validates based on BootSourceConfig). #[derive(Debug)] pub struct BootConfig { /// The commandline validated against correctness. pub cmdline: linux_loader::cmdline::Cmdline, /// The descriptor to the kernel file. pub kernel_file: File, /// The descriptor to the initrd file, if there is one. pub initrd_file: Option, } impl BootConfig { /// Creates the BootConfig based on a given configuration. 
pub fn new(cfg: &BootSourceConfig) -> Result { use self::BootSourceConfigError::{ InvalidInitrdPath, InvalidKernelCommandLine, InvalidKernelPath, }; // Validate boot source config. let kernel_file = File::open(&cfg.kernel_image_path).map_err(InvalidKernelPath)?; let initrd_file: Option = match &cfg.initrd_path { Some(path) => Some(File::open(path).map_err(InvalidInitrdPath)?), None => None, }; let cmdline_str = match cfg.boot_args.as_ref() { None => DEFAULT_KERNEL_CMDLINE, Some(str) => str.as_str(), }; let cmdline = linux_loader::cmdline::Cmdline::try_from(cmdline_str, crate::arch::CMDLINE_MAX_SIZE) .map_err(|err| InvalidKernelCommandLine(err.to_string()))?; Ok(BootConfig { cmdline, kernel_file, initrd_file, }) } } #[cfg(test)] pub(crate) mod tests { use vmm_sys_util::tempfile::TempFile; use super::*; use crate::snapshot::Snapshot; #[test] fn test_boot_config() { let kernel_file = TempFile::new().unwrap(); let kernel_path = kernel_file.as_path().to_str().unwrap().to_string(); let boot_src_cfg = BootSourceConfig { boot_args: None, initrd_path: None, kernel_image_path: kernel_path, }; let boot_cfg = BootConfig::new(&boot_src_cfg).unwrap(); assert!(boot_cfg.initrd_file.is_none()); assert_eq!( boot_cfg.cmdline.as_cstring().unwrap().as_bytes_with_nul(), [DEFAULT_KERNEL_CMDLINE.as_bytes(), b"\0"].concat() ); } #[test] fn test_serde() { let boot_src_cfg = BootSourceConfig { boot_args: Some(DEFAULT_KERNEL_CMDLINE.to_string()), initrd_path: Some("/tmp/initrd".to_string()), kernel_image_path: "./vmlinux.bin".to_string(), }; // Use bitcode serialization directly for the test data let serialized_data = bitcode::serialize(&boot_src_cfg).unwrap(); let restored_boot_cfg: BootSourceConfig = bitcode::deserialize(&serialized_data).unwrap(); assert_eq!(boot_src_cfg, restored_boot_cfg); // Also test with Snapshot wrapper let snapshot_data = bitcode::serialize(&Snapshot::new(boot_src_cfg.clone())).unwrap(); let restored_snapshot = 
Snapshot::load_without_crc_check(&snapshot_data).unwrap(); assert_eq!(boot_src_cfg, restored_snapshot.data); } } ================================================ FILE: src/vmm/src/vmm_config/drive.rs ================================================ // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 use std::collections::VecDeque; use std::io; use std::sync::{Arc, Mutex}; use serde::{Deserialize, Serialize}; use super::RateLimiterConfig; use crate::VmmError; use crate::devices::virtio::block::device::Block; pub use crate::devices::virtio::block::virtio::device::FileEngineType; use crate::devices::virtio::block::{BlockError, CacheType}; use crate::devices::virtio::device::VirtioDevice; /// Errors associated with the operations allowed on a drive. #[derive(Debug, thiserror::Error, displaydoc::Display)] pub enum DriveError { /// Attempt to add block as a root device while the root device defined as a pmem device AddingSecondRootDevice, /// Unable to create the virtio block device: {0} CreateBlockDevice(BlockError), /// Cannot create RateLimiter: {0} CreateRateLimiter(io::Error), /// Unable to patch the block device: {0} Please verify the request arguments. DeviceUpdate(VmmError), /// A root block device already exists! RootBlockDeviceAlreadyAdded, } /// Use this structure to set up the Block Device before booting the kernel. #[derive(Debug, Default, PartialEq, Eq, Deserialize, Serialize)] #[serde(deny_unknown_fields)] pub struct BlockDeviceConfig { /// Unique identifier of the drive. pub drive_id: String, /// Part-UUID. Represents the unique id of the boot partition of this device. It is /// optional and it will be used only if the `is_root_device` field is true. pub partuuid: Option, /// If set to true, it makes the current device the root block device. /// Setting this flag to true will mount the block device in the /// guest under /dev/vda unless the partuuid is present. 
pub is_root_device: bool, /// If set to true, the drive will ignore flush requests coming from /// the guest driver. #[serde(default)] pub cache_type: CacheType, // VirtioBlock specific fields /// If set to true, the drive is opened in read-only mode. Otherwise, the /// drive is opened as read-write. pub is_read_only: Option, /// Path of the drive. pub path_on_host: Option, /// Rate Limiter for I/O operations. pub rate_limiter: Option, /// The type of IO engine used by the device. // #[serde(default)] // #[serde(rename = "io_engine")] // pub file_engine_type: FileEngineType, #[serde(rename = "io_engine")] pub file_engine_type: Option, // VhostUserBlock specific fields /// Path to the vhost-user socket. pub socket: Option, } /// Only provided fields will be updated. I.e. if any optional fields /// are missing, they will not be updated. #[derive(Debug, Default, PartialEq, Eq, Deserialize)] #[serde(deny_unknown_fields)] pub struct BlockDeviceUpdateConfig { /// The drive ID, as provided by the user at creation time. pub drive_id: String, // VirtioBlock sepcific fields /// New block file path on the host. Only provided data will be updated. pub path_on_host: Option, /// New rate limiter config. pub rate_limiter: Option, } /// Wrapper for the collection that holds all the Block Devices #[derive(Debug, Default)] pub struct BlockBuilder { /// The list of block devices. /// There can be at most one root block device and it would be the first in the list. // Root Device should be the first in the list whether or not PARTUUID is // specified in order to avoid bugs in case of switching from partuuid boot // scenarios to /dev/vda boot type. pub devices: VecDeque>>, } impl BlockBuilder { /// Constructor for BlockDevices. It initializes an empty LinkedList. pub fn new() -> Self { Self { devices: Default::default(), } } /// Specifies whether there is a root block device already present in the list. 
pub fn has_root_device(&self) -> bool { // If there is a root device, it would be at the top of the list. if let Some(block) = self.devices.front() { block.lock().expect("Poisoned lock").root_device() } else { false } } /// Gets the index of the device with the specified `drive_id` if it exists in the list. fn get_index_of_drive_id(&self, drive_id: &str) -> Option { self.devices .iter() .position(|b| b.lock().expect("Poisoned lock").id().eq(drive_id)) } /// Inserts an existing block device. pub fn add_virtio_device(&mut self, block_device: Arc>) { if block_device.lock().expect("Poisoned lock").root_device() { self.devices.push_front(block_device); } else { self.devices.push_back(block_device); } } /// Inserts a `Block` in the block devices list using the specified configuration. /// If a block with the same id already exists, it will overwrite it. /// Inserting a secondary root block device will fail. pub fn insert( &mut self, config: BlockDeviceConfig, has_pmem_root: bool, ) -> Result<(), DriveError> { let position = self.get_index_of_drive_id(&config.drive_id); let has_root_device = self.has_root_device(); let configured_as_root = config.is_root_device; if configured_as_root && has_pmem_root { return Err(DriveError::AddingSecondRootDevice); } // Don't allow adding a second root block device. // If the new device cfg is root and not an update to the existing root, fail fast. if configured_as_root && has_root_device && position != Some(0) { return Err(DriveError::RootBlockDeviceAlreadyAdded); } let block_dev = Arc::new(Mutex::new( Block::new(config).map_err(DriveError::CreateBlockDevice)?, )); // If the id of the drive already exists in the list, the operation is update/overwrite. match position { // New block device. None => { if configured_as_root { self.devices.push_front(block_dev); } else { self.devices.push_back(block_dev); } } // Update existing block device. Some(index) => { // Update the slot with the new block. 
self.devices[index] = block_dev; // Check if the root block device is being updated. if index != 0 && configured_as_root { // Make sure the root device is on the first position. self.devices.swap(0, index); } } } Ok(()) } /// Returns a vec with the structures used to configure the devices. pub fn configs(&self) -> Vec { self.devices .iter() .map(|b| b.lock().unwrap().config()) .collect() } } #[cfg(test)] mod tests { use vmm_sys_util::tempfile::TempFile; use super::*; use crate::devices::virtio::block::virtio::VirtioBlockError; impl PartialEq for DriveError { fn eq(&self, other: &DriveError) -> bool { self.to_string() == other.to_string() } } // This implementation is used only in tests. // We cannot directly derive clone because RateLimiter does not implement clone. impl Clone for BlockDeviceConfig { fn clone(&self) -> Self { BlockDeviceConfig { drive_id: self.drive_id.clone(), partuuid: self.partuuid.clone(), is_root_device: self.is_root_device, is_read_only: self.is_read_only, cache_type: self.cache_type, path_on_host: self.path_on_host.clone(), rate_limiter: self.rate_limiter, file_engine_type: self.file_engine_type, socket: self.socket.clone(), } } } #[test] fn test_create_block_devs() { let block_devs = BlockBuilder::new(); assert_eq!(block_devs.devices.len(), 0); } #[test] fn test_add_non_root_block_device() { let dummy_file = TempFile::new().unwrap(); let dummy_path = dummy_file.as_path().to_str().unwrap().to_string(); let dummy_id = String::from("1"); let dummy_block_device = BlockDeviceConfig { drive_id: dummy_id.clone(), partuuid: None, is_root_device: false, cache_type: CacheType::Writeback, is_read_only: Some(false), path_on_host: Some(dummy_path), rate_limiter: None, file_engine_type: None, socket: None, }; let mut block_devs = BlockBuilder::new(); block_devs .insert(dummy_block_device.clone(), false) .unwrap(); assert!(!block_devs.has_root_device()); assert_eq!(block_devs.devices.len(), 1); assert_eq!(block_devs.get_index_of_drive_id(&dummy_id), 
        Some(0));

        let block = block_devs.devices[0].lock().unwrap();
        assert_eq!(block.id(), dummy_block_device.drive_id);
        assert_eq!(block.partuuid(), &dummy_block_device.partuuid);
        assert_eq!(block.read_only(), dummy_block_device.is_read_only.unwrap());
    }

    #[test]
    fn test_add_one_root_block_device() {
        let dummy_file = TempFile::new().unwrap();
        let dummy_path = dummy_file.as_path().to_str().unwrap().to_string();

        let dummy_block_device = BlockDeviceConfig {
            drive_id: String::from("1"),
            partuuid: None,
            is_root_device: true,
            cache_type: CacheType::Unsafe,

            is_read_only: Some(true),
            path_on_host: Some(dummy_path),
            rate_limiter: None,
            file_engine_type: None,

            socket: None,
        };
        let mut block_devs = BlockBuilder::new();
        block_devs
            .insert(dummy_block_device.clone(), false)
            .unwrap();

        // A single root device: it must be reported as root and sit at index 0.
        assert!(block_devs.has_root_device());
        assert_eq!(block_devs.devices.len(), 1);
        let block = block_devs.devices[0].lock().unwrap();
        assert_eq!(block.id(), dummy_block_device.drive_id);
        assert_eq!(block.partuuid(), &dummy_block_device.partuuid);
        assert_eq!(block.read_only(), dummy_block_device.is_read_only.unwrap());
    }

    #[test]
    fn test_add_one_root_block_device_with_pmem_already_as_root() {
        let dummy_file = TempFile::new().unwrap();
        let dummy_path = dummy_file.as_path().to_str().unwrap().to_string();

        let dummy_block_device = BlockDeviceConfig {
            drive_id: String::from("1"),
            partuuid: None,
            is_root_device: true,
            cache_type: CacheType::Unsafe,

            is_read_only: Some(true),
            path_on_host: Some(dummy_path),
            rate_limiter: None,
            file_engine_type: None,

            socket: None,
        };
        let mut block_devs = BlockBuilder::new();
        // `has_pmem_root == true` must reject a root block device outright.
        assert!(matches!(
            block_devs
                .insert(dummy_block_device.clone(), true)
                .unwrap_err(),
            DriveError::AddingSecondRootDevice,
        ));

        // The failed insert must leave the builder untouched.
        assert!(!block_devs.has_root_device());
        assert_eq!(block_devs.devices.len(), 0);
    }

    #[test]
    fn test_add_two_root_block_devs() {
        let dummy_file_1 = TempFile::new().unwrap();
        let dummy_path_1 = dummy_file_1.as_path().to_str().unwrap().to_string();
        let root_block_device_1 = BlockDeviceConfig {
            drive_id: String::from("1"),
            partuuid: None,
            is_root_device: true,
            cache_type: CacheType::Unsafe,

            is_read_only: Some(false),
            path_on_host: Some(dummy_path_1),
            rate_limiter: None,
            file_engine_type: None,

            socket: None,
        };
        let dummy_file_2 = TempFile::new().unwrap();
        let dummy_path_2 = dummy_file_2.as_path().to_str().unwrap().to_string();
        let root_block_device_2 = BlockDeviceConfig {
            drive_id: String::from("2"),
            partuuid: None,
            is_root_device: true,
            cache_type: CacheType::Unsafe,

            is_read_only: Some(false),
            path_on_host: Some(dummy_path_2),
            rate_limiter: None,
            file_engine_type: None,

            socket: None,
        };

        let mut block_devs = BlockBuilder::new();
        block_devs.insert(root_block_device_1, false).unwrap();
        // A second distinct root device must be rejected.
        assert_eq!(
            block_devs.insert(root_block_device_2, false).unwrap_err(),
            DriveError::RootBlockDeviceAlreadyAdded
        );
    }

    #[test]
    // Test BlockDevicesConfigs::add when you first add the root device and then the other devices.
    fn test_add_root_block_device_first() {
        let dummy_file_1 = TempFile::new().unwrap();
        let dummy_path_1 = dummy_file_1.as_path().to_str().unwrap().to_string();
        let root_block_device = BlockDeviceConfig {
            drive_id: String::from("1"),
            partuuid: None,
            is_root_device: true,
            cache_type: CacheType::Unsafe,

            is_read_only: Some(false),
            path_on_host: Some(dummy_path_1),
            rate_limiter: None,
            file_engine_type: None,

            socket: None,
        };

        let dummy_file_2 = TempFile::new().unwrap();
        let dummy_path_2 = dummy_file_2.as_path().to_str().unwrap().to_string();
        let dummy_block_dev_2 = BlockDeviceConfig {
            drive_id: String::from("2"),
            partuuid: None,
            is_root_device: false,
            cache_type: CacheType::Unsafe,

            is_read_only: Some(false),
            path_on_host: Some(dummy_path_2),
            rate_limiter: None,
            file_engine_type: None,

            socket: None,
        };

        let dummy_file_3 = TempFile::new().unwrap();
        let dummy_path_3 = dummy_file_3.as_path().to_str().unwrap().to_string();
        let dummy_block_dev_3 = BlockDeviceConfig {
            drive_id: String::from("3"),
            partuuid: None,
            is_root_device: false,
            cache_type: CacheType::Unsafe,

            is_read_only: Some(false),
            path_on_host: Some(dummy_path_3),
            rate_limiter: None,
            file_engine_type: None,

            socket: None,
        };

        let mut block_devs = BlockBuilder::new();
        block_devs.insert(dummy_block_dev_2.clone(), false).unwrap();
        block_devs.insert(dummy_block_dev_3.clone(), false).unwrap();
        block_devs.insert(root_block_device.clone(), false).unwrap();

        // Regardless of insertion order, the root device must end up first.
        assert_eq!(block_devs.devices.len(), 3);

        let mut block_iter = block_devs.devices.iter();
        assert_eq!(
            block_iter.next().unwrap().lock().unwrap().id(),
            root_block_device.drive_id
        );
        assert_eq!(
            block_iter.next().unwrap().lock().unwrap().id(),
            dummy_block_dev_2.drive_id
        );
        assert_eq!(
            block_iter.next().unwrap().lock().unwrap().id(),
            dummy_block_dev_3.drive_id
        );
    }

    #[test]
    // Test BlockDevicesConfigs::add when you add other devices first and then the root device.
    fn test_root_block_device_add_last() {
        let dummy_file_1 = TempFile::new().unwrap();
        let dummy_path_1 = dummy_file_1.as_path().to_str().unwrap().to_string();
        let root_block_device = BlockDeviceConfig {
            drive_id: String::from("1"),
            partuuid: None,
            is_root_device: true,
            cache_type: CacheType::Unsafe,

            is_read_only: Some(false),
            path_on_host: Some(dummy_path_1),
            rate_limiter: None,
            file_engine_type: None,

            socket: None,
        };

        let dummy_file_2 = TempFile::new().unwrap();
        let dummy_path_2 = dummy_file_2.as_path().to_str().unwrap().to_string();
        let dummy_block_dev_2 = BlockDeviceConfig {
            drive_id: String::from("2"),
            partuuid: None,
            is_root_device: false,
            cache_type: CacheType::Unsafe,

            is_read_only: Some(false),
            path_on_host: Some(dummy_path_2),
            rate_limiter: None,
            file_engine_type: None,

            socket: None,
        };

        let dummy_file_3 = TempFile::new().unwrap();
        let dummy_path_3 = dummy_file_3.as_path().to_str().unwrap().to_string();
        let dummy_block_dev_3 = BlockDeviceConfig {
            drive_id: String::from("3"),
            partuuid: None,
            is_root_device: false,
            cache_type: CacheType::Unsafe,

            is_read_only: Some(false),
            path_on_host: Some(dummy_path_3),
            rate_limiter: None,
            file_engine_type: None,

            socket: None,
}; let mut block_devs = BlockBuilder::new(); block_devs.insert(dummy_block_dev_2.clone(), false).unwrap(); block_devs.insert(dummy_block_dev_3.clone(), false).unwrap(); block_devs.insert(root_block_device.clone(), false).unwrap(); assert_eq!(block_devs.devices.len(), 3); let mut block_iter = block_devs.devices.iter(); // The root device should be first in the list no matter of the order in // which the devices were added. assert_eq!( block_iter.next().unwrap().lock().unwrap().id(), root_block_device.drive_id ); assert_eq!( block_iter.next().unwrap().lock().unwrap().id(), dummy_block_dev_2.drive_id ); assert_eq!( block_iter.next().unwrap().lock().unwrap().id(), dummy_block_dev_3.drive_id ); } #[test] fn test_update() { let dummy_file_1 = TempFile::new().unwrap(); let dummy_path_1 = dummy_file_1.as_path().to_str().unwrap().to_string(); let root_block_device = BlockDeviceConfig { drive_id: String::from("1"), partuuid: None, is_root_device: true, cache_type: CacheType::Unsafe, is_read_only: Some(false), path_on_host: Some(dummy_path_1.clone()), rate_limiter: None, file_engine_type: None, socket: None, }; let dummy_file_2 = TempFile::new().unwrap(); let dummy_path_2 = dummy_file_2.as_path().to_str().unwrap().to_string(); let mut dummy_block_device_2 = BlockDeviceConfig { drive_id: String::from("2"), partuuid: None, is_root_device: false, cache_type: CacheType::Unsafe, is_read_only: Some(false), path_on_host: Some(dummy_path_2.clone()), rate_limiter: None, file_engine_type: None, socket: None, }; let mut block_devs = BlockBuilder::new(); // Add 2 block devices. block_devs.insert(root_block_device, false).unwrap(); block_devs .insert(dummy_block_device_2.clone(), false) .unwrap(); // Get index zero. assert_eq!( block_devs.get_index_of_drive_id(&String::from("1")), Some(0) ); // Get None. assert!( block_devs .get_index_of_drive_id(&String::from("foo")) .is_none() ); // Test several update cases using dummy_block_device_2. 
// Validate `dummy_block_device_2` is already in the list assert!( block_devs .get_index_of_drive_id(&dummy_block_device_2.drive_id) .is_some() ); // Update OK. dummy_block_device_2.is_read_only = Some(true); block_devs .insert(dummy_block_device_2.clone(), false) .unwrap(); let index = block_devs .get_index_of_drive_id(&dummy_block_device_2.drive_id) .unwrap(); // Validate update was successful. assert!(block_devs.devices[index].lock().unwrap().read_only()); // Update with invalid path. let dummy_path_3 = String::from("test_update_3"); dummy_block_device_2.path_on_host = Some(dummy_path_3); assert!(matches!( block_devs.insert(dummy_block_device_2.clone(), false), Err(DriveError::CreateBlockDevice(BlockError::VirtioBackend( VirtioBlockError::BackingFile(_, _) ))) )); // Update with 2 root block devices. dummy_block_device_2.path_on_host = Some(dummy_path_2.clone()); dummy_block_device_2.is_root_device = true; assert_eq!( block_devs.insert(dummy_block_device_2, false), Err(DriveError::RootBlockDeviceAlreadyAdded) ); let root_block_device = BlockDeviceConfig { drive_id: String::from("1"), partuuid: None, is_root_device: true, cache_type: CacheType::Unsafe, is_read_only: Some(false), path_on_host: Some(dummy_path_1), rate_limiter: None, file_engine_type: None, socket: None, }; // Switch roots and add a PARTUUID for the new one. 
let mut root_block_device_old = root_block_device; root_block_device_old.is_root_device = false; let root_block_device_new = BlockDeviceConfig { drive_id: String::from("2"), partuuid: Some("0eaa91a0-01".to_string()), is_root_device: true, cache_type: CacheType::Unsafe, is_read_only: Some(false), path_on_host: Some(dummy_path_2), rate_limiter: None, file_engine_type: None, socket: None, }; block_devs.insert(root_block_device_old, false).unwrap(); let root_block_id = root_block_device_new.drive_id.clone(); block_devs.insert(root_block_device_new, false).unwrap(); assert!(block_devs.has_root_device()); // Verify it's been moved to the first position. assert_eq!(block_devs.devices[0].lock().unwrap().id(), root_block_id); } #[test] fn test_block_config() { let dummy_file = TempFile::new().unwrap(); let dummy_block_device = BlockDeviceConfig { drive_id: String::from("1"), partuuid: None, is_root_device: true, cache_type: CacheType::Unsafe, is_read_only: Some(true), path_on_host: Some(dummy_file.as_path().to_str().unwrap().to_string()), rate_limiter: None, file_engine_type: Some(FileEngineType::Sync), socket: None, }; let mut block_devs = BlockBuilder::new(); block_devs .insert(dummy_block_device.clone(), false) .unwrap(); let configs = block_devs.configs(); assert_eq!(configs.len(), 1); assert_eq!(configs.first().unwrap(), &dummy_block_device); } #[test] fn test_add_device() { let mut block_devs = BlockBuilder::new(); let backing_file = TempFile::new().unwrap(); let block_id = "test_id"; let config = BlockDeviceConfig { drive_id: block_id.to_string(), partuuid: None, is_root_device: true, cache_type: CacheType::default(), is_read_only: Some(true), path_on_host: Some(backing_file.as_path().to_str().unwrap().to_string()), rate_limiter: None, file_engine_type: None, socket: None, }; let block = Block::new(config).unwrap(); block_devs.add_virtio_device(Arc::new(Mutex::new(block))); assert_eq!(block_devs.devices.len(), 1); assert_eq!( 
block_devs.devices.pop_back().unwrap().lock().unwrap().id(),
            block_id
        );
    }
}



================================================
FILE: src/vmm/src/vmm_config/entropy.rs
================================================
// Copyright 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

use std::ops::Deref;
use std::sync::{Arc, Mutex};

use serde::{Deserialize, Serialize};

use super::RateLimiterConfig;
use crate::devices::virtio::rng::{Entropy, EntropyError};

/// This struct represents the strongly typed equivalent of the json body from entropy device
/// related requests.
#[derive(Debug, Default, Clone, PartialEq, Eq, Deserialize, Serialize)]
#[serde(deny_unknown_fields)]
pub struct EntropyDeviceConfig {
    /// Configuration for RateLimiter of Entropy device
    pub rate_limiter: Option<RateLimiterConfig>,
}

impl From<&Entropy> for EntropyDeviceConfig {
    fn from(dev: &Entropy) -> Self {
        let rate_limiter: RateLimiterConfig = dev.rate_limiter().into();
        EntropyDeviceConfig {
            rate_limiter: rate_limiter.into_option(),
        }
    }
}

/// Errors that can occur while handling configuration for
/// an entropy device
#[derive(Debug, thiserror::Error, displaydoc::Display)]
pub enum EntropyDeviceError {
    /// Could not create Entropy device: {0}
    CreateDevice(#[from] EntropyError),
    /// Could not create RateLimiter from configuration: {0}
    CreateRateLimiter(#[from] std::io::Error),
}

/// A builder type used to construct an Entropy device
#[derive(Debug, Default)]
pub struct EntropyDeviceBuilder(Option<Arc<Mutex<Entropy>>>);

impl EntropyDeviceBuilder {
    /// Create a new instance for the builder
    pub fn new() -> Self {
        Self(None)
    }

    /// Build an entropy device and return a (counted) reference to it protected by a mutex
    pub fn build(
        &mut self,
        config: EntropyDeviceConfig,
    ) -> Result<Arc<Mutex<Entropy>>, EntropyDeviceError> {
        let rate_limiter = config
            .rate_limiter
            .map(RateLimiterConfig::try_into)
            .transpose()?;

        let dev = Arc::new(Mutex::new(Entropy::new(rate_limiter.unwrap_or_default())?));
        self.0 = Some(dev.clone());
        Ok(dev)
    }

    /// Insert a new entropy device from a configuration object
    pub fn insert(&mut self, config: EntropyDeviceConfig) -> Result<(), EntropyDeviceError> {
        let _ = self.build(config)?;
        Ok(())
    }

    /// Get a reference to the entropy device, if present
    pub fn get(&self) -> Option<&Arc<Mutex<Entropy>>> {
        self.0.as_ref()
    }

    /// Get the configuration of the entropy device (if any)
    pub fn config(&self) -> Option<EntropyDeviceConfig> {
        self.0
            .as_ref()
            .map(|dev| EntropyDeviceConfig::from(dev.lock().unwrap().deref()))
    }

    /// Set the entropy device from an already created object
    pub fn set_device(&mut self, device: Arc<Mutex<Entropy>>) {
        self.0 = Some(device);
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::rate_limiter::RateLimiter;

    #[test]
    fn test_entropy_device_create() {
        let config = EntropyDeviceConfig::default();

        let mut builder = EntropyDeviceBuilder::new();
        assert!(builder.get().is_none());
        builder.insert(config.clone()).unwrap();
        assert!(builder.get().is_some());
        assert_eq!(builder.config().unwrap(), config);
    }

    #[test]
    fn test_set_device() {
        let mut builder = EntropyDeviceBuilder::new();
        let device = Entropy::new(RateLimiter::default()).unwrap();

        assert!(builder.0.is_none());
        builder.set_device(Arc::new(Mutex::new(device)));
        assert!(builder.0.is_some());
    }
}



================================================
FILE: src/vmm/src/vmm_config/instance_info.rs
================================================
// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

use std::fmt::{self, Display, Formatter};

use serde::{Serialize, ser};

/// Enumerates microVM runtime states.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub enum VmState {
    /// Vm not started (yet)
    #[default]
    NotStarted,
    /// Vm is Paused
    Paused,
    /// Vm is running
    Running,
}

impl Display for VmState {
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        match *self {
            VmState::NotStarted => write!(f, "Not started"),
            VmState::Paused => write!(f, "Paused"),
            VmState::Running => write!(f, "Running"),
        }
    }
}

impl ser::Serialize for VmState {
    // Serialize the state as its human-readable `Display` string
    // (e.g. "Running") instead of as an enum variant.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: ser::Serializer,
    {
        self.to_string().serialize(serializer)
    }
}

/// Serializable struct that contains general information about the microVM.
#[derive(Clone, Debug, Default, PartialEq, Eq, Serialize)]
pub struct InstanceInfo {
    /// The ID of the microVM.
    pub id: String,
    /// Whether the microVM is not started/running/paused.
    pub state: VmState,
    /// The version of the VMM that runs the microVM.
    pub vmm_version: String,
    /// The name of the application that runs the microVM.
    pub app_name: String,
}



================================================
FILE: src/vmm/src/vmm_config/machine_config.rs
================================================
// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

use std::fmt::Debug;

use serde::{Deserialize, Deserializer, Serialize, Serializer};

use crate::cpu_config::templates::{CpuTemplateType, CustomCpuTemplate, StaticCpuTemplate};

/// The default memory size of the VM, in MiB.
pub const DEFAULT_MEM_SIZE_MIB: usize = 128;

/// Firecracker aims to support small scale workloads only, so limit the maximum
/// vCPUs supported.
pub const MAX_SUPPORTED_VCPUS: u8 = 32;

/// Errors associated with configuring the microVM.
#[rustfmt::skip]
#[derive(Debug, thiserror::Error, displaydoc::Display, PartialEq, Eq)]
pub enum MachineConfigError {
    /// The memory size (MiB) is smaller than the previously set balloon device target size.
IncompatibleBalloonSize, /// The memory size (MiB) is either 0, or not a multiple of the configured page size. InvalidMemorySize, /// The number of vCPUs must be greater than 0, less than {MAX_SUPPORTED_VCPUS:} and must be 1 or an even number if SMT is enabled. InvalidVcpuCount, /// Could not get the configuration of the previously installed balloon device to validate the memory size. InvalidVmState, /// Enabling simultaneous multithreading is not supported on aarch64. #[cfg(target_arch = "aarch64")] SmtNotSupported, /// Could not determine host kernel version when checking hugetlbfs compatibility KernelVersion, } /// Describes the possible (huge)page configurations for a microVM's memory. #[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Serialize, Deserialize)] pub enum HugePageConfig { /// Do not use hugepages, e.g. back guest memory by 4K #[default] None, /// Back guest memory by 2MB hugetlbfs pages #[serde(rename = "2M")] Hugetlbfs2M, } impl HugePageConfig { /// Checks whether the given memory size (in MiB) is valid for this [`HugePageConfig`], e.g. /// whether it is a multiple of the page size fn is_valid_mem_size(&self, mem_size_mib: usize) -> bool { let divisor = match self { // Any integer memory size expressed in MiB will be a multiple of 4096KiB. HugePageConfig::None => 1, HugePageConfig::Hugetlbfs2M => 2, }; mem_size_mib.is_multiple_of(divisor) } /// Returns the flags required to pass to `mmap`, in addition to `MAP_ANONYMOUS`, to /// create a mapping backed by huge pages as described by this [`HugePageConfig`]. pub fn mmap_flags(&self) -> libc::c_int { match self { HugePageConfig::None => 0, HugePageConfig::Hugetlbfs2M => libc::MAP_HUGETLB | libc::MAP_HUGE_2MB, } } /// Returns `true` iff this [`HugePageConfig`] describes a hugetlbfs-based configuration. pub fn is_hugetlbfs(&self) -> bool { matches!(self, HugePageConfig::Hugetlbfs2M) } /// Gets the page size in bytes of this [`HugePageConfig`]. 
pub fn page_size(&self) -> usize {
        match self {
            HugePageConfig::None => 4096,
            HugePageConfig::Hugetlbfs2M => 2 * 1024 * 1024,
        }
    }
}

impl From<HugePageConfig> for Option<memfd::HugetlbSize> {
    fn from(value: HugePageConfig) -> Self {
        match value {
            HugePageConfig::None => None,
            HugePageConfig::Hugetlbfs2M => Some(memfd::HugetlbSize::Huge2MB),
        }
    }
}

/// Struct used in PUT `/machine-config` API call.
#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize)]
#[serde(deny_unknown_fields)]
pub struct MachineConfig {
    /// Number of vcpu to start.
    pub vcpu_count: u8,
    /// The memory size in MiB.
    pub mem_size_mib: usize,
    /// Enables or disabled SMT.
    #[serde(default)]
    pub smt: bool,
    /// A CPU template that it is used to filter the CPU features exposed to the guest.
    // FIXME: once support for static CPU templates is removed, this field can be dropped altogether
    #[serde(
        default,
        skip_serializing_if = "is_none_or_custom_template",
        deserialize_with = "deserialize_static_template",
        serialize_with = "serialize_static_template"
    )]
    pub cpu_template: Option<CpuTemplateType>,
    /// Enables or disables dirty page tracking. Enabling allows incremental snapshots.
    #[serde(default)]
    pub track_dirty_pages: bool,
    /// Configures what page size Firecracker should use to back guest memory.
    #[serde(default)]
    pub huge_pages: HugePageConfig,
    /// GDB socket address.
    #[cfg(feature = "gdb")]
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub gdb_socket_path: Option<String>,
}

// Used by `skip_serializing_if`: custom templates are never emitted through
// the machine-config endpoint, only static ones.
fn is_none_or_custom_template(template: &Option<CpuTemplateType>) -> bool {
    matches!(template, None | Some(CpuTemplateType::Custom(_)))
}

// Deserialize a static CPU template and wrap it into `CpuTemplateType::Static`.
fn deserialize_static_template<'de, D>(
    deserializer: D,
) -> Result<Option<CpuTemplateType>, D::Error>
where
    D: Deserializer<'de>,
{
    Option::<StaticCpuTemplate>::deserialize(deserializer)
        .map(|maybe_template| maybe_template.map(CpuTemplateType::Static))
}

// Serialize only static CPU templates; custom ones are filtered out by the
// `skip_serializing_if` attribute on the field.
fn serialize_static_template<S>(
    template: &Option<CpuTemplateType>,
    serializer: S,
) -> Result<S::Ok, S::Error>
where
    S: Serializer,
{
    let Some(CpuTemplateType::Static(template)) = template else {
        // We have a skip_serializing_if on the field
        unreachable!()
    };

    template.serialize(serializer)
}

impl Default for MachineConfig {
    fn default() -> Self {
        Self {
            vcpu_count: 1,
            mem_size_mib: DEFAULT_MEM_SIZE_MIB,
            smt: false,
            cpu_template: None,
            track_dirty_pages: false,
            huge_pages: HugePageConfig::None,
            #[cfg(feature = "gdb")]
            gdb_socket_path: None,
        }
    }
}

/// Struct used in PATCH `/machine-config` API call.
/// Used to update `MachineConfig` in `VmResources`.
/// This struct mirrors all the fields in `MachineConfig`.
/// All fields are optional, but at least one needs to be specified.
/// If a field is `Some(value)` then we assume an update is requested
/// for that field.
#[derive(Clone, Default, Debug, PartialEq, Eq, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct MachineConfigUpdate {
    /// Number of vcpu to start.
    #[serde(default)]
    pub vcpu_count: Option<u8>,
    /// The memory size in MiB.
    #[serde(default)]
    pub mem_size_mib: Option<usize>,
    /// Enables or disabled SMT.
    #[serde(default)]
    pub smt: Option<bool>,
    /// A CPU template that it is used to filter the CPU features exposed to the guest.
    #[serde(default)]
    pub cpu_template: Option<StaticCpuTemplate>,
    /// Enables or disables dirty page tracking. Enabling allows incremental snapshots.
    #[serde(default)]
    pub track_dirty_pages: Option<bool>,
    /// Configures what page size Firecracker should use to back guest memory.
#[serde(default)]
    pub huge_pages: Option<HugePageConfig>,
    /// GDB socket address.
    #[cfg(feature = "gdb")]
    #[serde(default)]
    pub gdb_socket_path: Option<String>,
}

impl MachineConfigUpdate {
    /// Checks if the update request contains any data.
    /// Returns `true` if all fields are set to `None` which means that there is nothing
    /// to be updated.
    pub fn is_empty(&self) -> bool {
        self == &Default::default()
    }
}

impl From<MachineConfig> for MachineConfigUpdate {
    fn from(cfg: MachineConfig) -> Self {
        MachineConfigUpdate {
            vcpu_count: Some(cfg.vcpu_count),
            mem_size_mib: Some(cfg.mem_size_mib),
            smt: Some(cfg.smt),
            cpu_template: cfg.static_template(),
            track_dirty_pages: Some(cfg.track_dirty_pages),
            huge_pages: Some(cfg.huge_pages),
            #[cfg(feature = "gdb")]
            gdb_socket_path: cfg.gdb_socket_path,
        }
    }
}

impl MachineConfig {
    /// Sets cpu template field to `CpuTemplateType::Custom(cpu_template)`.
    pub fn set_custom_cpu_template(&mut self, cpu_template: CustomCpuTemplate) {
        self.cpu_template = Some(CpuTemplateType::Custom(cpu_template));
    }

    // Returns the configured static CPU template, if any; custom templates map to `None`.
    fn static_template(&self) -> Option<StaticCpuTemplate> {
        match self.cpu_template {
            Some(CpuTemplateType::Static(template)) => Some(template),
            _ => None,
        }
    }

    /// Updates [`MachineConfig`] with [`MachineConfigUpdate`].
    /// Mapping for cpu template update:
    /// StaticCpuTemplate::None -> None
    /// StaticCpuTemplate::Other -> Some(CustomCpuTemplate::Static(Other)),
    /// Returns the updated `MachineConfig` object.
    pub fn update(
        &self,
        update: &MachineConfigUpdate,
    ) -> Result<MachineConfig, MachineConfigError> {
        let vcpu_count = update.vcpu_count.unwrap_or(self.vcpu_count);
        let smt = update.smt.unwrap_or(self.smt);

        #[cfg(target_arch = "aarch64")]
        if smt {
            return Err(MachineConfigError::SmtNotSupported);
        }

        if vcpu_count == 0 || vcpu_count > MAX_SUPPORTED_VCPUS {
            return Err(MachineConfigError::InvalidVcpuCount);
        }

        // If SMT is enabled or is to be enabled in this call
        // only allow vcpu count to be 1 or even.
        if smt && vcpu_count > 1 && vcpu_count % 2 == 1 {
            return Err(MachineConfigError::InvalidVcpuCount);
        }

        let mem_size_mib = update.mem_size_mib.unwrap_or(self.mem_size_mib);
        let page_config = update.huge_pages.unwrap_or(self.huge_pages);

        if mem_size_mib == 0 || !page_config.is_valid_mem_size(mem_size_mib) {
            return Err(MachineConfigError::InvalidMemorySize);
        }

        let cpu_template = match update.cpu_template {
            None => self.cpu_template.clone(),
            Some(StaticCpuTemplate::None) => None,
            Some(other) => Some(CpuTemplateType::Static(other)),
        };

        Ok(MachineConfig {
            vcpu_count,
            mem_size_mib,
            smt,
            cpu_template,
            track_dirty_pages: update.track_dirty_pages.unwrap_or(self.track_dirty_pages),
            huge_pages: page_config,
            #[cfg(feature = "gdb")]
            gdb_socket_path: update.gdb_socket_path.clone(),
        })
    }
}

#[cfg(test)]
mod tests {
    use crate::cpu_config::templates::{CpuTemplateType, CustomCpuTemplate, StaticCpuTemplate};
    use crate::vmm_config::machine_config::MachineConfig;

    // Ensure the special (de)serialization logic for the cpu_template field works:
    // only static cpu templates can be specified via the machine-config endpoint, but
    // we still cram custom cpu templates into the MachineConfig struct if they're set otherwise
    // Ensure that during (de)serialization we preserve static templates, but we set custom
    // templates to None
    #[test]
    fn test_serialize_machine_config() {
        #[cfg(target_arch = "aarch64")]
        const TEMPLATE: StaticCpuTemplate = StaticCpuTemplate::V1N1;
        #[cfg(target_arch = "x86_64")]
        const TEMPLATE: StaticCpuTemplate = StaticCpuTemplate::T2S;

        let mconfig = MachineConfig {
            cpu_template: None,
            ..Default::default()
        };

        let serialized = serde_json::to_string(&mconfig).unwrap();
        let deserialized = serde_json::from_str::<MachineConfig>(&serialized).unwrap();

        assert!(deserialized.cpu_template.is_none());

        let mconfig = MachineConfig {
            cpu_template: Some(CpuTemplateType::Static(TEMPLATE)),
            ..Default::default()
        };

        let serialized = serde_json::to_string(&mconfig).unwrap();
        let deserialized =
serde_json::from_str::<MachineConfig>(&serialized).unwrap();

        assert_eq!(
            deserialized.cpu_template,
            Some(CpuTemplateType::Static(TEMPLATE))
        );

        let mconfig = MachineConfig {
            cpu_template: Some(CpuTemplateType::Custom(CustomCpuTemplate::default())),
            ..Default::default()
        };

        let serialized = serde_json::to_string(&mconfig).unwrap();
        let deserialized = serde_json::from_str::<MachineConfig>(&serialized).unwrap();

        assert!(deserialized.cpu_template.is_none());
    }
}



================================================
FILE: src/vmm/src/vmm_config/memory_hotplug.rs
================================================
// Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

use serde::{Deserialize, Serialize};

use crate::devices::virtio::mem::{
    VIRTIO_MEM_DEFAULT_BLOCK_SIZE_MIB, VIRTIO_MEM_DEFAULT_SLOT_SIZE_MIB, VirtioMem,
};

/// Errors associated with memory hotplug configuration.
#[derive(Debug, thiserror::Error, displaydoc::Display)]
pub enum MemoryHotplugConfigError {
    /// Block size must not be lower than {0} MiB
    BlockSizeTooSmall(usize),
    /// Block size must be a power of 2
    BlockSizeNotPowerOfTwo,
    /// Slot size must not be lower than {0} MiB
    SlotSizeTooSmall(usize),
    /// Slot size must be a multiple of block size ({0} MiB)
    SlotSizeNotMultipleOfBlockSize(usize),
    /// Total size must not be lower than slot size ({0} MiB)
    TotalSizeTooSmall(usize),
    /// Total size must be a multiple of slot size ({0} MiB)
    TotalSizeNotMultipleOfSlotSize(usize),
}

fn default_block_size_mib() -> usize {
    VIRTIO_MEM_DEFAULT_BLOCK_SIZE_MIB
}

fn default_slot_size_mib() -> usize {
    VIRTIO_MEM_DEFAULT_SLOT_SIZE_MIB
}

/// Configuration for memory hotplug device.
#[derive(Clone, Debug, Default, PartialEq, Eq, Deserialize, Serialize)]
#[serde(deny_unknown_fields)]
pub struct MemoryHotplugConfig {
    /// Total memory size in MiB that can be hotplugged.
    pub total_size_mib: usize,
    /// Block size in MiB. A block is the smallest unit the guest can hot(un)plug
    #[serde(default = "default_block_size_mib")]
    pub block_size_mib: usize,
    /// Slot size in MiB. A slot is the smallest unit the host can (de)attach memory
    #[serde(default = "default_slot_size_mib")]
    pub slot_size_mib: usize,
}

impl MemoryHotplugConfig {
    /// Validates the configuration.
    pub fn validate(&self) -> Result<(), MemoryHotplugConfigError> {
        let min_block_size_mib = VIRTIO_MEM_DEFAULT_BLOCK_SIZE_MIB;
        if self.block_size_mib < min_block_size_mib {
            return Err(MemoryHotplugConfigError::BlockSizeTooSmall(
                min_block_size_mib,
            ));
        }
        if !self.block_size_mib.is_power_of_two() {
            return Err(MemoryHotplugConfigError::BlockSizeNotPowerOfTwo);
        }
        let min_slot_size_mib = VIRTIO_MEM_DEFAULT_SLOT_SIZE_MIB;
        if self.slot_size_mib < min_slot_size_mib {
            return Err(MemoryHotplugConfigError::SlotSizeTooSmall(
                min_slot_size_mib,
            ));
        }
        if !self.slot_size_mib.is_multiple_of(self.block_size_mib) {
            return Err(MemoryHotplugConfigError::SlotSizeNotMultipleOfBlockSize(
                self.block_size_mib,
            ));
        }
        if self.total_size_mib < self.slot_size_mib {
            return Err(MemoryHotplugConfigError::TotalSizeTooSmall(
                self.slot_size_mib,
            ));
        }
        if !self.total_size_mib.is_multiple_of(self.slot_size_mib) {
            return Err(MemoryHotplugConfigError::TotalSizeNotMultipleOfSlotSize(
                self.slot_size_mib,
            ));
        }
        Ok(())
    }
}

impl From<&VirtioMem> for MemoryHotplugConfig {
    fn from(mem: &VirtioMem) -> Self {
        MemoryHotplugConfig {
            total_size_mib: mem.total_size_mib(),
            block_size_mib: mem.block_size_mib(),
            slot_size_mib: mem.slot_size_mib(),
        }
    }
}

/// Configuration for memory hotplug device.
#[derive(Clone, Debug, Default, PartialEq, Eq, Deserialize, Serialize)]
#[serde(deny_unknown_fields)]
pub struct MemoryHotplugSizeUpdate {
    /// Requested size in MiB to resize the hotpluggable memory to.
    pub requested_size_mib: usize,
}

#[cfg(test)]
mod tests {
    use serde_json;

    use super::*;

    #[test]
    fn test_valid_config() {
        let config = MemoryHotplugConfig {
            total_size_mib: 1024,
            block_size_mib: 2,
            slot_size_mib: 128,
        };
        config.validate().unwrap();
    }

    #[test]
    fn test_block_size_too_small() {
        let config = MemoryHotplugConfig {
            total_size_mib: 1024,
            block_size_mib: 1,
            slot_size_mib: 128,
        };
        match config.validate() {
            Err(MemoryHotplugConfigError::BlockSizeTooSmall(min)) => assert_eq!(min, 2),
            _ => panic!("Expected InvalidBlockSizeTooSmall error"),
        }
    }

    #[test]
    fn test_block_size_not_power_of_two() {
        let config = MemoryHotplugConfig {
            total_size_mib: 1024,
            block_size_mib: 3,
            slot_size_mib: 128,
        };
        match config.validate() {
            Err(MemoryHotplugConfigError::BlockSizeNotPowerOfTwo) => {}
            _ => panic!("Expected InvalidBlockSizePowerOfTwo error"),
        }
    }

    #[test]
    fn test_slot_size_too_small() {
        let config = MemoryHotplugConfig {
            total_size_mib: 1024,
            block_size_mib: 2,
            slot_size_mib: 1,
        };
        match config.validate() {
            Err(MemoryHotplugConfigError::SlotSizeTooSmall(min)) => assert_eq!(min, 128),
            _ => panic!("Expected InvalidSlotSizeTooSmall error"),
        }
    }

    #[test]
    fn test_slot_size_not_multiple_of_block_size() {
        let config = MemoryHotplugConfig {
            total_size_mib: 1024,
            block_size_mib: 4,
            slot_size_mib: 130,
        };
        match config.validate() {
            Err(MemoryHotplugConfigError::SlotSizeNotMultipleOfBlockSize(block_size)) => {
                assert_eq!(block_size, 4)
            }
            _ => panic!("Expected InvalidSlotSizeMultiple error"),
        }
    }

    #[test]
    fn test_total_size_too_small() {
        let config = MemoryHotplugConfig {
            total_size_mib: 64,
            block_size_mib: 2,
            slot_size_mib: 128,
        };
        match config.validate() {
            Err(MemoryHotplugConfigError::TotalSizeTooSmall(slot_size)) => {
                assert_eq!(slot_size, 128)
            }
            _ => panic!("Expected InvalidTotalSizeTooSmall error"),
        }
    }

    #[test]
    fn test_total_size_not_multiple_of_slot_size() {
        let config = MemoryHotplugConfig {
            total_size_mib: 1000,
            block_size_mib: 2,
            slot_size_mib: 128,
        };
        match config.validate() {
            Err(MemoryHotplugConfigError::TotalSizeNotMultipleOfSlotSize(slot_size)) => {
                assert_eq!(slot_size, 128)
            }
            _ => panic!("Expected InvalidTotalSizeMultiple error"),
        }
    }

    #[test]
    fn test_defaults() {
        assert_eq!(default_block_size_mib(), 2);
        assert_eq!(default_slot_size_mib(), 128);

        let json = r#"{ "total_size_mib": 1024 }"#;
        let deserialized: MemoryHotplugConfig = serde_json::from_str(json).unwrap();
        assert_eq!(
            deserialized,
            MemoryHotplugConfig {
                total_size_mib: 1024,
                block_size_mib: 2,
                slot_size_mib: 128,
            }
        );
    }

    #[test]
    fn test_serde() {
        let config = MemoryHotplugConfig {
            total_size_mib: 1024,
            block_size_mib: 4,
            slot_size_mib: 256,
        };
        let json = serde_json::to_string(&config).unwrap();
        let deserialized: MemoryHotplugConfig = serde_json::from_str(&json).unwrap();
        assert_eq!(config, deserialized);
    }
}



================================================
FILE: src/vmm/src/vmm_config/metrics.rs
================================================
// Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

//! Auxiliary module for configuring the metrics system.
use std::path::PathBuf;

use serde::{Deserialize, Serialize};

use crate::logger::{FcLineWriter, METRICS};
use crate::utils::open_file_nonblock;

/// Strongly typed structure used to describe the metrics system.
#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize)]
pub struct MetricsConfig {
    /// Named pipe or file used as output for metrics.
    pub metrics_path: PathBuf,
}

/// Errors associated with actions on the `MetricsConfig`.
#[derive(Debug, thiserror::Error, displaydoc::Display)]
pub enum MetricsConfigError {
    /// Cannot initialize the metrics system due to bad user input: {0}
    InitializationFailure(String),
}

/// Configures the metrics as described in `metrics_cfg`.
pub fn init_metrics(metrics_cfg: MetricsConfig) -> Result<(), MetricsConfigError> { let writer = FcLineWriter::new( open_file_nonblock(&metrics_cfg.metrics_path) .map_err(|err| MetricsConfigError::InitializationFailure(err.to_string()))?, ); METRICS .init(writer) .map_err(|err| MetricsConfigError::InitializationFailure(err.to_string())) } #[cfg(test)] mod tests { use vmm_sys_util::tempfile::TempFile; use super::*; #[test] fn test_init_metrics() { // Initializing metrics with valid pipe is ok. let metrics_file = TempFile::new().unwrap(); let desc = MetricsConfig { metrics_path: metrics_file.as_path().to_path_buf(), }; init_metrics(desc.clone()).unwrap(); init_metrics(desc).unwrap_err(); } } ================================================ FILE: src/vmm/src/vmm_config/mmds.rs ================================================ // Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 use std::net::Ipv4Addr; use serde::{Deserialize, Serialize}; use crate::mmds::data_store; use crate::mmds::data_store::MmdsVersion; /// Keeps the MMDS configuration. #[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize)] #[serde(deny_unknown_fields)] pub struct MmdsConfig { /// MMDS version. #[serde(default)] pub version: MmdsVersion, /// Network interfaces that allow forwarding packets to MMDS. pub network_interfaces: Vec, /// MMDS IPv4 configured address. pub ipv4_address: Option, /// Compatibility with EC2 IMDS. #[serde(default)] pub imds_compat: bool, } impl MmdsConfig { /// Returns the MMDS version configured. pub fn version(&self) -> MmdsVersion { self.version } /// Returns the network interfaces that accept MMDS requests. pub fn network_interfaces(&self) -> Vec { self.network_interfaces.clone() } /// Returns the MMDS IPv4 address if one was configured. /// Otherwise returns None. pub fn ipv4_addr(&self) -> Option { self.ipv4_address } } /// MMDS configuration related errors. 
#[rustfmt::skip] #[derive(Debug, thiserror::Error, displaydoc::Display)] pub enum MmdsConfigError { /// The list of network interface IDs that allow forwarding MMDS requests is empty. EmptyNetworkIfaceList, /// The MMDS IPv4 address is not link local. InvalidIpv4Addr, /// The list of network interface IDs provided contains at least one ID that does not correspond to any existing network interface. InvalidNetworkInterfaceId, /// Failed to initialize MMDS data store: {0} InitMmdsDatastore(#[from] data_store::MmdsDatastoreError), } ================================================ FILE: src/vmm/src/vmm_config/mod.rs ================================================ // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 use std::convert::{From, TryInto}; use std::io; use serde::{Deserialize, Serialize}; use crate::rate_limiter::{BucketUpdate, RateLimiter, TokenBucket}; /// Wrapper for configuring the balloon device. pub mod balloon; /// Wrapper for configuring the microVM boot source. pub mod boot_source; /// Wrapper for configuring the block devices. pub mod drive; /// Wrapper for configuring the entropy device attached to the microVM. pub mod entropy; /// Wrapper over the microVM general information attached to the microVM. pub mod instance_info; /// Wrapper for configuring the memory and CPU of the microVM. pub mod machine_config; /// Wrapper for configuring memory hotplug. pub mod memory_hotplug; /// Wrapper for configuring the metrics. pub mod metrics; /// Wrapper for configuring the MMDS. pub mod mmds; /// Wrapper for configuring the network devices attached to the microVM. pub mod net; /// Wrapper for configuring the pmem devises attached to the microVM. pub mod pmem; /// Wrapper for configuring microVM snapshots and the microVM state. pub mod serial; pub mod snapshot; /// Wrapper for configuring the vsock devices attached to the microVM. pub mod vsock; // TODO: Migrate the VMM public-facing code (i.e. 
interface) to use stateless structures, // for receiving data/args, such as the below `RateLimiterConfig` and `TokenBucketConfig`. // Also todo: find a better suffix than `Config`; it should illustrate the static nature // of the enclosed data. // Currently, data is passed around using live/stateful objects. Switching to static/stateless // objects will simplify both the ownership model and serialization. // Public access would then be more tightly regulated via `VmmAction`s, consisting of tuples like // (entry-point-into-VMM-logic, stateless-args-structure). /// A public-facing, stateless structure, holding all the data we need to create a TokenBucket /// (live) object. #[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Deserialize, Serialize)] pub struct TokenBucketConfig { /// See TokenBucket::size. pub size: u64, /// See TokenBucket::one_time_burst. pub one_time_burst: Option, /// See TokenBucket::refill_time. pub refill_time: u64, } impl From<&TokenBucket> for TokenBucketConfig { fn from(tb: &TokenBucket) -> Self { let one_time_burst = match tb.initial_one_time_burst() { 0 => None, v => Some(v), }; TokenBucketConfig { size: tb.capacity(), one_time_burst, refill_time: tb.refill_time_ms(), } } } /// A public-facing, stateless structure, holding all the data we need to create a RateLimiter /// (live) object. #[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Deserialize, Serialize)] #[serde(deny_unknown_fields)] pub struct RateLimiterConfig { /// Data used to initialize the RateLimiter::bandwidth bucket. pub bandwidth: Option, /// Data used to initialize the RateLimiter::ops bucket. pub ops: Option, } /// A public-facing, stateless structure, specifying RateLimiter properties updates. #[derive(Debug)] pub struct RateLimiterUpdate { /// Possible update to the RateLimiter::bandwidth bucket. pub bandwidth: BucketUpdate, /// Possible update to the RateLimiter::ops bucket. 
pub ops: BucketUpdate, } fn get_bucket_update(tb_cfg: &Option) -> BucketUpdate { match tb_cfg { // There is data to update. Some(tb_cfg) => { TokenBucket::new( tb_cfg.size, tb_cfg.one_time_burst.unwrap_or(0), tb_cfg.refill_time, ) // Updated active rate-limiter. .map(BucketUpdate::Update) // Updated/deactivated rate-limiter .unwrap_or(BucketUpdate::Disabled) } // No update to the rate-limiter. None => BucketUpdate::None, } } impl From> for RateLimiterUpdate { fn from(cfg: Option) -> Self { if let Some(cfg) = cfg { RateLimiterUpdate { bandwidth: get_bucket_update(&cfg.bandwidth), ops: get_bucket_update(&cfg.ops), } } else { // No update to the rate-limiter. RateLimiterUpdate { bandwidth: BucketUpdate::None, ops: BucketUpdate::None, } } } } impl TryInto for RateLimiterConfig { type Error = io::Error; fn try_into(self) -> Result { let bw = self.bandwidth.unwrap_or_default(); let ops = self.ops.unwrap_or_default(); RateLimiter::new( bw.size, bw.one_time_burst.unwrap_or(0), bw.refill_time, ops.size, ops.one_time_burst.unwrap_or(0), ops.refill_time, ) } } impl From<&RateLimiter> for RateLimiterConfig { fn from(rl: &RateLimiter) -> Self { RateLimiterConfig { bandwidth: rl.bandwidth().map(TokenBucketConfig::from), ops: rl.ops().map(TokenBucketConfig::from), } } } impl RateLimiterConfig { /// [`Option`] already implements [`From`] so we have to use a custom /// one. 
pub fn into_option(self) -> Option { if self.bandwidth.is_some() || self.ops.is_some() { Some(self) } else { None } } } #[cfg(test)] mod tests { use super::*; const SIZE: u64 = 1024 * 1024; const ONE_TIME_BURST: u64 = 1024; const REFILL_TIME: u64 = 1000; #[test] fn test_rate_limiter_configs() { let rlconf = RateLimiterConfig { bandwidth: Some(TokenBucketConfig { size: SIZE, one_time_burst: Some(ONE_TIME_BURST), refill_time: REFILL_TIME, }), ops: Some(TokenBucketConfig { size: SIZE * 2, one_time_burst: None, refill_time: REFILL_TIME * 2, }), }; let rl: RateLimiter = rlconf.try_into().unwrap(); assert_eq!(rl.bandwidth().unwrap().capacity(), SIZE); assert_eq!(rl.bandwidth().unwrap().one_time_burst(), ONE_TIME_BURST); assert_eq!(rl.bandwidth().unwrap().refill_time_ms(), REFILL_TIME); assert_eq!(rl.ops().unwrap().capacity(), SIZE * 2); assert_eq!(rl.ops().unwrap().one_time_burst(), 0); assert_eq!(rl.ops().unwrap().refill_time_ms(), REFILL_TIME * 2); } #[test] fn test_generate_configs() { let bw_tb_cfg = TokenBucketConfig { size: SIZE, one_time_burst: Some(ONE_TIME_BURST), refill_time: REFILL_TIME, }; let bw_tb = TokenBucket::new(SIZE, ONE_TIME_BURST, REFILL_TIME).unwrap(); let generated_bw_tb_cfg = TokenBucketConfig::from(&bw_tb); assert_eq!(generated_bw_tb_cfg, bw_tb_cfg); let rl_conf = RateLimiterConfig { bandwidth: Some(bw_tb_cfg), ops: None, }; let rl: RateLimiter = rl_conf.try_into().unwrap(); let generated_rl_conf = RateLimiterConfig::from(&rl); assert_eq!(generated_rl_conf, rl_conf); assert_eq!(generated_rl_conf.into_option(), Some(rl_conf)); } } ================================================ FILE: src/vmm/src/vmm_config/net.rs ================================================ // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
// SPDX-License-Identifier: Apache-2.0 use std::convert::TryInto; use std::ops::Deref; use std::sync::{Arc, Mutex}; use serde::{Deserialize, Serialize}; use super::RateLimiterConfig; use crate::VmmError; use crate::devices::virtio::device::VirtioDevice; use crate::devices::virtio::net::{Net, TapError}; use crate::utils::net::mac::MacAddr; /// This struct represents the strongly typed equivalent of the json body from net iface /// related requests. #[derive(Debug, PartialEq, Eq, Deserialize, Serialize)] #[serde(deny_unknown_fields)] pub struct NetworkInterfaceConfig { /// ID of the guest network interface. pub iface_id: String, /// Host level path for the guest network interface. pub host_dev_name: String, /// Guest MAC address. pub guest_mac: Option, /// Rate Limiter for received packages. pub rx_rate_limiter: Option, /// Rate Limiter for transmitted packages. pub tx_rate_limiter: Option, } impl From<&Net> for NetworkInterfaceConfig { fn from(net: &Net) -> Self { let rx_rl: RateLimiterConfig = net.rx_rate_limiter().into(); let tx_rl: RateLimiterConfig = net.tx_rate_limiter().into(); NetworkInterfaceConfig { iface_id: net.id().to_string(), host_dev_name: net.iface_name(), guest_mac: net.guest_mac().copied(), rx_rate_limiter: rx_rl.into_option(), tx_rate_limiter: tx_rl.into_option(), } } } /// The data fed into a network iface update request. Currently, only the RX and TX rate limiters /// can be updated. #[derive(Debug, Clone, PartialEq, Eq, Deserialize)] #[serde(deny_unknown_fields)] pub struct NetworkInterfaceUpdateConfig { /// The net iface ID, as provided by the user at iface creation time. pub iface_id: String, /// New RX rate limiter config. Only provided data will be updated. I.e. if any optional data /// is missing, it will not be nullified, but left unchanged. pub rx_rate_limiter: Option, /// New TX rate limiter config. Only provided data will be updated. I.e. if any optional data /// is missing, it will not be nullified, but left unchanged. 
pub tx_rate_limiter: Option, } /// Errors associated with the operations allowed on a net device. #[derive(Debug, thiserror::Error, displaydoc::Display)] pub enum NetworkInterfaceError { /// Could not create the network device: {0} CreateNetworkDevice(#[from] crate::devices::virtio::net::NetError), /// Cannot create the rate limiter: {0} CreateRateLimiter(#[from] std::io::Error), /// Unable to update the net device: {0} DeviceUpdate(#[from] VmmError), /// The MAC address is already in use: {0} GuestMacAddressInUse(String), /// Cannot open/create the tap device: {0} OpenTap(#[from] TapError), } /// Builder for a list of network devices. #[derive(Debug, Default)] pub struct NetBuilder { net_devices: Vec>>, } impl NetBuilder { /// Creates an empty list of Network Devices. pub fn new() -> Self { NetBuilder { // List of built network devices. net_devices: Vec::new(), } } /// Returns a immutable iterator over the network devices. pub fn iter(&self) -> ::std::slice::Iter<'_, Arc>> { self.net_devices.iter() } /// Adds an existing network device in the builder. pub fn add_device(&mut self, device: Arc>) { self.net_devices.push(device); } /// Builds a network device based on a network interface config. Keeps a device reference /// in the builder's internal list. pub fn build( &mut self, netif_config: NetworkInterfaceConfig, ) -> Result>, NetworkInterfaceError> { if let Some(ref mac_address) = netif_config.guest_mac { let mac_conflict = |net: &Arc>| { let net = net.lock().expect("Poisoned lock"); // Check if another net dev has same MAC. Some(mac_address) == net.guest_mac() && netif_config.iface_id != net.id() }; // Validate there is no Mac conflict. // No need to validate host_dev_name conflict. In such a case, // an error will be thrown during device creation anyway. if self.net_devices.iter().any(mac_conflict) { return Err(NetworkInterfaceError::GuestMacAddressInUse( mac_address.to_string(), )); } } // If this is an update, just remove the old one. 
if let Some(index) = self .net_devices .iter() .position(|net| net.lock().expect("Poisoned lock").id() == netif_config.iface_id) { self.net_devices.swap_remove(index); } // Add new device. let net = Arc::new(Mutex::new(Self::create_net(netif_config)?)); self.net_devices.push(net.clone()); Ok(net) } /// Creates a Net device from a NetworkInterfaceConfig. pub fn create_net(cfg: NetworkInterfaceConfig) -> Result { let rx_rate_limiter = cfg .rx_rate_limiter .map(super::RateLimiterConfig::try_into) .transpose() .map_err(NetworkInterfaceError::CreateRateLimiter)?; let tx_rate_limiter = cfg .tx_rate_limiter .map(super::RateLimiterConfig::try_into) .transpose() .map_err(NetworkInterfaceError::CreateRateLimiter)?; // Create and return the Net device crate::devices::virtio::net::Net::new( cfg.iface_id, &cfg.host_dev_name, cfg.guest_mac, rx_rate_limiter.unwrap_or_default(), tx_rate_limiter.unwrap_or_default(), ) .map_err(NetworkInterfaceError::CreateNetworkDevice) } /// Returns a vec with the structures used to configure the net devices. 
pub fn configs(&self) -> Vec { let mut ret = vec![]; for net in &self.net_devices { ret.push(NetworkInterfaceConfig::from(net.lock().unwrap().deref())); } ret } } #[cfg(test)] mod tests { use std::str::FromStr; use super::*; use crate::rate_limiter::RateLimiter; impl NetBuilder { pub(crate) fn len(&self) -> usize { self.net_devices.len() } } fn create_netif(id: &str, name: &str, mac: &str) -> NetworkInterfaceConfig { NetworkInterfaceConfig { iface_id: String::from(id), host_dev_name: String::from(name), guest_mac: Some(MacAddr::from_str(mac).unwrap()), rx_rate_limiter: RateLimiterConfig::default().into_option(), tx_rate_limiter: RateLimiterConfig::default().into_option(), } } impl Clone for NetworkInterfaceConfig { fn clone(&self) -> Self { NetworkInterfaceConfig { iface_id: self.iface_id.clone(), host_dev_name: self.host_dev_name.clone(), guest_mac: self.guest_mac, rx_rate_limiter: None, tx_rate_limiter: None, } } } #[test] fn test_insert() { let mut net_builder = NetBuilder::new(); let id_1 = "id_1"; let mut host_dev_name_1 = "dev1"; let mut guest_mac_1 = "01:23:45:67:89:0a"; // Test create. let netif_1 = create_netif(id_1, host_dev_name_1, guest_mac_1); net_builder.build(netif_1).unwrap(); assert_eq!(net_builder.net_devices.len(), 1); // Test update mac address (this test does not modify the tap). guest_mac_1 = "01:23:45:67:89:0b"; let netif_1 = create_netif(id_1, host_dev_name_1, guest_mac_1); net_builder.build(netif_1).unwrap(); assert_eq!(net_builder.net_devices.len(), 1); // Test update host_dev_name (the tap will be updated). host_dev_name_1 = "dev2"; let netif_1 = create_netif(id_1, host_dev_name_1, guest_mac_1); net_builder.build(netif_1).unwrap(); assert_eq!(net_builder.net_devices.len(), 1); } #[test] fn test_insert_error_cases() { let mut net_builder = NetBuilder::new(); let id_1 = "id_1"; let host_dev_name_1 = "dev3"; let guest_mac_1 = "01:23:45:67:89:0a"; // Adding the first valid network config. 
let netif_1 = create_netif(id_1, host_dev_name_1, guest_mac_1); net_builder.build(netif_1).unwrap(); // Error Cases for CREATE // Error Case: Add new network config with the same mac as netif_1. let id_2 = "id_2"; let host_dev_name_2 = "dev4"; let guest_mac_2 = "01:23:45:67:89:0b"; let netif_2 = create_netif(id_2, host_dev_name_2, guest_mac_1); let expected_error = NetworkInterfaceError::GuestMacAddressInUse(guest_mac_1.into()); assert_eq!( net_builder.build(netif_2).err().unwrap().to_string(), expected_error.to_string() ); assert_eq!(net_builder.net_devices.len(), 1); // Error Case: Add new network config with the same dev_host_name as netif_1. let netif_2 = create_netif(id_2, host_dev_name_1, guest_mac_2); assert_eq!( net_builder.build(netif_2).err().unwrap().to_string(), NetworkInterfaceError::CreateNetworkDevice( crate::devices::virtio::net::NetError::TapOpen(TapError::IfreqExecuteError( std::io::Error::from_raw_os_error(16), host_dev_name_1.to_string() )) ) .to_string() ); assert_eq!(net_builder.net_devices.len(), 1); // Adding the second valid network config. let netif_2 = create_netif(id_2, host_dev_name_2, guest_mac_2); net_builder.build(netif_2).unwrap(); // Error Cases for UPDATE // Error Case: Update netif_2 mac using the same mac as netif_1. let netif_2 = create_netif(id_2, host_dev_name_2, guest_mac_1); let expected_error = NetworkInterfaceError::GuestMacAddressInUse(guest_mac_1.into()); assert_eq!( net_builder.build(netif_2).err().unwrap().to_string(), expected_error.to_string() ); // Error Case: Update netif_2 dev_host_name using the same dev_host_name as netif_1. 
let netif_2 = create_netif(id_2, host_dev_name_1, guest_mac_2); assert_eq!( net_builder.build(netif_2).err().unwrap().to_string(), NetworkInterfaceError::CreateNetworkDevice( crate::devices::virtio::net::NetError::TapOpen(TapError::IfreqExecuteError( std::io::Error::from_raw_os_error(16), host_dev_name_1.to_string() )) ) .to_string() ); } #[test] fn test_net_config() { let net_id = "id"; let host_dev_name = "dev"; let guest_mac = "01:23:45:67:89:0b"; let net_if_cfg = create_netif(net_id, host_dev_name, guest_mac); assert_eq!( net_if_cfg.guest_mac.unwrap(), MacAddr::from_str(guest_mac).unwrap() ); let mut net_builder = NetBuilder::new(); net_builder.build(net_if_cfg.clone()).unwrap(); assert_eq!(net_builder.net_devices.len(), 1); let configs = net_builder.configs(); assert_eq!(configs.len(), 1); assert_eq!(configs.first().unwrap(), &net_if_cfg); } #[test] fn test_add_device() { let mut net_builder = NetBuilder::new(); let net_id = "test_id"; let host_dev_name = "dev"; let guest_mac = "01:23:45:67:89:0b"; let net = Net::new( net_id.to_string(), host_dev_name, Some(MacAddr::from_str(guest_mac).unwrap()), RateLimiter::default(), RateLimiter::default(), ) .unwrap(); net_builder.add_device(Arc::new(Mutex::new(net))); assert_eq!(net_builder.net_devices.len(), 1); assert_eq!( net_builder .net_devices .pop() .unwrap() .lock() .unwrap() .deref() .id(), net_id ); } } ================================================ FILE: src/vmm/src/vmm_config/pmem.rs ================================================ // Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
// SPDX-License-Identifier: Apache-2.0 use std::sync::{Arc, Mutex}; use serde::{Deserialize, Serialize}; use crate::devices::virtio::pmem::device::{Pmem, PmemError}; /// Errors associated wit the operations allowed on a pmem device #[derive(Debug, thiserror::Error, displaydoc::Display)] pub enum PmemConfigError { /// Attempt to add pmem as a root device while the root device defined as a block device AddingSecondRootDevice, /// A root pmem device already exist RootPmemDeviceAlreadyExist, /// Unable to create the virtio-pmem device: {0} CreateDevice(#[from] PmemError), /// Error accessing underlying file: {0} File(std::io::Error), } /// Use this structure to setup a Pmem device before boothing the kernel. #[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize)] #[serde(deny_unknown_fields)] pub struct PmemConfig { /// Unique identifier of the device. pub id: String, /// Path of the drive. pub path_on_host: String, /// Use this pmem device for rootfs #[serde(default)] pub root_device: bool, /// Map the file as read only #[serde(default)] pub read_only: bool, } /// Wrapper for the collection that holds all the Pmem devices. #[derive(Debug, Default)] pub struct PmemBuilder { /// The list of pmem devices pub devices: Vec>>, } impl PmemBuilder { /// Specifies whether there is a root block device already present in the list. 
pub fn has_root_device(&self) -> bool { self.devices .iter() .any(|d| d.lock().unwrap().config.root_device) } /// Build a device from the config pub fn build( &mut self, config: PmemConfig, has_block_root: bool, ) -> Result<(), PmemConfigError> { if config.root_device && has_block_root { return Err(PmemConfigError::AddingSecondRootDevice); } let position = self .devices .iter() .position(|d| d.lock().unwrap().config.id == config.id); if let Some(index) = position { if !self.devices[index].lock().unwrap().config.root_device && config.root_device && self.has_root_device() { return Err(PmemConfigError::RootPmemDeviceAlreadyExist); } let pmem = Pmem::new(config)?; let pmem = Arc::new(Mutex::new(pmem)); self.devices[index] = pmem; } else { if config.root_device && self.has_root_device() { return Err(PmemConfigError::RootPmemDeviceAlreadyExist); } let pmem = Pmem::new(config)?; let pmem = Arc::new(Mutex::new(pmem)); self.devices.push(pmem); } Ok(()) } /// Adds an existing pmem device in the builder. This function should /// only be used during snapshot restoration process and should add /// devices in the same order as they were in the original VM. pub fn add_device(&mut self, device: Arc>) { self.devices.push(device); } /// Returns a vec with the structures used to configure the devices. 
pub fn configs(&self) -> Vec { self.devices .iter() .map(|b| b.lock().unwrap().config.clone()) .collect() } } #[cfg(test)] mod tests { use vmm_sys_util::tempfile::TempFile; use super::*; #[test] fn test_pmem_builder_build() { let mut builder = PmemBuilder::default(); let dummy_file = TempFile::new().unwrap(); dummy_file.as_file().set_len(Pmem::ALIGNMENT).unwrap(); let dummy_path = dummy_file.as_path().to_str().unwrap().to_string(); let mut config = PmemConfig { id: "1".into(), path_on_host: dummy_path, root_device: true, read_only: false, }; builder.build(config.clone(), false).unwrap(); assert_eq!(builder.devices.len(), 1); assert!(builder.has_root_device()); // First device got replaced with new one config.root_device = false; builder.build(config, false).unwrap(); assert_eq!(builder.devices.len(), 1); assert!(!builder.has_root_device()); } #[test] fn test_pmem_builder_build_seconde_root() { let mut builder = PmemBuilder::default(); let dummy_file = TempFile::new().unwrap(); dummy_file.as_file().set_len(Pmem::ALIGNMENT).unwrap(); let dummy_path = dummy_file.as_path().to_str().unwrap().to_string(); let mut config = PmemConfig { id: "1".into(), path_on_host: dummy_path, root_device: true, read_only: false, }; builder.build(config.clone(), false).unwrap(); config.id = "2".into(); assert!(matches!( builder.build(config.clone(), false).unwrap_err(), PmemConfigError::RootPmemDeviceAlreadyExist, )); } #[test] fn test_pmem_builder_build_root_with_block_already_a_root() { let mut builder = PmemBuilder::default(); let dummy_file = TempFile::new().unwrap(); dummy_file.as_file().set_len(Pmem::ALIGNMENT).unwrap(); let dummy_path = dummy_file.as_path().to_str().unwrap().to_string(); let config = PmemConfig { id: "1".into(), path_on_host: dummy_path, root_device: true, read_only: false, }; assert!(matches!( builder.build(config, true).unwrap_err(), PmemConfigError::AddingSecondRootDevice, )); } } ================================================ FILE: 
src/vmm/src/vmm_config/serial.rs
================================================
// Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

use std::path::PathBuf;

use serde::Deserialize;

/// The body of a PUT /serial request.
#[derive(Debug, PartialEq, Eq, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct SerialConfig {
    /// Named pipe or file used as output for guest serial console.
    pub serial_out_path: Option<PathBuf>,
}

================================================
FILE: src/vmm/src/vmm_config/snapshot.rs
================================================
// Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

//! Configurations used in the snapshotting context.

use std::path::PathBuf;

/// For crates that depend on `vmm` we export.
pub use semver::Version;
use serde::{Deserialize, Serialize};

/// The snapshot type options that are available when
/// creating a new snapshot.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Deserialize, Serialize)]
pub enum SnapshotType {
    /// Diff snapshot.
    Diff,
    /// Full snapshot.
    #[default]
    Full,
}

/// Specifies the method through which guest memory will get populated when
/// resuming from a snapshot:
/// 1) A file that contains the guest memory to be loaded,
/// 2) An UDS where a custom page-fault handler process is listening for the UFFD set up by
///    Firecracker to handle its guest memory page faults.
#[derive(Debug, PartialEq, Eq, Deserialize)]
pub enum MemBackendType {
    /// Guest memory contents will be loaded from a file.
    File,
    /// Guest memory will be served through UFFD by a separate process.
    Uffd,
}

/// Stores the configuration that will be used for creating a snapshot.
#[derive(Debug, PartialEq, Eq, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct CreateSnapshotParams {
    /// This marks the type of snapshot we want to create.
    /// The default value is `Full`, which means a full snapshot.
    #[serde(default = "SnapshotType::default")]
    pub snapshot_type: SnapshotType,
    /// Path to the file that will contain the microVM state.
    pub snapshot_path: PathBuf,
    /// Path to the file that will contain the guest memory.
    pub mem_file_path: PathBuf,
}

/// Allows for changing the mapping between tap devices and host devices
/// during snapshot restore
#[derive(Debug, PartialEq, Eq, Deserialize)]
pub struct NetworkOverride {
    /// The index of the interface to modify
    pub iface_id: String,
    /// The new name of the interface to be assigned
    pub host_dev_name: String,
}

/// Allows for changing the host UDS of the vsock backend during snapshot restore
#[derive(Debug, PartialEq, Eq, Deserialize)]
pub struct VsockOverride {
    /// The path to the UDS that will be used for the vsock interface
    pub uds_path: String,
}

/// Stores the configuration that will be used for loading a snapshot.
#[derive(Debug, PartialEq, Eq)]
pub struct LoadSnapshotParams {
    /// Path to the file that contains the microVM state to be loaded.
    pub snapshot_path: PathBuf,
    /// Specifies guest memory backend configuration.
    pub mem_backend: MemBackendConfig,
    /// Whether KVM dirty page tracking should be enabled, to enable space
    /// optimization of differential snapshots.
    pub track_dirty_pages: bool,
    /// When set to true, the vm is also resumed if the snapshot load
    /// is successful.
    pub resume_vm: bool,
    /// The network devices to override on load.
    pub network_overrides: Vec<NetworkOverride>,
    /// When set, the vsock backend UDS path will be overridden
    pub vsock_override: Option<VsockOverride>,
}

/// Stores the configuration for loading a snapshot that is provided by the user.
#[derive(Debug, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct LoadSnapshotConfig {
    /// Path to the file that contains the microVM state to be loaded.
    pub snapshot_path: PathBuf,
    /// Path to the file that contains the guest memory to be loaded. To be used only if
    /// `mem_backend` is not specified.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub mem_file_path: Option<PathBuf>,
    /// Guest memory backend configuration. Is not to be used in conjunction with `mem_file_path`.
    /// None value is allowed only if `mem_file_path` is present.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub mem_backend: Option<MemBackendConfig>,
    /// Whether or not to enable KVM dirty page tracking.
    #[serde(default)]
    #[deprecated]
    pub enable_diff_snapshots: bool,
    /// Whether KVM dirty page tracking should be enabled.
    #[serde(default)]
    pub track_dirty_pages: bool,
    /// Whether or not to resume the vm post snapshot load.
    #[serde(default)]
    pub resume_vm: bool,
    /// The network devices to override on load.
    #[serde(default)]
    pub network_overrides: Vec<NetworkOverride>,
    /// Whether or not to override the vsock backend UDS path.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub vsock_override: Option<VsockOverride>,
}

/// Stores the configuration used for managing snapshot memory.
#[derive(Debug, PartialEq, Eq, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct MemBackendConfig {
    /// Path to the backend used to handle the guest memory.
    pub backend_path: PathBuf,
    /// Specifies the guest memory backend type.
    pub backend_type: MemBackendType,
}

/// The microVM state options.
#[derive(Debug, Deserialize, Serialize)]
pub enum VmState {
    /// The microVM is paused, which means that we can create a snapshot of it.
    Paused,
    /// The microVM is resumed; this state should be set after we load a snapshot.
    Resumed,
}

/// Keeps the microVM state necessary in the snapshotting context.
#[derive(Debug, Deserialize, Serialize)]
#[serde(deny_unknown_fields)]
pub struct Vm {
    /// The microVM state, which can be `paused` or `resumed`.
    pub state: VmState,
}

================================================
FILE: src/vmm/src/vmm_config/vsock.rs
================================================
// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0 use std::convert::TryFrom; use std::sync::{Arc, Mutex}; use serde::{Deserialize, Serialize}; use crate::devices::virtio::vsock::{Vsock, VsockError, VsockUnixBackend, VsockUnixBackendError}; type MutexVsockUnix = Arc>>; /// Errors associated with `NetworkInterfaceConfig`. #[derive(Debug, derive_more::From, thiserror::Error, displaydoc::Display)] pub enum VsockConfigError { /// Cannot create backend for vsock device: {0} CreateVsockBackend(VsockUnixBackendError), /// Cannot create vsock device: {0} CreateVsockDevice(VsockError), } /// This struct represents the strongly typed equivalent of the json body /// from vsock related requests. #[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize)] #[serde(deny_unknown_fields)] pub struct VsockDeviceConfig { #[serde(default)] #[serde(skip_serializing_if = "Option::is_none")] /// ID of the vsock device. pub vsock_id: Option, /// A 32-bit Context Identifier (CID) used to identify the guest. pub guest_cid: u32, /// Path to local unix socket. pub uds_path: String, } #[derive(Debug)] struct VsockAndUnixPath { vsock: MutexVsockUnix, uds_path: String, } impl From<&VsockAndUnixPath> for VsockDeviceConfig { fn from(vsock: &VsockAndUnixPath) -> Self { let vsock_lock = vsock.vsock.lock().unwrap(); VsockDeviceConfig { vsock_id: None, guest_cid: u32::try_from(vsock_lock.cid()).unwrap(), uds_path: vsock.uds_path.clone(), } } } impl From<&Vsock> for VsockDeviceConfig { fn from(vsock: &Vsock) -> Self { VsockDeviceConfig { vsock_id: None, // deprecated guest_cid: u32::try_from(vsock.cid()).unwrap(), uds_path: vsock.backend().host_sock_path().to_owned(), } } } /// A builder of Vsock with Unix backend from 'VsockDeviceConfig'. #[derive(Debug, Default)] pub struct VsockBuilder { inner: Option, } impl VsockBuilder { /// Creates an empty Vsock with Unix backend Store. pub fn new() -> Self { Self { inner: None } } /// Inserts an existing vsock device. 
pub fn set_device(&mut self, device: Arc>>) { self.inner = Some(VsockAndUnixPath { uds_path: device .lock() .expect("Poisoned lock") .backend() .host_sock_path() .to_owned(), vsock: device.clone(), }); } /// Inserts a Unix backend Vsock in the store. /// If an entry already exists, it will overwrite it. pub fn insert(&mut self, cfg: VsockDeviceConfig) -> Result<(), VsockConfigError> { // Make sure to drop the old one and remove the socket before creating a new one. if let Some(existing) = self.inner.take() { std::fs::remove_file(existing.uds_path).map_err(VsockUnixBackendError::UnixBind)?; } self.inner = Some(VsockAndUnixPath { uds_path: cfg.uds_path.clone(), vsock: Arc::new(Mutex::new(Self::create_unixsock_vsock(cfg)?)), }); Ok(()) } /// Provides a reference to the Vsock if present. pub fn get(&self) -> Option<&MutexVsockUnix> { self.inner.as_ref().map(|pair| &pair.vsock) } /// Creates a Vsock device from a VsockDeviceConfig. pub fn create_unixsock_vsock( cfg: VsockDeviceConfig, ) -> Result, VsockConfigError> { let backend = VsockUnixBackend::new(u64::from(cfg.guest_cid), cfg.uds_path)?; Vsock::new(u64::from(cfg.guest_cid), backend).map_err(VsockConfigError::CreateVsockDevice) } /// Returns the structure used to configure the vsock device. 
pub fn config(&self) -> Option { self.inner.as_ref().map(VsockDeviceConfig::from) } } #[cfg(test)] pub(crate) mod tests { use vmm_sys_util::tempfile::TempFile; use super::*; use crate::devices::virtio::device::VirtioDevice; use crate::devices::virtio::vsock::VSOCK_DEV_ID; pub(crate) fn default_config(tmp_sock_file: &TempFile) -> VsockDeviceConfig { VsockDeviceConfig { vsock_id: None, guest_cid: 3, uds_path: tmp_sock_file.as_path().to_str().unwrap().to_string(), } } #[test] fn test_vsock_create() { let mut tmp_sock_file = TempFile::new().unwrap(); tmp_sock_file.remove().unwrap(); let vsock_config = default_config(&tmp_sock_file); VsockBuilder::create_unixsock_vsock(vsock_config).unwrap(); } #[test] fn test_vsock_insert() { let mut store = VsockBuilder::new(); let mut tmp_sock_file = TempFile::new().unwrap(); tmp_sock_file.remove().unwrap(); let mut vsock_config = default_config(&tmp_sock_file); store.insert(vsock_config.clone()).unwrap(); let vsock = store.get().unwrap(); assert_eq!(vsock.lock().unwrap().id(), VSOCK_DEV_ID); let new_cid = vsock_config.guest_cid + 1; vsock_config.guest_cid = new_cid; store.insert(vsock_config).unwrap(); let vsock = store.get().unwrap(); assert_eq!(vsock.lock().unwrap().cid(), u64::from(new_cid)); } #[test] fn test_vsock_config() { let mut vsock_builder = VsockBuilder::new(); let mut tmp_sock_file = TempFile::new().unwrap(); tmp_sock_file.remove().unwrap(); let vsock_config = default_config(&tmp_sock_file); vsock_builder.insert(vsock_config.clone()).unwrap(); let config = vsock_builder.config(); assert!(config.is_some()); assert_eq!(config.unwrap(), vsock_config); } #[test] fn test_set_device() { let mut vsock_builder = VsockBuilder::new(); let mut tmp_sock_file = TempFile::new().unwrap(); tmp_sock_file.remove().unwrap(); let vsock = Vsock::new( 0, VsockUnixBackend::new(1, tmp_sock_file.as_path().to_str().unwrap().to_string()) .unwrap(), ) .unwrap(); vsock_builder.set_device(Arc::new(Mutex::new(vsock))); 
assert!(vsock_builder.inner.is_some()); assert_eq!( vsock_builder.inner.unwrap().uds_path, tmp_sock_file.as_path().to_str().unwrap().to_string() ) } } ================================================ FILE: src/vmm/src/vstate/bus.rs ================================================ // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 // // Portions Copyright 2017 The Chromium OS Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE-BSD-3-Clause file. //! Handles routing to devices in an address space. use std::cmp::Ordering; use std::collections::btree_map::BTreeMap; use std::sync::{Arc, Barrier, Mutex, RwLock, Weak}; use std::{error, fmt, result}; /// Trait for devices that respond to reads or writes in an arbitrary address space. /// /// The device does not care where it exists in address space as each method is only given an offset /// into its allocated portion of address space. #[allow(unused_variables)] pub trait BusDevice: Send { /// Reads at `offset` from this device fn read(&mut self, base: u64, offset: u64, data: &mut [u8]) {} /// Writes at `offset` into this device fn write(&mut self, base: u64, offset: u64, data: &[u8]) -> Option> { None } } /// Trait similar to [`BusDevice`] with the extra requirement that a device is `Send` and `Sync`. 
#[allow(unused_variables)] pub trait BusDeviceSync: Send + Sync { /// Reads at `offset` from this device fn read(&self, base: u64, offset: u64, data: &mut [u8]) {} /// Writes at `offset` into this device fn write(&self, base: u64, offset: u64, data: &[u8]) -> Option> { None } } impl BusDeviceSync for Mutex { /// Reads at `offset` from this device fn read(&self, base: u64, offset: u64, data: &mut [u8]) { self.lock() .expect("Failed to acquire device lock") .read(base, offset, data) } /// Writes at `offset` into this device fn write(&self, base: u64, offset: u64, data: &[u8]) -> Option> { self.lock() .expect("Failed to acquire device lock") .write(base, offset, data) } } /// Error type for [`Bus`]-related operations. #[derive(Debug)] pub enum BusError { /// The insertion failed because the new device overlapped with an old device. Overlap, /// Failed to operate on zero sized range. ZeroSizedRange, /// Failed to find address range. MissingAddressRange, } /// Result type for [`Bus`]-related operations. pub type Result = result::Result; impl fmt::Display for BusError { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "bus_error: {self:?}") } } impl error::Error for BusError {} /// Holds a base and length representing the address space occupied by a `BusDevice`. /// /// * base - The address at which the range start. /// * len - The length of the range in bytes. #[derive(Debug, Copy, Clone)] pub struct BusRange { /// base address of a range within a [`Bus`] pub base: u64, /// length of a range within a [`Bus`] pub len: u64, } impl BusRange { /// Returns true if there is overlap with the given range. 
pub fn overlaps(&self, base: u64, len: u64) -> bool { self.base < (base + len) && base < self.base + self.len } } impl Eq for BusRange {} impl PartialEq for BusRange { fn eq(&self, other: &BusRange) -> bool { self.base == other.base } } impl Ord for BusRange { fn cmp(&self, other: &BusRange) -> Ordering { self.base.cmp(&other.base) } } impl PartialOrd for BusRange { fn partial_cmp(&self, other: &BusRange) -> Option { Some(self.cmp(other)) } } /// A device container for routing reads and writes over some address space. /// /// This doesn't have any restrictions on what kind of device or address space this applies to. The /// only restriction is that no two devices can overlap in this address space. #[derive(Default, Debug)] pub struct Bus { devices: RwLock>>, } impl Bus { /// Constructs an a bus with an empty address space. pub fn new() -> Bus { Bus { devices: RwLock::new(BTreeMap::new()), } } fn first_before(&self, addr: u64) -> Option<(BusRange, Arc)> { let devices = self.devices.read().unwrap(); let (range, dev) = devices .range(..=BusRange { base: addr, len: 1 }) .next_back()?; dev.upgrade().map(|d| (*range, d.clone())) } #[allow(clippy::type_complexity)] /// Get a reference to a device residing inside the bus at address [`addr`]. pub fn resolve(&self, addr: u64) -> Option<(u64, u64, Arc)> { if let Some((range, dev)) = self.first_before(addr) { let offset = addr - range.base; if offset < range.len { return Some((range.base, offset, dev)); } } None } /// Insert a device into the [`Bus`] in the range [`addr`, `addr` + `len`]. pub fn insert(&self, device: Arc, base: u64, len: u64) -> Result<()> { if len == 0 { return Err(BusError::ZeroSizedRange); } // Reject all cases where the new device's range overlaps with an existing device. 
if self .devices .read() .unwrap() .iter() .any(|(range, _dev)| range.overlaps(base, len)) { return Err(BusError::Overlap); } if self .devices .write() .unwrap() .insert(BusRange { base, len }, Arc::downgrade(&device)) .is_some() { return Err(BusError::Overlap); } Ok(()) } /// Removes the device at the given address space range. pub fn remove(&self, base: u64, len: u64) -> Result<()> { if len == 0 { return Err(BusError::ZeroSizedRange); } let bus_range = BusRange { base, len }; if self.devices.write().unwrap().remove(&bus_range).is_none() { return Err(BusError::MissingAddressRange); } Ok(()) } /// Reads data from the device that owns the range containing `addr` and puts it into `data`. /// /// Returns true on success, otherwise `data` is untouched. pub fn read(&self, addr: u64, data: &mut [u8]) -> Result<()> { if let Some((base, offset, dev)) = self.resolve(addr) { // OK to unwrap as lock() failing is a serious error condition and should panic. dev.read(base, offset, data); Ok(()) } else { Err(BusError::MissingAddressRange) } } /// Writes `data` to the device that owns the range containing `addr`. /// /// Returns true on success, otherwise `data` is untouched. pub fn write(&self, addr: u64, data: &[u8]) -> Result>> { if let Some((base, offset, dev)) = self.resolve(addr) { // OK to unwrap as lock() failing is a serious error condition and should panic. 
Ok(dev.write(base, offset, data)) } else { Err(BusError::MissingAddressRange) } } } #[cfg(test)] mod tests { use super::*; struct DummyDevice; impl BusDeviceSync for DummyDevice {} struct ConstantDevice; impl BusDeviceSync for ConstantDevice { #[allow(clippy::cast_possible_truncation)] fn read(&self, _base: u64, offset: u64, data: &mut [u8]) { for (i, v) in data.iter_mut().enumerate() { *v = (offset as u8) + (i as u8); } } #[allow(clippy::cast_possible_truncation)] fn write(&self, _base: u64, offset: u64, data: &[u8]) -> Option> { for (i, v) in data.iter().enumerate() { assert_eq!(*v, (offset as u8) + (i as u8)) } None } } #[test] fn bus_insert() { let bus = Bus::new(); let dummy = Arc::new(DummyDevice); bus.insert(dummy.clone(), 0x10, 0).unwrap_err(); bus.insert(dummy.clone(), 0x10, 0x10).unwrap(); let result = bus.insert(dummy.clone(), 0x0f, 0x10); assert_eq!(format!("{result:?}"), "Err(Overlap)"); bus.insert(dummy.clone(), 0x10, 0x10).unwrap_err(); bus.insert(dummy.clone(), 0x10, 0x15).unwrap_err(); bus.insert(dummy.clone(), 0x12, 0x15).unwrap_err(); bus.insert(dummy.clone(), 0x12, 0x01).unwrap_err(); bus.insert(dummy.clone(), 0x0, 0x20).unwrap_err(); bus.insert(dummy.clone(), 0x20, 0x05).unwrap(); bus.insert(dummy.clone(), 0x25, 0x05).unwrap(); bus.insert(dummy, 0x0, 0x10).unwrap(); } #[test] fn bus_remove() { let bus = Bus::new(); let dummy: Arc = Arc::new(DummyDevice); bus.remove(0x42, 0x0).unwrap_err(); bus.remove(0x13, 0x12).unwrap_err(); bus.insert(dummy.clone(), 0x13, 0x12).unwrap(); bus.remove(0x42, 0x42).unwrap_err(); bus.remove(0x13, 0x12).unwrap(); } #[test] #[allow(clippy::redundant_clone)] fn bus_read_write() { let bus = Bus::new(); let dummy = Arc::new(DummyDevice); bus.insert(dummy.clone(), 0x10, 0x10).unwrap(); bus.read(0x10, &mut [0, 0, 0, 0]).unwrap(); bus.write(0x10, &[0, 0, 0, 0]).unwrap(); bus.read(0x11, &mut [0, 0, 0, 0]).unwrap(); bus.write(0x11, &[0, 0, 0, 0]).unwrap(); bus.read(0x16, &mut [0, 0, 0, 0]).unwrap(); bus.write(0x16, &[0, 0, 
0, 0]).unwrap(); bus.read(0x20, &mut [0, 0, 0, 0]).unwrap_err(); bus.write(0x20, &[0, 0, 0, 0]).unwrap_err(); bus.read(0x06, &mut [0, 0, 0, 0]).unwrap_err(); bus.write(0x06, &[0, 0, 0, 0]).unwrap_err(); } #[test] #[allow(clippy::redundant_clone)] fn bus_read_write_values() { let bus = Bus::new(); let dummy = Arc::new(ConstantDevice); bus.insert(dummy.clone(), 0x10, 0x10).unwrap(); let mut values = [0, 1, 2, 3]; bus.read(0x10, &mut values).unwrap(); assert_eq!(values, [0, 1, 2, 3]); bus.write(0x10, &values).unwrap(); bus.read(0x15, &mut values).unwrap(); assert_eq!(values, [5, 6, 7, 8]); bus.write(0x15, &values).unwrap(); } #[test] #[allow(clippy::redundant_clone)] fn busrange_cmp() { let range = BusRange { base: 0x10, len: 2 }; assert_eq!(range, BusRange { base: 0x10, len: 3 }); assert_eq!(range, BusRange { base: 0x10, len: 2 }); assert!(range < BusRange { base: 0x12, len: 1 }); assert!(range < BusRange { base: 0x12, len: 3 }); assert_eq!(range, range.clone()); let bus = Bus::new(); let mut data = [1, 2, 3, 4]; let device = Arc::new(DummyDevice); bus.insert(device.clone(), 0x10, 0x10).unwrap(); bus.write(0x10, &data).unwrap(); bus.read(0x10, &mut data).unwrap(); assert_eq!(data, [1, 2, 3, 4]); } #[test] fn bus_range_overlap() { let a = BusRange { base: 0x1000, len: 0x400, }; assert!(a.overlaps(0x1000, 0x400)); assert!(a.overlaps(0xf00, 0x400)); assert!(a.overlaps(0x1000, 0x01)); assert!(a.overlaps(0xfff, 0x02)); assert!(a.overlaps(0x1100, 0x100)); assert!(a.overlaps(0x13ff, 0x100)); assert!(!a.overlaps(0x1400, 0x100)); assert!(!a.overlaps(0xf00, 0x100)); } } ================================================ FILE: src/vmm/src/vstate/interrupts.rs ================================================ // Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
// SPDX-License-Identifier: Apache-2.0 use std::sync::Arc; use std::sync::atomic::{AtomicBool, Ordering}; use kvm_ioctls::VmFd; use vmm_sys_util::eventfd::EventFd; use crate::Vm; use crate::logger::{IncMetric, METRICS}; use crate::snapshot::Persist; #[derive(Debug, thiserror::Error, displaydoc::Display)] /// Errors related with Firecracker interrupts pub enum InterruptError { /// Error allocating resources: {0} Allocator(#[from] vm_allocator::Error), /// IO error: {0} Io(#[from] std::io::Error), /// FamStruct error: {0} FamStruct(#[from] vmm_sys_util::fam::Error), /// KVM error: {0} Kvm(#[from] kvm_ioctls::Error), /// Invalid vector index: {0} InvalidVectorIndex(usize), } /// Configuration data for an MSI-X interrupt. #[derive(Copy, Clone, Debug, Default)] pub struct MsixVectorConfig { /// High address to delivery message signaled interrupt. pub high_addr: u32, /// Low address to delivery message signaled interrupt. pub low_addr: u32, /// Data to write to delivery message signaled interrupt. pub data: u32, /// Unique ID of the device to delivery message signaled interrupt. 
pub devid: u32, } /// Type that describes an allocated interrupt #[derive(Debug)] pub struct MsixVector { /// GSI used for this vector pub gsi: u32, /// EventFd used for this vector pub event_fd: EventFd, /// Flag determining whether the vector is enabled pub enabled: AtomicBool, } impl MsixVector { /// Create a new [`MsixVector`] of a particular type pub fn new(gsi: u32, enabled: bool) -> Result { Ok(MsixVector { gsi, event_fd: EventFd::new(libc::EFD_NONBLOCK)?, enabled: AtomicBool::new(enabled), }) } } impl MsixVector { /// Enable vector pub fn enable(&self, vmfd: &VmFd) -> Result<(), InterruptError> { if !self.enabled.load(Ordering::Acquire) { vmfd.register_irqfd(&self.event_fd, self.gsi)?; self.enabled.store(true, Ordering::Release); } Ok(()) } /// Disable vector pub fn disable(&self, vmfd: &VmFd) -> Result<(), InterruptError> { if self.enabled.load(Ordering::Acquire) { vmfd.unregister_irqfd(&self.event_fd, self.gsi)?; self.enabled.store(false, Ordering::Release); } Ok(()) } } #[derive(Debug)] /// MSI interrupts created for a VirtIO device pub struct MsixVectorGroup { /// Reference to the Vm object, which we'll need for interacting with the underlying KVM Vm /// file descriptor pub vm: Arc, /// A list of all the MSI-X vectors pub vectors: Vec, } impl MsixVectorGroup { /// Returns the number of vectors in this group pub fn num_vectors(&self) -> u16 { // It is safe to unwrap here. We are creating `MsixVectorGroup` objects through the // `Vm::create_msix_group` where the argument for the number of `vectors` is a `u16`. 
u16::try_from(self.vectors.len()).unwrap() } /// Enable the MSI-X vector group pub fn enable(&self) -> Result<(), InterruptError> { for route in &self.vectors { route.enable(&self.vm.common.fd)?; } Ok(()) } /// Disable the MSI-X vector group pub fn disable(&self) -> Result<(), InterruptError> { for route in &self.vectors { route.disable(&self.vm.common.fd)?; } Ok(()) } /// Trigger an interrupt for a vector in the group pub fn trigger(&self, index: usize) -> Result<(), InterruptError> { self.notifier(index) .ok_or(InterruptError::InvalidVectorIndex(index))? .write(1)?; METRICS.interrupts.triggers.inc(); Ok(()) } /// Get a referece to the underlying `EventFd` used to trigger interrupts for a vector in the /// group pub fn notifier(&self, index: usize) -> Option<&EventFd> { self.vectors.get(index).map(|route| &route.event_fd) } /// Update the MSI-X configuration for a vector in the group pub fn update( &self, index: usize, msi_config: MsixVectorConfig, masked: bool, set_gsi: bool, ) -> Result<(), InterruptError> { if let Some(vector) = self.vectors.get(index) { METRICS.interrupts.config_updates.inc(); // When an interrupt is masked the GSI will not be passed to KVM through // KVM_SET_GSI_ROUTING. So, call [`disable()`] to unregister the interrupt file // descriptor before passing the interrupt routes to KVM if masked { vector.disable(&self.vm.common.fd)?; } self.vm.register_msi(vector, masked, msi_config)?; if set_gsi { self.vm .set_gsi_routes() .map_err(|err| std::io::Error::other(format!("MSI-X update: {err}")))? } // Assign KVM_IRQFD after KVM_SET_GSI_ROUTING to avoid // panic on kernel which does not have commit a80ced6ea514 // (KVM: SVM: fix panic on out-of-bounds guest IRQ). 
if !masked { vector.enable(&self.vm.common.fd)?; } return Ok(()); } Err(InterruptError::InvalidVectorIndex(index)) } } impl<'a> Persist<'a> for MsixVectorGroup { type State = Vec; type ConstructorArgs = Arc; type Error = InterruptError; fn save(&self) -> Self::State { // We don't save the "enabled" state of the MSI interrupt. PCI devices store the MSI-X // configuration and make sure that the vector is enabled during the restore path if it was // initially enabled self.vectors.iter().map(|route| route.gsi).collect() } fn restore( constructor_args: Self::ConstructorArgs, state: &Self::State, ) -> Result { let mut vectors = Vec::with_capacity(state.len()); for gsi in state { vectors.push(MsixVector::new(*gsi, false)?); } Ok(MsixVectorGroup { vm: constructor_args, vectors, }) } } ================================================ FILE: src/vmm/src/vstate/kvm.rs ================================================ // Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 use kvm_bindings::KVM_API_VERSION; use kvm_ioctls::Kvm as KvmFd; use serde::{Deserialize, Serialize}; pub use crate::arch::{Kvm, KvmArchError}; use crate::cpu_config::templates::KvmCapability; /// Errors associated with the wrappers over KVM ioctls. /// Needs `rustfmt::skip` to make multiline comments work #[rustfmt::skip] #[derive(Debug, thiserror::Error, displaydoc::Display)] pub enum KvmError { /// The host kernel reports an invalid KVM API version: {0} ApiVersion(i32), /// Missing KVM capabilities: {0:#x?} Capabilities(u32), /** Error creating KVM object: {0} Make sure the user launching the firecracker process is \ configured on the /dev/kvm file's ACL. */ Kvm(kvm_ioctls::Error), /// Architecture specific error: {0} ArchError(#[from] KvmArchError) } impl Kvm { /// Create `Kvm` struct. pub fn new(kvm_cap_modifiers: Vec) -> Result { let kvm_fd = KvmFd::new().map_err(KvmError::Kvm)?; // Check that KVM has the correct version. 
// Safe to cast because this is a constant. #[allow(clippy::cast_possible_wrap)] if kvm_fd.get_api_version() != KVM_API_VERSION as i32 { return Err(KvmError::ApiVersion(kvm_fd.get_api_version())); } let total_caps = Self::combine_capabilities(&kvm_cap_modifiers); // Check that all desired capabilities are supported. Self::check_capabilities(&kvm_fd, &total_caps).map_err(KvmError::Capabilities)?; Ok(Kvm::init_arch(kvm_fd, kvm_cap_modifiers)?) } fn combine_capabilities(kvm_cap_modifiers: &[KvmCapability]) -> Vec { let mut total_caps = Self::DEFAULT_CAPABILITIES.to_vec(); for modifier in kvm_cap_modifiers.iter() { match modifier { KvmCapability::Add(cap) => { if !total_caps.contains(cap) { total_caps.push(*cap); } } KvmCapability::Remove(cap) => { if let Some(pos) = total_caps.iter().position(|c| c == cap) { total_caps.swap_remove(pos); } } } } total_caps } fn check_capabilities(kvm_fd: &KvmFd, capabilities: &[u32]) -> Result<(), u32> { for cap in capabilities { // If capability is not supported kernel will return 0. if kvm_fd.check_extension_raw(u64::from(*cap)) == 0 { return Err(*cap); } } Ok(()) } /// Saves and returns the Kvm state. pub fn save_state(&self) -> KvmState { KvmState { kvm_cap_modifiers: self.kvm_cap_modifiers.clone(), } } /// Returns the maximal number of memslots allowed in a [`Vm`] pub fn max_nr_memslots(&self) -> u32 { self.fd .get_nr_memslots() .try_into() .expect("Number of vcpus reported by KVM exceeds u32::MAX") } } /// Structure holding an general specific VM state. #[derive(Debug, Default, Serialize, Deserialize)] pub struct KvmState { /// Additional capabilities that were specified in cpu template. pub kvm_cap_modifiers: Vec, } #[cfg(test)] pub(crate) mod tests { use super::*; #[test] fn test_combine_capabilities() { // Default caps for x86_64 and aarch64 both have KVM_CAP_IOEVENTFD and don't have // KVM_CAP_IOMMU caps. 
let additional_capabilities = vec![ KvmCapability::Add(kvm_bindings::KVM_CAP_IOMMU), KvmCapability::Remove(kvm_bindings::KVM_CAP_IOEVENTFD), ]; let combined_caps = Kvm::combine_capabilities(&additional_capabilities); assert!(combined_caps.contains(&kvm_bindings::KVM_CAP_IOMMU)); assert!(!combined_caps.contains(&kvm_bindings::KVM_CAP_IOEVENTFD)); } } ================================================ FILE: src/vmm/src/vstate/memory.rs ================================================ // Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 // // Portions Copyright 2017 The Chromium OS Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the THIRD-PARTY file. use std::fs::File; use std::io::SeekFrom; use std::ops::Deref; use std::sync::{Arc, Mutex}; use bitvec::vec::BitVec; use kvm_bindings::{KVM_MEM_LOG_DIRTY_PAGES, kvm_userspace_memory_region}; use log::error; use serde::{Deserialize, Serialize}; pub use vm_memory::bitmap::{AtomicBitmap, BS, Bitmap, BitmapSlice}; pub use vm_memory::mmap::MmapRegionBuilder; use vm_memory::mmap::{MmapRegionError, NewBitmap}; pub use vm_memory::{ Address, ByteValued, Bytes, FileOffset, GuestAddress, GuestMemory, GuestMemoryRegion, GuestUsize, MemoryRegionAddress, MmapRegion, address, }; use vm_memory::{GuestMemoryError, GuestMemoryRegionBytes, VolatileSlice, WriteVolatile}; use vmm_sys_util::errno; use crate::utils::{get_page_size, u64_to_usize}; use crate::vmm_config::machine_config::HugePageConfig; use crate::vstate::vm::VmError; use crate::{DirtyBitmap, Vm}; /// Type of GuestRegionMmap. pub type GuestRegionMmap = vm_memory::GuestRegionMmap>; /// Type of GuestMemoryMmap. pub type GuestMemoryMmap = vm_memory::GuestRegionCollection; /// Type of GuestMmapRegion. pub type GuestMmapRegion = vm_memory::MmapRegion>; /// Errors associated with dumping guest memory to file. 
#[derive(Debug, thiserror::Error, displaydoc::Display)] pub enum MemoryError { /// Cannot fetch system's page size: {0} PageSize(errno::Error), /// Cannot dump memory: {0} WriteMemory(GuestMemoryError), /// Cannot create mmap region: {0} MmapRegionError(MmapRegionError), /// Cannot create guest memory VmMemoryError, /// Cannot create memfd: {0} Memfd(memfd::Error), /// Cannot resize memfd file: {0} MemfdSetLen(std::io::Error), /// Total sum of memory regions exceeds largest possible file offset OffsetTooLarge, /// Cannot retrieve snapshot file metadata: {0} FileMetadata(std::io::Error), /// Memory region is not aligned Unaligned, /// Error protecting memory slot: {0} Mprotect(std::io::Error), /// Size too large for i64 conversion SlotSizeTooLarge, /// Dirty bitmap not found for memory slot {0} DirtyBitmapNotFound(u32), /// Dirty bitmap is larger than the slot size DirtyBitmapTooLarge, /// Dirty bitmap is smaller than the slot size DirtyBitmapTooSmall, /// Seek error: {0} SeekError(std::io::Error), /// Volatile memory error: {0} VolatileMemoryError(vm_memory::VolatileMemoryError), } impl From for MemoryError { fn from(e: vm_memory::VolatileMemoryError) -> Self { MemoryError::VolatileMemoryError(e) } } /// Type of the guest region #[derive(Copy, Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] pub enum GuestRegionType { /// Guest DRAM Dram, /// Hotpluggable memory Hotpluggable, } /// An extension to GuestMemoryRegion that can be split into multiple KVM slots of /// the same slot_size, and stores the type of region, and the starting KVM slot number. 
#[derive(Debug)] pub struct GuestRegionMmapExt { /// the wrapped GuestRegionMmap pub inner: GuestRegionMmap, /// the type of region pub region_type: GuestRegionType, /// the starting KVM slot number assigned to this region pub slot_from: u32, /// the size of the slots of this region pub slot_size: usize, /// a bitvec indicating whether slot `i` is plugged into KVM (1) or not (0) pub plugged: Mutex, } /// A guest memory slot, which is a slice of a guest memory region #[derive(Debug)] pub struct GuestMemorySlot<'a> { /// KVM memory slot number pub(crate) slot: u32, /// Start guest address of the slot pub(crate) guest_addr: GuestAddress, /// Corresponding slice in host memory pub(crate) slice: VolatileSlice<'a, BS<'a, Option>>, } impl From<&GuestMemorySlot<'_>> for kvm_userspace_memory_region { fn from(mem_slot: &GuestMemorySlot) -> Self { let flags = if mem_slot.slice.bitmap().is_some() { KVM_MEM_LOG_DIRTY_PAGES } else { 0 }; kvm_userspace_memory_region { flags, slot: mem_slot.slot, guest_phys_addr: mem_slot.guest_addr.raw_value(), memory_size: mem_slot.slice.len() as u64, userspace_addr: mem_slot.slice.ptr_guard().as_ptr() as u64, } } } impl<'a> GuestMemorySlot<'a> { /// Dumps the dirty pages in this slot onto the writer pub(crate) fn dump_dirty( &self, writer: &mut T, kvm_bitmap: &[u64], page_size: usize, ) -> Result<(), MemoryError> { let firecracker_bitmap = self.slice.bitmap(); let mut write_size = 0; let mut skip_size = 0; let mut dirty_batch_start = 0; let expected_bitmap_array_len = (self.slice.len() / page_size).div_ceil(64); if kvm_bitmap.len() > expected_bitmap_array_len { return Err(MemoryError::DirtyBitmapTooLarge); } else if kvm_bitmap.len() < expected_bitmap_array_len { return Err(MemoryError::DirtyBitmapTooSmall); } for (i, v) in kvm_bitmap.iter().enumerate() { for j in 0..64 { let is_kvm_page_dirty = ((v >> j) & 1u64) != 0u64; let page_offset = ((i * 64) + j) * page_size; let is_firecracker_page_dirty = firecracker_bitmap.dirty_at(page_offset); // We 
process 64 pages at a time, however the number of pages // in the slot might not be a multiple of 64. We need to break // once we go past the last page that is actually part of the // region. if page_offset >= self.slice.len() { // Ensure there are no more dirty bits after this point if (v >> j) != 0 { return Err(MemoryError::DirtyBitmapTooLarge); } break; } if is_kvm_page_dirty || is_firecracker_page_dirty { // We are at the start of a new batch of dirty pages. if skip_size > 0 { // Seek forward over the unmodified pages. let offset = skip_size .try_into() .map_err(|_| MemoryError::SlotSizeTooLarge)?; writer .seek(SeekFrom::Current(offset)) .map_err(MemoryError::SeekError)?; dirty_batch_start = page_offset; skip_size = 0; } write_size += page_size; } else { // We are at the end of a batch of dirty pages. if write_size > 0 { // Dump the dirty pages. let slice = &self.slice.subslice(dirty_batch_start, write_size)?; writer.write_all_volatile(slice)?; write_size = 0; } skip_size += page_size; } } } if write_size > 0 { writer.write_all_volatile(&self.slice.subslice(dirty_batch_start, write_size)?)?; } // Advance the cursor even if the trailing pages are clean, so that the // next slot starts writing at the correct offset. 
if skip_size > 0 { writer .seek(SeekFrom::Current(skip_size.try_into().unwrap())) .map_err(MemoryError::SeekError)?; } Ok(()) } /// Makes the slot host memory PROT_NONE (true) or PROT_READ|PROT_WRITE (false) pub(crate) fn protect(&self, protected: bool) -> Result<(), MemoryError> { let prot = if protected { libc::PROT_NONE } else { libc::PROT_READ | libc::PROT_WRITE }; // SAFETY: Parameters refer to an existing host memory region let ret = unsafe { libc::mprotect( self.slice.ptr_guard_mut().as_ptr().cast(), self.slice.len(), prot, ) }; if ret != 0 { Err(MemoryError::Mprotect(std::io::Error::last_os_error())) } else { Ok(()) } } } fn addr_in_range(addr: GuestAddress, start: GuestAddress, len: usize) -> bool { if let Some(end) = start.checked_add(len as u64) { addr >= start && addr < end } else { false } } impl GuestRegionMmapExt { /// Adds a DRAM region which only contains a single plugged slot pub(crate) fn dram_from_mmap_region(region: GuestRegionMmap, slot: u32) -> Self { let slot_size = u64_to_usize(region.len()); GuestRegionMmapExt { inner: region, region_type: GuestRegionType::Dram, slot_from: slot, slot_size, plugged: Mutex::new(BitVec::repeat(true, 1)), } } /// Adds an hotpluggable region which can contain multiple slots and is initially unplugged pub(crate) fn hotpluggable_from_mmap_region( region: GuestRegionMmap, slot_from: u32, slot_size: usize, ) -> Self { let slot_cnt = (u64_to_usize(region.len())) / slot_size; GuestRegionMmapExt { inner: region, region_type: GuestRegionType::Hotpluggable, slot_from, slot_size, plugged: Mutex::new(BitVec::repeat(false, slot_cnt)), } } pub(crate) fn from_state( region: GuestRegionMmap, state: &GuestMemoryRegionState, slot_from: u32, ) -> Result { let slot_cnt = state.plugged.len(); let slot_size = u64_to_usize(region.len()) .checked_div(slot_cnt) .ok_or(MemoryError::Unaligned)?; Ok(GuestRegionMmapExt { inner: region, slot_size, region_type: state.region_type, slot_from, plugged: 
Mutex::new(BitVec::from_iter(state.plugged.iter())), }) } pub(crate) fn slot_cnt(&self) -> u32 { u32::try_from(u64_to_usize(self.len()) / self.slot_size).unwrap() } pub(crate) fn mem_slot(&self, slot: u32) -> GuestMemorySlot<'_> { assert!(slot >= self.slot_from && slot < self.slot_from + self.slot_cnt()); let offset = ((slot - self.slot_from) as u64) * (self.slot_size as u64); GuestMemorySlot { slot, guest_addr: self.start_addr().unchecked_add(offset), slice: self .inner .get_slice(MemoryRegionAddress(offset), self.slot_size) .expect("slot range should be valid"), } } /// Returns a snapshot of the slots and their state at the time of calling /// /// Note: to avoid TOCTOU races use only within VMM thread. pub(crate) fn slots(&self) -> impl Iterator, bool)> { self.plugged .lock() .unwrap() .iter() .enumerate() .map(|(i, b)| { ( self.mem_slot(self.slot_from + u32::try_from(i).unwrap()), *b, ) }) .collect::>() .into_iter() } /// Returns a snapshot of the plugged slots at the time of calling /// /// Note: to avoid TOCTOU races use only within VMM thread. pub(crate) fn plugged_slots(&self) -> impl Iterator> { self.slots() .filter(|(_, plugged)| *plugged) .map(|(slot, _)| slot) } pub(crate) fn slots_intersecting_range( &self, from: GuestAddress, len: usize, ) -> impl Iterator> { self.slots().map(|(slot, _)| slot).filter(move |slot| { if let Some(slot_end) = slot.guest_addr.checked_add(slot.slice.len() as u64) { addr_in_range(slot.guest_addr, from, len) || addr_in_range(slot_end, from, len) } else { false } }) } /// (un)plug a slot from an Hotpluggable memory region pub(crate) fn update_slot( &self, vm: &Vm, mem_slot: &GuestMemorySlot<'_>, plug: bool, ) -> Result<(), VmError> { // This function can only be called on hotpluggable regions! 
assert!(self.region_type == GuestRegionType::Hotpluggable); let mut bitmap_guard = self.plugged.lock().unwrap(); let prev = bitmap_guard.replace((mem_slot.slot - self.slot_from) as usize, plug); // do not do anything if the state is what we're trying to set if prev == plug { return Ok(()); } let mut kvm_region = kvm_userspace_memory_region::from(mem_slot); if plug { // make it accessible _before_ adding it to KVM mem_slot.protect(false)?; vm.set_user_memory_region(kvm_region)?; } else { // to remove it we need to pass a size of zero kvm_region.memory_size = 0; vm.set_user_memory_region(kvm_region)?; // make it protected _after_ removing it from KVM mem_slot.protect(true)?; } Ok(()) } pub(crate) fn discard_range( &self, caddr: MemoryRegionAddress, len: usize, ) -> Result<(), GuestMemoryError> { let phys_address = self.get_host_address(caddr)?; match (self.inner.file_offset(), self.inner.flags()) { // If and only if we are resuming from a snapshot file, we have a file and it's mapped // private (Some(_), flags) if flags & libc::MAP_PRIVATE != 0 => { // Mmap a new anonymous region over the present one in order to create a hole // with zero pages. // This workaround is (only) needed after resuming from a snapshot file because the // guest memory is mmaped from file as private. In this case, MADV_DONTNEED on the // file only drops any anonymous pages in range, but subsequent accesses would read // whatever page is stored on the backing file. Mmapping anonymous pages ensures // it's zeroed. // SAFETY: The address and length are known to be valid. 
                let ret = unsafe {
                    libc::mmap(
                        phys_address.cast(),
                        len,
                        libc::PROT_READ | libc::PROT_WRITE,
                        libc::MAP_FIXED | libc::MAP_ANONYMOUS | libc::MAP_PRIVATE,
                        -1,
                        0,
                    )
                };
                if ret == libc::MAP_FAILED {
                    let os_error = std::io::Error::last_os_error();
                    error!("discard_range: mmap failed: {:?}", os_error);
                    Err(GuestMemoryError::IOError(os_error))
                } else {
                    Ok(())
                }
            }
            // Match either the case of an anonymous mapping, or the case
            // of a shared file mapping.
            // TODO: madvise(MADV_DONTNEED) doesn't actually work with memfd
            // (or in general MAP_SHARED of a fd). In those cases we should use
            // fallocate64(FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE).
            // We keep falling to the madvise branch to keep the previous behaviour.
            _ => {
                // Madvise the region in order to mark it as not used.
                // SAFETY: The address and length are known to be valid.
                let ret = unsafe { libc::madvise(phys_address.cast(), len, libc::MADV_DONTNEED) };
                if ret < 0 {
                    let os_error = std::io::Error::last_os_error();
                    error!("discard_range: madvise failed: {:?}", os_error);
                    Err(GuestMemoryError::IOError(os_error))
                } else {
                    Ok(())
                }
            }
        }
    }
}

// Forward all `MmapRegion` accessors to the wrapped `inner` region.
impl Deref for GuestRegionMmapExt {
    type Target = MmapRegion<Option<AtomicBitmap>>;

    fn deref(&self) -> &MmapRegion<Option<AtomicBitmap>> {
        &self.inner
    }
}

impl GuestMemoryRegionBytes for GuestRegionMmapExt {}

#[allow(clippy::cast_possible_wrap)]
#[allow(clippy::cast_possible_truncation)]
impl GuestMemoryRegion for GuestRegionMmapExt {
    // Dirty-page-tracking bitmap; `None` when tracking is disabled.
    type B = Option<AtomicBitmap>;

    fn len(&self) -> GuestUsize {
        self.inner.len()
    }

    fn start_addr(&self) -> GuestAddress {
        self.inner.start_addr()
    }

    fn bitmap(&self) -> BS<'_, Self::B> {
        self.inner.bitmap()
    }

    fn get_host_address(
        &self,
        addr: MemoryRegionAddress,
    ) -> vm_memory::guest_memory::Result<*mut u8> {
        self.inner.get_host_address(addr)
    }

    fn file_offset(&self) -> Option<&FileOffset> {
        self.inner.file_offset()
    }

    fn get_slice(
        &self,
        offset: MemoryRegionAddress,
        count: usize,
    ) -> vm_memory::guest_memory::Result<VolatileSlice<BS<Self::B>>> {
        self.inner.get_slice(offset, count)
    }
}

/// Creates a `Vec` of `GuestRegionMmap` with the given configuration
pub fn create(
    regions: impl Iterator<Item = (GuestAddress, usize)>,
    mmap_flags: libc::c_int,
    file: Option<File>,
    track_dirty_pages: bool,
) -> Result<Vec<GuestRegionMmap>, MemoryError> {
    // Running offset of each region into the (optional) shared backing file.
    let mut offset = 0;
    let file = file.map(Arc::new);
    regions
        .map(|(start, size)| {
            let mut builder = MmapRegionBuilder::new_with_bitmap(
                size,
                track_dirty_pages.then(|| AtomicBitmap::with_len(size)),
            )
            .with_mmap_prot(libc::PROT_READ | libc::PROT_WRITE)
            .with_mmap_flags(libc::MAP_NORESERVE | mmap_flags);

            if let Some(ref file) = file {
                let file_offset = FileOffset::from_arc(Arc::clone(file), offset);
                builder = builder.with_file_offset(file_offset);
            }

            // mmap file offsets are i64, so reject anything at or beyond i64::MAX.
            offset = match offset.checked_add(size as u64) {
                None => return Err(MemoryError::OffsetTooLarge),
                Some(new_off) if new_off >= i64::MAX as u64 => {
                    return Err(MemoryError::OffsetTooLarge);
                }
                Some(new_off) => new_off,
            };

            GuestRegionMmap::new(
                builder.build().map_err(MemoryError::MmapRegionError)?,
                start,
            )
            .ok_or(MemoryError::VmMemoryError)
        })
        .collect::<Result<Vec<_>, _>>()
}

/// Creates a GuestMemoryMmap with `size` in MiB backed by a memfd.
pub fn memfd_backed(
    regions: &[(GuestAddress, usize)],
    track_dirty_pages: bool,
    huge_pages: HugePageConfig,
) -> Result<Vec<GuestRegionMmap>, MemoryError> {
    // One memfd backs all regions, so size it to their sum.
    let size = regions.iter().map(|&(_, size)| size as u64).sum();
    let memfd_file = create_memfd(size, huge_pages.into())?.into_file();

    create(
        regions.iter().copied(),
        libc::MAP_SHARED | huge_pages.mmap_flags(),
        Some(memfd_file),
        track_dirty_pages,
    )
}

/// Creates a GuestMemoryMmap from raw regions.
pub fn anonymous(
    regions: impl Iterator<Item = (GuestAddress, usize)>,
    track_dirty_pages: bool,
    huge_pages: HugePageConfig,
) -> Result<Vec<GuestRegionMmap>, MemoryError> {
    create(
        regions,
        libc::MAP_PRIVATE | libc::MAP_ANONYMOUS | huge_pages.mmap_flags(),
        None,
        track_dirty_pages,
    )
}

/// Creates a GuestMemoryMmap given a `file` containing the data
/// and a `state` containing mapping information.
pub fn snapshot_file(
    file: File,
    regions: impl Iterator<Item = (GuestAddress, usize)>,
    track_dirty_pages: bool,
) -> Result<Vec<GuestRegionMmap>, MemoryError> {
    let regions: Vec<_> = regions.collect();
    // Total guest memory size, with overflow-checked summation.
    let memory_size = regions
        .iter()
        .try_fold(0u64, |acc, (_, size)| acc.checked_add(*size as u64))
        .ok_or(MemoryError::OffsetTooLarge)?;
    let file_size = file.metadata().map_err(MemoryError::FileMetadata)?.len();

    // ensure we do not mmap beyond EOF. The kernel would allow that but a SIGBUS is triggered
    // on an attempted access to a page of the buffer that lies beyond the end of the mapped file.
    if memory_size > file_size {
        return Err(MemoryError::OffsetTooLarge);
    }

    create(
        regions.into_iter(),
        libc::MAP_PRIVATE,
        Some(file),
        track_dirty_pages,
    )
}

/// Defines the interface for snapshotting memory.
pub trait GuestMemoryExtension
where
    Self: Sized,
{
    /// Describes GuestMemoryMmap through a GuestMemoryState struct.
    fn describe(&self) -> GuestMemoryState;

    /// Mark memory range as dirty
    fn mark_dirty(&self, addr: GuestAddress, len: usize);

    /// Dumps all contents of GuestMemoryMmap to a writer.
    fn dump<T: WriteVolatile + std::io::Seek>(&self, writer: &mut T) -> Result<(), MemoryError>;

    /// Dumps all pages of GuestMemoryMmap present in `dirty_bitmap` to a writer.
    fn dump_dirty<T: WriteVolatile + std::io::Seek>(
        &self,
        writer: &mut T,
        dirty_bitmap: &DirtyBitmap,
    ) -> Result<(), MemoryError>;

    /// Resets all the memory region bitmaps
    fn reset_dirty(&self);

    /// Store the dirty bitmap in internal store
    fn store_dirty_bitmap(&self, dirty_bitmap: &DirtyBitmap, page_size: usize);

    /// Apply a function to each region in a memory range
    fn try_for_each_region_in_range<F>(
        &self,
        addr: GuestAddress,
        range_len: usize,
        f: F,
    ) -> Result<(), GuestMemoryError>
    where
        F: FnMut(&GuestRegionMmapExt, MemoryRegionAddress, usize) -> Result<(), GuestMemoryError>;

    /// Discards a memory range, freeing up memory pages
    fn discard_range(&self, addr: GuestAddress, range_len: usize) -> Result<(), GuestMemoryError>;
}

/// State of a guest memory region saved to file/buffer.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
pub struct GuestMemoryRegionState {
    // This should have been named `base_guest_addr` since it's _guest_ addr, but for
    // backward compatibility we have to keep this name. At least this comment should help.
    /// Base GuestAddress.
    pub base_address: u64,
    /// Region size.
    pub size: usize,
    /// Region type
    pub region_type: GuestRegionType,
    /// Plugged/unplugged status of each slot
    pub plugged: Vec<bool>,
}

/// Describes guest memory regions and their snapshot file mappings.
#[derive(Clone, Debug, Default, PartialEq, Eq, Serialize, Deserialize)]
pub struct GuestMemoryState {
    /// List of regions.
    pub regions: Vec<GuestMemoryRegionState>,
}

impl GuestMemoryState {
    /// Turns this [`GuestMemoryState`] into a description of guest memory regions as understood
    /// by the creation functions of [`GuestMemoryExtensions`]
    pub fn regions(&self) -> impl Iterator<Item = (GuestAddress, usize)> + '_ {
        self.regions
            .iter()
            .map(|region| (GuestAddress(region.base_address), region.size))
    }
}

impl GuestMemoryExtension for GuestMemoryMmap {
    /// Describes GuestMemoryMmap through a GuestMemoryState struct.
    fn describe(&self) -> GuestMemoryState {
        let mut guest_memory_state = GuestMemoryState::default();
        self.iter().for_each(|region| {
            guest_memory_state.regions.push(GuestMemoryRegionState {
                base_address: region.start_addr().0,
                size: u64_to_usize(region.len()),
                region_type: region.region_type,
                plugged: region.plugged.lock().unwrap().iter().by_vals().collect(),
            });
        });
        guest_memory_state
    }

    /// Mark memory range as dirty
    fn mark_dirty(&self, addr: GuestAddress, len: usize) {
        // ignore invalid ranges using .flatten()
        for slice in self.get_slices(addr, len).flatten() {
            slice.bitmap().mark_dirty(0, slice.len());
        }
    }

    /// Dumps all contents of GuestMemoryMmap to a writer.
    fn dump<T: WriteVolatile + std::io::Seek>(&self, writer: &mut T) -> Result<(), MemoryError> {
        self.iter()
            .flat_map(|region| region.slots())
            .try_for_each(|(mem_slot, plugged)| {
                if !plugged {
                    // Leave a hole in the output for unplugged slots so file
                    // offsets keep matching guest addresses.
                    let ilen = i64::try_from(mem_slot.slice.len()).unwrap();
                    writer.seek(SeekFrom::Current(ilen)).unwrap();
                } else {
                    writer.write_all_volatile(&mem_slot.slice)?;
                }
                Ok(())
            })
            .map_err(MemoryError::WriteMemory)
    }

    /// Dumps all pages of GuestMemoryMmap present in `dirty_bitmap` to a writer.
    fn dump_dirty<T: WriteVolatile + std::io::Seek>(
        &self,
        writer: &mut T,
        dirty_bitmap: &DirtyBitmap,
    ) -> Result<(), MemoryError> {
        let page_size = get_page_size().map_err(MemoryError::PageSize)?;

        let write_result = self
            .iter()
            .flat_map(|region| region.slots())
            .try_for_each(|(mem_slot, plugged)| {
                if !plugged {
                    let ilen = i64::try_from(mem_slot.slice.len())
                        .map_err(|_| MemoryError::SlotSizeTooLarge)?;
                    writer
                        .seek(SeekFrom::Current(ilen))
                        .map_err(MemoryError::SeekError)?;
                } else {
                    let kvm_bitmap = dirty_bitmap
                        .get(&mem_slot.slot)
                        .ok_or(MemoryError::DirtyBitmapNotFound(mem_slot.slot))?;
                    mem_slot.dump_dirty(writer, kvm_bitmap, page_size)?;
                }
                Ok(())
            });

        if write_result.is_err() {
            // Keep the dirty info in the internal bitmap so a retried dump
            // still writes everything that changed.
            self.store_dirty_bitmap(dirty_bitmap, page_size);
        } else {
            self.reset_dirty();
        }
        write_result
    }

    /// Resets all the memory region bitmaps
    fn reset_dirty(&self) {
        self.iter().for_each(|region| {
            if let Some(bitmap) = (**region).bitmap() {
                bitmap.reset();
            }
        })
    }

    /// Stores the dirty bitmap into the internal bitmap
    fn store_dirty_bitmap(&self, dirty_bitmap: &DirtyBitmap, page_size: usize) {
        self.iter()
            .flat_map(|region| region.plugged_slots())
            .for_each(|mem_slot| {
                let kvm_bitmap = dirty_bitmap.get(&mem_slot.slot).unwrap();
                let firecracker_bitmap = mem_slot.slice.bitmap();
                // Each u64 of the KVM bitmap covers 64 consecutive pages.
                for (i, v) in kvm_bitmap.iter().enumerate() {
                    for j in 0..64 {
                        let is_kvm_page_dirty = ((v >> j) & 1u64) != 0u64;

                        if is_kvm_page_dirty {
                            let page_offset = ((i * 64) + j) * page_size;

                            firecracker_bitmap.mark_dirty(page_offset, 1)
                        }
                    }
                }
            });
    }

    fn try_for_each_region_in_range<F>(
        &self,
        addr: GuestAddress,
        range_len: usize,
        mut f: F,
    ) -> Result<(), GuestMemoryError>
    where
        F: FnMut(&GuestRegionMmapExt, MemoryRegionAddress, usize) -> Result<(), GuestMemoryError>,
    {
        let mut cur = addr;
        let mut remaining = range_len;

        // iterate over all adjacent consecutive regions in range
        while let Some(region) = self.find_region(cur) {
            let start = region.to_region_addr(cur).unwrap();
            let len = std::cmp::min(
                // remaining bytes inside the region
                u64_to_usize(region.len() - start.raw_value()),
                // remaining bytes to discard
                remaining,
            );

            f(region, start, len)?;

            remaining -= len;
            if remaining == 0 {
                return Ok(());
            }
            cur = cur
                .checked_add(len as u64)
                .ok_or(GuestMemoryError::GuestAddressOverflow)?;
        }

        // if we exit the loop because we didn't find a region, return an error
        Err(GuestMemoryError::InvalidGuestAddress(cur))
    }

    fn discard_range(&self, addr: GuestAddress, range_len: usize) -> Result<(), GuestMemoryError> {
        self.try_for_each_region_in_range(addr, range_len, |region, start, len| {
            region.discard_range(start, len)
        })
    }
}

/// Creates a sealed memfd of `mem_size` bytes, optionally backed by huge pages.
fn create_memfd(
    mem_size: u64,
    hugetlb_size: Option<memfd::HugetlbSize>,
) -> Result<memfd::Memfd, MemoryError> {
    // Create a memfd.
    let opts = memfd::MemfdOptions::default()
        .hugetlb(hugetlb_size)
        .allow_sealing(true);
    let mem_file = opts.create("guest_mem").map_err(MemoryError::Memfd)?;

    // Resize to guest mem size.
    mem_file
        .as_file()
        .set_len(mem_size)
        .map_err(MemoryError::MemfdSetLen)?;

    // Add seals to prevent further resizing.
    let mut seals = memfd::SealsHashSet::new();
    seals.insert(memfd::FileSeal::SealShrink);
    seals.insert(memfd::FileSeal::SealGrow);
    mem_file.add_seals(&seals).map_err(MemoryError::Memfd)?;

    // Prevent further sealing changes.
    mem_file
        .add_seal(memfd::FileSeal::SealSeal)
        .map_err(MemoryError::Memfd)?;

    Ok(mem_file)
}

/// Test utilities
pub mod test_utils {
    use super::*;

    /// Converts a vec of GuestRegionMmap into a GuestMemoryMmap using GuestRegionMmapExt
    pub fn into_region_ext(regions: Vec<GuestRegionMmap>) -> GuestMemoryMmap {
        GuestMemoryMmap::from_regions(
            regions
                .into_iter()
                .zip(0u32..)
                // assign dummy slots
                .map(|(region, slot)| GuestRegionMmapExt::dram_from_mmap_region(region, slot))
                .collect(),
        )
        .unwrap()
    }
}

#[cfg(test)]
mod tests {
    #![allow(clippy::undocumented_unsafe_blocks)]

    use std::collections::HashMap;
    use std::io::{Read, Seek, Write};
    use std::os::unix::fs::MetadataExt;

    use vmm_sys_util::tempfile::TempFile;

    use super::*;
    use crate::snapshot::Snapshot;
    use crate::test_utils::single_region_mem;
    use crate::utils::{get_page_size, mib_to_bytes};
    use crate::vstate::memory::test_utils::into_region_ext;

    #[test]
    fn test_anonymous() {
        for dirty_page_tracking in [true, false] {
            let region_size = 0x10000;
            let regions = vec![
                (GuestAddress(0x0), region_size),
                (GuestAddress(0x10000), region_size),
                (GuestAddress(0x20000), region_size),
                (GuestAddress(0x30000), region_size),
            ];

            let guest_memory = anonymous(
                regions.into_iter(),
                dirty_page_tracking,
                HugePageConfig::None,
            )
            .unwrap();
            // Bitmap presence must follow the tracking flag.
            guest_memory.iter().for_each(|region| {
                assert_eq!(region.bitmap().is_some(), dirty_page_tracking);
            });
        }
    }

    #[test]
    fn test_snapshot_file_success() {
        for dirty_page_tracking in [true, false] {
            let page_size = 0x1000;
            let mut file = TempFile::new().unwrap().into_file();
            file.set_len(page_size as u64).unwrap();
            file.write_all(&vec![0x42u8; page_size]).unwrap();

            let regions = vec![(GuestAddress(0), page_size)];

            let guest_regions =
                snapshot_file(file, regions.into_iter(), dirty_page_tracking).unwrap();
            assert_eq!(guest_regions.len(), 1);
            guest_regions.iter().for_each(|region| {
                assert_eq!(region.bitmap().is_some(), dirty_page_tracking);
            });
        }
    }

    #[test]
    fn test_snapshot_file_multiple_regions() {
        let page_size = 0x1000;
        let total_size = 3 * page_size;
        let mut file = TempFile::new().unwrap().into_file();
        file.set_len(total_size as u64).unwrap();
        file.write_all(&vec![0x42u8; total_size]).unwrap();

        let regions = vec![
            (GuestAddress(0), page_size),
            (GuestAddress(0x10000), page_size),
            (GuestAddress(0x20000), page_size),
        ];

        let guest_regions = snapshot_file(file, regions.into_iter(), false).unwrap();
        assert_eq!(guest_regions.len(), 3);
    }

    #[test]
    fn test_snapshot_file_offset_too_large() {
        let page_size = 0x1000;
        let mut file = TempFile::new().unwrap().into_file();
        file.set_len(page_size as u64).unwrap();
        file.write_all(&vec![0x42u8; page_size]).unwrap();

        // Request more guest memory than the file can back.
        let regions = vec![(GuestAddress(0), 2 * page_size)];

        let result = snapshot_file(file, regions.into_iter(), false);
        assert!(matches!(result.unwrap_err(), MemoryError::OffsetTooLarge));
    }

    #[test]
    fn test_mark_dirty() {
        let page_size = get_page_size().unwrap();
        let region_size = page_size * 3;

        let regions = vec![
            (GuestAddress(0), region_size),                      // pages 0-2
            (GuestAddress(region_size as u64), region_size),     // pages 3-5
            (GuestAddress(region_size as u64 * 2), region_size), // pages 6-8
        ];
        let guest_memory =
            into_region_ext(anonymous(regions.into_iter(), true, HugePageConfig::None).unwrap());

        let dirty_map = [
            // page 0: not dirty
            (0, page_size, false),
            // pages 1-2: dirty range in one region
            (page_size, page_size * 2, true),
            // page 3: not dirty
            (page_size * 3, page_size, false),
            // pages 4-7: dirty range across 2 regions,
            (page_size * 4, page_size * 4, true),
            // page 8: not dirty
            (page_size * 8, page_size, false),
        ];

        // Mark dirty memory
        for (addr, len, dirty) in &dirty_map {
            if *dirty {
                guest_memory.mark_dirty(GuestAddress(*addr as u64), *len);
            }
        }

        // Check that the dirty memory was set correctly
        for (addr, len, dirty) in &dirty_map {
            for slice in guest_memory
                .get_slices(GuestAddress(*addr as u64), *len)
                .flatten()
            {
                for i in 0..slice.len() {
                    assert_eq!(slice.bitmap().dirty_at(i), *dirty);
                }
            }
        }
    }

    /// Round-trips `describe()` output through bitcode and the Snapshot wrapper.
    fn check_serde<M: GuestMemoryExtension>(guest_memory: &M) {
        let original_state = guest_memory.describe();

        // Test direct bitcode serialization
        let serialized_data = bitcode::serialize(&original_state).unwrap();
        let restored_state: GuestMemoryState = bitcode::deserialize(&serialized_data).unwrap();
        assert_eq!(original_state, restored_state);

        // Test with Snapshot wrapper
        let snapshot_data =
            bitcode::serialize(&Snapshot::new(original_state.clone())).unwrap();
        let restored_snapshot = Snapshot::load_without_crc_check(&snapshot_data).unwrap();
        assert_eq!(original_state, restored_snapshot.data);
    }

    #[test]
    fn test_serde() {
        let page_size = get_page_size().unwrap();
        let region_size = page_size * 3;

        // Test with a single region
        let guest_memory = into_region_ext(
            anonymous(
                [(GuestAddress(0), region_size)].into_iter(),
                false,
                HugePageConfig::None,
            )
            .unwrap(),
        );
        check_serde(&guest_memory);

        // Test with some regions
        let regions = vec![
            (GuestAddress(0), region_size),                      // pages 0-2
            (GuestAddress(region_size as u64), region_size),     // pages 3-5
            (GuestAddress(region_size as u64 * 2), region_size), // pages 6-8
        ];
        let guest_memory =
            into_region_ext(anonymous(regions.into_iter(), true, HugePageConfig::None).unwrap());
        check_serde(&guest_memory);
    }

    #[test]
    fn test_describe() {
        let page_size: usize = get_page_size().unwrap();

        // Two regions of one page each, with a one page gap between them.
        let mem_regions = [
            (GuestAddress(0), page_size),
            (GuestAddress(page_size as u64 * 2), page_size),
        ];
        let guest_memory = into_region_ext(
            anonymous(mem_regions.into_iter(), true, HugePageConfig::None).unwrap(),
        );

        let expected_memory_state = GuestMemoryState {
            regions: vec![
                GuestMemoryRegionState {
                    base_address: 0,
                    size: page_size,
                    region_type: GuestRegionType::Dram,
                    plugged: vec![true],
                },
                GuestMemoryRegionState {
                    base_address: page_size as u64 * 2,
                    size: page_size,
                    region_type: GuestRegionType::Dram,
                    plugged: vec![true],
                },
            ],
        };

        let actual_memory_state = guest_memory.describe();
        assert_eq!(expected_memory_state, actual_memory_state);

        // Two regions of three pages each, with a one page gap between them.
        let mem_regions = [
            (GuestAddress(0), page_size * 3),
            (GuestAddress(page_size as u64 * 4), page_size * 3),
        ];
        let guest_memory = into_region_ext(
            anonymous(mem_regions.into_iter(), true, HugePageConfig::None).unwrap(),
        );

        let expected_memory_state = GuestMemoryState {
            regions: vec![
                GuestMemoryRegionState {
                    base_address: 0,
                    size: page_size * 3,
                    region_type: GuestRegionType::Dram,
                    plugged: vec![true],
                },
                GuestMemoryRegionState {
                    base_address: page_size as u64 * 4,
                    size: page_size * 3,
                    region_type: GuestRegionType::Dram,
                    plugged: vec![true],
                },
            ],
        };

        let actual_memory_state = guest_memory.describe();
        assert_eq!(expected_memory_state, actual_memory_state);
    }

    #[test]
    fn test_dump() {
        let page_size = get_page_size().unwrap();

        // Two regions of two pages each, with a one page gap between them.
        let region_1_address = GuestAddress(0);
        let region_2_address = GuestAddress(page_size as u64 * 3);
        let region_size = page_size * 2;
        let mem_regions = [
            (region_1_address, region_size),
            (region_2_address, region_size),
        ];
        let guest_memory = into_region_ext(
            anonymous(mem_regions.into_iter(), true, HugePageConfig::None).unwrap(),
        );
        // Check that Firecracker bitmap is clean.
        guest_memory.iter().for_each(|r| {
            assert!(!r.bitmap().dirty_at(0));
            assert!(!r.bitmap().dirty_at(1));
        });

        // Fill the first region with 1s and the second with 2s.
        let first_region = vec![1u8; region_size];
        guest_memory.write(&first_region, region_1_address).unwrap();

        let second_region = vec![2u8; region_size];
        guest_memory
            .write(&second_region, region_2_address)
            .unwrap();

        let memory_state = guest_memory.describe();

        // dump the full memory.
        let mut memory_file = TempFile::new().unwrap().into_file();
        guest_memory.dump(&mut memory_file).unwrap();

        let restored_guest_memory =
            into_region_ext(snapshot_file(memory_file, memory_state.regions(), false).unwrap());

        // Check that the region contents are the same.
        let mut restored_region = vec![0u8; page_size * 2];
        restored_guest_memory
            .read(restored_region.as_mut_slice(), region_1_address)
            .unwrap();
        assert_eq!(first_region, restored_region);

        restored_guest_memory
            .read(restored_region.as_mut_slice(), region_2_address)
            .unwrap();
        assert_eq!(second_region, restored_region);
    }

    #[test]
    fn test_dump_dirty() {
        let page_size = get_page_size().unwrap();

        // Two regions of two pages each, with a one page gap between them.
        let region_1_address = GuestAddress(0);
        let region_2_address = GuestAddress(page_size as u64 * 3);
        let region_size = page_size * 2;
        let mem_regions = [
            (region_1_address, region_size),
            (region_2_address, region_size),
        ];
        let guest_memory = into_region_ext(
            anonymous(mem_regions.into_iter(), true, HugePageConfig::None).unwrap(),
        );
        // Check that Firecracker bitmap is clean.
        guest_memory.iter().for_each(|r| {
            assert!(!r.bitmap().dirty_at(0));
            assert!(!r.bitmap().dirty_at(1));
        });

        // Fill the first region with 1s and the second with 2s.
        let first_region = vec![1u8; region_size];
        guest_memory.write(&first_region, region_1_address).unwrap();

        let second_region = vec![2u8; region_size];
        guest_memory
            .write(&second_region, region_2_address)
            .unwrap();

        // Firecracker Dirty Bitmap after the writes:
        // First region pages: [dirty, dirty]
        // Second region pages: [dirty, dirty]

        let memory_state = guest_memory.describe();

        // KVM dirty bitmap:
        // First region pages: [dirty, clean]
        // Second region pages: [clean, dirty]
        let mut kvm_dirty_bitmap: DirtyBitmap = HashMap::new();
        kvm_dirty_bitmap.insert(0, vec![0b01]);
        kvm_dirty_bitmap.insert(1, vec![0b10]);

        let mut file = TempFile::new().unwrap().into_file();
        guest_memory
            .dump_dirty(&mut file, &kvm_dirty_bitmap)
            .unwrap();

        // We can restore from this because this is the first dirty dump.
        let restored_guest_memory =
            into_region_ext(snapshot_file(file, memory_state.regions(), false).unwrap());

        // Check that the region contents are the same.
        let mut restored_region = vec![0u8; region_size];
        restored_guest_memory
            .read(restored_region.as_mut_slice(), region_1_address)
            .unwrap();
        assert_eq!(first_region, restored_region);

        restored_guest_memory
            .read(restored_region.as_mut_slice(), region_2_address)
            .unwrap();
        assert_eq!(second_region, restored_region);

        // Dirty the memory and dump again
        let file = TempFile::new().unwrap();
        let mut reader = file.into_file();
        let zeros = vec![0u8; page_size];
        let ones = vec![1u8; page_size];
        let twos = vec![2u8; page_size];

        // Firecracker Dirty Bitmap:
        // First region pages: [clean, dirty]
        // Second region pages: [clean, clean]
        guest_memory
            .write(&twos, GuestAddress(page_size as u64))
            .unwrap();

        // KVM dirty bitmap:
        // First region pages: [dirty, clean]
        // Second region pages: [clean, dirty]
        kvm_dirty_bitmap.insert(0, vec![0b01]);
        kvm_dirty_bitmap.insert(1, vec![0b10]);

        guest_memory
            .dump_dirty(&mut reader, &kvm_dirty_bitmap)
            .unwrap();

        // Check that only the dirty regions are dumped.
        let mut diff_file_content = Vec::new();
        let expected_file_contents = [
            ones.as_slice(),
            twos.as_slice(),
            zeros.as_slice(),
            twos.as_slice(),
        ]
        .concat();
        reader.seek(SeekFrom::Start(0)).unwrap();
        reader.read_to_end(&mut diff_file_content).unwrap();
        assert_eq!(expected_file_contents, diff_file_content);

        // Take a 3rd snapshot
        // Firecracker Dirty Bitmap:
        // First region pages: [dirty, clean]
        // Second region pages: [dirty, clean]
        guest_memory.write(&twos, region_1_address).unwrap();
        guest_memory.write(&ones, region_2_address).unwrap();

        // KVM dirty bitmap:
        // First region pages: [clean, clean]
        // Second region pages: [clean, clean]
        kvm_dirty_bitmap.insert(0, vec![0b00]);
        kvm_dirty_bitmap.insert(1, vec![0b00]);

        let file = TempFile::new().unwrap();
        let logical_size = page_size as u64 * 4;
        file.as_file().set_len(logical_size).unwrap();
        let mut reader = file.into_file();

        guest_memory
            .dump_dirty(&mut reader, &kvm_dirty_bitmap)
            .unwrap();

        // Check that only the dirty regions are dumped.
        let mut diff_file_content = Vec::new();
        // The resulting file is a sparse file with holes.
        let expected_file_contents = [
            twos.as_slice(),
            zeros.as_slice(), // hole
            ones.as_slice(),
            zeros.as_slice(), // hole
        ]
        .concat();
        reader.seek(SeekFrom::Start(0)).unwrap();
        reader.read_to_end(&mut diff_file_content).unwrap();
        assert_eq!(expected_file_contents, diff_file_content);

        // Make sure that only 2 of the pages are written in the file and the
        // other two are holes.
        let metadata = reader.metadata().unwrap();
        let physical_size = metadata.blocks() * 512;
        assert_eq!(physical_size, 2 * page_size as u64);
        assert_ne!(physical_size, logical_size);

        // Test with bitmaps that are too large or too small
        kvm_dirty_bitmap.insert(0, vec![0b1, 0b01]);
        kvm_dirty_bitmap.insert(1, vec![0b10]);
        assert!(matches!(
            guest_memory.dump_dirty(&mut reader, &kvm_dirty_bitmap),
            Err(MemoryError::DirtyBitmapTooLarge)
        ));

        kvm_dirty_bitmap.insert(0, vec![0b01]);
        kvm_dirty_bitmap.insert(1, vec![0b110]);
        assert!(matches!(
            guest_memory.dump_dirty(&mut reader, &kvm_dirty_bitmap),
            Err(MemoryError::DirtyBitmapTooLarge)
        ));

        kvm_dirty_bitmap.insert(0, vec![]);
        kvm_dirty_bitmap.insert(1, vec![0b10]);
        assert!(matches!(
            guest_memory.dump_dirty(&mut reader, &kvm_dirty_bitmap),
            Err(MemoryError::DirtyBitmapTooSmall)
        ));
    }

    #[test]
    fn test_store_dirty_bitmap() {
        let page_size = get_page_size().unwrap();

        // Two regions of three pages each, with a one page gap between them.
        let region_1_address = GuestAddress(0);
        let region_2_address = GuestAddress(page_size as u64 * 4);
        let region_size = page_size * 3;
        let mem_regions = [
            (region_1_address, region_size),
            (region_2_address, region_size),
        ];
        let guest_memory = into_region_ext(
            anonymous(mem_regions.into_iter(), true, HugePageConfig::None).unwrap(),
        );

        // Check that Firecracker bitmap is clean.
        guest_memory.iter().for_each(|r| {
            assert!(!r.bitmap().dirty_at(0));
            assert!(!r.bitmap().dirty_at(page_size));
            assert!(!r.bitmap().dirty_at(page_size * 2));
        });

        let mut dirty_bitmap: DirtyBitmap = HashMap::new();
        dirty_bitmap.insert(0, vec![0b101]);
        dirty_bitmap.insert(1, vec![0b101]);

        guest_memory.store_dirty_bitmap(&dirty_bitmap, page_size);

        // Assert that the bitmap now reports as being dirty matching the dirty bitmap
        guest_memory.iter().for_each(|r| {
            assert!(r.bitmap().dirty_at(0));
            assert!(!r.bitmap().dirty_at(page_size));
            assert!(r.bitmap().dirty_at(page_size * 2));
        });
    }

    #[test]
    fn test_create_memfd() {
        let size_bytes = mib_to_bytes(1) as u64;

        let memfd = create_memfd(size_bytes, None).unwrap();

        assert_eq!(memfd.as_file().metadata().unwrap().len(), size_bytes);
        // Resizing must be rejected by the seals.
        memfd.as_file().set_len(0x69).unwrap_err();

        let mut seals = memfd::SealsHashSet::new();
        seals.insert(memfd::FileSeal::SealGrow);
        memfd.add_seals(&seals).unwrap_err();
    }

    /// This asserts that $lhs matches $rhs.
    macro_rules! assert_match {
        ($lhs:expr, $rhs:pat) => {{ assert!(matches!($lhs, $rhs)) }};
    }

    #[test]
    fn test_discard_range() {
        let page_size: usize = 0x1000;
        let mem = single_region_mem(2 * page_size);

        // Fill the memory with ones.
        let ones = vec![1u8; 2 * page_size];
        mem.write(&ones[..], GuestAddress(0)).unwrap();

        // Remove the first page.
        mem.discard_range(GuestAddress(0), page_size).unwrap();

        // Check that the first page is zeroed.
        let mut actual_page = vec![0u8; page_size];
        mem.read(actual_page.as_mut_slice(), GuestAddress(0))
            .unwrap();
        assert_eq!(vec![0u8; page_size], actual_page);
        // Check that the second page still contains ones.
        mem.read(actual_page.as_mut_slice(), GuestAddress(page_size as u64))
            .unwrap();
        assert_eq!(vec![1u8; page_size], actual_page);

        // Malformed range: the len is too big.
        assert_match!(
            mem.discard_range(GuestAddress(0), 0x10000).unwrap_err(),
            GuestMemoryError::InvalidGuestAddress(_)
        );

        // Region not mapped.
        assert_match!(
            mem.discard_range(GuestAddress(0x10000), 0x10).unwrap_err(),
            GuestMemoryError::InvalidGuestAddress(_)
        );

        // Madvise fail: the guest address is not aligned to the page size.
        assert_match!(
            mem.discard_range(GuestAddress(0x20), page_size)
                .unwrap_err(),
            GuestMemoryError::IOError(_)
        );
    }

    #[test]
    fn test_discard_range_on_file() {
        let page_size: usize = 0x1000;
        let mut memory_file = TempFile::new().unwrap().into_file();
        memory_file.set_len(2 * page_size as u64).unwrap();
        memory_file.write_all(&vec![2u8; 2 * page_size]).unwrap();

        let mem = into_region_ext(
            snapshot_file(
                memory_file,
                std::iter::once((GuestAddress(0), 2 * page_size)),
                false,
            )
            .unwrap(),
        );

        // Fill the memory with ones.
        let ones = vec![1u8; 2 * page_size];
        mem.write(&ones[..], GuestAddress(0)).unwrap();

        // Remove the first page.
        mem.discard_range(GuestAddress(0), page_size).unwrap();

        // Check that the first page is zeroed.
        let mut actual_page = vec![0u8; page_size];
        mem.read(actual_page.as_mut_slice(), GuestAddress(0))
            .unwrap();
        assert_eq!(vec![0u8; page_size], actual_page);
        // Check that the second page still contains ones.
        mem.read(actual_page.as_mut_slice(), GuestAddress(page_size as u64))
            .unwrap();
        assert_eq!(vec![1u8; page_size], actual_page);

        // Malformed range: the len is too big.
        assert_match!(
            mem.discard_range(GuestAddress(0), 0x10000).unwrap_err(),
            GuestMemoryError::InvalidGuestAddress(_)
        );

        // Region not mapped.
        assert_match!(
            mem.discard_range(GuestAddress(0x10000), 0x10).unwrap_err(),
            GuestMemoryError::InvalidGuestAddress(_)
        );

        // Mmap fail: the guest address is not aligned to the page size.
        assert_match!(
            mem.discard_range(GuestAddress(0x20), page_size)
                .unwrap_err(),
            GuestMemoryError::IOError(_)
        );
    }

    #[allow(clippy::cast_possible_truncation, clippy::cast_possible_wrap)]
    mod prop_tests {
        use std::io::{Read, Seek, SeekFrom};
        use std::os::unix::fs::MetadataExt;

        use proptest::prelude::*;
        use vmm_sys_util::tempfile::TempFile;

        use super::*;

        /// Naive dump_dirty over a full GuestMemoryMmap: iterates all
        /// regions/slots, seeks over unplugged slots, and for plugged slots
        /// writes dirty pages one at a time. Returns the number of dirty pages written.
        fn dump_dirty_oracle(
            mem: &GuestMemoryMmap,
            writer: &mut File,
            dirty_bitmap: &DirtyBitmap,
        ) -> usize {
            let page_size = get_page_size().map_err(MemoryError::PageSize).unwrap();
            let mut dirty_count = 0;

            for (slot, plugged) in mem.iter().flat_map(|r| r.slots()) {
                if !plugged {
                    writer
                        .seek(SeekFrom::Current(slot.slice.len() as i64))
                        .unwrap();
                    continue;
                }

                let kvm_bitmap = dirty_bitmap.get(&slot.slot).unwrap();
                let fc_bitmap = slot.slice.bitmap();
                let num_pages = slot.slice.len() / page_size;

                for page_index in 0..num_pages {
                    let page_offset = page_index * page_size;
                    // A page is dumped when dirty in either bitmap.
                    let is_kvm_dirty =
                        ((kvm_bitmap[page_index / 64] >> (page_index % 64)) & 1) != 0;
                    let is_fc_dirty = fc_bitmap.dirty_at(page_offset);

                    if is_kvm_dirty || is_fc_dirty {
                        let slice = &slot.slice.subslice(page_offset, page_size).unwrap();
                        writer.write_all_volatile(slice).unwrap();
                        dirty_count += 1;
                    } else {
                        writer.seek(SeekFrom::Current(page_size as i64)).unwrap();
                    }
                }
            }
            dirty_count
        }

        /// Generate a KVM dirty bitmap for a slot of `num_pages` pages.
        fn kvm_bitmap_for(num_pages: usize) -> impl Strategy<Value = Vec<u64>> {
            let num_u64s = num_pages.div_ceil(64);
            let last_chunk_valid_bits = num_pages % 64;
            proptest::collection::vec(any::<u64>(), num_u64s).prop_map(move |mut bm| {
                // Mask off bits beyond the last valid page.
                if last_chunk_valid_bits > 0 {
                    let last = bm.len() - 1;
                    bm[last] &= (1u64 << last_chunk_valid_bits) - 1;
                }
                bm
            })
        }

        /// A region descriptor produced by the strategy.
        #[derive(Debug, Clone)]
        struct RegionSpec {
            /// gap (in pages) from the previous region.
            gap_pages: usize,
            /// type of the region
            region_type: GuestRegionType,
            /// size (in pages) of the KVM slots in the region
            pages_per_slot: usize,
            /// array indicating whether each slot is plugged or not
            plugged: Vec<bool>,
            /// mock KVM dirty bitmaps
            /// There is one per slot and each bit of the u64 is a single page
            kvm_bitmaps: Vec<Vec<u64>>,
            /// pages to be accessed by Firecracker during the test.
            /// One bitmap per slot, where each bool is one page
            fc_dirty_pages: Vec<Vec<bool>>,
        }

        /// Strategy for a single region: Dram (1 plugged slot) or
        /// Hotpluggable (1-4 slots, each independently plugged/unplugged).
        fn region_spec() -> impl Strategy<Value = RegionSpec> {
            prop_oneof![
                // Dram: 1 slot, always plugged
                (0usize..=8, 1usize..=128).prop_flat_map(|(gap_pages, pages_per_slot)| {
                    (
                        kvm_bitmap_for(pages_per_slot),
                        proptest::collection::vec(any::<bool>(), pages_per_slot),
                    )
                        .prop_map(move |(bm, fc)| RegionSpec {
                            gap_pages,
                            region_type: GuestRegionType::Dram,
                            pages_per_slot,
                            plugged: vec![true],
                            kvm_bitmaps: vec![bm],
                            fc_dirty_pages: vec![fc],
                        })
                }),
                // Hotpluggable: 1-4 slots, each plugged or not
                (0usize..=8, 1usize..=128, 1usize..=4).prop_flat_map(
                    |(gap_pages, pages_per_slot, num_slots)| {
                        (
                            proptest::collection::vec(any::<bool>(), num_slots),
                            proptest::collection::vec(kvm_bitmap_for(pages_per_slot), num_slots),
                            proptest::collection::vec(
                                proptest::collection::vec(any::<bool>(), pages_per_slot),
                                num_slots,
                            ),
                        )
                            .prop_map(
                                move |(plugged, kvm_bitmaps, fc_dirty_pages)| RegionSpec {
                                    gap_pages,
                                    region_type: GuestRegionType::Hotpluggable,
                                    pages_per_slot,
                                    plugged,
                                    kvm_bitmaps,
                                    fc_dirty_pages,
                                },
                            )
                    },
                ),
            ]
        }

        /// Build a GuestMemoryMmap and KVM dirty bitmap from region specs.
fn build_memory(specs: &[RegionSpec]) -> (GuestMemoryMmap, DirtyBitmap, usize) { let page_size = get_page_size().unwrap(); let mut slot_from = 0u32; let mut regions = Vec::new(); let mut kvm_bitmap: DirtyBitmap = HashMap::new(); let mut total_size = 0usize; let mut next_addr = 0u64; for spec in specs { next_addr += (spec.gap_pages * page_size) as u64; let num_slots = spec.plugged.len(); let region_size = num_slots * spec.pages_per_slot * page_size; let mmap_regions = anonymous( [(GuestAddress(next_addr), region_size)].into_iter(), true, HugePageConfig::None, ) .unwrap(); let state = GuestMemoryRegionState { base_address: next_addr, size: region_size, region_type: spec.region_type, plugged: spec.plugged.clone(), }; let region = GuestRegionMmapExt::from_state( mmap_regions.into_iter().next().unwrap(), &state, slot_from, ) .unwrap(); for (i, bm) in spec.kvm_bitmaps.iter().enumerate() { kvm_bitmap.insert(slot_from + i as u32, bm.clone()); } regions.push(region); slot_from += num_slots as u32; total_size += region_size; next_addr += region_size as u64; } ( GuestMemoryMmap::from_regions(regions).unwrap(), kvm_bitmap, total_size, ) } proptest! { #![proptest_config(ProptestConfig::with_cases(4096))] #[test] fn dump_dirty_correctness( region_specs in proptest::collection::vec(region_spec(), 1..=3), ) { let page_size = get_page_size().unwrap(); let (guest_memory, kvm_bitmap, total_size) = build_memory(®ion_specs); // Fill backing memory with non-zero data via raw pointer so // that KVM-only-dirty pages carry distinguishable content // without triggering the firecracker bitmap. for region in guest_memory.iter() { let ptr = region .get_host_address(MemoryRegionAddress(0)) .unwrap(); // SAFETY: ptr is valid for region.len() bytes. unsafe { std::ptr::write_bytes(ptr, 0xAB, u64_to_usize(region.len())) }; } // Dirty selected pages in the firecracker bitmap. 
for (region, spec) in guest_memory.iter().zip(region_specs.iter()) { for (slot_idx, (slot, plugged)) in region.slots().enumerate() { if !plugged { continue; } for (page, dirty) in spec.fc_dirty_pages[slot_idx].iter().enumerate() { if *dirty { let addr = slot.guest_addr.0 + (page * page_size) as u64; guest_memory.write(&[0xCD], GuestAddress(addr)).unwrap(); } } } } // Run oracle first — dump_dirty calls reset_dirty() on // success, which would clear the firecracker bitmap before // the oracle implementation gets to read it. let mut oracle_file = TempFile::new().unwrap().into_file(); oracle_file.set_len(total_size as u64).unwrap(); let dirty_count = dump_dirty_oracle(&guest_memory, &mut oracle_file, &kvm_bitmap); let expected_blocks = (dirty_count * page_size) as u64 / 512; let oracle_pos = oracle_file.stream_position().unwrap(); // sanity check the oracle implementation prop_assert_eq!(oracle_pos, total_size as u64); prop_assert_eq!(oracle_file.metadata().unwrap().blocks(), expected_blocks); // Run the optimized implementation. let mut opt_file = TempFile::new().unwrap().into_file(); opt_file.set_len(total_size as u64).unwrap(); guest_memory .dump_dirty(&mut opt_file, &kvm_bitmap) .unwrap(); let opt_pos = opt_file.stream_position().unwrap(); // check the writer actually moved the cursor to the end and wrote all dirty blocks prop_assert_eq!(opt_pos, total_size as u64); prop_assert_eq!(opt_file.metadata().unwrap().blocks(), expected_blocks); // Read back and compare file contents. 
opt_file.seek(SeekFrom::Start(0)).unwrap(); oracle_file.seek(SeekFrom::Start(0)).unwrap(); let mut opt_buf = vec![0u8; total_size]; let mut oracle_buf = vec![0u8; total_size]; opt_file.read_exact(&mut opt_buf).unwrap(); oracle_file.read_exact(&mut oracle_buf).unwrap(); prop_assert_eq!(&opt_buf, &oracle_buf); } #[test] fn store_dirty_bitmap_correctness( region_specs in proptest::collection::vec(region_spec(), 1..=3), ) { let page_size = get_page_size().unwrap(); let (guest_memory, kvm_bitmap, _) = build_memory(®ion_specs); guest_memory.store_dirty_bitmap(&kvm_bitmap, page_size); // Verify: every KVM-dirty page on a plugged slot is now // dirty in the firecracker bitmap. for (region, spec) in guest_memory.iter().zip(region_specs.iter()) { for (slot_idx, (slot, plugged)) in region.slots().enumerate() { if !plugged { continue; } let num_pages = slot.slice.len() / page_size; let bm = &spec.kvm_bitmaps[slot_idx]; let fc = slot.slice.bitmap(); for page in 0..num_pages { let kvm_dirty = ((bm[page / 64] >> (page % 64)) & 1) == 1; let fc_dirty = fc.dirty_at(page * page_size); // Bitmap starts clean, so after store_dirty_bitmap // the fc bitmap must exactly match the KVM bitmap. prop_assert_eq!(fc_dirty, kvm_dirty, "mismatch at page {}", page); } } } } } } } ================================================ FILE: src/vmm/src/vstate/mod.rs ================================================ // Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 /// Module with the implementation of a Bus that can hold devices. pub mod bus; /// VM interrupts implementation. pub mod interrupts; /// Module with Kvm implementation. pub mod kvm; /// Module with GuestMemory implementation. pub mod memory; /// Resource manager for devices. pub mod resources; /// Module with Vcpu implementation. pub mod vcpu; /// Module with Vm implementation. 
pub mod vm; ================================================ FILE: src/vmm/src/vstate/resources.rs ================================================ // Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 use std::convert::Infallible; use serde::{Deserialize, Serialize}; pub use vm_allocator::AllocPolicy; use vm_allocator::{AddressAllocator, IdAllocator}; use crate::arch; use crate::snapshot::Persist; /// Helper function to allocate many ids from an id allocator fn allocate_many_ids( id_allocator: &mut IdAllocator, count: u32, ) -> Result, vm_allocator::Error> { let mut ids = Vec::with_capacity(count as usize); for _ in 0..count { match id_allocator.allocate_id() { Ok(id) => ids.push(id), Err(err) => { // It is ok to unwrap here, we just allocated the GSI ids.into_iter().for_each(|id| { id_allocator.free_id(id).unwrap(); }); return Err(err); } } } Ok(ids) } /// A resource manager for (de)allocating interrupt lines (GSIs) and guest memory /// /// At the moment, we support: /// /// * GSIs for legacy x86_64 devices /// * GSIs for MMIO devicecs /// * Memory allocations in the MMIO address space #[derive(Debug, Clone, Serialize, Deserialize)] pub struct ResourceAllocator { /// Allocator for legacy device interrupt lines pub gsi_legacy_allocator: IdAllocator, /// Allocator for PCI device GSIs pub gsi_msi_allocator: IdAllocator, /// Allocator for memory in the 32-bit MMIO address space pub mmio32_memory: AddressAllocator, /// Allocator for memory in the 64-bit MMIO address space pub mmio64_memory: AddressAllocator, /// Allocator for memory after the 64-bit MMIO address space pub past_mmio64_memory: AddressAllocator, /// Memory allocator for system data pub system_memory: AddressAllocator, } impl Default for ResourceAllocator { fn default() -> Self { ResourceAllocator::new() } } impl ResourceAllocator { /// Create a new resource allocator for Firecracker devices pub fn new() -> Self { // It is fine for us to unwrap 
the following since we know we are passing valid ranges for // all allocators Self { gsi_legacy_allocator: IdAllocator::new(arch::GSI_LEGACY_START, arch::GSI_LEGACY_END) .unwrap(), gsi_msi_allocator: IdAllocator::new(arch::GSI_MSI_START, arch::GSI_MSI_END).unwrap(), mmio32_memory: AddressAllocator::new( arch::MEM_32BIT_DEVICES_START, arch::MEM_32BIT_DEVICES_SIZE, ) .unwrap(), mmio64_memory: AddressAllocator::new( arch::MEM_64BIT_DEVICES_START, arch::MEM_64BIT_DEVICES_SIZE, ) .unwrap(), past_mmio64_memory: AddressAllocator::new( arch::FIRST_ADDR_PAST_64BITS_MMIO, arch::PAST_64BITS_MMIO_SIZE, ) .unwrap(), system_memory: AddressAllocator::new(arch::SYSTEM_MEM_START, arch::SYSTEM_MEM_SIZE) .unwrap(), } } /// Allocate a number of legacy GSIs /// /// # Arguments /// /// * `gsi_count` - The number of legacy GSIs to allocate pub fn allocate_gsi_legacy(&mut self, gsi_count: u32) -> Result, vm_allocator::Error> { allocate_many_ids(&mut self.gsi_legacy_allocator, gsi_count) } /// Allocate a number of GSIs for MSI /// /// # Arguments /// /// * `gsi_count` - The number of GSIs to allocate pub fn allocate_gsi_msi(&mut self, gsi_count: u32) -> Result, vm_allocator::Error> { allocate_many_ids(&mut self.gsi_msi_allocator, gsi_count) } /// Allocate a memory range in 32-bit MMIO address space /// /// If it succeeds, it returns the first address of the allocated range /// /// # Arguments /// /// * `size` - The size in bytes of the memory to allocate /// * `alignment` - The alignment of the address of the first byte /// * `policy` - A [`vm_allocator::AllocPolicy`] variant for determining the allocation policy pub fn allocate_32bit_mmio_memory( &mut self, size: u64, alignment: u64, policy: AllocPolicy, ) -> Result { Ok(self .mmio32_memory .allocate(size, alignment, policy)? 
.start()) } /// Allocate a memory range in 64-bit MMIO address space /// /// If it succeeds, it returns the first address of the allocated range /// /// # Arguments /// /// * `size` - The size in bytes of the memory to allocate /// * `alignment` - The alignment of the address of the first byte /// * `policy` - A [`vm_allocator::AllocPolicy`] variant for determining the allocation policy pub fn allocate_64bit_mmio_memory( &mut self, size: u64, alignment: u64, policy: AllocPolicy, ) -> Result { Ok(self .mmio64_memory .allocate(size, alignment, policy)? .start()) } /// Allocate a memory range for system data /// /// If it succeeds, it returns the first address of the allocated range /// /// # Arguments /// /// * `size` - The size in bytes of the memory to allocate /// * `alignment` - The alignment of the address of the first byte /// * `policy` - A [`vm_allocator::AllocPolicy`] variant for determining the allocation policy pub fn allocate_system_memory( &mut self, size: u64, alignment: u64, policy: AllocPolicy, ) -> Result { Ok(self .system_memory .allocate(size, alignment, policy)? 
.start()) } } impl<'a> Persist<'a> for ResourceAllocator { type State = ResourceAllocator; type ConstructorArgs = (); type Error = Infallible; fn save(&self) -> Self::State { self.clone() } fn restore( _constructor_args: Self::ConstructorArgs, state: &Self::State, ) -> Result { Ok(state.clone()) } } #[cfg(test)] mod tests { use vm_allocator::AllocPolicy; use super::ResourceAllocator; use crate::arch::{self, GSI_LEGACY_NUM, GSI_LEGACY_START, GSI_MSI_NUM, GSI_MSI_START}; use crate::snapshot::Persist; #[test] fn test_allocate_irq() { let mut allocator = ResourceAllocator::new(); // asking for 0 IRQs should return us an empty vector assert_eq!(allocator.allocate_gsi_legacy(0), Ok(vec![])); // We cannot allocate more GSIs than available assert_eq!( allocator.allocate_gsi_legacy(GSI_LEGACY_NUM + 1), Err(vm_allocator::Error::ResourceNotAvailable) ); // But allocating all of them at once should work assert_eq!( allocator.allocate_gsi_legacy(GSI_LEGACY_NUM), Ok((arch::GSI_LEGACY_START..=arch::GSI_LEGACY_END).collect::>()) ); // And now we ran out of GSIs assert_eq!( allocator.allocate_gsi_legacy(1), Err(vm_allocator::Error::ResourceNotAvailable) ); // But we should be able to ask for 0 GSIs assert_eq!(allocator.allocate_gsi_legacy(0), Ok(vec![])); let mut allocator = ResourceAllocator::new(); // We should be able to allocate 1 GSI assert_eq!( allocator.allocate_gsi_legacy(1), Ok(vec![arch::GSI_LEGACY_START]) ); // We can't allocate MAX_IRQS any more assert_eq!( allocator.allocate_gsi_legacy(GSI_LEGACY_NUM), Err(vm_allocator::Error::ResourceNotAvailable) ); // We can allocate another one and it should be the second available assert_eq!( allocator.allocate_gsi_legacy(1), Ok(vec![arch::GSI_LEGACY_START + 1]) ); // Let's allocate the rest in a loop for i in arch::GSI_LEGACY_START + 2..=arch::GSI_LEGACY_END { assert_eq!(allocator.allocate_gsi_legacy(1), Ok(vec![i])); } } #[test] fn test_allocate_gsi() { let mut allocator = ResourceAllocator::new(); // asking for 0 IRQs should 
return us an empty vector assert_eq!(allocator.allocate_gsi_msi(0), Ok(vec![])); // We cannot allocate more GSIs than available assert_eq!( allocator.allocate_gsi_msi(GSI_MSI_NUM + 1), Err(vm_allocator::Error::ResourceNotAvailable) ); // But allocating all of them at once should work assert_eq!( allocator.allocate_gsi_msi(GSI_MSI_NUM), Ok((arch::GSI_MSI_START..=arch::GSI_MSI_END).collect::>()) ); // And now we ran out of GSIs assert_eq!( allocator.allocate_gsi_msi(1), Err(vm_allocator::Error::ResourceNotAvailable) ); // But we should be able to ask for 0 GSIs assert_eq!(allocator.allocate_gsi_msi(0), Ok(vec![])); let mut allocator = ResourceAllocator::new(); // We should be able to allocate 1 GSI assert_eq!(allocator.allocate_gsi_msi(1), Ok(vec![arch::GSI_MSI_START])); // We can't allocate MAX_IRQS any more assert_eq!( allocator.allocate_gsi_msi(GSI_MSI_NUM), Err(vm_allocator::Error::ResourceNotAvailable) ); // We can allocate another one and it should be the second available assert_eq!( allocator.allocate_gsi_msi(1), Ok(vec![arch::GSI_MSI_START + 1]) ); // Let's allocate the rest in a loop for i in arch::GSI_MSI_START + 2..=arch::GSI_MSI_END { assert_eq!(allocator.allocate_gsi_msi(1), Ok(vec![i])); } } fn clone_allocator(allocator: &ResourceAllocator) -> ResourceAllocator { let state = allocator.save(); let serialized_data = bitcode::serialize(&state).unwrap(); let restored_state: ResourceAllocator = bitcode::deserialize(&serialized_data).unwrap(); ResourceAllocator::restore((), &restored_state).unwrap() } #[test] fn test_save_restore() { let mut allocator0 = ResourceAllocator::new(); let irq_0 = allocator0.allocate_gsi_legacy(1).unwrap()[0]; assert_eq!(irq_0, GSI_LEGACY_START); let gsi_0 = allocator0.allocate_gsi_msi(1).unwrap()[0]; assert_eq!(gsi_0, GSI_MSI_START); let mut allocator1 = clone_allocator(&allocator0); let irq_1 = allocator1.allocate_gsi_legacy(1).unwrap()[0]; assert_eq!(irq_1, GSI_LEGACY_START + 1); let gsi_1 = 
allocator1.allocate_gsi_msi(1).unwrap()[0]; assert_eq!(gsi_1, GSI_MSI_START + 1); let mmio32_mem = allocator1 .allocate_32bit_mmio_memory(0x42, 1, AllocPolicy::FirstMatch) .unwrap(); assert_eq!(mmio32_mem, arch::MEM_32BIT_DEVICES_START); let mmio64_mem = allocator1 .allocate_64bit_mmio_memory(0x42, 1, AllocPolicy::FirstMatch) .unwrap(); assert_eq!(mmio64_mem, arch::MEM_64BIT_DEVICES_START); let system_mem = allocator1 .allocate_system_memory(0x42, 1, AllocPolicy::FirstMatch) .unwrap(); assert_eq!(system_mem, arch::SYSTEM_MEM_START); let mut allocator2 = clone_allocator(&allocator1); allocator2 .allocate_32bit_mmio_memory(0x42, 1, AllocPolicy::ExactMatch(mmio32_mem)) .unwrap_err(); allocator2 .allocate_64bit_mmio_memory(0x42, 1, AllocPolicy::ExactMatch(mmio64_mem)) .unwrap_err(); allocator2 .allocate_system_memory(0x42, 1, AllocPolicy::ExactMatch(system_mem)) .unwrap_err(); let irq_2 = allocator2.allocate_gsi_legacy(1).unwrap()[0]; assert_eq!(irq_2, GSI_LEGACY_START + 2); let gsi_2 = allocator2.allocate_gsi_msi(1).unwrap()[0]; assert_eq!(gsi_2, GSI_MSI_START + 2); let mmio32_mem = allocator1 .allocate_32bit_mmio_memory(0x42, 1, AllocPolicy::FirstMatch) .unwrap(); assert_eq!(mmio32_mem, arch::MEM_32BIT_DEVICES_START + 0x42); let mmio64_mem = allocator1 .allocate_64bit_mmio_memory(0x42, 1, AllocPolicy::FirstMatch) .unwrap(); assert_eq!(mmio64_mem, arch::MEM_64BIT_DEVICES_START + 0x42); let system_mem = allocator1 .allocate_system_memory(0x42, 1, AllocPolicy::FirstMatch) .unwrap(); assert_eq!(system_mem, arch::SYSTEM_MEM_START + 0x42); } } ================================================ FILE: src/vmm/src/vstate/vcpu.rs ================================================ // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 // // Portions Copyright 2017 The Chromium OS Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the THIRD-PARTY file. 
use std::os::fd::AsRawFd; use std::sync::atomic::{Ordering, fence}; use std::sync::mpsc::{Receiver, Sender, TryRecvError, channel}; use std::sync::{Arc, Barrier}; use std::{fmt, io, thread}; use kvm_bindings::{KVM_SYSTEM_EVENT_RESET, KVM_SYSTEM_EVENT_SHUTDOWN}; use kvm_ioctls::{VcpuExit, VcpuFd}; use libc::{c_int, c_void, siginfo_t}; use log::{error, info, warn}; use vmm_sys_util::errno; use vmm_sys_util::eventfd::EventFd; use crate::FcExitCode; pub use crate::arch::{KvmVcpu, KvmVcpuConfigureError, KvmVcpuError, Peripherals, VcpuState}; use crate::cpu_config::templates::{CpuConfiguration, GuestConfigError}; #[cfg(feature = "gdb")] use crate::gdb::target::{GdbTargetError, get_raw_tid}; use crate::logger::{IncMetric, METRICS}; use crate::seccomp::{BpfProgram, BpfProgramRef}; use crate::utils::signal::{Killable, register_signal_handler, sigrtmin}; use crate::utils::sm::StateMachine; use crate::vstate::bus::Bus; use crate::vstate::vm::Vm; /// Signal number (SIGRTMIN) used to kick Vcpus. pub const VCPU_RTSIG_OFFSET: i32 = 0; /// Errors associated with the wrappers over KVM ioctls. #[derive(Debug, thiserror::Error, displaydoc::Display)] pub enum VcpuError { /// Error creating vcpu config: {0} VcpuConfig(GuestConfigError), /// Received error signaling kvm exit: {0} FaultyKvmExit(String), /// Failed to signal vcpu: {0} SignalVcpu(vmm_sys_util::errno::Error), /// Unexpected kvm exit received: {0} UnhandledKvmExit(String), /// Failed to run action on vcpu: {0} VcpuResponse(KvmVcpuError), /// Cannot spawn a new vCPU thread: {0} VcpuSpawn(io::Error), /// Vcpu not present in TLS VcpuTlsNotPresent, /// Error with gdb request sent #[cfg(feature = "gdb")] GdbRequest(GdbTargetError), } /// Encapsulates configuration parameters for the guest vCPUS. #[derive(Debug)] pub struct VcpuConfig { /// Number of guest VCPUs. pub vcpu_count: u8, /// Enable simultaneous multithreading in the CPUID configuration. 
pub smt: bool, /// Configuration for vCPU pub cpu_config: CpuConfiguration, } /// Error type for [`Vcpu::start_threaded`]. #[derive(Debug, thiserror::Error, displaydoc::Display)] pub enum StartThreadedError { /// Failed to spawn vCPU thread: {0} Spawn(std::io::Error), /// Failed to clone kvm Vcpu fd: {0} CopyFd(CopyKvmFdError), } /// Error type for [`Vcpu::copy_kvm_vcpu_fd`]. #[derive(Debug, thiserror::Error, displaydoc::Display)] pub enum CopyKvmFdError { /// Error with libc dup of kvm Vcpu fd DupError(#[from] std::io::Error), /// Error creating the Vcpu from the duplicated Vcpu fd CreateVcpuError(#[from] kvm_ioctls::Error), } /// A wrapper around creating and using a vcpu. #[derive(Debug)] pub struct Vcpu { /// Access to kvm-arch specific functionality. pub kvm_vcpu: KvmVcpu, /// File descriptor for vcpu to trigger exit event on vmm. exit_evt: EventFd, /// Debugger emitter for gdb events #[cfg(feature = "gdb")] gdb_event: Option>, /// The receiving end of events channel owned by the vcpu side. event_receiver: Receiver, /// The transmitting end of the events channel which will be given to the handler. event_sender: Option>, /// The receiving end of the responses channel which will be given to the handler. response_receiver: Option>, /// The transmitting end of the responses channel owned by the vcpu side. response_sender: Sender, } impl Vcpu { /// Registers a signal handler which kicks the vcpu running on the current thread, if there is /// one. fn register_kick_signal_handler(&mut self) { extern "C" fn handle_signal(_: c_int, _: *mut siginfo_t, _: *mut c_void) { // We write to the immediate_exit from other thread, so make sure the read in the // KVM_RUN sees the up to date value fence(Ordering::Acquire); } register_signal_handler(sigrtmin() + VCPU_RTSIG_OFFSET, handle_signal) .expect("Failed to register vcpu signal handler"); } /// Constructs a new VCPU for `vm`. /// /// # Arguments /// /// * `index` - Represents the 0-based CPU index between [0, max vcpus). 
/// * `vm` - The vm to which this vcpu will get attached. /// * `exit_evt` - An `EventFd` that will be written into when this vcpu exits. pub fn new(index: u8, vm: &Vm, exit_evt: EventFd) -> Result { let (event_sender, event_receiver) = channel(); let (response_sender, response_receiver) = channel(); let kvm_vcpu = KvmVcpu::new(index, vm).unwrap(); Ok(Vcpu { exit_evt, event_receiver, event_sender: Some(event_sender), response_receiver: Some(response_receiver), response_sender, #[cfg(feature = "gdb")] gdb_event: None, kvm_vcpu, }) } /// Sets a MMIO bus for this vcpu. pub fn set_mmio_bus(&mut self, mmio_bus: Arc) { self.kvm_vcpu.peripherals.mmio_bus = Some(mmio_bus); } /// Attaches the fields required for debugging #[cfg(feature = "gdb")] pub fn attach_debug_info(&mut self, gdb_event: Sender) { self.gdb_event = Some(gdb_event); } /// Obtains a copy of the VcpuFd pub fn copy_kvm_vcpu_fd(&self, vm: &Vm) -> Result { // SAFETY: We own this fd so it is considered safe to clone let r = unsafe { libc::dup(self.kvm_vcpu.fd.as_raw_fd()) }; if r < 0 { return Err(std::io::Error::last_os_error().into()); } // SAFETY: We assert this is a valid fd by checking the result from the dup unsafe { Ok(vm.fd().create_vcpu_from_rawfd(r)?) } } /// Moves the vcpu to its own thread and constructs a VcpuHandle. /// The handle can be used to control the remote vcpu. pub fn start_threaded( mut self, vm: &Vm, seccomp_filter: Arc, barrier: Arc, ) -> Result { let event_sender = self.event_sender.take().expect("vCPU already started"); let response_receiver = self.response_receiver.take().unwrap(); let vcpu_fd = self .copy_kvm_vcpu_fd(vm) .map_err(StartThreadedError::CopyFd)?; let vcpu_thread = thread::Builder::new() .name(format!("fc_vcpu {}", self.kvm_vcpu.index)) .spawn(move || { let filter = &*seccomp_filter; self.register_kick_signal_handler(); // Synchronization to make sure thread local data is initialized. 
barrier.wait(); self.run(filter); }) .map_err(StartThreadedError::Spawn)?; Ok(VcpuHandle::new( event_sender, response_receiver, vcpu_fd, vcpu_thread, )) } /// Main loop of the vCPU thread. /// /// Runs the vCPU in KVM context in a loop. Handles KVM_EXITs then goes back in. /// Note that the state of the VCPU and associated VM must be setup first for this to do /// anything useful. pub fn run(&mut self, seccomp_filter: BpfProgramRef) { // Load seccomp filters for this vCPU thread. // Execution panics if filters cannot be loaded, use --no-seccomp if skipping filters // altogether is the desired behaviour. if let Err(err) = crate::seccomp::apply_filter(seccomp_filter) { panic!( "Failed to set the requested seccomp filters on vCPU {}: Error: {}", self.kvm_vcpu.index, err ); } // Start running the machine state in the `Paused` state. StateMachine::run(self, Self::paused); } // This is the main loop of the `Running` state. fn running(&mut self) -> StateMachine { // This loop is here just for optimizing the emulation path. // No point in ticking the state machine if there are no external events. loop { match self.run_emulation() { // Emulation ran successfully, continue. Ok(VcpuEmulation::Handled) => (), // Emulation was interrupted, check external events. Ok(VcpuEmulation::Interrupted) => break, // The guest requested a SHUTDOWN or RESET. This is ARM // specific. On x86 the i8042 emulation signals the main thread // directly without calling Vcpu::exit(). Ok(VcpuEmulation::Stopped) => return self.exit(FcExitCode::Ok), // If the emulation requests a pause lets do this #[cfg(feature = "gdb")] Ok(VcpuEmulation::Paused) => { #[cfg(target_arch = "x86_64")] self.kvm_vcpu.kvmclock_ctrl(); return StateMachine::next(Self::paused); } // Emulation errors lead to vCPU exit. Err(_) => return self.exit(FcExitCode::GenericError), } } // By default don't change state. let mut state = StateMachine::next(Self::running); // Break this emulation loop on any transition request/external event. 
match self.event_receiver.try_recv() { // Running ---- Pause ----> Paused Ok(VcpuEvent::Pause) => { // Nothing special to do. self.response_sender .send(VcpuResponse::Paused) .expect("vcpu channel unexpectedly closed"); #[cfg(target_arch = "x86_64")] self.kvm_vcpu.kvmclock_ctrl(); // Move to 'paused' state. state = StateMachine::next(Self::paused); } Ok(VcpuEvent::Resume) => { self.response_sender .send(VcpuResponse::Resumed) .expect("vcpu channel unexpectedly closed"); } // SaveState cannot be performed on a running Vcpu. Ok(VcpuEvent::SaveState) => { self.response_sender .send(VcpuResponse::NotAllowed(String::from( "save/restore unavailable while running", ))) .expect("vcpu channel unexpectedly closed"); } // DumpCpuConfig cannot be performed on a running Vcpu. Ok(VcpuEvent::DumpCpuConfig) => { self.response_sender .send(VcpuResponse::NotAllowed(String::from( "cpu config dump is unavailable while running", ))) .expect("vcpu channel unexpectedly closed"); } Ok(VcpuEvent::Finish) => return StateMachine::finish(), // Unhandled exit of the other end. Err(TryRecvError::Disconnected) => { // Move to 'exited' state. state = self.exit(FcExitCode::GenericError); } // All other events or lack thereof have no effect on current 'running' state. Err(TryRecvError::Empty) => (), } state } // This is the main loop of the `Paused` state. fn paused(&mut self) -> StateMachine { match self.event_receiver.recv() { // Paused ---- Resume ----> Running Ok(VcpuEvent::Resume) => { if self.kvm_vcpu.fd.get_kvm_run().immediate_exit == 1u8 { warn!( "Received a VcpuEvent::Resume message with immediate_exit enabled. \ immediate_exit was disabled before proceeding" ); self.kvm_vcpu.fd.set_kvm_immediate_exit(0); } self.response_sender .send(VcpuResponse::Resumed) .expect("vcpu channel unexpectedly closed"); // Move to 'running' state. 
StateMachine::next(Self::running) } Ok(VcpuEvent::Pause) => { self.response_sender .send(VcpuResponse::Paused) .expect("vcpu channel unexpectedly closed"); StateMachine::next(Self::paused) } Ok(VcpuEvent::SaveState) => { // Save vcpu state. self.kvm_vcpu .save_state() .map(|vcpu_state| { self.response_sender .send(VcpuResponse::SavedState(Box::new(vcpu_state))) .expect("vcpu channel unexpectedly closed"); }) .unwrap_or_else(|err| { self.response_sender .send(VcpuResponse::Error(VcpuError::VcpuResponse(err))) .expect("vcpu channel unexpectedly closed"); }); StateMachine::next(Self::paused) } Ok(VcpuEvent::DumpCpuConfig) => { self.kvm_vcpu .dump_cpu_config() .map(|cpu_config| { self.response_sender .send(VcpuResponse::DumpedCpuConfig(Box::new(cpu_config))) .expect("vcpu channel unexpectedly closed"); }) .unwrap_or_else(|err| { self.response_sender .send(VcpuResponse::Error(VcpuError::VcpuResponse(err))) .expect("vcpu channel unexpectedly closed"); }); StateMachine::next(Self::paused) } Ok(VcpuEvent::Finish) => StateMachine::finish(), // Unhandled exit of the other end. Err(_) => { // Move to 'exited' state. self.exit(FcExitCode::GenericError) } } } // Transition to the exited state and finish on command. // Note that this function isn't called when the guest asks for a CPU // reset via the i8042 controller on x86. fn exit(&mut self, exit_code: FcExitCode) -> StateMachine { if let Err(err) = self.exit_evt.write(1) { METRICS.vcpu.failures.inc(); error!("Failed signaling vcpu exit event: {}", err); } // From this state we only accept going to finished. loop { self.response_sender .send(VcpuResponse::Exited(exit_code)) .expect("vcpu channel unexpectedly closed"); // Wait for and only accept 'VcpuEvent::Finish'. if let Ok(VcpuEvent::Finish) = self.event_receiver.recv() { break; } } StateMachine::finish() } /// Runs the vCPU in KVM context and handles the kvm exit reason. /// /// Returns error or enum specifying whether emulation was handled or interrupted. 
pub fn run_emulation(&mut self) -> Result { if self.kvm_vcpu.fd.get_kvm_run().immediate_exit == 1u8 { warn!("Requested a vCPU run with immediate_exit enabled. The operation was skipped"); self.kvm_vcpu.fd.set_kvm_immediate_exit(0); return Ok(VcpuEmulation::Interrupted); } match self.kvm_vcpu.fd.run() { Err(ref err) if err.errno() == libc::EINTR => { self.kvm_vcpu.fd.set_kvm_immediate_exit(0); // Notify that this KVM_RUN was interrupted. Ok(VcpuEmulation::Interrupted) } #[cfg(feature = "gdb")] Ok(VcpuExit::Debug(_)) => { if let Some(gdb_event) = &self.gdb_event { gdb_event .send(get_raw_tid(self.kvm_vcpu.index.into())) .expect("Unable to notify gdb event"); } Ok(VcpuEmulation::Paused) } emulation_result => handle_kvm_exit(&mut self.kvm_vcpu.peripherals, emulation_result), } } } /// Handle the return value of a call to [`VcpuFd::run`] and update our emulation accordingly fn handle_kvm_exit( peripherals: &mut Peripherals, emulation_result: Result, ) -> Result { match emulation_result { Ok(run) => match run { VcpuExit::MmioRead(addr, data) => { if let Some(mmio_bus) = &peripherals.mmio_bus { let _metric = METRICS.vcpu.exit_mmio_read_agg.record_latency_metrics(); if let Err(err) = mmio_bus.read(addr, data) { warn!("Invalid MMIO read @ {addr:#x}:{:#x}: {err}", data.len()); } METRICS.vcpu.exit_mmio_read.inc(); } Ok(VcpuEmulation::Handled) } VcpuExit::MmioWrite(addr, data) => { if let Some(mmio_bus) = &peripherals.mmio_bus { let _metric = METRICS.vcpu.exit_mmio_write_agg.record_latency_metrics(); if let Err(err) = mmio_bus.write(addr, data) { warn!("Invalid MMIO read @ {addr:#x}:{:#x}: {err}", data.len()); } METRICS.vcpu.exit_mmio_write.inc(); } Ok(VcpuEmulation::Handled) } // Documentation specifies that below kvm exits are considered // errors. VcpuExit::FailEntry(hardware_entry_failure_reason, cpu) => { // Hardware entry failure. 
METRICS.vcpu.failures.inc(); error!( "Received KVM_EXIT_FAIL_ENTRY signal: {} on cpu {}", hardware_entry_failure_reason, cpu ); Err(VcpuError::FaultyKvmExit(format!( "{:?}", VcpuExit::FailEntry(hardware_entry_failure_reason, cpu) ))) } VcpuExit::InternalError => { // Failure from the Linux KVM subsystem rather than from the hardware. METRICS.vcpu.failures.inc(); error!("Received KVM_EXIT_INTERNAL_ERROR signal"); Err(VcpuError::FaultyKvmExit(format!( "{:?}", VcpuExit::InternalError ))) } VcpuExit::SystemEvent(event_type, event_flags) => match event_type { KVM_SYSTEM_EVENT_RESET | KVM_SYSTEM_EVENT_SHUTDOWN => { info!( "Received KVM_SYSTEM_EVENT: type: {}, event: {:?}", event_type, event_flags ); Ok(VcpuEmulation::Stopped) } _ => { METRICS.vcpu.failures.inc(); error!( "Received KVM_SYSTEM_EVENT signal type: {}, flag: {:?}", event_type, event_flags ); Err(VcpuError::FaultyKvmExit(format!( "{:?}", VcpuExit::SystemEvent(event_type, event_flags) ))) } }, arch_specific_reason => { // run specific architecture emulation. peripherals.run_arch_emulation(arch_specific_reason) } }, // The unwrap on raw_os_error can only fail if we have a logic // error in our code in which case it is better to panic. Err(ref err) => match err.errno() { libc::EAGAIN => Ok(VcpuEmulation::Handled), libc::ENOSYS => { METRICS.vcpu.failures.inc(); error!("Received ENOSYS error because KVM failed to emulate an instruction."); Err(VcpuError::FaultyKvmExit( "Received ENOSYS error because KVM failed to emulate an instruction." .to_string(), )) } _ => { METRICS.vcpu.failures.inc(); error!("Failure during vcpu run: {}", err); Err(VcpuError::FaultyKvmExit(format!("{}", err))) } }, } } /// List of events that the Vcpu can receive. #[derive(Debug, Clone)] pub enum VcpuEvent { /// The vCPU thread will end when receiving this message. Finish, /// Pause the Vcpu. Pause, /// Event to resume the Vcpu. Resume, /// Event to save the state of a paused Vcpu. 
SaveState, /// Event to dump CPU configuration of a paused Vcpu. DumpCpuConfig, } /// List of responses that the Vcpu reports. pub enum VcpuResponse { /// Requested action encountered an error. Error(VcpuError), /// Vcpu is stopped. Exited(FcExitCode), /// Requested action not allowed. NotAllowed(String), /// Vcpu is paused. Paused, /// Vcpu is resumed. Resumed, /// Vcpu state is saved. SavedState(Box), /// Vcpu is in the state where CPU config is dumped. DumpedCpuConfig(Box), } impl fmt::Debug for VcpuResponse { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { use crate::VcpuResponse::*; match self { Paused => write!(f, "VcpuResponse::Paused"), Resumed => write!(f, "VcpuResponse::Resumed"), Exited(code) => write!(f, "VcpuResponse::Exited({:?})", code), SavedState(_) => write!(f, "VcpuResponse::SavedState"), Error(err) => write!(f, "VcpuResponse::Error({:?})", err), NotAllowed(reason) => write!(f, "VcpuResponse::NotAllowed({})", reason), DumpedCpuConfig(_) => write!(f, "VcpuResponse::DumpedCpuConfig"), } } } /// Wrapper over Vcpu that hides the underlying interactions with the Vcpu thread. #[derive(Debug)] pub struct VcpuHandle { event_sender: Sender, response_receiver: Receiver, /// VcpuFd pub vcpu_fd: VcpuFd, // Rust JoinHandles have to be wrapped in Option if you ever plan on 'join()'ing them. // We want to be able to join these threads in tests. vcpu_thread: Option>, } /// Error type for [`VcpuHandle::send_event`]. #[derive(Debug, derive_more::From, thiserror::Error)] #[error("Failed to signal vCPU: {0}")] pub struct VcpuSendEventError(pub vmm_sys_util::errno::Error); impl VcpuHandle { /// Creates a new [`VcpuHandle`]. /// /// # Arguments /// + `event_sender`: [`Sender`] to communicate [`VcpuEvent`] to control the vcpu. /// + `response_received`: [`Received`] from which the vcpu's responses can be read. /// + `vcpu_thread`: A [`JoinHandle`] for the vcpu thread. 
pub fn new( event_sender: Sender, response_receiver: Receiver, vcpu_fd: VcpuFd, vcpu_thread: thread::JoinHandle<()>, ) -> Self { Self { event_sender, response_receiver, vcpu_fd, vcpu_thread: Some(vcpu_thread), } } /// Sends event to vCPU. /// /// # Errors /// /// When [`vmm_sys_util::linux::signal::Killable::kill`] errors. pub fn send_event(&mut self, event: VcpuEvent) -> Result<(), VcpuSendEventError> { // Use expect() to crash if the other thread closed this channel. self.event_sender .send(event) .expect("event sender channel closed on vcpu end."); // Kick the vcpu so it picks up the message. // Add a fence to ensure the write is visible to the vpu thread self.vcpu_fd.set_kvm_immediate_exit(1); fence(Ordering::Release); self.vcpu_thread .as_ref() // Safe to unwrap since constructor make this 'Some'. .unwrap() .kill(sigrtmin() + VCPU_RTSIG_OFFSET)?; Ok(()) } /// Returns a reference to the [`Received`] from which the vcpu's responses can be read. pub fn response_receiver(&self) -> &Receiver { &self.response_receiver } } // Wait for the Vcpu thread to finish execution impl Drop for VcpuHandle { fn drop(&mut self) { // We assume that by the time a VcpuHandle is dropped, other code has run to // get the state machine loop to finish so the thread is ready to join. // The strategy of avoiding more complex messaging protocols during the Drop // helps avoid cycles which were preventing a truly clean shutdown. // // If the code hangs at this point, that means that a Finish event was not // sent by Vmm. self.vcpu_thread.take().unwrap().join().unwrap(); } } /// Vcpu emulation state. #[derive(Debug, Copy, Clone, PartialEq, Eq)] pub enum VcpuEmulation { /// Handled. Handled, /// Interrupted. Interrupted, /// Stopped. 
Stopped,
/// Pause request
#[cfg(feature = "gdb")]
Paused,
}

#[cfg(test)]
pub(crate) mod tests {
    #![allow(clippy::undocumented_unsafe_blocks)]

    #[cfg(target_arch = "x86_64")]
    use std::collections::BTreeMap;
    use std::sync::atomic::Ordering;
    use std::sync::{Arc, Barrier, Mutex};

    use linux_loader::loader::KernelLoader;
    use vmm_sys_util::errno;

    use super::*;
    use crate::RECV_TIMEOUT_SEC;
    use crate::arch::{BootProtocol, EntryPoint};
    use crate::seccomp::get_empty_filters;
    use crate::utils::mib_to_bytes;
    use crate::utils::signal::validate_signal_num;
    use crate::vstate::bus::BusDevice;
    use crate::vstate::kvm::Kvm;
    use crate::vstate::memory::{GuestAddress, GuestMemoryMmap};
    use crate::vstate::vcpu::VcpuError as EmulationError;
    use crate::vstate::vm::Vm;
    use crate::vstate::vm::tests::setup_vm_with_memory;

    // Minimal no-op MMIO device used to exercise bus reads/writes below.
    struct DummyDevice;
    impl BusDevice for DummyDevice {
        fn read(&mut self, _base: u64, _offset: u64, _data: &mut [u8]) {}
        fn write(&mut self, _base: u64, _offset: u64, _data: &[u8]) -> Option> {
            None
        }
    }

    // Drives handle_kvm_exit through each exit reason it knows about and checks
    // the mapping to Ok(VcpuEmulation)/Err(VcpuError).
    #[test]
    fn test_handle_kvm_exit() {
        let (_, _, mut vcpu) = setup_vcpu(0x1000);
        let res = handle_kvm_exit(&mut vcpu.kvm_vcpu.peripherals, Ok(VcpuExit::Hlt));
        assert!(matches!(
            res,
            Err(EmulationError::UnhandledKvmExit(s)) if s == "Hlt",
        ));
        let res = handle_kvm_exit(&mut vcpu.kvm_vcpu.peripherals, Ok(VcpuExit::Shutdown));
        assert!(matches!(
            res,
            Err(EmulationError::UnhandledKvmExit(s)) if s == "Shutdown",
        ));
        let res = handle_kvm_exit(
            &mut vcpu.kvm_vcpu.peripherals,
            Ok(VcpuExit::FailEntry(0, 0)),
        );
        assert_eq!(
            format!("{:?}", res.unwrap_err()),
            format!(
                "{:?}",
                EmulationError::FaultyKvmExit("FailEntry(0, 0)".to_string())
            )
        );
        let res = handle_kvm_exit(&mut vcpu.kvm_vcpu.peripherals, Ok(VcpuExit::InternalError));
        assert_eq!(
            format!("{:?}", res.unwrap_err()),
            format!(
                "{:?}",
                EmulationError::FaultyKvmExit("InternalError".to_string())
            )
        );
        // System event types 1 and 2 (shutdown/reset) stop emulation cleanly.
        let res = handle_kvm_exit(
            &mut vcpu.kvm_vcpu.peripherals,
            Ok(VcpuExit::SystemEvent(2, &[])),
        );
        assert_eq!(res.unwrap(), VcpuEmulation::Stopped);
        let res = handle_kvm_exit(
            &mut vcpu.kvm_vcpu.peripherals,
            Ok(VcpuExit::SystemEvent(1, &[])),
        );
        assert_eq!(res.unwrap(), VcpuEmulation::Stopped);
        let res = handle_kvm_exit(
            &mut vcpu.kvm_vcpu.peripherals,
            Ok(VcpuExit::SystemEvent(3, &[])),
        );
        assert_eq!(
            format!("{:?}", res.unwrap_err()),
            format!(
                "{:?}",
                EmulationError::FaultyKvmExit("SystemEvent(3, [])".to_string())
            )
        );
        // Check what happens with an unhandled exit reason.
        let res = handle_kvm_exit(&mut vcpu.kvm_vcpu.peripherals, Ok(VcpuExit::Unknown));
        assert_eq!(
            res.unwrap_err().to_string(),
            "Unexpected kvm exit received: Unknown".to_string()
        );
        let res = handle_kvm_exit(
            &mut vcpu.kvm_vcpu.peripherals,
            Err(errno::Error::new(libc::EAGAIN)),
        );
        assert_eq!(res.unwrap(), VcpuEmulation::Handled);
        let res = handle_kvm_exit(
            &mut vcpu.kvm_vcpu.peripherals,
            Err(errno::Error::new(libc::ENOSYS)),
        );
        assert_eq!(
            format!("{:?}", res.unwrap_err()),
            format!(
                "{:?}",
                EmulationError::FaultyKvmExit(
                    "Received ENOSYS error because KVM failed to emulate an instruction."
                        .to_string()
                )
            )
        );
        let res = handle_kvm_exit(
            &mut vcpu.kvm_vcpu.peripherals,
            Err(errno::Error::new(libc::EINVAL)),
        );
        assert_eq!(
            format!("{:?}", res.unwrap_err()),
            format!(
                "{:?}",
                EmulationError::FaultyKvmExit("Invalid argument (os error 22)".to_string())
            )
        );
        // MMIO exits are routed to the bus and count as handled.
        let bus = Arc::new(Bus::new());
        let dummy = Arc::new(Mutex::new(DummyDevice));
        bus.insert(dummy, 0x10, 0x10).unwrap();
        vcpu.set_mmio_bus(bus);
        let addr = 0x10;
        let res = handle_kvm_exit(
            &mut vcpu.kvm_vcpu.peripherals,
            Ok(VcpuExit::MmioRead(addr, &mut [0, 0, 0, 0])),
        );
        assert_eq!(res.unwrap(), VcpuEmulation::Handled);
        let res = handle_kvm_exit(
            &mut vcpu.kvm_vcpu.peripherals,
            Ok(VcpuExit::MmioWrite(addr, &[0, 0, 0, 0])),
        );
        assert_eq!(res.unwrap(), VcpuEmulation::Handled);
    }

    impl PartialEq for VcpuResponse {
        fn eq(&self, other: &Self) -> bool {
            use crate::VcpuResponse::*;
            // Guard match with no wildcard to make sure we catch new enum variants.
match self {
    Paused | Resumed | Exited(_) => (),
    Error(_) | NotAllowed(_) | SavedState(_) | DumpedCpuConfig(_) => (),
};
match (self, other) {
    (Paused, Paused) | (Resumed, Resumed) => true,
    (Exited(code), Exited(other_code)) => code == other_code,
    // Payload-carrying variants compare by variant only (payloads ignored).
    (NotAllowed(_), NotAllowed(_))
    | (SavedState(_), SavedState(_))
    | (DumpedCpuConfig(_), DumpedCpuConfig(_)) => true,
    (Error(err), Error(other_err)) => {
        format!("{:?}", err) == format!("{:?}", other_err)
    }
    _ => false,
}
}
}

// Auxiliary function being used throughout the tests.
#[allow(unused_mut)]
pub(crate) fn setup_vcpu(mem_size: usize) -> (Kvm, Vm, Vcpu) {
    let (kvm, mut vm) = setup_vm_with_memory(mem_size);
    let (mut vcpus, _) = vm.create_vcpus(1).unwrap();
    let mut vcpu = vcpus.remove(0);
    #[cfg(target_arch = "aarch64")]
    vcpu.kvm_vcpu.init(&[]).unwrap();
    (kvm, vm, vcpu)
}

// Loads the per-architecture mock kernel image into guest memory and returns
// its entry point.
fn load_good_kernel(vm_memory: &GuestMemoryMmap) -> GuestAddress {
    use std::fs::File;
    use std::path::PathBuf;

    let mut path = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
    #[cfg(target_arch = "x86_64")]
    path.push("src/test_utils/mock_resources/test_elf.bin");
    #[cfg(target_arch = "aarch64")]
    path.push("src/test_utils/mock_resources/test_pe.bin");

    let mut kernel_file = File::open(path).expect("Cannot open kernel file");

    #[cfg(target_arch = "x86_64")]
    let entry_addr = linux_loader::loader::elf::Elf::load(
        vm_memory,
        Some(GuestAddress(crate::arch::get_kernel_start())),
        &mut kernel_file,
        Some(GuestAddress(crate::arch::get_kernel_start())),
    )
    .unwrap();
    #[cfg(target_arch = "aarch64")]
    let entry_addr =
        linux_loader::loader::pe::PE::load(vm_memory, None, &mut kernel_file, None).unwrap();

    entry_addr.kernel_load
}

// Builds a fully configured, already-started vcpu (thread running) plus its VM
// and exit eventfd, ready for event-driven tests.
fn vcpu_configured_for_boot() -> (Vm, VcpuHandle, EventFd) {
    // Need enough mem to boot linux.
    let mem_size = mib_to_bytes(64);
    let (kvm, vm, mut vcpu) = setup_vcpu(mem_size);

    let vcpu_exit_evt = vcpu.exit_evt.try_clone().unwrap();

    // Needs a kernel since we'll actually run this vcpu.
    let entry_point = EntryPoint {
        entry_addr: load_good_kernel(vm.guest_memory()),
        protocol: BootProtocol::LinuxBoot,
    };

    #[cfg(target_arch = "x86_64")]
    {
        use crate::cpu_config::x86_64::cpuid::Cpuid;
        vcpu.kvm_vcpu
            .configure(
                vm.guest_memory(),
                entry_point,
                &VcpuConfig {
                    vcpu_count: 1,
                    smt: false,
                    cpu_config: CpuConfiguration {
                        cpuid: Cpuid::try_from(kvm.supported_cpuid.clone()).unwrap(),
                        msrs: BTreeMap::new(),
                    },
                },
            )
            .expect("failed to configure vcpu");
    }

    #[cfg(target_arch = "aarch64")]
    vcpu.kvm_vcpu
        .configure(
            vm.guest_memory(),
            entry_point,
            &VcpuConfig {
                vcpu_count: 1,
                smt: false,
                cpu_config: crate::cpu_config::aarch64::CpuConfiguration::default(),
            },
            &kvm.optional_capabilities(),
        )
        .expect("failed to configure vcpu");

    let mut seccomp_filters = get_empty_filters();
    let barrier = Arc::new(Barrier::new(2));
    let vcpu_handle = vcpu
        .start_threaded(
            &vm,
            seccomp_filters.remove("vcpu").unwrap(),
            barrier.clone(),
        )
        .expect("failed to start vcpu");
    // Wait for vCPUs to initialize their TLS before moving forward.
    barrier.wait();

    (vm, vcpu_handle, vcpu_exit_evt)
}

#[test]
fn test_set_mmio_bus() {
    let (_, _, mut vcpu) = setup_vcpu(0x1000);
    assert!(vcpu.kvm_vcpu.peripherals.mmio_bus.is_none());
    vcpu.set_mmio_bus(Arc::new(Bus::new()));
    assert!(vcpu.kvm_vcpu.peripherals.mmio_bus.is_some());
}

#[test]
fn test_vcpu_kick() {
    let (_, vm, mut vcpu) = setup_vcpu(0x1000);

    // Two independent mmaps of the same kvm_run: one for the test thread to
    // write immediate_exit, one for the vcpu thread to observe it.
    let mut kvm_run =
        kvm_ioctls::KvmRunWrapper::mmap_from_fd(&vcpu.kvm_vcpu.fd, vm.fd().run_size())
            .expect("cannot mmap kvm-run");
    let vcpu_kvm_run =
        kvm_ioctls::KvmRunWrapper::mmap_from_fd(&vcpu.kvm_vcpu.fd, vm.fd().run_size())
            .expect("cannot mmap kvm-run");
    let success = Arc::new(std::sync::atomic::AtomicBool::new(false));
    let vcpu_success = success.clone();
    let barrier = Arc::new(Barrier::new(2));
    let vcpu_barrier = barrier.clone();
    // Start Vcpu thread which will be kicked with a signal.
let handle = std::thread::Builder::new()
    .name("test_vcpu_kick".to_string())
    .spawn(move || {
        vcpu.register_kick_signal_handler();
        // Notify TLS was populated.
        vcpu_barrier.wait();
        // Loop for max 1 second to check if the signal handler has run.
        for _ in 0..10 {
            if vcpu_kvm_run.as_ref().immediate_exit == 1 {
                // Signal handler has run and set immediate_exit to 1.
                vcpu_success.store(true, Ordering::Release);
                break;
            }
            std::thread::sleep(std::time::Duration::from_millis(100));
        }
    })
    .expect("cannot start thread");

barrier.wait();
// Set immediate_exit and kick the Vcpu using the custom signal.
kvm_run.as_mut_ref().immediate_exit = 1;
handle
    .kill(sigrtmin() + VCPU_RTSIG_OFFSET)
    .expect("failed to signal thread");
handle.join().expect("failed to join thread");
// Verify that the Vcpu saw its kvm immediate-exit as set.
assert!(success.load(Ordering::Acquire));
}

// Sends an event to a vcpu and expects a particular response.
fn queue_event_expect_response(
    handle: &mut VcpuHandle,
    event: VcpuEvent,
    response: VcpuResponse,
) {
    handle
        .send_event(event)
        .expect("failed to send event to vcpu");
    assert_eq!(
        handle
            .response_receiver()
            .recv_timeout(RECV_TIMEOUT_SEC)
            .expect("did not receive event response from vcpu"),
        response
    );
}

#[test]
fn test_immediate_exit_shortcircuits_execution() {
    let (_, _, mut vcpu) = setup_vcpu(0x1000);

    vcpu.kvm_vcpu.fd.set_kvm_immediate_exit(1);
    // Set a dummy value to be returned by the emulate call
    let result = vcpu.run_emulation().expect("Failed to run emulation");
    assert_eq!(
        result,
        VcpuEmulation::Interrupted,
        "The Immediate Exit short-circuit should have prevented the execution of emulate"
    );

    let event_sender = vcpu.event_sender.take().expect("vCPU already started");
    let _ = event_sender.send(VcpuEvent::Resume);
    vcpu.kvm_vcpu.fd.set_kvm_immediate_exit(1);
    // paused is expected to coerce immediate_exit to 0 when receiving a VcpuEvent::Resume
    let _ = vcpu.paused();
    assert_eq!(
        0,
        vcpu.kvm_vcpu.fd.get_kvm_run().immediate_exit,
        "Immediate Exit should have been disabled by sending Resume to a paused VM"
    )
}

#[test]
fn test_vcpu_pause_resume() {
    let (_vm, mut vcpu_handle, vcpu_exit_evt) = vcpu_configured_for_boot();

    // Queue a Resume event, expect a response.
    queue_event_expect_response(&mut vcpu_handle, VcpuEvent::Resume, VcpuResponse::Resumed);

    // Queue a Pause event, expect a response.
    queue_event_expect_response(&mut vcpu_handle, VcpuEvent::Pause, VcpuResponse::Paused);

    // Validate vcpu handled the EINTR gracefully and didn't exit.
    let err = vcpu_exit_evt.read().unwrap_err();
    assert_eq!(err.raw_os_error().unwrap(), libc::EAGAIN);

    // Queue another Pause event, expect a response.
    queue_event_expect_response(&mut vcpu_handle, VcpuEvent::Pause, VcpuResponse::Paused);

    // Queue a Resume event, expect a response.
    queue_event_expect_response(&mut vcpu_handle, VcpuEvent::Resume, VcpuResponse::Resumed);

    // Queue another Resume event, expect a response.
    queue_event_expect_response(&mut vcpu_handle, VcpuEvent::Resume, VcpuResponse::Resumed);

    // Queue another Pause event, expect a response.
    queue_event_expect_response(&mut vcpu_handle, VcpuEvent::Pause, VcpuResponse::Paused);

    // Queue a Resume event, expect a response.
    queue_event_expect_response(&mut vcpu_handle, VcpuEvent::Resume, VcpuResponse::Resumed);

    vcpu_handle.send_event(VcpuEvent::Finish).unwrap();
}

#[test]
fn test_vcpu_save_state_events() {
    let (_vm, mut vcpu_handle, _vcpu_exit_evt) = vcpu_configured_for_boot();

    // Queue a Resume event, expect a response.
    queue_event_expect_response(&mut vcpu_handle, VcpuEvent::Resume, VcpuResponse::Resumed);

    // Queue a SaveState event, expect a response.
    // SaveState is only allowed while paused, so this must be rejected.
    queue_event_expect_response(
        &mut vcpu_handle,
        VcpuEvent::SaveState,
        VcpuResponse::NotAllowed(String::new()),
    );

    // Queue another Pause event, expect a response.
    queue_event_expect_response(&mut vcpu_handle, VcpuEvent::Pause, VcpuResponse::Paused);

    // Queue a SaveState event, get the response.
vcpu_handle .send_event(VcpuEvent::SaveState) .expect("failed to send event to vcpu"); match vcpu_handle .response_receiver() .recv_timeout(RECV_TIMEOUT_SEC) .expect("did not receive event response from vcpu") { VcpuResponse::SavedState(_) => {} _ => panic!("unexpected response"), }; vcpu_handle.send_event(VcpuEvent::Finish).unwrap(); } #[test] fn test_vcpu_dump_cpu_config() { let (_vm, mut vcpu_handle, _) = vcpu_configured_for_boot(); // Queue a DumpCpuConfig event, expect a DumpedCpuConfig response. vcpu_handle .send_event(VcpuEvent::DumpCpuConfig) .expect("Failed to send an event to vcpu."); match vcpu_handle .response_receiver() .recv_timeout(RECV_TIMEOUT_SEC) .expect("Could not receive a response from vcpu.") { VcpuResponse::DumpedCpuConfig(_) => (), VcpuResponse::Error(err) => panic!("Got an error: {err}"), _ => panic!("Got an unexpected response."), } // Queue a Resume event, expect a response. queue_event_expect_response(&mut vcpu_handle, VcpuEvent::Resume, VcpuResponse::Resumed); // Queue a DumpCpuConfig event, expect a NotAllowed respoonse. // The DumpCpuConfig event is only allowed while paused. queue_event_expect_response( &mut vcpu_handle, VcpuEvent::DumpCpuConfig, VcpuResponse::NotAllowed(String::new()), ); vcpu_handle.send_event(VcpuEvent::Finish).unwrap(); } #[test] fn test_vcpu_rtsig_offset() { validate_signal_num(sigrtmin() + VCPU_RTSIG_OFFSET).unwrap(); } } ================================================ FILE: src/vmm/src/vstate/vm.rs ================================================ // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 // // Portions Copyright 2017 The Chromium OS Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the THIRD-PARTY file. 
use std::collections::HashMap;
use std::fs::OpenOptions;
use std::io::Write;
use std::path::Path;
use std::sync::atomic::{AtomicU32, Ordering};
use std::sync::{Arc, Mutex, MutexGuard};

#[cfg(target_arch = "x86_64")]
use kvm_bindings::KVM_IRQCHIP_IOAPIC;
use kvm_bindings::{
    KVM_IRQ_ROUTING_IRQCHIP, KVM_IRQ_ROUTING_MSI, KVM_MSI_VALID_DEVID, KvmIrqRouting,
    kvm_irq_routing_entry, kvm_userspace_memory_region,
};
use kvm_ioctls::VmFd;
use log::debug;
use serde::{Deserialize, Serialize};
use vmm_sys_util::errno;
use vmm_sys_util::eventfd::EventFd;

pub use crate::arch::{ArchVm as Vm, ArchVmError, VmState};
use crate::arch::{GSI_MSI_END, host_page_size};
use crate::logger::info;
use crate::pci::{DeviceRelocation, DeviceRelocationError, PciDevice};
use crate::persist::CreateSnapshotError;
use crate::vmm_config::snapshot::SnapshotType;
use crate::vstate::bus::Bus;
use crate::vstate::interrupts::{InterruptError, MsixVector, MsixVectorConfig, MsixVectorGroup};
use crate::vstate::memory::{
    GuestMemory, GuestMemoryExtension, GuestMemoryMmap, GuestMemoryRegion, GuestMemoryState,
    GuestRegionMmap, GuestRegionMmapExt, MemoryError,
};
use crate::vstate::resources::ResourceAllocator;
use crate::vstate::vcpu::VcpuError;
use crate::{DirtyBitmap, Vcpu, mem_size_mib};

#[derive(Debug, Serialize, Deserialize)]
/// A struct representing an interrupt line used by some device of the microVM
pub struct RoutingEntry {
    // The raw KVM routing entry for this GSI.
    entry: kvm_irq_routing_entry,
    // Masked entries are skipped when programming KVM's GSI routing table.
    masked: bool,
}

/// Architecture independent parts of a VM.
#[derive(Debug)]
pub struct VmCommon {
    /// The KVM file descriptor used to access this Vm.
    pub fd: VmFd,
    // Maximum number of memory slots KVM supports, cached at creation.
    max_memslots: u32,
    /// The guest memory of this Vm.
    pub guest_memory: GuestMemoryMmap,
    // Next free KVM memslot id; monotonically increasing.
    next_kvm_slot: AtomicU32,
    /// Interrupts used by Vm's devices
    pub interrupts: Mutex>,
    /// Allocator for VM resources
    pub resource_allocator: Mutex,
    /// MMIO bus
    pub mmio_bus: Arc,
}

/// Errors associated with the wrappers over KVM ioctls.
/// Needs `rustfmt::skip` to make multiline comments work
#[rustfmt::skip]
#[derive(Debug, thiserror::Error, displaydoc::Display)]
pub enum VmError {
    /// Cannot set the memory regions: {0}
    SetUserMemoryRegion(kvm_ioctls::Error),
    /// Failed to create VM: {0}
    CreateVm(kvm_ioctls::Error),
    /// Failed to get KVM's dirty log: {0}
    GetDirtyLog(kvm_ioctls::Error),
    /// {0}
    Arch(#[from] ArchVmError),
    /// Error during eventfd operations: {0}
    EventFd(std::io::Error),
    /// Failed to create vcpu: {0}
    CreateVcpu(VcpuError),
    /// The number of configured slots is bigger than the maximum reported by KVM: {0}
    NotEnoughMemorySlots(u32),
    /// Failed to add a memory region: {0}
    InsertRegion(#[from] vm_memory::GuestRegionCollectionError),
    /// Error calling mincore: {0}
    Mincore(vmm_sys_util::errno::Error),
    /// ResourceAllocator error: {0}
    ResourceAllocator(#[from] vm_allocator::Error),
    /// MemoryError error: {0}
    MemoryError(#[from] MemoryError),
}

/// Contains Vm functions that are usable across CPU architectures
impl Vm {
    /// Create a KVM VM
    pub fn create_common(kvm: &crate::vstate::kvm::Kvm) -> Result {
        // It is known that KVM_CREATE_VM occasionally fails with EINTR on heavily loaded machines
        // with many VMs.
        //
        // The behavior itself that KVM_CREATE_VM can return EINTR is intentional. This is because
        // the KVM_CREATE_VM path includes mm_take_all_locks() that is CPU intensive and all CPU
        // intensive syscalls should check for pending signals and return EINTR immediately to allow
        // userland to remain interactive.
        // https://lists.nongnu.org/archive/html/qemu-devel/2014-01/msg01740.html
        //
        // However, it is empirically confirmed that, even though there is no pending signal,
        // KVM_CREATE_VM returns EINTR.
// https://lore.kernel.org/qemu-devel/8735e0s1zw.wl-maz@kernel.org/ // // To mitigate it, QEMU does an infinite retry on EINTR that greatly improves reliabiliy: // - https://github.com/qemu/qemu/commit/94ccff133820552a859c0fb95e33a539e0b90a75 // - https://github.com/qemu/qemu/commit/bbde13cd14ad4eec18529ce0bf5876058464e124 // // Similarly, we do retries up to 5 times. Although Firecracker clients are also able to // retry, they have to start Firecracker from scratch. Doing retries in Firecracker makes // recovery faster and improves reliability. const MAX_ATTEMPTS: u32 = 5; let mut attempt = 1; let fd = loop { match kvm.fd.create_vm() { Ok(fd) => break fd, Err(e) if e.errno() == libc::EINTR && attempt < MAX_ATTEMPTS => { info!("Attempt #{attempt} of KVM_CREATE_VM returned EINTR"); // Exponential backoff (1us, 2us, 4us, and 8us => 15us in total) std::thread::sleep(std::time::Duration::from_micros(2u64.pow(attempt - 1))); } Err(e) => return Err(VmError::CreateVm(e)), } attempt += 1; }; Ok(VmCommon { fd, max_memslots: kvm.max_nr_memslots(), guest_memory: GuestMemoryMmap::default(), next_kvm_slot: AtomicU32::new(0), interrupts: Mutex::new(HashMap::with_capacity(GSI_MSI_END as usize + 1)), resource_allocator: Mutex::new(ResourceAllocator::new()), mmio_bus: Arc::new(Bus::new()), }) } /// Creates the specified number of [`Vcpu`]s. /// /// The returned [`EventFd`] is written to whenever any of the vcpus exit. 
pub fn create_vcpus(&mut self, vcpu_count: u8) -> Result<(Vec, EventFd), VmError> {
    self.arch_pre_create_vcpus(vcpu_count)?;

    let exit_evt = EventFd::new(libc::EFD_NONBLOCK).map_err(VmError::EventFd)?;

    let mut vcpus = Vec::with_capacity(vcpu_count as usize);
    for cpu_idx in 0..vcpu_count {
        // Each vcpu gets its own clone of the shared exit eventfd.
        let exit_evt = exit_evt.try_clone().map_err(VmError::EventFd)?;
        let vcpu = Vcpu::new(cpu_idx, self, exit_evt).map_err(VmError::CreateVcpu)?;
        vcpus.push(vcpu);
    }

    self.arch_post_create_vcpus(vcpu_count)?;

    Ok((vcpus, exit_evt))
}

/// Reserves the next `slot_cnt` contiguous kvm slot ids and returns the first one
pub fn next_kvm_slot(&self, slot_cnt: u32) -> Option {
    let next = self
        .common
        .next_kvm_slot
        .fetch_add(slot_cnt, Ordering::Relaxed);
    // None once the reserved range would exceed KVM's reported memslot limit.
    if self.common.max_memslots <= next {
        None
    } else {
        Some(next)
    }
}

pub(crate) fn set_user_memory_region(
    &self,
    region: kvm_userspace_memory_region,
) -> Result<(), VmError> {
    // SAFETY: Safe because the fd is a valid KVM file descriptor.
    unsafe {
        self.fd()
            .set_user_memory_region(region)
            .map_err(VmError::SetUserMemoryRegion)
    }
}

// Inserts `region` into the guest memory map and registers its plugged slots
// with KVM (unplugged slots get their host mappings protected instead).
fn register_memory_region(&mut self, region: Arc) -> Result<(), VmError> {
    let new_guest_memory = self
        .common
        .guest_memory
        .insert_region(Arc::clone(&region))?;

    region
        .slots()
        .try_for_each(|(ref slot, plugged)| match plugged {
            // if the slot is plugged, add it to kvm user memory regions
            true => self.set_user_memory_region(slot.into()),
            // if the slot is not plugged, protect accesses to it
            false => slot.protect(true).map_err(VmError::MemoryError),
        })?;

    self.common.guest_memory = new_guest_memory;

    Ok(())
}

/// Register a list of new memory regions to this [`Vm`].
pub fn register_dram_memory_regions(
    &mut self,
    regions: Vec,
) -> Result<(), VmError> {
    for region in regions {
        let next_slot = self
            .next_kvm_slot(1)
            .ok_or(VmError::NotEnoughMemorySlots(self.common.max_memslots))?;

        let arcd_region =
            Arc::new(GuestRegionMmapExt::dram_from_mmap_region(region, next_slot));

        self.register_memory_region(arcd_region)?
    }

    Ok(())
}

/// Register a new hotpluggable region to this [`Vm`].
pub fn register_hotpluggable_memory_region(
    &mut self,
    region: GuestRegionMmap,
    slot_size: usize,
) -> Result<(), VmError> {
    // caller should ensure the slot size divides the region length.
    assert!(region.len().is_multiple_of(slot_size as u64));
    let slot_cnt = (region.len() / (slot_size as u64))
        .try_into()
        .map_err(|_| VmError::NotEnoughMemorySlots(self.common.max_memslots))?;

    let slot_from = self
        .next_kvm_slot(slot_cnt)
        .ok_or(VmError::NotEnoughMemorySlots(self.common.max_memslots))?;

    let arcd_region = Arc::new(GuestRegionMmapExt::hotpluggable_from_mmap_region(
        region, slot_from, slot_size,
    ));

    self.register_memory_region(arcd_region)
}

/// Register a list of new memory regions to this [`Vm`].
///
/// Note: regions and state.regions need to be in the same order.
pub fn restore_memory_regions(
    &mut self,
    regions: Vec,
    state: &GuestMemoryState,
) -> Result<(), VmError> {
    for (region, state) in regions.into_iter().zip(state.regions.iter()) {
        let slot_cnt = state
            .plugged
            .len()
            .try_into()
            .map_err(|_| VmError::NotEnoughMemorySlots(self.common.max_memslots))?;
        let next_slot = self
            .next_kvm_slot(slot_cnt)
            .ok_or(VmError::NotEnoughMemorySlots(self.common.max_memslots))?;

        let arcd_region = Arc::new(GuestRegionMmapExt::from_state(region, state, next_slot)?);

        self.register_memory_region(arcd_region)?
    }
    Ok(())
}

/// Gets a reference to the kvm file descriptor owned by this VM.
pub fn fd(&self) -> &VmFd {
    &self.common.fd
}

/// Gets a reference to this [`Vm`]'s [`GuestMemoryMmap`] object
pub fn guest_memory(&self) -> &GuestMemoryMmap {
    &self.common.guest_memory
}

/// Gets a mutable reference to this [`Vm`]'s [`ResourceAllocator`] object
pub fn resource_allocator(&self) -> MutexGuard<'_, ResourceAllocator> {
    self.common
        .resource_allocator
        .lock()
        .expect("Poisoned lock")
}

/// Resets the KVM dirty bitmap for each of the guest's memory regions.
pub fn reset_dirty_bitmap(&self) {
    self.guest_memory()
        .iter()
        .flat_map(|region| region.plugged_slots())
        .for_each(|mem_slot| {
            // KVM_GET_DIRTY_LOG clears the bitmap as a side effect; the
            // returned log is deliberately discarded here.
            let _ = self.fd().get_dirty_log(mem_slot.slot, mem_slot.slice.len());
        });
}

/// Retrieves the KVM dirty bitmap for each of the guest's memory regions.
pub fn get_dirty_bitmap(&self) -> Result {
    self.guest_memory()
        .iter()
        .flat_map(|region| region.plugged_slots())
        .map(|mem_slot| {
            let bitmap = match mem_slot.slice.bitmap() {
                // Dirty page tracking enabled: ask KVM for the real log.
                Some(_) => self
                    .fd()
                    .get_dirty_log(mem_slot.slot, mem_slot.slice.len())
                    .map_err(VmError::GetDirtyLog)?,
                // No tracking: overapproximate via mincore(2).
                None => mincore_bitmap(
                    mem_slot.slice.ptr_guard_mut().as_ptr(),
                    mem_slot.slice.len(),
                )?,
            };
            Ok((mem_slot.slot, bitmap))
        })
        .collect()
}

/// Takes a snapshot of the virtual machine running inside the given [`Vmm`] and saves it to
/// `mem_file_path`.
///
/// If `snapshot_type` is [`SnapshotType::Diff`], and `mem_file_path` exists and is a snapshot
/// file of matching size, then the diff snapshot will be directly merged into the existing
/// snapshot. Otherwise, existing files are simply overwritten.
pub(crate) fn snapshot_memory_to_file(
    &self,
    mem_file_path: &Path,
    snapshot_type: SnapshotType,
) -> Result<(), CreateSnapshotError> {
    use self::CreateSnapshotError::*;

    // Need to check this here, as we create the file in the line below
    let file_existed = mem_file_path.exists();

    let mut file = OpenOptions::new()
        .write(true)
        .create(true)
        .truncate(false)
        .open(mem_file_path)
        .map_err(|err| MemoryBackingFile("open", err))?;

    // Determine what size our total memory area is.
    let mem_size_mib = mem_size_mib(self.guest_memory());
    let expected_size = mem_size_mib * 1024 * 1024;

    if file_existed {
        let file_size = file
            .metadata()
            .map_err(|e| MemoryBackingFile("get_metadata", e))?
            .len();

        // Here we only truncate the file if the size mismatches.
        // - For full snapshots, the entire file's contents will be overwritten anyway. We have
        //   to avoid truncating here to deal with the edge case where it represents the
        //   snapshot file from which this very microVM was loaded (as modifying the memory file
        //   would be reflected in the mmap of the file, meaning a truncate operation would zero
        //   out guest memory, and thus corrupt the VM).
        // - For diff snapshots, we want to merge the diff layer directly into the file.
        if file_size != expected_size {
            file.set_len(0)
                .map_err(|err| MemoryBackingFile("truncate", err))?;
        }
    }

    // Set the length of the file to the full size of the memory area.
    file.set_len(expected_size)
        .map_err(|e| MemoryBackingFile("set_length", e))?;

    match snapshot_type {
        SnapshotType::Diff => {
            let dirty_bitmap = self.get_dirty_bitmap()?;
            self.guest_memory().dump_dirty(&mut file, &dirty_bitmap)?;
        }
        SnapshotType::Full => {
            self.guest_memory().dump(&mut file)?;
            self.reset_dirty_bitmap();
            self.guest_memory().reset_dirty();
        }
    };

    file.flush()
        .map_err(|err| MemoryBackingFile("flush", err))?;
    file.sync_all()
        .map_err(|err| MemoryBackingFile("sync_all", err))
}

/// Register a device IRQ
pub fn register_irq(&self, fd: &EventFd, gsi: u32) -> Result<(), errno::Error> {
    self.common.fd.register_irqfd(fd, gsi)?;
    let mut entry = kvm_irq_routing_entry {
        gsi,
        type_: KVM_IRQ_ROUTING_IRQCHIP,
        ..Default::default()
    };

    #[cfg(target_arch = "x86_64")]
    {
        entry.u.irqchip.irqchip = KVM_IRQCHIP_IOAPIC;
    }
    #[cfg(target_arch = "aarch64")]
    {
        entry.u.irqchip.irqchip = 0;
    }
    entry.u.irqchip.pin = gsi;

    self.common
        .interrupts
        .lock()
        .expect("Poisoned lock")
        .insert(
            gsi,
            RoutingEntry {
                entry,
                masked: false,
            },
        );
    Ok(())
}

/// Register an MSI device interrupt
pub fn register_msi(
    &self,
    route: &MsixVector,
    masked: bool,
    config: MsixVectorConfig,
) -> Result<(), errno::Error> {
    let mut entry = kvm_irq_routing_entry {
        gsi: route.gsi,
        type_: KVM_IRQ_ROUTING_MSI,
        ..Default::default()
    };
    entry.u.msi.address_lo = config.low_addr;
    entry.u.msi.address_hi = config.high_addr;
    entry.u.msi.data = config.data;

    if self.common.fd.check_extension(kvm_ioctls::Cap::MsiDevid) {
        // According to KVM documentation:
        // https://docs.kernel.org/virt/kvm/api.html#kvm-set-gsi-routing
        //
        // if the capability is set, we need to set the flag and provide a valid unique device
        // ID. "For PCI, this is usually a BDF identifier in the lower 16 bits".
        //
        // The layout of `config.devid` is:
        //
        // |---- 16 bits ----|-- 8 bits --|-- 5 bits --|-- 3 bits --|
        // |     segment     |     bus    |   device   |  function  |
        //
        // For the time being, we are using a single PCI segment and a single bus per segment
        // so just passing config.devid should be fine.
        entry.flags = KVM_MSI_VALID_DEVID;
        entry.u.msi.__bindgen_anon_1.devid = config.devid;
    }

    self.common
        .interrupts
        .lock()
        .expect("Poisoned lock")
        .insert(route.gsi, RoutingEntry { entry, masked });

    Ok(())
}

/// Create a group of MSI-X interrupts
pub fn create_msix_group(vm: Arc, count: u16) -> Result {
    debug!("Creating new MSI group with {count} vectors");
    let mut vectors = Vec::with_capacity(count as usize);
    for gsi in vm
        .resource_allocator()
        .allocate_gsi_msi(count as u32)?
        .iter()
    {
        vectors.push(MsixVector::new(*gsi, false)?);
    }

    Ok(MsixVectorGroup { vm, vectors })
}

/// Set GSI routes to KVM
pub fn set_gsi_routes(&self) -> Result<(), InterruptError> {
    let entries = self.common.interrupts.lock().expect("Poisoned lock");
    let mut routes = KvmIrqRouting::new(0)?;
    for entry in entries.values() {
        // Masked entries are intentionally left out of the routing table.
        if entry.masked {
            continue;
        }

        routes.push(entry.entry)?;
    }

    self.common.fd.set_gsi_routing(&routes)?;
    Ok(())
}
}

/// Use `mincore(2)` to overapproximate the dirty bitmap for the given memslot. To be used
/// if a diff snapshot is requested, but dirty page tracking wasn't enabled.
fn mincore_bitmap(addr: *mut u8, len: usize) -> Result, VmError> {
    // TODO: Once Host 5.10 goes out of support, we can make this more robust and work on
    // swap-enabled systems, by doing mlock2(MLOCK_ONFAULT)/munlock() in this function (to
    // force swapped-out pages to get paged in, so that mincore will consider them incore).
    // However, on AMD (m6a/m7a) 5.10, doing so introduces a 100%/30ms regression to snapshot
    // creation, even if swap is disabled, so currently it cannot be done.

    // Mincore always works at PAGE_SIZE granularity, even if the VMA we are dealing with
    // is a hugetlbfs VMA (e.g. to report a single hugepage as "present", mincore will
    // give us 512 4k markers with the lowest bit set).
    let page_size = host_page_size();
    // One byte per page for mincore; packed into 64 pages per u64 for the result.
    let mut mincore_bitmap = vec![0u8; len / page_size];
    let mut bitmap = vec![0u64; (len / page_size).div_ceil(64)];

    // SAFETY: The safety invariants of GuestRegionMmap ensure that region.as_ptr() is a valid
    // userspace mapping of size region.len() bytes. The bitmap has exactly one byte for each
    // page in this userspace mapping. Note that mincore does not operate on bitmaps like
    // KVM_MEM_LOG_DIRTY_PAGES, but rather it uses 8 bits per page (e.g. 1 byte), setting the
    // least significant bit to 1 if the page corresponding to a byte is in core (available in
    // the page cache and resolvable via just a minor page fault).
    let r = unsafe { libc::mincore(addr.cast(), len, mincore_bitmap.as_mut_ptr()) };

    if r != 0 {
        return Err(VmError::Mincore(vmm_sys_util::errno::Error::last()));
    }

    // Compress the one-byte-per-page mincore output into a packed u64 bitmap.
    for (page_idx, b) in mincore_bitmap.iter().enumerate() {
        bitmap[page_idx / 64] |= (*b as u64 & 0x1) << (page_idx as u64 % 64);
    }

    Ok(bitmap)
}

impl DeviceRelocation for Vm {
    // BAR relocation is not supported by this VMM.
    fn move_bar(
        &self,
        _old_base: u64,
        _new_base: u64,
        _len: u64,
        _pci_dev: &mut dyn PciDevice,
    ) -> Result<(), DeviceRelocationError> {
        Err(DeviceRelocationError::NotSupported)
    }
}

#[cfg(test)]
pub(crate) mod tests {
    use std::sync::atomic::Ordering;

    use vm_memory::GuestAddress;
    use vm_memory::mmap::MmapRegionBuilder;

    use super::*;
    use crate::snapshot::Persist;
    use crate::test_utils::single_region_mem_raw;
    use crate::utils::mib_to_bytes;
    use crate::vstate::kvm::Kvm;
    use crate::vstate::memory::GuestRegionMmap;

    // Auxiliary function being used throughout the tests.
    pub(crate) fn setup_vm() -> (Kvm, Vm) {
        let kvm = Kvm::new(vec![]).expect("Cannot create Kvm");
        let vm = Vm::new(&kvm).expect("Cannot create new vm");
        (kvm, vm)
    }

    // Auxiliary function being used throughout the tests.
    pub(crate) fn setup_vm_with_memory(mem_size: usize) -> (Kvm, Vm) {
        let (kvm, mut vm) = setup_vm();
        let gm = single_region_mem_raw(mem_size);
        vm.register_dram_memory_regions(gm).unwrap();
        (kvm, vm)
    }

    #[test]
    fn test_new() {
        // Testing with a valid /dev/kvm descriptor.
        let kvm = Kvm::new(vec![]).expect("Cannot create Kvm");
        Vm::new(&kvm).unwrap();
    }

    #[test]
    fn test_register_memory_regions() {
        let (_, mut vm) = setup_vm();

        // Trying to set a memory region with a size that is not a multiple of GUEST_PAGE_SIZE
        // will result in error.
let gm = single_region_mem_raw(0x10); let res = vm.register_dram_memory_regions(gm); assert_eq!( res.unwrap_err().to_string(), "Cannot set the memory regions: Invalid argument (os error 22)" ); let gm = single_region_mem_raw(0x1000); let res = vm.register_dram_memory_regions(gm); res.unwrap(); } #[test] fn test_too_many_regions() { let (kvm, mut vm) = setup_vm(); let max_nr_regions = kvm.max_nr_memslots(); // SAFETY: valid mmap parameters let ptr = unsafe { libc::mmap( std::ptr::null_mut(), 0x1000, libc::PROT_READ | libc::PROT_WRITE, libc::MAP_ANONYMOUS | libc::MAP_PRIVATE, -1, 0, ) }; assert_ne!(ptr, libc::MAP_FAILED); for i in 0..=max_nr_regions { // SAFETY: we assert above that the ptr is valid, and the size matches what we passed to // mmap let region = unsafe { MmapRegionBuilder::new(0x1000) .with_raw_mmap_pointer(ptr.cast()) .build() .unwrap() }; let region = GuestRegionMmap::new(region, GuestAddress(i as u64 * 0x1000)).unwrap(); let res = vm.register_dram_memory_regions(vec![region]); if max_nr_regions <= i { assert!( matches!(res, Err(VmError::NotEnoughMemorySlots(v)) if v == max_nr_regions), "{:?} at iteration {}", res, i ); } else { res.unwrap_or_else(|_| { panic!( "to be able to insert more regions in iteration {i} - max_nr_memslots: \ {max_nr_regions} - num_regions: {}", vm.guest_memory().num_regions() ) }); } } } #[test] fn test_create_vcpus() { let vcpu_count = 2; let (_, mut vm) = setup_vm_with_memory(mib_to_bytes(128)); let (vcpu_vec, _) = vm.create_vcpus(vcpu_count).unwrap(); assert_eq!(vcpu_vec.len(), vcpu_count as usize); } fn enable_irqchip(vm: &mut Vm) { #[cfg(target_arch = "x86_64")] vm.setup_irqchip().unwrap(); #[cfg(target_arch = "aarch64")] vm.setup_irqchip(1).unwrap(); } fn create_msix_group(vm: &Arc) -> MsixVectorGroup { Vm::create_msix_group(vm.clone(), 4).unwrap() } #[test] fn test_msi_vector_group_new() { let (_, vm) = setup_vm_with_memory(mib_to_bytes(128)); let vm = Arc::new(vm); let msix_group = create_msix_group(&vm); 
assert_eq!(msix_group.num_vectors(), 4); } #[test] fn test_msi_vector_group_enable_disable() { let (_, mut vm) = setup_vm_with_memory(mib_to_bytes(128)); enable_irqchip(&mut vm); let vm = Arc::new(vm); let msix_group = create_msix_group(&vm); // Initially all vectors are disabled for route in &msix_group.vectors { assert!(!route.enabled.load(Ordering::Acquire)) } // Enable works msix_group.enable().unwrap(); for route in &msix_group.vectors { assert!(route.enabled.load(Ordering::Acquire)); } // Enabling an enabled group doesn't error out msix_group.enable().unwrap(); // Disable works msix_group.disable().unwrap(); for route in &msix_group.vectors { assert!(!route.enabled.load(Ordering::Acquire)) } // Disabling a disabled group doesn't error out } #[test] fn test_msi_vector_group_trigger() { let (_, mut vm) = setup_vm_with_memory(mib_to_bytes(128)); enable_irqchip(&mut vm); let vm = Arc::new(vm); let msix_group = create_msix_group(&vm); // We can now trigger all vectors for i in 0..4 { msix_group.trigger(i).unwrap() } // We can't trigger an invalid vector msix_group.trigger(4).unwrap_err(); } #[test] fn test_msi_vector_group_notifier() { let (_, vm) = setup_vm_with_memory(mib_to_bytes(128)); let vm = Arc::new(vm); let msix_group = create_msix_group(&vm); for i in 0..4 { assert!(msix_group.notifier(i).is_some()); } assert!(msix_group.notifier(4).is_none()); } #[test] fn test_msi_vector_group_update_invalid_vector() { let (_, mut vm) = setup_vm_with_memory(mib_to_bytes(128)); enable_irqchip(&mut vm); let vm = Arc::new(vm); let msix_group = create_msix_group(&vm); let config = MsixVectorConfig { high_addr: 0x42, low_addr: 0x12, data: 0x12, devid: 0xafa, }; msix_group.update(0, config, true, true).unwrap(); msix_group.update(4, config, true, true).unwrap_err(); } #[test] fn test_msi_vector_group_update() { let (_, mut vm) = setup_vm_with_memory(mib_to_bytes(128)); enable_irqchip(&mut vm); let vm = Arc::new(vm); assert!(vm.common.interrupts.lock().unwrap().is_empty()); 
let msix_group = create_msix_group(&vm); // Set some configuration for the vectors. Initially all are masked let mut config = MsixVectorConfig { high_addr: 0x42, low_addr: 0x13, data: 0x12, devid: 0xafa, }; for i in 0..4 { config.data = 0x12 * i; msix_group.update(i as usize, config, true, false).unwrap(); } // All vectors should be disabled for vector in &msix_group.vectors { assert!(!vector.enabled.load(Ordering::Acquire)); } for i in 0..4 { let gsi = crate::arch::GSI_MSI_START + i; let interrupts = vm.common.interrupts.lock().unwrap(); let kvm_route = interrupts.get(&gsi).unwrap(); assert!(kvm_route.masked); assert_eq!(kvm_route.entry.gsi, gsi); assert_eq!(kvm_route.entry.type_, KVM_IRQ_ROUTING_MSI); // SAFETY: because we know we setup MSI routes. unsafe { assert_eq!(kvm_route.entry.u.msi.address_hi, 0x42); assert_eq!(kvm_route.entry.u.msi.address_lo, 0x13); assert_eq!(kvm_route.entry.u.msi.data, 0x12 * i); } } // Simply enabling the vectors should not update the registered IRQ routes msix_group.enable().unwrap(); for i in 0..4 { let gsi = crate::arch::GSI_MSI_START + i; let interrupts = vm.common.interrupts.lock().unwrap(); let kvm_route = interrupts.get(&gsi).unwrap(); assert!(kvm_route.masked); assert_eq!(kvm_route.entry.gsi, gsi); assert_eq!(kvm_route.entry.type_, KVM_IRQ_ROUTING_MSI); // SAFETY: because we know we setup MSI routes. 
unsafe { assert_eq!(kvm_route.entry.u.msi.address_hi, 0x42); assert_eq!(kvm_route.entry.u.msi.address_lo, 0x13); assert_eq!(kvm_route.entry.u.msi.data, 0x12 * i); } } // Updating the config of a vector should enable its route (and only its route) config.data = 0; msix_group.update(0, config, false, true).unwrap(); for i in 0..4 { let gsi = crate::arch::GSI_MSI_START + i; let interrupts = vm.common.interrupts.lock().unwrap(); let kvm_route = interrupts.get(&gsi).unwrap(); assert_eq!(kvm_route.masked, i != 0); assert_eq!(kvm_route.entry.gsi, gsi); assert_eq!(kvm_route.entry.type_, KVM_IRQ_ROUTING_MSI); // SAFETY: because we know we setup MSI routes. unsafe { assert_eq!(kvm_route.entry.u.msi.address_hi, 0x42); assert_eq!(kvm_route.entry.u.msi.address_lo, 0x13); assert_eq!(kvm_route.entry.u.msi.data, 0x12 * i); } } } #[test] fn test_msi_vector_group_persistence() { let (_, mut vm) = setup_vm_with_memory(mib_to_bytes(128)); enable_irqchip(&mut vm); let vm = Arc::new(vm); let msix_group = create_msix_group(&vm); msix_group.enable().unwrap(); let state = msix_group.save(); let restored_group = MsixVectorGroup::restore(vm, &state).unwrap(); assert_eq!(msix_group.num_vectors(), restored_group.num_vectors()); // Even if an MSI group is enabled, we don't save it as such. During restoration, the PCI // transport will make sure the correct config is set for the vectors and enable them // accordingly. 
for (id, vector) in msix_group.vectors.iter().enumerate() { let new_vector = &restored_group.vectors[id]; assert_eq!(vector.gsi, new_vector.gsi); assert!(!new_vector.enabled.load(Ordering::Acquire)); } } #[cfg(target_arch = "x86_64")] #[test] fn test_restore_state_resource_allocator() { use vm_allocator::AllocPolicy; let (_, mut vm) = setup_vm_with_memory(0x1000); vm.setup_irqchip().unwrap(); // Allocate a GSI and some memory and make sure they are still allocated after restore let (gsi, range) = { let mut resource_allocator = vm.resource_allocator(); let gsi = resource_allocator.allocate_gsi_msi(1).unwrap()[0]; let range = resource_allocator .allocate_32bit_mmio_memory(1024, 1024, AllocPolicy::FirstMatch) .unwrap(); (gsi, range) }; let state = vm.save_state().unwrap(); let serialized_data = bitcode::serialize(&state).unwrap(); let restored_state: VmState = bitcode::deserialize(&serialized_data).unwrap(); vm.restore_state(&restored_state).unwrap(); let mut resource_allocator = vm.resource_allocator(); let gsi_new = resource_allocator.allocate_gsi_msi(1).unwrap()[0]; assert_eq!(gsi + 1, gsi_new); resource_allocator .allocate_32bit_mmio_memory(1024, 1024, AllocPolicy::ExactMatch(range)) .unwrap_err(); let range_new = resource_allocator .allocate_32bit_mmio_memory(1024, 1024, AllocPolicy::FirstMatch) .unwrap(); assert_eq!(range + 1024, range_new); } } ================================================ FILE: src/vmm/tests/devices.rs ================================================ // Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
// SPDX-License-Identifier: Apache-2.0 #![allow( clippy::cast_possible_truncation, clippy::tests_outside_test_module, clippy::undocumented_unsafe_blocks )] use std::os::raw::{c_int, c_void}; use std::os::unix::io::{AsRawFd, RawFd}; use std::sync::{Arc, Mutex}; use event_manager::{EventManager, SubscriberOps}; use libc::EFD_NONBLOCK; use vm_superio::Serial; use vmm::devices::legacy::serial::SerialOut; use vmm::devices::legacy::{EventFdTrigger, SerialEventsWrapper, SerialWrapper}; use vmm::vstate::bus::BusDevice; use vmm_sys_util::eventfd::EventFd; fn create_serial( pipe: c_int, ) -> Arc>>> { // Serial input is the reading end of the pipe. let serial_in = MockSerialInput(pipe); let kick_stdin_evt = EventFdTrigger::new(EventFd::new(libc::EFD_NONBLOCK).unwrap()); Arc::new(Mutex::new(SerialWrapper { serial: Serial::with_events( EventFdTrigger::new(EventFd::new(EFD_NONBLOCK).unwrap()), SerialEventsWrapper { buffer_ready_event_fd: Some(kick_stdin_evt.try_clone().unwrap()), }, SerialOut::Stdout(std::io::stdout()), ), input: Some(Box::new(serial_in)), })) } #[derive(Debug)] pub struct MockSerialInput(pub RawFd); impl std::io::Read for MockSerialInput { fn read(&mut self, buf: &mut [u8]) -> std::io::Result { let count = unsafe { libc::read(self.0, buf.as_mut_ptr().cast(), buf.len()) }; usize::try_from(count).map_err(|_| std::io::Error::last_os_error()) } } impl AsRawFd for MockSerialInput { fn as_raw_fd(&self) -> RawFd { self.0 } } #[test] fn test_issue_serial_hangup_anon_pipe_while_registered_stdin() { let mut fds: [c_int; 2] = [0; 2]; let rc = unsafe { libc::pipe(fds.as_mut_ptr()) }; assert!(rc == 0); // Serial input is the reading end of the pipe. let serial = create_serial(fds[0]); // Make reading fd non blocking to read just what is inflight. let flags = unsafe { libc::fcntl(fds[0], libc::F_GETFL, 0) }; let mut rc = unsafe { libc::fcntl(fds[0], libc::F_SETFL, flags | libc::O_NONBLOCK) }; assert!(rc == 0); const BYTES_COUNT: usize = 65; // Serial FIFO_SIZE + 1. 
let mut dummy_data = [1u8; BYTES_COUNT]; rc = unsafe { libc::write( fds[1], dummy_data.as_mut_ptr() as *const c_void, dummy_data.len(), ) as i32 }; assert!(dummy_data.len() == usize::try_from(rc).unwrap()); // Register the reading end of the pipe to the event manager, to be processed later on. let mut event_manager = EventManager::new().unwrap(); let _id = event_manager.add_subscriber(serial.clone()); // `EventSet::IN` was received on stdin. The event handling will consume // 64 bytes from stdin. The stdin monitoring is still armed. let mut ev_count = event_manager.run().unwrap(); assert_eq!(ev_count, 1); let mut data = [0u8; BYTES_COUNT]; // On the main thread, we will simulate guest "vCPU" thread serial reads. let data_bus_offset = 0; for i in 0..BYTES_COUNT - 1 { serial .lock() .unwrap() .read(0x0, data_bus_offset, &mut data[i..=i]); } assert!(data[..31] == dummy_data[..31]); assert!(data[32..64] == dummy_data[32..64]); // The avail capacity of the serial FIFO is 64. // Read the 65th from the stdin through the kick stdin event triggered by 64th of the serial // FIFO read, or by the armed level-triggered stdin monitoring. Either one of the events might // be handled first. The handling of the second event will find the stdin without any pending // bytes and will result in EWOULDBLOCK. Usually, EWOULDBLOCK will reregister the stdin, but // since it was not unregistered before, it will do a noop. ev_count = event_manager.run().unwrap(); assert_eq!(ev_count, 2); // The avail capacity of the serial FIFO is 63. rc = unsafe { libc::write( fds[1], dummy_data.as_mut_ptr() as *const c_void, dummy_data.len(), ) as i32 }; assert!(dummy_data.len() == usize::try_from(rc).unwrap()); // Writing to the other end of the pipe triggers handling a stdin event. // Now, 63 bytes will be read from stdin, filling up the buffer. ev_count = event_manager.run().unwrap(); assert_eq!(ev_count, 1); // Close the writing end (this sends an HANG_UP to the reading end). 
// While the stdin is registered, this event is caught by the event manager. rc = unsafe { libc::close(fds[1]) }; assert!(rc == 0); // This cycle of epoll has two important events. First, the received HANGUP and second // the fact that the FIFO is full, so even if the stdin reached EOF, there are still // pending bytes to be read. We still unregister the stdin and keep reading from it until // we get all pending bytes. ev_count = event_manager.run().unwrap(); assert_eq!(ev_count, 1); // Free up 64 bytes from the serial FIFO. for i in 0..BYTES_COUNT - 1 { serial .lock() .unwrap() .read(0x0, data_bus_offset, &mut data[i..=i]); } // Process the kick stdin event generated by the reading of the 64th byte of the serial FIFO. // This will consume some more bytes from the stdin while the stdin is unregistered. ev_count = event_manager.run().unwrap(); assert_eq!(ev_count, 1); // Two more bytes left. At the 2nd byte, another kick read stdin event is generated, // trying to fill again the serial FIFO with more bytes. for i in 0..2 { serial .lock() .unwrap() .read(0x0, data_bus_offset, &mut data[i..=i]); } // We try to read again, but we detect that stdin received previously EOF. // This can be deduced by reading from a non-blocking fd and getting 0 bytes as a result, // instead of EWOUDBLOCK. We unregister the stdin and the kick stdin read evt. ev_count = event_manager.run().unwrap(); assert_eq!(ev_count, 1); // Nothing can interrupt us. ev_count = event_manager.run_with_timeout(1).unwrap(); assert_eq!(ev_count, 0); } #[test] fn test_issue_hangup() { let mut fds: [c_int; 2] = [0; 2]; let rc = unsafe { libc::pipe(fds.as_mut_ptr()) }; assert!(rc == 0); // Serial input is the reading end of the pipe. let serial = create_serial(fds[0]); // Make reading fd non blocking to read just what is inflight. 
let flags = unsafe { libc::fcntl(fds[0], libc::F_GETFL, 0) }; let mut rc = unsafe { libc::fcntl(fds[0], libc::F_SETFL, flags | libc::O_NONBLOCK) }; assert!(rc == 0); // Close the writing end (this sends an HANG_UP to the reading end). // While the stdin is registered, this event is caught by the event manager. rc = unsafe { libc::close(fds[1]) }; assert!(rc == 0); // Register the reading end of the pipe to the event manager, to be processed later on. let mut event_manager = EventManager::new().unwrap(); let _id = event_manager.add_subscriber(serial); let mut ev_count = event_manager.run().unwrap(); assert_eq!(ev_count, 1); // Nothing can interrupt us. ev_count = event_manager.run_with_timeout(1).unwrap(); assert_eq!(ev_count, 0); } #[test] fn test_issue_serial_hangup_anon_pipe_while_unregistered_stdin() { let mut fds: [c_int; 2] = [0; 2]; let rc = unsafe { libc::pipe(fds.as_mut_ptr()) }; assert!(rc == 0); // Serial input is the reading end of the pipe. let serial = create_serial(fds[0]); // Make reading fd non blocking to read just what is inflight. let flags = unsafe { libc::fcntl(fds[0], libc::F_GETFL, 0) }; let mut rc = unsafe { libc::fcntl(fds[0], libc::F_SETFL, flags | libc::O_NONBLOCK) }; assert!(rc == 0); const BYTES_COUNT: usize = 65; // Serial FIFO_SIZE + 1. let mut dummy_data = [1u8; BYTES_COUNT]; rc = unsafe { libc::write( fds[1], dummy_data.as_mut_ptr() as *const c_void, dummy_data.len(), ) as i32 }; assert!(dummy_data.len() == usize::try_from(rc).unwrap()); // Register the reading end of the pipe to the event manager, to be processed later on. let mut event_manager = EventManager::new().unwrap(); let _id = event_manager.add_subscriber(serial.clone()); // `EventSet::IN` was received on stdin. The event handling will consume // 64 bytes from stdin. The stdin monitoring is still armed. 
let mut ev_count = event_manager.run_with_timeout(0).unwrap(); assert_eq!(ev_count, 1); let mut data = [0u8; BYTES_COUNT]; // On the main thread, we will simulate guest "vCPU" thread serial reads. let data_bus_offset = 0; for i in 0..BYTES_COUNT - 1 { serial .lock() .unwrap() .read(0x0, data_bus_offset, &mut data[i..=i]); } assert!(data[..31] == dummy_data[..31]); assert!(data[32..64] == dummy_data[32..64]); // The avail capacity of the serial FIFO is 64. // Read the 65th from the stdin through the kick stdin event triggered by 64th of the serial // FIFO read, or by the armed level-triggered stdin monitoring. Either one of the events might // be handled first. The handling of the second event will find the stdin without any pending // bytes and will result in EWOULDBLOCK. Usually, EWOULDBLOCK will reregister the stdin, but // since it was not unregistered before, it will do a noop. ev_count = event_manager.run().unwrap(); assert_eq!(ev_count, 2); // The avail capacity of the serial FIFO is 63. rc = unsafe { libc::write( fds[1], dummy_data.as_mut_ptr() as *const c_void, dummy_data.len(), ) as i32 }; assert!(dummy_data.len() == usize::try_from(rc).unwrap()); // Writing to the other end of the pipe triggers handling an stdin event. // Now, 63 bytes will be read from stdin, filling up the buffer. ev_count = event_manager.run().unwrap(); assert_eq!(ev_count, 1); // Serial FIFO is full, so silence the stdin. We do not need any other interruptions // until the serial FIFO is freed. ev_count = event_manager.run().unwrap(); assert_eq!(ev_count, 1); // Close the writing end (this sends an HANG_UP to the reading end). // While the stdin is unregistered, this event is not caught by the event manager. rc = unsafe { libc::close(fds[1]) }; assert!(rc == 0); // This would be a blocking epoll_wait, since the buffer is full and stdin is unregistered. // There is no event that can break the epoll wait loop. 
ev_count = event_manager.run_with_timeout(0).unwrap(); assert_eq!(ev_count, 0); // Free up 64 bytes from the serial FIFO. for i in 0..BYTES_COUNT - 1 { serial .lock() .unwrap() .read(0x0, data_bus_offset, &mut data[i..=i]); } // Process the kick stdin event generated by the reading of the 64th byte of the serial FIFO. // This will consume some more bytes from the stdin. Keep in mind that the HANGUP event was // lost and we do not know that the stdin reached EOF. ev_count = event_manager.run().unwrap(); assert_eq!(ev_count, 1); // Two more bytes left. At the 2nd byte, another kick read stdin event is generated, // trying to fill again the serial FIFO with more bytes. Keep in mind that the HANGUP event was // lost and we do not know that the stdin reached EOF. for i in 0..2 { serial .lock() .unwrap() .read(0x0, data_bus_offset, &mut data[i..=i]); } // We try to read again, but we detect that stdin received previously EOF. // This can be deduced by reading from a non-blocking fd and getting 0 bytes as a result, // instead of EWOUDBLOCK. We unregister the stdin and the kick stdin read evt. ev_count = event_manager.run().unwrap(); assert_eq!(ev_count, 1); // Nothing can interrupt us. ev_count = event_manager.run_with_timeout(0).unwrap(); assert_eq!(ev_count, 0); } ================================================ FILE: src/vmm/tests/integration_tests.rs ================================================ // Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
// SPDX-License-Identifier: Apache-2.0 #![allow(clippy::cast_possible_truncation, clippy::tests_outside_test_module)] use std::io::{Seek, SeekFrom}; use std::sync::{Arc, Mutex}; use std::thread; use std::time::Duration; use vmm::builder::build_and_boot_microvm; use vmm::devices::virtio::block::CacheType; use vmm::persist::{MicrovmState, MicrovmStateError, VmInfo, snapshot_state_sanity_check}; use vmm::resources::VmResources; use vmm::rpc_interface::{ LoadSnapshotError, PrebootApiController, RuntimeApiController, VmmAction, VmmActionError, }; use vmm::seccomp::get_empty_filters; use vmm::snapshot::Snapshot; use vmm::test_utils::mock_resources::{MockVmResources, NOISY_KERNEL_IMAGE}; use vmm::test_utils::{create_vmm, default_vmm, default_vmm_no_boot}; use vmm::vmm_config::balloon::BalloonDeviceConfig; use vmm::vmm_config::boot_source::BootSourceConfig; use vmm::vmm_config::drive::BlockDeviceConfig; use vmm::vmm_config::instance_info::{InstanceInfo, VmState}; use vmm::vmm_config::machine_config::{MachineConfig, MachineConfigUpdate}; use vmm::vmm_config::net::NetworkInterfaceConfig; use vmm::vmm_config::snapshot::{ CreateSnapshotParams, LoadSnapshotParams, MemBackendConfig, MemBackendType, SnapshotType, }; use vmm::vmm_config::vsock::VsockDeviceConfig; use vmm::{DumpCpuConfigError, EventManager, FcExitCode, Vmm}; use vmm_sys_util::tempfile::TempFile; #[allow(unused_mut, unused_variables)] fn check_booted_microvm(vmm: Arc>, mut evmgr: EventManager) { // On x86_64, the vmm should exit once its workload completes and signals the exit event. // On aarch64, the test kernel doesn't exit, so the vmm is force-stopped. #[cfg(target_arch = "x86_64")] evmgr.run_with_timeout(500).unwrap(); #[cfg(target_arch = "aarch64")] vmm.lock().unwrap().stop(FcExitCode::Ok); assert_eq!( vmm.lock().unwrap().shutdown_exit_code(), Some(FcExitCode::Ok) ); } #[test] fn test_build_and_boot_microvm() { // Error case: no boot source configured. 
{ let resources: VmResources = MockVmResources::new().into(); let mut event_manager = EventManager::new().unwrap(); let empty_seccomp_filters = get_empty_filters(); let vmm_ret = build_and_boot_microvm( &InstanceInfo::default(), &resources, &mut event_manager, &empty_seccomp_filters, ); assert_eq!(format!("{:?}", vmm_ret.err()), "Some(MissingKernelConfig)"); } for pci_enabled in [false, true] { for memory_hotplug in [false, true] { let (vmm, evmgr) = create_vmm(None, false, true, pci_enabled, memory_hotplug); check_booted_microvm(vmm, evmgr); } } } #[allow(unused_mut, unused_variables)] fn check_build_microvm(vmm: Arc>, mut evmgr: EventManager) { // The built microVM should be in the `VmState::Paused` state here. assert_eq!(vmm.lock().unwrap().instance_info().state, VmState::Paused); // The microVM should be able to resume and exit successfully. // On x86_64, the vmm should exit once its workload completes and signals the exit event. // On aarch64, the test kernel doesn't exit, so the vmm is force-stopped. vmm.lock().unwrap().resume_vm().unwrap(); #[cfg(target_arch = "x86_64")] evmgr.run_with_timeout(500).unwrap(); #[cfg(target_arch = "aarch64")] vmm.lock().unwrap().stop(FcExitCode::Ok); assert_eq!( vmm.lock().unwrap().shutdown_exit_code(), Some(FcExitCode::Ok) ); } #[test] fn test_build_microvm() { for pci_enabled in [false, true] { for memory_hotplug in [false, true] { let (vmm, evmgr) = create_vmm(None, false, false, pci_enabled, memory_hotplug); check_build_microvm(vmm, evmgr); } } } fn pause_resume_microvm(vmm: Arc>) { let mut api_controller = RuntimeApiController::new(vmm.clone()); // There's a race between this thread and the vcpu thread, but this thread // should be able to pause vcpu thread before it finishes running its test-binary. api_controller.handle_request(VmmAction::Pause).unwrap(); // Pausing again the microVM should not fail (microVM remains in the // `Paused` state). 
api_controller.handle_request(VmmAction::Pause).unwrap(); api_controller.handle_request(VmmAction::Resume).unwrap(); vmm.lock().unwrap().stop(FcExitCode::Ok); } #[test] fn test_pause_resume_microvm() { for pci_enabled in [false, true] { for memory_hotplug in [false, true] { // Tests that pausing and resuming a microVM work as expected. let (vmm, _) = create_vmm(None, false, true, pci_enabled, memory_hotplug); pause_resume_microvm(vmm); } } } #[test] #[cfg(target_arch = "x86_64")] fn test_dirty_bitmap_success() { let vmms = [ vmm::test_utils::dirty_tracking_vmm(Some(NOISY_KERNEL_IMAGE)), default_vmm(Some(NOISY_KERNEL_IMAGE)), ]; for (vmm, _) in vmms { // Let it churn for a while and dirty some pages... thread::sleep(Duration::from_millis(100)); let bitmap = vmm.lock().unwrap().vm.get_dirty_bitmap().unwrap(); let num_dirty_pages: u32 = bitmap .values() .map(|bitmap_per_region| { // Gently coerce to u32 let num_dirty_pages_per_region: u32 = bitmap_per_region.iter().map(|n| n.count_ones()).sum(); num_dirty_pages_per_region }) .sum(); assert!(num_dirty_pages > 0); vmm.lock().unwrap().stop(FcExitCode::Ok); } } #[test] fn test_disallow_snapshots_without_pausing() { let (vmm, _) = default_vmm(Some(NOISY_KERNEL_IMAGE)); let vm_info = VmInfo { mem_size_mib: 1u64, ..Default::default() }; // Verify saving state while running is not allowed. assert!(matches!( vmm.lock().unwrap().save_state(&vm_info), Err(MicrovmStateError::NotAllowed(_)) )); // Pause microVM. vmm.lock().unwrap().pause_vm().unwrap(); // It is now allowed. vmm.lock().unwrap().save_state(&vm_info).unwrap(); // Stop. vmm.lock().unwrap().stop(FcExitCode::Ok); } #[test] fn test_disallow_dump_cpu_config_without_pausing() { let (vmm, _) = default_vmm_no_boot(Some(NOISY_KERNEL_IMAGE)); // This call should succeed since the microVM is in the paused state before boot. vmm.lock().unwrap().dump_cpu_config().unwrap(); // Boot the microVM. 
vmm.lock().unwrap().resume_vm().unwrap(); // Verify this call is not allowed while running. assert!(matches!( vmm.lock().unwrap().dump_cpu_config(), Err(DumpCpuConfigError::NotAllowed(_)) )); // Stop the microVM. vmm.lock().unwrap().stop(FcExitCode::Ok); } fn verify_create_snapshot( is_diff: bool, pci_enabled: bool, memory_hotplug: bool, ) -> (TempFile, TempFile) { let snapshot_file = TempFile::new().unwrap(); let memory_file = TempFile::new().unwrap(); let (vmm, _) = create_vmm( Some(NOISY_KERNEL_IMAGE), is_diff, true, pci_enabled, memory_hotplug, ); let vm_info = VmInfo::from(&*vmm.lock().unwrap()); let mut controller = RuntimeApiController::new(vmm.clone()); // Be sure that the microVM is running. thread::sleep(Duration::from_millis(200)); // Pause microVM. controller.handle_request(VmmAction::Pause).unwrap(); // Create snapshot. let snapshot_type = match is_diff { true => SnapshotType::Diff, false => SnapshotType::Full, }; let snapshot_params = CreateSnapshotParams { snapshot_type, snapshot_path: snapshot_file.as_path().to_path_buf(), mem_file_path: memory_file.as_path().to_path_buf(), }; controller .handle_request(VmmAction::CreateSnapshot(snapshot_params)) .unwrap(); vmm.lock().unwrap().stop(FcExitCode::Ok); // Check that we can deserialize the microVM state from `snapshot_file`. let restored_microvm_state: MicrovmState = Snapshot::load(&mut snapshot_file.as_file()).unwrap().data; assert_eq!(restored_microvm_state.vm_info, vm_info); // Verify deserialized data. // The default vmm has no devices and one vCPU. 
assert_eq!( restored_microvm_state .device_states .mmio_state .block_devices .len(), 0 ); assert_eq!( restored_microvm_state .device_states .mmio_state .net_devices .len(), 0 ); assert!( restored_microvm_state .device_states .mmio_state .vsock_device .is_none() ); assert_eq!(restored_microvm_state.vcpu_states.len(), 1); (snapshot_file, memory_file) } fn verify_load_snapshot(snapshot_file: TempFile, memory_file: TempFile) { let mut event_manager = EventManager::new().unwrap(); let empty_seccomp_filters = get_empty_filters(); let mut vm_resources = VmResources::default(); let mut preboot_api_controller = PrebootApiController::new( &empty_seccomp_filters, InstanceInfo::default(), &mut vm_resources, &mut event_manager, ); preboot_api_controller .handle_preboot_request(VmmAction::LoadSnapshot(LoadSnapshotParams { snapshot_path: snapshot_file.as_path().to_path_buf(), mem_backend: MemBackendConfig { backend_path: memory_file.as_path().to_path_buf(), backend_type: MemBackendType::File, }, track_dirty_pages: false, resume_vm: true, network_overrides: vec![], vsock_override: None, })) .unwrap(); let vmm = preboot_api_controller.built_vmm.take().unwrap(); assert_eq!(vmm.lock().unwrap().instance_info.state, VmState::Running); vmm.lock().unwrap().stop(FcExitCode::Ok); } #[test] fn test_create_and_load_snapshot() { for diff_snap in [false, true] { for pci_enabled in [false, true] { for memory_hotplug in [false, true] { // Create snapshot. let (snapshot_file, memory_file) = verify_create_snapshot(diff_snap, pci_enabled, memory_hotplug); // Create a new microVm from snapshot. This only tests code-level logic; it verifies // that a microVM can be built with no errors from given snapshot. // It does _not_ verify that the guest is actually restored properly. We're using // python integration tests for that. 
verify_load_snapshot(snapshot_file, memory_file); } } } } #[test] fn test_snapshot_load_sanity_checks() { let microvm_state = get_microvm_state_from_snapshot(false); check_snapshot(microvm_state); let microvm_state = get_microvm_state_from_snapshot(true); check_snapshot(microvm_state); } fn check_snapshot(mut microvm_state: MicrovmState) { use vmm::persist::SnapShotStateSanityCheckError; snapshot_state_sanity_check(µvm_state).unwrap(); // Remove memory regions. microvm_state.vm_state.memory.regions.clear(); // Validate sanity checks fail because there is no mem region in state. assert_eq!( snapshot_state_sanity_check(µvm_state), Err(SnapShotStateSanityCheckError::NoMemory) ); } fn get_microvm_state_from_snapshot(pci_enabled: bool) -> MicrovmState { // Create a diff snapshot let (snapshot_file, _) = verify_create_snapshot(true, pci_enabled, false); // Deserialize the microVM state. snapshot_file.as_file().seek(SeekFrom::Start(0)).unwrap(); Snapshot::load(&mut snapshot_file.as_file()).unwrap().data } fn verify_load_snap_disallowed_after_boot_resources(res: VmmAction, res_name: &str) { let (snapshot_file, memory_file) = verify_create_snapshot(false, false, false); let mut event_manager = EventManager::new().unwrap(); let empty_seccomp_filters = get_empty_filters(); let mut vm_resources = VmResources::default(); let mut preboot_api_controller = PrebootApiController::new( &empty_seccomp_filters, InstanceInfo::default(), &mut vm_resources, &mut event_manager, ); preboot_api_controller.handle_preboot_request(res).unwrap(); // Load snapshot should no longer be allowed. 
    let req = VmmAction::LoadSnapshot(LoadSnapshotParams {
        snapshot_path: snapshot_file.as_path().to_path_buf(),
        mem_backend: MemBackendConfig {
            backend_path: memory_file.as_path().to_path_buf(),
            backend_type: MemBackendType::File,
        },
        track_dirty_pages: false,
        resume_vm: false,
        network_overrides: vec![],
        vsock_override: None,
    });
    let err = preboot_api_controller.handle_preboot_request(req);
    assert!(
        matches!(
            err.unwrap_err(),
            VmmActionError::LoadSnapshot(LoadSnapshotError::LoadSnapshotNotAllowed)
        ),
        "LoadSnapshot should be disallowed after {}",
        res_name
    );
}

/// Configuring any boot-specific resource must make a subsequent
/// `LoadSnapshot` request fail with `LoadSnapshotNotAllowed`.
#[test]
fn test_preboot_load_snap_disallowed_after_boot_resources() {
    let tmp_file = TempFile::new().unwrap();
    let tmp_file = tmp_file.as_path().to_str().unwrap().to_string();

    // Verify LoadSnapshot not allowed after configuring various boot-specific resources.
    let req = VmmAction::ConfigureBootSource(BootSourceConfig {
        kernel_image_path: tmp_file.clone(),
        ..Default::default()
    });
    verify_load_snap_disallowed_after_boot_resources(req, "ConfigureBootSource");

    let config = BlockDeviceConfig {
        drive_id: String::new(),
        partuuid: None,
        is_root_device: false,
        cache_type: CacheType::Unsafe,
        is_read_only: Some(false),
        path_on_host: Some(tmp_file),
        rate_limiter: None,
        file_engine_type: None,
        socket: None,
    };
    let req = VmmAction::InsertBlockDevice(config);
    verify_load_snap_disallowed_after_boot_resources(req, "InsertBlockDevice");

    let req = VmmAction::InsertNetworkDevice(NetworkInterfaceConfig {
        iface_id: String::new(),
        host_dev_name: String::new(),
        guest_mac: None,
        rx_rate_limiter: None,
        tx_rate_limiter: None,
    });
    verify_load_snap_disallowed_after_boot_resources(req, "InsertNetworkDevice");

    let req = VmmAction::SetBalloonDevice(BalloonDeviceConfig::default());
    verify_load_snap_disallowed_after_boot_resources(req, "SetBalloonDevice");

    let req = VmmAction::SetVsockDevice(VsockDeviceConfig {
        vsock_id: Some(String::new()),
        guest_cid: 0,
        uds_path: String::new(),
    });
    verify_load_snap_disallowed_after_boot_resources(req,
"SetVsockDevice"); let req = VmmAction::UpdateMachineConfiguration(MachineConfigUpdate::from(MachineConfig::default())); verify_load_snap_disallowed_after_boot_resources(req, "SetVmConfiguration"); } ================================================ FILE: src/vmm/tests/io_uring.rs ================================================ // Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 #![allow(clippy::cast_possible_truncation, clippy::tests_outside_test_module)] use std::os::unix::fs::FileExt; use std::os::unix::io::AsRawFd; use std::thread; use std::time::Duration; use vm_memory::VolatileMemory; use vmm::vstate::memory::{Bytes, MmapRegion}; use vmm_sys_util::epoll::{ControlOperation, Epoll, EpollEvent, EventSet}; use vmm_sys_util::eventfd::EventFd; use vmm_sys_util::tempfile::TempFile; mod test_utils { use vm_memory::VolatileMemory; use vmm::io_uring::operation::{OpCode, Operation}; use vmm::io_uring::{IoUring, IoUringError, SQueueError}; use vmm::vstate::memory::MmapRegion; fn drain_cqueue(ring: &mut IoUring) { while let Some(entry) = ring.pop().unwrap() { entry.result().unwrap(); } } pub fn drive_submission_and_completion( ring: &mut IoUring, mem_region: &MmapRegion, opcode: OpCode, num_bytes: usize, ) { for i in 0..num_bytes { loop { let operation = match opcode { OpCode::Read => Operation::read( 0, mem_region .as_volatile_slice() .subslice(i, 1) .unwrap() .ptr_guard_mut() .as_ptr() as usize, 1, i as u64, i, ), OpCode::Write => Operation::write( 0, mem_region .as_volatile_slice() .subslice(i, 1) .unwrap() .ptr_guard_mut() .as_ptr() as usize, 1, i as u64, i, ), _ => panic!("Only supports read and write."), }; match ring.push(operation) { Ok(()) => break, Err((IoUringError::SQueue(SQueueError::FullQueue), _)) => { // Stop and wait. 
ring.submit_and_wait_all().unwrap(); drain_cqueue(ring); // Retry this OP } Err(_) => panic!("Unexpected error."), } } } ring.submit_and_wait_all().unwrap(); drain_cqueue(ring); assert_eq!(ring.pending_sqes().unwrap(), 0); } } use vmm::io_uring::operation::{OpCode, Operation}; use vmm::io_uring::restriction::Restriction; use vmm::io_uring::{IoUring, IoUringError, SQueueError}; use crate::test_utils::drive_submission_and_completion; const NUM_ENTRIES: u32 = 128; #[test] fn test_ring_new() { // Invalid entries count: 0. assert!(matches!( IoUring::::new(0, vec![], vec![], None), Err(IoUringError::Setup(err)) if err.kind() == std::io::ErrorKind::InvalidInput )); // Try to register too many files. let dummy_file = TempFile::new().unwrap().into_file(); assert!(matches!( IoUring::::new(10, vec![&dummy_file; 40000usize], vec![], None), // Max is 32768. Err(IoUringError::RegisterFileLimitExceeded) )); } #[test] fn test_eventfd() { // Test that events get delivered. let eventfd = EventFd::new(0).unwrap(); let file = TempFile::new().unwrap().into_file(); let mut ring = IoUring::new(NUM_ENTRIES, vec![&file], vec![], Some(eventfd.as_raw_fd())).unwrap(); let user_data: u8 = 71; let buf = [0; 4]; let epoll = Epoll::new().unwrap(); let mut ready_event = EpollEvent::default(); epoll .ctl( ControlOperation::Add, eventfd.as_raw_fd(), EpollEvent::new(EventSet::IN, 0), ) .unwrap(); ring.push(Operation::read(0, buf.as_ptr() as usize, 4, 0, user_data)) .unwrap(); ring.submit().unwrap(); assert_eq!( epoll .wait(500, std::slice::from_mut(&mut ready_event)) .unwrap(), 1 ); assert_eq!(ready_event.event_set(), EventSet::IN); } #[test] fn test_restrictions() { // Check that only the allowlisted opcodes are permitted. { let file = TempFile::new().unwrap().into_file(); let mut ring = IoUring::new( NUM_ENTRIES, vec![&file], vec![ Restriction::RequireFixedFds, Restriction::AllowOpCode(OpCode::Read), ], None, ) .unwrap(); let buf = [0; 4]; // Read operations are allowed. 
        ring.push(Operation::read(0, buf.as_ptr() as usize, 4, 0, 71))
            .unwrap();
        assert_eq!(ring.submit_and_wait_all().unwrap(), 1);
        ring.pop().unwrap().unwrap().result().unwrap();

        // Other operations are not allowed.
        ring.push(Operation::write(0, buf.as_ptr() as usize, 4, 0, 71))
            .unwrap();
        assert_eq!(ring.submit_and_wait_all().unwrap(), 1);
        // The push succeeds, but the restricted opcode completes with an
        // error result.
        ring.pop().unwrap().unwrap().result().unwrap_err();
    }
}

/// Checks the error/limit behavior of `IoUring::push`.
#[test]
fn test_ring_push() {
    // Forgot to register file.
    {
        let buf = [0; 4];
        let mut ring = IoUring::new(NUM_ENTRIES, vec![], vec![], None).unwrap();
        // The user data (71) is returned alongside the error.
        assert!(matches!(
            ring.push(Operation::read(0, buf.as_ptr() as usize, 4, 0, 71)),
            Err((IoUringError::NoRegisteredFds, 71))
        ));
        assert_eq!(ring.pending_sqes().unwrap(), 0);
    }

    // Now register file.
    {
        let file = TempFile::new().unwrap().into_file();
        let mut ring = IoUring::new(NUM_ENTRIES, vec![&file], vec![], None).unwrap();
        let user_data: u8 = 71;
        let buf = [0; 4];

        // Invalid fd.
        assert!(matches!(
            ring.push(Operation::read(1, buf.as_ptr() as usize, 4, 0, user_data)),
            Err((IoUringError::InvalidFixedFd(1), 71))
        ));
        assert_eq!(ring.pending_sqes().unwrap(), 0);
        assert_eq!(ring.num_ops(), 0);

        // Valid fd.
        ring.push(Operation::read(0, buf.as_ptr() as usize, 4, 0, user_data))
            .unwrap();
        assert_eq!(ring.pending_sqes().unwrap(), 1);
        assert_eq!(ring.num_ops(), 1);

        // Full Queue.
        for _ in 1..(NUM_ENTRIES) {
            ring.push(Operation::read(0, buf.as_ptr() as usize, 4, 0, user_data))
                .unwrap();
        }
        assert_eq!(ring.pending_sqes().unwrap(), NUM_ENTRIES);
        assert_eq!(ring.num_ops(), NUM_ENTRIES);
        assert!(matches!(
            ring.push(Operation::read(0, buf.as_ptr() as usize, 4, 0, user_data)),
            Err((IoUringError::SQueue(SQueueError::FullQueue), 71))
        ));
        assert_eq!(ring.pending_sqes().unwrap(), NUM_ENTRIES);
        assert_eq!(ring.num_ops(), NUM_ENTRIES);

        // We didn't get to submit so pop() should return None.
        assert!(ring.pop().unwrap().is_none());
        assert_eq!(ring.num_ops(), NUM_ENTRIES);

        // Full Ring.
        ring.submit().unwrap();
        // Wait for the io_uring ops to reach the CQ
        thread::sleep(Duration::from_millis(150));
        for _ in 0..NUM_ENTRIES {
            ring.push(Operation::read(0, buf.as_ptr() as usize, 4, 0, user_data))
                .unwrap();
        }
        ring.submit().unwrap();
        // Wait for the io_uring ops to reach the CQ
        thread::sleep(Duration::from_millis(150));
        assert_eq!(ring.num_ops(), NUM_ENTRIES * 2);
        // The CQ should be full now
        assert!(matches!(
            ring.push(Operation::read(0, buf.as_ptr() as usize, 4, 0, user_data)),
            Err((IoUringError::FullCQueue, 71))
        ));

        // Check if there are NUM_ENTRIES * 2 cqes
        let mut num_cqes = 0;
        while let Ok(Some(_entry)) = ring.pop() {
            num_cqes += 1;
        }
        assert_eq!(num_cqes, NUM_ENTRIES * 2);
        assert_eq!(ring.num_ops(), 0);
    }
}

/// Checks that `submit()` returns the number of sqes submitted in this call
/// (not the total number of in-flight ops).
#[test]
fn test_ring_submit() {
    {
        let file = TempFile::new().unwrap().into_file();
        let mut ring = IoUring::new(NUM_ENTRIES, vec![&file], vec![], None).unwrap();
        let user_data: u8 = 71;
        let buf = [0; 4];

        // Return 0 if we didn't push any sqes.
        assert_eq!(ring.submit().unwrap(), 0);
        assert_eq!(ring.num_ops(), 0);

        // Now push an sqe.
        ring.push(Operation::read(0, buf.as_ptr() as usize, 4, 0, user_data))
            .unwrap();
        assert_eq!(ring.num_ops(), 1);
        assert_eq!(ring.submit().unwrap(), 1);

        // Now push & submit some more.
        ring.push(Operation::read(0, buf.as_ptr() as usize, 4, 0, user_data))
            .unwrap();
        ring.push(Operation::read(0, buf.as_ptr() as usize, 4, 0, user_data))
            .unwrap();
        assert_eq!(ring.num_ops(), 3);
        // Only the 2 newly pushed sqes are submitted by this call.
        assert_eq!(ring.submit().unwrap(), 2);
    }
}

/// Checks that `submit_and_wait_all()` blocks until all submitted sqes have
/// completed and reports their count.
#[test]
fn test_submit_and_wait_all() {
    let file = TempFile::new().unwrap().into_file();
    let mut ring = IoUring::new(NUM_ENTRIES, vec![&file], vec![], None).unwrap();
    let user_data: u8 = 71;
    let buf = [0; 4];

    // Return 0 if we didn't push any sqes.
    assert_eq!(ring.submit_and_wait_all().unwrap(), 0);

    // Now push an sqe.
ring.push(Operation::read(0, buf.as_ptr() as usize, 4, 0, user_data)) .unwrap(); assert_eq!(ring.pending_sqes().unwrap(), 1); assert_eq!(ring.num_ops(), 1); // A correct waiting period yields the completed entries. assert_eq!(ring.submit_and_wait_all().unwrap(), 1); assert_eq!(ring.pop().unwrap().unwrap().user_data(), user_data); assert_eq!(ring.pending_sqes().unwrap(), 0); assert_eq!(ring.num_ops(), 0); // Now push, submit & wait for some more entries. ring.push(Operation::read(0, buf.as_ptr() as usize, 4, 0, 72)) .unwrap(); ring.push(Operation::read(0, buf.as_ptr() as usize, 4, 0, 73)) .unwrap(); ring.push(Operation::read(0, buf.as_ptr() as usize, 4, 0, 74)) .unwrap(); ring.push(Operation::read(0, buf.as_ptr() as usize, 4, 0, 75)) .unwrap(); assert_eq!(ring.pending_sqes().unwrap(), 4); assert_eq!(ring.num_ops(), 4); assert_eq!(ring.submit_and_wait_all().unwrap(), 4); assert_eq!(ring.pending_sqes().unwrap(), 0); assert_eq!(ring.num_ops(), 4); assert!(ring.pop().unwrap().is_some()); assert!(ring.pop().unwrap().is_some()); assert!(ring.pop().unwrap().is_some()); assert!(ring.pop().unwrap().is_some()); assert!(ring.pop().unwrap().is_none()); assert_eq!(ring.num_ops(), 0); } #[test] fn test_write() { // Test that writing the sorted values 1-100 into a file works correctly. const NUM_BYTES: usize = 100; // Setup. let file = TempFile::new().unwrap().into_file(); let mut ring = IoUring::new(NUM_ENTRIES, vec![&file], vec![], None).unwrap(); // Create & init a memory mapping for storing the write buffers. let mem_region: MmapRegion = MmapRegion::build( None, NUM_BYTES, libc::PROT_READ | libc::PROT_WRITE, libc::MAP_ANONYMOUS | libc::MAP_PRIVATE, ) .unwrap(); let expected_result: Vec = (0..(NUM_BYTES as u8)).collect(); for i in 0..NUM_BYTES { mem_region .as_volatile_slice() .write_obj(i as u8, i) .unwrap(); } // Init the file with all zeros. file.write_all_at(&[0; NUM_BYTES], 0).unwrap(); // Perform the IO. 
drive_submission_and_completion(&mut ring, &mem_region, OpCode::Write, NUM_BYTES); // Verify the result. let mut buf = [0u8; NUM_BYTES]; file.read_exact_at(&mut buf, 0).unwrap(); assert_eq!(buf, &expected_result[..]); } #[test] fn test_read() { // Test that reading the sorted values 1-100 from a file works correctly. const NUM_BYTES: usize = 100; // Setup. let file = TempFile::new().unwrap().into_file(); let mut ring = IoUring::new(NUM_ENTRIES, vec![&file], vec![], None).unwrap(); // Create & init a memory mapping for storing the read buffers. let mem_region: MmapRegion = MmapRegion::build( None, NUM_BYTES, libc::PROT_READ | libc::PROT_WRITE, libc::MAP_ANONYMOUS | libc::MAP_PRIVATE, ) .unwrap(); // Init the file with 1-100. let init_contents: Vec = (0..(NUM_BYTES as u8)).collect(); file.write_all_at(&init_contents, 0).unwrap(); // Perform the IO. drive_submission_and_completion(&mut ring, &mem_region, OpCode::Read, NUM_BYTES); let mut buf = [0; NUM_BYTES]; mem_region .as_volatile_slice() .read_slice(&mut buf, 0) .unwrap(); // Verify the result. assert_eq!(buf, &init_contents[..]); } ================================================ FILE: tests/README.md ================================================ # Firecracker Integration Tests The tests herein are meant to uphold the security, quality, and performance contracts of Firecracker. ## Running The testing system is built around [pytest](https://docs.pytest.org/en/latest/). Our `tools/devtool` script is a convenience wrapper which automatically downloads necessary test artifacts from S3, before invoking pytest inside a docker container. For detailed help on usage, see `tools/devtool help`. To run all available tests that would also run as part of our PR CI (e.g. 
excluding tests marked with `pytest.mark.nonci`): ```sh tools/devtool -y test ``` To run only tests from specific directories and/or files: ```sh tools/devtool -y test -- integration_tests/performance/test_boottime.py ``` To run a single specific test from a file: ```sh tools/devtool -y test -- integration_tests/performance/test_boottime.py::test_boottime ``` Note that all paths should be specified relative to the `tests` directory, _not_ the repository root. Alternatively, pytest provides the option to run all tests where the test name contains some substring via the `-k` option: ```sh tools/devtool -y test -- -k 1024 integration_tests/performance/test_boottime.py::test_boottime ``` This is particularly useful for specifying parameters of test functions. For example, the above command will run all boottime tests with a microVM size of 1024MB. If you are not interested in the capabilities of `devtool`, use pytest directly, either from inside the container: ```sh tools/devtool -y shell -p pytest [...] ``` or natively on your dev box: ```sh python3 -m pytest [...] ``` ### Output Output, including testrun results, goes to `stdout`. Errors go to `stderr`. By default, stdout and stderr are captured while tests are running and are printed in the final failure report only if they fail. To print them while running regardless of success or failure, pass the `-s` flag, e.g. `tools/devtool -y test -- -s`. ### Dependencies - A bare-metal `Linux` host with `uname -r` >= 5.10 and KVM enabled (`/dev/kvm` device node exists) - Docker - `awscli` version 2 ## Rust Integration Tests The `pytest`-powered integration tests rely on Firecracker's HTTP API for configuring and communicating with the VMM. Alongside these, the `vmm` crate also includes several [native-Rust integration tests](../src/vmm/tests/), which exercise its programmatic API without the HTTP integration. `Cargo` automatically picks up these tests when `cargo test` is issued. They also count towards code coverage. 
To run *only* the Rust integration tests: ```bash cargo test --test integration_tests --all ``` Unlike unit tests, Rust integration tests are each run in a separate process. `cargo` also packages them in a new crate. This has several known side effects: 1. Only the `pub` functions can be called. This is fine, as it allows the VMM to be consumed as a programmatic user would. If any function is necessary but not `pub`, please consider carefully whether it conceptually *needs* to be in the public interface before making it so. 1. The correct functioning scenario of the `vmm` implies that it `exit`s with code `0`. This is necessary for proper resource cleanup. However, `cargo` doesn't expect the test process to initiate its own demise, therefore it will not be able to properly collect test output. Example: ```bash cargo test --test integration_tests running 3 tests test test_setup_serial_device ... ok ``` To learn more about Rust integration test, see [the Rust book](https://doc.rust-lang.org/book/ch11-03-test-organization.html#integration-tests). ## A/B-Tests A/B-Testing is a testing strategy where some test function is executed twice in different environments (the _A_ and _B_ environments), and the overall test result depends on a comparison of these outputs of the test function in these two environments. The advantage of A/B-testing is that it does not require the specification of a ground truth to compare against. It is instead dynamically generated by running the test function in environment _A_. Firecracker's A/B-testing generally compares Firecracker binaries compiled from two separate commits (e.g. an _A_ binary which is compiled from the HEAD of the main branch, and a _B_ binary which is compiled from the HEAD of a pull request opened against main). We use this testing approach if a test's ground truth... - ...can change due to influence external to the code base (e.g. 
a security test that fails if a CVE is published for one of our dependencies), or - ...is too complex/changes too often to reasonably be contained in the code base (e.g. extensive performance benchmark results). For examples of how to utilize A/B-testing inside an integration test, have a look at our [A/B-Testing module](framework/ab_test.py) or our [`cargo audit` test](integration_tests/security/test_sec_audit.py). If such an A/B-Test is executed outside of the context of a PR (meaning there is no canonical choice of _A_ and _B_ to be made), it will simply try to assert the state of the environment in which it was executed (e.g. the `cargo audit` test above when run on a PR will fail iff a newly added dependency has a known open RustSec advisory. If run outside a PR, it will fail if any existing dependency has an open RustSec advisory). ### Functional A/B-Tests Firecracker has some functional A/B-tests (for example, in `test_vulnerabilities.py`), which generally compare the state of the pull request target branch (e.g. `main`), with the PR head. However, when running these locally, pytest does not know anything about potential PRs that the commit the tests are being run on are contained in, and as such cannot do this A/B-Test. To run functional A/B-Tests locally, you need to create a "fake" PR environment by setting the `BUILDKITE_PULL_REQUEST` and `BUILDKITE_PULL_REQUEST_BASE_BRANCH` environment variables: ``` BUILDKITE_PULL_REQUEST=true BUILDKITE_PULL_REQUEST_BASE_BRANCH=main ./tools/devtool test -- integration_tests/security/test_vulnerabilities.py ``` ### Performance A/B-Tests Firecracker has a special framework for orchestrating long-running A/B-tests which run outside the pre-PR CI. Instead, these tests are scheduled to run post-merge. Specific tests, such as our [snapshot restore latency tests](integration_tests/performance/test_snapshot.py) contain no assertions themselves, but rather they emit data series using the `aws_embedded_metrics` library. 
When executed by the [`tools/ab_test.py`](../tools/ab_test.py) orchestration
script, these data series are collected. The orchestration script executes each
test twice with different Firecracker binaries, and then matches up
corresponding data series from the _A_ and _B_ run. For each data series, it
performs a non-parametric test. For each data series where the difference
between the _A_ and _B_ run is considered statistically significant, it will
print out the associated metric. Please see `tools/ab_test.py --help` for
information on how to configure what the script considers significant.

Writing your own A/B-Test is easy: Simply write a test that outputs a data
series and has no functional assertions. Then, when this test is run under the
A/B-Test orchestrator, all data series emitted will be picked up automatically
for statistical analysis.

To add a new A/B-Test to our post-PR test suite, add the corresponding test
function to [`.buildkite/pipeline_perf.py`](../.buildkite/pipeline_perf.py).

To manually run an A/B-Test, use

```sh
tools/devtool -y test --ab [optional arguments to ab_test.py] run \
  --binaries-a <dir A> --binaries-b <dir B> \
  [optional --artifacts-a <name A> --artifacts-b <name B>] \
  --pytest-opts <pytest arguments>
```

- _dir A_ and _dir B_ are directories containing firecracker and jailer
  binaries whose performance characteristics you wish to compare
- _name A_ and _name B_ are the names of the artifacts that runs A and B will
  use, respectively

You can use `./tools/devtool build --rev <revision> --release` to compile
binaries from an arbitrary git object (commit SHAs, branches, tags etc.). This
will create sub-directories in `build` containing the binaries.

For example, to compare boottime of microVMs between Firecracker binaries
compiled from the `main` branch and the `HEAD` of your current branch, run

```sh
tools/devtool -y build --rev main --release
tools/devtool -y build --rev HEAD --release
tools/devtool -y test --no-build --ab -- run build/main build/HEAD --pytest-opts integration_tests/performance/test_boottime.py::test_boottime
```

To download custom artifacts use `./tools/devtool download_ci_artifacts ...`.
This will place artifacts into the `build/artifacts` directory.

#### How to Write an A/B-Compatible Test and Common Pitfalls

First, **A/B-Compatible tests need to emit more than one data point for each
metric for which they wish to support A/B-testing**. This is because
non-parametric tests operate on data series instead of individual data points.

When emitting metrics with `aws_embedded_metrics`, each metric (data series) is
associated with a set of dimensions. The `tools/ab_test.py` script uses these
dimensions to match up data series between two test runs. It only matches up
two data series with the same name if their dimensions match.

Special care needs to be taken when pytest expands the argument passed to
`tools/ab_test.py`'s `--pytest-opts` option into multiple individual test
cases. If two test cases use the same dimensions for different data series, the
script will fail and print out the names of the violating data series.
For this reason, **A/B-Compatible tests should include a `performance_test` key in their dimension set whose value is set to the name of the test**. In addition to the above, care should be taken that the dimensions of the data series emitted by some test case are unique to that test case. For example, if we have a boottime test parameterized by number of vcpus, but the emitted boottime data series' dimension set is just `{"performance_test": "test_boottime"}`, then `tools/ab_test.py` will not be able to tell apart data series belonging to different microVM sizes, and instead combine them (which is probably not desired). For this reason **A/B-Compatible tests should always include all pytest parameters in their dimension set.** Lastly, performance A/B-Testing through `tools/ab_test.py` can only detect performance differences that are present in the Firecracker binary. The `tools/ab_test.py` script only checks out the revisions it is passed to execute `cargo build` to generate a Firecracker binary. It does not run integration tests in the context of the checked out revision. In particular, both the _A_ and the _B_ run will be triggered from within the same docker container, and using the same revision of the integration test code. This means it is not possible to use orchestrated A/B-Testing to assess the impact of, say, changing only python code (such as enabling logging). Only Rust code can be A/B-Tested. The exception to this are toolchain differences. If both specified revisions have `rust-toolchain.toml` files, then `tools/ab_test.py` will compile using the toolchain specified by the revision, instead of the toolchain installed in the docker container from which the script is executed. ### A/B-Testing in Buildkite We run automated A/B-Tests on every pull request after merge, if the pull request touches any rust code. The pipeline is generated by the [`pipeline_perf.py`](../.buildkite/pipeline_perf.py) script. 
To manually schedule an A/B-Test in buildkite, the `REVISION_A` and `REVISION_B` environment variables need to be set in the "Environment Variables" field under "Options" in buildkite's "New Build" modal. ### Beyond commit comparisons While our automated A/B-Testing suite only supports A/B-Tests across commit ranges, you can also use the scripts to manually run A/B-comparisons for arbitrary environment (such as comparison how the same Firecracker binary behaves on different hosts). For this, run the desired tests in your environments using `devtool` as you would for a non-A/B test. This will produce `test_results` directories which will contain `metrics.json` files for each run test. The `tools/ab_test.py` script can find and use these `metrics.json` files in the provided directories to compare runs: ```sh tools/ab_test.py analyze ``` This will then print the same analysis described in the previous sections. #### Visualization To create visualization of A/B runs use `tools/ab_plot.py` script. It supports creating `pdf` and `table` outputs using same `metrics.json` files used by `tools/ab_test.py`. Example usage: ```sh ./tools/ab_plot.py --output_type pdf ``` Alternatively using `devtool` running the script in the dev container with pre-installed dependencies. ```sh ./tools/devtool sh ./tools/ab_plot.py --output_type pdf ``` > [!NOTE] Generating `pdf` output may take some time for tests with a lot of > permutations. #### Troubleshooting If during `tools/ab_test.py analyze` you get an error like ```bash $ tools/ab_test.py analyze Traceback (most recent call last): File "/firecracker/tools/ab_test.py", line 412, in data_a = load_data_series(args.report_a) File "/firecracker/tools/ab_test.py", line 122, in load_data_series for line in test["teardown"]["stdout"].splitlines(): KeyError: 'stdout' ``` double check that the `AWS_EMF_ENVIRONMENT` and `AWS_EMF_NAMESPACE` environment variables are set to `local`. 
Particularly, when collecting data from buildkite pipelines generated from `.buildkite/pipeline_perf.py`, ensure you pass `--step-param env/AWS_EMF_NAMESPACE=local --step-param env/AWS_EMF_SERVICE_NAME=local`! ## Adding Python Tests Tests can be added in any (existing or new) sub-directory of `tests/`, in files named `test_*.py`. ### Fixtures By default, `pytest` makes all fixtures in [`conftest.py`](../tests/conftest.py) available to all test functions. You can also create `conftest.py` in sub-directories containing tests, or define fixtures directly in test files. See the [`pytest` documentation](https://docs.pytest.org/en/6.2.x/fixture.html) for details. Most integration tests use fixtures that abstract away the creation and teardown of Firecracker processes. The following fixtures spawn Firecracker processes that are pre-initialized with specific guest kernels and rootfs: - `uvm_plain_any` is parametrized by the guest kernels [supported](../docs/kernel-policy.md) by Firecracker and a read-only Ubuntu 24.04 squashfs as rootfs, - `uvm_plain` yields a Firecracker process pre-initialized with a 5.10 kernel and the same Ubuntu 24.04 squashfs. - `uvm_any` yields started microvms, parametrized by all supported kernels, all CPU templates (static, custom and none), and either booted or restored from a snapshot. - `uvm_any_booted` works the same as `uvm_any`, but only for booted VMs. Generally, tests should use `uvm_plain_any` if you are testing some interaction between the guest and Firecracker, and `uvm_plain` should be used if Firecracker functionality unrelated to the guest is being tested. ### Markers Firecracker uses two special [pytest markers](https://pytest.org/en/7.4.x/example/markers.html) to determine which tests are run in which context: - Tests marked as `nonci` are not run in the PR CI pipelines. Instead, they run in separate pipelines according to various cron schedules. - Tests marked as `no_block_pr` are run in the "optional" PR CI pipeline. 
This pipeline is not required to pass for merging a PR. All tests without markers are run for every pull request, and are required to pass for the PR to be merged. ## Adding Rust Tests Add a new function annotated with `#[test]` in [`integration_tests.rs`](../src/vmm/tests/integration_tests.rs). ## Working With Guest Files There are helper methods for writing to and reading from a guest filesystem. For example, to overwrite the guest init process and later extract a log: ```python def test_with_any_microvm_and_my_init(test_microvm_any): # [...] test_microvm_any.slot.fsfiles['mounted_root_fs'].copy_to(my_init, 'sbin/') # [...] test_microvm_any.slot.fsfiles['mounted_root_fs'].copy_from('logs/', 'log') ``` `copy_to()` source paths are relative to the host root and destination paths are relative to the `mounted_root_fs` root. Vice versa for `copy_from()`. Copying files to/from a guest file system while the guest is running results in undefined behavior. ## Example Manual Testrun Running on an EC2 `.metal` instance with an `Amazon Linux 2` AMI: ```sh # Get firecracker yum install -y git git clone https://github.com/firecracker-microvm/firecracker.git # Run all tests cd firecracker tools/devtool test ``` ## CI Environment In our CI, integration tests are run on EC2 `.metal` instances. We list the instance types and host operating systems we test in [our `README`](../README.md#tested-platforms). Multiple test runs can share a `.metal` instance, meaning it is possible to observe noisy neighbor effects when running the integration test suite (and particularly, tests should not assume the ability to configure host-global resources). The exception to this are integration tests found in [`integration_tests/performance`](integration_tests/performance). These tests are always executed single-tenant, and additionally tweak various host-level setting to achieve consistent performance. Please see the `test` section of `tools/devtool help` for more information. 
## Terminology - **Testrun**: A sandboxed run of all (or a selection of) integration tests. - **Test Session**: A `pytest` testing session. One per **testrun**. A **Testrun** will start a **Test Session** once the sandbox is created. - **Test**: A function named `test_` from this tree, that ensures a feature, functional parameter, or quality metric of Firecracker. Should assert or raise an exception if it fails. - **Fixture**: A function that returns an object that makes it very easy to add **Tests**: E.g., a spawned Firecracker microvm. Fixtures are functions marked with `@pytest.fixture` from a files named either `conftest.py`, or from files where tests are found. See `pytest` documentation on fixtures. - **Test Case**: An element from the cartesian product of a **Test** and all possible states of its parameters (including its fixtures). ## FAQ `Q1:` *I have a shell script that runs my tests and I don't want to rewrite it.*\ `A1:` Insofar as it makes sense, you should write it as a python test function. However, you can always call the script from a shim python test function. You can also add it as a microvm image resource in the s3 bucket (and it will be made available under `microvm.slot.path`) or copy it over to a guest filesystem as part of your test. `Q2:` *I want to add more tests that I don't want to commit to the Firecracker repository.*\ `A2:` Before a testrun or test session, just add your test directory under `tests/`. `pytest` will discover all tests in this tree. `Q3:` *I want to have my own test fixtures, and not commit them in the repo.*\ `A3:` Add a `conftest.py` file in your test directory, and place your fixtures there. `pytest` will bring them into scope for all your tests. `Q4:` *I want to use more/other microvm test images, but I don't want to add them to the common s3 bucket.*\ `A4:` Add your custom images to the `build/artifacts` subdirectory in the Firecracker source tree. 
This directory is bind-mounted in the container and used as a local image cache. `Q5:` *How can I get live logger output from the tests?*\ `A5:` Accessing **pytest.ini** will allow you to modify logger settings. `Q6:` *Is there a way to speed up integration tests execution time?*\ `A6:` You can narrow down the test selection as described in the **Running** section. For example: 1. Pass the `-k substring` option to pytest to only run a subset of tests by specifying a part of their name. 1. Only run the tests contained in a file or directory. ## Implementation Goals - Easily run tests manually on a development/test machine, and in a continuous integration environments. - Each test should be independent, and self-contained. Tests will time out, expect a clean environment, and leave a clean environment behind. - Always run with the latest dependencies and resources. ### Choice of Pytest & Dependencies Pytest was chosen because: - Python makes it easy to work in the clouds. - Python has built-in sandbox (virtual environment) support. - `pytest` has great test discovery and allows for simple, function-like tests. - `pytest` has powerful test fixture support. ## Test System TODOs **Note**: The below TODOs are also mentioned in their respective code files. ### Features - Use the Firecracker Open API spec to populate Microvm API resource URLs. - Event-based monitoring of microvm socket file creation to avoid while spins. - Self-tests (e.g., Tests that test the testing system). ### Implementation - Looking into `pytest-ordering` to ensure test order. - Create an integrated, layered `say` system across the test runner and pytest (probably based on an environment variable). - Per test function dependency installation would make tests easier to write. - Type hinting is used sparsely across tests/\* python module. The code would be more easily understood with consistent type hints everywhere. 
### Bug fixes ## Further Reading Contributing to this testing system requires a dive deep on `pytest`. ## Troubleshooting tests When troubleshooting tests, it is important to only narrow down the ones that are of interest. One can use the `--last-failed` parameter to only run the tests that failed from the previous run. Useful when several tests fail after making large changes. ### Run tests from within the container To avoid having to enter/exit Docker every test run, you can run the tests directly within a Docker session: ```sh tools/devtool -y shell --privileged tools/test.sh integration_tests/functional/test_api.py ``` ### How to use the Python debugger (pdb) for debugging Just append `--pdb`, and when a test fails it will drop you in pdb, where you can examine local variables and the stack, and can use the normal Python REPL. ``` tools/devtool -y test -- -k 1024 integration_tests/performance/test_boottime.py::test_boottime --pdb ``` ### How to use ipython's ipdb instead of pdb ```sh tools/devtool -y shell --privileged export PYTEST_ADDOPTS=--pdbcls=IPython.terminal.debugger:TerminalPdb tools/test.sh -k 1024 integration_tests/performance/test_boottime.py::test_boottime ``` There is a helper command in devtool that does just that, and is easier to type: ```sh tools/devtool -y test_debug -k 1024 integration_tests/performance/test_boottime.py::test_boottime ``` ### How to connect to the console interactively There is a helper to enable the console, but it has to be run **before** spawning the Firecracker process: ```python uvm.help.enable_console() uvm.spawn() uvm.basic_config() uvm.start() ... ``` Once that is done, if you get dropped into pdb, you can do this to open a `tmux` tab connected to the console (via `screen`). ```python uvm.help.tmux_console() ``` ### How to reproduce intermittent (aka flaky) tests Just run the test in a loop, and make it drop you into pdb when it fails. 
```sh while true; do tools/devtool -y test -- integration_tests/functional/test_balloon.py::test_deflate_on_oom -k False --pdb done ``` ### How to run tests in parallel with `-n` We can run the tests in parallel via `pytest-xdist`. Not all tests can run in parallel (the ones in `build` and `performance` are not supposed to run in parallel). By default, the tests run sequentially. One can use the `-n` to control the parallelism. Just `-n` will run as many workers as CPUs, which may be too many. As a rough heuristic, use half the available CPUs. I use -n4 for my 8 CPU (HT-enabled) laptop. In metals 8 is a good number; more than that just gives diminishing returns. ```sh tools/devtool -y test -- integration_tests/functional -n$(expr $(nproc) / 2) --dist worksteal ``` ### How to attach gdb to a running uvm First, make the test fail and drop you into PDB. For example: ```sh tools/devtool -y test_debug integration_tests/functional/test_api.py::test_api_happy_start --pdb ``` Then, ``` ipdb> test_microvm.help.gdbserver() ``` You get some instructions on how to run GDB to attach to gdbserver. ## How to run tests with a different version of Firecracker The integration tests usually compile Firecracker as part of the test initialization. But there's an option in case we want to run the tests against a different version of Firecracker, for example a previous release: ```sh ./tools/devtool test -- --binary-dir ../v1.8.0 ``` The directory specified with `--binary-dir` should contain at least two binaries: `firecracker` and `jailer`. ## How to run tests outside of Docker Tested in Ubuntu 22.04 and AL2023. AL2 does not work due to an old Python (3.8). 
```sh # replace with yum in Fedora/AmazonLinux sudo apt install python3-pip sudo pip3 install pytest ipython requests psutil tenacity filelock "urllib3<2.0" requests_unixsocket aws_embedded_metrics pytest-json-report pytest-timeout cd tests sudo env /usr/local/bin/pytest integration_tests/functional/test_api.py ``` > :warning: **Notice this runs the tests as root!** ## Sandbox ```sh tools/devtool -y sandbox ``` That should drop you in an IPython REPL, where you can interact with a microvm: ```python uvm.help.print_log() uvm.get_all_metrics() uvm.ssh.run("ls") snap = uvm.snapshot_full() uvm.help.tmux_ssh() ``` It supports a number of options, you can check with `devtool sandbox -- --help`. ### Running outside of Docker Running without Docker ``` source /etc/os-release case $ID-$VERSION_ID in amzn-2) sudo yum remove -y python3 sudo amazon-linux-extras install -y python3.8 sudo ln -sv /usr/bin/python3.8 /usr/bin/python3 sudo ln -sv /usr/bin/pip3.8 /usr/bin/pip3 esac sudo pip3 install pytest ipython requests psutil tenacity filelock "urllib3<2.0" requests_unixsocket sudo env PYTHONPATH=tests HOME=$HOME ~/.local/bin/ipython3 -i tools/sandbox.py -- --binary-dir ../repro/v1.4.1 ``` > [!WARNING] > > **Notice this runs as root!** ================================================ FILE: tests/conftest.py ================================================ # Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. # SPDX-License-Identifier: Apache-2.0 """Imported by pytest at the start of every test session. # Fixture Goals Fixtures herein are made available to every test collected by pytest. They are designed with the following goals in mind: - Running a test on a microvm is as easy as importing a microvm fixture. - Adding a new microvm image (kernel, rootfs) for tests to run on is as easy as creating a fixture that references some local paths # Notes - Reading up on pytest fixtures is probably needed when editing this file. 
https://docs.pytest.org/en/7.2.x/explanation/fixtures.html """ import inspect import json import os import shutil import sys import tempfile from pathlib import Path import pytest import host_tools.cargo_build as build_tools from framework import defs, utils from framework.artifacts import disks, kernel_params from framework.defs import ARTIFACT_DIR, DEFAULT_BINARY_DIR from framework.microvm import HugePagesConfig, MicroVMFactory, SnapshotType from framework.properties import global_props from framework.utils_cpu_templates import ( custom_cpu_templates_params, get_cpu_template_name, static_cpu_templates_params, ) from host_tools.metrics import get_metrics_logger from host_tools.network import NetNs # This codebase uses Python features available in Python 3.10 or above if sys.version_info < (3, 10): raise SystemError("This codebase requires Python 3.10 or above.") # Some tests create system-level resources; ensure we run as root. if os.geteuid() != 0: raise PermissionError("Test session needs to be run as root.") METRICS = get_metrics_logger() PHASE_REPORT_KEY = pytest.StashKey[dict[str, pytest.CollectReport]]() def pytest_addoption(parser): """Pytest hook. 
Add command line options.""" parser.addoption( "--binary-dir", action="store", help="use firecracker/jailer binaries from this directory instead of compiling from source", ) parser.addoption( "--custom-cpu-template", action="store", help="Path to custom CPU template to be applied unless overwritten by a test", default=None, type=Path, ) def pytest_report_header(): """Pytest hook to print relevant metadata in the logs""" return "\n".join( [ f"EC2 AMI: {global_props.ami}", f"EC2 Instance ID: {global_props.instance_id}", ] ) @pytest.hookimpl(wrapper=True, tryfirst=True) def pytest_runtest_makereport(item, call): # pylint:disable=unused-argument """Plugin to get test results in fixtures https://docs.pytest.org/en/latest/example/simple.html#making-test-result-information-available-in-fixtures """ # execute all other hooks to obtain the report object rep = yield # store test results for each phase of a call, which can # be "setup", "call", "teardown" item.stash.setdefault(PHASE_REPORT_KEY, {})[rep.when] = rep return rep @pytest.fixture(scope="function", autouse=True) def record_props(request, record_property): """Decorate test results with additional properties. Note: there is no need to call this fixture explicitly """ # Augment test result with global properties for prop_name, prop_val in global_props.__dict__.items(): # if record_testsuite_property worked with xdist we could use that # https://docs.pytest.org/en/7.1.x/reference/reference.html#record-testsuite-property # to record the properties once per report. But here we record each # prop per test. It just results in larger report files. 
record_property(prop_name, prop_val) # Extract attributes from the docstrings function_docstring = inspect.getdoc(request.function) record_property("description", function_docstring) def pytest_runtest_logreport(report): """Send general test metrics to CloudWatch""" # only publish metrics from the main process worker_id = os.environ.get("PYTEST_XDIST_WORKER") if worker_id is not None: return # The pytest's test protocol has three phases for each test item: setup, # call and teardown. At the end of each phase, pytest_runtest_logreport() # is called. # https://github.com/pytest-dev/pytest/blob/d489247505a953885a156e61d4473497cbc167ea/src/_pytest/hookspec.py#L643 # https://github.com/pytest-dev/pytest/blob/d489247505a953885a156e61d4473497cbc167ea/src/_pytest/hookspec.py#L800 METRICS.set_dimensions( # fine-grained { "test": report.nodeid, "instance": global_props.instance, "cpu_model": global_props.cpu_model, "host_kernel": "linux-" + global_props.host_linux_version, "phase": report.when, }, # per test { "test": report.nodeid, "instance": global_props.instance, "cpu_model": global_props.cpu_model, "host_kernel": "linux-" + global_props.host_linux_version, }, # per coarse-grained test name, dropping parameters and other dimensions to reduce metric count for dashboard # Note: noideid is formatted as below # - with parameters: "path/to/test.py::test_name[parameter0,parameter1]" # - without parameters: "path/to/test.py::test_name" { "test_name": report.nodeid.split("[")[0], }, # per phase {"phase": report.when}, # per host kernel {"host_kernel": "linux-" + global_props.host_linux_version}, # per CPU {"cpu_model": global_props.cpu_model}, # and global {}, ) METRICS.set_property("pytest_xdist_worker", worker_id) METRICS.set_property("result", report.outcome) METRICS.set_property("location", report.location) for prop_name, prop_val in report.user_properties: METRICS.set_property(prop_name, prop_val) METRICS.put_metric( "duration", report.duration, unit="Seconds", ) 
@pytest.fixture()
def metrics(results_dir, request):
    """Fixture to pass the metrics scope

    We use a fixture instead of the @metrics_scope decorator as that conflicts
    with tests.

    Due to how aws-embedded-metrics works, this fixture is per-test rather
    than per-session, and we flush the metrics after each test.

    Ref: https://github.com/awslabs/aws-embedded-metrics-python
    """
    logger = get_metrics_logger()
    for key, value in request.node.user_properties:
        logger.set_property(key, value)
    yield logger
    # Flush once per test; keep the raw data next to the test results.
    logger.flush()
    if results_dir:
        logger.store_data(results_dir)


@pytest.fixture
def record_property(record_property, metrics):
    """Override pytest's record_property to also set a property in our metrics context."""

    def _record(key, value):
        record_property(key, value)
        metrics.set_property(key, value)

    return _record


@pytest.fixture(autouse=True, scope="session")
def test_fc_session_root_path():
    """Ensure and yield the fc session root directory.

    Create a unique temporary session directory. This is important, since
    the scheduler will run multiple pytest sessions concurrently.
    """
    os.makedirs(defs.DEFAULT_TEST_SESSION_ROOT_PATH, exist_ok=True)
    session_root = tempfile.mkdtemp(
        prefix="fctest-", dir=defs.DEFAULT_TEST_SESSION_ROOT_PATH
    )
    yield session_root


def _compile_host_tool(session_root, source_file, binary_name, **gcc_kwargs):
    """Compile a single-file host tool into the session root; return the binary path."""
    binary_path = os.path.join(session_root, binary_name)
    build_tools.gcc_compile(source_file, binary_path, **gcc_kwargs)
    return binary_path


@pytest.fixture(scope="session")
def bin_vsock_path(test_fc_session_root_path):
    """Build a simple vsock client/server application."""
    yield _compile_host_tool(
        test_fc_session_root_path, "host_tools/vsock_helper.c", "vsock_helper"
    )


@pytest.fixture(scope="session")
def bin_vmclock_path(test_fc_session_root_path):
    """Build a simple util for test VMclock device"""
    yield _compile_host_tool(
        test_fc_session_root_path, "host_tools/vmclock.c", "vmclock"
    )


@pytest.fixture(scope="session")
def change_net_config_space_bin(test_fc_session_root_path):
    """Build a binary that changes the MMIO config space."""
    yield _compile_host_tool(
        test_fc_session_root_path,
        "host_tools/change_net_config_space.c",
        "change_net_config_space",
        extra_flags="-static",
    )


@pytest.fixture(scope="session")
def waitpkg_bin(test_fc_session_root_path):
    """Build a binary that attempts to use WAITPKG (UMONITOR / UMWAIT)"""
    yield _compile_host_tool(
        test_fc_session_root_path,
        "host_tools/waitpkg.c",
        "waitpkg",
        extra_flags="-mwaitpkg",
    )


@pytest.fixture(scope="session")
def msr_reader_bin(test_fc_session_root_path):
    """Build a binary that reads msrs"""
    yield _compile_host_tool(
        test_fc_session_root_path, "data/msr/msr_reader.c", "msr_reader"
    )


@pytest.fixture(scope="session")
def jailer_time_bin(test_fc_session_root_path):
    """Build a binary that fakes fc"""
    yield _compile_host_tool(
        test_fc_session_root_path, "host_tools/jailer_time.c", "jailer_time"
    )
@pytest.fixture
def bin_seccomp_paths():
    """Build jailers and jailed binaries to test seccomp.

    They currently consist of:

    * a jailer that receives filter generated using seccompiler-bin;
    * a jailed binary that follows the seccomp rules;
    * a jailed binary that breaks the seccomp rules.
    """
    demos = {
        f"demo_{example}": build_tools.get_example(f"seccomp_{example}")
        for example in ["jailer", "harmless", "malicious", "panic"]
    }
    yield demos


@pytest.fixture(scope="session")
def netns_factory(worker_id):
    """A network namespace factory

    Network namespaces are created once per test session and re-used in
    subsequent tests.
    """
    # NOTE(review): `worker_id` is presumably the pytest-xdist worker id,
    # used to keep namespace names unique across workers — confirm.
    # pylint:disable=protected-access
    class NetNsFactory:
        """A Network namespace factory that reuses namespaces."""

        def __init__(self, prefix: str):
            # _all: every namespace ever created; _returned: free pool.
            self._all = []
            self._returned = []
            self.prefix = prefix

        def get(self, _netns_id):
            """Get a free network namespace"""
            if len(self._returned) > 0:
                ns = self._returned.pop(0)
                # Busy-wait until the previous user has fully released it.
                # (The README's TODO about avoiding "while spins" applies here.)
                while ns.is_used():
                    pass
                return ns
            ns = NetNs(self.prefix + str(len(self._all)))
            # change the cleanup function so it is returned to the pool
            # instead of being destroyed; the real cleanup is kept on
            # _cleanup_orig and run at session teardown below.
            ns._cleanup_orig = ns.cleanup
            ns.cleanup = lambda: self._returned.append(ns)
            self._all.append(ns)
            return ns

    netns_fcty = NetNsFactory(f"netns-{worker_id}-")
    # Hand out the bound `get` method rather than the factory itself.
    yield netns_fcty.get
    # Session teardown: actually destroy every namespace we created.
    for netns in netns_fcty._all:
        netns._cleanup_orig()


@pytest.fixture()
def microvm_factory(request, record_property, results_dir, netns_factory):
    """Fixture to create microvms simply."""
    binary_dir = request.config.getoption("--binary-dir") or DEFAULT_BINARY_DIR
    if isinstance(binary_dir, str):
        binary_dir = Path(binary_dir)
    record_property("firecracker_bin", str(binary_dir / "firecracker"))
    # If `--custom-cpu-template` option is provided, the given CPU template will
    # be applied afterwards unless overwritten.
    custom_cpu_template_path = request.config.getoption("--custom-cpu-template")
    custom_cpu_template = (
        {
            "name": custom_cpu_template_path.stem,
            "template": json.loads(custom_cpu_template_path.read_text("utf-8")),
        }
        if custom_cpu_template_path
        else None
    )
    # We could override the chroot base like so
    # jailer_kwargs={"chroot_base": "/srv/jailo"}
    uvm_factory = MicroVMFactory(
        binary_dir,
        netns_factory=netns_factory,
        custom_cpu_template=custom_cpu_template,
    )
    yield uvm_factory
    # if the test failed, save important files from the root of the uVM into
    # `test_results` for troubleshooting. The per-phase reports were stashed
    # by the pytest_runtest_makereport hook.
    report = request.node.stash[PHASE_REPORT_KEY]
    if "call" in report and report["call"].failed:
        for uvm in uvm_factory.vms:
            # This is best effort. We want to proceed even if the VM is not responding.
            try:
                uvm.flush_metrics()
            except:  # pylint: disable=bare-except
                pass
            try:
                uvm.snapshot_full(
                    mem_path="post_failure.mem", vmstate_path="post_failure.vmstate"
                )
            except:  # pylint: disable=bare-except
                pass
            uvm_data = results_dir / uvm.id
            uvm_data.mkdir()
            # Host-side kernel log, with human-readable timestamps/decoding.
            uvm_data.joinpath("host-dmesg.log").write_text(
                utils.run_cmd(["dmesg", "-dPx"]).stdout
            )
            # SSH key so the saved guest can be inspected later.
            shutil.copy(ARTIFACT_DIR / "id_rsa", uvm_data)
            if Path(uvm.screen_log).exists():
                shutil.copy(uvm.screen_log, uvm_data)
            # Copy every regular file from the uVM's chroot root (skip dirs).
            uvm_root = Path(uvm.chroot())
            for item in os.listdir(uvm_root):
                src = uvm_root / item
                if not os.path.isfile(src):
                    continue
                dst = uvm_data / item
                shutil.copy(src, dst)
    # Always kill all spawned VMs, regardless of test outcome.
    uvm_factory.kill()


@pytest.fixture(params=custom_cpu_templates_params())
def custom_cpu_template(request, record_property):
    """Return all dummy custom CPU templates supported by the vendor."""
    record_property("custom_cpu_template", request.param["name"])
    return request.param


@pytest.fixture(
    params=[
        pytest.param(None, id="NO_CPU_TMPL"),
        *static_cpu_templates_params(),
        *custom_cpu_templates_params(),
    ],
)
def cpu_template_any(request, record_property):
    """This fixture combines no template, static and custom CPU templates"""
    record_property(
        "cpu_template",
        get_cpu_template_name(request.param, with_type=True)
    )
    return request.param
@pytest.fixture(params=["Sync", "Async"])
def io_engine(request):
    """All supported io_engines"""
    return request.param


@pytest.fixture(
    params=[SnapshotType.DIFF, SnapshotType.DIFF_MINCORE, SnapshotType.FULL]
)
def snapshot_type(request):
    """All possible snapshot types"""
    return request.param


@pytest.fixture
def results_dir(request, pytestconfig):
    """
    Fixture yielding the path to a directory into which the test can dump its results

    Directories are unique per test, and their names include test name and test parameters.
    Everything the test puts into its directory will be uploaded to S3. Directory will be placed
    inside defs.TEST_RESULTS_DIR.

    For example
    ```py
    @pytest.mark.parametrize("p", ["a", "b"])
    def test_my_file(p, results_dir):
        (results_dir / "output.txt").write_text("Hello World")
    ```
    will result in:
    - `defs.TEST_RESULTS_DIR`/test_my_file/test_my_file[a]/output.txt.
    - `defs.TEST_RESULTS_DIR`/test_my_file/test_my_file[b]/output.txt.

    When this fixture is called with DoctestItem as a request.node
    during doc tests, it will return None.
    """
    try:
        report_file = pytestconfig.getoption("--json-report-file")
        parent = Path(report_file).parent.absolute()
        # originalname drops the parameter suffix; name keeps it, giving the
        # test_name/test_name[params] layout documented above.
        results_dir = parent / request.node.originalname / request.node.name
    except AttributeError:
        # DoctestItem has no `originalname`; doc tests get no results dir.
        return None
    results_dir.mkdir(parents=True, exist_ok=True)
    return results_dir


def guest_kernel_fxt(request, record_property):
    """Return all supported guest kernels."""
    kernel = request.param
    if kernel is None:
        pytest.fail(f"No kernel artifacts found in {ARTIFACT_DIR}")
    # vmlinux-5.10.167 -> linux-5.10
    prop = kernel.stem[2:]
    record_property("guest_kernel", prop)
    return kernel


# Fixtures for all guest kernels, and specific versions
guest_kernel = pytest.fixture(guest_kernel_fxt, params=kernel_params("vmlinux-*"))
# Kernels whose artifact id does not mark them as "no-acpi".
guest_kernel_acpi = pytest.fixture(
    guest_kernel_fxt,
    params=filter(
        lambda kernel: "no-acpi" not in kernel.id, kernel_params("vmlinux-*")
    ),
)
guest_kernel_linux_5_10 = pytest.fixture(
    guest_kernel_fxt,
    params=filter(
        lambda kernel: "no-acpi" not in kernel.id, kernel_params("vmlinux-5.10*")
    ),
)
guest_kernel_linux_6_1 = pytest.fixture(
    guest_kernel_fxt,
    params=kernel_params("vmlinux-6.1*"),
)


@pytest.fixture
def rootfs():
    """Return an Ubuntu 24.04 read-only rootfs"""
    disk_list = disks("ubuntu-24.04.squashfs")
    if not disk_list:
        pytest.fail(
            f"No disk artifacts found matching 'ubuntu-24.04.squashfs' in {ARTIFACT_DIR}"
        )
    return disk_list[0]


@pytest.fixture
def rootfs_rw():
    """Return an Ubuntu 24.04 ext4 rootfs"""
    disk_list = disks("ubuntu-24.04.ext4")
    if not disk_list:
        pytest.fail(
            f"No disk artifacts found matching 'ubuntu-24.04.ext4' in {ARTIFACT_DIR}"
        )
    return disk_list[0]


@pytest.fixture
def uvm_plain(microvm_factory, guest_kernel_linux_5_10, rootfs, pci_enabled):
    """Create a vanilla VM, non-parametrized"""
    return microvm_factory.build(guest_kernel_linux_5_10, rootfs, pci=pci_enabled)


@pytest.fixture
def uvm_plain_6_1(microvm_factory, guest_kernel_linux_6_1, rootfs, pci_enabled):
    """Create a vanilla VM, non-parametrized"""
    return microvm_factory.build(guest_kernel_linux_6_1, rootfs, pci=pci_enabled)
@pytest.fixture
def uvm_plain_acpi(microvm_factory, guest_kernel_acpi, rootfs, pci_enabled):
    """Create a vanilla VM, non-parametrized"""
    return microvm_factory.build(guest_kernel_acpi, rootfs, pci=pci_enabled)


@pytest.fixture
def uvm_plain_rw(microvm_factory, guest_kernel_linux_5_10, rootfs_rw):
    """Create a vanilla VM, non-parametrized"""
    return microvm_factory.build(guest_kernel_linux_5_10, rootfs_rw)


@pytest.fixture
def uvm_nano(uvm_plain):
    """A preconfigured uvm with 2vCPUs and 256MiB of memory, ready to .start()"""
    uvm_plain.spawn()
    uvm_plain.basic_config(vcpu_count=2, mem_size_mib=256)
    return uvm_plain


@pytest.fixture()
def artifact_dir():
    """Return the location of the CI artifacts"""
    return defs.ARTIFACT_DIR


@pytest.fixture
def uvm_plain_any(microvm_factory, guest_kernel, rootfs, pci_enabled):
    """All guest kernels
    kernel: all
    rootfs: Ubuntu 24.04
    """
    return microvm_factory.build(guest_kernel, rootfs, pci=pci_enabled)


# Debug kernels live in a separate "debug" artifact subdirectory.
guest_kernel_6_1_debug = pytest.fixture(
    guest_kernel_fxt,
    params=kernel_params("vmlinux-6.1*", artifact_dir=defs.ARTIFACT_DIR / "debug"),
)


@pytest.fixture
def uvm_plain_debug(microvm_factory, guest_kernel_6_1_debug, rootfs_rw):
    """VM running a kernel with debug/trace Kconfig options"""
    return microvm_factory.build(guest_kernel_6_1_debug, rootfs_rw)


@pytest.fixture
def vcpu_count():
    """Return default vcpu_count. Use indirect parametrization to override."""
    return 2


@pytest.fixture
def mem_size_mib():
    """Return memory size. Use indirect parametrization to override."""
    return 256


@pytest.fixture(params=[True, False], ids=["PCI_ON", "PCI_OFF"])
def pci_enabled(request):
    """Fixture that allows configuring whether a microVM will have PCI enabled or not"""
    yield request.param


@pytest.fixture(
    params=[HugePagesConfig.NONE, HugePagesConfig.HUGETLBFS_2MB],
    ids=["NO_HUGE_PAGES", "2M_HUGE_PAGES"],
)
def huge_pages(request):
    """Fixture that allows configuring whether a microVM will have huge pages enabled or not"""
    yield request.param


def uvm_booted(
    microvm_factory,
    guest_kernel,
    rootfs,
    cpu_template,
    pci_enabled,
    vcpu_count=2,
    mem_size_mib=256,
):
    """Return a booted uvm

    Not a fixture itself: used as a parameter of the `uvm_ctor` fixture so
    tests can be run against both freshly-booted and snapshot-restored VMs.
    """
    uvm = microvm_factory.build(guest_kernel, rootfs, pci=pci_enabled)
    uvm.spawn()
    uvm.basic_config(vcpu_count=vcpu_count, mem_size_mib=mem_size_mib)
    uvm.set_cpu_template(cpu_template)
    uvm.add_net_iface()
    uvm.start()
    return uvm


def uvm_restored(
    microvm_factory, guest_kernel, rootfs, cpu_template, pci_enabled, **kwargs
):
    """Return a restored uvm

    Boots a VM, takes a full snapshot, kills it, and restores a new VM from
    that snapshot.
    """
    uvm = uvm_booted(
        microvm_factory, guest_kernel, rootfs, cpu_template, pci_enabled, **kwargs
    )
    snapshot = uvm.snapshot_full()
    uvm.kill()
    uvm2 = microvm_factory.build_from_snapshot(snapshot)
    # Carry the template name over so test reports stay accurate.
    uvm2.cpu_template_name = uvm.cpu_template_name
    return uvm2


@pytest.fixture(params=[uvm_booted, uvm_restored])
def uvm_ctor(request):
    """Fixture to return uvms with different constructors"""
    return request.param


@pytest.fixture
def uvm_any(
    microvm_factory,
    uvm_ctor,
    guest_kernel,
    rootfs,
    cpu_template_any,
    pci_enabled,
    vcpu_count,
    mem_size_mib,
):
    """Return booted and restored uvms"""
    return uvm_ctor(
        microvm_factory,
        guest_kernel,
        rootfs,
        cpu_template_any,
        pci_enabled,
        vcpu_count=vcpu_count,
        mem_size_mib=mem_size_mib,
    )


@pytest.fixture
def uvm_any_booted(
    microvm_factory,
    guest_kernel,
    rootfs,
    cpu_template_any,
    pci_enabled,
    vcpu_count,
    mem_size_mib,
):
    """Return booted uvms"""
    return uvm_booted(
        microvm_factory,
        guest_kernel,
        rootfs,
        cpu_template_any,
        pci_enabled,
        vcpu_count=vcpu_count,
        mem_size_mib=mem_size_mib,
    )
@pytest.fixture
def uvm_any_with_pci(
    uvm_ctor,
    microvm_factory,
    guest_kernel_acpi,
    rootfs,
    cpu_template_any,
    vcpu_count,
    mem_size_mib,
):
    """Return booted and restored uvms with PCI enabled"""
    # Uses guest_kernel_acpi (ACPI-capable kernels only) — presumably because
    # PCI support needs ACPI in the guest; confirm against guest kernel configs.
    return uvm_ctor(
        microvm_factory,
        guest_kernel_acpi,
        rootfs,
        cpu_template_any,
        True,  # pci_enabled
        vcpu_count=vcpu_count,
        mem_size_mib=mem_size_mib,
    )


@pytest.fixture
def uvm_any_without_pci(
    uvm_ctor,
    microvm_factory,
    guest_kernel,
    rootfs,
    cpu_template_any,
    vcpu_count,
    mem_size_mib,
):
    """Return booted and restored uvms with PCI disabled"""
    return uvm_ctor(
        microvm_factory,
        guest_kernel,
        rootfs,
        cpu_template_any,
        False,  # pci_enabled
        vcpu_count=vcpu_count,
        mem_size_mib=mem_size_mib,
    )
"bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x3", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x4", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x5", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x6", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000100" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x7", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000001" }, { "register": "ebx", "bitmap": "0b11110001101111110000011110101011" }, { "register": "ecx", "bitmap": "0b00000000010000010101111101001110" }, { "register": "edx", "bitmap": "0b10001100000000000000000000010000" } ] }, { "leaf": "0x7", "subleaf": "0x1", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": 
"0b00000000000000000000000000100000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x8", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x9", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xa", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xb", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000001" }, { "register": "ecx", "bitmap": "0b00000000000000000000000100000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xb", "subleaf": "0x1", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000101" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000001" }, { "register": "ecx", "bitmap": "0b00000000000000000000001000000001" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" 
} ] }, { "leaf": "0xc", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xd", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000001011100111" }, { "register": "ebx", "bitmap": "0b00000000000000000000001001000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000100110001000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xd", "subleaf": "0x1", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000001111" }, { "register": "ebx", "bitmap": "0b00000000000000000000001001000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xd", "subleaf": "0x2", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000100000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000001001000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xd", "subleaf": "0x5", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000001000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000001101000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xd", "subleaf": "0x6", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000001000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000001110000000" }, { "register": "ecx", "bitmap": 
"0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xd", "subleaf": "0x7", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000010000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000010110000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xd", "subleaf": "0x9", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000001000" }, { "register": "ebx", "bitmap": "0b00000000000000000000100110000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xe", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xf", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x10", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x40000000", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": 
"0b01000000000000000000000000000001" }, { "register": "ebx", "bitmap": "0b01001011010011010101011001001011" }, { "register": "ecx", "bitmap": "0b01010110010010110100110101010110" }, { "register": "edx", "bitmap": "0b00000000000000000000000001001101" } ] }, { "leaf": "0x40000001", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000001000000000111111011111011" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000000", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b10000000000000000000000000100001" }, { "register": "ebx", "bitmap": "0b01101000011101000111010101000001" }, { "register": "ecx", "bitmap": "0b01000100010011010100000101100011" }, { "register": "edx", "bitmap": "0b01101001011101000110111001100101" } ] }, { "leaf": "0x80000001", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000101000010000111100010001" }, { "register": "ebx", "bitmap": "0b01000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000110000000000001111110011" }, { "register": "edx", "bitmap": "0b00101111110100111111101111111111" } ] }, { "leaf": "0x80000002", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00100000010001000100110101000001" }, { "register": "ebx", "bitmap": "0b01000011010110010101000001000101" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000003", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": 
"0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000004", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000005", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b11111111010010001111111101000000" }, { "register": "ebx", "bitmap": "0b11111111010010001111111101000000" }, { "register": "ecx", "bitmap": "0b00100000000010000000000101000000" }, { "register": "edx", "bitmap": "0b00100000000010000000000101000000" } ] }, { "leaf": "0x80000006", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b01011100000000000010001000000000" }, { "register": "ebx", "bitmap": "0b01101100000000000100001000000000" }, { "register": "ecx", "bitmap": "0b00000100000000000110000101000000" }, { "register": "edx", "bitmap": "0b00001100000000001001000101000000" } ] }, { "leaf": "0x80000007", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000100000000" } ] }, { "leaf": "0x80000008", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000011100100110100" }, { "register": "ebx", "bitmap": "0b00000011000000101101001000000101" }, { "register": "ecx", "bitmap": "0b00000000000000000111000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000009", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": 
"0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x8000000a", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x8000000b", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x8000000c", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x8000000d", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x8000000e", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x8000000f", 
"subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000010", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000011", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000012", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000013", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000014", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", 
"bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000015", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000016", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000017", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000018", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000019", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b11110000010010001111000001000000" }, { "register": "ebx", "bitmap": "0b11110000010000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x8000001a", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", 
"bitmap": "0b00000000000000000000000000000110" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x8000001b", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x8000001c", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x8000001d", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000100100001" }, { "register": "ebx", "bitmap": "0b00000001110000000000000000111111" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000111111" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x8000001d", "subleaf": "0x1", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000100100010" }, { "register": "ebx", "bitmap": "0b00000001110000000000000000111111" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000111111" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x8000001d", "subleaf": "0x2", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000101000011" }, { "register": "ebx", "bitmap": "0b00000001110000000000000000111111" }, { "register": "ecx", "bitmap": "0b00000000000000000000011111111111" }, { "register": "edx", 
"bitmap": "0b00000000000000000000000000000010" } ] }, { "leaf": "0x8000001d", "subleaf": "0x3", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000101100011" }, { "register": "ebx", "bitmap": "0b00000011110000000000000000111111" }, { "register": "ecx", "bitmap": "0b00000000000000000111111111111111" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000001" } ] }, { "leaf": "0x8000001d", "subleaf": "0x4", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x8000001e", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x8000001f", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000011000011111111111111111011" }, { "register": "ebx", "bitmap": "0b00000000000000000100000110110011" }, { "register": "ecx", "bitmap": "0b00000000000000000000001111101110" }, { "register": "edx", "bitmap": "0b00000000000000000000001111101111" } ] }, { "leaf": "0x80000020", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000021", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000001100101" }, { "register": "ebx", 
"bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000110000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] } ], "msr_modifiers": [ { "addr": "0x11", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x12", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x34", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x3b", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x48", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x8b", "bitmap": "0b0000000000000000000000000000000000000001000000000000000001100101" }, { "addr": "0x9e", "bitmap": "0b0000000000000000000000000000000000000000000000110000000000000000" }, { "addr": "0xce", "bitmap": "0b0000000000000000000000000000000010000000000000000000000000000000" }, { "addr": "0x140", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x174", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x175", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x176", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x1a0", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000001" }, { "addr": "0x1fc", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x277", "bitmap": "0b0000000000000111000001000000011000000000000001110000010000000110" }, { "addr": "0x4b564d00", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x4b564d01", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x4b564d02", "bitmap": 
"0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x4b564d03", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x4b564d04", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x4b564d05", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000001" }, { "addr": "0x4b564d06", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x4b564d07", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0xc0000081", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0xc0000082", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0xc0000083", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0xc0000084", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0xc0000102", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0xc0000103", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0xc0010015", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0xc0010117", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0xc001011f", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" } ] } } ================================================ FILE: tests/data/cpu_template_helper/fingerprint_AMD_GENOA_6.1host.json ================================================ { "firecracker_version": "1.14.0-dev", "kernel_version": "6.1.153-175.280.amzn2023.x86_64", "microcode_version": "0xa101156", "bios_version": "1.0", "bios_revision": "2.23", "guest_cpu_config": { "kvm_capabilities": [], "cpuid_modifiers": [ { "leaf": "0x0", "subleaf": 
"0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000010000" }, { "register": "ebx", "bitmap": "0b01101000011101000111010101000001" }, { "register": "ecx", "bitmap": "0b01000100010011010100000101100011" }, { "register": "edx", "bitmap": "0b01101001011101000110111001100101" } ] }, { "leaf": "0x1", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000101000010000111100010001" }, { "register": "ebx", "bitmap": "0b00000000000000010000100000000000" }, { "register": "ecx", "bitmap": "0b11110111111110100011001000000011" }, { "register": "edx", "bitmap": "0b00000111100010111111101111111111" } ] }, { "leaf": "0x2", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x3", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x4", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x5", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { 
"register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x6", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000100" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x7", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000001" }, { "register": "ebx", "bitmap": "0b11110001101111110000011110101011" }, { "register": "ecx", "bitmap": "0b00000000010000010101111101001110" }, { "register": "edx", "bitmap": "0b10001100000000000000000000010000" } ] }, { "leaf": "0x7", "subleaf": "0x1", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000100000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x8", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x9", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xa", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": 
"0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xb", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000001" }, { "register": "ecx", "bitmap": "0b00000000000000000000000100000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xb", "subleaf": "0x1", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000101" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000001" }, { "register": "ecx", "bitmap": "0b00000000000000000000001000000001" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xc", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xd", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000001011100111" }, { "register": "ebx", "bitmap": "0b00000000000000000000001001000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000100110001000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xd", "subleaf": "0x1", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000001111" }, { "register": "ebx", "bitmap": "0b00000000000000000000001001000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xd", "subleaf": "0x2", "flags": 1, "modifiers": [ { 
"register": "eax", "bitmap": "0b00000000000000000000000100000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000001001000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xd", "subleaf": "0x5", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000001000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000001101000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xd", "subleaf": "0x6", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000001000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000001110000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xd", "subleaf": "0x7", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000010000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000010110000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xd", "subleaf": "0x9", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000001000" }, { "register": "ebx", "bitmap": "0b00000000000000000000100110000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xe", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": 
"0b00000000000000000000000000000000" } ] }, { "leaf": "0xf", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x10", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x40000000", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b01000000000000000000000000000001" }, { "register": "ebx", "bitmap": "0b01001011010011010101011001001011" }, { "register": "ecx", "bitmap": "0b01010110010010110100110101010110" }, { "register": "edx", "bitmap": "0b00000000000000000000000001001101" } ] }, { "leaf": "0x40000001", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000001000000000111111011111011" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000000", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b10000000000000000000000000100001" }, { "register": "ebx", "bitmap": "0b01101000011101000111010101000001" }, { "register": "ecx", "bitmap": "0b01000100010011010100000101100011" }, { "register": "edx", "bitmap": "0b01101001011101000110111001100101" } ] }, { "leaf": "0x80000001", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000101000010000111100010001" }, { "register": "ebx", "bitmap": 
"0b01000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000110000000000001111110011" }, { "register": "edx", "bitmap": "0b00101111110100111111101111111111" } ] }, { "leaf": "0x80000002", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00100000010001000100110101000001" }, { "register": "ebx", "bitmap": "0b01000011010110010101000001000101" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000003", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000004", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000005", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b11111111010010001111111101000000" }, { "register": "ebx", "bitmap": "0b11111111010010001111111101000000" }, { "register": "ecx", "bitmap": "0b00100000000010000000000101000000" }, { "register": "edx", "bitmap": "0b00100000000010000000000101000000" } ] }, { "leaf": "0x80000006", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b01011100000000000010001000000000" }, { "register": "ebx", "bitmap": "0b01101100000000000100001000000000" }, { "register": "ecx", "bitmap": "0b00000100000000000110000101000000" }, { "register": "edx", "bitmap": "0b00001100000000001001000101000000" } ] }, { "leaf": "0x80000007", 
"subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000100000000" } ] }, { "leaf": "0x80000008", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000011100100110100" }, { "register": "ebx", "bitmap": "0b00010011000000101101001000000101" }, { "register": "ecx", "bitmap": "0b00000000000000000111000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000009", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x8000000a", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x8000000b", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x8000000c", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", 
"bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x8000000d", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x8000000e", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x8000000f", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000010", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000011", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000012", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", 
"bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000013", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000014", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000015", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000016", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000017", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", 
"bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000018", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000019", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b11110000010010001111000001000000" }, { "register": "ebx", "bitmap": "0b11110000010000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x8000001a", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000110" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x8000001b", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x8000001c", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x8000001d", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000100100001" }, { "register": "ebx", 
"bitmap": "0b00000001110000000000000000111111" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000111111" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x8000001d", "subleaf": "0x1", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000100100010" }, { "register": "ebx", "bitmap": "0b00000001110000000000000000111111" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000111111" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x8000001d", "subleaf": "0x2", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000101000011" }, { "register": "ebx", "bitmap": "0b00000001110000000000000000111111" }, { "register": "ecx", "bitmap": "0b00000000000000000000011111111111" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000010" } ] }, { "leaf": "0x8000001d", "subleaf": "0x3", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000101100011" }, { "register": "ebx", "bitmap": "0b00000011110000000000000000111111" }, { "register": "ecx", "bitmap": "0b00000000000000000111111111111111" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000001" } ] }, { "leaf": "0x8000001d", "subleaf": "0x4", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x8000001e", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": 
"0x8000001f", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000020", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000021", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000001001100101" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] } ], "msr_modifiers": [ { "addr": "0x11", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x12", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x34", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x3b", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x48", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x8b", "bitmap": "0b0000000000000000000000000000000000000001000000000000000001100101" }, { "addr": "0x9e", "bitmap": "0b0000000000000000000000000000000000000000000000110000000000000000" }, { "addr": "0xce", "bitmap": "0b0000000000000000000000000000000010000000000000000000000000000000" }, { "addr": "0x140", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": 
"0x174", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x175", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x176", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x1a0", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000001" }, { "addr": "0x1fc", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x277", "bitmap": "0b0000000000000111000001000000011000000000000001110000010000000110" }, { "addr": "0x4b564d00", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x4b564d01", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x4b564d02", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x4b564d03", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x4b564d04", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x4b564d05", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000001" }, { "addr": "0x4b564d06", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x4b564d07", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0xc0000081", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0xc0000082", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0xc0000083", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0xc0000084", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0xc0000102", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0xc0000103", 
"bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0xc0000104", "bitmap": "0b0000000000000000000000000000000100000000000000000000000000000000" }, { "addr": "0xc0010015", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0xc0010117", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0xc001011f", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" } ] } } ================================================ FILE: tests/data/cpu_template_helper/fingerprint_AMD_MILAN_5.10host.json ================================================ { "firecracker_version": "1.14.0-dev", "kernel_version": "5.10.244-240.965.amzn2.x86_64", "microcode_version": "0xa0011de", "bios_version": "1.0", "bios_revision": "0.98", "guest_cpu_config": { "kvm_capabilities": [], "cpuid_modifiers": [ { "leaf": "0x0", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000010000" }, { "register": "ebx", "bitmap": "0b01101000011101000111010101000001" }, { "register": "ecx", "bitmap": "0b01000100010011010100000101100011" }, { "register": "edx", "bitmap": "0b01101001011101000110111001100101" } ] }, { "leaf": "0x1", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000101000000000111100010001" }, { "register": "ebx", "bitmap": "0b00000000000000010000100000000000" }, { "register": "ecx", "bitmap": "0b11110111111110100011001000000011" }, { "register": "edx", "bitmap": "0b00000111100010111111101111111111" } ] }, { "leaf": "0x2", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x3", "subleaf": 
"0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x4", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x5", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x6", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000100" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x7", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00100001100111000000010110101011" }, { "register": "ecx", "bitmap": "0b00000000010000000000011000001100" }, { "register": "edx", "bitmap": "0b10001100000000000000000000000000" } ] }, { "leaf": "0x8", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { 
"register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x9", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xa", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xb", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000001" }, { "register": "ecx", "bitmap": "0b00000000000000000000000100000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xb", "subleaf": "0x1", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000101" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000001" }, { "register": "ecx", "bitmap": "0b00000000000000000000001000000001" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xc", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xd", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000001000000111" }, { "register": "ebx", "bitmap": 
"0b00000000000000000000001001000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000100110001000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xd", "subleaf": "0x1", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000001111" }, { "register": "ebx", "bitmap": "0b00000000000000000000001001000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xd", "subleaf": "0x2", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000100000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000001001000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xd", "subleaf": "0x9", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000001000" }, { "register": "ebx", "bitmap": "0b00000000000000000000100110000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xe", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xf", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x10", "subleaf": "0x0", "flags": 1, "modifiers": [ 
{ "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x40000000", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b01000000000000000000000000000001" }, { "register": "ebx", "bitmap": "0b01001011010011010101011001001011" }, { "register": "ecx", "bitmap": "0b01010110010010110100110101010110" }, { "register": "edx", "bitmap": "0b00000000000000000000000001001101" } ] }, { "leaf": "0x40000001", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000001000000000111111011111011" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000000", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b10000000000000000000000000100001" }, { "register": "ebx", "bitmap": "0b01101000011101000111010101000001" }, { "register": "ecx", "bitmap": "0b01000100010011010100000101100011" }, { "register": "edx", "bitmap": "0b01101001011101000110111001100101" } ] }, { "leaf": "0x80000001", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000101000000000111100010001" }, { "register": "ebx", "bitmap": "0b01000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000110000000000001111110011" }, { "register": "edx", "bitmap": "0b00101111110100111111101111111111" } ] }, { "leaf": "0x80000002", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00100000010001000100110101000001" }, { "register": "ebx", "bitmap": "0b01000011010110010101000001000101" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { 
"register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000003", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000004", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000005", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b11111111010000001111111101000000" }, { "register": "ebx", "bitmap": "0b11111111010000001111111101000000" }, { "register": "ecx", "bitmap": "0b00100000000010000000000101000000" }, { "register": "edx", "bitmap": "0b00100000000010000000000101000000" } ] }, { "leaf": "0x80000006", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b01001000000000000010001000000000" }, { "register": "ebx", "bitmap": "0b01101000000000000100001000000000" }, { "register": "ecx", "bitmap": "0b00000010000000000110000101000000" }, { "register": "edx", "bitmap": "0b00000110000000001001000101000000" } ] }, { "leaf": "0x80000007", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000100000000" } ] }, { "leaf": "0x80000008", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000011000000110000" }, { 
"register": "ebx", "bitmap": "0b00000011000000101101001000000101" }, { "register": "ecx", "bitmap": "0b00000000000000000111000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000009", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x8000000a", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x8000000b", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x8000000c", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x8000000d", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, 
{ "leaf": "0x8000000e", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x8000000f", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000010", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000011", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000012", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000013", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { 
"register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000014", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000015", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000016", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000017", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000018", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000019", "subleaf": "0x0", "flags": 0, "modifiers": [ { 
"register": "eax", "bitmap": "0b11110000010000001111000001000000" }, { "register": "ebx", "bitmap": "0b11110000010000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x8000001a", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000110" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x8000001b", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x8000001c", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x8000001d", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000100100001" }, { "register": "ebx", "bitmap": "0b00000001110000000000000000111111" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000111111" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x8000001d", "subleaf": "0x1", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000100100010" }, { "register": "ebx", "bitmap": "0b00000001110000000000000000111111" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000111111" }, { 
"register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x8000001d", "subleaf": "0x2", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000101000011" }, { "register": "ebx", "bitmap": "0b00000001110000000000000000111111" }, { "register": "ecx", "bitmap": "0b00000000000000000000001111111111" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000010" } ] }, { "leaf": "0x8000001d", "subleaf": "0x3", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000101100011" }, { "register": "ebx", "bitmap": "0b00000011110000000000000000111111" }, { "register": "ecx", "bitmap": "0b00000000000000000111111111111111" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000001" } ] }, { "leaf": "0x8000001d", "subleaf": "0x4", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x8000001e", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x8000001f", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000001000000011111110100111111" }, { "register": "ebx", "bitmap": "0b00000000000000000100000101110011" }, { "register": "ecx", "bitmap": "0b00000000000000000000000111111101" }, { "register": "edx", "bitmap": "0b00000000000000000000000111111110" } ] }, { "leaf": "0x80000020", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { 
"register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000021", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000001100101" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000110000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] } ], "msr_modifiers": [ { "addr": "0x11", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x12", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x34", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x3b", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x48", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x8b", "bitmap": "0b0000000000000000000000000000000000000001000000000000000001100101" }, { "addr": "0x9e", "bitmap": "0b0000000000000000000000000000000000000000000000110000000000000000" }, { "addr": "0xce", "bitmap": "0b0000000000000000000000000000000010000000000000000000000000000000" }, { "addr": "0x140", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x174", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x175", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x176", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x1a0", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000001" }, { "addr": "0x1fc", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, 
{ "addr": "0x277", "bitmap": "0b0000000000000111000001000000011000000000000001110000010000000110" }, { "addr": "0x4b564d00", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x4b564d01", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x4b564d02", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x4b564d03", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x4b564d04", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x4b564d05", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000001" }, { "addr": "0x4b564d06", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x4b564d07", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0xc0000081", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0xc0000082", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0xc0000083", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0xc0000084", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0xc0000102", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0xc0000103", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0xc0010015", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0xc0010117", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0xc001011f", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" } ] } } ================================================ FILE: 
tests/data/cpu_template_helper/fingerprint_AMD_MILAN_6.1host.json ================================================ { "firecracker_version": "1.14.0-dev", "kernel_version": "6.1.153-175.280.amzn2023.x86_64", "microcode_version": "0xa0011de", "bios_version": "1.0", "bios_revision": "0.98", "guest_cpu_config": { "kvm_capabilities": [], "cpuid_modifiers": [ { "leaf": "0x0", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000010000" }, { "register": "ebx", "bitmap": "0b01101000011101000111010101000001" }, { "register": "ecx", "bitmap": "0b01000100010011010100000101100011" }, { "register": "edx", "bitmap": "0b01101001011101000110111001100101" } ] }, { "leaf": "0x1", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000101000000000111100010001" }, { "register": "ebx", "bitmap": "0b00000000000000010000100000000000" }, { "register": "ecx", "bitmap": "0b11110111111110100011001000000011" }, { "register": "edx", "bitmap": "0b00000111100010111111101111111111" } ] }, { "leaf": "0x2", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x3", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x4", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": 
"0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x5", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x6", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000100" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x7", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00100001100111000000010110101011" }, { "register": "ecx", "bitmap": "0b00000000010000000000011000001100" }, { "register": "edx", "bitmap": "0b10001100000000000000000000000000" } ] }, { "leaf": "0x8", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x9", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xa", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { 
"register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xb", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000001" }, { "register": "ecx", "bitmap": "0b00000000000000000000000100000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xb", "subleaf": "0x1", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000101" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000001" }, { "register": "ecx", "bitmap": "0b00000000000000000000001000000001" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xc", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xd", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000001000000111" }, { "register": "ebx", "bitmap": "0b00000000000000000000001001000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000100110001000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xd", "subleaf": "0x1", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000001111" }, { "register": "ebx", "bitmap": "0b00000000000000000000001001000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xd", "subleaf": "0x2", 
"flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000100000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000001001000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xd", "subleaf": "0x9", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000001000" }, { "register": "ebx", "bitmap": "0b00000000000000000000100110000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xe", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xf", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x10", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x40000000", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b01000000000000000000000000000001" }, { "register": "ebx", "bitmap": "0b01001011010011010101011001001011" }, { "register": "ecx", "bitmap": "0b01010110010010110100110101010110" }, { 
"register": "edx", "bitmap": "0b00000000000000000000000001001101" } ] }, { "leaf": "0x40000001", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000001000000000111111011111011" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000000", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b10000000000000000000000000100001" }, { "register": "ebx", "bitmap": "0b01101000011101000111010101000001" }, { "register": "ecx", "bitmap": "0b01000100010011010100000101100011" }, { "register": "edx", "bitmap": "0b01101001011101000110111001100101" } ] }, { "leaf": "0x80000001", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000101000000000111100010001" }, { "register": "ebx", "bitmap": "0b01000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000110000000000001111110011" }, { "register": "edx", "bitmap": "0b00101111110100111111101111111111" } ] }, { "leaf": "0x80000002", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00100000010001000100110101000001" }, { "register": "ebx", "bitmap": "0b01000011010110010101000001000101" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000003", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000004", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { 
"register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000005", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b11111111010000001111111101000000" }, { "register": "ebx", "bitmap": "0b11111111010000001111111101000000" }, { "register": "ecx", "bitmap": "0b00100000000010000000000101000000" }, { "register": "edx", "bitmap": "0b00100000000010000000000101000000" } ] }, { "leaf": "0x80000006", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b01001000000000000010001000000000" }, { "register": "ebx", "bitmap": "0b01101000000000000100001000000000" }, { "register": "ecx", "bitmap": "0b00000010000000000110000101000000" }, { "register": "edx", "bitmap": "0b00000110000000001001000101000000" } ] }, { "leaf": "0x80000007", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000100000000" } ] }, { "leaf": "0x80000008", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000011000000110000" }, { "register": "ebx", "bitmap": "0b00010011000000101101001000000101" }, { "register": "ecx", "bitmap": "0b00000000000000000111000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000009", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, 
{ "leaf": "0x8000000a", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x8000000b", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x8000000c", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x8000000d", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x8000000e", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x8000000f", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { 
"register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000010", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000011", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000012", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000013", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000014", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000015", "subleaf": "0x0", "flags": 0, "modifiers": [ { 
"register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000016", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000017", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000018", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000019", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b11110000010000001111000001000000" }, { "register": "ebx", "bitmap": "0b11110000010000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x8000001a", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000110" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { 
"register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x8000001b", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x8000001c", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x8000001d", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000100100001" }, { "register": "ebx", "bitmap": "0b00000001110000000000000000111111" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000111111" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x8000001d", "subleaf": "0x1", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000100100010" }, { "register": "ebx", "bitmap": "0b00000001110000000000000000111111" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000111111" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x8000001d", "subleaf": "0x2", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000101000011" }, { "register": "ebx", "bitmap": "0b00000001110000000000000000111111" }, { "register": "ecx", "bitmap": "0b00000000000000000000001111111111" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000010" } ] }, { "leaf": "0x8000001d", "subleaf": "0x3", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000101100011" }, { 
"register": "ebx", "bitmap": "0b00000011110000000000000000111111" }, { "register": "ecx", "bitmap": "0b00000000000000000111111111111111" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000001" } ] }, { "leaf": "0x8000001d", "subleaf": "0x4", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x8000001e", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x8000001f", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000020", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000021", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000001001100101" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] } 
], "msr_modifiers": [ { "addr": "0x11", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x12", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x34", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x3b", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x48", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x8b", "bitmap": "0b0000000000000000000000000000000000000001000000000000000001100101" }, { "addr": "0x9e", "bitmap": "0b0000000000000000000000000000000000000000000000110000000000000000" }, { "addr": "0xce", "bitmap": "0b0000000000000000000000000000000010000000000000000000000000000000" }, { "addr": "0x140", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x174", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x175", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x176", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x1a0", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000001" }, { "addr": "0x1fc", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x277", "bitmap": "0b0000000000000111000001000000011000000000000001110000010000000110" }, { "addr": "0x4b564d00", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x4b564d01", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x4b564d02", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x4b564d03", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x4b564d04", "bitmap": 
"0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x4b564d05", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000001" }, { "addr": "0x4b564d06", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x4b564d07", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0xc0000081", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0xc0000082", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0xc0000083", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0xc0000084", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0xc0000102", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0xc0000103", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0xc0000104", "bitmap": "0b0000000000000000000000000000000100000000000000000000000000000000" }, { "addr": "0xc0010015", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0xc0010117", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0xc001011f", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" } ] } } ================================================ FILE: tests/data/cpu_template_helper/fingerprint_ARM_NEOVERSE_N1_5.10host.json ================================================ { "firecracker_version": "1.12.0-dev", "kernel_version": "5.10.234-225.910.amzn2.aarch64", "microcode_version": "0x00000000000000ff", "bios_version": "1.0", "bios_revision": "1.0", "guest_cpu_config": { "kvm_capabilities": [], "vcpu_features": [], "reg_modifiers": [ { "addr": "0x60200000001000d4", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x60200000001000d5", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6020000000110000", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001110000000111111110000000011010" }, { "addr": "0x6020000000110001", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000100000000111111110000000011010" }, { "addr": "0x6020000000110002", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001110000111111111110000000111010" }, { "addr": "0x6030000000100000", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000111111000000000000000000000" }, { "addr": "0x6030000000100002", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100004", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100006", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100008", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000010000a", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000010000c", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000010000e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100010", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100012", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100014", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100016", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100018", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000010001a", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000010001c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000010001e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100020", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100022", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100024", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100026", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100028", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000010002a", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000010002c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000010002e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100030", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100032", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100034", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100036", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100038", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000010003a", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000010003c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000010003e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100042", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001111000101" }, { "addr": "0x6030000000100044", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100046", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100048", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000010004a", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000010004c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000010004e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100050", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138004", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138005", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138006", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138007", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013800c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013800d", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013800e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013800f", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138010", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138012", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138014", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138015", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138016", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138017", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013801c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013801d", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013801e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013801f", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138024", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138025", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138026", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138027", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013802c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013802d", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013802e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013802f", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138034", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138035", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138036", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138037", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013803c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013803d", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013803e", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013803f", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138044", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138045", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138046", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138047", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013804c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013804d", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013804e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013804f", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138054", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138055", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138056", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138057", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013805c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013805d", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013805e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013805f", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138064", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138065", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138066", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138067", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013806c", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013806d", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013806e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013806f", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138074", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138075", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138076", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138077", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013807c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013807d", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013807e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013807f", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013a038", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c000", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001000001001111111101000011000001" }, { "addr": "0x603000000013c005", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000000000000000000000000000000" }, { "addr": "0x603000000013c006", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000011111111" }, { "addr": "0x603000000013c008", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000000100110001" }, { "addr": "0x603000000013c009", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000000000010000000000000000" }, { "addr": "0x603000000013c00a", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000100000000010000000010001000" }, { "addr": "0x603000000013c00b", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c00c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000001000000001000100000101" }, { "addr": "0x603000000013c00d", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001000000000000000000000000000000" }, { "addr": "0x603000000013c00e", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001001001100000000000000000" }, { "addr": "0x603000000013c00f", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000100100010001000010001" }, { "addr": "0x603000000013c010", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000100000001000100010000" }, { "addr": "0x603000000013c011", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010011000100010010000100010001" }, { "addr": "0x603000000013c012", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000100001001000110010000001000010" }, { "addr": "0x603000000013c013", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001000100010010000100110001" }, { "addr": "0x603000000013c014", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000000101000010" }, { "addr": "0x603000000013c015", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001000000010001000100100001" }, { "addr": "0x603000000013c016", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000100001000100010000" }, { "addr": "0x603000000013c017", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000" }, { "addr": "0x603000000013c018", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000000100010000001000100010" }, { "addr": "0x603000000013c019", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010011001000010001000100010001" }, { "addr": "0x603000000013c01a", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001000011" }, { "addr": "0x603000000013c01b", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c01c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010001" }, { "addr": "0x603000000013c01d", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c01e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c01f", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c020", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001000100000000000000000000000000010001000100010001000100010010" }, { "addr": "0x603000000013c021", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000100000" }, { "addr": "0x603000000013c022", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c023", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c024", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c025", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c026", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c027", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c028", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000100010000001100000101010000001000" }, { "addr": "0x603000000013c029", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c02a", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c02b", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c02c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c02d", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c02e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c02f", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c030", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000100000000000000010000001000010001000100100000" }, { "addr": "0x603000000013c031", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000100000000000000000001" }, { "addr": "0x603000000013c032", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c033", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c034", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c035", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c036", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c037", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c038", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000100000001000100100101" }, { "addr": "0x603000000013c039", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000001000010010000100100010" }, { "addr": "0x603000000013c03a", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000100000000000000000000000000000000000000000000000000010001" }, { "addr": "0x603000000013c03b", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c03c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c03d", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c03e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c03f", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c080", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000110001010000000001111000" }, { "addr": "0x603000000013c081", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c082", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c100", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013c101", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013c102", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c288", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013c289", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013c290", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013c300", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013c3a0", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013c4f1", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013c4f2", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013c510", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013c518", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c600", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c609", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c681", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c684", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013c708", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c801", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000010000000000000000000100011" }, { "addr": "0x603000000013c807", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013d000", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013d801", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010110100010001001100000000000100" }, { "addr": "0x603000000013dce0", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001000001000011000011000010101100" }, { "addr": "0x603000000013dce1", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013dce2", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013dce3", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013dce4", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013dce5", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013dce8", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013dcf0", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013dcf3", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013de82", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013de83", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df02", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013df11", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000100" }, { "addr": "0x603000000013df12", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013df19", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000100" }, { "addr": "0x603000000013df40", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df41", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df42", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df43", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df44", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df45", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df46", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df47", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df48", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df49", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df4a", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df4b", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df4c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df4d", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df4e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df4f", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df50", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df51", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df52", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df53", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df54", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df55", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df56", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df57", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df58", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df59", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df5a", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df5b", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df5c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df5d", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df5e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df60", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df61", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df62", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df63", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df64", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df65", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df66", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df67", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df68", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df69", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df6a", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df6b", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df6c", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df6d", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df6e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df6f", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df70", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df71", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df72", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df73", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df74", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df75", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df76", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df77", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df78", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df79", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df7a", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df7b", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df7c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df7d", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df7e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df7f", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013e180", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013e281", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013e298", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000011100000000" }, { "addr": "0x6030000000140000", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000000000000000" }, { "addr": "0x6030000000140001", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010" }, { "addr": "0x6030000000140002", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000140003", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001" }, { "addr": "0x6040000000100054", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6040000000100058", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x604000000010005c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6040000000100060", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6040000000100064", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6040000000100068", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x604000000010006c", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6040000000100070", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6040000000100074", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6040000000100078", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x604000000010007c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6040000000100080", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6040000000100084", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6040000000100088", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x604000000010008c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6040000000100090", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6040000000100094", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6040000000100098", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x604000000010009c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x60400000001000a0", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x60400000001000a4", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x60400000001000a8", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x60400000001000ac", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x60400000001000b0", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x60400000001000b4", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x60400000001000b8", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x60400000001000bc", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x60400000001000c0", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x60400000001000c4", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x60400000001000c8", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x60400000001000cc", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x60400000001000d0", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" } ] } } ================================================ FILE: tests/data/cpu_template_helper/fingerprint_ARM_NEOVERSE_N1_6.1host.json ================================================ { "firecracker_version": "1.12.0-dev", "kernel_version": "6.1.129-138.220.amzn2023.aarch64", "microcode_version": "0x00000000000000ff", "bios_version": "1.0", "bios_revision": "1.0", "guest_cpu_config": { "kvm_capabilities": [], "vcpu_features": [], "reg_modifiers": [ { "addr": "0x60200000001000d4", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x60200000001000d5", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6020000000110000", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001110000000111111110000000011010" }, { "addr": "0x6020000000110001", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000100000000111111110000000011010" }, { "addr": "0x6020000000110002", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001110000111111111110000000111010" }, { "addr": "0x6030000000100000", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000111111000000000000000000000" }, { "addr": "0x6030000000100002", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100004", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100006", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100008", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000010000a", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000010000c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000010000e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100010", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100012", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100014", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100016", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100018", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000010001a", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000010001c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000010001e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100020", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100022", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100024", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100026", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100028", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000010002a", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000010002c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000010002e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100030", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100032", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100034", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100036", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100038", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000010003a", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000010003c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000010003e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100042", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001111000101" }, { "addr": "0x6030000000100044", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100046", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100048", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000010004a", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000010004c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000010004e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100050", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138004", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138005", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138006", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138007", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013800c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013800d", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013800e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013800f", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138010", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138012", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138014", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138015", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138016", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138017", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013801c", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013801d", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013801e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013801f", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138024", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138025", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138026", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138027", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013802c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013802d", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013802e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013802f", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138034", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138035", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138036", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138037", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013803c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013803d", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013803e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013803f", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138044", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138045", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138046", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138047", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013804c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013804d", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013804e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013804f", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138054", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138055", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138056", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138057", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013805c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013805d", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013805e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013805f", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138064", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138065", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138066", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138067", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013806c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013806d", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013806e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013806f", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138074", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138075", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138076", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138077", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013807c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013807d", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013807e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013807f", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013808c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001000" }, { "addr": "0x603000000013a038", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c000", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001000001001111111101000011000001" }, { "addr": "0x603000000013c005", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000000000000000000000000000000" }, { "addr": "0x603000000013c006", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000011111111" }, { "addr": "0x603000000013c008", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000000100110001" }, { "addr": "0x603000000013c009", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000000000010000000000000000" }, { "addr": "0x603000000013c00a", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000000010001000" }, { "addr": "0x603000000013c00b", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c00c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000001000000001000100000101" }, { "addr": "0x603000000013c00d", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001000000000000000000000000000000" }, { "addr": "0x603000000013c00e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001001001100000000000000000" }, { "addr": "0x603000000013c00f", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000100100010001000010001" }, { "addr": "0x603000000013c010", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000100000001000100010000" }, { "addr": "0x603000000013c011", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010011000100010010000100010001" }, { "addr": "0x603000000013c012", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000100001001000110010000001000010" }, { "addr": "0x603000000013c013", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001000100010010000100110001" }, { "addr": "0x603000000013c014", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000000101000010" }, { "addr": "0x603000000013c015", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001000000010001000100100001" }, { "addr": "0x603000000013c016", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000100001000100010000" }, { "addr": "0x603000000013c017", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000" }, { "addr": "0x603000000013c018", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000000100010000001000100010" }, { "addr": "0x603000000013c019", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010011001000010001000100010001" }, { "addr": "0x603000000013c01a", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001000011" }, { "addr": "0x603000000013c01b", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c01c", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010001" }, { "addr": "0x603000000013c01d", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c01e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c01f", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c020", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001000100000000000000000000000000010001000100010001000100010010" }, { "addr": "0x603000000013c021", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000100000" }, { "addr": "0x603000000013c022", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c023", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c024", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c025", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c026", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c027", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c028", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000001100000101000000000110" }, { "addr": "0x603000000013c029", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c02a", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c02b", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c02c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c02d", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c02e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c02f", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c030", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000100000000000000010000001000010001000100100000" }, { "addr": "0x603000000013c031", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000100000000000000000001" }, { "addr": "0x603000000013c032", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c033", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c034", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c035", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c036", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c037", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c038", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000100000001000100100101" }, { "addr": "0x603000000013c039", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000001000010010000100100010" }, { "addr": "0x603000000013c03a", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000100000000000000000000000000000000000000000000000000010001" }, { "addr": "0x603000000013c03b", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c03c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c03d", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c03e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c03f", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c080", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000110001010000000001111000" }, { "addr": "0x603000000013c081", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c082", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c100", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013c101", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013c102", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c288", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013c289", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013c290", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013c300", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013c3a0", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013c510", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013c518", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c600", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c609", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c681", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c684", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013c708", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c801", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000010000000000000000000100011" }, { "addr": "0x603000000013c807", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013d000", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013d801", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010110100010001001100000000000100" }, { "addr": "0x603000000013de82", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013de83", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df02", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013df11", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000100" }, { "addr": "0x603000000013df12", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013df19", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000100" }, { "addr": "0x603000000013e180", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013e281", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013e298", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000011100000000" }, { "addr": "0x6030000000140000", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000000000000001" }, { "addr": "0x6030000000140001", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010" }, { "addr": "0x6030000000140002", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000140003", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001" }, { "addr": "0x6030000000160000", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001" }, { "addr": "0x6030000000160001", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001" }, { "addr": "0x6030000000160002", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000011" }, { "addr": "0x6040000000100054", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6040000000100058", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x604000000010005c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6040000000100060", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6040000000100064", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6040000000100068", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x604000000010006c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6040000000100070", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6040000000100074", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6040000000100078", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x604000000010007c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6040000000100080", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6040000000100084", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6040000000100088", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x604000000010008c", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6040000000100090", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6040000000100094", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6040000000100098", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x604000000010009c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x60400000001000a0", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x60400000001000a4", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x60400000001000a8", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x60400000001000ac", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x60400000001000b0", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x60400000001000b4", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x60400000001000b8", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x60400000001000bc", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x60400000001000c0", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x60400000001000c4", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x60400000001000c8", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x60400000001000cc", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x60400000001000d0", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" } ] } } ================================================ FILE: tests/data/cpu_template_helper/fingerprint_ARM_NEOVERSE_V1_5.10host.json ================================================ { "firecracker_version": "1.12.0-dev", "kernel_version": "5.10.234-225.910.amzn2.aarch64", "microcode_version": "0x0000000000000001", "bios_version": "1.0", "bios_revision": "1.0", "guest_cpu_config": { "kvm_capabilities": [], "vcpu_features": [], "reg_modifiers": [ { "addr": "0x60200000001000d4", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x60200000001000d5", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6020000000110000", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000011010" }, { "addr": "0x6020000000110001", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000011010" }, { "addr": "0x6020000000110002", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000111010" }, { "addr": "0x6030000000100000", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000111111000000000000000000000" }, { "addr": "0x6030000000100002", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100004", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100006", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100008", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000010000a", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000010000c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000010000e", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100010", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100012", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100014", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100016", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100018", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000010001a", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000010001c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000010001e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100020", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100022", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100024", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100026", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100028", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000010002a", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000010002c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000010002e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100030", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100032", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100034", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100036", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100038", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000010003a", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000010003c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000010003e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100042", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001111000101" }, { "addr": "0x6030000000100044", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100046", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100048", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000010004a", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000010004c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000010004e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100050", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138004", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138005", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138006", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138007", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013800c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013800d", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013800e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013800f", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138010", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138012", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138014", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138015", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138016", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138017", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013801c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013801d", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013801e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013801f", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138024", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138025", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138026", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138027", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013802c", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013802d", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013802e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013802f", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138034", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138035", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138036", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138037", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013803c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013803d", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013803e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013803f", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138044", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138045", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138046", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138047", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013804c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013804d", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013804e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013804f", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138054", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138055", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138056", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138057", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013805c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013805d", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013805e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013805f", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138064", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138065", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138066", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138067", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013806c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013806d", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013806e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013806f", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138074", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138075", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138076", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138077", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013807c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013807d", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013807e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013807f", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013a038", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c000", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001000001000111111101010000000001" }, { "addr": "0x603000000013c005", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000000000000000000000000000000" }, { "addr": "0x603000000013c006", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001" }, { "addr": "0x603000000013c008", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001000000010000000100110001" }, { "addr": "0x603000000013c009", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000110000000000010000000000000000" }, { "addr": "0x603000000013c00a", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000100000000010001000010011001" }, { "addr": "0x603000000013c00b", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c00c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000001000000001000100000101" }, { "addr": "0x603000000013c00d", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001000000000000000000000000000000" }, { "addr": "0x603000000013c00e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001001001100000000000000000" }, { "addr": "0x603000000013c00f", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000100100010001000010001" }, { "addr": "0x603000000013c010", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000100000001000100010000" }, { "addr": "0x603000000013c011", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010011000100010010000100010001" }, { "addr": "0x603000000013c012", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000100001001000110010000001000010" }, { "addr": "0x603000000013c013", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001000100010010000100110001" }, { "addr": "0x603000000013c014", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000000101000010" }, { "addr": "0x603000000013c015", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001000000010001000100100001" }, { "addr": "0x603000000013c016", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001000000100001000100010000" }, { "addr": "0x603000000013c017", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001000100000000000100010001" }, { "addr": "0x603000000013c018", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000000100010000001000100010" }, { "addr": "0x603000000013c019", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010011001000010001000100010001" }, { "addr": "0x603000000013c01a", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001000011" }, { "addr": "0x603000000013c01b", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c01c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010001" }, { "addr": "0x603000000013c01d", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c01e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c01f", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c020", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001000100000001000000010000000000100011000100010001000100010010" }, { "addr": "0x603000000013c021", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000100000" }, { "addr": "0x603000000013c022", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c023", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c024", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c025", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c026", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c027", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c028", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000001111001000010000001100000101010000001001" }, { "addr": "0x603000000013c029", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c02a", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c02b", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c02c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c02d", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c02e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c02f", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c030", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000001000000010001000100010001000100010000001000010010000100100000" }, { "addr": "0x603000000013c031", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000010001000100000000000000000000001000010001000000000010" }, { "addr": "0x603000000013c032", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c033", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c034", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c035", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c036", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c037", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c038", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000100000001000100100101" }, { "addr": "0x603000000013c039", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000001000010010000100100010" }, { "addr": "0x603000000013c03a", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000001000100000000000010001000100000010000100000001000000010001" }, { "addr": "0x603000000013c03b", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c03c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c03d", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c03e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c03f", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c080", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000110001010000000001111000" }, { "addr": "0x603000000013c081", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c082", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c100", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013c101", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013c102", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c288", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013c289", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013c290", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013c300", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013c3a0", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013c4f1", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013c4f2", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013c510", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013c518", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c600", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c609", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c681", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c684", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013c708", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c801", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000010000000000000000000100011" }, { "addr": "0x603000000013c807", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013d000", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013d801", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010110100010001001100000000000100" }, { "addr": "0x603000000013dce0", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001000001001000000011000010101100" }, { "addr": "0x603000000013dce1", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013dce2", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013dce3", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013dce4", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013dce5", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013dce8", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013dcf0", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013dcf3", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013de82", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013de83", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df02", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013df11", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000100" }, { "addr": "0x603000000013df12", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013df19", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000100" }, { "addr": "0x603000000013df40", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df41", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df42", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df43", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df44", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df45", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df46", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df47", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df48", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df49", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df4a", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df4b", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df4c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df4d", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df4e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df4f", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df50", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df51", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df52", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df53", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df54", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df55", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df56", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df57", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df58", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df59", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df5a", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df5b", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df5c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df5d", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df5e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df60", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df61", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df62", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df63", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df64", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df65", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df66", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df67", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df68", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df69", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df6a", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df6b", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df6c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df6d", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df6e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df6f", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df70", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df71", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df72", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df73", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df74", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df75", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df76", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df77", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df78", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df79", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df7a", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df7b", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df7c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df7d", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df7e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df7f", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013e180", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013e281", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013e298", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000011100000000" }, { "addr": "0x6030000000140000", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000000000000000" }, { "addr": "0x6030000000140001", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010" }, { "addr": "0x6030000000140002", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000140003", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001" }, { "addr": "0x6040000000100054", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6040000000100058", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x604000000010005c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6040000000100060", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6040000000100064", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6040000000100068", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x604000000010006c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6040000000100070", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6040000000100074", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6040000000100078", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x604000000010007c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6040000000100080", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6040000000100084", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6040000000100088", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x604000000010008c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6040000000100090", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6040000000100094", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6040000000100098", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x604000000010009c", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x60400000001000a0", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x60400000001000a4", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x60400000001000a8", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x60400000001000ac", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x60400000001000b0", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x60400000001000b4", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x60400000001000b8", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x60400000001000bc", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x60400000001000c0", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x60400000001000c4", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x60400000001000c8", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x60400000001000cc", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x60400000001000d0", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" } ] } } ================================================ FILE: tests/data/cpu_template_helper/fingerprint_ARM_NEOVERSE_V1_6.1host.json ================================================ { "firecracker_version": "1.12.0-dev", "kernel_version": "6.1.129-138.220.amzn2023.aarch64", "microcode_version": "0x0000000000000001", "bios_version": "1.0", "bios_revision": "1.0", "guest_cpu_config": { "kvm_capabilities": [], "vcpu_features": [], "reg_modifiers": [ { "addr": "0x60200000001000d4", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x60200000001000d5", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6020000000110000", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000011010" }, { "addr": "0x6020000000110001", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000011010" }, { "addr": "0x6020000000110002", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000111010" }, { "addr": "0x6030000000100000", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000111111000000000000000000000" }, { "addr": "0x6030000000100002", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100004", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100006", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100008", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000010000a", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000010000c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000010000e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100010", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100012", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100014", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100016", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100018", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000010001a", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000010001c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000010001e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100020", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100022", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100024", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100026", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100028", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000010002a", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000010002c", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000010002e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100030", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100032", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100034", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100036", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100038", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000010003a", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000010003c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000010003e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100042", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001111000101" }, { "addr": "0x6030000000100044", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100046", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100048", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000010004a", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000010004c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000010004e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100050", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138004", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138005", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138006", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138007", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013800c", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013800d", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013800e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013800f", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138010", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138012", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138014", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138015", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138016", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138017", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013801c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013801d", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013801e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013801f", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138024", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138025", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138026", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138027", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013802c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013802d", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013802e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013802f", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138034", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138035", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138036", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138037", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013803c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013803d", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013803e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013803f", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138044", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138045", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138046", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138047", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013804c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013804d", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013804e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013804f", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138054", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138055", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138056", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138057", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013805c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013805d", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013805e", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013805f", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138064", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138065", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138066", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138067", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013806c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013806d", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013806e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013806f", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138074", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138075", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138076", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138077", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013807c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013807d", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013807e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013807f", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013808c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001000" }, { "addr": "0x603000000013a038", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c000", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001000001000111111101010000000001" }, { "addr": "0x603000000013c005", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000000000000000000000000000000" }, { "addr": "0x603000000013c006", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001" }, { "addr": "0x603000000013c008", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001000000010000000100110001" }, { "addr": "0x603000000013c009", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000110000000000010000000000000000" }, { "addr": "0x603000000013c00a", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010001000010011001" }, { "addr": "0x603000000013c00b", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c00c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000001000000001000100000101" }, { "addr": "0x603000000013c00d", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001000000000000000000000000000000" }, { "addr": "0x603000000013c00e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001001001100000000000000000" }, { "addr": "0x603000000013c00f", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000100100010001000010001" }, { "addr": "0x603000000013c010", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000100000001000100010000" }, { "addr": "0x603000000013c011", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010011000100010010000100010001" }, { "addr": "0x603000000013c012", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000100001001000110010000001000010" }, { "addr": "0x603000000013c013", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001000100010010000100110001" }, { "addr": "0x603000000013c014", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000000101000010" }, { "addr": "0x603000000013c015", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001000000010001000100100001" }, { "addr": "0x603000000013c016", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000100001000100010000" }, { "addr": "0x603000000013c017", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001000100000000000100010001" }, { "addr": "0x603000000013c018", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000000100010000001000100010" }, { "addr": "0x603000000013c019", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010011001000010001000100010001" }, { "addr": "0x603000000013c01a", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001000011" }, { "addr": "0x603000000013c01b", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c01c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010001" }, { "addr": "0x603000000013c01d", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c01e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c01f", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c020", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001000100000001000000010000000000100001000100010001000100010010" }, { "addr": "0x603000000013c021", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000100000" }, { "addr": "0x603000000013c022", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c023", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c024", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c025", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c026", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c027", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c028", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000001111000000010000001100000101000000000110" }, { "addr": "0x603000000013c029", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c02a", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c02b", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c02c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c02d", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c02e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c02f", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c030", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001000000010001000100010001000100010000001000010010000100100000" }, { "addr": "0x603000000013c031", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000010001000100000000000000000000001000010001000000000010" }, { "addr": "0x603000000013c032", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c033", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c034", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c035", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c036", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c037", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c038", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000100000001000100100101" }, { "addr": "0x603000000013c039", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000001000010010000100100010" }, { "addr": "0x603000000013c03a", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000001000100000000000010001000100000010000000000001000000010001" }, { "addr": "0x603000000013c03b", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c03c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c03d", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c03e", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c03f", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c080", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000110001010000000001111000" }, { "addr": "0x603000000013c081", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c082", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c100", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013c101", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013c102", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c288", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013c289", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013c290", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013c300", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013c3a0", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013c510", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013c518", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c600", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c609", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c681", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c684", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013c708", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c801", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000010000000000000000000100011" }, { "addr": "0x603000000013c807", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013d000", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013d801", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010110100010001001100000000000100" }, { "addr": "0x603000000013de82", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013de83", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df02", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013df11", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000100" }, { "addr": "0x603000000013df12", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013df19", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000100" }, { "addr": "0x603000000013e180", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013e281", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013e298", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000011100000000" }, { "addr": "0x6030000000140000", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000000000000001" }, { "addr": "0x6030000000140001", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010" }, { "addr": "0x6030000000140002", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000140003", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001" }, { "addr": "0x6030000000160000", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001" }, { "addr": "0x6030000000160001", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001" }, { "addr": "0x6030000000160002", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000011" }, { "addr": "0x6040000000100054", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6040000000100058", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x604000000010005c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6040000000100060", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6040000000100064", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6040000000100068", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x604000000010006c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6040000000100070", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6040000000100074", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6040000000100078", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x604000000010007c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6040000000100080", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6040000000100084", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6040000000100088", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x604000000010008c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6040000000100090", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6040000000100094", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6040000000100098", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x604000000010009c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x60400000001000a0", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x60400000001000a4", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x60400000001000a8", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x60400000001000ac", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x60400000001000b0", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x60400000001000b4", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x60400000001000b8", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x60400000001000bc", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x60400000001000c0", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x60400000001000c4", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x60400000001000c8", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x60400000001000cc", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x60400000001000d0", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" } ] } } ================================================ FILE: tests/data/cpu_template_helper/fingerprint_ARM_NEOVERSE_V2_5.10host.json ================================================ { "firecracker_version": "1.12.0-dev", "kernel_version": "5.10.234-225.910.amzn2.aarch64", "microcode_version": "0x0000000000000017", "bios_version": "1.0", "bios_revision": "1.0", "guest_cpu_config": { "kvm_capabilities": [], "vcpu_features": [], "reg_modifiers": [ { "addr": "0x60200000001000d4", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x60200000001000d5", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6020000000110000", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000011010" }, { "addr": "0x6020000000110001", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000011010" }, { "addr": "0x6020000000110002", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000111010" }, { "addr": "0x6030000000100000", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000111111000000000000000000000" }, { "addr": "0x6030000000100002", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100004", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100006", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100008", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000010000a", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000010000c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000010000e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100010", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100012", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100014", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100016", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100018", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000010001a", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000010001c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000010001e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100020", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100022", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100024", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100026", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100028", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000010002a", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000010002c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000010002e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100030", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100032", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100034", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100036", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100038", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000010003a", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000010003c", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000010003e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100042", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001111000101" }, { "addr": "0x6030000000100044", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100046", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100048", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000010004a", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000010004c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000010004e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100050", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138004", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138005", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138006", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138007", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013800c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013800d", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013800e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013800f", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138010", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138012", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138014", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138015", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138016", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138017", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013801c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013801d", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013801e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013801f", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138024", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138025", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138026", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138027", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013802c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013802d", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013802e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013802f", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138034", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138035", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138036", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138037", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013803c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013803d", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013803e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013803f", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138044", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138045", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138046", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138047", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013804c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013804d", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013804e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013804f", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138054", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138055", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138056", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138057", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013805c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013805d", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013805e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013805f", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138064", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138065", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138066", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138067", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013806c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013806d", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013806e", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013806f", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138074", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138075", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138076", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138077", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013807c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013807d", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013807e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013807f", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013a038", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c000", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001000001000011111101010011110001" }, { "addr": "0x603000000013c005", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000000000000000000000000000000" }, { "addr": "0x603000000013c006", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010111" }, { "addr": "0x603000000013c008", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c009", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c00a", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c00b", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c00c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c00d", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c00e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c00f", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c010", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c011", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c012", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c013", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c014", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c015", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c016", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c017", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c018", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c019", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c01a", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c01b", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c01c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c01d", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c01e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c01f", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c020", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001000100000001000000010001000000100011000100010001000100010001" }, { "addr": "0x603000000013c021", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000100001" }, { "addr": "0x603000000013c022", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c023", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c024", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c025", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c026", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c027", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c028", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000001111001000010000001100000101010000001001" }, { "addr": "0x603000000013c029", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c02a", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c02b", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c02c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c02d", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c02e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c02f", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c030", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001001000100001000100000000000100010000001000010010000100100000" }, { "addr": "0x603000000013c031", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000010001000100010001000100000000001000010001000000000010" }, { "addr": "0x603000000013c032", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c033", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c034", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c035", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c036", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c037", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c038", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000100010001000000000000100000001000100100101" }, { "addr": "0x603000000013c039", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000001100010010000100100010" }, { "addr": "0x603000000013c03a", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001001000100001000000010001000100010010000100000001000000010001" }, { "addr": "0x603000000013c03b", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c03c", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c03d", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c03e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c03f", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c080", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000110001010000000001111000" }, { "addr": "0x603000000013c081", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c082", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c100", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013c101", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013c102", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c288", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013c289", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013c290", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013c300", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013c3a0", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013c4f1", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013c4f2", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013c510", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013c518", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c600", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c609", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c681", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c684", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013c708", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c801", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000010000000000000000000100011" }, { "addr": "0x603000000013c807", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013d000", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013d801", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010110100010001001100000000000100" }, { "addr": "0x603000000013dce0", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000011000011101100" }, { "addr": "0x603000000013dce1", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013dce2", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013dce3", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013dce4", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013dce5", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013dce8", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013dcf0", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013dcf3", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013de82", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013de83", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df02", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013df11", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000100" }, { "addr": "0x603000000013df12", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013df19", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000100" }, { "addr": "0x603000000013df40", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df41", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df42", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df43", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df44", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df45", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df46", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df47", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df48", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df49", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df4a", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df4b", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df4c", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df4d", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df4e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df4f", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df50", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df51", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df52", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df53", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df54", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df55", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df56", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df57", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df58", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df59", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df5a", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df5b", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df5c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df5d", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df5e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df60", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df61", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df62", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df63", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df64", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df65", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df66", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df67", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df68", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df69", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df6a", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df6b", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df6c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df6d", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df6e", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df6f", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df70", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df71", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df72", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df73", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df74", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df75", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df76", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df77", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df78", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df79", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df7a", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df7b", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df7c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df7d", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df7e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df7f", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013e180", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013e281", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013e298", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000011100000000" }, { "addr": "0x6030000000140000", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000000000000000" }, { "addr": "0x6030000000140001", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010" }, { "addr": "0x6030000000140002", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000140003", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001" }, { "addr": "0x6040000000100054", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6040000000100058", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x604000000010005c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6040000000100060", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6040000000100064", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6040000000100068", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x604000000010006c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6040000000100070", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6040000000100074", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6040000000100078", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x604000000010007c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6040000000100080", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6040000000100084", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6040000000100088", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x604000000010008c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6040000000100090", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6040000000100094", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6040000000100098", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x604000000010009c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x60400000001000a0", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x60400000001000a4", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x60400000001000a8", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x60400000001000ac", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x60400000001000b0", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x60400000001000b4", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x60400000001000b8", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x60400000001000bc", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x60400000001000c0", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x60400000001000c4", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x60400000001000c8", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x60400000001000cc", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x60400000001000d0", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" } ] } } ================================================ FILE: tests/data/cpu_template_helper/fingerprint_ARM_NEOVERSE_V2_6.1host.json ================================================ { "firecracker_version": "1.14.0-dev", "kernel_version": "6.1.150-174.273.amzn2023.aarch64", "microcode_version": "0x0000000000000017", "bios_version": "1.0", "bios_revision": "1.0", "guest_cpu_config": { "kvm_capabilities": [], "vcpu_features": [], "reg_modifiers": [ { "addr": "0x60200000001000d4", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x60200000001000d5", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6020000000110000", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000011010" }, { "addr": "0x6020000000110001", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000011010" }, { "addr": "0x6020000000110002", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000111010" }, { "addr": "0x6030000000100000", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000111111000000000000000000000" }, { "addr": "0x6030000000100002", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100004", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100006", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100008", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000010000a", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000010000c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000010000e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100010", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100012", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100014", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100016", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100018", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000010001a", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000010001c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000010001e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100020", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100022", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100024", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100026", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100028", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000010002a", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000010002c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000010002e", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100030", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100032", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100034", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100036", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100038", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000010003a", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000010003c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000010003e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100042", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001111000101" }, { "addr": "0x6030000000100044", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100046", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100048", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000010004a", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000010004c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000010004e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000100050", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138004", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138005", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138006", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138007", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013800c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013800d", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013800e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013800f", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138010", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138012", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138014", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138015", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138016", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138017", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013801c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013801d", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013801e", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013801f", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138024", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138025", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138026", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138027", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013802c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013802d", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013802e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013802f", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138034", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138035", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138036", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138037", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013803c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013803d", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013803e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013803f", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138044", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138045", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138046", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138047", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013804c", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013804d", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013804e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013804f", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138054", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138055", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138056", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138057", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013805c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013805d", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013805e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013805f", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138064", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138065", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138066", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138067", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013806c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013806d", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013806e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013806f", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138074", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138075", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138076", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000138077", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013807c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013807d", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013807e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013807f", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013808c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001000" }, { "addr": "0x603000000013a038", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c000", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001000001000011111101010011110001" }, { "addr": "0x603000000013c005", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000000000000000000000000000000" }, { "addr": "0x603000000013c006", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010111" }, { "addr": "0x603000000013c008", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c009", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c00a", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c00b", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c00c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c00d", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c00e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c00f", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c010", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c011", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c012", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c013", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c014", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c015", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c016", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c017", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c018", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c019", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c01a", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c01b", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c01c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c01d", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c01e", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c01f", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c020", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001000100000001000000010001000000100001000100010001000100010001" }, { "addr": "0x603000000013c021", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000100001" }, { "addr": "0x603000000013c022", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c023", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c024", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c025", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c026", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c027", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c028", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000001111000000010000001100000101000000000110" }, { "addr": "0x603000000013c029", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c02a", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c02b", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c02c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c02d", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c02e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c02f", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c030", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001001000100001000100000000000100010000001000010010000100100000" }, { "addr": "0x603000000013c031", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000010001000100010001000100000000001000010001000000000010" }, { "addr": "0x603000000013c032", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c033", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c034", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c035", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c036", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c037", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c038", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000100010001000000000000100000001000100100101" }, { "addr": "0x603000000013c039", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001000000000000000000000000000000010000001100010010000100100010" }, { "addr": "0x603000000013c03a", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001001000100001000000010001000100010010000000000001000000010001" }, { "addr": "0x603000000013c03b", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c03c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c03d", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c03e", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c03f", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c080", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000110001010000000001111000" }, { "addr": "0x603000000013c081", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c082", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c100", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013c101", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013c102", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c288", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013c289", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013c290", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013c300", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013c3a0", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013c510", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013c518", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c600", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c609", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c681", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c684", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013c708", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013c801", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000010000000000000000000100011" }, { "addr": "0x603000000013c807", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013d000", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013d801", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010110100010001001100000000000100" }, { "addr": "0x603000000013de82", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013de83", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013df02", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013df11", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000100" }, { "addr": "0x603000000013df12", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x603000000013df19", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000100" }, { "addr": "0x603000000013e180", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013e281", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000001110111100111111011000111111011011011101011011100000011011110" }, { "addr": "0x603000000013e298", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000011100000000" }, { "addr": "0x6030000000140000", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000000000000001" }, { "addr": "0x6030000000140001", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010" }, { "addr": "0x6030000000140002", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6030000000140003", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001" }, { "addr": "0x6030000000160000", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001" }, { "addr": "0x6030000000160001", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001" }, { "addr": "0x6030000000160002", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000011" }, { "addr": "0x6040000000100054", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6040000000100058", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x604000000010005c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6040000000100060", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6040000000100064", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6040000000100068", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x604000000010006c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6040000000100070", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6040000000100074", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6040000000100078", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x604000000010007c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6040000000100080", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6040000000100084", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6040000000100088", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x604000000010008c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6040000000100090", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6040000000100094", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x6040000000100098", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x604000000010009c", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x60400000001000a0", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x60400000001000a4", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x60400000001000a8", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x60400000001000ac", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x60400000001000b0", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x60400000001000b4", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x60400000001000b8", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x60400000001000bc", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x60400000001000c0", "bitmap": 
"0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x60400000001000c4", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x60400000001000c8", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x60400000001000cc", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x60400000001000d0", "bitmap": "0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" } ] } } ================================================ FILE: tests/data/cpu_template_helper/fingerprint_INTEL_CASCADELAKE_5.10host.json ================================================ { "firecracker_version": "1.13.0-dev", "kernel_version": "5.10.238-234.956.amzn2.x86_64", "microcode_version": "0x5003901", "bios_version": "1.0", "bios_revision": "4.13", "guest_cpu_config": { "kvm_capabilities": [], "cpuid_modifiers": [ { "leaf": "0x0", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000010110" }, { "register": "ebx", "bitmap": "0b01110101011011100110010101000111" }, { "register": "ecx", "bitmap": "0b01101100011001010111010001101110" }, { "register": "edx", "bitmap": "0b01001001011001010110111001101001" } ] }, { "leaf": "0x1", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000001010000011001010111" }, { "register": "ebx", "bitmap": "0b00000000000000010000100000000000" }, { "register": "ecx", "bitmap": "0b11110111111110100011001000000011" }, { "register": "edx", "bitmap": "0b00001111100010111111101111111111" } ] }, { "leaf": "0x2", "subleaf": 
"0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b01110110000000110110001100000001" }, { "register": "ebx", "bitmap": "0b00000000111100001011010111111111" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000110000110000000000000000" } ] }, { "leaf": "0x3", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x4", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000100100001" }, { "register": "ebx", "bitmap": "0b00000001110000000000000000111111" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000111111" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x4", "subleaf": "0x1", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000100100010" }, { "register": "ebx", "bitmap": "0b00000001110000000000000000111111" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000111111" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x4", "subleaf": "0x2", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000101000011" }, { "register": "ebx", "bitmap": "0b00000011110000000000000000111111" }, { "register": "ecx", "bitmap": "0b00000000000000000000001111111111" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x4", "subleaf": "0x3", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000101100011" }, { "register": "ebx", "bitmap": "0b00000010100000000000000000111111" }, { "register": "ecx", "bitmap": "0b00000000000000001100111111111111" }, { 
"register": "edx", "bitmap": "0b00000000000000000000000000000101" } ] }, { "leaf": "0x4", "subleaf": "0x4", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x5", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x6", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000100" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x7", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b11010001100111110110011111101011" }, { "register": "ecx", "bitmap": "0b00000000000000000000100000001100" }, { "register": "edx", "bitmap": "0b10101100000000000000010000000000" } ] }, { "leaf": "0x8", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x9", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": 
"0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xa", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xb", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000001" }, { "register": "ecx", "bitmap": "0b00000000000000000000000100000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xb", "subleaf": "0x1", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000101" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000001" }, { "register": "ecx", "bitmap": "0b00000000000000000000001000000001" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xc", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xd", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000001011111111" }, { "register": "ebx", "bitmap": "0b00000000000000000000001001000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000101010001000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xd", "subleaf": "0x1", "flags": 1, "modifiers": [ { 
"register": "eax", "bitmap": "0b00000000000000000000000000001111" }, { "register": "ebx", "bitmap": "0b00000000000000000000001001000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xd", "subleaf": "0x2", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000100000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000001001000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xd", "subleaf": "0x3", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000001000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000001111000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xd", "subleaf": "0x4", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000001000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000010000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xd", "subleaf": "0x5", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000001000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000010001000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xd", "subleaf": "0x6", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000001000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000010010000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": 
"0b00000000000000000000000000000000" } ] }, { "leaf": "0xd", "subleaf": "0x7", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000010000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000011010000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xd", "subleaf": "0x9", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000001000" }, { "register": "ebx", "bitmap": "0b00000000000000000000101010000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xe", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xf", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x10", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x11", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, 
{ "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x12", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x13", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x14", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x15", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x16", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x40000000", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", 
"bitmap": "0b01000000000000000000000000000001" }, { "register": "ebx", "bitmap": "0b01001011010011010101011001001011" }, { "register": "ecx", "bitmap": "0b01010110010010110100110101010110" }, { "register": "edx", "bitmap": "0b00000000000000000000000001001101" } ] }, { "leaf": "0x40000001", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000001000000000111111011111011" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000000", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b10000000000000000000000000001000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000001", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000100100001" }, { "register": "edx", "bitmap": "0b00101100000100000000100000000000" } ] }, { "leaf": "0x80000002", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b01100101011101000110111001001001" }, { "register": "ebx", "bitmap": "0b00101001010100100010100001101100" }, { "register": "ecx", "bitmap": "0b01101111011001010101100000100000" }, { "register": "edx", "bitmap": "0b00101001010100100010100001101110" } ] }, { "leaf": "0x80000003", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b01101111011100100101000000100000" }, { "register": "ebx", "bitmap": "0b01110011011100110110010101100011" }, { "register": "ecx", "bitmap": "0b01000000001000000111001001101111" }, { "register": "edx", 
"bitmap": "0b00110101001011100011001000100000" } ] }, { "leaf": "0x80000004", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b01111010010010000100011100110000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000005", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000006", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000001000000000110000001000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000007", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000100000000" } ] }, { "leaf": "0x80000008", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000011000000101110" }, { "register": "ebx", "bitmap": "0b00000001000000001101000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] } ], "msr_modifiers": [ { "addr": "0x11", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x12", "bitmap": 
"0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x34", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x3a", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x3b", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x48", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x8b", "bitmap": "0b0000000000000000000000000000000100000000000000000000000000000000" }, { "addr": "0x9e", "bitmap": "0b0000000000000000000000000000000000000000000000110000000000000000" }, { "addr": "0xce", "bitmap": "0b0000000000000000000000000000000010000000000000000000000000000000" }, { "addr": "0x10a", "bitmap": "0b0000000000000000000000000000000000001100000010101010000011101011" }, { "addr": "0x140", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x174", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x175", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x176", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x1a0", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000001" }, { "addr": "0x1fc", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x277", "bitmap": "0b0000000000000111000001000000011000000000000001110000010000000110" }, { "addr": "0xd90", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x4b564d00", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x4b564d01", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x4b564d02", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, 
{ "addr": "0x4b564d03", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x4b564d04", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x4b564d05", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000001" }, { "addr": "0x4b564d06", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x4b564d07", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0xc0000081", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0xc0000082", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0xc0000083", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0xc0000084", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0xc0000102", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0xc0000103", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0xc0010015", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" } ] } } ================================================ FILE: tests/data/cpu_template_helper/fingerprint_INTEL_CASCADELAKE_6.1host.json ================================================ { "firecracker_version": "1.13.0-dev", "kernel_version": "6.1.141-165.249.amzn2023.x86_64", "microcode_version": "0x5003901", "bios_version": "1.0", "bios_revision": "4.13", "guest_cpu_config": { "kvm_capabilities": [], "cpuid_modifiers": [ { "leaf": "0x0", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000010110" }, { "register": "ebx", "bitmap": "0b01110101011011100110010101000111" }, { "register": "ecx", "bitmap": "0b01101100011001010111010001101110" }, { "register": "edx", 
"bitmap": "0b01001001011001010110111001101001" } ] }, { "leaf": "0x1", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000001010000011001010111" }, { "register": "ebx", "bitmap": "0b00000000000000010000100000000000" }, { "register": "ecx", "bitmap": "0b11110111111110100011001000000011" }, { "register": "edx", "bitmap": "0b00001111100010111111101111111111" } ] }, { "leaf": "0x2", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b01110110000000110110001100000001" }, { "register": "ebx", "bitmap": "0b00000000111100001011010111111111" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000110000110000000000000000" } ] }, { "leaf": "0x3", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x4", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000100100001" }, { "register": "ebx", "bitmap": "0b00000001110000000000000000111111" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000111111" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x4", "subleaf": "0x1", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000100100010" }, { "register": "ebx", "bitmap": "0b00000001110000000000000000111111" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000111111" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x4", "subleaf": "0x2", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000101000011" }, { "register": "ebx", "bitmap": 
"0b00000011110000000000000000111111" }, { "register": "ecx", "bitmap": "0b00000000000000000000001111111111" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x4", "subleaf": "0x3", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000101100011" }, { "register": "ebx", "bitmap": "0b00000010100000000000000000111111" }, { "register": "ecx", "bitmap": "0b00000000000000001100111111111111" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000101" } ] }, { "leaf": "0x4", "subleaf": "0x4", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x5", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x6", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000100" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x7", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b11010001100111110110011111101011" }, { "register": "ecx", "bitmap": "0b00000000000000000000100000001100" }, { "register": "edx", "bitmap": "0b10101100000000000000010000000000" } ] }, { "leaf": "0x8", "subleaf": "0x0", "flags": 0, "modifiers": [ { 
"register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x9", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xa", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xb", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000001" }, { "register": "ecx", "bitmap": "0b00000000000000000000000100000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xb", "subleaf": "0x1", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000101" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000001" }, { "register": "ecx", "bitmap": "0b00000000000000000000001000000001" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xc", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": 
"0b00000000000000000000000000000000" } ] }, { "leaf": "0xd", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000001011111111" }, { "register": "ebx", "bitmap": "0b00000000000000000000001001000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000101010001000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xd", "subleaf": "0x1", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000001111" }, { "register": "ebx", "bitmap": "0b00000000000000000000001001000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xd", "subleaf": "0x2", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000100000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000001001000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xd", "subleaf": "0x3", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000001000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000001111000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xd", "subleaf": "0x4", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000001000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000010000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xd", "subleaf": "0x5", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000001000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000010001000000" }, { 
"register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xd", "subleaf": "0x6", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000001000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000010010000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xd", "subleaf": "0x7", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000010000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000011010000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xd", "subleaf": "0x9", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000001000" }, { "register": "ebx", "bitmap": "0b00000000000000000000101010000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xe", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xf", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x10", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": 
"0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x11", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x12", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x13", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x14", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x15", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": 
"0b00000000000000000000000000000000" } ] }, { "leaf": "0x16", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x40000000", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b01000000000000000000000000000001" }, { "register": "ebx", "bitmap": "0b01001011010011010101011001001011" }, { "register": "ecx", "bitmap": "0b01010110010010110100110101010110" }, { "register": "edx", "bitmap": "0b00000000000000000000000001001101" } ] }, { "leaf": "0x40000001", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000001000000000111111011111011" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000000", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b10000000000000000000000000001000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000001", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000100100001" }, { "register": "edx", "bitmap": "0b00101100000100000000100000000000" } ] }, { "leaf": "0x80000002", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b01100101011101000110111001001001" }, { "register": "ebx", "bitmap": 
"0b00101001010100100010100001101100" }, { "register": "ecx", "bitmap": "0b01101111011001010101100000100000" }, { "register": "edx", "bitmap": "0b00101001010100100010100001101110" } ] }, { "leaf": "0x80000003", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b01101111011100100101000000100000" }, { "register": "ebx", "bitmap": "0b01110011011100110110010101100011" }, { "register": "ecx", "bitmap": "0b01000000001000000111001001101111" }, { "register": "edx", "bitmap": "0b00110101001011100011001000100000" } ] }, { "leaf": "0x80000004", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b01111010010010000100011100110000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000005", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000006", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000001000000000110000001000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000007", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000100000000" } ] }, { "leaf": "0x80000008", 
"subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000011000000101110" }, { "register": "ebx", "bitmap": "0b00000001000000001101000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] } ], "msr_modifiers": [ { "addr": "0x11", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x12", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x34", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x3a", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x3b", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x48", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x8b", "bitmap": "0b0000000000000000000000000000000100000000000000000000000000000000" }, { "addr": "0x9e", "bitmap": "0b0000000000000000000000000000000000000000000000110000000000000000" }, { "addr": "0xce", "bitmap": "0b0000000000000000000000000000000010000000000000000000000000000000" }, { "addr": "0x10a", "bitmap": "0b0000000000000000000000000000000000001100000010101010000011101011" }, { "addr": "0x140", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x174", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x175", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x176", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x1a0", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000001" }, { "addr": "0x1fc", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x277", "bitmap": 
"0b0000000000000111000001000000011000000000000001110000010000000110" }, { "addr": "0xd90", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x4b564d00", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x4b564d01", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x4b564d02", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x4b564d03", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x4b564d04", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x4b564d05", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000001" }, { "addr": "0x4b564d06", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x4b564d07", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0xc0000081", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0xc0000082", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0xc0000083", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0xc0000084", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0xc0000102", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0xc0000103", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0xc0010015", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" } ] } } ================================================ FILE: tests/data/cpu_template_helper/fingerprint_INTEL_GRANITE_RAPIDS_5.10host.json ================================================ { "firecracker_version": "1.15.0-dev", 
"kernel_version": "5.10.247-246.992.amzn2.x86_64", "microcode_version": "0x10003e0", "bios_version": "1.0", "bios_revision": "1.15", "guest_cpu_config": { "kvm_capabilities": [], "cpuid_modifiers": [ { "leaf": "0x0", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000011111" }, { "register": "ebx", "bitmap": "0b01110101011011100110010101000111" }, { "register": "ecx", "bitmap": "0b01101100011001010111010001101110" }, { "register": "edx", "bitmap": "0b01001001011001010110111001101001" } ] }, { "leaf": "0x1", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000010100000011011010001" }, { "register": "ebx", "bitmap": "0b00000000000000010000100000000000" }, { "register": "ecx", "bitmap": "0b11110111111110100011001000000011" }, { "register": "edx", "bitmap": "0b00001111100010111111101111111111" } ] }, { "leaf": "0x2", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000111111101111111100000001" }, { "register": "ebx", "bitmap": "0b00000000000000000000000011110000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x3", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x4", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000100100001" }, { "register": "ebx", "bitmap": "0b00000010110000000000000000111111" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000111111" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x4", "subleaf": "0x1", "flags": 1, 
"modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000100100010" }, { "register": "ebx", "bitmap": "0b00000011110000000000000000111111" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000111111" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x4", "subleaf": "0x2", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000101000011" }, { "register": "ebx", "bitmap": "0b00000011110000000000000000111111" }, { "register": "ecx", "bitmap": "0b00000000000000000000011111111111" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x4", "subleaf": "0x3", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000101100011" }, { "register": "ebx", "bitmap": "0b00000011110000000000000000111111" }, { "register": "ecx", "bitmap": "0b00000000000001110111111111111111" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000100" } ] }, { "leaf": "0x4", "subleaf": "0x4", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x5", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x6", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000100" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", 
"bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x7", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000001" }, { "register": "ebx", "bitmap": "0b11110001101111110010011111101011" }, { "register": "ecx", "bitmap": "0b00011010010000010101111101001110" }, { "register": "edx", "bitmap": "0b10101100000000010100010000010000" } ] }, { "leaf": "0x7", "subleaf": "0x1", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000100000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x8", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x9", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xa", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xb", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": 
"0b00000000000000000000000000000001" }, { "register": "ecx", "bitmap": "0b00000000000000000000000100000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xb", "subleaf": "0x1", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000101" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000001" }, { "register": "ecx", "bitmap": "0b00000000000000000000001000000001" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xc", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xd", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000001011100111" }, { "register": "ebx", "bitmap": "0b00000000000000000000001001000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000101010001000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xd", "subleaf": "0x1", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000001111" }, { "register": "ebx", "bitmap": "0b00000000000000000000001001000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xd", "subleaf": "0x2", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000100000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000001001000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xd", "subleaf": "0x5", "flags": 1, "modifiers": [ { 
"register": "eax", "bitmap": "0b00000000000000000000000001000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000010001000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xd", "subleaf": "0x6", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000001000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000010010000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xd", "subleaf": "0x7", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000010000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000011010000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xd", "subleaf": "0x9", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000001000" }, { "register": "ebx", "bitmap": "0b00000000000000000000101010000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xe", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xf", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": 
"0b00000000000000000000000000000000" } ] }, { "leaf": "0x10", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x11", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x12", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x13", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x14", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x15", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": 
"0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x16", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x17", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x18", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x19", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x1a", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x1b", "subleaf": "0x0", "flags": 0, 
"modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x1c", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x1d", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x1e", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x1f", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000001" }, { "register": "ecx", "bitmap": "0b00000000000000000000000100000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x1f", "subleaf": "0x1", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000101" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000001" }, { "register": "ecx", "bitmap": "0b00000000000000000000001000000001" }, { "register": 
"edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x40000000", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b01000000000000000000000000000001" }, { "register": "ebx", "bitmap": "0b01001011010011010101011001001011" }, { "register": "ecx", "bitmap": "0b01010110010010110100110101010110" }, { "register": "edx", "bitmap": "0b00000000000000000000000001001101" } ] }, { "leaf": "0x40000001", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000001000000000111111011111011" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000000", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b10000000000000000000000000001000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000001", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000100100001" }, { "register": "edx", "bitmap": "0b00101100000100000000100000000000" } ] }, { "leaf": "0x80000002", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b01100101011101000110111001001001" }, { "register": "ebx", "bitmap": "0b00101001010100100010100001101100" }, { "register": "ecx", "bitmap": "0b01101111011001010101100000100000" }, { "register": "edx", "bitmap": "0b00101001010100100010100001101110" } ] }, { "leaf": "0x80000003", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b01101111011100100101000000100000" }, { "register": 
"ebx", "bitmap": "0b01110011011100110110010101100011" }, { "register": "ecx", "bitmap": "0b00000000000000000111001001101111" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000004", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000005", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000006", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00001000000000000111000001000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000007", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000100000000" } ] }, { "leaf": "0x80000008", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000011100100110100" }, { "register": "ebx", "bitmap": "0b00000001000000001101001000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] } ], 
"msr_modifiers": [ { "addr": "0x11", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x12", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x34", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x3a", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x3b", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x48", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x8b", "bitmap": "0b0000000000000000000000000000000100000000000000000000000000000000" }, { "addr": "0x9e", "bitmap": "0b0000000000000000000000000000000000000000000000110000000000000000" }, { "addr": "0xce", "bitmap": "0b0000000000000000000000000000000010000000000000000000000000000000" }, { "addr": "0xe1", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x10a", "bitmap": "0b0100000000000000000000000000000000001101000010001110000011101011" }, { "addr": "0x140", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x174", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x175", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x176", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x1a0", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000001" }, { "addr": "0x1fc", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x277", "bitmap": "0b0000000000000111000001000000011000000000000001110000010000000110" }, { "addr": "0x4b564d00", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x4b564d01", "bitmap": 
"0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x4b564d02", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x4b564d03", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x4b564d04", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x4b564d05", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000001" }, { "addr": "0x4b564d06", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x4b564d07", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0xc0000081", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0xc0000082", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0xc0000083", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0xc0000084", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0xc0000102", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0xc0000103", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0xc0010015", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" } ] } } ================================================ FILE: tests/data/cpu_template_helper/fingerprint_INTEL_GRANITE_RAPIDS_6.1host.json ================================================ { "firecracker_version": "1.15.0-dev", "kernel_version": "6.1.159-182.297.amzn2023.x86_64", "microcode_version": "0x10003e0", "bios_version": "1.0", "bios_revision": "1.14", "guest_cpu_config": { "kvm_capabilities": [], "cpuid_modifiers": [ { "leaf": "0x0", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": 
"0b00000000000000000000000000011111" }, { "register": "ebx", "bitmap": "0b01110101011011100110010101000111" }, { "register": "ecx", "bitmap": "0b01101100011001010111010001101110" }, { "register": "edx", "bitmap": "0b01001001011001010110111001101001" } ] }, { "leaf": "0x1", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000010100000011011010001" }, { "register": "ebx", "bitmap": "0b00000000000000010000100000000000" }, { "register": "ecx", "bitmap": "0b11110111111110100011001000000011" }, { "register": "edx", "bitmap": "0b00001111100010111111101111111111" } ] }, { "leaf": "0x2", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000111111101111111100000001" }, { "register": "ebx", "bitmap": "0b00000000000000000000000011110000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x3", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x4", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000100100001" }, { "register": "ebx", "bitmap": "0b00000010110000000000000000111111" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000111111" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x4", "subleaf": "0x1", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000100100010" }, { "register": "ebx", "bitmap": "0b00000011110000000000000000111111" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000111111" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" 
} ] }, { "leaf": "0x4", "subleaf": "0x2", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000101000011" }, { "register": "ebx", "bitmap": "0b00000011110000000000000000111111" }, { "register": "ecx", "bitmap": "0b00000000000000000000011111111111" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x4", "subleaf": "0x3", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000101100011" }, { "register": "ebx", "bitmap": "0b00000011110000000000000000111111" }, { "register": "ecx", "bitmap": "0b00000000000001110111111111111111" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000100" } ] }, { "leaf": "0x4", "subleaf": "0x4", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x5", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x6", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000100" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x7", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000010" }, { "register": "ebx", "bitmap": "0b11110001101111110010011111101011" }, { "register": "ecx", "bitmap": 
"0b00011011010000010101111101001110" }, { "register": "edx", "bitmap": "0b10101111110000010100010000010000" } ] }, { "leaf": "0x7", "subleaf": "0x1", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000110000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x7", "subleaf": "0x2", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000111111" } ] }, { "leaf": "0x8", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x9", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xa", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xb", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { 
"register": "ebx", "bitmap": "0b00000000000000000000000000000001" }, { "register": "ecx", "bitmap": "0b00000000000000000000000100000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xb", "subleaf": "0x1", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000101" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000001" }, { "register": "ecx", "bitmap": "0b00000000000000000000001000000001" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xc", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xd", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000001100000001011100111" }, { "register": "ebx", "bitmap": "0b00000000000000000000001001000000" }, { "register": "ecx", "bitmap": "0b00000000000000000010101100000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xd", "subleaf": "0x1", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000011111" }, { "register": "ebx", "bitmap": "0b00000000000000000000001001000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xd", "subleaf": "0x2", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000100000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000001001000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xd", "subleaf": "0x5", 
"flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000001000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000010001000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xd", "subleaf": "0x6", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000001000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000010010000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xd", "subleaf": "0x7", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000010000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000011010000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xd", "subleaf": "0x9", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000001000" }, { "register": "ebx", "bitmap": "0b00000000000000000000101010000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xd", "subleaf": "0x11", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000001000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000101011000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000010" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xd", "subleaf": "0x12", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000010000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000101100000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000110" }, { 
"register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xe", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xf", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x10", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x11", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x12", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x13", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": 
"0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x14", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x15", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x16", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x17", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x18", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x19", "subleaf": "0x0", "flags": 0, 
"modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x1a", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x1b", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x1c", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x1d", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000001" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x1d", "subleaf": "0x1", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000100000000000010000000000000" }, { "register": "ebx", "bitmap": "0b00000000000010000000000001000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000010000" }, { "register": 
"edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x1e", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000100000000010000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x1f", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000001" }, { "register": "ecx", "bitmap": "0b00000000000000000000000100000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x1f", "subleaf": "0x1", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000101" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000001" }, { "register": "ecx", "bitmap": "0b00000000000000000000001000000001" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x40000000", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b01000000000000000000000000000001" }, { "register": "ebx", "bitmap": "0b01001011010011010101011001001011" }, { "register": "ecx", "bitmap": "0b01010110010010110100110101010110" }, { "register": "edx", "bitmap": "0b00000000000000000000000001001101" } ] }, { "leaf": "0x40000001", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000001000000000111111011111011" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000000", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b10000000000000000000000000001000" }, { "register": "ebx", "bitmap": 
"0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000001", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000100100001" }, { "register": "edx", "bitmap": "0b00101100000100000000100000000000" } ] }, { "leaf": "0x80000002", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b01100101011101000110111001001001" }, { "register": "ebx", "bitmap": "0b00101001010100100010100001101100" }, { "register": "ecx", "bitmap": "0b01101111011001010101100000100000" }, { "register": "edx", "bitmap": "0b00101001010100100010100001101110" } ] }, { "leaf": "0x80000003", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b01101111011100100101000000100000" }, { "register": "ebx", "bitmap": "0b01110011011100110110010101100011" }, { "register": "ecx", "bitmap": "0b00000000000000000111001001101111" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000004", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000005", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000006", 
"subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00001000000000000111000001000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000007", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000100000000" } ] }, { "leaf": "0x80000008", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000011100100110100" }, { "register": "ebx", "bitmap": "0b00000001000000001101001000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] } ], "msr_modifiers": [ { "addr": "0x11", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x12", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x34", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x3a", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x3b", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x48", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x8b", "bitmap": "0b0000000000000000000000000000000100000000000000000000000000000000" }, { "addr": "0x9e", "bitmap": "0b0000000000000000000000000000000000000000000000110000000000000000" }, { "addr": "0xce", "bitmap": "0b0000000000000000000000000000000010000000000000000000000000000000" }, { "addr": "0xe1", "bitmap": 
"0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x10a", "bitmap": "0b0100000000000000000000000000000000001101000010001110000011101011" }, { "addr": "0x140", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x174", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x175", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x176", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x1a0", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000001" }, { "addr": "0x1c4", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x1c5", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x1fc", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x277", "bitmap": "0b0000000000000111000001000000011000000000000001110000010000000110" }, { "addr": "0x4b564d00", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x4b564d01", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x4b564d02", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x4b564d03", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x4b564d04", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x4b564d05", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000001" }, { "addr": "0x4b564d06", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x4b564d07", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0xc0000081", "bitmap": 
"0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0xc0000082", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0xc0000083", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0xc0000084", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0xc0000102", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0xc0000103", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0xc0010015", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" } ] } } ================================================ FILE: tests/data/cpu_template_helper/fingerprint_INTEL_ICELAKE_5.10host.json ================================================ { "firecracker_version": "1.13.0-dev", "kernel_version": "5.10.238-234.956.amzn2.x86_64", "microcode_version": "0xd000404", "bios_version": "1.0", "bios_revision": "1.41", "guest_cpu_config": { "kvm_capabilities": [], "cpuid_modifiers": [ { "leaf": "0x0", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000011011" }, { "register": "ebx", "bitmap": "0b01110101011011100110010101000111" }, { "register": "ecx", "bitmap": "0b01101100011001010111010001101110" }, { "register": "edx", "bitmap": "0b01001001011001010110111001101001" } ] }, { "leaf": "0x1", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000001100000011010100110" }, { "register": "ebx", "bitmap": "0b00000000000000010000100000000000" }, { "register": "ecx", "bitmap": "0b11110111111110100011001000000011" }, { "register": "edx", "bitmap": "0b00001111100010111111101111111111" } ] }, { "leaf": "0x2", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000111111101111111100000001" }, { "register": "ebx", "bitmap": 
"0b00000000000000000000000011110000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x3", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x4", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000100100001" }, { "register": "ebx", "bitmap": "0b00000010110000000000000000111111" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000111111" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x4", "subleaf": "0x1", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000100100010" }, { "register": "ebx", "bitmap": "0b00000001110000000000000000111111" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000111111" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x4", "subleaf": "0x2", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000101000011" }, { "register": "ebx", "bitmap": "0b00000100110000000000000000111111" }, { "register": "ecx", "bitmap": "0b00000000000000000000001111111111" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x4", "subleaf": "0x3", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000101100011" }, { "register": "ebx", "bitmap": "0b00000010110000000000000000111111" }, { "register": "ecx", "bitmap": "0b00000000000000010001111111111111" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000100" } ] }, { "leaf": "0x4", "subleaf": "0x4", "flags": 1, "modifiers": [ { 
"register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x5", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x6", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000100" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x7", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000001" }, { "register": "ebx", "bitmap": "0b11110001101111110010011111101011" }, { "register": "ecx", "bitmap": "0b00000000010000010101111101001110" }, { "register": "edx", "bitmap": "0b10101100000000000000010000010000" } ] }, { "leaf": "0x7", "subleaf": "0x1", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x8", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": 
"0b00000000000000000000000000000000" } ] }, { "leaf": "0x9", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xa", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xb", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000001" }, { "register": "ecx", "bitmap": "0b00000000000000000000000100000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xb", "subleaf": "0x1", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000101" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000001" }, { "register": "ecx", "bitmap": "0b00000000000000000000001000000001" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xc", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xd", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000001011100111" }, { "register": "ebx", "bitmap": "0b00000000000000000000001001000000" }, { 
"register": "ecx", "bitmap": "0b00000000000000000000101010001000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xd", "subleaf": "0x1", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000001111" }, { "register": "ebx", "bitmap": "0b00000000000000000000001001000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xd", "subleaf": "0x2", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000100000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000001001000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xd", "subleaf": "0x5", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000001000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000010001000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xd", "subleaf": "0x6", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000001000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000010010000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xd", "subleaf": "0x7", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000010000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000011010000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xd", "subleaf": "0x9", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": 
"0b00000000000000000000000000001000" }, { "register": "ebx", "bitmap": "0b00000000000000000000101010000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xe", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xf", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x10", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x11", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x12", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": 
"0b00000000000000000000000000000000" } ] }, { "leaf": "0x13", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x14", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x15", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x16", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x17", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x18", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": 
"0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x19", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x1a", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x1b", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x40000000", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b01000000000000000000000000000001" }, { "register": "ebx", "bitmap": "0b01001011010011010101011001001011" }, { "register": "ecx", "bitmap": "0b01010110010010110100110101010110" }, { "register": "edx", "bitmap": "0b00000000000000000000000001001101" } ] }, { "leaf": "0x40000001", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000001000000000111111011111011" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000000", "subleaf": "0x0", 
"flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b10000000000000000000000000001000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000001", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000100100001" }, { "register": "edx", "bitmap": "0b00101100000100000000100000000000" } ] }, { "leaf": "0x80000002", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b01100101011101000110111001001001" }, { "register": "ebx", "bitmap": "0b00101001010100100010100001101100" }, { "register": "ecx", "bitmap": "0b01101111011001010101100000100000" }, { "register": "edx", "bitmap": "0b00101001010100100010100001101110" } ] }, { "leaf": "0x80000003", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b01101111011100100101000000100000" }, { "register": "ebx", "bitmap": "0b01110011011100110110010101100011" }, { "register": "ecx", "bitmap": "0b01000000001000000111001001101111" }, { "register": "edx", "bitmap": "0b00111001001011100011001000100000" } ] }, { "leaf": "0x80000004", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b01111010010010000100011100110000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000005", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": 
"0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000006", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000001000000000110000001000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000007", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000100000000" } ] }, { "leaf": "0x80000008", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000011100100101110" }, { "register": "ebx", "bitmap": "0b00000001000000001101001000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] } ], "msr_modifiers": [ { "addr": "0x11", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x12", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x34", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x3a", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x3b", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x48", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x8b", "bitmap": "0b0000000000000000000000000000000100000000000000000000000000000000" }, { "addr": "0x9e", "bitmap": 
"0b0000000000000000000000000000000000000000000000110000000000000000" }, { "addr": "0xce", "bitmap": "0b0000000000000000000000000000000010000000000000000000000000000000" }, { "addr": "0x10a", "bitmap": "0b0000000000000000000000000000000000001100000000101010000011101011" }, { "addr": "0x140", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x174", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x175", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x176", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x1a0", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000001" }, { "addr": "0x1fc", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x277", "bitmap": "0b0000000000000111000001000000011000000000000001110000010000000110" }, { "addr": "0x4b564d00", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x4b564d01", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x4b564d02", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x4b564d03", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x4b564d04", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x4b564d05", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000001" }, { "addr": "0x4b564d06", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x4b564d07", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0xc0000081", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0xc0000082", "bitmap": 
"0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0xc0000083", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0xc0000084", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0xc0000102", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0xc0000103", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0xc0010015", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" } ] } } ================================================ FILE: tests/data/cpu_template_helper/fingerprint_INTEL_ICELAKE_6.1host.json ================================================ { "firecracker_version": "1.13.0-dev", "kernel_version": "6.1.141-165.249.amzn2023.x86_64", "microcode_version": "0xd000404", "bios_version": "1.0", "bios_revision": "1.41", "guest_cpu_config": { "kvm_capabilities": [], "cpuid_modifiers": [ { "leaf": "0x0", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000011011" }, { "register": "ebx", "bitmap": "0b01110101011011100110010101000111" }, { "register": "ecx", "bitmap": "0b01101100011001010111010001101110" }, { "register": "edx", "bitmap": "0b01001001011001010110111001101001" } ] }, { "leaf": "0x1", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000001100000011010100110" }, { "register": "ebx", "bitmap": "0b00000000000000010000100000000000" }, { "register": "ecx", "bitmap": "0b11110111111110100011001000000111" }, { "register": "edx", "bitmap": "0b00001111101010111111101111111111" } ] }, { "leaf": "0x2", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000111111101111111100000001" }, { "register": "ebx", "bitmap": "0b00000000000000000000000011110000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { 
"register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x3", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x4", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000100100001" }, { "register": "ebx", "bitmap": "0b00000010110000000000000000111111" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000111111" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x4", "subleaf": "0x1", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000100100010" }, { "register": "ebx", "bitmap": "0b00000001110000000000000000111111" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000111111" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x4", "subleaf": "0x2", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000101000011" }, { "register": "ebx", "bitmap": "0b00000100110000000000000000111111" }, { "register": "ecx", "bitmap": "0b00000000000000000000001111111111" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x4", "subleaf": "0x3", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000101100011" }, { "register": "ebx", "bitmap": "0b00000010110000000000000000111111" }, { "register": "ecx", "bitmap": "0b00000000000000010001111111111111" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000100" } ] }, { "leaf": "0x4", "subleaf": "0x4", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": 
"0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x5", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x6", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000100" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x7", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000010" }, { "register": "ebx", "bitmap": "0b11110001101111110010011111101011" }, { "register": "ecx", "bitmap": "0b00000000010000010101111101001110" }, { "register": "edx", "bitmap": "0b10101100000000000000010000010000" } ] }, { "leaf": "0x7", "subleaf": "0x1", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x7", "subleaf": "0x2", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000001" } ] }, { "leaf": "0x8", "subleaf": "0x0", "flags": 0, "modifiers": [ { 
"register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x9", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xa", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xb", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000001" }, { "register": "ecx", "bitmap": "0b00000000000000000000000100000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xb", "subleaf": "0x1", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000101" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000001" }, { "register": "ecx", "bitmap": "0b00000000000000000000001000000001" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xc", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": 
"0b00000000000000000000000000000000" } ] }, { "leaf": "0xd", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000001011100111" }, { "register": "ebx", "bitmap": "0b00000000000000000000001001000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000101010001000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xd", "subleaf": "0x1", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000001111" }, { "register": "ebx", "bitmap": "0b00000000000000000000001001000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xd", "subleaf": "0x2", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000100000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000001001000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xd", "subleaf": "0x5", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000001000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000010001000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xd", "subleaf": "0x6", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000001000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000010010000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xd", "subleaf": "0x7", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000010000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000011010000000" }, { 
"register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xd", "subleaf": "0x9", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000001000" }, { "register": "ebx", "bitmap": "0b00000000000000000000101010000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xe", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xf", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x10", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x11", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x12", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": 
"0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x13", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x14", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x15", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x16", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x17", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": 
"0b00000000000000000000000000000000" } ] }, { "leaf": "0x18", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x19", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x1a", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x1b", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x40000000", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b01000000000000000000000000000001" }, { "register": "ebx", "bitmap": "0b01001011010011010101011001001011" }, { "register": "ecx", "bitmap": "0b01010110010010110100110101010110" }, { "register": "edx", "bitmap": "0b00000000000000000000000001001101" } ] }, { "leaf": "0x40000001", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000001000000000111111011111011" }, { "register": "ebx", "bitmap": 
"0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000000", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b10000000000000000000000000001000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000001", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000100100001" }, { "register": "edx", "bitmap": "0b00101100000100000000100000000000" } ] }, { "leaf": "0x80000002", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b01100101011101000110111001001001" }, { "register": "ebx", "bitmap": "0b00101001010100100010100001101100" }, { "register": "ecx", "bitmap": "0b01101111011001010101100000100000" }, { "register": "edx", "bitmap": "0b00101001010100100010100001101110" } ] }, { "leaf": "0x80000003", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b01101111011100100101000000100000" }, { "register": "ebx", "bitmap": "0b01110011011100110110010101100011" }, { "register": "ecx", "bitmap": "0b01000000001000000111001001101111" }, { "register": "edx", "bitmap": "0b00111001001011100011001000100000" } ] }, { "leaf": "0x80000004", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b01111010010010000100011100110000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000005", 
"subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000006", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000001000000000110000001000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000007", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000100000000" } ] }, { "leaf": "0x80000008", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000011100100101110" }, { "register": "ebx", "bitmap": "0b00000001000000001101001000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] } ], "msr_modifiers": [ { "addr": "0x11", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x12", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x34", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x3a", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x3b", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x48", "bitmap": 
"0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x8b", "bitmap": "0b0000000000000000000000000000000100000000000000000000000000000000" }, { "addr": "0x9e", "bitmap": "0b0000000000000000000000000000000000000000000000110000000000000000" }, { "addr": "0xce", "bitmap": "0b0000000000000000000000000000000010000000000000000000000000000000" }, { "addr": "0x10a", "bitmap": "0b0000000000000000000000000000000000001100000000101010000011101011" }, { "addr": "0x140", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x174", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x175", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x176", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x1a0", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000001" }, { "addr": "0x1fc", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x277", "bitmap": "0b0000000000000111000001000000011000000000000001110000010000000110" }, { "addr": "0x4b564d00", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x4b564d01", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x4b564d02", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x4b564d03", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x4b564d04", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x4b564d05", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000001" }, { "addr": "0x4b564d06", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x4b564d07", "bitmap": 
"0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0xc0000081", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0xc0000082", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0xc0000083", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0xc0000084", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0xc0000102", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0xc0000103", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0xc0010015", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" } ] } } ================================================ FILE: tests/data/cpu_template_helper/fingerprint_INTEL_SAPPHIRE_RAPIDS_5.10host.json ================================================ { "firecracker_version": "1.13.0-dev", "kernel_version": "5.10.238-234.956.amzn2.x86_64", "microcode_version": "0x2b000639", "bios_version": "1.0", "bios_revision": "3.10", "guest_cpu_config": { "kvm_capabilities": [], "cpuid_modifiers": [ { "leaf": "0x0", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000011111" }, { "register": "ebx", "bitmap": "0b01110101011011100110010101000111" }, { "register": "ecx", "bitmap": "0b01101100011001010111010001101110" }, { "register": "edx", "bitmap": "0b01001001011001010110111001101001" } ] }, { "leaf": "0x1", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000010000000011011111000" }, { "register": "ebx", "bitmap": "0b00000000000000010000100000000000" }, { "register": "ecx", "bitmap": "0b11110111111110100011001000000011" }, { "register": "edx", "bitmap": "0b00001111100010111111101111111111" } ] }, { "leaf": "0x2", "subleaf": "0x0", "flags": 0, 
"modifiers": [ { "register": "eax", "bitmap": "0b00000000111111101111111100000001" }, { "register": "ebx", "bitmap": "0b00000000000000000000000011110000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x3", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x4", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000100100001" }, { "register": "ebx", "bitmap": "0b00000010110000000000000000111111" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000111111" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x4", "subleaf": "0x1", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000100100010" }, { "register": "ebx", "bitmap": "0b00000001110000000000000000111111" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000111111" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x4", "subleaf": "0x2", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000101000011" }, { "register": "ebx", "bitmap": "0b00000011110000000000000000111111" }, { "register": "ecx", "bitmap": "0b00000000000000000000011111111111" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x4", "subleaf": "0x3", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000101100011" }, { "register": "ebx", "bitmap": "0b00000011100000000000000000111111" }, { "register": "ecx", "bitmap": "0b00000000000000011011111111111111" }, { "register": "edx", 
"bitmap": "0b00000000000000000000000000000100" } ] }, { "leaf": "0x4", "subleaf": "0x4", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x5", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x6", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000100" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x7", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000001" }, { "register": "ebx", "bitmap": "0b11110001101111110010011111101011" }, { "register": "ecx", "bitmap": "0b00011010010000010101111101001110" }, { "register": "edx", "bitmap": "0b10101100000000010100010000010000" } ] }, { "leaf": "0x7", "subleaf": "0x1", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000100000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x8", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": 
"0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x9", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xa", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xb", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000001" }, { "register": "ecx", "bitmap": "0b00000000000000000000000100000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xb", "subleaf": "0x1", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000101" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000001" }, { "register": "ecx", "bitmap": "0b00000000000000000000001000000001" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xc", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xd", "subleaf": "0x0", "flags": 1, "modifiers": [ { 
"register": "eax", "bitmap": "0b00000000000000000000001011100111" }, { "register": "ebx", "bitmap": "0b00000000000000000000001001000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000101010001000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xd", "subleaf": "0x1", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000001111" }, { "register": "ebx", "bitmap": "0b00000000000000000000001001000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xd", "subleaf": "0x2", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000100000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000001001000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xd", "subleaf": "0x5", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000001000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000010001000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xd", "subleaf": "0x6", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000001000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000010010000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xd", "subleaf": "0x7", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000010000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000011010000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": 
"0b00000000000000000000000000000000" } ] }, { "leaf": "0xd", "subleaf": "0x9", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000001000" }, { "register": "ebx", "bitmap": "0b00000000000000000000101010000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xe", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xf", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x10", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x11", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x12", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" 
}, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x13", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x14", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x15", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x16", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x17", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x18", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": 
"0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x19", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x1a", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x1b", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x1c", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x1d", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": 
"0b00000000000000000000000000000000" } ] }, { "leaf": "0x1e", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x1f", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000001" }, { "register": "ecx", "bitmap": "0b00000000000000000000000100000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x1f", "subleaf": "0x1", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000101" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000001" }, { "register": "ecx", "bitmap": "0b00000000000000000000001000000001" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x40000000", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b01000000000000000000000000000001" }, { "register": "ebx", "bitmap": "0b01001011010011010101011001001011" }, { "register": "ecx", "bitmap": "0b01010110010010110100110101010110" }, { "register": "edx", "bitmap": "0b00000000000000000000000001001101" } ] }, { "leaf": "0x40000001", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000001000000000111111011111011" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000000", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b10000000000000000000000000001000" }, { "register": "ebx", "bitmap": 
"0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000001", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000100100001" }, { "register": "edx", "bitmap": "0b00101100000100000000100000000000" } ] }, { "leaf": "0x80000002", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b01100101011101000110111001001001" }, { "register": "ebx", "bitmap": "0b00101001010100100010100001101100" }, { "register": "ecx", "bitmap": "0b01101111011001010101100000100000" }, { "register": "edx", "bitmap": "0b00101001010100100010100001101110" } ] }, { "leaf": "0x80000003", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b01101111011100100101000000100000" }, { "register": "ebx", "bitmap": "0b01110011011100110110010101100011" }, { "register": "ecx", "bitmap": "0b00000000000000000111001001101111" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000004", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000005", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000006", 
"subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00001000000000000111000001000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000007", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000100000000" } ] }, { "leaf": "0x80000008", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000011100100101110" }, { "register": "ebx", "bitmap": "0b00000001000000001101001000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] } ], "msr_modifiers": [ { "addr": "0x11", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x12", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x34", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x3a", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x3b", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x48", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x8b", "bitmap": "0b0000000000000000000000000000000100000000000000000000000000000000" }, { "addr": "0x9e", "bitmap": "0b0000000000000000000000000000000000000000000000110000000000000000" }, { "addr": "0xce", "bitmap": "0b0000000000000000000000000000000010000000000000000000000000000000" }, { "addr": "0xe1", "bitmap": 
"0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x10a", "bitmap": "0b0100000000000000000000000000000000001100000010001110000011101011" }, { "addr": "0x140", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x174", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x175", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x176", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x1a0", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000001" }, { "addr": "0x1fc", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x277", "bitmap": "0b0000000000000111000001000000011000000000000001110000010000000110" }, { "addr": "0x4b564d00", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x4b564d01", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x4b564d02", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x4b564d03", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x4b564d04", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x4b564d05", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000001" }, { "addr": "0x4b564d06", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x4b564d07", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0xc0000081", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0xc0000082", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0xc0000083", "bitmap": 
"0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0xc0000084", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0xc0000102", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0xc0000103", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0xc0010015", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" } ] } } ================================================ FILE: tests/data/cpu_template_helper/fingerprint_INTEL_SAPPHIRE_RAPIDS_6.1host.json ================================================ { "firecracker_version": "1.13.0-dev", "kernel_version": "6.1.141-165.249.amzn2023.x86_64", "microcode_version": "0x2b000639", "bios_version": "1.0", "bios_revision": "3.10", "guest_cpu_config": { "kvm_capabilities": [], "cpuid_modifiers": [ { "leaf": "0x0", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000011111" }, { "register": "ebx", "bitmap": "0b01110101011011100110010101000111" }, { "register": "ecx", "bitmap": "0b01101100011001010111010001101110" }, { "register": "edx", "bitmap": "0b01001001011001010110111001101001" } ] }, { "leaf": "0x1", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000010000000011011111000" }, { "register": "ebx", "bitmap": "0b00000000000000010000100000000000" }, { "register": "ecx", "bitmap": "0b11110111111110100011001000000011" }, { "register": "edx", "bitmap": "0b00001111100010111111101111111111" } ] }, { "leaf": "0x2", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000111111101111111100000001" }, { "register": "ebx", "bitmap": "0b00000000000000000000000011110000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x3", 
"subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x4", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000100100001" }, { "register": "ebx", "bitmap": "0b00000010110000000000000000111111" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000111111" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x4", "subleaf": "0x1", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000100100010" }, { "register": "ebx", "bitmap": "0b00000001110000000000000000111111" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000111111" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x4", "subleaf": "0x2", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000101000011" }, { "register": "ebx", "bitmap": "0b00000011110000000000000000111111" }, { "register": "ecx", "bitmap": "0b00000000000000000000011111111111" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x4", "subleaf": "0x3", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000101100011" }, { "register": "ebx", "bitmap": "0b00000011100000000000000000111111" }, { "register": "ecx", "bitmap": "0b00000000000000011011111111111111" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000100" } ] }, { "leaf": "0x4", "subleaf": "0x4", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": 
"0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x5", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x6", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000100" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x7", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000010" }, { "register": "ebx", "bitmap": "0b11110001101111110010011111101011" }, { "register": "ecx", "bitmap": "0b00011011010000010101111101001110" }, { "register": "edx", "bitmap": "0b10101111110000010100010000010000" } ] }, { "leaf": "0x7", "subleaf": "0x1", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000110000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x7", "subleaf": "0x2", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000010111" } ] }, { "leaf": "0x8", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { 
"register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x9", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xa", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xb", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000001" }, { "register": "ecx", "bitmap": "0b00000000000000000000000100000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xb", "subleaf": "0x1", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000101" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000001" }, { "register": "ecx", "bitmap": "0b00000000000000000000001000000001" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xc", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xd", "subleaf": "0x0", 
"flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000001100000001011100111" }, { "register": "ebx", "bitmap": "0b00000000000000000000001001000000" }, { "register": "ecx", "bitmap": "0b00000000000000000010101100000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xd", "subleaf": "0x1", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000011111" }, { "register": "ebx", "bitmap": "0b00000000000000000000001001000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xd", "subleaf": "0x2", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000100000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000001001000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xd", "subleaf": "0x5", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000001000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000010001000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xd", "subleaf": "0x6", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000001000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000010010000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xd", "subleaf": "0x7", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000010000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000011010000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { 
"register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xd", "subleaf": "0x9", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000001000" }, { "register": "ebx", "bitmap": "0b00000000000000000000101010000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xd", "subleaf": "0x11", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000001000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000101011000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000010" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xd", "subleaf": "0x12", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000010000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000101100000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000110" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xe", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xf", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x10", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": 
"0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x11", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x12", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x13", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x14", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x15", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x16", "subleaf": "0x0", "flags": 0, 
"modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x17", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x18", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x19", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x1a", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x1b", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": 
"edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x1c", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x1d", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000001" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x1d", "subleaf": "0x1", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000100000000000010000000000000" }, { "register": "ebx", "bitmap": "0b00000000000010000000000001000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000010000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x1e", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000100000000010000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x1f", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000001" }, { "register": "ecx", "bitmap": "0b00000000000000000000000100000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x1f", "subleaf": "0x1", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000101" }, { "register": "ebx", "bitmap": 
"0b00000000000000000000000000000001" }, { "register": "ecx", "bitmap": "0b00000000000000000000001000000001" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x40000000", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b01000000000000000000000000000001" }, { "register": "ebx", "bitmap": "0b01001011010011010101011001001011" }, { "register": "ecx", "bitmap": "0b01010110010010110100110101010110" }, { "register": "edx", "bitmap": "0b00000000000000000000000001001101" } ] }, { "leaf": "0x40000001", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000001000000000111111011111011" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000000", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b10000000000000000000000000001000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000001", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000100100001" }, { "register": "edx", "bitmap": "0b00101100000100000000100000000000" } ] }, { "leaf": "0x80000002", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b01100101011101000110111001001001" }, { "register": "ebx", "bitmap": "0b00101001010100100010100001101100" }, { "register": "ecx", "bitmap": "0b01101111011001010101100000100000" }, { "register": "edx", "bitmap": "0b00101001010100100010100001101110" } ] }, { "leaf": "0x80000003", 
"subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b01101111011100100101000000100000" }, { "register": "ebx", "bitmap": "0b01110011011100110110010101100011" }, { "register": "ecx", "bitmap": "0b00000000000000000111001001101111" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000004", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000005", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000006", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00001000000000000111000001000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000007", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000100000000" } ] }, { "leaf": "0x80000008", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000011100100101110" }, { "register": "ebx", "bitmap": "0b00000001000000001101001000000000" }, { "register": "ecx", 
"bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] } ], "msr_modifiers": [ { "addr": "0x11", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x12", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x34", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x3a", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x3b", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x48", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x8b", "bitmap": "0b0000000000000000000000000000000100000000000000000000000000000000" }, { "addr": "0x9e", "bitmap": "0b0000000000000000000000000000000000000000000000110000000000000000" }, { "addr": "0xce", "bitmap": "0b0000000000000000000000000000000010000000000000000000000000000000" }, { "addr": "0xe1", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x10a", "bitmap": "0b0100000000000000000000000000000000001100000010001110000011101011" }, { "addr": "0x140", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x174", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x175", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x176", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x1a0", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000001" }, { "addr": "0x1c4", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x1c5", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x1fc", "bitmap": 
"0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x277", "bitmap": "0b0000000000000111000001000000011000000000000001110000010000000110" }, { "addr": "0x4b564d00", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x4b564d01", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x4b564d02", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x4b564d03", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x4b564d04", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x4b564d05", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000001" }, { "addr": "0x4b564d06", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0x4b564d07", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0xc0000081", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0xc0000082", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0xc0000083", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0xc0000084", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0xc0000102", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0xc0000103", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" }, { "addr": "0xc0010015", "bitmap": "0b0000000000000000000000000000000000000000000000000000000000000000" } ] } } ================================================ FILE: tests/data/custom_cpu_templates/AARCH64_WITH_SVE_AND_PAC.json ================================================ { "kvm_capabilities": ["170", "171", "172"], "vcpu_features": [{ 
"index": 0, "bitmap": "0b111xxxx" }] } ================================================ FILE: tests/data/custom_cpu_templates/C3.json ================================================ { "cpuid_modifiers": [ { "leaf": "0x1", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0bxxxx000000000011xx00011011100100" }, { "register": "ecx", "bitmap": "0bxxxxxxxxx0xxxxxx00x000x0xx0000xx" }, { "register": "edx", "bitmap": "0b0x0x0xxxx00xx0xxxxx1xxxx1xxxxxxx" } ] }, { "leaf": "0x7", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "ebx", "bitmap": "0b000000000x0x000000x000x0xx0000xx" }, { "register": "ecx", "bitmap": "0bx0xxxxxxx0xxxxx0x0xx0xxxxxx0000x" }, { "register": "edx", "bitmap": "0bxxxxxxxxxxxxxxxxxxxxxxxxxxxx00xx" } ] }, { "leaf": "0xd", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0bxxxxxxxxxxxxxxxxxxxxxx0x00000xxx" } ] }, { "leaf": "0xd", "subleaf": "0x1", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0bxxxxxxxxxxxxxxxxxxxxxxxxxxxx000x" } ] }, { "leaf": "0x80000001", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "ecx", "bitmap": "0bxxxxxxxxxxxxxxxxxxxxxxx0xx0xxxxx" }, { "register": "edx", "bitmap": "0bxxxxx0xxxxxxxxxxxxxxxxxxxxxxxxxx" } ] } ], "msr_modifiers": [] } ================================================ FILE: tests/data/custom_cpu_templates/GNR_TO_T2_5.10.json ================================================ { "cpuid_modifiers": [ { "leaf": "0x1", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0bxxxx000000000011xx00011011110010" }, { "register": "ecx", "bitmap": "0bxxxxxxxxxxxxx0xx00xx00x0000000xx" }, { "register": "edx", "bitmap": "0b000x0xxxx00xx0xxxxx1xxxx1xxxxxxx" } ] }, { "leaf": "0x7", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "ebx", "bitmap": "0b00000000000x000000x00x1xxxx0x0xx" }, { "register": "ecx", "bitmap": "0bx0x00x00x0xxxxx0x0x00000x0x0000x" }, { "register": "edx", "bitmap": 
"0bxxxxxx0000xxxxx0x0xxxxx0xxx000xx" } ] }, { "leaf": "0x7", "subleaf": "0x1", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0bxxxxxxxxxx0xxxxxxxxxxxxxxx00xxxx" }, { "register": "edx", "bitmap": "0bxxxxxxxxxxxx0xxxx0xxxxxxxxxxxxxx" } ] }, { "leaf": "0xd", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0bxxxxxxxxxxxxx00xxxxxxx0x00000xxx" } ] }, { "leaf": "0xd", "subleaf": "0x1", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0bxxxxxxxxxxxxxxxxxxxxxxxxxxx0000x" } ] }, { "leaf": "0x80000001", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "ecx", "bitmap": "0bxx0xxxxxxxxxxxxxxxxxxxx0xxxxxxxx" }, { "register": "edx", "bitmap": "0bxxxxx0xxxxxxxxxxxxxxxxxxxxxxxxxx" } ] }, { "leaf": "0x80000008", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "ebx", "bitmap": "0bxxxxxxxxxxxxxxxxxxxxxx0xxxxxxxxx" } ] } ], "msr_modifiers": [] } ================================================ FILE: tests/data/custom_cpu_templates/GNR_TO_T2_6.1.json ================================================ { "cpuid_modifiers": [ { "leaf": "0x1", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0bxxxx000000000011xx00011011110010" }, { "register": "ecx", "bitmap": "0bxxxxxxxxxxxxx0xx00xx00x0000000xx" }, { "register": "edx", "bitmap": "0b000x0xxxx00xx0xxxxx1xxxx1xxxxxxx" } ] }, { "leaf": "0x7", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "ebx", "bitmap": "0b00000000000x000000x00x1xxxx0x0xx" }, { "register": "ecx", "bitmap": "0bx0x00x00x0xxxxx0x0x00000x0x0000x" }, { "register": "edx", "bitmap": "0bxxxxxx0000xxxxx0x0xxxxx0xxx000xx" } ] }, { "leaf": "0x7", "subleaf": "0x1", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0bxxxxxxxxxx0xxxxxxxxxxxxxxx00xxxx" }, { "register": "edx", "bitmap": "0bxxxxxxxxxxxx0xxxx0xxxxxxxxxxxxxx" } ] }, { "leaf": "0x7", "subleaf": "0x2", "flags": 1, "modifiers": [ { "register": "edx", "bitmap": "0bxxxxxxxxxxxxxxxxxxxxxxxxxxxx0xxx" } ] }, { "leaf": 
"0xd", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0bxxxxxxxxxxxxx00xxxxxxx0x00000xxx" } ] }, { "leaf": "0xd", "subleaf": "0x1", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0bxxxxxxxxxxxxxxxxxxxxxxxxxxx0000x" } ] }, { "leaf": "0xd", "subleaf": "0x11", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xd", "subleaf": "0x12", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x1d", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x1d", "subleaf": "0x1", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x1e", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": 
"0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000001", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "ecx", "bitmap": "0bxx0xxxxxxxxxxxxxxxxxxxx0xxxxxxxx" }, { "register": "edx", "bitmap": "0bxxxxx0xxxxxxxxxxxxxxxxxxxxxxxxxx" } ] }, { "leaf": "0x80000008", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "ebx", "bitmap": "0bxxxxxxxxxxxxxxxxxxxxxx0xxxxxxxxx" } ] } ], "msr_modifiers": [] } ================================================ FILE: tests/data/custom_cpu_templates/SPR_TO_T2_5.10.json ================================================ { "cpuid_modifiers": [ { "leaf": "0x1", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0bxxxx000000000011xx00011011110010" }, { "register": "ecx", "bitmap": "0bxxxxxxxxxxxxx0xx00xx00x0000000xx" }, { "register": "edx", "bitmap": "0b000x0xxxx00xx0xxxxx1xxxx1xxxxxxx" } ] }, { "leaf": "0x7", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "ebx", "bitmap": "0b00000000000x000000x00x1xxxx0x0xx" }, { "register": "ecx", "bitmap": "0bx0x00x00x0xxxxx0x0x00000x0x0000x" }, { "register": "edx", "bitmap": "0bxxxxxx0000xxxxx0x0xxxxx0xxx000xx" } ] }, { "leaf": "0x7", "subleaf": "0x1", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0bxxxxxxxxxxxxxxxxxxxxxxxxxx00xxxx" } ] }, { "leaf": "0xd", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0bxxxxxxxxxxxxx00xxxxxxx0x00000xxx" } ] }, { "leaf": "0xd", "subleaf": "0x1", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0bxxxxxxxxxxxxxxxxxxxxxxxxxxx0000x" } ] }, { "leaf": "0x80000001", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "ecx", "bitmap": "0bxx0xxxxxxxxxxxxxxxxxxxx0xxxxxxxx" }, { "register": "edx", "bitmap": "0bxxxxx0xxxxxxxxxxxxxxxxxxxxxxxxxx" } ] }, { "leaf": "0x80000008", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "ebx", "bitmap": "0bxxxxxxxxxxxxxxxxxxxxxx0xxxxxxxxx" } ] } ], "msr_modifiers": [] } 
================================================ FILE: tests/data/custom_cpu_templates/SPR_TO_T2_6.1.json ================================================ { "cpuid_modifiers": [ { "leaf": "0x1", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0bxxxx000000000011xx00011011110010" }, { "register": "ecx", "bitmap": "0bxxxxxxxxxxxxx0xx00xx00x0000000xx" }, { "register": "edx", "bitmap": "0b000x0xxxx00xx0xxxxx1xxxx1xxxxxxx" } ] }, { "leaf": "0x7", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "ebx", "bitmap": "0b00000000000x000000x00x1xxxx0x0xx" }, { "register": "ecx", "bitmap": "0bx0x00x00x0xxxxx0x0x00000x0x0000x" }, { "register": "edx", "bitmap": "0bxxxxxx0000xxxxx0x0xxxxx0xxx000xx" } ] }, { "leaf": "0x7", "subleaf": "0x1", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0bxxxxxxxxxxxxxxxxxxxxxxxxxx00xxxx" } ] }, { "leaf": "0xd", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0bxxxxxxxxxxxxx00xxxxxxx0x00000xxx" } ] }, { "leaf": "0xd", "subleaf": "0x1", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0bxxxxxxxxxxxxxxxxxxxxxxxxxxx0000x" } ] }, { "leaf": "0xd", "subleaf": "0x11", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0xd", "subleaf": "0x12", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x1d", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": 
"ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x1d", "subleaf": "0x1", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x1e", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ebx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "ecx", "bitmap": "0b00000000000000000000000000000000" }, { "register": "edx", "bitmap": "0b00000000000000000000000000000000" } ] }, { "leaf": "0x80000001", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "ecx", "bitmap": "0bxx0xxxxxxxxxxxxxxxxxxxx0xxxxxxxx" }, { "register": "edx", "bitmap": "0bxxxxx0xxxxxxxxxxxxxxxxxxxxxxxxxx" } ] }, { "leaf": "0x80000008", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "ebx", "bitmap": "0bxxxxxxxxxxxxxxxxxxxxxx0xxxxxxxxx" } ] } ], "msr_modifiers": [] } ================================================ FILE: tests/data/custom_cpu_templates/T2.json ================================================ { "cpuid_modifiers": [ { "leaf": "0x1", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0bxxxx000000000011xx00011011110010" }, { "register": "ecx", "bitmap": "0bxxxxxxxxxxxxx0xx00xx00x0000000xx" }, { "register": "edx", "bitmap": "0b000x0xxxx00xx0xxxxx1xxxx1xxxxxxx" } ] }, { "leaf": "0x7", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "ebx", "bitmap": "0b00000000000x000000x00x1xxxx0x0xx" }, { "register": "ecx", "bitmap": "0bx0xxxxxxx0xxxxx0x0x00000x0x0000x" }, { "register": "edx", "bitmap": 
"0bxxxxxxxxxxxxxxxxxxxxxxx0xxx000xx" } ] }, { "leaf": "0xd", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0bxxxxxxxxxxxxxxxxxxxxxx0x00000xxx" } ] }, { "leaf": "0xd", "subleaf": "0x1", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0bxxxxxxxxxxxxxxxxxxxxxxxxxxxx000x" } ] }, { "leaf": "0x80000001", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "ecx", "bitmap": "0bxx0xxxxxxxxxxxxxxxxxxxx0xxxxxxxx" }, { "register": "edx", "bitmap": "0bxxxxx0xxxxxxxxxxxxxxxxxxxxxxxxxx" } ] }, { "leaf": "0x80000008", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "ebx", "bitmap": "0bxxxxxxxxxxxxxxxxxxxxxx0xxxxxxxxx" } ] } ], "msr_modifiers": [] } ================================================ FILE: tests/data/custom_cpu_templates/T2A.json ================================================ { "cpuid_modifiers": [ { "leaf": "0x1", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0bxxxx000000000011xx00011011110010" }, { "register": "ecx", "bitmap": "0bxxxxxxxxxxxxx0xx00xx00x0000000xx" }, { "register": "edx", "bitmap": "0b000x0xxxx00xx0xxxxx1xxxx1xxxxxxx" } ] }, { "leaf": "0x7", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "ebx", "bitmap": "0b00000000000x000000x00x1xxxx0x0xx" }, { "register": "ecx", "bitmap": "0bx0xxxxxxx0xxxxx0x0x00000x0x0000x" }, { "register": "edx", "bitmap": "0bxxxxxxxxxxxxxxxxxxxxxxx0xxx000xx" } ] }, { "leaf": "0xd", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0bxxxxxxxxxxxxxxxxxxxxxx0x00000xxx" } ] }, { "leaf": "0xd", "subleaf": "0x1", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0bxxxxxxxxxxxxxxxxxxxxxxxxxxxx000x" } ] }, { "leaf": "0x80000001", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "ecx", "bitmap": "0bxx0xxxxxxxxxxxxxxxxxxxx000xxx0xx" }, { "register": "edx", "bitmap": "0bxxxxx00xx0xxxxxxxxxxxxxxxxxxxxxx" } ] }, { "leaf": "0x80000008", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": 
"ebx", "bitmap": "0bxxxxxxxxxxx111xxxxxxxx0xxxxxx1x0" } ] } ], "msr_modifiers": [] } ================================================ FILE: tests/data/custom_cpu_templates/T2CL.json ================================================ { "cpuid_modifiers": [ { "leaf": "0x1", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0bxxxx000000000011xx00011011110010" }, { "register": "ecx", "bitmap": "0bxxxxxxxxxxxxx0xx00xx00x0000000xx" }, { "register": "edx", "bitmap": "0b000x0xxxx00xx0xxxxx1xxxx1xxxxxxx" } ] }, { "leaf": "0x7", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "ebx", "bitmap": "0b00000000000x000000x00x1xxxx0x0xx" }, { "register": "ecx", "bitmap": "0bx0xxxxxxx0xxxxx0x0x00000x0x0000x" }, { "register": "edx", "bitmap": "0bxxxxxxxxxxxxxxxxxxxxxxx0xxx000xx" } ] }, { "leaf": "0xd", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0bxxxxxxxxxxxxxxxxxxxxxx0x00000xxx" } ] }, { "leaf": "0xd", "subleaf": "0x1", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0bxxxxxxxxxxxxxxxxxxxxxxxxxxxx000x" } ] }, { "leaf": "0x80000001", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "ecx", "bitmap": "0bxx0xxxxxxxxxxxxxxxxxxxx000xxxxxx" }, { "register": "edx", "bitmap": "0bxxxxx00000xxxxxxxxxxxxxxxxxxxxxx" } ] }, { "leaf": "0x80000008", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "ebx", "bitmap": "0bxxxxxxxxxxxxxxxxxxxxxx0xxxxxxxxx" } ] } ], "msr_modifiers": [ { "addr": "0x10a", "bitmap": "0b00000000000000000000000000000000000xxx0x0000x0x0xxx0000xxxxxxxxx" } ] } ================================================ FILE: tests/data/custom_cpu_templates/T2S.json ================================================ { "cpuid_modifiers": [ { "leaf": "0x1", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "eax", "bitmap": "0bxxxx000000000011xx00011011110010" }, { "register": "ecx", "bitmap": "0bxxxxxxxxxxxxx0xx00xx00x0000000xx" }, { "register": "edx", "bitmap": 
"0b000x0xxxx00xx0xxxxx1xxxx1xxxxxxx" } ] }, { "leaf": "0x7", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "ebx", "bitmap": "0b00000000000x000000x00x1xxxx0x0xx" }, { "register": "ecx", "bitmap": "0bx0xxxxxxx0xxxxx0x0x00000x0x0000x" }, { "register": "edx", "bitmap": "0bxxxxxxxxxxxxxxxxxxxxxxx0xxx000xx" } ] }, { "leaf": "0xd", "subleaf": "0x0", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0bxxxxxxxxxxxxxxxxxxxxxx0x00000xxx" } ] }, { "leaf": "0xd", "subleaf": "0x1", "flags": 1, "modifiers": [ { "register": "eax", "bitmap": "0bxxxxxxxxxxxxxxxxxxxxxxxxxxxx000x" } ] }, { "leaf": "0x80000001", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "ecx", "bitmap": "0bxx0xxxxxxxxxxxxxxxxxxxx0xxxxxxxx" }, { "register": "edx", "bitmap": "0bxxxxx0xxxxxxxxxxxxxxxxxxxxxxxxxx" } ] }, { "leaf": "0x80000008", "subleaf": "0x0", "flags": 0, "modifiers": [ { "register": "ebx", "bitmap": "0bxxxxxxxxxxxxxxxxxxxxxx0xxxxxxxxx" } ] } ], "msr_modifiers": [ { "addr": "0x10a", "bitmap": "0b0000000000000000000000000000000000001100000010000000110001001100" } ] } ================================================ FILE: tests/data/custom_cpu_templates/V1N1.json ================================================ { "reg_modifiers": [ { "addr": "0x603000000013c020", "bitmap": "0bxxxxxxxxxxxx0000xxxxxxxxxxxx0000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" }, { "addr": "0x603000000013c030", "bitmap": "0b0000xxxx00000000xxxx000000000000xxxxxxxxxxxxxxxx0001xxxxxxxxxxxx" }, { "addr": "0x603000000013c031", "bitmap": "0bxxxxxxxx000000000000xxxxxxxxxxxxxxxxxxxx000100000000xxxxxxxx0001" }, { "addr": "0x603000000013c03a", "bitmap": "0bxxxxxxxxxxxxxxxxxxxxxxxxxxxx0000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" } ] } ================================================ FILE: tests/data/metadata.json ================================================ { "latest": { "meta-data": "Lorem ipsum dolor sit amet, consectetur adipiscing elit", "user-data": "userdata 1 2 3 4" }, "2016-09-02": { "meta-data": "Lorem ipsum 
dolor sit amet" }, "2019-08-01": { "some-data": { "field1": "val1", "field2": [1,2,3,4] } } } ================================================ FILE: tests/data/metadata_invalid.json ================================================ { "latest": { "meta-data": "Lorem ipsum dolor sit amet, consectetur adipiscing elit", "user-data": "userdata 1 2 3 4" ================================================ FILE: tests/data/msr/msr_list_GNR_TO_T2_5.10_INTEL_GRANITE_RAPIDS_5.10host_5.10guest.csv ================================================ MSR_ADDR,VALUE 0x0,0x0 0x1,0x0 0x10,0x64bb6254c 0x11,0x24e6008 0x12,0x24e7001 0x17,0x0 0x1b,0xfee00d00 0x2a,0x0 0x2c,0x1000000 0x34,0x0 0x3a,0x1 0x3b,0x0 0x48,0x1 0x8b,0x100000000 0xc1,0x0 0xc2,0x0 0xcd,0x3 0xce,0x80000000 0xfe,0x508 0x10a,0x400000000d08e0eb 0x11e,0xbe702111 0x122,0x3 0x140,0x0 0x174,0x10 0x175,0xfffffe0000003000 0x176,0xffffffff81a01510 0x179,0x20 0x17a,0x0 0x186,0x0 0x187,0x0 0x198,0x400000003e8 0x199,0x0 0x1a0,0x1 0x1d9,0x0 0x1db,0x0 0x1dc,0x0 0x1dd,0x0 0x1de,0x0 0x1fc,0x0 0x200,0x0 0x201,0x0 0x202,0x0 0x203,0x0 0x204,0x0 0x205,0x0 0x206,0x0 0x207,0x0 0x208,0x0 0x209,0x0 0x20a,0x0 0x20b,0x0 0x20c,0x0 0x20d,0x0 0x20e,0x0 0x20f,0x0 0x250,0x0 0x258,0x0 0x259,0x0 0x268,0x0 0x269,0x0 0x26a,0x0 0x26b,0x0 0x26c,0x0 0x26d,0x0 0x26e,0x0 0x26f,0x0 0x277,0x407050600070106 0x2ff,0x806 0x400,0x0 0x401,0x0 0x402,0x0 0x403,0x0 0x404,0x0 0x405,0x0 0x406,0x0 0x407,0x0 0x408,0x0 0x409,0x0 0x40a,0x0 0x40b,0x0 0x40c,0x0 0x40d,0x0 0x40e,0x0 0x40f,0x0 0x410,0x0 0x411,0x0 0x412,0x0 0x413,0x0 0x414,0x0 0x415,0x0 0x416,0x0 0x417,0x0 0x418,0x0 0x419,0x0 0x41a,0x0 0x41b,0x0 0x41c,0x0 0x41d,0x0 0x41e,0x0 0x41f,0x0 0x420,0x0 0x421,0x0 0x422,0x0 0x423,0x0 0x424,0x0 0x425,0x0 0x426,0x0 0x427,0x0 0x428,0x0 0x429,0x0 0x42a,0x0 0x42b,0x0 0x42c,0x0 0x42d,0x0 0x42e,0x0 0x42f,0x0 0x430,0x0 0x431,0x0 0x432,0x0 0x433,0x0 0x434,0x0 0x435,0x0 0x436,0x0 0x437,0x0 0x438,0x0 0x439,0x0 0x43a,0x0 0x43b,0x0 0x43c,0x0 0x43d,0x0 0x43e,0x0 0x43f,0x0 0x440,0x0 0x441,0x0 
0x442,0x0 0x443,0x0 0x444,0x0 0x445,0x0 0x446,0x0 0x447,0x0 0x448,0x0 0x449,0x0 0x44a,0x0 0x44b,0x0 0x44c,0x0 0x44d,0x0 0x44e,0x0 0x44f,0x0 0x450,0x0 0x451,0x0 0x452,0x0 0x453,0x0 0x454,0x0 0x455,0x0 0x456,0x0 0x457,0x0 0x458,0x0 0x459,0x0 0x45a,0x0 0x45b,0x0 0x45c,0x0 0x45d,0x0 0x45e,0x0 0x45f,0x0 0x460,0x0 0x461,0x0 0x462,0x0 0x463,0x0 0x464,0x0 0x465,0x0 0x466,0x0 0x467,0x0 0x468,0x0 0x469,0x0 0x46a,0x0 0x46b,0x0 0x46c,0x0 0x46d,0x0 0x46e,0x0 0x46f,0x0 0x470,0x0 0x471,0x0 0x472,0x0 0x473,0x0 0x474,0x0 0x475,0x0 0x476,0x0 0x477,0x0 0x478,0x0 0x479,0x0 0x47a,0x0 0x47b,0x0 0x47c,0x0 0x47d,0x0 0x47e,0x0 0x47f,0x0 0x606,0x0 0x611,0x0 0x619,0x0 0x639,0x0 0x641,0x0 0x6e0,0x79e6bb7c8 0x800,0x0 0x801,0x0 0x802,0x0 0x803,0x50014 0x804,0x0 0x805,0x0 0x806,0x0 0x807,0x0 0x808,0x10 0x809,0x0 0x80a,0x10 0x80b,0x0 0x80c,0x0 0x80d,0x1 0x80e,0xffffffff 0x80f,0x1ff 0x810,0x0 0x811,0x0 0x812,0x0 0x813,0x0 0x814,0x0 0x815,0x0 0x816,0x0 0x817,0x0 0x818,0x0 0x819,0x0 0x81a,0x0 0x81b,0x0 0x81c,0x0 0x81d,0x0 0x81e,0x0 0x81f,0x0 0x820,0x0 0x821,0x0 0x822,0x0 0x823,0x0 0x824,0x0 0x825,0x0 0x826,0x0 0x827,0x0 0x828,0x0 0x829,0x0 0x82a,0x0 0x82b,0x0 0x82c,0x0 0x82d,0x0 0x82e,0x0 0x82f,0x0 0x830,0xfd 0x831,0x1 0x832,0x400ec 0x833,0x10000 0x834,0x10000 0x835,0x10700 0x836,0x400 0x837,0xfe 0x838,0x0 0x839,0x0 0x83a,0x0 0x83b,0x0 0x83c,0x0 0x83d,0x0 0x83e,0x0 0x83f,0x0 0x840,0x0 0x841,0x0 0x842,0x0 0x843,0x0 0x844,0x0 0x845,0x0 0x846,0x0 0x847,0x0 0x848,0x0 0x849,0x0 0x84a,0x0 0x84b,0x0 0x84c,0x0 0x84d,0x0 0x84e,0x0 0x84f,0x0 0x850,0x0 0x851,0x0 0x852,0x0 0x853,0x0 0x854,0x0 0x855,0x0 0x856,0x0 0x857,0x0 0x858,0x0 0x859,0x0 0x85a,0x0 0x85b,0x0 0x85c,0x0 0x85d,0x0 0x85e,0x0 0x85f,0x0 0x860,0x0 0x861,0x0 0x862,0x0 0x863,0x0 0x864,0x0 0x865,0x0 0x866,0x0 0x867,0x0 0x868,0x0 0x869,0x0 0x86a,0x0 0x86b,0x0 0x86c,0x0 0x86d,0x0 0x86e,0x0 0x86f,0x0 0x870,0x0 0x871,0x0 0x872,0x0 0x873,0x0 0x874,0x0 0x875,0x0 0x876,0x0 0x877,0x0 0x878,0x0 0x879,0x0 0x87a,0x0 0x87b,0x0 0x87c,0x0 0x87d,0x0 0x87e,0x0 
0x87f,0x0 0x880,0x0 0x881,0x0 0x882,0x0 0x883,0x0 0x884,0x0 0x885,0x0 0x886,0x0 0x887,0x0 0x888,0x0 0x889,0x0 0x88a,0x0 0x88b,0x0 0x88c,0x0 0x88d,0x0 0x88e,0x0 0x88f,0x0 0x890,0x0 0x891,0x0 0x892,0x0 0x893,0x0 0x894,0x0 0x895,0x0 0x896,0x0 0x897,0x0 0x898,0x0 0x899,0x0 0x89a,0x0 0x89b,0x0 0x89c,0x0 0x89d,0x0 0x89e,0x0 0x89f,0x0 0x8a0,0x0 0x8a1,0x0 0x8a2,0x0 0x8a3,0x0 0x8a4,0x0 0x8a5,0x0 0x8a6,0x0 0x8a7,0x0 0x8a8,0x0 0x8a9,0x0 0x8aa,0x0 0x8ab,0x0 0x8ac,0x0 0x8ad,0x0 0x8ae,0x0 0x8af,0x0 0x8b0,0x0 0x8b1,0x0 0x8b2,0x0 0x8b3,0x0 0x8b4,0x0 0x8b5,0x0 0x8b6,0x0 0x8b7,0x0 0x8b8,0x0 0x8b9,0x0 0x8ba,0x0 0x8bb,0x0 0x8bc,0x0 0x8bd,0x0 0x8be,0x0 0x8bf,0x0 0x8c0,0x0 0x8c1,0x0 0x8c2,0x0 0x8c3,0x0 0x8c4,0x0 0x8c5,0x0 0x8c6,0x0 0x8c7,0x0 0x8c8,0x0 0x8c9,0x0 0x8ca,0x0 0x8cb,0x0 0x8cc,0x0 0x8cd,0x0 0x8ce,0x0 0x8cf,0x0 0x8d0,0x0 0x8d1,0x0 0x8d2,0x0 0x8d3,0x0 0x8d4,0x0 0x8d5,0x0 0x8d6,0x0 0x8d7,0x0 0x8d8,0x0 0x8d9,0x0 0x8da,0x0 0x8db,0x0 0x8dc,0x0 0x8dd,0x0 0x8de,0x0 0x8df,0x0 0x8e0,0x0 0x8e1,0x0 0x8e2,0x0 0x8e3,0x0 0x8e4,0x0 0x8e5,0x0 0x8e6,0x0 0x8e7,0x0 0x8e8,0x0 0x8e9,0x0 0x8ea,0x0 0x8eb,0x0 0x8ec,0x0 0x8ed,0x0 0x8ee,0x0 0x8ef,0x0 0x8f0,0x0 0x8f1,0x0 0x8f2,0x0 0x8f3,0x0 0x8f4,0x0 0x8f5,0x0 0x8f6,0x0 0x8f7,0x0 0x8f8,0x0 0x8f9,0x0 0x8fa,0x0 0x8fb,0x0 0x8fc,0x0 0x8fd,0x0 0x8fe,0x0 0x8ff,0x0 0xc0000080,0xd01 0xc0000081,0x23001000000000 0xc0000082,0xffffffff81a00080 0xc0000083,0xffffffff81a015c0 0xc0000084,0x47700 0xc0000100,0x0 0xc0000101,0xffff88803ec00000 0xc0000102,0x0 0xc0000103,0x0 0xc0010000,0x0 0xc0010001,0x0 0xc0010002,0x0 0xc0010003,0x0 0xc0010004,0x0 0xc0010005,0x0 0xc0010006,0x0 0xc0010007,0x0 0xc0010010,0x0 0xc0010015,0x0 0xc001001b,0x20000000 0xc001001f,0x0 0xc0010055,0x0 0xc0010058,0x0 0xc0010112,0x0 0xc0010113,0x0 0xc0010117,0x0 0xc0010200,0x0 0xc0010201,0x0 0xc0010202,0x0 0xc0010203,0x0 0xc0010204,0x0 0xc0010205,0x0 0xc0010206,0x0 0xc0010207,0x0 0xc0010208,0x0 0xc0010209,0x0 0xc001020a,0x0 0xc001020b,0x0 0xc0011022,0x0 0xc0011023,0x0 0xc001102a,0x0 0xc001102c,0x0 
0x400000000,0x0 0x2000000000,0x0 0x4000000000,0x0 0x8000000000,0x0 0x1000000000000,0x0 0x3c000000000000,0x0 0x80000000000000,0x0 0x40000000000000,0x0 ================================================ FILE: tests/data/msr/msr_list_GNR_TO_T2_5.10_INTEL_GRANITE_RAPIDS_5.10host_6.1guest.csv ================================================ MSR_ADDR,VALUE 0x0,0x0 0x1,0x0 0x10,0x17ac625eae0 0x11,0x27fd008 0x12,0x27fe001 0x17,0x0 0x1b,0xfee00d00 0x2a,0x0 0x2c,0x1000000 0x34,0x0 0x3a,0x1 0x3b,0x0 0x48,0x1 0x8b,0x100000000 0xc1,0x0 0xc2,0x0 0xcd,0x3 0xce,0x80000000 0xfe,0x508 0x10a,0x400000000d08e0eb 0x11e,0xbe702111 0x122,0x3 0x140,0x0 0x174,0x10 0x175,0xfffffe0000003000 0x176,0xffffffff81c01630 0x179,0x20 0x17a,0x0 0x186,0x0 0x187,0x0 0x198,0x400000003e8 0x199,0x0 0x1a0,0x1 0x1d9,0x0 0x1db,0x0 0x1dc,0x0 0x1dd,0x0 0x1de,0x0 0x1fc,0x0 0x200,0x0 0x201,0x0 0x202,0x0 0x203,0x0 0x204,0x0 0x205,0x0 0x206,0x0 0x207,0x0 0x208,0x0 0x209,0x0 0x20a,0x0 0x20b,0x0 0x20c,0x0 0x20d,0x0 0x20e,0x0 0x20f,0x0 0x250,0x0 0x258,0x0 0x259,0x0 0x268,0x0 0x269,0x0 0x26a,0x0 0x26b,0x0 0x26c,0x0 0x26d,0x0 0x26e,0x0 0x26f,0x0 0x277,0x407050600070106 0x2ff,0x806 0x400,0x0 0x401,0x0 0x402,0x0 0x403,0x0 0x404,0x0 0x405,0x0 0x406,0x0 0x407,0x0 0x408,0x0 0x409,0x0 0x40a,0x0 0x40b,0x0 0x40c,0x0 0x40d,0x0 0x40e,0x0 0x40f,0x0 0x410,0x0 0x411,0x0 0x412,0x0 0x413,0x0 0x414,0x0 0x415,0x0 0x416,0x0 0x417,0x0 0x418,0x0 0x419,0x0 0x41a,0x0 0x41b,0x0 0x41c,0x0 0x41d,0x0 0x41e,0x0 0x41f,0x0 0x420,0x0 0x421,0x0 0x422,0x0 0x423,0x0 0x424,0x0 0x425,0x0 0x426,0x0 0x427,0x0 0x428,0x0 0x429,0x0 0x42a,0x0 0x42b,0x0 0x42c,0x0 0x42d,0x0 0x42e,0x0 0x42f,0x0 0x430,0x0 0x431,0x0 0x432,0x0 0x433,0x0 0x434,0x0 0x435,0x0 0x436,0x0 0x437,0x0 0x438,0x0 0x439,0x0 0x43a,0x0 0x43b,0x0 0x43c,0x0 0x43d,0x0 0x43e,0x0 0x43f,0x0 0x440,0x0 0x441,0x0 0x442,0x0 0x443,0x0 0x444,0x0 0x445,0x0 0x446,0x0 0x447,0x0 0x448,0x0 0x449,0x0 0x44a,0x0 0x44b,0x0 0x44c,0x0 0x44d,0x0 0x44e,0x0 0x44f,0x0 0x450,0x0 0x451,0x0 0x452,0x0 0x453,0x0 0x454,0x0 
0x455,0x0 0x456,0x0 0x457,0x0 0x458,0x0 0x459,0x0 0x45a,0x0 0x45b,0x0 0x45c,0x0 0x45d,0x0 0x45e,0x0 0x45f,0x0 0x460,0x0 0x461,0x0 0x462,0x0 0x463,0x0 0x464,0x0 0x465,0x0 0x466,0x0 0x467,0x0 0x468,0x0 0x469,0x0 0x46a,0x0 0x46b,0x0 0x46c,0x0 0x46d,0x0 0x46e,0x0 0x46f,0x0 0x470,0x0 0x471,0x0 0x472,0x0 0x473,0x0 0x474,0x0 0x475,0x0 0x476,0x0 0x477,0x0 0x478,0x0 0x479,0x0 0x47a,0x0 0x47b,0x0 0x47c,0x0 0x47d,0x0 0x47e,0x0 0x47f,0x0 0x606,0x0 0x611,0x0 0x619,0x0 0x639,0x0 0x641,0x0 0x6e0,0x17ac905e366 0x800,0x0 0x801,0x0 0x802,0x0 0x803,0x50014 0x804,0x0 0x805,0x0 0x806,0x0 0x807,0x0 0x808,0x10 0x809,0x0 0x80a,0x10 0x80b,0x0 0x80c,0x0 0x80d,0x1 0x80e,0xffffffff 0x80f,0x1ff 0x810,0x0 0x811,0x0 0x812,0x0 0x813,0x0 0x814,0x0 0x815,0x0 0x816,0x0 0x817,0x0 0x818,0x0 0x819,0x0 0x81a,0x0 0x81b,0x0 0x81c,0x0 0x81d,0x0 0x81e,0x0 0x81f,0x0 0x820,0x0 0x821,0x0 0x822,0x0 0x823,0x0 0x824,0x0 0x825,0x0 0x826,0x0 0x827,0x0 0x828,0x0 0x829,0x0 0x82a,0x0 0x82b,0x0 0x82c,0x0 0x82d,0x0 0x82e,0x0 0x82f,0x0 0x830,0xfb 0x831,0x1 0x832,0x400ec 0x833,0x10000 0x834,0x10000 0x835,0x10700 0x836,0x400 0x837,0xfe 0x838,0x0 0x839,0x0 0x83a,0x0 0x83b,0x0 0x83c,0x0 0x83d,0x0 0x83e,0x0 0x83f,0x0 0x840,0x0 0x841,0x0 0x842,0x0 0x843,0x0 0x844,0x0 0x845,0x0 0x846,0x0 0x847,0x0 0x848,0x0 0x849,0x0 0x84a,0x0 0x84b,0x0 0x84c,0x0 0x84d,0x0 0x84e,0x0 0x84f,0x0 0x850,0x0 0x851,0x0 0x852,0x0 0x853,0x0 0x854,0x0 0x855,0x0 0x856,0x0 0x857,0x0 0x858,0x0 0x859,0x0 0x85a,0x0 0x85b,0x0 0x85c,0x0 0x85d,0x0 0x85e,0x0 0x85f,0x0 0x860,0x0 0x861,0x0 0x862,0x0 0x863,0x0 0x864,0x0 0x865,0x0 0x866,0x0 0x867,0x0 0x868,0x0 0x869,0x0 0x86a,0x0 0x86b,0x0 0x86c,0x0 0x86d,0x0 0x86e,0x0 0x86f,0x0 0x870,0x0 0x871,0x0 0x872,0x0 0x873,0x0 0x874,0x0 0x875,0x0 0x876,0x0 0x877,0x0 0x878,0x0 0x879,0x0 0x87a,0x0 0x87b,0x0 0x87c,0x0 0x87d,0x0 0x87e,0x0 0x87f,0x0 0x880,0x0 0x881,0x0 0x882,0x0 0x883,0x0 0x884,0x0 0x885,0x0 0x886,0x0 0x887,0x0 0x888,0x0 0x889,0x0 0x88a,0x0 0x88b,0x0 0x88c,0x0 0x88d,0x0 0x88e,0x0 0x88f,0x0 0x890,0x0 0x891,0x0 
0x892,0x0 0x893,0x0 0x894,0x0 0x895,0x0 0x896,0x0 0x897,0x0 0x898,0x0 0x899,0x0 0x89a,0x0 0x89b,0x0 0x89c,0x0 0x89d,0x0 0x89e,0x0 0x89f,0x0 0x8a0,0x0 0x8a1,0x0 0x8a2,0x0 0x8a3,0x0 0x8a4,0x0 0x8a5,0x0 0x8a6,0x0 0x8a7,0x0 0x8a8,0x0 0x8a9,0x0 0x8aa,0x0 0x8ab,0x0 0x8ac,0x0 0x8ad,0x0 0x8ae,0x0 0x8af,0x0 0x8b0,0x0 0x8b1,0x0 0x8b2,0x0 0x8b3,0x0 0x8b4,0x0 0x8b5,0x0 0x8b6,0x0 0x8b7,0x0 0x8b8,0x0 0x8b9,0x0 0x8ba,0x0 0x8bb,0x0 0x8bc,0x0 0x8bd,0x0 0x8be,0x0 0x8bf,0x0 0x8c0,0x0 0x8c1,0x0 0x8c2,0x0 0x8c3,0x0 0x8c4,0x0 0x8c5,0x0 0x8c6,0x0 0x8c7,0x0 0x8c8,0x0 0x8c9,0x0 0x8ca,0x0 0x8cb,0x0 0x8cc,0x0 0x8cd,0x0 0x8ce,0x0 0x8cf,0x0 0x8d0,0x0 0x8d1,0x0 0x8d2,0x0 0x8d3,0x0 0x8d4,0x0 0x8d5,0x0 0x8d6,0x0 0x8d7,0x0 0x8d8,0x0 0x8d9,0x0 0x8da,0x0 0x8db,0x0 0x8dc,0x0 0x8dd,0x0 0x8de,0x0 0x8df,0x0 0x8e0,0x0 0x8e1,0x0 0x8e2,0x0 0x8e3,0x0 0x8e4,0x0 0x8e5,0x0 0x8e6,0x0 0x8e7,0x0 0x8e8,0x0 0x8e9,0x0 0x8ea,0x0 0x8eb,0x0 0x8ec,0x0 0x8ed,0x0 0x8ee,0x0 0x8ef,0x0 0x8f0,0x0 0x8f1,0x0 0x8f2,0x0 0x8f3,0x0 0x8f4,0x0 0x8f5,0x0 0x8f6,0x0 0x8f7,0x0 0x8f8,0x0 0x8f9,0x0 0x8fa,0x0 0x8fb,0x0 0x8fc,0x0 0x8fd,0x0 0x8fe,0x0 0x8ff,0x0 0xc0000080,0xd01 0xc0000081,0x23001000000000 0xc0000082,0xffffffff81c00080 0xc0000083,0x0 0xc0000084,0x257fd5 0xc0000100,0x0 0xc0000101,0xffff88803ec00000 0xc0000102,0x0 0xc0000103,0x0 0xc0010000,0x0 0xc0010001,0x0 0xc0010002,0x0 0xc0010003,0x0 0xc0010004,0x0 0xc0010005,0x0 0xc0010006,0x0 0xc0010007,0x0 0xc0010010,0x0 0xc0010015,0x0 0xc001001b,0x20000000 0xc001001f,0x0 0xc0010055,0x0 0xc0010058,0x0 0xc0010112,0x0 0xc0010113,0x0 0xc0010117,0x0 0xc0010200,0x0 0xc0010201,0x0 0xc0010202,0x0 0xc0010203,0x0 0xc0010204,0x0 0xc0010205,0x0 0xc0010206,0x0 0xc0010207,0x0 0xc0010208,0x0 0xc0010209,0x0 0xc001020a,0x0 0xc001020b,0x0 0xc0011022,0x0 0xc0011023,0x0 0xc001102a,0x0 0xc001102c,0x0 0x400000000,0x0 0x2000000000,0x0 0x4000000000,0x0 0x8000000000,0x0 0x1000000000000,0x0 0x3c000000000000,0x0 0x80000000000000,0x0 0x40000000000000,0x0 ================================================ FILE: 
tests/data/msr/msr_list_GNR_TO_T2_6.1_INTEL_GRANITE_RAPIDS_6.1host_5.10guest.csv ================================================ MSR_ADDR,VALUE 0x0,0x0 0x1,0x0 0x10,0x162c302cc2 0x11,0x24e6008 0x12,0x24e7001 0x17,0x0 0x1b,0xfee00d00 0x2a,0x0 0x2c,0x1000000 0x34,0x0 0x3a,0x1 0x3b,0x0 0x48,0x1 0x8b,0x100000000 0xc1,0x0 0xc2,0x0 0xcd,0x3 0xce,0x80000000 0xfe,0x508 0x10a,0x400000000d08e0eb 0x11e,0xbe702111 0x122,0x3 0x140,0x0 0x174,0x10 0x175,0xfffffe0000003000 0x176,0xffffffff81a01510 0x179,0x20 0x17a,0x0 0x186,0x0 0x187,0x0 0x198,0x400000003e8 0x199,0x0 0x1a0,0x1 0x1d9,0x0 0x1db,0x0 0x1dc,0x0 0x1dd,0x0 0x1de,0x0 0x1fc,0x0 0x200,0x0 0x201,0x0 0x202,0x0 0x203,0x0 0x204,0x0 0x205,0x0 0x206,0x0 0x207,0x0 0x208,0x0 0x209,0x0 0x20a,0x0 0x20b,0x0 0x20c,0x0 0x20d,0x0 0x20e,0x0 0x20f,0x0 0x250,0x0 0x258,0x0 0x259,0x0 0x268,0x0 0x269,0x0 0x26a,0x0 0x26b,0x0 0x26c,0x0 0x26d,0x0 0x26e,0x0 0x26f,0x0 0x277,0x407050600070106 0x2ff,0x806 0x400,0x0 0x401,0x0 0x402,0x0 0x403,0x0 0x404,0x0 0x405,0x0 0x406,0x0 0x407,0x0 0x408,0x0 0x409,0x0 0x40a,0x0 0x40b,0x0 0x40c,0x0 0x40d,0x0 0x40e,0x0 0x40f,0x0 0x410,0x0 0x411,0x0 0x412,0x0 0x413,0x0 0x414,0x0 0x415,0x0 0x416,0x0 0x417,0x0 0x418,0x0 0x419,0x0 0x41a,0x0 0x41b,0x0 0x41c,0x0 0x41d,0x0 0x41e,0x0 0x41f,0x0 0x420,0x0 0x421,0x0 0x422,0x0 0x423,0x0 0x424,0x0 0x425,0x0 0x426,0x0 0x427,0x0 0x428,0x0 0x429,0x0 0x42a,0x0 0x42b,0x0 0x42c,0x0 0x42d,0x0 0x42e,0x0 0x42f,0x0 0x430,0x0 0x431,0x0 0x432,0x0 0x433,0x0 0x434,0x0 0x435,0x0 0x436,0x0 0x437,0x0 0x438,0x0 0x439,0x0 0x43a,0x0 0x43b,0x0 0x43c,0x0 0x43d,0x0 0x43e,0x0 0x43f,0x0 0x440,0x0 0x441,0x0 0x442,0x0 0x443,0x0 0x444,0x0 0x445,0x0 0x446,0x0 0x447,0x0 0x448,0x0 0x449,0x0 0x44a,0x0 0x44b,0x0 0x44c,0x0 0x44d,0x0 0x44e,0x0 0x44f,0x0 0x450,0x0 0x451,0x0 0x452,0x0 0x453,0x0 0x454,0x0 0x455,0x0 0x456,0x0 0x457,0x0 0x458,0x0 0x459,0x0 0x45a,0x0 0x45b,0x0 0x45c,0x0 0x45d,0x0 0x45e,0x0 0x45f,0x0 0x460,0x0 0x461,0x0 0x462,0x0 0x463,0x0 0x464,0x0 0x465,0x0 0x466,0x0 0x467,0x0 0x468,0x0 0x469,0x0 
0x46a,0x0 0x46b,0x0 0x46c,0x0 0x46d,0x0 0x46e,0x0 0x46f,0x0 0x470,0x0 0x471,0x0 0x472,0x0 0x473,0x0 0x474,0x0 0x475,0x0 0x476,0x0 0x477,0x0 0x478,0x0 0x479,0x0 0x47a,0x0 0x47b,0x0 0x47c,0x0 0x47d,0x0 0x47e,0x0 0x47f,0x0 0x606,0x0 0x611,0x0 0x619,0x0 0x639,0x0 0x641,0x0 0x6e0,0x162d1c2824 0x800,0x0 0x801,0x0 0x802,0x0 0x803,0x50014 0x804,0x0 0x805,0x0 0x806,0x0 0x807,0x0 0x808,0x10 0x809,0x0 0x80a,0x10 0x80b,0x0 0x80c,0x0 0x80d,0x1 0x80e,0xffffffff 0x80f,0x1ff 0x810,0x0 0x811,0x0 0x812,0x0 0x813,0x0 0x814,0x0 0x815,0x0 0x816,0x0 0x817,0x0 0x818,0x0 0x819,0x0 0x81a,0x0 0x81b,0x0 0x81c,0x0 0x81d,0x0 0x81e,0x0 0x81f,0x0 0x820,0x0 0x821,0x0 0x822,0x0 0x823,0x0 0x824,0x0 0x825,0x0 0x826,0x0 0x827,0x0 0x828,0x0 0x829,0x0 0x82a,0x0 0x82b,0x0 0x82c,0x0 0x82d,0x0 0x82e,0x0 0x82f,0x0 0x830,0x1000000fd 0x831,0x0 0x832,0x400ec 0x833,0x10000 0x834,0x10000 0x835,0x10700 0x836,0x400 0x837,0xfe 0x838,0x0 0x839,0x0 0x83a,0x0 0x83b,0x0 0x83c,0x0 0x83d,0x0 0x83e,0x0 0x83f,0x0 0x840,0x0 0x841,0x0 0x842,0x0 0x843,0x0 0x844,0x0 0x845,0x0 0x846,0x0 0x847,0x0 0x848,0x0 0x849,0x0 0x84a,0x0 0x84b,0x0 0x84c,0x0 0x84d,0x0 0x84e,0x0 0x84f,0x0 0x850,0x0 0x851,0x0 0x852,0x0 0x853,0x0 0x854,0x0 0x855,0x0 0x856,0x0 0x857,0x0 0x858,0x0 0x859,0x0 0x85a,0x0 0x85b,0x0 0x85c,0x0 0x85d,0x0 0x85e,0x0 0x85f,0x0 0x860,0x0 0x861,0x0 0x862,0x0 0x863,0x0 0x864,0x0 0x865,0x0 0x866,0x0 0x867,0x0 0x868,0x0 0x869,0x0 0x86a,0x0 0x86b,0x0 0x86c,0x0 0x86d,0x0 0x86e,0x0 0x86f,0x0 0x870,0x0 0x871,0x0 0x872,0x0 0x873,0x0 0x874,0x0 0x875,0x0 0x876,0x0 0x877,0x0 0x878,0x0 0x879,0x0 0x87a,0x0 0x87b,0x0 0x87c,0x0 0x87d,0x0 0x87e,0x0 0x87f,0x0 0x880,0x0 0x881,0x0 0x882,0x0 0x883,0x0 0x884,0x0 0x885,0x0 0x886,0x0 0x887,0x0 0x888,0x0 0x889,0x0 0x88a,0x0 0x88b,0x0 0x88c,0x0 0x88d,0x0 0x88e,0x0 0x88f,0x0 0x890,0x0 0x891,0x0 0x892,0x0 0x893,0x0 0x894,0x0 0x895,0x0 0x896,0x0 0x897,0x0 0x898,0x0 0x899,0x0 0x89a,0x0 0x89b,0x0 0x89c,0x0 0x89d,0x0 0x89e,0x0 0x89f,0x0 0x8a0,0x0 0x8a1,0x0 0x8a2,0x0 0x8a3,0x0 0x8a4,0x0 0x8a5,0x0 
0x8a6,0x0 0x8a7,0x0 0x8a8,0x0 0x8a9,0x0 0x8aa,0x0 0x8ab,0x0 0x8ac,0x0 0x8ad,0x0 0x8ae,0x0 0x8af,0x0 0x8b0,0x0 0x8b1,0x0 0x8b2,0x0 0x8b3,0x0 0x8b4,0x0 0x8b5,0x0 0x8b6,0x0 0x8b7,0x0 0x8b8,0x0 0x8b9,0x0 0x8ba,0x0 0x8bb,0x0 0x8bc,0x0 0x8bd,0x0 0x8be,0x0 0x8bf,0x0 0x8c0,0x0 0x8c1,0x0 0x8c2,0x0 0x8c3,0x0 0x8c4,0x0 0x8c5,0x0 0x8c6,0x0 0x8c7,0x0 0x8c8,0x0 0x8c9,0x0 0x8ca,0x0 0x8cb,0x0 0x8cc,0x0 0x8cd,0x0 0x8ce,0x0 0x8cf,0x0 0x8d0,0x0 0x8d1,0x0 0x8d2,0x0 0x8d3,0x0 0x8d4,0x0 0x8d5,0x0 0x8d6,0x0 0x8d7,0x0 0x8d8,0x0 0x8d9,0x0 0x8da,0x0 0x8db,0x0 0x8dc,0x0 0x8dd,0x0 0x8de,0x0 0x8df,0x0 0x8e0,0x0 0x8e1,0x0 0x8e2,0x0 0x8e3,0x0 0x8e4,0x0 0x8e5,0x0 0x8e6,0x0 0x8e7,0x0 0x8e8,0x0 0x8e9,0x0 0x8ea,0x0 0x8eb,0x0 0x8ec,0x0 0x8ed,0x0 0x8ee,0x0 0x8ef,0x0 0x8f0,0x0 0x8f1,0x0 0x8f2,0x0 0x8f3,0x0 0x8f4,0x0 0x8f5,0x0 0x8f6,0x0 0x8f7,0x0 0x8f8,0x0 0x8f9,0x0 0x8fa,0x0 0x8fb,0x0 0x8fc,0x0 0x8fd,0x0 0x8fe,0x0 0x8ff,0x0 0xc0000080,0xd01 0xc0000081,0x23001000000000 0xc0000082,0xffffffff81a00080 0xc0000083,0xffffffff81a015c0 0xc0000084,0x47700 0xc0000100,0x7f27ef038740 0xc0000101,0xffff88803ec00000 0xc0000102,0x0 0xc0000103,0x0 0xc0010000,0x0 0xc0010001,0x0 0xc0010002,0x0 0xc0010003,0x0 0xc0010004,0x0 0xc0010005,0x0 0xc0010006,0x0 0xc0010007,0x0 0xc0010010,0x0 0xc0010015,0x0 0xc001001b,0x20000000 0xc001001f,0x0 0xc0010055,0x0 0xc0010058,0x0 0xc0010112,0x0 0xc0010113,0x0 0xc0010117,0x0 0xc0011022,0x0 0xc0011023,0x0 0xc001102a,0x0 0xc001102c,0x0 0x400000000,0x0 0x2000000000,0x0 0x4000000000,0x0 0x8000000000,0x0 0x1000000000000,0x0 0x3c000000000000,0x0 0x80000000000000,0x0 0x40000000000000,0x0 ================================================ FILE: tests/data/msr/msr_list_GNR_TO_T2_6.1_INTEL_GRANITE_RAPIDS_6.1host_6.1guest.csv ================================================ MSR_ADDR,VALUE 0x0,0x0 0x1,0x0 0x10,0x119ae688626 0x11,0x27fd008 0x12,0x27fe001 0x17,0x0 0x1b,0xfee00d00 0x2a,0x0 0x2c,0x1000000 0x34,0x0 0x3a,0x1 0x3b,0x0 0x48,0x401 0x8b,0x100000000 0xc1,0x0 0xc2,0x0 0xcd,0x3 0xce,0x80000000 
0xfe,0x508 0x10a,0x400000000d08e0eb 0x11e,0xbe702111 0x122,0x3 0x140,0x0 0x174,0x10 0x175,0xfffffe0000003000 0x176,0xffffffff81c01630 0x179,0x20 0x17a,0x0 0x186,0x0 0x187,0x0 0x198,0x400000003e8 0x199,0x0 0x1a0,0x1 0x1d9,0x0 0x1db,0x0 0x1dc,0x0 0x1dd,0x0 0x1de,0x0 0x1fc,0x0 0x200,0x0 0x201,0x0 0x202,0x0 0x203,0x0 0x204,0x0 0x205,0x0 0x206,0x0 0x207,0x0 0x208,0x0 0x209,0x0 0x20a,0x0 0x20b,0x0 0x20c,0x0 0x20d,0x0 0x20e,0x0 0x20f,0x0 0x250,0x0 0x258,0x0 0x259,0x0 0x268,0x0 0x269,0x0 0x26a,0x0 0x26b,0x0 0x26c,0x0 0x26d,0x0 0x26e,0x0 0x26f,0x0 0x277,0x407050600070106 0x2ff,0x806 0x400,0x0 0x401,0x0 0x402,0x0 0x403,0x0 0x404,0x0 0x405,0x0 0x406,0x0 0x407,0x0 0x408,0x0 0x409,0x0 0x40a,0x0 0x40b,0x0 0x40c,0x0 0x40d,0x0 0x40e,0x0 0x40f,0x0 0x410,0x0 0x411,0x0 0x412,0x0 0x413,0x0 0x414,0x0 0x415,0x0 0x416,0x0 0x417,0x0 0x418,0x0 0x419,0x0 0x41a,0x0 0x41b,0x0 0x41c,0x0 0x41d,0x0 0x41e,0x0 0x41f,0x0 0x420,0x0 0x421,0x0 0x422,0x0 0x423,0x0 0x424,0x0 0x425,0x0 0x426,0x0 0x427,0x0 0x428,0x0 0x429,0x0 0x42a,0x0 0x42b,0x0 0x42c,0x0 0x42d,0x0 0x42e,0x0 0x42f,0x0 0x430,0x0 0x431,0x0 0x432,0x0 0x433,0x0 0x434,0x0 0x435,0x0 0x436,0x0 0x437,0x0 0x438,0x0 0x439,0x0 0x43a,0x0 0x43b,0x0 0x43c,0x0 0x43d,0x0 0x43e,0x0 0x43f,0x0 0x440,0x0 0x441,0x0 0x442,0x0 0x443,0x0 0x444,0x0 0x445,0x0 0x446,0x0 0x447,0x0 0x448,0x0 0x449,0x0 0x44a,0x0 0x44b,0x0 0x44c,0x0 0x44d,0x0 0x44e,0x0 0x44f,0x0 0x450,0x0 0x451,0x0 0x452,0x0 0x453,0x0 0x454,0x0 0x455,0x0 0x456,0x0 0x457,0x0 0x458,0x0 0x459,0x0 0x45a,0x0 0x45b,0x0 0x45c,0x0 0x45d,0x0 0x45e,0x0 0x45f,0x0 0x460,0x0 0x461,0x0 0x462,0x0 0x463,0x0 0x464,0x0 0x465,0x0 0x466,0x0 0x467,0x0 0x468,0x0 0x469,0x0 0x46a,0x0 0x46b,0x0 0x46c,0x0 0x46d,0x0 0x46e,0x0 0x46f,0x0 0x470,0x0 0x471,0x0 0x472,0x0 0x473,0x0 0x474,0x0 0x475,0x0 0x476,0x0 0x477,0x0 0x478,0x0 0x479,0x0 0x47a,0x0 0x47b,0x0 0x47c,0x0 0x47d,0x0 0x47e,0x0 0x47f,0x0 0x606,0x0 0x611,0x0 0x619,0x0 0x639,0x0 0x641,0x0 0x6e0,0x119af2b2134 0x800,0x0 0x801,0x0 0x802,0x0 0x803,0x50014 0x804,0x0 0x805,0x0 
0x806,0x0 0x807,0x0 0x808,0x10 0x809,0x0 0x80a,0x10 0x80b,0x0 0x80c,0x0 0x80d,0x1 0x80e,0xffffffff 0x80f,0x1ff 0x810,0x0 0x811,0x0 0x812,0x0 0x813,0x0 0x814,0x0 0x815,0x0 0x816,0x0 0x817,0x0 0x818,0x0 0x819,0x0 0x81a,0x0 0x81b,0x0 0x81c,0x0 0x81d,0x0 0x81e,0x0 0x81f,0x0 0x820,0x0 0x821,0x0 0x822,0x0 0x823,0x0 0x824,0x0 0x825,0x0 0x826,0x0 0x827,0x0 0x828,0x0 0x829,0x0 0x82a,0x0 0x82b,0x0 0x82c,0x0 0x82d,0x0 0x82e,0x0 0x82f,0x0 0x830,0x1000000fb 0x831,0x0 0x832,0x400ec 0x833,0x10000 0x834,0x10000 0x835,0x10700 0x836,0x400 0x837,0xfe 0x838,0x0 0x839,0x0 0x83a,0x0 0x83b,0x0 0x83c,0x0 0x83d,0x0 0x83e,0x0 0x83f,0x0 0x840,0x0 0x841,0x0 0x842,0x0 0x843,0x0 0x844,0x0 0x845,0x0 0x846,0x0 0x847,0x0 0x848,0x0 0x849,0x0 0x84a,0x0 0x84b,0x0 0x84c,0x0 0x84d,0x0 0x84e,0x0 0x84f,0x0 0x850,0x0 0x851,0x0 0x852,0x0 0x853,0x0 0x854,0x0 0x855,0x0 0x856,0x0 0x857,0x0 0x858,0x0 0x859,0x0 0x85a,0x0 0x85b,0x0 0x85c,0x0 0x85d,0x0 0x85e,0x0 0x85f,0x0 0x860,0x0 0x861,0x0 0x862,0x0 0x863,0x0 0x864,0x0 0x865,0x0 0x866,0x0 0x867,0x0 0x868,0x0 0x869,0x0 0x86a,0x0 0x86b,0x0 0x86c,0x0 0x86d,0x0 0x86e,0x0 0x86f,0x0 0x870,0x0 0x871,0x0 0x872,0x0 0x873,0x0 0x874,0x0 0x875,0x0 0x876,0x0 0x877,0x0 0x878,0x0 0x879,0x0 0x87a,0x0 0x87b,0x0 0x87c,0x0 0x87d,0x0 0x87e,0x0 0x87f,0x0 0x880,0x0 0x881,0x0 0x882,0x0 0x883,0x0 0x884,0x0 0x885,0x0 0x886,0x0 0x887,0x0 0x888,0x0 0x889,0x0 0x88a,0x0 0x88b,0x0 0x88c,0x0 0x88d,0x0 0x88e,0x0 0x88f,0x0 0x890,0x0 0x891,0x0 0x892,0x0 0x893,0x0 0x894,0x0 0x895,0x0 0x896,0x0 0x897,0x0 0x898,0x0 0x899,0x0 0x89a,0x0 0x89b,0x0 0x89c,0x0 0x89d,0x0 0x89e,0x0 0x89f,0x0 0x8a0,0x0 0x8a1,0x0 0x8a2,0x0 0x8a3,0x0 0x8a4,0x0 0x8a5,0x0 0x8a6,0x0 0x8a7,0x0 0x8a8,0x0 0x8a9,0x0 0x8aa,0x0 0x8ab,0x0 0x8ac,0x0 0x8ad,0x0 0x8ae,0x0 0x8af,0x0 0x8b0,0x0 0x8b1,0x0 0x8b2,0x0 0x8b3,0x0 0x8b4,0x0 0x8b5,0x0 0x8b6,0x0 0x8b7,0x0 0x8b8,0x0 0x8b9,0x0 0x8ba,0x0 0x8bb,0x0 0x8bc,0x0 0x8bd,0x0 0x8be,0x0 0x8bf,0x0 0x8c0,0x0 0x8c1,0x0 0x8c2,0x0 0x8c3,0x0 0x8c4,0x0 0x8c5,0x0 0x8c6,0x0 0x8c7,0x0 0x8c8,0x0 0x8c9,0x0 
0x8ca,0x0 0x8cb,0x0 0x8cc,0x0 0x8cd,0x0 0x8ce,0x0 0x8cf,0x0 0x8d0,0x0 0x8d1,0x0 0x8d2,0x0 0x8d3,0x0 0x8d4,0x0 0x8d5,0x0 0x8d6,0x0 0x8d7,0x0 0x8d8,0x0 0x8d9,0x0 0x8da,0x0 0x8db,0x0 0x8dc,0x0 0x8dd,0x0 0x8de,0x0 0x8df,0x0 0x8e0,0x0 0x8e1,0x0 0x8e2,0x0 0x8e3,0x0 0x8e4,0x0 0x8e5,0x0 0x8e6,0x0 0x8e7,0x0 0x8e8,0x0 0x8e9,0x0 0x8ea,0x0 0x8eb,0x0 0x8ec,0x0 0x8ed,0x0 0x8ee,0x0 0x8ef,0x0 0x8f0,0x0 0x8f1,0x0 0x8f2,0x0 0x8f3,0x0 0x8f4,0x0 0x8f5,0x0 0x8f6,0x0 0x8f7,0x0 0x8f8,0x0 0x8f9,0x0 0x8fa,0x0 0x8fb,0x0 0x8fc,0x0 0x8fd,0x0 0x8fe,0x0 0x8ff,0x0 0xc0000080,0xd01 0xc0000081,0x23001000000000 0xc0000082,0xffffffff81c00080 0xc0000083,0x0 0xc0000084,0x257fd5 0xc0000100,0x7fd8d4bba740 0xc0000101,0xffff88803ec00000 0xc0000102,0x0 0xc0000103,0x0 0xc0010000,0x0 0xc0010001,0x0 0xc0010002,0x0 0xc0010003,0x0 0xc0010004,0x0 0xc0010005,0x0 0xc0010006,0x0 0xc0010007,0x0 0xc0010010,0x0 0xc0010015,0x0 0xc001001b,0x20000000 0xc001001f,0x0 0xc0010055,0x0 0xc0010058,0x0 0xc0010112,0x0 0xc0010113,0x0 0xc0010117,0x0 0xc0011022,0x0 0xc0011023,0x0 0xc001102a,0x0 0xc001102c,0x0 0x400000000,0x0 0x2000000000,0x0 0x4000000000,0x0 0x8000000000,0x0 0x1000000000000,0x0 0x3c000000000000,0x0 0x80000000000000,0x0 0x40000000000000,0x0 ================================================ FILE: tests/data/msr/msr_list_SPR_TO_T2_5.10_INTEL_SAPPHIRE_RAPIDS_5.10host_5.10guest.csv ================================================ MSR_ADDR,VALUE 0x0,0x0 0x1,0x0 0x10,0x1014d0f7c 0x11,0x24cb008 0x12,0x24cc001 0x17,0x0 0x1b,0xfee00d00 0x2a,0x0 0x2c,0x1000000 0x34,0x0 0x3a,0x1 0x3b,0x0 0x48,0x1 0x8b,0x100000000 0xc1,0x0 0xc2,0x0 0xcd,0x3 0xce,0x80000000 0xfe,0x508 0x10a,0x400000000c08e0eb 0x11e,0xbe702111 0x122,0x3 0x140,0x0 0x174,0x10 0x175,0xfffffe0000003000 0x176,0xffffffff81a01510 0x179,0x20 0x17a,0x0 0x186,0x0 0x187,0x0 0x198,0x400000003e8 0x199,0x0 0x1a0,0x1 0x1d9,0x0 0x1db,0x0 0x1dc,0x0 0x1dd,0x0 0x1de,0x0 0x1fc,0x0 0x200,0x0 0x201,0x0 0x202,0x0 0x203,0x0 0x204,0x0 0x205,0x0 0x206,0x0 0x207,0x0 0x208,0x0 0x209,0x0 
0x20a,0x0 0x20b,0x0 0x20c,0x0 0x20d,0x0 0x20e,0x0 0x20f,0x0 0x250,0x0 0x258,0x0 0x259,0x0 0x268,0x0 0x269,0x0 0x26a,0x0 0x26b,0x0 0x26c,0x0 0x26d,0x0 0x26e,0x0 0x26f,0x0 0x277,0x407050600070106 0x2ff,0x806 0x400,0x0 0x401,0x0 0x402,0x0 0x403,0x0 0x404,0x0 0x405,0x0 0x406,0x0 0x407,0x0 0x408,0x0 0x409,0x0 0x40a,0x0 0x40b,0x0 0x40c,0x0 0x40d,0x0 0x40e,0x0 0x40f,0x0 0x410,0x0 0x411,0x0 0x412,0x0 0x413,0x0 0x414,0x0 0x415,0x0 0x416,0x0 0x417,0x0 0x418,0x0 0x419,0x0 0x41a,0x0 0x41b,0x0 0x41c,0x0 0x41d,0x0 0x41e,0x0 0x41f,0x0 0x420,0x0 0x421,0x0 0x422,0x0 0x423,0x0 0x424,0x0 0x425,0x0 0x426,0x0 0x427,0x0 0x428,0x0 0x429,0x0 0x42a,0x0 0x42b,0x0 0x42c,0x0 0x42d,0x0 0x42e,0x0 0x42f,0x0 0x430,0x0 0x431,0x0 0x432,0x0 0x433,0x0 0x434,0x0 0x435,0x0 0x436,0x0 0x437,0x0 0x438,0x0 0x439,0x0 0x43a,0x0 0x43b,0x0 0x43c,0x0 0x43d,0x0 0x43e,0x0 0x43f,0x0 0x440,0x0 0x441,0x0 0x442,0x0 0x443,0x0 0x444,0x0 0x445,0x0 0x446,0x0 0x447,0x0 0x448,0x0 0x449,0x0 0x44a,0x0 0x44b,0x0 0x44c,0x0 0x44d,0x0 0x44e,0x0 0x44f,0x0 0x450,0x0 0x451,0x0 0x452,0x0 0x453,0x0 0x454,0x0 0x455,0x0 0x456,0x0 0x457,0x0 0x458,0x0 0x459,0x0 0x45a,0x0 0x45b,0x0 0x45c,0x0 0x45d,0x0 0x45e,0x0 0x45f,0x0 0x460,0x0 0x461,0x0 0x462,0x0 0x463,0x0 0x464,0x0 0x465,0x0 0x466,0x0 0x467,0x0 0x468,0x0 0x469,0x0 0x46a,0x0 0x46b,0x0 0x46c,0x0 0x46d,0x0 0x46e,0x0 0x46f,0x0 0x470,0x0 0x471,0x0 0x472,0x0 0x473,0x0 0x474,0x0 0x475,0x0 0x476,0x0 0x477,0x0 0x478,0x0 0x479,0x0 0x47a,0x0 0x47b,0x0 0x47c,0x0 0x47d,0x0 0x47e,0x0 0x47f,0x0 0x606,0x0 0x611,0x0 0x619,0x0 0x639,0x0 0x641,0x0 0x6e0,0x101d7c3a2 0x800,0x0 0x801,0x0 0x802,0x0 0x803,0x50014 0x804,0x0 0x805,0x0 0x806,0x0 0x807,0x0 0x808,0x10 0x809,0x0 0x80a,0x10 0x80b,0x0 0x80c,0x0 0x80d,0x1 0x80e,0xffffffff 0x80f,0x1ff 0x810,0x0 0x811,0x0 0x812,0x0 0x813,0x0 0x814,0x0 0x815,0x0 0x816,0x0 0x817,0x0 0x818,0x0 0x819,0x0 0x81a,0x0 0x81b,0x0 0x81c,0x0 0x81d,0x0 0x81e,0x0 0x81f,0x0 0x820,0x0 0x821,0x0 0x822,0x0 0x823,0x0 0x824,0x0 0x825,0x0 0x826,0x0 0x827,0x0 0x828,0x0 0x829,0x0 0x82a,0x0 
0x82b,0x0 0x82c,0x0 0x82d,0x0 0x82e,0x0 0x82f,0x0 0x830,0x0 0x831,0x0 0x832,0x400ec 0x833,0x10000 0x834,0x10000 0x835,0x10700 0x836,0x400 0x837,0xfe 0x838,0x0 0x839,0x0 0x83a,0x0 0x83b,0x0 0x83c,0x0 0x83d,0x0 0x83e,0x0 0x83f,0x0 0x840,0x0 0x841,0x0 0x842,0x0 0x843,0x0 0x844,0x0 0x845,0x0 0x846,0x0 0x847,0x0 0x848,0x0 0x849,0x0 0x84a,0x0 0x84b,0x0 0x84c,0x0 0x84d,0x0 0x84e,0x0 0x84f,0x0 0x850,0x0 0x851,0x0 0x852,0x0 0x853,0x0 0x854,0x0 0x855,0x0 0x856,0x0 0x857,0x0 0x858,0x0 0x859,0x0 0x85a,0x0 0x85b,0x0 0x85c,0x0 0x85d,0x0 0x85e,0x0 0x85f,0x0 0x860,0x0 0x861,0x0 0x862,0x0 0x863,0x0 0x864,0x0 0x865,0x0 0x866,0x0 0x867,0x0 0x868,0x0 0x869,0x0 0x86a,0x0 0x86b,0x0 0x86c,0x0 0x86d,0x0 0x86e,0x0 0x86f,0x0 0x870,0x0 0x871,0x0 0x872,0x0 0x873,0x0 0x874,0x0 0x875,0x0 0x876,0x0 0x877,0x0 0x878,0x0 0x879,0x0 0x87a,0x0 0x87b,0x0 0x87c,0x0 0x87d,0x0 0x87e,0x0 0x87f,0x0 0x880,0x0 0x881,0x0 0x882,0x0 0x883,0x0 0x884,0x0 0x885,0x0 0x886,0x0 0x887,0x0 0x888,0x0 0x889,0x0 0x88a,0x0 0x88b,0x0 0x88c,0x0 0x88d,0x0 0x88e,0x0 0x88f,0x0 0x890,0x0 0x891,0x0 0x892,0x0 0x893,0x0 0x894,0x0 0x895,0x0 0x896,0x0 0x897,0x0 0x898,0x0 0x899,0x0 0x89a,0x0 0x89b,0x0 0x89c,0x0 0x89d,0x0 0x89e,0x0 0x89f,0x0 0x8a0,0x0 0x8a1,0x0 0x8a2,0x0 0x8a3,0x0 0x8a4,0x0 0x8a5,0x0 0x8a6,0x0 0x8a7,0x0 0x8a8,0x0 0x8a9,0x0 0x8aa,0x0 0x8ab,0x0 0x8ac,0x0 0x8ad,0x0 0x8ae,0x0 0x8af,0x0 0x8b0,0x0 0x8b1,0x0 0x8b2,0x0 0x8b3,0x0 0x8b4,0x0 0x8b5,0x0 0x8b6,0x0 0x8b7,0x0 0x8b8,0x0 0x8b9,0x0 0x8ba,0x0 0x8bb,0x0 0x8bc,0x0 0x8bd,0x0 0x8be,0x0 0x8bf,0x0 0x8c0,0x0 0x8c1,0x0 0x8c2,0x0 0x8c3,0x0 0x8c4,0x0 0x8c5,0x0 0x8c6,0x0 0x8c7,0x0 0x8c8,0x0 0x8c9,0x0 0x8ca,0x0 0x8cb,0x0 0x8cc,0x0 0x8cd,0x0 0x8ce,0x0 0x8cf,0x0 0x8d0,0x0 0x8d1,0x0 0x8d2,0x0 0x8d3,0x0 0x8d4,0x0 0x8d5,0x0 0x8d6,0x0 0x8d7,0x0 0x8d8,0x0 0x8d9,0x0 0x8da,0x0 0x8db,0x0 0x8dc,0x0 0x8dd,0x0 0x8de,0x0 0x8df,0x0 0x8e0,0x0 0x8e1,0x0 0x8e2,0x0 0x8e3,0x0 0x8e4,0x0 0x8e5,0x0 0x8e6,0x0 0x8e7,0x0 0x8e8,0x0 0x8e9,0x0 0x8ea,0x0 0x8eb,0x0 0x8ec,0x0 0x8ed,0x0 0x8ee,0x0 0x8ef,0x0 0x8f0,0x0 
0x8f1,0x0 0x8f2,0x0 0x8f3,0x0 0x8f4,0x0 0x8f5,0x0 0x8f6,0x0 0x8f7,0x0 0x8f8,0x0 0x8f9,0x0 0x8fa,0x0 0x8fb,0x0 0x8fc,0x0 0x8fd,0x0 0x8fe,0x0 0x8ff,0x0 0xc0000080,0xd01 0xc0000081,0x23001000000000 0xc0000082,0xffffffff81a00080 0xc0000083,0xffffffff81a015c0 0xc0000084,0x47700 0xc0000100,0x6c49380 0xc0000101,0xffff88803ec00000 0xc0000102,0x0 0xc0000103,0x0 0xc0010000,0x0 0xc0010001,0x0 0xc0010002,0x0 0xc0010003,0x0 0xc0010004,0x0 0xc0010005,0x0 0xc0010006,0x0 0xc0010007,0x0 0xc0010010,0x0 0xc0010015,0x0 0xc001001b,0x20000000 0xc001001f,0x0 0xc0010055,0x0 0xc0010058,0x0 0xc0010112,0x0 0xc0010113,0x0 0xc0010117,0x0 0xc0010200,0x0 0xc0010201,0x0 0xc0010202,0x0 0xc0010203,0x0 0xc0010204,0x0 0xc0010205,0x0 0xc0010206,0x0 0xc0010207,0x0 0xc0010208,0x0 0xc0010209,0x0 0xc001020a,0x0 0xc001020b,0x0 0xc0011022,0x0 0xc0011023,0x0 0xc001102a,0x0 0xc001102c,0x0 0x400000000,0x0 0x2000000000,0x0 0x4000000000,0x0 0x8000000000,0x0 0x1000000000000,0x0 0x3c000000000000,0x0 0x80000000000000,0x0 0x40000000000000,0x0 ================================================ FILE: tests/data/msr/msr_list_SPR_TO_T2_5.10_INTEL_SAPPHIRE_RAPIDS_5.10host_6.1guest.csv ================================================ MSR_ADDR,VALUE 0x0,0x0 0x1,0x0 0x10,0xd2097b66 0x11,0x27fd008 0x12,0x27fe001 0x17,0x0 0x1b,0xfee00d00 0x2a,0x0 0x2c,0x1000000 0x34,0x0 0x3a,0x1 0x3b,0x0 0x48,0x1 0x8b,0x100000000 0xc1,0x0 0xc2,0x0 0xcd,0x3 0xce,0x80000000 0xfe,0x508 0x10a,0x400000000c08e0eb 0x11e,0xbe702111 0x122,0x3 0x140,0x0 0x174,0x10 0x175,0xfffffe0000003000 0x176,0xffffffff81c01630 0x179,0x20 0x17a,0x0 0x186,0x0 0x187,0x0 0x198,0x400000003e8 0x199,0x0 0x1a0,0x1 0x1d9,0x0 0x1db,0x0 0x1dc,0x0 0x1dd,0x0 0x1de,0x0 0x1fc,0x0 0x200,0x0 0x201,0x0 0x202,0x0 0x203,0x0 0x204,0x0 0x205,0x0 0x206,0x0 0x207,0x0 0x208,0x0 0x209,0x0 0x20a,0x0 0x20b,0x0 0x20c,0x0 0x20d,0x0 0x20e,0x0 0x20f,0x0 0x250,0x0 0x258,0x0 0x259,0x0 0x268,0x0 0x269,0x0 0x26a,0x0 0x26b,0x0 0x26c,0x0 0x26d,0x0 0x26e,0x0 0x26f,0x0 0x277,0x407050600070106 0x2ff,0x806 
0x400,0x0 0x401,0x0 0x402,0x0 0x403,0x0 0x404,0x0 0x405,0x0 0x406,0x0 0x407,0x0 0x408,0x0 0x409,0x0 0x40a,0x0 0x40b,0x0 0x40c,0x0 0x40d,0x0 0x40e,0x0 0x40f,0x0 0x410,0x0 0x411,0x0 0x412,0x0 0x413,0x0 0x414,0x0 0x415,0x0 0x416,0x0 0x417,0x0 0x418,0x0 0x419,0x0 0x41a,0x0 0x41b,0x0 0x41c,0x0 0x41d,0x0 0x41e,0x0 0x41f,0x0 0x420,0x0 0x421,0x0 0x422,0x0 0x423,0x0 0x424,0x0 0x425,0x0 0x426,0x0 0x427,0x0 0x428,0x0 0x429,0x0 0x42a,0x0 0x42b,0x0 0x42c,0x0 0x42d,0x0 0x42e,0x0 0x42f,0x0 0x430,0x0 0x431,0x0 0x432,0x0 0x433,0x0 0x434,0x0 0x435,0x0 0x436,0x0 0x437,0x0 0x438,0x0 0x439,0x0 0x43a,0x0 0x43b,0x0 0x43c,0x0 0x43d,0x0 0x43e,0x0 0x43f,0x0 0x440,0x0 0x441,0x0 0x442,0x0 0x443,0x0 0x444,0x0 0x445,0x0 0x446,0x0 0x447,0x0 0x448,0x0 0x449,0x0 0x44a,0x0 0x44b,0x0 0x44c,0x0 0x44d,0x0 0x44e,0x0 0x44f,0x0 0x450,0x0 0x451,0x0 0x452,0x0 0x453,0x0 0x454,0x0 0x455,0x0 0x456,0x0 0x457,0x0 0x458,0x0 0x459,0x0 0x45a,0x0 0x45b,0x0 0x45c,0x0 0x45d,0x0 0x45e,0x0 0x45f,0x0 0x460,0x0 0x461,0x0 0x462,0x0 0x463,0x0 0x464,0x0 0x465,0x0 0x466,0x0 0x467,0x0 0x468,0x0 0x469,0x0 0x46a,0x0 0x46b,0x0 0x46c,0x0 0x46d,0x0 0x46e,0x0 0x46f,0x0 0x470,0x0 0x471,0x0 0x472,0x0 0x473,0x0 0x474,0x0 0x475,0x0 0x476,0x0 0x477,0x0 0x478,0x0 0x479,0x0 0x47a,0x0 0x47b,0x0 0x47c,0x0 0x47d,0x0 0x47e,0x0 0x47f,0x0 0x606,0x0 0x611,0x0 0x619,0x0 0x639,0x0 0x641,0x0 0x6e0,0xd2b602dc 0x800,0x0 0x801,0x0 0x802,0x0 0x803,0x50014 0x804,0x0 0x805,0x0 0x806,0x0 0x807,0x0 0x808,0x10 0x809,0x0 0x80a,0x10 0x80b,0x0 0x80c,0x0 0x80d,0x1 0x80e,0xffffffff 0x80f,0x1ff 0x810,0x0 0x811,0x0 0x812,0x0 0x813,0x0 0x814,0x0 0x815,0x0 0x816,0x0 0x817,0x0 0x818,0x0 0x819,0x0 0x81a,0x0 0x81b,0x0 0x81c,0x0 0x81d,0x0 0x81e,0x0 0x81f,0x0 0x820,0x0 0x821,0x0 0x822,0x0 0x823,0x0 0x824,0x0 0x825,0x0 0x826,0x0 0x827,0x0 0x828,0x0 0x829,0x0 0x82a,0x0 0x82b,0x0 0x82c,0x0 0x82d,0x0 0x82e,0x0 0x82f,0x0 0x830,0x0 0x831,0x0 0x832,0x400ec 0x833,0x10000 0x834,0x10000 0x835,0x10700 0x836,0x400 0x837,0xfe 0x838,0x0 0x839,0x0 0x83a,0x0 0x83b,0x0 0x83c,0x0 
0x83d,0x0 0x83e,0x0 0x83f,0x0 0x840,0x0 0x841,0x0 0x842,0x0 0x843,0x0 0x844,0x0 0x845,0x0 0x846,0x0 0x847,0x0 0x848,0x0 0x849,0x0 0x84a,0x0 0x84b,0x0 0x84c,0x0 0x84d,0x0 0x84e,0x0 0x84f,0x0 0x850,0x0 0x851,0x0 0x852,0x0 0x853,0x0 0x854,0x0 0x855,0x0 0x856,0x0 0x857,0x0 0x858,0x0 0x859,0x0 0x85a,0x0 0x85b,0x0 0x85c,0x0 0x85d,0x0 0x85e,0x0 0x85f,0x0 0x860,0x0 0x861,0x0 0x862,0x0 0x863,0x0 0x864,0x0 0x865,0x0 0x866,0x0 0x867,0x0 0x868,0x0 0x869,0x0 0x86a,0x0 0x86b,0x0 0x86c,0x0 0x86d,0x0 0x86e,0x0 0x86f,0x0 0x870,0x0 0x871,0x0 0x872,0x0 0x873,0x0 0x874,0x0 0x875,0x0 0x876,0x0 0x877,0x0 0x878,0x0 0x879,0x0 0x87a,0x0 0x87b,0x0 0x87c,0x0 0x87d,0x0 0x87e,0x0 0x87f,0x0 0x880,0x0 0x881,0x0 0x882,0x0 0x883,0x0 0x884,0x0 0x885,0x0 0x886,0x0 0x887,0x0 0x888,0x0 0x889,0x0 0x88a,0x0 0x88b,0x0 0x88c,0x0 0x88d,0x0 0x88e,0x0 0x88f,0x0 0x890,0x0 0x891,0x0 0x892,0x0 0x893,0x0 0x894,0x0 0x895,0x0 0x896,0x0 0x897,0x0 0x898,0x0 0x899,0x0 0x89a,0x0 0x89b,0x0 0x89c,0x0 0x89d,0x0 0x89e,0x0 0x89f,0x0 0x8a0,0x0 0x8a1,0x0 0x8a2,0x0 0x8a3,0x0 0x8a4,0x0 0x8a5,0x0 0x8a6,0x0 0x8a7,0x0 0x8a8,0x0 0x8a9,0x0 0x8aa,0x0 0x8ab,0x0 0x8ac,0x0 0x8ad,0x0 0x8ae,0x0 0x8af,0x0 0x8b0,0x0 0x8b1,0x0 0x8b2,0x0 0x8b3,0x0 0x8b4,0x0 0x8b5,0x0 0x8b6,0x0 0x8b7,0x0 0x8b8,0x0 0x8b9,0x0 0x8ba,0x0 0x8bb,0x0 0x8bc,0x0 0x8bd,0x0 0x8be,0x0 0x8bf,0x0 0x8c0,0x0 0x8c1,0x0 0x8c2,0x0 0x8c3,0x0 0x8c4,0x0 0x8c5,0x0 0x8c6,0x0 0x8c7,0x0 0x8c8,0x0 0x8c9,0x0 0x8ca,0x0 0x8cb,0x0 0x8cc,0x0 0x8cd,0x0 0x8ce,0x0 0x8cf,0x0 0x8d0,0x0 0x8d1,0x0 0x8d2,0x0 0x8d3,0x0 0x8d4,0x0 0x8d5,0x0 0x8d6,0x0 0x8d7,0x0 0x8d8,0x0 0x8d9,0x0 0x8da,0x0 0x8db,0x0 0x8dc,0x0 0x8dd,0x0 0x8de,0x0 0x8df,0x0 0x8e0,0x0 0x8e1,0x0 0x8e2,0x0 0x8e3,0x0 0x8e4,0x0 0x8e5,0x0 0x8e6,0x0 0x8e7,0x0 0x8e8,0x0 0x8e9,0x0 0x8ea,0x0 0x8eb,0x0 0x8ec,0x0 0x8ed,0x0 0x8ee,0x0 0x8ef,0x0 0x8f0,0x0 0x8f1,0x0 0x8f2,0x0 0x8f3,0x0 0x8f4,0x0 0x8f5,0x0 0x8f6,0x0 0x8f7,0x0 0x8f8,0x0 0x8f9,0x0 0x8fa,0x0 0x8fb,0x0 0x8fc,0x0 0x8fd,0x0 0x8fe,0x0 0x8ff,0x0 0xc0000080,0xd01 0xc0000081,0x23001000000000 
0xc0000082,0xffffffff81c00080 0xc0000083,0x0 0xc0000084,0x257fd5 0xc0000100,0x592380 0xc0000101,0xffff88803ec00000 0xc0000102,0x0 0xc0000103,0x0 0xc0010000,0x0 0xc0010001,0x0 0xc0010002,0x0 0xc0010003,0x0 0xc0010004,0x0 0xc0010005,0x0 0xc0010006,0x0 0xc0010007,0x0 0xc0010010,0x0 0xc0010015,0x0 0xc001001b,0x20000000 0xc001001f,0x0 0xc0010055,0x0 0xc0010058,0x0 0xc0010112,0x0 0xc0010113,0x0 0xc0010117,0x0 0xc0010200,0x0 0xc0010201,0x0 0xc0010202,0x0 0xc0010203,0x0 0xc0010204,0x0 0xc0010205,0x0 0xc0010206,0x0 0xc0010207,0x0 0xc0010208,0x0 0xc0010209,0x0 0xc001020a,0x0 0xc001020b,0x0 0xc0011022,0x0 0xc0011023,0x0 0xc001102a,0x0 0xc001102c,0x0 0x400000000,0x0 0x2000000000,0x0 0x4000000000,0x0 0x8000000000,0x0 0x1000000000000,0x0 0x3c000000000000,0x0 0x80000000000000,0x0 0x40000000000000,0x0 ================================================ FILE: tests/data/msr/msr_list_SPR_TO_T2_6.1_INTEL_SAPPHIRE_RAPIDS_6.1host_5.10guest.csv ================================================ MSR_ADDR,VALUE 0x0,0x0 0x1,0x0 0x10,0x12d75c18e 0x11,0x24cb008 0x12,0x24cc001 0x17,0x0 0x1b,0xfee00d00 0x2a,0x0 0x2c,0x1000000 0x34,0x0 0x3a,0x1 0x3b,0x0 0x48,0x1 0x8b,0x100000000 0xc1,0x0 0xc2,0x0 0xcd,0x3 0xce,0x80000000 0xfe,0x508 0x10a,0x400000000c08e0eb 0x11e,0xbe702111 0x122,0x3 0x140,0x0 0x174,0x10 0x175,0xfffffe0000003000 0x176,0xffffffff81a01510 0x179,0x20 0x17a,0x0 0x186,0x0 0x187,0x0 0x198,0x400000003e8 0x199,0x0 0x1a0,0x1 0x1d9,0x0 0x1db,0x0 0x1dc,0x0 0x1dd,0x0 0x1de,0x0 0x1fc,0x0 0x200,0x0 0x201,0x0 0x202,0x0 0x203,0x0 0x204,0x0 0x205,0x0 0x206,0x0 0x207,0x0 0x208,0x0 0x209,0x0 0x20a,0x0 0x20b,0x0 0x20c,0x0 0x20d,0x0 0x20e,0x0 0x20f,0x0 0x250,0x0 0x258,0x0 0x259,0x0 0x268,0x0 0x269,0x0 0x26a,0x0 0x26b,0x0 0x26c,0x0 0x26d,0x0 0x26e,0x0 0x26f,0x0 0x277,0x407050600070106 0x2ff,0x806 0x400,0x0 0x401,0x0 0x402,0x0 0x403,0x0 0x404,0x0 0x405,0x0 0x406,0x0 0x407,0x0 0x408,0x0 0x409,0x0 0x40a,0x0 0x40b,0x0 0x40c,0x0 0x40d,0x0 0x40e,0x0 0x40f,0x0 0x410,0x0 0x411,0x0 0x412,0x0 0x413,0x0 0x414,0x0 
0x415,0x0 0x416,0x0 0x417,0x0 0x418,0x0 0x419,0x0 0x41a,0x0 0x41b,0x0 0x41c,0x0 0x41d,0x0 0x41e,0x0 0x41f,0x0 0x420,0x0 0x421,0x0 0x422,0x0 0x423,0x0 0x424,0x0 0x425,0x0 0x426,0x0 0x427,0x0 0x428,0x0 0x429,0x0 0x42a,0x0 0x42b,0x0 0x42c,0x0 0x42d,0x0 0x42e,0x0 0x42f,0x0 0x430,0x0 0x431,0x0 0x432,0x0 0x433,0x0 0x434,0x0 0x435,0x0 0x436,0x0 0x437,0x0 0x438,0x0 0x439,0x0 0x43a,0x0 0x43b,0x0 0x43c,0x0 0x43d,0x0 0x43e,0x0 0x43f,0x0 0x440,0x0 0x441,0x0 0x442,0x0 0x443,0x0 0x444,0x0 0x445,0x0 0x446,0x0 0x447,0x0 0x448,0x0 0x449,0x0 0x44a,0x0 0x44b,0x0 0x44c,0x0 0x44d,0x0 0x44e,0x0 0x44f,0x0 0x450,0x0 0x451,0x0 0x452,0x0 0x453,0x0 0x454,0x0 0x455,0x0 0x456,0x0 0x457,0x0 0x458,0x0 0x459,0x0 0x45a,0x0 0x45b,0x0 0x45c,0x0 0x45d,0x0 0x45e,0x0 0x45f,0x0 0x460,0x0 0x461,0x0 0x462,0x0 0x463,0x0 0x464,0x0 0x465,0x0 0x466,0x0 0x467,0x0 0x468,0x0 0x469,0x0 0x46a,0x0 0x46b,0x0 0x46c,0x0 0x46d,0x0 0x46e,0x0 0x46f,0x0 0x470,0x0 0x471,0x0 0x472,0x0 0x473,0x0 0x474,0x0 0x475,0x0 0x476,0x0 0x477,0x0 0x478,0x0 0x479,0x0 0x47a,0x0 0x47b,0x0 0x47c,0x0 0x47d,0x0 0x47e,0x0 0x47f,0x0 0x606,0x0 0x611,0x0 0x619,0x0 0x639,0x0 0x641,0x0 0x6e0,0x12e64deb6 0x800,0x0 0x801,0x0 0x802,0x0 0x803,0x50014 0x804,0x0 0x805,0x0 0x806,0x0 0x807,0x0 0x808,0x10 0x809,0x0 0x80a,0x10 0x80b,0x0 0x80c,0x0 0x80d,0x1 0x80e,0xffffffff 0x80f,0x1ff 0x810,0x0 0x811,0x0 0x812,0x0 0x813,0x0 0x814,0x0 0x815,0x0 0x816,0x0 0x817,0x0 0x818,0x0 0x819,0x0 0x81a,0x0 0x81b,0x0 0x81c,0x0 0x81d,0x0 0x81e,0x0 0x81f,0x0 0x820,0x0 0x821,0x0 0x822,0x0 0x823,0x0 0x824,0x0 0x825,0x0 0x826,0x0 0x827,0x0 0x828,0x0 0x829,0x0 0x82a,0x0 0x82b,0x0 0x82c,0x0 0x82d,0x0 0x82e,0x0 0x82f,0x0 0x830,0x0 0x831,0x0 0x832,0x400ec 0x833,0x10000 0x834,0x10000 0x835,0x10700 0x836,0x400 0x837,0xfe 0x838,0x0 0x839,0x0 0x83a,0x0 0x83b,0x0 0x83c,0x0 0x83d,0x0 0x83e,0x0 0x83f,0x0 0x840,0x0 0x841,0x0 0x842,0x0 0x843,0x0 0x844,0x0 0x845,0x0 0x846,0x0 0x847,0x0 0x848,0x0 0x849,0x0 0x84a,0x0 0x84b,0x0 0x84c,0x0 0x84d,0x0 0x84e,0x0 0x84f,0x0 0x850,0x0 0x851,0x0 
0x852,0x0 0x853,0x0 0x854,0x0 0x855,0x0 0x856,0x0 0x857,0x0 0x858,0x0 0x859,0x0 0x85a,0x0 0x85b,0x0 0x85c,0x0 0x85d,0x0 0x85e,0x0 0x85f,0x0 0x860,0x0 0x861,0x0 0x862,0x0 0x863,0x0 0x864,0x0 0x865,0x0 0x866,0x0 0x867,0x0 0x868,0x0 0x869,0x0 0x86a,0x0 0x86b,0x0 0x86c,0x0 0x86d,0x0 0x86e,0x0 0x86f,0x0 0x870,0x0 0x871,0x0 0x872,0x0 0x873,0x0 0x874,0x0 0x875,0x0 0x876,0x0 0x877,0x0 0x878,0x0 0x879,0x0 0x87a,0x0 0x87b,0x0 0x87c,0x0 0x87d,0x0 0x87e,0x0 0x87f,0x0 0x880,0x0 0x881,0x0 0x882,0x0 0x883,0x0 0x884,0x0 0x885,0x0 0x886,0x0 0x887,0x0 0x888,0x0 0x889,0x0 0x88a,0x0 0x88b,0x0 0x88c,0x0 0x88d,0x0 0x88e,0x0 0x88f,0x0 0x890,0x0 0x891,0x0 0x892,0x0 0x893,0x0 0x894,0x0 0x895,0x0 0x896,0x0 0x897,0x0 0x898,0x0 0x899,0x0 0x89a,0x0 0x89b,0x0 0x89c,0x0 0x89d,0x0 0x89e,0x0 0x89f,0x0 0x8a0,0x0 0x8a1,0x0 0x8a2,0x0 0x8a3,0x0 0x8a4,0x0 0x8a5,0x0 0x8a6,0x0 0x8a7,0x0 0x8a8,0x0 0x8a9,0x0 0x8aa,0x0 0x8ab,0x0 0x8ac,0x0 0x8ad,0x0 0x8ae,0x0 0x8af,0x0 0x8b0,0x0 0x8b1,0x0 0x8b2,0x0 0x8b3,0x0 0x8b4,0x0 0x8b5,0x0 0x8b6,0x0 0x8b7,0x0 0x8b8,0x0 0x8b9,0x0 0x8ba,0x0 0x8bb,0x0 0x8bc,0x0 0x8bd,0x0 0x8be,0x0 0x8bf,0x0 0x8c0,0x0 0x8c1,0x0 0x8c2,0x0 0x8c3,0x0 0x8c4,0x0 0x8c5,0x0 0x8c6,0x0 0x8c7,0x0 0x8c8,0x0 0x8c9,0x0 0x8ca,0x0 0x8cb,0x0 0x8cc,0x0 0x8cd,0x0 0x8ce,0x0 0x8cf,0x0 0x8d0,0x0 0x8d1,0x0 0x8d2,0x0 0x8d3,0x0 0x8d4,0x0 0x8d5,0x0 0x8d6,0x0 0x8d7,0x0 0x8d8,0x0 0x8d9,0x0 0x8da,0x0 0x8db,0x0 0x8dc,0x0 0x8dd,0x0 0x8de,0x0 0x8df,0x0 0x8e0,0x0 0x8e1,0x0 0x8e2,0x0 0x8e3,0x0 0x8e4,0x0 0x8e5,0x0 0x8e6,0x0 0x8e7,0x0 0x8e8,0x0 0x8e9,0x0 0x8ea,0x0 0x8eb,0x0 0x8ec,0x0 0x8ed,0x0 0x8ee,0x0 0x8ef,0x0 0x8f0,0x0 0x8f1,0x0 0x8f2,0x0 0x8f3,0x0 0x8f4,0x0 0x8f5,0x0 0x8f6,0x0 0x8f7,0x0 0x8f8,0x0 0x8f9,0x0 0x8fa,0x0 0x8fb,0x0 0x8fc,0x0 0x8fd,0x0 0x8fe,0x0 0x8ff,0x0 0xc0000080,0xd01 0xc0000081,0x23001000000000 0xc0000082,0xffffffff81a00080 0xc0000083,0xffffffff81a015c0 0xc0000084,0x47700 0xc0000100,0x3402d380 0xc0000101,0xffff88803ec00000 0xc0000102,0x0 0xc0000103,0x0 0xc0010000,0x0 0xc0010001,0x0 0xc0010002,0x0 
0xc0010003,0x0 0xc0010004,0x0 0xc0010005,0x0 0xc0010006,0x0 0xc0010007,0x0 0xc0010010,0x0 0xc0010015,0x0 0xc001001b,0x20000000 0xc001001f,0x0 0xc0010055,0x0 0xc0010058,0x0 0xc0010112,0x0 0xc0010113,0x0 0xc0010117,0x0 0xc0011022,0x0 0xc0011023,0x0 0xc001102a,0x0 0xc001102c,0x0 0x400000000,0x0 0x2000000000,0x0 0x4000000000,0x0 0x8000000000,0x0 0x1000000000000,0x0 0x3c000000000000,0x0 0x80000000000000,0x0 0x40000000000000,0x0 ================================================ FILE: tests/data/msr/msr_list_SPR_TO_T2_6.1_INTEL_SAPPHIRE_RAPIDS_6.1host_6.1guest.csv ================================================ MSR_ADDR,VALUE 0x0,0x0 0x1,0x0 0x10,0xcecb4324 0x11,0x27fd008 0x12,0x27fe001 0x17,0x0 0x1b,0xfee00d00 0x2a,0x0 0x2c,0x1000000 0x34,0x0 0x3a,0x1 0x3b,0x0 0x48,0x401 0x8b,0x100000000 0xc1,0x0 0xc2,0x0 0xcd,0x3 0xce,0x80000000 0xfe,0x508 0x10a,0x400000000c08e0eb 0x11e,0xbe702111 0x122,0x3 0x140,0x0 0x174,0x10 0x175,0xfffffe0000003000 0x176,0xffffffff81c01630 0x179,0x20 0x17a,0x0 0x186,0x0 0x187,0x0 0x198,0x400000003e8 0x199,0x0 0x1a0,0x1 0x1d9,0x0 0x1db,0x0 0x1dc,0x0 0x1dd,0x0 0x1de,0x0 0x1fc,0x0 0x200,0x0 0x201,0x0 0x202,0x0 0x203,0x0 0x204,0x0 0x205,0x0 0x206,0x0 0x207,0x0 0x208,0x0 0x209,0x0 0x20a,0x0 0x20b,0x0 0x20c,0x0 0x20d,0x0 0x20e,0x0 0x20f,0x0 0x250,0x0 0x258,0x0 0x259,0x0 0x268,0x0 0x269,0x0 0x26a,0x0 0x26b,0x0 0x26c,0x0 0x26d,0x0 0x26e,0x0 0x26f,0x0 0x277,0x407050600070106 0x2ff,0x806 0x400,0x0 0x401,0x0 0x402,0x0 0x403,0x0 0x404,0x0 0x405,0x0 0x406,0x0 0x407,0x0 0x408,0x0 0x409,0x0 0x40a,0x0 0x40b,0x0 0x40c,0x0 0x40d,0x0 0x40e,0x0 0x40f,0x0 0x410,0x0 0x411,0x0 0x412,0x0 0x413,0x0 0x414,0x0 0x415,0x0 0x416,0x0 0x417,0x0 0x418,0x0 0x419,0x0 0x41a,0x0 0x41b,0x0 0x41c,0x0 0x41d,0x0 0x41e,0x0 0x41f,0x0 0x420,0x0 0x421,0x0 0x422,0x0 0x423,0x0 0x424,0x0 0x425,0x0 0x426,0x0 0x427,0x0 0x428,0x0 0x429,0x0 0x42a,0x0 0x42b,0x0 0x42c,0x0 0x42d,0x0 0x42e,0x0 0x42f,0x0 0x430,0x0 0x431,0x0 0x432,0x0 0x433,0x0 0x434,0x0 0x435,0x0 0x436,0x0 0x437,0x0 0x438,0x0 0x439,0x0 
0x43a,0x0 0x43b,0x0 0x43c,0x0 0x43d,0x0 0x43e,0x0 0x43f,0x0 0x440,0x0 0x441,0x0 0x442,0x0 0x443,0x0 0x444,0x0 0x445,0x0 0x446,0x0 0x447,0x0 0x448,0x0 0x449,0x0 0x44a,0x0 0x44b,0x0 0x44c,0x0 0x44d,0x0 0x44e,0x0 0x44f,0x0 0x450,0x0 0x451,0x0 0x452,0x0 0x453,0x0 0x454,0x0 0x455,0x0 0x456,0x0 0x457,0x0 0x458,0x0 0x459,0x0 0x45a,0x0 0x45b,0x0 0x45c,0x0 0x45d,0x0 0x45e,0x0 0x45f,0x0 0x460,0x0 0x461,0x0 0x462,0x0 0x463,0x0 0x464,0x0 0x465,0x0 0x466,0x0 0x467,0x0 0x468,0x0 0x469,0x0 0x46a,0x0 0x46b,0x0 0x46c,0x0 0x46d,0x0 0x46e,0x0 0x46f,0x0 0x470,0x0 0x471,0x0 0x472,0x0 0x473,0x0 0x474,0x0 0x475,0x0 0x476,0x0 0x477,0x0 0x478,0x0 0x479,0x0 0x47a,0x0 0x47b,0x0 0x47c,0x0 0x47d,0x0 0x47e,0x0 0x47f,0x0 0x606,0x0 0x611,0x0 0x619,0x0 0x639,0x0 0x641,0x0 0x6e0,0xcf54a28e 0x800,0x0 0x801,0x0 0x802,0x0 0x803,0x50014 0x804,0x0 0x805,0x0 0x806,0x0 0x807,0x0 0x808,0x10 0x809,0x0 0x80a,0x10 0x80b,0x0 0x80c,0x0 0x80d,0x1 0x80e,0xffffffff 0x80f,0x1ff 0x810,0x0 0x811,0x0 0x812,0x0 0x813,0x0 0x814,0x0 0x815,0x0 0x816,0x0 0x817,0x0 0x818,0x0 0x819,0x0 0x81a,0x0 0x81b,0x0 0x81c,0x0 0x81d,0x0 0x81e,0x0 0x81f,0x0 0x820,0x0 0x821,0x0 0x822,0x0 0x823,0x0 0x824,0x0 0x825,0x0 0x826,0x0 0x827,0x0 0x828,0x0 0x829,0x0 0x82a,0x0 0x82b,0x0 0x82c,0x0 0x82d,0x0 0x82e,0x0 0x82f,0x0 0x830,0x0 0x831,0x0 0x832,0x400ec 0x833,0x10000 0x834,0x10000 0x835,0x10700 0x836,0x400 0x837,0xfe 0x838,0x0 0x839,0x0 0x83a,0x0 0x83b,0x0 0x83c,0x0 0x83d,0x0 0x83e,0x0 0x83f,0x0 0x840,0x0 0x841,0x0 0x842,0x0 0x843,0x0 0x844,0x0 0x845,0x0 0x846,0x0 0x847,0x0 0x848,0x0 0x849,0x0 0x84a,0x0 0x84b,0x0 0x84c,0x0 0x84d,0x0 0x84e,0x0 0x84f,0x0 0x850,0x0 0x851,0x0 0x852,0x0 0x853,0x0 0x854,0x0 0x855,0x0 0x856,0x0 0x857,0x0 0x858,0x0 0x859,0x0 0x85a,0x0 0x85b,0x0 0x85c,0x0 0x85d,0x0 0x85e,0x0 0x85f,0x0 0x860,0x0 0x861,0x0 0x862,0x0 0x863,0x0 0x864,0x0 0x865,0x0 0x866,0x0 0x867,0x0 0x868,0x0 0x869,0x0 0x86a,0x0 0x86b,0x0 0x86c,0x0 0x86d,0x0 0x86e,0x0 0x86f,0x0 0x870,0x0 0x871,0x0 0x872,0x0 0x873,0x0 0x874,0x0 0x875,0x0 0x876,0x0 
0x877,0x0 0x878,0x0 0x879,0x0 0x87a,0x0 0x87b,0x0 0x87c,0x0 0x87d,0x0 0x87e,0x0 0x87f,0x0 0x880,0x0 0x881,0x0 0x882,0x0 0x883,0x0 0x884,0x0 0x885,0x0 0x886,0x0 0x887,0x0 0x888,0x0 0x889,0x0 0x88a,0x0 0x88b,0x0 0x88c,0x0 0x88d,0x0 0x88e,0x0 0x88f,0x0 0x890,0x0 0x891,0x0 0x892,0x0 0x893,0x0 0x894,0x0 0x895,0x0 0x896,0x0 0x897,0x0 0x898,0x0 0x899,0x0 0x89a,0x0 0x89b,0x0 0x89c,0x0 0x89d,0x0 0x89e,0x0 0x89f,0x0 0x8a0,0x0 0x8a1,0x0 0x8a2,0x0 0x8a3,0x0 0x8a4,0x0 0x8a5,0x0 0x8a6,0x0 0x8a7,0x0 0x8a8,0x0 0x8a9,0x0 0x8aa,0x0 0x8ab,0x0 0x8ac,0x0 0x8ad,0x0 0x8ae,0x0 0x8af,0x0 0x8b0,0x0 0x8b1,0x0 0x8b2,0x0 0x8b3,0x0 0x8b4,0x0 0x8b5,0x0 0x8b6,0x0 0x8b7,0x0 0x8b8,0x0 0x8b9,0x0 0x8ba,0x0 0x8bb,0x0 0x8bc,0x0 0x8bd,0x0 0x8be,0x0 0x8bf,0x0 0x8c0,0x0 0x8c1,0x0 0x8c2,0x0 0x8c3,0x0 0x8c4,0x0 0x8c5,0x0 0x8c6,0x0 0x8c7,0x0 0x8c8,0x0 0x8c9,0x0 0x8ca,0x0 0x8cb,0x0 0x8cc,0x0 0x8cd,0x0 0x8ce,0x0 0x8cf,0x0 0x8d0,0x0 0x8d1,0x0 0x8d2,0x0 0x8d3,0x0 0x8d4,0x0 0x8d5,0x0 0x8d6,0x0 0x8d7,0x0 0x8d8,0x0 0x8d9,0x0 0x8da,0x0 0x8db,0x0 0x8dc,0x0 0x8dd,0x0 0x8de,0x0 0x8df,0x0 0x8e0,0x0 0x8e1,0x0 0x8e2,0x0 0x8e3,0x0 0x8e4,0x0 0x8e5,0x0 0x8e6,0x0 0x8e7,0x0 0x8e8,0x0 0x8e9,0x0 0x8ea,0x0 0x8eb,0x0 0x8ec,0x0 0x8ed,0x0 0x8ee,0x0 0x8ef,0x0 0x8f0,0x0 0x8f1,0x0 0x8f2,0x0 0x8f3,0x0 0x8f4,0x0 0x8f5,0x0 0x8f6,0x0 0x8f7,0x0 0x8f8,0x0 0x8f9,0x0 0x8fa,0x0 0x8fb,0x0 0x8fc,0x0 0x8fd,0x0 0x8fe,0x0 0x8ff,0x0 0xc0000080,0xd01 0xc0000081,0x23001000000000 0xc0000082,0xffffffff81c00080 0xc0000083,0x0 0xc0000084,0x257fd5 0xc0000100,0x31d67380 0xc0000101,0xffff88803ec00000 0xc0000102,0x0 0xc0000103,0x0 0xc0010000,0x0 0xc0010001,0x0 0xc0010002,0x0 0xc0010003,0x0 0xc0010004,0x0 0xc0010005,0x0 0xc0010006,0x0 0xc0010007,0x0 0xc0010010,0x0 0xc0010015,0x0 0xc001001b,0x20000000 0xc001001f,0x0 0xc0010055,0x0 0xc0010058,0x0 0xc0010112,0x0 0xc0010113,0x0 0xc0010117,0x0 0xc0011022,0x0 0xc0011023,0x0 0xc001102a,0x0 0xc001102c,0x0 0x400000000,0x0 0x2000000000,0x0 0x4000000000,0x0 0x8000000000,0x0 0x1000000000000,0x0 0x3c000000000000,0x0 
0x80000000000000,0x0 0x40000000000000,0x0 ================================================ FILE: tests/data/msr/msr_list_T2A_AMD_MILAN_5.10host_5.10guest.csv ================================================ MSR_ADDR,VALUE 0x0,0x0 0x1,0x0 0x10,0x20a051a0c 0x11,0x24cb008 0x12,0x24cc001 0x17,0x0 0x1b,0xfee00d00 0x2a,0x0 0x2c,0x1000000 0x34,0x0 0x3b,0x0 0x48,0x0 0x8b,0x1000065 0xc1,0x0 0xc2,0x0 0xcd,0x3 0xce,0x80000000 0xfe,0x508 0x11e,0xbe702111 0x140,0x0 0x174,0x10 0x175,0xfffffe0000003000 0x176,0xffffffff81a01510 0x179,0x20 0x17a,0x0 0x186,0x0 0x187,0x0 0x198,0x400000003e8 0x199,0x0 0x1a0,0x1 0x1d9,0x0 0x1db,0x0 0x1dc,0x0 0x1dd,0x0 0x1de,0x0 0x1fc,0x0 0x200,0x0 0x201,0x0 0x202,0x0 0x203,0x0 0x204,0x0 0x205,0x0 0x206,0x0 0x207,0x0 0x208,0x0 0x209,0x0 0x20a,0x0 0x20b,0x0 0x20c,0x0 0x20d,0x0 0x20e,0x0 0x20f,0x0 0x250,0x0 0x258,0x0 0x259,0x0 0x268,0x0 0x269,0x0 0x26a,0x0 0x26b,0x0 0x26c,0x0 0x26d,0x0 0x26e,0x0 0x26f,0x0 0x277,0x407050600070106 0x2ff,0x806 0x400,0x0 0x401,0x0 0x402,0x0 0x403,0x0 0x404,0x0 0x405,0x0 0x406,0x0 0x407,0x0 0x408,0x0 0x409,0x0 0x40a,0x0 0x40b,0x0 0x40c,0x0 0x40d,0x0 0x40e,0x0 0x40f,0x0 0x410,0x0 0x411,0x0 0x412,0x0 0x413,0x0 0x414,0x0 0x415,0x0 0x416,0x0 0x417,0x0 0x418,0x0 0x419,0x0 0x41a,0x0 0x41b,0x0 0x41c,0x0 0x41d,0x0 0x41e,0x0 0x41f,0x0 0x420,0x0 0x421,0x0 0x422,0x0 0x423,0x0 0x424,0x0 0x425,0x0 0x426,0x0 0x427,0x0 0x428,0x0 0x429,0x0 0x42a,0x0 0x42b,0x0 0x42c,0x0 0x42d,0x0 0x42e,0x0 0x42f,0x0 0x430,0x0 0x431,0x0 0x432,0x0 0x433,0x0 0x434,0x0 0x435,0x0 0x436,0x0 0x437,0x0 0x438,0x0 0x439,0x0 0x43a,0x0 0x43b,0x0 0x43c,0x0 0x43d,0x0 0x43e,0x0 0x43f,0x0 0x440,0x0 0x441,0x0 0x442,0x0 0x443,0x0 0x444,0x0 0x445,0x0 0x446,0x0 0x447,0x0 0x448,0x0 0x449,0x0 0x44a,0x0 0x44b,0x0 0x44c,0x0 0x44d,0x0 0x44e,0x0 0x44f,0x0 0x450,0x0 0x451,0x0 0x452,0x0 0x453,0x0 0x454,0x0 0x455,0x0 0x456,0x0 0x457,0x0 0x458,0x0 0x459,0x0 0x45a,0x0 0x45b,0x0 0x45c,0x0 0x45d,0x0 0x45e,0x0 0x45f,0x0 0x460,0x0 0x461,0x0 0x462,0x0 0x463,0x0 0x464,0x0 0x465,0x0 0x466,0x0 
0x467,0x0 0x468,0x0 0x469,0x0 0x46a,0x0 0x46b,0x0 0x46c,0x0 0x46d,0x0 0x46e,0x0 0x46f,0x0 0x470,0x0 0x471,0x0 0x472,0x0 0x473,0x0 0x474,0x0 0x475,0x0 0x476,0x0 0x477,0x0 0x478,0x0 0x479,0x0 0x47a,0x0 0x47b,0x0 0x47c,0x0 0x47d,0x0 0x47e,0x0 0x47f,0x0 0x606,0x0 0x611,0x0 0x619,0x0 0x639,0x0 0x641,0x0 0x6e0,0x20b2717e2 0x802,0x0 0x803,0x50014 0x808,0x10 0x80a,0x10 0x80d,0x1 0x80f,0x1ff 0x810,0x0 0x811,0x0 0x812,0x0 0x813,0x0 0x814,0x0 0x815,0x0 0x816,0x0 0x817,0x0 0x818,0x0 0x819,0x0 0x81a,0x0 0x81b,0x0 0x81c,0x0 0x81d,0x0 0x81e,0x0 0x81f,0x0 0x820,0x0 0x821,0x0 0x822,0x0 0x823,0x0 0x824,0x0 0x825,0x0 0x826,0x0 0x827,0x0 0x828,0x0 0x830,0x0 0x832,0x400ec 0x833,0x10000 0x834,0x400 0x835,0x10700 0x836,0x400 0x837,0xfe 0x838,0x0 0x839,0x0 0x83e,0x0 0xc0000080,0xd01 0xc0000081,0x23001000000000 0xc0000082,0xffffffff81a00080 0xc0000083,0xffffffff81a015c0 0xc0000084,0x47700 0xc0000100,0x18df1380 0xc0000101,0xffff88803ec00000 0xc0000102,0x0 0xc0000103,0x0 0xc0010000,0x0 0xc0010001,0x0 0xc0010002,0x0 0xc0010003,0x0 0xc0010004,0x0 0xc0010005,0x0 0xc0010006,0x0 0xc0010007,0x0 0xc0010010,0x0 0xc0010015,0x0 0xc001001b,0x20000000 0xc001001f,0x0 0xc0010055,0x0 0xc0010058,0x0 0xc0010112,0x0 0xc0010113,0x0 0xc0010114,0x0 0xc0010117,0x0 0xc001011f,0x0 0xc0010131,0x0 0xc0010140,0x3 0xc0010141,0x0 0xc0010200,0x0 0xc0010201,0x0 0xc0010202,0x0 0xc0010203,0x0 0xc0010204,0x0 0xc0010205,0x0 0xc0010206,0x0 0xc0010207,0x0 0xc0010208,0x0 0xc0010209,0x0 0xc001020a,0x0 0xc001020b,0xffff 0xc0011021,0x0 0xc0011022,0x0 0xc0011023,0x0 0xc0011029,0x2 0xc001102a,0x0 0xc001102c,0x0 0x400000000,0x0 0x2000000000,0x0 0x4000000000,0x0 0x8000000000,0x0 0x1000000000000,0x0 0x3c000000000000,0x0 0x80000000000000,0x0 0x40000000000000,0x0 ================================================ FILE: tests/data/msr/msr_list_T2A_AMD_MILAN_5.10host_6.1guest.csv ================================================ MSR_ADDR,VALUE 0x0,0x0 0x1,0x0 0x10,0x13f0df111 0x11,0x27fd008 0x12,0x27fe001 0x17,0x0 0x1b,0xfee00d00 0x2a,0x0 
0x2c,0x1000000 0x34,0x0 0x3b,0x0 0x48,0x0 0x8b,0x1000065 0xc1,0x0 0xc2,0x0 0xcd,0x3 0xce,0x80000000 0xfe,0x508 0x11e,0xbe702111 0x140,0x0 0x174,0x10 0x175,0xfffffe0000003000 0x176,0xffffffff81c01630 0x179,0x20 0x17a,0x0 0x186,0x0 0x187,0x0 0x198,0x400000003e8 0x199,0x0 0x1a0,0x1 0x1d9,0x0 0x1db,0x0 0x1dc,0x0 0x1dd,0x0 0x1de,0x0 0x1fc,0x0 0x200,0x0 0x201,0x0 0x202,0x0 0x203,0x0 0x204,0x0 0x205,0x0 0x206,0x0 0x207,0x0 0x208,0x0 0x209,0x0 0x20a,0x0 0x20b,0x0 0x20c,0x0 0x20d,0x0 0x20e,0x0 0x20f,0x0 0x250,0x0 0x258,0x0 0x259,0x0 0x268,0x0 0x269,0x0 0x26a,0x0 0x26b,0x0 0x26c,0x0 0x26d,0x0 0x26e,0x0 0x26f,0x0 0x277,0x407050600070106 0x2ff,0x806 0x400,0x0 0x401,0x0 0x402,0x0 0x403,0x0 0x404,0x0 0x405,0x0 0x406,0x0 0x407,0x0 0x408,0x0 0x409,0x0 0x40a,0x0 0x40b,0x0 0x40c,0x0 0x40d,0x0 0x40e,0x0 0x40f,0x0 0x410,0x0 0x411,0x0 0x412,0x0 0x413,0x0 0x414,0x0 0x415,0x0 0x416,0x0 0x417,0x0 0x418,0x0 0x419,0x0 0x41a,0x0 0x41b,0x0 0x41c,0x0 0x41d,0x0 0x41e,0x0 0x41f,0x0 0x420,0x0 0x421,0x0 0x422,0x0 0x423,0x0 0x424,0x0 0x425,0x0 0x426,0x0 0x427,0x0 0x428,0x0 0x429,0x0 0x42a,0x0 0x42b,0x0 0x42c,0x0 0x42d,0x0 0x42e,0x0 0x42f,0x0 0x430,0x0 0x431,0x0 0x432,0x0 0x433,0x0 0x434,0x0 0x435,0x0 0x436,0x0 0x437,0x0 0x438,0x0 0x439,0x0 0x43a,0x0 0x43b,0x0 0x43c,0x0 0x43d,0x0 0x43e,0x0 0x43f,0x0 0x440,0x0 0x441,0x0 0x442,0x0 0x443,0x0 0x444,0x0 0x445,0x0 0x446,0x0 0x447,0x0 0x448,0x0 0x449,0x0 0x44a,0x0 0x44b,0x0 0x44c,0x0 0x44d,0x0 0x44e,0x0 0x44f,0x0 0x450,0x0 0x451,0x0 0x452,0x0 0x453,0x0 0x454,0x0 0x455,0x0 0x456,0x0 0x457,0x0 0x458,0x0 0x459,0x0 0x45a,0x0 0x45b,0x0 0x45c,0x0 0x45d,0x0 0x45e,0x0 0x45f,0x0 0x460,0x0 0x461,0x0 0x462,0x0 0x463,0x0 0x464,0x0 0x465,0x0 0x466,0x0 0x467,0x0 0x468,0x0 0x469,0x0 0x46a,0x0 0x46b,0x0 0x46c,0x0 0x46d,0x0 0x46e,0x0 0x46f,0x0 0x470,0x0 0x471,0x0 0x472,0x0 0x473,0x0 0x474,0x0 0x475,0x0 0x476,0x0 0x477,0x0 0x478,0x0 0x479,0x0 0x47a,0x0 0x47b,0x0 0x47c,0x0 0x47d,0x0 0x47e,0x0 0x47f,0x0 0x606,0x0 0x611,0x0 0x619,0x0 0x639,0x0 0x641,0x0 0x6e0,0x14034d011 
0x802,0x0 0x803,0x50014 0x808,0x10 0x80a,0x10 0x80d,0x1 0x80f,0x1ff 0x810,0x0 0x811,0x0 0x812,0x0 0x813,0x0 0x814,0x0 0x815,0x0 0x816,0x0 0x817,0x0 0x818,0x0 0x819,0x0 0x81a,0x0 0x81b,0x0 0x81c,0x0 0x81d,0x0 0x81e,0x0 0x81f,0x0 0x820,0x0 0x821,0x0 0x822,0x0 0x823,0x0 0x824,0x0 0x825,0x0 0x826,0x0 0x827,0x0 0x828,0x0 0x830,0x0 0x832,0x400ec 0x833,0x10000 0x834,0x400 0x835,0x10700 0x836,0x400 0x837,0xfe 0x838,0x0 0x839,0x0 0x83e,0x0 0xc0000080,0xd01 0xc0000081,0x23001000000000 0xc0000082,0xffffffff81c00080 0xc0000083,0xffffffff81c016f0 0xc0000084,0x257fd5 0xc0000100,0x7004380 0xc0000101,0xffff88803ec00000 0xc0000102,0x0 0xc0000103,0x0 0xc0010000,0x0 0xc0010001,0x0 0xc0010002,0x0 0xc0010003,0x0 0xc0010004,0x0 0xc0010005,0x0 0xc0010006,0x0 0xc0010007,0x0 0xc0010010,0x0 0xc0010015,0x0 0xc001001b,0x20000000 0xc001001f,0x0 0xc0010055,0x0 0xc0010058,0x0 0xc0010112,0x0 0xc0010113,0x0 0xc0010114,0x0 0xc0010117,0x0 0xc001011f,0x0 0xc0010131,0x0 0xc0010140,0x3 0xc0010141,0x0 0xc0010200,0x0 0xc0010201,0x0 0xc0010202,0x0 0xc0010203,0x0 0xc0010204,0x0 0xc0010205,0x0 0xc0010206,0x0 0xc0010207,0x0 0xc0010208,0x0 0xc0010209,0x0 0xc001020a,0x0 0xc001020b,0xffff 0xc0011021,0x0 0xc0011022,0x0 0xc0011023,0x0 0xc0011029,0x2 0xc001102a,0x0 0xc001102c,0x0 0x400000000,0x0 0x2000000000,0x0 0x4000000000,0x0 0x8000000000,0x0 0x1000000000000,0x0 0x3c000000000000,0x0 0x80000000000000,0x0 0x40000000000000,0x0 ================================================ FILE: tests/data/msr/msr_list_T2A_AMD_MILAN_6.1host_5.10guest.csv ================================================ MSR_ADDR,VALUE 0x0,0x0 0x1,0x0 0x10,0x13df71122 0x11,0x24e6008 0x12,0x24e7001 0x17,0x0 0x1b,0xfee00d00 0x2a,0x0 0x2c,0x1000000 0x34,0x0 0x3b,0x0 0x48,0x0 0x8b,0x1000065 0xc1,0x0 0xc2,0x0 0xcd,0x3 0xce,0x80000000 0xfe,0x508 0x11e,0xbe702111 0x140,0x0 0x174,0x10 0x175,0x3000 0x176,0x81a01510 0x179,0x20 0x17a,0x0 0x186,0x0 0x187,0x0 0x198,0x400000003e8 0x199,0x0 0x1a0,0x1 0x1d9,0x0 0x1db,0x0 0x1dc,0x0 0x1dd,0x0 0x1de,0x0 0x1fc,0x0 
0x200,0x0 0x201,0x0 0x202,0x0 0x203,0x0 0x204,0x0 0x205,0x0 0x206,0x0 0x207,0x0 0x208,0x0 0x209,0x0 0x20a,0x0 0x20b,0x0 0x20c,0x0 0x20d,0x0 0x20e,0x0 0x20f,0x0 0x250,0x0 0x258,0x0 0x259,0x0 0x268,0x0 0x269,0x0 0x26a,0x0 0x26b,0x0 0x26c,0x0 0x26d,0x0 0x26e,0x0 0x26f,0x0 0x277,0x407050600070106 0x2ff,0x806 0x400,0x0 0x401,0x0 0x402,0x0 0x403,0x0 0x404,0x0 0x405,0x0 0x406,0x0 0x407,0x0 0x408,0x0 0x409,0x0 0x40a,0x0 0x40b,0x0 0x40c,0x0 0x40d,0x0 0x40e,0x0 0x40f,0x0 0x410,0x0 0x411,0x0 0x412,0x0 0x413,0x0 0x414,0x0 0x415,0x0 0x416,0x0 0x417,0x0 0x418,0x0 0x419,0x0 0x41a,0x0 0x41b,0x0 0x41c,0x0 0x41d,0x0 0x41e,0x0 0x41f,0x0 0x420,0x0 0x421,0x0 0x422,0x0 0x423,0x0 0x424,0x0 0x425,0x0 0x426,0x0 0x427,0x0 0x428,0x0 0x429,0x0 0x42a,0x0 0x42b,0x0 0x42c,0x0 0x42d,0x0 0x42e,0x0 0x42f,0x0 0x430,0x0 0x431,0x0 0x432,0x0 0x433,0x0 0x434,0x0 0x435,0x0 0x436,0x0 0x437,0x0 0x438,0x0 0x439,0x0 0x43a,0x0 0x43b,0x0 0x43c,0x0 0x43d,0x0 0x43e,0x0 0x43f,0x0 0x440,0x0 0x441,0x0 0x442,0x0 0x443,0x0 0x444,0x0 0x445,0x0 0x446,0x0 0x447,0x0 0x448,0x0 0x449,0x0 0x44a,0x0 0x44b,0x0 0x44c,0x0 0x44d,0x0 0x44e,0x0 0x44f,0x0 0x450,0x0 0x451,0x0 0x452,0x0 0x453,0x0 0x454,0x0 0x455,0x0 0x456,0x0 0x457,0x0 0x458,0x0 0x459,0x0 0x45a,0x0 0x45b,0x0 0x45c,0x0 0x45d,0x0 0x45e,0x0 0x45f,0x0 0x460,0x0 0x461,0x0 0x462,0x0 0x463,0x0 0x464,0x0 0x465,0x0 0x466,0x0 0x467,0x0 0x468,0x0 0x469,0x0 0x46a,0x0 0x46b,0x0 0x46c,0x0 0x46d,0x0 0x46e,0x0 0x46f,0x0 0x470,0x0 0x471,0x0 0x472,0x0 0x473,0x0 0x474,0x0 0x475,0x0 0x476,0x0 0x477,0x0 0x478,0x0 0x479,0x0 0x47a,0x0 0x47b,0x0 0x47c,0x0 0x47d,0x0 0x47e,0x0 0x47f,0x0 0x606,0x0 0x611,0x0 0x619,0x0 0x639,0x0 0x641,0x0 0x6e0,0x13e9cecf6 0x802,0x0 0x803,0x50014 0x808,0x10 0x80a,0x10 0x80d,0x1 0x80f,0x1ff 0x810,0x0 0x811,0x0 0x812,0x0 0x813,0x0 0x814,0x0 0x815,0x0 0x816,0x0 0x817,0x0 0x818,0x0 0x819,0x0 0x81a,0x0 0x81b,0x0 0x81c,0x0 0x81d,0x0 0x81e,0x0 0x81f,0x0 0x820,0x0 0x821,0x0 0x822,0x0 0x823,0x0 0x824,0x0 0x825,0x0 0x826,0x0 0x827,0x0 0x828,0x0 0x830,0x0 0x832,0x400ec 
0x833,0x10000 0x834,0x400 0x835,0x10700 0x836,0x400 0x837,0xfe 0x838,0x0 0x839,0x0 0x83e,0x0 0xc0000080,0xd01 0xc0000081,0x23001000000000 0xc0000082,0xffffffff81a00080 0xc0000083,0xffffffff81a015c0 0xc0000084,0x47700 0xc0000100,0x29a0c380 0xc0000101,0xffff88803ec00000 0xc0000102,0x0 0xc0000103,0x0 0xc0010000,0x0 0xc0010001,0x0 0xc0010002,0x0 0xc0010003,0x0 0xc0010004,0x0 0xc0010005,0x0 0xc0010006,0x0 0xc0010007,0x0 0xc0010010,0x0 0xc0010015,0x0 0xc001001b,0x20000000 0xc001001f,0x0 0xc0010055,0x0 0xc0010058,0x0 0xc0010112,0x0 0xc0010113,0x0 0xc0010114,0x0 0xc0010117,0x0 0xc001011f,0x0 0xc0010130,0x0 0xc0010131,0x0 0xc0010140,0x3 0xc0010141,0x0 0xc0010200,0x0 0xc0010201,0x0 0xc0010202,0x0 0xc0010203,0x0 0xc0010204,0x0 0xc0010205,0x0 0xc0010206,0x0 0xc0010207,0x0 0xc0010208,0x0 0xc0010209,0x0 0xc001020a,0x0 0xc001020b,0xffff 0xc0011021,0x0 0xc0011022,0x0 0xc0011023,0x0 0xc0011029,0x2 0xc001102a,0x0 0xc001102c,0x0 0x400000000,0x0 0x2000000000,0x0 0x4000000000,0x0 0x8000000000,0x0 0x1000000000000,0x0 0x3c000000000000,0x0 0x80000000000000,0x0 0x40000000000000,0x0 ================================================ FILE: tests/data/msr/msr_list_T2A_AMD_MILAN_6.1host_6.1guest.csv ================================================ MSR_ADDR,VALUE 0x0,0x0 0x1,0x0 0x10,0x12a9d3006 0x11,0x27fd008 0x12,0x27fe001 0x17,0x0 0x1b,0xfee00d00 0x2a,0x0 0x2c,0x1000000 0x34,0x0 0x3b,0x0 0x48,0x0 0x8b,0x1000065 0xc1,0x0 0xc2,0x0 0xcd,0x3 0xce,0x80000000 0xfe,0x508 0x11e,0xbe702111 0x140,0x0 0x174,0x10 0x175,0x3000 0x176,0x81c01630 0x179,0x20 0x17a,0x0 0x186,0x0 0x187,0x0 0x198,0x400000003e8 0x199,0x0 0x1a0,0x1 0x1d9,0x0 0x1db,0x0 0x1dc,0x0 0x1dd,0x0 0x1de,0x0 0x1fc,0x0 0x200,0x0 0x201,0x0 0x202,0x0 0x203,0x0 0x204,0x0 0x205,0x0 0x206,0x0 0x207,0x0 0x208,0x0 0x209,0x0 0x20a,0x0 0x20b,0x0 0x20c,0x0 0x20d,0x0 0x20e,0x0 0x20f,0x0 0x250,0x0 0x258,0x0 0x259,0x0 0x268,0x0 0x269,0x0 0x26a,0x0 0x26b,0x0 0x26c,0x0 0x26d,0x0 0x26e,0x0 0x26f,0x0 0x277,0x407050600070106 0x2ff,0x806 0x400,0x0 0x401,0x0 
0x402,0x0 0x403,0x0 0x404,0x0 0x405,0x0 0x406,0x0 0x407,0x0 0x408,0x0 0x409,0x0 0x40a,0x0 0x40b,0x0 0x40c,0x0 0x40d,0x0 0x40e,0x0 0x40f,0x0 0x410,0x0 0x411,0x0 0x412,0x0 0x413,0x0 0x414,0x0 0x415,0x0 0x416,0x0 0x417,0x0 0x418,0x0 0x419,0x0 0x41a,0x0 0x41b,0x0 0x41c,0x0 0x41d,0x0 0x41e,0x0 0x41f,0x0 0x420,0x0 0x421,0x0 0x422,0x0 0x423,0x0 0x424,0x0 0x425,0x0 0x426,0x0 0x427,0x0 0x428,0x0 0x429,0x0 0x42a,0x0 0x42b,0x0 0x42c,0x0 0x42d,0x0 0x42e,0x0 0x42f,0x0 0x430,0x0 0x431,0x0 0x432,0x0 0x433,0x0 0x434,0x0 0x435,0x0 0x436,0x0 0x437,0x0 0x438,0x0 0x439,0x0 0x43a,0x0 0x43b,0x0 0x43c,0x0 0x43d,0x0 0x43e,0x0 0x43f,0x0 0x440,0x0 0x441,0x0 0x442,0x0 0x443,0x0 0x444,0x0 0x445,0x0 0x446,0x0 0x447,0x0 0x448,0x0 0x449,0x0 0x44a,0x0 0x44b,0x0 0x44c,0x0 0x44d,0x0 0x44e,0x0 0x44f,0x0 0x450,0x0 0x451,0x0 0x452,0x0 0x453,0x0 0x454,0x0 0x455,0x0 0x456,0x0 0x457,0x0 0x458,0x0 0x459,0x0 0x45a,0x0 0x45b,0x0 0x45c,0x0 0x45d,0x0 0x45e,0x0 0x45f,0x0 0x460,0x0 0x461,0x0 0x462,0x0 0x463,0x0 0x464,0x0 0x465,0x0 0x466,0x0 0x467,0x0 0x468,0x0 0x469,0x0 0x46a,0x0 0x46b,0x0 0x46c,0x0 0x46d,0x0 0x46e,0x0 0x46f,0x0 0x470,0x0 0x471,0x0 0x472,0x0 0x473,0x0 0x474,0x0 0x475,0x0 0x476,0x0 0x477,0x0 0x478,0x0 0x479,0x0 0x47a,0x0 0x47b,0x0 0x47c,0x0 0x47d,0x0 0x47e,0x0 0x47f,0x0 0x606,0x0 0x611,0x0 0x619,0x0 0x639,0x0 0x641,0x0 0x6e0,0x12b7e327c 0x802,0x0 0x803,0x50014 0x808,0x10 0x80a,0x10 0x80d,0x1 0x80f,0x1ff 0x810,0x0 0x811,0x0 0x812,0x0 0x813,0x0 0x814,0x0 0x815,0x0 0x816,0x0 0x817,0x0 0x818,0x0 0x819,0x0 0x81a,0x0 0x81b,0x0 0x81c,0x0 0x81d,0x0 0x81e,0x0 0x81f,0x0 0x820,0x0 0x821,0x0 0x822,0x0 0x823,0x0 0x824,0x0 0x825,0x0 0x826,0x0 0x827,0x0 0x828,0x0 0x830,0x0 0x832,0x400ec 0x833,0x10000 0x834,0x400 0x835,0x10700 0x836,0x400 0x837,0xfe 0x838,0x0 0x839,0x0 0x83e,0x0 0xc0000080,0xd01 0xc0000081,0x23001000000000 0xc0000082,0xffffffff81c00080 0xc0000083,0xffffffff81c016f0 0xc0000084,0x257fd5 0xc0000100,0x344cb380 0xc0000101,0xffff88803ec00000 0xc0000102,0x0 0xc0000103,0x0 0xc0010000,0x0 0xc0010001,0x0 
0xc0010002,0x0 0xc0010003,0x0 0xc0010004,0x0 0xc0010005,0x0 0xc0010006,0x0 0xc0010007,0x0 0xc0010010,0x0 0xc0010015,0x0 0xc001001b,0x20000000 0xc001001f,0x0 0xc0010055,0x0 0xc0010058,0x0 0xc0010112,0x0 0xc0010113,0x0 0xc0010114,0x0 0xc0010117,0x0 0xc001011f,0x0 0xc0010130,0x0 0xc0010131,0x0 0xc0010140,0x3 0xc0010141,0x0 0xc0010200,0x0 0xc0010201,0x0 0xc0010202,0x0 0xc0010203,0x0 0xc0010204,0x0 0xc0010205,0x0 0xc0010206,0x0 0xc0010207,0x0 0xc0010208,0x0 0xc0010209,0x0 0xc001020a,0x0 0xc001020b,0xffff 0xc0011021,0x0 0xc0011022,0x0 0xc0011023,0x0 0xc0011029,0x2 0xc001102a,0x0 0xc001102c,0x0 0x400000000,0x0 0x2000000000,0x0 0x4000000000,0x0 0x8000000000,0x0 0x1000000000000,0x0 0x3c000000000000,0x0 0x80000000000000,0x0 0x40000000000000,0x0 ================================================ FILE: tests/data/msr/msr_list_T2CL_INTEL_CASCADELAKE_5.10host_5.10guest.csv ================================================ MSR_ADDR,VALUE 0x0,0x0 0x1,0x0 0x10,0x15dc62d60 0x11,0x24cb008 0x12,0x24cc001 0x17,0x0 0x1b,0xfee00d00 0x2a,0x0 0x2c,0x1000000 0x34,0x0 0x3a,0x1 0x3b,0x0 0x48,0x1 0x8b,0x100000000 0xc1,0x0 0xc2,0x0 0xcd,0x3 0xce,0x80000000 0xfe,0x508 0x10a,0xc0aa0eb 0x11e,0xbe702111 0x122,0x3 0x140,0x0 0x174,0x10 0x175,0xfffffe0000003000 0x176,0xffffffff81a01510 0x179,0x20 0x17a,0x0 0x186,0x0 0x187,0x0 0x198,0x400000003e8 0x199,0x0 0x1a0,0x1 0x1d9,0x0 0x1db,0x0 0x1dc,0x0 0x1dd,0x0 0x1de,0x0 0x1fc,0x0 0x200,0x0 0x201,0x0 0x202,0x0 0x203,0x0 0x204,0x0 0x205,0x0 0x206,0x0 0x207,0x0 0x208,0x0 0x209,0x0 0x20a,0x0 0x20b,0x0 0x20c,0x0 0x20d,0x0 0x20e,0x0 0x20f,0x0 0x250,0x0 0x258,0x0 0x259,0x0 0x268,0x0 0x269,0x0 0x26a,0x0 0x26b,0x0 0x26c,0x0 0x26d,0x0 0x26e,0x0 0x26f,0x0 0x277,0x407050600070106 0x2ff,0x806 0x400,0x0 0x401,0x0 0x402,0x0 0x403,0x0 0x404,0x0 0x405,0x0 0x406,0x0 0x407,0x0 0x408,0x0 0x409,0x0 0x40a,0x0 0x40b,0x0 0x40c,0x0 0x40d,0x0 0x40e,0x0 0x40f,0x0 0x410,0x0 0x411,0x0 0x412,0x0 0x413,0x0 0x414,0x0 0x415,0x0 0x416,0x0 0x417,0x0 0x418,0x0 0x419,0x0 0x41a,0x0 0x41b,0x0 
0x41c,0x0 0x41d,0x0 0x41e,0x0 0x41f,0x0 0x420,0x0 0x421,0x0 0x422,0x0 0x423,0x0 0x424,0x0 0x425,0x0 0x426,0x0 0x427,0x0 0x428,0x0 0x429,0x0 0x42a,0x0 0x42b,0x0 0x42c,0x0 0x42d,0x0 0x42e,0x0 0x42f,0x0 0x430,0x0 0x431,0x0 0x432,0x0 0x433,0x0 0x434,0x0 0x435,0x0 0x436,0x0 0x437,0x0 0x438,0x0 0x439,0x0 0x43a,0x0 0x43b,0x0 0x43c,0x0 0x43d,0x0 0x43e,0x0 0x43f,0x0 0x440,0x0 0x441,0x0 0x442,0x0 0x443,0x0 0x444,0x0 0x445,0x0 0x446,0x0 0x447,0x0 0x448,0x0 0x449,0x0 0x44a,0x0 0x44b,0x0 0x44c,0x0 0x44d,0x0 0x44e,0x0 0x44f,0x0 0x450,0x0 0x451,0x0 0x452,0x0 0x453,0x0 0x454,0x0 0x455,0x0 0x456,0x0 0x457,0x0 0x458,0x0 0x459,0x0 0x45a,0x0 0x45b,0x0 0x45c,0x0 0x45d,0x0 0x45e,0x0 0x45f,0x0 0x460,0x0 0x461,0x0 0x462,0x0 0x463,0x0 0x464,0x0 0x465,0x0 0x466,0x0 0x467,0x0 0x468,0x0 0x469,0x0 0x46a,0x0 0x46b,0x0 0x46c,0x0 0x46d,0x0 0x46e,0x0 0x46f,0x0 0x470,0x0 0x471,0x0 0x472,0x0 0x473,0x0 0x474,0x0 0x475,0x0 0x476,0x0 0x477,0x0 0x478,0x0 0x479,0x0 0x47a,0x0 0x47b,0x0 0x47c,0x0 0x47d,0x0 0x47e,0x0 0x47f,0x0 0x606,0x0 0x611,0x0 0x619,0x0 0x639,0x0 0x641,0x0 0x6e0,0x15ea1f8d0 0x800,0x0 0x801,0x0 0x802,0x0 0x803,0x50014 0x804,0x0 0x805,0x0 0x806,0x0 0x807,0x0 0x808,0x10 0x809,0x0 0x80a,0x10 0x80b,0x0 0x80c,0x0 0x80d,0x1 0x80e,0xffffffff 0x80f,0x1ff 0x810,0x0 0x811,0x0 0x812,0x0 0x813,0x0 0x814,0x0 0x815,0x0 0x816,0x0 0x817,0x0 0x818,0x0 0x819,0x0 0x81a,0x0 0x81b,0x0 0x81c,0x0 0x81d,0x0 0x81e,0x0 0x81f,0x0 0x820,0x0 0x821,0x0 0x822,0x0 0x823,0x0 0x824,0x0 0x825,0x0 0x826,0x0 0x827,0x0 0x828,0x0 0x829,0x0 0x82a,0x0 0x82b,0x0 0x82c,0x0 0x82d,0x0 0x82e,0x0 0x82f,0x0 0x830,0x0 0x831,0x0 0x832,0x400ec 0x833,0x10000 0x834,0x10000 0x835,0x10700 0x836,0x400 0x837,0xfe 0x838,0x0 0x839,0x0 0x83a,0x0 0x83b,0x0 0x83c,0x0 0x83d,0x0 0x83e,0x0 0x83f,0x0 0x840,0x0 0x841,0x0 0x842,0x0 0x843,0x0 0x844,0x0 0x845,0x0 0x846,0x0 0x847,0x0 0x848,0x0 0x849,0x0 0x84a,0x0 0x84b,0x0 0x84c,0x0 0x84d,0x0 0x84e,0x0 0x84f,0x0 0x850,0x0 0x851,0x0 0x852,0x0 0x853,0x0 0x854,0x0 0x855,0x0 0x856,0x0 0x857,0x0 0x858,0x0 
0x859,0x0 0x85a,0x0 0x85b,0x0 0x85c,0x0 0x85d,0x0 0x85e,0x0 0x85f,0x0 0x860,0x0 0x861,0x0 0x862,0x0 0x863,0x0 0x864,0x0 0x865,0x0 0x866,0x0 0x867,0x0 0x868,0x0 0x869,0x0 0x86a,0x0 0x86b,0x0 0x86c,0x0 0x86d,0x0 0x86e,0x0 0x86f,0x0 0x870,0x0 0x871,0x0 0x872,0x0 0x873,0x0 0x874,0x0 0x875,0x0 0x876,0x0 0x877,0x0 0x878,0x0 0x879,0x0 0x87a,0x0 0x87b,0x0 0x87c,0x0 0x87d,0x0 0x87e,0x0 0x87f,0x0 0x880,0x0 0x881,0x0 0x882,0x0 0x883,0x0 0x884,0x0 0x885,0x0 0x886,0x0 0x887,0x0 0x888,0x0 0x889,0x0 0x88a,0x0 0x88b,0x0 0x88c,0x0 0x88d,0x0 0x88e,0x0 0x88f,0x0 0x890,0x0 0x891,0x0 0x892,0x0 0x893,0x0 0x894,0x0 0x895,0x0 0x896,0x0 0x897,0x0 0x898,0x0 0x899,0x0 0x89a,0x0 0x89b,0x0 0x89c,0x0 0x89d,0x0 0x89e,0x0 0x89f,0x0 0x8a0,0x0 0x8a1,0x0 0x8a2,0x0 0x8a3,0x0 0x8a4,0x0 0x8a5,0x0 0x8a6,0x0 0x8a7,0x0 0x8a8,0x0 0x8a9,0x0 0x8aa,0x0 0x8ab,0x0 0x8ac,0x0 0x8ad,0x0 0x8ae,0x0 0x8af,0x0 0x8b0,0x0 0x8b1,0x0 0x8b2,0x0 0x8b3,0x0 0x8b4,0x0 0x8b5,0x0 0x8b6,0x0 0x8b7,0x0 0x8b8,0x0 0x8b9,0x0 0x8ba,0x0 0x8bb,0x0 0x8bc,0x0 0x8bd,0x0 0x8be,0x0 0x8bf,0x0 0x8c0,0x0 0x8c1,0x0 0x8c2,0x0 0x8c3,0x0 0x8c4,0x0 0x8c5,0x0 0x8c6,0x0 0x8c7,0x0 0x8c8,0x0 0x8c9,0x0 0x8ca,0x0 0x8cb,0x0 0x8cc,0x0 0x8cd,0x0 0x8ce,0x0 0x8cf,0x0 0x8d0,0x0 0x8d1,0x0 0x8d2,0x0 0x8d3,0x0 0x8d4,0x0 0x8d5,0x0 0x8d6,0x0 0x8d7,0x0 0x8d8,0x0 0x8d9,0x0 0x8da,0x0 0x8db,0x0 0x8dc,0x0 0x8dd,0x0 0x8de,0x0 0x8df,0x0 0x8e0,0x0 0x8e1,0x0 0x8e2,0x0 0x8e3,0x0 0x8e4,0x0 0x8e5,0x0 0x8e6,0x0 0x8e7,0x0 0x8e8,0x0 0x8e9,0x0 0x8ea,0x0 0x8eb,0x0 0x8ec,0x0 0x8ed,0x0 0x8ee,0x0 0x8ef,0x0 0x8f0,0x0 0x8f1,0x0 0x8f2,0x0 0x8f3,0x0 0x8f4,0x0 0x8f5,0x0 0x8f6,0x0 0x8f7,0x0 0x8f8,0x0 0x8f9,0x0 0x8fa,0x0 0x8fb,0x0 0x8fc,0x0 0x8fd,0x0 0x8fe,0x0 0x8ff,0x0 0xc0000080,0xd01 0xc0000081,0x23001000000000 0xc0000082,0xffffffff81a00080 0xc0000083,0xffffffff81a015c0 0xc0000084,0x47700 0xc0000100,0x1de55380 0xc0000101,0xffff88803ec00000 0xc0000102,0x0 0xc0000103,0x0 0xc0010000,0x0 0xc0010001,0x0 0xc0010002,0x0 0xc0010003,0x0 0xc0010004,0x0 0xc0010005,0x0 0xc0010006,0x0 0xc0010007,0x0 
0xc0010010,0x0 0xc0010015,0x0 0xc001001b,0x20000000 0xc001001f,0x0 0xc0010055,0x0 0xc0010058,0x0 0xc0010112,0x0 0xc0010113,0x0 0xc0010117,0x0 0xc0010200,0x0 0xc0010201,0x0 0xc0010202,0x0 0xc0010203,0x0 0xc0010204,0x0 0xc0010205,0x0 0xc0010206,0x0 0xc0010207,0x0 0xc0010208,0x0 0xc0010209,0x0 0xc001020a,0x0 0xc001020b,0x0 0xc0011022,0x0 0xc0011023,0x0 0xc001102a,0x0 0xc001102c,0x0 0x400000000,0x0 0x2000000000,0x0 0x4000000000,0x0 0x8000000000,0x0 0x1000000000000,0x0 0x3c000000000000,0x0 0x80000000000000,0x0 0x40000000000000,0x0 ================================================ FILE: tests/data/msr/msr_list_T2CL_INTEL_CASCADELAKE_5.10host_6.1guest.csv ================================================ MSR_ADDR,VALUE 0x0,0x0 0x1,0x0 0x10,0x1337dcd0a 0x11,0x27fd008 0x12,0x27fe001 0x17,0x0 0x1b,0xfee00d00 0x2a,0x0 0x2c,0x1000000 0x34,0x0 0x3a,0x1 0x3b,0x0 0x48,0x1 0x8b,0x100000000 0xc1,0x0 0xc2,0x0 0xcd,0x3 0xce,0x80000000 0xfe,0x508 0x10a,0xc0aa0eb 0x11e,0xbe702111 0x122,0x3 0x140,0x0 0x174,0x10 0x175,0xfffffe0000003000 0x176,0xffffffff81c01630 0x179,0x20 0x17a,0x0 0x186,0x0 0x187,0x0 0x198,0x400000003e8 0x199,0x0 0x1a0,0x1 0x1d9,0x0 0x1db,0x0 0x1dc,0x0 0x1dd,0x0 0x1de,0x0 0x1fc,0x0 0x200,0x0 0x201,0x0 0x202,0x0 0x203,0x0 0x204,0x0 0x205,0x0 0x206,0x0 0x207,0x0 0x208,0x0 0x209,0x0 0x20a,0x0 0x20b,0x0 0x20c,0x0 0x20d,0x0 0x20e,0x0 0x20f,0x0 0x250,0x0 0x258,0x0 0x259,0x0 0x268,0x0 0x269,0x0 0x26a,0x0 0x26b,0x0 0x26c,0x0 0x26d,0x0 0x26e,0x0 0x26f,0x0 0x277,0x407050600070106 0x2ff,0x806 0x400,0x0 0x401,0x0 0x402,0x0 0x403,0x0 0x404,0x0 0x405,0x0 0x406,0x0 0x407,0x0 0x408,0x0 0x409,0x0 0x40a,0x0 0x40b,0x0 0x40c,0x0 0x40d,0x0 0x40e,0x0 0x40f,0x0 0x410,0x0 0x411,0x0 0x412,0x0 0x413,0x0 0x414,0x0 0x415,0x0 0x416,0x0 0x417,0x0 0x418,0x0 0x419,0x0 0x41a,0x0 0x41b,0x0 0x41c,0x0 0x41d,0x0 0x41e,0x0 0x41f,0x0 0x420,0x0 0x421,0x0 0x422,0x0 0x423,0x0 0x424,0x0 0x425,0x0 0x426,0x0 0x427,0x0 0x428,0x0 0x429,0x0 0x42a,0x0 0x42b,0x0 0x42c,0x0 0x42d,0x0 0x42e,0x0 0x42f,0x0 0x430,0x0 0x431,0x0 
0x432,0x0 0x433,0x0 0x434,0x0 0x435,0x0 0x436,0x0 0x437,0x0 0x438,0x0 0x439,0x0 0x43a,0x0 0x43b,0x0 0x43c,0x0 0x43d,0x0 0x43e,0x0 0x43f,0x0 0x440,0x0 0x441,0x0 0x442,0x0 0x443,0x0 0x444,0x0 0x445,0x0 0x446,0x0 0x447,0x0 0x448,0x0 0x449,0x0 0x44a,0x0 0x44b,0x0 0x44c,0x0 0x44d,0x0 0x44e,0x0 0x44f,0x0 0x450,0x0 0x451,0x0 0x452,0x0 0x453,0x0 0x454,0x0 0x455,0x0 0x456,0x0 0x457,0x0 0x458,0x0 0x459,0x0 0x45a,0x0 0x45b,0x0 0x45c,0x0 0x45d,0x0 0x45e,0x0 0x45f,0x0 0x460,0x0 0x461,0x0 0x462,0x0 0x463,0x0 0x464,0x0 0x465,0x0 0x466,0x0 0x467,0x0 0x468,0x0 0x469,0x0 0x46a,0x0 0x46b,0x0 0x46c,0x0 0x46d,0x0 0x46e,0x0 0x46f,0x0 0x470,0x0 0x471,0x0 0x472,0x0 0x473,0x0 0x474,0x0 0x475,0x0 0x476,0x0 0x477,0x0 0x478,0x0 0x479,0x0 0x47a,0x0 0x47b,0x0 0x47c,0x0 0x47d,0x0 0x47e,0x0 0x47f,0x0 0x606,0x0 0x611,0x0 0x619,0x0 0x639,0x0 0x641,0x0 0x6e0,0x13444450a 0x800,0x0 0x801,0x0 0x802,0x0 0x803,0x50014 0x804,0x0 0x805,0x0 0x806,0x0 0x807,0x0 0x808,0x10 0x809,0x0 0x80a,0x10 0x80b,0x0 0x80c,0x0 0x80d,0x1 0x80e,0xffffffff 0x80f,0x1ff 0x810,0x0 0x811,0x0 0x812,0x0 0x813,0x0 0x814,0x0 0x815,0x0 0x816,0x0 0x817,0x0 0x818,0x0 0x819,0x0 0x81a,0x0 0x81b,0x0 0x81c,0x0 0x81d,0x0 0x81e,0x0 0x81f,0x0 0x820,0x0 0x821,0x0 0x822,0x0 0x823,0x0 0x824,0x0 0x825,0x0 0x826,0x0 0x827,0x0 0x828,0x0 0x829,0x0 0x82a,0x0 0x82b,0x0 0x82c,0x0 0x82d,0x0 0x82e,0x0 0x82f,0x0 0x830,0x0 0x831,0x0 0x832,0x400ec 0x833,0x10000 0x834,0x10000 0x835,0x10700 0x836,0x400 0x837,0xfe 0x838,0x0 0x839,0x0 0x83a,0x0 0x83b,0x0 0x83c,0x0 0x83d,0x0 0x83e,0x0 0x83f,0x0 0x840,0x0 0x841,0x0 0x842,0x0 0x843,0x0 0x844,0x0 0x845,0x0 0x846,0x0 0x847,0x0 0x848,0x0 0x849,0x0 0x84a,0x0 0x84b,0x0 0x84c,0x0 0x84d,0x0 0x84e,0x0 0x84f,0x0 0x850,0x0 0x851,0x0 0x852,0x0 0x853,0x0 0x854,0x0 0x855,0x0 0x856,0x0 0x857,0x0 0x858,0x0 0x859,0x0 0x85a,0x0 0x85b,0x0 0x85c,0x0 0x85d,0x0 0x85e,0x0 0x85f,0x0 0x860,0x0 0x861,0x0 0x862,0x0 0x863,0x0 0x864,0x0 0x865,0x0 0x866,0x0 0x867,0x0 0x868,0x0 0x869,0x0 0x86a,0x0 0x86b,0x0 0x86c,0x0 0x86d,0x0 0x86e,0x0 
0x86f,0x0 0x870,0x0 0x871,0x0 0x872,0x0 0x873,0x0 0x874,0x0 0x875,0x0 0x876,0x0 0x877,0x0 0x878,0x0 0x879,0x0 0x87a,0x0 0x87b,0x0 0x87c,0x0 0x87d,0x0 0x87e,0x0 0x87f,0x0 0x880,0x0 0x881,0x0 0x882,0x0 0x883,0x0 0x884,0x0 0x885,0x0 0x886,0x0 0x887,0x0 0x888,0x0 0x889,0x0 0x88a,0x0 0x88b,0x0 0x88c,0x0 0x88d,0x0 0x88e,0x0 0x88f,0x0 0x890,0x0 0x891,0x0 0x892,0x0 0x893,0x0 0x894,0x0 0x895,0x0 0x896,0x0 0x897,0x0 0x898,0x0 0x899,0x0 0x89a,0x0 0x89b,0x0 0x89c,0x0 0x89d,0x0 0x89e,0x0 0x89f,0x0 0x8a0,0x0 0x8a1,0x0 0x8a2,0x0 0x8a3,0x0 0x8a4,0x0 0x8a5,0x0 0x8a6,0x0 0x8a7,0x0 0x8a8,0x0 0x8a9,0x0 0x8aa,0x0 0x8ab,0x0 0x8ac,0x0 0x8ad,0x0 0x8ae,0x0 0x8af,0x0 0x8b0,0x0 0x8b1,0x0 0x8b2,0x0 0x8b3,0x0 0x8b4,0x0 0x8b5,0x0 0x8b6,0x0 0x8b7,0x0 0x8b8,0x0 0x8b9,0x0 0x8ba,0x0 0x8bb,0x0 0x8bc,0x0 0x8bd,0x0 0x8be,0x0 0x8bf,0x0 0x8c0,0x0 0x8c1,0x0 0x8c2,0x0 0x8c3,0x0 0x8c4,0x0 0x8c5,0x0 0x8c6,0x0 0x8c7,0x0 0x8c8,0x0 0x8c9,0x0 0x8ca,0x0 0x8cb,0x0 0x8cc,0x0 0x8cd,0x0 0x8ce,0x0 0x8cf,0x0 0x8d0,0x0 0x8d1,0x0 0x8d2,0x0 0x8d3,0x0 0x8d4,0x0 0x8d5,0x0 0x8d6,0x0 0x8d7,0x0 0x8d8,0x0 0x8d9,0x0 0x8da,0x0 0x8db,0x0 0x8dc,0x0 0x8dd,0x0 0x8de,0x0 0x8df,0x0 0x8e0,0x0 0x8e1,0x0 0x8e2,0x0 0x8e3,0x0 0x8e4,0x0 0x8e5,0x0 0x8e6,0x0 0x8e7,0x0 0x8e8,0x0 0x8e9,0x0 0x8ea,0x0 0x8eb,0x0 0x8ec,0x0 0x8ed,0x0 0x8ee,0x0 0x8ef,0x0 0x8f0,0x0 0x8f1,0x0 0x8f2,0x0 0x8f3,0x0 0x8f4,0x0 0x8f5,0x0 0x8f6,0x0 0x8f7,0x0 0x8f8,0x0 0x8f9,0x0 0x8fa,0x0 0x8fb,0x0 0x8fc,0x0 0x8fd,0x0 0x8fe,0x0 0x8ff,0x0 0xc0000080,0xd01 0xc0000081,0x23001000000000 0xc0000082,0xffffffff81c00080 0xc0000083,0x0 0xc0000084,0x257fd5 0xc0000100,0x5660380 0xc0000101,0xffff88803ec00000 0xc0000102,0x0 0xc0000103,0x0 0xc0010000,0x0 0xc0010001,0x0 0xc0010002,0x0 0xc0010003,0x0 0xc0010004,0x0 0xc0010005,0x0 0xc0010006,0x0 0xc0010007,0x0 0xc0010010,0x0 0xc0010015,0x0 0xc001001b,0x20000000 0xc001001f,0x0 0xc0010055,0x0 0xc0010058,0x0 0xc0010112,0x0 0xc0010113,0x0 0xc0010117,0x0 0xc0010200,0x0 0xc0010201,0x0 0xc0010202,0x0 0xc0010203,0x0 0xc0010204,0x0 0xc0010205,0x0 
0xc0010206,0x0 0xc0010207,0x0 0xc0010208,0x0 0xc0010209,0x0 0xc001020a,0x0 0xc001020b,0x0 0xc0011022,0x0 0xc0011023,0x0 0xc001102a,0x0 0xc001102c,0x0 0x400000000,0x0 0x2000000000,0x0 0x4000000000,0x0 0x8000000000,0x0 0x1000000000000,0x0 0x3c000000000000,0x0 0x80000000000000,0x0 0x40000000000000,0x0 ================================================ FILE: tests/data/msr/msr_list_T2CL_INTEL_CASCADELAKE_6.1host_5.10guest.csv ================================================ MSR_ADDR,VALUE 0x0,0x0 0x1,0x0 0x10,0x13c845eb6 0x11,0x24e6008 0x12,0x24e7001 0x17,0x0 0x1b,0xfee00d00 0x2a,0x0 0x2c,0x1000000 0x34,0x0 0x3a,0x1 0x3b,0x0 0x48,0x1 0x8b,0x100000000 0xc1,0x0 0xc2,0x0 0xcd,0x3 0xce,0x80000000 0xfe,0x508 0x10a,0xc0aa0eb 0x11e,0xbe702111 0x122,0x3 0x140,0x0 0x174,0x10 0x175,0xfffffe0000003000 0x176,0xffffffff81a01510 0x179,0x20 0x17a,0x0 0x186,0x0 0x187,0x0 0x198,0x400000003e8 0x199,0x0 0x1a0,0x1 0x1d9,0x0 0x1db,0x0 0x1dc,0x0 0x1dd,0x0 0x1de,0x0 0x1fc,0x0 0x200,0x0 0x201,0x0 0x202,0x0 0x203,0x0 0x204,0x0 0x205,0x0 0x206,0x0 0x207,0x0 0x208,0x0 0x209,0x0 0x20a,0x0 0x20b,0x0 0x20c,0x0 0x20d,0x0 0x20e,0x0 0x20f,0x0 0x250,0x0 0x258,0x0 0x259,0x0 0x268,0x0 0x269,0x0 0x26a,0x0 0x26b,0x0 0x26c,0x0 0x26d,0x0 0x26e,0x0 0x26f,0x0 0x277,0x407050600070106 0x2ff,0x806 0x400,0x0 0x401,0x0 0x402,0x0 0x403,0x0 0x404,0x0 0x405,0x0 0x406,0x0 0x407,0x0 0x408,0x0 0x409,0x0 0x40a,0x0 0x40b,0x0 0x40c,0x0 0x40d,0x0 0x40e,0x0 0x40f,0x0 0x410,0x0 0x411,0x0 0x412,0x0 0x413,0x0 0x414,0x0 0x415,0x0 0x416,0x0 0x417,0x0 0x418,0x0 0x419,0x0 0x41a,0x0 0x41b,0x0 0x41c,0x0 0x41d,0x0 0x41e,0x0 0x41f,0x0 0x420,0x0 0x421,0x0 0x422,0x0 0x423,0x0 0x424,0x0 0x425,0x0 0x426,0x0 0x427,0x0 0x428,0x0 0x429,0x0 0x42a,0x0 0x42b,0x0 0x42c,0x0 0x42d,0x0 0x42e,0x0 0x42f,0x0 0x430,0x0 0x431,0x0 0x432,0x0 0x433,0x0 0x434,0x0 0x435,0x0 0x436,0x0 0x437,0x0 0x438,0x0 0x439,0x0 0x43a,0x0 0x43b,0x0 0x43c,0x0 0x43d,0x0 0x43e,0x0 0x43f,0x0 0x440,0x0 0x441,0x0 0x442,0x0 0x443,0x0 0x444,0x0 0x445,0x0 0x446,0x0 0x447,0x0 0x448,0x0 
0x449,0x0 0x44a,0x0 0x44b,0x0 0x44c,0x0 0x44d,0x0 0x44e,0x0 0x44f,0x0 0x450,0x0 0x451,0x0 0x452,0x0 0x453,0x0 0x454,0x0 0x455,0x0 0x456,0x0 0x457,0x0 0x458,0x0 0x459,0x0 0x45a,0x0 0x45b,0x0 0x45c,0x0 0x45d,0x0 0x45e,0x0 0x45f,0x0 0x460,0x0 0x461,0x0 0x462,0x0 0x463,0x0 0x464,0x0 0x465,0x0 0x466,0x0 0x467,0x0 0x468,0x0 0x469,0x0 0x46a,0x0 0x46b,0x0 0x46c,0x0 0x46d,0x0 0x46e,0x0 0x46f,0x0 0x470,0x0 0x471,0x0 0x472,0x0 0x473,0x0 0x474,0x0 0x475,0x0 0x476,0x0 0x477,0x0 0x478,0x0 0x479,0x0 0x47a,0x0 0x47b,0x0 0x47c,0x0 0x47d,0x0 0x47e,0x0 0x47f,0x0 0x606,0x0 0x611,0x0 0x619,0x0 0x639,0x0 0x641,0x0 0x6e0,0x13d1abfbe 0x800,0x0 0x801,0x0 0x802,0x0 0x803,0x50014 0x804,0x0 0x805,0x0 0x806,0x0 0x807,0x0 0x808,0x10 0x809,0x0 0x80a,0x10 0x80b,0x0 0x80c,0x0 0x80d,0x1 0x80e,0xffffffff 0x80f,0x1ff 0x810,0x0 0x811,0x0 0x812,0x0 0x813,0x0 0x814,0x0 0x815,0x0 0x816,0x0 0x817,0x0 0x818,0x0 0x819,0x0 0x81a,0x0 0x81b,0x0 0x81c,0x0 0x81d,0x0 0x81e,0x0 0x81f,0x0 0x820,0x0 0x821,0x0 0x822,0x0 0x823,0x0 0x824,0x0 0x825,0x0 0x826,0x0 0x827,0x0 0x828,0x0 0x829,0x0 0x82a,0x0 0x82b,0x0 0x82c,0x0 0x82d,0x0 0x82e,0x0 0x82f,0x0 0x830,0x0 0x831,0x0 0x832,0x400ec 0x833,0x10000 0x834,0x10000 0x835,0x10700 0x836,0x400 0x837,0xfe 0x838,0x0 0x839,0x0 0x83a,0x0 0x83b,0x0 0x83c,0x0 0x83d,0x0 0x83e,0x0 0x83f,0x0 0x840,0x0 0x841,0x0 0x842,0x0 0x843,0x0 0x844,0x0 0x845,0x0 0x846,0x0 0x847,0x0 0x848,0x0 0x849,0x0 0x84a,0x0 0x84b,0x0 0x84c,0x0 0x84d,0x0 0x84e,0x0 0x84f,0x0 0x850,0x0 0x851,0x0 0x852,0x0 0x853,0x0 0x854,0x0 0x855,0x0 0x856,0x0 0x857,0x0 0x858,0x0 0x859,0x0 0x85a,0x0 0x85b,0x0 0x85c,0x0 0x85d,0x0 0x85e,0x0 0x85f,0x0 0x860,0x0 0x861,0x0 0x862,0x0 0x863,0x0 0x864,0x0 0x865,0x0 0x866,0x0 0x867,0x0 0x868,0x0 0x869,0x0 0x86a,0x0 0x86b,0x0 0x86c,0x0 0x86d,0x0 0x86e,0x0 0x86f,0x0 0x870,0x0 0x871,0x0 0x872,0x0 0x873,0x0 0x874,0x0 0x875,0x0 0x876,0x0 0x877,0x0 0x878,0x0 0x879,0x0 0x87a,0x0 0x87b,0x0 0x87c,0x0 0x87d,0x0 0x87e,0x0 0x87f,0x0 0x880,0x0 0x881,0x0 0x882,0x0 0x883,0x0 0x884,0x0 0x885,0x0 
0x886,0x0 0x887,0x0 0x888,0x0 0x889,0x0 0x88a,0x0 0x88b,0x0 0x88c,0x0 0x88d,0x0 0x88e,0x0 0x88f,0x0 0x890,0x0 0x891,0x0 0x892,0x0 0x893,0x0 0x894,0x0 0x895,0x0 0x896,0x0 0x897,0x0 0x898,0x0 0x899,0x0 0x89a,0x0 0x89b,0x0 0x89c,0x0 0x89d,0x0 0x89e,0x0 0x89f,0x0 0x8a0,0x0 0x8a1,0x0 0x8a2,0x0 0x8a3,0x0 0x8a4,0x0 0x8a5,0x0 0x8a6,0x0 0x8a7,0x0 0x8a8,0x0 0x8a9,0x0 0x8aa,0x0 0x8ab,0x0 0x8ac,0x0 0x8ad,0x0 0x8ae,0x0 0x8af,0x0 0x8b0,0x0 0x8b1,0x0 0x8b2,0x0 0x8b3,0x0 0x8b4,0x0 0x8b5,0x0 0x8b6,0x0 0x8b7,0x0 0x8b8,0x0 0x8b9,0x0 0x8ba,0x0 0x8bb,0x0 0x8bc,0x0 0x8bd,0x0 0x8be,0x0 0x8bf,0x0 0x8c0,0x0 0x8c1,0x0 0x8c2,0x0 0x8c3,0x0 0x8c4,0x0 0x8c5,0x0 0x8c6,0x0 0x8c7,0x0 0x8c8,0x0 0x8c9,0x0 0x8ca,0x0 0x8cb,0x0 0x8cc,0x0 0x8cd,0x0 0x8ce,0x0 0x8cf,0x0 0x8d0,0x0 0x8d1,0x0 0x8d2,0x0 0x8d3,0x0 0x8d4,0x0 0x8d5,0x0 0x8d6,0x0 0x8d7,0x0 0x8d8,0x0 0x8d9,0x0 0x8da,0x0 0x8db,0x0 0x8dc,0x0 0x8dd,0x0 0x8de,0x0 0x8df,0x0 0x8e0,0x0 0x8e1,0x0 0x8e2,0x0 0x8e3,0x0 0x8e4,0x0 0x8e5,0x0 0x8e6,0x0 0x8e7,0x0 0x8e8,0x0 0x8e9,0x0 0x8ea,0x0 0x8eb,0x0 0x8ec,0x0 0x8ed,0x0 0x8ee,0x0 0x8ef,0x0 0x8f0,0x0 0x8f1,0x0 0x8f2,0x0 0x8f3,0x0 0x8f4,0x0 0x8f5,0x0 0x8f6,0x0 0x8f7,0x0 0x8f8,0x0 0x8f9,0x0 0x8fa,0x0 0x8fb,0x0 0x8fc,0x0 0x8fd,0x0 0x8fe,0x0 0x8ff,0x0 0xc0000080,0xd01 0xc0000081,0x23001000000000 0xc0000082,0xffffffff81a00080 0xc0000083,0xffffffff81a015c0 0xc0000084,0x47700 0xc0000100,0x2820b380 0xc0000101,0xffff88803ec00000 0xc0000102,0x0 0xc0000103,0x0 0xc0010000,0x0 0xc0010001,0x0 0xc0010002,0x0 0xc0010003,0x0 0xc0010004,0x0 0xc0010005,0x0 0xc0010006,0x0 0xc0010007,0x0 0xc0010010,0x0 0xc0010015,0x0 0xc001001b,0x20000000 0xc001001f,0x0 0xc0010055,0x0 0xc0010058,0x0 0xc0010112,0x0 0xc0010113,0x0 0xc0010117,0x0 0xc0011022,0x0 0xc0011023,0x0 0xc001102a,0x0 0xc001102c,0x0 0x400000000,0x0 0x2000000000,0x0 0x4000000000,0x0 0x8000000000,0x0 0x1000000000000,0x0 0x3c000000000000,0x0 0x80000000000000,0x0 0x40000000000000,0x0 ================================================ FILE: 
tests/data/msr/msr_list_T2CL_INTEL_CASCADELAKE_6.1host_6.1guest.csv ================================================ MSR_ADDR,VALUE 0x0,0x0 0x1,0x0 0x10,0x1324e0ba2 0x11,0x27fd008 0x12,0x27fe001 0x17,0x0 0x1b,0xfee00d00 0x2a,0x0 0x2c,0x1000000 0x34,0x0 0x3a,0x1 0x3b,0x0 0x48,0x1 0x8b,0x100000000 0xc1,0x0 0xc2,0x0 0xcd,0x3 0xce,0x80000000 0xfe,0x508 0x10a,0xc0aa0eb 0x11e,0xbe702111 0x122,0x3 0x140,0x0 0x174,0x10 0x175,0xfffffe0000003000 0x176,0xffffffff81c01630 0x179,0x20 0x17a,0x0 0x186,0x0 0x187,0x0 0x198,0x400000003e8 0x199,0x0 0x1a0,0x1 0x1d9,0x0 0x1db,0x0 0x1dc,0x0 0x1dd,0x0 0x1de,0x0 0x1fc,0x0 0x200,0x0 0x201,0x0 0x202,0x0 0x203,0x0 0x204,0x0 0x205,0x0 0x206,0x0 0x207,0x0 0x208,0x0 0x209,0x0 0x20a,0x0 0x20b,0x0 0x20c,0x0 0x20d,0x0 0x20e,0x0 0x20f,0x0 0x250,0x0 0x258,0x0 0x259,0x0 0x268,0x0 0x269,0x0 0x26a,0x0 0x26b,0x0 0x26c,0x0 0x26d,0x0 0x26e,0x0 0x26f,0x0 0x277,0x407050600070106 0x2ff,0x806 0x400,0x0 0x401,0x0 0x402,0x0 0x403,0x0 0x404,0x0 0x405,0x0 0x406,0x0 0x407,0x0 0x408,0x0 0x409,0x0 0x40a,0x0 0x40b,0x0 0x40c,0x0 0x40d,0x0 0x40e,0x0 0x40f,0x0 0x410,0x0 0x411,0x0 0x412,0x0 0x413,0x0 0x414,0x0 0x415,0x0 0x416,0x0 0x417,0x0 0x418,0x0 0x419,0x0 0x41a,0x0 0x41b,0x0 0x41c,0x0 0x41d,0x0 0x41e,0x0 0x41f,0x0 0x420,0x0 0x421,0x0 0x422,0x0 0x423,0x0 0x424,0x0 0x425,0x0 0x426,0x0 0x427,0x0 0x428,0x0 0x429,0x0 0x42a,0x0 0x42b,0x0 0x42c,0x0 0x42d,0x0 0x42e,0x0 0x42f,0x0 0x430,0x0 0x431,0x0 0x432,0x0 0x433,0x0 0x434,0x0 0x435,0x0 0x436,0x0 0x437,0x0 0x438,0x0 0x439,0x0 0x43a,0x0 0x43b,0x0 0x43c,0x0 0x43d,0x0 0x43e,0x0 0x43f,0x0 0x440,0x0 0x441,0x0 0x442,0x0 0x443,0x0 0x444,0x0 0x445,0x0 0x446,0x0 0x447,0x0 0x448,0x0 0x449,0x0 0x44a,0x0 0x44b,0x0 0x44c,0x0 0x44d,0x0 0x44e,0x0 0x44f,0x0 0x450,0x0 0x451,0x0 0x452,0x0 0x453,0x0 0x454,0x0 0x455,0x0 0x456,0x0 0x457,0x0 0x458,0x0 0x459,0x0 0x45a,0x0 0x45b,0x0 0x45c,0x0 0x45d,0x0 0x45e,0x0 0x45f,0x0 0x460,0x0 0x461,0x0 0x462,0x0 0x463,0x0 0x464,0x0 0x465,0x0 0x466,0x0 0x467,0x0 0x468,0x0 0x469,0x0 0x46a,0x0 0x46b,0x0 
0x46c,0x0 0x46d,0x0 0x46e,0x0 0x46f,0x0 0x470,0x0 0x471,0x0 0x472,0x0 0x473,0x0 0x474,0x0 0x475,0x0 0x476,0x0 0x477,0x0 0x478,0x0 0x479,0x0 0x47a,0x0 0x47b,0x0 0x47c,0x0 0x47d,0x0 0x47e,0x0 0x47f,0x0 0x606,0x0 0x611,0x0 0x619,0x0 0x639,0x0 0x641,0x0 0x6e0,0x132f90c4a 0x800,0x0 0x801,0x0 0x802,0x0 0x803,0x50014 0x804,0x0 0x805,0x0 0x806,0x0 0x807,0x0 0x808,0x10 0x809,0x0 0x80a,0x10 0x80b,0x0 0x80c,0x0 0x80d,0x1 0x80e,0xffffffff 0x80f,0x1ff 0x810,0x0 0x811,0x0 0x812,0x0 0x813,0x0 0x814,0x0 0x815,0x0 0x816,0x0 0x817,0x0 0x818,0x0 0x819,0x0 0x81a,0x0 0x81b,0x0 0x81c,0x0 0x81d,0x0 0x81e,0x0 0x81f,0x0 0x820,0x0 0x821,0x0 0x822,0x0 0x823,0x0 0x824,0x0 0x825,0x0 0x826,0x0 0x827,0x0 0x828,0x0 0x829,0x0 0x82a,0x0 0x82b,0x0 0x82c,0x0 0x82d,0x0 0x82e,0x0 0x82f,0x0 0x830,0x0 0x831,0x0 0x832,0x400ec 0x833,0x10000 0x834,0x10000 0x835,0x10700 0x836,0x400 0x837,0xfe 0x838,0x0 0x839,0x0 0x83a,0x0 0x83b,0x0 0x83c,0x0 0x83d,0x0 0x83e,0x0 0x83f,0x0 0x840,0x0 0x841,0x0 0x842,0x0 0x843,0x0 0x844,0x0 0x845,0x0 0x846,0x0 0x847,0x0 0x848,0x0 0x849,0x0 0x84a,0x0 0x84b,0x0 0x84c,0x0 0x84d,0x0 0x84e,0x0 0x84f,0x0 0x850,0x0 0x851,0x0 0x852,0x0 0x853,0x0 0x854,0x0 0x855,0x0 0x856,0x0 0x857,0x0 0x858,0x0 0x859,0x0 0x85a,0x0 0x85b,0x0 0x85c,0x0 0x85d,0x0 0x85e,0x0 0x85f,0x0 0x860,0x0 0x861,0x0 0x862,0x0 0x863,0x0 0x864,0x0 0x865,0x0 0x866,0x0 0x867,0x0 0x868,0x0 0x869,0x0 0x86a,0x0 0x86b,0x0 0x86c,0x0 0x86d,0x0 0x86e,0x0 0x86f,0x0 0x870,0x0 0x871,0x0 0x872,0x0 0x873,0x0 0x874,0x0 0x875,0x0 0x876,0x0 0x877,0x0 0x878,0x0 0x879,0x0 0x87a,0x0 0x87b,0x0 0x87c,0x0 0x87d,0x0 0x87e,0x0 0x87f,0x0 0x880,0x0 0x881,0x0 0x882,0x0 0x883,0x0 0x884,0x0 0x885,0x0 0x886,0x0 0x887,0x0 0x888,0x0 0x889,0x0 0x88a,0x0 0x88b,0x0 0x88c,0x0 0x88d,0x0 0x88e,0x0 0x88f,0x0 0x890,0x0 0x891,0x0 0x892,0x0 0x893,0x0 0x894,0x0 0x895,0x0 0x896,0x0 0x897,0x0 0x898,0x0 0x899,0x0 0x89a,0x0 0x89b,0x0 0x89c,0x0 0x89d,0x0 0x89e,0x0 0x89f,0x0 0x8a0,0x0 0x8a1,0x0 0x8a2,0x0 0x8a3,0x0 0x8a4,0x0 0x8a5,0x0 0x8a6,0x0 0x8a7,0x0 0x8a8,0x0 
0x8a9,0x0 0x8aa,0x0 0x8ab,0x0 0x8ac,0x0 0x8ad,0x0 0x8ae,0x0 0x8af,0x0 0x8b0,0x0 0x8b1,0x0 0x8b2,0x0 0x8b3,0x0 0x8b4,0x0 0x8b5,0x0 0x8b6,0x0 0x8b7,0x0 0x8b8,0x0 0x8b9,0x0 0x8ba,0x0 0x8bb,0x0 0x8bc,0x0 0x8bd,0x0 0x8be,0x0 0x8bf,0x0 0x8c0,0x0 0x8c1,0x0 0x8c2,0x0 0x8c3,0x0 0x8c4,0x0 0x8c5,0x0 0x8c6,0x0 0x8c7,0x0 0x8c8,0x0 0x8c9,0x0 0x8ca,0x0 0x8cb,0x0 0x8cc,0x0 0x8cd,0x0 0x8ce,0x0 0x8cf,0x0 0x8d0,0x0 0x8d1,0x0 0x8d2,0x0 0x8d3,0x0 0x8d4,0x0 0x8d5,0x0 0x8d6,0x0 0x8d7,0x0 0x8d8,0x0 0x8d9,0x0 0x8da,0x0 0x8db,0x0 0x8dc,0x0 0x8dd,0x0 0x8de,0x0 0x8df,0x0 0x8e0,0x0 0x8e1,0x0 0x8e2,0x0 0x8e3,0x0 0x8e4,0x0 0x8e5,0x0 0x8e6,0x0 0x8e7,0x0 0x8e8,0x0 0x8e9,0x0 0x8ea,0x0 0x8eb,0x0 0x8ec,0x0 0x8ed,0x0 0x8ee,0x0 0x8ef,0x0 0x8f0,0x0 0x8f1,0x0 0x8f2,0x0 0x8f3,0x0 0x8f4,0x0 0x8f5,0x0 0x8f6,0x0 0x8f7,0x0 0x8f8,0x0 0x8f9,0x0 0x8fa,0x0 0x8fb,0x0 0x8fc,0x0 0x8fd,0x0 0x8fe,0x0 0x8ff,0x0 0xc0000080,0xd01 0xc0000081,0x23001000000000 0xc0000082,0xffffffff81c00080 0xc0000083,0x0 0xc0000084,0x257fd5 0xc0000100,0x3c223380 0xc0000101,0xffff88803ec00000 0xc0000102,0x0 0xc0000103,0x0 0xc0010000,0x0 0xc0010001,0x0 0xc0010002,0x0 0xc0010003,0x0 0xc0010004,0x0 0xc0010005,0x0 0xc0010006,0x0 0xc0010007,0x0 0xc0010010,0x0 0xc0010015,0x0 0xc001001b,0x20000000 0xc001001f,0x0 0xc0010055,0x0 0xc0010058,0x0 0xc0010112,0x0 0xc0010113,0x0 0xc0010117,0x0 0xc0011022,0x0 0xc0011023,0x0 0xc001102a,0x0 0xc001102c,0x0 0x400000000,0x0 0x2000000000,0x0 0x4000000000,0x0 0x8000000000,0x0 0x1000000000000,0x0 0x3c000000000000,0x0 0x80000000000000,0x0 0x40000000000000,0x0 ================================================ FILE: tests/data/msr/msr_list_T2CL_INTEL_ICELAKE_5.10host_5.10guest.csv ================================================ MSR_ADDR,VALUE 0x0,0x0 0x1,0x0 0x10,0x237a664c4 0x11,0x24e6008 0x12,0x24e7001 0x17,0x0 0x1b,0xfee00d00 0x2a,0x0 0x2c,0x1000000 0x34,0x0 0x3a,0x1 0x3b,0x0 0x48,0x1 0x8b,0x100000000 0xc1,0x0 0xc2,0x0 0xcd,0x3 0xce,0x80000000 0xfe,0x508 0x10a,0xc02a0eb 0x11e,0xbe702111 0x122,0x3 0x140,0x0 
0x174,0x10 0x175,0xfffffe0000003000 0x176,0xffffffff81a01510 0x179,0x20 0x17a,0x0 0x186,0x0 0x187,0x0 0x198,0x400000003e8 0x199,0x0 0x1a0,0x1 0x1d9,0x0 0x1db,0x0 0x1dc,0x0 0x1dd,0x0 0x1de,0x0 0x1fc,0x0 0x200,0x0 0x201,0x0 0x202,0x0 0x203,0x0 0x204,0x0 0x205,0x0 0x206,0x0 0x207,0x0 0x208,0x0 0x209,0x0 0x20a,0x0 0x20b,0x0 0x20c,0x0 0x20d,0x0 0x20e,0x0 0x20f,0x0 0x250,0x0 0x258,0x0 0x259,0x0 0x268,0x0 0x269,0x0 0x26a,0x0 0x26b,0x0 0x26c,0x0 0x26d,0x0 0x26e,0x0 0x26f,0x0 0x277,0x407050600070106 0x2ff,0x806 0x400,0x0 0x401,0x0 0x402,0x0 0x403,0x0 0x404,0x0 0x405,0x0 0x406,0x0 0x407,0x0 0x408,0x0 0x409,0x0 0x40a,0x0 0x40b,0x0 0x40c,0x0 0x40d,0x0 0x40e,0x0 0x40f,0x0 0x410,0x0 0x411,0x0 0x412,0x0 0x413,0x0 0x414,0x0 0x415,0x0 0x416,0x0 0x417,0x0 0x418,0x0 0x419,0x0 0x41a,0x0 0x41b,0x0 0x41c,0x0 0x41d,0x0 0x41e,0x0 0x41f,0x0 0x420,0x0 0x421,0x0 0x422,0x0 0x423,0x0 0x424,0x0 0x425,0x0 0x426,0x0 0x427,0x0 0x428,0x0 0x429,0x0 0x42a,0x0 0x42b,0x0 0x42c,0x0 0x42d,0x0 0x42e,0x0 0x42f,0x0 0x430,0x0 0x431,0x0 0x432,0x0 0x433,0x0 0x434,0x0 0x435,0x0 0x436,0x0 0x437,0x0 0x438,0x0 0x439,0x0 0x43a,0x0 0x43b,0x0 0x43c,0x0 0x43d,0x0 0x43e,0x0 0x43f,0x0 0x440,0x0 0x441,0x0 0x442,0x0 0x443,0x0 0x444,0x0 0x445,0x0 0x446,0x0 0x447,0x0 0x448,0x0 0x449,0x0 0x44a,0x0 0x44b,0x0 0x44c,0x0 0x44d,0x0 0x44e,0x0 0x44f,0x0 0x450,0x0 0x451,0x0 0x452,0x0 0x453,0x0 0x454,0x0 0x455,0x0 0x456,0x0 0x457,0x0 0x458,0x0 0x459,0x0 0x45a,0x0 0x45b,0x0 0x45c,0x0 0x45d,0x0 0x45e,0x0 0x45f,0x0 0x460,0x0 0x461,0x0 0x462,0x0 0x463,0x0 0x464,0x0 0x465,0x0 0x466,0x0 0x467,0x0 0x468,0x0 0x469,0x0 0x46a,0x0 0x46b,0x0 0x46c,0x0 0x46d,0x0 0x46e,0x0 0x46f,0x0 0x470,0x0 0x471,0x0 0x472,0x0 0x473,0x0 0x474,0x0 0x475,0x0 0x476,0x0 0x477,0x0 0x478,0x0 0x479,0x0 0x47a,0x0 0x47b,0x0 0x47c,0x0 0x47d,0x0 0x47e,0x0 0x47f,0x0 0x606,0x0 0x611,0x0 0x619,0x0 0x639,0x0 0x641,0x0 0x6e0,0x238d35bf2 0x800,0x0 0x801,0x0 0x802,0x0 0x803,0x50014 0x804,0x0 0x805,0x0 0x806,0x0 0x807,0x0 0x808,0x10 0x809,0x0 0x80a,0x10 0x80b,0x0 0x80c,0x0 
0x80d,0x1 0x80e,0xffffffff 0x80f,0x1ff 0x810,0x0 0x811,0x0 0x812,0x0 0x813,0x0 0x814,0x0 0x815,0x0 0x816,0x0 0x817,0x0 0x818,0x0 0x819,0x0 0x81a,0x0 0x81b,0x0 0x81c,0x0 0x81d,0x0 0x81e,0x0 0x81f,0x0 0x820,0x0 0x821,0x0 0x822,0x0 0x823,0x0 0x824,0x0 0x825,0x0 0x826,0x0 0x827,0x0 0x828,0x0 0x829,0x0 0x82a,0x0 0x82b,0x0 0x82c,0x0 0x82d,0x0 0x82e,0x0 0x82f,0x0 0x830,0x0 0x831,0x0 0x832,0x400ec 0x833,0x10000 0x834,0x10000 0x835,0x10700 0x836,0x400 0x837,0xfe 0x838,0x0 0x839,0x0 0x83a,0x0 0x83b,0x0 0x83c,0x0 0x83d,0x0 0x83e,0x0 0x83f,0x0 0x840,0x0 0x841,0x0 0x842,0x0 0x843,0x0 0x844,0x0 0x845,0x0 0x846,0x0 0x847,0x0 0x848,0x0 0x849,0x0 0x84a,0x0 0x84b,0x0 0x84c,0x0 0x84d,0x0 0x84e,0x0 0x84f,0x0 0x850,0x0 0x851,0x0 0x852,0x0 0x853,0x0 0x854,0x0 0x855,0x0 0x856,0x0 0x857,0x0 0x858,0x0 0x859,0x0 0x85a,0x0 0x85b,0x0 0x85c,0x0 0x85d,0x0 0x85e,0x0 0x85f,0x0 0x860,0x0 0x861,0x0 0x862,0x0 0x863,0x0 0x864,0x0 0x865,0x0 0x866,0x0 0x867,0x0 0x868,0x0 0x869,0x0 0x86a,0x0 0x86b,0x0 0x86c,0x0 0x86d,0x0 0x86e,0x0 0x86f,0x0 0x870,0x0 0x871,0x0 0x872,0x0 0x873,0x0 0x874,0x0 0x875,0x0 0x876,0x0 0x877,0x0 0x878,0x0 0x879,0x0 0x87a,0x0 0x87b,0x0 0x87c,0x0 0x87d,0x0 0x87e,0x0 0x87f,0x0 0x880,0x0 0x881,0x0 0x882,0x0 0x883,0x0 0x884,0x0 0x885,0x0 0x886,0x0 0x887,0x0 0x888,0x0 0x889,0x0 0x88a,0x0 0x88b,0x0 0x88c,0x0 0x88d,0x0 0x88e,0x0 0x88f,0x0 0x890,0x0 0x891,0x0 0x892,0x0 0x893,0x0 0x894,0x0 0x895,0x0 0x896,0x0 0x897,0x0 0x898,0x0 0x899,0x0 0x89a,0x0 0x89b,0x0 0x89c,0x0 0x89d,0x0 0x89e,0x0 0x89f,0x0 0x8a0,0x0 0x8a1,0x0 0x8a2,0x0 0x8a3,0x0 0x8a4,0x0 0x8a5,0x0 0x8a6,0x0 0x8a7,0x0 0x8a8,0x0 0x8a9,0x0 0x8aa,0x0 0x8ab,0x0 0x8ac,0x0 0x8ad,0x0 0x8ae,0x0 0x8af,0x0 0x8b0,0x0 0x8b1,0x0 0x8b2,0x0 0x8b3,0x0 0x8b4,0x0 0x8b5,0x0 0x8b6,0x0 0x8b7,0x0 0x8b8,0x0 0x8b9,0x0 0x8ba,0x0 0x8bb,0x0 0x8bc,0x0 0x8bd,0x0 0x8be,0x0 0x8bf,0x0 0x8c0,0x0 0x8c1,0x0 0x8c2,0x0 0x8c3,0x0 0x8c4,0x0 0x8c5,0x0 0x8c6,0x0 0x8c7,0x0 0x8c8,0x0 0x8c9,0x0 0x8ca,0x0 0x8cb,0x0 0x8cc,0x0 0x8cd,0x0 0x8ce,0x0 0x8cf,0x0 0x8d0,0x0 0x8d1,0x0 
0x8d2,0x0 0x8d3,0x0 0x8d4,0x0 0x8d5,0x0 0x8d6,0x0 0x8d7,0x0 0x8d8,0x0 0x8d9,0x0 0x8da,0x0 0x8db,0x0 0x8dc,0x0 0x8dd,0x0 0x8de,0x0 0x8df,0x0 0x8e0,0x0 0x8e1,0x0 0x8e2,0x0 0x8e3,0x0 0x8e4,0x0 0x8e5,0x0 0x8e6,0x0 0x8e7,0x0 0x8e8,0x0 0x8e9,0x0 0x8ea,0x0 0x8eb,0x0 0x8ec,0x0 0x8ed,0x0 0x8ee,0x0 0x8ef,0x0 0x8f0,0x0 0x8f1,0x0 0x8f2,0x0 0x8f3,0x0 0x8f4,0x0 0x8f5,0x0 0x8f6,0x0 0x8f7,0x0 0x8f8,0x0 0x8f9,0x0 0x8fa,0x0 0x8fb,0x0 0x8fc,0x0 0x8fd,0x0 0x8fe,0x0 0x8ff,0x0 0xc0000080,0xd01 0xc0000081,0x23001000000000 0xc0000082,0xffffffff81a00080 0xc0000083,0xffffffff81a015c0 0xc0000084,0x47700 0xc0000100,0x3f300380 0xc0000101,0xffff88803ec00000 0xc0000102,0x0 0xc0000103,0x0 0xc0010000,0x0 0xc0010001,0x0 0xc0010002,0x0 0xc0010003,0x0 0xc0010004,0x0 0xc0010005,0x0 0xc0010006,0x0 0xc0010007,0x0 0xc0010010,0x0 0xc0010015,0x0 0xc001001b,0x20000000 0xc001001f,0x0 0xc0010055,0x0 0xc0010058,0x0 0xc0010112,0x0 0xc0010113,0x0 0xc0010117,0x0 0xc0010200,0x0 0xc0010201,0x0 0xc0010202,0x0 0xc0010203,0x0 0xc0010204,0x0 0xc0010205,0x0 0xc0010206,0x0 0xc0010207,0x0 0xc0010208,0x0 0xc0010209,0x0 0xc001020a,0x0 0xc001020b,0x0 0xc0011022,0x0 0xc0011023,0x0 0xc001102a,0x0 0xc001102c,0x0 0x400000000,0x0 0x2000000000,0x0 0x4000000000,0x0 0x8000000000,0x0 0x1000000000000,0x0 0x3c000000000000,0x0 0x80000000000000,0x0 0x40000000000000,0x0 ================================================ FILE: tests/data/msr/msr_list_T2CL_INTEL_ICELAKE_5.10host_6.1guest.csv ================================================ MSR_ADDR,VALUE 0x0,0x0 0x1,0x0 0x10,0x11a6f98c0 0x11,0x27fd008 0x12,0x27fe001 0x17,0x0 0x1b,0xfee00d00 0x2a,0x0 0x2c,0x1000000 0x34,0x0 0x3a,0x1 0x3b,0x0 0x48,0x1 0x8b,0x100000000 0xc1,0x0 0xc2,0x0 0xcd,0x3 0xce,0x80000000 0xfe,0x508 0x10a,0xc02a0eb 0x11e,0xbe702111 0x122,0x3 0x140,0x0 0x174,0x10 0x175,0xfffffe0000003000 0x176,0xffffffff81c01630 0x179,0x20 0x17a,0x0 0x186,0x0 0x187,0x0 0x198,0x400000003e8 0x199,0x0 0x1a0,0x1 0x1d9,0x0 0x1db,0x0 0x1dc,0x0 0x1dd,0x0 0x1de,0x0 0x1fc,0x0 0x200,0x0 0x201,0x0 
0x202,0x0 0x203,0x0 0x204,0x0 0x205,0x0 0x206,0x0 0x207,0x0 0x208,0x0 0x209,0x0 0x20a,0x0 0x20b,0x0 0x20c,0x0 0x20d,0x0 0x20e,0x0 0x20f,0x0 0x250,0x0 0x258,0x0 0x259,0x0 0x268,0x0 0x269,0x0 0x26a,0x0 0x26b,0x0 0x26c,0x0 0x26d,0x0 0x26e,0x0 0x26f,0x0 0x277,0x407050600070106 0x2ff,0x806 0x400,0x0 0x401,0x0 0x402,0x0 0x403,0x0 0x404,0x0 0x405,0x0 0x406,0x0 0x407,0x0 0x408,0x0 0x409,0x0 0x40a,0x0 0x40b,0x0 0x40c,0x0 0x40d,0x0 0x40e,0x0 0x40f,0x0 0x410,0x0 0x411,0x0 0x412,0x0 0x413,0x0 0x414,0x0 0x415,0x0 0x416,0x0 0x417,0x0 0x418,0x0 0x419,0x0 0x41a,0x0 0x41b,0x0 0x41c,0x0 0x41d,0x0 0x41e,0x0 0x41f,0x0 0x420,0x0 0x421,0x0 0x422,0x0 0x423,0x0 0x424,0x0 0x425,0x0 0x426,0x0 0x427,0x0 0x428,0x0 0x429,0x0 0x42a,0x0 0x42b,0x0 0x42c,0x0 0x42d,0x0 0x42e,0x0 0x42f,0x0 0x430,0x0 0x431,0x0 0x432,0x0 0x433,0x0 0x434,0x0 0x435,0x0 0x436,0x0 0x437,0x0 0x438,0x0 0x439,0x0 0x43a,0x0 0x43b,0x0 0x43c,0x0 0x43d,0x0 0x43e,0x0 0x43f,0x0 0x440,0x0 0x441,0x0 0x442,0x0 0x443,0x0 0x444,0x0 0x445,0x0 0x446,0x0 0x447,0x0 0x448,0x0 0x449,0x0 0x44a,0x0 0x44b,0x0 0x44c,0x0 0x44d,0x0 0x44e,0x0 0x44f,0x0 0x450,0x0 0x451,0x0 0x452,0x0 0x453,0x0 0x454,0x0 0x455,0x0 0x456,0x0 0x457,0x0 0x458,0x0 0x459,0x0 0x45a,0x0 0x45b,0x0 0x45c,0x0 0x45d,0x0 0x45e,0x0 0x45f,0x0 0x460,0x0 0x461,0x0 0x462,0x0 0x463,0x0 0x464,0x0 0x465,0x0 0x466,0x0 0x467,0x0 0x468,0x0 0x469,0x0 0x46a,0x0 0x46b,0x0 0x46c,0x0 0x46d,0x0 0x46e,0x0 0x46f,0x0 0x470,0x0 0x471,0x0 0x472,0x0 0x473,0x0 0x474,0x0 0x475,0x0 0x476,0x0 0x477,0x0 0x478,0x0 0x479,0x0 0x47a,0x0 0x47b,0x0 0x47c,0x0 0x47d,0x0 0x47e,0x0 0x47f,0x0 0x606,0x0 0x611,0x0 0x619,0x0 0x639,0x0 0x641,0x0 0x6e0,0x11b19f31e 0x800,0x0 0x801,0x0 0x802,0x0 0x803,0x50014 0x804,0x0 0x805,0x0 0x806,0x0 0x807,0x0 0x808,0x10 0x809,0x0 0x80a,0x10 0x80b,0x0 0x80c,0x0 0x80d,0x1 0x80e,0xffffffff 0x80f,0x1ff 0x810,0x0 0x811,0x0 0x812,0x0 0x813,0x0 0x814,0x0 0x815,0x0 0x816,0x0 0x817,0x0 0x818,0x0 0x819,0x0 0x81a,0x0 0x81b,0x0 0x81c,0x0 0x81d,0x0 0x81e,0x0 0x81f,0x0 0x820,0x0 0x821,0x0 0x822,0x0 
0x823,0x0 0x824,0x0 0x825,0x0 0x826,0x0 0x827,0x0 0x828,0x0 0x829,0x0 0x82a,0x0 0x82b,0x0 0x82c,0x0 0x82d,0x0 0x82e,0x0 0x82f,0x0 0x830,0x0 0x831,0x0 0x832,0x400ec 0x833,0x10000 0x834,0x10000 0x835,0x10700 0x836,0x400 0x837,0xfe 0x838,0x0 0x839,0x0 0x83a,0x0 0x83b,0x0 0x83c,0x0 0x83d,0x0 0x83e,0x0 0x83f,0x0 0x840,0x0 0x841,0x0 0x842,0x0 0x843,0x0 0x844,0x0 0x845,0x0 0x846,0x0 0x847,0x0 0x848,0x0 0x849,0x0 0x84a,0x0 0x84b,0x0 0x84c,0x0 0x84d,0x0 0x84e,0x0 0x84f,0x0 0x850,0x0 0x851,0x0 0x852,0x0 0x853,0x0 0x854,0x0 0x855,0x0 0x856,0x0 0x857,0x0 0x858,0x0 0x859,0x0 0x85a,0x0 0x85b,0x0 0x85c,0x0 0x85d,0x0 0x85e,0x0 0x85f,0x0 0x860,0x0 0x861,0x0 0x862,0x0 0x863,0x0 0x864,0x0 0x865,0x0 0x866,0x0 0x867,0x0 0x868,0x0 0x869,0x0 0x86a,0x0 0x86b,0x0 0x86c,0x0 0x86d,0x0 0x86e,0x0 0x86f,0x0 0x870,0x0 0x871,0x0 0x872,0x0 0x873,0x0 0x874,0x0 0x875,0x0 0x876,0x0 0x877,0x0 0x878,0x0 0x879,0x0 0x87a,0x0 0x87b,0x0 0x87c,0x0 0x87d,0x0 0x87e,0x0 0x87f,0x0 0x880,0x0 0x881,0x0 0x882,0x0 0x883,0x0 0x884,0x0 0x885,0x0 0x886,0x0 0x887,0x0 0x888,0x0 0x889,0x0 0x88a,0x0 0x88b,0x0 0x88c,0x0 0x88d,0x0 0x88e,0x0 0x88f,0x0 0x890,0x0 0x891,0x0 0x892,0x0 0x893,0x0 0x894,0x0 0x895,0x0 0x896,0x0 0x897,0x0 0x898,0x0 0x899,0x0 0x89a,0x0 0x89b,0x0 0x89c,0x0 0x89d,0x0 0x89e,0x0 0x89f,0x0 0x8a0,0x0 0x8a1,0x0 0x8a2,0x0 0x8a3,0x0 0x8a4,0x0 0x8a5,0x0 0x8a6,0x0 0x8a7,0x0 0x8a8,0x0 0x8a9,0x0 0x8aa,0x0 0x8ab,0x0 0x8ac,0x0 0x8ad,0x0 0x8ae,0x0 0x8af,0x0 0x8b0,0x0 0x8b1,0x0 0x8b2,0x0 0x8b3,0x0 0x8b4,0x0 0x8b5,0x0 0x8b6,0x0 0x8b7,0x0 0x8b8,0x0 0x8b9,0x0 0x8ba,0x0 0x8bb,0x0 0x8bc,0x0 0x8bd,0x0 0x8be,0x0 0x8bf,0x0 0x8c0,0x0 0x8c1,0x0 0x8c2,0x0 0x8c3,0x0 0x8c4,0x0 0x8c5,0x0 0x8c6,0x0 0x8c7,0x0 0x8c8,0x0 0x8c9,0x0 0x8ca,0x0 0x8cb,0x0 0x8cc,0x0 0x8cd,0x0 0x8ce,0x0 0x8cf,0x0 0x8d0,0x0 0x8d1,0x0 0x8d2,0x0 0x8d3,0x0 0x8d4,0x0 0x8d5,0x0 0x8d6,0x0 0x8d7,0x0 0x8d8,0x0 0x8d9,0x0 0x8da,0x0 0x8db,0x0 0x8dc,0x0 0x8dd,0x0 0x8de,0x0 0x8df,0x0 0x8e0,0x0 0x8e1,0x0 0x8e2,0x0 0x8e3,0x0 0x8e4,0x0 0x8e5,0x0 0x8e6,0x0 0x8e7,0x0 0x8e8,0x0 
0x8e9,0x0 0x8ea,0x0 0x8eb,0x0 0x8ec,0x0 0x8ed,0x0 0x8ee,0x0 0x8ef,0x0 0x8f0,0x0 0x8f1,0x0 0x8f2,0x0 0x8f3,0x0 0x8f4,0x0 0x8f5,0x0 0x8f6,0x0 0x8f7,0x0 0x8f8,0x0 0x8f9,0x0 0x8fa,0x0 0x8fb,0x0 0x8fc,0x0 0x8fd,0x0 0x8fe,0x0 0x8ff,0x0 0xc0000080,0xd01 0xc0000081,0x23001000000000 0xc0000082,0xffffffff81c00080 0xc0000083,0x0 0xc0000084,0x257fd5 0xc0000100,0x302e7380 0xc0000101,0xffff88803ec00000 0xc0000102,0x0 0xc0000103,0x0 0xc0010000,0x0 0xc0010001,0x0 0xc0010002,0x0 0xc0010003,0x0 0xc0010004,0x0 0xc0010005,0x0 0xc0010006,0x0 0xc0010007,0x0 0xc0010010,0x0 0xc0010015,0x0 0xc001001b,0x20000000 0xc001001f,0x0 0xc0010055,0x0 0xc0010058,0x0 0xc0010112,0x0 0xc0010113,0x0 0xc0010117,0x0 0xc0010200,0x0 0xc0010201,0x0 0xc0010202,0x0 0xc0010203,0x0 0xc0010204,0x0 0xc0010205,0x0 0xc0010206,0x0 0xc0010207,0x0 0xc0010208,0x0 0xc0010209,0x0 0xc001020a,0x0 0xc001020b,0x0 0xc0011022,0x0 0xc0011023,0x0 0xc001102a,0x0 0xc001102c,0x0 0x400000000,0x0 0x2000000000,0x0 0x4000000000,0x0 0x8000000000,0x0 0x1000000000000,0x0 0x3c000000000000,0x0 0x80000000000000,0x0 0x40000000000000,0x0 ================================================ FILE: tests/data/msr/msr_list_T2CL_INTEL_ICELAKE_6.1host_5.10guest.csv ================================================ MSR_ADDR,VALUE 0x0,0x0 0x1,0x0 0x10,0x15bd5f0d0 0x11,0x24e6008 0x12,0x24e7001 0x17,0x0 0x1b,0xfee00d00 0x2a,0x0 0x2c,0x1000000 0x34,0x0 0x3a,0x1 0x3b,0x0 0x48,0x1 0x8b,0x100000000 0xc1,0x0 0xc2,0x0 0xcd,0x3 0xce,0x80000000 0xfe,0x508 0x10a,0xc02a0eb 0x11e,0xbe702111 0x122,0x3 0x140,0x0 0x174,0x10 0x175,0xfffffe0000003000 0x176,0xffffffff81a01510 0x179,0x20 0x17a,0x0 0x186,0x0 0x187,0x0 0x198,0x400000003e8 0x199,0x0 0x1a0,0x1 0x1d9,0x0 0x1db,0x0 0x1dc,0x0 0x1dd,0x0 0x1de,0x0 0x1fc,0x0 0x200,0x0 0x201,0x0 0x202,0x0 0x203,0x0 0x204,0x0 0x205,0x0 0x206,0x0 0x207,0x0 0x208,0x0 0x209,0x0 0x20a,0x0 0x20b,0x0 0x20c,0x0 0x20d,0x0 0x20e,0x0 0x20f,0x0 0x250,0x0 0x258,0x0 0x259,0x0 0x268,0x0 0x269,0x0 0x26a,0x0 0x26b,0x0 0x26c,0x0 0x26d,0x0 0x26e,0x0 
0x26f,0x0 0x277,0x407050600070106 0x2ff,0x806 0x400,0x0 0x401,0x0 0x402,0x0 0x403,0x0 0x404,0x0 0x405,0x0 0x406,0x0 0x407,0x0 0x408,0x0 0x409,0x0 0x40a,0x0 0x40b,0x0 0x40c,0x0 0x40d,0x0 0x40e,0x0 0x40f,0x0 0x410,0x0 0x411,0x0 0x412,0x0 0x413,0x0 0x414,0x0 0x415,0x0 0x416,0x0 0x417,0x0 0x418,0x0 0x419,0x0 0x41a,0x0 0x41b,0x0 0x41c,0x0 0x41d,0x0 0x41e,0x0 0x41f,0x0 0x420,0x0 0x421,0x0 0x422,0x0 0x423,0x0 0x424,0x0 0x425,0x0 0x426,0x0 0x427,0x0 0x428,0x0 0x429,0x0 0x42a,0x0 0x42b,0x0 0x42c,0x0 0x42d,0x0 0x42e,0x0 0x42f,0x0 0x430,0x0 0x431,0x0 0x432,0x0 0x433,0x0 0x434,0x0 0x435,0x0 0x436,0x0 0x437,0x0 0x438,0x0 0x439,0x0 0x43a,0x0 0x43b,0x0 0x43c,0x0 0x43d,0x0 0x43e,0x0 0x43f,0x0 0x440,0x0 0x441,0x0 0x442,0x0 0x443,0x0 0x444,0x0 0x445,0x0 0x446,0x0 0x447,0x0 0x448,0x0 0x449,0x0 0x44a,0x0 0x44b,0x0 0x44c,0x0 0x44d,0x0 0x44e,0x0 0x44f,0x0 0x450,0x0 0x451,0x0 0x452,0x0 0x453,0x0 0x454,0x0 0x455,0x0 0x456,0x0 0x457,0x0 0x458,0x0 0x459,0x0 0x45a,0x0 0x45b,0x0 0x45c,0x0 0x45d,0x0 0x45e,0x0 0x45f,0x0 0x460,0x0 0x461,0x0 0x462,0x0 0x463,0x0 0x464,0x0 0x465,0x0 0x466,0x0 0x467,0x0 0x468,0x0 0x469,0x0 0x46a,0x0 0x46b,0x0 0x46c,0x0 0x46d,0x0 0x46e,0x0 0x46f,0x0 0x470,0x0 0x471,0x0 0x472,0x0 0x473,0x0 0x474,0x0 0x475,0x0 0x476,0x0 0x477,0x0 0x478,0x0 0x479,0x0 0x47a,0x0 0x47b,0x0 0x47c,0x0 0x47d,0x0 0x47e,0x0 0x47f,0x0 0x606,0x0 0x611,0x0 0x619,0x0 0x639,0x0 0x641,0x0 0x6e0,0x15cca5b5a 0x800,0x0 0x801,0x0 0x802,0x0 0x803,0x50014 0x804,0x0 0x805,0x0 0x806,0x0 0x807,0x0 0x808,0x10 0x809,0x0 0x80a,0x10 0x80b,0x0 0x80c,0x0 0x80d,0x1 0x80e,0xffffffff 0x80f,0x1ff 0x810,0x0 0x811,0x0 0x812,0x0 0x813,0x0 0x814,0x0 0x815,0x0 0x816,0x0 0x817,0x0 0x818,0x0 0x819,0x0 0x81a,0x0 0x81b,0x0 0x81c,0x0 0x81d,0x0 0x81e,0x0 0x81f,0x0 0x820,0x0 0x821,0x0 0x822,0x0 0x823,0x0 0x824,0x0 0x825,0x0 0x826,0x0 0x827,0x0 0x828,0x0 0x829,0x0 0x82a,0x0 0x82b,0x0 0x82c,0x0 0x82d,0x0 0x82e,0x0 0x82f,0x0 0x830,0x0 0x831,0x0 0x832,0x400ec 0x833,0x10000 0x834,0x10000 0x835,0x10700 0x836,0x400 0x837,0xfe 0x838,0x0 
0x839,0x0 0x83a,0x0 0x83b,0x0 0x83c,0x0 0x83d,0x0 0x83e,0x0 0x83f,0x0 0x840,0x0 0x841,0x0 0x842,0x0 0x843,0x0 0x844,0x0 0x845,0x0 0x846,0x0 0x847,0x0 0x848,0x0 0x849,0x0 0x84a,0x0 0x84b,0x0 0x84c,0x0 0x84d,0x0 0x84e,0x0 0x84f,0x0 0x850,0x0 0x851,0x0 0x852,0x0 0x853,0x0 0x854,0x0 0x855,0x0 0x856,0x0 0x857,0x0 0x858,0x0 0x859,0x0 0x85a,0x0 0x85b,0x0 0x85c,0x0 0x85d,0x0 0x85e,0x0 0x85f,0x0 0x860,0x0 0x861,0x0 0x862,0x0 0x863,0x0 0x864,0x0 0x865,0x0 0x866,0x0 0x867,0x0 0x868,0x0 0x869,0x0 0x86a,0x0 0x86b,0x0 0x86c,0x0 0x86d,0x0 0x86e,0x0 0x86f,0x0 0x870,0x0 0x871,0x0 0x872,0x0 0x873,0x0 0x874,0x0 0x875,0x0 0x876,0x0 0x877,0x0 0x878,0x0 0x879,0x0 0x87a,0x0 0x87b,0x0 0x87c,0x0 0x87d,0x0 0x87e,0x0 0x87f,0x0 0x880,0x0 0x881,0x0 0x882,0x0 0x883,0x0 0x884,0x0 0x885,0x0 0x886,0x0 0x887,0x0 0x888,0x0 0x889,0x0 0x88a,0x0 0x88b,0x0 0x88c,0x0 0x88d,0x0 0x88e,0x0 0x88f,0x0 0x890,0x0 0x891,0x0 0x892,0x0 0x893,0x0 0x894,0x0 0x895,0x0 0x896,0x0 0x897,0x0 0x898,0x0 0x899,0x0 0x89a,0x0 0x89b,0x0 0x89c,0x0 0x89d,0x0 0x89e,0x0 0x89f,0x0 0x8a0,0x0 0x8a1,0x0 0x8a2,0x0 0x8a3,0x0 0x8a4,0x0 0x8a5,0x0 0x8a6,0x0 0x8a7,0x0 0x8a8,0x0 0x8a9,0x0 0x8aa,0x0 0x8ab,0x0 0x8ac,0x0 0x8ad,0x0 0x8ae,0x0 0x8af,0x0 0x8b0,0x0 0x8b1,0x0 0x8b2,0x0 0x8b3,0x0 0x8b4,0x0 0x8b5,0x0 0x8b6,0x0 0x8b7,0x0 0x8b8,0x0 0x8b9,0x0 0x8ba,0x0 0x8bb,0x0 0x8bc,0x0 0x8bd,0x0 0x8be,0x0 0x8bf,0x0 0x8c0,0x0 0x8c1,0x0 0x8c2,0x0 0x8c3,0x0 0x8c4,0x0 0x8c5,0x0 0x8c6,0x0 0x8c7,0x0 0x8c8,0x0 0x8c9,0x0 0x8ca,0x0 0x8cb,0x0 0x8cc,0x0 0x8cd,0x0 0x8ce,0x0 0x8cf,0x0 0x8d0,0x0 0x8d1,0x0 0x8d2,0x0 0x8d3,0x0 0x8d4,0x0 0x8d5,0x0 0x8d6,0x0 0x8d7,0x0 0x8d8,0x0 0x8d9,0x0 0x8da,0x0 0x8db,0x0 0x8dc,0x0 0x8dd,0x0 0x8de,0x0 0x8df,0x0 0x8e0,0x0 0x8e1,0x0 0x8e2,0x0 0x8e3,0x0 0x8e4,0x0 0x8e5,0x0 0x8e6,0x0 0x8e7,0x0 0x8e8,0x0 0x8e9,0x0 0x8ea,0x0 0x8eb,0x0 0x8ec,0x0 0x8ed,0x0 0x8ee,0x0 0x8ef,0x0 0x8f0,0x0 0x8f1,0x0 0x8f2,0x0 0x8f3,0x0 0x8f4,0x0 0x8f5,0x0 0x8f6,0x0 0x8f7,0x0 0x8f8,0x0 0x8f9,0x0 0x8fa,0x0 0x8fb,0x0 0x8fc,0x0 0x8fd,0x0 0x8fe,0x0 0x8ff,0x0 
0xc0000080,0xd01 0xc0000081,0x23001000000000 0xc0000082,0xffffffff81a00080 0xc0000083,0xffffffff81a015c0 0xc0000084,0x47700 0xc0000100,0x3c720380 0xc0000101,0xffff88803ec00000 0xc0000102,0x0 0xc0000103,0x0 0xc0010000,0x0 0xc0010001,0x0 0xc0010002,0x0 0xc0010003,0x0 0xc0010004,0x0 0xc0010005,0x0 0xc0010006,0x0 0xc0010007,0x0 0xc0010010,0x0 0xc0010015,0x0 0xc001001b,0x20000000 0xc001001f,0x0 0xc0010055,0x0 0xc0010058,0x0 0xc0010112,0x0 0xc0010113,0x0 0xc0010117,0x0 0xc0011022,0x0 0xc0011023,0x0 0xc001102a,0x0 0xc001102c,0x0 0x400000000,0x0 0x2000000000,0x0 0x4000000000,0x0 0x8000000000,0x0 0x1000000000000,0x0 0x3c000000000000,0x0 0x80000000000000,0x0 0x40000000000000,0x0 ================================================ FILE: tests/data/msr/msr_list_T2CL_INTEL_ICELAKE_6.1host_6.1guest.csv ================================================ MSR_ADDR,VALUE 0x0,0x0 0x1,0x0 0x10,0x11bc7e610 0x11,0x27fd008 0x12,0x27fe001 0x17,0x0 0x1b,0xfee00d00 0x2a,0x0 0x2c,0x1000000 0x34,0x0 0x3a,0x1 0x3b,0x0 0x48,0x1 0x8b,0x100000000 0xc1,0x0 0xc2,0x0 0xcd,0x3 0xce,0x80000000 0xfe,0x508 0x10a,0xc02a0eb 0x11e,0xbe702111 0x122,0x3 0x140,0x0 0x174,0x10 0x175,0xfffffe0000003000 0x176,0xffffffff81c01630 0x179,0x20 0x17a,0x0 0x186,0x0 0x187,0x0 0x198,0x400000003e8 0x199,0x0 0x1a0,0x1 0x1d9,0x0 0x1db,0x0 0x1dc,0x0 0x1dd,0x0 0x1de,0x0 0x1fc,0x0 0x200,0x0 0x201,0x0 0x202,0x0 0x203,0x0 0x204,0x0 0x205,0x0 0x206,0x0 0x207,0x0 0x208,0x0 0x209,0x0 0x20a,0x0 0x20b,0x0 0x20c,0x0 0x20d,0x0 0x20e,0x0 0x20f,0x0 0x250,0x0 0x258,0x0 0x259,0x0 0x268,0x0 0x269,0x0 0x26a,0x0 0x26b,0x0 0x26c,0x0 0x26d,0x0 0x26e,0x0 0x26f,0x0 0x277,0x407050600070106 0x2ff,0x806 0x400,0x0 0x401,0x0 0x402,0x0 0x403,0x0 0x404,0x0 0x405,0x0 0x406,0x0 0x407,0x0 0x408,0x0 0x409,0x0 0x40a,0x0 0x40b,0x0 0x40c,0x0 0x40d,0x0 0x40e,0x0 0x40f,0x0 0x410,0x0 0x411,0x0 0x412,0x0 0x413,0x0 0x414,0x0 0x415,0x0 0x416,0x0 0x417,0x0 0x418,0x0 0x419,0x0 0x41a,0x0 0x41b,0x0 0x41c,0x0 0x41d,0x0 0x41e,0x0 0x41f,0x0 0x420,0x0 0x421,0x0 0x422,0x0 
0x423,0x0 0x424,0x0 0x425,0x0 0x426,0x0 0x427,0x0 0x428,0x0 0x429,0x0 0x42a,0x0 0x42b,0x0 0x42c,0x0 0x42d,0x0 0x42e,0x0 0x42f,0x0 0x430,0x0 0x431,0x0 0x432,0x0 0x433,0x0 0x434,0x0 0x435,0x0 0x436,0x0 0x437,0x0 0x438,0x0 0x439,0x0 0x43a,0x0 0x43b,0x0 0x43c,0x0 0x43d,0x0 0x43e,0x0 0x43f,0x0 0x440,0x0 0x441,0x0 0x442,0x0 0x443,0x0 0x444,0x0 0x445,0x0 0x446,0x0 0x447,0x0 0x448,0x0 0x449,0x0 0x44a,0x0 0x44b,0x0 0x44c,0x0 0x44d,0x0 0x44e,0x0 0x44f,0x0 0x450,0x0 0x451,0x0 0x452,0x0 0x453,0x0 0x454,0x0 0x455,0x0 0x456,0x0 0x457,0x0 0x458,0x0 0x459,0x0 0x45a,0x0 0x45b,0x0 0x45c,0x0 0x45d,0x0 0x45e,0x0 0x45f,0x0 0x460,0x0 0x461,0x0 0x462,0x0 0x463,0x0 0x464,0x0 0x465,0x0 0x466,0x0 0x467,0x0 0x468,0x0 0x469,0x0 0x46a,0x0 0x46b,0x0 0x46c,0x0 0x46d,0x0 0x46e,0x0 0x46f,0x0 0x470,0x0 0x471,0x0 0x472,0x0 0x473,0x0 0x474,0x0 0x475,0x0 0x476,0x0 0x477,0x0 0x478,0x0 0x479,0x0 0x47a,0x0 0x47b,0x0 0x47c,0x0 0x47d,0x0 0x47e,0x0 0x47f,0x0 0x606,0x0 0x611,0x0 0x619,0x0 0x639,0x0 0x641,0x0 0x6e0,0x11c6b41d8 0x800,0x0 0x801,0x0 0x802,0x0 0x803,0x50014 0x804,0x0 0x805,0x0 0x806,0x0 0x807,0x0 0x808,0x10 0x809,0x0 0x80a,0x10 0x80b,0x0 0x80c,0x0 0x80d,0x1 0x80e,0xffffffff 0x80f,0x1ff 0x810,0x0 0x811,0x0 0x812,0x0 0x813,0x0 0x814,0x0 0x815,0x0 0x816,0x0 0x817,0x0 0x818,0x0 0x819,0x0 0x81a,0x0 0x81b,0x0 0x81c,0x0 0x81d,0x0 0x81e,0x0 0x81f,0x0 0x820,0x0 0x821,0x0 0x822,0x0 0x823,0x0 0x824,0x0 0x825,0x0 0x826,0x0 0x827,0x0 0x828,0x0 0x829,0x0 0x82a,0x0 0x82b,0x0 0x82c,0x0 0x82d,0x0 0x82e,0x0 0x82f,0x0 0x830,0x0 0x831,0x0 0x832,0x400ec 0x833,0x10000 0x834,0x10000 0x835,0x10700 0x836,0x400 0x837,0xfe 0x838,0x0 0x839,0x0 0x83a,0x0 0x83b,0x0 0x83c,0x0 0x83d,0x0 0x83e,0x0 0x83f,0x0 0x840,0x0 0x841,0x0 0x842,0x0 0x843,0x0 0x844,0x0 0x845,0x0 0x846,0x0 0x847,0x0 0x848,0x0 0x849,0x0 0x84a,0x0 0x84b,0x0 0x84c,0x0 0x84d,0x0 0x84e,0x0 0x84f,0x0 0x850,0x0 0x851,0x0 0x852,0x0 0x853,0x0 0x854,0x0 0x855,0x0 0x856,0x0 0x857,0x0 0x858,0x0 0x859,0x0 0x85a,0x0 0x85b,0x0 0x85c,0x0 0x85d,0x0 0x85e,0x0 0x85f,0x0 
0x860,0x0 0x861,0x0 0x862,0x0 0x863,0x0 0x864,0x0 0x865,0x0 0x866,0x0 0x867,0x0 0x868,0x0 0x869,0x0 0x86a,0x0 0x86b,0x0 0x86c,0x0 0x86d,0x0 0x86e,0x0 0x86f,0x0 0x870,0x0 0x871,0x0 0x872,0x0 0x873,0x0 0x874,0x0 0x875,0x0 0x876,0x0 0x877,0x0 0x878,0x0 0x879,0x0 0x87a,0x0 0x87b,0x0 0x87c,0x0 0x87d,0x0 0x87e,0x0 0x87f,0x0 0x880,0x0 0x881,0x0 0x882,0x0 0x883,0x0 0x884,0x0 0x885,0x0 0x886,0x0 0x887,0x0 0x888,0x0 0x889,0x0 0x88a,0x0 0x88b,0x0 0x88c,0x0 0x88d,0x0 0x88e,0x0 0x88f,0x0 0x890,0x0 0x891,0x0 0x892,0x0 0x893,0x0 0x894,0x0 0x895,0x0 0x896,0x0 0x897,0x0 0x898,0x0 0x899,0x0 0x89a,0x0 0x89b,0x0 0x89c,0x0 0x89d,0x0 0x89e,0x0 0x89f,0x0 0x8a0,0x0 0x8a1,0x0 0x8a2,0x0 0x8a3,0x0 0x8a4,0x0 0x8a5,0x0 0x8a6,0x0 0x8a7,0x0 0x8a8,0x0 0x8a9,0x0 0x8aa,0x0 0x8ab,0x0 0x8ac,0x0 0x8ad,0x0 0x8ae,0x0 0x8af,0x0 0x8b0,0x0 0x8b1,0x0 0x8b2,0x0 0x8b3,0x0 0x8b4,0x0 0x8b5,0x0 0x8b6,0x0 0x8b7,0x0 0x8b8,0x0 0x8b9,0x0 0x8ba,0x0 0x8bb,0x0 0x8bc,0x0 0x8bd,0x0 0x8be,0x0 0x8bf,0x0 0x8c0,0x0 0x8c1,0x0 0x8c2,0x0 0x8c3,0x0 0x8c4,0x0 0x8c5,0x0 0x8c6,0x0 0x8c7,0x0 0x8c8,0x0 0x8c9,0x0 0x8ca,0x0 0x8cb,0x0 0x8cc,0x0 0x8cd,0x0 0x8ce,0x0 0x8cf,0x0 0x8d0,0x0 0x8d1,0x0 0x8d2,0x0 0x8d3,0x0 0x8d4,0x0 0x8d5,0x0 0x8d6,0x0 0x8d7,0x0 0x8d8,0x0 0x8d9,0x0 0x8da,0x0 0x8db,0x0 0x8dc,0x0 0x8dd,0x0 0x8de,0x0 0x8df,0x0 0x8e0,0x0 0x8e1,0x0 0x8e2,0x0 0x8e3,0x0 0x8e4,0x0 0x8e5,0x0 0x8e6,0x0 0x8e7,0x0 0x8e8,0x0 0x8e9,0x0 0x8ea,0x0 0x8eb,0x0 0x8ec,0x0 0x8ed,0x0 0x8ee,0x0 0x8ef,0x0 0x8f0,0x0 0x8f1,0x0 0x8f2,0x0 0x8f3,0x0 0x8f4,0x0 0x8f5,0x0 0x8f6,0x0 0x8f7,0x0 0x8f8,0x0 0x8f9,0x0 0x8fa,0x0 0x8fb,0x0 0x8fc,0x0 0x8fd,0x0 0x8fe,0x0 0x8ff,0x0 0xc0000080,0xd01 0xc0000081,0x23001000000000 0xc0000082,0xffffffff81c00080 0xc0000083,0x0 0xc0000084,0x257fd5 0xc0000100,0x11deb380 0xc0000101,0xffff88803ec00000 0xc0000102,0x0 0xc0000103,0x0 0xc0010000,0x0 0xc0010001,0x0 0xc0010002,0x0 0xc0010003,0x0 0xc0010004,0x0 0xc0010005,0x0 0xc0010006,0x0 0xc0010007,0x0 0xc0010010,0x0 0xc0010015,0x0 0xc001001b,0x20000000 0xc001001f,0x0 0xc0010055,0x0 
0xc0010058,0x0 0xc0010112,0x0 0xc0010113,0x0 0xc0010117,0x0 0xc0011022,0x0 0xc0011023,0x0 0xc001102a,0x0 0xc001102c,0x0 0x400000000,0x0 0x2000000000,0x0 0x4000000000,0x0 0x8000000000,0x0 0x1000000000000,0x0 0x3c000000000000,0x0 0x80000000000000,0x0 0x40000000000000,0x0 ================================================ FILE: tests/data/msr/msr_list_T2S_INTEL_CASCADELAKE_5.10host_5.10guest.csv ================================================ MSR_ADDR,VALUE 0x0,0x0 0x1,0x0 0x10,0x151a50ae2 0x11,0x24e6008 0x12,0x24e7001 0x17,0x0 0x1b,0xfee00d00 0x2a,0x0 0x2c,0x1000000 0x34,0x0 0x3a,0x1 0x3b,0x0 0x48,0x1 0x8b,0x100000000 0xc1,0x0 0xc2,0x0 0xcd,0x3 0xce,0x80000000 0xfe,0x508 0x10a,0xc080c4c 0x11e,0xbe702111 0x140,0x0 0x174,0x10 0x175,0xfffffe0000003000 0x176,0xffffffff81a01510 0x179,0x20 0x17a,0x0 0x186,0x0 0x187,0x0 0x198,0x400000003e8 0x199,0x0 0x1a0,0x1 0x1d9,0x0 0x1db,0x0 0x1dc,0x0 0x1dd,0x0 0x1de,0x0 0x1fc,0x0 0x200,0x0 0x201,0x0 0x202,0x0 0x203,0x0 0x204,0x0 0x205,0x0 0x206,0x0 0x207,0x0 0x208,0x0 0x209,0x0 0x20a,0x0 0x20b,0x0 0x20c,0x0 0x20d,0x0 0x20e,0x0 0x20f,0x0 0x250,0x0 0x258,0x0 0x259,0x0 0x268,0x0 0x269,0x0 0x26a,0x0 0x26b,0x0 0x26c,0x0 0x26d,0x0 0x26e,0x0 0x26f,0x0 0x277,0x407050600070106 0x2ff,0x806 0x400,0x0 0x401,0x0 0x402,0x0 0x403,0x0 0x404,0x0 0x405,0x0 0x406,0x0 0x407,0x0 0x408,0x0 0x409,0x0 0x40a,0x0 0x40b,0x0 0x40c,0x0 0x40d,0x0 0x40e,0x0 0x40f,0x0 0x410,0x0 0x411,0x0 0x412,0x0 0x413,0x0 0x414,0x0 0x415,0x0 0x416,0x0 0x417,0x0 0x418,0x0 0x419,0x0 0x41a,0x0 0x41b,0x0 0x41c,0x0 0x41d,0x0 0x41e,0x0 0x41f,0x0 0x420,0x0 0x421,0x0 0x422,0x0 0x423,0x0 0x424,0x0 0x425,0x0 0x426,0x0 0x427,0x0 0x428,0x0 0x429,0x0 0x42a,0x0 0x42b,0x0 0x42c,0x0 0x42d,0x0 0x42e,0x0 0x42f,0x0 0x430,0x0 0x431,0x0 0x432,0x0 0x433,0x0 0x434,0x0 0x435,0x0 0x436,0x0 0x437,0x0 0x438,0x0 0x439,0x0 0x43a,0x0 0x43b,0x0 0x43c,0x0 0x43d,0x0 0x43e,0x0 0x43f,0x0 0x440,0x0 0x441,0x0 0x442,0x0 0x443,0x0 0x444,0x0 0x445,0x0 0x446,0x0 0x447,0x0 0x448,0x0 0x449,0x0 0x44a,0x0 0x44b,0x0 0x44c,0x0 
0x44d,0x0 0x44e,0x0 0x44f,0x0 0x450,0x0 0x451,0x0 0x452,0x0 0x453,0x0 0x454,0x0 0x455,0x0 0x456,0x0 0x457,0x0 0x458,0x0 0x459,0x0 0x45a,0x0 0x45b,0x0 0x45c,0x0 0x45d,0x0 0x45e,0x0 0x45f,0x0 0x460,0x0 0x461,0x0 0x462,0x0 0x463,0x0 0x464,0x0 0x465,0x0 0x466,0x0 0x467,0x0 0x468,0x0 0x469,0x0 0x46a,0x0 0x46b,0x0 0x46c,0x0 0x46d,0x0 0x46e,0x0 0x46f,0x0 0x470,0x0 0x471,0x0 0x472,0x0 0x473,0x0 0x474,0x0 0x475,0x0 0x476,0x0 0x477,0x0 0x478,0x0 0x479,0x0 0x47a,0x0 0x47b,0x0 0x47c,0x0 0x47d,0x0 0x47e,0x0 0x47f,0x0 0x606,0x0 0x611,0x0 0x619,0x0 0x639,0x0 0x641,0x0 0x6e0,0x15254e698 0x800,0x0 0x801,0x0 0x802,0x0 0x803,0x50014 0x804,0x0 0x805,0x0 0x806,0x0 0x807,0x0 0x808,0x10 0x809,0x0 0x80a,0x10 0x80b,0x0 0x80c,0x0 0x80d,0x1 0x80e,0xffffffff 0x80f,0x1ff 0x810,0x0 0x811,0x0 0x812,0x0 0x813,0x0 0x814,0x0 0x815,0x0 0x816,0x0 0x817,0x0 0x818,0x0 0x819,0x0 0x81a,0x0 0x81b,0x0 0x81c,0x0 0x81d,0x0 0x81e,0x0 0x81f,0x0 0x820,0x0 0x821,0x0 0x822,0x0 0x823,0x0 0x824,0x0 0x825,0x0 0x826,0x0 0x827,0x0 0x828,0x0 0x829,0x0 0x82a,0x0 0x82b,0x0 0x82c,0x0 0x82d,0x0 0x82e,0x0 0x82f,0x0 0x830,0x0 0x831,0x0 0x832,0x400ec 0x833,0x10000 0x834,0x10000 0x835,0x10700 0x836,0x400 0x837,0xfe 0x838,0x0 0x839,0x0 0x83a,0x0 0x83b,0x0 0x83c,0x0 0x83d,0x0 0x83e,0x0 0x83f,0x0 0x840,0x0 0x841,0x0 0x842,0x0 0x843,0x0 0x844,0x0 0x845,0x0 0x846,0x0 0x847,0x0 0x848,0x0 0x849,0x0 0x84a,0x0 0x84b,0x0 0x84c,0x0 0x84d,0x0 0x84e,0x0 0x84f,0x0 0x850,0x0 0x851,0x0 0x852,0x0 0x853,0x0 0x854,0x0 0x855,0x0 0x856,0x0 0x857,0x0 0x858,0x0 0x859,0x0 0x85a,0x0 0x85b,0x0 0x85c,0x0 0x85d,0x0 0x85e,0x0 0x85f,0x0 0x860,0x0 0x861,0x0 0x862,0x0 0x863,0x0 0x864,0x0 0x865,0x0 0x866,0x0 0x867,0x0 0x868,0x0 0x869,0x0 0x86a,0x0 0x86b,0x0 0x86c,0x0 0x86d,0x0 0x86e,0x0 0x86f,0x0 0x870,0x0 0x871,0x0 0x872,0x0 0x873,0x0 0x874,0x0 0x875,0x0 0x876,0x0 0x877,0x0 0x878,0x0 0x879,0x0 0x87a,0x0 0x87b,0x0 0x87c,0x0 0x87d,0x0 0x87e,0x0 0x87f,0x0 0x880,0x0 0x881,0x0 0x882,0x0 0x883,0x0 0x884,0x0 0x885,0x0 0x886,0x0 0x887,0x0 0x888,0x0 0x889,0x0 
0x88a,0x0 0x88b,0x0 0x88c,0x0 0x88d,0x0 0x88e,0x0 0x88f,0x0 0x890,0x0 0x891,0x0 0x892,0x0 0x893,0x0 0x894,0x0 0x895,0x0 0x896,0x0 0x897,0x0 0x898,0x0 0x899,0x0 0x89a,0x0 0x89b,0x0 0x89c,0x0 0x89d,0x0 0x89e,0x0 0x89f,0x0 0x8a0,0x0 0x8a1,0x0 0x8a2,0x0 0x8a3,0x0 0x8a4,0x0 0x8a5,0x0 0x8a6,0x0 0x8a7,0x0 0x8a8,0x0 0x8a9,0x0 0x8aa,0x0 0x8ab,0x0 0x8ac,0x0 0x8ad,0x0 0x8ae,0x0 0x8af,0x0 0x8b0,0x0 0x8b1,0x0 0x8b2,0x0 0x8b3,0x0 0x8b4,0x0 0x8b5,0x0 0x8b6,0x0 0x8b7,0x0 0x8b8,0x0 0x8b9,0x0 0x8ba,0x0 0x8bb,0x0 0x8bc,0x0 0x8bd,0x0 0x8be,0x0 0x8bf,0x0 0x8c0,0x0 0x8c1,0x0 0x8c2,0x0 0x8c3,0x0 0x8c4,0x0 0x8c5,0x0 0x8c6,0x0 0x8c7,0x0 0x8c8,0x0 0x8c9,0x0 0x8ca,0x0 0x8cb,0x0 0x8cc,0x0 0x8cd,0x0 0x8ce,0x0 0x8cf,0x0 0x8d0,0x0 0x8d1,0x0 0x8d2,0x0 0x8d3,0x0 0x8d4,0x0 0x8d5,0x0 0x8d6,0x0 0x8d7,0x0 0x8d8,0x0 0x8d9,0x0 0x8da,0x0 0x8db,0x0 0x8dc,0x0 0x8dd,0x0 0x8de,0x0 0x8df,0x0 0x8e0,0x0 0x8e1,0x0 0x8e2,0x0 0x8e3,0x0 0x8e4,0x0 0x8e5,0x0 0x8e6,0x0 0x8e7,0x0 0x8e8,0x0 0x8e9,0x0 0x8ea,0x0 0x8eb,0x0 0x8ec,0x0 0x8ed,0x0 0x8ee,0x0 0x8ef,0x0 0x8f0,0x0 0x8f1,0x0 0x8f2,0x0 0x8f3,0x0 0x8f4,0x0 0x8f5,0x0 0x8f6,0x0 0x8f7,0x0 0x8f8,0x0 0x8f9,0x0 0x8fa,0x0 0x8fb,0x0 0x8fc,0x0 0x8fd,0x0 0x8fe,0x0 0x8ff,0x0 0xc0000080,0xd01 0xc0000081,0x23001000000000 0xc0000082,0xffffffff81a00080 0xc0000083,0xffffffff81a015c0 0xc0000084,0x47700 0xc0000100,0x1ac8380 0xc0000101,0xffff88803ec00000 0xc0000102,0x0 0xc0000103,0x0 0xc0010000,0x0 0xc0010001,0x0 0xc0010002,0x0 0xc0010003,0x0 0xc0010004,0x0 0xc0010005,0x0 0xc0010006,0x0 0xc0010007,0x0 0xc0010010,0x0 0xc0010015,0x0 0xc001001b,0x20000000 0xc001001f,0x0 0xc0010055,0x0 0xc0010058,0x0 0xc0010112,0x0 0xc0010113,0x0 0xc0010117,0x0 0xc0010200,0x0 0xc0010201,0x0 0xc0010202,0x0 0xc0010203,0x0 0xc0010204,0x0 0xc0010205,0x0 0xc0010206,0x0 0xc0010207,0x0 0xc0010208,0x0 0xc0010209,0x0 0xc001020a,0x0 0xc001020b,0x0 0xc0011022,0x0 0xc0011023,0x0 0xc001102a,0x0 0xc001102c,0x0 0x400000000,0x0 0x2000000000,0x0 0x4000000000,0x0 0x8000000000,0x0 0x1000000000000,0x0 0x3c000000000000,0x0 
0x80000000000000,0x0 0x40000000000000,0x0 ================================================ FILE: tests/data/msr/msr_list_T2S_INTEL_CASCADELAKE_5.10host_6.1guest.csv ================================================ MSR_ADDR,VALUE 0x0,0x0 0x1,0x0 0x10,0x1f40deb9a 0x11,0x27fd008 0x12,0x27fe001 0x17,0x0 0x1b,0xfee00d00 0x2a,0x0 0x2c,0x1000000 0x34,0x0 0x3a,0x1 0x3b,0x0 0x48,0x1 0x8b,0x100000000 0xc1,0x0 0xc2,0x0 0xcd,0x3 0xce,0x80000000 0xfe,0x508 0x10a,0xc080c4c 0x11e,0xbe702111 0x140,0x0 0x174,0x10 0x175,0xfffffe0000003000 0x176,0xffffffff81c01630 0x179,0x20 0x17a,0x0 0x186,0x0 0x187,0x0 0x198,0x400000003e8 0x199,0x0 0x1a0,0x1 0x1d9,0x0 0x1db,0x0 0x1dc,0x0 0x1dd,0x0 0x1de,0x0 0x1fc,0x0 0x200,0x0 0x201,0x0 0x202,0x0 0x203,0x0 0x204,0x0 0x205,0x0 0x206,0x0 0x207,0x0 0x208,0x0 0x209,0x0 0x20a,0x0 0x20b,0x0 0x20c,0x0 0x20d,0x0 0x20e,0x0 0x20f,0x0 0x250,0x0 0x258,0x0 0x259,0x0 0x268,0x0 0x269,0x0 0x26a,0x0 0x26b,0x0 0x26c,0x0 0x26d,0x0 0x26e,0x0 0x26f,0x0 0x277,0x407050600070106 0x2ff,0x806 0x400,0x0 0x401,0x0 0x402,0x0 0x403,0x0 0x404,0x0 0x405,0x0 0x406,0x0 0x407,0x0 0x408,0x0 0x409,0x0 0x40a,0x0 0x40b,0x0 0x40c,0x0 0x40d,0x0 0x40e,0x0 0x40f,0x0 0x410,0x0 0x411,0x0 0x412,0x0 0x413,0x0 0x414,0x0 0x415,0x0 0x416,0x0 0x417,0x0 0x418,0x0 0x419,0x0 0x41a,0x0 0x41b,0x0 0x41c,0x0 0x41d,0x0 0x41e,0x0 0x41f,0x0 0x420,0x0 0x421,0x0 0x422,0x0 0x423,0x0 0x424,0x0 0x425,0x0 0x426,0x0 0x427,0x0 0x428,0x0 0x429,0x0 0x42a,0x0 0x42b,0x0 0x42c,0x0 0x42d,0x0 0x42e,0x0 0x42f,0x0 0x430,0x0 0x431,0x0 0x432,0x0 0x433,0x0 0x434,0x0 0x435,0x0 0x436,0x0 0x437,0x0 0x438,0x0 0x439,0x0 0x43a,0x0 0x43b,0x0 0x43c,0x0 0x43d,0x0 0x43e,0x0 0x43f,0x0 0x440,0x0 0x441,0x0 0x442,0x0 0x443,0x0 0x444,0x0 0x445,0x0 0x446,0x0 0x447,0x0 0x448,0x0 0x449,0x0 0x44a,0x0 0x44b,0x0 0x44c,0x0 0x44d,0x0 0x44e,0x0 0x44f,0x0 0x450,0x0 0x451,0x0 0x452,0x0 0x453,0x0 0x454,0x0 0x455,0x0 0x456,0x0 0x457,0x0 0x458,0x0 0x459,0x0 0x45a,0x0 0x45b,0x0 0x45c,0x0 0x45d,0x0 0x45e,0x0 0x45f,0x0 0x460,0x0 0x461,0x0 0x462,0x0 0x463,0x0 
0x464,0x0 0x465,0x0 0x466,0x0 0x467,0x0 0x468,0x0 0x469,0x0 0x46a,0x0 0x46b,0x0 0x46c,0x0 0x46d,0x0 0x46e,0x0 0x46f,0x0 0x470,0x0 0x471,0x0 0x472,0x0 0x473,0x0 0x474,0x0 0x475,0x0 0x476,0x0 0x477,0x0 0x478,0x0 0x479,0x0 0x47a,0x0 0x47b,0x0 0x47c,0x0 0x47d,0x0 0x47e,0x0 0x47f,0x0 0x606,0x0 0x611,0x0 0x619,0x0 0x639,0x0 0x641,0x0 0x6e0,0x1f4f53e20 0x800,0x0 0x801,0x0 0x802,0x0 0x803,0x50014 0x804,0x0 0x805,0x0 0x806,0x0 0x807,0x0 0x808,0x10 0x809,0x0 0x80a,0x10 0x80b,0x0 0x80c,0x0 0x80d,0x1 0x80e,0xffffffff 0x80f,0x1ff 0x810,0x0 0x811,0x0 0x812,0x0 0x813,0x0 0x814,0x0 0x815,0x0 0x816,0x0 0x817,0x0 0x818,0x0 0x819,0x0 0x81a,0x0 0x81b,0x0 0x81c,0x0 0x81d,0x0 0x81e,0x0 0x81f,0x0 0x820,0x0 0x821,0x0 0x822,0x0 0x823,0x0 0x824,0x0 0x825,0x0 0x826,0x0 0x827,0x0 0x828,0x0 0x829,0x0 0x82a,0x0 0x82b,0x0 0x82c,0x0 0x82d,0x0 0x82e,0x0 0x82f,0x0 0x830,0x0 0x831,0x0 0x832,0x400ec 0x833,0x10000 0x834,0x10000 0x835,0x10700 0x836,0x400 0x837,0xfe 0x838,0x0 0x839,0x0 0x83a,0x0 0x83b,0x0 0x83c,0x0 0x83d,0x0 0x83e,0x0 0x83f,0x0 0x840,0x0 0x841,0x0 0x842,0x0 0x843,0x0 0x844,0x0 0x845,0x0 0x846,0x0 0x847,0x0 0x848,0x0 0x849,0x0 0x84a,0x0 0x84b,0x0 0x84c,0x0 0x84d,0x0 0x84e,0x0 0x84f,0x0 0x850,0x0 0x851,0x0 0x852,0x0 0x853,0x0 0x854,0x0 0x855,0x0 0x856,0x0 0x857,0x0 0x858,0x0 0x859,0x0 0x85a,0x0 0x85b,0x0 0x85c,0x0 0x85d,0x0 0x85e,0x0 0x85f,0x0 0x860,0x0 0x861,0x0 0x862,0x0 0x863,0x0 0x864,0x0 0x865,0x0 0x866,0x0 0x867,0x0 0x868,0x0 0x869,0x0 0x86a,0x0 0x86b,0x0 0x86c,0x0 0x86d,0x0 0x86e,0x0 0x86f,0x0 0x870,0x0 0x871,0x0 0x872,0x0 0x873,0x0 0x874,0x0 0x875,0x0 0x876,0x0 0x877,0x0 0x878,0x0 0x879,0x0 0x87a,0x0 0x87b,0x0 0x87c,0x0 0x87d,0x0 0x87e,0x0 0x87f,0x0 0x880,0x0 0x881,0x0 0x882,0x0 0x883,0x0 0x884,0x0 0x885,0x0 0x886,0x0 0x887,0x0 0x888,0x0 0x889,0x0 0x88a,0x0 0x88b,0x0 0x88c,0x0 0x88d,0x0 0x88e,0x0 0x88f,0x0 0x890,0x0 0x891,0x0 0x892,0x0 0x893,0x0 0x894,0x0 0x895,0x0 0x896,0x0 0x897,0x0 0x898,0x0 0x899,0x0 0x89a,0x0 0x89b,0x0 0x89c,0x0 0x89d,0x0 0x89e,0x0 0x89f,0x0 0x8a0,0x0 
0x8a1,0x0 0x8a2,0x0 0x8a3,0x0 0x8a4,0x0 0x8a5,0x0 0x8a6,0x0 0x8a7,0x0 0x8a8,0x0 0x8a9,0x0 0x8aa,0x0 0x8ab,0x0 0x8ac,0x0 0x8ad,0x0 0x8ae,0x0 0x8af,0x0 0x8b0,0x0 0x8b1,0x0 0x8b2,0x0 0x8b3,0x0 0x8b4,0x0 0x8b5,0x0 0x8b6,0x0 0x8b7,0x0 0x8b8,0x0 0x8b9,0x0 0x8ba,0x0 0x8bb,0x0 0x8bc,0x0 0x8bd,0x0 0x8be,0x0 0x8bf,0x0 0x8c0,0x0 0x8c1,0x0 0x8c2,0x0 0x8c3,0x0 0x8c4,0x0 0x8c5,0x0 0x8c6,0x0 0x8c7,0x0 0x8c8,0x0 0x8c9,0x0 0x8ca,0x0 0x8cb,0x0 0x8cc,0x0 0x8cd,0x0 0x8ce,0x0 0x8cf,0x0 0x8d0,0x0 0x8d1,0x0 0x8d2,0x0 0x8d3,0x0 0x8d4,0x0 0x8d5,0x0 0x8d6,0x0 0x8d7,0x0 0x8d8,0x0 0x8d9,0x0 0x8da,0x0 0x8db,0x0 0x8dc,0x0 0x8dd,0x0 0x8de,0x0 0x8df,0x0 0x8e0,0x0 0x8e1,0x0 0x8e2,0x0 0x8e3,0x0 0x8e4,0x0 0x8e5,0x0 0x8e6,0x0 0x8e7,0x0 0x8e8,0x0 0x8e9,0x0 0x8ea,0x0 0x8eb,0x0 0x8ec,0x0 0x8ed,0x0 0x8ee,0x0 0x8ef,0x0 0x8f0,0x0 0x8f1,0x0 0x8f2,0x0 0x8f3,0x0 0x8f4,0x0 0x8f5,0x0 0x8f6,0x0 0x8f7,0x0 0x8f8,0x0 0x8f9,0x0 0x8fa,0x0 0x8fb,0x0 0x8fc,0x0 0x8fd,0x0 0x8fe,0x0 0x8ff,0x0 0xc0000080,0xd01 0xc0000081,0x23001000000000 0xc0000082,0xffffffff81c00080 0xc0000083,0x0 0xc0000084,0x257fd5 0xc0000100,0x20301380 0xc0000101,0xffff88803ec00000 0xc0000102,0x0 0xc0000103,0x0 0xc0010000,0x0 0xc0010001,0x0 0xc0010002,0x0 0xc0010003,0x0 0xc0010004,0x0 0xc0010005,0x0 0xc0010006,0x0 0xc0010007,0x0 0xc0010010,0x0 0xc0010015,0x0 0xc001001b,0x20000000 0xc001001f,0x0 0xc0010055,0x0 0xc0010058,0x0 0xc0010112,0x0 0xc0010113,0x0 0xc0010117,0x0 0xc0010200,0x0 0xc0010201,0x0 0xc0010202,0x0 0xc0010203,0x0 0xc0010204,0x0 0xc0010205,0x0 0xc0010206,0x0 0xc0010207,0x0 0xc0010208,0x0 0xc0010209,0x0 0xc001020a,0x0 0xc001020b,0x0 0xc0011022,0x0 0xc0011023,0x0 0xc001102a,0x0 0xc001102c,0x0 0x400000000,0x0 0x2000000000,0x0 0x4000000000,0x0 0x8000000000,0x0 0x1000000000000,0x0 0x3c000000000000,0x0 0x80000000000000,0x0 0x40000000000000,0x0 ================================================ FILE: tests/data/msr/msr_list_T2S_INTEL_CASCADELAKE_6.1host_5.10guest.csv ================================================ MSR_ADDR,VALUE 0x0,0x0 0x1,0x0 
0x10,0x1554f11b4 0x11,0x24e6008 0x12,0x24e7001 0x17,0x0 0x1b,0xfee00d00 0x2a,0x0 0x2c,0x1000000 0x34,0x0 0x3a,0x1 0x3b,0x0 0x48,0x1 0x8b,0x100000000 0xc1,0x0 0xc2,0x0 0xcd,0x3 0xce,0x80000000 0xfe,0x508 0x10a,0xc080c4c 0x11e,0xbe702111 0x140,0x0 0x174,0x10 0x175,0xfffffe0000003000 0x176,0xffffffff81a01510 0x179,0x20 0x17a,0x0 0x186,0x0 0x187,0x0 0x198,0x400000003e8 0x199,0x0 0x1a0,0x1 0x1d9,0x0 0x1db,0x0 0x1dc,0x0 0x1dd,0x0 0x1de,0x0 0x1fc,0x0 0x200,0x0 0x201,0x0 0x202,0x0 0x203,0x0 0x204,0x0 0x205,0x0 0x206,0x0 0x207,0x0 0x208,0x0 0x209,0x0 0x20a,0x0 0x20b,0x0 0x20c,0x0 0x20d,0x0 0x20e,0x0 0x20f,0x0 0x250,0x0 0x258,0x0 0x259,0x0 0x268,0x0 0x269,0x0 0x26a,0x0 0x26b,0x0 0x26c,0x0 0x26d,0x0 0x26e,0x0 0x26f,0x0 0x277,0x407050600070106 0x2ff,0x806 0x400,0x0 0x401,0x0 0x402,0x0 0x403,0x0 0x404,0x0 0x405,0x0 0x406,0x0 0x407,0x0 0x408,0x0 0x409,0x0 0x40a,0x0 0x40b,0x0 0x40c,0x0 0x40d,0x0 0x40e,0x0 0x40f,0x0 0x410,0x0 0x411,0x0 0x412,0x0 0x413,0x0 0x414,0x0 0x415,0x0 0x416,0x0 0x417,0x0 0x418,0x0 0x419,0x0 0x41a,0x0 0x41b,0x0 0x41c,0x0 0x41d,0x0 0x41e,0x0 0x41f,0x0 0x420,0x0 0x421,0x0 0x422,0x0 0x423,0x0 0x424,0x0 0x425,0x0 0x426,0x0 0x427,0x0 0x428,0x0 0x429,0x0 0x42a,0x0 0x42b,0x0 0x42c,0x0 0x42d,0x0 0x42e,0x0 0x42f,0x0 0x430,0x0 0x431,0x0 0x432,0x0 0x433,0x0 0x434,0x0 0x435,0x0 0x436,0x0 0x437,0x0 0x438,0x0 0x439,0x0 0x43a,0x0 0x43b,0x0 0x43c,0x0 0x43d,0x0 0x43e,0x0 0x43f,0x0 0x440,0x0 0x441,0x0 0x442,0x0 0x443,0x0 0x444,0x0 0x445,0x0 0x446,0x0 0x447,0x0 0x448,0x0 0x449,0x0 0x44a,0x0 0x44b,0x0 0x44c,0x0 0x44d,0x0 0x44e,0x0 0x44f,0x0 0x450,0x0 0x451,0x0 0x452,0x0 0x453,0x0 0x454,0x0 0x455,0x0 0x456,0x0 0x457,0x0 0x458,0x0 0x459,0x0 0x45a,0x0 0x45b,0x0 0x45c,0x0 0x45d,0x0 0x45e,0x0 0x45f,0x0 0x460,0x0 0x461,0x0 0x462,0x0 0x463,0x0 0x464,0x0 0x465,0x0 0x466,0x0 0x467,0x0 0x468,0x0 0x469,0x0 0x46a,0x0 0x46b,0x0 0x46c,0x0 0x46d,0x0 0x46e,0x0 0x46f,0x0 0x470,0x0 0x471,0x0 0x472,0x0 0x473,0x0 0x474,0x0 0x475,0x0 0x476,0x0 0x477,0x0 0x478,0x0 0x479,0x0 0x47a,0x0 0x47b,0x0 
0x47c,0x0 0x47d,0x0 0x47e,0x0 0x47f,0x0 0x606,0x0 0x611,0x0 0x619,0x0 0x639,0x0 0x641,0x0 0x6e0,0x15659fdee 0x800,0x0 0x801,0x0 0x802,0x0 0x803,0x50014 0x804,0x0 0x805,0x0 0x806,0x0 0x807,0x0 0x808,0x10 0x809,0x0 0x80a,0x10 0x80b,0x0 0x80c,0x0 0x80d,0x1 0x80e,0xffffffff 0x80f,0x1ff 0x810,0x0 0x811,0x0 0x812,0x0 0x813,0x0 0x814,0x0 0x815,0x0 0x816,0x0 0x817,0x0 0x818,0x0 0x819,0x0 0x81a,0x0 0x81b,0x0 0x81c,0x0 0x81d,0x0 0x81e,0x0 0x81f,0x0 0x820,0x0 0x821,0x0 0x822,0x0 0x823,0x0 0x824,0x0 0x825,0x0 0x826,0x0 0x827,0x0 0x828,0x0 0x829,0x0 0x82a,0x0 0x82b,0x0 0x82c,0x0 0x82d,0x0 0x82e,0x0 0x82f,0x0 0x830,0x0 0x831,0x0 0x832,0x400ec 0x833,0x10000 0x834,0x10000 0x835,0x10700 0x836,0x400 0x837,0xfe 0x838,0x0 0x839,0x0 0x83a,0x0 0x83b,0x0 0x83c,0x0 0x83d,0x0 0x83e,0x0 0x83f,0x0 0x840,0x0 0x841,0x0 0x842,0x0 0x843,0x0 0x844,0x0 0x845,0x0 0x846,0x0 0x847,0x0 0x848,0x0 0x849,0x0 0x84a,0x0 0x84b,0x0 0x84c,0x0 0x84d,0x0 0x84e,0x0 0x84f,0x0 0x850,0x0 0x851,0x0 0x852,0x0 0x853,0x0 0x854,0x0 0x855,0x0 0x856,0x0 0x857,0x0 0x858,0x0 0x859,0x0 0x85a,0x0 0x85b,0x0 0x85c,0x0 0x85d,0x0 0x85e,0x0 0x85f,0x0 0x860,0x0 0x861,0x0 0x862,0x0 0x863,0x0 0x864,0x0 0x865,0x0 0x866,0x0 0x867,0x0 0x868,0x0 0x869,0x0 0x86a,0x0 0x86b,0x0 0x86c,0x0 0x86d,0x0 0x86e,0x0 0x86f,0x0 0x870,0x0 0x871,0x0 0x872,0x0 0x873,0x0 0x874,0x0 0x875,0x0 0x876,0x0 0x877,0x0 0x878,0x0 0x879,0x0 0x87a,0x0 0x87b,0x0 0x87c,0x0 0x87d,0x0 0x87e,0x0 0x87f,0x0 0x880,0x0 0x881,0x0 0x882,0x0 0x883,0x0 0x884,0x0 0x885,0x0 0x886,0x0 0x887,0x0 0x888,0x0 0x889,0x0 0x88a,0x0 0x88b,0x0 0x88c,0x0 0x88d,0x0 0x88e,0x0 0x88f,0x0 0x890,0x0 0x891,0x0 0x892,0x0 0x893,0x0 0x894,0x0 0x895,0x0 0x896,0x0 0x897,0x0 0x898,0x0 0x899,0x0 0x89a,0x0 0x89b,0x0 0x89c,0x0 0x89d,0x0 0x89e,0x0 0x89f,0x0 0x8a0,0x0 0x8a1,0x0 0x8a2,0x0 0x8a3,0x0 0x8a4,0x0 0x8a5,0x0 0x8a6,0x0 0x8a7,0x0 0x8a8,0x0 0x8a9,0x0 0x8aa,0x0 0x8ab,0x0 0x8ac,0x0 0x8ad,0x0 0x8ae,0x0 0x8af,0x0 0x8b0,0x0 0x8b1,0x0 0x8b2,0x0 0x8b3,0x0 0x8b4,0x0 0x8b5,0x0 0x8b6,0x0 0x8b7,0x0 0x8b8,0x0 
0x8b9,0x0 0x8ba,0x0 0x8bb,0x0 0x8bc,0x0 0x8bd,0x0 0x8be,0x0 0x8bf,0x0 0x8c0,0x0 0x8c1,0x0 0x8c2,0x0 0x8c3,0x0 0x8c4,0x0 0x8c5,0x0 0x8c6,0x0 0x8c7,0x0 0x8c8,0x0 0x8c9,0x0 0x8ca,0x0 0x8cb,0x0 0x8cc,0x0 0x8cd,0x0 0x8ce,0x0 0x8cf,0x0 0x8d0,0x0 0x8d1,0x0 0x8d2,0x0 0x8d3,0x0 0x8d4,0x0 0x8d5,0x0 0x8d6,0x0 0x8d7,0x0 0x8d8,0x0 0x8d9,0x0 0x8da,0x0 0x8db,0x0 0x8dc,0x0 0x8dd,0x0 0x8de,0x0 0x8df,0x0 0x8e0,0x0 0x8e1,0x0 0x8e2,0x0 0x8e3,0x0 0x8e4,0x0 0x8e5,0x0 0x8e6,0x0 0x8e7,0x0 0x8e8,0x0 0x8e9,0x0 0x8ea,0x0 0x8eb,0x0 0x8ec,0x0 0x8ed,0x0 0x8ee,0x0 0x8ef,0x0 0x8f0,0x0 0x8f1,0x0 0x8f2,0x0 0x8f3,0x0 0x8f4,0x0 0x8f5,0x0 0x8f6,0x0 0x8f7,0x0 0x8f8,0x0 0x8f9,0x0 0x8fa,0x0 0x8fb,0x0 0x8fc,0x0 0x8fd,0x0 0x8fe,0x0 0x8ff,0x0 0xc0000080,0xd01 0xc0000081,0x23001000000000 0xc0000082,0xffffffff81a00080 0xc0000083,0xffffffff81a015c0 0xc0000084,0x47700 0xc0000100,0x25c3380 0xc0000101,0xffff88803ec00000 0xc0000102,0x0 0xc0000103,0x0 0xc0010000,0x0 0xc0010001,0x0 0xc0010002,0x0 0xc0010003,0x0 0xc0010004,0x0 0xc0010005,0x0 0xc0010006,0x0 0xc0010007,0x0 0xc0010010,0x0 0xc0010015,0x0 0xc001001b,0x20000000 0xc001001f,0x0 0xc0010055,0x0 0xc0010058,0x0 0xc0010112,0x0 0xc0010113,0x0 0xc0010117,0x0 0xc0011022,0x0 0xc0011023,0x0 0xc001102a,0x0 0xc001102c,0x0 0x400000000,0x0 0x2000000000,0x0 0x4000000000,0x0 0x8000000000,0x0 0x1000000000000,0x0 0x3c000000000000,0x0 0x80000000000000,0x0 0x40000000000000,0x0 ================================================ FILE: tests/data/msr/msr_list_T2S_INTEL_CASCADELAKE_6.1host_6.1guest.csv ================================================ MSR_ADDR,VALUE 0x0,0x0 0x1,0x0 0x10,0x13d3c66ee 0x11,0x27fd008 0x12,0x27fe001 0x17,0x0 0x1b,0xfee00d00 0x2a,0x0 0x2c,0x1000000 0x34,0x0 0x3a,0x1 0x3b,0x0 0x48,0x1 0x8b,0x100000000 0xc1,0x0 0xc2,0x0 0xcd,0x3 0xce,0x80000000 0xfe,0x508 0x10a,0xc080c4c 0x11e,0xbe702111 0x140,0x0 0x174,0x10 0x175,0xfffffe0000003000 0x176,0xffffffff81c01630 0x179,0x20 0x17a,0x0 0x186,0x0 0x187,0x0 0x198,0x400000003e8 0x199,0x0 0x1a0,0x1 0x1d9,0x0 0x1db,0x0 
0x1dc,0x0 0x1dd,0x0 0x1de,0x0 0x1fc,0x0 0x200,0x0 0x201,0x0 0x202,0x0 0x203,0x0 0x204,0x0 0x205,0x0 0x206,0x0 0x207,0x0 0x208,0x0 0x209,0x0 0x20a,0x0 0x20b,0x0 0x20c,0x0 0x20d,0x0 0x20e,0x0 0x20f,0x0 0x250,0x0 0x258,0x0 0x259,0x0 0x268,0x0 0x269,0x0 0x26a,0x0 0x26b,0x0 0x26c,0x0 0x26d,0x0 0x26e,0x0 0x26f,0x0 0x277,0x407050600070106 0x2ff,0x806 0x400,0x0 0x401,0x0 0x402,0x0 0x403,0x0 0x404,0x0 0x405,0x0 0x406,0x0 0x407,0x0 0x408,0x0 0x409,0x0 0x40a,0x0 0x40b,0x0 0x40c,0x0 0x40d,0x0 0x40e,0x0 0x40f,0x0 0x410,0x0 0x411,0x0 0x412,0x0 0x413,0x0 0x414,0x0 0x415,0x0 0x416,0x0 0x417,0x0 0x418,0x0 0x419,0x0 0x41a,0x0 0x41b,0x0 0x41c,0x0 0x41d,0x0 0x41e,0x0 0x41f,0x0 0x420,0x0 0x421,0x0 0x422,0x0 0x423,0x0 0x424,0x0 0x425,0x0 0x426,0x0 0x427,0x0 0x428,0x0 0x429,0x0 0x42a,0x0 0x42b,0x0 0x42c,0x0 0x42d,0x0 0x42e,0x0 0x42f,0x0 0x430,0x0 0x431,0x0 0x432,0x0 0x433,0x0 0x434,0x0 0x435,0x0 0x436,0x0 0x437,0x0 0x438,0x0 0x439,0x0 0x43a,0x0 0x43b,0x0 0x43c,0x0 0x43d,0x0 0x43e,0x0 0x43f,0x0 0x440,0x0 0x441,0x0 0x442,0x0 0x443,0x0 0x444,0x0 0x445,0x0 0x446,0x0 0x447,0x0 0x448,0x0 0x449,0x0 0x44a,0x0 0x44b,0x0 0x44c,0x0 0x44d,0x0 0x44e,0x0 0x44f,0x0 0x450,0x0 0x451,0x0 0x452,0x0 0x453,0x0 0x454,0x0 0x455,0x0 0x456,0x0 0x457,0x0 0x458,0x0 0x459,0x0 0x45a,0x0 0x45b,0x0 0x45c,0x0 0x45d,0x0 0x45e,0x0 0x45f,0x0 0x460,0x0 0x461,0x0 0x462,0x0 0x463,0x0 0x464,0x0 0x465,0x0 0x466,0x0 0x467,0x0 0x468,0x0 0x469,0x0 0x46a,0x0 0x46b,0x0 0x46c,0x0 0x46d,0x0 0x46e,0x0 0x46f,0x0 0x470,0x0 0x471,0x0 0x472,0x0 0x473,0x0 0x474,0x0 0x475,0x0 0x476,0x0 0x477,0x0 0x478,0x0 0x479,0x0 0x47a,0x0 0x47b,0x0 0x47c,0x0 0x47d,0x0 0x47e,0x0 0x47f,0x0 0x606,0x0 0x611,0x0 0x619,0x0 0x639,0x0 0x641,0x0 0x6e0,0x13de4b094 0x800,0x0 0x801,0x0 0x802,0x0 0x803,0x50014 0x804,0x0 0x805,0x0 0x806,0x0 0x807,0x0 0x808,0x10 0x809,0x0 0x80a,0x10 0x80b,0x0 0x80c,0x0 0x80d,0x1 0x80e,0xffffffff 0x80f,0x1ff 0x810,0x0 0x811,0x0 0x812,0x0 0x813,0x0 0x814,0x0 0x815,0x0 0x816,0x0 0x817,0x0 0x818,0x0 0x819,0x0 0x81a,0x0 0x81b,0x0 0x81c,0x0 
0x81d,0x0 0x81e,0x0 0x81f,0x0 0x820,0x0 0x821,0x0 0x822,0x0 0x823,0x0 0x824,0x0 0x825,0x0 0x826,0x0 0x827,0x0 0x828,0x0 0x829,0x0 0x82a,0x0 0x82b,0x0 0x82c,0x0 0x82d,0x0 0x82e,0x0 0x82f,0x0 0x830,0x0 0x831,0x0 0x832,0x400ec 0x833,0x10000 0x834,0x10000 0x835,0x10700 0x836,0x400 0x837,0xfe 0x838,0x0 0x839,0x0 0x83a,0x0 0x83b,0x0 0x83c,0x0 0x83d,0x0 0x83e,0x0 0x83f,0x0 0x840,0x0 0x841,0x0 0x842,0x0 0x843,0x0 0x844,0x0 0x845,0x0 0x846,0x0 0x847,0x0 0x848,0x0 0x849,0x0 0x84a,0x0 0x84b,0x0 0x84c,0x0 0x84d,0x0 0x84e,0x0 0x84f,0x0 0x850,0x0 0x851,0x0 0x852,0x0 0x853,0x0 0x854,0x0 0x855,0x0 0x856,0x0 0x857,0x0 0x858,0x0 0x859,0x0 0x85a,0x0 0x85b,0x0 0x85c,0x0 0x85d,0x0 0x85e,0x0 0x85f,0x0 0x860,0x0 0x861,0x0 0x862,0x0 0x863,0x0 0x864,0x0 0x865,0x0 0x866,0x0 0x867,0x0 0x868,0x0 0x869,0x0 0x86a,0x0 0x86b,0x0 0x86c,0x0 0x86d,0x0 0x86e,0x0 0x86f,0x0 0x870,0x0 0x871,0x0 0x872,0x0 0x873,0x0 0x874,0x0 0x875,0x0 0x876,0x0 0x877,0x0 0x878,0x0 0x879,0x0 0x87a,0x0 0x87b,0x0 0x87c,0x0 0x87d,0x0 0x87e,0x0 0x87f,0x0 0x880,0x0 0x881,0x0 0x882,0x0 0x883,0x0 0x884,0x0 0x885,0x0 0x886,0x0 0x887,0x0 0x888,0x0 0x889,0x0 0x88a,0x0 0x88b,0x0 0x88c,0x0 0x88d,0x0 0x88e,0x0 0x88f,0x0 0x890,0x0 0x891,0x0 0x892,0x0 0x893,0x0 0x894,0x0 0x895,0x0 0x896,0x0 0x897,0x0 0x898,0x0 0x899,0x0 0x89a,0x0 0x89b,0x0 0x89c,0x0 0x89d,0x0 0x89e,0x0 0x89f,0x0 0x8a0,0x0 0x8a1,0x0 0x8a2,0x0 0x8a3,0x0 0x8a4,0x0 0x8a5,0x0 0x8a6,0x0 0x8a7,0x0 0x8a8,0x0 0x8a9,0x0 0x8aa,0x0 0x8ab,0x0 0x8ac,0x0 0x8ad,0x0 0x8ae,0x0 0x8af,0x0 0x8b0,0x0 0x8b1,0x0 0x8b2,0x0 0x8b3,0x0 0x8b4,0x0 0x8b5,0x0 0x8b6,0x0 0x8b7,0x0 0x8b8,0x0 0x8b9,0x0 0x8ba,0x0 0x8bb,0x0 0x8bc,0x0 0x8bd,0x0 0x8be,0x0 0x8bf,0x0 0x8c0,0x0 0x8c1,0x0 0x8c2,0x0 0x8c3,0x0 0x8c4,0x0 0x8c5,0x0 0x8c6,0x0 0x8c7,0x0 0x8c8,0x0 0x8c9,0x0 0x8ca,0x0 0x8cb,0x0 0x8cc,0x0 0x8cd,0x0 0x8ce,0x0 0x8cf,0x0 0x8d0,0x0 0x8d1,0x0 0x8d2,0x0 0x8d3,0x0 0x8d4,0x0 0x8d5,0x0 0x8d6,0x0 0x8d7,0x0 0x8d8,0x0 0x8d9,0x0 0x8da,0x0 0x8db,0x0 0x8dc,0x0 0x8dd,0x0 0x8de,0x0 0x8df,0x0 0x8e0,0x0 0x8e1,0x0 0x8e2,0x0 
0x8e3,0x0 0x8e4,0x0 0x8e5,0x0 0x8e6,0x0 0x8e7,0x0 0x8e8,0x0 0x8e9,0x0 0x8ea,0x0 0x8eb,0x0 0x8ec,0x0 0x8ed,0x0 0x8ee,0x0 0x8ef,0x0 0x8f0,0x0 0x8f1,0x0 0x8f2,0x0 0x8f3,0x0 0x8f4,0x0 0x8f5,0x0 0x8f6,0x0 0x8f7,0x0 0x8f8,0x0 0x8f9,0x0 0x8fa,0x0 0x8fb,0x0 0x8fc,0x0 0x8fd,0x0 0x8fe,0x0 0x8ff,0x0 0xc0000080,0xd01 0xc0000081,0x23001000000000 0xc0000082,0xffffffff81c00080 0xc0000083,0x0 0xc0000084,0x257fd5 0xc0000100,0x1e840380 0xc0000101,0xffff88803ec00000 0xc0000102,0x0 0xc0000103,0x0 0xc0010000,0x0 0xc0010001,0x0 0xc0010002,0x0 0xc0010003,0x0 0xc0010004,0x0 0xc0010005,0x0 0xc0010006,0x0 0xc0010007,0x0 0xc0010010,0x0 0xc0010015,0x0 0xc001001b,0x20000000 0xc001001f,0x0 0xc0010055,0x0 0xc0010058,0x0 0xc0010112,0x0 0xc0010113,0x0 0xc0010117,0x0 0xc0011022,0x0 0xc0011023,0x0 0xc001102a,0x0 0xc001102c,0x0 0x400000000,0x0 0x2000000000,0x0 0x4000000000,0x0 0x8000000000,0x0 0x1000000000000,0x0 0x3c000000000000,0x0 0x80000000000000,0x0 0x40000000000000,0x0 ================================================ FILE: tests/data/msr/msr_reader.c ================================================ // Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 // Helper script used to read MSR values from ranges known to contain MSRs. 
// Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

// Helper program used to read MSR values from ranges known to contain MSRs.
// Reads /dev/cpu/0/msr (requires the `msr` kernel module and root) and emits
// CSV ("MSR_ADDR,VALUE") on stdout.

// NOTE(review): the original #include lines lost their header names during
// extraction; these are the headers the code actually needs (open, pread,
// printf, uint64_t, PRIx64).
#include <fcntl.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

// Print "addr,value" for a single MSR if it can be read. Unreadable
// (non-existent) MSRs are silently skipped, preserving the original behavior.
void print_msr(int msr_fd, uint64_t msr)
{
    uint64_t value;

    // /dev/cpu/N/msr semantics: the MSR index is passed as the file offset.
    if (pread(msr_fd, &value, sizeof(value), msr) == sizeof(value))
        // Fix: %llx with a uint64_t argument is a format/type mismatch on
        // LP64 (uint64_t is unsigned long there); PRIx64 is always correct.
        printf("0x%" PRIx64 ",0x%" PRIx64 "\n", msr, value);
}

int main(void)
{
    int msr_fd = open("/dev/cpu/0/msr", O_RDONLY);
    if (msr_fd < 0)
        return -1;

    printf("MSR_ADDR,VALUE\n");

    // Architectural MSR range.
    for (uint64_t msr = 0; msr <= 0xFFF; msr++)
        print_msr(msr_fd, msr);
    // Extended range.
    for (uint64_t msr = 0x10000; msr <= 0x10FFF; msr++)
        print_msr(msr_fd, msr);
    // AMD/vendor range.
    for (uint64_t msr = 0xC0000000; msr <= 0xC0011030; msr++)
        print_msr(msr_fd, msr);

    // Individually probed high MSR addresses.
    print_msr(msr_fd, 0x400000000);
    print_msr(msr_fd, 0x2000000000);
    print_msr(msr_fd, 0x4000000000);
    print_msr(msr_fd, 0x8000000000);
    print_msr(msr_fd, 0x1000000000000);
    print_msr(msr_fd, 0x3c000000000000);
    print_msr(msr_fd, 0x80000000000000);
    print_msr(msr_fd, 0x40000000000000);

    // Fix: the descriptor was previously leaked (harmless at exit, but tidy).
    close(msr_fd);
    return 0;
}
-f "${input_file}" ] ; then >&2 echo "File ${input_file} does not exist" exit 1 fi while read -r reg val ; do wrmsr ${reg} ${val} done < ${input_file} ================================================ FILE: tests/data/msr/wrmsr_list.txt ================================================ 0x1b 0xfee00c00 0x174 0x11 0x17a 0x1 0x1a0 0x0 0x250 0x1 0x258 0x1 0x259 0x1 0x268 0x1 0x269 0x1 0x26a 0x1 0x26b 0x1 0x26c 0x1 0x26d 0x1 0x26e 0x1 0x26f 0x1 0x2ff 0x1 0x402 0x1 0x403 0x1 0x406 0x1 0x407 0x1 0x40a 0x1 0x40b 0x1 0x40e 0x1 0x40f 0x1 0x412 0x1 0x413 0x1 0x416 0x1 0x417 0x1 0x41a 0x1 0x41b 0x1 0x41e 0x1 0x41f 0x1 0x422 0x1 0x423 0x1 0x426 0x1 0x427 0x1 0x42a 0x1 0x42b 0x1 0x42e 0x1 0x42f 0x1 0x432 0x1 0x433 0x1 0x436 0x1 0x437 0x1 0x43a 0x1 0x43b 0x1 0x43e 0x1 0x43f 0x1 0x442 0x1 0x443 0x1 0x446 0x1 0x447 0x1 0x44a 0x1 0x44b 0x1 0x44e 0x1 0x44f 0x1 0x452 0x1 0x453 0x1 0x456 0x1 0x457 0x1 0x45a 0x1 0x45b 0x1 0x45e 0x1 0x45f 0x1 0x462 0x1 0x463 0x1 0x466 0x1 0x467 0x1 0x46a 0x1 0x46b 0x1 0x46e 0x1 0x46f 0x1 0x472 0x1 0x473 0x1 0x476 0x1 0x477 0x1 0x47a 0x1 0x47b 0x1 0x47e 0x1 0x47f 0x1 0x808 0x1 0x80f 0x1fe 0xc0000081 0x23001000000001 0xc0000084 0x47701 0xc0000103 0x1 0xc0010015 0x40000 ================================================ FILE: tests/framework/__init__.py ================================================ # Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. # SPDX-License-Identifier: Apache-2.0 """Are you happy pylint?""" ================================================ FILE: tests/framework/ab_test.py ================================================ # Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. # SPDX-License-Identifier: Apache-2.0 """ Defines utilities for performing A/B-tests. A/B-Tests are style of tests where we do not care what state a test is in, but only that this state does not change across a pull request. This is useful if 1. 
Validating the state requires some baseline to be persisted in the repository, and maintaining this baseline adds significant operational burden (for example, performance tests), or 2. The state can change due to outside factors (e.g. Hardware changes), and such external changes would block all pull requests until they are resolved. Consider for example a `cargo audit` tests, which is used to reject usage of dependency versinos that have known security vulnerabilities, or which have been yanked. The "state" here is "list of vulnerable dependencies". Clearly, this can change due to external action (a new vulnerability is discovered and published to RustSec). At this point, every PR would fail until this dependency is removed, blocking all development. Simply removing the test from PR CI is not an option, since we want to avoid the scenario where a PR adds a dependency with a known vulnerability (e.g. the PR itself changes the "list of vulnerable dependencies"). A/B-Testing allows us to not block PRs on the former case, while still preventing the latter: We run cargo audit twice, once on main HEAD, and once on the PR HEAD. If the output of both invocations is the same, the test passes (with us being alerted to this situtation via a special pipeline that does not block PRs). If not, it fails, preventing PRs from introducing new vulnerable dependencies. """ from pathlib import Path from tempfile import TemporaryDirectory from typing import Callable, Optional, TypeVar from framework import utils from framework.properties import global_props from framework.utils import CommandReturn from framework.with_filelock import with_filelock # Locally, this will always compare against main, even if we try to merge into, say, a feature branch. # We might want to do a more sophisticated way to determine a "parent" branch here. 
DEFAULT_A_REVISION = global_props.buildkite_revision_a or "main"

T = TypeVar("T")
U = TypeVar("U")


def default_comparator(ah: T, be: T) -> bool:
    """Return `True` iff the two arguments are equal.

    The default assertion for A/B-tests using `ab_test`. Ridiculous variable
    names sponsored by pylint.
    """
    return ah == be


def git_ab_test(
    test_runner: Callable[[Path, bool], T],
    comparator: Callable[[T, T], U] = default_comparator,
    *,
    a_revision: str = DEFAULT_A_REVISION,
    b_revision: Optional[str] = None,
) -> (T, T, U):
    """
    Perform an A/B-test using the given test runner between the specified
    revision and the currently checked out revision.

    The specified revisions will be checked out in temporary directories, with
    `test_runner` getting executed in the repository root. If the test depends
    on firecracker binaries built from the requested revision, care has to be
    taken that they are built from the sources in the temporary directory.

    Note that there are no guarantees on the order in which the two tests are
    run.

    :param test_runner: A callable which when executed runs the test in the
        context of the current working directory. Its first parameter is a
        temporary directory in which firecracker is checked out at some
        revision. The second parameter is `True` if and only if the checked
        out revision is the "A" revision.
    :param comparator: A callable taking two outputs from `test_runner` and
        comparing them. Should return some value indicating whether the test
        should pass or not, which will be returned by the `ab_test` functions,
        and on which the caller can then do an assertion.
    :param a_revision: The revision to checkout for the "A" part of the test.
        Defaults to the pull request target branch if run in CI, and "main"
        otherwise.
    :param b_revision: The git revision to check out for the "B" part of the
        test. Defaults to whatever is currently checked out (in which case no
        temporary directory will be created).
    :return: The output of the "A" test, the "B" test and the comparator,
        which can then be used for assertions (alternatively, your comparator
        can perform any required assertions and not return anything).
    """
    with TemporaryDirectory() as tmp_dir:
        dir_a = git_clone(Path(tmp_dir) / a_revision, a_revision)
        result_a = test_runner(dir_a, True)

        if b_revision:
            dir_b = git_clone(Path(tmp_dir) / b_revision, b_revision)
        else:
            # By default, pytest execution happens inside the `tests`
            # subdirectory. Pass the repository root, as documented.
            dir_b = Path.cwd().parent
        result_b = test_runner(dir_b, False)

        comparison = comparator(result_a, result_b)

    return result_a, result_b, comparison


def git_ab_test_host_command_if_pr(
    command: str,
    *,
    comparator: Callable[[CommandReturn, CommandReturn], bool] = default_comparator,
    check_in_nonpr=True,
):
    """Run the given bash command as an A/B-Test if we're in a pull request
    context (asserting that its stdout and stderr did not change across the
    PR). Otherwise run the command, asserting it returns a zero exit code.

    :param command: the shell command to execute
    :param comparator: comparison applied to the A and B `CommandReturn`s
    :param check_in_nonpr: whether to assert a zero exit code when run
        outside a PR context
    """
    if global_props.buildkite_pr:
        git_ab_test_host_command(command, comparator=comparator)
        return None

    return utils.run_cmd(
        command,
        check=check_in_nonpr,
        cwd=Path.cwd().parent,
    )


def git_ab_test_host_command(
    command: str,
    *,
    comparator: Callable[[CommandReturn, CommandReturn], bool] = default_comparator,
    a_revision: str = DEFAULT_A_REVISION,
    b_revision: Optional[str] = None,
):
    """Perform an A/B-Test of the specified command, asserting that both the A
    and B invocations return the same stdout/stderr."""
    (_, old_out, old_err), (_, new_out, new_err), the_same = git_ab_test(
        lambda path, _is_a: utils.run_cmd(command, cwd=path),
        comparator,
        a_revision=a_revision,
        b_revision=b_revision,
    )

    assert (
        the_same
    ), f"The output of running command `{command}` changed:\nOld:\nstdout:\n{old_out}\nstderr:\n{old_err}\n\nNew:\nstdout:\n{new_out}\nstderr:\n{new_err}"


def set_did_not_grow_comparator(
    set_generator: Callable[[CommandReturn], set],
) -> Callable[[CommandReturn, CommandReturn], bool]:
    """Factory function for comparators to use with git_ab_test_command that
    converts the command output to sets (using the given callable) and then
    checks that the "B" set is a subset of the "A" set.
    """
    return lambda output_a, output_b: set_generator(output_b).issubset(
        set_generator(output_a)
    )


@with_filelock
def git_clone(clone_path, commitish):
    """Clone the repository at the given `commitish` into `clone_path`.

    :param clone_path: destination directory for the working copy
    :param commitish: any git object reference (commit sha, branch, tag)
    :return: the working copy directory.
    """
    if not clone_path.exists():
        ret, _, _ = utils.run_cmd(f"git cat-file -t {commitish}")
        if ret != 0:
            # git didn't recognize this object; qualify it if it is a branch
            commitish = f"origin/{commitish}"

        # make a temp branch for that commit so we can directly check it out
        branch_name = f"tmp-{commitish}"
        utils.check_output(f"git branch {branch_name} {commitish}")
        _, git_root, _ = utils.run_cmd("git rev-parse --show-toplevel")
        # split off the '\n' at the end of the stdout
        utils.check_output(
            f"git clone -b {branch_name} {git_root.strip()} {clone_path}"
        )
        utils.check_output(f"git branch -D {branch_name}")

    return clone_path
def select_supported_kernels():
    """Select guest kernels supported by the current combination of kernel
    and instance type.
    """
    # Booting with MPTable is deprecated but we still want to test for it.
    # Until we drop support for it we will be building a 5.10 guest kernel
    # without ACPI support, so that we are able to test this use-case as well.
    # TODO: remove the -no-acpi entry once we drop support for MPTable
    return [
        r"vmlinux-5.10.\d+",
        r"vmlinux-6.1.\d+",
        r"vmlinux-5.10.\d+-no-acpi",
    ]


def kernels(glob, artifact_dir: Path = ARTIFACT_DIR) -> Iterator:
    """Yield the kernels under `artifact_dir` matching `glob` that are
    supported by the current combination of kernel and instance type.
    """
    patterns = select_supported_kernels()
    for candidate in sorted(artifact_dir.glob(glob)):
        if any(re.fullmatch(pattern, candidate.name) for pattern in patterns):
            yield candidate


def disks(glob) -> list:
    """Return supported rootfs"""
    return sorted(ARTIFACT_DIR.glob(glob))


def kernel_params(glob="vmlinux-*", select=kernels, artifact_dir=ARTIFACT_DIR) -> list:
    """Return supported kernels or a single None if no kernels are found"""
    params = [
        pytest.param(kernel, id=kernel.name)
        for kernel in select(glob, artifact_dir)
    ]
    return params or [pytest.param(None, id="no-kernel-found")]
# SPDX-License-Identifier: Apache-2.0
"""Some common defines used in different modules of the testing framework."""

import platform
from pathlib import Path

# Firecracker's binary name
FC_BINARY_NAME = "firecracker"

# The Firecracker sources workspace dir
FC_WORKSPACE_DIR = Path(__file__).parent.parent.parent.resolve()

# Folder containing JSON seccomp filters
SECCOMP_JSON_DIR = FC_WORKSPACE_DIR / "resources/seccomp"

# Maximum accepted duration of an API call, in milliseconds
MAX_API_CALL_DURATION_MS = 700

# Default test session root directory path
DEFAULT_TEST_SESSION_ROOT_PATH = "/srv"

# Default test session artifacts path
LOCAL_BUILD_PATH = FC_WORKSPACE_DIR / "build/"
DEFAULT_BINARY_DIR = (
    LOCAL_BUILD_PATH
    / "cargo_target"
    / f"{platform.machine()}-unknown-linux-musl"
    / "release"
)

SUPPORTED_HOST_KERNELS = ["5.10", "6.1"]

# When pytest is run in the devctr, the test.sh script copies artifacts
# (rootfs, guest kernels, etc.) to /srv/test_artifacts within the container.
ARTIFACT_DIR = Path(DEFAULT_TEST_SESSION_ROOT_PATH) / "test_artifacts"

# Fall back to the local directory if pytest was run without the test.sh script.
if not ARTIFACT_DIR.exists():
    current_artifacts_dir = (
        (Path(LOCAL_BUILD_PATH) / "current_artifacts")
        .read_text(encoding="utf-8")
        .strip()
    )
    ARTIFACT_DIR = FC_WORKSPACE_DIR / current_artifacts_dir
    id = "UC2"

    def validate(self, commit):
        r"""Validates Signed-off-by and Co-authored-by tags as Linux's
        scripts/checkpatch.pl

        >>> from gitlint.git import GitContext
        >>> from gitlint.rules import RuleViolation
        ...
        >>> ends_signed = EndsSigned()
        >>> miss_sob_follows_coab = "Missing 'Signed-off-by' following 'Co-authored-by'"
        >>> miss_sob = "'Signed-off-by' not found in commit message body"
        >>> non_sign = "Non 'Co-authored-by' or 'Signed-off-by' string found following 1st 'Signed-off-by'"
        >>> email_no_match = "'Co-authored-by' and 'Signed-off-by' name/email do not match"
        ...
        >>> msg1 = (
        ...     f"Title\n\nMessage.\n\n"
        ...     f"Signed-off-by: name "
        ... )
        >>> commit1 = GitContext.from_commit_msg(msg1).commits[0]
        >>> ends_signed.validate(commit1)
        []
        >>> msg2 = (
        ...     f"Title\n\nMessage.\n\n"
        ...     f"Co-authored-by: name \n\n"
        ...     f"Signed-off-by: name "
        ... )
        >>> commit2 = GitContext.from_commit_msg(msg2).commits[0]
        >>> ends_signed.validate(commit2)
        []
        >>> msg3 = f"Title\n\nMessage.\n\n"
        >>> commit3 = GitContext.from_commit_msg(msg3).commits[0]
        >>> vio3 = ends_signed.validate(commit3)
        >>> vio3 == [RuleViolation("UC2", miss_sob)]
        True
        >>> msg4 = (
        ...     f"Title\n\nMessage.\n\n"
        ...     f"Signed-off-by: name \n\na sentence"
        ... )
        >>> commit4 = GitContext.from_commit_msg(msg4).commits[0]
        >>> vio4 = ends_signed.validate(commit4)
        >>> vio4 == [RuleViolation("UC2", non_sign, None, 6)]
        True
        >>> msg5 = (
        ...     f"Title\n\nMessage.\n\n"
        ...     f"Co-authored-by: name "
        ... )
        >>> commit5 = GitContext.from_commit_msg(msg5).commits[0]
        >>> vio5 = ends_signed.validate(commit5)
        >>> vio5 == [
        ...     RuleViolation("UC2", miss_sob, None, None),
        ...     RuleViolation("UC2", miss_sob_follows_coab, None, 5)
        ... ]
        True
        >>> msg6 = (
        ...     f"Title\n\nMessage.\n\n"
        ...     f"Co-authored-by: name \n\n"
        ...     f"Signed-off-by: different name "
        ... )
        >>> commit6 = GitContext.from_commit_msg(msg6).commits[0]
        >>> vio6 = ends_signed.validate(commit6)
        >>> vio6 == [RuleViolation("UC2", email_no_match, None, 6)]
        True
        """
        violations = []

        # Utilities
        def vln(stmt, i):
            violations.append(RuleViolation(self.id, stmt, None, i))

        coab = "Co-authored-by"
        sob = "Signed-off-by"

        # find trailers: (line_index, key, value) for "Key: value" lines,
        # (line_index, "line", text) for any other non-empty line
        trailers = []
        for i, line in enumerate(commit.message.original.splitlines()):
            # ignore empty lines
            if not line:
                continue
            match = re.match(r"([\w-]+):\s+(.*)", line)
            if match:
                key, val = match.groups()
                trailers.append((i, key, val))
            else:
                trailers.append((i, "line", line))

        # artificial line so we can check any "previous line" rules
        trailers.append((trailers[-1][0] + 1, None, None))

        # Checks commit message contains a `Signed-off-by` string
        if not [x for x in trailers if x[1] == sob]:
            vln(f"'{sob}' not found in commit message body", None)

        prev_trailer, prev_value = None, None
        sig_trailers = False
        for i, trailer, value in trailers:
            if trailer in {sob, coab}:
                sig_trailers = True
            elif trailer not in {sob, coab, None} and sig_trailers:
                # once the signature section started, only signature trailers
                # (or the artificial terminator) may follow
                vln(
                    f"Non '{coab}' or '{sob}' string found following 1st '{sob}'",
                    i,
                )
            # Every co-author is immediately followed by a signature
            if prev_trailer == coab:
                if trailer != sob:
                    vln(f"Missing '{sob}' following '{coab}'", i)
                else:
                    # with the same name/email.
                    if value != prev_value:
                        vln(f"'{coab}' and '{sob}' name/email do not match", i)
            prev_trailer, prev_value = trailer, value

        # Return errors
        return violations


================================================
FILE: tests/framework/guest_stats.py
================================================
# Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0
"""Classes for querying guest stats inside microVMs.
""" class ByteUnit: """Represents a byte unit that can be converted to other units.""" value_bytes: int def __init__(self, value_bytes: int): self.value_bytes = value_bytes @classmethod def from_kib(cls, value_kib: int): """Creates a ByteUnit from a value in KiB.""" if value_kib < 0: raise ValueError("value_kib must be non-negative") return ByteUnit(value_kib * 1024) def bytes(self) -> int: """Returns the value in B.""" return self.value_bytes def kib(self) -> float: """Returns the value in KiB as a decimal.""" return self.value_bytes / 1024 def mib(self) -> float: """Returns the value in MiB as a decimal.""" return self.value_bytes / (1 << 20) def gib(self) -> float: """Returns the value in GiB as a decimal.""" return self.value_bytes / (1 << 30) class Meminfo: """Represents the contents of /proc/meminfo inside the guest""" mem_total: ByteUnit mem_free: ByteUnit mem_available: ByteUnit buffers: ByteUnit cached: ByteUnit def __init__(self): self.mem_total = ByteUnit(0) self.mem_free = ByteUnit(0) self.mem_available = ByteUnit(0) self.buffers = ByteUnit(0) self.cached = ByteUnit(0) class MeminfoGuest: """Queries /proc/meminfo inside the guest""" def __init__(self, vm): self.vm = vm def get(self) -> Meminfo: """Returns the contents of /proc/meminfo inside the guest""" meminfo = Meminfo() for line in self.vm.ssh.check_output("cat /proc/meminfo").stdout.splitlines(): parts = line.split() if parts[0] == "MemTotal:": meminfo.mem_total = ByteUnit.from_kib(int(parts[1])) elif parts[0] == "MemFree:": meminfo.mem_free = ByteUnit.from_kib(int(parts[1])) elif parts[0] == "MemAvailable:": meminfo.mem_available = ByteUnit.from_kib(int(parts[1])) elif parts[0] == "Buffers:": meminfo.buffers = ByteUnit.from_kib(int(parts[1])) elif parts[0] == "Cached:": meminfo.cached = ByteUnit.from_kib(int(parts[1])) return meminfo ================================================ FILE: tests/framework/http_api.py ================================================ # Copyright 2023 Amazon.com, Inc. 
or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0
"""A simple HTTP client for the Firecracker API"""

import urllib
from http import HTTPStatus

import requests
from requests_unixsocket import DEFAULT_SCHEME, UnixAdapter

from framework.swagger_validator import SwaggerValidator, ValidationError


class Session(requests.Session):
    """An HTTP over UNIX sockets Session

    Wrapper over requests_unixsocket.Session
    """

    def __init__(self):
        """Create a Session object."""
        super().__init__()

        # 'UnixAdapter` saves in the pool at most 'pool_connections'
        # connections. When a new request is made, the adapter tries to match
        # that request with an already existing connection from the pool, by
        # comparing their url.
        # If there's a match, then the adapter uses the connection from the
        # pool to make the new request.
        # Otherwise, a new connection is created and saved in the pool. If
        # there is no space in the pool, the new connection will replace the
        # least recently used one in the pool. The evicted connection will be
        # closed.
        #
        # The `pool_connections` argument indicates the maximum number of
        # connection saved in the pool, not the maximum number of open
        # connections allowed at the same time
        # (see https://urllib3.readthedocs.io/en/stable/advanced-usage.html).
        #
        # We set this value to be equal to micro-http's `MAX_CONNECTIONS` - 1.
        # This is because when reaching the `pool_connection` limit, it is not
        # guaranteed that the event to close the connection will be received
        # before the event that results in creating a new connection (this
        # depends on the kernel). In case the two events are not received in
        # the same order, or are received together, the server might try to add
        # a new connection before removing the old one, resulting in a
        # `SERVER_FULL_ERROR`.
        self.mount(DEFAULT_SCHEME, UnixAdapter(pool_connections=9))


class Resource:
    """An abstraction over a REST path"""

    def __init__(self, api, resource, id_field=None):
        # `api` is the owning Api instance; `id_field` names the kwarg whose
        # value is appended to the path for per-id resources (e.g. drive_id).
        self._api = api
        self.resource = resource
        self.id_field = id_field

    def get(self):
        """Make a GET request"""
        url = self._api.endpoint + self.resource
        try:
            res = self._api.session.get(url)
        except Exception as e:
            if self._api.error_callback:
                self._api.error_callback("GET", self.resource, str(e))
            raise
        assert res.status_code == HTTPStatus.OK, res.json()

        # Validate response against Swagger specification
        # only validate successful requests
        if self._api.validator and res.status_code == HTTPStatus.OK:
            try:
                response_body = res.json()
                self._api.validator.validate_response(
                    "GET", self.resource, 200, response_body
                )
            except ValidationError as e:
                # Re-raise with more context
                raise ValidationError(
                    f"Response validation failed for GET {self.resource}: {e.message}"
                ) from e

        return res

    def request(self, method, path, **kwargs):
        """Make an HTTP request

        kwargs with value None are dropped before being sent as the JSON body.
        Raises RuntimeError on any non-204 response.
        """
        kwargs = {key: val for key, val in kwargs.items() if val is not None}
        url = self._api.endpoint + path
        try:
            res = self._api.session.request(method, url, json=kwargs)
        except Exception as e:
            if self._api.error_callback:
                self._api.error_callback(method, path, str(e))
            raise
        if res.status_code != HTTPStatus.NO_CONTENT:
            json = res.json()
            msg = res.content
            if "fault_message" in json:
                msg = json["fault_message"]
            elif "error" in json:
                msg = json["error"]
            raise RuntimeError(msg, json, res)

        # Validate request against Swagger specification
        # do this after the actual request as we only want to validate successful
        # requests as the tests may be trying to pass bad requests and assert an
        # error is raised.
        if self._api.validator:
            if kwargs:
                try:
                    self._api.validator.validate_request(method, path, kwargs)
                except ValidationError as e:
                    # Re-raise with more context
                    raise ValidationError(
                        f"Request validation failed for {method} {path}: {e.message}"
                    ) from e

            # NOTE(review): any 200 response would already have raised
            # RuntimeError in the non-204 branch above, so this response
            # validation appears unreachable for request() — confirm intent.
            if res.status_code == HTTPStatus.OK:
                try:
                    response_body = res.json()
                    self._api.validator.validate_response(
                        method, path, 200, response_body
                    )
                except ValidationError as e:
                    # Re-raise with more context
                    raise ValidationError(
                        f"Response validation failed for {method} {path}: {e.message}"
                    ) from e

        return res

    def put(self, **kwargs):
        """Make a PUT request"""
        path = self.resource
        if self.id_field is not None:
            path += "/" + kwargs[self.id_field]
        return self.request("PUT", path, **kwargs)

    def patch(self, **kwargs):
        """Make a PATCH request"""
        path = self.resource
        if self.id_field is not None:
            path += "/" + kwargs[self.id_field]
        return self.request("PATCH", path, **kwargs)


class Api:
    """A simple HTTP client for the Firecracker API"""

    def __init__(self, api_usocket_full_name, *, validate=True, on_error=None):
        # `on_error`, if given, is called as (method, path, error_str) when a
        # request raises before receiving a response.
        self.error_callback = on_error
        self.socket = api_usocket_full_name
        url_encoded_path = urllib.parse.quote_plus(api_usocket_full_name)
        self.endpoint = DEFAULT_SCHEME + url_encoded_path
        self.session = Session()
        # Initialize the swagger validator
        self.validator = SwaggerValidator() if validate else None

        self.describe = Resource(self, "/")
        self.vm = Resource(self, "/vm")
        self.vm_config = Resource(self, "/vm/config")
        self.actions = Resource(self, "/actions")
        self.boot = Resource(self, "/boot-source")
        self.drive = Resource(self, "/drives", "drive_id")
        self.version = Resource(self, "/version")
        self.logger = Resource(self, "/logger")
        self.machine_config = Resource(self, "/machine-config")
        self.metrics = Resource(self, "/metrics")
        self.network = Resource(self, "/network-interfaces", "iface_id")
        self.mmds = Resource(self, "/mmds")
        self.mmds_config = Resource(self, "/mmds/config")
        self.balloon = Resource(self, "/balloon")
        self.balloon_stats = Resource(self, "/balloon/statistics")
        self.balloon_hinting_start = Resource(self, "/balloon/hinting/start")
        self.balloon_hinting_status = Resource(self, "/balloon/hinting/status")
        self.balloon_hinting_stop = Resource(self, "/balloon/hinting/stop")
        self.vsock = Resource(self, "/vsock")
        self.snapshot_create = Resource(self, "/snapshot/create")
        self.snapshot_load = Resource(self, "/snapshot/load")
        self.cpu_config = Resource(self, "/cpu-config")
        self.entropy = Resource(self, "/entropy")
        self.pmem = Resource(self, "/pmem", "id")
        self.serial = Resource(self, "/serial")
        self.memory_hotplug = Resource(self, "/hotplug/memory")


================================================
FILE: tests/framework/jailer.py
================================================
# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0
"""Define a class for creating the jailed context."""

import os
import shutil
import stat
from pathlib import Path

from tenacity import Retrying, retry_if_exception_type, stop_after_delay

from framework import defs, utils
from framework.defs import FC_BINARY_NAME

# Default name for the socket used for API calls.
DEFAULT_USOCKET_NAME = "run/firecracker.socket"

# The default location for the chroot.
DEFAULT_CHROOT_PATH = f"{defs.DEFAULT_TEST_SESSION_ROOT_PATH}/jailer"


class JailerContext:
    """Represents jailer configuration and contains jailer helper functions.

    Each microvm will have a jailer configuration associated with it.
    """

    # Keep in sync with parameters from code base.
    # Class-level defaults; every one of these is shadowed by an instance
    # attribute set in __init__.
    jailer_id = None
    exec_file = None
    uid = None
    gid = None
    chroot_base = None
    daemonize = None
    new_pid_ns = None
    extra_args = None
    api_socket_name = None
    cgroups = None
    resource_limits = None
    cgroup_ver = None
    parent_cgroup = None

    def __init__(
        self,
        jailer_id,
        exec_file,
        uid=1234,
        gid=1234,
        chroot_base=DEFAULT_CHROOT_PATH,
        netns=None,
        daemonize=True,
        new_pid_ns=False,
        cgroups=None,
        resource_limits=None,
        cgroup_ver=None,
        parent_cgroup=None,
        **extra_args,
    ):
        """Set up jailer fields.

        This plays the role of a default constructor as it populates
        the jailer's fields with some default values. Each field can be
        further adjusted by each test even with None values.
        """
        self.jailer_id = jailer_id
        assert jailer_id is not None
        self.exec_file = exec_file
        self.uid = uid
        self.gid = gid
        self.chroot_base = Path(chroot_base)
        self.netns = netns
        self.daemonize = daemonize
        self.new_pid_ns = new_pid_ns
        # Any remaining kwargs are forwarded verbatim to the binary after "--".
        self.extra_args = extra_args
        self.api_socket_name = DEFAULT_USOCKET_NAME
        self.cgroups = cgroups or []
        self.resource_limits = resource_limits
        self.cgroup_ver = cgroup_ver
        self.parent_cgroup = parent_cgroup
        assert chroot_base is not None

    # Disabling 'too-many-branches' warning for this function as it needs to
    # check every argument, so the number of branches will increase
    # with every new argument.
    # pylint: disable=too-many-branches
    def construct_param_list(self):
        """Create the list of parameters we want the jailer to start with.

        We want to be able to vary any parameter even the required ones as we
        might want to add integration tests that validate the enforcement of
        mandatory arguments.
        """
        jailer_param_list = []

        # Pretty please, try to keep the same order as in the code base.
        if self.jailer_id is not None:
            jailer_param_list.extend(["--id", str(self.jailer_id)])
        if self.exec_file is not None:
            jailer_param_list.extend(["--exec-file", str(self.exec_file)])
        if self.uid is not None:
            jailer_param_list.extend(["--uid", str(self.uid)])
        if self.gid is not None:
            jailer_param_list.extend(["--gid", str(self.gid)])
        if self.chroot_base is not None:
            jailer_param_list.extend(["--chroot-base-dir", str(self.chroot_base)])
        if self.netns is not None:
            jailer_param_list.extend(["--netns", str(self.netns.path)])
        if self.daemonize:
            jailer_param_list.append("--daemonize")
        if self.new_pid_ns:
            jailer_param_list.append("--new-pid-ns")
        if self.parent_cgroup:
            jailer_param_list.extend(["--parent-cgroup", str(self.parent_cgroup)])
        if self.cgroup_ver:
            jailer_param_list.extend(["--cgroup-version", str(self.cgroup_ver)])
        if self.cgroups:
            for cgroup in self.cgroups:
                jailer_param_list.extend(["--cgroup", str(cgroup)])
        if self.resource_limits is not None:
            for limit in self.resource_limits:
                jailer_param_list.extend(["--resource-limit", str(limit)])
        # applying necessary extra args if needed
        if len(self.extra_args) > 0:
            jailer_param_list.append("--")
            for key, value in self.extra_args.items():
                jailer_param_list.append("--{}".format(key))
                if value is not None:
                    jailer_param_list.append(value)
                    # Remember the socket name so api_socket_path() stays correct.
                    if key == "api-sock":
                        self.api_socket_name = value
        return jailer_param_list

    # pylint: enable=too-many-branches

    def chroot_base_with_id(self):
        """Return the MicroVM chroot base + MicroVM ID."""
        return self.chroot_base / Path(self.exec_file).name / self.jailer_id

    def api_socket_path(self):
        """Return the MicroVM API socket path."""
        return os.path.join(self.chroot_path(), self.api_socket_name)

    def chroot_path(self):
        """Return the MicroVM chroot path."""
        return os.path.join(self.chroot_base_with_id(), "root")

    def jailed_path(self, file_path, create=False, subdir="."):
        """Create a hard link or block special device owned by uid:gid.

        Create a hard link or block special device from the specified file,
        changes the owner to uid:gid, and returns a path to the file which is
        valid within the jail.
        """
        file_path = Path(file_path)
        chroot_path = Path(self.chroot_path())
        global_p = chroot_path / subdir / file_path.name
        global_p.parent.mkdir(parents=True, exist_ok=True)
        jailed_p = Path("/") / subdir / file_path.name
        if create:
            stat_src = file_path.stat()
            if file_path.is_block_device():
                # Recreate the block device node inside the jail with rw owner perms.
                perms = stat.S_IRUSR | stat.S_IWUSR
                os.mknod(global_p, mode=stat.S_IFBLK | perms, device=stat_src.st_rdev)
            else:
                stat_dst = chroot_path.stat()
                if stat_src.st_dev == stat_dst.st_dev:
                    # if they are in the same device, hardlink
                    global_p.unlink(missing_ok=True)
                    global_p.hardlink_to(file_path)
                else:
                    # otherwise, copy
                    shutil.copyfile(file_path, global_p)

            os.chown(global_p, self.uid, self.gid)
        return str(jailed_p)

    def setup(self):
        """Set up this jailer context."""
        os.makedirs(self.chroot_base, exist_ok=True)

    def cleanup(self):
        """Clean up this jailer context."""
        # Remove the cgroup folders associated with this microvm.
        # The base /sys/fs/cgroup/<controller>/firecracker folder will remain,
        # because we can't remove it unless we're sure there's no other running
        # microVM.
        if self.cgroups:
            controllers = set()
            # Extract the controller for every cgroup that needs to be set.
            for cgroup in self.cgroups:
                controllers.add(cgroup.split(".")[0])
            for controller in controllers:
                # Obtain the tasks from each cgroup and wait on them before
                # removing the microvm's associated cgroup folder.
                try:
                    for attempt in Retrying(
                        retry=retry_if_exception_type(TimeoutError),
                        stop=stop_after_delay(5),
                        reraise=True,
                    ):
                        with attempt:
                            self._kill_cgroup_tasks(controller)
                except TimeoutError:
                    pass

                # Remove cgroups and sub cgroups.
                back_cmd = r"-depth -type d -exec rmdir {} \;"
                cmd = "find /sys/fs/cgroup/{}/{}/{} {}".format(
                    controller, FC_BINARY_NAME, self.jailer_id, back_cmd
                )
                # We do not need to know if it succeeded or not; afterall, we
                # are trying to clean up resources created by the jailer itself
                # not the testing system.
                utils.run_cmd(cmd)

    def _kill_cgroup_tasks(self, controller):
        """Simulate wait on pid.

        Read the tasks file and stay there until /proc/{pid}
        disappears. The retry function that calls this code makes
        sure we do not timeout.
        """
        # pylint: disable=subprocess-run-check
        tasks_file = "/sys/fs/cgroup/{}/{}/{}/tasks".format(
            controller, FC_BINARY_NAME, self.jailer_id
        )

        # If tests do not call start on machines, the cgroups will not be
        # created.
        if not os.path.exists(tasks_file):
            return True

        cmd = "cat {}".format(tasks_file)
        result = utils.check_output(cmd)
        tasks_split = result.stdout.splitlines()
        for task in tasks_split:
            # A live task means the cgroup is still busy; retry via tenacity.
            if os.path.exists("/proc/{}".format(task)):
                raise TimeoutError
        return True

    @property
    def pid_file(self):
        """Return the PID file of the jailed process"""
        return Path(self.chroot_path()) / (self.exec_file.name + ".pid")


================================================
FILE: tests/framework/microvm.py
================================================
# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0
"""Classes for working with microVMs.

This module defines `Microvm`, which can be used to create, test drive, and
destroy microvms.

- Use the Firecracker Open API spec to populate Microvm API resource URLs.
""" # pylint:disable=too-many-lines import json import logging import os import re import select import shutil import signal import time import uuid from collections import namedtuple from dataclasses import dataclass from enum import Enum, auto from functools import cached_property, lru_cache from pathlib import Path from typing import Optional import psutil from tenacity import Retrying, retry, stop_after_attempt, wait_fixed import host_tools.cargo_build as build_tools import host_tools.network as net_tools from framework import utils from framework.defs import DEFAULT_BINARY_DIR, MAX_API_CALL_DURATION_MS from framework.http_api import Api from framework.jailer import JailerContext from framework.microvm_helpers import MicrovmHelpers from framework.properties import global_props from framework.utils_cpu_templates import get_cpu_template_name from framework.utils_drive import VhostUserBlkBackend, VhostUserBlkBackendType from framework.utils_uffd import spawn_pf_handler, uffd_handler from host_tools.fcmetrics import FCMetricsMonitor from host_tools.memory import MemoryMonitor LOG = logging.getLogger("microvm") class SnapshotType(Enum): """Supported snapshot types.""" FULL = auto() DIFF = auto() DIFF_MINCORE = auto() def __repr__(self): cls_name = self.__class__.__name__ return f"{cls_name}.{self.name}" @property def needs_rebase(self) -> bool: """Does this snapshot type need rebasing on top of a base snapshot before restoration?""" return self in [SnapshotType.DIFF, SnapshotType.DIFF_MINCORE] @property def needs_dirty_page_tracking(self) -> bool: """Does taking this snapshot type require dirty page tracking to be enabled?""" return self == SnapshotType.DIFF @property def api_type(self) -> str: """Converts this `SnapshotType` to the string value expected by the Firecracker API""" match self: case SnapshotType.FULL: return "Full" case SnapshotType.DIFF | SnapshotType.DIFF_MINCORE: return "Diff" def hardlink_or_copy(src, dst): """If src and dst are in the same device, 
hardlink. Otherwise, copy.""" dst.touch(exist_ok=False) if dst.stat().st_dev == src.stat().st_dev: dst.unlink() dst.hardlink_to(src) else: shutil.copyfile(src, dst) @dataclass(frozen=True, repr=True) class Snapshot: """A Firecracker snapshot""" vmstate: Path mem: Path net_ifaces: list disks: dict ssh_key: Path snapshot_type: SnapshotType meta: dict def rebase_snapshot( self, base, use_snapshot_editor=False, binary_dir=DEFAULT_BINARY_DIR ): """Rebases current incremental snapshot onto a specified base layer.""" if not self.snapshot_type.needs_rebase: raise ValueError(f"Cannot rebase {self.snapshot_type}") if use_snapshot_editor: build_tools.run_snap_editor_rebase( base.mem, self.mem, binary_dir=binary_dir ) else: build_tools.run_rebase_snap_bin(base.mem, self.mem) new_args = self.__dict__ | {"mem": base.mem} return Snapshot(**new_args) def copy_to_chroot(self, chroot) -> "Snapshot": """ Move all the snapshot files into the microvm jail. Use different names so a snapshot doesn't overwrite our original snapshot. 
""" mem_src = chroot / self.mem.with_suffix(".src").name hardlink_or_copy(self.mem, mem_src) vmstate_src = chroot / self.vmstate.with_suffix(".src").name hardlink_or_copy(self.vmstate, vmstate_src) return Snapshot( vmstate=vmstate_src, mem=mem_src, net_ifaces=self.net_ifaces, disks=self.disks, ssh_key=self.ssh_key, snapshot_type=self.snapshot_type, meta=self.meta, ) @classmethod # TBD when Python 3.11: -> Self def load_from(cls, src: Path) -> "Snapshot": """Load a snapshot saved with `save_to`""" snap_json = src / "snapshot.json" obj = json.loads(snap_json.read_text()) return cls( vmstate=src / obj["vmstate"], mem=src / obj["mem"], net_ifaces=[net_tools.NetIfaceConfig(**d) for d in obj["net_ifaces"]], disks={dsk: src / p for dsk, p in obj["disks"].items()}, ssh_key=src / obj["ssh_key"], snapshot_type=SnapshotType(obj["snapshot_type"]), meta=obj["meta"], ) def save_to(self, dst: Path): """Serialize snapshot details to `dst` Deserialize the snapshot with `load_from` """ for path in [self.vmstate, self.mem, self.ssh_key]: new_path = dst / path.name hardlink_or_copy(path, new_path) new_disks = {} for disk_id, path in self.disks.items(): new_path = dst / path.name hardlink_or_copy(path, new_path) new_disks[disk_id] = new_path.name obj = { "vmstate": self.vmstate.name, "mem": self.mem.name, "net_ifaces": [x.__dict__ for x in self.net_ifaces], "disks": new_disks, "ssh_key": self.ssh_key.name, "snapshot_type": self.snapshot_type.value, "meta": self.meta, } snap_json = dst / "snapshot.json" snap_json.write_text(json.dumps(obj)) def delete(self): """Delete the backing files from disk.""" self.mem.unlink() self.vmstate.unlink() class HugePagesConfig(str, Enum): """Enum describing the huge pages configurations supported Firecracker""" NONE = "None" HUGETLBFS_2MB = "2M" # pylint: disable=R0904 class Microvm: """Class to represent a Firecracker microvm. 
A microvm is described by a unique identifier, a path to all the resources it needs in order to be able to start and the binaries used to spawn it. Besides keeping track of microvm resources and exposing microvm API methods, `spawn()` and `kill()` can be used to start/end the microvm process. """ def __init__( self, microvm_id: str, fc_binary_path: Path, jailer_binary_path: Path, netns: net_tools.NetNs, monitor_memory: bool = True, jailer_kwargs: Optional[dict] = None, numa_node=None, custom_cpu_template: Path = None, pci: bool = False, ): """Set up microVM attributes, paths, and data structures.""" # pylint: disable=too-many-statements # Unique identifier for this machine. assert microvm_id is not None self._microvm_id = microvm_id self.kernel_file = None self.rootfs_file = None self.ssh_key = None self.initrd_file = None self.boot_args = None self.uffd_handler = None self.fc_binary_path = Path(fc_binary_path) assert fc_binary_path.exists() self.jailer_binary_path = Path(jailer_binary_path) assert jailer_binary_path.exists() jailer_kwargs = jailer_kwargs or {} self.netns = netns # Create the jailer context associated with this microvm. 
self.jailer = JailerContext( jailer_id=self._microvm_id, exec_file=self.fc_binary_path, netns=netns, new_pid_ns=True, **jailer_kwargs, ) self.pci_enabled = pci if pci: self.jailer.extra_args["enable-pci"] = None # Copy the /etc/localtime file in the jailer root self.jailer.jailed_path("/etc/localtime", subdir="etc") self._screen_pid = None self.time_api_requests = global_props.host_linux_version != "6.1" # disable the HTTP API timings as they cause a lot of false positives if int(os.environ.get("PYTEST_XDIST_WORKER_COUNT", 1)) > 1: self.time_api_requests = False self.monitors = [] self.memory_monitor = None if monitor_memory: self.memory_monitor = MemoryMonitor(self) self.monitors.append(self.memory_monitor) self.api = None self.log_file = None self.serial_out_path = None self.metrics_file = None self._spawned = False self._killed = False # device dictionaries self.iface = {} self.disks = {} self.disks_vhost_user = {} self.vcpus_count = None self.mem_size_bytes = None self.cpu_template_name = "None" # The given custom CPU template will be set in basic_config() but could # be overwritten via set_cpu_template(). 
self.custom_cpu_template = custom_cpu_template self._connections = [] self._pre_cmd = [] if numa_node: node_str = str(numa_node) self.add_pre_cmd([["numactl", "-N", node_str, "-m", node_str]]) # MMDS content from file self.metadata_file = None self.help = MicrovmHelpers(self) self.gdb_socket = None def __repr__(self): return f"" def mark_killed(self): """ Marks this `Microvm` as killed, meaning test tear down should not try to kill it raises an exception if the Firecracker process managing this VM is not actually dead """ if self.firecracker_pid is not None: utils.wait_process_termination(self.firecracker_pid) self._killed = True def kill(self, might_be_dead=False): """All clean up associated with this microVM should go here.""" # pylint: disable=subprocess-run-check # if it was already killed, return if self._killed: return # Stop any registered monitors for monitor in self.monitors: monitor.stop() # Kill all background SSH connections for connection in self._connections: connection.close(strict=not might_be_dead) # We start with vhost-user backends, # because if we stop Firecracker first, the backend will want # to exit as well and this will cause a race condition. for backend in self.disks_vhost_user.values(): backend.kill() self.disks_vhost_user.clear() assert ( "Shutting down VM after intercepting signal" not in self.log_data or might_be_dead ), self.log_data # pylint: disable=bare-except try: if self.firecracker_pid: os.kill(self.firecracker_pid, signal.SIGKILL) if self.screen_pid: os.kill(self.screen_pid, signal.SIGKILL) except: if not might_be_dead: msg = ( "Failed to kill Firecracker Process. Did it already die (or did the UFFD handler process die and take it down)?" if self.uffd_handler else "Failed to kill Firecracker Process. Did it already die?" 
) self._dump_debug_information(msg) raise # if microvm was spawned then check if it gets killed if self._spawned: # Wait until the Firecracker process is actually dead utils.wait_process_termination(self.firecracker_pid) # The following logic guards us against the case where `firecracker_pid` for some # reason is the wrong PID, e.g. this is a regression test for # https://github.com/firecracker-microvm/firecracker/pull/4442/commits/d63eb7a65ffaaae0409d15ed55d99ecbd29bc572 # filter ps results for the jailer's unique id _, stdout, stderr = utils.run_cmd( f"ps ax -o pid,cmd -ww | grep {self.jailer.jailer_id}" ) assert not stderr, f"error querying processes using `ps`: {stderr}" offenders = [] for proc in stdout.splitlines(): _, cmd = proc.lower().split(maxsplit=1) if "firecracker" in proc and not cmd.startswith("screen"): offenders.append(proc) # make sure firecracker was killed assert not offenders, ( f"Firecracker reported its pid {self.firecracker_pid}, which was killed, but there still exist processes using the supposedly dead Firecracker's jailer_id: \n" + "\n".join(offenders) ) if self.uffd_handler and self.uffd_handler.is_running(): self.uffd_handler.kill() # Mark the microVM as not spawned, so we avoid trying to kill twice. self._spawned = False self._killed = True if self.time_api_requests: self._validate_api_response_times() if self.memory_monitor: self.memory_monitor.check_samples() def _validate_api_response_times(self): """ Parses the firecracker logs for information regarding api server request processing times, and asserts they are within acceptable bounds. """ # Log messages are either # 2023-06-16T07:45:41.767987318 [fc44b23e-ce47-4635-9549-5779a6bd9cee:fc_api] The API server received a Get request on "/mmds". # or # 2023-06-16T07:47:31.204704732 [2f2427c7-e4de-4226-90e6-e3556402be84:fc_api] The API server received a Put request on "/actions" with body "{\"action_type\": \"InstanceStart\"}". 
api_request_regex = re.compile( r"\] The API server received a (?P\w+) request on \"(?P(/(\w|-)*)+)\"( with body (?P.*))?\." ) api_request_times_regex = re.compile( r"\] Total previous API call duration: (?P\d+) us.$" ) # Note: Processing of api requests is synchronous, so these messages cannot be torn by concurrency effects log_lines = self.log_data.split("\n") ApiCall = namedtuple("ApiCall", "method url body") current_call = None for log_line in log_lines: match = api_request_regex.search(log_line) if match: if current_call is not None: raise Exception( f"API call duration log entry for {current_call.method} {current_call.url} with body {current_call.body} is missing!" ) current_call = ApiCall( match.group("method"), match.group("url"), match.group("body") ) match = api_request_times_regex.search(log_line) if match: if current_call is None: raise Exception( "Got API call duration log entry before request entry" ) if current_call.url not in ["/snapshot/create", "/snapshot/load"]: exec_time = float(match.group("execution_time")) / 1000.0 assert ( exec_time <= MAX_API_CALL_DURATION_MS ), f"{current_call.method} {current_call.url} API call exceeded maximum duration: {exec_time} ms. 
Body: {current_call.body}" current_call = None @property def firecracker_version(self): """Return the version of the Firecracker executable.""" _, stdout, _ = utils.check_output(f"{self.fc_binary_path} --version") return re.match(r"^Firecracker v(.+)", stdout.partition("\n")[0]).group(1) @property def path(self): """Return the path on disk used that represents this microVM.""" return self.jailer.chroot_base_with_id() # some functions use this fsfiles = path @property def id(self): """Return the unique identifier of this microVM.""" return self._microvm_id @property def log_data(self): """Return the log data.""" if self.log_file is None: return "" return self.log_file.read_text() @property def state(self): """Get the InstanceInfo property and return the state field.""" return self.api.describe.get().json()["state"] @cached_property def firecracker_pid(self): """Return Firecracker's PID Reads the pid from a file created by jailer. """ if not self._spawned: return None # Read the PID from Firecracker's pidfile. 
Retry if # file doesn't exist yet, or doesn't yet contain an integer for attempt in Retrying( stop=stop_after_attempt(5), wait=wait_fixed(0.1), reraise=True, ): with attempt: return int(self.jailer.pid_file.read_text(encoding="ascii")) @cached_property def ps(self): """Returns a handle to the psutil.Process for this VM""" return psutil.Process(self.firecracker_pid) @property def dimensions(self): """Gets a default set of cloudwatch dimensions describing the configuration of this microvm""" return { "instance": global_props.instance, "cpu_model": global_props.cpu_model, "host_kernel": f"linux-{global_props.host_linux_version}", "guest_kernel": self.kernel_file.stem[2:], "rootfs": self.rootfs_file.name, "vcpus": str(self.vcpus_count), "guest_memory": f"{self.mem_size_bytes / (1024 * 1024)}MB", "pci": f"{self.pci_enabled}", } @property def guest_kernel_version(self): """Get the guest kernel version from the filename It won't work if the file name does not like name-X.Y.Z """ splits = self.kernel_file.name.split("-") if len(splits) < 2: return None return tuple(int(x) for x in splits[1].split(".")) def get_metrics(self): """Return iterator to metric data points written by FC""" with self.metrics_file.open() as fd: for line in fd: if not line.endswith("}\n"): LOG.warning("Line is not a proper JSON object. 
Partial write?") continue yield json.loads(line) def get_all_metrics(self): """Return all metric data points written by FC.""" return list(self.get_metrics()) def flush_metrics(self): """Flush the microvm metrics and get the latest datapoint""" self.api.actions.put(action_type="FlushMetrics") # get the latest metrics return self.get_all_metrics()[-1] def create_jailed_resource(self, path): """Create a hard link to some resource inside this microvm.""" return self.jailer.jailed_path(path, create=True) def get_jailed_resource(self, path): """Get the relative jailed path to a resource.""" return self.jailer.jailed_path(path, create=False) def chroot(self): """Get the chroot of this microVM.""" return self.jailer.chroot_path() @property def screen_session(self): """The screen session name The id of this microVM, which should be unique. """ return self.id @property def screen_log(self): """Get the screen log file.""" return f"/tmp/screen-{self.screen_session}.log" @property def screen_pid(self) -> Optional[int]: """Get the screen PID.""" if self._screen_pid: return int(self._screen_pid) return None def pin_vmm(self, cpu_id: int) -> bool: """Pin the firecracker process VMM thread to a cpu list.""" if self.firecracker_pid: for thread_name, thread_pids in utils.get_threads( self.firecracker_pid ).items(): # the firecracker thread should start with firecracker... 
                if thread_name.startswith("firecracker"):
                    for pid in thread_pids:
                        utils.set_cpu_affinity(pid, [cpu_id])
                    # VMM thread group pinned; report success.
                    return True
        # Not spawned yet (no PID), or no matching thread found.
        return False

    def pin_vcpu(self, vcpu_id: int, cpu_id: int):
        """Pin the firecracker vcpu thread to a cpu list.

        Returns True on success, False when the VM has no PID yet.
        """
        # NOTE(review): indexing presumably raises KeyError if the
        # "fc_vcpu <n>" thread is absent — confirm against utils.get_threads.
        if self.firecracker_pid:
            for thread in utils.get_threads(self.firecracker_pid)[f"fc_vcpu {vcpu_id}"]:
                utils.set_cpu_affinity(thread, [cpu_id])
            return True
        return False

    def pin_api(self, cpu_id: int):
        """Pin the firecracker process API server thread to a cpu list.

        Returns True on success, False when the VM has no PID yet.
        """
        if self.firecracker_pid:
            for thread in utils.get_threads(self.firecracker_pid)["fc_api"]:
                utils.set_cpu_affinity(thread, [cpu_id])
            return True
        return False

    def pin_threads(self, first_cpu):
        """
        Pins all microvm threads (VMM, API and vCPUs) to consecutive physical cpu core, starting with "first_cpu"

        Return next "free" cpu core.
        """
        # One physical core per vCPU thread, assigned in order.
        for vcpu, pcpu in enumerate(range(first_cpu, first_cpu + self.vcpus_count)):
            assert self.pin_vcpu(
                vcpu, pcpu
            ), f"Failed to pin fc_vcpu {vcpu} thread to core {pcpu}."
        # The cores first_cpu,...,first_cpu + self.vcpus_count - 1 are assigned to the individual vCPU threads,
        # So the remaining two threads (VMM and API) get first_cpu + self.vcpus_count
        # and first_cpu + self.vcpus_count + 1
        assert self.pin_vmm(
            first_cpu + self.vcpus_count
        ), "Failed to pin firecracker thread."
        assert self.pin_api(
            first_cpu + self.vcpus_count + 1
        ), "Failed to pin fc_api thread."
        return first_cpu + self.vcpus_count + 2

    def add_pre_cmd(self, pre_cmd):
        """Prepends commands to the command line to launch the microVM

        For example, this can be used to pin the VM to a NUMA node or to
        trace the VM with strace. 
""" self._pre_cmd = pre_cmd + self._pre_cmd def spawn( self, log_file="fc.log", serial_out_path="serial.log", log_level="Debug", log_show_level=False, log_show_origin=False, metrics_path="fc.ndjson", emit_metrics: bool = False, validate_api: bool = True, ): """Start a microVM as a daemon or in a screen session.""" # pylint: disable=subprocess-run-check # pylint: disable=too-many-branches self.jailer.setup() self.api = Api( self.jailer.api_socket_path(), validate=validate_api, on_error=lambda verb, uri, err_msg: self._dump_debug_information( f"Error during {verb} {uri}: {err_msg}" ), ) if log_file is not None: self.log_file = Path(self.path) / log_file self.log_file.touch() self.create_jailed_resource(self.log_file) # The default value for `level`, when configuring the logger via cmd # line, is `Info`. We set the level to `Debug` to also have the boot # time printed in the log. self.jailer.extra_args.update({"log-path": log_file, "level": log_level}) if log_show_level: self.jailer.extra_args["show-level"] = None if log_show_origin: self.jailer.extra_args["show-log-origin"] = None if serial_out_path is not None: self.serial_out_path = Path(self.path) / serial_out_path self.serial_out_path.touch() self.create_jailed_resource(self.serial_out_path) if metrics_path is not None: self.metrics_file = Path(self.path) / metrics_path self.metrics_file.touch() self.create_jailed_resource(self.metrics_file) self.jailer.extra_args.update({"metrics-path": self.metrics_file.name}) else: assert not emit_metrics if self.metadata_file: if os.path.exists(self.metadata_file): LOG.debug("metadata file exists, adding as a jailed resource") self.create_jailed_resource(self.metadata_file) self.jailer.extra_args.update( {"metadata": os.path.basename(self.metadata_file)} ) if log_level != "Debug": # Checking the timings requires DEBUG level log messages self.time_api_requests = False cmd = [ *self._pre_cmd, str(self.jailer_binary_path), *self.jailer.construct_param_list(), ] # When the 
daemonize flag is on, we want to clone-exec into the # jailer rather than executing it via spawning a shell. if self.jailer.daemonize: utils.check_output(cmd, shell=False) else: # Run Firecracker under screen. This is used when we want to access # the serial console. The file will collect the output from # 'screen'ed Firecracker. screen_pid = utils.start_screen_process( self.screen_log, self.screen_session, cmd[0], cmd[1:], ) self._screen_pid = screen_pid # If `--new-pid-ns` is used, the Firecracker process will detach from # the screen and the screen process will exit. We do not want to # attempt to kill it in that case to avoid a race condition. if self.jailer.new_pid_ns: self._screen_pid = None self._spawned = True if emit_metrics: self.monitors.append(FCMetricsMonitor(self)) # Ensure Firecracker is in as good a state as possible wrts guest # responsiveness / API availability. # If we are using a config file and it has a network device specified, # use SSH to wait until guest userspace is available. If we are # using the API, wait until the log message indicating the API server # has finished initializing is printed (if logging is enabled), or # until the API socket file has been created. # If none of these apply, do a last ditch effort to make sure the # Firecracker process itself at least came up by checking # for the startup log message. Otherwise, you're on your own kid. 
if "config-file" in self.jailer.extra_args and self.iface: assert not serial_out_path self.wait_for_ssh_up() elif "no-api" not in self.jailer.extra_args: if self.log_file and log_level in ("Trace", "Debug", "Info"): self.check_log_message("API server started.") else: self._wait_for_api_socket() if serial_out_path is not None: self.api.serial.put(serial_out_path=serial_out_path) elif self.log_file and log_level in ("Trace", "Debug", "Info"): assert not serial_out_path self.check_log_message("Running Firecracker") @retry(wait=wait_fixed(0.2), stop=stop_after_attempt(5), reraise=True) def _wait_for_api_socket(self): """Wait until the API socket and chroot folder are available.""" # We expect the jailer to start within 80 ms. However, we wait for # 1 sec since we are rechecking the existence of the socket 5 times # and leave 0.2 delay between them. os.stat(self.jailer.api_socket_path()) @retry(wait=wait_fixed(0.2), stop=stop_after_attempt(5), reraise=True) def check_log_message(self, message): """Wait until `message` appears in logging output.""" assert ( message in self.log_data ), f'Message ("{message}") not found in log data ("{self.log_data}").' @retry(wait=wait_fixed(0.2), stop=stop_after_attempt(5), reraise=True) def get_exit_code(self): """Get exit code from logging output""" exit_msg_pattern = ( r"Firecracker exiting (with error|successfully). 
exit_code=(\d+)" ) match = re.search(exit_msg_pattern, self.log_data) if match: exit_code = int(match.group(2)) return exit_code raise AssertionError(f"unable to find exit code from the log: {self.log_data}") @retry(wait=wait_fixed(0.2), stop=stop_after_attempt(5), reraise=True) def check_any_log_message(self, messages): """Wait until any message in `messages` appears in logging output.""" for message in messages: if message in self.log_data: return raise AssertionError( f"`{messages}` were not found in this log: {self.log_data}" ) def serial_input(self, input_string): """Send a string to the Firecracker serial console via screen.""" input_cmd = f'screen -S {self.screen_session} -p 0 -X stuff "{input_string}"' return utils.check_output(input_cmd) def basic_config( self, vcpu_count: int = 2, smt: bool = None, mem_size_mib: int = 256, add_root_device: bool = True, boot_args: str = None, use_initrd: bool = False, track_dirty_pages: bool = False, huge_pages: HugePagesConfig = None, rootfs_io_engine=None, cpu_template: Optional[str] = None, enable_entropy_device=False, ): """Shortcut for quickly configuring a microVM. It handles: - CPU and memory. - Kernel image (will load the one in the microVM allocated path). - Root File System (will use the one in the microVM allocated path). - Does not start the microvm. The function checks the response status code and asserts that the response is within the interval [200, 300). If boot_args is None, the default boot_args used in tests is reboot=k panic=1 nomodule swiotlb=noforce console=ttyS0 [pci=off] which differs from Firecracker's default only in the enabling of the serial console. 
Reference: file:../../src/vmm/src/vmm_config/boot_source.rs::DEFAULT_KERNEL_CMDLINE """ self.api.machine_config.put( vcpu_count=vcpu_count, smt=smt, mem_size_mib=mem_size_mib, track_dirty_pages=track_dirty_pages, huge_pages=huge_pages, ) self.vcpus_count = vcpu_count self.mem_size_bytes = mem_size_mib * 2**20 if self.custom_cpu_template is not None: self.set_cpu_template(self.custom_cpu_template) if cpu_template is not None: self.set_cpu_template(cpu_template) if self.memory_monitor: self.memory_monitor.start() if boot_args is not None: self.boot_args = boot_args else: self.boot_args = "reboot=k panic=1 nomodule swiotlb=noforce console=ttyS0" if not self.pci_enabled: self.boot_args += " pci=off" boot_source_args = { "kernel_image_path": self.create_jailed_resource(self.kernel_file), "boot_args": self.boot_args, } if use_initrd and self.initrd_file is not None: boot_source_args.update( initrd_path=self.create_jailed_resource(self.initrd_file) ) self.api.boot.put(**boot_source_args) if add_root_device and self.rootfs_file is not None: read_only = self.rootfs_file.suffix == ".squashfs" # Add the root file system self.add_drive( drive_id="rootfs", path_on_host=self.rootfs_file, is_root_device=True, is_read_only=read_only, io_engine=rootfs_io_engine, ) if enable_entropy_device: self.enable_entropy_device() def set_cpu_template(self, cpu_template): """Set guest CPU template.""" self.cpu_template_name = get_cpu_template_name(cpu_template) if cpu_template is None: return # static CPU template if isinstance(cpu_template, str): self.api.machine_config.patch(cpu_template=cpu_template) # custom CPU template elif isinstance(cpu_template, dict): self.api.cpu_config.put(**cpu_template["template"]) def add_drive( self, drive_id, path_on_host, is_root_device=False, is_read_only=False, partuuid=None, cache_type=None, io_engine=None, ): """Add a block device.""" path_on_jail = self.create_jailed_resource(path_on_host) self.api.drive.put( drive_id=drive_id, path_on_host=path_on_jail, 
is_root_device=is_root_device, is_read_only=is_read_only, partuuid=partuuid, cache_type=cache_type, io_engine=io_engine, ) self.disks[drive_id] = path_on_host def add_vhost_user_drive( self, drive_id, path_on_host, partuuid=None, is_root_device=False, is_read_only=False, cache_type=None, backend_type=VhostUserBlkBackendType.CROSVM, ): """Add a vhost-user block device.""" # It is possible that the user adds another drive # with the same ID. In that case, we should clean # the previous backend up first. prev = self.disks_vhost_user.pop(drive_id, None) if prev: prev.kill() backend = VhostUserBlkBackend.with_backend( backend_type, path_on_host, self.chroot(), drive_id, is_read_only ) socket = backend.spawn(self.jailer.uid, self.jailer.gid) self.api.drive.put( drive_id=drive_id, socket=socket, partuuid=partuuid, is_root_device=is_root_device, cache_type=cache_type, ) self.disks_vhost_user[drive_id] = backend def patch_drive(self, drive_id, file=None): """Modify/patch an existing block device.""" if file: self.api.drive.patch( drive_id=drive_id, path_on_host=self.create_jailed_resource(file.path), ) self.disks[drive_id] = Path(file.path) else: self.api.drive.patch(drive_id=drive_id) def add_net_iface(self, iface=None, api=True, **kwargs): """Add a network interface""" if iface is None: iface = net_tools.NetIfaceConfig.with_id(len(self.iface)) tap = self.netns.add_tap( iface.tap_name, ip=f"{iface.host_ip}/{iface.netmask_len}" ) self.iface[iface.dev_name] = { "iface": iface, "tap": tap, } # If api, call it... 
there may be cases when we don't want it, for
        # example during restore
        if api:
            self.api.network.put(
                iface_id=iface.dev_name,
                host_dev_name=iface.tap_name,
                guest_mac=iface.guest_mac,
                **kwargs,
            )
        return iface

    def add_pmem(
        self,
        pmem_id,
        path_on_host,
        root_device=False,
        read_only=False,
    ):
        """Add a pmem device.

        The backing file is hard-linked into the jail first, then registered
        with the API and tracked in `self.disks`.
        """
        path_on_jail = self.create_jailed_resource(path_on_host)
        self.api.pmem.put(
            id=pmem_id,
            path_on_host=path_on_jail,
            root_device=root_device,
            read_only=read_only,
        )
        self.disks[pmem_id] = path_on_host

    def start(self):
        """Start the microvm.

        This function validates that the microvm boot succeeds.
        """
        # Check that the VM has not started yet
        assert self.state == "Not started"
        self.api.actions.put(action_type="InstanceStart")
        # Check that the VM has started
        assert self.state == "Running"
        # If a network interface is configured, wait for guest userspace too.
        if self.iface:
            self.wait_for_ssh_up()

    def pause(self):
        """Pauses the microVM"""
        self.api.vm.patch(state="Paused")

    def resume(self):
        """Resume the microVM"""
        self.api.vm.patch(state="Resumed")

    def make_snapshot(
        self,
        snapshot_type: SnapshotType,
        *,
        mem_path: str = "mem",
        vmstate_path="vmstate",
    ):
        """Create a Snapshot object from a microvm.

        The snapshot's memory and vstate files will be saved at the
        specified paths relative to the Microvm's chroot.

        It pauses the microvm before taking the snapshot. 
        """
        self.pause()
        # Notify monitor that snapshot is being created
        if self.memory_monitor:
            self.memory_monitor.set_threshold_for_snapshot()
        self.api.snapshot_create.put(
            mem_file_path=str(mem_path),
            snapshot_path=str(vmstate_path),
            snapshot_type=snapshot_type.api_type,
        )
        root = Path(self.chroot())
        # Record everything a later restore needs: artifacts, networking,
        # SSH key and basic machine metadata.
        return Snapshot(
            vmstate=root / vmstate_path,
            mem=root / mem_path,
            disks=self.disks,
            net_ifaces=[x["iface"] for ifname, x in self.iface.items()],
            ssh_key=self.ssh_key,
            snapshot_type=snapshot_type,
            meta={
                "kernel_file": str(self.kernel_file),
                "vcpus_count": self.vcpus_count,
            },
        )

    def snapshot_diff(self, *, mem_path: str = "mem", vmstate_path="vmstate"):
        """Make a Diff snapshot"""
        return self.make_snapshot(
            SnapshotType.DIFF, mem_path=mem_path, vmstate_path=vmstate_path
        )

    def snapshot_full(self, *, mem_path: str = "mem", vmstate_path="vmstate"):
        """Make a Full snapshot"""
        return self.make_snapshot(
            SnapshotType.FULL, mem_path=mem_path, vmstate_path=vmstate_path
        )

    def restore_from_snapshot(
        self,
        snapshot: Snapshot,
        resume: bool = False,
        rename_interfaces: dict = None,
        vsock_override: str = None,
        *,
        uffd_handler_name: str = None,
    ):
        """Restore a snapshot"""
        jailed_snapshot = snapshot.copy_to_chroot(Path(self.chroot()))
        if uffd_handler_name:
            # Spawn a UFFD page-fault handler process to back guest memory.
            self.uffd_handler = spawn_pf_handler(
                self,
                uffd_handler(uffd_handler_name, binary_dir=self.fc_binary_path.parent),
                jailed_snapshot,
            )
        # Paths as seen from inside the jail (chroot) start at "/".
        jailed_mem = Path("/") / jailed_snapshot.mem.name
        jailed_vmstate = Path("/") / jailed_snapshot.vmstate.name
        snapshot_disks = [v for k, v in jailed_snapshot.disks.items()]
        assert len(snapshot_disks) > 0, "Snapshot requires at least one disk."
        jailed_disks = []
        for disk in snapshot_disks:
            jailed_disks.append(self.create_jailed_resource(disk))
        self.disks = jailed_snapshot.disks
        self.ssh_key = jailed_snapshot.ssh_key
        # Create network interfaces. 
for iface in jailed_snapshot.net_ifaces: self.add_net_iface(iface, api=False) mem_backend = {"backend_type": "File", "backend_path": str(jailed_mem)} if self.uffd_handler is not None: mem_backend = { "backend_type": "Uffd", "backend_path": str(self.uffd_handler.socket_path), } for key, value in jailed_snapshot.meta.items(): setattr(self, key, value) # Adjust things just in case self.kernel_file = Path(self.kernel_file) iface_overrides = [] if rename_interfaces is not None: iface_overrides = [ {"iface_id": k, "host_dev_name": v} for k, v in rename_interfaces.items() ] optional_kwargs = {} if iface_overrides: # For backwards compatibility ab testing we want to avoid adding # new parameters until we have a release baseline with the new # parameter. Once the release baseline has moved, this assignment # can be inline in the snapshot_load command below optional_kwargs["network_overrides"] = iface_overrides if vsock_override is not None: optional_kwargs["vsock_override"] = {"uds_path": vsock_override} self.api.snapshot_load.put( mem_backend=mem_backend, snapshot_path=str(jailed_vmstate), enable_diff_snapshots=jailed_snapshot.snapshot_type.needs_dirty_page_tracking, resume_vm=resume, **optional_kwargs, ) if self.memory_monitor: response = self.api.machine_config.get() self.mem_size_bytes = int(response.json()["mem_size_mib"]) * 2**20 # Notify monitor that this is a restored VM self.memory_monitor.set_threshold_for_restored_vm() self.memory_monitor.start() # This is not a "wait for boot", but rather a "VM still works after restoration" if jailed_snapshot.net_ifaces and resume: self.wait_for_ssh_up() return jailed_snapshot def enable_entropy_device(self): """Enable entropy device for microVM""" self.api.entropy.put() def restore_from_path(self, snap_dir: Path, **kwargs): """Restore snapshot from a path""" return self.restore_from_snapshot(Snapshot.load_from(snap_dir), **kwargs) @lru_cache def ssh_iface(self, iface_idx=0): """Return a cached SSH connection on a given 
interface id."""
        guest_ip = list(self.iface.values())[iface_idx]["iface"].guest_ip
        self.ssh_key = Path(self.ssh_key)
        connection = net_tools.SSHConnection(
            netns=self.netns.id,
            ssh_key=self.ssh_key,
            user="root",
            host=guest_ip,
            control_path=Path(self.chroot()) / f"ssh-{iface_idx}.sock",
            # Dump FC logs/backtraces whenever an SSH command fails.
            on_error=lambda exc: self._dump_debug_information(
                f"Failure executing command via SSH in microVM: {exc}"
            ),
        )
        # Track the connection so it can be cleaned up with the VM.
        self._connections.append(connection)
        return connection

    @property
    def ssh(self):
        """Return a cached SSH connection on the 1st interface"""
        return self.ssh_iface(0)

    @property
    def thread_backtraces(self):
        """Return backtraces of all threads"""
        backtraces = []
        for thread_name, thread_pids in utils.get_threads(self.firecracker_pid).items():
            for pid in thread_pids:
                try:
                    stack = Path(f"/proc/{pid}/stack").read_text("UTF-8")
                except FileNotFoundError:
                    # process might've gone away between get_threads() call and here
                    continue
                backtraces.append(f"{thread_name} ({pid=}):\n{stack}")
        return "\n".join(backtraces)

    def _dump_debug_information(self, what: str):
        """
        Dumps debug information about this microvm

        Used for example when running a command inside the guest via
        `SSHConnection.check_output` fails.
        """
        LOG.error(what)
        LOG.error("Firecracker logs:\n%s", self.log_data)
        if self.uffd_handler:
            LOG.error("Uffd logs:\n%s", self.uffd_handler.log_data)
        # Backtraces require a live process; skip once the VM was killed.
        if not self._killed:
            LOG.error("Thread backtraces:\n%s", self.thread_backtraces)

    def wait_for_ssh_up(self):
        """Wait for guest running inside the microVM to come up and respond."""
        # Ensure that we have an initialized SSH connection to the guest that can
        # run commands. 
The actual connection retry loop happens in SSHConnection._init_connection _ = self.ssh_iface(0) def enable_gdb(self): """Enables GDB debugging""" self.gdb_socket = "gdb.socket" self.api.machine_config.patch(gdb_socket_path=self.gdb_socket) def hotplug_memory( self, requested_size_mib: int, timeout: int = 60, poll: float = 0.1 ): """Send a hot(un)plug request and wait up to timeout seconds for completion polling every poll seconds Returns: api latency (secs), total latency (secs) """ api_start = time.time() self.api.memory_hotplug.patch(requested_size_mib=requested_size_mib) api_end = time.time() # Wait for the hotplug to complete deadline = time.time() + timeout while time.time() < deadline: if ( self.api.memory_hotplug.get().json()["plugged_size_mib"] == requested_size_mib ): plug_end = time.time() return api_end - api_start, plug_end - api_start time.sleep(poll) raise TimeoutError(f"Hotplug did not complete within {timeout} seconds") class MicroVMFactory: """MicroVM factory""" def __init__(self, binary_path: Path, **kwargs): self.vms = [] self.binary_path = binary_path self.netns_factory = kwargs.pop("netns_factory", net_tools.NetNs) self.kwargs = kwargs assert self.fc_binary_path.exists(), "missing firecracker binary" assert self.jailer_binary_path.exists(), "missing jailer binary" @property def fc_binary_path(self): """The path to the firecracker binary from which this factory will build VMs""" return self.binary_path / "firecracker" @property def jailer_binary_path(self): """The path to the jailer binary using which this factory will build VMs""" return self.binary_path / "jailer" def build(self, kernel=None, rootfs=None, **kwargs): """Build a microvm""" kwargs = self.kwargs | kwargs microvm_id = kwargs.pop("microvm_id", str(uuid.uuid4())) vm = Microvm( microvm_id=microvm_id, fc_binary_path=kwargs.pop("fc_binary_path", self.fc_binary_path), jailer_binary_path=kwargs.pop( "jailer_binary_path", self.jailer_binary_path ), netns=kwargs.pop("netns", 
self.netns_factory(microvm_id)), **kwargs, ) vm.netns.setup() self.vms.append(vm) if kernel is not None: vm.kernel_file = kernel if rootfs is not None: ssh_key = rootfs.with_suffix(".id_rsa") # copy only iff not a read-only rootfs rootfs_path = rootfs if rootfs_path.suffix != ".squashfs": rootfs_path = Path(vm.path) / rootfs.name shutil.copyfile(rootfs, rootfs_path) vm.rootfs_file = rootfs_path vm.ssh_key = ssh_key return vm def build_from_snapshot(self, snapshot: Snapshot, uffd_handler_name=None): """Build a microvm from a snapshot""" vm = self.build() vm.spawn() vm.restore_from_snapshot( snapshot, resume=True, uffd_handler_name=uffd_handler_name ) return vm def build_n_from_snapshot( self, current_snapshot, nr_vms, *, uffd_handler_name=None, incremental=False, use_snapshot_editor=True, no_netns_reuse=False, ): """A generator of `n` microvms restored, either all restored from the same given snapshot (incremental=False), or created by taking successive snapshots of restored VMs """ last_snapshot = None for _ in range(nr_vms): microvm = self.build( **( {"netns": net_tools.NetNs(str(uuid.uuid4()))} if no_netns_reuse else {} ) ) microvm.spawn() snapshot_copy = microvm.restore_from_snapshot( current_snapshot, resume=True, uffd_handler_name=uffd_handler_name ) yield microvm if incremental: # When doing diff snapshots, we continuously overwrite the same base snapshot file from the first # iteration in-place with successive snapshots, so don't delete it! 
if ( last_snapshot is not None and not last_snapshot.snapshot_type.needs_rebase ): last_snapshot.delete() next_snapshot = microvm.make_snapshot(current_snapshot.snapshot_type) if current_snapshot.snapshot_type.needs_rebase: next_snapshot = next_snapshot.rebase_snapshot( current_snapshot, use_snapshot_editor, binary_dir=microvm.fc_binary_path.parent, ) last_snapshot = current_snapshot current_snapshot = next_snapshot microvm.kill() snapshot_copy.delete() if last_snapshot is not None and not last_snapshot.snapshot_type.needs_rebase: last_snapshot.delete() current_snapshot.delete() def kill(self): """Clean up all built VMs""" for vm in self.vms: vm.kill() vm.jailer.cleanup() chroot_base_with_id = vm.jailer.chroot_base_with_id() if len(vm.jailer.jailer_id) > 0 and chroot_base_with_id.exists(): shutil.rmtree(chroot_base_with_id) vm.netns.cleanup() self.vms.clear() class Serial: """Class for serial console communication with a Microvm.""" RX_TIMEOUT_S = 60 def __init__(self, vm): """Initialize a new Serial object.""" self._poller = None self._vm = vm def open(self): """Open a serial connection.""" # Open the screen log file. if self._poller is not None: # serial already opened return attempt = 0 while not Path(self._vm.screen_log).exists() and attempt < 5: time.sleep(0.2) attempt += 1 serial_log_fd = os.open(self._vm.screen_log, os.O_RDONLY) self._poller = select.poll() self._poller.register(serial_log_fd, select.POLLIN | select.POLLHUP) def tx(self, input_string, end="\n"): # pylint: disable=invalid-name # No need to have a snake_case naming style for a single word. r"""Send a string terminated by an end token (defaulting to "\n").""" self._vm.serial_input(input_string + end) def rx_char(self): """Read a single character.""" result = self._poller.poll(0.1) for fd, flag in result: if flag & select.POLLHUP: assert False, "Oh! The console vanished before test completed." 
if flag & select.POLLIN: output_char = str(os.read(fd, 1), encoding="utf-8", errors="ignore") return output_char return "" def rx(self, token="\n"): # pylint: disable=invalid-name # No need to have a snake_case naming style for a single word. r"""Read a string delimited by an end token (defaults to "\n").""" rx_str = "" start = time.time() while True: rx_str += self.rx_char() if rx_str.endswith(token): break if (time.time() - start) >= self.RX_TIMEOUT_S: self._vm.kill() assert False return rx_str ================================================ FILE: tests/framework/microvm_helpers.py ================================================ # Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. # SPDX-License-Identifier: Apache-2.0 """Microvm helper functions for interactive use""" import ipaddress import os import platform import subprocess import tempfile from pathlib import Path def docker_apt_install(packages: str | list[str]): """Install a package in the Docker devctr""" apt_lists = Path("/var/lib/apt/lists/") if len(list(apt_lists.iterdir())) == 0: subprocess.run(["apt", "update"], check=True) if isinstance(packages, str): packages = [packages] subprocess.run(["apt", "install", "-y", *packages], check=True) class DockerInfo: """Class to extract information from the Docker environment""" @property def ip(self): """Return this container's IP address""" return ( subprocess.check_output( "ip -j address show eth0 |jq -r '.[].addr_info[].local'", shell=True, ) .decode("ascii") .strip() ) @property def id(self): """Return this container's id""" return platform.node() @property def in_docker(self): """Are we running inside a Docker container?""" return Path("/.dockerenv").exists() DOCKER = DockerInfo() class MicrovmHelpers: """Microvm helper functions for interactive use""" # keep track of assigned subnets shared_subnet_ctr = 0 # Try not to collide with anything by using the last /16 of the 10.x.x.x # private block _supernet = 
ipaddress.IPv4Network("10.255.0.0/16") _subnets_gen = _supernet.subnets(new_prefix=30) # Addresses that can be used outside the netns. Could be public IPv4 blocks _ingress_net = ipaddress.IPv4Network("172.16.0.0/12") _ingress_gen = _ingress_net.hosts() def __init__(self, vm): self.vm = vm def print_log(self): """Print Firecracker's log""" print(self.vm.log_data) def resize_disk(self, disk, size: int = 2**30): """Resize a filesystem The filesystem should be unmounted for this to work """ os.truncate(disk, size) subprocess.check_output(["resize2fs", "-f", str(disk)]) def gdbserver(self, port=2000): """Attach gdbserver to the FC process See https://sourceware.org/gdb/current/onlinedocs/gdb.html/Remote-Debugging.html#Remote-Debugging """ comm = f"localhost:{port}" subprocess.Popen(["gdbserver", "--attach", comm, str(self.vm.firecracker_pid)]) print(f"Connect gdb with:\n\tgdb --ex 'target remote {DOCKER.ip}:{port}'") def lldbserver(self, port=2001): """Attach lldb-server to the FC process See https://lldb.llvm.org/use/remote.html TBD does not work. 
Fails with error: attach failed: lost connection
        """
        # Unlike gdbserver, lldb-server is not a separate package, but is part
        # of lldb and it's about ~400MB to install, so we don't include it in
        # the devctr
        docker_apt_install("lldb")
        subprocess.Popen(["lldb-server", "p", "--listen", f"*:{port}", "--server"])
        print(
            f"Connect lldb with\n\tlldb -o 'platform select remote-linux' -o 'platform connect connect://{DOCKER.ip}:{port}' -o 'attach {self.vm.firecracker_pid}'"
        )

    def tmux_neww(self, cmd: str):
        """Open a window in the local tmux"""
        return subprocess.run(["tmux", "neww", cmd], check=True)

    def how_to_ssh(self):
        """Print how to SSH to the microvm

        This may be useful for example to get a terminal
        """
        ip = self.vm.iface["eth0"]["iface"].guest_ip
        return f"{self.vm.netns.cmd_prefix()} ssh -o StrictHostKeyChecking=no -i {self.vm.ssh_key} root@{ip}"

    def tmux_ssh(self, cmd=""):
        """Open a tmux window with an SSH session to the VM"""
        if len(cmd) > 0:
            cmd = f" {cmd}"
        return self.tmux_neww(self.how_to_ssh() + cmd)

    def enable_console(self):
        """Helper method to attach a console, before the machine boots"""
        # Too late once .spawn() created the API handle.
        if self.vm.api is not None:
            raise RuntimeError(".spawn already called, too late to enable the console")
        if self.vm.boot_args is None:
            self.vm.boot_args = ""
        self.vm.boot_args += "console=ttyS0 reboot=k panic=1 swiotlb=noforce"
        # The console is reached through screen, which needs a foreground
        # process in the original PID namespace.
        self.vm.jailer.daemonize = False
        self.vm.jailer.new_pid_ns = False

    def how_to_console(self):
        """Print how to connect to the VM console"""
        return f"screen -dR {self.vm.screen_session}"

    def tmux_console(self):
        """Open a tmux window with the console"""
        return self.tmux_neww(self.how_to_console())

    def how_to_docker(self):
        """How to get into this container from outside"""
        return f"docker exec -it {DOCKER.id}"

    def enable_ip_forwarding(self, iface="eth0", ingress_ipv4=None):
        """Enables IP forwarding in the guest"""
        # Class-level counter keeps veth names/subnets unique across VMs.
        i = MicrovmHelpers.shared_subnet_ctr
        MicrovmHelpers.shared_subnet_ctr += 1
        netns = self.vm.netns.id
        veth_host = f"vethhost{i}"
        veth_guest = 
f"vethguest{i}" veth_net = next(self._subnets_gen) veth_host_ip, veth_guest_ip = list(veth_net.hosts()) iface = self.vm.iface[iface]["iface"] tap_host_ip = iface.host_ip tap_net = iface.network.with_netmask # i.e. 192.168.7.0/255.255.255.0 # get the device associated with the default route upstream_dev = ( subprocess.check_output( "ip -j route list default |jq -r '.[0].dev'", shell=True, ) .decode("ascii") .strip() ) def run(cmd): return subprocess.run(cmd, shell=True, check=True) def run_in_netns(cmd): return run(f"ip netns exec {netns} " + cmd) # outside netns # iptables -L -v -n --line-numbers run( f"ip link add name {veth_host} type veth peer name {veth_guest} netns {netns}" ) run(f"ip addr add {veth_host_ip}/{veth_net.prefixlen} dev {veth_host}") run_in_netns( f"ip addr add {veth_guest_ip}/{veth_net.prefixlen} dev {veth_guest}" ) run(f"ip link set {veth_host} up") run_in_netns(f"ip link set {veth_guest} up") run("iptables -P FORWARD ACCEPT") # iptables -L FORWARD # iptables -t nat -L run( f"iptables -t nat -A POSTROUTING -s {veth_net} -o {upstream_dev} -j MASQUERADE" ) run_in_netns(f"ip route add default via {veth_host_ip}") run_in_netns( f"iptables -t nat -A POSTROUTING -s {tap_net} -o {veth_guest} -j MASQUERADE" ) # Configure the guest self.vm.ssh.run(f"ip route add default via {tap_host_ip}") # Copy the nameserver from the host nameserver = ( subprocess.check_output( r"grep -oP 'nameserver\s+\K.+' /etc/resolv.conf", shell=True ) .decode("ascii") .strip() ) self.vm.ssh.run(f"echo nameserver {nameserver} >/etc/resolv.conf") # only configure ingress if we get an IP if not ingress_ipv4: return if not isinstance(ingress_ipv4, ipaddress.IPv4Address): ingress_ipv4 = next(self._ingress_gen) guest_ip = iface.guest_ip # packets heading towards the clone address are rewritten to the guest ip run_in_netns( f"iptables -t nat -A PREROUTING -i {veth_guest} -d {ingress_ipv4} -j DNAT --to {guest_ip}" ) # add a route on the host for the clone address run(f"ip route add 
{ingress_ipv4} via {veth_guest_ip}") def trace_cmd_guest(self, fns, cmd, port=4321): """Run trace-cmd on the guest, but transfer the data directly to the host.""" docker_apt_install("trace-cmd") print("host> trace-cmd listen") _proc = subprocess.Popen( [ "ip", "netns", "exec", self.vm.netns.id, "trace-cmd", "listen", "-p", str(port), ] ) print("guest> trace-cmd record") host_ip = self.vm.iface["eth0"]["iface"].host_ip _guest_ps = self.vm.ssh.run( f"trace-cmd record -N {host_ip}:{port} -p function {' '.join(fns)} {cmd}" ) return list(Path(".").glob("trace.*.dat")) def tmux_gdb(self): """Run GDB on a new tmux window""" chroot_gdb_socket = Path(self.vm.jailer.chroot_path(), self.vm.gdb_socket) with tempfile.NamedTemporaryFile( mode="w", suffix=".gdb", delete=False, prefix="fc_gdb_" ) as f: f.write( f""" target remote {chroot_gdb_socket} directory resources/linux hbreak start_kernel continue """ ) gdb_script = f.name self.tmux_neww( f""" until [ -S {chroot_gdb_socket} ]; do echo 'waiting for {chroot_gdb_socket}'; sleep 1; done; gdb {self.vm.kernel_file} -x {gdb_script} """ ) ================================================ FILE: tests/framework/properties.py ================================================ # Copyright 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
# SPDX-License-Identifier: Apache-2.0
# pylint:disable=broad-except
"""
Metadata we want to attach to tests for further analysis and troubleshooting
"""

import os
import platform
import re
import subprocess
from pathlib import Path

from framework.utils import get_kernel_version
from framework.utils_cpuid import get_cpu_codename, get_cpu_model_name, get_cpu_vendor
from framework.utils_imdsv2 import imdsv2_get


def run_cmd(cmd):
    """Return the stdout of a command

    Runs through the shell and raises CalledProcessError on non-zero exit.
    """
    return subprocess.check_output(cmd, shell=True).decode().strip()


def get_os_version():
    """Get the OS version

    >>> get_os_version()
    'Ubuntu 24.04.3 LTS'
    """
    os_release = Path("/etc/os-release").read_text(encoding="ascii")
    match = re.search('PRETTY_NAME="(.*)"', os_release)
    return match.group(1)


def get_host_os(kv: str = None):
    """
    Extract OS information from the kernel if it's there.

    This only works for AL2 and AL2023

    >>> get_host_os("6.1.41-63.118.amzn2023.x86_64")
    'amzn2023'
    """
    if kv is None:
        kv = platform.release()
    parts = kv.split("-")
    if len(parts) < 2:
        # no "-" separator: not an Amazon Linux style version string
        return kv
    misc = parts[1].split(".")
    if len(misc) > 2 and misc[2] in {"amzn2", "amzn2023"}:
        return misc[2]
    return kv


class GlobalProps:
    """Class to hold metadata about the testrun environment"""

    def __init__(self):
        self.cpu_architecture: str = platform.machine()
        self.cpu_model = get_cpu_model_name()
        self.cpu_codename = get_cpu_codename()
        self.cpu_vendor = get_cpu_vendor().name.lower()
        self.cpu_microcode = run_cmd(
            "grep microcode /proc/cpuinfo |head -1 |awk '{print $3}'"
        )
        self.host_linux_full_version = platform.release()
        # major.minor
        self.host_linux_version = get_kernel_version(1)
        # major.minor.patch
        self.host_linux_patch = get_kernel_version(2)
        self.os = get_os_version()
        self.host_os = get_host_os() or "NA"
        self.libc_ver = "-".join(platform.libc_ver())
        self.rust_version = run_cmd("rustc --version |awk '{print $2}'")
        # Buildkite/PR information
        self.buildkite_pipeline_slug = os.environ.get("BUILDKITE_PIPELINE_SLUG")
        self.buildkite_build_number = os.environ.get("BUILDKITE_BUILD_NUMBER")
        self.buildkite_pr = os.environ.get("BUILDKITE_PULL_REQUEST", "false") != "false"
        self.buildkite_revision_a = os.environ.get("BUILDKITE_PULL_REQUEST_BASE_BRANCH")
        if self._in_git_repo():
            self.git_commit_id = run_cmd("git rev-parse HEAD")
            self.git_branch = run_cmd("git show -s --pretty=%D HEAD")
            self.git_origin_url = run_cmd("git config --get remote.origin.url")
        else:
            self.git_commit_id = None
            self.git_branch = None
            self.git_origin_url = None

        self.environment = self._detect_environment()
        if self.is_ec2:
            self.instance = imdsv2_get("/meta-data/instance-type")
            self.instance_id = imdsv2_get("/meta-data/instance-id")
            self.ami = imdsv2_get("/meta-data/ami-id")
        else:
            self.instance = "NA"
            self.instance_id = "NA"
            self.ami = "NA"

    @property
    def host_linux_version_tpl(self):
        """Host Linux version major.minor, as a tuple for easy comparison"""
        return tuple(int(x) for x in self.host_linux_version.split("."))

    @property
    def is_ec2(self):
        """Are we running on an EC2 instance?"""
        return self.environment == "ec2"

    def _detect_environment(self):
        """Detect what kind of environment we are running under

        The most reliable way is to just query IMDSv2
        https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/identify_ec2_instances.html
        """
        try:
            imdsv2_get("/meta-data/instance-type")
            return "ec2"
        except Exception:
            return "local"

    def _in_git_repo(self):
        """Return True if the current working directory is inside a git repository."""
        try:
            run_cmd("git rev-parse --show-toplevel")
        except subprocess.CalledProcessError:
            return False
        return True


# Singleton instance, computed once at import time and shared by all tests.
global_props = GlobalProps()


================================================
FILE: tests/framework/static_analysis.py
================================================
# Copyright 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0
"""Module to perform a static analysis of a binary to determine all actually invoked syscalls.

Compares these against seccomp filters, and lists redundant rules (e.g.
those never triggered because the syscall they allow is not actually used in the binary)."""

import functools
import json
import logging
import platform
import re
import subprocess
from abc import ABC, abstractmethod
from collections import defaultdict
from dataclasses import dataclass
from pathlib import Path
from typing import Callable, ClassVar, Generic, Tuple, TypeVar, get_args

import seccomp

logger = logging.getLogger(__name__)

# pylint: disable=c-extension-no-member,too-many-return-statements,too-few-public-methods


@dataclass
class Instruction(ABC):
    """ABC representing a single assembly instruction"""

    # instruction mnemonic, e.g. "mov"
    mnemonic: str
    # operands, already split on "," and stripped
    args: list[str]

    # the character sequence that objdump uses to start inline comments on this architecture
    comment_prefix: ClassVar[str]

    @property
    @abstractmethod
    def is_call(self):
        """Checks whether the given instruction is a subroutine call"""

    @property
    @abstractmethod
    def is_syscall(self):
        """Checks whether the given instruction is a syscall instruction"""

    @classmethod
    def from_str(cls, insn_str):
        """Parses the given string as a single assembly instruction, in the syntax
        that objdump uses by default on this architecture"""
        # remove comments
        insn_str = re.sub(rf"\s+{cls.comment_prefix}.*", "", insn_str)

        parts = insn_str.split(maxsplit=1)

        args = []
        if len(parts) > 1:
            # Strip each argument, in case objdump decides to put
            # spaces after commas (happens on ARM, doesn't happen on x86)
            args = [x.strip() for x in parts[1].split(",")]

        return cls(parts[0], args)

    @abstractmethod
    def backpropagate_register(
        self, reg: str
    ) -> str | int | Tuple[str, Callable[[int], int]]:
        """
        If this instruction loads an immediate into the given register, returns that immediate as an integer.
        If the instruction is a register to register transfer, returns the source register for this transfer.
        If this instruction doesn't change the given register, returns the given register.
        Returns None if we don't know how to backpropagate through this instruction.

        :param reg: the register to backpropagate through this instruction
        :return: An integer if the register is loaded with an immediate by this instruction,
            or a register which needs to be backpropagated further (together with an
            optional forward-propagation function).
        """

    def __str__(self):
        return f"{self.mnemonic} {','.join(self.args)}"


class InstructionX86_64(Instruction):  # pylint: disable=invalid-name
    """A x86_64 instruction"""

    comment_prefix = "#"

    @property
    def is_call(self):
        # "jmp" counts too: tail-calls transfer control the same way
        return self.mnemonic in ["call", "jmp"]

    @property
    def is_syscall(self):
        return self.mnemonic == "syscall"

    def backpropagate_register(
        self, reg: str
    ) -> str | int | Tuple[str, Callable[[int], int]]:
        # Simplifying assumption: an instruction will not modify a register
        # that it doesn't reference (generally wrong, but fine for our purposes)
        affected_registers = [
            match for (match, _) in re.findall(r"(%[a-z0-9]{2,4})(\W|)", str(self))
        ]

        if reg not in affected_registers:
            return reg

        match self.mnemonic:
            case "mov":
                if len(self.args) != 2:
                    raise UnsupportedInstructionError(self, reg)

                # AT&T syntax: source operand first, destination second
                src, dst = self.args

                if dst == reg:
                    # an immediate load; immediates look like "$0x2a", so skip the first 3 chars
                    if src.startswith("$"):
                        return int(src[3:], 16)

                    # We moved something into our target register. If it's a new register, we understand
                    # what's going on. Anything else, and tough luck
                    if re.match(r"^%\w{2,4}$", src):
                        return src

                    raise UnsupportedInstructionError(self, reg)
                return reg
            case "xor":
                src, dst = self.args

                if src == dst:
                    # we know that reg is part of the arguments, and we know that the arguments are identical
                    # Thus we have xor reg,reg, which is effectively zeroing reg
                    return 0
            case "push":
                # a push doesn't do anything
                return reg

        raise UnsupportedInstructionError(self, reg)


class InstructionAarch64(Instruction):
    """An aarch64 assembly instruction"""

    comment_prefix = "//"

    @property
    def is_call(self):
        return self.mnemonic in ["b", "bl"]

    @property
    def is_syscall(self):
        # on aarch64 the syscall instruction is "svc #0x0"
        return self.mnemonic == "svc" and self.args == ["#0x0"]

    def backpropagate_register(
        self, reg: str
    ) -> str | int | Tuple[str, Callable[[int], int]]:
        affected_registers = [
            match
            for (_, match, _) in re.findall(r"(\s|,)([wx]\d{1,2})(\W|)", str(self))
        ]

        if reg not in affected_registers:
            return reg

        match self.mnemonic:
            case "mov":
                if len(self.args) != 2:
                    raise UnsupportedInstructionError(self, reg)

                # ARM syntax: destination operand first, source second
                dst, src = self.args

                if dst == reg:
                    # an immediate load; immediates look like "#0x2a", so skip the first 3 chars
                    if src.startswith("#"):
                        return int(src[3:], 16)

                    if src in ["xzr", "wzr"]:
                        # See https://developer.arm.com/documentation/102374/0102/Registers-in-AArch64---other-registers
                        return 0

                    # We moved something into our target register. If it's a new register, we understand
                    # what's going on. Anything else, and tough luck
                    if re.match(r"^[xw]\d{1,2}$", src):
                        return src

                    raise UnsupportedInstructionError(self, reg)
                return reg
            case "movk":
                # https://developer.arm.com/documentation/dui0802/a/A64-General-Instructions/MOVK
                assert len(self.args) in [2, 3], str(self)

                immediate = int(self.args[1][3:], 16)
                shift = 0
                if len(self.args) == 3:
                    # shift has form "lsl #<amount>", so strip first 5 characters
                    shift = int(self.args[2][5:])

                # movk overwrites only the 16 bits starting at `shift`
                mask = 0b1111_1111_1111_1111 << shift

                return reg, lambda x: (x & ~mask) | (immediate << shift)
            case "add" | "sub":
                if len(self.args) != 3:
                    raise UnsupportedInstructionError(self, reg)

                dst, src, imm = self.args

                if dst != reg:
                    return reg

                try:
                    # We can only handle additions of constants, because
                    # the backpropagation algorithm cannot follow multiple registers.
                    imm = int(imm[3:], 16)
                except ValueError as exc:
                    raise UnsupportedInstructionError(self, reg) from exc

                if self.mnemonic == "add":
                    return src, lambda x: x + imm
                # must have self.mnemonic == "sub" here by the case label above.
                return src, lambda x: x - imm

        raise UnsupportedInstructionError(self, reg)


TInstruction = TypeVar(  # pylint: disable=invalid-name
    "TInstruction", bound=Instruction
)


class Architecture(Generic[TInstruction]):
    """ABC representing an instruction set architecture, specifically containing
    information pertaining to syscall and subroutine call conventions"""

    # The symbolic name of the register used to pass the syscall number to the architectures
    # syscall instruction
    syscall_nr_register: ClassVar[str]
    # The list of registers (in order) used to pass arguments to the architectures syscall instruction
    syscall_argument_registers: ClassVar[list[str]]
    # The list of registers (in order) used to pass arguments to normal function calls
    fn_call_argument_registers: ClassVar[list[str]]
    # Convert to the correct variant of seccomp's Arch enum
    seccomp_arch: ClassVar[seccomp.Arch]

    # The concrete Instruction subclass for this architecture (filled in by __init_subclass__)
    t_instruction: type

    def __init_subclass__(cls) -> None:
        # Determine the generic parameter of a subclass, and store it in t_instruction. pylint doesnt understand it
        # pylint: disable=no-member
        cls.t_instruction = get_args(cls.__orig_bases__[0])[0]

    @staticmethod
    @abstractmethod
    def generalize_reg(reg: str) -> list[str]:
        """For a given register, return a list of registers that partially alias it.
        E.g. on x86, when given %rdi as input, return [%rdi, %edi, %di]"""

    @classmethod
    def determine_register_value(cls, instructions: list[TInstruction], register: str):
        """Determines the value of the given register at the end of the given instruction
        sequence via backpropagation"""
        looking_for = cls.generalize_reg(register)
        transforms = []

        # Walk the instruction stream backwards, following register-to-register
        # moves, until we find an immediate load (or give up).
        for insn in reversed(instructions):
            for reg in looking_for:
                next_reg = insn.backpropagate_register(reg)

                if isinstance(next_reg, tuple):
                    next_reg, transform = next_reg
                    transforms.insert(0, transform)

                if isinstance(next_reg, int):
                    # Apply all transforms in reverse order of which we discovered them: We now forward propagate
                    # the actual value!
return functools.reduce(
                        lambda acc, fn: fn(acc), transforms, next_reg
                    )

                if next_reg != reg:
                    # transfer of interest moved to a different register; follow it
                    looking_for = cls.generalize_reg(next_reg)
                    break

        raise BackpropagationReachedStartOfFn(looking_for)


class ArchitectureX86_64(  # pylint: disable=invalid-name
    Architecture[InstructionX86_64]
):
    """The x86_64 ISA"""

    syscall_nr_register = "%eax"
    syscall_argument_registers = ["%rdi", "%rsi", "%rdx", "%r10", "%r8", "%r9"]
    fn_call_argument_registers = ["%rdi", "%rsi", "%rdx", "%rcx", "%r8", "%r9"]
    seccomp_arch = seccomp.Arch.X86_64

    @staticmethod
    def generalize_reg(reg: str) -> list[str]:
        # legacy registers alias as e.g. %rax/%eax/%ax; numbered ones as %r8/%r8d/%r8w
        suffixes = ["ax", "bx", "cx", "dx", "si", "di", "bp", "sp"]
        prefixes = ["%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15"]

        for suffix in suffixes:
            if reg.endswith(suffix):
                return [f"%r{suffix}", f"%e{suffix}", f"%{suffix}"]

        for prefix in prefixes:
            if reg.startswith(prefix):
                return [prefix, f"{prefix}d", f"{prefix}w"]

        return [reg]


class ArchitectureAarch64(Architecture[InstructionAarch64]):
    """The aarch64 ISA"""

    ALL_REGS = [f"x{i}" for i in range(0, 32)]

    syscall_nr_register = "x8"
    syscall_argument_registers = ALL_REGS[:8]
    fn_call_argument_registers = ALL_REGS[:8]
    seccomp_arch = seccomp.Arch.AARCH64

    @staticmethod
    def generalize_reg(reg: str) -> list[str]:
        # xN (64-bit) and wN (32-bit) refer to the same physical register
        mtch = re.match(r"^[xw](\d{1,2})$", reg)
        if mtch:
            nr = mtch.group(1)
            return [f"x{nr}", f"w{nr}"]
        return [reg]


# musl's generic syscall wrappers: the syscall number is the first function argument
SYSCALL_WRAPPERS = ["syscall", "__syscall_cp", "__syscall_cp_c"]
# wrappers dedicated to one specific syscall, with the number of arguments they forward
SPECIFIC_SYSCALL_WRAPPERS = {
    "ioctl": {"syscall": "ioctl", "nargs": 3},
    "__mmap": {"syscall": "mmap", "nargs": 6},
    "socket": {"syscall": "socket", "nargs": 3},
    "__madvise": {"syscall": "madvise", "nargs": 3},
    # special snowflake ioctl: https://github.com/kraj/musl/blob/ffb23aef7b5339b8c3234f4c6a93c488dc873919/src/termios/tcsetattr.c#L5
    "tcsetattr": {
        "syscall": "ioctl",
        "nargs": 3,
        "arg_transform": {1: lambda x: x + 0x5402},
    },
}


class Function:
    """Represents a single function in the binary (e.g. as determined from DWARF debug information)"""

    def __init__(self, name: str, arch: Architecture):
        self.name = name
        self.instructions = []
        self.arch = arch

    def resolve_registers_before_insn(self, i: int, registers: list[str]):
        """Tries to determine the values of the given registers when the i-th instruction executes.

        Registers whose value could not be determined map to the exception
        describing why, instead of an integer value.
        """
        resolved_registers = {}

        for reg in registers:
            try:
                resolved_registers[reg] = self.arch.determine_register_value(
                    self.instructions[:i], reg
                )
            except (
                UnsupportedInstructionError,
                BackpropagationReachedStartOfFn,
            ) as exc:
                # store the failure so callers can distinguish "unknown" per register
                resolved_registers[reg] = exc

        return resolved_registers


class UnsupportedInstructionError(Exception):
    """Exception indicating that an unsupported instruction was encountered during backpropagation,
    and this unsupported instruction refers to the register being backpropagated."""

    def __init__(self, insn: Instruction, reg: str):
        super().__init__(
            f"Encountered unsupported instruction during backpropagation which affects a register of interest ({reg}): {insn}"
        )
        self.instruction = insn


class BackpropagationReachedStartOfFn(Exception):
    """Exception indicating that the beginning of a function was reached during backpropagation,
    without any immediate value being loaded into the register whose value we were trying to
    determine"""

    def __init__(self, current_register):
        super().__init__(
            f"Backpropagation reached beginning of function definition while backpropagating {current_register}. Maybe it is a parameter itself?"
        )


def parse_objdump_output(output: str, arch: Architecture) -> list[Function]:
    """Parse the stdout from obj dump into a list of the contained functions"""
    lines = output.splitlines()

    # Skip the first line of the output, it's just the file format
    lines = lines[2:]

    functions = []
    current_function = None

    for line in lines:
        line = line.strip()

        # Skip empty lines and those just announcing the start of a new section
        if not line or line.startswith("Disassembly of section"):
            # all functions are separated by empty lines. This is a sanity check to ensure the regex below
            # catches all functions!
            current_function = None
            continue

        # Start of a new function?
        mtch = re.match(r"^<(.+)>:$", line)
        if mtch:
            # group 0 is always the full match (e.g. in our case the entire string because we have a regex with ^ and $)
            # to get the groups defined inside the regex, start at 1.
            current_function = Function(mtch.group(1), arch)
            functions.append(current_function)
            continue

        # otherwise, must be instruction
        if not current_function:
            logger.error(
                "Unexpectedly found data outside of function. Skipping line %s", line
            )
            continue

        current_function.instructions.append(arch.t_instruction.from_str(line))

    return functions


def find_syscalls_in_binary(binary_path: Path):  # pylint: disable=too-many-branches
    """Statically analyzes the given binary to find all syscalls.

    Uses objdump's '-d' option, parses the output, and then at the call site of each
    syscall instruction (and also of simple wrappers around it that weren't inlined
    during compilation), tries to determine the values of registers holding arguments
    to the syscall instruction.

    :return: dict mapping syscall name to a list of argument vectors (None = unknown arg)
    """
    if platform.processor() == "x86_64":
        arch = ArchitectureX86_64()
    else:
        arch = ArchitectureAarch64()

    disassembly = subprocess.check_output(
        f"objdump --demangle=rust -d {binary_path} --no-show-raw-insn --no-addresses".split()
    ).decode("utf-8")

    functions = parse_objdump_output(disassembly, arch)

    found_syscalls = {}

    for fn in functions:
        # We don't want to find syscall instruction inside functions that we treat as synthetic syscall instructions
        # themselves, because we will not be able to figure out any argument values here (since they are instead
        # determined by the arguments to the function itself). Not excluding these would mean the script recognizes
        # them as very broad syscall invocations (e.g. only the syscall number is known, but nothing else, meaning
        # all seccomp rules that refer to this syscall are more specific and thus cannot be ruled out).
if fn.name in SYSCALL_WRAPPERS or fn.name in SPECIFIC_SYSCALL_WRAPPERS:
            continue

        for i, insn in enumerate(fn.instructions):
            if insn.is_syscall:
                # A raw syscall instruction: number and args are in the syscall registers
                resolved_registers = fn.resolve_registers_before_insn(
                    i,
                    [arch.syscall_nr_register] + arch.syscall_argument_registers,
                )

                syscall_nr = resolved_registers.pop(arch.syscall_nr_register)
                syscall_args = [
                    resolved_registers[reg]
                    for reg in arch.syscall_argument_registers
                ]
            elif insn.is_call:
                # in objdump output, these look like 'call <fn>', so strip the angle brackets
                called = insn.args[0][1:-1]

                if called in SYSCALL_WRAPPERS:
                    resolved_registers = fn.resolve_registers_before_insn(
                        i, arch.fn_call_argument_registers
                    )

                    # On x86_64, we are not able to recover the 6th argument passed, since it is passed on the stack
                    # This is because for the syscall wrapper, the syscall number itself is passed in one of the 6
                    # registers available for function arguments in the cdecl convention (instead of being passed in
                    # eax, which is not usually used for function arguments).
                    syscall_nr = resolved_registers.pop(
                        arch.fn_call_argument_registers[0]
                    )
                    syscall_args = [
                        resolved_registers[reg]
                        for reg in arch.fn_call_argument_registers[1:]
                    ]
                elif called in SPECIFIC_SYSCALL_WRAPPERS:
                    resolved_registers = fn.resolve_registers_before_insn(
                        i, arch.fn_call_argument_registers
                    )

                    # the syscall number is fixed by the wrapper itself, not a register
                    syscall_nr = seccomp.resolve_syscall(
                        arch.seccomp_arch, SPECIFIC_SYSCALL_WRAPPERS[called]["syscall"]
                    )
                    syscall_nargs = SPECIFIC_SYSCALL_WRAPPERS[called]["nargs"]
                    syscall_args = [
                        resolved_registers[reg]
                        for reg in arch.fn_call_argument_registers[:syscall_nargs]
                    ]

                    if all(isinstance(arg, Exception) for arg in syscall_args):
                        logger.warning(
                            "Could not resolve any argument for syscall wrapper %s in function %s",
                            called,
                            fn.name,
                        )

                    # If the wrapper performs some transformation of an argument, apply it.
                    # It'd be cool to determine these automatically via back propagation or something,
                    # but that's a fairly complex task, and we only have a single syscall wrapper that needs this
                    for arg, modifier in (
                        SPECIFIC_SYSCALL_WRAPPERS[called]
                        .get("arg_transform", {})
                        .items()
                    ):
                        syscall_args[arg] = modifier(syscall_args[arg])
                else:
                    continue
            else:
                continue

            # This gets triggered in the __lockfile function on x86_64 (syscall number is loader before a branching instruction,
            # but if the branch is not taken, linear execution will eventually hit a ret. So during backpropagation we
            # would need to skip the section of assembly between "jmp" and "ret", but our script doesn't do anything
            # sophisticated like that and thus instead tries to analyse this branch where the syscall number register
            # gets clobbered, and it eventually hits a "pop" which it doesnt understand). The syscall in question is
            # "futex", and we call that one a million times elsewhere anyway.
            #
            # See: https://github.com/kraj/musl/blob/ffb23aef7b5339b8c3234f4c6a93c488dc873919/src/stdio/__lockfile.c#L4
            if isinstance(syscall_nr, Exception):
                logger.warning(
                    "Failed to resolve syscall number for instruction %s in function %s: %s",
                    insn,
                    fn.name,
                    syscall_nr,
                )
                continue

            syscall_name = seccomp.resolve_syscall(
                arch.seccomp_arch, syscall_nr
            ).decode("utf-8")

            if syscall_name not in found_syscalls:
                found_syscalls[syscall_name] = []

            # unresolved arguments are recorded as None ("could be anything")
            found_syscalls[syscall_name].append(
                [None if isinstance(arg, Exception) else arg for arg in syscall_args]
            )

    return found_syscalls


def load_seccomp_rules(seccomp_path: Path):
    """Loads seccomp rules from the given file, and presents them as a dictionary mapping
    syscalls to a list of individual filters.

    Each individual filter describes some restriction of the arguments that are allowed
    to be passed to the syscall.
    """
    filters = json.loads(seccomp_path.read_text("utf-8"))

    # merge the per-thread-category filters into a single list
    all_filters = (
        filters["vcpu"]["filter"] + filters["vmm"]["filter"] + filters["api"]["filter"]
    )

    allowlist = defaultdict(list)

    for seccomp_filter in all_filters:
        syscall_name = seccomp_filter["syscall"]

        allowlist[syscall_name].append(
            {arg["index"]: arg["val"] for arg in seccomp_filter.get("args", [])}
        )

    return allowlist


KNOWN_SUPERFLUOUS_RULES = {
    # This syscall is inserted at runtime by the linux kernel, and thus not actually present in our binary.
    "restart_syscall": [{}]
}


def determine_unneeded_seccomp_rules(seccomp_rules, found_syscalls):
    """Based on the given list of syscall determined through static analysis, compute which
    of the given seccomp rules are redundant.

    By 'redundant' we here mean that no syscall that would match it is actually present
    in the given list of syscalls.
    """
    # TODO: We could also determine "too broad" rules here: If all actual invocations of a syscall specific a parameter,
    # but the rule does not restrict that parameter, we could recommend to strengthen the rule to specify the parameter!
    redundant_rules = []

    for syscall, rules in seccomp_rules.items():
        for allowed_arguments in rules:
            if (
                syscall in KNOWN_SUPERFLUOUS_RULES
                and allowed_arguments in KNOWN_SUPERFLUOUS_RULES[syscall]
            ):
                continue

            # A rule is not needed if for all actual invocation of the syscall the rule governs,
            # the rule does not match.
            # Here, we determine "does not match" as "the rule specifies some value for an argument of the syscall to be
            # allowed, but the invocation of the syscall never passes this specified value of the argument".
            # If there are no invocations of a syscall altogether, then the universal quantification will be vacuously
            # true, and any rules involving that syscall are reported as non-needed.
            rule_not_needed = all(
                any(
                    actual_invocations[arg_index] is not None
                    and actual_invocations[arg_index] != allowed_arg
                    for arg_index, allowed_arg in allowed_arguments.items()
                )
                for actual_invocations in found_syscalls.get(syscall, [])
            )

            if rule_not_needed:
                redundant_rules.append((syscall, allowed_arguments))

    return redundant_rules


================================================
FILE: tests/framework/swagger_validator.py
================================================
# Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0
"""A validator for Firecracker API Swagger schema"""

from pathlib import Path

import yaml
from jsonschema import Draft4Validator, ValidationError


def _filter_none_recursive(data):
    # Recursively drop None values from dicts and lists (the API client models
    # unset fields as None, which would trip additionalProperties/type checks).
    if isinstance(data, dict):
        return {k: _filter_none_recursive(v) for k, v in data.items() if v is not None}
    if isinstance(data, list):
        return [_filter_none_recursive(item) for item in data if item is not None]
    return data


class SwaggerValidator:
    """Validator for API requests against the Swagger/OpenAPI specification"""

    # Singleton state: __new__ always returns the one _instance, and
    # _initialized guards __init__ against re-running on repeat construction.
    _instance = None
    _initialized = False

    def __new__(cls):
        if cls._instance is None:
            cls._instance = super().__new__(cls)
        return cls._instance

    def __init__(self):
        """Initialize the validator with the Swagger specification."""
        if self._initialized:
            return
        self._initialized = True

        swagger_path = (
            Path(__file__).parent.parent.parent
            / "src"
            / "firecracker"
            / "swagger"
            / "firecracker.yaml"
        )

        with open(swagger_path, "r", encoding="utf-8") as f:
            self.swagger_spec = yaml.safe_load(f)

        # Cache validators for each endpoint
        self._validators = {}
        self._build_validators()

    def _build_validators(self):
        """Build JSON schema validators for each endpoint."""
        paths = self.swagger_spec.get("paths", {})
        definitions = self.swagger_spec.get("definitions", {})

        for path, methods in paths.items():
            for method, spec in methods.items():
                if method.upper() not in ["GET", "PUT", "PATCH", "POST", "DELETE"]:
                    continue

                # Build request
# body validators
                parameters = spec.get("parameters", [])
                for param in parameters:
                    if param.get("in") == "body" and "schema" in param:
                        schema = self._resolve_schema(param["schema"], definitions)
                        if method.upper() == "PATCH":
                            # do not validate required fields on PATCH requests
                            schema["required"] = []
                        key = ("request", method.upper(), path)
                        self._validators[key] = Draft4Validator(schema)

                # Build response validators for 200/204 responses
                responses = spec.get("responses", {})
                for status_code, response_spec in responses.items():
                    if str(status_code) in ["200", "204"] and "schema" in response_spec:
                        schema = self._resolve_schema(
                            response_spec["schema"], definitions
                        )
                        key = ("response", method.upper(), path, str(status_code))
                        self._validators[key] = Draft4Validator(schema)

    def _resolve_schema(self, schema, definitions):
        """Resolve $ref references in schema.

        Returns a copy of the schema with all #/definitions/ references inlined
        and additionalProperties defaulted to False so unknown fields fail.
        """
        if "$ref" in schema:
            ref_path = schema["$ref"]
            if ref_path.startswith("#/definitions/"):
                def_name = ref_path.split("/")[-1]
                if def_name in definitions:
                    return self._resolve_schema(definitions[def_name], definitions)

        # Recursively resolve nested schemas
        resolved = schema.copy()
        if "properties" in resolved:
            resolved["properties"] = {
                k: self._resolve_schema(v, definitions)
                for k, v in resolved["properties"].items()
            }
        if "items" in resolved and isinstance(resolved["items"], dict):
            resolved["items"] = self._resolve_schema(resolved["items"], definitions)
        # NOTE(review): prefer `"additionalProperties" not in resolved` (PEP 8 idiom)
        if not "additionalProperties" in resolved:
            resolved["additionalProperties"] = False
        return resolved

    def validate_request(self, method, path, body):
        """
        Validate a request body against the Swagger specification.

        Args:
            method: HTTP method (GET, PUT, PATCH, etc.)
            path: API path (e.g., "/drives/{drive_id}")
            body: Request body as a dictionary

        Raises:
            ValidationError: If the request body doesn't match the schema
        """
        # Normalize path - replace specific IDs with parameter placeholders
        normalized_path = self._normalize_path(path)

        key = ("request", method.upper(), normalized_path)
        if key in self._validators:
            validator = self._validators[key]
            # Remove None values from body before validation
            cleaned_body = _filter_none_recursive(body)
            validator.validate(cleaned_body)
        else:
            raise ValidationError(f"{key} is not in the schema")

    def validate_response(self, method, path, status_code, body):
        """
        Validate a response body against the Swagger specification.

        Args:
            method: HTTP method (GET, PUT, PATCH, etc.)
            path: API path (e.g., "/drives/{drive_id}")
            status_code: HTTP status code (e.g., 200, 204)
            body: Response body as a dictionary

        Raises:
            ValidationError: If the response body doesn't match the schema
        """
        # Normalize path - replace specific IDs with parameter placeholders
        normalized_path = self._normalize_path(path)

        key = ("response", method.upper(), normalized_path, str(status_code))
        if key in self._validators:
            validator = self._validators[key]
            # Remove None values from body before validation
            cleaned_body = _filter_none_recursive(body)
            validator.validate(cleaned_body)
        else:
            raise ValidationError(f"{key} is not in the schema")

    def _normalize_path(self, path):
        """
        Normalize a path by replacing specific IDs with parameter placeholders.

        E.g., "/drives/rootfs" -> "/drives/{drive_id}"
        """
        # Match against known patterns in the swagger spec
        paths = self.swagger_spec.get("paths", {})

        # Direct match
        if path in paths:
            return path

        # Try to match parameterized paths
        parts = path.split("/")
        for swagger_path in paths.keys():
            swagger_parts = swagger_path.split("/")
            if len(parts) == len(swagger_parts):
                match = True
                # NOTE(review): enumerate() index is unused here; plain zip() would do
                for _, (part, swagger_part) in enumerate(zip(parts, swagger_parts)):
                    # Check if it's a parameter placeholder or exact match
                    if swagger_part.startswith("{") and swagger_part.endswith("}"):
                        continue  # This is a parameter, any value matches
                    if part != swagger_part:
                        match = False
                        break
                if match:
                    return swagger_path

        # No match found: return the path unchanged and let lookup fail later
        return path


================================================
FILE: tests/framework/utils.py
================================================
# Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0
"""Generic utility functions that are used in the framework."""

import errno
import json
import logging
import os
import platform
import re
import select
import signal
import subprocess
import time
import typing
from collections import defaultdict, namedtuple
from contextlib import contextmanager
from pathlib import Path
from typing import Dict

import psutil
import semver
from packaging import version
from tenacity import (
    Retrying,
    retry,
    retry_if_exception_type,
    stop_after_attempt,
    wait_fixed,
)

# screen command used to force immediate flushing of the session logfile
FLUSH_CMD = 'screen -S {session} -X colon "logfile flush 0^M"'
CommandReturn = namedtuple("CommandReturn", "returncode stdout stderr")
CMDLOG = logging.getLogger("commands")


def get_threads(pid: int) -> dict:
    """Return dict consisting of child threads.

    Maps thread name -> list of thread ids; empty dict if the process is gone.
    """
    try:
        proc = psutil.Process(pid)
        threads_map = defaultdict(list)
        for thread in proc.threads():
            threads_map[psutil.Process(thread.id).name()].append(thread.id)
        return threads_map
    except psutil.NoSuchProcess:
        return {}


def get_cpu_affinity(pid: int) -> list:
    """Get CPU affinity for a thread."""
    return \
psutil.Process(pid).cpu_affinity()


def set_cpu_affinity(pid: int, cpulist: list) -> list:
    """Set CPU affinity for a thread.

    The given cpulist is expressed in container-visible core numbers and is
    translated to real host cores via CpuMap.
    """
    real_cpulist = list(map(CpuMap, cpulist))
    return psutil.Process(pid).cpu_affinity(real_cpulist)


CpuTimes = namedtuple("CpuTimes", ["user", "system"])


def get_cpu_times(process: psutil.Process) -> Dict[str, CpuTimes]:
    """Return a dict mapping thread name to CPU usage (in seconds) since start."""
    # We're consciously ignoring whatever erorr is returned by psutil and returning
    # empty {} as result in case of any error retrieving the process threads
    # information
    # pylint: disable=locally-disabled, broad-exception-caught
    threads = []
    try:
        threads = process.threads()
    except Exception as exc:
        logging.warning("Process %d does not exist", process.pid, exc_info=exc)
        return {}

    cpu_times = {}
    for thread in threads:
        try:
            thread_name = psutil.Process(thread.id).name()
            cpu_times[thread_name] = CpuTimes(thread.user_time, thread.system_time)
        except Exception as exc:
            # thread may have exited between threads() and the lookup; skip it
            logging.warning("Thread %d no longer exists", thread.id, exc_info=exc)
            continue
    return cpu_times


def get_cpu_utilization(
    process: psutil.Process,
    interval: int = 1,
    split_user_system: bool = False,
) -> Dict[str, float | CpuTimes]:
    """Return current process per thread CPU utilization over the interval (seconds).

    Values are percentages; threads that appeared or disappeared during the
    interval are excluded (intersection of before/after samples).
    """
    cpu_utilization = {}

    cpu_times_before = get_cpu_times(process)
    time.sleep(interval)
    cpu_times_after = get_cpu_times(process)

    threads = set(cpu_times_before.keys()) & set(cpu_times_after.keys())
    for thread_name in threads:
        before = cpu_times_before[thread_name]
        after = cpu_times_after[thread_name]
        user = (after.user - before.user) / interval * 100
        system = (after.system - before.system) / interval * 100
        if split_user_system:
            cpu_utilization[thread_name] = CpuTimes(user, system)
        else:
            cpu_utilization[thread_name] = user + system

    return cpu_utilization


def track_cpu_utilization(
    pid: int, iterations: int, omit: int
) -> Dict[str, list[float]]:
    """Tracks cpu utilization of a process for certain number of iterations.

    Sleeps for first `omit` seconds.
    """
    assert iterations > 0
    # Sleep first `omit` secconds
    time.sleep(omit)

    cpu_utilization = defaultdict(list)
    process = psutil.Process(pid)
    for _ in range(iterations):
        current_cpu_utilization = get_cpu_utilization(process)
        assert len(current_cpu_utilization) > 0
        for thread_name, value in current_cpu_utilization.items():
            cpu_utilization[thread_name].append(value)

    return cpu_utilization


def get_resident_memory(process: psutil.Process):
    """Returns current memory utilization in KiB, including used HugeTLBFS"""
    proc_status = Path("/proc", str(process.pid), "status").read_text("utf-8")
    for line in proc_status.splitlines():
        if line.startswith("HugetlbPages:"):
            # entry is in KiB
            hugetlbfs_usage = int(line.split()[1])
            break
    else:
        assert False, f"HugetlbPages not found in {str(proc_status)}"

    # RSS does not include HugeTLBFS pages, so add them explicitly
    return hugetlbfs_usage + process.memory_info().rss // 1024


@contextmanager
def chroot(path):
    """
    Create a chroot environment for running some code
    """
    # Need to keep these around so we can exit the chroot
    real_root = os.open("/", os.O_RDONLY)
    working_dir = os.getcwd()
    try:
        # Jump in the chroot
        os.chroot(path)
        os.chdir("/")
        yield

    finally:
        # Jump out of the chroot
        os.fchdir(real_root)
        os.chroot(".")
        os.chdir(working_dir)


class CpuMap:
    """Cpu map from real cpu cores to containers visible cores.

    When a docker container is restricted in terms of assigned cpu cores,
    the information from `/proc/cpuinfo` will present all the cpu cores
    of the machine instead of showing only the container assigned cores.
    This class maps the real assigned host cpu cores to virtual cpu cores,
    starting from 0.
    """

    # class-level cache of the host's real cpu list, filled lazily
    arr = []

    def __new__(cls, cpu):
        """Instantiate the class field."""
        assert CpuMap.len() > cpu
        if not CpuMap.arr:
            CpuMap.arr = CpuMap._cpus()
        return CpuMap.arr[cpu]

    @staticmethod
    def len():
        """Get the host cpus count."""
        if not CpuMap.arr:
            CpuMap.arr = CpuMap._cpus()
        return len(CpuMap.arr)

    @classmethod
    def _cpus(cls):
        """Obtain the real processor map.

        See this issue for details:
        https://github.com/moby/moby/issues/20770.

        Note that this method is called only once when
        `CpuMap.arr` is initialized.
        """
        # https://psutil.readthedocs.io/en/latest/#psutil.Process.cpu_affinity
        # > If no argument is passed it returns the current CPU affinity as a
        # > list of intergers.
        return psutil.Process().cpu_affinity()


class CmdBuilder:
    """Command builder class."""

    def __init__(self, bin_path):
        """Initialize the command builder."""
        self._bin_path = bin_path
        self._args = {}

    def with_arg(self, flag, value=""):
        """Add a new argument."""
        self._args[flag] = value
        return self

    def build(self):
        """Build the command."""
        cmd = self._bin_path + " "
        for flag, value in self._args.items():
            cmd += f"{flag} {value} "
        return cmd


def search_output_from_cmd(cmd: str, find_regex: typing.Pattern) -> typing.Match:
    """
    Run a shell command and search a given regex object in stdout.

    If the regex object is not found, a RuntimeError exception is raised.

    :param cmd: command to run
    :param find_regex: regular expression object to search for

    :return: result of re.search()
    """
    # Run the given command in a shell
    _, stdout, _ = check_output(cmd)

    # Search for the object
    content = re.search(find_regex, stdout)

    # If the result is not None, return it
    if content:
        return content

    raise RuntimeError(
        "Could not find '%s' in output for '%s'" % (find_regex.pattern, cmd)
    )


def get_stable_rss_mem(uvm, percentage_delta=1):
    """
    Get the RSS memory that a guest uses, given the pid of the guest.

    Wait till the fluctuations in RSS drop below percentage_delta. Or print
    a warning if this does not happen.
""" first_rss = 0 second_rss = 0 for _ in range(5): first_rss = get_resident_memory(uvm.ps) time.sleep(1) second_rss = get_resident_memory(uvm.ps) abs_diff = abs(first_rss - second_rss) abs_delta = abs_diff / first_rss * 100 print( f"RSS readings (bytes): old: {first_rss} new: {second_rss} abs_diff: {abs_diff} abs_delta: {abs_delta}" ) if abs_delta < percentage_delta: return second_rss time.sleep(1) print("WARNING: RSS readings did not stabilize") return second_rss def _format_output_message(proc, stdout, stderr): output_message = f"\n[{proc.pid}] Command:\n{proc.args}" # Append stdout/stderr to the output message if stdout != "": output_message += f"\n[{proc.pid}] stdout:\n{stdout.decode()}" if stderr != "": output_message += f"\n[{proc.pid}] stderr:\n{stderr.decode()}" output_message += f"\nReturned error code: {proc.returncode}" return output_message def run_cmd(cmd, check=False, shell=True, cwd=None, timeout=None) -> CommandReturn: """ Execute a given command. :param cmd: command to execute :param check: whether a non-zero return code should result in a `ChildProcessError` or not. :param shell: run the command in a sub-shell :param cwd: sets the current directory before the child is executed :param timeout: Time before command execution should be aborted with a `TimeoutExpired` exception :return: return code, stdout, stderr """ if isinstance(cmd, list) or not shell: # Create the async process proc = subprocess.Popen( cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=cwd ) else: proc = subprocess.Popen( cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=cwd ) try: stdout, stderr = proc.communicate(timeout=timeout) except subprocess.TimeoutExpired: proc.kill() # Sometimes stdout/stderr are passed on to children, in which case killing # the parent won't close them and communicate will still hang. 
proc.stdout.close() proc.stderr.close() stdout, stderr = proc.communicate() # Log the message with one call so that multiple statuses # don't get mixed up CMDLOG.warning( "Timeout executing command: %s\n", _format_output_message(proc, stdout, stderr), ) raise output_message = _format_output_message(proc, stdout, stderr) # If a non-zero return code was thrown, raise an exception if check and proc.returncode != 0: raise ChildProcessError(output_message) CMDLOG.debug(output_message) return CommandReturn(proc.returncode, stdout.decode(), stderr.decode()) def check_output(cmd, shell=True, cwd=None, timeout=None) -> CommandReturn: """Identical to `run_cmd`, but always sets `check_output` to `True`.""" return run_cmd(cmd, True, shell, cwd, timeout) def assert_seccomp_level(pid, seccomp_level): """Test that seccomp_level applies to all threads of a process.""" # Get number of threads cmd = "ps -T --no-headers -p {} | awk '{{print $2}}'".format(pid) process = check_output(cmd) threads_out_lines = process.stdout.splitlines() for tid in threads_out_lines: # Verify each thread's Seccomp status cmd = "cat /proc/{}/status | grep Seccomp:".format(tid) process = check_output(cmd) seccomp_line = "".join(process.stdout.split()) assert seccomp_line == "Seccomp:" + seccomp_level def run_guest_cmd(ssh_connection, cmd, expected, use_json=False): """Runs a shell command at the remote accessible via SSH""" _, stdout, stderr = ssh_connection.check_output(cmd) assert stderr == "" stdout = stdout if not use_json else json.loads(stdout) assert stdout == expected def get_process_pidfd(pid): """Get a pidfd file descriptor for the process with PID `pid` Will return a pid file descriptor for the process with PID `pid` if it is still alive. If the process has already exited we will receive either a `ProcessLookupError` exception or and an `OSError` exception with errno `EINVAL`. In these cases, we will return `None`. Any other error while calling the system call, will raise an OSError exception. 
""" try: pidfd = os.pidfd_open(pid) except ProcessLookupError: return None except OSError as err: if err.errno == errno.EINVAL: return None raise return pidfd def wait_process_termination(p_pid): """Wait for a process to terminate. Will return successfully if the process got indeed killed or raises an exception if the process is still alive after retrying several times. """ pidfd = get_process_pidfd(p_pid) # If pidfd is None the process has already terminated if pidfd is not None: epoll = select.epoll() epoll.register(pidfd, select.EPOLLIN) # This will return once the process exits epoll.poll() os.close(pidfd) def get_firecracker_version_from_toml(): """ Return the version of the firecracker crate, from Cargo.toml. Should be the same as the output of `./firecracker --version`, if the code has not been released. """ cmd = "cd ../src/firecracker && cargo pkgid | cut -d# -f2 | cut -d: -f2" _, stdout, _ = check_output(cmd) return semver.Version.parse(stdout) def get_kernel_version(level=2): """Return the current kernel version in format `major.minor.patch`.""" linux_version = platform.release() actual_level = 0 for idx, char in enumerate(linux_version): if char == ".": actual_level += 1 if actual_level > level or (not char.isdigit() and char != "."): linux_version = linux_version[0:idx] break return linux_version def supports_hugetlbfs_discard(): """Returns True if the kernel supports hugetlbfs discard""" return version.parse(get_kernel_version()) >= version.parse("5.18.0") def generate_mmds_session_token( ssh_connection, ipv4_address, token_ttl, imds_compat=False ): """Generate session token used for MMDS V2 requests.""" cmd = "curl -m 2 -s" cmd += " -X PUT" if imds_compat: cmd += ' -H "X-aws-ec2-metadata-token-ttl-seconds: {}"'.format(token_ttl) else: cmd += ' -H "X-metadata-token-ttl-seconds: {}"'.format(token_ttl) cmd += " http://{}/latest/api/token".format(ipv4_address) _, stdout, _ = ssh_connection.run(cmd) token = stdout return token def 
generate_mmds_get_request( ipv4_address, token=None, app_json=True, imds_compat=False ): """Build `GET` request to fetch metadata from MMDS.""" cmd = "curl -m 2 -s" if token is not None: cmd += " -X GET" if imds_compat: cmd += ' -H "X-aws-ec2-metadata-token: {}"'.format(token) else: cmd += ' -H "X-metadata-token: {}"'.format(token) if app_json: cmd += ' -H "Accept: application/json"' cmd += " http://{}/".format(ipv4_address) return cmd def configure_mmds( test_microvm, iface_ids, version=None, ipv4_address=None, imds_compat=False ): """Configure mmds service.""" mmds_config = {"network_interfaces": iface_ids} if version is not None: mmds_config["version"] = version if ipv4_address: mmds_config["ipv4_address"] = ipv4_address if imds_compat is not None: mmds_config["imds_compat"] = imds_compat response = test_microvm.api.mmds_config.put(**mmds_config) return response def populate_data_store(test_microvm, data_store): """Populate the MMDS data store of the microvm with the provided data""" response = test_microvm.api.mmds.get() assert response.json() == {} test_microvm.api.mmds.put(**data_store) response = test_microvm.api.mmds.get() assert response.json() == data_store def start_screen_process(screen_log, session_name, binary_path, binary_params): """Start binary process into a screen session.""" start_cmd = "screen -L -Logfile {logfile} -dmS {session} {binary} {params}" start_cmd = start_cmd.format( logfile=screen_log, session=session_name, binary=binary_path, params=" ".join(binary_params), ) check_output(start_cmd) # Build a regex object to match (number).session_name regex_object = re.compile(r"([0-9]+)\.{}".format(session_name)) # Run 'screen -ls' in a retry loop, 30 times with a 1s delay between calls. # If the output of 'screen -ls' matches the regex object, it will return the # PID. Otherwise, a RuntimeError will be raised. 
for attempt in Retrying( retry=retry_if_exception_type(RuntimeError), stop=stop_after_attempt(30), wait=wait_fixed(1), reraise=True, ): with attempt: screen_pid = search_output_from_cmd( cmd="screen -ls", find_regex=regex_object ).group(1) # Make sure the screen process launched successfully # As the parent process for the binary. screen_ps = psutil.Process(int(screen_pid)) wait_process_running(screen_ps) # Configure screen to flush stdout to file. check_output(FLUSH_CMD.format(session=session_name)) return screen_pid def guest_run_fio_iteration(ssh_connection, iteration): """Start FIO workload into a microVM.""" fio = """fio --filename=/dev/vda --direct=1 --rw=randread --bs=4k \ --ioengine=libaio --iodepth=16 --runtime=10 --numjobs=4 --time_based \ --group_reporting --name=iops-test-job --eta-newline=1 --readonly \ --output /tmp/fio{} > /dev/null &""".format( iteration ) exit_code, _, stderr = ssh_connection.run(fio) assert exit_code == 0, stderr def check_filesystem(ssh_connection, disk_fmt, disk): """Check for filesystem corruption inside a microVM.""" if disk_fmt == "squashfs": return ssh_connection.check_output(f"fsck.{disk_fmt} -n {disk}") def check_entropy(ssh_connection): """Check that we can get random numbers from /dev/hwrng""" ssh_connection.check_output("dd if=/dev/hwrng of=/dev/null bs=4096 count=1") @retry(wait=wait_fixed(0.5), stop=stop_after_attempt(5), reraise=True) def wait_process_running(process): """Wait for a process to run. Will return successfully if the process is in a running state and will otherwise raise an exception. """ assert process.is_running() class Timeout: """ A Context Manager to timeout sections of code. >>> with Timeout(30): # doctest: +SKIP ... 
time.sleep(35) # doctest: +SKIP """ def __init__(self, seconds, msg="Timed out"): self.seconds = seconds self.msg = msg def handle_timeout(self, signum, frame): """Handle SIGALRM signal""" raise TimeoutError() def __enter__(self): signal.signal(signal.SIGALRM, self.handle_timeout) signal.alarm(self.seconds) def __exit__(self, _type, _value, _traceback): signal.alarm(0) def pvh_supported() -> bool: """Checks if PVH boot is supported""" return platform.architecture() == "x86_64" ================================================ FILE: tests/framework/utils_cpu_templates.py ================================================ # Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. # SPDX-License-Identifier: Apache-2.0 """Utilities for CPU template related functionality.""" # pylint:disable=too-many-return-statements import json from pathlib import Path import pytest from framework.properties import global_props from framework.utils_cpuid import CpuModel, CpuVendor, get_cpu_vendor # All existing CPU templates available on Intel INTEL_TEMPLATES = ["C3", "T2", "T2CL", "T2S"] # All existing CPU templates available on AMD AMD_TEMPLATES = ["T2A"] # All existing CPU templates available on ARM ARM_TEMPLATES = ["V1N1"] def get_supported_cpu_templates(): """Return the list of static CPU templates supported by the platform.""" host_linux = global_props.host_linux_version_tpl match get_cpu_vendor(), global_props.cpu_codename: case CpuVendor.INTEL, CpuModel.INTEL_CASCADELAKE: return INTEL_TEMPLATES case CpuVendor.INTEL, CpuModel.INTEL_ICELAKE: return sorted(set(INTEL_TEMPLATES) - {"T2S"}) case CpuVendor.AMD, CpuModel.AMD_MILAN: return AMD_TEMPLATES case CpuVendor.ARM, CpuModel.ARM_NEOVERSE_V1 if host_linux >= (6, 1): return ARM_TEMPLATES case _: return [] SUPPORTED_CPU_TEMPLATES = get_supported_cpu_templates() def get_supported_custom_cpu_templates(): """Return the list of custom CPU templates supported by the platform.""" host_linux = 
global_props.host_linux_version_tpl match get_cpu_vendor(), global_props.cpu_codename: case CpuVendor.INTEL, CpuModel.INTEL_CASCADELAKE: return INTEL_TEMPLATES case CpuVendor.INTEL, CpuModel.INTEL_ICELAKE: return set(INTEL_TEMPLATES) - {"T2S"} case CpuVendor.INTEL, CpuModel.INTEL_SAPPHIRE_RAPIDS: # Intel AMX is only supported on kernel 5.17+. KVM does not support # related CPUID range. if host_linux >= (5, 17): return ["SPR_TO_T2_6.1"] return ["SPR_TO_T2_5.10"] case CpuVendor.INTEL, CpuModel.INTEL_GRANITE_RAPIDS: # Intel AMX is only supported on kernel 5.17+. KVM does not support # related CPUID range. if host_linux >= (5, 17): return ["GNR_TO_T2_6.1"] return ["GNR_TO_T2_5.10"] case CpuVendor.AMD, CpuModel.AMD_MILAN: return AMD_TEMPLATES case CpuVendor.ARM, CpuModel.ARM_NEOVERSE_N1 if host_linux >= (6, 1): return ["V1N1"] case CpuVendor.ARM, CpuModel.ARM_NEOVERSE_V1 if host_linux >= (6, 1): return ["V1N1", "AARCH64_WITH_SVE_AND_PAC"] case CpuVendor.ARM, CpuModel.ARM_NEOVERSE_V1: return ["AARCH64_WITH_SVE_AND_PAC"] case CpuVendor.ARM, CpuModel.ARM_NEOVERSE_V2: return ["AARCH64_WITH_SVE_AND_PAC"] case _: return [] def custom_cpu_templates_params(): """Return Custom CPU templates as pytest parameters""" for name in sorted(get_supported_custom_cpu_templates()): tmpl = Path(f"./data/custom_cpu_templates/{name}.json") yield pytest.param( {"name": name, "template": json.loads(tmpl.read_text("utf-8"))}, id="custom_" + name, ) def static_cpu_templates_params(): """Return Static CPU templates as pytest parameters""" for name in sorted(get_supported_cpu_templates()): yield pytest.param(name, id="static_" + name) def get_cpu_template_name(cpu_template, with_type=False): """Return the CPU template name.""" if isinstance(cpu_template, str): return ("static_" if with_type else "") + cpu_template if isinstance(cpu_template, dict): return ("custom_" if with_type else "") + cpu_template["name"] return "None" ================================================ FILE: 
tests/framework/utils_cpuid.py
================================================
# Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0

"""Helper functions for testing CPU identification functionality."""

import platform
import re
import subprocess
from enum import Enum, auto

from framework.utils import check_output
from framework.utils_imdsv2 import imdsv2_get

# Shell command extracting the CPU feature flags from `lscpu` output.
CPU_FEATURES_CMD = r"lscpu |grep -oP '^Flags:\s+\K.+'"


class CpuVendor(Enum):
    """CPU vendors enum."""

    AMD = auto()
    INTEL = auto()
    ARM = auto()


class CpuModel(str, Enum):
    """CPU models"""

    AMD_MILAN = "AMD_MILAN"
    AMD_GENOA = "AMD_GENOA"
    ARM_NEOVERSE_N1 = "ARM_NEOVERSE_N1"
    ARM_NEOVERSE_V1 = "ARM_NEOVERSE_V1"
    ARM_NEOVERSE_V2 = "ARM_NEOVERSE_V2"
    INTEL_CASCADELAKE = "INTEL_CASCADELAKE"
    INTEL_ICELAKE = "INTEL_ICELAKE"
    INTEL_SAPPHIRE_RAPIDS = "INTEL_SAPPHIRE_RAPIDS"
    INTEL_GRANITE_RAPIDS = "INTEL_GRANITE_RAPIDS"


# Maps each vendor to {model name (x86) or CPU part id (ARM): codename}.
CPU_DICT = {
    CpuVendor.INTEL: {
        "Intel(R) Xeon(R) Platinum 8259CL CPU": "INTEL_CASCADELAKE",
        "Intel(R) Xeon(R) Platinum 8375C CPU": "INTEL_ICELAKE",
        "Intel(R) Xeon(R) Platinum 8488C": "INTEL_SAPPHIRE_RAPIDS",
        "Intel(R) Xeon(R) 6975P-C": "INTEL_GRANITE_RAPIDS",
    },
    CpuVendor.AMD: {"AMD EPYC 7R13": "AMD_MILAN", "AMD EPYC 9R14": "AMD_GENOA"},
    CpuVendor.ARM: {
        "0xd0c": "ARM_NEOVERSE_N1",
        "0xd40": "ARM_NEOVERSE_V1",
        "0xd4f": "ARM_NEOVERSE_V2",
    },
}


def get_cpu_vendor():
    """Return the CPU vendor."""
    brand_str = subprocess.check_output("lscpu", shell=True).strip().decode()
    machine_str = platform.machine()
    if "AuthenticAMD" in brand_str:
        return CpuVendor.AMD
    if "aarch64" in machine_str:
        return CpuVendor.ARM
    return CpuVendor.INTEL


def get_cpu_model_name():
    """Return the CPU model name."""
    if platform.machine() == "aarch64":
        # ARM has no "model name" in /proc/cpuinfo; use the CPU part id.
        _, stdout, _ = check_output("cat /proc/cpuinfo | grep 'CPU part' | uniq")
    else:
        _, stdout, _ = check_output("cat /proc/cpuinfo | grep 'model name' | uniq")
    info = stdout.strip().split(sep=":")
    assert len(info) == 2
    raw_cpu_model = info[1].strip()
    if platform.machine() == "x86_64":
        return raw_cpu_model
    return CPU_DICT[CpuVendor.ARM].get(raw_cpu_model, "Unknown")


def get_cpu_codename(default="Unknown"):
    """Return the CPU codename."""
    cpu_model = get_cpu_model_name()
    vendor = get_cpu_vendor()
    if vendor == CpuVendor.INTEL:
        result = re.match(r"^(.*) @.*$", cpu_model)
        if result:
            return CPU_DICT[CpuVendor.INTEL].get(result.group(1), default)
        # Some Intel CPUs (e.g. Intel Sapphire Rapids) don't include "@ ".
        return CPU_DICT[CpuVendor.INTEL].get(cpu_model, default)
    if vendor == CpuVendor.AMD:
        result = re.match(r"^(.*) [0-9]*-Core Processor$", cpu_model)
        if result:
            return CPU_DICT[CpuVendor.AMD].get(result.group(1), default)
    if vendor == CpuVendor.ARM:
        return cpu_model
    return default


def get_instance_type():
    """Get the instance type through IMDSv2"""
    return imdsv2_get("/meta-data/instance-type")


def check_guest_cpuid_output(
    vm, guest_cmd, expected_header, expected_separator, expected_key_value_store
):
    """Parse cpuid output inside guest and match with expected one."""
    _, stdout, stderr = vm.ssh.run(guest_cmd)
    assert stderr == ""

    for line in stdout.split("\n"):
        if line != "":
            # All the keys have been matched. Stop.
            if not expected_key_value_store:
                break

            # Try to match the header if needed.
            if expected_header not in (None, ""):
                if line.strip() == expected_header:
                    expected_header = None
                continue

            # See if any key matches.
            # We use a try-catch block here since line.split() may fail.
            try:
                [key, value] = list(
                    map(lambda x: x.strip(), line.split(expected_separator))
                )
            except ValueError:
                continue

            if key in expected_key_value_store.keys():
                assert value == expected_key_value_store[key], (
                    "%s does not have the expected value" % key
                )
                del expected_key_value_store[key]
        else:
            # An empty line ends the section of interest.
            break

    assert not expected_key_value_store, (
        "some keys in dictionary have not been found in the output: %s"
        % expected_key_value_store
    )


def build_cpuid_dict(raw_cpuid_output):
    """Build CPUID dict based on raw cpuid output"""
    cpuid_dict = {}
    ptrn = re.compile("^ *(.*) (.*): eax=(.*) ebx=(.*) ecx=(.*) edx=(.*)$")
    for line in raw_cpuid_output.strip().split("\n"):
        match = re.match(ptrn, line)
        assert match, f"`{line}` does not match the regex pattern."
        # All six captured groups are hexadecimal values.
        leaf, subleaf, eax, ebx, ecx, edx = [int(x, 16) for x in match.groups()]
        cpuid_dict[(leaf, subleaf, "eax")] = eax
        cpuid_dict[(leaf, subleaf, "ebx")] = ebx
        cpuid_dict[(leaf, subleaf, "ecx")] = ecx
        cpuid_dict[(leaf, subleaf, "edx")] = edx
    return cpuid_dict


def get_guest_cpuid(vm, leaf=None, subleaf=None):
    """
    Return the guest CPUID of CPU 0 in the form of a dictionary
    where the key is a tuple:
    - leaf (integer)
    - subleaf (integer)
    - register ("eax", "ebx", "ecx" or "edx")
    and the value is the register value (integer).
    """
    if leaf is not None and subleaf is not None:
        read_cpuid_cmd = f"cpuid -r -l {leaf} -s {subleaf} | head -n 2 | grep -v CPU"
    else:
        read_cpuid_cmd = "cpuid -r | sed '/CPU 1/q' | grep -v CPU"
    _, stdout, stderr = vm.ssh.run(read_cpuid_cmd)
    assert stderr == ""
    return build_cpuid_dict(stdout)


def check_cpuid_feat_flags(vm, must_be_set, must_be_unset):
    """
    Check that CPUID feature flag are set and unset as expected.
    """
    cpuid = get_guest_cpuid(vm)
    allowed_regs = ["eax", "ebx", "ecx", "edx"]

    for leaf, subleaf, reg, flags in must_be_set:
        assert reg in allowed_regs
        actual = cpuid[(leaf, subleaf, reg)] & flags
        expected = flags
        assert (
            actual == expected
        ), f"{leaf=:#x} {subleaf=:#x} {reg=} {actual=:#x}, {expected=:#x}"

    for leaf, subleaf, reg, flags in must_be_unset:
        assert reg in allowed_regs
        if (leaf, subleaf, reg) not in cpuid:
            # The absence of the leaf/subleaf is equivalent to "unset".
            continue
        actual = cpuid[(leaf, subleaf, reg)] & flags
        expected = 0
        assert (
            actual == expected
        ), f"{leaf=:#x} {subleaf=:#x} {reg=} {actual=:#x}, {expected=:#x}"


================================================
FILE: tests/framework/utils_drive.py
================================================
# Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0

"""Utilities for vhost-user-blk backend."""

import os
import subprocess
import time
from abc import ABC, abstractmethod
from enum import Enum
from pathlib import Path
from subprocess import check_output

from framework import utils

MB = 1024 * 1024


class VhostUserBlkBackendType(Enum):
    """vhost-user-blk backend type"""

    QEMU = "Qemu"
    CROSVM = "Crosvm"


def partuuid_and_disk_path(rootfs, disk_path):
    """
    We create a new file with specified path, get its partuuid and
    use it as a rootfs.
    """
    initial_size = rootfs.stat().st_size + 50 * MB
    disk_path.touch()
    os.truncate(disk_path, initial_size)
    check_output(f"echo type=83 | sfdisk --no-tell-kernel {str(disk_path)}", shell=True)
    check_output(f"dd bs=1M seek=1 if={str(rootfs)} of={disk_path}", shell=True)
    ptuuid = check_output(
        f"blkid -s PTUUID -o value {disk_path}", shell=True, encoding="ascii"
    ).strip()
    # PARTUUID for an msdos partition table is PTUUID-<partition number>
    partuuid = ptuuid + "-01"
    return (partuuid, disk_path)


class VhostUserBlkBackend(ABC):
    """vhost-user-blk backend base class"""

    @classmethod
    def get_all_subclasses(cls):
        """Get all subclasses of the class."""
        subclasses = {}
        for subclass in cls.__subclasses__():
            subclasses[subclass.__name__] = subclass
            subclasses.update(subclass.get_all_subclasses())
        return subclasses

    @classmethod
    def with_backend(cls, backend: VhostUserBlkBackendType, *args, **kwargs):
        """Get a backend of a specific type."""
        subclasses = cls.get_all_subclasses()
        # Subclass names follow the "<BackendType><BaseClassName>" pattern.
        return subclasses[backend.value + cls.__name__](*args, **kwargs)

    def __init__(
        self,
        host_mem_path,
        chroot,
        backend_id,
        readonly,
    ):
        self.host_mem_path = host_mem_path
        self.socket_path = Path(chroot) / f"{backend_id}_vhost_user.sock"
        self.readonly = readonly
        self.proc = None

    def spawn(self, uid, gid):
        """
        Spawn a backend.

        Return socket path in the jail that can be used with FC API.
        """
        assert not self.proc, "backend already spawned"
        args = self._spawn_cmd()
        proc = subprocess.Popen(args)

        # Give the backend time to initialise.
        time.sleep(1)

        assert proc is not None and proc.poll() is None, "backend is not up"
        assert self.socket_path.exists()

        os.chown(self.socket_path, uid, gid)
        self.proc = proc

        return str(Path("/") / os.path.basename(self.socket_path))

    @abstractmethod
    def _spawn_cmd(self):
        """Return a spawn command for the backend"""
        return ""

    @abstractmethod
    def resize(self, new_size):
        """Resize the vhost-user-backed drive"""

    def pin(self, cpu_id: int):
        """Pin the vhost-user backend to a CPU list."""
        return utils.set_cpu_affinity(self.proc.pid, [cpu_id])

    def kill(self):
        """Kill the backend"""
        if self.proc.poll() is None:
            self.proc.terminate()
            self.proc.wait()
        os.remove(self.socket_path)
        assert not os.path.exists(self.socket_path)


class QemuVhostUserBlkBackend(VhostUserBlkBackend):
    """vhost-user-blk backend implementation for Qemu backend"""

    def _spawn_cmd(self):
        args = [
            "vhost-user-blk",
            "--socket-path",
            self.socket_path,
            "--blk-file",
            self.host_mem_path,
        ]
        if self.readonly:
            args.append("--read-only")
        return args

    def resize(self, new_size):
        raise NotImplementedError("not supported for Qemu backend")


class CrosvmVhostUserBlkBackend(VhostUserBlkBackend):
    """vhost-user-blk backend implementation for crosvm backend"""

    def __init__(
        self,
        host_mem_path,
        chroot,
        backend_id,
        readonly=False,
    ):
        super().__init__(
            host_mem_path,
            chroot,
            backend_id,
            readonly,
        )
        self.ctr_socket_path = Path(chroot) / f"{backend_id}_ctr.sock"

    def _spawn_cmd(self):
        ro = ",ro" if self.readonly else ""
        args = [
            "crosvm",
            "--log-level",
            "off",
            "devices",
            "--disable-sandbox",
            "--control-socket",
            self.ctr_socket_path,
            "--block",
            f"vhost={self.socket_path},path={self.host_mem_path}{ro}",
        ]
        return args

    def resize(self, new_size):
        assert self.proc, "backend is not spawned"
        assert self.ctr_socket_path.exists()
        utils.check_output(
            f"crosvm disk resize 0 {new_size * 1024 * 1024} {self.ctr_socket_path}"
        )

    def kill(self):
        super().kill()
        assert self.proc.poll() is not None
        os.remove(self.ctr_socket_path)
        assert not
os.path.exists(self.ctr_socket_path) ================================================ FILE: tests/framework/utils_fio.py ================================================ # Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. # SPDX-License-Identifier: Apache-2.0 """File containing utility methods for fio-based performance tests""" import json import os from enum import Enum from pathlib import Path from framework.utils import CmdBuilder DEFAULT_RUNTIME_SEC = 30 DEFAULT_WARMUP_SEC = 10 class Mode(str, Enum): """ Modes of fio operation """ # Sequential reads. READ = "read" # Sequential writes. WRITE = "write" # Sequential trims (Linux block devices and SCSI character devices only). TRIM = "trim" # RANDOM reads. RANDREAD = "randread" # RANDOM writes. RANDWRITE = "randwrite" # RANDOM trims (Linux block devices and SCSI character devices only). RANDTRIM = "randtrim" # SEQUENTial mixed reads and writes. READWRITE = "readwrite" # RANDOM mixed reads and writes. RANDRW = "randrw" class Engine(str, Enum): """ Fio backend engines """ LIBAIO = "libaio" PSYNC = "psync" def build_cmd( file_path: str, file_size_mb: str | None, block_size: int, mode: Mode, num_jobs: int, io_engine: Engine, runtime: int | None = DEFAULT_RUNTIME_SEC, warmup_time: int | None = DEFAULT_WARMUP_SEC, write_logs: bool = True, ) -> str: """Build fio cmd""" cmd = ( CmdBuilder("fio") .with_arg(f"--name={mode.value}-{block_size}") .with_arg(f"--filename={file_path}") ) if file_size_mb: cmd = cmd.with_arg(f"--size={file_size_mb}M") cmd = cmd.with_arg(f"--bs={block_size}") if runtime and warmup_time: cmd = ( cmd.with_arg("--time_based=1") .with_arg(f"--runtime={runtime}") .with_arg(f"--ramp_time={warmup_time}") ) cmd = ( cmd.with_arg(f"--rw={mode.value}") .with_arg("--direct=1") .with_arg("--randrepeat=0") .with_arg(f"--ioengine={io_engine.value}") .with_arg("--iodepth=32") .with_arg(f"--numjobs={num_jobs}") # Set affinity of the entire fio process to a set of vCPUs equal # in size to number 
of workers .with_arg(f"--cpus_allowed={','.join(str(i) for i in range(num_jobs))}") # Instruct fio to pin one worker per vcpu .with_arg("--cpus_allowed_policy=split") .with_arg("--output-format=json+") .with_arg("--output=./fio.json") ) if write_logs: cmd = cmd.with_arg("--log_avg_msec=1000").with_arg( f"--write_bw_log={mode.value}" ) # Latency measurements only make sence for psync engine if io_engine == Engine.PSYNC: cmd = cmd.with_arg(f"--write_lat_log={mode}") return cmd.build() class LogType(Enum): """Fio log types""" BW = "_bw" CLAT = "_clat" def process_log_files(root_dir: str, log_type: LogType) -> ([[str]], [[str]]): """ Parses fio logs which have a form of: 1000, 2007920, 0, 0, 0 1000, 2005276, 1, 0, 0 2000, 1996240, 0, 0, 0 2000, 1993861, 1, 0, 0 ... where the first column is the timestamp, second is the bw/clat and third is the direction The logs directory will look smth like this: readwrite_bw.1.log readwrite_bw.2.log readwrite_clat.1.log readwrite_clat.2.log readwrite_lat.1.log readwrite_lat.2.log readwrite_slat.1.log readwrite_slat.2.log job0 job1 read write read write [..] [..] [..] [..] 
| | | | | --|------- ---- | | ------| | [[], []] [[], []] reads writes The output is 2 arrays: array of reads and array of writes """ paths = [] for item in os.listdir(root_dir): if item.endswith(".log") and log_type.value in item: paths.append(Path(root_dir / item)) if not paths: return [], [] reads = [] writes = [] for path in sorted(paths): lines = path.read_text("UTF-8").splitlines() read_values = [] write_values = [] for line in lines: # See https://fio.readthedocs.io/en/latest/fio_doc.html#log-file-formats _, value, direction, _ = line.split(",", maxsplit=3) value = int(value.strip()) match direction.strip(): case "0": read_values.append(value) case "1": write_values.append(value) case _: assert False reads.append(read_values) writes.append(write_values) return reads, writes def process_json_files(root_dir: str) -> ([[int]], [[int]]): """ Reads `bw_bytes` values from fio*.json files and packs them into 2 arrays of bw_reads and bw_writes. Each entrly is an array in itself of `jobs` per file. """ paths = [] for item in os.listdir(root_dir): if item.endswith(".json") and "fio" in item: paths.append(Path(root_dir / item)) bw_reads = [] bw_writes = [] for path in sorted(paths): data = json.loads(path.read_text("UTF-8")) reads = [] writes = [] for job in data["jobs"]: if "read" in job: reads.append(job["read"]["bw_bytes"]) if "write" in job: writes.append(job["write"]["bw_bytes"]) bw_reads.append(reads) bw_writes.append(writes) return bw_reads, bw_writes ================================================ FILE: tests/framework/utils_ftrace.py ================================================ # Copyright 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
# SPDX-License-Identifier: Apache-2.0
"""Utilities for interacting with the kernel's ftrace subsystem"""

import contextlib

from framework.utils import check_output


@contextlib.contextmanager
def ftrace_events(events: str = "*:*"):
    """Temporarily enables the kernel's tracing functionality for the specified events

    Assumes that the caller is the only test executing on the host"""
    # We have to do system-wide tracing because inside docker we live in a pidns, but trace-cmd does not know about
    # this. We don't know how to translate the pidns PID to one ftrace would understand, so we use the fact that only
    # one vm is running at the same time, and thus we can attribute all KVM events to this one VM
    check_output("mount -t tracefs nodev /sys/kernel/tracing")
    check_output("echo > /sys/kernel/tracing/trace")  # clear the trace buffers
    check_output(f"echo {events} > /sys/kernel/tracing/set_event")
    check_output("echo nop > /sys/kernel/tracing/current_tracer")
    check_output("echo 1 > /sys/kernel/tracing/tracing_on")
    try:
        yield
    finally:
        # Always stop tracing and detach tracefs, even if the body raised.
        check_output("echo 0 > /sys/kernel/tracing/tracing_on")
        check_output("umount /sys/kernel/tracing")


================================================
FILE: tests/framework/utils_imdsv2.py
================================================
# Copyright 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0
"""A simple IMDSv2 client

- https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
- https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-categories.html

Important! For this client to work in a container scenario, make sure your
instances are set with an adequate hop limit (2 for example).
See `ec2:MetadataHttpPutResponseHopLimit`
"""

import time

import requests

# Header names defined by the IMDSv2 session-token protocol.
IMDSV2_HDR_TOKEN_TTL = "X-aws-ec2-metadata-token-ttl-seconds"
IMDSV2_HDR_TOKEN = "X-aws-ec2-metadata-token"


class IMDSv2Client:
    """
    A simple IMDSv2 client.

    >>> IMDSv2Client().get("/meta-data/instance-type")  # doctest: +SKIP
    ...
    """

    def __init__(self, endpoint="http://169.254.169.254", version="latest"):
        # Base URL of the metadata service (link-local address by default).
        self.endpoint = endpoint
        # API version path segment used for metadata GETs.
        self.version = version
        # Requested token lifetime, in seconds (6 hours).
        self.ttl = 21600
        # Epoch timestamp after which the cached token must be refreshed;
        # 0 forces a fetch on first use.
        self.token_expiry_time = 0
        # Cached session token (bytes, as returned by requests).
        self.token = None

    def get_token(self):
        """Get a token from IMDSv2"""
        # Lazily refresh: only issue a PUT when the cached token has expired.
        if self.token_expiry_time < time.time():
            headers = {IMDSV2_HDR_TOKEN_TTL: str(self.ttl)}
            # To get a token, docs say to always use latest
            url = f"{self.endpoint}/latest/api/token"
            res = requests.put(url, headers=headers, timeout=2)
            self.token = res.content
            self.token_expiry_time = time.time() + self.ttl
        return self.token

    def get(self, path):
        """
        Get a metadata path from IMDSv2

        >>> IMDSv2Client().get("/meta-data/instance-type")  # doctest: +SKIP
        'm5d.metal'
        """
        headers = {IMDSV2_HDR_TOKEN: self.get_token()}
        url = f"{self.endpoint}/{self.version}{path}"
        res = requests.get(url, headers=headers, timeout=2)
        # Any non-200 response (404 for unknown paths, 401 for bad token, ...)
        # is surfaced as an exception rather than returned as text.
        if res.status_code != 200:
            raise Exception(f"IMDSv2 returned {res.status_code} for {url}")
        return res.text


# Module-level singleton plus a convenience alias for the common "just GET a
# metadata path" use case.
IMDS_V2 = IMDSv2Client()
imdsv2_get = IMDS_V2.get


================================================
FILE: tests/framework/utils_iperf.py
================================================
# Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0
"""File containing utility methods for iperf-based performance tests"""

import concurrent.futures
import json
import time

from framework import utils
from framework.utils import CmdBuilder, CpuMap, track_cpu_utilization


class IPerf3Test:
    """Class abstracting away the setup and execution of an iperf3-based performance test"""

    def __init__(
        self,
        microvm,
        base_port,
        runtime,
        omit,
        mode,
        num_clients,
        connect_to,
        *,
        iperf="iperf3",
        payload_length="DEFAULT",
    ):
        self._microvm = microvm
        self._base_port = base_port  # first host port; client i uses base_port + i
        self._runtime = runtime  # test duration in seconds (iperf3 --time)
        self._omit = omit  # warm-up seconds to drop (iperf3 --omit)
        self._mode = mode  # entry into mode-map
        self._num_clients = num_clients
        self._connect_to = connect_to  # the "host" value to pass to "--client"
        self._payload_length = payload_length  # the value to pass to "--len"
        self._iperf = iperf
        self._guest_iperf = iperf

    def run_test(self, first_free_cpu):
        """Runs the performance test, using pinning the iperf3 servers to CPUs starting from `first_free_cpu`"""
        # Ensure there are enough host CPUs left for all servers after the
        # vCPUs and firecracker/VMM threads have been pinned.
        assert self._num_clients < CpuMap.len() - self._microvm.vcpus_count - 2
        for server_idx in range(self._num_clients):
            # One iperf3 server per client connection, each pinned to its own CPU.
            assigned_cpu = CpuMap(first_free_cpu)
            cmd = (
                self.host_command(server_idx)
                .with_arg("--affinity", assigned_cpu)
                .build()
            )
            utils.check_output(f"{self._microvm.netns.cmd_prefix()} {cmd}")
            first_free_cpu += 1

        # Wait for the iperf3 server to start
        time.sleep(2)

        with concurrent.futures.ThreadPoolExecutor() as executor:
            # Sample Firecracker CPU utilization concurrently with the clients.
            cpu_load_future = executor.submit(
                track_cpu_utilization,
                self._microvm.firecracker_pid,
                # Ignore the final two data points as they are impacted by test teardown
                self._runtime - 2,
                self._omit,
            )
            clients = []
            for client_idx in range(self._num_clients):
                client_future = executor.submit(
                    self.spawn_iperf3_client,
                    client_idx,
                    self.client_mode_to_iperf3_flag,
                )
                clients.append((self._mode, client_future))

            data = {"cpu_load_raw": cpu_load_future.result(), "g2h": [], "h2g": []}
            for mode, future in clients:
                # Each client emits a JSON report on stdout; bucket it by mode.
                data[mode].append(json.loads(future.result()))

        return data

    @property
    def client_mode_to_iperf3_flag(self):
        """Converts client mode into iperf3 mode flag"""
        # "-R" reverses the direction: the server sends, the client receives.
        if self._mode == "h2g":
            return "-R"
        return ""

    def spawn_iperf3_client(self, client_idx, client_mode_flag):
        """
        Spawns an iperf3 client within the guest. The `client_idx` determines what direction data should
        flow for this particular client (e.g. client-to-server or server-to-client)
        """
        # Add the port where the iperf3 client is going to send/receive.
        cmd = (
            self.guest_command(client_idx)
            .with_arg(client_mode_flag)
            .with_arg("--affinity", client_idx % self._microvm.vcpus_count)
            .build()
        )
        return self._microvm.ssh.check_output(cmd).stdout

    def host_command(self, port_offset):
        """Builds the command used for spawning an iperf3 server on the host"""
        # -sD: run as a daemonized server; -1: exit after one client session.
        return (
            CmdBuilder(self._iperf)
            .with_arg("-sD")
            .with_arg("-p", self._base_port + port_offset)
            .with_arg("-1")
        )

    def guest_command(self, port_offset):
        """Builds the command used for spawning an iperf3 client in the guest"""
        cmd = (
            CmdBuilder(self._guest_iperf)
            .with_arg("--time", self._runtime)
            .with_arg("--json")
            .with_arg("--omit", self._omit)
            .with_arg("-p", self._base_port + port_offset)
            .with_arg("--client", self._connect_to)
        )
        # "DEFAULT" is a sentinel meaning "let iperf3 pick its own --len".
        if self._payload_length != "DEFAULT":
            return cmd.with_arg("--len", self._payload_length)
        return cmd


def emit_iperf3_metrics(metrics, iperf_result, omit):
    """Consume the iperf3 data produced by the tcp/vsock throughput performance tests"""
    cpu_util = iperf_result["cpu_load_raw"]
    for thread_name, values in cpu_util.items():
        for value in values:
            metrics.put_metric(f"cpu_utilization_{thread_name}", value, "Percent")

    # Guest-to-host: sum the per-interval throughput across all parallel
    # clients, skipping the first `omit` warm-up intervals of each series.
    data_points = zip(
        *[time_series["intervals"][omit:] for time_series in iperf_result["g2h"]]
    )
    for point_in_time in data_points:
        metrics.put_metric(
            "throughput_guest_to_host",
            sum(interval["sum"]["bits_per_second"] for interval in point_in_time),
            "Bits/Second",
        )

    # Host-to-guest: same aggregation as above, other direction.
    data_points = zip(
        *[time_series["intervals"][omit:] for time_series in iperf_result["h2g"]]
    )
    for point_in_time in data_points:
        metrics.put_metric(
            "throughput_host_to_guest",
            sum(interval["sum"]["bits_per_second"] for interval in point_in_time),
            "Bits/Second",
        )


================================================
FILE: tests/framework/utils_repo.py
================================================
# Copyright 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0
"""Utilities to deal with the git repo."""

import subprocess
from fnmatch import fnmatch
from pathlib import Path


def git_repo_files(root: str = ".", glob: str = "*"):
    """
    Return a list of files in the git repo from a given path

    :param root: path where to look for files, defaults to the current dir
    :param glob: what pattern to apply to file names
    :return: list of found files
    """
    # `git ls-files` lists only tracked files, relative to the repo root.
    files = subprocess.check_output(
        ["git", "ls-files", root],
        encoding="ascii",
    ).splitlines()
    for file in files:
        if fnmatch(file, glob):
            yield Path(file)


================================================
FILE: tests/framework/utils_uffd.py
================================================
# Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0
"""UFFD related utility functions"""

import os
import stat
import subprocess
import time
from pathlib import Path

from framework.utils import chroot
from host_tools import cargo_build

# Path (inside the jail) of the UDS over which the handler serves page faults.
SOCKET_PATH = "/firecracker-uffd.sock"


class UffdHandler:
    """Describe the UFFD page fault handler process."""

    def __init__(
        self, name, socket_path, snapshot: "Snapshot", chroot_path, log_file_name
    ):
        """Instantiate the handler process with arguments."""
        self._proc = None  # subprocess.Popen handle, set by spawn()
        self._handler_name = name  # handler binary name, relative to the chroot
        self.socket_path = socket_path
        self.snapshot = snapshot
        self._chroot = chroot_path
        self._log_file = log_file_name

    def spawn(self, uid, gid):
        """Spawn handler process using arguments provided."""
        # Run inside the microVM's chroot so the handler sees the same
        # filesystem view as jailed Firecracker.
        with chroot(self._chroot):
            st = os.stat(self._handler_name)
            # Ensure the copied handler binary is executable.
            os.chmod(self._handler_name, st.st_mode | stat.S_IEXEC)

            chroot_log_file = Path("/") / self._log_file
            with open(chroot_log_file, "w", encoding="utf-8") as logfile:
                args = [
                    f"/{self._handler_name}",
                    self.socket_path,
                    self.snapshot.mem.name,
                ]
                self._proc = subprocess.Popen(
                    args, stdout=logfile, stderr=subprocess.STDOUT
                )

            # Give it time start and fail, if it really has too (bad things happen).
            time.sleep(1)
            if not self.is_running():
                # Dump the handler's log to aid debugging before failing.
                print(chroot_log_file.read_text(encoding="utf-8"))
                assert False, "Could not start PF handler!"

            # The page fault handler will create the socket path with root rights.
            # Change rights to the jailer's.
            os.chown(self.socket_path, uid, gid)

    @property
    def proc(self):
        """Return UFFD handler process."""
        return self._proc

    def is_running(self):
        """Check if UFFD process is running"""
        # poll() returns None while the child is still alive.
        return self.proc is not None and self.proc.poll() is None

    @property
    def log_file(self):
        """Return the path to the UFFD handler's log file"""
        return Path(self._chroot) / Path(self._log_file)

    @property
    def log_data(self):
        """Return the log data of the UFFD handler"""
        if self.log_file is None:
            return ""
        return self.log_file.read_text(encoding="utf-8")

    def kill(self):
        """Kills the uffd handler process"""
        assert self.is_running()
        self.proc.kill()

    def mark_killed(self):
        """Marks the uffd handler as already dead"""
        assert not self.is_running()
        self._proc = None

    def __del__(self):
        """Tear down the UFFD handler process."""
        if self.is_running():
            self.kill()


def spawn_pf_handler(vm, handler_path, jailed_snapshot):
    """Spawn page fault handler process."""
    # Copy snapshot memory file into chroot of microVM.
    # Copy the valid page fault binary into chroot of microVM.
    jailed_handler = vm.create_jailed_resource(handler_path)
    handler_name = os.path.basename(jailed_handler)
    uffd_handler = UffdHandler(
        handler_name, SOCKET_PATH, jailed_snapshot, vm.chroot(), "uffd.log"
    )
    # Spawn with the jailer's uid/gid so Firecracker can use the socket.
    uffd_handler.spawn(vm.jailer.uid, vm.jailer.gid)
    return uffd_handler


def uffd_handler(handler_name, **kwargs):
    """Retrieves the uffd handler with the given name"""
    # Handlers are built as cargo examples named `uffd_<name>_handler`.
    return cargo_build.get_example(f"uffd_{handler_name}_handler", **kwargs)


================================================
FILE: tests/framework/utils_vsock.py
================================================
# Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0
"""Helper functions for testing vsock device."""

import hashlib
import os.path
import re
import time
from pathlib import Path
from socket import AF_UNIX, SOCK_STREAM, socket
from subprocess import Popen
from threading import Thread

from tenacity import Retrying, stop_after_attempt, wait_fixed

# Guest vsock port the echo server listens on.
ECHO_SERVER_PORT = 5252
SERVER_ACCEPT_BACKLOG = 128
# Number of concurrent echo connections exercised per direction.
TEST_CONNECTION_COUNT = 50
BLOB_SIZE = 1 * 1024 * 1024
BUF_SIZE = 64 * 1024
# Name of Firecracker's host-side vsock UDS, relative to the jail.
VSOCK_UDS_PATH = "v.sock"


class HostEchoWorker(Thread):
    """A vsock echo worker, connecting to a guest echo server.

    This will initiate a connection to a guest echo server, then start sending
    it the contents of the file at `blob_path`. The echo server should send
    the exact same data back, so a hash is performed on everything received
    from the server. This hash will later be checked against the hashed
    contents of `blob_path`.
    """

    def __init__(self, uds_path, blob_path):
        """."""
        super().__init__()
        self.uds_path = uds_path
        self.blob_path = blob_path
        self.hash = None  # md5 hexdigest of the echoed data, set by run()
        self.error = None  # exception raised by _run(), if any
        self.sock = _vsock_connect_to_guest(self.uds_path, ECHO_SERVER_PORT)

    def run(self):
        """Thread code payload.

        Wrap up the real "run" into a catch-all block, because Python cannot
        into threads - if this thread were to raise an unhandled exception,
        the whole process would lock.
        """
        try:
            self._run()
        # pylint: disable=broad-except
        except Exception as err:
            self.error = err

    def close_uds(self):
        """Close vsock UDS connection."""
        self.sock.close()

    def _run(self):
        # Stream the blob through the echo server in BUF_SIZE chunks,
        # hashing everything that comes back.
        with open(self.blob_path, "rb") as blob_file:
            hash_obj = hashlib.md5()
            while True:
                buf = blob_file.read(BUF_SIZE)
                if not buf:
                    break
                # send() may accept fewer bytes than given; loop until the
                # whole chunk is out.
                sent = self.sock.send(buf)
                while sent < len(buf):
                    sent += self.sock.send(buf[sent:])
                # Read back exactly as many bytes as were sent.
                buf = self.sock.recv(sent)
                while len(buf) < sent:
                    buf += self.sock.recv(sent - len(buf))
                hash_obj.update(buf)
            self.hash = hash_obj.hexdigest()


def make_blob(dst_dir, size=BLOB_SIZE):
    """Generate a random data file."""
    blob_path = os.path.join(dst_dir, "vsock-test.blob")
    with open(blob_path, "wb") as blob_file:
        left = size
        blob_hash = hashlib.md5()
        while left > 0:
            count = min(left, 4096)
            buf = os.urandom(count)
            blob_hash.update(buf)
            blob_file.write(buf)
            left -= count
    # Return the path plus the hash so callers can verify echoed data.
    return blob_path, blob_hash.hexdigest()


def start_guest_echo_server(vm):
    """Start a vsock echo server in the microVM.

    Returns a UDS path to connect to the server.
    """
    cmd = f"nohup socat VSOCK-LISTEN:{ECHO_SERVER_PORT},backlog=128,reuseaddr,fork EXEC:'/bin/cat' > /dev/null 2>&1 &"
    vm.ssh.check_output(cmd)

    # Give the server time to initialise
    time.sleep(1)

    return os.path.join(vm.jailer.chroot_path(), VSOCK_UDS_PATH)


def check_host_connections(uds_path, blob_path, blob_hash):
    """Test host-initiated connections.

    This will spawn `TEST_CONNECTION_COUNT` `HostEchoWorker` threads.
    After the workers are done transferring the data read from `blob_path`,
    the hashes they computed for the data echoed back by the server
    are checked against `blob_hash`.
    """
    workers = []
    for _ in range(TEST_CONNECTION_COUNT):
        worker = HostEchoWorker(uds_path, blob_path)
        workers.append(worker)
        worker.start()
    for wrk in workers:
        wrk.join()
    for wrk in workers:
        assert wrk.hash == blob_hash


def check_guest_connections(vm, server_port_path, blob_path, blob_hash):
    """Test guest-initiated connections.

    This will start an echo server on the host (in its own thread), then
    start `TEST_CONNECTION_COUNT` workers inside the guest VM, all
    communicating with the echo server.
    """
    echo_server = Popen(
        ["socat", f"UNIX-LISTEN:{server_port_path},fork,backlog=5", "exec:'/bin/cat'"]
    )
    try:
        # Give socat a bit of time to create the socket
        for attempt in Retrying(
            wait=wait_fixed(0.2),
            stop=stop_after_attempt(3),
            reraise=True,
        ):
            with attempt:
                assert Path(server_port_path).exists()

        # Link the listening Unix socket into the VM's jail, so that
        # Firecracker can connect to it.
        vm.create_jailed_resource(server_port_path)

        # Increase maximum process count for the ssh service.
        # Avoids: "bash: fork: retry: Resource temporarily unavailable"
        # Needed to execute the bash script that tests for concurrent
        # vsock guest initiated connections.
        vm.ssh.check_output(
            "echo 1024 > /sys/fs/cgroup/system.slice/ssh.service/pids.max"
        )

        # Build the guest worker sub-command.
        # `vsock_helper` will read the blob file from STDIN and send the echo
        # server response to STDOUT. This response is then hashed, and the
        # hash is compared against `blob_hash` (computed on the host). This
        # comparison sets the exit status of the worker command.
        worker_cmd = "hash=$("
        worker_cmd += "cat {}".format(blob_path)
        worker_cmd += " | /tmp/vsock_helper echo 2 {}".format(ECHO_SERVER_PORT)
        worker_cmd += " | md5sum | cut -f1 -d\\ "
        worker_cmd += ")"
        worker_cmd += ' && [[ "$hash" = "{}" ]]'.format(blob_hash)

        # Run `TEST_CONNECTION_COUNT` concurrent workers, using the above
        # worker sub-command.
        # If any worker fails, this command will fail. If all worker sub-commands
        # succeed, this will also succeed.
        cmd = 'workers="";'
        cmd += "for i in $(seq 1 {}); do".format(TEST_CONNECTION_COUNT)
        cmd += " ({})& ".format(worker_cmd)
        cmd += ' workers="$workers $!";'
        cmd += "done;"
        cmd += "for w in $workers; do wait $w || (wait; exit 1); done"
        vm.ssh.check_output(cmd)
    finally:
        echo_server.terminate()
        rc = echo_server.wait()
        # socat exits with 128 + 15 (SIGTERM)
        assert rc == 143


def make_host_port_path(uds_path, port):
    """Build the path for a Unix socket, mapped to host vsock port `port`."""
    return "{}_{}".format(uds_path, port)


def _vsock_connect_to_guest(uds_path, port):
    """Return a Unix socket, connected to the guest vsock port `port`."""
    sock = socket(AF_UNIX, SOCK_STREAM)
    sock.connect(uds_path)

    # Firecracker's hybrid vsock handshake: send "CONNECT <port>\n" and
    # expect an "OK <assigned_port>\n" acknowledgement.
    buf = bytearray("CONNECT {}\n".format(port).encode("utf-8"))
    sock.send(buf)

    ack_buf = sock.recv(32)
    assert re.match("^OK [0-9]+\n$", ack_buf.decode("utf-8")) is not None

    return sock


def _copy_vsock_data_to_guest(ssh_connection, blob_path, vm_blob_path, vsock_helper):
    # Copy the data file and a vsock helper to the guest.
    cmd = "mkdir -p /tmp/vsock"
    ecode, _, _ = ssh_connection.run(cmd)
    assert ecode == 0, "Failed to set up tmpfs drive on the guest."

    ssh_connection.scp_put(vsock_helper, "/tmp/vsock_helper")
    ssh_connection.scp_put(blob_path, vm_blob_path)


def check_vsock_device(vm, bin_vsock_path, test_fc_session_root_path, ssh_connection):
    """Create a blob and test guest and host initiated connections on vsock."""
    vm_blob_path = "/tmp/vsock/test.blob"

    # Generate a random data file for vsock.
    blob_path, blob_hash = make_blob(test_fc_session_root_path)

    # Copy the data file and a vsock helper to the guest.
    _copy_vsock_data_to_guest(ssh_connection, blob_path, vm_blob_path, bin_vsock_path)

    # Test vsock guest-initiated connections.
    path = os.path.join(vm.path, make_host_port_path(VSOCK_UDS_PATH, ECHO_SERVER_PORT))
    check_guest_connections(vm, path, vm_blob_path, blob_hash)

    # Test vsock host-initiated connections.
    path = start_guest_echo_server(vm)
    check_host_connections(path, blob_path, blob_hash)


================================================
FILE: tests/framework/vm_config.json
================================================
{
  "boot-source": {
    "kernel_image_path": "vmlinux.bin",
    "boot_args": "console=ttyS0 reboot=k panic=1",
    "initrd_path": null
  },
  "drives": [
    {
      "drive_id": "rootfs",
      "partuuid": null,
      "is_root_device": true,
      "cache_type": "Unsafe",
      "is_read_only": false,
      "path_on_host": "bionic.rootfs.ext4",
      "io_engine": "Sync",
      "rate_limiter": null,
      "socket": null
    }
  ],
  "machine-config": {
    "vcpu_count": 2,
    "mem_size_mib": 1024,
    "smt": false,
    "track_dirty_pages": false,
    "huge_pages": "None"
  },
  "cpu-config": null,
  "balloon": null,
  "network-interfaces": [],
  "vsock": null,
  "logger": null,
  "metrics": null,
  "mmds-config": null,
  "entropy": null,
  "pmem": [],
  "memory-hotplug": null
}


================================================
FILE: tests/framework/vm_config_cpu_template_C3.json
================================================
{
  "boot-source": {
    "kernel_image_path": "vmlinux.bin",
    "boot_args": "console=ttyS0 reboot=k panic=1"
  },
  "drives": [
    {
      "drive_id": "rootfs",
      "path_on_host": "bionic.rootfs.ext4",
      "is_root_device": true,
      "is_read_only": false
    }
  ],
  "machine-config": {
    "vcpu_count": 2,
    "mem_size_mib": 1024,
    "cpu_template": "C3"
  }
}


================================================
FILE: tests/framework/vm_config_missing_mem_size_mib.json
================================================
{
  "boot-source": {
    "kernel_image_path": "vmlinux.bin",
    "boot_args": "console=ttyS0 reboot=k panic=1"
  },
  "drives": [
    {
      "drive_id": "rootfs",
      "path_on_host": "bionic.rootfs.ext4",
      "is_root_device": true,
      "is_read_only": false
    }
  ],
  "machine-config": {
    "vcpu_count": 2,
    "smt": false,
    "track_dirty_pages": false
  }
}


================================================
FILE: tests/framework/vm_config_missing_vcpu_count.json
================================================
{
  "boot-source": {
    "kernel_image_path": "vmlinux.bin",
    "boot_args": "console=ttyS0 reboot=k panic=1 swiotlb=noforce"
  },
  "drives": [
    {
      "drive_id": "rootfs",
      "path_on_host": "bionic.rootfs.ext4",
      "is_root_device": true,
      "is_read_only": false
    }
  ],
  "machine-config": {
    "mem_size_mib": 1024,
    "smt": false,
    "track_dirty_pages": false
  }
}


================================================
FILE: tests/framework/vm_config_network.json
================================================
{
  "boot-source": {
    "kernel_image_path": "vmlinux.bin",
    "boot_args": "console=ttyS0 reboot=k panic=1",
    "initrd_path": null
  },
  "drives": [
    {
      "drive_id": "rootfs",
      "path_on_host": "bionic.rootfs.ext4",
      "is_root_device": true,
      "partuuid": null,
      "is_read_only": false,
      "cache_type": "Unsafe",
      "io_engine": "Sync",
      "rate_limiter": null
    }
  ],
  "machine-config": {
    "vcpu_count": 2,
    "mem_size_mib": 1024,
    "smt": false,
    "track_dirty_pages": false
  },
  "cpu-config": null,
  "balloon": null,
  "network-interfaces": [
    {
      "iface_id": "eth0",
      "host_dev_name": "tap0",
      "guest_mac": "06:00:c0:a8:00:02",
      "rx_rate_limiter": null,
      "tx_rate_limiter": null
    }
  ],
  "vsock": null,
  "logger": null,
  "metrics": null,
  "mmds-config": null,
  "entropy": null
}


================================================
FILE: tests/framework/vm_config_smt_true.json
================================================
{
  "boot-source": {
    "kernel_image_path": "vmlinux.bin",
    "boot_args": "console=ttyS0 reboot=k panic=1"
  },
  "drives": [
    {
      "drive_id": "rootfs",
      "path_on_host": "bionic.rootfs.ext4",
      "is_root_device": true,
      "is_read_only": false
    }
  ],
  "machine-config": {
    "vcpu_count": 2,
    "mem_size_mib": 1024,
    "smt": true
  }
}


================================================
FILE: tests/framework/vm_config_with_mmdsv1.json
================================================
{
  "boot-source": {
    "kernel_image_path": "vmlinux.bin",
    "boot_args": "console=ttyS0 reboot=k panic=1",
    "initrd_path": null
  },
  "drives": [
    {
      "drive_id": "rootfs",
      "partuuid": null,
      "is_root_device": true,
      "cache_type": "Unsafe",
      "is_read_only": false,
      "path_on_host": "bionic.rootfs.ext4",
      "io_engine": "Sync",
      "rate_limiter": null,
      "socket": null
    }
  ],
  "machine-config": {
    "vcpu_count": 2,
    "mem_size_mib": 1024,
    "track_dirty_pages": false
  },
  "balloon": null,
  "network-interfaces": [
    {
      "iface_id": "1",
      "host_dev_name": "tap0",
      "guest_mac": "06:00:c0:a8:00:02",
      "rx_rate_limiter": null,
      "tx_rate_limiter": null
    }
  ],
  "vsock": null,
  "logger": null,
  "metrics": null,
  "mmds-config": {
    "network_interfaces": ["1"]
  },
  "entropy": null
}


================================================
FILE: tests/framework/vm_config_with_mmdsv2.json
================================================
{
  "boot-source": {
    "kernel_image_path": "vmlinux.bin",
    "boot_args": "console=ttyS0 reboot=k panic=1",
    "initrd_path": null
  },
  "drives": [
    {
      "drive_id": "rootfs",
      "partuuid": null,
      "is_root_device": true,
      "cache_type": "Unsafe",
      "is_read_only": false,
      "path_on_host": "bionic.rootfs.ext4",
      "io_engine": "Sync",
      "rate_limiter": null,
      "socket": null
    }
  ],
  "machine-config": {
    "vcpu_count": 2,
    "mem_size_mib": 1024,
    "smt": false,
    "track_dirty_pages": false
  },
  "balloon": null,
  "network-interfaces": [
    {
      "iface_id": "1",
      "host_dev_name": "tap0",
      "guest_mac": "06:00:c0:a8:00:02",
      "rx_rate_limiter": null,
      "tx_rate_limiter": null
    }
  ],
  "vsock": null,
  "logger": null,
  "metrics": null,
  "mmds-config": {
    "network_interfaces": ["1"],
    "ipv4_address": "169.254.169.250",
    "version": "V2",
    "imds_compat": true
  },
  "entropy": null
}


================================================
FILE: tests/framework/with_filelock.py
================================================
# Copyright 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0
"""Simple decorator so that only one process is running the decorated function
at any one time.

Caveat: two functions sharing the same name and using this decorator will use
the same lock, which may be unintended, but safe. TBD disambiguate with the
module name in that case.
"""

import functools
import tempfile
from pathlib import Path

from filelock import FileLock


def with_filelock(func):
    """Decorator so that only one process is running the decorated function at
    any one time.
    """
    tmp_dir = Path(tempfile.gettempdir())

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        # The lock file is keyed on the function name only (see module caveat).
        lock_path = (tmp_dir / func.__name__).with_suffix(".lock")
        lock = FileLock(lock_path)
        with lock:
            return func(*args, **kwargs)

    return wrapper


================================================
FILE: tests/host_tools/__init__.py
================================================
# Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0
"""Are you happy pylint?"""


================================================
FILE: tests/host_tools/cargo_build.py
================================================
# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0
"""Functionality for a shared binary build and release path for all tests."""

import os
import platform
from pathlib import Path

from framework import defs, utils
from framework.defs import DEFAULT_BINARY_DIR, LOCAL_BUILD_PATH
from framework.with_filelock import with_filelock

# Host-native musl target triple, e.g. "x86_64-unknown-linux-musl".
DEFAULT_TARGET = f"{platform.machine()}-unknown-linux-musl"
DEFAULT_TARGET_DIR = f"{DEFAULT_TARGET}/release/"


def nightly_toolchain() -> str:
    """Receives the name of the installed nightly toolchain"""
    return utils.check_output("rustup toolchain list | grep nightly").stdout.strip()


def cargo(
    subcommand,
    cargo_args: str = "",
    subcommand_args: str = "",
    *,
    env: dict = None,
    cwd: str = None,
    nightly: bool = False,
):
    """Executes the specified cargo subcommand"""
    # "+<toolchain>" selects the nightly toolchain when requested.
    toolchain = f"+{nightly_toolchain()}" if nightly else ""
    env = env or {}
    # Prefix the command with KEY="VALUE" environment assignments.
    env_string = " ".join(f'{key}="{str(value)}"' for key, value in env.items())
    cmd = (
        f"{env_string} cargo {toolchain} {subcommand} {cargo_args} -- {subcommand_args}"
    )
    return utils.check_output(cmd, cwd=cwd)


def get_rustflags():
    """Get the relevant rustflags for building/unit testing."""
    if platform.machine() == "aarch64":
        return "-C link-arg=-lgcc -C link-arg=-lfdt "
    return ""


def cargo_test(path, extra_args=""):
    """Trigger unit tests depending on flags provided."""
    env = {
        "CARGO_TARGET_DIR": os.path.join(path, "unit-tests"),
        "RUST_TEST_THREADS": 1,
        "RUST_BACKTRACE": 1,
        "RUSTFLAGS": get_rustflags(),
    }
    cargo("test", extra_args + " --all --no-fail-fast", env=env)


def get_binary(name, *, binary_dir=DEFAULT_BINARY_DIR, example=None):
    """Get a binary. The binaries are built before starting a testrun."""
    bin_path = binary_dir / name
    if example:
        # Example binaries live under the "examples" subdirectory.
        bin_path = binary_dir / "examples" / example
    return bin_path


def get_example(name, *args, package="firecracker", **kwargs):
    """Build an example binary"""
    return get_binary(package, *args, **kwargs, example=name)


def run_seccompiler_bin(
    bpf_path,
    json_path=defs.SECCOMP_JSON_DIR,
    basic=False,
    split_output=False,
    binary_dir=DEFAULT_BINARY_DIR,
):
    """
    Run seccompiler-bin.

    :param bpf_path: path to the output file
    :param json_path: optional path to json file
    """
    # If no custom json filter, use the default one for the current target.
    if json_path == defs.SECCOMP_JSON_DIR:
        json_path = json_path / f"{DEFAULT_TARGET}.json"

    seccompiler_args = f"--input-file {json_path} --target-arch {platform.machine()} --output-file {bpf_path}"
    if basic:
        seccompiler_args += " --basic"
    if split_output:
        seccompiler_args += " --split-output"

    seccompiler = get_binary("seccompiler-bin", binary_dir=binary_dir)
    utils.check_output(f"{seccompiler} {seccompiler_args}")


def run_snap_editor_rebase(base_snap, diff_snap, binary_dir=DEFAULT_BINARY_DIR):
    """
    Run apply_diff_snap.

    :param base_snap: path to the base snapshot mem file
    :param diff_snap: path to diff snapshot mem file
    """
    snap_ed = get_binary("snapshot-editor", binary_dir=binary_dir)
    utils.check_output(
        f"{snap_ed} edit-memory rebase --memory-path {base_snap} --diff-path {diff_snap}"
    )


def run_rebase_snap_bin(base_snap, diff_snap):
    """
    Run apply_diff_snap.

    :param base_snap: path to the base snapshot mem file
    :param diff_snap: path to diff snapshot mem file
    """
    rebase_snap = get_binary("rebase-snap")
    utils.check_output(f"{rebase_snap} --base-file {base_snap} --diff-file {diff_snap}")


@with_filelock
def gcc_compile(src_file, output_file, extra_flags="-static -O3"):
    """Build a source file with gcc."""
    # The filelock prevents concurrent test processes from racing on the
    # same output file; skip compilation if it already exists.
    output_file = Path(output_file)
    if not output_file.exists():
        compile_cmd = f"gcc {src_file} -o {output_file} {extra_flags}"
        utils.check_output(compile_cmd)


def build_gdb():
    """Builds Firecracker with GDB feature enabled. Returns the binary dir"""
    build_path = LOCAL_BUILD_PATH / "gdb"
    cargo(
        "build",
        f"--features gdb --target {DEFAULT_TARGET} --all",
        env={"CARGO_TARGET_DIR": build_path},
    )
    return build_path / DEFAULT_TARGET / "debug"


================================================
FILE: tests/host_tools/change_net_config_space.c
================================================
// Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

// This is used by the `test_net_config_space.py` integration test, which writes
// into the microVM configured network device config space a new MAC address.
#include #include #include #include #include #include #include #include int show_usage() { printf("Usage: ./change_net_config_space.bin [dev_addr] [mac_addr]\n"); printf("Example:\n"); printf("> ./change_net_config_space.bin 0xd00001000 0x060504030201\n"); return 0; } int main(int argc, char *argv[]) { int fd, i, offset; uint8_t *map_base; volatile uint8_t *virt_addr; uint64_t mapped_size, page_size, page_addr, offset_in_page; uint64_t width = 6; uint64_t dev_addr = 0x00000000; uint64_t mac = 0; if (argc != 3) { return show_usage(); } dev_addr = strtoull(argv[1], NULL, 0); mac = strtoull(argv[2], NULL, 0); fd = open("/dev/mem", O_RDWR | O_SYNC); if (fd < 0) { perror("Failed to open '/dev/mem'."); return 1; } // Get the page size. mapped_size = page_size = getpagesize(); // Get the target address physical frame page offset. offset_in_page = (unsigned) dev_addr & (page_size - 1); page_addr = dev_addr & ~(page_size - 1); /* If the data length goes out of the current page, * double the needed map size. */ if (offset_in_page + width > page_size) { /* This access spans pages. * Must map two pages to make it possible. */ mapped_size *= 2; } // Map the `/dev/mem` to virtual memory. map_base = mmap(NULL, mapped_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, page_addr ); if (map_base == MAP_FAILED) { perror("Failed to mmap '/dev/mem'."); return 2; } // Write in the network device config space a new MAC. virt_addr = (volatile uint8_t*) (map_base + offset_in_page); *virt_addr = (uint8_t) (mac >> 40); printf("%02x", *virt_addr); for (i = 1; i <= 5; i++) { *(virt_addr + i) = (uint8_t) (mac >> (5 - i) * 8); printf(":%02x", *(virt_addr + i)); } // Deallocate resources. munmap(map_base, mapped_size); close(fd); return 0; } ================================================ FILE: tests/host_tools/cpu_load.py ================================================ # Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
# SPDX-License-Identifier: Apache-2.0 """Utilities for measuring cpu utilisation for a process.""" import time from threading import Thread import psutil from framework import utils class CpuLoadExceededException(Exception): """A custom exception containing details on excessive cpu load.""" def __init__(self, cpu_load_samples, threshold): """Compose the error message containing the cpu load details.""" super().__init__( f"Cpu load samples {cpu_load_samples} exceeded maximum" f"threshold {threshold}.\n" ) class CpuLoadMonitor(Thread): """Class to represent a cpu load monitor for a thread.""" CPU_LOAD_SAMPLES_TIMEOUT_S = 1 def __init__(self, process_pid, thread_pid, threshold): """Set up monitor attributes.""" Thread.__init__(self) self._process_pid = process_pid self._thread_pid = thread_pid self._cpu_load_samples = [] self._threshold = threshold self._should_stop = False @property def process_pid(self): """Get the process pid.""" return self._process_pid @property def thread_pid(self): """Get the thread pid.""" return self._thread_pid @property def threshold(self): """Get the cpu load threshold.""" return self._threshold @property def cpu_load_samples(self): """Get the cpu load samples.""" return self._cpu_load_samples def signal_stop(self): """Signal that the thread should stop.""" self._should_stop = True def run(self): """Thread for monitoring cpu load of some pid. It is up to the caller to check the queue. """ process = psutil.Process(self._process_pid) while not self._should_stop: utilization = utils.get_cpu_utilization(process) try: fc_thread_util = utilization["firecracker"] if fc_thread_util > self._threshold: self._cpu_load_samples.append(fc_thread_util) except KeyError: pass # no firecracker process time.sleep(0.05) # 50 milliseconds granularity. 
def check_samples(self): """Check that there are no samples above the threshold.""" if len(self.cpu_load_samples) > 0: raise CpuLoadExceededException(self._cpu_load_samples, self._threshold) def __enter__(self): """Functions to use this CPU Load class as a Context Manager >>> clm = CpuLoadMonitor(1000, 1000, 45) >>> with clm: >>> # do stuff """ self.start() def __exit__(self, _type, _value, _traceback): """Exit context""" self.check_samples() self.signal_stop() self.join() ================================================ FILE: tests/host_tools/drive.py ================================================ # Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. # SPDX-License-Identifier: Apache-2.0 """Utilities for creating filesystems on the host.""" import os import tempfile from framework import utils class FilesystemFile: """Facility for creating and working with filesystems.""" KNOWN_FILEFS_FORMATS = {"ext4"} path = None def __init__(self, path: str = None, size: int = 256, fs_format: str = "ext4"): """Create a new file system in a file. Raises if the file system format is not supported, if the file already exists, or if it ends in '/'. """ # If no path is supplied, use a temporary file. # This is useful to force placing the file on disk, not in memory, # because qemu vhost-user-blk backend always uses O_DIRECT, # but O_DIRECT is not supported by tmpfs. if path is None: _, path = tempfile.mkstemp(suffix=f".{fs_format}", dir="/tmp") if fs_format not in self.KNOWN_FILEFS_FORMATS: raise ValueError("Format not in: + " + str(self.KNOWN_FILEFS_FORMATS)) # Here we append the format as a path = os.path.join(path + "." 
+ fs_format) if os.path.isfile(path): raise FileExistsError("File already exists: " + path) utils.check_output( "dd status=none if=/dev/zero" " of=" + path + " bs=1M count=" + str(size) ) utils.check_output("mkfs.ext4 -qF " + path) self.path = path def __repr__(self): return f"" def resize(self, new_size): """Resize the filesystem.""" utils.check_output("truncate --size " + str(new_size) + "M " + self.path) utils.check_output("resize2fs " + self.path) def size(self): """Return the size of the filesystem.""" return os.stat(self.path).st_size def __del__(self): """Destructor cleaning up filesystem from where it was created.""" if self.path: try: os.remove(self.path) except OSError: pass ================================================ FILE: tests/host_tools/fcmetrics.py ================================================ # Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. # SPDX-License-Identifier: Apache-2.0 """Provides: - Mechanism to collect and export Firecracker metrics every 60seconds to CloudWatch - Utility functions to validate Firecracker metrics format and to validate Firecracker device metrics. """ import datetime import json import logging import math import platform import time from threading import Thread import jsonschema import pytest from framework import utils from framework.properties import global_props from framework.utils_repo import git_repo_files from host_tools.metrics import get_metrics_logger def create_metrics_schema_objects(metrics): """ Helper functions to create jsonschema objects for Firecracker metrics. 
""" metrics_schema = { "type": "object", "required": [], "properties": {}, "additionalProperties": False, } if isinstance(metrics, dict): special_metrics = "utc_timestamp_ms" if special_metrics in metrics.keys(): metrics.pop(special_metrics) metrics_schema["properties"][special_metrics] = {"type": "number"} metrics_schema["required"].append(special_metrics) for sub_metrics_name, sub_metrics_fields in metrics.items(): obj = create_metrics_schema_objects(sub_metrics_fields) metrics_schema["properties"][sub_metrics_name] = obj metrics_schema["required"].append(sub_metrics_name) return metrics_schema if isinstance(metrics, list): for metrics_field in metrics: if isinstance(metrics_field, str): metrics_schema["properties"][metrics_field] = {"type": "number"} metrics_schema["required"].append(metrics_field) elif isinstance(metrics_field, dict): for sub_metrics_name, sub_metrics_fields in metrics_field.items(): obj = create_metrics_schema_objects(sub_metrics_fields) metrics_schema["properties"][sub_metrics_name] = obj metrics_schema["required"].append(sub_metrics_name) return metrics_schema raise Exception("Invalid schema") def validate_fc_metrics(metrics): """ This functions makes sure that all components of firecracker_metrics struct are present. 
""" latency_agg_metrics_fields = [ "min_us", "max_us", "sum_us", ] block_metrics = [ "activate_fails", "cfg_fails", "no_avail_buffer", "event_fails", "execute_fails", "invalid_reqs_count", "flush_count", "queue_event_count", "rate_limiter_event_count", "update_count", "update_fails", "read_bytes", "write_bytes", "read_count", "write_count", "rate_limiter_throttled_events", "io_engine_throttled_events", "remaining_reqs_count", {"read_agg": latency_agg_metrics_fields}, {"write_agg": latency_agg_metrics_fields}, ] net_metrics = [ "activate_fails", "cfg_fails", "mac_address_updates", "no_rx_avail_buffer", "no_tx_avail_buffer", "event_fails", "rx_queue_event_count", "rx_event_rate_limiter_count", "rx_rate_limiter_throttled", "rx_tap_event_count", "rx_bytes_count", "rx_packets_count", "rx_fails", "rx_count", "tap_read_fails", "tap_write_fails", "tx_bytes_count", "tx_malformed_frames", "tx_fails", "tx_count", "tx_packets_count", "tx_queue_event_count", "tx_rate_limiter_event_count", "tx_rate_limiter_throttled", "tx_spoofed_mac_count", "tx_remaining_reqs_count", {"tap_write_agg": latency_agg_metrics_fields}, ] firecracker_metrics = { "utc_timestamp_ms": "", "api_server": [ "process_startup_time_us", "process_startup_time_cpu_us", ], "balloon": [ "activate_fails", "inflate_count", "stats_updates_count", "stats_update_fails", "deflate_count", "event_fails", "free_page_report_count", "free_page_report_freed", "free_page_report_fails", "free_page_hint_count", "free_page_hint_freed", "free_page_hint_fails", ], "block": block_metrics, "deprecated_api": [ "deprecated_http_api_calls", ], "get_api_requests": [ "instance_info_count", "machine_cfg_count", "mmds_count", "vmm_version_count", "hotplug_memory_count", ], "i8042": [ "error_count", "missed_read_count", "missed_write_count", "read_count", "reset_count", "write_count", ], "latencies_us": [ "full_create_snapshot", "diff_create_snapshot", "load_snapshot", "pause_vm", "resume_vm", "vmm_full_create_snapshot", 
"vmm_diff_create_snapshot", "vmm_load_snapshot", "vmm_pause_vm", "vmm_resume_vm", ], "logger": [ "missed_metrics_count", "metrics_fails", "missed_log_count", ], "mmds": [ "rx_accepted", "rx_accepted_err", "rx_accepted_unusual", "rx_bad_eth", "rx_invalid_token", "rx_no_token", "rx_count", "tx_bytes", "tx_count", "tx_errors", "tx_frames", "connections_created", "connections_destroyed", ], "net": net_metrics, "patch_api_requests": [ "drive_count", "drive_fails", "network_count", "network_fails", "machine_cfg_count", "machine_cfg_fails", "mmds_count", "mmds_fails", "hotplug_memory_count", "hotplug_memory_fails", ], "put_api_requests": [ "actions_count", "actions_fails", "boot_source_count", "boot_source_fails", "drive_count", "drive_fails", "logger_count", "logger_fails", "machine_cfg_count", "machine_cfg_fails", "cpu_cfg_count", "cpu_cfg_fails", "metrics_count", "metrics_fails", "network_count", "network_fails", "mmds_count", "mmds_fails", "vsock_count", "vsock_fails", "pmem_count", "pmem_fails", "serial_count", "serial_fails", "hotplug_memory_count", "hotplug_memory_fails", ], "seccomp": [ "num_faults", ], "vcpu": [ "exit_io_in", "exit_io_out", "exit_mmio_read", "exit_mmio_write", "failures", "kvmclock_ctrl_fails", {"exit_io_in_agg": latency_agg_metrics_fields}, {"exit_io_out_agg": latency_agg_metrics_fields}, {"exit_mmio_read_agg": latency_agg_metrics_fields}, {"exit_mmio_write_agg": latency_agg_metrics_fields}, ], "vmm": [ "panic_count", ], "uart": [ "error_count", "flush_count", "missed_read_count", "missed_write_count", "read_count", "write_count", ], "signals": [ "sigbus", "sigsegv", "sigxfsz", "sigxcpu", "sigpipe", "sighup", "sigill", ], "vsock": [ "activate_fails", "cfg_fails", "rx_queue_event_fails", "tx_queue_event_fails", "ev_queue_event_fails", "muxer_event_fails", "conn_event_fails", "rx_queue_event_count", "tx_queue_event_count", "rx_bytes_count", "tx_bytes_count", "rx_packets_count", "tx_packets_count", "conns_added", "conns_killed", "conns_removed", 
"killq_resync", "tx_flush_fails", "tx_write_fails", "rx_read_fails", ], "entropy": [ "activate_fails", "entropy_event_fails", "entropy_event_count", "entropy_bytes", "host_rng_fails", "entropy_rate_limiter_throttled", "rate_limiter_event_count", ], "interrupts": ["triggers", "config_updates"], "pmem": [ "activate_fails", "cfg_fails", "event_fails", "queue_event_count", ], "memory_hotplug": [ "activate_fails", "queue_event_fails", "queue_event_count", "plug_count", "plug_bytes", "plug_fails", {"plug_agg": latency_agg_metrics_fields}, "unplug_count", "unplug_bytes", "unplug_fails", "unplug_discard_fails", {"unplug_agg": latency_agg_metrics_fields}, "state_count", "state_fails", {"state_agg": latency_agg_metrics_fields}, "unplug_all_count", "unplug_all_fails", {"unplug_all_agg": latency_agg_metrics_fields}, ], } # validate timestamp before jsonschema validation which some more time utc_time = datetime.datetime.now(datetime.timezone.utc) utc_timestamp_ms = math.floor(utc_time.timestamp() * 1000) # Assert that the absolute difference is less than 1 second, to check that # the reported utc_timestamp_ms is actually a UTC timestamp from the Unix # Epoch.Regression test for: # https://github.com/firecracker-microvm/firecracker/issues/2639 assert abs(utc_timestamp_ms - metrics["utc_timestamp_ms"]) < 1000 if platform.machine() == "aarch64": firecracker_metrics["rtc"] = [ "error_count", "missed_read_count", "missed_write_count", ] # add vhost-user metrics to the schema if applicable vhost_user_devices = [] for metrics_name in metrics.keys(): if metrics_name.startswith("vhost_user_"): firecracker_metrics[metrics_name] = [ "activate_fails", "cfg_fails", "init_time_us", "activate_time_us", "config_change_time_us", ] vhost_user_devices.append(metrics_name) if metrics_name.startswith("block_"): firecracker_metrics[metrics_name] = block_metrics if metrics_name.startswith("net_"): firecracker_metrics[metrics_name] = net_metrics firecracker_metrics_schema = 
create_metrics_schema_objects(firecracker_metrics) jsonschema.validate(instance=metrics, schema=firecracker_metrics_schema) def validate_missing_metrics(metrics): # remove some metrics and confirm that fields and not just top level metrics # are validated. temp_pop_metrics = metrics["api_server"].pop("process_startup_time_us") with pytest.raises( jsonschema.ValidationError, match="'process_startup_time_us' is a required property", ): jsonschema.validate(instance=metrics, schema=firecracker_metrics_schema) metrics["api_server"]["process_startup_time_us"] = temp_pop_metrics if platform.machine() == "aarch64": temp_pop_metrics = metrics["rtc"].pop("error_count") with pytest.raises( jsonschema.ValidationError, match="'error_count' is a required property" ): jsonschema.validate(instance=metrics, schema=firecracker_metrics_schema) metrics["rtc"]["error_count"] = temp_pop_metrics for vhost_user_dev in vhost_user_devices: temp_pop_metrics = metrics[vhost_user_dev].pop("activate_time_us") with pytest.raises( jsonschema.ValidationError, match="'activate_time_us' is a required property", ): jsonschema.validate(instance=metrics, schema=firecracker_metrics_schema) metrics[vhost_user_dev]["activate_time_us"] = temp_pop_metrics validate_missing_metrics(metrics) class FcDeviceMetrics: """ Provides functions to validate breaking change and aggregation of metrics """ def __init__(self, name, num_dev, aggr_supported=True): self.dev_name = name self.num_dev = num_dev self.aggr_supported = aggr_supported def validate(self, microvm): """ validate breaking change of device metrics """ fc_metrics = microvm.flush_metrics() # make sure all items of firecracker_metrics are as expected validate_fc_metrics(fc_metrics) # make sure "{self.name}" is aggregate of "{self.name}_*" # and that there are only {num_dev} entries of "{self.name}_*" self.validate_per_device_metrics(fc_metrics) def validate_per_device_metrics(self, fc_metrics): """ validate aggregation of device metrics """ 
metrics_calculated = {} actual_num_devices = 0 for component_metric_names, component_metric_values in fc_metrics.items(): if ( f"{self.dev_name}_" in component_metric_names and component_metric_names.startswith(self.dev_name) ): actual_num_devices += 1 for metrics_name, metric_value in component_metric_values.items(): if isinstance(metric_value, int): if metrics_name not in metrics_calculated: metrics_calculated[metrics_name] = 0 metrics_calculated[metrics_name] += metric_value elif isinstance(metric_value, dict): # this is for LatencyAggregateMetrics metrics type if metrics_name not in metrics_calculated: metrics_calculated[metrics_name] = { "min_us": 0, "max_us": 0, "sum_us": 0, } metrics_calculated[metrics_name]["sum_us"] += metric_value[ "sum_us" ] assert self.num_dev == actual_num_devices if self.aggr_supported: metrics_aggregate = fc_metrics[self.dev_name] assert metrics_aggregate == metrics_calculated def get_emf_unit_for_fc_metrics(full_key): """Returns CloudWatch Unit for requested FC metrics key""" # We need to check each key because unit can be in group or key # e.g. latencies_us.diff_create_snapshot and # api_server.process_startup_time_us for key in full_key.lower().split("."): if key.endswith("_bytes") or key.endswith("_bytes_count"): return "Bytes" if key.endswith("_ms"): return "Milliseconds" if key.endswith("_us"): return "Microseconds" return "Count" def flush_fc_metrics_to_cw(fc_metrics, metrics): """ Flush Firecracker metrics to CloudWatch. Use an existing metrics logger with existing dimensions so that it is easier to correlate the metrics with the test calling it. Add a prefix "fc_metrics." to differentiate these metrics, this also helps to avoid using this metrics in A/B tests. NOTE: There are metrics with keywords "fail", "err", "num_faults", "panic" in their name and represent some kind of failure in Firecracker. We assert that all these are zero, to catch potentially silent failure modes. 
This means the FcMonitor cannot be used in negative tests that might cause such metrics to be emitted. """ # Pre-order tree traversal to convert a tree into its list of paths with dot separate segments def flatten_dict(node, prefix: str): if not isinstance(node, dict): return {prefix: node} result = {} for child_metric_name, child_metrics in node.items(): result.update(flatten_dict(child_metrics, f"{prefix}.{child_metric_name}")) return result flattened_metrics = flatten_dict(fc_metrics, "fc_metrics") for key, value in flattened_metrics.items(): if ".utc_timestamp_ms." in key: continue metrics.put_metric(key, value, get_emf_unit_for_fc_metrics(key)) metrics.flush() ignored_failure_metrics = [ # We trigger these spuriously in vsock tests due to iperf-vsock not implementing connection shutdown # See also https://github.com/stefano-garzarella/iperf-vsock/issues/4 "fc_metrics.vsock.rx_read_fails", "fc_metrics.vsock.tx_write_fails", "fc_metrics.vsock.tx_flush_fails", ] failure_metrics = { key: value for key, value in flattened_metrics.items() if "err" in key.split(".")[-1] or "fail" in key.split(".")[-1] or "panic" in key.split(".")[-1] or "num_faults" in key.split(".")[-1] if value if key not in ignored_failure_metrics } assert not failure_metrics, json.dumps(failure_metrics, indent=1) class FCMetricsMonitor(Thread): """ read Firecracker metrics from the microvm every `timer` secs and uploads the metrics to CW. `timer` is in seconds and is default to 60sec to match default time Firecrackers takes to dump metrics. We do this as a daemon thread every `timer` sec, instead of collecting all metrics together in the end, to retain timestamp of the metrics. 
""" def __init__(self, vm, timer=60): Thread.__init__(self, daemon=True) self.vm = vm vm.monitors.append(self) self.timer = timer self.metrics_index = 0 self.running = False self.metrics_logger = get_metrics_logger() self.metrics_logger.set_dimensions( { "instance": global_props.instance, "host_kernel": "linux-" + global_props.host_linux_version, "guest_kernel": vm.kernel_file.stem[2:], } ) self.start() def _flush_metrics(self): """ Since vm.flush_metrics provides only the latest metrics, we call vm.get_all_metrics() instead to be able to collect and upload all metrics emitted by the microvm. This utility function is created to keep common code in one place and is called every `self.timer` seconds once the daemon starts and then once when the daemon stops. """ all_metrics = self.vm.get_all_metrics() for metrics in all_metrics[self.metrics_index :]: flush_fc_metrics_to_cw(metrics, self.metrics_logger) self.metrics_index += 1 def stop(self): """ Stop the daemon gracefully. Since we depend on the vm to provide the metrics, this method should be called just before killing the vm. We collect final metrics here in stop instead of letting it be collected from the "run" method because, "run" could be in sleep when stop is called and once it wakes out of sleep the "vm" might not be avaiable to provide the metrics. """ if self.is_alive(): self.running = False # wait for the running thread to finish # this should also avoid any race condition leading to # uploading the same metrics twice self.join() try: self.vm.api.actions.put(action_type="FlushMetrics") except: # pylint: disable=bare-except # if this doesn't work, ignore the failure. This function is called during teardown, # and if it fails there, then the resulting exception hides the actual test failure. 
logging.error("Failed to flush Firecracker metrics!") finally: self._flush_metrics() def run(self): self.running = True while self.running is True: self._flush_metrics() # instead of a time.sleep(60), sleep in intervals of 1 sec # so that we can terminate the thread sooner. # this way we can also make stop() wait for 1 sec before # it collects and uploads metrics for _x in range(self.timer): time.sleep(1) if self.running is False: break def find_metrics_files(): """Gets a list of all Firecracker sources files ending with 'metrics.rs'""" return list(git_repo_files(root="..", glob="*metrics.rs")) def extract_fields(file_path): """Gets a list of all metrics defined in the given file, in the form tuples (name, type)""" fields = utils.run_cmd( rf'grep -Po "(?<=pub )(\w+): (Shared(?:Inc|Store)Metric|LatencyAggregateMetrics)" {file_path}' ).stdout.strip() return [field.split(": ", maxsplit=1) for field in fields.splitlines()] def is_file_production(filepath): """Returns True iff accesses to metric fields in the given file should cause the metric be considered 'used in production code'. 
Excludes, for example, files in which the metrics are defined, where accesses happen as part of copy constructors, etc.""" path = filepath.lower() return ( "/test/" in path or "/tests/" in path or path.endswith("_test.rs") or "test_" in path or "tests.rs" in path or ("metrics.rs" in path and "vmm" in path) ) KNOWN_FALSE_POSITIVES = [ "min_us", "max_us", "sum_us", "process_startup_time_us", "process_startup_time_cpu_us", ] def is_metric_used(field, field_type): """Returns True iff the given metric has a production use in the firecracker codebase""" if field in KNOWN_FALSE_POSITIVES: return True if field_type in ("SharedIncMetric", "SharedStoreMetric"): pattern = rf"{field}\s*\.\s*store|{field}\s*\.\s*inc|{field}\s*\.\s*add|{field}\s*\.\s*fetch|METRICS.*{field}" elif field_type == "LatencyAggregateMetrics": pattern = rf"{field}\s*\.\s*record_latency_metrics" else: raise RuntimeError(f"Unknown metric type: {field_type}") result = utils.run_cmd(f'grep -RPzo "{pattern}" ../src') for line in result.stdout.strip().split("\0"): if not line: continue if not is_file_production(line.split(":", maxsplit=1)[0]): return True return False ================================================ FILE: tests/host_tools/jailer_time.c ================================================ // Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
// SPDX-License-Identifier: Apache-2.0 // This is used by `performance/test_jailer.py` #include #include int main(int argc, char** argv) { // print current time in us struct timespec now = {0}; clock_gettime(CLOCK_MONOTONIC, &now); unsigned long long current_ns = (unsigned long long)now.tv_sec * 1000000000 + (unsigned long long)now.tv_nsec; unsigned long long current_us = current_ns / 1000; printf("%llu\n", current_us); // print the --start-time-us value printf("%s", argv[4]); } ================================================ FILE: tests/host_tools/memory.py ================================================ # Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. # SPDX-License-Identifier: Apache-2.0 """Utilities for measuring memory utilization for a process.""" import time from threading import Thread import psutil from framework.properties import global_props class MemoryUsageExceededError(Exception): """A custom exception containing details on excessive memory usage.""" def __init__(self, usage, threshold, *args): """Compose the error message containing the memory consumption.""" super().__init__( f"Memory usage ({usage / (1 << 20):.2f} MiB) exceeded maximum threshold " f"({threshold / (1 << 20)} MiB)", *args, ) class MemoryMonitor(Thread): """Class to represent an RSS memory monitor for a Firecracker process. The guest's memory region is skipped, as the main interest is the VMM memory usage. 
""" # If guest memory is >3GiB, it is split in a 2nd region # Gap starts at 3GiBs and is 1GiB long X86_32BIT_MEMORY_GAP_START = 3 << 30 X86_32BIT_MEMORY_GAP_SIZE = 1 << 30 # If guest memory is >255GiB, it is split in a 3rd region # Gap starts at 256 GiB and is 256GiB long X86_64BIT_MEMORY_GAP_START = 256 << 30 # On ARM64 we just have a single gap, but memory starts at an offset # Gap starts at 256 GiB and is GiB long # Memory starts at 2GiB ARM64_64BIT_MEMORY_GAP_START = 256 << 30 ARM64_MEMORY_START = 2 << 30 def __init__( self, vm, threshold_booted=5 << 20, threshold_snapshot=6 << 20, threshold_restored=5 << 20, period_s=0.01, ): """Initialize monitor attributes.""" Thread.__init__(self) self._vm = vm self.threshold_booted = threshold_booted self.threshold_snapshot = threshold_snapshot self.threshold_restored = threshold_restored # Start with booted threshold by default self.threshold = threshold_booted self._exceeded = None self._period_s = period_s self._should_stop = False self._current_rss = 0 self.daemon = True def signal_stop(self): """Signal that the thread should stop.""" self._should_stop = True def set_threshold_for_restored_vm(self): """Set threshold for a restored VM.""" self.threshold = self.threshold_restored def set_threshold_for_snapshot(self): """Set threshold for snapshot creation.""" self.threshold = self.threshold_snapshot def stop(self): """Stop the thread""" if self.is_alive(): self.signal_stop() self.join(timeout=1) def run(self): """Thread for monitoring the RSS memory usage of a Firecracker process. If overhead memory exceeds the maximum value, it is saved and memory monitoring ceases. It is up to the caller to check. 
""" guest_mem_bytes = self._vm.mem_size_bytes try: ps = psutil.Process(self._vm.firecracker_pid) except (psutil.NoSuchProcess, FileNotFoundError): return while not self._should_stop: try: mmaps = ps.memory_maps(grouped=False) except psutil.NoSuchProcess: return mem_total = 0 for mmap in mmaps: if self.is_guest_mem(mmap.size, guest_mem_bytes): continue mem_total += mmap.rss self._current_rss = mem_total if mem_total > self.threshold: self._exceeded = ps return time.sleep(self._period_s) def is_guest_mem_x86(self, size, guest_mem_bytes): """ Checks if a region is a guest memory region based on x86_64 physical memory layout """ # it could be bigger if hotplugging is enabled # if it's bigger, it's likely not from FC because we don't have big allocations return size >= guest_mem_bytes or size in ( # memory fits before the first gap guest_mem_bytes, # guest memory spans at least two regions & memory fits before the second gap self.X86_32BIT_MEMORY_GAP_START, # guest memory spans exactly two regions guest_mem_bytes - self.X86_32BIT_MEMORY_GAP_START, # guest memory fills the space between the two gaps self.X86_64BIT_MEMORY_GAP_START - self.X86_32BIT_MEMORY_GAP_START - self.X86_32BIT_MEMORY_GAP_SIZE, # guest memory spans 3 regions, this is what remains past the second gap guest_mem_bytes - self.X86_64BIT_MEMORY_GAP_START + self.X86_32BIT_MEMORY_GAP_SIZE, ) def is_guest_mem_arch64(self, size, guest_mem_bytes): """ Checks if a region is a guest memory region based on ARM64 physical memory layout """ # it could be bigger if hotplugging is enabled # if it's bigger, it's likely not from FC because we don't have big allocations return size >= guest_mem_bytes or size in ( # guest memory fits before the gap guest_mem_bytes, # guest memory fills the space before the gap self.ARM64_64BIT_MEMORY_GAP_START - self.ARM64_MEMORY_START, # guest memory spans 2 regions, this is what remains past the gap guest_mem_bytes - self.ARM64_64BIT_MEMORY_GAP_START + self.ARM64_MEMORY_START, ) def 
is_guest_mem(self, size, guest_mem_bytes): """ If the address is recognised as a guest memory region, return True, otherwise return False. """ if global_props.cpu_architecture == "x86_64": return self.is_guest_mem_x86(size, guest_mem_bytes) return self.is_guest_mem_arch64(size, guest_mem_bytes) def check_samples(self): """Check that there are no samples over the threshold.""" if self._exceeded is not None: raise MemoryUsageExceededError( self._current_rss, self.threshold, self._exceeded ) @property def current_rss(self): """Obtain current RSS for Firecracker's overhead.""" # This is to ensure that the monitor has updated itself. time.sleep(2 * self._period_s) return self._current_rss def __enter__(self): """To use it as a Context Manager >>> mm = MemoryMonitor(vm, threshold=10*1024) >>> with mm: >>> # do stuff """ self.start() def __exit__(self, _type, _value, _traceback): """Exit context""" if self.is_alive(): self.signal_stop() self.join(timeout=1) self.check_samples() ================================================ FILE: tests/host_tools/metrics.py ================================================ # Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. # SPDX-License-Identifier: Apache-2.0 """Fixture to send metrics to AWS CloudWatch and validate Firecracker metrics We use the aws-embedded-metrics library although it has some sharp corners, namely: 1. It uses asyncio, which complicates the flushing a bit. 2. It has an stateful API. Setting dimensions will override previous ones. Example: set_dimensions("instance") put_metric("duration", 1) set_dimensions("cpu") put_metric("duration", 1) This will end with 2 identical metrics with dimension "cpu" (the last one). The correct way of doing it is: set_dimensions("instance") put_metric("duration", 1) flush() set_dimensions("cpu") put_metric("duration", 1) This is not very intuitive, but we assume all metrics within a test will have the same dimensions. 
# Debugging You can override the destination of the metrics to stdout with: AWS_EMF_NAMESPACE=$USER-test AWS_EMF_ENVIRONMENT=local ./tools/devtest test # References: - https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/CloudWatch_Embedded_Metric_Format_Specification.html - https://github.com/awslabs/aws-embedded-metrics-python """ import asyncio import json import os from pathlib import Path from aws_embedded_metrics.logger.metrics_logger_factory import create_metrics_logger class MetricsWrapper: """A convenient metrics logger""" def __init__(self, logger): self.metrics = {} self.dimensions = {} self.logger = logger def set_dimensions(self, *dimensions, **kwargs): """Set dimensions""" if self.logger: self.logger.set_dimensions(*dimensions, **kwargs) self.dimensions = {} for dimension_dict in dimensions: for k, v in dimension_dict.items(): self.dimensions[k] = v def put_metric(self, name, data, unit): """Put a datapoint with given dimensions""" if name not in self.metrics: self.metrics[name] = {"unit": unit, "values": []} self.metrics[name]["values"].append(data) if self.logger: self.logger.put_metric(name, data, unit) def set_property(self, *args, **kwargs): """Set a property""" if self.logger: self.logger.set_property(*args, **kwargs) def flush(self): """Flush any remaining metrics""" if self.logger: asyncio.run(self.logger.flush()) def store_data(self, dir_path): """Store data into a file""" metrics_path = Path(dir_path / "metrics.json") with open(metrics_path, "w", encoding="utf-8") as f: json.dump( { "metrics": self.metrics, "dimensions": self.dimensions, }, f, ) def get_metrics_logger(): """Get a new metrics logger object""" # if no metrics namespace, don't output metrics if "AWS_EMF_NAMESPACE" in os.environ: logger = create_metrics_logger() logger.reset_dimensions(False) else: logger = None return MetricsWrapper(logger) ================================================ FILE: tests/host_tools/network.py 
================================================
# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0
"""Utilities for test host microVM network setup."""

import ipaddress
import os
import random
import re
import signal
import string
from dataclasses import dataclass, field
from pathlib import Path

from tenacity import retry, stop_after_attempt, wait_fixed

from framework import utils
from framework.utils import Timeout


class SSHConnection:
    """
    SSHConnection encapsulates functionality for microVM SSH interaction.

    This class should be instantiated as part of the ssh fixture with the
    hostname obtained from the MAC address, the username for logging into
    the image and the path of the ssh key.

    Establishes a ControlMaster upon construction, which is then re-used
    for all subsequent SSH interactions.
    """

    def __init__(
        self, netns, ssh_key: Path, control_path: Path, host, user, *, on_error=None
    ):
        """Instantiate a SSH client and connect to a microVM."""
        self.netns = netns
        self.ssh_key = ssh_key
        # check that the key exists and the permissions are 0o400
        # This saves a lot of debugging time.
        assert ssh_key.exists()
        ssh_key.chmod(0o400)
        assert (ssh_key.stat().st_mode & 0o777) == 0o400
        self.host = host
        self.user = user

        self._control_path = control_path

        # Every ssh/scp invocation multiplexes over this control socket.
        self._on_error = None

        self.options = [
            "-o",
            f"ControlPath={self._control_path}",
        ]

        # _init_connection loops until it can connect to the guest
        # dumping debug state on every iteration is not useful or wanted, so
        # only dump it once if _all_ iterations fail.
        try:
            self._init_connection()
        except Exception as exc:
            if on_error:
                on_error(exc)

            raise

        # Only install the error hook after the initial connect, so the
        # retried connect attempts above don't each trigger it.
        self._on_error = on_error

    @property
    def user_host(self):
        """remote address for in SSH format <user>@<IP>"""
        return f"{self.user}@{self.host}"

    def remote_path(self, path):
        """Convert a path to remote"""
        return f"{self.user_host}:{path}"

    def _scp(self, path1, path2, options):
        """Copy files to/from the VM using scp."""
        self._exec(["scp", *options, path1, path2], check=True)

    def scp_put(self, local_path, remote_path, recursive=False):
        """Copy files to the VM using scp."""
        opts = self.options.copy()
        if recursive:
            opts.append("-r")
        self._scp(local_path, self.remote_path(remote_path), opts)

    def scp_get(self, remote_path, local_path, recursive=False):
        """Copy files from the VM using scp."""
        opts = self.options.copy()
        if recursive:
            opts.append("-r")
        self._scp(self.remote_path(remote_path), local_path, opts)

    @retry(
        wait=wait_fixed(1),
        stop=stop_after_attempt(20),
        reraise=True,
    )
    def _init_connection(self):
        """Initialize the persistent background connection which will be used
        to execute all commands sent via this `SSHConnection` object.

        Since we're connecting to a microVM we just started, we'll probably
        have to wait for it to boot up and start the SSH server.
        We'll keep trying to execute a remote command that can't fail
        (`/bin/true`), until we get a successful (0) exit code.
        """
        assert not self._control_path.exists()

        # Sadly, we cannot get debug output from this command (e.g. `-vvv`),
        # because passing -vvv causes the daemonized ssh to hold on to stderr,
        # and inside utils.run_cmd we're using subprocess.communicate, which
        # only returns once stderr gets closed (which would thus result in an
        # indefinite hang).
        establish_cmd = [
            "ssh",
            # Only need to pass the ssh key here, as all multiplexed
            # connections won't have to re-authenticate
            "-i",
            str(self.ssh_key),
            "-o",
            "StrictHostKeyChecking=no",
            "-o",
            "ConnectTimeout=2",
            # Set up a persistent background connection
            "-o",
            "ControlMaster=auto",
            "-o",
            "ControlPersist=yes",
            *self.options,
            self.user_host,
            "true",
        ]

        try:
            # don't set a low timeout here, because otherwise we might get into a race condition
            # where ssh already forked off the persisted connection daemon, but gets killed here
            # before exiting itself. In that case, self._control_path will exist, and the retry
            # will hit the assert at the start of this function.
            self._exec(establish_cmd, check=True)
        except Exception:
            # if the control socket is present, then the daemon is running, and we should stop it
            # before retrying again
            if self._control_path.exists():
                self.close()
            raise

    def _check_liveness(self, strict=True) -> int | None:
        """Checks whether the ControlPersist connection is still alive

        It will return the pid of the ControlMaster if it is still running,
        otherwise None
        """
        check_cmd = ["ssh", "-O", "check", *self.options, self.user_host]

        try:
            _, _, stderr = self._exec(check_cmd, check=True)
        except ChildProcessError:
            if strict:
                raise
            return None

        # `ssh -O check` reports the master's pid on stderr.
        pid_match = re.match(r"Master running \(pid=(\d+)\)", stderr)

        assert pid_match, f"SSH ControlMaster connection not alive anymore: {stderr}"

        return int(pid_match.group(1))

    def close(self, strict=True):
        """Closes the ControlPersist connection"""
        master_pid = self._check_liveness(strict)

        if master_pid is None:
            return

        stop_cmd = ["ssh", "-O", "stop", *self.options, self.user_host]

        _, _, stderr = self._exec(stop_cmd, check=True)

        assert "Stop listening request sent" in stderr

        try:
            with Timeout(5):
                utils.wait_process_termination(master_pid)
        except TimeoutError:
            # for some reason it won't exit, let's force it...
            # if this also fails, when during teardown we'll get an error about
            # "found a process with supposedly dead Firecracker's jailer ID"
            os.kill(master_pid, signal.SIGKILL)

    def run(self, cmd_string, timeout=100, *, check=False, debug=False):
        """
        Execute the command passed as a string in the ssh context.

        If `debug` is set, pass `-vvv` to `ssh`. Note that this will clobber stderr.
        """
        self._check_liveness(True)

        command = ["ssh", *self.options, self.user_host, cmd_string]

        if debug:
            command.insert(1, "-vvv")

        return self._exec(command, timeout, check=check)

    def check_output(self, cmd_string, timeout=100, *, debug=False):
        """Same as `run`, but raises an exception on non-zero return code of remote command"""
        return self.run(cmd_string, timeout, check=True, debug=debug)

    def _exec(self, cmd, timeout=100, check=False):
        """Private function that handles the ssh client invocation."""
        # All ssh/scp commands must run inside the microVM's network
        # namespace, when one is configured.
        if self.netns is not None:
            cmd = ["ip", "netns", "exec", self.netns] + cmd

        try:
            return utils.run_cmd(cmd, check=check, timeout=timeout)
        except Exception as exc:
            if self._on_error:
                self._on_error(exc)

            raise


def mac_from_ip(ip_address):
    """Create a MAC address based on the provided IP.
Algorithm: - the first 2 bytes are fixed to 06:00, which is in an LAA range - https://en.wikipedia.org/wiki/MAC_address#Ranges_of_group_and_locally_administered_addresses - the next 4 bytes are the IP address Example of function call: mac_from_ip("192.168.241.2") -> 06:00:C0:A8:F1:02 C0 = 192, A8 = 168, F1 = 241 and 02 = 2 :param ip_address: IP address as string :return: MAC address from IP """ mac_as_list = ["06", "00"] mac_as_list.extend(f"{int(octet):02x}" for octet in ip_address.split(".")) return ":".join(mac_as_list) def get_guest_net_if_name(ssh_connection, guest_ip): """Get network interface name based on its IPv4 address.""" cmd = "ip a s | grep '{}' | tr -s ' ' | cut -d' ' -f6".format(guest_ip) _, guest_if_name, _ = ssh_connection.run(cmd) if_name = guest_if_name.strip() return if_name if if_name != "" else None def random_str(k): """Create a random string of length `k`.""" symbols = string.ascii_lowercase + string.digits return "".join(random.choices(symbols, k=k)) class Tap: """Functionality for creating a tap and cleaning up after it.""" def __init__(self, name, netns, ip=None): """Set up the name and network namespace for this tap interface. It also creates a new tap device, brings it up and moves the interface to the specified namespace. 
""" self._name = name self._netns = netns # Create the tap device tap0 directly in the network namespace to avoid # conflicts self.netns.check_output(f"ip tuntap add mode tap name {name}") if ip: self.netns.check_output(f"ifconfig {name} {ip} up") @property def name(self): """Return the name of this tap interface.""" return self._name @property def netns(self): """Return the network namespace of this tap.""" return self._netns def set_tx_queue_len(self, tx_queue_len): """Set the length of the tap's TX queue.""" self.netns.check_output(f"ip link set {self.name} txqueuelen {tx_queue_len}") def __repr__(self): return f"" @dataclass(frozen=True, repr=True) class NetIfaceConfig: """Defines a network interface configuration.""" host_ip: str guest_ip: str tap_name: str dev_name: str netmask_len: int @property def guest_mac(self): """Return the guest MAC address.""" return mac_from_ip(self.guest_ip) @property def network(self): """Return the guest network""" return ipaddress.IPv4Interface(f"{self.host_ip}/{self.netmask_len}").network @staticmethod def with_id(i, netmask_len=30): """Define network iface with id `i`.""" return NetIfaceConfig( host_ip=f"192.168.{i}.1", guest_ip=f"192.168.{i}.2", tap_name=f"tap{i}", dev_name=f"eth{i}", netmask_len=netmask_len, ) @dataclass(repr=True) class NetNs: """Defines a network namespace.""" id: str taps: dict[str, Tap] = field(init=False, default_factory=dict) @property def path(self): """Get the host netns file path. Returns the path on the host to the file which represents the netns. 
""" return Path("/var/run/netns") / self.id def cmd_prefix(self): """Return the jailer context netns file prefix.""" return f"ip netns exec {self.id}" def check_output(self, cmd: str): """Run a command inside the netns.""" return utils.check_output(f"{self.cmd_prefix()} {cmd}") def setup(self): """Set up this network namespace.""" if not self.path.exists(): utils.check_output(f"ip netns add {self.id}") def cleanup(self): """Clean up this network namespace.""" if self.path.exists(): utils.check_output(f"ip netns del {self.id}") def add_tap(self, name, ip): """Add a TAP device to the namespace We assume that a Tap is always configured with the same IP. """ if name not in self.taps: tap = Tap(name, self, ip) self.taps[name] = tap return self.taps[name] def is_used(self): """Are any of the TAPs still in use Waits until there's no carrier signal. Otherwise trying to reuse the TAP may return `Resource busy (os error 16)` """ for tap in self.taps: _, stdout, _ = self.check_output(f"cat /sys/class/net/{tap}/carrier") if stdout.strip() != "0": return True return False ================================================ FILE: tests/host_tools/proc.py ================================================ # Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. # SPDX-License-Identifier: Apache-2.0 """Utility functions for interacting with the processor.""" import re from framework import utils def proc_type(): """Obtain the model processor on a Linux system.""" cmd = "cat /proc/cpuinfo" result = utils.check_output(cmd) lines = result.stdout.strip().splitlines() for line in lines: if "model name" in line: return re.sub(".*model name.*:", "", line, count=1) cmd = "uname -m" result = utils.check_output(cmd).stdout.strip() if "aarch64" in result: return "ARM" return "" ================================================ FILE: tests/host_tools/test_syscalls.c ================================================ // Copyright 2024 Amazon.com, Inc. or its affiliates. 
All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 // This is used by `test_seccomp_validate.py` #include #include #include #include #include #include #include #include #include #include void install_bpf_filter(char *bpf_file) { int fd = open(bpf_file, O_RDONLY); if (fd == -1) { perror("open"); exit(EXIT_FAILURE); } struct stat sb; if (fstat(fd, &sb) == -1) { perror("stat"); exit(EXIT_FAILURE); } size_t size = sb.st_size; struct sock_filter *filterbuf = (struct sock_filter*)malloc(size); if (read(fd, filterbuf, size) == -1) { perror("read"); exit(EXIT_FAILURE); } /* Install seccomp filter */ size_t insn_len = size / sizeof(struct sock_filter); struct sock_fprog prog = { .len = (unsigned short)(insn_len), .filter = filterbuf, }; if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { perror("prctl(NO_NEW_PRIVS)"); exit(EXIT_FAILURE); } if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog)) { perror("prctl(SECCOMP)"); exit(EXIT_FAILURE); } } int main(int argc, char **argv) { /* parse arguments */ if (argc < 3) { fprintf(stderr, "Usage: %s BPF_FILE ARG0..\n", argv[0]); exit(EXIT_FAILURE); } char *bpf_file = argv[1]; long syscall_id = atoi(argv[2]); long arg0, arg1, arg2, arg3; arg0 = arg1 = arg2 = arg3 = 0L; if (argc > 3) arg0 = atol(argv[3]); if (argc > 4) arg1 = atol(argv[4]); if (argc > 5) arg2 = atol(argv[5]); if (argc > 6) arg3 = atol(argv[6]); /* read seccomp filter from file */ if (strcmp(bpf_file, "/dev/null") != 0) { install_bpf_filter(bpf_file); } long res = syscall(syscall_id, arg0, arg1, arg2, arg3); return EXIT_SUCCESS; } ================================================ FILE: tests/host_tools/udp_offload.py ================================================ # Copyright 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved. # SPDX-License-Identifier: Apache-2.0 """ A utility for sending a UDP message with UDP oflload enabled. 
Inspired by the "TUN_F_CSUM is a must" chapter in https://blog.cloudflare.com/fr-fr/virtual-networking-101-understanding-tap/ by Cloudflare. """ import socket import sys def eprint(*args, **kwargs): """Print to stderr""" print(*args, file=sys.stderr, **kwargs) # Define SOL_UDP and UDP_SEGMENT if not defined in the system headers try: from socket import SOL_UDP, UDP_SEGMENT except ImportError: SOL_UDP = 17 # Protocol number for UDP UDP_SEGMENT = 103 # Option code for UDP segmentation (non-standard) if __name__ == "__main__": # Get the IP and port from command-line arguments if len(sys.argv) != 3: eprint("Usage: python3 udp_offload.py ") sys.exit(1) ip_address = sys.argv[1] port = int(sys.argv[2]) # Create a UDP socket sockfd = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) # Set the UDP segmentation option (UDP_SEGMENT) to 1400 bytes OPTVAL = 1400 try: sockfd.setsockopt(SOL_UDP, UDP_SEGMENT, OPTVAL) except (AttributeError, PermissionError): eprint("Unable to set UDP_SEGMENT option") sys.exit(1) # Set the destination address and port servaddr = (ip_address, port) # Send the message to the destination address MESSAGE = b"x" try: sockfd.sendto(MESSAGE, servaddr) print("Message sent successfully") except socket.error as e: eprint(f"Error sending message: {e}") sys.exit(1) sockfd.close() ================================================ FILE: tests/host_tools/vmclock-abi.h ================================================ /* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */ /* * This structure provides a vDSO-style clock to VM guests, exposing the * relationship (or lack thereof) between the CPU clock (TSC, timebase, arch * counter, etc.) and real time. It is designed to address the problem of * live migration, which other clock enlightenments do not. * * When a guest is live migrated, this affects the clock in two ways. 
* * First, even between identical hosts the actual frequency of the underlying * counter will change within the tolerances of its specification (typically * ±50PPM, or 4 seconds a day). This frequency also varies over time on the * same host, but can be tracked by NTP as it generally varies slowly. With * live migration there is a step change in the frequency, with no warning. * * Second, there may be a step change in the value of the counter itself, as * its accuracy is limited by the precision of the NTP synchronization on the * source and destination hosts. * * So any calibration (NTP, PTP, etc.) which the guest has done on the source * host before migration is invalid, and needs to be redone on the new host. * * In its most basic mode, this structure provides only an indication to the * guest that live migration has occurred. This allows the guest to know that * its clock is invalid and take remedial action. For applications that need * reliable accurate timestamps (e.g. distributed databases), the structure * can be mapped all the way to userspace. This allows the application to see * directly for itself that the clock is disrupted and take appropriate * action, even when using a vDSO-style method to get the time instead of a * system call. * * In its more advanced mode. this structure can also be used to expose the * precise relationship of the CPU counter to real time, as calibrated by the * host. This means that userspace applications can have accurate time * immediately after live migration, rather than having to pause operations * and wait for NTP to recover. This mode does, of course, rely on the * counter being reliable and consistent across CPUs. * * Note that this must be true UTC, never with smeared leap seconds. If a * guest wishes to construct a smeared clock, it can do so. Presenting a * smeared clock through this interface would be problematic because it * actually messes with the apparent counter *period*. 
A linear smearing * of 1 ms per second would effectively tweak the counter period by 1000PPM * at the start/end of the smearing period, while a sinusoidal smear would * basically be impossible to represent. * * This structure is offered with the intent that it be adopted into the * nascent virtio-rtc standard, as a virtio-rtc that does not address the live * migration problem seems a little less than fit for purpose. For that * reason, certain fields use precisely the same numeric definitions as in * the virtio-rtc proposal. The structure can also be exposed through an ACPI * device with the CID "VMCLOCK", modelled on the "VMGENID" device except for * the fact that it uses a real _CRS to convey the address of the structure * (which should be a full page, to allow for mapping directly to userspace). */ #ifndef __VMCLOCK_ABI_H__ #define __VMCLOCK_ABI_H__ #include struct vmclock_abi { /* CONSTANT FIELDS */ __le32 magic; #define VMCLOCK_MAGIC 0x4b4c4356 /* "VCLK" */ __le32 size; /* Size of region containing this structure */ __le16 version; /* 1 */ __u8 counter_id; /* Matches VIRTIO_RTC_COUNTER_xxx except INVALID */ #define VMCLOCK_COUNTER_ARM_VCNT 0 #define VMCLOCK_COUNTER_X86_TSC 1 #define VMCLOCK_COUNTER_INVALID 0xff __u8 time_type; /* Matches VIRTIO_RTC_TYPE_xxx */ #define VMCLOCK_TIME_UTC 0 /* Since 1970-01-01 00:00:00z */ #define VMCLOCK_TIME_TAI 1 /* Since 1970-01-01 00:00:00z */ #define VMCLOCK_TIME_MONOTONIC 2 /* Since undefined epoch */ #define VMCLOCK_TIME_INVALID_SMEARED 3 /* Not supported */ #define VMCLOCK_TIME_INVALID_MAYBE_SMEARED 4 /* Not supported */ /* NON-CONSTANT FIELDS PROTECTED BY SEQCOUNT LOCK */ __le32 seq_count; /* Low bit means an update is in progress */ /* * This field changes to another non-repeating value when the CPU * counter is disrupted, for example on live migration. This lets * the guest know that it should discard any calibration it has * performed of the counter against external sources (NTP/PTP/etc.). 
*/ __le64 disruption_marker; __le64 flags; /* Indicates that the tai_offset_sec field is valid */ #define VMCLOCK_FLAG_TAI_OFFSET_VALID (1 << 0) /* * Optionally used to notify guests of pending maintenance events. * A guest which provides latency-sensitive services may wish to * remove itself from service if an event is coming up. Two flags * indicate the approximate imminence of the event. */ #define VMCLOCK_FLAG_DISRUPTION_SOON (1 << 1) /* About a day */ #define VMCLOCK_FLAG_DISRUPTION_IMMINENT (1 << 2) /* About an hour */ #define VMCLOCK_FLAG_PERIOD_ESTERROR_VALID (1 << 3) #define VMCLOCK_FLAG_PERIOD_MAXERROR_VALID (1 << 4) #define VMCLOCK_FLAG_TIME_ESTERROR_VALID (1 << 5) #define VMCLOCK_FLAG_TIME_MAXERROR_VALID (1 << 6) /* * If the MONOTONIC flag is set then (other than leap seconds) it is * guaranteed that the time calculated according this structure at * any given moment shall never appear to be later than the time * calculated via the structure at any *later* moment. * * In particular, a timestamp based on a counter reading taken * immediately after setting the low bit of seq_count (and the * associated memory barrier), using the previously-valid time and * period fields, shall never be later than a timestamp based on * a counter reading taken immediately before *clearing* the low * bit again after the update, using the about-to-be-valid fields. */ #define VMCLOCK_FLAG_TIME_MONOTONIC (1 << 7) /* * If the VM_GEN_COUNTER_PRESENT flag is set, the hypervisor will * bump the vm_generation_counter field every time the guest is * loaded from some save state (restored from a snapshot). */ #define VMCLOCK_FLAG_VM_GEN_COUNTER_PRESENT (1 << 8) /* * If the NOTIFICATION_PRESENT flag is set, the hypervisor will send * a notification every time it updates seq_count to a new even number. 
*/ #define VMCLOCK_FLAG_NOTIFICATION_PRESENT (1 << 9) __u8 pad[2]; __u8 clock_status; #define VMCLOCK_STATUS_UNKNOWN 0 #define VMCLOCK_STATUS_INITIALIZING 1 #define VMCLOCK_STATUS_SYNCHRONIZED 2 #define VMCLOCK_STATUS_FREERUNNING 3 #define VMCLOCK_STATUS_UNRELIABLE 4 /* * The time exposed through this device is never smeared. This field * corresponds to the 'subtype' field in virtio-rtc, which indicates * the smearing method. However in this case it provides a *hint* to * the guest operating system, such that *if* the guest OS wants to * provide its users with an alternative clock which does not follow * UTC, it may do so in a fashion consistent with the other systems * in the nearby environment. */ __u8 leap_second_smearing_hint; /* Matches VIRTIO_RTC_SUBTYPE_xxx */ #define VMCLOCK_SMEARING_STRICT 0 #define VMCLOCK_SMEARING_NOON_LINEAR 1 #define VMCLOCK_SMEARING_UTC_SLS 2 __le16 tai_offset_sec; /* Actually two's complement signed */ __u8 leap_indicator; /* * This field is based on the VIRTIO_RTC_LEAP_xxx values as defined * in the current draft of virtio-rtc, but since smearing cannot be * used with the shared memory device, some values are not used. * * The _POST_POS and _POST_NEG values allow the guest to perform * its own smearing during the day or so after a leap second when * such smearing may need to continue being applied for a leap * second which is now theoretically "historical". */ #define VMCLOCK_LEAP_NONE 0x00 /* No known nearby leap second */ #define VMCLOCK_LEAP_PRE_POS 0x01 /* Positive leap second at EOM */ #define VMCLOCK_LEAP_PRE_NEG 0x02 /* Negative leap second at EOM */ #define VMCLOCK_LEAP_POS 0x03 /* Set during 23:59:60 second */ #define VMCLOCK_LEAP_POST_POS 0x04 #define VMCLOCK_LEAP_POST_NEG 0x05 /* Bit shift for counter_period_frac_sec and its error rate */ __u8 counter_period_shift; /* * Paired values of counter and UTC at a given point in time. */ __le64 counter_value; /* * Counter period, and error margin of same. 
The unit of these * fields is 1/2^(64 + counter_period_shift) of a second. */ __le64 counter_period_frac_sec; __le64 counter_period_esterror_rate_frac_sec; __le64 counter_period_maxerror_rate_frac_sec; /* * Time according to time_type field above. */ __le64 time_sec; /* Seconds since time_type epoch */ __le64 time_frac_sec; /* Units of 1/2^64 of a second */ __le64 time_esterror_nanosec; __le64 time_maxerror_nanosec; /* * This field changes to another non-repeating value when the VM * is loaded from a snapshot. This event, typically, represents a * "jump" forward in time. As a result, in this case as well, the * guest needs to discard any calibrarion against external sources. * Loading a snapshot in a VM has different semantics than other VM * events such as live migration, i.e. apart from re-adjusting guest * clocks a guest user space might want to discard UUIDs, reset * network connections or reseed entropy, etc. As a result, we * use a dedicated marker for such events. */ __le64 vm_generation_counter; }; #endif /* __VMCLOCK_ABI_H__ */ ================================================ FILE: tests/host_tools/vmclock.c ================================================ // Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
// SPDX-License-Identifier: Apache-2.0 #include #include #include #include #include #include #include #include #include #include #include "vmclock-abi.h" const char *VMCLOCK_DEV_PATH = "/dev/vmclock0"; int open_vmclock(void) { int fd = open(VMCLOCK_DEV_PATH, 0); if (fd == -1) { perror("open"); exit(1); } return fd; } struct vmclock_abi *get_vmclock_handle(int fd) { void *ptr = mmap(NULL, sizeof(struct vmclock_abi), PROT_READ, MAP_SHARED, fd, 0); if (ptr == MAP_FAILED) { perror("mmap"); exit(1); } return ptr; } #define READ_VMCLOCK_FIELD_FN(type, field) \ type read##_##field (struct vmclock_abi *vmclock) { \ type ret; \ while (1) { \ type seq = vmclock->seq_count & ~1ULL; \ \ /* This matches a write fence in the VMM */ \ atomic_thread_fence(memory_order_acquire); \ \ ret = vmclock->field; \ \ /* This matches a write fence in the VMM */ \ atomic_thread_fence(memory_order_acquire); \ if (seq == vmclock->seq_count) \ break; \ } \ \ return ret; \ } READ_VMCLOCK_FIELD_FN(uint64_t, disruption_marker); READ_VMCLOCK_FIELD_FN(uint64_t, vm_generation_counter); /* * Read `vmclock_abi` structure using a file descriptor pointing to * `/dev/vmclock0`. */ void read_vmclock(int fd, struct vmclock_abi *vmclock) { int ret; /* * Use `pread()`, since the device doesn't implement lseek(), so * we can't reset `fp`. */ ret = pread(fd, vmclock, sizeof(*vmclock), 0); if (ret < 0) { perror("read"); exit(1); } else if (ret < (int) sizeof(*vmclock)) { fprintf(stderr, "We don't handle partial writes (%d). Exiting!\n", ret); exit(1); } } void print_vmclock(struct vmclock_abi *vmclock) { int has_vm_gen_counter = vmclock->flags & VMCLOCK_FLAG_VM_GEN_COUNTER_PRESENT; int has_notifications = vmclock->flags & VMCLOCK_FLAG_NOTIFICATION_PRESENT; printf("VMCLOCK_FLAG_VM_GEN_COUNTER_PRESENT: %s\n", has_vm_gen_counter ? "true" : "false"); printf("VMCLOCK_FLAG_NOTIFICATION_PRESENT: %s\n", has_notifications ? 
"true" : "false"); printf("VMCLOCK_MAGIC: 0x%x\n", vmclock->magic); printf("VMCLOCK_SIZE: 0x%x\n", vmclock->size); printf("VMCLOCK_VERSION: %u\n", vmclock->version); printf("VMCLOCK_CLOCK_STATUS: %u\n", vmclock->clock_status); printf("VMCLOCK_COUNTER_ID: %u\n", vmclock->counter_id); printf("VMCLOCK_DISRUPTION_MARKER: %lu\n", read_disruption_marker(vmclock)); printf("VMCLOCK_VM_GENERATION_COUNTER: %lu\n", read_vm_generation_counter(vmclock)); fflush(stdout); } void run_poll(int fd) { struct vmclock_abi vmclock; int epfd, ret, nfds; struct epoll_event ev; read_vmclock(fd, &vmclock); print_vmclock(&vmclock); epfd = epoll_create(1); if (epfd < 0) { perror("epoll_create"); exit(1); } ev.events = EPOLLIN | EPOLLRDNORM; ev.data.fd = fd; ret = epoll_ctl(epfd, EPOLL_CTL_ADD, fd, &ev); if (ret < 0) { perror("epoll_add"); exit(1); } while (1) { nfds = epoll_wait(epfd, &ev, 1, -1); if (nfds < 0) { perror("epoll_wait"); exit(1); } if (ev.data.fd != fd) { fprintf(stderr, "Unknown file descriptor %d\n", ev.data.fd); exit(1); } if (ev.events & EPOLLHUP) { fprintf(stderr, "Device does not support notifications. 
Stop polling\n"); exit(1); } else if (ev.events & EPOLLIN) { fprintf(stdout, "Got VMClock notification\n"); read_vmclock(fd, &vmclock); print_vmclock(&vmclock); } } } void print_help_message() { fprintf(stderr, "usage: vmclock MODE\n"); fprintf(stderr, "Available modes:\n"); fprintf(stderr, " -r\tRead vmclock_abi using read()\n"); fprintf(stderr, " -m\tRead vmclock_abi using mmap()\n"); fprintf(stderr, " -p\tPoll VMClock for changes\n"); } int main(int argc, char *argv[]) { int fd; struct vmclock_abi vmclock, *vmclock_ptr; if (argc != 2) { print_help_message(); exit(1); } fd = open_vmclock(); if (!strncmp(argv[1], "-r", 2)) { printf("Reading VMClock with read()\n"); read_vmclock(fd, &vmclock); print_vmclock(&vmclock); } else if (!strncmp(argv[1], "-m", 2)) { printf("Reading VMClock with mmap()\n"); vmclock_ptr = get_vmclock_handle(fd); print_vmclock(vmclock_ptr); } else if (!strncmp(argv[1], "-p", 2)) { printf("Polling VMClock\n"); run_poll(fd); } else { print_help_message(); exit(1); } return 0; } ================================================ FILE: tests/host_tools/vsock_helper.c ================================================ // Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 // This is a vsock helper tool, used by the Firecracker integration tests, // to - well - test the virtio vsock device. It can be used to // run a vsock echo client, that reads data from STDIN, sends it to an // echo server, then forwards the server's reply to STDOUT. 
#include #include #include #include #include #include #include #include #include #include #include #include #define BUF_SIZE (16 * 1024) #define SERVER_ACCEPT_BACKLOG 128 int print_usage() { fprintf(stderr, "Usage: ./vsock-helper echo \n"); fprintf(stderr, "\n"); fprintf(stderr, " echo connect to an echo server, listening on CID:port.\n"); fprintf(stderr, " STDIN will be piped through to the echo server, and\n"); fprintf(stderr, " data coming from the server will pe sent to STDOUT.\n"); fprintf(stderr, "\n"); return -1; } int xfer(int src_fd, int dst_fd) { char buf[BUF_SIZE]; int count = read(src_fd, buf, sizeof(buf)); if (!count) return 0; if (count < 0) return -1; int offset = 0; do { int written; written = write(dst_fd, &buf[offset], count - offset); if (written <= 0) return -1; offset += written; } while (offset < count); return offset; } int run_echo(uint32_t cid, uint32_t port) { int sock = socket(AF_VSOCK, SOCK_STREAM, 0); if (sock < 0) { perror("socket()"); return -1; } struct sockaddr_vm vsock_addr = { .svm_family = AF_VSOCK, .svm_port = port, .svm_cid = cid }; if (connect(sock, (struct sockaddr*)&vsock_addr, sizeof(vsock_addr)) < 0) { perror("connect()"); return -1; } for (;;) { int ping_cnt = xfer(STDIN_FILENO, sock); if (!ping_cnt) break; if (ping_cnt < 0) return -1; int pong_cnt = 0; while (pong_cnt < ping_cnt) { int res = xfer(sock, STDOUT_FILENO); if (res <= 0) return -1; pong_cnt += res; } } return close(sock); } int main(int argc, char **argv) { if (argc < 3) { return print_usage(); } if (strcmp(argv[1], "echo") == 0) { if (argc != 4) { return print_usage(); } uint32_t cid = atoi(argv[2]); uint32_t port = atoi(argv[3]); if (!cid || !port) { return print_usage(); } return run_echo(cid, port); } return print_usage(); } ================================================ FILE: tests/host_tools/waitpkg.c ================================================ // Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
// SPDX-License-Identifier: Apache-2.0 // This is a sample code to attempt to use WAITPKG (UMONITOR / UWAIT / TPAUSE // instructions). It is used to test that attemping to use it generates #UD. #include #include #include void umwait(volatile int *addr) { _umonitor((void *)addr); printf("address monitoring hardware armed\n"); uint64_t timeout = 1000000000ULL; uint32_t control = 0; uint8_t cflag = _umwait(control, timeout); printf("cflag = %d\n", cflag); } int main() { int a = 0; umwait(&a); return 0; } ================================================ FILE: tests/integration_tests/build/__init__.py ================================================ # Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. # SPDX-License-Identifier: Apache-2.0 ================================================ FILE: tests/integration_tests/build/test_clippy.py ================================================ # Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. # SPDX-License-Identifier: Apache-2.0 """Tests ensuring codebase style compliance for Rust and Python.""" import platform import pytest from host_tools.cargo_build import cargo SUCCESS_CODE = 0 MACHINE = platform.machine() TARGETS = [ "{}-unknown-linux-gnu".format(MACHINE), "{}-unknown-linux-musl".format(MACHINE), ] @pytest.mark.parametrize("target", TARGETS) def test_rust_clippy(target): """ Test that clippy does not generate any errors/warnings. """ cargo( "clippy", f"--target {target} --all --all-targets --all-features", "-D warnings" ) ================================================ FILE: tests/integration_tests/build/test_coverage.py ================================================ # Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
# SPDX-License-Identifier: Apache-2.0
"""Tests enforcing code coverage for production code."""

import os
import warnings

import pytest

from framework import utils
from framework.properties import global_props
from host_tools import proc
from host_tools.cargo_build import cargo

PROC_MODEL = proc.proc_type()

# Toolchain target architecture.
if "Intel" in PROC_MODEL:
    VENDOR = "Intel"
    ARCH = "x86_64"
elif "AMD" in PROC_MODEL:
    VENDOR = "AMD"
    ARCH = "x86_64"
elif "ARM" in PROC_MODEL:
    VENDOR = "ARM"
    ARCH = "aarch64"
else:
    raise Exception(f"Unsupported processor model ({PROC_MODEL})")

# Toolchain target.
# Currently profiling with `aarch64-unknown-linux-musl` is unsupported (see
# https://github.com/rust-lang/rustup/issues/3095#issuecomment-1280705619) therefore we profile and
# run coverage with the `gnu` toolchains and run unit tests with the `musl` toolchains.
TARGET = f"{ARCH}-unknown-linux-gnu"


@pytest.mark.timeout(600)
def test_coverage(monkeypatch):
    """Test code coverage"""
    # Re-direct to repository root.
    monkeypatch.chdir("..")

    # Generate test profiles (instrumented build; one .profraw per process).
    cargo(
        "test",
        f"--all --target {TARGET}",
        "--test-threads=1",
        env={
            "RUSTFLAGS": "-Cinstrument-coverage",
            "LLVM_PROFILE_FILE": "coverage-%p-%m.profraw",
        },
    )

    lcov_file = "./build/cargo_target/coverage.lcov"

    # Generate coverage report.
    cmd = f"""
        grcov . \
            -s . \
            --binary-path ./build/cargo_target/{TARGET}/debug/ \
            --excl-start "mod tests" \
            --ignore "build/*" \
            --ignore "**/tests/*" \
            --ignore "**/test_utils*" \
            --ignore "**/mock_*" \
            --ignore "src/firecracker/examples/*" \
            --ignore "**/generated*" \
            -t lcov \
            --ignore-not-existing \
            -o {lcov_file}"""

    # Ignore code not relevant for the intended platform
    # - CPUID and CPU template
    # - Static CPU templates intended for specific CPU vendors
    if "AMD" == VENDOR:
        cmd += " \
            --ignore **/intel* \
            --ignore *t2* \
            --ignore *t2s* \
            --ignore *t2cl* \
            --ignore *c3* \
        "
    elif "Intel" == VENDOR:
        cmd += " \
            --ignore **/amd* \
            --ignore *t2a* \
        "

    utils.check_output(cmd)

    # Only upload if token is present and we're in EC2
    if "CODECOV_TOKEN" in os.environ and global_props.is_ec2:
        pr_number = os.environ.get("BUILDKITE_PULL_REQUEST")
        branch = os.environ.get("BUILDKITE_BRANCH")
        if not branch:
            # Fall back to the locally checked-out branch name.
            branch = utils.check_output("git rev-parse --abbrev-ref HEAD").stdout

        # -Z flag means "fail on error". There's supposed to be a more descriptive long form in
        # --fail-on-error, but it doesn't work.
        codecov_cmd = f"codecov -Z -f {lcov_file} -F {global_props.host_linux_version}-{global_props.instance}"

        # BUILDKITE_PULL_REQUEST is the literal string "false" on non-PR builds.
        if pr_number and pr_number != "false":
            codecov_cmd += f" -P {pr_number}"
        else:
            codecov_cmd += f" -B {branch}"

        utils.check_output(codecov_cmd)
    else:
        warnings.warn(
            "Not uploading coverage report due to missing CODECOV_TOKEN environment variable"
        )
================================================ FILE: tests/integration_tests/build/test_dependencies.py ================================================
# Copyright 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0
"""Enforces controls over dependencies."""

from host_tools.cargo_build import cargo


def test_unused_dependencies():
    """
    Test that there are no unused dependencies.
    """
    # cargo-udeps requires a nightly toolchain.
    cargo("udeps", "--all", nightly=True)
================================================ FILE: tests/integration_tests/build/test_gdb.py ================================================
# Copyright 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0
"""A test that ensures that firecracker builds with GDB feature enabled at integration time."""

import host_tools.cargo_build


def test_gdb_compiles():
    """Checks that Firecracker compiles with GDB enabled"""
    host_tools.cargo_build.build_gdb()
================================================ FILE: tests/integration_tests/build/test_seccomp_no_redundant_rules.py ================================================
# Copyright 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0
"""A test that fails if it can definitely prove a seccomp rule redundant
(although it passing does not guarantee the converse, that all rules are
definitely needed).
"""

import platform
from pathlib import Path

from framework import utils
from framework.static_analysis import (
    determine_unneeded_seccomp_rules,
    find_syscalls_in_binary,
    load_seccomp_rules,
)

# Make sure we don't override the Firecracker binary used from other tests
TMP_BUILD_DIR = "../redundant_seccomp_rules_build"


def test_redundant_seccomp_rules():
    """Test that fails if static analysis determines redundant seccomp rules"""
    arch = platform.processor()
    nightly_toolchain = utils.check_output(
        "rustup toolchain list | grep nightly"
    ).stdout.strip()
    target = f"{arch}-unknown-linux-musl"
    # Static, non-PIE build so the syscall scan sees the final addresses.
    utils.check_output(
        f'CARGO_TARGET_DIR={TMP_BUILD_DIR} RUSTFLAGS="-C relocation-model=static -C link-args=-no-pie" cargo +{nightly_toolchain} -Zbuild-std=panic_abort,std build --release --target {target} -p firecracker'
    )

    found_syscalls = find_syscalls_in_binary(
        Path(f"{TMP_BUILD_DIR}/{target}/release/firecracker")
    )

    seccomp_rules = load_seccomp_rules(Path(f"../resources/seccomp/{target}.json"))

    redundant_rules = determine_unneeded_seccomp_rules(seccomp_rules, found_syscalls)

    assert not redundant_rules, f"Found redundant seccomp rules! {redundant_rules}"
================================================ FILE: tests/integration_tests/build/test_unittests.py ================================================
# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0
"""A test that ensures that all unit tests pass at integration time."""

import platform

import pytest

import host_tools.cargo_build as host  # pylint:disable=import-error

MACHINE = platform.machine()
# Currently profiling with `aarch64-unknown-linux-musl` is unsupported (see
# https://github.com/rust-lang/rustup/issues/3095#issuecomment-1280705619) therefore we profile and
# run coverage with the `gnu` toolchains and run unit tests with the `musl` toolchains.
TARGET = "{}-unknown-linux-musl".format(MACHINE)


@pytest.mark.timeout(600)
def test_unittests(test_fc_session_root_path):
    """
    Run unit and doc tests for all supported targets.
    """
    extra_args = f"--target {TARGET}"

    host.cargo_test(test_fc_session_root_path, extra_args=extra_args)
    # Also build/run the example targets' tests.
    host.cargo_test(test_fc_session_root_path, extra_args=extra_args + " --examples")


def test_benchmarks_compile():
    """Checks that all benchmarks compile"""
    host.cargo("bench", f"--all --no-run --target {TARGET}")
================================================ FILE: tests/integration_tests/functional/__init__.py ================================================
# Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0
================================================ FILE: tests/integration_tests/functional/test_api.py ================================================
# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0
"""Tests that ensure the correctness of the Firecracker API."""

# Disable pylint C0302: Too many lines in module
# pylint: disable=C0302
import os
import platform
import re
import resource
from pathlib import Path

import pytest
import semver

import host_tools.drive as drive_tools
import host_tools.network as net_tools
from framework import utils, utils_cpuid
from framework.utils import get_firecracker_version_from_toml
from framework.utils_cpu_templates import SUPPORTED_CPU_TEMPLATES

# Address-space cap (bytes) applied via prlimit in the machine-config test.
MEM_LIMIT = 1000000000
NOT_SUPPORTED_BEFORE_START = (
    "The requested operation is not supported before starting the microVM."
)
NOT_SUPPORTED_AFTER_START = (
    "The requested operation is not supported after starting the microVM"
)


def test_api_happy_start(uvm_plain):
    """
    Test that a regular microvm API config and boot sequence works.
    """
    test_microvm = uvm_plain
    test_microvm.spawn()

    # Set up the microVM with 2 vCPUs, 256 MiB of RAM and
    # a root file system with the rw permission.
    test_microvm.basic_config()

    test_microvm.start()

    if utils.pvh_supported():
        assert "Kernel loaded using PVH boot protocol" in test_microvm.log_data


def test_drive_io_engine(uvm_plain, io_engine):
    """
    Test io_engine configuration.

    Test that the io_engine can be configured via the API on kernels that
    support the given type and that FC returns an error otherwise.
    """
    test_microvm = uvm_plain
    test_microvm.spawn()

    test_microvm.basic_config(add_root_device=False)
    test_microvm.add_net_iface()

    kwargs = {
        "drive_id": "rootfs",
        "path_on_host": test_microvm.create_jailed_resource(test_microvm.rootfs_file),
        "is_root_device": True,
        "is_read_only": True,
    }

    test_microvm.api.drive.put(io_engine=io_engine, **kwargs)

    test_microvm.start()

    # The configured engine must be reflected in the full VM config.
    assert (
        test_microvm.api.vm_config.get().json()["drives"][0]["io_engine"] == io_engine
    )


def test_api_put_update_pre_boot(uvm_plain, io_engine):
    """
    Test that PUT updates are allowed before the microvm boots.

    Tests updates on drives, boot source and machine config.
    """
    test_microvm = uvm_plain
    test_microvm.spawn()

    # Set up the microVM with 2 vCPUs, 256 MiB of RAM and
    # a root file system with the rw permission.
    test_microvm.basic_config()

    fs1 = drive_tools.FilesystemFile(os.path.join(test_microvm.fsfiles, "scratch"))
    test_microvm.api.drive.put(
        drive_id="scratch",
        path_on_host=test_microvm.create_jailed_resource(fs1.path),
        is_root_device=False,
        is_read_only=False,
        io_engine=io_engine,
    )

    # Updates to `kernel_image_path` with an invalid path are not allowed.
    expected_msg = re.escape(
        "The kernel file cannot be opened: No such file or directory (os error 2)"
    )
    with pytest.raises(RuntimeError, match=expected_msg):
        test_microvm.api.boot.put(kernel_image_path="foo.bar")

    # Updates to `kernel_image_path` with a valid path are allowed.
    test_microvm.api.boot.put(
        kernel_image_path=test_microvm.get_jailed_resource(test_microvm.kernel_file)
    )

    # Updates to `path_on_host` with an invalid path are not allowed.
    with pytest.raises(RuntimeError, match="No such file or directory"):
        test_microvm.api.drive.put(
            drive_id="rootfs",
            path_on_host="foo.bar",
            is_read_only=True,
            is_root_device=True,
            io_engine=io_engine,
        )

    # Updates to `is_root_device` that result in two root block devices are not
    # allowed.
    with pytest.raises(RuntimeError, match="A root block device already exists"):
        test_microvm.api.drive.put(
            drive_id="scratch",
            path_on_host=test_microvm.get_jailed_resource(fs1.path),
            is_read_only=False,
            is_root_device=True,
            io_engine=io_engine,
        )

    # Valid updates to `path_on_host` and `is_read_only` are allowed.
    fs2 = drive_tools.FilesystemFile(os.path.join(test_microvm.fsfiles, "otherscratch"))
    test_microvm.api.drive.put(
        drive_id="scratch",
        path_on_host=test_microvm.create_jailed_resource(fs2.path),
        is_read_only=True,
        is_root_device=False,
        io_engine=io_engine,
    )

    # Valid updates to all fields in the machine configuration are allowed.
    # The machine configuration has a default value, so all PUTs are updates.
    microvm_config_json = {
        "vcpu_count": 4,
        "smt": platform.machine() == "x86_64",
        "mem_size_mib": 256,
        "track_dirty_pages": True,
    }
    if platform.machine() == "x86_64":
        microvm_config_json["cpu_template"] = "C3"

    test_microvm.api.machine_config.put(**microvm_config_json)

    response = test_microvm.api.machine_config.get()
    response_json = response.json()

    # Each configured field must round-trip through GET unchanged.
    vcpu_count = microvm_config_json["vcpu_count"]
    assert response_json["vcpu_count"] == vcpu_count

    smt = microvm_config_json["smt"]
    assert response_json["smt"] == smt

    mem_size_mib = microvm_config_json["mem_size_mib"]
    assert response_json["mem_size_mib"] == mem_size_mib

    if platform.machine() == "x86_64":
        cpu_template = str(microvm_config_json["cpu_template"])
        assert response_json["cpu_template"] == cpu_template

    track_dirty_pages = microvm_config_json["track_dirty_pages"]
    assert response_json["track_dirty_pages"] == track_dirty_pages


def test_net_api_put_update_pre_boot(uvm_plain):
    """
    Test PUT updates on network configurations before the microvm boots.
    """
    test_microvm = uvm_plain
    test_microvm.spawn()

    tap1name = test_microvm.id[:8] + "tap1"
    tap1 = net_tools.Tap(tap1name, test_microvm.netns)
    test_microvm.api.network.put(
        iface_id="1", guest_mac="06:00:00:00:00:01", host_dev_name=tap1.name
    )

    # Adding new network interfaces is allowed.
    tap2name = test_microvm.id[:8] + "tap2"
    tap2 = net_tools.Tap(tap2name, test_microvm.netns)
    test_microvm.api.network.put(
        iface_id="2", guest_mac="07:00:00:00:00:01", host_dev_name=tap2.name
    )

    # Updates to a network interface with an unavailable MAC are not allowed.
    guest_mac = "06:00:00:00:00:01"
    expected_msg = f"The MAC address is already in use: {guest_mac}"
    with pytest.raises(RuntimeError, match=expected_msg):
        test_microvm.api.network.put(
            iface_id="2", host_dev_name=tap2name, guest_mac=guest_mac
        )

    # Updates to a network interface with an available MAC are allowed.
    test_microvm.api.network.put(
        iface_id="2", host_dev_name=tap2name, guest_mac="08:00:00:00:00:01"
    )

    # Updates to a network interface with an unavailable name are not allowed.
    expected_msg = "Could not create the network device"
    with pytest.raises(RuntimeError, match=expected_msg):
        test_microvm.api.network.put(
            iface_id="1", host_dev_name=tap2name, guest_mac="06:00:00:00:00:01"
        )

    # Updates to a network interface with an available name are allowed.
    tap3name = test_microvm.id[:8] + "tap3"
    tap3 = net_tools.Tap(tap3name, test_microvm.netns)
    test_microvm.api.network.put(
        iface_id="3", host_dev_name=tap3.name, guest_mac="06:00:00:00:00:01"
    )


def test_api_mmds_config(uvm_plain):
    """
    Test /mmds/config PUT scenarios that unit tests can't cover.

    Tests updates on MMDS config before and after attaching a network device.
    """
    test_microvm = uvm_plain
    test_microvm.spawn()

    # Set up the microVM with 2 vCPUs, 256 MiB of RAM and
    # a root file system with the rw permission.
    test_microvm.basic_config()

    # Setting MMDS config with empty network interface IDs list is not allowed.
    err_msg = (
        "The list of network interface IDs that allow "
        "forwarding MMDS requests is empty."
    )
    with pytest.raises(RuntimeError, match=err_msg):
        test_microvm.api.mmds_config.put(network_interfaces=[])

    # Setting MMDS config when no network device has been attached
    # is not allowed.
    err_msg = (
        "The list of network interface IDs provided contains "
        "at least one ID that does not correspond to any "
        "existing network interface."
    )
    with pytest.raises(RuntimeError, match=err_msg):
        test_microvm.api.mmds_config.put(network_interfaces=["foo"])

    # Attach network interface.
    tap = net_tools.Tap(f"tap1-{test_microvm.id[:6]}", test_microvm.netns)
    test_microvm.api.network.put(
        iface_id="1", guest_mac="06:00:00:00:00:01", host_dev_name=tap.name
    )

    # Setting MMDS config with an ID that does not correspond to an already
    # attached network device is not allowed.
    err_msg = (
        "The list of network interface IDs provided contains"
        " at least one ID that does not correspond to any "
        "existing network interface."
    )
    with pytest.raises(RuntimeError, match=err_msg):
        test_microvm.api.mmds_config.put(network_interfaces=["1", "foo"])

    # Updates to MMDS version with invalid value are not allowed.
    err_msg = (
        "An error occurred when deserializing the json body of a "
        "request: unknown variant `foo`, expected `V1` or `V2`"
    )
    with pytest.raises(RuntimeError, match=err_msg):
        test_microvm.api.mmds_config.put(version="foo", network_interfaces=["1"])

    # Valid MMDS config not specifying version or IPv4 address.
    # V1 is the implied default version.
    test_microvm.api.mmds_config.put(network_interfaces=["1"])
    assert test_microvm.api.vm_config.get().json()["mmds-config"]["version"] == "V1"

    # Valid MMDS config not specifying version.
    mmds_config = {"ipv4_address": "169.254.169.250", "network_interfaces": ["1"]}
    test_microvm.api.mmds_config.put(**mmds_config)
    assert (
        test_microvm.api.vm_config.get().json()["mmds-config"]["ipv4_address"]
        == "169.254.169.250"
    )

    # Valid MMDS config.
    mmds_config = {
        "version": "V2",
        "ipv4_address": "169.254.169.250",
        "network_interfaces": ["1"],
    }
    test_microvm.api.mmds_config.put(**mmds_config)
    assert test_microvm.api.vm_config.get().json()["mmds-config"]["version"] == "V2"


# pylint: disable=too-many-statements
def test_api_machine_config(uvm_plain):
    """
    Test /machine_config PUT/PATCH scenarios that unit tests can't cover.
    """
    test_microvm = uvm_plain
    test_microvm.spawn()

    # Test invalid vcpu count < 0.
    with pytest.raises(RuntimeError):
        test_microvm.api.machine_config.put(vcpu_count="-2")

    # Test invalid type for smt flag.
    with pytest.raises(RuntimeError):
        test_microvm.api.machine_config.put(smt="random_string")

    # Test invalid CPU template.
    with pytest.raises(RuntimeError):
        test_microvm.api.machine_config.put(cpu_template="random_string")

    test_microvm.api.machine_config.patch(track_dirty_pages=True)

    # Test missing vcpu_count.
    with pytest.raises(
        RuntimeError, match="missing field `vcpu_count` at line 1 column 21."
    ):
        test_microvm.api.machine_config.put(mem_size_mib=128)

    # Test missing mem_size_mib.
    with pytest.raises(
        RuntimeError, match="missing field `mem_size_mib` at line 1 column 17."
    ):
        test_microvm.api.machine_config.put(vcpu_count=2)

    # Test default smt value.
    test_microvm.api.machine_config.put(mem_size_mib=128, vcpu_count=1)
    response = test_microvm.api.machine_config.get()
    assert response.json()["smt"] is False

    # Test that smt=True errors on ARM.
    if platform.machine() == "x86_64":
        test_microvm.api.machine_config.patch(smt=True)
    elif platform.machine() == "aarch64":
        expected_msg = (
            "Enabling simultaneous multithreading is not supported on aarch64"
        )
        with pytest.raises(RuntimeError, match=expected_msg):
            test_microvm.api.machine_config.patch(smt=True)

    # Test invalid mem_size_mib < 0.
    with pytest.raises(RuntimeError):
        test_microvm.api.machine_config.put(mem_size_mib="-2")

    # Test invalid mem_size_mib > usize::MAX.
    bad_size = 1 << 64
    fail_msg = (
        "error occurred when deserializing the json body of a request: invalid type"
    )
    with pytest.raises(RuntimeError, match=fail_msg):
        test_microvm.api.machine_config.put(mem_size_mib=bad_size)

    # Reset the configuration of the microvm
    # This will explicitly set vcpu_num = 2, mem_size_mib = 256
    # track_dirty_pages = false. All other parameters are
    # unspecified so will revert to default values.
    test_microvm.basic_config()

    # Test mem_size_mib of valid type, but too large.
    # Cap the process address space so the oversized mmap must fail.
    firecracker_pid = test_microvm.firecracker_pid
    resource.prlimit(
        firecracker_pid, resource.RLIMIT_AS, (MEM_LIMIT, resource.RLIM_INFINITY)
    )
    bad_size = (1 << 64) - 1
    test_microvm.api.machine_config.patch(mem_size_mib=bad_size)

    fail_msg = re.escape(
        "Invalid Memory Configuration: Cannot create mmap region: Out of memory (os error 12)"
    )
    with pytest.raises(RuntimeError, match=fail_msg):
        test_microvm.start()

    # Test invalid mem_size_mib = 0.
    with pytest.raises(
        RuntimeError,
        match=re.escape(
            "The memory size (MiB) is either 0, or not a multiple of the configured page size."
        ),
    ):
        test_microvm.api.machine_config.patch(mem_size_mib=0)

    # Test valid mem_size_mib.
    test_microvm.api.machine_config.patch(mem_size_mib=256)

    # Set the cpu template
    if len(SUPPORTED_CPU_TEMPLATES) == 0:
        # No static CPU templates are supported on this CPU.
        test_microvm.api.machine_config.patch(cpu_template="None")
    else:
        test_microvm.api.machine_config.patch(cpu_template=SUPPORTED_CPU_TEMPLATES[0])

    test_microvm.start()

    # Validate full vm configuration after patching machine config.
    json = test_microvm.api.vm_config.get().json()
    assert json["machine-config"]["vcpu_count"] == 2
    assert json["machine-config"]["mem_size_mib"] == 256
    assert json["machine-config"]["smt"] is False


def test_negative_machine_config_api(uvm_plain):
    """
    Test the deprecated `cpu_template` field in PUT and PATCH requests on
    `/machine-config` API is handled correctly.

    When using the `cpu_template` field (even if the value is "None"), the HTTP
    response header should have "Deprecation: true".
    """
    test_microvm = uvm_plain
    test_microvm.spawn()

    # Use `cpu_template` field in PUT /machine-config
    response = test_microvm.api.machine_config.put(
        vcpu_count=2,
        mem_size_mib=256,
        cpu_template="None",
    )
    assert response.headers["deprecation"]
    assert (
        "PUT /machine-config: cpu_template field is deprecated."
        in test_microvm.log_data
    )

    # Use `cpu_template` field in PATCH /machine-config
    response = test_microvm.api.machine_config.patch(cpu_template="None")
    assert (
        "PATCH /machine-config: cpu_template field is deprecated."
        in test_microvm.log_data
    )


def test_api_cpu_config(uvm_plain, custom_cpu_template):
    """
    Test /cpu-config PUT scenarios.
""" test_microvm = uvm_plain test_microvm.spawn() with pytest.raises(RuntimeError): test_microvm.api.cpu_config.put(foo=False) test_microvm.api.cpu_config.put(**custom_cpu_template["template"]) def test_api_put_update_post_boot(uvm_plain, io_engine): """ Test that PUT updates are rejected after the microvm boots. """ test_microvm = uvm_plain test_microvm.spawn() # Set up the microVM with 2 vCPUs, 256 MiB of RAM and # a root file system with the rw permission. test_microvm.basic_config() iface_id = "1" tapname = test_microvm.id[:8] + "tap" + iface_id tap1 = net_tools.Tap(tapname, test_microvm.netns) test_microvm.api.network.put( iface_id=iface_id, host_dev_name=tap1.name, guest_mac="06:00:00:00:00:01" ) test_microvm.start() # Valid updates to `kernel_image_path` are not allowed after boot. with pytest.raises(RuntimeError, match=NOT_SUPPORTED_AFTER_START): test_microvm.api.boot.put( kernel_image_path=test_microvm.get_jailed_resource(test_microvm.kernel_file) ) # Valid updates to the machine configuration are not allowed after boot. with pytest.raises(RuntimeError, match=NOT_SUPPORTED_AFTER_START): test_microvm.api.machine_config.patch(vcpu_count=4) with pytest.raises(RuntimeError, match=NOT_SUPPORTED_AFTER_START): test_microvm.api.machine_config.put(vcpu_count=4, mem_size_mib=128) # Network interface update is not allowed after boot. with pytest.raises(RuntimeError, match=NOT_SUPPORTED_AFTER_START): test_microvm.api.network.put( iface_id="1", host_dev_name=tap1.name, guest_mac="06:00:00:00:00:02" ) # Block device update is not allowed after boot. with pytest.raises(RuntimeError, match=NOT_SUPPORTED_AFTER_START): test_microvm.api.drive.put( drive_id="rootfs", path_on_host=test_microvm.jailer.jailed_path(test_microvm.rootfs_file), is_read_only=False, is_root_device=True, io_engine=io_engine, ) # MMDS config is not allowed post-boot. 
mmds_config = { "version": "V2", "ipv4_address": "169.254.169.250", "network_interfaces": ["1"], } with pytest.raises(RuntimeError, match=NOT_SUPPORTED_AFTER_START): test_microvm.api.mmds_config.put(**mmds_config) def test_rate_limiters_api_config(uvm_plain, io_engine): """ Test the IO rate limiter API config. """ test_microvm = uvm_plain test_microvm.spawn() # Test the DRIVE rate limiting API. # Test drive with bw rate-limiting. fs1 = drive_tools.FilesystemFile(os.path.join(test_microvm.fsfiles, "bw")) test_microvm.api.drive.put( drive_id="bw", path_on_host=test_microvm.create_jailed_resource(fs1.path), is_read_only=False, is_root_device=False, rate_limiter={"bandwidth": {"size": 1000000, "refill_time": 100}}, io_engine=io_engine, ) # Test drive with ops rate-limiting. fs2 = drive_tools.FilesystemFile(os.path.join(test_microvm.fsfiles, "ops")) test_microvm.api.drive.put( drive_id="ops", path_on_host=test_microvm.create_jailed_resource(fs2.path), is_read_only=False, is_root_device=False, rate_limiter={"ops": {"size": 1, "refill_time": 100}}, io_engine=io_engine, ) # Test drive with bw and ops rate-limiting. fs3 = drive_tools.FilesystemFile(os.path.join(test_microvm.fsfiles, "bwops")) test_microvm.api.drive.put( drive_id="bwops", path_on_host=test_microvm.create_jailed_resource(fs3.path), is_read_only=False, is_root_device=False, rate_limiter={ "bandwidth": {"size": 1000000, "refill_time": 100}, "ops": {"size": 1, "refill_time": 100}, }, io_engine=io_engine, ) # Test drive with 'empty' rate-limiting (same as not specifying the field) fs4 = drive_tools.FilesystemFile(os.path.join(test_microvm.fsfiles, "nada")) test_microvm.api.drive.put( drive_id="nada", path_on_host=test_microvm.create_jailed_resource(fs4.path), is_read_only=False, is_root_device=False, rate_limiter={}, io_engine=io_engine, ) # Test the NET rate limiting API. # Test network with tx bw rate-limiting. 
iface_id = "1" tapname = test_microvm.id[:8] + "tap" + iface_id tap1 = net_tools.Tap(tapname, test_microvm.netns) test_microvm.api.network.put( iface_id=iface_id, guest_mac="06:00:00:00:00:01", host_dev_name=tap1.name, tx_rate_limiter={"bandwidth": {"size": 1000000, "refill_time": 100}}, ) # Test network with rx bw rate-limiting. iface_id = "2" tapname = test_microvm.id[:8] + "tap" + iface_id tap2 = net_tools.Tap(tapname, test_microvm.netns) test_microvm.api.network.put( iface_id=iface_id, guest_mac="06:00:00:00:00:02", host_dev_name=tap2.name, rx_rate_limiter={"bandwidth": {"size": 1000000, "refill_time": 100}}, ) # Test network with tx and rx bw and ops rate-limiting. iface_id = "3" tapname = test_microvm.id[:8] + "tap" + iface_id tap3 = net_tools.Tap(tapname, test_microvm.netns) test_microvm.api.network.put( iface_id=iface_id, guest_mac="06:00:00:00:00:03", host_dev_name=tap3.name, rx_rate_limiter={ "bandwidth": {"size": 1000000, "refill_time": 100}, "ops": {"size": 1, "refill_time": 100}, }, tx_rate_limiter={ "bandwidth": {"size": 1000000, "refill_time": 100}, "ops": {"size": 1, "refill_time": 100}, }, ) # Test entropy device bw and ops rate-limiting. test_microvm.api.entropy.put( rate_limiter={ "bandwidth": {"size": 1000000, "refill_time": 100}, "ops": {"size": 1, "refill_time": 100}, }, ) def test_api_patch_pre_boot(uvm_plain, io_engine): """ Test that PATCH updates are not allowed before the microvm boots. """ test_microvm = uvm_plain test_microvm.spawn() # Sets up the microVM with 2 vCPUs, 256 MiB of RAM, 1 network interface # and a root file system with the rw permission. 
test_microvm.basic_config() fs1 = drive_tools.FilesystemFile(os.path.join(test_microvm.fsfiles, "scratch")) drive_id = "scratch" test_microvm.api.drive.put( drive_id=drive_id, path_on_host=test_microvm.create_jailed_resource(fs1.path), is_root_device=False, is_read_only=False, io_engine=io_engine, ) iface_id = "1" tapname = test_microvm.id[:8] + "tap" + iface_id tap1 = net_tools.Tap(tapname, test_microvm.netns) test_microvm.api.network.put( iface_id=iface_id, host_dev_name=tap1.name, guest_mac="06:00:00:00:00:01" ) # Partial updates to the boot source are not allowed. with pytest.raises(RuntimeError, match="Invalid request method"): test_microvm.api.boot.patch(kernel_image_path="otherfile") # Partial updates to the machine configuration are allowed before boot. test_microvm.api.machine_config.patch(vcpu_count=4) response_json = test_microvm.api.machine_config.get().json() assert response_json["vcpu_count"] == 4 # Partial updates to the logger configuration are not allowed. with pytest.raises(RuntimeError, match="Invalid request method"): test_microvm.api.logger.patch(level="Error") # Patching drive before boot is not allowed. with pytest.raises(RuntimeError, match=NOT_SUPPORTED_BEFORE_START): test_microvm.api.drive.patch(drive_id=drive_id, path_on_host="foo.bar") # Patching net before boot is not allowed. with pytest.raises(RuntimeError, match=NOT_SUPPORTED_BEFORE_START): test_microvm.api.network.patch(iface_id=iface_id) def test_negative_api_patch_post_boot(uvm_plain, io_engine): """ Test PATCH updates that are not allowed after the microvm boots. """ test_microvm = uvm_plain test_microvm.spawn() # Sets up the microVM with 2 vCPUs, 256 MiB of RAM, 1 network iface and # a root file system with the rw permission. 
    test_microvm.basic_config()

    fs1 = drive_tools.FilesystemFile(os.path.join(test_microvm.fsfiles, "scratch"))
    test_microvm.api.drive.put(
        drive_id="scratch",
        path_on_host=test_microvm.create_jailed_resource(fs1.path),
        is_root_device=False,
        is_read_only=False,
        io_engine=io_engine,
    )

    iface_id = "1"
    tapname = test_microvm.id[:8] + "tap" + iface_id
    tap1 = net_tools.Tap(tapname, test_microvm.netns)
    test_microvm.api.network.put(
        iface_id=iface_id, host_dev_name=tap1.name, guest_mac="06:00:00:00:00:01"
    )

    test_microvm.start()

    # Partial updates to the boot source are not allowed.
    with pytest.raises(RuntimeError, match="Invalid request method"):
        test_microvm.api.boot.patch(kernel_image_path="otherfile")

    # Partial updates to the machine configuration are not allowed after boot.
    with pytest.raises(RuntimeError, match=NOT_SUPPORTED_AFTER_START):
        test_microvm.api.machine_config.patch(vcpu_count=4)

    # Partial updates to the logger configuration are not allowed.
    with pytest.raises(RuntimeError, match="Invalid request method"):
        test_microvm.api.logger.patch(level="Error")


def test_drive_patch(uvm_plain, io_engine):
    """
    Extensively test drive PATCH scenarios before and after boot.
    """
    test_microvm = uvm_plain
    test_microvm.spawn()

    # Sets up the microVM with 2 vCPUs, 256 MiB of RAM and
    # a root file system with the rw permission.
    test_microvm.basic_config(rootfs_io_engine="Sync")

    fs = drive_tools.FilesystemFile(os.path.join(test_microvm.fsfiles, "scratch"))
    test_microvm.add_drive(
        drive_id="scratch",
        path_on_host=fs.path,
        is_root_device=False,
        is_read_only=False,
        io_engine=io_engine,
    )

    # A vhost-user-backed drive, to exercise the backend-mismatch errors.
    fs_vub = drive_tools.FilesystemFile(
        os.path.join(test_microvm.fsfiles, "scratch_vub")
    )
    test_microvm.add_vhost_user_drive("scratch_vub", fs_vub.path)

    # Patching drive before boot is not allowed.
    with pytest.raises(RuntimeError, match=NOT_SUPPORTED_BEFORE_START):
        test_microvm.api.drive.patch(drive_id="scratch", path_on_host="foo.bar")

    test_microvm.start()

    _drive_patch(test_microvm, io_engine)


@pytest.mark.skipif(
    platform.machine() != "x86_64", reason="not yet implemented on aarch64"
)
def test_send_ctrl_alt_del(uvm_plain_any):
    """
    Test shutting down the microVM gracefully on x86, by sending CTRL+ALT+DEL.
    """
    # This relies on the i8042 device and AT Keyboard support being present in
    # the guest kernel.
    test_microvm = uvm_plain_any
    test_microvm.spawn()
    test_microvm.basic_config()
    test_microvm.add_net_iface()
    test_microvm.start()

    test_microvm.api.actions.put(action_type="SendCtrlAltDel")

    # If everything goes as expected, the guest OS will issue a reboot,
    # causing Firecracker to exit.
    test_microvm.mark_killed()


def _drive_patch(test_microvm, io_engine):
    """Exercise drive patch test scenarios."""
    # Patches without mandatory fields for virtio block are not allowed.
    expected_msg = "Running method expected different backend."
    with pytest.raises(RuntimeError, match=expected_msg):
        test_microvm.api.drive.patch(drive_id="scratch")

    # Patches with any fields for vhost-user block are not allowed.
    with pytest.raises(RuntimeError, match=expected_msg):
        test_microvm.api.drive.patch(
            drive_id="scratch_vub",
            path_on_host="some_path",
        )

    # Patches with any fields for vhost-user block are not allowed.
    with pytest.raises(RuntimeError, match=expected_msg):
        test_microvm.api.drive.patch(
            drive_id="scratch_vub",
            rate_limiter={
                "bandwidth": {"size": 1000000, "refill_time": 100},
                "ops": {"size": 1, "refill_time": 100},
            },
        )

    drive_path = "foo.bar"

    # Cannot patch drive permissions post boot.
    with pytest.raises(RuntimeError, match="unknown field `is_read_only`"):
        test_microvm.api.drive.patch(
            drive_id="scratch", path_on_host=drive_path, is_read_only=True
        )

    # Cannot patch io_engine post boot.
    with pytest.raises(RuntimeError, match="unknown field `io_engine`"):
        test_microvm.api.drive.patch(
            drive_id="scratch", path_on_host=drive_path, io_engine="Sync"
        )

    # Updates to `is_root_device` with a valid value are not allowed.
    with pytest.raises(RuntimeError, match="unknown field `is_root_device`"):
        test_microvm.api.drive.patch(
            drive_id="scratch", path_on_host=drive_path, is_root_device=False
        )

    # Updates to `path_on_host` with an invalid path are not allowed.
    expected_msg = f"Error manipulating the backing file: No such file or directory (os error 2) {drive_path}"
    with pytest.raises(RuntimeError, match=re.escape(expected_msg)):
        test_microvm.api.drive.patch(drive_id="scratch", path_on_host=drive_path)

    fs = drive_tools.FilesystemFile(os.path.join(test_microvm.fsfiles, "scratch_new"))
    # Updates to `path_on_host` with a valid path are allowed.
    test_microvm.api.drive.patch(
        drive_id="scratch", path_on_host=test_microvm.create_jailed_resource(fs.path)
    )

    # Updates to valid `path_on_host` and `rate_limiter` are allowed.
    test_microvm.api.drive.patch(
        drive_id="scratch",
        path_on_host=test_microvm.create_jailed_resource(fs.path),
        rate_limiter={
            "bandwidth": {"size": 1000000, "refill_time": 100},
            "ops": {"size": 1, "refill_time": 100},
        },
    )

    # Updates to `rate_limiter` only are allowed.
    test_microvm.api.drive.patch(
        drive_id="scratch",
        rate_limiter={
            "bandwidth": {"size": 5000, "refill_time": 100},
            "ops": {"size": 500, "refill_time": 100},
        },
    )

    # Updates to `rate_limiter` and invalid path fail.
    with pytest.raises(RuntimeError, match="No such file or directory"):
        test_microvm.api.drive.patch(
            drive_id="scratch",
            path_on_host="foo.bar",
            rate_limiter={
                "bandwidth": {"size": 5000, "refill_time": 100},
                "ops": {"size": 500, "refill_time": 100},
            },
        )

    # Validate full vm configuration after patching drives.
    response = test_microvm.api.vm_config.get().json()
    expected_drives = [
        {
            "drive_id": "rootfs",
            "partuuid": None,
            "is_root_device": True,
            "cache_type": "Unsafe",
            "is_read_only": True,
            "path_on_host": "/" + test_microvm.rootfs_file.name,
            "rate_limiter": None,
            "io_engine": "Sync",
            "socket": None,
        },
        {
            "drive_id": "scratch",
            "partuuid": None,
            "is_root_device": False,
            "cache_type": "Unsafe",
            "is_read_only": False,
            "path_on_host": "/scratch_new.ext4",
            "rate_limiter": {
                "bandwidth": {"size": 5000, "one_time_burst": None, "refill_time": 100},
                "ops": {"size": 500, "one_time_burst": None, "refill_time": 100},
            },
            "io_engine": io_engine,
            "socket": None,
        },
        {
            # vhost-user drives expose only their socket; the file-backed
            # fields are reported as None.
            "drive_id": "scratch_vub",
            "partuuid": None,
            "is_root_device": False,
            "cache_type": "Unsafe",
            "is_read_only": None,
            "path_on_host": None,
            "rate_limiter": None,
            "io_engine": None,
            "socket": str(
                Path("/") / test_microvm.disks_vhost_user["scratch_vub"].socket_path.name
            ),
        },
    ]
    # Sort both sides by drive_id so ordering differences don't matter.
    assert sorted(response["drives"], key=lambda d: d["drive_id"]) == sorted(
        expected_drives, key=lambda d: d["drive_id"]
    )


def test_api_version(uvm_plain):
    """
    Test the permanent VM version endpoint.
    """
    test_microvm = uvm_plain
    test_microvm.spawn()
    test_microvm.basic_config()

    # Getting the VM version should be available pre-boot.
    preboot_response = test_microvm.api.version.get()
    # Check that the response contains the version.
    assert "firecracker_version" in preboot_response.json()

    # Start the microvm.
    test_microvm.start()

    # Getting the VM version should be available post-boot.
    postboot_response = test_microvm.api.version.get()
    # Check that the response contains the version.
    assert "firecracker_version" in postboot_response.json()

    # Validate VM version post-boot is the same as pre-boot.
assert preboot_response.json() == postboot_response.json() cargo_version = get_firecracker_version_from_toml() api_version = semver.Version.parse(preboot_response.json()["firecracker_version"]) # Cargo version should match FC API version assert cargo_version == api_version binary_version = semver.Version.parse(test_microvm.firecracker_version) assert api_version == binary_version def test_api_vsock(uvm_nano): """ Test vsock related API commands. """ vm = uvm_nano # Create a vsock device. vm.api.vsock.put(guest_cid=15, uds_path="vsock.sock") # Updating an existing vsock is currently fine. vm.api.vsock.put(guest_cid=166, uds_path="vsock.sock") # Check PUT request. Although vsock_id is deprecated, it must still work. response = vm.api.vsock.put(vsock_id="vsock1", guest_cid=15, uds_path="vsock.sock") assert response.headers["deprecation"] # Updating an existing vsock is currently fine even with deprecated # `vsock_id`. response = vm.api.vsock.put(vsock_id="vsock1", guest_cid=166, uds_path="vsock.sock") assert response.headers["deprecation"] # No other vsock action is allowed after booting the VM. vm.start() # Updating an existing vsock should not be fine at this point. with pytest.raises(RuntimeError): vm.api.vsock.put(guest_cid=17, uds_path="vsock.sock") def test_api_entropy(uvm_plain): """ Test entropy related API commands. """ test_microvm = uvm_plain test_microvm.spawn() test_microvm.basic_config() # Create a new entropy device should be OK. test_microvm.api.entropy.put() # Overwriting an existing should be OK. test_microvm.api.entropy.put() # Start the microvm test_microvm.start() with pytest.raises(RuntimeError): test_microvm.api.entropy.put() def test_api_memory_hotplug(uvm_plain_6_1): """ Test hotplug related API commands. """ test_microvm = uvm_plain_6_1 test_microvm.spawn() test_microvm.basic_config() test_microvm.add_net_iface() # Adding hotplug memory region should be OK. 
test_microvm.api.memory_hotplug.put( total_size_mib=1024, block_size_mib=128, slot_size_mib=1024 ) # Overwriting an existing should be OK. # Omitting optional values should be ok test_microvm.api.memory_hotplug.put(total_size_mib=1024) # Get API should be rejected before boot with pytest.raises(AssertionError): test_microvm.api.memory_hotplug.get() # Patch API should be rejected before boot with pytest.raises(RuntimeError, match=NOT_SUPPORTED_BEFORE_START): test_microvm.api.memory_hotplug.patch(requested_size_mib=512) # Start the microvm test_microvm.start() # Put API should be rejected after boot with pytest.raises(RuntimeError, match=NOT_SUPPORTED_AFTER_START): test_microvm.api.memory_hotplug.put(total_size_mib=1024) # Get API should work after boot status = test_microvm.api.memory_hotplug.get().json() assert status["total_size_mib"] == 1024 # Patch API should work after boot test_microvm.api.memory_hotplug.patch(requested_size_mib=512) status = test_microvm.api.memory_hotplug.get().json() assert status["requested_size_mib"] == 512 def test_api_balloon(uvm_nano): """ Test balloon related API commands. """ test_microvm = uvm_nano # Updating an inexistent balloon device should give an error. with pytest.raises(RuntimeError): test_microvm.api.balloon.patch(amount_mib=0) # Adding a memory balloon should be OK. test_microvm.api.balloon.put(amount_mib=1, deflate_on_oom=True) # As is overwriting one. test_microvm.api.balloon.put( amount_mib=0, deflate_on_oom=False, stats_polling_interval_s=5 ) # Getting the device configuration should be available pre-boot. response = test_microvm.api.balloon.get() assert response.json()["amount_mib"] == 0 assert response.json()["deflate_on_oom"] is False assert response.json()["stats_polling_interval_s"] == 5 # Updating an existing balloon device is forbidden before boot. 
with pytest.raises(RuntimeError): test_microvm.api.balloon.patch(amount_mib=2) # We can't have a balloon device with a target size greater than # the available amount of memory. with pytest.raises(RuntimeError): test_microvm.api.balloon.put( amount_mib=1024, deflate_on_oom=False, stats_polling_interval_s=5 ) # Start the microvm. test_microvm.add_net_iface() test_microvm.start() # But updating should be OK. test_microvm.api.balloon.patch(amount_mib=4) # Check we can't request more than the total amount of VM memory. with pytest.raises(RuntimeError): test_microvm.api.balloon.patch(amount_mib=300) # Check we can't disable statistics as they were enabled at boot. # We can, however, change the interval to a non-zero value. test_microvm.api.balloon_stats.patch(stats_polling_interval_s=5) # Getting the device configuration should be available post-boot. response = test_microvm.api.balloon.get() assert response.json()["amount_mib"] == 4 assert response.json()["deflate_on_oom"] is False assert response.json()["stats_polling_interval_s"] == 5 # Check we can't overflow the `num_pages` field in the config space by # requesting too many MB. There are 256 4K pages in a MB. Here, we are # requesting u32::MAX / 128. 
with pytest.raises(RuntimeError): test_microvm.api.balloon.patch(amount_mib=33554432) def test_pmem_api(uvm_plain_any, rootfs): """ Test virtio-pmem API commands """ vm = uvm_plain_any vm.spawn() vm.basic_config(add_root_device=False) invalid_pmem_path_on_host = os.path.join(vm.fsfiles, "invalid_scratch") utils.check_output(f"touch {invalid_pmem_path_on_host}") invalid_pmem_file_path = vm.create_jailed_resource(str(invalid_pmem_path_on_host)) pmem_size_mb = 2 pmem_path_on_host = drive_tools.FilesystemFile( os.path.join(vm.fsfiles, "scratch"), size=pmem_size_mb ) pmem_file_path = vm.create_jailed_resource(pmem_path_on_host.path) # Try to add pmem without setting `path_on_host` expected_msg = re.escape( "An error occurred when deserializing the json body of a request: missing field `path_on_host`" ) with pytest.raises(RuntimeError, match=expected_msg): vm.api.pmem.put(id="pmem") # Try to add pmem with 0 sized backing file expected_msg = re.escape("Error backing file size is 0") with pytest.raises(RuntimeError, match=expected_msg): vm.api.pmem.put(id="pmem", path_on_host=invalid_pmem_file_path) # Try to add pmem as root while block is set as root vm.api.drive.put(drive_id="drive", path_on_host=pmem_file_path, is_root_device=True) expected_msg = re.escape( "Attempt to add pmem as a root device while the root device defined as a block device" ) with pytest.raises(RuntimeError, match=expected_msg): vm.api.pmem.put(id="pmem", path_on_host=pmem_file_path, root_device=True) # Reset block from being root vm.api.drive.put( drive_id="drive", path_on_host=pmem_file_path, is_root_device=False ) # Try to add pmem as root twice vm.api.pmem.put(id="pmem", path_on_host=pmem_file_path, root_device=True) expected_msg = re.escape("A root pmem device already exist") with pytest.raises(RuntimeError, match=expected_msg): vm.api.pmem.put(id="pmem2", path_on_host=pmem_file_path, root_device=True) # Reset pmem from being root vm.api.pmem.put(id="pmem", path_on_host=pmem_file_path, 
root_device=False) # Add a rootfs to boot a vm vm.add_pmem("rootfs", rootfs, True, True) # No post boot API calls to pmem with pytest.raises(RuntimeError): vm.api.pmem.put(id="pmem") def test_get_full_config_after_restoring_snapshot(microvm_factory, uvm_nano): """ Test the configuration of a microVM after restoring from a snapshot. """ net_iface = uvm_nano.add_net_iface() cpu_vendor = utils_cpuid.get_cpu_vendor() setup_cfg = {} # Basic config also implies a root block device. setup_cfg["machine-config"] = { "vcpu_count": 2, "mem_size_mib": 256, "smt": True, "track_dirty_pages": False, "huge_pages": "None", } if cpu_vendor == utils_cpuid.CpuVendor.ARM: setup_cfg["machine-config"]["smt"] = False if len(SUPPORTED_CPU_TEMPLATES) != 0: setup_cfg["machine-config"]["cpu_template"] = SUPPORTED_CPU_TEMPLATES[0] uvm_nano.api.machine_config.patch(**setup_cfg["machine-config"]) setup_cfg["cpu-config"] = None setup_cfg["drives"] = [ { "drive_id": "rootfs", "partuuid": None, "is_root_device": True, "cache_type": "Unsafe", "is_read_only": True, "path_on_host": f"/{uvm_nano.rootfs_file.name}", "rate_limiter": None, "io_engine": "Sync", "socket": None, } ] uvm_nano.api.pmem.put( id="pmem", path_on_host="/" + uvm_nano.rootfs_file.name, root_device=False, read_only=False, ) setup_cfg["pmem"] = [ { "id": "pmem", "path_on_host": "/" + uvm_nano.rootfs_file.name, "root_device": False, "read_only": False, } ] # Add a memory balloon device. uvm_nano.api.balloon.put(amount_mib=1, deflate_on_oom=True) setup_cfg["balloon"] = { "amount_mib": 1, "deflate_on_oom": True, "stats_polling_interval_s": 0, "free_page_reporting": False, "free_page_hinting": False, } # Add a vsock device. 
uvm_nano.api.vsock.put(guest_cid=15, uds_path="vsock.sock") setup_cfg["vsock"] = {"guest_cid": 15, "uds_path": "vsock.sock"} setup_cfg["memory-hotplug"] = { "total_size_mib": 1024, "block_size_mib": 128, "slot_size_mib": 1024, } uvm_nano.api.memory_hotplug.put(**setup_cfg["memory-hotplug"]) setup_cfg["logger"] = None setup_cfg["metrics"] = None setup_cfg["mmds-config"] = { "version": "V1", "network_interfaces": [net_iface.dev_name], } uvm_nano.api.mmds_config.put(**setup_cfg["mmds-config"]) # Start the microvm. uvm_nano.start() # Add a tx rate limiter to the net device. tx_rl = { "bandwidth": {"size": 1000000, "refill_time": 100, "one_time_burst": None}, "ops": None, } response = uvm_nano.api.network.patch( iface_id=net_iface.dev_name, tx_rate_limiter=tx_rl ) setup_cfg["network-interfaces"] = [ { "guest_mac": net_tools.mac_from_ip(net_iface.guest_ip), "iface_id": net_iface.dev_name, "host_dev_name": net_iface.tap_name, "rx_rate_limiter": None, "tx_rate_limiter": tx_rl, } ] snapshot = uvm_nano.snapshot_full() uvm2 = microvm_factory.build_from_snapshot(snapshot) expected_cfg = setup_cfg.copy() # We expect boot-source to be set with the following values expected_cfg["boot-source"] = { "kernel_image_path": uvm_nano.get_jailed_resource(uvm_nano.kernel_file), "initrd_path": None, "boot_args": "reboot=k panic=1 nomodule swiotlb=noforce console=ttyS0", } if not uvm_nano.pci_enabled: expected_cfg["boot-source"]["boot_args"] += " pci=off" # no ipv4_address or imds_compat specified during PUT /mmds/config so we expect the default expected_cfg["mmds-config"] = { "version": "V1", "ipv4_address": "169.254.169.254", "network_interfaces": [net_iface.dev_name], "imds_compat": False, } # We should expect a null entropy device expected_cfg["entropy"] = None # Validate full vm configuration post-restore. 
response = uvm2.api.vm_config.get().json() assert response != setup_cfg assert response == expected_cfg def test_get_full_config(uvm_plain): """ Test the reported configuration of a microVM configured with all resources. """ test_microvm = uvm_plain expected_cfg = {} test_microvm.spawn() # Basic config also implies a root block device. test_microvm.basic_config(boot_args="", rootfs_io_engine="Sync") expected_cfg["machine-config"] = { "vcpu_count": 2, "mem_size_mib": 256, "smt": False, "track_dirty_pages": False, "huge_pages": "None", } expected_cfg["cpu-config"] = None expected_cfg["boot-source"] = { "boot_args": "", "kernel_image_path": f"/{test_microvm.kernel_file.name}", "initrd_path": None, } expected_cfg["drives"] = [ { "drive_id": "rootfs", "partuuid": None, "is_root_device": True, "cache_type": "Unsafe", "is_read_only": True, "path_on_host": "/" + test_microvm.rootfs_file.name, "rate_limiter": None, "io_engine": "Sync", "socket": None, } ] test_microvm.api.pmem.put( id="pmem", path_on_host="/" + test_microvm.rootfs_file.name, root_device=False, read_only=False, ) expected_cfg["pmem"] = [ { "id": "pmem", "path_on_host": "/" + test_microvm.rootfs_file.name, "root_device": False, "read_only": False, } ] # Add a memory balloon device. test_microvm.api.balloon.put(amount_mib=1, deflate_on_oom=True) expected_cfg["balloon"] = { "amount_mib": 1, "deflate_on_oom": True, "stats_polling_interval_s": 0, "free_page_reporting": False, "free_page_hinting": False, } # Add a vsock device. response = test_microvm.api.vsock.put(guest_cid=15, uds_path="vsock.sock") expected_cfg["vsock"] = {"guest_cid": 15, "uds_path": "vsock.sock"} # Add hot-pluggable memory. expected_cfg["memory-hotplug"] = { "total_size_mib": 1024, "block_size_mib": 128, "slot_size_mib": 1024, } test_microvm.api.memory_hotplug.put(**expected_cfg["memory-hotplug"]) # Add a net device. 
iface_id = "1" tapname = test_microvm.id[:8] + "tap" + iface_id tap1 = net_tools.Tap(tapname, test_microvm.netns) guest_mac = "06:00:00:00:00:01" tx_rl = { "bandwidth": {"size": 1000000, "refill_time": 100, "one_time_burst": None}, "ops": None, } response = test_microvm.api.network.put( iface_id=iface_id, guest_mac=guest_mac, host_dev_name=tap1.name, tx_rate_limiter=tx_rl, ) expected_cfg["network-interfaces"] = [ { "iface_id": iface_id, "host_dev_name": tap1.name, "guest_mac": "06:00:00:00:00:01", "rx_rate_limiter": None, "tx_rate_limiter": tx_rl, } ] # Update MMDS config. mmds_config = { "version": "V2", "ipv4_address": "169.254.169.250", "network_interfaces": ["1"], "imds_compat": True, } response = test_microvm.api.mmds_config.put(**mmds_config) expected_cfg["logger"] = None expected_cfg["metrics"] = None expected_cfg["mmds-config"] = { "version": "V2", "ipv4_address": "169.254.169.250", "network_interfaces": ["1"], "imds_compat": True, } # We should expect a null entropy device expected_cfg["entropy"] = None # Getting full vm configuration should be available pre-boot. response = test_microvm.api.vm_config.get() assert response.json() == expected_cfg # Start the microvm. test_microvm.start() # Validate full vm configuration post-boot as well. response = test_microvm.api.vm_config.get() assert response.json() == expected_cfg def test_map_private_seccomp_regression(uvm_plain): """ Seccomp mmap MAP_PRIVATE regression test. When sending large buffer to an api endpoint there will be an attempt to call mmap with MAP_PRIVATE|MAP_ANONYMOUS. This would result in vmm being killed by the seccomp filter before this PR. 
""" test_microvm = uvm_plain test_microvm.jailer.extra_args.update( {"http-api-max-payload-size": str(1024 * 1024 * 2)} ) test_microvm.spawn() test_microvm.time_api_request = False response = test_microvm.api.mmds.get() assert response.json() == {} data_store = {"latest": {"meta-data": {"ami-id": "b" * (1024 * 1024)}}} test_microvm.api.mmds.put(**data_store) # pylint: disable=protected-access def test_negative_snapshot_load_api(microvm_factory): """ Test snapshot load API. """ vm = microvm_factory.build() vm.spawn() # Specifying both `mem_backend` and 'mem_file_path` should fail. err_msg = ( "too many fields: either `mem_backend` or " "`mem_file_path` exclusively is required." ) with pytest.raises(RuntimeError, match=err_msg): vm.api.snapshot_load.put( snapshot_path="foo", mem_backend={"backend_type": "File", "backend_path": "bar"}, mem_file_path="bar", ) # API request with `mem_backend` but no `backend_type` should fail. with pytest.raises(RuntimeError, match="missing field `backend_type`"): vm.api.snapshot_load.put( snapshot_path="foo", mem_backend={"backend_path": "bar"}, ) # API request with `mem_backend` but no `backend_path` should fail. with pytest.raises(RuntimeError, match="missing field `backend_path`"): vm.api.snapshot_load.put( snapshot_path="foo", mem_backend={"backend_type": "File"}, ) # API request with invalid `backend_type` should fail. with pytest.raises( RuntimeError, match="unknown variant `foo`, expected `File` or `Uffd`" ): vm.api.snapshot_load.put( snapshot_path="foo", mem_backend={"backend_type": "foo", "backend_path": "bar"}, ) # API request without `snapshot_path` should fail. with pytest.raises(RuntimeError, match="missing field `snapshot_path`"): vm.api.snapshot_load.put( mem_backend={"backend_type": "File", "backend_path": "bar"}, ) # API request without `mem_backend` or `mem_file_path` should fail. 
    err_msg = "missing field: either `mem_backend` or " "`mem_file_path` is required"
    with pytest.raises(RuntimeError, match=err_msg):
        vm.api.snapshot_load.put(snapshot_path="foo")

    # Deprecated API should return deprecation response header.
    with pytest.raises(RuntimeError) as exc_info:
        vm.api.snapshot_load.put(
            snapshot_path="foo",
            mem_file_path="bar",
        )
    assert exc_info.value.args[2].headers["deprecation"]

    # The snapshot/memory files above don't exist, but the request is otherwise syntactically valid.
    # In this case, Firecracker exits.
    vm.mark_killed()



================================================
FILE: tests/integration_tests/functional/test_api_server.py
================================================
# Copyright 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0

"""Tests scenario exercising api server functionality."""

import socket

from framework.utils import check_output


def test_api_socket_in_use(uvm_plain):
    """
    Test error message when api socket is already in use.

    This is a very frequent scenario when Firecracker cannot start due to
    the socket being left open from previous runs. Check that the error
    message is a fixed one and that it also contains the name of the path.
    """
    microvm = uvm_plain
    # Create the jailer's /run directory ourselves, since we occupy the
    # socket path before Firecracker (and the jailer setup) ever runs.
    cmd = "mkdir {}/run".format(microvm.chroot())
    check_output(cmd)
    # Occupy the API socket path with our own Unix socket so that
    # Firecracker's bind() fails at startup.
    sock = socket.socket(socket.AF_UNIX)
    sock.bind(microvm.jailer.api_socket_path())

    microvm.spawn(log_level="warn", serial_out_path=None)

    # The error message is fixed and must name the (jailed) socket path.
    msg = "Failed to open the API socket at: /run/firecracker.socket. Check that it is not already used."
    microvm.check_log_message(msg)

    # Firecracker exits because it could not bind the API socket.
    microvm.mark_killed()



================================================
FILE: tests/integration_tests/functional/test_balloon.py
================================================
# Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0 """Tests for guest-side operations on /balloon resources.""" import logging import signal import time from subprocess import TimeoutExpired import pytest import requests from framework.guest_stats import MeminfoGuest from framework.utils import get_stable_rss_mem STATS_POLLING_INTERVAL_S = 1 def check_guest_dmesg_for_stalls(ssh_connection): """Check guest dmesg for RCU stalls and soft lockups.""" _, stdout, _ = ssh_connection.check_output("dmesg") assert "rcu_sched self-detected stall on CPU" not in stdout assert "rcu_preempt detected stalls on CPUs/tasks" not in stdout assert "BUG: soft lockup -" not in stdout def lower_ssh_oom_chance(ssh_connection): """Lure OOM away from ssh process""" logger = logging.getLogger("lower_ssh_oom_chance") cmd = "cat /run/sshd.pid" exit_code, stdout, stderr = ssh_connection.run(cmd) # add something to the logs for troubleshooting if exit_code != 0: logger.error("while running: %s", cmd) logger.error("stdout: %s", stdout) logger.error("stderr: %s", stderr) for pid in stdout.split(" "): cmd = f"choom -n -1000 -p {pid}" exit_code, stdout, stderr = ssh_connection.run(cmd) if exit_code != 0: logger.error("while running: %s", cmd) logger.error("stdout: %s", stdout) logger.error("stderr: %s", stderr) def make_guest_dirty_memory(ssh_connection, amount_mib=32): """Tell the guest, over ssh, to dirty `amount` pages of memory.""" lower_ssh_oom_chance(ssh_connection) try: _ = ssh_connection.run(f"/usr/local/bin/fillmem {amount_mib}", timeout=1.0) except TimeoutExpired: # It's ok if this expires. Sometimes the SSH connection # gets killed by the OOM killer *after* the fillmem program # started. As a result, we can ignore timeouts here. pass time.sleep(5) def _test_rss_memory_lower(test_microvm): """Check inflating the balloon makes guest use less rss memory.""" # Get the firecracker pid, and open an ssh connection. 
ssh_connection = test_microvm.ssh # Using deflate_on_oom, get the RSS as low as possible test_microvm.api.balloon.patch(amount_mib=200) # Get initial rss consumption. init_rss = get_stable_rss_mem(test_microvm) # Get the balloon back to 0. test_microvm.api.balloon.patch(amount_mib=0) # This call will internally wait for rss to become stable. _ = get_stable_rss_mem(test_microvm) # Dirty memory, then inflate balloon and get ballooned rss consumption. make_guest_dirty_memory(ssh_connection, amount_mib=32) test_microvm.api.balloon.patch(amount_mib=200) balloon_rss = get_stable_rss_mem(test_microvm) # Check that the ballooning reclaimed the memory. assert balloon_rss - init_rss <= 15000 # Deflate the balloon and check we didn't see any stall messages test_microvm.api.balloon.patch(amount_mib=0) # This call will internally wait for rss to become stable. _ = get_stable_rss_mem(test_microvm) check_guest_dmesg_for_stalls(ssh_connection) # pylint: disable=C0103 def test_rss_memory_lower(uvm_plain_any): """ Test that inflating the balloon makes guest use less rss memory. """ test_microvm = uvm_plain_any test_microvm.spawn() test_microvm.basic_config() test_microvm.add_net_iface() # Add a memory balloon. test_microvm.api.balloon.put( amount_mib=0, deflate_on_oom=True, stats_polling_interval_s=0 ) # Start the microvm. test_microvm.start() _test_rss_memory_lower(test_microvm) # pylint: disable=C0103 def test_inflate_reduces_free(uvm_plain_any): """ Check that the output of free in guest changes with inflate. """ test_microvm = uvm_plain_any test_microvm.spawn() test_microvm.basic_config() test_microvm.add_net_iface() # Install deflated balloon. test_microvm.api.balloon.put( amount_mib=0, deflate_on_oom=False, stats_polling_interval_s=1 ) # Start the microvm test_microvm.start() meminfo = MeminfoGuest(test_microvm) # Get the free memory before ballooning. available_mem_deflated = meminfo.get().mem_free.kib() # Inflate 64 MB == 16384 page balloon. 
test_microvm.api.balloon.patch(amount_mib=64) # This call will internally wait for rss to become stable. _ = get_stable_rss_mem(test_microvm) # Get the free memory after ballooning. available_mem_inflated = meminfo.get().mem_free.kib() # Assert that ballooning reclaimed about 64 MB of memory. assert available_mem_inflated <= available_mem_deflated - 85 * 64000 / 100 check_guest_dmesg_for_stalls(test_microvm.ssh) # pylint: disable=C0103 @pytest.mark.parametrize("deflate_on_oom", [True, False]) def test_deflate_on_oom(uvm_plain_any, deflate_on_oom): """ Verify that setting the `deflate_on_oom` option works correctly. https://github.com/firecracker-microvm/firecracker/blob/main/docs/ballooning.md deflate_on_oom=True should result in balloon_stats['actual_mib'] be reduced deflate_on_oom=False should result in balloon_stats['actual_mib'] remain the same """ test_microvm = uvm_plain_any test_microvm.spawn() test_microvm.basic_config() test_microvm.add_net_iface() # Add a deflated memory balloon. test_microvm.api.balloon.put( amount_mib=0, deflate_on_oom=deflate_on_oom, stats_polling_interval_s=1 ) # Start the microvm. test_microvm.start() # We get an initial reading of the RSS, then calculate the amount # we need to inflate the balloon with by subtracting it from the # VM size and adding an offset of 50 MiB in order to make sure we # get a lower reading than the initial one. initial_rss = get_stable_rss_mem(test_microvm) inflate_size = 256 - (int(initial_rss / 1024) + 50) # Inflate the balloon test_microvm.api.balloon.patch(amount_mib=inflate_size) # This call will internally wait for rss to become stable. _ = get_stable_rss_mem(test_microvm) # Check that using memory leads to the balloon device automatically # deflate (or not). 
balloon_size_before = test_microvm.api.balloon_stats.get().json()["actual_mib"] make_guest_dirty_memory(test_microvm.ssh, 128) try: balloon_size_after = test_microvm.api.balloon_stats.get().json()["actual_mib"] except requests.exceptions.ConnectionError: assert ( not deflate_on_oom ), "Guest died even though it should have deflated balloon to alleviate memory pressure" test_microvm.mark_killed() else: print(f"size before: {balloon_size_before} size after: {balloon_size_after}") if deflate_on_oom: assert balloon_size_after < balloon_size_before, "Balloon did not deflate" else: assert balloon_size_after >= balloon_size_before, "Balloon deflated" # Kill it here, letting the infrastructure know that the process might # be dead already. test_microvm.kill(might_be_dead=True) # pylint: disable=C0103 def test_reinflate_balloon(uvm_plain_any): """ Verify that repeatedly inflating and deflating the balloon works. """ test_microvm = uvm_plain_any test_microvm.spawn() test_microvm.basic_config() test_microvm.add_net_iface() # Add a deflated memory balloon. test_microvm.api.balloon.put( amount_mib=0, deflate_on_oom=True, stats_polling_interval_s=0 ) # Start the microvm. test_microvm.start() # First inflate the balloon to free up the uncertain amount of memory # used by the kernel at boot and establish a baseline, then give back # the memory. test_microvm.api.balloon.patch(amount_mib=200) # This call will internally wait for rss to become stable. _ = get_stable_rss_mem(test_microvm) test_microvm.api.balloon.patch(amount_mib=0) # This call will internally wait for rss to become stable. _ = get_stable_rss_mem(test_microvm) # Get the guest to dirty memory. make_guest_dirty_memory(test_microvm.ssh, amount_mib=32) first_reading = get_stable_rss_mem(test_microvm) # Now inflate the balloon. test_microvm.api.balloon.patch(amount_mib=200) second_reading = get_stable_rss_mem(test_microvm) # Now deflate the balloon. 
test_microvm.api.balloon.patch(amount_mib=0) # This call will internally wait for rss to become stable. _ = get_stable_rss_mem(test_microvm) # Now have the guest dirty memory again. make_guest_dirty_memory(test_microvm.ssh, amount_mib=32) third_reading = get_stable_rss_mem(test_microvm) # Now inflate the balloon again. test_microvm.api.balloon.patch(amount_mib=200) fourth_reading = get_stable_rss_mem(test_microvm) # Check that the memory used is the same after regardless of the previous # inflate history of the balloon (with the third reading being allowed # to be smaller than the first, since memory allocated at booting up # is probably freed after the first inflation. assert (third_reading - first_reading) <= 20000 assert abs(second_reading - fourth_reading) <= 20000 # Deflate the balloon and check we didn't see any stall messages test_microvm.api.balloon.patch(amount_mib=0) # This call will internally wait for rss to become stable. _ = get_stable_rss_mem(test_microvm) check_guest_dmesg_for_stalls(test_microvm.ssh) # pylint: disable=C0103 def test_stats(uvm_plain_any): """ Verify that balloon stats work as expected. """ test_microvm = uvm_plain_any test_microvm.spawn() test_microvm.basic_config() test_microvm.add_net_iface() # Add a memory balloon with stats enabled. test_microvm.api.balloon.put( amount_mib=0, deflate_on_oom=True, stats_polling_interval_s=STATS_POLLING_INTERVAL_S, ) # Start the microvm. test_microvm.start() # Give Firecracker enough time to poll the stats at least once post-boot time.sleep(STATS_POLLING_INTERVAL_S * 2) # Get an initial reading of the stats. initial_stats = test_microvm.api.balloon_stats.get().json() # Major faults happen when a page fault has to be satisfied from disk. They are not # triggered by our `make_guest_dirty_memory` workload, as it uses MAP_ANONYMOUS, which # only triggers minor faults. However, during the boot process, things are read from the # rootfs, so we should at least see a non-zero number of major faults. 
assert initial_stats["major_faults"] > 0 # Dirty 10MB of pages. make_guest_dirty_memory(test_microvm.ssh, amount_mib=10) time.sleep(1) # This call will internally wait for rss to become stable. _ = get_stable_rss_mem(test_microvm) # Make sure that the stats catch the page faults. after_workload_stats = test_microvm.api.balloon_stats.get().json() assert initial_stats.get("minor_faults", 0) < after_workload_stats["minor_faults"] # Now inflate the balloon with 10MB of pages. test_microvm.api.balloon.patch(amount_mib=10) # This call will internally wait for rss to become stable. _ = get_stable_rss_mem(test_microvm) # Get another reading of the stats after the polling interval has passed. inflated_stats = test_microvm.api.balloon_stats.get().json() # Ensure the stats reflect inflating the balloon. assert after_workload_stats["free_memory"] > inflated_stats["free_memory"] assert after_workload_stats["available_memory"] > inflated_stats["available_memory"] # Deflate the balloon.check that the stats show the increase in # available memory. test_microvm.api.balloon.patch(amount_mib=0) # This call will internally wait for rss to become stable. _ = get_stable_rss_mem(test_microvm) # Get another reading of the stats after the polling interval has passed. deflated_stats = test_microvm.api.balloon_stats.get().json() # Ensure that stats don't have unknown balloon stats fields assert "balloon: unknown stats update tag:" not in test_microvm.log_data # Ensure the stats reflect deflating the balloon. assert inflated_stats["free_memory"] < deflated_stats["free_memory"] assert inflated_stats["available_memory"] < deflated_stats["available_memory"] check_guest_dmesg_for_stalls(test_microvm.ssh) def test_stats_update(uvm_plain_any): """ Verify that balloon stats update correctly. """ test_microvm = uvm_plain_any test_microvm.spawn() test_microvm.basic_config() test_microvm.add_net_iface() # Add a memory balloon with stats enabled. 
test_microvm.api.balloon.put( amount_mib=0, deflate_on_oom=True, stats_polling_interval_s=STATS_POLLING_INTERVAL_S, ) # Start the microvm. test_microvm.start() # Dirty 30MB of pages. make_guest_dirty_memory(test_microvm.ssh, amount_mib=30) # This call will internally wait for rss to become stable. _ = get_stable_rss_mem(test_microvm) # Get an initial reading of the stats. initial_stats = test_microvm.api.balloon_stats.get().json() # Inflate the balloon to trigger a change in the stats. test_microvm.api.balloon.patch(amount_mib=10) # Wait out the polling interval, then get the updated stats. time.sleep(STATS_POLLING_INTERVAL_S * 2) next_stats = test_microvm.api.balloon_stats.get().json() assert initial_stats["available_memory"] != next_stats["available_memory"] # Inflate the balloon more to trigger a change in the stats. test_microvm.api.balloon.patch(amount_mib=30) time.sleep(1) # Change the polling interval. test_microvm.api.balloon_stats.patch(stats_polling_interval_s=60) # The polling interval change should update the stats. final_stats = test_microvm.api.balloon_stats.get().json() assert next_stats["available_memory"] != final_stats["available_memory"] # Ensure that stats don't have unknown balloon stats fields assert "balloon: unknown stats update tag:" not in test_microvm.log_data check_guest_dmesg_for_stalls(test_microvm.ssh) def test_balloon_snapshot(uvm_plain_any, microvm_factory): """ Test that the balloon works after pause/resume. """ vm = uvm_plain_any vm.spawn() # Free page reporting and hinting fragment guest memory VMAs # making it harder to identify them in the memory monitor. vm.memory_monitor = None vm.basic_config( vcpu_count=2, mem_size_mib=256, ) vm.add_net_iface() # Add a memory balloon with stats enabled. vm.api.balloon.put( amount_mib=0, deflate_on_oom=True, stats_polling_interval_s=STATS_POLLING_INTERVAL_S, ) vm.start() # Dirty 60MB of pages. make_guest_dirty_memory(vm.ssh, amount_mib=60) time.sleep(1) # Check memory usage. 
first_reading = get_stable_rss_mem(vm) # Now inflate the balloon with 20MB of pages. vm.api.balloon.patch(amount_mib=20) # Check memory usage again. second_reading = get_stable_rss_mem(vm) # There should be a reduction in RSS, but it's inconsistent. # We only test that the reduction happens. assert first_reading > second_reading snapshot = vm.snapshot_full() microvm = microvm_factory.build_from_snapshot(snapshot) # Free page reporting and hinting fragment guest memory VMAs # making it harder to identify them in the memory monitor. microvm.memory_monitor = None # Wait out the polling interval, then get the updated stats. time.sleep(STATS_POLLING_INTERVAL_S * 2) stats_after_snap = microvm.api.balloon_stats.get().json() # Check memory usage. third_reading = get_stable_rss_mem(microvm) # Dirty 60MB of pages. make_guest_dirty_memory(microvm.ssh, amount_mib=60) # Check memory usage. fourth_reading = get_stable_rss_mem(microvm) assert fourth_reading > third_reading # Inflate the balloon with another 20MB of pages. microvm.api.balloon.patch(amount_mib=40) fifth_reading = get_stable_rss_mem(microvm) # There should be a reduction in RSS, but it's inconsistent. # We only test that the reduction happens. assert fourth_reading > fifth_reading # Get the stats after we take a snapshot and dirty some memory, # then reclaim it. # Ensure we gave enough time for the stats to update. time.sleep(STATS_POLLING_INTERVAL_S * 2) latest_stats = microvm.api.balloon_stats.get().json() # Ensure the stats are still working after restore and show # that the balloon inflated. assert stats_after_snap["available_memory"] > latest_stats["available_memory"] check_guest_dmesg_for_stalls(microvm.ssh) @pytest.mark.parametrize("method", ["reporting", "hinting"]) def test_hinting_reporting_snapshot(uvm_plain_any, microvm_factory, method): """ Test that the balloon hinting and reporting works after pause/resume. 
""" vm = uvm_plain_any vm.spawn() # Free page reporting and hinting fragment guest memory VMAs # making it harder to identify them in the memory monitor. vm.memory_monitor = None vm.basic_config( vcpu_count=2, mem_size_mib=256, ) vm.add_net_iface() free_page_reporting = method == "reporting" free_page_hinting = method == "hinting" # Add a memory balloon with stats enabled. vm.api.balloon.put( amount_mib=0, deflate_on_oom=True, stats_polling_interval_s=STATS_POLLING_INTERVAL_S, free_page_reporting=free_page_reporting, free_page_hinting=free_page_hinting, ) vm.start() vm.ssh.check_output( "nohup /usr/local/bin/fast_page_fault_helper >/dev/null 2>&1 second_reading snapshot = vm.snapshot_full() microvm = microvm_factory.build_from_snapshot(snapshot) # Free page reporting and hinting fragment guest memory VMAs # making it harder to identify them in the memory monitor. microvm.memory_monitor = None microvm.ssh.check_output( "nohup /usr/local/bin/fast_page_fault_helper >/dev/null 2>&1 fourth_reading check_guest_dmesg_for_stalls(microvm.ssh) @pytest.mark.parametrize("method", ["traditional", "hinting", "reporting"]) def test_memory_scrub(uvm_plain_any, method): """ Test that the memory is zeroed after deflate. """ microvm = uvm_plain_any microvm.spawn() microvm.basic_config(vcpu_count=2, mem_size_mib=256) microvm.add_net_iface() free_page_reporting = method == "reporting" free_page_hinting = method == "hinting" # Add a memory balloon with stats enabled. microvm.api.balloon.put( amount_mib=0, deflate_on_oom=True, stats_polling_interval_s=1, free_page_reporting=free_page_reporting, free_page_hinting=free_page_hinting, ) microvm.start() # Dirty 60MB of pages. make_guest_dirty_memory(microvm.ssh, amount_mib=60) if method == "traditional": # Now inflate the balloon with 60MB of pages. 
microvm.api.balloon.patch(amount_mib=60) elif method == "hinting": time.sleep(1) microvm.api.balloon_hinting_start.patch() elif method == "reporting": # Reporting can take up to 2 seconds to complete time.sleep(2) # Wait for the inflate to complete. _ = get_stable_rss_mem(microvm) if method == "traditional": # Deflate the balloon completely. microvm.api.balloon.patch(amount_mib=0) # Wait for the deflate to complete. _ = get_stable_rss_mem(microvm) microvm.ssh.check_output("/usr/local/bin/readmem {} {}".format(60, 1)) check_guest_dmesg_for_stalls(microvm.ssh) ================================================ FILE: tests/integration_tests/functional/test_binary.py ================================================ # Copyright 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved. # SPDX-License-Identifier: Apache-2.0 """Tests to check several aspects of the binaries""" import re import subprocess import pytest from framework import utils @pytest.mark.timeout(500) def test_firecracker_binary_static_linking(microvm_factory): """ Test to make sure the firecracker binary is statically linked. """ fc_binary_path = microvm_factory.fc_binary_path _, stdout, stderr = utils.check_output(f"file {fc_binary_path}") assert "" in stderr # expected "statically linked" for aarch64 and # "static-pie linked" for x86_64 assert "statically linked" in stdout or "static-pie linked" in stdout def test_release_debuginfo(microvm_factory): """Ensure the debuginfo file has the right ELF sections""" fc_binary = microvm_factory.fc_binary_path debuginfo = fc_binary.with_suffix(".debug") stdout = subprocess.check_output( ["readelf", "-S", str(debuginfo)], encoding="ascii", ) matches = { match[0] for match in re.findall(r"\[..] 
(\.(\w|\.)+)", stdout, re.MULTILINE) } needed_sections = { ".debug_aranges", ".debug_info", ".debug_abbrev", ".debug_line", ".debug_frame", ".debug_str", ".debug_ranges", } missing_sections = needed_sections - matches assert missing_sections == set() def test_release_no_gdb(microvm_factory): """Ensure the gdb feature is not enabled in releases""" fc_binary = microvm_factory.fc_binary_path # We use C++ demangle since there's no Rust support, but it's good enough # for our purposes. stdout = subprocess.check_output( ["readelf", "-W", "--demangle", "-s", str(fc_binary)], encoding="ascii", ) gdb_symbols = [] for line in stdout.splitlines(): parts = line.split(maxsplit=7) if len(parts) == 8: symbol_name = parts[-1] if "gdb" in symbol_name: gdb_symbols.append(symbol_name) assert not gdb_symbols ================================================ FILE: tests/integration_tests/functional/test_binary_size.py ================================================ # Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. # SPDX-License-Identifier: Apache-2.0 """Tests that check if the release binary sizes fall within expected size. This is not representative of the actual memory overhead of Firecracker. A more representative test is file:../performance/test_memory_overhead.py """ import platform import pytest MACHINE = platform.machine() @pytest.mark.timeout(500) def test_firecracker_binary_size(record_property, metrics, microvm_factory): """ Test if the size of the firecracker binary is within expected ranges. """ fc_binary = microvm_factory.fc_binary_path result = fc_binary.stat().st_size record_property("firecracker_binary_size", f"{result}B") metrics.set_dimensions({"cpu_arch": MACHINE}) metrics.put_metric("firecracker_binary_size", result, unit="Bytes") @pytest.mark.timeout(500) def test_jailer_binary_size(record_property, metrics, microvm_factory): """ Test if the size of the jailer binary is within expected ranges. 
""" jailer_binary = microvm_factory.jailer_binary_path result = jailer_binary.stat().st_size record_property("jailer_binary_size", f"{result}B") metrics.set_dimensions({"cpu_arch": MACHINE}) metrics.put_metric("jailer_binary_size", result, unit="Bytes") ================================================ FILE: tests/integration_tests/functional/test_cmd_line_parameters.py ================================================ # Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved. # SPDX-License-Identifier: Apache-2.0 """Tests that ensure the correctness of the command line parameters.""" import subprocess from pathlib import Path import pytest from framework.utils import check_output from host_tools.fcmetrics import validate_fc_metrics def test_describe_snapshot(uvm_plain): """ Test `--describe-snapshot` correctness for all snapshot versions. For each release create a snapshot and verify the data version of the snapshot state file. """ vm = uvm_plain fc_binary = vm.fc_binary_path cmd = [fc_binary, "--snapshot-version"] snap_version_tuple = check_output(cmd).stdout.strip().split("\n")[0].split(".") snap_version = ".".join(str(x) for x in snap_version_tuple) vm.spawn() vm.basic_config(track_dirty_pages=True) vm.start() snapshot = vm.snapshot_diff() vm.kill() cmd = [fc_binary, "--describe-snapshot", snapshot.vmstate] _, stdout, stderr = check_output(cmd) assert stderr == "" assert snap_version in stdout def test_cli_metrics_path(uvm_plain): """ Test --metrics-path parameter """ microvm = uvm_plain metrics_path = Path(microvm.path) / "my_metrics.ndjson" microvm.spawn(metrics_path=metrics_path) microvm.basic_config() microvm.start() metrics = microvm.flush_metrics() validate_fc_metrics(metrics) def test_cli_metrics_path_if_metrics_initialized_twice_fail(uvm_plain): """ Given: a running firecracker with metrics configured with the CLI option When: Configure metrics via API Then: API returns an error """ microvm = uvm_plain # First configure the µvm metrics with 
--metrics-path metrics_path = Path(microvm.path) / "metrics.ndjson" metrics_path.touch() microvm.spawn(metrics_path=metrics_path) # Then try to configure it with PUT /metrics metrics2_path = Path(microvm.path) / "metrics2.ndjson" metrics2_path.touch() # It should fail with because it's already configured with pytest.raises(RuntimeError, match="Reinitialization of metrics not allowed."): microvm.api.metrics.put( metrics_path=microvm.create_jailed_resource(metrics2_path) ) def test_cli_metrics_if_resume_no_metrics(uvm_plain, microvm_factory): """ Check that metrics configuration is not part of the snapshot """ # Given: a snapshot of a FC with metrics configured with the CLI option uvm1 = uvm_plain metrics_path = Path(uvm1.path) / "metrics.ndjson" metrics_path.touch() uvm1.spawn(metrics_path=metrics_path) uvm1.basic_config() uvm1.start() snapshot = uvm1.snapshot_full() # When: restoring from the snapshot uvm2 = microvm_factory.build_from_snapshot(snapshot) # Then: the old metrics configuration does not exist metrics2 = Path(uvm2.jailer.chroot_path()) / metrics_path.name assert not metrics2.exists() def test_cli_no_params(microvm_factory): """ Test running firecracker with no parameters should work """ fc_binary = microvm_factory.fc_binary_path process = subprocess.Popen(fc_binary) try: process.communicate(timeout=3) assert process.returncode is None except subprocess.TimeoutExpired: # The good case process.kill() ================================================ FILE: tests/integration_tests/functional/test_cmd_line_start.py ================================================ # Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
# SPDX-License-Identifier: Apache-2.0 """Tests microvm start with configuration file as command line parameter.""" import json import os import platform import re import shutil from pathlib import Path import pytest from tenacity import Retrying, retry_if_exception_type, stop_after_attempt, wait_fixed from framework import utils from framework.utils import generate_mmds_get_request, generate_mmds_session_token from framework.utils_cpu_templates import SUPPORTED_CPU_TEMPLATES # Directory with metadata JSON files DIR = Path("./data") def _configure_vm_from_json(test_microvm, vm_config_file): """ Configure a microvm using a file sent as command line parameter. Create resources needed for the configuration of the microvm and set as configuration file a copy of the file that was passed as parameter to this helper function. """ # since we don't use basic-config, we do it by hand test_microvm.create_jailed_resource(test_microvm.kernel_file) test_microvm.create_jailed_resource(test_microvm.rootfs_file) vm_config_file = Path(vm_config_file) obj = json.load(vm_config_file.open(encoding="UTF-8")) obj["boot-source"]["kernel_image_path"] = str(test_microvm.kernel_file.name) obj["drives"][0]["path_on_host"] = str(test_microvm.rootfs_file.name) obj["drives"][0]["is_read_only"] = True vm_config = Path(test_microvm.chroot()) / vm_config_file.name vm_config.write_text(json.dumps(obj)) test_microvm.jailer.extra_args = {"config-file": vm_config.name} return obj def _add_metadata_file(test_microvm, metadata_file): """ Configure the microvm using a metadata file. Given a test metadata file this creates a copy of the file and uses the copy to configure the microvm. """ vm_metadata_path = os.path.join(test_microvm.path, os.path.basename(metadata_file)) shutil.copyfile(metadata_file, vm_metadata_path) test_microvm.metadata_file = vm_metadata_path def _configure_network_interface(test_microvm): """ Create tap interface before spawning the microVM. 
The network namespace is already pre-created. The tap interface has to be created beforehand when starting the microVM from a config file. """ # Create tap device, and avoid creating it in the guest since it is already # specified in the JSON test_microvm.add_net_iface(api=False) def _build_cmd_to_fetch_metadata(ssh_connection, version, ipv4_address): """ Build command to fetch metadata from the guest's side. The request is built based on the MMDS version configured. If MMDSv2 is used, a session token must be created before the `GET` request. """ # Fetch data from MMDS from the guest's side. if version == "V2": # If MMDS is configured to version 2, so we need to create # the session token first. token = generate_mmds_session_token(ssh_connection, ipv4_address, token_ttl=60) else: token = None return generate_mmds_get_request(ipv4_address, token) def _get_optional_fields_from_file(vm_config_file): """ Retrieve optional `version` and `ipv4_address` fields from MMDS config. Parse the vm config json file and retrieves optional fields from MMDS config. Default values are used for the fields that are not specified. :return: a pair of (version, ipv4_address) fields from mmds config. """ # Get MMDS version and IPv4 address configured from the file. with open(vm_config_file, encoding="utf-8") as json_file: mmds_config = json.load(json_file)["mmds-config"] # Default to V1 if version is not specified. version = mmds_config.get("version", "V1") # Set to default if IPv4 is not specified . ipv4_address = mmds_config.get("ipv4_address", "169.254.169.254") # Default to False if imds_compat is not specified. imds_compat = mmds_config.get("imds_compat", False) return version, ipv4_address, imds_compat @pytest.mark.parametrize("vm_config_file", ["framework/vm_config.json"]) def test_config_start_with_api(uvm_plain, vm_config_file): """ Test if a microvm configured from file boots successfully. 
""" test_microvm = uvm_plain vm_config = _configure_vm_from_json(test_microvm, vm_config_file) test_microvm.spawn(serial_out_path=None) assert test_microvm.state == "Running" # Validate full vm configuration. response = test_microvm.api.vm_config.get() assert response.json() == vm_config @pytest.mark.parametrize("vm_config_file", ["framework/vm_config.json"]) def test_config_start_no_api(uvm_plain, vm_config_file): """ Test microvm start when API server thread is disabled. """ test_microvm = uvm_plain _configure_vm_from_json(test_microvm, vm_config_file) test_microvm.jailer.extra_args.update({"no-api": None}) test_microvm.spawn(serial_out_path=None) # Get names of threads in Firecracker. cmd = f"ps -T --no-headers -p {test_microvm.firecracker_pid} | awk '{{print $5}}'" # Retry running 'ps' in case it failed to list the firecracker process # The regex matches any expression that contains 'firecracker' and does # not contain 'fc_api' for attempt in Retrying( retry=retry_if_exception_type(RuntimeError), stop=stop_after_attempt(10), wait=wait_fixed(1), reraise=True, ): with attempt: utils.search_output_from_cmd( cmd=cmd, find_regex=re.compile("^(?!.*fc_api)(?:.*)?firecracker", re.DOTALL), ) @pytest.mark.parametrize("vm_config_file", ["framework/vm_config_network.json"]) def test_config_start_no_api_exit(uvm_plain, vm_config_file): """ Test microvm exit when API server is disabled. 
""" test_microvm = uvm_plain _configure_vm_from_json(test_microvm, vm_config_file) _configure_network_interface(test_microvm) test_microvm.jailer.extra_args.update({"no-api": None}) test_microvm.spawn(serial_out_path=None) # Start Firecracker and MicroVM test_microvm.ssh.run("reboot") # Exit test_microvm.mark_killed() # waits for process to terminate # Check error log and exit code test_microvm.check_log_message("Firecracker exiting successfully") assert test_microvm.get_exit_code() == 0 @pytest.mark.parametrize( "vm_config_file", [ "framework/vm_config_missing_vcpu_count.json", "framework/vm_config_missing_mem_size_mib.json", ], ) def test_config_bad_machine_config(uvm_plain, vm_config_file): """ Test microvm start when the `machine_config` is invalid. """ test_microvm = uvm_plain _configure_vm_from_json(test_microvm, vm_config_file) test_microvm.jailer.extra_args.update({"no-api": None}) test_microvm.spawn(serial_out_path=None) test_microvm.check_log_message("Configuration for VMM from one single json failed") test_microvm.mark_killed() @pytest.mark.parametrize( "test_config", [ ("framework/vm_config_cpu_template_C3.json", True, False), ("framework/vm_config_smt_true.json", False, True), ], ) def test_config_machine_config_params(uvm_plain, test_config): """ Test microvm start with optional `machine_config` parameters. 
""" test_microvm = uvm_plain # Test configuration determines if the file is a valid config or not # based on the CPU (vm_config_file, cpu_template_used, smt_used) = test_config _configure_vm_from_json(test_microvm, vm_config_file) test_microvm.jailer.extra_args.update({"no-api": None}) test_microvm.spawn(serial_out_path=None) should_fail = False if cpu_template_used and "C3" not in SUPPORTED_CPU_TEMPLATES: should_fail = True if smt_used and (platform.machine() == "aarch64"): should_fail = True if should_fail: test_microvm.check_any_log_message( [ "Failed to build MicroVM from Json", "Could not Start MicroVM from one single json", ] ) test_microvm.mark_killed() else: test_microvm.check_log_message( "Successfully started microvm that was configured from one single json" ) @pytest.mark.parametrize("vm_config_file", ["framework/vm_config.json"]) def test_config_start_with_limit(uvm_plain, vm_config_file): """ Negative test for customised request payload limit. """ test_microvm = uvm_plain _configure_vm_from_json(test_microvm, vm_config_file) test_microvm.jailer.extra_args.update({"http-api-max-payload-size": "250"}) test_microvm.spawn(serial_out_path=None) assert test_microvm.state == "Running" cmd = "curl --unix-socket {} -i".format(test_microvm.api.socket) cmd += ' -X PUT "http://localhost/mmds/config"' cmd += ' -H "Content-Length: 260"' cmd += ' -H "Accept: application/json"' cmd += ' -d "some body"' response = "HTTP/1.1 400 \r\n" response += "Server: Firecracker API\r\n" response += "Connection: keep-alive\r\n" response += "Content-Type: application/json\r\n" response += "Content-Length: 145\r\n\r\n" response += '{ "error": "Request payload with size 260 is larger than ' response += "the limit of 250 allowed by server.\n" response += 'All previous unanswered requests will be dropped." 
}' _, stdout, _ = utils.check_output(cmd) assert stdout.encode("utf-8") == response.encode("utf-8") @pytest.mark.parametrize("vm_config_file", ["framework/vm_config_with_mmdsv2.json"]) def test_config_with_default_limit(uvm_plain, vm_config_file): """ Test for request payload limit. """ test_microvm = uvm_plain _configure_vm_from_json(test_microvm, vm_config_file) _configure_network_interface(test_microvm) test_microvm.spawn(serial_out_path=None) assert test_microvm.state == "Running" data_store = {"latest": {"meta-data": {}}} data_store["latest"]["meta-data"]["ami-id"] = "abc" test_microvm.api.mmds.put(json=data_store) cmd_err = "curl --unix-socket {} -i".format(test_microvm.api.socket) cmd_err += ' -X PUT "http://localhost/mmds/config"' cmd_err += ' -H "Content-Length: 51201"' cmd_err += ' -H "Accept: application/json"' cmd_err += ' -d "some body"' response_err = "HTTP/1.1 400 \r\n" response_err += "Server: Firecracker API\r\n" response_err += "Connection: keep-alive\r\n" response_err += "Content-Type: application/json\r\n" response_err += "Content-Length: 149\r\n\r\n" response_err += '{ "error": "Request payload with size 51201 is larger ' response_err += "than the limit of 51200 allowed by server.\n" response_err += 'All previous unanswered requests will be dropped." }' _, stdout, _stderr = utils.check_output(cmd_err) assert stdout.encode("utf-8") == response_err.encode("utf-8") def test_start_with_metadata(uvm_plain): """ Test if metadata from file is available via MMDS. 
""" test_microvm = uvm_plain metadata_file = DIR / "metadata.json" _add_metadata_file(test_microvm, metadata_file) test_microvm.spawn(serial_out_path=None) test_microvm.check_log_message("Successfully added metadata to mmds from file") assert test_microvm.state == "Not started" response = test_microvm.api.mmds.get() with open(metadata_file, encoding="utf-8") as json_file: assert response.json() == json.load(json_file) def test_start_with_metadata_limit(uvm_plain): """ Test that the metadata size limit is enforced when populating from a file. """ test_microvm = uvm_plain test_microvm.jailer.extra_args.update({"mmds-size-limit": "30"}) metadata_file = DIR / "metadata.json" _add_metadata_file(test_microvm, metadata_file) test_microvm.spawn(serial_out_path=None) test_microvm.check_log_message( "Populating MMDS from file failed: The MMDS patch request doesn't fit." ) test_microvm.mark_killed() def test_start_with_metadata_default_limit(uvm_plain): """ Test that the metadata size limit defaults to the api payload limit. """ test_microvm = uvm_plain test_microvm.jailer.extra_args.update({"http-api-max-payload-size": "30"}) metadata_file = DIR / "metadata.json" _add_metadata_file(test_microvm, metadata_file) test_microvm.spawn(serial_out_path=None) test_microvm.check_log_message( "Populating MMDS from file failed: The MMDS patch request doesn't fit." ) test_microvm.mark_killed() def test_start_with_missing_metadata(uvm_plain): """ Test if a microvm is configured with a missing metadata file. 
""" test_microvm = uvm_plain metadata_file = "../resources/tests/metadata_nonexisting.json" vm_metadata_path = os.path.join(test_microvm.path, os.path.basename(metadata_file)) test_microvm.metadata_file = vm_metadata_path try: test_microvm.spawn(serial_out_path=None) except: # pylint: disable=bare-except pass finally: test_microvm.check_log_message( "Unable to open or read from the mmds content file" ) test_microvm.check_log_message("No such file or directory") test_microvm.mark_killed() def test_start_with_invalid_metadata(uvm_plain): """ Test if a microvm is configured with a invalid metadata file. """ test_microvm = uvm_plain metadata_file = DIR / "metadata_invalid.json" vm_metadata_path = os.path.join(test_microvm.path, os.path.basename(metadata_file)) shutil.copy(metadata_file, vm_metadata_path) test_microvm.metadata_file = vm_metadata_path try: test_microvm.spawn(serial_out_path=None) except: # pylint: disable=bare-except pass finally: test_microvm.check_log_message("MMDS error: metadata provided not valid json") test_microvm.check_log_message("EOF while parsing an object") test_microvm.mark_killed() @pytest.mark.parametrize( "vm_config_file", ["framework/vm_config_with_mmdsv1.json", "framework/vm_config_with_mmdsv2.json"], ) def test_config_start_and_mmds_with_api(uvm_plain, vm_config_file): """ Test MMDS behavior when the microvm is configured from file. """ test_microvm = uvm_plain _configure_vm_from_json(test_microvm, vm_config_file) _configure_network_interface(test_microvm) # Network namespace has already been created. test_microvm.spawn(serial_out_path=None) data_store = { "latest": { "meta-data": {"ami-id": "ami-12345678", "reservation-id": "r-fea54097"} } } # MMDS should be empty by default. response = test_microvm.api.mmds.get() assert response.json() == {} # Populate MMDS with data. test_microvm.api.mmds.put(**data_store) # Ensure the MMDS contents have been successfully updated. 
response = test_microvm.api.mmds.get() assert response.json() == data_store # Get MMDS version and IPv4 address configured from the file. version, ipv4_address, imds_compat = _get_optional_fields_from_file(vm_config_file) cmd = "ip route add {} dev eth0".format(ipv4_address) _, stdout, stderr = test_microvm.ssh.run(cmd) assert stderr == stdout == "" # Fetch data from MMDS from the guest's side. cmd = _build_cmd_to_fetch_metadata(test_microvm.ssh, version, ipv4_address) cmd += "/latest/meta-data/" _, stdout, _ = test_microvm.ssh.run(cmd) if imds_compat: assert stdout == "ami-id\nreservation-id" else: assert json.loads(stdout) == data_store["latest"]["meta-data"] # Validate MMDS configuration. response = test_microvm.api.vm_config.get() assert response.json()["mmds-config"] == { "network_interfaces": ["1"], "imds_compat": imds_compat, "ipv4_address": ipv4_address, "version": version, } @pytest.mark.parametrize( "vm_config_file", ["framework/vm_config_with_mmdsv1.json", "framework/vm_config_with_mmdsv2.json"], ) @pytest.mark.parametrize("metadata_file", [DIR / "metadata.json"]) def test_with_config_and_metadata_no_api(uvm_plain, vm_config_file, metadata_file): """ Test microvm start when config/mmds and API server thread is disabled. Ensures the metadata is stored successfully inside the MMDS and is available to reach from the guest's side. """ test_microvm = uvm_plain _configure_vm_from_json(test_microvm, vm_config_file) _add_metadata_file(test_microvm, metadata_file) _configure_network_interface(test_microvm) test_microvm.jailer.extra_args.update({"no-api": None}) test_microvm.spawn(serial_out_path=None) # Get MMDS version and IPv4 address configured from the file. version, ipv4_address, imds_compat = _get_optional_fields_from_file(vm_config_file) cmd = "ip route add {} dev eth0".format(ipv4_address) _, stdout, stderr = test_microvm.ssh.run(cmd) assert stderr == stdout == "" # Fetch data from MMDS from the guest's side. 
cmd = _build_cmd_to_fetch_metadata(test_microvm.ssh, version, ipv4_address) _, stdout, _ = test_microvm.ssh.run(cmd) # Compare response against the expected MMDS contents. metadata = json.load(Path(metadata_file).open(encoding="UTF-8")) if imds_compat: assert stdout == "2016-09-02/\n2019-08-01/\nlatest/" else: assert json.loads(stdout) == metadata ================================================ FILE: tests/integration_tests/functional/test_concurrency.py ================================================ # Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. # SPDX-License-Identifier: Apache-2.0 """Ensure multiple microVMs work correctly when spawned simultaneously.""" from concurrent.futures import ThreadPoolExecutor NO_OF_MICROVMS = 20 def test_run_concurrency(microvm_factory, guest_kernel, rootfs, pci_enabled): """ Check we can spawn multiple microvms. """ def launch1(): microvm = microvm_factory.build(guest_kernel, rootfs, pci=pci_enabled) microvm.time_api_requests = False # is flaky because of parallelism microvm.spawn() microvm.basic_config(vcpu_count=1, mem_size_mib=128) microvm.add_net_iface() microvm.start() with ThreadPoolExecutor(max_workers=NO_OF_MICROVMS) as tpe: for _ in range(NO_OF_MICROVMS): tpe.submit(launch1) ================================================ FILE: tests/integration_tests/functional/test_cpu_all.py ================================================ # Copyright 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved. # SPDX-License-Identifier: Apache-2.0 """ Test all vCPUs are configured correctly and work properly. This test suite aims to catch bugs of Firecracker's vCPU configuration and CPU templates especially under multi-vCPU setup, by checking that all vCPUs are operating identically, except for the expected differences. 
""" import pytest # Use the maximum number of vCPUs supported by Firecracker MAX_VCPUS = 32 @pytest.mark.parametrize("vcpu_count", [MAX_VCPUS]) def test_all_vcpus_online(uvm_any): """Check all vCPUs are online inside guest""" assert ( uvm_any.ssh.check_output("cat /sys/devices/system/cpu/online").stdout.strip() == f"0-{uvm_any.vcpus_count - 1}" ) @pytest.mark.parametrize("vcpu_count", [MAX_VCPUS]) def test_all_vcpus_have_same_features(uvm_any): """ Check all vCPUs have the same features inside guest. This test ensures Firecracker or CPU templates don't configure CPU features differently between vCPUs. Note that whether the shown CPU features are expected or not should be tested in (arch-specific) test_cpu_features_*.py only for vCPU 0. Thus, we only test the equivalence of all CPUs in the same guest. """ # Get a feature set for each CPU and deduplicate them. unique_feature_lists = uvm_any.ssh.check_output( 'grep -E "^(flags|Features)" /proc/cpuinfo | uniq' ).stdout.splitlines() assert len(unique_feature_lists) == 1 ================================================ FILE: tests/integration_tests/functional/test_cpu_features_aarch64.py ================================================ # Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
# SPDX-License-Identifier: Apache-2.0 """Tests for the CPU features for aarch64.""" import pytest from framework.properties import global_props from framework.utils_cpuid import CPU_FEATURES_CMD, CpuModel pytestmark = pytest.mark.skipif( global_props.cpu_architecture != "aarch64", reason="Only run in aarch64" ) G2_FEATS = set( ( "fp asimd evtstrm aes pmull sha1 sha2 crc32 atomics fphp " "asimdhp cpuid asimdrdm lrcpc dcpop asimddp ssbs" ).split() ) G3_FEATS = G2_FEATS | set( "sha512 asimdfhm dit uscat ilrcpc flagm jscvt fcma sha3 sm3 sm4 rng dcpodp i8mm bf16 dgh".split() ) G3_SVE_AND_PAC = set("paca pacg sve svebf16 svei8mm".split()) G4_FEATS = (G3_FEATS | set("bti flagm2 frint sb".split())) - set("sm3 sm4".split()) G4_SVE_AND_PAC = set( "paca pacg sve sve2 sveaes svebitperm svepmull svesha3 svebf16 svei8mm".split() ) def test_guest_cpu_features(uvm_any): """Check the CPU features for a microvm with different CPU templates""" vm = uvm_any expected_cpu_features = set() match global_props.cpu_model, vm.cpu_template_name: case CpuModel.ARM_NEOVERSE_N1, "V1N1": expected_cpu_features = G2_FEATS case CpuModel.ARM_NEOVERSE_N1, "None": expected_cpu_features = G2_FEATS # [cm]7g with guest kernel 5.10 and later case CpuModel.ARM_NEOVERSE_V1, "V1N1": expected_cpu_features = G2_FEATS case CpuModel.ARM_NEOVERSE_V1, "AARCH64_WITH_SVE_AND_PAC": expected_cpu_features = G3_FEATS | G3_SVE_AND_PAC case CpuModel.ARM_NEOVERSE_V1, "None": expected_cpu_features = G3_FEATS case CpuModel.ARM_NEOVERSE_V2, "None": expected_cpu_features = G4_FEATS case CpuModel.ARM_NEOVERSE_V2, "AARCH64_WITH_SVE_AND_PAC": expected_cpu_features = G4_FEATS | G4_SVE_AND_PAC guest_feats = set(vm.ssh.check_output(CPU_FEATURES_CMD).stdout.split()) assert guest_feats == expected_cpu_features ================================================ FILE: tests/integration_tests/functional/test_cpu_features_host_vs_guest.py ================================================ # Copyright 2024 Amazon.com, Inc. or its affiliates. 
All Rights Reserved. # SPDX-License-Identifier: Apache-2.0 # pylint: disable=too-many-statements # pylint: disable=too-many-branches """ Check CPU features in the host vs the guest. This test can highlight differences between the host and what the guest sees. No CPU templates as we are interested only on what is passed through to the guest by default. For that, check test_feat_parity.py """ import os from framework import utils from framework.properties import global_props from framework.utils_cpuid import CPU_FEATURES_CMD, CpuModel CPU_MODEL = global_props.cpu_codename INTEL_HOST_ONLY_FEATS = { "acpi", "aperfmperf", "arch_perfmon", "art", "bts", "cat_l3", "cdp_l3", "cqm", "cqm_llc", "cqm_mbm_local", "cqm_mbm_total", "cqm_occup_llc", "dca", "ds_cpl", "dtes64", "dtherm", "dts", "epb", "ept", "ept_ad", "est", "flexpriority", "flush_l1d", "hwp", "hwp_act_window", "hwp_epp", "hwp_pkg_req", "ibpb_exit_to_user", "ida", "intel_ppin", "intel_pt", "mba", "monitor", "pbe", "pdcm", "pebs", "pln", "pts", "rdt_a", "sdbg", "smx", "tm", "tm2", "tpr_shadow", "vmx", "vnmi", "vpid", "xtpr", } INTEL_GUEST_ONLY_FEATS = { "hypervisor", "tsc_known_freq", "umip", } AMD_MILAN_HOST_ONLY_FEATS = { "amd_ppin", "aperfmperf", "bpext", "cat_l3", "cdp_l3", "cpb", "cqm", "cqm_llc", "cqm_mbm_local", "cqm_mbm_total", "cqm_occup_llc", "decodeassists", "extapic", "flushbyasid", "hw_pstate", "ibpb_exit_to_user", "ibs", "irperf", "lbrv", "mba", "monitor", "mwaitx", "overflow_recov", "pausefilter", "perfctr_llc", "perfctr_nb", "pfthreshold", "rdpru", "rdt_a", "sev", "sev_es", "skinit", "smca", "sme", "succor", "svm_lock", "tce", "tsc_scale", "v_vmsave_vmload", "vgif", "vmcb_clean", "wdt", "npt", "nrip_save", "svm", } AMD_GUEST_ONLY_FEATS = { "hypervisor", "tsc_adjust", "tsc_deadline_timer", "tsc_known_freq", } AMD_MILAN_HOST_ONLY_FEATS_6_1 = AMD_MILAN_HOST_ONLY_FEATS - { "sme", } | {"brs", "rapl", "v_spec_ctrl"} AMD_GENOA_HOST_ONLY_FEATS = AMD_MILAN_HOST_ONLY_FEATS | { "avic", "flush_l1d", 
"ibrs_enhanced", } AMD_GENOA_HOST_ONLY_FEATS_6_1 = AMD_MILAN_HOST_ONLY_FEATS_6_1 - {"brs"} | { "avic", "amd_lbr_v2", "cppc", "flush_l1d", "ibrs_enhanced", "perfmon_v2", "x2avic", } def test_host_vs_guest_cpu_features(uvm_plain_any): """Check CPU features host vs guest""" vm = uvm_plain_any vm.spawn() vm.basic_config() vm.add_net_iface() vm.start() host_feats = set(utils.check_output(CPU_FEATURES_CMD).stdout.split()) guest_feats = set(vm.ssh.check_output(CPU_FEATURES_CMD).stdout.split()) match CPU_MODEL: case CpuModel.AMD_MILAN: if global_props.host_linux_version_tpl < (6, 1): assert host_feats - guest_feats == AMD_MILAN_HOST_ONLY_FEATS else: assert host_feats - guest_feats == AMD_MILAN_HOST_ONLY_FEATS_6_1 assert guest_feats - host_feats == AMD_GUEST_ONLY_FEATS case CpuModel.AMD_GENOA: if global_props.host_linux_version_tpl < (6, 1): assert host_feats - guest_feats == AMD_GENOA_HOST_ONLY_FEATS else: assert host_feats - guest_feats == AMD_GENOA_HOST_ONLY_FEATS_6_1 assert guest_feats - host_feats == AMD_GUEST_ONLY_FEATS case CpuModel.INTEL_CASCADELAKE: expected_host_minus_guest = INTEL_HOST_ONLY_FEATS expected_guest_minus_host = INTEL_GUEST_ONLY_FEATS # Ubuntu hasn't backported the patch for VMScape yet. # This is only requried for Intel Cascade Lake since we only run # tests on Intel Cascade Lake for Ubuntu. if "amzn" not in global_props.host_os: expected_host_minus_guest -= {"ibpb_exit_to_user"} # Linux kernel v6.4+ passes through the CPUID bit for "flush_l1d" to guests. # https://github.com/torvalds/linux/commit/45cf86f26148e549c5ba4a8ab32a390e4bde216e # # Our test ubuntu host kernel is v6.14 and has the commit. if global_props.host_linux_version_tpl >= (6, 4): expected_host_minus_guest -= {"flush_l1d"} # Linux kernel v6.6+ drops the "invpcid_single" synthetic feature bit. # https://github.com/torvalds/linux/commit/54e3d9434ef61b97fd3263c141b928dc5635e50d # # Our test ubuntu host kernel is v6.14 and has the commit. 
host_has_invpcid_single = global_props.host_linux_version_tpl < (6, 6) guest_has_invpcid_single = vm.guest_kernel_version < (6, 6) if host_has_invpcid_single and not guest_has_invpcid_single: expected_host_minus_guest |= {"invpcid_single"} if not host_has_invpcid_single and guest_has_invpcid_single: expected_guest_minus_host |= {"invpcid_single"} assert host_feats - guest_feats == expected_host_minus_guest assert guest_feats - host_feats == expected_guest_minus_host case CpuModel.INTEL_ICELAKE: expected_host_minus_guest = INTEL_HOST_ONLY_FEATS # As long as BHB clearing software mitigation is enabled, Intel Ice Lake is not # vulnerable to VMScape and "IBPB before exit to userspace" is not needed. # https://docs.kernel.org/admin-guide/hw-vuln/vmscape.html#affected-processors expected_host_minus_guest -= {"ibpb_exit_to_user"} host_guest_diff_5_10 = expected_host_minus_guest - {"cdp_l3"} | { "pconfig", "tme", "split_lock_detect", } host_guest_diff_6_1 = host_guest_diff_5_10 - { "bts", "dtes64", "dts", "pebs", } if global_props.host_linux_version_tpl < (6, 1): assert host_feats - guest_feats == host_guest_diff_5_10 else: assert host_feats - guest_feats == host_guest_diff_6_1 assert guest_feats - host_feats == INTEL_GUEST_ONLY_FEATS - {"umip"} case CpuModel.INTEL_SAPPHIRE_RAPIDS | CpuModel.INTEL_GRANITE_RAPIDS: expected_host_minus_guest = INTEL_HOST_ONLY_FEATS.copy() expected_guest_minus_host = INTEL_GUEST_ONLY_FEATS.copy() host_version = global_props.host_linux_version_tpl guest_version = vm.guest_kernel_version # KVM does not support virtualization of the following hardware features yet for several # reasons (e.g. security, simply difficulty of implementation). expected_host_minus_guest |= { # Intel Total Memory Encryption (TME) is the capability to encrypt the entirety of # physical memory of a system. TME is enabled by system BIOS/hardware and applies to # the phyiscal memory as a whole. 
"tme", # PCONFIG instruction allows software to configure certain platform features. It # supports these features with multiple leaf functions, selecting a leaf function # using the value in EAX. As of this writing, the only defined PCONFIG leaf function # is for key programming for total memory encryption-multi-key (TME-MK). "pconfig", # Architectural Last Branch Record (Arch LBR) that is a feature that logs the most # recently executed branch instructions (e.g. source and destination addresses). # Traditional LBR implementations have existed in Intel CPUs for years and the MSR # interface varied by CPU model. Arch LBR is a standardized version. There is a # kernel patch created in 2022 but didn't get merged due to a mess. # https://lore.kernel.org/all/20221125040604.5051-1-weijiang.yang@intel.com/ "arch_lbr", # ENQCMD/ENQCMDS are instructions that allow software to atomically write 64-byte # commands to enqueue registers, which are special device registers accessed using # memory-mapped I/O. "enqcmd", # Intel Resource Director Technology (RDT) feature set provides a set of allocation # (resource control) capabilities including Cache Allocation Technology (CAT) and # Code and Data Prioritization (CDP). # L3 variants are listed in INTEL_HOST_ONLY_FEATS. "cat_l2", "cdp_l2", # Firecracker disables WAITPKG in CPUID normalization. # https://github.com/firecracker-microvm/firecracker/pull/5118 "waitpkg", } # FIX: Split lock detection should be enabled on Granite Rapids too. This is a temporary patch # to prevent recurrent, known test failures. Once addressed, split lock detection will be enabled # on both Sapphire and Granite Rapids. if CPU_MODEL == CpuModel.INTEL_SAPPHIRE_RAPIDS: # This is a synthesized bit for split lock detection that raise an Alignment Check # (#AC) exception if an operand of an atomic operation crosses two cache lines. 
It # is not enumerated on CPUID, instead detected by actually attempting to read from # MSR address 0x33 (MSR_MEMORY_CTRL in Intel SDM, MSR_TEST_CTRL in Linux kernel). expected_host_minus_guest |= {"split_lock_detect"} # FIX: VMScape mitigation has not yet been backported to 5.10. elif host_version < (6, 1) and CPU_MODEL == CpuModel.INTEL_GRANITE_RAPIDS: expected_host_minus_guest -= { "ibpb_exit_to_user", } # The following features are also not virtualized by KVM yet but are only supported on # newer kernel versions. if host_version >= (5, 18): expected_host_minus_guest |= { # Indirect Brach Tracking (IBT) is a feature where the CPU ensures that indirect # branch targets start with ENDBRANCH instruction (`endbr32` or `endbr64`), # which executes as a no-op; if anything else is found, a control-protection # (#CP) fault will be raised. # https://github.com/torvalds/linux/commit/991625f3dd2cbc4b787deb0213e2bcf8fa264b21 "ibt", } if CPU_MODEL == CpuModel.INTEL_SAPPHIRE_RAPIDS: expected_host_minus_guest |= { # Hardware Feedback Interface (HFI) is a feature that gives OSes a performance # and energy efficiency capability data for each CPU that can be used to # influence task placement decisions. Only available on Sapphire Rapids. # https://github.com/torvalds/linux/commit/7b8f40b3de75c971a4e5f9308b06deb59118dbac "hfi", } # FIX: This should also be backported to 5.10. Lower priority than split_lock_detect # though. elif host_version < (5, 19) and CPU_MODEL == CpuModel.INTEL_GRANITE_RAPIDS: expected_host_minus_guest -= { # From v5.19 onwards, PPIN is detected by reading MSRs. On versions before, # a static list of architectures is enumerated. As of now, Granite Rapids has # not been backported to this list, and hence PPIN is not enabled. "intel_ppin", } # AVX512 FP16 is supported and passed through on v5.11+. 
# https://github.com/torvalds/linux/commit/e1b35da5e624f8b09d2e98845c2e4c84b179d9a4 # https://github.com/torvalds/linux/commit/2224fc9efb2d6593fbfb57287e39ba4958b188ba if host_version >= (5, 11) and guest_version < (5, 11): expected_host_minus_guest |= {"avx512_fp16"} # AVX VNNI support is supported and passed through on v5.12+. # https://github.com/torvalds/linux/commit/b85a0425d8056f3bd8d0a94ecdddf2a39d32a801 # https://github.com/torvalds/linux/commit/1085a6b585d7d1c441cd10fdb4c7a4d96a22eba7 if host_version >= (5, 12) and guest_version < (5, 12): expected_host_minus_guest |= {"avx_vnni"} # Bus lock detection is supported on v5.12+ and passed through on v5.13+. # https://github.com/torvalds/linux/commit/f21d4d3b97a8603567e5d4250bd75e8ebbd520af # https://github.com/torvalds/linux/commit/76ea438b4afcd9ee8da3387e9af4625eaccff58f if host_version >= (5, 13) and guest_version < (5, 12): expected_host_minus_guest |= {"bus_lock_detect"} # Intel AMX is supported and passed through on v5.17+. # https://github.com/torvalds/linux/commit/690a757d610e50c2c3acd2e4bc3992cfc63feff2 if host_version >= (5, 17) and guest_version < (5, 17): expected_host_minus_guest |= {"amx_bf16", "amx_int8", "amx_tile"} expected_guest_minus_host -= { # UMIP can be emulated by KVM on Intel processors, but is supported in hardware on # Intel Sapphire Rapids and passed through. "umip", # This is a synthesized bit and it is always set on guest thanks to kvm-clock. But # Intel Sapphire Rapids reports TSC frequency on CPUID leaf 0x15, so the bit is also # set on host. 
"tsc_known_freq", } assert host_feats - guest_feats == expected_host_minus_guest assert guest_feats - host_feats == expected_guest_minus_host case CpuModel.ARM_NEOVERSE_N1: expected_guest_minus_host = set() expected_host_minus_guest = set() # Upstream kernel v6.11+ hides "ssbs" from "lscpu" on Neoverse-N1 and Neoverse-V1 since # they have an errata whereby an MSR to the SSBS special-purpose register does not # affect subsequent speculative instructions, permitting speculative store bypassing for # a window of time. # https://github.com/torvalds/linux/commit/adeec61a4723fd3e39da68db4cc4d924e6d7f641 # # While Amazon Linux kernels (v5.10 and v6.1) backported the above commit, our test # ubuntu kernel (v6.8) and our guest kernels (v5.10 and v6.1) don't pick it. host_has_ssbs = global_props.host_os not in { "amzn2", "amzn2023", } and global_props.host_linux_version_tpl < (6, 11) guest_has_ssbs = vm.guest_kernel_version < (6, 11) if host_has_ssbs and not guest_has_ssbs: expected_host_minus_guest |= {"ssbs"} if not host_has_ssbs and guest_has_ssbs: expected_guest_minus_host |= {"ssbs"} assert host_feats - guest_feats == expected_host_minus_guest assert guest_feats - host_feats == expected_guest_minus_host case CpuModel.ARM_NEOVERSE_V1 | CpuModel.ARM_NEOVERSE_V2: expected_guest_minus_host = set() # KVM does not enable PAC or SVE features by default # and Firecracker does not enable them either. expected_host_minus_guest = {"paca", "pacg", "sve", "svebf16", "svei8mm"} if CPU_MODEL == CpuModel.ARM_NEOVERSE_V2: expected_host_minus_guest |= { "svebitperm", "svesha3", "sveaes", "sve2", "svepmull", } # Upstream kernel v6.11+ hides "ssbs" from "lscpu" on Neoverse-N1 and Neoverse-V1 since # they have an errata whereby an MSR to the SSBS special-purpose register does not # affect subsequent speculative instructions, permitting speculative store bypassing for # a window of time. 
# https://github.com/torvalds/linux/commit/adeec61a4723fd3e39da68db4cc4d924e6d7f641 # # While Amazon Linux kernels (v5.10 and v6.1) backported the above commit, our test # ubuntu kernel (v6.8) and our guest kernels (v5.10 and v6.1) don't pick it. host_has_ssbs = global_props.host_os not in { "amzn2", "amzn2023", } and global_props.host_linux_version_tpl < (6, 11) guest_has_ssbs = vm.guest_kernel_version < (6, 11) if host_has_ssbs and not guest_has_ssbs: expected_host_minus_guest |= {"ssbs"} if not host_has_ssbs and guest_has_ssbs: expected_guest_minus_host |= {"ssbs"} assert host_feats - guest_feats == expected_host_minus_guest assert guest_feats - host_feats == expected_guest_minus_host case _: # only fail if running in CI if os.environ.get("BUILDKITE") is not None: assert ( guest_feats == host_feats ), f"Cpu model {CPU_MODEL} is not supported" ================================================ FILE: tests/integration_tests/functional/test_cpu_features_x86_64.py ================================================ # Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
# SPDX-License-Identifier: Apache-2.0 """Tests for the CPU topology emulation feature.""" # pylint: disable=too-many-lines import csv import io import os import platform import re import shutil import sys from difflib import unified_diff from pathlib import Path import pytest import framework.utils_cpuid as cpuid_utils from framework import utils from framework.defs import SUPPORTED_HOST_KERNELS from framework.properties import global_props from framework.utils_cpu_templates import get_cpu_template_name PLATFORM = platform.machine() UNSUPPORTED_HOST_KERNEL = ( utils.get_kernel_version(level=1) not in SUPPORTED_HOST_KERNELS ) DATA_FILES = Path("./data/msr") pytestmark = pytest.mark.skipif( global_props.cpu_architecture != "x86_64", reason="Only run in x86_64" ) def read_msr_csv(fd): """Read a CSV of MSRs""" csvin = csv.DictReader(fd) return list(csvin) def clean_and_mkdir(dir_path): """ Create a clean directory """ shutil.rmtree(dir_path, ignore_errors=True) os.makedirs(dir_path) def _check_cpuid_x86(test_microvm, expected_cpu_count, expected_htt): expected_cpu_features = { "maximum IDs for CPUs in pkg": f"{expected_cpu_count:#x} ({expected_cpu_count})", "CLFLUSH line size": "0x8 (8)", "hypervisor guest status": "true", "hyper-threading / multi-core supported": expected_htt, } cpuid_utils.check_guest_cpuid_output( test_microvm, "cpuid -1", None, "=", expected_cpu_features ) def _check_extended_cache_features(vm): l3_params = cpuid_utils.get_guest_cpuid(vm, "0x80000006")[(0x80000006, 0, "edx")] # fmt: off line_size = (l3_params >> 0) & 0xFF lines_per_tag = (l3_params >> 8) & 0xF assoc = (l3_params >> 12) & 0xF cache_size = (l3_params >> 18) & 0x3FFF # fmt: on assert line_size > 0 assert lines_per_tag == 0x1 # This is hardcoded in the AMD spec assert assoc == 0x9 # This is hardcoded in the AMD spec assert cache_size > 0 def skip_test_based_on_artifacts(snapshot_artifacts_dir): """ It is possible that some X template is not supported on the instance where the snapshots 
were created and, snapshot is loaded on an instance where X is supported. This results in error since restore doesn't find the file to load. e.g. let's suppose snapshot is created on Skylake and restored on Cascade Lake. So, the created artifacts could just be: snapshot_artifacts/wrmsr/vmlinux-5.10/T2S but the restore test would fail because the files in snapshot_artifacts/wrmsr/vmlinux-5.10/T2CL won't be available. To avoid this we make an assumption that if template directory does not exist then snapshot was not created for that template and we skip the test. """ if not Path.exists(snapshot_artifacts_dir): reason = f"\n Since {snapshot_artifacts_dir} does not exist \ we skip the test assuming that snapshot was not" pytest.skip(re.sub(" +", " ", reason)) @pytest.mark.parametrize( "num_vcpus", [1, 2, 16], ) @pytest.mark.parametrize( "htt", [True, False], ) def test_cpuid(uvm_plain_any, num_vcpus, htt): """ Check the CPUID for a microvm with the specified config. """ vm = uvm_plain_any vm.spawn() vm.basic_config(vcpu_count=num_vcpus, smt=htt) vm.add_net_iface() vm.start() _check_cpuid_x86(vm, num_vcpus, "true" if num_vcpus > 1 else "false") @pytest.mark.skipif( cpuid_utils.get_cpu_vendor() != cpuid_utils.CpuVendor.AMD, reason="L3 cache info is only present in 0x80000006 for AMD", ) def test_extended_cache_features(uvm_plain_any): """ Check extended cache features (leaf 0x80000006). """ vm = uvm_plain_any vm.spawn() vm.basic_config() vm.add_net_iface() vm.start() _check_extended_cache_features(vm) def test_brand_string(uvm_plain_any): """ Ensure good formatting for the guest brand string. * For Intel CPUs, the guest brand string should be: Intel(R) Xeon(R) Processor @ {host frequency} or Intel(R) Xeon(R) Processor where {host frequency} is the frequency reported by the host CPUID (e.g. 
4.01GHz) * For AMD CPUs, the guest brand string should be: AMD EPYC * For other CPUs, the guest brand string should be: "" """ test_microvm = uvm_plain_any test_microvm.spawn() test_microvm.basic_config(vcpu_count=1) test_microvm.add_net_iface() test_microvm.start() guest_cmd = "cat /proc/cpuinfo | grep 'model name' | head -1" _, stdout, stderr = test_microvm.ssh.run(guest_cmd) assert stderr == "" line = stdout.rstrip() mo = re.search("^model name\\s+:\\s+(.+)$", line) assert mo guest_brand_string = mo.group(1) assert guest_brand_string cpu_vendor = cpuid_utils.get_cpu_vendor() if cpu_vendor == cpuid_utils.CpuVendor.AMD: # Assert the model name matches "AMD EPYC" mo = re.search("model name.*: AMD EPYC", stdout) assert mo elif cpu_vendor == cpuid_utils.CpuVendor.INTEL: # Get host frequency cif = open("/proc/cpuinfo", "r", encoding="utf-8") cpu_info = cif.read() mo = re.search("model name.*:.* ([0-9]*.[0-9]*[G|M|T]Hz)", cpu_info) # Skip if host frequency is not reported if mo is None: return host_frequency = mo.group(1) # Assert the model name matches "Intel(R) Xeon(R) Processor @ " mo = re.search( "model name.*: Intel\\(R\\) Xeon\\(R\\) Processor @ ([0-9]*.[0-9]*[T|G|M]Hz)", stdout, ) assert mo # Get the frequency guest_frequency = mo.group(1) # Assert the guest frequency matches the host frequency assert host_frequency == guest_frequency else: assert False # From the `Intel® 64 Architecture x2APIC Specification` # (https://courses.cs.washington.edu/courses/cse451/24wi/documentation/x2apic.pdf): # > The X2APIC MSRs cannot to be loaded and stored on VMX transitions. A VMX transition fails # > if the VMM has specified that the transition should access any MSRs in the address range # > from 0000_0800H to 0000_08FFH X2APIC_MSRS = [hex(i) for i in range(0x0000_0800, 0x0000_08FF + 1)] # Some MSR values should not be checked since they can change at guest runtime # and between different boots. # Current exceptions: # * FS and GS change on task switch and arch_prctl. 
# * TSC is different for each guest. # * MSR_{C, L}STAR used for SYSCALL/SYSRET; can be different between guests. # * MSR_IA32_SYSENTER_E{SP, IP} used for SYSENTER/SYSEXIT; same as above. # * MSR_KVM_{WALL, SYSTEM}_CLOCK addresses for struct pvclock_* can be different. # * MSR_IA32_TSX_CTRL is not available to read/write via KVM (known limitation). # # More detailed information about MSRs can be found in the Intel® 64 and IA-32 # Architectures Software Developer’s Manual - Volume 4: Model-Specific Registers # Check `arch_gen/src/x86/msr_idex.rs` and `msr-index.h` in upstream Linux # for symbolic definitions. # fmt: off MSR_EXCEPTION_LIST = [ "0x10", # MSR_IA32_TSC "0x11", # MSR_KVM_WALL_CLOCK "0x12", # MSR_KVM_SYSTEM_TIME "0x122", # MSR_IA32_TSX_CTRL "0x175", # MSR_IA32_SYSENTER_ESP "0x176", # MSR_IA32_SYSENTER_EIP "0x6e0", # MSR_IA32_TSC_DEADLINE "0xc0000082", # MSR_LSTAR "0xc0000083", # MSR_CSTAR "0xc0000100", # MSR_FS_BASE "0xc0000101", # MSR_GS_BASE # MSRs below are required only on T2A, however, # we are adding them to the common exception list to keep things simple "0x834" , # LVT Performance Monitor Interrupt Register "0xc0010007", # MSR_K7_PERFCTR3 "0xc001020b", # Performance Event Counter MSR_F15H_PERF_CTR5 "0xc0011029", # MSR_F10H_DECFG also referred to as MSR_AMD64_DE_CFG "0x830" , # IA32_X2APIC_ICR is interrupt command register and, # bit 0-7 represent interrupt vector that varies. "0x83f" , # IA32_X2APIC_SELF_IPI # A self IPI is semantically identical to an # inter-processor interrupt sent via the ICR, # with a Destination Shorthand of Self, # Trigger Mode equal to Edge, # and a Delivery Mode equal to Fixed. # bit 0-7 represent interrupt vector that varies. 
] + X2APIC_MSRS
# fmt: on

# CPU templates for which MSR baseline files exist under ./data/msr.
MSR_SUPPORTED_TEMPLATES = [
    "T2A",
    "T2CL",
    "T2S",
    "SPR_TO_T2_5.10",
    "SPR_TO_T2_6.1",
    "GNR_TO_T2_5.10",
    "GNR_TO_T2_6.1",
]


@pytest.mark.timeout(900)
@pytest.mark.no_block_pr
def test_cpu_rdmsr(
    msr_reader_bin, microvm_factory, cpu_template_any, guest_kernel, rootfs, results_dir
):
    """
    Test MSRs that are available to the guest.

    This test boots a uVM and tries to read a set of MSRs from the guest.
    The guest MSR list is compared against a list of MSRs that are expected
    when running on a particular combination of host CPU model, host kernel,
    guest kernel and CPU template.

    The list is dependent on:
    * host CPU model, since some MSRs are passed through from the host in some
      CPU templates
    * host kernel version, since firecracker relies on MSR emulation provided
      by KVM
    * guest kernel version, since some MSRs are writable from guest uVMs and
      different guest kernels might set different values
    * CPU template, since enabled CPUIDs are different between CPU templates
      and some MSRs are not available if CPUID features are disabled

    This comparison helps validate that defaults have not changed due to
    emulation implementation changes by host kernel patches and CPU templates.

    TODO: This validates T2S, T2CL and T2A templates. Since T2 and C3 did not
    set the ARCH_CAPABILITIES MSR, the value of that MSR is different between
    different host CPU types (see Github PR #3066). So we can either:
    * add an exceptions for different template types when checking values
    * deprecate T2 and C3 since they are somewhat broken

    Testing matrix:
    - All supported guest kernels and rootfs
    - Microvm: 1vCPU with 1024 MB RAM
    """
    cpu_template_name = get_cpu_template_name(cpu_template_any)
    if cpu_template_name not in MSR_SUPPORTED_TEMPLATES:
        pytest.skip(f"This test does not support {cpu_template_name} template.")

    vcpus, guest_mem_mib = 1, 1024
    vm = microvm_factory.build(guest_kernel, rootfs, monitor_memory=False)
    vm.spawn()
    vm.add_net_iface()
    vm.basic_config(vcpu_count=vcpus, mem_size_mib=guest_mem_mib)
    vm.set_cpu_template(cpu_template_any)
    vm.start()
    vm.ssh.scp_put(msr_reader_bin, "/tmp/msr_reader")
    _, stdout, stderr = vm.ssh.run("/tmp/msr_reader")
    assert stderr == ""

    # Load results read from the microvm
    guest_recs = read_msr_csv(io.StringIO(stdout))

    # Load baseline
    host_cpu = global_props.cpu_codename
    host_kv = global_props.host_linux_version
    guest_kv = re.search(r"vmlinux-(\d+\.\d+)", guest_kernel.name).group(1)
    baseline_file_name = (
        f"msr_list_{cpu_template_name}_{host_cpu}_{host_kv}host_{guest_kv}guest.csv"
    )
    # save it as an artifact, so we don't have to manually launch an instance to
    # get a baseline
    save_msrs = results_dir / baseline_file_name
    save_msrs.write_text(stdout)

    # Load baseline
    baseline_file_path = DATA_FILES / baseline_file_name
    baseline_recs = read_msr_csv(baseline_file_path.open())

    check_msrs_are_equal(baseline_recs, guest_recs)


# These names need to be consistent across the two parts of the snapshot-restore test
# that spans two instances (one that takes a snapshot and one that restores from it)
# fmt: off
SNAPSHOT_RESTORE_SHARED_NAMES = {
    "snapshot_artifacts_root_dir_wrmsr": "snapshot_artifacts/wrmsr",
    "snapshot_artifacts_root_dir_cpuid": "snapshot_artifacts/cpuid",
    "msrs_before_fname":                 "msrs_before.txt",
    "msrs_after_fname":                  "msrs_after.txt",
    "cpuid_before_fname":                "cpuid_before.txt",
    "cpuid_after_fname":                 "cpuid_after.txt",
}
# fmt: on


def dump_msr_state_to_file(msr_reader_bin, dump_fname, ssh_conn):
    """
    Read MSR state via SSH and dump it into a file.
    """
    ssh_conn.scp_put(msr_reader_bin, "/tmp/msr_reader")
    _, stdout, stderr = ssh_conn.run("/tmp/msr_reader")
    assert stderr == ""

    with open(dump_fname, "w", encoding="UTF-8") as file:
        file.write(stdout)


@pytest.mark.skipif(
    UNSUPPORTED_HOST_KERNEL,
    reason=f"Supported kernels are {SUPPORTED_HOST_KERNELS}",
)
@pytest.mark.timeout(900)
@pytest.mark.nonci
def test_cpu_wrmsr_snapshot(
    msr_reader_bin, microvm_factory, guest_kernel, rootfs, cpu_template_any
):
    """
    This is the first part of the test verifying
    that MSRs retain their values after restoring from a snapshot.

    This function makes MSR value modifications according to the
    ./data/msr/wrmsr_list.txt file.

    Before taking a snapshot, MSR values are dumped into a text file.
    After restoring from the snapshot on another instance, the MSRs are
    dumped again and their values are compared to previous.
    Some MSRs are not inherently supposed to retain their values, so they
    form an MSR exception list.

    This part of the test is responsible for taking a snapshot and publishing
    its files along with the `before` MSR dump.
    """
    cpu_template_name = get_cpu_template_name(cpu_template_any)
    if cpu_template_name not in MSR_SUPPORTED_TEMPLATES:
        pytest.skip(f"This test does not support {cpu_template_name} template.")

    shared_names = SNAPSHOT_RESTORE_SHARED_NAMES

    vcpus, guest_mem_mib = 1, 1024
    vm = microvm_factory.build(guest_kernel, rootfs, monitor_memory=False)
    vm.spawn()
    vm.add_net_iface()
    vm.basic_config(
        vcpu_count=vcpus,
        mem_size_mib=guest_mem_mib,
        track_dirty_pages=True,
        boot_args="msr.allow_writes=on",
    )
    vm.set_cpu_template(cpu_template_any)
    vm.start()

    # Make MSR modifications
    msr_writer_host_fname = DATA_FILES / "msr_writer.sh"
    msr_writer_guest_fname = "/tmp/msr_writer.sh"
    vm.ssh.scp_put(msr_writer_host_fname, msr_writer_guest_fname)

    wrmsr_input_host_fname = DATA_FILES / "wrmsr_list.txt"
    wrmsr_input_guest_fname = "/tmp/wrmsr_input.txt"
    vm.ssh.scp_put(wrmsr_input_host_fname, wrmsr_input_guest_fname)

    _, _, stderr = vm.ssh.run(
        f"{msr_writer_guest_fname} {wrmsr_input_guest_fname}", timeout=None
    )
    assert stderr == ""

    # Dump MSR state to a file that will be published to S3 for the 2nd part of the test
    snapshot_artifacts_dir = (
        Path(shared_names["snapshot_artifacts_root_dir_wrmsr"])
        / guest_kernel.name
        / get_cpu_template_name(cpu_template_any, with_type=True)
    )
    clean_and_mkdir(snapshot_artifacts_dir)

    msrs_before_fname = snapshot_artifacts_dir / shared_names["msrs_before_fname"]

    dump_msr_state_to_file(msr_reader_bin, msrs_before_fname, vm.ssh)

    # Take a snapshot
    snapshot = vm.snapshot_diff()

    # Copy snapshot files to be published to S3 for the 2nd part of the test
    snapshot.save_to(snapshot_artifacts_dir)


def check_msrs_are_equal(before_recs, after_recs):
    """
    Checks that reported MSRs and their values in the files are equal.
    """
    # Map MSR address -> value for both dumps.
    before = {x["MSR_ADDR"]: x["VALUE"] for x in before_recs}
    after = {x["MSR_ADDR"]: x["VALUE"] for x in after_recs}
    # We first want to see if the same set of MSRs are exposed in the microvm.
all_msrs = set(before.keys()) | set(after.keys()) changes = 0 for msr in all_msrs: if msr in before and msr not in after: print(f"MSR removed {msr} before={before[msr]}") changes += 1 elif msr not in before and msr in after: print(f"MSR added {msr} after={after[msr]}") changes += 1 elif msr in MSR_EXCEPTION_LIST: continue elif before[msr] != after[msr]: # Compare values print(f"MSR changed {msr} before={before[msr]} after={after[msr]}") changes += 1 assert changes == 0 @pytest.mark.skipif( UNSUPPORTED_HOST_KERNEL, reason=f"Supported kernels are {SUPPORTED_HOST_KERNELS}", ) @pytest.mark.timeout(900) @pytest.mark.nonci def test_cpu_wrmsr_restore( msr_reader_bin, microvm_factory, cpu_template_any, guest_kernel ): """ This is the second part of the test verifying that MSRs retain their values after restoring from a snapshot. Before taking a snapshot, MSR values are dumped into a text file. After restoring from the snapshot on another instance, the MSRs are dumped again and their values are compared to previous. Some MSRs are not inherently supposed to retain their values, so they form an MSR exception list. This part of the test is responsible for restoring from a snapshot and comparing two sets of MSR values. 
""" cpu_template_name = get_cpu_template_name(cpu_template_any) if cpu_template_name not in MSR_SUPPORTED_TEMPLATES: pytest.skip(f"This test does not support {cpu_template_name} template.") shared_names = SNAPSHOT_RESTORE_SHARED_NAMES snapshot_artifacts_dir = ( Path(shared_names["snapshot_artifacts_root_dir_wrmsr"]) / guest_kernel.name / get_cpu_template_name(cpu_template_any, with_type=True) ) skip_test_based_on_artifacts(snapshot_artifacts_dir) vm = microvm_factory.build() vm.spawn() vm.restore_from_path(snapshot_artifacts_dir, resume=True) # Dump MSR state to a file for further comparison msrs_after_fname = snapshot_artifacts_dir / shared_names["msrs_after_fname"] dump_msr_state_to_file(msr_reader_bin, msrs_after_fname, vm.ssh) msrs_before_fname = snapshot_artifacts_dir / shared_names["msrs_before_fname"] # Compare the two lists of MSR values and assert they are equal before_recs = read_msr_csv(msrs_before_fname.open()) after_recs = read_msr_csv(msrs_after_fname.open()) check_msrs_are_equal(before_recs, after_recs) def dump_cpuid_to_file(dump_fname, ssh_conn): """ Read CPUID via SSH and dump it into a file. """ _, stdout, stderr = ssh_conn.run("cpuid --one-cpu") assert stderr == "" dump_fname.write_text(stdout, encoding="UTF-8") @pytest.mark.skipif( UNSUPPORTED_HOST_KERNEL, reason=f"Supported kernels are {SUPPORTED_HOST_KERNELS}", ) @pytest.mark.timeout(900) @pytest.mark.nonci def test_cpu_cpuid_snapshot(microvm_factory, guest_kernel, rootfs, cpu_template_any): """ This is the first part of the test verifying that CPUID remains the same after restoring from a snapshot. Before taking a snapshot, CPUID is dumped into a text file. After restoring from the snapshot on another instance, the CPUID is dumped again and its content is compared to previous. This part of the test is responsible for taking a snapshot and publishing its files along with the `before` CPUID dump. 
""" cpu_template_name = get_cpu_template_name(cpu_template_any) if cpu_template_name not in MSR_SUPPORTED_TEMPLATES: pytest.skip(f"This test does not support {cpu_template_name} template.") shared_names = SNAPSHOT_RESTORE_SHARED_NAMES vm = microvm_factory.build( kernel=guest_kernel, rootfs=rootfs, ) vm.spawn() vm.add_net_iface() vm.basic_config( vcpu_count=1, mem_size_mib=1024, track_dirty_pages=True, ) vm.set_cpu_template(cpu_template_any) vm.start() # Dump CPUID to a file that will be published to S3 for the 2nd part of the test snapshot_artifacts_dir = ( Path(shared_names["snapshot_artifacts_root_dir_cpuid"]) / guest_kernel.name / get_cpu_template_name(cpu_template_any, with_type=True) ) clean_and_mkdir(snapshot_artifacts_dir) cpuid_before_fname = snapshot_artifacts_dir / shared_names["cpuid_before_fname"] dump_cpuid_to_file(cpuid_before_fname, vm.ssh) # Take a snapshot snapshot = vm.snapshot_diff() # Copy snapshot files to be published to S3 for the 2nd part of the test snapshot.save_to(snapshot_artifacts_dir) def check_cpuid_is_equal(before_cpuid_fname, after_cpuid_fname): """ Checks that CPUID dumps in the files are equal. """ with open(before_cpuid_fname, "r", encoding="UTF-8") as file: before = file.readlines() with open(after_cpuid_fname, "r", encoding="UTF-8") as file: after = file.readlines() diff = sys.stdout.writelines(unified_diff(before, after)) assert not diff, f"\n\n{diff}" @pytest.mark.skipif( UNSUPPORTED_HOST_KERNEL, reason=f"Supported kernels are {SUPPORTED_HOST_KERNELS}", ) @pytest.mark.timeout(900) @pytest.mark.nonci def test_cpu_cpuid_restore(microvm_factory, guest_kernel, cpu_template_any): """ This is the second part of the test verifying that CPUID remains the same after restoring from a snapshot. Before taking a snapshot, CPUID is dumped into a text file. After restoring from the snapshot on another instance, the CPUID is dumped again and compared to previous. 
    This part of the test is responsible for restoring from a snapshot and
    comparing two CPUIDs.
    """
    cpu_template_name = get_cpu_template_name(cpu_template_any)
    if cpu_template_name not in MSR_SUPPORTED_TEMPLATES:
        pytest.skip(f"This test does not support {cpu_template_name} template.")

    shared_names = SNAPSHOT_RESTORE_SHARED_NAMES
    # Locate the artifacts published by test_cpu_cpuid_snapshot.
    snapshot_artifacts_dir = (
        Path(shared_names["snapshot_artifacts_root_dir_cpuid"])
        / guest_kernel.name
        / get_cpu_template_name(cpu_template_any, with_type=True)
    )
    skip_test_based_on_artifacts(snapshot_artifacts_dir)

    vm = microvm_factory.build()
    vm.spawn()
    vm.restore_from_path(snapshot_artifacts_dir, resume=True)

    # Dump CPUID to a file for further comparison
    cpuid_after_fname = snapshot_artifacts_dir / shared_names["cpuid_after_fname"]
    dump_cpuid_to_file(cpuid_after_fname, vm.ssh)

    # Compare the two CPUID dumps and assert they are equal
    # (comment fixed: this compares CPUID dumps, not MSR values)
    check_cpuid_is_equal(
        snapshot_artifacts_dir / shared_names["cpuid_before_fname"],
        snapshot_artifacts_dir / shared_names["cpuid_after_fname"],
    )


def test_cpu_template(uvm_plain_any, cpu_template_any, microvm_factory):
    """
    Test masked and enabled cpu features against the expected template.

    This test checks that all expected masked features are not present in the
    guest and that expected enabled features are present for each of the
    supported CPU templates.
    """
    cpu_template_name = get_cpu_template_name(cpu_template_any)
    # Only the Intel T2-family and C3 templates define expectations here.
    if cpu_template_name not in [
        "T2",
        "T2S",
        "SPR_TO_T2_5.10",
        "SPR_TO_T2_6.1",
        "GNR_TO_T2_5.10",
        "GNR_TO_T2_6.1",
        "C3",
    ]:
        pytest.skip(f"This test does not support {cpu_template_name} template.")

    test_microvm = uvm_plain_any
    test_microvm.spawn()
    # Set template as specified in the `cpu_template` parameter.
    test_microvm.basic_config(
        vcpu_count=1,
        mem_size_mib=256,
    )
    test_microvm.set_cpu_template(cpu_template_any)
    test_microvm.add_net_iface()

    if cpuid_utils.get_cpu_vendor() != cpuid_utils.CpuVendor.INTEL:
        # We shouldn't be able to apply Intel templates on AMD hosts
        with pytest.raises(RuntimeError):
            test_microvm.start()
        return

    test_microvm.start()
    check_masked_features(test_microvm, cpu_template_name)
    check_enabled_features(test_microvm, cpu_template_name)

    # Check that cpu features are still correct
    # after snap/restore cycle.
    snapshot = test_microvm.snapshot_full()
    restored_vm = microvm_factory.build()
    restored_vm.spawn()
    restored_vm.restore_from_snapshot(snapshot, resume=True)
    check_masked_features(restored_vm, cpu_template_name)
    check_enabled_features(restored_vm, cpu_template_name)


def check_masked_features(test_microvm, cpu_template):
    """Verify the masked features of the given template.

    Each entry in `must_be_unset` is a tuple
    (leaf, subleaf, register, bitmask) — every bit set in the bitmask must
    read as 0 in the guest's CPUID for the given leaf/subleaf/register.
    """
    # fmt: off
    must_be_unset = []
    if cpu_template == "C3":
        must_be_unset = [
            (0x1, 0x0, "ecx",
                (1 << 2) |  # DTES64
                (1 << 3) |  # MONITOR
                (1 << 4) |  # DS_CPL_SHIFT
                (1 << 5) |  # VMX
                (1 << 8) |  # TM2
                (1 << 10) |  # CNXT_ID
                (1 << 11) |  # SDBG
                (1 << 12) |  # FMA
                (1 << 14) |  # XTPR_UPDATE
                (1 << 15) |  # PDCM
                (1 << 22)  # MOVBE
             ),
            (0x1, 0x0, "edx",
                (1 << 18) |  # PSN
                (1 << 21) |  # DS
                (1 << 22) |  # ACPI
                (1 << 27) |  # SS
                (1 << 29) |  # TM
                (1 << 31)  # PBE
             ),
            (0x7, 0x0, "ebx",
                (1 << 2) |  # SGX
                (1 << 3) |  # BMI1
                (1 << 4) |  # HLE
                (1 << 5) |  # AVX2
                (1 << 8) |  # BMI2
                (1 << 10) |  # INVPCID
                (1 << 11) |  # RTM
                (1 << 12) |  # RDT_M
                (1 << 14) |  # MPX
                (1 << 15) |  # RDT_A
                (1 << 16) |  # AVX512F
                (1 << 17) |  # AVX512DQ
                (1 << 18) |  # RDSEED
                (1 << 19) |  # ADX
                (1 << 21) |  # AVX512IFMA
                (1 << 23) |  # CLFLUSHOPT
                (1 << 24) |  # CLWB
                (1 << 25) |  # PT
                (1 << 26) |  # AVX512PF
                (1 << 27) |  # AVX512ER
                (1 << 28) |  # AVX512CD
                (1 << 29) |  # SHA
                (1 << 30) |  # AVX512BW
                (1 << 31)  # AVX512VL
             ),
            (0x7, 0x0, "ecx",
                (1 << 1) |  # AVX512_VBMI
                (1 << 2) |  # UMIP
                (1 << 3) |  # PKU
                (1 << 4) |  # OSPKE
                (1 << 11) |  # AVX512_VNNI
                (1 << 14) |  # AVX512_VPOPCNTDQ
                (1 << 16) |  # LA57
                (1 << 22) |  # RDPID
                (1 << 30)  # SGX_LC
             ),
            (0x7, 0x0, "edx",
                (1 << 2) |  # AVX512_4VNNIW
                (1 << 3)  # AVX512_4FMAPS
             ),
            (0xd, 0x0, "eax",
                (1 << 3) |  # MPX_STATE bit 0
                (1 << 4) |  # MPX_STATE bit 1
                (1 << 5) |  # AVX512_STATE bit 0
                (1 << 6) |  # AVX512_STATE bit 1
                (1 << 7) |  # AVX512_STATE bit 2
                (1 << 9)  # PKRU
             ),
            (0xd, 0x1, "eax",
                (1 << 1) |  # XSAVEC_SHIFT
                (1 << 2) |  # XGETBV_SHIFT
                (1 << 3)  # XSAVES_SHIFT
             ),
            (0x80000001, 0x0, "ecx",
                (1 << 5) |  # LZCNT
                (1 << 8)  # PREFETCH
             ),
            (0x80000001, 0x0, "edx",
                (1 << 26)  # PDPE1GB
             ),
        ]
    elif cpu_template in ("T2", "T2S"):
        must_be_unset = [
            (0x1, 0x0, "ecx",
                (1 << 2) |  # DTES64
                (1 << 3) |  # MONITOR
                (1 << 4) |  # DS_CPL_SHIFT
                (1 << 5) |  # VMX
                (1 << 6) |  # SMX
                (1 << 7) |  # EIST
                (1 << 8) |  # TM2
                (1 << 10) |  # CNXT_ID
                (1 << 11) |  # SDBG
                (1 << 14) |  # XTPR_UPDATE
                (1 << 15) |  # PDCM
                (1 << 18)  # DCA
             ),
            (0x1, 0x0, "edx",
                (1 << 18) |  # PSN
                (1 << 21) |  # DS
                (1 << 22) |  # ACPI
                (1 << 27) |  # SS
                (1 << 29) |  # TM
                (1 << 30) |  # IA64
                (1 << 31)  # PBE
             ),
            (0x7, 0x0, "ebx",
                (1 << 2) |  # SGX
                (1 << 4) |  # HLE
                (1 << 11) |  # RTM
                (1 << 12) |  # RDT_M
                (1 << 14) |  # MPX
                (1 << 15) |  # RDT_A
                (1 << 16) |  # AVX512F
                (1 << 17) |  # AVX512DQ
                (1 << 18) |  # RDSEED
                (1 << 19) |  # ADX
                (1 << 21) |  # AVX512IFMA
                (1 << 22) |  # PCOMMIT
                (1 << 23) |  # CLFLUSHOPT
                (1 << 24) |  # CLWB
                (1 << 25) |  # PT
                (1 << 26) |  # AVX512PF
                (1 << 27) |  # AVX512ER
                (1 << 28) |  # AVX512CD
                (1 << 29) |  # SHA
                (1 << 30) |  # AVX512BW
                (1 << 31)  # AVX512VL
             ),
            (0x7, 0x0, "ecx",
                (1 << 1) |  # AVX512_VBMI
                (1 << 2) |  # UMIP
                (1 << 3) |  # PKU
                (1 << 4) |  # OSPKE
                (1 << 6) |  # AVX512_VBMI2
                (1 << 8) |  # GFNI
                (1 << 9) |  # VAES
                (1 << 10) |  # VPCLMULQDQ
                (1 << 11) |  # AVX512_VNNI
                (1 << 12) |  # AVX512_BITALG
                (1 << 14) |  # AVX512_VPOPCNTDQ
                (1 << 16) |  # LA57
                (1 << 22) |  # RDPID
                (1 << 30)  # SGX_LC
             ),
            (0x7, 0x0, "edx",
                (1 << 2) |  # AVX512_4VNNIW
                (1 << 3) |  # AVX512_4FMAPS
                (1 << 4) |  # FSRM
                (1 << 8)  # AVX512_VP2INTERSECT
             ),
            (0xd, 0x0, "eax",
                (1 << 3) |  # MPX_STATE bit 0
                (1 << 4) |  # MPX_STATE bit 1
                (1 << 5) |  # AVX512_STATE bit 0
                (1 << 6) |  # AVX512_STATE bit 1
                (1 << 7) |  # AVX512_STATE bit 2
                (1 << 9)  # PKRU
             ),
            (0xd, 0x1, "eax",
                (1 << 1) |  # XSAVEC_SHIFT
                (1 << 2) |  # XGETBV_SHIFT
                (1 << 3)  # XSAVES_SHIFT
             ),
            (0x80000001, 0x0, "ecx",
                (1 << 8) |  # PREFETCH
                (1 << 29)  # MWAIT_EXTENDED
             ),
            (0x80000001, 0x0, "edx",
                (1 << 26)  # PDPE1GB
             ),
            (0x80000008, 0x0, "ebx",
                (1 << 9)  # WBNOINVD
             )
        ]
    elif cpu_template in ["SPR_TO_T2_5.10", "SPR_TO_T2_6.1"]:
        must_be_unset = [
            (0x1, 0x0, "ecx",
                (1 << 2) |  # DTES64
                (1 << 3) |  # MONITOR
                (1 << 4) |  # DS-CPL
                (1 << 5) |  # VMX
                (1 << 6) |  # SMX
                (1 << 7) |  # EIST
                (1 << 8) |  # TM2
                (1 << 10) |  # CNXT-ID
                (1 << 11) |  # SDBG
                (1 << 14) |  # XTPR_UPDATE
                (1 << 15) |  # PDCM
                (1 << 18)  # DCA
             ),
            (0x1, 0x0, "edx",
                (1 << 18) |  # PSN
                (1 << 21) |  # DS
                (1 << 22) |  # ACPI
                (1 << 27) |  # SS
                (1 << 29) |  # TM
                (1 << 30) |  # IA64
                (1 << 31)  # PBE
             ),
            (0x7, 0x0, "ebx",
                (1 << 2) |  # SGX
                (1 << 4) |  # HLE
                (1 << 11) |  # RTM
                (1 << 12) |  # RDT-M
                (1 << 14) |  # MPX
                (1 << 15) |  # RDT-A
                (1 << 16) |  # AVX512F
                (1 << 17) |  # AVX512DQ
                (1 << 18) |  # RDSEED
                (1 << 19) |  # ADX
                (1 << 21) |  # AVX512IFMA
                (1 << 22) |  # PCOMMIT
                (1 << 23) |  # CLFLUSHOPT
                (1 << 24) |  # CLWB
                (1 << 25) |  # PT
                (1 << 26) |  # AVX512PF
                (1 << 27) |  # AVX512ER
                (1 << 28) |  # AVX512CD
                (1 << 29) |  # SHA
                (1 << 30) |  # AVX512BW
                (1 << 31)  # AVX512VL
             ),
            (0x7, 0x0, "ecx",
                (1 << 1) |  # AVX512_VBMI
                (1 << 2) |  # UMIP
                (1 << 3) |  # PKU
                (1 << 4) |  # OSPKE
                (1 << 6) |  # AVX512_VBMI2
                (1 << 8) |  # GFNI
                (1 << 9) |  # VAES
                (1 << 10) |  # VPCLMULQDQ
                (1 << 11) |  # AVX512_VNNI
                (1 << 12) |  # AVX512_BITALG
                (1 << 14) |  # AVX512_VPOPCNTDQ
                (1 << 16) |  # LA57
                (1 << 22) |  # RDPID
                (1 << 24) |  # BUS_LOCK_DETECT
                (1 << 25) |  # CLDEMOTE
                (1 << 27) |  # MOVDIRI
                (1 << 28) |  # MOVDIR64B
                (1 << 30)  # SGX_LC
             ),
            (0x7, 0x0, "edx",
                (1 << 2) |  # AVX512_4VNNIW
                (1 << 3) |  # AVX512_4FMAPS
                (1 << 4) |  # FSRM
                (1 << 8) |  # AVX512_VP2INTERSECT
                (1 << 14) |  # SERIALIZE
                (1 << 16) |  # TSXLDTRK
                (1 << 22) |  # AMX-BF16
                (1 << 23) |  # AVX512_FP16
                (1 << 24) |  # AMX-TILE
                (1 << 25)  # AMX-INT8
             ),
            (0x7, 0x1, "eax",
                (1 << 4) |  # AVX-VNNI
                (1 << 5)  # AVX512_BF16
             ),
            # Note that we don't intentionally mask hardware security features.
            # - IPRED_CTRL: CPUID.(EAX=07H,ECX=2):EDX[1]
            # - RRSBA_CTRL: CPUID.(EAX=07H,ECX=2):EDX[2]
            # - BHI_CTRL: CPUID.(EAX=07H,ECX=2):EDX[4]
            (0xd, 0x0, "eax",
                (1 << 3) |  # MPX state bit 0
                (1 << 4) |  # MPX state bit 1
                (1 << 5) |  # AVX-512 state bit 0
                (1 << 6) |  # AVX-512 state bit 1
                (1 << 7) |  # AVX-512 state bit 2
                (1 << 9) |  # PKRU state
                (1 << 17) |  # AMX TILECFG state
                (1 << 18)  # AMX TILEDATA state
             ),
            (0xd, 0x1, "eax",
                (1 << 1) |  # XSAVEC
                (1 << 2) |  # XGETBV with ECX=1
                (1 << 3) |  # XSAVES/XRSTORS and IA32_XSS
                (1 << 4)  # XFD
             ),
            (0xd, 0x11, "eax", (1 << 32) - 1),  # AMX TILECFG XSTATE leaf, EAX
            (0xd, 0x11, "ebx", (1 << 32) - 1),  # AMX TILECFG XSTATE leaf, EBX
            (0xd, 0x11, "ecx", (1 << 32) - 1),  # AMX TILECFG XSTATE leaf, ECX
            (0xd, 0x11, "edx", (1 << 32) - 1),  # AMX TILECFG XSTATE leaf, EDX
            (0xd, 0x12, "eax", (1 << 32) - 1),  # AMX TILEDATA XSTATE leaf, EAX
            (0xd, 0x12, "ebx", (1 << 32) - 1),  # AMX TILEDATA XSTATE leaf, EBX
            (0xd, 0x12, "ecx", (1 << 32) - 1),  # AMX TILEDATA XSTATE leaf, ECX
            (0xd, 0x12, "edx", (1 << 32) - 1),  # AMX TILEDATA XSTATE leaf, EDX
            (0x1d, 0x0, "eax", (1 << 32) - 1),  # AMX Tile Information leaf, EAX
            (0x1d, 0x0, "ebx", (1 << 32) - 1),  # AMX Tile Information leaf, EBX
            (0x1d, 0x0, "ecx", (1 << 32) - 1),  # AMX Tile Information leaf, ECX
            (0x1d, 0x0, "edx", (1 << 32) - 1),  # AMX Tile Information leaf, EDX
            (0x1d, 0x1, "eax", (1 << 32) - 1),  # AMX Tile Palette 1 leaf, EAX
            (0x1d, 0x1, "ebx", (1 << 32) - 1),  # AMX Tile Palette 1 leaf, EBX
            (0x1d, 0x1, "ecx", (1 << 32) - 1),  # AMX Tile Palette 1 leaf, ECX
            (0x1d, 0x1, "edx", (1 << 32) - 1),  # AMX Tile Palette 1 leaf, EDX
            (0x1e, 0x0, "eax", (1 << 32) - 1),  # AMX TMUL Information leaf, EAX
            (0x1e, 0x0, "ebx", (1 << 32) - 1),  # AMX TMUL Information leaf, EBX
            (0x1e, 0x0, "ecx", (1 << 32) - 1),  # AMX TMUL Information leaf, ECX
            (0x1e, 0x0, "edx", (1 << 32) - 1),  # AMX TMUL Information leaf, EDX
            (0x80000001, 0x0, "ecx",
                (1 << 8) |  # PREFETCHW
                (1 << 29)  # MWAITX / MONITORX
             ),
            (0x80000001, 0x0, "edx",
                (1 << 26)  # 1-GByte pages
             ),
            (0x80000008, 0x0, "ebx",
                (1 << 9)  # WBNOINVD
             )
        ]
    elif cpu_template in ["GNR_TO_T2_5.10", "GNR_TO_T2_6.1"]:
        must_be_unset = [
            (0x1, 0x0, "ecx",
                (1 << 2) |  # DTES64
                (1 << 3) |  # MONITOR
                (1 << 4) |  # DS-CPL
                (1 << 5) |  # VMX
                (1 << 6) |  # SMX
                (1 << 7) |  # EIST
                (1 << 8) |  # TM2
                (1 << 10) |  # CNXT-ID
                (1 << 11) |  # SDBG
                (1 << 14) |  # XTPR_UPDATE
                (1 << 15) |  # PDCM
                (1 << 18)  # DCA
             ),
            (0x1, 0x0, "edx",
                (1 << 18) |  # PSN
                (1 << 21) |  # DS
                (1 << 22) |  # ACPI
                (1 << 27) |  # SS
                (1 << 29) |  # TM
                (1 << 30) |  # IA64
                (1 << 31)  # PBE
             ),
            (0x7, 0x0, "ebx",
                (1 << 2) |  # SGX
                (1 << 4) |  # HLE
                (1 << 11) |  # RTM
                (1 << 12) |  # RDT-M
                (1 << 14) |  # MPX
                (1 << 15) |  # RDT-A
                (1 << 16) |  # AVX512F
                (1 << 17) |  # AVX512DQ
                (1 << 18) |  # RDSEED
                (1 << 19) |  # ADX
                (1 << 21) |  # AVX512IFMA
                (1 << 22) |  # PCOMMIT
                (1 << 23) |  # CLFLUSHOPT
                (1 << 24) |  # CLWB
                (1 << 25) |  # PT
                (1 << 26) |  # AVX512PF
                (1 << 27) |  # AVX512ER
                (1 << 28) |  # AVX512CD
                (1 << 29) |  # SHA
                (1 << 30) |  # AVX512BW
                (1 << 31)  # AVX512VL
             ),
            (0x7, 0x0, "ecx",
                (1 << 1) |  # AVX512_VBMI
                (1 << 2) |  # UMIP
                (1 << 3) |  # PKU
                (1 << 4) |  # OSPKE
                (1 << 6) |  # AVX512_VBMI2
                (1 << 8) |  # GFNI
                (1 << 9) |  # VAES
                (1 << 10) |  # VPCLMULQDQ
                (1 << 11) |  # AVX512_VNNI
                (1 << 12) |  # AVX512_BITALG
                (1 << 14) |  # AVX512_VPOPCNTDQ
                (1 << 16) |  # LA57
                (1 << 22) |  # RDPID
                (1 << 24) |  # BUS_LOCK_DETECT
                (1 << 25) |  # CLDEMOTE
                (1 << 27) |  # MOVDIRI
                (1 << 28) |  # MOVDIR64B
                (1 << 30)  # SGX_LC
             ),
            (0x7, 0x0, "edx",
                (1 << 2) |  # AVX512_4VNNIW
                (1 << 3) |  # AVX512_4FMAPS
                (1 << 4) |  # FSRM
                (1 << 8) |  # AVX512_VP2INTERSECT
                (1 << 14) |  # SERIALIZE
                (1 << 16) |  # TSXLDTRK
                (1 << 22) |  # AMX-BF16
                (1 << 23) |  # AVX512_FP16
                (1 << 24) |  # AMX-TILE
                (1 << 25)  # AMX-INT8
             ),
            (0x7, 0x1, "eax",
                (1 << 4) |  # AVX-VNNI
                (1 << 5) |  # AVX512_BF16
                (1 << 21)  # AMX_FP16
             ),
            (0x7, 0x1, "edx",
                (1 << 14) |  # PREFETCHI
                (1 << 19)  # AVX10
             ),
            # Note that we don't intentionally mask hardware security features.
            # - IPRED_CTRL: CPUID.(EAX=07H,ECX=2):EDX[1]
            # - RRSBA_CTRL: CPUID.(EAX=07H,ECX=2):EDX[2]
            # - BHI_CTRL: CPUID.(EAX=07H,ECX=2):EDX[4]
            # - MCDT_NO: CPUID.(EAX=07H,ECX=2):EDX[5]
            (0x7, 0x2, "edx",
                (1 << 3)  # DDPD_U
             ),
            (0xd, 0x0, "eax",
                (1 << 3) |  # MPX state bit 0
                (1 << 4) |  # MPX state bit 1
                (1 << 5) |  # AVX-512 state bit 0
                (1 << 6) |  # AVX-512 state bit 1
                (1 << 7) |  # AVX-512 state bit 2
                (1 << 9) |  # PKRU state
                (1 << 17) |  # AMX TILECFG state
                (1 << 18)  # AMX TILEDATA state
             ),
            (0xd, 0x1, "eax",
                (1 << 1) |  # XSAVEC
                (1 << 2) |  # XGETBV with ECX=1
                (1 << 3) |  # XSAVES/XRSTORS and IA32_XSS
                (1 << 4)  # XFD
             ),
            (0xd, 0x11, "eax", (1 << 32) - 1),  # AMX TILECFG XSTATE leaf, EAX
            (0xd, 0x11, "ebx", (1 << 32) - 1),  # AMX TILECFG XSTATE leaf, EBX
            (0xd, 0x11, "ecx", (1 << 32) - 1),  # AMX TILECFG XSTATE leaf, ECX
            (0xd, 0x11, "edx", (1 << 32) - 1),  # AMX TILECFG XSTATE leaf, EDX
            (0xd, 0x12, "eax", (1 << 32) - 1),  # AMX TILEDATA XSTATE leaf, EAX
            (0xd, 0x12, "ebx", (1 << 32) - 1),  # AMX TILEDATA XSTATE leaf, EBX
            (0xd, 0x12, "ecx", (1 << 32) - 1),  # AMX TILEDATA XSTATE leaf, ECX
            (0xd, 0x12, "edx", (1 << 32) - 1),  # AMX TILEDATA XSTATE leaf, EDX
            (0x1d, 0x0, "eax", (1 << 32) - 1),  # AMX Tile Information leaf, EAX
            (0x1d, 0x0, "ebx", (1 << 32) - 1),  # AMX Tile Information leaf, EBX
            (0x1d, 0x0, "ecx", (1 << 32) - 1),  # AMX Tile Information leaf, ECX
            (0x1d, 0x0, "edx", (1 << 32) - 1),  # AMX Tile Information leaf, EDX
            (0x1d, 0x1, "eax", (1 << 32) - 1),  # AMX Tile Palette 1 leaf, EAX
            (0x1d, 0x1, "ebx", (1 << 32) - 1),  # AMX Tile Palette 1 leaf, EBX
            (0x1d, 0x1, "ecx", (1 << 32) - 1),  # AMX Tile Palette 1 leaf, ECX
            (0x1d, 0x1, "edx", (1 << 32) - 1),  # AMX Tile Palette 1 leaf, EDX
            (0x1e, 0x0, "eax", (1 << 32) - 1),  # AMX TMUL Information leaf, EAX
            (0x1e, 0x0, "ebx", (1 << 32) - 1),  # AMX TMUL Information leaf, EBX
            (0x1e, 0x0, "ecx", (1 << 32) - 1),  # AMX TMUL Information leaf, ECX
            (0x1e, 0x0, "edx", (1 << 32) - 1),  # AMX TMUL Information leaf, EDX
            (0x80000001, 0x0, "ecx",
                (1 << 8) |  # PREFETCHW
                (1 << 29)  # MWAITX / MONITORX
             ),
            (0x80000001, 0x0, "edx",
                (1 << 26)  # 1-GByte pages
             ),
            (0x80000008, 0x0, "ebx",
                (1 << 9)  # WBNOINVD
             )
        ]
    # fmt: on

    cpuid_utils.check_cpuid_feat_flags(
        test_microvm,
        [],
        must_be_unset,
    )


def check_enabled_features(test_microvm, cpu_template):
    """Test for checking that all expected features are enabled in guest.

    Keys are feature-flag labels exactly as printed by the guest `cpuid`
    tool; values are the expected "true"/"false" strings.
    """
    enabled_list = {
        # feature_info_1_edx
        "x87 FPU on chip": "true",
        "CMPXCHG8B inst.": "true",
        "VME: virtual-8086 mode enhancement": "true",
        "SSE extensions": "true",
        "SSE2 extensions": "true",
        "DE: debugging extensions": "true",
        "PSE: page size extensions": "true",
        "TSC: time stamp counter": "true",
        "RDMSR and WRMSR support": "true",
        "PAE: physical address extensions": "true",
        "MCE: machine check exception": "true",
        "APIC on chip": "true",
        "MMX Technology": "true",
        "SYSENTER and SYSEXIT": "true",
        "MTRR: memory type range registers": "true",
        "PTE global bit": "true",
        "FXSAVE/FXRSTOR": "true",
        "MCA: machine check architecture": "true",
        "CMOV: conditional move/compare instr": "true",
        "PAT: page attribute table": "true",
        "PSE-36: page size extension": "true",
        "CLFLUSH instruction": "true",
        # feature_info_1_ecx
        "PNI/SSE3: Prescott New Instructions": "true",
        "PCLMULDQ instruction": "true",
        "SSSE3 extensions": "true",
        "AES instruction": "true",
        "CMPXCHG16B instruction": "true",
        "PCID: process context identifiers": "true",
        "SSE4.1 extensions": "true",
        "SSE4.2 extensions": "true",
        "x2APIC: extended xAPIC support": "true",
        "POPCNT instruction": "true",
        "time stamp counter deadline": "true",
        "XSAVE/XSTOR states": "true",
        "OS-enabled XSAVE/XSTOR": "true",
        "AVX: advanced vector extensions": "true",
        "F16C half-precision convert instruction": "true",
        "RDRAND instruction": "true",
        "hypervisor guest status": "true",
        # thermal_and_power_mgmt
        "ARAT always running APIC timer": "true",
        # extended_features
        "FSGSBASE instructions": "true",
        "IA32_TSC_ADJUST MSR supported": "true",
        "SMEP supervisor mode exec protection": "true",
        "enhanced REP MOVSB/STOSB": "true",
        "SMAP: supervisor mode access prevention": "true",
        # xsave_0xd_0
        "x87 state": "true",
        "SSE state": "true",
        "AVX state": "true",
        # xsave_0xd_1
        "XSAVEOPT instruction": "true",
        # extended_080000001_edx
        "SYSCALL and SYSRET instructions": "true",
        "64-bit extensions technology available": "true",
        "execution disable": "true",
        "RDTSCP": "true",
        # intel_080000001_ecx
        "LAHF/SAHF supported in 64-bit mode": "true",
        # adv_pwr_mgmt
        "TscInvariant": "true",
    }

    cpuid_utils.check_guest_cpuid_output(
        test_microvm, "cpuid -1", None, "=", enabled_list
    )
    # The T2-family templates (but not T2S or C3) additionally guarantee
    # these features are exposed.
    if cpu_template in [
        "T2",
        "SPR_TO_T2_5.10",
        "SPR_TO_T2_6.1",
        "GNR_TO_T2_5.10",
        "GNR_TO_T2_6.1",
    ]:
        t2_enabled_features = {
            "FMA instruction": "true",
            "BMI1 instructions": "true",
            "BMI2 instructions": "true",
            "AVX2: advanced vector extensions 2": "true",
            "MOVBE instruction": "true",
            "INVPCID instruction": "true",
        }
        cpuid_utils.check_guest_cpuid_output(
            test_microvm, "cpuid -1", None, "=", t2_enabled_features
        )


@pytest.mark.skipif(
    global_props.cpu_codename != "INTEL_SAPPHIRE_RAPIDS"
    or global_props.host_linux_version_tpl < (5, 17),
    reason="Intel AMX is only supported on Intel Sapphire Rapids and kernel v5.17+",
)
def test_intel_amx_reported_on_sapphire_rapids(
    microvm_factory, guest_kernel_linux_6_1, rootfs
):
    """
    Verifies that Intel AMX is reported on guest (v5.17+)
    """
    uvm = microvm_factory.build(guest_kernel_linux_6_1, rootfs)
    uvm.spawn()
    uvm.basic_config()
    uvm.add_net_iface()
    uvm.start()

    expected_dict = {
        "AMX-BF16: tile bfloat16 support": "true",  # CPUID.(EAX=07H,ECX=0):EDX[22]
        "AMX-TILE: tile architecture support": "true",  # CPUID.(EAX=07H,ECX=0):EDX[24]
        "AMX-INT8: tile 8-bit integer support": "true",  # CPUID.(EAX=07H,ECX=0):EDX[25]
        "AMX-FP16: FP16 tile operations": "false",  # CPUID.(EAX=07H,ECX=1):EAX[21], not supported on host as well
        "XTILECFG state": "true",  # CPUID.(EAX=0DH,ECX=0):EAX[17]
        "XTILEDATA state": "true",  # CPUID.(EAX=0DH,ECX=0):EAX[18]
    }
    cpuid_utils.check_guest_cpuid_output(
        uvm,
        "cpuid -1",
        None,
        "=",
        expected_dict,
    )


def test_waitpkg_inaccessibility(uvm_nano, waitpkg_bin):
    """
    Verifies that attempting to use WAITPKG (UMONITOR / UMWAIT instructions)
    generates #UD.
    """
    vm = uvm_nano
    vm.add_net_iface()
    vm.start()

    rmt_path = "/tmp/waitpkg"
    vm.ssh.scp_put(waitpkg_bin, rmt_path)

    cmd = f"{rmt_path}; echo $?"
    _, stdout, stderr = vm.ssh.check_output(cmd)
    # Shell exit status 132 == 128 + 4 (SIGILL): the guest binary was killed
    # by an illegal-instruction trap, i.e. WAITPKG raised #UD as expected.
    assert stdout == "132\n"
    assert "Illegal instruction" in stderr


================================================
FILE: tests/integration_tests/functional/test_cpu_template_helper.py
================================================
# Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0
"""Tests that verify the cpu-template-helper's behavior."""

import json
import platform
from pathlib import Path

import pytest

from framework import utils
from framework.defs import SUPPORTED_HOST_KERNELS
from framework.properties import global_props
from framework.utils_cpuid import get_guest_cpuid
from host_tools import cargo_build

PLATFORM = platform.machine()
# Baseline fingerprint files live next to the tests.
TEST_RESOURCES_DIR = Path("./data/cpu_template_helper")


class CpuTemplateHelper:
    """
    Class for CPU template helper tool.
""" # Class constants BINARY_NAME = "cpu-template-helper" def __init__(self): """Build CPU template helper tool binary""" self.binary = cargo_build.get_binary(self.BINARY_NAME) def template_dump(self, output_path): """Dump guest CPU config in the JSON custom CPU template format""" cmd = f"{self.binary} template dump --output {output_path}" utils.check_output(cmd) def template_strip(self, paths, suffix=""): """Strip entries shared between multiple CPU template files""" paths = " ".join([str(path) for path in paths]) cmd = f"{self.binary} template strip --paths {paths} --suffix '{suffix}'" utils.check_output(cmd) def template_verify(self, template_path): """Verify the specified CPU template""" cmd = f"{self.binary} template verify --template {template_path}" utils.check_output(cmd) def fingerprint_dump(self, output_path): """Dump a fingerprint""" cmd = f"{self.binary} fingerprint dump --output {output_path}" utils.check_output(cmd) def fingerprint_compare( self, prev_path, curr_path, filters, ): """Compare two fingerprint files""" cmd = ( f"{self.binary} fingerprint compare" f" --prev {prev_path} --curr {curr_path}" ) if filters: cmd += f" --filters {' '.join(filters)}" utils.check_output(cmd) @pytest.fixture(scope="session", name="cpu_template_helper") def cpu_template_helper_fixture(): """Fixture of CPU template helper tool""" return CpuTemplateHelper() def build_cpu_config_dict(cpu_config_path): """Build a dictionary from JSON CPU config file.""" cpu_config_dict = { "cpuid": {}, "msrs": {}, } cpu_config_json = json.loads(cpu_config_path.read_text(encoding="utf-8")) # CPUID for leaf_modifier in cpu_config_json["cpuid_modifiers"]: for register_modifier in leaf_modifier["modifiers"]: cpu_config_dict["cpuid"][ ( int(leaf_modifier["leaf"], 16), int(leaf_modifier["subleaf"], 16), register_modifier["register"], ) ] = int(register_modifier["bitmap"], 2) # MSR for msr_modifier in cpu_config_json["msr_modifiers"]: cpu_config_dict["msrs"][int(msr_modifier["addr"], 16)] = 
int( msr_modifier["bitmap"], 2 ) return cpu_config_dict # List of CPUID leaves / subleaves that are not enumerated in # KVM_GET_SUPPORTED_CPUID on Intel and AMD. UNAVAILABLE_CPUID_ON_DUMP_LIST = [ # KVM changed to not return the host's processor topology information on # CPUID.Bh in the following commit (backported into kernel 5.10 and 6.1, # but not into kernel 4.14 due to merge conflict), since it's confusing # and the userspace VMM has to populate it with meaningful values. # https://github.com/torvalds/linux/commit/45e966fcca03ecdcccac7cb236e16eea38cc18af # Since Firecracker only populates subleaves 0 and 1 (thread level and core # level) in the normalization process and the subleaf 2 is left empty or # not listed, the subleaf 2 should be skipped when the userspace cpuid # enumerates it. (0xB, 0x2), # On CPUID.12h, the subleaves 0 and 1 enumerate Intel SGX capability and # attributes respectively, and subleaves 2 or higher enumerate Intel SGX # EPC that is listed only when CPUID.07h:EBX[2] is 1, meaning that SGX is # supported. However, as seen in CPU config baseline files, CPUID.07h:EBX[2] # is 0 on all tested platforms. On the other hand, the userspace cpuid # command enumerates subleaves up to 2 regardless of CPUID.07h:EBX[2]. # KVM_GET_SUPPORTED_CPUID returns 0 in CPUID.12h.0 and firecracker passes # it as it is, so here we ignore subleaves 1 and 2. (0x12, 0x1), (0x12, 0x2), # CPUID.18h enumerates deterministic address translation parameters and the # subleaf 0 reports the maximum supported subleaf in EAX, and all the tested # platforms reports 0 in EAX. However, the userspace cpuid command in ubuntu # 22 also lists the subleaf 1. (0x18, 0x1), # CPUID.1Bh enumerates PCONFIG information. The availability of PCONFIG is # enumerated in CPUID.7h.0:EDX[18]. While all the supported platforms don't # support it, the userspace cpuid command in ubuntu 22 reports not only # the subleaf 0 but also the subleaf 1. 
(0x1B, 0x1), # CPUID.1Fh is a preferred superset to CPUID.0Bh. For the same reason as # CPUID.Bh, the subleaf 2 should be skipped when the guest userspace cpuid # enumerates it. (0x1F, 0x2), # CPUID.20000000h is not documented in Intel SDM and AMD APM. KVM doesn't # report it, but the userspace cpuid command in ubuntu 22 does. (0x20000000, 0x0), # CPUID.40000100h is Xen-specific leaf. # https://xenbits.xen.org/docs/4.6-testing/hypercall/x86_64/include,public,arch-x86,cpuid.h.html (0x40000100, 0x0), # CPUID.8000001Bh or later are not supported on kernel 4.14 with an # exception CPUID.8000001Dh and CPUID.8000001Eh normalized by firecracker. # https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/tree/arch/x86/kvm/cpuid.c?h=v4.14.313#n637 # On kernel 4.16 or later, these leaves are supported. # https://github.com/torvalds/linux/commit/8765d75329a386dd7742f94a1ea5fdcdea8d93d0 (0x8000001B, 0x0), (0x8000001C, 0x0), # CPUID.80860000h is a Transmeta-specific leaf. (0x80860000, 0x0), # CPUID.C0000000h is a Centaur-specific leaf. (0xC0000000, 0x0), ] # An upper range of CPUID leaves which are not supported by our kernels UNAVAILABLE_CPUID_UPPER_RANGE = range(0x8000001F, 0x80000029) # Dictionary of CPUID bitmasks that should not be tested due to its mutability. CPUID_EXCEPTION_LIST = { # CPUID.01h:ECX[OSXSAVE (bit 27)] is linked to CR4[OSXSAVE (bit 18)] that # can be updated by guest OS. # https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/tree/arch/x86/kvm/x86.c?h=v5.10.176#n9872 (0x1, 0x0, "ecx"): 1 << 27, # CPUID.07h:ECX[OSPKE (bit 4)] is linked to CR4[PKE (bit 22)] that can be # updated by guest OS. # https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/tree/arch/x86/kvm/x86.c?h=v5.10.176#n9872 (0x7, 0x0, "ecx"): 1 << 4, # CPUID.0Dh:EBX is variable depending on XCR0 that can be updated by guest # OS with XSETBV instruction. 
# https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/tree/arch/x86/kvm/x86.c?h=v5.10.176#n973 (0xD, 0x0, "ebx"): 0xFFFF_FFFF, (0xD, 0x1, "ebx"): 0xFFFF_FFFF, } # List of MSR indices that should not be tested due to its mutability or inavailablility # in the guest. MSR_EXCEPTION_LIST = [ # MSR_KVM_WALL_CLOCK and MSR_KVM_SYSTEM_TIME depend on the elapsed time. 0x11, 0x12, # MSR_IA32_FEAT_CTL and MSR_IA32_SPEC_CTRL are R/W MSRs that can be # modified by OS to control features. 0x3A, 0x48, # MSR_IA32_SMBASE is not accessible outside of System Management Mode. 0x9E, # MSR_IA32_UMWAIT_CONTROL is R/W MSR that guest OS modifies after boot to # control UMWAIT feature. 0xE1, # MSR_IA32_TSX_CTRL is R/W MSR to disable Intel TSX feature as a mitigation # against TAA vulnerability. 0x122, # MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP and MSR_IA32_SYSENTER_EIP are # R/W MSRs that will be set up by OS to call fast system calls with # SYSENTER. 0x174, 0x175, 0x176, # MSR_IA32_XFD is R/W MSR for guest OS to control which XSAVE-enabled # features are temporarily disabled. Guest OS disables TILEDATA by default # using the MSR. 0x1C4, # IA32_PAT_MSR is R/W MSR for guest OS to control memory page attributes. 0x277, # MSR_IA32_TSC_DEADLINE specifies the time at which a timer interrupt # should occur and depends on the elapsed time. 0x6E0, # MSR_KVM_SYSTEM_TIME_NEW and MSR_KVM_WALL_CLOCK_NEW depend on the elapsed # time. 0x4B564D00, 0x4B564D01, # MSR_KVM_ASYNC_PF_EN is an asynchronous page fault (APF) control MSR and # is intialized in VM setup process. 0x4B564D02, # MSR_KVM_STEAL_TIME indicates CPU steal time filled in by the hypervisor # periodically. 0x4B564D03, # MSR_KVM_PV_EOI_EN is PV End Of Interrupt (EOI) MSR and is initialized in # VM setup process. 0x4B564D04, # MSR_KVM_ASYNC_PF_INT is an interrupt vector for delivery of 'page ready' # APF events and is initialized just before MSR_KVM_ASYNC_PF_EN. 
0x4B564D06, # MSR_STAR, MSR_LSTAR, MSR_CSTAR and MSR_SYSCALL_MASK are R/W MSRs that # will be set up by OS to call fast system calls with SYSCALL. 0xC0000081, 0xC0000082, 0xC0000083, 0xC0000084, # MSR_AMD64_VIRT_SPEC_CTRL is R/W and can be modified by OS to control # security features for speculative attacks. 0xC001011F, # Not available in the guest # MSR_TSC_RATE is a Time Stamp Counter Ratio which allows the hypervisor # to control the guest's view of the Time Stamp Counter. 0xC0000104, ] def get_guest_msrs(microvm, msr_index_list): """ Return the guest MSR in the form of a dictionary where the key is a MSR index and the value is the register value. """ msrs_dict = {} for index in msr_index_list: if index in MSR_EXCEPTION_LIST: continue rdmsr_cmd = f"rdmsr -0 {index}" code, stdout, stderr = microvm.ssh.run(rdmsr_cmd) assert stderr == "", f"Failed to get MSR for {index=:#x}: {code=}" msrs_dict[index] = int(stdout, 16) return msrs_dict @pytest.mark.skipif( PLATFORM != "x86_64", reason=( "`cpuid` and `rdmsr` commands are only available on x86_64. " "System registers are not accessible on aarch64." ), ) def test_cpu_config_dump_vs_actual( uvm_plain_any, cpu_template_helper, tmp_path, ): """ Verify that the dumped CPU config matches the actual CPU config inside guest. """ # Dump CPU config with the helper tool. cpu_config_path = tmp_path / "cpu_config.json" cpu_template_helper.template_dump(cpu_config_path) dump_cpu_config = build_cpu_config_dict(cpu_config_path) # Retrieve actual CPU config from guest microvm = uvm_plain_any microvm.spawn() microvm.basic_config(vcpu_count=1) microvm.add_net_iface() microvm.start() actual_cpu_config = { "cpuid": get_guest_cpuid(microvm), "msrs": get_guest_msrs(microvm, dump_cpu_config["msrs"].keys()), } # Compare CPUID between actual and dumped CPU config. # Verify all the actual CPUIDs are covered and match with the dumped one. 
    # Collect leaves present in the guest but missing from the dump, then
    # fail once at the end so all mismatches are visible.
    keys_not_in_dump = {}
    for key, actual in actual_cpu_config["cpuid"].items():
        if (key[0], key[1]) in UNAVAILABLE_CPUID_ON_DUMP_LIST:
            continue
        if key[0] in UNAVAILABLE_CPUID_UPPER_RANGE:
            continue
        if key not in dump_cpu_config["cpuid"]:
            keys_not_in_dump[key] = actual_cpu_config["cpuid"][key]
            continue
        dump = dump_cpu_config["cpuid"][key]

        if key in CPUID_EXCEPTION_LIST:
            # Mask out bits the guest OS may legitimately have flipped.
            actual &= ~CPUID_EXCEPTION_LIST[key]
            dump &= ~CPUID_EXCEPTION_LIST[key]
        assert actual == dump, (
            f"Mismatched CPUID for leaf={key[0]:#x} subleaf={key[1]:#x} reg={key[2]}:"
            f"{actual=:#034b} vs. {dump=:#034b}"
        )
    assert len(keys_not_in_dump) == 0

    # Verify all CPUID on the dumped CPU config are covered in actual one.
    for key, dump in dump_cpu_config["cpuid"].items():
        actual = actual_cpu_config["cpuid"].get(key)
        # `cpuid -r` command does not list up invalid leaves / subleaves
        # without specifying them.
        if actual is None:
            actual = get_guest_cpuid(microvm, key[0], key[1])[key]

        if key in CPUID_EXCEPTION_LIST:
            actual &= ~CPUID_EXCEPTION_LIST[key]
            dump &= ~CPUID_EXCEPTION_LIST[key]
        assert actual == dump, (
            f"Mismatched CPUID for leaf={key[0]:#x} subleaf={key[1]:#x} reg={key[2]}:"
            f"{actual=:#034b} vs. {dump=:#034b}"
        )

    # Compare MSR between actual and dumped CPU config.
    for key in dump_cpu_config["msrs"]:
        if key in MSR_EXCEPTION_LIST:
            continue
        actual = actual_cpu_config["msrs"][key]
        dump = dump_cpu_config["msrs"][key]
        assert (
            actual == dump
        ), f"Mismatched MSR for {key:#010x}: {actual=:#066b} vs. {dump=:#066b}"


@pytest.mark.no_block_pr
@pytest.mark.skipif(
    global_props.host_linux_version not in SUPPORTED_HOST_KERNELS,
    reason=f"Supported kernels are {SUPPORTED_HOST_KERNELS}",
)
def test_guest_cpu_config_change(results_dir, cpu_template_helper):
    """
    Verify that the guest CPU config has not changed since the baseline
    fingerprint was gathered.
    """
    # Baseline files are keyed by CPU codename and host kernel version.
    fname = f"fingerprint_{global_props.cpu_codename}_{global_props.host_linux_version}host.json"

    # Dump a fingerprint with the generated VM config.
    fingerprint_path = results_dir / fname
    cpu_template_helper.fingerprint_dump(fingerprint_path)

    # Baseline fingerprint.
    baseline_path = TEST_RESOURCES_DIR / fname

    # Compare with baseline, restricted to the guest CPU config section.
    cpu_template_helper.fingerprint_compare(
        baseline_path,
        fingerprint_path,
        ["guest_cpu_config"],
    )


def test_json_static_templates(cpu_template_helper, tmp_path, custom_cpu_template):
    """
    Verify that JSON static CPU templates are applied as intended.
    """
    custom_cpu_template_path = tmp_path / "template.json"
    Path(custom_cpu_template_path).write_text(
        json.dumps(custom_cpu_template["template"]), encoding="utf-8"
    )

    # Verify the JSON static CPU template.
    cpu_template_helper.template_verify(custom_cpu_template_path)


def test_consecutive_fingerprint_consistency(cpu_template_helper, tmp_path):
    """
    Verify that two fingerprints obtained consecutively are consistent.
    """
    # Dump a fingerprint with the helper tool.
    fp1 = tmp_path / "fp1.json"
    cpu_template_helper.fingerprint_dump(fp1)
    fp2 = tmp_path / "fp2.json"
    cpu_template_helper.fingerprint_dump(fp2)

    # Compare them (no filters: every fingerprint section must match).
    cpu_template_helper.fingerprint_compare(fp1, fp2, None)


================================================
FILE: tests/integration_tests/functional/test_dirty_pages_in_full_snapshot.py
================================================
# Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0 """Test scenario for reseting dirty pages after making a full snapshot.""" def test_dirty_pages_after_full_snapshot(uvm_plain): """ Test if dirty pages are erased after making a full snapshot of a VM """ vm_mem_size = 128 uvm = uvm_plain uvm.spawn() uvm.basic_config(mem_size_mib=vm_mem_size, track_dirty_pages=True) uvm.add_net_iface() uvm.start() snap_full = uvm.snapshot_full(vmstate_path="vmstate_full", mem_path="mem_full") snap_diff = uvm.snapshot_diff(vmstate_path="vmstate_diff", mem_path="mem_diff") snap_diff2 = uvm.snapshot_diff(vmstate_path="vmstate_diff2", mem_path="mem_diff2") # file size is the same, but the `diff` snapshot is actually a sparse file assert snap_full.mem.stat().st_size == snap_diff.mem.stat().st_size # full -> diff: full should have more things in it # Diff snapshots will contain some pages, because we always mark # pages used for virt queues as dirty. assert snap_diff.mem.stat().st_blocks < snap_full.mem.stat().st_blocks assert snap_diff2.mem.stat().st_blocks < snap_full.mem.stat().st_blocks # diff -> diff: there should be no differences assert snap_diff.mem.stat().st_blocks == snap_diff2.mem.stat().st_blocks ================================================ FILE: tests/integration_tests/functional/test_drive_vhost_user.py ================================================ # Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
# SPDX-License-Identifier: Apache-2.0 """Tests for vhost-user-block device.""" import os import shutil from pathlib import Path import pytest import host_tools.drive as drive_tools from framework.utils_drive import partuuid_and_disk_path from host_tools.fcmetrics import FcDeviceMetrics @pytest.fixture def uvm_vhost_user_plain_any(microvm_factory, guest_kernel, pci_enabled): """Builds a plain VM with no root volume""" return microvm_factory.build( guest_kernel, None, pci=pci_enabled, monitor_memory=False ) @pytest.fixture def uvm_vhost_user_booted_ro(uvm_vhost_user_plain_any, rootfs): """Returns a VM with a vhost-user rootfs""" vm = uvm_vhost_user_plain_any # We need to setup ssh keys manually because we did not specify rootfs # in microvm_factory.build method ssh_key = rootfs.with_suffix(".id_rsa") vm.ssh_key = ssh_key vm.spawn() vm.basic_config(add_root_device=False) vm.add_vhost_user_drive("rootfs", rootfs, is_root_device=True, is_read_only=True) vm.add_net_iface() vm.start() return vm @pytest.fixture def uvm_vhost_user_booted_rw(uvm_vhost_user_plain_any, rootfs): """Returns a VM with a vhost-user rootfs""" vm = uvm_vhost_user_plain_any # We need to setup ssh keys manually because we did not specify rootfs # in microvm_factory.build method ssh_key = rootfs.with_suffix(".id_rsa") vm.ssh_key = ssh_key vm.spawn() vm.basic_config(add_root_device=False) # Create a rw rootfs file that is unique to the microVM rootfs_rw = Path(vm.chroot()) / "rootfs" shutil.copy(rootfs, rootfs_rw) vm.add_vhost_user_drive( "rootfs", rootfs_rw, is_root_device=True, is_read_only=False ) vm.add_net_iface() vm.start() return vm def _check_block_size(ssh_connection, dev_path, size): """ Checks the size of the block device. """ _, stdout, stderr = ssh_connection.run("blockdev --getsize64 {}".format(dev_path)) assert stderr == "" assert stdout.strip() == str(size) def _check_drives(test_microvm, assert_dict, keys_array): """ Checks the info on the block devices. 
""" _, stdout, stderr = test_microvm.ssh.run("blockdev --report") assert stderr == "" blockdev_out_lines = stdout.splitlines() for key in keys_array: line = int(key.split("-")[0]) col = int(key.split("-")[1]) blockdev_out_line_cols = blockdev_out_lines[line].split() assert blockdev_out_line_cols[col] == assert_dict[key] def test_vhost_user_block(uvm_vhost_user_booted_ro): """ This test simply tries to boot a VM with vhost-user-block as a root device. """ vm = uvm_vhost_user_booted_ro vhost_user_block_metrics = FcDeviceMetrics( "vhost_user_block", 1, aggr_supported=False ) # Now check that vhost-user-block with rw is last. # 1-0 means line 1, column 0. assert_dict = { "1-0": "ro", "1-6": "/dev/vda", } _check_drives(vm, assert_dict, assert_dict.keys()) vhost_user_block_metrics.validate(vm) def test_vhost_user_block_read_write(uvm_vhost_user_booted_rw): """ This test simply tries to boot a VM with vhost-user-block as a root device. This test configures vhost-user-block to be read write. """ vm = uvm_vhost_user_booted_rw # Now check that vhost-user-block with rw is last. # 1-0 means line 1, column 0. assert_dict = { "1-0": "rw", "1-6": "/dev/vda", } _check_drives(vm, assert_dict, assert_dict.keys()) def test_vhost_user_block_disconnect(uvm_vhost_user_booted_ro): """ Test that even if backend is killed, Firecracker is still responsive. """ vm = uvm_vhost_user_booted_ro # Killing the backend vm.disks_vhost_user["rootfs"].kill() del vm.disks_vhost_user["rootfs"] # Verify that Firecracker is still responsive _config = vm.api.vm_config.get().json() def test_device_ordering(uvm_vhost_user_plain_any, rootfs): """ Verify device ordering. The root device should correspond to /dev/vda in the guest and the order of the other devices should match their configuration order. 
""" vm = uvm_vhost_user_plain_any # We need to setup ssh keys manually because we did not specify rootfs # in microvm_factory.build method ssh_key = rootfs.with_suffix(".id_rsa") vm.ssh_key = ssh_key vm.spawn() vm.basic_config(add_root_device=False) vm.add_net_iface() # Adding first block device. fs1 = drive_tools.FilesystemFile(os.path.join(vm.fsfiles, "scratch1"), size=128) vm.add_drive("scratch1", fs1.path) # Adding second block device (rootfs) vm.add_vhost_user_drive("rootfs", rootfs, is_root_device=True, is_read_only=True) # Adding third block device. fs2 = drive_tools.FilesystemFile(os.path.join(vm.fsfiles, "scratch2"), size=512) vm.add_drive("scratch2", fs2.path) # Create a rw rootfs file that is unique to the microVM rootfs_rw = Path(vm.chroot()) / "rootfs" shutil.copy(rootfs, rootfs_rw) # Adding forth block device. vm.add_vhost_user_drive("dummy_rootfs", rootfs_rw) block_metrics = FcDeviceMetrics("block", 2, aggr_supported=True) vhost_user_block_metrics = FcDeviceMetrics( "vhost_user_block", 2, aggr_supported=False ) vm.start() rootfs_size = rootfs.stat().st_size # The devices were added in this order: fs1, rootfs, fs2. fs3 # However, the rootfs is the root device and goes first, # so we expect to see this order: rootfs, fs1, fs2. fs3 # First check drives order by sizes. ssh_connection = vm.ssh _check_block_size(ssh_connection, "/dev/vda", rootfs_size) _check_block_size(ssh_connection, "/dev/vdb", fs1.size()) _check_block_size(ssh_connection, "/dev/vdc", fs2.size()) _check_block_size(ssh_connection, "/dev/vdd", rootfs_size) # Now check that vhost-user-block with rw is last. # 1-0 means line 1, column 0. 
assert_dict = { "1-0": "ro", "1-6": "/dev/vda", "2-0": "rw", "2-6": "/dev/vdb", "3-0": "rw", "3-6": "/dev/vdc", "4-0": "rw", "4-6": "/dev/vdd", } _check_drives(vm, assert_dict, assert_dict.keys()) block_metrics.validate(vm) vhost_user_block_metrics.validate(vm) def test_partuuid_boot(uvm_vhost_user_plain_any, rootfs): """ Test the output reported by blockdev when booting with PARTUUID. """ vm = uvm_vhost_user_plain_any # We need to setup ssh keys manually because we did not specify rootfs # in microvm_factory.build method ssh_key = rootfs.with_suffix(".id_rsa") vm.ssh_key = ssh_key vm.spawn() vm.basic_config(add_root_device=False) # Create a rootfs with partuuid unique to this microVM partuuid, disk_path = partuuid_and_disk_path(rootfs, Path(vm.chroot()) / "disk.img") vm.add_vhost_user_drive( "1", disk_path, is_root_device=True, partuuid=partuuid, is_read_only=True ) vm.add_net_iface() vm.start() # Now check that vhost-user-block with rw is last. # 1-0 means line 1, column 0. assert_dict = { "1-0": "ro", "1-6": "/dev/vda", } _check_drives(vm, assert_dict, assert_dict.keys()) def test_partuuid_update(uvm_vhost_user_plain_any, rootfs): """ Test successful switching from PARTUUID boot to /dev/vda boot. """ vm = uvm_vhost_user_plain_any # We need to setup ssh keys manually because we did not specify rootfs # in microvm_factory.build method ssh_key = rootfs.with_suffix(".id_rsa") vm.ssh_key = ssh_key vm.spawn() vm.basic_config(add_root_device=False) vm.add_net_iface() # Add the root block device specified through PARTUUID. vm.add_vhost_user_drive( "rootfs", rootfs, is_root_device=True, partuuid="0eaa91a0-01", is_read_only=True, ) # Adding a drive with the same ID creates another backend with another socket. vm.add_vhost_user_drive("rootfs", rootfs, is_root_device=True, is_read_only=True) vhost_user_block_metrics = FcDeviceMetrics( "vhost_user_block", 1, aggr_supported=False ) vm.start() # Now check that vhost-user-block with rw is last. # 1-0 means line 1, column 0. 
assert_dict = { "1-0": "ro", "1-6": "/dev/vda", } _check_drives(vm, assert_dict, assert_dict.keys()) vhost_user_block_metrics.validate(vm) def test_config_change(uvm_plain_any): """ Verify handling of block device resize. We expect that the guest will start reporting the updated size after Firecracker handles a PATCH request to the vhost-user block device. """ orig_size = 10 # MB new_sizes = [20, 10, 30] # MB mkfs_mount_cmd = "mkfs.ext4 /dev/vdb && mkdir -p /tmp/tmp && mount /dev/vdb /tmp/tmp && umount /tmp/tmp" vm = uvm_plain_any vm.spawn(log_level="Info") vm.basic_config() vm.add_net_iface() # Add a block device to test resizing. fs = drive_tools.FilesystemFile(size=orig_size) vm.add_vhost_user_drive("scratch", fs.path) vm.start() # Check that guest reports correct original size. _check_block_size(vm.ssh, "/dev/vdb", orig_size * 1024 * 1024) # Check that we can create a filesystem and mount it vm.ssh.check_output(mkfs_mount_cmd) for new_size in new_sizes: # Instruct the backend to resize the device. # It will both resize the file and update its device config. vm.disks_vhost_user["scratch"].resize(new_size) # Instruct Firecracker to reread device config and notify # the guest of a config change. vm.patch_drive("scratch") # Check that guest reports correct new size. _check_block_size(vm.ssh, "/dev/vdb", new_size * 1024 * 1024) # Check that we can create a filesystem and mount it vm.ssh.check_output(mkfs_mount_cmd) ================================================ FILE: tests/integration_tests/functional/test_drive_virtio.py ================================================ # Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
# SPDX-License-Identifier: Apache-2.0 """Tests for guest-side operations on /drives resources.""" import os import pytest import host_tools.drive as drive_tools from framework import utils from framework.utils_drive import partuuid_and_disk_path MB = 1024 * 1024 @pytest.fixture def partuuid_and_disk_path_tmpfs(rootfs, tmp_path): """ We create a new file in tmpfs, get its partuuid and use it as a rootfs. """ disk_path = tmp_path / "disk.img" yield partuuid_and_disk_path(rootfs, disk_path) disk_path.unlink() def test_rescan_file(uvm_plain_any, io_engine): """ Verify that rescan works with a file-backed virtio device. """ test_microvm = uvm_plain_any test_microvm.spawn() # Set up the microVM with 1 vCPUs, 256 MiB of RAM and a root file system test_microvm.basic_config() test_microvm.add_net_iface() block_size = 2 # Add a scratch block device. fs = drive_tools.FilesystemFile( os.path.join(test_microvm.fsfiles, "scratch"), size=block_size ) test_microvm.add_drive("scratch", fs.path, io_engine=io_engine) test_microvm.start() _check_block_size(test_microvm.ssh, "/dev/vdb", fs.size()) # Check if reading from the entire disk results in a file of the same size # or errors out, after a truncate on the host. truncated_size = block_size // 2 utils.check_output(f"truncate --size {truncated_size}M {fs.path}") block_copy_name = "/tmp/dev_vdb_copy" _, _, stderr = test_microvm.ssh.run( f"dd if=/dev/vdb of={block_copy_name} bs=1M count={block_size}" ) assert "dd: error reading '/dev/vdb': Input/output error" in stderr _check_file_size(test_microvm.ssh, f"{block_copy_name}", truncated_size * MB) test_microvm.api.drive.patch( drive_id="scratch", path_on_host=test_microvm.create_jailed_resource(fs.path), ) _check_block_size(test_microvm.ssh, "/dev/vdb", fs.size()) def test_device_ordering(uvm_plain_any, io_engine): """ Verify device ordering. The root device should correspond to /dev/vda in the guest and the order of the other devices should match their configuration order. 
""" test_microvm = uvm_plain_any test_microvm.spawn() # Add first scratch block device. fs1 = drive_tools.FilesystemFile( os.path.join(test_microvm.fsfiles, "scratch1"), size=128 ) test_microvm.add_drive("scratch1", fs1.path, io_engine=io_engine) # Set up the microVM with 1 vCPUs, 256 MiB of RAM and a root file system # (this is the second block device added). test_microvm.basic_config() test_microvm.add_net_iface() # Add the third block device. fs2 = drive_tools.FilesystemFile( os.path.join(test_microvm.fsfiles, "scratch2"), size=512 ) test_microvm.add_drive("scratch2", fs2.path, io_engine=io_engine) test_microvm.start() # Determine the size of the microVM rootfs in bytes. _, stdout, _ = utils.check_output( "du --apparent-size --block-size=1 {}".format(test_microvm.rootfs_file), ) assert len(stdout.split()) == 2 rootfs_size = stdout.split("\t")[0] # The devices were added in this order: fs1, rootfs, fs2. # However, the rootfs is the root device and goes first, # so we expect to see this order: rootfs, fs1, fs2. # The devices are identified by their size. ssh_connection = test_microvm.ssh _check_block_size(ssh_connection, "/dev/vda", rootfs_size) _check_block_size(ssh_connection, "/dev/vdb", fs1.size()) _check_block_size(ssh_connection, "/dev/vdc", fs2.size()) def test_rescan_dev(uvm_plain_any, io_engine): """ Verify that rescan works with a device-backed virtio device. """ test_microvm = uvm_plain_any test_microvm.spawn() # Set up the microVM with 1 vCPUs, 256 MiB of RAM and a root file system test_microvm.basic_config() test_microvm.add_net_iface() # Add a scratch block device. 
fs1 = drive_tools.FilesystemFile(os.path.join(test_microvm.fsfiles, "fs1")) test_microvm.add_drive("scratch", fs1.path, io_engine=io_engine) test_microvm.start() _check_block_size(test_microvm.ssh, "/dev/vdb", fs1.size()) fs2 = drive_tools.FilesystemFile( os.path.join(test_microvm.fsfiles, "fs2"), size=512 ) losetup = ["losetup", "--find", "--show", fs2.path] rc, stdout, _ = utils.check_output(losetup) assert rc == 0 loopback_device = stdout.rstrip() try: test_microvm.api.drive.patch( drive_id="scratch", path_on_host=test_microvm.create_jailed_resource(loopback_device), ) _check_block_size(test_microvm.ssh, "/dev/vdb", fs2.size()) finally: if loopback_device: utils.check_output(["losetup", "--detach", loopback_device]) def test_non_partuuid_boot(uvm_plain_any, io_engine): """ Test the output reported by blockdev when booting from /dev/vda. """ test_microvm = uvm_plain_any test_microvm.spawn() # Sets up the microVM with 1 vCPUs, 256 MiB of RAM and a root file system test_microvm.basic_config(vcpu_count=1) test_microvm.add_net_iface() # Add another read-only block device. fs = drive_tools.FilesystemFile(os.path.join(test_microvm.fsfiles, "readonly")) test_microvm.add_drive("scratch", fs.path, is_read_only=True, io_engine=io_engine) test_microvm.start() # Keep a dictionary where the keys are the location and the values # represent the input to assert against. # 1, 0 means line 1, column 0. assert_dict = { (1, 0): "ro", (1, 6): "/dev/vda", (2, 0): "ro", } _check_drives(test_microvm, assert_dict, assert_dict.keys()) def test_partuuid_boot(uvm_plain_any, partuuid_and_disk_path_tmpfs, io_engine): """ Test the output reported by blockdev when booting with PARTUUID. 
""" partuuid = partuuid_and_disk_path_tmpfs[0] disk_path = partuuid_and_disk_path_tmpfs[1] test_microvm = uvm_plain_any test_microvm.spawn() # Sets up the microVM with 1 vCPUs, 256 MiB of RAM and without root file system test_microvm.basic_config(vcpu_count=1, add_root_device=False) test_microvm.add_net_iface() # Add the root block device specified through PARTUUID. test_microvm.add_drive( "rootfs", disk_path, is_root_device=True, partuuid=partuuid, io_engine=io_engine, ) test_microvm.start() assert_dict = { (1, 0): "rw", (1, 6): "/dev/vda", (2, 0): "rw", (2, 6): "/dev/vda1", } _check_drives(test_microvm, assert_dict, assert_dict.keys()) def test_partuuid_update(uvm_plain_any, io_engine): """ Test successful switching from PARTUUID boot to /dev/vda boot. """ test_microvm = uvm_plain_any test_microvm.spawn() # Set up the microVM with 1 vCPUs, 256 MiB of RAM test_microvm.basic_config(vcpu_count=1, add_root_device=False) test_microvm.add_net_iface() # Add the root block device specified through PARTUUID. test_microvm.add_drive( "rootfs", test_microvm.rootfs_file, is_root_device=True, partuuid="0eaa91a0-01", io_engine=io_engine, ) # Update the root block device to boot from /dev/vda. test_microvm.add_drive( "rootfs", test_microvm.rootfs_file, is_root_device=True, io_engine=io_engine, ) test_microvm.start() # Assert that the final booting method is from /dev/vda. assert_dict = { (1, 0): "rw", (1, 6): "/dev/vda", } _check_drives(test_microvm, assert_dict, assert_dict.keys()) def test_patch_drive(uvm_plain_any, io_engine): """ Test replacing the backing filesystem after guest boot works. 
""" test_microvm = uvm_plain_any test_microvm.spawn() # Set up the microVM with 1 vCPUs, 256 MiB of RAM and a root file system test_microvm.basic_config() test_microvm.add_net_iface() fs1 = drive_tools.FilesystemFile(os.path.join(test_microvm.fsfiles, "scratch")) test_microvm.add_drive("scratch", fs1.path, io_engine=io_engine) test_microvm.start() _check_mount(test_microvm.ssh, "/dev/vdb") # Updates to `path_on_host` with a valid path are allowed. fs2 = drive_tools.FilesystemFile( os.path.join(test_microvm.fsfiles, "otherscratch"), size=512 ) test_microvm.api.drive.patch( drive_id="scratch", path_on_host=test_microvm.create_jailed_resource(fs2.path) ) _check_mount(test_microvm.ssh, "/dev/vdb") # The `lsblk` command should output 2 lines to STDOUT: "SIZE" and the size # of the device, in bytes. blksize_cmd = "LSBLK_DEBUG=all lsblk -b /dev/vdb --output SIZE" size_bytes_str = "536870912" # = 512 MiB _, stdout, _ = test_microvm.ssh.check_output(blksize_cmd) lines = stdout.split("\n") # skip "SIZE" assert lines[1].strip() == size_bytes_str def test_no_flush(uvm_plain_any, io_engine): """ Verify default block ignores flush. """ test_microvm = uvm_plain_any test_microvm.spawn() test_microvm.basic_config(vcpu_count=1, add_root_device=False) test_microvm.add_net_iface() # Add the block device test_microvm.add_drive( "rootfs", test_microvm.rootfs_file, is_root_device=True, io_engine=io_engine, ) test_microvm.start() # Verify all flush commands were ignored during boot. fc_metrics = test_microvm.flush_metrics() assert fc_metrics["block"]["flush_count"] == 0 # Have the guest drop the caches to generate flush requests. cmd = "sync; echo 1 > /proc/sys/vm/drop_caches" _, _, stderr = test_microvm.ssh.run(cmd) assert stderr == "" # Verify all flush commands were ignored even after # dropping the caches. fc_metrics = test_microvm.flush_metrics() assert fc_metrics["block"]["flush_count"] == 0 def test_flush(uvm_plain_rw, io_engine): """ Verify block with flush actually flushes. 
""" test_microvm = uvm_plain_rw test_microvm.spawn() test_microvm.basic_config(vcpu_count=1, add_root_device=False) test_microvm.add_net_iface() # Add the block device with explicitly enabling flush. test_microvm.add_drive( "rootfs", test_microvm.rootfs_file, is_root_device=True, cache_type="Writeback", io_engine=io_engine, ) test_microvm.start() # Have the guest drop the caches to generate flush requests. cmd = "sync; echo 1 > /proc/sys/vm/drop_caches" _, _, stderr = test_microvm.ssh.run(cmd) assert stderr == "" # On average, dropping the caches right after boot generates # about 6 block flush requests. fc_metrics = test_microvm.flush_metrics() assert fc_metrics["block"]["flush_count"] > 0 def _check_block_size(ssh_connection, dev_path, size): _, stdout, stderr = ssh_connection.run("blockdev --getsize64 {}".format(dev_path)) assert stderr == "" assert stdout.strip() == str(size) def _check_file_size(ssh_connection, dev_path, size): _, stdout, stderr = ssh_connection.run("stat --format=%s {}".format(dev_path)) assert stderr == "" assert stdout.strip() == str(size) def _process_blockdev_output(blockdev_out, assert_dict, keys_array): blockdev_out_lines = blockdev_out.splitlines() for line, col in keys_array: blockdev_out_line_cols = blockdev_out_lines[line].split() assert blockdev_out_line_cols[col] == assert_dict[line, col] def _check_drives(test_microvm, assert_dict, keys_array): _, stdout, stderr = test_microvm.ssh.run("blockdev --report") assert stderr == "" _process_blockdev_output(stdout, assert_dict, keys_array) def _check_mount(ssh_connection, dev_path): _, _, stderr = ssh_connection.run(f"mount {dev_path} /tmp", timeout=30.0) assert stderr == "" _, _, stderr = ssh_connection.run("umount /tmp", timeout=30.0) assert stderr == "" ================================================ FILE: tests/integration_tests/functional/test_error_code.py ================================================ # Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
# SPDX-License-Identifier: Apache-2.0 """Tests scenarios for Firecracker kvm exit handling.""" import platform import pytest @pytest.mark.skipif( platform.machine() != "aarch64", reason="The error code returned on aarch64 will not be returned on x86 " "under the same conditions.", ) def test_enosys_error_code(uvm_plain): """ Test that ENOSYS error is caught and firecracker exits gracefully. """ # On aarch64 we trigger this error by running a C program that # maps a file into memory and then tries to load the content from an # offset in the file bigger than its length into a register asm volatile # ("ldr %0, [%1], 4" : "=r" (ret), "+r" (buf)); vm = uvm_plain vm.spawn() vm.memory_monitor = None vm.basic_config( vcpu_count=1, boot_args="reboot=k panic=1 swiotlb=noforce init=/usr/local/bin/devmemread", ) vm.start() # Check if FC process is closed vm.mark_killed() vm.check_log_message( "Received ENOSYS error because KVM failed to emulate an instruction." ) vm.check_log_message("Vmm is stopping.") ================================================ FILE: tests/integration_tests/functional/test_feat_parity.py ================================================ # Copyright 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
# SPDX-License-Identifier: Apache-2.0 """Tests for the verifying features exposed by CPUID and MSRs by various CPU templates.""" import pytest import framework.utils_cpuid as cpuid_utils from framework.properties import global_props from framework.utils_cpu_templates import SUPPORTED_CPU_TEMPLATES pytestmark = pytest.mark.skipif( global_props.cpu_architecture != "x86_64", reason="x86_64 specific tests" ) # CPU templates designed to provide instruction set feature parity INST_SET_TEMPLATES = ["T2A", "T2CL"] @pytest.fixture( name="inst_set_cpu_template", params=sorted(set(SUPPORTED_CPU_TEMPLATES).intersection(INST_SET_TEMPLATES)), ) def inst_set_cpu_template_fxt(request): """CPU template fixture for instruction set feature parity templates""" return request.param @pytest.fixture(name="vm") def vm_fxt(uvm_plain_any, inst_set_cpu_template): """ Create a VM, using the normal CPU templates """ vm = uvm_plain_any vm.spawn() vm.basic_config(vcpu_count=1, mem_size_mib=1024, cpu_template=inst_set_cpu_template) vm.add_net_iface() vm.start() return vm def test_feat_parity_cpuid_mpx(vm): """ Verify that MPX (Memory Protection Extensions) is not enabled in any of the supported CPU templates. """ # fmt: off must_be_set = [] must_be_unset = [ (0x7, 0x0, "ebx", (1 << 14) # MPX ), ] # fmt: on cpuid_utils.check_cpuid_feat_flags( vm, must_be_set, must_be_unset, ) @pytest.mark.parametrize( "inst_set_cpu_template", sorted(set(SUPPORTED_CPU_TEMPLATES).intersection(INST_SET_TEMPLATES + ["T2"])), indirect=True, ) def test_feat_parity_cpuid_inst_set(vm): """ Verify that CPUID feature flags related to instruction sets are properly set for T2, T2CL and T2A CPU templates. 
""" # fmt: off must_be_set = [ (0x7, 0x0, "ebx", (1 << 5) | # AVX2 (1 << 9) # REP MOVSB/STOSB ), ] must_be_unset = [ (0x1, 0x0, "ecx", (1 << 15) # PDCM ), (0x7, 0x0, "ebx", (1 << 16) | # AVX512F (1 << 17) | # AVX512DQ (1 << 18) | # RDSEED (1 << 19) | # ADX (1 << 23) | # CLFLUSHOPT (1 << 24) | # CLWB (1 << 29) | # SHA (1 << 30) | # AVX512BW (1 << 31) # AVX512VL ), (0x7, 0x0, "ecx", (1 << 1) | # AVX512_VBMI (1 << 6) | # AVX512_VBMI2 (1 << 8) | # GFNI (1 << 9) | # VAES (1 << 10) | # VPCLMULQDQ (1 << 11) | # AVX512_VNNI (1 << 12) | # AVX512_BITALG (1 << 14) | # AVX512_VPOPCNTDQ (1 << 22) # RDPID/IA32_TSC_AUX ), (0x7, 0x0, "edx", (1 << 2) | # AVX512_4VNNIW (1 << 3) | # AVX512_4FMAPS (1 << 4) | # Fast Short REP MOV (1 << 8) # AVX512_VP2INTERSECT ), (0x80000001, 0x0, "ecx", (1 << 6) | # SSE4A (1 << 7) | # MisAlignSee (1 << 8) | # PREFETCHW (1 << 29) # MwaitExtended ), (0x80000001, 0x0, "edx", (1 << 22) | # MmxExt (1 << 25) # FFXSR ), (0x80000008, 0x0, "ebx", (1 << 0) | # CLZERO (1 << 4) | # RDPRU (1 << 8) | # MCOMMIT (1 << 9) | # WBNOINVD (1 << 13) # INT_WBINVD ), ] # fmt: on cpuid_utils.check_cpuid_feat_flags( vm, must_be_set, must_be_unset, ) ================================================ FILE: tests/integration_tests/functional/test_gdb.py ================================================ # Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
# SPDX-License-Identifier: Apache-2.0 """A test that ensures that firecracker works with GDB feature enabled.""" import os import platform import signal import subprocess import tempfile from pathlib import Path import pytest import host_tools.cargo_build from framework.microvm import MicroVMFactory @pytest.mark.skipif( platform.machine() != "x86_64", reason="GDB requires a vmlinux but we ship a uImage for ARM in our CI", ) def test_gdb_connects(guest_kernel_linux_6_1, rootfs): """Checks that GDB works in a FC VM""" bin_dir = host_tools.cargo_build.build_gdb() vmfcty = MicroVMFactory(bin_dir) kernel_dbg = guest_kernel_linux_6_1.parent / "debug" / guest_kernel_linux_6_1.name uvm = vmfcty.build(kernel_dbg, rootfs) uvm.spawn(validate_api=False) uvm.add_net_iface() uvm.basic_config() uvm.enable_gdb() chroot_gdb_socket = Path(uvm.jailer.chroot_path(), uvm.gdb_socket) gdb_commands = f""" target remote {chroot_gdb_socket} hbreak start_kernel # continue to start_kernel continue # continue boot until interrupted continue """ with tempfile.NamedTemporaryFile( mode="w", suffix=".gdb", delete=False, prefix="fc_gdb_" ) as f: f.write(gdb_commands) gdb_script = f.name gdb_proc = subprocess.Popen( f""" until [ -S {chroot_gdb_socket} ]; do echo 'waiting for {chroot_gdb_socket}'; sleep 1; done; gdb {kernel_dbg} -batch -x {gdb_script} """, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, ) # start the VM and wait for it to be running uvm.start() # the VM started successfully, let's kill everything gdb_proc.terminate() os.kill(uvm.firecracker_pid, signal.SIGKILL) uvm.mark_killed() # verify that GDB hit the breakpoint on start_kernel stdout, stderr = gdb_proc.communicate(timeout=10) assert ( "hit Breakpoint 1, start_kernel" in stdout ), f"Breakpoint wasn't hit:\nstdout:\n{stdout}\n\nstderr:\n{stderr}" ================================================ FILE: tests/integration_tests/functional/test_instrumented_firecracker.py 
================================================ # Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. # SPDX-License-Identifier: Apache-2.0 """ Integration test verifying that Firecracker builds and runs correctly when instrumented with the `log_instrument::instrument` macro. Ensures that TRACE-level logs and entry/exit markers (ThreadId(...) >> / <<) are emitted when the binary is built with the `tracing` feature. """ import platform import re from pathlib import Path import pytest from framework import utils from host_tools.cargo_build import cargo, get_binary # Typical markers emitted by the `log_instrument` macro TRACE_LEVEL_HINT = "TRACE" MARKER_REGEX = re.compile(r"ThreadId\(\d+\).*?(?:>>|<<)") PATHS_TO_INSTRUMENT = [ "../src/firecracker/src/main.rs", "../src/firecracker/src/api_server", "../src/vmm/src/lib.rs", "../src/vmm/src/builder.rs", ] TMP_BUILD_DIR = "../test_instrumented_firecracker_build" ARCH_STR = f"{platform.machine()}" def build_instrumented_binary(): """Builds an instrumented Firecracker binary with tracing instrumentation.""" # we need a different directory to avoid overriding the main bin instrumented_binary_dir = ( Path(TMP_BUILD_DIR) / f"{ARCH_STR}-unknown-linux-musl" / "release" ) clippy_tracing = get_binary("clippy-tracing") for p in PATHS_TO_INSTRUMENT: utils.check_output( f"{clippy_tracing} --action fix --suffix log_instrument:: --cfg-attr 'feature =\"tracing\"' --path {p}" ) cargo( "build", f"--workspace --target {platform.machine()}-unknown-linux-musl --release " f"--features tracing --bin firecracker", env={"CARGO_TARGET_DIR": TMP_BUILD_DIR}, ) return get_binary("firecracker", binary_dir=instrumented_binary_dir) def cleanup_instrumentation(): """Cleans up tracing instrumentation from the Firecracker binary.""" clippy_tracing = get_binary("clippy-tracing") for p in PATHS_TO_INSTRUMENT: utils.check_output( f"{clippy_tracing} --action strip --suffix log_instrument:: --cfg-attr 'feature =\"tracing\"' --path {p}" ) 
@pytest.fixture(scope="module")
def instrumented_binary():
    """Build and provide the path to an instrumented Firecracker binary.

    Module-scoped so the (expensive) instrumented build happens once per
    test module; source instrumentation is stripped again on teardown.
    """
    binary_path = build_instrumented_binary()
    yield binary_path
    cleanup_instrumentation()


def test_log_instrument_firecracker_basic_functionality(
    instrumented_binary, microvm_factory
):
    """Test that instrumented Firecracker can start and handle basic API calls with trace logging.

    Verifies that log-instrument markers appear only after the logger level
    is raised to Trace at runtime, and that both function entry (>>) and
    exit (<<) markers are emitted.
    """
    vm = microvm_factory.build(fc_binary_path=instrumented_binary)
    # Start at Info level: instrument markers are TRACE-level and must NOT
    # show up yet.
    vm.spawn(log_level="Info", log_show_level=True, log_show_origin=True)

    # Generate some log traffic
    _ = vm.api.describe.get()
    _ = vm.api.machine_config.get()
    # Snapshot of the log captured while still at Info level.
    pre_tracing_log_data = vm.log_data

    # Ensure TRACE logs are being captured
    logger_config = {
        "level": "Trace",
        "show_level": True,
        "show_log_origin": True,
    }
    _ = vm.api.logger.put(**logger_config)

    # Another API call after enabling TRACE
    _ = vm.api.describe.get()

    assert (
        TRACE_LEVEL_HINT not in pre_tracing_log_data
    ), "TRACE level logs were found before setting log level to TRACE. "
    pre_tracing_log_matches = re.findall(MARKER_REGEX, pre_tracing_log_data)
    assert (
        len(pre_tracing_log_matches) == 0
    ), f"Expected no log-instrument traces in logs before enabling TRACE, but found: {pre_tracing_log_matches}"

    # vm.log_data is cumulative, so this includes the pre-trace portion plus
    # everything logged after the level change.
    post_tracing_log_data = vm.log_data
    assert (
        TRACE_LEVEL_HINT in post_tracing_log_data
    ), "Expected TRACE level logs in output"
    post_tracing_log_matches = re.findall(MARKER_REGEX, post_tracing_log_data)
    assert (
        len(post_tracing_log_matches) > 0
    ), f"Expected to find log-instrument traces in logs, but found none. Log data: {post_tracing_log_data[:1000]}..."

    entry_traces = [match for match in post_tracing_log_matches if ">>" in match]
    exit_traces = [match for match in post_tracing_log_matches if "<<" in match]
    assert len(entry_traces) > 0, "Expected to find function entry traces (>>)"
    assert len(exit_traces) > 0, "Expected to find function exit traces (<<)"

    meaningful_keywords = ["vmm", "request", "response"]
    # Ensure that each of the meaningful keywords is present in at least one trace
    # match from the post-tracing logs.
    assert all(
        any(keyword.lower() in trace.lower() for trace in post_tracing_log_matches)
        for keyword in meaningful_keywords
    ), (
        f"Expected to find traces from meaningful keywords {meaningful_keywords}, "
        f"but traces were: {post_tracing_log_matches[:10]}..."
    )


================================================
FILE: tests/integration_tests/functional/test_kernel_cmdline.py
================================================
# Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0

"""Test kernel commandline behavior."""

from framework.microvm import Serial


def test_init_params(uvm_plain):
    """Correct propagation of boot args to the kernel's command line.

    Test that init's parameters (the ones present after "--") do not get
    altered or misplaced.
    """
    vm = uvm_plain
    vm.help.enable_console()
    # serial_out_path=None: we read the console directly via Serial below
    # instead of redirecting it to a file.
    vm.spawn(serial_out_path=None)
    vm.memory_monitor = None
    # We will override the init with /bin/cat so that we try to read the
    # Ubuntu version from the /etc/issue file.
    vm.basic_config(
        vcpu_count=1,
        boot_args="console=ttyS0 reboot=k panic=1 swiotlb=noforce init=/bin/cat -- /etc/issue",
    )
    vm.start()

    serial = Serial(vm)
    serial.open()
    # If the string does not show up, the test will fail.
    serial.rx(token="Ubuntu 24.04")


================================================
FILE: tests/integration_tests/functional/test_kvm_ptp.py
================================================
# Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0 """Check that the kvm_ptp device works""" import pytest def test_kvm_ptp(uvm_any_booted): """Test kvm_ptp is usable""" vm = uvm_any_booted if vm.guest_kernel_version[:2] < (6, 1): pytest.skip("Only supported in kernel 6.1 and after") _, dmesg, _ = vm.ssh.check_output("dmesg |grep -i ptp") assert "PTP clock support registered" in dmesg # wait up to 5s to see the PTP device vm.ssh.check_output("udevadm wait -t 5 /dev/ptp0") # phc_ctl[14515.127]: clock time is 1697545854.728335694 or Tue Oct 17 12:30:54 2023 vm.ssh.check_output("phc_ctl /dev/ptp0 -- get") ================================================ FILE: tests/integration_tests/functional/test_log_instrument.py ================================================ # Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. # SPDX-License-Identifier: Apache-2.0 """Checks that the output of instrumentation examples is correct""" import pytest from framework import utils from host_tools.cargo_build import get_binary EXPECTED_OUTPUTS = { "one": """[2023-10-12T16:29:00Z TRACE log_instrument] ThreadId(1)>>one [2023-10-12T16:29:00Z DEBUG one] cmp: true [2023-10-12T16:29:00Z TRACE log_instrument] ThreadId(1)<>one [2023-10-12T16:29:00Z DEBUG one] cmp: false [2023-10-12T16:29:00Z TRACE log_instrument] ThreadId(1)<>one [2023-10-12T16:29:00Z DEBUG one] cmp: false [2023-10-12T16:29:00Z TRACE log_instrument] ThreadId(1)<>one [2023-10-12T16:29:30Z TRACE log_instrument] ThreadId(1)<>one [2023-10-12T16:29:30Z DEBUG two] [\"a\", \"b\"] [2023-10-12T16:29:30Z TRACE log_instrument] ThreadId(1)<>one [2023-10-12T16:30:04Z TRACE log_instrument] ThreadId(1)<>one [2023-10-12T16:30:04Z DEBUG three] [\"a\", \"b\"] [2023-10-12T16:30:04Z TRACE log_instrument] ThreadId(1)<>one [2023-10-12T16:30:37Z TRACE log_instrument] ThreadId(1)<>one [2023-10-12T16:30:37Z DEBUG four] [\"a\", \"b\"] [2023-10-12T16:30:37Z TRACE log_instrument] ThreadId(1)<>one [2023-10-12T16:31:12Z TRACE log_instrument] 
ThreadId(1)<>one [2023-10-12T16:31:12Z DEBUG five] [\"a\", \"b\"] [2023-10-12T16:31:12Z DEBUG five] 23 [2023-10-12T16:31:12Z TRACE log_instrument] ThreadId(1)<>one [2023-10-12T16:31:54Z DEBUG six] cmp: true [2023-10-12T16:31:54Z TRACE log_instrument] ThreadId(1)<>one [2023-10-12T16:31:54Z DEBUG six] cmp: false [2023-10-12T16:31:54Z TRACE log_instrument] ThreadId(1)::one>>two [2023-10-12T16:31:54Z DEBUG six] res: 0 [2023-10-12T16:31:54Z TRACE log_instrument] ThreadId(1)::one<>one [2023-10-12T16:31:54Z DEBUG six] cmp: false [2023-10-12T16:31:54Z TRACE log_instrument] ThreadId(1)::one>>two [2023-10-12T16:31:54Z DEBUG six] res: 1 [2023-10-12T16:31:54Z TRACE log_instrument] ThreadId(1)::one< 0 # Wait for token to expire. time.sleep(1) # Check `GET` request fails when expired token is provided. run_guest_cmd( ssh_connection, generate_mmds_get_request(DEFAULT_IPV4, token=token), "MMDS token not valid.", ) def test_deprecated_mmds_config(uvm_plain): """ Test deprecated Mmds configs. """ test_microvm = uvm_plain test_microvm.spawn() test_microvm.basic_config() # Attach network device. test_microvm.add_net_iface() # Use the default version, which is 1 for backwards compatibility. 
response = configure_mmds(test_microvm, iface_ids=["eth0"]) assert "deprecation" in response.headers response = configure_mmds(test_microvm, iface_ids=["eth0"], version="V1") assert "deprecation" in response.headers response = configure_mmds(test_microvm, iface_ids=["eth0"], version="V2") assert "deprecation" not in response.headers test_microvm.start() datapoints = test_microvm.get_all_metrics() assert ( sum( datapoint["deprecated_api"]["deprecated_http_api_calls"] for datapoint in datapoints ) == 2 ) def _configure_with_aws_credentials(microvm, version, imds_compat): microvm.spawn() microvm.basic_config() microvm.add_net_iface() # V2 requires session tokens for GET requests configure_mmds( microvm, iface_ids=["eth0"], version=version, imds_compat=imds_compat ) now = datetime.now(timezone.utc) credentials = { "Code": "Success", "LastUpdated": now.strftime("%Y-%m-%dT%H:%M:%SZ"), "Type": "AWS-HMAC", "AccessKeyId": "AAA", "SecretAccessKey": "BBB", "Token": "CCC", "Expiration": (now + timedelta(seconds=60)).strftime("%Y-%m-%dT%H:%M:%SZ"), } data_store = { "latest": { "meta-data": { "iam": { "security-credentials": {"role": json.dumps(credentials, indent=2)} }, "placement": {"availability-zone": "us-east-1a"}, } } } populate_data_store(microvm, data_store) microvm.start() ssh_connection = microvm.ssh run_guest_cmd(ssh_connection, f"ip route add {DEFAULT_IPV4} dev eth0", "") return ssh_connection @pytest.mark.parametrize("version", MMDS_VERSIONS) @pytest.mark.parametrize("imds_compat", [None, False, True]) @pytest.mark.parametrize("sdk", ["py", "go"]) def test_aws_credential_provider(uvm_plain, version, imds_compat, sdk): """ Test AWS SDK's credential provider works on MMDS """ ssh_connection = _configure_with_aws_credentials(uvm_plain, version, imds_compat) match sdk: case "py": cmd = r"""python3 - < {out_filename} &") vm.ssh.check_output( f"nohup socat UDP4-LISTEN:{port} CREATE:{out_filename} > /dev/null 2>&1 &" ) # wait for socat server to spin up time.sleep(1) # Try 
to send a UDP message from host with UDP offload enabled vm.netns.check_output(f"python3 ./host_tools/udp_offload.py {vm.ssh.host} {port}") # Check that the server received the message # Allow for some delay due to the asynchronous nature of the test for attempt in Retrying( stop=stop_after_attempt(10), wait=wait_fixed(0.1), reraise=True, ): with attempt: ret = vm.ssh.check_output(f"sync; cat {out_filename}") assert ret.stdout == message, f"{ret.stdout=} {ret.stderr=}" ================================================ FILE: tests/integration_tests/functional/test_net_config_space.py ================================================ # Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. # SPDX-License-Identifier: Apache-2.0 """Tests on devices config space.""" import random import string import subprocess from threading import Thread import host_tools.network as net_tools # pylint: disable=import-error # pylint: disable=global-statement PAYLOAD_DATA_SIZE = 20 def test_net_change_mac_address(uvm_plain_any, change_net_config_space_bin): """ Test changing the MAC address of the network device. """ test_microvm = uvm_plain_any test_microvm.help.enable_console() test_microvm.spawn() test_microvm.basic_config(boot_args="ipv6.disable=1") # Data exchange interface ('eth0' in guest). test_microvm.add_net_iface() # Control interface ('eth1' in guest). test_microvm.add_net_iface() test_microvm.start() # Create the control ssh connection. ssh_conn = test_microvm.ssh_iface(1) host_ip0 = test_microvm.iface["eth0"]["iface"].host_ip guest_ip0 = test_microvm.iface["eth0"]["iface"].guest_ip # Start a server(host) - client(guest) communication with the following # parameters. host_port = 4444 iterations = 1 _exchange_data(test_microvm.jailer, ssh_conn, host_ip0, host_port, iterations) fc_metrics = test_microvm.flush_metrics() assert fc_metrics["net"]["tx_spoofed_mac_count"] == 0 # Change the MAC address of the network data interface. 
# This change will be propagated only inside the net device kernel struct # and will be used for ethernet frames formation when data is exchanged # on the network interface. mac = "06:05:04:03:02:01" mac_hex = "0x060504030201" guest_if1_name = net_tools.get_guest_net_if_name(ssh_conn, guest_ip0) assert guest_if1_name is not None _change_guest_if_mac(ssh_conn, mac, guest_if1_name) _exchange_data(test_microvm.jailer, ssh_conn, host_ip0, host_port, iterations) # `tx_spoofed_mac_count` metric was incremented due to the MAC address # change. fc_metrics = test_microvm.flush_metrics() assert fc_metrics["net"]["tx_spoofed_mac_count"] > 0 net_addr_base = _get_net_mem_addr_base(ssh_conn, guest_if1_name) assert net_addr_base is not None config_offset = 0x4000 if test_microvm.pci_enabled else 0x100 dev_addr = net_addr_base + config_offset # Write into '/dev/mem' the same mac address, byte by byte. # This changes the MAC address physically, in the network device registers. # After this step, the net device kernel struct MAC address will be the # same with the MAC address stored in the network device registers. The # `tx_spoofed_mac_count` metric shouldn't be incremented later on. rmt_path = "/tmp/change_net_config_space" test_microvm.ssh.scp_put(change_net_config_space_bin, rmt_path) cmd = f"chmod u+x {rmt_path} && {rmt_path} {dev_addr} {mac_hex}" # This should be executed successfully. _, stdout, _ = ssh_conn.check_output(cmd) assert stdout == mac # Discard any parasite data exchange which might've been # happened on the emulation thread while the config space # was changed on the vCPU thread. test_microvm.flush_metrics() _exchange_data(test_microvm.jailer, ssh_conn, host_ip0, host_port, iterations) fc_metrics = test_microvm.flush_metrics() assert fc_metrics["net"]["tx_spoofed_mac_count"] == 0 # Try again, just to be extra sure. 
_exchange_data(test_microvm.jailer, ssh_conn, host_ip0, host_port, iterations) fc_metrics = test_microvm.flush_metrics() assert fc_metrics["net"]["tx_spoofed_mac_count"] == 0 def _create_server(jailer, host_ip, port, iterations): # Wait for `iterations` TCP segments, on one connection. # This server has to run under the network namespace, initialized # by the integration test microvm jailer. # pylint: disable=global-statement script = ( "import socket\n" "s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)\n" "s.setsockopt(\n" " socket.SOL_SOCKET, socket.SO_REUSEADDR,\n" " s.getsockopt(socket.SOL_SOCKET,\n" " socket.SO_REUSEADDR) | 1\n" ")\n" "s.bind(('{}', {}))\n" "s.listen(1)\n" "conn, addr = s.accept()\n" "recv_iterations = {}\n" "while recv_iterations > 0:\n" " data = conn.recv({})\n" " recv_iterations -= 1\n" "conn.close()\n" "s.close()" ) # The host uses Python3 cmd = 'python3 -c "{}"'.format( script.format(host_ip, port, iterations, PAYLOAD_DATA_SIZE) ) netns_cmd = jailer.netns.cmd_prefix() + " " + cmd exit_code = subprocess.call(netns_cmd, shell=True) assert exit_code == 0 def _send_data_g2h(ssh_connection, host_ip, host_port, iterations, data, retries): script = ( "import socket\n" "import time\n" "s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)\n" "retries={}\n" "while retries > 0:\n" " try:\n" " s.connect(('{}',{}))\n" " retries = 0\n" " except Exception as e:\n" " retries -= 1\n" " time.sleep(1)\n" " if retries == 0:\n" " exit(1)\n" "send_iterations={}\n" "while send_iterations > 0:\n" " s.sendall(b'{}')\n" " send_iterations -= 1\n" "s.close()" ) # The guest has Python3 cmd = 'python3 -c "{}"'.format( script.format(retries, host_ip, str(host_port), iterations, data) ) # Wait server to initialize. _, _, stderr = ssh_connection.check_output(cmd) # If this assert fails, a connection refused happened. 
assert stderr == "" def _start_host_server_thread(jailer, host_ip, host_port, iterations): thread = Thread( target=_create_server, args=(jailer, host_ip, host_port, iterations) ) thread.start() return thread def _exchange_data(jailer, ssh_control_connection, host_ip, host_port, iterations): server_thread = _start_host_server_thread(jailer, host_ip, host_port, iterations) # Generate random data. letters = string.ascii_lowercase data = "".join(random.choice(letters) for _ in range(PAYLOAD_DATA_SIZE)) # We need to synchronize host server with guest client. Server thread has # to start listening for incoming connections before the client tries to # connect. To synchronize, we implement a polling mechanism, retrying to # establish a connection, on the client side, mechanism to retry guest # client socket connection, in case the server had not started yet. _send_data_g2h( ssh_control_connection, host_ip, host_port, iterations, data, retries=5 ) # Wait for host server to receive the data sent by the guest client. server_thread.join() def _change_guest_if_mac(ssh_connection, guest_if_mac, guest_if_name): cmd = "ip link set dev {} address ".format(guest_if_name) + guest_if_mac # The connection will be down, because changing the mac will issue down/up # on the interface. ssh_connection.run(cmd) def _find_iomem_range(ssh_connection, dev_name): # `/proc/iomem` includes information of the system's MMIO registered # slots. 
It looks like this: # # ``` # ~ cat /proc/iomem # 00000000-00000fff : Reserved # 00001000-0007ffff : System RAM # 00080000-0009ffff : Reserved # 000f0000-000fffff : System ROM # 00100000-0fffffff : System RAM # 01000000-018031d0 : Kernel code # 018031d1-01c863bf : Kernel data # 01df8000-0209ffff : Kernel bss # d0000000-d0000fff : LNRO0005:00 # d0000000-d0000fff : LNRO0005:00 # d0001000-d0001fff : LNRO0005:01 # d0001000-d0001fff : LNRO0005:01 # ``` # # So, to find the address range of a device we just `cat` # its contents and grep for the VirtIO device name, which # with ACPI is "LNRO0005:XY". cmd = f"cat /proc/iomem | grep -m 1 {dev_name}" _, stdout, _ = ssh_connection.check_output(cmd) # Take range in the form 'start-end' from line. The line looks like this: # d00002000-d0002fff : LNRO0005:02 mem_range = stdout.strip().split(" ")[0] # Parse range into (start, end) integers tokens = mem_range.split("-") return (int(tokens[0], 16), int(tokens[1], 16)) def _get_net_mem_addr_base(ssh_connection, if_name): """Get the net device memory start address.""" _, stdout, _ = ssh_connection.check_output(f"find /sys/devices -name {if_name}") device_paths = stdout.strip().split("\n") assert ( len(device_paths) == 1 ), f"No or multiple devices found for {if_name}:\n{stdout}" device_path = device_paths[0] parts = device_path.split("/") assert len(parts) >= 6, f"Unexpected device path: {device_path}" device = parts[-4] start_addr, _ = _find_iomem_range(ssh_connection, device) return start_addr ================================================ FILE: tests/integration_tests/functional/test_pause_resume.py ================================================ # Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
# SPDX-License-Identifier: Apache-2.0 """Basic tests scenarios for snapshot save/restore.""" import platform import time from subprocess import TimeoutExpired import pytest def verify_net_emulation_paused(metrics): """Verify net emulation is paused based on provided metrics.""" net_metrics = metrics["net"] assert net_metrics["rx_queue_event_count"] == 0 assert net_metrics["rx_tap_event_count"] == 0 assert net_metrics["rx_bytes_count"] == 0 assert net_metrics["rx_packets_count"] == 0 assert net_metrics["rx_fails"] == 0 assert net_metrics["rx_count"] == 0 assert net_metrics["tap_read_fails"] == 0 assert net_metrics["tap_write_fails"] == 0 assert net_metrics["tx_bytes_count"] == 0 assert net_metrics["tx_fails"] == 0 assert net_metrics["tx_count"] == 0 assert net_metrics["tx_packets_count"] == 0 assert net_metrics["tx_queue_event_count"] == 0 print(net_metrics) def test_pause_resume(uvm_nano): """ Test scenario: boot/pause/resume. """ microvm = uvm_nano microvm.add_net_iface() # Pausing the microVM before being started is not allowed. with pytest.raises(RuntimeError): microvm.api.vm.patch(state="Paused") # Resuming the microVM before being started is also not allowed. with pytest.raises(RuntimeError): microvm.api.vm.patch(state="Resumed") microvm.start() # Pausing the microVM after it's been started is successful. microvm.api.vm.patch(state="Paused") # Flush and reset metrics as they contain pre-pause data. microvm.flush_metrics() # Verify guest is no longer active. with pytest.raises(TimeoutExpired): microvm.ssh.check_output("true", timeout=1) # Verify emulation was indeed paused and no events from either # guest or host side were handled. verify_net_emulation_paused(microvm.flush_metrics()) # Pausing the microVM when it is already `Paused` is allowed # (microVM remains in `Paused` state). microvm.api.vm.patch(state="Paused") # Resuming the microVM is successful. microvm.api.vm.patch(state="Resumed") # Verify guest is active again. 
microvm.ssh.check_output("true") # Resuming the microVM when it is already `Resumed` is allowed # (microVM remains in the running state). microvm.api.vm.patch(state="Resumed") # Verify guest is still active. microvm.kill() def test_describe_instance(uvm_nano): """ Test scenario: DescribeInstance different states. """ microvm = uvm_nano # Check MicroVM state is "Not started" response = microvm.api.describe.get() assert "Not started" in response.text # Start MicroVM microvm.start() # Check MicroVM state is "Running" response = microvm.api.describe.get() assert "Running" in response.text # Pause MicroVM microvm.api.vm.patch(state="Paused") # Check MicroVM state is "Paused" response = microvm.api.describe.get() assert "Paused" in response.text # Resume MicroVM response = microvm.api.vm.patch(state="Resumed") # Check MicroVM state is "Running" after VM is resumed response = microvm.api.describe.get() assert "Running" in response.text microvm.kill() def test_pause_resume_preboot(uvm_nano): """ Test pause/resume operations are not allowed pre-boot. """ basevm = uvm_nano expected_err = "not supported before starting the microVM" # Try to pause microvm when not running, it must fail. with pytest.raises(RuntimeError, match=expected_err): basevm.api.vm.patch(state="Paused") # Try to resume microvm when not running, it must fail. with pytest.raises(RuntimeError, match=expected_err): basevm.api.vm.patch(state="Resumed") @pytest.mark.skipif( platform.machine() != "x86_64", reason="Only x86_64 supports pvclocks." ) def test_kvmclock_ctrl(uvm_plain_any): """ Test that pausing vCPUs does not trigger a soft lock-up """ microvm = uvm_plain_any microvm.help.enable_console() microvm.spawn() # With 2 vCPUs under certain conditions soft lockup warnings can rarely be in dmesg causing this test to fail. # Example of the warning: `watchdog: BUG: soft lockup - CPU#0 stuck for (x)s! [(udev-worker):758]` # With 1 vCPU this intermittent issue doesn't occur. 
If the KVM_CLOCK_CTRL IOCTL is not made # the test will fail with 1 vCPU, so we can assert the call to the IOCTL is made. microvm.basic_config(vcpu_count=1) microvm.add_net_iface() microvm.start() # Launch reproducer in host # This launches `ls -R /` in a loop inside the guest. The command writes its output in the # console. This detail is important as it writing in the console seems to increase the probability # that we will pause the execution inside the kernel and cause a lock up. Setting KVM_CLOCK_CTRL # bit that informs the guest we're pausing the vCPUs, should avoid that lock up. microvm.ssh.check_output( "timeout 60 sh -c 'while true; do ls -R /; done' > /dev/ttyS0 2>&1 < /dev/null &" ) for _ in range(12): microvm.api.vm.patch(state="Paused") time.sleep(5) microvm.api.vm.patch(state="Resumed") dmesg = microvm.ssh.check_output("dmesg").stdout assert "rcu_sched self-detected stall on CPU" not in dmesg assert "rcu_preempt detected stalls on CPUs/tasks" not in dmesg assert "BUG: soft lockup -" not in dmesg ================================================ FILE: tests/integration_tests/functional/test_pci.py ================================================ # Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. # SPDX-License-Identifier: Apache-2.0 """Tests for the PCI devices""" def test_pci_root_present(uvm_any_with_pci): """ Test that a guest with PCI enabled has a PCI root device. """ vm = uvm_any_with_pci devices = vm.ssh.run("lspci").stdout.strip().split("\n") print(devices) assert devices[0].startswith( "00:00.0 Host bridge: Intel Corporation Device" ), "PCI root not found in guest" def test_pci_disabled(uvm_any_without_pci): """ Test that a guest with PCI disabled does not have a PCI root device but still works. 
""" vm = uvm_any_without_pci _, stdout, _ = vm.ssh.run("lspci") assert ( "00:00.0 Host bridge: Intel Corporation Device" not in stdout ), "PCI root not found in guest" ================================================ FILE: tests/integration_tests/functional/test_pmem.py ================================================ # Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. # SPDX-License-Identifier: Apache-2.0 """Tests for the virtio-pmem device.""" import json import os import host_tools.drive as drive_tools from framework import utils ALIGNMENT = 2 << 20 def align(size: int) -> int: """ Align the value to ALIGNMENT """ return (size + ALIGNMENT - 1) & ~(ALIGNMENT - 1) def check_pmem_exist(vm, index, root, read_only, size, extension): """ Check the pmem exist with correct parameters """ vm.ssh.check_output(f"ls /dev/pmem{index}") if root: _, stdout, _ = vm.ssh.check_output("mount") if read_only: assert f"/dev/pmem0 on / type {extension} (ro" in stdout else: assert f"/dev/pmem0 on / type {extension} (rw" in stdout _, stdout, _ = vm.ssh.check_output("lsblk -J") j = json.loads(stdout) blocks = j["blockdevices"] for block in blocks: if block["name"] == f"pmem{index}": assert block["size"][-1] == "M" block_size_mb = int(block["size"][:-1]) assert int(block_size_mb << 20) == size if root: assert "/" in block["mountpoints"] return assert False def test_pmem_add(uvm_plain_any, microvm_factory): """ Test addition of pmem devices to the VM and writes persistance """ vm = uvm_plain_any vm.spawn() vm.basic_config(add_root_device=True) vm.add_net_iface() # Pmem should work with non 2MB aligned files as well pmem_size_mb_1 = 1 fs_1 = drive_tools.FilesystemFile( os.path.join(vm.fsfiles, "scratch_1"), size=pmem_size_mb_1 ) pmem_size_mb_2 = 2 fs_2 = drive_tools.FilesystemFile( os.path.join(vm.fsfiles, "scratch_2"), size=pmem_size_mb_2 ) vm.add_pmem("pmem_1", fs_1.path, False, False) vm.add_pmem("pmem_2", fs_2.path, False, True) vm.start() # Both 1MB and 2MB block 
will show as 2MB because of # the aligment check_pmem_exist(vm, 0, False, False, align(pmem_size_mb_1 << 20), "ext4") check_pmem_exist(vm, 1, False, True, align(pmem_size_mb_2 << 20), "ext4") # Write something to the pmem0 to see that it is indeed saved to # underlying file when VM shots down test_string = "testing pmem persistance" vm.ssh.check_output("mkdir /tmp/mnt") vm.ssh.check_output("mount /dev/pmem0 -o dax=always /tmp/mnt") vm.ssh.check_output(f'echo "{test_string}" > /tmp/mnt/test') snapshot = vm.snapshot_full() # Killing or rebooting an old VM will make OS to flush writes to the underlying file vm.kill() restored_vm = microvm_factory.build_from_snapshot(snapshot) check_pmem_exist(restored_vm, 0, False, False, align(pmem_size_mb_1 << 20), "ext4") check_pmem_exist(restored_vm, 1, False, True, align(pmem_size_mb_2 << 20), "ext4") # The /tmp/mnt and the mount still persist after snapshot restore. # Since we used `dax=always` during mounting there is no data in guest page # cache, so the read happens directly from pmem device _, stdout, _ = restored_vm.ssh.check_output("cat /tmp/mnt/test") assert stdout.strip() == test_string def test_pmem_add_as_root_rw(uvm_plain_any, rootfs_rw, microvm_factory): """ Test addition of a single root pmem device in read-write mode """ vm = uvm_plain_any vm.memory_monitor = None vm.monitors = [] vm.spawn() vm.basic_config(add_root_device=False) vm.add_net_iface() rootfs_size = os.path.getsize(rootfs_rw) vm.add_pmem("pmem", rootfs_rw, True, False) vm.start() check_pmem_exist(vm, 0, True, False, align(rootfs_size), "ext4") snapshot = vm.snapshot_full() restored_vm = microvm_factory.build_from_snapshot(snapshot) check_pmem_exist(restored_vm, 0, True, False, align(rootfs_size), "ext4") def test_pmem_add_as_root_ro(uvm_plain_any, rootfs, microvm_factory): """ Test addition of a single root pmem device in read-only mode """ vm = uvm_plain_any vm.memory_monitor = None vm.monitors = [] vm.spawn() vm.basic_config(add_root_device=False) 
vm.add_net_iface() rootfs_size = os.path.getsize(rootfs) vm.add_pmem("pmem", rootfs, True, True) vm.start() check_pmem_exist(vm, 0, True, True, align(rootfs_size), "squashfs") snapshot = vm.snapshot_full() restored_vm = microvm_factory.build_from_snapshot(snapshot) check_pmem_exist(restored_vm, 0, True, True, align(rootfs_size), "squashfs") def inside_buff_cache(vm) -> int: """Get buffer/cache usage from inside the vm""" _, stdout, _ = vm.ssh.check_output("free") # Get the `buffer/cache` of the `free` command which represents # kernel page cache size return int(stdout.splitlines()[1].split()[5]) def outside_rssanon(vm) -> int: """Get RssAnon usage from outside the vm""" cmd = f"cat /proc/{vm.firecracker_pid}/status | grep RssAnon" _, stdout, _ = utils.check_output(cmd) return int(stdout.split()[1]) def test_pmem_dax_memory_saving( microvm_factory, guest_kernel_acpi, rootfs_rw, ): """ Test that booting from pmem with DAX enabled indeed saves memory in the guest by not needing guest to use its page cache """ # Boot from a block device vm = microvm_factory.build( guest_kernel_acpi, rootfs_rw, pci=True, monitor_memory=False ) vm.spawn() vm.basic_config() vm.add_net_iface() vm.start() block_cache_usage = inside_buff_cache(vm) block_rss_usage = outside_rssanon(vm) # Boot from pmem with DAX enabled for root device vm_pmem = microvm_factory.build( guest_kernel_acpi, rootfs_rw, pci=True, monitor_memory=False ) vm_pmem.spawn() vm_pmem.basic_config( add_root_device=False, boot_args="reboot=k panic=1 nomodule swiotlb=noforce console=ttyS0 rootflags=dax", ) vm_pmem.add_net_iface() vm_pmem.add_pmem("pmem", rootfs_rw, True, False) vm_pmem.start() pmem_cache_usage = inside_buff_cache(vm_pmem) pmem_rss_usage = outside_rssanon(vm_pmem) # The pmem cache usage should be much lower than drive cache usage. 
# The 50% is an arbitrary number, but does provide a good guarantee # that DAX is working assert ( pmem_cache_usage < block_cache_usage * 0.5 ), f"{block_cache_usage} <= {pmem_cache_usage}" # RssAnon difference will be smaller, so no multipliers assert ( pmem_rss_usage < block_rss_usage ), f"{block_cache_usage} <= {pmem_cache_usage}" ================================================ FILE: tests/integration_tests/functional/test_pvtime.py ================================================ # Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. # SPDX-License-Identifier: Apache-2.0 """Tests for verifying the PVTime device is enabled on aarch64.""" import pytest from framework.properties import global_props @pytest.mark.skipif( global_props.cpu_architecture != "aarch64", reason="Only run in aarch64" ) def test_guest_has_pvtime_enabled(uvm_plain): """ Check that the guest kernel has enabled PV steal time. """ vm = uvm_plain vm.spawn() vm.basic_config() vm.add_net_iface() vm.start() _, stdout, _ = vm.ssh.run("dmesg | grep 'stolen time PV'") assert ( "stolen time PV" in stdout ), "Guest kernel did not report PV steal time enabled" ================================================ FILE: tests/integration_tests/functional/test_rng.py ================================================ # Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
# SPDX-License-Identifier: Apache-2.0 """Tests for the virtio-rng device""" import pytest from framework.utils import check_entropy from host_tools.network import SSHConnection def uvm_with_rng_booted(uvm_plain_any, microvm_factory, rate_limiter): """Return a booted microvm with virtio-rng configured""" # pylint: disable=unused-argument uvm = uvm_plain_any uvm.spawn(log_level="INFO") uvm.basic_config(vcpu_count=2, mem_size_mib=256) uvm.add_net_iface() uvm.api.entropy.put(rate_limiter=rate_limiter) uvm.start() # Just stuff it in the microvm so we can look at it later uvm.rng_rate_limiter = rate_limiter return uvm def uvm_with_rng_restored(uvm_plain_any, microvm_factory, rate_limiter): """Return a restored uvm with virtio-rng configured""" uvm = uvm_with_rng_booted(uvm_plain_any, microvm_factory, rate_limiter) snapshot = uvm.snapshot_full() uvm.kill() uvm2 = microvm_factory.build_from_snapshot(snapshot) uvm2.rng_rate_limiter = uvm.rng_rate_limiter return uvm2 @pytest.fixture(params=[uvm_with_rng_booted, uvm_with_rng_restored]) def uvm_ctor(request): """Fixture to return uvms with different constructors""" return request.param @pytest.fixture(params=[None]) def rate_limiter(request): """Fixture to return different rate limiters""" return request.param @pytest.fixture def uvm_any(microvm_factory, uvm_ctor, uvm_plain_any, rate_limiter): """Return booted and restored uvms""" return uvm_ctor(uvm_plain_any, microvm_factory, rate_limiter) def list_rng_available(ssh_connection: SSHConnection) -> list[str]: """Returns a list of rng devices available in the VM""" return ( ssh_connection.check_output("cat /sys/class/misc/hw_random/rng_available") .stdout.strip() .split() ) def get_rng_current(ssh_connection: SSHConnection) -> str: """Returns the current rng device used by hwrng""" return ssh_connection.check_output( "cat /sys/class/misc/hw_random/rng_current" ).stdout.strip() def assert_virtio_rng_is_current_hwrng_device(ssh_connection: SSHConnection): """Asserts that 
virtio_rng is the current device used by hwrng""" # we expect something like virtio_rng.0 assert get_rng_current(ssh_connection).startswith( "virtio_rng" ), "virtio_rng device should be the current used by hwrng" def test_rng_not_present(uvm_nano): """ Test a guest microVM *without* an entropy device and ensure that we cannot get data from /dev/hwrng """ vm = uvm_nano vm.add_net_iface() vm.start() assert not any( rng.startswith("virtio_rng") for rng in list_rng_available(vm.ssh) ), "virtio_rng device should not be available in the uvm" def test_rng_present(uvm_any): """ Test a guest microVM with an entropy defined configured and ensure that we can access `/dev/hwrng` """ vm = uvm_any assert_virtio_rng_is_current_hwrng_device(vm.ssh) check_entropy(vm.ssh) def _get_percentage_difference(measured, base): """Return the percentage delta between the arguments.""" if measured == base: return 0 try: return ((measured - base) / base) * 100.0 except ZeroDivisionError: # It means base and only base is 0. return 100.0 def _throughput_units_multiplier(units): """ Parse the throughput units and return the multiplier that would translate the corresponding value to Bytes/sec """ if units == "kB/s": return 1000 if units == "MB/s": return 1000 * 1000 if units == "GB/s": return 1000 * 1000 * 1000 raise Exception("Unknown units") def _process_dd_output(out): """ Parse the output of `dd` and return the achieved throughput in KB/sec. """ # Example `dd` output: # # $ dd if=/dev/hwrng of=/dev/null bs=100 count=1 # 1+0 records in # 1+0 records out # 100 bytes (100 B) copied, 0.000749912 s, 133 kB/s # So we split the lines of the output and keep the last line. 
report = out.splitlines()[-1].split(" ") # Last two items in the line are value and units (value, units) = (report[-2], report[-1]) return float(value) * _throughput_units_multiplier(units) / 1000 def _get_throughput(ssh, random_bytes): """ Request `random_bytes` from `/dev/hwrng` and return the achieved throughput in KB/sec """ # Issue a `dd` command to request 100 times `random_bytes` from the device. # 100 here is used to get enough confidence on the achieved throughput. cmd = "dd if=/dev/hwrng of=/dev/null bs={} count=100".format(random_bytes) _, _, stderr = ssh.check_output(cmd) # dd gives its output on stderr return _process_dd_output(stderr) def _check_entropy_rate_limited(ssh, random_bytes, expected_kbps): """ Ask for `random_bytes` from `/dev/hwrng` in the guest and check that achieved throughput does not exceed the expected throughput by more than 2%. NOTE: 2% is accounting for the initial credits available in the buckets which can be consumed immediately. In the `dd` command we read `size * 100` bytes, where `size` is the size of the bucket. As a result, the first `size` bytes will be read "immediately" and the remaining `99 * size` bytes will be read at a rate of `size / refill_time`. So, the total test runtime will be `99 * refill_time`. That helps us calculate the expected throughput allowed from our rate limiter like this: size * 100 / (99 * refill_time) = (100 / 99) * (size / refill_time) = (100 / 99) * expected_throughput_rate = 1.01 * expected_throughput_rate (kudos to @roypat for this analysis) So, we should expect a 1% margin from the expected throughput. We use 2% for accounting for rounding/measurements errors. 
""" measured_kbps = _get_throughput(ssh, random_bytes) assert ( _get_percentage_difference(measured_kbps, expected_kbps) <= 2 ), "Expected {} KB/s, measured {} KB/s".format(expected_kbps, measured_kbps) def _rate_limiter_id(rate_limiter): """ Helper function to return a name for the rate_limiter to be used as an id for parametrized tests. """ size = rate_limiter["bandwidth"]["size"] refill_time = rate_limiter["bandwidth"]["refill_time"] return "{} KB/sec".format(float(size) / float(refill_time)) # parametrize the RNG rate limiter @pytest.mark.parametrize( "rate_limiter", [ {"bandwidth": {"size": 1000, "refill_time": 100}}, {"bandwidth": {"size": 10000, "refill_time": 100}}, {"bandwidth": {"size": 100000, "refill_time": 100}}, ], indirect=True, ids=_rate_limiter_id, ) @pytest.mark.parametrize("uvm_ctor", [uvm_with_rng_booted], indirect=True) def test_rng_bw_rate_limiter(uvm_any): """ Test that rate limiter without initial burst budget works """ vm = uvm_any size = vm.rng_rate_limiter["bandwidth"]["size"] refill_time = vm.rng_rate_limiter["bandwidth"]["refill_time"] expected_kbps = size / refill_time assert_virtio_rng_is_current_hwrng_device(vm.ssh) # Check the rate limiter using a request size equal to the size # of the token bucket. _check_entropy_rate_limited(vm.ssh, size, expected_kbps) ================================================ FILE: tests/integration_tests/functional/test_rtc.py ================================================ # Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. # SPDX-License-Identifier: Apache-2.0 """Check the well functioning af the RTC device on aarch64 platforms.""" import platform import re import pytest from framework import utils DMESG_LOG_REGEX = r"rtc-pl031\s+(\d+).rtc: setting system clock to" @pytest.mark.skipif( platform.machine() != "aarch64", reason="RTC exists only on aarch64." ) def test_rtc(uvm_plain_any): """ Test RTC functionality on aarch64. 
""" vm = uvm_plain_any vm.spawn() vm.memory_monitor = None vm.basic_config() vm.add_net_iface() vm.start() # check that the kernel creates an rtcpl031 base device. _, stdout, _ = vm.ssh.run("dmesg") rtc_log = re.findall(DMESG_LOG_REGEX, stdout) assert rtc_log is not None _, stdout, _ = vm.ssh.run("stat /dev/rtc0") assert "character special file" in stdout _, host_stdout, _ = utils.check_output("date +%s") _, guest_stdout, _ = vm.ssh.run("date +%s") assert abs(int(guest_stdout) - int(host_stdout)) < 5 ================================================ FILE: tests/integration_tests/functional/test_serial_io.py ================================================ # Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. # SPDX-License-Identifier: Apache-2.0 """Tests scenario for the Firecracker serial console.""" import fcntl import os import platform import signal import termios import time from framework import utils from framework.microvm import Serial PLATFORM = platform.machine() def test_serial_after_snapshot(uvm_plain, microvm_factory): """ Serial I/O after restoring from a snapshot. """ microvm = uvm_plain microvm.help.enable_console() microvm.spawn(serial_out_path=None) microvm.basic_config( vcpu_count=2, mem_size_mib=256, ) serial = Serial(microvm) serial.open() microvm.start() # looking for the # prompt at the end serial.rx("ubuntu-fc-uvm:~#") # Create snapshot. snapshot = microvm.snapshot_full() # Kill base microVM. microvm.kill() # Load microVM clone from snapshot. vm = microvm_factory.build() vm.help.enable_console() vm.spawn(serial_out_path=None) vm.restore_from_snapshot(snapshot, resume=True) serial = Serial(vm) serial.open() # We need to send a newline to signal the serial to flush # the login content. serial.tx("") # looking for the # prompt at the end serial.rx("ubuntu-fc-uvm:~#") serial.tx("pwd") res = serial.rx("#") assert "/root" in res def test_serial_console_login(uvm_plain_any): """ Test serial console login. 
""" microvm = uvm_plain_any microvm.help.enable_console() microvm.spawn(serial_out_path=None) # We don't need to monitor the memory for this test because we are # just rebooting and the process dies before pmap gets the RSS. microvm.memory_monitor = None # Set up the microVM with 1 vCPU and a serial console. microvm.basic_config(vcpu_count=1) microvm.start() serial = Serial(microvm) serial.open() serial.rx("ubuntu-fc-uvm:") serial.tx("id") serial.rx("uid=0(root) gid=0(root) groups=0(root)") def get_total_mem_size(pid): """Get total memory usage for a process.""" cmd = f"pmap {pid} | tail -n 1 | sed 's/^ //' | tr -s ' ' | cut -d' ' -f2" _, stdout, stderr = utils.check_output(cmd) assert stderr == "" # This assumes that the pmap returns something in the form of # 123456789K (which is typically the case for us) return float(stdout.strip()[:-1] * 1000) def send_bytes(tty, bytes_count, timeout=60): """Send data to the terminal.""" start = time.time() for _ in range(bytes_count): fcntl.ioctl(tty, termios.TIOCSTI, "\n") current = time.time() if current - start > timeout: break def test_serial_dos(uvm_plain_any): """ Test serial console behavior under DoS. """ microvm = uvm_plain_any microvm.help.enable_console() microvm.spawn() # Set up the microVM with 1 vCPU and a serial console. microvm.basic_config( vcpu_count=1, ) microvm.add_net_iface() microvm.start() # Open an fd for firecracker process terminal. tty_path = f"/proc/{microvm.firecracker_pid}/fd/0" tty_fd = os.open(tty_path, os.O_RDWR) # Check if the total memory size changed. before_size = get_total_mem_size(microvm.firecracker_pid) send_bytes(tty_fd, 100000000, timeout=1) after_size = get_total_mem_size(microvm.firecracker_pid) # Give the check a bit of tolerance (1%) since sometimes random unrelated # allocations break it. 
    assert after_size <= (before_size * 1.01), (
        "The memory size of the "
        "Firecracker process "
        "changed from {} to {}.".format(before_size, after_size)
    )


def test_serial_block(uvm_plain_any):
    """
    Test that writing to stdout never blocks the vCPU thread.
    """
    test_microvm = uvm_plain_any
    test_microvm.help.enable_console()
    test_microvm.spawn(serial_out_path=None)
    # Set up the microVM with 1 vCPU so we make sure the vCPU thread
    # responsible for the SSH connection will also run the serial.
    test_microvm.basic_config(
        vcpu_count=1,
        mem_size_mib=512,
    )
    test_microvm.add_net_iface()
    test_microvm.start()

    # Get an initial reading of missed writes to the serial.
    fc_metrics = test_microvm.flush_metrics()
    init_count = fc_metrics["uart"]["missed_write_count"]

    # Stop `screen` process which captures stdout so we stop consuming stdout.
    os.kill(test_microvm.screen_pid, signal.SIGSTOP)

    # Generate a random text file.
    test_microvm.ssh.check_output(
        "base64 /dev/urandom | head -c 100000 > /tmp/file.txt"
    )

    # Dump output to terminal
    test_microvm.ssh.check_output("cat /tmp/file.txt > /dev/ttyS0")

    # Check that the vCPU isn't blocked.
    test_microvm.ssh.check_output("cd /")

    # Check the metrics to see if the serial missed bytes.
    fc_metrics = test_microvm.flush_metrics()
    last_count = fc_metrics["uart"]["missed_write_count"]

    # Should be significantly more than before the `cat` command.
    assert last_count - init_count > 10000


# Warning emitted when the serial device cannot register stdin with epoll.
REGISTER_FAILED_WARNING = "Failed to register serial input fd: event_manager: failed to manage epoll file descriptor: Operation not permitted (os error 1)"


def test_no_serial_fd_error_when_daemonized(uvm_plain):
    """
    Tests that when running firecracker daemonized, the serial device does not
    try to register stdin to epoll (which would fail due to stdin no longer
    being pointed at a terminal).

    Regression test for #4037.
    """
    test_microvm = uvm_plain
    test_microvm.spawn()
    test_microvm.add_net_iface()
    test_microvm.basic_config(
        vcpu_count=1,
        mem_size_mib=512,
    )
    test_microvm.start()

    assert REGISTER_FAILED_WARNING not in test_microvm.log_data


def test_serial_file_output(uvm_any):
    """Test that redirecting serial console output to a file works for booted and restored VMs"""
    uvm_any.ssh.check_output("echo 'hello' > /dev/ttyS0")
    assert b"hello" in uvm_any.serial_out_path.read_bytes()


================================================
FILE: tests/integration_tests/functional/test_shut_down.py
================================================
# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0

"""Tests scenarios for shutting down Firecracker/VM."""

import platform

from packaging import version

from framework import utils


def test_reboot(uvm_plain_any):
    """
    Test reboot from guest.
    """
    vm = uvm_plain_any
    vm.spawn()

    # We don't need to monitor the memory for this test because we are
    # just rebooting and the process dies before pmap gets the RSS.
    vm.memory_monitor = None

    # Set up the microVM with 4 vCPUs, 256 MiB of RAM, 0 network ifaces, and
    # a root file system with the rw permission. The network interfaces is
    # added after we get a unique MAC and IP.
    vm.basic_config(vcpu_count=4)
    vm.add_net_iface()
    vm.start()

    # Consume existing metrics
    lines = vm.get_all_metrics()
    assert len(lines) == 1
    # Rebooting Firecracker sends an exit event and should gracefully kill
    # the instance.
    vm.ssh.run("reboot")

    vm.mark_killed()

    # Consume existing metrics
    datapoints = vm.get_all_metrics()
    assert len(datapoints) == 2

    if platform.machine() != "x86_64":
        # The KVM_SYSTEM_EVENT log format changed in kernel 5.18.
        message = (
            "Received KVM_SYSTEM_EVENT: type: 2, event: [0]"
            if version.parse(utils.get_kernel_version()) >= version.parse("5.18")
            else "Received KVM_SYSTEM_EVENT: type: 2, event: []"
        )
        vm.check_log_message(message)
        vm.check_log_message("Vmm is stopping.")

    # Make sure that the FC process was not killed by a seccomp fault
    assert datapoints[-1]["seccomp"]["num_faults"] == 0


================================================
FILE: tests/integration_tests/functional/test_signals.py
================================================
# Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0

"""Tests scenarios for Firecracker signal handling."""

import json
import os
import resource as res
from signal import SIGBUS, SIGHUP, SIGILL, SIGPIPE, SIGSEGV, SIGSYS, SIGXCPU, SIGXFSZ
from time import sleep

import pytest

# Maps each handled signal to the key used for it in the "signals" metrics.
signum_str = {
    SIGBUS: "sigbus",
    SIGSEGV: "sigsegv",
    SIGXFSZ: "sigxfsz",
    SIGXCPU: "sigxcpu",
    SIGPIPE: "sigpipe",
    SIGHUP: "sighup",
    SIGILL: "sigill",
    SIGSYS: "sigsys",
}


@pytest.mark.parametrize(
    "signum", [SIGBUS, SIGSEGV, SIGXFSZ, SIGXCPU, SIGPIPE, SIGHUP, SIGILL, SIGSYS]
)
def test_generic_signal_handler(uvm_plain, signum):
    """
    Test signal handling for all handled signals.
    """
    microvm = uvm_plain
    microvm.spawn()

    # We don't need to monitor the memory for this test.
    microvm.memory_monitor = None
    microvm.basic_config()
    microvm.start()
    sleep(0.5)

    metrics_jail_path = microvm.metrics_file
    metrics_fd = open(metrics_jail_path, encoding="utf-8")
    line_metrics = metrics_fd.readlines()
    assert len(line_metrics) == 1

    os.kill(microvm.firecracker_pid, signum)

    # Firecracker gracefully handles SIGPIPE (doesn't terminate).
    if signum == int(SIGPIPE):
        msg = "Received signal 13"
        # Flush metrics to file, so we can see the SIGPIPE at bottom assert.
        # This is going to fail if process has exited.
        microvm.api.actions.put(action_type="FlushMetrics")
    else:
        msg = "Shutting down VM after intercepting signal {}".format(signum)
        microvm.mark_killed()

    microvm.check_log_message(msg)

    if signum != SIGSYS:
        # The fd was left positioned after the first line, so this reads
        # only the metrics flushed after the signal was delivered.
        metric_line = json.loads(metrics_fd.readlines()[0])
        assert metric_line["signals"][signum_str[signum]] == 1


def test_sigxfsz_handler(uvm_plain_rw):
    """
    Test intercepting and handling SIGXFSZ.
    """
    microvm = uvm_plain_rw
    microvm.spawn()

    # We don't need to monitor the memory for this test.
    microvm.memory_monitor = None

    # We need to use the Sync file engine type. If we use io_uring we will not
    # get a SIGXFSZ. We'll instead get an errno 27 File too large as the
    # completed entry status code.
    microvm.basic_config(rootfs_io_engine="Sync")
    microvm.start()

    metrics_jail_path = microvm.metrics_file
    metrics_fd = open(metrics_jail_path, encoding="utf-8")
    line_metrics = metrics_fd.readlines()
    assert len(line_metrics) == 1

    firecracker_pid = microvm.firecracker_pid
    size = os.path.getsize(metrics_jail_path)

    # The SIGXFSZ is triggered because the size of rootfs is bigger than
    # the size of metrics file times 3. Since the metrics file is flushed
    # twice we have to make sure that the limit is bigger than that
    # in order to make sure the SIGXFSZ metric is logged
    res.prlimit(firecracker_pid, res.RLIMIT_FSIZE, (size * 3, res.RLIM_INFINITY))

    microvm.mark_killed()

    msg = "Shutting down VM after intercepting signal 25, code 0"
    microvm.check_log_message(msg)

    metric_line = json.loads(metrics_fd.readlines()[0])
    assert metric_line["signals"]["sigxfsz"] == 1


def test_handled_signals(uvm_plain):
    """
    Test that handled signals don't kill the microVM.
    """
    microvm = uvm_plain
    microvm.spawn()

    # We don't need to monitor the memory for this test.
    microvm.memory_monitor = None
    microvm.basic_config(vcpu_count=2)
    microvm.add_net_iface()
    microvm.start()

    # Open a SSH connection to validate the microVM stays alive.
    # Just validate a simple command: `nproc`
    cmd = "nproc"
    _, stdout, stderr = microvm.ssh.run(cmd)
    assert stderr == ""
    assert int(stdout) == 2

    # We have a handler installed for this signal.
    # The 35 is the SIGRTMIN for musl libc.
    # We hardcode this value since the SIGRTMIN python reports
    # is 34, which is likely the one for glibc.
    os.kill(microvm.firecracker_pid, 35)

    # Validate the microVM is still up and running.
    _, stdout, stderr = microvm.ssh.run(cmd)
    assert stderr == ""
    assert int(stdout) == 2


================================================
FILE: tests/integration_tests/functional/test_snapshot_basic.py
================================================
# Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0

"""Basic tests scenarios for snapshot save/restore."""

import dataclasses
import filecmp
import logging
import os
import platform
import re
import shutil
import uuid
from pathlib import Path

import pytest

import host_tools.cargo_build as host
import host_tools.drive as drive_tools
import host_tools.network as net_tools
from framework import utils
from framework.properties import global_props
from framework.utils import check_filesystem, check_output
from framework.utils_vsock import (
    ECHO_SERVER_PORT,
    VSOCK_UDS_PATH,
    _copy_vsock_data_to_guest,
    check_guest_connections,
    check_host_connections,
    make_blob,
    make_host_port_path,
    start_guest_echo_server,
)

# Kernel emits this message when it resumes from a snapshot with VMGenID device
# present
DMESG_VMGENID_RESUME = "random: crng reseeded due to virtual machine fork"


def check_vmgenid_update_count(vm, resume_count):
    """
    Kernel will emit the DMESG_VMGENID_RESUME every time we resume
    from a snapshot
    """
    _, stdout, _ = vm.ssh.check_output("dmesg")
    assert resume_count == stdout.count(DMESG_VMGENID_RESUME)


def _get_guest_drive_size(ssh_connection, guest_dev_name="/dev/vdb"):
    # Return the size (in bytes, as a string) of a block device in the guest.
    # `lsblk` command outputs 2 lines to STDOUT:
    # "SIZE" and the size of the device, in bytes.
blksize_cmd = "LSBLK_DEBUG=all lsblk -b {} --output SIZE".format(guest_dev_name) rc, stdout, stderr = ssh_connection.run(blksize_cmd) assert rc == 0, stderr lines = stdout.split("\n") return lines[1].strip() @pytest.mark.parametrize("resume_at_restore", [True, False]) def test_resume(uvm_nano, microvm_factory, resume_at_restore): """Tests snapshot is resumable at or after restoration. Check that a restored microVM is resumable by either a. PUT /snapshot/load with `resume_vm=False`, then calling PATCH /vm resume=True b. PUT /snapshot/load with `resume_vm=True` """ vm = uvm_nano vm.add_net_iface() vm.start() snapshot = vm.snapshot_full() restored_vm = microvm_factory.build() restored_vm.spawn() restored_vm.restore_from_snapshot(snapshot, resume=resume_at_restore) if not resume_at_restore: assert restored_vm.state == "Paused" restored_vm.resume() assert restored_vm.state == "Running" restored_vm.ssh.check_output("true") def test_snapshot_current_version(uvm_nano): """Tests taking a snapshot at the version specified in Cargo.toml Check that it is possible to take a snapshot at the version of the upcoming release (during the release process this ensures that if we release version x.y, then taking a snapshot at version x.y works - something we'd otherwise only be able to test once the x.y binary has been uploaded to S3, at which point it is too late, see also the 1.3 release). 
""" vm = uvm_nano vm.start() snapshot = vm.snapshot_full() # Fetch Firecracker binary for the latest version fc_binary = uvm_nano.fc_binary_path # Get supported snapshot version from Firecracker binary snapshot_version = ( check_output(f"{fc_binary} --snapshot-version").stdout.strip().splitlines()[0] ) # Verify the output of `--describe-snapshot` command line parameter cmd = [str(fc_binary)] + ["--describe-snapshot", str(snapshot.vmstate)] _, stdout, _ = check_output(cmd) assert snapshot_version in stdout # Testing matrix: # - Guest kernel: All supported ones # - Rootfs: Ubuntu 18.04 # - Microvm: 2vCPU with 512 MB RAM # TODO: Multiple microvm sizes must be tested in the async pipeline. @pytest.mark.parametrize("use_snapshot_editor", [False, True]) def test_cycled_snapshot_restore( bin_vsock_path, tmp_path, uvm_plain_any, microvm_factory, snapshot_type, use_snapshot_editor, cpu_template_any, ): """ Run a cycle of VM restoration and VM snapshot creation where new VM is restored from a snapshot of the previous one. """ # This is an arbitrary selected value. It is big enough to test the # functionality, but small enough to not be annoying long to run. cycles = 3 logger = logging.getLogger("snapshot_sequence") vm = uvm_plain_any vm.spawn() vm.basic_config( vcpu_count=2, mem_size_mib=512, track_dirty_pages=snapshot_type.needs_dirty_page_tracking, ) vm.set_cpu_template(cpu_template_any) vm.add_net_iface() vm.api.vsock.put(vsock_id="vsock0", guest_cid=3, uds_path=VSOCK_UDS_PATH) vm.start() vm_blob_path = "/tmp/vsock/test.blob" # Generate a random data file for vsock. blob_path, blob_hash = make_blob(tmp_path) # Copy the data file and a vsock helper to the guest. _copy_vsock_data_to_guest(vm.ssh, blob_path, vm_blob_path, bin_vsock_path) logger.info("Create %s #0.", snapshot_type) # Create a snapshot from a microvm. 
start_guest_echo_server(vm) snapshot = vm.make_snapshot(snapshot_type) vm.kill() local_port_last = (1 << 30) - 1 for microvm in microvm_factory.build_n_from_snapshot( snapshot, cycles, incremental=True, use_snapshot_editor=use_snapshot_editor ): # Test vsock guest-initiated connections. path = os.path.join( microvm.path, make_host_port_path(VSOCK_UDS_PATH, ECHO_SERVER_PORT) ) check_guest_connections(microvm, path, vm_blob_path, blob_hash) # Test vsock host-initiated connections. path = os.path.join(microvm.jailer.chroot_path(), VSOCK_UDS_PATH) check_host_connections(path, blob_path, blob_hash) m = re.findall( r"vsock muxer: RX pkt: VsockPacketHeader {.*, src_port: (\d+),.*, op: 1,.*}", microvm.log_data, ) assert int(m[0]) == local_port_last + 1 local_port_last = int(m[-1]) # Check that the root device is not corrupted. check_filesystem(microvm.ssh, "squashfs", "/dev/vda") def test_patch_drive_snapshot(uvm_nano, microvm_factory): """ Test that a patched drive is correctly used by guests loaded from snapshot. """ logger = logging.getLogger("snapshot_sequence") # Use a predefined vm instance. basevm = uvm_nano basevm.add_net_iface() # Add a scratch 128MB RW non-root block device. root = Path(basevm.path) scratch_path1 = str(root / "scratch1") scratch_disk1 = drive_tools.FilesystemFile(scratch_path1, size=128) basevm.add_drive("scratch", scratch_disk1.path) basevm.start() # Update drive to have another backing file, double in size. new_file_size_mb = 2 * int(scratch_disk1.size() / (1024 * 1024)) logger.info("Patch drive, new file: size %sMB.", new_file_size_mb) scratch_path2 = str(root / "scratch2") scratch_disk2 = drive_tools.FilesystemFile(scratch_path2, new_file_size_mb) basevm.patch_drive("scratch", scratch_disk2) # Create base snapshot. logger.info("Create FULL snapshot #0.") snapshot = basevm.snapshot_full() # Load snapshot in a new Firecracker microVM. 
logger.info("Load snapshot, mem %s", snapshot.mem) vm = microvm_factory.build_from_snapshot(snapshot) # Attempt to connect to resumed microvm and verify the new microVM has the # right scratch drive. guest_drive_size = _get_guest_drive_size(vm.ssh) assert guest_drive_size == str(scratch_disk2.size()) def test_load_snapshot_failure_handling(uvm_plain): """ Test error case of loading empty snapshot files. """ vm = uvm_plain vm.spawn(log_level="Info") # Create two empty files for snapshot state and snapshot memory chroot_path = vm.jailer.chroot_path() snapshot_dir = os.path.join(chroot_path, "snapshot") Path(snapshot_dir).mkdir(parents=True, exist_ok=True) snapshot_mem = os.path.join(snapshot_dir, "snapshot_mem") open(snapshot_mem, "w+", encoding="utf-8").close() snapshot_vmstate = os.path.join(snapshot_dir, "snapshot_vmstate") open(snapshot_vmstate, "w+", encoding="utf-8").close() # Hardlink the snapshot files into the microvm jail. jailed_mem = vm.create_jailed_resource(snapshot_mem) jailed_vmstate = vm.create_jailed_resource(snapshot_vmstate) # Load the snapshot with pytest.raises(RuntimeError, match="IO Error: File too short to contain CRC"): vm.api.snapshot_load.put(mem_file_path=jailed_mem, snapshot_path=jailed_vmstate) vm.mark_killed() def test_cmp_full_and_first_diff_mem(uvm_plain_any): """ Compare memory of 2 consecutive full and diff snapshots. Testing matrix: - Guest kernel: All supported ones - Rootfs: Ubuntu 18.04 - Microvm: 2vCPU with 512 MB RAM """ logger = logging.getLogger("snapshot_sequence") vm = uvm_plain_any vm.spawn() vm.basic_config( vcpu_count=2, mem_size_mib=512, track_dirty_pages=True, ) vm.add_net_iface() vm.start() logger.info("Create diff snapshot.") # Create diff snapshot. diff_snapshot = vm.snapshot_diff() logger.info("Create full snapshot.") # Create full snapshot. 
full_snapshot = vm.snapshot_full(mem_path="mem_full") assert full_snapshot.mem != diff_snapshot.mem assert filecmp.cmp(full_snapshot.mem, diff_snapshot.mem, shallow=False) def test_negative_postload_api(uvm_plain, microvm_factory): """ Test APIs fail after loading from snapshot. """ basevm = uvm_plain basevm.spawn() basevm.basic_config(track_dirty_pages=True) basevm.add_net_iface() basevm.start() # Create base snapshot. snapshot = basevm.snapshot_diff() basevm.kill() # Do not resume, just load, so we can still call APIs that work. microvm = microvm_factory.build_from_snapshot(snapshot) fail_msg = "The requested operation is not supported after starting the microVM" with pytest.raises(RuntimeError, match=fail_msg): microvm.api.actions.put(action_type="InstanceStart") with pytest.raises(RuntimeError, match=fail_msg): microvm.basic_config() def test_negative_snapshot_permissions(uvm_plain_rw, microvm_factory): """ Test missing permission error scenarios. """ basevm = uvm_plain_rw basevm.spawn() basevm.basic_config() basevm.add_net_iface() basevm.start() # Remove write permissions. os.chmod(basevm.jailer.chroot_path(), 0o444) with pytest.raises(RuntimeError, match="Permission denied"): basevm.snapshot_full() # Restore proper permissions. os.chmod(basevm.jailer.chroot_path(), 0o744) # Create base snapshot. snapshot = basevm.snapshot_full() basevm.kill() # Remove permissions for mem file. os.chmod(snapshot.mem, 0o000) microvm = microvm_factory.build() microvm.spawn() expected_err = re.escape( "Load snapshot error: Failed to restore from snapshot: Failed to load guest " "memory: Error creating guest memory from file: Failed to load guest memory: " "Permission denied (os error 13)" ) with pytest.raises(RuntimeError, match=expected_err): microvm.restore_from_snapshot(snapshot, resume=True) microvm.mark_killed() # Remove permissions for state file. 
os.chmod(snapshot.vmstate, 0o000) microvm = microvm_factory.build() microvm.spawn() expected_err = re.escape( "Load snapshot error: Failed to restore from snapshot: Failed to get snapshot " "state from file: Failed to open snapshot file: Permission denied (os error 13)" ) with pytest.raises(RuntimeError, match=expected_err): microvm.restore_from_snapshot(snapshot, resume=True) microvm.mark_killed() # Restore permissions for state file. os.chmod(snapshot.vmstate, 0o744) os.chmod(snapshot.mem, 0o744) # Remove permissions for block file. os.chmod(snapshot.disks["rootfs"], 0o000) microvm = microvm_factory.build() microvm.spawn() expected_err = "Virtio backend error: Error manipulating the backing file: Permission denied (os error 13)" with pytest.raises(RuntimeError, match=re.escape(expected_err)): microvm.restore_from_snapshot(snapshot, resume=True) microvm.mark_killed() def test_negative_snapshot_create(uvm_nano): """ Test create snapshot before pause. """ vm = uvm_nano vm.start() with pytest.raises(RuntimeError, match="save/restore unavailable while running"): vm.api.snapshot_create.put( mem_file_path="memfile", snapshot_path="statefile", snapshot_type="Full" ) def test_create_large_diff_snapshot(uvm_plain): """ Create large diff snapshot seccomp regression test. When creating a diff snapshot of a microVM with a large memory size, a mmap(MAP_PRIVATE|MAP_ANONYMOUS) is issued. Test that the default seccomp filter allows it. @issue: https://github.com/firecracker-microvm/firecracker/discussions/2811 """ vm = uvm_plain vm.spawn() vm.basic_config(mem_size_mib=16 * 1024, track_dirty_pages=True) vm.start() vm.api.vm.patch(state="Paused") vm.api.snapshot_create.put( mem_file_path="memfile", snapshot_path="statefile", snapshot_type="Diff" ) # If the regression was not fixed, this would have failed. The Firecracker # process would have been taken down. 
@pytest.mark.parametrize("mem_size", [256, 4096])
def test_diff_snapshot_overlay(uvm_plain_any, microvm_factory, mem_size):
    """
    Tests that if we take a diff snapshot and direct firecracker to write it
    on top of an existing snapshot file, it will successfully merge them.
    """
    basevm = uvm_plain_any
    basevm.spawn()
    basevm.basic_config(track_dirty_pages=True, mem_size_mib=mem_size)
    basevm.add_net_iface()
    basevm.start()

    # The first snapshot taken will always contain all memory (even if its
    # specified as "diff"). We use a diff snapshot here, as taking a full
    # snapshot does not clear the dirty page tracking, meaning the
    # `snapshot_diff()` call below would again dump the entire guest memory
    # instead of only dirty regions.
    full_snapshot = basevm.snapshot_diff()
    basevm.resume()

    # Run some command to dirty some pages
    basevm.ssh.check_output("true")

    # First copy the base snapshot somewhere else, so we can make sure
    # it will actually get updated
    first_snapshot_backup = Path(basevm.chroot()) / "mem.old"
    shutil.copyfile(full_snapshot.mem, first_snapshot_backup)

    # One Microvm object will always write its snapshot files to the same location
    merged_snapshot = basevm.snapshot_diff()
    # Same path, but the file contents must have changed vs. the backup.
    assert full_snapshot.mem == merged_snapshot.mem
    assert not filecmp.cmp(merged_snapshot.mem, first_snapshot_backup, shallow=False)

    _ = microvm_factory.build_from_snapshot(merged_snapshot)

    # Check that the restored VM works


def test_snapshot_overwrite_self(uvm_plain_any, microvm_factory):
    """Tests that if we try to take a snapshot that would overwrite the
    very file from which the current VM is stored, nothing happens.

    Note that even though we map the file as MAP_PRIVATE, the documentation
    of mmap does not specify what should happen if the file is changed after
    being mmap'd (https://man7.org/linux/man-pages/man2/mmap.2.html).
    It seems that these changes can propagate to the mmap'd memory region."""
    base_vm = uvm_plain_any
    base_vm.spawn()
    base_vm.basic_config()
    base_vm.add_net_iface()
    base_vm.start()

    snapshot = base_vm.snapshot_full()
    base_vm.kill()

    vm = microvm_factory.build_from_snapshot(snapshot)

    # When restoring a snapshot, vm.restore_from_snapshot first copies
    # the memory file (inside of the jailer) to /mem.src
    currently_loaded = Path(vm.chroot()) / "mem.src"

    assert currently_loaded.exists()

    vm.snapshot_full(mem_path="mem.src")
    vm.resume()

    # Check the overwriting the snapshot file from which this microvm was
    # originally restored, with a new snapshot of this vm, does not break the VM


def test_vmgenid(uvm_plain_6_1, microvm_factory, snapshot_type):
    """
    Test VMGenID device upon snapshot resume
    """
    base_vm = uvm_plain_6_1
    base_vm.spawn()
    base_vm.basic_config(track_dirty_pages=True)
    base_vm.add_net_iface()
    base_vm.start()

    snapshot = base_vm.make_snapshot(snapshot_type)
    base_snapshot = snapshot
    base_vm.kill()

    for i, vm in enumerate(
        microvm_factory.build_n_from_snapshot(base_snapshot, 5, incremental=True)
    ):
        # We should have as DMESG_VMGENID_RESUME messages as
        # snapshots we have resumed
        check_vmgenid_update_count(vm, i + 1)


@pytest.mark.skipif(
    platform.machine() != "aarch64"
    or (
        global_props.host_linux_version_tpl < (6, 4)
        and global_props.host_os not in ("amzn2", "amzn2023")
    ),
    reason="This test requires aarch64 and either kernel 6.4+ or Amazon Linux",
)
def test_physical_counter_reset_aarch64(uvm_nano):
    """
    Test that the CNTPCT_EL0 register is reset on VM boot.

    We assume the smallest VM will not consume more than some MAX_VALUE
    cycles to be created and snapshotted. The MAX_VALUE is selected by
    doing a manual run of this test and seeing what the actual counter
    value is. The assumption here is that if resetting will not occur
    the guest counter value will be huge as it will be a copy of host value.
    The host value in its turn will be huge because it will include host
    OS boot + CI prep + other CI tests ...
    """
    vm = uvm_nano
    vm.add_net_iface()
    vm.start()
    snapshot = vm.snapshot_full()
    vm.kill()

    snap_editor = host.get_binary("snapshot-editor")
    # KVM register id of CNTPCT_EL0, as printed by snapshot-editor.
    cntpct_el0 = hex(0x603000000013DF01)

    # If a CPU runs at 3GHz, it will have a counter value of 8_000_000_000
    # in 2.66 seconds. The host surely will run for more than 2.66 seconds
    # before executing this test.
    max_value = 8_000_000_000

    cmd = [
        str(snap_editor),
        "info-vmstate",
        "vcpu-states",
        "--vmstate-path",
        str(snapshot.vmstate),
    ]
    _, stdout, _ = utils.check_output(cmd)

    # The output will look like this:
    # kvm_mp_state: 0x0
    # mpidr: 0x80000000
    # 0x6030000000100000 0x0000000e0
    # 0x6030000000100002 0xffff00fe33c0
    for line in stdout.splitlines():
        parts = line.split()
        if len(parts) == 2:
            reg_id, reg_value = parts
            if reg_id == cntpct_el0:
                assert int(reg_value, 16) < max_value
                break
    else:
        # for/else: no break above means the register was never seen.
        raise RuntimeError("Did not find CNTPCT_EL0 register in snapshot")


def test_snapshot_rename_interface(uvm_nano, microvm_factory):
    """
    Test that we can restore a snapshot and point its interface to a
    different host interface.
    """
    vm = uvm_nano
    base_iface = vm.add_net_iface()
    vm.start()
    snapshot = vm.snapshot_full()

    # We don't reuse the network namespace as it may conflict with
    # previous/future devices
    restored_vm = microvm_factory.build(netns=net_tools.NetNs(str(uuid.uuid4())))
    # Override the tap name, but keep the same IP configuration
    iface_override = dataclasses.replace(base_iface, tap_name="tap_override")
    restored_vm.spawn()
    snapshot.net_ifaces.clear()
    snapshot.net_ifaces.append(iface_override)
    restored_vm.restore_from_snapshot(
        snapshot,
        rename_interfaces={iface_override.dev_name: iface_override.tap_name},
        resume=True,
    )


def test_snapshot_rename_vsock(
    uvm_nano,
    microvm_factory,
):
    """
    Test that we can restore a snapshot and point its vsock device to a
    different unix socket.
    """
    vm = uvm_nano
    vm.api.vsock.put(vsock_id="vsock0", guest_cid=3, uds_path="/v.sock1")
    vm.add_net_iface()
    vm.start()
    snapshot = vm.snapshot_full()

    restored_vm = microvm_factory.build()
    restored_vm.spawn()
    restored_vm.restore_from_snapshot(snapshot, vsock_override="/v.sock2", resume=True)


================================================
FILE: tests/integration_tests/functional/test_snapshot_editor.py
================================================
# Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0
"""Tests for snapshot-editor tool."""

import platform

import pytest

import host_tools.cargo_build as host
from framework import utils

PLATFORM = platform.machine()

# KVM register id of MIDR_EL1, as printed by snapshot-editor.
MIDR_EL1 = hex(0x603000000013C000)


@pytest.mark.skipif(
    PLATFORM != "aarch64",
    reason="This is aarch64 specific test.",
)
def test_remove_regs(uvm_nano, microvm_factory):
    """
    This test verifies `remove-regs` method of `snapshot-editor`.

    Here we create snapshot and try to remove MIDR_EL1 register
    from it. Then we try to restore uVM from the snapshot.
    """
    vm = uvm_nano
    vm.add_net_iface()
    vm.start()
    snapshot = vm.snapshot_full()

    snap_editor = host.get_binary("snapshot-editor")

    # Test that MIDR_EL1 is in the snapshot
    cmd = [
        str(snap_editor),
        "info-vmstate",
        "vcpu-states",
        "--vmstate-path",
        str(snapshot.vmstate),
    ]
    _, stdout, _ = utils.check_output(cmd)
    assert MIDR_EL1 in stdout

    # Remove MIDR_EL1 register from the snapshot (in-place: output path
    # equals input path).
    cmd = [
        str(snap_editor),
        "edit-vmstate",
        "remove-regs",
        "--vmstate-path",
        str(snapshot.vmstate),
        "--output-path",
        str(snapshot.vmstate),
        str(MIDR_EL1),
    ]
    utils.check_output(cmd)

    # Test that MIDR_EL1 is not in the snapshot
    cmd = [
        str(snap_editor),
        "info-vmstate",
        "vcpu-states",
        "--vmstate-path",
        str(snapshot.vmstate),
    ]
    _, stdout, _ = utils.check_output(cmd)
    assert MIDR_EL1 not in stdout

    # test that we can restore from a snapshot
    _ = microvm_factory.build_from_snapshot(snapshot)


================================================
FILE: tests/integration_tests/functional/test_snapshot_not_losing_dirty_pages.py
================================================
# Copyright 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0
"""Test that no dirty pages are lost in case of error during snapshot creation."""

import subprocess
from pathlib import Path

import psutil
import pytest


@pytest.fixture
def mount_tmpfs_small(worker_id):
    """Mount a small tmpfs and return its path"""
    mnt_path = Path(f"/mnt/{worker_id}")
    mnt_path.mkdir(parents=True)
    subprocess.check_call(
        ["mount", "-o", "size=512M", "-t", "tmpfs", "none", str(mnt_path)]
    )
    try:
        yield mnt_path
    finally:
        # Always unmount and remove the mount point, even if the test failed.
        subprocess.check_call(["umount", mnt_path])
        mnt_path.rmdir()


def test_diff_snapshot_works_after_error(
    microvm_factory, guest_kernel_linux_5_10, rootfs, mount_tmpfs_small
):
    """
    Test that if a partial snapshot errors it will work after and not lose data
    """
    uvm = microvm_factory.build(
        guest_kernel_linux_5_10,
        rootfs,
        jailer_kwargs={"chroot_base": mount_tmpfs_small},
    )
    vm_mem_size = 128
    uvm.time_api_requests = False  # The log may be incomplete due to lack of space
    uvm.spawn()
    uvm.basic_config(mem_size_mib=vm_mem_size, track_dirty_pages=True)
    uvm.add_net_iface()
    uvm.start()

    chroot = Path(uvm.chroot())

    # Create a large file dynamically based on available space
    fill = chroot / "fill"
    disk_usage = psutil.disk_usage(chroot)
    target_size = round(disk_usage.free * 0.9)  # Attempt to fill 90% of free space

    subprocess.check_call(f"fallocate -l {target_size} {fill}", shell=True)

    # Snapshot creation must fail: the tmpfs has no room for the mem file.
    with pytest.raises(RuntimeError, match="No space left on device"):
        uvm.snapshot_diff()

    fill.unlink()

    # Now there is enough space for it to work
    snap2 = uvm.snapshot_diff()

    uvm.kill()

    _vm2 = microvm_factory.build_from_snapshot(snap2)


================================================
FILE: tests/integration_tests/functional/test_snapshot_phase1.py
================================================
# Copyright 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0
"""
Creates snapshots for other tests like test_snapshot_restore_cross_kernel.py
"""

import json
import platform
import re

import pytest

from framework.utils import (
    configure_mmds,
    generate_mmds_get_request,
    generate_mmds_session_token,
)
from framework.utils_cpu_templates import get_cpu_template_name

if platform.machine() != "x86_64":
    pytestmark = pytest.mark.skip("only x86_64 architecture supported")

# Default IPv4 address to route MMDS requests.
IPV4_ADDRESS = "169.254.169.254"
NET_IFACE_FOR_MMDS = "eth3"


@pytest.mark.nonci
def test_snapshot_phase1(
    microvm_factory, guest_kernel, rootfs, cpu_template_any, results_dir
):
    """Create a snapshot and save it to disk"""
    vm = microvm_factory.build(guest_kernel, rootfs, monitor_memory=False)
    vm.spawn(log_level="Info")
    vm.add_net_iface()
    vm.basic_config(
        vcpu_count=2,
        mem_size_mib=512,
    )
    vm.set_cpu_template(cpu_template_any)

    guest_kernel_version = re.search("vmlinux-(.*)", vm.kernel_file.name)
    cpu_template_name = get_cpu_template_name(cpu_template_any, with_type=True)
    snapshot_artifacts_dir = (
        results_dir
        / f"{guest_kernel_version.group(1)}_{cpu_template_name}_guest_snapshot"
    )

    # Add 4 network devices
    # NOTE(review): one iface was already added above, so the VM ends up with
    # 5 interfaces while only ifaces 0-3 are validated below — confirm this
    # count is intentional.
    for i in range(4):
        vm.add_net_iface()
    # Add a vsock device
    vm.api.vsock.put(vsock_id="vsock0", guest_cid=3, uds_path="/v.sock")
    # Add MMDS
    configure_mmds(vm, ["eth3"], version="V2")
    # Add a memory balloon.
    vm.api.balloon.put(amount_mib=0, deflate_on_oom=True, stats_polling_interval_s=1)

    vm.start()

    # Populate MMDS.
    data_store = {
        "latest": {
            "meta-data": {
                "ami-id": "ami-12345678",
                "reservation-id": "r-fea54097",
                "local-hostname": "ip-10-251-50-12.ec2.internal",
                "public-hostname": "ec2-203-0-113-25.compute-1.amazonaws.com",
            }
        }
    }

    # MMDS should be empty.
    assert vm.api.mmds.get().json() == {}
    # Populate MMDS with data.
    vm.api.mmds.put(**data_store)
    # Ensure data is persistent inside the data store.
    assert vm.api.mmds.get().json() == data_store

    # Iterate and validate connectivity on all ifaces after boot.
    for i in range(4):
        exit_code, _, _ = vm.ssh_iface(i).run("sync")
        assert exit_code == 0

    # Validate MMDS.
    # Configure interface to route MMDS requests
    vm.ssh.check_output(f"ip route add {IPV4_ADDRESS} dev {NET_IFACE_FOR_MMDS}")
    # Fetch metadata to ensure MMDS is accessible.
    token = generate_mmds_session_token(vm.ssh, IPV4_ADDRESS, token_ttl=60)
    cmd = generate_mmds_get_request(IPV4_ADDRESS, token=token)
    _, stdout, _ = vm.ssh.run(cmd)
    assert json.loads(stdout) == data_store

    # Copy snapshot files to be published to S3 for the 2nd part of the test
    # Create snapshot artifacts directory specific for the kernel version used.
    snapshot = vm.snapshot_full()
    snapshot_artifacts_dir.mkdir(parents=True)
    snapshot.save_to(snapshot_artifacts_dir)


================================================
FILE: tests/integration_tests/functional/test_snapshot_restore_cross_kernel.py
================================================
# Copyright 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0
"""Test to restore snapshots across kernel versions."""

import json
import logging
import platform
from pathlib import Path

import pytest

from framework.defs import FC_WORKSPACE_DIR
from framework.utils import (
    generate_mmds_get_request,
    generate_mmds_session_token,
    guest_run_fio_iteration,
    populate_data_store,
)
from framework.utils_cpu_templates import get_supported_cpu_templates
from framework.utils_vsock import check_vsock_device
from integration_tests.functional.test_balloon import (
    get_stable_rss_mem,
    make_guest_dirty_memory,
)

pytestmark = pytest.mark.nonci


def _test_balloon(microvm):
    # Verify the balloon device still reclaims memory after restore.
    # Check memory usage.
    first_reading = get_stable_rss_mem(microvm)
    # Dirty 300MB of pages.
    make_guest_dirty_memory(microvm.ssh, amount_mib=300)

    # Check memory usage again.
    second_reading = get_stable_rss_mem(microvm)
    assert second_reading > first_reading

    # Inflate the balloon. Get back 200MB.
    microvm.api.balloon.patch(amount_mib=200)

    third_reading = get_stable_rss_mem(microvm)
    # Ensure that there is a reduction in RSS.
    assert second_reading > third_reading


def _test_mmds(vm, mmds_net_iface):
    # Verify the MMDS data store and session tokens work after restore.
    # Populate MMDS.
    data_store = {"latest": {"meta-data": {"ami-id": "ami-12345678"}}}
    populate_data_store(vm, data_store)

    mmds_ipv4_address = "169.254.169.254"
    vm.guest_ip = mmds_net_iface.guest_ip

    # Insert new rule into the routing table of the guest.
    cmd = "ip route add {} dev {}".format(
        mmds_net_iface.guest_ip, mmds_net_iface.dev_name
    )
    vm.ssh.check_output(cmd)

    # The base microVM had MMDS version 2 configured, which was persisted
    # across the snapshot-restore.
    token = generate_mmds_session_token(vm.ssh, mmds_ipv4_address, token_ttl=60)

    cmd = generate_mmds_get_request(mmds_ipv4_address, token=token)
    _, stdout, _ = vm.ssh.run(cmd)
    assert json.loads(stdout) == data_store


def get_snapshot_dirs():
    """Get all the snapshot directories"""
    snapshot_root_name = "snapshot_artifacts"
    snapshot_root_dir = Path(FC_WORKSPACE_DIR) / snapshot_root_name
    cpu_templates = []
    if platform.machine() == "x86_64":
        cpu_templates = ["None"]
    cpu_templates += get_supported_cpu_templates()
    for cpu_template in cpu_templates:
        for snapshot_dir in snapshot_root_dir.glob(f"*_{cpu_template}_guest_snapshot"):
            assert snapshot_dir.is_dir()
            yield pytest.param(snapshot_dir, id=snapshot_dir.name)


@pytest.mark.timeout(600)
@pytest.mark.parametrize("snapshot_dir", get_snapshot_dirs())
def test_snap_restore_from_artifacts(
    microvm_factory, bin_vsock_path, test_fc_session_root_path, snapshot_dir
):
    """
    Restore from snapshots obtained with all supported guest kernel versions.

    The snapshot artifacts have been generated through the
    `create_snapshot_artifacts` devtool command.
    The base microVM snapshotted has been built from the config file at
    ~/firecracker/tools/create_snapshot_artifact/complex_vm_config.json.
    """
    logger = logging.getLogger("cross_kernel_snapshot_restore")

    # Iterate through all subdirectories based on CPU template
    # in the snapshot root dir.
    logger.info("Working with snapshot artifacts in %s.", snapshot_dir)

    vm = microvm_factory.build()
    vm.time_api_requests = False
    vm.spawn()

    logger.info("Loading microVM from snapshot...")
    vm.restore_from_path(snapshot_dir)
    vm.resume()

    # Ensure microVM is running.
    assert vm.state == "Running"

    # Test that net devices have connectivity after restore.
    for idx, iface in enumerate(vm.iface.values()):
        logger.info("Testing net device %s...", iface["iface"].dev_name)
        vm.ssh_iface(idx).check_output("true")

    logger.info("Testing data store behavior...")
    _test_mmds(vm, vm.iface["eth3"]["iface"])

    logger.info("Testing balloon device...")
    _test_balloon(vm)

    logger.info("Testing vsock device...")
    check_vsock_device(vm, bin_vsock_path, test_fc_session_root_path, vm.ssh)

    # Run fio on the guest.
    # TODO: check the result of FIO or use fsck to check that the root device is
    # not corrupted. No obvious errors will be returned here.
    guest_run_fio_iteration(vm.ssh, 0)

    vm.kill()


================================================
FILE: tests/integration_tests/functional/test_topology.py
================================================
# Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0
"""Tests for ensuring correctness of CPU and cache topology in the guest."""

import platform
import subprocess

import pytest
from packaging import version

import framework.utils_cpuid as utils
from framework.properties import global_props
from framework.utils import get_kernel_version

# Expected "On-line CPU(s) list" string, keyed by vCPU count.
TOPOLOGY_STR = {1: "0", 2: "0,1", 16: "0-15"}
PLATFORM = platform.machine()


def _check_cpu_topology(
    test_microvm, expected_cpu_count, expected_threads_per_core, expected_cpus_list
):
    # Compare `lscpu` and `hwloc-info` output in the guest against the
    # topology we configured; field names differ between x86_64 and aarch64.
    expected_lscpu_output = {}
    if PLATFORM == "x86_64":
        expected_lscpu_output = {
            "CPU(s)": str(expected_cpu_count),
            "On-line CPU(s) list": expected_cpus_list,
            "Thread(s) per core": str(expected_threads_per_core),
            "Core(s) per socket": str(
                int(expected_cpu_count / expected_threads_per_core)
            ),
            "Socket(s)": "1",
            "NUMA node(s)": "1",
        }
    else:
        expected_lscpu_output = {
            "CPU(s)": str(expected_cpu_count),
            "On-line CPU(s) list": expected_cpus_list,
            "Thread(s) per core": "1",
            "Core(s) per cluster": str(
                int(expected_cpu_count / expected_threads_per_core)
            ),
            "Cluster(s)": "1",
            "NUMA node(s)": "1",
        }

    utils.check_guest_cpuid_output(
        test_microvm, "lscpu", None, ":", expected_lscpu_output
    )

    if PLATFORM == "x86_64":
        expected_hwloc_output = {
            "depth 0": "1 Machine (type #0)",
            "depth 1": "1 Package (type #1)",
            "depth 2": "1 L3Cache (type #6)",
            "depth 3": f"{int(expected_cpu_count / expected_threads_per_core)} L2Cache (type #5)",
            "depth 4": f"{int(expected_cpu_count / expected_threads_per_core)} L1dCache (type #4)",
            "depth 5": f"{int(expected_cpu_count / expected_threads_per_core)} L1iCache (type #9)",
            "depth 6": f"{int(expected_cpu_count / expected_threads_per_core)} Core (type #2)",
            "depth 7": f"{expected_cpu_count} PU (type #3)",
        }
    else:
        expected_hwloc_output = {
            "depth 0": "1 Machine (type #0)",
            "depth 1": "1 Package (type #1)",
            "depth 2": "1 L3Cache (type #6)",
            "depth 3": f"{expected_cpu_count} L2Cache (type #5)",
            "depth 4": f"{expected_cpu_count} L1dCache (type #4)",
            "depth 5": f"{expected_cpu_count} L1iCache (type #9)",
            "depth 6": f"{expected_cpu_count} Core (type #2)",
            "depth 7": f"{expected_cpu_count} PU (type #3)",
        }
    utils.check_guest_cpuid_output(
        test_microvm, "hwloc-info", None, ":", expected_hwloc_output
    )


def _check_cache_topology_x86(
    test_microvm, num_vcpus_on_lvl_1_cache, num_vcpus_on_lvl_3_cache
):
    # Validate cpuid cache-sharing fields; AMD and Intel use different
    # key names and semantics for the "sharing" count.
    vm = test_microvm

    expected_lvl_1_str = "{} ({})".format(
        hex(num_vcpus_on_lvl_1_cache), num_vcpus_on_lvl_1_cache
    )
    expected_lvl_3_str = "{} ({})".format(
        hex(num_vcpus_on_lvl_3_cache), num_vcpus_on_lvl_3_cache
    )

    cpu_vendor = utils.get_cpu_vendor()
    expected_level_1_topology = expected_level_3_topology = None
    if cpu_vendor == utils.CpuVendor.AMD:
        key_share = "extra cores sharing this cache"
        expected_level_1_topology = {
            "level": "0x1 (1)",
            key_share: expected_lvl_1_str,
        }
        expected_level_3_topology = {
            "level": "0x3 (3)",
            key_share: expected_lvl_3_str,
        }
    elif cpu_vendor == utils.CpuVendor.INTEL:
        key_share = "maximum IDs for CPUs sharing cache"
        expected_level_1_topology = {
            "cache level": "0x1 (1)",
            key_share: expected_lvl_1_str,
        }
        expected_level_3_topology = {
            "cache level": "0x3 (3)",
            key_share: expected_lvl_3_str,
        }

    # Caches 0-2 (L1d, L1i, L2) share per-core; cache 3 is the shared L3.
    utils.check_guest_cpuid_output(
        vm, "cpuid -1", "--- cache 0 ---", "=", expected_level_1_topology
    )
    utils.check_guest_cpuid_output(
        vm, "cpuid -1", "--- cache 1 ---", "=", expected_level_1_topology
    )
    utils.check_guest_cpuid_output(
        vm, "cpuid -1", "--- cache 2 ---", "=", expected_level_1_topology
    )
    utils.check_guest_cpuid_output(
        vm, "cpuid -1", "--- cache 3 ---", "=", expected_level_3_topology
    )


def _aarch64_parse_cache_info(test_microvm, no_cpus):
    def parse_cache_info(info: str):
        "One line looks like this: /sys/devices/system/cpu/cpuX/cache/{index}/{name}:{value}"
        cache_info = []
        for line in info.splitlines():
            parts = line.split("/")
            # NOTE(review): only the last character of "indexY" is used, so
            # cache indices >= 10 would collide — confirm guests never expose
            # that many cache indices.
            index = int(parts[-2][-1])
            name, value = parts[-1].split(":")
            if len(cache_info) == index:
                cache_info.append({})
            cache_info[index][name] = value
        return cache_info

    # We will check the cache topology by looking at what each cpu
    # contains as far as cache info.
    # For that we are iterating through the hierarchy of folders inside:
    # /sys/devices/system/cpu/cpuX/cache/indexY/type - the type of the cache
    # (i.e Instruction, Data, Unified)
    # /sys/devices/system/cpu/cpuX/cache/indexY/size - size of the cache
    # /sys/devices/system/cpu/cpuX/cache/indexY/level - L1, L2 or L3 cache.
    fields = ["level", "type", "size", "coherency_line_size", "number_of_sets"]
    cmd = f"grep . /sys/devices/system/cpu/cpu{{0..{no_cpus-1}}}/cache/index*/{{{','.join(fields)}}} |sort"

    _, guest_stdout, guest_stderr = test_microvm.ssh.run(cmd)
    assert guest_stderr == ""

    # Run the same command on the host (bash needed for brace expansion).
    host_result = subprocess.run(
        cmd,
        shell=True,
        executable="/bin/bash",
        capture_output=True,
        check=True,
        encoding="ascii",
    )
    assert host_result.stderr == ""
    host_stdout = host_result.stdout

    guest_cache_info = parse_cache_info(guest_stdout)
    host_cache_info = parse_cache_info(host_stdout)
    return guest_cache_info, host_cache_info


def _check_cache_topology_arm(test_microvm, no_cpus, kernel_version_tpl):
    guest_cache_info, host_cache_info = _aarch64_parse_cache_info(
        test_microvm, no_cpus
    )

    # Starting from 6.3 kernel cache representation for aarch64 platform has changed.
    # It is no longer equivalent to the host cache representation.
    # The main change is in the level 1 cache, so for newer kernels we
    # compare only level 2 and level 3 caches
    if kernel_version_tpl < (6, 3):
        assert guest_cache_info == host_cache_info
    else:
        # Skip past the level-1 entries on both sides, then compare the rest.
        guest_first_non_level_1 = 0
        while guest_cache_info[guest_first_non_level_1]["level"] == "1":
            guest_first_non_level_1 += 1
        guest_slice = guest_cache_info[guest_first_non_level_1:]

        host_first_non_level_1 = 0
        while host_cache_info[host_first_non_level_1]["level"] == "1":
            host_first_non_level_1 += 1
        host_slice = host_cache_info[host_first_non_level_1:]

        assert guest_slice == host_slice


@pytest.mark.parametrize("num_vcpus", [1, 2, 16])
@pytest.mark.parametrize("htt", [True, False], ids=["HTT_ON", "HTT_OFF"])
def test_cpu_topology(uvm_plain_any, num_vcpus, htt):
    """
    Check the CPU topology for a microvm with the specified config.
    """
    if htt and PLATFORM == "aarch64":
        pytest.skip("SMT is configurable only on x86.")
    # TODO:Remove (or adapt) this once we unify the way we expose the CPU cache hierarchy on
    # Aarch64 systems.
    if version.parse(get_kernel_version()) >= version.parse("6.14"):
        pytest.skip("Starting on 6.14 KVM exposes a different CPU cache hierarchy")
    vm = uvm_plain_any
    vm.spawn()
    vm.basic_config(vcpu_count=num_vcpus, smt=htt)
    vm.add_net_iface()
    vm.start()

    _check_cpu_topology(
        vm, num_vcpus, 2 if htt and num_vcpus > 1 else 1, TOPOLOGY_STR[num_vcpus]
    )


@pytest.mark.parametrize("num_vcpus", [1, 2, 16])
@pytest.mark.parametrize("htt", [True, False], ids=["HTT_ON", "HTT_OFF"])
def test_cache_topology(uvm_plain_any, num_vcpus, htt):
    """
    Check the cache topology for a microvm with the specified config.
    """
    if htt and PLATFORM == "aarch64":
        pytest.skip("SMT is configurable only on x86.")
    vm = uvm_plain_any
    vm.spawn()
    vm.basic_config(vcpu_count=num_vcpus, smt=htt)
    vm.add_net_iface()
    vm.start()
    if PLATFORM == "x86_64":
        _check_cache_topology_x86(vm, 1 if htt and num_vcpus > 1 else 0, num_vcpus - 1)
    elif PLATFORM == "aarch64":
        _check_cache_topology_arm(vm, num_vcpus, global_props.host_linux_version_tpl)
    else:
        raise Exception("This test is not run on this platform!")


================================================
FILE: tests/integration_tests/functional/test_uffd.py
================================================
# Copyright 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0
"""Test UFFD related functionality when resuming from snapshot."""

import os
import re

import pytest
import requests

from framework.utils import Timeout, check_output


@pytest.fixture(scope="function", name="snapshot")
def snapshot_fxt(microvm_factory, guest_kernel_linux_5_10, rootfs):
    """Create a snapshot of a microVM."""
    basevm = microvm_factory.build(guest_kernel_linux_5_10, rootfs)
    basevm.spawn()
    basevm.basic_config(vcpu_count=2, mem_size_mib=256)
    basevm.add_net_iface()

    # Add a memory balloon.
    basevm.api.balloon.put(
        amount_mib=0, deflate_on_oom=True, stats_polling_interval_s=0
    )

    basevm.start()

    # Create base snapshot.
    snapshot = basevm.snapshot_full()
    basevm.kill()

    yield snapshot


def test_bad_socket_path(uvm_plain, snapshot):
    """
    Test error scenario when socket path does not exist.
""" vm = uvm_plain vm.spawn() jailed_vmstate = vm.create_jailed_resource(snapshot.vmstate) expected_msg = re.escape( "Load snapshot error: Failed to restore from snapshot: Failed to load guest " "memory: Error creating guest memory from uffd: Failed to connect to UDS Unix stream: No " "such file or directory (os error 2)" ) with pytest.raises(RuntimeError, match=expected_msg): vm.api.snapshot_load.put( mem_backend={"backend_type": "Uffd", "backend_path": "inexistent"}, snapshot_path=jailed_vmstate, ) vm.mark_killed() def test_unbinded_socket(uvm_plain, snapshot): """ Test error scenario when PF handler has not yet called bind on socket. """ vm = uvm_plain vm.spawn() jailed_vmstate = vm.create_jailed_resource(snapshot.vmstate) socket_path = os.path.join(vm.path, "firecracker-uffd.sock") check_output("touch {}".format(socket_path)) jailed_sock_path = vm.create_jailed_resource(socket_path) expected_msg = re.escape( "Load snapshot error: Failed to restore from snapshot: Failed to load guest " "memory: Error creating guest memory from uffd: Failed to connect to UDS Unix stream: " "Connection refused (os error 111)" ) with pytest.raises(RuntimeError, match=expected_msg): vm.api.snapshot_load.put( mem_backend={"backend_type": "Uffd", "backend_path": jailed_sock_path}, snapshot_path=jailed_vmstate, ) vm.mark_killed() def test_valid_handler(uvm_plain, snapshot): """ Test valid uffd handler scenario. """ vm = uvm_plain vm.memory_monitor = None vm.spawn() vm.restore_from_snapshot(snapshot, resume=True, uffd_handler_name="on_demand") # Inflate balloon. vm.api.balloon.patch(amount_mib=200) # Verify if the restored guest works. vm.ssh.check_output("true") # Deflate balloon. vm.api.balloon.patch(amount_mib=0) # Verify if the restored guest works. vm.ssh.check_output("true") def test_malicious_handler(uvm_plain, snapshot): """ Test malicious uffd handler scenario. 
The page fault handler panics when receiving a page fault, so no events are handled and snapshot memory regions cannot be loaded into memory. In this case, Firecracker is designed to freeze, instead of silently switching to having the kernel handle page faults, so that it becomes obvious that something went wrong. """ vm = uvm_plain vm.memory_monitor = None vm.spawn() # We expect Firecracker to freeze while resuming from a snapshot # due to the malicious handler's unavailability. try: with Timeout(seconds=30): vm.restore_from_snapshot( snapshot, resume=True, uffd_handler_name="malicious" ) assert False, "Firecracker should freeze" except (TimeoutError, requests.exceptions.ReadTimeout): vm.uffd_handler.mark_killed() ================================================ FILE: tests/integration_tests/functional/test_vmclock.py ================================================ # Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. # SPDX-License-Identifier: Apache-2.0 """Test VMclock device emulation""" import pytest @pytest.fixture(scope="function") def vm_with_vmclock(uvm_plain_acpi, bin_vmclock_path): """Create a VM with VMclock support and the `vmclock` test binary under `/tmp/vmclock`""" basevm = uvm_plain_acpi basevm.spawn() basevm.basic_config() basevm.add_net_iface() basevm.start() basevm.ssh.scp_put(bin_vmclock_path, "/tmp/vmclock") yield basevm def parse_vmclock(vm, use_mmap=False): """Parse the VMclock struct inside the guest and return a dictionary with its fields""" cmd = "/tmp/vmclock -m" if use_mmap else "/tmp/vmclock -r" _, stdout, _ = vm.ssh.check_output(cmd) fields = stdout.strip().split("\n") if use_mmap: assert fields[0] == "Reading VMClock with mmap()" else: assert fields[0] == "Reading VMClock with read()" return dict(item.split(": ") for item in fields if item.startswith("VMCLOCK")) def parse_vmclock_from_poll(vm, expected_notifications): """Parse the output of the 'vmclock -p' command in the guest""" _, stdout, _ = 
vm.ssh.check_output("cat /tmp/vmclock.out") fields = stdout.strip().split("\n") nr_notifications = 0 for line in fields: if line == "Got VMClock notification": nr_notifications += 1 assert nr_notifications == expected_notifications return dict(item.split(": ") for item in fields if item.startswith("VMCLOCK")) @pytest.mark.parametrize("use_mmap", [False, True], ids=["read()", "mmap()"]) def test_vmclock_read_fields(vm_with_vmclock, use_mmap): """Make sure that we expose the expected values in the VMclock struct""" vm = vm_with_vmclock vmclock = parse_vmclock(vm, use_mmap) assert vmclock["VMCLOCK_FLAG_VM_GEN_COUNTER_PRESENT"] == "true" assert vmclock["VMCLOCK_FLAG_NOTIFICATION_PRESENT"] == "true" assert vmclock["VMCLOCK_MAGIC"] == "0x4b4c4356" assert vmclock["VMCLOCK_SIZE"] == "0x1000" assert vmclock["VMCLOCK_VERSION"] == "1" assert vmclock["VMCLOCK_CLOCK_STATUS"] == "0" assert vmclock["VMCLOCK_COUNTER_ID"] == "255" assert vmclock["VMCLOCK_DISRUPTION_MARKER"] == "0" assert vmclock["VMCLOCK_VM_GENERATION_COUNTER"] == "0" @pytest.mark.parametrize("use_mmap", [False, True], ids=["read()", "mmap()"]) def test_snapshot_update(vm_with_vmclock, microvm_factory, snapshot_type, use_mmap): """Test that `disruption_marker` and `vm_generation_counter` are updated upon snapshot resume""" basevm = vm_with_vmclock vmclock = parse_vmclock(basevm, use_mmap) assert vmclock["VMCLOCK_FLAG_VM_GEN_COUNTER_PRESENT"] == "true" assert vmclock["VMCLOCK_FLAG_NOTIFICATION_PRESENT"] == "true" assert vmclock["VMCLOCK_DISRUPTION_MARKER"] == "0" assert vmclock["VMCLOCK_VM_GENERATION_COUNTER"] == "0" snapshot = basevm.make_snapshot(snapshot_type) basevm.kill() for i, vm in enumerate( microvm_factory.build_n_from_snapshot(snapshot, 5, incremental=True) ): vmclock = parse_vmclock(vm, use_mmap) assert vmclock["VMCLOCK_DISRUPTION_MARKER"] == f"{i+1}" assert vmclock["VMCLOCK_VM_GENERATION_COUNTER"] == f"{i+1}" def test_vmclock_notifications(vm_with_vmclock, microvm_factory, snapshot_type): """Test that 
Firecracker will send a notification on snapshot load""" basevm = vm_with_vmclock # Launch vmclock utility in polling mode basevm.ssh.check_output("/tmp/vmclock -p > /tmp/vmclock.out 2>&1 &") # We should not have received any notification yet vmclock = parse_vmclock_from_poll(basevm, 0) assert vmclock["VMCLOCK_FLAG_VM_GEN_COUNTER_PRESENT"] == "true" assert vmclock["VMCLOCK_FLAG_NOTIFICATION_PRESENT"] == "true" assert vmclock["VMCLOCK_DISRUPTION_MARKER"] == "0" assert vmclock["VMCLOCK_VM_GENERATION_COUNTER"] == "0" snapshot = basevm.make_snapshot(snapshot_type) basevm.kill() for i, vm in enumerate( microvm_factory.build_n_from_snapshot(snapshot, 5, incremental=True) ): vmclock = parse_vmclock_from_poll(vm, i + 1) assert vmclock["VMCLOCK_DISRUPTION_MARKER"] == f"{i+1}" assert vmclock["VMCLOCK_VM_GENERATION_COUNTER"] == f"{i+1}" ================================================ FILE: tests/integration_tests/functional/test_vsock.py ================================================ # Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. # SPDX-License-Identifier: Apache-2.0 """Tests for the virtio-vsock device. In order to test the vsock device connection state machine, these tests will: - Generate a 20MiB random data blob; - Use `socat` to start a listening echo server inside the guest VM; - Run 50, concurrent, host-initiated connections, each transfering the random blob to and from the guest echo server; - For every connection, check that the data received back from the echo server hashes to the same value as the data sent; - Start a host echo server, and repeat the process for the same number of guest-initiated connections. 
""" import os.path import subprocess import time from pathlib import Path from socket import timeout as SocketTimeout import pytest from framework.utils_vsock import ( ECHO_SERVER_PORT, VSOCK_UDS_PATH, HostEchoWorker, _copy_vsock_data_to_guest, check_guest_connections, check_host_connections, check_vsock_device, make_blob, make_host_port_path, start_guest_echo_server, ) from host_tools.fcmetrics import validate_fc_metrics NEGATIVE_TEST_CONNECTION_COUNT = 100 TEST_WORKER_COUNT = 10 def test_vsock(uvm_plain_any, bin_vsock_path, test_fc_session_root_path): """ Test guest and host vsock initiated connections. Check the module docstring for details on the setup. """ vm = uvm_plain_any vm.spawn() vm.basic_config() vm.add_net_iface() vm.api.vsock.put(vsock_id="vsock0", guest_cid=3, uds_path=f"/{VSOCK_UDS_PATH}") vm.start() check_vsock_device(vm, bin_vsock_path, test_fc_session_root_path, vm.ssh) metrics = vm.flush_metrics() validate_fc_metrics(metrics) def negative_test_host_connections(vm, blob_path, blob_hash): """Negative test for host-initiated connections. This will start a daemonized echo server on the guest VM, and then spawn `NEGATIVE_TEST_CONNECTION_COUNT` `HostEchoWorker` threads. Closes the UDS sockets while data is in flight. """ uds_path = start_guest_echo_server(vm) workers = [] for _ in range(NEGATIVE_TEST_CONNECTION_COUNT): worker = HostEchoWorker(uds_path, blob_path) workers.append(worker) worker.start() for wrk in workers: wrk.close_uds() wrk.join() # Validate that guest is still up and running. # Should fail if Firecracker exited from SIGPIPE handler. metrics = vm.flush_metrics() validate_fc_metrics(metrics) # Validate that at least 1 `SIGPIPE` signal was received. # Since we are reusing the existing echo server which triggers # reads/writes on the UDS backend connections, these might be closed # before a read() or a write() is about to be performed by the emulation. # The test uses 100 connections it is enough to close at least one # before write(). 
# # If this ever fails due to 100 closes before read() we must # add extra tooling that will trigger only writes(). assert metrics["signals"]["sigpipe"] > 0 # Validate vsock emulation still accepts connections and works # as expected. Use the default blob size to speed up the test. blob_path, blob_hash = make_blob(os.path.dirname(blob_path)) check_host_connections(uds_path, blob_path, blob_hash) metrics = vm.flush_metrics() validate_fc_metrics(metrics) def test_vsock_epipe(uvm_plain_any, bin_vsock_path, test_fc_session_root_path): """ Vsock negative test to validate SIGPIPE/EPIPE handling. """ vm = uvm_plain_any vm.spawn() vm.basic_config() vm.add_net_iface() vm.api.vsock.put(vsock_id="vsock0", guest_cid=3, uds_path=f"/{VSOCK_UDS_PATH}") vm.start() # Generate the random data blob file, 20MB blob_path, blob_hash = make_blob(test_fc_session_root_path, 20 * 2**20) vm_blob_path = "/tmp/vsock/test.blob" # Set up a tmpfs drive on the guest, so we can copy the blob there. # Guest-initiated connections (echo workers) will use this blob. _copy_vsock_data_to_guest(vm.ssh, blob_path, vm_blob_path, bin_vsock_path) # Negative test for host-initiated connections that # are closed with in flight data. negative_test_host_connections(vm, blob_path, blob_hash) metrics = vm.flush_metrics() validate_fc_metrics(metrics) def test_vsock_transport_reset_h2g( uvm_plain_any, microvm_factory, bin_vsock_path, test_fc_session_root_path ): """ Vsock transport reset test. Steps: 1. Start echo server on the guest 2. Start host workers that ping-pong data between guest and host, without closing any of them 3. Pause VM -> Create snapshot -> Resume VM 4. Check that worker sockets no longer work by setting a timeout so the sockets won't block and do a recv operation. 5. If the recv operation timeouts, the connection was closed. Else, the connection was not closed and the test fails. 6. Close VM -> Load VM from Snapshot -> check that vsock device is still working. 
""" test_vm = uvm_plain_any test_vm.spawn() test_vm.basic_config(vcpu_count=2, mem_size_mib=256) test_vm.add_net_iface() test_vm.api.vsock.put(vsock_id="vsock0", guest_cid=3, uds_path=f"/{VSOCK_UDS_PATH}") test_vm.start() # Generate the random data blob file. blob_path, blob_hash = make_blob(test_fc_session_root_path) vm_blob_path = "/tmp/vsock/test.blob" # Set up a tmpfs drive on the guest, so we can copy the blob there. # Guest-initiated connections (echo workers) will use this blob. _copy_vsock_data_to_guest(test_vm.ssh, blob_path, vm_blob_path, bin_vsock_path) # Start guest echo server. path = start_guest_echo_server(test_vm) # Start host workers that connect to the guest server. workers = [] for _ in range(TEST_WORKER_COUNT): worker = HostEchoWorker(path, blob_path) workers.append(worker) worker.start() for wrk in workers: wrk.join() # Create snapshot. snapshot = test_vm.snapshot_full() test_vm.resume() # Check that sockets are no longer working on workers. for worker in workers: # Whatever we send to the server, it should return the same # value. buf = bytearray("TEST\n".encode("utf-8")) try: worker.sock.send(buf) # Arbitrary timeout, we set this so the socket won't block as # it shouldn't receive anything. worker.sock.settimeout(0.25) response = worker.sock.recv(32) assert ( response == b"" ), f"Connection not closed: response received '{response.decode('utf-8')}'" except (SocketTimeout, ConnectionResetError, BrokenPipeError): pass # Terminate VM. metrics = test_vm.flush_metrics() validate_fc_metrics(metrics) test_vm.kill() # Load snapshot. vm2 = microvm_factory.build_from_snapshot(snapshot) # Check that vsock device still works. # Test guest-initiated connections. path = os.path.join(vm2.path, make_host_port_path(VSOCK_UDS_PATH, ECHO_SERVER_PORT)) check_guest_connections(vm2, path, vm_blob_path, blob_hash) # Test host-initiated connections. 
path = os.path.join(vm2.jailer.chroot_path(), VSOCK_UDS_PATH) check_host_connections(path, blob_path, blob_hash) metrics = vm2.flush_metrics() validate_fc_metrics(metrics) def test_vsock_transport_reset_g2h(uvm_plain_any, microvm_factory): """ Vsock transport reset test. """ test_vm = uvm_plain_any test_vm.spawn() test_vm.basic_config(vcpu_count=2, mem_size_mib=256) test_vm.add_net_iface() test_vm.api.vsock.put(vsock_id="vsock0", guest_cid=3, uds_path=f"/{VSOCK_UDS_PATH}") test_vm.start() # Create snapshot and terminate a VM. snapshot = test_vm.snapshot_full() test_vm.kill() for _ in range(5): # Load snapshot. new_vm = microvm_factory.build_from_snapshot(snapshot) # After snap restore all vsock connections should be # dropped. This means guest socat should exit same way # as it did after snapshot was taken. code, _, _ = new_vm.ssh.run("pidof socat") assert code == 1 host_socket_path = os.path.join( new_vm.path, f"{VSOCK_UDS_PATH}_{ECHO_SERVER_PORT}" ) host_socat_commmand = [ "socat", "-dddd", f"UNIX-LISTEN:{host_socket_path},fork", "STDOUT", ] host_socat = subprocess.Popen( host_socat_commmand, stdout=subprocess.PIPE, stderr=subprocess.PIPE ) # Give some time for host socat to create socket time.sleep(0.5) assert Path(host_socket_path).exists() new_vm.create_jailed_resource(host_socket_path) # Create a socat process in the guest which will connect to the host socat guest_socat_commmand = ( f"tmux new -d 'socat - vsock-connect:2:{ECHO_SERVER_PORT}'" ) new_vm.ssh.run(guest_socat_commmand) # socat should be running in the guest now code, _, _ = new_vm.ssh.run("pidof socat") assert code == 0 # Create snapshot. snapshot = new_vm.snapshot_full() new_vm.resume() # After `create_snapshot` + 'restore' calls, connection should be dropped code, _, _ = new_vm.ssh.run("pidof socat") assert code == 1 # Kill host socat as it is not useful anymore host_socat.kill() host_socat.communicate() # Terminate VM. 
new_vm.kill() def test_vsock_after_override( uvm_plain_any, microvm_factory, bin_vsock_path, test_fc_session_root_path ): """ Test that the Vsock device works correctly after overriding the host UDS path on snapshot restore. """ initial_uds_path = VSOCK_UDS_PATH overridden_uds_path = f"{VSOCK_UDS_PATH}2" test_vm = uvm_plain_any test_vm.spawn() test_vm.basic_config(vcpu_count=2, mem_size_mib=256) test_vm.add_net_iface() test_vm.api.vsock.put( vsock_id="vsock0", guest_cid=3, uds_path=f"/{initial_uds_path}" ) test_vm.start() # Generate the random data blob file. blob_path, blob_hash = make_blob(test_fc_session_root_path) vm_blob_path = "/tmp/vsock/test.blob" # Set up a tmpfs drive on the guest, so we can copy the blob there. # Guest-initiated connections (echo workers) will use this blob. _copy_vsock_data_to_guest(test_vm.ssh, blob_path, vm_blob_path, bin_vsock_path) # Start guest echo server. start_guest_echo_server(test_vm) # Create snapshot and terminate a VM. snapshot = test_vm.snapshot_full() test_vm.kill() vm2 = microvm_factory.build() vm2.spawn() vm2.restore_from_snapshot(snapshot, vsock_override=overridden_uds_path, resume=True) # Check that vsock device still works. # Test guest-initiated connections. path = os.path.join( vm2.path, make_host_port_path(overridden_uds_path, ECHO_SERVER_PORT) ) check_guest_connections(vm2, path, vm_blob_path, blob_hash) # Test host-initiated connections. path = os.path.join(vm2.jailer.chroot_path(), overridden_uds_path) check_host_connections(path, blob_path, blob_hash) metrics = vm2.flush_metrics() validate_fc_metrics(metrics) def test_vsock_override_fails_without_device(uvm_plain_any, microvm_factory): """ Providing an override should fail if there is no vsock device. 
""" overridden_uds_path = f"{VSOCK_UDS_PATH}2" test_vm = uvm_plain_any test_vm.spawn() test_vm.basic_config(vcpu_count=2, mem_size_mib=256) test_vm.start() snapshot = test_vm.snapshot_full() test_vm.kill() vm2 = microvm_factory.build() vm2.spawn() # The failed snapshot load causes Firecracker to exit. with pytest.raises(RuntimeError, match="Unknown Vsock Device"): vm2.restore_from_snapshot( snapshot, vsock_override=overridden_uds_path, resume=True ) vm2.mark_killed() ================================================ FILE: tests/integration_tests/performance/__init__.py ================================================ # Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. # SPDX-License-Identifier: Apache-2.0 ================================================ FILE: tests/integration_tests/performance/test_balloon.py ================================================ # Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. # SPDX-License-Identifier: Apache-2.0 """Tests for guest-side operations on /balloon resources.""" import concurrent import signal import time import pytest from framework.microvm import HugePagesConfig from framework.utils import ( get_stable_rss_mem, supports_hugetlbfs_discard, track_cpu_utilization, ) NS_IN_MSEC = 1_000_000 def trigger_page_fault_run(vm): """ Clears old data and starts the fast_page_fault_helper script """ vm.ssh.check_output( "rm -f /tmp/fast_page_fault_helper.out && /usr/local/bin/fast_page_fault_helper -s" ) def get_page_fault_duration(vm): """ Waits for the performance data to be available and will read the duration """ _, duration, _ = vm.ssh.check_output( "while [ ! 
-f /tmp/fast_page_fault_helper.out ]; do sleep 1; done; cat /tmp/fast_page_fault_helper.out" ) return duration @pytest.mark.parametrize("method", ["reporting", "hinting"]) @pytest.mark.nonci def test_hinting_reporting_cpu( microvm_factory, guest_kernel_linux_6_1, rootfs, method, metrics, huge_pages, ): """ Measure the CPU usage when running free page reporting and hinting """ test_microvm = microvm_factory.build( guest_kernel_linux_6_1, rootfs, pci=True, monitor_memory=False, ) test_microvm.spawn(emit_metrics=False) test_microvm.basic_config(vcpu_count=2, mem_size_mib=1024, huge_pages=huge_pages) test_microvm.add_net_iface() free_page_reporting = method == "reporting" free_page_hinting = method == "hinting" # Add a deflated memory balloon. test_microvm.api.balloon.put( amount_mib=0, deflate_on_oom=False, stats_polling_interval_s=0, free_page_reporting=free_page_reporting, free_page_hinting=free_page_hinting, ) test_microvm.start() test_microvm.pin_threads(0) metrics.set_dimensions( { "performance_test": "test_balloon_cpu", # "huge_pages": str(huge_pages), "method": method, "huge_pages": str(huge_pages), **test_microvm.dimensions, } ) test_microvm.ssh.check_output( "nohup /usr/local/bin/fast_page_fault_helper >/dev/null 2>&1 0 and (i + 1 < runs): time.sleep(sleep_duration) # pylint: disable=C0103 @pytest.mark.parametrize("method", ["traditional", "hinting", "reporting"]) def test_size_reduction(uvm_plain_any, method, huge_pages): """ Verify that ballooning reduces RSS usage on a newly booted guest. 
""" traditional_balloon = method == "traditional" free_page_reporting = method == "reporting" free_page_hinting = method == "hinting" if huge_pages != HugePagesConfig.NONE: if not supports_hugetlbfs_discard(): pytest.skip("Host does not support hugetlb discard") if traditional_balloon: pytest.skip("Traditional balloon device won't reduce RSS") test_microvm = uvm_plain_any test_microvm.spawn() test_microvm.basic_config(huge_pages=huge_pages) test_microvm.add_net_iface() # Add a memory balloon. test_microvm.api.balloon.put( amount_mib=0, deflate_on_oom=True, stats_polling_interval_s=0, free_page_reporting=free_page_reporting, free_page_hinting=free_page_hinting, ) # Start the microvm. test_microvm.start() get_stable_rss_mem(test_microvm) test_microvm.ssh.check_output( "nohup /usr/local/bin/fast_page_fault_helper >/dev/null 2>&1 /proc/sys/vm/drop_caches") time.sleep(2) # We take the initial reading of the RSS, then calculate the amount # we need to inflate the balloon with by subtracting it from the # VM size and adding an offset of 10 MiB in order to make sure we # get a lower reading than the initial one. inflate_size = 256 - int(first_reading / 1024) + 10 if traditional_balloon: # Now inflate the balloon test_microvm.api.balloon.patch(amount_mib=inflate_size) elif free_page_hinting: test_microvm.api.balloon_hinting_start.patch() _ = get_stable_rss_mem(test_microvm) if traditional_balloon: # Deflate the balloon completely. test_microvm.api.balloon.patch(amount_mib=0) # Check memory usage again. second_reading = get_stable_rss_mem(test_microvm) # There should be a reduction of at least 10MB. assert first_reading - second_reading >= 10000 ================================================ FILE: tests/integration_tests/performance/test_block.py ================================================ # Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
# SPDX-License-Identifier: Apache-2.0
"""Performance benchmark for block device emulation."""

import concurrent
import os

import pytest

import framework.utils_fio as fio
import host_tools.drive as drive_tools
from framework.utils import check_output, track_cpu_utilization

# size of the block device used in the test, in MB
BLOCK_DEVICE_SIZE_MB = 2048

# Time (in seconds) for which fio "warms up"
WARMUP_SEC = 10

# Time (in seconds) for which fio runs after warmup is done
RUNTIME_SEC = 30

# VM guest memory size
GUEST_MEM_MIB = 1024


def prepare_microvm_for_test(microvm):
    """Prepares the microvm for running a fio-based performance test by tweaking
    various performance related parameters.

    Disables the guest I/O scheduler for the benchmark device and flushes/drops
    both guest and host caches so every run starts from the same cold state.
    """
    # 'none' disables the guest I/O scheduler on the scratch device (/dev/vdb),
    # so fio measures the device emulation rather than guest-side scheduling.
    _, _, stderr = microvm.ssh.check_output(
        "echo 'none' > /sys/block/vdb/queue/scheduler"
    )
    assert stderr == ""

    # First, flush all guest cached data to host, then drop guest FS caches.
    _, _, stderr = microvm.ssh.check_output("sync")
    assert stderr == ""
    _, _, stderr = microvm.ssh.check_output("echo 3 > /proc/sys/vm/drop_caches")
    assert stderr == ""

    # Then, flush all host cached data to hardware, also drop host FS caches.
    check_output("sync")
    check_output("echo 3 > /proc/sys/vm/drop_caches")


def run_fio(
    microvm, mode: fio.Mode, block_size: int, test_output_dir, fio_engine: fio.Engine
):
    """Run a fio test in the specified mode with block size bs.

    Runs fio inside the guest against /dev/vdb while sampling Firecracker's
    per-thread CPU utilization on the host, then copies the fio JSON report and
    per-job logs from the guest into `test_output_dir`.

    Returns the CPU utilization samples (dict keyed by thread name).
    """
    cmd = fio.build_cmd(
        "/dev/vdb",
        BLOCK_DEVICE_SIZE_MB,
        block_size,
        mode,
        microvm.vcpus_count,
        fio_engine,
        RUNTIME_SEC,
        WARMUP_SEC,
    )

    prepare_microvm_for_test(microvm)

    # Start the CPU load monitor.
    with concurrent.futures.ThreadPoolExecutor() as executor:
        # CPU utilization is tracked concurrently with the fio run; the warmup
        # window is omitted from the samples.
        cpu_load_future = executor.submit(
            track_cpu_utilization,
            microvm.firecracker_pid,
            RUNTIME_SEC,
            omit=WARMUP_SEC,
        )

        # Print the fio command in the log and run it
        rc, _, stderr = microvm.ssh.run(f"cd /tmp; {cmd}")
        assert rc == 0, stderr
        assert stderr == ""

        microvm.ssh.scp_get("/tmp/fio.json", test_output_dir)
        microvm.ssh.scp_get("/tmp/*.log", test_output_dir)

        return cpu_load_future.result()


def emit_fio_metrics(logs_dir, metrics):
    """Parses the fio logs in `logs_dir` and emits their contents as CloudWatch metrics"""
    bw_reads, bw_writes = fio.process_log_files(logs_dir, fio.LogType.BW)
    # Each tuple holds the per-job samples for one timestamp; summing gives the
    # aggregate bandwidth across all fio jobs at that point in time.
    for tup in zip(*bw_reads):
        metrics.put_metric("bw_read", sum(tup), "Kilobytes/Second")
    for tup in zip(*bw_writes):
        metrics.put_metric("bw_write", sum(tup), "Kilobytes/Second")

    clat_reads, clat_writes = fio.process_log_files(logs_dir, fio.LogType.CLAT)
    # latency values in fio logs are in nanoseconds, but cloudwatch only supports
    # microseconds as the more granular unit, so need to divide by 1000.
    for tup in zip(*clat_reads):
        for value in tup:
            metrics.put_metric("clat_read", value / 1000, "Microseconds")
    for tup in zip(*clat_writes):
        for value in tup:
            metrics.put_metric("clat_write", value / 1000, "Microseconds")


@pytest.mark.nonci
@pytest.mark.parametrize("vcpus", [1, 2], ids=["1vcpu", "2vcpu"])
@pytest.mark.parametrize("fio_mode", [fio.Mode.RANDREAD, fio.Mode.RANDWRITE])
@pytest.mark.parametrize("fio_block_size", [4096], ids=["bs4096"])
@pytest.mark.parametrize("fio_engine", [fio.Engine.LIBAIO, fio.Engine.PSYNC])
def test_block_performance(
    uvm_plain_acpi,
    vcpus,
    fio_mode,
    fio_block_size,
    fio_engine,
    io_engine,
    metrics,
    results_dir,
):
    """
    Execute block device emulation benchmarking scenarios.
    """
    vm = uvm_plain_acpi
    vm.spawn(log_level="Info", emit_metrics=True)
    vm.basic_config(vcpu_count=vcpus, mem_size_mib=GUEST_MEM_MIB)
    vm.add_net_iface()
    # Add a secondary block device for benchmark tests.
    fs = drive_tools.FilesystemFile(
        os.path.join(vm.fsfiles, "scratch"), BLOCK_DEVICE_SIZE_MB
    )
    vm.add_drive("scratch", fs.path, io_engine=io_engine)
    vm.start()

    metrics.set_dimensions(
        {
            "performance_test": "test_block_performance",
            "io_engine": io_engine,
            "fio_mode": fio_mode,
            "fio_block_size": str(fio_block_size),
            "fio_engine": fio_engine,
            **vm.dimensions,
        }
    )

    # Pin Firecracker threads starting at host CPU 0 to reduce scheduling noise.
    vm.pin_threads(0)

    cpu_util = run_fio(vm, fio_mode, fio_block_size, results_dir, fio_engine)

    emit_fio_metrics(results_dir, metrics)

    for thread_name, values in cpu_util.items():
        for value in values:
            metrics.put_metric(f"cpu_utilization_{thread_name}", value, "Percent")


@pytest.mark.nonci
@pytest.mark.parametrize("vcpus", [1, 2], ids=["1vcpu", "2vcpu"])
@pytest.mark.parametrize("fio_mode", [fio.Mode.RANDREAD])
@pytest.mark.parametrize("fio_block_size", [4096], ids=["bs4096"])
def test_block_vhost_user_performance(
    uvm_plain_acpi,
    vcpus,
    fio_mode,
    fio_block_size,
    metrics,
    results_dir,
):
    """
    Execute block device emulation benchmarking scenarios with a vhost-user
    block backend.
    """
    vm = uvm_plain_acpi
    vm.spawn(log_level="Info", emit_metrics=True)
    vm.basic_config(vcpu_count=vcpus, mem_size_mib=GUEST_MEM_MIB)
    vm.add_net_iface()

    # Add a secondary block device for benchmark tests.
    fs = drive_tools.FilesystemFile(size=BLOCK_DEVICE_SIZE_MB)
    vm.add_vhost_user_drive("scratch", fs.path)
    vm.start()

    metrics.set_dimensions(
        {
            # NOTE(review): reuses the "test_block_performance" dimension value
            # (the io_engine dimension distinguishes vhost-user) — presumably so
            # results are comparable across backends; confirm before changing.
            "performance_test": "test_block_performance",
            "io_engine": "vhost-user",
            "fio_mode": fio_mode,
            "fio_block_size": str(fio_block_size),
            "fio_engine": "libaio",
            **vm.dimensions,
        }
    )

    # Pin Firecracker's threads, then pin the vhost-user backend process to the
    # next free CPU returned by pin_threads().
    next_cpu = vm.pin_threads(0)
    vm.disks_vhost_user["scratch"].pin(next_cpu)

    cpu_util = run_fio(vm, fio_mode, fio_block_size, results_dir, fio.Engine.LIBAIO)

    emit_fio_metrics(results_dir, metrics)

    for thread_name, values in cpu_util.items():
        for value in values:
            metrics.put_metric(f"cpu_utilization_{thread_name}", value, "Percent")


================================================
FILE: tests/integration_tests/performance/test_boottime.py
================================================
# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0
"""Tests that ensure the boot time to init process is within spec."""

import datetime
import re
import time

import pytest

# Regex for obtaining boot time from some string.
DEFAULT_BOOT_ARGS = (
    "reboot=k panic=1 nomodule 8250.nr_uarts=0"
    " i8042.noaux i8042.nomux i8042.nopnp i8042.dumbkbd swiotlb=noforce"
)


def get_boottime_device_info(vm):
    """Auxiliary function for asserting the expected boot time.

    Polls the Firecracker log for the boot-timer device's "Guest-boot-time"
    line and returns (boot_time_us, boot_time_cpu_us) as ints.
    Asserts if the line does not appear within ~5s (50 x 0.1s polls).
    """
    boot_time_us = None
    boot_time_cpu_us = None
    timestamps = []
    timestamp_log_regex = (
        r"Guest-boot-time =\s+(\d+) us\s+(\d+) ms,\s+(\d+) CPU us\s+(\d+) CPU ms"
    )
    iterations = 50
    sleep_time_s = 0.1
    for _ in range(iterations):
        timestamps = re.findall(timestamp_log_regex, vm.log_data)
        if timestamps:
            break
        time.sleep(sleep_time_s)
    if timestamps:
        # Keep only the microsecond values; the millisecond captures are redundant.
        boot_time_us, _, boot_time_cpu_us, _ = timestamps[0]
    assert boot_time_us and boot_time_cpu_us, (
        f"MicroVM did not boot within {sleep_time_s * iterations}s\n"
        f"Firecracker logs:\n{vm.log_data}\n"
        f"Thread backtraces:\n{vm.thread_backtraces}"
    )
    return int(boot_time_us), int(boot_time_cpu_us)


def find_events(log_data):
    """
    Parse events in the Firecracker logs

    Events have this format:

        TIMESTAMP [LOGLEVEL] event_(start|end): EVENT

    Returns a dict mapping event name -> {"start": datetime, "end": datetime,
    "duration": timedelta}.
    """
    ts_fmt = "%Y-%m-%dT%H:%M:%S.%f"
    matches = re.findall(r"(.+) \[.+\] event_(start|end): (.*)", log_data)
    timestamps = {}
    for ts, when, what in matches:
        evt1 = timestamps.setdefault(what, {})
        # ts[:-3] trims the timestamp to microsecond precision for strptime.
        evt1[when] = datetime.datetime.strptime(ts[:-3], ts_fmt)
    for _, val in timestamps.items():
        val["duration"] = val["end"] - val["start"]
    return timestamps


def get_systemd_analyze_times(microvm):
    """
    Parse systemd-analyze output

    Returns (kernel, userspace, total) boot times, all in milliseconds.
    """
    rc, stdout, stderr = microvm.ssh.run("systemd-analyze")
    assert rc == 0, stderr
    assert stderr == ""
    boot_line = stdout.splitlines()[0]

    # The line will look like this:
    # Startup finished in 79ms (kernel) + 231ms (userspace) = 310ms
    # In the regex we capture the time and the unit for kernel, userspace and total values
    pattern = r"Startup finished in ([\d.]*)(ms|s)\s+\(kernel\) \+ ([\d.]*)(ms|s)\s+\(userspace\) = ([\d.]*)(ms|s)\s*"
    kernel, kernel_unit, userspace, userspace_unit, total, total_unit = re.findall(
        pattern, boot_line
    )[0]

    def to_ms(v, unit):
        # Normalize a captured value to milliseconds based on its unit suffix.
        match unit:
            case "ms":
                return float(v)
            case "s":
                return float(v) * 1000

    kernel = to_ms(kernel, kernel_unit)
    userspace = to_ms(userspace, userspace_unit)
    total = to_ms(total, total_unit)
    return kernel, userspace, total


def launch_vm_with_boot_timer(
    microvm_factory,
    guest_kernel_acpi,
    rootfs_rw,
    vcpu_count,
    mem_size_mib,
    pci_enabled,
    boot_from_pmem,
):
    """Launches a microVM with guest-timer and returns the reported metrics for it

    Returns a (vm, boot_time_us, cpu_boot_time_us) tuple.
    """
    vm = microvm_factory.build(
        guest_kernel_acpi, rootfs_rw, pci=pci_enabled, monitor_memory=False
    )
    # Enable the boot-timer device via the jailer command line.
    vm.jailer.extra_args.update({"boot-timer": None})
    vm.spawn()
    if not boot_from_pmem:
        vm.basic_config(
            vcpu_count=vcpu_count,
            mem_size_mib=mem_size_mib,
            boot_args=DEFAULT_BOOT_ARGS + " init=/usr/local/bin/init",
            enable_entropy_device=True,
        )
    else:
        # Boot the root filesystem from a pmem device (DAX) instead of a block
        # device, so no root block device is configured.
        vm.basic_config(
            add_root_device=False,
            vcpu_count=vcpu_count,
            mem_size_mib=mem_size_mib,
            boot_args=DEFAULT_BOOT_ARGS + " init=/usr/local/bin/init rootflags=dax",
            enable_entropy_device=True,
        )
        vm.add_pmem("pmem", rootfs_rw, True, True)
    vm.add_net_iface()
    vm.start()
    vm.pin_threads(0)

    boot_time_us, cpu_boot_time_us = get_boottime_device_info(vm)
    return (vm, boot_time_us, cpu_boot_time_us)


def test_boot_timer(microvm_factory, guest_kernel_acpi, rootfs, pci_enabled):
    """Tests that the boot timer device works"""
    launch_vm_with_boot_timer(
        microvm_factory, guest_kernel_acpi, rootfs, 1, 128, pci_enabled, False
    )


@pytest.mark.parametrize(
    "vcpu_count,mem_size_mib",
    [(1, 128), (1, 1024), (2, 2048), (4, 4096)],
)
@pytest.mark.parametrize("boot_from_pmem", [True, False], ids=["PmemBoot", "BlockBoot"])
@pytest.mark.nonci
def test_boottime(
    microvm_factory,
    guest_kernel_acpi,
    rootfs_rw,
    vcpu_count,
    mem_size_mib,
    boot_from_pmem,
    pci_enabled,
    metrics,
):
    """Test boot time with different guest configurations"""
    # Boot 10 VMs per configuration to collect multiple samples.
    for i in range(10):
        vm, boot_time_us, cpu_boot_time_us = launch_vm_with_boot_timer(
            microvm_factory,
            guest_kernel_acpi,
            rootfs_rw,
            vcpu_count,
            mem_size_mib,
            pci_enabled,
            boot_from_pmem,
        )
        if i == 0:
            # Dimensions are identical across iterations; set them once.
            metrics.set_dimensions(
                {
                    "performance_test": "test_boottime",
                    "boot_from_pmem": str(boot_from_pmem),
                    **vm.dimensions,
                }
            )
        metrics.put_metric(
            "guest_boot_time",
            boot_time_us,
            unit="Microseconds",
        )
        metrics.put_metric(
            "guest_cpu_boot_time",
            cpu_boot_time_us,
            unit="Microseconds",
        )
        events = find_events(vm.log_data)
        build_time = events["build microvm for boot"]["duration"]
        metrics.put_metric("build_time", build_time.microseconds, unit="Microseconds")
        resume_time = events["boot microvm"]["duration"]
        metrics.put_metric("resume_time", resume_time.microseconds, unit="Microseconds")

        kernel, userspace, total = get_systemd_analyze_times(vm)
        metrics.put_metric("systemd_kernel", kernel, unit="Milliseconds")
        metrics.put_metric("systemd_userspace", userspace, unit="Milliseconds")
        metrics.put_metric("systemd_total", total, unit="Milliseconds")

        vm.kill()


================================================
FILE: tests/integration_tests/performance/test_drive_rate_limiter.py
================================================
# Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0
"""Tests for checking the rate limiter on /drives resources."""

import json
import os

import host_tools.drive as drive_tools

MB = 2**20


def check_iops_limit(ssh_connection, block_size, count, min_time, max_time):
    """Verify if the rate limiter throttles block iops using fio.

    Writes `count` blocks of `block_size` bytes to /dev/vdb and asserts the
    run took between `min_time` and `max_time` seconds.
    """
    byte_count = block_size * count
    fio = f"fio --name=fixed-job --direct=1 --rw=write --blocksize={block_size} --size={byte_count} --filename=/dev/vdb --zero_buffers --output-format=json"

    _, stdout, _ = ssh_connection.check_output(fio)
    data = json.loads(stdout)
    # runtime is in milliseconds in fio's JSON output.
    runtime_ms = data["jobs"][0]["write"]["runtime"]
    io_bytes = data["jobs"][0]["write"]["io_bytes"]

    # Check total written bytes.
    assert io_bytes == byte_count
    # Check duration (min_time/max_time are in seconds).
    assert runtime_ms > min_time * 1000
    assert runtime_ms < max_time * 1000


def test_patch_drive_limiter(uvm_plain):
    """
    Test replacing the drive rate-limiter after guest boot works.
    """
    test_microvm = uvm_plain
    test_microvm.spawn()
    # Set up the microVM with 2 vCPUs, 512 MiB of RAM, 1 network iface, a root
    # file system, and a scratch drive.
    test_microvm.basic_config(vcpu_count=2, mem_size_mib=512)
    test_microvm.add_net_iface()
    fs1 = drive_tools.FilesystemFile(
        os.path.join(test_microvm.fsfiles, "scratch"), size=512
    )
    test_microvm.api.drive.put(
        drive_id="scratch",
        path_on_host=test_microvm.create_jailed_resource(fs1.path),
        is_root_device=False,
        is_read_only=False,
        rate_limiter={
            "bandwidth": {"size": 10 * MB, "refill_time": 100},
            "ops": {"size": 100, "refill_time": 100},
        },
    )
    test_microvm.start()

    # Validate IOPS stays within above configured limits.
    # For example, the below call will validate that writing 1000 blocks
    # of 512b completes in roughly 1 second (the timing is not exact,
    # so we target to stay within 30% error).
    check_iops_limit(test_microvm.ssh, 512, 1000, 0.7, 1.3)
    check_iops_limit(test_microvm.ssh, 4096, 1000, 0.7, 1.3)

    # Patch ratelimiter
    test_microvm.api.drive.patch(
        drive_id="scratch",
        rate_limiter={
            "bandwidth": {"size": 100 * MB, "refill_time": 100},
            "ops": {"size": 200, "refill_time": 100},
        },
    )

    check_iops_limit(test_microvm.ssh, 512, 2000, 0.7, 1.3)
    check_iops_limit(test_microvm.ssh, 4096, 2000, 0.7, 1.3)

    # Patch ratelimiter
    test_microvm.api.drive.patch(
        drive_id="scratch", rate_limiter={"ops": {"size": 1000, "refill_time": 100}}
    )

    check_iops_limit(test_microvm.ssh, 512, 10000, 0.7, 1.3)
    check_iops_limit(test_microvm.ssh, 4096, 10000, 0.7, 1.3)


================================================
FILE: tests/integration_tests/performance/test_hotplug_memory.py
================================================
# Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0 """ Tests for verifying the virtio-mem is working correctly This file also contains functional tests for virtio-mem because they need to be run on an ag=1 host due to the use of HugePages. """ import pytest from tenacity import Retrying, retry_if_exception_type, stop_after_delay, wait_fixed from framework.guest_stats import MeminfoGuest from framework.microvm import HugePagesConfig, SnapshotType from framework.properties import global_props from framework.utils import get_resident_memory, supports_hugetlbfs_discard MEMHP_BOOTARGS = "console=ttyS0 reboot=k panic=1 memhp_default_state=online_movable" DEFAULT_CONFIG = {"total_size_mib": 1024, "slot_size_mib": 128, "block_size_mib": 2} def uvm_booted_memhp( uvm, rootfs, _microvm_factory, vhost_user, memhp_config, huge_pages, _uffd_handler, snapshot_type, ): """Boots a VM with the given memory hotplugging config""" uvm.spawn() uvm.memory_monitor = None uvm_config = { "boot_args": MEMHP_BOOTARGS, "huge_pages": huge_pages, # we need enough memory to be able to hotplug up to 16GB "mem_size_mib": 512, } if vhost_user: # We need to setup ssh keys manually because we did not specify rootfs # in microvm_factory.build method ssh_key = rootfs.with_suffix(".id_rsa") uvm.ssh_key = ssh_key uvm.basic_config( **uvm_config, add_root_device=False, track_dirty_pages=( snapshot_type.needs_dirty_page_tracking if snapshot_type else False ), ) uvm.add_vhost_user_drive( "rootfs", rootfs, is_root_device=True, is_read_only=True ) else: uvm.basic_config(**uvm_config) uvm.api.memory_hotplug.put(**memhp_config) uvm.add_net_iface() uvm.start() return uvm def uvm_resumed_memhp( uvm_plain, rootfs, microvm_factory, vhost_user, memhp_config, huge_pages, uffd_handler, snapshot_type, ): """Restores a VM with the given memory hotplugging config after booting and snapshotting""" if vhost_user: pytest.skip("vhost-user doesn't support snapshot/restore") if huge_pages and huge_pages != HugePagesConfig.NONE and not 
uffd_handler: pytest.skip("Hugepages requires a UFFD handler") uvm = uvm_booted_memhp( uvm_plain, rootfs, microvm_factory, vhost_user, memhp_config, huge_pages, None, snapshot_type, ) snapshot = uvm.make_snapshot(snapshot_type) uvm2 = microvm_factory.build_from_snapshot(snapshot, uffd_handler_name=uffd_handler) uvm2.memory_monitor = None return uvm2 @pytest.fixture( params=[ (uvm_booted_memhp, False, HugePagesConfig.NONE, None, None), (uvm_booted_memhp, False, HugePagesConfig.HUGETLBFS_2MB, None, None), (uvm_booted_memhp, True, HugePagesConfig.NONE, None, None), (uvm_resumed_memhp, False, HugePagesConfig.NONE, None, SnapshotType.FULL), (uvm_resumed_memhp, False, HugePagesConfig.NONE, None, SnapshotType.DIFF), ( uvm_resumed_memhp, False, HugePagesConfig.NONE, None, SnapshotType.DIFF_MINCORE, ), ( uvm_resumed_memhp, False, HugePagesConfig.NONE, "on_demand", SnapshotType.FULL, ), ( uvm_resumed_memhp, False, HugePagesConfig.HUGETLBFS_2MB, "on_demand", SnapshotType.FULL, ), ], ids=[ "booted", "booted-huge-pages", "booted-vhost-user", "resumed", "resumed-diff", "resumed-mincore", "resumed-uffd", "resumed-uffd-huge-pages", ], ) def uvm_any_memhp(request, uvm_plain_6_1, rootfs, microvm_factory): """Fixture that yields a booted or resumed VM with memory hotplugging""" ctor, vhost_user, huge_pages, uffd_handler, snapshot_type = request.param yield ctor( uvm_plain_6_1, rootfs, microvm_factory, vhost_user, DEFAULT_CONFIG, huge_pages, uffd_handler, snapshot_type, ) def validate_metrics(uvm): """Validates that there are no fails in the metrics""" metrics_to_check = ["plug_fails", "unplug_fails", "unplug_all_fails", "state_fails"] if supports_hugetlbfs_discard(): metrics_to_check.append("unplug_discard_fails") uvm.flush_metrics() for metrics in uvm.get_all_metrics(): for k in metrics_to_check: assert ( metrics["memory_hotplug"][k] == 0 ), f"{k}={metrics[k]} is greater than zero" def check_device_detected(uvm): """ Check that the guest kernel has enabled virtio-mem. 
""" hp_config = uvm.api.memory_hotplug.get().json() _, stdout, _ = uvm.ssh.check_output("dmesg | grep 'virtio_mem'") for line in stdout.splitlines(): _, key, value = line.strip().split(":") key = key.strip() value = int(value.strip(), base=0) match key: case "start address": assert value >= (512 << 30), "start address isn't in past MMIO64 region" case "region size": assert ( value == hp_config["total_size_mib"] << 20 ), "region size doesn't match" case "device block size": assert ( value == hp_config["block_size_mib"] << 20 ), "block size doesn't match" case "plugged size": assert value == 0, "plugged size doesn't match" case "requested size": assert value == 0, "requested size doesn't match" case _: continue def check_memory_usable(uvm): """Allocates memory to verify it's usable (5% margin to avoid OOM-kill)""" mem_available = MeminfoGuest(uvm).get().mem_available.mib() # try to allocate 95% of available memory amount_mib = int(mem_available * 95 / 100) _ = uvm.ssh.check_output(f"/usr/local/bin/fillmem {amount_mib}", timeout=30) # verify the allocation was successful _ = uvm.ssh.check_output("cat /tmp/fillmem_output.txt | grep successful") def check_hotplug(uvm, requested_size_mib): """Verifies memory can be hot(un)plugged""" meminfo = MeminfoGuest(uvm) mem_total_fixed = ( meminfo.get().mem_total.mib() - uvm.api.memory_hotplug.get().json()["plugged_size_mib"] ) uvm.hotplug_memory(requested_size_mib) # verify guest driver received the request _, stdout, _ = uvm.ssh.check_output( "dmesg | grep 'virtio_mem' | grep 'requested size' | tail -1" ) assert ( int(stdout.strip().split(":")[-1].strip(), base=0) == requested_size_mib << 20 ) for attempt in Retrying( retry=retry_if_exception_type(AssertionError), stop=stop_after_delay(5), wait=wait_fixed(1), reraise=True, ): with attempt: # verify guest driver executed the request mem_total_after = meminfo.get().mem_total.mib() assert mem_total_after == mem_total_fixed + requested_size_mib def check_hotunplug(uvm, 
requested_size_mib): """Verifies memory can be hotunplugged and gets released""" rss_before = get_resident_memory(uvm.ps) check_hotplug(uvm, requested_size_mib) rss_after = get_resident_memory(uvm.ps) print(f"RSS before: {rss_before}, after: {rss_after}") huge_pages = HugePagesConfig(uvm.api.machine_config.get().json()["huge_pages"]) if huge_pages == HugePagesConfig.NONE or supports_hugetlbfs_discard(): assert rss_after < rss_before, "RSS didn't decrease" def test_virtio_mem_hotplug_hotunplug(uvm_any_memhp): """ Check that memory can be hotplugged into the VM. """ uvm = uvm_any_memhp check_device_detected(uvm) check_hotplug(uvm, 1024) check_memory_usable(uvm) check_hotunplug(uvm, 0) # Check it works again check_hotplug(uvm, 1024) check_memory_usable(uvm) validate_metrics(uvm) @pytest.mark.parametrize( "memhp_config", [ {"total_size_mib": 256, "slot_size_mib": 128, "block_size_mib": 64}, {"total_size_mib": 256, "slot_size_mib": 128, "block_size_mib": 128}, {"total_size_mib": 256, "slot_size_mib": 256, "block_size_mib": 64}, {"total_size_mib": 256, "slot_size_mib": 256, "block_size_mib": 256}, ], ids=["all_different", "slot_sized_block", "single_slot", "single_block"], ) def test_virtio_mem_configs(uvm_plain_6_1, memhp_config): """ Check that the virtio mem device is working as expected for different configs """ uvm = uvm_booted_memhp( uvm_plain_6_1, None, None, False, memhp_config, None, None, None ) if not uvm.pci_enabled: pytest.skip( "Skip tests on MMIO transport to save time as we don't expect any difference." 
) check_device_detected(uvm) for size in range( 0, memhp_config["total_size_mib"] + 1, memhp_config["block_size_mib"] ): check_hotplug(uvm, size) check_memory_usable(uvm) for size in range( memhp_config["total_size_mib"] - memhp_config["block_size_mib"], -1, -memhp_config["block_size_mib"], ): check_hotunplug(uvm, size) validate_metrics(uvm) def test_snapshot_restore_persistence(uvm_plain_6_1, microvm_factory, snapshot_type): """ Check that hotplugged memory is persisted across snapshot/restore. """ if not uvm_plain_6_1.pci_enabled: pytest.skip( "Skip tests on MMIO transport to save time as we don't expect any difference." ) uvm = uvm_booted_memhp( uvm_plain_6_1, None, microvm_factory, False, DEFAULT_CONFIG, None, None, snapshot_type, ) uvm.hotplug_memory(1024) # Increase /dev/shm size as it defaults to half of the boot memory uvm.ssh.check_output("mount -o remount,size=1024M -t tmpfs tmpfs /dev/shm") uvm.ssh.check_output("dd if=/dev/urandom of=/dev/shm/mem_hp_test bs=1M count=1024") _, checksum_before, _ = uvm.ssh.check_output("sha256sum /dev/shm/mem_hp_test") snapshot = uvm.make_snapshot(snapshot_type) restored_vm = microvm_factory.build_from_snapshot(snapshot) _, checksum_after, _ = restored_vm.ssh.check_output( "sha256sum /dev/shm/mem_hp_test" ) assert checksum_before == checksum_after, "Checksums didn't match" validate_metrics(restored_vm) def test_snapshot_restore_incremental(uvm_plain_6_1, microvm_factory, snapshot_type): """ Check that hotplugged memory is persisted across snapshot/restore. """ if not uvm_plain_6_1.pci_enabled: pytest.skip( "Skip tests on MMIO transport to save time as we don't expect any difference." 
) uvm = uvm_booted_memhp( uvm_plain_6_1, None, microvm_factory, False, DEFAULT_CONFIG, None, None, snapshot_type, ) hp_total_size_mib = uvm.api.memory_hotplug.get().json()["total_size_mib"] snapshot = uvm.make_snapshot(snapshot_type) hotplug_count = 16 hp_mem_mib_per_cycle = hp_total_size_mib // hotplug_count # we're not using hugepages, so it's always 4KiB pages guest_pages_per_mib = 1024 // 4 hp_mem_pages_per_cycle = hp_mem_mib_per_cycle * guest_pages_per_mib checksums = [] for i, uvm in enumerate( microvm_factory.build_n_from_snapshot( snapshot, hotplug_count + 1, incremental=True, use_snapshot_editor=True, ) ): uvm.memory_monitor = None # check checksums of previous cycles for j in range(i): _, checksum, _ = uvm.ssh.check_output(f"sha256sum /dev/shm/mem_hp_test_{j}") assert checksum == checksums[j], f"Checksums didn't match for i={i} j={j}" # we run hotplug_count+1 uvms to check all the checksums at the end if i >= hotplug_count: continue total_hp_mem_mib = hp_mem_mib_per_cycle * (i + 1) uvm.hotplug_memory(total_hp_mem_mib) # Increase /dev/shm size as it defaults to half of the boot memory uvm.ssh.check_output( f"mount -o remount,size={total_hp_mem_mib}M -t tmpfs tmpfs /dev/shm" ) uvm.ssh.check_output( f"dd if=/dev/urandom of=/dev/shm/mem_hp_test_{i} bs=1M count={hp_mem_mib_per_cycle}" ) _, checksum, _ = uvm.ssh.check_output(f"sha256sum /dev/shm/mem_hp_test_{i}") checksums.append(checksum) # dirty a page in the middle of the hotplugged memory to verify differential snapshots # This is to test also the case where the page and the slots are not consecutive with the # one we're going to write to avoid issues like #5696. 
file_to_dirty = i // 2 page_to_dirty = hp_mem_pages_per_cycle // 2 uvm.ssh.check_output( f"dd if=/dev/shm/mem_hp_test_{file_to_dirty} of=/dev/shm/mem_hp_test_{file_to_dirty} " f"bs=4K count=1 skip={page_to_dirty} seek={page_to_dirty} conv=notrunc" ) validate_metrics(uvm) def timed_memory_hotplug(uvm, size, metrics, metric_prefix, fc_metric_name): """Wait for all memory hotplug events to be processed""" uvm.flush_metrics() # poll every 5ms to check completion as the fastest hotplug is around 30ms api_time, total_time = uvm.hotplug_memory(size, poll=0.005) fc_metrics = uvm.flush_metrics() metrics.put_metric( f"{metric_prefix}_api_time", api_time, unit="Seconds", ) metrics.put_metric( f"{metric_prefix}_total_time", total_time, unit="Seconds", ) metrics.put_metric( f"{metric_prefix}_fc_time", fc_metrics["memory_hotplug"][fc_metric_name]["sum_us"], unit="Microseconds", ) @pytest.mark.nonci @pytest.mark.parametrize( "hotplug_size", [ 1024, 2048, 4096, 8192, 16384, ], ) @pytest.mark.parametrize( "huge_pages", [HugePagesConfig.NONE, HugePagesConfig.HUGETLBFS_2MB], ) def test_memory_hotplug_latency( microvm_factory, guest_kernel_linux_6_1, rootfs, hotplug_size, huge_pages, metrics ): """Test the latency of hotplugging memory""" for i in range(20): config = { "total_size_mib": hotplug_size, "slot_size_mib": 128, "block_size_mib": 2, } uvm_plain_6_1 = microvm_factory.build(guest_kernel_linux_6_1, rootfs, pci=True) uvm = uvm_booted_memhp( uvm_plain_6_1, None, None, False, config, None, None, None ) if i == 0: metrics.set_dimensions( { "instance": global_props.instance, "cpu_model": global_props.cpu_model, "host_kernel": f"linux-{global_props.host_linux_version}", "performance_test": "test_memory_hotplug_latency", "hotplug_size": str(hotplug_size), "huge_pages": huge_pages, **uvm.dimensions, } ) timed_memory_hotplug(uvm, hotplug_size, metrics, "hotplug", "plug_agg") timed_memory_hotplug(uvm, 0, metrics, "hotunplug", "unplug_agg") timed_memory_hotplug(uvm, hotplug_size, metrics, 
"hotplug_2nd", "plug_agg") ================================================ FILE: tests/integration_tests/performance/test_huge_pages.py ================================================ # Copyright 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved. # SPDX-License-Identifier: Apache-2.0 """Integration tests for Firecracker's huge pages support""" import signal import time import pytest from framework import utils from framework.microvm import HugePagesConfig from framework.properties import global_props from framework.utils_ftrace import ftrace_events def check_hugetlbfs_in_use(pid: int, allocation_name: str): """Asserts that the process with the given `pid` is using hugetlbfs pages somewhere. `allocation_name` should be the name of the smaps entry for which we want to verify that huge pages are used. For memfd-backed guest memory, this would be "memfd:guest_mem" (the `guest_mem` part originating from the name we give the memfd in memory.rs), for anonymous memory this would be "/anon_hugepage". Note: in our testing, we do not currently configure vhost-user-blk devices, so we only exercise the "/anon_hugepage" case. 
""" # Format of a sample smaps entry: # 7fc2bc400000-7fc2cc400000 rw-s 00000000 00:10 25488401 /anon_hugepage # Size: 262144 kB # KernelPageSize: 2048 kB # MMUPageSize: 2048 kB # Rss: 0 kB # Pss: 0 kB # Pss_Dirty: 0 kB # Shared_Clean: 0 kB # Shared_Dirty: 0 kB # Private_Clean: 0 kB # Private_Dirty: 0 kB # Referenced: 0 kB # Anonymous: 0 kB # LazyFree: 0 kB # AnonHugePages: 0 kB # ShmemPmdMapped: 0 kB # FilePmdMapped: 0 kB # Shared_Hugetlb: 0 kB # Private_Hugetlb: 92160 kB # Swap: 0 kB # SwapPss: 0 kB # Locked: 0 kB # THPeligible: 0 # ProtectionKey: 0 cmd = f"cat /proc/{pid}/smaps | grep {allocation_name} -A 23 | grep KernelPageSize" _, stdout, _ = utils.check_output(cmd) kernel_page_size_kib = int(stdout.split()[1]) assert kernel_page_size_kib > 4 def test_hugetlbfs_boot(uvm_plain): """Tests booting a microvm with guest memory backed by 2MB hugetlbfs pages""" uvm_plain.spawn() uvm_plain.basic_config(huge_pages=HugePagesConfig.HUGETLBFS_2MB, mem_size_mib=128) uvm_plain.add_net_iface() uvm_plain.start() check_hugetlbfs_in_use( uvm_plain.firecracker_pid, "/anon_hugepage", ) def test_hugetlbfs_snapshot(microvm_factory, uvm_plain, snapshot_type): """ Test hugetlbfs snapshot restore via uffd Despite guest memory being backed by huge pages, enabling differential snapshots causes KVM to set up guest mappings at 4k granularity """ ### Create Snapshot ### vm = uvm_plain vm.memory_monitor = None vm.spawn() vm.basic_config( huge_pages=HugePagesConfig.HUGETLBFS_2MB, mem_size_mib=128, track_dirty_pages=snapshot_type.needs_dirty_page_tracking, ) vm.add_net_iface() vm.start() check_hugetlbfs_in_use(vm.firecracker_pid, "/anon_hugepage") snapshot = vm.make_snapshot(snapshot_type) vm.kill() ### Restore Snapshot ### vm = microvm_factory.build() vm.spawn() vm.restore_from_snapshot(snapshot, resume=True, uffd_handler_name="on_demand") check_hugetlbfs_in_use(vm.firecracker_pid, "/anon_hugepage") @pytest.mark.parametrize("huge_pages", HugePagesConfig) def test_ept_violation_count( 
microvm_factory, uvm_plain, metrics, huge_pages, ): """ Tests hugetlbfs snapshot restore with a UFFD handler that pre-faults the entire guest memory on the first page fault. Records metrics about the number of EPT_VIOLATIONS encountered by KVM. """ ### Create Snapshot ### vm = uvm_plain vm.memory_monitor = None vm.spawn() vm.basic_config(huge_pages=huge_pages, mem_size_mib=256) vm.add_net_iface() vm.start() metrics.set_dimensions( { "performance_test": "test_hugetlbfs_snapshot", "huge_pages_config": str(huge_pages), **vm.dimensions, } ) # Wait for microvm to boot. Then spawn fast_page_fault_helper to setup an environment where we can trigger # a lot of fast_page_faults after restoring the snapshot. vm.ssh.check_output( "nohup /usr/local/bin/fast_page_fault_helper >/dev/null 2>&1 3072MB, it is split in a 2nd region X86_MEMORY_GAP_START = 3072 * 2**20 @pytest.mark.parametrize( "vcpu_count,mem_size_mib", [(1, 128), (1, 1024), (2, 2048), (4, 4096), (32, 4096)], ) @pytest.mark.nonci def test_memory_overhead( microvm_factory, guest_kernel_acpi, rootfs, vcpu_count, mem_size_mib, pci_enabled, metrics, ): """Track Firecracker memory overhead. We take a single measurement as it only varies by a few KiB each run. 
""" for _ in range(5): microvm = microvm_factory.build( guest_kernel_acpi, rootfs, pci=pci_enabled, monitor_memory=False ) microvm.spawn(emit_metrics=True) microvm.basic_config(vcpu_count=vcpu_count, mem_size_mib=mem_size_mib) microvm.add_net_iface() microvm.start() metrics.set_dimensions( {"performance_test": "test_memory_overhead", **microvm.dimensions} ) snapshot = microvm.snapshot_full() microvm.kill() microvm2 = microvm_factory.build_from_snapshot(snapshot) guest_mem_bytes = mem_size_mib * 2**20 guest_mem_splits = { guest_mem_bytes, X86_MEMORY_GAP_START, } if guest_mem_bytes > X86_MEMORY_GAP_START: guest_mem_splits.add(guest_mem_bytes - X86_MEMORY_GAP_START) mem_stats = defaultdict(int) ps = psutil.Process(microvm2.firecracker_pid) for pmmap in ps.memory_maps(grouped=False): # We publish 'size' and 'rss' (resident). size would be the worst case, # whereas rss is the current paged-in memory. mem_stats["total_size"] += pmmap.size mem_stats["total_rss"] += pmmap.rss pmmap_path = Path(pmmap.path) if pmmap_path.exists() and pmmap_path.name.startswith("firecracker"): mem_stats["binary_size"] += pmmap.size mem_stats["binary_rss"] += pmmap.rss if pmmap.size not in guest_mem_splits: mem_stats["overhead_size"] += pmmap.size mem_stats["overhead_rss"] += pmmap.rss for key, value in mem_stats.items(): metrics.put_metric(key, value, unit="Bytes") mem_info = ps.memory_full_info() for metric in ["uss", "text"]: val = getattr(mem_info, metric) metrics.put_metric(metric, val, unit="Bytes") ================================================ FILE: tests/integration_tests/performance/test_mmds.py ================================================ # Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
# SPDX-License-Identifier: Apache-2.0 """Tests the performance of MMDS token generation and verification.""" import re import pytest from framework.utils import configure_mmds, populate_data_store # Default IPv4 address for MMDS DEFAULT_IPV4 = "169.254.169.254" # Number of iterations for performance measurements ITERATIONS = 500 def parse_curl_timing(timing_line): """Parse curl timing output and extract timing information in milliseconds.""" # curl -w format outputs timing in seconds, convert to milliseconds # Expected format: "time_total:0.123456" match = re.search(r"time_total:([\d.]+)", timing_line) if match: return float(match.group(1)) * 1000 # Convert to milliseconds raise ValueError(f"Could not parse timing from curl output: {timing_line}") @pytest.fixture def mmds_microvm(uvm_plain_any): """Creates a microvm with MMDS configured for performance testing.""" uvm = uvm_plain_any uvm.spawn(log_level="Info") uvm.basic_config() uvm.add_net_iface() # Configure MMDS V2 (requires tokens) configure_mmds(uvm, iface_ids=["eth0"], version="V2", ipv4_address=DEFAULT_IPV4) # Populate with minimal test data test_data = {"latest": {"meta-data": {"instance-id": "i-1234567890abcdef0"}}} populate_data_store(uvm, test_data) uvm.start() uvm.ssh.check_output(f"ip route add {DEFAULT_IPV4} dev eth0") return uvm @pytest.mark.nonci def test_mmds_token(mmds_microvm, metrics): """ Test MMDS token generation performance using curl timing from within the guest. This test measures the time it takes to generate MMDS session tokens using curl's built-in timing capabilities. 
""" metrics.set_dimensions( { "performance_test": "test_mmds_performance", **mmds_microvm.dimensions, } ) # Measure token generation performance for _ in range(ITERATIONS): # Curl command to generate token with timing token_cmd = ( f'curl -m 2 -s -w "\\ntime_total:%{{time_total}}" ' f'-X PUT -H "X-metadata-token-ttl-seconds: 60" ' f"http://{DEFAULT_IPV4}/latest/api/token" ) _, stdout, stderr = mmds_microvm.ssh.check_output(token_cmd) assert stderr == "", "Error generating token" # Parse timing and token from output lines = stdout.strip().split("\n") token = lines[0].strip() # First line is the token # Verify token was generated successfully assert len(token) > 0, f"Token generation failed. Output: {stdout}" generation_time_ms = parse_curl_timing(lines[-1]) metrics.put_metric("token_generation_time", generation_time_ms, "Milliseconds") # Curl command to verify token with timing request_cmd = ( f'curl -m 2 -s -w "\\ntime_total:%{{time_total}}" ' f'-X GET -H "X-metadata-token: {token}" -H "Accept: application/json" ' f"http://{DEFAULT_IPV4}/latest/meta-data/instance-id" ) _, stdout, stderr = mmds_microvm.ssh.check_output(request_cmd) assert stderr == "", "MMDS request failed" # Parse response and timing lines = stdout.strip().split("\n") response = lines[0].strip() # First line is the response # Verify request was successful assert ( "i-1234567890abcdef0" in response ), f"MMDS request failed. Response: {response}" request_time_ms = parse_curl_timing(lines[-1]) metrics.put_metric("request_time", request_time_ms, "Milliseconds") ================================================ FILE: tests/integration_tests/performance/test_network.py ================================================ # Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
# SPDX-License-Identifier: Apache-2.0 """Tests the network latency of a Firecracker guest.""" import json import re from pathlib import Path import pytest from framework.utils_iperf import IPerf3Test, emit_iperf3_metrics def consume_ping_output(ping_putput): """Consume ping output. Output example: PING 8.8.8.8 (8.8.8.8) 56(84) bytes of data. 64 bytes from 8.8.8.8: icmp_seq=1 ttl=118 time=17.7 ms 64 bytes from 8.8.8.8: icmp_seq=2 ttl=118 time=17.7 ms 64 bytes from 8.8.8.8: icmp_seq=3 ttl=118 time=17.4 ms 64 bytes from 8.8.8.8: icmp_seq=4 ttl=118 time=17.8 ms --- 8.8.8.8 ping statistics --- 4 packets transmitted, 4 received, 0% packet loss, time 3005ms rtt min/avg/max/mdev = 17.478/17.705/17.808/0.210 ms """ output = ping_putput.strip().split("\n") assert len(output) > 2 # Compute percentiles. pattern_time = ".+ bytes from .+: icmp_seq=.+ ttl=.+ time=(.+) ms" for seq in output: time = re.findall(pattern_time, seq) if time: assert len(time) == 1 yield float(time[0]) @pytest.fixture def network_microvm(request, uvm_plain_acpi): """Creates a microvm with the networking setup used by the performance tests in this file. This fixture receives its vcpu count via indirect parameterization""" guest_mem_mib = 1024 guest_vcpus = request.param vm = uvm_plain_acpi vm.spawn(log_level="Info", emit_metrics=True) vm.basic_config(vcpu_count=guest_vcpus, mem_size_mib=guest_mem_mib) vm.add_net_iface() vm.start() vm.pin_threads(0) return vm @pytest.mark.nonci @pytest.mark.parametrize("network_microvm", [1], indirect=True) def test_network_latency(network_microvm, metrics): """ Test network latency by sending pings from the guest to the host. 
""" rounds = 15 request_per_round = 30 delay = 0.0 metrics.set_dimensions( { "performance_test": "test_network_latency", **network_microvm.dimensions, } ) samples = [] host_ip = network_microvm.iface["eth0"]["iface"].host_ip for _ in range(rounds): _, ping_output, _ = network_microvm.ssh.check_output( f"ping -c {request_per_round} -i {delay} {host_ip}" ) samples.extend(consume_ping_output(ping_output)) for sample in samples: metrics.put_metric("ping_latency", sample, "Milliseconds") @pytest.mark.nonci @pytest.mark.timeout(120) @pytest.mark.parametrize("network_microvm", [1, 2], indirect=True) @pytest.mark.parametrize("payload_length", ["128K", "1024K"], ids=["p128K", "p1024K"]) @pytest.mark.parametrize("mode", ["g2h", "h2g"]) def test_network_tcp_throughput( network_microvm, payload_length, mode, metrics, results_dir, ): """ Iperf between guest and host in both directions for TCP workload. """ base_port = 5000 # Time (in seconds) for which iperf "warms up" warmup_sec = 5 # Time (in seconds) for which iperf runs after warmup is done runtime_sec = 20 metrics.set_dimensions( { "performance_test": "test_network_tcp_throughput", "payload_length": payload_length, "mode": mode, **network_microvm.dimensions, } ) test = IPerf3Test( microvm=network_microvm, base_port=base_port, runtime=runtime_sec, omit=warmup_sec, mode=mode, num_clients=network_microvm.vcpus_count, connect_to=network_microvm.iface["eth0"]["iface"].host_ip, payload_length=payload_length, ) data = test.run_test(network_microvm.vcpus_count + 2) for i, g2h in enumerate(data["g2h"]): Path(results_dir / f"g2h_{i}.json").write_text( json.dumps(g2h), encoding="utf-8" ) for i, h2g in enumerate(data["h2g"]): Path(results_dir / f"h2g_{i}.json").write_text( json.dumps(h2g), encoding="utf-8" ) emit_iperf3_metrics(metrics, data, warmup_sec) ================================================ FILE: tests/integration_tests/performance/test_pmem.py ================================================ # Copyright 2025 Amazon.com, 
Inc. or its affiliates. All Rights Reserved. # SPDX-License-Identifier: Apache-2.0 """Performance benchmark for pmem device""" import concurrent import os from pathlib import Path import pytest import framework.utils_fio as fio import host_tools.drive as drive_tools from framework.utils import track_cpu_utilization PMEM_DEVICE_SIZE_MB = 2048 PMEM_DEVICE_SIZE_SINGLE_READ_MB = 512 WARMUP_SEC = 10 RUNTIME_SEC = 30 GUEST_MEM_MIB = 1024 def run_fio( microvm, test_output_dir, mode: fio.Mode, block_size: int, fio_engine: fio.Engine ): """Run a normal fio test""" cmd = fio.build_cmd( "/dev/pmem0", PMEM_DEVICE_SIZE_MB, block_size, mode, microvm.vcpus_count, fio_engine, RUNTIME_SEC, WARMUP_SEC, ) with concurrent.futures.ThreadPoolExecutor() as executor: cpu_load_future = executor.submit( track_cpu_utilization, microvm.firecracker_pid, RUNTIME_SEC, omit=WARMUP_SEC, ) rc, _, stderr = microvm.ssh.run(f"cd /tmp; {cmd}") assert rc == 0, stderr assert stderr == "" microvm.ssh.scp_get("/tmp/fio.json", test_output_dir) microvm.ssh.scp_get("/tmp/*.log", test_output_dir) return cpu_load_future.result() def emit_fio_metrics(logs_dir, metrics): """Parses the fio logs and emits bandwidth as metrics""" bw_reads, bw_writes = fio.process_log_files(logs_dir, fio.LogType.BW) for tup in zip(*bw_reads): metrics.put_metric("bw_read", sum(tup), "Kilobytes/Second") for tup in zip(*bw_writes): metrics.put_metric("bw_write", sum(tup), "Kilobytes/Second") clat_reads, clat_writes = fio.process_log_files(logs_dir, fio.LogType.CLAT) # latency values in fio logs are in nanoseconds, but cloudwatch only supports # microseconds as the more granular unit, so need to divide by 1000. 
for tup in zip(*clat_reads): for value in tup: metrics.put_metric("clat_read", value / 1000, "Microseconds") for tup in zip(*clat_writes): for value in tup: metrics.put_metric("clat_write", value / 1000, "Microseconds") @pytest.mark.nonci @pytest.mark.parametrize("vcpus", [1, 2], ids=["1vcpu", "2vcpu"]) @pytest.mark.parametrize("fio_mode", [fio.Mode.RANDREAD, fio.Mode.RANDWRITE]) @pytest.mark.parametrize("fio_block_size", [4096], ids=["bs4096"]) @pytest.mark.parametrize("fio_engine", [fio.Engine.LIBAIO, fio.Engine.PSYNC]) def test_pmem_performance( uvm_plain_acpi, vcpus, fio_mode, fio_block_size, fio_engine, metrics, results_dir, ): """ Measure performance of pmem device """ vm = uvm_plain_acpi vm.memory_monitor = None vm.spawn() vm.basic_config(vcpu_count=vcpus, mem_size_mib=GUEST_MEM_MIB) vm.add_net_iface() # Add a secondary block device for benchmark tests. fs = drive_tools.FilesystemFile( os.path.join(vm.fsfiles, "scratch"), PMEM_DEVICE_SIZE_MB ) vm.add_pmem("scratch", fs.path, False, False) vm.start() vm.pin_threads(0) metrics.set_dimensions( { "performance_test": "test_pmem_performance", "fio_mode": fio_mode, "fio_block_size": str(fio_block_size), "fio_engine": fio_engine, **vm.dimensions, } ) # Do a full read run before benchmarking to deal with shadow page faults. # The impact of shadow page faults is tested in another test. run_fio_single_read(vm, 0, results_dir, fio_block_size) cpu_util = run_fio(vm, results_dir, fio_mode, fio_block_size, fio_engine) emit_fio_metrics(results_dir, metrics) for thread_name, values in cpu_util.items(): for value in values: metrics.put_metric(f"cpu_utilization_{thread_name}", value, "Percent") def run_fio_single_read(microvm, run_index, test_output_dir, block_size: int): """ Run a single full read test with fio. 
The test is single threaded and uses only `libaio` since we just need to test a sequential """ cmd = fio.build_cmd( "/dev/pmem0", None, block_size, fio.Mode.READ, 1, fio.Engine.LIBAIO, None, None, False, ) rc, _, stderr = microvm.ssh.run(f"cd /tmp; {cmd}") assert rc == 0, stderr assert stderr == "" log_path = Path(test_output_dir) / f"fio_{run_index}.json" microvm.ssh.scp_get("/tmp/fio.json", log_path) def emit_fio_single_read_metrics(logs_dir, metrics): """Process json output of the fio command and emmit `read` metrics""" bw_reads, _ = fio.process_json_files(logs_dir) for reads in bw_reads: metrics.put_metric("bw_read", sum(reads) / 1000, "Kilobytes/Second") @pytest.mark.nonci @pytest.mark.parametrize("fio_block_size", [4096], ids=["bs4096"]) def test_pmem_first_read( microvm_factory, guest_kernel_acpi, rootfs, fio_block_size, metrics, results_dir, ): """ Measure performance of a first full read from the pmem device. Values should be lower than in normal perf test since the first read of each page should also trigger a KVM internal page fault which should slow things down. """ for i in range(10): vm = microvm_factory.build( guest_kernel_acpi, rootfs, pci=True, monitor_memory=False ) vm.spawn() vm.basic_config(mem_size_mib=GUEST_MEM_MIB) vm.add_net_iface() fs = drive_tools.FilesystemFile( os.path.join(vm.fsfiles, "scratch"), PMEM_DEVICE_SIZE_SINGLE_READ_MB, ) vm.add_pmem("scratch", fs.path, False, False) vm.start() vm.pin_threads(0) metrics.set_dimensions( { "performance_test": "test_pmem_first_read", "fio_block_size": str(fio_block_size), **vm.dimensions, } ) run_fio_single_read(vm, i, results_dir, fio_block_size) emit_fio_single_read_metrics(results_dir, metrics) ================================================ FILE: tests/integration_tests/performance/test_process_startup_time.py ================================================ # Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
# SPDX-License-Identifier: Apache-2.0
"""Test that the process startup time up to socket bind is within spec."""
import os
import time

import pytest

from host_tools.cargo_build import run_seccompiler_bin

ITERATIONS = 100


@pytest.mark.nonci
def test_startup_time_new_pid_ns(
    microvm_factory, guest_kernel_linux_5_10, rootfs, metrics
):
    """
    Check startup time when jailer is spawned in a new PID namespace.
    """
    for _ in range(ITERATIONS):
        vm = microvm_factory.build(guest_kernel_linux_5_10, rootfs)
        vm.jailer.new_pid_ns = True
        _test_startup_time(vm, metrics, "new_pid_ns")


@pytest.mark.nonci
def test_startup_time_daemonize(
    microvm_factory, guest_kernel_linux_5_10, rootfs, metrics
):
    """
    Check startup time when jailer detaches Firecracker from the controlling
    terminal.
    """
    for _ in range(ITERATIONS):
        vm = microvm_factory.build(guest_kernel_linux_5_10, rootfs)
        _test_startup_time(vm, metrics, "daemonize")


@pytest.mark.nonci
def test_startup_time_custom_seccomp(
    microvm_factory, guest_kernel_linux_5_10, rootfs, metrics
):
    """
    Check the startup time when using custom seccomp filters.
    """
    for _ in range(ITERATIONS):
        vm = microvm_factory.build(guest_kernel_linux_5_10, rootfs)
        _custom_filter_setup(vm)
        _test_startup_time(vm, metrics, "custom_seccomp")


def _test_startup_time(microvm, metrics, test_suffix: str):
    test_start_time = time.time()
    microvm.spawn()
    microvm.basic_config(vcpu_count=2, mem_size_mib=1024)
    metrics.set_dimensions(
        {**microvm.dimensions, "performance_test": f"test_startup_time_{test_suffix}"}
    )
    microvm.start()
    datapoints = microvm.get_all_metrics()
    # The metrics should be at index 0.
    # Since metrics are flushed at InstanceStart, the first line will suffice.
test_end_time = time.time() fc_metrics = datapoints[0] startup_time_us = fc_metrics["api_server"]["process_startup_time_us"] cpu_startup_time_us = fc_metrics["api_server"]["process_startup_time_cpu_us"] print( "Process startup time is: {} us ({} CPU us)".format( startup_time_us, cpu_startup_time_us ) ) assert cpu_startup_time_us > 0 # Check that startup time is not a huge value (overflow) # This is to catch issues like the ones introduced in PR # https://github.com/firecracker-microvm/firecracker/pull/4305 test_time_delta_us = (test_end_time - test_start_time) * 1000 * 1000 assert startup_time_us < test_time_delta_us assert cpu_startup_time_us < test_time_delta_us metrics.put_metric("startup_cpu_time", cpu_startup_time_us, unit="Microseconds") metrics.put_metric("startup_time", startup_time_us, unit="Microseconds") def _custom_filter_setup(test_microvm): bpf_path = os.path.join(test_microvm.path, "bpf.out") run_seccompiler_bin(bpf_path, binary_dir=test_microvm.fc_binary_path.parent) test_microvm.create_jailed_resource(bpf_path) test_microvm.jailer.extra_args.update({"seccomp-filter": "bpf.out"}) ================================================ FILE: tests/integration_tests/performance/test_rate_limiter.py ================================================ # Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
# SPDX-License-Identifier: Apache-2.0
"""Tests that fail if network throughput does not obey rate limits."""
import time

from framework import utils
from host_tools import cpu_load

# The iperf version to run this tests with
IPERF_BINARY = "iperf3"

# iperf server side log path inside guest
GUEST_IPERF_SERVER_LOG = "/tmp/iperf-server.log"

# Interval used by iperf to get maximum bandwidth
IPERF_TRANSMIT_TIME = 4

# Use a fixed-size TCP window so we get constant flow
IPERF_TCP_WINDOW = "256K"

# The rate limiting value
RATE_LIMIT_BYTES = 10485760

# The initial token bucket size
BURST_SIZE = RATE_LIMIT_BYTES * 50

# The refill time for the token bucket
REFILL_TIME_MS = 100

RATE_LIMITER_NO_BURST = {
    "bandwidth": {"size": RATE_LIMIT_BYTES, "refill_time": REFILL_TIME_MS}
}
RATE_LIMITER_WITH_BURST = {
    "bandwidth": {
        "size": RATE_LIMIT_BYTES,
        "one_time_burst": BURST_SIZE,
        "refill_time": REFILL_TIME_MS,
    }
}

# Deltas that are accepted between expected values and achieved
# values throughout the tests
MAX_RELATIVE_KBPS_CHANGE = 0.1
MAX_TIME_DIFF = 25


def test_tx_rate_limiting(uvm_plain):
    """
    Run iperf tx with and without rate limiting; check limiting effect.
    """
    vm = uvm_plain
    vm.spawn()
    vm.basic_config()

    # For this test we will be adding three interfaces:
    # 1. No rate limiting
    vm.add_net_iface()
    # 2. Rate limiting without burst
    vm.add_net_iface(tx_rate_limiter=RATE_LIMITER_NO_BURST)
    # 3. Rate limiting with burst
    vm.add_net_iface(tx_rate_limiter=RATE_LIMITER_WITH_BURST)

    vm.start()

    _check_tx_rate_limiting(vm)
    _check_tx_rate_limit_patch(vm)


def test_rx_rate_limiting(uvm_plain):
    """
    Run iperf rx with and without rate limiting; check limiting effect.
    """
    test_microvm = uvm_plain
    test_microvm.spawn()
    test_microvm.basic_config()

    # For this test we will be adding three interfaces:
    # 1. No rate limiting
    test_microvm.add_net_iface()
    # 2.
Rate limiting without burst test_microvm.add_net_iface(rx_rate_limiter=RATE_LIMITER_NO_BURST) # 3. Rate limiting with burst test_microvm.add_net_iface(rx_rate_limiter=RATE_LIMITER_WITH_BURST) # Start the microvm. test_microvm.start() _check_rx_rate_limiting(test_microvm) _check_rx_rate_limit_patch(test_microvm) def test_rx_rate_limiting_cpu_load(uvm_plain): """ Run iperf rx with rate limiting; verify cpu load is below threshold. """ test_microvm = uvm_plain test_microvm.spawn() test_microvm.basic_config() # Create interface with aggressive rate limiting enabled. rx_rate_limiter_no_burst = { "bandwidth": {"size": 65536, "refill_time": 1000} # 64KBytes # 1s } iface = test_microvm.add_net_iface(rx_rate_limiter=rx_rate_limiter_no_burst) test_microvm.start() # Start iperf server on guest. _start_iperf_server_on_guest(test_microvm) # Run iperf client sending UDP traffic. iperf_cmd = "{} {} -u -c {} -b 1000000000 -t{} -f KBytes".format( test_microvm.netns.cmd_prefix(), IPERF_BINARY, iface.guest_ip, IPERF_TRANSMIT_TIME * 5, ) # Enable monitor that checks if the cpu load is over the threshold. # After multiple runs, the average value for the cpu load # seems to be around 10%. Setting the threshold a little # higher to skip false positives. # We want to monitor the emulation thread, which is currently # the first one created. # A possible improvement is to find it by name. cpu_load_monitor = cpu_load.CpuLoadMonitor( process_pid=test_microvm.firecracker_pid, thread_pid=test_microvm.firecracker_pid, threshold=20, ) with cpu_load_monitor: _run_iperf_on_host(iperf_cmd, test_microvm) def _check_tx_rate_limiting(test_microvm): """Check that the transmit rate is within expectations.""" eth0 = test_microvm.iface["eth0"]["iface"] eth1 = test_microvm.iface["eth1"]["iface"] eth2 = test_microvm.iface["eth2"]["iface"] # First step: get the transfer rate when no rate limiting is enabled. # We are receiving the result in KBytes from iperf. 
def _check_tx_rate_limiting(test_microvm):
    """Check that the transmit rate is within expectations."""
    eth0 = test_microvm.iface["eth0"]["iface"]
    eth1 = test_microvm.iface["eth1"]["iface"]
    eth2 = test_microvm.iface["eth2"]["iface"]

    # Step 1: baseline transfer rate with no limiter (iperf reports KBytes).
    print("Run guest TX iperf for no rate limiting")
    rate_no_limit_kbps = _get_tx_bandwidth(test_microvm, eth0.host_ip)
    print(f"TX rate_no_limit_kbps: {rate_no_limit_kbps}")

    # Bytes expected to be sent per second once the limiter is active.
    expected_kbps = int(RATE_LIMIT_BYTES / (REFILL_TIME_MS / 1000.0) / 1024)
    print(f"Rate-Limit TX expected_kbps: {expected_kbps}")

    # Sanity check: unlimited bandwidth must be at least double the
    # limited target, otherwise the limiter's effect is unmeasurable.
    assert _relative_change(rate_no_limit_kbps, expected_kbps) > 1.0

    # Step 2: bandwidth with the burst-less limiter enabled.
    print("Run guest TX iperf for rate limiting without burst")
    observed_kbps = _get_tx_bandwidth(test_microvm, eth1.host_ip)
    assert _relative_change(observed_kbps, expected_kbps) < MAX_RELATIVE_KBPS_CHANGE

    # Step 3: bandwidth while an initial one-time burst bucket is available.
    print("Run guest TX iperf for rate limiting with burst")
    # Send exactly BURST_SIZE bytes so the burst bucket is drained.
    burst_cmd = (
        f"{IPERF_BINARY} -c {eth2.host_ip} -n {BURST_SIZE}"
        f" -f KBytes -w {IPERF_TCP_WINDOW} -N"
    )
    iperf_out = _run_iperf_on_guest(test_microvm, burst_cmd)
    print(iperf_out)
    _, burst_kbps = _process_iperf_output(iperf_out)
    print(f"TX burst_kbps: {burst_kbps}")
    # While the burst lasts, bandwidth must be at least twice the limit.
    assert _relative_change(burst_kbps, expected_kbps) > 1.0
    # With the burst consumed, the steady-state limit must apply again.
    observed_kbps = _get_tx_bandwidth(test_microvm, eth2.host_ip)
    assert _relative_change(observed_kbps, expected_kbps) < MAX_RELATIVE_KBPS_CHANGE


def _check_rx_rate_limiting(test_microvm):
    """Check that the receiving rate is within expectations."""
    eth0 = test_microvm.iface["eth0"]["iface"]
    eth1 = test_microvm.iface["eth1"]["iface"]
    eth2 = test_microvm.iface["eth2"]["iface"]

    # Step 1: baseline transfer rate with no limiter (iperf reports KBytes).
    print("Run guest RX iperf with no rate limiting")
    rate_no_limit_kbps = _get_rx_bandwidth(test_microvm, eth0.guest_ip)
    print(f"RX rate_no_limit_kbps: {rate_no_limit_kbps}")

    # Bytes expected to be sent per second once the limiter is active.
    expected_kbps = int(RATE_LIMIT_BYTES / (REFILL_TIME_MS / 1000.0) / 1024)
    print(f"Rate-Limit RX expected_kbps: {expected_kbps}")

    # Sanity check: unlimited bandwidth must be at least double the
    # limited target, otherwise the limiter's effect is unmeasurable.
    assert _relative_change(rate_no_limit_kbps, expected_kbps) > 1.0

    # Step 2: bandwidth with the burst-less limiter enabled.
    print("Run guest RX iperf for rate limiting without burst")
    observed_kbps = _get_rx_bandwidth(test_microvm, eth1.guest_ip)
    assert _relative_change(observed_kbps, expected_kbps) < MAX_RELATIVE_KBPS_CHANGE

    # Step 3: bandwidth while an initial one-time burst bucket is available.
    print("Run guest RX iperf for rate limiting with burst")
    # Send exactly BURST_SIZE bytes so the burst bucket is drained.
    burst_cmd = (
        f"{test_microvm.netns.cmd_prefix()} {IPERF_BINARY} -c {eth2.guest_ip}"
        f" -n {BURST_SIZE} -f KBytes -w {IPERF_TCP_WINDOW} -N"
    )
    iperf_out = _run_iperf_on_host(burst_cmd, test_microvm)
    _, burst_kbps = _process_iperf_output(iperf_out)
    print(f"RX burst_kbps: {burst_kbps}")
    # While the burst lasts, bandwidth must be at least twice the limit.
    assert _relative_change(burst_kbps, expected_kbps) > 1.0
    # With the burst consumed, the steady-state limit must apply again.
    observed_kbps = _get_rx_bandwidth(test_microvm, eth2.guest_ip)
    assert _relative_change(observed_kbps, expected_kbps) < MAX_RELATIVE_KBPS_CHANGE


def _check_tx_rate_limit_patch(test_microvm):
    """Patch the TX rate limiters and check the new limits."""
    eth0 = test_microvm.iface["eth0"]["iface"]
    eth1 = test_microvm.iface["eth1"]["iface"]
    bucket_size = int(RATE_LIMIT_BYTES * 2)
    expected_kbps = int(bucket_size / (REFILL_TIME_MS / 1000.0) / 1024)

    # A TX rate limiter can be attached to a previously unlimited interface.
    _patch_iface_bw(test_microvm, "eth0", "TX", bucket_size, REFILL_TIME_MS)
    observed_kbps = _get_tx_bandwidth(test_microvm, eth0.host_ip)
    assert _relative_change(observed_kbps, expected_kbps) < MAX_RELATIVE_KBPS_CHANGE

    # An existing TX rate limiter can be updated.
    _patch_iface_bw(test_microvm, "eth1", "TX", bucket_size, REFILL_TIME_MS)
    observed_kbps = _get_tx_bandwidth(test_microvm, eth1.host_ip)
    assert _relative_change(observed_kbps, expected_kbps) < MAX_RELATIVE_KBPS_CHANGE

    # A TX rate limiter can be removed (size 0 disables it).
    _patch_iface_bw(test_microvm, "eth0", "TX", 0, 0)
    rate_no_limit_kbps = _get_tx_bandwidth(test_microvm, eth0.host_ip)
    # With the limiter gone, bandwidth should be at least 1.5x the
    # previously limited rate.
    assert _relative_change(rate_no_limit_kbps, expected_kbps) > 0.5
assert _relative_change(rate_no_limit_kbps, expected_kbps) > 0.5 def _check_rx_rate_limit_patch(test_microvm): """Patch the RX rate limiters and check the new limits.""" eth0 = test_microvm.iface["eth0"]["iface"] eth1 = test_microvm.iface["eth1"]["iface"] bucket_size = int(RATE_LIMIT_BYTES * 2) expected_kbps = int(bucket_size / (REFILL_TIME_MS / 1000.0) / 1024) # Check that an RX rate limiter can be applied to a previously unlimited # interface. _patch_iface_bw(test_microvm, "eth0", "RX", bucket_size, REFILL_TIME_MS) observed_kbps = _get_rx_bandwidth(test_microvm, eth0.guest_ip) assert _relative_change(observed_kbps, expected_kbps) < MAX_RELATIVE_KBPS_CHANGE # Check that an RX rate limiter can be updated. _patch_iface_bw(test_microvm, "eth1", "RX", bucket_size, REFILL_TIME_MS) observed_kbps = _get_rx_bandwidth(test_microvm, eth1.guest_ip) assert _relative_change(observed_kbps, expected_kbps) < MAX_RELATIVE_KBPS_CHANGE # Check that an RX rate limiter can be removed. _patch_iface_bw(test_microvm, "eth0", "RX", 0, 0) rate_no_limit_kbps = _get_rx_bandwidth(test_microvm, eth0.guest_ip) # Check that bandwidth when rate-limit disabled is at least 1.5x larger # than the one when rate limiting was enabled. 
assert _relative_change(rate_no_limit_kbps, expected_kbps) > 0.5 def _get_tx_bandwidth(test_microvm, host_ip): """Check that the rate-limited TX bandwidth is close to what we expect.""" _start_iperf_server_on_host(test_microvm.netns.cmd_prefix()) iperf_cmd = "{} -c {} -t {} -f KBytes -w {} -N".format( IPERF_BINARY, host_ip, IPERF_TRANSMIT_TIME, IPERF_TCP_WINDOW ) iperf_out = _run_iperf_on_guest(test_microvm, iperf_cmd) print(iperf_out) _, observed_kbps = _process_iperf_output(iperf_out) print("TX observed_kbps: {}".format(observed_kbps)) return observed_kbps def _get_rx_bandwidth(test_microvm, guest_ip): """Check that the rate-limited RX bandwidth is close to what we expect.""" _start_iperf_server_on_guest(test_microvm) iperf_cmd = "{} {} -c {} -t {} -f KBytes -w {} -N".format( test_microvm.netns.cmd_prefix(), IPERF_BINARY, guest_ip, IPERF_TRANSMIT_TIME, IPERF_TCP_WINDOW, ) iperf_out = _run_iperf_on_host(iperf_cmd, test_microvm) _, observed_kbps = _process_iperf_output(iperf_out) print("RX observed_kbps: {}".format(observed_kbps)) return observed_kbps def _patch_iface_bw(test_microvm, iface_id, rx_or_tx, new_bucket_size, new_refill_time): """Update the bandwidth rate limiter for a given interface. Update the `rx_or_tx` rate limiter, on interface `iface_id` to the new `bucket_size`. """ assert rx_or_tx in ["RX", "TX"] args = { "iface_id": iface_id, "{}_rate_limiter".format(rx_or_tx.lower()): { "bandwidth": {"size": new_bucket_size, "refill_time": new_refill_time} }, } test_microvm.api.network.patch(**args) def _start_iperf_server_on_guest(test_microvm): """Start iperf in server mode through an SSH connection.""" kill_cmd = f"pkill {IPERF_BINARY}" test_microvm.ssh.run(kill_cmd) iperf_cmd = f"{IPERF_BINARY} -sD -f KBytes --logfile {GUEST_IPERF_SERVER_LOG}" test_microvm.ssh.run(iperf_cmd) # Wait for the iperf to start. 
time.sleep(1) def _run_iperf_on_guest(test_microvm, iperf_cmd): """Run a client related iperf command through an SSH connection.""" return test_microvm.ssh.check_output(iperf_cmd).stdout def _start_iperf_server_on_host(netns_cmd_prefix): """Start iperf in server mode after killing any leftover iperf daemon.""" kill_cmd = f"pkill {IPERF_BINARY}" utils.run_cmd(kill_cmd) iperf_cmd = "{} {} -sD -f KBytes\n".format(netns_cmd_prefix, IPERF_BINARY) utils.check_output(iperf_cmd) # Wait for the iperf daemon to start. time.sleep(1) def _run_iperf_on_host(iperf_cmd, test_microvm): """Execute a client related iperf command locally.""" rc, stdout, stderr = utils.run_cmd(iperf_cmd) assert rc == 0, "stdout:\n{}\nstderr:\n{}\niperf server log:\n{}\n".format( stdout, stderr, test_microvm.ssh.check_output(f"cat {GUEST_IPERF_SERVER_LOG}").stdout, ) print(f"iperf log:\n{stdout}") return stdout def _relative_change(measured, base): """Return the percentage delta between the arguments.""" assert base != 0 return abs(measured - base) / base def _process_iperf_line(line): """Parse iperf3 summary line and return test time and bandwidth.""" test_time = line.split(" ")[2].split("-")[1].strip().split(" ")[0] test_bw = line.split(" ")[5].split(" ")[0].strip() return float(test_time), float(test_bw) def _process_iperf_output(iperf_out): """Parse iperf3 output and return average test time and bandwidth.""" iperf_out_lines = iperf_out.splitlines() send_time = send_bw = rcv_time = rcv_bw = None for line in iperf_out_lines: if line.find("sender") != -1: send_time, send_bw = _process_iperf_line(line) if line.find("receiver") != -1: rcv_time, rcv_bw = _process_iperf_line(line) iperf_out_time = (send_time + rcv_time) / 2.0 iperf_out_bw = (send_bw + rcv_bw) / 2.0 return float(iperf_out_time), float(iperf_out_bw) ================================================ FILE: tests/integration_tests/performance/test_snapshot.py ================================================ # Copyright 2023 Amazon.com, Inc. 
# Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0
"""Performance benchmark for snapshot restore."""

import re
import signal
import tempfile
import time
from dataclasses import dataclass
from functools import lru_cache

import pytest

import host_tools.drive as drive_tools
from framework.microvm import HugePagesConfig, Microvm, SnapshotType

USEC_IN_MSEC = 1000
NS_IN_MSEC = 1_000_000
ITERATIONS = 30


@lru_cache
def get_scratch_drives():
    """Create an array of scratch disks (cached so all tests share them)."""
    # NOTE(review): tempfile.mktemp() is deprecated/race-prone; kept because
    # FilesystemFile appears to expect a path it creates itself — confirm
    # before changing.
    scratchdisks = ["vdb", "vdc", "vdd", "vde"]
    return [
        (drive, drive_tools.FilesystemFile(tempfile.mktemp(), size=64))
        for drive in scratchdisks
    ]


@dataclass
class SnapshotRestoreTest:
    """Dataclass encapsulating properties of snapshot restore tests"""

    vcpus: int = 1
    mem: int = 128
    nets: int = 3
    blocks: int = 3
    all_devices: bool = False
    huge_pages: HugePagesConfig = HugePagesConfig.NONE

    @property
    def id(self):
        """Computes a unique id for this test instance"""
        return "all_dev" if self.all_devices else f"{self.vcpus}vcpu_{self.mem}mb"

    def boot_vm(self, microvm_factory, guest_kernel, rootfs, pci_enabled) -> Microvm:
        """Creates the initial snapshot that will be loaded repeatedly to sample latencies"""
        vm = microvm_factory.build(
            guest_kernel,
            rootfs,
            monitor_memory=False,
            pci=pci_enabled,
        )
        vm.spawn(log_level="Info", emit_metrics=True)
        vm.time_api_requests = False
        vm.basic_config(
            vcpu_count=self.vcpus,
            mem_size_mib=self.mem,
            rootfs_io_engine="Sync",
            huge_pages=self.huge_pages,
        )

        for _ in range(self.nets):
            vm.add_net_iface()

        # The rootfs itself counts as one block device; attach the rest.
        if self.blocks > 1:
            scratch_drives = get_scratch_drives()
            for name, diskfile in scratch_drives[: (self.blocks - 1)]:
                vm.add_drive(name, diskfile.path, io_engine="Sync")

        if self.all_devices:
            vm.api.balloon.put(
                amount_mib=0, deflate_on_oom=True, stats_polling_interval_s=1
            )
            vm.api.vsock.put(vsock_id="vsock0", guest_cid=3, uds_path="/v.sock")

        vm.start()
        return vm


@pytest.mark.nonci
@pytest.mark.parametrize(
    "test_setup",
    [
        SnapshotRestoreTest(mem=128, vcpus=1),
        SnapshotRestoreTest(mem=1024, vcpus=1),
        SnapshotRestoreTest(mem=2048, vcpus=2),
        SnapshotRestoreTest(mem=4096, vcpus=3),
        SnapshotRestoreTest(mem=6144, vcpus=4),
        SnapshotRestoreTest(mem=8192, vcpus=5),
        SnapshotRestoreTest(mem=10240, vcpus=6),
        SnapshotRestoreTest(mem=12288, vcpus=7),
        SnapshotRestoreTest(all_devices=True),
    ],
    ids=lambda x: x.id,
)
def test_restore_latency(
    microvm_factory, guest_kernel_linux_5_10, rootfs, pci_enabled, test_setup, metrics
):
    """
    Restores snapshots with vcpu/memory configuration, roughly scaling according to
    mem = (vcpus - 1) * 2048MB, which resembles firecracker production setups.
    Also contains a test case for restoring a snapshot will all devices attached to
    it.

    We only test a single guest kernel, as the guest kernel does not "participate"
    in snapshot restore.
    """
    vm = test_setup.boot_vm(
        microvm_factory, guest_kernel_linux_5_10, rootfs, pci_enabled
    )

    metrics.set_dimensions(
        {
            "net_devices": str(test_setup.nets),
            "block_devices": str(test_setup.blocks),
            "vsock_devices": str(int(test_setup.all_devices)),
            "balloon_devices": str(int(test_setup.all_devices)),
            "huge_pages_config": str(test_setup.huge_pages),
            "performance_test": "test_restore_latency",
            "uffd_handler": "None",
            **vm.dimensions,
        }
    )

    snapshot = vm.snapshot_full()
    vm.kill()

    for microvm in microvm_factory.build_n_from_snapshot(
        snapshot, ITERATIONS, no_netns_reuse=True
    ):
        value = 0
        # Parse all metric data points in search of load_snapshot time.
        microvm.flush_metrics()
        for data_point in microvm.get_all_metrics():
            cur_value = data_point["latencies_us"]["load_snapshot"]
            if cur_value > 0:
                value = cur_value / USEC_IN_MSEC
                break
        assert value > 0
        metrics.put_metric("latency", value, "Milliseconds")


# When using the fault-all handler, all guest memory will be faulted in way before the helper tool
# wakes up, because it gets faulted in on the first page fault. In this scenario, we are not measuring UFFD
# latencies, but KVM latencies of setting up missing EPT entries.
@pytest.mark.nonci @pytest.mark.parametrize("uffd_handler", [None, "on_demand", "fault_all"]) @pytest.mark.parametrize("huge_pages", HugePagesConfig) def test_post_restore_latency( microvm_factory, rootfs, guest_kernel_linux_5_10, pci_enabled, metrics, uffd_handler, huge_pages, ): """Collects latency metric of post-restore memory accesses done inside the guest""" if huge_pages != HugePagesConfig.NONE and uffd_handler is None: pytest.skip("huge page snapshots can only be restored using uffd") test_setup = SnapshotRestoreTest(mem=1024, vcpus=2, huge_pages=huge_pages) vm = test_setup.boot_vm( microvm_factory, guest_kernel_linux_5_10, rootfs, pci_enabled ) metrics.set_dimensions( { "net_devices": str(test_setup.nets), "block_devices": str(test_setup.blocks), "vsock_devices": str(int(test_setup.all_devices)), "balloon_devices": str(int(test_setup.all_devices)), "huge_pages_config": str(test_setup.huge_pages), "performance_test": "test_post_restore_latency", "uffd_handler": str(uffd_handler), **vm.dimensions, } ) vm.ssh.check_output( "nohup /usr/local/bin/fast_page_fault_helper >/dev/null 2>&1 steal_before ), f"Steal time did not increase as expected. Before: {steal_before}, After: {steal_after}" def test_pvtime_snapshot(uvm_plain, microvm_factory): """ Test that PVTime steal time is preserved across snapshot/restore and continues increasing post-resume. 
""" vm = uvm_plain vm.spawn() vm.basic_config() vm.add_net_iface() vm.start() vm.pin_vcpu(0, 0) vm.pin_vcpu(1, 0) hog_cmd = "nohup bash -c 'while true; do :; done' >/dev/null 2>&1 &" vm.ssh.run(hog_cmd) vm.ssh.run(hog_cmd) # Snapshot pre-steal time steal_before = get_steal_time_ms(vm) snapshot = vm.snapshot_full() vm.kill() # Restore microVM from snapshot and resume restored_vm = microvm_factory.build() restored_vm.spawn() restored_vm.restore_from_snapshot(snapshot, resume=False) snapshot.delete() restored_vm.pin_vcpu(0, 0) restored_vm.pin_vcpu(1, 0) restored_vm.resume() # Steal time just after restoring steal_after_snap = get_steal_time_ms(restored_vm) # Ensure steal time persisted tolerance = 10000 # 10.0 seconds tolerance for persistence check persisted = ( steal_before < steal_after_snap and steal_after_snap - steal_before < tolerance ) assert persisted, "Steal time did not persist through snapshot" time.sleep(2) # Steal time after running resumed VM steal_after_resume = get_steal_time_ms(restored_vm) # Ensure steal time continued increasing assert ( steal_after_resume > steal_after_snap ), "Steal time failed to increase after resume" ================================================ FILE: tests/integration_tests/performance/test_vhost_user_metrics.py ================================================ # Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. # SPDX-License-Identifier: Apache-2.0 """Tests to collect Firecracker metrics for vhost-user devices.""" import time import pytest import host_tools.drive as drive_tools @pytest.mark.parametrize("vcpu_count", [1, 2], ids=["1vcpu", "2vcpu"]) def test_vhost_user_block_metrics(uvm_plain_acpi, vcpu_count, metrics): """ This test tries to boot a VM with vhost-user-block as a scratch device, resize the vhost-user scratch drive to have config change notifications, collects and then uploads the related vhost-user FirecrackerMetrics to Cloudwatch. 
# Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0
"""Tests to collect Firecracker metrics for vhost-user devices."""

import time

import pytest

import host_tools.drive as drive_tools


@pytest.mark.parametrize("vcpu_count", [1, 2], ids=["1vcpu", "2vcpu"])
def test_vhost_user_block_metrics(uvm_plain_acpi, vcpu_count, metrics):
    """
    This test tries to boot a VM with vhost-user-block
    as a scratch device, resize the vhost-user scratch drive to have
    config change notifications, collects and then uploads the related
    vhost-user FirecrackerMetrics to Cloudwatch.

    Having vhost-user as root device vs a scratch should not impact
    metrics, however, we choose to have it as a scratch device because
    we are interested in config change metrics which we cannot extract
    when vhost-user is root device (read only rootfs won't have a config
    change).
    """
    orig_size = 10  # MB
    # Picked from test_config_change assuming that the intention is to change
    # size from low->high->low->high and so the numbers are not in monotonic
    # sequence.
    new_sizes = [20, 10, 30]  # MB

    vm = uvm_plain_acpi
    vm.spawn(log_level="Info")
    vm.basic_config(vcpu_count=vcpu_count)
    vm.add_net_iface()

    # Add a block device to test resizing.
    fs = drive_tools.FilesystemFile(size=orig_size)
    vm.add_vhost_user_drive("scratch", fs.path)
    vm.start()

    # vhost-user-block is activated during boot but it takes a while so we wait.
    # 300msec picked by the limited number of experiments tried to see how long
    # it takes to get the activate_time_us metrics.
    time.sleep(0.3)

    metrics.set_dimensions(
        {
            "performance_test": "vhost_user_block_metrics",
            "io_engine": "vhost-user",
            **vm.dimensions,
        }
    )
    fc_metrics = vm.flush_metrics()

    assert 0 == fc_metrics["vhost_user_block_scratch"]["activate_fails"]
    assert fc_metrics["vhost_user_block_scratch"]["init_time_us"]
    assert fc_metrics["vhost_user_block_scratch"]["activate_time_us"]
    metrics.put_metric(
        "init_time_us",
        fc_metrics["vhost_user_block_scratch"]["init_time_us"],
        unit="Microseconds",
    )
    metrics.put_metric(
        "activate_time_us",
        fc_metrics["vhost_user_block_scratch"]["activate_time_us"],
        unit="Microseconds",
    )

    for new_size in new_sizes:
        # Instruct the backend to resize the device.
        # It will both resize the file and update its device config.
        vm.disks_vhost_user["scratch"].resize(new_size)

        # Instruct Firecracker to reread device config and notify
        # the guest of a config change.
        vm.patch_drive("scratch")

        fc_metrics = vm.flush_metrics()

        assert 0 == fc_metrics["vhost_user_block_scratch"]["cfg_fails"]
        assert fc_metrics["vhost_user_block_scratch"]["config_change_time_us"]
        metrics.put_metric(
            "config_change_time_us",
            fc_metrics["vhost_user_block_scratch"]["config_change_time_us"],
            unit="Microseconds",
        )


# Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0
"""Tests the VSOCK throughput of Firecracker uVMs."""

import json
import os
from pathlib import Path

import pytest

from framework.utils_iperf import IPerf3Test, emit_iperf3_metrics
from framework.utils_vsock import VSOCK_UDS_PATH, make_host_port_path


class VsockIPerf3Test(IPerf3Test):
    """IPerf3 runner for the vsock throughput performance test"""

    BASE_PORT = 5201

    # How many clients/servers should be spawned per vcpu
    LOAD_FACTOR = 1

    # Time (in seconds) for which iperf "warms up"
    WARMUP_SEC = 3

    # Time (in seconds) for which iperf runs after warmup is done
    RUNTIME_SEC = 20

    # VM guest memory size
    GUEST_MEM_MIB = 1024

    def __init__(self, microvm, mode, payload_length):
        super().__init__(
            microvm,
            self.BASE_PORT,
            self.RUNTIME_SEC,
            self.WARMUP_SEC,
            mode,
            self.LOAD_FACTOR * microvm.vcpus_count,
            2,
            iperf="/usr/local/bin/iperf3-vsock",
            payload_length=payload_length,
        )
        # The rootfs does not have iperf3-vsock
        iperf3_guest = "/tmp/iperf3-vsock"
        self._microvm.ssh.scp_put(self._iperf, iperf3_guest)
        self._guest_iperf = iperf3_guest

    def host_command(self, port_offset):
        return (
            super()
            .host_command(port_offset)
            .with_arg("--vsock")
            .with_arg("-B", os.path.join(self._microvm.path, VSOCK_UDS_PATH))
        )

    def spawn_iperf3_client(self, client_idx, client_mode_flag):
        # Bind the UDS in the jailer's root.
        self._microvm.create_jailed_resource(
            os.path.join(
                self._microvm.path,
                make_host_port_path(VSOCK_UDS_PATH, self._base_port + client_idx),
            )
        )
        return super().spawn_iperf3_client(client_idx, client_mode_flag)

    def guest_command(self, port_offset):
        return super().guest_command(port_offset).with_arg("--vsock")
@pytest.mark.timeout(120)
@pytest.mark.nonci
@pytest.mark.parametrize("vcpus", [1, 2], ids=["1vcpu", "2vcpu"])
@pytest.mark.parametrize("payload_length", ["64K", "1024K"], ids=["p64K", "p1024K"])
@pytest.mark.parametrize("mode", ["g2h", "h2g"])
def test_vsock_throughput(
    uvm_plain_acpi,
    vcpus,
    payload_length,
    mode,
    metrics,
    results_dir,
):
    """
    Test vsock throughput for multiple vm configurations.
    """
    mem_size_mib = 1024
    vm = uvm_plain_acpi
    vm.spawn(log_level="Info", emit_metrics=True)
    vm.basic_config(vcpu_count=vcpus, mem_size_mib=mem_size_mib)
    vm.add_net_iface()
    # Create a vsock device
    vm.api.vsock.put(vsock_id="vsock0", guest_cid=3, uds_path="/" + VSOCK_UDS_PATH)
    vm.start()

    metrics.set_dimensions(
        {
            "performance_test": "test_vsock_throughput",
            "payload_length": payload_length,
            "mode": mode,
            **vm.dimensions,
        }
    )

    vm.pin_threads(0)

    test = VsockIPerf3Test(vm, mode, payload_length)
    data = test.run_test(vm.vcpus_count + 2)

    # Persist the raw iperf3 reports next to the emitted metrics.
    for i, g2h in enumerate(data["g2h"]):
        Path(results_dir / f"g2h_{i}.json").write_text(
            json.dumps(g2h), encoding="utf-8"
        )
    for i, h2g in enumerate(data["h2g"]):
        Path(results_dir / f"h2g_{i}.json").write_text(
            json.dumps(h2g), encoding="utf-8"
        )

    emit_iperf3_metrics(metrics, data, VsockIPerf3Test.WARMUP_SEC)


# Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0


# Copyright 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0
"""Fixtures for security tests"""

import json
from pathlib import Path

import pytest

from host_tools.cargo_build import run_seccompiler_bin


@pytest.fixture()
def seccompiler(tmp_path):
    "A seccompiler helper fixture"

    class Seccompiler:
        "A seccompiler helper class"

        def compile(self, data: dict, basic=False, split_output=False) -> Path:
            "Use seccompiler-bin to compile a filter from a dict"
            inp = tmp_path / "input.json"
            inp.write_text(json.dumps(data))
            bpf = tmp_path / "output.bpfmap"
            run_seccompiler_bin(
                bpf_path=bpf, json_path=inp, basic=basic, split_output=split_output
            )
            return bpf

    return Seccompiler()


# Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0
"""Tests that the --seccomp-filter parameter works as expected."""

import platform
import time
from pathlib import Path

from framework import utils


def install_filter(microvm, bpf_path):
    """Install seccomp filter in microvm."""
    microvm.create_jailed_resource(bpf_path)
    microvm.jailer.extra_args.update({"seccomp-filter": bpf_path.name})


def test_allow_all(uvm_plain, seccompiler):
    """Test --seccomp-filter, allowing all syscalls."""
    seccomp_filter = {
        thread: {"default_action": "allow", "filter_action": "trap", "filter": []}
        for thread in ["vmm", "api", "vcpu"]
    }
    bpf_path = seccompiler.compile(seccomp_filter)
    test_microvm = uvm_plain
    install_filter(test_microvm, bpf_path)
    test_microvm.spawn()
    test_microvm.basic_config()
    test_microvm.start()
    utils.assert_seccomp_level(test_microvm.firecracker_pid, "2")
"clone"}, {"syscall": "execve"}], } for thread in ["vmm", "api", "vcpu"] } bpf_path = seccompiler.compile(seccomp_filter) test_microvm = uvm_plain install_filter(test_microvm, bpf_path) test_microvm.spawn() test_microvm.basic_config() test_microvm.start() # level should be 2, with no additional errors utils.assert_seccomp_level(test_microvm.firecracker_pid, "2") def test_failing_filter(uvm_plain, seccompiler): """Test --seccomp-filter, denying some needed syscalls.""" seccomp_filter = { "vmm": {"default_action": "allow", "filter_action": "trap", "filter": []}, "api": {"default_action": "allow", "filter_action": "trap", "filter": []}, "vcpu": { "default_action": "allow", "filter_action": "trap", "filter": [{"syscall": "ioctl"}], }, } bpf_path = seccompiler.compile(seccomp_filter) test_microvm = uvm_plain install_filter(test_microvm, bpf_path) test_microvm.spawn() test_microvm.basic_config(vcpu_count=1) # Try to start the VM with error checking off, because it will fail. # pylint: disable=bare-except try: test_microvm.start() except: pass # Give time for the process to get killed time.sleep(1) # Check the logger output ioctl_num = 16 if platform.machine() == "x86_64" else 29 test_microvm.check_log_message( f"Shutting down VM after intercepting a bad syscall ({ioctl_num})" ) # Check the metrics datapoints = test_microvm.get_metrics() num_faults = 0 for datapoint in datapoints: num_faults += datapoint["seccomp"]["num_faults"] # exit early to avoid potentially broken JSON entries in the logs if num_faults > 0: break assert num_faults == 1 test_microvm.mark_killed() def test_invalid_bpf(uvm_plain): """Test that FC does not start, given an invalid binary filter.""" test_microvm = uvm_plain # Configure VM from JSON. Otherwise, the test will error because # the process will be killed before configuring the API socket. 
test_microvm.create_jailed_resource(test_microvm.kernel_file) test_microvm.create_jailed_resource(test_microvm.rootfs_file) vm_config_file = Path("framework/vm_config.json") test_microvm.create_jailed_resource(vm_config_file) test_microvm.jailer.extra_args = {"config-file": vm_config_file.name} test_microvm.jailer.extra_args.update({"no-api": None}) bpf_path = Path(test_microvm.path) / "bpf.out" bpf_path.write_bytes(b"Invalid BPF!") test_microvm.create_jailed_resource(bpf_path) test_microvm.jailer.extra_args.update({"seccomp-filter": bpf_path.name}) test_microvm.spawn(serial_out_path=None) # give time for the process to get killed time.sleep(1) assert "Seccomp error: Filter deserialization failed" in test_microvm.log_data test_microvm.mark_killed() ================================================ FILE: tests/integration_tests/security/test_jail.py ================================================ # Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. # SPDX-License-Identifier: Apache-2.0 """Tests that verify the jailer's behavior.""" import http.client as http_client import os import resource import stat import subprocess import time from pathlib import Path import pytest import requests import urllib3 from framework.defs import FC_BINARY_NAME from framework.jailer import JailerContext # These are the permissions that all files/dirs inside the jailer have. REG_PERMS = ( stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR | stat.S_IRGRP | stat.S_IXGRP | stat.S_IROTH | stat.S_IXOTH ) DIR_STATS = stat.S_IFDIR | stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR FILE_STATS = stat.S_IFREG | REG_PERMS SOCK_STATS = stat.S_IFSOCK | REG_PERMS # These are the stats of the devices created by tha jailer. CHAR_STATS = stat.S_IFCHR | stat.S_IRUSR | stat.S_IWUSR # Limit on file size in bytes. FSIZE = 2097151 # Limit on number of file descriptors. NOFILE = 1024 # Resource limits to be set by the jailer. 
RESOURCE_LIMITS = [ "no-file={}".format(NOFILE), "fsize={}".format(FSIZE), ] def check_stats(filepath, stats, uid, gid): """Assert on uid, gid and expected stats for the given path.""" st = os.stat(filepath) assert st.st_gid == gid assert st.st_uid == uid assert st.st_mode ^ stats == 0 def test_empty_jailer_id(uvm_plain): """ Test that the jailer ID cannot be empty. """ test_microvm = uvm_plain # Set the jailer ID to None. test_microvm.jailer = JailerContext( jailer_id="", exec_file=test_microvm.fc_binary_path, ) # If the exception is not thrown, it means that Firecracker was # started successfully, hence there's a bug in the code due to which # we can set an empty ID. with pytest.raises( ChildProcessError, match=r"Invalid instance ID: Invalid len \(0\); the length must be between 1 and 64", ): test_microvm.spawn() def test_exec_file_not_exist(uvm_plain, tmp_path): """ Test the jailer option `--exec-file` """ test_microvm = uvm_plain # Error case 1: No such file exists pseudo_exec_file_path = tmp_path / "pseudo_firecracker_exec_file" fc_dir = Path("/srv/jailer") / pseudo_exec_file_path.name / test_microvm.id fc_dir.mkdir(parents=True, exist_ok=True) test_microvm.jailer.exec_file = pseudo_exec_file_path with pytest.raises( Exception, match=rf"Failed to canonicalize path {pseudo_exec_file_path}:" rf" No such file or directory \(os error 2\)", ): test_microvm.spawn() # Error case 2: Not a file pseudo_exec_dir_path = tmp_path / "firecracker_test_dir" pseudo_exec_dir_path.mkdir() fc_dir = Path("/srv/jailer") / pseudo_exec_dir_path.name / test_microvm.id fc_dir.mkdir(parents=True, exist_ok=True) test_microvm.jailer.exec_file = pseudo_exec_dir_path with pytest.raises( Exception, match=rf"{pseudo_exec_dir_path} is not a file", ): test_microvm.spawn() def test_exec_destination_path_is_symlink(uvm_plain): """ Test the jailer correctly refuses to copy binary into symlink """ test_microvm = uvm_plain firecracker_root_dir = Path(test_microvm.chroot()) firecracker_bin_path = 
def test_exec_destination_path_is_hardlink(uvm_plain):
    """
    Test the jailer correctly refuses to copy binary into hardlink
    """
    test_microvm = uvm_plain
    firecracker_root_dir = Path(test_microvm.chroot())
    firecracker_bin_path = firecracker_root_dir / "firecracker"
    # Plant a hard link where the jailer wants to copy the binary.
    dummy_path = Path("/srv/dummy")
    dummy_path.unlink(missing_ok=True)
    dummy_path.touch()
    firecracker_bin_path.hardlink_to(dummy_path)
    with pytest.raises(
        Exception,
        match=f"Detected hard link at: {firecracker_bin_path}",
    ):
        test_microvm.spawn()


def test_default_chroot_hierarchy(uvm_plain):
    """
    Test the folder hierarchy created by default by the jailer.
    """
    test_microvm = uvm_plain
    test_microvm.spawn()

    # We do checks for all the things inside the chroot that the jailer creates
    # by default.
    check_stats(
        test_microvm.jailer.chroot_path(),
        DIR_STATS,
        test_microvm.jailer.uid,
        test_microvm.jailer.gid,
    )
    check_stats(
        os.path.join(test_microvm.jailer.chroot_path(), "dev"),
        DIR_STATS,
        test_microvm.jailer.uid,
        test_microvm.jailer.gid,
    )
    check_stats(
        os.path.join(test_microvm.jailer.chroot_path(), "dev/net"),
        DIR_STATS,
        test_microvm.jailer.uid,
        test_microvm.jailer.gid,
    )
    check_stats(
        os.path.join(test_microvm.jailer.chroot_path(), "run"),
        DIR_STATS,
        test_microvm.jailer.uid,
        test_microvm.jailer.gid,
    )
    check_stats(
        os.path.join(test_microvm.jailer.chroot_path(), "dev/net/tun"),
        CHAR_STATS,
        test_microvm.jailer.uid,
        test_microvm.jailer.gid,
    )
    check_stats(
        os.path.join(test_microvm.jailer.chroot_path(), "dev/kvm"),
        CHAR_STATS,
        test_microvm.jailer.uid,
        test_microvm.jailer.gid,
    )
    check_stats(
        os.path.join(test_microvm.jailer.chroot_path(), "firecracker"),
        FILE_STATS,
        test_microvm.jailer.uid,
        test_microvm.jailer.gid,
    )


def test_arbitrary_usocket_location(uvm_plain):
    """
    Test arbitrary location scenario for the api socket.
    """
    test_microvm = uvm_plain
    test_microvm.jailer.extra_args = {"api-sock": "api.socket"}
    test_microvm.spawn(serial_out_path=None)

    check_stats(
        os.path.join(test_microvm.jailer.chroot_path(), "api.socket"),
        SOCK_STATS,
        test_microvm.jailer.uid,
        test_microvm.jailer.gid,
    )
class Cgroups:
    """Helper class to work with cgroups"""

    def __init__(self):
        # Mount point of the cgroup filesystem.
        self.root = Path("/sys/fs/cgroup")
        self.version = 2
        # cgroup v2 exposes `cgroup.controllers` at the hierarchy root; its
        # absence means the host runs a v1 hierarchy.
        # https://rootlesscontaine.rs/getting-started/common/cgroup2/#checking-whether-cgroup-v2-is-already-enabled
        if not self.root.joinpath("cgroup.controllers").exists():
            self.version = 1

    def new_cgroup(self, cgname):
        """Create a new cgroup"""
        self.root.joinpath(cgname).mkdir(parents=True, exist_ok=True)

    def move_pid(self, cgname, pid):
        """Move a PID to a cgroup"""
        cg_pids = self.root.joinpath(f"{cgname}/cgroup.procs")
        cg_pids.write_text(f"{pid}\n", encoding="ascii")

    def enable_controller_in_subtree(self, cgname, controller):
        """Enable a controller in subtree_control of a cgroup and its ancestors"""
        # Enable the controller in all ancestors if not already enabled.
        # Recurses towards the root because a controller can only be enabled
        # in a cgroup if it is enabled in its parent's subtree_control.
        parent_cg = self.root.joinpath(cgname).parent
        parent_subtree_control = parent_cg.joinpath("cgroup.subtree_control")
        if controller not in parent_subtree_control.read_text(encoding="ascii"):
            self.enable_controller_in_subtree(
                parent_cg.relative_to(self.root), controller
            )

        subtree_control = self.root.joinpath(f"{cgname}/cgroup.subtree_control")
        subtree_control.write_text(f"+{controller}", encoding="ascii")
        assert controller in subtree_control.read_text(encoding="ascii")


@pytest.fixture(scope="session", autouse=True)
def cgroups_info():
    """Return a fixture with the cgroups available in the system"""
    return Cgroups()


def check_cgroups_v1(cgroups, jailer_id, parent_cgroup=FC_BINARY_NAME):
    """Assert that every cgroupv1 in cgroups is correctly set.

    Each entry in `cgroups` has the form "<controller>.<file>=<value>", e.g.
    "cpu.shares=2".
    """
    # We assume sysfs cgroups are mounted here.
    cgroup_location = "/sys/fs/cgroup"
    assert os.path.isdir(cgroup_location)

    for cgroup in cgroups:
        controller = cgroup.split(".")[0]
        file_name, value = cgroup.split("=")
        location = cgroup_location + "/{}/{}/{}/".format(
            controller, parent_cgroup, jailer_id
        )
        tasks_file = location + "tasks"
        file = location + file_name

        # Use context managers so the sysfs file handles are closed
        # deterministically instead of leaking until garbage collection.
        with open(file, "r", encoding="utf-8") as cg_file:
            assert cg_file.readline().strip() == value
        with open(tasks_file, "r", encoding="utf-8") as cg_tasks:
            assert cg_tasks.readline().strip().isdigit()
def check_cgroups_v2(vm):
    """Assert that every cgroupv2 in cgroups is correctly set.

    Verifies, for each "<controller>.<file>=<value>" entry configured on the
    jailer, that the jail cgroup holds the expected value, that the
    firecracker PID is in the jail's `cgroup.procs`, and that the controller
    is enabled along the whole chain root -> parent -> jail.
    """
    cg = Cgroups()
    assert cg.root.is_dir()
    parent_cgroup = vm.jailer.parent_cgroup
    if parent_cgroup is None:
        parent_cgroup = FC_BINARY_NAME
    cg_parent = cg.root / parent_cgroup
    cg_jail = cg_parent / vm.jailer.jailer_id

    assert len(vm.jailer.cgroups) > 0
    for cgroup in vm.jailer.cgroups:
        controller = cgroup.split(".")[0]
        file_name, value = cgroup.split("=")
        procs = cg_jail.joinpath("cgroup.procs").read_text().splitlines()
        file = cg_jail / file_name
        assert file.read_text().strip() == value
        assert all(x.isnumeric() for x in procs)
        assert str(vm.firecracker_pid) in procs

        # `cg_dir` renamed from `cgroup` to avoid shadowing the outer loop
        # variable.
        for cg_dir in [cg.root, cg_parent, cg_jail]:
            assert controller in cg_dir.joinpath("cgroup.controllers").read_text(
                encoding="ascii"
            )
            # don't check since there are no children cgroups
            if cg_dir == cg_jail:
                continue
            assert controller in cg_dir.joinpath("cgroup.subtree_control").read_text(
                encoding="ascii"
            )


def get_cpus(node):
    """Retrieve CPUs from NUMA node."""
    sys_node = "/sys/devices/system/node/node" + str(node)
    assert os.path.isdir(sys_node)
    node_cpus_path = sys_node + "/cpulist"
    # Close the sysfs file handle deterministically instead of leaking it.
    with open(node_cpus_path, "r", encoding="utf-8") as cpulist:
        return cpulist.readline().strip()


def check_limits(pid, no_file, fsize):
    """Verify resource limits against expected values.

    Both the soft and hard limit of the process `pid` must equal the
    expected values exactly.
    """
    # Fetch firecracker process limits for number of open fds
    (soft, hard) = resource.prlimit(pid, resource.RLIMIT_NOFILE)
    assert soft == no_file
    assert hard == no_file

    # Fetch firecracker process limits for maximum file size
    (soft, hard) = resource.prlimit(pid, resource.RLIMIT_FSIZE)
    assert soft == fsize
    assert hard == fsize
def test_cgroups(uvm_plain, cgroups_info):
    """
    Test the cgroups are correctly set by the jailer.
    """
    test_microvm = uvm_plain
    test_microvm.jailer.cgroup_ver = cgroups_info.version
    # Pick controller files that exist on the detected cgroup version.
    if test_microvm.jailer.cgroup_ver == 2:
        test_microvm.jailer.cgroups = ["cpu.weight.nice=10"]
    else:
        test_microvm.jailer.cgroups = ["cpu.shares=2", "cpu.cfs_period_us=200000"]

    # Retrieve CPUs from NUMA node 0.
    node_cpus = get_cpus(0)
    # Appending the cgroups for numa node 0.
    test_microvm.jailer.cgroups = test_microvm.jailer.cgroups + [
        "cpuset.mems=0",
        "cpuset.cpus={}".format(node_cpus),
    ]

    test_microvm.spawn()

    if test_microvm.jailer.cgroup_ver == 1:
        check_cgroups_v1(test_microvm.jailer.cgroups, test_microvm.jailer.jailer_id)
    else:
        check_cgroups_v2(test_microvm)


def test_cgroups_custom_parent(uvm_plain, cgroups_info):
    """
    Test cgroups when a custom parent cgroup is used.
    """
    test_microvm = uvm_plain
    test_microvm.jailer.cgroup_ver = cgroups_info.version
    # Nested parent cgroup; the jailer must create the full path.
    test_microvm.jailer.parent_cgroup = "custom_cgroup/group2"
    if test_microvm.jailer.cgroup_ver == 2:
        test_microvm.jailer.cgroups = ["cpu.weight=2"]
    else:
        test_microvm.jailer.cgroups = ["cpu.shares=2", "cpu.cfs_period_us=200000"]

    # Retrieve CPUs from NUMA node 0.
    node_cpus = get_cpus(0)
    test_microvm.jailer.cgroups = test_microvm.jailer.cgroups + [
        "cpuset.mems=0",
        "cpuset.cpus={}".format(node_cpus),
    ]

    test_microvm.spawn()
    if test_microvm.jailer.cgroup_ver == 1:
        check_cgroups_v1(
            test_microvm.jailer.cgroups,
            test_microvm.jailer.jailer_id,
            test_microvm.jailer.parent_cgroup,
        )
    else:
        check_cgroups_v2(test_microvm)


def test_node_cgroups(uvm_plain, cgroups_info):
    """
    Test the numa node cgroups are correctly set by the jailer.
    """
    test_microvm = uvm_plain
    test_microvm.jailer.cgroup_ver = cgroups_info.version

    # Retrieve CPUs from NUMA node 0.
    node_cpus = get_cpus(0)
    # Appending the cgroups for numa node 0
    test_microvm.jailer.cgroups = ["cpuset.mems=0", "cpuset.cpus={}".format(node_cpus)]

    test_microvm.spawn()
    if test_microvm.jailer.cgroup_ver == 1:
        check_cgroups_v1(test_microvm.jailer.cgroups, test_microvm.jailer.jailer_id)
    else:
        check_cgroups_v2(test_microvm)
def test_cgroups_without_numa(uvm_plain, cgroups_info):
    """
    Test the cgroups are correctly set by the jailer, without numa assignment.
    """
    test_microvm = uvm_plain
    test_microvm.jailer.cgroup_ver = cgroups_info.version
    if test_microvm.jailer.cgroup_ver == 2:
        test_microvm.jailer.cgroups = ["cpu.weight=2"]
    else:
        test_microvm.jailer.cgroups = ["cpu.shares=2", "cpu.cfs_period_us=200000"]

    test_microvm.spawn()
    if test_microvm.jailer.cgroup_ver == 1:
        check_cgroups_v1(test_microvm.jailer.cgroups, test_microvm.jailer.jailer_id)
    else:
        check_cgroups_v2(test_microvm)


def test_v1_default_cgroups(uvm_plain, cgroups_info):
    """
    Test if the jailer is using cgroup-v1 by default.
    """
    if cgroups_info.version != 1:
        pytest.skip(reason="Requires system with cgroup-v1 enabled.")
    test_microvm = uvm_plain
    test_microvm.jailer.cgroups = ["cpu.shares=2"]

    test_microvm.spawn()
    check_cgroups_v1(test_microvm.jailer.cgroups, test_microvm.jailer.jailer_id)


@pytest.mark.parametrize(
    "parent_exists,domain_controller_in_subtree",
    [(True, False), (True, True), (False, None)],
)
def test_cgroups_parent_cgroup_but_no_cgroup(
    uvm_plain, cgroups_info, parent_exists, domain_controller_in_subtree
):
    """
    Test cgroups when `--parent-cgroup` is used but no `--cgroup` are specified.

    If the cgroup specified with `--parent-cgroup` exists, the jailer should
    move to the specified cgroup instead of creating a new cgroup under it.
    However, if the specified cgroup has domain controllers (e.g. `memory`)
    enabled in `cgroup.subtree_control`, the move should fail. If the
    specified cgroup does not exist, the jailer does not move the process to
    any cgroup and proceeds without error.
    """
    if cgroups_info.version != 2:
        pytest.skip("cgroupsv2 only")
    test_microvm = uvm_plain
    test_microvm.jailer.cgroup_ver = cgroups_info.version
    # Make it somewhat unique so it doesn't conflict with other test runs
    parent_cgroup = f"custom_cgroup/{test_microvm.id[:8]}"
    test_microvm.jailer.parent_cgroup = parent_cgroup
    if parent_exists:
        # Create the parent cgroup.
        cgroups_info.new_cgroup(parent_cgroup)
        if domain_controller_in_subtree:
            # Enable "memory" controller in cgroup.subtree_control of the parent.
            cgroups_info.enable_controller_in_subtree(parent_cgroup, "memory")

    # Check no --cgroups are specified just in case.
    assert len(test_microvm.jailer.cgroups) == 0

    cg_parent = cgroups_info.root / parent_cgroup
    if parent_exists:
        if domain_controller_in_subtree:
            # The jailer should have failed to move to the `parent_cgroup`
            # since it has domain controllers enabled in
            # `cgroup.subtree_control` due to the no internal process
            # constraint.
            # https://docs.kernel.org/admin-guide/cgroup-v2.html#no-internal-process-constraint
            with pytest.raises(
                ChildProcessError,
                match=(
                    rf"Failed to move process to cgroup \({cg_parent}\): "
                    r"Resource busy \(os error 16\)"
                ),
            ):
                test_microvm.spawn()
        else:
            # The jailer should have moved to the `parent_cgroup` instead of
            # creating a new cgroup under it and move to the new cgroup.
            test_microvm.spawn()
            procs = cg_parent.joinpath("cgroup.procs").read_text().splitlines()
            assert str(test_microvm.firecracker_pid) in procs
    else:
        # The jailer should not have moved to any cgroup and the parent
        # still does not exist.
        test_microvm.spawn()
        assert not cg_parent.exists()


def test_args_default_resource_limits(uvm_plain):
    """
    Test the default resource limits are correctly set by the jailer.
    """
    test_microvm = uvm_plain
    test_microvm.spawn()

    # Get firecracker's PID
    pid = test_microvm.firecracker_pid
    assert pid != 0

    # Fetch firecracker process limits for number of open fds
    (soft, hard) = resource.prlimit(pid, resource.RLIMIT_NOFILE)
    # Check that the default limit was set.
    assert soft == 2048
    assert hard == 2048

    # Fetch firecracker process limits for maximum file size
    (soft, hard) = resource.prlimit(pid, resource.RLIMIT_FSIZE)
    # Check that no limit was set
    assert soft == -1
    assert hard == -1
def test_args_resource_limits(uvm_plain):
    """
    Test the resource limits are correctly set by the jailer.
    """
    test_microvm = uvm_plain
    test_microvm.jailer.resource_limits = RESOURCE_LIMITS
    test_microvm.spawn()

    # Get firecracker's PID
    pid = test_microvm.firecracker_pid
    assert pid != 0

    # Check limit values were correctly set.
    check_limits(pid, NOFILE, FSIZE)


def test_positive_file_size_limit(uvm_plain):
    """
    Test creating vm succeeds when memory size is under `fsize` limit.
    """
    vm_mem_size = 128
    # fsize in bytes: guest memory size plus 1 MiB of headroom.
    jail_limit = (vm_mem_size + 1) << 20

    test_microvm = uvm_plain
    test_microvm.jailer.resource_limits = [f"fsize={jail_limit}"]
    test_microvm.spawn()
    test_microvm.basic_config(mem_size_mib=vm_mem_size)

    # Attempt to start a vm.
    test_microvm.start()


def test_negative_file_size_limit(uvm_plain):
    """
    Test creating snapshot file fails when size exceeds `fsize` limit.
    """
    test_microvm = uvm_plain
    # limit to 1MB, to account for logs and metrics
    test_microvm.jailer.resource_limits = [f"fsize={2**20}"]
    test_microvm.spawn()
    test_microvm.basic_config()
    test_microvm.start()

    test_microvm.pause()

    # Attempt to create a snapshot.
    try:
        test_microvm.api.snapshot_create.put(
            mem_file_path="/vm.mem",
            snapshot_path="/vm.vmstate",
        )
    except (
        http_client.RemoteDisconnected,
        urllib3.exceptions.ProtocolError,
        requests.exceptions.ConnectionError,
    ) as _error:
        # Check the microVM received signal `SIGXFSZ` (25),
        # which corresponds to exceeding file size limit.
        msg = "Shutting down VM after intercepting signal 25, code 0"
        test_microvm.check_log_message(msg)
        time.sleep(1)
        test_microvm.mark_killed()
    else:
        assert False, "Negative test failed"


def test_negative_no_file_limit(uvm_plain):
    """
    Test microVM is killed when exceeding `no-file` limit.
    """
    test_microvm = uvm_plain
    # Three fds is too few for Firecracker to even start.
    test_microvm.jailer.resource_limits = ["no-file=3"]

    # pylint: disable=W0703
    try:
        test_microvm.spawn()
    except ChildProcessError as error:
        assert "No file descriptors available (os error 24)" in str(error)
        test_microvm.mark_killed()
    else:
        assert False, "Negative test failed"


def test_new_pid_ns_resource_limits(uvm_plain):
    """
    Test that Firecracker process inherits jailer resource limits.
    """
    test_microvm = uvm_plain
    test_microvm.jailer.resource_limits = RESOURCE_LIMITS
    test_microvm.spawn()

    # Get Firecracker's PID.
    fc_pid = test_microvm.firecracker_pid

    # Check limit values were correctly set.
    check_limits(fc_pid, NOFILE, FSIZE)


def test_new_pid_namespace(uvm_plain):
    """
    Test that Firecracker is spawned in a new PID namespace if requested.
    """
    test_microvm = uvm_plain
    test_microvm.spawn()

    # Check that the PID file exists.
    fc_pid = test_microvm.firecracker_pid

    # Validate the PID.
    stdout = subprocess.check_output("pidof firecracker", shell=True)
    assert str(fc_pid) in stdout.strip().decode()

    # Get the thread group IDs in each of the PID namespaces of which
    # Firecracker process is a member of.
    nstgid_cmd = "cat /proc/{}/status | grep NStgid".format(fc_pid)
    nstgid_list = (
        subprocess.check_output(nstgid_cmd, shell=True)
        .decode("utf-8")
        .strip()
        .split("\t")[1:]
    )

    # Check that Firecracker's PID namespace is nested. `NStgid` should
    # report two values and the last one should be 1, because Firecracker
    # becomes the init(1) process of the new PID namespace it is spawned in.
    assert len(nstgid_list) == 2
    assert int(nstgid_list[1]) == 1
    assert int(nstgid_list[0]) == fc_pid
@pytest.mark.parametrize(
    "daemonize",
    [True, False],
)
@pytest.mark.parametrize(
    "new_pid_ns",
    [True, False],
)
def test_firecracker_kill_by_pid(uvm_plain, daemonize, new_pid_ns):
    """
    Test that Firecracker is spawned in a new PID namespace if requested.
    """
    microvm = uvm_plain
    microvm.jailer.daemonize = daemonize
    microvm.jailer.new_pid_ns = new_pid_ns
    microvm.spawn()
    microvm.basic_config()
    microvm.add_net_iface()
    microvm.start()

    # before killing microvm make sure the Jailer config is what we set it to be.
    assert (
        microvm.jailer.daemonize == daemonize
        and microvm.jailer.new_pid_ns == new_pid_ns
    )
    microvm.kill()


def test_cgroupsv2_written_only_once(uvm_plain, cgroups_info):
    """
    Test that we only write to cgroup.procs once when using CgroupsV2

    Assert that the jailer doesn't perform unnecessary create_dir_all and
    attach_pid calls. This is a regression test for #2856
    """
    if cgroups_info.version != 2:
        pytest.skip(reason="Requires system with cgroup-v2 enabled.")
    uvm = uvm_plain
    # Run the jailer under strace to count write/mkdir syscalls.
    strace_output_path = Path(uvm.path, "strace.out")
    strace_cmd = [
        "strace",
        "-tt",
        "--syscall-times=ns",
        "-y",
        "-e",
        "write,mkdir,mkdirat",
        "-o",
        strace_output_path,
    ]
    uvm.add_pre_cmd(strace_cmd)
    parent_cgroup = "custom_cgroup/group2"
    uvm.jailer.cgroup_ver = cgroups_info.version
    uvm.jailer.parent_cgroup = parent_cgroup
    # create the parent so that mkdirs doesn't need to
    cgroups_info.new_cgroup(parent_cgroup)
    cgroups = {
        "cpuset.cpus": get_cpus(0),
        "cpu.weight": 2,
        "memory.max": 256 * 2**20,
        "memory.min": 1 * 2**20,
    }
    uvm.jailer.cgroups = [f"{k}={v}" for k, v in cgroups.items()]
    uvm.spawn()
    uvm.basic_config()
    uvm.add_net_iface()
    uvm.start()

    strace_out = strace_output_path.read_text(encoding="utf-8").splitlines()
    # Writes to the jail's cgroup.procs attach the PID; must happen once.
    write_lines = [
        line
        for line in strace_out
        if "write" in line and f"{uvm.id}/cgroup.procs" in line
    ]
    # mkdir calls creating the jail cgroup; must also happen once.
    mkdir_lines = [
        line
        for line in strace_out
        if "mkdir" in line and f"{parent_cgroup}/{uvm.id}" in line
    ]
    assert len(write_lines) != len(cgroups), "writes equal to number of cgroups"
    assert len(write_lines) == 1
    assert len(mkdir_lines) != len(cgroups), "mkdir equal to number of cgroups"
    assert len(mkdir_lines) == 1
f"{parent_cgroup}/{uvm.id}" in line ] assert len(write_lines) != len(cgroups), "writes equal to number of cgroups" assert len(write_lines) == 1 assert len(mkdir_lines) != len(cgroups), "mkdir equal to number of cgroups" assert len(mkdir_lines) == 1 ================================================ FILE: tests/integration_tests/security/test_nv.py ================================================ # Copyright 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved. # SPDX-License-Identifier: Apache-2.0 """Tests ensuring nested virtualization is not present when using CPU templates. We have tests that ensure CPU templates provide a consistent set of features in the guest: - file:../functional/test_cpu_features.py - file:../functional/test_feat_parity.py - Commit: 681e781f999e3390b6d46422a3c7b1a7e36e1b24 These already include the absence of VMX/SVM in the guest. This test is a safety-net to make the test explicit and catch cases where we start providing the feature by mistake. """ def test_no_nested_virtualization(uvm_any_booted): """Validate that guests don't have Nested Virtualization enabled.""" uvm_any_booted.ssh.check_output("[ ! -e /dev/kvm ]") ================================================ FILE: tests/integration_tests/security/test_sec_audit.py ================================================ # Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. # SPDX-License-Identifier: Apache-2.0 """Tests ensuring security vulnerabilities are not present in dependencies.""" import json import pytest from framework.ab_test import ( git_ab_test_host_command_if_pr, set_did_not_grow_comparator, ) from framework.utils import CommandReturn from framework.utils_cpuid import CpuVendor, get_cpu_vendor @pytest.mark.skipif( get_cpu_vendor() != CpuVendor.INTEL, reason="The audit is based on cargo.lock which is identical on all platforms", ) def test_cargo_audit(): """ Run cargo audit to check for crates with security vulnerabilities. 
""" def set_of_vulnerabilities(output: CommandReturn): output = json.loads(output.stdout) return set( frozenset(vulnerability) for vulnerability in output["vulnerabilities"]["list"] ).union( frozenset(warning) for warning_kind, warnings in output["warnings"].items() for warning in warnings ) git_ab_test_host_command_if_pr( "cargo install --locked cargo-audit && cargo audit --deny warnings -q --json", comparator=set_did_not_grow_comparator(set_of_vulnerabilities), ) ================================================ FILE: tests/integration_tests/security/test_seccomp.py ================================================ # Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. # SPDX-License-Identifier: Apache-2.0 """Tests that the seccomp filters don't let denied syscalls through.""" import json import os import platform from pathlib import Path from framework import utils ARCH = platform.machine() def _get_basic_syscall_list(): """Return the JSON list of syscalls that the demo jailer needs.""" sys_list = [ "rt_sigprocmask", "rt_sigaction", "execve", "mmap", "mprotect", "set_tid_address", "read", "close", "brk", "sched_getaffinity", "sigaltstack", "munmap", "exit_group", ] if ARCH == "x86_64": sys_list += [ "arch_prctl", "readlink", "open", "poll", ] elif ARCH == "aarch64": sys_list += ["ppoll"] return sys_list def test_seccomp_ls(bin_seccomp_paths, seccompiler): """ Assert that the seccomp filter denies an unallowed syscall. """ # pylint: disable=subprocess-run-check # The fixture pattern causes a pylint false positive for that rule. # Path to the `ls` binary, which attempts to execute the forbidden # `SYS_access`. ls_command_path = "/bin/ls" demo_jailer = bin_seccomp_paths["demo_jailer"] assert os.path.exists(demo_jailer) json_filter = { "main": { "default_action": "trap", "filter_action": "allow", "filter": [{"syscall": x} for x in _get_basic_syscall_list()], } } # Run seccompiler-bin. 
bpf_path = seccompiler.compile(json_filter) # Run the mini jailer. outcome = utils.run_cmd([demo_jailer, ls_command_path, bpf_path], shell=False) # The seccomp filters should send SIGSYS (31) to the binary. `ls` doesn't # handle it, so it will exit with error. assert outcome.returncode != 0 def test_advanced_seccomp(bin_seccomp_paths, seccompiler): """ Test seccompiler-bin with `demo_jailer`. Test that the demo jailer (with advanced seccomp) allows the harmless demo binary, denies the malicious demo binary and that an empty allowlist denies everything. """ # pylint: disable=subprocess-run-check # The fixture pattern causes a pylint false positive for that rule. demo_jailer = bin_seccomp_paths["demo_jailer"] demo_harmless = bin_seccomp_paths["demo_harmless"] demo_malicious = bin_seccomp_paths["demo_malicious"] assert os.path.exists(demo_jailer) assert os.path.exists(demo_harmless) assert os.path.exists(demo_malicious) json_filter = { "main": { "default_action": "trap", "filter_action": "allow", "filter": [ *[{"syscall": x} for x in _get_basic_syscall_list()], { "syscall": "write", "args": [ { "index": 0, "type": "dword", "op": "eq", "val": 1, "comment": "stdout fd", }, { "index": 2, "type": "qword", "op": "eq", "val": 14, "comment": "nr of bytes", }, ], }, ], } } # Run seccompiler-bin. bpf_path = seccompiler.compile(json_filter) # Run the mini jailer for harmless binary. outcome = utils.run_cmd([demo_jailer, demo_harmless, bpf_path], shell=False) # The demo harmless binary should have terminated gracefully. assert outcome.returncode == 0 # Run the mini jailer for malicious binary. outcome = utils.run_cmd([demo_jailer, demo_malicious, bpf_path], shell=False) # The demo malicious binary should have received `SIGSYS`. assert outcome.returncode == -31 # Run seccompiler-bin with `--basic` flag. bpf_path = seccompiler.compile(json_filter, basic=True) # Run the mini jailer for malicious binary. 
outcome = utils.run_cmd([demo_jailer, demo_malicious, bpf_path], shell=False) # The malicious binary also terminates gracefully, since the --basic option # disables all argument checks. assert outcome.returncode == 0 # Run the mini jailer with an empty allowlist. It should trap on any # syscall. json_filter = { "main": {"default_action": "trap", "filter_action": "allow", "filter": []} } # Run seccompiler-bin. bpf_path = seccompiler.compile(json_filter) outcome = utils.run_cmd([demo_jailer, demo_harmless, bpf_path], shell=False) # The demo binary should have received `SIGSYS`. assert outcome.returncode == -31 def test_no_seccomp(uvm_plain): """ Test that Firecracker --no-seccomp installs no filter. """ test_microvm = uvm_plain test_microvm.jailer.extra_args.update({"no-seccomp": None}) test_microvm.spawn() test_microvm.basic_config() test_microvm.start() utils.assert_seccomp_level(test_microvm.firecracker_pid, "0") def test_default_seccomp_level(uvm_plain): """ Test that Firecracker installs a seccomp filter by default. """ test_microvm = uvm_plain test_microvm.spawn() test_microvm.basic_config() test_microvm.start() utils.assert_seccomp_level(test_microvm.firecracker_pid, "2") def test_seccomp_rust_panic(bin_seccomp_paths, seccompiler): """ Test seccompiler-bin with `demo_panic`. Test that the Firecracker filters allow a Rust panic to run its course without triggering a seccomp violation. """ # pylint: disable=subprocess-run-check # The fixture pattern causes a pylint false positive for that rule. demo_panic = bin_seccomp_paths["demo_panic"] assert os.path.exists(demo_panic) fc_filters = Path(f"../resources/seccomp/{ARCH}-unknown-linux-musl.json") fc_filters_data = json.loads(fc_filters.read_text(encoding="ascii")) filter_threads = list(fc_filters_data) bpf_path = seccompiler.compile(fc_filters_data) # Run the panic binary with all filters. 
@pytest.fixture
def bin_test_syscall(tmp_path):
    """Build the test_syscall binary."""
    test_syscall_bin = tmp_path / "test_syscall"
    # Static musl build so the helper runs under any seccomp filter set.
    compile_cmd = f"musl-gcc -static host_tools/test_syscalls.c -o {test_syscall_bin}"
    utils.check_output(compile_cmd)
    assert test_syscall_bin.exists()
    yield test_syscall_bin.resolve()


def test_validate_filter(seccompiler, bin_test_syscall, monkeypatch, tmp_path):
    """Assert that the seccomp filter matches the JSON description."""
    fc_filter_path = Path(f"../resources/seccomp/{ARCH}-unknown-linux-musl.json")
    fc_filter = json.loads(fc_filter_path.read_text(encoding="ascii"))

    # cd to a tmp dir because we may generate a bunch of intermediate files
    monkeypatch.chdir(tmp_path)

    # prevent coredumps
    resource.setrlimit(resource.RLIMIT_CORE, (0, 0))

    seccompiler.compile(fc_filter, split_output=True)
    # With split_output=True, individual .bpf files are created for each thread
    arch = seccomp.Arch.X86_64 if ARCH == "x86_64" else seccomp.Arch.AARCH64
    for thread, filter_data in fc_filter.items():
        filter_path = Path(f"{thread}.bpf")
        # The individual files should already exist from the split output
        assert (
            filter_path.exists()
        ), f"Expected {filter_path} to be created by seccompiler"
        # for each rule, run the helper program and execute a syscall
        for rule in filter_data["filter"]:
            print(filter_path, rule)
            syscall = rule["syscall"]
            # this one cannot be called directly
            if syscall in ["rt_sigreturn"]:
                continue
            syscall_id = seccomp.resolve_syscall(arch, syscall)
            cmd = f"{bin_test_syscall} {filter_path} {syscall_id}"
            if "args" not in rule:
                # syscall should be allowed with any arguments and exit 0
                assert utils.run_cmd(cmd).returncode == 0
            else:
                allowed_args = [0] * 4
                # if we call it with allowed args, it should exit 0
                for arg in rule["args"]:
                    allowed_args[arg["index"]] = arg["val"]
                allowed_str = " ".join(str(x) for x in allowed_args)
                assert utils.run_cmd(f"{cmd} {allowed_str}").returncode == 0
                # for each allowed arg try a different number
                for arg in rule["args"]:
                    # We just add 1000000 to the allowed arg and assume it is
                    # not something we allow in another rule. While not perfect
                    # it works in practice.
                    bad_args = allowed_args.copy()
                    bad_args[arg["index"]] = str(arg["val"] + 1_000_000)
                    unallowed_str = " ".join(str(x) for x in bad_args)
                    outcome = utils.run_cmd(f"{cmd} {unallowed_str}")
                    # if we call it with unallowed args, it should exit 159
                    # 159 = 128 (abnormal termination) + 31 (SIGSYS)
                    assert outcome.returncode == 159
class SpectreMeltdownChecker:
    """Helper class to use Spectre & Meltdown Checker"""

    def __init__(self, path):
        # Local path of the downloaded spectre-meltdown-checker.sh script.
        self.path = path

    def _parse_output(self, output):
        # Keep only the entries the checker reports as vulnerable. Each dict
        # is serialized back to JSON because dicts are unhashable and cannot
        # be stored in a set directly.
        vulnerable = set()
        for entry in json.loads(output):
            if entry["VULNERABLE"]:
                vulnerable.add(json.dumps(entry))
        return vulnerable

    def get_report_for_guest(self, vm) -> set:
        """Run `spectre-meltdown-checker.sh --batch json` inside the guest
        and return the set of issues for which it reported 'Vulnerable'.

        Sample stdout:
        ```
        [
          {
            "NAME": "SPECTRE VARIANT 1",
            "CVE": "CVE-2017-5753",
            "VULNERABLE": false,
            "INFOS": "Mitigation: usercopy/swapgs barriers and __user pointer sanitization"
          },
          { ... }
        ]
        ```
        """
        vm.ssh.scp_put(self.path, REMOTE_CHECKER_PATH)
        res = vm.ssh.run(REMOTE_CHECKER_COMMAND)
        return self._parse_output(res.stdout)

    def get_report_for_host(self) -> set:
        """Run `spectre-meltdown-checker.sh` on the host and return the set
        of issues for which it reported 'Vulnerable'.
        """
        res = utils.check_output(f"sh {self.path} --batch json")
        return self._parse_output(res.stdout)

    def expected_vulnerabilities(self, cpu_template_name):
        """
        There is a REPTAR exception reported on INTEL_ICELAKE when
        spectre-meltdown-checker.sh script is run inside the guest from below
        the tests:
            test_spectre_meltdown_checker_on_guest and
            test_spectre_meltdown_checker_on_restored_guest
        The same script when run on host doesn't report the exception which
        means the instances are actually not vulnerable to REPTAR. The only
        reason why the script cannot determine if the guest is vulnerable or
        not is because Firecracker does not expose the microcode version to
        the guest.

        The check in spectre_meltdown_checker is here:
        https://github.com/speed47/spectre-meltdown-checker/blob/0f2edb1a71733c1074550166c5e53abcfaa4d6ca/spectre-meltdown-checker.sh#L6635-L6637

        Since we have a test on host and the exception in guest is not valid,
        we add a check to ignore this exception.
        """
        # Guard clauses: the REPTAR false positive only shows up on these
        # host CPUs and only without a CPU template.
        if global_props.cpu_codename not in ["INTEL_ICELAKE", "INTEL_SAPPHIRE_RAPIDS"]:
            return set()
        if cpu_template_name != "None":
            return set()
        return {
            '{"NAME": "REPTAR", "CVE": "CVE-2023-23583", "VULNERABLE": true, "INFOS": "Your microcode is too old to mitigate the vulnerability"}'
        }
f"Unexpected vulnerabilities: {report}" # Nothing can be sensibly tested here in a PR context @pytest.mark.skipif( global_props.buildkite_pr, reason="Test depends solely on factors external to GitHub repository", ) def test_vulnerabilities_on_host(): """Test vulnerability files on host.""" res = utils.run_cmd(f"grep -r Vulnerable {VULN_DIR}") # if grep finds no matching lines, it exits with status 1 assert res.returncode == 1, res.stdout def get_vuln_files_exception_dict(template): """ Returns a dictionary of expected values for vulnerability files requiring special treatment. """ exception_dict = {} # Exception for mmio_stale_data # ============================= # # Guests with T2S template # -------------------------------------------- # Whether mmio_stale_data is marked as "Vulnerable" or not is determined by the code here. # https://elixir.bootlin.com/linux/v6.1.46/source/arch/x86/kernel/cpu/bugs.c#L431 # Virtualization of FLUSH_L1D has been available and CPUID.(EAX=0x7,ECX=0):EDX[28 (FLUSH_L1D)] # has been passed through to guests only since kernel v6.4. # https://github.com/torvalds/linux/commit/da3db168fb671f15e393b227f5c312c698ecb6ea # Thus, since the FLUSH_L1D bit is masked off prior to kernel v6.4, guests with # IA32_ARCH_CAPABILITIES.FB_CLEAR (bit 17) = 0 (like guests with T2S template which presents # an Intel Skylake CPU) fall into the MMIO_MITIGATION_UCODE_NEEDED branch, marking the # system as vulnerable to MMIO Stale Data. # The value is "Vulnerable: Clear CPU buffers attempted, no microcode" on guests on Intel # Skylake and guests with T2S template but "Mitigation: Clear CPU buffers; SMT Host state # unknown" on kernel v6.4 or later. # In any case, the kernel attempts to clear CPU buffers using VERW instruction and it # is safe to ingore the "Vulnerable" message if the host has the microcode update applied # correctly. Here we expect the common string "Clear CPU buffers" to cover both cases. 
if template == "T2S": exception_dict["mmio_stale_data"] = "Clear CPU buffers" return exception_dict def check_vulnerabilities_files_on_guest(microvm): """ Check that the guest's vulnerabilities files do not contain `Vulnerable`. See also: https://elixir.bootlin.com/linux/latest/source/Documentation/ABI/testing/sysfs-devices-system-cpu and search for `vulnerabilities`. """ # Retrieve a list of vulnerabilities files available inside guests. vuln_dir = "/sys/devices/system/cpu/vulnerabilities" _, stdout, _ = microvm.ssh.check_output(f"find -D all {vuln_dir} -type f") vuln_files = stdout.splitlines() # Fixtures in this file (test_vulnerabilities.py) add this special field. template = microvm.cpu_template_name # Check that vulnerabilities files in the exception dictionary have the expected values and # the others do not contain "Vulnerable". exceptions = get_vuln_files_exception_dict(template) results = [] for vuln_file in vuln_files: filename = Path(vuln_file).name if filename in exceptions: _, stdout, _ = microvm.ssh.check_output(f"cat {vuln_file}") assert exceptions[filename] in stdout else: cmd = f"grep Vulnerable {vuln_file}" _ecode, stdout, _stderr = microvm.ssh.run(cmd) results.append({"file": vuln_file, "stdout": stdout}) return results @pytest.fixture def microvm_factory_a(record_property): """MicroVMFactory using revision A binaries""" revision_a = global_props.buildkite_revision_a bin_dir = git_clone(Path("../build") / revision_a, revision_a).resolve() record_property("firecracker_bin", str(bin_dir / "firecracker")) uvm_factory = MicroVMFactory(bin_dir) yield uvm_factory uvm_factory.kill() @pytest.fixture def uvm_any_a(microvm_factory_a, uvm_ctor, guest_kernel, rootfs, cpu_template_any): """Return uvm with revision A firecracker Since pytest caches fixtures, this guarantees uvm_any_a will match a vm from uvm_any. 
    See https://docs.pytest.org/en/stable/how-to/fixtures.html#fixtures-can-be-requested-more-than-once-per-test-return-values-are-cached
    """
    return uvm_ctor(microvm_factory_a, guest_kernel, rootfs, cpu_template_any, False)


def test_check_vulnerability_files_ab(request, uvm_any_without_pci):
    """Test vulnerability files on guests"""
    res_b = check_vulnerabilities_files_on_guest(uvm_any_without_pci)
    if global_props.buildkite_pr:
        # we only get the uvm_any_a fixtures if we need it
        uvm_a = request.getfixturevalue("uvm_any_a")
        res_a = check_vulnerabilities_files_on_guest(uvm_a)
        # NOTE(review): res_a/res_b are *lists* of dicts, so `<=` here is
        # Python's lexicographic list comparison, not a subset test. The
        # sibling test below uses `<=` on sets, where it means subset —
        # confirm lexicographic comparison is really the intent here.
        assert res_b <= res_a
    else:
        assert not [x for x in res_b if "Vulnerable" in x["stdout"]]


def test_spectre_meltdown_checker_on_guest(
    request,
    uvm_any_without_pci,
    spectre_meltdown_checker,
):
    """Test with the spectre / meltdown checker on any supported guest."""
    res_b = spectre_meltdown_checker.get_report_for_guest(uvm_any_without_pci)
    if global_props.buildkite_pr:
        # we only get the uvm_any_a fixtures if we need it
        uvm_a = request.getfixturevalue("uvm_any_a")
        res_a = spectre_meltdown_checker.get_report_for_guest(uvm_a)
        # Sets: the new revision must not report vulnerabilities that the
        # baseline revision did not already report.
        assert res_b <= res_a
    else:
        assert res_b == spectre_meltdown_checker.expected_vulnerabilities(
            uvm_any_without_pci.cpu_template_name
        )



================================================
FILE: tests/integration_tests/style/__init__.py
================================================
# Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0



================================================
FILE: tests/integration_tests/style/test_gitlint.py
================================================
# Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0
"""Tests ensuring desired style for commit messages."""

import os

from framework import utils
from framework.ab_test import DEFAULT_A_REVISION


def test_gitlint():
    """
    Test that all commit messages pass the gitlint rules.
    """
    # Force a UTF-8 locale so gitlint handles non-ASCII commit messages.
    os.environ["LC_ALL"] = "C.UTF-8"
    os.environ["LANG"] = "C.UTF-8"
    rc, _, stderr = utils.run_cmd(
        f"gitlint --commits origin/{DEFAULT_A_REVISION}..HEAD -C ../.gitlint --extra-path framework/gitlint_rules.py",
    )
    assert rc == 0, "Commit message violates gitlint rules: {}".format(stderr)



================================================
FILE: tests/integration_tests/style/test_licenses.py
================================================
# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0
"""Tests checking against the existence of licenses in each file."""

import datetime

from framework import utils_repo
from framework.defs import FC_WORKSPACE_DIR
from host_tools.cargo_build import cargo

# Every year from the project's start (2018) up to and including the current
# year is an acceptable Amazon copyright year.
AMAZON_COPYRIGHT_YEARS = range(2018, datetime.datetime.now().year + 1)
AMAZON_COPYRIGHT = (
    "Copyright {} Amazon.com, Inc. or its affiliates. All Rights Reserved."
)
AMAZON_LICENSE = "SPDX-License-Identifier: Apache-2.0"
CHROMIUM_COPYRIGHT = "Copyright 2017 The Chromium OS Authors. All rights reserved."
CHROMIUM_LICENSE = (
    "Use of this source code is governed by a BSD-style license that can be"
)
TUNTAP_COPYRIGHT = (
    "Copyright TUNTAP, 2017 The Chromium OS Authors. All rights reserved."
)
TUNTAP_LICENSE = (
    "Use of this source code is governed by a BSD-style license that can be"
)
ALIBABA_COPYRIGHT = "Copyright (C) 2019 Alibaba Cloud Computing. All rights reserved."
ALIBABA_LICENSE = "SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause"
INTEL_COPYRIGHT = "Copyright © 2019 Intel Corporation"
INTEL_LICENSE = "SPDX-License-Identifier: Apache-2.0"
RIVOS_COPYRIGHT = "Copyright © 2023 Rivos, Inc."
RIVOS_LICENSE = "SPDX-License-Identifier: Apache-2.0"
ORACLE_COPYRIGHT = "Copyright © 2020, Oracle and/or its affiliates."
ORACLE_LICENSE = "SPDX-License-Identifier: Apache-2.0" EXCLUDE = ["build", ".kernel", ".git"] def _has_amazon_copyright(string): for year in AMAZON_COPYRIGHT_YEARS: if AMAZON_COPYRIGHT.format(year) in string: return True return False def _look_for_license(file, license_msg): line = file.readline() while line.startswith("//") or line.startswith("#"): if license_msg in line: return True line = file.readline() return False def _validate_license(filename): """ Validate license all .rs/.py. or .sh file. Python and Rust files should have the licenses on the first 2 lines Shell files license is located on lines 3-4 to account for shebang """ with open(filename, "r", encoding="utf-8") as file: # Find the copyright line while True: line = file.readline() if line.startswith(("// Copyright", "# Copyright")): copyright_info = line break if line == "": return False has_amazon_copyright = _has_amazon_copyright( copyright_info ) and _look_for_license(file, AMAZON_LICENSE) has_chromium_copyright = ( CHROMIUM_COPYRIGHT in copyright_info and _look_for_license(file, CHROMIUM_LICENSE) ) has_tuntap_copyright = TUNTAP_COPYRIGHT in copyright_info and _look_for_license( file, CHROMIUM_LICENSE ) has_alibaba_copyright = ( ALIBABA_COPYRIGHT in copyright_info and _look_for_license(file, ALIBABA_LICENSE) ) has_intel_copyright = INTEL_COPYRIGHT in copyright_info and _look_for_license( file, INTEL_LICENSE ) has_rivos_copyright = RIVOS_COPYRIGHT in copyright_info and _look_for_license( file, RIVOS_LICENSE ) has_oracle_copyright = ORACLE_COPYRIGHT in copyright_info and _look_for_license( file, ORACLE_LICENSE ) return ( has_amazon_copyright or has_chromium_copyright or has_tuntap_copyright or has_alibaba_copyright or has_intel_copyright or has_rivos_copyright or has_oracle_copyright ) def test_for_valid_licenses(): """ Test that all *.py, *.rs and *.sh files contain a valid license. 
""" python_files = list(utils_repo.git_repo_files(root="..", glob="*.py")) rust_files = list(utils_repo.git_repo_files(root="..", glob="*.rs")) bash_files = list(utils_repo.git_repo_files(root="..", glob="*.sh")) c_files = list(utils_repo.git_repo_files(root="..", glob="*.c")) all_files = rust_files + python_files + bash_files + c_files error_msg = [] for file in all_files: if _validate_license(file) is False: error_msg.append(file) assert not error_msg, f"Files {error_msg} have invalid licenses" def test_dependency_licenses(): """Ensure license compatibility for Firecracker. For a list of currently allowed licenses checkout deny.toml in the root directory. """ toml_file = FC_WORKSPACE_DIR / "Cargo.toml" _, stdout, stderr = cargo( "deny", f"--manifest-path {toml_file} check licenses bans" ) assert "licenses ok" in stdout # "cargo deny" should deny licenses by default but for some reason copyleft is allowed # by it and if we add a dependency which has copyleft licenses "cargo deny" won't report # it unless it is explicitly told to do so from the deny.toml. # Our current deny.toml seems to cover all the cases we need but, # if there is an exception like copyleft (where we don't want and don't deny # in deny.toml and is allowed by cardo deny), we don't want to be left in the dark. # For such cases check "cargo deny" output, make sure that there are no warnings reported # related to the license and take appropriate actions i.e. either add them to allow list # or remove them if they are incompatible with our licenses. license_res = [line for line in stderr.split("\n") if "license" in line] assert not license_res ================================================ FILE: tests/integration_tests/style/test_markdown.py ================================================ # Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
# SPDX-License-Identifier: Apache-2.0 """Tests for markdown style checks.""" import re from framework import utils, utils_repo def test_markdown_style(): """ Test that markdown files adhere to the style rules. """ # Get all *.md files from the project md_files = list(utils_repo.git_repo_files(root="..", glob="*.md")) # Assert if somehow no markdown files were found. assert len(md_files) != 0 needs_format = False # Run commands for md_file in md_files: rc, output, _ = utils.run_cmd( f"bash -c 'diff -u --color {md_file} <(mdformat - < {md_file})'", ) if rc != 0: print(output) needs_format = True assert ( not needs_format ), "Some markdown files need formatting. Either run `./tools/devtool sh mdformat .` in the repository root, or apply the above diffs manually." def test_markdown_internal_links(): """Make sure markdown internal links work""" for md_file in utils_repo.git_repo_files(root="..", glob="*.md"): txt = md_file.read_text(encoding="utf-8") for link in re.findall(r"\[.+?\]\((?P.+?)\)", txt, re.DOTALL): if not re.match("(mailto:|https?://)", link): # internal link, ignore anchors (#) and query (?) parts = link.split("#", maxsplit=1) parts = parts[0].split("?", maxsplit=1) path = md_file.parent / parts[0] assert path.exists(), f"{md_file} {link} {path}" ================================================ FILE: tests/integration_tests/style/test_python.py ================================================ # Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
# SPDX-License-Identifier: Apache-2.0 """Tests ensuring codebase style compliance for Python.""" import sys from subprocess import run import pytest @pytest.mark.parametrize("formatter", ["black --config tests/pyproject.toml", "isort"]) def test_python_style(formatter): """ Test that python code passes `formatter` """ run( f"{formatter} --check --diff tests tools .buildkite", stdout=sys.stdout, stderr=sys.stderr, shell=True, cwd="..", check=True, ) def test_python_pylint(): """ Test that python code passes linter checks. """ # List of linter commands that should be executed for each file linter_cmd = "pylint --rcfile tests/pyproject.toml --output-format=colorized tests/ tools/ .buildkite/*.py" run( linter_cmd, # we let pytest capture stdout/stderr for us stdout=sys.stdout, stderr=sys.stderr, shell=True, cwd="..", check=True, ) ================================================ FILE: tests/integration_tests/style/test_repo.py ================================================ # Copyright 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved. # SPDX-License-Identifier: Apache-2.0 """Tests enforcing git repository structure""" import re import subprocess from pathlib import Path import yaml from framework import utils_repo def test_repo_no_spaces_in_paths(): """ Ensure there are no spaces in paths. """ # pylint: disable-next=subprocess-run-check res = subprocess.run( "git ls-files | grep '[[:space:]]'", cwd="..", capture_output=True, shell=True, ) # If grep doesn't find any, it will exit with status 1. 
Otherwise 0 assert res.returncode == 1, "Some files have spaces:\n" + res.stdout.decode() def test_repo_validate_yaml(): """ Ensure all YAML files are valid """ for path in utils_repo.git_repo_files(root="..", glob="*.y*ml"): yaml.safe_load(path.open(encoding="utf-8")) def test_repo_validate_changelog(): """Make sure the CHANGELOG.md file follows the Keep a Changelog format""" changelog_path = Path("../CHANGELOG.md") changelog = changelog_path.read_text(encoding="utf-8").splitlines() errors = [] for lineno, line in enumerate(changelog, start=1): if line.startswith("## "): if not re.match(r"^## \[.+\]$", line): msg = "Level 2 headings (versions) should be wrapped in []" errors.append((lineno, msg, line)) if line.startswith("### "): if not re.match(r"^### (Added|Changed|Deprecated|Removed|Fixed)$", line): msg = "Unknown Level 3 heading" errors.append((lineno, msg, line)) for lineno, msg, line in errors: print(msg) print(f"\t{lineno}:{line}") assert len(errors) == 0 ================================================ FILE: tests/integration_tests/style/test_rust.py ================================================ # Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. # SPDX-License-Identifier: Apache-2.0 """Tests ensuring codebase style compliance for Rust.""" from collections import defaultdict from framework import utils from host_tools.fcmetrics import extract_fields, find_metrics_files, is_metric_used def test_rust_order(): """Tests that `Cargo.toml` dependencies are alphabetically ordered.""" # Runs `cargo-sort` with the current working directory (`cwd`) as the repository root. _, _, _ = utils.check_output( cmd="cargo-sort --workspace --check --grouped", cwd=".." ) def test_rust_style(): """Test that rust code passes style checks.""" # Check that the output is empty. _, stdout, _ = utils.check_output("cargo fmt --all -- --check") # rustfmt prepends `"Diff in"` to the reported output. 
assert "Diff in" not in stdout def test_unused_metrics(): """Tests that all metrics defined in Firecracker's metrics.rs files actually have code paths that increment them.""" metrics_files = find_metrics_files() unused = defaultdict(list) assert metrics_files for file_path in metrics_files: fields = extract_fields(file_path) if not fields: continue for field, ty in fields: if not is_metric_used(field, ty): unused[file_path].append((field, ty)) # Grouped output for file_path, fields in unused.items(): print(f"📄 Defined in: {file_path}") print("Possibly Unused: \n") for field, field_type in fields: print(f" ❌ {field} ({field_type})") print() assert ( not unused ), "Unused metrics founds, see stdout. Please either hook them up, or remove them" ================================================ FILE: tests/integration_tests/style/test_swagger.py ================================================ # Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. # SPDX-License-Identifier: Apache-2.0 """Tests ensuring codebase style compliance for the OpenAPI specification.""" from pathlib import Path from openapi_spec_validator import validate from openapi_spec_validator.readers import read_from_filename def validate_swagger(swagger_spec): """Fail if OpenAPI spec is not followed.""" spec_dict, _ = read_from_filename(swagger_spec) validate(spec_dict) def test_firecracker_swagger(): """ Test that Firecracker swagger specification is valid. """ swagger_spec = Path("../src/firecracker/swagger/firecracker.yaml") validate_swagger(swagger_spec) ================================================ FILE: tests/integration_tests/test_kani.py ================================================ # Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. # SPDX-License-Identifier: Apache-2.0 """ Proofs ensuring memory safety properties, user-defined assertions, absence of panics and some types of unexpected behavior (e.g., arithmetic overflows). 
""" import os import platform import pytest from framework import utils PLATFORM = platform.machine() TIMEOUT = 3600 # The `check_output` timeout will always fire before this one, but we need to # set a timeout here to override the default pytest timeout of 180s. @pytest.mark.timeout(TIMEOUT) @pytest.mark.skipif( os.environ.get("BUILDKITE") != "true", reason="Kani's memory requirements likely cannot be satisfied locally", ) def test_kani(results_dir): """ Test all Kani proof harnesses. """ # -Z stubbing is required to enable the stubbing feature # -Z function-contracts is required to enable the function contracts feature # -Z restrict-vtable is required for some virtio queue proofs, which go out of memory otherwise # -j enables kani harnesses to be verified in parallel (required to keep CI time low) # --output-format terse is required by -j # -Z unstable-options is needed to enable the other `-Z` flags _, stdout, _ = utils.check_output( "cargo kani -Z unstable-options -Z stubbing -Z function-contracts -Z restrict-vtable -j --output-format terse --harness-timeout 40m --workspace", timeout=TIMEOUT, ) (results_dir / "kani_log").write_text(stdout, encoding="utf-8") ================================================ FILE: tests/pyproject.toml ================================================ [tool.black] # By default config black will ignore `build` directory # https://black.readthedocs.io/en/stable/usage_and_configuration/the_basics.html#command-line-options # This project has `build` directory, so override default config here exclude = "/(\\.direnv|\\.eggs|\\.git|\\.hg|\\.mypy_cache|\\.nox|\\.tox|\\.venv|venv|\\.svn|\\.ipynb_checkpoints|_build|buck-out|dist|__pypackages__)/" [tool.isort] # https://pycqa.github.io/isort/docs/configuration/multi_line_output_modes.html multi_line_output = 3 profile = "black" [tool.pylint.main] # Use multiple processes to speed up Pylint. 
Specifying 0 will auto-detect the # number of processors available to use, and will cap the count on Windows to # avoid hangs. jobs = 0 score = false # Pickle collected data for later comparisons. persistent = false # Disable the message, report, category or checker with the given id(s). You can # either give multiple identifiers separated by comma (,) or put this option # multiple times (only on the command line, not in the configuration file where # it should appear only once). You can also use "--disable=all" to disable # everything first and then re-enable specific checks. For example, if you want # to run only the similarities checker, you can use "--disable=all # --enable=similarities". If you want to run only the classes checker, but have # no Warning level messages displayed, use "--disable=all --enable=classes # --disable=W". disable = [ "raw-checker-failed", "bad-inline-option", "locally-disabled", "file-ignored", "suppressed-message", "useless-suppression", "deprecated-pragma", "use-implicit-booleaness-not-comparison-to-string", "use-implicit-booleaness-not-comparison-to-zero", "use-symbolic-message-instead", "fixme", "too-many-instance-attributes", "import-error", "too-many-locals", "too-many-arguments", "consider-using-f-string", "consider-using-with", "implicit-str-concat", "line-too-long", "redefined-outer-name", "broad-exception-raised", "duplicate-code", "too-many-positional-arguments", "too-few-public-methods", "too-many-branches", "too-many-statements", ] ================================================ FILE: tests/pytest.ini ================================================ [pytest] ; Omit verbose tracebacks, since they tend to pollute the output. addopts = --tb=short -vv --durations=10 --showlocals -m 'not nonci and not no_block_pr' --json-report --json-report-file=../test_results/test-report.json markers = no_block_pr: tests whose failure does not block PR merging. nonci: mark test as nonci. 
; Overwrite the default norecursedirs, which includes 'build'. norecursedirs = .* ; Default timeout for tests. can be overwritten at finer grained levels. timeout = 300 ; Set the cache dir location to our build dir, so we don't litter the source ; tree. cache_dir = ../build/pytest_cache ; Set logger format and level log_level = INFO log_format = %(asctime)s.%(msecs)03d %(name)s: %(levelname)s %(message)s log_cli_level = ERROR log_cli = true ; make those errors, not warnings filterwarnings = error::pytest.PytestUnraisableExceptionWarning error::pytest.PytestUnhandledThreadExceptionWarning ================================================ FILE: tools/ab_plot.py ================================================ #!/usr/bin/env python3 # Copyright 2025 Amazon.com, Inc. or its affiliates. All Rights Reserved. # SPDX-License-Identifier: Apache-2.0 """ Script for creating visualizations for A/B runs. Usage: ab_plot.py path_to_run_a path_to_run_b path_to_run_c ... --output_type pdf/table """ import argparse import glob import json import time from pathlib import Path from typing import Callable, List import matplotlib.pyplot as plt import numpy as np import pandas as pd import scipy import seaborn as sns from matplotlib.backends.backend_pdf import PdfPages pd.set_option("display.float_format", "{:.2f}".format) def check_regression( a_samples: List[float], b_samples: List[float], statistic: Callable = np.mean, *, n_resamples=9999, ): """ Check if 2 sample groups have a statistically big enough difference """ result = scipy.stats.permutation_test( (a_samples, b_samples), lambda x, y: statistic(y) - statistic(x), vectorized=False, n_resamples=n_resamples, ) statistic_a = statistic(a_samples) return result.pvalue, result.statistic / statistic_a, result.statistic def load_data(data_path: Path): """ Recursively collects `metrics.json` files in provided path """ data = [] for name in glob.glob(f"{data_path}/**/metrics.json", recursive=True): with open(name, encoding="utf-8") as f: j 
= json.load(f) if "performance_test" not in j["dimensions"]: print(f"skipping: {name}") continue metrics = j["metrics"] # Move test name from dimensions into a separate column perf_test = j["dimensions"]["performance_test"] del j["dimensions"]["performance_test"] # These are host specific and will prevent comparison of # different hosts del j["dimensions"]["instance"] del j["dimensions"]["cpu_model"] del j["dimensions"]["host_kernel"] dimensions = frozenset(j["dimensions"].items()) for m in metrics: if "cpu_utilization" in m: continue mm = metrics[m] unit = mm["unit"] values = mm["values"] for i, v in enumerate(values): data.append( { "index": i, "test": perf_test, "metric": m, "value": v, "unit": unit, "dimensions": dimensions, } ) return data def p50(a): """Returns 50th percentile of 1d-array a""" return np.percentile(a, 50) def p90(a): """Returns 90th percentile of 1d-array a""" return np.percentile(a, 90) def create_table(df: pd.DataFrame): """Create an html table per test in the data frame""" for test_value in df["test"].unique(): df_test = df[df["test"] == test_value] # Split dimensions into separate columns df_expanded = df_test.copy() dim_data = [] for _, row in df_expanded.iterrows(): dim_dict = dict(row["dimensions"]) dim_data.append(dim_dict) # Need to reset indexes because otherwise `pd.concat` will add NaN in all # rows where indexes differ dim_df = pd.DataFrame(dim_data).reset_index(drop=True) df_data = df_expanded.drop("dimensions", axis=1).reset_index(drop=True) df_expanded = pd.concat([df_data, dim_df], axis=1) # Use dimension columns as index dim_cols = sorted(list(dim_df.columns)) df_pivoted = df_expanded.pivot_table( values=["value"], index=["metric", "unit"] + dim_cols, columns="group", aggfunc=[p50, p90], ) # Add comparison columns for each group vs first group (A) groups = sorted(df_test["group"].unique()) for baseline in groups: for group in groups: if group == baseline: continue for stat in ["p50", "p90"]: diff_col = (stat, "value", 
f"{baseline}->{group} %") df_pivoted[diff_col] = ( ( df_pivoted[(stat, "value", group)] - df_pivoted[(stat, "value", baseline)] ) / df_pivoted[(stat, "value", baseline)] * 100.0 ) diff_col = (stat, "value", f"{baseline}->{group} abs") df_pivoted[diff_col] = ( df_pivoted[(stat, "value", group)] - df_pivoted[(stat, "value", baseline)] ) # Sort columns to have a persistent table representation df_pivoted = df_pivoted[sorted(df_pivoted.columns)] test_output_path = f"{test_value}.html" with open(test_output_path, "w", encoding="UTF-8") as writer: writer.write("
") styled = df_pivoted.style.format(precision=2) styled = styled.set_table_attributes("border=1") styled = styled.set_table_styles( [{"selector": 'th:contains("->")', "props": [("min-width", "80px")]}] ) # Apply color gradient to all comparison columns for baseline in groups: for group in groups: if group == baseline: continue for stat in ["p50", "p90"]: diff_col = (stat, "value", f"{baseline}->{group} %") styled = styled.background_gradient( subset=[diff_col], cmap="RdYlGn" ) writer.write(styled.to_html()) writer.write("
") print(f"Ready: {test_output_path}") def create_pdf(args, df: pd.DataFrame): """Create a pdf per test in the data frame""" sns.set_style("whitegrid") metrics = df["metric"].unique() n_groups = len(df["group"].unique()) for test_value in df["test"].unique(): test_output_path = f"{test_value}.pdf" with PdfPages(test_output_path) as pdf: df_test = df[df["test"] == test_value] for dim_value in df_test["dimensions"].unique(): for metric in metrics: metric_data = df_test[ (df_test["metric"] == metric) & (df_test["dimensions"] == dim_value) ] if len(metric_data) == 0: continue additional_title = "" fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6)) if n_groups == 2: # Check if difference is significant a_values = metric_data[metric_data["group"] == "A"][ "value" ].values b_values = metric_data[metric_data["group"] == "B"][ "value" ].values pvalue, diff_rel, diff_abs = check_regression( a_values, b_values ) if ( pvalue <= 0.1 and abs(diff_rel) >= 0.05 and abs(diff_abs) >= 0.0 ): fig.patch.set_facecolor("lightcoral") additional_title = ( f"{diff_rel * 100:+.2f}% ({diff_abs:+.2f}) difference" ) # Make a multi-line title since single line will be too long dim_items = sorted(str(item) for item in dim_value) dim_chunks = [ ", ".join(dim_items[i : i + 4]) for i in range(0, len(dim_items), 4) ] dim_str = "\n".join(dim_chunks) title = f"{metric}\n{dim_str}\n{additional_title}" if additional_title: weight = "bold" else: weight = "normal" fig.suptitle(title, fontsize=10, weight=weight) sns.boxenplot(data=metric_data, x="group", y="value", ax=ax1) ax1.set_ylabel(f"{metric} ({metric_data['unit'].iloc[0]})") metric_data_indexed = metric_data.reset_index() errorbar = (args.errorbar[0], int(args.errorbar[1])) sns.lineplot( data=metric_data_indexed, x="index", y="value", hue="group", ax=ax2, errorbar=errorbar, ) ax2.set_ylabel(f"{metric} ({metric_data['unit'].iloc[0]})") plt.tight_layout() pdf.savefig() plt.close() print(f"Ready: {test_output_path}") if __name__ == "__main__": parser 
= argparse.ArgumentParser( description="Executes Firecracker's A/B testsuite across the specified commits" ) parser.add_argument( "paths", nargs="+", help="Paths to directories with test runs", type=Path, ) parser.add_argument( "--errorbar", nargs=2, default=["pi", "95"], help="Errorbar configuration for lineplot (type, value)", ) parser.add_argument( "--output_type", default=["pdf"], help="Type of the output to generate", ) args = parser.parse_args() # Data retrieval start_time = time.time() all_data = [] for i, path in enumerate(args.paths): data = load_data(path) print(f"getting data {i} from {path}: {len(data)}") df = pd.DataFrame(data) df["group"] = chr(65 + i) # A, B, C, D, ... all_data.append(df) print(f"Data retrieval: {time.time() - start_time:.2f}s") # Data processing start_time = time.time() df_combined = pd.concat(all_data, ignore_index=True) print(f"Data processing: {time.time() - start_time:.2f}s") # Plotting start_time = time.time() if args.output_type == "pdf": create_pdf(args, df_combined) if args.output_type == "table": create_table(df_combined) print(f"Plotting: {time.time() - start_time:.2f}s") ================================================ FILE: tools/ab_test.py ================================================ #!/usr/bin/env python3 # Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. # SPDX-License-Identifier: Apache-2.0 """ Script for running A/B-Tests The script takes two git revisions and a pytest integration test. It utilizes our integration test frameworks --binary-dir parameter to execute the given test using binaries compiled from each revision, and runs a regression test comparing resulting metrics between runs. It performs the A/B-test as follows: For both A and B runs, collect all `metrics.json` files and read all dimentions from them. Script assumes all dimentions are unique within single run and both A and B runs result in the same dimentions. 
After collection is done, perform statistical regression test across all the list-valued properties collected. """ import argparse import glob import json import os import statistics import subprocess from collections import defaultdict from pathlib import Path from typing import Callable, List, Optional, TypeVar import scipy UNIT_REDUCTIONS = { "Microseconds": "Milliseconds", "Milliseconds": "Seconds", "Bytes": "Kilobytes", "Kilobytes": "Megabytes", "Megabytes": "Gigabytes", "Gigabytes": "Terabytes", "Bits": "Kilobits", "Kilobits": "Megabits", "Megabits": "Gigabits", "Gigabits": "Terabit", "Bytes/Second": "Kilobytes/Second", "Kilobytes/Second": "Megabytes/Second", "Megabytes/Second": "Gigabytes/Second", "Gigabytes/Second": "Terabytes/Second", "Bits/Second": "Kilobits/Second", "Kilobits/Second": "Megabits/Second", "Megabits/Second": "Gigabits/Second", "Gigabits/Second": "Terabits/Second", } INV_UNIT_REDUCTIONS = {v: k for k, v in UNIT_REDUCTIONS.items()} UNIT_SHORTHANDS = { "Seconds": "s", "Microseconds": "μs", "Milliseconds": "ms", "Bytes": "B", "Kilobytes": "KB", "Megabytes": "MB", "Gigabytes": "GB", "Terabytes": "TB", "Bits": "Bit", "Kilobits": "KBit", "Megabits": "MBit", "Gigabits": "GBit", "Terabits": "TBit", "Percent": "%", "Count": "", "Bytes/Second": "B/s", "Kilobytes/Second": "KB/s", "Megabytes/Second": "MB/s", "Gigabytes/Second": "GB/s", "Terabytes/Second": "TB/s", "Bits/Second": "Bit/s", "Kilobits/Second": "KBit/s", "Megabits/Second": "MBit/s", "Gigabits/Second": "GBit/s", "Terabits/Second": "TBit/s", "Count/Second": "Hz", "None": "", } def reduce_value(value, unit): """ Utility function for expressing a value in the largest possible unit in which it would still be >= 1 For example, `reduce_value(1_000_000, Bytes)` would return (1, Megabytes) """ # Could do this recursively, but I am worried about infinite recursion # due to precision problems (e.g. infinite loop of dividing/multiplying by 1000, alternating # between values < 1 and >= 1000). 
while abs(value) < 1 and unit in INV_UNIT_REDUCTIONS: value *= 1000 unit = INV_UNIT_REDUCTIONS[unit] while abs(value) >= 1000 and unit in UNIT_REDUCTIONS: value /= 1000 unit = UNIT_REDUCTIONS[unit] return value, unit def format_with_reduced_unit(value, unit): """ Utility function for pretty printing a given value by choosing a unit as large as possible, and then outputting its shorthand. For example, `format_with_reduced_unit(1_000_000, Bytes)` would return "1MB". """ reduced_value, reduced_unit = reduce_value(value, unit) formatted_unit = UNIT_SHORTHANDS.get(reduced_unit, reduced_unit) return f"{reduced_value:.2f}{formatted_unit}" # Performance tests that are known to be unstable and exhibit variances of up to 60% of the mean IGNORED = [ # Network throughput on m6a.metal {"instance": "m6a.metal", "performance_test": "test_network_tcp_throughput"}, # Network throughput on m7a.metal {"instance": "m7a.metal-48xl", "performance_test": "test_network_tcp_throughput"}, # vsock throughput on m7a.metal { "instance": "m7a.metal-48xl", "performance_test": "test_vsock_throughput", "mode": "g2h", }, # Network latencies on m8i.metal-{48,96}xl w/ 5.10 host *[ { "instance": instance, "performance_test": "test_network_latency", "host_kernel": "linux-5.10", } for instance in ["m8i.metal-48xl", "m8i.metal-96xl"] ], # block latencies if guest uses async request submission {"fio_engine": "libaio", "metric": "clat_read"}, {"fio_engine": "libaio", "metric": "clat_write"}, # boot time metrics {"performance_test": "test_boottime", "metric": "resume_time"}, # block throughput on m8g {"fio_engine": "libaio", "vcpus": "2", "instance": "m8g.metal-24xl"}, {"fio_engine": "libaio", "vcpus": "2", "instance": "m8g.metal-48xl"}, # memory hotplug metrics: ignore api_time and fc_time metrics, keeping only total_time. 
*[ { "performance_test": "test_memory_hotplug_latency", "metric": f"{prefix}_{metric}", } for prefix in ["hotplug", "hotunplug", "hotplug_2nd"] for metric in ["api_time", "fc_time"] ], ] def is_ignored(dimensions) -> bool: """Checks whether the given dimensions match an entry in the IGNORED dictionary above""" for high_variance in IGNORED: matching = {key: dimensions[key] for key in high_variance if key in dimensions} if matching == high_variance: return True return False def load_data_series(data_path: Path): """Recursively collects `metrics.json` files in provided path""" data = {} for name in glob.glob(f"{data_path}/**/metrics.json", recursive=True): with open(name, encoding="utf-8") as f: j = json.load(f) metrics = j["metrics"] dimentions = frozenset(j["dimensions"].items()) data[dimentions] = {} for m in metrics: # Ignore certain metrics as we know them to be volatile if "cpu_utilization" in m: continue mm = metrics[m] unit = mm["unit"] values = mm["values"] data[dimentions][m] = (values, unit) return data def uninteresting_dimensions(data): """ Computes the set of dimensions that only ever take on a single value across the entire dataset. 
""" values_per_dimension = defaultdict(set) for dimension_set in data: for dimension, value in dimension_set: values_per_dimension[dimension].add(value) uninteresting = set() for dimension, distinct_values in values_per_dimension.items(): if len(distinct_values) == 1: uninteresting.add(dimension) return uninteresting def collect_data( tag: str, binary_dir: Path, artifacts: Optional[Path], pytest_opts: str ): """ Executes the specified test using the provided firecracker binaries and stores results into the `test_results/tag` directory """ binary_dir = binary_dir.resolve() print( f"Collecting samples | binaries path: {binary_dir}" + f" | artifacts path: {artifacts}" if artifacts else "" ) test_path = f"test_results/{tag}" test_report_path = f"{test_path}/test-report.json" # It is not possible to just download them here this script is usually run inside docker # and artifacts downloading does not work inside it. if artifacts: subprocess.run( f"./tools/devtool set_current_artifacts {artifacts}", check=True, shell=True ) subprocess.run( f"./tools/test.sh --binary-dir={binary_dir} {pytest_opts} -m '' --json-report-file=../{test_report_path}", env=os.environ, check=True, shell=True, ) return load_data_series(Path(test_path)) def check_regression( a_samples: List[float], b_samples: List[float], *, n_resamples: int = 9999 ): """Checks for a regression by performing a permutation test. A permutation test is a non-parametric test that takes three parameters: Two populations (sets of samples) and a function computing a "statistic" based on two populations. First, the test computes the statistic for the initial populations. It then randomly permutes the two populations (e.g. merges them and then randomly splits them again). For each such permuted population, the statistic is computed. Then, all the statistics are sorted, and the percentile of the statistic for the initial populations is computed. 
We then look at the fraction of statistics that are larger/smaller than that of the initial populations. The minimum of these two fractions will then become the p-value. The idea is that if the two populations are indeed drawn from the same distribution (e.g. if performance did not change), then permuting will not affect the statistic (indeed, it should be approximately normal-distributed, and the statistic for the initial populations will be somewhere "in the middle"). Useful for performance tests. """ return scipy.stats.permutation_test( (a_samples, b_samples), # Compute the difference of means, such that a positive different indicates potential for regression. lambda x, y: statistics.mean(y) - statistics.mean(x), vectorized=False, n_resamples=n_resamples, ) def analyze_data( data_a, data_b, p_thresh, strength_abs_thresh, noise_threshold, *, n_resamples: int = 9999, ): """ Analyzes the A/B-test data produced by `collect_data`, by performing regression tests as described this script's doc-comment. Returns a mapping of dimensions and properties/metrics to the result of their regression test. """ assert set(data_a.keys()) == set( data_b.keys() ), "A and B run produced incomparable data. This is a bug in the test!" results = {} for dimension_set in data_a: metrics_a = data_a[dimension_set] metrics_b = data_b[dimension_set] assert set(metrics_a.keys()) == set( metrics_b.keys() ), "A and B run produced incomparable data. This is a bug in the test!" for metric, (values_a, unit) in metrics_a.items(): result = check_regression( values_a, metrics_b[metric][0], n_resamples=n_resamples ) results[dimension_set, metric] = (result, unit) # We sort our A/B-Testing results keyed by metric here. The resulting lists of values # will be approximately normal distributed, and we will use this property as a means of error correction. # The idea behind this is that testing the same metric (say, restore_latency) across different scenarios (e.g. 
# different vcpu counts) will be related in some unknown way (meaning most scenarios will show a change in the same # direction). In particular, if one scenario yields a slight improvement and the next yields a # slight degradation, we take this as evidence towards both being mere noise that cancels out. # # Empirical evidence for this assumption is that # 1. Historically, a true performance change has never shown up in just a single test, it always showed up # across most (if not all) tests for a specific metric. # 2. Analyzing data collected from historical runs shows that across different parameterizations of the same # metric, the collected samples approximately follow mean / variance = const, with the constant independent # of the parameterization. # # Mathematically, this has the following justification: By the central # limit theorem, the means of samples are (approximately) normal distributed. Denote by A # and B the distributions of the mean of samples from the 'A' and 'B' # tests respectively. Under our null hypothesis, the distributions of the # 'A' and 'B' samples are identical (although we dont know what the exact # distributions are), meaning so are A and B, say A ~ B ~ N(mu, sigma^2). # The difference of two normal distributions is also normal distributed, # with the means being subtracted and the variances being added. # Therefore, A - B ~ N(0, 2sigma^2). If we now normalize this distribution by mu (which # corresponds to considering the distribution of relative regressions instead), we get (A-B)/mu ~ N(0, c), with c # being the constant from point 2. above. This means that we can combine the relative means across # different parameterizations, and get a distributions whose expected # value is 0, provided our null hypothesis was true. It is exactly this distribution # for which we collect samples in the dictionary below. 
Therefore, a sanity check # on the average of the average of the performance changes for a single metric # is a good candidates for a sanity check against false-positives. # # Note that with this approach, for performance changes to "cancel out", we would need essentially a perfect split # between scenarios that improve performance and scenarios that degrade performance, something we have not # ever observed to actually happen. relative_changes_by_metric = defaultdict(list) relative_changes_significant = defaultdict(list) failures = [] for (dimension_set, metric), (result, unit) in results.items(): if is_ignored(dict(dimension_set) | {"metric": metric}): continue print(f"Doing A/B-test for dimensions {dimension_set} and property {metric}") values_a = data_a[dimension_set][metric][0] baseline_mean = statistics.mean(values_a) relative_changes_by_metric[metric].append(result.statistic / baseline_mean) if result.pvalue < p_thresh and abs(result.statistic) > strength_abs_thresh: failures.append((dimension_set, metric, result, unit)) relative_changes_significant[metric].append( result.statistic / baseline_mean ) messages = [] do_not_print_list = uninteresting_dimensions(data_a) for dimension_set, metric, result, unit in failures: # Sanity check as described above if abs(statistics.mean(relative_changes_by_metric[metric])) <= noise_threshold: continue # No data points for this metric were deemed significant if metric not in relative_changes_significant: continue # The significant data points themselves are above the noise threshold if abs(statistics.mean(relative_changes_significant[metric])) > noise_threshold: old_mean = statistics.mean(data_a[dimension_set][metric][0]) new_mean = statistics.mean(data_b[dimension_set][metric][0]) msg = ( f"\033[0;32m[Firecracker A/B-Test Runner]\033[0m A/B-testing shows a change of " f"{format_with_reduced_unit(result.statistic, unit)}, or {result.statistic / old_mean:.2%}, " f"(from {format_with_reduced_unit(old_mean, unit)} to 
{format_with_reduced_unit(new_mean, unit)}) " f"for metric \033[1m{metric}\033[0m with \033[0;31m\033[1mp={result.pvalue}\033[0m. " f"This means that observing a change of this magnitude or worse, assuming that performance " f"characteristics did not change across the tested commits, has a probability of {result.pvalue:.2%}. " f"Tested Dimensions:\n{json.dumps({k: v for k, v in dimension_set if k not in do_not_print_list}, indent=2, sort_keys=True)}" ) messages.append(msg) assert not messages, "\n" + "\n".join(messages) print("No regressions detected!") T = TypeVar("T") U = TypeVar("U") def binary_ab_test( test_runner: Callable[[Path, Optional[Path], bool], T], comparator: Callable[[T, T], U], *, a_directory: Path, b_directory: Path, a_artifacts: Optional[Path], b_artifacts: Optional[Path], ): """ Similar to `git_ab_test`, but instead of locally checking out different revisions, it operates on directories containing firecracker/jailer binaries """ result_a = test_runner(a_directory, a_artifacts, True) result_b = test_runner(b_directory, b_artifacts, False) return result_a, result_b, comparator(result_a, result_b) def ab_performance_test( a_directory: Path, b_directory: Path, a_artifacts: Optional[Path], b_artifacts: Optional[Path], pytest_opts, p_thresh, strength_abs_thresh, noise_threshold, ): """Does an A/B-test of the specified test with the given firecracker/jailer binaries""" return binary_ab_test( lambda bin_dir, art_dir, is_a: collect_data( is_a and "A" or "B", bin_dir, art_dir, pytest_opts ), lambda ah, be: analyze_data( ah, be, p_thresh, strength_abs_thresh, noise_threshold, n_resamples=int(100 / p_thresh), ), a_directory=a_directory, b_directory=b_directory, a_artifacts=a_artifacts, b_artifacts=b_artifacts, ) if __name__ == "__main__": parser = argparse.ArgumentParser( description="Executes Firecracker's A/B testsuite across the specified commits" ) subparsers = parser.add_subparsers(help="commands", dest="command", required=True) run_parser = 
subparsers.add_parser(
        "run",
        help="Run an specific test of our test suite as an A/B-test across two specified commits",
    )
    run_parser.add_argument(
        "--binaries-a",
        help="Directory containing firecracker and jailer binaries to be considered the performance baseline",
        type=Path,
        required=True,
    )
    run_parser.add_argument(
        "--binaries-b",
        help="Directory containing firecracker and jailer binaries whose performance we want to compare against the results from binaries-a",
        type=Path,
        required=True,
    )
    run_parser.add_argument(
        "--artifacts-a",
        help="Name of the artifacts directory in the build/artifacts to use for revision A test. If the directory does not exist, the name will be treated as S3 path and artifacts will be downloaded from there.",
        # Type is string since it can be an s3 path which if passed to `Path` constructor
        # will be incorrectly modified
        type=str,
        required=False,
    )
    run_parser.add_argument(
        "--artifacts-b",
        help="Name of the artifacts directory in the build/artifacts to use for revision B test. If the directory does not exist, the name will be treated as S3 path and artifacts will be downloaded from there.",
        # Type is string since it can be an s3 path which if passed to `Path` constructor
        # will be incorrectly modified
        type=str,
        required=False,
    )
    run_parser.add_argument(
        "--pytest-opts",
        help="Parameters to pass through to pytest, for example for test selection",
        required=True,
    )
    analyze_parser = subparsers.add_parser(
        "analyze",
        help="Analyze the results of two manually ran tests based on their test-report.json files",
    )
    analyze_parser.add_argument(
        "path_a",
        help="The path to the directory with A run",
        type=Path,
    )
    analyze_parser.add_argument(
        "path_b",
        help="The path to the directory with B run",
        type=Path,
    )
    # The three threshold options below live on the top-level parser and are
    # therefore shared by both the "run" and "analyze" subcommands; they must
    # be passed *before* the subcommand name on the command line.
    parser.add_argument(
        "--significance",
        help="The p-value threshold that needs to be crossed for a test result to be considered significant",
        type=float,
        default=0.01,
    )
    parser.add_argument(
        "--absolute-strength",
        help="The minimum absolute delta required before a regression will be considered valid",
        type=float,
        default=0.0,
    )
    parser.add_argument(
        "--noise-threshold",
        help="The minimal delta which a metric has to regress on average across all tests that emit it before the regressions will be considered valid.",
        type=float,
        default=0.05,
    )
    args = parser.parse_args()

    if args.command == "run":
        # "run" subcommand: compile-and-collect both runs, then analyze.
        ab_performance_test(
            args.binaries_a,
            args.binaries_b,
            args.artifacts_a,
            args.artifacts_b,
            args.pytest_opts,
            args.significance,
            args.absolute_strength,
            args.noise_threshold,
        )
    else:
        # "analyze" subcommand: load two pre-collected runs from disk and
        # only perform the statistical comparison.
        data_a = load_data_series(args.path_a)
        data_b = load_data_series(args.path_b)

        analyze_data(
            data_a,
            data_b,
            args.significance,
            args.absolute_strength,
            args.noise_threshold,
        )



================================================
FILE: tools/bindgen-patches/0001-change-c_char-to-c_uchar-in-ifrn_name.patch
================================================
diff --git a/src/vmm/src/devices/virtio/net/generated/iff.rs b/src/vmm/src/devices/virtio/net/generated/iff.rs
index 04e38396..54111c6b
100644 --- a/src/vmm/src/devices/virtio/net/generated/iff.rs +++ b/src/vmm/src/devices/virtio/net/generated/iff.rs @@ -325,7 +325,7 @@ #[repr(C)] #[derive(Copy, Clone)] pub union ifreq__bindgen_ty_1 { - pub ifrn_name: [::std::os::raw::c_char; 16usize], + pub ifrn_name: [::std::os::raw::c_uchar; 16usize], } #[allow(clippy::unnecessary_operation, clippy::identity_op)] const _: () = { -- 2.40.1 ================================================ FILE: tools/bindgen-patches/0002-derive-clone-copy-in-io-uring.patch ================================================ diff --git a/src/vmm/src/io_uring/generated.rs b/src/vmm/src/io_uring/generated.rs --- a/src/vmm/src/io_uring/generated.rs +++ b/src/vmm/src/io_uring/generated.rs @@ -206,6 +206,7 @@ [::std::mem::offset_of!(__kernel_timespec, tv_nsec) - 8usize]; }; #[repr(C)] +#[derive(Copy, Clone)] pub struct io_uring_sqe { pub opcode: __u8, pub flags: __u8, @@ -472,6 +473,7 @@ } } #[repr(C)] +#[derive(Copy, Clone)] pub struct io_uring_sqe__bindgen_ty_6 { pub __bindgen_anon_1: __BindgenUnionField, pub optval: __BindgenUnionField<__u64>, @@ -618,12 +620,11 @@ pub const IORING_MSG_SEND_FD: Type = 1; } #[repr(C)] -#[derive(Debug, Default)] +#[derive(Clone, Copy, Debug, Default)] pub struct io_uring_cqe { pub user_data: __u64, pub res: __s32, pub flags: __u32, - pub big_cqe: __IncompleteArrayField<__u64>, } #[allow(clippy::unnecessary_operation, clippy::identity_op)] const _: () = { @@ -633,8 +634,6 @@ [::std::mem::offset_of!(io_uring_cqe, user_data) - 0usize]; ["Offset of field: io_uring_cqe::res"][::std::mem::offset_of!(io_uring_cqe, res) - 8usize]; ["Offset of field: io_uring_cqe::flags"][::std::mem::offset_of!(io_uring_cqe, flags) - 12usize]; - ["Offset of field: io_uring_cqe::big_cqe"] - [::std::mem::offset_of!(io_uring_cqe, big_cqe) - 16usize]; }; #[repr(C)] #[derive(Debug, Default, Copy, Clone, PartialEq)] ================================================ FILE: tools/bindgen-patches/0003-vmclock.patch 
================================================ diff --git a/src/vmm/src/devices/acpi/generated/vmclock_abi.rs b/src/vmm/src/devices/acpi/generated/vmclock_abi.rs index e841ca111..134c8393f 100644 --- a/src/vmm/src/devices/acpi/generated/vmclock_abi.rs +++ b/src/vmm/src/devices/acpi/generated/vmclock_abi.rs @@ -16,40 +16,42 @@ clippy::redundant_static_lifetimes )] +use serde::{Deserialize, Serialize}; + pub const __BITS_PER_LONG: u32 = 64; pub const __BITS_PER_LONG_LONG: u32 = 64; pub const __FD_SETSIZE: u32 = 1024; pub const VMCLOCK_MAGIC: u32 = 1263289174; -pub const VMCLOCK_COUNTER_ARM_VCNT: u32 = 0; -pub const VMCLOCK_COUNTER_X86_TSC: u32 = 1; -pub const VMCLOCK_COUNTER_INVALID: u32 = 255; -pub const VMCLOCK_TIME_UTC: u32 = 0; -pub const VMCLOCK_TIME_TAI: u32 = 1; -pub const VMCLOCK_TIME_MONOTONIC: u32 = 2; -pub const VMCLOCK_TIME_INVALID_SMEARED: u32 = 3; -pub const VMCLOCK_TIME_INVALID_MAYBE_SMEARED: u32 = 4; -pub const VMCLOCK_FLAG_TAI_OFFSET_VALID: u32 = 1; -pub const VMCLOCK_FLAG_DISRUPTION_SOON: u32 = 2; -pub const VMCLOCK_FLAG_DISRUPTION_IMMINENT: u32 = 4; -pub const VMCLOCK_FLAG_PERIOD_ESTERROR_VALID: u32 = 8; -pub const VMCLOCK_FLAG_PERIOD_MAXERROR_VALID: u32 = 16; -pub const VMCLOCK_FLAG_TIME_ESTERROR_VALID: u32 = 32; -pub const VMCLOCK_FLAG_TIME_MAXERROR_VALID: u32 = 64; -pub const VMCLOCK_FLAG_TIME_MONOTONIC: u32 = 128; -pub const VMCLOCK_STATUS_UNKNOWN: u32 = 0; -pub const VMCLOCK_STATUS_INITIALIZING: u32 = 1; -pub const VMCLOCK_STATUS_SYNCHRONIZED: u32 = 2; -pub const VMCLOCK_STATUS_FREERUNNING: u32 = 3; -pub const VMCLOCK_STATUS_UNRELIABLE: u32 = 4; -pub const VMCLOCK_SMEARING_STRICT: u32 = 0; -pub const VMCLOCK_SMEARING_NOON_LINEAR: u32 = 1; -pub const VMCLOCK_SMEARING_UTC_SLS: u32 = 2; -pub const VMCLOCK_LEAP_NONE: u32 = 0; -pub const VMCLOCK_LEAP_PRE_POS: u32 = 1; -pub const VMCLOCK_LEAP_PRE_NEG: u32 = 2; -pub const VMCLOCK_LEAP_POS: u32 = 3; -pub const VMCLOCK_LEAP_POST_POS: u32 = 4; -pub const VMCLOCK_LEAP_POST_NEG: u32 = 5; +pub const 
VMCLOCK_COUNTER_ARM_VCNT: u8 = 0; +pub const VMCLOCK_COUNTER_X86_TSC: u8 = 1; +pub const VMCLOCK_COUNTER_INVALID: u8 = 255; +pub const VMCLOCK_TIME_UTC: u8 = 0; +pub const VMCLOCK_TIME_TAI: u8 = 1; +pub const VMCLOCK_TIME_MONOTONIC: u8 = 2; +pub const VMCLOCK_TIME_INVALID_SMEARED: u8 = 3; +pub const VMCLOCK_TIME_INVALID_MAYBE_SMEARED: u8 = 4; +pub const VMCLOCK_FLAG_TAI_OFFSET_VALID: u64 = 1; +pub const VMCLOCK_FLAG_DISRUPTION_SOON: u64 = 2; +pub const VMCLOCK_FLAG_DISRUPTION_IMMINENT: u64 = 4; +pub const VMCLOCK_FLAG_PERIOD_ESTERROR_VALID: u64 = 8; +pub const VMCLOCK_FLAG_PERIOD_MAXERROR_VALID: u64 = 16; +pub const VMCLOCK_FLAG_TIME_ESTERROR_VALID: u64 = 32; +pub const VMCLOCK_FLAG_TIME_MAXERROR_VALID: u64 = 64; +pub const VMCLOCK_FLAG_TIME_MONOTONIC: u64 = 128; +pub const VMCLOCK_STATUS_UNKNOWN: u8 = 0; +pub const VMCLOCK_STATUS_INITIALIZING: u8 = 1; +pub const VMCLOCK_STATUS_SYNCHRONIZED: u8 = 2; +pub const VMCLOCK_STATUS_FREERUNNING: u8 = 3; +pub const VMCLOCK_STATUS_UNRELIABLE: u8 = 4; +pub const VMCLOCK_SMEARING_STRICT: u8 = 0; +pub const VMCLOCK_SMEARING_NOON_LINEAR: u8 = 1; +pub const VMCLOCK_SMEARING_UTC_SLS: u8 = 2; +pub const VMCLOCK_LEAP_NONE: u8 = 0; +pub const VMCLOCK_LEAP_PRE_POS: u8 = 1; +pub const VMCLOCK_LEAP_PRE_NEG: u8 = 2; +pub const VMCLOCK_LEAP_POS: u8 = 3; +pub const VMCLOCK_LEAP_POST_POS: u8 = 4; +pub const VMCLOCK_LEAP_POST_NEG: u8 = 5; pub type __s8 = ::std::os::raw::c_schar; pub type __u8 = ::std::os::raw::c_uchar; pub type __s16 = ::std::os::raw::c_short; @@ -127,7 +129,7 @@ pub type __sum16 = __u16; pub type __wsum = __u32; pub type __poll_t = ::std::os::raw::c_uint; #[repr(C)] -#[derive(Debug, Default, Copy, Clone, PartialEq)] +#[derive(Debug, Default, Copy, Clone, PartialEq, Serialize, Deserialize)] pub struct vmclock_abi { pub magic: __le32, pub size: __le32, ================================================ FILE: tools/bindgen-patches/0004-vmclock-notify.patch ================================================ diff --git 
a/src/vmm/src/devices/acpi/generated/vmclock_abi.rs b/src/vmm/src/devices/acpi/generated/vmclock_abi.rs index 134c8393f..80228ad84 100644 --- a/src/vmm/src/devices/acpi/generated/vmclock_abi.rs +++ b/src/vmm/src/devices/acpi/generated/vmclock_abi.rs @@ -38,6 +38,8 @@ pub const VMCLOCK_FLAG_PERIOD_MAXERROR_VALID: u64 = 16; pub const VMCLOCK_FLAG_TIME_ESTERROR_VALID: u64 = 32; pub const VMCLOCK_FLAG_TIME_MAXERROR_VALID: u64 = 64; pub const VMCLOCK_FLAG_TIME_MONOTONIC: u64 = 128; +pub const VMCLOCK_FLAG_VM_GEN_COUNTER_PRESENT: u64 = 256; +pub const VMCLOCK_FLAG_NOTIFICATION_PRESENT: u64 = 512; pub const VMCLOCK_STATUS_UNKNOWN: u8 = 0; pub const VMCLOCK_STATUS_INITIALIZING: u8 = 1; pub const VMCLOCK_STATUS_SYNCHRONIZED: u8 = 2; @@ -153,10 +155,11 @@ pub struct vmclock_abi { pub time_frac_sec: __le64, pub time_esterror_nanosec: __le64, pub time_maxerror_nanosec: __le64, + pub vm_generation_counter: __le64, } #[allow(clippy::unnecessary_operation, clippy::identity_op)] const _: () = { - ["Size of vmclock_abi"][::std::mem::size_of::() - 104usize]; + ["Size of vmclock_abi"][::std::mem::size_of::() - 112usize]; ["Alignment of vmclock_abi"][::std::mem::align_of::() - 8usize]; ["Offset of field: vmclock_abi::magic"][::std::mem::offset_of!(vmclock_abi, magic) - 0usize]; ["Offset of field: vmclock_abi::size"][::std::mem::offset_of!(vmclock_abi, size) - 4usize]; @@ -198,4 +201,6 @@ const _: () = { [::std::mem::offset_of!(vmclock_abi, time_esterror_nanosec) - 88usize]; ["Offset of field: vmclock_abi::time_maxerror_nanosec"] [::std::mem::offset_of!(vmclock_abi, time_maxerror_nanosec) - 96usize]; + ["Offset of field: vmclock_abi::vm_generation_counter"] + [::std::mem::offset_of!(vmclock_abi, vm_generation_counter) - 104usize]; }; ================================================ FILE: tools/bindgen.sh ================================================ #!/bin/bash # -*- shell-script -*- # Copyright 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
# SPDX-License-Identifier: Apache-2.0 # ./tools/devtool shell --privileged # cargo install bindgen-cli # apt update && apt install patch # ./tools/bindgen.sh set -eu # Borrowed from crosvm https://chromium.googlesource.com/chromiumos/platform/crosvm/+/refs/heads/main/tools/impl/bindgen-common.sh#33 replace_linux_int_types() { sed -E -e '/^pub type __(u|s)(8|16|32|64) =/d' -e 's/__u(8|16|32|64)/u\1/g' -e 's/__s(8|16|32|64)/i\1/g' } function info { echo $@ >&2 } function fc-bindgen { cat <src/vmm/src/devices/virtio/net/generated/sockios.rs info "BINDGEN if.h" fc-bindgen "$INCLUDE/linux/if.h" \ --allowlist-var='IF.*' \ --allowlist-type='if.*' \ --allowlist-type="net_device.*" \ -- -D __UAPI_DEF_IF_IFNAMSIZ -D __UAPI_DEF_IF_NET_DEVICE_FLAGS -D __UAPI_DEF_IF_IFREQ -D __UAPI_DEF_IF_IFMAP >src/vmm/src/devices/virtio/net/generated/iff.rs info "BINDGEN if_tun.h" fc-bindgen \ --allowlist-type='sock_fprog' \ --allowlist-var='TUN_.*' \ --allowlist-var='IFF_NO_PI' \ --allowlist-var='IFF_MULTI_QUEUE' \ --allowlist-var='IFF_TAP' \ --allowlist-var='IFF_VNET_HDR' \ --allowlist-var='ETH_.*' \ --allowlist-type='ifreq' \ "$INCLUDE/linux/if_tun.h" >src/vmm/src/devices/virtio/net/generated/if_tun.rs info "BINDGEN virtio_ring.h" fc-bindgen \ --allowlist-var "VIRTIO_RING_F_EVENT_IDX" \ "$INCLUDE/linux/virtio_ring.h" >src/vmm/src/devices/virtio/generated/virtio_ring.rs info "BINDGEN virtio_config.h" fc-bindgen \ --allowlist-var "VIRTIO_F_.*" \ "$INCLUDE/linux/virtio_config.h" >src/vmm/src/devices/virtio/generated/virtio_config.rs info "BINDGEN virtio_blk.h" fc-bindgen \ --allowlist-var "VIRTIO_BLK_.*" \ "$INCLUDE/linux/virtio_blk.h" >src/vmm/src/devices/virtio/generated/virtio_blk.rs info "BINDGEN virtio_net.h" fc-bindgen \ --allowlist-var "VIRTIO_NET_F_.*" \ --allowlist-type "virtio_net_hdr_v1" \ "$INCLUDE/linux/virtio_net.h" >src/vmm/src/devices/virtio/generated/virtio_net.rs info "BINDGEN virtio_ids.h" fc-bindgen \ --allowlist-var "VIRTIO_ID.*" \ "$INCLUDE/linux/virtio_ids.h" 
>src/vmm/src/devices/virtio/generated/virtio_ids.rs info "BINDGEN virtio_mem.h" fc-bindgen \ --allowlist-var "VIRTIO_MEM.*" \ --allowlist-type "virtio_mem.*" \ "$INCLUDE/linux/virtio_mem.h" >src/vmm/src/devices/virtio/generated/virtio_mem.rs info "BINDGEN prctl.h" fc-bindgen \ --allowlist-var "PR_.*" \ "$INCLUDE/linux/prctl.h" >src/firecracker/src/generated/prctl.rs sed -i '/PR_SET_SPECULATION_CTRL/s/u32/i32/g' src/firecracker/src/generated/prctl.rs info "BINDGEN mpspec_def.h" fc-bindgen $ARCH_X86_INCLUDE/asm/mpspec_def.h \ >src/vmm/src/arch/x86_64/generated/mpspec.rs # https://github.com/rust-lang/rust-bindgen/issues/1274 info "BINDGEN msr-index.h" fc-bindgen $ARCH_X86_INCLUDE/asm/msr-index.h \ --allowlist-var "^MSR_.*$" \ -- \ -Wno-macro-redefined \ >src/vmm/src/arch/x86_64/generated/msr_index.rs perl -i -pe 's/= (\d+);/sprintf("= 0x%x;",$1)/eg' src/vmm/src/arch/x86_64/generated/msr_index.rs info "BINDGEN perf_event.h" grep "MSR_ARCH_PERFMON_" $ARCH_X86_INCLUDE/asm/perf_event.h \ >$ARCH_X86_INCLUDE/asm/perf_event_msr.h fc-bindgen $ARCH_X86_INCLUDE/asm/perf_event_msr.h \ --allowlist-var "^MSR_ARCH_PERFMON_.*$" \ -- \ >src/vmm/src/arch/x86_64/generated/perf_event.rs perl -i -pe 's/= (\d+);/sprintf("= 0x%x;",$1)/eg' src/vmm/src/arch/x86_64/generated/perf_event.rs info "BINDGEN hyperv.h" grep "#define HV_X64_MSR_" $KERNEL_DIR/arch/x86/kvm/hyperv.h \ >$KERNEL_DIR/arch/x86/kvm/hyperv_msr.h fc-bindgen $KERNEL_DIR/arch/x86/kvm/hyperv_msr.h \ --allowlist-var "^HV_X64_MSR_.*$" \ -- \ >src/vmm/src/arch/x86_64/generated/hyperv.rs perl -i -pe 's/= (\d+);/sprintf("= 0x%x;",$1)/eg' src/vmm/src/arch/x86_64/generated/hyperv.rs info "BINDGEN hyperv-tlfs.h" grep "HV_X64_MSR_" $ARCH_X86_INCLUDE/asm/hyperv-tlfs.h \ >$ARCH_X86_INCLUDE/asm/hyperv-tlfs_msr.h fc-bindgen $ARCH_X86_INCLUDE/asm/hyperv-tlfs_msr.h \ --allowlist-var "^HV_X64_MSR_.*$" \ -- \ >src/vmm/src/arch/x86_64/generated/hyperv_tlfs.rs perl -i -pe 's/= (\d+);/sprintf("= 0x%x;",$1)/eg' 
src/vmm/src/arch/x86_64/generated/hyperv_tlfs.rs info "BINDGEN io_uring.h" fc-bindgen \ --allowlist-var "IORING_.+" \ --allowlist-var "IO_URING_.+" \ --allowlist-var "IOSQE_.+" \ --allowlist-type "io_uring_.+" \ --allowlist-type "io_.qring_offsets" \ "$INCLUDE/linux/io_uring.h" \ >src/vmm/src/io_uring/generated.rs info "BINDGEN asm/prctl.h" fc-bindgen \ --allowlist-var "ARCH_.*" \ "$ARCH_X86_INCLUDE/uapi/asm/prctl.h" >src/vmm/src/arch/x86_64/generated/arch_prctl.rs info "BINDGEN include/uapi/linux/vmclock-abi.h" fc-bindgen \ "$KERNEL_DIR/include/uapi/linux/vmclock-abi.h" > src/vmm/src/devices/acpi/generated/vmclock_abi.rs # Apply any patches info "Apply patches" for PATCH in $(dirname $0)/bindgen-patches/*.patch; do git apply $PATCH done echo "Bindings created correctly! You might want to run ./tools/test_bindings.py to test for ABI incompatibilities" ================================================ FILE: tools/bump-version.sh ================================================ #!/usr/bin/env bash # Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved. # SPDX-License-Identifier: Apache-2.0 set -eu -o pipefail shopt -s lastpipe FC_TOOLS_DIR=$(dirname $(realpath $0)) source "$FC_TOOLS_DIR/functions" FC_ROOT_DIR=$FC_TOOLS_DIR/.. if [ $# -ne 1 ]; then cat < Example: $0 1.4.0-dev Bump Firecracker release version: 1. Updates Cargo.toml / Cargo.lock EOF exit 1 fi version=$1 # Get current version from the swagger spec. prev_ver=$(get_swagger_version) say "Updating from $prev_ver to $version ..." # Update version in files. 
files_to_change=( "$FC_ROOT_DIR/src/firecracker/swagger/firecracker.yaml" "$FC_ROOT_DIR/src/firecracker/Cargo.toml" "$FC_ROOT_DIR/src/jailer/Cargo.toml" "$FC_ROOT_DIR/src/rebase-snap/Cargo.toml" "$FC_ROOT_DIR/src/seccompiler/Cargo.toml" "$FC_ROOT_DIR/src/cpu-template-helper/Cargo.toml" "$FC_ROOT_DIR/src/snapshot-editor/Cargo.toml" ) say "Updating source files:" for file in "${files_to_change[@]}"; do say "- $file" if [[ "$file" =~ .+\.toml$ ]]; then # For TOML sed -i "s/^version = \"$prev_ver\"/version = \"$version\"/" "$file" elif [[ "$file" =~ .+\.yaml$ ]]; then # For YAML sed -i "s/version: $prev_ver/version: $version/" "$file" else echo "ERROR: Unrecognized file '$file'" exit 1 fi done # Run `cargo check` to update firecracker and jailer versions in all # `Cargo.lock`. # NOTE: This will break if it finds paths with spaces in them find . -path ./build -prune -o -name Cargo.lock -print |while read -r cargo_lock; do say "Updating $cargo_lock ..." (cd "$(dirname "$cargo_lock")"; cargo check) done ================================================ FILE: tools/devctr/Dockerfile ================================================ FROM public.ecr.aws/lts/ubuntu:24.04 # TODO: use a multi-stage build to reduce the download size when updating this container. # The Rust toolchain layer will get updated most frequently, but we could keep the system # dependencies layer intact for much longer. 
ARG RUST_TOOLCHAIN="1.93.0" ARG TMP_BUILD_DIR=/tmp/build ARG DEBIAN_FRONTEND=noninteractive ARG PIP_BREAK_SYSTEM_PACKAGES=1 ARG ARCH ENV CARGO_HOME=/usr/local/rust ENV RUSTUP_HOME=/usr/local/rust ENV PATH="$PATH:$CARGO_HOME/bin" ENV LC_ALL=C.UTF-8 ENV QEMU_VER="8.1.1" ENV CROSVM_VER="9d542e6dafa3a85acd1fb6cd6f1adfa1331c4e96" ENV CROSVM_TOOLCHAIN_VER="1.68.2" ENV LIBSECCOMP_VER="v2.6.0" # Build and install Qemu vhost-user-blk backend # RUN apt-get update \ && apt-get -y install --no-install-recommends \ curl gpg gpg-agent \ python3-pip build-essential ninja-build libglib2.0-dev libpixman-1-dev flex bison \ && pip3 install meson \ && mkdir /tmp/qemu_build && cd /tmp/qemu_build \ && curl -sLO https://keys.openpgp.org/vks/v1/by-fingerprint/CEACC9E15534EBABB82D3FA03353C9CEF108B584 \ && curl -sLO https://download.qemu.org/qemu-${QEMU_VER}.tar.xz \ && curl -sLO https://download.qemu.org/qemu-${QEMU_VER}.tar.xz.sig \ && gpg --import CEACC9E15534EBABB82D3FA03353C9CEF108B584 \ && gpg --verify qemu-${QEMU_VER}.tar.xz.sig qemu-${QEMU_VER}.tar.xz \ && tar xf qemu-${QEMU_VER}.tar.xz && cd qemu-${QEMU_VER} \ && ./configure && make -j $(nproc) contrib/vhost-user-blk/vhost-user-blk \ && strip ./build/contrib/vhost-user-blk/vhost-user-blk \ && cp -a ./build/contrib/vhost-user-blk/vhost-user-blk /usr/local/bin \ && pip3 uninstall -y meson \ && apt-get purge -y \ curl gpg gpg-agent \ build-essential ninja-build libglib2.0-dev libpixman-1-dev flex bison \ && apt-get autoremove -y \ && cd && rm -r /tmp/qemu_build # Install system dependencies # RUN apt-get update \ && apt-get -y install --no-install-recommends \ # essential build tools gcc make libc-dev binutils-dev libssl-dev \ # Useful utilities gdbserver \ # Needed in order to be able to compile `userfaultfd-sys`. 
clang \ curl \ file \ git \ jq \ less \ libbfd-dev \ libdw-dev \ # for aarch64, but can install in x86_64 libfdt-dev \ libiberty-dev \ libcurl4-openssl-dev \ lsof \ musl-tools \ # needed for integration tests net-tools iproute2 iperf3 socat fdisk \ numactl \ iptables \ openssh-client \ pkgconf \ python3 python3-dev python3-pip python3-venv \ screen tmux \ tzdata \ tini \ squashfs-tools zstd \ python3-seccomp \ # for aws-lc-rs cmake \ # for Qemu vhost-user-blk backend libglib2.0-dev \ # for crosvm (vhost-user-blk backend) libcap2 \ # for debugging gdb strace trace-cmd \ && rm -rf /var/lib/apt/lists/* RUN curl -sSL https://install.python-poetry.org | python3 - ENV PATH="/root/.local/bin:$PATH" ARG VENV="/opt/venv" COPY tools/devctr/poetry.lock /tmp/poetry/ COPY tools/devctr/pyproject.toml /tmp/poetry/ RUN cd /tmp/poetry \ && python3 -m venv $VENV \ && . $VENV/bin/activate \ && poetry install --only main --no-directory --no-interaction \ && rm -rf ~/.local/share/virtualenv/ ~/.cache /tmp/poetry \ && cd - ENV VIRTUAL_ENV=$VENV ENV PATH=$VENV/bin:$PATH # apt-get installs it globally, to manually copy it into the venv RUN cp /usr/lib/python3/dist-packages/seccomp.cpython-312-"$ARCH"-linux-gnu.so "$VENV"/lib/python3.12/site-packages/ RUN git clone https://github.com/awslabs/git-secrets /tmp/git-secrets \ && cd /tmp/git-secrets \ && make install \ && cd - \ && rm -rf /tmp/git-secrets # Running the three as a single dockerfile command to avoid inflation of the image: # - Install the Rust toolchain. # - Kani always installs _some_ nightly toolchain, we reuse it for the seccomp filter analysis test. Dynamically # determine the exact toolchain name, and install more components into it. 
# - Build and install crosvm (used as vhost-user-blk backend) # - Clean up cargo compilation directories # - Always install both x86_64 and aarch64 musl targets, as our rust-toolchain.toml would force on-the-fly installation of both anyway RUN curl https://sh.rustup.rs -sSf | sh -s -- -y --profile minimal --default-toolchain "$RUST_TOOLCHAIN" \ && rustup target add x86_64-unknown-linux-musl \ && rustup target add aarch64-unknown-linux-musl \ && rustup component add llvm-tools-preview clippy rustfmt \ && cargo install --locked grcov cargo-sort cargo-afl \ && cargo install --locked cargo-deny --version 0.19.0 \ && cargo install --locked kani-verifier --version 0.67.0 && cargo kani setup \ \ && NIGHTLY_TOOLCHAIN=$(rustup toolchain list | grep nightly | tr -d '\n') \ && rustup component add rust-src --toolchain "$NIGHTLY_TOOLCHAIN" \ && rustup target add "$ARCH"-unknown-linux-musl --toolchain "$NIGHTLY_TOOLCHAIN" \ && cargo +"$NIGHTLY_TOOLCHAIN" install cargo-udeps \ \ && apt-get update \ && apt-get -y install --no-install-recommends \ libcap-dev \ protobuf-compiler \ && git clone https://github.com/google/crosvm.git /tmp/crosvm \ && cd /tmp/crosvm && git checkout ${CROSVM_VER} \ && git submodule update --init \ && cargo build --no-default-features --release \ && strip ./target/release/crosvm \ && cp -a ./target/release/crosvm /usr/local/bin \ && apt-get purge -y \ libcap-dev \ protobuf-compiler \ && apt-get autoremove -y \ && rm -rf /var/lib/apt/lists/* \ && rustup toolchain uninstall ${CROSVM_TOOLCHAIN_VER}-${ARCH}-unknown-linux-gnu \ && cd && rm -r /tmp/crosvm \ \ && rm -rf "$CARGO_HOME/registry" \ && rm -rf "$CARGO_HOME/git" # help musl-gcc find linux headers RUN cd /usr/include/$ARCH-linux-musl \ && ln -s ../$ARCH-linux-gnu/asm asm \ && ln -s ../linux linux \ && ln -s ../asm-generic asm-generic # Install static version of libseccomp # We need to compile from source because # libseccomp provided by the distribution is not # compiled with musl-gcc and we need this # 
for our musl builds. # We specify the tag in order to have a fixed version # of the library. RUN apt-get update \ && apt-get -y install \ libtool gperf \ && git clone https://github.com/seccomp/libseccomp /tmp/libseccomp \ && cd /tmp/libseccomp \ && git checkout tags/${LIBSECCOMP_VER} \ && ./autogen.sh \ && CC="musl-gcc -static" ./configure --enable-static=yes --enable-shared=false \ && make install \ && cd \ && apt-get purge -y \ libtool gperf \ && apt-get autoremove -y \ && rm -rf /tmp/libseccomp # Build iperf3-vsock RUN mkdir "$TMP_BUILD_DIR" && cd "$TMP_BUILD_DIR" \ && git clone https://github.com/stefano-garzarella/iperf-vsock \ && cd iperf-vsock && git checkout 9245f9a \ && mkdir build && cd build \ && ../configure "LDFLAGS=--static" --disable-shared && make \ && cp src/iperf3 /usr/local/bin/iperf3-vsock \ && cd / \ && rm -rf "$TMP_BUILD_DIR" # Download the codecov.io uploader RUN cd /usr/local/bin \ && (if [ "$ARCH" = "x86_64" ]; then \ curl -O https://uploader.codecov.io/latest/linux/codecov; else \ curl -O https://uploader.codecov.io/latest/aarch64/codecov; fi) \ && chmod +x codecov \ && cd - # Add cross-compile toolchain for devtool checkbuild command RUN case "${ARCH}" in \ "aarch64") \ apt install -y gcc-x86-64-linux-gnu libc6-dev-amd64-cross linux-libc-dev-amd64-cross && \ rustup target add x86_64-unknown-linux-gnu \ ;; \ "x86_64") \ apt install -y gcc-aarch64-linux-gnu libc6-dev-arm64-cross linux-libc-dev-arm64-cross && \ rustup target add aarch64-unknown-linux-gnu \ ;; \ *) \ echo "Unsupported arch ${ARCH}" && \ exit 1 \ ;; \ esac ADD tools/devctr/ctr_gitconfig /root/.gitconfig ENTRYPOINT ["/usr/bin/tini", "--"] ================================================ FILE: tools/devctr/ctr_gitconfig ================================================ # Add the root firecracker git folder as a safe directory in .gitconfig. 
# Firecracker root git folder in the container is # bind-mounted to a folder on the host which is mapped to a # user that is different from the user which runs the integ tests. # This difference in ownership is validated against by git. # https://github.blog/2022-04-12-git-security-vulnerability-announced/ [safe] directory = * ================================================ FILE: tools/devctr/pyproject.toml ================================================ [tool.poetry] name = "tool.poetry" version = "0.1.0" description = "Firecracker python dependencies" authors = ["Your Name "] package-mode = false [tool.poetry.dependencies] aws-embedded-metrics = "^3.1.0" black = "^24.3.0" # Gitlint locks this to 8.1.3. Lock from our side too to prevent different versions click = "8.1.3" filelock = "^3.13.4" gitlint = "^0.19.1" ipython = "^8.15.0" isort = "^5.12.0" mdformat = "^0.7.17" mdformat-footnote = "^0.1.1" mdformat-frontmatter = "^2.0.8" mdformat-gfm = "^0.3.5" numpy = "^2.3.4" openapi-spec-validator = "^0.7.1" pandas = "^2.3.3" psutil = "^6.0.0" pylint = "^3" pytest = "^8" pytest-json-report = "^1.5.0" pytest-repeat = "^0.9.4" pytest-rerunfailures = "^14" pytest-timeout = "^2.3.1" pytest-xdist = "^3.5" python = "^3.12.0" PyYAML = "^6.0" requests = "^2.32.4" requests-unixsocket2 = "^0.4.0" scipy = "^1.11.2" seaborn = "^0.13.2" semver = "^3.0.4" setproctitle = "^1.3.2" tenacity = "^8.2.2" [tool.poetry.group.dev.dependencies] [build-system] requires = ["poetry-core>=1.0.0"] build-backend = "poetry.core.masonry.api" ================================================ FILE: tools/devtool ================================================ #!/usr/bin/env bash # Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. # SPDX-License-Identifier: Apache-2.0 # Firecracker devtool # # Use this script to build and test Firecracker. # # TL;DR # Make sure you have Docker installed and properly configured # (http://docker.com). 
Then, # building: `./devtool build` # Then find the binaries under build/debug/ # testing: `./devtool test` # Will run the entire test battery; will take several minutes to complete. # deep-dive: `./devtool shell` # Open a shell prompt inside the container. Then build or test (or do # anything, really) manually. # # Still TL;DR: have Docker; ./devtool build; ./devtool test; ./devtool help. # # # Both building and testing are done inside a Docker container. Please make sure # you have Docker up and running on your system (see http:/docker.com) and your # user has permission to run Docker containers. # # The Firecracker sources dir will be bind-mounted inside the development # container (under /firecracker) and any files generated by the build process # will show up under the build/ dir. This includes the final binaries, as well # as any intermediate or cache files. # # By default, all devtool commands run the container transparently, removing # it after the command completes. Any persisting files will be stored under # build/. # If, for any reason, you want to access the container directly, please use # `devtool shell`. This will perform the initial setup (bind-mounting the # sources dir, setting privileges) and will then drop into a BASH shell inside # the container. # # Building: # Run `./devtool build`. # By default, the debug binaries are built and placed under build/debug/. # To build the release version, run `./devtool build --release` instead. # You can then find the binaries under build/release/. # # Testing: # Run `./devtool test`. # This will run the entire integration test battery. The testing system is # based on pytest (http://pytest.org). # # Opening a shell prompt inside the development container: # Run `./devtool shell`. # # Additional information: # Run `./devtool help`. # # # TODO: # - List tests by parsing the `pytest --collect-only` output. # - Add a `./devtool run` command to set up and run Firecracker. 
# - Add a `./devtool diag` command to help with troubleshooting, by checking
#   the most common failure conditions.
# - Look into caching the Cargo registry within the container and if that
#   would help with reproducible builds (in addition to pinning Cargo.lock)

# Development container image (without tag)
DEVCTR_IMAGE_NO_TAG="public.ecr.aws/firecracker/fcuvm"

# Development container tag
DEVCTR_IMAGE_TAG=${DEVCTR_IMAGE_TAG:-v88}

# Development container image (name:tag)
# This should be updated whenever we upgrade the development container.
# (Yet another step on our way to reproducible builds.)
DEVCTR_IMAGE="${DEVCTR_IMAGE_NO_TAG}:${DEVCTR_IMAGE_TAG}"

# Full path to the Firecracker tools dir on the host.
FC_TOOLS_DIR=$(cd "$(dirname "$0")" && pwd)
source "$FC_TOOLS_DIR/functions"

# Full path to the Firecracker sources dir on the host.
FC_ROOT_DIR=$(cd "${FC_TOOLS_DIR}/.." && pwd)

# Full path to the build dir on the host.
FC_BUILD_DIR="${FC_ROOT_DIR}/build"

# Full path to devctr dir on the host.
FC_DEVCTR_DIR="${FC_ROOT_DIR}/tools/devctr"

# Path to the linux kernel directory on the host.
KERNEL_DIR="${FC_ROOT_DIR}/.kernel"

# Full path to the cargo registry dir on the host. This appears on the host
# because we want to persist the cargo registry across container invocations.
# Otherwise, any rust crates from crates.io would be downloaded again each time
# we build or test.
CARGO_REGISTRY_DIR="${FC_BUILD_DIR}/cargo_registry"

# Full path to the cargo git registry on the host. This serves the same purpose
# as CARGO_REGISTRY_DIR, for crates downloaded from GitHub repos instead of
# crates.io.
CARGO_GIT_REGISTRY_DIR="${FC_BUILD_DIR}/cargo_git_registry"

# Full path to the cargo target dir on the host.
CARGO_TARGET_DIR="${FC_BUILD_DIR}/cargo_target"

# Full path to the Firecracker sources dir, as bind-mounted in the container.
CTR_FC_ROOT_DIR="/firecracker"

# Full path to the build dir, as bind-mounted in the container.
CTR_FC_BUILD_DIR="${CTR_FC_ROOT_DIR}/build"
CTR_TEST_RESULTS_DIR="${CTR_FC_ROOT_DIR}/test_results"

# Full path to the cargo target dir, as bind-mounted in the container.
CTR_CARGO_TARGET_DIR="$CTR_FC_BUILD_DIR/cargo_target"

# Path to the microVM images cache dir
LOCAL_ARTIFACTS_DIR="build/artifacts"

# File with a single line specifying the name of the
# currently used artifacts
LOCAL_ARTIFACTS_CURRENT_DIR_FILE="build/current_artifacts"

# Full path to the public key mapping on the guest
PUB_KEY_PATH=/root/.ssh/id_rsa.pub

# Full path to the private key mapping on the guest
PRIV_KEY_PATH=/root/.ssh/id_rsa

# Path to the linux kernel directory, as bind-mounted in the container.
CTR_KERNEL_DIR="${CTR_FC_ROOT_DIR}/.kernel"

# Get the target prefix to avoid repeated calls to uname -m
TARGET_PREFIX="$(uname -m)-unknown-linux-"

# Container path to directory where we store built CI artifacts.
CTR_CI_ARTIFACTS_PATH="${CTR_FC_ROOT_DIR}/resources/$(uname -m)"

DEFAULT_ARTIFACTS_S3_BUCKET=s3://spec.ccfc.min/firecracker-ci

# Lockfile used while modifying KVM modules
KVM_MODULE_LOCKFILE="/tmp/.kvm_module_lock"

# Query the default S3 bucket with artifacts and return the most recent path.
get_newest_s3_artifacts() {
    local bucket="spec.ccfc.min"
    local base_prefix="firecracker-ci/"
    # Query all files in the `firecracker-ci` directory, check files containing
    # "vmlinux", sort them by the `LastModified` date, and return the last one
    # (newest). We need to do it this way as S3 doesn't store a `LastModified`
    # date for directories, so we need to list all files.
    local newest_dir=$(aws s3api list-objects-v2 \
        --bucket "$bucket" --prefix "$base_prefix" --no-sign-request \
        --query 'sort_by(Contents[?contains(Key, `vmlinux`)], &LastModified)[-1].Key' | tr -d '"' | sed "s|^$base_prefix||" | cut -d'/' -f1 )
    [ -z "$newest_dir" ] && die "Could not find newest artifacts in S3."
    echo "$DEFAULT_ARTIFACTS_S3_BUCKET/$newest_dir"
}

# Function to return local path to artifacts. Accepts the url from the function
# above as an argument.
get_local_artifacts_path() {
    local path=$1
    # Flatten the S3-style path into a single directory name ("/" -> "-").
    echo $LOCAL_ARTIFACTS_DIR/"${path//\//-}"
}

# Check if Docker is available and exit if it's not.
# Upon returning from this call, the caller can be certain Docker is available.
#
ensure_docker() {
    NEWLINE=$'\n'
    output=$(which docker 2>&1)
    ok_or_die "Docker not found. Aborting." \
        "Please make sure you have Docker (http://docker.com) installed" \
        "and properly configured.${NEWLINE}" \
        "Error: $?, command output: ${output}"
    output=$(docker ps 2>&1)
    ok_or_die "Error accessing Docker. Please make sure the Docker daemon" \
        "is running and that you are part of the docker group.${NEWLINE}" \
        "Error: $?, command output: ${output}${NEWLINE}" \
        "For more information, see" \
        "https://docs.docker.com/install/linux/linux-postinstall/"
}

# Run a command and retry multiple times if it fails. Once it stops
# failing return to normal execution. If there are "retry count"
# failures, set the last error code.
# $1 - command
# $2 - retry count
# $3 - sleep interval between retries
retry_cmd() {
    command=$1
    retry_cnt=$2
    sleep_int=$3
    {
        $command
    } || {
        # Command failed, subtract one from retry_cnt
        retry_cnt=$((retry_cnt - 1))
        # If retry_cnt is larger than 0, sleep and call again
        if [ "$retry_cnt" -gt 0 ]; then
            echo "$command failed, retrying..."
            sleep "$sleep_int"
            retry_cmd "$command" "$retry_cnt" "$sleep_int"
        fi
    }
}

# Attempt to download our Docker image. Exit if that fails.
# Upon returning from this call, the caller can be certain our Docker image is
# available on this system.
#
ensure_devctr() {
    # We depend on having Docker present.
    ensure_docker
    # Check if we have the container image available locally. Attempt to
    # download it, if we don't.
[[ $(docker images -q "$DEVCTR_IMAGE" | wc -l) -gt 0 ]] || { say "About to pull docker image $DEVCTR_IMAGE" # Run docker pull 5 times in case it fails - sleep 3 seconds # between attempts retry_cmd "docker pull $DEVCTR_IMAGE" 5 3 ok_or_die "Error pulling docker image. Aborting." } } # Make sure the build/ dirs are available. Exit if we can't create them. # Upon returning from this call, the caller can be certain the build/ dirs exist. # ensure_build_dir() { for dir in "$FC_BUILD_DIR" "$CARGO_TARGET_DIR" \ "$CARGO_REGISTRY_DIR" "$CARGO_GIT_REGISTRY_DIR"; do create_dir "$dir" done } build_bin_path() { target="$1" profile="$2" binary="$3" echo "$CARGO_TARGET_DIR/$target/$profile/$binary" } # Fix build/ dir permissions after a privileged container run. # Since the privileged container runs as root, any files it creates will be # owned by root. This fixes that by recursively changing the ownership of build/ # to the current user. # cmd_fix_perms() { # Yes, running Docker to get elevated privileges, just to chown some files # is a dirty hack. run_devctr \ --workdir "$CTR_FC_ROOT_DIR" \ -- \ chown -f -R "$(id -u):$(id -g)" "$CTR_FC_BUILD_DIR" "$CTR_TEST_RESULTS_DIR" "$CTR_CI_ARTIFACTS_PATH" $@ } # Builds the development container from its Dockerfile. # cmd_build_devctr() { docker_file_name=$FC_DEVCTR_DIR/Dockerfile build_args="--build-arg ARCH=$(uname -m)" while [ $# -gt 0 ]; do case "$1" in "-h"|"--help") { cmd_help; exit 1; } ;; "--") { shift; break; } ;; *) die "Unknown argument: $1. Please use --help for help." ;; esac shift done docker build -t "$DEVCTR_IMAGE_NO_TAG" -f "$docker_file_name" $build_args . } # Validate the user supplied kernel version number. # It must be composed of 2 groups of integers separated by dot, with an optional third group. validate_kernel_version() { local version_regex="^([0-9]+.)[0-9]+(.[0-9]+)?$" version="$1" if [ -z "$version" ]; then die "Kernel version cannot be empty." elif [[ ! 
"$version" =~ $version_regex ]]; then die "Invalid version number: $version (expected: \$Major.\$Minor.\$Patch(optional))." fi } # Helper function to run the dev container. # Usage: run_devctr -- # Example: run_devctr --privileged -- bash -c "echo 'hello world'" run_devctr() { docker_args=() ctr_args=() docker_args_done=false while [[ $# -gt 0 ]]; do [[ "$1" = "--" ]] && { docker_args_done=true shift continue } [[ $docker_args_done = true ]] && ctr_args+=("$1") || docker_args+=("$1") shift done # If we're running in a terminal, pass the terminal to Docker and run # the container interactively [[ -t 0 ]] && docker_args+=("-i") [[ -t 1 ]] && docker_args+=("-t") # Try to pass these environments from host into container for network proxies proxies=(http_proxy HTTP_PROXY https_proxy HTTPS_PROXY no_proxy NO_PROXY) for i in "${proxies[@]}"; do if [[ ! -z ${!i} ]]; then docker_args+=("--env") && docker_args+=("$i=${!i}") fi done # Finally, run the dev container # Use 'z' on the --volume parameter for docker to automatically relabel the # content and allow sharing between containers. docker run "${docker_args[@]}" \ --rm \ --volume /dev:/dev \ --volume "$FC_ROOT_DIR:$CTR_FC_ROOT_DIR:z" \ --volume "$FC_ROOT_DIR/build/cargo_registry:/usr/local/rust/registry:z" \ --volume "$FC_ROOT_DIR/build/cargo_git_registry:/usr/local/rust/git:z" \ --tmpfs /srv:exec,dev,size=32G \ -v /boot:/boot \ --env PYTHONDONTWRITEBYTECODE=1 \ "$DEVCTR_IMAGE" "${ctr_args[@]}" } # Helper function to test that the argument provided is a valid path to a SSH key. # test_key() { ssh-keygen -lf "$1" &>/dev/null ret=$? [ $ret -ne 0 ] && die "$1 is not a valid key file." } create_dir() { # Create a dir for the provided path. dir="$1" mkdir -p "$dir" || die "Error: cannot create dir $dir" [ -x "$dir" ] && [ -w "$dir" ] || \ { say "Wrong permissions for $dir. Attempting to fix them ..." chmod +x+w "$dir" } || \ die "Error: wrong permissions for $dir. 
Should be +x+w" } # `$0 help` # Show the detailed devtool usage information. # cmd_help() { echo "" echo "Firecracker $(basename $0)" echo "Usage: $(basename $0) [] []" echo "" echo "Global arguments" echo " -y, --unattended Run unattended. Assume the user would always" echo " answer \"yes\" to any confirmation prompt." echo "" echo "Available commands:" echo "" echo " build [--debug|--release] [-l|--libc musl|gnu]" echo " Build the Firecracker binaries." echo " Firecracker is built using the Rust build system (cargo). All arguments after --" echo " will be passed through to cargo." echo " --debug Build the debug binaries. This is the default." echo " --release Build the release binaries." echo " -l, --libc musl|gnu Choose the libc flavor against which Firecracker will" echo " be linked. Default is musl." echo " --ssh-keys Provide the paths to the public and private SSH keys on the host" echo " (in this particular order) required for the git authentication." echo " It is mandatory that both keys are specified." echo "" echo " build_devctr" echo " Builds the development container from its Dockerfile." echo "" echo " checkenv" echo " Performs prerequisites checks needed to execute firecracker." echo "" echo " distclean" echo " Clean up the build tree and remove the docker container." echo "" echo " fix_perms" echo " Fixes permissions when devtool dies in the middle of a privileged session." echo "" echo " fmt" echo " Auto-format all Rust source files, to match the Firecracker requirements." echo " This should be used as the last step in every commit, to ensure that the" echo " Rust style tests pass." echo "" echo " install [-p|--path] [--debug|--release]" echo " Install firecracker, jailer and seccomp binaries to /usr/local/bin or a given path." echo " Only the musl linked binaries are supported." echo " --path Install binaries to a specified path." echo " --debug Install the debug binaries." echo " --release Install the release binaries. This is the default." 
    echo ""
    echo " help"
    echo " Display this help message."
    echo ""
    echo " shell [--privileged]"
    echo " Launch the development container and open an interactive BASH shell."
    echo " -p, --privileged Run the container as root, in privileged mode."
    echo " Running Firecracker via the jailer requires elevated"
    echo " privileges, though the build phase does not."
    echo ""
    echo " sh CMD..."
    echo " Launch the development container and run a command."
    echo ""
    echo " test [args] [-- []]"
    echo " Run the Firecracker integration or A/B tests."
    echo " The Firecracker testing system is based on pytest. All arguments after --"
    echo " will be passed through to pytest."
    echo ""
    echo " Args for the 'test' itself:"
    echo " -h, --help Print help"
    echo " -c, --cpuset-cpus cpulist Set a dedicated cpulist to be used by the tests."
    echo " -m, --cpuset-mems memlist Set a dedicated memlist to be used by the tests."
    echo " --performance Tweak various setting of the host running the tests (such as C- and P-states)"
    echo " to achieve consistent performance. Used for running performance tests in CI."
    echo " --ab Run A/B test."
    echo " --no-build Skip building step."
    echo " --no-archive Skip archiving of 'test_result' after the test is done."
    echo " --no-kvm-check Skip checking for '/dev/kvm' presence."
    echo " --no-artifacts-check Skip checking existing artifacts."
    echo ""
    echo " build_ci_artifacts [all|rootfs|kernels]"
    echo " Builds the rootfs and guest kernel artifacts we use for our CI."
    echo " Run './tools/devtool build_ci_artifacts help' for more details about the available commands."
    echo ""
    echo " download_ci_artifacts [--force] [s3_uri_1, s3_uri_2 ...]"
    echo " Downloads artifacts from provided S3 URI (like s3://spec.ccfc.min/firecracker-ci/my_artifacts)"
    echo " and runs ./tools/setup-ci-artifacts.sh. for each of them."
    echo " If no arguments are provided, pulls newest artifacts from $DEFAULT_ARTIFACTS_S3_BUCKET"
    echo " If '--force' is specified, removes previous artifacts with same name"
    echo ""
    echo " set_current_artifacts [s3_uri/directory name]"
    echo " Sets the $LOCAL_ARTIFACTS_CURRENT_DIR_FILE to contain a local path where the artifacts should be."
    echo " Accepts some name used to generate the final directory name. Mainly used with S3 URI"
    echo " like 'download_ci_artifacts'. Alternatively it is possible to manually write local "
    echo " path to artifacts directory into $LOCAL_ARTIFACTS_CURRENT_DIR_FILE file"
    echo ""
    echo " ensure_current_artifacts [s3_uri/directory name]"
    echo " Makes sure the $LOCAL_ARTIFACTS_CURRENT_DIR_FILE file contains a path to current artifacts."
    echo " If an optional path/URI is provided, try to set it as current artifacts."
    echo ""
    # NOTE(review): the heredoc below appears garbled by extraction — the
    # "<<EOF" marker and text in angle brackets (e.g. "[<pytest args>")
    # seem to have been stripped. Reproduced verbatim; restore from the
    # upstream devtool before executing this file.
    cat <]] Run tests in a debugging environment sandbox Run Firecracker in an IPython REPL (in devctr) sandbox_native Run Firecracker in an IPython REPL (AL2023/Ubuntu) mkdocs Use 'cargo doc' to generate rustdoc documentation checkstyle Run style checks checkbuild [--all|-m x86_64|aarch64] Run cargo check on the target architecture (supports cross compilation).
EOF
}

# `$0 build` - build Firecracker
# Please see `$0 help` for more information.
#
cmd_build() {
    # By default, we'll build the debug binaries.
    profile="debug"
    libc="musl"
    # Parse any command line args.
    while [ $# -gt 0 ]; do
        case "$1" in
            "-h"|"--help") { cmd_help; exit 1; } ;;
            "--debug") { profile="debug"; } ;;
            "--release") { profile="release"; } ;;
            "--rev") { shift; revision=$1; } ;;
            "--ssh-keys")
                # Expects two positional values: public key path, then private key path.
                shift
                [[ -z "$1" ]] && \
                    die "Please provide the path to the public SSH key."
                [[ ! -f "$1" ]] && die "The public key file does not exist: $1."
                test_key "$1"
                host_pub_key_path="$1"
                shift
                [[ -z "$1" ]] && \
                    die "Please provide the path to the private SSH key."
                [[ ! -f "$1" ]] && die "The private key file does not exist: $1."
                test_key "$1"
                host_priv_key_path="$1"
                ;;
            "-l"|"--libc")
                shift
                [[ "$1" =~ ^(musl|gnu)$ ]] || \
                    die "Invalid libc: $1. Valid options are \"musl\" and \"gnu\"."
                libc="$1"
                ;;
            "--") { shift; break; } ;;
            *) die "Unknown argument: $1. Please use --help for help." ;;
        esac
        shift
    done
    # Check prerequisites
    ensure_devctr
    ensure_build_dir
    # Map the public and private keys to the guest if they are specified.
    [ ! -z "$host_pub_key_path" ] && [ ! -z "$host_priv_key_path" ] && extra_args="--volume $host_pub_key_path:$PUB_KEY_PATH:z \
        --volume $host_priv_key_path:$PRIV_KEY_PATH:z"
    workdir="$CTR_FC_ROOT_DIR"
    # --rev: build a specific revision in a temporary clone so the working
    # tree is left untouched.
    if [ ! -z "$revision" ]; then
        commitish="$revision"
        # Fall back to the remote-tracking ref if the revision is not known locally.
        if ! git cat-file -t "$commitish"; then commitish=origin/"$revision"; fi
        branch_name=tmp-$commitish
        tmp_dir=$(mktemp -d)
        git branch $branch_name $commitish
        git clone -b $branch_name . $tmp_dir
        pushd $tmp_dir
        workdir=$tmp_dir
        extra_args="$extra_args --volume $tmp_dir:$tmp_dir:z"
    fi
    # Run the cargo build process inside the container.
    # We don't need any special privileges for the build phase, so we run the
    # container as the current user/group.
    run_devctr \
        --privileged \
        --workdir "$workdir" \
        ${extra_args} \
        -- \
        ./tools/release.sh --libc $libc --profile $profile
    ret=$?
    # Running as root would have created some root-owned files under the build
    # dir. Let's fix that.
    cmd_fix_perms
    # Clean up the temporary clone/branch and copy the produced binaries out.
    if [ ! -z "$revision" ]; then
        popd
        git branch -D $branch_name
        mkdir -p build/"$revision"/examples
        cp $tmp_dir/build/cargo_target/$(uname -m)-unknown-linux-$libc/$profile/* build/"$revision"
        cp $tmp_dir/build/cargo_target/$(uname -m)-unknown-linux-$libc/$profile/examples/* build/"$revision"/examples
        cmd_sh "rm -rf $tmp_dir"
    fi
    return $ret
}

# Build release binaries (musl, release profile) and package them via
# tools/release.sh --make-release; chown the results back to the invoking user.
function cmd_make_release {
    ensure_build_dir
    run_devctr \
        --privileged \
        --workdir "$CTR_FC_ROOT_DIR" \
        -- \
        ./tools/release.sh --libc musl --profile release --make-release
    sudo chown -Rc $USER: release*
}

cmd_distclean() {
    # List of folders to remove.
    dirs=("build" "test_results")
    for dir in "${dirs[@]}"; do
        if [ -d "$dir" ]; then
            say "Removing $dir"
            rm -rf "$dir"
        fi
    done
    # Remove devctr if it exists
    if [ $(docker images -q "$DEVCTR_IMAGE" | wc -l) -eq "1" ]; then
        say "Removing $DEVCTR_IMAGE"
        docker rmi -f "$DEVCTR_IMAGE"
    fi
}

# `$0 download_ci_artifacts [--force] [s3_uri ...]`
# Download (and set up) one or more CI artifact sets; with no URIs, the
# newest default set is used.
cmd_download_ci_artifacts() {
    if [ "$1" = "--force" ]; then
        FORCE_ARTIFACT_DOWNLOAD=1
        shift 1
    fi
    local artifacts_list=$@
    if [[ -z ${artifacts_list[@]} ]]; then
        download_ci_artifacts
    else
        for artifacts in ${artifacts_list[@]}; do
            download_ci_artifacts $artifacts
        done
    fi
}

# Record the local path derived from the given artifacts name into
# $LOCAL_ARTIFACTS_CURRENT_DIR_FILE.
cmd_set_current_artifacts() {
    local artifacts=$1
    if [ -z $artifacts ]; then
        say "No artifacts were specified"
    else
        local local_artifacts_path=$(get_local_artifacts_path $artifacts)/$(uname -m)
        echo $local_artifacts_path > $LOCAL_ARTIFACTS_CURRENT_DIR_FILE
        say "Current artifacts path: " $local_artifacts_path
    fi
}

# Make sure $LOCAL_ARTIFACTS_CURRENT_DIR_FILE points at an existing artifacts
# directory, downloading a fresh set when it does not (or when args are given).
cmd_ensure_current_artifacts() {
    if [ -f $LOCAL_ARTIFACTS_CURRENT_DIR_FILE ] && [ $# = 0 ]; then
        local current_local_artifacts_path=$(cat $LOCAL_ARTIFACTS_CURRENT_DIR_FILE)
        if [ -d $current_local_artifacts_path ]; then
            say "Current artifacts path: " $current_local_artifacts_path
            return 0
        fi
        say "Invalid artifact dir! Artifacts will be downloaded again: $current_local_artifacts_path"
    fi
    download_ci_artifacts $@
    echo $LOCAL_ARTIFACTS_PATH > $LOCAL_ARTIFACTS_CURRENT_DIR_FILE
    say "Current artifacts path: " $LOCAL_ARTIFACTS_PATH
}

# Download the given artifacts set (or the newest default one) from S3 into
# the local cache, run setup-ci-artifacts.sh on it, and publish the resulting
# path via the LOCAL_ARTIFACTS_PATH global.
download_ci_artifacts() {
    local artifacts=$1
    if [ -z $artifacts ]; then
        local default_artifacts=$(get_newest_s3_artifacts)
        say "No specific artifacts are defined. Using default artifacts: " $default_artifacts
        artifacts=$default_artifacts
    fi
    # Fetch all the artifacts so they are local
    local artifacts_arch=$artifacts/$(uname -m)
    local local_artifacts_path=$(get_local_artifacts_path $artifacts)/$(uname -m)
    if [ ! -z $FORCE_ARTIFACT_DOWNLOAD ]; then
        say "Removing " $local_artifacts_path
        rm -rf $local_artifacts_path
    fi
    if [ ! -d "$local_artifacts_path" ]; then
        say "Fetching artifacts from S3: " $artifacts_arch " into: " $local_artifacts_path
        mkdir -pv $local_artifacts_path
        aws s3 sync --no-sign-request "$artifacts_arch" "$local_artifacts_path"
        ok_or_die "Failed to download artifacts using awscli!"
        cmd_sh "./tools/setup-ci-artifacts.sh" $local_artifacts_path
        # On setup failure, drop the partial download so the next run retries.
        if [ $? != 0 ]; then
            rm -rf $local_artifacts_path
            die "Failed to setup artifacts!"
        fi
    else
        say "Found existing artifacts: " $artifacts_arch " at: " $local_artifacts_path
    fi
    LOCAL_ARTIFACTS_PATH=$local_artifacts_path
}

# Acquire the KVM module lock and run the given command.
# Uses flock with a timeout for safe, automatic lock management.
# Usage: with_kvm_module_lock [args...]
with_kvm_module_lock() {
    local LOCK_TIMEOUT=120
    (
        # fd 9 is opened on the lockfile by the redirection below; the lock is
        # released automatically when the subshell exits.
        if ! flock -w "$LOCK_TIMEOUT" 9; then
            say_warn "Timed out waiting for KVM module lock after: ${LOCK_TIMEOUT}s"
            exit 1
        fi
        echo "Successfully acquired lock"
        "$@"
    ) 9>"$KVM_MODULE_LOCKFILE"
}

# Reload KVM modules with the given vendor module and kvm params.
# Always enables avic=1 on AMD. Unloads first if already loaded.
# Usage: reload_kvm_modules [kvm_param...]
# e.g. reload_kvm_modules kvm_intel nx_huge_pages=never
reload_kvm_modules() {
    local vendor_mod=$1; shift
    # Unload if already loaded
    if lsmod | grep -qP "^kvm_(amd|intel)"; then
        if ! sudo modprobe -r $vendor_mod kvm; then
            say_warn "Failed to unload KVM modules (${vendor_mod}, kvm) (may be in use)"
            return 1
        fi
    fi
    if ! sudo modprobe kvm "$@"; then
        say_warn "Failed to load kvm module"
        return 1
    fi
    if [[ $vendor_mod == "kvm_amd" ]]; then
        if ! sudo modprobe kvm_amd avic=1; then
            say_warn "Failed to load kvm_amd module"
            return 1
        fi
    else
        if ! sudo modprobe $vendor_mod; then
            say_warn "Failed to load $vendor_mod module"
            return 1
        fi
    fi
}

# Determine the KVM vendor module for the current CPU.
kvm_vendor_mod() {
    if grep -q "vmx" /proc/cpuinfo; then
        echo kvm_intel
    elif grep -q "svm" /proc/cpuinfo; then
        echo kvm_amd
    else
        # aarch64
        echo kvm
    fi
}

# Ensure /dev/kvm is available and apply platform-specific KVM tweaks.
# - Loads KVM modules if not present
# - On Linux 6.1 x86_64: applies nx_huge_pages=never for non-vulnerable CPUs,
#   checks favordynmods for vulnerable ones
# - On AMD: ensures AVIC is enabled
setup_kvm() {
    local kernel_version=$(uname -r)
    local arch=$(uname -m)
    local vendor_mod=$(kvm_vendor_mod)
    local need_kvm_reload=0
    local kvm_extra_params=()
    # Load KVM if not already available
    if [[ ! -c /dev/kvm ]]; then
        need_kvm_reload=1
    fi
    local itlb_multihit=/sys/devices/system/cpu/vulnerabilities/itlb_multihit
    local nx_huge_pages=/sys/module/kvm/parameters/nx_huge_pages
    # Linux 6.1 x86_64: mitigate boot-time regression
    if [[ $kernel_version == 6.1.* ]] && [[ $arch == x86_64 ]]; then
        say "Applying Linux 6.1 boot-time regression mitigations"
        if grep -q "Not affected" $itlb_multihit; then
            echo "CPU not vulnerable to iTLB multihit, using kvm.nx_huge_pages=never mitigation"
            if ! grep -q "never" $nx_huge_pages 2>/dev/null; then
                kvm_extra_params+=(nx_huge_pages=never)
                need_kvm_reload=1
            fi
        else
            echo "CPU vulnerable to iTLB_multihit, checking if favordynmods is enabled"
            if mount | grep cgroup | grep -q favordynmods; then
                echo "favordynmods is enabled"
            else
                say_warn "cgroups' favordynmods option not enabled; VM creation performance may be impacted"
            fi
        fi
    fi
    # AMD: ensure AVIC is enabled
    local avic_param=/sys/module/kvm_amd/parameters/avic
    if [[ $vendor_mod == "kvm_amd" ]]; then
        if ! grep -q "Y\|1" $avic_param; then
            echo "AVIC not enabled, will reload kvm_amd with avic=1"
            need_kvm_reload=1
        fi
    fi
    if [[ $need_kvm_reload -eq 1 ]]; then
        echo "Reloading KVM modules"
        reload_kvm_modules "$vendor_mod" "${kvm_extra_params[@]}"
        ok_or_die "Could not reload kvm modules"
    fi
    # Print the effective settings for the CI log.
    tail -v $itlb_multihit $nx_huge_pages
    if [[ $vendor_mod == "kvm_amd" ]]; then
        tail -v $avic_param
    fi
    [[ -c /dev/kvm ]] || die "/dev/kvm not found. Aborting."
}

# Modifies the processors CPU governor and P-state configuration (x86_64 only) for consistent performance. This means
# - Disable turbo boost (Intel only) by writing 1 to /sys/devices/system/cpu/intel_pstate/no_turbo
# - Lock the CPUs' P-state to the highest non-turbo one (Intel only) by writing 100 to /sys/devices/system/cpu/intel_pstate/{min,max}_perf_pct
# - Set the cpu frequency governor to performance by writing "performance" to /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor
apply_performance_tweaks() {
    # m6a instances do not support the amd_pstate driver (yet), so nothing we can do there
    if [[ -d /sys/devices/system/cpu/intel_pstate ]]; then
        # Disable turbo boost. Some of our tests are performance tests, and we want minimum variability wrt processor frequency
        # See also https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/processor_state_control.html
        echo 1 |sudo tee /sys/devices/system/cpu/intel_pstate/no_turbo &> /dev/null
        # Save old values to restore later
        MIN_PERF_PCT=$(cat /sys/devices/system/cpu/intel_pstate/min_perf_pct)
        MAX_PERF_PCT=$(cat /sys/devices/system/cpu/intel_pstate/max_perf_pct)
        # Force the CPU to continuously stay in the highest, non-turbo P-state. The P-state will determine the
        # CPU's clock frequency.
        # https://www.kernel.org/doc/html/v4.12/admin-guide/pm/intel_pstate.html
        echo 100 |sudo tee /sys/devices/system/cpu/intel_pstate/min_perf_pct &> /dev/null
        echo 100 |sudo tee /sys/devices/system/cpu/intel_pstate/max_perf_pct &> /dev/null
    fi
    # If CPU is Intel Granite Rapids (Xeon 6, FMS 06-AD-XX), disable C6 and C6P states.
    # We've observed significant volatility in our performance tests on Intel Granite Rapids CPUs
    # (Xeon 6, FMS 06-AD-XX), specifically in many of our latency metrics. After spending time investigating
    # this, it seems like cross-CPU communication becomes prohibitively slow with the deepest C-states
    # enabled. Since GNR chips have higher core density (96 per socket vs. SPR's 48 per socket), we believe
    # that the tail latency of transitioning out of the deepest C-states explains the volatility.
    # Disabling these deep states appear to stabilise the performance, so for consistency in our CI, we will disable them.
    # NB: The performance volatility only appears to affect Granite Rapids instances with low load
    # (e.g., our performance integration tests). The assumption is that when the load is high, cores
    # are unlikely to enter deeper C-states, so inter-CPU communication does not encounter the overhead
    # of transitioning out of deeper C-states.
    model=$(awk '/^model\s+:/ {print $3; exit}' /proc/cpuinfo)
    family=$(awk '/^cpu family\s+:/ {print $4; exit}' /proc/cpuinfo)
    if [[ "$family" -eq 6 && "$model" -eq 173 ]]; then
        say "Intel Granite Rapids CPU detected. Disabling C6 and C6P C-states"
        for state in /sys/devices/system/cpu/cpu[0-9]*/cpuidle/state*/; do
            if [[ -f "$state/name" && $(cat "$state/name") == C6* ]]; then
                echo 1 | sudo tee "$state/disable" &> /dev/null
            fi
        done
    fi
    # The governor is a linux component that can adjust CPU frequency. "performance" tells it to always run CPUs at
    # their maximum safe frequency. It seems to be the default for Amazon Linux, but it doesn't hurt to make this explicit.
# `$0 test` - run integration tests
# Please see `$0 help` for more information.
cmd_test() {
    do_ab_test=0
    do_build=1
    do_archive=1
    do_kvm_check=1
    do_build_dir_check=1
    do_artifacts_check=1
    # Initialize optional settings up front so the function behaves under
    # `set -u` and later checks never observe stale/unset variables.
    local cpuset_cpus=""
    local cpuset_mems=""
    local artifacts=""
    local performance_tweaks=0

    # Parse any command line args.
    while [ $# -gt 0 ]; do
        case "$1" in
            "-h"|"--help")          { cmd_help; exit 1; } ;;
            "-c"|"--cpuset-cpus")   shift; cpuset_cpus="$1" ;;
            "-m"|"--cpuset-mems")   shift; cpuset_mems="$1" ;;
            "--artifacts")          shift; artifacts="$1" ;;
            "--performance")        performance_tweaks=1 ;;
            "--ab")                 do_ab_test=1 ;;
            "--no-build")           do_build=0 ;;
            "--no-archive")         do_archive=0 ;;
            "--no-kvm-check")       do_kvm_check=0 ;;
            "--no-build-dir-check") do_build_dir_check=0 ;;
            "--no-artifacts-check") do_artifacts_check=0 ;;
            "--")                   { shift; break; } ;;
            *) die "Unknown argument: $1. Please use --help for help." ;;
        esac
        shift
    done

    # Check prerequisites.
    [ $do_kvm_check != 0 ] && with_kvm_module_lock setup_kvm
    ensure_devctr
    [ $do_build_dir_check != 0 ] && ensure_build_dir
    if [ $do_artifacts_check != 0 ]; then
        # BUGFIX: quote $artifacts — an unquoted empty value made this
        # test rely on word splitting.
        if [ -z "$artifacts" ]; then
            cmd_ensure_current_artifacts
        else
            cmd_ensure_current_artifacts "$artifacts"
        fi
    fi
    if [ $do_build != 0 ]; then
        cmd_build --release
        if [ -n "${BUILDKITE_PULL_REQUEST_BASE_BRANCH:-}" ]; then
            # PR pipelines also build the base branch for A/B comparisons.
            cmd_build --release --rev "$BUILDKITE_PULL_REQUEST_BASE_BRANCH"
            ok_or_die "Failed to build Firecracker!"
        fi
    fi

    # If we got to here, we've got all we need to continue.
    say "Kernel version: $(uname -r)"
    # Print /proc/cpuinfo's block for processor 0 only.
    say "$(sed '/^processor.*: 0$/,/^processor.*: 1$/!d; /^processor.*: 1$/d' /proc/cpuinfo)"
    say "RPM firmware versions: $(rpm -q microcode_ctl amd-ucode-firmware linux-firmware)"
    env |grep -P "^(AWS_EMF_|BUILDKITE|CODECOV_)" > env.list

    if [[ $performance_tweaks -eq 1 ]]; then
        if [[ "$(uname --machine)" == "x86_64" ]]; then
            say "Detected CI and performance tests, tuning CPU frequency scaling and idle states for reduced variability"
            apply_performance_tweaks
        fi
        # It seems that even if the tests using huge pages run sequentially on ag=1 agents,
        # right-sizing the huge pages pool to the total number of huge pages used across all
        # tests results in spurious failures with pool depletion anyway (something else on the
        # host seems to be stealing our huge pages, and we cannot "ear mark" them for
        # Firecracker processes). Thus, just allocate 48GB of them and call it a day.
        say "Setting up huge pages pool"
        num_hugetlbfs_pages=24552
        huge_pages_old=$(cat /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages)
        huge_pages_new=$(echo $num_hugetlbfs_pages |sudo tee /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages)
        # BUGFIX: verify the allocation inside the branch that performed it;
        # previously this check ran unconditionally and compared unset
        # variables on non-performance runs.
        if [[ "$huge_pages_new" -ne "$num_hugetlbfs_pages" ]]; then
            die "Failed to allocate $num_hugetlbfs_pages hugetlbfs pages, only got $huge_pages_new"
        fi
    fi

    say "Starting test run ..."
    test_script="./tools/test.sh"
    if [ $do_ab_test -eq 1 ]; then
        test_script="./tools/ab_test.py"
    fi
    # Testing (running Firecracker via the jailer) needs root access,
    # in order to set-up the Firecracker jail (manipulating cgroups, net
    # namespaces, etc).
    # We need to run a privileged container to get that kind of access.
    run_devctr \
        --privileged \
        --security-opt seccomp=unconfined \
        --ulimit core=0 \
        --ulimit nofile=4096:4096 \
        --ulimit memlock=-1:-1 \
        --workdir "$CTR_FC_ROOT_DIR" \
        --cpuset-cpus="$cpuset_cpus" \
        --cpuset-mems="$cpuset_mems" \
        --env-file env.list \
        -- \
        $test_script "$@"
    ret=$?
    say "Finished test run ..."

    # Running as root would have created some root-owned files under the
    # build dir. Let's fix that.
    cmd_fix_perms

    # Undo performance tweaks (in case the instance gets recycled for a
    # non-perf test) and restore the previous huge pages pool size.
    if [[ $performance_tweaks -eq 1 ]]; then
        if [[ "$(uname --machine)" == "x86_64" ]]; then
            unapply_performance_tweaks
        fi
        echo $huge_pages_old |sudo tee /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages >/dev/null
    fi

    # Do not leave behind the env.list file.
    rm env.list

    # Archive everything in `test_results` to speed up upload/download
    # to S3 if we are in CI.
    if [ $do_archive != 0 ] && [ -n "${BUILDKITE:-}" ] && [ "${BUILDKITE:-}" = "true" ]; then
        tar -czf data.tar.gz -C test_results .
        rm -r test_results/*
        mv data.tar.gz test_results
    fi

    return $ret
}
# say "Dropping to a privileged shell prompt ..." say "Note: $FC_ROOT_DIR is bind-mounted under $CTR_FC_ROOT_DIR" say_warn "You are running as root; any files that get created under" \ "$CTR_FC_ROOT_DIR will be owned by root." run_devctr \ --privileged \ --ulimit nofile=4096:4096 \ --ulimit memlock=-1:-1 \ --security-opt seccomp=unconfined \ --workdir "$CTR_FC_ROOT_DIR" \ -- \ bash ret=$? # Running as root may have created some root-owned files under the build # dir. Let's fix that. # cmd_fix_perms else say "Dropping to shell prompt as user $(whoami) ..." say "Note: $FC_ROOT_DIR is bind-mounted under $CTR_FC_ROOT_DIR" say_warn "You won't be able to run Firecracker via the jailer," \ "but you can still build it." say "You can use \`$0 shell --privileged\` to get a root shell." [ -w /dev/kvm ] || \ say_warn "WARNING: user $(whoami) doesn't have permission to" \ "access /dev/kvm. You won't be able to run Firecracker." run_devctr \ --user "$(id -u):$(id -g)" \ --ulimit nofile=4096:4096 \ --ulimit memlock=-1:-1 \ --device=/dev/kvm:/dev/kvm \ --workdir "$CTR_FC_ROOT_DIR" \ --env PS1="$(whoami)@\h:\w\$ " \ -- \ bash --norc ret=$? 
# Run an arbitrary command line inside a privileged dev container.
cmd_sh() {
    ensure_build_dir
    run_devctr \
        --privileged \
        --ulimit nofile=4096:4096 \
        --ulimit memlock=-1:-1 \
        --workdir "$CTR_FC_ROOT_DIR" \
        -- \
        bash --norc -c "$*"
}

# Build a release binary and drop into an IPython sandbox (inside the
# dev container, under tmux) with the test framework on PYTHONPATH.
cmd_sandbox() {
    cmd_build --release
    cmd_ensure_current_artifacts
    cmd_sh "tmux new env PYTEST_ADDOPTS=--pdbcls=IPython.terminal.debugger:TerminalPdb PYTHONPATH=tests IPYTHONDIR=\$PWD/.ipython ipython -i ./tools/sandbox.py $@"
    cmd_fix_perms ".ipython"
}

# Same sandbox, but running directly on the host (no dev container):
# installs python3.11 + deps into a local venv first.
cmd_sandbox_native() {
    cmd_build --release
    source /etc/os-release
    case $ID$VERSION_ID in
        ubuntu22.04)
            sudo apt install python3-pip python3.11-dev gcc tmux
            ;;
        al2023)
            sudo yum -y install python3.11-pip python3.11-devel gcc tmux
            ;;
    esac
    python3.11 -m venv sandbox
    source sandbox/bin/activate
    pip3.11 install ipython requests requests_unixsocket2 psutil tenacity filelock
    pip3.11 install jsonschema aws_embedded_metrics
    pip3.11 install packaging pytest
    cmd_ensure_current_artifacts
    tmux neww sudo --preserve-env=HOME,PATH,TMUX env PYTHONPATH=tests IPYTHONDIR=\$PWD/.ipython ipython -i ./tools/sandbox.py $@
}

# Run the test suite with pytest's --pdb inside tmux for interactive debugging.
cmd_test_debug() {
    cmd_ensure_current_artifacts
    cmd_sh "tmux new ./tools/test.sh --pdb $@"
}
../tools ../.buildkite" cmd_sh "mdformat $(git ls-files '*.md' | tr '\n' ' ')" } cmd_mkdocs() { cmd_sh "cargo doc --workspace --no-deps --document-private-items" } cmd_checkstyle() { if [[ -z "$BUILDKITE" ]]; then cmd_sh "git-secrets --register-aws && git-secrets --scan" fi cmd_test --no-build --no-kvm-check --no-build-dir-check --no-artifacts-check -- -n 4 --dist worksteal integration_tests/style || exit 1 cmd_test --no-build --no-kvm-check --no-build-dir-check --no-artifacts-check -- -n 4 --doctest-modules framework || exit 1 } cmd_checkbuild() { TARGET_ARCH=$(uname -m) SUPPORTED_ARCHS=(x86_64 aarch64) while [ $# -gt 0 ]; do case "$1" in "-h"|"--help") { cmd_help; exit 1; } ;; "-m"|"--arch") { TARGET_ARCH=$2; shift; } ;; "--all") { for arch in ${SUPPORTED_ARCHS[*]}; do say "Running checkbuild -m $arch" cmd_checkbuild -m $arch || return $? done } say "Build check passed for ${SUPPORTED_ARCHS[*]}" return 0 ;; *) die "Unknown argument: $1. Please use --help for help." ;; esac shift done if ! grep -q $TARGET_ARCH <<< "${SUPPORTED_ARCHS[*]}"; then die "Unknown architecture: $TARGET_ARCH. Supported architectures: ${SUPPORTED_ARCHS[*]}" fi # Use GNU target to check build as musl has issues with cross-compilation cmd_sh "cargo clippy --target ${TARGET_ARCH}-unknown-linux-gnu --all --all-targets --all-features -- -D warnings" \ || die "Error running build checks for $TARGET_ARCH" say "Build check passed for $TARGET_ARCH" } # Check if able to run firecracker. # ../docs/getting-started.md#prerequisites ensure_kvm_rw () { [[ -c /dev/kvm && -w /dev/kvm && -r /dev/kvm ]] || \ say_err "FAILED: user $(whoami) doesn't have permission to" \ "access /dev/kvm." 
# Warn if any swap area is active.
# BUGFIX: the previous check only matched a swap *file* literally named
# "swap.img" (the Ubuntu default), silently missing swap partitions and
# swap files with any other name.
check_swap () {
    # /proc/swaps always starts with a header line; any further line is
    # an active swap device or file.
    (tail -n +2 /proc/swaps | grep -q .) && \
        say_warn "WARNING: SWAP ENABLED."
}
profile="release" target="$TARGET_PREFIX""musl" install_path="/usr/local/bin" binaries=("firecracker" "jailer" "seccompiler-bin" "rebase-snap" "cpu-template-helper") # Parse any command line args. while [ $# -gt 0 ]; do case "$1" in "-h"|"--help") { cmd_help; exit 1; } ;; "-p"|"--path") shift; install_path=$1; ;; "--debug") { profile="debug"; } ;; "--release") { profile="release"; } ;; *) die "Unknown argument: $1. Please use --help for help." ;; esac shift done # Check that the binaries exist first for binary in "${binaries[@]}"; do bin_path=$( build_bin_path "$target" "$profile" "$binary" ) if [ ! -f "$bin_path" ]; then die "Missing release binary. Needed file: $bin_path\n"\ "To build the binaries, run:\n\t$0 build --$profile" fi done # Install the binaries for binary in "${binaries[@]}"; do say "Installing $binary in $install_path" install -m 755 -D -t "$install_path" "$( build_bin_path "$target" "$profile" "$binary" )" done } cmd_build_ci_artifacts() { # Check prerequisites ensure_devctr # We need to run nested Docker here, so run this container as privileged. run_devctr \ --privileged \ --workdir "$CTR_FC_ROOT_DIR" \ -- \ ./resources/rebuild.sh "$@" cmd_fix_perms } main() { if [ $# = 0 ]; then die "No command provided. Please use \`$0 help\` for help." fi # Parse main command line args. # while [ $# -gt 0 ]; do case "$1" in -h|--help) { cmd_help; exit 1; } ;; -y|--unattended) # purposefully ignored ;; -*) die "Unknown arg: $1. Please use \`$0 help\` for help." ;; *) break ;; esac shift done # $1 is now a command name. Check if it is a valid command and, if so, # run it. # declare -f "cmd_$1" > /dev/null ok_or_die "Unknown command: $1. Please use \`$0 help\` for help." cmd=cmd_$1 shift # $@ is now a list of command-specific args # $cmd "$@" } main "$@" ================================================ FILE: tools/functions ================================================ # -*- shell-script[bash] -*- # Copyright 2022 Amazon.com, Inc. or its affiliates. 
# Send a decorated message to stderr, followed by a new line.
# BUGFIX: these helpers write to stderr, so colorize based on whether
# *stderr* ([ -t 2 ]) is a terminal. Previously say/say_warn probed
# stdout ([ -t 1 ]), which could emit ANSI codes into a file when only
# stderr was redirected (and suppress color in the opposite case);
# say_err already did this correctly.
say() {
    [ -t 2 ] && [ -n "$TERM" ] \
        && echo "$(tput setaf 2)[$MY_NAME $(date -Iseconds)]$(tput sgr0) $*" 1>&2 \
        || echo "[$MY_NAME] $*" 1>&2
}

# Send a red error message to stderr.
say_err() {
    [ -t 2 ] && [ -n "$TERM" ] \
        && echo -e "$(tput setaf 1)[$MY_NAME] $*$(tput sgr0)" 1>&2 \
        || echo -e "[$MY_NAME] $*" 1>&2
}

# Send a warning-highlighted (yellow) text to stderr.
say_warn() {
    [ -t 2 ] && [ -n "$TERM" ] \
        && echo "$(tput setaf 3)[$MY_NAME] $*$(tput sgr0)" 1>&2 \
        || echo "[$MY_NAME] $*" 1>&2
}

# Exit with an error message and (optional) code.
# Usage: die [-c <code>] <message...>
die() {
    code=1
    [[ "${1:-}" = "-c" ]] && {
        code="$2"
        shift 2
    }
    say_err "$@"
    exit $code
}

# Exit with an error message if the last command's exit code is not 0.
ok_or_die() {
    code=$?
    [[ $code -eq 0 ]] || die -c $code "$@"
}
(y/n) ") yes=$([ -n "${2:-}" ] && echo -n "$2" || echo -n "y") read -p "$msg" c && [ "$c" = "$yes" ] && return 0 return 1 } ####################################### # Release automation common functions # ####################################### # Get version from the swagger file function get_swagger_version { local file=${1:-"$FC_ROOT_DIR/src/firecracker/swagger/firecracker.yaml"} grep -oP 'version: \K.*' "$file" } function check_local_branch_is_release_branch { local LOCAL_BRANCH=$(git rev-parse --abbrev-ref HEAD) local RELEASE_BRANCH=firecracker-v$(echo "$version" |cut -d. -f-2) if [ "$LOCAL_BRANCH" != "$RELEASE_BRANCH" ]; then cat < inside the container to install packages and configure. # The setup script receives $rootfs as the overlay directory (pre-populated # with overlay files). # Usage: build_rootfs build_rootfs() { local IMAGE_NAME=$1 local OUTPUT_DIR=$2 local OVERLAY_DIR=$3 local SETUP_SCRIPT=$4 local ROOTFS_NAME="${IMAGE_NAME//:/-}" local FROM_CTR="public.ecr.aws/docker/library/$IMAGE_NAME" local rootfs="rootfs_${ROOTFS_NAME}_$$" say "Building rootfs for $IMAGE_NAME" mkdir -pv $rootfs cp -rvf "$OVERLAY_DIR"/* "$rootfs" # Run setup script inside the container image, then copy the # resulting filesystem back to the bind-mounted rootfs directory. docker run --env rootfs="$rootfs" --privileged --rm -i \ -v "$PWD:/work" -w "/work" "$FROM_CTR" sh -c ' # Make overlay files available inside the container cp -ruv $rootfs/* / # Run the setup script if [ -e /bin/bash ]; then bash /work/'"$SETUP_SCRIPT"' else sh /work/'"$SETUP_SCRIPT"' fi # Copy filesystem back to bind-mounted rootfs dirs="bin etc home lib lib64 root sbin usr" for d in $dirs; do [ -d "/$d" ] && tar c "/$d" | tar x -C $rootfs; done mkdir -pv $rootfs/dev $rootfs/proc $rootfs/sys $rootfs/run $rootfs/tmp ' # TBD what about /etc/hosts? 
def build_tarball(release_dir, release_tgz, arch):
    """Build a release tarball with local assets.

    Walks ``release_dir``, marks the architecture-suffixed binaries
    executable, and packs everything into the gzip tarball
    ``release_tgz`` — minus a small exclusion set:

    * signatures (``SHA256SUMS.sig``): we aren't making those keys public;
    * ``RELEASE_NOTES`` and custom CPU templates: already available on
      GitHub without any action (like building a binary).
    """
    excluded = {"RELEASE_NOTES", "SHA256SUMS.sig"}
    excluded.update(
        tmpl.stem
        for tmpl in Path("tests/data/custom_cpu_templates").glob("*.json")
    )

    with tarfile.open(release_tgz, "w:gz") as tar:
        for asset in (entry for entry in release_dir.rglob("*") if entry.is_file()):
            if asset.name in excluded:
                print(f"Skipping file {asset}")
                continue
            if asset.name.endswith(arch):
                # Binaries are named e.g. firecracker-vX.Y.Z-x86_64.
                print(f"Setting +x bit for {asset}")
                asset.chmod(0o755)
            print(f"Adding {asset} to {release_tgz}")
            tar.add(asset)
def version(version_str: str) -> str:
    """Validate a vX.Y.Z version string (argparse ``type=`` callback).

    Returns the string unchanged when valid; raises ValueError otherwise,
    which argparse converts into a clean usage error. The message now
    includes the offending value so the failure is self-explanatory.
    """
    if not re.fullmatch(r"v\d+\.\d+\.\d+", version_str):
        raise ValueError(f"version '{version_str}' does not match vX.Y.Z")
    return version_str
or its affiliates. All Rights Reserved. # SPDX-License-Identifier: Apache-2.0 # pylint:disable=invalid-name """Print changelog of specified version with markdown stripped""" import sys from pathlib import Path if __name__ == "__main__": cur_version = sys.argv[1] with Path(__file__).parent.joinpath("../CHANGELOG.md").open(encoding="UTF-8") as f: changelog_lines = f.readlines() # Skip first 7 lines because they contain the "keep a changelog" metadata changelog_lines = changelog_lines[7:] iterator = iter(changelog_lines) for line in iterator: if line.startswith(f"## [{cur_version}]"): break else: print(f"Could not find changelog entry for version {cur_version}!") sys.exit(1) for line in iterator: if line.startswith("## ["): break if line.startswith("#"): line = line.lstrip("#").lstrip() if line.startswith("-"): line = line.replace("-", "*", 1) print(line, end="") ================================================ FILE: tools/release-prepare.sh ================================================ #!/usr/bin/env bash # Copyright 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved. # SPDX-License-Identifier: Apache-2.0 set -eu -o pipefail shopt -s lastpipe FC_TOOLS_DIR=$(dirname $(realpath $0)) source "$FC_TOOLS_DIR/functions" FC_ROOT_DIR=$FC_TOOLS_DIR/.. if [ $# -ne 1 ]; then cat < Example: $0 0.42.0 Prepare a new Firecracker release: 1. Update the version number 2. Generate CREDITS.md and CHANGELOG.md 3. Commit the result 4. Create a link to PR the changes EOF exit 1 fi version=$1 validate_version "$version" check_local_branch_is_release_branch # Create GitHub PR link ORIGIN_URL=$(git config --get remote.origin.url) GH_USER=$(git config --get github.user) REPO=$(basename "$ORIGIN_URL" .git) LOCAL_BRANCH=$(git rev-parse --abbrev-ref HEAD) RELEASE_BRANCH=firecracker-v$(echo "$version" |cut -d. -f-2) UPSTREAM=upstream # In which branch should the change go, in the main repo? TARGET_BRANCH=main PATCH=$(echo "$version" |cut -d. 
# Pretty-print a shell command (grey background).
function pp-code {
    echo "$(SGR 0 48 5 242)$*$(SGR 0)"
}

# Pretty-print a list item: a highlighted bullet ($1) followed by the
# remaining args in bold.
function pp-li {
    bullet=$1
    shift
    echo "$(SGR 0 48 5 101)$bullet$(SGR 0 1) $*$(SGR 0)"
}
# Map a cargo profile name to its target/ output subdirectory.
# Cargo writes `dev`-profile artifacts under `debug`; every other profile
# uses a directory named after the profile itself.
function get-profile-dir {
    if [ "$1" = "dev" ]; then
        echo debug
    else
        echo "$1"
    fi
}
def parse_byte_size(param):
    """Parse a human-readable size like "1MB" or "2gb" into bytes.

    Used as an argparse ``type=`` callback. Raises ValueError (which
    argparse reports as a clean usage error) for malformed input —
    previously a bad value crashed with an AttributeError on the failed
    match, and trailing garbage (e.g. "1MBx") was silently accepted
    because of the prefix-only ``re.match``.

    >>> parse_byte_size("1MB")
    1048576
    """
    unit = {
        "MB": 2**20,
        "GB": 2**30,
    }
    match = re.fullmatch(r"(?P<val>\d+)(?P<unit>[MG]B)", param.upper())
    if match is None:
        raise ValueError(f"invalid size '{param}', expected <number>MB or <number>GB")
    return int(match.group("val")) * unit[match.group("unit")]
MicroVMFactory(binary_dir)

# Use the debug build of the chosen kernel when --debug or --gdb is set.
if args.debug or args.gdb:
    kernel = args.kernel.parent / "debug" / args.kernel.name
else:
    kernel = args.kernel
print(f"uvm with kernel {kernel} ...")

uvm = vmfcty.build(kernel, args.rootfs)
uvm.help.enable_console()
uvm.help.resize_disk(uvm.rootfs_file, args.rootfs_size)
uvm.spawn(log_show_level=True, validate_api=False)
uvm.help.print_log()
uvm.add_net_iface()
# basic_config takes MiB; --guest-mem-size was parsed into bytes.
uvm.basic_config(vcpu_count=args.vcpus, mem_size_mib=args.guest_mem_size // 2**20)
if cpu_template is not None:
    uvm.api.cpu_config.put(**cpu_template)
    print(cpu_template)
if args.gdb:
    uvm.enable_gdb()
    uvm.help.tmux_gdb()
uvm.start()
uvm.get_all_metrics()
================================================ FILE: tools/setup-ci-artifacts.sh ================================================
#!/bin/bash
# Copyright 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0

# fail if we encounter an error, uninitialized variable or a pipe breaks
set -eu -o pipefail

TOOLS_DIR=$(dirname $0)
source "$TOOLS_DIR/functions"

say "Setup CI artifacts"
# $1 is the artifacts directory to prepare; all work happens inside it.
cd $1

say "Generate SSH key to connect from host"
if [ ! -s id_rsa ]; then
    ssh-keygen -f id_rsa -N ""
fi

for SQUASHFS in *.squashfs; do
    say "Include SSH key in $SQUASHFS"
    RSA=$(basename $SQUASHFS .squashfs).id_rsa
    EXT4=$(basename $SQUASHFS .squashfs).ext4
    # A non-empty .orig backup means this image was already processed; skip.
    [ -s $SQUASHFS.orig ] && continue
    unsquashfs $SQUASHFS
    mkdir -pv squashfs-root/root/.ssh
    # copy the SSH key into the rootfs
    if [ ! -s $RSA ]; then
        # append SSH key to the squashfs image
        cp -v id_rsa.pub squashfs-root/root/.ssh/authorized_keys
        cp -v id_rsa $RSA
    fi
    # re-squash
    mv -v $SQUASHFS $SQUASHFS.orig
    mksquashfs squashfs-root $SQUASHFS -all-root -noappend -comp zstd
    # Create rw ext4 image from ro squashfs
    [ -f $EXT4 ] && continue
    say "Converting $SQUASHFS to $EXT4"
    truncate -s 500M $EXT4
    mkfs.ext4 -F $EXT4 -d squashfs-root
    rm -rf squashfs-root
done

say "Uncompress debuginfo files"
find . 
-name "*.debug.gz" -print0 | xargs -P4 -0 -t -n1 gunzip
================================================ FILE: tools/test-popular-containers/build_rootfs.sh ================================================
#!/bin/bash
# Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0

# fail if we encounter an error, uninitialized variable or a pipe breaks
set -eu -o pipefail
set -x

cd $(dirname $0)
TOPDIR=$(git rev-parse --show-cdup)
source "$TOPDIR/tools/functions"

# Get the executing uid and gid for `chown` and `chgrp`
USER_UID=$(stat -c '%u' "$TOPDIR")
USER_GID=$(stat -c '%g' "$TOPDIR")

OVERLAY_DIR="$TOPDIR/resources/overlay"
SETUP_SCRIPT="setup-minimal.sh"
OUTPUT_DIR=$PWD
IMAGES=(amazonlinux:2023 alpine:latest ubuntu:22.04 ubuntu:24.04 ubuntu:25.04 ubuntu:latest)

# Generate SSH key for access from host
if [ ! -s id_rsa ]; then
    ssh-keygen -f id_rsa -N ""
fi

# install rootfs dependencies
apt update
apt install -y busybox-static cpio curl docker.io tree

prepare_docker

for img in "${IMAGES[@]}"; do
    build_rootfs "$img" "$OUTPUT_DIR" "$OVERLAY_DIR" "$SETUP_SCRIPT"
    # e.g. "ubuntu:22.04" -> "ubuntu-22.04" so the tag is filename-safe.
    rootfs_name="${img//:/-}"
    cp id_rsa "$rootfs_name.id_rsa"
    chmod a+r "$rootfs_name.id_rsa"
    # Hand ownership of the outputs back to the invoking user.
    chown "$USER_UID":"$USER_GID" "$rootfs_name.squashfs" "$rootfs_name.id_rsa"
done
================================================ FILE: tools/test-popular-containers/fcnet.start ================================================
#!/usr/bin/env sh
# Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0

mount -t sysfs sysfs /sys
# Every network interface except loopback.
devs=$(ls /sys/class/net | grep -v lo)
for dev in $devs; do
    # Derive the guest IPv4 address from MAC bytes 3-6 (the MAC encodes the
    # IP). NOTE(review): `cut -O` looks like the busybox output-delimiter
    # flag -- confirm the guest image ships busybox cut.
    ip=$(printf "%d.%d.%d.%d" $(echo -n 0x; cut -d: -f3- -O ' 0x' /sys/class/net/$dev/address))
    ip addr add "$ip/30" dev $dev
    ip link set $dev up
done
================================================ FILE: tools/test-popular-containers/setup-minimal.sh ================================================
#!/bin/sh
# Copyright 2025 Amazon.com, Inc.
or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0

# Minimal rootfs setup: just enough to boot with SSH and networking.
# Runs inside a Docker container via build_rootfs().

set -eux

. /etc/os-release

# On Ubuntu, installing openssh-server automatically sets up required SSH keys for the server.
# AL2023 and Alpine do not do this, so we should setup keys manually via `ssh-keygen`.
# Alpine additionally requires /var/empty to be present for sshd to start properly.
case $ID in
    ubuntu)
        export DEBIAN_FRONTEND=noninteractive
        apt update
        apt install -y openssh-server iproute2 udev
        apt clean
        ;;
    amzn)
        dnf install -y openssh-server iproute systemd-udev passwd tar
        ssh-keygen -A
        dnf clean all
        ;;
    alpine)
        apk add openssh openrc tar
        mkdir -p /var/empty
        ssh-keygen -A
        rc-update add sshd
        rc-update add local default
        # Spawn a login getty on the Firecracker serial console.
        echo "ttyS0::respawn:/sbin/getty -L ttyS0 115200 vt100" >>/etc/inittab
        apk cache clean
        ;;
esac

# Empty root password so console/SSH logins need no password.
passwd -d root

if [ ! -f /work/id_rsa.pub ]; then
    echo "Host SSH public key not found"
    exit 1
fi

# Install host SSH public key
install -d -m 0700 /root/.ssh
cp /work/id_rsa.pub /root/.ssh/authorized_keys
chmod 0600 /root/.ssh/authorized_keys

if [ -d /usr/lib/systemd ]; then
    # Enable fcnet for systemd-based images
    mkdir -pv /etc/systemd/system/sysinit.target.wants
    ln -svf /etc/systemd/system/fcnet.service /etc/systemd/system/sysinit.target.wants/fcnet.service
    # The serial getty service hooks up the login prompt to the kernel console
    # at ttyS0 (where Firecracker connects its serial console). We'll set it up
    # for autologin to avoid the login prompt.
mkdir "/etc/systemd/system/serial-getty@ttyS0.service.d/"
    cat <<'EOF' >"/etc/systemd/system/serial-getty@ttyS0.service.d/override.conf"
[Service]
# systemd requires this empty ExecStart line to override
ExecStart=
ExecStart=-/sbin/agetty --autologin root -o '-p -- \\u' --keep-baud 115200,38400,9600 %I dumb
EOF
else
    # Enable fcnet for OpenRC-based images
    cp -v fcnet.start /etc/local.d
fi

# Copy /var back to bind-mounted rootfs.
# Required for things like systemd and apt to work
# ($rootfs variable set via docker --env $rootfs )
cp -r /var $rootfs
================================================ FILE: tools/test-popular-containers/test-docker-rootfs.py ================================================
#!/usr/bin/env python3
# Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0
# pylint:disable=invalid-name

"""
Test all the squashfs rootfs in the current directory
"""

import os
import sys
from pathlib import Path

# Hack to be able to import testing framework functions.
sys.path.append(os.path.join(os.getcwd(), "tests"))

# pylint: disable=wrong-import-position
from framework.artifacts import kernels
from framework.defs import DEFAULT_BINARY_DIR
from framework.microvm import MicroVMFactory

# pylint: enable=wrong-import-position

kernels = list(kernels("vmlinux-*"))
# Use the latest guest kernel
kernel = kernels[-1]
vmfcty = MicroVMFactory(DEFAULT_BINARY_DIR)

# (may take a while to compile Firecracker...)
for rootfs in Path(".").glob("*.squashfs"):
    print(f">>>> Testing {rootfs}")
    uvm = vmfcty.build(kernel, rootfs)
    uvm.spawn()
    uvm.add_net_iface()
    uvm.basic_config()
    uvm.start()
    # Smoke-test guest reachability over SSH.
    rc, stdout, stderr = uvm.ssh.run("cat /etc/issue")
    print(rc, stdout, stderr)
================================================ FILE: tools/test.sh ================================================
#!/bin/bash
# Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0

# fail if we encounter an error, uninitialized variable or a pipe breaks
set -eu -o pipefail

TOOLS_DIR=$(dirname $0)
source "$TOOLS_DIR/functions"

# Set our TMPDIR inside /srv, so all files created in the session end up in one
# place
say "Create TMPDIR in /srv"
export TMPDIR=/srv/tmp
mkdir -pv $TMPDIR

# Some of the security tests need this (test_jail.py)
# Convert the Docker created cgroup so we can create cgroup children
# From https://github.com/containerd/containerd/issues/6659
say "cgroups v2: enable nesting"
CGROUP=/sys/fs/cgroup
if [ -f $CGROUP/cgroup.controllers -a -e $CGROUP/cgroup.type ]; then
    # move the processes from the root group to the /init group,
    # otherwise writing subtree_control fails with EBUSY.
    # An error during moving non-existent process (i.e., "cat") is ignored.
    mkdir -p $CGROUP/init
    xargs -rn1 < $CGROUP/cgroup.procs > $CGROUP/init/cgroup.procs || :
    # enable controllers
    sed -e 's/ / +/g' -e 's/^/+/' < $CGROUP/cgroup.controllers \
        > $CGROUP/cgroup.subtree_control
fi

if [ -f build/current_artifacts ]; then
    say "Copy artifacts to /srv/test_artifacts, so hardlinks work"
    cp -ruvfL $(cat build/current_artifacts) /srv/test_artifacts
else
    # The directory must exist for pytest to function
    mkdir -p /srv/test_artifacts
    say_warn "No current artifacts are set. Some tests might break"
fi

cd tests
export PYTEST_ADDOPTS="${PYTEST_ADDOPTS:-} --pdbcls=IPython.terminal.debugger:TerminalPdb"
{
    # disable errexit momentarily so we can capture the exit status
    set +e
    pytest "$@"
    ret=$?
    set -e
}

# if the tests failed and we are running in CI, print some disk usage stats
# to help troubleshooting
# BUG FIX: expand BUILDKITE with a default ("${BUILDKITE:-}") -- under
# `set -u` a bare "$BUILDKITE" aborts the whole script with "unbound
# variable" when tests fail outside CI, instead of reaching `exit $ret`.
if [ $ret != 0 ] && [ "${BUILDKITE:-}" == "true" ]; then
    df -ih
    df -h
    du -h / 2>/dev/null |sort -h |tail -32
fi

exit $ret
================================================ FILE: tools/test_bindings.py ================================================
# Copyright 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0 """ Script used to check if bindgen-generated code creates structs that differ from previously created onces. The script uses `pahole` (man 1 pahole) to gather debug information from two firecracker binaries (script's arguments). It parses pahole output and gathers struct information in a dictionary of the form: ``` { "struct_name": {"size": size_in_bytes, "alignment": alignment_in_bytes}, ... } ``` It also, filters structure names using the "bindings" filter for keeping only bindgen related structs. *NOTE*: this assumes that all bindgen-related structs live under a crate or module name with "bindings" in it. At the moment, this is true. It then iterates through the structs of the firecracker binary built from the older version and checks if there are mismatches with the struct info from the second binary (newer version) ### Usage 1. Create the two binaries ``` # First create the binary with existing bindings $ git checkout main $ ./tools/devtool build $ cp ./build/cargo_target/x86_64-unknown-linux-musl/debug/firecracker firecracker_old # Second create the binary with new bindings $ git checkout new_bindings $ ./tools/devtool build $ cp ./build/cargo_target/x86_64-unknown-linux-musl/debug/firecracker firecracker_new # Run the script $ python3 ./tools/test_bindings.py firecracker_old firecracker_new ``` """ import argparse import logging import re import subprocess import sys logging.basicConfig(level=logging.DEBUG) log = logging.getLogger(__name__) def parse_pahole(pahole_output): """Gather bindings related structs from pahole output Parse pahole output and gather struct information filtering for the 'bindings' keyword. The information gathered is the struct size and its alignment. 
@param pahole_output: pahole output (bytes) to parse
    @return: A dictionary where keys are struct names and values struct size
        and alignment
    """
    ret = {}
    # regular expression matches the name of the struct, its size and alignment
    structs = re.findall(
        rb"struct (.*?)\{.*?/\* size: (\d+).*?\*/.*?\n\} "
        rb"__attribute__\(\(__aligned__\((\d+)\)\)\)\;",
        pahole_output,
        flags=re.DOTALL,
    )
    for struct in structs:
        # str() of a bytes value yields "b'...'"; good enough for the
        # substring check and for use as a dictionary key.
        struct_name = str(struct[0])
        size = int(struct[1])
        alignment = int(struct[2])
        if "bindings" in struct_name:
            ret[struct_name] = {"size": size, "alignment": alignment}
    return ret


def pahole(binary: str) -> bytes:
    """Runs pahole on a binary and returns its output as bytes

    If pahole fails this will raise a `CalledProcessError`

    @param binary: binary to run pahole on
    @return: On success, it will return the stdout of the pahole process
    """
    result = subprocess.run(
        ["pahole", binary], stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True
    )
    # stdout is bytes (no text=True); parse_pahole matches it with rb"" patterns.
    return result.stdout


def check_pahole_mismatches(old: str, new: str) -> bool:
    """Checks for pahole mismatches in pahole information between two binaries

    @param old: old Firecracker binary
    @param new: new Firecracker binary
    @return: false if no mismatches found, true otherwise
    """
    pahole_structs_1 = parse_pahole(pahole(old))
    pahole_structs_2 = parse_pahole(pahole(new))
    # We go through all the structs existing in the old firecracker binary and check for mismatches
    # in the new one.
    for name, prop_1 in pahole_structs_1.items():
        # Note that the reverse, i.e. a name existing in the new binary but not in the old binary,
        # is not a problem. That would mean we are making use of some new struct from
        # bindgen-generated code. That does not break ABI compatibility.
# (continuation of the check_pahole_mismatches loop body)
        if name not in pahole_structs_2:
            log.warning("struct '%s' does not exist in new binary", name)
            continue
        prop_2 = pahole_structs_2[name]
        # Size mismatches are hard errors
        if prop_1["size"] != prop_2["size"]:
            log.error("size of '%s' does not match in two binaries", name)
            log.error("old: %s", prop_1["size"])
            log.error("new: %s", prop_2["size"])
            return True
        # Alignment mismatches just cause warnings
        if prop_1["alignment"] != prop_2["alignment"]:
            log.warning("alignment of '%s' does not match in two binaries", name)
            log.warning("old: %s", prop_1["alignment"])
            log.warning("new: %s", prop_2["alignment"])
        else:
            log.info("struct '%s' matches", name)
    return False


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Check bindings ABI compatibility for Firecracker"
    )
    parser.add_argument(
        "firecracker_old",
        type=str,
        metavar="old-firecracker-binary",
        help="Firecracker binary with old bindings",
    )
    parser.add_argument(
        "firecracker_new",
        type=str,
        metavar="new-firecracker-binary",
        help="Firecracker binary with new bindings",
    )
    args = parser.parse_args()
    # Non-zero exit on size mismatch so callers (e.g. CI) can gate on it.
    if check_pahole_mismatches(args.firecracker_old, args.firecracker_new):
        log.error("Structure layout mismatch")
        sys.exit(1)
    else:
        log.info("Structure layout matches")
        sys.exit(0)
================================================ FILE: tools/update-credits.sh ================================================
#!/usr/bin/env bash
# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0

set -e
cd "$(dirname "$BASH_SOURCE")/.."

# see also ".mailmap" for how email addresses and names are deduplicated

{
cat <<-'EOH'
# Firecracker Credits and Thanks

(This file is autogenerated using [update-credits.sh](tools/update-credits.sh).)

Firecracker started with the code from the Chrome OS Virtual Machine Monitor
([crosvm](https://github.com/google/crosvm)), a VMM written in Rust with a
focus on safety and security.
Thanks go to:

- [Zach Reizner](https://github.com/zachreizner)
- [Dylan Reid](https://github.com/dgreid)
- [Daniel Verkamp](https://github.com/danielverkamp)
- [Stephen Barber](https://github.com/smibarber)
- [Chirantan Ekbote](https://github.com/jynnantonix)
- [Jason D. Clinton](https://github.com/jclinton)
- Sonny Rao

Contributors to the Firecracker repository:
EOH
# Blank separator line between the static header and the generated list.
echo
# One bullet per unique author; ".mailmap" canonicalizes names/emails and
# commits by dependabot are excluded.
git log --format='- %aN <%aE>' | LC_ALL=C.UTF-8 sort -uf | grep -v "dependabot"
} > CREDITS.md