Repository: NJU-3DV/SpatialVID
Branch: main
Commit: 23840d4ec122
Files: 538
Total size: 3.1 MB
Directory structure:
gitextract__0evbu59/
├── .gitignore
├── .gitmodules
├── Dockerfile.cuda
├── LICENSE
├── README.md
├── camera_pose_annotation/
│ ├── .gitignore
│ ├── README.md
│ ├── __init__.py
│ ├── camera_tracking/
│ │ ├── __init__.py
│ │ ├── camera_tracking.py
│ │ └── inference_batch.py
│ ├── cvd_opt/
│ │ ├── __init__.py
│ │ ├── cvd_opt.py
│ │ ├── geometry_utils.py
│ │ ├── inference_batch.py
│ │ └── preprocess/
│ │ ├── __init__.py
│ │ ├── core/
│ │ │ ├── __init__.py
│ │ │ ├── corr.py
│ │ │ ├── datasets.py
│ │ │ ├── extractor.py
│ │ │ ├── raft.py
│ │ │ ├── update.py
│ │ │ └── utils/
│ │ │ ├── __init__.py
│ │ │ ├── augmentor.py
│ │ │ ├── flow_viz.py
│ │ │ ├── frame_utils.py
│ │ │ └── utils.py
│ │ ├── inference_batch.py
│ │ └── preprocess_flow.py
│ ├── depth_estimation/
│ │ ├── Depth-Anything/
│ │ │ ├── __init__.py
│ │ │ ├── depth_anything_v2/
│ │ │ │ ├── dinov2.py
│ │ │ │ ├── dinov2_layers/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── attention.py
│ │ │ │ │ ├── block.py
│ │ │ │ │ ├── drop_path.py
│ │ │ │ │ ├── layer_scale.py
│ │ │ │ │ ├── mlp.py
│ │ │ │ │ ├── patch_embed.py
│ │ │ │ │ └── swiglu_ffn.py
│ │ │ │ ├── dpt.py
│ │ │ │ └── util/
│ │ │ │ ├── blocks.py
│ │ │ │ └── transform.py
│ │ │ ├── inference.py
│ │ │ └── inference_batch.py
│ │ ├── UniDepth/
│ │ │ ├── __init__.py
│ │ │ ├── inference.py
│ │ │ ├── inference_batch.py
│ │ │ └── unidepth/
│ │ │ ├── datasets/
│ │ │ │ ├── _2d3ds.py
│ │ │ │ ├── _4dor.py
│ │ │ │ ├── __init__.py
│ │ │ │ ├── a2d2.py
│ │ │ │ ├── adt.py
│ │ │ │ ├── aimotive.py
│ │ │ │ ├── argoverse.py
│ │ │ │ ├── argoverse2.py
│ │ │ │ ├── arkit.py
│ │ │ │ ├── ase.py
│ │ │ │ ├── base_dataset.py
│ │ │ │ ├── bdd.py
│ │ │ │ ├── bedlam.py
│ │ │ │ ├── behave.py
│ │ │ │ ├── blendedmvg.py
│ │ │ │ ├── cityscape.py
│ │ │ │ ├── ddad.py
│ │ │ │ ├── deep360.py
│ │ │ │ ├── dense.py
│ │ │ │ ├── diml.py
│ │ │ │ ├── diode.py
│ │ │ │ ├── dl3dv.py
│ │ │ │ ├── driving_stereo.py
│ │ │ │ ├── dtu_rmvd.py
│ │ │ │ ├── dummy.py
│ │ │ │ ├── dynamic_replica.py
│ │ │ │ ├── eden.py
│ │ │ │ ├── eth3d.py
│ │ │ │ ├── eth3d_rmvd.py
│ │ │ │ ├── facedepth.py
│ │ │ │ ├── flsea.py
│ │ │ │ ├── futurehouse.py
│ │ │ │ ├── gibson.py
│ │ │ │ ├── hammer.py
│ │ │ │ ├── hm3d.py
│ │ │ │ ├── hoi4d.py
│ │ │ │ ├── hrwsi.py
│ │ │ │ ├── hypersim.py
│ │ │ │ ├── ibims.py
│ │ │ │ ├── image_dataset.py
│ │ │ │ ├── ken_burns.py
│ │ │ │ ├── kitti.py
│ │ │ │ ├── kitti360.py
│ │ │ │ ├── kitti_multi.py
│ │ │ │ ├── kitti_rmvd.py
│ │ │ │ ├── lyft.py
│ │ │ │ ├── mapillary.py
│ │ │ │ ├── matrix_city.py
│ │ │ │ ├── matterport3d.py
│ │ │ │ ├── megadepth.py
│ │ │ │ ├── megadepth_s.py
│ │ │ │ ├── midair.py
│ │ │ │ ├── mip.py
│ │ │ │ ├── ms2.py
│ │ │ │ ├── mvimgnet.py
│ │ │ │ ├── mvsynth.py
│ │ │ │ ├── nerds360.py
│ │ │ │ ├── niantic_mapfree.py
│ │ │ │ ├── nuscenes.py
│ │ │ │ ├── nyuv2.py
│ │ │ │ ├── oasis.py
│ │ │ │ ├── pipelines/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── formating.py
│ │ │ │ │ └── transforms.py
│ │ │ │ ├── point_odyssey.py
│ │ │ │ ├── proteus.py
│ │ │ │ ├── samplers copy.py
│ │ │ │ ├── samplers.py
│ │ │ │ ├── scannet.py
│ │ │ │ ├── scannetpp.py
│ │ │ │ ├── sequence_dataset.py
│ │ │ │ ├── sintel copy.py
│ │ │ │ ├── sintel.py
│ │ │ │ ├── sunrgbd.py
│ │ │ │ ├── synscapes.py
│ │ │ │ ├── tartanair.py
│ │ │ │ ├── taskonomy.py
│ │ │ │ ├── tat_rmvd.py
│ │ │ │ ├── theo.py
│ │ │ │ ├── unrealstereo4k.py
│ │ │ │ ├── urbansyn.py
│ │ │ │ ├── utils.py
│ │ │ │ ├── utils_decode.py
│ │ │ │ ├── vkitti.py
│ │ │ │ ├── void.py
│ │ │ │ ├── waymo.py
│ │ │ │ └── wildrgbd.py
│ │ │ ├── layers/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── activation.py
│ │ │ │ ├── attention.py
│ │ │ │ ├── convnext.py
│ │ │ │ ├── drop_path.py
│ │ │ │ ├── layer_scale.py
│ │ │ │ ├── mlp.py
│ │ │ │ ├── nystrom.py
│ │ │ │ ├── nystrom_attention.py
│ │ │ │ ├── positional_encoding.py
│ │ │ │ └── upsample.py
│ │ │ ├── models/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── backbones/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── convnext.py
│ │ │ │ │ ├── convnext2.py
│ │ │ │ │ ├── dinov2.py
│ │ │ │ │ └── metadinov2/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── attention.py
│ │ │ │ │ ├── block.py
│ │ │ │ │ ├── dino_head.py
│ │ │ │ │ ├── drop_path.py
│ │ │ │ │ ├── layer_scale.py
│ │ │ │ │ ├── mlp.py
│ │ │ │ │ ├── patch_embed.py
│ │ │ │ │ └── swiglu_ffn.py
│ │ │ │ ├── encoder.py
│ │ │ │ ├── unidepthv1/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── decoder.py
│ │ │ │ │ └── unidepthv1.py
│ │ │ │ └── unidepthv2/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── decoder.py
│ │ │ │ ├── decoder_old.py
│ │ │ │ ├── export.py
│ │ │ │ ├── unidepthv2.py
│ │ │ │ └── unidepthv2_old.py
│ │ │ ├── ops/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── extract_patches/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── compile.sh
│ │ │ │ │ ├── functions/
│ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ └── extract_patches.py
│ │ │ │ │ ├── modules/
│ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ └── patch_extractor.py
│ │ │ │ │ ├── setup.py
│ │ │ │ │ ├── src/
│ │ │ │ │ │ ├── cpu/
│ │ │ │ │ │ │ ├── extract_patches_cpu.cpp
│ │ │ │ │ │ │ └── extract_patches_cpu.h
│ │ │ │ │ │ ├── cuda/
│ │ │ │ │ │ │ ├── extract_patches_cuda.h
│ │ │ │ │ │ │ ├── extract_patches_kernel.cu
│ │ │ │ │ │ │ └── extract_patches_kernel.cuh
│ │ │ │ │ │ ├── extract_patches.cpp
│ │ │ │ │ │ └── extract_patches.h
│ │ │ │ │ └── test.py
│ │ │ │ ├── knn/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── compile.sh
│ │ │ │ │ ├── functions/
│ │ │ │ │ │ ├── __init__.py
│ │ │ │ │ │ └── knn.py
│ │ │ │ │ ├── setup.py
│ │ │ │ │ └── src/
│ │ │ │ │ ├── knn.cu
│ │ │ │ │ ├── knn.h
│ │ │ │ │ ├── knn_cpu.cpp
│ │ │ │ │ ├── knn_ext.cpp
│ │ │ │ │ └── utils/
│ │ │ │ │ ├── dispatch.cuh
│ │ │ │ │ ├── index_utils.cuh
│ │ │ │ │ ├── mink.cuh
│ │ │ │ │ └── pytorch3d_cutils.h
│ │ │ │ ├── losses/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── arel.py
│ │ │ │ │ ├── confidence.py
│ │ │ │ │ ├── distill.py
│ │ │ │ │ ├── dummy.py
│ │ │ │ │ ├── local_ssi.py
│ │ │ │ │ ├── regression.py
│ │ │ │ │ ├── silog.py
│ │ │ │ │ └── utils.py
│ │ │ │ └── scheduler.py
│ │ │ └── utils/
│ │ │ ├── __init__.py
│ │ │ ├── camera.py
│ │ │ ├── chamfer_distance.py
│ │ │ ├── constants.py
│ │ │ ├── coordinate.py
│ │ │ ├── distributed.py
│ │ │ ├── ema_torch.py
│ │ │ ├── evaluation_depth.py
│ │ │ ├── geometric.py
│ │ │ ├── misc.py
│ │ │ ├── positional_embedding.py
│ │ │ ├── sht.py
│ │ │ ├── validation.py
│ │ │ └── visualization.py
│ │ └── __init__.py
│ └── dynamic_mask/
│ ├── __init__.py
│ ├── inference_batch.py
│ └── sam2/
│ ├── __init__.py
│ ├── automatic_mask_generator.py
│ ├── benchmark.py
│ ├── build_sam.py
│ ├── configs/
│ │ ├── sam2/
│ │ │ ├── sam2_hiera_b+.yaml
│ │ │ ├── sam2_hiera_l.yaml
│ │ │ ├── sam2_hiera_s.yaml
│ │ │ └── sam2_hiera_t.yaml
│ │ ├── sam2.1/
│ │ │ ├── sam2.1_hiera_b+.yaml
│ │ │ ├── sam2.1_hiera_l.yaml
│ │ │ ├── sam2.1_hiera_s.yaml
│ │ │ └── sam2.1_hiera_t.yaml
│ │ └── sam2.1_training/
│ │ └── sam2.1_hiera_b+_MOSE_finetune.yaml
│ ├── csrc/
│ │ └── connected_components.cu
│ ├── modeling/
│ │ ├── __init__.py
│ │ ├── backbones/
│ │ │ ├── __init__.py
│ │ │ ├── hieradet.py
│ │ │ ├── image_encoder.py
│ │ │ └── utils.py
│ │ ├── memory_attention.py
│ │ ├── memory_encoder.py
│ │ ├── position_encoding.py
│ │ ├── sam/
│ │ │ ├── __init__.py
│ │ │ ├── mask_decoder.py
│ │ │ ├── prompt_encoder.py
│ │ │ └── transformer.py
│ │ ├── sam2_base.py
│ │ └── sam2_utils.py
│ ├── sam2_hiera_b+.yaml
│ ├── sam2_hiera_l.yaml
│ ├── sam2_hiera_s.yaml
│ ├── sam2_hiera_t.yaml
│ ├── sam2_image_predictor.py
│ ├── sam2_video_predictor.py
│ ├── sam2_video_predictor_legacy.py
│ └── utils/
│ ├── __init__.py
│ ├── amg.py
│ ├── misc.py
│ └── transforms.py
├── caption/
│ ├── LLM/
│ │ ├── __init__.py
│ │ ├── inference.py
│ │ ├── prompt1.txt
│ │ └── prompt2.txt
│ ├── README.md
│ ├── VQA/
│ │ ├── __init__.py
│ │ ├── inference.py
│ │ └── prompt.txt
│ ├── __init__.py
│ ├── tagging/
│ │ ├── __init__.py
│ │ ├── inference.py
│ │ └── prompt.txt
│ └── utils/
│ ├── __init__.py
│ ├── api_call.py
│ └── combine.py
├── docker-entrypoint.sh
├── requirements/
│ ├── requirements.txt
│ ├── requirements_annotation.txt
│ └── requirements_scoring.txt
├── scoring/
│ ├── README.md
│ ├── __init__.py
│ ├── aesthetic/
│ │ ├── __init__.py
│ │ └── inference.py
│ ├── luminance/
│ │ ├── __init__.py
│ │ └── inference.py
│ ├── motion/
│ │ ├── INSTALL.md
│ │ ├── __init__.py
│ │ └── inference.py
│ └── ocr/
│ ├── __init__.py
│ └── inference.py
├── scripts/
│ ├── annotation.sh
│ ├── caption.sh
│ ├── docker_prepulls.sh
│ ├── download_checkpoints.sh
│ └── scoring.sh
├── utils/
│ ├── README.md
│ ├── __init__.py
│ ├── convert.py
│ ├── cut.py
│ ├── cut_fast.py
│ ├── download_SpatialVID.py
│ ├── download_YouTube.py
│ ├── evaluation.py
│ ├── expand_npz.py
│ ├── extract_frames.py
│ ├── filter.py
│ ├── get_clip.py
│ ├── get_info.py
│ ├── get_instructions.py
│ ├── get_instructions_enhanced.py
│ ├── merge_tables.py
│ ├── normalize_intrinsics.py
│ ├── pack_clip_assets.py
│ ├── quat_to_mat.py
│ ├── read_depth.py
│ ├── read_video.py
│ └── scene_detect.py
└── viser/
├── .clang-format
├── .gitignore
├── .pre-commit-config.yaml
├── .prettierignore
├── LICENSE
├── README.md
├── docs/
│ ├── .gitignore
│ ├── Makefile
│ ├── source/
│ │ ├── _static/
│ │ │ └── css/
│ │ │ └── custom.css
│ │ ├── _templates/
│ │ │ └── sidebar/
│ │ │ └── brand.html
│ │ ├── camera_handles.md
│ │ ├── client_handles.md
│ │ ├── conf.py
│ │ ├── conventions.md
│ │ ├── development.md
│ │ ├── events.md
│ │ ├── examples/
│ │ │ ├── 00_coordinate_frames.rst
│ │ │ ├── 01_image.rst
│ │ │ ├── 02_gui.rst
│ │ │ ├── 03_gui_callbacks.rst
│ │ │ ├── 04_camera_poses.rst
│ │ │ ├── 05_camera_commands.rst
│ │ │ ├── 06_mesh.rst
│ │ │ ├── 07_record3d_visualizer.rst
│ │ │ ├── 08_smpl_visualizer.rst
│ │ │ ├── 09_urdf_visualizer.rst
│ │ │ ├── 10_realsense.rst
│ │ │ ├── 11_colmap_visualizer.rst
│ │ │ ├── 12_click_meshes.rst
│ │ │ ├── 13_theming.rst
│ │ │ ├── 14_markdown.rst
│ │ │ ├── 15_gui_in_scene.rst
│ │ │ ├── 16_modal.rst
│ │ │ ├── 17_background_composite.rst
│ │ │ ├── 18_splines.rst
│ │ │ ├── 19_get_renders.rst
│ │ │ ├── 20_scene_pointer.rst
│ │ │ ├── 21_set_up_direction.rst
│ │ │ ├── 22_games.rst
│ │ │ ├── 23_plotly.rst
│ │ │ ├── 24_notification.rst
│ │ │ └── 25_smpl_visualizer_skinned.rst
│ │ ├── extras.md
│ │ ├── gui_api.md
│ │ ├── gui_handles.md
│ │ ├── icons.md
│ │ ├── index.md
│ │ ├── infrastructure.md
│ │ ├── scene_api.md
│ │ ├── scene_handles.md
│ │ ├── server.md
│ │ └── transforms.md
│ └── update_example_docs.py
├── examples/
│ ├── 00_coordinate_frames.py
│ ├── 01_image.py
│ ├── 02_gui.py
│ ├── 03_gui_callbacks.py
│ ├── 04_camera_poses.py
│ ├── 05_camera_commands.py
│ ├── 06_mesh.py
│ ├── 07_record3d_visualizer.py
│ ├── 08_smpl_visualizer.py
│ ├── 09_urdf_visualizer.py
│ ├── 10_realsense.py
│ ├── 11_colmap_visualizer.py
│ ├── 12_click_meshes.py
│ ├── 13_theming.py
│ ├── 14_markdown.py
│ ├── 15_gui_in_scene.py
│ ├── 16_modal.py
│ ├── 17_background_composite.py
│ ├── 18_splines.py
│ ├── 19_get_renders.py
│ ├── 20_scene_pointer.py
│ ├── 21_set_up_direction.py
│ ├── 22_games.py
│ ├── 23_plotly.py
│ ├── 24_notification.py
│ ├── 25_smpl_visualizer_skinned.py
│ ├── assets/
│ │ ├── .gitignore
│ │ ├── download_colmap_garden.sh
│ │ ├── download_dragon_mesh.sh
│ │ ├── download_record3d_dance.sh
│ │ └── mdx_example.mdx
│ ├── experimental/
│ │ └── gaussian_splats.py
│ └── quick_save.py
├── pyproject.toml
├── src/
│ └── viser/
│ ├── __init__.py
│ ├── _client_autobuild.py
│ ├── _gui_api.py
│ ├── _gui_handles.py
│ ├── _icons.py
│ ├── _icons_enum.py
│ ├── _icons_enum.pyi
│ ├── _icons_generate_enum.py
│ ├── _messages.py
│ ├── _notification_handle.py
│ ├── _scene_api.py
│ ├── _scene_handles.py
│ ├── _tunnel.py
│ ├── _viser.py
│ ├── client/
│ │ ├── .eslintrc.js
│ │ ├── .gitignore
│ │ ├── index.html
│ │ ├── package.json
│ │ ├── postcss.config.cjs
│ │ ├── public/
│ │ │ ├── hdri/
│ │ │ │ └── potsdamer_platz_1k.hdr
│ │ │ └── manifest.json
│ │ ├── src/
│ │ │ ├── App.css.ts
│ │ │ ├── App.tsx
│ │ │ ├── AppTheme.ts
│ │ │ ├── BrowserWarning.tsx
│ │ │ ├── CameraControls.tsx
│ │ │ ├── ClickUtils.tsx
│ │ │ ├── ControlPanel/
│ │ │ │ ├── BottomPanel.tsx
│ │ │ │ ├── ControlPanel.tsx
│ │ │ │ ├── FloatingPanel.tsx
│ │ │ │ ├── Generated.tsx
│ │ │ │ ├── GuiComponentContext.tsx
│ │ │ │ ├── GuiState.tsx
│ │ │ │ ├── SceneTreeTable.css.ts
│ │ │ │ ├── SceneTreeTable.tsx
│ │ │ │ ├── ServerControls.tsx
│ │ │ │ └── SidebarPanel.tsx
│ │ │ ├── FilePlayback.tsx
│ │ │ ├── Markdown.tsx
│ │ │ ├── MessageHandler.tsx
│ │ │ ├── Modal.tsx
│ │ │ ├── Outlines.tsx
│ │ │ ├── SceneTree.tsx
│ │ │ ├── SceneTreeState.tsx
│ │ │ ├── SearchParamsUtils.tsx
│ │ │ ├── Splatting/
│ │ │ │ ├── GaussianSplats.tsx
│ │ │ │ ├── SplatSortWorker.ts
│ │ │ │ └── WasmSorter/
│ │ │ │ ├── Sorter.mjs
│ │ │ │ ├── Sorter.wasm
│ │ │ │ ├── build.sh
│ │ │ │ └── sorter.cpp
│ │ │ ├── ThreeAssets.tsx
│ │ │ ├── Titlebar.tsx
│ │ │ ├── Utils.ts
│ │ │ ├── WebsocketFunctions.tsx
│ │ │ ├── WebsocketInterface.tsx
│ │ │ ├── WebsocketMessages.tsx
│ │ │ ├── WebsocketServerWorker.ts
│ │ │ ├── WorldTransformUtils.ts
│ │ │ ├── components/
│ │ │ │ ├── Button.tsx
│ │ │ │ ├── ButtonGroup.tsx
│ │ │ │ ├── Checkbox.tsx
│ │ │ │ ├── ComponentStyles.css.ts
│ │ │ │ ├── Dropdown.tsx
│ │ │ │ ├── Folder.css.ts
│ │ │ │ ├── Folder.tsx
│ │ │ │ ├── Markdown.tsx
│ │ │ │ ├── MultiSlider.tsx
│ │ │ │ ├── MultiSliderPrimitive/
│ │ │ │ │ ├── LICENSE
│ │ │ │ │ ├── Marks/
│ │ │ │ │ │ └── Marks.tsx
│ │ │ │ │ ├── MultiSlider/
│ │ │ │ │ │ └── MultiSlider.tsx
│ │ │ │ │ ├── Slider.context.ts
│ │ │ │ │ ├── Slider.module.css
│ │ │ │ │ ├── SliderRoot/
│ │ │ │ │ │ └── SliderRoot.tsx
│ │ │ │ │ ├── Thumb/
│ │ │ │ │ │ └── Thumb.tsx
│ │ │ │ │ ├── Track/
│ │ │ │ │ │ └── Track.tsx
│ │ │ │ │ ├── index.ts
│ │ │ │ │ └── utils/
│ │ │ │ │ ├── get-change-value/
│ │ │ │ │ │ └── get-change-value.ts
│ │ │ │ │ ├── get-client-position/
│ │ │ │ │ │ └── get-client-position.ts
│ │ │ │ │ ├── get-floating-value/
│ │ │ │ │ │ └── get-gloating-value.ts
│ │ │ │ │ ├── get-position/
│ │ │ │ │ │ └── get-position.ts
│ │ │ │ │ └── get-precision/
│ │ │ │ │ └── get-precision.ts
│ │ │ │ ├── NumberInput.tsx
│ │ │ │ ├── PlotlyComponent.tsx
│ │ │ │ ├── ProgressBar.tsx
│ │ │ │ ├── Rgb.tsx
│ │ │ │ ├── Rgba.tsx
│ │ │ │ ├── Slider.tsx
│ │ │ │ ├── TabGroup.tsx
│ │ │ │ ├── TextInput.tsx
│ │ │ │ ├── UploadButton.tsx
│ │ │ │ ├── Vector2.tsx
│ │ │ │ ├── Vector3.tsx
│ │ │ │ ├── common.tsx
│ │ │ │ └── utils.tsx
│ │ │ ├── index.css
│ │ │ ├── index.tsx
│ │ │ └── react-app-env.d.ts
│ │ ├── tsconfig.json
│ │ ├── vite-env.d.ts
│ │ └── vite.config.mts
│ ├── extras/
│ │ ├── __init__.py
│ │ ├── _record3d.py
│ │ ├── _record3d_customized.py
│ │ ├── _record3d_customized_megasam.py
│ │ ├── _urdf.py
│ │ └── colmap/
│ │ ├── __init__.py
│ │ └── _colmap_utils.py
│ ├── infra/
│ │ ├── __init__.py
│ │ ├── _async_message_buffer.py
│ │ ├── _infra.py
│ │ ├── _messages.py
│ │ └── _typescript_interface_gen.py
│ ├── py.typed
│ ├── scripts/
│ │ ├── __init__.py
│ │ └── dev_checks.py
│ ├── theme/
│ │ ├── __init__.py
│ │ └── _titlebar.py
│ └── transforms/
│ ├── __init__.py
│ ├── _base.py
│ ├── _se2.py
│ ├── _se3.py
│ ├── _so2.py
│ ├── _so3.py
│ ├── hints/
│ │ └── __init__.py
│ └── utils/
│ ├── __init__.py
│ └── _utils.py
├── sync_message_defs.py
├── visualize_megasam.py
└── visualize_pose.py
================================================
FILE CONTENTS
================================================
================================================
FILE: .gitignore
================================================
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[codz]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.buildx-cache/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py.cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# UV
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
#uv.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
#poetry.toml
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
# pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
# https://pdm-project.org/en/latest/usage/project/#working-with-version-control
#pdm.lock
#pdm.toml
.pdm-python
.pdm-build/
# pixi
# Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
#pixi.lock
# Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
# in the .venv directory. It is recommended not to include this directory in version control.
.pixi
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.envrc
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
.idea/
# Abstra
# Abstra is an AI-powered process automation framework.
# Ignore directories containing user credentials, local state, and settings.
# Learn more at https://abstra.io/docs
.abstra/
# Visual Studio Code
# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
# and can be added to the global gitignore or merged into this file. However, if you prefer,
# you could uncomment the following to ignore the entire vscode folder
.vscode/
# Ruff stuff:
.ruff_cache/
# PyPI configuration file
.pypirc
# Cursor
# Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
# exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
# refer to https://docs.cursor.com/context/ignore-files
test/
checkpoints/
.cursorignore
.cursorindexingignore
# Marimo
marimo/_static/
marimo/_lsp/
__marimo__/
.DS_Store
================================================
FILE: .gitmodules
================================================
[submodule "camera_pose_annotation/base"]
path = camera_pose_annotation/base
url = https://github.com/SpatialVID/base.git
================================================
FILE: Dockerfile.cuda
================================================
# This Dockerfile builds FFmpeg with NVIDIA GPU support and libvmaf from source
# It uses a two-stage build to create a smaller runtime image
# This file is adapted from https://github.com/Netflix/vmaf/blob/master/Dockerfile.cuda
ARG CUDA_BASE_IMAGE=docker.io/nvidia/cuda:12.6.3-cudnn-devel-ubuntu22.04
ARG RUN_TIME_IMG=docker.io/nvidia/cuda:12.6.3-runtime-ubuntu22.04
# ARG CUDA_BASE_IMAGE=swr.cn-north-4.myhuaweicloud.com/ddn-k8s/docker.io/nvidia/cuda:12.6.3-cudnn-devel-ubuntu22.04
# ARG RUN_TIME_IMG=swr.cn-north-4.myhuaweicloud.com/ddn-k8s/docker.io/nvidia/cuda:12.6.3-runtime-ubuntu22.04
FROM $CUDA_BASE_IMAGE as builder
ARG VMAF_TAG=master
ARG FFMPEG_TAG=master
RUN DEBIAN_FRONTEND=noninteractive apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y libopenjp2-7-dev \
ninja-build cmake git python3 python3-pip nasm xxd pkg-config curl unzip nvidia-cuda-toolkit
RUN git clone https://github.com/Netflix/vmaf.git && cd vmaf && git checkout $VMAF_TAG
RUN git clone https://github.com/FFmpeg/FFmpeg.git && cd FFmpeg && git checkout $FFMPEG_TAG
RUN git clone https://github.com/FFmpeg/nv-codec-headers.git && cd nv-codec-headers && make && make install
# install vmaf
RUN python3 -m pip install meson
RUN cd vmaf && meson libvmaf/build libvmaf -Denable_cuda=true -Denable_avx512=true --buildtype release && \
ninja -vC libvmaf/build && \
ninja -vC libvmaf/build install
ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib/x86_64-linux-gnu/
RUN ldconfig
# install ffmpeg
RUN cd FFmpeg && ./configure \
--enable-libnpp \
--enable-nonfree \
--enable-nvdec \
--enable-nvenc \
--enable-cuvid \
--enable-cuda \
--enable-cuda-nvcc \
--enable-libvmaf \
--enable-ffnvcodec \
--disable-stripping \
--extra-cflags="-I/usr/local/cuda/include" \
--extra-ldflags="-L/usr/local/cuda/lib64 -L/usr/local/cuda/lib64/stubs/"
RUN cd FFmpeg && make -j && make install
RUN mkdir /data
# Create a smaller runtime image
FROM ${RUN_TIME_IMG} as runtime
ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get update && apt-get install -y --no-install-recommends \
ca-certificates python3 python3-pip python3-venv libnuma-dev libsm6 libxext6 libxrender1 libgl1 git vim && rm -rf /var/lib/apt/lists/*
WORKDIR /workspace
# Copy FFmpeg and libvmaf from builder (installed under /usr/local)
COPY --from=builder /usr/local /usr/local
# copy libraries installed by the builder stage if present
COPY --from=builder /usr/lib/ /usr/lib/
# Link python
RUN ln -sf /usr/bin/python3 /usr/bin/python
RUN python3 -m pip install --no-cache-dir --upgrade pip setuptools wheel
# Copy repository
COPY . /workspace
RUN apt-get update
# Install Python requirements (may still fail for some packages requiring system libs)
RUN python3 -m pip --no-cache-dir install -r requirements/requirements.txt
RUN python3 -m pip --no-cache-dir install -r requirements/requirements_scoring.txt || true
RUN python3 -m pip --no-cache-dir install -r requirements/requirements_annotation.txt || true
# Entrypoint
COPY docker-entrypoint.sh /usr/local/bin/docker-entrypoint.sh
RUN chmod +x /usr/local/bin/docker-entrypoint.sh
ENV FFMPEG_PATH=/usr/local/bin/ffmpeg
ENTRYPOINT ["/usr/local/bin/docker-entrypoint.sh"]
CMD ["bash", "ldconfig"]
================================================
FILE: LICENSE
================================================
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
================================================
FILE: README.md
================================================
<h1 align='center'>SpatialVID: A Large-Scale Video Dataset with Spatial Annotations</h1>
<div align='center'>
<a href='https://oiiiwjh.github.io/' target='_blank'>Jiahao Wang</a><sup>1*</sup>
<a href='https://felixyuan-yf.github.io/' target='_blank'>Yufeng Yuan</a><sup>1*</sup>
<a href='https://zrj-cn.github.io/' target='_blank'>Rujie Zheng</a><sup>1*</sup>
<a href='https://linyou.github.io' target='_blank'>Youtian Lin</a><sup>1</sup>
<a href='https://ygaojiany.github.io' target='_blank'>Jian Gao</a><sup>1</sup>
<a href='https://linzhuo.xyz' target='_blank'>Lin-Zhuo Chen</a><sup>1</sup>
</div>
<div align='center'>
<a href='https://openreview.net/profile?id=~yajie_bao5' target='_blank'>Yajie Bao</a><sup>1</sup>
<a href='https://github.com/YeeZ93' target='_blank'>Yi Zhang</a><sup>1</sup>
<a href='https://github.com/ozchango' target='_blank'>Chang Zeng</a><sup>1</sup>
<a href='https://github.com/yxzhou217' target='_blank'>Yanxi Zhou</a><sup>1</sup>
<a href='https://www.xxlong.site/index.html' target='_blank'>Xiaoxiao Long</a><sup>1</sup>
<a href='http://zhuhao.cc/home/' target='_blank'>Hao Zhu</a><sup>1</sup>
</div>
<div align='center'>
<a href='http://zhaoxiangzhang.net/' target='_blank'>Zhaoxiang Zhang</a><sup>2</sup>
<a href='https://cite.nju.edu.cn/People/Faculty/20190621/i5054.html' target='_blank'>Xun Cao</a><sup>1</sup>
<a href='https://yoyo000.github.io/' target='_blank'>Yao Yao</a><sup>1†</sup>
</div>
<div align='center'>
<sup>1</sup>Nanjing University <sup>2</sup>Institute of Automation, Chinese Academy of Sciences
</div>
<div align='center'>
*Equal Contribution †Corresponding Author
</div>
<div align="center">
<strong>CVPR 2026</strong>
</div>
<br>
<div align="center">
<a href="https://nju-3dv.github.io/projects/SpatialVID/"><img src="https://img.shields.io/static/v1?label=SpatialVID&message=Project&color=purple"></a>
<a href="https://arxiv.org/abs/2509.09676"><img src="https://img.shields.io/static/v1?label=Paper&message=Arxiv&color=red&logo=arxiv"></a>
<a href="https://github.com/NJU-3DV/spatialVID"><img src="https://img.shields.io/static/v1?label=Code&message=Github&color=blue&logo=github"></a>
<a href="https://huggingface.co/SpatialVID"><img src="https://img.shields.io/static/v1?label=Dataset&message=HuggingFace&color=yellow&logo=huggingface"></a>
<a href="https://www.modelscope.cn/organization/SpatialVID"><img src="https://img.shields.io/static/v1?label=Dataset&message=ModelScope&color=4285F4"></a>
</div>
<p align="center">
<img src="assets/overview.png" height=400>
</p>
## 🎉NEWS
+ [2026.02.21] 🎉 SpatialVID is accepted by CVPR 2026!
+ [2025.10.11] 🐳 Docker support is now available, featuring a pre-configured environment with NVIDIA GPU-accelerated FFmpeg.
+ [2025.09.29] 🚀 Depth data for the SpatialVID-HQ dataset is now officially available.
+ [2025.09.24] 🤗 Raw metadata access is now available via a [gated HuggingFace dataset](https://huggingface.co/datasets/SpatialVID/SpatialVID-RAW) to better support community research!!
+ [2025.09.24] 🔭 Enhanced instructions for better camera control are updated.
+ [2025.09.18] 🎆 SpatialVID dataset is now available on both HuggingFace and ModelScope.
+ [2025.09.14] 📢 We have also uploaded the SpatialVID-HQ dataset to ModelScope, offering more diverse download options.
+ [2025.09.11] 🔥 Our paper, code and SpatialVID-HQ dataset are released!
**[✍️ Note]** Each video clip is paired with a dedicated annotation folder (named after the video’s id). The folder contains 5 key files, and details regarding these files can be found in [Detailed Explanation of Annotation Files](https://huggingface.co/datasets/SpatialVID/SpatialVID#3-detailed-explanation-of-annotation-files).
## Abstract
Significant progress has been made in spatial intelligence, spanning both spatial reconstruction and world exploration. However, the scalability and real-world fidelity of current models remain severely constrained by the scarcity of large-scale, high-quality training data. While several datasets provide camera pose information, they are typically limited in scale, diversity, and annotation richness, particularly for real-world dynamic scenes with ground-truth camera motion. To this end, we collect **SpatialVID**, a dataset consisting of a large corpus of in-the-wild videos with diverse scenes, camera movements and dense 3D annotations such as per-frame camera poses, depth, and motion instructions. Specifically, we collect more than **21,000 hours** of raw videos, and process them into **2.7 million clips** through a hierarchical filtering pipeline, totaling **7,089 hours** of dynamic content. A subsequent annotation pipeline enriches these clips with detailed spatial and semantic information, including camera poses, depth maps, dynamic masks, structured captions, and serialized motion instructions. Analysis of SpatialVID's data statistics reveals a richness and diversity that directly foster improved model generalization and performance, establishing it as a key asset for the video and 3D vision research community.
## Preparation
This section describes how to set up the environment manually. For a simpler, containerized setup, please refer to the **[Docker Setup and Usage](#docker-setup-and-usage)** section.
### Environment
1. Necessary packages
```bash
git clone --recursive https://github.com/NJU-3DV/SpatialVID.git
cd SpatialVID
conda create -n SpatialVID python=3.10.13
conda activate SpatialVID
pip install -r requirements/requirements.txt
```
2. Package needed for scoring
```bash
pip install paddlepaddle-gpu==3.0.0 -i https://www.paddlepaddle.org.cn/packages/stable/cu126/
pip install -r requirements/requirements_scoring.txt
```
Ignore the warnings about the `nvidia-nccl-cu12` and `numpy` versions; they are not a problem.
For FFmpeg, please refer to [`INSTALL.md`](scoring/motion/INSTALL.md) for detailed installation instructions. After installation, replace the `FFMPEG_PATH` variable in [`scoring/motion/inference.py`](scoring/motion/inference.py) and [`utils/cut.py`](utils/cut.py) with the actual path to your ffmpeg executable; the default is `/usr/local/bin/ffmpeg`.
⚠️ If your videos use the AV1 codec instead of H.264, you need to install ffmpeg (already covered by our requirements script), then run the following so the conda environment can decode AV1:
```bash
pip uninstall opencv-python
conda install -c conda-forge opencv==4.11.0
```
If your conda environment still cannot decode AV1, you can pass the `--backend av` option to the scoring scripts to use PyAV as the video reading backend; a quick check is sketched below.
Note, however, that using PyAV for frame extraction may lead to slight inaccuracies in frame positioning.
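If you are unsure which backend you need, a minimal check like the following tells you whether your OpenCV build can decode a given AV1 clip and, if not, whether PyAV can (a sketch only; the sample path is a placeholder, and `--backend av` in the scoring scripts remains the supported switch):
```python
import cv2

VIDEO = "path/to/sample_av1_clip.mp4"  # placeholder: point this at one of your AV1 videos

# Try OpenCV first (the default backend of the scoring scripts).
cap = cv2.VideoCapture(VIDEO)
ok, frame = cap.read()
cap.release()

if ok:
    print("OpenCV decoded a frame:", frame.shape)
else:
    # Fall back to PyAV, i.e. what `--backend av` uses.
    import av
    with av.open(VIDEO) as container:
        frame = next(container.decode(video=0))
        print("PyAV decoded a frame:", frame.width, "x", frame.height)
```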
3. Package needed for annotation
```bash
pip install -r requirements/requirements_annotation.txt
```
Compile the extensions for the camera tracking module:
```bash
cd camera_pose_annotation/base
python setup.py install
```
4. [Optional] Package needed for visualization
```bash
pip install plotly
pip install -e viser
```
### Model Weight
Download the model weights used in our experiments:
```bash
bash scripts/download_checkpoints.sh
```
Or you can manually download the model weights from the following links and place them in the appropriate directories.
| Model | File Name | URL |
| ------------------- | ----------------------- | --------------------------------------------------------------------------------------------------------------- |
| Aesthetic Predictor | aesthetic | [🔗](https://github.com/christophschuhmann/improved-aesthetic-predictor/raw/main/sac+logos+ava1-l14-linearMSE.pth) |
| MegaSAM | megasam_final | [🔗](https://github.com/mega-sam/mega-sam/blob/main/checkpoints/megasam_final.pth) |
| RAFT | raft-things | [🔗](https://drive.google.com/uc?id=1MqDajR89k-xLV0HIrmJ0k-n8ZpG6_suM) |
| Depth Anything | Depth-Anything-V2-Large | [🔗](https://huggingface.co/depth-anything/Depth-Anything-V2-Large) |
| UniDepth | unidepth-v2-vitl14 | [🔗](https://huggingface.co/lpiccinelli/unidepth-v2-vitl14) |
| SAM | sam2.1-hiera-large | [🔗](https://huggingface.co/facebook/sam2.1-hiera-large) |
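After downloading, a quick sanity check can confirm that the weights the pipeline expects are in place (a minimal sketch; the exact filenames and layout under `checkpoints/` depend on `download_checkpoints.sh`, so treat the paths below as assumptions):
```python
from pathlib import Path

# Hypothetical layout; adjust to whatever download_checkpoints.sh actually produces.
expected = [
    "checkpoints/megasam_final.pth",                 # MegaSAM (camera tracking)
    "checkpoints/raft-things.pth",                   # RAFT optical flow
    "checkpoints/sac+logos+ava1-l14-linearMSE.pth",  # aesthetic predictor
]

for path in map(Path, expected):
    print(("ok     " if path.is_file() else "MISSING"), path)
```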
## Quick Start
The whole pipeline is illustrated in the figure below:
<p align="center">
<img src="assets/pipeline.png" height=340>
</p>
1. Scoring
```bash
bash scripts/scoring.sh
```
Inside the [`scoring.sh`](scripts/scoring.sh) script, you need to set the following variables:
- `ROOT_VIDEO` is the directory containing the input video files.
- `OUTPUT_DIR` is the directory where the output files will be saved.
2. Annotation
```bash
bash scripts/annotation.sh
```
Inside the [`annotation.sh`](scripts/annotation.sh) script, you need to set the following variables:
- `CSV` is the CSV file generated by the scoring script, default is `$OUTPUT_DIR/results.csv`.
- `OUTPUT_DIR` is the directory where the output files will be saved.
3. Caption
```bash
bash scripts/caption.sh
```
Inside the [`caption.sh`](scripts/caption.sh) script, you need to set the following variables:
- `CSV` is the CSV file generated by the annotation script, default is `$OUTPUT_DIR/results.csv`.
- `SRC_DIR` is the annotation output directory, default is the same as the `OUTPUT_DIR` in the annotation step.
- `OUTPUT_DIR` is the directory where the output files will be saved.
- The API keys for the LLM models used in the captioning step. You can replace them with your own API keys.
4. Visualization
- You can visualize the `poses.npy` in the `reconstruction` folder of each annotated clip using the [`visualize_pose.py`](viser/visualize_pose.py) script.
- You can visualize the final annotation result(`sgd_cvd_hr.npz`) using the [`visualize_megasam.py`](viser/visualize_megasam.py) script.
Note that if you want to visualize any clip in our dataset, you first need to use the [`pack_clip_assets.py`](utils/pack_clip_assets.py) script to pack that clip's depth, RGB frames, intrinsics, extrinsics, etc. into a single npz file. You can then run the visualization script on it.
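Before visualizing, it can help to inspect what a packed npz file actually contains (a minimal sketch; the path is a placeholder and no key names are assumed):
```python
import numpy as np

packed = np.load("path/to/clip_assets.npz", allow_pickle=True)  # placeholder path
for key in packed.files:
    arr = packed[key]
    print(f"{key}: shape={getattr(arr, 'shape', None)}, dtype={getattr(arr, 'dtype', None)}")
```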
## Docker Setup and Usage
We provide a Dockerfile to create a fully configured environment that bundles all dependencies, including a custom-built FFmpeg with NVIDIA acceleration. This is the recommended way to ensure reproducibility and avoid environment-related issues.
Before you begin, ensure your system environment is similar to the configuration below; version matching is crucial for a successful compilation.
The GPU needs to support HEVC decoding; refer to the [NVIDIA NVDEC Support Matrix](https://en.wikipedia.org/wiki/NVIDIA_Video_Coding_Engine#NVDEC).
### Prerequisites: Setting up the Host Environment
Before building and running the Docker container, your host machine must be configured to support GPU access for Docker.
1. **NVIDIA Drivers**: Ensure you have the latest NVIDIA drivers installed. You can verify this by running `nvidia-smi`.
2. **Docker Engine**: Install Docker on your system. Follow the official instructions at [docs.docker.com/engine/install/](https://docs.docker.com/engine/install/).
3. **NVIDIA Container Toolkit**: This toolkit allows Docker containers to access the host's NVIDIA GPU; see the [official install guide](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html). Install it using the following commands (for Debian/Ubuntu):
```bash
# Add the GPG key
curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg
# Add the repository
curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list | \
sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' | \
sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list
# Update package lists and install the toolkit
sudo apt-get update
sudo apt-get install -y \
nvidia-container-toolkit=${NVIDIA_CONTAINER_TOOLKIT_VERSION} \
nvidia-container-toolkit-base=${NVIDIA_CONTAINER_TOOLKIT_VERSION} \
libnvidia-container-tools=${NVIDIA_CONTAINER_TOOLKIT_VERSION} \
libnvidia-container1=${NVIDIA_CONTAINER_TOOLKIT_VERSION}
# Configure Docker to use the NVIDIA runtime
sudo nvidia-ctk runtime configure --runtime=docker
# Restart the Docker daemon to apply the changes
sudo systemctl restart docker
```
For other operating systems, please refer to the [official NVIDIA documentation](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html).
4. **Docker Image Pre-pulls [optional]**: To accelerate the build process, we provide a script to pre-pull necessary Docker images from a mirror registry.
```bash
bash scripts/docker_prepulls.sh
```
### Build and Run the Container
You can also build and run the image using standard Docker commands from the root of the repository.
1. **Build the GPU image**:
```bash
docker build -f Dockerfile.cuda \
--build-arg NUM_JOBS=8 \
-t spatialvid-gpu .
```
2. **Run the container**:
```bash
docker run --gpus all --rm -it \
-v $(pwd):/workspace \
-w /workspace \
-e NVIDIA_DRIVER_CAPABILITIES=compute,video,utility \
spatialvid-gpu bash
```
3. **Verify the environment (inside the container)**:
Once inside the container, you can verify that FFmpeg and PyTorch are correctly installed and can access the GPU.
```bash
# Check the custom FFmpeg build
/usr/local/bin/ffmpeg -version
# Check PyTorch and CUDA availability
python3 -c "import torch; print(f'PyTorch: {torch.__version__}, CUDA: {torch.version.cuda}, GPU Available: {torch.cuda.is_available()}')"
```
## Dataset Download
Our dataset is available on [HuggingFace](https://huggingface.co/SpatialVID) and [ModelScope](https://www.modelscope.cn/organization/SpatialVID).
Apart from downloading the dataset using terminal commands, we provide scripts to download the SpatialVID/SpatialVID-HQ dataset from HuggingFace. Please refer to the [`download_SpatialVID.py`](utils/download_SpatialVID.py) script for more details.
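For illustration, the HuggingFace download can also be driven directly with `huggingface_hub` (a minimal sketch; the repo id, target directory, and pattern filter are assumptions, and [`download_SpatialVID.py`](utils/download_SpatialVID.py) remains the recommended entry point):
```python
from huggingface_hub import snapshot_download

# Hypothetical arguments; see utils/download_SpatialVID.py for the supported options.
snapshot_download(
    repo_id="SpatialVID/SpatialVID-HQ",
    repo_type="dataset",
    local_dir="data/SpatialVID-HQ",
    allow_patterns=["*.csv"],  # e.g. fetch only the metadata tables first
)
```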
We also provide our script to download the raw videos from YouTube. You can refer to the [`download_YouTube.py`](utils/download_YouTube.py) script for more details.
## License
Please refer to the [LICENSE](LICENSE) file for more details about the license of our code.
⚠️ SpatialVID dataset is released under the [Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License](https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode) (CC-BY-NC-SA-4.0). Users must attribute the original source, use the resource only for non-commercial purposes, and release any modified/derived works under the same license. If you are the copyright owner of any video in our dataset and you need it to be removed, please contact us, and we will remove the video samples from our dataset / Github / project webpage / technical presentation as soon as possible.
## References
Thanks to the developers and contributors of the following open-source repositories, whose invaluable work has greatly inspired our project:
- [Open-Sora](https://github.com/hpcaitech/Open-Sora): An initiative dedicated to efficiently producing high-quality video.
- [MegaSaM](https://github.com/mega-sam/mega-sam): Accurate, fast, and robust structure and motion from casual dynamic videos.
- [Depth Anything V2](https://github.com/DepthAnything/Depth-Anything-V2): A model for monocular depth estimation.
- [UniDepthV2](https://github.com/lpiccinelli-eth/UniDepth): A model for universal monocular metric depth estimation.
- [SAM2](https://github.com/facebookresearch/sam2): A model towards solving promptable visual segmentation in images and videos.
- [Viser](https://viser.studio/latest/): A library for interactive 3D visualization in Python.
Our repository is licensed under the Apache 2.0 License. However, if you use MegaSaM or other components in your work, please follow their respective licenses.
## Citation
```bibtex
@article{wang2025spatialvid,
title={Spatialvid: A large-scale video dataset with spatial annotations},
author={Wang, Jiahao and Yuan, Yufeng and Zheng, Rujie and Lin, Youtian and Gao, Jian and Chen, Lin-Zhuo and Bao, Yajie and Zhang, Yi and Zeng, Chang and Zhou, Yanxi and others},
journal={arXiv preprint arXiv:2509.09676},
year={2025}
}
```
================================================
FILE: camera_pose_annotation/.gitignore
================================================
# files
data/*
*.log
*.txt
*.bz2
*.zip
*.ipynb
data_videos
!requirements.txt
!requirements_megasam.txt
#python
*.pyc
__pycache__/
# dir
outputs/
outputs_303/
data_videos/
checkpoints/*
!checkpoints/megasam_final.pth
DROID-SLAM/
.vscode/
================================================
FILE: camera_pose_annotation/README.md
================================================
# Camera Pose Annotation
## Depth Estimation
Use both [Depth-Anything V2](depth_estimation/Depth-Anything) and [UniDepth V2](depth_estimation/UniDepth) to estimate depth maps from images.
Download the pre-trained models from the respective repositories. Skip this step if you have already followed the installation instructions in the [README](../README.md).
- [Depth-Anything V2](https://huggingface.co/depth-anything/Depth-Anything-V2-Large)
- [UniDepth V2](https://huggingface.co/lpiccinelli/unidepth-v2-vitl14)
To run depth inference with Depth-Anything V2, run the following command:
```bash
torchrun --standalone --nproc_per_node ${GPU_NUM} camera_pose_annotation/depth_estimation/Depth-Anything/inference_batch.py \
${CSV} \
--encoder vitl \
--checkpoints_path checkpoints \
--OUTPUT_DIR ${OUTPUT_DIR} \
--bs 16 \
--num_workers ${GPU_NUM}
```
To run depth inference with UniDepth V2, run the following command:
```bash
torchrun --standalone --nproc_per_node ${GPU_NUM} camera_pose_annotation/depth_estimation/UniDepth/inference_batch.py \
${CSV} \
--OUTPUT_DIR ${OUTPUT_DIR} \
--checkpoints_path checkpoints \
--bs 32 \
--num_workers ${GPU_NUM}
```
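For reference, single-image inference with Depth-Anything V2 looks roughly like the following (a sketch based on the vendored `depth_anything_v2` code; the checkpoint filename and constructor arguments are assumptions, and the batch script above remains the supported entry point):
```python
import cv2
import torch
from depth_anything_v2.dpt import DepthAnythingV2

# Hypothetical vitl configuration and checkpoint path.
model = DepthAnythingV2(encoder="vitl", features=256, out_channels=[256, 512, 1024, 1024])
model.load_state_dict(torch.load("checkpoints/depth_anything_v2_vitl.pth", map_location="cpu"))
model = model.to("cuda").eval()

raw_img = cv2.imread("path/to/frame.jpg")  # BGR image, placeholder path
depth = model.infer_image(raw_img)         # H x W relative depth map
print(depth.shape, depth.min(), depth.max())
```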
## Camera Tracking
We use a DROID-SLAM-based method to track camera poses from videos.
To run inference on a single video, run the following command:
```bash
python camera_pose_annotation/camera_tracking/camera_tracking.py \
--dir_path ${DIR_PATH} \
--weights checkpoints/megasam_final.pth \
--disable_vis
```
To run inference on videos in batch, run the following command:
```bash
python camera_pose_annotation/camera_tracking/inference_batch.py ${CSV} \
--OUTPUT_DIR ${OUTPUT_DIR} \
--checkpoints_path checkpoints --gpu_id ${CUDA_VISIBLE_DEVICES} \
--num_workers $((GPU_NUM * 2))
```
## CVD (Consistent Video Depth) Optimization
### Optical Flow
Infer optical flow using the RAFT model.
Download the [`raft-things.pth`](https://drive.google.com/uc?id=1MqDajR89k-xLV0HIrmJ0k-n8ZpG6_suM) checkpoint.
To run inference on a single video, run the following command:
```bash
python camera_pose_annotation/cvd_opt/preprocess/preprocess_flow.py \
--dir_path ${DIR_PATH} \
--model checkpoints/raft-things.pth \
--mixed_precision
```
To run inference on videos in batch, run the following command:
```bash
python camera_pose_annotation/cvd_opt/preprocess/inference_batch.py ${CSV} \
--OUTPUT_DIR ${OUTPUT_DIR} \
--checkpoints_path checkpoints --gpu_id ${CUDA_VISIBLE_DEVICES} \
--num_workers $((GPU_NUM * 2))
```
### Optimization
Use the optical flow to optimize the estimated depth maps.
To run inference on a single video, run the following command:
```bash
python camera_pose_annotation/cvd_opt/cvd_opt.py \
--dir_path ${DIR_PATH} \
--w_grad 2.0 --w_normal 5.0
```
To run inference on videos in batch, run the following command:
```bash
python camera_pose_annotation/cvd_opt/inference_batch.py ${CSV} \
--OUTPUT_DIR ${OUTPUT_DIR} \
--gpu_id ${CUDA_VISIBLE_DEVICES} \
--num_workers $((GPU_NUM * 2))
```
## Dynamic Mask
Given the limitations of MegaSaM in predicting motion probabilities, we opt to enhance its performance using SAM2.
Specifically, an adaptive thresholding mechanism, calibrated to the system’s motion probability distribution, is first employed to generate initial masks. Subsequently, contour detection is performed to mitigate redundant segmentation of overlapping regions; for each identified contour, four evenly spaced anchor points are sampled along its perimeter to serve as dedicated prompts for the SAM2 model.
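A rough sketch of this prompt-generation step is shown below (a simplified illustration, not the repository's exact implementation; the thresholding rule and names are assumptions, and the sampled points would then be passed to SAM2 as positive point prompts):
```python
import cv2
import numpy as np

def contour_point_prompts(motion_prob: np.ndarray, num_points: int = 4):
    """Turn a per-pixel motion-probability map into per-contour point prompts."""
    # Adaptive threshold calibrated to the probability distribution (illustrative rule).
    thr = motion_prob.mean() + motion_prob.std()
    mask = (motion_prob > thr).astype(np.uint8)

    # Contour detection merges overlapping regions into single outlines.
    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)

    prompts = []
    for cnt in contours:
        pts = cnt.reshape(-1, 2)                 # (N, 2) pixel coords along the contour
        if len(pts) < num_points:
            continue
        idx = np.linspace(0, len(pts) - 1, num_points, dtype=int)
        prompts.append(pts[idx])                 # evenly spaced anchor points
    return prompts  # each entry: (num_points, 2), used as point prompts for SAM2
```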
Download the pre-trained [SAM2 model](https://huggingface.co/facebook/sam2.1-hiera-large).
Run the following command:
```bash
python camera_pose_annotation/dynamic_mask/inference_batch.py ${CSV} \
--OUTPUT_DIR ${OUTPUT_DIR} \
--checkpoints_path checkpoints --gpu_num ${GPU_NUM} \
--num_workers $((GPU_NUM * 2))
```
================================================
FILE: camera_pose_annotation/__init__.py
================================================
================================================
FILE: camera_pose_annotation/camera_tracking/__init__.py
================================================
================================================
FILE: camera_pose_annotation/camera_tracking/camera_tracking.py
================================================
# Copyright 2025 DeepMind Technologies Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Test camera tracking on a single scene."""
# pylint: disable=invalid-name
# pylint: disable=g-importing-member
# pylint: disable=g-bad-import-order
# pylint: disable=g-import-not-at-top
# pylint: disable=redefined-outer-name
# pylint: disable=undefined-variable
# pylint: disable=undefined-loop-variable
import sys
sys.path.append("camera_pose_annotation/base/droid_slam")
from droid import Droid
from lietorch import SE3
import argparse
import glob
import os
import cv2
import torch
import numpy as np
from tqdm import tqdm
import torch.nn.functional as F
def image_stream(
image_list,
mono_disp_list,
scene_name,
use_depth=False,
aligns=None,
K=None,
stride=1,
):
"""image generator."""
del scene_name, stride
fx, fy, cx, cy = (
K[0, 0],
K[1, 1],
K[0, 2],
K[1, 2],
) # np.loadtxt(os.path.join(dir_path, 'calibration.txt')).tolist()
for t, (image_file) in enumerate(image_list):
image = cv2.imread(image_file)
# depth = cv2.imread(depth_file, cv2.IMREAD_ANYDEPTH) / 5000.
# depth = np.float32(np.load(depth_file)) / 300.0
# depth = 1. / pt_data["depth"]
mono_disp = mono_disp_list[t]
# mono_disp = np.float32(np.load(disp_file)) #/ 300.0
depth = np.clip(
1.0 / ((1.0 / aligns[2]) * (aligns[0] * mono_disp + aligns[1])),
1e-4,
1e4,
)
depth[depth < 1e-2] = 0.0
# breakpoint()
h0, w0, _ = image.shape
h1 = int(h0 * np.sqrt((384 * 512) / (h0 * w0)))
w1 = int(w0 * np.sqrt((384 * 512) / (h0 * w0)))
image = cv2.resize(image, (w1, h1), interpolation=cv2.INTER_AREA)
image = image[: h1 - h1 % 8, : w1 - w1 % 8]
image = torch.as_tensor(image).permute(2, 0, 1)
depth = torch.as_tensor(depth)
depth = F.interpolate(
depth[None, None], (h1, w1), mode="nearest-exact"
).squeeze()
depth = depth[: h1 - h1 % 8, : w1 - w1 % 8]
mask = torch.ones_like(depth)
intrinsics = torch.as_tensor([fx, fy, cx, cy])
intrinsics[0::2] *= w1 / w0
intrinsics[1::2] *= h1 / h0
if use_depth:
yield t, image[None], depth, intrinsics, mask
else:
yield t, image[None], intrinsics, mask
def save_full_reconstruction(
droid, full_traj, rgb_list, senor_depth_list, motion_prob, scene_name, save_path
):
"""Save full reconstruction."""
from pathlib import Path
t = full_traj.shape[0]
images = np.array(rgb_list[:t]) # droid.video.images[:t].cpu().numpy()
disps = 1.0 / (np.array(senor_depth_list[:t]) + 1e-6)
poses = full_traj # .cpu().numpy()
intrinsics = droid.video.intrinsics[:t].cpu().numpy()
Path(f"{save_path}").mkdir(parents=True, exist_ok=True)
np.save(f"{save_path}/images.npy", images)
np.save(f"{save_path}/disps.npy", disps)
np.save(f"{save_path}/poses.npy", poses)
np.save(f"{save_path}/intrinsics.npy", intrinsics * 8.0)
np.save(f"{save_path}/motion_prob.npy", motion_prob)
intrinsics = intrinsics[0] * 8.0
poses_th = torch.as_tensor(poses, device="cpu")
cam_c2w = SE3(poses_th).inv().matrix().numpy()
K = np.eye(3)
K[0, 0] = intrinsics[0]
K[1, 1] = intrinsics[1]
K[0, 2] = intrinsics[2]
K[1, 2] = intrinsics[3]
max_frames = min(1000, images.shape[0])
if not os.path.exists(save_path):
os.makedirs(save_path)
np.savez(
os.path.join(save_path, f"{scene_name}_droid.npz"),
images=np.uint8(images[:max_frames, ::-1, ...].transpose(0, 2, 3, 1)),
depths=np.float32(1.0 / disps[:max_frames, ...]),
intrinsic=K,
cam_c2w=cam_c2w[:max_frames],
)
def parse_args():
"""Parse command line arguments."""
parser = argparse.ArgumentParser()
parser.add_argument("--dir_path", help="path to the dataset")
parser.add_argument("--weights", default="droid.pth")
parser.add_argument("--buffer", type=int, default=1024)
parser.add_argument("--image_size", default=[240, 320])
parser.add_argument("--disable_vis", action="store_true")
parser.add_argument("--beta", type=float, default=0.3)
parser.add_argument(
"--filter_thresh", type=float, default=2.0
) # motion threshold for keyframe
parser.add_argument("--warmup", type=int, default=8)
parser.add_argument("--keyframe_thresh", type=float, default=2.0)
parser.add_argument("--frontend_thresh", type=float, default=12.0)
parser.add_argument("--frontend_window", type=int, default=25)
parser.add_argument("--frontend_radius", type=int, default=2)
parser.add_argument("--frontend_nms", type=int, default=1)
parser.add_argument("--stereo", action="store_true")
parser.add_argument("--depth", action="store_true")
parser.add_argument("--upsample", action="store_true")
parser.add_argument("--scene_name", help="scene_name")
parser.add_argument("--backend_thresh", type=float, default=16.0)
parser.add_argument("--backend_radius", type=int, default=2)
parser.add_argument("--backend_nms", type=int, default=3)
return parser.parse_args()
def main():
args = parse_args()
scene_name = os.path.basename(args.dir_path)
rgb_list = []
senor_depth_list = []
img_path = os.path.join(args.dir_path, "img")
img_list = sorted(glob.glob(os.path.join(img_path, "*.jpg")))
img_list += sorted(glob.glob(os.path.join(img_path, "*.png")))
# NOTE Mono is inverse depth, but metric-depth is depth!
mono_disp_paths = sorted(
glob.glob(os.path.join(args.dir_path, "depth-anything", "*.npy"))
)
metric_depth_paths = sorted(
glob.glob(os.path.join(args.dir_path, "unidepth", "*.npz"))
)
img_0 = cv2.imread(img_list[0])
scales = []
shifts = []
mono_disp_list = []
fovs = []
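# Align Depth-Anything's relative disparity to UniDepth's metric depth: for
# every frame, estimate a median-based scale and shift between the two disparity
# maps (with a special case when sky dominates the frame); a single representative
# (scale, shift) pair and a normalization scale are then selected for the whole
# clip further below.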
for t, (mono_disp_file, metric_depth_file) in enumerate(
zip(mono_disp_paths, metric_depth_paths)
):
da_disp = np.float32(np.load(mono_disp_file)) # / 300.0
uni_data = np.load(metric_depth_file)
metric_depth = uni_data["depth"]
fovs.append(uni_data["fov"])
da_disp = cv2.resize(
da_disp,
(metric_depth.shape[1], metric_depth.shape[0]),
interpolation=cv2.INTER_NEAREST_EXACT,
)
mono_disp_list.append(da_disp)
gt_disp = 1.0 / (metric_depth + 1e-8)
# work around a bug in UniDepth's output
valid_mask = (metric_depth < 2.0) & (da_disp < 0.02)
gt_disp[valid_mask] = 1e-2
# avoid cases where sky dominates the entire video
sky_ratio = np.sum(da_disp < 0.01) / (da_disp.shape[0] * da_disp.shape[1])
if sky_ratio > 0.5:
non_sky_mask = da_disp > 0.01
gt_disp_ms = gt_disp[non_sky_mask] - np.median(gt_disp[non_sky_mask]) + 1e-8
da_disp_ms = da_disp[non_sky_mask] - np.median(da_disp[non_sky_mask]) + 1e-8
scale = np.median(gt_disp_ms / da_disp_ms)
shift = np.median(gt_disp[non_sky_mask] - scale * da_disp[non_sky_mask])
else:
gt_disp_ms = gt_disp - np.median(gt_disp) + 1e-8
da_disp_ms = da_disp - np.median(da_disp) + 1e-8
scale = np.median(gt_disp_ms / da_disp_ms)
shift = np.median(gt_disp - scale * da_disp)
gt_disp_ms = gt_disp - np.median(gt_disp) + 1e-8
da_disp_ms = da_disp - np.median(da_disp) + 1e-8
scale = np.median(gt_disp_ms / da_disp_ms)
shift = np.median(gt_disp - scale * da_disp)
scales.append(scale)
shifts.append(shift)
print("************** UNIDEPTH FOV ", np.median(fovs))
ff = img_0.shape[1] / (2 * np.tan(np.radians(np.median(fovs) / 2.0)))
K = np.eye(3)
K[0, 0] = ff * 1.0 # pp_intrinsic[0] * (img_0.shape[1] / (pp_intrinsic[1] * 2))
K[1, 1] = ff * 1.0 # pp_intrinsic[0] * (img_0.shape[0] / (pp_intrinsic[2] * 2))
K[0, 2] = (
img_0.shape[1] / 2.0
) # pp_intrinsic[1]) * (img_0.shape[1] / (pp_intrinsic[1] * 2))
K[1, 2] = (
img_0.shape[0] / 2.0
) # (pp_intrinsic[2]) * (img_0.shape[0] / (pp_intrinsic[2] * 2))
ss_product = np.array(scales) * np.array(shifts)
med_idx = np.argmin(np.abs(ss_product - np.median(ss_product)))
align_scale = scales[med_idx] # np.median(np.array(scales))
align_shift = shifts[med_idx] # np.median(np.array(shifts))
normalize_scale = (
np.percentile((align_scale * np.array(mono_disp_list) + align_shift), 98) / 2.0
)
aligns = (align_scale, align_shift, normalize_scale)
for t, image, depth, intrinsics, mask in tqdm(
image_stream(
img_list,
mono_disp_list,
scene_name,
use_depth=True,
aligns=aligns,
K=K,
)
):
rgb_list.append(image[0])
senor_depth_list.append(depth)
# breakpoint()
if t == 0:
args.image_size = [image.shape[2], image.shape[3]]
droid = Droid(args, device=0)
droid.track(t, image, depth, intrinsics=intrinsics, mask=mask)
# last frame
droid.track_final(t, image, depth, intrinsics=intrinsics, mask=mask)
traj_est, depth_est, motion_prob = droid.terminate(
image_stream(
img_list,
mono_disp_list,
scene_name,
use_depth=True,
aligns=aligns,
K=K,
),
_opt_intr=True, # default is opt_focal
full_ba=True,
scene_name=scene_name,
)
save_full_reconstruction(
droid,
traj_est,
rgb_list,
senor_depth_list,
motion_prob,
args.scene_name,
os.path.join(args.dir_path, "reconstructions"),
)
if __name__ == "__main__":
main()
================================================
FILE: camera_pose_annotation/camera_tracking/inference_batch.py
================================================
"""
Batch inference for camera tracking using multiple GPUs.
This module provides functionality for:
- Parallel camera tracking processing across multiple videos
- Multi-GPU support with automatic device assignment
- Subprocess management for camera tracking pipeline
- Progress tracking and error handling
"""
import pandas as pd
import os
import argparse
import concurrent.futures
from multiprocessing import Manager
import subprocess
import queue
from tqdm import tqdm
def process_single_row(row, index, args, worker_id=0):
"""
Process a single video for camera tracking.
"""
dir_path = os.path.join(args.dir_path, row["id"])
device_id = worker_id % args.gpu_num
cmd = (
f"CUDA_VISIBLE_DEVICES={args.gpu_id[device_id]} python camera_pose_annotation/camera_tracking/camera_tracking.py "
f"--dir_path {dir_path} "
f"--weights {args.checkpoints_path}/megasam_final.pth "
f"--disable_vis"
)
process = subprocess.Popen(
cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE
)
stdout, stderr = process.communicate()
if process.returncode != 0:
print(f"Error tracking camera for {row['id']}: {stderr.decode()}")
def worker(task_queue, args, worker_id, pbar):
"""
Worker function for parallel camera tracking processing.
"""
while True:
try:
index, row = task_queue.get(timeout=1)
except queue.Empty:
break
process_single_row(row, index, args, worker_id)
task_queue.task_done()
pbar.update(1)
def parse_args():
"""Parse command line arguments for camera tracking batch inference."""
parser = argparse.ArgumentParser()
parser.add_argument("--csv_path", type=str, help="Path to the csv file")
parser.add_argument("--dir_path", type=str, default="./outputs")
parser.add_argument("--checkpoints_path", type=str, default="./checkpoints")
parser.add_argument(
"--gpu_id", type=str, default="0", help="Comma-separated list of GPU IDs to use"
)
parser.add_argument(
"--num_workers",
type=int,
default=4,
help="Number of workers for parallel processing",
)
parser.add_argument(
"--disable_parallel", action="store_true", help="Disable parallel processing"
)
return parser.parse_args()
def main():
args = parse_args()
# Parse GPU configuration
args.gpu_num = len(args.gpu_id.split(","))
args.gpu_id = [int(gpu) for gpu in args.gpu_id.split(",")]
df = pd.read_csv(args.csv_path)
if args.disable_parallel:
# Sequential processing
for index, row in tqdm(df.iterrows(), total=len(df), desc="Processing rows"):
process_single_row(row, index, args)
else:
# Parallel processing with multiple workers
manager = Manager()
task_queue = manager.Queue()
# Add all tasks to queue
for index, row in df.iterrows():
task_queue.put((index, row))
with tqdm(total=len(df), desc="Processing rows") as pbar:
with concurrent.futures.ThreadPoolExecutor(
max_workers=args.num_workers
) as executor:
futures = []
for id in range(args.num_workers):
futures.append(executor.submit(worker, task_queue, args, id, pbar))
for future in concurrent.futures.as_completed(futures):
future.result()
if __name__ == "__main__":
main()
================================================
FILE: camera_pose_annotation/cvd_opt/__init__.py
================================================
================================================
FILE: camera_pose_annotation/cvd_opt/cvd_opt.py
================================================
# Copyright 2025 DeepMind Technologies Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Consistent video depth optimization."""
# pylint: disable=invalid-name
# pylint: disable=g-importing-member
# pylint: disable=redefined-outer-name
import argparse
import os
from pathlib import Path
import pandas as pd
from geometry_utils import NormalGenerator
import kornia
from lietorch import SE3
import numpy as np
import torch
import zipfile
import tempfile
import OpenEXR
import Imath
def save_depth(path, depths):
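# Write each depth map as a single-channel ("Z") half-float EXR and pack all
# frames into one zip archive, one file per frame (00000.exr, 00001.exr, ...).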
with zipfile.ZipFile(path, "w", zipfile.ZIP_DEFLATED) as z:
for index, depth in enumerate(depths):
height, width = depth.shape
header = OpenEXR.Header(width, height)
header["channels"] = {"Z": Imath.Channel(Imath.PixelType(Imath.PixelType.HALF))}
with tempfile.NamedTemporaryFile(suffix=".exr") as f:
exr = OpenEXR.OutputFile(f.name, header)
exr.writePixels({"Z": depth.astype(np.float16).tobytes()})
exr.close()
z.write(f.name, f"{index:05d}.exr")
def gradient_loss(gt, pred, u):
"""Gradient loss."""
del u
diff = pred - gt
v_gradient = torch.abs(diff[..., 0:-2, 1:-1] - diff[..., 2:, 1:-1]) # * mask_v
h_gradient = torch.abs(diff[..., 1:-1, 0:-2] - diff[..., 1:-1, 2:]) # * mask_h
pred_grad = torch.abs(pred[..., 0:-2, 1:-1] - (pred[..., 2:, 1:-1])) + torch.abs(
pred[..., 1:-1, 0:-2] - pred[..., 1:-1, 2:]
)
gt_grad = torch.abs(gt[..., 0:-2, 1:-1] - (gt[..., 2:, 1:-1])) + torch.abs(
gt[..., 1:-1, 0:-2] - gt[..., 1:-1, 2:]
)
grad_diff = torch.abs(pred_grad - gt_grad)
nearby_mask = (torch.exp(gt[..., 1:-1, 1:-1]) > 1.0).float().detach()
# weight = (1. - torch.exp(-(grad_diff * 5.)).detach())
weight = 1.0 - torch.exp(-(grad_diff * 5.0)).detach()
weight *= nearby_mask
g_loss = torch.mean(h_gradient * weight) + torch.mean(v_gradient * weight)
return g_loss
def si_loss(gt, pred):
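# Scale-invariant log loss: per image, the mean of squared log-differences minus
# the squared mean of log-differences, so a global log offset (global scale) is
# not penalized.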
log_gt = torch.log(torch.clamp(gt, 1e-3, 1e3)).view(gt.shape[0], -1)
log_pred = torch.log(torch.clamp(pred, 1e-3, 1e3)).view(pred.shape[0], -1)
log_diff = log_gt - log_pred
num_pixels = gt.shape[-2] * gt.shape[-1]
data_loss = torch.sum(log_diff**2, dim=-1) / num_pixels - torch.sum(
log_diff, dim=-1
) ** 2 / (num_pixels**2)
return torch.mean(data_loss)
def sobel_fg_alpha(disp, mode="sobel", beta=10.0):
sobel_grad = kornia.filters.spatial_gradient(disp, mode=mode, normalized=False)
sobel_mag = torch.sqrt(
sobel_grad[:, :, 0, Ellipsis] ** 2 + sobel_grad[:, :, 1, Ellipsis] ** 2
)
alpha = torch.exp(-1.0 * beta * sobel_mag).detach()
return alpha
ALPHA_MOTION = 0.25
RESIZE_FACTOR = 0.5
def consistency_loss(
cam_c2w,
K,
K_inv,
disp_data,
init_disp,
uncertainty,
flows,
flow_masks,
ii,
jj,
compute_normals,
fg_alpha,
w_ratio=1.0,
w_flow=0.2,
w_si=1.0,
w_grad=2.0,
w_normal=4.0,
):
"""Consistency loss."""
_, H, W = disp_data.shape
# mesh grid
xx = torch.arange(0, W).view(1, -1).repeat(H, 1)
yy = torch.arange(0, H).view(-1, 1).repeat(1, W)
xx = xx.view(1, 1, H, W) # .repeat(B ,1 ,1 ,1)
yy = yy.view(1, 1, H, W) # .repeat(B ,1 ,1 ,1)
grid = torch.cat((xx, yy), 1).float().cuda().permute(0, 2, 3, 1) # [None, ...]
loss_flow = 0.0 # flow reprojection loss
loss_d_ratio = 0.0 # depth consistency loss
flows_step = flows.permute(0, 2, 3, 1)
flow_masks_step = flow_masks.permute(0, 2, 3, 1).squeeze(-1)
cam_1to2 = torch.bmm(
torch.linalg.inv(torch.index_select(cam_c2w, dim=0, index=jj)),
torch.index_select(cam_c2w, dim=0, index=ii),
)
# warp disp from target time
pixel_locations = grid + flows_step
resize_factor = torch.tensor([W - 1.0, H - 1.0]).cuda()[None, None, None, ...]
normalized_pixel_locations = 2 * (pixel_locations / resize_factor) - 1.0
disp_sampled = torch.nn.functional.grid_sample(
torch.index_select(disp_data, dim=0, index=jj)[:, None, ...],
normalized_pixel_locations,
align_corners=True,
)
uu = torch.index_select(uncertainty, dim=0, index=ii).squeeze(1)
grid_h = torch.cat([grid, torch.ones_like(grid[..., 0:1])], dim=-1).unsqueeze(-1)
# depth of reference view
ref_depth = 1.0 / torch.clamp(
torch.index_select(disp_data, dim=0, index=ii), 1e-3, 1e3
)
pts_3d_ref = ref_depth[..., None, None] * (K_inv[None, None, None] @ grid_h)
rot = cam_1to2[:, None, None, :3, :3]
trans = cam_1to2[:, None, None, :3, 3:4]
pts_3d_tgt = (rot @ pts_3d_ref) + trans # [:, None, None, :, None]
depth_tgt = pts_3d_tgt[:, :, :, 2:3, 0]
disp_tgt = 1.0 / torch.clamp(depth_tgt, 0.1, 1e3)
# flow consistency loss
pts_2D_tgt = K[None, None, None] @ pts_3d_tgt
flow_masks_step_ = flow_masks_step * (pts_2D_tgt[:, :, :, 2, 0] > 0.1)
pts_2D_tgt = pts_2D_tgt[:, :, :, :2, 0] / torch.clamp(
pts_2D_tgt[:, :, :, 2:, 0], 1e-3, 1e3
)
disp_sampled = torch.clamp(disp_sampled, 1e-3, 1e2)
disp_tgt = torch.clamp(disp_tgt, 1e-3, 1e2)
ratio = torch.maximum(
disp_sampled.squeeze() / disp_tgt.squeeze(),
disp_tgt.squeeze() / disp_sampled.squeeze(),
)
ratio_error = torch.abs(ratio - 1.0) #
loss_d_ratio += torch.sum(
(ratio_error * uu + ALPHA_MOTION * torch.log(1.0 / uu)) * flow_masks_step_
) / (torch.sum(flow_masks_step_) + 1e-8)
flow_error = torch.abs(pts_2D_tgt - pixel_locations)
loss_flow += torch.sum(
(flow_error * uu[..., None] + ALPHA_MOTION * torch.log(1.0 / uu[..., None]))
* flow_masks_step_[..., None]
) / (torch.sum(flow_masks_step_) * 2.0 + 1e-8)
# prior mono-depth reg loss
loss_prior = si_loss(init_disp, disp_data)
KK = torch.inverse(K_inv)
# multi gradient consistency
disp_data_ds = disp_data[:, None, ...]
init_disp_ds = init_disp[:, None, ...]
K_rescale = KK.clone()
K_inv_rescale = torch.inverse(K_rescale)
pred_normal = compute_normals[0](
1.0 / torch.clamp(disp_data_ds, 1e-3, 1e3), K_inv_rescale[None]
)
init_normal = compute_normals[0](
1.0 / torch.clamp(init_disp_ds, 1e-3, 1e3), K_inv_rescale[None]
)
loss_normal = torch.mean(
fg_alpha * (1.0 - torch.sum(pred_normal * init_normal, dim=1))
) # / (1e-8 + torch.sum(fg_alpha))
loss_grad = 0.0
for scale in range(4):
interval = 2**scale
disp_data_ds = torch.nn.functional.interpolate(
disp_data[:, None, ...],
scale_factor=(1.0 / interval, 1.0 / interval),
mode="nearest-exact",
)
init_disp_ds = torch.nn.functional.interpolate(
init_disp[:, None, ...],
scale_factor=(1.0 / interval, 1.0 / interval),
mode="nearest-exact",
)
uncertainty_rs = torch.nn.functional.interpolate(
uncertainty,
scale_factor=(1.0 / interval, 1.0 / interval),
mode="nearest-exact",
)
loss_grad += gradient_loss(
torch.log(disp_data_ds), torch.log(init_disp_ds), uncertainty_rs
)
return (
w_ratio * loss_d_ratio
+ w_si * loss_prior
+ w_flow * loss_flow
+ w_normal * loss_normal
+ loss_grad * w_grad
)
def parse_args():
parser = argparse.ArgumentParser()
parser.add_argument("--w_grad", type=float, default=2.0, help="w_grad")
parser.add_argument("--w_normal", type=float, default=6.0, help="w_normal")
parser.add_argument("--dir_path", type=str, default=".", help="directory path")
parser.add_argument("--only_depth", action="store_true", help="only save optimize depth")
return parser.parse_args()
if __name__ == "__main__":
args = parse_args()
scene_name = os.path.basename(args.dir_path)
cache_dir = os.path.join(args.dir_path, "cache-flow")
rootdir = os.path.join(args.dir_path, "reconstructions")
print("***************************** ", scene_name)
img_data = np.load(os.path.join(rootdir, "images.npy"))[:, ::-1, ...]
disp_data = np.load(os.path.join(rootdir, "disps.npy")) + 1e-6
intrinsics = np.load(os.path.join(rootdir, "intrinsics.npy"))
poses = np.load(os.path.join(rootdir, "poses.npy"))
mot_prob = np.load(os.path.join(rootdir, "motion_prob.npy"))
flows = np.load(os.path.join(cache_dir, "flows.npy"), allow_pickle=True)
flow_masks = np.load(os.path.join(cache_dir, "flows_masks.npy"), allow_pickle=True)
flow_masks = np.float32(flow_masks)
iijj = np.load(os.path.join(cache_dir, "ii-jj.npy"), allow_pickle=True)
intrinsics = intrinsics[0]
poses_th = torch.as_tensor(poses, device="cpu").float().cuda()
K = np.eye(3)
K[0, 0] = intrinsics[0]
K[1, 1] = intrinsics[1]
K[0, 2] = intrinsics[2]
K[1, 2] = intrinsics[3]
img_data_pt = (
torch.from_numpy(np.ascontiguousarray(img_data)).float().cuda() / 255.0
)
flows = torch.from_numpy(np.ascontiguousarray(flows)).float().cuda()
flow_masks = (
torch.from_numpy(np.ascontiguousarray(flow_masks)).float().cuda()
) # .unsqueeze(1)
iijj = torch.from_numpy(np.ascontiguousarray(iijj)).float().cuda()
ii = iijj[0, ...].long()
jj = iijj[1, ...].long()
K = torch.from_numpy(K).float().cuda()
init_disp = torch.from_numpy(disp_data).float().cuda()
disp_data = torch.from_numpy(disp_data).float().cuda()
assert init_disp.shape == disp_data.shape
init_disp = torch.nn.functional.interpolate(
init_disp.unsqueeze(1),
scale_factor=(RESIZE_FACTOR, RESIZE_FACTOR),
mode="bilinear",
).squeeze(1)
disp_data = torch.nn.functional.interpolate(
disp_data.unsqueeze(1),
scale_factor=(RESIZE_FACTOR, RESIZE_FACTOR),
mode="bilinear",
).squeeze(1)
fg_alpha = sobel_fg_alpha(init_disp[:, None, ...]) > 0.2
fg_alpha = fg_alpha.squeeze(1).float() + 0.2
cvd_prob = torch.nn.functional.interpolate(
torch.from_numpy(mot_prob).unsqueeze(1).cuda(),
scale_factor=(4, 4),
mode="bilinear",
)
cvd_prob[cvd_prob > 0.5] = 0.5
cvd_prob = torch.clamp(cvd_prob, 1e-3, 1.0)
# rescale intrinsic matrix to small resolution
K_o = K.clone()
K[0:2, ...] *= RESIZE_FACTOR
K_inv = torch.linalg.inv(K)
disp_data.requires_grad = False
poses_th.requires_grad = False
uncertainty = cvd_prob
# First optimize scale and shift to align them
log_scale_ = torch.log(torch.ones(init_disp.shape[0]).to(disp_data.device))
shift_ = torch.zeros(init_disp.shape[0]).to(disp_data.device)
log_scale_.requires_grad = True
shift_.requires_grad = True
uncertainty.requires_grad = True
optim = torch.optim.Adam(
[
{"params": log_scale_, "lr": 1e-2},
{"params": shift_, "lr": 1e-2},
{"params": uncertainty, "lr": 1e-2},
]
)
compute_normals = []
compute_normals.append(NormalGenerator(disp_data.shape[-2], disp_data.shape[-1]))
init_disp = torch.clamp(init_disp, 1e-3, 1e3)
for i in range(100):
optim.zero_grad()
cam_c2w = SE3(poses_th).inv().matrix()
scale_ = torch.exp(log_scale_)
loss = consistency_loss(
cam_c2w,
K,
K_inv,
torch.clamp(
disp_data * scale_[..., None, None] + shift_[..., None, None],
1e-3,
1e3,
),
init_disp,
torch.clamp(uncertainty, 1e-4, 1e3),
flows,
flow_masks,
ii,
jj,
compute_normals,
fg_alpha,
)
loss.backward()
uncertainty.grad = torch.nan_to_num(uncertainty.grad, nan=0.0)
log_scale_.grad = torch.nan_to_num(log_scale_.grad, nan=0.0)
shift_.grad = torch.nan_to_num(shift_.grad, nan=0.0)
optim.step()
print("step ", i, loss.item())
# Then optimize depth and uncertainty
disp_data = (
disp_data * torch.exp(log_scale_)[..., None, None].detach()
+ shift_[..., None, None].detach()
)
init_disp = (
init_disp * torch.exp(log_scale_)[..., None, None].detach()
+ shift_[..., None, None].detach()
)
init_disp = torch.clamp(init_disp, 1e-3, 1e3)
disp_data.requires_grad = True
uncertainty.requires_grad = True
poses_th.requires_grad = False # True
optim = torch.optim.Adam(
[
{"params": disp_data, "lr": 5e-3},
{"params": uncertainty, "lr": 5e-3},
]
)
losses = []
for i in range(400):
optim.zero_grad()
cam_c2w = SE3(poses_th).inv().matrix()
loss = consistency_loss(
cam_c2w,
K,
K_inv,
torch.clamp(disp_data, 1e-3, 1e3),
init_disp,
torch.clamp(uncertainty, 1e-4, 1e3),
flows,
flow_masks,
ii,
jj,
compute_normals,
fg_alpha,
w_ratio=1.0,
w_flow=0.2,
w_si=1,
w_grad=args.w_grad,
w_normal=args.w_normal,
)
loss.backward()
disp_data.grad = torch.nan_to_num(disp_data.grad, nan=0.0)
uncertainty.grad = torch.nan_to_num(uncertainty.grad, nan=0.0)
optim.step()
print("step ", i, loss.item())
losses.append(loss)
disp_data_opt = (
torch.nn.functional.interpolate(
disp_data.unsqueeze(1), scale_factor=(2, 2), mode="bilinear"
)
.squeeze(1)
.detach()
.cpu()
.numpy()
)
if args.only_depth:
save_depth(
os.path.join(args.dir_path, "depth_opt.zip"),
disp_data_opt
)
else:
np.savez(
os.path.join(args.dir_path, "sgd_cvd_hr.npz"),
images=np.uint8(img_data_pt.cpu().numpy().transpose(0, 2, 3, 1) * 255.0),
depths=np.clip(np.float16(1.0 / disp_data_opt), 1e-3, 1e2),
intrinsic=K_o.detach().cpu().numpy(),
cam_c2w=cam_c2w.detach().cpu().numpy(),
)
================================================
FILE: camera_pose_annotation/cvd_opt/geometry_utils.py
================================================
# Copyright 2025 DeepMind Technologies Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Geometry utils for MegaSaM."""
# pylint: disable=invalid-name
import kornia
import numpy as np
import torch
from torch import jit
from torch import nn
from torch import Tensor # pylint: disable=g-importing-member
import torch.nn.functional as F
@torch.jit.script
def to_homogeneous(input_tensor: Tensor, dim: int = 0) -> Tensor:
"""Converts tensor to homogeneous coordinates by adding ones to the specified dimension."""
ones = torch.ones_like(input_tensor.select(dim, 0).unsqueeze(dim))
output_bkn = torch.cat([input_tensor, ones], dim=dim)
return output_bkn
class BackprojectDepth(nn.Module):
"""Layer that projects points from 2D camera to 3D space.
The 3D points are represented in homogeneous coordinates.
"""
def __init__(self, height: int, width: int):
super().__init__()
self.height = height
self.width = width
xx, yy = torch.meshgrid(
torch.arange(self.width),
torch.arange(self.height),
indexing="xy",
)
pix_coords_2hw = torch.stack((xx, yy), axis=0) + 0.5
pix_coords_13N = (
to_homogeneous(
pix_coords_2hw,
dim=0,
)
.flatten(1)
.unsqueeze(0)
)
# make these tensors into buffers so they are put on the correct GPU
# automatically
self.register_buffer("pix_coords_13N", pix_coords_13N)
# @jit.script_method
def forward(self, depth_b1hw: Tensor, invK_b44: Tensor) -> Tensor:
"""Backprojects spatial points in 2D image space to world space using invK_b44 at the depths defined in depth_b1hw."""
cam_points_b3N = torch.matmul(
invK_b44[:, :3, :3], self.pix_coords_13N.float().cuda()
)
cam_points_b3N = depth_b1hw.flatten(start_dim=2) * cam_points_b3N
cam_points_b4N = to_homogeneous(cam_points_b3N, dim=1)
return cam_points_b4N
class Project3D(jit.ScriptModule):
"""Layer that projects 3D points into the 2D camera."""
def __init__(self, eps: float = 1e-8):
super().__init__()
self.register_buffer("eps", torch.tensor(eps).view(1, 1, 1))
@jit.script_method
def forward(
self, points_b4N: Tensor, K_b44: Tensor, cam_T_world_b44: Tensor
) -> Tensor:
"""Projects spatial points in 3D world space to camera image space using the extrinsics matrix cam_T_world_b44 and intrinsics K_b44."""
P_b44 = K_b44 @ cam_T_world_b44
cam_points_b3N = P_b44[:, :3] @ points_b4N
# from Kornia and OpenCV:
# https://kornia.readthedocs.io/en/latest/_modules/kornia/geometry/conversions.html#convert_points_from_homogeneous
mask = torch.abs(cam_points_b3N[:, 2:]) > self.eps
depth_b1N = cam_points_b3N[:, 2:] + self.eps
scale = torch.where(
mask, 1.0 / depth_b1N, torch.tensor(1.0, device=depth_b1N.device)
)
pix_coords_b2N = cam_points_b3N[:, :2] * scale
return torch.cat([pix_coords_b2N, depth_b1N], dim=1)
class NormalGenerator(nn.Module):
"""Estimates normals from depth maps."""
def __init__(
self,
height: int,
width: int,
smoothing_kernel_size: int = 5,
smoothing_kernel_std: float = 2.0,
):
"""Estimates normals from depth maps."""
super().__init__()
self.height = height
self.width = width
self.backproject = BackprojectDepth(self.height, self.width)
self.kernel_size = smoothing_kernel_size
self.std = smoothing_kernel_std
# @jit.script_method
def forward(self, depth_b1hw: Tensor, invK_b44: Tensor) -> Tensor:
"""Estimates a normal at each location in the depth map."""
# First smooths the incoming depth maps with a Gaussian blur, backprojects
# those depth points into world space (see BackprojectDepth), estimates
# the spatial gradients at those points, and finally takes their normalized
# cross product to obtain a normal vector at each location.
depth_smooth_b1hw = kornia.filters.gaussian_blur2d(
depth_b1hw,
(self.kernel_size, self.kernel_size),
(self.std, self.std),
)
cam_points_b4N = self.backproject(depth_smooth_b1hw, invK_b44)
cam_points_b3hw = cam_points_b4N[:, :3].view(-1, 3, self.height, self.width)
gradients_b32hw = kornia.filters.spatial_gradient(cam_points_b3hw)
return F.normalize(
torch.cross(
gradients_b32hw[:, :, 0],
gradients_b32hw[:, :, 1],
dim=1,
),
dim=1,
)
def get_camera_rays(
world_T_cam_b44,
world_points_b3N,
in_camera_frame,
cam_T_world_b44=None,
eps=1e-4,
):
"""Computes camera rays for given camera data and points, optionally shifts rays to camera frame."""
del eps
if in_camera_frame:
batch_size = world_points_b3N.shape[0]
num_points = world_points_b3N.shape[2]
world_points_b4N = torch.cat(
[
world_points_b3N,
torch.ones(batch_size, 1, num_points).to(world_points_b3N.device),
],
1,
)
camera_points_b3N = torch.matmul(
cam_T_world_b44[:, :3, :4], world_points_b4N
)
rays_b3N = camera_points_b3N
else:
rays_b3N = world_points_b3N - world_T_cam_b44[:, 0:3, 3][:, :, None].expand(
world_points_b3N.shape
)
rays_b3N = torch.nn.functional.normalize(rays_b3N, dim=1)
return rays_b3N
def pose_distance(pose_b44):
"""DVMVS frame pose distance."""
R = pose_b44[:, :3, :3]
t = pose_b44[:, :3, 3]
R_trace = R.diagonal(offset=0, dim1=-1, dim2=-2).sum(-1)
R_measure = torch.sqrt(
2 * (1 - torch.minimum(torch.ones_like(R_trace) * 3.0, R_trace) / 3)
)
t_measure = torch.norm(t, dim=1)
combined_measure = torch.sqrt(t_measure**2 + R_measure**2)
return combined_measure, R_measure, t_measure
def qvec2rotmat(qvec):
"""Quaternion to 3x3 rotation matrix."""
return np.array([
[
1 - 2 * qvec[2] ** 2 - 2 * qvec[3] ** 2,
2 * qvec[1] * qvec[2] - 2 * qvec[0] * qvec[3],
2 * qvec[3] * qvec[1] + 2 * qvec[0] * qvec[2],
],
[
2 * qvec[1] * qvec[2] + 2 * qvec[0] * qvec[3],
1 - 2 * qvec[1] ** 2 - 2 * qvec[3] ** 2,
2 * qvec[2] * qvec[3] - 2 * qvec[0] * qvec[1],
],
[
2 * qvec[3] * qvec[1] - 2 * qvec[0] * qvec[2],
2 * qvec[2] * qvec[3] + 2 * qvec[0] * qvec[1],
1 - 2 * qvec[1] ** 2 - 2 * qvec[2] ** 2,
],
])
def rotx(t):
"""3D Rotation about the x-axis."""
c = np.cos(t)
s = np.sin(t)
return np.array([[1, 0, 0], [0, c, -s], [0, s, c]])
def roty(t):
"""3D Rotation about the y-axis."""
c = np.cos(t)
s = np.sin(t)
return np.array([[c, 0, s], [0, 1, 0], [-s, 0, c]])
def rotz(t):
"""3D Rotation about the z-axis."""
c = np.cos(t)
s = np.sin(t)
return np.array([[c, -s, 0], [s, c, 0], [0, 0, 1]])
================================================
FILE: camera_pose_annotation/cvd_opt/inference_batch.py
================================================
"""
Batch inference script for CVD (Consistent Video Depth) optimization.
Processes multiple video clips in parallel using multi-GPU setup.
"""
import pandas as pd
import os
import argparse
import concurrent.futures
from multiprocessing import Manager
import subprocess
import queue
from tqdm import tqdm
def process_single_row(row, index, args, worker_id=0):
"""Process a single video clip for CVD optimization."""
dir_path = os.path.join(args.dir_path, row["id"])
device_id = worker_id % args.gpu_num
# Build command for CVD optimization with specific GPU
cmd = (
f"CUDA_VISIBLE_DEVICES={args.gpu_id[device_id]} python camera_pose_annotation/cvd_opt/cvd_opt.py "
f"--dir_path {dir_path} "
f"--w_grad 2.0 --w_normal 5.0 "
)
if args.only_depth:
cmd += "--only_depth "
process = subprocess.Popen(
cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE
)
stdout, stderr = process.communicate()
if process.returncode != 0:
print(f"Error optimizing CVD for {row['id']}: {stderr.decode()}")
def worker(task_queue, args, worker_id, pbar):
"""Worker function for parallel CVD optimization processing."""
while True:
try:
index, row = task_queue.get(timeout=1)
except queue.Empty:
break
process_single_row(row, index, args, worker_id)
task_queue.task_done()
pbar.update(1)
def parse_args():
"""Parse command line arguments for CVD batch processing."""
parser = argparse.ArgumentParser()
parser.add_argument("--csv_path", type=str, help="Path to the csv file")
parser.add_argument("--dir_path", type=str, default="./outputs")
parser.add_argument("--only_depth", action="store_true", help="Only save optimized depth")
parser.add_argument(
"--gpu_id", type=str, default="0", help="Comma-separated list of GPU IDs to use"
)
parser.add_argument(
"--num_workers",
type=int,
default=4,
help="Number of workers for parallel processing",
)
parser.add_argument(
"--disable_parallel", action="store_true", help="Disable parallel processing"
)
return parser.parse_args()
def main():
args = parse_args()
# Parse GPU configuration
args.gpu_num = len(args.gpu_id.split(","))
args.gpu_id = [int(gpu) for gpu in args.gpu_id.split(",")]
df = pd.read_csv(args.csv_path)
if args.disable_parallel:
# Sequential processing
for index, row in tqdm(df.iterrows(), total=len(df), desc="Processing rows"):
process_single_row(row, index, args)
else:
# Parallel processing with multiple workers
manager = Manager()
task_queue = manager.Queue()
for index, row in df.iterrows():
task_queue.put((index, row))
with tqdm(total=len(df), desc="Processing rows") as pbar:
with concurrent.futures.ThreadPoolExecutor(
max_workers=args.num_workers
) as executor:
futures = []
for id in range(args.num_workers):
futures.append(executor.submit(worker, task_queue, args, id, pbar))
for future in concurrent.futures.as_completed(futures):
future.result()
if __name__ == "__main__":
main()
================================================
FILE: camera_pose_annotation/cvd_opt/preprocess/__init__.py
================================================
================================================
FILE: camera_pose_annotation/cvd_opt/preprocess/core/__init__.py
================================================
================================================
FILE: camera_pose_annotation/cvd_opt/preprocess/core/corr.py
================================================
# Copyright 2025 DeepMind Technologies Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Correlation block for MegaSaM."""
import torch
import torch.nn.functional as F
from .utils.utils import bilinear_sampler
# pylint: disable=g-import-not-at-top
try:
import alt_cuda_corr
except: # pylint: disable=bare-except
# alt_cuda_corr is not compiled
pass
class CorrBlock:
"""Correlation block for MegaSaM."""
def __init__(self, fmap1, fmap2, num_levels=4, radius=4):
self.num_levels = num_levels
self.radius = radius
self.corr_pyramid = []
# all pairs correlation
corr = CorrBlock.corr(fmap1, fmap2)
batch, h1, w1, dim, h2, w2 = corr.shape
corr = corr.reshape(batch * h1 * w1, dim, h2, w2)
self.corr_pyramid.append(corr)
for _ in range(self.num_levels - 1):
corr = F.avg_pool2d(corr, 2, stride=2)
self.corr_pyramid.append(corr)
def __call__(self, coords):
r = self.radius
coords = coords.permute(0, 2, 3, 1)
batch, h1, w1, _ = coords.shape
out_pyramid = []
for i in range(self.num_levels):
corr = self.corr_pyramid[i]
dx = torch.linspace(-r, r, 2 * r + 1)
dy = torch.linspace(-r, r, 2 * r + 1)
delta = torch.stack(torch.meshgrid(dy, dx), axis=-1).to(coords.device)
centroid_lvl = coords.reshape(batch * h1 * w1, 1, 1, 2) / 2**i
delta_lvl = delta.view(1, 2 * r + 1, 2 * r + 1, 2)
coords_lvl = centroid_lvl + delta_lvl
corr = bilinear_sampler(corr, coords_lvl)
corr = corr.view(batch, h1, w1, -1)
out_pyramid.append(corr)
out = torch.cat(out_pyramid, dim=-1)
return out.permute(0, 3, 1, 2).contiguous().float()
@classmethod
def corr(cls, fmap1, fmap2):
del cls
batch, dim, ht, wd = fmap1.shape
fmap1 = fmap1.view(batch, dim, ht * wd)
fmap2 = fmap2.view(batch, dim, ht * wd)
corr = torch.matmul(fmap1.transpose(1, 2), fmap2)
corr = corr.view(batch, ht, wd, 1, ht, wd)
return corr / torch.sqrt(torch.tensor(dim).float())
class AlternateCorrBlock:
"""Correlation block for MegaSaM."""
def __init__(self, fmap1, fmap2, num_levels=4, radius=4):
self.num_levels = num_levels
self.radius = radius
self.pyramid = [(fmap1, fmap2)]
for _ in range(self.num_levels):
fmap1 = F.avg_pool2d(fmap1, 2, stride=2)
fmap2 = F.avg_pool2d(fmap2, 2, stride=2)
self.pyramid.append((fmap1, fmap2))
def __call__(self, coords):
coords = coords.permute(0, 2, 3, 1)
# pylint: disable=invalid-name
B, H, W, _ = coords.shape
dim = self.pyramid[0][0].shape[1]
corr_list = []
for i in range(self.num_levels):
r = self.radius
fmap1_i = self.pyramid[0][0].permute(0, 2, 3, 1).contiguous()
fmap2_i = self.pyramid[i][1].permute(0, 2, 3, 1).contiguous()
coords_i = (coords / 2**i).reshape(B, 1, H, W, 2).contiguous()
(corr,) = alt_cuda_corr.forward(fmap1_i, fmap2_i, coords_i, r)
corr_list.append(corr.squeeze(1))
corr = torch.stack(corr_list, dim=1)
corr = corr.reshape(B, -1, H, W)
return corr / torch.sqrt(torch.tensor(dim).float())
================================================
FILE: camera_pose_annotation/cvd_opt/preprocess/core/datasets.py
================================================
# Copyright 2025 DeepMind Technologies Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Dataset classes for MegaSaM."""
from glob import glob
import os
import os.path as osp
import random
import numpy as np
import torch
from torch.utils import data
from utils import frame_utils
from utils.augmentor import FlowAugmentor
from utils.augmentor import SparseFlowAugmentor
class FlowDataset(data.Dataset):
"""Base class for flow datasets."""
def __init__(self, aug_params=None, sparse=False):
self.augmentor = None
self.sparse = sparse
if aug_params is not None:
if sparse:
self.augmentor = SparseFlowAugmentor(**aug_params)
else:
self.augmentor = FlowAugmentor(**aug_params)
self.is_test = False
self.init_seed = False
self.flow_list = []
self.image_list = []
self.extra_info = []
def __getitem__(self, index):
if self.is_test:
img1 = frame_utils.read_gen(self.image_list[index][0])
img2 = frame_utils.read_gen(self.image_list[index][1])
img1 = np.array(img1).astype(np.uint8)[..., :3]
img2 = np.array(img2).astype(np.uint8)[..., :3]
img1 = torch.from_numpy(img1).permute(2, 0, 1).float()
img2 = torch.from_numpy(img2).permute(2, 0, 1).float()
return img1, img2, self.extra_info[index]
if not self.init_seed:
worker_info = torch.utils.data.get_worker_info()
if worker_info is not None:
torch.manual_seed(worker_info.id)
np.random.seed(worker_info.id)
random.seed(worker_info.id)
self.init_seed = True
index = index % len(self.image_list)
valid = None
if self.sparse:
flow, valid = frame_utils.readFlowKITTI(self.flow_list[index])
else:
flow = frame_utils.read_gen(self.flow_list[index])
img1 = frame_utils.read_gen(self.image_list[index][0])
img2 = frame_utils.read_gen(self.image_list[index][1])
flow = np.array(flow).astype(np.float32)
img1 = np.array(img1).astype(np.uint8)
img2 = np.array(img2).astype(np.uint8)
# grayscale images
if len(img1.shape) == 2:
img1 = np.tile(img1[..., None], (1, 1, 3))
img2 = np.tile(img2[..., None], (1, 1, 3))
else:
img1 = img1[..., :3]
img2 = img2[..., :3]
if self.augmentor is not None:
if self.sparse:
img1, img2, flow, valid = self.augmentor(img1, img2, flow, valid)
else:
img1, img2, flow = self.augmentor(img1, img2, flow)
img1 = torch.from_numpy(img1).permute(2, 0, 1).float()
img2 = torch.from_numpy(img2).permute(2, 0, 1).float()
flow = torch.from_numpy(flow).permute(2, 0, 1).float()
if valid is not None:
valid = torch.from_numpy(valid)
else:
valid = (flow[0].abs() < 1000) & (flow[1].abs() < 1000)
return img1, img2, flow, valid.float()
def __rmul__(self, v):
self.flow_list = v * self.flow_list
self.image_list = v * self.image_list
return self
def __len__(self):
return len(self.image_list)
class MpiSintel(FlowDataset):
"""MpiSintel dataset."""
def __init__(
self,
aug_params=None,
split='training',
root='datasets/Sintel',
dstype='clean',
):
super(MpiSintel, self).__init__(aug_params)
flow_root = osp.join(root, split, 'flow')
image_root = osp.join(root, split, dstype)
if split == 'test':
self.is_test = True
for scene in os.listdir(image_root):
image_list = sorted(glob(osp.join(image_root, scene, '*.png')))
for i in range(len(image_list) - 1):
self.image_list += [[image_list[i], image_list[i + 1]]]
self.extra_info += [(scene, i)] # scene and frame_id
if split != 'test':
self.flow_list += sorted(glob(osp.join(flow_root, scene, '*.flo')))
class FlyingChairs(FlowDataset):
"""FlyingChairs dataset."""
def __init__(
self,
aug_params=None,
split='train',
root='datasets/FlyingChairs_release/data',
):
super(FlyingChairs, self).__init__(aug_params)
images = sorted(glob(osp.join(root, '*.ppm')))
flows = sorted(glob(osp.join(root, '*.flo')))
assert len(images) // 2 == len(flows)
split_list = np.loadtxt('chairs_split.txt', dtype=np.int32)
for i in range(len(flows)):
exid = split_list[i]
if (split == 'training' and exid == 1) or (
split == 'validation' and exid == 2
):
self.flow_list += [flows[i]]
self.image_list += [[images[2 * i], images[2 * i + 1]]]
class FlyingThings3D(FlowDataset):
"""FlyingThings3D dataset."""
def __init__(
self,
aug_params=None,
root='datasets/FlyingThings3D',
dstype='frames_cleanpass',
):
super(FlyingThings3D, self).__init__(aug_params)
for cam in ['left']:
for direction in ['into_future', 'into_past']:
image_dirs = sorted(glob(osp.join(root, dstype, 'TRAIN/*/*')))
image_dirs = sorted([osp.join(f, cam) for f in image_dirs])
flow_dirs = sorted(glob(osp.join(root, 'optical_flow/TRAIN/*/*')))
flow_dirs = sorted([osp.join(f, direction, cam) for f in flow_dirs])
for idir, fdir in zip(image_dirs, flow_dirs):
images = sorted(glob(osp.join(idir, '*.png')))
flows = sorted(glob(osp.join(fdir, '*.pfm')))
for i in range(len(flows) - 1):
if direction == 'into_future':
self.image_list += [[images[i], images[i + 1]]]
self.flow_list += [flows[i]]
elif direction == 'into_past':
self.image_list += [[images[i + 1], images[i]]]
self.flow_list += [flows[i + 1]]
class KITTI(FlowDataset):
"""KITTI dataset."""
def __init__(self, aug_params=None, split='training', root='datasets/KITTI'):
super(KITTI, self).__init__(aug_params, sparse=True)
if split == 'testing':
self.is_test = True
root = osp.join(root, split)
images1 = sorted(glob(osp.join(root, 'image_2/*_10.png')))
images2 = sorted(glob(osp.join(root, 'image_2/*_11.png')))
for img1, img2 in zip(images1, images2):
frame_id = img1.split('/')[-1]
self.extra_info += [[frame_id]]
self.image_list += [[img1, img2]]
if split == 'training':
self.flow_list = sorted(glob(osp.join(root, 'flow_occ/*_10.png')))
class HD1K(FlowDataset):
"""HD1K dataset."""
def __init__(self, aug_params=None, root='datasets/HD1k'):
super(HD1K, self).__init__(aug_params, sparse=True)
seq_ix = 0
while 1:
flows = sorted(
glob(
os.path.join(root, 'hd1k_flow_gt', 'flow_occ/%06d_*.png' % seq_ix)
)
)
images = sorted(
glob(os.path.join(root, 'hd1k_input', 'image_2/%06d_*.png' % seq_ix))
)
if not flows:
break
for i in range(len(flows) - 1):
self.flow_list += [flows[i]]
self.image_list += [[images[i], images[i + 1]]]
seq_ix += 1
# pylint: disable=invalid-name
def fetch_dataloader(args, TRAIN_DS='C+T+K+S+H'):
"""Create the data loader for the corresponding training set."""
if args.stage == 'chairs':
aug_params = {
'crop_size': args.image_size,
'min_scale': -0.1,
'max_scale': 1.0,
'do_flip': True,
}
train_dataset = FlyingChairs(aug_params, split='training')
elif args.stage == 'things':
aug_params = {
'crop_size': args.image_size,
'min_scale': -0.4,
'max_scale': 0.8,
'do_flip': True,
}
clean_dataset = FlyingThings3D(aug_params, dstype='frames_cleanpass')
final_dataset = FlyingThings3D(aug_params, dstype='frames_finalpass')
train_dataset = clean_dataset + final_dataset
elif args.stage == 'sintel':
aug_params = {
'crop_size': args.image_size,
'min_scale': -0.2,
'max_scale': 0.6,
'do_flip': True,
}
things = FlyingThings3D(aug_params, dstype='frames_cleanpass')
sintel_clean = MpiSintel(aug_params, split='training', dstype='clean')
sintel_final = MpiSintel(aug_params, split='training', dstype='final')
if TRAIN_DS == 'C+T+K+S+H':
kitti = KITTI({
'crop_size': args.image_size,
'min_scale': -0.3,
'max_scale': 0.5,
'do_flip': True,
})
hd1k = HD1K({
'crop_size': args.image_size,
'min_scale': -0.5,
'max_scale': 0.2,
'do_flip': True,
})
train_dataset = (
100 * sintel_clean
+ 100 * sintel_final
+ 200 * kitti
+ 5 * hd1k
+ things
)
elif TRAIN_DS == 'C+T+K/S':
train_dataset = 100 * sintel_clean + 100 * sintel_final + things
else:
raise ValueError('Unknown split: %s' % TRAIN_DS)
elif args.stage == 'kitti':
aug_params = {
'crop_size': args.image_size,
'min_scale': -0.2,
'max_scale': 0.4,
'do_flip': False,
}
train_dataset = KITTI(aug_params, split='training')
else:
raise ValueError('Unknown training set: %s' % args.stage)
train_loader = data.DataLoader(
train_dataset,
batch_size=args.batch_size,
pin_memory=False,
shuffle=True,
num_workers=4,
drop_last=True,
)
print('Training with %d image pairs' % len(train_dataset))
return train_loader
================================================
FILE: camera_pose_annotation/cvd_opt/preprocess/core/extractor.py
================================================
# Copyright 2025 DeepMind Technologies Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Network layer classes for MegaSaM."""
import torch
from torch import nn
class ResidualBlock(nn.Module):
"""Residual block for MegaSaM."""
def __init__(self, in_planes, planes, norm_fn='group', stride=1):
super(ResidualBlock, self).__init__()
self.conv1 = nn.Conv2d(
in_planes, planes, kernel_size=3, padding=1, stride=stride
)
self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, padding=1)
self.relu = nn.ReLU(inplace=True)
num_groups = planes // 8
if norm_fn == 'group':
self.norm1 = nn.GroupNorm(num_groups=num_groups, num_channels=planes)
self.norm2 = nn.GroupNorm(num_groups=num_groups, num_channels=planes)
if stride != 1:
self.norm3 = nn.GroupNorm(num_groups=num_groups, num_channels=planes)
elif norm_fn == 'batch':
self.norm1 = nn.BatchNorm2d(planes)
self.norm2 = nn.BatchNorm2d(planes)
if stride != 1:
self.norm3 = nn.BatchNorm2d(planes)
elif norm_fn == 'instance':
self.norm1 = nn.InstanceNorm2d(planes)
self.norm2 = nn.InstanceNorm2d(planes)
if stride != 1:
self.norm3 = nn.InstanceNorm2d(planes)
elif norm_fn == 'none':
self.norm1 = nn.Sequential()
self.norm2 = nn.Sequential()
if stride != 1:
self.norm3 = nn.Sequential()
if stride == 1:
self.downsample = None
else:
self.downsample = nn.Sequential(
nn.Conv2d(in_planes, planes, kernel_size=1, stride=stride), self.norm3
)
def forward(self, x):
y = x
y = self.relu(self.norm1(self.conv1(y)))
y = self.relu(self.norm2(self.conv2(y)))
if self.downsample is not None:
x = self.downsample(x)
return self.relu(x + y)
class BottleneckBlock(nn.Module):
"""Bottleneck block for MegaSaM."""
def __init__(self, in_planes, planes, norm_fn='group', stride=1):
super(BottleneckBlock, self).__init__()
self.conv1 = nn.Conv2d(in_planes, planes // 4, kernel_size=1, padding=0)
self.conv2 = nn.Conv2d(
planes // 4, planes // 4, kernel_size=3, padding=1, stride=stride
)
self.conv3 = nn.Conv2d(planes // 4, planes, kernel_size=1, padding=0)
self.relu = nn.ReLU(inplace=True)
num_groups = planes // 8
if norm_fn == 'group':
self.norm1 = nn.GroupNorm(num_groups=num_groups, num_channels=planes // 4)
self.norm2 = nn.GroupNorm(num_groups=num_groups, num_channels=planes // 4)
self.norm3 = nn.GroupNorm(num_groups=num_groups, num_channels=planes)
if stride != 1:
self.norm4 = nn.GroupNorm(num_groups=num_groups, num_channels=planes)
elif norm_fn == 'batch':
self.norm1 = nn.BatchNorm2d(planes // 4)
self.norm2 = nn.BatchNorm2d(planes // 4)
self.norm3 = nn.BatchNorm2d(planes)
if stride != 1:
self.norm4 = nn.BatchNorm2d(planes)
elif norm_fn == 'instance':
self.norm1 = nn.InstanceNorm2d(planes // 4)
self.norm2 = nn.InstanceNorm2d(planes // 4)
self.norm3 = nn.InstanceNorm2d(planes)
if stride != 1:
self.norm4 = nn.InstanceNorm2d(planes)
elif norm_fn == 'none':
self.norm1 = nn.Sequential()
self.norm2 = nn.Sequential()
self.norm3 = nn.Sequential()
if stride != 1:
self.norm4 = nn.Sequential()
if stride == 1:
self.downsample = None
else:
self.downsample = nn.Sequential(
nn.Conv2d(in_planes, planes, kernel_size=1, stride=stride), self.norm4
)
def forward(self, x):
y = x
y = self.relu(self.norm1(self.conv1(y)))
y = self.relu(self.norm2(self.conv2(y)))
y = self.relu(self.norm3(self.conv3(y)))
if self.downsample is not None:
x = self.downsample(x)
return self.relu(x + y)
class BasicEncoder(nn.Module):
"""Basic encoder for MegaSaM."""
def __init__(self, output_dim=128, norm_fn='batch', dropout=0.0):
super(BasicEncoder, self).__init__()
self.norm_fn = norm_fn
if self.norm_fn == 'group':
self.norm1 = nn.GroupNorm(num_groups=8, num_channels=64)
elif self.norm_fn == 'batch':
self.norm1 = nn.BatchNorm2d(64)
elif self.norm_fn == 'instance':
self.norm1 = nn.InstanceNorm2d(64)
elif self.norm_fn == 'none':
self.norm1 = nn.Sequential()
self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3)
self.relu1 = nn.ReLU(inplace=True)
self.in_planes = 64
self.layer1 = self._make_layer(64, stride=1)
self.layer2 = self._make_layer(96, stride=2)
self.layer3 = self._make_layer(128, stride=2)
# output convolution
self.conv2 = nn.Conv2d(128, output_dim, kernel_size=1)
self.dropout = None
if dropout > 0:
self.dropout = nn.Dropout2d(p=dropout)
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
elif isinstance(m, (nn.BatchNorm2d, nn.InstanceNorm2d, nn.GroupNorm)):
if m.weight is not None:
nn.init.constant_(m.weight, 1)
if m.bias is not None:
nn.init.constant_(m.bias, 0)
def _make_layer(self, dim, stride=1):
layer1 = ResidualBlock(self.in_planes, dim, self.norm_fn, stride=stride)
layer2 = ResidualBlock(dim, dim, self.norm_fn, stride=1)
layers = (layer1, layer2)
self.in_planes = dim
return nn.Sequential(*layers)
def forward(self, x):
# if input is list, combine batch dimension
is_list = isinstance(x, tuple) or isinstance(x, list)
if is_list:
batch_dim = x[0].shape[0]
x = torch.cat(x, dim=0)
x = self.conv1(x)
x = self.norm1(x)
x = self.relu1(x)
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = self.conv2(x)
if self.training and self.dropout is not None:
x = self.dropout(x)
if is_list:
x = torch.split(x, [batch_dim, batch_dim], dim=0) # pylint: disable=undefined-variable
return x
class SmallEncoder(nn.Module):
"""Small encoder for MegaSaM."""
def __init__(self, output_dim=128, norm_fn='batch', dropout=0.0):
super(SmallEncoder, self).__init__()
self.norm_fn = norm_fn
if self.norm_fn == 'group':
self.norm1 = nn.GroupNorm(num_groups=8, num_channels=32)
elif self.norm_fn == 'batch':
self.norm1 = nn.BatchNorm2d(32)
elif self.norm_fn == 'instance':
self.norm1 = nn.InstanceNorm2d(32)
elif self.norm_fn == 'none':
self.norm1 = nn.Sequential()
self.conv1 = nn.Conv2d(3, 32, kernel_size=7, stride=2, padding=3)
self.relu1 = nn.ReLU(inplace=True)
self.in_planes = 32
self.layer1 = self._make_layer(32, stride=1)
self.layer2 = self._make_layer(64, stride=2)
self.layer3 = self._make_layer(96, stride=2)
self.dropout = None
if dropout > 0:
self.dropout = nn.Dropout2d(p=dropout)
self.conv2 = nn.Conv2d(96, output_dim, kernel_size=1)
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
elif isinstance(m, (nn.BatchNorm2d, nn.InstanceNorm2d, nn.GroupNorm)):
if m.weight is not None:
nn.init.constant_(m.weight, 1)
if m.bias is not None:
nn.init.constant_(m.bias, 0)
def _make_layer(self, dim, stride=1):
layer1 = BottleneckBlock(self.in_planes, dim, self.norm_fn, stride=stride)
layer2 = BottleneckBlock(dim, dim, self.norm_fn, stride=1)
layers = (layer1, layer2)
self.in_planes = dim
return nn.Sequential(*layers)
def forward(self, x):
# if input is list, combine batch dimension
is_list = isinstance(x, tuple) or isinstance(x, list)
if is_list:
batch_dim = x[0].shape[0]
x = torch.cat(x, dim=0)
x = self.conv1(x)
x = self.norm1(x)
x = self.relu1(x)
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = self.conv2(x)
if self.training and self.dropout is not None:
x = self.dropout(x)
if is_list:
x = torch.split(x, [batch_dim, batch_dim], dim=0) # pylint: disable=undefined-variable
return x
================================================
FILE: camera_pose_annotation/cvd_opt/preprocess/core/raft.py
================================================
# Copyright 2025 DeepMind Technologies Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""RAFT network for MegaSaM."""
from .corr import AlternateCorrBlock
from .corr import CorrBlock
from .extractor import BasicEncoder
from .extractor import SmallEncoder
import torch
from torch import nn
import torch.nn.functional as F
from .update import BasicUpdateBlock
from .update import SmallUpdateBlock
from .utils.utils import coords_grid
from .utils.utils import upflow8
try:
autocast = torch.cuda.amp.autocast
except: # pylint: disable=bare-except
# dummy autocast for PyTorch < 1.6
class autocast: # pylint: disable=invalid-name
def __init__(self, enabled):
pass
def __enter__(self):
pass
def __exit__(self, *args):
pass
class RAFT(nn.Module):
"""RAFT network for MegaSaM."""
def __init__(self, args):
super(RAFT, self).__init__()
self.args = args
self.mixed_precision = True
if args.small:
self.hidden_dim = hdim = 96
self.context_dim = cdim = 64
args.corr_levels = 4
args.corr_radius = 3
else:
self.hidden_dim = hdim = 128
self.context_dim = cdim = 128
args.corr_levels = 4
args.corr_radius = 4
if 'dropout' not in self.args:
self.args.dropout = 0
if 'alternate_corr' not in self.args:
self.args.alternate_corr = False
# feature network, context network, and update block
if args.small:
self.fnet = SmallEncoder(
output_dim=128, norm_fn='instance', dropout=args.dropout
)
self.cnet = SmallEncoder(
output_dim=hdim + cdim, norm_fn='none', dropout=args.dropout
)
self.update_block = SmallUpdateBlock(self.args, hidden_dim=hdim)
else:
self.fnet = BasicEncoder(
output_dim=256, norm_fn='instance', dropout=args.dropout
)
self.cnet = BasicEncoder(
output_dim=hdim + cdim, norm_fn='batch', dropout=args.dropout
)
self.update_block = BasicUpdateBlock(self.args, hidden_dim=hdim)
def freeze_bn(self):
for m in self.modules():
if isinstance(m, nn.BatchNorm2d):
m.eval()
def initialize_flow(self, img):
"""Flow is represented as difference between two coordinate grids flow = coords1 - coords0."""
# pylint: disable=invalid-name
N, _, H, W = img.shape
coords0 = coords_grid(N, H // 8, W // 8).to(img.device)
coords1 = coords_grid(N, H // 8, W // 8).to(img.device)
# optical flow computed as difference: flow = coords1 - coords0
return coords0, coords1
def upsample_flow(self, flow, mask):
"""Upsample flow field [H/8, W/8, 2] -> [H, W, 2] using convex combination."""
# pylint: disable=invalid-name
N, _, H, W = flow.shape
mask = mask.view(N, 1, 9, 8, 8, H, W)
mask = torch.softmax(mask, dim=2)
up_flow = F.unfold(8 * flow, [3, 3], padding=1)
up_flow = up_flow.view(N, 2, 9, 1, 1, H, W)
up_flow = torch.sum(mask * up_flow, dim=2)
up_flow = up_flow.permute(0, 1, 4, 2, 5, 3)
return up_flow.reshape(N, 2, 8 * H, 8 * W)
def forward(
self,
image1,
image2,
iters=12,
flow_init=None,
upsample=True,
test_mode=False,
):
"""Estimate optical flow between pair of frames."""
image1 = 2 * (image1 / 255.0) - 1.0
image2 = 2 * (image2 / 255.0) - 1.0
image1 = image1.contiguous()
image2 = image2.contiguous()
hdim = self.hidden_dim
cdim = self.context_dim
# run the feature network
with autocast(enabled=self.mixed_precision):
fmap1, fmap2 = self.fnet([image1, image2])
fmap1 = fmap1.float()
fmap2 = fmap2.float()
if self.args.alternate_corr:
corr_fn = AlternateCorrBlock(fmap1, fmap2, radius=self.args.corr_radius)
else:
corr_fn = CorrBlock(fmap1, fmap2, radius=self.args.corr_radius)
# run the context network
with autocast(enabled=self.mixed_precision):
cnet = self.cnet(image1)
net, inp = torch.split(cnet, [hdim, cdim], dim=1)
net = torch.tanh(net)
inp = torch.relu(inp)
coords0, coords1 = self.initialize_flow(image1)
if flow_init is not None:
coords1 = coords1 + flow_init
flow_predictions = []
flow_up = None
for _ in range(iters):
coords1 = coords1.detach()
corr = corr_fn(coords1) # index correlation volume
flow = coords1 - coords0
with autocast(enabled=self.mixed_precision):
net, up_mask, delta_flow = self.update_block(net, inp, corr, flow)
# F(t+1) = F(t) + \Delta(t)
coords1 = coords1 + delta_flow
# upsample predictions
if up_mask is None:
flow_up = upflow8(coords1 - coords0)
else:
flow_up = self.upsample_flow(coords1 - coords0, up_mask)
flow_predictions.append(flow_up)
if test_mode:
if flow_up is None:
raise ValueError('flow_up is None')
return coords1 - coords0, flow_up, net
return flow_predictions
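# ----------------------------------------------------------------------------
# Editor's note: a minimal usage sketch for this module. The Namespace fields
# mirror the attributes RAFT.__init__ reads (small, dropout, alternate_corr);
# the random frames and the commented-out checkpoint path are placeholders and
# not part of the original file. A real run would load raft-things.pth.
# ----------------------------------------------------------------------------
if __name__ == '__main__':
  from argparse import Namespace

  args = Namespace(small=False, dropout=0, alternate_corr=False)
  model = RAFT(args).eval()
  # A checkpoint saved under nn.DataParallel needs its 'module.' prefix removed:
  # state = torch.load('raft-things.pth', map_location='cpu')
  # model.load_state_dict({k.replace('module.', ''): v for k, v in state.items()})
  image1 = torch.randint(0, 256, (1, 3, 384, 512)).float()
  image2 = torch.randint(0, 256, (1, 3, 384, 512)).float()
  with torch.no_grad():
    flow_low, flow_up, _ = model(image1, image2, iters=12, test_mode=True)
  # flow_low is the 1/8-resolution estimate, flow_up the convex-upsampled flow.
  print(flow_low.shape, flow_up.shape)  # [1, 2, 48, 64] and [1, 2, 384, 512]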
================================================
FILE: camera_pose_annotation/cvd_opt/preprocess/core/update.py
================================================
# Copyright 2025 DeepMind Technologies Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Update block for consistent video depth optimization."""
import torch
from torch import nn
import torch.nn.functional as F
class FlowHead(nn.Module):
def __init__(self, input_dim=128, hidden_dim=256):
super(FlowHead, self).__init__()
self.conv1 = nn.Conv2d(input_dim, hidden_dim, 3, padding=1)
self.conv2 = nn.Conv2d(hidden_dim, 2, 3, padding=1)
self.relu = nn.ReLU(inplace=True)
def forward(self, x):
return self.conv2(self.relu(self.conv1(x)))
class ConvGRU(nn.Module):
"""GRU with convolution."""
def __init__(self, hidden_dim=128, input_dim=192 + 128):
super(ConvGRU, self).__init__()
self.convz = nn.Conv2d(hidden_dim + input_dim, hidden_dim, 3, padding=1)
self.convr = nn.Conv2d(hidden_dim + input_dim, hidden_dim, 3, padding=1)
self.convq = nn.Conv2d(hidden_dim + input_dim, hidden_dim, 3, padding=1)
def forward(self, h, x):
hx = torch.cat([h, x], dim=1)
z = torch.sigmoid(self.convz(hx))
r = torch.sigmoid(self.convr(hx))
q = torch.tanh(self.convq(torch.cat([r * h, x], dim=1)))
h = (1 - z) * h + z * q
return h
class SepConvGRU(nn.Module):
"""GRU with separate convolution for horizontal and vertical directions."""
def __init__(self, hidden_dim=128, input_dim=192 + 128):
super(SepConvGRU, self).__init__()
self.convz1 = nn.Conv2d(
hidden_dim + input_dim, hidden_dim, (1, 5), padding=(0, 2)
)
self.convr1 = nn.Conv2d(
hidden_dim + input_dim, hidden_dim, (1, 5), padding=(0, 2)
)
self.convq1 = nn.Conv2d(
hidden_dim + input_dim, hidden_dim, (1, 5), padding=(0, 2)
)
self.convz2 = nn.Conv2d(
hidden_dim + input_dim, hidden_dim, (5, 1), padding=(2, 0)
)
self.convr2 = nn.Conv2d(
hidden_dim + input_dim, hidden_dim, (5, 1), padding=(2, 0)
)
self.convq2 = nn.Conv2d(
hidden_dim + input_dim, hidden_dim, (5, 1), padding=(2, 0)
)
def forward(self, h, x):
# horizontal
hx = torch.cat([h, x], dim=1)
z = torch.sigmoid(self.convz1(hx))
r = torch.sigmoid(self.convr1(hx))
q = torch.tanh(self.convq1(torch.cat([r * h, x], dim=1)))
h = (1 - z) * h + z * q
# vertical
hx = torch.cat([h, x], dim=1)
z = torch.sigmoid(self.convz2(hx))
r = torch.sigmoid(self.convr2(hx))
q = torch.tanh(self.convq2(torch.cat([r * h, x], dim=1)))
h = (1 - z) * h + z * q
return h
class SmallMotionEncoder(nn.Module):
"""Small motion encoder for MegaSaM."""
def __init__(self, args):
super(SmallMotionEncoder, self).__init__()
cor_planes = args.corr_levels * (2 * args.corr_radius + 1) ** 2
self.convc1 = nn.Conv2d(cor_planes, 96, 1, padding=0)
self.convf1 = nn.Conv2d(2, 64, 7, padding=3)
self.convf2 = nn.Conv2d(64, 32, 3, padding=1)
self.conv = nn.Conv2d(128, 80, 3, padding=1)
def forward(self, flow, corr):
cor = F.relu(self.convc1(corr))
flo = F.relu(self.convf1(flow))
flo = F.relu(self.convf2(flo))
cor_flo = torch.cat([cor, flo], dim=1)
out = F.relu(self.conv(cor_flo))
return torch.cat([out, flow], dim=1)
class BasicMotionEncoder(nn.Module):
"""Basic motion encoder for MegaSaM."""
def __init__(self, args):
super(BasicMotionEncoder, self).__init__()
cor_planes = args.corr_levels * (2 * args.corr_radius + 1) ** 2
self.convc1 = nn.Conv2d(cor_planes, 256, 1, padding=0)
self.convc2 = nn.Conv2d(256, 192, 3, padding=1)
self.convf1 = nn.Conv2d(2, 128, 7, padding=3)
self.convf2 = nn.Conv2d(128, 64, 3, padding=1)
self.conv = nn.Conv2d(64 + 192, 128 - 2, 3, padding=1)
def forward(self, flow, corr):
cor = F.relu(self.convc1(corr))
cor = F.relu(self.convc2(cor))
flo = F.relu(self.convf1(flow))
flo = F.relu(self.convf2(flo))
cor_flo = torch.cat([cor, flo], dim=1)
out = F.relu(self.conv(cor_flo))
return torch.cat([out, flow], dim=1)
class SmallUpdateBlock(nn.Module):
"""Small update block for MegaSaM."""
def __init__(self, args, hidden_dim=96):
super(SmallUpdateBlock, self).__init__()
self.encoder = SmallMotionEncoder(args)
self.gru = ConvGRU(hidden_dim=hidden_dim, input_dim=82 + 64)
self.flow_head = FlowHead(hidden_dim, hidden_dim=128)
def forward(self, net, inp, corr, flow):
motion_features = self.encoder(flow, corr)
inp = torch.cat([inp, motion_features], dim=1)
net = self.gru(net, inp)
delta_flow = self.flow_head(net)
return net, None, delta_flow
class BasicUpdateBlock(nn.Module):
"""Basic update block for MegaSaM."""
def __init__(self, args, hidden_dim=128, input_dim=128):
super(BasicUpdateBlock, self).__init__()
self.args = args
self.encoder = BasicMotionEncoder(args)
self.gru = SepConvGRU(hidden_dim=hidden_dim, input_dim=128 + hidden_dim)
self.flow_head = FlowHead(hidden_dim, hidden_dim=256)
self.mask = nn.Sequential(
nn.Conv2d(128, 256, 3, padding=1),
nn.ReLU(inplace=True),
nn.Conv2d(256, 64 * 9, 1, padding=0),
)
def forward(self, net, inp, corr, flow, upsample=True):
motion_features = self.encoder(flow, corr)
inp = torch.cat([inp, motion_features], dim=1)
net = self.gru(net, inp)
delta_flow = self.flow_head(net)
    # scale mask to balance gradients
mask = 0.25 * self.mask(net)
return net, mask, delta_flow
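# ----------------------------------------------------------------------------
# Editor's note: a shape sketch, not part of the original file. For the
# non-small RAFT configuration (corr_levels=4, corr_radius=4) the sampled
# correlation volume has 4 * (2*4 + 1)**2 = 324 channels; the Namespace below
# only mimics the attributes BasicMotionEncoder reads.
# ----------------------------------------------------------------------------
if __name__ == '__main__':
  from argparse import Namespace

  args = Namespace(corr_levels=4, corr_radius=4)
  block = BasicUpdateBlock(args, hidden_dim=128).eval()
  n, h, w = 1, 48, 64
  net = torch.zeros(n, 128, h, w)    # GRU hidden state
  inp = torch.zeros(n, 128, h, w)    # context features
  corr = torch.zeros(n, 324, h, w)   # sampled correlation features
  flow = torch.zeros(n, 2, h, w)     # current flow estimate
  with torch.no_grad():
    net, up_mask, delta_flow = block(net, inp, corr, flow)
  # up_mask carries 64 * 9 channels of convex weights for the 8x upsampling.
  print(net.shape, up_mask.shape, delta_flow.shape)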
================================================
FILE: camera_pose_annotation/cvd_opt/preprocess/core/utils/__init__.py
================================================
================================================
FILE: camera_pose_annotation/cvd_opt/preprocess/core/utils/augmentor.py
================================================
# Copyright 2025 DeepMind Technologies Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Augmentation utils for MegaSaM."""
# pylint: disable=g-import-not-at-top
# pylint: disable=g-importing-member
import cv2
import numpy as np
from PIL import Image
cv2.setNumThreads(0)
cv2.ocl.setUseOpenCL(False)
from torchvision.transforms import ColorJitter
class FlowAugmentor:
"""Augmentation for flow for MegaSaM."""
def __init__(self, crop_size, min_scale=-0.2, max_scale=0.5, do_flip=True):
# spatial augmentation params
self.crop_size = crop_size
self.min_scale = min_scale
self.max_scale = max_scale
self.spatial_aug_prob = 0.8
self.stretch_prob = 0.8
self.max_stretch = 0.2
# flip augmentation params
self.do_flip = do_flip
self.h_flip_prob = 0.5
self.v_flip_prob = 0.1
# photometric augmentation params
self.photo_aug = ColorJitter(
brightness=0.4, contrast=0.4, saturation=0.4, hue=0.5 / 3.14
)
self.asymmetric_color_aug_prob = 0.2
self.eraser_aug_prob = 0.5
def color_transform(self, img1, img2):
"""Photometric augmentation."""
# asymmetric
if np.random.rand() < self.asymmetric_color_aug_prob:
img1 = np.array(self.photo_aug(Image.fromarray(img1)), dtype=np.uint8)
img2 = np.array(self.photo_aug(Image.fromarray(img2)), dtype=np.uint8)
# symmetric
else:
image_stack = np.concatenate([img1, img2], axis=0)
image_stack = np.array(
self.photo_aug(Image.fromarray(image_stack)), dtype=np.uint8
)
img1, img2 = np.split(image_stack, 2, axis=0)
return img1, img2
def eraser_transform(self, img1, img2, bounds=[50, 100]): # pylint: disable=dangerous-default-value
"""Occlusion augmentation."""
ht, wd = img1.shape[:2]
if np.random.rand() < self.eraser_aug_prob:
mean_color = np.mean(img2.reshape(-1, 3), axis=0)
for _ in range(np.random.randint(1, 3)):
x0 = np.random.randint(0, wd)
y0 = np.random.randint(0, ht)
dx = np.random.randint(bounds[0], bounds[1])
dy = np.random.randint(bounds[0], bounds[1])
img2[y0 : y0 + dy, x0 : x0 + dx, :] = mean_color
return img1, img2
def spatial_transform(self, img1, img2, flow):
"""Spatial augmentation."""
# randomly sample scale
ht, wd = img1.shape[:2]
min_scale = np.maximum(
(self.crop_size[0] + 8) / float(ht), (self.crop_size[1] + 8) / float(wd)
)
scale = 2 ** np.random.uniform(self.min_scale, self.max_scale)
scale_x = scale
scale_y = scale
if np.random.rand() < self.stretch_prob:
scale_x *= 2 ** np.random.uniform(-self.max_stretch, self.max_stretch)
scale_y *= 2 ** np.random.uniform(-self.max_stretch, self.max_stretch)
scale_x = np.clip(scale_x, min_scale, None)
scale_y = np.clip(scale_y, min_scale, None)
if np.random.rand() < self.spatial_aug_prob:
# rescale the images
img1 = cv2.resize(
img1, None, fx=scale_x, fy=scale_y, interpolation=cv2.INTER_LINEAR
)
img2 = cv2.resize(
img2, None, fx=scale_x, fy=scale_y, interpolation=cv2.INTER_LINEAR
)
flow = cv2.resize(
flow, None, fx=scale_x, fy=scale_y, interpolation=cv2.INTER_LINEAR
)
flow = flow * [scale_x, scale_y]
if self.do_flip:
if np.random.rand() < self.h_flip_prob: # h-flip
img1 = img1[:, ::-1]
img2 = img2[:, ::-1]
flow = flow[:, ::-1] * [-1.0, 1.0]
if np.random.rand() < self.v_flip_prob: # v-flip
img1 = img1[::-1, :]
img2 = img2[::-1, :]
flow = flow[::-1, :] * [1.0, -1.0]
y0 = np.random.randint(0, img1.shape[0] - self.crop_size[0])
x0 = np.random.randint(0, img1.shape[1] - self.crop_size[1])
img1 = img1[y0 : y0 + self.crop_size[0], x0 : x0 + self.crop_size[1]]
img2 = img2[y0 : y0 + self.crop_size[0], x0 : x0 + self.crop_size[1]]
flow = flow[y0 : y0 + self.crop_size[0], x0 : x0 + self.crop_size[1]]
return img1, img2, flow
def __call__(self, img1, img2, flow):
img1, img2 = self.color_transform(img1, img2)
img1, img2 = self.eraser_transform(img1, img2)
img1, img2, flow = self.spatial_transform(img1, img2, flow)
img1 = np.ascontiguousarray(img1)
img2 = np.ascontiguousarray(img2)
flow = np.ascontiguousarray(flow)
return img1, img2, flow
class SparseFlowAugmentor:
"""Augmentation for sparse flow for MegaSaM."""
def __init__(self, crop_size, min_scale=-0.2, max_scale=0.5, do_flip=False):
# spatial augmentation params
self.crop_size = crop_size
self.min_scale = min_scale
self.max_scale = max_scale
self.spatial_aug_prob = 0.8
self.stretch_prob = 0.8
self.max_stretch = 0.2
# flip augmentation params
self.do_flip = do_flip
self.h_flip_prob = 0.5
self.v_flip_prob = 0.1
# photometric augmentation params
self.photo_aug = ColorJitter(
brightness=0.3, contrast=0.3, saturation=0.3, hue=0.3 / 3.14
)
self.asymmetric_color_aug_prob = 0.2
self.eraser_aug_prob = 0.5
def color_transform(self, img1, img2):
image_stack = np.concatenate([img1, img2], axis=0)
image_stack = np.array(
self.photo_aug(Image.fromarray(image_stack)), dtype=np.uint8
)
img1, img2 = np.split(image_stack, 2, axis=0)
return img1, img2
def eraser_transform(self, img1, img2):
ht, wd = img1.shape[:2]
if np.random.rand() < self.eraser_aug_prob:
mean_color = np.mean(img2.reshape(-1, 3), axis=0)
for _ in range(np.random.randint(1, 3)):
x0 = np.random.randint(0, wd)
y0 = np.random.randint(0, ht)
dx = np.random.randint(50, 100)
dy = np.random.randint(50, 100)
img2[y0 : y0 + dy, x0 : x0 + dx, :] = mean_color
return img1, img2
def resize_sparse_flow_map(self, flow, valid, fx=1.0, fy=1.0):
"""Resize sparse flow map."""
ht, wd = flow.shape[:2]
coords = np.meshgrid(np.arange(wd), np.arange(ht))
coords = np.stack(coords, axis=-1)
coords = coords.reshape(-1, 2).astype(np.float32)
flow = flow.reshape(-1, 2).astype(np.float32)
valid = valid.reshape(-1).astype(np.float32)
coords0 = coords[valid >= 1]
flow0 = flow[valid >= 1]
ht1 = int(round(ht * fy))
wd1 = int(round(wd * fx))
coords1 = coords0 * [fx, fy]
flow1 = flow0 * [fx, fy]
xx = np.round(coords1[:, 0]).astype(np.int32)
yy = np.round(coords1[:, 1]).astype(np.int32)
v = (xx > 0) & (xx < wd1) & (yy > 0) & (yy < ht1)
xx = xx[v]
yy = yy[v]
flow1 = flow1[v]
flow_img = np.zeros([ht1, wd1, 2], dtype=np.float32)
valid_img = np.zeros([ht1, wd1], dtype=np.int32)
flow_img[yy, xx] = flow1
valid_img[yy, xx] = 1
return flow_img, valid_img
def spatial_transform(self, img1, img2, flow, valid):
"""Randomly sample scale and apply it to images and flow map."""
ht, wd = img1.shape[:2]
min_scale = np.maximum(
(self.crop_size[0] + 1) / float(ht), (self.crop_size[1] + 1) / float(wd)
)
scale = 2 ** np.random.uniform(self.min_scale, self.max_scale)
scale_x = np.clip(scale, min_scale, None)
scale_y = np.clip(scale, min_scale, None)
if np.random.rand() < self.spatial_aug_prob:
# rescale the images
img1 = cv2.resize(
img1, None, fx=scale_x, fy=scale_y, interpolation=cv2.INTER_LINEAR
)
img2 = cv2.resize(
img2, None, fx=scale_x, fy=scale_y, interpolation=cv2.INTER_LINEAR
)
flow, valid = self.resize_sparse_flow_map(
flow, valid, fx=scale_x, fy=scale_y
)
if self.do_flip:
if np.random.rand() < 0.5: # h-flip
img1 = img1[:, ::-1]
img2 = img2[:, ::-1]
flow = flow[:, ::-1] * [-1.0, 1.0]
valid = valid[:, ::-1]
margin_y = 20
margin_x = 50
y0 = np.random.randint(0, img1.shape[0] - self.crop_size[0] + margin_y)
x0 = np.random.randint(
-margin_x, img1.shape[1] - self.crop_size[1] + margin_x
)
y0 = np.clip(y0, 0, img1.shape[0] - self.crop_size[0])
x0 = np.clip(x0, 0, img1.shape[1] - self.crop_size[1])
img1 = img1[y0 : y0 + self.crop_size[0], x0 : x0 + self.crop_size[1]]
img2 = img2[y0 : y0 + self.crop_size[0], x0 : x0 + self.crop_size[1]]
flow = flow[y0 : y0 + self.crop_size[0], x0 : x0 + self.crop_size[1]]
valid = valid[y0 : y0 + self.crop_size[0], x0 : x0 + self.crop_size[1]]
return img1, img2, flow, valid
def __call__(self, img1, img2, flow, valid):
img1, img2 = self.color_transform(img1, img2)
img1, img2 = self.eraser_transform(img1, img2)
img1, img2, flow, valid = self.spatial_transform(img1, img2, flow, valid)
img1 = np.ascontiguousarray(img1)
img2 = np.ascontiguousarray(img2)
flow = np.ascontiguousarray(flow)
valid = np.ascontiguousarray(valid)
return img1, img2, flow, valid
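# ----------------------------------------------------------------------------
# Editor's note: an illustrative sketch, not in the original file. It drives
# FlowAugmentor on synthetic frames; the image size and crop size below are
# arbitrary placeholders.
# ----------------------------------------------------------------------------
if __name__ == '__main__':
  ht, wd = 480, 640
  img1 = np.random.randint(0, 256, (ht, wd, 3), dtype=np.uint8)
  img2 = np.random.randint(0, 256, (ht, wd, 3), dtype=np.uint8)
  flow = np.random.randn(ht, wd, 2).astype(np.float32)
  augmentor = FlowAugmentor(crop_size=(368, 496))
  img1_aug, img2_aug, flow_aug = augmentor(img1, img2, flow)
  # All outputs are cropped to crop_size; flow values are rescaled and
  # sign-flipped consistently with the sampled spatial transform.
  print(img1_aug.shape, img2_aug.shape, flow_aug.shape)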
================================================
FILE: camera_pose_annotation/cvd_opt/preprocess/core/utils/flow_viz.py
================================================
# Copyright 2025 DeepMind Technologies Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Flow visualization code.
Based on https://github.com/tomrunia/OpticalFlow_Visualization
"""
import numpy as np
def make_colorwheel():
"""Generates a color wheel for optical flow visualization.
Baker et al. "A Database and Evaluation Methodology for Optical Flow"
(ICCV, 2007)
URL: http://vision.middlebury.edu/flow/flowEval-iccv07.pdf
Code follows the original C++ source code of Daniel Scharstein.
  Code follows the Matlab source code of Deqing Sun.
Returns:
np.ndarray: Color wheel
"""
# pylint: disable=invalid-name
RY = 15
YG = 6
GC = 4
CB = 11
BM = 13
MR = 6
ncols = RY + YG + GC + CB + BM + MR
colorwheel = np.zeros((ncols, 3))
col = 0
# RY
colorwheel[0:RY, 0] = 255
colorwheel[0:RY, 1] = np.floor(255 * np.arange(0, RY) / RY)
col = col + RY
# YG
colorwheel[col : col + YG, 0] = 255 - np.floor(255 * np.arange(0, YG) / YG)
colorwheel[col : col + YG, 1] = 255
col = col + YG
# GC
colorwheel[col : col + GC, 1] = 255
colorwheel[col : col + GC, 2] = np.floor(255 * np.arange(0, GC) / GC)
col = col + GC
# CB
colorwheel[col : col + CB, 1] = 255 - np.floor(255 * np.arange(CB) / CB)
colorwheel[col : col + CB, 2] = 255
col = col + CB
# BM
colorwheel[col : col + BM, 2] = 255
colorwheel[col : col + BM, 0] = np.floor(255 * np.arange(0, BM) / BM)
col = col + BM
# MR
colorwheel[col : col + MR, 2] = 255 - np.floor(255 * np.arange(MR) / MR)
colorwheel[col : col + MR, 0] = 255
return colorwheel
def flow_uv_to_colors(u, v, convert_to_bgr=False):
"""Applies the flow color wheel to (possibly clipped) flow components u and v.
According to the C++ source code of Daniel Scharstein
According to the Matlab source code of Deqing Sun
Args:
u (np.ndarray): Input horizontal flow of shape [H,W]
v (np.ndarray): Input vertical flow of shape [H,W]
convert_to_bgr (bool, optional): Convert output image to BGR. Defaults to
False.
Returns:
np.ndarray: Flow visualization image of shape [H,W,3]
"""
flow_image = np.zeros((u.shape[0], u.shape[1], 3), np.uint8)
colorwheel = make_colorwheel() # shape [55x3]
ncols = colorwheel.shape[0]
rad = np.sqrt(np.square(u) + np.square(v))
a = np.arctan2(-v, -u) / np.pi
fk = (a + 1) / 2 * (ncols - 1)
k0 = np.floor(fk).astype(np.int32)
k1 = k0 + 1
k1[k1 == ncols] = 0
f = fk - k0
for i in range(colorwheel.shape[1]):
tmp = colorwheel[:, i]
col0 = tmp[k0] / 255.0
col1 = tmp[k1] / 255.0
col = (1 - f) * col0 + f * col1
idx = rad <= 1
col[idx] = 1 - rad[idx] * (1 - col[idx])
col[~idx] = col[~idx] * 0.75 # out of range
# Note the 2-i => BGR instead of RGB
ch_idx = 2 - i if convert_to_bgr else i
flow_image[:, :, ch_idx] = np.floor(255 * col)
return flow_image
def flow_to_image(flow_uv, clip_flow=None, convert_to_bgr=False):
"""Expects a two dimensional flow image of shape.
Args:
flow_uv (np.ndarray): Flow UV image of shape [H,W,2]
clip_flow (float, optional): Clip maximum of flow values. Defaults to
None.
convert_to_bgr (bool, optional): Convert output image to BGR. Defaults to
False.
Returns:
np.ndarray: Flow visualization image of shape [H,W,3]
"""
assert flow_uv.ndim == 3, 'input flow must have three dimensions'
assert flow_uv.shape[2] == 2, 'input flow must have shape [H,W,2]'
if clip_flow is not None:
flow_uv = np.clip(flow_uv, 0, clip_flow)
u = flow_uv[:, :, 0]
v = flow_uv[:, :, 1]
rad = np.sqrt(np.square(u) + np.square(v))
rad_max = np.max(rad)
epsilon = 1e-5
u = u / (rad_max + epsilon)
v = v / (rad_max + epsilon)
return flow_uv_to_colors(u, v, convert_to_bgr)
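# ----------------------------------------------------------------------------
# Editor's note: a tiny usage sketch, not in the original file. It renders a
# synthetic radial flow field with flow_to_image.
# ----------------------------------------------------------------------------
if __name__ == '__main__':
  ht, wd = 64, 96
  y, x = np.meshgrid(np.arange(ht), np.arange(wd), indexing='ij')
  flow = np.stack([x - wd / 2.0, y - ht / 2.0], axis=-1).astype(np.float32)
  rgb = flow_to_image(flow)  # uint8 visualization of shape [ht, wd, 3]
  print(rgb.shape, rgb.dtype)
  # For OpenCV-style channel order, pass convert_to_bgr=True instead.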
================================================
FILE: camera_pose_annotation/cvd_opt/preprocess/core/utils/frame_utils.py
================================================
# Copyright 2025 DeepMind Technologies Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Frame utils for MegaSaM."""
# pylint: disable=invalid-name
# pylint: disable=g-doc-args
# pylint: disable=broad-exception-raised
import os
import re
import cv2
import numpy as np
from PIL import Image
cv2.setNumThreads(0)
cv2.ocl.setUseOpenCL(False)
TAG_CHAR = np.array([202021.25], np.float32)
def readFlow(fn):
"""Read .flo file in Middlebury format."""
# Code adapted from:
# http://stackoverflow.com/questions/28013200/reading-middlebury-flow-files-with-python-bytes-array-numpy
# WARNING: this will work on little-endian architectures (eg Intel x86) only!
# print 'fn = %s'%(fn)
with open(fn, 'rb') as f:
magic = np.fromfile(f, np.float32, count=1)
if 202021.25 != magic:
print('Magic number incorrect. Invalid .flo file')
return None
else:
w = np.fromfile(f, np.int32, count=1)
h = np.fromfile(f, np.int32, count=1)
# print 'Reading %d x %d flo file\n' % (w, h)
data = np.fromfile(f, np.float32, count=2 * int(w) * int(h))
# Reshape data into 3D array (columns, rows, bands)
# The reshape here is for visualization, the original code is (w,h,2)
return np.resize(data, (int(h), int(w), 2))
def readPFM(file):
"""Read PFM file."""
file = open(file, 'rb')
header = file.readline().rstrip()
if header == b'PF':
color = True
elif header == b'Pf':
color = False
else:
raise Exception('Not a PFM file.')
dim_match = re.match(rb'^(\d+)\s(\d+)\s$', file.readline())
if dim_match:
width, height = map(int, dim_match.groups())
else:
raise Exception('Malformed PFM header.')
scale = float(file.readline().rstrip())
if scale < 0: # little-endian
endian = '<'
else:
endian = '>' # big-endian
data = np.fromfile(file, endian + 'f')
shape = (height, width, 3) if color else (height, width)
data = np.reshape(data, shape)
data = np.flipud(data)
return data
def writeFlow(filename, uv, v=None):
"""Write optical flow to file.
If v is None, uv is assumed to contain both u and v channels,
stacked in depth.
Original code by Deqing Sun, adapted from Daniel Scharstein.
"""
nBands = 2
if v is None:
assert uv.ndim == 3
assert uv.shape[2] == 2
u = uv[:, :, 0]
v = uv[:, :, 1]
else:
u = uv
assert u.shape == v.shape
height, width = u.shape
f = open(filename, 'wb')
# write the header
f.write(TAG_CHAR)
np.array(width).astype(np.int32).tofile(f)
np.array(height).astype(np.int32).tofile(f)
# arrange into matrix form
tmp = np.zeros((height, width * nBands))
tmp[:, np.arange(width) * 2] = u
tmp[:, np.arange(width) * 2 + 1] = v
tmp.astype(np.float32).tofile(f)
f.close()
def readFlowKITTI(filename):
flow = cv2.imread(filename, cv2.IMREAD_ANYDEPTH | cv2.IMREAD_COLOR)
flow = flow[:, :, ::-1].astype(np.float32)
flow, valid = flow[:, :, :2], flow[:, :, 2]
flow = (flow - 2**15) / 64.0
return flow, valid
def readDispKITTI(filename):
disp = cv2.imread(filename, cv2.IMREAD_ANYDEPTH) / 256.0
valid = disp > 0.0
flow = np.stack([-disp, np.zeros_like(disp)], -1)
return flow, valid
def writeFlowKITTI(filename, uv):
uv = 64.0 * uv + 2**15
valid = np.ones([uv.shape[0], uv.shape[1], 1])
uv = np.concatenate([uv, valid], axis=-1).astype(np.uint16)
cv2.imwrite(filename, uv[..., ::-1])
def read_gen(file_name, pil=False):
"""Read image or flow file."""
del pil
ext = os.path.splitext(file_name)[-1]
if ext == '.png' or ext == '.jpeg' or ext == '.ppm' or ext == '.jpg':
return Image.open(file_name)
elif ext == '.bin' or ext == '.raw':
return np.load(file_name)
elif ext == '.flo':
return readFlow(file_name).astype(np.float32) # pylint: disable=attribute-error
elif ext == '.pfm':
flow = readPFM(file_name).astype(np.float32)
if len(flow.shape) == 2:
return flow
else:
return flow[:, :, :-1]
return []
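# ----------------------------------------------------------------------------
# Editor's note: an illustrative round trip, not in the original file, through
# the Middlebury .flo writer and reader above using a temporary directory.
# ----------------------------------------------------------------------------
if __name__ == '__main__':
  import tempfile

  flow = np.random.randn(48, 64, 2).astype(np.float32)
  with tempfile.TemporaryDirectory() as tmpdir:
    path = os.path.join(tmpdir, 'example.flo')
    writeFlow(path, flow)
    flow_back = readFlow(path)
  # The header stores width and height; the payload interleaves u and v.
  print(np.allclose(flow, flow_back))  # True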
================================================
FILE: camera_pose_annotation/cvd_opt/preprocess/core/utils/utils.py
================================================
# Copyright 2025 DeepMind Technologies Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Utility functions for MegaSaM."""
# pylint: disable=invalid-name
import numpy as np
from scipy import interpolate
import torch
import torch.nn.functional as F
class InputPadder:
"""Pads images such that dimensions are divisible by 8."""
def __init__(self, dims, mode='sintel'):
self.ht, self.wd = dims[-2:]
pad_ht = (((self.ht // 8) + 1) * 8 - self.ht) % 8
pad_wd = (((self.wd // 8) + 1) * 8 - self.wd) % 8
if mode == 'sintel':
self._pad = [
pad_wd // 2,
pad_wd - pad_wd // 2,
pad_ht // 2,
pad_ht - pad_ht // 2,
]
else:
self._pad = [pad_wd // 2, pad_wd - pad_wd // 2, 0, pad_ht]
def pad(self, *inputs):
return [F.pad(x, self._pad, mode='replicate') for x in inputs]
def unpad(self, x):
ht, wd = x.shape[-2:]
c = [self._pad[2], ht - self._pad[3], self._pad[0], wd - self._pad[1]]
return x[..., c[0] : c[1], c[2] : c[3]]
def forward_interpolate(flow):
"""Interpolate flow map to match the original image size."""
flow = flow.detach().cpu().numpy()
dx, dy = flow[0], flow[1]
ht, wd = dx.shape
x0, y0 = np.meshgrid(np.arange(wd), np.arange(ht))
x1 = x0 + dx
y1 = y0 + dy
x1 = x1.reshape(-1)
y1 = y1.reshape(-1)
dx = dx.reshape(-1)
dy = dy.reshape(-1)
valid = (x1 > 0) & (x1 < wd) & (y1 > 0) & (y1 < ht)
x1 = x1[valid]
y1 = y1[valid]
dx = dx[valid]
dy = dy[valid]
flow_x = interpolate.griddata(
(x1, y1), dx, (x0, y0), method='nearest', fill_value=0
)
flow_y = interpolate.griddata(
(x1, y1), dy, (x0, y0), method='nearest', fill_value=0
)
flow = np.stack([flow_x, flow_y], axis=0)
return torch.from_numpy(flow).float()
def bilinear_sampler(img, coords, mode='bilinear', mask=False):
"""Wrapper for grid_sample, uses pixel coordinates."""
del mode
H, W = img.shape[-2:]
xgrid, ygrid = coords.split([1, 1], dim=-1)
xgrid = 2 * xgrid / (W - 1) - 1
ygrid = 2 * ygrid / (H - 1) - 1
grid = torch.cat([xgrid, ygrid], dim=-1)
img = F.grid_sample(img, grid, align_corners=True)
if mask:
mask = (xgrid > -1) & (ygrid > -1) & (xgrid < 1) & (ygrid < 1)
return img, mask.float()
return img
def coords_grid(batch, ht, wd):
coords = torch.meshgrid(torch.arange(ht), torch.arange(wd))
coords = torch.stack(coords[::-1], dim=0).float()
return coords[None].repeat(batch, 1, 1, 1)
def upflow8(flow, mode='bilinear'):
new_size = (8 * flow.shape[2], 8 * flow.shape[3])
return 8 * F.interpolate(flow, size=new_size, mode=mode, align_corners=True)
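# ----------------------------------------------------------------------------
# Editor's note: a short sketch, not part of the original file, showing how
# InputPadder makes arbitrary frame sizes divisible by 8 before running RAFT.
# ----------------------------------------------------------------------------
if __name__ == '__main__':
  image = torch.zeros(1, 3, 436, 1024)  # e.g. a Sintel-sized frame
  padder = InputPadder(image.shape)     # default 'sintel' mode splits padding evenly
  (padded,) = padder.pad(image)
  print(padded.shape)                   # torch.Size([1, 3, 440, 1024])
  restored = padder.unpad(padded)
  print(restored.shape)                 # torch.Size([1, 3, 436, 1024])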
================================================
FILE: camera_pose_annotation/cvd_opt/preprocess/inference_batch.py
================================================
"""
Batch inference script for optical flow preprocessing using RAFT model.
Processes multiple video clips in parallel to generate optical flow data for CVD optimization.
"""
import pandas as pd
import os
import argparse
import concurrent.futures
from multiprocessing import Manager
import subprocess
import queue
from tqdm import tqdm
def process_single_row(row, index, args, worker_id=0):
"""Process a single video clip for optical flow generation."""
dir_path = os.path.join(args.dir_path, row["id"])
device_id = worker_id % args.gpu_num
# Build command for optical flow preprocessing with RAFT model
cmd = (
f"CUDA_VISIBLE_DEVICES={args.gpu_id[device_id]} python camera_pose_annotation/cvd_opt/preprocess/preprocess_flow.py "
f"--dir_path {dir_path} "
f"--model {args.checkpoints_path}/raft-things.pth "
f"--mixed_precision"
)
process = subprocess.Popen(
cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE
)
stdout, stderr = process.communicate()
if process.returncode != 0:
print(f"Error generating optical flow for {row['id']}: {stderr.decode()}")
def worker(task_queue, args, worker_id, pbar):
"""Worker function for parallel optical flow preprocessing."""
while True:
try:
index, row = task_queue.get(timeout=1)
except queue.Empty:
break
process_single_row(row, index, args, worker_id)
task_queue.task_done()
pbar.update(1)
def parse_args():
"""Parse command line arguments for optical flow preprocessing."""
parser = argparse.ArgumentParser()
parser.add_argument("--csv_path", type=str, help="Path to the csv file")
parser.add_argument("--dir_path", type=str, default="./outputs")
parser.add_argument("--checkpoints_path", type=str, default="./checkpoints")
parser.add_argument(
"--gpu_id", type=str, default="0", help="Comma-separated list of GPU IDs to use"
)
parser.add_argument(
"--num_workers",
type=int,
default=4,
help="Number of workers for parallel processing",
)
parser.add_argument(
"--disable_parallel", action="store_true", help="Disable parallel processing"
)
return parser.parse_args()
def main():
args = parse_args()
# Parse GPU configuration
args.gpu_num = len(args.gpu_id.split(","))
args.gpu_id = [int(gpu) for gpu in args.gpu_id.split(",")]
df = pd.read_csv(args.csv_path)
if args.disable_parallel:
# Sequential processing
for index, row in tqdm(df.iterrows(), total=len(df), desc="Processing rows"):
process_single_row(row, index, args)
else:
# Parallel processing with multiple workers
manager = Manager()
task_queue = manager.Queue()
for index, row in df.iterrows():
task_queue.put((index, row))
with tqdm(total=len(df), desc="Processing rows") as pbar:
with concurrent.futures.ThreadPoolExecutor(
max_workers=args.num_workers
) as executor:
futures = []
for id in range(args.num_workers):
futures.append(executor.submit(worker, task_queue, args, id, pbar))
for future in concurrent.futures.as_completed(futures):
future.result()
if __name__ == "__main__":
main()
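# -----------------------------------------------------------------------------
# Editor's note: an illustrative invocation (the paths are placeholders). Each
# CSV row's "id" column is resolved against --dir_path, and workers are mapped
# round-robin onto the comma-separated GPU list:
#
#   python camera_pose_annotation/cvd_opt/preprocess/inference_batch.py \
#       --csv_path clips.csv \
#       --dir_path ./outputs \
#       --checkpoints_path ./checkpoints \
#       --gpu_id 0,1 \
#       --num_workers 4
# -----------------------------------------------------------------------------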
================================================
FILE: camera_pose_annotation/cvd_opt/preprocess/preprocess_flow.py
================================================
# Copyright 2025 DeepMind Technologies Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Preprocess flow for MegaSaM."""
import cv2
import tqdm
import argparse
from pathlib import Path # pylint: disable=g-importing-member
from core.utils.utils import InputPadder
from core.raft import RAFT
import glob
import os
import sys
import numpy as np
import torch
def warp_flow(img, flow):
h, w = flow.shape[:2]
flow_new = flow.copy()
flow_new[:, :, 0] += np.arange(w)
flow_new[:, :, 1] += np.arange(h)[:, np.newaxis]
res = cv2.remap(
img, flow_new, None, cv2.INTER_LINEAR, borderMode=cv2.BORDER_CONSTANT
)
return res
def resize_flow(flow, img_h, img_w):
# flow = np.load(flow_path)
flow_h, flow_w = flow.shape[0], flow.shape[1]
flow[:, :, 0] *= float(img_w) / float(flow_w)
flow[:, :, 1] *= float(img_h) / float(flow_h)
    flow = cv2.resize(flow, (img_w, img_h), interpolation=cv2.INTER_LINEAR)
return flow
def parse_args():
parser = argparse.ArgumentParser()
parser.add_argument("--model", default="raft-things.pth", help="restore checkpoint")
parser.add_argument("--small", action="store_true", help="use small model")
parser.add_argument("--dir_path", help="dataset for evaluation")
parser.add_argument(
"--num_heads",
default=1,
type=int,
help="number of heads in attention and aggregation",
)
parser.add_argument(
"--position_only",
default=False,
action="store_true",
help="only use position-wise attention",
)
parser.add_argument(
"--position_and_content",
default=False,
action="store_true",
help="use position and content-wise attention",
)
parser.add_argument(
"--mixed_precision", action="store_true", help="use mixed precision"
)
return parser.parse_args()
if __name__ == "__main__":
args = parse_args()
model = torch.nn.DataParallel(RAFT(args))
model.load_state_dict(torch.load(args.model))
flow_model = model.module
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
flow_model.to(device).eval()
img_path = os.path.join(args.dir_path, "img")
image_list = sorted(glob.glob(os.path.join(img_path, "*.png"))) # [::stride]
image_list += sorted(glob.glob(os.path.join(img_path, "*.jpg"))) # [::stride]
img_data = []
    for image_file in tqdm.tqdm(image_list):
image = cv2.imread(image_file)[..., ::-1] # rgb
h0, w0, _ = image.shape
h1 = int(h0 * np.sqrt((384 * 512) / (h0 * w0)))
w1 = int(w0 * np.sqrt((384 * 512) / (h0 * w0)))
image = cv2.resize(image, (w1, h1))
image = image[: h1 - h1 % 8, : w1 - w1 % 8].transpose(2, 0, 1)
img_data.append(image)
img_data = np.array(img_data)
flows_low = []
flows_high = []
flow_masks_high = []
flow_init = None
flows_arr_low_bwd = {}
flows_arr_low_fwd = {}
ii = []
jj = []
flows_arr_up = []
masks_arr_up = []
for step in [1, 2, 4, 8, 15]:
flows_arr_low = []
for i in tqdm.tqdm(range(max(0, -step), img_data.shape[0] - max(0, step))):
image1 = (
torch.as_tensor(np.ascontiguousarray(img_data[i : i + 1]))
.float()
.cuda()
)
image2 = (
torch.as_tensor(np.ascontiguousarray(img_data[i + step : i + step + 1]))
.float()
.cuda()
)
ii.append(i)
jj.append(i + step)
with torch.no_grad():
padder = InputPadder(image1.shape)
image1, image2 = padder.pad(image1, image2)
if np.abs(step) > 1:
flow_init = np.stack(
[flows_arr_low_fwd[i], flows_arr_low_bwd[i + step]], axis=0
)
flow_init = (
torch.as_tensor(np.ascontiguousarray(flow_init))
.float()
.cuda()
.permute(0, 3, 1, 2)
)
else:
flow_init = None
flow_low, flow_up, _ = flow_model(
torch.cat([image1, image2], dim=0),
torch.cat([image2, image1], dim=0),
iters=22,
test_mode=True,
flow_init=flow_init,
)
flow_low_fwd = flow_low[0].cpu().numpy().transpose(1, 2, 0)
flow_low_bwd = flow_low[1].cpu().numpy().transpose(1, 2, 0)
flow_up_fwd = resize_flow(
flow_up[0].cpu().numpy().transpose(1, 2, 0),
flow_up.shape[-2] // 2,
flow_up.shape[-1] // 2,
)
flow_up_bwd = resize_flow(
flow_up[1].cpu().numpy().transpose(1, 2, 0),
flow_up.shape[-2] // 2,
flow_up.shape[-1] // 2,
)
bwd2fwd_flow = warp_flow(flow_up_bwd, flow_up_fwd)
fwd_lr_error = np.linalg.norm(flow_up_fwd + bwd2fwd_flow, axis=-1)
fwd_mask_up = fwd_lr_error < 1.0
# flows_arr_low.append(flow_low_fwd)
flows_arr_low_bwd[i + step] = flow_low_bwd
flows_arr_low_fwd[i] = flow_low_fwd
# masks_arr_low.append(fwd_mask_low)
flows_arr_up.append(flow_up_fwd)
masks_arr_up.append(fwd_mask_up)
iijj = np.stack((ii, jj), axis=0)
flows_high = np.array(flows_arr_up).transpose(0, 3, 1, 2)
flow_masks_high = np.array(masks_arr_up)[:, None, ...]
output_path = os.path.join(args.dir_path, "cache-flow")
if not os.path.exists(output_path):
os.makedirs(output_path)
np.save(os.path.join(output_path, "flows.npy"), np.float16(flows_high))
np.save(os.path.join(output_path, "flows_masks.npy"), flow_masks_high)
np.save(os.path.join(output_path, "ii-jj.npy"), iijj)
================================================
FILE: camera_pose_annotation/depth_estimation/Depth-Anything/__init__.py
================================================
================================================
FILE: camera_pose_annotation/depth_estimation/Depth-Anything/depth_anything_v2/dinov2.py
================================================
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the Apache License, Version 2.0
# found in the LICENSE file in the root directory of this source tree.
# References:
# https://github.com/facebookresearch/dino/blob/main/vision_transformer.py
# https://github.com/rwightman/pytorch-image-models/tree/master/timm/models/vision_transformer.py
from functools import partial
import math
import logging
from typing import Sequence, Tuple, Union, Callable
import torch
import torch.nn as nn
import torch.utils.checkpoint
from torch.nn.init import trunc_normal_
from .dinov2_layers import Mlp, PatchEmbed, SwiGLUFFNFused, MemEffAttention, NestedTensorBlock as Block
logger = logging.getLogger("dinov2")
def named_apply(fn: Callable, module: nn.Module, name="", depth_first=True, include_root=False) -> nn.Module:
if not depth_first and include_root:
fn(module=module, name=name)
for child_name, child_module in module.named_children():
child_name = ".".join((name, child_name)) if name else child_name
named_apply(fn=fn, module=child_module, name=child_name, depth_first=depth_first, include_root=True)
if depth_first and include_root:
fn(module=module, name=name)
return module
class BlockChunk(nn.ModuleList):
def forward(self, x):
for b in self:
x = b(x)
return x
class DinoVisionTransformer(nn.Module):
def __init__(
self,
img_size=224,
patch_size=16,
in_chans=3,
embed_dim=768,
depth=12,
num_heads=12,
mlp_ratio=4.0,
qkv_bias=True,
ffn_bias=True,
proj_bias=True,
drop_path_rate=0.0,
drop_path_uniform=False,
init_values=None, # for layerscale: None or 0 => no layerscale
embed_layer=PatchEmbed,
act_layer=nn.GELU,
block_fn=Block,
ffn_layer="mlp",
block_chunks=1,
num_register_tokens=0,
interpolate_antialias=False,
interpolate_offset=0.1,
):
"""
Args:
img_size (int, tuple): input image size
patch_size (int, tuple): patch size
in_chans (int): number of input channels
embed_dim (int): embedding dimension
depth (int): depth of transformer
num_heads (int): number of attention heads
mlp_ratio (int): ratio of mlp hidden dim to embedding dim
qkv_bias (bool): enable bias for qkv if True
proj_bias (bool): enable bias for proj in attn if True
ffn_bias (bool): enable bias for ffn if True
drop_path_rate (float): stochastic depth rate
drop_path_uniform (bool): apply uniform drop rate across blocks
weight_init (str): weight init scheme
init_values (float): layer-scale init values
embed_layer (nn.Module): patch embedding layer
act_layer (nn.Module): MLP activation layer
block_fn (nn.Module): transformer block class
ffn_layer (str): "mlp", "swiglu", "swiglufused" or "identity"
block_chunks: (int) split block sequence into block_chunks units for FSDP wrap
num_register_tokens: (int) number of extra cls tokens (so-called "registers")
            interpolate_antialias: (bool) flag to apply anti-aliasing when interpolating positional embeddings
interpolate_offset: (float) work-around offset to apply when interpolating positional embeddings
"""
super().__init__()
norm_layer = partial(nn.LayerNorm, eps=1e-6)
self.num_features = self.embed_dim = embed_dim # num_features for consistency with other models
self.num_tokens = 1
self.n_blocks = depth
self.num_heads = num_heads
self.patch_size = patch_size
self.num_register_tokens = num_register_tokens
self.interpolate_antialias = interpolate_antialias
self.interpolate_offset = interpolate_offset
self.patch_embed = embed_layer(img_size=img_size, patch_size=patch_size, in_chans=in_chans, embed_dim=embed_dim)
num_patches = self.patch_embed.num_patches
self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim))
self.pos_embed = nn.Parameter(torch.zeros(1, num_patches + self.num_tokens, embed_dim))
assert num_register_tokens >= 0
self.register_tokens = (
nn.Parameter(torch.zeros(1, num_register_tokens, embed_dim)) if num_register_tokens else None
)
if drop_path_uniform is True:
dpr = [drop_path_rate] * depth
else:
dpr = [x.item() for x in torch.linspace(0, drop_path_rate, depth)] # stochastic depth decay rule
if ffn_layer == "mlp":
logger.info("using MLP layer as FFN")
ffn_layer = Mlp
elif ffn_layer == "swiglufused" or ffn_layer == "swiglu":
logger.info("using SwiGLU layer as FFN")
ffn_layer = SwiGLUFFNFused
elif ffn_layer == "identity":
logger.info("using Identity layer as FFN")
def f(*args, **kwargs):
return nn.Identity()
ffn_layer = f
else:
raise NotImplementedError
blocks_list = [
block_fn(
dim=embed_dim,
num_heads=num_heads,
mlp_ratio=mlp_ratio,
qkv_bias=qkv_bias,
proj_bias=proj_bias,
ffn_bias=ffn_bias,
drop_path=dpr[i],
norm_layer=norm_layer,
act_layer=act_layer,
ffn_layer=ffn_layer,
init_values=init_values,
)
for i in range(depth)
]
if block_chunks > 0:
self.chunked_blocks = True
chunked_blocks = []
chunksize = depth // block_chunks
for i in range(0, depth, chunksize):
# this is to keep the block index consistent if we chunk the block list
chunked_blocks.append([nn.Identity()] * i + blocks_list[i : i + chunksize])
self.blocks = nn.ModuleList([BlockChunk(p) for p in chunked_blocks])
else:
self.chunked_blocks = False
self.blocks = nn.ModuleList(blocks_list)
self.norm = norm_layer(embed_dim)
self.head = nn.Identity()
self.mask_token = nn.Parameter(torch.zeros(1, embed_dim))
self.init_weights()
def init_weights(self):
trunc_normal_(self.pos_embed, std=0.02)
nn.init.normal_(self.cls_token, std=1e-6)
if self.register_tokens is not None:
nn.init.normal_(self.register_tokens, std=1e-6)
named_apply(init_weights_vit_timm, self)
def interpolate_pos_encoding(self, x, w, h):
previous_dtype = x.dtype
npatch = x.shape[1] - 1
N = self.pos_embed.shape[1] - 1
if npatch == N and w == h:
return self.pos_embed
pos_embed = self.pos_embed.float()
class_pos_embed = pos_embed[:, 0]
patch_pos_embed = pos_embed[:, 1:]
dim = x.shape[-1]
w0 = w // self.patch_size
h0 = h // self.patch_size
# we add a small number to avoid floating point error in the interpolation
# see discussion at https://github.com/facebookresearch/dino/issues/8
# DINOv2 with register modify the interpolate_offset from 0.1 to 0.0
w0, h0 = w0 + self.interpolate_offset, h0 + self.interpolate_offset
# w0, h0 = w0 + 0.1, h0 + 0.1
sqrt_N = math.sqrt(N)
sx, sy = float(w0) / sqrt_N, float(h0) / sqrt_N
patch_pos_embed = nn.functional.interpolate(
patch_pos_embed.reshape(1, int(sqrt_N), int(sqrt_N), dim).permute(0, 3, 1, 2),
scale_factor=(sx, sy),
# (int(w0), int(h0)), # to solve the upsampling shape issue
mode="bicubic",
antialias=self.interpolate_antialias
)
assert int(w0) == patch_pos_embed.shape[-2]
assert int(h0) == patch_pos_embed.shape[-1]
patch_pos_embed = patch_pos_embed.permute(0, 2, 3, 1).view(1, -1, dim)
return torch.cat((class_pos_embed.unsqueeze(0), patch_pos_embed), dim=1).to(previous_dtype)
def prepare_tokens_with_masks(self, x, masks=None):
B, nc, w, h = x.shape
x = self.patch_embed(x)
if masks is not None:
x = torch.where(masks.unsqueeze(-1), self.mask_token.to(x.dtype).unsqueeze(0), x)
x = torch.cat((self.cls_token.expand(x.shape[0], -1, -1), x), dim=1)
x = x + self.interpolate_pos_encoding(x, w, h)
if self.register_tokens is not None:
x = torch.cat(
(
x[:, :1],
self.register_tokens.expand(x.shape[0], -1, -1),
x[:, 1:],
),
dim=1,
)
return x
def forward_features_list(self, x_list, masks_list):
x = [self.prepare_tokens_with_masks(x, masks) for x, masks in zip(x_list, masks_list)]
for blk in self.blocks:
x = blk(x)
all_x = x
output = []
for x, masks in zip(all_x, masks_list):
x_norm = self.norm(x)
output.append(
{
"x_norm_clstoken": x_norm[:, 0],
"x_norm_regtokens": x_norm[:, 1 : self.num_register_tokens + 1],
"x_norm_patchtokens": x_norm[:, self.num_register_tokens + 1 :],
"x_prenorm": x,
"masks": masks,
}
)
return output
def forward_features(self, x, masks=None):
if isinstance(x, list):
return self.forward_features_list(x, masks)
x = self.prepare_tokens_with_masks(x, masks)
for blk in self.blocks:
x = blk(x)
x_norm = self.norm(x)
return {
"x_norm_clstoken": x_norm[:, 0],
"x_norm_regtokens": x_norm[:, 1 : self.num_register_tokens + 1],
"x_norm_patchtokens": x_norm[:, self.num_register_tokens + 1 :],
"x_prenorm": x,
"masks": masks,
}
def _get_intermediate_layers_not_chunked(self, x, n=1):
x = self.prepare_tokens_with_masks(x)
# If n is an int, take the n last blocks. If it's a list, take them
output, total_block_len = [], len(self.blocks)
blocks_to_take = range(total_block_len - n, total_block_len) if isinstance(n, int) else n
for i, blk in enumerate(self.blocks):
x = blk(x)
if i in blocks_to_take:
output.append(x)
assert len(output) == len(blocks_to_take), f"only {len(output)} / {len(blocks_to_take)} blocks found"
return output
def _get_intermediate_layers_chunked(self, x, n=1):
x = self.prepare_tokens_with_masks(x)
output, i, total_block_len = [], 0, len(self.blocks[-1])
# If n is an int, take the n last blocks. If it's a list, take them
blocks_to_take = range(total_block_len - n, total_block_len) if isinstance(n, int) else n
for block_chunk in self.blocks:
for blk in block_chunk[i:]: # Passing the nn.Identity()
x = blk(x)
if i in blocks_to_take:
output.append(x)
i += 1
assert len(output) == len(blocks_to_take), f"only {len(output)} / {len(blocks_to_take)} blocks found"
return output
def get_intermediate_layers(
self,
x: torch.Tensor,
n: Union[int, Sequence] = 1, # Layers or n last layers to take
reshape: bool = False,
return_class_token: bool = False,
norm=True
) -> Tuple[Union[torch.Tensor, Tuple[torch.Tensor]]]:
if self.chunked_blocks:
outputs = self._get_intermediate_layers_chunked(x, n)
else:
outputs = self._get_intermediate_layers_not_chunked(x, n)
if norm:
outputs = [self.norm(out) for out in outputs]
class_tokens = [out[:, 0] for out in outputs]
outputs = [out[:, 1 + self.num_register_tokens:] for out in outputs]
if reshape:
B, _, w, h = x.shape
outputs = [
out.reshape(B, w // self.patch_size, h // self.patch_size, -1).permute(0, 3, 1, 2).contiguous()
for out in outputs
]
if return_class_token:
return tuple(zip(outputs, class_tokens))
return tuple(outputs)
def forward(self, *args, is_training=False, **kwargs):
ret = self.forward_features(*args, **kwargs)
if is_training:
return ret
else:
return self.head(ret["x_norm_clstoken"])
def init_weights_vit_timm(module: nn.Module, name: str = ""):
"""ViT weight initialization, original timm impl (for reproducibility)"""
if isinstance(module, nn.Linear):
trunc_normal_(module.weight, std=0.02)
if module.bias is not None:
nn.init.zeros_(module.bias)
def vit_small(patch_size=16, num_register_tokens=0, **kwargs):
model = DinoVisionTransformer(
patch_size=patch_size,
embed_dim=384,
depth=12,
num_heads=6,
mlp_ratio=4,
block_fn=partial(Block, attn_class=MemEffAttention),
num_register_tokens=num_register_tokens,
**kwargs,
)
return model
def vit_base(patch_size=16, num_register_tokens=0, **kwargs):
model = DinoVisionTransformer(
patch_size=patch_size,
embed_dim=768,
depth=12,
num_heads=12,
mlp_ratio=4,
block_fn=partial(Block, attn_class=MemEffAttention),
num_register_tokens=num_register_tokens,
**kwargs,
)
return model
def vit_large(patch_size=16, num_register_tokens=0, **kwargs):
model = DinoVisionTransformer(
patch_size=patch_size,
embed_dim=1024,
depth=24,
num_heads=16,
mlp_ratio=4,
block_fn=partial(Block, attn_class=MemEffAttention),
num_register_tokens=num_register_tokens,
**kwargs,
)
return model
def vit_giant2(patch_size=16, num_register_tokens=0, **kwargs):
"""
Close to ViT-giant, with embed-dim 1536 and 24 heads => embed-dim per head 64
"""
model = DinoVisionTransformer(
patch_size=patch_size,
embed_dim=1536,
depth=40,
num_heads=24,
mlp_ratio=4,
block_fn=partial(Block, attn_class=MemEffAttention),
num_register_tokens=num_register_tokens,
**kwargs,
)
return model
def DINOv2(model_name):
model_zoo = {
"vits": vit_small,
"vitb": vit_base,
"vitl": vit_large,
"vitg": vit_giant2
}
return model_zoo[model_name](
img_size=518,
patch_size=14,
init_values=1.0,
ffn_layer="mlp" if model_name != "vitg" else "swiglufused",
block_chunks=0,
num_register_tokens=0,
interpolate_antialias=False,
interpolate_offset=0.1
)
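# -----------------------------------------------------------------------------
# Editor's note: a minimal feature-extraction sketch, not part of the original
# file. The 518x518 input matches the img_size and patch_size=14 used by the
# DINOv2 factory above; the weights here are randomly initialized, not
# pretrained.
# -----------------------------------------------------------------------------
if __name__ == "__main__":
    backbone = DINOv2("vits").eval()
    x = torch.zeros(1, 3, 518, 518)
    with torch.no_grad():
        feats = backbone.get_intermediate_layers(x, n=4, reshape=True)
    # Four maps of shape [1, 384, 37, 37]: 518 / 14 = 37 patches per side.
    print([f.shape for f in feats])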
================================================
FILE: camera_pose_annotation/depth_estimation/Depth-Anything/depth_anything_v2/dinov2_layers/__init__.py
================================================
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
from .mlp import Mlp
from .patch_embed import PatchEmbed
from .swiglu_ffn import SwiGLUFFN, SwiGLUFFNFused
from .block import NestedTensorBlock
from .attention import MemEffAttention
================================================
FILE: camera_pose_annotation/depth_estimation/Depth-Anything/depth_anything_v2/dinov2_layers/attention.py
================================================
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
# References:
# https://github.com/facebookresearch/dino/blob/master/vision_transformer.py
# https://github.com/rwightman/pytorch-image-models/tree/master/timm/models/vision_transformer.py
import logging
from torch import Tensor
from torch import nn
logger = logging.getLogger("dinov2")
try:
from xformers.ops import memory_efficient_attention, unbind, fmha
XFORMERS_AVAILABLE = True
except ImportError:
logger.warning("xFormers not available")
XFORMERS_AVAILABLE = False
class Attention(nn.Module):
def __init__(
self,
dim: int,
num_heads: int = 8,
qkv_bias: bool = False,
proj_bias: bool = True,
attn_drop: float = 0.0,
proj_drop: float = 0.0,
) -> None:
super().__init__()
self.num_heads = num_heads
head_dim = dim // num_heads
self.scale = head_dim**-0.5
self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
self.attn_drop = nn.Dropout(attn_drop)
self.proj = nn.Linear(dim, dim, bias=proj_bias)
self.proj_drop = nn.Dropout(proj_drop)
def forward(self, x: Tensor) -> Tensor:
B, N, C = x.shape
qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4)
q, k, v = qkv[0] * self.scale, qkv[1], qkv[2]
attn = q @ k.transpose(-2, -1)
attn = attn.softmax(dim=-1)
attn = self.attn_drop(attn)
x = (attn @ v).transpose(1, 2).reshape(B, N, C)
x = self.proj(x)
x = self.proj_drop(x)
return x
class MemEffAttention(Attention):
def forward(self, x: Tensor, attn_bias=None) -> Tensor:
if not XFORMERS_AVAILABLE:
assert attn_bias is None, "xFormers is required for nested tensors usage"
return super().forward(x)
B, N, C = x.shape
qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, C // self.num_heads)
q, k, v = unbind(qkv, 2)
x = memory_efficient_attention(q, k, v, attn_bias=attn_bias)
x = x.reshape([B, N, C])
x = self.proj(x)
x = self.proj_drop(x)
return x
================================================
FILE: camera_pose_annotation/depth_estimation/Depth-Anything/depth_anything_v2/dinov2_layers/block.py
================================================
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
# References:
# https://github.com/facebookresearch/dino/blob/master/vision_transformer.py
# https://github.com/rwightman/pytorch-image-models/tree/master/timm/layers/patch_embed.py
import logging
from typing import Callable, List, Any, Tuple, Dict
import torch
from torch import nn, Tensor
from .attention import Attention, MemEffAttention
from .drop_path import DropPath
from .layer_scale import LayerScale
from .mlp import Mlp
logger = logging.getLogger("dinov2")
try:
from xformers.ops import fmha
from xformers.ops import scaled_index_add, index_select_cat
XFORMERS_AVAILABLE = True
except ImportError:
logger.warning("xFormers not available")
XFORMERS_AVAILABLE = False
class Block(nn.Module):
def __init__(
self,
dim: int,
num_heads: int,
mlp_ratio: float = 4.0,
qkv_bias: bool = False,
proj_bias: bool = True,
ffn_bias: bool = True,
drop: float = 0.0,
attn_drop: float = 0.0,
init_values=None,
drop_path: float = 0.0,
act_layer: Callable[..., nn.Module] = nn.GELU,
norm_layer: Callable[..., nn.Module] = nn.LayerNorm,
attn_class: Callable[..., nn.Module] = Attention,
ffn_layer: Callable[..., nn.Module] = Mlp,
) -> None:
super().__init__()
# print(f"biases: qkv: {qkv_bias}, proj: {proj_bias}, ffn: {ffn_bias}")
self.norm1 = norm_layer(dim)
self.attn = attn_class(
dim,
num_heads=num_heads,
qkv_bias=qkv_bias,
proj_bias=proj_bias,
attn_drop=attn_drop,
proj_drop=drop,
)
self.ls1 = LayerScale(dim, init_values=init_values) if init_values else nn.Identity()
self.drop_path1 = DropPath(drop_path) if drop_path > 0.0 else nn.Identity()
self.norm2 = norm_layer(dim)
mlp_hidden_dim = int(dim * mlp_ratio)
self.mlp = ffn_layer(
in_features=dim,
hidden_features=mlp_hidden_dim,
act_layer=act_layer,
drop=drop,
bias=ffn_bias,
)
self.ls2 = LayerScale(dim, init_values=init_values) if init_values else nn.Identity()
self.drop_path2 = DropPath(drop_path) if drop_path > 0.0 else nn.Identity()
self.sample_drop_ratio = drop_path
def forward(self, x: Tensor) -> Tensor:
def attn_residual_func(x: Tensor) -> Tensor:
return self.ls1(self.attn(self.norm1(x)))
def ffn_residual_func(x: Tensor) -> Tensor:
return self.ls2(self.mlp(self.norm2(x)))
if self.training and self.sample_drop_ratio > 0.1:
# the overhead is compensated only for a drop path rate larger than 0.1
x = drop_add_residual_stochastic_depth(
x,
residual_func=attn_residual_func,
sample_drop_ratio=self.sample_drop_ratio,
)
x = drop_add_residual_stochastic_depth(
x,
residual_func=ffn_residual_func,
sample_drop_ratio=self.sample_drop_ratio,
)
elif self.training and self.sample_drop_ratio > 0.0:
x = x + self.drop_path1(attn_residual_func(x))
            x = x + self.drop_path2(ffn_residual_func(x))
else:
x = x + attn_residual_func(x)
x = x + ffn_residual_func(x)
return x
def drop_add_residual_stochastic_depth(
x: Tensor,
residual_func: Callable[[Tensor], Tensor],
sample_drop_ratio: float = 0.0,
) -> Tensor:
# 1) extract subset using permutation
b, n, d = x.shape
sample_subset_size = max(int(b * (1 - sample_drop_ratio)), 1)
brange = (torch.randperm(b, device=x.device))[:sample_subset_size]
x_subset = x[brange]
# 2) apply residual_func to get residual
residual = residual_func(x_subset)
x_flat = x.flatten(1)
residual = residual.flatten(1)
residual_scale_factor = b / sample_subset_size
# 3) add the residual
x_plus_residual = torch.index_add(x_flat, 0, brange, residual.to(dtype=x.dtype), alpha=residual_scale_factor)
return x_plus_residual.view_as(x)
def get_branges_scales(x, sample_drop_ratio=0.0):
b, n, d = x.shape
sample_subset_size = max(int(b * (1 - sample_drop_ratio)), 1)
brange = (torch.randperm(b, device=x.device))[:sample_subset_size]
residual_scale_factor = b / sample_subset_size
return brange, residual_scale_factor
def add_residual(x, brange, residual, residual_scale_factor, scaling_vector=None):
if scaling_vector is None:
x_flat = x.flatten(1)
residual = residual.flatten(1)
x_plus_residual = torch.index_add(x_flat, 0, brange, residual.to(dtype=x.dtype), alpha=residual_scale_factor)
else:
x_plus_residual = scaled_index_add(
x, brange, residual.to(dtype=x.dtype), scaling=scaling_vector, alpha=residual_scale_factor
)
return x_plus_residual
attn_bias_cache: Dict[Tuple, Any] = {}
def get_attn_bias_and_cat(x_list, branges=None):
"""
this will perform the index select, cat the tensors, and provide the attn_bias from cache
"""
batch_sizes = [b.shape[0] for b in branges] if branges is not None else [x.shape[0] for x in x_list]
all_shapes = tuple((b, x.shape[1]) for b, x in zip(batch_sizes, x_list))
if all_shapes not in attn_bias_cache.keys():
seqlens = []
for b, x in zip(batch_sizes, x_list):
for _ in range(b):
seqlens.append(x.shape[1])
attn_bias = fmha.BlockDiagonalMask.from_seqlens(seqlens)
attn_bias._batch_sizes = batch_sizes
attn_bias_cache[all_shapes] = attn_bias
if branges is not None:
cat_tensors = index_select_cat([x.flatten(1) for x in x_list], branges).view(1, -1, x_list[0].shape[-1])
else:
tensors_bs1 = tuple(x.reshape([1, -1, *x.shape[2:]]) for x in x_list)
cat_tensors = torch.cat(tensors_bs1, dim=1)
return attn_bias_cache[all_shapes], cat_tensors
def drop_add_residual_stochastic_depth_list(
x_list: List[Tensor],
residual_func: Callable[[Tensor, Any], Tensor],
sample_drop_ratio: float = 0.0,
scaling_vector=None,
) -> Tensor:
# 1) generate random set of indices for dropping samples in the batch
branges_scales = [get_branges_scales(x, sample_drop_ratio=sample_drop_ratio) for x in x_list]
branges = [s[0] for s in branges_scales]
residual_scale_factors = [s[1] for s in branges_scales]
# 2) get attention bias and index+concat the tensors
attn_bias, x_cat = get_attn_bias_and_cat(x_list, branges)
# 3) apply residual_func to get residual, and split the result
residual_list = attn_bias.split(residual_func(x_cat, attn_bias=attn_bias)) # type: ignore
outputs = []
for x, brange, residual, residual_scale_factor in zip(x_list, branges, residual_list, residual_scale_factors):
outputs.append(add_residual(x, brange, residual, residual_scale_factor, scaling_vector).view_as(x))
return outputs
class NestedTensorBlock(Block):
def forward_nested(self, x_list: List[Tensor]) -> List[Tensor]:
"""
x_list contains a list of tensors to nest together and run
"""
assert isinstance(self.attn, MemEffAttention)
if self.training and self.sample_drop_ratio > 0.0:
def attn_residual_func(x: Tensor, attn_bias=None) -> Tensor:
return self.attn(self.norm1(x), attn_bias=attn_bias)
def ffn_residual_func(x: Tensor, attn_bias=None) -> Tensor:
return self.mlp(self.norm2(x))
x_list = drop_add_residual_stochastic_depth_list(
x_list,
residual_func=attn_residual_func,
sample_drop_ratio=self.sample_drop_ratio,
scaling_vector=self.ls1.gamma if isinstance(self.ls1, LayerScale) else None,
)
x_list = drop_add_residual_stochastic_depth_list(
x_list,
residual_func=ffn_residual_func,
sample_drop_ratio=self.sample_drop_ratio,
                scaling_vector=self.ls2.gamma if isinstance(self.ls2, LayerScale) else None,
)
return x_list
else:
def attn_residual_func(x: Tensor, attn_bias=None) -> Tensor:
return self.ls1(self.attn(self.norm1(x), attn_bias=attn_bias))
def ffn_residual_func(x: Tensor, attn_bias=None) -> Tensor:
return self.ls2(self.mlp(self.norm2(x)))
attn_bias, x = get_attn_bias_and_cat(x_list)
x = x + attn_residual_func(x, attn_bias=attn_bias)
x = x + ffn_residual_func(x)
return attn_bias.split(x)
def forward(self, x_or_x_list):
if isinstance(x_or_x_list, Tensor):
return super().forward(x_or_x_list)
elif isinstance(x_or_x_list, list):
assert XFORMERS_AVAILABLE, "Please install xFormers for nested tensors usage"
return self.forward_nested(x_or_x_list)
else:
raise AssertionError
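The stochastic-depth helpers above compute the residual branch on a random subset of the batch and rescale it by b / subset_size, so the expected update matches applying the branch to every sample. A minimal sketch of that scaling, using a toy residual function in place of the attention/FFN branch (not part of the repository):

import torch

b, n, d = 8, 4, 16
sample_drop_ratio = 0.25
x = torch.randn(b, n, d)
residual_func = lambda t: 0.5 * t                        # stand-in for the attn/ffn branch

subset_size = max(int(b * (1 - sample_drop_ratio)), 1)   # 6 of the 8 samples are kept
brange = torch.randperm(b)[:subset_size]
scale = b / subset_size                                  # 8 / 6, compensates for the dropped samples

residual = residual_func(x[brange]).flatten(1)
out = torch.index_add(x.flatten(1), 0, brange, residual, alpha=scale).view_as(x)
assert out.shape == x.shape                              # untouched rows keep their original values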
================================================
FILE: camera_pose_annotation/depth_estimation/Depth-Anything/depth_anything_v2/dinov2_layers/drop_path.py
================================================
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
# References:
# https://github.com/facebookresearch/dino/blob/master/vision_transformer.py
# https://github.com/rwightman/pytorch-image-models/tree/master/timm/layers/drop.py
from torch import nn
def drop_path(x, drop_prob: float = 0.0, training: bool = False):
if drop_prob == 0.0 or not training:
return x
keep_prob = 1 - drop_prob
shape = (x.shape[0],) + (1,) * (x.ndim - 1) # work with diff dim tensors, not just 2D ConvNets
random_tensor = x.new_empty(shape).bernoulli_(keep_prob)
if keep_prob > 0.0:
random_tensor.div_(keep_prob)
output = x * random_tensor
return output
class DropPath(nn.Module):
"""Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks)."""
def __init__(self, drop_prob=None):
super(DropPath, self).__init__()
self.drop_prob = drop_prob
def forward(self, x):
return drop_path(x, self.drop_prob, self.training)
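A quick behavioural sketch of the drop_path function above (not from the repository): during training roughly drop_prob of the samples have their residual zeroed while the survivors are rescaled by 1 / keep_prob, so the expected output is unchanged; outside training the input passes through untouched.

import torch

x = torch.ones(1000, 3)
out = drop_path(x, drop_prob=0.2, training=True)
survivors = out.sum(dim=1) > 0
print(survivors.float().mean())                               # roughly 0.8 of the rows survive
print(out[survivors][0])                                      # surviving rows are scaled to 1 / 0.8 = 1.25
print(drop_path(x, drop_prob=0.2, training=False).equal(x))   # True: identity when not training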
================================================
FILE: camera_pose_annotation/depth_estimation/Depth-Anything/depth_anything_v2/dinov2_layers/layer_scale.py
================================================
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
# Modified from: https://github.com/huggingface/pytorch-image-models/blob/main/timm/models/vision_transformer.py#L103-L110
from typing import Union
import torch
from torch import Tensor
from torch import nn
class LayerScale(nn.Module):
def __init__(
self,
dim: int,
init_values: Union[float, Tensor] = 1e-5,
inplace: bool = False,
) -> None:
super().__init__()
self.inplace = inplace
self.gamma = nn.Parameter(init_values * torch.ones(dim))
def forward(self, x: Tensor) -> Tensor:
return x.mul_(self.gamma) if self.inplace else x * self.gamma
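A short usage sketch (not from the repository): gamma is a learnable per-channel scale, initialised to a small value such as 1e-5 so that a freshly initialised residual branch contributes almost nothing at the start of training.

import torch

ls = LayerScale(dim=4, init_values=1e-5)
x = torch.randn(2, 3, 4)          # (batch, tokens, dim)
y = ls(x)                         # same shape; each channel multiplied by gamma
print(y.shape)                    # torch.Size([2, 3, 4])
print((y / x)[0, 0])              # every entry is ~1e-5, the initial gamma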
================================================
FILE: camera_pose_annotation/depth_estimation/Depth-Anything/depth_anything_v2/dinov2_layers/mlp.py
================================================
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
# References:
# https://github.com/facebookresearch/dino/blob/master/vision_transformer.py
# https://github.com/rwightman/pytorch-image-models/tree/master/timm/layers/mlp.py
from typing import Callable, Optional
from torch import Tensor, nn
class Mlp(nn.Module):
def __init__(
self,
in_features: int,
hidden_features: Optional[int] = None,
out_features: Optional[int] = None,
act_layer: Callable[..., nn.Module] = nn.GELU,
drop: float = 0.0,
bias: bool = True,
) -> None:
super().__init__()
out_features = out_features or in_features
hidden_features = hidden_features or in_features
self.fc1 = nn.Linear(in_features, hidden_features, bias=bias)
self.act = act_layer()
self.fc2 = nn.Linear(hidden_features, out_features, bias=bias)
self.drop = nn.Dropout(drop)
def forward(self, x: Tensor) -> Tensor:
x = self.fc1(x)
x = self.act(x)
x = self.drop(x)
x = self.fc2(x)
x = self.drop(x)
return x
================================================
FILE: camera_pose_annotation/depth_estimation/Depth-Anything/depth_anything_v2/dinov2_layers/patch_embed.py
================================================
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
# References:
# https://github.com/facebookresearch/dino/blob/master/vision_transformer.py
# https://github.com/rwightman/pytorch-image-models/tree/master/timm/layers/patch_embed.py
from typing import Callable, Optional, Tuple, Union
from torch import Tensor
import torch.nn as nn
def make_2tuple(x):
if isinstance(x, tuple):
assert len(x) == 2
return x
assert isinstance(x, int)
return (x, x)
class PatchEmbed(nn.Module):
"""
2D image to patch embedding: (B,C,H,W) -> (B,N,D)
Args:
img_size: Image size.
patch_size: Patch token size.
in_chans: Number of input image channels.
embed_dim: Number of linear projection output channels.
norm_layer: Normalization layer.
"""
def __init__(
self,
img_size: Union[int, Tuple[int, int]] = 224,
patch_size: Union[int, Tuple[int, int]] = 16,
in_chans: int = 3,
embed_dim: int = 768,
norm_layer: Optional[Callable] = None,
flatten_embedding: bool = True,
) -> None:
super().__init__()
image_HW = make_2tuple(img_size)
patch_HW = make_2tuple(patch_size)
patch_grid_size = (
image_HW[0] // patch_HW[0],
image_HW[1] // patch_HW[1],
)
self.img_size = image_HW
self.patch_size = patch_HW
self.patches_resolution = patch_grid_size
self.num_patches = patch_grid_size[0] * patch_grid_size[1]
self.in_chans = in_chans
self.embed_dim = embed_dim
self.flatten_embedding = flatten_embedding
self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_HW, stride=patch_HW)
self.norm = norm_layer(embed_dim) if norm_layer else nn.Identity()
def forward(self, x: Tensor) -> Tensor:
_, _, H, W = x.shape
patch_H, patch_W = self.patch_size
assert H % patch_H == 0, f"Input image height {H} is not a multiple of patch height {patch_H}"
assert W % patch_W == 0, f"Input image width {W} is not a multiple of patch width: {patch_W}"
x = self.proj(x) # B C H W
H, W = x.size(2), x.size(3)
x = x.flatten(2).transpose(1, 2) # B HW C
x = self.norm(x)
if not self.flatten_embedding:
x = x.reshape(-1, H, W, self.embed_dim) # B H W C
return x
def flops(self) -> float:
Ho, Wo = self.patches_resolution
flops = Ho * Wo * self.embed_dim * self.in_chans * (self.patch_size[0] * self.patch_size[1])
if self.norm is not None:
flops += Ho * Wo * self.embed_dim
return flops
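A shape sketch for PatchEmbed (not from the repository): with the defaults, a 224x224 RGB image yields a sequence of (224/16)^2 = 196 tokens with 768 channels; the input resolution must be divisible by the patch size, as the asserts in forward enforce.

import torch

embed = PatchEmbed(img_size=224, patch_size=16, in_chans=3, embed_dim=768)
x = torch.randn(2, 3, 224, 224)
tokens = embed(x)
print(tokens.shape)        # torch.Size([2, 196, 768])
print(embed.num_patches)   # 196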
================================================
FILE: camera_pose_annotation/depth_estimation/Depth-Anything/depth_anything_v2/dinov2_layers/swiglu_ffn.py
================================================
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
from typing import Callable, Optional
from torch import Tensor, nn
import torch.nn.functional as F
class SwiGLUFFN(nn.Module):
def __init__(
self,
in_features: int,
hidden_features: Optional[int] = None,
out_features: Optional[int] = None,
act_layer: Callable[..., nn.Module] = None,
drop: float = 0.0,
bias: bool = True,
) -> None:
super().__init__()
out_features = out_features or in_features
hidden_features = hidden_features or in_features
self.w12 = nn.Linear(in_features, 2 * hidden_features, bias=bias)
self.w3 = nn.Linear(hidden_features, out_features, bias=bias)
def forward(self, x: Tensor) -> Tensor:
x12 = self.w12(x)
x1, x2 = x12.chunk(2, dim=-1)
hidden = F.silu(x1) * x2
return self.w3(hidden)
try:
from xformers.ops import SwiGLU
XFORMERS_AVAILABLE = True
except ImportError:
SwiGLU = SwiGLUFFN
XFORMERS_AVAILABLE = False
class SwiGLUFFNFused(SwiGLU):
def __init__(
self,
in_features: int,
hidden_features: Optional[int] = None,
out_features: Optional[int] = None,
act_layer: Callable[..., nn.Module] = None,
drop: float = 0.0,
bias: bool = True,
) -> None:
out_features = out_features or in_features
hidden_features = hidden_features or in_features
hidden_features = (int(hidden_features * 2 / 3) + 7) // 8 * 8
super().__init__(
in_features=in_features,
hidden_features=hidden_features,
out_features=out_features,
bias=bias,
)
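A small numeric sketch (not from the repository) of how SwiGLUFFNFused shrinks the requested hidden width: because SwiGLU uses two input projections, the width is reduced to 2/3 of the request and then rounded up to a multiple of 8.

def fused_hidden(hidden_features: int) -> int:
    # same arithmetic as in SwiGLUFFNFused.__init__ above
    return (int(hidden_features * 2 / 3) + 7) // 8 * 8

print(fused_hidden(4096))   # 2736
print(fused_hidden(768))    # 512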
================================================
FILE: camera_pose_annotation/depth_estimation/Depth-Anything/depth_anything_v2/dpt.py
================================================
import cv2
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision.transforms import Compose
from .dinov2 import DINOv2
from .util.blocks import FeatureFusionBlock, _make_scratch
from .util.transform import Resize, NormalizeImage, PrepareForNet
def _make_fusion_block(features, use_bn, size=None):
return FeatureFusionBlock(
features,
nn.ReLU(False),
deconv=False,
bn=use_bn,
expand=False,
align_corners=True,
size=size,
)
class ConvBlock(nn.Module):
def __init__(self, in_feature, out_feature):
super().__init__()
self.conv_block = nn.Sequential(
nn.Conv2d(in_feature, out_feature, kernel_size=3, stride=1, padding=1),
nn.BatchNorm2d(out_feature),
nn.ReLU(True)
)
def forward(self, x):
return self.conv_block(x)
class DPTHead(nn.Module):
def __init__(
self,
in_channels,
features=256,
use_bn=False,
out_channels=[256, 512, 1024, 1024],
use_clstoken=False
):
super(DPTHead, self).__init__()
self.use_clstoken = use_clstoken
self.projects = nn.ModuleList([
nn.Conv2d(
in_channels=in_channels,
out_channels=out_channel,
kernel_size=1,
stride=1,
padding=0,
) for out_channel in out_channels
])
self.resize_layers = nn.ModuleList([
nn.ConvTranspose2d(
in_channels=out_channels[0],
out_channels=out_channels[0],
kernel_size=4,
stride=4,
padding=0),
nn.ConvTranspose2d(
in_channels=out_channels[1],
out_channels=out_channels[1],
kernel_size=2,
stride=2,
padding=0),
nn.Identity(),
nn.Conv2d(
in_channels=out_channels[3],
out_channels=out_channels[3],
kernel_size=3,
stride=2,
padding=1)
])
if use_clstoken:
self.readout_projects = nn.ModuleList()
for _ in range(len(self.projects)):
self.readout_projects.append(
nn.Sequential(
nn.Linear(2 * in_channels, in_channels),
nn.GELU()))
self.scratch = _make_scratch(
out_channels,
features,
groups=1,
expand=False,
)
self.scratch.stem_transpose = None
self.scratch.refinenet1 = _make_fusion_block(features, use_bn)
self.scratch.refinenet2 = _make_fusion_block(features, use_bn)
self.scratch.refinenet3 = _make_fusion_block(features, use_bn)
self.scratch.refinenet4 = _make_fusion_block(features, use_bn)
head_features_1 = features
head_features_2 = 32
self.scratch.output_conv1 = nn.Conv2d(head_features_1, head_features_1 // 2, kernel_size=3, stride=1, padding=1)
self.scratch.output_conv2 = nn.Sequential(
nn.Conv2d(head_features_1 // 2, head_features_2, kernel_size=3, stride=1, padding=1),
nn.ReLU(True),
nn.Conv2d(head_features_2, 1, kernel_size=1, stride=1, padding=0),
nn.ReLU(True),
nn.Identity(),
)
def forward(self, out_features, patch_h, patch_w):
out = []
for i, x in enumerate(out_features):
if self.use_clstoken:
x, cls_token = x[0], x[1]
readout = cls_token.unsqueeze(1).expand_as(x)
x = self.readout_projects[i](torch.cat((x, readout), -1))
else:
x = x[0]
x = x.permute(0, 2, 1).reshape((x.shape[0], x.shape[-1], patch_h, patch_w))
x = self.projects[i](x)
x = self.resize_layers[i](x)
out.append(x)
layer_1, layer_2, layer_3, layer_4 = out
layer_1_rn = self.scratch.layer1_rn(layer_1)
layer_2_rn = self.scratch.layer2_rn(layer_2)
layer_3_rn = self.scratch.layer3_rn(layer_3)
layer_4_rn = self.scratch.layer4_rn(layer_4)
path_4 = self.scratch.refinenet4(layer_4_rn, size=layer_3_rn.shape[2:])
path_3 = self.scratch.refinenet3(path_4, layer_3_rn, size=layer_2_rn.shape[2:])
path_2 = self.scratch.refinenet2(path_3, layer_2_rn, size=layer_1_rn.shape[2:])
path_1 = self.scratch.refinenet1(path_2, layer_1_rn)
out = self.scratch.output_conv1(path_1)
out = F.interpolate(out, (int(patch_h * 14), int(patch_w * 14)), mode="bilinear", align_corners=True)
out = self.scratch.output_conv2(out)
return out
class DepthAnythingV2(nn.Module):
def __init__(
self,
encoder='vitl',
features=256,
out_channels=[256, 512, 1024, 1024],
use_bn=False,
use_clstoken=False
):
super(DepthAnythingV2, self).__init__()
self.intermediate_layer_idx = {
'vits': [2, 5, 8, 11],
'vitb': [2, 5, 8, 11],
'vitl': [4, 11, 17, 23],
'vitg': [9, 19, 29, 39]
}
self.encoder = encoder
self.pretrained = DINOv2(model_name=encoder)
self.depth_head = DPTHead(self.pretrained.embed_dim, features, use_bn, out_channels=out_channels, use_clstoken=use_clstoken)
def forward(self, x):
patch_h, patch_w = x.shape[-2] // 14, x.shape[-1] // 14
features = self.pretrained.get_intermediate_layers(x, self.intermediate_layer_idx[self.encoder], return_class_token=True)
depth = self.depth_head(features, patch_h, patch_w)
depth = F.relu(depth)
return depth.squeeze(1)
@torch.no_grad()
def infer_image(self, raw_image, input_size=518):
image, (h, w) = self.image2tensor(raw_image, input_size)
depth = self.forward(image)
depth = F.interpolate(depth[:, None], (h, w), mode="bilinear", align_corners=True)[0, 0]
return depth.cpu().numpy()
def image2tensor(self, raw_image, input_size=518):
transform = Compose([
Resize(
width=input_size,
height=input_size,
resize_target=False,
keep_aspect_ratio=True,
ensure_multiple_of=14,
resize_method='lower_bound',
image_interpolation_method=cv2.INTER_CUBIC,
),
NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
PrepareForNet(),
])
h, w = raw_image.shape[:2]
image = cv2.cvtColor(raw_image, cv2.COLOR_BGR2RGB) / 255.0
image = transform({'image': image})['image']
image = torch.from_numpy(image).unsqueeze(0)
DEVICE = 'cuda' if torch.cuda.is_available() else 'mps' if torch.backends.mps.is_available() else 'cpu'
image = image.to(DEVICE)
return image, (h, w)
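A minimal inference sketch (not from the repository); the checkpoint filename below is a placeholder for a Depth-Anything-V2 ViT-L weight file downloaded separately, and the device selection mirrors the logic used inside image2tensor.

import cv2
import torch

DEVICE = 'cuda' if torch.cuda.is_available() else 'mps' if torch.backends.mps.is_available() else 'cpu'

model = DepthAnythingV2(encoder='vitl', features=256, out_channels=[256, 512, 1024, 1024])
model.load_state_dict(torch.load('depth_anything_v2_vitl.pth', map_location='cpu'))  # placeholder path
model = model.to(DEVICE).eval()

raw = cv2.imread('frame.png')                   # BGR image of arbitrary resolution
depth = model.infer_image(raw, input_size=518)
print(depth.shape)                              # (H, W) numpy array, one depth value per input pixel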
================================================
FILE: camera_pose_annotation/depth_estimation/Depth-Anything/depth_anything_v2/util/blocks.py
================================================
import torch.nn as nn
def _make_scratch(in_shape, out_shape, groups=1, expand=False):
scratch = nn.Module()
out_shape1 = out_shape
out_shape2 = out_shape
out_shape3 = out_shape
if len(in_shape) >= 4:
out_shape4 = out_shape
if expand:
out_shape1 = out_shape
out_shape2 = out_shape * 2
out_shape3 = out_shape * 4
if len(in_shape) >= 4:
out_shape4 = out_shape * 8
scratch.layer1_rn = nn.Conv2d(in_shape[0], out_shape1, kernel_size=3, stride=1, padding=1, bias=False, groups=groups)
scratch.layer2_rn = nn.Conv2d(in_shape[1], out_shape2, kernel_size=3, stride=1, padding=1, bias=False, groups=groups)
scratch.layer3_rn = nn.Conv2d(in_shape[2], out_shape3, kernel_size=3, stride=1, padding=1, bias=False, groups=groups)
if len(in_shape) >= 4:
scratch.layer4_rn = nn.Conv2d(in_shape[3], out_shape4, kernel_size=3, stride=1, padding=1, bias=False, groups=groups)
return scratch
class ResidualConvUnit(nn.Module):
"""Residual convolution module.
"""
def __init__(self, features, activation, bn):
"""Init.
Args:
features (int): number of features
"""
super().__init__()
self.bn = bn
self.groups=1
self.conv1 = nn.Conv2d(features, features, kernel_size=3, stride=1, padding=1, bias=True, groups=self.groups)
self.conv2 = nn.Conv2d(features, features, kernel_size=3, stride=1, padding=1, bias=True, groups=self.groups)
if self.bn == True:
self.bn1 = nn.BatchNorm2d(features)
self.bn2 = nn.BatchNorm2d(features)
self.activation = activation
self.skip_add = nn.quantized.FloatFunctional()
def forward(self, x):
"""Forward pass.
Args:
x (tensor): input
Returns:
tensor: output
"""
out = self.activation(x)
out = self.conv1(out)
if self.bn == True:
out = self.bn1(out)
out = self.activation(out)
out = self.conv2(out)
if self.bn == True:
out = self.bn2(out)
if self.groups > 1:
out = self.conv_merge(out)
return self.skip_add.add(out, x)
class FeatureFusionBlock(nn.Module):
"""Feature fusion block.
"""
def __init__(
self,
features,
activation,
deconv=False,
bn=False,
expand=False,
align_corners=True,
size=None
):
"""Init.
Args:
features (int): number of features
"""
super(FeatureFusionBlock, self).__init__()
self.deconv = deconv
self.align_corners = align_corners
self.groups=1
self.expand = expand
out_features = features
if self.expand == True:
out_features = features // 2
self.out_conv = nn.Conv2d(features, out_features, kernel_size=1, stride=1, padding=0, bias=True, groups=1)
self.resConfUnit1 = ResidualConvUnit(features, activation, bn)
self.resConfUnit2 = ResidualConvUnit(features, activation, bn)
self.skip_add = nn.quantized.FloatFunctional()
self.size=size
def forward(self, *xs, size=None):
"""Forward pass.
Returns:
tensor: output
"""
output = xs[0]
if len(xs) == 2:
res = self.resConfUnit1(xs[1])
output = self.skip_add.add(output, res)
output = self.resConfUnit2(output)
if (size is None) and (self.size is None):
modifier = {"scale_factor": 2}
elif size is None:
modifier = {"size": self.size}
else:
modifier = {"size": size}
output = nn.functional.interpolate(output, **modifier, mode="bilinear", align_corners=self.align_corners)
output = self.out_conv(output)
return output
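A small shape sketch (not from the repository): with expand=False every refinement level is projected to the same channel count, which is how DPTHead above builds its layer1_rn..layer4_rn projections.

import torch

scratch = _make_scratch([256, 512, 1024, 1024], 256, groups=1, expand=False)
feat = torch.randn(1, 512, 32, 32)            # a level-2 feature map
print(scratch.layer2_rn(feat).shape)          # torch.Size([1, 256, 32, 32])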
================================================
FILE: camera_pose_annotation/depth_estimation/Depth-Anything/depth_anything_v2/util/transform.py
================================================
import numpy as np
import cv2
class Resize(object):
"""Resize sample to given size (width, height).
"""
def __init__(
self,
width,
height,
resize_target=True,
keep_aspect_ratio=False,
ensure_multiple_of=1,
resize_method="lower_bound",
image_interpolation_method=cv2.INTER_AREA,
):
"""Init.
Args:
            width (int): desired output width
SYMBOL INDEX (2762 symbols across 348 files)
FILE: camera_pose_annotation/camera_tracking/camera_tracking.py
function image_stream (line 41) | def image_stream(
function save_full_reconstruction (line 103) | def save_full_reconstruction(
function parse_args (line 145) | def parse_args():
function main (line 177) | def main():
FILE: camera_pose_annotation/camera_tracking/inference_batch.py
function process_single_row (line 21) | def process_single_row(row, index, args, worker_id=0):
function worker (line 42) | def worker(task_queue, args, worker_id, pbar):
function parse_args (line 56) | def parse_args():
function main (line 77) | def main():
FILE: camera_pose_annotation/cvd_opt/cvd_opt.py
function save_depth (line 39) | def save_depth(path, depths):
function gradient_loss (line 52) | def gradient_loss(gt, pred, u):
function si_loss (line 76) | def si_loss(gt, pred):
function sobel_fg_alpha (line 87) | def sobel_fg_alpha(disp, mode="sobel", beta=10.0):
function consistency_loss (line 101) | def consistency_loss(
function parse_args (line 245) | def parse_args():
FILE: camera_pose_annotation/cvd_opt/geometry_utils.py
function to_homogeneous (line 30) | def to_homogeneous(input_tensor: Tensor, dim: int = 0) -> Tensor:
class BackprojectDepth (line 37) | class BackprojectDepth(nn.Module):
method __init__ (line 43) | def __init__(self, height: int, width: int):
method forward (line 70) | def forward(self, depth_b1hw: Tensor, invK_b44: Tensor) -> Tensor:
class Project3D (line 80) | class Project3D(jit.ScriptModule):
method __init__ (line 83) | def __init__(self, eps: float = 1e-8):
method forward (line 89) | def forward(
class NormalGenerator (line 110) | class NormalGenerator(nn.Module):
method __init__ (line 113) | def __init__(
method forward (line 131) | def forward(self, depth_b1hw: Tensor, invK_b44: Tensor) -> Tensor:
function get_camera_rays (line 158) | def get_camera_rays(
function pose_distance (line 191) | def pose_distance(pose_b44):
function qvec2rotmat (line 206) | def qvec2rotmat(qvec):
function rotx (line 227) | def rotx(t):
function roty (line 234) | def roty(t):
function rotz (line 241) | def rotz(t):
FILE: camera_pose_annotation/cvd_opt/inference_batch.py
function process_single_row (line 16) | def process_single_row(row, index, args, worker_id=0):
function worker (line 37) | def worker(task_queue, args, worker_id, pbar):
function parse_args (line 49) | def parse_args():
function main (line 70) | def main():
FILE: camera_pose_annotation/cvd_opt/preprocess/core/corr.py
class CorrBlock (line 30) | class CorrBlock:
method __init__ (line 33) | def __init__(self, fmap1, fmap2, num_levels=4, radius=4):
method __call__ (line 49) | def __call__(self, coords):
method corr (line 73) | def corr(cls, fmap1, fmap2):
class AlternateCorrBlock (line 84) | class AlternateCorrBlock:
method __init__ (line 87) | def __init__(self, fmap1, fmap2, num_levels=4, radius=4):
method __call__ (line 97) | def __call__(self, coords):
FILE: camera_pose_annotation/cvd_opt/preprocess/core/datasets.py
class FlowDataset (line 31) | class FlowDataset(data.Dataset):
method __init__ (line 34) | def __init__(self, aug_params=None, sparse=False):
method __getitem__ (line 49) | def __getitem__(self, index):
method __rmul__ (line 107) | def __rmul__(self, v):
method __len__ (line 112) | def __len__(self):
class MpiSintel (line 116) | class MpiSintel(FlowDataset):
method __init__ (line 119) | def __init__(
class FlyingChairs (line 143) | class FlyingChairs(FlowDataset):
method __init__ (line 146) | def __init__(
class FlyingThings3D (line 168) | class FlyingThings3D(FlowDataset):
method __init__ (line 171) | def __init__(
class KITTI (line 199) | class KITTI(FlowDataset):
method __init__ (line 202) | def __init__(self, aug_params=None, split='training', root='datasets/K...
class HD1K (line 220) | class HD1K(FlowDataset):
method __init__ (line 223) | def __init__(self, aug_params=None, root='datasets/HD1k'):
function fetch_dataloader (line 248) | def fetch_dataloader(args, TRAIN_DS='C+T+K+S+H'):
FILE: camera_pose_annotation/cvd_opt/preprocess/core/extractor.py
class ResidualBlock (line 22) | class ResidualBlock(nn.Module):
method __init__ (line 25) | def __init__(self, in_planes, planes, norm_fn='group', stride=1):
method forward (line 68) | def forward(self, x):
class BottleneckBlock (line 79) | class BottleneckBlock(nn.Module):
method __init__ (line 82) | def __init__(self, in_planes, planes, norm_fn='group', stride=1):
method forward (line 130) | def forward(self, x):
class BasicEncoder (line 142) | class BasicEncoder(nn.Module):
method __init__ (line 145) | def __init__(self, output_dim=128, norm_fn='batch', dropout=0.0):
method _make_layer (line 185) | def _make_layer(self, dim, stride=1):
method forward (line 193) | def forward(self, x):
class SmallEncoder (line 220) | class SmallEncoder(nn.Module):
method __init__ (line 223) | def __init__(self, output_dim=128, norm_fn='batch', dropout=0.0):
method _make_layer (line 262) | def _make_layer(self, dim, stride=1):
method forward (line 270) | def forward(self, x):
FILE: camera_pose_annotation/cvd_opt/preprocess/core/raft.py
class autocast (line 34) | class autocast: # pylint: disable=invalid-name
method __init__ (line 36) | def __init__(self, enabled):
method __enter__ (line 39) | def __enter__(self):
method __exit__ (line 42) | def __exit__(self, *args):
class RAFT (line 46) | class RAFT(nn.Module):
method __init__ (line 49) | def __init__(self, args):
method freeze_bn (line 90) | def freeze_bn(self):
method initialize_flow (line 95) | def initialize_flow(self, img):
method upsample_flow (line 105) | def upsample_flow(self, flow, mask):
method forward (line 119) | def forward(
FILE: camera_pose_annotation/cvd_opt/preprocess/core/update.py
class FlowHead (line 23) | class FlowHead(nn.Module):
method __init__ (line 25) | def __init__(self, input_dim=128, hidden_dim=256):
method forward (line 31) | def forward(self, x):
class ConvGRU (line 35) | class ConvGRU(nn.Module):
method __init__ (line 38) | def __init__(self, hidden_dim=128, input_dim=192 + 128):
method forward (line 44) | def forward(self, h, x):
class SepConvGRU (line 55) | class SepConvGRU(nn.Module):
method __init__ (line 58) | def __init__(self, hidden_dim=128, input_dim=192 + 128):
method forward (line 80) | def forward(self, h, x):
class SmallMotionEncoder (line 98) | class SmallMotionEncoder(nn.Module):
method __init__ (line 101) | def __init__(self, args):
method forward (line 109) | def forward(self, flow, corr):
class BasicMotionEncoder (line 118) | class BasicMotionEncoder(nn.Module):
method __init__ (line 121) | def __init__(self, args):
method forward (line 130) | def forward(self, flow, corr):
class SmallUpdateBlock (line 141) | class SmallUpdateBlock(nn.Module):
method __init__ (line 144) | def __init__(self, args, hidden_dim=96):
method forward (line 150) | def forward(self, net, inp, corr, flow):
class BasicUpdateBlock (line 159) | class BasicUpdateBlock(nn.Module):
method __init__ (line 162) | def __init__(self, args, hidden_dim=128, input_dim=128):
method forward (line 175) | def forward(self, net, inp, corr, flow, upsample=True):
FILE: camera_pose_annotation/cvd_opt/preprocess/core/utils/augmentor.py
class FlowAugmentor (line 30) | class FlowAugmentor:
method __init__ (line 33) | def __init__(self, crop_size, min_scale=-0.2, max_scale=0.5, do_flip=T...
method color_transform (line 55) | def color_transform(self, img1, img2):
method eraser_transform (line 73) | def eraser_transform(self, img1, img2, bounds=[50, 100]): # pylint: d...
method spatial_transform (line 88) | def spatial_transform(self, img1, img2, flow):
method __call__ (line 140) | def __call__(self, img1, img2, flow):
class SparseFlowAugmentor (line 152) | class SparseFlowAugmentor:
method __init__ (line 155) | def __init__(self, crop_size, min_scale=-0.2, max_scale=0.5, do_flip=F...
method color_transform (line 176) | def color_transform(self, img1, img2):
method eraser_transform (line 184) | def eraser_transform(self, img1, img2):
method resize_sparse_flow_map (line 197) | def resize_sparse_flow_map(self, flow, valid, fx=1.0, fy=1.0):
method spatial_transform (line 232) | def spatial_transform(self, img1, img2, flow, valid):
method __call__ (line 280) | def __call__(self, img1, img2, flow, valid):
FILE: camera_pose_annotation/cvd_opt/preprocess/core/utils/flow_viz.py
function make_colorwheel (line 24) | def make_colorwheel():
function flow_uv_to_colors (line 76) | def flow_uv_to_colors(u, v, convert_to_bgr=False):
function flow_to_image (line 115) | def flow_to_image(flow_uv, clip_flow=None, convert_to_bgr=False):
FILE: camera_pose_annotation/cvd_opt/preprocess/core/utils/frame_utils.py
function readFlow (line 35) | def readFlow(fn):
function readPFM (line 57) | def readPFM(file):
function writeFlow (line 89) | def writeFlow(filename, uv, v=None):
function readFlowKITTI (line 121) | def readFlowKITTI(filename):
function readDispKITTI (line 129) | def readDispKITTI(filename):
function writeFlowKITTI (line 136) | def writeFlowKITTI(filename, uv):
function read_gen (line 143) | def read_gen(file_name, pil=False):
FILE: camera_pose_annotation/cvd_opt/preprocess/core/utils/utils.py
class InputPadder (line 26) | class InputPadder:
method __init__ (line 29) | def __init__(self, dims, mode='sintel'):
method pad (line 43) | def pad(self, *inputs):
method unpad (line 46) | def unpad(self, x):
function forward_interpolate (line 52) | def forward_interpolate(flow):
function bilinear_sampler (line 86) | def bilinear_sampler(img, coords, mode='bilinear', mask=False):
function coords_grid (line 104) | def coords_grid(batch, ht, wd):
function upflow8 (line 110) | def upflow8(flow, mode='bilinear'):
FILE: camera_pose_annotation/cvd_opt/preprocess/inference_batch.py
function process_single_row (line 16) | def process_single_row(row, index, args, worker_id=0):
function worker (line 36) | def worker(task_queue, args, worker_id, pbar):
function parse_args (line 48) | def parse_args():
function main (line 69) | def main():
FILE: camera_pose_annotation/cvd_opt/preprocess/preprocess_flow.py
function warp_flow (line 32) | def warp_flow(img, flow):
function resize_flow (line 44) | def resize_flow(flow, img_h, img_w):
function parse_args (line 54) | def parse_args():
FILE: camera_pose_annotation/depth_estimation/Depth-Anything/depth_anything_v2/dinov2.py
function named_apply (line 26) | def named_apply(fn: Callable, module: nn.Module, name="", depth_first=Tr...
class BlockChunk (line 37) | class BlockChunk(nn.ModuleList):
method forward (line 38) | def forward(self, x):
class DinoVisionTransformer (line 44) | class DinoVisionTransformer(nn.Module):
method __init__ (line 45) | def __init__(
method init_weights (line 172) | def init_weights(self):
method interpolate_pos_encoding (line 179) | def interpolate_pos_encoding(self, x, w, h):
method prepare_tokens_with_masks (line 212) | def prepare_tokens_with_masks(self, x, masks=None):
method forward_features_list (line 233) | def forward_features_list(self, x_list, masks_list):
method forward_features (line 253) | def forward_features(self, x, masks=None):
method _get_intermediate_layers_not_chunked (line 271) | def _get_intermediate_layers_not_chunked(self, x, n=1):
method _get_intermediate_layers_chunked (line 283) | def _get_intermediate_layers_chunked(self, x, n=1):
method get_intermediate_layers (line 297) | def get_intermediate_layers(
method forward (line 323) | def forward(self, *args, is_training=False, **kwargs):
function init_weights_vit_timm (line 331) | def init_weights_vit_timm(module: nn.Module, name: str = ""):
function vit_small (line 339) | def vit_small(patch_size=16, num_register_tokens=0, **kwargs):
function vit_base (line 353) | def vit_base(patch_size=16, num_register_tokens=0, **kwargs):
function vit_large (line 367) | def vit_large(patch_size=16, num_register_tokens=0, **kwargs):
function vit_giant2 (line 381) | def vit_giant2(patch_size=16, num_register_tokens=0, **kwargs):
function DINOv2 (line 398) | def DINOv2(model_name):
FILE: camera_pose_annotation/depth_estimation/Depth-Anything/depth_anything_v2/dinov2_layers/attention.py
class Attention (line 29) | class Attention(nn.Module):
method __init__ (line 30) | def __init__(
method forward (line 49) | def forward(self, x: Tensor) -> Tensor:
class MemEffAttention (line 65) | class MemEffAttention(Attention):
method forward (line 66) | def forward(self, x: Tensor, attn_bias=None) -> Tensor:
FILE: camera_pose_annotation/depth_estimation/Depth-Anything/depth_anything_v2/dinov2_layers/block.py
class Block (line 36) | class Block(nn.Module):
method __init__ (line 37) | def __init__(
method forward (line 82) | def forward(self, x: Tensor) -> Tensor:
function drop_add_residual_stochastic_depth (line 110) | def drop_add_residual_stochastic_depth(
function get_branges_scales (line 134) | def get_branges_scales(x, sample_drop_ratio=0.0):
function add_residual (line 142) | def add_residual(x, brange, residual, residual_scale_factor, scaling_vec...
function get_attn_bias_and_cat (line 157) | def get_attn_bias_and_cat(x_list, branges=None):
function drop_add_residual_stochastic_depth_list (line 181) | def drop_add_residual_stochastic_depth_list(
class NestedTensorBlock (line 204) | class NestedTensorBlock(Block):
method forward_nested (line 205) | def forward_nested(self, x_list: List[Tensor]) -> List[Tensor]:
method forward (line 245) | def forward(self, x_or_x_list):
FILE: camera_pose_annotation/depth_estimation/Depth-Anything/depth_anything_v2/dinov2_layers/drop_path.py
function drop_path (line 15) | def drop_path(x, drop_prob: float = 0.0, training: bool = False):
class DropPath (line 27) | class DropPath(nn.Module):
method __init__ (line 30) | def __init__(self, drop_prob=None):
method forward (line 34) | def forward(self, x):
FILE: camera_pose_annotation/depth_estimation/Depth-Anything/depth_anything_v2/dinov2_layers/layer_scale.py
class LayerScale (line 16) | class LayerScale(nn.Module):
method __init__ (line 17) | def __init__(
method forward (line 27) | def forward(self, x: Tensor) -> Tensor:
FILE: camera_pose_annotation/depth_estimation/Depth-Anything/depth_anything_v2/dinov2_layers/mlp.py
class Mlp (line 17) | class Mlp(nn.Module):
method __init__ (line 18) | def __init__(
method forward (line 35) | def forward(self, x: Tensor) -> Tensor:
FILE: camera_pose_annotation/depth_estimation/Depth-Anything/depth_anything_v2/dinov2_layers/patch_embed.py
function make_2tuple (line 17) | def make_2tuple(x):
class PatchEmbed (line 26) | class PatchEmbed(nn.Module):
method __init__ (line 38) | def __init__(
method forward (line 69) | def forward(self, x: Tensor) -> Tensor:
method flops (line 84) | def flops(self) -> float:
FILE: camera_pose_annotation/depth_estimation/Depth-Anything/depth_anything_v2/dinov2_layers/swiglu_ffn.py
class SwiGLUFFN (line 13) | class SwiGLUFFN(nn.Module):
method __init__ (line 14) | def __init__(
method forward (line 29) | def forward(self, x: Tensor) -> Tensor:
class SwiGLUFFNFused (line 45) | class SwiGLUFFNFused(SwiGLU):
method __init__ (line 46) | def __init__(
FILE: camera_pose_annotation/depth_estimation/Depth-Anything/depth_anything_v2/dpt.py
function _make_fusion_block (line 12) | def _make_fusion_block(features, use_bn, size=None):
class ConvBlock (line 24) | class ConvBlock(nn.Module):
method __init__ (line 25) | def __init__(self, in_feature, out_feature):
method forward (line 34) | def forward(self, x):
class DPTHead (line 38) | class DPTHead(nn.Module):
method __init__ (line 39) | def __init__(
method forward (line 117) | def forward(self, out_features, patch_h, patch_w):
class DepthAnythingV2 (line 153) | class DepthAnythingV2(nn.Module):
method __init__ (line 154) | def __init__(
method forward (line 176) | def forward(self, x):
method infer_image (line 187) | def infer_image(self, raw_image, input_size=518):
method image2tensor (line 196) | def image2tensor(self, raw_image, input_size=518):
FILE: camera_pose_annotation/depth_estimation/Depth-Anything/depth_anything_v2/util/blocks.py
function _make_scratch (line 4) | def _make_scratch(in_shape, out_shape, groups=1, expand=False):
class ResidualConvUnit (line 29) | class ResidualConvUnit(nn.Module):
method __init__ (line 33) | def __init__(self, features, activation, bn):
method forward (line 57) | def forward(self, x):
class FeatureFusionBlock (line 83) | class FeatureFusionBlock(nn.Module):
method __init__ (line 87) | def __init__(
method forward (line 123) | def forward(self, *xs, size=None):
FILE: camera_pose_annotation/depth_estimation/Depth-Anything/depth_anything_v2/util/transform.py
class Resize (line 5) | class Resize(object):
method __init__ (line 9) | def __init__(
method constrain_to_multiple_of (line 51) | def constrain_to_multiple_of(self, x, min_val=0, max_val=None):
method get_size (line 62) | def get_size(self, width, height):
method __call__ (line 109) | def __call__(self, sample):
class NormalizeImage (line 125) | class NormalizeImage(object):
method __init__ (line 129) | def __init__(self, mean, std):
method __call__ (line 133) | def __call__(self, sample):
class PrepareForNet (line 139) | class PrepareForNet(object):
method __init__ (line 143) | def __init__(self):
method __call__ (line 146) | def __call__(self, sample):
FILE: camera_pose_annotation/depth_estimation/Depth-Anything/inference.py
function parse_args (line 32) | def parse_args():
FILE: camera_pose_annotation/depth_estimation/Depth-Anything/inference_batch.py
class ImageDataset (line 41) | class ImageDataset(Dataset):
method __init__ (line 44) | def __init__(self, img_list, input_size):
method __len__ (line 63) | def __len__(self):
method image2tensor (line 66) | def image2tensor(self, raw_image):
method __getitem__ (line 77) | def __getitem__(self, idx):
function parse_args (line 103) | def parse_args():
function collate_fn (line 122) | def collate_fn(batch):
function main (line 133) | def main():
FILE: camera_pose_annotation/depth_estimation/UniDepth/inference.py
function parse_args (line 20) | def parse_args():
function main (line 29) | def main():
FILE: camera_pose_annotation/depth_estimation/UniDepth/inference_batch.py
class ImageDataset (line 21) | class ImageDataset(Dataset):
method __init__ (line 24) | def __init__(self, img_list, input_size):
method __len__ (line 28) | def __len__(self):
method __getitem__ (line 31) | def __getitem__(self, idx):
function collate_fn (line 66) | def collate_fn(batch):
function parse_args (line 77) | def parse_args():
function main (line 93) | def main():
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/_2d3ds.py
class _2D3DS (line 9) | class _2D3DS(SequenceDataset):
method __init__ (line 18) | def __init__(
method preprocess (line 51) | def preprocess(self, results):
method pre_pipeline (line 62) | def pre_pipeline(self, results):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/_4dor.py
class _4DOR (line 6) | class _4DOR(SequenceDataset):
method __init__ (line 16) | def __init__(
method pre_pipeline (line 46) | def pre_pipeline(self, results):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/a2d2.py
class A2D2 (line 12) | class A2D2(ImageDataset):
method __init__ (line 20) | def __init__(
method load_dataset (line 47) | def load_dataset(self):
method pre_pipeline (line 74) | def pre_pipeline(self, results):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/adt.py
class ADT (line 8) | class ADT(SequenceDataset):
method __init__ (line 17) | def __init__(
method preprocess (line 47) | def preprocess(self, results):
method pre_pipeline (line 62) | def pre_pipeline(self, results):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/aimotive.py
class aiMotive (line 6) | class aiMotive(SequenceDataset):
method __init__ (line 16) | def __init__(
method pre_pipeline (line 46) | def pre_pipeline(self, results):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/argoverse.py
class Argoverse (line 12) | class Argoverse(ImageDataset):
method __init__ (line 21) | def __init__(
method load_dataset (line 50) | def load_dataset(self):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/argoverse2.py
class Argoverse2 (line 6) | class Argoverse2(SequenceDataset):
method __init__ (line 15) | def __init__(
method pre_pipeline (line 45) | def pre_pipeline(self, results):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/arkit.py
class ARKit (line 6) | class ARKit(SequenceDataset):
method __init__ (line 15) | def __init__(
method pre_pipeline (line 45) | def pre_pipeline(self, results):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/ase.py
class ASE (line 8) | class ASE(SequenceDataset):
method __init__ (line 17) | def __init__(
method preprocess (line 47) | def preprocess(self, results):
method pre_pipeline (line 61) | def pre_pipeline(self, results):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/base_dataset.py
class BaseDataset (line 19) | class BaseDataset(Dataset):
method __init__ (line 23) | def __init__(
method __len__ (line 95) | def __len__(self):
method pack_batch (line 98) | def pack_batch(self, results):
method unpack_batch (line 118) | def unpack_batch(self, results):
method _augmentation_space (line 136) | def _augmentation_space(self):
method augment (line 151) | def augment(self, results):
method prepare_depth_eval (line 156) | def prepare_depth_eval(self, inputs, preds):
method prepare_points_eval (line 169) | def prepare_points_eval(self, inputs, preds):
method add_points (line 180) | def add_points(self, inputs):
method accumulate_metrics (line 187) | def accumulate_metrics(
method accumulate_metrics_depth (line 219) | def accumulate_metrics_depth(self, gts, preds, masks):
method accumulate_metrics_3d (line 236) | def accumulate_metrics_3d(self, gts, preds, masks):
method get_evaluation (line 258) | def get_evaluation(self, metrics=None):
method replicate (line 273) | def replicate(self, results):
method log_load_dataset (line 279) | def log_load_dataset(self):
method pre_pipeline (line 284) | def pre_pipeline(self, results):
method eval_mask (line 302) | def eval_mask(self, valid_mask):
method chunk (line 305) | def chunk(self, dataset, chunk_dim=1, pct=1.0):
method preprocess (line 314) | def preprocess(self, results):
method postprocess (line 318) | def postprocess(self, results):
method get_mapper (line 322) | def get_mapper(self):
method get_intrinsics (line 326) | def get_intrinsics(self, idx, image_name):
method get_extrinsics (line 330) | def get_extrinsics(self, idx, image_name):
method load_dataset (line 334) | def load_dataset(self):
method get_single_item (line 338) | def get_single_item(self, idx, sample=None, mapper=None):
method __getitem__ (line 342) | def __getitem__(self, idx):
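
Note: the BaseDataset / ImageDataset / SequenceDataset hierarchy indexed above splits per-dataset logic into load_dataset (enumerate samples), pre_pipeline (attach dataset-wide metadata such as depth scale and validity flags), and __getitem__ / get_single_item (decode and augment one sample). The sketch below is a minimal, hypothetical illustration of that split only; the names and fields are invented and do not reproduce the real BaseDataset API.

# Hypothetical illustration of the load_dataset / pre_pipeline / __getitem__
# split used by the UniDepth dataset classes; this is NOT the real BaseDataset.
import torch
from torch.utils.data import Dataset


class ToyDepthDataset(Dataset):
    def __init__(self, num_samples=4):
        self.dataset = []            # filled by load_dataset
        self.num_samples = num_samples
        self.load_dataset()

    def load_dataset(self):
        # Real subclasses read an HDF5/split file here; we fabricate samples.
        for _ in range(self.num_samples):
            self.dataset.append({"image": torch.rand(3, 32, 32),
                                 "depth": torch.rand(1, 32, 32)})

    def pre_pipeline(self, results):
        # Real subclasses attach dataset-wide metadata (depth scale, quality flags).
        results["dataset_name"] = "toy"
        results["depth_scale"] = 1.0
        return results

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        results = dict(self.dataset[idx])
        return self.pre_pipeline(results)


if __name__ == "__main__":
    sample = ToyDepthDataset()[0]
    print(sample["image"].shape, sample["depth_scale"])
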
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/bdd.py
class BDD (line 12) | class BDD(ImageDataset):
method __init__ (line 21) | def __init__(
method load_dataset (line 47) | def load_dataset(self):
method pre_pipeline (line 76) | def pre_pipeline(self, results):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/bedlam.py
class BEDLAM (line 6) | class BEDLAM(SequenceDataset):
method __init__ (line 15) | def __init__(
method pre_pipeline (line 45) | def pre_pipeline(self, results):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/behave.py
class Behave (line 6) | class Behave(SequenceDataset):
method __init__ (line 16) | def __init__(
method pre_pipeline (line 46) | def pre_pipeline(self, results):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/blendedmvg.py
class BlendedMVG (line 6) | class BlendedMVG(SequenceDataset):
method __init__ (line 15) | def __init__(
method pre_pipeline (line 45) | def pre_pipeline(self, results):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/cityscape.py
class Cityscape (line 12) | class Cityscape(ImageDataset):
method __init__ (line 21) | def __init__(
method load_dataset (line 50) | def load_dataset(self):
method pre_pipeline (line 75) | def pre_pipeline(self, results):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/ddad.py
class DDAD (line 12) | class DDAD(ImageDataset):
method __init__ (line 21) | def __init__(
method load_dataset (line 47) | def load_dataset(self):
method get_mapper (line 72) | def get_mapper(self):
method pre_pipeline (line 80) | def pre_pipeline(self, results):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/deep360.py
class Deep360 (line 9) | class Deep360(SequenceDataset):
method __init__ (line 18) | def __init__(
method pre_pipeline (line 51) | def pre_pipeline(self, results):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/dense.py
class DENSE (line 11) | class DENSE(ImageDataset):
method __init__ (line 28) | def __init__(
method load_dataset (line 56) | def load_dataset(self):
method get_intrinsics (line 78) | def get_intrinsics(self, idx, image_name):
method get_mapper (line 81) | def get_mapper(self):
method pre_pipeline (line 87) | def pre_pipeline(self, results):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/diml.py
class DIML (line 12) | class DIML(ImageDataset):
method __init__ (line 21) | def __init__(
method load_dataset (line 49) | def load_dataset(self):
method pre_pipeline (line 75) | def pre_pipeline(self, results):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/diode.py
class DiodeIndoor (line 12) | class DiodeIndoor(ImageDataset):
method __init__ (line 23) | def __init__(
method load_dataset (line 50) | def load_dataset(self):
method get_intrinsics (line 75) | def get_intrinsics(self, *args, **kwargs):
method get_mapper (line 78) | def get_mapper(self):
method pre_pipeline (line 84) | def pre_pipeline(self, results):
class DiodeIndoor_F (line 91) | class DiodeIndoor_F(SequenceDataset):
method __init__ (line 100) | def __init__(
method pre_pipeline (line 132) | def pre_pipeline(self, results):
class DiodeOutdoor (line 139) | class DiodeOutdoor(ImageDataset):
method __init__ (line 151) | def __init__(
method load_dataset (line 187) | def load_dataset(self):
class Diode (line 210) | class Diode(ImageDataset):
method __init__ (line 222) | def __init__(
method load_dataset (line 256) | def load_dataset(self):
method get_intrinsics (line 277) | def get_intrinsics(self, *args, **kwargs):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/dl3dv.py
class DL3DV (line 6) | class DL3DV(SequenceDataset):
method __init__ (line 15) | def __init__(
method pre_pipeline (line 45) | def pre_pipeline(self, results):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/driving_stereo.py
class DrivingStereo (line 12) | class DrivingStereo(ImageDataset):
method __init__ (line 21) | def __init__(
method load_dataset (line 50) | def load_dataset(self):
method pre_pipeline (line 78) | def pre_pipeline(self, results):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/dtu_rmvd.py
class DTURMVD (line 16) | class DTURMVD(SequenceDataset):
method __init__ (line 26) | def __init__(
method pre_pipeline (line 57) | def pre_pipeline(self, results):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/dummy.py
class Dummy (line 6) | class Dummy(Dataset):
method __init__ (line 10) | def __init__(self, *args, **kwargs):
method get_single_item (line 14) | def get_single_item(self, idx):
method __getitem__ (line 25) | def __getitem__(self, idx):
method __len__ (line 32) | def __len__(self):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/dynamic_replica.py
class DynReplica (line 6) | class DynReplica(SequenceDataset):
method __init__ (line 16) | def __init__(
method pre_pipeline (line 46) | def pre_pipeline(self, results):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/eden.py
class EDEN (line 6) | class EDEN(SequenceDataset):
method __init__ (line 15) | def __init__(
method pre_pipeline (line 45) | def pre_pipeline(self, results):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/eth3d.py
class ETH3D (line 13) | class ETH3D(ImageDataset):
method __init__ (line 22) | def __init__(
method load_dataset (line 48) | def load_dataset(self):
method pre_pipeline (line 70) | def pre_pipeline(self, results):
class ETH3D_F (line 77) | class ETH3D_F(SequenceDataset):
method __init__ (line 86) | def __init__(
method pre_pipeline (line 118) | def pre_pipeline(self, results):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/eth3d_rmvd.py
class ETH3DRMVD (line 16) | class ETH3DRMVD(SequenceDataset):
method __init__ (line 26) | def __init__(
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/facedepth.py
class FaceDepth (line 6) | class FaceDepth(SequenceDataset):
method __init__ (line 16) | def __init__(
method pre_pipeline (line 46) | def pre_pipeline(self, results):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/flsea.py
class FLSea (line 11) | class FLSea(ImageDataset):
method __init__ (line 34) | def __init__(
method load_dataset (line 63) | def load_dataset(self):
method get_intrinsics (line 87) | def get_intrinsics(self, idx, image_name):
method get_mapper (line 90) | def get_mapper(self):
method pre_pipeline (line 96) | def pre_pipeline(self, results):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/futurehouse.py
class FutureHouse (line 9) | class FutureHouse(SequenceDataset):
method __init__ (line 18) | def __init__(
method pre_pipeline (line 51) | def pre_pipeline(self, results):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/gibson.py
class Gibson (line 9) | class Gibson(SequenceDataset):
method __init__ (line 18) | def __init__(
method pre_pipeline (line 51) | def pre_pipeline(self, results):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/hammer.py
class HAMMER (line 12) | class HAMMER(ImageDataset):
method __init__ (line 21) | def __init__(
method load_dataset (line 50) | def load_dataset(self):
method pre_pipeline (line 72) | def pre_pipeline(self, results):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/hm3d.py
class HM3D (line 6) | class HM3D(SequenceDataset):
method __init__ (line 15) | def __init__(
method pre_pipeline (line 45) | def pre_pipeline(self, results):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/hoi4d.py
class HOI4D (line 6) | class HOI4D(SequenceDataset):
method __init__ (line 16) | def __init__(
method pre_pipeline (line 46) | def pre_pipeline(self, results):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/hrwsi.py
class HRWSI (line 11) | class HRWSI(ImageDataset):
method __init__ (line 19) | def __init__(
method load_dataset (line 45) | def load_dataset(self):
method pre_pipeline (line 73) | def pre_pipeline(self, results):
method get_mapper (line 79) | def get_mapper(self):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/hypersim.py
class HyperSim (line 12) | class HyperSim(ImageDataset):
method __init__ (line 21) | def __init__(
method load_dataset (line 48) | def load_dataset(self):
method get_mapper (line 84) | def get_mapper(self):
method pre_pipeline (line 92) | def pre_pipeline(self, results):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/ibims.py
class IBims (line 13) | class IBims(ImageDataset):
method __init__ (line 22) | def __init__(
method load_dataset (line 51) | def load_dataset(self):
method pre_pipeline (line 73) | def pre_pipeline(self, results):
class IBims_F (line 80) | class IBims_F(SequenceDataset):
method __init__ (line 89) | def __init__(
method pre_pipeline (line 121) | def pre_pipeline(self, results):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/image_dataset.py
class ImageDataset (line 23) | class ImageDataset(BaseDataset):
method __init__ (line 24) | def __init__(
method get_single_item (line 49) | def get_single_item(self, idx, sample=None, mapper=None):
method preprocess (line 132) | def preprocess(self, results):
method postprocess (line 151) | def postprocess(self, results):
method __getitem__ (line 161) | def __getitem__(self, idx):
method get_intrinsics (line 173) | def get_intrinsics(self, idx, image_name):
method get_extrinsics (line 180) | def get_extrinsics(self, idx, image_name):
method get_mapper (line 187) | def get_mapper(self):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/ken_burns.py
class KenBurns (line 12) | class KenBurns(ImageDataset):
method __init__ (line 21) | def __init__(
method load_dataset (line 48) | def load_dataset(self):
method get_mapper (line 82) | def get_mapper(self):
method pre_pipeline (line 90) | def pre_pipeline(self, results):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/kitti.py
class KITTI (line 13) | class KITTI(ImageDataset):
method __init__ (line 61) | def __init__(
method load_dataset (line 95) | def load_dataset(self):
method get_intrinsics (line 124) | def get_intrinsics(self, idx, image_name):
method preprocess (line 127) | def preprocess(self, results):
method eval_mask (line 147) | def eval_mask(self, valid_mask, info={}):
method get_mapper (line 165) | def get_mapper(self):
method pre_pipeline (line 171) | def pre_pipeline(self, results):
class KITTIBenchmark (line 181) | class KITTIBenchmark(ImageDataset):
method __init__ (line 190) | def __init__(
method load_dataset (line 226) | def load_dataset(self):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/kitti360.py
class KITTI360 (line 8) | class KITTI360(SequenceDataset):
method __init__ (line 17) | def __init__(
method preprocess (line 49) | def preprocess(self, results):
method pre_pipeline (line 61) | def pre_pipeline(self, results):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/kitti_multi.py
class KITTIMulti (line 16) | class KITTIMulti(SequenceDataset):
method __init__ (line 26) | def __init__(
method __len__ (line 69) | def __len__(self):
method preprocess (line 74) | def preprocess(self, results):
method eval_mask (line 86) | def eval_mask(self, valid_mask, info={}):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/kitti_rmvd.py
class KITTIRMVD (line 14) | class KITTIRMVD(SequenceDataset):
method __init__ (line 24) | def __init__(
method eval_mask (line 57) | def eval_mask(self, valid_mask, info={}):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/lyft.py
class Lyft (line 12) | class Lyft(ImageDataset):
method __init__ (line 21) | def __init__(
method load_dataset (line 47) | def load_dataset(self):
method pre_pipeline (line 81) | def pre_pipeline(self, results):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/mapillary.py
class Mapillary (line 12) | class Mapillary(ImageDataset):
method __init__ (line 21) | def __init__(
method load_dataset (line 50) | def load_dataset(self):
method pre_pipeline (line 78) | def pre_pipeline(self, results):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/matrix_city.py
class MatrixCity (line 6) | class MatrixCity(SequenceDataset):
method __init__ (line 15) | def __init__(
method pre_pipeline (line 45) | def pre_pipeline(self, results):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/matterport3d.py
class Matterport3D (line 9) | class Matterport3D(SequenceDataset):
method __init__ (line 18) | def __init__(
method pre_pipeline (line 51) | def pre_pipeline(self, results):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/megadepth.py
class MegaDepth (line 10) | class MegaDepth(ImageDataset):
method __init__ (line 18) | def __init__(
method load_dataset (line 44) | def load_dataset(self):
method pre_pipeline (line 72) | def pre_pipeline(self, results):
method get_mapper (line 79) | def get_mapper(self):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/megadepth_s.py
class MegaDepthS (line 6) | class MegaDepthS(SequenceDataset):
method __init__ (line 15) | def __init__(
method pre_pipeline (line 45) | def pre_pipeline(self, results):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/midair.py
class MidAir (line 6) | class MidAir(SequenceDataset):
method __init__ (line 16) | def __init__(
method pre_pipeline (line 46) | def pre_pipeline(self, results):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/mip.py
class MIP (line 6) | class MIP(SequenceDataset):
method __init__ (line 16) | def __init__(
method pre_pipeline (line 46) | def pre_pipeline(self, results):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/ms2.py
class MS2 (line 6) | class MS2(SequenceDataset):
method __init__ (line 16) | def __init__(
method pre_pipeline (line 46) | def pre_pipeline(self, results):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/mvimgnet.py
class MVImgNet (line 91) | class MVImgNet(SequenceDataset):
method __init__ (line 102) | def __init__(
method pre_pipeline (line 132) | def pre_pipeline(self, results):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/mvsynth.py
class MVSynth (line 6) | class MVSynth(SequenceDataset):
method __init__ (line 15) | def __init__(
method pre_pipeline (line 45) | def pre_pipeline(self, results):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/nerds360.py
class NeRDS360 (line 6) | class NeRDS360(SequenceDataset):
method __init__ (line 15) | def __init__(
method pre_pipeline (line 45) | def pre_pipeline(self, results):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/niantic_mapfree.py
class NianticMapFree (line 6) | class NianticMapFree(SequenceDataset):
method __init__ (line 15) | def __init__(
method pre_pipeline (line 45) | def pre_pipeline(self, results):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/nuscenes.py
class Nuscenes (line 12) | class Nuscenes(ImageDataset):
method __init__ (line 22) | def __init__(
method load_dataset (line 49) | def load_dataset(self):
method get_mapper (line 77) | def get_mapper(self):
method pre_pipeline (line 85) | def pre_pipeline(self, results):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/nyuv2.py
class NYUv2Depth (line 13) | class NYUv2Depth(ImageDataset):
method __init__ (line 32) | def __init__(
method load_dataset (line 64) | def load_dataset(self):
method pre_pipeline (line 89) | def pre_pipeline(self, results):
method get_intrinsics (line 94) | def get_intrinsics(self, idx, image_name):
method eval_mask (line 97) | def eval_mask(self, valid_mask, info={}):
method get_mapper (line 102) | def get_mapper(self):
method pre_pipeline (line 108) | def pre_pipeline(self, results):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/oasis.py
class OASISv2 (line 11) | class OASISv2(ImageDataset):
method __init__ (line 19) | def __init__(
method load_dataset (line 46) | def load_dataset(self):
method pre_pipeline (line 70) | def pre_pipeline(self, results):
method get_mapper (line 76) | def get_mapper(self):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/pipelines/formating.py
class Collect (line 7) | class Collect(object):
method __init__ (line 8) | def __init__(
method __call__ (line 41) | def __call__(self, results):
method __repr__ (line 55) | def __repr__(self):
class AnnotationMask (line 61) | class AnnotationMask(object):
method __init__ (line 62) | def __init__(self, min_value, max_value, custom_fn=lambda x: x):
method __call__ (line 67) | def __call__(self, results):
method __repr__ (line 91) | def __repr__(self):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/pipelines/transforms.py
class PanoCrop (line 15) | class PanoCrop:
method __init__ (line 16) | def __init__(self, crop_v=0.1):
method _crop_data (line 19) | def _crop_data(self, results, crop_size):
method __call__ (line 50) | def __call__(self, results):
class PanoRoll (line 58) | class PanoRoll:
method __init__ (line 59) | def __init__(self, roll=[-0.5, 0.5]):
method __call__ (line 62) | def __call__(self, results):
class RandomFlip (line 76) | class RandomFlip:
method __init__ (line 90) | def __init__(self, direction="horizontal", prob=0.5, **kwargs):
method __call__ (line 101) | def __call__(self, results):
method __repr__ (line 177) | def __repr__(self):
class Crop (line 184) | class Crop:
method __init__ (line 185) | def __init__(
method _get_crop_size (line 210) | def _get_crop_size(self, image_shape):
method _crop_data (line 231) | def _crop_data(self, results, crop_size):
method __call__ (line 258) | def __call__(self, results):
method __repr__ (line 265) | def __repr__(self):
class KittiCrop (line 272) | class KittiCrop:
method __init__ (line 273) | def __init__(self, crop_size):
method _crop_data (line 276) | def _crop_data(self, results, crop_size):
method __call__ (line 315) | def __call__(self, results):
method __repr__ (line 330) | def __repr__(self):
class RandomMasking (line 336) | class RandomMasking:
method __init__ (line 337) | def __init__(
method __call__ (line 357) | def __call__(self, results):
method _nop (line 387) | def _nop(self, results, down_size, device):
method random_sampling (line 394) | def random_sampling(self, results, mask_ratio, down_size, validity_mas...
method inverse_sampling (line 400) | def inverse_sampling(self, results, mask_ratio, down_size, validity_ma...
method guided_sampling (line 469) | def guided_sampling(self, results, mask_ratio, down_size, validity_mas...
method step (line 487) | def step(self):
method is_warmup (line 497) | def is_warmup(self):
class Rotate (line 501) | class Rotate:
method __init__ (line 502) | def __init__(
method _rotate (line 525) | def _rotate(self, results, angle, center=None, fill_val=0.0):
method __call__ (line 556) | def __call__(self, results):
class RandomColor (line 570) | class RandomColor:
method __init__ (line 571) | def __init__(self, level, prob=0.5):
method _adjust_color_img (line 576) | def _adjust_color_img(self, results, factor=1.0):
method __call__ (line 580) | def __call__(self, results):
class RandomSaturation (line 592) | class RandomSaturation:
method __init__ (line 593) | def __init__(self, level, prob=0.5):
method _adjust_saturation_img (line 598) | def _adjust_saturation_img(self, results, factor=1.0):
method __call__ (line 603) | def __call__(self, results):
class RandomSharpness (line 615) | class RandomSharpness:
method __init__ (line 616) | def __init__(self, level, prob=0.5):
method _adjust_sharpeness_img (line 621) | def _adjust_sharpeness_img(self, results, factor=1.0):
method __call__ (line 626) | def __call__(self, results):
method __repr__ (line 637) | def __repr__(self):
class RandomSolarize (line 644) | class RandomSolarize:
method __init__ (line 645) | def __init__(self, level, prob=0.5):
method _adjust_solarize_img (line 650) | def _adjust_solarize_img(self, results, factor=255.0):
method __call__ (line 654) | def __call__(self, results):
class RandomPosterize (line 666) | class RandomPosterize:
method __init__ (line 667) | def __init__(self, level, prob=0.5):
method _posterize_img (line 672) | def _posterize_img(self, results, factor=1.0):
method __call__ (line 676) | def __call__(self, results):
class RandomEqualize (line 688) | class RandomEqualize:
method __init__ (line 689) | def __init__(self, prob=0.5):
method _imequalize (line 693) | def _imequalize(self, results):
method __call__ (line 697) | def __call__(self, results):
class RandomBrightness (line 704) | class RandomBrightness:
method __init__ (line 705) | def __init__(self, level, prob=0.5):
method _adjust_brightness_img (line 710) | def _adjust_brightness_img(self, results, factor=1.0):
method __call__ (line 714) | def __call__(self, results, level=None):
class RandomContrast (line 726) | class RandomContrast:
method __init__ (line 727) | def __init__(self, level, prob=0.5):
method _adjust_contrast_img (line 732) | def _adjust_contrast_img(self, results, factor=1.0):
method __call__ (line 736) | def __call__(self, results, level=None):
class RandomGamma (line 748) | class RandomGamma:
method __init__ (line 749) | def __init__(self, level, prob=0.5):
method __call__ (line 754) | def __call__(self, results, level=None):
class RandomInvert (line 764) | class RandomInvert:
method __init__ (line 765) | def __init__(self, prob=0.5):
method __call__ (line 768) | def __call__(self, results):
class RandomAutoContrast (line 777) | class RandomAutoContrast:
method __init__ (line 778) | def __init__(self, prob=0.5):
method _autocontrast_img (line 781) | def _autocontrast_img(self, results):
method __call__ (line 786) | def __call__(self, results):
class RandomShear (line 793) | class RandomShear(object):
method __init__ (line 794) | def __init__(
method _shear_img (line 805) | def _shear_img(self, results, magnitude):
method _shear_masks (line 818) | def _shear_masks(self, results, magnitude):
method _shear_gt (line 831) | def _shear_gt(
method __call__ (line 848) | def __call__(self, results):
class RandomTranslate (line 866) | class RandomTranslate(object):
method __init__ (line 867) | def __init__(
method _translate_img (line 877) | def _translate_img(self, results, magnitude):
method _translate_mask (line 899) | def _translate_mask(self, results, magnitude):
method _translate_gt (line 913) | def _translate_gt(
method __call__ (line 931) | def __call__(self, results):
method __repr__ (line 955) | def __repr__(self):
class RandomColorJitter (line 963) | class RandomColorJitter:
method __init__ (line 964) | def __init__(self, level, prob=0.9):
method _adjust_contrast_img (line 975) | def _adjust_contrast_img(self, results, factor=1.0):
method _adjust_sharpness_img (line 982) | def _adjust_sharpness_img(self, results, factor=1.0):
method _adjust_brightness_img (line 989) | def _adjust_brightness_img(self, results, factor=1.0):
method _adjust_saturation_img (line 996) | def _adjust_saturation_img(self, results, factor=1.0):
method _adjust_color_img (line 1003) | def _adjust_color_img(self, results, factor=1.0):
method __call__ (line 1010) | def __call__(self, results):
class RandomGrayscale (line 1028) | class RandomGrayscale:
method __init__ (line 1029) | def __init__(self, prob=0.1, num_output_channels=3):
method __call__ (line 1034) | def __call__(self, results):
function masked_nearest_interpolation (line 1046) | def masked_nearest_interpolation(input, mask, target_size):
class ContextCrop (line 1131) | class ContextCrop:
method __init__ (line 1132) | def __init__(
method _transform_img (line 1156) | def _transform_img(self, results, shapes):
method _transform_masks (line 1167) | def _transform_masks(self, results, shapes):
method _transform_gt (line 1175) | def _transform_gt(self, results, shapes):
method crop (line 1182) | def crop(img, height, width, top, left) -> torch.Tensor:
method test_closest_shape (line 1195) | def test_closest_shape(self, image_shape):
method _get_crop_shapes (line 1209) | def _get_crop_shapes(self, image_shape, ctx=None):
method __call__ (line 1246) | def __call__(self, results):
class RandomFiller (line 1358) | class RandomFiller:
method __init__ (line 1359) | def __init__(self, *args, **kwargs):
method _transform (line 1362) | def _transform(self, results):
method __call__ (line 1388) | def __call__(self, results):
class GaussianBlur (line 1404) | class GaussianBlur:
method __init__ (line 1405) | def __init__(self, kernel_size, sigma=(0.1, 2.0), prob=0.9):
method apply (line 1412) | def apply(self, x, kernel):
method _create_kernel (line 1420) | def _create_kernel(self, sigma):
method __call__ (line 1434) | def __call__(self, results):
class Compose (line 1445) | class Compose:
method __init__ (line 1446) | def __init__(self, transforms):
method __call__ (line 1449) | def __call__(self, results):
method __setattr__ (line 1454) | def __setattr__(self, name: str, value) -> None:
method __repr__ (line 1459) | def __repr__(self):
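
Note: the transforms indexed above all follow the same protocol that Compose chains together: each transform is a callable that receives a results dict (image, ground-truth tensors, bookkeeping keys) and returns it mutated. A small, hypothetical sketch of that protocol follows; the toy transforms are illustrative and do not reproduce the repo's RandomFlip/ContextCrop, which also flip depth/flow targets and update camera intrinsics.

# Hypothetical sketch of the results-dict transform protocol chained by Compose.
import random
import torch


class ToyRandomFlip:
    def __init__(self, prob=0.5):
        self.prob = prob

    def __call__(self, results):
        if random.random() < self.prob:
            results["image"] = torch.flip(results["image"], dims=[-1])
            results["flipped"] = True
        return results


class ToyCompose:
    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, results):
        for t in self.transforms:
            results = t(results)
        return results


pipeline = ToyCompose([ToyRandomFlip(prob=1.0)])
out = pipeline({"image": torch.arange(12.0).reshape(1, 3, 4), "flipped": False})
print(out["flipped"], out["image"].shape)
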
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/point_odyssey.py
class PointOdyssey (line 6) | class PointOdyssey(SequenceDataset):
method __init__ (line 15) | def __init__(
method pre_pipeline (line 45) | def pre_pipeline(self, results):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/proteus.py
class Proteus (line 6) | class Proteus(SequenceDataset):
method __init__ (line 16) | def __init__(
method pre_pipeline (line 46) | def pre_pipeline(self, results):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/samplers copy.py
function _get_numpy_dtype (line 13) | def _get_numpy_dtype(size: int) -> Any:
function _get_torch_dtype (line 17) | def _get_torch_dtype(size: int) -> Any:
function _generate_randperm_indices (line 21) | def _generate_randperm_indices(*, size: int, generator: torch.Generator):
function _shuffle_tensor_slice (line 38) | def _shuffle_tensor_slice(
function _new_shuffle_tensor_slice (line 63) | def _new_shuffle_tensor_slice(
function _make_seed (line 77) | def _make_seed(seed: int, start: int, iter_count: int) -> int:
class ShardedInfiniteSampler (line 82) | class ShardedInfiniteSampler(Sampler):
method __init__ (line 83) | def __init__(
method __iter__ (line 108) | def __iter__(self):
method _iterator (line 121) | def _iterator(self):
method _shuffled_iterator (line 128) | def _shuffled_iterator(self):
class DistributedSamplerNoDuplicate (line 152) | class DistributedSamplerNoDuplicate(torch.utils.data.DistributedSampler):
method __init__ (line 155) | def __init__(self, *args, **kwargs):
class DatasetFromSampler (line 164) | class DatasetFromSampler(torch.utils.data.Dataset):
method __init__ (line 171) | def __init__(self, sampler: Sampler):
method __getitem__ (line 176) | def __getitem__(self, index: int):
method __len__ (line 189) | def __len__(self) -> int:
class DistributedSamplerWrapper (line 197) | class DistributedSamplerWrapper(torch.utils.data.DistributedSampler):
method __init__ (line 212) | def __init__(
method __iter__ (line 238) | def __iter__(self):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/samplers.py
class DistributedSamplerNoDuplicate (line 4) | class DistributedSamplerNoDuplicate(torch.utils.data.DistributedSampler):
method __init__ (line 7) | def __init__(self, *args, **kwargs):
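
Note: DistributedSamplerNoDuplicate only overrides the constructor of torch.utils.data.DistributedSampler; judging from the name (an assumption, not a reading of the body), the intent is to avoid the padding the parent applies so that no sample is duplicated across ranks during evaluation. For context, the stock sampler can be exercised without an initialized process group by passing num_replicas and rank explicitly:

# Plain torch.utils.data.DistributedSampler for context; the repo's
# DistributedSamplerNoDuplicate subclasses it and tweaks only __init__.
import torch
from torch.utils.data import DataLoader, DistributedSampler, TensorDataset

dataset = TensorDataset(torch.arange(10))
# num_replicas/rank given explicitly so no torch.distributed init is needed.
sampler = DistributedSampler(dataset, num_replicas=4, rank=0, shuffle=False)
loader = DataLoader(dataset, batch_size=2, sampler=sampler)

# With 10 samples over 4 ranks the default sampler pads to 12 indices, so some
# samples repeat across ranks; a "no duplicate" variant would avoid that.
print(len(sampler), [batch[0].tolist() for batch in loader])
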
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/scannet.py
class ScanNet (line 6) | class ScanNet(SequenceDataset):
method __init__ (line 15) | def __init__(
method pre_pipeline (line 45) | def pre_pipeline(self, results):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/scannetpp.py
class ScanNetpp (line 6) | class ScanNetpp(SequenceDataset):
method __init__ (line 15) | def __init__(
method pre_pipeline (line 45) | def pre_pipeline(self, results):
class ScanNetpp_F (line 52) | class ScanNetpp_F(SequenceDataset):
method __init__ (line 62) | def __init__(
method pre_pipeline (line 94) | def pre_pipeline(self, results):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/sequence_dataset.py
class SequenceDataset (line 21) | class SequenceDataset(BaseDataset):
method __init__ (line 36) | def __init__(
method load_dataset (line 73) | def load_dataset(self):
method get_random_idxs (line 113) | def get_random_idxs(self, num_samples_sequence):
method get_test_idxs (line 155) | def get_test_idxs(self, num_samples_sequence, keyframe_idx):
method get_single_sequence (line 183) | def get_single_sequence(self, idx):
method preprocess (line 244) | def preprocess(self, results):
method postprocess (line 269) | def postprocess(self, results):
method __getitem__ (line 279) | def __getitem__(self, idx):
method log_load_dataset (line 291) | def log_load_dataset(self):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/sintel copy.py
class Sintel (line 6) | class Sintel(SequenceDataset):
method __init__ (line 15) | def __init__(
method pre_pipeline (line 45) | def pre_pipeline(self, results):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/sintel.py
class Sintel (line 6) | class Sintel(SequenceDataset):
method __init__ (line 15) | def __init__(
method pre_pipeline (line 45) | def pre_pipeline(self, results):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/sunrgbd.py
class SUNRGBD (line 12) | class SUNRGBD(ImageDataset):
method __init__ (line 21) | def __init__(
method load_dataset (line 50) | def load_dataset(self):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/synscapes.py
class Synscapes (line 6) | class Synscapes(SequenceDataset):
method __init__ (line 15) | def __init__(
method pre_pipeline (line 45) | def pre_pipeline(self, results):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/tartanair.py
class TartanAir (line 6) | class TartanAir(SequenceDataset):
method __init__ (line 16) | def __init__(
method pre_pipeline (line 46) | def pre_pipeline(self, results):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/taskonomy.py
class Taskonomy (line 12) | class Taskonomy(ImageDataset):
method __init__ (line 21) | def __init__(
method load_dataset (line 47) | def load_dataset(self):
method get_mapper (line 80) | def get_mapper(self):
method pre_pipeline (line 87) | def pre_pipeline(self, results):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/tat_rmvd.py
class TATRMVD (line 17) | class TATRMVD(SequenceDataset):
method __init__ (line 27) | def __init__(
method pre_pipeline (line 58) | def pre_pipeline(self, results):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/theo.py
class Theo (line 8) | class Theo(SequenceDataset):
method __init__ (line 18) | def __init__(
method preprocess (line 48) | def preprocess(self, results):
method pre_pipeline (line 61) | def pre_pipeline(self, results):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/unrealstereo4k.py
class UnrealStereo4K (line 6) | class UnrealStereo4K(SequenceDataset):
method __init__ (line 15) | def __init__(
method pre_pipeline (line 45) | def pre_pipeline(self, results):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/urbansyn.py
class UrbanSyn (line 6) | class UrbanSyn(SequenceDataset):
method __init__ (line 15) | def __init__(
method pre_pipeline (line 45) | def pre_pipeline(self, results):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/utils.py
class ConcatDataset (line 16) | class ConcatDataset(torch.utils.data.ConcatDataset):
method __init__ (line 17) | def __init__(self, datasets, shape_constraints: dict[str, list[int]] =...
method sample_shape (line 29) | def sample_shape(self):
method __getitem__ (line 51) | def __getitem__(self, idxs):
function _paddings (line 56) | def _paddings(image_shape, network_shape):
function collate_fn (line 64) | def collate_fn(in_data: List[List[Dict[str, Any]]], is_batched: bool = T...
function local_scatter (line 104) | def local_scatter(array: list[Any]):
class DatasetFromList (line 123) | class DatasetFromList(torch.utils.data.Dataset): # type: ignore
method __init__ (line 131) | def __init__(self, lst: List[Any], deepcopy: bool = False, serialize: ...
method __len__ (line 175) | def __len__(self) -> int:
method __getitem__ (line 181) | def __getitem__(self, idx: int) -> Any:
function get_weights (line 194) | def get_weights(
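
Note: collate_fn above merges lists of per-sample dicts into one batch dict. The toy version below conveys the dict-stacking idea only; the field names are invented and the real function additionally handles the nested per-dataset list structure and non-tensor metadata.

# Toy dict-collate in the spirit of utils.collate_fn (illustrative only).
from typing import Any, Dict, List
import torch


def toy_collate(samples: List[Dict[str, Any]]) -> Dict[str, Any]:
    batch: Dict[str, Any] = {}
    for key in samples[0]:
        values = [s[key] for s in samples]
        if isinstance(values[0], torch.Tensor):
            batch[key] = torch.stack(values, dim=0)   # (B, ...) tensor batch
        else:
            batch[key] = values                        # keep metadata as a list
    return batch


batch = toy_collate([{"image": torch.rand(3, 8, 8), "name": "a"},
                     {"image": torch.rand(3, 8, 8), "name": "b"}])
print(batch["image"].shape, batch["name"])
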
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/utils_decode.py
function decode_depth (line 14) | def decode_depth(results, h5file, value, idx, depth_scale, name="depth",...
function decode_numpy (line 42) | def decode_numpy(results, h5file, value, idx, name="points", **kwargs):
function decode_tensor (line 54) | def decode_tensor(results, value, idx, name, **kwargs):
function decode_camera (line 61) | def decode_camera(results, value, idx, name, sample, j, **kwargs):
function decode_K (line 69) | def decode_K(results, value, idx, name, **kwargs):
function decode_mask (line 77) | def decode_mask(results, h5file, value, idx, name, **kwargs):
function decode_rgb (line 87) | def decode_rgb(results, h5file, value, idx, name="image", **kwargs):
function decode_flow (line 101) | def decode_flow(results, h5file, value, idx, name, **kwargs):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/vkitti.py
class VKITTI (line 6) | class VKITTI(SequenceDataset):
method __init__ (line 15) | def __init__(
method pre_pipeline (line 45) | def pre_pipeline(self, results):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/void.py
class VOID (line 12) | class VOID(ImageDataset):
method __init__ (line 21) | def __init__(
method load_dataset (line 50) | def load_dataset(self):
method pre_pipeline (line 75) | def pre_pipeline(self, results):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/waymo.py
class Waymo (line 6) | class Waymo(SequenceDataset):
method __init__ (line 15) | def __init__(
method pre_pipeline (line 45) | def pre_pipeline(self, results):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/wildrgbd.py
class WildRGBD (line 6) | class WildRGBD(SequenceDataset):
method __init__ (line 16) | def __init__(
method pre_pipeline (line 46) | def pre_pipeline(self, results):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/layers/activation.py
class SwiGLU (line 6) | class SwiGLU(nn.Module):
method forward (line 7) | def forward(self, x: torch.Tensor) -> torch.Tensor:
class GEGLU (line 12) | class GEGLU(nn.Module):
method forward (line 13) | def forward(self, x: torch.Tensor) -> torch.Tensor:
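
Note: SwiGLU and GEGLU here expose only a forward method, which matches the common parameter-free gated-activation formulation: split the last dimension in two and gate one half with SiLU or GELU of the other. A sketch of that formulation follows; the exact chunk order in this file is an assumption.

# Common parameter-free SwiGLU/GEGLU formulation; chunk order is an assumption.
import torch
import torch.nn as nn
import torch.nn.functional as F


class SwiGLUSketch(nn.Module):
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x, gate = x.chunk(2, dim=-1)
        return x * F.silu(gate)


class GEGLUSketch(nn.Module):
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x, gate = x.chunk(2, dim=-1)
        return x * F.gelu(gate)


y = SwiGLUSketch()(torch.randn(2, 16))   # last dim halves: (2, 16) -> (2, 8)
print(y.shape)
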
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/layers/attention.py
class SimpleAttention (line 17) | class SimpleAttention(nn.Module):
method __init__ (line 18) | def __init__(
method forward (line 39) | def forward(
class AttentionBlock (line 81) | class AttentionBlock(nn.Module):
method __init__ (line 82) | def __init__(
method attn (line 109) | def attn(
method forward (line 143) | def forward(
class AttentionLayer (line 168) | class AttentionLayer(nn.Module):
method __init__ (line 169) | def __init__(
method forward (line 200) | def forward(
class AttentionDecoderBlock (line 219) | class AttentionDecoderBlock(nn.Module):
method __init__ (line 220) | def __init__(
method cross_attn (line 253) | def cross_attn(
method self_attn (line 292) | def self_attn(
method forward (line 321) | def forward(
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/layers/convnext.py
class CvnxtBlock (line 5) | class CvnxtBlock(nn.Module):
method __init__ (line 6) | def __init__(
method forward (line 33) | def forward(self, x):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/layers/drop_path.py
function drop_path (line 5) | def drop_path(x: torch.Tensor, drop_prob: float = 0.0, training: bool = ...
class DropPath (line 19) | class DropPath(nn.Module):
method __init__ (line 20) | def __init__(self, drop_prob=None):
method forward (line 24) | def forward(self, x):
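
Note: drop_path implements stochastic depth: during training each sample's residual branch is zeroed with probability drop_prob and the survivors are rescaled so the expectation is unchanged. A standard sketch of that rule follows (the usual timm-style formulation, not necessarily this file verbatim).

# Standard stochastic-depth rule; the file's exact implementation may differ slightly.
import torch


def drop_path_sketch(x: torch.Tensor, drop_prob: float = 0.0, training: bool = False):
    if drop_prob == 0.0 or not training:
        return x
    keep_prob = 1.0 - drop_prob
    # One Bernoulli draw per sample, broadcast over the remaining dims.
    shape = (x.shape[0],) + (1,) * (x.ndim - 1)
    mask = x.new_empty(shape).bernoulli_(keep_prob)
    return x * mask / keep_prob   # rescale survivors to keep the expectation


out = drop_path_sketch(torch.ones(4, 3, 2, 2), drop_prob=0.5, training=True)
print(out[:, 0, 0, 0])   # each sample is either 0 or 2
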
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/layers/layer_scale.py
class LayerScale (line 5) | class LayerScale(nn.Module):
method __init__ (line 6) | def __init__(
method forward (line 16) | def forward(self, x: torch.Tensor) -> torch.Tensor:
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/layers/mlp.py
class MLP (line 9) | class MLP(nn.Module):
method __init__ (line 10) | def __init__(
method forward (line 29) | def forward(self, x: torch.Tensor) -> torch.Tensor:
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/layers/nystrom.py
class NystromSelfAttentionConfig (line 29) | class NystromSelfAttentionConfig(AttentionConfig):
class AvgPool (line 62) | class AvgPool(nn.Module):
method __init__ (line 63) | def __init__(self, n: int):
method forward (line 67) | def forward(self, x: torch.Tensor):
class NystromAttention (line 100) | class NystromAttention(Attention):
method __init__ (line 102) | def __init__(
method forward (line 166) | def forward(
method _triu_mask (line 286) | def _triu_mask(self, dim_1: int, dim_2: int, dim_3: int, **kwargs) -> ...
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/layers/nystrom_attention.py
class NystromBlock (line 12) | class NystromBlock(AttentionBlock):
method __init__ (line 13) | def __init__(
method attn (line 38) | def attn(
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/layers/positional_encoding.py
class PositionEmbeddingSine (line 14) | class PositionEmbeddingSine(nn.Module):
method __init__ (line 15) | def __init__(
method forward (line 28) | def forward(
method __repr__ (line 59) | def __repr__(self, _repr_indent=4):
class LearnedSinusoidalPosEmb (line 72) | class LearnedSinusoidalPosEmb(nn.Module):
method __init__ (line 73) | def __init__(self, dim):
method forward (line 79) | def forward(self, x):
function generate_fourier_features (line 87) | def generate_fourier_features(x, max_freq=64, num_bands=16):
function broadcat (line 102) | def broadcat(tensors, dim=-1):
function rotate_half (line 121) | def rotate_half(x):
class VisionRotaryEmbedding (line 128) | class VisionRotaryEmbedding(nn.Module):
method __init__ (line 129) | def __init__(
method forward (line 171) | def forward(self, t, start_index=0):
class VisionRotaryEmbeddingFast (line 186) | class VisionRotaryEmbeddingFast(nn.Module):
method __init__ (line 187) | def __init__(
method forward (line 226) | def forward(self, t):
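
Note: rotate_half is the building block of the rotary embeddings above. The sketch below uses the interleaved-pair convention common in EVA-style vision RoPE, mapping each (a, b) pair to (-b, a); whether this file uses the interleaved or half-split convention is an assumption.

# Interleaved-pair rotate_half sketch; the file's convention is an assumption.
import torch


def rotate_half_sketch(x: torch.Tensor) -> torch.Tensor:
    # View the last dim as (d/2, 2) pairs, map (a, b) -> (-b, a), flatten back.
    x = x.reshape(*x.shape[:-1], -1, 2)
    a, b = x.unbind(-1)
    return torch.stack((-b, a), dim=-1).flatten(-2)


t = torch.tensor([1.0, 2.0, 3.0, 4.0])
print(rotate_half_sketch(t))   # tensor([-2., 1., -4., 3.])
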
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/layers/upsample.py
class ConvUpsample (line 13) | class ConvUpsample(nn.Module):
method __init__ (line 14) | def __init__(
method forward (line 40) | def forward(self, x: torch.Tensor):
class ConvUpsampleShuffle (line 48) | class ConvUpsampleShuffle(nn.Module):
method __init__ (line 49) | def __init__(
method forward (line 74) | def forward(self, x: torch.Tensor):
class ConvUpsampleShuffleResidual (line 82) | class ConvUpsampleShuffleResidual(nn.Module):
method __init__ (line 83) | def __init__(
method forward (line 129) | def forward(self, x: torch.Tensor):
class ResidualConvUnit (line 137) | class ResidualConvUnit(nn.Module):
method __init__ (line 138) | def __init__(
method forward (line 173) | def forward(self, x):
class ResUpsampleBil (line 183) | class ResUpsampleBil(nn.Module):
method __init__ (line 184) | def __init__(
method forward (line 219) | def forward(self, x: torch.Tensor):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/models/backbones/convnext.py
function get_num_layer_for_convnext (line 14) | def get_num_layer_for_convnext(var_name):
function get_parameter_groups (line 46) | def get_parameter_groups(model, lr, wd=1e-5, ld=0.9, skip_list=None):
class Downsample (line 105) | class Downsample(nn.Module):
method __init__ (line 106) | def __init__(self, in_chs, out_chs, stride=1, dilation=1):
method forward (line 124) | def forward(self, x):
class ConvNeXtBlock (line 130) | class ConvNeXtBlock(nn.Module):
method __init__ (line 141) | def __init__(
method forward (line 208) | def forward(self, x):
class ConvNeXtStage (line 226) | class ConvNeXtStage(nn.Module):
method __init__ (line 227) | def __init__(
method forward (line 289) | def forward(self, x):
class ConvNeXt (line 301) | class ConvNeXt(nn.Module):
method __init__ (line 302) | def __init__(
method _init_weights (line 450) | def _init_weights(self, module):
method forward (line 459) | def forward(self, x, masks=None):
method group_matcher (line 474) | def group_matcher(self, coarse=False):
method set_grad_checkpointing (line 489) | def set_grad_checkpointing(self, enable=True):
method freeze (line 493) | def freeze(self) -> None:
method get_params (line 499) | def get_params(self, lr, wd, ld, *args, **kwargs):
method no_weight_decay (line 503) | def no_weight_decay(self):
method build (line 507) | def build(cls, config):
function checkpoint_filter_fn (line 512) | def checkpoint_filter_fn(state_dict, model):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/models/backbones/convnext2.py
function get_num_layer_for_convnext_single (line 7) | def get_num_layer_for_convnext_single(var_name, depths):
function get_num_layer_for_convnext (line 26) | def get_num_layer_for_convnext(var_name):
function get_parameter_groups (line 57) | def get_parameter_groups(model, lr, wd=1e-5, ld=0.9, skip_list=()):
class LayerNorm (line 112) | class LayerNorm(nn.Module):
method __init__ (line 119) | def __init__(self, normalized_shape, eps=1e-6, data_format="channels_l...
method forward (line 129) | def forward(self, x):
class GRN (line 142) | class GRN(nn.Module):
method __init__ (line 145) | def __init__(self, dim):
method forward (line 150) | def forward(self, x):
class Block (line 156) | class Block(nn.Module):
method __init__ (line 164) | def __init__(self, dim, drop_path=0.0, mult=4, use_checkpoint=False):
method forward (line 179) | def forward(self, x):
class ConvNeXtV2 (line 194) | class ConvNeXtV2(nn.Module):
method __init__ (line 206) | def __init__(
method _init_weights (line 260) | def _init_weights(self, m):
method forward (line 265) | def forward(self, x):
method get_params (line 275) | def get_params(self, lr, wd, ld, *args, **kwargs):
method freeze (line 279) | def freeze(self) -> None:
method build (line 286) | def build(cls, config):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/models/backbones/dinov2.py
function named_apply (line 19) | def named_apply(
function get_parameter_groups (line 38) | def get_parameter_groups(model, lr, wd=1e-5, ld=0.9, skip_list=()):
class BlockChunk (line 108) | class BlockChunk(nn.ModuleList):
method forward (line 109) | def forward(self, x):
class DinoVisionTransformer (line 115) | class DinoVisionTransformer(nn.Module):
method __init__ (line 116) | def __init__(
method init_weights (line 260) | def init_weights(self):
method interpolate_pos_encoding (line 267) | def interpolate_pos_encoding(self, x, w, h):
method prepare_tokens_with_masks (line 306) | def prepare_tokens_with_masks(self, x, masks=None):
method forward (line 324) | def forward(self, x, masks=None):
method get_params (line 349) | def get_params(self, lr, wd, ld, *args, **kwargs):
method freeze (line 353) | def freeze(self) -> None:
method train (line 359) | def train(self, mode=True):
function init_weights_vit_timm (line 380) | def init_weights_vit_timm(module: nn.Module, name: str = ""):
function vit_small (line 388) | def vit_small(patch_size=16, num_register_tokens=0, export=False, **kwar...
function vit_base (line 402) | def vit_base(patch_size=16, num_register_tokens=0, export=False, **kwargs):
function vit_large (line 416) | def vit_large(patch_size=16, num_register_tokens=0, export=False, **kwar...
function _make_dinov2_model_name (line 430) | def _make_dinov2_model_name(arch_name: str, patch_size: int) -> str:
function _make_dinov2_model (line 435) | def _make_dinov2_model(
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/models/backbones/metadinov2/attention.py
class Attention (line 31) | class Attention(nn.Module):
method __init__ (line 32) | def __init__(
method forward (line 51) | def forward(self, x: torch.Tensor) -> torch.Tensor:
class MemEffAttention (line 65) | class MemEffAttention(Attention):
method forward (line 66) | def forward(self, x: torch.Tensor, attn_bias=None) -> torch.Tensor:
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/models/backbones/metadinov2/block.py
class Block (line 34) | class Block(nn.Module):
method __init__ (line 35) | def __init__(
method forward (line 84) | def forward(self, x: torch.Tensor) -> torch.Tensor:
function drop_add_residual_stochastic_depth (line 112) | def drop_add_residual_stochastic_depth(
function get_branges_scales (line 138) | def get_branges_scales(x, sample_drop_ratio=0.0):
function add_residual (line 146) | def add_residual(x, brange, residual, residual_scale_factor, scaling_vec...
function get_attn_bias_and_cat (line 167) | def get_attn_bias_and_cat(x_list, branges=None):
function drop_add_residual_stochastic_depth_list (line 197) | def drop_add_residual_stochastic_depth_list(
class NestedTensorBlock (line 228) | class NestedTensorBlock(Block):
method forward_nested (line 229) | def forward_nested(self, x_list: List[torch.Tensor]) -> List[torch.Ten...
method forward (line 273) | def forward(self, x_or_x_list):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/models/backbones/metadinov2/dino_head.py
class DINOHead (line 13) | class DINOHead(nn.Module):
method __init__ (line 14) | def __init__(
method _init_weights (line 38) | def _init_weights(self, m):
method forward (line 44) | def forward(self, x):
function _build_mlp (line 52) | def _build_mlp(
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/models/backbones/metadinov2/drop_path.py
function drop_path (line 15) | def drop_path(x, drop_prob: float = 0.0, training: bool = False):
class DropPath (line 29) | class DropPath(nn.Module):
method __init__ (line 32) | def __init__(self, drop_prob=None):
method forward (line 36) | def forward(self, x):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/models/backbones/metadinov2/layer_scale.py
class LayerScale (line 16) | class LayerScale(nn.Module):
method __init__ (line 17) | def __init__(
method forward (line 27) | def forward(self, x: Tensor) -> Tensor:
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/models/backbones/metadinov2/mlp.py
class Mlp (line 17) | class Mlp(nn.Module):
method __init__ (line 18) | def __init__(
method forward (line 35) | def forward(self, x: Tensor) -> Tensor:
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/models/backbones/metadinov2/patch_embed.py
function make_2tuple (line 17) | def make_2tuple(x):
class PatchEmbed (line 26) | class PatchEmbed(nn.Module):
method __init__ (line 38) | def __init__(
method forward (line 71) | def forward(self, x: Tensor) -> Tensor:
method flops (line 90) | def flops(self) -> float:
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/models/backbones/metadinov2/swiglu_ffn.py
class SwiGLUFFN (line 13) | class SwiGLUFFN(nn.Module):
method __init__ (line 14) | def __init__(
method forward (line 29) | def forward(self, x: Tensor) -> Tensor:
class SwiGLUFFNFused (line 45) | class SwiGLUFFNFused(SwiGLU):
method __init__ (line 46) | def __init__(
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/models/encoder.py
class ModelWrap (line 7) | class ModelWrap(nn.Module):
method __init__ (line 8) | def __init__(self, model) -> None:
method forward (line 12) | def forward(self, x, *args, **kwargs):
function convnextv2_base (line 20) | def convnextv2_base(config, **kwargs):
function convnextv2_large (line 37) | def convnextv2_large(config, **kwargs):
function convnextv2_large_mae (line 54) | def convnextv2_large_mae(config, **kwargs):
function convnextv2_huge (line 71) | def convnextv2_huge(config, **kwargs):
function convnextv2_huge_mae (line 88) | def convnextv2_huge_mae(config, **kwargs):
function convnext_large_pt (line 105) | def convnext_large_pt(config, **kwargs):
function convnext_large (line 127) | def convnext_large(config, **kwargs):
function dinov2_vits14 (line 139) | def dinov2_vits14(config, pretrained: bool = True, **kwargs):
function dinov2_vitb14 (line 158) | def dinov2_vitb14(config, pretrained: bool = True, **kwargs):
function dinov2_vitl14 (line 177) | def dinov2_vitl14(config, pretrained: str = "", **kwargs):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/models/unidepthv1/decoder.py
class ListAdapter (line 21) | class ListAdapter(nn.Module):
method __init__ (line 22) | def __init__(self, input_dims: List[int], hidden_dim: int):
method forward (line 33) | def forward(self, x: torch.Tensor, splits: torch.Tensor) -> torch.Tensor:
class CameraHead (line 39) | class CameraHead(nn.Module):
method __init__ (line 40) | def __init__(
method forward (line 82) | def forward(self, features, cls_tokens, pos_embed) -> torch.Tensor:
method set_shapes (line 106) | def set_shapes(self, shapes: Tuple[int, int]):
class DepthHead (line 110) | class DepthHead(nn.Module):
method __init__ (line 111) | def __init__(
method set_original_shapes (line 189) | def set_original_shapes(self, shapes: Tuple[int, int]):
method set_shapes (line 192) | def set_shapes(self, shapes: Tuple[int, int]):
method forward (line 195) | def forward(
class Decoder (line 303) | class Decoder(nn.Module):
method __init__ (line 304) | def __init__(
method _init_weights (line 316) | def _init_weights(self, m):
method get_adapted_features (line 329) | def get_adapted_features(self, features_flat, splits):
method run_camera (line 337) | def run_camera(self, cls_tokens, features, pos_embed, original_shapes,...
method forward (line 364) | def forward(self, inputs, image_metas) -> torch.Tensor:
method no_weight_decay_keywords (line 466) | def no_weight_decay_keywords(self):
method build (line 469) | def build(self, config):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/models/unidepthv1/unidepthv1.py
function _paddings (line 30) | def _paddings(image_shape, network_shape):
function _shapes (line 38) | def _shapes(image_shape, network_shape):
function _preprocess (line 49) | def _preprocess(rgbs, intrinsics, shapes, pads, ratio, output_shapes):
function _postprocess (line 65) | def _postprocess(predictions, intrinsics, shapes, pads, ratio, original_...
class UniDepthV1 (line 97) | class UniDepthV1(
method __init__ (line 104) | def __init__(
method forward_train (line 116) | def forward_train(self, inputs, image_metas):
method forward_test (line 122) | def forward_test(self, inputs, image_metas):
method forward (line 150) | def forward(self, inputs, image_metas):
method encode_decode (line 156) | def encode_decode(self, inputs, image_metas):
method compute_losses (line 235) | def compute_losses(self, outputs, inputs, image_metas):
method infer (line 288) | def infer(self, rgbs: torch.Tensor, intrinsics=None, skip_camera=False):
method load_pretrained (line 375) | def load_pretrained(self, model_file):
method get_params (line 394) | def get_params(self, config):
method device (line 413) | def device(self):
method build (line 416) | def build(self, config):
method build_losses (line 445) | def build_losses(self, config):
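
Note: UniDepthV1.infer (line 288 above) is the entry point the annotation pipeline relies on: it takes an RGB tensor, optionally with known intrinsics, and returns metric depth plus the estimated camera. The usage sketch below is hedged: the from_pretrained loader and the output keys follow the upstream UniDepth release and are assumptions about this vendored copy; load_pretrained with a local checkpoint is the fallback indexed above.

# Hedged usage sketch for UniDepthV1.infer; loader and output keys assume the
# upstream UniDepth conventions and may differ in this vendored copy.
import torch
from unidepth.models import UniDepthV1

model = UniDepthV1.from_pretrained("lpiccinelli/unidepth-v1-vitl14")   # assumed upstream-style loader
device = "cuda" if torch.cuda.is_available() else "cpu"
model = model.eval().to(device)

rgb = torch.randint(0, 255, (3, 480, 640), dtype=torch.uint8)   # single CHW uint8 frame
with torch.no_grad():
    preds = model.infer(rgb.to(device))   # intrinsics=None -> camera is also predicted

print(preds["depth"].shape)     # metric depth map (assumed output key)
print(preds["intrinsics"])      # estimated 3x3 intrinsics (assumed output key)
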
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/models/unidepthv2/decoder.py
function orthonormal_init (line 19) | def orthonormal_init(num_tokens, dims):
class ListAdapter (line 35) | class ListAdapter(nn.Module):
method __init__ (line 36) | def __init__(self, input_dims: list[int], hidden_dim: int):
method forward (line 43) | def forward(self, xs: torch.Tensor) -> list[torch.Tensor]:
class CameraHead (line 48) | class CameraHead(nn.Module):
method __init__ (line 49) | def __init__(
method fill_intrinsics (line 85) | def fill_intrinsics(self, x):
method forward (line 101) | def forward(self, features, cls_tokens, pos_embed) -> torch.Tensor:
method set_shapes (line 113) | def set_shapes(self, shapes: tuple[int, int]):
class DepthHead (line 117) | class DepthHead(nn.Module):
method __init__ (line 118) | def __init__(
method set_original_shapes (line 228) | def set_original_shapes(self, shapes: tuple[int, int]):
method set_shapes (line 231) | def set_shapes(self, shapes: tuple[int, int]):
method embed_rays (line 234) | def embed_rays(self, rays):
method condition (line 255) | def condition(self, feat, rays_embeddings):
method process (line 262) | def process(self, features_list, rays_embeddings):
method depth_proj (line 284) | def depth_proj(self, out_features):
method confidence_proj (line 305) | def confidence_proj(self, out_features):
method decode (line 315) | def decode(self, out_features):
method forward (line 320) | def forward(
class Decoder (line 336) | class Decoder(nn.Module):
method __init__ (line 337) | def __init__(
method _init_weights (line 346) | def _init_weights(self, m):
method run_camera (line 361) | def run_camera(self, cls_tokens, features, pos_embed, original_shapes,...
method forward (line 405) | def forward(
method no_weight_decay_keywords (line 465) | def no_weight_decay_keywords(self):
method build (line 468) | def build(self, config):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/models/unidepthv2/decoder_old.py
class ListAdapter (line 13) | class ListAdapter(nn.Module):
method __init__ (line 14) | def __init__(self, input_dims: list[int], hidden_dim: int):
method forward (line 26) | def forward(self, x: torch.Tensor, splits: torch.Tensor) -> torch.Tensor:
class CameraHead (line 32) | class CameraHead(nn.Module):
method __init__ (line 33) | def __init__(
method fill_intrinsics (line 55) | def fill_intrinsics(self, x):
method forward (line 66) | def forward(self, features, cls_tokens, pos_embed) -> torch.Tensor:
method set_shapes (line 85) | def set_shapes(self, shapes: tuple[int, int]):
class GlobalHead (line 89) | class GlobalHead(nn.Module):
method __init__ (line 90) | def __init__(
method embed_rays (line 111) | def embed_rays(self, rays, shapes):
method set_original_shapes (line 123) | def set_original_shapes(self, shapes: tuple[int, int]):
method set_shapes (line 126) | def set_shapes(self, shapes: tuple[int, int]):
method get_scaleshift (line 129) | def get_scaleshift(self, x):
method forward (line 135) | def forward(self, features, cls_tokens, rays) -> torch.Tensor:
class DepthHead (line 149) | class DepthHead(nn.Module):
method __init__ (line 150) | def __init__(
method set_original_shapes (line 231) | def set_original_shapes(self, shapes: tuple[int, int]):
method set_shapes (line 234) | def set_shapes(self, shapes: tuple[int, int]):
method embed_rays (line 237) | def embed_rays(self, rays, shapes):
method project_rays (line 249) | def project_rays(self, rays, shapes):
method decode_depth (line 257) | def decode_depth(self, latents_16, rays, shapes):
method init_latents (line 308) | def init_latents(self, features, shapes):
method forward (line 317) | def forward(
class Decoder (line 346) | class Decoder(nn.Module):
method __init__ (line 347) | def __init__(
method _init_weights (line 355) | def _init_weights(self, m):
method get_adapted_features (line 370) | def get_adapted_features(self, features_flat, splits):
method run_camera (line 378) | def run_camera(self, cls_tokens, features, pos_embed, original_shapes,...
method run_global (line 408) | def run_global(self, cls_tokens, features, rays):
method forward (line 428) | def forward(self, inputs, image_metas) -> torch.Tensor:
method no_weight_decay_keywords (line 525) | def no_weight_decay_keywords(self):
method build (line 528) | def build(self, config):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/models/unidepthv2/export.py
class UniDepthV2ONNX (line 18) | class UniDepthV2ONNX(UniDepthV2):
method __init__ (line 19) | def __init__(
method forward (line 27) | def forward(self, rgbs):
class UniDepthV2ONNXcam (line 48) | class UniDepthV2ONNXcam(UniDepthV2):
method __init__ (line 49) | def __init__(
method forward (line 57) | def forward(self, rgbs, rays):
function export (line 80) | def export(model, path, shape=(462, 630), with_camera=False):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/models/unidepthv2/unidepthv2.py
function get_paddings (line 36) | def get_paddings(original_shape, aspect_ratio_range):
function get_resize_factor (line 61) | def get_resize_factor(original_shape, pixels_range, shape_multiplier=14):
function _postprocess (line 80) | def _postprocess(tensor, shapes, paddings, interpolation_mode="bilinear"):
function _postprocess_intrinsics (line 92) | def _postprocess_intrinsics(K, resize_factors, paddings):
class UniDepthV2 (line 111) | class UniDepthV2(
method __init__ (line 118) | def __init__(
method forward_train (line 129) | def forward_train(self, inputs, image_metas):
method forward_test (line 134) | def forward_test(self, inputs, image_metas):
method forward (line 162) | def forward(self, inputs, image_metas):
method compute_losses (line 168) | def compute_losses(self, outputs, inputs, image_metas):
method infer (line 241) | def infer(
method encode_decode (line 341) | def encode_decode(self, inputs, image_metas=[]):
method load_pretrained (line 381) | def load_pretrained(self, model_file):
method get_params (line 396) | def get_params(self, config):
method device (line 415) | def device(self):
method build (line 418) | def build(self, config):
method build_losses (line 462) | def build_losses(self, config):
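
The `infer` method listed above (unidepthv2.py, line 241) is the main inference entry point of the vendored UniDepthV2 model. A minimal usage sketch follows; the `from_pretrained` checkpoint id, the exact accepted input layout, and the output keys are assumptions based on UniDepth's published interface and are not verified against this copy.

```python
# Hedged sketch: single-image inference with the vendored UniDepthV2.
# Checkpoint id and output keys are assumptions from UniDepth's public docs;
# this copy can alternatively be built via config + load_pretrained().
import numpy as np
import torch
from PIL import Image

from unidepth.models import UniDepthV2  # vendored package in this repo

model = UniDepthV2.from_pretrained("lpiccinelli/unidepth-v2-vitl14")  # placeholder checkpoint id
model = model.to("cuda" if torch.cuda.is_available() else "cpu").eval()

rgb = torch.from_numpy(np.array(Image.open("frame.png"))).permute(2, 0, 1)  # (3, H, W), uint8

with torch.no_grad():
    out = model.infer(rgb)            # intrinsics=None -> camera is also estimated

depth = out["depth"]        # metric depth map
K = out["intrinsics"]       # estimated pinhole intrinsics
points = out["points"]      # unprojected 3D points
```
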
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/models/unidepthv2/unidepthv2_old.py
function _check_ratio (line 33) | def _check_ratio(image_ratio, ratio_bounds):
function _check_resolution (line 45) | def _check_resolution(shape_constraints, resolution_level):
function _get_closes_num_pixels (line 70) | def _get_closes_num_pixels(image_shape, pixels_bounds):
function _shapes (line 78) | def _shapes(image_shape, shape_constraints):
function _preprocess (line 95) | def _preprocess(rgbs, intrinsics, shapes, ratio):
function _postprocess (line 107) | def _postprocess(outs, ratio, original_shapes, mode="nearest-exact"):
class UniDepthV2old (line 119) | class UniDepthV2old(
method __init__ (line 126) | def __init__(
method forward (line 134) | def forward(self, inputs, image_metas):
method infer (line 198) | def infer(self, rgbs: torch.Tensor, intrinsics=None):
method load_pretrained (line 284) | def load_pretrained(self, model_file):
method device (line 303) | def device(self):
method build (line 306) | def build(self, config):
method build_losses (line 351) | def build_losses(self, config):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/ops/extract_patches/functions/extract_patches.py
class ExtractPatchesFunction (line 6) | class ExtractPatchesFunction(Function):
method forward (line 8) | def forward(ctx, input, centers, h, w):
method backward (line 15) | def backward(ctx, grad_output):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/ops/extract_patches/modules/patch_extractor.py
class RandomPatchExtractor (line 10) | class RandomPatchExtractor(nn.Module):
method __init__ (line 11) | def __init__(
method forward (line 16) | def forward(
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/ops/extract_patches/setup.py
function get_extensions (line 11) | def get_extensions():
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/ops/extract_patches/src/cpu/extract_patches_cpu.cpp
function extract_patches_cpu_forward (line 6) | torch::Tensor extract_patches_cpu_forward(
function extract_patches_cpu_backward (line 15) | std::vector<at::Tensor> extract_patches_cpu_backward(
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/ops/extract_patches/src/extract_patches.cpp
function PYBIND11_MODULE (line 4) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/ops/extract_patches/test.py
function extract_patches (line 5) | def extract_patches(input, centers, patch_size):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/ops/knn/functions/knn.py
class _knn_points (line 20) | class _knn_points(Function):
method forward (line 27) | def forward(
method backward (line 97) | def backward(ctx, grad_dists, grad_idx):
function knn_points (line 113) | def knn_points(
function knn_gather (line 199) | def knn_gather(
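
`knn_points` and `knn_gather` above mirror the PyTorch3D-style K-nearest-neighbour ops and back onto the compiled extension built by `ops/knn/setup.py`. A hedged sketch of how they fit together; the import path and the `(dists, idx, ...)` return layout are assumptions based on that PyTorch3D-style API.

```python
# Hedged sketch: KNN lookup between two batched point clouds.
# Assumes the module is importable at this path and returns (dists, idx, ...)
# like PyTorch3D; requires the extension from ops/knn/setup.py to be built.
import torch
from unidepth.ops.knn.functions.knn import knn_points, knn_gather

p1 = torch.rand(2, 1000, 3)                 # query point clouds     (N, P1, 3)
p2 = torch.rand(2, 5000, 3)                 # reference point clouds (N, P2, 3)

dists, idx = knn_points(p1, p2, K=4)[:2]    # squared distances + indices of 4 NNs
neighbors = knn_gather(p2, idx)             # (N, P1, 4, 3) gathered neighbour coordinates
```
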
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/ops/knn/setup.py
function get_extensions (line 11) | def get_extensions():
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/ops/knn/src/knn_cpu.cpp
function KNearestNeighborIdxCpu (line 13) | std::tuple<at::Tensor, at::Tensor> KNearestNeighborIdxCpu(
function KNearestNeighborBackwardCpu (line 75) | std::tuple<at::Tensor, at::Tensor> KNearestNeighborBackwardCpu(
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/ops/knn/src/knn_ext.cpp
function PYBIND11_MODULE (line 4) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/ops/losses/arel.py
class ARel (line 7) | class ARel(nn.Module):
method __init__ (line 8) | def __init__(
method forward (line 24) | def forward(
method build (line 40) | def build(cls, config):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/ops/losses/confidence.py
class Confidence (line 7) | class Confidence(nn.Module):
method __init__ (line 8) | def __init__(
method forward (line 25) | def forward(
method build (line 56) | def build(cls, config):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/ops/losses/distill.py
class SelfDistill (line 9) | class SelfDistill(nn.Module):
method __init__ (line 10) | def __init__(self, weight: float, output_fn: str = "sqrt", eps: float ...
method forward (line 19) | def forward(
method loss (line 121) | def loss(
method build (line 133) | def build(cls, config):
class TeacherDistill (line 141) | class TeacherDistill(nn.Module):
method __init__ (line 142) | def __init__(
method forward (line 161) | def forward(
method build (line 215) | def build(cls, config):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/ops/losses/dummy.py
class Dummy (line 5) | class Dummy(nn.Module):
method __init__ (line 6) | def __init__(self, *args, **kwargs):
method forward (line 11) | def forward(self, dummy: torch.Tensor, *args, **kwargs) -> torch.Tensor:
method build (line 15) | def build(cls, config):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/ops/losses/local_ssi.py
function sample_strong_edges (line 11) | def sample_strong_edges(edges_img, quantile=0.95, reshape=8):
function extract_patches (line 44) | def extract_patches(tensor, sample_coords, patch_size: tuple[int, int] =...
class LocalSSI (line 81) | class LocalSSI(nn.Module):
method __init__ (line 82) | def __init__(
method forward (line 111) | def forward(
method build (line 200) | def build(cls, config):
class EdgeGuidedLocalSSI (line 212) | class EdgeGuidedLocalSSI(nn.Module):
method __init__ (line 213) | def __init__(
method get_edge (line 254) | def get_edge(self, image, mask):
method compute_sample_patch_error (line 273) | def compute_sample_patch_error(
method compute_image_error (line 298) | def compute_image_error(self, input, target, mask, image_size):
method forward (line 311) | def forward(
method build (line 354) | def build(cls, config):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/ops/losses/regression.py
class Regression (line 7) | class Regression(nn.Module):
method __init__ (line 8) | def __init__(
method forward (line 32) | def forward(
method build (line 52) | def build(cls, config):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/ops/losses/silog.py
class SILog (line 8) | class SILog(nn.Module):
method __init__ (line 9) | def __init__(
method forward (line 29) | def forward(
method build (line 53) | def build(cls, config):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/ops/losses/utils.py
function masked_mean_var (line 30) | def masked_mean_var(
function masked_mean (line 49) | def masked_mean(data: torch.Tensor, mask: torch.Tensor | None, dim: List...
function masked_quantile (line 60) | def masked_quantile(
function masked_median (line 121) | def masked_median(data: torch.Tensor, mask: torch.Tensor, dim: List[int]):
function masked_median_mad (line 129) | def masked_median_mad(data: torch.Tensor, mask: torch.Tensor, dim: List[...
function masked_weighted_mean_var (line 138) | def masked_weighted_mean_var(
function ssi (line 161) | def ssi(
function ind2sub (line 190) | def ind2sub(idx, cols):
function sub2ind (line 196) | def sub2ind(r, c, cols):
function l2 (line 201) | def l2(input_tensor: torch.Tensor, gamma: float = 1.0, *args, **kwargs) ...
function l1 (line 205) | def l1(input_tensor: torch.Tensor, gamma: float = 1.0, *args, **kwargs) ...
function charbonnier (line 209) | def charbonnier(
function cauchy (line 215) | def cauchy(
function geman_mcclure (line 221) | def geman_mcclure(
function robust_loss (line 227) | def robust_loss(
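
losses/utils.py collects masked reductions (mean, variance, quantile, median) and robust penalties shared by the loss modules above. An independent orientation-only sketch of what a masked mean amounts to; the repository's helpers additionally handle explicit reduction dims and variance terms.

```python
# Independent sketch of a masked mean, for orientation only; not this file's
# implementation (masked_mean/masked_mean_var also take reduction dims).
import torch

def masked_mean_sketch(data: torch.Tensor, mask: torch.Tensor) -> torch.Tensor:
    mask = mask.to(data.dtype)
    return (data * mask).sum() / mask.sum().clamp(min=1.0)

pred = torch.rand(2, 1, 64, 64)
valid = torch.rand(2, 1, 64, 64) > 0.3
print(masked_mean_sketch((pred - 0.5).abs(), valid))   # masked L1-style error
```
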
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/ops/scheduler.py
class PlainCosineScheduler (line 6) | class PlainCosineScheduler(object):
method __init__ (line 7) | def __init__(
method get_scheduler (line 31) | def get_scheduler(self):
method step (line 50) | def step(self):
method __getitem__ (line 56) | def __getitem__(self, it):
class CosineScheduler (line 61) | class CosineScheduler(object):
method __init__ (line 62) | def __init__(
method get_schedulers (line 88) | def get_schedulers(self, group):
method step (line 109) | def step(self):
method __getitem__ (line 117) | def __getitem__(self, it):
method get (line 121) | def get(self):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/utils/camera.py
function invert_pinhole (line 16) | def invert_pinhole(K):
class Camera (line 30) | class Camera:
method __init__ (line 37) | def __init__(self, params=None, K=None):
method project (line 57) | def project(self, xyz):
method unproject (line 60) | def unproject(self, uv):
method get_projection_mask (line 63) | def get_projection_mask(self):
method get_overlap_mask (line 66) | def get_overlap_mask(self):
method reconstruct (line 69) | def reconstruct(self, depth):
method resize (line 78) | def resize(self, factor):
method to (line 83) | def to(self, device, non_blocking=False):
method get_rays (line 88) | def get_rays(self, shapes, noisy=False):
method get_pinhole_rays (line 94) | def get_pinhole_rays(self, shapes, noisy=False):
method flip (line 100) | def flip(self, H, W, direction="horizontal"):
method clone (line 112) | def clone(self):
method crop (line 115) | def crop(self, left, top, right=None, bottom=None):
method get_new_fov (line 123) | def get_new_fov(self, new_shape, original_shape):
method mask_overlap_projection (line 132) | def mask_overlap_projection(self, projected):
method _pad_params (line 156) | def _pad_params(self):
method flatten_cameras (line 169) | def flatten_cameras(cameras): # -> list[Camera]:
method _stack_or_cat_cameras (line 182) | def _stack_or_cat_cameras(cameras, func, **kwargs):
method __torch_function__ (line 196) | def __torch_function__(cls, func, types, args=(), kwargs=None):
method device (line 212) | def device(self):
method hfov (line 217) | def hfov(self):
method vfov (line 221) | def vfov(self):
method max_fov (line 225) | def max_fov(self):
class Pinhole (line 229) | class Pinhole(Camera):
method __init__ (line 230) | def __init__(self, params=None, K=None):
method project (line 239) | def project(self, pcd):
method unproject (line 255) | def unproject(self, uv):
method reconstruct (line 269) | def reconstruct(self, depth):
class EUCM (line 276) | class EUCM(Camera):
method __init__ (line 277) | def __init__(self, params):
method project (line 281) | def project(self, xyz):
method unproject (line 307) | def unproject(self, uv):
class Spherical (line 331) | class Spherical(Camera):
method __init__ (line 332) | def __init__(self, params):
method resize (line 336) | def resize(self, factor):
method crop (line 341) | def crop(self, left, top, right, bottom):
method project (line 359) | def project(self, xyz):
method unproject (line 371) | def unproject(self, uv):
method reconstruct (line 388) | def reconstruct(self, depth):
method get_new_fov (line 394) | def get_new_fov(self, new_shape, original_shape):
method hfov (line 400) | def hfov(self):
method vfov (line 404) | def vfov(self):
method max_fov (line 408) | def max_fov(self):
class OPENCV (line 412) | class OPENCV(Camera):
method __init__ (line 413) | def __init__(self, params):
method project (line 423) | def project(self, xyz):
method unproject (line 496) | def unproject(self, uv, max_iters: int = 10):
class Fisheye624 (line 697) | class Fisheye624(Camera):
method __init__ (line 698) | def __init__(self, params):
method project (line 705) | def project(self, xyz):
method unproject (line 778) | def unproject(self, uv, max_iters: int = 10):
class MEI (line 977) | class MEI(Camera):
method __init__ (line 978) | def __init__(self, params):
method unproject (line 985) | def unproject(self, uv, max_iters: int = 20):
method project (line 1084) | def project(self, xyz):
class BatchCamera (line 1145) | class BatchCamera(Camera):
method __init__ (line 1151) | def __init__(self, params, K, original_class, cameras):
method project (line 1158) | def project(self, points_3d):
method unproject (line 1167) | def unproject(self, points_2d):
method crop (line 1173) | def crop(self, left, top, right=None, bottom=None):
method resize (line 1182) | def resize(self, ratio):
method reconstruct (line 1186) | def reconstruct(self, depth):
method get_projection_mask (line 1195) | def get_projection_mask(self):
method to (line 1200) | def to(self, device, non_blocking=False):
method reshape (line 1207) | def reshape(self, *shape):
method get_new_fov (line 1223) | def get_new_fov(self, new_shape, original_shape):
method squeeze (line 1229) | def squeeze(self, dim):
method __getitem__ (line 1237) | def __getitem__(self, idx):
method __setitem__ (line 1251) | def __setitem__(self, idx, value):
method __len__ (line 1271) | def __len__(self):
method from_camera (line 1275) | def from_camera(cls, camera):
method is_perspective (line 1279) | def is_perspective(self):
method is_spherical (line 1283) | def is_spherical(self):
method is_eucm (line 1287) | def is_eucm(self):
method is_fisheye (line 1291) | def is_fisheye(self):
method is_pinhole (line 1295) | def is_pinhole(self):
method hfov (line 1299) | def hfov(self):
method vfov (line 1303) | def vfov(self):
method max_fov (line 1307) | def max_fov(self):
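
camera.py defines a family of camera models (Pinhole, EUCM, Spherical, OPENCV, Fisheye624, MEI, plus BatchCamera) sharing a `project` / `unproject` / `reconstruct` interface. A hedged round-trip sketch with the pinhole model; the batched (1, 3, 3) intrinsics layout and the (B, 1, H, W) depth layout are assumptions.

```python
# Hedged sketch of the shared camera interface (reconstruct / project round trip).
# Tensor layouts (batched 3x3 K, depth as (B, 1, H, W)) are assumptions.
import torch
from unidepth.utils.camera import Pinhole

K = torch.tensor([[[500.0, 0.0, 320.0],
                   [0.0, 500.0, 240.0],
                   [0.0, 0.0, 1.0]]])       # (1, 3, 3)
cam = Pinhole(K=K)

depth = torch.ones(1, 1, 480, 640)          # dummy metric depth
xyz = cam.reconstruct(depth)                # back-project depth to 3D points
uv = cam.project(xyz)                       # re-project; should land back on the pixel grid
```
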
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/utils/chamfer_distance.py
function _validate_chamfer_reduction_inputs (line 16) | def _validate_chamfer_reduction_inputs(
function _handle_pointcloud_input (line 33) | def _handle_pointcloud_input(
class ChamferDistance (line 59) | class ChamferDistance(torch.nn.Module):
method forward (line 60) | def forward(
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/utils/coordinate.py
function coords_grid (line 4) | def coords_grid(b, h, w, homogeneous=False, device=None, noisy=False):
function normalize_coords (line 23) | def normalize_coords(coords, h, w):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/utils/distributed.py
function is_dist_avail_and_initialized (line 16) | def is_dist_avail_and_initialized():
function get_rank (line 24) | def get_rank():
function get_local_rank (line 30) | def get_local_rank() -> int:
function get_local_size (line 41) | def get_local_size() -> int:
function get_world_size (line 53) | def get_world_size():
function barrier (line 59) | def barrier():
function is_main_process (line 65) | def is_main_process():
function is_rank_zero (line 69) | def is_rank_zero(args):
function get_dist_info (line 73) | def get_dist_info():
function setup_multi_processes (line 83) | def setup_multi_processes(cfg):
function setup_slurm (line 126) | def setup_slurm(backend: str, port: str) -> None:
function sync_tensor_across_gpus (line 153) | def sync_tensor_across_gpus(t, dim=0, cat=True):
function sync_string_across_gpus (line 179) | def sync_string_across_gpus(keys: list[str], device, dim=0):
function create_local_process_group (line 195) | def create_local_process_group() -> None:
function _get_global_gloo_group (line 211) | def _get_global_gloo_group():
function all_gather (line 218) | def all_gather(data, group=None):
function local_broadcast_process_authkey (line 234) | def local_broadcast_process_authkey():
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/utils/ema_torch.py
class DummyExponentialMovingAverage (line 17) | class DummyExponentialMovingAverage:
method __init__ (line 18) | def __init__(self, *args, **kwargs):
method _get_parameters (line 21) | def _get_parameters(self, *args, **kwargs):
method get_current_decay (line 24) | def get_current_decay(self, *args, **kwargs):
method update (line 27) | def update(self, *args, **kwargs):
method copy_to (line 30) | def copy_to(self, *args, **kwargs):
method store (line 33) | def store(self, *args, **kwargs):
method restore (line 36) | def restore(self, *args, **kwargs):
method average_parameters (line 40) | def average_parameters(self, *args, **kwargs):
method to (line 46) | def to(self, *args, **kwargs):
method state_dict (line 49) | def state_dict(self, *args, **kwargs):
method load_state_dict (line 52) | def load_state_dict(self, *args, **kwargs):
class ExponentialMovingAverage (line 56) | class ExponentialMovingAverage:
method __init__ (line 84) | def __init__(
method _get_parameters (line 110) | def _get_parameters(
method get_current_decay (line 131) | def get_current_decay(self):
method update (line 138) | def update(self, parameters: Optional[Iterable[torch.nn.Parameter]] = ...
method copy_to (line 164) | def copy_to(
method store (line 180) | def store(self, parameters: Optional[Iterable[torch.nn.Parameter]] = N...
method restore (line 192) | def restore(
method average_parameters (line 218) | def average_parameters(
method to (line 248) | def to(self, device=None, dtype=None) -> None:
method state_dict (line 274) | def state_dict(self) -> dict:
method load_state_dict (line 286) | def load_state_dict(self, state_dict: dict) -> None:
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/utils/evaluation_depth.py
function chamfer_dist (line 12) | def chamfer_dist(tensor1, tensor2):
function auc (line 21) | def auc(tensor1, tensor2, thresholds):
function delta (line 37) | def delta(tensor1, tensor2, exponent):
function tau (line 42) | def tau(tensor1, tensor2, perc):
function ssi (line 47) | def ssi(tensor1, tensor2):
function si (line 59) | def si(tensor1, tensor2):
function arel (line 63) | def arel(tensor1, tensor2):
function d_auc (line 68) | def d_auc(tensor1, tensor2):
function f1_score (line 74) | def f1_score(tensor1, tensor2, thresholds):
function eval_depth (line 132) | def eval_depth(
function eval_3d (line 150) | def eval_3d(
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/utils/geometric.py
function generate_rays (line 13) | def generate_rays(
function spherical_zbuffer_to_euclidean (line 57) | def spherical_zbuffer_to_euclidean(spherical_tensor: torch.Tensor) -> to...
function spherical_to_euclidean (line 77) | def spherical_to_euclidean(spherical_tensor: torch.Tensor) -> torch.Tensor:
function euclidean_to_spherical (line 93) | def euclidean_to_spherical(spherical_tensor: torch.Tensor) -> torch.Tensor:
function euclidean_to_spherical_zbuffer (line 109) | def euclidean_to_spherical_zbuffer(euclidean_tensor: torch.Tensor) -> to...
function unproject_points (line 118) | def unproject_points(
function project_points (line 161) | def project_points(
function downsample (line 208) | def downsample(data: torch.Tensor, downsample_factor: int = 2):
function flat_interpolate (line 228) | def flat_interpolate(
function dilate (line 256) | def dilate(image, kernel_size: int | tuple[int, int]):
function erode (line 272) | def erode(image, kernel_size: int | tuple[int, int]):
function iou (line 288) | def iou(mask1: torch.Tensor, mask2: torch.Tensor) -> torch.Tensor:
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/utils/misc.py
function max_stack (line 18) | def max_stack(tensors: list[torch.Tensor]) -> torch.Tensor:
function last_stack (line 24) | def last_stack(tensors: list[torch.Tensor]) -> torch.Tensor:
function first_stack (line 28) | def first_stack(tensors: list[torch.Tensor]) -> torch.Tensor:
function softmax_stack (line 33) | def softmax_stack(
function mean_stack (line 42) | def mean_stack(tensors: list[torch.Tensor]) -> torch.Tensor:
function sum_stack (line 49) | def sum_stack(tensors: list[torch.Tensor]) -> torch.Tensor:
function convert_module_to_f16 (line 55) | def convert_module_to_f16(l):
function convert_module_to_f32 (line 65) | def convert_module_to_f32(l):
function format_seconds (line 75) | def format_seconds(seconds):
function get_params (line 81) | def get_params(module, lr, wd):
function get_num_layer_for_swin (line 128) | def get_num_layer_for_swin(var_name, num_max_layer, layers_per_stage):
function get_params_layerdecayswin (line 146) | def get_params_layerdecayswin(module, lr, wd, ld):
function log (line 173) | def log(t, eps: float = 1e-5):
function l2norm (line 177) | def l2norm(t):
function exists (line 181) | def exists(val):
function identity (line 185) | def identity(t, *args, **kwargs):
function divisible_by (line 189) | def divisible_by(numer, denom):
function first (line 193) | def first(arr, d=None):
function default (line 199) | def default(val, d):
function maybe (line 205) | def maybe(fn):
function once (line 215) | def once(fn):
function _many (line 229) | def _many(fn):
function load_pretrained (line 242) | def load_pretrained(state_dict, checkpoint):
function load_checkpoint_swin (line 257) | def load_checkpoint_swin(model, checkpoint_model):
function add_padding_metas (line 359) | def add_padding_metas(out, image_metas):
function remove_padding (line 369) | def remove_padding(out, paddings):
function remove_padding_metas (line 378) | def remove_padding_metas(out, image_metas):
function ssi_helper (line 388) | def ssi_helper(tensor1, tensor2):
function calculate_mean_values (line 398) | def calculate_mean_values(names, values):
function remove_leading_dim (line 416) | def remove_leading_dim(infos):
function recursive_index (line 425) | def recursive_index(infos, index):
function to_cpu (line 434) | def to_cpu(infos):
function recursive_to (line 443) | def recursive_to(infos, device, non_blocking, cls):
function masked_mean (line 454) | def masked_mean(
class ProfileMethod (line 471) | class ProfileMethod:
method __init__ (line 472) | def __init__(self, model, func_name, track_statistics=True, verbose=Fa...
method __enter__ (line 479) | def __enter__(self):
method __exit__ (line 487) | def __exit__(self, exc_type, exc_val, exc_tb):
function profile_method (line 519) | def profile_method(track_statistics=True, verbose=False):
class ProfileFunction (line 531) | class ProfileFunction:
method __init__ (line 532) | def __init__(self, func_name, track_statistics=True, verbose=False):
method __enter__ (line 538) | def __enter__(self):
method __exit__ (line 546) | def __exit__(self, exc_type, exc_val, exc_tb):
function profile_function (line 574) | def profile_function(track_statistics=True, verbose=False):
function squeeze_list (line 586) | def squeeze_list(nested_list, dim, current_dim=0):
function match_gt (line 596) | def match_gt(tensor1, tensor2, padding1, padding2, mode: str = "bilinear"):
function match_intrinsics (line 645) | def match_intrinsics(K1, tensor1, tensor2, padding1, padding2):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/utils/positional_embedding.py
class PositionEmbeddingSine (line 14) | class PositionEmbeddingSine(nn.Module):
method __init__ (line 15) | def __init__(
method forward (line 28) | def forward(
method __repr__ (line 59) | def __repr__(self, _repr_indent=4):
class LearnedSinusoidalPosEmb (line 72) | class LearnedSinusoidalPosEmb(nn.Module):
method __init__ (line 73) | def __init__(self, dim):
method forward (line 79) | def forward(self, x):
function broadcat (line 87) | def broadcat(tensors, dim=-1):
function rotate_half (line 106) | def rotate_half(x):
class VisionRotaryEmbedding (line 113) | class VisionRotaryEmbedding(nn.Module):
method __init__ (line 114) | def __init__(
method forward (line 156) | def forward(self, t, start_index=0):
class VisionRotaryEmbeddingFast (line 171) | class VisionRotaryEmbeddingFast(nn.Module):
method __init__ (line 172) | def __init__(
method forward (line 211) | def forward(self, t):
function generate_fourier_features (line 218) | def generate_fourier_features(
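
`rotate_half` above is the helper behind the rotary embeddings (`VisionRotaryEmbedding*`) in this file. A generic sketch of the rotate-half trick on interleaved pairs; the exact pair layout and scaling used by this file's implementation may differ, so this is orientation only.

```python
# Generic rotate-half trick for rotary position embeddings (interleaved-pair layout).
# The pair layout used by this file's rotate_half is an assumption.
import torch

def rotate_half_sketch(x: torch.Tensor) -> torch.Tensor:
    x = x.reshape(*x.shape[:-1], -1, 2)        # (..., d/2, 2)
    x1, x2 = x.unbind(dim=-1)
    return torch.stack((-x2, x1), dim=-1).reshape(*x.shape[:-2], -1)

def apply_rope_sketch(t, freqs_cos, freqs_sin):
    # Rotate each 2D pair of features by its positional angle.
    return t * freqs_cos + rotate_half_sketch(t) * freqs_sin
```
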
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/utils/sht.py
function rsh_cart_0 (line 11) | def rsh_cart_0(xyz: torch.Tensor):
function rsh_cart_1 (line 36) | def rsh_cart_1(xyz: torch.Tensor):
function rsh_cart_2 (line 67) | def rsh_cart_2(xyz: torch.Tensor):
function rsh_cart_3 (line 110) | def rsh_cart_3(xyz: torch.Tensor):
function rsh_cart_4 (line 160) | def rsh_cart_4(xyz: torch.Tensor):
function rsh_cart_5 (line 228) | def rsh_cart_5(xyz: torch.Tensor):
function rsh_cart_6 (line 329) | def rsh_cart_6(xyz: torch.Tensor):
function rsh_cart_7 (line 509) | def rsh_cart_7(xyz: torch.Tensor):
function rsh_cart_8 (line 833) | def rsh_cart_8(xyz: torch.Tensor):
class SphHarm (line 1414) | class SphHarm(torch.nn.Module):
method __init__ (line 1415) | def __init__(self, m, n, dtype=torch.float32) -> None:
method device (line 1437) | def device(self):
method forward (line 1440) | def forward(self, points: torch.Tensor) -> torch.Tensor:
method _gen_recurrence_mask (line 1475) | def _gen_recurrence_mask(self) -> tuple[torch.Tensor, torch.Tensor]:
method _recursive (line 1528) | def _recursive(self, i: int, p_val: torch.Tensor, x: torch.Tensor) -> ...
method _init_legendre (line 1539) | def _init_legendre(self):
method _gen_associated_legendre (line 1558) | def _gen_associated_legendre(self, x: torch.Tensor) -> torch.Tensor:
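
sht.py hard-codes real spherical harmonics evaluated on Cartesian unit directions (`rsh_cart_0` through `rsh_cart_8`), typically used to embed ray directions. A sketch of the zeroth and first-order bands to show the convention; the constants are the standard real-SH normalizations, while the column ordering used by this file is an assumption.

```python
# Sketch of zeroth + first-order real spherical harmonics on unit directions.
# Constants are the standard normalizations; the repo's column ordering is an assumption.
import torch

def rsh_cart_1_sketch(xyz: torch.Tensor) -> torch.Tensor:
    x, y, z = xyz.unbind(dim=-1)
    c0 = torch.full_like(x, 0.282095)                     # Y_0^0 = 0.5 * sqrt(1/pi)
    return torch.stack([c0, 0.488603 * y, 0.488603 * z, 0.488603 * x], dim=-1)

rays = torch.nn.functional.normalize(torch.randn(8, 3), dim=-1)
print(rsh_cart_1_sketch(rays).shape)                      # (8, 4)
```
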
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/utils/validation.py
function original_image (line 15) | def original_image(batch, preds=None):
function log_metrics (line 52) | def log_metrics(metrics_all, step):
function validate (line 64) | def validate(model, test_loaders, step, context):
FILE: camera_pose_annotation/depth_estimation/UniDepth/unidepth/utils/visualization.py
function colorize (line 17) | def colorize(
function image_grid (line 39) | def image_grid(imgs: list[np.ndarray], rows: int, cols: int) -> np.ndarray:
function get_pointcloud_from_rgbd (line 57) | def get_pointcloud_from_rgbd(
function save_file_ply (line 107) | def save_file_ply(xyz, rgb, pc_file):
function log_train_artifacts (line 136) | def log_train_artifacts(rgbs, gts, preds, ds_name, step, infos={}):
FILE: camera_pose_annotation/dynamic_mask/inference_batch.py
function compress (line 26) | def compress(dyn_masks, save_path=None):
function segment_sky (line 41) | def segment_sky(image):
function predict_mask (line 77) | def predict_mask(predictor, row, args, device):
function worker (line 185) | def worker(task_queue, progress_queue, args, id):
function parse_args (line 213) | def parse_args():
function main (line 242) | def main():
FILE: camera_pose_annotation/dynamic_mask/sam2/automatic_mask_generator.py
class SAM2AutomaticMaskGenerator (line 36) | class SAM2AutomaticMaskGenerator:
method __init__ (line 37) | def __init__(
method from_pretrained (line 153) | def from_pretrained(cls, model_id: str, **kwargs) -> "SAM2AutomaticMas...
method generate (line 170) | def generate(self, image: np.ndarray) -> List[Dict[str, Any]]:
method _generate_masks (line 224) | def _generate_masks(self, image: np.ndarray) -> MaskData:
method _process_crop (line 251) | def _process_crop(
method _process_batch (line 294) | def _process_batch(
method postprocess_small_regions (line 387) | def postprocess_small_regions(
method refine_with_m2m (line 437) | def refine_with_m2m(self, points, point_labels, low_res_masks, points_...
FILE: camera_pose_annotation/dynamic_mask/sam2/build_sam.py
function build_sam2 (line 71) | def build_sam2(
function build_sam2_video_predictor (line 100) | def build_sam2_video_predictor(
function _hf_download (line 144) | def _hf_download(model_id):
function build_sam2_hf (line 152) | def build_sam2_hf(model_id, **kwargs):
function build_sam2_video_predictor_hf (line 157) | def build_sam2_video_predictor_hf(model_id, **kwargs):
function _load_checkpoint (line 164) | def _load_checkpoint(model, ckpt_path):
FILE: camera_pose_annotation/dynamic_mask/sam2/modeling/backbones/hieradet.py
function do_pool (line 25) | def do_pool(x: torch.Tensor, pool: nn.Module, norm: nn.Module = None) ->...
class MultiScaleAttention (line 39) | class MultiScaleAttention(nn.Module):
method __init__ (line 40) | def __init__(
method forward (line 56) | def forward(self, x: torch.Tensor) -> torch.Tensor:
class MultiScaleBlock (line 84) | class MultiScaleBlock(nn.Module):
method __init__ (line 85) | def __init__(
method forward (line 134) | def forward(self, x: torch.Tensor) -> torch.Tensor:
class Hiera (line 169) | class Hiera(nn.Module):
method __init__ (line 174) | def __init__(
method _get_pos_embed (line 273) | def _get_pos_embed(self, hw: Tuple[int, int]) -> torch.Tensor:
method forward (line 283) | def forward(self, x: torch.Tensor) -> List[torch.Tensor]:
method get_layer_id (line 301) | def get_layer_id(self, layer_name):
method get_num_layers (line 316) | def get_num_layers(self) -> int:
FILE: camera_pose_annotation/dynamic_mask/sam2/modeling/backbones/image_encoder.py
class ImageEncoder (line 14) | class ImageEncoder(nn.Module):
method __init__ (line 15) | def __init__(
method forward (line 29) | def forward(self, sample: torch.Tensor):
class FpnNeck (line 45) | class FpnNeck(nn.Module):
method __init__ (line 52) | def __init__(
method forward (line 102) | def forward(self, xs: List[torch.Tensor]):
FILE: camera_pose_annotation/dynamic_mask/sam2/modeling/backbones/utils.py
function window_partition (line 16) | def window_partition(x, window_size):
function window_unpartition (line 39) | def window_unpartition(windows, window_size, pad_hw, hw):
class PatchEmbed (line 63) | class PatchEmbed(nn.Module):
method __init__ (line 68) | def __init__(
method forward (line 89) | def forward(self, x: torch.Tensor) -> torch.Tensor:
FILE: camera_pose_annotation/dynamic_mask/sam2/modeling/memory_attention.py
class MemoryAttentionLayer (line 17) | class MemoryAttentionLayer(nn.Module):
method __init__ (line 19) | def __init__(
method _forward_sa (line 58) | def _forward_sa(self, tgt, query_pos):
method _forward_ca (line 66) | def _forward_ca(self, tgt, memory, query_pos, pos, num_k_exclude_rope=0):
method forward (line 83) | def forward(
class MemoryAttention (line 102) | class MemoryAttention(nn.Module):
method __init__ (line 103) | def __init__(
method forward (line 119) | def forward(
FILE: camera_pose_annotation/dynamic_mask/sam2/modeling/memory_encoder.py
class MaskDownSampler (line 17) | class MaskDownSampler(nn.Module):
method __init__ (line 26) | def __init__(
method forward (line 57) | def forward(self, x):
class CXBlock (line 62) | class CXBlock(nn.Module):
method __init__ (line 74) | def __init__(
method forward (line 104) | def forward(self, x):
class Fuser (line 120) | class Fuser(nn.Module):
method __init__ (line 121) | def __init__(self, layer, num_layers, dim=None, input_projection=False):
method forward (line 130) | def forward(self, x):
class MemoryEncoder (line 138) | class MemoryEncoder(nn.Module):
method __init__ (line 139) | def __init__(
method forward (line 158) | def forward(
FILE: camera_pose_annotation/dynamic_mask/sam2/modeling/position_encoding.py
class PositionEmbeddingSine (line 16) | class PositionEmbeddingSine(nn.Module):
method __init__ (line 22) | def __init__(
method _encode_xy (line 53) | def _encode_xy(self, x, y):
method encode_boxes (line 73) | def encode_boxes(self, x, y, w, h):
method encode_points (line 81) | def encode_points(self, x, y, labels):
method _pe (line 90) | def _pe(self, B, device, *cache_key):
method forward (line 127) | def forward(self, x: torch.Tensor):
class PositionEmbeddingRandom (line 133) | class PositionEmbeddingRandom(nn.Module):
method __init__ (line 138) | def __init__(self, num_pos_feats: int = 64, scale: Optional[float] = N...
method _pe_encoding (line 147) | def _pe_encoding(self, coords: torch.Tensor) -> torch.Tensor:
method forward (line 156) | def forward(self, size: Tuple[int, int]) -> torch.Tensor:
method forward_with_coords (line 169) | def forward_with_coords(
function init_t_xy (line 185) | def init_t_xy(end_x: int, end_y: int):
function compute_axial_cis (line 192) | def compute_axial_cis(dim: int, end_x: int, end_y: int, theta: float = 1...
function reshape_for_broadcast (line 204) | def reshape_for_broadcast(freqs_cis: torch.Tensor, x: torch.Tensor):
function apply_rotary_enc (line 212) | def apply_rotary_enc(
FILE: camera_pose_annotation/dynamic_mask/sam2/modeling/sam/mask_decoder.py
class MaskDecoder (line 15) | class MaskDecoder(nn.Module):
method __init__ (line 16) | def __init__(
method forward (line 110) | def forward(
method predict_masks (line 168) | def predict_masks(
method _get_stability_scores (line 247) | def _get_stability_scores(self, mask_logits):
method _dynamic_multimask_via_stability (line 259) | def _dynamic_multimask_via_stability(self, all_mask_logits, all_iou_sc...
FILE: camera_pose_annotation/dynamic_mask/sam2/modeling/sam/prompt_encoder.py
class PromptEncoder (line 17) | class PromptEncoder(nn.Module):
method __init__ (line 18) | def __init__(
method get_dense_pe (line 68) | def get_dense_pe(self) -> torch.Tensor:
method _embed_points (line 79) | def _embed_points(
method _embed_boxes (line 123) | def _embed_boxes(self, boxes: torch.Tensor) -> torch.Tensor:
method _embed_masks (line 134) | def _embed_masks(self, masks: torch.Tensor) -> torch.Tensor:
method _get_batch_size (line 139) | def _get_batch_size(
method _get_device (line 157) | def _get_device(self) -> torch.device:
method forward (line 160) | def forward(
FILE: camera_pose_annotation/dynamic_mask/sam2/modeling/sam/transformer.py
class TwoWayTransformer (line 19) | class TwoWayTransformer(nn.Module):
method __init__ (line 20) | def __init__(
method forward (line 65) | def forward(
class TwoWayAttentionBlock (line 112) | class TwoWayAttentionBlock(nn.Module):
method __init__ (line 113) | def __init__(
method forward (line 156) | def forward(
class Attention (line 190) | class Attention(nn.Module):
method __init__ (line 196) | def __init__(
method _separate_heads (line 220) | def _separate_heads(self, x: Tensor, num_heads: int) -> Tensor:
method _recombine_heads (line 225) | def _recombine_heads(self, x: Tensor) -> Tensor:
method forward (line 230) | def forward(self, q: Tensor, k: Tensor, v: Tensor) -> Tensor:
class RoPEAttention (line 251) | class RoPEAttention(Attention):
method __init__ (line 254) | def __init__(
method forward (line 275) | def forward(
FILE: camera_pose_annotation/dynamic_mask/sam2/modeling/sam2_base.py
class SAM2Base (line 22) | class SAM2Base(torch.nn.Module):
method __init__ (line 23) | def __init__(
method device (line 198) | def device(self):
method forward (line 201) | def forward(self, *args, **kwargs):
method _build_sam_heads (line 207) | def _build_sam_heads(self):
method _forward_sam_heads (line 257) | def _forward_sam_heads(
method _use_mask_as_output (line 415) | def _use_mask_as_output(self, backbone_features, high_res_features, ma...
method forward_image (line 467) | def forward_image(self, img_batch: torch.Tensor):
method _prepare_backbone_features (line 481) | def _prepare_backbone_features(self, backbone_out):
method _prepare_memory_conditioned_features (line 497) | def _prepare_memory_conditioned_features(
method _encode_new_memory (line 678) | def _encode_new_memory(
method _track_step (line 728) | def _track_step(
method _encode_memory_in_output (line 789) | def _encode_memory_in_output(
method track_step (line 814) | def track_step(
method _use_multimask (line 881) | def _use_multimask(self, is_init_cond_frame, point_inputs):
method _apply_non_overlapping_constraints (line 891) | def _apply_non_overlapping_constraints(self, pred_masks):
FILE: camera_pose_annotation/dynamic_mask/sam2/modeling/sam2_utils.py
function select_closest_cond_frames (line 19) | def select_closest_cond_frames(frame_idx, cond_frame_outputs, max_cond_f...
function get_1d_sine_pe (line 64) | def get_1d_sine_pe(pos_inds, dim, temperature=10000):
function get_activation_fn (line 77) | def get_activation_fn(activation):
function get_clones (line 88) | def get_clones(module, N):
class DropPath (line 92) | class DropPath(nn.Module):
method __init__ (line 94) | def __init__(self, drop_prob=0.0, scale_by_keep=True):
method forward (line 99) | def forward(self, x):
class MLP (line 112) | class MLP(nn.Module):
method __init__ (line 113) | def __init__(
method forward (line 131) | def forward(self, x):
class LayerNorm2d (line 141) | class LayerNorm2d(nn.Module):
method __init__ (line 142) | def __init__(self, num_channels: int, eps: float = 1e-6) -> None:
method forward (line 148) | def forward(self, x: torch.Tensor) -> torch.Tensor:
function sample_box_points (line 156) | def sample_box_points(
function sample_random_points_from_errors (line 202) | def sample_random_points_from_errors(gt_masks, pred_masks, num_pt=1):
function sample_one_point_from_error_center (line 252) | def sample_one_point_from_error_center(gt_masks, pred_masks, padding=True):
function get_next_point (line 317) | def get_next_point(gt_masks, pred_masks, method):
FILE: camera_pose_annotation/dynamic_mask/sam2/sam2_image_predictor.py
class SAM2ImagePredictor (line 20) | class SAM2ImagePredictor:
method __init__ (line 21) | def __init__(
method from_pretrained (line 69) | def from_pretrained(cls, model_id: str, **kwargs) -> "SAM2ImagePredict...
method set_image (line 86) | def set_image(
method set_image_batch (line 132) | def set_image_batch(
method predict_batch (line 175) | def predict_batch(
method predict (line 237) | def predict(
method _prep_prompts (line 305) | def _prep_prompts(
method _predict (line 337) | def _predict(
method get_image_embedding (line 440) | def get_image_embedding(self) -> torch.Tensor:
method device (line 456) | def device(self) -> torch.device:
method reset_predictor (line 459) | def reset_predictor(self) -> None:
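
The vendored SAM2 image predictor above follows the upstream SAM 2 API (`set_image`, then `predict` with point/box prompts). A hedged sketch, assuming the vendored package is importable as `sam2`; the config name and checkpoint path are placeholders to be replaced with whatever this repo's dynamic-mask pipeline actually downloads.

```python
# Hedged sketch: prompting the vendored SAM2 image predictor with one foreground point.
# Config name and checkpoint path are placeholders; predict() returns
# (masks, iou scores, low-res logits) per SAM2's public API.
import numpy as np
from sam2.build_sam import build_sam2
from sam2.sam2_image_predictor import SAM2ImagePredictor

sam2_model = build_sam2("configs/sam2.1/sam2.1_hiera_l.yaml", "checkpoints/sam2.1_hiera_large.pt")
predictor = SAM2ImagePredictor(sam2_model)

image = np.zeros((480, 640, 3), dtype=np.uint8)            # placeholder RGB frame
predictor.set_image(image)
masks, scores, low_res_logits = predictor.predict(
    point_coords=np.array([[320, 240]]),
    point_labels=np.array([1]),                            # 1 = foreground click
    multimask_output=True,
)
```
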
FILE: camera_pose_annotation/dynamic_mask/sam2/sam2_video_predictor.py
class SAM2VideoPredictor (line 19) | class SAM2VideoPredictor(SAM2Base):
method __init__ (line 22) | def __init__(
method init_state (line 42) | def init_state(
method from_pretrained (line 102) | def from_pretrained(cls, model_id: str, **kwargs) -> "SAM2VideoPredict...
method _obj_id_to_idx (line 118) | def _obj_id_to_idx(self, inference_state, obj_id):
method _obj_idx_to_id (line 152) | def _obj_idx_to_id(self, inference_state, obj_idx):
method _get_obj_num (line 156) | def _get_obj_num(self, inference_state):
method add_new_points_or_box (line 161) | def add_new_points_or_box(
method add_new_points (line 295) | def add_new_points(self, *args, **kwargs):
method add_new_mask (line 300) | def add_new_mask(
method _get_orig_video_res_output (line 383) | def _get_orig_video_res_output(self, inference_state, any_res_masks):
method _consolidate_temp_output_across_obj (line 405) | def _consolidate_temp_output_across_obj(
method propagate_in_video_preflight (line 480) | def propagate_in_video_preflight(self, inference_state):
method propagate_in_video (line 546) | def propagate_in_video(
method clear_all_prompts_in_frame (line 633) | def clear_all_prompts_in_frame(
method reset_state (line 676) | def reset_state(self, inference_state):
method _reset_tracking_results (line 689) | def _reset_tracking_results(self, inference_state):
method _get_image_feature (line 704) | def _get_image_feature(self, inference_state, frame_idx, batch_size):
method _run_single_frame_inference (line 737) | def _run_single_frame_inference(
method _run_memory_encoder (line 805) | def _run_memory_encoder(
method _get_maskmem_pos_enc (line 841) | def _get_maskmem_pos_enc(self, inference_state, current_out):
method remove_object (line 867) | def remove_object(self, inference_state, obj_id, strict=False, need_ou...
method _clear_non_cond_mem_around_input (line 956) | def _clear_non_cond_mem_around_input(self, inference_state, frame_idx):
class SAM2VideoPredictorVOS (line 976) | class SAM2VideoPredictorVOS(SAM2VideoPredictor):
method __init__ (line 979) | def __init__(self, *args, **kwargs):
method _compile_all_components (line 983) | def _compile_all_components(self):
method forward_image (line 1013) | def forward_image(self, img_batch: torch.Tensor):
method _forward_sam_heads (line 1036) | def _forward_sam_heads(
method _encode_new_memory (line 1171) | def _encode_new_memory(
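
The video predictor above adds memory-conditioned tracking on top of SAM2Base: prompts are attached to individual frames via `add_new_points_or_box`, then masks are propagated with `propagate_in_video`. A hedged sketch of that loop, assuming the vendored package is importable as `sam2`; paths and the config name are placeholders.

```python
# Hedged sketch: prompt one frame, then propagate masks through a clip.
# Config/checkpoint/frame-directory paths are placeholders.
import numpy as np
import torch
from sam2.build_sam import build_sam2_video_predictor

predictor = build_sam2_video_predictor("configs/sam2.1/sam2.1_hiera_l.yaml",
                                       "checkpoints/sam2.1_hiera_large.pt")

with torch.inference_mode():
    state = predictor.init_state(video_path="clip_frames/")   # directory of JPEG frames
    predictor.add_new_points_or_box(
        inference_state=state, frame_idx=0, obj_id=1,
        points=np.array([[320, 240]], dtype=np.float32),
        labels=np.array([1], dtype=np.int32),
    )
    for frame_idx, obj_ids, mask_logits in predictor.propagate_in_video(state):
        masks = (mask_logits > 0.0).cpu().numpy()              # per-object binary masks
```
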
FILE: camera_pose_annotation/dynamic_mask/sam2/sam2_video_predictor_legacy.py
class SAM2VideoPredictor (line 18) | class SAM2VideoPredictor(SAM2Base):
method __init__ (line 21) | def __init__(
method init_state (line 44) | def init_state(
method from_pretrained (line 114) | def from_pretrained(cls, model_id: str, **kwargs) -> "SAM2VideoPredict...
method _obj_id_to_idx (line 130) | def _obj_id_to_idx(self, inference_state, obj_id):
method _obj_idx_to_id (line 164) | def _obj_idx_to_id(self, inference_state, obj_idx):
method _get_obj_num (line 168) | def _get_obj_num(self, inference_state):
method add_new_points_or_box (line 173) | def add_new_points_or_box(
method add_new_points (line 316) | def add_new_points(self, *args, **kwargs):
method add_new_mask (line 321) | def add_new_mask(
method _get_orig_video_res_output (line 404) | def _get_orig_video_res_output(self, inference_state, any_res_masks):
method _consolidate_temp_output_across_obj (line 426) | def _consolidate_temp_output_across_obj(
method _get_empty_mask_ptr (line 556) | def _get_empty_mask_ptr(self, inference_state, frame_idx):
method propagate_in_video_preflight (line 593) | def propagate_in_video_preflight(self, inference_state):
method propagate_in_video (line 663) | def propagate_in_video(
method _add_output_per_object (line 747) | def _add_output_per_object(
method clear_all_prompts_in_frame (line 777) | def clear_all_prompts_in_frame(
method reset_state (line 848) | def reset_state(self, inference_state):
method _reset_tracking_results (line 860) | def _reset_tracking_results(self, inference_state):
method _get_image_feature (line 879) | def _get_image_feature(self, inference_state, frame_idx, batch_size):
method _run_single_frame_inference (line 912) | def _run_single_frame_inference(
method _run_memory_encoder (line 980) | def _run_memory_encoder(
method _get_maskmem_pos_enc (line 1016) | def _get_maskmem_pos_enc(self, inference_state, current_out):
method remove_object (line 1042) | def remove_object(self, inference_state, obj_id, strict=False, need_ou...
method _clear_non_cond_mem_around_input (line 1155) | def _clear_non_cond_mem_around_input(self, inference_state, frame_idx):
FILE: camera_pose_annotation/dynamic_mask/sam2/utils/amg.py
class MaskData (line 18) | class MaskData:
method __init__ (line 24) | def __init__(self, **kwargs) -> None:
method __setitem__ (line 31) | def __setitem__(self, key: str, item: Any) -> None:
method __delitem__ (line 37) | def __delitem__(self, key: str) -> None:
method __getitem__ (line 40) | def __getitem__(self, key: str) -> Any:
method items (line 43) | def items(self) -> ItemsView[str, Any]:
method filter (line 46) | def filter(self, keep: torch.Tensor) -> None:
method cat (line 61) | def cat(self, new_stats: "MaskData") -> None:
method to_numpy (line 74) | def to_numpy(self) -> None:
function is_box_near_crop_edge (line 80) | def is_box_near_crop_edge(
function box_xyxy_to_xywh (line 93) | def box_xyxy_to_xywh(box_xyxy: torch.Tensor) -> torch.Tensor:
function batch_iterator (line 100) | def batch_iterator(batch_size: int, *args) -> Generator[List[Any], None,...
function mask_to_rle_pytorch (line 109) | def mask_to_rle_pytorch(tensor: torch.Tensor) -> List[Dict[str, Any]]:
function rle_to_mask (line 140) | def rle_to_mask(rle: Dict[str, Any]) -> np.ndarray:
function area_from_rle (line 154) | def area_from_rle(rle: Dict[str, Any]) -> int:
function calculate_stability_score (line 158) | def calculate_stability_score(
function build_point_grid (line 181) | def build_point_grid(n_per_side: int) -> np.ndarray:
function build_all_layer_point_grids (line 191) | def build_all_layer_point_grids(
function generate_crop_boxes (line 202) | def generate_crop_boxes(
function uncrop_boxes_xyxy (line 239) | def uncrop_boxes_xyxy(boxes: torch.Tensor, crop_box: List[int]) -> torch...
function uncrop_points (line 248) | def uncrop_points(points: torch.Tensor, crop_box: List[int]) -> torch.Te...
function uncrop_masks (line 257) | def uncrop_masks(
function remove_small_regions (line 269) | def remove_small_regions(
function coco_encode_rle (line 296) | def coco_encode_rle(uncompressed_rle: Dict[str, Any]) -> Dict[str, Any]:
function batched_mask_to_box (line 305) | def batched_mask_to_box(masks: torch.Tensor) -> torch.Tensor:
FILE: camera_pose_annotation/dynamic_mask/sam2/utils/misc.py
function get_sdpa_settings (line 17) | def get_sdpa_settings():
function get_connected_components (line 47) | def get_connected_components(mask):
function mask_to_box (line 66) | def mask_to_box(masks: torch.Tensor):
function _load_img_as_tensor (line 92) | def _load_img_as_tensor(img_path, image_size):
class AsyncVideoFrameLoader (line 104) | class AsyncVideoFrameLoader:
method __init__ (line 109) | def __init__(
method __getitem__ (line 147) | def __getitem__(self, index):
method __len__ (line 168) | def __len__(self):
function load_video_frames (line 172) | def load_video_frames(
function load_video_frames_from_jpg_images (line 213) | def load_video_frames_from_jpg_images(
function load_video_frames_from_video_file (line 280) | def load_video_frames_from_video_file(
function fill_holes_in_mask_scores (line 312) | def fill_holes_in_mask_scores(mask, max_area):
function concat_points (line 341) | def concat_points(old_point_inputs, new_points, new_labels):
FILE: camera_pose_annotation/dynamic_mask/sam2/utils/transforms.py
class SAM2Transforms (line 15) | class SAM2Transforms(nn.Module):
method __init__ (line 16) | def __init__(
method __call__ (line 37) | def __call__(self, x):
method forward_batch (line 41) | def forward_batch(self, img_list):
method transform_coords (line 46) | def transform_coords(
method transform_boxes (line 66) | def transform_boxes(
method postprocess_masks (line 76) | def postprocess_masks(self, masks: torch.Tensor, orig_hw) -> torch.Ten...
FILE: caption/LLM/inference.py
function get_pose (line 16) | def get_pose(pose_dir):
function get_prompt (line 54) | def get_prompt(pose_dir, prompt_dir, vqa_caption, dist_level):
function process_single_row (line 78) | def process_single_row(args, row):
function worker (line 108) | def worker(args, task_queue, pbar):
function parse_args (line 133) | def parse_args():
function main (line 165) | def main():
FILE: caption/VQA/inference.py
function encode_image (line 17) | def encode_image(image_path):
function get_prompt (line 35) | def get_prompt(fig_dir, prompt_text):
function process_single_row (line 63) | def process_single_row(args, row):
function worker (line 86) | def worker(args, task_queue, pbar):
function parse_args (line 98) | def parse_args():
function main (line 126) | def main():
FILE: caption/tagging/inference.py
function parse_category_tags (line 15) | def parse_category_tags(tag_caption):
function process_single_row (line 54) | def process_single_row(args, json_file):
function worker (line 84) | def worker(args, task_queue, pbar):
function parse_args (line 97) | def parse_args():
function main (line 122) | def main():
FILE: caption/utils/api_call.py
function api_call (line 4) | def api_call(prompt_text, model, api_key, base_domain):
FILE: caption/utils/combine.py
function parse_text_to_json (line 12) | def parse_text_to_json(text):
function vqa_parse_text_to_json (line 103) | def vqa_parse_text_to_json(text):
function process_single_row (line 127) | def process_single_row(args, clip_id):
function worker (line 162) | def worker(args, task_queue, pbar):
function parse_args (line 174) | def parse_args():
function main (line 189) | def main():
FILE: scoring/aesthetic/inference.py
function merge_scores (line 26) | def merge_scores(gathered_list: list, csv: pd.DataFrame, column):
class VideoTextDataset (line 50) | class VideoTextDataset(torch.utils.data.Dataset):
method __init__ (line 53) | def __init__(self, csv_path, fig_load_dir, transform=None):
method __getitem__ (line 59) | def __getitem__(self, index):
method __len__ (line 75) | def __len__(self):
class MLP (line 79) | class MLP(nn.Module):
method __init__ (line 82) | def __init__(self, input_size):
method forward (line 96) | def forward(self, x):
class AestheticScorer (line 100) | class AestheticScorer(nn.Module):
method __init__ (line 103) | def __init__(self, input_size, device):
method forward (line 111) | def forward(self, x):
function parse_args (line 118) | def parse_args():
function main (line 142) | def main():
FILE: scoring/luminance/inference.py
function merge_scores (line 21) | def merge_scores(gathered_list: list, csv: pd.DataFrame):
class VideoDataset (line 57) | class VideoDataset(torch.utils.data.Dataset):
method __init__ (line 60) | def __init__(self, csv_path, fig_load_dir):
method __getitem__ (line 65) | def __getitem__(self, index):
method __len__ (line 80) | def __len__(self):
function parse_args (line 84) | def parse_args():
function main (line 100) | def main():
FILE: scoring/motion/inference.py
function get_ffmpeg_acceleration (line 18) | def get_ffmpeg_acceleration():
function process_single_row (line 42) | def process_single_row(video_path, args, process_id):
function calculate_score (line 84) | def calculate_score(row, args):
function worker1 (line 93) | def worker1(task_queue, progress_queue, args, process_id):
function worker2 (line 105) | def worker2(task_queue, results_queue, args):
function parse_args (line 117) | def parse_args():
function main (line 139) | def main():
FILE: scoring/ocr/inference.py
function process_single_row (line 18) | def process_single_row(row, args, model):
function worker (line 50) | def worker(task_queue, result_queue, args, id):
function parse_args (line 74) | def parse_args():
function main (line 92) | def main():
FILE: utils/convert.py
function scan_recursively (line 18) | def scan_recursively(root):
function get_filelist (line 33) | def get_filelist(file_path, exts=None):
function split_by_capital (line 53) | def split_by_capital(name):
function process_general_videos (line 65) | def process_general_videos(root, output):
FILE: utils/cut.py
function get_ffmpeg_acceleration (line 19) | def get_ffmpeg_acceleration():
function seconds_to_timecode (line 40) | def seconds_to_timecode(seconds: float) -> str:
function build_precise_cut_cmd (line 54) | def build_precise_cut_cmd(
function _build_video_filters (line 153) | def _build_video_filters(shorter_size, args, accel_type) -> list[str]:
function _build_encoder_args (line 182) | def _build_encoder_args(accel_type) -> list[str]:
function process_single_row (line 205) | def process_single_row(row, args, process_id):
function _cleanup (line 302) | def _cleanup(path: str):
function parse_args (line 315) | def parse_args():
function _worker_fn (line 356) | def _worker_fn(task: tuple, args, process_id: int) -> tuple:
function save_results (line 379) | def save_results(all_results: list, csv: pd.DataFrame, args):
function main (line 439) | def main():
FILE: utils/cut_fast.py
function process_single_row (line 32) | def process_single_row(row, save_dir, keep_audio=False):
function process_video_group (line 138) | def process_video_group(group_df, save_dir, keep_audio=False):
function worker (line 165) | def worker(task_queue, results_queue, video_save_dir, keep_audio=False):
function parse_args (line 185) | def parse_args():
function main (line 220) | def main():
FILE: utils/download_SpatialVID.py
function main (line 5) | def main():
FILE: utils/download_YouTube.py
function ytb_download (line 27) | def ytb_download(url, json_info, output_dir="ytb_videos/"):
function main (line 89) | async def main(csv_path, output_dir, max_workers=10, shards=0, total=-1,...
function entry (line 119) | def entry(
function add_download (line 152) | def add_download(csv_path):
FILE: utils/evaluation.py
function load_file (line 22) | def load_file(cam_pos_file, mask_file, device):
function anomaly_detection (line 43) | def anomaly_detection(cam_pos, time_steps, threshold, device):
function move_distance (line 73) | def move_distance(cam_pos, time_steps, device):
function quaternion_multiply (line 91) | def quaternion_multiply(q1, q2):
function rotation_angle (line 111) | def rotation_angle(cam_rotate, time_steps, device):
function trajectory_turns (line 131) | def trajectory_turns(cam_pos, time_steps, device, threshold=0.45):
function dynamic_ratio (line 167) | def dynamic_ratio(masks):
function process_single_row (line 178) | def process_single_row(row, index, args, device):
function worker (line 203) | def worker(task_queue, result_queue, args, worker_id):
function parse_args (line 222) | def parse_args():
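
utils/evaluation.py derives trajectory statistics from per-clip camera poses, including `quaternion_multiply` and `rotation_angle` for relative rotations between time steps. An independent sketch of the underlying math; the (w, x, y, z) ordering and the composition convention used by this file are assumptions.

```python
# Independent sketch: Hamilton product and the relative rotation angle it yields.
# The (w, x, y, z) ordering assumed here may differ from utils/evaluation.py.
import numpy as np

def quat_mul(q1, q2):
    w1, x1, y1, z1 = q1
    w2, x2, y2, z2 = q2
    return np.array([
        w1 * w2 - x1 * x2 - y1 * y2 - z1 * z2,
        w1 * x2 + x1 * w2 + y1 * z2 - z1 * y2,
        w1 * y2 - x1 * z2 + y1 * w2 + z1 * x2,
        w1 * z2 + x1 * y2 - y1 * x2 + z1 * w2,
    ])

def relative_angle(q_a, q_b):
    q_rel = quat_mul(np.array([q_b[0], -q_b[1], -q_b[2], -q_b[3]]), q_a)  # conj(q_b) * q_a
    return 2.0 * np.arccos(np.clip(abs(q_rel[0]), -1.0, 1.0))             # radians
```
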
FILE: utils/expand_npz.py
function expand (line 9) | def expand(loaded_data):
FILE: utils/extract_frames.py
function extract_frames_opencv (line 18) | def extract_frames_opencv(
function extract_frames_av (line 57) | def extract_frames_av(
function _calc_expected_frames (line 127) | def _calc_expected_frames(num_frames, interval):
function _verify_frames (line 135) | def _verify_frames(img_dir, expected_frames):
function process_single_row (line 151) | def process_single_row(row, row_index, args):
function worker (line 207) | def worker(task_queue, progress_queue, failed_indices, args):
function parse_args (line 221) | def parse_args():
function main (line 261) | def main():
FILE: utils/filter.py
function main (line 13) | def main(args):
function parse_args (line 64) | def parse_args():
FILE: utils/get_clip.py
function process_single_row (line 16) | def process_single_row(row, args):
function worker (line 84) | def worker(task_queue, results_queue, args):
function parse_args (line 96) | def parse_args():
function main (line 123) | def main():
FILE: utils/get_info.py
function get_video_length (line 16) | def get_video_length(cap, method="header"):
function get_video_info (line 27) | def get_video_info(args):
function main (line 77) | def main(args):
function parse_args (line 139) | def parse_args():
FILE: utils/get_instructions.py
function filter_poses (line 17) | def filter_poses(poses_array, alpha):
function poses_to_multi_instructions (line 63) | def poses_to_multi_instructions(poses_array, translation_thresh, rotatio...
function process_single_row (line 134) | def process_single_row(args, row):
function worker (line 172) | def worker(task_queue, args, pbar):
function args_parser (line 186) | def args_parser():
function main (line 223) | def main():
FILE: utils/get_instructions_enhanced.py
function filter_poses (line 16) | def filter_poses(poses_array, alpha):
function poses_to_multi_instructions (line 48) | def poses_to_multi_instructions(poses_array, translation_thresh, rotatio...
function calculate_relative_scale (line 115) | def calculate_relative_scale(total_distance, num_poses, f_translation, m...
function voter (line 125) | def voter(args, row, interval, alpha):
function collect_all_results (line 153) | def collect_all_results(args, row, param_combinations):
function get_mutually_exclusive_groups (line 164) | def get_mutually_exclusive_groups():
function remove_conflicting_instructions (line 173) | def remove_conflicting_instructions(instructions, conflict_groups):
function smart_instruction_selection (line 189) | def smart_instruction_selection(non_conflicting_inst):
function collect_interval_based_votes (line 226) | def collect_interval_based_votes(all_results, param_combinations):
function vote_for_final_instructions (line 262) | def vote_for_final_instructions(all_results, param_combinations=None):
function merge_consecutive_instructions (line 305) | def merge_consecutive_instructions(instructions):
function process_single_row (line 319) | def process_single_row(args, row, param_combinations):
function generate_param_combinations (line 338) | def generate_param_combinations(args):
function worker (line 345) | def worker(task_queue, args, param_combinations, pbar):
function args_parser (line 357) | def args_parser():
function main (line 381) | def main():
FILE: utils/merge_tables.py
function read_csv_file (line 11) | def read_csv_file(file_path):
function merge_tables_from_files (line 16) | def merge_tables_from_files(file_list, output_file, merge_on=None):
function main (line 52) | def main():
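utils/merge_tables.py merges per-stage CSV metadata tables, optionally keyed on a shared column (merge_on). A compact pandas sketch of that behavior, assuming CSV inputs; column names and join semantics are assumptions rather than the repository's exact logic:

```python
import pandas as pd

def merge_tables_from_files(file_list, output_file, merge_on=None):
    """Concatenate CSVs row-wise, or join them column-wise on a shared key."""
    tables = [pd.read_csv(path) for path in file_list]
    if merge_on is None:
        merged = pd.concat(tables, ignore_index=True)
    else:
        merged = tables[0]
        for table in tables[1:]:
            merged = merged.merge(table, on=merge_on, how="inner")
    merged.to_csv(output_file, index=False)
    return merged
```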
FILE: utils/normalize_intrinsics.py
function possess_single_row (line 22) | def possess_single_row(row, args):
function worker (line 41) | def worker(task_queue, args, pbar):
function parse_args (line 55) | def parse_args():
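utils/normalize_intrinsics.py presumably rescales pinhole intrinsics into resolution-independent units so that annotations survive resizing. A minimal sketch of one common convention (focal lengths and principal point divided by image width/height); the repository's exact normalization may differ:

```python
import numpy as np

def normalize_intrinsics(K: np.ndarray, width: int, height: int) -> np.ndarray:
    """Express a 3x3 pinhole intrinsics matrix in normalized image coordinates."""
    K = K.astype(np.float64).copy()
    K[0, 0] /= width   # fx
    K[0, 2] /= width   # cx
    K[1, 1] /= height  # fy
    K[1, 2] /= height  # cy
    return K
```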
FILE: utils/pack_clip_assets.py
function load_video (line 18) | def load_video(clip_path, indexes_path, height=720, width=1280):
function load_intrinsics (line 49) | def load_intrinsics(intrinsics_path, tgt_width=1024, tgt_height=576):
function main (line 74) | def main():
FILE: utils/quat_to_mat.py
class Pose (line 25) | class Pose:
method __call__ (line 31) | def __call__(self, R=None, t=None):
method invert (line 63) | def invert(self, pose, use_inverse=False): # c2w <==> w2c
method compose (line 84) | def compose(self, pose_list):
method compose_pair (line 100) | def compose_pair(self, pose_a, pose_b):
method scale_center (line 119) | def scale_center(self, pose, scale):
function quaternion_to_matrix (line 137) | def quaternion_to_matrix(quaternions, eps: float = 1e-8):
function pose_from_quaternion (line 172) | def pose_from_quaternion(pose):
function possess_single_row (line 197) | def possess_single_row(row, index, args):
function worker (line 226) | def worker(task_queue, args, pbar):
function parse_args (line 238) | def parse_args():
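utils/quat_to_mat.py converts quaternion-parameterized poses into 4x4 matrices via quaternion_to_matrix and pose_from_quaternion. The conversion itself is standard; a self-contained sketch for a (w, x, y, z) quaternion, mirroring the listed signature but not copied from the repository:

```python
import numpy as np

def quaternion_to_matrix(q, eps: float = 1e-8):
    """Convert a (w, x, y, z) quaternion to a 3x3 rotation matrix."""
    q = np.asarray(q, dtype=np.float64)
    q = q / max(np.linalg.norm(q), eps)  # guard against zero-norm input
    w, x, y, z = q
    return np.array([
        [1 - 2 * (y * y + z * z), 2 * (x * y - w * z),     2 * (x * z + w * y)],
        [2 * (x * y + w * z),     1 - 2 * (x * x + z * z), 2 * (y * z - w * x)],
        [2 * (x * z - w * y),     2 * (y * z + w * x),     1 - 2 * (x * x + y * y)],
    ])
```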
FILE: utils/read_depth.py
function read_depth (line 6) | def read_depth(zip_file_path):
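utils/read_depth.py reads depth annotations back from a zip archive. A hedged sketch assuming the archive stores per-frame .npy arrays, which may not match the repository's actual packing format:

```python
import io
import zipfile
import numpy as np

def read_depth(zip_file_path: str) -> dict:
    """Load every .npy depth map found in a zip archive, keyed by filename."""
    depths = {}
    with zipfile.ZipFile(zip_file_path, "r") as zf:
        for name in sorted(zf.namelist()):
            if name.endswith(".npy"):
                depths[name] = np.load(io.BytesIO(zf.read(name)))
    return depths
```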
FILE: utils/read_video.py
function read_video_av (line 23) | def read_video_av(
function _read_from_stream (line 107) | def _read_from_stream(
function read_video_cv2 (line 195) | def read_video_cv2(filename, start_pts=None, end_pts=None, pts_unit="pts"):
function read_video (line 247) | def read_video(video_path, backend="av"):
FILE: utils/scene_detect.py
function timecode_to_seconds (line 29) | def timecode_to_seconds(timecode):
function seconds_to_timecode (line 35) | def seconds_to_timecode(seconds):
function process_single_row (line 43) | def process_single_row(
function timecode_to_frames (line 130) | def timecode_to_frames(timecode, fps):
function worker (line 137) | def worker(task_queue, results_queue, args):
function parse_args (line 159) | def parse_args():
function main (line 213) | def main():
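utils/scene_detect.py includes timecode helpers (timecode_to_seconds, seconds_to_timecode, timecode_to_frames) for mapping detected scene boundaries between timecodes and frame indices. A sketch assuming the "HH:MM:SS.mmm" timecode format commonly emitted by scene-detection tools:

```python
def timecode_to_seconds(timecode: str) -> float:
    """'HH:MM:SS.mmm' -> seconds."""
    hours, minutes, seconds = timecode.split(":")
    return int(hours) * 3600 + int(minutes) * 60 + float(seconds)

def seconds_to_timecode(seconds: float) -> str:
    """Seconds -> 'HH:MM:SS.mmm'."""
    hours, rem = divmod(seconds, 3600)
    minutes, secs = divmod(rem, 60)
    return f"{int(hours):02d}:{int(minutes):02d}:{secs:06.3f}"

def timecode_to_frames(timecode: str, fps: float) -> int:
    """Nearest frame index for a timecode at the given frame rate."""
    return round(timecode_to_seconds(timecode) * fps)
```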
FILE: viser/docs/source/conf.py
function docstring (line 243) | def docstring(app, what, name, obj, options, lines):
function setup (line 250) | def setup(app):
FILE: viser/docs/update_example_docs.py
class ExampleMetadata (line 15) | class ExampleMetadata:
method from_path (line 23) | def from_path(path: pathlib.Path) -> ExampleMetadata:
function get_example_paths (line 46) | def get_example_paths(examples_dir: pathlib.Path) -> Iterable[pathlib.Pa...
function main (line 55) | def main(
FILE: viser/examples/01_image.py
function main (line 18) | def main() -> None:
FILE: viser/examples/02_gui.py
function main (line 12) | def main() -> None:
FILE: viser/examples/03_gui_callbacks.py
function main (line 14) | def main() -> None:
FILE: viser/examples/04_camera_poses.py
function _ (line 15) | def _(client: viser.ClientHandle) -> None:
FILE: viser/examples/05_camera_commands.py
function _ (line 19) | def _(client: viser.ClientHandle) -> None:
FILE: viser/examples/07_record3d_visualizer.py
function main (line 18) | def main(
FILE: viser/examples/08_smpl_visualizer.py
class SmplOutputs (line 24) | class SmplOutputs:
class SmplHelper (line 31) | class SmplHelper:
method __init__ (line 34) | def __init__(self, model_path: Path) -> None:
method get_outputs (line 49) | def get_outputs(self, betas: np.ndarray, joint_rotmats: np.ndarray) ->...
function main (line 76) | def main(model_path: Path) -> None:
class GuiElements (line 120) | class GuiElements:
function make_gui_elements (line 133) | def make_gui_elements(
FILE: viser/examples/09_urdf_visualizer.py
function create_robot_control_sliders (line 24) | def create_robot_control_sliders(
function main (line 55) | def main(
FILE: viser/examples/10_realsense.py
function realsense_pipeline (line 20) | def realsense_pipeline(fps: int = 30):
function point_cloud_arrays_from_frames (line 42) | def point_cloud_arrays_from_frames(
function main (line 94) | def main():
FILE: viser/examples/11_colmap_visualizer.py
function main (line 24) | def main(
FILE: viser/examples/12_click_meshes.py
function main (line 13) | def main() -> None:
FILE: viser/examples/13_theming.py
function main (line 12) | def main():
FILE: viser/examples/14_markdown.py
function _ (line 29) | def _(_):
function _ (line 34) | def _(_):
FILE: viser/examples/15_gui_in_scene.py
function _ (line 22) | def _(client: viser.ClientHandle) -> None:
FILE: viser/examples/16_modal.py
function main (line 10) | def main():
FILE: viser/examples/18_splines.py
function main (line 13) | def main() -> None:
FILE: viser/examples/19_get_renders.py
function main (line 13) | def main():
FILE: viser/examples/20_scene_pointer.py
function _ (line 47) | def _(client: viser.ClientHandle) -> None:
FILE: viser/examples/21_set_up_direction.py
function main (line 10) | def main() -> None:
FILE: viser/examples/22_games.py
function main (line 16) | def main() -> None:
function play_connect_4 (line 28) | def play_connect_4(server: viser.ViserServer) -> None:
function play_tic_tac_toe (line 88) | def play_tic_tac_toe(server: viser.ViserServer) -> None:
FILE: viser/examples/23_plotly.py
function create_sinusoidal_wave (line 15) | def create_sinusoidal_wave(t: float) -> go.Figure:
function main (line 36) | def main() -> None:
FILE: viser/examples/24_notification.py
function main (line 10) | def main() -> None:
FILE: viser/examples/25_smpl_visualizer_skinned.py
class SmplOutputs (line 30) | class SmplOutputs:
class SmplHelper (line 37) | class SmplHelper:
method __init__ (line 40) | def __init__(self, model_path: Path) -> None:
method get_outputs (line 55) | def get_outputs(self, betas: np.ndarray, joint_rotmats: np.ndarray) ->...
function main (line 82) | def main(model_path: Path) -> None:
class GuiElements (line 147) | class GuiElements:
function make_gui_elements (line 160) | def make_gui_elements(
FILE: viser/examples/experimental/gaussian_splats.py
class SplatFile (line 18) | class SplatFile(TypedDict):
function load_splat_file (line 31) | def load_splat_file(splat_path: Path, center: bool = False) -> SplatFile:
function load_ply_file (line 75) | def load_ply_file(ply_file_path: Path, center: bool = False) -> SplatFile:
function main (line 108) | def main(splat_paths: tuple[Path, ...]) -> None:
FILE: viser/examples/quick_save.py
function process_folder (line 20) | def process_folder(
function main (line 145) | def main(
FILE: viser/src/viser/_client_autobuild.py
function _check_viser_yarn_running (line 12) | def _check_viser_yarn_running() -> bool:
function ensure_client_is_built (line 28) | def ensure_client_is_built() -> None:
function _install_sandboxed_node (line 91) | def _install_sandboxed_node() -> Path:
function _modified_time_recursive (line 120) | def _modified_time_recursive(dir: Path) -> float:
FILE: viser/src/viser/_gui_api.py
function _hex_from_hls (line 80) | def _hex_from_hls(h: float, l: float, s: float) -> str:
function _compute_step (line 90) | def _compute_step(x: float | None) -> float: # type: ignore
function _compute_precision_digits (line 103) | def _compute_precision_digits(x: float) -> int:
class _RootGuiContainer (line 120) | class _RootGuiContainer:
function _apply_default_order (line 127) | def _apply_default_order(order: float | None) -> float:
function get_type_hints_cached (line 142) | def get_type_hints_cached(cls: type[Any]) -> dict[str, Any]:
class _FileUploadState (line 146) | class _FileUploadState(TypedDict):
class GuiApi (line 156) | class GuiApi:
method __init__ (line 165) | def __init__(
method _handle_gui_updates (line 203) | def _handle_gui_updates(
method _handle_file_transfer_start (line 262) | def _handle_file_transfer_start(
method _handle_file_transfer_part (line 277) | def _handle_file_transfer_part(
method _get_container_id (line 338) | def _get_container_id(self) -> str:
method _set_container_id (line 342) | def _set_container_id(self, container_id: str) -> None:
method reset (line 346) | def reset(self) -> None:
method set_panel_label (line 350) | def set_panel_label(self, label: str | None) -> None:
method configure_theme (line 358) | def configure_theme(
method add_folder (line 433) | def add_folder(
method add_modal (line 471) | def add_modal(
method add_tab_group (line 500) | def add_tab_group(
method add_markdown (line 538) | def add_markdown(
method add_plotly (line 580) | def add_plotly(
method add_button (line 656) | def add_button(
method add_upload_button (line 704) | def add_upload_button(
method add_button_group (line 761) | def add_button_group(
method add_button_group (line 772) | def add_button_group(
method add_button_group (line 782) | def add_button_group(
method add_checkbox (line 824) | def add_checkbox(
method add_text (line 864) | def add_text(
method add_number (line 904) | def add_number(
method add_vector2 (line 973) | def add_vector2(
method add_vector3 (line 1035) | def add_vector3(
method add_dropdown (line 1099) | def add_dropdown(
method add_dropdown (line 1111) | def add_dropdown(
method add_dropdown (line 1122) | def add_dropdown(
method add_progress_bar (line 1169) | def add_progress_bar(
method add_slider (line 1212) | def add_slider(
method add_multi_slider (line 1292) | def add_multi_slider(
method add_rgb (line 1375) | def add_rgb(
method add_rgba (line 1415) | def add_rgba(
method _create_gui_input (line 1454) | def _create_gui_input(
FILE: viser/src/viser/_gui_handles.py
function _make_unique_id (line 33) | def _make_unique_id() -> str:
class GuiContainerProtocol (line 38) | class GuiContainerProtocol(Protocol):
class SupportsRemoveProtocol (line 44) | class SupportsRemoveProtocol(Protocol):
method remove (line 45) | def remove(self) -> None: ...
class _GuiHandleState (line 49) | class _GuiHandleState(Generic[T]):
class _GuiInputHandle (line 80) | class _GuiInputHandle(Generic[T]):
method order (line 102) | def order(self) -> float:
method value (line 107) | def value(self) -> T:
method value (line 112) | def value(self, value: T | onp.ndarray) -> None:
method update_timestamp (line 143) | def update_timestamp(self) -> float:
method disabled (line 148) | def disabled(self) -> bool:
method disabled (line 154) | def disabled(self, disabled: bool) -> None:
method visible (line 164) | def visible(self) -> bool:
method visible (line 170) | def visible(self, visible: bool) -> None:
method __post_init__ (line 179) | def __post_init__(self) -> None:
method remove (line 189) | def remove(self) -> None:
class GuiInputHandle (line 206) | class GuiInputHandle(_GuiInputHandle[T], Generic[T]):
method on_update (line 215) | def on_update(
class GuiEvent (line 224) | class GuiEvent(Generic[TGuiHandle]):
class GuiButtonHandle (line 238) | class GuiButtonHandle(_GuiInputHandle[bool]):
method on_click (line 243) | def on_click(
class UploadedFile (line 252) | class UploadedFile:
class GuiUploadButtonHandle (line 262) | class GuiUploadButtonHandle(_GuiInputHandle[UploadedFile]):
method on_upload (line 268) | def on_upload(
class GuiButtonGroupHandle (line 277) | class GuiButtonGroupHandle(_GuiInputHandle[StringType], Generic[StringTy...
method on_click (line 282) | def on_click(
method disabled (line 290) | def disabled(self) -> bool:
method disabled (line 295) | def disabled(self, disabled: bool) -> None:
class GuiDropdownHandle (line 301) | class GuiDropdownHandle(GuiInputHandle[StringType], Generic[StringType]):
method options (line 309) | def options(self) -> tuple[StringType, ...]:
method options (line 320) | def options(self, options: Iterable[StringType]) -> None:
class GuiTabGroupHandle (line 342) | class GuiTabGroupHandle:
method order (line 354) | def order(self) -> float:
method add_tab (line 358) | def add_tab(self, label: str, icon: IconName | None = None) -> GuiTabH...
method __post_init__ (line 373) | def __post_init__(self) -> None:
method remove (line 377) | def remove(self) -> None:
method _sync_with_client (line 386) | def _sync_with_client(self) -> None:
class GuiFolderHandle (line 401) | class GuiFolderHandle:
method order (line 414) | def order(self) -> float:
method __enter__ (line 418) | def __enter__(self) -> GuiFolderHandle:
method __exit__ (line 423) | def __exit__(self, *args) -> None:
method __post_init__ (line 429) | def __post_init__(self) -> None:
method remove (line 434) | def remove(self) -> None:
class GuiModalHandle (line 446) | class GuiModalHandle:
method __enter__ (line 456) | def __enter__(self) -> GuiModalHandle:
method __exit__ (line 461) | def __exit__(self, *args) -> None:
method __post_init__ (line 467) | def __post_init__(self) -> None:
method close (line 470) | def close(self) -> None:
class GuiTabHandle (line 481) | class GuiTabHandle:
method __enter__ (line 491) | def __enter__(self) -> GuiTabHandle:
method __exit__ (line 496) | def __exit__(self, *args) -> None:
method __post_init__ (line 502) | def __post_init__(self) -> None:
method remove (line 505) | def remove(self) -> None:
function _get_data_url (line 525) | def _get_data_url(url: str, image_root: Path | None) -> str:
function _parse_markdown (line 551) | def _parse_markdown(markdown: str, image_root: Path | None) -> str:
class GuiProgressBarHandle (line 563) | class GuiProgressBarHandle:
method value (line 575) | def value(self) -> float:
method value (line 581) | def value(self, value: float) -> None:
method animated (line 592) | def animated(self) -> bool:
method animated (line 597) | def animated(self, animated: bool) -> None:
method order (line 607) | def order(self) -> float:
method visible (line 612) | def visible(self) -> bool:
method visible (line 618) | def visible(self, visible: bool) -> None:
method __post_init__ (line 627) | def __post_init__(self) -> None:
method remove (line 632) | def remove(self) -> None:
class GuiMarkdownHandle (line 641) | class GuiMarkdownHandle:
method content (line 653) | def content(self) -> str:
method content (line 659) | def content(self, content: str) -> None:
method order (line 669) | def order(self) -> float:
method visible (line 674) | def visible(self) -> bool:
method visible (line 680) | def visible(self, visible: bool) -> None:
method __post_init__ (line 689) | def __post_init__(self) -> None:
method remove (line 694) | def remove(self) -> None:
class GuiPlotlyHandle (line 703) | class GuiPlotlyHandle:
method figure (line 715) | def figure(self) -> go.Figure:
method figure (line 721) | def figure(self, figure: go.Figure) -> None:
method aspect (line 735) | def aspect(self) -> float:
method aspect (line 741) | def aspect(self, aspect: float) -> None:
method order (line 751) | def order(self) -> float:
method visible (line 756) | def visible(self) -> bool:
method visible (line 762) | def visible(self, visible: bool) -> None:
method __post_init__ (line 771) | def __post_init__(self) -> None:
method remove (line 776) | def remove(self) -> None:
FILE: viser/src/viser/_icons.py
function svg_from_icon (line 9) | def svg_from_icon(icon_name: IconName) -> str:
FILE: viser/src/viser/_icons_enum.py
class _IconStringConverter (line 9) | class _IconStringConverter(type):
method __getattr__ (line 10) | def __getattr__(self, __name: str) -> IconName:
class Icon (line 17) | class Icon(metaclass=_IconStringConverter):
FILE: viser/src/viser/_icons_enum.pyi
class Icon (line 8) | class Icon:
FILE: viser/src/viser/_icons_generate_enum.py
function enum_name_from_icon (line 10) | def enum_name_from_icon(name: str) -> str:
FILE: viser/src/viser/_messages.py
class Message (line 45) | class Message(infra.Message):
method redundancy_key (line 49) | def redundancy_key(self) -> str:
function tag_class (line 74) | def tag_class(tag: str) -> Callable[[T], T]:
class RunJavascriptMessage (line 85) | class RunJavascriptMessage(Message):
method redundancy_key (line 93) | def redundancy_key(self) -> str:
class NotificationMessage (line 99) | class NotificationMessage(Message):
class RemoveNotificationMessage (line 113) | class RemoveNotificationMessage(Message):
class ViewerCameraMessage (line 120) | class ViewerCameraMessage(Message):
class ScenePointerMessage (line 137) | class ScenePointerMessage(Message):
class ScenePointerEnableMessage (line 151) | class ScenePointerEnableMessage(Message):
method redundancy_key (line 158) | def redundancy_key(self) -> str:
class CameraFrustumMessage (line 165) | class CameraFrustumMessage(Message):
class GlbMessage (line 181) | class GlbMessage(Message):
class FrameMessage (line 190) | class FrameMessage(Message):
class BatchedAxesMessage (line 201) | class BatchedAxesMessage(Message):
class GridMessage (line 215) | class GridMessage(Message):
class LabelMessage (line 237) | class LabelMessage(Message):
class Gui3DMessage (line 245) | class Gui3DMessage(Message):
class PointCloudMessage (line 254) | class PointCloudMessage(Message):
method __post_init__ (line 268) | def __post_init__(self):
class MeshBoneMessage (line 279) | class MeshBoneMessage(Message):
class MeshMessage (line 286) | class MeshMessage(Message):
method __post_init__ (line 304) | def __post_init__(self):
class SkinnedMeshMessage (line 311) | class SkinnedMeshMessage(MeshMessage):
method __post_init__ (line 321) | def __post_init__(self):
class SetBoneOrientationMessage (line 334) | class SetBoneOrientationMessage(Message):
method redundancy_key (line 344) | def redundancy_key(self) -> str:
class SetBonePositionMessage (line 349) | class SetBonePositionMessage(Message):
method redundancy_key (line 359) | def redundancy_key(self) -> str:
class TransformControlsMessage (line 364) | class TransformControlsMessage(Message):
class SetCameraPositionMessage (line 387) | class SetCameraPositionMessage(Message):
class SetCameraUpDirectionMessage (line 394) | class SetCameraUpDirectionMessage(Message):
class SetCameraLookAtMessage (line 401) | class SetCameraLookAtMessage(Message):
class SetCameraFovMessage (line 408) | class SetCameraFovMessage(Message):
class SetOrientationMessage (line 415) | class SetOrientationMessage(Message):
class SetPositionMessage (line 425) | class SetPositionMessage(Message):
class TransformControlsUpdateMessage (line 435) | class TransformControlsUpdateMessage(Message):
class BackgroundImageMessage (line 446) | class BackgroundImageMessage(Message):
class ImageMessage (line 455) | class ImageMessage(Message):
class RemoveSceneNodeMessage (line 466) | class RemoveSceneNodeMessage(Message):
class SetSceneNodeVisibilityMessage (line 473) | class SetSceneNodeVisibilityMessage(Message):
class SetSceneNodeClickableMessage (line 481) | class SetSceneNodeClickableMessage(Message):
class SceneNodeClickMessage (line 489) | class SceneNodeClickMessage(Message):
class ResetSceneMessage (line 501) | class ResetSceneMessage(Message):
class ResetGuiMessage (line 506) | class ResetGuiMessage(Message):
class GuiAddFolderMessage (line 512) | class GuiAddFolderMessage(Message):
class GuiAddMarkdownMessage (line 523) | class GuiAddMarkdownMessage(Message):
class GuiAddProgressBarMessage (line 533) | class GuiAddProgressBarMessage(Message):
class GuiAddPlotlyMessage (line 545) | class GuiAddPlotlyMessage(Message):
class GuiAddTabGroupMessage (line 556) | class GuiAddTabGroupMessage(Message):
class _GuiAddInputBase (line 567) | class _GuiAddInputBase(Message):
class GuiModalMessage (line 581) | class GuiModalMessage(Message):
class GuiCloseModalMessage (line 588) | class GuiCloseModalMessage(Message):
class GuiAddButtonMessage (line 594) | class GuiAddButtonMessage(_GuiAddInputBase):
class GuiAddUploadButtonMessage (line 604) | class GuiAddUploadButtonMessage(_GuiAddInputBase):
class GuiAddSliderMessage (line 612) | class GuiAddSliderMessage(_GuiAddInputBase):
class GuiAddMultiSliderMessage (line 623) | class GuiAddMultiSliderMessage(_GuiAddInputBase):
class GuiAddNumberMessage (line 635) | class GuiAddNumberMessage(_GuiAddInputBase):
class GuiAddRgbMessage (line 645) | class GuiAddRgbMessage(_GuiAddInputBase):
class GuiAddRgbaMessage (line 651) | class GuiAddRgbaMessage(_GuiAddInputBase):
class GuiAddCheckboxMessage (line 657) | class GuiAddCheckboxMessage(_GuiAddInputBase):
class GuiAddVector2Message (line 663) | class GuiAddVector2Message(_GuiAddInputBase):
class GuiAddVector3Message (line 673) | class GuiAddVector3Message(_GuiAddInputBase):
class GuiAddTextMessage (line 683) | class GuiAddTextMessage(_GuiAddInputBase):
class GuiAddDropdownMessage (line 689) | class GuiAddDropdownMessage(_GuiAddInputBase):
class GuiAddButtonGroupMessage (line 696) | class GuiAddButtonGroupMessage(_GuiAddInputBase):
class GuiRemoveMessage (line 702) | class GuiRemoveMessage(Message):
class GuiUpdateMessage (line 709) | class GuiUpdateMessage(Message):
method redundancy_key (line 720) | def redundancy_key(self) -> str:
class ThemeConfigurationMessage (line 731) | class ThemeConfigurationMessage(Message):
class CatmullRomSplineMessage (line 744) | class CatmullRomSplineMessage(Message):
class CubicBezierSplineMessage (line 758) | class CubicBezierSplineMessage(Message):
class GaussianSplatsMessage (line 770) | class GaussianSplatsMessage(Message):
class GetRenderRequestMessage (line 791) | class GetRenderRequestMessage(Message):
class GetRenderResponseMessage (line 801) | class GetRenderResponseMessage(Message):
class FileTransferStart (line 808) | class FileTransferStart(Message):
method redundancy_key (line 820) | def redundancy_key(self) -> str:
class FileTransferPart (line 825) | class FileTransferPart(Message):
method redundancy_key (line 835) | def redundancy_key(self) -> str:
class FileTransferPartAck (line 840) | class FileTransferPartAck(Message):
method redundancy_key (line 849) | def redundancy_key(self) -> str:
class ShareUrlRequest (line 860) | class ShareUrlRequest(Message):
class ShareUrlUpdated (line 865) | class ShareUrlUpdated(Message):
class ShareUrlDisconnect (line 872) | class ShareUrlDisconnect(Message):
class SetGuiPanelLabelMessage (line 877) | class SetGuiPanelLabelMessage(Message):
FILE: viser/src/viser/_notification_handle.py
class _NotificationHandleState (line 12) | class _NotificationHandleState:
class NotificationHandle (line 24) | class NotificationHandle:
method _sync_with_client (line 29) | def _sync_with_client(self, first: bool = False) -> None:
method title (line 43) | def title(self) -> str:
method title (line 48) | def title(self, title: str) -> None:
method body (line 56) | def body(self) -> str:
method body (line 61) | def body(self, body: str) -> None:
method loading (line 69) | def loading(self) -> bool:
method loading (line 74) | def loading(self, loading: bool) -> None:
method with_close_button (line 82) | def with_close_button(self) -> bool:
method with_close_button (line 87) | def with_close_button(self, with_close_button: bool) -> None:
method auto_close (line 95) | def auto_close(self) -> int | Literal[False]:
method auto_close (line 101) | def auto_close(self, auto_close: int | Literal[False]) -> None:
method color (line 109) | def color(self) -> Color | None:
method color (line 114) | def color(self, color: Color | None) -> None:
method remove (line 121) | def remove(self) -> None:
FILE: viser/src/viser/_scene_api.py
function _colors_to_uint8 (line 48) | def _colors_to_uint8(colors: onp.ndarray) -> onpt.NDArray[onp.uint8]:
function _encode_rgb (line 64) | def _encode_rgb(rgb: RgbTupleOrArray) -> int:
function _encode_image_binary (line 75) | def _encode_image_binary(
function cast_vector (line 103) | def cast_vector(vector: TVector | onp.ndarray, length: int) -> TVector:
class SceneApi (line 111) | class SceneApi:
method __init__ (line 117) | def __init__(
method set_up_direction (line 172) | def set_up_direction(
method set_global_visibility (line 242) | def set_global_visibility(self, visible: bool) -> None:
method add_glb (line 257) | def add_glb(
method add_spline_catmull_rom (line 291) | def add_spline_catmull_rom(
method add_spline_cubic_bezier (line 346) | def add_spline_cubic_bezier(
method add_camera_frustum (line 402) | def add_camera_frustum(
method add_frame (line 467) | def add_frame(
method add_batched_axes (line 516) | def add_batched_axes(
method add_grid (line 572) | def add_grid(
method add_label (line 632) | def add_label(
method add_point_cloud (line 658) | def add_point_cloud(
method add_mesh_skinned (line 715) | def add_mesh_skinned(
method add_mesh_simple (line 836) | def add_mesh_simple(
method add_mesh_trimesh (line 902) | def add_mesh_trimesh(
method _add_gaussian_splats (line 938) | def _add_gaussian_splats(
method add_box (line 1007) | def add_box(
method add_icosphere (line 1045) | def add_icosphere(
method set_background_image (line 1087) | def set_background_image(
method add_image (line 1133) | def add_image(
method add_transform_controls (line 1177) | def add_transform_controls(
method reset (line 1270) | def reset(self) -> None:
method _get_client_handle (line 1274) | def _get_client_handle(self, client_id: ClientId) -> ClientHandle:
method _handle_transform_controls_updates (line 1292) | def _handle_transform_controls_updates(
method _handle_node_click_updates (line 1314) | def _handle_node_click_updates(
method _handle_scene_pointer_updates (line 1334) | def _handle_scene_pointer_updates(
method on_pointer_event (line 1351) | def on_pointer_event(
method on_pointer_callback_removed (line 1402) | def on_pointer_callback_removed(
method remove_pointer_callback (line 1418) | def remove_pointer_callback(
method add_3d_gui_container (line 1447) | def add_3d_gui_container(
FILE: viser/src/viser/_scene_handles.py
class ScenePointerEvent (line 20) | class ScenePointerEvent:
method event (line 39) | def event(self):
class _SceneNodeHandleState (line 48) | class _SceneNodeHandleState:
class SceneNodeHandle (line 65) | class SceneNodeHandle:
method _make (line 71) | def _make(
method wxyz (line 92) | def wxyz(self) -> onp.ndarray:
method wxyz (line 99) | def wxyz(self, wxyz: tuple[float, float, float, float] | onp.ndarray) ...
method position (line 109) | def position(self) -> onp.ndarray:
method position (line 116) | def position(self, position: tuple[float, float, float] | onp.ndarray)...
method visible (line 126) | def visible(self) -> bool:
method visible (line 131) | def visible(self, visible: bool) -> None:
method remove (line 139) | def remove(self) -> None:
class SceneNodePointerEvent (line 147) | class SceneNodePointerEvent(Generic[TSceneNodeHandle]):
class _ClickableSceneNodeHandle (line 170) | class _ClickableSceneNodeHandle(SceneNodeHandle):
method on_click (line 171) | def on_click(
class CameraFrustumHandle (line 186) | class CameraFrustumHandle(_ClickableSceneNodeHandle):
class PointCloudHandle (line 191) | class PointCloudHandle(SceneNodeHandle):
class BatchedAxesHandle (line 196) | class BatchedAxesHandle(_ClickableSceneNodeHandle):
class FrameHandle (line 201) | class FrameHandle(_ClickableSceneNodeHandle):
class MeshHandle (line 206) | class MeshHandle(_ClickableSceneNodeHandle):
class GaussianSplatHandle (line 211) | class GaussianSplatHandle(_ClickableSceneNodeHandle):
class MeshSkinnedHandle (line 219) | class MeshSkinnedHandle(_ClickableSceneNodeHandle):
class BoneState (line 228) | class BoneState:
class MeshSkinnedBoneHandle (line 237) | class MeshSkinnedBoneHandle:
method wxyz (line 243) | def wxyz(self) -> onp.ndarray:
method wxyz (line 250) | def wxyz(self, wxyz: tuple[float, float, float, float] | onp.ndarray) ...
method position (line 262) | def position(self) -> onp.ndarray:
method position (line 269) | def position(self, position: tuple[float, float, float] | onp.ndarray)...
class GlbHandle (line 282) | class GlbHandle(_ClickableSceneNodeHandle):
class ImageHandle (line 287) | class ImageHandle(_ClickableSceneNodeHandle):
class LabelHandle (line 292) | class LabelHandle(SceneNodeHandle):
class _TransformControlsState (line 297) | class _TransformControlsState:
class TransformControlsHandle (line 304) | class TransformControlsHandle(_ClickableSceneNodeHandle):
method update_timestamp (line 310) | def update_timestamp(self) -> float:
method on_update (line 313) | def on_update(
class Gui3dContainerHandle (line 322) | class Gui3dContainerHandle(SceneNodeHandle):
method __enter__ (line 332) | def __enter__(self) -> Gui3dContainerHandle:
method __exit__ (line 337) | def __exit__(self, *args) -> None:
method __post_init__ (line 343) | def __post_init__(self) -> None:
method remove (line 346) | def remove(self) -> None:
FILE: viser/src/viser/_tunnel.py
function _is_multiprocess_ok (line 15) | def _is_multiprocess_ok() -> bool:
class ViserTunnel (line 25) | class ViserTunnel:
method __init__ (line 32) | def __init__(self, share_domain: str, local_port: int) -> None:
method on_disconnect (line 63) | def on_disconnect(self, callback: Callable[[], None]) -> None:
method on_connect (line 73) | def on_connect(self, callback: Callable[[int], None]) -> None:
method get_url (line 123) | def get_url(self) -> str | None:
method get_status (line 127) | def get_status(
method close (line 132) | def close(self) -> None:
function _connect_job (line 150) | def _connect_job(
function _make_tunnel (line 185) | async def _make_tunnel(
function _simple_proxy (line 239) | async def _simple_proxy(
FILE: viser/src/viser/_viser.py
class _BackwardsCompatibilityShim (line 30) | class _BackwardsCompatibilityShim:
method __getattr__ (line 34) | def __getattr__(self, name: str) -> Any:
class _CameraHandleState (line 67) | class _CameraHandleState:
class CameraHandle (line 81) | class CameraHandle:
method __init__ (line 85) | def __init__(self, client: ClientHandle) -> None:
method client (line 99) | def client(self) -> ClientHandle:
method wxyz (line 104) | def wxyz(self) -> npt.NDArray[onp.float64]:
method wxyz (line 114) | def wxyz(self, wxyz: tuple[float, float, float, float] | onp.ndarray) ...
method position (line 149) | def position(self) -> npt.NDArray[onp.float64]:
method position (line 160) | def position(self, position: tuple[float, float, float] | onp.ndarray)...
method _update_wxyz (line 169) | def _update_wxyz(self) -> None:
method fov (line 180) | def fov(self) -> float:
method fov (line 187) | def fov(self, fov: float) -> None:
method aspect (line 195) | def aspect(self) -> float:
method update_timestamp (line 201) | def update_timestamp(self) -> float:
method look_at (line 206) | def look_at(self) -> npt.NDArray[onp.float64]:
method look_at (line 212) | def look_at(self, look_at: tuple[float, float, float] | onp.ndarray) -...
method up_direction (line 221) | def up_direction(self) -> npt.NDArray[onp.float64]:
method up_direction (line 227) | def up_direction(
method on_update (line 237) | def on_update(
method get_render (line 244) | def get_render(
class ClientHandle (line 299) | class ClientHandle(_BackwardsCompatibilityShim if not TYPE_CHECKING else...
method __init__ (line 311) | def __init__(
method flush (line 332) | def flush(self) -> None:
method atomic (line 337) | def atomic(self) -> ContextManager[None]:
method send_file_download (line 350) | def send_file_download(
method add_notification (line 391) | def add_notification(
class ViserServer (line 433) | class ViserServer(_BackwardsCompatibilityShim if not TYPE_CHECKING else ...
method __init__ (line 457) | def __init__(
method get_host (line 579) | def get_host(self) -> str:
method get_port (line 587) | def get_port(self) -> int:
method request_share_url (line 596) | def request_share_url(self, verbose: bool = True) -> str | None:
method disconnect_share_url (line 652) | def disconnect_share_url(self) -> None:
method stop (line 661) | def stop(self) -> None:
method get_clients (line 667) | def get_clients(self) -> dict[int, ClientHandle]:
method on_client_connect (line 677) | def on_client_connect(
method on_client_disconnect (line 697) | def on_client_disconnect(
method flush (line 704) | def flush(self) -> None:
method atomic (line 709) | def atomic(self) -> ContextManager[None]:
method send_file_download (line 722) | def send_file_download(
method _start_scene_recording (line 735) | def _start_scene_recording(self) -> RecordHandle:
FILE: viser/src/viser/client/src/App.tsx
type ViewerContextContents (line 53) | type ViewerContextContents = {
function ViewerRoot (line 117) | function ViewerRoot() {
function ViewerContents (line 207) | function ViewerContents({ children }: { children: React.ReactNode }) {
function ViewerCanvas (line 285) | function ViewerCanvas({ children }: { children: React.ReactNode }) {
function AdaptiveDpr (line 477) | function AdaptiveDpr() {
function Viewer2DCanvas (line 502) | function Viewer2DCanvas() {
function BackgroundImage (line 534) | function BackgroundImage() {
function SceneContextSetter (line 644) | function SceneContextSetter() {
function Root (line 653) | function Root() {
function ViserLogo (line 670) | function ViserLogo() {
FILE: viser/src/viser/client/src/BrowserWarning.tsx
function BrowserWarning (line 5) | function BrowserWarning() {
FILE: viser/src/viser/client/src/CameraControls.tsx
function SynchronizedCameraControls (line 11) | function SynchronizedCameraControls() {
FILE: viser/src/viser/client/src/ClickUtils.tsx
function ndcFromPointerXy (line 9) | function ndcFromPointerXy(
function opencvXyFromPointerXy (line 31) | function opencvXyFromPointerXy(
FILE: viser/src/viser/client/src/ControlPanel/BottomPanel.tsx
function BottomPanel (line 12) | function BottomPanel({
FILE: viser/src/viser/client/src/ControlPanel/ControlPanel.tsx
constant ROOT_CONTAINER_ID (line 43) | const ROOT_CONTAINER_ID = "root";
function ControlPanel (line 45) | function ControlPanel(props: {
function ConnectionStatus (line 154) | function ConnectionStatus() {
function ShareButton (line 192) | function ShareButton() {
FILE: viser/src/viser/client/src/ControlPanel/FloatingPanel.tsx
function FloatingPanel (line 35) | function FloatingPanel({
FILE: viser/src/viser/client/src/ControlPanel/Generated.tsx
function GeneratedGuiContainer (line 27) | function GeneratedGuiContainer({
function GuiContainer (line 58) | function GuiContainer({ containerId }: { containerId: string }) {
function GeneratedInput (line 85) | function GeneratedInput(props: { guiId: string }) {
function assertNeverType (line 130) | function assertNeverType(x: never): never {
FILE: viser/src/viser/client/src/ControlPanel/GuiComponentContext.tsx
type GuiComponentContext (line 4) | interface GuiComponentContext {
FILE: viser/src/viser/client/src/ControlPanel/GuiState.tsx
type GuiConfig (line 8) | type GuiConfig = Messages.GuiAddComponentMessage;
function isGuiConfig (line 10) | function isGuiConfig(message: Messages.Message): message is GuiConfig {
type GuiState (line 14) | interface GuiState {
type GuiActions (line 37) | interface GuiActions {
function computeRelativeLuminance (line 77) | function computeRelativeLuminance(color: string) {
function useGuiState (line 89) | function useGuiState(initialServer: string) {
type UseGui (line 187) | type UseGui = ReturnType<typeof useGuiState>;
FILE: viser/src/viser/client/src/ControlPanel/SceneTreeTable.tsx
function SceneTreeTable (line 18) | function SceneTreeTable() {
function setOverrideVisibility (line 50) | function setOverrideVisibility(name: string, visible: boolean | undefine...
function rerenderTable (line 63) | function rerenderTable() {
FILE: viser/src/viser/client/src/ControlPanel/ServerControls.tsx
function ServerControls (line 16) | function ServerControls() {
FILE: viser/src/viser/client/src/ControlPanel/SidebarPanel.tsx
function SidebarPanel (line 21) | function SidebarPanel({
FILE: viser/src/viser/client/src/FilePlayback.tsx
function deserializeGzippedMsgpackFile (line 24) | async function deserializeGzippedMsgpackFile<T>(
type SerializedMessages (line 71) | interface SerializedMessages {
function PlaybackFromFile (line 77) | function PlaybackFromFile({ fileUrl }: { fileUrl: string }) {
FILE: viser/src/viser/client/src/Markdown.tsx
function rehypeCodeblock (line 28) | function rehypeCodeblock(): void | Transformer<Root, Root> {
function MdxText (line 42) | function MdxText(props: React.ComponentPropsWithoutRef<typeof Text>) {
function MdxAnchor (line 46) | function MdxAnchor(props: React.ComponentPropsWithoutRef<typeof Anchor>) {
function MdxTitle (line 50) | function MdxTitle(
function MdxList (line 57) | function MdxList(
function MdxListItem (line 77) | function MdxListItem(
function MdxCode (line 85) | function MdxCode(
function MdxBlockquote (line 92) | function MdxBlockquote(
function MdxCite (line 98) | function MdxCite(
function MdxTable (line 119) | function MdxTable(props: React.ComponentPropsWithoutRef<typeof Table>) {
function MdxImage (line 123) | function MdxImage(props: React.ComponentPropsWithoutRef<typeof Image>) {
function parseMarkdown (line 148) | async function parseMarkdown(markdown: string) {
function Markdown (line 165) | function Markdown(props: { children?: string }) {
FILE: viser/src/viser/client/src/MessageHandler.tsx
function threeColorBufferFromUint8Buffer (line 35) | function threeColorBufferFromUint8Buffer(colors: ArrayBuffer) {
function useMessageHandler (line 50) | function useMessageHandler() {
function useFileDownloadHandler (line 1067) | function useFileDownloadHandler() {
function FrameSynchronizedMessageHandler (line 1156) | function FrameSynchronizedMessageHandler() {
FILE: viser/src/viser/client/src/Modal.tsx
function ViserModal (line 7) | function ViserModal() {
function GeneratedModal (line 18) | function GeneratedModal({
FILE: viser/src/viser/client/src/Outlines.tsx
type OutlinesProps (line 69) | type OutlinesProps = JSX.IntrinsicElements["group"] & {
FILE: viser/src/viser/client/src/SceneTree.tsx
type MakeObject (line 16) | type MakeObject<T extends THREE.Object3D = THREE.Object3D> = (
class SceneNode (line 21) | class SceneNode<T extends THREE.Object3D = THREE.Object3D> {
method constructor (line 27) | constructor(
type UseSceneTree (line 49) | type UseSceneTree = ReturnType<typeof useSceneTreeState>;
function SceneNodeThreeChildren (line 51) | function SceneNodeThreeChildren(props: {
function SceneNodeLabel (line 109) | function SceneNodeLabel(props: { name: string }) {
function SceneNodeThreeObject (line 131) | function SceneNodeThreeObject(props: {
FILE: viser/src/viser/client/src/SceneTreeState.tsx
type SceneTreeState (line 9) | interface SceneTreeState {
type SceneTreeActions (line 14) | interface SceneTreeActions extends SceneTreeState {
function useSceneTreeState (line 39) | function useSceneTreeState(
FILE: viser/src/viser/client/src/SearchParamsUtils.tsx
function syncSearchParamServer (line 7) | function syncSearchParamServer(server: string) {
FILE: viser/src/viser/client/src/Splatting/GaussianSplats.tsx
type SplatState (line 35) | interface SplatState {
function useGaussianSplatStore (line 45) | function useGaussianSplatStore() {
function SplatRenderContext (line 71) | function SplatRenderContext({
function SplatRenderer (line 282) | function SplatRenderer() {
function mergeGaussianGroups (line 444) | function mergeGaussianGroups(groupBufferFromName: {
function useGaussianMeshProps (line 484) | function useGaussianMeshProps(gaussianBuffer: Uint32Array, numGroups: nu...
FILE: viser/src/viser/client/src/Splatting/SplatSortWorker.ts
type SorterWorkerIncoming (line 6) | type SorterWorkerIncoming =
FILE: viser/src/viser/client/src/Splatting/WasmSorter/Sorter.mjs
function locateFile (line 9) | function locateFile(path){if(Module["locateFile"]){return Module["locate...
function updateMemoryViews (line 9) | function updateMemoryViews(){var b=wasmMemory.buffer;Module["HEAP8"]=HEA...
function preRun (line 9) | function preRun(){if(Module["preRun"]){if(typeof Module["preRun"]=="func...
function initRuntime (line 9) | function initRuntime(){runtimeInitialized=true;callRuntimeCallbacks(__AT...
function postRun (line 9) | function postRun(){if(Module["postRun"]){if(typeof Module["postRun"]=="f...
function addOnPreRun (line 9) | function addOnPreRun(cb){__ATPRERUN__.unshift(cb)}
function addOnInit (line 9) | function addOnInit(cb){__ATINIT__.unshift(cb)}
function addOnPostRun (line 9) | function addOnPostRun(cb){__ATPOSTRUN__.unshift(cb)}
function addRunDependency (line 9) | function addRunDependency(id){runDependencies++;Module["monitorRunDepend...
function removeRunDependency (line 9) | function removeRunDependency(id){runDependencies--;Module["monitorRunDep...
function abort (line 9) | function abort(what){Module["onAbort"]?.(what);what="Aborted("+what+")";...
function findWasmBinary (line 9) | function findWasmBinary(){if(Module["locateFile"]){var f="Sorter.wasm";i...
function getBinarySync (line 9) | function getBinarySync(file){if(file==wasmBinaryFile&&wasmBinary){return...
function getBinaryPromise (line 9) | function getBinaryPromise(binaryFile){if(!wasmBinary){return new Promise...
function instantiateArrayBuffer (line 9) | function instantiateArrayBuffer(binaryFile,imports,receiver){return getB...
function instantiateAsync (line 9) | function instantiateAsync(binary,binaryFile,imports,callback){if(!binary...
function getWasmImports (line 9) | function getWasmImports(){return{a:wasmImports}}
function createWasm (line 9) | function createWasm(){var info=getWasmImports();function receiveInstance...
class ExceptionInfo (line 9) | class ExceptionInfo{constructor(excPtr){this.excPtr=excPtr;this.ptr=excP...
method constructor (line 9) | constructor(excPtr){this.excPtr=excPtr;this.ptr=excPtr-24}
method set_type (line 9) | set_type(type){HEAPU32[this.ptr+4>>2]=type}
method get_type (line 9) | get_type(){return HEAPU32[this.ptr+4>>2]}
method set_destructor (line 9) | set_destructor(destructor){HEAPU32[this.ptr+8>>2]=destructor}
method get_destructor (line 9) | get_destructor(){return HEAPU32[this.ptr+8>>2]}
method set_caught (line 9) | set_caught(caught){caught=caught?1:0;HEAP8[this.ptr+12]=caught}
method get_caught (line 9) | get_caught(){return HEAP8[this.ptr+12]!=0}
method set_rethrown (line 9) | set_rethrown(rethrown){rethrown=rethrown?1:0;HEAP8[this.ptr+13]=rethrown}
method get_rethrown (line 9) | get_rethrown(){return HEAP8[this.ptr+13]!=0}
method init (line 9) | init(type,destructor){this.set_adjusted_ptr(0);this.set_type(type);thi...
method set_adjusted_ptr (line 9) | set_adjusted_ptr(adjustedPtr){HEAPU32[this.ptr+16>>2]=adjustedPtr}
method get_adjusted_ptr (line 9) | get_adjusted_ptr(){return HEAPU32[this.ptr+16>>2]}
method get_exception_ptr (line 9) | get_exception_ptr(){var isPointer=___cxa_is_pointer_type(this.get_type...
function onComplete (line 9) | function onComplete(typeConverters){var myTypeConverters=getTypeConverte...
function sharedRegisterType (line 9) | function sharedRegisterType(rawType,registeredInstance,options={}){var n...
function registerType (line 9) | function registerType(rawType,registeredInstance,options={}){if(!("argPa...
function getInstanceTypeName (line 9) | function getInstanceTypeName(handle){return handle.$$.ptrType.registered...
function RegisteredPointer_fromWireType (line 9) | function RegisteredPointer_fromWireType(ptr){var rawPointer=this.getPoin...
method isAliasOf (line 9) | isAliasOf(other){if(!(this instanceof ClassHandle)){return false}if(!(ot...
method clone (line 9) | clone(){if(!this.$$.ptr){throwInstanceAlreadyDeleted(this)}if(this.$$.pr...
method delete (line 9) | delete(){if(!this.$$.ptr){throwInstanceAlreadyDeleted(this)}if(this.$$.d...
method isDeleted (line 9) | isDeleted(){return!this.$$.ptr}
method deleteLater (line 9) | deleteLater(){if(!this.$$.ptr){throwInstanceAlreadyDeleted(this)}if(this...
function ClassHandle (line 9) | function ClassHandle(){}
function RegisteredClass (line 9) | function RegisteredClass(name,constructor,instancePrototype,rawDestructo...
function constNoSmartPtrRawPointerToWireType (line 9) | function constNoSmartPtrRawPointerToWireType(destructors,handle){if(hand...
function genericPointerToWireType (line 9) | function genericPointerToWireType(destructors,handle){var ptr;if(handle=...
function nonConstNoSmartPtrRawPointerToWireType (line 9) | function nonConstNoSmartPtrRawPointerToWireType(destructors,handle){if(h...
function readPointer (line 9) | function readPointer(pointer){return this["fromWireType"](HEAPU32[pointe...
method getPointee (line 9) | getPointee(ptr){if(this.rawGetPointee){ptr=this.rawGetPointee(ptr)}retur...
method destructor (line 9) | destructor(ptr){this.rawDestructor?.(ptr)}
function RegisteredPointer (line 9) | function RegisteredPointer(name,registeredClass,isReference,isConst,isSm...
function makeDynCaller (line 9) | function makeDynCaller(){if(signature.includes("j")){return getDynCaller...
function visit (line 9) | function visit(type){if(seen[type]){return}if(registeredTypes[type]){ret...
function usesDestructorStack (line 9) | function usesDestructorStack(argTypes){for(var i=1;i<argTypes.length;++i...
function newFunc (line 9) | function newFunc(constructor,argumentList){if(!(constructor instanceof F...
function createJsInvoker (line 9) | function createJsInvoker(argTypes,isClassMethodFunc,returns,isAsync){var...
function craftInvokerFunction (line 9) | function craftInvokerFunction(humanName,argTypes,classType,cppInvokerFun...
function unboundTypesHandler (line 9) | function unboundTypesHandler(){throwUnboundTypeError(`Cannot call ${huma...
function decodeMemoryView (line 9) | function decodeMemoryView(handle){var size=HEAPU32[handle>>2];var data=H...
method fromWireType (line 9) | fromWireType(value){var length=HEAPU32[value>>2];var payload=value+4;var...
method toWireType (line 9) | toWireType(destructors,value){if(value instanceof ArrayBuffer){value=new...
method destructorFunction (line 9) | destructorFunction(ptr){_free(ptr)}
method destructorFunction (line 9) | destructorFunction(ptr){_free(ptr)}
method constructor (line 9) | constructor(message){super(message);this.name="BindingError"}
method constructor (line 9) | constructor(message){super(message);this.name="InternalError"}
function run (line 9) | function run(){if(runDependencies>0){return}preRun();if(runDependencies>...
FILE: viser/src/viser/client/src/Splatting/WasmSorter/sorter.cpp
function dot_f32x4 (line 11) | __attribute__((always_inline)) inline float
function min_i32x4 (line 23) | __attribute__((always_inline)) inline int32_t min_i32x4(v128_t vector) {
function max_i32x4 (line 33) | __attribute__((always_inline)) inline int32_t max_i32x4(v128_t vector) {
class Sorter (line 41) | class Sorter {
method Sorter (line 47) | Sorter(
method sort (line 72) | emscripten::val sort(const emscripten::val &Tz_cam_groups_val) {
function EMSCRIPTEN_BINDINGS (line 185) | EMSCRIPTEN_BINDINGS(c) {
FILE: viser/src/viser/client/src/ThreeAssets.tsx
type AllPossibleThreeJSMaterials (line 28) | type AllPossibleThreeJSMaterials =
function disposeNode (line 185) | function disposeNode(node: any) {
function disposeMaterial (line 201) | function disposeMaterial(material: AllPossibleThreeJSMaterials) {
function scaledLineSegments (line 421) | function scaledLineSegments(points: [number, number, number][], thicknes...
function LineSegmentInstance (line 508) | function LineSegmentInstance(props: {
function OutlinesIfHovered (line 546) | function OutlinesIfHovered(
FILE: viser/src/viser/client/src/Titlebar.tsx
type ArrayElement (line 22) | type ArrayElement<ArrayType extends readonly unknown[]> =
type TitlebarContent (line 24) | type TitlebarContent = NonNullable<
function assertUnreachable (line 27) | function assertUnreachable(x: never): never {
function getIcon (line 31) | function getIcon(
function TitlebarButton (line 54) | function TitlebarButton(
function MobileTitlebarButton (line 74) | function MobileTitlebarButton(
function TitlebarImage (line 94) | function TitlebarImage(
function Titlebar (line 125) | function Titlebar() {
FILE: viser/src/viser/client/src/Utils.ts
type DragEvents (line 2) | interface DragEvents {
function isTouchEvent (line 9) | function isTouchEvent(
function isMouseEvent (line 14) | function isMouseEvent(
FILE: viser/src/viser/client/src/WebsocketFunctions.tsx
function useThrottledMessageSender (line 7) | function useThrottledMessageSender(throttleMilliseconds: number) {
function makeThrottledMessageSender (line 13) | function makeThrottledMessageSender(
function isTexture (line 42) | function isTexture(
FILE: viser/src/viser/client/src/WebsocketInterface.tsx
function WebsocketMessageProducer (line 9) | function WebsocketMessageProducer() {
FILE: viser/src/viser/client/src/WebsocketMessages.tsx
type RunJavascriptMessage (line 9) | interface RunJavascriptMessage {
type NotificationMessage (line 17) | interface NotificationMessage {
type RemoveNotificationMessage (line 47) | interface RemoveNotificationMessage {
type ViewerCameraMessage (line 56) | interface ViewerCameraMessage {
type ScenePointerMessage (line 72) | interface ScenePointerMessage {
type ScenePointerEnableMessage (line 83) | interface ScenePointerEnableMessage {
type CameraFrustumMessage (line 94) | interface CameraFrustumMessage {
type GlbMessage (line 109) | interface GlbMessage {
type FrameMessage (line 119) | interface FrameMessage {
type BatchedAxesMessage (line 134) | interface BatchedAxesMessage {
type GridMessage (line 146) | interface GridMessage {
type LabelMessage (line 165) | interface LabelMessage {
type Gui3DMessage (line 174) | interface Gui3DMessage {
type PointCloudMessage (line 189) | interface PointCloudMessage {
type MeshBoneMessage (line 201) | interface MeshBoneMessage {
type MeshMessage (line 211) | interface MeshMessage {
type SkinnedMeshMessage (line 230) | interface SkinnedMeshMessage {
type SetBoneOrientationMessage (line 253) | interface SetBoneOrientationMessage {
type SetBonePositionMessage (line 265) | interface SetBonePositionMessage {
type TransformControlsMessage (line 275) | interface TransformControlsMessage {
type SetCameraPositionMessage (line 295) | interface SetCameraPositionMessage {
type SetCameraUpDirectionMessage (line 303) | interface SetCameraUpDirectionMessage {
type SetCameraLookAtMessage (line 311) | interface SetCameraLookAtMessage {
type SetCameraFovMessage (line 319) | interface SetCameraFovMessage {
type SetOrientationMessage (line 329) | interface SetOrientationMessage {
type SetPositionMessage (line 340) | interface SetPositionMessage {
type TransformControlsUpdateMessage (line 351) | interface TransformControlsUpdateMessage {
type BackgroundImageMessage (line 361) | interface BackgroundImageMessage {
type ImageMessage (line 371) | interface ImageMessage {
type RemoveSceneNodeMessage (line 383) | interface RemoveSceneNodeMessage {
type SetSceneNodeVisibilityMessage (line 391) | interface SetSceneNodeVisibilityMessage {
type SetSceneNodeClickableMessage (line 400) | interface SetSceneNodeClickableMessage {
type SceneNodeClickMessage (line 409) | interface SceneNodeClickMessage {
type ResetSceneMessage (line 421) | interface ResetSceneMessage {
type ResetGuiMessage (line 428) | interface ResetGuiMessage {
type GuiAddFolderMessage (line 435) | interface GuiAddFolderMessage {
type GuiAddMarkdownMessage (line 448) | interface GuiAddMarkdownMessage {
type GuiAddProgressBarMessage (line 460) | interface GuiAddProgressBarMessage {
type GuiAddPlotlyMessage (line 489) | interface GuiAddPlotlyMessage {
type GuiAddTabGroupMessage (line 502) | interface GuiAddTabGroupMessage {
type _GuiAddInputBase (line 516) | interface _GuiAddInputBase {
type GuiAddButtonMessage (line 531) | interface GuiAddButtonMessage {
type GuiAddUploadButtonMessage (line 563) | interface GuiAddUploadButtonMessage {
type GuiAddSliderMessage (line 596) | interface GuiAddSliderMessage {
type GuiAddMultiSliderMessage (line 616) | interface GuiAddMultiSliderMessage {
type GuiAddNumberMessage (line 638) | interface GuiAddNumberMessage {
type GuiAddRgbMessage (line 657) | interface GuiAddRgbMessage {
type GuiAddRgbaMessage (line 672) | interface GuiAddRgbaMessage {
type GuiAddCheckboxMessage (line 687) | interface GuiAddCheckboxMessage {
type GuiAddVector2Message (line 702) | interface GuiAddVector2Message {
type GuiAddVector3Message (line 721) | interface GuiAddVector3Message {
type GuiAddTextMessage (line 740) | interface GuiAddTextMessage {
type GuiAddDropdownMessage (line 755) | interface GuiAddDropdownMessage {
type GuiAddButtonGroupMessage (line 771) | interface GuiAddButtonGroupMessage {
type GuiModalMessage (line 787) | interface GuiModalMessage {
type GuiCloseModalMessage (line 797) | interface GuiCloseModalMessage {
type GuiRemoveMessage (line 805) | interface GuiRemoveMessage {
type GuiUpdateMessage (line 813) | interface GuiUpdateMessage {
type ThemeConfigurationMessage (line 822) | interface ThemeConfigurationMessage {
type CatmullRomSplineMessage (line 863) | interface CatmullRomSplineMessage {
type CubicBezierSplineMessage (line 878) | interface CubicBezierSplineMessage {
type GaussianSplatsMessage (line 891) | interface GaussianSplatsMessage {
type GetRenderRequestMessage (line 900) | interface GetRenderRequestMessage {
type GetRenderResponseMessage (line 911) | interface GetRenderResponseMessage {
type FileTransferStart (line 919) | interface FileTransferStart {
type FileTransferPart (line 932) | interface FileTransferPart {
type FileTransferPartAck (line 943) | interface FileTransferPartAck {
type ShareUrlRequest (line 954) | interface ShareUrlRequest {
type ShareUrlUpdated (line 961) | interface ShareUrlUpdated {
type ShareUrlDisconnect (line 969) | interface ShareUrlDisconnect {
type SetGuiPanelLabelMessage (line 976) | interface SetGuiPanelLabelMessage {
type Message (line 981) | type Message =
type GuiAddComponentMessage (line 1053) | type GuiAddComponentMessage =
FILE: viser/src/viser/client/src/WebsocketServerWorker.ts
type WsWorkerIncoming (line 5) | type WsWorkerIncoming =
type WsWorkerOutgoing (line 10) | type WsWorkerOutgoing =
function collectArrayBuffers (line 16) | function collectArrayBuffers(obj: any, buffers: Set<ArrayBuffer>) {
FILE: viser/src/viser/client/src/WorldTransformUtils.ts
function computeT_threeworld_world (line 7) | function computeT_threeworld_world(viewer: ViewerContextContents) {
function rayToViserCoords (line 22) | function rayToViserCoords(
FILE: viser/src/viser/client/src/components/Button.tsx
function ButtonComponent (line 9) | function ButtonComponent({
FILE: viser/src/viser/client/src/components/ButtonGroup.tsx
function ButtonGroupComponent (line 7) | function ButtonGroupComponent({
FILE: viser/src/viser/client/src/components/Checkbox.tsx
function CheckboxComponent (line 7) | function CheckboxComponent({
FILE: viser/src/viser/client/src/components/Dropdown.tsx
function DropdownComponent (line 7) | function DropdownComponent({
FILE: viser/src/viser/client/src/components/Folder.tsx
function FolderComponent (line 10) | function FolderComponent({
FILE: viser/src/viser/client/src/components/Markdown.tsx
function MarkdownComponent (line 6) | function MarkdownComponent({
FILE: viser/src/viser/client/src/components/MultiSlider.tsx
function MultiSliderComponent (line 9) | function MultiSliderComponent({
FILE: viser/src/viser/client/src/components/MultiSliderPrimitive/Marks/Marks.tsx
type MarksProps (line 6) | interface MarksProps {
function Marks (line 16) | function Marks({
Condensed preview — 538 files, each entry giving its path, character count, and a short content snippet (full structured content is about 3,400K characters).
[
{
"path": ".gitignore",
"chars": 4730,
"preview": "# Byte-compiled / optimized / DLL files\n__pycache__/\n*.py[codz]\n*$py.class\n\n# C extensions\n*.so\n\n# Distribution / packag"
},
{
"path": ".gitmodules",
"chars": 124,
"preview": "[submodule \"camera_pose_annotation/base\"]\n\tpath = camera_pose_annotation/base\n\turl = https://github.com/SpatialVID/base."
},
{
"path": "Dockerfile.cuda",
"chars": 3271,
"preview": "# This Dockerfile builds FFmpeg with NVIDIA GPU support and libvmaf from source\n# It uses a two-stage build to create a "
},
{
"path": "LICENSE",
"chars": 11357,
"preview": " Apache License\n Version 2.0, January 2004\n "
},
{
"path": "README.md",
"chars": 17218,
"preview": "<h1 align='center'>SpatialVID: A Large-Scale Video Dataset with Spatial Annotations</h1>\n<div align='center'>\n <a hre"
},
{
"path": "camera_pose_annotation/.gitignore",
"chars": 238,
"preview": "# files\ndata/*\n*.log\n*.txt\n*.bz2\n*.zip\n*.ipynb\ndata_videos\n!requirements.txt\n!requirements_megasam.txt\n\n#python\n*.pyc\n__"
},
{
"path": "camera_pose_annotation/README.md",
"chars": 3813,
"preview": "# Camera Pose Annotation\n\n## Depth Estimation\nUse both [Depth-Anything V2](depth_estimation/Depth-Anything) and [UniDept"
},
{
"path": "camera_pose_annotation/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "camera_pose_annotation/camera_tracking/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "camera_pose_annotation/camera_tracking/camera_tracking.py",
"chars": 10516,
"preview": "# Copyright 2025 DeepMind Technologies Limited\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you"
},
{
"path": "camera_pose_annotation/camera_tracking/inference_batch.py",
"chars": 3527,
"preview": "\"\"\"\nBatch inference for camera tracking using multiple GPUs.\n\nThis module provides functionality for:\n- Parallel camera "
},
{
"path": "camera_pose_annotation/cvd_opt/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "camera_pose_annotation/cvd_opt/cvd_opt.py",
"chars": 14840,
"preview": "# Copyright 2025 DeepMind Technologies Limited\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you"
},
{
"path": "camera_pose_annotation/cvd_opt/geometry_utils.py",
"chars": 7390,
"preview": "# Copyright 2025 DeepMind Technologies Limited\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you"
},
{
"path": "camera_pose_annotation/cvd_opt/inference_batch.py",
"chars": 3357,
"preview": "\"\"\"\nBatch inference script for CVD (Camera View Depth) optimization.\nProcesses multiple video clips in parallel using mu"
},
{
"path": "camera_pose_annotation/cvd_opt/preprocess/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "camera_pose_annotation/cvd_opt/preprocess/core/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "camera_pose_annotation/cvd_opt/preprocess/core/corr.py",
"chars": 3694,
"preview": "# Copyright 2025 DeepMind Technologies Limited\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you"
},
{
"path": "camera_pose_annotation/cvd_opt/preprocess/core/datasets.py",
"chars": 9856,
"preview": "# Copyright 2025 DeepMind Technologies Limited\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you"
},
{
"path": "camera_pose_annotation/cvd_opt/preprocess/core/extractor.py",
"chars": 8746,
"preview": "# Copyright 2025 DeepMind Technologies Limited\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you"
},
{
"path": "camera_pose_annotation/cvd_opt/preprocess/core/raft.py",
"chars": 5541,
"preview": "# Copyright 2025 DeepMind Technologies Limited\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you"
},
{
"path": "camera_pose_annotation/cvd_opt/preprocess/core/update.py",
"chars": 6010,
"preview": "# Copyright 2025 DeepMind Technologies Limited\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you"
},
{
"path": "camera_pose_annotation/cvd_opt/preprocess/core/utils/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "camera_pose_annotation/cvd_opt/preprocess/core/utils/augmentor.py",
"chars": 9476,
"preview": "# Copyright 2025 DeepMind Technologies Limited\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you"
},
{
"path": "camera_pose_annotation/cvd_opt/preprocess/core/utils/flow_viz.py",
"chars": 4393,
"preview": "# Copyright 2025 DeepMind Technologies Limited\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you"
},
{
"path": "camera_pose_annotation/cvd_opt/preprocess/core/utils/frame_utils.py",
"chars": 4564,
"preview": "# Copyright 2025 DeepMind Technologies Limited\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you"
},
{
"path": "camera_pose_annotation/cvd_opt/preprocess/core/utils/utils.py",
"chars": 3219,
"preview": "# Copyright 2025 DeepMind Technologies Limited\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you"
},
{
"path": "camera_pose_annotation/cvd_opt/preprocess/inference_batch.py",
"chars": 3421,
"preview": "\"\"\"\nBatch inference script for optical flow preprocessing using RAFT model.\nProcesses multiple video clips in parallel t"
},
{
"path": "camera_pose_annotation/cvd_opt/preprocess/preprocess_flow.py",
"chars": 6698,
"preview": "# Copyright 2025 DeepMind Technologies Limited\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you"
},
{
"path": "camera_pose_annotation/depth_estimation/Depth-Anything/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "camera_pose_annotation/depth_estimation/Depth-Anything/depth_anything_v2/dinov2.py",
"chars": 15178,
"preview": "# Copyright (c) Meta Platforms, Inc. and affiliates.\n#\n# This source code is licensed under the Apache License, Version "
},
{
"path": "camera_pose_annotation/depth_estimation/Depth-Anything/depth_anything_v2/dinov2_layers/__init__.py",
"chars": 382,
"preview": "# Copyright (c) Meta Platforms, Inc. and affiliates.\n# All rights reserved.\n#\n# This source code is licensed under the l"
},
{
"path": "camera_pose_annotation/depth_estimation/Depth-Anything/depth_anything_v2/dinov2_layers/attention.py",
"chars": 2343,
"preview": "# Copyright (c) Meta Platforms, Inc. and affiliates.\n# All rights reserved.\n#\n# This source code is licensed under the l"
},
{
"path": "camera_pose_annotation/depth_estimation/Depth-Anything/depth_anything_v2/dinov2_layers/block.py",
"chars": 9332,
"preview": "# Copyright (c) Meta Platforms, Inc. and affiliates.\n# All rights reserved.\n#\n# This source code is licensed under the l"
},
{
"path": "camera_pose_annotation/depth_estimation/Depth-Anything/depth_anything_v2/dinov2_layers/drop_path.py",
"chars": 1160,
"preview": "# Copyright (c) Meta Platforms, Inc. and affiliates.\n# All rights reserved.\n#\n# This source code is licensed under the l"
},
{
"path": "camera_pose_annotation/depth_estimation/Depth-Anything/depth_anything_v2/dinov2_layers/layer_scale.py",
"chars": 823,
"preview": "# Copyright (c) Meta Platforms, Inc. and affiliates.\n# All rights reserved.\n#\n# This source code is licensed under the l"
},
{
"path": "camera_pose_annotation/depth_estimation/Depth-Anything/depth_anything_v2/dinov2_layers/mlp.py",
"chars": 1272,
"preview": "# Copyright (c) Meta Platforms, Inc. and affiliates.\n# All rights reserved.\n#\n# This source code is licensed under the l"
},
{
"path": "camera_pose_annotation/depth_estimation/Depth-Anything/depth_anything_v2/dinov2_layers/patch_embed.py",
"chars": 2832,
"preview": "# Copyright (c) Meta Platforms, Inc. and affiliates.\n# All rights reserved.\n#\n# This source code is licensed under the l"
},
{
"path": "camera_pose_annotation/depth_estimation/Depth-Anything/depth_anything_v2/dinov2_layers/swiglu_ffn.py",
"chars": 1859,
"preview": "# Copyright (c) Meta Platforms, Inc. and affiliates.\n# All rights reserved.\n#\n# This source code is licensed under the l"
},
{
"path": "camera_pose_annotation/depth_estimation/Depth-Anything/depth_anything_v2/dpt.py",
"chars": 7391,
"preview": "import cv2\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom torchvision.transforms import Compose"
},
{
"path": "camera_pose_annotation/depth_estimation/Depth-Anything/depth_anything_v2/util/blocks.py",
"chars": 3987,
"preview": "import torch.nn as nn\n\n\ndef _make_scratch(in_shape, out_shape, groups=1, expand=False):\n scratch = nn.Module()\n\n o"
},
{
"path": "camera_pose_annotation/depth_estimation/Depth-Anything/depth_anything_v2/util/transform.py",
"chars": 6075,
"preview": "import numpy as np\nimport cv2\n\n\nclass Resize(object):\n \"\"\"Resize sample to given size (width, height).\n \"\"\"\n\n d"
},
{
"path": "camera_pose_annotation/depth_estimation/Depth-Anything/inference.py",
"chars": 2741,
"preview": "\"\"\"\nSingle-threaded inference script for Depth-Anything V2 model.\nProcesses images in a directory to generate depth maps"
},
{
"path": "camera_pose_annotation/depth_estimation/Depth-Anything/inference_batch.py",
"chars": 7310,
"preview": "\"\"\"\nDistributed batch inference script for Depth-Anything V2 model.\nProcesses video frames to generate depth maps using "
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/inference.py",
"chars": 2686,
"preview": "\"\"\"\nSingle-threaded inference script for UniDepth V2 model.\nProcesses images in a directory to generate depth maps and c"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/inference_batch.py",
"chars": 5962,
"preview": "\"\"\"\nDistributed batch inference script for UniDepth V2 model.\nProcesses video frames to generate depth maps and camera i"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/_2d3ds.py",
"chars": 2216,
"preview": "from typing import Any\n\nimport torch\n\nfrom unidepth.datasets.pipelines import Compose, PanoCrop, PanoRoll\nfrom unidepth."
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/_4dor.py",
"chars": 1642,
"preview": "from typing import Any\n\nfrom unidepth.datasets.sequence_dataset import SequenceDataset\n\n\nclass _4DOR(SequenceDataset):\n "
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/__init__.py",
"chars": 2427,
"preview": "from ._2d3ds import _2D3DS\nfrom ._4dor import _4DOR\nfrom .a2d2 import A2D2\nfrom .adt import ADT\nfrom .aimotive import ai"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/a2d2.py",
"chars": 2314,
"preview": "import json\nimport os\n\nimport h5py\nimport numpy as np\nimport torch\n\nfrom unidepth.datasets.image_dataset import ImageDat"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/adt.py",
"chars": 2451,
"preview": "from typing import Any\n\nimport torch\n\nfrom unidepth.datasets.sequence_dataset import SequenceDataset\n\n\nclass ADT(Sequenc"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/aimotive.py",
"chars": 1582,
"preview": "from typing import Any\n\nfrom unidepth.datasets.sequence_dataset import SequenceDataset\n\n\nclass aiMotive(SequenceDataset)"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/argoverse.py",
"chars": 2111,
"preview": "import json\nimport os\n\nimport h5py\nimport numpy as np\nimport torch\n\nfrom unidepth.datasets.image_dataset import ImageDat"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/argoverse2.py",
"chars": 1480,
"preview": "from typing import Any\n\nfrom unidepth.datasets.sequence_dataset import SequenceDataset\n\n\nclass Argoverse2(SequenceDatase"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/arkit.py",
"chars": 1474,
"preview": "from typing import Any\n\nfrom unidepth.datasets.sequence_dataset import SequenceDataset\n\n\nclass ARKit(SequenceDataset):\n "
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/ase.py",
"chars": 2268,
"preview": "from typing import Any\n\nimport torch\n\nfrom unidepth.datasets.sequence_dataset import SequenceDataset\n\n\nclass ASE(Sequenc"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/base_dataset.py",
"chars": 12812,
"preview": "import os\nfrom abc import abstractmethod\nfrom copy import deepcopy\nfrom math import ceil, log\nfrom typing import Any, Di"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/bdd.py",
"chars": 2541,
"preview": "import json\nimport os\n\nimport h5py\nimport numpy as np\nimport torch\n\nfrom unidepth.datasets.image_dataset import ImageDat"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/bedlam.py",
"chars": 1542,
"preview": "from typing import Any\n\nfrom unidepth.datasets.sequence_dataset import SequenceDataset\n\n\nclass BEDLAM(SequenceDataset):\n"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/behave.py",
"chars": 1645,
"preview": "from typing import Any\n\nfrom unidepth.datasets.sequence_dataset import SequenceDataset\n\n\nclass Behave(SequenceDataset):\n"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/blendedmvg.py",
"chars": 1554,
"preview": "from typing import Any\n\nfrom unidepth.datasets.sequence_dataset import SequenceDataset\n\n\nclass BlendedMVG(SequenceDatase"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/cityscape.py",
"chars": 2253,
"preview": "import json\nimport os\n\nimport h5py\nimport numpy as np\nimport torch\n\nfrom unidepth.datasets.image_dataset import ImageDat"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/ddad.py",
"chars": 2459,
"preview": "import json\nimport os\n\nimport h5py\nimport numpy as np\nimport torch\n\nfrom unidepth.datasets.image_dataset import ImageDat"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/deep360.py",
"chars": 1753,
"preview": "from typing import Any\n\nimport torch\n\nfrom unidepth.datasets.pipelines import Compose, PanoCrop, PanoRoll\nfrom unidepth."
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/dense.py",
"chars": 2437,
"preview": "import os\n\nimport h5py\nimport numpy as np\nimport torch\n\nfrom unidepth.datasets.image_dataset import ImageDataset\nfrom un"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/diml.py",
"chars": 2327,
"preview": "import json\nimport os\n\nimport h5py\nimport numpy as np\nimport torch\n\nfrom unidepth.datasets.image_dataset import ImageDat"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/diode.py",
"chars": 8152,
"preview": "import os\n\nimport h5py\nimport numpy as np\nimport torch\n\nfrom unidepth.datasets.image_dataset import ImageDataset\nfrom un"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/dl3dv.py",
"chars": 1480,
"preview": "from typing import Any\n\nfrom unidepth.datasets.sequence_dataset import SequenceDataset\n\n\nclass DL3DV(SequenceDataset):\n "
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/driving_stereo.py",
"chars": 2474,
"preview": "import json\nimport os\n\nimport h5py\nimport numpy as np\nimport torch\n\nfrom unidepth.datasets.image_dataset import ImageDat"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/dtu_rmvd.py",
"chars": 1805,
"preview": "import json\nimport os\nfrom typing import Any\n\nimport h5py\nimport numpy as np\nimport torch\n\nfrom unidepth.datasets.image_"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/dummy.py",
"chars": 1029,
"preview": "import numpy as np\nimport torch\nfrom torch.utils.data import Dataset\n\n\nclass Dummy(Dataset):\n train_split = None\n "
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/dynamic_replica.py",
"chars": 1577,
"preview": "from typing import Any\n\nfrom unidepth.datasets.sequence_dataset import SequenceDataset\n\n\nclass DynReplica(SequenceDatase"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/eden.py",
"chars": 1539,
"preview": "from typing import Any\n\nfrom unidepth.datasets.sequence_dataset import SequenceDataset\n\n\nclass EDEN(SequenceDataset):\n "
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/eth3d.py",
"chars": 3731,
"preview": "import json\nimport os\n\nimport h5py\nimport numpy as np\nimport torch\n\nfrom unidepth.datasets.image_dataset import ImageDat"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/eth3d_rmvd.py",
"chars": 1495,
"preview": "import json\nimport os\nfrom typing import Any\n\nimport h5py\nimport numpy as np\nimport torch\n\nfrom unidepth.datasets.image_"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/facedepth.py",
"chars": 1570,
"preview": "from typing import Any\n\nfrom unidepth.datasets.sequence_dataset import SequenceDataset\n\n\nclass FaceDepth(SequenceDataset"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/flsea.py",
"chars": 2828,
"preview": "import os\n\nimport h5py\nimport numpy as np\nimport torch\n\nfrom unidepth.datasets.image_dataset import ImageDataset\nfrom un"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/futurehouse.py",
"chars": 1760,
"preview": "from typing import Any\n\nimport torch\n\nfrom unidepth.datasets.pipelines import Compose, PanoCrop, PanoRoll\nfrom unidepth."
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/gibson.py",
"chars": 1750,
"preview": "from typing import Any\n\nimport torch\n\nfrom unidepth.datasets.pipelines import Compose, PanoCrop, PanoRoll\nfrom unidepth."
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/hammer.py",
"chars": 2235,
"preview": "import json\nimport os\n\nimport h5py\nimport numpy as np\nimport torch\n\nfrom unidepth.datasets.image_dataset import ImageDat"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/hm3d.py",
"chars": 1463,
"preview": "from typing import Any\n\nfrom unidepth.datasets.sequence_dataset import SequenceDataset\n\n\nclass HM3D(SequenceDataset):\n "
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/hoi4d.py",
"chars": 1562,
"preview": "from typing import Any\n\nfrom unidepth.datasets.sequence_dataset import SequenceDataset\n\n\nclass HOI4D(SequenceDataset):\n "
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/hrwsi.py",
"chars": 2229,
"preview": "import os\n\nimport h5py\nimport numpy as np\nimport torch\n\nfrom unidepth.datasets.image_dataset import ImageDataset\nfrom un"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/hypersim.py",
"chars": 2978,
"preview": "import json\nimport os\n\nimport h5py\nimport numpy as np\nimport torch\n\nfrom unidepth.datasets.image_dataset import ImageDat"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/ibims.py",
"chars": 3749,
"preview": "import json\nimport os\n\nimport h5py\nimport numpy as np\nimport torch\n\nfrom unidepth.datasets.image_dataset import ImageDat"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/image_dataset.py",
"chars": 7052,
"preview": "import io\nimport os\nfrom time import time\nfrom typing import Any, Dict, List, Tuple\n\nimport numpy as np\nimport tables\nim"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/ken_burns.py",
"chars": 2922,
"preview": "import json\nimport os\n\nimport h5py\nimport numpy as np\nimport torch\n\nfrom unidepth.datasets.image_dataset import ImageDat"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/kitti.py",
"chars": 8363,
"preview": "import os\n\nimport h5py\nimport numpy as np\nimport torch\n\nfrom unidepth.datasets.image_dataset import ImageDataset\nfrom un"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/kitti360.py",
"chars": 2206,
"preview": "from typing import Any\n\nimport torch\n\nfrom unidepth.datasets.sequence_dataset import SequenceDataset\n\n\nclass KITTI360(Se"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/kitti_multi.py",
"chars": 3337,
"preview": "import json\nimport os\nfrom typing import Any\n\nimport h5py\nimport numpy as np\nimport torch\n\nfrom unidepth.datasets.image_"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/kitti_rmvd.py",
"chars": 2308,
"preview": "import json\nimport os\nfrom typing import Any\n\nimport h5py\nimport numpy as np\nimport torch\n\nfrom unidepth.datasets.pipeli"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/lyft.py",
"chars": 2497,
"preview": "import json\nimport os\n\nimport h5py\nimport numpy as np\nimport torch\n\nfrom unidepth.datasets.image_dataset import ImageDat"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/mapillary.py",
"chars": 2560,
"preview": "import json\nimport os\n\nimport h5py\nimport numpy as np\nimport torch\n\nfrom unidepth.datasets.image_dataset import ImageDat"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/matrix_city.py",
"chars": 1557,
"preview": "from typing import Any\n\nfrom unidepth.datasets.sequence_dataset import SequenceDataset\n\n\nclass MatrixCity(SequenceDatase"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/matterport3d.py",
"chars": 1762,
"preview": "from typing import Any\n\nimport torch\n\nfrom unidepth.datasets.pipelines import Compose, PanoCrop, PanoRoll\nfrom unidepth."
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/megadepth.py",
"chars": 2336,
"preview": "import os\n\nimport h5py\nimport numpy as np\n\nfrom unidepth.datasets.image_dataset import ImageDataset\nfrom unidepth.datase"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/megadepth_s.py",
"chars": 1570,
"preview": "from typing import Any\n\nfrom unidepth.datasets.sequence_dataset import SequenceDataset\n\n\nclass MegaDepthS(SequenceDatase"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/midair.py",
"chars": 1564,
"preview": "from typing import Any\n\nfrom unidepth.datasets.sequence_dataset import SequenceDataset\n\n\nclass MidAir(SequenceDataset):\n"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/mip.py",
"chars": 1627,
"preview": "from typing import Any\n\nfrom unidepth.datasets.sequence_dataset import SequenceDataset\n\n\nclass MIP(SequenceDataset):\n "
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/ms2.py",
"chars": 1559,
"preview": "from typing import Any\n\nfrom unidepth.datasets.sequence_dataset import SequenceDataset\n\n\nclass MS2(SequenceDataset):\n "
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/mvimgnet.py",
"chars": 3484,
"preview": "from typing import Any\n\nfrom unidepth.datasets.sequence_dataset import SequenceDataset\n\nINVALID_SEQUENCES = [\n \"1/000"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/mvsynth.py",
"chars": 1611,
"preview": "from typing import Any\n\nfrom unidepth.datasets.sequence_dataset import SequenceDataset\n\n\nclass MVSynth(SequenceDataset):"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/nerds360.py",
"chars": 1473,
"preview": "from typing import Any\n\nfrom unidepth.datasets.sequence_dataset import SequenceDataset\n\n\nclass NeRDS360(SequenceDataset)"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/niantic_mapfree.py",
"chars": 1553,
"preview": "from typing import Any\n\nfrom unidepth.datasets.sequence_dataset import SequenceDataset\n\n\nclass NianticMapFree(SequenceDa"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/nuscenes.py",
"chars": 2624,
"preview": "import json\nimport os\n\nimport h5py\nimport numpy as np\nimport torch\n\nfrom unidepth.datasets.image_dataset import ImageDat"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/nyuv2.py",
"chars": 3242,
"preview": "import os\n\nimport h5py\nimport numpy as np\nimport torch\n\nfrom unidepth.datasets.image_dataset import ImageDataset\nfrom un"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/oasis.py",
"chars": 2204,
"preview": "import os\n\nimport h5py\nimport numpy as np\nimport torch\n\nfrom unidepth.datasets.image_dataset import ImageDataset\nfrom un"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/pipelines/__init__.py",
"chars": 613,
"preview": "from .formating import AnnotationMask, Collect\nfrom .transforms import (Compose, ContextCrop, Crop, GaussianBlur, KittiC"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/pipelines/formating.py",
"chars": 3106,
"preview": "from collections.abc import Sequence\n\nimport numpy as np\nimport torch\n\n\nclass Collect(object):\n def __init__(\n "
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/pipelines/transforms.py",
"chars": 53732,
"preview": "import os\nimport random\nfrom copy import deepcopy\nfrom math import ceil, exp, log, log2, log10, tanh\nfrom typing import "
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/point_odyssey.py",
"chars": 1562,
"preview": "from typing import Any\n\nfrom unidepth.datasets.sequence_dataset import SequenceDataset\n\n\nclass PointOdyssey(SequenceData"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/proteus.py",
"chars": 1565,
"preview": "from typing import Any\n\nfrom unidepth.datasets.sequence_dataset import SequenceDataset\n\n\nclass Proteus(SequenceDataset):"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/samplers copy.py",
"chars": 7808,
"preview": "import itertools\nimport warnings\nfrom operator import itemgetter\nfrom typing import Any, Optional\n\nimport numpy as np\nim"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/samplers.py",
"chars": 570,
"preview": "import torch\n\n\nclass DistributedSamplerNoDuplicate(torch.utils.data.DistributedSampler):\n \"\"\"A distributed sampler th"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/scannet.py",
"chars": 1472,
"preview": "from typing import Any\n\nfrom unidepth.datasets.sequence_dataset import SequenceDataset\n\n\nclass ScanNet(SequenceDataset):"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/scannetpp.py",
"chars": 3004,
"preview": "from typing import Any\n\nfrom unidepth.datasets.sequence_dataset import SequenceDataset\n\n\nclass ScanNetpp(SequenceDataset"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/sequence_dataset.py",
"chars": 11174,
"preview": "import json\nimport os\nfrom functools import partial\nfrom typing import Any, Dict, Tuple\n\nimport h5py\nimport numpy as np\n"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/sintel copy.py",
"chars": 1581,
"preview": "from typing import Any\n\nfrom unidepth.datasets.sequence_dataset import SequenceDataset\n\n\nclass Sintel(SequenceDataset):\n"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/sintel.py",
"chars": 1441,
"preview": "from typing import Any\n\nfrom unidepth.datasets.sequence_dataset import SequenceDataset\n\n\nclass Sintel(SequenceDataset):\n"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/sunrgbd.py",
"chars": 2096,
"preview": "import json\nimport os\n\nimport h5py\nimport numpy as np\nimport torch\n\nfrom unidepth.datasets.image_dataset import ImageDat"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/synscapes.py",
"chars": 1550,
"preview": "from typing import Any\n\nfrom unidepth.datasets.sequence_dataset import SequenceDataset\n\n\nclass Synscapes(SequenceDataset"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/tartanair.py",
"chars": 1571,
"preview": "from typing import Any\n\nfrom unidepth.datasets.sequence_dataset import SequenceDataset\n\n\nclass TartanAir(SequenceDataset"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/taskonomy.py",
"chars": 2830,
"preview": "import json\nimport os\n\nimport h5py\nimport numpy as np\nimport torch\n\nfrom unidepth.datasets.image_dataset import ImageDat"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/tat_rmvd.py",
"chars": 1848,
"preview": "import json\nimport os\nfrom copy import deepcopy\nfrom typing import Any\n\nimport h5py\nimport numpy as np\nimport torch\n\nfro"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/theo.py",
"chars": 2229,
"preview": "from typing import Any\n\nimport torch\n\nfrom unidepth.datasets.sequence_dataset import SequenceDataset\n\n\nclass Theo(Sequen"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/unrealstereo4k.py",
"chars": 1561,
"preview": "from typing import Any\n\nfrom unidepth.datasets.sequence_dataset import SequenceDataset\n\n\nclass UnrealStereo4K(SequenceDa"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/urbansyn.py",
"chars": 1548,
"preview": "from typing import Any\n\nfrom unidepth.datasets.sequence_dataset import SequenceDataset\n\n\nclass UrbanSyn(SequenceDataset)"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/utils.py",
"chars": 8902,
"preview": "import copy\nimport multiprocessing as mp\nimport pickle\nfrom collections import defaultdict\nfrom typing import Any, Dict,"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/utils_decode.py",
"chars": 4637,
"preview": "import io\n\nimport cv2\nimport numpy as np\nimport torch\nimport torchvision\nimport torchvision.transforms.v2.functional as "
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/vkitti.py",
"chars": 1579,
"preview": "from typing import Any\n\nfrom unidepth.datasets.sequence_dataset import SequenceDataset\n\n\nclass VKITTI(SequenceDataset):\n"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/void.py",
"chars": 2310,
"preview": "import json\nimport os\n\nimport h5py\nimport numpy as np\nimport torch\n\nfrom unidepth.datasets.image_dataset import ImageDat"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/waymo.py",
"chars": 1555,
"preview": "from typing import Any\n\nfrom unidepth.datasets.sequence_dataset import SequenceDataset\n\n\nclass Waymo(SequenceDataset):\n "
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/datasets/wildrgbd.py",
"chars": 1494,
"preview": "from typing import Any\n\nfrom unidepth.datasets.sequence_dataset import SequenceDataset\n\n\nclass WildRGBD(SequenceDataset)"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/layers/__init__.py",
"chars": 647,
"preview": "from .activation import GEGLU, SwiGLU\nfrom .attention import AttentionBlock, AttentionDecoderBlock, AttentionLayer\nfrom "
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/layers/activation.py",
"chars": 374,
"preview": "import torch\nimport torch.nn as nn\nimport torch.nn.functional as F\n\n\nclass SwiGLU(nn.Module):\n def forward(self, x: t"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/layers/attention.py",
"chars": 11639,
"preview": "\"\"\"\nAuthor: Luigi Piccinelli\nLicensed under the CC-BY NC 4.0 license (http://creativecommons.org/licenses/by-nc/4.0/)\n\"\""
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/layers/convnext.py",
"chars": 1212,
"preview": "import torch\nimport torch.nn as nn\n\n\nclass CvnxtBlock(nn.Module):\n def __init__(\n self,\n dim,\n k"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/layers/drop_path.py",
"chars": 722,
"preview": "import torch\nimport torch.nn as nn\n\n\ndef drop_path(x: torch.Tensor, drop_prob: float = 0.0, training: bool = False):\n "
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/layers/layer_scale.py",
"chars": 462,
"preview": "import torch\nimport torch.nn as nn\n\n\nclass LayerScale(nn.Module):\n def __init__(\n self,\n dim: int,\n "
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/layers/mlp.py",
"chars": 1004,
"preview": "import torch\nimport torch.nn as nn\n\nfrom unidepth.utils.misc import default\n\nfrom .activation import SwiGLU\n\n\nclass MLP("
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/layers/nystrom.py",
"chars": 11405,
"preview": "# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.\n#\n# This source code is licensed under the BSD l"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/layers/nystrom_attention.py",
"chars": 2303,
"preview": "from functools import partial\n\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom einops import rea"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/layers/positional_encoding.py",
"chars": 7943,
"preview": "\"\"\"\nAuthor: Luigi Piccinelli\nLicensed under the CC-BY NC 4.0 license (http://creativecommons.org/licenses/by-nc/4.0/)\n\"\""
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/layers/upsample.py",
"chars": 6412,
"preview": "\"\"\"\nAuthor: Luigi Piccinelli\nLicensed under the CC-BY NC 4.0 license (http://creativecommons.org/licenses/by-nc/4.0/)\n\"\""
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/models/__init__.py",
"chars": 157,
"preview": "from .unidepthv1 import UniDepthV1\nfrom .unidepthv2 import UniDepthV2, UniDepthV2old\n\n__all__ = [\n \"UniDepthV1\",\n "
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/models/backbones/__init__.py",
"chars": 179,
"preview": "from .convnext import ConvNeXt\nfrom .convnext2 import ConvNeXtV2\nfrom .dinov2 import _make_dinov2_model\n\n__all__ = [\n "
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/models/backbones/convnext.py",
"chars": 21239,
"preview": "from collections import OrderedDict\nfrom functools import partial\nfrom typing import Callable, Optional, Sequence, Tuple"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/models/backbones/convnext2.py",
"chars": 10003,
"preview": "import torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom timm.models.layers import DropPath, trunc_normal"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/models/backbones/dinov2.py",
"chars": 16719,
"preview": "import contextlib\nimport logging\nimport math\nfrom functools import partial\nfrom typing import Callable, Sequence\n\nimport"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/models/backbones/metadinov2/__init__.py",
"chars": 425,
"preview": "# Copyright (c) Meta Platforms, Inc. and affiliates.\n# All rights reserved.\n#\n# This source code is licensed under the l"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/models/backbones/metadinov2/attention.py",
"chars": 2484,
"preview": "# Copyright (c) Meta Platforms, Inc. and affiliates.\n# All rights reserved.\n#\n# This source code is licensed under the l"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/models/backbones/metadinov2/block.py",
"chars": 9848,
"preview": "# Copyright (c) Meta Platforms, Inc. and affiliates.\n# All rights reserved.\n#\n# This source code is licensed under the l"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/models/backbones/metadinov2/dino_head.py",
"chars": 2099,
"preview": "# Copyright (c) Meta Platforms, Inc. and affiliates.\n# All rights reserved.\n#\n# This source code is licensed under the l"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/models/backbones/metadinov2/drop_path.py",
"chars": 1175,
"preview": "# Copyright (c) Meta Platforms, Inc. and affiliates.\n# All rights reserved.\n#\n# This source code is licensed under the l"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/models/backbones/metadinov2/layer_scale.py",
"chars": 824,
"preview": "# Copyright (c) Meta Platforms, Inc. and affiliates.\n# All rights reserved.\n#\n# This source code is licensed under the l"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/models/backbones/metadinov2/mlp.py",
"chars": 1272,
"preview": "# Copyright (c) Meta Platforms, Inc. and affiliates.\n# All rights reserved.\n#\n# This source code is licensed under the l"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/models/backbones/metadinov2/patch_embed.py",
"chars": 2974,
"preview": "# Copyright (c) Meta Platforms, Inc. and affiliates.\n# All rights reserved.\n#\n# This source code is licensed under the l"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/models/backbones/metadinov2/swiglu_ffn.py",
"chars": 1859,
"preview": "# Copyright (c) Meta Platforms, Inc. and affiliates.\n# All rights reserved.\n#\n# This source code is licensed under the l"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/models/encoder.py",
"chars": 6556,
"preview": "import torch\nimport torch.nn as nn\n\nfrom unidepth.models.backbones import ConvNeXt, ConvNeXtV2, _make_dinov2_model\n\n\ncla"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/models/unidepthv1/__init__.py",
"chars": 68,
"preview": "from .unidepthv1 import UniDepthV1\n\n__all__ = [\n \"UniDepthV1\",\n]\n"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/models/unidepthv1/decoder.py",
"chars": 18801,
"preview": "\"\"\"\nAuthor: Luigi Piccinelli\nLicensed under the CC-BY NC 4.0 license (http://creativecommons.org/licenses/by-nc/4.0/)\n\"\""
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/models/unidepthv1/unidepthv1.py",
"chars": 16614,
"preview": "\"\"\"\nAuthor: Luigi Piccinelli\nLicensed under the CC-BY NC 4.0 license (http://creativecommons.org/licenses/by-nc/4.0/)\n\"\""
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/models/unidepthv2/__init__.py",
"chars": 131,
"preview": "from .unidepthv2 import UniDepthV2\nfrom .unidepthv2_old import UniDepthV2old\n\n__all__ = [\n \"UniDepthV2\",\n \"UniDept"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/models/unidepthv2/decoder.py",
"chars": 18571,
"preview": "\"\"\"\nAuthor: Luigi Piccinelli\nLicensed under the CC-BY NC 4.0 license (http://creativecommons.org/licenses/by-nc/4.0/)\n\"\""
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/models/unidepthv2/decoder_old.py",
"chars": 21465,
"preview": "import torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom einops import rearrange\nfrom timm.models.layers "
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/models/unidepthv2/export.py",
"chars": 5096,
"preview": "\"\"\"\nAuthor: Luigi Piccinelli\nLicensed under the CC-BY NC 4.0 license (http://creativecommons.org/licenses/by-nc/4.0/)\n\"\""
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/models/unidepthv2/unidepthv2.py",
"chars": 17203,
"preview": "\"\"\"\nAuthor: Luigi Piccinelli\nLicensed under the CC-BY NC 4.0 license (http://creativecommons.org/licenses/by-nc/4.0/)\n\"\""
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/models/unidepthv2/unidepthv2_old.py",
"chars": 12826,
"preview": "import importlib\nimport warnings\nfrom copy import deepcopy\nfrom math import ceil\n\nimport torch\nimport torch.nn as nn\nimp"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/ops/__init__.py",
"chars": 206,
"preview": "from .losses import (ARel, Confidence, Dummy, EdgeGuidedLocalSSI, LocalSSI,\n Regression, SelfDistill"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/ops/extract_patches/__init__.py",
"chars": 149,
"preview": "from .functions import ExtractPatchesFunction\nfrom .modules import RandomPatchExtractor\n\n__all__ = [\"ExtractPatchesFunct"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/ops/extract_patches/compile.sh",
"chars": 145,
"preview": "#!/usr/bin/env bash\n\nif [ -z \"$TORCH_CUDA_ARCH_LIST\" ]; then\n export TORCH_CUDA_ARCH_LIST=\"7.5 8.0 8.6+PTX\"\nfi\n\npytho"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/ops/extract_patches/functions/__init__.py",
"chars": 90,
"preview": "from .extract_patches import ExtractPatchesFunction\n\n__all__ = [\"ExtractPatchesFunction\"]\n"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/ops/extract_patches/functions/extract_patches.py",
"chars": 1136,
"preview": "import RandomPatchExtraction\nimport torch\nfrom torch.autograd import Function\n\n\nclass ExtractPatchesFunction(Function):\n"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/ops/extract_patches/modules/__init__.py",
"chars": 86,
"preview": "from .patch_extractor import RandomPatchExtractor\n\n__all__ = [\"RandomPatchExtractor\"]\n"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/ops/extract_patches/modules/patch_extractor.py",
"chars": 1199,
"preview": "from __future__ import absolute_import, division, print_function\n\nimport torch\nimport torch.nn.functional as F\nfrom torc"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/ops/extract_patches/setup.py",
"chars": 1673,
"preview": "import glob\nimport os\n\nimport torch\nfrom setuptools import find_packages, setup\nfrom torch.utils.cpp_extension import CU"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/ops/extract_patches/src/cpu/extract_patches_cpu.cpp",
"chars": 450,
"preview": "#include <vector>\n\n#include <torch/extension.h>\n#include <ATen/cuda/CUDAContext.h>\n\ntorch::Tensor extract_patches_cpu_fo"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/ops/extract_patches/src/cpu/extract_patches_cpu.h",
"chars": 346,
"preview": "#pragma once\n#include <torch/extension.h>\n#include <vector>\n\n\ntorch::Tensor extract_patches_cpu_forward(\n const torch"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/ops/extract_patches/src/cuda/extract_patches_cuda.h",
"chars": 525,
"preview": "#ifndef EXTRACT_PATCHES_CUDA_H\n#define EXTRACT_PATCHES_CUDA_H\n\n#include <torch/extension.h>\n#include <vector>\n#include <"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/ops/extract_patches/src/cuda/extract_patches_kernel.cu",
"chars": 4624,
"preview": "#include <cuda_runtime.h>\n#include <torch/extension.h>\n\n#include \"cuda/extract_patches_kernel.cuh\"\n#include \"cuda/extrac"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/ops/extract_patches/src/cuda/extract_patches_kernel.cuh",
"chars": 812,
"preview": "#ifndef EXTRACT_PATCHES_KERNEL_CUH\n#define EXTRACT_PATCHES_KERNEL_CUH\n\n#include <torch/extension.h>\n#include <vector>\n#i"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/ops/extract_patches/src/extract_patches.cpp",
"chars": 271,
"preview": "\n#include \"extract_patches.h\"\n\nPYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {\n m.def(\"extract_patches_forward\", &extract_pa"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/ops/extract_patches/src/extract_patches.h",
"chars": 1008,
"preview": "#pragma once\n\n#include \"cpu/extract_patches_cpu.h\"\n\n#ifdef WITH_CUDA\n#include \"cuda/extract_patches_cuda.h\"\n#endif\n\n#inc"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/ops/extract_patches/test.py",
"chars": 594,
"preview": "import RandomPatchExtraction\nimport torch\n\n\ndef extract_patches(input, centers, patch_size):\n h, w = patch_size\n o"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/ops/knn/__init__.py",
"chars": 101,
"preview": "from .functions.knn import knn_gather, knn_points\n\n__all__ = [\n \"knn_points\",\n \"knn_gather\",\n]\n"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/ops/knn/compile.sh",
"chars": 185,
"preview": "#!/usr/bin/env bash\n\nexport TORCH_CUDA_ARCH_LIST=\"6.1 7.0 7.5 8.0 8.6 8.7 8.9 9.0+PTX\" \n# export FORCE_CUDA=1 #if you do"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/ops/knn/functions/__init__.py",
"chars": 91,
"preview": "from .knn import knn_gather, knn_points\n\n__all__ = [\n \"knn_points\",\n \"knn_gather\",\n]\n"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/ops/knn/functions/knn.py",
"chars": 10310,
"preview": "# Copyright (c) Meta Platforms, Inc. and affiliates.\n# All rights reserved.\n#\n# This source code is licensed under the B"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/ops/knn/setup.py",
"chars": 1622,
"preview": "import glob\nimport os\n\nimport torch\nfrom setuptools import find_packages, setup\nfrom torch.utils.cpp_extension import CU"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/ops/knn/src/knn.cu",
"chars": 22244,
"preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/ops/knn/src/knn.h",
"chars": 5449,
"preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/ops/knn/src/knn_cpu.cpp",
"chars": 4192,
"preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/ops/knn/src/knn_ext.cpp",
"chars": 271,
"preview": "#include <torch/extension.h>\n#include \"knn.h\"\n\nPYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {\n#ifdef WITH_CUDA\n m.def(\"knn_"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/ops/knn/src/utils/dispatch.cuh",
"chars": 10395,
"preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/ops/knn/src/utils/index_utils.cuh",
"chars": 5334,
"preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/ops/knn/src/utils/mink.cuh",
"chars": 4720,
"preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/ops/knn/src/utils/pytorch3d_cutils.h",
"chars": 522,
"preview": "/*\n * Copyright (c) Meta Platforms, Inc. and affiliates.\n * All rights reserved.\n *\n * This source code is licensed unde"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/ops/losses/__init__.py",
"chars": 244,
"preview": "from .arel import ARel\nfrom .confidence import Confidence\nfrom .distill import SelfDistill, TeacherDistill\nfrom .dummy i"
},
{
"path": "camera_pose_annotation/depth_estimation/UniDepth/unidepth/ops/losses/arel.py",
"chars": 1325,
"preview": "import torch\nimport torch.nn as nn\n\nfrom .utils import FNS, masked_mean\n\n\nclass ARel(nn.Module):\n def __init__(\n "
}
]
// ... and 338 more files (omitted from this condensed preview)
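Because the condensed listing above is a plain JSON array of records with the keys "path", "chars", and "preview", it can be consumed programmatically. Below is a minimal sketch, assuming the array has been saved locally as spatialvid_files.json (a hypothetical filename), that totals the character counts, ranks files by size, and filters the camera pose annotation pipeline:

import json

# Assumption: the JSON array shown above was saved to this (hypothetical) file.
with open("spatialvid_files.json", "r", encoding="utf-8") as f:
    files = json.load(f)

# Total character count across the listed files.
total_chars = sum(entry["chars"] for entry in files)
print(f"{len(files)} files, {total_chars} characters")

# The ten largest files, e.g. to decide what to feed an LLM first.
for entry in sorted(files, key=lambda e: e["chars"], reverse=True)[:10]:
    print(f'{entry["chars"]:>7}  {entry["path"]}')

# Restrict to the camera pose annotation pipeline.
pose_files = [e for e in files if e["path"].startswith("camera_pose_annotation/")]
print(f"{len(pose_files)} files under camera_pose_annotation/")

Note that this only inspects the metadata and previews; the full file contents are not part of the condensed listing.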
About this extraction
This document contains the source code of the NJU-3DV/SpatialVID GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction covers 538 files (3.1 MB, approximately 838.2k tokens) and includes a symbol index of 2,762 extracted functions, classes, methods, constants, and types. Extracted with GitExtract by Nikandr Surkov.