gitextract_oqcovuov/

├── .gitignore
├── LICENSE
├── README.md
├── ape/
│   ├── __init__.py
│   ├── checkpoint/
│   │   ├── __init__.py
│   │   └── detection_checkpoint.py
│   ├── data/
│   │   ├── __init__.py
│   │   ├── build.py
│   │   ├── build_copypaste.py
│   │   ├── build_multi_dataset.py
│   │   ├── build_multi_dataset_copypaste.py
│   │   ├── common_copypaste.py
│   │   ├── dataset_mapper.py
│   │   ├── dataset_mapper_copypaste.py
│   │   ├── dataset_mapper_detr_instance.py
│   │   ├── dataset_mapper_detr_instance_exp.py
│   │   ├── dataset_mapper_detr_panoptic.py
│   │   ├── dataset_mapper_detr_panoptic_copypaste.py
│   │   ├── dataset_mapper_detr_semantic.py
│   │   ├── datasets/
│   │   │   ├── __init__.py
│   │   │   ├── coco.py
│   │   │   ├── d_cube.py
│   │   │   ├── flickr30k.py
│   │   │   ├── gqa.py
│   │   │   ├── grit.py
│   │   │   ├── inst_categories.py
│   │   │   ├── lvis_coco.py
│   │   │   ├── lvis_coco_panoptic.py
│   │   │   ├── lvis_v1_coco_category_image_count.py
│   │   │   ├── objects365.py
│   │   │   ├── odinw_categories.py
│   │   │   ├── odinw_instance.py
│   │   │   ├── odinw_prompts.py
│   │   │   ├── oid.py
│   │   │   ├── openimages_v6_category_image_count.py
│   │   │   ├── pascal_voc_external.py
│   │   │   ├── phrasecut.py
│   │   │   ├── refcoco.py
│   │   │   ├── register_bdd100k_panoseg.py
│   │   │   ├── register_bdd100k_semseg.py
│   │   │   ├── register_pascal_context.py
│   │   │   ├── register_voc_seg.py
│   │   │   ├── sa1b.py
│   │   │   ├── seginw_categories.py
│   │   │   ├── seginw_instance.py
│   │   │   ├── visualgenome.py
│   │   │   └── visualgenome_categories.py
│   │   ├── detection_utils.py
│   │   ├── mapper_utils.py
│   │   ├── samplers/
│   │   │   ├── __init__.py
│   │   │   └── distributed_sampler_multi_dataset.py
│   │   └── transforms/
│   │       ├── __init__.py
│   │       ├── augmentation_aa.py
│   │       └── augmentation_lsj.py
│   ├── engine/
│   │   ├── __init__.py
│   │   ├── defaults.py
│   │   └── train_loop.py
│   ├── evaluation/
│   │   ├── __init__.py
│   │   ├── d3_evaluation.py
│   │   ├── evaluator.py
│   │   ├── instance_evaluation.py
│   │   ├── lvis_evaluation.py
│   │   ├── multi_dataset_evaluator.py
│   │   ├── oideval.py
│   │   ├── refcoco_evaluation.py
│   │   └── refcocoeval.py
│   ├── layers/
│   │   ├── __init__.py
│   │   ├── csrc/
│   │   │   ├── MsDeformAttn/
│   │   │   │   ├── ms_deform_attn.h
│   │   │   │   ├── ms_deform_attn_cpu.cpp
│   │   │   │   ├── ms_deform_attn_cpu.h
│   │   │   │   ├── ms_deform_attn_cuda.cu
│   │   │   │   ├── ms_deform_attn_cuda.h
│   │   │   │   └── ms_deform_im2col_cuda.cuh
│   │   │   ├── cuda_version.cu
│   │   │   └── vision.cpp
│   │   ├── fuse_helper.py
│   │   ├── multi_scale_deform_attn.py
│   │   ├── vision_language_align.py
│   │   ├── vision_language_fusion.py
│   │   └── zero_shot_fc.py
│   ├── model_zoo/
│   │   ├── __init__.py
│   │   └── model_zoo.py
│   ├── modeling/
│   │   ├── __init__.py
│   │   ├── ape_deta/
│   │   │   ├── __init__.py
│   │   │   ├── ape_deta.py
│   │   │   ├── assigner.py
│   │   │   ├── deformable_criterion.py
│   │   │   ├── deformable_detr.py
│   │   │   ├── deformable_detr_segm.py
│   │   │   ├── deformable_detr_segm_vl.py
│   │   │   ├── deformable_transformer.py
│   │   │   ├── deformable_transformer_vl.py
│   │   │   ├── fast_rcnn.py
│   │   │   ├── misc.py
│   │   │   └── segmentation.py
│   │   ├── backbone/
│   │   │   ├── __init__.py
│   │   │   ├── utils_eva.py
│   │   │   ├── utils_eva02.py
│   │   │   ├── vit.py
│   │   │   ├── vit_eva.py
│   │   │   ├── vit_eva02.py
│   │   │   └── vit_eva_clip.py
│   │   ├── deta/
│   │   │   ├── __init__.py
│   │   │   ├── assigner.py
│   │   │   ├── deformable_criterion.py
│   │   │   ├── deformable_detr.py
│   │   │   ├── deformable_detr_segm.py
│   │   │   ├── deformable_transformer.py
│   │   │   ├── misc.py
│   │   │   └── segmentation.py
│   │   └── text/
│   │       ├── __init__.py
│   │       ├── bert_wrapper.py
│   │       ├── clip_wrapper.py
│   │       ├── clip_wrapper_eva01.py
│   │       ├── clip_wrapper_eva02.py
│   │       ├── clip_wrapper_open.py
│   │       ├── eva01_clip/
│   │       │   ├── README.md
│   │       │   ├── __init__.py
│   │       │   ├── clip.py
│   │       │   ├── eva_clip.py
│   │       │   ├── eva_model.py
│   │       │   ├── model.py
│   │       │   ├── simple_tokenizer.py
│   │       │   └── vit_model.py
│   │       ├── eva02_clip/
│   │       │   ├── __init__.py
│   │       │   ├── constants.py
│   │       │   ├── eva_vit_model.py
│   │       │   ├── factory.py
│   │       │   ├── hf_configs.py
│   │       │   ├── hf_model.py
│   │       │   ├── loss.py
│   │       │   ├── model.py
│   │       │   ├── modified_resnet.py
│   │       │   ├── openai.py
│   │       │   ├── pretrained.py
│   │       │   ├── rope.py
│   │       │   ├── timm_model.py
│   │       │   ├── tokenizer.py
│   │       │   ├── transform.py
│   │       │   ├── transformer.py
│   │       │   └── utils.py
│   │       ├── llama2_wrapper.py
│   │       ├── t5_wrapper.py
│   │       ├── text_encoder.py
│   │       └── utils.py
│   └── utils/
│       ├── __init__.py
│       ├── box_ops.py
│       ├── misc.py
│       └── plot_utils.py
├── configs/
│   ├── ADE20kFull_SemanticSegmentation/
│   │   └── ape_deta/
│   │       ├── ape_deta_vitl_eva02_clip_vlf_lsj1024.py
│   │       ├── ape_deta_vitl_eva02_lsj1024.py
│   │       ├── ape_deta_vitl_eva02_vlf_lsj1024.py
│   │       └── ape_deta_vitt_eva02_vlf_lsj1024.py
│   ├── ADE20k_PanopticSegmentation/
│   │   └── ape_deta/
│   │       ├── ape_deta_r50_160k.py
│   │       ├── ape_deta_vitl_eva02_clip_vlf_lsj1024.py
│   │       ├── ape_deta_vitl_eva02_lsj1024.py
│   │       ├── ape_deta_vitl_eva02_vlf_lsj1024.py
│   │       └── ape_deta_vitt_eva02_vlf_lsj1024.py
│   ├── ADE20k_SemanticSegmentation/
│   │   ├── ape_deta/
│   │   │   ├── ape_deta_vitl_eva02_clip_vlf_lsj1024.py
│   │   │   ├── ape_deta_vitl_eva02_lsj1024.py
│   │   │   ├── ape_deta_vitl_eva02_vlf_lsj1024.py
│   │   │   └── ape_deta_vitt_eva02_vlf_lsj1024.py
│   │   └── deformable_deta/
│   │       └── deformable_deta_segm_r50_160k.py
│   ├── BDD10k_PanopticSegmentation/
│   │   └── ape_deta/
│   │       ├── ape_deta_vitl_eva02_clip_vlf_lsj1024.py
│   │       ├── ape_deta_vitl_eva02_lsj1024.py
│   │       ├── ape_deta_vitl_eva02_vlf_lsj1024.py
│   │       └── ape_deta_vitt_eva02_vlf_lsj1024.py
│   ├── BDD10k_SemanticSegmentation/
│   │   └── ape_deta/
│   │       ├── ape_deta_vitl_eva02_clip_vlf_lsj1024.py
│   │       ├── ape_deta_vitl_eva02_lsj1024.py
│   │       ├── ape_deta_vitl_eva02_vlf_lsj1024.py
│   │       └── ape_deta_vitt_eva02_vlf_lsj1024.py
│   ├── COCO_Detection/
│   │   ├── deformable_deta/
│   │   │   ├── deformable_deta_r50_12ep.py
│   │   │   ├── deformable_deta_r50_24ep.py
│   │   │   ├── deformable_deta_vitb_clip_openai_lsj1024_cp_12ep.py
│   │   │   ├── deformable_deta_vitb_lsj1024_12ep.py
│   │   │   ├── deformable_deta_vitg_eva_lsj1024_12ep.py
│   │   │   ├── deformable_deta_vitg_eva_lsj1024_cp_12ep.py
│   │   │   ├── deformable_deta_vitl_eva02_lsj1024_cp_12ep.py
│   │   │   ├── deformable_deta_vitl_eva_lsj1024_cp_12ep.py
│   │   │   ├── deformable_deta_vitl_lsj1024_12ep.py
│   │   │   └── models/
│   │   │       └── deformable_deta_r50.py
│   │   └── deformable_detr/
│   │       ├── deformable_detr_r50_50ep.py
│   │       ├── deformable_detr_r50_two_stage_50ep.py
│   │       ├── deformable_detr_r50_with_box_refinement_50ep.py
│   │       ├── improved_deformable_detr_r50_12ep.py
│   │       ├── improved_deformable_detr_r50_50ep.py
│   │       ├── improved_deformable_detr_r50_two_stage_12ep.py
│   │       ├── improved_deformable_detr_r50_two_stage_50ep.py
│   │       └── models/
│   │           ├── deformable_detr_r50.py
│   │           └── improved_deformable_detr_r50.py
│   ├── COCO_InstanceSegmentation/
│   │   ├── ape_deta/
│   │   │   ├── ape_deta_r50_12ep.py
│   │   │   ├── ape_deta_r50_vlf_12ep.py
│   │   │   ├── ape_deta_vite_eva02_clip_lsj1024_cp_12ep_fsdp.py
│   │   │   ├── ape_deta_vite_eva02_clip_lsj1024_cp_32x90k_fsdp.py
│   │   │   ├── ape_deta_vitg_eva01_clip_lsj1536_cp_128x45k.py
│   │   │   ├── ape_deta_vitg_eva01_clip_lsj1536_cp_64x90k.py
│   │   │   ├── ape_deta_vitg_eva01_lsj1536_cp_64x90k.py
│   │   │   ├── ape_deta_vitl_eva02_clip_lsj1024_cp_12ep.py
│   │   │   ├── ape_deta_vitl_eva02_clip_lsj1024_cp_12ep_fsdp.py
│   │   │   ├── ape_deta_vitl_eva02_clip_lsj1536_cp_128x45k.py
│   │   │   ├── ape_deta_vitl_eva02_clip_lsj1536_cp_64x90k.py
│   │   │   ├── ape_deta_vitl_eva02_clip_vlf_lsj1024_cp_12ep.py
│   │   │   ├── ape_deta_vitl_eva02_lsj1024_cp_12ep.py
│   │   │   ├── ape_deta_vitl_eva02_lsj1536_cp_128x90k.py
│   │   │   ├── ape_deta_vitl_eva02_lsj1536_cp_12ep.py
│   │   │   ├── ape_deta_vitl_eva02_lsj1536_cp_64x90k.py
│   │   │   ├── ape_deta_vitl_eva02_vlf_lsj1024_cp_12ep.py
│   │   │   ├── ape_deta_vitl_lsj1024_cp_12ep.py
│   │   │   ├── ape_deta_vitt_eva02_lsj1024_cp_12ep.py
│   │   │   ├── ape_deta_vitt_eva02_vlf_lsj1024_cp_12ep.py
│   │   │   └── models/
│   │   │       └── ape_deta_r50.py
│   │   └── deformable_deta/
│   │       ├── deformable_deta_segm_r50_12ep.py
│   │       ├── deformable_deta_segm_r50_24ep.py
│   │       ├── deformable_deta_segm_vitl_eva02_lsj1024_cp_12ep.py
│   │       └── models/
│   │           └── deformable_deta_segm_r50.py
│   ├── COCO_PanopticSegmentation/
│   │   ├── ape_deta/
│   │   │   ├── ape_deta_r50_12ep.py
│   │   │   ├── ape_deta_r50_12ep_separated.py
│   │   │   ├── ape_deta_r50_24ep.py
│   │   │   ├── ape_deta_r50_lsj1024.py
│   │   │   ├── ape_deta_r50_vlf_lsj1024.py
│   │   │   ├── ape_deta_vitl_eva02_clip_vlf_lsj1024.py
│   │   │   ├── ape_deta_vitl_eva02_lsj1024.py
│   │   │   ├── ape_deta_vitl_eva02_vlf_lsj1024.py
│   │   │   └── ape_deta_vitt_eva02_vlf_lsj1024.py
│   │   └── deformable_deta/
│   │       ├── deformable_deta_segm_r50_12ep.py
│   │       ├── deformable_deta_segm_r50_24ep.py
│   │       ├── deformable_deta_segm_r50_36ep.py
│   │       └── deformable_deta_segm_r50_50ep.py
│   ├── COCO_REFCOCO/
│   │   └── ape_deta/
│   │       ├── ape_deta_r50_12ep.py
│   │       ├── ape_deta_r50_24ep.py
│   │       ├── ape_deta_r50_36ep.py
│   │       ├── ape_deta_r50_vlf_12ep.py
│   │       ├── ape_deta_r50_vlf_36ep.py
│   │       ├── ape_deta_r50_vlf_bert_36ep.py
│   │       ├── ape_deta_vitl_eva02_lsj1024_12ep.py
│   │       ├── ape_deta_vitl_eva02_vlf_lsj1024_36ep.py
│   │       └── ape_deta_vitl_lsj1024_12ep.py
│   ├── COCO_SA1B_InstanceSegmentation/
│   │   └── ape_deta/
│   │       ├── ape_deta_r50_24ep.py
│   │       └── ape_deta_r50_24ep_mp.py
│   ├── COCO_SA1B_PanopticSegmentation/
│   │   └── ape_deta/
│   │       ├── ape_deta_r50_24ep.py
│   │       ├── ape_deta_r50_24ep_lp.py
│   │       └── ape_deta_r50_24ep_vlf_lp.py
│   ├── COCO_SemanticSegmentation/
│   │   ├── ape_deta/
│   │   │   ├── ape_deta_r50_12ep.py
│   │   │   ├── ape_deta_r50_vlf_lsj1024_12ep.py
│   │   │   └── ape_deta_vitl_eva02_lsj1024_12ep.py
│   │   └── deformable_deta/
│   │       └── deformable_deta_segm_r50_12ep.py
│   ├── Cityscapes_PanopticSegmentation/
│   │   └── ape_deta/
│   │       ├── ape_deta_vitl_eva02_clip_vlf_lsj1024.py
│   │       ├── ape_deta_vitl_eva02_lsj1024.py
│   │       ├── ape_deta_vitl_eva02_vlf_lsj1024.py
│   │       └── ape_deta_vitt_eva02_vlf_lsj1024.py
│   ├── D3_InstanceSegmentation/
│   │   └── ape_deta/
│   │       ├── ape_deta_vitl_eva02_clip_vlf_lsj1024.py
│   │       ├── ape_deta_vitl_eva02_lsj1024.py
│   │       ├── ape_deta_vitl_eva02_vlf_lsj1024.py
│   │       └── ape_deta_vitt_eva02_vlf_lsj1024.py
│   ├── Flickr30k_VisualGrounding/
│   │   └── ape_deta/
│   │       ├── ape_deta_r50_12ep.py
│   │       ├── ape_deta_r50_vlf_12ep.py
│   │       ├── ape_deta_vitl_eva02_clip_vlf_lsj1024.py
│   │       └── ape_deta_vitl_eva02_vlf_lsj1024.py
│   ├── GQA_VisualGrounding/
│   │   └── ape_deta/
│   │       ├── ape_deta_r50_12ep.py
│   │       ├── ape_deta_r50_12ep_eval_odinw13.py
│   │       ├── ape_deta_r50_12ep_eval_odinw35.py
│   │       ├── ape_deta_r50_vlf_12ep.py
│   │       ├── ape_deta_r50_vlf_12ep_eval_odinw13.py
│   │       └── ape_deta_r50_vlf_12ep_eval_odinw35.py
│   ├── GRIT_SA1B_VisualGrounding/
│   │   └── ape_deta/
│   │       ├── ape_deta_r50_24ep.py
│   │       └── ape_deta_r50_vlf_24ep.py
│   ├── GRIT_VisualGrounding/
│   │   └── ape_deta/
│   │       ├── ape_deta_r50_400k.py
│   │       ├── ape_deta_r50_vlf_400k.py
│   │       └── ape_deta_r50_vlf_lsj224_256x50k.py
│   ├── LVISCOCOCOCOSTUFF_O365_OID_VG/
│   │   └── ape_deta/
│   │       ├── ape_deta_r50_lsj1024_cp_50ep.py
│   │       ├── ape_deta_vitl_eva02_lsj1024_cp_180k.py
│   │       ├── ape_deta_vitl_eva02_lsj1024_cp_720k.py
│   │       ├── ape_deta_vitl_eva02_vlf_lsj1024_cp_180k.py
│   │       └── ape_deta_vitl_eva02_vlf_lsj1024_cp_720k.py
│   ├── LVISCOCOCOCOSTUFF_O365_OID_VGR_REFCOCO/
│   │   └── ape_deta/
│   │       ├── ape_deta_vitl_eva02_lsj1024_cp_180k.py
│   │       ├── ape_deta_vitl_eva02_lsj1024_cp_720k.py
│   │       ├── ape_deta_vitl_eva02_vlf_lsj1024_cp_1080k.py
│   │       ├── ape_deta_vitl_eva02_vlf_lsj1024_cp_180k.py
│   │       └── ape_deta_vitl_eva02_vlf_lsj1024_cp_720k.py
│   ├── LVISCOCOCOCOSTUFF_O365_OID_VGR_SA1B_REFCOCO/
│   │   └── ape_deta/
│   │       ├── ape_deta_vitl_eva02_vlf_lsj1024_cp_1080k.py
│   │       └── ape_deta_vitl_eva02_vlf_lsj1024_cp_2160k.py
│   ├── LVISCOCOCOCOSTUFF_O365_OID_VGR_SA1B_REFCOCO_GQA_PhraseCut_Flickr30k/
│   │   └── ape_deta/
│   │       ├── ape_deta_vite_eva02_clip_vlf_lsj1024_cp_16x4_1080k.py
│   │       ├── ape_deta_vite_eva02_clip_vlf_lsj1024_cp_16x4_1080k_mdl_fsdp.py
│   │       ├── ape_deta_vite_eva02_clip_vlf_lsj1024_cp_32x2_540k_mdl_fsdp.py
│   │       ├── ape_deta_vitl_eva02_clip_vlf_lsj1024_cp_08x8x270k.py
│   │       ├── ape_deta_vitl_eva02_clip_vlf_lsj1024_cp_1080k.py
│   │       ├── ape_deta_vitl_eva02_clip_vlf_lsj1024_cp_16x4_1080k.py
│   │       ├── ape_deta_vitl_eva02_clip_vlf_lsj1024_cp_16x4_1080k_mdl.py
│   │       ├── ape_deta_vitl_eva02_clip_vlf_lsj1024_cp_16x4_1080k_mdl_llama2.py
│   │       ├── ape_deta_vitl_eva02_clip_vlf_lsj1024_cp_16x4x270k.py
│   │       ├── ape_deta_vitl_eva02_clip_vlf_lsj1024_cp_16x4x270k_mdl.py
│   │       ├── ape_deta_vitl_eva02_clip_vlf_lsj1024_cp_16x4x270k_mdl_llama2.py
│   │       ├── ape_deta_vitl_eva02_clip_vlf_lsj1024_cp_16x4x337k_mdl.py
│   │       ├── ape_deta_vitl_eva02_clip_vlf_lsj1024_cp_32x2x270k.py
│   │       ├── ape_deta_vitl_eva02_clip_vlf_lsj1024_cp_48x2x270k.py
│   │       ├── ape_deta_vitl_eva02_clip_vlf_lsj1024_cp_64x1x270k.py
│   │       ├── ape_deta_vitl_eva02_clip_vlf_lsj1536_cp_08x8x270k.py
│   │       ├── ape_deta_vitl_eva02_clip_vlf_lsj1536_cp_32x2x270k.py
│   │       ├── ape_deta_vitl_eva02_clip_vlf_lsj1536_cp_64x270k.py
│   │       ├── ape_deta_vitl_eva02_vlf_lsj1024_cp_1080k.py
│   │       ├── ape_deta_vitt_eva02_vlf_lsj1024_cp_16x4_1080k.py
│   │       ├── ape_deta_vitt_eva02_vlf_lsj1024_cp_16x4_1080k_mdl.py
│   │       └── ape_deta_vitt_eva02_vlf_lsj1024_cp_64x1_270k_mdl.py
│   ├── LVISCOCOCOCOSTUFF_PanopticSegmentation/
│   │   └── ape_deta/
│   │       ├── ape_deta_r50_lsj1024_cp_50ep.py
│   │       └── ape_deta_vitl_eva02_lsj1024_cp_24ep.py
│   ├── LVISCOCOCOCOSTUFF_REFCOCO/
│   │   └── ape_deta/
│   │       ├── ape_deta_r50_lsj1024_50ep.py
│   │       ├── ape_deta_r50_lsj1024_cp_50ep.py
│   │       ├── ape_deta_r50_vlf_lsj1024_cp_50ep.py
│   │       ├── ape_deta_r50_vlf_lsj1024_cp_bert_50ep.py
│   │       ├── ape_deta_vitl_eva02_lsj1024_24ep.py
│   │       └── ape_deta_vitl_eva02_vlf_lsj1024_50ep.py
│   ├── LVISCOCO_COCOSTUFF_O365_OID_VG_REFCOCO/
│   │   └── ape_deta/
│   │       ├── ape_deta_vitl_eva02_vlf_lsj1024_cp_180k.py
│   │       └── ape_deta_vitl_eva02_vlf_lsj1024_cp_720k.py
│   ├── LVISCOCO_COCOSTUFF_PanopticSegmentation/
│   │   └── ape_deta/
│   │       └── ape_deta_r50_lsj1024_cp_50ep.py
│   ├── LVIS_Detection/
│   │   └── deformable_deta/
│   │       ├── deformable_deta_r50_lsj1024_24ep.py
│   │       ├── deformable_deta_vitb_lsj1024_24ep.py
│   │       ├── deformable_deta_vitg_eva_lsj1024_24ep.py
│   │       ├── deformable_deta_vitg_eva_lsj1024_cp_24ep.py
│   │       ├── deformable_deta_vitl_eva02_lsj1024_cp_24ep.py
│   │       ├── deformable_deta_vitl_eva_lsj1024_cp_24ep.py
│   │       └── deformable_deta_vitl_lsj1024_24ep.py
│   ├── LVIS_InstanceSegmentation/
│   │   ├── ape_deta/
│   │   │   ├── ape_deta_r50_24ep.py
│   │   │   ├── ape_deta_r50_vlf_24ep.py
│   │   │   ├── ape_deta_vite_eva02_clip_lsj1024_cp_24ep_fsdp.py
│   │   │   ├── ape_deta_vitl_eva02_clip_lsj1024_cp_24ep.py
│   │   │   ├── ape_deta_vitl_eva02_clip_lsj1536_cp_64x90k.py
│   │   │   ├── ape_deta_vitl_eva02_clip_vlf_lsj1024_cp_24ep.py
│   │   │   ├── ape_deta_vitl_eva02_lsj1024_cp_24ep.py
│   │   │   ├── ape_deta_vitl_eva02_lsj1536_cp_64x90k.py
│   │   │   ├── ape_deta_vitl_eva02_vlf_lsj1024_cp_24ep.py
│   │   │   ├── ape_deta_vitt_eva02_lsj1024_cp_24ep.py
│   │   │   └── ape_deta_vitt_eva02_vlf_lsj1024_cp_24ep.py
│   │   └── deformable_deta/
│   │       ├── deformable_deta_segm_vitl_eva02_4scale_lsj1024_cp_24ep.py
│   │       └── deformable_deta_segm_vitl_eva02_lsj1024_cp_24ep.py
│   ├── LVIS_SA1B_InstanceSegmentation/
│   │   └── ape_deta/
│   │       ├── ape_deta_r50_50ep.py
│   │       ├── ape_deta_r50_50ep_eval_odinw13.py
│   │       ├── ape_deta_r50_50ep_eval_odinw35.py
│   │       ├── ape_deta_r50_50ep_eval_seginw.py
│   │       ├── ape_deta_r50_50ep_iouloss_lp.py
│   │       └── ape_deta_r50_50ep_mp.py
│   ├── ODinW_Detection/
│   │   └── ape_deta/
│   │       ├── ape_deta_vitl_eva02_clip_vlf_lsj1024_13.py
│   │       ├── ape_deta_vitl_eva02_clip_vlf_lsj1024_35.py
│   │       ├── ape_deta_vitl_eva02_lsj1024_13.py
│   │       ├── ape_deta_vitl_eva02_lsj1024_35.py
│   │       ├── ape_deta_vitl_eva02_vlf_lsj1024_13.py
│   │       ├── ape_deta_vitl_eva02_vlf_lsj1024_35.py
│   │       ├── ape_deta_vitt_eva02_vlf_lsj1024_13.py
│   │       └── ape_deta_vitt_eva02_vlf_lsj1024_35.py
│   ├── PascalContext459_SemanticSegmentation/
│   │   └── ape_deta/
│   │       ├── ape_deta_vitl_eva02_clip_vlf_lsj1024.py
│   │       ├── ape_deta_vitl_eva02_lsj1024.py
│   │       ├── ape_deta_vitl_eva02_vlf_lsj1024.py
│   │       └── ape_deta_vitt_eva02_vlf_lsj1024.py
│   ├── PascalContext59_SemanticSegmentation/
│   │   └── ape_deta/
│   │       ├── ape_deta_vitl_eva02_clip_vlf_lsj1024.py
│   │       ├── ape_deta_vitl_eva02_lsj1024.py
│   │       ├── ape_deta_vitl_eva02_vlf_lsj1024.py
│   │       └── ape_deta_vitt_eva02_vlf_lsj1024.py
│   ├── PascalVOC20_SemanticSegmentation/
│   │   └── ape_deta/
│   │       ├── ape_deta_vitl_eva02_clip_vlf_lsj1024.py
│   │       ├── ape_deta_vitl_eva02_lsj1024.py
│   │       ├── ape_deta_vitl_eva02_vlf_lsj1024.py
│   │       └── ape_deta_vitt_eva02_vlf_lsj1024.py
│   ├── PascalVOCParts_PanopticSegmentation/
│   │   └── ape_deta/
│   │       ├── ape_deta_r50_12ep.py
│   │       └── ape_deta_r50_vlf_12ep.py
│   ├── PhraseCut_VisualGrounding/
│   │   └── ape_deta/
│   │       ├── ape_deta_r50_12ep.py
│   │       ├── ape_deta_r50_vlf_12ep.py
│   │       ├── ape_deta_vitl_eva02_clip_vlf_lsj1024.py
│   │       └── ape_deta_vitl_eva02_vlf_lsj1024.py
│   ├── REFCOCO_VisualGrounding/
│   │   └── ape_deta/
│   │       ├── ape_deta_r50_12ep.py
│   │       ├── ape_deta_r50_bert_vlf_12ep.py
│   │       ├── ape_deta_r50_vlf_12ep.py
│   │       ├── ape_deta_vitl_eva02_clip_lsj1024_12ep.py
│   │       ├── ape_deta_vitl_eva02_clip_vlf_lsj1024_12ep.py
│   │       ├── ape_deta_vitl_eva02_vlf_lsj1024_12ep.py
│   │       └── ape_deta_vitl_lsj1024_12ep.py
│   ├── Roboflow_Detection/
│   │   └── ape_deta/
│   │       ├── ape_deta_vitl_eva02_clip_vlf_lsj1024.py
│   │       ├── ape_deta_vitl_eva02_lsj1024.py
│   │       ├── ape_deta_vitl_eva02_vlf_lsj1024.py
│   │       └── ape_deta_vitt_eva02_vlf_lsj1024.py
│   ├── SegInW_InstanceSegmentation/
│   │   └── ape_deta/
│   │       ├── ape_deta_vitl_eva02_clip_vlf_lsj1024.py
│   │       ├── ape_deta_vitl_eva02_lsj1024.py
│   │       ├── ape_deta_vitl_eva02_vlf_lsj1024.py
│   │       └── ape_deta_vitt_eva02_vlf_lsj1024.py
│   ├── VisualGenome_VisualGrounding/
│   │   └── ape_deta/
│   │       ├── ape_deta_r50_12ep.py
│   │       ├── ape_deta_r50_12ep_eval_odinw13.py
│   │       ├── ape_deta_r50_12ep_eval_odinw35.py
│   │       ├── ape_deta_r50_vlf_12ep.py
│   │       ├── ape_deta_r50_vlf_12ep_eval_odinw13.py
│   │       └── ape_deta_r50_vlf_12ep_eval_odinw35.py
│   └── common/
│       ├── backbone/
│       │   ├── vite_eva02_clip_1024.py
│       │   ├── vite_eva02_clip_1536.py
│       │   ├── vitg_eva01.py
│       │   ├── vitg_eva01_1536.py
│       │   ├── vitg_eva01_clip_1024.py
│       │   ├── vitg_eva01_clip_1536.py
│       │   ├── vitl_eva02.py
│       │   ├── vitl_eva02_1536.py
│       │   ├── vitl_eva02_clip.py
│       │   ├── vitl_eva02_clip_1536.py
│       │   └── vitt_eva02.py
│       └── data/
│           ├── ade20k_panoptic.py
│           ├── ade20k_panoptic_lsj1024.py
│           ├── ade20k_semantic.py
│           ├── ade20k_semantic_lsj1024.py
│           ├── ade20kfull_semantic_lsj1024.py
│           ├── bdd10k_panoptic_lsj1024.py
│           ├── bdd10k_semantic_lsj1024.py
│           ├── cityscapes_panoptic_lsj1024.py
│           ├── cityscapes_semantic_lsj1024.py
│           ├── coco_instance.py
│           ├── coco_instance_lsj1024.py
│           ├── coco_instance_lsj1024_cp.py
│           ├── coco_instance_lsj1536_cp.py
│           ├── coco_panoptic.py
│           ├── coco_panoptic_lsj1024.py
│           ├── coco_panoptic_separated.py
│           ├── coco_refcoco_instance.py
│           ├── coco_refcoco_instance_lsj1024.py
│           ├── coco_sa1b_instance.py
│           ├── coco_sa1b_panoptic.py
│           ├── coco_semantic.py
│           ├── coco_semantic_lsj1024.py
│           ├── constants.py
│           ├── d3_instance_lsj1024.py
│           ├── flickr30k_instance.py
│           ├── flickr30k_instance_lsj1024.py
│           ├── gqa_region_instance.py
│           ├── grit_instance.py
│           ├── grit_instance_lsj224.py
│           ├── grit_sa1b_instance.py
│           ├── lvis_instance_lsj1024_cp.py
│           ├── lvis_instance_lsj1536_cp.py
│           ├── lvis_sa1b_instance.py
│           ├── lviscoco_cocostuff_o365_oid_vg_refcoco_panoptic_lsj1024_cp.py
│           ├── lviscoco_cocostuff_panoptic_lsj1024_cp.py
│           ├── lviscocococostuff_o365_oid_refcoco_panoptic_lsj1024.py
│           ├── lviscocococostuff_o365_oid_refcoco_panoptic_lsj1024_cp.py
│           ├── lviscocococostuff_o365_oid_vg_panoptic_lsj1024_cp.py
│           ├── lviscocococostuff_o365_oid_vg_refcoco_panoptic_lsj1024_cp.py
│           ├── lviscocococostuff_o365_oid_vgr_refcoco_group_by_image_panoptic_lsj1024_cp.py
│           ├── lviscocococostuff_o365_oid_vgr_refcoco_panoptic_lsj1024_cp.py
│           ├── lviscocococostuff_o365_oid_vgr_sa1b_refcoco_group_by_image_gqa_panoptic_lsj1024_cp.py
│           ├── lviscocococostuff_o365_oid_vgr_sa1b_refcoco_group_by_image_gqa_panoptic_lsj1536_cp.py
│           ├── lviscocococostuff_o365_oid_vgr_sa1b_refcoco_group_by_image_gqa_phrasecut_flickr30k_panoptic_lsj1024_cp.py
│           ├── lviscocococostuff_o365_oid_vgr_sa1b_refcoco_group_by_image_gqa_phrasecut_flickr30k_panoptic_lsj1024_cp_mdl.py
│           ├── lviscocococostuff_o365_oid_vgr_sa1b_refcoco_group_by_image_gqa_phrasecut_flickr30k_panoptic_lsj1536_cp.py
│           ├── lviscocococostuff_o365_oid_vgr_sa1b_refcoco_group_by_image_gqa_phrasecut_panoptic_lsj1024_cp.py
│           ├── lviscocococostuff_o365_oid_vgr_sa1b_refcoco_group_by_image_gqa_phrasecut_panoptic_lsj1536_cp.py
│           ├── lviscocococostuff_o365_oid_vgr_sa1b_refcoco_group_by_image_panoptic_lsj1024_cp.py
│           ├── lviscocococostuff_panoptic_lsj1024_cp.py
│           ├── lviscocococostuff_refcoco_group_by_image_panoptic_lsj1024_cp.py
│           ├── lviscocococostuff_refcoco_panoptic_lsj1024.py
│           ├── lviscocococostuff_refcoco_panoptic_lsj1024_cp.py
│           ├── lviscocococostuff_sa1b_panoptic.py
│           ├── o365_instance_lsj1024.py
│           ├── odinw13_instance.py
│           ├── odinw13_instance_lsj1024.py
│           ├── odinw13_instance_lsj1536.py
│           ├── odinw35_instance.py
│           ├── odinw35_instance_lsj1024.py
│           ├── odinw35_instance_lsj1536.py
│           ├── odinwvoc_instance_lsj1024.py
│           ├── pascalcontext459_semantic_lsj1024.py
│           ├── pascalcontext59_semantic_lsj1024.py
│           ├── pascalvoc20_semantic_lsj1024.py
│           ├── pascalvocpart_panoptic.py
│           ├── phrasecut_instance.py
│           ├── phrasecut_instance_lsj1024.py
│           ├── refcoco_group_by_image_instance.py
│           ├── refcoco_group_by_image_instance_lsj1024.py
│           ├── refcoco_instance.py
│           ├── refcoco_instance_lsj1024.py
│           ├── roboflow100_instance_lsj1024.py
│           ├── seginw_instance.py
│           ├── seginw_instance_lsj1024.py
│           ├── seginw_instance_lsj1536.py
│           └── vgregion_instance.py
├── datasets/
│   ├── README.md
│   ├── prepare_ade20k_full_sem_seg.py
│   ├── prepare_coco_semantic_annos_from_panoptic_annos.py
│   ├── prepare_pascal_context.py
│   └── prepare_voc_sem_seg.py
├── demo/
│   ├── .gitattributes
│   ├── README.md
│   ├── app.py
│   ├── demo_lazy.py
│   ├── pre-requirements.txt
│   ├── predictor_lazy.py
│   └── requirements.txt
├── requirements.txt
├── scripts/
│   ├── eval_APE-L_A.sh
│   ├── eval_APE-L_B.sh
│   ├── eval_APE-L_C.sh
│   ├── eval_APE-L_D.sh
│   ├── eval_APE-Ti.sh
│   ├── eval_flops.sh
│   └── eval_time.sh
├── setup.py
└── tools/
    ├── analyze_model.py
    ├── eva_interpolate_patch_14to16.py
    ├── train_net.py
    ├── train_net_fsdp.py
    └── visualize_json_results.py