gitextract_nz69m5ai/

├── COCO_caption_prompts_30k.txt
├── DiT-ToCa/
│   ├── cache_functions/
│   │   ├── __init__.py
│   │   ├── attention.py
│   │   ├── cache_cutfresh.py
│   │   ├── cache_init.py
│   │   ├── cal_type.py
│   │   ├── force_init.py
│   │   ├── force_scheduler.py
│   │   ├── fresh_ratio_scheduler.py
│   │   ├── global_force_fresh.py
│   │   ├── score_evaluate.py
│   │   ├── scores.py
│   │   ├── token_merge.py
│   │   └── update_cache.py
│   ├── diffusion/
│   │   ├── __init__.py
│   │   ├── diffusion_utils.py
│   │   ├── gaussian_diffusion.py
│   │   ├── respace.py
│   │   └── timestep_sampler.py
│   ├── download.py
│   ├── environment-dit.yml
│   ├── models.py
│   ├── sample.py
│   ├── sample_ddp.py
│   └── train.py
├── DrawBench200.txt
├── LICENSE
├── Open-Sora/
│   ├── Dockerfile
│   ├── LICENSE
│   ├── README.md
│   ├── assets/
│   │   └── texts/
│   │       ├── VBench/
│   │       │   ├── all_category.txt
│   │       │   ├── all_dimension.txt
│   │       │   ├── all_i2v.txt
│   │       │   ├── prompts_per_category/
│   │       │   │   ├── animal.txt
│   │       │   │   ├── architecture.txt
│   │       │   │   ├── food.txt
│   │       │   │   ├── human.txt
│   │       │   │   ├── lifestyle.txt
│   │       │   │   ├── plant.txt
│   │       │   │   ├── scenery.txt
│   │       │   │   └── vehicles.txt
│   │       │   └── prompts_per_dimension/
│   │       │       ├── appearance_style.txt
│   │       │       ├── color.txt
│   │       │       ├── human_action.txt
│   │       │       ├── multiple_objects.txt
│   │       │       ├── object_class.txt
│   │       │       ├── overall_consistency.txt
│   │       │       ├── scene.txt
│   │       │       ├── spatial_relationship.txt
│   │       │       ├── subject_consistency.txt
│   │       │       ├── temporal_flickering.txt
│   │       │       └── temporal_style.txt
│   │       ├── imagenet_id.txt
│   │       ├── imagenet_labels.txt
│   │       ├── rand_types.txt
│   │       ├── t2i_samples.txt
│   │       ├── t2i_sigma.txt
│   │       ├── t2v_car.txt
│   │       ├── t2v_latte.txt
│   │       ├── t2v_pllava.txt
│   │       ├── t2v_ref.txt
│   │       ├── t2v_samples.txt
│   │       ├── t2v_short.txt
│   │       ├── t2v_sora.txt
│   │       ├── ucf101_id.txt
│   │       └── ucf101_labels.txt
│   ├── build/
│   │   └── lib/
│   │       ├── opensora/
│   │       │   ├── acceleration/
│   │       │   │   ├── __init__.py
│   │       │   │   ├── checkpoint.py
│   │       │   │   ├── communications.py
│   │       │   │   ├── parallel_states.py
│   │       │   │   ├── plugin.py
│   │       │   │   └── shardformer/
│   │       │   │       ├── __init__.py
│   │       │   │       ├── modeling/
│   │       │   │       │   ├── __init__.py
│   │       │   │       │   └── t5.py
│   │       │   │       └── policy/
│   │       │   │           ├── __init__.py
│   │       │   │           └── t5_encoder.py
│   │       │   ├── datasets/
│   │       │   │   ├── __init__.py
│   │       │   │   ├── aspect.py
│   │       │   │   ├── bucket.py
│   │       │   │   ├── dataloader.py
│   │       │   │   ├── datasets.py
│   │       │   │   ├── read_video.py
│   │       │   │   ├── sampler.py
│   │       │   │   ├── utils.py
│   │       │   │   └── video_transforms.py
│   │       │   └── models/
│   │       │       ├── cache_functions/
│   │       │       │   ├── __init__.py
│   │       │       │   ├── attention.py
│   │       │       │   ├── cache_cutfresh.py
│   │       │       │   ├── cache_init.py
│   │       │       │   ├── force_init.py
│   │       │       │   ├── force_scheduler.py
│   │       │       │   ├── fresh_ratio_scheduler.py
│   │       │       │   ├── global_force_fresh.py
│   │       │       │   ├── score_evaluate.py
│   │       │       │   ├── scores.py
│   │       │       │   ├── token_merge.py
│   │       │       │   └── update_cache.py
│   │       │       ├── dit/
│   │       │       │   ├── __init__.py
│   │       │       │   └── dit.py
│   │       │       ├── latte/
│   │       │       │   ├── __init__.py
│   │       │       │   └── latte.py
│   │       │       ├── layers/
│   │       │       │   ├── __init__.py
│   │       │       │   └── blocks.py
│   │       │       ├── pixart/
│   │       │       │   ├── __init__.py
│   │       │       │   ├── pixart.py
│   │       │       │   └── pixart_sigma.py
│   │       │       ├── stdit/
│   │       │       │   ├── __init__.py
│   │       │       │   ├── stdit.py
│   │       │       │   ├── stdit2.py
│   │       │       │   ├── stdit3 copy.py
│   │       │       │   └── stdit3.py
│   │       │       └── text_encoder/
│   │       │           ├── __init__.py
│   │       │           ├── classes.py
│   │       │           ├── clip.py
│   │       │           └── t5.py
│   │       ├── tools/
│   │       │   ├── caption/
│   │       │   │   ├── __init__.py
│   │       │   │   ├── acceleration/
│   │       │   │   │   ├── __init__.py
│   │       │   │   │   └── llava/
│   │       │   │   │       ├── __init__.py
│   │       │   │   │       └── policies/
│   │       │   │   │           ├── __init__.py
│   │       │   │   │           ├── llama.py
│   │       │   │   │           └── mistral.py
│   │       │   │   ├── camera_motion/
│   │       │   │   │   ├── __init__.py
│   │       │   │   │   ├── camera_motion.py
│   │       │   │   │   ├── detect.py
│   │       │   │   │   ├── utils.py
│   │       │   │   │   └── visualizer.py
│   │       │   │   ├── camera_motion_detect.py
│   │       │   │   ├── caption_gpt4.py
│   │       │   │   ├── caption_llama3.py
│   │       │   │   ├── caption_llava.py
│   │       │   │   └── utils.py
│   │       │   ├── datasets/
│   │       │   │   ├── __init__.py
│   │       │   │   ├── analyze.py
│   │       │   │   ├── convert.py
│   │       │   │   ├── datautil.py
│   │       │   │   ├── filter_panda10m.py
│   │       │   │   ├── split.py
│   │       │   │   ├── transform.py
│   │       │   │   └── utils.py
│   │       │   ├── frame_interpolation/
│   │       │   │   ├── __init__.py
│   │       │   │   ├── interpolation.py
│   │       │   │   ├── networks/
│   │       │   │   │   ├── __init__.py
│   │       │   │   │   ├── amt_g.py
│   │       │   │   │   └── blocks/
│   │       │   │   │       ├── __init__.py
│   │       │   │   │       ├── feat_enc.py
│   │       │   │   │       ├── ifrnet.py
│   │       │   │   │       ├── multi_flow.py
│   │       │   │   │       └── raft.py
│   │       │   │   └── utils/
│   │       │   │       ├── __init__.py
│   │       │   │       ├── dist_utils.py
│   │       │   │       ├── flow_utils.py
│   │       │   │       └── utils.py
│   │       │   ├── scene_cut/
│   │       │   │   ├── __init__.py
│   │       │   │   ├── convert_id_to_path.py
│   │       │   │   ├── cut.py
│   │       │   │   └── scene_detect.py
│   │       │   └── scoring/
│   │       │       ├── aesthetic/
│   │       │       │   ├── __init__.py
│   │       │       │   └── inference.py
│   │       │       └── matching/
│   │       │           ├── __init__.py
│   │       │           └── inference.py
│   │       ├── vbench/
│   │       │   ├── __init__.py
│   │       │   ├── aesthetic_quality.py
│   │       │   ├── appearance_style.py
│   │       │   ├── background_consistency.py
│   │       │   ├── cli/
│   │       │   │   ├── __init__.py
│   │       │   │   ├── evaluate.py
│   │       │   │   ├── static_filter.py
│   │       │   │   └── vbench.py
│   │       │   ├── color.py
│   │       │   ├── dynamic_degree.py
│   │       │   ├── human_action.py
│   │       │   ├── imaging_quality.py
│   │       │   ├── motion_smoothness.py
│   │       │   ├── multiple_objects.py
│   │       │   ├── object_class.py
│   │       │   ├── overall_consistency.py
│   │       │   ├── scene.py
│   │       │   ├── spatial_relationship.py
│   │       │   ├── subject_consistency.py
│   │       │   ├── temporal_flickering.py
│   │       │   ├── temporal_style.py
│   │       │   ├── third_pary/
│   │       │   │   ├── 0.txt
│   │       │   │   ├── RAFT/
│   │       │   │   │   ├── __init__.py
│   │       │   │   │   └── core/
│   │       │   │   │       ├── __init__.py
│   │       │   │   │       ├── corr.py
│   │       │   │   │       ├── datasets.py
│   │       │   │   │       ├── extractor.py
│   │       │   │   │       ├── raft.py
│   │       │   │   │       ├── update.py
│   │       │   │   │       └── utils_core/
│   │       │   │   │           ├── __init__.py
│   │       │   │   │           ├── augmentor.py
│   │       │   │   │           ├── flow_viz.py
│   │       │   │   │           ├── frame_utils.py
│   │       │   │   │           └── utils.py
│   │       │   │   ├── ViCLIP/
│   │       │   │   │   ├── __init__.py
│   │       │   │   │   ├── simple_tokenizer.py
│   │       │   │   │   ├── viclip.py
│   │       │   │   │   ├── viclip_text.py
│   │       │   │   │   └── viclip_vision.py
│   │       │   │   ├── __init__.py
│   │       │   │   ├── amt/
│   │       │   │   │   ├── benchmarks/
│   │       │   │   │   │   ├── __init__.py
│   │       │   │   │   │   ├── adobe240.py
│   │       │   │   │   │   ├── gopro.py
│   │       │   │   │   │   ├── snu_film.py
│   │       │   │   │   │   ├── speed_parameters.py
│   │       │   │   │   │   ├── ucf101.py
│   │       │   │   │   │   ├── vimeo90k.py
│   │       │   │   │   │   ├── vimeo90k_tta.py
│   │       │   │   │   │   └── xiph.py
│   │       │   │   │   ├── datasets/
│   │       │   │   │   │   ├── __init__.py
│   │       │   │   │   │   ├── adobe_datasets.py
│   │       │   │   │   │   ├── gopro_datasets.py
│   │       │   │   │   │   └── vimeo_datasets.py
│   │       │   │   │   ├── flow_generation/
│   │       │   │   │   │   ├── __init__.py
│   │       │   │   │   │   ├── gen_flow.py
│   │       │   │   │   │   └── liteflownet/
│   │       │   │   │   │       ├── __init__.py
│   │       │   │   │   │       └── run.py
│   │       │   │   │   ├── losses/
│   │       │   │   │   │   ├── __init__.py
│   │       │   │   │   │   └── loss.py
│   │       │   │   │   ├── metrics/
│   │       │   │   │   │   ├── __init__.py
│   │       │   │   │   │   └── psnr_ssim.py
│   │       │   │   │   └── networks/
│   │       │   │   │       ├── AMT-G.py
│   │       │   │   │       ├── AMT-L.py
│   │       │   │   │       ├── AMT-S.py
│   │       │   │   │       └── blocks/
│   │       │   │   │           ├── __init__.py
│   │       │   │   │           ├── feat_enc.py
│   │       │   │   │           ├── ifrnet.py
│   │       │   │   │           ├── multi_flow.py
│   │       │   │   │           └── raft.py
│   │       │   │   ├── grit_model.py
│   │       │   │   ├── grit_src/
│   │       │   │   │   └── centernet2/
│   │       │   │   │       └── centernet/
│   │       │   │   │           ├── __init__.py
│   │       │   │   │           ├── config.py
│   │       │   │   │           └── modeling/
│   │       │   │   │               ├── __init__.py
│   │       │   │   │               ├── backbone/
│   │       │   │   │               │   ├── __init__.py
│   │       │   │   │               │   ├── bifpn.py
│   │       │   │   │               │   ├── bifpn_fcos.py
│   │       │   │   │               │   ├── dla.py
│   │       │   │   │               │   ├── dlafpn.py
│   │       │   │   │               │   ├── fpn_p5.py
│   │       │   │   │               │   └── res2net.py
│   │       │   │   │               ├── debug.py
│   │       │   │   │               ├── dense_heads/
│   │       │   │   │               │   ├── __init__.py
│   │       │   │   │               │   ├── centernet.py
│   │       │   │   │               │   ├── centernet_head.py
│   │       │   │   │               │   └── utils.py
│   │       │   │   │               ├── layers/
│   │       │   │   │               │   ├── __init__.py
│   │       │   │   │               │   ├── deform_conv.py
│   │       │   │   │               │   ├── heatmap_focal_loss.py
│   │       │   │   │               │   ├── iou_loss.py
│   │       │   │   │               │   └── ml_nms.py
│   │       │   │   │               ├── meta_arch/
│   │       │   │   │               │   ├── __init__.py
│   │       │   │   │               │   └── centernet_detector.py
│   │       │   │   │               └── roi_heads/
│   │       │   │   │                   ├── __init__.py
│   │       │   │   │                   ├── custom_fast_rcnn.py
│   │       │   │   │                   ├── custom_roi_heads.py
│   │       │   │   │                   └── fed_loss.py
│   │       │   │   ├── tag2Text/
│   │       │   │   │   ├── __init__.py
│   │       │   │   │   ├── med.py
│   │       │   │   │   ├── swin_transformer.py
│   │       │   │   │   ├── tag2text.py
│   │       │   │   │   ├── tag_class.py
│   │       │   │   │   └── vit.py
│   │       │   │   └── umt/
│   │       │   │       ├── __init__.py
│   │       │   │       ├── datasets/
│   │       │   │       │   ├── __init__.py
│   │       │   │       │   ├── build.py
│   │       │   │       │   ├── kinetics.py
│   │       │   │       │   ├── kinetics_sparse.py
│   │       │   │       │   ├── mae.py
│   │       │   │       │   ├── masking_generator.py
│   │       │   │       │   ├── mixup.py
│   │       │   │       │   ├── rand_augment.py
│   │       │   │       │   ├── random_erasing.py
│   │       │   │       │   ├── ssv2.py
│   │       │   │       │   ├── transforms.py
│   │       │   │       │   ├── video_transforms.py
│   │       │   │       │   └── volume_transforms.py
│   │       │   │       ├── functional.py
│   │       │   │       └── models/
│   │       │   │           ├── __init__.py
│   │       │   │           ├── clip.py
│   │       │   │           ├── modeling_finetune.py
│   │       │   │           ├── modeling_pretrain.py
│   │       │   │           └── modeling_pretrain_umt.py
│   │       │   └── utils.py
│   │       └── vbench2_beta_i2v/
│   │           ├── __init__.py
│   │           ├── camera_motion.py
│   │           ├── crop_to_diff_ratio.py
│   │           ├── i2v_background.py
│   │           ├── i2v_subject.py
│   │           └── utils.py
│   ├── configs/
│   │   ├── dit/
│   │   │   ├── inference/
│   │   │   │   ├── 16x256x256.py
│   │   │   │   ├── 1x256x256-class.py
│   │   │   │   └── 1x256x256.py
│   │   │   └── train/
│   │   │       ├── 16x256x256.py
│   │   │       └── 1x256x256.py
│   │   ├── latte/
│   │   │   ├── inference/
│   │   │   │   ├── 16x256x256-class.py
│   │   │   │   └── 16x256x256.py
│   │   │   └── train/
│   │   │       └── 16x256x256.py
│   │   ├── opensora/
│   │   │   ├── inference/
│   │   │   │   ├── 16x256x256.py
│   │   │   │   ├── 16x512x512-rflow.py
│   │   │   │   ├── 16x512x512.py
│   │   │   │   └── 64x512x512.py
│   │   │   └── train/
│   │   │       ├── 16x256x256-mask.py
│   │   │       ├── 16x256x256-spee-rflow.py
│   │   │       ├── 16x256x256-spee.py
│   │   │       ├── 16x256x256.py
│   │   │       ├── 16x512x512.py
│   │   │       ├── 360x512x512.py
│   │   │       ├── 64x512x512-sp.py
│   │   │       └── 64x512x512.py
│   │   ├── opensora-v1-1/
│   │   │   ├── inference/
│   │   │   │   ├── sample-ref.py
│   │   │   │   └── sample.py
│   │   │   └── train/
│   │   │       ├── benchmark.py
│   │   │       ├── image.py
│   │   │       ├── image_rflow.py
│   │   │       ├── stage1.py
│   │   │       ├── stage2.py
│   │   │       ├── stage3.py
│   │   │       └── video.py
│   │   └── opensora-v1-2/
│   │       └── inference/
│   │           └── sample.py
│   ├── docs/
│   │   ├── acceleration.md
│   │   ├── commands.md
│   │   ├── config.md
│   │   ├── data_processing.md
│   │   ├── datasets.md
│   │   ├── installation.md
│   │   ├── report_01.md
│   │   ├── report_02.md
│   │   ├── report_03.md
│   │   ├── structure.md
│   │   ├── vae.md
│   │   └── zh_CN/
│   │       ├── README.md
│   │       ├── READMEv1.1.md
│   │       ├── acceleration.md
│   │       ├── commands.md
│   │       ├── datasets.md
│   │       ├── report_v1.md
│   │       ├── report_v2.md
│   │       ├── report_v3.md
│   │       ├── structure.md
│   │       └── vae.md
│   ├── environment-opensora.yml
│   ├── eval/
│   │   ├── README.md
│   │   ├── human_eval/
│   │   │   ├── generate.sh
│   │   │   └── launch.sh
│   │   ├── loss/
│   │   │   ├── eval_loss.py
│   │   │   ├── launch.sh
│   │   │   └── tabulate_rl_loss.py
│   │   ├── sample.sh
│   │   ├── vae/
│   │   │   ├── cal_flolpips.py
│   │   │   ├── cal_lpips.py
│   │   │   ├── cal_psnr.py
│   │   │   ├── cal_ssim.py
│   │   │   ├── eval_common_metric.py
│   │   │   ├── flolpips/
│   │   │   │   ├── correlation/
│   │   │   │   │   └── correlation.py
│   │   │   │   ├── flolpips.py
│   │   │   │   ├── pretrained_networks.py
│   │   │   │   ├── pwcnet.py
│   │   │   │   └── utils.py
│   │   │   └── script/
│   │   │       └── eval.sh
│   │   ├── vbench/
│   │   │   ├── VBench_full_info.json
│   │   │   ├── calc_vbench.py
│   │   │   ├── launch.sh
│   │   │   ├── launch_calc.sh
│   │   │   └── tabulate_vbench_scores.py
│   │   └── vbench_i2v/
│   │       ├── calc_vbench_i2v.py
│   │       ├── json_to_txt.py
│   │       ├── launch.sh
│   │       └── launch_calc.sh
│   ├── gradio/
│   │   ├── README.md
│   │   ├── app.py
│   │   └── requirements.txt
│   ├── notebooks/
│   │   ├── inference.ipynb
│   │   └── launch.ipynb
│   ├── opensora/
│   │   ├── __init__.py
│   │   ├── acceleration/
│   │   │   ├── __init__.py
│   │   │   ├── checkpoint.py
│   │   │   ├── communications.py
│   │   │   ├── parallel_states.py
│   │   │   ├── plugin.py
│   │   │   └── shardformer/
│   │   │       ├── __init__.py
│   │   │       ├── modeling/
│   │   │       │   ├── __init__.py
│   │   │       │   └── t5.py
│   │   │       └── policy/
│   │   │           ├── __init__.py
│   │   │           └── t5_encoder.py
│   │   ├── datasets/
│   │   │   ├── __init__.py
│   │   │   ├── aspect.py
│   │   │   ├── bucket.py
│   │   │   ├── dataloader.py
│   │   │   ├── datasets.py
│   │   │   ├── read_video.py
│   │   │   ├── sampler.py
│   │   │   ├── utils.py
│   │   │   └── video_transforms.py
│   │   ├── models/
│   │   │   ├── __init__.py
│   │   │   ├── cache_functions/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── attention.py
│   │   │   │   ├── cache_cutfresh.py
│   │   │   │   ├── cache_init.py
│   │   │   │   ├── force_init.py
│   │   │   │   ├── force_scheduler.py
│   │   │   │   ├── fresh_ratio_scheduler.py
│   │   │   │   ├── global_force_fresh.py
│   │   │   │   ├── score_evaluate.py
│   │   │   │   ├── scores.py
│   │   │   │   ├── token_merge.py
│   │   │   │   └── update_cache.py
│   │   │   ├── dit/
│   │   │   │   ├── __init__.py
│   │   │   │   └── dit.py
│   │   │   ├── latte/
│   │   │   │   ├── __init__.py
│   │   │   │   └── latte.py
│   │   │   ├── layers/
│   │   │   │   ├── __init__.py
│   │   │   │   └── blocks.py
│   │   │   ├── pixart/
│   │   │   │   └── pixart.py
│   │   │   ├── stdit/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── stdit.py
│   │   │   │   ├── stdit2.py
│   │   │   │   └── stdit3.py
│   │   │   ├── text_encoder/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── classes.py
│   │   │   │   ├── clip.py
│   │   │   │   └── t5.py
│   │   │   └── vae/
│   │   │       ├── __init__.py
│   │   │       ├── discriminator.py
│   │   │       ├── losses.py
│   │   │       ├── lpips.py
│   │   │       ├── utils.py
│   │   │       ├── vae.py
│   │   │       ├── vae_temporal.py
│   │   │       └── video_sdxl/
│   │   │           └── blocks.py
│   │   ├── registry.py
│   │   ├── schedulers/
│   │   │   ├── __init__.py
│   │   │   ├── dpms/
│   │   │   │   ├── __init__.py
│   │   │   │   └── dpm_solver.py
│   │   │   ├── iddpm/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── diffusion_utils.py
│   │   │   │   ├── gaussian_diffusion.py
│   │   │   │   ├── respace.py
│   │   │   │   ├── speed.py
│   │   │   │   └── timestep_sampler.py
│   │   │   └── rf/
│   │   │       ├── __init__.py
│   │   │       └── rectified_flow.py
│   │   └── utils/
│   │       ├── __init__.py
│   │       ├── ckpt_utils.py
│   │       ├── config_utils.py
│   │       ├── inference_utils.py
│   │       ├── lr_scheduler.py
│   │       ├── misc.py
│   │       └── train_utils.py
│   ├── opensora.egg-info/
│   │   ├── PKG-INFO
│   │   ├── SOURCES.txt
│   │   ├── dependency_links.txt
│   │   ├── requires.txt
│   │   └── top_level.txt
│   ├── pyproject.toml
│   ├── requirements/
│   │   ├── requirements-cu121.txt
│   │   ├── requirements-data.txt
│   │   ├── requirements-eval.txt
│   │   ├── requirements-pllava.txt
│   │   ├── requirements-vae.txt
│   │   └── requirements.txt
│   ├── scripts/
│   │   ├── inference.py
│   │   ├── inference_vae.py
│   │   └── misc/
│   │       ├── extract_feat.py
│   │       └── launch_extract_feat.sh
│   ├── setup.py
│   ├── tests/
│   │   ├── test_attn.py
│   │   └── test_lr_scheduler.py
│   └── tools/
│       ├── __init__.py
│       ├── caption/
│       │   ├── README.md
│       │   ├── __init__.py
│       │   ├── acceleration/
│       │   │   ├── __init__.py
│       │   │   └── llava/
│       │   │       ├── __init__.py
│       │   │       └── policies/
│       │   │           ├── __init__.py
│       │   │           ├── llama.py
│       │   │           └── mistral.py
│       │   ├── camera_motion/
│       │   │   ├── __init__.py
│       │   │   ├── camera_motion.py
│       │   │   ├── detect.py
│       │   │   ├── requirements.txt
│       │   │   ├── utils.py
│       │   │   └── visualizer.py
│       │   ├── camera_motion_detect.py
│       │   ├── caption_gpt4.py
│       │   ├── caption_llama3.py
│       │   ├── caption_llava.py
│       │   ├── pllava_dir/
│       │   │   └── caption_pllava.py
│       │   └── utils.py
│       ├── datasets/
│       │   ├── README.md
│       │   ├── __init__.py
│       │   ├── analyze.py
│       │   ├── convert.py
│       │   ├── datautil.py
│       │   ├── filter_panda10m.py
│       │   ├── split.py
│       │   ├── transform.py
│       │   └── utils.py
│       ├── frame_interpolation/
│       │   ├── README.md
│       │   ├── __init__.py
│       │   ├── interpolation.py
│       │   ├── networks/
│       │   │   ├── __init__.py
│       │   │   ├── amt_g.py
│       │   │   └── blocks/
│       │   │       ├── __init__.py
│       │   │       ├── feat_enc.py
│       │   │       ├── ifrnet.py
│       │   │       ├── multi_flow.py
│       │   │       └── raft.py
│       │   └── utils/
│       │       ├── __init__.py
│       │       ├── dist_utils.py
│       │       ├── flow_utils.py
│       │       └── utils.py
│       ├── scene_cut/
│       │   ├── README.md
│       │   ├── __init__.py
│       │   ├── convert_id_to_path.py
│       │   ├── cut.py
│       │   └── scene_detect.py
│       └── scoring/
│           ├── README.md
│           ├── __init__.py
│           ├── aesthetic/
│           │   ├── __init__.py
│           │   └── inference.py
│           ├── matching/
│           │   ├── __init__.py
│           │   └── inference.py
│           ├── ocr/
│           │   ├── __init__.py
│           │   ├── dbnetpp.py
│           │   └── inference.py
│           └── optical_flow/
│               ├── __init__.py
│               ├── inference.py
│               └── unimatch/
│                   ├── __init__.py
│                   ├── attention.py
│                   ├── backbone.py
│                   ├── geometry.py
│                   ├── matching.py
│                   ├── position.py
│                   ├── reg_refine.py
│                   ├── transformer.py
│                   ├── trident_conv.py
│                   ├── unimatch.py
│                   └── utils.py
├── PixArt-alpha-ToCa/
│   ├── Dockerfile
│   ├── README(PixArt-alpha).md
│   ├── app/
│   │   ├── app.py
│   │   ├── app_512.py
│   │   ├── app_controlnet.py
│   │   ├── app_lcm.py
│   │   ├── style.css
│   │   └── style_controlnet.css
│   ├── asset/
│   │   ├── docs/
│   │   │   ├── pixart-dreambooth.md
│   │   │   ├── pixart.md
│   │   │   ├── pixart_comfyui.md
│   │   │   ├── pixart_controlnet.md
│   │   │   ├── pixart_inpaint.md
│   │   │   ├── pixart_lcm.md
│   │   │   └── sasolver.md
│   │   ├── examples.py
│   │   └── samples.txt
│   ├── configs/
│   │   ├── PixArt_xl2_internal.py
│   │   ├── PixArt_xl2_sam.py
│   │   ├── pixart_app_config/
│   │   │   ├── PixArt_xl2_img1024_controlHed.py
│   │   │   ├── PixArt_xl2_img1024_dreambooth.py
│   │   │   └── PixArt_xl2_img512_controlHed.py
│   │   └── pixart_config/
│   │       ├── PixArt_xl2_img1024_internal.py
│   │       ├── PixArt_xl2_img1024_internalms.py
│   │       ├── PixArt_xl2_img1024_lcm.py
│   │       ├── PixArt_xl2_img256_SAM.py
│   │       ├── PixArt_xl2_img256_internal.py
│   │       ├── PixArt_xl2_img512_internal.py
│   │       └── PixArt_xl2_img512_internalms.py
│   ├── diffusion/
│   │   ├── __init__.py
│   │   ├── data/
│   │   │   ├── __init__.py
│   │   │   ├── builder.py
│   │   │   ├── datasets/
│   │   │   │   ├── Dreambooth.py
│   │   │   │   ├── InternalData.py
│   │   │   │   ├── InternalData_ms.py
│   │   │   │   ├── SA.py
│   │   │   │   ├── __init__.py
│   │   │   │   ├── pixart_control.py
│   │   │   │   └── utils.py
│   │   │   └── transforms.py
│   │   ├── dpm_solver.py
│   │   ├── iddpm.py
│   │   ├── lcm_scheduler.py
│   │   ├── model/
│   │   │   ├── __init__.py
│   │   │   ├── builder.py
│   │   │   ├── cache_functions/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── attention.py
│   │   │   │   ├── cache_cutfresh.py
│   │   │   │   ├── cache_init.py
│   │   │   │   ├── force_init.py
│   │   │   │   ├── force_scheduler.py
│   │   │   │   ├── fresh_ratio_scheduler.py
│   │   │   │   ├── global_force_fresh.py
│   │   │   │   ├── score_evaluate.py
│   │   │   │   ├── scores.py
│   │   │   │   ├── token_merge.py
│   │   │   │   └── update_cache.py
│   │   │   ├── diffusion_utils.py
│   │   │   ├── dpm_solver.py
│   │   │   ├── edm_sample.py
│   │   │   ├── gaussian_diffusion.py
│   │   │   ├── hed.py
│   │   │   ├── llava/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── llava_mpt.py
│   │   │   │   └── mpt/
│   │   │   │       ├── attention.py
│   │   │   │       ├── blocks.py
│   │   │   │       ├── configuration_mpt.py
│   │   │   │       ├── modeling_mpt.py
│   │   │   │       ├── norm.py
│   │   │   │       └── param_init_fns.py
│   │   │   ├── nets/
│   │   │   │   ├── PixArt.py
│   │   │   │   ├── PixArtMS.py
│   │   │   │   ├── PixArt_blocks.py
│   │   │   │   ├── __init__.py
│   │   │   │   └── pixart_controlnet.py
│   │   │   ├── respace.py
│   │   │   ├── sa_solver.py
│   │   │   ├── t5.py
│   │   │   ├── timestep_sampler.py
│   │   │   └── utils.py
│   │   ├── sa_sampler.py
│   │   ├── sa_solver_diffusers.py
│   │   └── utils/
│   │       ├── __init__.py
│   │       ├── checkpoint.py
│   │       ├── data_sampler.py
│   │       ├── dist_utils.py
│   │       ├── logger.py
│   │       ├── lr_scheduler.py
│   │       ├── misc.py
│   │       └── optimizer.py
│   ├── docker-compose.yml
│   ├── docker-entrypoint.sh
│   ├── docker-readme.md
│   ├── environment-pixart.yml
│   ├── environment.yml
│   ├── notebooks/
│   │   ├── PixArt_xl2_img512_internal_for_pokemon_sample_training.py
│   │   ├── convert-checkpoint-to-diffusers.ipynb
│   │   ├── infer.ipynb
│   │   └── train.ipynb
│   ├── requirements.txt
│   ├── scripts/
│   │   ├── infer_pixart_8_bits.py
│   │   ├── inference.py
│   │   ├── inference_ddp.py
│   │   ├── inference_lcm.py
│   │   ├── interface.py
│   │   ├── interface_controlnet.py
│   │   ├── pipeline_pixart_inpaint.py
│   │   └── pipeline_pixart_reference.py
│   ├── timing_analysis.py
│   ├── timing_info.json
│   ├── tools/
│   │   ├── VLM_caption_lightning.py
│   │   ├── convert_pixart_alpha_to_diffusers.py
│   │   ├── download.py
│   │   └── extract_features.py
│   ├── train.sh
│   ├── train_latents.py
│   └── train_scripts/
│       ├── train.py
│       ├── train_controlnet.py
│       ├── train_diffusers.py
│       ├── train_dreambooth.py
│       ├── train_pixart_lcm.py
│       ├── train_pixart_lcm_lora.py
│       └── train_pixart_lora_hf.py
├── PixArt-alpha-ToCa-tools/
│   └── clip_score.py
├── README.md
└── flux-ToCa/
    ├── .gitignore
    ├── LICENSE
    ├── README.md
    ├── demo_gr.py
    ├── demo_st.py
    ├── demo_st_fill.py
    ├── docs/
    │   ├── fill.md
    │   ├── image-variation.md
    │   ├── structural-conditioning.md
    │   └── text-to-image.md
    ├── model_cards/
    │   ├── FLUX.1-dev.md
    │   └── FLUX.1-schnell.md
    ├── model_licenses/
    │   ├── LICENSE-FLUX1-dev
    │   └── LICENSE-FLUX1-schnell
    ├── pyproject.toml
    ├── setup.py
    └── src/
        ├── flux/
        │   ├── __init__.py
        │   ├── __main__.py
        │   ├── _version.py
        │   ├── api.py
        │   ├── cli.py
        │   ├── cli_control.py
        │   ├── cli_fill.py
        │   ├── cli_redux.py
        │   ├── ideas/
        │   │   ├── __init__.py
        │   │   └── cache_denoise.py
        │   ├── math.py
        │   ├── model.py
        │   ├── modules/
        │   │   ├── autoencoder.py
        │   │   ├── cache_functions/
        │   │   │   ├── __init__.py
        │   │   │   ├── attention.py
        │   │   │   ├── cache_cutfresh.py
        │   │   │   ├── cache_init.py
        │   │   │   ├── cal_type.py
        │   │   │   ├── force_init.py
        │   │   │   ├── force_scheduler.py
        │   │   │   ├── fresh_ratio_scheduler.py
        │   │   │   ├── global_force_fresh.py
        │   │   │   ├── score_evaluate.py
        │   │   │   ├── scores.py
        │   │   │   ├── support_set_selection.py
        │   │   │   ├── token_merge.py
        │   │   │   └── update_cache.py
        │   │   ├── conditioner.py
        │   │   ├── image_embedders.py
        │   │   ├── layers.py
        │   │   └── lora.py
        │   ├── sampling.py
        │   └── util.py
        ├── geneval_flux.py
        └── sample.py