From 33b4f1a55cc0fc6b8be138d5097cf7ad8ec487fc Mon Sep 17 00:00:00 2001 From: Ligeng Zhu Date: Tue, 7 Jan 2025 10:48:15 +0800 Subject: [PATCH] Cosmos Nemotron release --- data_prepare/coyo/coyo_downloader.py | 2 ++ data_prepare/coyo/coyo_splitter.py | 2 ++ data_prepare/mmc4/mmc4_downloader.py | 2 ++ data_prepare/mmc4/mmc4_filter_and_counter.py | 2 ++ data_prepare/mmc4/mmc4_merger.py | 2 ++ data_prepare/panda_split.py | 2 ++ data_prepare/sft/ART1_2.py | 2 ++ data_prepare/sft/ESTVQA.py | 2 ++ data_prepare/sft/LSVT.py | 2 ++ data_prepare/sft/POIE.py | 2 ++ data_prepare/sft/ReCTS.py | 2 ++ data_prepare/sft/SROIE.py | 2 ++ data_prepare/sft/merge_idefics2.py | 2 ++ data_prepare/sft/merge_llava_onevision.py | 2 ++ data_prepare/sft/merge_llava_onevision_eagle.py | 2 ++ data_prepare/sft/mtwi.py | 2 ++ data_prepare/sft/preprocess_art_shangy.py | 2 ++ data_prepare/sft/preprocess_cambrian.py | 2 ++ data_prepare/sft/preprocess_cambrian_eagle.py | 2 ++ data_prepare/sft/preprocess_docreason.py | 2 ++ data_prepare/sft/preprocess_flan.py | 2 ++ data_prepare/sft/preprocess_idefics2.py | 2 ++ data_prepare/sft/preprocess_idefics2_eagle.py | 2 ++ data_prepare/sft/preprocess_kvqa.py | 2 ++ data_prepare/sft/preprocess_llava_onevision.py | 2 ++ data_prepare/sft/preprocess_m3it.py | 2 ++ data_prepare/sft/preprocess_metamathqa.py | 2 ++ data_prepare/sft/preprocess_viquae.py | 2 ++ data_prepare/sft/split_vflan.py | 2 ++ data_prepare/sft/unichart_pretrain.py | 2 ++ data_prepare/sft/unichart_sft.py | 2 ++ llava/__init__.py | 2 ++ llava/cli/eval.py | 2 ++ llava/cli/infer.py | 2 ++ llava/cli/run.py | 2 ++ llava/cli/upload2hf.py | 2 ++ llava/constants.py | 2 ++ llava/conversation.py | 2 ++ llava/data/__init__.py | 2 ++ llava/data/base.py | 2 ++ llava/data/builder.py | 2 ++ llava/data/collate.py | 2 ++ llava/data/dataset.py | 2 ++ llava/data/dataset_impl/__init__.py | 2 ++ llava/data/dataset_impl/coyo_qa.py | 2 ++ llava/data/dataset_impl/coyo_recap.py | 2 ++ llava/data/dataset_impl/dummy.py | 2 ++ llava/data/dataset_impl/lita.py | 2 ++ llava/data/dataset_impl/llava.py | 2 ++ llava/data/dataset_impl/llava_cot.py | 2 ++ llava/data/dataset_impl/panda70m.py | 2 ++ llava/data/dataset_impl/sam.py | 2 ++ llava/data/dataset_impl/textocr.py | 2 ++ llava/data/dataset_impl/utils.py | 2 ++ llava/data/datasets_mixture.py | 2 ++ llava/data/simple_vila_webdataset.py | 2 ++ llava/data/utils.py | 2 ++ llava/entry.py | 2 ++ llava/eval/__init__.py | 2 ++ llava/eval/cinepile.py | 2 ++ llava/eval/egoschema.py | 2 ++ llava/eval/eval_refcoco.py | 2 ++ llava/eval/eventbench.py | 2 ++ llava/eval/lmms/models/__init__.py | 2 ++ llava/eval/lmms/models/vila_internal.py | 2 ++ llava/eval/lmms/tasks/__init__.py | 2 ++ llava/eval/lmms/tasks/videomme.py | 2 ++ llava/eval/m4c_evaluator.py | 2 ++ llava/eval/mathvista.py | 2 ++ llava/eval/mathvista_utils/calculate_score.py | 2 ++ llava/eval/mathvista_utils/extract_answer.py | 2 ++ llava/eval/mathvista_utils/prompts/ext_ans.py | 2 ++ llava/eval/mathvista_utils/utilities.py | 2 ++ llava/eval/mmmu_utils/eval_utils.py | 2 ++ llava/eval/model_refcoco.py | 2 ++ llava/eval/model_vqa_video.py | 2 ++ llava/eval/model_vqa_videodemo.py | 2 ++ llava/eval/rtl.py | 2 ++ llava/eval/textvqa.py | 2 ++ llava/eval/video/eval_benchmark_1_correctness.py | 2 ++ llava/eval/video/eval_benchmark_2_detailed_orientation.py | 2 ++ llava/eval/video/eval_benchmark_3_context.py | 2 ++ llava/eval/video/eval_benchmark_4_temporal.py | 2 ++ llava/eval/video/eval_benchmark_5_consistency.py | 2 ++ llava/eval/video/eval_video_qa.py | 2 ++ llava/eval/video/model_vqa_videodemo_benchmark.py | 2 ++ llava/eval/video/utils.py | 2 ++ llava/eval/vision_niah_vila/eval_vision_niah.py | 2 ++ llava/eval/vision_niah_vila/produce_haystack_embedding.py | 2 ++ llava/eval/vision_niah_vila/produce_needle_embedding.py | 2 ++ llava/eval/vision_niah_vila/zigzag_ring_attn/modeling_qwen2.py | 2 ++ llava/eval/vision_niah_vila/zigzag_ring_attn/monkey_patch.py | 2 ++ llava/eval/vision_niah_vila/zigzag_ring_attn/prepare_inputs.py | 2 ++ llava/eval/vnbench.py | 2 ++ llava/media.py | 2 ++ llava/mm_utils.py | 2 ++ llava/model/FloatPointQuantizeTorch.py | 2 ++ llava/model/FloatPointQuantizeTriton.py | 2 ++ llava/model/__init__.py | 2 ++ llava/model/apply_delta.py | 2 ++ llava/model/builder.py | 2 ++ llava/model/coat/activation/__init__.py | 2 ++ .../activation/fake_quantization/FloatPointQuantizeTorch.py | 2 ++ .../activation/fake_quantization/FloatPointQuantizeTriton.py | 2 ++ .../coat/activation/fake_quantization/quantize_function.py | 2 ++ llava/model/coat/activation/fake_quantization/utils.py | 2 ++ llava/model/coat/activation/models/_fp8_quantization_config.py | 2 ++ llava/model/coat/activation/models/_fp8_weightcache.py | 2 ++ llava/model/coat/activation/models/_fp8manager.py | 2 ++ llava/model/coat/activation/models/coat_llama.py | 2 ++ .../model/coat/activation/models/coat_llama_convert_from_hf.py | 2 ++ llava/model/coat/activation/models/coat_olmo.py | 2 ++ llava/model/coat/activation/real_quantization/__init__.py | 2 ++ llava/model/coat/activation/real_quantization/_dequantize.py | 2 ++ llava/model/coat/activation/real_quantization/_division.py | 2 ++ .../coat/activation/real_quantization/_division_transpose.py | 2 ++ llava/model/coat/activation/real_quantization/_memory_io.py | 2 ++ llava/model/coat/activation/real_quantization/_quantize.py | 2 ++ .../coat/activation/real_quantization/_quantize_pertensor.py | 2 ++ .../real_quantization/_quantize_pertensor_transpose.py | 2 ++ llava/model/coat/activation/real_quantization/_transpose.py | 2 ++ llava/model/coat/activation/real_quantization/add_bwd.py | 2 ++ llava/model/coat/activation/real_quantization/add_fwd.py | 2 ++ llava/model/coat/activation/real_quantization/common.py | 2 ++ llava/model/coat/activation/real_quantization/fp8linear.py | 2 ++ .../coat/activation/real_quantization/func_layernorm_noparam.py | 2 ++ llava/model/coat/activation/real_quantization/func_quantize.py | 2 ++ llava/model/coat/activation/real_quantization/func_rmsnorm.py | 2 ++ llava/model/coat/activation/real_quantization/gelu_bwd.py | 2 ++ .../model/coat/activation/real_quantization/gelu_bwd_legacy.py | 2 ++ llava/model/coat/activation/real_quantization/gelu_fwd.py | 2 ++ llava/model/coat/activation/real_quantization/linear.py | 2 ++ llava/model/coat/activation/real_quantization/mul_bwd.py | 2 ++ llava/model/coat/activation/real_quantization/mul_bwd_legacy.py | 2 ++ .../model/coat/activation/real_quantization/mul_bwd_silu_fwd.py | 2 ++ llava/model/coat/activation/real_quantization/mul_fwd.py | 2 ++ llava/model/coat/activation/real_quantization/silu_bwd.py | 2 ++ .../model/coat/activation/real_quantization/silu_bwd_legacy.py | 2 ++ llava/model/coat/activation/real_quantization/silu_fwd.py | 2 ++ llava/model/coat/activation/utils.py | 2 ++ llava/model/coat/fp8_trainer.py | 2 ++ llava/model/coat/optimizer/fp8_adamw.py | 2 ++ llava/model/coat/optimizer/kernels/setup.py | 2 ++ llava/model/configuration_llava.py | 2 ++ llava/model/consolidate.py | 2 ++ llava/model/encoders/__init__.py | 2 ++ llava/model/encoders/base.py | 2 ++ llava/model/encoders/image/__init__.py | 2 ++ llava/model/encoders/image/basic.py | 2 ++ llava/model/encoders/video/__init__.py | 2 ++ llava/model/encoders/video/basic.py | 2 ++ llava/model/encoders/video/tsp.py | 2 ++ llava/model/language_model/builder.py | 2 ++ llava/model/language_model/configuration_quantize.py | 2 ++ llava/model/language_model/fp8_qwen2_convert_from_hf.py | 2 ++ llava/model/language_model/fp8activationqwen2.py | 2 ++ llava/model/language_model/fp8activationresidualqwen2.py | 2 ++ llava/model/language_model/fp8linearqwen2.py | 2 ++ llava/model/language_model/llava_llama.py | 2 ++ llava/model/language_model/qllama.py | 2 ++ llava/model/language_model/qllava_qllama.py | 2 ++ llava/model/language_model/qmemllama.py | 2 ++ llava/model/language_model/realqmemllama.py | 2 ++ llava/model/liger/cross_entropy.py | 2 ++ llava/model/liger/utils.py | 2 ++ llava/model/llava_arch.py | 2 ++ llava/model/loss.py | 2 ++ llava/model/make_delta.py | 2 ++ llava/model/multimodal_encoder/builder.py | 2 ++ llava/model/multimodal_encoder/clip_encoder.py | 2 ++ llava/model/multimodal_encoder/image_processor.py | 2 ++ .../model/multimodal_encoder/intern/configuration_intern_vit.py | 2 ++ llava/model/multimodal_encoder/intern/flash_attention.py | 2 ++ llava/model/multimodal_encoder/intern/modeling_intern_vit.py | 2 ++ llava/model/multimodal_encoder/intern_encoder.py | 2 ++ llava/model/multimodal_encoder/radio_encoder.py | 2 ++ llava/model/multimodal_encoder/radio_torchhub_encoder.py | 2 ++ llava/model/multimodal_encoder/siglip/__init__.py | 2 ++ llava/model/multimodal_encoder/siglip/modeling_siglip.py | 2 ++ llava/model/multimodal_encoder/siglip_encoder.py | 2 ++ llava/model/multimodal_encoder/vision_encoder.py | 2 ++ llava/model/multimodal_encoder/visualize_features.py | 2 ++ llava/model/multimodal_projector/base_projector.py | 2 ++ llava/model/multimodal_projector/builder.py | 2 ++ llava/model/qfunction.py | 2 ++ llava/model/qlinear_te.py | 2 ++ llava/model/quantization/FloatPointQuantizeTorch.py | 2 ++ llava/model/quantization/FloatPointQuantizeTriton.py | 2 ++ llava/model/quantization/QAct.py | 2 ++ llava/model/quantization/QAdd.py | 2 ++ llava/model/quantization/QFunction.py | 2 ++ llava/model/quantization/QGELU.py | 2 ++ llava/model/quantization/QIdentity.py | 2 ++ llava/model/quantization/QLayerNorm.py | 2 ++ llava/model/quantization/QLinear.py | 2 ++ llava/model/quantization/QMul.py | 2 ++ llava/model/quantization/Qconfig.py | 2 ++ llava/model/quantization/__init__.py | 2 ++ llava/model/quantization/utils.py | 2 ++ llava/model/qutils.py | 2 ++ llava/model/realquantize/common.py | 2 ++ llava/model/realquantize/division.py | 2 ++ llava/model/realquantize/division_transpose.py | 2 ++ llava/model/realquantize/linear.py | 2 ++ llava/model/realquantize/quantize_and_transpose.py | 2 ++ llava/model/realquantize/trans_grad_bias.py | 2 ++ llava/model/utils/__init__.py | 2 ++ llava/model/utils/packing.py | 2 ++ llava/model/utils/utils.py | 2 ++ llava/train/__init__.py | 2 ++ llava/train/args.py | 2 ++ llava/train/callbacks/autoresume_callback.py | 2 ++ llava/train/deepspeed_replace/runtime/zero/mics.py | 2 ++ llava/train/llava_trainer.py | 2 ++ llava/train/sequence_parallel/__init__.py | 2 ++ llava/train/sequence_parallel/all_to_all.py | 2 ++ llava/train/sequence_parallel/globals.py | 2 ++ llava/train/sequence_parallel/hybrid_attn.py | 2 ++ llava/train/sequence_parallel/input_utils.py | 2 ++ llava/train/sequence_parallel/monkey_patch.py | 2 ++ llava/train/sequence_parallel/ring/__init__.py | 2 ++ llava/train/sequence_parallel/ring/ring_flash_attn.py | 2 ++ llava/train/sequence_parallel/ring/ring_flash_attn_varlen.py | 2 ++ llava/train/sequence_parallel/ring/stripe_flash_attn.py | 2 ++ llava/train/sequence_parallel/ring/triton_utils.py | 2 ++ llava/train/sequence_parallel/ring/utils.py | 2 ++ llava/train/sequence_parallel/ring/zigzag_ring_flash_attn.py | 2 ++ .../sequence_parallel/ring/zigzag_ring_flash_attn_varlen.py | 2 ++ llava/train/sequence_parallel/ulysses_attn.py | 2 ++ llava/train/slurm_utils.py | 2 ++ llava/train/train.py | 2 ++ llava/train/train_hybrid.py | 2 ++ llava/train/train_llm_to_long.py | 2 ++ llava/train/train_ln.py | 2 ++ llava/train/train_mem.py | 2 ++ llava/train/train_mem_ln.py | 2 ++ llava/train/transformer_normalize_monkey_patch.py | 2 ++ llava/train/utils.py | 2 ++ llava/trl/__init__.py | 2 ++ llava/trl/core.py | 2 ++ llava/trl/environment/__init__.py | 2 ++ llava/trl/environment/base_environment.py | 2 ++ llava/trl/extras/__init__.py | 2 ++ llava/trl/extras/best_of_n_sampler.py | 2 ++ llava/trl/extras/dataset_formatting.py | 2 ++ llava/trl/import_utils.py | 2 ++ llava/trl/models/__init__.py | 2 ++ llava/trl/models/modeling_base.py | 2 ++ llava/trl/models/modeling_sd_base.py | 2 ++ llava/trl/models/modeling_value_head.py | 2 ++ llava/trl/models/utils.py | 2 ++ llava/trl/trainer/__init__.py | 2 ++ llava/trl/trainer/base.py | 2 ++ llava/trl/trainer/ddpo_config.py | 2 ++ llava/trl/trainer/ddpo_trainer.py | 2 ++ llava/trl/trainer/dpo_trainer.py | 2 ++ llava/trl/trainer/iterative_sft_trainer.py | 2 ++ llava/trl/trainer/model_config.py | 2 ++ llava/trl/trainer/ppo_config.py | 2 ++ llava/trl/trainer/ppo_trainer.py | 2 ++ llava/trl/trainer/reward_config.py | 2 ++ llava/trl/trainer/reward_trainer.py | 2 ++ llava/trl/trainer/sft_trainer.py | 2 ++ llava/trl/trainer/utils.py | 2 ++ llava/utils/__init__.py | 2 ++ llava/utils/distributed.py | 2 ++ llava/utils/io.py | 2 ++ llava/utils/logging.py | 2 ++ llava/utils/media.py | 2 ++ llava/utils/merge_lora_weights_and_save_hf_model.py | 2 ++ llava/utils/tokenizer.py | 2 ++ llava/utils/utils.py | 2 ++ llava/wids/__init__.py | 2 ++ llava/wids/wids.py | 2 ++ llava/wids/wids_bench.py | 2 ++ llava/wids/wids_cleanup.py | 2 ++ llava/wids/wids_dir.py | 2 ++ llava/wids/wids_dl.py | 2 ++ llava/wids/wids_index.py | 2 ++ llava/wids/wids_lru.py | 2 ++ llava/wids/wids_mmtar.py | 2 ++ llava/wids/wids_specs.py | 2 ++ llava/wids/wids_tar.py | 2 ++ server.py | 2 ++ 284 files changed, 568 insertions(+) diff --git a/data_prepare/coyo/coyo_downloader.py b/data_prepare/coyo/coyo_downloader.py index 14f46c2c..82192215 100644 --- a/data_prepare/coyo/coyo_downloader.py +++ b/data_prepare/coyo/coyo_downloader.py @@ -136,3 +136,5 @@ async def main(data_list): asyncio.run(main(metadata_list)) + + diff --git a/data_prepare/coyo/coyo_splitter.py b/data_prepare/coyo/coyo_splitter.py index 18ac6e14..e53ddc85 100644 --- a/data_prepare/coyo/coyo_splitter.py +++ b/data_prepare/coyo/coyo_splitter.py @@ -46,3 +46,5 @@ f.write(str(len(samples2write))) counter += 1 + + diff --git a/data_prepare/mmc4/mmc4_downloader.py b/data_prepare/mmc4/mmc4_downloader.py index 17147f39..51ee9f53 100644 --- a/data_prepare/mmc4/mmc4_downloader.py +++ b/data_prepare/mmc4/mmc4_downloader.py @@ -151,3 +151,5 @@ async def main(data_list): asyncio.run(main(all_data)) + + diff --git a/data_prepare/mmc4/mmc4_filter_and_counter.py b/data_prepare/mmc4/mmc4_filter_and_counter.py index 6bfd3b82..d1f07060 100644 --- a/data_prepare/mmc4/mmc4_filter_and_counter.py +++ b/data_prepare/mmc4/mmc4_filter_and_counter.py @@ -56,3 +56,5 @@ with open(os.path.join(output_path, pkl.replace(".pkl", ".count")), "w") as f: f.write(str(len(filtered_annotation))) + + diff --git a/data_prepare/mmc4/mmc4_merger.py b/data_prepare/mmc4/mmc4_merger.py index d9a0dde3..0ae0bafe 100644 --- a/data_prepare/mmc4/mmc4_merger.py +++ b/data_prepare/mmc4/mmc4_merger.py @@ -46,3 +46,5 @@ with open(os.path.join(output_path, shard_name.replace(".jsonl", ".pkl")), "wb") as f: pickle.dump(data_list, f) + + diff --git a/data_prepare/panda_split.py b/data_prepare/panda_split.py index 092df3dc..43d410b2 100644 --- a/data_prepare/panda_split.py +++ b/data_prepare/panda_split.py @@ -107,3 +107,5 @@ def split_video_to_clips( import fire fire.Fire(split_video_to_clips) + + diff --git a/data_prepare/sft/ART1_2.py b/data_prepare/sft/ART1_2.py index d7201bbe..9c8e9fb7 100644 --- a/data_prepare/sft/ART1_2.py +++ b/data_prepare/sft/ART1_2.py @@ -86,3 +86,5 @@ def coords_list2bbox(coords_list: List[List[int]], width: int, height: int) -> s jsonl_file.write("\n") # Add a newline after each JSON object print("Processing complete.") + + diff --git a/data_prepare/sft/ESTVQA.py b/data_prepare/sft/ESTVQA.py index 3142e445..37adea62 100644 --- a/data_prepare/sft/ESTVQA.py +++ b/data_prepare/sft/ESTVQA.py @@ -33,3 +33,5 @@ def is_english(text): jsonl_file.write("\n") print("Processing complete.") + + diff --git a/data_prepare/sft/LSVT.py b/data_prepare/sft/LSVT.py index bcf1b6b4..14aa0e2e 100644 --- a/data_prepare/sft/LSVT.py +++ b/data_prepare/sft/LSVT.py @@ -84,3 +84,5 @@ def coords_list2bbox(coords_list: List[List[int]], width: int, height: int) -> s jsonl_file.write("\n") # Add a newline after each JSON object print("Processing complete.") + + diff --git a/data_prepare/sft/POIE.py b/data_prepare/sft/POIE.py index 5657e420..5e243a70 100644 --- a/data_prepare/sft/POIE.py +++ b/data_prepare/sft/POIE.py @@ -85,3 +85,5 @@ def coords_list2bbox(coords_list: List[List[int]], width: int, height: int) -> s jsonl_file.write("\n") # Add a newline after each JSON object print("Processing complete.") + + diff --git a/data_prepare/sft/ReCTS.py b/data_prepare/sft/ReCTS.py index 72e3c706..87860b06 100644 --- a/data_prepare/sft/ReCTS.py +++ b/data_prepare/sft/ReCTS.py @@ -105,3 +105,5 @@ def coords_list2bbox(coords_list: List[List[int]], width: int, height: int) -> s jsonl_file.write("\n") # Add a newline after each JSON object print("Processing complete.") + + diff --git a/data_prepare/sft/SROIE.py b/data_prepare/sft/SROIE.py index a71202c1..663c3a46 100644 --- a/data_prepare/sft/SROIE.py +++ b/data_prepare/sft/SROIE.py @@ -66,3 +66,5 @@ # Now 'images' contains all the opened images from the image_root directory print(f"Successfully opened {len(images)} out of {len(image_files)} images.") + + diff --git a/data_prepare/sft/merge_idefics2.py b/data_prepare/sft/merge_idefics2.py index fc07cd29..cea5ce33 100644 --- a/data_prepare/sft/merge_idefics2.py +++ b/data_prepare/sft/merge_idefics2.py @@ -47,3 +47,5 @@ def load_jsonl(file_path): for item in all_data: json.dump(item, f) f.write("\n") + + diff --git a/data_prepare/sft/merge_llava_onevision.py b/data_prepare/sft/merge_llava_onevision.py index ff809d9b..c503b3c9 100644 --- a/data_prepare/sft/merge_llava_onevision.py +++ b/data_prepare/sft/merge_llava_onevision.py @@ -99,3 +99,5 @@ def load_jsonl(file_path): import fire fire.Fire(main) + + diff --git a/data_prepare/sft/merge_llava_onevision_eagle.py b/data_prepare/sft/merge_llava_onevision_eagle.py index 4fa6cea6..35aae1b9 100644 --- a/data_prepare/sft/merge_llava_onevision_eagle.py +++ b/data_prepare/sft/merge_llava_onevision_eagle.py @@ -83,3 +83,5 @@ def load_jsonl(file_path): import fire fire.Fire(main) + + diff --git a/data_prepare/sft/mtwi.py b/data_prepare/sft/mtwi.py index 2851751d..890fc8ee 100644 --- a/data_prepare/sft/mtwi.py +++ b/data_prepare/sft/mtwi.py @@ -148,3 +148,5 @@ def clip(x): jsonl_file.write("\n") # Add a newline after each JSON object print("Processing complete.") + + diff --git a/data_prepare/sft/preprocess_art_shangy.py b/data_prepare/sft/preprocess_art_shangy.py index 7f0de6c3..c4b6b3ec 100644 --- a/data_prepare/sft/preprocess_art_shangy.py +++ b/data_prepare/sft/preprocess_art_shangy.py @@ -87,3 +87,5 @@ def convert_txt_to_jsonl(input_file, output_file): output_file = "./art500k_processed.jsonl" base_path = "./" convert_txt_to_jsonl(input_file, output_file) + + diff --git a/data_prepare/sft/preprocess_cambrian.py b/data_prepare/sft/preprocess_cambrian.py index 5ebbe344..48ff3bc1 100644 --- a/data_prepare/sft/preprocess_cambrian.py +++ b/data_prepare/sft/preprocess_cambrian.py @@ -70,3 +70,5 @@ def check_sample(sample): with open("cambrian_doc_1275k.json", "w") as f: json.dump(cambrian_doc_1275k, f) + + diff --git a/data_prepare/sft/preprocess_cambrian_eagle.py b/data_prepare/sft/preprocess_cambrian_eagle.py index c979149d..f0444b99 100644 --- a/data_prepare/sft/preprocess_cambrian_eagle.py +++ b/data_prepare/sft/preprocess_cambrian_eagle.py @@ -75,3 +75,5 @@ def check_sample(sample): with open(os.path.join(base_path, "cambrian_adlr_train.json"), "w") as f: json.dump(cambrian_eagle, f) + + diff --git a/data_prepare/sft/preprocess_docreason.py b/data_prepare/sft/preprocess_docreason.py index 8971e64e..eb06fff4 100644 --- a/data_prepare/sft/preprocess_docreason.py +++ b/data_prepare/sft/preprocess_docreason.py @@ -29,3 +29,5 @@ with open(json_file_processed, "w") as f: json.dump(records, f) + + diff --git a/data_prepare/sft/preprocess_flan.py b/data_prepare/sft/preprocess_flan.py index efde19b3..0ddb420d 100644 --- a/data_prepare/sft/preprocess_flan.py +++ b/data_prepare/sft/preprocess_flan.py @@ -70,3 +70,5 @@ with open(os.path.join(save_path, "text_flan_1m.pkl"), "wb") as f: pickle.dump(filtered_samples, f) + + diff --git a/data_prepare/sft/preprocess_idefics2.py b/data_prepare/sft/preprocess_idefics2.py index 6657c91d..65083279 100644 --- a/data_prepare/sft/preprocess_idefics2.py +++ b/data_prepare/sft/preprocess_idefics2.py @@ -130,3 +130,5 @@ def process_dataset(args): # Map the process_dataset function to the arguments for _ in tqdm(pool.imap_unordered(process_dataset, args), total=len(args), desc="Processing datasets"): pass + + diff --git a/data_prepare/sft/preprocess_idefics2_eagle.py b/data_prepare/sft/preprocess_idefics2_eagle.py index 175d30cb..8354d35b 100644 --- a/data_prepare/sft/preprocess_idefics2_eagle.py +++ b/data_prepare/sft/preprocess_idefics2_eagle.py @@ -170,3 +170,5 @@ def main( import fire fire.Fire(main) + + diff --git a/data_prepare/sft/preprocess_kvqa.py b/data_prepare/sft/preprocess_kvqa.py index 92c07244..e024882f 100644 --- a/data_prepare/sft/preprocess_kvqa.py +++ b/data_prepare/sft/preprocess_kvqa.py @@ -45,3 +45,5 @@ json.dump(new_records, f) print(len(new_records)) + + diff --git a/data_prepare/sft/preprocess_llava_onevision.py b/data_prepare/sft/preprocess_llava_onevision.py index 8ca52fb7..1f741861 100644 --- a/data_prepare/sft/preprocess_llava_onevision.py +++ b/data_prepare/sft/preprocess_llava_onevision.py @@ -103,3 +103,5 @@ def main( import fire fire.Fire(main) + + diff --git a/data_prepare/sft/preprocess_m3it.py b/data_prepare/sft/preprocess_m3it.py index 19b16a2f..b7d954b2 100644 --- a/data_prepare/sft/preprocess_m3it.py +++ b/data_prepare/sft/preprocess_m3it.py @@ -82,3 +82,5 @@ save_filename = os.path.join(save_path, save_filename) with open(save_filename, "wb") as f: pickle.dump(dataset, f) + + diff --git a/data_prepare/sft/preprocess_metamathqa.py b/data_prepare/sft/preprocess_metamathqa.py index e3ee52e9..a5151db4 100644 --- a/data_prepare/sft/preprocess_metamathqa.py +++ b/data_prepare/sft/preprocess_metamathqa.py @@ -25,3 +25,5 @@ with open(json_file_processed, "w") as f: json.dump(records, f) + + diff --git a/data_prepare/sft/preprocess_viquae.py b/data_prepare/sft/preprocess_viquae.py index 3487dac7..6d933c50 100644 --- a/data_prepare/sft/preprocess_viquae.py +++ b/data_prepare/sft/preprocess_viquae.py @@ -45,3 +45,5 @@ def base64_to_pil_image(base64_string): with open(os.path.join(base_path, "viquae_processed.json"), "w") as f: json.dump(new_records, f) + + diff --git a/data_prepare/sft/split_vflan.py b/data_prepare/sft/split_vflan.py index 93d0715f..13a78053 100644 --- a/data_prepare/sft/split_vflan.py +++ b/data_prepare/sft/split_vflan.py @@ -57,3 +57,5 @@ print(f"Finished writing part-{counter:05d}.pkl!") counter += 1 + + diff --git a/data_prepare/sft/unichart_pretrain.py b/data_prepare/sft/unichart_pretrain.py index a544a829..47b8b3cd 100644 --- a/data_prepare/sft/unichart_pretrain.py +++ b/data_prepare/sft/unichart_pretrain.py @@ -57,3 +57,5 @@ pbar.update(1) print("Processing complete.") + + diff --git a/data_prepare/sft/unichart_sft.py b/data_prepare/sft/unichart_sft.py index 81e46c70..3bb29054 100644 --- a/data_prepare/sft/unichart_sft.py +++ b/data_prepare/sft/unichart_sft.py @@ -59,3 +59,5 @@ pbar.update(1) print("Processing complete.") + + diff --git a/llava/__init__.py b/llava/__init__.py index 37a40560..1d61b293 100755 --- a/llava/__init__.py +++ b/llava/__init__.py @@ -1,2 +1,4 @@ from .entry import * from .media import * + + diff --git a/llava/cli/eval.py b/llava/cli/eval.py index 53939abb..78997e46 100644 --- a/llava/cli/eval.py +++ b/llava/cli/eval.py @@ -198,3 +198,5 @@ def main() -> None: if __name__ == "__main__": main() + + diff --git a/llava/cli/infer.py b/llava/cli/infer.py index b45ecbdd..f4c74c01 100644 --- a/llava/cli/infer.py +++ b/llava/cli/infer.py @@ -42,3 +42,5 @@ def main() -> None: if __name__ == "__main__": main() + + diff --git a/llava/cli/run.py b/llava/cli/run.py index e2fea74a..8af114f5 100644 --- a/llava/cli/run.py +++ b/llava/cli/run.py @@ -132,3 +132,5 @@ def main() -> None: if __name__ == "__main__": main() + + diff --git a/llava/cli/upload2hf.py b/llava/cli/upload2hf.py index 5387915b..ca6d0e62 100644 --- a/llava/cli/upload2hf.py +++ b/llava/cli/upload2hf.py @@ -222,3 +222,5 @@ def main(): if __name__ == "__main__": main() + + diff --git a/llava/constants.py b/llava/constants.py index bb1ff810..84385fdf 100755 --- a/llava/constants.py +++ b/llava/constants.py @@ -30,3 +30,5 @@ "image": "", "video": "", } + + diff --git a/llava/conversation.py b/llava/conversation.py index 0d8b857a..e9572392 100755 --- a/llava/conversation.py +++ b/llava/conversation.py @@ -189,3 +189,5 @@ def auto_set_conversation_mode(model_name_or_path: str) -> str: logger.info(f"Setting conversation mode to `{v}` based on model name/path `{model_name_or_path}`.") default_conversation = conv_templates[v] return + + diff --git a/llava/data/__init__.py b/llava/data/__init__.py index 87b1dc8b..d4eadccb 100755 --- a/llava/data/__init__.py +++ b/llava/data/__init__.py @@ -3,3 +3,5 @@ from .dataset_impl import * from .datasets_mixture import * from .simple_vila_webdataset import VILAWebDataset + + diff --git a/llava/data/base.py b/llava/data/base.py index a4cbcf19..8450b744 100644 --- a/llava/data/base.py +++ b/llava/data/base.py @@ -84,3 +84,5 @@ def __getitem__(self, index: int) -> Dict[str, Any]: def __len__(self) -> int: return len(self.instances) + + diff --git a/llava/data/builder.py b/llava/data/builder.py index 54c7a05f..ae1ae967 100644 --- a/llava/data/builder.py +++ b/llava/data/builder.py @@ -213,3 +213,5 @@ def build_dataset_legacy( data_args=data_args, training_args=training_args, ) + + diff --git a/llava/data/collate.py b/llava/data/collate.py index fac3fe5f..193f5afd 100644 --- a/llava/data/collate.py +++ b/llava/data/collate.py @@ -88,3 +88,5 @@ def __call__(self, instances: Sequence[Dict[str, Any]]) -> Dict[str, Any]: "labels": labels, "attention_mask": attention_mask, } + + diff --git a/llava/data/dataset.py b/llava/data/dataset.py index 54679038..2659d735 100755 --- a/llava/data/dataset.py +++ b/llava/data/dataset.py @@ -1576,3 +1576,5 @@ def make_supervised_data_module( train_dataset=train_dataset, data_collator=data_collator, ) + + diff --git a/llava/data/dataset_impl/__init__.py b/llava/data/dataset_impl/__init__.py index d5a8bc7f..107173e2 100644 --- a/llava/data/dataset_impl/__init__.py +++ b/llava/data/dataset_impl/__init__.py @@ -2,3 +2,5 @@ from .lita import * from .llava import * from .llava_cot import * + + diff --git a/llava/data/dataset_impl/coyo_qa.py b/llava/data/dataset_impl/coyo_qa.py index 18666457..ce764fcb 100644 --- a/llava/data/dataset_impl/coyo_qa.py +++ b/llava/data/dataset_impl/coyo_qa.py @@ -203,3 +203,5 @@ def __getitem__(self, i) -> Dict[str, torch.Tensor]: data_dict["block_sizes"] = block_sizes return data_dict + + diff --git a/llava/data/dataset_impl/coyo_recap.py b/llava/data/dataset_impl/coyo_recap.py index caff3a5c..8979799c 100755 --- a/llava/data/dataset_impl/coyo_recap.py +++ b/llava/data/dataset_impl/coyo_recap.py @@ -88,3 +88,5 @@ def __init__( else: self.caption_choice = data_args.caption_choice print(f"Current caption choice: {self.caption_choice}.") + + diff --git a/llava/data/dataset_impl/dummy.py b/llava/data/dataset_impl/dummy.py index 007d569f..2173f0cc 100644 --- a/llava/data/dataset_impl/dummy.py +++ b/llava/data/dataset_impl/dummy.py @@ -92,3 +92,5 @@ def process(self, instance: Dict[str, Any]) -> List[Dict[str, Any]]: # Add media to the beginning of the first message messages[0]["value"] = medias + [messages[0]["value"]] return messages + + diff --git a/llava/data/dataset_impl/lita.py b/llava/data/dataset_impl/lita.py index 9788b397..561a55bc 100644 --- a/llava/data/dataset_impl/lita.py +++ b/llava/data/dataset_impl/lita.py @@ -241,3 +241,5 @@ def process(self, instance: Dict[str, Any]) -> List[Dict[str, Any]]: video = Video(instance["video_path"]) messages[0]["value"] = [video, messages[0]["value"]] return messages + + diff --git a/llava/data/dataset_impl/llava.py b/llava/data/dataset_impl/llava.py index c7041fa6..78b5c0c6 100644 --- a/llava/data/dataset_impl/llava.py +++ b/llava/data/dataset_impl/llava.py @@ -132,3 +132,5 @@ def process(self, instance: Dict[str, Any]) -> List[Dict[str, Any]]: new_value = [*img_list, value.replace(DEFAULT_IMAGE_TOKEN, "").strip()] messages[0]["value"] = new_value return messages + + diff --git a/llava/data/dataset_impl/llava_cot.py b/llava/data/dataset_impl/llava_cot.py index 61f7bd1d..fbf52c10 100644 --- a/llava/data/dataset_impl/llava_cot.py +++ b/llava/data/dataset_impl/llava_cot.py @@ -174,3 +174,5 @@ def process_multi_img(self, instance: Dict[str, Any], index: int) -> List[Dict[s assert len(medias) == 0, f"#Num of does not match the number of images in the instance. {instance}" return messages + + diff --git a/llava/data/dataset_impl/panda70m.py b/llava/data/dataset_impl/panda70m.py index 3d9a2dae..47123e88 100755 --- a/llava/data/dataset_impl/panda70m.py +++ b/llava/data/dataset_impl/panda70m.py @@ -229,3 +229,5 @@ def cleanup_corrupted_videos( jinfo = json.load(open(json_path)) img_t = load_video(video_path, jinfo=jinfo) print(img_t) + + diff --git a/llava/data/dataset_impl/sam.py b/llava/data/dataset_impl/sam.py index 604d8067..cc68597b 100755 --- a/llava/data/dataset_impl/sam.py +++ b/llava/data/dataset_impl/sam.py @@ -230,3 +230,5 @@ def __getitem__(self, i) -> Dict[str, torch.Tensor]: for idx, data in enumerate(dst): print(idx, data.keys()) # nvcode: off + + diff --git a/llava/data/dataset_impl/textocr.py b/llava/data/dataset_impl/textocr.py index aa37ba32..40c38b94 100755 --- a/llava/data/dataset_impl/textocr.py +++ b/llava/data/dataset_impl/textocr.py @@ -289,3 +289,5 @@ def __getitem__(self, index): for idx in range(2): pprint(dataset[idx]) + + diff --git a/llava/data/dataset_impl/utils.py b/llava/data/dataset_impl/utils.py index e3dc986f..8870ffd4 100644 --- a/llava/data/dataset_impl/utils.py +++ b/llava/data/dataset_impl/utils.py @@ -27,3 +27,5 @@ def _remove_media_tokens(text: str) -> str: for token in ["", "