diff --git a/data_prepare/coyo/coyo_downloader.py b/data_prepare/coyo/coyo_downloader.py index 14f46c2c..82192215 100644 --- a/data_prepare/coyo/coyo_downloader.py +++ b/data_prepare/coyo/coyo_downloader.py @@ -136,3 +136,5 @@ async def main(data_list): asyncio.run(main(metadata_list)) + + diff --git a/data_prepare/coyo/coyo_splitter.py b/data_prepare/coyo/coyo_splitter.py index 18ac6e14..e53ddc85 100644 --- a/data_prepare/coyo/coyo_splitter.py +++ b/data_prepare/coyo/coyo_splitter.py @@ -46,3 +46,5 @@ f.write(str(len(samples2write))) counter += 1 + + diff --git a/data_prepare/mmc4/mmc4_downloader.py b/data_prepare/mmc4/mmc4_downloader.py index 17147f39..51ee9f53 100644 --- a/data_prepare/mmc4/mmc4_downloader.py +++ b/data_prepare/mmc4/mmc4_downloader.py @@ -151,3 +151,5 @@ async def main(data_list): asyncio.run(main(all_data)) + + diff --git a/data_prepare/mmc4/mmc4_filter_and_counter.py b/data_prepare/mmc4/mmc4_filter_and_counter.py index 6bfd3b82..d1f07060 100644 --- a/data_prepare/mmc4/mmc4_filter_and_counter.py +++ b/data_prepare/mmc4/mmc4_filter_and_counter.py @@ -56,3 +56,5 @@ with open(os.path.join(output_path, pkl.replace(".pkl", ".count")), "w") as f: f.write(str(len(filtered_annotation))) + + diff --git a/data_prepare/mmc4/mmc4_merger.py b/data_prepare/mmc4/mmc4_merger.py index d9a0dde3..0ae0bafe 100644 --- a/data_prepare/mmc4/mmc4_merger.py +++ b/data_prepare/mmc4/mmc4_merger.py @@ -46,3 +46,5 @@ with open(os.path.join(output_path, shard_name.replace(".jsonl", ".pkl")), "wb") as f: pickle.dump(data_list, f) + + diff --git a/data_prepare/panda_split.py b/data_prepare/panda_split.py index 092df3dc..43d410b2 100644 --- a/data_prepare/panda_split.py +++ b/data_prepare/panda_split.py @@ -107,3 +107,5 @@ def split_video_to_clips( import fire fire.Fire(split_video_to_clips) + + diff --git a/data_prepare/sft/ART1_2.py b/data_prepare/sft/ART1_2.py index d7201bbe..9c8e9fb7 100644 --- a/data_prepare/sft/ART1_2.py +++ b/data_prepare/sft/ART1_2.py @@ -86,3 +86,5 @@ def coords_list2bbox(coords_list: List[List[int]], width: int, height: int) -> s jsonl_file.write("\n") # Add a newline after each JSON object print("Processing complete.") + + diff --git a/data_prepare/sft/ESTVQA.py b/data_prepare/sft/ESTVQA.py index 3142e445..37adea62 100644 --- a/data_prepare/sft/ESTVQA.py +++ b/data_prepare/sft/ESTVQA.py @@ -33,3 +33,5 @@ def is_english(text): jsonl_file.write("\n") print("Processing complete.") + + diff --git a/data_prepare/sft/LSVT.py b/data_prepare/sft/LSVT.py index bcf1b6b4..14aa0e2e 100644 --- a/data_prepare/sft/LSVT.py +++ b/data_prepare/sft/LSVT.py @@ -84,3 +84,5 @@ def coords_list2bbox(coords_list: List[List[int]], width: int, height: int) -> s jsonl_file.write("\n") # Add a newline after each JSON object print("Processing complete.") + + diff --git a/data_prepare/sft/POIE.py b/data_prepare/sft/POIE.py index 5657e420..5e243a70 100644 --- a/data_prepare/sft/POIE.py +++ b/data_prepare/sft/POIE.py @@ -85,3 +85,5 @@ def coords_list2bbox(coords_list: List[List[int]], width: int, height: int) -> s jsonl_file.write("\n") # Add a newline after each JSON object print("Processing complete.") + + diff --git a/data_prepare/sft/ReCTS.py b/data_prepare/sft/ReCTS.py index 72e3c706..87860b06 100644 --- a/data_prepare/sft/ReCTS.py +++ b/data_prepare/sft/ReCTS.py @@ -105,3 +105,5 @@ def coords_list2bbox(coords_list: List[List[int]], width: int, height: int) -> s jsonl_file.write("\n") # Add a newline after each JSON object print("Processing complete.") + + diff --git a/data_prepare/sft/SROIE.py b/data_prepare/sft/SROIE.py index a71202c1..663c3a46 100644 --- a/data_prepare/sft/SROIE.py +++ b/data_prepare/sft/SROIE.py @@ -66,3 +66,5 @@ # Now 'images' contains all the opened images from the image_root directory print(f"Successfully opened {len(images)} out of {len(image_files)} images.") + + diff --git a/data_prepare/sft/merge_idefics2.py b/data_prepare/sft/merge_idefics2.py index fc07cd29..cea5ce33 100644 --- a/data_prepare/sft/merge_idefics2.py +++ b/data_prepare/sft/merge_idefics2.py @@ -47,3 +47,5 @@ def load_jsonl(file_path): for item in all_data: json.dump(item, f) f.write("\n") + + diff --git a/data_prepare/sft/merge_llava_onevision.py b/data_prepare/sft/merge_llava_onevision.py index ff809d9b..c503b3c9 100644 --- a/data_prepare/sft/merge_llava_onevision.py +++ b/data_prepare/sft/merge_llava_onevision.py @@ -99,3 +99,5 @@ def load_jsonl(file_path): import fire fire.Fire(main) + + diff --git a/data_prepare/sft/merge_llava_onevision_eagle.py b/data_prepare/sft/merge_llava_onevision_eagle.py index 4fa6cea6..35aae1b9 100644 --- a/data_prepare/sft/merge_llava_onevision_eagle.py +++ b/data_prepare/sft/merge_llava_onevision_eagle.py @@ -83,3 +83,5 @@ def load_jsonl(file_path): import fire fire.Fire(main) + + diff --git a/data_prepare/sft/mtwi.py b/data_prepare/sft/mtwi.py index 2851751d..890fc8ee 100644 --- a/data_prepare/sft/mtwi.py +++ b/data_prepare/sft/mtwi.py @@ -148,3 +148,5 @@ def clip(x): jsonl_file.write("\n") # Add a newline after each JSON object print("Processing complete.") + + diff --git a/data_prepare/sft/preprocess_art_shangy.py b/data_prepare/sft/preprocess_art_shangy.py index 7f0de6c3..c4b6b3ec 100644 --- a/data_prepare/sft/preprocess_art_shangy.py +++ b/data_prepare/sft/preprocess_art_shangy.py @@ -87,3 +87,5 @@ def convert_txt_to_jsonl(input_file, output_file): output_file = "./art500k_processed.jsonl" base_path = "./" convert_txt_to_jsonl(input_file, output_file) + + diff --git a/data_prepare/sft/preprocess_cambrian.py b/data_prepare/sft/preprocess_cambrian.py index 5ebbe344..48ff3bc1 100644 --- a/data_prepare/sft/preprocess_cambrian.py +++ b/data_prepare/sft/preprocess_cambrian.py @@ -70,3 +70,5 @@ def check_sample(sample): with open("cambrian_doc_1275k.json", "w") as f: json.dump(cambrian_doc_1275k, f) + + diff --git a/data_prepare/sft/preprocess_cambrian_eagle.py b/data_prepare/sft/preprocess_cambrian_eagle.py index c979149d..f0444b99 100644 --- a/data_prepare/sft/preprocess_cambrian_eagle.py +++ b/data_prepare/sft/preprocess_cambrian_eagle.py @@ -75,3 +75,5 @@ def check_sample(sample): with open(os.path.join(base_path, "cambrian_adlr_train.json"), "w") as f: json.dump(cambrian_eagle, f) + + diff --git a/data_prepare/sft/preprocess_docreason.py b/data_prepare/sft/preprocess_docreason.py index 8971e64e..eb06fff4 100644 --- a/data_prepare/sft/preprocess_docreason.py +++ b/data_prepare/sft/preprocess_docreason.py @@ -29,3 +29,5 @@ with open(json_file_processed, "w") as f: json.dump(records, f) + + diff --git a/data_prepare/sft/preprocess_flan.py b/data_prepare/sft/preprocess_flan.py index efde19b3..0ddb420d 100644 --- a/data_prepare/sft/preprocess_flan.py +++ b/data_prepare/sft/preprocess_flan.py @@ -70,3 +70,5 @@ with open(os.path.join(save_path, "text_flan_1m.pkl"), "wb") as f: pickle.dump(filtered_samples, f) + + diff --git a/data_prepare/sft/preprocess_idefics2.py b/data_prepare/sft/preprocess_idefics2.py index 6657c91d..65083279 100644 --- a/data_prepare/sft/preprocess_idefics2.py +++ b/data_prepare/sft/preprocess_idefics2.py @@ -130,3 +130,5 @@ def process_dataset(args): # Map the process_dataset function to the arguments for _ in tqdm(pool.imap_unordered(process_dataset, args), total=len(args), desc="Processing datasets"): pass + + diff --git a/data_prepare/sft/preprocess_idefics2_eagle.py b/data_prepare/sft/preprocess_idefics2_eagle.py index 175d30cb..8354d35b 100644 --- a/data_prepare/sft/preprocess_idefics2_eagle.py +++ b/data_prepare/sft/preprocess_idefics2_eagle.py @@ -170,3 +170,5 @@ def main( import fire fire.Fire(main) + + diff --git a/data_prepare/sft/preprocess_kvqa.py b/data_prepare/sft/preprocess_kvqa.py index 92c07244..e024882f 100644 --- a/data_prepare/sft/preprocess_kvqa.py +++ b/data_prepare/sft/preprocess_kvqa.py @@ -45,3 +45,5 @@ json.dump(new_records, f) print(len(new_records)) + + diff --git a/data_prepare/sft/preprocess_llava_onevision.py b/data_prepare/sft/preprocess_llava_onevision.py index 8ca52fb7..1f741861 100644 --- a/data_prepare/sft/preprocess_llava_onevision.py +++ b/data_prepare/sft/preprocess_llava_onevision.py @@ -103,3 +103,5 @@ def main( import fire fire.Fire(main) + + diff --git a/data_prepare/sft/preprocess_m3it.py b/data_prepare/sft/preprocess_m3it.py index 19b16a2f..b7d954b2 100644 --- a/data_prepare/sft/preprocess_m3it.py +++ b/data_prepare/sft/preprocess_m3it.py @@ -82,3 +82,5 @@ save_filename = os.path.join(save_path, save_filename) with open(save_filename, "wb") as f: pickle.dump(dataset, f) + + diff --git a/data_prepare/sft/preprocess_metamathqa.py b/data_prepare/sft/preprocess_metamathqa.py index e3ee52e9..a5151db4 100644 --- a/data_prepare/sft/preprocess_metamathqa.py +++ b/data_prepare/sft/preprocess_metamathqa.py @@ -25,3 +25,5 @@ with open(json_file_processed, "w") as f: json.dump(records, f) + + diff --git a/data_prepare/sft/preprocess_viquae.py b/data_prepare/sft/preprocess_viquae.py index 3487dac7..6d933c50 100644 --- a/data_prepare/sft/preprocess_viquae.py +++ b/data_prepare/sft/preprocess_viquae.py @@ -45,3 +45,5 @@ def base64_to_pil_image(base64_string): with open(os.path.join(base_path, "viquae_processed.json"), "w") as f: json.dump(new_records, f) + + diff --git a/data_prepare/sft/split_vflan.py b/data_prepare/sft/split_vflan.py index 93d0715f..13a78053 100644 --- a/data_prepare/sft/split_vflan.py +++ b/data_prepare/sft/split_vflan.py @@ -57,3 +57,5 @@ print(f"Finished writing part-{counter:05d}.pkl!") counter += 1 + + diff --git a/data_prepare/sft/unichart_pretrain.py b/data_prepare/sft/unichart_pretrain.py index a544a829..47b8b3cd 100644 --- a/data_prepare/sft/unichart_pretrain.py +++ b/data_prepare/sft/unichart_pretrain.py @@ -57,3 +57,5 @@ pbar.update(1) print("Processing complete.") + + diff --git a/data_prepare/sft/unichart_sft.py b/data_prepare/sft/unichart_sft.py index 81e46c70..3bb29054 100644 --- a/data_prepare/sft/unichart_sft.py +++ b/data_prepare/sft/unichart_sft.py @@ -59,3 +59,5 @@ pbar.update(1) print("Processing complete.") + + diff --git a/llava/__init__.py b/llava/__init__.py index 37a40560..1d61b293 100755 --- a/llava/__init__.py +++ b/llava/__init__.py @@ -1,2 +1,4 @@ from .entry import * from .media import * + + diff --git a/llava/cli/eval.py b/llava/cli/eval.py index 53939abb..78997e46 100644 --- a/llava/cli/eval.py +++ b/llava/cli/eval.py @@ -198,3 +198,5 @@ def main() -> None: if __name__ == "__main__": main() + + diff --git a/llava/cli/infer.py b/llava/cli/infer.py index b45ecbdd..f4c74c01 100644 --- a/llava/cli/infer.py +++ b/llava/cli/infer.py @@ -42,3 +42,5 @@ def main() -> None: if __name__ == "__main__": main() + + diff --git a/llava/cli/run.py b/llava/cli/run.py index e2fea74a..8af114f5 100644 --- a/llava/cli/run.py +++ b/llava/cli/run.py @@ -132,3 +132,5 @@ def main() -> None: if __name__ == "__main__": main() + + diff --git a/llava/cli/upload2hf.py b/llava/cli/upload2hf.py index 5387915b..ca6d0e62 100644 --- a/llava/cli/upload2hf.py +++ b/llava/cli/upload2hf.py @@ -222,3 +222,5 @@ def main(): if __name__ == "__main__": main() + + diff --git a/llava/constants.py b/llava/constants.py index bb1ff810..84385fdf 100755 --- a/llava/constants.py +++ b/llava/constants.py @@ -30,3 +30,5 @@ "image": "", "video": "", } + + diff --git a/llava/conversation.py b/llava/conversation.py index 0d8b857a..e9572392 100755 --- a/llava/conversation.py +++ b/llava/conversation.py @@ -189,3 +189,5 @@ def auto_set_conversation_mode(model_name_or_path: str) -> str: logger.info(f"Setting conversation mode to `{v}` based on model name/path `{model_name_or_path}`.") default_conversation = conv_templates[v] return + + diff --git a/llava/data/__init__.py b/llava/data/__init__.py index 87b1dc8b..d4eadccb 100755 --- a/llava/data/__init__.py +++ b/llava/data/__init__.py @@ -3,3 +3,5 @@ from .dataset_impl import * from .datasets_mixture import * from .simple_vila_webdataset import VILAWebDataset + + diff --git a/llava/data/base.py b/llava/data/base.py index a4cbcf19..8450b744 100644 --- a/llava/data/base.py +++ b/llava/data/base.py @@ -84,3 +84,5 @@ def __getitem__(self, index: int) -> Dict[str, Any]: def __len__(self) -> int: return len(self.instances) + + diff --git a/llava/data/builder.py b/llava/data/builder.py index 54c7a05f..ae1ae967 100644 --- a/llava/data/builder.py +++ b/llava/data/builder.py @@ -213,3 +213,5 @@ def build_dataset_legacy( data_args=data_args, training_args=training_args, ) + + diff --git a/llava/data/collate.py b/llava/data/collate.py index fac3fe5f..193f5afd 100644 --- a/llava/data/collate.py +++ b/llava/data/collate.py @@ -88,3 +88,5 @@ def __call__(self, instances: Sequence[Dict[str, Any]]) -> Dict[str, Any]: "labels": labels, "attention_mask": attention_mask, } + + diff --git a/llava/data/dataset.py b/llava/data/dataset.py index 54679038..2659d735 100755 --- a/llava/data/dataset.py +++ b/llava/data/dataset.py @@ -1576,3 +1576,5 @@ def make_supervised_data_module( train_dataset=train_dataset, data_collator=data_collator, ) + + diff --git a/llava/data/dataset_impl/__init__.py b/llava/data/dataset_impl/__init__.py index d5a8bc7f..107173e2 100644 --- a/llava/data/dataset_impl/__init__.py +++ b/llava/data/dataset_impl/__init__.py @@ -2,3 +2,5 @@ from .lita import * from .llava import * from .llava_cot import * + + diff --git a/llava/data/dataset_impl/coyo_qa.py b/llava/data/dataset_impl/coyo_qa.py index 18666457..ce764fcb 100644 --- a/llava/data/dataset_impl/coyo_qa.py +++ b/llava/data/dataset_impl/coyo_qa.py @@ -203,3 +203,5 @@ def __getitem__(self, i) -> Dict[str, torch.Tensor]: data_dict["block_sizes"] = block_sizes return data_dict + + diff --git a/llava/data/dataset_impl/coyo_recap.py b/llava/data/dataset_impl/coyo_recap.py index caff3a5c..8979799c 100755 --- a/llava/data/dataset_impl/coyo_recap.py +++ b/llava/data/dataset_impl/coyo_recap.py @@ -88,3 +88,5 @@ def __init__( else: self.caption_choice = data_args.caption_choice print(f"Current caption choice: {self.caption_choice}.") + + diff --git a/llava/data/dataset_impl/dummy.py b/llava/data/dataset_impl/dummy.py index 007d569f..2173f0cc 100644 --- a/llava/data/dataset_impl/dummy.py +++ b/llava/data/dataset_impl/dummy.py @@ -92,3 +92,5 @@ def process(self, instance: Dict[str, Any]) -> List[Dict[str, Any]]: # Add media to the beginning of the first message messages[0]["value"] = medias + [messages[0]["value"]] return messages + + diff --git a/llava/data/dataset_impl/lita.py b/llava/data/dataset_impl/lita.py index 9788b397..561a55bc 100644 --- a/llava/data/dataset_impl/lita.py +++ b/llava/data/dataset_impl/lita.py @@ -241,3 +241,5 @@ def process(self, instance: Dict[str, Any]) -> List[Dict[str, Any]]: video = Video(instance["video_path"]) messages[0]["value"] = [video, messages[0]["value"]] return messages + + diff --git a/llava/data/dataset_impl/llava.py b/llava/data/dataset_impl/llava.py index c7041fa6..78b5c0c6 100644 --- a/llava/data/dataset_impl/llava.py +++ b/llava/data/dataset_impl/llava.py @@ -132,3 +132,5 @@ def process(self, instance: Dict[str, Any]) -> List[Dict[str, Any]]: new_value = [*img_list, value.replace(DEFAULT_IMAGE_TOKEN, "").strip()] messages[0]["value"] = new_value return messages + + diff --git a/llava/data/dataset_impl/llava_cot.py b/llava/data/dataset_impl/llava_cot.py index 61f7bd1d..fbf52c10 100644 --- a/llava/data/dataset_impl/llava_cot.py +++ b/llava/data/dataset_impl/llava_cot.py @@ -174,3 +174,5 @@ def process_multi_img(self, instance: Dict[str, Any], index: int) -> List[Dict[s assert len(medias) == 0, f"#Num of does not match the number of images in the instance. {instance}" return messages + + diff --git a/llava/data/dataset_impl/panda70m.py b/llava/data/dataset_impl/panda70m.py index 3d9a2dae..47123e88 100755 --- a/llava/data/dataset_impl/panda70m.py +++ b/llava/data/dataset_impl/panda70m.py @@ -229,3 +229,5 @@ def cleanup_corrupted_videos( jinfo = json.load(open(json_path)) img_t = load_video(video_path, jinfo=jinfo) print(img_t) + + diff --git a/llava/data/dataset_impl/sam.py b/llava/data/dataset_impl/sam.py index 604d8067..cc68597b 100755 --- a/llava/data/dataset_impl/sam.py +++ b/llava/data/dataset_impl/sam.py @@ -230,3 +230,5 @@ def __getitem__(self, i) -> Dict[str, torch.Tensor]: for idx, data in enumerate(dst): print(idx, data.keys()) # nvcode: off + + diff --git a/llava/data/dataset_impl/textocr.py b/llava/data/dataset_impl/textocr.py index aa37ba32..40c38b94 100755 --- a/llava/data/dataset_impl/textocr.py +++ b/llava/data/dataset_impl/textocr.py @@ -289,3 +289,5 @@ def __getitem__(self, index): for idx in range(2): pprint(dataset[idx]) + + diff --git a/llava/data/dataset_impl/utils.py b/llava/data/dataset_impl/utils.py index e3dc986f..8870ffd4 100644 --- a/llava/data/dataset_impl/utils.py +++ b/llava/data/dataset_impl/utils.py @@ -27,3 +27,5 @@ def _remove_media_tokens(text: str) -> str: for token in ["", "