Skip to content

Commit

Permalink
Cosmos Nemotron release
Browse files Browse the repository at this point in the history
  • Loading branch information
Lyken17 committed Jan 7, 2025
1 parent 37ae801 commit 33b4f1a
Show file tree
Hide file tree
Showing 284 changed files with 568 additions and 0 deletions.
2 changes: 2 additions & 0 deletions data_prepare/coyo/coyo_downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,3 +136,5 @@ async def main(data_list):


asyncio.run(main(metadata_list))


2 changes: 2 additions & 0 deletions data_prepare/coyo/coyo_splitter.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,3 +46,5 @@
f.write(str(len(samples2write)))

counter += 1


2 changes: 2 additions & 0 deletions data_prepare/mmc4/mmc4_downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,3 +151,5 @@ async def main(data_list):


asyncio.run(main(all_data))


2 changes: 2 additions & 0 deletions data_prepare/mmc4/mmc4_filter_and_counter.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,3 +56,5 @@

with open(os.path.join(output_path, pkl.replace(".pkl", ".count")), "w") as f:
f.write(str(len(filtered_annotation)))


2 changes: 2 additions & 0 deletions data_prepare/mmc4/mmc4_merger.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,3 +46,5 @@

with open(os.path.join(output_path, shard_name.replace(".jsonl", ".pkl")), "wb") as f:
pickle.dump(data_list, f)


2 changes: 2 additions & 0 deletions data_prepare/panda_split.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,3 +107,5 @@ def split_video_to_clips(
import fire

fire.Fire(split_video_to_clips)


2 changes: 2 additions & 0 deletions data_prepare/sft/ART1_2.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,3 +86,5 @@ def coords_list2bbox(coords_list: List[List[int]], width: int, height: int) -> s
jsonl_file.write("\n") # Add a newline after each JSON object

print("Processing complete.")


2 changes: 2 additions & 0 deletions data_prepare/sft/ESTVQA.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,3 +33,5 @@ def is_english(text):
jsonl_file.write("\n")

print("Processing complete.")


2 changes: 2 additions & 0 deletions data_prepare/sft/LSVT.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,3 +84,5 @@ def coords_list2bbox(coords_list: List[List[int]], width: int, height: int) -> s
jsonl_file.write("\n") # Add a newline after each JSON object

print("Processing complete.")


2 changes: 2 additions & 0 deletions data_prepare/sft/POIE.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,3 +85,5 @@ def coords_list2bbox(coords_list: List[List[int]], width: int, height: int) -> s
jsonl_file.write("\n") # Add a newline after each JSON object

print("Processing complete.")


2 changes: 2 additions & 0 deletions data_prepare/sft/ReCTS.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,3 +105,5 @@ def coords_list2bbox(coords_list: List[List[int]], width: int, height: int) -> s
jsonl_file.write("\n") # Add a newline after each JSON object

print("Processing complete.")


2 changes: 2 additions & 0 deletions data_prepare/sft/SROIE.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,3 +66,5 @@

# Now 'images' contains all the opened images from the image_root directory
print(f"Successfully opened {len(images)} out of {len(image_files)} images.")


2 changes: 2 additions & 0 deletions data_prepare/sft/merge_idefics2.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,3 +47,5 @@ def load_jsonl(file_path):
for item in all_data:
json.dump(item, f)
f.write("\n")


2 changes: 2 additions & 0 deletions data_prepare/sft/merge_llava_onevision.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,3 +99,5 @@ def load_jsonl(file_path):
import fire

fire.Fire(main)


2 changes: 2 additions & 0 deletions data_prepare/sft/merge_llava_onevision_eagle.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,3 +83,5 @@ def load_jsonl(file_path):
import fire

fire.Fire(main)


2 changes: 2 additions & 0 deletions data_prepare/sft/mtwi.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,3 +148,5 @@ def clip(x):
jsonl_file.write("\n") # Add a newline after each JSON object

print("Processing complete.")


2 changes: 2 additions & 0 deletions data_prepare/sft/preprocess_art_shangy.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,3 +87,5 @@ def convert_txt_to_jsonl(input_file, output_file):
output_file = "./art500k_processed.jsonl"
base_path = "./"
convert_txt_to_jsonl(input_file, output_file)


2 changes: 2 additions & 0 deletions data_prepare/sft/preprocess_cambrian.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,3 +70,5 @@ def check_sample(sample):

with open("cambrian_doc_1275k.json", "w") as f:
json.dump(cambrian_doc_1275k, f)


2 changes: 2 additions & 0 deletions data_prepare/sft/preprocess_cambrian_eagle.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,3 +75,5 @@ def check_sample(sample):

with open(os.path.join(base_path, "cambrian_adlr_train.json"), "w") as f:
json.dump(cambrian_eagle, f)


2 changes: 2 additions & 0 deletions data_prepare/sft/preprocess_docreason.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,5 @@

with open(json_file_processed, "w") as f:
json.dump(records, f)


2 changes: 2 additions & 0 deletions data_prepare/sft/preprocess_flan.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,3 +70,5 @@

with open(os.path.join(save_path, "text_flan_1m.pkl"), "wb") as f:
pickle.dump(filtered_samples, f)


2 changes: 2 additions & 0 deletions data_prepare/sft/preprocess_idefics2.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,3 +130,5 @@ def process_dataset(args):
# Map the process_dataset function to the arguments
for _ in tqdm(pool.imap_unordered(process_dataset, args), total=len(args), desc="Processing datasets"):
pass


2 changes: 2 additions & 0 deletions data_prepare/sft/preprocess_idefics2_eagle.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,3 +170,5 @@ def main(
import fire

fire.Fire(main)


2 changes: 2 additions & 0 deletions data_prepare/sft/preprocess_kvqa.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,3 +45,5 @@
json.dump(new_records, f)

print(len(new_records))


2 changes: 2 additions & 0 deletions data_prepare/sft/preprocess_llava_onevision.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,3 +103,5 @@ def main(
import fire

fire.Fire(main)


2 changes: 2 additions & 0 deletions data_prepare/sft/preprocess_m3it.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,3 +82,5 @@
save_filename = os.path.join(save_path, save_filename)
with open(save_filename, "wb") as f:
pickle.dump(dataset, f)


2 changes: 2 additions & 0 deletions data_prepare/sft/preprocess_metamathqa.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,5 @@

with open(json_file_processed, "w") as f:
json.dump(records, f)


2 changes: 2 additions & 0 deletions data_prepare/sft/preprocess_viquae.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,3 +45,5 @@ def base64_to_pil_image(base64_string):

with open(os.path.join(base_path, "viquae_processed.json"), "w") as f:
json.dump(new_records, f)


2 changes: 2 additions & 0 deletions data_prepare/sft/split_vflan.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,3 +57,5 @@
print(f"Finished writing part-{counter:05d}.pkl!")

counter += 1


2 changes: 2 additions & 0 deletions data_prepare/sft/unichart_pretrain.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,3 +57,5 @@
pbar.update(1)

print("Processing complete.")


2 changes: 2 additions & 0 deletions data_prepare/sft/unichart_sft.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,3 +59,5 @@
pbar.update(1)

print("Processing complete.")


2 changes: 2 additions & 0 deletions llava/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,4 @@
from .entry import *
from .media import *


2 changes: 2 additions & 0 deletions llava/cli/eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,3 +198,5 @@ def main() -> None:

if __name__ == "__main__":
main()


2 changes: 2 additions & 0 deletions llava/cli/infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,3 +42,5 @@ def main() -> None:

if __name__ == "__main__":
main()


2 changes: 2 additions & 0 deletions llava/cli/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,3 +132,5 @@ def main() -> None:

if __name__ == "__main__":
main()


2 changes: 2 additions & 0 deletions llava/cli/upload2hf.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,3 +222,5 @@ def main():

if __name__ == "__main__":
main()


2 changes: 2 additions & 0 deletions llava/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,5 @@
"image": "<image>",
"video": "<vila/video>",
}


2 changes: 2 additions & 0 deletions llava/conversation.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,3 +189,5 @@ def auto_set_conversation_mode(model_name_or_path: str) -> str:
logger.info(f"Setting conversation mode to `{v}` based on model name/path `{model_name_or_path}`.")
default_conversation = conv_templates[v]
return


2 changes: 2 additions & 0 deletions llava/data/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,5 @@
from .dataset_impl import *
from .datasets_mixture import *
from .simple_vila_webdataset import VILAWebDataset


2 changes: 2 additions & 0 deletions llava/data/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,3 +84,5 @@ def __getitem__(self, index: int) -> Dict[str, Any]:

def __len__(self) -> int:
return len(self.instances)


2 changes: 2 additions & 0 deletions llava/data/builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,3 +213,5 @@ def build_dataset_legacy(
data_args=data_args,
training_args=training_args,
)


2 changes: 2 additions & 0 deletions llava/data/collate.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,3 +88,5 @@ def __call__(self, instances: Sequence[Dict[str, Any]]) -> Dict[str, Any]:
"labels": labels,
"attention_mask": attention_mask,
}


2 changes: 2 additions & 0 deletions llava/data/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -1576,3 +1576,5 @@ def make_supervised_data_module(
train_dataset=train_dataset,
data_collator=data_collator,
)


2 changes: 2 additions & 0 deletions llava/data/dataset_impl/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,5 @@
from .lita import *
from .llava import *
from .llava_cot import *


2 changes: 2 additions & 0 deletions llava/data/dataset_impl/coyo_qa.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,3 +203,5 @@ def __getitem__(self, i) -> Dict[str, torch.Tensor]:
data_dict["block_sizes"] = block_sizes

return data_dict


2 changes: 2 additions & 0 deletions llava/data/dataset_impl/coyo_recap.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,3 +88,5 @@ def __init__(
else:
self.caption_choice = data_args.caption_choice
print(f"Current caption choice: {self.caption_choice}.")


2 changes: 2 additions & 0 deletions llava/data/dataset_impl/dummy.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,3 +92,5 @@ def process(self, instance: Dict[str, Any]) -> List[Dict[str, Any]]:
# Add media to the beginning of the first message
messages[0]["value"] = medias + [messages[0]["value"]]
return messages


2 changes: 2 additions & 0 deletions llava/data/dataset_impl/lita.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,3 +241,5 @@ def process(self, instance: Dict[str, Any]) -> List[Dict[str, Any]]:
video = Video(instance["video_path"])
messages[0]["value"] = [video, messages[0]["value"]]
return messages


2 changes: 2 additions & 0 deletions llava/data/dataset_impl/llava.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,3 +132,5 @@ def process(self, instance: Dict[str, Any]) -> List[Dict[str, Any]]:
new_value = [*img_list, value.replace(DEFAULT_IMAGE_TOKEN, "").strip()]
messages[0]["value"] = new_value
return messages


2 changes: 2 additions & 0 deletions llava/data/dataset_impl/llava_cot.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,3 +174,5 @@ def process_multi_img(self, instance: Dict[str, Any], index: int) -> List[Dict[s
assert len(medias) == 0, f"#Num of <images> does not match the number of images in the instance. {instance}"

return messages


2 changes: 2 additions & 0 deletions llava/data/dataset_impl/panda70m.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,3 +229,5 @@ def cleanup_corrupted_videos(
jinfo = json.load(open(json_path))
img_t = load_video(video_path, jinfo=jinfo)
print(img_t)


2 changes: 2 additions & 0 deletions llava/data/dataset_impl/sam.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,3 +230,5 @@ def __getitem__(self, i) -> Dict[str, torch.Tensor]:
for idx, data in enumerate(dst):
print(idx, data.keys())
# nvcode: off


2 changes: 2 additions & 0 deletions llava/data/dataset_impl/textocr.py
Original file line number Diff line number Diff line change
Expand Up @@ -289,3 +289,5 @@ def __getitem__(self, index):

for idx in range(2):
pprint(dataset[idx])


2 changes: 2 additions & 0 deletions llava/data/dataset_impl/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,5 @@ def _remove_media_tokens(text: str) -> str:
for token in ["<image>", "<video>"]:
text = text.replace(token + "\n", "").replace("\n" + token, "").replace(token, "")
return text.strip()


2 changes: 2 additions & 0 deletions llava/data/datasets_mixture.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,3 +54,5 @@ def add_dataset(dataset):

def register_datasets_mixtures():
pass


2 changes: 2 additions & 0 deletions llava/data/simple_vila_webdataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -334,3 +334,5 @@ def merge(a: dict, b: dict, path=[], strict=False):

# if idx >= 5:
# break


2 changes: 2 additions & 0 deletions llava/data/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,3 +35,5 @@ def from_bytesio(cls, file_path: str, decode_audio: bool = True, decoder: str =
print(f"unsupported type {type(file_path)}")
video_cls = select_video_class(decoder)
return video_cls(video_file, pathlib.Path(file_path).name, decode_audio)


2 changes: 2 additions & 0 deletions llava/entry.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,3 +36,5 @@ def load(

model = load_pretrained_model(model_path, model_name, model_base, **kwargs)[1]
return model


2 changes: 2 additions & 0 deletions llava/eval/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,5 @@

EVAL_ROOT = "scripts/eval"
TASKS = io.load(os.path.join(os.path.dirname(__file__), "registry.yaml"))


2 changes: 2 additions & 0 deletions llava/eval/cinepile.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,3 +106,5 @@ def main() -> None:

if __name__ == "__main__":
main()


2 changes: 2 additions & 0 deletions llava/eval/egoschema.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,3 +107,5 @@ def main() -> None:

if __name__ == "__main__":
main()


2 changes: 2 additions & 0 deletions llava/eval/eval_refcoco.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,3 +71,5 @@ def computeIoU(bbox1, bbox2):
except Exception as e:
print(e, flush=True)
continue


2 changes: 2 additions & 0 deletions llava/eval/eventbench.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,3 +91,5 @@ def main() -> None:

if __name__ == "__main__":
main()


2 changes: 2 additions & 0 deletions llava/eval/lmms/models/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
AVAILABLE_MODELS = {
"vila_internal": "VILA",
}


2 changes: 2 additions & 0 deletions llava/eval/lmms/models/vila_internal.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,3 +139,5 @@ def generate_until_multi_round(self, requests: List[Instance]) -> List[str]:

def loglikelihood(self, requests: List[Instance]) -> List[Tuple[float, bool]]:
raise NotImplementedError


2 changes: 2 additions & 0 deletions llava/eval/lmms/tasks/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@


2 changes: 2 additions & 0 deletions llava/eval/lmms/tasks/videomme.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,3 +64,5 @@ def videomme_doc_to_text_subtitle(doc: Dict[str, Any], num_frames: int) -> str:
prompt += "\n".join(doc["options"]) + "\n"
prompt += "The best answer is:"
return prompt


Loading

0 comments on commit 33b4f1a

Please sign in to comment.