From c2865dbab50521ec61d0b94b36950c25f93b8adf Mon Sep 17 00:00:00 2001 From: Mistaken User Date: Mon, 6 Feb 2023 10:15:19 +0000 Subject: [PATCH 1/3] requirements fix --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 47d6288..4386b52 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -sklearn +scikit-learn funcy argparse scikit-multilearn From 6dd2029023ea9ba19f8fa02bdf62c6374bb74162 Mon Sep 17 00:00:00 2001 From: UlkuTuncerKucuktas Date: Mon, 6 Feb 2023 17:20:45 +0300 Subject: [PATCH 2/3] add progress bar --- cocosplit.py | 9 +++++---- requirements.txt | 3 ++- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/cocosplit.py b/cocosplit.py index aeb6282..463efa2 100644 --- a/cocosplit.py +++ b/cocosplit.py @@ -4,6 +4,7 @@ from sklearn.model_selection import train_test_split from skmultilearn.model_selection import iterative_train_test_split import numpy as np +from tqdm import tqdm def save_coco(file, info, licenses, images, annotations, categories): @@ -13,14 +14,14 @@ def save_coco(file, info, licenses, images, annotations, categories): def filter_annotations(annotations, images): image_ids = funcy.lmap(lambda i: int(i['id']), images) - return funcy.lfilter(lambda a: int(a['image_id']) in image_ids, annotations) - + filtered_annotations = funcy.lfilter(lambda a: int(a['image_id']) in image_ids, tqdm(annotations, desc='Filtering Annotations')) + return filtered_annotations def filter_images(images, annotations): - annotation_ids = funcy.lmap(lambda i: int(i['image_id']), annotations) + filtered_images = funcy.lfilter(lambda a: int(a['id']) in annotation_ids, tqdm(images, desc='Filtering Images')) + return filtered_images - return funcy.lfilter(lambda a: int(a['id']) in annotation_ids, images) parser = argparse.ArgumentParser(description='Splits COCO annotations file into training and test sets.') diff --git a/requirements.txt b/requirements.txt index 
4386b52..40b21b4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,5 @@ -scikit-learn +sklearn funcy argparse scikit-multilearn +tqdm \ No newline at end of file From c4024af5a204e956684545624dc9194a67155703 Mon Sep 17 00:00:00 2001 From: UlkuTuncerKucuktas Date: Mon, 6 Feb 2023 17:27:51 +0300 Subject: [PATCH 3/3] add seed so it would create same result every run --- cocosplit.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/cocosplit.py b/cocosplit.py index 463efa2..92aa76c 100644 --- a/cocosplit.py +++ b/cocosplit.py @@ -6,6 +6,7 @@ import numpy as np from tqdm import tqdm +np.random.seed(42) def save_coco(file, info, licenses, images, annotations, categories): with open(file, 'wt', encoding='UTF-8') as coco: @@ -68,7 +69,7 @@ def main(args): annotations = funcy.lremove(lambda i: i['category_id'] not in annotation_categories , annotations) - X_train, y_train, X_test, y_test = iterative_train_test_split(np.array([annotations]).T,np.array([ annotation_categories]).T, test_size = 1-args.split) + X_train, y_train, X_test, y_test = iterative_train_test_split(np.array([annotations]).T,np.array([ annotation_categories]).T, test_size = 1-args.split, random_state=42) save_coco(args.train, info, licenses, filter_images(images, X_train.reshape(-1)), X_train.reshape(-1).tolist(), categories) save_coco(args.test, info, licenses, filter_images(images, X_test.reshape(-1)), X_test.reshape(-1).tolist(), categories) @@ -77,7 +78,7 @@ def main(args): else: - X_train, X_test = train_test_split(images, train_size=args.split) + X_train, X_test = train_test_split(images, train_size=args.split, random_state=42) anns_train = filter_annotations(annotations, X_train) anns_test=filter_annotations(annotations, X_test)