OFRenamer fix and increased speed
Basically the script was moving files to the new location, but then deleting them straight after.

Some cloud services would get confused and start putting the remote file in the remote root folder, because we deleted the local file before the cloud service could register the new location.

This bug had been around for a year; I finally tracked down the cause and fixed it, and now I get to delete thousands of duplicate images and folders whilst lagging :)
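
For anyone curious what that race looks like, here is a minimal sketch of the old and new behaviour. It is illustrative only, not the repo's actual renamer code, and the function names are made up; it assumes absolute file paths.

import os
import shutil


def rename_old(old_filepath, new_filepath):
    # Old behaviour (simplified): write a copy at the new location,
    # then delete the original. A cloud sync client can observe the
    # delete before it has indexed the new file, and re-uploads the
    # copy to the remote root folder as if it were a brand-new file.
    shutil.copy2(old_filepath, new_filepath)
    os.remove(old_filepath)


def rename_new(old_filepath, new_filepath):
    # New behaviour (simplified): a single move, so the sync client
    # sees one rename event instead of a create followed by a delete.
    os.makedirs(os.path.dirname(new_filepath), exist_ok=True)
    shutil.move(old_filepath, new_filepath)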

You'll only notice the speed increase if scrape_names is False and scrape_paid_content is True: we now skip fetching the subscriber list when you're only scraping paid content.
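
Boiled down, the new gate looks roughly like this (simplified from the account_setup change in modules/onlyfans.py below; the jobs argument is passed in here only to keep the sketch self-contained):

def account_setup(api, jobs):
    # Only fetch the (potentially huge) subscriber list when names
    # will actually be scraped; paid-content-only runs skip the call.
    if jobs["scrape_names"]:
        api.get_subscriptions()
    return True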

Metadata kept duplicate posts that had been deleted by the model. I've fixed that, so some metadata files should decrease in size.
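
The duplicates trace back to a comparison loop that broke out after a single check, so an incoming post was only ever tested against the first existing item and usually "matched" nothing. A simplified sketch of the corrected search (the real logic is in the compare_metadata/test diff in modules/onlyfans.py; find_existing is a made-up name):

def find_existing(new_item, old_status):
    # Keep scanning until test() reports a real match instead of
    # breaking unconditionally after the first comparison.
    for old_item in old_status:
        new_found = test(new_item, old_item)
        if new_found:
            return new_found
    return None  # genuinely new, safe to append
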
UltimaHoarder committed Nov 23, 2020
1 parent fcbea61 commit f24c063
Showing 5 changed files with 95 additions and 85 deletions.
7 changes: 4 additions & 3 deletions apis/onlyfans/onlyfans.py
@@ -208,7 +208,7 @@ class start():
     def __init__(self, sessions=[], custom_request=callable) -> None:
         sessions = api_helper.copy_sessions(sessions)
         self.sessions = sessions
-        self.auth = None
+        self.auth = {}
         self.custom_request = custom_request
         self.auth_details = None
         self.max_threads = -1
@@ -381,7 +381,7 @@ def get_subscriptions(self, refresh=True, extra_info=True, limit=20, offset=0):
         if not refresh:
             subscriptions = authed.get(
                 "subscriptions")
-            if subscriptions:
+            if subscriptions != None:
                 return subscriptions
         link = links(global_limit=limit, global_offset=offset).subscriptions
         session = self.sessions[0]
@@ -433,6 +433,7 @@ def multi(item, session=None):
             return valid_subscriptions
         pool = api_helper.multiprocessing()
         # offset_array = api_helper.assign_session(offset_array, self.sessions,key_two="session",show_item=True)
+        # offset_array= offset_array[:16]
         results += pool.starmap(multi, product(
             offset_array, [session]))

@@ -566,7 +567,7 @@ def get_mass_messages(self, resume=None, refresh=True, limit=10, offset=0):
         link = links(global_limit=limit,
                      global_offset=offset).mass_messages_api
         results = self.request(link=link)
-        items = results.get("list",[])
+        items = results.get("list", [])
         if not items:
             return items
         if resume:
12 changes: 6 additions & 6 deletions datascraper/main_datascraper.py
@@ -93,6 +93,7 @@ def start_datascraper():
         site_name = "OnlyFans"
         subscription_array = []
         auth_count = -1
+        jobs = json_site_settings["jobs"]
         for json_auth in json_auth_array:
             api = OnlyFans.start(
                 original_sessions)
@@ -108,18 +109,17 @@
             setup = module.account_setup(api)
             if not setup:
                 continue
-            jobs = json_site_settings["jobs"]
             if jobs["scrape_names"]:
                 array = module.manage_subscriptions(api, auth_count)
                 subscription_array += array
-            if jobs["scrape_paid_content"]:
-                paid_contents = api.get_paid_content()
-                paid_content = module.paid_content_scraper(api)
             apis.append(api)
         subscription_list = module.format_options(
             subscription_array, "usernames")
-        x = main_helper.process_names(
-            module, subscription_list, auto_scrape_names, json_auth_array, apis, json_config, site_name_lower, site_name)
+        if jobs["scrape_paid_content"]:
+            paid_content = module.paid_content_scraper(apis)
+        if jobs["scrape_names"]:
+            x = main_helper.process_names(
+                module, subscription_list, auto_scrape_names, json_auth_array, apis, json_config, site_name_lower, site_name)
         x = main_helper.process_downloads(apis, module)
         print
     elif site_name_lower == "starsavn":
11 changes: 6 additions & 5 deletions extras/OFRenamer/start.py
@@ -85,7 +85,7 @@ def update(old_filepath, new_filepath):
 def start(subscription, api_type, api_path, site_name, json_settings):
     metadata = getattr(subscription.scraped, api_type)
     download_info = subscription.download_info
-    base_directory = download_info["directory"]
+    root_directory = download_info["directory"]
     date_format = json_settings["date_format"]
     text_length = json_settings["text_length"]
     reformats = {}
@@ -99,26 +99,27 @@ def start(subscription, api_type, api_path, site_name, json_settings):
     option["username"] = username
     option["date_format"] = date_format
     option["maximum_length"] = text_length
-    option["directory"] = base_directory
+    option["directory"] = root_directory
     formatted = format_types(reformats).check_unique()
     unique = formatted["unique"]
     for key, value in reformats.items():
         key2 = getattr(unique, key)[0]
         reformats[key] = value.split(key2, 1)[0]+key2
         print
     print
-    a, b, c = prepare_reformat(option, keep_vars=True).reformat(reformats)
+    a, base_directory, c = prepare_reformat(option, keep_vars=True).reformat(reformats)
+    download_info["base_directory"] = base_directory
     print
     all_files = []
-    for root, subdirs, files in os.walk(b):
+    for root, subdirs, files in os.walk(base_directory):
         x = [os.path.join(root, x) for x in files]
         all_files.extend(x)
     for media_type, value in metadata:
         if media_type == "Texts":
             continue
         for status, value2 in value:
             fixed, new_directories = fix_directories(
-                value2, base_directory, site_name, api_path, media_type, username, all_files, json_settings)
+                value2, root_directory, site_name, api_path, media_type, username, all_files, json_settings)
             for new_directory in new_directories:
                 directory = os.path.abspath(new_directory)
                 os.makedirs(directory, exist_ok=True)
25 changes: 17 additions & 8 deletions helpers/main_helper.py
@@ -167,8 +167,9 @@ def format_image(filepath, timestamp):
             if os_name == "Windows":
                 from win32_setctime import setctime
                 setctime(filepath, timestamp)
-            print(filepath)
+            print(f"Updated Creation Time {filepath}")
             os.utime(filepath, (timestamp, timestamp))
+            print(f"Updated Modification Time {filepath}")
         except Exception as e:
             continue
         break
@@ -398,9 +399,10 @@ def update_config(json_config, file_name="config.json"):


 def choose_auth(array):
-    string = ""
     names = []
     array = [{"auth_count": -1, "username": "All"}]+array
+    string = ""
+    seperator = " | "
     name_count = len(array)
     if name_count > 1:

@@ -410,7 +412,7 @@ def choose_auth(array):
             string += str(count)+" = "+name
             names.append(x)
             if count+1 != name_count:
-                string += " | "
+                string += seperator

             count += 1

@@ -426,7 +428,8 @@
 def choose_option(subscription_list, auto_scrape_names):
     names = subscription_list[0]
     if names:
-        print("Names: Username = username | "+subscription_list[1])
+        seperator = " | "
+        print(f"Names: Username = username {seperator} {subscription_list[1]}")
         if not auto_scrape_names:
             value = "1"
             value = input().strip()
@@ -469,7 +472,7 @@ def process_downloads(apis, module):
             if download_info:
                 module.download_media(api, subscription)
                 delete_empty_directories(
-                    download_info["model_directory"])
+                    download_info["base_directory"])
             send_webhook(subscription)


@@ -558,8 +561,13 @@ def start(directory):
        for root, dirnames, files in os.walk(directory, topdown=False):
            for dirname in dirnames:
                full_path = os.path.realpath(os.path.join(root, dirname))
-               if not os.listdir(full_path):
-                   os.rmdir(full_path)
+               contents = os.listdir(full_path)
+               if not contents:
+                   shutil.rmtree(full_path, ignore_errors=True)
+               else:
+                   content_count = len(contents)
+                   if content_count ==1 and "desktop.ini" in contents:
+                       shutil.rmtree(full_path, ignore_errors=True)
    x = start(directory)
    if os.path.exists(directory):
        if not os.listdir(directory):
@@ -576,6 +584,7 @@ def multiprocessing():

 def module_chooser(domain, json_sites):
     string = "Site: "
+    seperator = " | "
     site_names = []
     wl = ["onlyfans"]
     bl = ["patreon"]
@@ -590,7 +599,7 @@
             string += str(count)+" = "+x
             site_names.append(x)
             if count+1 != site_count:
-                string += " | "
+                string += seperator

             count += 1
     string += "x = Exit"
125 changes: 62 additions & 63 deletions modules/onlyfans.py
@@ -76,6 +76,7 @@ def account_setup(api):
     status = False
     auth = api.login()
     if auth:
+        jobs = json_settings["jobs"]
         profile_directory = json_global_settings["profile_directories"][0]
         profile_directory = os.path.abspath(profile_directory)
         profile_directory = os.path.join(profile_directory, auth["username"])
@@ -90,7 +91,8 @@
             export_archive(mass_messages, metadata_filepath,
                            json_settings)
         # chats = api.get_chats()
-        subscriptions = api.get_subscriptions()
+        if jobs["scrape_names"]:
+            subscriptions = api.get_subscriptions()
         status = True
     return status

@@ -279,41 +281,45 @@ def profile_scraper(api, site_name, api_type, username, text_length, base_directory):
             break


-def paid_content_scraper(api):
-    paid_contents = api.get_paid_content(refresh=False)
-    results = []
-    for paid_content in paid_contents:
-        metadata_locations = {}
-        author = paid_content.get("author")
-        author = paid_content.get("fromUser", author)
-        subscription = create_subscription(author)
-        subscription.sessions = api.sessions
-        subscription.download_info["directory"] = j_directory
-        username = subscription.username
-        model_directory = os.path.join(j_directory, username)
-        api_type = paid_content["responseType"].capitalize()+"s"
-        subscription.download_info["metadata_locations"] = j_directory
-        subscription.download_info["metadata_locations"] = metadata_locations
-        site_name = "OnlyFans"
-        media_type = format_media_types()
-        formatted_directories = format_directories(
-            j_directory, site_name, username, metadata_directory_format, media_type, api_type)
-        metadata_directory = formatted_directories["metadata_directory"]
-        metadata_path = os.path.join(
-            metadata_directory, api_type+".json")
-        metadata_locations[api_type] = metadata_path
-        new_metadata = media_scraper([paid_content], api,
-                                     formatted_directories, username, api_type)
-        for directory in new_metadata["directories"]:
-            os.makedirs(directory, exist_ok=True)
-        api_path = os.path.join(api_type, "")
-        new_metadata_object = process_metadata(
-            api, new_metadata, formatted_directories, subscription, api_type, api_path, metadata_path, site_name)
-        new_metadata_set = new_metadata_object.convert()
-        if export_metadata:
-            export_archive(new_metadata_set, metadata_path, json_settings)
-        download_media(api, subscription)
-    return results
+def paid_content_scraper(apis):
+    for api in apis:
+        paid_contents = api.get_paid_content(check=True)
+        authed = api.auth
+        authed["subscriptions"] = authed.get("subscriptions", [])
+        for paid_content in paid_contents:
+            author = paid_content.get("author")
+            author = paid_content.get("fromUser", author)
+            subscription = api.get_subscription(author["id"])
+            if not subscription:
+                subscription = create_subscription(author)
+                authed["subscriptions"].append(subscription)
+            api_type = paid_content["responseType"].capitalize()+"s"
+            api_media = getattr(subscription.scraped, api_type)
+            api_media.append(paid_content)
+            print
+        for subscription in authed["subscriptions"]:
+            string = f"Scraping - {subscription.username}"
+            print(string)
+            subscription.sessions = api.sessions
+            username = subscription.username
+            site_name = "OnlyFans"
+            media_type = format_media_types()
+            for api_type, paid_content in subscription.scraped:
+                formatted_directories = format_directories(
+                    j_directory, site_name, username, metadata_directory_format, media_type, api_type)
+                metadata_directory = formatted_directories["metadata_directory"]
+                metadata_path = os.path.join(
+                    metadata_directory, api_type+".json")
+                new_metadata = media_scraper(paid_content, api,
+                                             formatted_directories, username, api_type)
+                if new_metadata:
+                    api_path = os.path.join(api_type, "")
+                    new_metadata_object = process_metadata(
+                        api, new_metadata, formatted_directories, subscription, api_type, api_path, metadata_path, site_name)
+                    new_metadata_set = new_metadata_object.convert()
+                    if export_metadata:
+                        export_archive(new_metadata_set,
+                                       metadata_path, json_settings)


 def format_media_types():
@@ -477,12 +483,10 @@ def process_metadata(api, new_metadata, formatted_directories, subscription, api
     if legacy_metadata_object:
         new_metadata_object = compare_metadata(
             new_metadata_object, legacy_metadata_object)
-    if not subscription.download_info:
-        subscription.download_info["directory"] = j_directory
-        subscription.download_info["model_directory"] = os.path.join(
-            j_directory, subscription.username)
-        subscription.download_info["webhook"] = webhook
-        subscription.download_info["metadata_locations"] = {}
+    if not subscription.download_info:
+        subscription.download_info["metadata_locations"] = {}
+    subscription.download_info["directory"] = j_directory
+    subscription.download_info["webhook"] = webhook
     subscription.download_info["metadata_locations"][api_type] = archive_path
     subscription.set_scraped(api_type, new_metadata_object)
     new_metadata_object = ofrenamer.start(
@@ -670,8 +674,17 @@ def test(new_item, old_item):
     new_found = None
     if old_item.media_id == None:
         for link in old_item.links:
-            link = link.split("?")[0]
-            if any(link in new_link for new_link in new_item.links):
+            # Handle Links
+            if "?" in link:
+                link2 = link.split("?")[0]
+            elif ";ip=" in link:
+                a = urlparse(link)
+                link2 = os.path.basename(a.path)
+            else:
+                link2 = link
+                input(
+                    f"NEW LINK DETECTED, PLEASE OPEN AN ISSUE ON GITHUB AND PASTE THE NEW LINK THERE SO I CAN HANDLE THE LINK, THANKS.\nLINK: {link}")
+            if any(link2 in new_link for new_link in new_item.links):
                 new_found = new_item
                 break
     print
@@ -719,8 +732,8 @@ def compare_metadata(new_metadata: media_types, old_metadata: media_types) -> me
            if not old_items:
                for a in old_status:
                    new_found = test(new_item, a)
-                   print
-                   break
+                   if new_found:
+                       break
                if not new_found:
                    old_status.append(new_item)
            print
@@ -865,21 +878,6 @@ def media_scraper(results, api, formatted_directories, username, api_type, paren
            filename, ext = os.path.splitext(filename)
            ext = ext.__str__().replace(".", "").split('?')[0]
            price = new_dict["price"]
-           # media_directory = os.path.join(
-           #     model_directory, sorted_directories["unsorted"])
-           # new_dict["paid"] = False
-           # if new_dict["price"]:
-           #     if api_type in ["Messages", "Mass Messages"]:
-           #         new_dict["paid"] = True
-           #     else:
-           #         if media["id"] not in media_api["preview"] and media["canView"]:
-           #             new_dict["paid"] = True
-           # if sort_free_paid_posts:
-           #     media_directory = os.path.join(
-           #         model_directory, sorted_directories["free"])
-           #     if new_dict["paid"]:
-           #         media_directory = os.path.join(
-           #             model_directory, sorted_directories["paid"])
            new_dict["text"] = text

            option = {}
@@ -1072,14 +1070,15 @@ def format_options(f_list, choice_type):
     count = 0
     names = []
     string = ""
+    seperator = " | "
     if name_count > 1:
         if "usernames" == choice_type:
             for x in f_list:
                 name = x.username
                 string += str(count)+" = "+name
                 names.append([x.auth_count, name])
                 if count+1 != name_count:
-                    string += " | "
+                    string += seperator
                 count += 1
         if "apis" == choice_type:
             names = f_list
@@ -1090,6 +1089,6 @@
                 name = api["api_type"]
                 string += str(count)+" = "+name
                 if count+1 != name_count:
-                    string += " | "
+                    string += seperator
                 count += 1
     return [names, string]
