OFRenamer fix and increased speed
Basically the script was moving files to the new location, but then deleting them straight after.

Some cloud services would get confused and start putting the remote file in the remote root folder, because we deleted the local file before the cloud service could register the new location.

This bug had been around for a year; I finally tracked down the cause and fixed it, and now I get to delete thousands of duplicate images and folders whilst lagging :)
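
For anyone curious what that race looks like, here is a minimal sketch of the old and new behaviour. It is illustrative only, not the repo's actual renamer code, and the function names are made up; it assumes absolute file paths.

import os
import shutil


def rename_old(old_filepath, new_filepath):
    # Old behaviour (simplified): write a copy at the new location,
    # then delete the original. A cloud sync client can observe the
    # delete before it has indexed the new file, and re-uploads the
    # copy to the remote root folder as if it were a brand-new file.
    shutil.copy2(old_filepath, new_filepath)
    os.remove(old_filepath)


def rename_new(old_filepath, new_filepath):
    # New behaviour (simplified): a single move, so the sync client
    # sees one rename event instead of a create followed by a delete.
    os.makedirs(os.path.dirname(new_filepath), exist_ok=True)
    shutil.move(old_filepath, new_filepath)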

You'll only notice the speed increase if scrape_names is False and scrape_paid_content is True: we now skip fetching the subscriber list when you're only scraping paid content.
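
Boiled down, the new gate looks roughly like this (simplified from the account_setup change in modules/onlyfans.py below; the jobs argument is passed in here only to keep the sketch self-contained):

def account_setup(api, jobs):
    # Only fetch the (potentially huge) subscriber list when names
    # will actually be scraped; paid-content-only runs skip the call.
    if jobs["scrape_names"]:
        api.get_subscriptions()
    return True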

Metadata kept duplicate posts that had been deleted by the model. I've fixed that, so some metadata files should decrease in size.
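
The duplicates trace back to a comparison loop that broke out after a single check, so an incoming post was only ever tested against the first existing item and usually "matched" nothing. A simplified sketch of the corrected search (the real logic is in the compare_metadata/test diff in modules/onlyfans.py; find_existing is a made-up name):

def find_existing(new_item, old_status):
    # Keep scanning until test() reports a real match instead of
    # breaking unconditionally after the first comparison.
    for old_item in old_status:
        new_found = test(new_item, old_item)
        if new_found:
            return new_found
    return None  # genuinely new, safe to append
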
UltimaHoarder committed Nov 23, 2020
1 parent fcbea61 commit f24c063
Showing 5 changed files with 95 additions and 85 deletions.
7 changes: 4 additions & 3 deletions apis/onlyfans/onlyfans.py
@@ -208,7 +208,7 @@ class start():
     def __init__(self, sessions=[], custom_request=callable) -> None:
         sessions = api_helper.copy_sessions(sessions)
         self.sessions = sessions
-        self.auth = None
+        self.auth = {}
         self.custom_request = custom_request
         self.auth_details = None
         self.max_threads = -1
@@ -381,7 +381,7 @@ def get_subscriptions(self, refresh=True, extra_info=True, limit=20, offset=0):
         if not refresh:
             subscriptions = authed.get(
                 "subscriptions")
-            if subscriptions:
+            if subscriptions != None:
                 return subscriptions
         link = links(global_limit=limit, global_offset=offset).subscriptions
         session = self.sessions[0]
@@ -433,6 +433,7 @@ def multi(item, session=None):
             return valid_subscriptions
         pool = api_helper.multiprocessing()
         # offset_array = api_helper.assign_session(offset_array, self.sessions,key_two="session",show_item=True)
+        # offset_array= offset_array[:16]
         results += pool.starmap(multi, product(
             offset_array, [session]))

@@ -566,7 +567,7 @@ def get_mass_messages(self, resume=None, refresh=True, limit=10, offset=0):
         link = links(global_limit=limit,
                      global_offset=offset).mass_messages_api
         results = self.request(link=link)
-        items = results.get("list",[])
+        items = results.get("list", [])
         if not items:
             return items
         if resume:
12 changes: 6 additions & 6 deletions datascraper/main_datascraper.py
@@ -93,6 +93,7 @@ def start_datascraper():
         site_name = "OnlyFans"
         subscription_array = []
         auth_count = -1
+        jobs = json_site_settings["jobs"]
         for json_auth in json_auth_array:
             api = OnlyFans.start(
                 original_sessions)
@@ -108,18 +109,17 @@
             setup = module.account_setup(api)
             if not setup:
                 continue
-            jobs = json_site_settings["jobs"]
             if jobs["scrape_names"]:
                 array = module.manage_subscriptions(api, auth_count)
                 subscription_array += array
-            if jobs["scrape_paid_content"]:
-                paid_contents = api.get_paid_content()
-                paid_content = module.paid_content_scraper(api)
             apis.append(api)
         subscription_list = module.format_options(
             subscription_array, "usernames")
-        x = main_helper.process_names(
-            module, subscription_list, auto_scrape_names, json_auth_array, apis, json_config, site_name_lower, site_name)
+        if jobs["scrape_paid_content"]:
+            paid_content = module.paid_content_scraper(apis)
+        if jobs["scrape_names"]:
+            x = main_helper.process_names(
+                module, subscription_list, auto_scrape_names, json_auth_array, apis, json_config, site_name_lower, site_name)
         x = main_helper.process_downloads(apis, module)
         print
     elif site_name_lower == "starsavn":
11 changes: 6 additions & 5 deletions extras/OFRenamer/start.py
@@ -85,7 +85,7 @@ def update(old_filepath, new_filepath):
 def start(subscription, api_type, api_path, site_name, json_settings):
     metadata = getattr(subscription.scraped, api_type)
     download_info = subscription.download_info
-    base_directory = download_info["directory"]
+    root_directory = download_info["directory"]
     date_format = json_settings["date_format"]
     text_length = json_settings["text_length"]
     reformats = {}
@@ -99,26 +99,27 @@ def start(subscription, api_type, api_path, site_name, json_settings):
     option["username"] = username
     option["date_format"] = date_format
     option["maximum_length"] = text_length
-    option["directory"] = base_directory
+    option["directory"] = root_directory
     formatted = format_types(reformats).check_unique()
     unique = formatted["unique"]
     for key, value in reformats.items():
         key2 = getattr(unique, key)[0]
         reformats[key] = value.split(key2, 1)[0]+key2
         print
     print
-    a, b, c = prepare_reformat(option, keep_vars=True).reformat(reformats)
+    a, base_directory, c = prepare_reformat(option, keep_vars=True).reformat(reformats)
+    download_info["base_directory"] = base_directory
     print
     all_files = []
-    for root, subdirs, files in os.walk(b):
+    for root, subdirs, files in os.walk(base_directory):
         x = [os.path.join(root, x) for x in files]
         all_files.extend(x)
     for media_type, value in metadata:
         if media_type == "Texts":
             continue
         for status, value2 in value:
             fixed, new_directories = fix_directories(
-                value2, base_directory, site_name, api_path, media_type, username, all_files, json_settings)
+                value2, root_directory, site_name, api_path, media_type, username, all_files, json_settings)
             for new_directory in new_directories:
                 directory = os.path.abspath(new_directory)
                 os.makedirs(directory, exist_ok=True)
25 changes: 17 additions & 8 deletions helpers/main_helper.py
@@ -167,8 +167,9 @@ def format_image(filepath, timestamp):
             if os_name == "Windows":
                 from win32_setctime import setctime
                 setctime(filepath, timestamp)
-            print(filepath)
+            print(f"Updated Creation Time {filepath}")
             os.utime(filepath, (timestamp, timestamp))
+            print(f"Updated Modification Time {filepath}")
         except Exception as e:
             continue
         break
@@ -398,9 +399,10 @@ def update_config(json_config, file_name="config.json"):


 def choose_auth(array):
-    string = ""
     names = []
     array = [{"auth_count": -1, "username": "All"}]+array
+    string = ""
+    seperator = " | "
     name_count = len(array)
     if name_count > 1:

@@ -410,7 +412,7 @@ def choose_auth(array):
             string += str(count)+" = "+name
             names.append(x)
             if count+1 != name_count:
-                string += " | "
+                string += seperator

             count += 1

@@ -426,7 +428,8 @@
 def choose_option(subscription_list, auto_scrape_names):
     names = subscription_list[0]
     if names:
-        print("Names: Username = username | "+subscription_list[1])
+        seperator = " | "
+        print(f"Names: Username = username {seperator} {subscription_list[1]}")
         if not auto_scrape_names:
             value = "1"
             value = input().strip()
@@ -469,7 +472,7 @@ def process_downloads(apis, module):
             if download_info:
                 module.download_media(api, subscription)
                 delete_empty_directories(
-                    download_info["model_directory"])
+                    download_info["base_directory"])
             send_webhook(subscription)


@@ -558,8 +561,13 @@ def start(directory):
        for root, dirnames, files in os.walk(directory, topdown=False):
            for dirname in dirnames:
                full_path = os.path.realpath(os.path.join(root, dirname))
-               if not os.listdir(full_path):
-                   os.rmdir(full_path)
+               contents = os.listdir(full_path)
+               if not contents:
+                   shutil.rmtree(full_path, ignore_errors=True)
+               else:
+                   content_count = len(contents)
+                   if content_count ==1 and "desktop.ini" in contents:
+                       shutil.rmtree(full_path, ignore_errors=True)
    x = start(directory)
    if os.path.exists(directory):
        if not os.listdir(directory):
@@ -576,6 +584,7 @@ def multiprocessing():

 def module_chooser(domain, json_sites):
     string = "Site: "
+    seperator = " | "
     site_names = []
     wl = ["onlyfans"]
     bl = ["patreon"]
@@ -590,7 +599,7 @@
             string += str(count)+" = "+x
             site_names.append(x)
             if count+1 != site_count:
-                string += " | "
+                string += seperator

             count += 1
     string += "x = Exit"
125 changes: 62 additions & 63 deletions modules/onlyfans.py
@@ -76,6 +76,7 @@ def account_setup(api):
     status = False
     auth = api.login()
     if auth:
+        jobs = json_settings["jobs"]
         profile_directory = json_global_settings["profile_directories"][0]
         profile_directory = os.path.abspath(profile_directory)
         profile_directory = os.path.join(profile_directory, auth["username"])
@@ -90,7 +91,8 @@
             export_archive(mass_messages, metadata_filepath,
                            json_settings)
         # chats = api.get_chats()
-        subscriptions = api.get_subscriptions()
+        if jobs["scrape_names"]:
+            subscriptions = api.get_subscriptions()
         status = True
     return status

@@ -279,41 +281,45 @@ def profile_scraper(api, site_name, api_type, username, text_length, base_directory):
             break


-def paid_content_scraper(api):
-    paid_contents = api.get_paid_content(refresh=False)
-    results = []
-    for paid_content in paid_contents:
-        metadata_locations = {}
-        author = paid_content.get("author")
-        author = paid_content.get("fromUser", author)
-        subscription = create_subscription(author)
-        subscription.sessions = api.sessions
-        subscription.download_info["directory"] = j_directory
-        username = subscription.username
-        model_directory = os.path.join(j_directory, username)
-        api_type = paid_content["responseType"].capitalize()+"s"
-        subscription.download_info["metadata_locations"] = j_directory
-        subscription.download_info["metadata_locations"] = metadata_locations
-        site_name = "OnlyFans"
-        media_type = format_media_types()
-        formatted_directories = format_directories(
-            j_directory, site_name, username, metadata_directory_format, media_type, api_type)
-        metadata_directory = formatted_directories["metadata_directory"]
-        metadata_path = os.path.join(
-            metadata_directory, api_type+".json")
-        metadata_locations[api_type] = metadata_path
-        new_metadata = media_scraper([paid_content], api,
-                                     formatted_directories, username, api_type)
-        for directory in new_metadata["directories"]:
-            os.makedirs(directory, exist_ok=True)
-        api_path = os.path.join(api_type, "")
-        new_metadata_object = process_metadata(
-            api, new_metadata, formatted_directories, subscription, api_type, api_path, metadata_path, site_name)
-        new_metadata_set = new_metadata_object.convert()
-        if export_metadata:
-            export_archive(new_metadata_set, metadata_path, json_settings)
-        download_media(api, subscription)
-    return results
+def paid_content_scraper(apis):
+    for api in apis:
+        paid_contents = api.get_paid_content(check=True)
+        authed = api.auth
+        authed["subscriptions"] = authed.get("subscriptions", [])
+        for paid_content in paid_contents:
+            author = paid_content.get("author")
+            author = paid_content.get("fromUser", author)
+            subscription = api.get_subscription(author["id"])
+            if not subscription:
+                subscription = create_subscription(author)
+                authed["subscriptions"].append(subscription)
+            api_type = paid_content["responseType"].capitalize()+"s"
+            api_media = getattr(subscription.scraped, api_type)
+            api_media.append(paid_content)
+            print
+        for subscription in authed["subscriptions"]:
+            string = f"Scraping - {subscription.username}"
+            print(string)
+            subscription.sessions = api.sessions
+            username = subscription.username
+            site_name = "OnlyFans"
+            media_type = format_media_types()
+            for api_type, paid_content in subscription.scraped:
+                formatted_directories = format_directories(
+                    j_directory, site_name, username, metadata_directory_format, media_type, api_type)
+                metadata_directory = formatted_directories["metadata_directory"]
+                metadata_path = os.path.join(
+                    metadata_directory, api_type+".json")
+                new_metadata = media_scraper(paid_content, api,
+                                             formatted_directories, username, api_type)
+                if new_metadata:
+                    api_path = os.path.join(api_type, "")
+                    new_metadata_object = process_metadata(
+                        api, new_metadata, formatted_directories, subscription, api_type, api_path, metadata_path, site_name)
+                    new_metadata_set = new_metadata_object.convert()
+                    if export_metadata:
+                        export_archive(new_metadata_set,
+                                       metadata_path, json_settings)


 def format_media_types():
@@ -477,12 +483,10 @@ def process_metadata(api, new_metadata, formatted_directories, subscription, api
     if legacy_metadata_object:
         new_metadata_object = compare_metadata(
             new_metadata_object, legacy_metadata_object)
-    if not subscription.download_info:
-        subscription.download_info["directory"] = j_directory
-        subscription.download_info["model_directory"] = os.path.join(
-            j_directory, subscription.username)
-        subscription.download_info["webhook"] = webhook
-        subscription.download_info["metadata_locations"] = {}
+    if not subscription.download_info:
+        subscription.download_info["metadata_locations"] = {}
+    subscription.download_info["directory"] = j_directory
+    subscription.download_info["webhook"] = webhook
     subscription.download_info["metadata_locations"][api_type] = archive_path
     subscription.set_scraped(api_type, new_metadata_object)
     new_metadata_object = ofrenamer.start(
@@ -670,8 +674,17 @@ def test(new_item, old_item):
     new_found = None
     if old_item.media_id == None:
         for link in old_item.links:
-            link = link.split("?")[0]
-            if any(link in new_link for new_link in new_item.links):
+            # Handle Links
+            if "?" in link:
+                link2 = link.split("?")[0]
+            elif ";ip=" in link:
+                a = urlparse(link)
+                link2 = os.path.basename(a.path)
+            else:
+                link2 = link
+                input(
+                    f"NEW LINK DETECTED, PLEASE OPEN AN ISSUE ON GITHUB AND PASTE THE NEW LINK THERE SO I CAN HANDLE THE LINK, THANKS.\nLINK: {link}")
+            if any(link2 in new_link for new_link in new_item.links):
                 new_found = new_item
                 break
     print
@@ -719,8 +732,8 @@ def compare_metadata(new_metadata: media_types, old_metadata: media_types) -> me
            if not old_items:
                for a in old_status:
                    new_found = test(new_item, a)
-                   print
-                   break
+                   if new_found:
+                       break
                if not new_found:
                    old_status.append(new_item)
            print
@@ -865,21 +878,6 @@ def media_scraper(results, api, formatted_directories, username, api_type, paren
            filename, ext = os.path.splitext(filename)
            ext = ext.__str__().replace(".", "").split('?')[0]
            price = new_dict["price"]
-           # media_directory = os.path.join(
-           #     model_directory, sorted_directories["unsorted"])
-           # new_dict["paid"] = False
-           # if new_dict["price"]:
-           #     if api_type in ["Messages", "Mass Messages"]:
-           #         new_dict["paid"] = True
-           #     else:
-           #         if media["id"] not in media_api["preview"] and media["canView"]:
-           #             new_dict["paid"] = True
-           # if sort_free_paid_posts:
-           #     media_directory = os.path.join(
-           #         model_directory, sorted_directories["free"])
-           #     if new_dict["paid"]:
-           #         media_directory = os.path.join(
-           #             model_directory, sorted_directories["paid"])
            new_dict["text"] = text

            option = {}
@@ -1072,14 +1070,15 @@ def format_options(f_list, choice_type):
     count = 0
     names = []
     string = ""
+    seperator = " | "
     if name_count > 1:
         if "usernames" == choice_type:
             for x in f_list:
                 name = x.username
                 string += str(count)+" = "+name
                 names.append([x.auth_count, name])
                 if count+1 != name_count:
-                    string += " | "
+                    string += seperator
                 count += 1
         if "apis" == choice_type:
             names = f_list
@@ -1090,6 +1089,6 @@
                 name = api["api_type"]
                 string += str(count)+" = "+name
                 if count+1 != name_count:
-                    string += " | "
+                    string += seperator
                 count += 1
     return [names, string]
