Skip to content

Commit

Permalink
Added max_threads to config
Browse files Browse the repository at this point in the history
Can choose between 1 to X amount of threads

Any int lower than 1 (0, -1, etc.) means the maximum number of threads will be used.

This is also useful if your proxy only allows a limited number of connections.
  • Loading branch information
UltimaHoarder committed Oct 11, 2020
1 parent 8acd4f7 commit 32aeb86
Show file tree
Hide file tree
Showing 9 changed files with 110 additions and 111 deletions.
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -169,12 +169,12 @@ Open:

"%m-%d-%Y"

[#multithreading](README.md#multithreading):
[#max_threads](README.md#max_threads):

Default = true
Default = -1

If set to false, you will download files 1 by 1. (If you don't have fast internet, may god help you.)
I'd recommend leaving it set to true.
When number is set below 1, it will use all threads.
Set a number higher than 0 to limit threads.

[#min_drive_space](README.md#min_drive_space):

Expand Down
6 changes: 4 additions & 2 deletions classes/make_settings.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
class config(object):
def __init__(self, settings={}, supported={}):
class Settings(object):
def __init__(self, auto_site_choice="", export_type="json", multithreading=True, min_drive_space=0, webhooks=[], exit_on_completion=False, infinite_loop=True, loop_timeout="0", socks5_proxy=[], cert="", global_user_agent=""):
def __init__(self, auto_site_choice="", export_type="json", max_threads=-1, min_drive_space=0, webhooks=[], exit_on_completion=False, infinite_loop=True, loop_timeout="0", socks5_proxy=[], cert="", global_user_agent=""):
self.auto_site_choice = auto_site_choice
self.export_type = export_type
self.multithreading = multithreading
self.max_threads = max_threads
self.min_drive_space = min_drive_space
self.webhooks = webhooks
self.exit_on_completion = exit_on_completion
Expand Down Expand Up @@ -245,6 +245,8 @@ def __init__(self, option={}):
self.extra_auth = option.get('extra_auth', False)
self.choose_auth = option.get('choose_auth', False)
self.merge_auth = option.get('merge_auth', False)
if "multithreading" in settings:
settings.pop("multithreading")
self.settings = Settings(**settings)
self.supported = Supported(**supported)

Expand Down
6 changes: 4 additions & 2 deletions extras/OFRenamer/start.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ def start(post, metadata_categories):
metadata_categories2 = metadata_categories
meta_categories = list(os.path.split(metadata_categories2))
q = main_helper.find_between(
model_folder, *meta_categories).replace(os.sep,"")
model_folder, *meta_categories).replace(os.sep, "")
meta_categories.insert(-1, q)
categories = os.path.join(*meta_categories)
file_directory_formatted = model.directory.split(categories)
Expand Down Expand Up @@ -94,7 +94,9 @@ def update(filepath):
folder = reformat.directory
continue
last_path = folder.split(
username+os.sep)[1]
username+os.sep)
last_path = last_path[1] if len(
last_path) > 1 else last_path[0]
last_path = last_path.replace(file_directory_formatted, "")
directory = main_helper.get_directory(
download_path, site_name)
Expand Down
13 changes: 12 additions & 1 deletion helpers/main_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import socket
import psutil
import shutil
from multiprocessing.dummy import Pool as ThreadPool

import requests
from bs4 import BeautifulSoup
Expand All @@ -34,6 +35,7 @@
json_global_settings = None
min_drive_space = 0
webhooks = None
max_threads = -1
os_name = platform.system()


Expand All @@ -58,12 +60,13 @@ def setup_logger(name, log_file, level=logging.INFO):


def assign_vars(config):
global json_global_settings, min_drive_space, webhooks
global json_global_settings, min_drive_space, webhooks, max_threads

json_config = config
json_global_settings = json_config["settings"]
min_drive_space = json_global_settings["min_drive_space"]
webhooks = json_global_settings["webhooks"]
max_threads = json_global_settings["max_threads"]


def rename_duplicates(seen, filename):
Expand Down Expand Up @@ -613,3 +616,11 @@ def start(directory):
if os.path.exists(directory):
if not os.listdir(directory):
os.rmdir(directory)


def multiprocessing():
    """Build and return a thread pool sized by the global ``max_threads`` setting.

    A non-positive setting (0, -1, ...) means "no limit": the pool is
    created with its default size. A positive setting caps the number of
    worker threads at exactly that value (useful e.g. when a proxy only
    allows a limited number of connections).

    NOTE(review): this function intentionally shadows the stdlib
    ``multiprocessing`` module name — callers rebind it as
    ``main_helper.multiprocessing``.
    """
    if max_threads < 1:
        # Unlimited: ThreadPool() with no argument picks its default
        # worker count (one per CPU).
        return ThreadPool()
    # Limited: cap the pool at max_threads workers.
    return ThreadPool(max_threads)
41 changes: 19 additions & 22 deletions modules/bbwchan.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,17 @@
import os
import json
from itertools import product
import multiprocessing
from multiprocessing.dummy import Pool as ThreadPool
from datetime import datetime
import classes.prepare_download as prepare_download
from types import SimpleNamespace
from multiprocessing import cpu_count

multiprocessing = main_helper.multiprocessing
log_download = main_helper.setup_logger('downloads', 'downloads.log')

json_config = None
json_global_settings = None
multithreading = None
max_threads = -1
json_settings = None
auto_choice = None
j_directory = None
Expand All @@ -26,19 +28,17 @@
maximum_length = None
webhook = None

max_threads = multiprocessing.cpu_count()
log_download = main_helper.setup_logger('downloads', 'downloads.log')


def assign_vars(config, site_settings, site_name):
global json_config, multithreading, json_settings, auto_choice, j_directory, overwrite_files, date_format, file_directory_format,file_name_format, boards, ignored_keywords, webhook, maximum_length
global json_config, max_threads, json_settings, auto_choice, j_directory, overwrite_files, date_format, file_directory_format, file_name_format, boards, ignored_keywords, webhook, maximum_length

json_config = config
json_global_settings = json_config["settings"]
multithreading = json_global_settings["multithreading"]
max_threads = json_global_settings["max_threads"]
json_settings = site_settings
auto_choice = json_settings["auto_choice"]
j_directory = main_helper.get_directory(json_settings['download_paths'], site_name)
j_directory = main_helper.get_directory(
json_settings['download_paths'], site_name)
file_directory_format = json_settings["file_directory_format"]
file_name_format = json_settings["file_name_format"]
overwrite_files = json_settings["overwrite_files"]
Expand All @@ -59,10 +59,7 @@ def start_datascraper(session, board_name, site_name, link_type, choice_type=Non

print("Board: " + board_name)
array = scrape_choice(board_name)
if multithreading:
pool = ThreadPool(max_threads)
else:
pool = ThreadPool(1)
pool = multiprocessing()
threads = board_scraper(session, array[0], "")
archive_threads = []
threads = threads + archive_threads
Expand Down Expand Up @@ -171,7 +168,7 @@ def thread_scraper(thread_id, board_name, session, directory):
if not text:
new_directory = new_directory.replace(" - ", "")
file_path = main_helper.reformat(new_directory, None, None, file_name,
text, ext, date_object, post["name"], file_directory_format, file_name_format, date_format, maximum_length)
text, ext, date_object, post["name"], file_directory_format, file_name_format, date_format, maximum_length)
media["download_path"] = file_path
found = True
if found:
Expand All @@ -198,7 +195,8 @@ def download(thread, session, directory):
if "download_path" not in media:
continue
link = "https://bbw-chan.nl" + media["path"]
r = main_helper.json_request(session, link, "HEAD", True, False)
r = main_helper.json_request(
session, link, "HEAD", True, False)
if not isinstance(r, requests.Response):
return_bool = False
count += 1
Expand All @@ -213,7 +211,8 @@ def download(thread, session, directory):
if main_helper.check_for_dupe_file(download_path, content_length):
return_bool = False
break
r = main_helper.json_request(session, link, "GET", True, False)
r = main_helper.json_request(
session, link, "GET", True, False)
if not isinstance(r, requests.Response):
return_bool = False
count += 1
Expand Down Expand Up @@ -251,19 +250,17 @@ def download(thread, session, directory):
string += "Name: "+board_name+"\n"
string += "Directory: " + directory+"\n"
print(string)
if multithreading:
pool = ThreadPool()
else:
pool = ThreadPool(1)
pool = multiprocessing()
pool.starmap(download, product(media_set, [session], [directory]))


def create_session():
session = requests.Session()
max_threads2 = cpu_count()
session.mount(
'http://', HTTPAdapter(pool_connections=max_threads, pool_maxsize=max_threads))
'http://', HTTPAdapter(pool_connections=max_threads2, pool_maxsize=max_threads2))
session.mount(
'https://', HTTPAdapter(pool_connections=max_threads, pool_maxsize=max_threads))
'https://', HTTPAdapter(pool_connections=max_threads2, pool_maxsize=max_threads2))
print("Welcome Anon")
option_string = "board or thread link"
array = dict()
Expand Down
41 changes: 19 additions & 22 deletions modules/fourchan.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,18 @@
import os
import json
from itertools import product
import multiprocessing
from multiprocessing.dummy import Pool as ThreadPool
from datetime import datetime
import random
import classes.prepare_download as prepare_download
from multiprocessing import cpu_count

multiprocessing = main_helper.multiprocessing
log_download = main_helper.setup_logger('downloads', 'downloads.log')

# Open config.json and fill in OPTIONAL information
json_config = None
json_global_settings = None
multithreading = None
max_threads = -1
json_settings = None
auto_choice = None
j_directory = None
Expand All @@ -27,19 +29,17 @@
maximum_length = None
webhook = None

max_threads = multiprocessing.cpu_count()
log_download = main_helper.setup_logger('downloads', 'downloads.log')


def assign_vars(config, site_settings, site_name):
global json_config, multithreading, json_settings, auto_choice, j_directory, overwrite_files, date_format, file_directory_format,file_name_format, boards, ignored_keywords, webhook, maximum_length
global json_config, max_threads, json_settings, auto_choice, j_directory, overwrite_files, date_format, file_directory_format, file_name_format, boards, ignored_keywords, webhook, maximum_length

json_config = config
json_global_settings = json_config["settings"]
multithreading = json_global_settings["multithreading"]
max_threads = json_global_settings["max_threads"]
json_settings = site_settings
auto_choice = json_settings["auto_choice"]
j_directory = main_helper.get_directory(json_settings['download_paths'], site_name)
j_directory = main_helper.get_directory(
json_settings['download_paths'], site_name)
file_directory_format = json_settings["file_directory_format"]
file_name_format = json_settings["file_name_format"]
overwrite_files = json_settings["overwrite_files"]
Expand All @@ -59,10 +59,7 @@ def start_datascraper(session, board_name, site_name, link_type, choice_type=Non
return [False, info]
print("Board: " + board_name)
array = scrape_choice(board_name)
if multithreading:
pool = ThreadPool()
else:
pool = ThreadPool(1)
pool = multiprocessing()
threads = board_scraper(session, array[0], "")
archive_threads = board_scraper(session, array[1], "archive")
threads = threads + archive_threads
Expand Down Expand Up @@ -174,7 +171,7 @@ def thread_scraper(thread_id, board_name, session, directory):
new_directory = new_directory.replace(" - ", "")
date_object = datetime.fromtimestamp(post["time"])
file_path = main_helper.reformat(new_directory, None, None, file_name,
text, ext, date_object, post["name"], file_directory_format, file_name_format, date_format, maximum_length)
text, ext, date_object, post["name"], file_directory_format, file_name_format, date_format, maximum_length)
post["download_path"] = file_path
found = True
if found:
Expand Down Expand Up @@ -202,7 +199,8 @@ def download(thread, session, directory):
ext = media["ext"].replace(".", "")
filename = str(media["tim"])+"."+ext
link = "http://i.4cdn.org/" + board_name + "/" + filename
r = main_helper.json_request(session, link, "HEAD", True, False)
r = main_helper.json_request(
session, link, "HEAD", True, False)
if not isinstance(r, requests.Response):
return_bool = False
count += 1
Expand Down Expand Up @@ -240,7 +238,8 @@ def download(thread, session, directory):
except Exception as e:
if delete:
os.unlink(download_path)
main_helper.log_error.exception(str(e) + "\n Tries: "+str(count))
main_helper.log_error.exception(
str(e) + "\n Tries: "+str(count))
count += 1
continue
main_helper.format_image(download_path, timestamp)
Expand All @@ -252,20 +251,18 @@ def download(thread, session, directory):
string += "Name: "+board_name+"\n"
string += "Directory: " + directory+"\n"
print(string)
if multithreading:
pool = ThreadPool()
else:
pool = ThreadPool(1)
pool = multiprocessing()
os.makedirs(directory, exist_ok=True)
pool.starmap(download, product(media_set, [session], [directory]))


def create_session():
session = requests.Session()
max_threads2 = cpu_count()
session.mount(
'http://', HTTPAdapter(pool_connections=max_threads, pool_maxsize=max_threads))
'http://', HTTPAdapter(pool_connections=max_threads2, pool_maxsize=max_threads2))
session.mount(
'https://', HTTPAdapter(pool_connections=max_threads, pool_maxsize=max_threads))
'https://', HTTPAdapter(pool_connections=max_threads2, pool_maxsize=max_threads2))
print("Welcome Anon")
option_string = "board or thread link"
array = dict()
Expand Down
Loading

0 comments on commit 32aeb86

Please sign in to comment.