Skip to content

Commit

Permalink
Export to CSV
Browse files Browse the repository at this point in the history
Added the option to export to CSV.
Check the readme for the "export_type" options
Update config :^)
  • Loading branch information
SecretShell committed Oct 21, 2019
1 parent efa8c66 commit 433128f
Show file tree
Hide file tree
Showing 6 changed files with 46 additions and 13 deletions.
9 changes: 9 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,15 @@ auto_choice:
c = Videos

You can automatically choose what you want to scrape if you add it to the config file.

|**NEW**| export_type:

Default = "json"

a = "json"
b = "csv"

You can export an archive to different formats.

overwrite_files:

Expand Down
1 change: 1 addition & 0 deletions config.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
"settings": {
"auto_site_choice": "",
"auto_choice": "",
"export_type": "json",
"multithreading": true,
"user-agent": ""
},
Expand Down
2 changes: 1 addition & 1 deletion modules/four_chan.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import requests
from bs4 import BeautifulSoup
from modules.helpers import reformat
from modules.helpers import *

import os
import json
Expand Down
30 changes: 29 additions & 1 deletion modules/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,14 @@
import os
from bs4 import BeautifulSoup
import platform
import csv
import itertools
import json


# Open config.json and fill in OPTIONAL information.
# Loaded once at import time; a context manager ensures the file handle
# is closed instead of leaking (the original open() was never closed).
with open('config.json') as config_file:
    json_config = json.load(config_file)
json_global_settings = json_config["settings"]
export_type = json_global_settings["export_type"]
def parse_links(site_name, input_link):
if site_name in {"onlyfans", "justforfans"}:
username = input_link.rsplit('/', 1)[-1]
Expand Down Expand Up @@ -65,9 +71,31 @@ def format_media_set(media_set):
x["invalid"].extend(y[1])
return x


def format_image(directory, timestamp):
    """Set the creation time of *directory* to *timestamp* on Windows.

    On every other platform this is a no-op: only Windows exposes a
    settable file-creation timestamp.
    """
    if platform.system() != "Windows":
        return
    # Imported lazily so non-Windows hosts never need win32_setctime.
    from win32_setctime import setctime
    setctime(directory, timestamp)


def export_archive(data, archive_directory, export_format=None):
    """Write a scraped media archive to disk as JSON or CSV.

    Args:
        data: dict with "valid" and "invalid" lists of media dicts; items
            within each list are assumed to share the same keys.
        archive_directory: path prefix for the output file; the format's
            extension (".json" or ".csv") is appended.
        export_format: "json" or "csv". Defaults to the module-level
            ``export_type`` setting from config.json when None, preserving
            the original call signature.
    """
    if export_format is None:
        # Fall back to the format configured in config.json.
        export_format = export_type
    if export_format == "json":
        # Context manager ensures the file is closed even on a dump error.
        with open(archive_directory + ".json", 'w') as outfile:
            json.dump(data, outfile)
    if export_format == "csv":
        with open(archive_directory + '.csv', mode='w', newline='') as csv_file:
            fieldnames = []
            if data["valid"]:
                fieldnames.extend(data["valid"][0].keys())
            elif data["invalid"]:
                fieldnames.extend(data["invalid"][0].keys())
            # Leading "" column holds the valid/invalid status label.
            header = [""] + fieldnames
            # Bug fix: the original tested ``len(fieldnames) > 1`` and so
            # silently wrote nothing when records had exactly one field;
            # write whenever any field exists.
            if fieldnames:
                writer = csv.DictWriter(csv_file, fieldnames=header)
                writer.writeheader()
                for item in data["valid"]:
                    writer.writerow({**{"": "valid"}, **item})
                for item in data["invalid"]:
                    writer.writerow({**{"": "invalid"}, **item})
8 changes: 3 additions & 5 deletions modules/justforfans.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import requests
from bs4 import BeautifulSoup
from modules.helpers import reformat
from modules.helpers import format_media_set
from modules.helpers import *

import os
import json
Expand Down Expand Up @@ -238,9 +237,8 @@ def media_scraper(session, site_name, only_links, link, location, media_type, di
print("DIRECTORY - " + directory)
os.makedirs(directory, exist_ok=True)
os.makedirs(metadata_directory, exist_ok=True)

with open(metadata_directory+location+".json", 'w') as outfile:
json.dump(media_set, outfile)
archive_directory = metadata_directory+location
export_archive(media_set, archive_directory)
return [media_set, directory]


Expand Down
9 changes: 3 additions & 6 deletions modules/onlyfans.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
import requests
from bs4 import BeautifulSoup
from modules.helpers import reformat
from modules.helpers import format_media_set
from modules.helpers import format_image
from modules.helpers import *

import os
import json
Expand Down Expand Up @@ -219,9 +217,8 @@ def media_scraper(session, site_name, only_links, link, location, media_type, di
print("DIRECTORY - " + directory)
os.makedirs(directory, exist_ok=True)
os.makedirs(metadata_directory, exist_ok=True)

with open(metadata_directory+location+".json", 'w') as outfile:
json.dump(media_set, outfile)
archive_directory = metadata_directory+location
export_archive(media_set, archive_directory)
return [media_set, directory]


Expand Down

0 comments on commit 433128f

Please sign in to comment.