-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathTPMDB_creator.py
149 lines (118 loc) · 4.52 KB
/
TPMDB_creator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
import sqlite3
import os
import TPMDB_txt_scraper
import random
from operator import itemgetter
MOVIES_TXT_FOLDER = ''#Insert the folder where you have saved all your txt files.
def create_database():
conn = sqlite3.connect('movie_db_13_10_2021_1_no_jobs.db')
c = conn.cursor()
c.execute("""CREATE TABLE IF NOT EXISTS movies (
title_url text PRIMARY KEY,
title text,
year integer,
lenght integer,
tagline text,
synopsis text,
poster_url text,
members integer,
lists integer,
likes integer,
rating real
)""")
c.execute("""CREATE TABLE IF NOT EXISTS crew (
name_url text PRIMARY KEY,
name text
)""")
c.execute("""CREATE TABLE IF NOT EXISTS crew_movie_relation (
title_url text,
name_url text,
job text
)""")
c.execute("""CREATE TABLE IF NOT EXISTS cast (
name_url text PRIMARY KEY,
name text
)""")
c.execute("""CREATE TABLE IF NOT EXISTS cast_movie_relation (
title_url text,
name_url text,
character text
)""")
c.execute("""CREATE TABLE IF NOT EXISTS genre (
genre_url text PRIMARY KEY,
genre text
)""")
c.execute("""CREATE TABLE IF NOT EXISTS genre_movie_relation (
title_url text,
genre_url text
)""")
c.execute("""CREATE TABLE IF NOT EXISTS studio (
studio_url text PRIMARY KEY,
studio text
)""")
c.execute("""CREATE TABLE IF NOT EXISTS studio_movie_relation (
title_url text,
studio_url text
)""")
c.execute("""CREATE TABLE IF NOT EXISTS language (
language_url text PRIMARY KEY,
language text
)""")
c.execute("""CREATE TABLE IF NOT EXISTS language_movie_relation (
title_url text,
language_url text
)""")
c.execute("""CREATE TABLE IF NOT EXISTS country (
country_url text PRIMARY KEY,
country text
)""")
c.execute("""CREATE TABLE IF NOT EXISTS country_movie_relation (
title_url text,
country_url text
)""")
conn.commit()
conn.close()
def write_in_database():
create_database()
conn = sqlite3.connect('movie_db_13_10_2021_1_no_jobs.db')
c = conn.cursor()
sample = os.listdir(MOVIES_TXT_FOLDER)
n = 0
for file in sample:
info = TPMDB_txt_scraper.get_info_from_file(file)
c.execute("""INSERT INTO movies VALUES (
:title_url, :title, :year, :length, :tagline, :synopsis,
:poster_url, :members, :lists, :likes, :rating)""", info)
for human in info['crew']:
c.execute("INSERT or IGNORE INTO crew VALUES(:name_url, :name)", {'name_url':human['name_url'].split('/')[-1], 'name':human['name']})
c.execute("""INSERT INTO crew_movie_relation VALUES(
:title_url, :name_url, :job)""",
{'title_url':info['title_url'], 'name_url':human['name_url'].split('/')[-1], 'job':human['job']})
for human in info['cast']:
c.execute("INSERT or IGNORE INTO cast VALUES(:name_url, :name)", {'name_url':human['name_url'], 'name':human['name']})
for character in human['characters']:
c.execute("""INSERT INTO cast_movie_relation VALUES(
:title_url, :name_url, :character)""",
{'title_url':info['title_url'], 'name_url':human['name_url'], 'character':character})
for genre in info['genre']:
c.execute("INSERT or IGNORE INTO genre VALUES(:genre_url, :genre)", genre)
c.execute("INSERT INTO genre_movie_relation VALUES (:title_url, :genre_url)", {'title_url': info['title_url'], 'genre_url':genre['genre_url']})
for studio in info['studio']:
c.execute("INSERT or IGNORE INTO studio VALUES(:studio_url, :studio)", studio)
c.execute("INSERT INTO studio_movie_relation VALUES (:title_url, :studio_url)", {'title_url': info['title_url'], 'studio_url':studio['studio_url']})
for language in info['language']:
c.execute("INSERT or IGNORE INTO language VALUES(:language_url, :language)", language)
c.execute("INSERT INTO language_movie_relation VALUES (:title_url, :language_url)", {'title_url': info['title_url'], 'language_url':language['language_url']})
for country in info['country']:
c.execute("INSERT or IGNORE INTO country VALUES(:country_url, :country)", country)
c.execute("INSERT INTO country_movie_relation VALUES (:title_url, :country_url)", {'title_url': info['title_url'], 'country_url':country['country_url']})
n+=1
if n % 1000 == 0:
print(n)
conn.commit()
conn.commit()
conn.close()
def main():
write_in_database()
if __name__ == "__main__":
main()