-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
73 lines (53 loc) · 1.93 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
import time
import random
import csv
# Column names of the output CSV, in the order they are written.
table_info = ['title', 'price', 'description', 'image', 'sub_categ', 'categ']
# Accumulates one dict per scraped product; filled by crawl().
csv_data = []

# Two separate browser sessions: `driver` is used for the listing pages and
# `driver1` is the one crawl() points at individual product pages.
driver = webdriver.Chrome()
driver1 = webdriver.Chrome()

# Start from the top-level product overview page.
driver.get('https://nilsolsson.se/sv/Produkter')
def crawl(d):
    """Scrape every product linked from the page currently loaded in ``d``.

    Each element with class ``itemnamelink`` on the listing page is treated
    as a product link. Its URL is opened in the module-level ``driver1``
    browser, and one row with the keys ``title``, ``price``, ``description``,
    ``image``, ``sub_categ`` and ``categ`` is appended to the module-level
    ``csv_data`` list.

    Args:
        d: WebDriver whose current page is a product listing.

    Raises:
        selenium.common.exceptions.NoSuchElementException: if a product page
            lacks one of the title/price/description/image elements.
    """
    # The find_element(s)_by_* helpers were removed in Selenium 4.3; the
    # By-locator form is the supported spelling.
    product_links = d.find_elements(By.CLASS_NAME, "itemnamelink")
    for product in product_links:
        link = product.get_attribute("href")
        if not link:
            # An anchor without an href cannot be opened; skip it instead of
            # passing None to the browser.
            continue
        driver1.get(link)

        # A fresh dict per product, so rows already stored in csv_data can
        # never be mutated by a later iteration.
        data = {
            'title': driver1.find_element(By.CLASS_NAME, 'title').text,
            'price': driver1.find_element(By.CLASS_NAME, 'price').text,
            'description': driver1.find_element(
                By.CLASS_NAME, 'description').text,
            'image': driver1.find_element(
                By.CLASS_NAME, 'image_links').get_attribute("href"),
        }

        # With more than three breadcrumb entries the last one is recorded
        # as the sub-category and the one before it as the category;
        # otherwise the last entry is the category itself.
        list_categ = driver1.find_elements(By.CLASS_NAME, 'breadlink')
        if len(list_categ) > 3:
            data['sub_categ'] = list_categ[-1].text
            data['categ'] = list_categ[-2].text
        else:
            data['sub_categ'] = ""
            # Guard against pages without any breadcrumb (previously an
            # IndexError on list_categ[-1]).
            data['categ'] = list_categ[-1].text if list_categ else ""

        csv_data.append(data)
# Walk every category linked from the start page (elements with class
# "group-link"). If a category page itself lists "group-link" entries they
# are treated as sub-categories and each one is crawled; otherwise the
# category page is crawled directly.
try:
    # Collect the hrefs up front: `driver` is navigated to other pages inside
    # the loop, so the located elements are not kept around.
    # find_elements(By.CLASS_NAME, ...) replaces find_elements_by_class_name,
    # which was removed in Selenium 4.3.
    link_categ = []
    for categ in driver.find_elements(By.CLASS_NAME, "group-link"):
        categ_href = categ.get_attribute("href")
        if categ_href:  # skip anchors without a target
            link_categ.append(categ_href)

    for categ_url in link_categ:
        driver.get(categ_url)
        link_sub_categ = []
        try:
            for sub_categ in driver.find_elements(By.CLASS_NAME, "group-link"):
                sub_href = sub_categ.get_attribute("href")
                if sub_href:
                    link_sub_categ.append(sub_href)
        except Exception as exc:
            # Still best effort (whatever was collected so far is used), but
            # no longer a bare `except:` that also swallowed
            # KeyboardInterrupt/SystemExit, and the failure is now visible.
            print("Could not read sub-categories of {}: {}".format(
                categ_url, exc))

        if link_sub_categ:
            for sub_url in link_sub_categ:
                driver.get(sub_url)
                crawl(driver)
        else:
            crawl(driver)
finally:
    # Always shut both browsers down so no Chrome/chromedriver processes are
    # left behind, even when the crawl aborts with an error.
    driver.quit()
    driver1.quit()
# Write all collected rows to data.csv, one column per entry in table_info.
# newline='' lets the csv module control line endings itself (without it,
# rows are separated by blank lines on Windows). An explicit UTF-8 encoding
# keeps non-ASCII characters in the scraped text from depending on the
# platform's default encoding.
with open('data.csv', 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=table_info)
    writer.writeheader()
    writer.writerows(csv_data)