-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpyppeteer_example2.py
168 lines (143 loc) · 5.1 KB
/
pyppeteer_example2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
import asyncio
from pyppeteer import launch
from terminaltables import SingleTable
from colorclass import Color
async def get_browser():
    """Start a browser through pyppeteer's ``launch`` using its default options.

    Returns:
        The browser object produced by ``launch()``.
    """
    browser = await launch()
    return browser
async def get_page(browser, url):
    """Open a new page in *browser* and navigate it to *url*.

    Args:
        browser: browser object exposing ``newPage()``.
        url: address to load in the new page.

    Returns:
        The page object, after ``goto(url)`` has completed.
    """
    tab = await browser.newPage()
    await tab.goto(url)
    return tab
async def create_account(page):
    """Click the "create account" button to get access to the app.

    Args:
        page: page object on which the button is clicked.
    """
    # The button is addressed by its element id.
    await page.click("#createAccountBt")
async def select_top30(page):
    """Show the top 30 currencies by market capitalization.

    Args:
        page: page object on which the list controls are clicked.
    """
    top_list_entry = "#navSubTop"
    limit_30_button = ".setCoinLimitBt[data-v='30']"

    # Wait until the top-list entry exists before clicking it.
    await page.waitForSelector(top_list_entry)
    await page.click(top_list_entry)

    # Restrict the list to 30 entries.
    await page.click(limit_30_button)
async def add_eur(page):
    """Select EUR as the fiat currency for the whole app.

    Args:
        page: page object on which the currency controls are driven.
    """
    # Click the currency button, then the "add currency" button.
    for opener in ("#nHp_currencyBt", "#currencyAddBt"):
        await page.click(opener)

    # Type the search term and wait for the EUR entry among the results.
    await page.type("input#addCurrencySearchTf", 'eur')
    await page.waitForSelector("#addCurrencySearchResults > #add_currency_EUR")

    # Add EUR, then pick it in the currency box.
    for target in (
        "#add_currency_EUR > .addRemCurrencyBt",
        "#currencyBox > div[data-symbol='EUR']",
    ):
        await page.click(target)
def _non_empty_lines(text):
    """Return the stripped, non-blank lines of *text* in their original order."""
    return [line.strip() for line in text.splitlines() if line.strip()]


def _parse_rank(text):
    """Return the first run of digits in a rank label such as ``"Rank 5"``.

    ``str.strip("Rank")`` removes any of the characters R/a/n/k from both
    ends rather than the word itself, and breaks as soon as the label is
    surrounded by whitespace, so the digits are searched for explicitly.

    Raises:
        ValueError: if *text* contains no digits.
    """
    import re  # local import: the file's import block does not provide it

    found = re.search(r"\d+", text)
    if found is None:
        raise ValueError("no rank number found in {!r}".format(text))
    return int(found.group())


async def extract_currency(page, currency):
    """Open one currency's detail pop-up and scrape its figures.

    Args:
        page: page object showing the currency list.
        currency: element handle of the list entry; its text content is
            used as the currency symbol and it is clicked to open the
            pop-up.

    Returns:
        dict with the keys ``name``, ``symbol``, ``price``, ``price24h``,
        ``percentage24h`` and ``rank``, inserted in that order.

    Raises:
        IndexError: if a scraped box has fewer text lines than expected.
        ValueError: if a scraped figure cannot be parsed as a number.
    """
    read_text = "elem => elem.textContent"

    # Extract currency symbol
    symbol = await page.evaluate(
        "currency => currency.textContent",
        currency
    )
    symbol = symbol.strip()

    # Click on current currency and wait for the pop-up title to exist
    await currency.click()
    selector_name = ".popUpItTitle"
    await page.waitForSelector(selector_name)

    # Extract currency name
    name = (await page.querySelectorEval(selector_name, read_text)).strip()

    # Extract currency actual price.
    # The second non-empty line of the box is taken as the price
    # (layout assumption carried over from the original code - TODO confirm).
    price_lines = _non_empty_lines(
        await page.querySelectorEval("#highLowBox", read_text)
    )
    price = parse_number(price_lines[1])

    # Extract currency 24h difference and percentage.
    # Seventh non-empty line -> percentage, second-to-last -> price difference
    # (layout assumption carried over from the original code - TODO confirm).
    diff_lines = _non_empty_lines(
        await page.querySelectorEval("#profitLossBox", read_text)
    )
    perce_24h = parse_number(diff_lines[6])
    price_24h = parse_number(diff_lines[-2])

    # Extract currency capitalization rank
    rank = _parse_rank(
        await page.querySelectorEval(
            "#profitLossBox ~ div.BG2.BOR_down", read_text
        )
    )

    # Close the pop-up so the next list entry can be clicked
    await page.click(".popUpItCloseBt")

    return {
        "name": name,
        "symbol": symbol,
        "price": price,
        "price24h": price_24h,
        "percentage24h": perce_24h,
        "rank": rank
    }
async def navigate_top30_detail(page):
    """Iterate over the displayed currencies and extract data for each one.

    Args:
        page: page object showing the currency list.

    Returns:
        list of the dicts returned by ``extract_currency``, in list order.
    """
    select_all_displayed_currencies = "#fullCoinList > [data-arr-nr]"
    select_currency = "#fullCoinList > [data-arr-nr='{}'] .L1S1"

    currencies = await page.querySelectorAll(select_all_displayed_currencies)
    datas = []
    # Entries are addressed by their data-arr-nr attribute, which is assumed
    # to run from 0 to len(currencies) - 1 - TODO confirm against the page.
    for num in range(len(currencies)):
        selector = select_currency.format(num)
        # Scroll the entry into view; the call's result is not needed.
        await page.querySelectorEval(
            selector,
            "(elem) => elem.scrollIntoView()"
        )
        currency = await page.querySelector(selector)
        datas.append(await extract_currency(page, currency))
    return datas
async def scrape_cmc_io(url):
    """Scrape the top 30 currencies from *url* and print them as a table.

    Launches a browser, opens *url*, creates an account, switches to the
    top 30 list, selects EUR, collects each currency's details and finally
    prints the result table.

    Args:
        url: address of the app to scrape.
    """
    browser = await get_browser()
    try:
        page = await get_page(browser, url)
        await create_account(page)
        await select_top30(page)
        await add_eur(page)
        currencies_data = await navigate_top30_detail(page)
    finally:
        # Always shut the browser down, even when a scraping step raises,
        # so no browser is left running.
        await browser.close()
    show_biggest_24h_winners(currencies_data)
def show_biggest_24h_winners(data):
    """Print the scraped currencies as a coloured terminal table.

    Rows are sorted in ascending order of their ``percentage24h`` value.
    Rows with a negative percentage use the red template, all others the
    green one. Cell order follows each dict's own value order.

    Args:
        data: iterable of dicts, each holding a ``percentage24h`` entry.
    """
    header = [
        "Currency",
        "Symbol",
        "Actual price (€)",
        "24h price diff. (€)",
        "24h % diff",
        "Rank"
    ]
    ordered = sorted(data, key=lambda entry: entry.get('percentage24h'))

    loss_template = Color("{autored}{}{/autored}")
    gain_template = Color("{green}{}{/green}")

    rows = [header]
    for entry in ordered:
        template = loss_template if entry['percentage24h'] < 0 else gain_template
        rows.append([template.format(cell) for cell in entry.values()])

    table = SingleTable(rows)
    table.title = "24h TOP 30 Currencies"
    # Right-align the numeric columns (indices 2 to 5).
    table.justify_columns = dict.fromkeys(range(2, 6), 'right')
    print(table.table)
# Rewrites applied by parse_number before float(): formatting symbols are
# deleted and the Unicode minus sign (U+2212) becomes an ASCII hyphen-minus.
_NUMBER_CLEANUP = str.maketrans(
    {"€": None, "%": None, ",": None, "\u2212": "-"}
)


def parse_number(str_num):
    """Parse a formatted numeric string such as ``"1,234.56 €"`` into a float.

    The euro sign, the percent sign and commas are removed, a Unicode minus
    sign is treated like ``-``, and whitespace anywhere in the string is
    ignored.

    Args:
        str_num: text holding one number plus optional formatting symbols.

    Returns:
        The parsed value as a float.

    Raises:
        ValueError: if what remains after the clean-up is not a valid number.
    """
    cleaned = str_num.translate(_NUMBER_CLEANUP)
    # split()/join drops every whitespace character, including the
    # non-breaking spaces that float() rejects inside a number.
    return float("".join(cleaned.split()))
if __name__ == "__main__":
    # Script entry point: drive the scraper to completion on the event loop
    # returned by asyncio.get_event_loop().
    url = "http://coinmarketcap.io"
    loop = asyncio.get_event_loop()
    job = scrape_cmc_io(url)
    result = loop.run_until_complete(job)