-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathhistorical_import.py
121 lines (98 loc) · 4.57 KB
/
historical_import.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
import os
from pathlib import Path
import ccxt
import sys
import csv
market = 'future'  # Binance market type, passed to ccxt as options['defaultType']: 'future' or 'spot'
timeframe = '1d'  # candle interval, e.g. 1m 5m 1h 1d 1w 1M (lower-case m = minute, upper-case M = month)
# Usage:
# - Run this file as a script to start the download.
# - For timeframes under 1h, revisit the default `limit` of scrape_candles_to_csv.
# - Execute from the directory that you want the data to go to: write_to_csv
#   creates ./data/{market}/{timeframe} relative to the current working directory.
# -----------------------------------------------------------------------------
# `root` is three directory levels above the current working directory
# (os.path.abspath('') resolves to the cwd); its 'python' subfolder is appended
# to the import search path.
# NOTE(review): nothing visible in this file imports from that folder - confirm
# whether this path tweak is still needed.
root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(''))))
sys.path.append(root + '/python')
# -----------------------------------------------------------------------------
# def fetchTickers(market) -> dict:
# exchange = ccxt.binance({
# 'timeout': 10000,
# 'enableRateLimit': True,
# 'options': {
# 'defaultType': market,}
# })
# exchange.loadMarkets()
# # Data container
# ticker_list = []
# #Fetch OHLCV data
# ticker_list = exchange.fetchTickers()
# for key in list(filter(lambda x: '_' in x, ticker_list.keys())): del ticker_list[key]
# # Return OHLCV data
# return ticker_list
def retry_fetch_ohlcv(exchange, max_retries, symbol, timeframe, since, limit):
    """Fetch one batch of OHLCV candles, retrying when the request fails.

    The request is attempted once and then retried up to ``max_retries``
    more times. If every attempt fails, the exception raised by the last
    attempt propagates to the caller, so a failure is never silently turned
    into a ``None`` result.

    Args:
        exchange: Object exposing ``fetch_ohlcv(symbol, timeframe, since, limit)``.
        max_retries: Number of retries allowed after the first attempt;
            negative values are treated as 0.
        symbol: Market symbol forwarded to ``fetch_ohlcv``.
        timeframe: Candle interval forwarded to ``fetch_ohlcv``.
        since: Start timestamp forwarded to ``fetch_ohlcv``.
        limit: Maximum number of candles forwarded to ``fetch_ohlcv``.

    Returns:
        Whatever ``exchange.fetch_ohlcv`` returns for the first successful call.

    Raises:
        Exception: The error from the final attempt when all attempts fail.
    """
    total_attempts = max(max_retries, 0) + 1
    for attempt in range(1, total_attempts + 1):
        try:
            return exchange.fetch_ohlcv(symbol, timeframe, since, limit)
        except Exception:
            # Broad on purpose: any failure of the request is retried; only
            # the last one is allowed to escape.
            if attempt == total_attempts:
                raise
def scrape_ohlcv(exchange, max_retries, symbol, timeframe, since, limit):
    """Collect OHLCV candles for ``symbol`` by paging backwards from now.

    Starting at the exchange's current time, each iteration requests the
    window of ``limit`` candles that ends at the oldest candle collected so
    far and prepends the batch to the result. Paging stops when a window
    starting before ``since`` has been fetched, when the exchange returns no
    older candle, or when a batch comes back empty.

    Args:
        exchange: Exchange object providing ``milliseconds``,
            ``parse_timeframe`` and ``iso8601``; it is also handed to
            ``retry_fetch_ohlcv`` for the actual requests.
        max_retries: Forwarded to ``retry_fetch_ohlcv``.
        symbol: Market symbol to download.
        timeframe: Candle interval string understood by ``parse_timeframe``.
        since: Checkpoint timestamp in milliseconds.
        limit: Number of candles requested per window.

    Returns:
        list: All collected candle rows, older batches first. Rows are not
        trimmed to ``since``, so the first batch may start before it.
    """
    earliest_timestamp = exchange.milliseconds()
    timeframe_duration_in_ms = exchange.parse_timeframe(timeframe) * 1000
    window_in_ms = limit * timeframe_duration_in_ms
    all_ohlcv = []
    while True:
        fetch_since = earliest_timestamp - window_in_ms
        ohlcv = retry_fetch_ohlcv(exchange, max_retries, symbol, timeframe, fetch_since, limit)
        # An empty (or missing) batch means there is nothing older to read;
        # indexing it below would raise instead of ending the scrape.
        if not ohlcv:
            break
        # if we have reached the beginning of history
        if ohlcv[0][0] >= earliest_timestamp:
            break
        earliest_timestamp = ohlcv[0][0]
        all_ohlcv = ohlcv + all_ohlcv
        print(len(all_ohlcv), symbol, 'candles in total from', exchange.iso8601(all_ohlcv[0][0]), 'to', exchange.iso8601(all_ohlcv[-1][0]))
        # if we have reached the checkpoint
        if fetch_since < since:
            break
    return all_ohlcv
def write_to_csv(filename, data):
    """Write ``data`` rows to ``./data/<market>/<timeframe>/<filename>.csv``.

    The target directory is built from the module-level ``market`` and
    ``timeframe`` settings, relative to the current working directory, and
    is created when missing. An existing file of the same name is
    overwritten.

    Args:
        filename: File name without the ``.csv`` extension.
        data: Iterable of rows, each an iterable of cell values.
    """
    target_dir = Path("./data/" + market + '/' + timeframe)
    target_dir.mkdir(parents=True, exist_ok=True)
    destination = target_dir / (filename + '.csv')
    # newline='' lets the csv module control line endings itself.
    with destination.open('w+', newline='') as handle:
        csv.writer(
            handle,
            delimiter=',',
            quotechar='"',
            quoting=csv.QUOTE_MINIMAL,
        ).writerows(data)
# NOTE: the default `limit` will need to be increased if you ever want timeframes under 1h
def scrape_candles_to_csv(max_retries, timeframe, since, market, limit=100000):
    """Download candles for every USDT ticker on Binance and save one CSV each.

    The ticker list is fetched from the exchange, symbols containing ``_``
    (dated futures contracts) or lacking ``USDT`` are dropped, and the
    history of each remaining symbol is scraped with ``scrape_ohlcv`` and
    written with ``write_to_csv`` under the symbol name with ``/`` replaced
    by ``_``. Each file starts with a column-name row.

    NOTE: ``write_to_csv`` derives the output directory from the module-level
    ``market`` and ``timeframe`` settings, not from this function's arguments.

    Args:
        max_retries: Forwarded to ``scrape_ohlcv``.
        timeframe: Candle interval to download.
        since: Checkpoint as a millisecond timestamp or an ISO 8601 string.
        market: Value used for the exchange's ``defaultType`` option.
        limit: Number of candles requested per window.
    """
    # instantiate the exchange by id
    exchange = ccxt.binance({
        'enableRateLimit': True,
        'options': {
            'defaultType': market,
        },
    })
    # convert since from string to milliseconds integer if needed
    if isinstance(since, str):
        since = exchange.parse8601(since)
    # preload all markets from the exchange
    exchange.load_markets()
    ticker_list = exchange.fetchTickers()
    # keep USDT symbols only and skip the dated-futures tickers containing '_'
    symbols = [name for name in ticker_list if '_' not in name and 'USDT' in name]
    header = ['Timestamp', 'Open', 'High', 'Low', 'Close', 'Volume']
    for symbol in symbols:
        candles = scrape_ohlcv(exchange, max_retries, symbol, timeframe, since, limit)
        csv_name = symbol.replace('/', '_')
        # The header is prepended on a copy so the summary below is computed
        # from real candle rows only (correct count, real first timestamp).
        write_to_csv(csv_name, [header] + candles)
        if candles:
            print('Saved', len(candles), 'candles from', exchange.iso8601(candles[0][0]), 'to', exchange.iso8601(candles[-1][0]), 'to', csv_name + '.csv')
        else:
            print('Saved', 0, 'candles to', csv_name + '.csv')
    print(len(symbols), 'tickers saved')
# -----------------------------------------------------------------------------
if __name__ == '__main__':
    # My first __name__ == '__main__' code. 31 March 2022.
    # `since` is written as a well-formed ISO 8601 timestamp (with the 'T'
    # date/time separator) so it does not depend on lenient parsing.
    scrape_candles_to_csv(max_retries=5, timeframe=timeframe, since='2017-01-01T00:00:00Z', market=market)