-
Notifications
You must be signed in to change notification settings - Fork 0
/
dvc.lock
119 lines (119 loc) · 3.2 KB
/
dvc.lock
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
schema: '2.0'
stages:
scrape:
cmd: python -m telex.pipeline.scrape.scraping --output-folder "data/raw/raw"
deps:
- path: telex/pipeline/scrape
md5: 7da4762f14f0811c50e13c610379d4c4.dir
size: 2265
nfiles: 2
params:
params.py:
MAX_PAGES: 187
PER_PAGE: 400
SLEEP_TIME: 0.5
outs:
- path: data/raw
md5: 170069251545593c2f2fd9f421249eeb.dir
size: 592970611
nfiles: 74711
preprocess:
cmd: python -m telex.pipeline.preprocess.preprocessing --input-folder "data/raw/raw"
--output-folder "data/preprocessed"
deps:
- path: data/raw
md5: 170069251545593c2f2fd9f421249eeb.dir
size: 592970611
nfiles: 74711
- path: telex/pipeline/preprocess
md5: 68ddf686b26a16aac649f8e905483b3b.dir
size: 1883
nfiles: 2
outs:
- path: data/preprocessed
md5: 4c7d4461073c9b5b8cd526c4fada9878.dir
size: 288517245
nfiles: 1
train:
cmd: python -m telex.pipeline.train.training --input-file "data/preprocessed/telex.json"
--output-folder "data/models"
deps:
- path: data/preprocessed
md5: 4c7d4461073c9b5b8cd526c4fada9878.dir
size: 288517245
nfiles: 1
- path: telex/pipeline/train
md5: cfa4c55926d00d6ad8d680f142c83c08.dir
size: 1769
nfiles: 2
params:
params.py:
BigramParams:
MODEL: bigram
MODEL: transformer
TrainingParams:
SEED: 42
LR: 0.001
SPLIT:
- 0.9
- 0.05
- 0.05
BLOCK_SIZE: 100
BATCH_SIZE: 64
EPOCHS: 1
STEPS: 5000
TransformerParams:
MODEL: transformer
D_MODEL: 120
HEADS: 6
D_FF: 512
DROPOUT: 0.1
NUM_LAYERS: 2
VOCAB: "AÁBCDEÉFGHIÍJKLMNOÓÖŐPQRSTUÚÜŰVWXYZaábcdeéfghiíjklmnoóöőpqrstuúüűvwxyz0123456789.,;:?!-–_()[]{}'\"\
/\\@#$%^&*+=|~` "
outs:
- path: data/models
md5: c70961aae9d65e913b3bc57c689df1a5.dir
size: 2111358
nfiles: 1
evaluate:
cmd: python -m telex.pipeline.evaluate.evaluation --model-file "data/models/model.pt"
--input-file "data/preprocessed/telex.json"
deps:
- path: data/models
md5: 49cf68016696a153031893a3e48d6141.dir
size: 2124928
nfiles: 2
- path: data/preprocessed
md5: 4c7d4461073c9b5b8cd526c4fada9878.dir
size: 288517245
nfiles: 1
- path: telex/pipeline/evaluate
md5: 35911ce397d1fd191bbd311332f69408.dir
size: 3247
nfiles: 2
params:
params.py:
BigramParams:
MODEL: bigram
MODEL: transformer
TrainingParams:
SEED: 42
LR: 0.001
SPLIT:
- 0.9
- 0.05
- 0.05
BLOCK_SIZE: 100
BATCH_SIZE: 64
EPOCHS: 1
STEPS: 5000
TransformerParams:
MODEL: transformer
D_MODEL: 120
HEADS: 6
D_FF: 512
DROPOUT: 0.1
NUM_LAYERS: 2
VOCAB: "AÁBCDEÉFGHIÍJKLMNOÓÖŐPQRSTUÚÜŰVWXYZaábcdeéfghiíjklmnoóöőpqrstuúüűvwxyz0123456789.,;:?!-–_()[]{}'\"\
/\\@#$%^&*+=|~` "