-
Notifications
You must be signed in to change notification settings - Fork 1
/
dvc.lock
2919 lines (2919 loc) · 120 KB
/
dvc.lock
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
schema: '2.0'
stages:
aggregate_embeddings@mmlw-roberta-large:
cmd: PYTHONPATH=. python scripts/embed/aggregate_embeddings.py --embeddings-dir
data/embeddings/pl-court-raw/mmlw-roberta-large/all_embeddings
deps:
- hash: md5
md5: 1a086db46b90b0f3c4c66c3ecefe8adb.dir
nfiles: 53
path: data/embeddings/pl-court-raw/mmlw-roberta-large/all_embeddings
size: 24415235644
- hash: md5
md5: edb817e03c0c1c20822eda0e445f5083
path: scripts/embed/aggregate_embeddings.py
size: 1839
outs:
- hash: md5
md5: 0d84b4da5513feeb6ca9bad70a2ff164
path: data/embeddings/pl-court-raw/mmlw-roberta-large/agg_embeddings.pt
size: 1725566207
build_graph_dataset:
cmd: PYTHONPATH=. python scripts/dataset/build_graph_dataset.py --dataset-dir
data/datasets/pl/raw --embeddings-root-dir data/embeddings/pl-court-raw/mmlw-roberta-large/
--target-dir data/datasets/pl/graph
deps:
- hash: md5
md5: 5dd44be2eea852bcce3d0918ff8b97da.dir
nfiles: 17
path: data/datasets/pl/raw
size: 10234880729
- hash: md5
md5: 0d84b4da5513feeb6ca9bad70a2ff164
path: data/embeddings/pl-court-raw/mmlw-roberta-large/agg_embeddings.pt
size: 1725566207
- hash: md5
md5: fbb5585b8c3ef28255801d38c9248f8e
path: data/embeddings/pl-court-raw/mmlw-roberta-large/all_embeddings/config.yaml
size: 502
- hash: md5
md5: 730e3d92be26408bd6dc26606b4c22ff
path: juddges/data/pl_court_graph.py
size: 4974
- hash: md5
md5: e7f76dc4f24d884291e1f0b66d8244a8
path: scripts/dataset/build_graph_dataset.py
size: 1159
outs:
- hash: md5
md5: f2820796cff4578c11ffcb0fa6cdadd7.dir
nfiles: 2
path: data/datasets/pl/graph/data
size: 1823760294
- hash: md5
md5: 68b09dd0ce741e6ee1fff4e37c954fa6
path: data/datasets/pl/graph/metadata.yaml
size: 564
build_instruct_dataset_en:
cmd: PYTHONPATH=. python scripts/dataset/build_instruct_dataset_en.py --repo-id
JuDDGES/en-court-instruct
deps:
- hash: md5
md5: 39e530fbd8c7f3a696e117ee13578e1f
path: scripts/dataset/build_instruct_dataset_en.py
size: 5203
embed@mmlw-roberta-large:
cmd: PYTHONPATH=. python scripts/embed/embed_text.py embedding_model=mmlw-roberta-large
deps:
- path: configs/embedding.yaml
hash: md5
md5: 9a163f8656c6efa150fd7f939bb32e49
size: 477
- path: configs/embedding_model/mmlw-roberta-large.yaml
hash: md5
md5: 22f36cfd196c0fdc3cfd8a036d52b606
size: 52
- path: data/datasets/pl/raw
hash: md5
md5: 622ba21868561c26fb6877ad95bfb5c5.dir
size: 10234505621
nfiles: 17
- path: scripts/embed/embed_text.py
hash: md5
md5: d9f127f2e92afa40f23ebcd6cf540cb9
size: 3743
outs:
- path: data/embeddings/pl-court-raw/mmlw-roberta-large/all_embeddings
hash: md5
md5: a8a4a370199cce269899df89f4e33fdc.dir
size: 23430894782
nfiles: 51
evaluate_api_models@en-court-instruct-open_ai_gpt-4o-997:
cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file data/experiments/predict/en-court-instruct/open_ai_gpt-4o/outputs_997.json
--num-proc=-1
deps:
- hash: md5
md5: 8f70e2baa0b0ae8a320577f5c8a60011
path: data/experiments/predict/en-court-instruct/open_ai_gpt-4o/outputs_997.json
size: 679432
- hash: md5
md5: 0644efb76af2c5461185e37a07ba2c17
path: scripts/sft/evaluate.py
size: 697
outs:
- hash: md5
md5: ac30bcf3c40000cab61e0914b56aba85
path: data/experiments/predict/en-court-instruct/open_ai_gpt-4o/metrics_997.json
size: 157
evaluate_api_models@en-court-instruct-open_ai_gpt-4o-mini-997:
cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file data/experiments/predict/en-court-instruct/open_ai_gpt-4o-mini/outputs_997.json
--num-proc=-1
deps:
- hash: md5
md5: 2a0819011b3eac56e497201a9f67e310
path: data/experiments/predict/en-court-instruct/open_ai_gpt-4o-mini/outputs_997.json
size: 690306
- hash: md5
md5: 0644efb76af2c5461185e37a07ba2c17
path: scripts/sft/evaluate.py
size: 697
outs:
- hash: md5
md5: d70eb0821aff9c9e874a421b80f7f697
path: data/experiments/predict/en-court-instruct/open_ai_gpt-4o-mini/metrics_997.json
size: 155
evaluate_api_models@pl-court-instruct-open_ai_gpt-4o-997:
cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file data/experiments/predict/pl-court-instruct/open_ai_gpt-4o/outputs_997.json
--num-proc=-1
deps:
- hash: md5
md5: 7c5833fdd1419163b286baaa3d71e084
path: data/experiments/predict/pl-court-instruct/open_ai_gpt-4o/outputs_997.json
size: 1965252
- hash: md5
md5: 0644efb76af2c5461185e37a07ba2c17
path: scripts/sft/evaluate.py
size: 697
outs:
- hash: md5
md5: 65c808d4aebd8efe37b94a5128a19de6
path: data/experiments/predict/pl-court-instruct/open_ai_gpt-4o/metrics_997.json
size: 306
evaluate_api_models@pl-court-instruct-open_ai_gpt-4o-mini-997:
cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file data/experiments/predict/pl-court-instruct/open_ai_gpt-4o-mini/outputs_997.json
--num-proc=-1
deps:
- hash: md5
md5: 839c911f542cd7c60c9ae52ef95e9907
path: data/experiments/predict/pl-court-instruct/open_ai_gpt-4o-mini/outputs_997.json
size: 1812429
- hash: md5
md5: 0644efb76af2c5461185e37a07ba2c17
path: scripts/sft/evaluate.py
size: 697
outs:
- hash: md5
md5: fe43f0d25b500a0f2fb2d8199b8034fd
path: data/experiments/predict/pl-court-instruct/open_ai_gpt-4o-mini/metrics_997.json
size: 305
evaluate_en@en-court-instruct-Unsloth-Llama-3-8B-Instruct-42:
cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file
data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_42.json
--num-proc=-1
deps:
- hash: md5
md5: 761018c0a306fbee63dad2fbc119110d
path:
data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_42.json
size: 821683
- hash: md5
md5: 0644efb76af2c5461185e37a07ba2c17
path: scripts/sft/evaluate.py
size: 697
outs:
- hash: md5
md5: 265776ba10a7b24b66e6bac1131e0c48
path:
data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/metrics_42.json
size: 149
evaluate_en@en-court-instruct-Unsloth-Llama-3-8B-Instruct-7312:
cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file
data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_7312.json
--num-proc=-1
deps:
- hash: md5
md5: a7361535b440251d6ce6232a15cfcdf2
path:
data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_7312.json
size: 818877
- hash: md5
md5: 0644efb76af2c5461185e37a07ba2c17
path: scripts/sft/evaluate.py
size: 697
outs:
- hash: md5
md5: 97fa8dfaa5e57633e8fb6a7d073177f5
path:
data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/metrics_7312.json
size: 147
evaluate_en@en-court-instruct-Unsloth-Llama-3-8B-Instruct-997:
cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file
data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_997.json
--num-proc=-1
deps:
- hash: md5
md5: 94924275d576271875fecf22c0f9b39e
path:
data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_997.json
size: 817490
- hash: md5
md5: 0644efb76af2c5461185e37a07ba2c17
path: scripts/sft/evaluate.py
size: 697
outs:
- hash: md5
md5: c3552161ec68d8cc6a8e5b75f02e22e2
path:
data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/metrics_997.json
size: 147
evaluate_en@en-court-instruct-Unsloth-Llama-3-8B-Instruct-fine-tuned-en-42:
cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file
data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/outputs_42.json
--num-proc=-1
deps:
- hash: md5
md5: 4246a4fafba5e130aac3db6c1c61ce30
path:
data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/outputs_42.json
size: 675578
- hash: md5
md5: 0644efb76af2c5461185e37a07ba2c17
path: scripts/sft/evaluate.py
size: 697
outs:
- hash: md5
md5: 016d1c87b2925c6f941400d178bee018
path:
data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/metrics_42.json
size: 157
evaluate_en@en-court-instruct-Unsloth-Llama-3-8B-Instruct-fine-tuned-en-7312:
cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file
data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/outputs_7312.json
--num-proc=-1
deps:
- hash: md5
md5: f0b806eebca2f3ddf49d0ff821856b45
path:
data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/outputs_7312.json
size: 670935
- hash: md5
md5: 0644efb76af2c5461185e37a07ba2c17
path: scripts/sft/evaluate.py
size: 697
outs:
- hash: md5
md5: a8459393feb773fea85ede4b831b3fa6
path:
data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/metrics_7312.json
size: 157
evaluate_en@en-court-instruct-Unsloth-Llama-3-8B-Instruct-fine-tuned-en-997:
cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file
data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/outputs_997.json
--num-proc=-1
deps:
- hash: md5
md5: 4e968cac351ad48ad786d1ecccbbc967
path:
data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/outputs_997.json
size: 670674
- hash: md5
md5: 0644efb76af2c5461185e37a07ba2c17
path: scripts/sft/evaluate.py
size: 697
outs:
- hash: md5
md5: 21bc79aad7ab2e97b75e1d3fb18a2263
path:
data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/metrics_997.json
size: 157
evaluate_en@en-court-instruct-Unsloth-Mistral-Nemo-Instruct-2407-42:
cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file
data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_42.json
--num-proc=-1
deps:
- hash: md5
md5: 4fe25ad80a20ea5d6200136176b3e4ca
path:
data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_42.json
size: 705218
- hash: md5
md5: 0644efb76af2c5461185e37a07ba2c17
path: scripts/sft/evaluate.py
size: 697
outs:
- hash: md5
md5: 0b2f663a1cbc3ef08c363ec8adc53c15
path:
data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/metrics_42.json
size: 151
evaluate_en@en-court-instruct-Unsloth-Mistral-Nemo-Instruct-2407-7312:
cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file
data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_7312.json
--num-proc=-1
deps:
- hash: md5
md5: cf4fdbf0e26e6c793bdca4edd6e365c0
path:
data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_7312.json
size: 703876
- hash: md5
md5: 0644efb76af2c5461185e37a07ba2c17
path: scripts/sft/evaluate.py
size: 697
outs:
- hash: md5
md5: 604b5cee14ec6520b88bafecc962e031
path:
data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/metrics_7312.json
size: 152
evaluate_en@en-court-instruct-Unsloth-Mistral-Nemo-Instruct-2407-997:
cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file
data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_997.json
--num-proc=-1
deps:
- hash: md5
md5: 94c30cf8fe7db71afc58a5c9cdbc0d9f
path:
data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_997.json
size: 705894
- hash: md5
md5: 0644efb76af2c5461185e37a07ba2c17
path: scripts/sft/evaluate.py
size: 697
outs:
- hash: md5
md5: a91ec5b434bebd8ce1d2000e0a033cb9
path:
data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/metrics_997.json
size: 152
evaluate_en@en-court-instruct-Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en-42:
cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file
data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/outputs_42.json
--num-proc=-1
deps:
- hash: md5
md5: 313fa5a662f37cacae4980a04830f422
path:
data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/outputs_42.json
size: 642688
- hash: md5
md5: 0644efb76af2c5461185e37a07ba2c17
path: scripts/sft/evaluate.py
size: 697
outs:
- hash: md5
md5: f0d37c5ac017c0e488b7c3bed01c7093
path:
data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/metrics_42.json
size: 156
evaluate_en@en-court-instruct-Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en-7312:
cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file
data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/outputs_7312.json
--num-proc=-1
deps:
- hash: md5
md5: 4ed8db93aa14f1cc98e276d3989efa9e
path:
data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/outputs_7312.json
size: 642730
- hash: md5
md5: 0644efb76af2c5461185e37a07ba2c17
path: scripts/sft/evaluate.py
size: 697
outs:
- hash: md5
md5: a1521ab06a56258759953bb02ae87e24
path:
data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/metrics_7312.json
size: 157
evaluate_en@en-court-instruct-Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en-997:
cmd: PYTHONPATH=. python scripts/sft/evaluate.py --output-file
data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/outputs_997.json
--num-proc=-1
deps:
- hash: md5
md5: 787c129090aa1b64e337b236a4391402
path:
data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/outputs_997.json
size: 642477
- hash: md5
md5: 0644efb76af2c5461185e37a07ba2c17
path: scripts/sft/evaluate.py
size: 697
outs:
- hash: md5
md5: f3339245ea358de4b1348c8393153946
path:
data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/metrics_997.json
size: 157
evaluate_llm_as_judge_api_models@en-gpt_4o_mini-open_ai_gpt-4o-997:
cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini
answers_file=data/experiments/predict/en-court-instruct/open_ai_gpt-4o/outputs_997.json
out_metric_file=data/experiments/predict/en-court-instruct/open_ai_gpt-4o/judge_metrics_997.json
prompt=en
deps:
- hash: md5
md5: 8f70e2baa0b0ae8a320577f5c8a60011
path: data/experiments/predict/en-court-instruct/open_ai_gpt-4o/outputs_997.json
size: 679432
- hash: md5
md5: 79a02fb864cb279f93fc4171043bb31c
path: scripts/sft/evaluate_llm_as_judge.py
size: 2253
outs:
- hash: md5
md5: 1ad8736bed0fff4e88a9c32775f370bf
path: data/experiments/predict/en-court-instruct/open_ai_gpt-4o/judge_metrics_997.json
size: 481
evaluate_llm_as_judge_api_models@en-gpt_4o_mini-open_ai_gpt-4o-mini-997:
cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini
answers_file=data/experiments/predict/en-court-instruct/open_ai_gpt-4o-mini/outputs_997.json
out_metric_file=data/experiments/predict/en-court-instruct/open_ai_gpt-4o-mini/judge_metrics_997.json
prompt=en
deps:
- hash: md5
md5: 2a0819011b3eac56e497201a9f67e310
path: data/experiments/predict/en-court-instruct/open_ai_gpt-4o-mini/outputs_997.json
size: 690306
- hash: md5
md5: 79a02fb864cb279f93fc4171043bb31c
path: scripts/sft/evaluate_llm_as_judge.py
size: 2253
outs:
- hash: md5
md5: bd272bea099716c0c2e689a2d19c0071
path:
data/experiments/predict/en-court-instruct/open_ai_gpt-4o-mini/judge_metrics_997.json
size: 488
evaluate_llm_as_judge_api_models@pl-gpt_4o_mini-open_ai_gpt-4o-997:
cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini
answers_file=data/experiments/predict/pl-court-instruct/open_ai_gpt-4o/outputs_997.json
out_metric_file=data/experiments/predict/pl-court-instruct/open_ai_gpt-4o/judge_metrics_997.json
prompt=pl
deps:
- hash: md5
md5: 7c5833fdd1419163b286baaa3d71e084
path: data/experiments/predict/pl-court-instruct/open_ai_gpt-4o/outputs_997.json
size: 1965252
- hash: md5
md5: 79a02fb864cb279f93fc4171043bb31c
path: scripts/sft/evaluate_llm_as_judge.py
size: 2253
outs:
- hash: md5
md5: 867f10aeb55a3bd46b08c8a75c3bfc60
path: data/experiments/predict/pl-court-instruct/open_ai_gpt-4o/judge_metrics_997.json
size: 1176
evaluate_llm_as_judge_api_models@pl-gpt_4o_mini-open_ai_gpt-4o-mini-997:
cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini
answers_file=data/experiments/predict/pl-court-instruct/open_ai_gpt-4o-mini/outputs_997.json
out_metric_file=data/experiments/predict/pl-court-instruct/open_ai_gpt-4o-mini/judge_metrics_997.json
prompt=pl
deps:
- hash: md5
md5: 839c911f542cd7c60c9ae52ef95e9907
path: data/experiments/predict/pl-court-instruct/open_ai_gpt-4o-mini/outputs_997.json
size: 1812429
- hash: md5
md5: 79a02fb864cb279f93fc4171043bb31c
path: scripts/sft/evaluate_llm_as_judge.py
size: 2253
outs:
- hash: md5
md5: 24037233e5abe74fe13f69dd4fc5e26a
path:
data/experiments/predict/pl-court-instruct/open_ai_gpt-4o-mini/judge_metrics_997.json
size: 1173
evaluate_llm_as_judge_en@gpt_4o_mini-Unsloth-Llama-3-8B-Instruct-42:
cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini
answers_file=data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_42.json
out_metric_file=data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/judge_metrics_42.json
prompt=en
deps:
- hash: md5
md5: 761018c0a306fbee63dad2fbc119110d
path:
data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_42.json
size: 821683
- hash: md5
md5: 79a02fb864cb279f93fc4171043bb31c
path: scripts/sft/evaluate_llm_as_judge.py
size: 2253
outs:
- hash: md5
md5: 77ecbff8c82afbfd6fec098fb87e1218
path:
data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/judge_metrics_42.json
size: 478
evaluate_llm_as_judge_en@gpt_4o_mini-Unsloth-Llama-3-8B-Instruct-7312:
cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini
answers_file=data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_7312.json
out_metric_file=data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/judge_metrics_7312.json
prompt=en
deps:
- hash: md5
md5: a7361535b440251d6ce6232a15cfcdf2
path:
data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_7312.json
size: 818877
- hash: md5
md5: 79a02fb864cb279f93fc4171043bb31c
path: scripts/sft/evaluate_llm_as_judge.py
size: 2253
outs:
- hash: md5
md5: f25c9ad98ef817e976def98d6b7d3b5d
path:
data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/judge_metrics_7312.json
size: 482
evaluate_llm_as_judge_en@gpt_4o_mini-Unsloth-Llama-3-8B-Instruct-997:
cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini
answers_file=data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_997.json
out_metric_file=data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/judge_metrics_997.json
prompt=en
deps:
- hash: md5
md5: 94924275d576271875fecf22c0f9b39e
path:
data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_997.json
size: 817490
- hash: md5
md5: 79a02fb864cb279f93fc4171043bb31c
path: scripts/sft/evaluate_llm_as_judge.py
size: 2253
outs:
- hash: md5
md5: 4395c32931d25a1bd9aa092c5a0e5460
path:
data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct/judge_metrics_997.json
size: 478
evaluate_llm_as_judge_en@gpt_4o_mini-Unsloth-Llama-3-8B-Instruct-fine-tuned-en-42:
cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini
answers_file=data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/outputs_42.json
out_metric_file=data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/judge_metrics_42.json
prompt=en
deps:
- hash: md5
md5: 4246a4fafba5e130aac3db6c1c61ce30
path:
data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/outputs_42.json
size: 675578
- hash: md5
md5: 79a02fb864cb279f93fc4171043bb31c
path: scripts/sft/evaluate_llm_as_judge.py
size: 2253
outs:
- hash: md5
md5: 5f2cea81c873a3b85ef95ba9a6dc90a5
path:
data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/judge_metrics_42.json
size: 487
evaluate_llm_as_judge_en@gpt_4o_mini-Unsloth-Llama-3-8B-Instruct-fine-tuned-en-7312:
cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini
answers_file=data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/outputs_7312.json
out_metric_file=data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/judge_metrics_7312.json
prompt=en
deps:
- hash: md5
md5: f0b806eebca2f3ddf49d0ff821856b45
path:
data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/outputs_7312.json
size: 670935
- hash: md5
md5: 79a02fb864cb279f93fc4171043bb31c
path: scripts/sft/evaluate_llm_as_judge.py
size: 2253
outs:
- hash: md5
md5: 5cc45cac8a7607e42a8a394593d33396
path:
data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/judge_metrics_7312.json
size: 486
evaluate_llm_as_judge_en@gpt_4o_mini-Unsloth-Llama-3-8B-Instruct-fine-tuned-en-997:
cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini
answers_file=data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/outputs_997.json
out_metric_file=data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/judge_metrics_997.json
prompt=en
deps:
- hash: md5
md5: 4e968cac351ad48ad786d1ecccbbc967
path:
data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/outputs_997.json
size: 670674
- hash: md5
md5: 79a02fb864cb279f93fc4171043bb31c
path: scripts/sft/evaluate_llm_as_judge.py
size: 2253
outs:
- hash: md5
md5: 90c2b0cd132130d0b9d3a60bf6fdd69b
path:
data/experiments/predict/en-court-instruct/Unsloth-Llama-3-8B-Instruct-fine-tuned-en/judge_metrics_997.json
size: 486
evaluate_llm_as_judge_en@gpt_4o_mini-Unsloth-Mistral-Nemo-Instruct-2407-42:
cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini
answers_file=data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_42.json
out_metric_file=data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/judge_metrics_42.json
prompt=en
deps:
- hash: md5
md5: 4fe25ad80a20ea5d6200136176b3e4ca
path:
data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_42.json
size: 705218
- hash: md5
md5: 79a02fb864cb279f93fc4171043bb31c
path: scripts/sft/evaluate_llm_as_judge.py
size: 2253
outs:
- hash: md5
md5: 69901f631da4ffefd09e7cbfac39cd89
path:
data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/judge_metrics_42.json
size: 480
evaluate_llm_as_judge_en@gpt_4o_mini-Unsloth-Mistral-Nemo-Instruct-2407-7312:
cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini
answers_file=data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_7312.json
out_metric_file=data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/judge_metrics_7312.json
prompt=en
deps:
- hash: md5
md5: cf4fdbf0e26e6c793bdca4edd6e365c0
path:
data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_7312.json
size: 703876
- hash: md5
md5: 79a02fb864cb279f93fc4171043bb31c
path: scripts/sft/evaluate_llm_as_judge.py
size: 2253
outs:
- hash: md5
md5: 860b5c00ace1f2967db9b5a977cfc3ad
path:
data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/judge_metrics_7312.json
size: 478
evaluate_llm_as_judge_en@gpt_4o_mini-Unsloth-Mistral-Nemo-Instruct-2407-997:
cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini
answers_file=data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_997.json
out_metric_file=data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/judge_metrics_997.json
prompt=en
deps:
- hash: md5
md5: 94c30cf8fe7db71afc58a5c9cdbc0d9f
path:
data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/outputs_997.json
size: 705894
- hash: md5
md5: 79a02fb864cb279f93fc4171043bb31c
path: scripts/sft/evaluate_llm_as_judge.py
size: 2253
outs:
- hash: md5
md5: 860b5c00ace1f2967db9b5a977cfc3ad
path:
data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407/judge_metrics_997.json
size: 478
evaluate_llm_as_judge_en@gpt_4o_mini-Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en-42:
cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini
answers_file=data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/outputs_42.json
out_metric_file=data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/judge_metrics_42.json
prompt=en
deps:
- hash: md5
md5: 313fa5a662f37cacae4980a04830f422
path:
data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/outputs_42.json
size: 642688
- hash: md5
md5: 79a02fb864cb279f93fc4171043bb31c
path: scripts/sft/evaluate_llm_as_judge.py
size: 2253
outs:
- hash: md5
md5: 974e972a09d844a77840029d642e8077
path:
data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/judge_metrics_42.json
size: 486
evaluate_llm_as_judge_en@gpt_4o_mini-Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en-7312:
cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini
answers_file=data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/outputs_7312.json
out_metric_file=data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/judge_metrics_7312.json
prompt=en
deps:
- hash: md5
md5: 4ed8db93aa14f1cc98e276d3989efa9e
path:
data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/outputs_7312.json
size: 642730
- hash: md5
md5: 79a02fb864cb279f93fc4171043bb31c
path: scripts/sft/evaluate_llm_as_judge.py
size: 2253
outs:
- hash: md5
md5: 8a9712eb10a8da99d86bab8968fd3207
path:
data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/judge_metrics_7312.json
size: 485
evaluate_llm_as_judge_en@gpt_4o_mini-Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en-997:
cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini
answers_file=data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/outputs_997.json
out_metric_file=data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/judge_metrics_997.json
prompt=en
deps:
- hash: md5
md5: 787c129090aa1b64e337b236a4391402
path:
data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/outputs_997.json
size: 642477
- hash: md5
md5: 79a02fb864cb279f93fc4171043bb31c
path: scripts/sft/evaluate_llm_as_judge.py
size: 2253
outs:
- hash: md5
md5: 34de8eabaebe6a96b4b664b664f222e2
path:
data/experiments/predict/en-court-instruct/Unsloth-Mistral-Nemo-Instruct-2407-fine-tuned-en/judge_metrics_997.json
size: 484
evaluate_llm_as_judge_pl@gpt_4o_mini-Bielik-11B-v2.2-Instruct-42:
cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini
answers_file=data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct/outputs_42.json
out_metric_file=data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct/judge_metrics_42.json
prompt=pl
deps:
- hash: md5
md5: c3e404c898e3e193ac3aa910187b4f9f
path:
data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct/outputs_42.json
size: 1734129
- hash: md5
md5: 79a02fb864cb279f93fc4171043bb31c
path: scripts/sft/evaluate_llm_as_judge.py
size: 2253
outs:
- hash: md5
md5: 198f24599357bc230bf9f1e39a235a44
path:
data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct/judge_metrics_42.json
size: 1172
evaluate_llm_as_judge_pl@gpt_4o_mini-Bielik-11B-v2.2-Instruct-7312:
cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini
answers_file=data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct/outputs_7312.json
out_metric_file=data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct/judge_metrics_7312.json
prompt=pl
deps:
- hash: md5
md5: d4a2ab2393a58f0d7e1897859eccb626
path:
data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct/outputs_7312.json
size: 1734772
- hash: md5
md5: 79a02fb864cb279f93fc4171043bb31c
path: scripts/sft/evaluate_llm_as_judge.py
size: 2253
outs:
- hash: md5
md5: 81cfdaa675ef2118cf923e57cc54d201
path:
data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct/judge_metrics_7312.json
size: 1161
evaluate_llm_as_judge_pl@gpt_4o_mini-Bielik-11B-v2.2-Instruct-997:
cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini
answers_file=data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct/outputs_997.json
out_metric_file=data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct/judge_metrics_997.json
prompt=pl
deps:
- hash: md5
md5: 8f4f6bc97e33b3b2728bebb7620a4968
path:
data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct/outputs_997.json
size: 1731689
- hash: md5
md5: 79a02fb864cb279f93fc4171043bb31c
path: scripts/sft/evaluate_llm_as_judge.py
size: 2253
outs:
- hash: md5
md5: c5861ffaa439ba9bbd95b954d6ab1f3d
path:
data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct/judge_metrics_997.json
size: 1168
evaluate_llm_as_judge_pl@gpt_4o_mini-Bielik-11B-v2.2-Instruct-fine-tuned-42:
cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini
answers_file=data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct-fine-tuned/outputs_42.json
out_metric_file=data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct-fine-tuned/judge_metrics_42.json
prompt=pl
deps:
- hash: md5
md5: dfd5d7389b312686428cc967aea5a5b9
path:
data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct-fine-tuned/outputs_42.json
size: 1860743
- hash: md5
md5: 79a02fb864cb279f93fc4171043bb31c
path: scripts/sft/evaluate_llm_as_judge.py
size: 2253
outs:
- hash: md5
md5: abcd5722e84ec3e81ff8cf28b8a887cb
path:
data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct-fine-tuned/judge_metrics_42.json
size: 1165
evaluate_llm_as_judge_pl@gpt_4o_mini-Bielik-11B-v2.2-Instruct-fine-tuned-7312:
cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini
answers_file=data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct-fine-tuned/outputs_7312.json
out_metric_file=data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct-fine-tuned/judge_metrics_7312.json
prompt=pl
deps:
- hash: md5
md5: 8fa2faeda5a577c06cd6bf35b8702330
path:
data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct-fine-tuned/outputs_7312.json
size: 1857569
- hash: md5
md5: 79a02fb864cb279f93fc4171043bb31c
path: scripts/sft/evaluate_llm_as_judge.py
size: 2253
outs:
- hash: md5
md5: 4b77a3d10cd6027e7e141ba80e9678c2
path:
data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct-fine-tuned/judge_metrics_7312.json
size: 1160
evaluate_llm_as_judge_pl@gpt_4o_mini-Bielik-11B-v2.2-Instruct-fine-tuned-997:
cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini
answers_file=data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct-fine-tuned/outputs_997.json
out_metric_file=data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct-fine-tuned/judge_metrics_997.json
prompt=pl
deps:
- hash: md5
md5: ba53d76f701eddb60a182de49d992878
path:
data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct-fine-tuned/outputs_997.json
size: 1857855
- hash: md5
md5: 79a02fb864cb279f93fc4171043bb31c
path: scripts/sft/evaluate_llm_as_judge.py
size: 2253
outs:
- hash: md5
md5: 9e60a1ed6002a0349656c0bd23bc7b1c
path:
data/experiments/predict/pl-court-instruct/Bielik-11B-v2.2-Instruct-fine-tuned/judge_metrics_997.json
size: 1164
evaluate_llm_as_judge_pl@gpt_4o_mini-Bielik-7B-Instruct-v0.1-42:
cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini
answers_file=data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/outputs_42.json
out_metric_file=data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/judge_metrics_42.json
prompt=pl
deps:
- hash: md5
md5: 2dc39513a04910c5d0c54380166639d9
path:
data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/outputs_42.json
size: 2029644
- hash: md5
md5: 79a02fb864cb279f93fc4171043bb31c
path: scripts/sft/evaluate_llm_as_judge.py
size: 2253
outs:
- hash: md5
md5: 243da4df07c6dfb5199b925e3f5c07aa
path:
data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/judge_metrics_42.json
size: 1137
evaluate_llm_as_judge_pl@gpt_4o_mini-Bielik-7B-Instruct-v0.1-7312:
cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini
answers_file=data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/outputs_7312.json
out_metric_file=data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/judge_metrics_7312.json
prompt=pl
deps:
- hash: md5
md5: ae39bf31296ffe82c0f6a3e8c9ff63aa
path:
data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/outputs_7312.json
size: 2014399
- hash: md5
md5: 79a02fb864cb279f93fc4171043bb31c
path: scripts/sft/evaluate_llm_as_judge.py
size: 2253
outs:
- hash: md5
md5: 8098cc937d57455ca47d32c3449159a3
path:
data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/judge_metrics_7312.json
size: 1129
evaluate_llm_as_judge_pl@gpt_4o_mini-Bielik-7B-Instruct-v0.1-997:
cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini
answers_file=data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/outputs_997.json
out_metric_file=data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/judge_metrics_997.json
prompt=pl
deps:
- hash: md5
md5: fac04d78ad020b50f79fc7277a037e8e
path:
data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/outputs_997.json
size: 2016400
- hash: md5
md5: 79a02fb864cb279f93fc4171043bb31c
path: scripts/sft/evaluate_llm_as_judge.py
size: 2253
outs:
- hash: md5
md5: f1390b2d50893a17c90fc277dc363d6a
path:
data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1/judge_metrics_997.json
size: 1139
evaluate_llm_as_judge_pl@gpt_4o_mini-Bielik-7B-Instruct-v0.1-fine-tuned-42:
cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini
answers_file=data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/outputs_42.json
out_metric_file=data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/judge_metrics_42.json
prompt=pl
deps:
- hash: md5
md5: 178eb0649617d4a698da6c9e315e84c5
path:
data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/outputs_42.json
size: 2034749
- hash: md5
md5: 79a02fb864cb279f93fc4171043bb31c
path: scripts/sft/evaluate_llm_as_judge.py
size: 2253
outs:
- hash: md5
md5: 302b957707520fa327d1da0edf18baa3
path:
data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/judge_metrics_42.json
size: 1167
evaluate_llm_as_judge_pl@gpt_4o_mini-Bielik-7B-Instruct-v0.1-fine-tuned-7312:
cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini
answers_file=data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/outputs_7312.json
out_metric_file=data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/judge_metrics_7312.json
prompt=pl
deps:
- hash: md5
md5: 743ea22448bc73a7a991da075fca8841
path:
data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/outputs_7312.json
size: 2031343
- hash: md5
md5: 79a02fb864cb279f93fc4171043bb31c
path: scripts/sft/evaluate_llm_as_judge.py
size: 2253
outs:
- hash: md5
md5: 789f0906846251d3f0cab78d111f9c56
path:
data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/judge_metrics_7312.json
size: 1163
evaluate_llm_as_judge_pl@gpt_4o_mini-Bielik-7B-Instruct-v0.1-fine-tuned-997:
cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini
answers_file=data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/outputs_997.json
out_metric_file=data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/judge_metrics_997.json
prompt=pl
deps:
- hash: md5
md5: 433a4b2aa7870a134277a265d099a588
path:
data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/outputs_997.json
size: 2029482
- hash: md5
md5: 79a02fb864cb279f93fc4171043bb31c
path: scripts/sft/evaluate_llm_as_judge.py
size: 2253
outs:
- hash: md5
md5: 90f3ed04ef29c5cd29b7ec8f02a780a1
path:
data/experiments/predict/pl-court-instruct/Bielik-7B-Instruct-v0.1-fine-tuned/judge_metrics_997.json
size: 1163
evaluate_llm_as_judge_pl@gpt_4o_mini-Unsloth-Llama-3-8B-Instruct-42:
cmd: PYTHONPATH=. python scripts/sft/evaluate_llm_as_judge.py api_model=gpt_4o_mini
answers_file=data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_42.json
out_metric_file=data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/judge_metrics_42.json
prompt=pl
deps:
- hash: md5
md5: e99c88720116c951087b6125e5f4be4d
path:
data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/outputs_42.json
size: 2008073
- hash: md5
md5: 79a02fb864cb279f93fc4171043bb31c
path: scripts/sft/evaluate_llm_as_judge.py
size: 2253
outs:
- hash: md5
md5: 9d9fba0cf2169e9dd9f69579a2182b8e
path:
data/experiments/predict/pl-court-instruct/Unsloth-Llama-3-8B-Instruct/judge_metrics_42.json