-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtrain_pat.py
60 lines (49 loc) · 1.55 KB
/
train_pat.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
# -*- coding: utf-8 -*-
"""
Created on Thu May 23 10:31:19 2024
@author: tesla
"""
import pandas as pd
import numpy as np
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from datasets import Dataset
from setfit import SetFitModel, Trainer, TrainingArguments
import torch
from sklearn.model_selection import train_test_split
from setfit import SetFitModel, Trainer, TrainingArguments, sample_dataset
# Report whether a CUDA device is visible. The bare call used previously
# discarded its result, so running the script showed nothing.
print(f"CUDA available: {torch.cuda.is_available()}")
def compute_metrics(y_pred, y_test, average="binary"):
    """Score predictions against reference labels with four metrics.

    In this script the function is handed to the SetFit ``Trainer`` as its
    ``metric`` callable, hence the argument order: predictions first,
    references second.

    Args:
        y_pred: Predicted labels.
        y_test: Reference (ground-truth) labels.
        average: Averaging mode forwarded to precision, recall and F1.
            The default ``"binary"`` reproduces the previous two-class
            behaviour. Use ``"macro"``, ``"micro"`` or ``"weighted"`` for
            multi-class labels (e.g. through the Trainer's
            ``metric_kwargs``).

    Returns:
        A dict with the keys ``accuracy``, ``precision``, ``recall`` and
        ``f1``, each converted to a built-in ``float``.
    """
    # scikit-learn expects (y_true, y_pred), i.e. the reverse of this
    # function's own parameter order.
    return {
        'accuracy': float(accuracy_score(y_test, y_pred)),
        'precision': float(precision_score(y_test, y_pred, average=average)),
        'recall': float(recall_score(y_test, y_pred, average=average)),
        'f1': float(f1_score(y_test, y_pred, average=average)),
    }
# Load the labelled examples. The path is hard-coded; adjust it to wherever
# the feather file lives.
# NOTE(review): no column_mapping is passed to the Trainer, so the frame
# presumably already has the `text` and `label` columns SetFit looks for -
# confirm.
data = pd.read_feather(r"\data\train_pat.feather")

# Hold out half of the rows for evaluation; the fixed seed keeps the split
# reproducible.
train, eval_ds = train_test_split(data, test_size=0.5, random_state=42)

# train_test_split shuffles the rows, so both frames carry a non-default
# index. preserve_index=False stops that index from being stored in the
# datasets as an extra `__index_level_0__` column.
eval_ds = Dataset.from_pandas(eval_ds, preserve_index=False)
train_ds = Dataset.from_pandas(train, preserve_index=False)

# Load a SetFit model from Hub
model = SetFitModel.from_pretrained("mixedbread-ai/mxbai-embed-large-v1")

args = TrainingArguments(
    batch_size=4,
    num_epochs=1,
    #evaluation_strategy="epoch",
    save_strategy="epoch",
    #load_best_model_at_end=True
)

# No eval_dataset is registered on the trainer; the held-out split is passed
# explicitly to evaluate() below.
trainer = Trainer(
    model=model,
    args=args,
    train_dataset=train_ds,
    metric=compute_metrics,
)

# Train and evaluate
trainer.train()
metrics = trainer.evaluate(eval_ds)
print(metrics)