# Standard library imports
import os
import urllib.request
from abc import ABC, abstractmethod
from typing import Callable, Optional, Union

# Hugging Face and PyTorch imports
import huggingface_hub as hf_hub
import torch
from transformers import AutoConfig

# Haystack imports
# noinspection PyPackageRequirements
from haystack.components.generators import HuggingFaceAPIGenerator, HuggingFaceLocalGenerator
# noinspection PyPackageRequirements
from haystack.dataclasses import StreamingChunk
# noinspection PyPackageRequirements
from haystack.utils import ComponentDevice, Device
# noinspection PyPackageRequirements
from haystack.utils.auth import Secret
from haystack_integrations.components.generators.google_ai import GoogleAIGeminiGenerator
from haystack_integrations.components.generators.llama_cpp import LlamaCppGenerator
from haystack_integrations.components.generators.ollama import OllamaGenerator
def get_secret(secret_file: str) -> str:
    """
    Read a secret from a file.

    Args:
        secret_file (str): Path to the file containing the secret.

    Returns:
        str: The content of the secret file, stripped of surrounding whitespace.
            Returns an empty string if the file does not exist; any other error
            is re-raised.
    """
    try:
        with open(secret_file, 'r') as file:
            secret_text: str = file.read().strip()
    except FileNotFoundError:
        print(f"The file '{secret_file}' does not exist.")
        secret_text = ""
    except Exception as e:
        print(f"An error occurred: {e}")
        raise
    return secret_text
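
# Example usage (a minimal sketch; "secrets/hf_token.txt" is a hypothetical path,
# so point it at wherever you store your own tokens):
#
#     hf_token = get_secret("secrets/hf_token.txt")
#     api_model = HuggingFaceAPIModel(password=hf_token)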
class GeneratorModel(ABC):
    """
    An abstract base class that represents a Large Language Model (LLM) generator.

    It gives every supported model a common interface for text generation, so
    that, for example, a Hugging Face model and a Google AI model can be used
    interchangeably. In the future I may add additional options for other
    language models.

    Public Methods:
        generate(prompt: str): Generate text using the given prompt.

    Subclasses handle the initialization of the language model and the generation
    of text internally, and manage the configuration of the generation parameters.
    """
    def __init__(self, verbose: bool = False) -> None:
        """
        Initialize the GeneratorModel instance.

        Args:
            verbose (bool, optional): Whether to print verbose output. Defaults to False.
        """
        self._verbose: bool = verbose
        if self._verbose:
            print("Warming up Large Language Model")
        self._model_name: Optional[str] = None
        self._model: Optional[Union[HuggingFaceLocalGenerator, HuggingFaceAPIGenerator,
                                    GoogleAIGeminiGenerator, OllamaGenerator, LlamaCppGenerator]] = None
@property
def verbose(self) -> bool:
return self._verbose
@verbose.setter
def verbose(self, value: bool) -> None:
self._verbose = value
    @property
    def generator_component(self) -> Union[HuggingFaceLocalGenerator, HuggingFaceAPIGenerator, GoogleAIGeminiGenerator]:
        """
        Get the underlying Haystack generator component of the language model.

        Returns:
            The generator component wrapped by this model.
        """
        return self._model
    @property
    @abstractmethod
    def context_length(self) -> Optional[int]:
        """
        Get the maximum context length of the language model, in tokens.

        Returns:
            Optional[int]: The context length, if it can be determined. Otherwise, returns None.
        """
        pass
@property
@abstractmethod
def embedding_dimensions(self) -> Optional[int]:
"""
Get the embedding dimensions of the language model.
Returns:
Optional[int]: The embedding dimensions of the language model, if available. Otherwise, returns None.
"""
pass
    @property
    @abstractmethod
    def language_model(self) -> Optional[object]:
        """
        Get the underlying language model instance.

        Returns:
            Optional[object]: The language model instance used by this generator,
                or None if the model is accessed through an API.
        """
        pass
    @abstractmethod
    def generate(self, prompt: str) -> dict:
        """
        Generate text using the given prompt.

        Args:
            prompt (str): The prompt to use for text generation.

        Returns:
            dict: The generator's output. Haystack generators return a dictionary
                whose "replies" key holds the list of generated strings.
        """
        # To be implemented in a subclass
        pass
@property
def model_name(self) -> Optional[str]:
return self._model_name
class StreamingGeneratorModel(GeneratorModel, ABC):
    def __init__(self, verbose: bool = False,
                 streaming_callback: Optional[Callable[[StreamingChunk], None]] = None,
                 ) -> None:
        super().__init__(verbose)
        self._streaming_callback: Optional[Callable[[StreamingChunk], None]] = streaming_callback

    @abstractmethod
    def generate(self, prompt: str) -> dict:
        pass

    @property
    def streaming_callback(self) -> Optional[Callable[[StreamingChunk], None]]:
        return self._streaming_callback

    @streaming_callback.setter
    def streaming_callback(self, value: Optional[Callable[[StreamingChunk], None]]) -> None:
        self._streaming_callback = value

    def _default_streaming_callback_func(self, chunk: StreamingChunk) -> None:
        # Callback used to stream the generator's output chunk by chunk.
        # It forwards each chunk to the user-supplied callback, if one is set.
        if self._streaming_callback is not None:
            self._streaming_callback(chunk)
class HuggingFaceModel(StreamingGeneratorModel, ABC):
def __init__(self,
model_name: str = 'google/gemma-1.1-2b-it',
max_new_tokens: int = 500,
temperature: float = 0.6,
password: Optional[str] = None,
streaming_callback: Optional[Callable[[StreamingChunk], None]] = None,
verbose: bool = False) -> None:
"""
Initialize the HuggingFaceModel instance.
Args:
model_name (str, optional): Name of the Hugging Face model to use. Defaults to 'google/gemma-1.1-2b-it'.
max_new_tokens (int, optional): Maximum number of new tokens to generate. Defaults to 500.
temperature (float, optional): Temperature for text generation. Defaults to 0.6.
password (Optional[str], optional): Password for Hugging Face authentication. Defaults to None.
verbose (bool, optional): Whether to print verbose output. Defaults to False.
"""
super().__init__(verbose, streaming_callback)
if self._verbose:
print("Warming up Hugging Face Large Language Model: " + model_name)
self._max_new_tokens: int = max_new_tokens
self._temperature: float = temperature
self._model_name: str = model_name
if password is not None:
hf_hub.login(password, add_to_git_credential=False)
@property
def max_new_tokens(self) -> int:
return self._max_new_tokens
@property
def temperature(self) -> float:
return self._temperature
@property
def context_length(self) -> Optional[int]:
try:
config: AutoConfig = AutoConfig.from_pretrained(self._model_name)
except Exception as e:
print(f"An error occurred: {e}")
return None
context_length: Optional[int] = getattr(config, 'max_position_embeddings', None)
if context_length is None:
context_length = getattr(config, 'n_positions', None)
if context_length is None:
context_length = getattr(config, 'max_sequence_length', None)
return context_length
@property
def embedding_dimensions(self) -> Optional[int]:
# TODO: Need to test if this really gives us the embedder dims.
# Does NOT work correctly for SentenceTransformersTextEmbedder. There should be a better approach.
try:
config: AutoConfig = AutoConfig.from_pretrained(self._model_name)
except Exception as e:
print(f"An error occurred: {e}")
return None
embedding_dims: Optional[int] = getattr(config, 'hidden_size', None)
return embedding_dims
    def generate(self, prompt: str) -> dict:
        # Haystack generators return a dict whose "replies" key holds the generated strings.
        return self._model.run(prompt)
class HuggingFaceLocalModel(HuggingFaceModel):
    """
    A Hugging Face Large Language Model (LLM) generator that runs the model locally.
    """
def __init__(self,
model_name: str = 'google/gemma-1.1-2b-it',
max_new_tokens: int = 500,
temperature: float = 0.6,
password: Optional[str] = None,
task: str = "text-generation",
streaming_callback: Optional[Callable[[StreamingChunk], None]] = None,
verbose: bool = True) -> None:
"""
Initialize the HuggingFaceLocalModel instance.
Args:
model_name (str, optional): Name of the language model to use. Defaults to 'google/gemma-1.1-2b-it'.
max_new_tokens (int, optional): Maximum number of new tokens to generate. Defaults to 500.
temperature (float, optional): Temperature for text generation. Defaults to 0.6.
password (Optional[str], optional): Password for Hugging Face authentication. Defaults to None.
task (str, optional): The task to perform using the language model. Defaults to "text-generation".
verbose (bool, optional): Whether to print verbose output. Defaults to True.
"""
super().__init__(verbose=verbose, model_name=model_name, max_new_tokens=max_new_tokens,
temperature=temperature, password=password, streaming_callback=streaming_callback)
# Local model related variables
self._task: str = task
self._has_cuda: bool = torch.cuda.is_available()
self._torch_device: torch.device = torch.device("cuda" if self._has_cuda else "cpu")
self._component_device: ComponentDevice = ComponentDevice(Device.gpu() if self._has_cuda else Device.cpu())
self._warmed_up: bool = False
        self._model: HuggingFaceLocalGenerator = HuggingFaceLocalGenerator(
            model=self._model_name,
            task=self._task,
            device=self._component_device,
            streaming_callback=self._default_streaming_callback_func,
            generation_kwargs={
                "max_new_tokens": self._max_new_tokens,
                "temperature": self._temperature,
                "do_sample": True,
            })
def warm_up(self) -> None:
if not self._warmed_up:
self._model.warm_up()
self._warmed_up = True
    @property
    def language_model(self) -> object:
        # Note: the pipeline is only available after warm_up() has been called.
        return self._model.pipeline.model
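
# Example usage (a minimal sketch; fetches the model from the Hugging Face Hub on
# first use and assumes enough local RAM/VRAM for 'google/gemma-1.1-2b-it'):
#
#     local_model = HuggingFaceLocalModel(verbose=False)
#     local_model.warm_up()  # loads the pipeline; required before generate()
#     result = local_model.generate("What is a context window?")
#     print(result["replies"][0])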
class HuggingFaceAPIModel(HuggingFaceModel):
    """
    A Hugging Face Large Language Model (LLM) generator that calls the serverless Inference API.
    """
    def __init__(self,
                 model_name: str = 'google/gemma-1.1-2b-it',
                 max_new_tokens: int = 500,
                 password: Optional[str] = None,
                 temperature: float = 0.6,
                 streaming_callback: Optional[Callable[[StreamingChunk], None]] = None,
                 verbose: bool = False) -> None:
        """
        Initialize the HuggingFaceAPIModel instance.

        Args:
            model_name (str, optional): Name of the language model to use. Defaults to 'google/gemma-1.1-2b-it'.
            max_new_tokens (int, optional): Maximum number of new tokens to generate. Defaults to 500.
            password (Optional[str], optional): Hugging Face access token; required by the Inference API.
            temperature (float, optional): Temperature for text generation. Defaults to 0.6.
            streaming_callback (Optional[Callable[[StreamingChunk], None]], optional): Callback invoked for each
                streamed chunk of generated text. Defaults to None.
            verbose (bool, optional): Whether to print verbose output. Defaults to False.
        """
        super().__init__(verbose=verbose, model_name=model_name, max_new_tokens=max_new_tokens,
                         temperature=temperature, password=password, streaming_callback=streaming_callback)
        self._model: HuggingFaceAPIGenerator = HuggingFaceAPIGenerator(
            api_type="serverless_inference_api",
            api_params={
                "model": self._model_name,
            },
            token=Secret.from_token(password),
            streaming_callback=self._default_streaming_callback_func,
            generation_kwargs={
                "max_new_tokens": self._max_new_tokens,
                "temperature": self._temperature,
                "do_sample": True,
            })
    @property
    def language_model(self) -> None:
        # The model runs remotely, so there is no local model instance.
        return None
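
# Example usage (a minimal sketch; assumes a valid Hugging Face token is stored
# in the hypothetical file "secrets/hf_token.txt"):
#
#     api_model = HuggingFaceAPIModel(password=get_secret("secrets/hf_token.txt"))
#     result = api_model.generate("Explain retrieval-augmented generation in one sentence.")
#     print(result["replies"][0])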
class OllamaModel(StreamingGeneratorModel):
    def __init__(self,
                 model_name: str = 'gemma2',
                 url: str = "http://localhost:11434",
                 temperature: float = 0.6,
                 streaming_callback: Optional[Callable[[StreamingChunk], None]] = None,
                 verbose: bool = True) -> None:
        super().__init__(verbose=verbose, streaming_callback=streaming_callback)
if self._verbose:
print("Warming up Ollama Large Language Model: " + model_name)
self._model: OllamaGenerator = OllamaGenerator(
model=model_name,
url=url,
streaming_callback=self._default_streaming_callback_func,
generation_kwargs={
"temperature": temperature,
# "num_gpu": 1, # Number of GPUs to use
# "num_ctx": 2048, # Reduce context window
# "num_batch": 512, # Reduce batch size
# "mirostat": 0, # Disable mirostat sampling
# "seed": 42, # Set a fixed seed for reproducibility
},
)
    def generate(self, prompt: str) -> dict:
        return self._model.run(prompt)
@property
def context_length(self) -> Optional[int]:
return None
@property
def embedding_dimensions(self) -> Optional[int]:
return None
@property
def language_model(self) -> None:
return None
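
# Example usage (a minimal sketch; assumes an Ollama server is running locally on
# the default port with the 'gemma2' model already pulled):
#
#     ollama_model = OllamaModel(streaming_callback=lambda chunk: print(chunk.content, end=""))
#     ollama_model.generate("Write a haiku about local inference.")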
class LlamaCppModel(GeneratorModel):
def __init__(self,
model_link: str = 'https://huggingface.co/TheBloke/openchat-3.5-1210-GGUF/resolve/main/openchat-3.5-1210.Q3_K_S.gguf', # noqa: E501
context_length: int = 2048,
max_tokens: int = 512,
temperature: float = 0.6,
verbose: bool = True) -> None:
super().__init__(verbose=verbose)
self._warmed_up: bool = False
self._model_link = model_link
        # Derive the model file name from the link (everything after the last '/').
        self._model_name = model_link.split("/")[-1]
self._context_length = context_length
self._max_tokens = max_tokens
self._temperature = temperature
if self._verbose:
print("Warming up LlamaCPP Large Language Model: " + self._model_name)
# Check if model is already downloaded and download if necessary
self._download_model()
self._model: LlamaCppGenerator = LlamaCppGenerator(
model=self._model_name,
n_ctx=self._context_length,
n_batch=512,
model_kwargs={"n_gpu_layers": -1},
generation_kwargs={"max_tokens": self._max_tokens, "temperature": self._temperature},
)
    def generate(self, prompt: str) -> dict:
        return self._model.run(prompt)
@property
def context_length(self) -> Optional[int]:
return self._context_length
@property
def embedding_dimensions(self) -> Optional[int]:
return None
@property
def language_model(self) -> None:
return None
def warm_up(self) -> None:
if not self._warmed_up:
self._model.warm_up()
self._warmed_up = True
def _download_model(self):
# Checks if the file already exists before downloading
if not os.path.isfile(self._model_name):
urllib.request.urlretrieve(self._model_link, self._model_name)
print("Model file downloaded successfully: " + self._model_name)
else:
print("Model file already exists: " + self._model_name)
class GoogleGeminiModel(GeneratorModel):
"""
A class that represents a Google AI Large Language Model (LLM) generator.
"""
def __init__(self, password: Optional[str] = None, verbose: bool = False) -> None:
"""
Initialize the GoogleGeminiModel instance.
Args:
password (Optional[str], optional): Password for Google AI authentication. Defaults to None.
"""
super().__init__(verbose=verbose)
if self._verbose:
print("Warming up Gemini Large Language Model")
self._model: GoogleAIGeminiGenerator = GoogleAIGeminiGenerator(
model="gemini-pro",
api_key=Secret.from_token(password)
)
    def generate(self, prompt: str) -> dict:
        return self._model.run(prompt)
@property
def context_length(self) -> Optional[int]:
return None
@property
def embedding_dimensions(self) -> Optional[int]:
return None
@property
def language_model(self) -> None:
return None
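
# A minimal usage sketch demonstrating the interchangeable interface. The model
# choice and prompt below are assumptions for illustration, not part of this
# module's contract.
if __name__ == "__main__":
    def demo(model: GeneratorModel, prompt: str) -> None:
        # Every backend exposes the same generate() method and returns a
        # Haystack-style dict with a "replies" list.
        result = model.generate(prompt)
        print(result["replies"][0])

    # Swap in any backend without changing demo(). This call assumes a local
    # Ollama server with the 'gemma2' model available; the commented variant
    # assumes a Hugging Face token in the hypothetical file "secrets/hf_token.txt".
    demo(OllamaModel(verbose=False), "Summarize what this module does.")
    # demo(HuggingFaceAPIModel(password=get_secret("secrets/hf_token.txt")),
    #      "Summarize what this module does.")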