model_hf.rs
use gguf::GgufLoader;
use llm_models::local_model::*;

fn main() {
    // Load a model directly from a quantized GGUF file hosted on Hugging Face.
    let _model = GgufLoader::default()
        .hf_quant_file_url("https://huggingface.co/bartowski/Meta-Llama-3.1-8B-Instruct-GGUF/blob/main/Meta-Llama-3.1-8B-Instruct-Q8_0.gguf")
        .load()
        .unwrap();

    // By default we attempt to extract everything we need from the GGUF file.
    // If you need to specify the tokenizer or chat template to use, you can
    // add an HF repo to load the config from.
    let _model = GgufLoader::default()
        .hf_quant_file_url("https://huggingface.co/bartowski/Meta-Llama-3.1-8B-Instruct-GGUF/blob/main/Meta-Llama-3.1-8B-Instruct-Q8_0.gguf")
        .hf_config_repo_id("meta-llama/Meta-Llama-3-8B-Instruct")
        .load()
        .unwrap();

    // _model.local_model_path can now be used to load the model into the inference engine.
}
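
// A minimal follow-up sketch of what "load the model into the inference
// engine" might look like. This is an assumption, not part of this crate's
// API: it presumes `local_model_path` is a path-like field (e.g. a `PathBuf`)
// and hands it to an external engine binary such as llama.cpp's `llama-cli`.
//
//     let path = _model.local_model_path;
//     std::process::Command::new("llama-cli")
//         .arg("-m")
//         .arg(&path)
//         .status()
//         .expect("failed to launch inference engine");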