import llama_cpp
# Hugging Face repo id for Hermes 3 (NousResearch). Used only to fetch the
# model's own tokenizer so tokenization matches the GGUF weights exactly.
model_id = "NousResearch/Hermes-3-Llama-3.1-8B"

# Load the local Q6_K-quantized GGUF through llama-cpp-python.
# NOTE(review): the stray "1"/"2" footnote digits from the source document
# were removed — they made these statements invalid Python.
llm = llama_cpp.Llama(
    "/big_storage/llms/models/Hermes-3-Llama-3.1-8B.Q6_K.gguf",
    # Use the HF tokenizer rather than llama.cpp's built-in GGUF tokenizer.
    tokenizer=llama_cpp.llama_tokenizer.LlamaHFTokenizer.from_pretrained(
        model_id
    ),
    n_gpu_layers=-1,  # -1 = offload every layer to the GPU
    flash_attn=True,  # enable flash attention kernels
    n_ctx=8192,  # context window size in tokens
    verbose=False,
    chat_format="chatml-function-calling",  # ChatML template with tool/function-calling support
)
Footnotes:
1. Hermes 3, a model by NousResearch.
2. llama-cpp-python, the Python bindings for llama.cpp.