import torch
from typing import List
from threading import Thread
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
from transformers.generation import LogitsProcessor
= "Qwen/Qwen3-0.6B"
model_id # model = AutoModelForCausalLM.from_pretrained(
# model_id, cache_dir="/big_storage/llms/hf_models/"
# ).to("cuda")
= AutoTokenizer.from_pretrained(model_id)
tokenizer # streamer = TextIteratorStreamer(tokenizer, skip_prompt=True)
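For reference, here is how the imports above fit together once the commented-out model and streamer lines are enabled: `TextIteratorStreamer` yields decoded text chunks to the main thread while `model.generate` runs on a background `Thread`. This is a minimal sketch, not part of the original cell; the prompt string and `max_new_tokens` value are illustrative.

```python
# Sketch only: assumes `model` has been loaded via the commented-out
# AutoModelForCausalLM.from_pretrained(...) call above.
prompt = "Explain streaming generation in one sentence."  # hypothetical prompt
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True)

# Run generation on a background thread so the main thread can
# consume text from the streamer as tokens are produced.
generation_kwargs = dict(**inputs, streamer=streamer, max_new_tokens=128)
thread = Thread(target=model.generate, kwargs=generation_kwargs)
thread.start()

for chunk in streamer:  # yields decoded text pieces as they arrive
    print(chunk, end="", flush=True)
thread.join()
```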