import torch
from threading import Thread
from typing import List

from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
from transformers.generation import LogitsProcessor

model_id = "Qwen/Qwen3-0.6B"

# Load the model onto the GPU; cache_dir points at a local model cache.
model = AutoModelForCausalLM.from_pretrained(
    model_id, cache_dir="/big_storage/llms/hf_models/"
).to("cuda")
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Streamer that yields decoded text as tokens are generated, skipping the prompt.
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True)
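For reference, here is a minimal sketch of the streaming pattern these imports set up: `model.generate` runs on a background `Thread` while the main thread iterates over the `TextIteratorStreamer` as text arrives. The prompt and `max_new_tokens` value are placeholders, not part of the original setup.

# Illustrative only: run generation on a background thread and
# consume decoded chunks from the streamer on the main thread.
prompt = "Give me a short introduction to large language models."
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

thread = Thread(
    target=model.generate,
    kwargs=dict(**inputs, streamer=streamer, max_new_tokens=64),
)
thread.start()
for chunk in streamer:
    print(chunk, end="", flush=True)
thread.join()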