| First name | Last name | Email |
|---|---|---|
| John | Smith | sales@example.com |
| Jane | Doe | support@example.com |
Slides:
https://alonsosilvaallende.github.io/2025-PyCon-Chile
This presentation:
https://github.com/alonsosilvaallende/2025-PyCon-Chile/blob/main/PyConChile.ipynb
Unstructured text:
“My name is John Smith and you can contact me at sales@example.com and she is Jane Doe and can be contacted at support@example.com”
→ Structured data:
| First name | Last name | Email |
|---|---|---|
| John | Smith | sales@example.com |
| Jane | Doe | support@example.com |
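One way to specify the target structure is a Pydantic schema, as in the extraction examples later in this notebook; the class and field names below are an illustrative sketch, not taken from the slides.
from pydantic import BaseModel, Field

class Contact(BaseModel):
    first_name: str = Field(..., description="The person's first name")
    last_name: str = Field(..., description="The person's last name")
    email: str = Field(..., description="The person's contact email")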
Unstructured text → Structured data
| Message | Department |
|---|---|
| I would like to have more information related to the new product. | Sales |
| I cannot exit Vim in my computer. Could you help me with that? | IT |
| Are there any openings at your company? | HR |
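Routing like this can be expressed as a schema with a Literal field, in the same spirit as the Sentiment example later on; the class name and labels below are a sketch based on the table above.
from typing import Literal
from pydantic import BaseModel, Field

class Department(BaseModel):
    label: Literal["Sales", "IT", "HR"] = Field(
        ..., description="The department that should handle the message"
    )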
Unstructured text:
“Alice loves Bob but she hates Charles”
→ Structured data:
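The structured output for this example is not included in this export; one plausible representation, shown only as a sketch, is a list of subject/predicate/target triples.
from typing import List, Literal
from pydantic import BaseModel

class Relation(BaseModel):
    subject: str
    predicate: Literal["loves", "hates"]
    target: str

class Relations(BaseModel):
    relations: List[Relation]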
Unstructured text:
“What’s the temperature in San Francisco now? How about tomorrow?”
→ Structured data:
| Tool | Tool arguments |
|---|---|
| get_current_temperature | {'location': 'San Francisco'} |
| get_temperature_by_date | {'location': 'San Francisco', 'date': '2025-11-10'} |
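For reference, the two tools in the table could be declared as plain Python functions whose signatures match the arguments shown; the bodies below are illustrative stubs, not part of the presentation.
def get_current_temperature(location: str) -> float:
    """Return the current temperature for a location."""
    ...

def get_temperature_by_date(location: str, date: str) -> float:
    """Return the forecast temperature for a location on a given date (YYYY-MM-DD)."""
    ...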
The three approaches—modifying weights, improving prompts, and constraining generation—are complementary and should NOT be viewed as mutually exclusive.
Installation:
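Assuming the package is published on PyPI under the same name (not confirmed in this export):
pip install litelines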
Documentation: https://tinyurl.com/litelines
Getting Started Colab notebooks:
https://tinyurl.com/litelines-hf
License: Apache-2.0
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
MODEL_ID = "Qwen/Qwen3-1.7B" # https://huggingface.co/Qwen/Qwen3-1.7B
device = torch.device("cuda") # "cuda", "cpu" or "mps"
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(MODEL_ID).to(device)
user_input = "Hello"
messages = [{"role": "user", "content": user_input}]
inputs = tokenizer.apply_chat_template(
messages,
add_generation_prompt=True,
enable_thinking=False,
tools=[],
return_tensors="pt",
return_dict=True,
).to(model.device)
prompt_length = inputs["input_ids"].shape[-1]
generated = model.generate(
**inputs, temperature=0.1, logits_processor=[],
max_new_tokens=100
)
tokenizer.decode(generated[0][prompt_length:-1])
'Hello! How can I assist you today? 😊'
user_input = "Hello"
messages = [{"role": "user", "content": user_input}]
inputs = tokenizer.apply_chat_template(
messages,
add_generation_prompt=True,
enable_thinking=False,
tools=[],
return_tensors="pt",
return_dict=True,
).to(model.device)
prompt_length = inputs["input_ids"].shape[-1]
generated = model.generate(
**inputs, temperature=0.1, logits_processor=[],
max_new_tokens=100
)
tokenizer.decode(generated[0][prompt_length:-1])
def generate_response(user_input, tools=[],
                      logits_processor=[], enable_thinking=False):
    messages = [{"role": "user", "content": user_input}]
    inputs = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        enable_thinking=enable_thinking,
        tools=tools,
        return_tensors="pt",
        return_dict=True,
    ).to(model.device)
    prompt_length = inputs["input_ids"].shape[-1]
    generated = model.generate(
        **inputs, temperature=0.1, logits_processor=logits_processor,
        max_new_tokens=100
    )
    return tokenizer.decode(generated[0][prompt_length:-1])
from transformers import TextIteratorStreamer
from threading import Thread
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True)
def generate_streamed_response(user_input,
                               tools=[],
                               logits_processor=[], enable_thinking=False):
    messages = [{"role": "user", "content": user_input}]
    inputs = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        enable_thinking=enable_thinking,
        tools=tools,
        return_tensors="pt",
        return_dict=True,
    ).to(model.device)
    generation_kwargs = dict(
        inputs,
        streamer=streamer,
        logits_processor=logits_processor,
        max_new_tokens=100,
        temperature=0.1
    )
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()
    assistant_response = ""
    for chunk in streamer:
        clean_chunk = chunk.split("<|im_end|>")[0]
        assistant_response += clean_chunk
        print(clean_chunk, end="")
    thread.join()
    return assistant_response
'B.'
'No.'
https://molab.marimo.io/notebooks/nb_sbGwmqQdNC5NPQjg7Eu8qi
user_input = "Extract Jason is 25 years old"
from pydantic import BaseModel, Field
class Person(BaseModel):
    name: str = Field(..., description="The person's name")
    age: int = Field(..., description="The person's age in years")
from openai import pydantic_function_tool
tool = pydantic_function_tool(Person)
assistant_response = generate_response(user_input, tools=[tool])
print(assistant_response)
<tool_call>
{"name": "Person", "arguments": {"name": "Jason", "age": 25}}
</tool_call>
user_input = "What's the sentiment of the text: That's awesome!"
from typing import Literal
class Sentiment(BaseModel):
    label: Literal["Positive", "Negative"] = Field(
        ..., description="The sentiment conveyed by the text"
    )
tool = pydantic_function_tool(Sentiment)
assistant_response = generate_response(
user_input,
tools=[tool],
)
print(assistant_response)
<tool_call>
{"name": "Sentiment", "arguments": {"label": "Positive"}}
</tool_call>
user_input = "That's awesome!"
from typing import Literal
class Sentiment(BaseModel):
    label: Literal["Positive", "Negative"] = Field(
        ..., description="The sentiment conveyed by the text"
    )
tool = pydantic_function_tool(Sentiment)
assistant_response = generate_response(
user_input,
tools=[tool],
)
print(assistant_response)
The sentiment of the text "That's awesome!" is Positive.
user_input = "That's awesome!"
class Sentiment(BaseModel):
    label: Literal["Positive", "Negative"] = Field(
        ..., description="The sentiment conveyed by the text"
    )
tool = pydantic_function_tool(Sentiment)
from litelines import SchemaProcessor

processor = SchemaProcessor(
response_format=Sentiment, tokenizer=tokenizer, include_tool_call=True
)
assistant_response = generate_response(
user_input,
tools=[tool],
logits_processor=[processor]
)
print(assistant_response)
<tool_call>
{"name": "Sentiment", "arguments": {"label": "Positive"}}
</tool_call>
user_input = "Extract Jason is 25 years old"
class Person(BaseModel):
    name: str = Field(..., description="The person's name")
    age: int = Field(..., description="The person's age in years")
tool = pydantic_function_tool(Person)
processor = SchemaProcessor(
response_format=Person, tokenizer=tokenizer, include_tool_call=True
)
assistant_response = generate_response(
user_input,
tools=[tool],
logits_processor=[processor]
)
print(assistant_response)
<tool_call>
{"name": "Person", "arguments": {"name": "Jason", "age": 25}}
</tool_call>
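Because the constrained output is valid JSON that matches the schema, it can be loaded back into the Pydantic model; the helper below is an illustrative sketch, not part of litelines.
import json

def parse_tool_call(response: str) -> Person:
    # Strip the <tool_call> markers and validate the arguments against the schema
    payload = response.split("<tool_call>")[-1].split("</tool_call>")[0]
    call = json.loads(payload)
    return Person.model_validate(call["arguments"])

person = parse_tool_call(assistant_response)
print(person)  # expected: name='Jason' age=25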