Structured Output
Structured Output for Data Generation with LLMs
from typing import Dict, List
from datasets import Dataset
from pydantic import BaseModel, Field
from bespokelabs import curator
# Define our structured output models
# Schema for a single poem in the structured response.
# NOTE: documented with comments rather than a class docstring, since pydantic
# copies a model's docstring into the generated JSON schema description.
class Poem(BaseModel):
    # The text of the poem.
    poem: str = Field(description="A poem.")
# Top-level structured response: a wrapper holding a list of Poem entries.
# Used below as Poet.response_format.
# NOTE: documented with comments rather than a class docstring, since pydantic
# copies a model's docstring into the generated JSON schema description.
class Poems(BaseModel):
    # The individual poems, each one a Poem model.
    poems: List[Poem] = Field(description="A list of poems.")
# Create a custom LLM class with specialized prompting and parsing
class Poet(curator.LLM):
    """LLM wrapper that writes two poems per topic and flattens them into rows."""

    # Structured response type; `parse` receives the response as this model.
    response_format = Poems

    def prompt(self, input: Dict) -> str:
        """Build the prompt for one input row.

        Args:
            input: A row containing a ``"topic"`` key.

        Returns:
            The instruction asking for two poems about that topic.
        """
        return f"Write two poems about {input['topic']}."

    def parse(self, input: Dict, response: Poems) -> List[Dict]:
        """Turn one structured response into output rows.

        Args:
            input: The row the prompt was built from (provides ``"topic"``).
            response: The structured response holding the generated poems.

        Returns:
            A list with one ``{"topic", "poem"}`` dict per poem in
            ``response.poems``, so a single input row can yield several rows.
        """
        # One output row per poem, each tagged with the originating topic.
        return [{"topic": input["topic"], "poem": p.poem} for p in response.poems]
# Instantiate the poem generator with the gpt-4o-mini model
poet = Poet(model_name="gpt-4o-mini")

# Build the input dataset: a single "topic" column, one row per topic
topic_rows = {
    "topic": [
        "Urban loneliness in a bustling city",
        "Beauty of Bespoke Labs's Curator library",
    ],
}
topics = Dataset.from_dict(topic_rows)

# Run the generator over the topics and print the result as a DataFrame
poem = poet(topics)
poem_frame = poem.dataset.to_pandas()
print(poem_frame)
# Output:
# topic poem
# 0 Urban loneliness in a bustling city In the city's heart, where the lights never di...
# 1 Urban loneliness in a bustling city Steps echo loudly, pavement slick with rain,\n...
# 2 Beauty of Bespoke Labs's Curator library In the heart of Curation's realm,\nWhere art...
# 3 Beauty of Bespoke Labs's Curator library Step within the library's embrace,\nA sanctu...
How This Works:
Chaining LLM calls with structured output
Last updated