Handling Multimodal Data in Curator
You can easily run multimodal synthetic data generation using Curator. This guide shows you how to define and run a multimodal generation pipeline in three easy steps.
Step 1: Creating a multimodal dataset
from datasets import Dataset
# Build one record per ingredient: a spice level paired with the URL of its image.
ingredients = [
    {"spice_level": spice, "image_url": url}
    for spice, url in [
        ("very spicy", "https://cdn.tasteatlas.com//images/ingredients/fcee541cd2354ed8b68b50d1aa1acad8.jpeg"),
        ("not so spicy", "https://cdn.tasteatlas.com//images/dishes/da5fd425608f48b09555f5257a8d3a86.jpg"),
    ]
]
# Wrap the list of dicts in a Hugging Face Dataset so curator can iterate over its rows.
ingredients = Dataset.from_list(ingredients)
Step 2: Writing the Curator Block
from bespokelabs import curator
class RecipeGenerator(curator.LLM):
    """A recipe generator that generates recipes for different ingredient images."""

    def prompt(self, input: dict) -> tuple:
        """Build the multimodal prompt for one dataset row.

        Returns the text prompt together with the ingredient image wrapped in
        ``curator.types.Image`` so the model receives both modalities.
        (The original annotation said ``-> str``, but the method actually
        returns a ``(str, Image)`` tuple.)
        """
        prompt = f"Create me a {input['spice_level']} recipe from the ingredients image."
        return prompt, curator.types.Image(url=input["image_url"])

    def parse(self, input: dict, response: str) -> dict:
        """Parse the model response along with the input into the desired output format."""
        return {
            "recipe": response,
        }
Step 3: Running the curator block using gpt-4o-mini on the ingredients dataset
# Instantiate the generator with a vision-capable model on the OpenAI backend.
recipe_generator = RecipeGenerator(model_name="gpt-4o-mini", backend="openai")

# Run the curator block over every row of the ingredients dataset.
recipes = recipe_generator(ingredients)

# Show the generated recipes as a pandas DataFrame.
print(recipes.dataset.to_pandas())
Last updated