-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy path07_image_agent.py
More file actions
90 lines (69 loc) · 3 KB
/
07_image_agent.py
File metadata and controls
90 lines (69 loc) · 3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
"""
This example demonstrates how to use images with WorkflowAI agents. It shows how to:
1. Pass image inputs to an agent
2. Analyze city photos for identification
3. Structure detailed visual analysis results
"""
import asyncio
import os
from typing import Optional
from dotenv import load_dotenv
from pydantic import BaseModel, Field # pyright: ignore [reportUnknownVariableType]
import workflowai
from workflowai import WorkflowAIError
from workflowai.core.domain.model import Model
from workflowai.fields import Image
# Input schema handed to the agent: a single image to be analyzed.
class ImageInput(BaseModel):
    image: Image = Field(
        description="The image to analyze",
    )
# Output schema returned by the agent: the identified city and country.
# `city` and `country` default to empty strings and `confidence` to None,
# so a result in which nothing was identified is still a valid instance.
class ImageOutput(BaseModel):
    city: str = Field(
        default="",
        description="Name of the city shown in the image",
    )
    country: str = Field(
        default="",
        description="Name of the country where the city is located",
    )
    confidence: Optional[float] = Field(
        default=None,
        description="Confidence level in the identification (0-1)",
    )
# Agent definition: the decorator is configured with the id "city-identifier"
# and the Gemini 2.0 Flash model. The function body is intentionally a stub
# (`...`); callers in this file invoke it through `identify_city_from_image.run(...)`.
# NOTE(review): the docstring below reads as the instructions given to the
# model and is presumably consumed by the decorator at runtime — confirm
# before rewording it, since that would change the prompt.
@workflowai.agent(id="city-identifier", model=Model.GEMINI_2_0_FLASH_LATEST)
async def identify_city_from_image(image_input: ImageInput) -> ImageOutput:
    """
    Analyze the provided image and identify the city and country shown in it.
    If the image shows a recognizable landmark or cityscape, identify the city and country.
    If uncertain, indicate lower confidence or leave fields empty.
    Focus on:
    - Famous landmarks
    - Distinctive architecture
    - Recognizable skylines
    - Cultural elements that identify the location
    Return empty strings if the city/country cannot be determined with reasonable confidence.
    """
    ...
async def _run_and_report(image: Image) -> bool:
    """Run the city-identifier agent on ``image`` and print the outcome.

    On success the agent output, cost and latency are printed. A
    ``WorkflowAIError`` is caught and reported on stdout instead of
    propagating, so both the file-based and the URL-based run fail the
    same way.

    Args:
        image: The image to send to the agent, built either from base64
            data or from a URL.

    Returns:
        True when the run succeeded, False when it raised ``WorkflowAIError``.
    """
    try:
        agent_run = await identify_city_from_image.run(
            ImageInput(image=image),
        )
    except WorkflowAIError as e:
        print(f"Failed to run task. Code: {e.error.code}. Message: {e.error.message}")
        return False

    print("\n--------\nAgent output:\n", agent_run.output, "\n--------\n")
    print(f"Cost: ${agent_run.cost_usd:.10f}")
    print(f"Latency: {agent_run.duration_seconds:.2f}s")
    return True


async def main():
    """Identify a city from a local image file, then from an image URL.

    The local image is read from ``assets/new-york-city.jpg`` next to this
    file and sent as base64 data. If that first run fails, the function
    returns without attempting the URL example.
    """
    import base64

    current_dir = os.path.dirname(os.path.abspath(__file__))
    image_path = os.path.join(current_dir, "assets", "new-york-city.jpg")

    # With a properly async function you should use an async open
    # see https://github.com/Tinche/aiofiles for example
    with open(image_path, "rb") as image_file:  # noqa: ASYNC230
        raw_bytes = image_file.read()
    # The Image field is given the file content as a base64-encoded string.
    content = base64.b64encode(raw_bytes).decode("utf-8")

    image = Image(content_type="image/jpeg", data=content)
    if not await _run_and_report(image):
        return

    # Example using URL for Image
    image_url = "https://workflowai.blob.core.windows.net/workflowai-public/fixtures/paris.jpg"
    await _run_and_report(Image(url=image_url))
# Script entry point: runs only when this file is executed directly.
if __name__ == "__main__":
    # Load variables from a .env file before the agent runs; override=True
    # lets values from the file replace ones already set in the environment.
    # NOTE(review): presumably this is where the WorkflowAI credentials come
    # from — confirm against the project's setup instructions.
    load_dotenv(override=True)
    asyncio.run(main())