WorkflowAI
diff --git a/‎.env.sample‎
Lines changed: 5 additions & 1 deletion b/‎.env.sample‎
Lines changed: 5 additions & 1 deletion
diff --git a/‎README.md‎
Lines changed: 60 additions & 1 deletion b/‎README.md‎
Lines changed: 60 additions & 1 deletion
diff --git a/‎examples/city_to_capital_task.py‎
Lines changed: 0 additions & 39 deletions b/‎examples/city_to_capital_task.py‎
Lines changed: 0 additions & 39 deletions
diff --git a/‎examples/images/assets/new-york-city.jpg‎
553 KB b/‎examples/images/assets/new-york-city.jpg‎
553 KB
diff --git a/‎examples/images/assets/paris.jpg‎
73.5 KB b/‎examples/images/assets/paris.jpg‎
73.5 KB
diff --git a/‎examples/images/city_identifier.py‎
Lines changed: 78 additions & 0 deletions b/‎examples/images/city_identifier.py‎
Lines changed: 78 additions & 0 deletions
diff --git a/‎examples/pdf_answer.py‎
Lines changed: 77 additions & 0 deletions b/‎examples/pdf_answer.py‎
Lines changed: 77 additions & 0 deletions
diff --git a/‎examples/pdfs/sec-form-4.pdf‎
101 KB b/‎examples/pdfs/sec-form-4.pdf‎
101 KB
@@ -1,4 +1,8 @@
-WORKFLOWAI_API_URL=
+# Only change this URL if you are self-hosting WorkflowAI
+WORKFLOWAI_API_URL=https://run.workflowai.com
+
+# Your WorkflowAI API key
+# Get your API key at https://workflowai.com/organization/settings/api-keys
 WORKFLOWAI_API_KEY=
 
 # Used when running e2e tests
 
@@ -1,6 +1,6 @@
 # WorkflowAI Python
 
-A library to use WorkflowAI with Python
+A library to use [WorkflowAI](https://workflowai.com) with Python
 
 ## Context
 
@@ -172,6 +172,65 @@ def say_hello(input: Input) -> AsyncIterator[Run[Output]]:
     ...
 ```
 
+### Images
+
+Add images as input to an agent by using the `Image` class. The `data` field should be a base64-encoded string, and `content_type` should be set to the image's MIME type.
+
+```python
+from workflowai.fields import Image
+
+class ImageInput(BaseModel):
+    image: Image = Field(description="The image to analyze")
+
+# use base64 to include the image inline
+image = Image(content_type='image/jpeg', data='<base 64 encoded data>')
+
+# You can also use the `url` property to pass an image URL.
+image = Image(url="https://example.com/image.jpg")
+```
+
+An example of using an image as input is available in [city_identifier.py](./examples/images/city_identifier.py).
+
+### Files (PDF, .txt, ...)
+
+Use the `File` class to pass files as input to an agent. Different LLMs support different file types.
+
+```python
+from workflowai.fields import File
+...
+
+class PDFQuestionInput(BaseModel):
+    pdf: File = Field(description="The PDF document to analyze")
+    question: str = Field(description="The question to answer about the PDF content")
+
+class PDFAnswerOutput(BaseModel):
+    answer: str = Field(description="The answer to the question based on the PDF content")
+    quotes: List[str] = Field(description="Relevant quotes from the PDF that support the answer")
+
+@workflowai.agent(id="pdf-answer", model=Model.CLAUDE_3_5_SONNET_LATEST)
+async def answer_pdf_question(input: PDFQuestionInput) -> PDFAnswerOutput:
+    """
+    Analyze the provided PDF document and answer the given question.
+    Provide a clear and concise answer based on the content found in the PDF.
+    """
+    ...
+
+pdf = File(content_type='application/pdf', data='<base 64 encoded data>')
+question = "What are the key findings in this report?"
+
+output = await answer_pdf_question(PDFQuestionInput(pdf=pdf, question=question))
+# Print the answer and supporting quotes
+print("Answer:", output.answer)
+print("\nSupporting quotes:")
+for quote in output.quotes:
+    print(f"- {quote}")
+```
+An example of using a PDF as input is available in [pdf_answer.py](./examples/pdf_answer.py).
+
+### Audio
+
+[todo]
+
 ### Tools
 
 Tools allow enhancing an agent's capabilities by allowing it to call external functions.
 
@@ -0,0 +1,78 @@
+from pydantic import BaseModel, Field
+from typing import Optional
+import asyncio
+import workflowai
+from workflowai.core.domain.model import Model
+from workflowai.fields import Image
+import os
+from dotenv import load_dotenv
+from workflowai import Run, WorkflowAIError
+
+# Load environment variables from .env file
+load_dotenv()
+
+# Input schema for the city-identifier agent: a single image to analyze.
+# Documented with `#` comments rather than a class docstring so the model's
+# generated schema is left untouched.
+class ImageInput(BaseModel):
+    image: Image = Field(description="The image to analyze")
+
+# Output schema for the city-identifier agent.
+# `city` and `country` default to empty strings and `confidence` defaults to
+# None, so every field may be left unset when the location is not recognized.
+class ImageOutput(BaseModel):
+    city: str = Field(default="", description="Name of the city shown in the image")
+    country: str = Field(default="", description="Name of the country where the city is located")
+    confidence: Optional[float] = Field(
+        default=None, 
+        description="Confidence level in the identification (0-1)"
+    )
+
+# Agent declaration. The body is intentionally `...`: callers await this
+# function and read `.output`, `.cost_usd` and `.duration_seconds` from the
+# result, so the `workflowai.agent` decorator must supply the implementation.
+# NOTE(review): the docstring below is presumably sent to the model as the
+# agent's instructions, so edits to it are behavior changes - confirm against
+# the WorkflowAI SDK documentation.
+@workflowai.agent(id="city-identifier", model=Model.GEMINI_1_5_FLASH_LATEST)
+async def identify_city_from_image(input: ImageInput) -> Run[ImageOutput]:
+    """
+    Analyze the provided image and identify the city and country shown in it.
+    If the image shows a recognizable landmark or cityscape, identify the city and country.
+    If uncertain, indicate lower confidence or leave fields empty.
+    
+    Focus on:
+    - Famous landmarks
+    - Distinctive architecture
+    - Recognizable skylines
+    - Cultural elements that identify the location
+    
+    Return empty strings if the city/country cannot be determined with reasonable confidence.
+    """
+    ...
+
+async def run_city_identifier():
+    """Run the city-identifier agent on a local image, then on an image URL.
+
+    For each image, prints the agent output, cost and latency. A
+    ``WorkflowAIError`` raised by either run is reported on stdout instead of
+    propagating; if the first run fails, the second one is skipped.
+    """
+    import base64
+
+    async def _identify_and_report(image: Image) -> bool:
+        """Run the agent on ``image`` and print the result; return False on failure."""
+        try:
+            agent_run = await identify_city_from_image(
+                ImageInput(image=image),
+                use_cache="auto",
+            )
+        except WorkflowAIError as e:
+            print(f"Failed to run task. Code: {e.error.code}. Message: {e.error.message}")
+            return False
+
+        print("\n--------\nAgent output:\n", agent_run.output, "\n--------\n")
+        # NOTE(review): these format specs assume cost_usd and duration_seconds
+        # are always floats - confirm they cannot be None on a Run.
+        print(f"Cost: ${agent_run.cost_usd:.10f}")
+        print(f"Latency: {agent_run.duration_seconds:.2f}s")
+        return True
+
+    # Inline image: resolve the asset relative to this script (not the current
+    # working directory) and pass its bytes base64-encoded.
+    current_dir = os.path.dirname(os.path.abspath(__file__))
+    image_path = os.path.join(current_dir, "assets", "new-york-city.jpg")
+    with open(image_path, "rb") as image_file:
+        content = base64.b64encode(image_file.read()).decode("utf-8")
+
+    if not await _identify_and_report(Image(content_type='image/jpeg', data=content)):
+        return
+
+    # Remote image: pass a URL instead of inline data.
+    # TODO: replace with a GitHub URL
+    image_url = "https://t4.ftcdn.net/jpg/02/96/15/35/360_F_296153501_B34baBHDkFXbl5RmzxpiOumF4LHGCvAE.jpg"
+    await _identify_and_report(Image(url=image_url))
+
+# Script entry point: only runs the example when executed directly, driving
+# the coroutine with asyncio.run().
+if __name__ == "__main__":
+    asyncio.run(run_city_identifier()) 
@@ -0,0 +1,77 @@
+from pydantic import BaseModel, Field
+import asyncio
+import workflowai
+from workflowai.core.domain.model import Model
+from workflowai.fields import File
+import os
+from dotenv import load_dotenv
+from workflowai import Run, WorkflowAIError
+from typing import List
+
+# Load environment variables from .env file
+load_dotenv()
+
+# Input schema for the pdf-answer agent: the document plus the question to
+# answer about it. Documented with `#` comments rather than a class docstring
+# so the model's generated schema is left untouched.
+class PDFQuestionInput(BaseModel):
+    pdf: File = Field(description="The PDF document to analyze")
+    question: str = Field(description="The question to answer about the PDF content")
+
+# Output schema for the pdf-answer agent: the answer text and the quotes that
+# support it. Neither field declares a default.
+class PDFAnswerOutput(BaseModel):
+    answer: str = Field(description="The answer to the question based on the PDF content")
+    quotes: List[str] = Field(description="Relevant quotes from the PDF that support the answer")
+
+# Agent declaration. The body is intentionally `...`: callers await this
+# function and read `.output`, `.cost_usd` and `.duration_seconds` from the
+# result, so the `workflowai.agent` decorator must supply the implementation.
+# NOTE(review): the docstring below is presumably sent to the model as the
+# agent's instructions, so edits to it are behavior changes - confirm against
+# the WorkflowAI SDK documentation.
+@workflowai.agent(id="pdf-answer", model=Model.CLAUDE_3_5_SONNET_LATEST)
+async def answer_pdf_question(input: PDFQuestionInput) -> Run[PDFAnswerOutput]:
+    """
+    Analyze the provided PDF document and answer the given question.
+    Provide a clear and concise answer based on the content found in the PDF.
+    
+    Focus on:
+    - Accurate information extraction from the PDF
+    - Direct and relevant answers to the question
+    - Context-aware responses that consider the full document
+    - Citing specific sections or pages when relevant
+    
+    If the question cannot be answered based on the PDF content,
+    provide a clear explanation of why the information is not available.
+    """
+    ...
+
+async def run_pdf_answer():
+    """Ask the pdf-answer agent a question about the sample `pdfs/sec-form-4.pdf`.
+
+    Prints the agent output, cost and latency, or a failure message if the
+    run raises ``WorkflowAIError``.
+    """
+    import base64
+
+    # Resolve the sample PDF relative to this script, not the working directory.
+    script_dir = os.path.dirname(os.path.abspath(__file__))
+    with open(os.path.join(script_dir, "pdfs", "sec-form-4.pdf"), "rb") as handle:
+        raw_bytes = handle.read()
+    encoded_pdf = base64.b64encode(raw_bytes).decode("utf-8")
+
+    document = File(content_type='application/pdf', data=encoded_pdf)
+    prompt = "How many stocks were sold? What is the total amount in USD?"
+
+    try:
+        result = await answer_pdf_question(
+            PDFQuestionInput(pdf=document, question=prompt),
+            use_cache="auto",
+        )
+    except WorkflowAIError as err:
+        print(f"Failed to run task. Code: {err.error.code}. Message: {err.error.message}")
+        return
+
+    print("\n--------\nAgent output:\n", result.output, "\n--------\n")
+    print(f"Cost: ${result.cost_usd:.10f}")
+    print(f"Latency: {result.duration_seconds:.2f}s")
+
+    # Disabled variant: pass the PDF by URL instead of inline data.
+    # pdf_url = "https://example.com/sample.pdf"
+    # pdf = File(url=pdf_url)
+    # question = "What are the key findings in the conclusion?"
+    # agent_run = await answer_pdf_question(
+    #     PDFQuestionInput(pdf=pdf, question=question),
+    #     use_cache="auto"
+    # )
+
+    # print("\n--------\nAgent output:\n", agent_run.output, "\n--------\n")
+    # print(f"Cost: ${agent_run.cost_usd:.10f}")
+    # print(f"Latency: {agent_run.duration_seconds:.2f}s")
+
+# Script entry point: only runs the example when executed directly, driving
+# the coroutine with asyncio.run().
+if __name__ == "__main__":
+    asyncio.run(run_pdf_answer())