跳转到主要内容
使用 DALL-E 生成图像、创建变体、进行编辑,并使用视觉模型分析图像——全部通过一个统一的 Client 完成。
对于图像生成,使用 openai/dall-e-3 可获得最佳质量。对于视觉任务,openai/gpt-5.2 具有出色的性能。

进阶示例:在工作流中加入图像

如果你已经构建了一个基于文本的智能体(Chat → Tools → MCP → 流式传输),图像通常是你接下来会添加的能力:
  1. 从一个 prompt 生成 一张图像
  2. 编辑 / 生成变体 现有图像
  3. 使用视觉模型 分析 图像
下文将从最简单的调用(生成)开始,然后逐步加入编辑和视觉相关能力。

图像生成

使用 DALL-E 模型从文本 prompt 生成图像。
import asyncio
from dedalus_labs import AsyncDedalus
from dotenv import load_dotenv

load_dotenv()

async def generate_image():
    """Create a single image from a text prompt and print its hosted URL."""
    client = AsyncDedalus()

    # DALL-E 3 gives the best generation quality through the unified client.
    result = await client.images.generate(
        prompt="Dedalus flying through clouds",
        model="openai/dall-e-3",
    )

    image_url = result.data[0].url
    print(image_url)

if __name__ == "__main__":
    asyncio.run(generate_image())

图像编辑

通过提供源图像、蒙版以及描述期望修改内容的 prompt,可以编辑现有图像。
import asyncio
import httpx
from dedalus_labs import AsyncDedalus
from dotenv import load_dotenv

load_dotenv()

async def edit_image():
    """Edit an image (using a generated image as both source and mask).

    Generates a throwaway test image, downloads it, then submits the same
    bytes as both the source image and the mask. Using the image as its own
    mask is only meant to verify the edit endpoint works end to end.
    """
    client = AsyncDedalus()

    # Generate a test image (DALL-E output is a valid RGBA PNG).
    gen_response = await client.images.generate(
        prompt="A white cat on a cushion",
        model="openai/dall-e-2",
        size="512x512",
    )

    # Download the generated image. Fail fast on a non-2xx response instead
    # of silently passing an error page's bytes to the edit endpoint.
    async with httpx.AsyncClient() as http:
        img_data = await http.get(gen_response.data[0].url)
        img_data.raise_for_status()
        img_bytes = img_data.content

    # Use the same image as source and mask (endpoint smoke test only).
    response = await client.images.edit(
        image=img_bytes,
        mask=img_bytes,
        prompt="A white cat with sunglasses",
        model="openai/dall-e-2",
    )
    print(response.data[0].url)

if __name__ == "__main__":
    asyncio.run(edit_image())

图像变体

为现有图像生成多个变体。
import asyncio
from pathlib import Path
from dedalus_labs import AsyncDedalus
from dotenv import load_dotenv

load_dotenv()

async def create_variations():
    """Produce two variations of a local image and print their URLs."""
    client = AsyncDedalus()

    source = Path("image.png")

    # Guard clause: this example needs a local file to work with.
    if not source.exists():
        print("Skipped: image.png not found")
        return

    result = await client.images.create_variation(
        image=source.read_bytes(),
        model="openai/dall-e-2",
        n=2,
    )

    urls = [item.url for item in result.data]
    for url in urls:
        print(url)

if __name__ == "__main__":
    asyncio.run(create_variations())

视觉:分析 URL 图片

使用视觉模型来分析并描述来自 URL 的图片。
import asyncio
from dedalus_labs import AsyncDedalus
from dotenv import load_dotenv

load_dotenv()

async def vision_url():
    """Ask a vision model to describe a remotely hosted image."""
    client = AsyncDedalus()

    boardwalk = "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"

    # A multimodal user message mixes text parts with image_url parts.
    user_message = {
        "role": "user",
        "content": [
            {"type": "text", "text": "What's in this image?"},
            {"type": "image_url", "image_url": {"url": boardwalk}},
        ],
    }

    completion = await client.chat.completions.create(
        model="openai/gpt-5.2",
        messages=[user_message],
    )
    print(completion.choices[0].message.content)

if __name__ == "__main__":
    asyncio.run(vision_url())

视觉:使用 Base64 分析本地图片

通过将本地图片编码为 base64 来进行分析。
import asyncio
import base64
from pathlib import Path
from dedalus_labs import AsyncDedalus
from dotenv import load_dotenv

load_dotenv()

async def vision_base64():
    """Analyze a local image by embedding it as a base64 data URL.

    Reads ``image.png`` from the working directory, base64-encodes it, and
    sends it to a vision model inline. Skips (with a message) if the file
    does not exist.
    """
    client = AsyncDedalus()

    image_path = Path("image.png")
    if not image_path.exists():
        print("Skipped: image.png not found")
        return

    b64 = base64.b64encode(image_path.read_bytes()).decode()
    completion = await client.chat.completions.create(
        model="openai/gpt-5.2",
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "Describe this image."},
                    # MIME type must match the file: image.png is a PNG, so
                    # the data URL declares image/png (was image/jpeg).
                    {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{b64}"}},
                ],
            }
        ],
    )
    print(completion.choices[0].message.content)

if __name__ == "__main__":
    asyncio.run(vision_base64())

高级:使用 DedalusRunner 编排图像

使用 DedalusRunner 将生成、编辑和视觉能力组合起来,构建复杂的图像处理工作流。
Python
import asyncio
import httpx
from dedalus_labs import AsyncDedalus, DedalusRunner
from dotenv import load_dotenv

load_dotenv()

class ImageToolSuite:
    """Helper that exposes image endpoints as DedalusRunner tools.

    Each public coroutine wraps one image capability (generation, editing,
    vision) so a runner can hand them to an agent as callable tools.
    """

    def __init__(self, client: AsyncDedalus):
        # Shared async client; the suite never closes it (caller owns it).
        self._client = client

    async def generate_concept_art(
        self,
        prompt: str,
        model: str = "openai/dall-e-3",
        size: str = "1024x1024",
    ) -> str:
        """Create concept art and return the hosted image URL."""
        response = await self._client.images.generate(
            prompt=prompt,
            model=model,
            size=size,
        )
        return response.data[0].url

    async def edit_concept_art(
        self,
        prompt: str,
        reference_url: str,
        mask_url: str | None = None,
        model: str = "openai/dall-e-2",
    ) -> str:
        """Apply edits to the referenced image URL and return a new URL.

        Args:
            prompt: Description of the desired modification.
            reference_url: URL of the source image to edit (required).
            mask_url: Optional URL of a mask restricting the edit region.
            model: Image-edit model identifier.

        Raises:
            ValueError: If ``reference_url`` is empty.
            httpx.HTTPStatusError: If either download returns a non-2xx status.
        """
        if not reference_url:
            raise ValueError("reference_url must be provided when editing an image.")

        async with httpx.AsyncClient() as http:
            base_image = await http.get(reference_url)
            # Fail fast on a bad download rather than forwarding an error
            # page's bytes to the edit endpoint.
            base_image.raise_for_status()
            mask_response = await http.get(mask_url) if mask_url else None
            if mask_response is not None:
                mask_response.raise_for_status()

        edit_kwargs = {
            "image": base_image.content,
            "prompt": prompt,
            "model": model,
        }
        # Explicit None check: don't let httpx.Response truthiness decide
        # whether a successfully downloaded mask gets attached.
        if mask_response is not None:
            edit_kwargs["mask"] = mask_response.content

        response = await self._client.images.edit(**edit_kwargs)
        return response.data[0].url

    async def describe_image(
        self,
        image_url: str,
        question: str = "Describe this image.",
        model: str = "openai/gpt-5.2",
    ) -> str:
        """Run a lightweight vision pass against an existing image URL."""
        completion = await self._client.chat.completions.create(
            model=model,
            messages=[
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": question},
                        {"type": "image_url", "image_url": {"url": image_url}},
                    ],
                }
            ],
        )
        return completion.choices[0].message.content

async def runner_storyboard():
    """Demonstrate DedalusRunner + agent-as-tool pattern for image workflows."""
    client = AsyncDedalus()
    runner = DedalusRunner(client, verbose=True)
    image_tools = ImageToolSuite(client)

    # Creative-director persona: keep the main conversation on the text
    # model and delegate all image work to the tool suite.
    instructions = (
        "你是一位创意总监。使用提供的工具生成概念艺术图,"
        "可选择对其进行优化,然后描述最终渲染结果。始终在文本模型上保持"
        "主要对话,并依赖工具进行图像工作。"
    )

    tool_belt = [
        image_tools.generate_concept_art,
        image_tools.edit_concept_art,
        image_tools.describe_image,
    ]

    result = await runner.run(
        instructions=instructions,
        input="Create a retro Dedalus mission patch, refine it with a neon palette, and describe it.",
        model="openai/gpt-5.2",
        tools=tool_belt,
        max_steps=4,
        verbose=True,
        debug=False,
    )

    print("Runner final output:", result.final_output)
    print("Tools invoked:", result.tools_called)

if __name__ == "__main__":
    asyncio.run(runner_storyboard())

后续步骤

通过模型上下文协议(MCP)将这些文档以编程方式连接到 Claude、VSCode 等,实现实时解答。