跳转到主要内容
使用 DALL-E 生成图像、创建变体、进行编辑,并使用视觉模型分析图像——全部通过一个统一的 Client 完成。
对于图像生成,使用 openai/dall-e-3 可获得最佳质量。对于视觉任务,openai/gpt-5.2 具有出色的性能。

进阶示例:在工作流中加入图像

如果你已经构建了一个基于文本的智能体(Chat → Tools → MCP → 流式传输),图像通常是你接下来会添加的能力:
  1. 从一个 prompt 生成 一张图像
  2. 编辑 / 生成变体 现有图像
  3. 使用视觉模型 分析 图像
下文将从最简单的调用(生成)开始,然后逐步加入编辑和视觉相关能力。

图像生成

使用 DALL-E 模型从文本 prompt 生成图像。
import asyncio
from dedalus_labs import AsyncDedalus
from dotenv import load_dotenv

load_dotenv()

async def generate_image():
    """Create a single image from a text prompt and print its hosted URL."""
    client = AsyncDedalus()

    # DALL-E 3 gives the best generation quality through the unified client.
    result = await client.images.generate(
        prompt="Dedalus flying through clouds",
        model="openai/dall-e-3",
    )

    image_url = result.data[0].url
    print(image_url)

if __name__ == "__main__":
    asyncio.run(generate_image())

图像编辑

通过提供源图像、蒙版以及描述期望修改内容的 prompt,可以编辑现有图像。
import asyncio
import httpx
from dedalus_labs import AsyncDedalus
from dotenv import load_dotenv

load_dotenv()

async def edit_image():
    """Edit an image (using a generated image as both source and mask).

    Generates a throwaway test image, downloads it, then submits the same
    bytes as both the source image and the mask. Using the image as its own
    mask is only meant to verify the edit endpoint works end to end.
    """
    client = AsyncDedalus()

    # Generate a test image (DALL-E output is a valid RGBA PNG).
    gen_response = await client.images.generate(
        prompt="A white cat on a cushion",
        model="openai/dall-e-2",
        size="512x512",
    )

    # Download the generated image. Fail fast on a non-2xx response instead
    # of silently passing an error page's bytes to the edit endpoint.
    async with httpx.AsyncClient() as http:
        img_data = await http.get(gen_response.data[0].url)
        img_data.raise_for_status()
        img_bytes = img_data.content

    # Use the same image as source and mask (endpoint smoke test only).
    response = await client.images.edit(
        image=img_bytes,
        mask=img_bytes,
        prompt="A white cat with sunglasses",
        model="openai/dall-e-2",
    )
    print(response.data[0].url)

if __name__ == "__main__":
    asyncio.run(edit_image())

图像变体

为现有图像生成多个变体。
import asyncio
from pathlib import Path
from dedalus_labs import AsyncDedalus
from dotenv import load_dotenv

load_dotenv()

async def create_variations():
    """Produce two variations of a local image and print their URLs."""
    client = AsyncDedalus()

    source = Path("image.png")

    # Guard clause: this example needs a local file to work with.
    if not source.exists():
        print("Skipped: image.png not found")
        return

    result = await client.images.create_variation(
        image=source.read_bytes(),
        model="openai/dall-e-2",
        n=2,
    )

    urls = [item.url for item in result.data]
    for url in urls:
        print(url)

if __name__ == "__main__":
    asyncio.run(create_variations())

视觉:分析 URL 图片

使用视觉模型来分析并描述来自 URL 的图片。
import asyncio
from dedalus_labs import AsyncDedalus
from dotenv import load_dotenv

load_dotenv()

async def vision_url():
    """Ask a vision model to describe a remotely hosted image."""
    client = AsyncDedalus()

    boardwalk = "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"

    # A multimodal user message mixes text parts with image_url parts.
    user_message = {
        "role": "user",
        "content": [
            {"type": "text", "text": "What's in this image?"},
            {"type": "image_url", "image_url": {"url": boardwalk}},
        ],
    }

    completion = await client.chat.completions.create(
        model="openai/gpt-5.2",
        messages=[user_message],
    )
    print(completion.choices[0].message.content)

if __name__ == "__main__":
    asyncio.run(vision_url())

视觉:使用 Base64 分析本地图片

通过将本地图片编码为 base64 来进行分析。
import asyncio
import base64
from pathlib import Path
from dedalus_labs import AsyncDedalus
from dotenv import load_dotenv

load_dotenv()

async def vision_base64():
    """Analyze a local image by embedding it as a base64 data URL.

    Reads ``image.png`` from the working directory, base64-encodes it, and
    sends it to a vision model inline. Skips (with a message) if the file
    does not exist.
    """
    client = AsyncDedalus()

    image_path = Path("image.png")
    if not image_path.exists():
        print("Skipped: image.png not found")
        return

    b64 = base64.b64encode(image_path.read_bytes()).decode()
    completion = await client.chat.completions.create(
        model="openai/gpt-5.2",
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "Describe this image."},
                    # MIME type must match the file: image.png is a PNG, so
                    # the data URL declares image/png (was image/jpeg).
                    {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{b64}"}},
                ],
            }
        ],
    )
    print(completion.choices[0].message.content)

if __name__ == "__main__":
    asyncio.run(vision_base64())

高级:使用 DedalusRunner 编排图像

使用 DedalusRunner 将生成、编辑和视觉能力组合起来,构建复杂的图像处理工作流。
Python
import asyncio
import httpx
from dedalus_labs import AsyncDedalus, DedalusRunner
from dotenv import load_dotenv

load_dotenv()

class ImageToolSuite:
    """Helper that exposes image endpoints as DedalusRunner tools.

    Each public coroutine wraps one image capability (generation, editing,
    vision) so a runner can hand them to an agent as callable tools.
    """

    def __init__(self, client: AsyncDedalus):
        # Shared async client; the suite never closes it (caller owns it).
        self._client = client

    async def generate_concept_art(
        self,
        prompt: str,
        model: str = "openai/dall-e-3",
        size: str = "1024x1024",
    ) -> str:
        """Create concept art and return the hosted image URL."""
        response = await self._client.images.generate(
            prompt=prompt,
            model=model,
            size=size,
        )
        return response.data[0].url

    async def edit_concept_art(
        self,
        prompt: str,
        reference_url: str,
        mask_url: str | None = None,
        model: str = "openai/dall-e-2",
    ) -> str:
        """Apply edits to the referenced image URL and return a new URL.

        Args:
            prompt: Description of the desired modification.
            reference_url: URL of the source image to edit (required).
            mask_url: Optional URL of a mask restricting the edit region.
            model: Image-edit model identifier.

        Raises:
            ValueError: If ``reference_url`` is empty.
            httpx.HTTPStatusError: If either download returns a non-2xx status.
        """
        if not reference_url:
            raise ValueError("reference_url must be provided when editing an image.")

        async with httpx.AsyncClient() as http:
            base_image = await http.get(reference_url)
            # Fail fast on a bad download rather than forwarding an error
            # page's bytes to the edit endpoint.
            base_image.raise_for_status()
            mask_response = await http.get(mask_url) if mask_url else None
            if mask_response is not None:
                mask_response.raise_for_status()

        edit_kwargs = {
            "image": base_image.content,
            "prompt": prompt,
            "model": model,
        }
        # Explicit None check: don't let httpx.Response truthiness decide
        # whether a successfully downloaded mask gets attached.
        if mask_response is not None:
            edit_kwargs["mask"] = mask_response.content

        response = await self._client.images.edit(**edit_kwargs)
        return response.data[0].url

    async def describe_image(
        self,
        image_url: str,
        question: str = "Describe this image.",
        model: str = "openai/gpt-5.2",
    ) -> str:
        """Run a lightweight vision pass against an existing image URL."""
        completion = await self._client.chat.completions.create(
            model=model,
            messages=[
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": question},
                        {"type": "image_url", "image_url": {"url": image_url}},
                    ],
                }
            ],
        )
        return completion.choices[0].message.content

async def runner_storyboard():
    """Demonstrate DedalusRunner + agent-as-tool pattern for image workflows."""
    client = AsyncDedalus()
    runner = DedalusRunner(client, verbose=True)
    image_tools = ImageToolSuite(client)

    # Creative-director persona: keep the main conversation on the text
    # model and delegate all image work to the tool suite.
    instructions = (
        "你是一位创意总监。使用提供的工具生成概念艺术图,"
        "可选择对其进行优化,然后描述最终渲染结果。始终在文本模型上保持"
        "主要对话,并依赖工具进行图像工作。"
    )

    tool_belt = [
        image_tools.generate_concept_art,
        image_tools.edit_concept_art,
        image_tools.describe_image,
    ]

    result = await runner.run(
        instructions=instructions,
        input="Create a retro Dedalus mission patch, refine it with a neon palette, and describe it.",
        model="openai/gpt-5.2",
        tools=tool_belt,
        max_steps=4,
        verbose=True,
        debug=False,
    )

    print("Runner final output:", result.final_output)
    print("Tools invoked:", result.tools_called)

if __name__ == "__main__":
    asyncio.run(runner_storyboard())

后续步骤

通过模型上下文协议(MCP)将这些文档以编程方式连接到 Claude、VSCode 等,实现实时解答。