Have an agent generate output, then evaluate it with a second LLM call, looping until the quality meets your criteria. Restate persists each iteration, so if the process crashes, it resumes from the last completed evaluation without re-running earlier iterations.
A generator agent writes code, then an evaluator agent checks it. If the evaluation fails, the generator retries with the evaluator's feedback. Each iteration is a durable step.

Each generate and evaluate call is persisted in the journal. If the process crashes after a successful generation but before evaluation, the generated code is replayed from the journal without calling the LLM again.
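A minimal sketch of the loop using the Restate TypeScript SDK. The `generateCode` and `evaluateCode` helpers are hypothetical stand-ins for your LLM calls, and the service name and five-attempt cap are illustrative assumptions; only the `ctx.run` journaling pattern is the SDK's actual API.

```typescript
import * as restate from "@restatedev/restate-sdk";

// Hypothetical LLM helpers: replace with calls to your model client.
async function generateCode(task: string, feedback?: string): Promise<string> {
  // e.g. prompt an LLM with the task plus any evaluator feedback
  return `// code for: ${task}${feedback ? ` (revised per: ${feedback})` : ""}`;
}

async function evaluateCode(
  code: string
): Promise<{ pass: boolean; feedback: string }> {
  // e.g. a second LLM call that reviews the generated code
  return { pass: code.includes("code for"), feedback: "add error handling" };
}

const evaluatorLoop = restate.service({
  name: "evaluatorLoop",
  handlers: {
    run: async (ctx: restate.Context, task: string) => {
      let feedback: string | undefined;
      for (let attempt = 0; attempt < 5; attempt++) {
        // Each ctx.run persists its result in the journal. After a crash,
        // completed steps replay from the journal instead of re-calling the LLM.
        const code = await ctx.run("generate", () => generateCode(task, feedback));
        const review = await ctx.run("evaluate", () => evaluateCode(code));
        if (review.pass) {
          return code;
        }
        feedback = review.feedback;
      }
      // TerminalError fails the invocation permanently instead of retrying it.
      throw new restate.TerminalError("Quality criteria not met after 5 attempts");
    },
  },
});

restate.endpoint().bind(evaluatorLoop).listen(9080);
```

Because `feedback` flows through ordinary handler code, a replayed invocation reconstructs it from the journaled `ctx.run` results, so the retry prompt stays consistent across crashes.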