"""Pydantic schemas: agent action, test results, observation, episode state,
and the reset/step request and response payloads."""

from enum import Enum
from typing import Optional

from pydantic import BaseModel, Field

# Default step budget shared by the observation and the episode state so the
# two models cannot drift apart.
_DEFAULT_MAX_STEPS = 5


# String-valued enum: members compare equal to their plain string values.
class Difficulty(str, Enum):
    easy = "easy"
    medium = "medium"
    hard = "hard"


class DebugAction(BaseModel):
    """Agent submits fixed code."""

    # Required field (no default).
    code: str = Field(..., description="The corrected Python code")


class TestResult(BaseModel):
    """Result of a single test case execution."""

    test_name: str
    passed: bool
    # The three text fields below are optional and default to empty strings.
    expected: str = ""
    actual: str = ""
    error: str = ""


class DebugObservation(BaseModel):
    """What the agent sees after reset or step."""

    # --- required task description fields ---
    task_id: str
    difficulty: Difficulty
    description: str
    buggy_code: str
    test_descriptions: list[str]

    # --- execution output; a fresh empty list is created per instance ---
    test_results: list[TestResult] = Field(default_factory=list)
    stdout: str = ""
    stderr: str = ""

    # --- progress counters and outcome, all defaulted ---
    step_count: int = 0
    max_steps: int = _DEFAULT_MAX_STEPS
    reward: float = 0.0
    done: bool = False
    total_tests: int = 0
    tests_passed: int = 0


class DebugState(BaseModel):
    """Episode metadata exposed via /state."""

    # --- required identifiers ---
    episode_id: str
    task_id: str
    difficulty: Difficulty

    # --- counters and reward bookkeeping, all defaulted ---
    step_count: int = 0
    max_steps: int = _DEFAULT_MAX_STEPS
    last_reward: float = 0.0
    cumulative_reward: float = 0.0
    tests_passed: int = 0
    total_tests: int = 0
    done: bool = False


# --- API request / response models ---


# Reset request payload; both fields are optional and default to None.
# NOTE(review): presumably a None task_id lets the server choose the task and
# seed drives any randomness -- confirm against the reset handler.
class ResetRequest(BaseModel):
    task_id: Optional[str] = None
    seed: Optional[int] = None


# Reset response payload: an episode id plus a DebugObservation.
class ResetResponse(BaseModel):
    episode_id: str
    observation: DebugObservation


# Step request payload: wraps a single DebugAction.
class StepRequest(BaseModel):
    action: DebugAction


# Step response payload: observation, reward and done flag, plus a free-form
# info dict that defaults to a fresh empty dict per instance.
class StepResponse(BaseModel):
    observation: DebugObservation
    reward: float
    done: bool
    info: dict = Field(default_factory=dict)