Spaces:
Sleeping
Sleeping
File size: 1,707 Bytes
from pydantic import BaseModel, Field
from typing import Optional
from enum import Enum
class Difficulty(str, Enum):
    """Closed set of task difficulty levels.

    Members also subclass ``str``, so each one compares equal to its
    plain string value (e.g. ``Difficulty.easy == "easy"``).
    """

    easy = "easy"
    medium = "medium"
    hard = "hard"
class DebugAction(BaseModel):
    """Agent submits fixed code."""

    # Required: ``...`` as the default marks the field as having no default.
    code: str = Field(..., description="The corrected Python code")
class TestResult(BaseModel):
    """Result of a single test case execution."""

    # Required fields.
    test_name: str
    passed: bool

    # Optional details; each defaults to the empty string when not supplied.
    expected: str = ""
    actual: str = ""
    error: str = ""
class DebugObservation(BaseModel):
    """What the agent sees after reset or step."""

    # Required task fields (no defaults).
    task_id: str
    difficulty: Difficulty
    description: str
    buggy_code: str
    test_descriptions: list[str]

    # Execution output; empty by default.
    # default_factory gives every instance its own list.
    test_results: list[TestResult] = Field(default_factory=list)
    stdout: str = ""
    stderr: str = ""

    # Progress counters and flags, with their defaults.
    step_count: int = 0
    max_steps: int = 5
    reward: float = 0.0
    done: bool = False
    total_tests: int = 0
    tests_passed: int = 0
class DebugState(BaseModel):
    """Episode metadata exposed via /state."""

    # Required identifying fields (no defaults).
    episode_id: str
    task_id: str
    difficulty: Difficulty

    # Progress counters and flags, with their defaults.
    step_count: int = 0
    max_steps: int = 5
    last_reward: float = 0.0
    cumulative_reward: float = 0.0
    tests_passed: int = 0
    total_tests: int = 0
    done: bool = False
# --- API request / response models ---
class ResetRequest(BaseModel):
    """Request body for a reset; both fields are optional and default to None."""

    # NOTE(review): how a missing task_id or seed is handled is decided by
    # the consumer of this model, which is not visible here.
    task_id: Optional[str] = None
    seed: Optional[int] = None
class ResetResponse(BaseModel):
    """Response to a reset: an episode id plus a DebugObservation."""

    episode_id: str
    observation: DebugObservation
class StepRequest(BaseModel):
    """Request body for a step: wraps a single DebugAction."""

    action: DebugAction
class StepResponse(BaseModel):
    """Response to a step: observation, reward, done flag and free-form info."""

    observation: DebugObservation
    reward: float
    done: bool
    # default_factory gives every instance its own empty dict.
    info: dict = Field(default_factory=dict)
|