Spaces:
Sleeping
Sleeping
| <html lang="en"> | |
| <head> | |
| <meta charset="UTF-8"> | |
| <meta name="viewport" content="width=device-width, initial-scale=1.0"> | |
| <title>Code Debug Arena | OpenEnv</title> | |
| <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/prism-themes/1.9.0/prism-vsc-dark-plus.min.css"> | |
| <style> | |
| /* ── Reset & Base ───────────────────────────── */ | |
| *, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; } | |
| :root { | |
| --bg: #0d1117; | |
| --bg-card: #161b22; | |
| --bg-editor: #1e1e1e; | |
| --bg-input: #21252b; | |
| --bg-hover: #30363d; | |
| --text: #e6edf3; | |
| --text-muted: #8b949e; | |
| --text-dim: #484f58; | |
| --accent: #58a6ff; | |
| --green: #3fb950; | |
| --orange: #d29922; | |
| --red: #f85149; | |
| --purple: #bc8cff; | |
| --border: #30363d; | |
| --radius: 12px; | |
| --max-w: 1100px; | |
| } | |
| html { scroll-behavior: smooth; } | |
| body { | |
| font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Helvetica, Arial, sans-serif; | |
| background: var(--bg); | |
| color: var(--text); | |
| line-height: 1.6; | |
| overflow-x: hidden; | |
| } | |
| code, pre, textarea, .mono { font-family: 'Cascadia Code', 'Fira Code', 'JetBrains Mono', Consolas, monospace; } | |
| a { color: var(--accent); text-decoration: none; } | |
| a:hover { text-decoration: underline; } | |
| .container { max-width: var(--max-w); margin: 0 auto; padding: 0 24px; } | |
| /* ── Fade-in animation ──────────────────────── */ | |
| .fade-in { | |
| opacity: 0; | |
| transform: translateY(28px); | |
| transition: opacity 0.65s ease-out, transform 0.65s ease-out; | |
| } | |
| .fade-in.visible { opacity: 1; transform: translateY(0); } | |
| /* ── Status dot ─────────────────────────────── */ | |
| @keyframes pulse-ring { | |
| 0% { transform: scale(.85); opacity: 1; } | |
| 100% { transform: scale(2.4); opacity: 0; } | |
| } | |
| .status-dot { | |
| position: relative; display: inline-block; | |
| width: 10px; height: 10px; border-radius: 50%; | |
| background: var(--green); vertical-align: middle; margin-right: 6px; | |
| } | |
| .status-dot::before { | |
| content: ''; position: absolute; inset: 0; | |
| border-radius: 50%; background: var(--green); | |
| animation: pulse-ring 1.6s ease-out infinite; | |
| } | |
| /* ── Blinking cursor ────────────────────────── */ | |
| @keyframes blink { 0%,50%{opacity:1} 51%,100%{opacity:0} } | |
| .cursor { | |
| display: inline-block; width: 8px; height: 16px; | |
| background: var(--green); vertical-align: text-bottom; | |
| animation: blink 1s step-end infinite; | |
| } | |
| /* ── Progress bar ───────────────────────────── */ | |
| @keyframes fillBar { from { width: 0; } } | |
| .progress-track { | |
| height: 10px; border-radius: 5px; | |
| background: var(--bg-hover); overflow: hidden; margin-top: 6px; | |
| } | |
| .progress-fill { | |
| height: 100%; border-radius: 5px; | |
| background: linear-gradient(90deg, var(--green), var(--accent)); | |
| animation: fillBar 1.2s ease-out forwards; | |
| transition: width 0.6s ease-out; | |
| } | |
| /* ── Badge ──────────────────────────────────── */ | |
| .badge { | |
| display: inline-block; padding: 2px 10px; border-radius: 20px; | |
| font-size: 12px; font-weight: 600; text-transform: uppercase; letter-spacing: .5px; | |
| } | |
| .badge-easy { background: rgba(63,185,80,.15); color: var(--green); } | |
| .badge-medium { background: rgba(210,153,34,.15); color: var(--orange); } | |
| .badge-hard { background: rgba(248,81,73,.15); color: var(--red); } | |
| .badge-meta { background: rgba(88,166,255,.12); color: var(--accent); border: 1px solid rgba(88,166,255,.25); } | |
| /* ── Buttons ────────────────────────────────── */ | |
| .btn { | |
| display: inline-block; padding: 10px 24px; border-radius: 8px; | |
| font-size: 14px; font-weight: 600; cursor: pointer; border: none; | |
| transition: all .2s; | |
| } | |
| .btn-primary { background: var(--accent); color: #fff; } | |
| .btn-primary:hover { background: #79c0ff; text-decoration: none; } | |
| .btn-outline { background: transparent; color: var(--text); border: 1px solid var(--border); } | |
| .btn-outline:hover { background: var(--bg-hover); text-decoration: none; } | |
| .btn-green { background: var(--green); color: #fff; } | |
| .btn-green:hover { background: #56d364; text-decoration: none; } | |
| .btn:disabled { opacity: .4; cursor: not-allowed; } | |
| /* ── Cards ──────────────────────────────────── */ | |
| .card { | |
| background: var(--bg-card); border: 1px solid var(--border); | |
| border-radius: var(--radius); padding: 24px; | |
| transition: border-color .2s, transform .2s; | |
| } | |
| .card:hover { border-color: var(--accent); transform: translateY(-2px); } | |
| /* ═══════════════════════════════════════════════ | |
| HERO | |
| ═══════════════════════════════════════════════ */ | |
| .hero { | |
| text-align: center; padding: 80px 0 60px; | |
| background: radial-gradient(ellipse at 50% 0%, rgba(88,166,255,.08) 0%, transparent 70%); | |
| } | |
| .hero h1 { | |
| font-size: clamp(32px, 5vw, 52px); font-weight: 800; | |
| letter-spacing: -1px; line-height: 1.15; margin-bottom: 16px; | |
| } | |
| .hero h1 span { color: var(--accent); } | |
| .hero p.subtitle { | |
| font-size: 18px; color: var(--text-muted); max-width: 620px; margin: 0 auto 28px; | |
| } | |
| .hero-badges { margin-bottom: 28px; display: flex; gap: 12px; justify-content: center; flex-wrap: wrap; } | |
| .hero-cta { display: flex; gap: 12px; justify-content: center; flex-wrap: wrap; } | |
| .hero-stats { | |
| display: flex; gap: 36px; justify-content: center; margin-top: 40px; flex-wrap: wrap; | |
| } | |
| .hero-stat { text-align: center; } | |
| .hero-stat .num { font-size: 28px; font-weight: 700; color: var(--accent); } | |
| .hero-stat .label { font-size: 13px; color: var(--text-muted); } | |
| /* ═══════════════════════════════════════════════ | |
| LIVE ARENA | |
| ═══════════════════════════════════════════════ */ | |
| .arena { padding: 60px 0; } | |
| .arena h2 { font-size: 28px; font-weight: 700; margin-bottom: 8px; } | |
| .arena p.desc { color: var(--text-muted); margin-bottom: 24px; } | |
| .arena-grid { | |
| display: grid; grid-template-columns: 1fr 1fr; gap: 16px; | |
| } | |
| @media (max-width: 768px) { .arena-grid { grid-template-columns: 1fr; } } | |
| .arena-panel { | |
| background: var(--bg-editor); border: 1px solid var(--border); | |
| border-radius: var(--radius); overflow: hidden; | |
| } | |
| .panel-header { | |
| padding: 10px 16px; background: var(--bg-card); | |
| border-bottom: 1px solid var(--border); | |
| display: flex; align-items: center; justify-content: space-between; | |
| font-size: 13px; font-weight: 600; color: var(--text-muted); | |
| } | |
| .panel-body { padding: 0; } | |
| .panel-body pre { margin: 0; padding: 16px; font-size: 13px; max-height: 320px; overflow: auto; } | |
| .panel-body textarea { | |
| width: 100%; min-height: 280px; padding: 16px; | |
| background: var(--bg-editor); color: var(--text); border: none; | |
| font-size: 13px; resize: vertical; outline: none; | |
| } | |
| .arena-controls { | |
| display: flex; gap: 12px; align-items: center; margin-top: 16px; flex-wrap: wrap; | |
| } | |
| .task-select { | |
| padding: 8px 14px; border-radius: 8px; | |
| background: var(--bg-card); color: var(--text); border: 1px solid var(--border); | |
| font-size: 14px; cursor: pointer; | |
| } | |
| .task-select option { background: var(--bg-card); } | |
| /* ── Test results ───────────────────────────── */ | |
| .test-results { margin-top: 16px; } | |
| .test-row { | |
| display: flex; align-items: center; gap: 10px; | |
| padding: 8px 14px; border-radius: 8px; margin-bottom: 6px; | |
| font-size: 13px; opacity: 0; transform: translateX(-16px); | |
| transition: all .4s ease-out; | |
| } | |
| .test-row.show { opacity: 1; transform: translateX(0); } | |
| .test-row.pass { background: rgba(63,185,80,.1); border-left: 3px solid var(--green); } | |
| .test-row.fail { background: rgba(248,81,73,.1); border-left: 3px solid var(--red); } | |
| .test-icon { font-size: 16px; } | |
| .test-name { flex: 1; } | |
| .test-detail { color: var(--text-muted); font-size: 12px; } | |
| /* ── Reward display ─────────────────────────── */ | |
| .reward-display { | |
| margin-top: 20px; padding: 16px 20px; | |
| background: var(--bg-card); border: 1px solid var(--border); border-radius: var(--radius); | |
| } | |
| .reward-label { font-size: 13px; color: var(--text-muted); margin-bottom: 4px; } | |
| .reward-value { font-size: 24px; font-weight: 700; } | |
| /* ── Terminal ───────────────────────────────── */ | |
| .terminal { | |
| margin-top: 16px; background: #0a0a0a; border: 1px solid var(--border); | |
| border-radius: var(--radius); overflow: hidden; | |
| } | |
| .terminal-header { | |
| padding: 8px 16px; background: #1a1a1a; border-bottom: 1px solid #333; | |
| font-size: 12px; color: var(--text-dim); display: flex; align-items: center; gap: 8px; | |
| } | |
| .terminal-dots { display: flex; gap: 6px; } | |
| .terminal-dots span { width: 10px; height: 10px; border-radius: 50%; } | |
| .terminal-dots .dot-r { background: #f85149; } | |
| .terminal-dots .dot-y { background: #d29922; } | |
| .terminal-dots .dot-g { background: #3fb950; } | |
| .terminal-body { | |
| padding: 14px 16px; font-size: 12px; color: var(--green); | |
| max-height: 200px; overflow: auto; white-space: pre-wrap; line-height: 1.7; | |
| } | |
| /* ═══════════════════════════════════════════════ | |
| HOW IT WORKS | |
| ═══════════════════════════════════════════════ */ | |
| .how-it-works { padding: 60px 0; } | |
| .how-it-works h2 { font-size: 28px; font-weight: 700; text-align: center; margin-bottom: 40px; } | |
| .steps { | |
| display: grid; grid-template-columns: repeat(3, 1fr); gap: 24px; | |
| } | |
| @media (max-width: 768px) { .steps { grid-template-columns: 1fr; } } | |
| .step { text-align: center; position: relative; } | |
| .step-num { | |
| width: 48px; height: 48px; border-radius: 50%; | |
| background: rgba(88,166,255,.12); color: var(--accent); | |
| display: flex; align-items: center; justify-content: center; | |
| font-size: 20px; font-weight: 700; margin: 0 auto 16px; | |
| } | |
| .step h3 { font-size: 16px; margin-bottom: 8px; } | |
| .step p { font-size: 14px; color: var(--text-muted); } | |
| .step-arrow { | |
| position: absolute; top: 24px; right: -18px; | |
| color: var(--text-dim); font-size: 20px; | |
| } | |
| @media (max-width: 768px) { .step-arrow { display: none; } } | |
| /* ═══════════════════════════════════════════════ | |
| TASK EXPLORER | |
| ═══════════════════════════════════════════════ */ | |
| .task-explorer { padding: 60px 0; } | |
| .task-explorer h2 { font-size: 28px; font-weight: 700; text-align: center; margin-bottom: 12px; } | |
| .task-explorer > .container > p { text-align: center; color: var(--text-muted); margin-bottom: 32px; } | |
| .task-grid { | |
| display: grid; grid-template-columns: repeat(auto-fill, minmax(320px, 1fr)); gap: 16px; | |
| } | |
| .task-card { cursor: pointer; } | |
| .task-card h3 { font-size: 15px; margin: 12px 0 8px; } | |
| .task-card p { font-size: 13px; color: var(--text-muted); } | |
| .task-meta { display: flex; gap: 12px; margin-top: 12px; font-size: 12px; color: var(--text-dim); } | |
| /* ═══════════════════════════════════════════════ | |
| ARCHITECTURE | |
| ═══════════════════════════════════════════════ */ | |
| .architecture { padding: 60px 0; } | |
| .architecture h2 { font-size: 28px; font-weight: 700; text-align: center; margin-bottom: 32px; } | |
| .arch-flow { | |
| display: flex; align-items: center; justify-content: center; | |
| gap: 0; flex-wrap: wrap; padding: 32px 0; | |
| } | |
| .arch-node { | |
| padding: 16px 24px; border-radius: var(--radius); | |
| background: var(--bg-card); border: 1px solid var(--border); | |
| text-align: center; min-width: 130px; | |
| } | |
| .arch-node .icon { font-size: 24px; margin-bottom: 6px; } | |
| .arch-node .name { font-size: 14px; font-weight: 600; } | |
| .arch-node .desc { font-size: 11px; color: var(--text-muted); } | |
| .arch-arrow { font-size: 20px; color: var(--text-dim); padding: 0 10px; } | |
| @media (max-width: 600px) { | |
| .arch-flow { flex-direction: column; } | |
| .arch-arrow { transform: rotate(90deg); padding: 8px 0; } | |
| } | |
| /* ═══════════════════════════════════════════════ | |
| METRICS | |
| ═══════════════════════════════════════════════ */ | |
| .metrics { padding: 60px 0; } | |
| .metrics h2 { font-size: 28px; font-weight: 700; text-align: center; margin-bottom: 32px; } | |
| .metrics-grid { | |
| display: grid; grid-template-columns: repeat(4, 1fr); gap: 16px; | |
| } | |
| @media (max-width: 768px) { .metrics-grid { grid-template-columns: repeat(2, 1fr); } } | |
| .metric-card { | |
| background: var(--bg-card); border: 1px solid var(--border); | |
| border-radius: var(--radius); padding: 24px; text-align: center; | |
| } | |
| .metric-card .num { font-size: 32px; font-weight: 800; margin-bottom: 4px; } | |
| .metric-card .label { font-size: 13px; color: var(--text-muted); } | |
| /* ═══════════════════════════════════════════════ | |
| FOOTER | |
| ═══════════════════════════════════════════════ */ | |
| .footer { | |
| padding: 40px 0; border-top: 1px solid var(--border); margin-top: 40px; | |
| text-align: center; font-size: 14px; color: var(--text-muted); | |
| } | |
| .footer-links { display: flex; gap: 20px; justify-content: center; margin-bottom: 12px; } | |
| </style> | |
| </head> | |
| <body> | |
| <!-- ═══ HERO ═══════════════════════════════════ --> | |
| <section class="hero"> | |
| <div class="container"> | |
| <div class="hero-badges"> | |
| <span class="badge badge-meta">Meta x PyTorch Hackathon</span> | |
| <span class="badge badge-meta">OpenEnv Compatible</span> | |
| </div> | |
| <h1>AI-Powered <span>Code Debugging</span> Arena</h1> | |
| <p class="subtitle"> | |
| A real-world OpenEnv environment where AI agents learn to fix broken Python code. | |
| Submit buggy code, get instant fixes, and watch tests pass in real time. | |
| </p> | |
| <div class="hero-cta"> | |
| <a href="#arena" class="btn btn-primary">Try It Live</a> | |
| <a href="/docs" class="btn btn-outline">API Docs</a> | |
| </div> | |
| <div class="hero-stats" id="hero-stats"> | |
| <div class="hero-stat"> | |
| <div class="num" id="stat-status"><span class="status-dot"></span> Live</div> | |
| <div class="label">Environment Status</div> | |
| </div> | |
| <div class="hero-stat"> | |
| <div class="num" id="stat-tasks">-</div> | |
| <div class="label">Debug Tasks</div> | |
| </div> | |
| <div class="hero-stat"> | |
| <div class="num">0.0 – 1.0</div> | |
| <div class="label">Reward Range</div> | |
| </div> | |
| <div class="hero-stat"> | |
| <div class="num">3</div> | |
| <div class="label">Difficulty Levels</div> | |
| </div> | |
| </div> | |
| </div> | |
| </section> | |
| <!-- ═══ LIVE ARENA ═════════════════════════════ --> | |
| <section class="arena fade-in" id="arena"> | |
| <div class="container"> | |
| <h2>Live Debug Arena</h2> | |
| <p class="desc">Pick a task, review the buggy code, write your fix, and submit. Watch tests execute in real time.</p> | |
| <div class="arena-controls"> | |
| <select class="task-select" id="task-select"><option value="">Loading tasks...</option></select> | |
| <button class="btn btn-primary" id="btn-reset" disabled>Load Task</button> | |
| <button class="btn btn-green" id="btn-submit" disabled>Submit Fix</button> | |
| <span id="step-info" class="mono" style="font-size:13px;color:var(--text-muted)"></span> | |
| </div> | |
| <div class="arena-grid" style="margin-top:16px"> | |
| <!-- Left: buggy code --> | |
| <div class="arena-panel"> | |
| <div class="panel-header"><span>Buggy Code</span><span id="task-difficulty"></span></div> | |
| <div class="panel-body"><pre><code class="language-python" id="buggy-code">Select a task and click "Load Task" to begin.</code></pre></div> | |
| </div> | |
| <!-- Right: editor --> | |
| <div class="arena-panel"> | |
| <div class="panel-header"><span>Your Fix</span></div> | |
| <div class="panel-body"> | |
| <textarea id="code-editor" placeholder="Paste or write your fixed code here..." spellcheck="false"></textarea> | |
| </div> | |
| </div> | |
| </div> | |
| <!-- Test results --> | |
| <div class="test-results" id="test-results"></div> | |
| <!-- Reward --> | |
| <div class="reward-display" id="reward-display" style="display:none"> | |
| <div style="display:flex;align-items:center;justify-content:space-between"> | |
| <div> | |
| <div class="reward-label">Reward Score</div> | |
| <div class="reward-value" id="reward-value">0.00</div> | |
| </div> | |
| <div style="flex:1;margin-left:24px"> | |
| <div class="reward-label">Tests Passed: <span id="tests-summary">0/0</span></div> | |
| <div class="progress-track"><div class="progress-fill" id="progress-fill" style="width:0"></div></div> | |
| </div> | |
| </div> | |
| </div> | |
| <!-- Terminal --> | |
| <div class="terminal" id="terminal" style="display:none"> | |
| <div class="terminal-header"> | |
| <div class="terminal-dots"><span class="dot-r"></span><span class="dot-y"></span><span class="dot-g"></span></div> | |
| <span>Structured Logs</span> | |
| </div> | |
| <div class="terminal-body mono" id="terminal-body"></div> | |
| </div> | |
| </div> | |
| </section> | |
| <!-- ═══ HOW IT WORKS ═══════════════════════════ --> | |
| <section class="how-it-works fade-in"> | |
| <div class="container"> | |
| <h2>How It Works</h2> | |
| <div class="steps"> | |
| <div class="step"> | |
| <div class="step-num">1</div> | |
| <h3>Reset Environment</h3> | |
| <p>The agent receives buggy Python code and descriptions of the tests it must pass.</p> | |
| <span class="step-arrow">→</span> | |
| </div> | |
| <div class="step"> | |
| <div class="step-num">2</div> | |
| <h3>Submit a Fix</h3> | |
| <p>The agent analyzes the code, identifies the bug, and submits corrected code via <code>step()</code>.</p> | |
| <span class="step-arrow">→</span> | |
| </div> | |
| <div class="step"> | |
| <div class="step-num">3</div> | |
| <h3>Earn Reward</h3> | |
| <p>Tests run in a sandboxed subprocess. Reward = fraction passing (0.0–1.0). Iterate until all pass.</p> | |
| </div> | |
| </div> | |
| </div> | |
| </section> | |
| <!-- ═══ TASK EXPLORER ══════════════════════════ --> | |
| <section class="task-explorer fade-in"> | |
| <div class="container"> | |
| <h2>Task Explorer</h2> | |
| <p>6 real-world debugging challenges across 3 difficulty levels.</p> | |
| <div class="task-grid" id="task-grid"></div> | |
| </div> | |
| </section> | |
| <!-- ═══ ARCHITECTURE ═══════════════════════════ --> | |
| <section class="architecture fade-in"> | |
| <div class="container"> | |
| <h2>Architecture</h2> | |
| <div class="arch-flow"> | |
| <div class="arch-node"> | |
| <div class="icon">🤖</div> | |
| <div class="name">AI Agent</div> | |
| <div class="desc">LLM via OpenAI Client</div> | |
| </div> | |
| <div class="arch-arrow">→</div> | |
| <div class="arch-node" style="border-color:var(--accent)"> | |
| <div class="icon">⚙</div> | |
| <div class="name">step(code)</div> | |
| <div class="desc">OpenEnv API</div> | |
| </div> | |
| <div class="arch-arrow">→</div> | |
| <div class="arch-node"> | |
| <div class="icon">🛠</div> | |
| <div class="name">Executor</div> | |
| <div class="desc">Subprocess + Timeout</div> | |
| </div> | |
| <div class="arch-arrow">→</div> | |
| <div class="arch-node"> | |
| <div class="icon">✅</div> | |
| <div class="name">Tests</div> | |
| <div class="desc">Pass / Fail</div> | |
| </div> | |
| <div class="arch-arrow">→</div> | |
| <div class="arch-node" style="border-color:var(--green)"> | |
| <div class="icon">🏆</div> | |
| <div class="name">Reward</div> | |
| <div class="desc">0.0 – 1.0</div> | |
| </div> | |
| </div> | |
| <div style="text-align:center;margin-top:20px"> | |
| <code class="mono" style="font-size:13px;color:var(--text-muted)"> | |
| reset() → Observation | step(action) → StepResult | state() → State | |
| </code> | |
| </div> | |
| </div> | |
| </section> | |
| <!-- ═══ METRICS ════════════════════════════════ --> | |
| <section class="metrics fade-in"> | |
| <div class="container"> | |
| <h2>Performance</h2> | |
| <div class="metrics-grid"> | |
| <div class="metric-card"> | |
| <div class="num" style="color:var(--green)">6/6</div> | |
| <div class="label">Tasks Solved by Baseline</div> | |
| </div> | |
| <div class="metric-card"> | |
| <div class="num" style="color:var(--accent)">1.000</div> | |
| <div class="label">Average Score</div> | |
| </div> | |
| <div class="metric-card"> | |
| <div class="num" style="color:var(--purple)">&lt; 2s</div> | |
| <div class="label">Avg Response Time</div> | |
| </div> | |
| <div class="metric-card"> | |
| <div class="num" style="color:var(--orange)">100%</div> | |
| <div class="label">OpenEnv Spec Compliant</div> | |
| </div> | |
| </div> | |
| </div> | |
| </section> | |
| <!-- ═══ FOOTER ═════════════════════════════════ --> | |
| <footer class="footer"> | |
| <div class="container"> | |
| <div class="footer-links"> | |
| <a href="https://github.com/arnavzz/openenv-code-debugger" target="_blank">GitHub</a> | |
| <a href="/docs" target="_blank">API Docs</a> | |
| <a href="/health" target="_blank">Health Check</a> | |
| </div> | |
| <p>Built for the Meta x PyTorch OpenEnv Hackathon 2026</p> | |
| </div> | |
| </footer> | |
| <!-- ═══ SCRIPTS ════════════════════════════════ --> | |
| <script src="https://cdnjs.cloudflare.com/ajax/libs/prism/1.29.0/prism.min.js"></script> | |
| <script src="https://cdnjs.cloudflare.com/ajax/libs/prism/1.29.0/components/prism-python.min.js"></script> | |
| <script> | |
| (function() { | |
// Prefix for every API request; the empty string keeps calls on the page's own origin.
const API = '';

// Mutable state for the interactive episode currently shown in the arena.
let episodeId = null;   // set from the /reset response; null until a task is loaded
let currentTask = null; // observation object returned by the most recent /reset
let stepCount = 0;      // mirrors step_count from the latest /step observation
let logs = [];          // lines rendered in the "Structured Logs" terminal

// Terse DOM lookups: `$` returns the first match, `$$` returns every match.
const $ = (selector) => document.querySelector(selector);
const $$ = (selector) => document.querySelectorAll(selector);
// ── Fade-in observer ──
// Adds the `visible` class to a `.fade-in` element the first time it is reported
// as intersecting (observer threshold 0.1), then stops observing that element.
function revealOnce(entries, observer) {
  for (const entry of entries) {
    if (!entry.isIntersecting) continue;
    entry.target.classList.add('visible');
    observer.unobserve(entry.target);
  }
}
const obs = new IntersectionObserver(revealOnce, { threshold: 0.1 });
$$('.fade-in').forEach((section) => obs.observe(section));
// ── Load tasks ──
/**
 * Fetch the task list from `/tasks`, then populate the hero counter, the task
 * dropdown and the Task Explorer grid.
 *
 * A network error, a non-2xx status or a non-array payload is logged and shown
 * in the dropdown placeholder, so the page does not keep displaying
 * "Loading tasks..." after the request has already failed.
 * Rendering errors are logged under the same message the original used.
 */
async function loadTasks() {
  let tasks;
  try {
    const res = await fetch(API + '/tasks');
    if (!res.ok) {
      throw new Error(`GET /tasks failed with HTTP ${res.status}`);
    }
    tasks = await res.json();
    if (!Array.isArray(tasks)) {
      throw new TypeError('GET /tasks did not return an array');
    }
  } catch (e) {
    console.error('Failed to load tasks:', e);
    // At this point the dropdown still holds only its "Loading tasks..." option.
    const placeholder = $('#task-select option');
    if (placeholder) placeholder.textContent = 'Failed to load tasks';
    return;
  }

  try {
    $('#stat-tasks').textContent = tasks.length;
    renderTaskSelect(tasks);
    renderTaskGrid(tasks);
  } catch (e) {
    console.error('Failed to load tasks:', e);
  }
}
/**
 * Rebuild the task dropdown from the task list and enable the "Load Task" button.
 *
 * Options are created as DOM nodes, so task ids and descriptions are inserted as
 * text instead of being parsed as HTML. The whole list is attached in a single
 * operation rather than re-serialising the <select> on every iteration.
 *
 * @param {Array<Object>} tasks - entries with `task_id`, `difficulty` and
 *   `description` fields (the fields this function reads).
 */
function renderTaskSelect(tasks) {
  const sel = $('#task-select');
  const options = document.createDocumentFragment();
  options.appendChild(new Option('-- Select a task --', ''));
  tasks.forEach((t) => {
    const difficulty = String(t.difficulty || '').toUpperCase();
    // Long descriptions are cut to 60 characters to keep the dropdown narrow.
    const summary = String(t.description || '').slice(0, 60);
    options.appendChild(new Option(`[${difficulty}] ${summary}`, t.task_id));
  });
  sel.innerHTML = '';
  sel.appendChild(options);
  $('#btn-reset').disabled = false;
}
/**
 * Render one clickable card per task into the Task Explorer grid, ordered
 * easy -> medium -> hard. Tasks with any other difficulty value sort last.
 *
 * Cards are assembled from DOM nodes with `textContent`, so server-provided
 * strings are never interpreted as markup. The caller's array is left
 * untouched; sorting happens on a copy.
 *
 * Clicking a card selects that task in the dropdown, loads it via resetEnv()
 * and scrolls the arena section into view.
 *
 * @param {Array<Object>} tasks - entries with `task_id`, `difficulty`,
 *   `description`, `max_steps` and `total_tests` fields.
 */
function renderTaskGrid(tasks) {
  const grid = $('#task-grid');
  const order = { easy: 0, medium: 1, hard: 2 };
  const rank = (t) => {
    const r = order[t.difficulty];
    // Unknown difficulties would otherwise make the comparator return NaN.
    return typeof r === 'number' ? r : 3;
  };
  const sorted = tasks.slice().sort((a, b) => rank(a) - rank(b));

  // Small element factory: tag, optional class string, optional text content.
  const el = (tag, className, text) => {
    const node = document.createElement(tag);
    if (className) node.className = className;
    if (text !== undefined) node.textContent = String(text);
    return node;
  };

  const cards = document.createDocumentFragment();
  sorted.forEach((t) => {
    const card = el('div', 'card task-card');
    card.dataset.id = t.task_id;

    card.appendChild(el('span', `badge badge-${t.difficulty}`, t.difficulty));
    // Turn an id such as "task_001_off_by_one" into the title "off by one".
    const title = String(t.task_id).replace(/^task_\d+_/, '').replace(/_/g, ' ');
    card.appendChild(el('h3', '', title));
    card.appendChild(el('p', '', t.description));

    const meta = el('div', 'task-meta');
    meta.appendChild(el('span', '', `Max steps: ${t.max_steps}`));
    meta.appendChild(el('span', '', `Tests: ${t.total_tests}`));
    card.appendChild(meta);

    card.addEventListener('click', () => {
      $('#task-select').value = card.dataset.id;
      resetEnv();
      document.getElementById('arena').scrollIntoView({ behavior: 'smooth' });
    });
    cards.appendChild(card);
  });

  grid.innerHTML = '';
  grid.appendChild(cards);
}
// ── Reset environment ──
/**
 * Start a new episode for the task selected in the dropdown.
 *
 * POSTs `{ task_id }` to `/reset`, stores the returned episode id and
 * observation, shows the buggy code (syntax-highlighted when Prism is loaded),
 * pre-fills the editor with that code, and writes the `[START]` log line.
 *
 * Does nothing when no task is selected. The response is validated before any
 * state is overwritten, so a failed reset leaves the previous episode's state
 * intact. On failure the error is logged and a short message is shown in the
 * step-info area. The "Load Task" button is re-enabled on every exit path.
 */
async function resetEnv() {
  const taskId = $('#task-select').value;
  if (!taskId) return;
  $('#btn-reset').disabled = true;
  $('#btn-submit').disabled = true;
  clearResults();
  try {
    const res = await fetch(API + '/reset', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ task_id: taskId }),
    });
    if (!res.ok) {
      throw new Error(`POST /reset failed with HTTP ${res.status}`);
    }
    const data = await res.json();
    if (!data || !data.observation) {
      throw new TypeError('POST /reset returned no observation');
    }

    episodeId = data.episode_id;
    currentTask = data.observation;
    stepCount = 0;
    logs = [];

    // Render buggy code; highlighting is skipped if the Prism script did not load.
    const codeEl = $('#buggy-code');
    codeEl.textContent = currentTask.buggy_code;
    if (typeof Prism !== 'undefined') {
      Prism.highlightElement(codeEl);
    }

    // Difficulty badge, built as a node so the value is inserted as text.
    const d = currentTask.difficulty;
    const badge = document.createElement('span');
    badge.className = `badge badge-${d}`;
    badge.textContent = d;
    const badgeSlot = $('#task-difficulty');
    badgeSlot.innerHTML = '';
    badgeSlot.appendChild(badge);

    // Pre-fill the editor with the buggy code as a starting point.
    $('#code-editor').value = currentTask.buggy_code;
    $('#btn-submit').disabled = false;
    $('#step-info').textContent = `Step 0/${currentTask.max_steps}`;

    addLog(`[START] task=${taskId} env=arena model=interactive`);
  } catch (e) {
    console.error('Reset failed:', e);
    $('#step-info').textContent = 'Failed to load task. Please try again.';
  } finally {
    $('#btn-reset').disabled = false;
  }
}
// ── Submit fix ──
/**
 * Submit the editor contents as one step of the current episode.
 *
 * POSTs `{ action: { code } }` to `/step/<episodeId>`, then updates the step
 * counter, the test-result rows, the reward panel and the log terminal. When the
 * response reports `done`, an `[END]` line is logged, a final status replaces
 * the step counter, and the submit button stays disabled.
 *
 * Does nothing when there is no active episode or the editor is blank.
 * A non-2xx status or a payload without `observation` or a numeric `reward` is
 * treated as a failure: it is logged to the console and the terminal, and the
 * submit button is re-enabled so the user can retry.
 */
async function submitFix() {
  if (!episodeId) return;
  const code = $('#code-editor').value;
  if (!code.trim()) return;

  const submitBtn = $('#btn-submit');
  submitBtn.disabled = true;
  try {
    const res = await fetch(API + `/step/${episodeId}`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ action: { code } }),
    });
    if (!res.ok) {
      throw new Error(`POST /step failed with HTTP ${res.status}`);
    }
    const data = await res.json();
    const obs = data && data.observation;
    if (!obs || typeof data.reward !== 'number') {
      throw new TypeError('POST /step returned an unexpected payload');
    }

    stepCount = obs.step_count;
    const info = $('#step-info');
    info.textContent = `Step ${stepCount}/${obs.max_steps}`;

    // Animate test results, then show the reward.
    renderTestResults(obs.test_results);
    showReward(data.reward, obs.tests_passed, obs.total_tests);

    // Log a single-line preview of the submitted code (first 60 characters).
    const actionShort = code.replace(/\n/g, '\\n').slice(0, 60);
    addLog(`[STEP] step=${stepCount} action="${actionShort}..." reward=${data.reward.toFixed(2)} done=${data.done} error=null`);

    if (!data.done) {
      submitBtn.disabled = false;
      return;
    }

    const solved = data.reward === 1.0;
    addLog(`[END] success=${solved} steps=${stepCount} score=${data.reward.toFixed(3)}`);

    // The colour is set on an inner span so the next "Step n/m" text is not tinted.
    const banner = document.createElement('span');
    banner.style.color = solved ? 'var(--green)' : 'var(--red)';
    banner.textContent = solved
      ? '\u2713 All tests passed!'
      : 'Episode ended (max steps reached)';
    info.textContent = '';
    info.appendChild(banner);
  } catch (e) {
    console.error('Step failed:', e);
    submitBtn.disabled = false;
    addLog(`[ERROR] ${e && e.message ? e.message : e}`);
  }
}
// ── Render test results with stagger animation ──
/**
 * Replace the test-results list with one row per test.
 *
 * Each row shows a pass/fail icon and the test name; failed rows also show the
 * expected and actual values. All values are inserted with `textContent`, so
 * output such as `<class 'int'>` is displayed literally instead of being parsed
 * as HTML. Rows receive the `show` class one after another (100 ms, then
 * +120 ms per row) to produce the staggered slide-in.
 *
 * @param {Array<Object>} results - entries with `passed`, `test_name`,
 *   `expected` and `actual` fields. A non-array value just clears the list.
 */
function renderTestResults(results) {
  const container = $('#test-results');
  container.innerHTML = '';
  if (!Array.isArray(results)) return;

  results.forEach((t, i) => {
    const row = document.createElement('div');
    row.className = `test-row ${t.passed ? 'pass' : 'fail'}`;

    const addSpan = (className, text) => {
      const span = document.createElement('span');
      span.className = className;
      span.textContent = String(text);
      row.appendChild(span);
    };

    // U+2713 check mark for a pass, U+2717 ballot X for a failure.
    addSpan('test-icon', t.passed ? '\u2713' : '\u2717');
    addSpan('test-name mono', t.test_name);
    if (!t.passed) {
      addSpan('test-detail', `expected: ${t.expected} | actual: ${t.actual}`);
    }

    container.appendChild(row);
    setTimeout(() => row.classList.add('show'), 100 + i * 120);
  });
}
// ── Show reward ──
/**
 * Reveal the reward panel and fill it in.
 *
 * Writes the reward with two decimals, coloured green for exactly 1.0, orange
 * for any other positive value and red otherwise; writes "passed/total"; and
 * sets the progress bar to 0 before setting it to the rounded percentage on the
 * next animation frame.
 *
 * @param {number} reward - reward value returned for the step
 * @param {number} passed - number of passing tests
 * @param {number} total  - total number of tests
 */
function showReward(reward, passed, total) {
  $('#reward-display').style.display = 'block';

  let colour;
  if (reward === 1.0) {
    colour = 'var(--green)';
  } else if (reward > 0) {
    colour = 'var(--orange)';
  } else {
    colour = 'var(--red)';
  }

  const valueEl = $('#reward-value');
  valueEl.textContent = reward.toFixed(2);
  valueEl.style.color = colour;

  $('#tests-summary').textContent = `${passed}/${total}`;

  const bar = $('#progress-fill');
  const targetWidth = `${Math.round(reward * 100)}%`;
  bar.style.width = '0';
  requestAnimationFrame(() => {
    bar.style.width = targetWidth;
  });
}
// ── Terminal log ──
/**
 * Append a line to the log buffer and re-render the terminal.
 *
 * The terminal is made visible, every buffered line is passed through esc() and
 * wrapped in a coloured span chosen by its prefix ([START], [STEP], [END];
 * other lines are left unwrapped), a blinking cursor is appended, and the
 * view is scrolled to the bottom.
 *
 * @param {string} line - log line to append
 */
function addLog(line) {
  logs.push(line);
  $('#terminal').style.display = 'block';

  // Colour for a line based on its prefix, or null when it gets no wrapper.
  const colourFor = (entry) => {
    if (entry.startsWith('[START]')) return 'var(--accent)';
    if (entry.startsWith('[STEP]')) return 'var(--text)';
    if (entry.startsWith('[END]')) {
      return entry.includes('success=true') ? 'var(--green)' : 'var(--red)';
    }
    return null;
  };

  const rendered = [];
  for (const entry of logs) {
    const colour = colourFor(entry);
    if (colour === null) {
      rendered.push(esc(entry));
    } else {
      rendered.push(`<span style="color:${colour}">${esc(entry)}</span>`);
    }
  }

  const body = $('#terminal-body');
  body.innerHTML = rendered.join('\n') + ' <span class="cursor"></span>';
  body.scrollTop = body.scrollHeight;
}
/**
 * Escape a value for safe insertion into HTML via `innerHTML`.
 *
 * Replaces `&`, `<`, `>`, `"` and `'` with their character references. The
 * previous body replaced each character with itself, so markup in log lines
 * (for example a `<` in submitted code) reached the DOM unescaped.
 *
 * @param {*} s - value to escape; non-strings are converted with String()
 * @returns {string} the escaped text
 */
function esc(s) {
  const entities = {
    '&': '&amp;',
    '<': '&lt;',
    '>': '&gt;',
    '"': '&quot;',
    "'": '&#39;',
  };
  return String(s).replace(/[&<>"']/g, (ch) => entities[ch]);
}
/**
 * Return the arena output to its empty state: remove all test rows, hide the
 * reward panel and the terminal, and drop the buffered log lines.
 */
function clearResults() {
  $('#test-results').innerHTML = '';
  ['#reward-display', '#terminal'].forEach((selector) => {
    $(selector).style.display = 'none';
  });
  logs = [];
}
// ── Event listeners ──
// "Load Task" starts a new episode; "Submit Fix" sends the editor contents.
const clickHandlers = [
  ['#btn-reset', resetEnv],
  ['#btn-submit', submitFix],
];
for (const [selector, handler] of clickHandlers) {
  $(selector).addEventListener('click', handler);
}
// ── Init ──
// Fetch the task list as soon as the script runs.
loadTasks();
| })(); | |
| </script> | |
| </body> | |
| </html> | |