| import uuid |
| import base64 |
| from pathlib import Path |
| from config import GROQ_TTS_API_KEY, GROQ_TTS_MODEL |
| from gtts import gTTS |
| from fastapi import HTTPException |
|
|
def text_to_speech(
    text: str,
    voice: str = "en",
    fmt: str = "mp3",
) -> str:
    """Convert text to speech with gTTS and save it as an MP3 file.

    The audio is written to ``temp_audio/tts_<8 hex chars>.mp3`` relative to
    the current working directory; the directory is created if missing.
    The file is NOT removed by this function -- the caller owns it.

    Args:
        text: Text to synthesize. Surrounding whitespace is stripped.
        voice: Language code passed to gTTS as ``lang``. A falsy value
            falls back to ``"en"``.
        fmt: Output format. Only ``"mp3"`` is accepted.

    Returns:
        The path of the generated file, as a string.

    Raises:
        ValueError: If ``text`` is empty/whitespace-only or ``fmt`` is not
            ``"mp3"``.
        RuntimeError: If anything fails while creating the directory or
            generating/saving the audio. The original error is attached as
            ``__cause__``.
    """
    if not text or not text.strip():
        raise ValueError("Text cannot be empty")

    if fmt != "mp3":
        raise ValueError("Only MP3 format is supported by the free TTS backend")

    # Tracked outside the try so the error handler knows whether a file may
    # already exist on disk.
    output_path = None
    try:
        temp_dir = Path("temp_audio")
        temp_dir.mkdir(exist_ok=True)

        output_path = temp_dir / f"tts_{uuid.uuid4().hex[:8]}.{fmt}"

        tts = gTTS(text=text.strip(), lang=voice or "en")
        tts.save(str(output_path))
    except Exception as e:
        # A failed save can leave an empty or truncated file behind; remove
        # it so callers never pick up a broken MP3. Cleanup is best-effort
        # and must not mask the original error.
        if output_path is not None:
            try:
                output_path.unlink(missing_ok=True)
            except OSError:
                pass
        # RuntimeError is still caught by existing `except Exception`
        # handlers; `from e` keeps the root cause available to callers.
        raise RuntimeError(f"Unexpected error in text_to_speech: {e}") from e

    return str(output_path)
|
|
|
|
def text_to_speech_base64(
    text: str,
    voice: str = "en",
    fmt: str = "mp3",
) -> dict:
    """Convert text to speech with gTTS and return the audio as Base64.

    The audio is first saved to a temporary file under ``temp_audio``
    (created if missing), read back, and the file is then removed whether
    or not the call succeeds.

    Args:
        text: Text to synthesize. Surrounding whitespace is stripped.
        voice: Language code passed to gTTS as ``lang``. A falsy value
            falls back to ``"en"``.
        fmt: Output format. Only ``"mp3"`` is accepted.

    Returns:
        A dict with keys ``audio_base64`` (Base64 text of the MP3),
        ``mime_type`` (``"audio/mpeg"``), ``format``, ``filename`` (name of
        the temporary file that was used), ``size_bytes`` (raw audio length)
        and ``size_base64`` (length of the Base64 string).

    Raises:
        ValueError: If ``text`` is empty/whitespace-only or ``fmt`` is not
            ``"mp3"``.
        RuntimeError: If anything fails while generating, saving, reading
            or encoding the audio. The original error is attached as
            ``__cause__``.
    """
    if not text or not text.strip():
        raise ValueError("Text cannot be empty")

    if fmt != "mp3":
        raise ValueError("Only MP3 format is supported by the free TTS backend")

    # Tracked outside the try so the finally clause knows whether a temp
    # file may exist.
    output_path = None
    try:
        temp_dir = Path("temp_audio")
        temp_dir.mkdir(exist_ok=True)

        output_filename = f"tts_{uuid.uuid4().hex[:8]}.{fmt}"
        output_path = temp_dir / output_filename

        tts = gTTS(text=text.strip(), lang=voice or "en")
        tts.save(str(output_path))

        audio_bytes = output_path.read_bytes()
        audio_base64 = base64.b64encode(audio_bytes).decode('utf-8')
    except Exception as e:
        # RuntimeError is still caught by existing `except Exception`
        # handlers; `from e` keeps the root cause available to callers.
        raise RuntimeError(
            f"Unexpected error in text_to_speech_base64: {e}"
        ) from e
    finally:
        # Remove the temp file on success and on failure alike, so a failed
        # save/read cannot leak files. Cleanup is best-effort: a failed
        # unlink must not mask the real error or discard produced audio.
        if output_path is not None:
            try:
                output_path.unlink(missing_ok=True)
            except OSError:
                pass

    return {
        "audio_base64": audio_base64,
        "mime_type": "audio/mpeg",
        "format": fmt,
        "filename": output_filename,
        "size_bytes": len(audio_bytes),
        "size_base64": len(audio_base64),
    }