Fix: [2.0.2] yt-dlp → youtube-transcript-api로 교체

OCI 서버에서 YouTube의 봇 감지로 yt-dlp 요청이 차단됨. 자막 전용 라이브러리(youtube-transcript-api)로 교체하여 클라우드 IP 환경에서도 동작하도록 수정.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-24 14:23:52 +09:00
parent ac05240b58
commit aaf5bd8d05
2 changed files with 6 additions and 42 deletions
--- a/app/transcript.py
+++ b/app/transcript.py
@@ -1,5 +1,4 @@
-import httpx
+from youtube_transcript_api import YouTubeTranscriptApi
-import yt_dlp
 def extract_video_id(url: str) -> str:
@@ -12,44 +11,9 @@ def extract_video_id(url: str) -> str:
 def fetch_transcript(video_id: str) -> str:
-    """yt-dlp로 YouTube 자동생성 자막을 텍스트로 추출."""
+    """YouTube 자막을 텍스트로 추출."""
-    url = f"https://www.youtube.com/watch?v={video_id}"
+    ytt_api = YouTubeTranscriptApi()
-
+    transcript = ytt_api.fetch(video_id, languages=["ko", "en"])
-    ydl_opts = {
-        "skip_download": True,
-        "writeautomaticsub": True,
-        "subtitleslangs": ["ko", "en"],
-        "subtitlesformat": "json3",
-        "quiet": True,
-        "no_warnings": True,
-    }
-    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
-        info = ydl.extract_info(url, download=False)
-    subs = info.get("automatic_captions", {})
-    lang = "ko" if "ko" in subs else "en" if "en" in subs else None
-    if not lang:
-        raise ValueError(f"자막을 찾을 수 없습니다: {video_id}")
-    sub_url = None
-    for fmt in subs[lang]:
-        if fmt["ext"] == "json3":
-            sub_url = fmt["url"]
-            break
-    if not sub_url:
-        raise ValueError(f"json3 자막 포맷을 찾을 수 없습니다: {video_id}")
-    resp = httpx.get(sub_url)
-    resp.raise_for_status()
-    data = resp.json()
-    texts = []
-    for event in data.get("events", []):
-        for seg in event.get("segs", []):
-            text = seg.get("utf8", "").strip()
-            if text and text != "\n":
-                texts.append(text)
+    texts = [entry.text for entry in transcript if entry.text.strip()]
     return " ".join(texts)
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,6 +1,6 @@
 fastapi==0.115.12
 uvicorn==0.34.2
-yt-dlp>=2025.3.31
+youtube-transcript-api==1.0.3
 anthropic==0.52.0
 httpx==0.28.1
 pydantic-settings==2.8.1