Feat: [main] news-summary-bot 완성
All checks were successful
news-summary-bot-cicd / build_push_deploy (push) Successful in 11m43s

This commit is contained in:
sm4640
2026-03-24 12:19:54 +09:00
commit dc4656e452
21 changed files with 1028 additions and 0 deletions

0
app/__init__.py Normal file
View File

12
app/config.py Normal file
View File

@@ -0,0 +1,12 @@
from pydantic_settings import BaseSettings
class Settings(BaseSettings):
    """Application configuration resolved by pydantic-settings.

    Values are read from the process environment and from a local ``.env``
    file; keys that do not correspond to a declared field are ignored.
    """

    # API key handed to the Anthropic client.
    anthropic_api_key: str
    # Webhook URL that receives the summary embed.
    discord_webhook_url: str
    # Shared secret expected in the X-Api-Secret request header.
    # The empty default makes the endpoint skip the check entirely.
    api_secret: str = ""

    model_config = dict(env_file=".env", extra="ignore")


# Built once at import time and shared by every module that imports it.
settings = Settings()

109
app/discord.py Normal file
View File

@@ -0,0 +1,109 @@
import re
from datetime import datetime, timezone
import httpx
from app.config import settings
def _extract_video_id(video_url: str) -> str | None:
"""URL에서 YouTube 비디오 ID 추출."""
patterns = [
r"(?:youtu\.be/)([^?&]+)",
r"(?:v=)([^?&]+)",
]
for p in patterns:
m = re.search(p, video_url)
if m:
return m.group(1)
return None
# Section titles the summariser is prompted to emit (whitespace-tolerant).
_SECTION_NAMES = r"한줄\s*요약|주요\s*내용|결론/?시사점"

# A header line is either "## <name>[:]" or "[- ]**<name>" followed by at most
# ONE closing decoration ("**", "**:", ":**" or ":").  Consuming only that one
# decoration keeps bold markup that belongs to the section content intact.
_SECTION_HEADER_RE = re.compile(
    rf"^(?:##\s*(?P<plain>{_SECTION_NAMES})\s*:?"
    rf"|(?:-\s*)?\*\*(?P<bold>{_SECTION_NAMES})\s*(?:\*\*\s*:?|:\s*\*\*|:)?)"
    r"\s*(?P<rest>.*)"
)


def _parse_summary(summary: str) -> dict[str, str]:
    """Split a model-written summary into its named sections.

    Args:
        summary: Markdown text containing headers such as
            ``- **한줄 요약**: ...``, ``**주요 내용**`` or ``## 결론/시사점``.

    Returns:
        A mapping from the header name with all whitespace removed
        (``"한줄요약"``, ``"주요내용"``, ``"결론/시사점"`` or ``"결론시사점"``)
        to the stripped section body.  Text that appears before the first
        recognised header is discarded; an input without any recognised
        header yields an empty dict.
    """
    sections: dict[str, str] = {}
    current_key: str | None = None
    current_lines: list[str] = []

    for line in summary.split("\n"):
        header = _SECTION_HEADER_RE.match(line)
        if header is None:
            # Body line: only meaningful once a section has been opened.
            if current_key is not None:
                current_lines.append(line)
            continue

        # A new header closes the section collected so far.
        if current_key is not None:
            sections[current_key] = "\n".join(current_lines).strip()

        name = header.group("plain") or header.group("bold")
        # Drop every kind of whitespace so "한줄 요약" and "한줄요약" share a key.
        current_key = "".join(name.split())
        inline_text = header.group("rest").strip()
        current_lines = [inline_text] if inline_text else []

    if current_key is not None:
        sections[current_key] = "\n".join(current_lines).strip()
    return sections
async def send_to_discord(title: str, video_url: str, summary: str) -> None:
    """Post the summary to the configured Discord webhook as one embed.

    The summary is split into sections with ``_parse_summary``.  When neither
    a one-line summary nor main points can be found, the raw summary text is
    used as the embed description instead.

    Args:
        title: Video title shown as the embed title.
        video_url: Link to the video; used as the embed URL, as a field value
            and to derive the thumbnail.
        summary: Summary text produced by the summariser.

    Raises:
        httpx.HTTPStatusError: If the webhook answers with a 4xx/5xx status.
    """
    # Limits from Discord's embed documentation; exceeding any of them makes
    # Discord reject the whole request, so over-long text is cut instead.
    title_limit = 256
    description_limit = 4096
    field_value_limit = 1024

    sections = _parse_summary(summary)
    oneliner = sections.get("한줄요약", "")
    main_points = sections.get("주요내용", "")
    # The parser keeps the "/" when the model wrote it, so accept both keys.
    conclusion = sections.get("결론/시사점", sections.get("결론시사점", ""))

    link_field = {"name": "🔗 원본 영상", "value": video_url, "inline": False}

    if not oneliner and not main_points:
        # Parsing failed: fall back to the unstructured summary text.
        description = summary[:description_limit]
        fields = [link_field]
    else:
        description = (
            f"### 💡 {oneliner}"[:description_limit] if oneliner else ""
        )
        fields = []
        if main_points:
            fields.append({
                "name": "📋 주요 내용",
                "value": main_points[:field_value_limit],
                "inline": False,
            })
        if conclusion:
            fields.append({
                "name": "🎯 결론 / 시사점",
                "value": conclusion[:field_value_limit],
                "inline": False,
            })
        fields.append(link_field)

    embed = {
        "title": f"📰 {title}"[:title_limit],
        "url": video_url,
        "description": description,
        "color": 0x2B2D31,
        "fields": fields,
        "footer": {
            "text": "YouTube 뉴스 요약 봇",
            "icon_url": "https://www.youtube.com/s/desktop/f5ced909/img/favicon_144x144.png",
        },
        "timestamp": datetime.now(timezone.utc).isoformat(),
    }

    # The thumbnail is optional: it is only added when an ID could be extracted.
    video_id = _extract_video_id(video_url)
    if video_id:
        embed["thumbnail"] = {
            "url": f"https://img.youtube.com/vi/{video_id}/hqdefault.jpg"
        }

    payload = {"embeds": [embed]}
    async with httpx.AsyncClient() as client:
        resp = await client.post(settings.discord_webhook_url, json=payload)
        resp.raise_for_status()

36
app/main.py Normal file
View File

@@ -0,0 +1,36 @@
from fastapi import FastAPI, Header, HTTPException
from pydantic import BaseModel
from app.config import settings
from app.discord import send_to_discord
from app.summarizer import summarize
from app.transcript import extract_video_id, fetch_transcript
# FastAPI application object; the route decorators in this module register
# their handlers on it.
app = FastAPI(title="News Summary Bot")
class SummarizeRequest(BaseModel):
    """Request body of the summarize endpoint."""

    # Link to the YouTube video whose transcript should be summarised.
    video_url: str

    # Optional display title; when left empty the handler substitutes the
    # extracted video ID.
    title: str = ""
@app.post("/api/news/summarize")
async def summarize_video(
    req: SummarizeRequest,
    x_api_secret: str = Header(default=""),
):
    """Summarise a YouTube video and post the result to Discord.

    Steps: authenticate (only when ``settings.api_secret`` is set), extract
    the video ID, fetch the transcript, summarise it, send the embed.

    Args:
        req: Request body with the video URL and an optional title.
        x_api_secret: Value of the ``X-Api-Secret`` request header.

    Returns:
        A dict with ``status``, the ``title`` that was used and the
        ``summary_length`` in characters.

    Raises:
        HTTPException: 401 when the secret does not match, 400 when the URL
            carries no video ID, 404 when no usable transcript exists.
    """
    import asyncio
    import hmac

    if settings.api_secret and not hmac.compare_digest(
        # Compare as bytes: compare_digest only accepts ASCII-only str.
        x_api_secret.encode("utf-8"),
        settings.api_secret.encode("utf-8"),
    ):
        raise HTTPException(status_code=401, detail="Unauthorized")

    try:
        video_id = extract_video_id(req.video_url)
    except ValueError as exc:
        # A malformed URL is a client error, not a server fault.
        raise HTTPException(status_code=400, detail=str(exc)) from exc

    try:
        # fetch_transcript is synchronous (yt-dlp + blocking HTTP); run it in
        # a worker thread so the event loop keeps serving other requests.
        transcript = await asyncio.to_thread(fetch_transcript, video_id)
    except ValueError as exc:
        raise HTTPException(status_code=404, detail=str(exc)) from exc

    title = req.title or video_id
    # The summariser uses a synchronous API client as well.
    summary = await asyncio.to_thread(summarize, transcript, title)
    await send_to_discord(title, req.video_url, summary)
    return {"status": "ok", "title": title, "summary_length": len(summary)}
@app.get("/api/news/health")
async def health():
    """Health-check endpoint: unconditionally reports an "ok" status."""
    return dict(status="ok")

34
app/summarizer.py Normal file
View File

@@ -0,0 +1,34 @@
import anthropic
from app.config import settings
# Synchronous Anthropic client, created once at import time with the API key
# taken from the application settings.
client = anthropic.Anthropic(api_key=settings.anthropic_api_key)
# System prompt (written in Korean) sent with every summarisation request.
# It asks for three sections -- a one-line summary, 3-7 bulleted main points
# and a conclusion/implications section -- and sets the rules: answer in
# Korean, be concise, ignore caption typos and stutters.
# NOTE(review): the section titles in this text look like the ones the Discord
# formatter's header regex matches -- keep the two in sync when editing.
SYSTEM_PROMPT = """너는 뉴스/경제 유튜브 영상 요약 전문가야.
영상 자막 텍스트를 받아서 아래 형식으로 요약해줘.
## 형식
- **한줄 요약**: 영상의 핵심을 한 문장으로
- **주요 내용**: 핵심 포인트를 3~7개 불릿으로 정리
- **결론/시사점**: 영상이 전달하려는 메시지나 시사점
## 규칙
- 한국어로 작성
- 간결하고 명확하게
- 자막의 오타나 말더듬은 무시하고 의미 중심으로 정리
"""
def summarize(
    transcript: str,
    title: str,
    *,
    model: str = "claude-sonnet-4-20250514",
    max_tokens: int = 2048,
) -> str:
    """Ask the Anthropic model for a structured summary of a transcript.

    Args:
        transcript: Caption text of the video.
        title: Video title, passed to the model as additional context.
        model: Model identifier to call; defaults to the previously
            hard-coded value.
        max_tokens: Upper bound on generated tokens.

    Returns:
        The concatenated text of all text blocks in the model response.

    Raises:
        RuntimeError: If the response contains no text at all.
    """
    message = client.messages.create(
        model=model,
        max_tokens=max_tokens,
        system=SYSTEM_PROMPT,
        messages=[
            {
                "role": "user",
                "content": f"영상 제목: {title}\n\n자막:\n{transcript}",
            }
        ],
    )
    # The response content is a list of typed blocks; collect every text block
    # instead of assuming the first entry exists and is text.
    text = "".join(
        block.text
        for block in message.content
        if getattr(block, "type", None) == "text"
    )
    if not text:
        raise RuntimeError("Model response contained no text content")
    return text

55
app/transcript.py Normal file
View File

@@ -0,0 +1,55 @@
import httpx
import yt_dlp
def extract_video_id(url: str) -> str:
    """Extract the video ID from a YouTube URL.

    Supported shapes: ``youtu.be/<id>``, any URL with a ``v=<id>`` query
    parameter, and ``/shorts/<id>``, ``/embed/<id>``, ``/live/<id>`` paths.
    URLs without a scheme (``youtu.be/<id>``) are accepted as well.

    Args:
        url: The URL to inspect.

    Returns:
        The non-empty video ID, without query string or fragment.

    Raises:
        ValueError: If no video ID can be found in *url*.
    """
    from urllib.parse import parse_qs, urlparse

    cleaned = url.strip()
    # urlparse only identifies the host when a scheme (or "//") is present.
    if not cleaned.lower().startswith(("http://", "https://", "//")):
        cleaned = f"https://{cleaned}"

    parsed = urlparse(cleaned)
    host = (parsed.hostname or "").lower()
    segments = [part for part in parsed.path.split("/") if part]

    video_id = ""
    if host == "youtu.be" or host.endswith(".youtu.be"):
        # Short links carry the ID as the first path segment.
        if segments:
            video_id = segments[0]
    else:
        # parse_qs drops blank values, so "v=" alone is treated as missing.
        query_ids = parse_qs(parsed.query).get("v")
        if query_ids:
            video_id = query_ids[0]
        elif len(segments) >= 2 and segments[0] in ("shorts", "embed", "live"):
            video_id = segments[1]

    if not video_id:
        raise ValueError(f"유효하지 않은 YouTube URL: {url}")
    return video_id
def fetch_transcript(video_id: str) -> str:
    """Download a video's captions and return them as plain text.

    The video metadata is resolved with yt-dlp (nothing is downloaded), a
    ``json3`` caption track is selected -- Korean before English, uploaded
    subtitles before automatic captions -- and its segments are joined with
    single spaces.

    Args:
        video_id: YouTube video ID.

    Returns:
        The caption text as one space-separated string.

    Raises:
        ValueError: If no Korean/English caption track exists, none of them
            offers the json3 format, or the track contains no text.
        httpx.HTTPStatusError: If downloading the caption file fails.
    """
    url = f"https://www.youtube.com/watch?v={video_id}"
    ydl_opts = {
        "skip_download": True,
        "writeautomaticsub": True,
        "subtitleslangs": ["ko", "en"],
        "subtitlesformat": "json3",
        "quiet": True,
        "no_warnings": True,
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info = ydl.extract_info(url, download=False)

    sub_url = _select_json3_url(info or {}, video_id)
    resp = httpx.get(sub_url)
    resp.raise_for_status()

    transcript = _json3_to_text(resp.json())
    if not transcript:
        # Summarising an empty transcript would only produce a made-up summary.
        raise ValueError(f"자막 내용이 비어 있습니다: {video_id}")
    return transcript


def _select_json3_url(info: dict, video_id: str) -> str:
    """Pick the URL of the best available json3 caption track from *info*."""
    # Either key may be missing or None in the metadata dict.
    manual = info.get("subtitles") or {}
    automatic = info.get("automatic_captions") or {}

    found_language = False
    for lang in ("ko", "en"):
        for tracks in (manual.get(lang), automatic.get(lang)):
            if not tracks:
                continue
            found_language = True
            for fmt in tracks:
                if fmt.get("ext") == "json3" and fmt.get("url"):
                    return fmt["url"]

    if not found_language:
        raise ValueError(f"자막을 찾을 수 없습니다: {video_id}")
    raise ValueError(f"json3 자막 포맷을 찾을 수 없습니다: {video_id}")


def _json3_to_text(data: dict) -> str:
    """Join the non-blank ``utf8`` segments of a json3 caption document."""
    texts = []
    for event in data.get("events") or []:
        for seg in event.get("segs") or []:
            text = (seg.get("utf8") or "").strip()
            if text:
                texts.append(text)
    return " ".join(texts)