Files
baekjoon-bot/workbook_enricher.py
2026-01-14 14:54:42 +09:00

157 lines
5.4 KiB
Python

import asyncio
import logging
from typing import Dict, List, Optional, Tuple

import httpx
from sqlalchemy import text
from sqlalchemy.ext.asyncio import AsyncSession
# solved.ac API v3 endpoint queried (with a ``problemId`` parameter) for one problem's metadata.
SOLVED_SHOW_URL = "https://solved.ac/api/v3/problem/show"
async def solved_problem_show(client: httpx.AsyncClient, problem_id: int) -> Tuple[Optional[str], Optional[str], Optional[int], Optional[List[str]]]:
    """Fetch one problem's metadata from the solved.ac ``problem/show`` endpoint.

    Returns a ``(title_ko, title_en, level, tag_keys)`` tuple. All four
    entries are ``None`` when the response status is not 200; otherwise
    ``tag_keys`` is a (possibly empty) list of the truthy ``key`` values
    found in the response's ``tags`` entries.
    """
    response = await client.get(SOLVED_SHOW_URL, params={"problemId": problem_id})
    if response.status_code == 200:
        payload = response.json()
        korean_title = payload.get("titleKo")
        english_title = payload.get("titleEn")
        difficulty = payload.get("level")

        # Each tag entry looks like
        # {"key": "...", "isMeta": ..., "bojTagId": ..., "problemCount": ...}.
        keys = []
        for tag in payload.get("tags") or []:
            key = tag.get("key")
            if key:
                keys.append(key)

        return korean_title, english_title, difficulty, keys

    return None, None, None, None
async def enrich_workbook(
    db: AsyncSession,
    workbook_id: int,
    only_missing: bool = True,  # True: fill only NULL columns; False: overwrite
    commit_every: int = 50,  # commit after this many successful updates
    sleep_sec: float = 0.12,  # short pause between solved.ac calls to limit load
    timeout: float = 10.0,
) -> Dict:
    """Fill workbook problem metadata using the solved.ac ``problem/show`` API.

    Precondition: ``workbook_problems`` already contains the
    ``(workbook_id, problem_id)`` rows. This function only fills their
    ``title_ko``, ``title_en``, ``level`` and ``tags`` columns.

    Args:
        db: Async session used for the SELECT/UPDATE statements and commits.
        workbook_id: Workbook whose problems are enriched.
        only_missing: When true, only rows with at least one NULL metadata
            column are processed and only NULL columns are filled (existing
            values are kept). When false, every row of the workbook is
            processed and all four columns are overwritten.
        commit_every: Commit after this many successful updates. A value
            ``<= 0`` disables intermediate commits; a final commit always
            runs after the loop.
        sleep_sec: Seconds to sleep after each solved.ac request (including
            skipped and failed ones). ``<= 0`` disables the pause.
        timeout: Timeout passed to the ``httpx.AsyncClient``.

    Returns:
        A summary dict with ``workbook_id``, ``target_count``, ``updated``,
        ``skipped``, ``failed`` and ``message``; when there was work to do it
        also echoes ``only_missing``, ``commit_every`` and ``sleep_sec``.
    """
    logger = logging.getLogger(__name__)

    # 1) Collect the target problem ids (rows with missing data, or all rows).
    if only_missing:
        select_stmt = text("""
            SELECT problem_id
            FROM workbook_problems
            WHERE workbook_id = :wid
              AND (
                title_ko IS NULL
                OR title_en IS NULL
                OR level IS NULL
                OR tags IS NULL
              )
            ORDER BY problem_id
        """)
    else:
        select_stmt = text("""
            SELECT problem_id
            FROM workbook_problems
            WHERE workbook_id = :wid
            ORDER BY problem_id
        """)
    rows = (await db.execute(select_stmt, {"wid": workbook_id})).all()
    problem_ids = [int(row[0]) for row in rows]

    if not problem_ids:
        return {
            "workbook_id": workbook_id,
            "target_count": 0,
            "updated": 0,
            "skipped": 0,
            "failed": 0,
            "message": "nothing to enrich (already filled)",
        }

    # The UPDATE statement does not depend on the row, so build it once.
    if only_missing:
        # The existing column comes first in COALESCE so that only NULL
        # columns are filled and already-stored values are never replaced.
        update_stmt = text("""
            UPDATE workbook_problems
            SET title_ko = COALESCE(title_ko, :tko),
                title_en = COALESCE(title_en, :ten),
                level = COALESCE(level, :lvl),
                tags = COALESCE(tags, :tags)
            WHERE workbook_id = :wid
              AND problem_id = :pid
        """)
    else:
        update_stmt = text("""
            UPDATE workbook_problems
            SET title_ko = :tko,
                title_en = :ten,
                level = :lvl,
                tags = :tags
            WHERE workbook_id = :wid
              AND problem_id = :pid
        """)

    updated = 0
    skipped = 0
    failed = 0
    pending = 0  # successful updates since the last commit

    # 2) Call solved.ac for each problem and store the result.
    async with httpx.AsyncClient(timeout=timeout, headers={"User-Agent": "baekjoon-n8n-bot/1.0"}) as client:
        for pid in problem_ids:
            try:
                title_ko, title_en, level, tag_keys = await solved_problem_show(client, pid)

                if title_ko is None and title_en is None and level is None and not tag_keys:
                    # Nothing usable came back (e.g. non-200 response).
                    skipped += 1
                else:
                    # tags is stored as an array column; the list is passed
                    # as a bind parameter and an empty list is stored as NULL.
                    params = {
                        "tko": title_ko,
                        "ten": title_en,
                        "lvl": level,
                        "tags": tag_keys if tag_keys else None,
                        "wid": workbook_id,
                        "pid": pid,
                    }
                    # SAVEPOINT: on PostgreSQL a failed statement aborts the
                    # enclosing transaction, which would make every later
                    # UPDATE fail and discard the batch's earlier updates.
                    async with db.begin_nested():
                        await db.execute(update_stmt, params)
                    updated += 1
                    pending += 1

                    # Batch commit, counted by successful updates so that
                    # skipped/failed items cannot make a batch boundary slip.
                    if commit_every > 0 and pending >= commit_every:
                        await db.commit()
                        pending = 0
            except Exception:
                # Best effort: record the failure and continue with the next problem.
                failed += 1
                logger.exception(
                    "enrich_workbook: failed to enrich problem %s (workbook %s)",
                    pid,
                    workbook_id,
                )

            # Rate limit after every request, including skipped and failed
            # ones, so error responses do not turn into a burst of calls.
            if sleep_sec > 0:
                await asyncio.sleep(sleep_sec)

    await db.commit()

    return {
        "workbook_id": workbook_id,
        "target_count": len(problem_ids),
        "updated": updated,
        "skipped": skipped,
        "failed": failed,
        "only_missing": only_missing,
        "commit_every": commit_every,
        "sleep_sec": sleep_sec,
        "message": "enrich done",
    }