Feat: [main] baekjoon-bot-v1
This commit is contained in:
156
workbook_enricher.py
Normal file
156
workbook_enricher.py
Normal file
@@ -0,0 +1,156 @@
|
||||
import asyncio
|
||||
from typing import Dict, List, Optional, Tuple
|
||||
|
||||
import httpx
|
||||
from sqlalchemy import text
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
|
||||
# solved.ac v3 endpoint that returns metadata for a single problem;
# it is queried with a ``problemId`` query parameter.
SOLVED_SHOW_URL = "https://solved.ac/api/v3/problem/show"
|
||||
|
||||
|
||||
async def solved_problem_show(
    client: httpx.AsyncClient,
    problem_id: int,
) -> Tuple[Optional[str], Optional[str], Optional[int], Optional[List[str]]]:
    """Fetch metadata for one problem from the solved.ac ``problem/show`` API.

    Args:
        client: Async HTTP client used to issue the GET request.
        problem_id: Problem number, sent as the ``problemId`` query parameter.

    Returns:
        A ``(title_ko, title_en, level, tag_keys)`` tuple. All four elements
        are ``None`` when the response status is not 200. Otherwise the
        first three come from the ``titleKo`` / ``titleEn`` / ``level``
        fields of the JSON body (``None`` if absent) and ``tag_keys`` is the
        list of non-empty ``key`` values found under ``tags`` (possibly
        empty).
    """
    response = await client.get(SOLVED_SHOW_URL, params={"problemId": problem_id})
    if response.status_code != 200:
        return None, None, None, None

    payload = response.json()
    korean_title = payload.get("titleKo")
    english_title = payload.get("titleEn")
    difficulty = payload.get("level")

    # Each entry under "tags" looks like
    # {"key": "...", "isMeta": ..., "bojTagId": ..., "problemCount": ...};
    # only the truthy "key" values are kept.
    tag_keys = []
    for tag in payload.get("tags") or []:
        key = tag.get("key")
        if key:
            tag_keys.append(key)

    return korean_title, english_title, difficulty, tag_keys
|
||||
|
||||
|
||||
async def enrich_workbook(
    db: AsyncSession,
    workbook_id: int,
    only_missing: bool = True,  # True: fill only NULL columns; False: overwrite
    commit_every: int = 50,  # commit after every N processed problems (0 = only at the end)
    sleep_sec: float = 0.12,  # short pause between calls to keep load on solved.ac low
    timeout: float = 10.0,
) -> Dict:
    """Fill ``workbook_problems`` metadata for one workbook from solved.ac.

    Precondition: ``workbook_problems`` already contains the
    ``(workbook_id, problem_id)`` rows. For every selected problem the
    solved.ac ``problem/show`` result is written into ``title_ko``,
    ``title_en``, ``level`` and ``tags``.

    Args:
        db: Async SQLAlchemy session used for the SELECT/UPDATE statements
            and the commits.
        workbook_id: Workbook whose rows are enriched.
        only_missing: When true, only rows with at least one NULL metadata
            column are selected and existing values are kept (``COALESCE``).
            When false, every row of the workbook is selected and all four
            columns are overwritten.
        commit_every: Commit after every ``commit_every`` processed problems
            (updated, skipped or failed). ``0`` disables the periodic commit;
            the final commit always runs.
        sleep_sec: Pause after each problem, applied whether the problem was
            updated, skipped or failed. Values ``<= 0`` disable the pause.
        timeout: Timeout passed to the HTTP client.

    Returns:
        A summary dict with ``workbook_id``, ``target_count``, ``updated``,
        ``skipped``, ``failed`` and ``message``; when at least one problem
        was selected it also echoes ``only_missing``, ``commit_every`` and
        ``sleep_sec``.

    Raises:
        Errors from the initial SELECT and from ``db.commit()`` propagate to
        the caller. Per-problem fetch/update errors are logged, counted in
        ``failed`` and do not stop the run.
    """
    # Function-scope import so this block does not depend on the file's
    # top-level import list.
    import logging

    log = logging.getLogger(__name__)

    # 1) Collect the target problem ids (rows with missing data, or all rows).
    if only_missing:
        select_sql = text("""
            SELECT problem_id
            FROM workbook_problems
            WHERE workbook_id = :wid
              AND (
                    title_ko IS NULL
                 OR title_en IS NULL
                 OR level IS NULL
                 OR tags IS NULL
              )
            ORDER BY problem_id
        """)
    else:
        select_sql = text("""
            SELECT problem_id
            FROM workbook_problems
            WHERE workbook_id = :wid
            ORDER BY problem_id
        """)

    rows = (await db.execute(select_sql, {"wid": workbook_id})).all()
    problem_ids = [int(row[0]) for row in rows]

    if not problem_ids:
        return {
            "workbook_id": workbook_id,
            "target_count": 0,
            "updated": 0,
            "skipped": 0,
            "failed": 0,
            "message": "nothing to enrich (already filled)",
        }

    # only_missing=True fills NULL columns via COALESCE; otherwise overwrite.
    if only_missing:
        update_sql = text("""
            UPDATE workbook_problems
            SET title_ko = COALESCE(:tko, title_ko),
                title_en = COALESCE(:ten, title_en),
                level = COALESCE(:lvl, level),
                tags = COALESCE(:tags, tags)
            WHERE workbook_id = :wid
              AND problem_id = :pid
        """)
    else:
        update_sql = text("""
            UPDATE workbook_problems
            SET title_ko = :tko,
                title_en = :ten,
                level = :lvl,
                tags = :tags
            WHERE workbook_id = :wid
              AND problem_id = :pid
        """)

    updated = 0
    skipped = 0
    failed = 0

    # 2) Call solved.ac for each problem and store the result.
    async with httpx.AsyncClient(timeout=timeout, headers={"User-Agent": "baekjoon-n8n-bot/1.0"}) as client:
        for i, pid in enumerate(problem_ids, start=1):
            try:
                title_ko, title_en, level, tag_keys = await solved_problem_show(client, pid)

                if title_ko is None and title_en is None and level is None and not tag_keys:
                    # Nothing usable came back (e.g. non-200 response).
                    skipped += 1
                else:
                    # Original author's note: tags is a TEXT[] column
                    # (Postgres) and asyncpg binds a Python list as an array.
                    # An empty list is stored as NULL.
                    params = {
                        "tko": title_ko,
                        "ten": title_en,
                        "lvl": level,
                        "tags": tag_keys if tag_keys else None,
                        "wid": workbook_id,
                        "pid": pid,
                    }
                    # Run each UPDATE inside a savepoint: on Postgres a
                    # failed statement would otherwise abort the whole
                    # transaction and take the rest of the batch with it.
                    async with db.begin_nested():
                        await db.execute(update_sql, params)
                    updated += 1
            except Exception:
                # Best effort: record the failure and move on to the next
                # problem instead of aborting the whole run.
                failed += 1
                log.exception(
                    "failed to enrich problem %s in workbook %s", pid, workbook_id
                )

            # Batch commit, evaluated for every processed problem so commit
            # points are not missed when the i-th problem was skipped or
            # failed. The truthiness guard avoids a modulo-by-zero.
            if commit_every and i % commit_every == 0:
                await db.commit()

            # Rate limit: also applies after skips and failures, which is
            # when backing off from the API matters most.
            if sleep_sec > 0:
                await asyncio.sleep(sleep_sec)

    await db.commit()

    return {
        "workbook_id": workbook_id,
        "target_count": len(problem_ids),
        "updated": updated,
        "skipped": skipped,
        "failed": failed,
        "only_missing": only_missing,
        "commit_every": commit_every,
        "sleep_sec": sleep_sec,
        "message": "enrich done",
    }
|
||||
Reference in New Issue
Block a user