157 lines
5.4 KiB
Python
157 lines
5.4 KiB
Python
import asyncio
|
|
from typing import Dict, List, Optional, Tuple
|
|
|
|
import httpx
|
|
from sqlalchemy import text
|
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
|
|
|
|
# solved.ac v3 endpoint that solved_problem_show queries (with a `problemId`
# query parameter) to fetch the metadata of a single problem.
SOLVED_SHOW_URL = "https://solved.ac/api/v3/problem/show"
|
|
|
|
|
|
async def solved_problem_show(client: httpx.AsyncClient, problem_id: int) -> Tuple[Optional[str], Optional[str], Optional[int], Optional[List[str]]]:
    """
    Fetch the metadata of one problem from the solved.ac ``problem/show`` API.

    Returns a ``(title_ko, title_en, level, tag_keys)`` tuple.

    * When the response status is not 200, every element is ``None``.
    * Otherwise the titles and level are read from the ``titleKo``,
      ``titleEn`` and ``level`` fields of the JSON body (``None`` if absent),
      and ``tag_keys`` is a possibly empty list holding the truthy ``key``
      value of each entry under ``tags``.
    """
    response = await client.get(SOLVED_SHOW_URL, params={"problemId": problem_id})

    if response.status_code == 200:
        payload = response.json()
        name_ko = payload.get("titleKo")
        name_en = payload.get("titleEn")
        tier = payload.get("level")

        # Each tag entry looks like:
        #   {"key": "...", "isMeta": ..., "bojTagId": ..., "problemCount": ...}
        # Only the "key" is kept, and entries without a usable key are dropped.
        keys = []
        for entry in payload.get("tags") or []:
            key = entry.get("key")
            if key:
                keys.append(key)

        return name_ko, name_en, tier, keys

    return None, None, None, None
|
|
|
|
|
|
async def enrich_workbook(
    db: AsyncSession,
    workbook_id: int,
    only_missing: bool = True,  # True: fill only NULL columns; False: overwrite
    commit_every: int = 50,  # commit after this many successful updates
    sleep_sec: float = 0.12,  # short pause between calls to go easy on solved.ac
    timeout: float = 10.0,
) -> Dict:
    """
    Fill in solved.ac metadata for the problems of one workbook.

    Precondition: ``workbook_problems`` already contains the
    ``(workbook_id, problem_id)`` rows. For each targeted row this calls
    ``solved_problem_show`` and stores ``title_ko`` / ``title_en`` / ``level``
    / ``tags``.

    Args:
        db: Async SQLAlchemy session used for the SELECT, the UPDATEs and the
            commits.
        workbook_id: Workbook whose rows are enriched.
        only_missing: When true, only rows with at least one NULL metadata
            column are targeted, and already populated columns are left
            untouched. When false, every row of the workbook is targeted and
            all four columns are overwritten with the fetched values
            (including NULL for values the API did not return).
        commit_every: Commit after this many successful updates. A value of
            zero or less disables intermediate commits; a final commit is
            always issued.
        sleep_sec: Pause inserted before every request except the first, no
            matter how the previous problem ended (updated, skipped, failed).
        timeout: Timeout passed to the ``httpx.AsyncClient``.

    Returns:
        A summary dict with ``workbook_id``, ``target_count``, ``updated``,
        ``skipped``, ``failed`` and ``message``; when there was something to
        process it also echoes ``only_missing``, ``commit_every`` and
        ``sleep_sec``.

    Raises:
        Errors from the initial SELECT, from a rollback, or from the final
        commit propagate. Per-problem fetch/update errors and intermediate
        commit errors are logged and counted as ``failed`` instead.
    """
    # Function-scope import keeps this block self-contained.
    import logging

    log = logging.getLogger(__name__)

    # 1) Collect the target problem ids (rows with missing data, or all rows).
    select_sql = """
        SELECT problem_id
        FROM workbook_problems
        WHERE workbook_id = :wid
    """
    if only_missing:
        select_sql += """
          AND (
                title_ko IS NULL
             OR title_en IS NULL
             OR level IS NULL
             OR tags IS NULL
          )
        """
    select_sql += """
        ORDER BY problem_id
    """
    rows = (await db.execute(text(select_sql), {"wid": workbook_id})).all()
    problem_ids = [int(row[0]) for row in rows]

    if not problem_ids:
        return {
            "workbook_id": workbook_id,
            "target_count": 0,
            "updated": 0,
            "skipped": 0,
            "failed": 0,
            "message": "nothing to enrich (already filled)",
        }

    # The statement is loop-invariant, so build it once.
    if only_missing:
        # COALESCE(column, :param) keeps an existing value and only falls back
        # to the fetched one when the column is NULL. The reverse argument
        # order would overwrite populated columns.
        update_stmt = text("""
            UPDATE workbook_problems
            SET title_ko = COALESCE(title_ko, :tko),
                title_en = COALESCE(title_en, :ten),
                level    = COALESCE(level, :lvl),
                tags     = COALESCE(tags, :tags)
            WHERE workbook_id = :wid
              AND problem_id = :pid
        """)
    else:
        update_stmt = text("""
            UPDATE workbook_problems
            SET title_ko = :tko,
                title_en = :ten,
                level    = :lvl,
                tags     = :tags
            WHERE workbook_id = :wid
              AND problem_id = :pid
        """)

    updated = 0
    skipped = 0
    failed = 0
    pending = 0  # successful updates not yet committed

    # 2) Query solved.ac and update row by row.
    async with httpx.AsyncClient(timeout=timeout, headers={"User-Agent": "baekjoon-n8n-bot/1.0"}) as client:
        for index, pid in enumerate(problem_ids):
            # Rate limit: pause before every request after the first, so that
            # skipped and failed problems are throttled too (a non-200 reply
            # such as a rate-limit response must not speed the loop up).
            if index and sleep_sec > 0:
                await asyncio.sleep(sleep_sec)

            try:
                title_ko, title_en, level, tag_keys = await solved_problem_show(client, pid)
            except Exception:
                # Best effort: record the failure and move on to the next one.
                failed += 1
                log.exception("solved.ac lookup failed for problem %s", pid)
                continue

            # Nothing usable came back: skip the row.
            if title_ko is None and title_en is None and level is None and not tag_keys:
                skipped += 1
                continue

            params = {
                "tko": title_ko,
                "ten": title_en,
                "lvl": level,
                # tags is a TEXT[] column; the list is passed as an array
                # parameter, and an empty list is stored as NULL.
                "tags": tag_keys if tag_keys else None,
                "wid": workbook_id,
                "pid": pid,
            }

            try:
                # A savepoint confines a failed UPDATE to this row. Without it
                # the surrounding transaction stays aborted and every later
                # statement of the batch would fail as well.
                async with db.begin_nested():
                    await db.execute(update_stmt, params)
            except Exception:
                failed += 1
                log.exception(
                    "update failed for workbook %s, problem %s", workbook_id, pid
                )
                continue

            updated += 1
            pending += 1

            # Batch commit, keyed on the number of pending updates rather than
            # the loop index so skipped rows cannot make a batch be missed.
            if commit_every > 0 and pending >= commit_every:
                try:
                    await db.commit()
                except Exception:
                    log.exception(
                        "batch commit failed for workbook %s; %d update(s) lost",
                        workbook_id,
                        pending,
                    )
                    await db.rollback()
                    # Those rows were not persisted after all.
                    updated -= pending
                    failed += pending
                pending = 0

    await db.commit()

    return {
        "workbook_id": workbook_id,
        "target_count": len(problem_ids),
        "updated": updated,
        "skipped": skipped,
        "failed": failed,
        "only_missing": only_missing,
        "commit_every": commit_every,
        "sleep_sec": sleep_sec,
        "message": "enrich done",
    }
|