import asyncio
import re
from typing import List, Optional, Tuple

import requests
from bs4 import BeautifulSoup
from sqlalchemy import text
from sqlalchemy.ext.asyncio import AsyncSession

# Shared HTTP session so connections are reused across requests.
SESSION = requests.Session()
SESSION.headers.update({"User-Agent": "baekjoon-n8n-bot/1.0"})

# Captures the numeric id from a "/problem/<id>" link.
PROBLEM_LINK_RE = re.compile(r"/problem/(\d+)")

# (connect, read) timeouts in seconds, applied to every outgoing request.
_HTTP_TIMEOUT = (3.05, 10)

# Commit the metadata updates every N processed problems.
_META_COMMIT_EVERY = 10

# Pause between solved.ac requests (seconds) as a simple rate limit.
_META_REQUEST_DELAY = 0.2


def fetch_workbook_problem_ids(workbook_id: int) -> List[int]:
    """Return the sorted, de-duplicated problem ids linked from a BOJ workbook page.

    The workbook page is fetched over HTTP and every anchor whose ``href``
    starts with ``/problem/`` is scanned for a numeric problem id.

    Args:
        workbook_id: Numeric id of the workbook on acmicpc.net.

    Returns:
        Ascending list of unique problem ids; empty if no links were found.

    Raises:
        requests.HTTPError: If the page responds with a 4xx/5xx status.
        requests.RequestException: On connection errors or timeouts.
    """
    url = f"https://www.acmicpc.net/workbook/view/{workbook_id}"
    response = SESSION.get(url, timeout=_HTTP_TIMEOUT)
    response.raise_for_status()

    soup = BeautifulSoup(response.text, "lxml")

    # Collect ids from the /problem/{id} links on the workbook page.
    ids = set()
    for anchor in soup.select('a[href^="/problem/"]'):
        match = PROBLEM_LINK_RE.search(anchor.get("href", ""))
        if match:
            ids.add(int(match.group(1)))
    return sorted(ids)


def solved_problem_show(problem_id: int) -> Tuple[Optional[str], Optional[str], Optional[int]]:
    """Look up problem metadata via the solved.ac ``problem/show`` endpoint.

    This is a best-effort lookup: any failure (non-200 status, network
    error, timeout, invalid or unexpected JSON) yields ``(None, None, None)``
    instead of raising, so a single bad lookup cannot abort a bulk import.

    Args:
        problem_id: BOJ problem id to look up.

    Returns:
        ``(titleKo, titleEn, level)`` as read from the response body; each
        element is ``None`` when the field is absent or the lookup failed.
    """
    url = "https://solved.ac/api/v3/problem/show"
    try:
        response = SESSION.get(
            url, params={"problemId": problem_id}, timeout=_HTTP_TIMEOUT
        )
        if response.status_code != 200:
            return None, None, None
        data = response.json()
    except (requests.RequestException, ValueError):
        # Network failure/timeout, or a body that is not valid JSON.
        return None, None, None

    if not isinstance(data, dict):
        return None, None, None
    return data.get("titleKo"), data.get("titleEn"), data.get("level")


async def import_workbook(db: AsyncSession, workbook_id: int, title: Optional[str] = None) -> dict:
    """Import a BOJ workbook and its problems, then enrich them with solved.ac metadata.

    Steps:
      1. Scrape the workbook page for its problem ids.
      2. Upsert the workbook row (an existing title is kept when ``title`` is None).
      3. Insert the workbook -> problem mappings, ignoring existing ones, and commit.
      4. For each problem, fetch metadata from solved.ac and update the mapping
         row, committing periodically and pausing between requests.

    The blocking ``requests`` calls are run in the default executor so the
    event loop stays responsive while waiting on the network.
    NOTE(review): concurrent calls to this coroutine would share ``SESSION``
    across executor threads - confirm that is acceptable for this deployment.

    Args:
        db: Async SQLAlchemy session used for all statements and commits.
        workbook_id: Numeric id of the workbook on acmicpc.net.
        title: Optional workbook title to store.

    Returns:
        Dict with ``workbook_id``, ``count`` (number of problems found) and
        ``meta_updated`` (number of problems whose metadata was updated).

    Raises:
        requests.RequestException: If the workbook page cannot be fetched.
    """
    loop = asyncio.get_running_loop()
    problem_ids = await loop.run_in_executor(
        None, fetch_workbook_problem_ids, workbook_id
    )

    # Upsert the workbook itself.
    await db.execute(
        text("""
            INSERT INTO workbooks(id, title, source)
            VALUES (:id, :title, 'boj')
            ON CONFLICT (id) DO UPDATE
            SET title = COALESCE(EXCLUDED.title, workbooks.title),
                updated_at = now()
        """),
        {"id": workbook_id, "title": title},
    )

    # Insert the workbook -> problem mappings first, so the rows exist even
    # if the metadata enrichment below fails part-way through.
    insert_mapping = text("""
        INSERT INTO workbook_problems(workbook_id, problem_id)
        VALUES (:wid, :pid)
        ON CONFLICT (workbook_id, problem_id) DO NOTHING
    """)
    for pid in problem_ids:
        await db.execute(insert_mapping, {"wid": workbook_id, "pid": pid})
    await db.commit()

    # Metadata enrichment. COALESCE keeps the stored value whenever the
    # freshly fetched field is NULL.
    update_meta = text("""
        UPDATE workbook_problems
        SET title_ko = COALESCE(:tko, title_ko),
            title_en = COALESCE(:ten, title_en),
            level = COALESCE(:lvl, level)
        WHERE workbook_id = :wid AND problem_id = :pid
    """)
    updated_meta = 0
    for processed, pid in enumerate(problem_ids, start=1):
        title_ko, title_en, level = await loop.run_in_executor(
            None, solved_problem_show, pid
        )

        has_meta = not (title_ko is None and title_en is None and level is None)
        if has_meta:
            await db.execute(
                update_meta,
                {
                    "tko": title_ko,
                    "ten": title_en,
                    "lvl": level,
                    "wid": workbook_id,
                    "pid": pid,
                },
            )
            updated_meta += 1

        if processed % _META_COMMIT_EVERY == 0:
            await db.commit()

        # Throttle after every request, including failed lookups, so an
        # erroring API is not hit in a tight loop.
        await asyncio.sleep(_META_REQUEST_DELAY)

    await db.commit()

    return {
        "workbook_id": workbook_id,
        "count": len(problem_ids),
        "meta_updated": updated_meta,
    }