104 lines
3.2 KiB
Python
104 lines
3.2 KiB
Python
import asyncio
|
|
import re
|
|
from typing import List, Tuple, Optional
|
|
|
|
import requests
|
|
from bs4 import BeautifulSoup
|
|
from sqlalchemy import text
|
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
|
|
# Module-wide requests session; every call made through it sends the bot
# User-Agent header set below.
SESSION = requests.Session()
SESSION.headers["User-Agent"] = "baekjoon-n8n-bot/1.0"

# Captures the numeric problem id from a "/problem/<id>" link path.
PROBLEM_LINK_RE = re.compile(r"/problem/(\d+)")
|
|
|
|
def fetch_workbook_problem_ids(workbook_id: int) -> List[int]:
    """Return the sorted, de-duplicated problem ids linked from a workbook page.

    Downloads ``https://www.acmicpc.net/workbook/view/<workbook_id>`` through
    the module-level ``SESSION`` and collects the numeric id of every anchor
    whose ``href`` starts with ``/problem/``.

    Args:
        workbook_id: Workbook number interpolated into the page URL.

    Returns:
        The distinct problem ids found on the page, in ascending order.

    Raises:
        requests.HTTPError: If the page responds with an error status
            (raised by ``raise_for_status``).
    """
    response = SESSION.get(
        f"https://www.acmicpc.net/workbook/view/{workbook_id}",
        timeout=(3.05, 10),
    )
    response.raise_for_status()

    page = BeautifulSoup(response.text, "lxml")

    # Collect ids from the /problem/{id} links inside the workbook page.
    matches = (
        PROBLEM_LINK_RE.search(anchor.get("href", ""))
        for anchor in page.select('a[href^="/problem/"]')
    )
    return sorted({int(found.group(1)) for found in matches if found})
|
|
|
|
def solved_problem_show(problem_id: int) -> Tuple[Optional[str], Optional[str], Optional[int]]:
    """Look up a problem's metadata through the solved.ac ``problem/show`` API.

    The lookup is best-effort: any failure is reported as an all-``None``
    result instead of an exception, so one bad problem does not abort a
    caller that is iterating over many ids.

    Args:
        problem_id: Problem number sent as the ``problemId`` query parameter.

    Returns:
        ``(titleKo, titleEn, level)`` read from the JSON response; a field
        missing from the response is ``None``. ``(None, None, None)`` is
        returned when the request fails at the transport level, the status
        is not 200, or the body is not a JSON object.
    """
    url = "https://solved.ac/api/v3/problem/show"
    try:
        r = SESSION.get(url, params={"problemId": problem_id}, timeout=(3.05, 10))
        if r.status_code != 200:
            return None, None, None
        data = r.json()
    except (requests.RequestException, ValueError):
        # Timeouts / connection errors (RequestException) and undecodable
        # bodies (ValueError) are treated like a non-200 response.
        return None, None, None

    if not isinstance(data, dict):
        # Valid JSON but not an object: there are no fields to read.
        return None, None, None

    return data.get("titleKo"), data.get("titleEn"), data.get("level")
|
|
|
|
async def import_workbook(db: AsyncSession, workbook_id: int, title: Optional[str] = None) -> dict:
    """Import a workbook's problem list and enrich it with solved.ac metadata.

    Steps:
      1. Fetch the workbook's problem ids with ``fetch_workbook_problem_ids``.
      2. Upsert the ``workbooks`` row (source ``'boj'``). On conflict the
         stored title is kept when ``title`` is ``None`` and ``updated_at``
         is set to ``now()``.
      3. Insert one ``workbook_problems`` row per problem id, ignoring rows
         that already exist, then commit.
      4. For each problem, call ``solved_problem_show`` and write any
         non-``None`` title/level values onto its ``workbook_problems`` row.

    Args:
        db: Async SQLAlchemy session used for every statement and commit.
        workbook_id: Workbook number to import.
        title: Optional workbook title to store.

    Returns:
        A dict with ``workbook_id``, ``count`` (number of problem ids found)
        and ``meta_updated`` (number of rows for which an UPDATE was issued).

    Raises:
        Exceptions raised by ``fetch_workbook_problem_ids`` and by the
        database session propagate to the caller.
    """
    loop = asyncio.get_running_loop()

    # The scraping helpers use the blocking `requests` library; run them in
    # the default executor so this coroutine does not stall the event loop.
    # NOTE(review): they share the module-level SESSION, which can now be used
    # from worker threads while other coroutines run — confirm that is fine.
    problem_ids = await loop.run_in_executor(None, fetch_workbook_problem_ids, workbook_id)

    # Upsert the workbook row.
    await db.execute(
        text("""
            INSERT INTO workbooks(id, title, source)
            VALUES (:id, :title, 'boj')
            ON CONFLICT (id) DO UPDATE
            SET title = COALESCE(EXCLUDED.title, workbooks.title),
                updated_at = now()
        """),
        {"id": workbook_id, "title": title},
    )

    # Upsert the problem list: insert the workbook -> problem mappings first.
    for pid in problem_ids:
        await db.execute(
            text("""
                INSERT INTO workbook_problems(workbook_id, problem_id)
                VALUES (:wid, :pid)
                ON CONFLICT (workbook_id, problem_id) DO NOTHING
            """),
            {"wid": workbook_id, "pid": pid},
        )

    await db.commit()

    updated_meta = 0

    # Metadata enrichment. Hitting the API too quickly could be a burden, so
    # a simple rate limit is applied.
    for i, pid in enumerate(problem_ids):
        title_ko, title_en, level = await loop.run_in_executor(None, solved_problem_show, pid)

        if not (title_ko is None and title_en is None and level is None):
            # COALESCE keeps the stored value for any field that came back None.
            await db.execute(
                text("""
                    UPDATE workbook_problems
                    SET title_ko = COALESCE(:tko, title_ko),
                        title_en = COALESCE(:ten, title_en),
                        level = COALESCE(:lvl, level)
                    WHERE workbook_id = :wid AND problem_id = :pid
                """),
                {"tko": title_ko, "ten": title_en, "lvl": level, "wid": workbook_id, "pid": pid},
            )
            updated_meta += 1

        # Every 10th request: commit the batch and pause briefly. This runs
        # whether or not the lookup succeeded, because a failed lookup still
        # cost the remote API a request.
        if i % 10 == 0:
            await db.commit()
            await asyncio.sleep(0.2)

    await db.commit()

    return {
        "workbook_id": workbook_id,
        "count": len(problem_ids),
        "meta_updated": updated_meta,
    }
|