Feat: [main] baekjoon-bot-v1

This commit is contained in:
sm4640
2026-01-14 14:54:42 +09:00
commit 122b367bed
10 changed files with 918 additions and 0 deletions

103
workbook_importer.py Normal file
View File

@@ -0,0 +1,103 @@
import asyncio
import re
from typing import List, Tuple, Optional
import requests
from bs4 import BeautifulSoup
from sqlalchemy import text
from sqlalchemy.ext.asyncio import AsyncSession
# One shared HTTP session so connections are reused across requests and every
# request carries the bot's User-Agent header.
SESSION = requests.Session()
SESSION.headers["User-Agent"] = "baekjoon-n8n-bot/1.0"

# Captures the numeric problem id from a "/problem/<id>" link path.
PROBLEM_LINK_RE = re.compile(r"/problem/(\d+)")
def fetch_workbook_problem_ids(workbook_id: int) -> List[int]:
    """Scrape a BOJ workbook page and return the problem ids it links to.

    Args:
        workbook_id: Numeric id used in the workbook view URL.

    Returns:
        The distinct problem ids found on the page, in ascending order.

    Raises:
        requests.HTTPError: If the page responds with an error status
            (via ``raise_for_status``).
    """
    response = SESSION.get(
        f"https://www.acmicpc.net/workbook/view/{workbook_id}",
        timeout=(3.05, 10),
    )
    response.raise_for_status()
    page = BeautifulSoup(response.text, "lxml")

    # Collect ids from every /problem/{id} link on the workbook page; the set
    # removes duplicates when the same problem is linked more than once.
    link_matches = (
        PROBLEM_LINK_RE.search(anchor.get("href", ""))
        for anchor in page.select('a[href^="/problem/"]')
    )
    unique_ids = {int(found.group(1)) for found in link_matches if found}
    return sorted(unique_ids)
def solved_problem_show(problem_id: int) -> Tuple[Optional[str], Optional[str], Optional[int]]:
    """Fetch extra problem metadata from the solved.ac ``problem/show`` API.

    Args:
        problem_id: Problem id sent as the ``problemId`` query parameter.

    Returns:
        A ``(titleKo, titleEn, level)`` tuple read from the JSON response.
        Any field missing from the response is ``None``; all three are
        ``None`` when the API answers with a status other than 200.
    """
    response = SESSION.get(
        "https://solved.ac/api/v3/problem/show",
        params={"problemId": problem_id},
        timeout=(3.05, 10),
    )
    if response.status_code == 200:
        payload = response.json()
        return payload.get("titleKo"), payload.get("titleEn"), payload.get("level")
    return None, None, None
async def import_workbook(db: AsyncSession, workbook_id: int, title: Optional[str] = None) -> dict:
    """Import a BOJ workbook and its problem list into the database.

    Steps:
      1. Scrape the workbook page for problem ids
         (``fetch_workbook_problem_ids``).
      2. Upsert the ``workbooks`` row. When ``title`` is ``None`` the stored
         title is kept (``COALESCE`` in the upsert).
      3. Insert any missing ``workbook_problems`` mappings and commit.
      4. For each problem, look up metadata with ``solved_problem_show`` and
         fill in ``title_ko`` / ``title_en`` / ``level``, committing in
         batches.

    The HTTP helpers are synchronous, so they are run in the event loop's
    default executor; calling them directly would block the loop for the
    duration of every request.

    Args:
        db: Async SQLAlchemy session used for all statements and commits.
        workbook_id: Id of the workbook to import.
        title: Optional workbook title to store.

    Returns:
        A dict with ``workbook_id``, ``count`` (number of problem ids found)
        and ``meta_updated`` (number of problems whose metadata lookup
        returned at least one value).

    Raises:
        Any exception raised by the HTTP helpers or by the database session
        propagates to the caller unchanged.
    """
    loop = asyncio.get_running_loop()

    problem_ids = await loop.run_in_executor(
        None, fetch_workbook_problem_ids, workbook_id
    )

    # Upsert the workbook row itself.
    await db.execute(
        text("""
            INSERT INTO workbooks(id, title, source)
            VALUES (:id, :title, 'boj')
            ON CONFLICT (id) DO UPDATE
            SET title = COALESCE(EXCLUDED.title, workbooks.title),
                updated_at = now()
        """),
        {"id": workbook_id, "title": title},
    )

    # Insert the workbook -> problem mappings first; existing pairs are left
    # untouched.
    for pid in problem_ids:
        await db.execute(
            text("""
                INSERT INTO workbook_problems(workbook_id, problem_id)
                VALUES (:wid, :pid)
                ON CONFLICT (workbook_id, problem_id) DO NOTHING
            """),
            {"wid": workbook_id, "pid": pid},
        )
    await db.commit()

    # Enrich each mapping with metadata, one API request per problem.
    commit_every = 10
    updated_meta = 0
    for pid in problem_ids:
        title_ko, title_en, level = await loop.run_in_executor(
            None, solved_problem_show, pid
        )
        got_metadata = not (title_ko is None and title_en is None and level is None)
        if got_metadata:
            await db.execute(
                text("""
                    UPDATE workbook_problems
                    SET title_ko = COALESCE(:tko, title_ko),
                        title_en = COALESCE(:ten, title_en),
                        level = COALESCE(:lvl, level)
                    WHERE workbook_id = :wid AND problem_id = :pid
                """),
                {"tko": title_ko, "ten": title_en, "lvl": level, "wid": workbook_id, "pid": pid},
            )
            updated_meta += 1
            # Flush progress periodically so a late failure does not discard
            # every update made so far.
            if updated_meta % commit_every == 0:
                await db.commit()

        # Simple rate limit. It runs after every request, including failed
        # lookups, so a rejecting API is never hit faster than a healthy one.
        await asyncio.sleep(0.2)

    await db.commit()

    return {
        "workbook_id": workbook_id,
        "count": len(problem_ids),
        "meta_updated": updated_meta,
    }