Feat: [main] baekjoon-bot-v1
This commit is contained in:
6
.dockerignore
Normal file
6
.dockerignore
Normal file
@@ -0,0 +1,6 @@
|
||||
# .dockerignore
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
venv/
|
||||
.env
|
||||
.git/
|
||||
216
.gitignore
vendored
Normal file
216
.gitignore
vendored
Normal file
@@ -0,0 +1,216 @@
|
||||
# Byte-compiled / optimized / DLL files
|
||||
__pycache__/
|
||||
*.py[codz]
|
||||
*$py.class
|
||||
|
||||
# C extensions
|
||||
*.so
|
||||
|
||||
# Distribution / packaging
|
||||
.Python
|
||||
build/
|
||||
develop-eggs/
|
||||
dist/
|
||||
downloads/
|
||||
eggs/
|
||||
.eggs/
|
||||
lib/
|
||||
lib64/
|
||||
parts/
|
||||
sdist/
|
||||
var/
|
||||
wheels/
|
||||
share/python-wheels/
|
||||
*.egg-info/
|
||||
.installed.cfg
|
||||
*.egg
|
||||
MANIFEST
|
||||
|
||||
# PyInstaller
|
||||
# Usually these files are written by a python script from a template
|
||||
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
||||
*.manifest
|
||||
*.spec
|
||||
|
||||
# Installer logs
|
||||
pip-log.txt
|
||||
pip-delete-this-directory.txt
|
||||
|
||||
# Unit test / coverage reports
|
||||
htmlcov/
|
||||
.tox/
|
||||
.nox/
|
||||
.coverage
|
||||
.coverage.*
|
||||
.cache
|
||||
nosetests.xml
|
||||
coverage.xml
|
||||
*.cover
|
||||
*.py.cover
|
||||
.hypothesis/
|
||||
.pytest_cache/
|
||||
cover/
|
||||
|
||||
# Translations
|
||||
*.mo
|
||||
*.pot
|
||||
|
||||
# Django stuff:
|
||||
*.log
|
||||
local_settings.py
|
||||
db.sqlite3
|
||||
db.sqlite3-journal
|
||||
|
||||
# Flask stuff:
|
||||
instance/
|
||||
.webassets-cache
|
||||
|
||||
# Scrapy stuff:
|
||||
.scrapy
|
||||
|
||||
# Sphinx documentation
|
||||
docs/_build/
|
||||
|
||||
# PyBuilder
|
||||
.pybuilder/
|
||||
target/
|
||||
|
||||
# Jupyter Notebook
|
||||
.ipynb_checkpoints
|
||||
|
||||
# IPython
|
||||
profile_default/
|
||||
ipython_config.py
|
||||
|
||||
# pyenv
|
||||
# For a library or package, you might want to ignore these files since the code is
|
||||
# intended to run in multiple environments; otherwise, check them in:
|
||||
# .python-version
|
||||
|
||||
# pipenv
|
||||
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
||||
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
||||
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
||||
# install all needed dependencies.
|
||||
# Pipfile.lock
|
||||
|
||||
# UV
|
||||
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
|
||||
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
||||
# commonly ignored for libraries.
|
||||
# uv.lock
|
||||
|
||||
# poetry
|
||||
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
||||
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
||||
# commonly ignored for libraries.
|
||||
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
||||
# poetry.lock
|
||||
# poetry.toml
|
||||
|
||||
# pdm
|
||||
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
||||
# pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
|
||||
# https://pdm-project.org/en/latest/usage/project/#working-with-version-control
|
||||
# pdm.lock
|
||||
# pdm.toml
|
||||
.pdm-python
|
||||
.pdm-build/
|
||||
|
||||
# pixi
|
||||
# Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
|
||||
# pixi.lock
|
||||
# Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
|
||||
# in the .venv directory. It is recommended not to include this directory in version control.
|
||||
.pixi
|
||||
|
||||
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
||||
__pypackages__/
|
||||
|
||||
# Celery stuff
|
||||
celerybeat-schedule
|
||||
celerybeat.pid
|
||||
|
||||
# Redis
|
||||
*.rdb
|
||||
*.aof
|
||||
*.pid
|
||||
|
||||
# RabbitMQ
|
||||
mnesia/
|
||||
rabbitmq/
|
||||
rabbitmq-data/
|
||||
|
||||
# ActiveMQ
|
||||
activemq-data/
|
||||
|
||||
# SageMath parsed files
|
||||
*.sage.py
|
||||
|
||||
# Environments
|
||||
.env
|
||||
.envrc
|
||||
.venv
|
||||
env/
|
||||
venv/
|
||||
ENV/
|
||||
env.bak/
|
||||
venv.bak/
|
||||
|
||||
# Spyder project settings
|
||||
.spyderproject
|
||||
.spyproject
|
||||
|
||||
# Rope project settings
|
||||
.ropeproject
|
||||
|
||||
# mkdocs documentation
|
||||
/site
|
||||
|
||||
# mypy
|
||||
.mypy_cache/
|
||||
.dmypy.json
|
||||
dmypy.json
|
||||
|
||||
# Pyre type checker
|
||||
.pyre/
|
||||
|
||||
# pytype static type analyzer
|
||||
.pytype/
|
||||
|
||||
# Cython debug symbols
|
||||
cython_debug/
|
||||
|
||||
# PyCharm
|
||||
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
||||
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
||||
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
||||
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
||||
# .idea/
|
||||
|
||||
# Abstra
|
||||
# Abstra is an AI-powered process automation framework.
|
||||
# Ignore directories containing user credentials, local state, and settings.
|
||||
# Learn more at https://abstra.io/docs
|
||||
.abstra/
|
||||
|
||||
# Visual Studio Code
|
||||
# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
|
||||
# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
|
||||
# and can be added to the global gitignore or merged into this file. However, if you prefer,
|
||||
# you could uncomment the following to ignore the entire vscode folder
|
||||
# .vscode/
|
||||
|
||||
# Ruff stuff:
|
||||
.ruff_cache/
|
||||
|
||||
# PyPI configuration file
|
||||
.pypirc
|
||||
|
||||
# Marimo
|
||||
marimo/_static/
|
||||
marimo/_lsp/
|
||||
__marimo__/
|
||||
|
||||
# Streamlit
|
||||
.streamlit/secrets.toml
|
||||
187
app.py
Normal file
187
app.py
Normal file
@@ -0,0 +1,187 @@
|
||||
from typing import Optional

from dotenv import load_dotenv
from fastapi import FastAPI, HTTPException, Query, Depends
from fastapi.responses import JSONResponse
from sqlalchemy import text
from sqlalchemy.ext.asyncio import AsyncSession

# Load .env BEFORE importing project modules: db.py reads DATABASE_URL at
# import time and raises RuntimeError when it is missing, so the variables
# must already be in the process environment when `db` is first imported.
load_dotenv()

from utils import env, resolve_difficulty, resolve_tags, build_query, get_problem  # noqa: E402
from db import get_db  # noqa: E402
from workbook_picker import pick_from_workbook  # noqa: E402
from workbook_importer import import_workbook  # noqa: E402
from workbook_enricher import enrich_workbook  # noqa: E402
|
||||
|
||||
# ASGI application object; the Dockerfile's uvicorn command refers to it as "app:app".
app = FastAPI()
|
||||
|
||||
|
||||
@app.get("/")
|
||||
def root():
    """Liveness probe: return a constant payload showing the service is up."""
    health = {"status": "ok"}
    return health
|
||||
|
||||
|
||||
@app.post("/admin/workbooks/{workbook_id}/enrich")
|
||||
async def admin_enrich_workbook(
    workbook_id: int,
    only_missing: bool = Query(True, description="True면 NULL만 채움 / False면 덮어씀"),
    commit_every: int = Query(50, ge=1, le=500, description="몇 개마다 commit 할지"),
    sleep_sec: float = Query(0.12, ge=0.0, le=2.0, description="solved.ac 호출 사이 sleep"),
    db: AsyncSession = Depends(get_db),
):
    """Fill in solved.ac metadata for every problem of one workbook.

    Thin HTTP wrapper: the validated query parameters are forwarded unchanged
    to ``enrich_workbook`` and its summary dict is returned under ``result``.
    """
    options = {
        "workbook_id": workbook_id,
        "only_missing": only_missing,
        "commit_every": commit_every,
        "sleep_sec": sleep_sec,
    }
    summary = await enrich_workbook(db, **options)
    return {"status": "ok", "result": summary}
|
||||
|
||||
|
||||
@app.delete("/admin/workbooks/{workbook_id}/reset")
|
||||
async def reset_workbook_progress(
    workbook_id: int,
    db: AsyncSession = Depends(get_db),
):
    """Delete the send history of a workbook so its problems can be picked again.

    Returns a summary containing the number of ``workbook_sends`` rows removed.

    Raises:
        HTTPException: 500 when the DELETE or the commit fails; the pending
            transaction is rolled back first.
    """
    try:
        res = await db.execute(
            text("DELETE FROM workbook_sends WHERE workbook_id = :wid"),
            {"wid": workbook_id},
        )
        await db.commit()
    except Exception as e:
        # Leave the session clean instead of relying on implicit cleanup,
        # and keep the original exception as the cause for server-side logs.
        await db.rollback()
        raise HTTPException(status_code=500, detail=str(e)) from e

    # res.rowcount is the number of deleted rows (= problems made available again).
    return {
        "status": "ok",
        "workbook_id": workbook_id,
        "deleted_sends": int(res.rowcount or 0),
        "message": "workbook progress reset (problems can be picked again)",
    }
|
||||
|
||||
|
||||
@app.get("/today")
|
||||
async def today(
    source_mode: str = Query(env("SOURCE_MODE_DEFAULT", "search"), description="search|workbook"),
    workbook_id: Optional[int] = Query(None, description="문제집 모드일 때 workbook id"),
    workbook_pick: str = Query("level_asc", description="random|level_asc"),

    difficulty_mode: str = Query(env("DIFFICULTY_MODE_DEFAULT", "easy"), description="easy|hard|all"),
    tag_mode: str = Query(env("TAG_MODE_DEFAULT", "easy"), description="easy|hard|all"),
    difficulty: Optional[str] = Query(None, description="예: 6..10 (주면 mode보다 우선)"),
    tags: Optional[str] = Query(None, description="예: dp,graphs (주면 mode보다 우선)"),
    lang: str = Query(env("LANG_DEFAULT", "all"), description="ko | en | ko,en | all"),

    db: AsyncSession = Depends(get_db),
):
    """Recommend one problem and return it together with a Discord payload.

    Two sources are supported:

    * ``source_mode == "workbook"``: pick the next unsent problem of a
      workbook (``workbook_id`` or the ``WORKBOOK_ID_DEFAULT`` env var).
      Responds 400 when no workbook id is available and 409 when the
      workbook has no unsent problem left.
    * anything else: build a solved.ac search query from difficulty/tag/lang
      settings and pick a random hit. Responds 400 for an invalid
      ``difficulty_mode``/``tag_mode`` and 503 when no problem was fetched.
    """
    # Function-scope import keeps this block self-contained.
    import asyncio

    sm = (source_mode or "").lower().strip()

    # 1) Workbook mode.
    if sm == "workbook":
        default_wid = env("WORKBOOK_ID_DEFAULT")
        wid = workbook_id or (int(default_wid) if default_wid else None)
        if not wid:
            return JSONResponse(status_code=400, content={"error": "workbook_id is required for workbook mode"})

        pid, title, level = await pick_from_workbook(db, wid, pick=workbook_pick)
        if not pid:
            return JSONResponse(status_code=409, content={"error": "no_more_problems_in_workbook", "workbook_id": wid})

        problem_url = f"https://www.acmicpc.net/problem/{pid}"
        solved_url = f"https://solved.ac/problems/id/{pid}"
        level_text = f"Lv. {level}" if level is not None else "Lv. ?"

        discord_payload = _recommendation_payload(
            "🔔 오늘의 백준 추천 문제 (문제집)",
            (
                f"**{pid}번: {title}**\n"
                f"난이도: **{level_text}**\n"
                f"source_mode: `workbook` / workbook_id: `{wid}`"
            ),
            problem_url,
            solved_url,
        )

        return {
            "source_mode": "workbook",
            "workbook_id": wid,
            "problemId": pid,
            "title": title,
            "level": level,
            "problemUrl": problem_url,
            "solvedUrl": solved_url,
            "discordPayload": discord_payload,
        }

    # 2) Search mode.
    dm = (difficulty_mode or "").lower()
    tm = (tag_mode or "").lower()

    if dm not in ("easy", "hard", "all"):
        return JSONResponse(status_code=400, content={"error": "difficulty_mode must be easy|hard|all"})
    if tm not in ("easy", "hard", "all"):
        return JSONResponse(status_code=400, content={"error": "tag_mode must be easy|hard|all"})

    chosen_difficulty = resolve_difficulty(difficulty, dm)
    chosen_tags = resolve_tags(tags, tm)
    query = build_query(chosen_difficulty, chosen_tags, lang)

    # get_problem does synchronous HTTP with retries and sleeps; run it in a
    # worker thread so the event loop keeps serving other requests meanwhile.
    pid, title, level = await asyncio.to_thread(get_problem, query=query)
    if not pid:
        return JSONResponse(status_code=503, content={"error": "failed_to_fetch_problem", "query": query})

    problem_url = f"https://www.acmicpc.net/problem/{pid}"
    solved_url = f"https://solved.ac/problems/id/{pid}"
    level_text = f"Lv. {level}" if level is not None else "Lv. ?"
    tags_text = ", ".join(chosen_tags) if chosen_tags else "none"

    discord_payload = _recommendation_payload(
        "🔔 오늘의 백준 추천 문제",
        (
            f"**{pid}번: {title}**\n"
            f"난이도: **{level_text}**\n"
            f"difficulty_mode: `{dm}` / tag_mode: `{tm}` / lang: `{lang}`\n"
            f"filter: `{chosen_difficulty}` / tags: `{tags_text}`"
        ),
        problem_url,
        solved_url,
    )

    return {
        "source_mode": "search",
        "difficulty_mode": dm,
        "tag_mode": tm,
        "lang": lang,
        "difficulty": chosen_difficulty,
        "tags": chosen_tags,
        "query": query,
        "problemId": pid,
        "title": title,
        "level": level,
        "problemUrl": problem_url,
        "solvedUrl": solved_url,
        "discordPayload": discord_payload,
    }


def _recommendation_payload(embed_title: str, description: str, problem_url: str, solved_url: str) -> dict:
    """Build the single-embed Discord payload shared by both source modes."""
    return {
        "embeds": [{
            "title": embed_title,
            "description": description,
            "fields": [
                {"name": "문제 링크", "value": f"[바로가기]({problem_url})", "inline": True},
                {"name": "해설/정보", "value": f"[Solved.ac]({solved_url})", "inline": True},
            ],
            "footer": {"text": "매일 오전 10시 정기 알림 (n8n)"}
        }]
    }
|
||||
|
||||
|
||||
# @app.post("/admin/workbooks/{workbook_id}/import")
|
||||
# async def admin_import_workbook(
|
||||
# workbook_id: int,
|
||||
# title: Optional[str] = Query(None, description="문제집 제목(옵션)"),
|
||||
# db: AsyncSession = Depends(get_db),
|
||||
# ):
|
||||
# try:
|
||||
# result = await import_workbook(db, workbook_id=workbook_id, title=title)
|
||||
# return {"status": "ok", "result": result}
|
||||
# except Exception as e:
|
||||
# return JSONResponse(status_code=500, content={"error": str(e)})
|
||||
21
db.py
Normal file
21
db.py
Normal file
@@ -0,0 +1,21 @@
|
||||
import os
|
||||
from typing import AsyncGenerator
|
||||
|
||||
from sqlalchemy.ext.asyncio import create_async_engine, async_sessionmaker, AsyncSession
|
||||
|
||||
# The connection URL is read once, when this module is first imported, so it
# must already be present in the process environment at that point.
DATABASE_URL = os.environ.get("DATABASE_URL", "").strip()
if DATABASE_URL == "":
    raise RuntimeError("DATABASE_URL is required (e.g. postgresql+asyncpg://...)")

# pool_pre_ping=True is passed through to the engine's connection pool.
engine = create_async_engine(DATABASE_URL, pool_pre_ping=True)

# Session factory used by get_db(); sessions do not autoflush and keep
# loaded attributes usable after commit (expire_on_commit=False).
_session_options = {
    "bind": engine,
    "class_": AsyncSession,
    "autoflush": False,
    "expire_on_commit": False,
}
SessionLocal = async_sessionmaker(**_session_options)
|
||||
|
||||
async def get_db() -> AsyncGenerator[AsyncSession, None]:
    """Yield one ``AsyncSession`` from ``SessionLocal`` for use as a dependency.

    The session is opened in an ``async with`` block, so it is released when
    the consumer finishes with the generator.
    """
    async with SessionLocal() as db_session:
        yield db_session
|
||||
20
dockerfile
Normal file
20
dockerfile
Normal file
@@ -0,0 +1,20 @@
|
||||
FROM python:3.12-slim

WORKDIR /app

# Set the time zone so that logs are stamped in Korean time.
ENV TZ=Asia/Seoul
# Install tzdata without recommended extras and drop the apt package lists
# in the same layer so they do not bloat the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends tzdata \
    && ln -snf /usr/share/zoneinfo/$TZ /etc/localtime \
    && echo $TZ > /etc/timezone \
    && rm -rf /var/lib/apt/lists/*

# Install dependencies first so this layer is cached across source-only changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy every file of the build context (app.py, utils.py, ...).
COPY . .

# Disable output buffering so logs are emitted immediately.
ENV PYTHONUNBUFFERED=1

EXPOSE 8000
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8000"]
|
||||
BIN
requirements.txt
Normal file
BIN
requirements.txt
Normal file
Binary file not shown.
147
utils.py
Normal file
147
utils.py
Normal file
@@ -0,0 +1,147 @@
|
||||
import os
|
||||
import random
|
||||
import time
|
||||
from typing import Optional, Tuple, List
|
||||
|
||||
import requests
|
||||
|
||||
# ====== HTTP Session ======
# One shared requests session; every call made through it sends this User-Agent.
SESSION = requests.Session()
SESSION.headers["User-Agent"] = "baekjoon-n8n-bot/1.0"

# Language codes recognised by build_lang_filter(); codes outside this list
# are dropped from a requested language filter.
KNOWN_LANGS = [
    "ko", "en", "ja", "ru", "zh",
    "de", "fr", "es", "pt", "it",
]
|
||||
|
||||
|
||||
def fetch_json_with_retry(url: str, params: dict, retries: int = 3, timeout=(3.05, 10)) -> dict:
    """GET ``url`` through the shared session and return the decoded JSON body.

    Failed attempts (connection errors, timeouts, non-2xx statuses, invalid
    JSON) are retried with exponential backoff: 0.7s, 1.4s, 2.8s, ...

    Args:
        url: Endpoint to request.
        params: Query-string parameters.
        retries: Total number of attempts; values below 1 are treated as 1.
        timeout: Passed to ``requests`` as the ``timeout`` argument.

    Raises:
        requests.RequestException | ValueError: the error of the final
            attempt, once every attempt has failed.
    """
    attempts = max(1, retries)
    for attempt in range(attempts):
        try:
            res = SESSION.get(url, params=params, timeout=timeout)
            res.raise_for_status()
            return res.json()
        except (requests.RequestException, ValueError):
            # ValueError covers JSON decoding errors on older requests versions.
            if attempt == attempts - 1:
                # No point in backing off when there is no attempt left.
                raise
            time.sleep(0.7 * (2 ** attempt))
    # Not reachable: the loop body either returns or re-raises on the last pass.
    raise RuntimeError("fetch_json_with_retry exhausted without a result")
|
||||
|
||||
|
||||
def parse_csv(s: str) -> List[str]:
    """Split a comma-separated string into trimmed, non-empty items.

    A falsy input (``None`` or ``""``) yields an empty list.
    """
    if not s:
        return []
    trimmed = (piece.strip() for piece in s.split(","))
    return [piece for piece in trimmed if piece]
|
||||
|
||||
|
||||
def env(name: str, default: str = "") -> str:
    """Return environment variable ``name`` (or ``default``) with whitespace stripped."""
    raw_value = os.environ.get(name, default)
    return raw_value.strip()
|
||||
|
||||
|
||||
def build_lang_filter(lang: str) -> str:
    """
    Build the language part of a search query from a CSV of language codes.

    The aim is to match problems readable in at least one requested language,
    so only positive terms (e.g. %ko) are produced; an exclusion such as -%en
    would also drop problems that offer both Korean and English statements.

    Returns "" for "all", an empty value, or when no requested code is in
    KNOWN_LANGS; "%xx" for one language; "(%aa | %bb)" (sorted) for several.
    """
    requested = (lang or "all").strip().lower()
    if requested in ("all", ""):
        return ""

    # Keep only recognised codes; sorting makes the output deterministic.
    codes = sorted(set(parse_csv(requested)).intersection(KNOWN_LANGS))
    if not codes:
        return ""

    terms = [f"%{code}" for code in codes]
    if len(terms) == 1:
        return terms[0]
    # Several languages (e.g. ko,en): match a problem that has any of them.
    return "(" + " | ".join(terms) + ")"
|
||||
|
||||
|
||||
|
||||
def resolve_difficulty(difficulty: Optional[str], difficulty_mode: str) -> str:
    """Return the difficulty range to search with.

    An explicit, non-blank ``difficulty`` is returned stripped. Otherwise the
    mode (``difficulty_mode`` or the DIFFICULTY_MODE_DEFAULT env var) selects
    an env-configurable preset; an unknown mode uses the "easy" preset.
    """
    explicit = (difficulty or "").strip()
    if explicit:
        return explicit

    # mode -> (environment variable, built-in fallback range)
    presets = {
        "easy": ("DIFFICULTY_EASY", "6..10"),
        "hard": ("DIFFICULTY_HARD", "11..15"),
        "all": ("DIFFICULTY_ALL", "1..30"),
    }
    mode = (difficulty_mode or env("DIFFICULTY_MODE_DEFAULT", "easy")).lower()
    var_name, fallback = presets.get(mode, presets["easy"])
    return env(var_name, fallback)
|
||||
|
||||
|
||||
def resolve_tags(tags_csv: Optional[str], tag_mode: str) -> List[str]:
    """
    Decide which tags go into the search query.

    A ``tags_csv`` value (from the query string) always wins, even when it
    parses to an empty list. Otherwise the preset for ``tag_mode`` is read
    from the environment and the TAG_PICK_* policy decides between one random
    tag ("random"), no tag filter ("none"), or the whole preset (any other value).
    """
    if tags_csv is not None:
        return parse_csv(tags_csv)

    # mode -> (preset variable, pick-policy variable, policy when nothing is set)
    known_modes = {
        "easy": ("TAGS_EASY", "TAG_PICK_EASY", "random"),
        "hard": ("TAGS_HARD", "TAG_PICK_HARD", "random"),
        "all": ("TAGS_ALL", "TAG_PICK_ALL", "none"),
    }

    mode = (tag_mode or env("TAG_MODE_DEFAULT", "easy")).lower()
    if mode in known_modes:
        tags_var, pick_var, pick_fallback = known_modes[mode]
        # The mode-specific policy falls back to the global TAG_PICK setting.
        policy = env(pick_var, env("TAG_PICK", pick_fallback)).lower()
    else:
        # Unknown mode: use the easy preset; the global TAG_PICK is not consulted here.
        tags_var = "TAGS_EASY"
        policy = env("TAG_PICK_EASY", "random").lower()

    candidates = parse_csv(env(tags_var, ""))

    if policy == "none":
        return []
    if policy == "random":
        if not candidates:
            return []
        return [random.choice(candidates)]
    return candidates
|
||||
|
||||
|
||||
def build_query(difficulty: str, tags: List[str], lang: str) -> str:
    """Assemble the search query string from difficulty, tags and language.

    Example result: ``*6..10 (tag:dp | tag:bfs) (%ko)``
    """
    # 1. Difficulty is always present.
    clauses = [f"*{difficulty}"]

    # 2. Tags: TAGS_JOIN=and requires every tag ("tag:a tag:b"); any other
    #    value matches any tag, parenthesised to fix precedence ("(tag:a | tag:b)").
    if tags:
        tag_terms = [f"tag:{t}" for t in tags]
        if env("TAGS_JOIN", "or").lower() == "and":
            clauses.append(" ".join(tag_terms))
        else:
            clauses.append("(" + " | ".join(tag_terms) + ")")

    # 3. Language: wrapped in parentheses because the filter may be compound.
    lang_clause = build_lang_filter(lang)
    if lang_clause:
        clauses.append(f"({lang_clause})")

    return " ".join(clauses)
|
||||
|
||||
|
||||
def get_problem(query: str, size: int = 50) -> Tuple[Optional[int], Optional[str], Optional[int]]:
    """Fetch one random problem matching ``query`` from the solved.ac search API.

    Returns ``(problem_id, title, level)``; the title prefers ``titleKo``,
    then ``titleEn``, then a placeholder. Returns ``(None, None, None)`` when
    the search has no items or when any error occurs while fetching.
    """
    nothing = (None, None, None)
    endpoint = "https://solved.ac/api/v3/search/problem"
    search_params = dict(query=query, sort="random", direction="desc", page=1, size=size)

    try:
        payload = fetch_json_with_retry(endpoint, params=search_params)
        candidates = payload.get("items", [])
        if not candidates:
            return nothing
        chosen = random.choice(candidates)
        display_title = chosen.get("titleKo") or chosen.get("titleEn") or "제목 없음"
        return chosen.get("problemId"), display_title, chosen.get("level")
    except Exception:
        # Best effort: every failure is reported to the caller as "no problem".
        return nothing
|
||||
156
workbook_enricher.py
Normal file
156
workbook_enricher.py
Normal file
@@ -0,0 +1,156 @@
|
||||
import asyncio
|
||||
from typing import Dict, List, Optional, Tuple
|
||||
|
||||
import httpx
|
||||
from sqlalchemy import text
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
|
||||
# solved.ac endpoint queried with a problemId parameter to read one problem's metadata.
SOLVED_SHOW_URL = "https://solved.ac/api/v3/problem/show"
|
||||
|
||||
|
||||
async def solved_problem_show(client: httpx.AsyncClient, problem_id: int) -> Tuple[Optional[str], Optional[str], Optional[int], Optional[List[str]]]:
    """
    Fetch metadata for one problem from solved.ac problem/show.

    Returns (title_ko, title_en, level, tag_keys); all four are None when the
    response status is not 200.
    """
    response = await client.get(SOLVED_SHOW_URL, params={"problemId": problem_id})
    if response.status_code != 200:
        return None, None, None, None

    body = response.json()

    # tags: [{"key": "...", "isMeta": ..., "bojTagId": ..., "problemCount": ...}, ...]
    tag_keys = []
    for tag in body.get("tags") or []:
        key = tag.get("key")
        if key:
            tag_keys.append(key)

    return body.get("titleKo"), body.get("titleEn"), body.get("level"), tag_keys
|
||||
|
||||
|
||||
async def enrich_workbook(
    db: AsyncSession,
    workbook_id: int,
    only_missing: bool = True,    # True: fill NULL columns only / False: overwrite
    commit_every: int = 50,       # commit after this many processed problems
    sleep_sec: float = 0.12,      # pause between solved.ac calls
    timeout: float = 10.0,
) -> Dict:
    """
    Fill title_ko/title_en/level/tags of a workbook's problems from solved.ac.

    Precondition: workbook_problems already holds the (workbook_id, problem_id)
    rows; this function only updates their metadata columns.

    Args:
        db: Session used for the SELECT, the UPDATEs and the commits.
        workbook_id: Workbook whose problems are enriched.
        only_missing: When True, only rows with a NULL metadata column are
            fetched and existing values are kept (COALESCE); when False every
            row is fetched and overwritten.
        commit_every: Commit once per this many processed problems; values
            below 1 are treated as 1.
        sleep_sec: Delay after each problem, including skipped and failed ones.
        timeout: HTTP timeout handed to the httpx client.

    Returns:
        Summary dict with target_count / updated / skipped / failed counters.
    """
    # 1) Collect the target problem ids (rows with missing metadata, or all rows).
    missing_filter = (
        "AND (title_ko IS NULL OR title_en IS NULL OR level IS NULL OR tags IS NULL)"
        if only_missing
        else ""
    )
    rows = (await db.execute(
        text(f"""
            SELECT problem_id
            FROM workbook_problems
            WHERE workbook_id = :wid
            {missing_filter}
            ORDER BY problem_id
        """),
        {"wid": workbook_id},
    )).all()

    problem_ids = [int(r[0]) for r in rows]

    if not problem_ids:
        return {
            "workbook_id": workbook_id,
            "target_count": 0,
            "updated": 0,
            "skipped": 0,
            "failed": 0,
            "message": "nothing to enrich (already filled)",
        }

    # The statement depends only on only_missing, so build it once.
    if only_missing:
        update_stmt = text("""
            UPDATE workbook_problems
            SET title_ko = COALESCE(:tko, title_ko),
                title_en = COALESCE(:ten, title_en),
                level = COALESCE(:lvl, level),
                tags = COALESCE(:tags, tags)
            WHERE workbook_id = :wid
              AND problem_id = :pid
        """)
    else:
        update_stmt = text("""
            UPDATE workbook_problems
            SET title_ko = :tko,
                title_en = :ten,
                level = :lvl,
                tags = :tags
            WHERE workbook_id = :wid
              AND problem_id = :pid
        """)

    batch_size = max(1, commit_every)  # guards the modulo below against 0
    updated = 0
    skipped = 0
    failed = 0

    # 2) Call solved.ac and update row by row.
    async with httpx.AsyncClient(timeout=timeout, headers={"User-Agent": "baekjoon-n8n-bot/1.0"}) as client:
        for i, pid in enumerate(problem_ids, start=1):
            try:
                title_ko, title_en, level, tag_keys = await solved_problem_show(client, pid)

                if title_ko is None and title_en is None and level is None and (not tag_keys):
                    # Nothing usable in the response.
                    skipped += 1
                else:
                    # The savepoint confines a failed UPDATE to this row, so
                    # the enclosing transaction stays usable for later rows.
                    async with db.begin_nested():
                        await db.execute(
                            update_stmt,
                            {
                                "tko": title_ko,
                                "ten": title_en,
                                "lvl": level,
                                # An empty tag list is stored as NULL.
                                "tags": tag_keys if tag_keys else None,
                                "wid": workbook_id,
                                "pid": pid,
                            },
                        )
                    updated += 1
            except Exception:
                # Best effort: count the failure and continue with the next problem.
                failed += 1

            # Batch commit and rate limit run for every processed problem, so
            # skipped or failed rows can neither hammer the API nor delay a commit.
            if i % batch_size == 0:
                await db.commit()
            if sleep_sec > 0:
                await asyncio.sleep(sleep_sec)

    await db.commit()

    return {
        "workbook_id": workbook_id,
        "target_count": len(problem_ids),
        "updated": updated,
        "skipped": skipped,
        "failed": failed,
        "only_missing": only_missing,
        "commit_every": commit_every,
        "sleep_sec": sleep_sec,
        "message": "enrich done",
    }
|
||||
103
workbook_importer.py
Normal file
103
workbook_importer.py
Normal file
@@ -0,0 +1,103 @@
|
||||
import asyncio
|
||||
import re
|
||||
from typing import List, Tuple, Optional
|
||||
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
from sqlalchemy import text
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
# Shared HTTP session for the acmicpc.net / solved.ac requests in this module.
SESSION = requests.Session()
SESSION.headers["User-Agent"] = "baekjoon-n8n-bot/1.0"

# Captures the numeric id from a "/problem/<id>" link.
PROBLEM_LINK_RE = re.compile(r"/problem/(\d+)")
|
||||
|
||||
def fetch_workbook_problem_ids(workbook_id: int) -> List[int]:
    """Scrape a workbook page and return its problem ids, sorted and de-duplicated.

    Raises the ``requests`` HTTP error when the page responds with an error status.
    """
    page = SESSION.get(
        f"https://www.acmicpc.net/workbook/view/{workbook_id}",
        timeout=(3.05, 10),
    )
    page.raise_for_status()

    document = BeautifulSoup(page.text, "lxml")

    # Collect the id of every /problem/{id} link found on the page.
    anchors = document.select('a[href^="/problem/"]')
    matches = (PROBLEM_LINK_RE.search(anchor.get("href", "")) for anchor in anchors)
    return sorted({int(found.group(1)) for found in matches if found})
|
||||
|
||||
def solved_problem_show(problem_id: int) -> Tuple[Optional[str], Optional[str], Optional[int]]:
    """
    Look up one problem via solved.ac problem/show.

    Returns (titleKo, titleEn, level), or three Nones when the response
    status is not 200.
    """
    response = SESSION.get(
        "https://solved.ac/api/v3/problem/show",
        params={"problemId": problem_id},
        timeout=(3.05, 10),
    )
    if response.status_code != 200:
        return None, None, None

    body = response.json()
    return tuple(body.get(field) for field in ("titleKo", "titleEn", "level"))
|
||||
|
||||
async def import_workbook(db: AsyncSession, workbook_id: int, title: Optional[str] = None) -> dict:
    """Import a workbook: scrape its problem list, store it, then add metadata.

    Steps:
      1. Scrape the workbook page for problem ids.
      2. Upsert the ``workbooks`` row (an existing title is kept when
         ``title`` is None) and insert any missing ``workbook_problems`` rows.
      3. Fill title_ko/title_en/level for each problem from solved.ac,
         keeping existing values where the API returned nothing.

    Returns:
        ``{"workbook_id", "count", "meta_updated"}`` where ``count`` is the
        number of scraped problems and ``meta_updated`` the number of rows
        that received metadata.

    Errors from the HTTP helpers or the database propagate to the caller;
    batches committed before the failure stay committed.
    """
    # The scraping helpers use blocking `requests`; run them in a worker
    # thread so the event loop is not stalled while they wait on the network.
    problem_ids = await asyncio.to_thread(fetch_workbook_problem_ids, workbook_id)

    # Upsert the workbook row.
    await db.execute(
        text("""
            INSERT INTO workbooks(id, title, source)
            VALUES (:id, :title, 'boj')
            ON CONFLICT (id) DO UPDATE
            SET title = COALESCE(EXCLUDED.title, workbooks.title),
                updated_at = now()
        """),
        {"id": workbook_id, "title": title},
    )

    # Insert the workbook -> problem mappings first (existing rows are kept).
    for pid in problem_ids:
        await db.execute(
            text("""
                INSERT INTO workbook_problems(workbook_id, problem_id)
                VALUES (:wid, :pid)
                ON CONFLICT (workbook_id, problem_id) DO NOTHING
            """),
            {"wid": workbook_id, "pid": pid},
        )

    await db.commit()

    updated_meta = 0

    # Metadata pass, with a simple rate limit so solved.ac is not hit too fast.
    for i, pid in enumerate(problem_ids, start=1):
        title_ko, title_en, level = await asyncio.to_thread(solved_problem_show, pid)

        if not (title_ko is None and title_en is None and level is None):
            await db.execute(
                text("""
                    UPDATE workbook_problems
                    SET title_ko = COALESCE(:tko, title_ko),
                        title_en = COALESCE(:ten, title_en),
                        level = COALESCE(:lvl, level)
                    WHERE workbook_id = :wid AND problem_id = :pid
                """),
                {"tko": title_ko, "ten": title_en, "lvl": level, "wid": workbook_id, "pid": pid},
            )
            updated_meta += 1

        # Commit and pause once per 10 processed problems, whether or not the
        # current problem yielded metadata.
        if i % 10 == 0:
            await db.commit()
            await asyncio.sleep(0.2)

    await db.commit()

    return {
        "workbook_id": workbook_id,
        "count": len(problem_ids),
        "meta_updated": updated_meta,
    }
|
||||
62
workbook_picker.py
Normal file
62
workbook_picker.py
Normal file
@@ -0,0 +1,62 @@
|
||||
from typing import Optional, Tuple
|
||||
from sqlalchemy import text
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
|
||||
async def pick_from_workbook(
    db: AsyncSession,
    workbook_id: int,
    pick: str = "random",  # random | level_asc
) -> Tuple[Optional[int], Optional[str], Optional[int]]:
    """
    Select one not-yet-sent problem of a workbook and record it in
    workbook_sends within the same statement.

    pick:
      - random: any unsent problem, in random order
      - level_asc: lowest level first (easiest first), NULL levels last,
        ties broken randomly
    Any other value is treated as "random".

    Returns (problem_id, title, level), or (None, None, None) when the
    workbook has no unsent problem.
    """
    # Only the ORDER BY clause differs between modes. The clause always comes
    # from this fixed table, never from the caller's text.
    order_clauses = {
        "random": "ORDER BY random()",
        "level_asc": "ORDER BY (wp.level IS NULL) ASC, wp.level ASC, random()",
    }
    requested = (pick or "random").lower().strip()
    order_sql = order_clauses.get(requested, order_clauses["random"])

    # NOTE(review): the final SELECT reads from `candidate`, not `ins`, so a row
    # is returned even if ON CONFLICT suppressed the insert - confirm intended.
    sql = f"""
    WITH candidate AS (
        SELECT
            wp.problem_id,
            COALESCE(wp.title_ko, wp.title_en, '제목 없음') AS title,
            wp.level
        FROM workbook_problems wp
        LEFT JOIN workbook_sends ws
            ON ws.workbook_id = wp.workbook_id
            AND ws.problem_id = wp.problem_id
        WHERE wp.workbook_id = :wid
            AND ws.problem_id IS NULL
        {order_sql}
        LIMIT 1
    ),
    ins AS (
        INSERT INTO workbook_sends(workbook_id, problem_id)
        SELECT :wid, problem_id
        FROM candidate
        ON CONFLICT DO NOTHING
        RETURNING problem_id
    )
    SELECT problem_id, title, level
    FROM candidate;
    """

    result = await db.execute(text(sql), {"wid": workbook_id})
    picked = result.first()
    if not picked:
        return None, None, None

    # Persist the workbook_sends row written by the statement above.
    await db.commit()

    raw_id, raw_title, raw_level = picked[0], picked[1], picked[2]
    level = None if raw_level is None else int(raw_level)
    return int(raw_id), str(raw_title), level
|
||||
Reference in New Issue
Block a user