Feat: [main] hufs-notice-crawler CI/CD까지 구현 완료
All checks were successful
hufs-notice-crawler-cicd / build_push_deploy (push) Successful in 8m35s

This commit is contained in:
2026-03-17 17:18:16 +09:00
commit ca460453af
23 changed files with 1959 additions and 0 deletions

32
tests/conftest.py Normal file
View File

@@ -0,0 +1,32 @@
from collections.abc import Generator
import pytest
from sqlalchemy import create_engine
from sqlalchemy.orm import Session, sessionmaker
from sqlalchemy.pool import StaticPool
from app.db import Base
@pytest.fixture()
def db_session() -> Generator[Session, None, None]:
    """Yield a Session bound to a fresh in-memory SQLite database.

    The schema is created before the test runs and dropped afterwards so
    every test starts from a clean slate.
    """
    # StaticPool + check_same_thread=False keep one shared in-memory
    # connection alive for the fixture's whole lifetime.
    engine = create_engine(
        "sqlite://",
        future=True,
        connect_args={"check_same_thread": False},
        poolclass=StaticPool,
    )
    Base.metadata.create_all(bind=engine)
    session_factory = sessionmaker(
        bind=engine,
        autoflush=False,
        autocommit=False,
        future=True,
        expire_on_commit=False,
    )
    session = session_factory()
    try:
        yield session
    finally:
        session.close()
        Base.metadata.drop_all(bind=engine)

64
tests/test_api.py Normal file
View File

@@ -0,0 +1,64 @@
from datetime import datetime
from fastapi.testclient import TestClient
from app.main import app, get_db
from app.schemas import CrawlResponse
def test_health_endpoint(monkeypatch):
    """GET /health responds 200 with a static status payload."""
    # Skip real table creation triggered at app startup.
    monkeypatch.setattr("app.main.Base.metadata.create_all", lambda bind: None)
    with TestClient(app) as client:
        result = client.get("/health")
    assert result.status_code == 200
    assert result.json() == {"status": "ok"}
def test_crawl_endpoint_returns_mocked_posts(monkeypatch):
    """POST /api/v1/crawl returns the service result serialized as JSON."""
    # Skip real table creation triggered at app startup.
    monkeypatch.setattr("app.main.Base.metadata.create_all", lambda bind: None)
    mocked_response = CrawlResponse(
        checked_at=datetime(2026, 3, 17, 12, 0, 0),
        bootstrap_mode=False,
        bootstrap_inserted_count=0,
        new_posts_count=1,
        new_posts=[
            {
                "board_key": "notice",
                "board_name": "공지사항",
                "board_id": 1926,
                "article_id": 1001,
                "title": "테스트 공지",
                "post_url": "https://computer.hufs.ac.kr/computer/10058/subview.do?enc=test-notice-link",
                "author": "관리자",
                "published_at": datetime(2026, 3, 17),
                "summary": "요약",
                "content_text": "본문",
                "attachments": [
                    {"name": "guide.pdf", "url": "https://computer.hufs.ac.kr/files/guide.pdf"}
                ],
            }
        ],
        latest_posts_by_board=[],
    )
    monkeypatch.setattr(
        "app.main.CrawlService.crawl_new_posts",
        lambda self: mocked_response,
    )
    # The mocked service never touches the session, so a dummy generator suffices.
    app.dependency_overrides[get_db] = lambda: iter([None])
    try:
        with TestClient(app) as client:
            response = client.post("/api/v1/crawl")
    finally:
        app.dependency_overrides.clear()
    assert response.status_code == 200
    body = response.json()
    assert body["bootstrap_mode"] is False
    assert body["new_posts_count"] == 1
    first_post = body["new_posts"][0]
    assert first_post["board_key"] == "notice"
    assert first_post["article_id"] == 1001
    assert "/computer/10058/subview.do?enc=" in first_post["post_url"]
    assert first_post["attachments"][0]["name"] == "guide.pdf"
    assert body["latest_posts_by_board"] == []

176
tests/test_service.py Normal file
View File

@@ -0,0 +1,176 @@
from datetime import datetime
from sqlalchemy import select
from app.crawler import BOARD_CONFIG, PostDetail, PostStub
from app.models import CrawlRun, ScrapedPost
from app.service import CrawlService
class FakeCrawler:
    """Deterministic crawler double covering the notice/archive/jobs boards."""

    def __init__(self):
        # Mirrors the real crawler's paging attribute.
        self.max_pages_per_board = 2

    @staticmethod
    def _make_stub(board, board_key, article_id, title, post_url, published_at):
        # Shared constructor keeping board metadata in sync with BOARD_CONFIG.
        return PostStub(
            board_key=board_key,
            board_name=board["board_name"],
            board_id=board["board_id"],
            article_id=article_id,
            title=title,
            post_url=post_url,
            published_at=published_at,
        )

    def crawl_board_list(self, board_key: str, page: int = 1):
        # Only the first page carries results; deeper pages are empty.
        if page > 1:
            return []
        board = BOARD_CONFIG[board_key]
        if board_key == "notice":
            return [
                self._make_stub(
                    board,
                    "notice",
                    9001,
                    "Existing notice",
                    "https://computer.hufs.ac.kr/computer/10058/subview.do?enc=notice-9001",
                    datetime(2026, 3, 1),
                ),
                self._make_stub(
                    board,
                    "notice",
                    9002,
                    "New notice",
                    "https://computer.hufs.ac.kr/computer/10058/subview.do?enc=notice-9002",
                    datetime(2026, 3, 2),
                ),
            ]
        if board_key == "archive":
            return [
                self._make_stub(
                    board,
                    "archive",
                    9101,
                    "New archive post",
                    "https://computer.hufs.ac.kr/computer/10059/subview.do?enc=archive-9101",
                    datetime(2026, 3, 3),
                )
            ]
        # "jobs" and any other board list nothing.
        return []

    def crawl_post_detail(self, stub: PostStub):
        """Expand a list stub into a full detail record with one attachment."""
        attachment = {
            "name": f"{stub.article_id}.pdf",
            "url": f"https://example.com/files/{stub.article_id}.pdf",
        }
        return PostDetail(
            board_key=stub.board_key,
            board_name=stub.board_name,
            board_id=stub.board_id,
            article_id=stub.article_id,
            title=stub.title,
            post_url=stub.post_url,
            author="admin",
            published_at=stub.published_at,
            summary=f"{stub.title} summary",
            content_text=f"{stub.title} content",
            attachments=[attachment],
        )
def test_crawl_service_saves_only_new_posts(db_session):
    """Only posts missing from the DB are inserted and reported as new."""
    # Seed the DB so notice article 9001 is already known.
    db_session.add(
        ScrapedPost(
            board_key="notice",
            board_name="공지사항",
            board_id=1926,
            article_id=9001,
            title="Existing notice",
            post_url="https://computer.hufs.ac.kr/computer/10058/subview.do?enc=notice-9001",
            author="admin",
            published_at=datetime(2026, 3, 1),
            summary="Already stored post",
            content_text="Already stored content",
            attachments=[],
        )
    )
    db_session.commit()

    service = CrawlService(db_session)
    service.crawler = FakeCrawler()
    response = service.crawl_new_posts()

    assert response.bootstrap_mode is False
    assert response.bootstrap_inserted_count == 0
    assert response.new_posts_count == 2
    assert [new_post.article_id for new_post in response.new_posts] == [9002, 9101]
    assert response.latest_posts_by_board == []

    stored = db_session.scalars(
        select(ScrapedPost).order_by(ScrapedPost.board_key, ScrapedPost.article_id)
    ).all()
    assert len(stored) == 3

    latest_run = db_session.scalars(select(CrawlRun).order_by(CrawlRun.id.desc())).first()
    assert latest_run is not None
    assert latest_run.status == "success"
    assert latest_run.inserted_count == 2
def test_crawl_service_returns_zero_when_no_new_posts(db_session):
    """When every listed post already exists, no new posts are reported."""

    def make_existing_post(board_key, board):
        # One pre-stored article (id 1) per configured board.
        return ScrapedPost(
            board_key=board_key,
            board_name=board["board_name"],
            board_id=board["board_id"],
            article_id=1,
            title="Existing post",
            post_url=f"https://computer.hufs.ac.kr/computer/99999/subview.do?enc={board_key}-1",
            author="admin",
            published_at=datetime(2026, 3, 1),
            summary="Existing summary",
            content_text="Existing content",
            attachments=[],
        )

    for key, config in BOARD_CONFIG.items():
        db_session.add(make_existing_post(key, config))
    db_session.commit()

    class NoNewPostCrawler(FakeCrawler):
        # Every board lists only the already-stored article 1.
        def crawl_board_list(self, board_key: str, page: int = 1):
            if page > 1:
                return []
            board = BOARD_CONFIG[board_key]
            return [
                PostStub(
                    board_key=board_key,
                    board_name=board["board_name"],
                    board_id=board["board_id"],
                    article_id=1,
                    title="Existing post",
                    post_url=f"https://computer.hufs.ac.kr/computer/99999/subview.do?enc={board_key}-1",
                    published_at=datetime(2026, 3, 1),
                )
            ]

    service = CrawlService(db_session)
    service.crawler = NoNewPostCrawler()
    response = service.crawl_new_posts()

    assert response.bootstrap_mode is False
    assert response.new_posts_count == 0
    assert response.new_posts == []
    assert [entry.board_key for entry in response.latest_posts_by_board] == ["notice", "archive", "jobs"]
def test_crawl_service_bootstrap_saves_posts_without_returning_them(db_session):
    """An empty DB triggers bootstrap: posts are stored but not reported as new."""
    service = CrawlService(db_session)
    service.crawler = FakeCrawler()

    response = service.crawl_new_posts()

    assert response.bootstrap_mode is True
    assert response.bootstrap_inserted_count == 3
    assert response.new_posts_count == 0
    assert response.new_posts == []
    assert [entry.board_key for entry in response.latest_posts_by_board] == ["notice", "archive"]
    assert len(db_session.scalars(select(ScrapedPost)).all()) == 3