Files
hufs-notice-crawler/tests/test_service.py
nkey ca460453af
All checks were successful
hufs-notice-crawler-cicd / build_push_deploy (push) Successful in 8m35s
Feat: [main] hufs-notice-crawler CI/CD까지 구현 완료
2026-03-17 17:18:16 +09:00

177 lines
6.0 KiB
Python

from datetime import datetime
from sqlalchemy import select
from app.crawler import BOARD_CONFIG, PostDetail, PostStub
from app.models import CrawlRun, ScrapedPost
from app.service import CrawlService
class FakeCrawler:
    """In-memory stand-in for the real crawler used by the service tests.

    Canned behaviour per board:
      * ``notice``  -> article 9001 ("Existing notice") and 9002 ("New notice")
      * ``archive`` -> article 9101 ("New archive post")
      * ``jobs``    -> nothing
    Only page 1 ever returns content; any later page is empty.
    """

    def __init__(self):
        # Mirrors the real crawler's pagination attribute read by the service.
        self.max_pages_per_board = 2

    def crawl_board_list(self, board_key: str, page: int = 1):
        """Return the fixed list of PostStub objects for *board_key* (page 1 only)."""
        if page > 1:
            return []
        board = BOARD_CONFIG[board_key]

        def stub(article_id, title, url, published):
            # Small factory so each canned post only spells out what differs.
            return PostStub(
                board_key=board_key,
                board_name=board["board_name"],
                board_id=board["board_id"],
                article_id=article_id,
                title=title,
                post_url=url,
                published_at=published,
            )

        if board_key == "notice":
            return [
                stub(
                    9001,
                    "Existing notice",
                    "https://computer.hufs.ac.kr/computer/10058/subview.do?enc=notice-9001",
                    datetime(2026, 3, 1),
                ),
                stub(
                    9002,
                    "New notice",
                    "https://computer.hufs.ac.kr/computer/10058/subview.do?enc=notice-9002",
                    datetime(2026, 3, 2),
                ),
            ]
        if board_key == "archive":
            return [
                stub(
                    9101,
                    "New archive post",
                    "https://computer.hufs.ac.kr/computer/10059/subview.do?enc=archive-9101",
                    datetime(2026, 3, 3),
                ),
            ]
        # "jobs" and any unknown board yield no posts.
        return []

    def crawl_post_detail(self, stub: PostStub):
        """Expand a list stub into a deterministic, fully populated PostDetail."""
        return PostDetail(
            board_key=stub.board_key,
            board_name=stub.board_name,
            board_id=stub.board_id,
            article_id=stub.article_id,
            title=stub.title,
            post_url=stub.post_url,
            author="admin",
            published_at=stub.published_at,
            summary=f"{stub.title} summary",
            content_text=f"{stub.title} content",
            attachments=[
                {"name": f"{stub.article_id}.pdf", "url": f"https://example.com/files/{stub.article_id}.pdf"}
            ],
        )
def test_crawl_service_saves_only_new_posts(db_session):
    """A crawl over a pre-seeded DB persists only the posts not seen before."""
    seeded = ScrapedPost(
        board_key="notice",
        board_name="공지사항",
        board_id=1926,
        article_id=9001,
        title="Existing notice",
        post_url="https://computer.hufs.ac.kr/computer/10058/subview.do?enc=notice-9001",
        author="admin",
        published_at=datetime(2026, 3, 1),
        summary="Already stored post",
        content_text="Already stored content",
        attachments=[],
    )
    db_session.add(seeded)
    db_session.commit()

    service = CrawlService(db_session)
    service.crawler = FakeCrawler()
    result = service.crawl_new_posts()

    # The DB was non-empty, so this is a regular (non-bootstrap) crawl.
    assert result.bootstrap_mode is False
    assert result.bootstrap_inserted_count == 0
    # Exactly the two fake posts beyond the seeded one are reported as new.
    assert result.new_posts_count == 2
    assert [p.article_id for p in result.new_posts] == [9002, 9101]
    # No per-board fallback listing when there were new posts to report.
    assert result.latest_posts_by_board == []

    stored = db_session.scalars(
        select(ScrapedPost).order_by(ScrapedPost.board_key, ScrapedPost.article_id)
    ).all()
    assert len(stored) == 3  # seeded post + the two new ones

    latest_run = db_session.scalars(select(CrawlRun).order_by(CrawlRun.id.desc())).first()
    assert latest_run is not None
    assert latest_run.status == "success"
    assert latest_run.inserted_count == 2
def test_crawl_service_returns_zero_when_no_new_posts(db_session):
    """When every board only re-serves known posts, the crawl reports zero new."""
    # Seed one already-known post per configured board.
    for key, cfg in BOARD_CONFIG.items():
        db_session.add(
            ScrapedPost(
                board_key=key,
                board_name=cfg["board_name"],
                board_id=cfg["board_id"],
                article_id=1,
                title="Existing post",
                post_url=f"https://computer.hufs.ac.kr/computer/99999/subview.do?enc={key}-1",
                author="admin",
                published_at=datetime(2026, 3, 1),
                summary="Existing summary",
                content_text="Existing content",
                attachments=[],
            )
        )
    db_session.commit()

    class NoNewPostCrawler(FakeCrawler):
        """Fake that re-serves exactly the already-stored post on every board."""

        def crawl_board_list(self, board_key: str, page: int = 1):
            if page > 1:
                return []
            cfg = BOARD_CONFIG[board_key]
            return [
                PostStub(
                    board_key=board_key,
                    board_name=cfg["board_name"],
                    board_id=cfg["board_id"],
                    article_id=1,
                    title="Existing post",
                    post_url=f"https://computer.hufs.ac.kr/computer/99999/subview.do?enc={board_key}-1",
                    published_at=datetime(2026, 3, 1),
                )
            ]

    service = CrawlService(db_session)
    service.crawler = NoNewPostCrawler()
    result = service.crawl_new_posts()

    assert result.bootstrap_mode is False
    assert result.new_posts_count == 0
    assert result.new_posts == []
    # With nothing new, the latest post of every board is surfaced instead.
    assert [p.board_key for p in result.latest_posts_by_board] == ["notice", "archive", "jobs"]
def test_crawl_service_bootstrap_saves_posts_without_returning_them(db_session):
    """First crawl against an empty DB bootstraps: posts stored but not returned as new."""
    service = CrawlService(db_session)
    service.crawler = FakeCrawler()
    result = service.crawl_new_posts()

    assert result.bootstrap_mode is True
    assert result.bootstrap_inserted_count == 3
    # Bootstrap deliberately reports no "new" posts despite inserting rows.
    assert result.new_posts_count == 0
    assert result.new_posts == []
    # Only boards that yielded posts appear; "jobs" returned nothing.
    assert [p.board_key for p in result.latest_posts_by_board] == ["notice", "archive"]

    assert len(db_session.scalars(select(ScrapedPost)).all()) == 3