Feat: [main] hufs-notice-crawler CI/CD까지 구현 완료
All checks were successful
hufs-notice-crawler-cicd / build_push_deploy (push) Successful in 8m35s
All checks were successful
hufs-notice-crawler-cicd / build_push_deploy (push) Successful in 8m35s
This commit is contained in:
176
tests/test_service.py
Normal file
176
tests/test_service.py
Normal file
@@ -0,0 +1,176 @@
|
||||
from datetime import datetime
|
||||
|
||||
from sqlalchemy import select
|
||||
|
||||
from app.crawler import BOARD_CONFIG, PostDetail, PostStub
|
||||
from app.models import CrawlRun, ScrapedPost
|
||||
from app.service import CrawlService
|
||||
|
||||
|
||||
class FakeCrawler:
    """Canned replacement for the real crawler used by the service tests.

    Serves a fixed set of posts: two on "notice", one on "archive", and an
    empty "jobs" board. Anything past page 1 is always empty.
    """

    def __init__(self):
        # Mirrors the real crawler's pagination attribute so the service's
        # page loop terminates.
        self.max_pages_per_board = 2

    def _make_stub(self, board, board_key, article_id, title, post_url, published_at):
        """Build a PostStub, filling board metadata from the given config entry."""
        return PostStub(
            board_key=board_key,
            board_name=board["board_name"],
            board_id=board["board_id"],
            article_id=article_id,
            title=title,
            post_url=post_url,
            published_at=published_at,
        )

    def crawl_board_list(self, board_key: str, page: int = 1):
        """Return the canned listing for ``board_key``; empty beyond page 1."""
        if page > 1:
            return []

        # Eager lookup: an unknown board key raises KeyError, same as before.
        board = BOARD_CONFIG[board_key]

        if board_key == "notice":
            return [
                self._make_stub(
                    board,
                    "notice",
                    9001,
                    "Existing notice",
                    "https://computer.hufs.ac.kr/computer/10058/subview.do?enc=notice-9001",
                    datetime(2026, 3, 1),
                ),
                self._make_stub(
                    board,
                    "notice",
                    9002,
                    "New notice",
                    "https://computer.hufs.ac.kr/computer/10058/subview.do?enc=notice-9002",
                    datetime(2026, 3, 2),
                ),
            ]

        if board_key == "archive":
            return [
                self._make_stub(
                    board,
                    "archive",
                    9101,
                    "New archive post",
                    "https://computer.hufs.ac.kr/computer/10059/subview.do?enc=archive-9101",
                    datetime(2026, 3, 3),
                )
            ]

        # "jobs" and any other configured board list nothing.
        return []

    def crawl_post_detail(self, stub: PostStub):
        """Expand a stub into a full PostDetail with synthetic body fields."""
        return PostDetail(
            board_key=stub.board_key,
            board_name=stub.board_name,
            board_id=stub.board_id,
            article_id=stub.article_id,
            title=stub.title,
            post_url=stub.post_url,
            author="admin",
            published_at=stub.published_at,
            summary=f"{stub.title} summary",
            content_text=f"{stub.title} content",
            attachments=[
                {"name": f"{stub.article_id}.pdf", "url": f"https://example.com/files/{stub.article_id}.pdf"}
            ],
        )
|
||||
|
||||
|
||||
def test_crawl_service_saves_only_new_posts(db_session):
    """Only articles absent from the DB are persisted and reported as new."""
    # Pre-store notice 9001 so the crawler's first stub is a known duplicate.
    db_session.add(
        ScrapedPost(
            board_key="notice",
            board_name="공지사항",
            board_id=1926,
            article_id=9001,
            title="Existing notice",
            post_url="https://computer.hufs.ac.kr/computer/10058/subview.do?enc=notice-9001",
            author="admin",
            published_at=datetime(2026, 3, 1),
            summary="Already stored post",
            content_text="Already stored content",
            attachments=[],
        )
    )
    db_session.commit()

    svc = CrawlService(db_session)
    svc.crawler = FakeCrawler()

    result = svc.crawl_new_posts()

    # Table was non-empty before crawling, so this is not a bootstrap run.
    assert result.bootstrap_mode is False
    assert result.bootstrap_inserted_count == 0
    assert result.new_posts_count == 2
    assert [p.article_id for p in result.new_posts] == [9002, 9101]
    assert result.latest_posts_by_board == []

    # One seeded row plus the two newly crawled ones.
    stored = db_session.scalars(
        select(ScrapedPost).order_by(ScrapedPost.board_key, ScrapedPost.article_id)
    ).all()
    assert len(stored) == 3

    latest_run = db_session.scalars(select(CrawlRun).order_by(CrawlRun.id.desc())).first()
    assert latest_run is not None
    assert latest_run.status == "success"
    assert latest_run.inserted_count == 2
|
||||
|
||||
|
||||
def test_crawl_service_returns_zero_when_no_new_posts(db_session):
    """Service reports zero new posts when every listed article is already stored."""
    # Seed one stored article per configured board so nothing looks new.
    for board_key, cfg in BOARD_CONFIG.items():
        db_session.add(
            ScrapedPost(
                board_key=board_key,
                board_name=cfg["board_name"],
                board_id=cfg["board_id"],
                article_id=1,
                title="Existing post",
                post_url=f"https://computer.hufs.ac.kr/computer/99999/subview.do?enc={board_key}-1",
                author="admin",
                published_at=datetime(2026, 3, 1),
                summary="Existing summary",
                content_text="Existing content",
                attachments=[],
            )
        )
    db_session.commit()

    class NoNewPostCrawler(FakeCrawler):
        # Every board lists exactly the already-seeded article.
        def crawl_board_list(self, board_key: str, page: int = 1):
            if page > 1:
                return []
            cfg = BOARD_CONFIG[board_key]
            return [
                PostStub(
                    board_key=board_key,
                    board_name=cfg["board_name"],
                    board_id=cfg["board_id"],
                    article_id=1,
                    title="Existing post",
                    post_url=f"https://computer.hufs.ac.kr/computer/99999/subview.do?enc={board_key}-1",
                    published_at=datetime(2026, 3, 1),
                )
            ]

    svc = CrawlService(db_session)
    svc.crawler = NoNewPostCrawler()

    result = svc.crawl_new_posts()

    assert result.bootstrap_mode is False
    assert result.new_posts_count == 0
    assert result.new_posts == []
    # All three boards still report their latest known post.
    assert [entry.board_key for entry in result.latest_posts_by_board] == ["notice", "archive", "jobs"]
|
||||
|
||||
|
||||
def test_crawl_service_bootstrap_saves_posts_without_returning_them(db_session):
    """On an empty DB the service bootstraps: stores everything, reports nothing new."""
    svc = CrawlService(db_session)
    svc.crawler = FakeCrawler()

    result = svc.crawl_new_posts()

    assert result.bootstrap_mode is True
    assert result.bootstrap_inserted_count == 3
    assert result.new_posts_count == 0
    assert result.new_posts == []
    # FakeCrawler's "jobs" board is empty, so only two boards report a latest post.
    assert [entry.board_key for entry in result.latest_posts_by_board] == ["notice", "archive"]

    # All three crawled posts were persisted even though none were "new".
    stored = db_session.scalars(select(ScrapedPost)).all()
    assert len(stored) == 3
|
||||
Reference in New Issue
Block a user