Feat: [main] hufs-notice-crawler CI/CD까지 구현 완료
All checks were successful
hufs-notice-crawler-cicd / build_push_deploy (push) Successful in 8m35s
All checks were successful
hufs-notice-crawler-cicd / build_push_deploy (push) Successful in 8m35s
This commit is contained in:
32
tests/conftest.py
Normal file
32
tests/conftest.py
Normal file
@@ -0,0 +1,32 @@
|
||||
from collections.abc import Generator
|
||||
|
||||
import pytest
|
||||
from sqlalchemy import create_engine
|
||||
from sqlalchemy.orm import Session, sessionmaker
|
||||
from sqlalchemy.pool import StaticPool
|
||||
|
||||
from app.db import Base
|
||||
|
||||
|
||||
@pytest.fixture()
def db_session() -> Generator[Session, None, None]:
    """Yield a SQLAlchemy session bound to a fresh in-memory SQLite database.

    Tables from ``app.db.Base`` are created before the test and dropped
    afterwards, so every test starts from a clean schema.
    """
    # StaticPool + check_same_thread=False keeps the single in-memory DB
    # usable across the whole test, regardless of which thread touches it.
    engine = create_engine(
        "sqlite://",
        future=True,
        connect_args={"check_same_thread": False},
        poolclass=StaticPool,
    )
    Base.metadata.create_all(bind=engine)

    session_factory = sessionmaker(
        bind=engine,
        autoflush=False,
        autocommit=False,
        future=True,
        expire_on_commit=False,
    )
    session = session_factory()
    try:
        yield session
    finally:
        # Always release the connection and tear the schema down,
        # even if the test body raised.
        session.close()
        Base.metadata.drop_all(bind=engine)
|
||||
64
tests/test_api.py
Normal file
64
tests/test_api.py
Normal file
@@ -0,0 +1,64 @@
|
||||
from datetime import datetime
|
||||
|
||||
from fastapi.testclient import TestClient
|
||||
|
||||
from app.main import app, get_db
|
||||
from app.schemas import CrawlResponse
|
||||
|
||||
|
||||
def test_health_endpoint(monkeypatch):
    """GET /health responds 200 with the ok-status payload."""
    # Neutralize table creation triggered on app startup.
    monkeypatch.setattr("app.main.Base.metadata.create_all", lambda bind: None)

    with TestClient(app) as client:
        res = client.get("/health")

    assert res.status_code == 200
    assert res.json() == {"status": "ok"}
|
||||
|
||||
|
||||
def test_crawl_endpoint_returns_mocked_posts(monkeypatch):
    """POST /api/v1/crawl relays the service's CrawlResponse as JSON."""
    # Neutralize table creation triggered on app startup.
    monkeypatch.setattr("app.main.Base.metadata.create_all", lambda bind: None)

    # Canned service result: exactly one freshly discovered notice post.
    mocked = CrawlResponse(
        checked_at=datetime(2026, 3, 17, 12, 0, 0),
        bootstrap_mode=False,
        bootstrap_inserted_count=0,
        new_posts_count=1,
        new_posts=[
            {
                "board_key": "notice",
                "board_name": "공지사항",
                "board_id": 1926,
                "article_id": 1001,
                "title": "테스트 공지",
                "post_url": "https://computer.hufs.ac.kr/computer/10058/subview.do?enc=test-notice-link",
                "author": "관리자",
                "published_at": datetime(2026, 3, 17),
                "summary": "요약",
                "content_text": "본문",
                "attachments": [
                    {"name": "guide.pdf", "url": "https://computer.hufs.ac.kr/files/guide.pdf"}
                ],
            }
        ],
        latest_posts_by_board=[],
    )
    monkeypatch.setattr("app.main.CrawlService.crawl_new_posts", lambda self: mocked)

    # The route never touches the session here, so a dummy dependency suffices.
    app.dependency_overrides[get_db] = lambda: iter([None])
    try:
        with TestClient(app) as client:
            response = client.post("/api/v1/crawl")
    finally:
        app.dependency_overrides.clear()

    assert response.status_code == 200
    payload = response.json()
    first = payload["new_posts"][0]
    assert payload["bootstrap_mode"] is False
    assert payload["new_posts_count"] == 1
    assert first["board_key"] == "notice"
    assert first["article_id"] == 1001
    assert "/computer/10058/subview.do?enc=" in first["post_url"]
    assert first["attachments"][0]["name"] == "guide.pdf"
    assert payload["latest_posts_by_board"] == []
|
||||
176
tests/test_service.py
Normal file
176
tests/test_service.py
Normal file
@@ -0,0 +1,176 @@
|
||||
from datetime import datetime
|
||||
|
||||
from sqlalchemy import select
|
||||
|
||||
from app.crawler import BOARD_CONFIG, PostDetail, PostStub
|
||||
from app.models import CrawlRun, ScrapedPost
|
||||
from app.service import CrawlService
|
||||
|
||||
|
||||
class FakeCrawler:
    """Deterministic crawler double: fixed post stubs per board, one page deep."""

    def __init__(self):
        # Mirrors the real crawler's pagination knob read by CrawlService.
        self.max_pages_per_board = 2

    def crawl_board_list(self, board_key: str, page: int = 1):
        """Return canned stubs for page 1 of the given board, else nothing."""
        if page > 1:
            return []

        board = BOARD_CONFIG[board_key]

        def make_stub(article_id, title, path_id, enc, day):
            # Shared shape for every stub; only the varying fields differ.
            return PostStub(
                board_key=board_key,
                board_name=board["board_name"],
                board_id=board["board_id"],
                article_id=article_id,
                title=title,
                post_url=f"https://computer.hufs.ac.kr/computer/{path_id}/subview.do?enc={enc}",
                published_at=datetime(2026, 3, day),
            )

        if board_key == "notice":
            return [
                make_stub(9001, "Existing notice", 10058, "notice-9001", 1),
                make_stub(9002, "New notice", 10058, "notice-9002", 2),
            ]
        if board_key == "archive":
            return [make_stub(9101, "New archive post", 10059, "archive-9101", 3)]
        # "jobs" and any other board yield nothing.
        return []

    def crawl_post_detail(self, stub: PostStub):
        """Expand a stub into a full detail record with synthetic content."""
        file_name = f"{stub.article_id}.pdf"
        return PostDetail(
            board_key=stub.board_key,
            board_name=stub.board_name,
            board_id=stub.board_id,
            article_id=stub.article_id,
            title=stub.title,
            post_url=stub.post_url,
            author="admin",
            published_at=stub.published_at,
            summary=f"{stub.title} summary",
            content_text=f"{stub.title} content",
            attachments=[
                {"name": file_name, "url": f"https://example.com/files/{file_name}"}
            ],
        )
|
||||
|
||||
|
||||
def test_crawl_service_saves_only_new_posts(db_session):
    """Articles already in the DB are skipped; only new ones are stored/reported."""
    # Pre-seed the notice article the fake crawler will re-discover (9001).
    seeded = ScrapedPost(
        board_key="notice",
        board_name="공지사항",
        board_id=1926,
        article_id=9001,
        title="Existing notice",
        post_url="https://computer.hufs.ac.kr/computer/10058/subview.do?enc=notice-9001",
        author="admin",
        published_at=datetime(2026, 3, 1),
        summary="Already stored post",
        content_text="Already stored content",
        attachments=[],
    )
    db_session.add(seeded)
    db_session.commit()

    service = CrawlService(db_session)
    service.crawler = FakeCrawler()
    response = service.crawl_new_posts()

    # Only the two unseen articles (9002, 9101) come back.
    assert response.bootstrap_mode is False
    assert response.bootstrap_inserted_count == 0
    assert response.new_posts_count == 2
    assert [p.article_id for p in response.new_posts] == [9002, 9101]
    assert response.latest_posts_by_board == []

    # Seeded + two new rows are persisted.
    stored = db_session.scalars(
        select(ScrapedPost).order_by(ScrapedPost.board_key, ScrapedPost.article_id)
    ).all()
    assert len(stored) == 3

    # The run record reflects a successful crawl with two inserts.
    last_run = db_session.scalars(select(CrawlRun).order_by(CrawlRun.id.desc())).first()
    assert last_run is not None
    assert last_run.status == "success"
    assert last_run.inserted_count == 2
|
||||
|
||||
|
||||
def test_crawl_service_returns_zero_when_no_new_posts(db_session):
    """When every crawled article is already stored, nothing new is reported."""
    # Seed one article (id 1) per configured board.
    for key, cfg in BOARD_CONFIG.items():
        db_session.add(
            ScrapedPost(
                board_key=key,
                board_name=cfg["board_name"],
                board_id=cfg["board_id"],
                article_id=1,
                title="Existing post",
                post_url=f"https://computer.hufs.ac.kr/computer/99999/subview.do?enc={key}-1",
                author="admin",
                published_at=datetime(2026, 3, 1),
                summary="Existing summary",
                content_text="Existing content",
                attachments=[],
            )
        )
    db_session.commit()

    class NoNewPostCrawler(FakeCrawler):
        # Every board yields only the already-seeded article.
        def crawl_board_list(self, board_key: str, page: int = 1):
            if page > 1:
                return []
            cfg = BOARD_CONFIG[board_key]
            stub = PostStub(
                board_key=board_key,
                board_name=cfg["board_name"],
                board_id=cfg["board_id"],
                article_id=1,
                title="Existing post",
                post_url=f"https://computer.hufs.ac.kr/computer/99999/subview.do?enc={board_key}-1",
                published_at=datetime(2026, 3, 1),
            )
            return [stub]

    service = CrawlService(db_session)
    service.crawler = NoNewPostCrawler()
    response = service.crawl_new_posts()

    assert response.bootstrap_mode is False
    assert response.new_posts_count == 0
    assert response.new_posts == []
    # With nothing new, the service falls back to reporting each board's latest post.
    assert [p.board_key for p in response.latest_posts_by_board] == ["notice", "archive", "jobs"]
|
||||
|
||||
|
||||
def test_crawl_service_bootstrap_saves_posts_without_returning_them(db_session):
    """First run against an empty DB stores everything but reports no new posts."""
    service = CrawlService(db_session)
    service.crawler = FakeCrawler()

    result = service.crawl_new_posts()

    # Bootstrap mode: all three fake posts are inserted silently.
    assert result.bootstrap_mode is True
    assert result.bootstrap_inserted_count == 3
    assert result.new_posts_count == 0
    assert result.new_posts == []
    # Only boards that produced posts appear (the fake "jobs" board is empty).
    assert [p.board_key for p in result.latest_posts_by_board] == ["notice", "archive"]

    assert len(db_session.scalars(select(ScrapedPost)).all()) == 3
|
||||
Reference in New Issue
Block a user