Feat: [main] hufs-notice-crawler CI/CD까지 구현 완료

2026-03-17 17:18:16 +09:00
commit ca460453af
23 changed files with 1959 additions and 0 deletions
--- a/.dockerignore
+++ b/.dockerignore
@@ -0,0 +1,12 @@
+.git
+.gitignore
+venv
+.venv
+__pycache__
+*.pyc
+*.pyo
+*.pyd
+.pytest_cache
+.mypy_cache
+.ruff_cache
+.env
--- a/.env.example
+++ b/.env.example
@@ -0,0 +1,8 @@
+APP_ENV=production
+POSTGRES_HOST=postgres
+POSTGRES_PORT=5432
+POSTGRES_DB=hufs_notice_crawler
+BASE_URL=https://computer.hufs.ac.kr
+REQUEST_TIMEOUT_SECONDS=15
+MAX_PAGES_PER_BOARD=5
+DOCKER_IMAGE=your-dockerhub-id/hufs-notice-crawler:latest
--- a/.github/workflows/deploy.yml
+++ b/.github/workflows/deploy.yml
@@ -0,0 +1,136 @@
+# CI/CD pipeline: builds the crawler image, pushes it to Docker Hub, and redeploys it with docker compose.
+name: hufs-notice-crawler-cicd
+
+# Triggered by every push to the main branch.
+on:
+  push:
+    branches: ["main"]
+
+jobs:
+  # Single job: checkout, build, push, deploy, and notify all run as sequential steps.
+  build_push_deploy:
+    runs-on: ubuntu-latest
+    steps:
+      # Installs the deploy key and an SSH host alias (nkey-gitea) that the checkout step uses as its git remote.
+      - name: Setup SSH for Gitea
+        env:
+          # The private key reaches the script through the environment rather than being inlined into it.
+          SSH_PRIVATE_KEY: ${{ secrets.NKEY_SSH_PRIVATE_KEY }}
+        # Script outline: write the key with 0600 permissions, record the server's host keys with
+        # ssh-keyscan (port 2222), then append the nkey-gitea host entry to ~/.ssh/config.
+        run: |
+          set -euo pipefail
+
+          mkdir -p ~/.ssh
+          chmod 700 ~/.ssh
+
+          echo "$SSH_PRIVATE_KEY" > ~/.ssh/id_ed25519
+          chmod 600 ~/.ssh/id_ed25519
+
+          ssh-keyscan -p 2222 -t rsa,ed25519 nkeystudy.site >> ~/.ssh/known_hosts
+          chmod 644 ~/.ssh/known_hosts
+
+          cat >> ~/.ssh/config <<'EOF'
+          Host nkey-gitea
+            HostName nkeystudy.site
+            User git
+            Port 2222
+            IdentityFile ~/.ssh/id_ed25519
+            IdentitiesOnly yes
+          EOF
+          chmod 600 ~/.ssh/config
+
+      # Shallow-fetches exactly the pushed commit through the nkey-gitea SSH alias and checks it out.
+      - name: Manual checkout via SSH
+        env:
+          REPO: ${{ github.repository }}
+          SHA: ${{ github.sha }}
+        run: |
+          set -euo pipefail
+
+          REMOTE_URL="nkey-gitea:${REPO}.git"
+
+          git init .
+          git remote add origin "${REMOTE_URL}"
+
+          # Only the single commit that triggered the run is downloaded.
+          git fetch --depth=1 --no-tags --prune origin "${SHA}"
+          git checkout --quiet FETCH_HEAD
+
+      # Verifies the docker CLI works and installs the compose plugin via apt when it is missing.
+      - name: Ensure docker compose available
+        run: |
+          set -euo pipefail
+
+          # Succeeds only when the compose plugin responds; its output is discarded.
+          compose_available() {
+            docker compose version >/dev/null 2>&1
+          }
+
+          docker version
+          if ! compose_available; then
+            sudo apt-get update
+            sudo apt-get install --yes docker-compose-plugin
+          fi
+          docker compose version
+
+      # Logs in to the registry; the token is fed on stdin so it never appears in the process argument list.
+      - name: Docker login
+        env:
+          DOCKERHUB_USER: ${{ secrets.DOCKERHUB_USERNAME }}
+          DOCKERHUB_TOKEN: ${{ secrets.DOCKERHUB_TOKEN }}
+        run: |
+          set -euo pipefail
+          docker login -u "${DOCKERHUB_USER}" --password-stdin <<< "${DOCKERHUB_TOKEN}"
+
+      # Publishes step output `version_tag`: the x.y.z found inside a [x.y.z] marker in the head commit
+      # message, or an empty string when the message carries no such marker.
+      - name: Extract image version from commit message
+        id: version
+        env:
+          COMMIT_MESSAGE: ${{ github.event.head_commit.message }}
+        run: |
+          set -euo pipefail
+
+          VERSION_PATTERN='\[([0-9]+\.[0-9]+\.[0-9]+)\]'
+
+          # sed -n prints only lines where the substitution matched, so the result is empty without a marker;
+          # head keeps the first matching line of a multi-line message.
+          VERSION_TAG="$(printf '%s' "${COMMIT_MESSAGE}" | sed -nE "s/.*${VERSION_PATTERN}.*/\1/p" | head -n1)"
+
+          echo "version_tag=${VERSION_TAG}" >> "$GITHUB_OUTPUT"
+
+      # Builds the image once and pushes it as :latest, plus :<version> when the previous step found a version tag.
+      - name: Build and push image
+        env:
+          DOCKERHUB_USER: ${{ secrets.DOCKERHUB_USERNAME }}
+          IMAGE_NAME: hufs-notice-crawler
+          VERSION_TAG: ${{ steps.version.outputs.version_tag }}
+        run: |
+          set -euo pipefail
+
+          REPOSITORY="${DOCKERHUB_USER}/${IMAGE_NAME}"
+
+          # The version tag (when present) comes first so it is pushed before latest.
+          TAGS=()
+          if [ -n "${VERSION_TAG}" ]; then
+            TAGS+=("${VERSION_TAG}")
+          fi
+          TAGS+=("latest")
+
+          BUILD_ARGS=()
+          for TAG in "${TAGS[@]}"; do
+            BUILD_ARGS+=(-t "${REPOSITORY}:${TAG}")
+          done
+          docker build "${BUILD_ARGS[@]}" .
+
+          for TAG in "${TAGS[@]}"; do
+            docker push "${REPOSITORY}:${TAG}"
+          done
+
+      # Pulls the freshly pushed image and recreates only the hufs-notice-crawler service of the shared compose project.
+      - name: Deploy on server (compose pull/up)
+        run: |
+          set -euo pipefail
+
+          SERVICE="hufs-notice-crawler"
+          COMPOSE=(docker compose -p nkeys-apps -f /nkeysworld/compose.apps.yml)
+
+          "${COMPOSE[@]}" pull "${SERVICE}"
+          "${COMPOSE[@]}" up -d "${SERVICE}"
+
+          # Remove dangling images left behind by the replaced container.
+          docker image prune -f
+
+      # Posts a success/failure embed to Discord. Runs even when an earlier step failed (if: always()).
+      - name: Discord Notification
+        if: always()
+        env:
+          DISCORD_WEBHOOK: ${{ secrets.DISCORD_WEBHOOK }}
+          # Context values are handed over as environment variables instead of being expanded
+          # inside the script text, so their content can never be interpreted as shell code.
+          JOB_STATUS: ${{ job.status }}
+          REPO: ${{ github.repository }}
+          SHA: ${{ github.sha }}
+          ACTOR: ${{ github.actor }}
+          IMAGE_VERSION: ${{ steps.version.outputs.version_tag || 'latest only' }}
+        run: |
+          set -euo pipefail
+
+          # A missing webhook secret must not turn an otherwise successful deploy into a failed job.
+          if [ -z "${DISCORD_WEBHOOK}" ]; then
+            echo "DISCORD_WEBHOOK is not configured; skipping notification."
+            exit 0
+          fi
+
+          if [ "${JOB_STATUS}" = "success" ]; then
+            STATUS="SUCCESS"
+            COLOR=3066993
+            DESC="HUFS notice crawler build/push/deploy succeeded."
+          else
+            STATUS="FAILURE"
+            COLOR=15158332
+            DESC="HUFS notice crawler build or deploy failed."
+          fi
+
+          TIMESTAMP="$(date -u +%Y-%m-%dT%H:%M:%SZ)"
+
+          # The JSON body is single-quoted; each '"${VAR}"' briefly leaves the quotes to splice a value in.
+          curl -X POST -H "Content-Type: application/json" \
+          -d '{
+            "embeds": [{
+              "title": "HUFS Notice Crawler CI/CD - '"${STATUS}"'",
+              "description": "'"${DESC}"'",
+              "fields": [
+                { "name": "Repo", "value": "'"${REPO}"'", "inline": true },
+                { "name": "Commit", "value": "`'"${SHA}"'`", "inline": true },
+                { "name": "Actor", "value": "'"${ACTOR}"'", "inline": true },
+                { "name": "Image Version", "value": "`'"${IMAGE_VERSION}"'`", "inline": true }
+              ],
+              "color": '"${COLOR}"',
+              "timestamp": "'"${TIMESTAMP}"'"
+            }]
+          }' "${DISCORD_WEBHOOK}"
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,11 @@
+venv/
+.venv/
+__pycache__/
+*.pyc
+*.pyo
+*.pyd
+.env
+.pytest_cache/
+.mypy_cache/
+.ruff_cache/
+docker-compose.yml
--- a/20
+++ b/20
@@ -0,0 +1,20 @@
+# Runtime image for the FastAPI crawler service.
+FROM python:3.12-slim
+
+# Do not write .pyc files, and keep stdout/stderr unbuffered so logs show up immediately.
+ENV PYTHONDONTWRITEBYTECODE=1
+ENV PYTHONUNBUFFERED=1
+
+WORKDIR /app
+
+# The dependency manifest is copied on its own so the install layer stays cached
+# until requirements.txt changes.
+COPY requirements.txt .
+
+RUN pip install --no-cache-dir --upgrade pip \
+    && pip install --no-cache-dir -r requirements.txt
+
+COPY app ./app
+COPY sql ./sql
+COPY .env.example ./.env.example
+COPY README.md ./README.md
+
+# Drop root: the service only binds the unprivileged port 8000.
+RUN groupadd --system app \
+    && useradd --system --gid app --no-create-home app
+USER app
+
+EXPOSE 8000
+
+CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]
--- a/README.md
+++ b/README.md
@@ -0,0 +1,162 @@
+# HUFS 컴퓨터공학부 공지 크롤러
+
+`HUFS 컴퓨터공학부` 사이트의 다음 게시판을 크롤링하는 `FastAPI` 백엔드입니다.
+
+- 공지사항
+- 자료실
+- 취업정보
+
+`n8n`이 `POST /api/v1/crawl`을 주기적으로 호출하면, 서버는 게시판을 다시 확인하고 `PostgreSQL`에 저장된 기존 글과 비교해 새 글만 반환합니다.
+
+## 문서
+
+- 서비스 설명: [`README.md`](README.md)
+- 운영/배포: [`README.operation.md`](README.operation.md)
+- 테스트: [`README.test.md`](README.test.md)
+- n8n 연동: [`README.n8n.md`](README.n8n.md)
+
+## 주요 기능
+
+- 공지사항, 자료실, 취업정보 게시판 크롤링
+- 게시판별 `article_id` 기준 신규 글 판별
+- 제목, 작성자, 게시일, 본문 텍스트, 첨부파일 링크 정리
+- 사용자에게 보이는 `subview.do?enc=...` 링크 반환
+- 최초 1회 실행 시 `bootstrap mode`로 기존 글 알림 폭주 방지
+- `new_posts_count == 0`일 때만 `latest_posts_by_board` 제공
+- Docker Hub 이미지 배포 및 `docker compose pull` 운영 지원
+
+## 동작 방식
+
+1. `n8n`이 `POST /api/v1/crawl` 요청을 보냅니다.
+2. 서버가 세 게시판 목록 페이지를 크롤링합니다.
+3. 각 게시글의 `article_id`를 DB와 비교합니다.
+4. DB에 없는 글만 상세 페이지를 추가 크롤링합니다.
+5. 정리된 데이터를 응답으로 반환하고 DB에 저장합니다.
+6. 다음 실행부터는 이미 저장된 글은 제외됩니다.
+
+## bootstrap mode
+
+최초 실행 시 예전 글 알림이 한꺼번에 나가는 것을 막기 위해 `bootstrap mode`를 사용합니다.
+
+판단 기준:
+
+- `scraped_posts` 테이블이 비어 있으면 `bootstrap_mode = true`
+- 저장된 글이 하나라도 있으면 `bootstrap_mode = false`
+
+동작:
+
+- `bootstrap_mode = true`
+  - 기존 글을 DB에 저장만 함
+  - `new_posts_count = 0`
+  - `new_posts = []`
+- 이후부터는 일반 신규 감지 모드로 동작
+
+즉, 첫 실행에서 기존 공지가 한꺼번에 Discord/Slack으로 쏟아지는 문제를 막습니다.
+
+## API
+
+### `GET /health`
+
+서버 상태 확인용입니다.
+
+응답:
+
+```json
+{ "status": "ok" }
+```
+
+### `POST /api/v1/crawl`
+
+세 게시판을 크롤링해 새 글만 반환합니다.
+
+응답 필드:
+
+- `checked_at`: 크롤링 시각
+- `bootstrap_mode`: bootstrap 실행 여부
+- `bootstrap_inserted_count`: bootstrap 시 저장된 글 수
+- `new_posts_count`: 실제 신규 글 수
+- `new_posts`: 신규 글 목록
+- `latest_posts_by_board`: 게시판별 최신 글
+  - `new_posts_count == 0`일 때만 포함
+  - 별도 추가 요청이 아니라 실제 크롤링 결과를 재사용
+
+응답 예시:
+
+```json
+{
+  "checked_at": "2026-03-17T00:00:00Z",
+  "bootstrap_mode": false,
+  "bootstrap_inserted_count": 0,
+  "new_posts_count": 1,
+  "new_posts": [
+    {
+      "board_key": "notice",
+      "board_name": "공지사항",
+      "board_id": 1926,
+      "article_id": 249714,
+      "title": "예시 제목",
+      "post_url": "https://computer.hufs.ac.kr/computer/10058/subview.do?enc=...",
+      "author": "computer",
+      "published_at": "2026-03-17T00:00:00",
+      "summary": "본문 요약",
+      "content_text": "정리된 본문 텍스트",
+      "attachments": [
+        {
+          "name": "첨부파일.pdf",
+          "url": "https://computer.hufs.ac.kr/..."
+        }
+      ]
+    }
+  ],
+  "latest_posts_by_board": []
+}
+```
+
+## 환경 변수
+
+`.env.example`을 참고해서 `.env`를 준비합니다.
+
+```env
+APP_ENV=production
+DB_USER=postgres
+DB_PASSWORD=postgres
+POSTGRES_HOST=postgres
+POSTGRES_PORT=5432
+POSTGRES_DB=hufs_notice_crawler
+BASE_URL=https://computer.hufs.ac.kr
+REQUEST_TIMEOUT_SECONDS=15
+MAX_PAGES_PER_BOARD=5
+DOCKER_IMAGE=your-dockerhub-id/hufs-notice-crawler:latest
+```
+
+- `APP_ENV`: 실행 환경
+- `DB_USER`: PostgreSQL 사용자 -> 공통 .env에 있음
+- `DB_PASSWORD`: PostgreSQL 비밀번호 -> 공통 .env에 있음
+- `POSTGRES_HOST`: PostgreSQL 호스트
+- `POSTGRES_PORT`: PostgreSQL 포트
+- `POSTGRES_DB`: 데이터베이스 이름
+- `BASE_URL`: 기본 크롤링 대상 사이트
+- `REQUEST_TIMEOUT_SECONDS`: 외부 요청 타임아웃
+- `MAX_PAGES_PER_BOARD`: 게시판별 최대 확인 페이지 수
+- `DOCKER_IMAGE`: Docker Hub 이미지 이름
+
+## 주요 파일
+
+- [`app/main.py`](app/main.py)
+  - FastAPI 엔드포인트
+- [`app/service.py`](app/service.py)
+  - 크롤링 실행, bootstrap, DB 저장 로직
+- [`app/crawler.py`](app/crawler.py)
+  - 게시판 목록/상세 크롤러
+- [`app/models.py`](app/models.py)
+  - SQLAlchemy 모델
+- [`sql/schema.sql`](sql/schema.sql)
+  - PostgreSQL 스키마
+- [`docker-compose.yml`](docker-compose.yml)
+  - Docker Hub 이미지 pull 기반 실행
+
+## 참고
+
+- 신규 여부 판단 기준은 게시판별 `article_id`입니다.
+- 반환 링크는 `artclView.do`가 아니라 실제 사용자용 `subview.do?enc=...` 형식입니다.
+- HTML 구조가 바뀌면 [`app/crawler.py`](app/crawler.py)의 selector 조정이 필요할 수 있습니다.
--- a/README.n8n.md
+++ b/README.n8n.md
@@ -0,0 +1,342 @@
+# n8n 연동 문서
+
+이 문서는 `HUFS 컴퓨터공학부 공지 크롤러`를 `n8n`과 연결해 Discord Webhook으로 알림을 보내는 방법을 설명합니다.
+
+관련 문서:
+
+- 서비스 개요: [`README.md`](README.md)
+- 운영/배포: [`README.operation.md`](README.operation.md)
+- 테스트: [`README.test.md`](README.test.md)
+
+## 왜 n8n을 쓰는가
+
+현재는 Discord Webhook으로 보내더라도, 나중에 Slack이나 Telegram으로 바뀔 수 있습니다.
+
+권장 역할 분리:
+
+- `hufs-notice-crawler`
+  - 크롤링과 데이터 정규화만 담당
+  - 채널별 포맷은 모름
+- `n8n`
+  - 백엔드 JSON을 받아 Discord/Slack/Telegram 형식으로 변환
+
+이 구조의 장점:
+
+- 알림 채널이 바뀌어도 백엔드 코드 변경 최소화
+- 백엔드 응답을 Discord 전용 형식으로 오염시키지 않음
+- 같은 데이터를 여러 채널로 동시에 보낼 수 있음
+
+즉, 백엔드는 `generic JSON provider`, n8n은 `channel adapter`로 두는 것이 맞습니다.
+
+## 권장 워크플로우
+
+1. `Schedule Trigger`
+2. `Set`
+3. `HTTP Request`
+4. `IF - new_posts_count > 0`
+5. `IF - test_mode == true`
+6. `Code`
+7. `HTTP Request (Discord Webhook)`
+
+의미:
+
+1. 스케줄에 따라 n8n 실행
+2. `test_mode`를 켜거나 끔
+3. `hufs-notice-crawler` 호출
+4. 새 글이 있는지 확인
+5. 새 글이 없으면 test mode 여부 확인
+6. 상황에 맞는 Discord 메시지 포맷 생성
+7. Webhook 전송
+
+## 노드별 설정
+
+### 1. Schedule Trigger
+
+예시 cron:
+
+```text
+0 10,14,18 * * *
+```
+
+의미:
+
+- 매일 10:00, 14:00, 18:00 실행
+
+### 2. Set
+
+필드:
+
+- `test_mode`
+  - Boolean
+  - `true`: 테스트 기간
+  - `false`: 운영 모드
+
+의미:
+
+- `true`
+  - 새 글이 0개일 때도 "업데이트 없음 + 게시판별 최신 글" 메시지 전송
+- `false`
+  - 새 글이 0개면 아무 메시지도 보내지 않음
+
+### 3. HTTP Request
+
+역할:
+
+- 백엔드 API 호출
+
+권장 설정:
+
+- Method: `POST`
+- Response Format: `JSON`
+
+URL:
+
+- 내부 Docker network 직접 호출
+  - `http://hufs-notice-crawler:8000/api/v1/crawl`
+- nginx reverse proxy 경유 호출 -> nginx에 로그를 모으기 위해 채택
+  - `https://nkeystudy.site/api/hufs/crawl`
+
+## 백엔드 응답에서 중요한 필드
+
+- `bootstrap_mode`
+  - 최초 실행 bootstrap 여부
+- `bootstrap_inserted_count`
+  - bootstrap 시 저장된 글 수
+- `new_posts_count`
+  - 실제 신규 글 수
+- `new_posts`
+  - 신규 글 목록
+- `latest_posts_by_board`
+  - 게시판별 최신 글
+  - `new_posts_count == 0`일 때만 포함
+  - 별도 추가 요청이 아니라 실제 크롤링 결과 재사용
+
+## IF 분기
+
+### IF 1: 새 글 여부
+
+조건:
+
+- Left Value: `{{ $json.new_posts_count }}`
+- Operation: `larger`
+- Right Value: `0`
+
+분기:
+
+- True
+  - 새 글 알림용 Code 노드로 이동
+- False
+  - `test_mode` 확인용 IF 노드로 이동
+
+### IF 2: test mode 여부
+
+조건:
+
+- Left Value: `{{ $('{Set노드의 이름}').item.json.test_mode }}`
+- Operation: `is true`
+
+분기:
+
+- True
+  - "업데이트 없음" 테스트 메시지 전송
+- False
+  - 아무 메시지 없이 종료
+
+## Code 노드 중요 사항
+
+이 문서의 Code 노드 예시는 `Run Once for All Items` 기준입니다.
+
+이유:
+
+- `HTTP Request` 응답 1개 안에 `new_posts` 배열이 들어 있음
+- 이를 여러 Discord 메시지 item으로 펼쳐야 함
+
+즉 Code 노드에서:
+
+- `Mode = Run Once for All Items`
+
+를 권장합니다.
+
+`Run Once for Each Item`에서는 `return []` 또는 `map(...)`으로 여러 item을 반환할 때 에러가 날 수 있습니다.
+
+## Code 예시 1: 새 글 알림용 Discord Embed
+
+```javascript
+const data = $input.first().json;
+
+if (!data.new_posts || data.new_posts.length === 0) {
+  return [];
+}
+
+return data.new_posts.map((post) => {
+  const publishedAt = post.published_at ?? "날짜 없음";
+  const author = post.author ?? "작성자 없음";
+  const summary = post.summary ?? "요약 없음";
+  const attachments = (post.attachments || [])
+    .map((file) => `- [${file.name}](${file.url})`)
+    .join("\n");
+
+  const descriptionParts = [
+    `게시판: ${post.board_name}`,
+    `작성자: ${author}`,
+    `작성일: ${publishedAt}`,
+    "",
+    `요약: ${summary}`,
+  ];
+
+  if (attachments) {
+    descriptionParts.push("", "첨부파일:", attachments);
+  }
+
+  return {
+    json: {
+      discordPayload: {
+        embeds: [
+          {
+            title: post.title,
+            url: post.post_url,
+            description: descriptionParts.join("\n").slice(0, 4000),
+            color: 3447003,
+            footer: {
+              text: `article_id: ${post.article_id}`,
+            },
+            timestamp: post.published_at ?? undefined,
+          },
+        ],
+      },
+    },
+  };
+});
+```
+
+## Code 예시 2: 새 글 있을 때 role mention 태그
+
+```javascript
+const roleMention = "<@&123456789012345678>";
+const data = $input.first().json;
+
+if (!data.new_posts || data.new_posts.length === 0) {
+  return [];
+}
+
+return data.new_posts.map((post) => ({
+  json: {
+    discordPayload: {
+      content: `${roleMention} 새 글이 올라왔습니다.`,
+      embeds: [
+        {
+          title: post.title,
+          url: post.post_url,
+          description: `게시판: ${post.board_name}\n작성일: ${post.published_at ?? "날짜 없음"}`,
+          color: 3447003,
+        },
+      ],
+    },
+  },
+}));
+```
+
+역할 ID만 실제 Discord 서버 값으로 바꾸면 됩니다.
+
+## Code 예시 3: 업데이트 없음 테스트 메시지
+
+이 코드는:
+
+- `new_posts_count == 0`
+- `test_mode == true`
+
+일 때만 실행하는 용도입니다.
+
+```javascript
+const data = $input.first().json;
+const latestPosts = data.latest_posts_by_board || [];
+
+const lines = ["현재 새로 업데이트된 공지사항은 없습니다."];
+
+if (data.bootstrap_mode) {
+  lines.push(
+    `초기 bootstrap 저장이 수행되었습니다. 저장된 글 수: ${data.bootstrap_inserted_count}`,
+  );
+}
+
+if (latestPosts.length > 0) {
+  lines.push("", "게시판별 가장 최근 글:");
+  for (const post of latestPosts) {
+    lines.push(`- [${post.board_name}] ${post.title}`);
+    lines.push(`  ${post.post_url}`);
+  }
+}
+
+return [
+  {
+    json: {
+      discordPayload: {
+        content: lines.join("\n"),
+      },
+    },
+  },
+];
+```
+
+운영 모드에서는 이런 메시지를 보내지 않는 것을 권장합니다.
+
+## Discord Webhook 노드
+
+권장 설정:
+
+- Method: `POST`
+- URL: Discord Webhook URL
+- Send Body: `Using JSON`
+- Body:
+
+```json
+{{ $json.discordPayload }}
+```
+
+주의:
+
+- Code 노드가 여러 item을 반환하면 Discord Webhook 노드는 게시글 수만큼 여러 번 실행됩니다.
+
+## test mode 권장 동작
+
+- 운영 모드 (`test_mode = false`)
+  - 새 글 있을 때만 전송
+  - 0개일 때는 전송 안 함
+
+- 테스트 모드 (`test_mode = true`)
+  - 새 글 있으면 전송
+  - 새 글 0개여도 "업데이트 없음" 메시지 전송
+  - 게시판별 최신 글 표시
+  - bootstrap이면 bootstrap 정보도 함께 표시 가능
+
+## bootstrap과 n8n
+
+최초 실행 시:
+
+- `bootstrap_mode = true`
+- `bootstrap_inserted_count > 0`
+- `new_posts_count = 0`
+
+즉, 기존 글은 DB에 저장되지만 `new_posts`로 반환되지 않습니다.
+
+그래서:
+
+- 첫 실행에서 예전 글 알림이 쏟아지지 않음
+- test mode가 켜져 있으면 "업데이트 없음" 메시지로 상태만 확인 가능
+
+## 나중에 Slack으로 바뀌면
+
+바뀌는 것:
+
+- Webhook URL
+- Code 노드의 메시지 포맷
+- 마지막 전송 노드
+
+안 바뀌는 것:
+
+- `hufs-notice-crawler` 응답 구조
+- 크롤링 로직
+- 신규 글 판별 로직
+
+즉 채널 변경 비용을 `n8n` 수정으로 제한할 수 있습니다.
--- a/README.operation.md
+++ b/README.operation.md
@@ -0,0 +1,253 @@
+# 운영 문서
+
+이 문서는 `HUFS 컴퓨터공학부 공지 크롤러`의 운영과 배포 절차를 설명합니다.
+
+관련 문서:
+
+- 서비스 개요: [`README.md`](README.md)
+- 테스트: [`README.test.md`](README.test.md)
+- n8n 연동: [`README.n8n.md`](README.n8n.md)
+
+## PostgreSQL 준비
+
+중요:
+
+- `DB 자체는 직접 만들어야 합니다.`
+- 앱이 자동으로 생성하는 것은 `테이블`까지입니다.
+- 즉 `DATABASE_URL`로 접속할 데이터베이스는 미리 존재해야 합니다.
+
+예:
+
+```sql
+CREATE DATABASE hufs_notice_crawler;
+```
+
+필요하면 사용자와 권한도 준비합니다.
+
+```sql
+CREATE USER crawler_user WITH PASSWORD 'your-password';
+GRANT ALL PRIVILEGES ON DATABASE hufs_notice_crawler TO crawler_user;
+```
+
+## 호스트에서 Postgres 컨테이너로 DB 생성
+
+컨테이너 이름 확인:
+
+```bash
+docker ps
+```
+
+예를 들어 PostgreSQL 컨테이너 이름이 `postgres`라면:
+
+### 1. DB 생성
+
+```bash
+docker exec -it postgres psql -U postgres -d postgres -c "CREATE DATABASE hufs_notice_crawler;"
+```
+
+### 2. 사용자 생성
+
+```bash
+docker exec -it postgres psql -U postgres -d postgres -c "CREATE USER crawler_user WITH PASSWORD 'your-password';"
+```
+
+### 3. 권한 부여
+
+```bash
+docker exec -it postgres psql -U postgres -d postgres -c "GRANT ALL PRIVILEGES ON DATABASE hufs_notice_crawler TO crawler_user;"
+```
+
+### 4. 확인
+
+DB 목록:
+
+```bash
+docker exec -it postgres psql -U postgres -d postgres -c "\\l"
+```
+
+특정 DB 접속:
+
+```bash
+docker exec -it postgres psql -U postgres -d hufs_notice_crawler -c "\\dt"
+```
+
+처음에는 `\dt` 결과에 테이블이 없어도 정상입니다. 앱이 시작되면 필요한 테이블을 자동 생성합니다.
+
+## 스키마 적용
+
+스키마 파일:
+
+- [`sql/schema.sql`](sql/schema.sql)
+
+직접 적용하려면:
+
+```bash
+psql -h <POSTGRES_HOST> -U <POSTGRES_USER> -d <POSTGRES_DB> -f sql/schema.sql
+```
+
+정리:
+
+- DB가 아직 없으면 먼저 `CREATE DATABASE`
+- DB는 있고 테이블을 수동 생성하고 싶으면 `schema.sql` 적용
+- DB는 있고 앱이 자동 생성하게 둘 거면 `schema.sql` 생략 가능
+
+## bootstrap mode 운영 의미
+
+이 서비스는 최초 실행 시 `scraped_posts` 테이블이 비어 있으면 `bootstrap mode`로 동작합니다.
+
+판단 기준:
+
+- `scraped_posts`에 행이 0개
+  - `bootstrap_mode = true`
+- `scraped_posts`에 행이 1개 이상
+  - `bootstrap_mode = false`
+
+bootstrap 동작:
+
+- 기존 글을 저장만 함
+- `new_posts_count = 0`
+- `new_posts = []`
+- `latest_posts_by_board`는 `new_posts_count == 0`일 때만 포함
+
+주의:
+
+- `scraped_posts`를 전부 비우면 다음 호출은 다시 bootstrap 모드가 됩니다.
+
+## Docker 이미지 빌드
+
+```bash
+docker build -t your-dockerhub-id/hufs-notice-crawler:latest .
+```
+
+버전 태그와 같이 빌드:
+
+```bash
+docker build -t your-dockerhub-id/hufs-notice-crawler:1.0.0 -t your-dockerhub-id/hufs-notice-crawler:latest .
+```
+
+## Docker Hub 로그인 및 push
+
+로그인:
+
+```bash
+docker login
+```
+
+push:
+
+```bash
+docker push your-dockerhub-id/hufs-notice-crawler:1.0.0
+docker push your-dockerhub-id/hufs-notice-crawler:latest
+```
+
+## 서버 배포
+
+이 프로젝트는 Docker Hub 이미지를 `pull`해서 실행하는 구조입니다.
+
+### 1. `.env` 준비
+
+```env
+APP_ENV=production
+DB_USER=postgres
+DB_PASSWORD=postgres
+POSTGRES_HOST=postgres
+POSTGRES_PORT=5432
+POSTGRES_DB=hufs_notice_crawler
+BASE_URL=https://computer.hufs.ac.kr
+REQUEST_TIMEOUT_SECONDS=15
+MAX_PAGES_PER_BOARD=5
+DOCKER_IMAGE=your-dockerhub-id/hufs-notice-crawler:latest
+```
+
+### 2. `docker-compose.yml` 확인
+
+[`docker-compose.yml`](docker-compose.yml)은 `build`가 아니라 `image pull` 기반입니다.
+
+핵심 구조:
+
+```yaml
+name: nkeys-apps
+
+services:
+  hufs-notice-crawler:
+    image: ${DOCKER_IMAGE}
+    env_file:
+      - .env
+    environment:
+      - DATABASE_URL=postgresql+psycopg://${DB_USER}:${DB_PASSWORD}@${POSTGRES_HOST}:${POSTGRES_PORT}/${POSTGRES_DB}
+    networks:
+      - nkeysworld-network
+      - obs
+```
+
+포인트:
+
+- `name: nkeys-apps`
+- 외부 네트워크 `nkeysworld-network`, `obs` 사용
+- `ports:` 없음
+- 같은 Docker network 내부 통신 전제
+
+### 3. pull 및 실행
+
+```bash
+docker compose pull
+docker compose up -d
+```
+
+업데이트 배포도 동일합니다.
+
+## 내부 호출 URL
+
+같은 Docker network 내부에서 직접 호출:
+
+```text
+http://hufs-notice-crawler:8000/api/v1/crawl
+```
+
+헬스체크:
+
+```text
+http://hufs-notice-crawler:8000/health
+```
+
+## nginx reverse proxy
+
+nginx를 통해 외부에 노출할 경우 예시는 아래와 같습니다.
+
+```nginx
+location /api/hufs/ {
+    set $hufs_up http://hufs-notice-crawler:8000;
+
+    rewrite ^/api/hufs/crawl$ /api/v1/crawl break;
+    rewrite ^/api/hufs/health$ /health break;
+
+    proxy_pass $hufs_up;
+
+    proxy_set_header Host $host;
+    proxy_set_header X-Real-IP $remote_addr;
+    proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+    proxy_set_header X-Forwarded-Proto $scheme;
+
+    proxy_read_timeout 120;
+    proxy_send_timeout 120;
+}
+```
+
+외부 호출 URL:
+
+- `POST https://nkeystudy.site/api/hufs/crawl`
+- `GET https://nkeystudy.site/api/hufs/health`
+
+주의:
+
+- nginx 컨테이너도 `hufs-notice-crawler`와 같은 Docker network에 연결되어 있어야 합니다.
+- 같은 network 내부에서만 통신할 거면 `ports:`는 열 필요가 없습니다.
+
+## 로컬 개발 실행
+
+```bash
+python -m venv .venv
+source .venv/bin/activate
+pip install -r requirements.txt
+uvicorn app.main:app --host 0.0.0.0 --port 8000
+```
--- a/README.test.md
+++ b/README.test.md
@@ -0,0 +1,137 @@
+# 테스트 문서
+
+이 문서는 `HUFS 컴퓨터공학부 공지 크롤러`의 테스트 방법과 검증 범위를 설명합니다.
+
+관련 문서:
+
+- 서비스 개요: [`README.md`](README.md)
+- 운영/배포: [`README.operation.md`](README.operation.md)
+- n8n 연동: [`README.n8n.md`](README.n8n.md)
+
+## 자동 테스트 실행
+
+가상환경 예시:
+
+```bash
+python -m venv .venv
+source .venv/bin/activate
+pip install -r requirements.txt
+python -m pytest
+```
+
+## 현재 자동 테스트 범위
+
+- `GET /health` 응답 확인
+- `POST /api/v1/crawl` 응답 구조 확인
+- 신규 글만 DB에 저장되는지 확인
+- 신규 글이 0개일 때 응답이 올바른지 확인
+- 최초 bootstrap 실행 시 저장만 하고 반환하지 않는지 확인
+
+## 테스트 파일
+
+- [`tests/test_api.py`](tests/test_api.py)
+  - FastAPI 응답 구조 검증
+- [`tests/test_service.py`](tests/test_service.py)
+  - 신규 감지, bootstrap, latest_posts_by_board 동작 검증
+- [`tests/conftest.py`](tests/conftest.py)
+  - SQLite 인메모리 DB fixture
+
+## 최신 자동 테스트 결과
+
+기준 실행:
+
+```bash
+python -m pytest
+```
+
+결과:
+
+```text
+5 passed
+```
+
+의미:
+
+- API 테스트 통과
+- 서비스 로직 테스트 통과
+- bootstrap 관련 테스트 통과
+
+## 실제 사이트 수동 검증 결과
+
+`PostgreSQL` 연결 없이 실제 `HUFS 컴퓨터공학부` 사이트에 요청을 보내 크롤링 동작 여부를 확인했습니다.
+
+검증 일시:
+
+- `2026-03-17`
+
+검증 범위:
+
+- 공지사항 목록 페이지 접근
+- 자료실 목록 페이지 접근
+- 취업정보 목록 페이지 접근
+- 목록에서 `article_id`와 링크 추출
+- 상세 페이지에서 제목, 작성자, 게시일, 본문, 첨부파일 추출
+- 사용자용 `subview.do?enc=...` 링크 생성 확인
+
+확인 결과:
+
+- 공지사항 `1926` 접근 성공
+- 자료실 `1927` 접근 성공
+- 취업정보 `1929` 접근 성공
+- 크롤러가 목록 파싱 성공
+- 상세 페이지 파싱 성공
+- 자료실 샘플 게시글에서 첨부파일 링크 추출 성공
+- 반환 링크가 `subview.do?enc=...` 형식으로 생성되는 것 확인
+
+실제 확인 예시:
+
+- 공지사항
+  - 수집 수: 20개
+  - 샘플 글: `249714`
+- 자료실
+  - 수집 수: 8개
+  - 샘플 글: `239712`
+  - 첨부파일 1개 추출 확인
+- 취업정보
+  - 수집 수: 10개
+  - 샘플 글: `247506`
+
+## 문법 검증
+
+```bash
+python -m compileall app tests
+```
+
+## 경고 해석
+
+테스트 결과에 `warnings summary`가 나올 수 있습니다.
+
+- `passed`
+  - 테스트 자체는 성공
+- `warning`
+  - 추후 라이브러리 버전 업이나 파이썬 버전 변화 시 수정이 필요할 수 있는 항목
+
+현재 기준으로 보이는 경고는 대체로 다음입니다.
+
+- `fastapi.routing` 내부 `DeprecationWarning`
+  - 현재 사용 중인 FastAPI/하위 의존성과 Python 3.14 조합에서 발생 가능
+- `pytest cache` 관련 경고
+  - 캐시 디렉토리 상태 문제
+  - 테스트 실패와는 별개
+
+## 수동 확인 권장 항목
+
+운영 전에는 아래를 직접 확인하는 것이 좋습니다.
+
+1. `GET /health` 호출 시 `{"status":"ok"}` 반환
+2. `POST /api/v1/crawl` 호출 시 JSON 응답 정상
+3. 첫 실행 시 bootstrap 동작 확인
+4. 두 번째 실행부터 신규 글만 반환되는지 확인
+5. `n8n`에서 HTTP Request 노드 연동 확인
+6. PostgreSQL 연결 정보가 맞을 때 컨테이너가 정상 기동하는지 확인
+
+## 아직 직접 하지 못한 검증
+
+- 실제 PostgreSQL 연동 end-to-end 테스트
+- 실제 Docker 컨테이너 빌드/기동 테스트
+- 실제 n8n 워크플로우 전송 테스트
--- a/app/init.py
+++ b/app/init.py
@@ -0,0 +1 @@
+
--- a/app/config.py
+++ b/app/config.py
@@ -0,0 +1,30 @@
+from functools import lru_cache
+
+from pydantic import Field
+from pydantic_settings import BaseSettings, SettingsConfigDict
+
+
+class Settings(BaseSettings):
+    """Runtime configuration for the crawler service.
+
+    Built on pydantic-settings ``BaseSettings`` with ``.env`` configured as the
+    env file; the literals below are the defaults used when a value is not
+    supplied.
+    """
+
+    model_config = SettingsConfigDict(
+        env_file=".env",
+        env_file_encoding="utf-8",
+        # Keys that do not correspond to a field below are ignored.
+        extra="ignore",
+    )
+
+    # Used as the FastAPI application title.
+    app_name: str = "hufs-notice-crawler"
+    app_env: str = "development"
+    # SQLAlchemy connection URL; the default targets a local PostgreSQL instance.
+    database_url: str = Field(
+        default="postgresql+psycopg://postgres:postgres@localhost:5432/hufs_notice_crawler"
+    )
+    # Site root from which the crawler builds list, detail, and public post URLs.
+    base_url: str = "https://computer.hufs.ac.kr"
+    # Passed as the ``timeout`` argument of every outgoing HTTP request.
+    request_timeout_seconds: float = 15.0
+    max_pages_per_board: int = 5
+    # Sent as the User-Agent header of the crawler's HTTP session.
+    user_agent: str = (
+        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
+        "(KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36"
+    )
+
+
+@lru_cache(maxsize=1)
+def get_settings() -> Settings:
+    return Settings()
--- a/app/crawler.py
+++ b/app/crawler.py
@@ -0,0 +1,236 @@
+import re
+from base64 import b64encode
+from dataclasses import dataclass
+from datetime import datetime
+from urllib.parse import quote, urljoin
+
+import requests
+from bs4 import BeautifulSoup, Tag
+
+from app.config import get_settings
+
+ARTICLE_PATH_RE = re.compile(r"/bbs/computer/(?P<board_id>\d+)/(?P<article_id>\d+)/artclView\.do")
+DATE_RE = re.compile(r"(20\d{2})[-./](\d{1,2})[-./](\d{1,2})")
+
+
+BOARD_CONFIG = {
+    "notice": {"board_id": 1926, "board_name": "공지사항", "subview_id": 10058},
+    "archive": {"board_id": 1927, "board_name": "자료실", "subview_id": 10059},
+    "jobs": {"board_id": 1929, "board_name": "취업정보", "subview_id": 10077},
+}
+
+
+@dataclass
+class PostStub:
+    """Summary of one post as produced by ``HufsCrawler.crawl_board_list``."""
+
+    # Key into BOARD_CONFIG ("notice", "archive" or "jobs").
+    board_key: str
+    board_name: str
+    board_id: int
+    # Numeric id parsed from the post's artclView.do link.
+    article_id: int
+    title: str
+    # Public subview.do?enc=... link, not the raw artclView.do URL.
+    post_url: str
+    # Date found in the list row, or None when the row contains none.
+    published_at: datetime | None
+
+
+@dataclass
+class PostDetail:
+    """Full post record as produced by ``HufsCrawler.crawl_post_detail``."""
+
+    # Identification fields are copied unchanged from the originating PostStub.
+    board_key: str
+    board_name: str
+    board_id: int
+    article_id: int
+    # Title from the detail page, falling back to the stub title.
+    title: str
+    post_url: str
+    author: str | None
+    published_at: datetime | None
+    # Shortened, single-line version of content_text.
+    summary: str | None
+    content_text: str | None
+    # Each entry is a dict with "name" and "url" keys.
+    attachments: list[dict]
+
+
+class HufsCrawler:
+    def __init__(self) -> None:
+        settings = get_settings()
+        self.base_url = settings.base_url
+        self.max_pages_per_board = settings.max_pages_per_board
+        self.session = requests.Session()
+        self.session.headers.update({"User-Agent": settings.user_agent})
+        self.timeout = settings.request_timeout_seconds
+
+    def crawl_board_list(self, board_key: str, page: int = 1) -> list[PostStub]:
+        board = BOARD_CONFIG[board_key]
+        list_url = f"{self.base_url}/bbs/computer/{board['board_id']}/artclList.do"
+        response = self.session.get(
+            list_url,
+            params={"layout": "unknown", "page": page},
+            timeout=self.timeout,
+        )
+        response.raise_for_status()
+        soup = BeautifulSoup(response.text, "html.parser")
+
+        seen_article_ids: set[int] = set()
+        posts: list[PostStub] = []
+        for anchor in soup.select("a[href*='artclView.do']"):
+            href = anchor.get("href") or ""
+            match = ARTICLE_PATH_RE.search(href)
+            if not match:
+                continue
+
+            article_id = int(match.group("article_id"))
+            if article_id in seen_article_ids:
+                continue
+            seen_article_ids.add(article_id)
+
+            row = anchor.find_parent("tr")
+            posts.append(
+                PostStub(
+                    board_key=board_key,
+                    board_name=board["board_name"],
+                    board_id=board["board_id"],
+                    article_id=article_id,
+                    title=self._normalize_text(anchor.get_text(" ", strip=True)),
+                    post_url=self._build_public_post_url(
+                        subview_id=board["subview_id"],
+                        board_id=board["board_id"],
+                        article_id=article_id,
+                    ),
+                    published_at=self._extract_date_from_row(row),
+                )
+            )
+
+        return posts
+
+    def crawl_post_detail(self, stub: PostStub) -> PostDetail:
+        detail_url = self._build_detail_url(board_id=stub.board_id, article_id=stub.article_id)
+        response = self.session.get(detail_url, timeout=self.timeout)
+        response.raise_for_status()
+        soup = BeautifulSoup(response.text, "html.parser")
+
+        title = self._first_text(
+            soup,
+            [
+                ".artclView .title h2",
+                ".artclView h2",
+                ".view-title",
+                "h2",
+                "h3",
+            ],
+        ) or stub.title
+
+        content_node = self._first_node(
+            soup,
+            [
+                ".artclContents",
+                ".fr-view",
+                ".view-con",
+                ".artcl-view",
+                ".bbs--view",
+                "#artclView",
+            ],
+        )
+        content_text = self._normalize_text(content_node.get_text("\n", strip=True)) if content_node else None
+
+        author = self._extract_meta(soup, ["작성자", "writer", "등록자"])
+        published_at = self._extract_date_from_soup(soup) or stub.published_at
+        attachments = self._extract_attachments(soup)
+
+        return PostDetail(
+            board_key=stub.board_key,
+            board_name=stub.board_name,
+            board_id=stub.board_id,
+            article_id=stub.article_id,
+            title=title,
+            post_url=stub.post_url,
+            author=author,
+            published_at=published_at,
+            summary=self._make_summary(content_text),
+            content_text=content_text,
+            attachments=attachments,
+        )
+
+    def _build_detail_url(self, board_id: int, article_id: int) -> str:
+        return f"{self.base_url}/bbs/computer/{board_id}/{article_id}/artclView.do"
+
+    def _build_public_post_url(self, subview_id: int, board_id: int, article_id: int) -> str:
+        article_path = f"/bbs/computer/{board_id}/{article_id}/artclView.do?"
+        encoded_path = quote(article_path, safe="")
+        enc = b64encode(f"fnct1|@@|{encoded_path}".encode("utf-8")).decode("ascii")
+        return f"{self.base_url}/computer/{subview_id}/subview.do?enc={enc}"
+
+    def _extract_attachments(self, soup: BeautifulSoup) -> list[dict]:
+        attachments: list[dict] = []
+        seen: set[str] = set()
+        for anchor in soup.select("a[href*='download'], a[href*='fileDown'], a[href*='attach'], a[href*='FileDown']"):
+            href = anchor.get("href") or ""
+            name = self._normalize_text(anchor.get_text(" ", strip=True))
+            if not href or not name:
+                continue
+            absolute_url = urljoin(self.base_url, href)
+            if absolute_url in seen:
+                continue
+            seen.add(absolute_url)
+            attachments.append({"name": name, "url": absolute_url})
+        return attachments
+
+    def _extract_meta(self, soup: BeautifulSoup, labels: list[str]) -> str | None:
+        for label in labels:
+            label_node = soup.find(string=re.compile(label, re.IGNORECASE))
+            if not label_node:
+                continue
+            parent = label_node.parent if isinstance(label_node.parent, Tag) else None
+            if not parent:
+                continue
+            container = parent.parent if parent.parent else parent
+            candidate_text = self._normalize_text(container.get_text(" ", strip=True))
+            candidate_text = candidate_text.replace(label, "").replace(":", "").strip()
+            if candidate_text:
+                return candidate_text
+        return None
+
+    def _extract_date_from_row(self, row: Tag | None) -> datetime | None:
+        if row is None:
+            return None
+        return self._parse_date(row.get_text(" ", strip=True))
+
+    def _extract_date_from_soup(self, soup: BeautifulSoup) -> datetime | None:
+        for selector in [".artclInfo", ".view-info", ".info", "body"]:
+            node = soup.select_one(selector)
+            if not node:
+                continue
+            parsed = self._parse_date(node.get_text(" ", strip=True))
+            if parsed:
+                return parsed
+        return None
+
+    def _parse_date(self, text: str | None) -> datetime | None:
+        if not text:
+            return None
+        match = DATE_RE.search(text)
+        if not match:
+            return None
+        year, month, day = map(int, match.groups())
+        return datetime(year, month, day)
+
+    def _make_summary(self, content_text: str | None, max_length: int = 280) -> str | None:
+        if not content_text:
+            return None
+        normalized = " ".join(content_text.split())
+        if len(normalized) <= max_length:
+            return normalized
+        return normalized[: max_length - 3].rstrip() + "..."
+
+    def _first_node(self, soup: BeautifulSoup, selectors: list[str]) -> Tag | None:
+        for selector in selectors:
+            node = soup.select_one(selector)
+            if node:
+                return node
+        return None
+
+    def _first_text(self, soup: BeautifulSoup, selectors: list[str]) -> str | None:
+        node = self._first_node(soup, selectors)
+        if node:
+            return self._normalize_text(node.get_text(" ", strip=True))
+        return None
+
+    def _normalize_text(self, value: str | None) -> str:
+        if not value:
+            return ""
+        return re.sub(r"\s+", " ", value).strip()
--- a/app/db.py
+++ b/app/db.py
@@ -0,0 +1,33 @@
+from sqlalchemy import create_engine
+from sqlalchemy.orm import DeclarativeBase, sessionmaker
+
+from app.config import get_settings
+
+
+class Base(DeclarativeBase):
+    """Declarative base class that the application's ORM models inherit from."""
+
+
+# Settings are resolved once, when this module is first imported.
+settings = get_settings()
+
+# pool_pre_ping tests a pooled connection before handing it out, so a stale
+# connection is replaced instead of failing the first query that uses it.
+engine = create_engine(
+    settings.database_url,
+    future=True,
+    pool_pre_ping=True,
+)
+
+# Session factory used by get_db. expire_on_commit=False keeps loaded attribute
+# values on objects after a commit instead of expiring them.
+SessionLocal = sessionmaker(
+    bind=engine,
+    autoflush=False,
+    autocommit=False,
+    future=True,
+    expire_on_commit=False,
+)
+
+
+def get_db():
+    """Yield a new session and close it once the consumer is finished.
+
+    The session is closed whether the consumer completes normally or raises.
+    """
+    # Leaving the with-block closes the session, on success and on error alike.
+    with SessionLocal() as session:
+        yield session
--- a/app/main.py
+++ b/app/main.py
@@ -0,0 +1,30 @@
+from contextlib import asynccontextmanager
+
+from fastapi import Depends, FastAPI
+from sqlalchemy.orm import Session
+
+from app.config import get_settings
+from app.db import Base, engine, get_db
+from app.schemas import CrawlResponse
+from app.service import CrawlService
+
+
+@asynccontextmanager
+async def lifespan(_: FastAPI):
+    """Application lifespan hook: create the ORM tables, then hand control to the app."""
+    # Runs before the yield, i.e. during startup; nothing is done on shutdown.
+    Base.metadata.create_all(bind=engine)
+    yield
+
+
+# The application title comes from settings; lifespan is wired in as the startup hook.
+settings = get_settings()
+app = FastAPI(title=settings.app_name, lifespan=lifespan)
+
+
+# Health probe: returns a fixed payload and touches neither the database nor the crawler.
+@app.get("/health")
+def health() -> dict[str, str]:
+    return {"status": "ok"}
+
+
+# Runs one crawl pass with the session injected by get_db and returns its result.
+@app.post("/api/v1/crawl", response_model=CrawlResponse)
+def crawl_notices(db: Session = Depends(get_db)) -> CrawlResponse:
+    return CrawlService(db).crawl_new_posts()
--- a/app/models.py
+++ b/app/models.py
@@ -0,0 +1,51 @@
+from datetime import datetime
+
+from sqlalchemy import JSON, CheckConstraint, DateTime, Integer, String, Text, UniqueConstraint, func
+from sqlalchemy.orm import Mapped, mapped_column
+
+from app.db import Base
+
+
+class ScrapedPost(Base):
+    """ORM row for one crawled board post, stored in ``scraped_posts``."""
+
+    __tablename__ = "scraped_posts"
+    __table_args__ = (
+        # A given article may be stored only once per board.
+        UniqueConstraint("board_key", "article_id", name="uq_scraped_posts_board_article"),
+        # Only these three board keys are accepted.
+        CheckConstraint(
+            "board_key IN ('notice', 'archive', 'jobs')",
+            name="ck_scraped_posts_board_key",
+        ),
+    )
+
+    # NOTE(review): sql/schema.sql declares id as BIGSERIAL and attachments as
+    # JSONB, while this model uses Integer and the generic JSON type; confirm
+    # which definition is authoritative.
+    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
+    board_key: Mapped[str] = mapped_column(String(32), nullable=False)
+    board_name: Mapped[str] = mapped_column(String(100), nullable=False)
+    board_id: Mapped[int] = mapped_column(Integer, nullable=False)
+    article_id: Mapped[int] = mapped_column(Integer, nullable=False)
+    title: Mapped[str] = mapped_column(String(500), nullable=False)
+    post_url: Mapped[str] = mapped_column(Text, nullable=False)
+    author: Mapped[str | None] = mapped_column(String(100), nullable=True)
+    # Stored without timezone information.
+    published_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=False), nullable=True)
+    summary: Mapped[str | None] = mapped_column(Text, nullable=True)
+    content_text: Mapped[str | None] = mapped_column(Text, nullable=True)
+    # default=list gives every new row its own empty list when none is supplied.
+    attachments: Mapped[list[dict]] = mapped_column(JSON, nullable=False, default=list)
+    # Filled in by the database (NOW()) at insert time.
+    created_at: Mapped[datetime] = mapped_column(
+        DateTime(timezone=False),
+        nullable=False,
+        server_default=func.now(),
+    )
+
+
+class CrawlRun(Base):
+    """ORM row recording one crawl attempt, stored in ``crawl_runs``."""
+
+    __tablename__ = "crawl_runs"
+
+    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
+    # Filled in by the database (NOW()) at insert time.
+    started_at: Mapped[datetime] = mapped_column(
+        DateTime(timezone=False),
+        nullable=False,
+        server_default=func.now(),
+    )
+    # Stays NULL until the run finishes.
+    finished_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=False), nullable=True)
+    # Starts as "running"; CrawlService later sets "success" or "failed".
+    status: Mapped[str] = mapped_column(String(20), nullable=False, default="running")
+    discovered_count: Mapped[int] = mapped_column(Integer, nullable=False, default=0)
+    inserted_count: Mapped[int] = mapped_column(Integer, nullable=False, default=0)
+    # Text of the exception when the run failed.
+    error_message: Mapped[str | None] = mapped_column(Text, nullable=True)
--- a/app/schemas.py
+++ b/app/schemas.py
@@ -0,0 +1,41 @@
+from datetime import datetime
+
+from pydantic import BaseModel
+
+
+# One attachment of a post as exposed by the API: display name plus URL.
+class AttachmentOut(BaseModel):
+    name: str
+    url: str
+
+
+# Full representation of a stored post, used for the new_posts list of CrawlResponse.
+class PostOut(BaseModel):
+    board_key: str
+    board_name: str
+    board_id: int
+    article_id: int
+    title: str
+    post_url: str
+    # The four fields below are required keys but may carry null.
+    author: str | None
+    published_at: datetime | None
+    summary: str | None
+    content_text: str | None
+    attachments: list[AttachmentOut]
+
+
+# Lightweight view of a board's top-listed post: no author, body or attachments.
+class LatestBoardPostOut(BaseModel):
+    board_key: str
+    board_name: str
+    board_id: int
+    article_id: int
+    title: str
+    post_url: str
+    published_at: datetime | None
+
+
+# Response body of POST /api/v1/crawl.
+class CrawlResponse(BaseModel):
+    checked_at: datetime
+    # True when the crawl ran against an empty scraped_posts table.
+    bootstrap_mode: bool
+    # Number of posts stored during a bootstrap run; 0 otherwise.
+    bootstrap_inserted_count: int
+    # Number of entries in new_posts; 0 during a bootstrap run.
+    new_posts_count: int
+    new_posts: list[PostOut]
+    # CrawlService fills this only when new_posts is empty.
+    latest_posts_by_board: list[LatestBoardPostOut]
--- a/app/service.py
+++ b/app/service.py
@@ -0,0 +1,142 @@
+from datetime import UTC, datetime
+
+from sqlalchemy import select
+from sqlalchemy.orm import Session
+
+from app.crawler import BOARD_CONFIG, HufsCrawler, PostStub
+from app.models import CrawlRun, ScrapedPost
+from app.schemas import AttachmentOut, CrawlResponse, LatestBoardPostOut, PostOut
+
+
+class CrawlService:
+    """Runs one crawl pass over every board in ``BOARD_CONFIG`` and stores unseen posts.
+
+    Each pass is recorded as a ``CrawlRun`` row. A pass against an empty
+    ``scraped_posts`` table is a "bootstrap": posts are saved but are not
+    reported as new in the response.
+    """
+
+    def __init__(self, db: Session) -> None:
+        """Bind the service to ``db`` and create the crawler it will use."""
+        self.db = db
+        self.crawler = HufsCrawler()
+
+    def crawl_new_posts(self) -> CrawlResponse:
+        """Crawl all boards, persist unseen posts and build the API response.
+
+        Returns:
+            A ``CrawlResponse``. ``new_posts`` lists the posts inserted by this
+            pass (always empty in bootstrap mode). ``latest_posts_by_board``
+            is filled only when no new posts are reported, with the first stub
+            of page 1 for each board that returned any.
+
+        Raises:
+            Exception: whatever the crawler or the database raised, re-raised
+                after the run has been marked ``failed``.
+        """
+        bootstrap_mode = self._is_bootstrap_mode()
+
+        run = CrawlRun(status="running", discovered_count=0, inserted_count=0)
+        self.db.add(run)
+        self.db.commit()
+        self.db.refresh(run)
+
+        inserted_posts: list[ScrapedPost] = []
+        latest_posts_by_board_map: dict[str, PostStub] = {}
+        try:
+            for board_key in BOARD_CONFIG:
+                board_inserted_posts, latest_stub = self._crawl_board(board_key)
+                inserted_posts.extend(board_inserted_posts)
+                if latest_stub is not None:
+                    latest_posts_by_board_map[board_key] = latest_stub
+
+            run.status = "success"
+            run.discovered_count = len(inserted_posts)
+            run.inserted_count = len(inserted_posts)
+            run.finished_at = datetime.now(UTC).replace(tzinfo=None)
+            self.db.add(run)
+            self.db.commit()
+        except Exception as exc:
+            # A database error can leave the current transaction aborted; without
+            # a rollback the commit below would fail too and hide the original
+            # error. Roll back BEFORE touching ``run``: rollback discards
+            # uncommitted attribute changes.
+            self.db.rollback()
+            run.status = "failed"
+            run.error_message = str(exc)
+            run.finished_at = datetime.now(UTC).replace(tzinfo=None)
+            self.db.add(run)
+            self.db.commit()
+            raise
+
+        # Bootstrap runs store posts silently, so nothing is reported as new.
+        response_posts = [] if bootstrap_mode else inserted_posts
+        # The per-board "latest" list is only sent when there is nothing new to report.
+        response_latest_posts = (
+            list(latest_posts_by_board_map.values()) if not response_posts else []
+        )
+        return CrawlResponse(
+            checked_at=datetime.now(UTC),
+            bootstrap_mode=bootstrap_mode,
+            bootstrap_inserted_count=len(inserted_posts) if bootstrap_mode else 0,
+            new_posts_count=len(response_posts),
+            new_posts=[
+                PostOut(
+                    board_key=post.board_key,
+                    board_name=post.board_name,
+                    board_id=post.board_id,
+                    article_id=post.article_id,
+                    title=post.title,
+                    post_url=post.post_url,
+                    author=post.author,
+                    published_at=post.published_at,
+                    summary=post.summary,
+                    content_text=post.content_text,
+                    attachments=[
+                        AttachmentOut(name=item["name"], url=item["url"])
+                        for item in (post.attachments or [])
+                    ],
+                )
+                for post in response_posts
+            ],
+            latest_posts_by_board=[
+                LatestBoardPostOut(
+                    board_key=post.board_key,
+                    board_name=post.board_name,
+                    board_id=post.board_id,
+                    article_id=post.article_id,
+                    title=post.title,
+                    post_url=post.post_url,
+                    published_at=post.published_at,
+                )
+                for post in response_latest_posts
+            ],
+        )
+
+    def _is_bootstrap_mode(self) -> bool:
+        """Return ``True`` when ``scraped_posts`` holds no row at all."""
+        first_saved_post = self.db.scalar(select(ScrapedPost.id).limit(1))
+        return first_saved_post is None
+
+    def _crawl_board(self, board_key: str) -> tuple[list[ScrapedPost], PostStub | None]:
+        """Crawl one board and insert every post that is not stored yet.
+
+        List pages 1..``max_pages_per_board`` are read until one comes back
+        empty. Unseen posts are then fetched in detail and committed one by
+        one, in reverse list order, so the post listed first is inserted last.
+
+        Returns:
+            ``(inserted_posts, latest_stub)`` where ``latest_stub`` is the first
+            entry of page 1, or ``None`` if page 1 was empty.
+        """
+        candidates: list[PostStub] = []
+        latest_stub: PostStub | None = None
+        known_article_ids = set(
+            self.db.scalars(
+                select(ScrapedPost.article_id).where(ScrapedPost.board_key == board_key)
+            )
+        )
+
+        # Guards against the same article showing up on more than one page.
+        seen_article_ids: set[int] = set()
+        for page in range(1, self.crawler.max_pages_per_board + 1):
+            page_posts = self.crawler.crawl_board_list(board_key=board_key, page=page)
+            if not page_posts:
+                break
+            if page == 1 and latest_stub is None:
+                latest_stub = page_posts[0]
+
+            for stub in page_posts:
+                if stub.article_id in seen_article_ids:
+                    continue
+                seen_article_ids.add(stub.article_id)
+                if stub.article_id in known_article_ids:
+                    continue
+                candidates.append(stub)
+
+        inserted_posts: list[ScrapedPost] = []
+        for stub in reversed(candidates):
+            detail = self.crawler.crawl_post_detail(stub)
+            record = ScrapedPost(
+                board_key=detail.board_key,
+                board_name=detail.board_name,
+                board_id=detail.board_id,
+                article_id=detail.article_id,
+                title=detail.title,
+                post_url=detail.post_url,
+                author=detail.author,
+                published_at=detail.published_at,
+                summary=detail.summary,
+                content_text=detail.content_text,
+                attachments=detail.attachments,
+            )
+            # Commit per post so posts stored before a later failure are kept.
+            self.db.add(record)
+            self.db.commit()
+            self.db.refresh(record)
+            inserted_posts.append(record)
+
+        return inserted_posts, latest_stub
--- a/pytest.ini
+++ b/pytest.ini
@@ -0,0 +1,3 @@
+[pytest]
+pythonpath = .
+testpaths = tests
--- a/requirements.txt
+++ b/requirements.txt
@@ -0,0 +1,9 @@
+beautifulsoup4==4.13.4
+fastapi==0.116.1
+httpx==0.28.1
+psycopg[binary]==3.2.13
+pydantic-settings==2.10.1
+pytest==8.4.1
+requests==2.32.4
+sqlalchemy==2.0.43
+uvicorn==0.35.0
--- a/sql/schema.sql
+++ b/sql/schema.sql
@@ -0,0 +1,30 @@
+-- One row per crawled post; (board_key, article_id) identifies a post uniquely.
+CREATE TABLE IF NOT EXISTS scraped_posts (
+    id BIGSERIAL PRIMARY KEY,
+    board_key VARCHAR(32) NOT NULL,
+    board_name VARCHAR(100) NOT NULL,
+    board_id INTEGER NOT NULL,
+    article_id INTEGER NOT NULL,
+    title VARCHAR(500) NOT NULL,
+    post_url TEXT NOT NULL,
+    author VARCHAR(100),
+    published_at TIMESTAMP,
+    summary TEXT,
+    content_text TEXT,
+    attachments JSONB NOT NULL DEFAULT '[]'::jsonb,
+    created_at TIMESTAMP NOT NULL DEFAULT NOW(),
+    CONSTRAINT uq_scraped_posts_board_article UNIQUE (board_key, article_id),
+    CONSTRAINT ck_scraped_posts_board_key CHECK (board_key IN ('notice', 'archive', 'jobs'))
+);
+
+-- Index on board and insertion time, newest first.
+CREATE INDEX IF NOT EXISTS idx_scraped_posts_board_key_created_at
+    ON scraped_posts (board_key, created_at DESC);
+
+-- One row per crawl attempt; finished_at and error_message may stay NULL.
+CREATE TABLE IF NOT EXISTS crawl_runs (
+    id BIGSERIAL PRIMARY KEY,
+    started_at TIMESTAMP NOT NULL DEFAULT NOW(),
+    finished_at TIMESTAMP,
+    status VARCHAR(20) NOT NULL,
+    discovered_count INTEGER NOT NULL DEFAULT 0,
+    inserted_count INTEGER NOT NULL DEFAULT 0,
+    error_message TEXT
+);
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -0,0 +1,32 @@
+from collections.abc import Generator
+
+import pytest
+from sqlalchemy import create_engine
+from sqlalchemy.orm import Session, sessionmaker
+from sqlalchemy.pool import StaticPool
+
+from app.db import Base
+
+
+@pytest.fixture()
+def db_session() -> Generator[Session, None, None]:
+    """Yield a session on a fresh in-memory SQLite database.
+
+    All tables known to ``Base.metadata`` are created before the test and
+    dropped again afterwards, once the session has been closed.
+    """
+    memory_engine = create_engine(
+        "sqlite://",
+        future=True,
+        connect_args={"check_same_thread": False},
+        poolclass=StaticPool,
+    )
+    Base.metadata.create_all(bind=memory_engine)
+    session_factory = sessionmaker(
+        bind=memory_engine,
+        autoflush=False,
+        autocommit=False,
+        future=True,
+        expire_on_commit=False,
+    )
+    session = session_factory()
+    try:
+        yield session
+    finally:
+        session.close()
+        Base.metadata.drop_all(bind=memory_engine)
--- a/tests/test_api.py
+++ b/tests/test_api.py
@@ -0,0 +1,64 @@
+from datetime import datetime
+
+from fastapi.testclient import TestClient
+
+from app.main import app, get_db
+from app.schemas import CrawlResponse
+
+
+def test_health_endpoint(monkeypatch):
+    """GET /health answers 200 with the fixed ``{"status": "ok"}`` payload."""
+    # Stub out table creation so application startup needs no real database.
+    monkeypatch.setattr("app.main.Base.metadata.create_all", lambda bind: None)
+
+    with TestClient(app) as client:
+        response = client.get("/health")
+
+    assert response.status_code == 200
+    assert response.json() == {"status": "ok"}
+
+
+def test_crawl_endpoint_returns_mocked_posts(monkeypatch):
+    """POST /api/v1/crawl serialises whatever ``CrawlService.crawl_new_posts`` returns."""
+    # Stub out table creation so application startup needs no real database.
+    monkeypatch.setattr("app.main.Base.metadata.create_all", lambda bind: None)
+    # Replace the crawl itself with a canned response containing one post.
+    monkeypatch.setattr(
+        "app.main.CrawlService.crawl_new_posts",
+        lambda self: CrawlResponse(
+            checked_at=datetime(2026, 3, 17, 12, 0, 0),
+            bootstrap_mode=False,
+            bootstrap_inserted_count=0,
+            new_posts_count=1,
+            new_posts=[
+                {
+                    "board_key": "notice",
+                    "board_name": "공지사항",
+                    "board_id": 1926,
+                    "article_id": 1001,
+                    "title": "테스트 공지",
+                    "post_url": "https://computer.hufs.ac.kr/computer/10058/subview.do?enc=test-notice-link",
+                    "author": "관리자",
+                    "published_at": datetime(2026, 3, 17),
+                    "summary": "요약",
+                    "content_text": "본문",
+                    "attachments": [
+                        {"name": "guide.pdf", "url": "https://computer.hufs.ac.kr/files/guide.pdf"}
+                    ],
+                }
+            ],
+            latest_posts_by_board=[],
+        ),
+    )
+
+    # NOTE(review): the override is a plain callable, not a generator function,
+    # so the injected ``db`` is presumably the iterator object itself. That is
+    # harmless here because the crawl is mocked; confirm if this test ever
+    # starts using the session.
+    app.dependency_overrides[get_db] = lambda: iter([None])
+    try:
+        with TestClient(app) as client:
+            response = client.post("/api/v1/crawl")
+    finally:
+        # Always remove the override so it cannot leak into other tests.
+        app.dependency_overrides.clear()
+
+    assert response.status_code == 200
+    payload = response.json()
+    assert payload["bootstrap_mode"] is False
+    assert payload["new_posts_count"] == 1
+    assert payload["new_posts"][0]["board_key"] == "notice"
+    assert payload["new_posts"][0]["article_id"] == 1001
+    assert "/computer/10058/subview.do?enc=" in payload["new_posts"][0]["post_url"]
+    assert payload["new_posts"][0]["attachments"][0]["name"] == "guide.pdf"
+    assert payload["latest_posts_by_board"] == []
--- a/tests/test_service.py
+++ b/tests/test_service.py
@@ -0,0 +1,176 @@
+from datetime import datetime
+
+from sqlalchemy import select
+
+from app.crawler import BOARD_CONFIG, PostDetail, PostStub
+from app.models import CrawlRun, ScrapedPost
+from app.service import CrawlService
+
+
+class FakeCrawler:
+    """In-memory stand-in for the real crawler, serving fixed stubs.
+
+    Page 1 yields two ``notice`` posts (9001, 9002), one ``archive`` post
+    (9101) and nothing for ``jobs``; every later page is empty.
+    """
+
+    def __init__(self):
+        self.max_pages_per_board = 2
+
+    def crawl_board_list(self, board_key: str, page: int = 1):
+        """Return the canned stubs for ``board_key``; only page 1 has content."""
+        if page > 1:
+            return []
+
+        board = BOARD_CONFIG[board_key]
+        if board_key == "notice":
+            return [
+                PostStub(
+                    board_key="notice",
+                    board_name=board["board_name"],
+                    board_id=board["board_id"],
+                    article_id=9001,
+                    title="Existing notice",
+                    post_url="https://computer.hufs.ac.kr/computer/10058/subview.do?enc=notice-9001",
+                    published_at=datetime(2026, 3, 1),
+                ),
+                PostStub(
+                    board_key="notice",
+                    board_name=board["board_name"],
+                    board_id=board["board_id"],
+                    article_id=9002,
+                    title="New notice",
+                    post_url="https://computer.hufs.ac.kr/computer/10058/subview.do?enc=notice-9002",
+                    published_at=datetime(2026, 3, 2),
+                ),
+            ]
+
+        if board_key == "archive":
+            return [
+                PostStub(
+                    board_key="archive",
+                    board_name=board["board_name"],
+                    board_id=board["board_id"],
+                    article_id=9101,
+                    title="New archive post",
+                    post_url="https://computer.hufs.ac.kr/computer/10059/subview.do?enc=archive-9101",
+                    published_at=datetime(2026, 3, 3),
+                )
+            ]
+
+        if board_key == "jobs":
+            return []
+
+        return []
+
+    def crawl_post_detail(self, stub: PostStub):
+        """Expand ``stub`` into a ``PostDetail`` with synthetic body and one attachment."""
+        return PostDetail(
+            board_key=stub.board_key,
+            board_name=stub.board_name,
+            board_id=stub.board_id,
+            article_id=stub.article_id,
+            title=stub.title,
+            post_url=stub.post_url,
+            author="admin",
+            published_at=stub.published_at,
+            summary=f"{stub.title} summary",
+            content_text=f"{stub.title} content",
+            attachments=[
+                {"name": f"{stub.article_id}.pdf", "url": f"https://example.com/files/{stub.article_id}.pdf"}
+            ],
+        )
+
+
+def test_crawl_service_saves_only_new_posts(db_session):
+    """With notice 9001 already stored, only 9002 and 9101 are inserted and reported."""
+    existing = ScrapedPost(
+        board_key="notice",
+        board_name="공지사항",
+        board_id=1926,
+        article_id=9001,
+        title="Existing notice",
+        post_url="https://computer.hufs.ac.kr/computer/10058/subview.do?enc=notice-9001",
+        author="admin",
+        published_at=datetime(2026, 3, 1),
+        summary="Already stored post",
+        content_text="Already stored content",
+        attachments=[],
+    )
+    db_session.add(existing)
+    db_session.commit()
+
+    service = CrawlService(db_session)
+    # Swap in the fake so no network access happens.
+    service.crawler = FakeCrawler()
+
+    response = service.crawl_new_posts()
+
+    assert response.bootstrap_mode is False
+    assert response.bootstrap_inserted_count == 0
+    assert response.new_posts_count == 2
+    assert [post.article_id for post in response.new_posts] == [9002, 9101]
+    # New posts were reported, so the per-board "latest" list stays empty.
+    assert response.latest_posts_by_board == []
+
+    saved_posts = db_session.scalars(
+        select(ScrapedPost).order_by(ScrapedPost.board_key, ScrapedPost.article_id)
+    ).all()
+    assert len(saved_posts) == 3
+
+    run = db_session.scalars(select(CrawlRun).order_by(CrawlRun.id.desc())).first()
+    assert run is not None
+    assert run.status == "success"
+    assert run.inserted_count == 2
+
+
+def test_crawl_service_returns_zero_when_no_new_posts(db_session):
+    """When every listed post is already stored, nothing is new and latest posts are returned."""
+    # Seed one stored post (article_id=1) for every configured board.
+    for board_key, board in BOARD_CONFIG.items():
+        db_session.add(
+            ScrapedPost(
+                board_key=board_key,
+                board_name=board["board_name"],
+                board_id=board["board_id"],
+                article_id=1,
+                title="Existing post",
+                post_url=f"https://computer.hufs.ac.kr/computer/99999/subview.do?enc={board_key}-1",
+                author="admin",
+                published_at=datetime(2026, 3, 1),
+                summary="Existing summary",
+                content_text="Existing content",
+                attachments=[],
+            )
+        )
+    db_session.commit()
+
+    # Crawler variant whose page 1 lists exactly the already-stored post.
+    class NoNewPostCrawler(FakeCrawler):
+        def crawl_board_list(self, board_key: str, page: int = 1):
+            if page > 1:
+                return []
+            board = BOARD_CONFIG[board_key]
+            return [
+                PostStub(
+                    board_key=board_key,
+                    board_name=board["board_name"],
+                    board_id=board["board_id"],
+                    article_id=1,
+                    title="Existing post",
+                    post_url=f"https://computer.hufs.ac.kr/computer/99999/subview.do?enc={board_key}-1",
+                    published_at=datetime(2026, 3, 1),
+                )
+            ]
+
+    service = CrawlService(db_session)
+    service.crawler = NoNewPostCrawler()
+
+    response = service.crawl_new_posts()
+
+    assert response.bootstrap_mode is False
+    assert response.new_posts_count == 0
+    assert response.new_posts == []
+    assert [post.board_key for post in response.latest_posts_by_board] == ["notice", "archive", "jobs"]
+
+
+def test_crawl_service_bootstrap_saves_posts_without_returning_them(db_session):
+    """On an empty database the crawl stores all posts but reports none of them as new."""
+    service = CrawlService(db_session)
+    service.crawler = FakeCrawler()
+
+    response = service.crawl_new_posts()
+
+    assert response.bootstrap_mode is True
+    assert response.bootstrap_inserted_count == 3
+    assert response.new_posts_count == 0
+    assert response.new_posts == []
+    # "jobs" is absent because the fake crawler lists no posts for it.
+    assert [post.board_key for post in response.latest_posts_by_board] == ["notice", "archive"]
+
+    saved_posts = db_session.scalars(select(ScrapedPost)).all()
+    assert len(saved_posts) == 3