Files
codebuddy-mem/scripts/regression-test.sh
qiukai cfed12b665 test: 添加 codebuddy-mem 全功能回归测试脚本
10 层覆盖: 进程/文件/数据库架构/数据统计/完整性/Chroma/配置/日志/HTTP/MCP API
使用: bash scripts/regression-test.sh
2026-05-05 03:34:56 +08:00

256 lines
11 KiB
Bash
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/bin/bash
# ============================================================
# codebuddy-mem 全功能回归测试
# 测试范围: 基础设施 / 数据库 / MCP API / Chroma / 配置
# 使用方式: bash scripts/regression-test.sh
# 或对话中说 "执行 codebuddy-mem 回归测试"
# ============================================================
# A pipeline reports failure when any of its stages fails, not only the last.
set -o pipefail

# ANSI color sequences, stored as literal backslash escapes and expanded only
# at the moment they are printed.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[0;33m'
CYAN='\033[0;36m'
BOLD='\033[1m'
NC='\033[0m'

# Result counters: passed, failed, warned, and total checks run.
PASS=0
FAIL=0
WARN=0
TOTAL=0

# Directory that holds the database, Chroma index, settings and logs under test.
DATA_DIR="$HOME/.codebuddy-mem"
# ---------- helpers ----------
# Reporters: each one bumps its own counter and prints a single tagged,
# colorized line. $1 is the message; backslash escapes in it are expanded.

# Record a passed check.
_ok() {
  PASS=$((PASS + 1))
  printf '%b\n' " ${GREEN}[PASS]${NC} $1"
}

# Record a failed check.
_fail() {
  FAIL=$((FAIL + 1))
  printf '%b\n' " ${RED}[FAIL]${NC} $1"
}

# Record a warning.
_warn() {
  WARN=$((WARN + 1))
  printf '%b\n' " ${YELLOW}[WARN]${NC} $1"
}

# Print a section title ($1) preceded by a blank line; touches no counter.
_header() {
  printf '\n%b\n' "${CYAN}${BOLD}━━━ $1 ━━━${NC}"
}
# Run a command string and report on its exit status.
#   $1 - shell command, executed with eval; all of its output is discarded
#   $2 - description printed with the verdict
# Exit status 0 is reported through _ok, anything else through _fail.
check() {
  TOTAL=$((TOTAL + 1))
  if ! eval "$1" >/dev/null 2>&1; then
    _fail "$2"
    return
  fi
  _ok "$2"
}
# Expected-value assertion.
#   $1 - shell command, executed with eval; its stdout is the actual value
#        (stderr is discarded)
#   $2 - expected value, compared as a string
#   $3 - description printed with the verdict
assert_eq() {
  local val
  TOTAL=$((TOTAL + 1))
  val=$(eval "$1" 2>/dev/null)
  if [ "$val" != "$2" ]; then
    _fail "$3 (expect=$2, got=$val)"
    return
  fi
  _ok "$3 (expect=$2, got=$val)"
}
# Greater-than assertion.
#   $1 - shell command, executed with eval; its stdout is the actual value
#        (stderr is discarded)
#   $2 - integer threshold the value must exceed
#   $3 - description printed with the verdict
assert_gt() {
  local val
  TOTAL=$((TOTAL + 1))
  val=$(eval "$1" 2>/dev/null)
  # A non-integer value makes the comparison itself error out; the message is
  # silenced and the assertion is reported as failed.
  if ! [ "$val" -gt "$2" ] 2>/dev/null; then
    _fail "$3 (got=$val, need > $2)"
    return
  fi
  _ok "$3 (got=$val > $2)"
}
# ============================================================
# Banner: title, run timestamp, data directory, and a reminder that layer 10
# (MCP API) is run by hand in a conversation rather than by this script.
printf '%b\n' "${BOLD}codebuddy-mem 全功能回归测试${NC}"
printf '%s\n' "时间: $(date '+%Y-%m-%d %H:%M:%S')"
printf '%s\n' "数据目录: $DATA_DIR"
printf '\n'
printf '%b\n' "${YELLOW}注意: 第 10 层 MCP API 测试需在 CodeBuddy Code 对话中由助手手动执行${NC}"
printf '\n'
# ============================================================
_header "1. 进程状态"

# Succeeds when $DATA_DIR/worker.pid is a JSON document with a "pid" field
# that names a process `kill -0` can signal. The file path is passed to Python
# as an argument (not spliced into the source), so any path works.
_worker_alive() {
  local pid
  pid=$(python3 -c 'import json, sys; print(json.load(open(sys.argv[1]))["pid"])' \
    "$DATA_DIR/worker.pid" 2>/dev/null) || return 1
  [ -n "$pid" ] && kill -0 "$pid" 2>/dev/null
}

# Single-quoted command strings are intentional: check() runs them with eval,
# so "$DATA_DIR" is expanded (and stays quoted) at evaluation time. This keeps
# the checks working when the path contains whitespace.
check "pgrep -f 'codebuddy-mem.*mcp-server' > /dev/null" \
  "MCP 服务进程运行中"
check 'test -f "$DATA_DIR/worker.pid"' \
  "worker.pid 存在"
check "_worker_alive" \
  "Worker 进程运行中"
check "pgrep -f 'chroma-mcp.*codebuddy-mem' > /dev/null" \
  "Chroma-MCP 进程运行中"
# ============================================================
_header "2. 文件结构"

# Command strings are single-quoted on purpose: check() runs them with eval,
# so "$DATA_DIR" is expanded inside double quotes at evaluation time and a
# data directory containing whitespace no longer splits into several words.
check 'test -f "$DATA_DIR/codebuddy-mem.db"' \
  "主数据库 codebuddy-mem.db 存在"
check 'test -L "$DATA_DIR/claude-mem.db"' \
  "符号链接 claude-mem.db 存在"
# The link target is compared as stored, i.e. it must be the relative name.
check '[ "$(readlink "$DATA_DIR/claude-mem.db")" = "codebuddy-mem.db" ]' \
  "符号链接指向 codebuddy-mem.db"
check 'test -f "$DATA_DIR/codebuddy-mem.db.backup"' \
  "数据库备份存在"
check 'test -f "$DATA_DIR/chroma/chroma.sqlite3"' \
  "Chroma 向量库存在"
check 'test -d "$DATA_DIR/chroma"' \
  "Chroma 目录存在"
check 'test -f "$DATA_DIR/chroma/data_level0.bin"' \
  "Chroma HNSW 数据文件存在"
check 'test -f "$DATA_DIR/chroma/header.bin"' \
  "Chroma HNSW 头文件存在"
# ============================================================
_header "3. 数据库架构"

# In the command strings below, \$DATA_DIR is left for check()'s eval to
# expand inside double quotes (safe for paths with whitespace), while the loop
# variable is expanded immediately so each check carries its own table name.
for table in sdk_sessions observations user_prompts session_summaries pending_messages schema_versions; do
  check "sqlite3 \"\$DATA_DIR/claude-mem.db\" \"SELECT name FROM sqlite_master WHERE type='table' AND name='$table';\" | grep -q -- '$table'" \
    "$table 存在"
done

# FTS5 virtual tables are listed in sqlite_master with type='table', so
# looking the name up is sufficient.
for fts_table in observations_fts session_summaries_fts user_prompts_fts; do
  check "sqlite3 \"\$DATA_DIR/claude-mem.db\" \"SELECT name FROM sqlite_master WHERE name='$fts_table';\" | grep -q -- '$fts_table'" \
    "FTS 虚拟表 $fts_table 存在"
done
# ============================================================
_header "4. 数据统计"

# Every core table must hold at least one row. \$DATA_DIR is expanded by the
# eval inside assert_gt, within double quotes, so a path with whitespace works.
for stats_table in observations sdk_sessions user_prompts session_summaries; do
  assert_gt "sqlite3 \"\$DATA_DIR/claude-mem.db\" 'SELECT count(*) FROM $stats_table;'" 0 \
    "$stats_table 有数据"
done

# The FTS index must have exactly as many rows as its base table. Both counts
# are taken in one statement so they come from the same database snapshot;
# SQLite prints 1 when they are equal and 0 otherwise.
check '[ "$(sqlite3 "$DATA_DIR/claude-mem.db" "SELECT (SELECT count(*) FROM observations) = (SELECT count(*) FROM observations_fts);")" = 1 ]' \
  "observations 与 observations_fts 行数一致"
# ============================================================
_header "5. 数据完整性"

# Report the foreign-key health of observations.
#   $1 - number of observations whose session row is missing
#   $2 - total number of observations
# Counts as one check in TOTAL. Passes with zero orphans or with fewer than
# 20% orphans (migration leftovers are expected), fails at 20% or more, and
# warns instead of silently skipping when the counts are unavailable.
_report_orphans() {
  local orphans=$1 total=$2
  TOTAL=$((TOTAL + 1))
  # Treat anything that is not a plain non-negative integer as "unknown".
  case "$orphans" in '' | *[!0-9]*) orphans='' ;; esac
  case "$total" in '' | *[!0-9]*) total='' ;; esac
  if [ -z "$orphans" ] || [ -z "$total" ] || [ "$total" -eq 0 ]; then
    _warn "observations 外键完整性 (无法统计, 已跳过)"
  elif [ "$orphans" -eq 0 ]; then
    _ok "observations 外键完整性 (0 条孤儿)"
  # orphans * 5 < total is an exact "< 20%" test; dividing total by 5 first
  # would truncate and misreport small tables (e.g. 1 orphan out of 9).
  elif [ $((orphans * 5)) -lt "$total" ]; then
    _ok "observations 外键完整性 ($orphans/$total 孤儿 < 20%, 迁移残留正常)"
  else
    _fail "observations 外键完整性 ($orphans/$total 孤儿 >= 20%)"
  fi
}

# There must be data for at least one project.
# NOTE(review): the label says "multiple projects" but the check only requires
# a non-zero count — confirm which of the two is intended.
check 'sqlite3 "$DATA_DIR/claude-mem.db" "SELECT count(DISTINCT project) FROM observations;" | grep -q "[1-9]"' \
  "observations 包含多个项目"

# The common observation types must all be present.
for otype in discovery change decision; do
  check "sqlite3 \"\$DATA_DIR/claude-mem.db\" \"SELECT count(*) FROM observations WHERE type='$otype';\" | grep -q '[1-9]'" \
    "observations 包含类型 $otype"
done

# Foreign-key integrity: observations whose memory_session_id has no matching
# sdk_sessions row.
ORPHAN=$(sqlite3 "$DATA_DIR/claude-mem.db" "SELECT count(*) FROM observations o LEFT JOIN sdk_sessions s ON o.memory_session_id = s.memory_session_id WHERE s.memory_session_id IS NULL;" 2>/dev/null)
TOTAL_OBS=$(sqlite3 "$DATA_DIR/claude-mem.db" "SELECT count(*) FROM observations;" 2>/dev/null)
_report_orphans "$ORPHAN" "$TOTAL_OBS"
# ============================================================
_header "6. Chroma 同步状态"

# Succeeds when chroma-sync-state.json lists every main project with a
# non-zero "observations" watermark. The file path is handed to Python as an
# argument instead of being spliced into the source text, so quotes or spaces
# in the path cannot break the program.
_sync_watermarks_ok() {
  python3 -c '
import json
import sys

with open(sys.argv[1], encoding="utf-8") as f:
    state = json.load(f)
# Every main project needs an observations watermark greater than 0.
for proj in ["观星阁", "筑基阁", "mac"]:
    if proj not in state:
        raise Exception(f"{proj} not in sync state")
    if state[proj].get("observations", 0) == 0:
        raise Exception(f"{proj} observations watermark is 0")
print("OK")
' "$DATA_DIR/chroma-sync-state.json"
}

# Single-quoted command strings are expanded by the eval inside check(), which
# keeps "$DATA_DIR" quoted and therefore safe for paths with whitespace.
check 'test -f "$DATA_DIR/chroma-sync-state.json"' \
  "chroma-sync-state.json 存在"
check "_sync_watermarks_ok" \
  "主要项目同步水位 > 0"

# The Chroma vector store must contain embeddings and collections.
check 'test -f "$DATA_DIR/chroma/chroma.sqlite3" && sqlite3 "$DATA_DIR/chroma/chroma.sqlite3" "SELECT count(*) FROM embeddings;" | grep -q "[1-9]"' \
  "Chroma embeddings 有数据"
check 'sqlite3 "$DATA_DIR/chroma/chroma.sqlite3" "SELECT count(*) FROM collections;" | grep -q "[1-9]"' \
  "Chroma collections 有数据"
# ============================================================
_header "7. 配置文件"

# Succeeds when $1 is an existing regular file that parses as JSON. The path
# is passed to Python as an argument rather than embedded in the source.
_json_valid() {
  [ -f "$1" ] &&
    python3 -c 'import json, sys; json.load(open(sys.argv[1]))' "$1" 2>/dev/null
}

# Succeeds when settings.json contains every required top-level key.
_settings_has_required_keys() {
  python3 -c '
import json
import sys

with open(sys.argv[1]) as f:
    s = json.load(f)
for key in sys.argv[2:]:
    assert key in s, key
print("OK")
' "$DATA_DIR/settings.json" \
    CODEBUDDY_MEM_DATA_DIR CODEBUDDY_MEM_WORKER_PORT CODEBUDDY_MEM_PROVIDER
}

# Single-quoted command strings are expanded by the eval inside check(), which
# keeps "$DATA_DIR" quoted and therefore safe for paths with whitespace.
check '_json_valid "$DATA_DIR/settings.json"' \
  "settings.json 格式正确"
check '_json_valid "$DATA_DIR/supervisor.json"' \
  "supervisor.json 格式正确"
# Required configuration keys.
check "_settings_has_required_keys" \
  "settings.json 包含必要配置项"
# ============================================================
_header "8. 日志健康"

# Print the most recently modified regular file matching
# $DATA_DIR/logs/*.log; print nothing and return non-zero when there is none.
# Uses a glob and -nt instead of parsing `ls -t` output, so file names with
# spaces or newlines are handled.
_newest_log() {
  local candidate newest=''
  for candidate in "$DATA_DIR"/logs/*.log; do
    [ -f "$candidate" ] || continue
    if [ -z "$newest" ] || [ "$candidate" -nt "$newest" ]; then
      newest=$candidate
    fi
  done
  [ -n "$newest" ] && printf '%s\n' "$newest"
}

# Report on [ERROR] lines in log file $1; counts as one check in TOTAL.
# No [ERROR] lines, or only known-harmless ones, is a pass; anything else is a
# warning followed by the last three offending lines.
_report_log_errors() {
  local log=$1 errors harmful
  local harmless='Insufficient disk|pollution cleanup'
  TOTAL=$((TOTAL + 1))
  # grep -c already prints 0 when nothing matches (while exiting 1), so a
  # "|| echo 0" fallback would append a second "0" and break the numeric
  # test. Only default the value when grep printed nothing at all.
  errors=$(grep -c '\[ERROR\]' "$log" 2>/dev/null)
  errors=${errors:-0}
  if [ "$errors" -eq 0 ]; then
    _ok "最新日志无 ERROR ($log)"
    return
  fi
  harmful=$(grep '\[ERROR\]' "$log" | grep -Evc "$harmless")
  harmful=${harmful:-0}
  if [ "$harmful" -eq 0 ]; then
    _ok "最新日志 $errors 条 ERROR 均为已知无害警告"
  else
    _warn "最新日志有 $harmful 条需要关注的 ERROR"
    echo " $(grep '\[ERROR\]' "$log" | grep -Ev "$harmless" | tail -3)"
  fi
}

# Single-quoted command strings are expanded by the eval inside check().
check 'test -d "$DATA_DIR/logs"' \
  "日志目录存在"
check '[ -n "$(_newest_log)" ]' \
  "存在日志文件"

# The newest log must not contain serious errors.
LATEST_LOG=$(_newest_log)
if [ -n "$LATEST_LOG" ]; then
  _report_log_errors "$LATEST_LOG"
fi
# ============================================================
_header "9. HTTP 端口可用性"

# Read the worker port from settings.json. The path is passed to Python as an
# argument so that quotes or spaces in it cannot break the program.
PORT=$(python3 -c 'import json, sys; print(json.load(open(sys.argv[1]))["CODEBUDDY_MEM_WORKER_PORT"])' \
  "$DATA_DIR/settings.json" 2>/dev/null)

case "$PORT" in
  '' | *[!0-9]*)
    # No usable port: say so instead of silently skipping the whole layer.
    TOTAL=$((TOTAL + 1))
    _warn "Worker HTTP 健康检查已跳过 (无法从 settings.json 读取端口)"
    ;;
  *)
    # PORT is digits only here, so it is safe inside the eval'd command.
    # --max-time bounds the request so an unresponsive worker cannot hang
    # the test run.
    check "[ \"\$(curl -s -o /dev/null --max-time 5 -w '%{http_code}' http://127.0.0.1:$PORT/health)\" = 200 ]" \
      "Worker HTTP 健康检查 (port $PORT) 返回 200"
    ;;
esac
# ============================================================
# Summary
# ============================================================
printf '\n%b\n' "${CYAN}${BOLD}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
printf '%b\n' "${BOLD}测试结果汇总${NC}"
printf '%b\n' " 通过: ${GREEN}$PASS${NC}"
printf '%b\n' " 失败: ${RED}$FAIL${NC}"
printf '%b\n' " 警告: ${YELLOW}$WARN${NC}"
printf '%b\n' " 总计: $TOTAL"
printf '\n'

# Pick the verdict line and exit status: any failure exits 1; warnings alone
# still exit 0.
exit_status=0
if [ "$FAIL" -gt 0 ]; then
  verdict="${RED}${BOLD}[不通过] 存在 $FAIL 个失败项,请排查后重试。${NC}"
  exit_status=1
elif [ "$WARN" -gt 0 ]; then
  verdict="${YELLOW}${BOLD}[通过(有告警)] $WARN 个告警项,建议关注。${NC}"
else
  verdict="${GREEN}${BOLD}[全通过] 所有 $PASS 项检查通过。${NC}"
fi
printf '%b\n' "$verdict"
exit "$exit_status"
# ============================================================
# Layer 10: MCP API tests (run by the CodeBuddy Code assistant)
# The following tests cannot be driven from a bash script and must be run in
# a conversation:
#
# mcp__codebuddy-mem__search { query:"观星阁", limit:5 }
# mcp__codebuddy-mem__get_observations { ids:[1,200,438] }
# mcp__codebuddy-mem__timeline { query:"观星阁 Git", limit:3 }
# mcp__codebuddy-mem__list_corpora {}
# mcp__codebuddy-mem__smart_search { query:"function check" }
# mcp__codebuddy-mem__smart_outline { file:"scripts/regression-test.sh" }
#
# What to verify:
# 1. search returns more than 0 results, and the results include title/type/created_at
# 2. get_observations returns the full field set: title,facts,narrative,project,type
# 3. timeline returns a context timeline that carries an Anchor
# 4. list_corpora can be called successfully (corpora may be empty)
# 5. smart_search can search within the code base
# 6. smart_outline can parse the structure of a file
# ============================================================