diff --git a/.claude/CLAUDE.md b/.claude/CLAUDE.md new file mode 100644 index 0000000..7c1dac1 --- /dev/null +++ b/.claude/CLAUDE.md @@ -0,0 +1,93 @@ +# devbase — Cognitive Anchor + +> **Purpose**: This file is designed to survive context compression. It contains +> immutable facts and current state that every AI session must know before +> working on this project. If you are reading this after a context reset, +> treat this as your primary source of truth. + +--- + +## Immutable Facts(不可变事实) + +| ID | Fact | Source | Status | +|----|------|--------|--------| +| F-001 | Version | `Cargo.toml` | **v0.20.1** | +| F-002 | Edition | `Cargo.toml` | **Rust 2024** | +| F-003 | Test Coverage | CI | **494 passed, 0 failed, 5 ignored** | +| F-004 | Production Unwrap | Architecture Invariants | **0** (G5 rule enforced) | +| F-005 | MCP Tools | `src/mcp/mod.rs` | **69** (5 Stable / 60 Beta / 4 Experimental) | +| F-006 | Schema Version | `registry/migrate.rs` | **v34** | +| F-007 | Entities Table | Schema v21+ | **唯一真相源** (`repos` 表已删除) | +| F-008 | SQLite Mode | `storage.rs` | **WAL mode** | +| F-009 | Clippy | CI | **`-D warnings` 全绿** | +| F-010 | Release Assets | GitHub Releases | **Linux + Windows x64** 预编译二进制 | + +## 架构红线(Architecture Guardrails) + +- **RF-1**: 无裸 `init_db()` 调用,全部使用 `StorageBackend` 注入 +- **RF-2**: `TempStorageBackend` 用于测试隔离(禁止 `DEVBASE_DATA_DIR` 竞态) +- **RF-3**: `entities` 表是唯一真相源 +- **RF-4**: 二进制上下文 ≤ 1MB +- **RF-5**: 模块间无循环依赖 +- **RF-6**: 生产代码零 `unwrap`/`expect`/`panic`(测试除外) +- **RF-7**: 路径输出必须脱敏(`sanitize_path()` 掩码 home 目录) + +## 当前上下文(Current Context) + +| 属性 | 值 | +|------|-----| +| 默认分支 | `main` | +| 最新 Release | `v0.20.1` (2026-05-17) | +| 当前 Phase | Phase 1 Production Hardening ✅ 完成 | +| 下一 Phase | Phase 12 — v0.21.0 "External Capability Grafting" | +| 活跃 PR | 无(PR #55 已合并) | + +## 已知架构 Gaps(不可与 Immutable Facts 混淆) + +这些是**待实现**的能力,不是 bug: + +| Gap | 影响 | 计划版本 | 状态 | +|-----|------|----------|------| +| ~~`relations` 表零生产读取路径~~ | 
~~统一实体模型的图遍历能力未暴露~~ | ~~v0.21.0~~ | **已完成** — `devkit_relation_store/query/delete` 已存在,`project_context` 已读取 | +| ~~Workflow 引擎零 MCP 暴露~~ | ~~AI 无法发现/触发工作流~~ | ~~v0.21.0~~ | **已完成** — `devkit_workflow_list/run/status` 已存在 | +| ~~`project_context` 不完整~~ | ~~缺少 relations/limits/skills/workflows~~ | ~~v0.21.0~~ | **已完成** — 已补充 `known_limits` + `skills` | +| 31/68 MCP 工具缺少调用测试 | 回归风险 | v0.21.0 | 待评估 | +| ~~`mcp/tools/repo.rs` 2376 行~~ | ~~维护负担~~ | ~~v0.21.0~~ | **已完成** — 已拆分为 `tools/` 目录,`repo.rs` 现 730 行 | +| ~~`init_db_at` 1214 行~~ | ~~迁移函数过大~~ | ~~v0.21.0~~ | **已完成** — 已拆分为 `registry/migrate.rs`(503 行)+ `repo.rs` + `vault.rs` + `links.rs` | + +## 防失忆校验清单(每次会话启动) + +- [ ] 已读取本文件(`devbase/.claude/CLAUDE.md`) +- [ ] 已确认 `Cargo.toml` 版本与上表 F-001 一致 +- [ ] 如果 handoff 文档说"未完成",确认是新环境问题还是全局阻塞 +- [ ] 如果修改 Schema,已更新 `registry/migrate.rs` 和 `SCHEMA_DDL` + +## 快速入口 + +| 你想做什么 | 命令 | +|-----------|------| +| 运行测试 | `cargo test --all-targets` | +| 检查 clippy | `cargo clippy --all-targets -- -D warnings` | +| 检查格式化 | `cargo fmt --check` | +| 运行 invariant checks | `scripts/invariant-checks/run-checks.ps1` | +| 启动 MCP Server | `cargo run -- mcp` | +| 启动 TUI | `cargo run -- tui` | +| 扫描当前目录 | `devbase scan . 
--register` | +| 索引仓库 | `devbase index` | + +## 关键文件映射 + +| 概念 | 文件 | +|------|------| +| 架构决策 | `docs/architecture/` | +| 稳定工具文档 | `docs/reference/stable-tools/` | +| 快速开始 | `docs/guides/quickstart.md` | +| MCP 集成指南 | `docs/guides/mcp-integration.md` | +| 变更日志 | `CHANGELOG.md` | +| Agent 简报 | `AGENTS.md` | +| 贡献指南 | `CONTRIBUTING.md` | + +--- + +**Last Updated**: 2026-05-20 by Claude Opus 4.7 +**Version**: v0.20.1 diff --git a/.gitignore b/.gitignore index f1a7061..0de89cb 100644 --- a/.gitignore +++ b/.gitignore @@ -28,7 +28,7 @@ backup-*.db # OS metadata .DS_Store Thumbs.db -examples/embedding-provider/__pycache__/ +__pycache__/ .agents/ .venv/ diff --git a/AGENTS.md b/AGENTS.md index 04ceedf..8e8f4e7 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -5,7 +5,7 @@ > 它将本地数字资产的原始数据(代码库、笔记、Skill、工作流)编译为 AI 可决策的结构化情境,不负责思考,不负责执行,只负责感知、编码、持久化、检索。 - **当前阶段**:阶段十一 — v0.20.0 已发布(知识完备性) -- **当前版本**:v0.20.0(Schema 34,68 MCP tools,451 tests) +- **当前版本**:v0.20.1(Schema 34,69 MCP tools,494 tests) - **已完成里程碑**:Registry God Object 完全拆解(10 子模块提取)+ 18 workspace crates 提取 + MCP Python SDK 1.16.0 兼容修复 + repo.rs trait 化 + flaky 测试根治(RF-2.1/2.2/2.3)+ 许可证迁移 + health 性能优化(-44%)+ index skip-embeddings + batch encoding 实验 + RF-6 清零 + 架构治理文档(ADR/不变量清单)+ Tantivy BM25 代码符号搜索(P1)+ AppContext 职责拆分 Phase 1/2(storage.rs 860→430 行)+ 架构不变量 CI(G5/T11/T12)+ Embedding 多后端(Candle/Ollama 配置切换, P3)+ EnvVersionCache 扩展(9 工具链检测, P4)+ **v0.16.0 Agent Contexts(P1/P2/P3)**:`agent_contexts`/`agent_memories`/`context_entity_links` Schema + 9 个 Session MCP tools + Context-aware Skill Runtime(`DEVBASE_ACTIVE_CONTEXT` 注入)+ **v0.16.1 Workflow-Session Binding**:`workflow_executions.context_id` + 执行自动绑定 Active Context + **v0.17.0 Embedding Externalization**:`embedding` 从 default features 移除(Candle/Ollama 降级为 opt-in `llm-backend`)+ Schema 34 向量存储 + `cosine_similarity` SQLite UDF + `devkit_session_recall` / `devkit_session_index`(60 tools)+ **v0.18.0 ClaudeCode Integration**:`devkit_project_brief`(Markdown 项目简报)+ 
`devkit_impact_analysis`(修改影响范围分析)+ `devkit_session_export` / `devkit_session_import` + `scripts/devbase-claude.ps1` 启动器(自动注入 `.claude/CLAUDE.md`)+ RFC `docs/RFC/claudecode-workflow-integration.md`(64 tools)+ **v0.18.0 发布收尾**:PR 合并 + 双平台二进制构建 + GitHub Release + 根目录治理 + 世界模型战略认知沉淀(Vault + AGENTS 双向联动)+ NotebookLM 生态消化(5 项目注册)+ GreptimeDB 互补分析 + **v0.19.0 知识基础设施硬化**:SQLite WAL 默认启用 + `devkit_index_health`(Beta)+ Vault 导出(`devkit_vault_export`)+ Redis ADR 决策(放弃引入)+ **v0.20.0 知识完备性**:Vault 双向链接 BFS 图遍历(`devkit_vault_graph` 扩展)+ Vault Git-based 历史追踪(`devkit_vault_history`,第 67 个 tool)+ 混合检索质量监控(`devkit_search_quality`,第 68 个 tool,`HybridSearchMetrics`)+ Block 引用支持(`WikiLink.anchor`:`[[note#heading]]` / `[[note#^block-id]]`)+ 性能回归基线(`#[ignore]` 1k/10k 阈值测试)+ 客户端无关原则(Client-Agnostic Principle)落地 + `skill sync` 泛化接口(零硬编码客户端路径) - **核心方向**:让 Kimi CLI 在调用文件工具之前,先通过 devbase 获得"该读哪些文件、为什么读、它们之间的关系" - **本质分析**:见 `vault/99-Meta/devbase-essence-analysis-20260430.md` 与 `docs/architecture/redefinition.md` @@ -23,7 +23,7 @@ Skill Runtime 全生命周期已落地(含依赖管理 Schema v15),Schema - **Workspace**:`%LOCALAPPDATA%\devbase\workspace/` —— 文件系统 = source of truth - `vault/` —— PARA 结构:00-Inbox, 01-Projects, 02-Areas, 03-Resources, 04-Archives, 99-Meta - `assets/` —— 二进制资源 -- **MCP Server**:stdio only,**68 个 tools**(含 7 个 vault tools + 8 个代码分析工具 + 5 个 embedding/搜索工具 + 4 个 Skill Runtime tools + 3 个 Workflow/评分 tools + 1 个报告工具 + 1 个 arXiv 工具 + 2 个 KnownLimit tools + 3 个 Relation tools + 11 个 Agent Context tools + 2 个 ClaudeCode 集成工具 + 1 个 streaming index 工具 + 1 个 oplog 工具 + 1 个 Index Health 工具 + 1 个 Search Quality 工具 + 1 个 Evaluate 工具);配置见 `mcp.json` +- **MCP Server**:stdio only,**69 个 tools**(含 7 个 vault tools + 8 个代码分析工具 + 5 个 embedding/搜索工具 + 4 个 Skill Runtime tools + 3 个 Workflow/评分 tools + 1 个报告工具 + 1 个 arXiv 工具 + 2 个 KnownLimit tools + 3 个 Relation tools + 11 个 Agent Context tools + 2 个 ClaudeCode 集成工具 + 1 个 streaming index 工具 + 1 个 oplog 工具 + 1 个 Index Health 工具 + 1 个 Search Quality 工具 + 1 个 
Evaluate 工具 + 1 个 DocumentConvert 工具);配置见 `mcp.json` - **Kimi CLI 集成**:MCP server 已通过 `kimi mcp add` 注册,端到端验证通过(`kimi --print` 成功调用 `devkit_health`);项目级 skill 位于 `.kimi/skills/devbase-project/SKILL.md` - **统一节点模型**:`core::node::{Node, NodeType, Edge}` —— GitRepo / VaultNote / Asset / ExternalLink - **当前测试**:451+ lib passed / 0 failed / 5 ignored + 11/11 integration passed(`tests/cli.rs`) diff --git a/CHANGELOG.md b/CHANGELOG.md index 70d6761..2bf9074 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,29 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [Unreleased] + +### Added + +- `devkit_document_convert` — Experimental tier MCP tool,PDF/PPTX → Markdown 转换(`pdftotext` / `python-pptx` 流水线),含 frontmatter 质量标注 +- Stable 工具 invocation 测试补全:`devkit_query_repos`、`devkit_vault_search`、`devkit_vault_read`、`devkit_status`、`devkit_workflow_list`、`devkit_index` +- `seed_repo()` 轻量测试 helper(仅插入 `entities` 表,无副作用) + +### Fixed + +- `mcp/tools/document_convert.rs` 原始字符串定界符修复(`r###"` 避免与 Python f-string `"##` 冲突) +- `cleanup_extracted_text` 单元测试期望值与实现语义对齐(保留最多 2 个连续空行) + +### Changed + +- **Workspace crate 架构重组** — 消除机械提取造成的微 crate 碎片 + - 合并 8 个 `devbase-registry-*` 微 crate(100–300 行/个)为统一 `devbase-registry`,含 8 个语义子模块(`entity`, `health`, `metrics`, `relation`, `call_graph`, `code_symbols`, `dead_code`, `workspace`) + - 拆分 10+ 个 monolithic `lib.rs` 为域驱动子模块:`devbase-embedding` (`candle`/`ollama`), `devbase-workflow-model` (`definition`/`execution`/`step_type`), `devbase-symbol-links` (`similarity`/`co_located`), `devbase-sync-protocol` (`index`/`version_vector`), `devbase-skill-runtime-types` (`skill_type`/`execution`/`params`), `devbase-skill-runtime-parser` (`frontmatter`/`field_parsers`), `devbase-workflow-interpolate` (`resolver`), `devbase-vault-frontmatter` (`parser`), 
`devbase-vault-wikilink` (`parser`), `devbase-core-types` (`node_type`/`node`/`edge`) + - 全 workspace `Cargo.toml` 统一使用 `[workspace.package]` 继承(`version`, `edition`, `authors`, `license`, `repository`) +- `KNOWN_ISSUES.md` 更新:document_convert 从 P3 债务移至已解决归档;测试计数 485→494 +- `docs/reference/mcp-tools.md` 修正为 69 个工具,补充 Index / Workflow / Relation / KnownLimit / Session 分类 +- `docs/reference/stable-tools/README.md` 修正为 5 个 Stable 工具(删除过时的 `project_brief.md` / `hybrid_search.md` / `session_recall.md`) + ## [0.20.1] - 2026-05-17 ### Added diff --git a/Cargo.lock b/Cargo.lock index 21bddc5..b2696e1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1448,14 +1448,7 @@ dependencies = [ "crossterm 0.28.1", "devbase-core-types", "devbase-embedding", - "devbase-registry-call-graph", - "devbase-registry-code-symbols", - "devbase-registry-dead-code", - "devbase-registry-entity", - "devbase-registry-health", - "devbase-registry-metrics", - "devbase-registry-relation", - "devbase-registry-workspace", + "devbase-registry", "devbase-skill-runtime-parser", "devbase-skill-runtime-types", "devbase-symbol-links", @@ -1519,51 +1512,7 @@ dependencies = [ ] [[package]] -name = "devbase-registry-call-graph" -version = "0.20.1" -dependencies = [ - "anyhow", - "rusqlite", -] - -[[package]] -name = "devbase-registry-code-symbols" -version = "0.20.1" -dependencies = [ - "anyhow", - "rusqlite", -] - -[[package]] -name = "devbase-registry-dead-code" -version = "0.20.1" -dependencies = [ - "anyhow", - "rusqlite", -] - -[[package]] -name = "devbase-registry-entity" -version = "0.20.1" -dependencies = [ - "anyhow", - "chrono", - "rusqlite", - "serde_json", -] - -[[package]] -name = "devbase-registry-health" -version = "0.20.1" -dependencies = [ - "anyhow", - "chrono", - "rusqlite", - "serde", -] - -[[package]] -name = "devbase-registry-metrics" +name = "devbase-registry" version = "0.20.1" dependencies = [ "anyhow", @@ -1573,25 +1522,6 @@ dependencies = [ "serde_json", ] -[[package]] -name = 
"devbase-registry-relation" -version = "0.20.1" -dependencies = [ - "anyhow", - "chrono", - "rusqlite", -] - -[[package]] -name = "devbase-registry-workspace" -version = "0.20.1" -dependencies = [ - "anyhow", - "chrono", - "rusqlite", - "serde", -] - [[package]] name = "devbase-skill-runtime-parser" version = "0.20.1" diff --git a/Cargo.toml b/Cargo.toml index 35b2cfe..2b89a7e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -72,17 +72,10 @@ devbase-vault-frontmatter = { path = "crates/devbase-vault-frontmatter" } devbase-vault-wikilink = { path = "crates/devbase-vault-wikilink" } devbase-workflow-interpolate = { path = "crates/devbase-workflow-interpolate" } devbase-workflow-model = { path = "crates/devbase-workflow-model" } -devbase-registry-health = { path = "crates/devbase-registry-health" } -devbase-registry-metrics = { path = "crates/devbase-registry-metrics" } -devbase-registry-workspace = { path = "crates/devbase-registry-workspace" } +devbase-registry = { path = "crates/devbase-registry" } devbase-embedding = { path = "crates/devbase-embedding", optional = true } devbase-skill-runtime-types = { path = "crates/devbase-skill-runtime-types" } devbase-skill-runtime-parser = { path = "crates/devbase-skill-runtime-parser" } -devbase-registry-entity = { path = "crates/devbase-registry-entity" } -devbase-registry-relation = { path = "crates/devbase-registry-relation" } -devbase-registry-call-graph = { path = "crates/devbase-registry-call-graph" } -devbase-registry-dead-code = { path = "crates/devbase-registry-dead-code" } -devbase-registry-code-symbols = { path = "crates/devbase-registry-code-symbols" } greptimedb-ingester = { version = "0.18", optional = true } [dev-dependencies] diff --git a/KNOWN_ISSUES.md b/KNOWN_ISSUES.md new file mode 100644 index 0000000..dbdb3f4 --- /dev/null +++ b/KNOWN_ISSUES.md @@ -0,0 +1,99 @@ +# Known Issues & Technical Debt + +> 本文件记录 devbase 的已知问题、技术债务和架构 blockers。 +> 不是 bug 列表 — 这些问题是设计层面的权衡或待完成的工作。 + +--- + +## P0 — 阻塞发布 + +无当前 P0 
blocker。v0.20.1 已发布,所有 P0 架构 gaps 已关闭。 + +--- + +## P1 — 测试覆盖 + +### 28 个 MCP 工具缺少 invocation tests + +**现状**:68 个工具中,40 个有 dedicated `invoke()` 测试(+3 本批次新增),28 个仅有 name/schema smoke tests 或零覆盖。 + +**影响**:Beta → Stable 的 promote 需要测试背书;无测试的工具在重构时存在回归风险。 + +**缺失测试的工具清单**: + +| 工具 | Tier | 已有覆盖 | +|------|------|----------| +| `devkit_index` | Beta | 间接(scenario) | +| `devkit_index_health` | Beta | 无 | +| `devkit_index_stream` | Beta | 无 | +| `devkit_status` | Beta | 无 | +| `devkit_note` | Beta | 无 | +| `devkit_digest` | Experimental | 无 | +| `devkit_paper_index` | Experimental | 无 | +| `devkit_semantic_search` | Beta | 间接(scenario) | +| `devkit_embedding_store` | Beta | 无 | +| `devkit_embedding_search` | Beta | 无 | +| `devkit_cross_repo_search` | Beta | 间接(scenario) | +| `devkit_related_symbols` | Experimental | 无 | +| `devkit_search_quality` | Beta | 无 | +| `devkit_impact_analysis` | Beta | 无 | +| `devkit_project_brief` | Beta | 间接(scenario) | +| `devkit_knowledge_report` | Beta | 间接(scenario) | +| `devkit_session_*` × 13 | Beta/Exp | 部分 smoke | +| `devkit_workflow_*` × 3 | Beta | 部分(workflow.rs 单元测试) | +| `devkit_evaluate` | Beta | 无 | + +**建议**:按调用频率排序,优先为 Index、Status、Workflow、Session save/list 添加测试。 + +--- + +## P2 — 架构债务 + +### `mcp/tools/repo.rs` 730 行 + +**现状**:已从 2376 行拆至 730 行,但仍超过理想阈值(~300 行/模块)。 + +**计划**:按 domain 拆分为 `repo_health.rs` + `repo_query.rs` + `repo_index.rs`。已有 `docs/architecture/split-plan.md`。 + +### `src/mcp/mod.rs` 工具枚举集中化 + +**现状**:`McpToolEnum` 是包含 68 个变体的 giant enum,`tier()` 方法是 200+ 行的 match 表达式。 + +**影响**:新增工具需要修改 3 处(enum 定义、match arm、tier match),容易遗漏。 + +**建议**:考虑使用宏或 derive 自动生成 `McpToolEnum` 和 `tier()`,减少 boilerplate。 + +### Vault 笔记全文搜索性能 + +**现状**:`devkit_vault_search` 在内存中对所有笔记做线性扫描 + 字符串匹配。 + +**影响**:Vault 笔记数量 >1000 时,搜索延迟可能超过 1s。 + +**建议**:为 Vault 内容建立 Tantivy 索引(复用现有 symbol_index 基础设施),或至少增加关键词索引表。 + +--- + +## P3 — 文档与可观测性 + +### 性能基准缺失 + +**现状**:Criterion 已列为 dev-dependency,但无实际 benchmark 套件。 + +**建议**:为 
Index、Query、VaultSearch 建立 Criterion benchmarks,记录基线到 CI 产物。 + +## 已解决(归档) + +| 问题 | 解决版本 | 解决方式 | +|------|----------|--------| +| `relations` 表零生产读取路径 | v0.20.1 | `devkit_relation_store/query/delete` + `project_context` 读取 | +| Workflow 引擎零 MCP 暴露 | v0.20.1 | `devkit_workflow_list/run/status` | +| `project_context` 不完整 | v0.20.1 | 补充 `known_limits` + `skills` | +| `mcp/tools/repo.rs` 2376 行 | v0.20.1 | 拆分为 `tools/` 目录,repo.rs 730 行 | +| `init_db_at` 1214 行 | v0.20.1 | 拆分为 `registry/migrate.rs`(503 行)+ 子模块 | +| 工具数量文档不一致 | v0.20.1 | `mcp-tools.md` 全面更新至 68 个 | +| 3 Stable 工具缺 invocation tests | v0.20.1 | `query_repos`, `vault_search`, `vault_read` 测试已补充 | +| `devkit_document_convert` 工具缺失 | v0.21.0 | `src/mcp/tools/document_convert.rs` + MCP 注册 | + +--- + +*Last updated: 2026-05-20* diff --git a/README.md b/README.md index b21f560..607ea58 100644 --- a/README.md +++ b/README.md @@ -1,447 +1,146 @@ -# devbase +
+ +# 🗄️ devbase + +> **开发者工作空间的世界模型编译器** + +一套引擎,统一代码上下文、知识记忆与智能体推理。 [![Version](https://img.shields.io/badge/version-v0.20.1-blue)](https://github.com/juice094/devbase/releases) -[![Tests](https://img.shields.io/badge/tests-485%2B%20passed-brightgreen)](./AGENTS.md) -[![Clippy](https://img.shields.io/badge/clippy-0%20warnings-green)](./AGENTS.md) -[![License](https://img.shields.io/badge/license-AGPL--3.0-orange)](./LICENSE) +[![Tests](https://img.shields.io/badge/tests-494%2B%20passed-brightgreen)](https://github.com/juice094/devbase/actions) +[![Clippy](https://img.shields.io/badge/clippy-0%20warnings-green)](https://github.com/juice094/devbase/actions) +[![License](https://img.shields.io/badge/license-AGPL--3.0%20%2F%20Commercial-orange)](LICENSE) [![Rust](https://img.shields.io/badge/rust-1.95%2B-9cf)](https://www.rust-lang.org) +[![Glama](https://glama.ai/mcp/servers/juice094/devbase/badges/score.svg)](https://glama.ai/mcp/servers/juice094/devbase) -**World Model Compiler for Developer Workspaces** - -> One engine for code context, knowledge memories, and agent reasoning. -> Replacing fragmented repo managers, note-taking apps & AI context windows. +
--- -## 30 秒了解 +## 📋 简介 -devbase 是开发者的**世界模型编译器**。它将代码库、笔记、工作流等原始数字资产编译为 AI 可推理的结构化情境——不是存储数据,是构建环境的心智模型。 +devbase 将代码库、笔记与工作流编译为 AI 可推理的结构化情境 — 不是存储数据,是构建环境的心智模型。 | 你是谁 | devbase 为你做什么 | |:---|:---| -| **人类开发者** | `devbase tui` 打开终端仪表盘,一眼看清 N 个仓库的 Git 状态,按 `s` 批量安全同步 | -| **AI Agent** | 通过 MCP 调用 `devkit_skill_run`,AI 能发现、执行、编排 Skill——不再重复造轮子 | -| **项目维护者** | `devbase skill discover .` 一键将项目封装为 Skill,让 AI 用户能够发现和调用 | - -``` -┌─────────────────────────────────────────────────────────────────┐ -│ Interaction Layer (人类与 AI 的接口) │ -│ ┌──────────────┐ ┌──────────────┐ ┌──────────────────────┐ │ -│ │ TUI 仪表盘 │ │ MCP Server │ │ Workflow Engine │ │ -│ │ (ratatui) │ │ 68 Tools │ │ YAML + 拓扑调度 │ │ -│ └──────────────┘ └──────────────┘ └──────────────────────┘ │ -├─────────────────────────────────────────────────────────────────┤ -│ Compilation Layer (World Model Compiler Core) │ -│ ┌──────────────┐ ┌──────────────┐ ┌──────────────────────┐ │ -│ │ Perception │ │ Knowledge │ │ Policy / Action │ │ -│ │ · tree-sitter│ │ · Graph DB │ │ · Sync Strategy │ │ -│ │ · Tantivy │ │ · Vector UDF│ │ · Workflow Rules │ │ -│ │ · Git 状态 │ │ · Relation │ │ · Health Guardrails │ │ -│ └──────────────┘ └──────────────┘ └──────────────────────┘ │ -├─────────────────────────────────────────────────────────────────┤ -│ Reliability Layer (生产级底线) │ -│ ┌──────────────┐ ┌──────────────┐ ┌──────────────────────┐ │ -│ │ SQLite WAL │ │ Index Health│ │ Observability │ │ -│ │ 并发安全 │ │ · 损坏检测 │ │ · OpLog 审计 │ │ -│ │ · 增量备份 │ │ · 自动重建 │ │ · 查询延迟指标 │ │ -│ │ · 迁移回滚 │ │ · 性能基线 │ │ · 数据质量评分 │ │ -│ └──────────────┘ └──────────────┘ └──────────────────────┘ │ -├─────────────────────────────────────────────────────────────────┤ -│ Source of Truth (持久化真相源) │ -│ Git 代码库 · Vault PARA 笔记 · 外部论文 · 二进制资源 │ -└─────────────────────────────────────────────────────────────────┘ -``` +| **人类开发者** | `devbase tui` — 终端仪表盘,一眼看清 N 个仓库的 Git 状态,按 `s` 批量安全同步 | +| **AI 智能体** | 69 个 MCP 工具:通过 `devkit_skill_run` 发现、执行、编排 Skill — 不再重复造轮子 
| +| **项目维护者** | `devbase skill discover .` — 一键将项目封装为 Skill,让 AI 用户能够发现和调用 | --- -## 安装 - -**一键安装** - -```powershell -# Windows -irm https://raw.githubusercontent.com/juice094/devbase/main/scripts/install.ps1 | iex - -# Linux / macOS -curl -fsSL https://raw.githubusercontent.com/juice094/devbase/main/scripts/install.sh | bash -``` - -**预编译二进制** - -| 平台 | 下载 | 大小 | -|:---|:---|:---| -| Windows x86_64 | [`devbase-v0.20.1-windows-x64.zip`](https://github.com/juice094/devbase/releases/download/v0.20.1/devbase-v0.20.1-windows-x64.zip) | ~8.7 MB | -| Linux x86_64 | [`devbase-v0.20.1-linux-x64.tar.gz`](https://github.com/juice094/devbase/releases/download/v0.20.1/devbase-v0.20.1-linux-x64.tar.gz) | ~8.8 MB | - -```powershell -# Windows (PowerShell) -Invoke-WebRequest -Uri "https://github.com/juice094/devbase/releases/download/v0.20.1/devbase-v0.20.1-windows-x64.zip" -OutFile devbase.zip -Expand-Archive -Path devbase.zip -DestinationPath . -Force -# 提取后的 devbase.exe 可直接运行 -``` - -```bash -# Linux -curl -fsSL -o devbase.tar.gz https://github.com/juice094/devbase/releases/download/v0.20.1/devbase-v0.20.1-linux-x64.tar.gz -tar xzf devbase.tar.gz -cp devbase-v0.20.1-linux-x64/devbase /usr/local/bin/ # 或任意 PATH 目录 -chmod +x /usr/local/bin/devbase -``` +## 🌟 核心亮点 -**从源码** +| 亮点 | 说明 | +|:---|:---| +| 📊 **TUI 仪表盘** | ratatui 终端界面:跨仓库搜索、安全同步、Skill/Workflow 发现 | +| 🔌 **69 个 MCP 工具** | stdio 本地进程通信:仓库管理、代码分析、知识图谱、智能体记忆 | +| 🏠 **本地优先** | 零数据离开本机 — SQLite + Tantivy + tree-sitter,无需云端 | +| 🔍 **混合检索** | BM25 全文 + 纯 SQL 向量搜索(`cosine_similarity` UDF),零 ML 运行时依赖 | -```bash -git clone https://github.com/juice094/devbase.git -cd devbase && cargo install --path . 
-``` +> [完整 69 个 Tool 矩阵 → docs/guides/mcp-integration.md](docs/guides/mcp-integration.md) --- -## 核心能力 - -### Human Layer — TUI 仪表盘 - -基于 [ratatui](https://github.com/ratatui/ratatui) 的终端界面,专为**多仓库场景**设计: +## 🔧 技术栈 -| 按键 | 功能 | +| 组件 | 技术 | |:---|:---| -| `↑/↓` `PgUp/PgDn` | 导航列表(仓库 / Vault / Session) | -| `Tab` | 切换主视图(RepoList → VaultList → Session) | -| `/` | 跨仓库代码搜索(Tantivy / ripgrep) | -| `Enter` | 一键启动 gitui / lazygit | -| `s` / `S` | 预览 / 执行安全同步 | -| `k` / `w` | Skill 列表 / Workflow 列表 | -| `[:]` | 自然语言查询 Skills | -| `d` | 发现 Skill(自动封装当前项目) | -| `h` / `?` | 快捷键帮助 | - -**面板布局**: -- **RepoList**:左侧 35% 仓库列表(状态图标 ● dirty ◆ diverged ▼ behind ✓ 正常),右侧 65% 三标签页详情(Overview / Health / Insights) -- **VaultList**:Vault 笔记列表,支持 PARA 方法笔记的快速检索与阅读 -- **Session**:Agent 会话列表(● active / ◌ archived),选中后右侧面板展示该上下文的语义记忆(◆ decision ▪ constraint ★ discovery ✗ error) - -### AI Layer — 68 个 MCP Tools - -基于 [Model Context Protocol](https://modelcontextprotocol.io) 标准化接口,stdio 本地进程通信。 - -| 域 | Tools | 代表能力 | -|:---|:---|:---| -| 仓库管理 | `scan`, `health`, `sync`, `query_repos` | 批量管理 + 安全同步策略 | -| 代码分析 | `code_metrics`, `module_graph`, `code_symbols`, `call_graph`, `dependency_graph`, `dead_code` | AST 符号 + 调用图 + 死代码检测 | -| 知识检索 | `semantic_search`, `hybrid_search`, `cross_repo_search`, `related_symbols`, `knowledge_report` | 向量语义 + RRF 混合检索 | -| Skill 运行时 | `skill_list`, `skill_search`, `skill_run`, `skill_top` | 发现 / 搜索 / 执行 / 评分 | -| Workflow 编排 | `workflow_list`, `workflow_run` | YAML 多步骤自动化 | -| 知识图谱 | `relation_store`, `relation_query`, `relation_delete` | 实体关系存储与查询 | -| Agent 记忆 | `session_recall`, `session_index`, `session_export`, `session_import` | 语义召回 + 向量索引 + 会话迁移 | -| ClaudeCode 集成 | `project_brief`, `impact_analysis` | 生成 CLAUDE.md 注入上下文 + 变更影响半径分析 | -| Vault / 笔记 | `vault_search`, `vault_read`, `vault_write`, `vault_history`, `vault_graph`, `vault_export`, `vault_backlinks` | PARA 笔记 + Git 历史 + BFS 图遍历 + 数据导出 | -| 可观测性 | `search_quality`, `index_health`, `oplog_query` 
| 检索质量指标 + 索引健康评分 + 审计日志 | - -> 完整 Tool 矩阵见下文 [MCP Tool 矩阵](#mcp-tool-矩阵)。 - -### Storage & Reliability Layer — 生产级本地知识基础设施 - -> **devbase 首先是一个可靠的本地知识基础设施,然后才是一个 World Model Compiler。** AI 层是编译器的输出接口,但如果存储层不可靠,AI 就是沙上建塔。 - -| 组件 | 技术 | 生产级特性 | -|:---|:---|:---| -| 关系存储 | SQLite (WAL mode) | 并发安全、增量备份、Schema 迁移前自动快照、回滚保障 | -| 全文检索 | Tantivy | BM25 评分、索引健康检测、损坏自动重建、孤儿文档清理 | -| 语义检索 | SQLite BLOB (768-dim) + `cosine_similarity` UDF | 外置 Embedding 存储、纯 SQL 向量比对、零 ML 运行时依赖 | -| Agent 记忆 | `agent_contexts` + `agent_memories` | 会话生命周期管理、语义记忆召回、向量索引持久化 | -| AST 感知 | tree-sitter | Rust / Python / TS / Go 多语言符号提取 + 调用图构建 | -| 可观测性 | SQLite `oplog` + `HybridSearchMetrics` + 性能基线 | 全操作审计追踪、混合检索质量指标(latency/recall/overlap)、查询延迟回归测试 | -| Vault 深度 | Git-based 历史 + BFS 图遍历 + Block 引用 | 笔记变更追踪(blob diff)、双向链接图遍历(depth 1-3)、`[[note#heading]]` 块级引用 | - -**可靠性红线**:所有对 Registry 的写入操作必须留下不可变审计痕迹(OpLog);Schema 迁移前自动生成 `backup-YYYYMMDD-HHMMSS.db`;索引层具备反向一致性扫描与自动修复能力。详见 [AGENTS.md](./AGENTS.md) §知识库生产级缺口与补齐路线。 +| 终端 UI | ratatui | +| 全文检索 | Tantivy (BM25) | +| 语义检索 | SQLite BLOB + `cosine_similarity` UDF | +| 代码解析 | tree-sitter (Rust/Python/TS/Go) | +| 关系存储 | SQLite (WAL 模式, OpLog 审计) | +| 协议 | Model Context Protocol (stdio) | --- -## 快速开始 - -```bash -# 1. 扫描并注册工作区 -devbase scan . --register - -# 2. 检查索引状态(Agent 自检) -devbase status --json - -# 3. 启动 TUI 仪表盘 -devbase tui +## 📁 项目结构 -# 4. 
启动 MCP Server(供 AI 助手调用) -devbase mcp ``` - -**Claude Desktop 配置**(`claude_desktop_config.json`): - -```json -{ - "mcpServers": { - "devbase": { - "command": "devbase", - "args": ["mcp"] - } - } -} +devbase/ +├── src/ +│ ├── main.rs # CLI 入口:命令解析与分发 +│ ├── tui/ # 终端仪表盘(ratatui) +│ │ # 多仓库导航、跨仓库搜索、安全同步预览 +│ ├── mcp/ # MCP Server(69 个工具,stdio 通信) +│ │ # 人类与 AI 的统一接口层 +│ ├── registry/ # 仓库注册表:Git 状态、健康检查、批量同步 +│ ├── index/ # Tantivy 全文索引 + SQLite 向量索引 +│ │ # 混合检索核心,BM25 + cosine 向量评分 +│ ├── vault/ # PARA 笔记系统:双向链接、BFS 图遍历 +│ ├── skill/ # Skill 生命周期:发现 → 安装 → 执行 → 评分 → 发布 +│ │ # 自动封装项目为 AI 可调用的 Skill +│ ├── workflow/ # YAML 编排引擎:5 种 step 类型,拓扑调度 + 并行执行 +│ └── session/ # 智能体会话生命周期 + 向量记忆持久化 +├── docs/ +│ ├── architecture/ # 架构文档总览 +│ └── guides/ # 集成指南(Claude Code / 5ire / Kimi CLI) +├── scripts/ +│ ├── install.ps1 # Windows 一键安装 +│ ├── install.sh # Linux/macOS 一键安装 +│ └── devbase-claude.ps1 # Claude Code 一键启动器 +└── README.md ``` -**Kimi CLI 配置**(`~/.kimi/mcp.json`): +### 核心设计 -```json -{ - "mcpServers": { - "devbase": { - "command": "devbase", - "args": ["mcp"], - "env": { - "DEVBASE_MCP_ENABLE_DESTRUCTIVE": "1", - "DEVBASE_MCP_TOOL_TIERS": "stable,beta" - } - } - } -} -``` +**三层架构**: +1. **交互层** — TUI 仪表盘 + MCP Server + Workflow 引擎(人类与 AI 的接口) +2. **编译层** — 感知(tree-sitter/Tantivy/Git)→ 知识(图谱/向量/关系)→ 策略(同步/工作流/健康守卫) +3. 
**可靠层** — SQLite WAL 并发安全 + 索引健康检测 + OpLog 全操作审计 -**ClaudeCode 工作流集成**(v0.18.0+) +> 可靠性红线:所有 Registry 写入必须留下不可变审计痕迹(OpLog);Schema 迁移前自动生成快照。详见 [docs/architecture/overview.md](docs/architecture/overview.md)。 -一键为当前项目生成 `.claude/CLAUDE.md` 上下文简报,并启动 Claude Code: +--- + +## 🚀 快速开始 ```powershell -# PowerShell (Windows) -./scripts/devbase-claude.ps1 +# Windows 一行安装 +irm https://raw.githubusercontent.com/juice094/devbase/main/scripts/install.ps1 | iex -# 可选:退出时自动捕获 git diff 到 Session Memory -./scripts/devbase-claude.ps1 -CaptureOnExit +# 或下载预编译二进制(~8.7 MB) +# https://github.com/juice094/devbase/releases/tag/v0.20.1 ``` -或手动通过 MCP 调用: - ```bash -# 生成项目简报(Markdown) -devbase project brief +# Linux / macOS +curl -fsSL https://raw.githubusercontent.com/juice094/devbase/main/scripts/install.sh | bash -# 分析某符号的变更影响半径 -devbase impact analysis +# 基础工作流 +devbase scan . --register # 1. 扫描并注册工作区 +devbase tui # 2. 打开仪表盘 +devbase mcp # 3. 启动 MCP 服务端(供 AI 调用) ``` ---- - -## 功能深度 - -### 安全同步 (Safe Sync) - -不是粗暴的 `git pull --all`,而是分级策略: - -| 策略 | 行为 | 场景 | 颜色 | -|:---|:---|:---|:---:| -| **Mirror** | 仅 fetch,永不修改 | 参考仓库、第三方依赖 | 🔵 | -| **Conservative** | 仅 fast-forward,diverged 跳过 | 日常开发,安全第一 | 🟡 | -| **Rebase** | 自动 rebase 本地提交 | 个人分支,线性历史 | 🟢 | -| **Merge** | 自动 merge | 协作分支 | 🟣 | - -同步前预检:dirty / diverged / protected 分支自动跳过并记录到 OpLog。 - -### Skill 全生命周期 - -``` -discover → install → run → score → publish - ↑_________________________________| +**AI 助手配置** — 添加到 `claude_desktop_config.json` 或 `~/.kimi/mcp.json`: +```json +{ "mcpServers": { "devbase": { "command": "devbase", "args": ["mcp"] } } } ``` -- **发现**:`devbase skill discover ` — 自动分析项目 CLI/API,生成 `SKILL.md` + entry_script(支持 Rust/Node/Python/Go/Docker/Generic) -- **执行**:Process-based,自动 interpreter 探测,timeout,stdout/stderr 捕获 -- **评分**:Mind Market 算法自动计算 `success_rate` / `usage_count` / `rating`(0-5) -- **依赖**:Schema v15 `dependencies`,Kahn 拓扑排序 + DFS 环检测 - -### Workflow 引擎 (v0.8.0) - -YAML 编排多步骤自动化: - -- 5 种 step 类型:`skill` / 
`subworkflow` / `parallel` / `condition` / `loop` -- 拓扑调度 + batch 并行执行 -- 变量插值:`${inputs.x}` / `${steps.y.outputs.z}` -- 错误策略:Fail / Continue / Retry / Fallback - -### 自然语言查询 (v0.8.1) - -TUI `[:]` 触发 embedding 语义搜索,失败自动降级为文本搜索。AI 可以说: - -> "show dirty rust repos with more than 100 stars" - -### L3-L4 知识模型 (v0.10.0) - -系统具备**自我边界意识**和**认知纠错能力**: - -- **L3 风险层 (`known_limits`)**: 记录 hard vetoes、已知缺陷、外部依赖风险 - - `devbase limit list` 查看当前系统约束 - - `devbase limit seed` 从 AGENTS.md 自动填充 hard vetoes -- **L4 元认知层 (`knowledge_meta`)**: 记录人类对 L1-L3 的纠正 - - `devbase limit resolve --reason "..."` 自动创建 L4 纠正记录 -- **运行时守卫**: Skill 执行前自动检查未解决 hard veto,警告注入 stderr - ---- - -## MCP Tool 矩阵 - -| Tool | 功能 | 示例查询 | -|:---|:---|:---| -| `devkit_scan` | 扫描并注册工作区 | "扫描 ~/projects" | -| `devkit_health` | 健康检查 | "哪些项目需要同步?" | -| `devkit_sync` | 批量同步(dry-run 默认) | "预览同步结果" | -| `devkit_query_repos` | 结构化查询 | "列出所有 dirty 的 Rust 项目" | -| `devkit_code_metrics` | 代码统计 | "我最大的项目是什么?" | -| `devkit_module_graph` | 模块结构 | "有哪些二进制目标?" | -| `devkit_natural_language_query` | 自然语言查询 | "dirty rust repos with >100 stars" | -| `devkit_index` | 索引仓库摘要 | "为所有仓库生成索引" | -| `devkit_index_stream` | 流式索引(进度事件) | "索引 devbase 并显示进度" | -| `devkit_status` | 索引状态查询 | "devbase 索引是否新鲜?" | -| `devkit_query` | 知识库搜索 | "搜索 sync policy" | -| `devkit_note` | 添加笔记 | "给 devbase 添加笔记" | -| `devkit_digest` | 知识日报 | "今天的知识日报" | -| `devkit_github_info` | GitHub 元数据 | "devbase 多少 stars?" | -| `devkit_paper_index` | 索引 PDF 论文 | "索引 ~/papers" | -| `devkit_experiment_log` | 记录实验 | "记录这次实验配置" | -| `devkit_vault_search` | 搜索 Vault 笔记 | "搜索 API 设计笔记" | -| `devkit_vault_read` | 读取 Vault 笔记 | "读取 01-Projects/devbase.md" | -| `devkit_vault_write` | 创建/更新 Vault 笔记 | "新建重构笔记" | -| `devkit_vault_backlinks` | 反向链接 | "哪些笔记链接到 devbase?" | -| `devkit_vault_graph` | BFS 知识图谱遍历 | "devbase 笔记的引用网络" | -| `devkit_vault_history` | Git-based 笔记历史 | "这篇笔记上周改了什么?" 
| -| `devkit_vault_export` | Vault 数据导出 | "导出所有笔记到 Obsidian" | -| `devkit_project_context` | 统一项目上下文 | "devbase 的全景视图" | -| `devkit_code_symbols` | 代码语义索引 | "`build_server` 在哪?" | -| `devkit_call_graph` | 调用关系分析 | "谁调用了 `register_tool`?" | -| `devkit_dependency_graph` | 跨仓库依赖图 | "改 `shared-lib` 影响哪些?" | -| `devkit_dead_code` | 死代码检测 | "哪些函数没被调用过?" | -| `devkit_semantic_search` | 向量语义搜索 | "搜索错误处理相关函数" | -| `devkit_embedding_store` | Embedding 存储 | "将向量存入 devbase" | -| `devkit_embedding_search` | 向量搜索 | "用 query 向量搜索符号" | -| `devkit_arxiv_fetch` | arXiv 论文抓取 | "获取 arXiv 2401.12345" | -| `devkit_hybrid_search` | 混合检索(推荐) | "自动融合向量+关键词" | -| `devkit_cross_repo_search` | 跨仓库语义搜索 | "所有 Rust CLI 中搜配置解析" | -| `devkit_knowledge_report` | 知识覆盖报告 | "索引覆盖度如何?" | -| `devkit_related_symbols` | 概念关联搜索 | "与 `authenticate` 相似的函数" | -| `devkit_search_quality` | 检索质量监控 | "这次混合搜索的质量指标" | -| `devkit_index_health` | 索引健康检查 | "Tantivy 索引是否损坏?" | -| `devkit_skill_list` | 列出 Skills | "有哪些内置 skill?" | -| `devkit_skill_search` | 搜索 Skills | "查找代码审计相关 skill" | -| `devkit_skill_run` | 执行 Skill | "运行 embed-repo skill" | -| `devkit_workflow_list` | 列出工作流 | "有哪些工作流?" | -| `devkit_workflow_run` | 执行工作流 | "运行 deploy-staging" | -| `devkit_workflow_status` | 查询工作流执行状态 | "检查工作流是否完成" | -| `devkit_relation_store` | 存储实体关系 | "记录 devbase 依赖 clarity" | -| `devkit_relation_query` | 查询实体关系 | "谁依赖了 devbase?" | -| `devkit_relation_delete` | 删除实体关系 | "移除已弃用的关系" | -| `devkit_oplog_query` | 查询操作日志 | "最近 devbase 做了什么?" 
| -| `devkit_skill_discover` | 自动发现 Skill | "把这个项目打包成 Skill" | -| `devkit_vault_daily` | 生成每日笔记 | "创建今日日报" | -| `devkit_vault_graph` | 导出知识图谱 | "可视化笔记关联" | -| `devkit_known_limit_store` | 记录 known limit | "记录系统约束" | -| `devkit_known_limit_list` | 列出 known limits | "查看当前风险" | -| `devkit_session_recall` | 语义记忆召回 | "召回与当前任务相关的决策记忆" | -| `devkit_session_index` | 向量索引记忆 | "为这段记忆生成 embedding 并索引" | -| `devkit_session_export` | 导出会话 | "导出当前会话为 Markdown" | -| `devkit_session_import` | 导入会话 | "从文本批量导入记忆" | -| `devkit_project_brief` | 生成项目简报 | "为 devbase 生成 CLAUDE.md" | -| `devkit_impact_analysis` | 变更影响分析 | "改 `register_tool` 会影响哪些调用方?" | - -### AI 助手集成 - -- [Claude Code 集成](docs/guides/mcp-integration-guide.md) -- [5ire 集成](docs/guides/mcp-5ire-integration.md) - ---- - -## 路线图 - -| 版本 | 状态 | 核心交付 | -|:---|:---:|:---| -| v0.3.0 | ✅ 已发布 | 产品化闭环:34 MCP tools + TUI + 安全同步 | -| v0.4.0 | ✅ 已发布 | Skill 自动封装 + 统一实体模型 Schema v16 | -| v0.5.0 | ✅ 已发布 | Workflow 引擎:YAML 编排 + 5 step 类型 | -| v0.6.0 | ✅ 已发布 | Mind Market 评分:success_rate / usage_count / rating | -| v0.7.0 | ✅ 已发布 | NLQ 自然语言查询 + 智能同步建议 | -| v0.8.0 | ✅ 已发布 | Workflow 子类型:Subworkflow / Parallel / Condition / Loop | -| v0.9.0 | ✅ 已发布 | Loop Step 硬化 + 发布闭环 | -| v0.10.0 | ✅ 已发布 | L3-L4 知识模型 + 工程健康维护(main.rs 拆分 / StorageBackend / feature flags) | -| v0.11.0 | ✅ 已发布 | AppContext Pool 化 + MCP 测试隔离 + CI 多线程 | -| v0.11.1 | ✅ 已发布 | Flat ID 命名空间 + entities-first 写入反转 | -| v0.11.2 | ✅ 已发布 | 读路径全量迁移:所有 SELECT 切到 `entities` | -| v0.11.3 | ✅ 已发布 | `repos` 表删除,`entities` 成为唯一数据源(Phase 1 完成) | -| v0.12.0 | ✅ 已发布 | Phase 2 完成(Stage A-E):entities 统一重构 + `.devbase-ignore` + managed-gate fail-safe 同步 | -| v0.13.0 | ✅ 已发布 | Registry God Object 拆解:10 子模块提取为 free function;WorkspaceRegistry 退化为纯 facade | -| v0.14.0 | ✅ 已发布 | Workspace 拆分:6 个零耦合 crate 提取;MCP trait 化:`mcp/tools/repo.rs` `crate::` 引用 68→41 | -| v0.14.1 | ✅ 已发布 | CLI JSON 输出补全 (`--json`/`--recalc`) + relations MCP 工具加固 + License headers + Vault Daily/Graph | -| v0.14.2 | ✅ 已发布 | health 
dirty 检测修复(排除 ignored 文件)+ scan 路径规范化 + syncthing-rust 识别修复 + experiment_log/CodeMetrics/ModuleGraph/CallGraph/DeadCode 提升为 Beta(48 tools: Stable 5 / Beta 40 / Experimental 3) | -| v0.14.3 | ✅ 已发布 | Schema v30 code symbol attributes + dead-code 过滤增强 + init_db() 注入式改造(RF-1)+ Tantivy/SQLite 补偿扫描 + Feature flags(mcp / embedding)+ sccache 构建加速文档 | -| **v0.15.0** | **✅ 已发布** | **P1 Tantivy BM25 代码符号搜索 + P2 AppContext 职责拆分(storage.rs 860→430 行)+ P3 Embedding 多后端(Candle + Ollama)+ P4 EnvVersionCache 扩展(9 工具链:含 python/bun/zig/java)+ P5 架构不变量自动化 CI(G5/T11/T12)** | -| v0.16.0 | ✅ 已发布 | Agent Context 系统:会话生命周期 + 记忆注入 + Workflow-Session 绑定 | -| v0.16.1 | ✅ 已发布 | Workflow-Session Binding 硬化(Schema v33)+ 安全修复 | -| v0.17.0 | ✅ 已发布 | Agent Memory 向量存储(Schema v34):外置 Embedding 协议 + SQLite UDF `cosine_similarity` + Skill Runtime 语义召回;默认构建零 ML 依赖 | -| **v0.18.0** | **✅ 已发布** | **ClaudeCode 工作流集成:`project_brief` + `impact_analysis` MCP Tools + Session 导出/导入 + `devbase-claude.ps1` 一键启动器;64 Tools 完整矩阵** | -| v0.19.0 | ✅ 已发布 | **知识基础设施硬化**:SQLite WAL + Tantivy 健康评分 + Vault 导出 + Redis ADR 决策 | -| v0.20.0 | ✅ 已发布 | 知识完备性:双向链接图遍历 + 笔记历史追踪 + 混合检索质量监控 + block 引用 + 性能回归基线 + 客户端无关原则;68 Tools | -| **v0.20.1** | **✅ 当前** | **Phase 1 生产硬化**:workflow E2E 测试、RF-7 路径隐私脱敏、Tantivy 一致性修复、性能回归基线、Architecture Invariants CI | - ---- - -## 为什么 devbase? 
- -### 不是替代,是连接 - -| 工具 | 定位 | devbase 的角色 | -|:---|:---|:---| -| **lazygit** | 单仓库 TUI | **多仓库入口** — 先告诉你哪些仓库需要关注,再按 `Enter` 进入 | -| **5ire / Claude Code** | AI 助手 | **代码库知识源** — 让 AI 拥有本地工作区的结构化上下文 | -| **GitHub Desktop** | GUI Git 客户端 | **TUI 替代** — 轻量 30 倍,SSH 可用,支持批量操作 | - -### AI 无法识别你的 GUI - -你的 IDE、文件管理器、甚至 lazygit 的界面对 AI 都是不可见的黑箱。devbase 通过 MCP Server 将本地代码库的状态、结构、健康度翻译成 AI 能理解的结构化数据——这是 AI 介入本地开发流程的**基础设施**。 - --- -## 隐私与安全 - -**本地优先(Local-First)**: +## 🤝 参与贡献 -- 代码不会离开本地机器 — Registry、索引、日志全部存储在用户目录的 SQLite 中 -- MCP Server 仅通过 stdio 本地进程通信,不监听网络端口 -- GitHub Token / LLM API Key 存储在用户配置目录的 `config.toml` 中,不会进入 git 仓库 -- `.gitignore` 已覆盖 `*.db`、`.devbase/`、`*.log`、`.env*` 等敏感文件 +详见 [CONTRIBUTING.md](CONTRIBUTING.md) — 添加 MCP 工具、Skill Schema、构建模式说明。快速验证: -```toml -# %LOCALAPPDATA%\devbase\config.toml (Windows) -# ~/.config/devbase/config.toml (Linux/macOS) -[github] -token = "" +```bash +cargo build --release +cargo test --all-targets +cargo clippy --all-targets -D warnings ``` --- -## 开发者与贡献 +## 📄 许可证 -> devbase 当前为单人维护项目(Bus Factor = 1),欢迎任何形式的贡献。 - -- **快速开始**: `cargo build --release` → `cargo test --all-targets` -- **构建加速**: [sccache 配置指南](CONTRIBUTING.md#构建加速可选) — tree-sitter 重复编译从 20s → <1s -- **代码规范**: `cargo clippy --all-targets -D warnings` + `cargo fmt --check` -- **Feature flags**: `cargo build --no-default-features`(最小化 CLI)/ `cargo build --features tui`(纯 TUI)/ `cargo build --features mcp`(纯 MCP Server) -- **架构文档**: [`docs/architecture/overview.md`](docs/architecture/overview.md) -- **Agent 约定**: [`AGENTS.md`](AGENTS.md) -- **贡献指南**: [`CONTRIBUTING.md`](CONTRIBUTING.md) — 如何添加 MCP Tool / Skill、Schema 迁移规范 +双许可证:[AGPL-3.0+](LICENSE) 开源 / [商业授权](LICENSE-COMMERCIAL.md) 闭源使用。联系:`juice094@protonmail.com`。 --- -## 许可证 - -本软件采用 **双许可证 (Dual License)** 模式: +
-- **开源使用**: [GNU Affero General Public License v3.0 or later (AGPL-3.0+)](./LICENSE) - - 适用于个人、学术、及遵守 AGPL-3.0 义务的开源项目 - - **核心约束**: 若将修改版部署为网络服务(SaaS、托管 MCP Server 等),必须向用户公开完整源代码 +**[⭐ Star](https://github.com/juice094/devbase) · [🐛 Issues](https://github.com/juice094/devbase/issues) · [🤝 Contribute](CONTRIBUTING.md)** -- **商业使用**: 如需在闭源产品、专有 SaaS 或无法遵守 AGPL-3.0 的场景中使用,可联系作者获取商业授权 - - 详见 [`LICENSE-COMMERCIAL.md`](./LICENSE-COMMERCIAL.md) - - 联系方式: `juice094@protonmail.com` +
diff --git a/crates/devbase-core-types/Cargo.toml b/crates/devbase-core-types/Cargo.toml index d117271..d02baf1 100644 --- a/crates/devbase-core-types/Cargo.toml +++ b/crates/devbase-core-types/Cargo.toml @@ -1,10 +1,11 @@ [package] name = "devbase-core-types" version.workspace = true -edition = "2024" +edition.workspace = true +authors.workspace = true +license.workspace = true +repository.workspace = true description = "Core knowledge graph types: Node, Edge, NodeType" -authors = ["juice094 <160722440+juice094@users.noreply.github.com>"] -license = "MIT" keywords = ["knowledge-graph", "types", "rust"] categories = ["data-structures"] diff --git a/crates/devbase-core-types/src/edge.rs b/crates/devbase-core-types/src/edge.rs new file mode 100644 index 0000000..bd2f1b8 --- /dev/null +++ b/crates/devbase-core-types/src/edge.rs @@ -0,0 +1,11 @@ +// SPDX-License-Identifier: MIT +// Copyright (c) 2026 juice094 + +/// A directed edge in the knowledge graph. +#[derive(Debug, Clone)] +pub struct Edge { + pub from_id: String, + pub to_id: String, + pub edge_type: String, + pub metadata: Option, +} diff --git a/crates/devbase-core-types/src/lib.rs b/crates/devbase-core-types/src/lib.rs index 87fa2f8..0b105be 100644 --- a/crates/devbase-core-types/src/lib.rs +++ b/crates/devbase-core-types/src/lib.rs @@ -15,131 +15,10 @@ //! - frontmatter 用 HashMap: 简单、通用,不绑定 YAML 解析器。 //! - Edge 不含 chrono 字段: 关系本身不需要时间戳,时间戳由存储层(registry)管理。 -use chrono::{DateTime, Utc}; -use std::collections::HashMap; -use std::path::PathBuf; +pub mod edge; +pub mod node; +pub mod node_type; -/// Types of knowledge assets managed by devbase. -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] -pub enum NodeType { - /// A Git repository (original devbase focus). - GitRepo, - /// A Markdown note in the vault (Obsidian-compatible). - VaultNote, - /// A binary asset (PDF, image, design file). - Asset, - /// An external link (Figma, Notion, API doc). 
- ExternalLink, -} - -impl std::fmt::Display for NodeType { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - NodeType::GitRepo => write!(f, "repo"), - NodeType::VaultNote => write!(f, "vault"), - NodeType::Asset => write!(f, "asset"), - NodeType::ExternalLink => write!(f, "link"), - } - } -} - -impl std::str::FromStr for NodeType { - type Err = String; - - fn from_str(s: &str) -> Result { - match s { - "repo" | "git" => Ok(NodeType::GitRepo), - "vault" | "note" => Ok(NodeType::VaultNote), - "asset" => Ok(NodeType::Asset), - "link" | "external" => Ok(NodeType::ExternalLink), - _ => Err(format!("unknown node type: {}", s)), - } - } -} - -/// Unified entity model for all knowledge assets. -/// -/// Replaces the repo-centric `RepoEntry` monopoly. Each node carries -/// frontmatter (arbitrary key–value metadata) so that vault notes, -/// git repos, and external links can coexist in the same query/results. -#[derive(Debug, Clone)] -pub struct Node { - pub id: String, - pub node_type: NodeType, - pub path: PathBuf, - pub title: Option, - /// Raw frontmatter as key–value pairs. For vault notes this is the - /// parsed YAML header; for git repos it may contain `remote_origin`, - /// `default_branch`, etc. - pub frontmatter: HashMap, - pub tags: Vec, - pub outgoing_links: Vec, - pub created_at: DateTime, - pub updated_at: DateTime, -} - -impl Node { - /// Convenience accessor for a frontmatter value. - pub fn fm(&self, key: &str) -> Option<&str> { - self.frontmatter.get(key).map(|s| s.as_str()) - } - - /// Whether this node is a git repo. - pub fn is_repo(&self) -> bool { - self.node_type == NodeType::GitRepo - } - - /// Whether this node is a vault note. - pub fn is_vault(&self) -> bool { - self.node_type == NodeType::VaultNote - } -} - -/// A directed edge in the knowledge graph. 
-#[derive(Debug, Clone)] -pub struct Edge { - pub from_id: String, - pub to_id: String, - pub edge_type: String, - pub metadata: Option, -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_node_type_display() { - assert_eq!(NodeType::GitRepo.to_string(), "repo"); - assert_eq!(NodeType::VaultNote.to_string(), "vault"); - } - - #[test] - fn test_node_type_from_str() { - assert_eq!("repo".parse::().unwrap(), NodeType::GitRepo); - assert_eq!("vault".parse::().unwrap(), NodeType::VaultNote); - assert!("unknown".parse::().is_err()); - } - - #[test] - fn test_node_frontmatter_access() { - let node = Node { - id: "test".into(), - node_type: NodeType::VaultNote, - path: PathBuf::from("/tmp/test.md"), - title: Some("Test".into()), - frontmatter: { - let mut m = HashMap::new(); - m.insert("status".into(), "draft".into()); - m - }, - tags: vec![], - outgoing_links: vec![], - created_at: Utc::now(), - updated_at: Utc::now(), - }; - assert_eq!(node.fm("status"), Some("draft")); - assert_eq!(node.fm("missing"), None); - assert!(node.is_vault()); - } -} +pub use edge::Edge; +pub use node::Node; +pub use node_type::NodeType; diff --git a/crates/devbase-core-types/src/node.rs b/crates/devbase-core-types/src/node.rs new file mode 100644 index 0000000..88ed842 --- /dev/null +++ b/crates/devbase-core-types/src/node.rs @@ -0,0 +1,74 @@ +// SPDX-License-Identifier: MIT +// Copyright (c) 2026 juice094 + +use chrono::{DateTime, Utc}; +use std::collections::HashMap; +use std::path::PathBuf; + +use crate::node_type::NodeType; + +/// Unified entity model for all knowledge assets. +/// +/// Replaces the repo-centric `RepoEntry` monopoly. Each node carries +/// frontmatter (arbitrary key–value metadata) so that vault notes, +/// git repos, and external links can coexist in the same query/results. +#[derive(Debug, Clone)] +pub struct Node { + pub id: String, + pub node_type: NodeType, + pub path: PathBuf, + pub title: Option, + /// Raw frontmatter as key–value pairs. 
For vault notes this is the + /// parsed YAML header; for git repos it may contain `remote_origin`, + /// `default_branch`, etc. + pub frontmatter: HashMap, + pub tags: Vec, + pub outgoing_links: Vec, + pub created_at: DateTime, + pub updated_at: DateTime, +} + +impl Node { + /// Convenience accessor for a frontmatter value. + pub fn fm(&self, key: &str) -> Option<&str> { + self.frontmatter.get(key).map(|s| s.as_str()) + } + + /// Whether this node is a git repo. + pub fn is_repo(&self) -> bool { + self.node_type == NodeType::GitRepo + } + + /// Whether this node is a vault note. + pub fn is_vault(&self) -> bool { + self.node_type == NodeType::VaultNote + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::node_type::NodeType; + + #[test] + fn test_node_frontmatter_access() { + let node = Node { + id: "test".into(), + node_type: NodeType::VaultNote, + path: PathBuf::from("/tmp/test.md"), + title: Some("Test".into()), + frontmatter: { + let mut m = HashMap::new(); + m.insert("status".into(), "draft".into()); + m + }, + tags: vec![], + outgoing_links: vec![], + created_at: Utc::now(), + updated_at: Utc::now(), + }; + assert_eq!(node.fm("status"), Some("draft")); + assert_eq!(node.fm("missing"), None); + assert!(node.is_vault()); + } +} diff --git a/crates/devbase-core-types/src/node_type.rs b/crates/devbase-core-types/src/node_type.rs new file mode 100644 index 0000000..012f492 --- /dev/null +++ b/crates/devbase-core-types/src/node_type.rs @@ -0,0 +1,58 @@ +// SPDX-License-Identifier: MIT +// Copyright (c) 2026 juice094 + +/// Types of knowledge assets managed by devbase. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub enum NodeType { + /// A Git repository (original devbase focus). + GitRepo, + /// A Markdown note in the vault (Obsidian-compatible). + VaultNote, + /// A binary asset (PDF, image, design file). + Asset, + /// An external link (Figma, Notion, API doc). 
+ ExternalLink, +} + +impl std::fmt::Display for NodeType { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + NodeType::GitRepo => write!(f, "repo"), + NodeType::VaultNote => write!(f, "vault"), + NodeType::Asset => write!(f, "asset"), + NodeType::ExternalLink => write!(f, "link"), + } + } +} + +impl std::str::FromStr for NodeType { + type Err = String; + + fn from_str(s: &str) -> Result { + match s { + "repo" | "git" => Ok(NodeType::GitRepo), + "vault" | "note" => Ok(NodeType::VaultNote), + "asset" => Ok(NodeType::Asset), + "link" | "external" => Ok(NodeType::ExternalLink), + _ => Err(format!("unknown node type: {}", s)), + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_node_type_display() { + assert_eq!(NodeType::GitRepo.to_string(), "repo"); + assert_eq!(NodeType::VaultNote.to_string(), "vault"); + } + + #[test] + fn test_node_type_from_str() { + assert_eq!("repo".parse::().unwrap(), NodeType::GitRepo); + assert_eq!("vault".parse::().unwrap(), NodeType::VaultNote); + assert!("unknown".parse::().is_err()); + } +} diff --git a/crates/devbase-embedding/Cargo.toml b/crates/devbase-embedding/Cargo.toml index 6eb1b2b..02e7488 100644 --- a/crates/devbase-embedding/Cargo.toml +++ b/crates/devbase-embedding/Cargo.toml @@ -1,11 +1,11 @@ [package] name = "devbase-embedding" version.workspace = true -edition = "2024" -authors = ["juice094 <160722440+juice094@users.noreply.github.com>"] +edition.workspace = true +authors.workspace = true +license.workspace = true +repository.workspace = true description = "Embedding generation and storage protocol with Candle and Ollama backends" -license = "MIT" -repository = "https://github.com/juice094/devbase" [dependencies] anyhow = "1" diff --git a/crates/devbase-embedding/src/candle.rs b/crates/devbase-embedding/src/candle.rs new file mode 100644 index 0000000..a6dc4b0 --- /dev/null +++ b/crates/devbase-embedding/src/candle.rs @@ -0,0 +1,127 @@ +// 
SPDX-License-Identifier: AGPL-3.0-or-later +// Copyright (c) 2026 juice094 + +use crate::EmbeddingProvider; + +pub struct CandleProvider; + +impl EmbeddingProvider for CandleProvider { + fn encode(&self, text: &str) -> anyhow::Result> { + let (model, tokenizer) = get_candle_resources()?; + encode_with_candle(model, tokenizer, text) + } + fn encode_batch(&self, texts: &[&str]) -> anyhow::Result>> { + let (model, tokenizer) = get_candle_resources()?; + encode_batch_with_candle(model, tokenizer, texts) + } + fn name(&self) -> &'static str { + "candle-all-MiniLM-L6-v2" + } +} + +fn get_candle_resources() +-> anyhow::Result<&'static (candle_transformers::models::bert::BertModel, tokenizers::Tokenizer)> { + use std::sync::OnceLock; + static RESOURCES: OnceLock< + Result<(candle_transformers::models::bert::BertModel, tokenizers::Tokenizer), String>, + > = OnceLock::new(); + match RESOURCES.get_or_init(|| init_candle_resources().map_err(|e| e.to_string())) { + Ok(r) => Ok(r), + Err(e) => Err(anyhow::anyhow!("CandleProvider init failed: {}", e)), + } +} + +fn init_candle_resources() +-> anyhow::Result<(candle_transformers::models::bert::BertModel, tokenizers::Tokenizer)> { + use candle_core::Device; + use candle_nn::VarBuilder; + use candle_transformers::models::bert::{BertModel, Config}; + use hf_hub::api::sync::Api; + use tokenizers::Tokenizer; + + let api = Api::new()?; + let repo = api.model("sentence-transformers/all-MiniLM-L6-v2".to_string()); + + let config_path = repo.get("config.json")?; + let tokenizer_path = repo.get("tokenizer.json")?; + let weights_path = repo.get("model.safetensors")?; + + let config: Config = serde_json::from_reader(std::fs::File::open(config_path)?)?; + let tokenizer = Tokenizer::from_file(tokenizer_path).map_err(|e| anyhow::anyhow!(e))?; + + let device = Device::Cpu; + let vb = unsafe { + // SAFETY: model.safetensors is read-only after hf-hub download; + // no other process modifies it. This is the standard candle loading pattern. 
+ VarBuilder::from_mmaped_safetensors(&[weights_path], candle_core::DType::F32, &device)? + }; + let model = BertModel::load(vb, &config)?; + + Ok((model, tokenizer)) +} + +fn encode_with_candle( + model: &candle_transformers::models::bert::BertModel, + tokenizer: &tokenizers::Tokenizer, + text: &str, +) -> anyhow::Result> { + encode_batch_with_candle(model, tokenizer, &[text]) + .and_then(|mut v| v.pop().ok_or_else(|| anyhow::anyhow!("empty embedding batch"))) +} + +fn encode_batch_with_candle( + model: &candle_transformers::models::bert::BertModel, + tokenizer: &tokenizers::Tokenizer, + texts: &[&str], +) -> anyhow::Result>> { + use candle_core::Tensor; + if texts.is_empty() { + return Ok(Vec::new()); + } + + // Batch tokenize + let encodings = tokenizer.encode_batch(texts.to_vec(), true).map_err(|e| anyhow::anyhow!(e))?; + + // Find max length for padding + let max_len = encodings.iter().map(|e| e.get_ids().len()).max().unwrap_or(0); + + // Build padded batch tensors + let mut input_ids_vec = Vec::new(); + let mut attention_mask_vec = Vec::new(); + for encoding in &encodings { + let ids = encoding.get_ids(); + let mask = encoding.get_attention_mask(); + let mut padded_ids = ids.to_vec(); + let mut padded_mask = mask.to_vec(); + padded_ids.resize(max_len, 0); + padded_mask.resize(max_len, 0); + input_ids_vec.extend(padded_ids); + attention_mask_vec.extend(padded_mask); + } + + let batch_size = texts.len(); + let input_ids = Tensor::new(input_ids_vec, &model.device)?.reshape((batch_size, max_len))?; + let token_type_ids = input_ids.zeros_like()?; + let attention_mask_t = + Tensor::new(attention_mask_vec, &model.device)?.reshape((batch_size, max_len))?; + + // Single forward pass for the whole batch + let output = model.forward(&input_ids, &token_type_ids, Some(&attention_mask_t))?; + + // Mean pooling + L2 normalize per sample + let mask = attention_mask_t.to_dtype(candle_core::DType::F32)?.unsqueeze(2)?; + let sum = output.broadcast_mul(&mask)?.sum(1)?; + let 
count = mask.sum(1)?; + let mean_pooled = sum.broadcast_div(&count)?; + + let norm = mean_pooled.sqr()?.sum_keepdim(1)?.sqrt()?; + let normalized = mean_pooled.broadcast_div(&norm)?; + + // Extract per-sample embeddings + let mut results = Vec::with_capacity(batch_size); + for i in 0..batch_size { + let emb = normalized.get(i)?.squeeze(0)?.to_vec1()?; + results.push(emb); + } + Ok(results) +} diff --git a/crates/devbase-embedding/src/lib.rs b/crates/devbase-embedding/src/lib.rs index f816c48..b32f66f 100644 --- a/crates/devbase-embedding/src/lib.rs +++ b/crates/devbase-embedding/src/lib.rs @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: MIT +// SPDX-License-Identifier: AGPL-3.0-or-later // Copyright (c) 2026 juice094 //! Embedding storage protocol and similarity utilities. //! @@ -10,6 +10,9 @@ //! `EmbeddingProvider` trait abstracts the generation backend. //! Current: `CandleProvider` (pure-Rust local inference). +pub mod candle; +pub mod ollama; + /// Provider trait for text-to-embedding generation. pub trait EmbeddingProvider: Send + Sync { /// Generate an embedding for a single query string. @@ -28,7 +31,7 @@ pub trait EmbeddingProvider: Send + Sync { /// Production provider selector. /// Returns the best available provider at runtime. pub fn default_provider() -> Box { - Box::new(CandleProvider) + Box::new(candle::CandleProvider) } /// Create a provider from configuration parameters. 
@@ -43,213 +46,9 @@ pub fn create_provider( timeout_seconds: u64, ) -> Box { match backend { - "ollama" => Box::new(OllamaProvider::new(base_url, _model, timeout_seconds)), - _ => Box::new(CandleProvider), - } -} - -// --------------------------------------------------------------------------- -// OllamaProvider — local HTTP embedding via Ollama /api/embed -// --------------------------------------------------------------------------- - -pub struct OllamaProvider { - base_url: String, - model: String, - timeout_seconds: u64, -} - -impl OllamaProvider { - pub fn new(base_url: &str, model: &str, timeout_seconds: u64) -> Self { - Self { - base_url: base_url.trim_end_matches('/').to_string(), - model: model.to_string(), - timeout_seconds, - } - } - - fn embed_inner(&self, inputs: Vec<&str>) -> anyhow::Result>> { - let url = format!("{}/api/embed", self.base_url); - let body = if inputs.len() == 1 { - serde_json::json!({ - "model": self.model, - "input": inputs[0], - }) - } else { - serde_json::json!({ - "model": self.model, - "input": inputs, - }) - }; - - let resp: serde_json::Value = ureq::post(&url) - .set("Content-Type", "application/json") - .timeout(std::time::Duration::from_secs(self.timeout_seconds)) - .send_json(body) - .map_err(|e| anyhow::anyhow!("Ollama API request failed: {}", e))? - .into_json() - .map_err(|e| anyhow::anyhow!("Ollama API JSON parse error: {}", e))?; - - let embeddings = resp - .get("embeddings") - .and_then(|v| v.as_array()) - .ok_or_else(|| anyhow::anyhow!("Ollama response missing embeddings: {}", resp))?; - - let mut results = Vec::with_capacity(embeddings.len()); - for emb in embeddings { - let vec: Vec = emb - .as_array() - .ok_or_else(|| anyhow::anyhow!("invalid embedding array in Ollama response"))? 
- .iter() - .map(|v| v.as_f64().unwrap_or(0.0) as f32) - .collect(); - results.push(vec); - } - Ok(results) - } -} - -impl EmbeddingProvider for OllamaProvider { - fn encode(&self, text: &str) -> anyhow::Result> { - self.embed_inner(vec![text])? - .into_iter() - .next() - .ok_or_else(|| anyhow::anyhow!("empty embedding result from Ollama")) - } - - fn encode_batch(&self, texts: &[&str]) -> anyhow::Result>> { - self.embed_inner(texts.to_vec()) - } - - fn name(&self) -> &'static str { - "ollama" - } -} - -// --------------------------------------------------------------------------- -// CandleProvider — pure-Rust local embedding via all-MiniLM-L6-v2 -// --------------------------------------------------------------------------- - -pub struct CandleProvider; - -impl EmbeddingProvider for CandleProvider { - fn encode(&self, text: &str) -> anyhow::Result> { - let (model, tokenizer) = get_candle_resources()?; - encode_with_candle(model, tokenizer, text) - } - fn encode_batch(&self, texts: &[&str]) -> anyhow::Result>> { - let (model, tokenizer) = get_candle_resources()?; - encode_batch_with_candle(model, tokenizer, texts) - } - fn name(&self) -> &'static str { - "candle-all-MiniLM-L6-v2" - } -} - -fn get_candle_resources() --> anyhow::Result<&'static (candle_transformers::models::bert::BertModel, tokenizers::Tokenizer)> { - use std::sync::OnceLock; - static RESOURCES: OnceLock< - Result<(candle_transformers::models::bert::BertModel, tokenizers::Tokenizer), String>, - > = OnceLock::new(); - match RESOURCES.get_or_init(|| init_candle_resources().map_err(|e| e.to_string())) { - Ok(r) => Ok(r), - Err(e) => Err(anyhow::anyhow!("CandleProvider init failed: {}", e)), - } -} - -fn init_candle_resources() --> anyhow::Result<(candle_transformers::models::bert::BertModel, tokenizers::Tokenizer)> { - use candle_core::Device; - use candle_nn::VarBuilder; - use candle_transformers::models::bert::{BertModel, Config}; - use hf_hub::api::sync::Api; - use tokenizers::Tokenizer; - - let api 
= Api::new()?; - let repo = api.model("sentence-transformers/all-MiniLM-L6-v2".to_string()); - - let config_path = repo.get("config.json")?; - let tokenizer_path = repo.get("tokenizer.json")?; - let weights_path = repo.get("model.safetensors")?; - - let config: Config = serde_json::from_reader(std::fs::File::open(config_path)?)?; - let tokenizer = Tokenizer::from_file(tokenizer_path).map_err(|e| anyhow::anyhow!(e))?; - - let device = Device::Cpu; - let vb = unsafe { - // SAFETY: model.safetensors is read-only after hf-hub download; - // no other process modifies it. This is the standard candle loading pattern. - VarBuilder::from_mmaped_safetensors(&[weights_path], candle_core::DType::F32, &device)? - }; - let model = BertModel::load(vb, &config)?; - - Ok((model, tokenizer)) -} - -fn encode_with_candle( - model: &candle_transformers::models::bert::BertModel, - tokenizer: &tokenizers::Tokenizer, - text: &str, -) -> anyhow::Result> { - encode_batch_with_candle(model, tokenizer, &[text]) - .and_then(|mut v| v.pop().ok_or_else(|| anyhow::anyhow!("empty embedding batch"))) -} - -fn encode_batch_with_candle( - model: &candle_transformers::models::bert::BertModel, - tokenizer: &tokenizers::Tokenizer, - texts: &[&str], -) -> anyhow::Result>> { - use candle_core::Tensor; - if texts.is_empty() { - return Ok(Vec::new()); - } - - // Batch tokenize - let encodings = tokenizer.encode_batch(texts.to_vec(), true).map_err(|e| anyhow::anyhow!(e))?; - - // Find max length for padding - let max_len = encodings.iter().map(|e| e.get_ids().len()).max().unwrap_or(0); - - // Build padded batch tensors - let mut input_ids_vec = Vec::new(); - let mut attention_mask_vec = Vec::new(); - for encoding in &encodings { - let ids = encoding.get_ids(); - let mask = encoding.get_attention_mask(); - let mut padded_ids = ids.to_vec(); - let mut padded_mask = mask.to_vec(); - padded_ids.resize(max_len, 0); - padded_mask.resize(max_len, 0); - input_ids_vec.extend(padded_ids); - 
attention_mask_vec.extend(padded_mask); - } - - let batch_size = texts.len(); - let input_ids = Tensor::new(input_ids_vec, &model.device)?.reshape((batch_size, max_len))?; - let token_type_ids = input_ids.zeros_like()?; - let attention_mask_t = - Tensor::new(attention_mask_vec, &model.device)?.reshape((batch_size, max_len))?; - - // Single forward pass for the whole batch - let output = model.forward(&input_ids, &token_type_ids, Some(&attention_mask_t))?; - - // Mean pooling + L2 normalize per sample - let mask = attention_mask_t.to_dtype(candle_core::DType::F32)?.unsqueeze(2)?; - let sum = output.broadcast_mul(&mask)?.sum(1)?; - let count = mask.sum(1)?; - let mean_pooled = sum.broadcast_div(&count)?; - - let norm = mean_pooled.sqr()?.sum_keepdim(1)?.sqrt()?; - let normalized = mean_pooled.broadcast_div(&norm)?; - - // Extract per-sample embeddings - let mut results = Vec::with_capacity(batch_size); - for i in 0..batch_size { - let emb = normalized.get(i)?.squeeze(0)?.to_vec1()?; - results.push(emb); + "ollama" => Box::new(ollama::OllamaProvider::new(base_url, _model, timeout_seconds)), + _ => Box::new(candle::CandleProvider), } - Ok(results) } /// Cosine similarity between two f32 vectors. 
@@ -317,7 +116,7 @@ mod tests { #[test] fn test_candle_provider_encode() { - let provider = CandleProvider; + let provider = candle::CandleProvider; let emb = provider.encode("hello world").unwrap(); assert_eq!(emb.len(), 384); // L2 norm should be ≈ 1.0 (sentence-transformers normalizes) @@ -342,7 +141,7 @@ mod tests { fn test_candle_python_cosine_similarity() { let text = "The quick brown fox jumps over the lazy dog."; - let candle_emb = CandleProvider.encode(text).unwrap(); + let candle_emb = candle::CandleProvider.encode(text).unwrap(); let python_emb = generate_python_embedding(text).unwrap(); assert_eq!( diff --git a/crates/devbase-embedding/src/ollama.rs b/crates/devbase-embedding/src/ollama.rs new file mode 100644 index 0000000..3ef2cd9 --- /dev/null +++ b/crates/devbase-embedding/src/ollama.rs @@ -0,0 +1,77 @@ +// SPDX-License-Identifier: AGPL-3.0-or-later +// Copyright (c) 2026 juice094 + +use crate::EmbeddingProvider; + +pub struct OllamaProvider { + base_url: String, + model: String, + timeout_seconds: u64, +} + +impl OllamaProvider { + pub fn new(base_url: &str, model: &str, timeout_seconds: u64) -> Self { + Self { + base_url: base_url.trim_end_matches('/').to_string(), + model: model.to_string(), + timeout_seconds, + } + } + + fn embed_inner(&self, inputs: Vec<&str>) -> anyhow::Result>> { + let url = format!("{}/api/embed", self.base_url); + let body = if inputs.len() == 1 { + serde_json::json!({ + "model": self.model, + "input": inputs[0], + }) + } else { + serde_json::json!({ + "model": self.model, + "input": inputs, + }) + }; + + let resp: serde_json::Value = ureq::post(&url) + .set("Content-Type", "application/json") + .timeout(std::time::Duration::from_secs(self.timeout_seconds)) + .send_json(body) + .map_err(|e| anyhow::anyhow!("Ollama API request failed: {}", e))? 
+ .into_json() + .map_err(|e| anyhow::anyhow!("Ollama API JSON parse error: {}", e))?; + + let embeddings = resp + .get("embeddings") + .and_then(|v| v.as_array()) + .ok_or_else(|| anyhow::anyhow!("Ollama response missing embeddings: {}", resp))?; + + let mut results = Vec::with_capacity(embeddings.len()); + for emb in embeddings { + let vec: Vec = emb + .as_array() + .ok_or_else(|| anyhow::anyhow!("invalid embedding array in Ollama response"))? + .iter() + .map(|v| v.as_f64().unwrap_or(0.0) as f32) + .collect(); + results.push(vec); + } + Ok(results) + } +} + +impl EmbeddingProvider for OllamaProvider { + fn encode(&self, text: &str) -> anyhow::Result> { + self.embed_inner(vec![text])? + .into_iter() + .next() + .ok_or_else(|| anyhow::anyhow!("empty embedding result from Ollama")) + } + + fn encode_batch(&self, texts: &[&str]) -> anyhow::Result>> { + self.embed_inner(texts.to_vec()) + } + + fn name(&self) -> &'static str { + "ollama" + } +} diff --git a/crates/devbase-registry-call-graph/Cargo.toml b/crates/devbase-registry-call-graph/Cargo.toml deleted file mode 100644 index b718bfb..0000000 --- a/crates/devbase-registry-call-graph/Cargo.toml +++ /dev/null @@ -1,12 +0,0 @@ -[package] -name = "devbase-registry-call-graph" -version.workspace = true -edition = "2024" -authors = ["juice094 <160722440+juice094@users.noreply.github.com>"] -description = "Intra-repository call graph query helpers" -license = "MIT" -repository = "https://github.com/juice094/devbase" - -[dependencies] -rusqlite = { version = "0.34", features = ["bundled"] } -anyhow = "1" diff --git a/crates/devbase-registry-call-graph/README.md b/crates/devbase-registry-call-graph/README.md deleted file mode 100644 index a718f60..0000000 --- a/crates/devbase-registry-call-graph/README.md +++ /dev/null @@ -1,20 +0,0 @@ -# devbase-registry-call-graph - -devbase-registry-call-graph workspace crate. - -## 一句话职责 - -devbase-registry-call-graph workspace crate. 
- -## 边界说明 - -零内部耦合 workspace crate。禁止直接调用 devbase 主 crate 模块。 - -## 深度链接 - -- [devbase 主文档](../README.md) -- [AGENTS.md](../AGENTS.md) - -## 许可证 - -AGPL-3.0-or-later diff --git a/crates/devbase-registry-code-symbols/Cargo.toml b/crates/devbase-registry-code-symbols/Cargo.toml deleted file mode 100644 index 482273f..0000000 --- a/crates/devbase-registry-code-symbols/Cargo.toml +++ /dev/null @@ -1,12 +0,0 @@ -[package] -name = "devbase-registry-code-symbols" -version.workspace = true -edition = "2024" -authors = ["juice094 <160722440+juice094@users.noreply.github.com>"] -description = "Code symbol query helpers for the devbase registry" -license = "MIT" -repository = "https://github.com/juice094/devbase" - -[dependencies] -rusqlite = { version = "0.34", features = ["bundled"] } -anyhow = "1" diff --git a/crates/devbase-registry-code-symbols/README.md b/crates/devbase-registry-code-symbols/README.md deleted file mode 100644 index d6d78a5..0000000 --- a/crates/devbase-registry-code-symbols/README.md +++ /dev/null @@ -1,20 +0,0 @@ -# devbase-registry-code-symbols - -devbase-registry-code-symbols workspace crate. - -## 一句话职责 - -devbase-registry-code-symbols workspace crate. 
- -## 边界说明 - -零内部耦合 workspace crate。禁止直接调用 devbase 主 crate 模块。 - -## 深度链接 - -- [devbase 主文档](../README.md) -- [AGENTS.md](../AGENTS.md) - -## 许可证 - -AGPL-3.0-or-later diff --git a/crates/devbase-registry-dead-code/Cargo.toml b/crates/devbase-registry-dead-code/Cargo.toml deleted file mode 100644 index b81b743..0000000 --- a/crates/devbase-registry-dead-code/Cargo.toml +++ /dev/null @@ -1,12 +0,0 @@ -[package] -name = "devbase-registry-dead-code" -version.workspace = true -edition = "2024" -authors = ["juice094 <160722440+juice094@users.noreply.github.com>"] -description = "Dead code detection queries for the devbase registry" -license = "MIT" -repository = "https://github.com/juice094/devbase" - -[dependencies] -rusqlite = { version = "0.34", features = ["bundled"] } -anyhow = "1" diff --git a/crates/devbase-registry-dead-code/README.md b/crates/devbase-registry-dead-code/README.md deleted file mode 100644 index ff7b345..0000000 --- a/crates/devbase-registry-dead-code/README.md +++ /dev/null @@ -1,20 +0,0 @@ -# devbase-registry-dead-code - -devbase-registry-dead-code workspace crate. - -## 一句话职责 - -devbase-registry-dead-code workspace crate. 
- -## 边界说明 - -零内部耦合 workspace crate。禁止直接调用 devbase 主 crate 模块。 - -## 深度链接 - -- [devbase 主文档](../README.md) -- [AGENTS.md](../AGENTS.md) - -## 许可证 - -AGPL-3.0-or-later diff --git a/crates/devbase-registry-entity/Cargo.toml b/crates/devbase-registry-entity/Cargo.toml deleted file mode 100644 index ec03390..0000000 --- a/crates/devbase-registry-entity/Cargo.toml +++ /dev/null @@ -1,14 +0,0 @@ -[package] -name = "devbase-registry-entity" -version.workspace = true -edition = "2024" -authors = ["juice094 <160722440+juice094@users.noreply.github.com>"] -description = "Unified entity upsert and query operations for the devbase registry" -license = "MIT" -repository = "https://github.com/juice094/devbase" - -[dependencies] -chrono = { version = "0.4", features = ["serde"] } -rusqlite = { version = "0.34", features = ["bundled", "chrono"] } -serde_json = "1" -anyhow = "1" diff --git a/crates/devbase-registry-entity/README.md b/crates/devbase-registry-entity/README.md deleted file mode 100644 index 8af09da..0000000 --- a/crates/devbase-registry-entity/README.md +++ /dev/null @@ -1,20 +0,0 @@ -# devbase-registry-entity - -devbase-registry-entity workspace crate. - -## 一句话职责 - -devbase-registry-entity workspace crate. 
- -## 边界说明 - -零内部耦合 workspace crate。禁止直接调用 devbase 主 crate 模块。 - -## 深度链接 - -- [devbase 主文档](../README.md) -- [AGENTS.md](../AGENTS.md) - -## 许可证 - -AGPL-3.0-or-later diff --git a/crates/devbase-registry-health/Cargo.toml b/crates/devbase-registry-health/Cargo.toml deleted file mode 100644 index 1c21dd6..0000000 --- a/crates/devbase-registry-health/Cargo.toml +++ /dev/null @@ -1,14 +0,0 @@ -[package] -name = "devbase-registry-health" -version.workspace = true -edition = "2024" -authors = ["juice094 <160722440+juice094@users.noreply.github.com>"] -description = "Repository health entry storage and retrieval" -license = "MIT" -repository = "https://github.com/juice094/devbase" - -[dependencies] -chrono = { version = "0.4", features = ["serde"] } -serde = { version = "1", features = ["derive"] } -rusqlite = { version = "0.34", features = ["bundled", "chrono"] } -anyhow = "1" diff --git a/crates/devbase-registry-health/README.md b/crates/devbase-registry-health/README.md deleted file mode 100644 index db4dd3a..0000000 --- a/crates/devbase-registry-health/README.md +++ /dev/null @@ -1,20 +0,0 @@ -# devbase-registry-health - -devbase-registry-health workspace crate. - -## 一句话职责 - -devbase-registry-health workspace crate. 
- -## 边界说明 - -零内部耦合 workspace crate。禁止直接调用 devbase 主 crate 模块。 - -## 深度链接 - -- [devbase 主文档](../README.md) -- [AGENTS.md](../AGENTS.md) - -## 许可证 - -AGPL-3.0-or-later diff --git a/crates/devbase-registry-metrics/Cargo.toml b/crates/devbase-registry-metrics/Cargo.toml deleted file mode 100644 index 05c9ab1..0000000 --- a/crates/devbase-registry-metrics/Cargo.toml +++ /dev/null @@ -1,15 +0,0 @@ -[package] -name = "devbase-registry-metrics" -version.workspace = true -edition = "2024" -authors = ["juice094 <160722440+juice094@users.noreply.github.com>"] -description = "Code metrics persistence and retrieval for devbase repositories" -license = "MIT" -repository = "https://github.com/juice094/devbase" - -[dependencies] -chrono = { version = "0.4", features = ["serde"] } -serde = { version = "1", features = ["derive"] } -serde_json = "1" -rusqlite = { version = "0.34", features = ["bundled", "chrono"] } -anyhow = "1" diff --git a/crates/devbase-registry-metrics/README.md b/crates/devbase-registry-metrics/README.md deleted file mode 100644 index d5b131f..0000000 --- a/crates/devbase-registry-metrics/README.md +++ /dev/null @@ -1,20 +0,0 @@ -# devbase-registry-metrics - -devbase-registry-metrics workspace crate. - -## 一句话职责 - -devbase-registry-metrics workspace crate. 
- -## 边界说明 - -零内部耦合 workspace crate。禁止直接调用 devbase 主 crate 模块。 - -## 深度链接 - -- [devbase 主文档](../README.md) -- [AGENTS.md](../AGENTS.md) - -## 许可证 - -AGPL-3.0-or-later diff --git a/crates/devbase-registry-relation/Cargo.toml b/crates/devbase-registry-relation/Cargo.toml deleted file mode 100644 index 7cb4617..0000000 --- a/crates/devbase-registry-relation/Cargo.toml +++ /dev/null @@ -1,13 +0,0 @@ -[package] -name = "devbase-registry-relation" -version.workspace = true -edition = "2024" -authors = ["juice094 <160722440+juice094@users.noreply.github.com>"] -description = "Directed entity relation storage and query for the devbase registry" -license = "MIT" -repository = "https://github.com/juice094/devbase" - -[dependencies] -chrono = { version = "0.4", features = ["serde"] } -rusqlite = { version = "0.34", features = ["bundled", "chrono"] } -anyhow = "1" diff --git a/crates/devbase-registry-relation/README.md b/crates/devbase-registry-relation/README.md deleted file mode 100644 index 1fd2f53..0000000 --- a/crates/devbase-registry-relation/README.md +++ /dev/null @@ -1,20 +0,0 @@ -# devbase-registry-relation - -devbase-registry-relation workspace crate. - -## 一句话职责 - -devbase-registry-relation workspace crate. 
- -## 边界说明 - -零内部耦合 workspace crate。禁止直接调用 devbase 主 crate 模块。 - -## 深度链接 - -- [devbase 主文档](../README.md) -- [AGENTS.md](../AGENTS.md) - -## 许可证 - -AGPL-3.0-or-later diff --git a/crates/devbase-registry-workspace/Cargo.toml b/crates/devbase-registry-workspace/Cargo.toml deleted file mode 100644 index 3f3eb0b..0000000 --- a/crates/devbase-registry-workspace/Cargo.toml +++ /dev/null @@ -1,14 +0,0 @@ -[package] -name = "devbase-registry-workspace" -version.workspace = true -edition = "2024" -authors = ["juice094 <160722440+juice094@users.noreply.github.com>"] -description = "Workspace snapshot and oplog event types for the devbase registry" -license = "MIT" -repository = "https://github.com/juice094/devbase" - -[dependencies] -chrono = { version = "0.4", features = ["serde"] } -serde = { version = "1", features = ["derive"] } -rusqlite = { version = "0.34", features = ["bundled", "chrono"] } -anyhow = "1" diff --git a/crates/devbase-registry-workspace/README.md b/crates/devbase-registry-workspace/README.md deleted file mode 100644 index dadccee..0000000 --- a/crates/devbase-registry-workspace/README.md +++ /dev/null @@ -1,20 +0,0 @@ -# devbase-registry-workspace - -devbase-registry-workspace workspace crate. - -## 一句话职责 - -devbase-registry-workspace workspace crate. 
- -## 边界说明 - -零内部耦合 workspace crate。禁止直接调用 devbase 主 crate 模块。 - -## 深度链接 - -- [devbase 主文档](../README.md) -- [AGENTS.md](../AGENTS.md) - -## 许可证 - -AGPL-3.0-or-later diff --git a/crates/devbase-registry/Cargo.toml b/crates/devbase-registry/Cargo.toml new file mode 100644 index 0000000..203f173 --- /dev/null +++ b/crates/devbase-registry/Cargo.toml @@ -0,0 +1,17 @@ +[package] +name = "devbase-registry" +version.workspace = true +edition.workspace = true +authors.workspace = true +license.workspace = true +repository.workspace = true +description = "Unified SQLite registry operations for devbase" +keywords = ["registry", "sqlite", "devbase"] +categories = ["database"] + +[dependencies] +chrono = { version = "0.4", features = ["serde"] } +rusqlite = { version = "0.34", features = ["bundled", "chrono"] } +serde = { version = "1", features = ["derive"] } +serde_json = "1" +anyhow = "1" diff --git a/crates/devbase-registry-call-graph/src/lib.rs b/crates/devbase-registry/src/call_graph.rs similarity index 99% rename from crates/devbase-registry-call-graph/src/lib.rs rename to crates/devbase-registry/src/call_graph.rs index 19b7468..65e6872 100644 --- a/crates/devbase-registry-call-graph/src/lib.rs +++ b/crates/devbase-registry/src/call_graph.rs @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: MIT +// SPDX-License-Identifier: AGPL-3.0-or-later // Copyright (c) 2026 juice094 //! Intra-repository call graph query helpers. diff --git a/crates/devbase-registry-code-symbols/src/lib.rs b/crates/devbase-registry/src/code_symbols.rs similarity index 99% rename from crates/devbase-registry-code-symbols/src/lib.rs rename to crates/devbase-registry/src/code_symbols.rs index 020f4e2..1553760 100644 --- a/crates/devbase-registry-code-symbols/src/lib.rs +++ b/crates/devbase-registry/src/code_symbols.rs @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: MIT +// SPDX-License-Identifier: AGPL-3.0-or-later // Copyright (c) 2026 juice094 //! Code symbol query helpers. 
diff --git a/crates/devbase-registry-dead-code/src/lib.rs b/crates/devbase-registry/src/dead_code.rs similarity index 99% rename from crates/devbase-registry-dead-code/src/lib.rs rename to crates/devbase-registry/src/dead_code.rs index 27c89d4..6ae93fc 100644 --- a/crates/devbase-registry-dead-code/src/lib.rs +++ b/crates/devbase-registry/src/dead_code.rs @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: MIT +// SPDX-License-Identifier: AGPL-3.0-or-later // Copyright (c) 2026 juice094 //! Dead code query helpers. diff --git a/crates/devbase-registry-entity/src/lib.rs b/crates/devbase-registry/src/entity.rs similarity index 98% rename from crates/devbase-registry-entity/src/lib.rs rename to crates/devbase-registry/src/entity.rs index 408b346..1337c79 100644 --- a/crates/devbase-registry-entity/src/lib.rs +++ b/crates/devbase-registry/src/entity.rs @@ -1,5 +1,6 @@ -// SPDX-License-Identifier: MIT +// SPDX-License-Identifier: AGPL-3.0-or-later // Copyright (c) 2026 juice094 + use chrono::Utc; // Entity type constants for the unified entities table. diff --git a/crates/devbase-registry-health/src/lib.rs b/crates/devbase-registry/src/health.rs similarity index 99% rename from crates/devbase-registry-health/src/lib.rs rename to crates/devbase-registry/src/health.rs index 02168d5..2b2e67c 100644 --- a/crates/devbase-registry-health/src/lib.rs +++ b/crates/devbase-registry/src/health.rs @@ -1,5 +1,6 @@ -// SPDX-License-Identifier: MIT +// SPDX-License-Identifier: AGPL-3.0-or-later // Copyright (c) 2026 juice094 + use chrono::{DateTime, Utc}; use serde::{Deserialize, Serialize}; diff --git a/crates/devbase-registry/src/lib.rs b/crates/devbase-registry/src/lib.rs new file mode 100644 index 0000000..13bb23a --- /dev/null +++ b/crates/devbase-registry/src/lib.rs @@ -0,0 +1,16 @@ +// SPDX-License-Identifier: AGPL-3.0-or-later +// Copyright (c) 2026 juice094 +//! Unified SQLite registry operations for devbase. +//! +//! 
Submodules cover entity management, health tracking, code metrics, +//! call graph queries, code symbol indexing, dead-code analysis, +//! workspace snapshots, and entity relations. + +pub mod call_graph; +pub mod code_symbols; +pub mod dead_code; +pub mod entity; +pub mod health; +pub mod metrics; +pub mod relation; +pub mod workspace; diff --git a/crates/devbase-registry-metrics/src/lib.rs b/crates/devbase-registry/src/metrics.rs similarity index 98% rename from crates/devbase-registry-metrics/src/lib.rs rename to crates/devbase-registry/src/metrics.rs index a535f87..98c8d4c 100644 --- a/crates/devbase-registry-metrics/src/lib.rs +++ b/crates/devbase-registry/src/metrics.rs @@ -1,5 +1,6 @@ -// SPDX-License-Identifier: MIT +// SPDX-License-Identifier: AGPL-3.0-or-later // Copyright (c) 2026 juice094 + use chrono::{DateTime, Utc}; #[derive(Debug, Clone)] @@ -97,7 +98,7 @@ pub fn list_code_metrics( }, )) })?; - rows.collect::, _>>().map_err(Into::into) + rows.collect::, _>>().map_err(|e| e.into()) } #[cfg(test)] diff --git a/crates/devbase-registry-relation/src/lib.rs b/crates/devbase-registry/src/relation.rs similarity index 99% rename from crates/devbase-registry-relation/src/lib.rs rename to crates/devbase-registry/src/relation.rs index b28965e..821d240 100644 --- a/crates/devbase-registry-relation/src/lib.rs +++ b/crates/devbase-registry/src/relation.rs @@ -1,5 +1,6 @@ -// SPDX-License-Identifier: MIT +// SPDX-License-Identifier: AGPL-3.0-or-later // Copyright (c) 2026 juice094 + use chrono::Utc; /// (from_entity_id, to_entity_id, relation_type, confidence, created_at) diff --git a/crates/devbase-registry-workspace/src/lib.rs b/crates/devbase-registry/src/workspace.rs similarity index 99% rename from crates/devbase-registry-workspace/src/lib.rs rename to crates/devbase-registry/src/workspace.rs index 496a093..28176f2 100644 --- a/crates/devbase-registry-workspace/src/lib.rs +++ b/crates/devbase-registry/src/workspace.rs @@ -1,5 +1,6 @@ -// 
SPDX-License-Identifier: MIT +// SPDX-License-Identifier: AGPL-3.0-or-later // Copyright (c) 2026 juice094 + use chrono::{DateTime, Utc}; use serde::{Deserialize, Serialize}; diff --git a/crates/devbase-skill-runtime-parser/Cargo.toml b/crates/devbase-skill-runtime-parser/Cargo.toml index 6134d1c..1b26013 100644 --- a/crates/devbase-skill-runtime-parser/Cargo.toml +++ b/crates/devbase-skill-runtime-parser/Cargo.toml @@ -1,11 +1,11 @@ [package] name = "devbase-skill-runtime-parser" version.workspace = true -edition = "2024" -authors = ["juice094 <160722440+juice094@users.noreply.github.com>"] +edition.workspace = true +authors.workspace = true +license.workspace = true +repository.workspace = true description = "SKILL.md frontmatter parser for skill metadata extraction" -license = "MIT" -repository = "https://github.com/juice094/devbase" [dependencies] chrono = { version = "0.4", features = ["serde"] } diff --git a/crates/devbase-skill-runtime-parser/src/field_parsers.rs b/crates/devbase-skill-runtime-parser/src/field_parsers.rs new file mode 100644 index 0000000..69ea40c --- /dev/null +++ b/crates/devbase-skill-runtime-parser/src/field_parsers.rs @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: AGPL-3.0-or-later +// Copyright (c) 2026 juice094 + +use devbase_skill_runtime_types::{SkillDependency, SkillInput, SkillOutput}; + +pub fn parse_input_field(line: &str, inp: &mut SkillInput) { + if let Some((key, rest)) = line.split_once(':') { + let key = key.trim(); + let rest = rest.trim(); + match key { + "name" => inp.name = unquote(rest).to_string(), + "type" => inp.input_type = unquote(rest).to_string(), + "description" => inp.description = unquote(rest).to_string(), + "required" => inp.required = parse_bool(rest), + "default" => inp.default = Some(unquote(rest).to_string()), + _ => {} + } + } +} + +pub fn parse_output_field(line: &str, out: &mut SkillOutput) { + if let Some((key, rest)) = line.split_once(':') { + let key = key.trim(); + let rest = rest.trim(); + match 
key { + "name" => out.name = unquote(rest).to_string(), + "type" => out.output_type = unquote(rest).to_string(), + "description" => out.description = unquote(rest).to_string(), + _ => {} + } + } +} + +pub fn parse_dependency_item(item: &str) -> SkillDependency { + let mut dep = SkillDependency::default(); + if let Some((_, rest)) = item.split_once(':') { + dep.id = unquote(rest.trim()).to_string(); + } + dep +} + +pub fn parse_dependency_field(line: &str, dep: &mut SkillDependency) { + if let Some((key, rest)) = line.split_once(':') { + let key = key.trim(); + let rest = rest.trim(); + match key { + "version" => dep.version = Some(unquote(rest).to_string()), + "source" => dep.source = Some(unquote(rest).to_string()), + _ => {} + } + } +} + +pub fn parse_bool(s: &str) -> bool { + matches!(s.trim().to_lowercase().as_str(), "true" | "yes" | "1" | "on") +} + +pub fn unquote(s: &str) -> &str { + s.strip_prefix('"') + .and_then(|s| s.strip_suffix('"')) + .or_else(|| s.strip_prefix('\'').and_then(|s| s.strip_suffix('\''))) + .unwrap_or(s) +} diff --git a/crates/devbase-skill-runtime-parser/src/frontmatter.rs b/crates/devbase-skill-runtime-parser/src/frontmatter.rs new file mode 100644 index 0000000..233096d --- /dev/null +++ b/crates/devbase-skill-runtime-parser/src/frontmatter.rs @@ -0,0 +1,194 @@ +// SPDX-License-Identifier: AGPL-3.0-or-later +// Copyright (c) 2026 juice094 + +use devbase_skill_runtime_types::{SkillDependency, SkillInput, SkillOutput}; + +/// Parsed frontmatter specific to SKILL.md. +#[derive(Debug, Clone, Default)] +pub struct SkillFrontmatter { + pub id: Option, + pub name: Option, + pub version: Option, + pub description: Option, + pub author: Option, + pub tags: Vec, + pub entry_script: Option, + pub skill_type: Option, + pub inputs: Vec, + pub outputs: Vec, + pub dependencies: Vec, +} + +/// Extract YAML frontmatter from the top of a Markdown document. 
+pub fn extract_frontmatter(content: &str) -> Option<(SkillFrontmatter, usize)> { + let trimmed = content.trim_start(); + if !trimmed.starts_with("---") { + return None; + } + + let after_open = &trimmed[3..]; + let close_pos = after_open.find("\n---")?; + let raw = after_open[..close_pos].trim(); + let body_offset = trimmed.as_ptr() as usize - content.as_ptr() as usize + 3 + close_pos + 4; + + let fm = parse_skill_frontmatter(raw); + Some((fm, body_offset)) +} + +pub fn parse_skill_frontmatter(raw: &str) -> SkillFrontmatter { + let mut fm = SkillFrontmatter::default(); + let mut current_section: Option<&str> = None; + let mut current_input: Option = None; + let mut current_output: Option = None; + + for line in raw.lines() { + let line = line.trim_end(); + let trimmed = line.trim_start(); + + if trimmed.is_empty() || trimmed.starts_with('#') { + continue; + } + + // YAML list item within a section + if let Some(item) = trimmed.strip_prefix("- ") { + match current_section { + Some("inputs") => { + // Flush previous input if we see a new "- name:" without closing the last one + if item.starts_with("name:") { + if let Some(input) = current_input.take() { + fm.inputs.push(input); + } + } + if current_input.is_none() && item.starts_with("name:") { + current_input = Some(SkillInput::default()); + } + if let Some(ref mut inp) = current_input { + super::field_parsers::parse_input_field(item, inp); + } + } + Some("outputs") => { + if item.starts_with("name:") { + if let Some(output) = current_output.take() { + fm.outputs.push(output); + } + } + if current_output.is_none() && item.starts_with("name:") { + current_output = Some(SkillOutput::default()); + } + if let Some(ref mut out) = current_output { + super::field_parsers::parse_output_field(item, out); + } + } + Some("dependencies") => { + if item.starts_with("id:") { + fm.dependencies.push(super::field_parsers::parse_dependency_item(item)); + } else if let Some(last) = fm.dependencies.last_mut() { + 
super::field_parsers::parse_dependency_field(item, last); + } + } + _ => { + // Top-level list (e.g. tags inline) + if let Some((key, _)) = line.split_once(':') { + let key = key.trim(); + if key == "tags" { + fm.tags.push(super::field_parsers::unquote(item).to_string()); + } + } + } + } + continue; + } + + // Flush any open input/output before moving to a new key + if trimmed.starts_with("name:") { + if let Some(input) = current_input.take() { + fm.inputs.push(input); + } + current_input = Some(SkillInput::default()); + } + if trimmed.starts_with("name:") { + if let Some(output) = current_output.take() { + fm.outputs.push(output); + } + current_output = Some(SkillOutput::default()); + } + + if let Some((key, rest)) = trimmed.split_once(':') { + let key = key.trim(); + let rest = rest.trim(); + + // Section starters: inputs / outputs + if key == "inputs" { + current_section = Some("inputs"); + continue; + } + if key == "outputs" { + current_section = Some("outputs"); + continue; + } + if key == "dependencies" { + current_section = Some("dependencies"); + continue; + } + + // If we're inside an input/output block and this is NOT a top-level key, + // treat it as a nested field. + // CRITICAL: when current_input / current_output is active, ALL non-section + // keys must be treated as nested fields, even if they share a name with a + // top-level field (e.g. "type", "description"). 
+ let is_section_starter = key == "inputs" || key == "outputs"; + + if !is_section_starter { + if let Some(ref mut inp) = current_input { + super::field_parsers::parse_input_field(trimmed, inp); + continue; + } + if let Some(ref mut out) = current_output { + super::field_parsers::parse_output_field(trimmed, out); + continue; + } + } + + // Top-level fields + match key { + "id" => fm.id = Some(super::field_parsers::unquote(rest).to_string()), + "name" => fm.name = Some(super::field_parsers::unquote(rest).to_string()), + "version" => fm.version = Some(super::field_parsers::unquote(rest).to_string()), + "description" => { + fm.description = Some(super::field_parsers::unquote(rest).to_string()) + } + "author" => fm.author = Some(super::field_parsers::unquote(rest).to_string()), + "entry_script" => { + fm.entry_script = Some(super::field_parsers::unquote(rest).to_string()) + } + "skill_type" | "type" => { + fm.skill_type = Some(super::field_parsers::unquote(rest).to_string()) + } + "tags" => { + current_section = None; + if rest.starts_with('[') && rest.ends_with(']') { + fm.tags = rest[1..rest.len() - 1] + .split(',') + .map(|s| super::field_parsers::unquote(s.trim()).to_string()) + .filter(|s| !s.is_empty()) + .collect(); + } else if !rest.is_empty() { + fm.tags = vec![super::field_parsers::unquote(rest).to_string()]; + } else { + current_section = Some("tags"); + } + } + _ => {} + } + } + } + + if let Some(inp) = current_input { + fm.inputs.push(inp); + } + if let Some(out) = current_output { + fm.outputs.push(out); + } + + fm +} diff --git a/crates/devbase-skill-runtime-parser/src/lib.rs b/crates/devbase-skill-runtime-parser/src/lib.rs index dbe1642..a69e956 100644 --- a/crates/devbase-skill-runtime-parser/src/lib.rs +++ b/crates/devbase-skill-runtime-parser/src/lib.rs @@ -1,6 +1,10 @@ -// SPDX-License-Identifier: MIT +// SPDX-License-Identifier: AGPL-3.0-or-later // Copyright (c) 2026 juice094 -use devbase_skill_runtime_types::{SkillDependency, SkillInput, 
SkillMeta, SkillOutput, SkillType}; + +pub mod field_parsers; +pub mod frontmatter; + +use devbase_skill_runtime_types::{SkillInput, SkillMeta, SkillOutput, SkillType}; /// Parse a SKILL.md file into `SkillMeta`. /// @@ -10,7 +14,8 @@ pub fn parse_skill_md(path: &std::path::Path) -> anyhow::Result { let content = std::fs::read_to_string(path)?; let id = SkillMeta::id_from_path(path.parent().unwrap_or(path)); - let (frontmatter, body) = if let Some((fm, offset)) = extract_frontmatter(&content) { + let (frontmatter, body) = if let Some((fm, offset)) = frontmatter::extract_frontmatter(&content) + { (fm, content[offset..].trim_start().to_string()) } else { // No frontmatter: treat entire file as body with minimal defaults @@ -89,246 +94,6 @@ pub fn parse_skill_md(path: &std::path::Path) -> anyhow::Result { }) } -/// Parsed frontmatter specific to SKILL.md. -#[derive(Debug, Clone, Default)] -struct SkillFrontmatter { - pub id: Option, - pub name: Option, - pub version: Option, - pub description: Option, - pub author: Option, - pub tags: Vec, - pub entry_script: Option, - pub skill_type: Option, - pub inputs: Vec, - pub outputs: Vec, - pub dependencies: Vec, -} - -/// Extract YAML frontmatter from the top of a Markdown document. 
-fn extract_frontmatter(content: &str) -> Option<(SkillFrontmatter, usize)> { - let trimmed = content.trim_start(); - if !trimmed.starts_with("---") { - return None; - } - - let after_open = &trimmed[3..]; - let close_pos = after_open.find("\n---")?; - let raw = after_open[..close_pos].trim(); - let body_offset = trimmed.as_ptr() as usize - content.as_ptr() as usize + 3 + close_pos + 4; - - let fm = parse_skill_frontmatter(raw); - Some((fm, body_offset)) -} - -fn parse_skill_frontmatter(raw: &str) -> SkillFrontmatter { - let mut fm = SkillFrontmatter::default(); - let mut current_section: Option<&str> = None; - let mut current_input: Option = None; - let mut current_output: Option = None; - - for line in raw.lines() { - let line = line.trim_end(); - let trimmed = line.trim_start(); - - if trimmed.is_empty() || trimmed.starts_with('#') { - continue; - } - - // YAML list item within a section - if let Some(item) = trimmed.strip_prefix("- ") { - match current_section { - Some("inputs") => { - // Flush previous input if we see a new "- name:" without closing the last one - if item.starts_with("name:") && current_input.is_some() { - fm.inputs - // TODO(veto-audit-2026-04-26): RF-6 expect — 解析状态机内部不变量,is_some 前置检查存在。 - .push(current_input.take().expect("current_input checked by is_some")); - } - if current_input.is_none() && item.starts_with("name:") { - current_input = Some(SkillInput::default()); - } - if let Some(ref mut inp) = current_input { - parse_input_field(item, inp); - } - } - Some("outputs") => { - if item.starts_with("name:") && current_output.is_some() { - fm.outputs.push( - current_output.take().expect("current_output checked by is_some"), - ); - } - if current_output.is_none() && item.starts_with("name:") { - current_output = Some(SkillOutput::default()); - } - if let Some(ref mut out) = current_output { - parse_output_field(item, out); - } - } - Some("dependencies") => { - if item.starts_with("id:") { - fm.dependencies.push(parse_dependency_item(item)); 
- } else if let Some(last) = fm.dependencies.last_mut() { - parse_dependency_field(item, last); - } - } - _ => { - // Top-level list (e.g. tags inline) - if let Some((key, _)) = line.split_once(':') { - let key = key.trim(); - if key == "tags" { - fm.tags.push(unquote(item).to_string()); - } - } - } - } - continue; - } - - // Flush any open input/output before moving to a new key - if current_input.is_some() && trimmed.starts_with("name:") { - fm.inputs.push(current_input.take().expect("current_input checked by is_some")); - current_input = Some(SkillInput::default()); - } - if current_output.is_some() && trimmed.starts_with("name:") { - fm.outputs - .push(current_output.take().expect("current_output checked by is_some")); - current_output = Some(SkillOutput::default()); - } - - if let Some((key, rest)) = trimmed.split_once(':') { - let key = key.trim(); - let rest = rest.trim(); - - // Section starters: inputs / outputs - if key == "inputs" { - current_section = Some("inputs"); - continue; - } - if key == "outputs" { - current_section = Some("outputs"); - continue; - } - if key == "dependencies" { - current_section = Some("dependencies"); - continue; - } - - // If we're inside an input/output block and this is NOT a top-level key, - // treat it as a nested field. - // CRITICAL: when current_input / current_output is active, ALL non-section - // keys must be treated as nested fields, even if they share a name with a - // top-level field (e.g. "type", "description"). 
- let is_section_starter = key == "inputs" || key == "outputs"; - - if !is_section_starter { - if let Some(ref mut inp) = current_input { - parse_input_field(trimmed, inp); - continue; - } - if let Some(ref mut out) = current_output { - parse_output_field(trimmed, out); - continue; - } - } - - // Top-level fields - match key { - "id" => fm.id = Some(unquote(rest).to_string()), - "name" => fm.name = Some(unquote(rest).to_string()), - "version" => fm.version = Some(unquote(rest).to_string()), - "description" => fm.description = Some(unquote(rest).to_string()), - "author" => fm.author = Some(unquote(rest).to_string()), - "entry_script" => fm.entry_script = Some(unquote(rest).to_string()), - "skill_type" | "type" => fm.skill_type = Some(unquote(rest).to_string()), - "tags" => { - current_section = None; - if rest.starts_with('[') && rest.ends_with(']') { - fm.tags = rest[1..rest.len() - 1] - .split(',') - .map(|s| unquote(s.trim()).to_string()) - .filter(|s| !s.is_empty()) - .collect(); - } else if !rest.is_empty() { - fm.tags = vec![unquote(rest).to_string()]; - } else { - current_section = Some("tags"); - } - } - _ => {} - } - } - } - - if let Some(inp) = current_input { - fm.inputs.push(inp); - } - if let Some(out) = current_output { - fm.outputs.push(out); - } - - fm -} - -fn parse_input_field(line: &str, inp: &mut SkillInput) { - if let Some((key, rest)) = line.split_once(':') { - let key = key.trim(); - let rest = rest.trim(); - match key { - "name" => inp.name = unquote(rest).to_string(), - "type" => inp.input_type = unquote(rest).to_string(), - "description" => inp.description = unquote(rest).to_string(), - "required" => inp.required = parse_bool(rest), - "default" => inp.default = Some(unquote(rest).to_string()), - _ => {} - } - } -} - -fn parse_output_field(line: &str, out: &mut SkillOutput) { - if let Some((key, rest)) = line.split_once(':') { - let key = key.trim(); - let rest = rest.trim(); - match key { - "name" => out.name = unquote(rest).to_string(), - 
"type" => out.output_type = unquote(rest).to_string(), - "description" => out.description = unquote(rest).to_string(), - _ => {} - } - } -} - -fn parse_dependency_item(item: &str) -> SkillDependency { - let mut dep = SkillDependency::default(); - if let Some((_, rest)) = item.split_once(':') { - dep.id = unquote(rest.trim()).to_string(); - } - dep -} - -fn parse_dependency_field(line: &str, dep: &mut SkillDependency) { - if let Some((key, rest)) = line.split_once(':') { - let key = key.trim(); - let rest = rest.trim(); - match key { - "version" => dep.version = Some(unquote(rest).to_string()), - "source" => dep.source = Some(unquote(rest).to_string()), - _ => {} - } - } -} - -fn parse_bool(s: &str) -> bool { - matches!(s.trim().to_lowercase().as_str(), "true" | "yes" | "1" | "on") -} - -fn unquote(s: &str) -> &str { - s.strip_prefix('"') - .and_then(|s| s.strip_suffix('"')) - .or_else(|| s.strip_prefix('\'').and_then(|s| s.strip_suffix('\''))) - .unwrap_or(s) -} - #[cfg(test)] mod tests { use super::*; diff --git a/crates/devbase-skill-runtime-types/Cargo.toml b/crates/devbase-skill-runtime-types/Cargo.toml index 0f860b5..1a1de9d 100644 --- a/crates/devbase-skill-runtime-types/Cargo.toml +++ b/crates/devbase-skill-runtime-types/Cargo.toml @@ -1,11 +1,11 @@ [package] name = "devbase-skill-runtime-types" version.workspace = true -edition = "2024" -authors = ["juice094 <160722440+juice094@users.noreply.github.com>"] +edition.workspace = true +authors.workspace = true +license.workspace = true +repository.workspace = true description = "Skill runtime type definitions and discriminant enums" -license = "MIT" -repository = "https://github.com/juice094/devbase" [dependencies] chrono = { version = "0.4", features = ["serde"] } diff --git a/crates/devbase-skill-runtime-types/src/execution.rs b/crates/devbase-skill-runtime-types/src/execution.rs new file mode 100644 index 0000000..31e39b5 --- /dev/null +++ b/crates/devbase-skill-runtime-types/src/execution.rs @@ -0,0 +1,50 
@@ +// SPDX-License-Identifier: AGPL-3.0-or-later +// Copyright (c) 2026 juice094 + +/// Result of a skill execution. +#[derive(Debug, Clone, serde::Serialize)] +pub struct ExecutionResult { + pub skill_id: String, + pub status: ExecutionStatus, + pub stdout: String, + pub stderr: String, + pub exit_code: Option, + pub duration_ms: u64, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize)] +#[serde(rename_all = "lowercase")] +pub enum ExecutionStatus { + Pending, + Running, + Success, + Failed, + Timeout, +} + +impl ExecutionStatus { + pub fn as_str(&self) -> &'static str { + match self { + ExecutionStatus::Pending => "pending", + ExecutionStatus::Running => "running", + ExecutionStatus::Success => "success", + ExecutionStatus::Failed => "failed", + ExecutionStatus::Timeout => "timeout", + } + } +} + +impl std::str::FromStr for ExecutionStatus { + type Err = anyhow::Error; + + fn from_str(s: &str) -> Result { + match s.to_lowercase().as_str() { + "pending" => Ok(ExecutionStatus::Pending), + "running" => Ok(ExecutionStatus::Running), + "success" => Ok(ExecutionStatus::Success), + "failed" => Ok(ExecutionStatus::Failed), + "timeout" => Ok(ExecutionStatus::Timeout), + _ => Err(anyhow::anyhow!("unknown execution status: {}", s)), + } + } +} diff --git a/crates/devbase-skill-runtime-types/src/lib.rs b/crates/devbase-skill-runtime-types/src/lib.rs index 39d18c2..66b88ac 100644 --- a/crates/devbase-skill-runtime-types/src/lib.rs +++ b/crates/devbase-skill-runtime-types/src/lib.rs @@ -1,67 +1,16 @@ -// SPDX-License-Identifier: MIT +// SPDX-License-Identifier: AGPL-3.0-or-later // Copyright (c) 2026 juice094 -use chrono::{DateTime, Utc}; - -/// Skill type discriminant. -#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)] -#[serde(rename_all = "lowercase")] -pub enum SkillType { - /// Distributed with devbase; always available. - Builtin, - /// Installed by user from external source. 
- Custom, - /// Reserved for devbase-internal system utilities. - System, -} - -impl SkillType { - pub fn as_str(&self) -> &'static str { - match self { - SkillType::Builtin => "builtin", - SkillType::Custom => "custom", - SkillType::System => "system", - } - } -} -impl std::str::FromStr for SkillType { - type Err = anyhow::Error; +pub mod execution; +pub mod params; +pub mod skill_type; - fn from_str(s: &str) -> Result { - match s.to_lowercase().as_str() { - "builtin" => Ok(SkillType::Builtin), - "custom" => Ok(SkillType::Custom), - "system" => Ok(SkillType::System), - _ => Err(anyhow::anyhow!("unknown skill_type: {}", s)), - } - } -} - -/// A single input parameter declared in SKILL.md. -#[derive(Debug, Clone, PartialEq, Eq, Default, serde::Serialize, serde::Deserialize)] -pub struct SkillInput { - pub name: String, - pub input_type: String, - pub description: String, - pub required: bool, - pub default: Option, -} - -/// A single output parameter declared in SKILL.md. -#[derive(Debug, Clone, PartialEq, Eq, Default, serde::Serialize, serde::Deserialize)] -pub struct SkillOutput { - pub name: String, - pub output_type: String, - pub description: String, -} +// Re-export for backward compatibility +pub use execution::{ExecutionResult, ExecutionStatus}; +pub use params::{SkillDependency, SkillInput, SkillOutput}; +pub use skill_type::SkillType; -/// A dependency declared by a skill on another skill. -#[derive(Debug, Clone, PartialEq, Eq, Default, serde::Serialize, serde::Deserialize)] -pub struct SkillDependency { - pub id: String, - pub version: Option, - pub source: Option, -} +use chrono::{DateTime, Utc}; /// In-memory representation of a parsed SKILL.md + registry metadata. #[derive(Debug, Clone)] @@ -108,54 +57,6 @@ impl SkillMeta { } } -/// Result of a skill execution. 
-#[derive(Debug, Clone, serde::Serialize)] -pub struct ExecutionResult { - pub skill_id: String, - pub status: ExecutionStatus, - pub stdout: String, - pub stderr: String, - pub exit_code: Option, - pub duration_ms: u64, -} - -#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize)] -#[serde(rename_all = "lowercase")] -pub enum ExecutionStatus { - Pending, - Running, - Success, - Failed, - Timeout, -} - -impl ExecutionStatus { - pub fn as_str(&self) -> &'static str { - match self { - ExecutionStatus::Pending => "pending", - ExecutionStatus::Running => "running", - ExecutionStatus::Success => "success", - ExecutionStatus::Failed => "failed", - ExecutionStatus::Timeout => "timeout", - } - } -} - -impl std::str::FromStr for ExecutionStatus { - type Err = anyhow::Error; - - fn from_str(s: &str) -> Result { - match s.to_lowercase().as_str() { - "pending" => Ok(ExecutionStatus::Pending), - "running" => Ok(ExecutionStatus::Running), - "success" => Ok(ExecutionStatus::Success), - "failed" => Ok(ExecutionStatus::Failed), - "timeout" => Ok(ExecutionStatus::Timeout), - _ => Err(anyhow::anyhow!("unknown execution status: {}", s)), - } - } -} - /// Lightweight row from the `skills` table (without body/embedding blob). #[derive(Debug, Clone, serde::Serialize)] pub struct SkillRow { diff --git a/crates/devbase-skill-runtime-types/src/params.rs b/crates/devbase-skill-runtime-types/src/params.rs new file mode 100644 index 0000000..4ccd29a --- /dev/null +++ b/crates/devbase-skill-runtime-types/src/params.rs @@ -0,0 +1,28 @@ +// SPDX-License-Identifier: AGPL-3.0-or-later +// Copyright (c) 2026 juice094 + +/// A single input parameter declared in SKILL.md. +#[derive(Debug, Clone, PartialEq, Eq, Default, serde::Serialize, serde::Deserialize)] +pub struct SkillInput { + pub name: String, + pub input_type: String, + pub description: String, + pub required: bool, + pub default: Option, +} + +/// A single output parameter declared in SKILL.md. 
+#[derive(Debug, Clone, PartialEq, Eq, Default, serde::Serialize, serde::Deserialize)] +pub struct SkillOutput { + pub name: String, + pub output_type: String, + pub description: String, +} + +/// A dependency declared by a skill on another skill. +#[derive(Debug, Clone, PartialEq, Eq, Default, serde::Serialize, serde::Deserialize)] +pub struct SkillDependency { + pub id: String, + pub version: Option, + pub source: Option, +} diff --git a/crates/devbase-skill-runtime-types/src/skill_type.rs b/crates/devbase-skill-runtime-types/src/skill_type.rs new file mode 100644 index 0000000..087e208 --- /dev/null +++ b/crates/devbase-skill-runtime-types/src/skill_type.rs @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: AGPL-3.0-or-later +// Copyright (c) 2026 juice094 + +/// Skill type discriminant. +#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)] +#[serde(rename_all = "lowercase")] +pub enum SkillType { + /// Distributed with devbase; always available. + Builtin, + /// Installed by user from external source. + Custom, + /// Reserved for devbase-internal system utilities. 
+ System, +} + +impl SkillType { + pub fn as_str(&self) -> &'static str { + match self { + SkillType::Builtin => "builtin", + SkillType::Custom => "custom", + SkillType::System => "system", + } + } +} + +impl std::str::FromStr for SkillType { + type Err = anyhow::Error; + + fn from_str(s: &str) -> Result { + match s.to_lowercase().as_str() { + "builtin" => Ok(SkillType::Builtin), + "custom" => Ok(SkillType::Custom), + "system" => Ok(SkillType::System), + _ => Err(anyhow::anyhow!("unknown skill_type: {}", s)), + } + } +} diff --git a/crates/devbase-symbol-links/Cargo.toml b/crates/devbase-symbol-links/Cargo.toml index 690ea68..7b04d8f 100644 --- a/crates/devbase-symbol-links/Cargo.toml +++ b/crates/devbase-symbol-links/Cargo.toml @@ -1,10 +1,11 @@ [package] name = "devbase-symbol-links" version.workspace = true -edition = "2024" +edition.workspace = true +authors.workspace = true +license.workspace = true +repository.workspace = true description = "Code symbol link generator: similar-signature and co-located relationship discovery" -authors = ["juice094 <160722440+juice094@users.noreply.github.com>"] -license = "MIT" keywords = ["code-analysis", "symbol-links", "rust", "knowledge-graph"] categories = ["development-tools"] diff --git a/crates/devbase-symbol-links/src/co_located.rs b/crates/devbase-symbol-links/src/co_located.rs new file mode 100644 index 0000000..c346f1e --- /dev/null +++ b/crates/devbase-symbol-links/src/co_located.rs @@ -0,0 +1,55 @@ +// SPDX-License-Identifier: AGPL-3.0-or-later +// Copyright (c) 2026 juice094 + +use std::collections::HashMap; + +use crate::SymbolLink; + +/// Compute `co_located` links: functions defined in the same source file. +/// +/// Strength is fixed at 0.5 — co-location is a moderate signal. 
+pub fn compute_co_located_links( + conn: &rusqlite::Connection, + repo_id: &str, +) -> anyhow::Result> { + let mut stmt = conn.prepare( + "SELECT file_path, name FROM code_symbols + WHERE repo_id = ?1 AND symbol_type = 'function'", + )?; + let rows = + stmt.query_map([repo_id], |row| Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?)))?; + + let mut by_file: HashMap> = HashMap::new(); + for row in rows { + let (path, name) = row?; + by_file.entry(path).or_default().push(name); + } + + let mut links = Vec::new(); + for (_path, names) in by_file { + if names.len() <= 1 { + continue; + } + for i in 0..names.len() { + for j in (i + 1)..names.len() { + links.push(SymbolLink { + source_repo: repo_id.to_string(), + source_symbol: names[i].clone(), + target_repo: repo_id.to_string(), + target_symbol: names[j].clone(), + link_type: "co_located".to_string(), + strength: 0.5, + }); + links.push(SymbolLink { + source_repo: repo_id.to_string(), + source_symbol: names[j].clone(), + target_repo: repo_id.to_string(), + target_symbol: names[i].clone(), + link_type: "co_located".to_string(), + strength: 0.5, + }); + } + } + } + Ok(links) +} diff --git a/crates/devbase-symbol-links/src/lib.rs b/crates/devbase-symbol-links/src/lib.rs index ad04c8b..a56d427 100644 --- a/crates/devbase-symbol-links/src/lib.rs +++ b/crates/devbase-symbol-links/src/lib.rs @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: MIT +// SPDX-License-Identifier: AGPL-3.0-or-later // Copyright (c) 2026 juice094 //! devbase-symbol-links — Code symbol link generator. //! @@ -14,7 +14,8 @@ //! - co_located strength 固定 0.5: 同文件是中等信号,不区分文件大小。 //! - Tokenization 排除 Rust 关键字: 避免 `fn`/`pub`/`async` 等噪音影响相似度。 -use std::collections::{HashMap, HashSet}; +pub mod co_located; +pub mod similarity; /// A generated link between two symbols. #[derive(Debug, Clone, PartialEq)] @@ -27,116 +28,14 @@ pub struct SymbolLink { pub strength: f32, } -/// Compute `similar_signature` links within a repo. 
-/// -/// Links symbols whose signatures share >= `threshold` Jaccard similarity -/// of token sets. Default threshold: 0.3 (30% token overlap). -pub fn compute_similar_signature_links( - conn: &rusqlite::Connection, - repo_id: &str, - threshold: f32, -) -> anyhow::Result> { - let mut stmt = conn.prepare( - "SELECT name, signature FROM code_symbols - WHERE repo_id = ?1 AND symbol_type = 'function' AND signature IS NOT NULL", - )?; - let rows = - stmt.query_map([repo_id], |row| Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?)))?; - - let mut symbols: Vec<(String, HashSet)> = Vec::new(); - for row in rows { - let (name, sig) = row?; - let tokens = tokenize_signature(&sig); - if !tokens.is_empty() { - symbols.push((name, tokens)); - } - } - - let mut links = Vec::new(); - for i in 0..symbols.len() { - for j in (i + 1)..symbols.len() { - let jaccard = jaccard_similarity(&symbols[i].1, &symbols[j].1); - if jaccard >= threshold { - // Bidirectional link - links.push(SymbolLink { - source_repo: repo_id.to_string(), - source_symbol: symbols[i].0.clone(), - target_repo: repo_id.to_string(), - target_symbol: symbols[j].0.clone(), - link_type: "similar_signature".to_string(), - strength: jaccard, - }); - links.push(SymbolLink { - source_repo: repo_id.to_string(), - source_symbol: symbols[j].0.clone(), - target_repo: repo_id.to_string(), - target_symbol: symbols[i].0.clone(), - link_type: "similar_signature".to_string(), - strength: jaccard, - }); - } - } - } - Ok(links) -} - -/// Compute `co_located` links: functions defined in the same source file. -/// -/// Strength is fixed at 0.5 — co-location is a moderate signal. 
-pub fn compute_co_located_links( - conn: &rusqlite::Connection, - repo_id: &str, -) -> anyhow::Result> { - let mut stmt = conn.prepare( - "SELECT file_path, name FROM code_symbols - WHERE repo_id = ?1 AND symbol_type = 'function'", - )?; - let rows = - stmt.query_map([repo_id], |row| Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?)))?; - - let mut by_file: HashMap> = HashMap::new(); - for row in rows { - let (path, name) = row?; - by_file.entry(path).or_default().push(name); - } - - let mut links = Vec::new(); - for (_path, names) in by_file { - if names.len() <= 1 { - continue; - } - for i in 0..names.len() { - for j in (i + 1)..names.len() { - links.push(SymbolLink { - source_repo: repo_id.to_string(), - source_symbol: names[i].clone(), - target_repo: repo_id.to_string(), - target_symbol: names[j].clone(), - link_type: "co_located".to_string(), - strength: 0.5, - }); - links.push(SymbolLink { - source_repo: repo_id.to_string(), - source_symbol: names[j].clone(), - target_repo: repo_id.to_string(), - target_symbol: names[i].clone(), - link_type: "co_located".to_string(), - strength: 0.5, - }); - } - } - } - Ok(links) -} - /// Build all link types for a repo and persist to `code_symbol_links`. 
pub fn generate_and_save_links( conn: &mut rusqlite::Connection, repo_id: &str, ) -> anyhow::Result { let mut all_links = Vec::new(); - all_links.extend(compute_similar_signature_links(conn, repo_id, 0.3)?); - all_links.extend(compute_co_located_links(conn, repo_id)?); + all_links.extend(similarity::compute_similar_signature_links(conn, repo_id, 0.3)?); + all_links.extend(co_located::compute_co_located_links(conn, repo_id)?); if all_links.is_empty() { return Ok(0); @@ -168,63 +67,10 @@ pub fn generate_and_save_links( Ok(inserted) } -// --------------------------------------------------------------------------- -// Helpers -// --------------------------------------------------------------------------- - -fn tokenize_signature(sig: &str) -> HashSet { - sig.split(|c: char| !c.is_alphanumeric() && c != '_') - .map(|s| s.to_lowercase()) - .filter(|s| s.len() > 1 && !is_common_keyword(s) && !s.chars().all(|c| c.is_numeric())) - .collect() -} - -fn is_common_keyword(s: &str) -> bool { - const KEYWORDS: &[&str] = &[ - "fn", "pub", "async", "mut", "let", "const", "static", "use", "impl", "where", "return", - "self", "true", "false", "if", "else", "for", "while", "loop", "match", "in", "ref", - "move", "type", "crate", "super", "dyn", "trait", "enum", "struct", "mod", "unsafe", - "extern", "as", "break", "continue", "yield", "await", "box", - ]; - KEYWORDS.contains(&s) -} - -fn jaccard_similarity(a: &HashSet, b: &HashSet) -> f32 { - if a.is_empty() && b.is_empty() { - return 1.0; - } - let intersection = a.intersection(b).count(); - let union = a.union(b).count(); - if union == 0 { - return 0.0; - } - intersection as f32 / union as f32 -} - #[cfg(test)] mod tests { use super::*; - #[test] - fn test_tokenize_signature() { - let tokens = tokenize_signature("pub fn authenticate(token: &str) -> Result"); - assert!(tokens.contains("authenticate")); - assert!(tokens.contains("token")); - assert!(tokens.contains("str")); - assert!(tokens.contains("result")); - 
assert!(tokens.contains("user")); - assert!(!tokens.contains("fn")); - assert!(!tokens.contains("pub")); - } - - #[test] - fn test_jaccard_similarity() { - let a: HashSet = ["a".into(), "b".into(), "c".into()].into_iter().collect(); - let b: HashSet = ["b".into(), "c".into(), "d".into()].into_iter().collect(); - // intersection = 2, union = 4 - assert!((jaccard_similarity(&a, &b) - 0.5).abs() < 1e-6); - } - #[test] fn test_compute_co_located_links() { let conn = rusqlite::Connection::open_in_memory().unwrap(); @@ -246,7 +92,7 @@ mod tests { ) .unwrap(); - let links = compute_co_located_links(&conn, "r1").unwrap(); + let links = co_located::compute_co_located_links(&conn, "r1").unwrap(); // lib.rs has foo+bar => 2 bidirectional links assert_eq!(links.len(), 2); // main.rs has only main => no links @@ -274,7 +120,7 @@ mod tests { ) .unwrap(); - let links = compute_similar_signature_links(&conn, "r1", 0.3).unwrap(); + let links = similarity::compute_similar_signature_links(&conn, "r1", 0.3).unwrap(); // auth_token and validate_token share token, str, timeout => should link assert!(!links.is_empty()); let has_auth_validate = links.iter().any(|l| { diff --git a/crates/devbase-symbol-links/src/similarity.rs b/crates/devbase-symbol-links/src/similarity.rs new file mode 100644 index 0000000..42704f7 --- /dev/null +++ b/crates/devbase-symbol-links/src/similarity.rs @@ -0,0 +1,88 @@ +// SPDX-License-Identifier: AGPL-3.0-or-later +// Copyright (c) 2026 juice094 + +use std::collections::HashSet; + +use crate::SymbolLink; + +/// Compute `similar_signature` links within a repo. +/// +/// Links symbols whose signatures share >= `threshold` Jaccard similarity +/// of token sets. Default threshold: 0.3 (30% token overlap). 
+pub fn compute_similar_signature_links( + conn: &rusqlite::Connection, + repo_id: &str, + threshold: f32, +) -> anyhow::Result> { + let mut stmt = conn.prepare( + "SELECT name, signature FROM code_symbols + WHERE repo_id = ?1 AND symbol_type = 'function' AND signature IS NOT NULL", + )?; + let rows = + stmt.query_map([repo_id], |row| Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?)))?; + + let mut symbols: Vec<(String, HashSet)> = Vec::new(); + for row in rows { + let (name, sig) = row?; + let tokens = tokenize_signature(&sig); + if !tokens.is_empty() { + symbols.push((name, tokens)); + } + } + + let mut links = Vec::new(); + for i in 0..symbols.len() { + for j in (i + 1)..symbols.len() { + let jaccard = jaccard_similarity(&symbols[i].1, &symbols[j].1); + if jaccard >= threshold { + // Bidirectional link + links.push(SymbolLink { + source_repo: repo_id.to_string(), + source_symbol: symbols[i].0.clone(), + target_repo: repo_id.to_string(), + target_symbol: symbols[j].0.clone(), + link_type: "similar_signature".to_string(), + strength: jaccard, + }); + links.push(SymbolLink { + source_repo: repo_id.to_string(), + source_symbol: symbols[j].0.clone(), + target_repo: repo_id.to_string(), + target_symbol: symbols[i].0.clone(), + link_type: "similar_signature".to_string(), + strength: jaccard, + }); + } + } + } + Ok(links) +} + +fn tokenize_signature(sig: &str) -> HashSet { + sig.split(|c: char| !c.is_alphanumeric() && c != '_') + .map(|s| s.to_lowercase()) + .filter(|s| s.len() > 1 && !is_common_keyword(s) && !s.chars().all(|c| c.is_numeric())) + .collect() +} + +fn is_common_keyword(s: &str) -> bool { + const KEYWORDS: &[&str] = &[ + "fn", "pub", "async", "mut", "let", "const", "static", "use", "impl", "where", "return", + "self", "true", "false", "if", "else", "for", "while", "loop", "match", "in", "ref", + "move", "type", "crate", "super", "dyn", "trait", "enum", "struct", "mod", "unsafe", + "extern", "as", "break", "continue", "yield", "await", "box", + ]; + 
KEYWORDS.contains(&s) +} + +fn jaccard_similarity(a: &HashSet, b: &HashSet) -> f32 { + if a.is_empty() && b.is_empty() { + return 1.0; + } + let intersection = a.intersection(b).count(); + let union = a.union(b).count(); + if union == 0 { + return 0.0; + } + intersection as f32 / union as f32 +} diff --git a/crates/devbase-sync-protocol/Cargo.toml b/crates/devbase-sync-protocol/Cargo.toml index a1855eb..0b58b22 100644 --- a/crates/devbase-sync-protocol/Cargo.toml +++ b/crates/devbase-sync-protocol/Cargo.toml @@ -1,10 +1,11 @@ [package] name = "devbase-sync-protocol" version.workspace = true -edition = "2024" +edition.workspace = true +authors.workspace = true +license.workspace = true +repository.workspace = true description = "Lightweight directory sync protocol with version vectors" -authors = ["juice094 <160722440+juice094@users.noreply.github.com>"] -license = "MIT" keywords = ["sync", "version-vector", "directory-scan", "rust"] categories = ["filesystem"] diff --git a/crates/devbase-sync-protocol/src/index.rs b/crates/devbase-sync-protocol/src/index.rs new file mode 100644 index 0000000..a85d2bc --- /dev/null +++ b/crates/devbase-sync-protocol/src/index.rs @@ -0,0 +1,22 @@ +// SPDX-License-Identifier: AGPL-3.0-or-later +// Copyright (c) 2026 juice094 + +use crate::version_vector::VersionVector; +use chrono::{DateTime, Utc}; +use serde::{Deserialize, Serialize}; +use std::path::PathBuf; + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct FileInfo { + pub name: String, + pub size: u64, + pub mod_time: DateTime, + pub version: VersionVector, + pub blocks_hash: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct SyncIndex { + pub path: PathBuf, + pub files: Vec, +} diff --git a/crates/devbase-sync-protocol/src/lib.rs b/crates/devbase-sync-protocol/src/lib.rs index a4e76a2..a6d043d 100644 --- a/crates/devbase-sync-protocol/src/lib.rs +++ b/crates/devbase-sync-protocol/src/lib.rs @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: MIT +// 
SPDX-License-Identifier: AGPL-3.0-or-later // Copyright (c) 2026 juice094 //! devbase-sync-protocol — Lightweight directory sync protocol with version vectors. //! @@ -14,114 +14,17 @@ //! - VersionVector 而非 Lamport clocks: 支持多设备并发写入的偏序比较。 //! - 跳过 `.git` 目录: 避免索引版本控制元数据。 +pub mod index; +pub mod version_vector; + +// Re-export for backward compatibility +pub use index::{FileInfo, SyncIndex}; +pub use version_vector::{Counter, VersionVector}; + use anyhow::Context; -use chrono::{DateTime, Utc}; -use serde::{Deserialize, Serialize}; -use std::cmp::Ordering; use std::collections::hash_map::DefaultHasher; use std::hash::{Hash, Hasher}; -use std::path::{Path, PathBuf}; - -#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)] -pub struct Counter { - pub id: u64, - pub value: u64, -} - -#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)] -pub struct VersionVector { - pub counters: Vec, -} - -impl VersionVector { - /// Increment the counter for `local_id`, creating it if absent. - #[allow(dead_code)] - pub fn update(mut self, local_id: u64) -> Self { - for c in &mut self.counters { - if c.id == local_id { - c.value += 1; - return self; - } - } - self.counters.push(Counter { id: local_id, value: 1 }); - self - } - - /// Merge with another vector, taking the maximum value for each id. - #[allow(dead_code)] - pub fn merge(mut self, other: &VersionVector) -> Self { - for o in &other.counters { - let mut found = false; - for c in &mut self.counters { - if c.id == o.id { - c.value = c.value.max(o.value); - found = true; - break; - } - } - if !found { - self.counters.push(o.clone()); - } - } - self - } - - /// Compare two version vectors. 
- /// - /// - Greater => self dominates other (all >= and at least one >) - /// - Less => other dominates self - /// - Equal => identical or concurrent conflict (incomparable) - #[allow(dead_code)] - pub fn compare(&self, other: &VersionVector) -> Ordering { - let mut self_map = std::collections::HashMap::new(); - for c in &self.counters { - self_map.insert(c.id, c.value); - } - let mut other_map = std::collections::HashMap::new(); - for c in &other.counters { - other_map.insert(c.id, c.value); - } - - let all_ids: std::collections::HashSet = - self_map.keys().chain(other_map.keys()).copied().collect(); - - let mut has_greater = false; - let mut has_less = false; - for id in all_ids { - let sv = self_map.get(&id).copied().unwrap_or(0); - let ov = other_map.get(&id).copied().unwrap_or(0); - if sv > ov { - has_greater = true; - } else if sv < ov { - has_less = true; - } - } - - match (has_greater, has_less) { - (true, false) => Ordering::Greater, - (false, true) => Ordering::Less, - _ => { - // Both false => equal; both true => conflict => Equal per spec - Ordering::Equal - } - } - } -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct FileInfo { - pub name: String, - pub size: u64, - pub mod_time: DateTime, - pub version: VersionVector, - pub blocks_hash: Option, -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct SyncIndex { - pub path: PathBuf, - pub files: Vec, -} +use std::path::Path; /// Lightweight directory scanner inspired by Syncthing's local index. 
pub fn scan_directory(path: &Path) -> anyhow::Result { @@ -145,7 +48,7 @@ pub fn scan_directory(path: &Path) -> anyhow::Result { .with_context(|| format!("failed to read metadata for {:?}", entry.path()))?; let size = meta.len(); let mod_time = meta.modified().unwrap_or(std::time::SystemTime::UNIX_EPOCH); - let mod_time: DateTime = mod_time.into(); + let mod_time: chrono::DateTime = mod_time.into(); // Compute a lightweight hash: SHA256 of file content would be ideal, // but for a light abstraction we hash the (size, mod_time, path) tuple. @@ -231,21 +134,21 @@ mod tests { fn test_version_vector_compare_equal() { let a = VersionVector::default().update(1); let b = VersionVector::default().update(1); - assert_eq!(a.compare(&b), Ordering::Equal); + assert_eq!(a.compare(&b), std::cmp::Ordering::Equal); } #[test] fn test_version_vector_compare_greater() { let a = VersionVector::default().update(1).update(1); let b = VersionVector::default().update(1); - assert_eq!(a.compare(&b), Ordering::Greater); + assert_eq!(a.compare(&b), std::cmp::Ordering::Greater); } #[test] fn test_version_vector_compare_less() { let a = VersionVector::default().update(1); let b = VersionVector::default().update(1).update(1); - assert_eq!(a.compare(&b), Ordering::Less); + assert_eq!(a.compare(&b), std::cmp::Ordering::Less); } #[test] @@ -253,15 +156,15 @@ mod tests { // Concurrent: a has higher id=1, b has higher id=2 let a = VersionVector::default().update(1).update(1); let b = VersionVector::default().update(2).update(2); - assert_eq!(a.compare(&b), Ordering::Equal); + assert_eq!(a.compare(&b), std::cmp::Ordering::Equal); } #[test] fn test_version_vector_compare_empty() { let a = VersionVector::default(); let b = VersionVector::default().update(1); - assert_eq!(a.compare(&b), Ordering::Less); - assert_eq!(b.compare(&a), Ordering::Greater); + assert_eq!(a.compare(&b), std::cmp::Ordering::Less); + assert_eq!(b.compare(&a), std::cmp::Ordering::Greater); } #[test] diff --git 
a/crates/devbase-sync-protocol/src/version_vector.rs b/crates/devbase-sync-protocol/src/version_vector.rs new file mode 100644 index 0000000..78ca64b --- /dev/null +++ b/crates/devbase-sync-protocol/src/version_vector.rs @@ -0,0 +1,91 @@ +// SPDX-License-Identifier: AGPL-3.0-or-later +// Copyright (c) 2026 juice094 + +use serde::{Deserialize, Serialize}; +use std::cmp::Ordering; + +#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)] +pub struct Counter { + pub id: u64, + pub value: u64, +} + +#[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)] +pub struct VersionVector { + pub counters: Vec, +} + +impl VersionVector { + /// Increment the counter for `local_id`, creating it if absent. + #[allow(dead_code)] + pub fn update(mut self, local_id: u64) -> Self { + for c in &mut self.counters { + if c.id == local_id { + c.value += 1; + return self; + } + } + self.counters.push(Counter { id: local_id, value: 1 }); + self + } + + /// Merge with another vector, taking the maximum value for each id. + #[allow(dead_code)] + pub fn merge(mut self, other: &VersionVector) -> Self { + for o in &other.counters { + let mut found = false; + for c in &mut self.counters { + if c.id == o.id { + c.value = c.value.max(o.value); + found = true; + break; + } + } + if !found { + self.counters.push(o.clone()); + } + } + self + } + + /// Compare two version vectors. 
+ /// + /// - Greater => self dominates other (all >= and at least one >) + /// - Less => other dominates self + /// - Equal => identical or concurrent conflict (incomparable) + #[allow(dead_code)] + pub fn compare(&self, other: &VersionVector) -> Ordering { + let mut self_map = std::collections::HashMap::new(); + for c in &self.counters { + self_map.insert(c.id, c.value); + } + let mut other_map = std::collections::HashMap::new(); + for c in &other.counters { + other_map.insert(c.id, c.value); + } + + let all_ids: std::collections::HashSet = + self_map.keys().chain(other_map.keys()).copied().collect(); + + let mut has_greater = false; + let mut has_less = false; + for id in all_ids { + let sv = self_map.get(&id).copied().unwrap_or(0); + let ov = other_map.get(&id).copied().unwrap_or(0); + if sv > ov { + has_greater = true; + } else if sv < ov { + has_less = true; + } + } + + match (has_greater, has_less) { + (true, false) => Ordering::Greater, + (false, true) => Ordering::Less, + _ => { + // Both false => equal; both true => conflict => Equal per spec + Ordering::Equal + } + } + } +} diff --git a/crates/devbase-syncthing-client/Cargo.toml b/crates/devbase-syncthing-client/Cargo.toml index a23b5b9..9151e13 100644 --- a/crates/devbase-syncthing-client/Cargo.toml +++ b/crates/devbase-syncthing-client/Cargo.toml @@ -1,10 +1,11 @@ [package] name = "devbase-syncthing-client" version.workspace = true -edition = "2024" +edition.workspace = true +authors.workspace = true +license.workspace = true +repository.workspace = true description = "Syncthing REST API client for folder management" -authors = ["juice094 <160722440+juice094@users.noreply.github.com>"] -license = "MIT" keywords = ["syncthing", "p2p", "sync", "rest-client"] categories = ["network-programming"] diff --git a/crates/devbase-vault-frontmatter/Cargo.toml b/crates/devbase-vault-frontmatter/Cargo.toml index 62db491..f748eaa 100644 --- a/crates/devbase-vault-frontmatter/Cargo.toml +++ 
b/crates/devbase-vault-frontmatter/Cargo.toml @@ -1,10 +1,11 @@ [package] name = "devbase-vault-frontmatter" version.workspace = true -edition = "2024" +edition.workspace = true +authors.workspace = true +license.workspace = true +repository.workspace = true description = "Markdown frontmatter parser for vault notes" -authors = ["juice094 <160722440+juice094@users.noreply.github.com>"] -license = "MIT" keywords = ["markdown", "frontmatter", "vault", "yaml"] categories = ["text-processing"] diff --git a/crates/devbase-vault-frontmatter/src/lib.rs b/crates/devbase-vault-frontmatter/src/lib.rs index c3e62d9..93a13c0 100644 --- a/crates/devbase-vault-frontmatter/src/lib.rs +++ b/crates/devbase-vault-frontmatter/src/lib.rs @@ -16,6 +16,8 @@ use std::collections::HashMap; +mod parser; + /// Parsed frontmatter from a Markdown vault note. #[derive(Debug, Clone, Default, PartialEq)] pub struct Frontmatter { @@ -47,107 +49,10 @@ pub fn extract_frontmatter(content: &str) -> Option<(Frontmatter, usize)> { let raw = after_open[..close_pos].trim(); let body_offset = trimmed.as_ptr() as usize - content.as_ptr() as usize + 3 + close_pos + 4; - let fm = parse_yaml_frontmatter(raw); + let fm = parser::parse_yaml_frontmatter(raw); Some((fm, body_offset)) } -fn parse_yaml_frontmatter(raw: &str) -> Frontmatter { - let mut fm = Frontmatter { - raw: raw.to_string(), - ..Default::default() - }; - - // Lightweight YAML parsing: only handle key: value and key: [list] patterns. 
- for line in raw.lines() { - let line = line.trim(); - if line.is_empty() || line.starts_with('#') { - continue; - } - - if let Some((key, rest)) = line.split_once(':') { - let key = key.trim(); - let rest = rest.trim(); - - match key { - "id" => { - fm.id = Some(unquote(rest).to_string()); - } - "title" => { - fm.title = Some(unquote(rest).to_string()); - } - "repo" => { - fm.repo = Some(unquote(rest).to_string()); - } - "date" => { - fm.date = Some(unquote(rest).to_string()); - } - "created" => { - fm.created = Some(unquote(rest).to_string()); - } - "updated" => { - fm.updated = Some(unquote(rest).to_string()); - } - "ai_context" => { - fm.ai_context = Some(parse_bool(rest)); - } - "tags" => { - fm.tags = parse_yaml_list(rest, raw, line); - } - "aliases" => { - fm.aliases = parse_yaml_list(rest, raw, line); - } - _ => { - fm.extra.insert(key.to_string(), unquote(rest).to_string()); - } - } - } - } - - fm -} - -fn parse_bool(s: &str) -> bool { - matches!(s.trim().to_lowercase().as_str(), "true" | "yes" | "1" | "on") -} - -fn unquote(s: &str) -> &str { - s.strip_prefix('"') - .and_then(|s| s.strip_suffix('"')) - .or_else(|| s.strip_prefix('\'').and_then(|s| s.strip_suffix('\''))) - .unwrap_or(s) -} - -fn parse_yaml_list<'a>(rest: &'a str, raw: &'a str, line: &'a str) -> Vec { - if rest.starts_with('[') && rest.ends_with(']') { - rest[1..rest.len() - 1] - .split(',') - .map(|s| unquote(s.trim()).to_string()) - .filter(|s| !s.is_empty()) - .collect() - } else if rest.is_empty() { - // Multi-line list starting on next lines: "- item" - let mut items = Vec::new(); - let mut in_list = false; - for l in raw.lines() { - if l.trim() == line.trim() { - in_list = true; - continue; - } - if in_list { - let tl = l.trim_start(); - if let Some(stripped) = tl.strip_prefix("- ") { - items.push(unquote(stripped).to_string()); - } else if !tl.is_empty() && !tl.starts_with('#') { - break; - } - } - } - items - } else { - vec![unquote(rest).to_string()] - } -} - #[cfg(test)] mod 
tests { use super::*; diff --git a/crates/devbase-vault-frontmatter/src/parser.rs b/crates/devbase-vault-frontmatter/src/parser.rs new file mode 100644 index 0000000..53d0088 --- /dev/null +++ b/crates/devbase-vault-frontmatter/src/parser.rs @@ -0,0 +1,99 @@ +// SPDX-License-Identifier: MIT +// Copyright (c) 2026 juice094 + +use crate::Frontmatter; + +pub fn parse_yaml_frontmatter(raw: &str) -> Frontmatter { + let mut fm = Frontmatter { + raw: raw.to_string(), + ..Default::default() + }; + + for line in raw.lines() { + let line = line.trim(); + if line.is_empty() || line.starts_with('#') { + continue; + } + + if let Some((key, rest)) = line.split_once(':') { + let key = key.trim(); + let rest = rest.trim(); + + match key { + "id" => { + fm.id = Some(unquote(rest).to_string()); + } + "title" => { + fm.title = Some(unquote(rest).to_string()); + } + "repo" => { + fm.repo = Some(unquote(rest).to_string()); + } + "date" => { + fm.date = Some(unquote(rest).to_string()); + } + "created" => { + fm.created = Some(unquote(rest).to_string()); + } + "updated" => { + fm.updated = Some(unquote(rest).to_string()); + } + "ai_context" => { + fm.ai_context = Some(parse_bool(rest)); + } + "tags" => { + fm.tags = parse_yaml_list(rest, raw, line); + } + "aliases" => { + fm.aliases = parse_yaml_list(rest, raw, line); + } + _ => { + fm.extra.insert(key.to_string(), unquote(rest).to_string()); + } + } + } + } + + fm +} + +pub fn parse_bool(s: &str) -> bool { + matches!(s.trim().to_lowercase().as_str(), "true" | "yes" | "1" | "on") +} + +pub fn unquote(s: &str) -> &str { + s.strip_prefix('"') + .and_then(|s| s.strip_suffix('"')) + .or_else(|| s.strip_prefix('\'').and_then(|s| s.strip_suffix('\''))) + .unwrap_or(s) +} + +pub fn parse_yaml_list<'a>(rest: &'a str, raw: &'a str, line: &'a str) -> Vec { + if rest.starts_with('[') && rest.ends_with(']') { + rest[1..rest.len() - 1] + .split(',') + .map(|s| unquote(s.trim()).to_string()) + .filter(|s| !s.is_empty()) + .collect() + } else if 
rest.is_empty() { + let mut items = Vec::new(); + let mut in_list = false; + for l in raw.lines() { + if l.trim() == line.trim() { + in_list = true; + continue; + } + if in_list { + let tl = l.trim_start(); + if let Some(stripped) = tl.strip_prefix("- ") { + items.push(unquote(stripped).to_string()); + } else if !tl.is_empty() && !tl.starts_with('#') { + break; + } + } + } + items + } else { + vec![unquote(rest).to_string()] + } +} diff --git a/crates/devbase-vault-wikilink/Cargo.toml b/crates/devbase-vault-wikilink/Cargo.toml index 8631ae3..a2cb0be 100644 --- a/crates/devbase-vault-wikilink/Cargo.toml +++ b/crates/devbase-vault-wikilink/Cargo.toml @@ -1,10 +1,11 @@ [package] name = "devbase-vault-wikilink" version.workspace = true -edition = "2024" +edition.workspace = true +authors.workspace = true +license.workspace = true +repository.workspace = true description = "WikiLink parser and resolver for Markdown vaults" -authors = ["juice094 <160722440+juice094@users.noreply.github.com>"] -license = "MIT" keywords = ["markdown", "wikilink", "vault", "obsidian"] categories = ["text-processing"] diff --git a/crates/devbase-vault-wikilink/src/lib.rs b/crates/devbase-vault-wikilink/src/lib.rs index fdd163f..0490f7b 100644 --- a/crates/devbase-vault-wikilink/src/lib.rs +++ b/crates/devbase-vault-wikilink/src/lib.rs @@ -14,6 +14,8 @@ //! - 别名分隔符为 `|`: 与 Obsidian 一致。 //! - 返回字符级位置 (start/end): 便于高亮和索引定位。 +mod parser; + /// A single WikiLink found in a Markdown document. 
#[derive(Debug, Clone, PartialEq)] pub struct WikiLink { @@ -44,7 +46,7 @@ pub fn extract_wikilinks(content: &str) -> Vec { if depth == 0 { let inner = &chars[inner_start..i]; let inner_str: String = inner.iter().collect(); - let link = parse_link(&inner_str, start, i + 2); + let link = parser::parse_link(&inner_str, start, i + 2); links.push(link); i += 2; break; @@ -64,33 +66,6 @@ pub fn extract_wikilinks(content: &str) -> Vec { links } -fn parse_link(inner: &str, start: usize, end: usize) -> WikiLink { - // Step 1: split display text by `|` - let (left, display) = if let Some(pipe_pos) = inner.find('|') { - (inner[..pipe_pos].trim(), Some(inner[pipe_pos + 1..].trim().to_string())) - } else { - (inner.trim(), None) - }; - - // Step 2: split anchor by `#` (heading or ^block-id) - let (target, anchor) = if let Some(hash_pos) = left.find('#') { - ( - left[..hash_pos].trim().to_string(), - Some(left[hash_pos + 1..].trim().to_string()), - ) - } else { - (left.to_string(), None) - }; - - WikiLink { - target, - display, - anchor, - start, - end, - } -} - /// Build a backlink index: for each target, list the source note IDs that link to it. 
pub fn build_backlink_index<'a>( notes: impl Iterator, diff --git a/crates/devbase-vault-wikilink/src/parser.rs b/crates/devbase-vault-wikilink/src/parser.rs new file mode 100644 index 0000000..3a00140 --- /dev/null +++ b/crates/devbase-vault-wikilink/src/parser.rs @@ -0,0 +1,29 @@ +// SPDX-License-Identifier: MIT +// Copyright (c) 2026 juice094 + +use crate::WikiLink; + +pub fn parse_link(inner: &str, start: usize, end: usize) -> WikiLink { + let (left, display) = if let Some(pipe_pos) = inner.find('|') { + (inner[..pipe_pos].trim(), Some(inner[pipe_pos + 1..].trim().to_string())) + } else { + (inner.trim(), None) + }; + + let (target, anchor) = if let Some(hash_pos) = left.find('#') { + ( + left[..hash_pos].trim().to_string(), + Some(left[hash_pos + 1..].trim().to_string()), + ) + } else { + (left.to_string(), None) + }; + + WikiLink { + target, + display, + anchor, + start, + end, + } +} diff --git a/crates/devbase-workflow-interpolate/Cargo.toml b/crates/devbase-workflow-interpolate/Cargo.toml index c5770e5..94f7a7b 100644 --- a/crates/devbase-workflow-interpolate/Cargo.toml +++ b/crates/devbase-workflow-interpolate/Cargo.toml @@ -1,9 +1,11 @@ [package] name = "devbase-workflow-interpolate" version.workspace = true -edition = "2024" +edition.workspace = true +authors.workspace = true +license.workspace = true +repository.workspace = true description = "Workflow variable interpolation for devbase" -authors = ["juice094 <160722440+juice094@users.noreply.github.com>"] [dependencies] anyhow = "1" diff --git a/crates/devbase-workflow-interpolate/src/lib.rs b/crates/devbase-workflow-interpolate/src/lib.rs index f7127db..790b987 100644 --- a/crates/devbase-workflow-interpolate/src/lib.rs +++ b/crates/devbase-workflow-interpolate/src/lib.rs @@ -1,15 +1,10 @@ // SPDX-License-Identifier: MIT // Copyright (c) 2026 juice094 -use regex::Regex; + use serde_json::Value; use std::collections::HashMap; -use std::sync::OnceLock; - -static VAR_RE: OnceLock = OnceLock::new(); -fn 
var_regex() -> &'static Regex { - VAR_RE.get_or_init(|| Regex::new(r"\$\{([^}]+)\}").expect("static regex is valid")) -} +mod resolver; /// Interpolate variables in a string using the provided context. /// @@ -19,55 +14,17 @@ fn var_regex() -> &'static Regex { /// ${env.} → environment variables /// ${config.} → devbase config (not implemented yet) pub fn interpolate(template: &str, ctx: &InterpolationContext) -> anyhow::Result { - let re = var_regex(); + let re = resolver::var_regex(); let mut result = template.to_string(); for cap in re.captures_iter(template) { let full = cap.get(0).expect("capture group 0 always exists").as_str(); let path = cap.get(1).expect("capture group 1 exists for matched pattern").as_str(); - let value = resolve(path, ctx)?; + let value = resolver::resolve(path, ctx)?; result = result.replace(full, &value); } Ok(result) } -fn resolve(path: &str, ctx: &InterpolationContext) -> anyhow::Result { - let parts: Vec<&str> = path.split('.').collect(); - match parts.as_slice() { - ["inputs", name] => ctx - .inputs - .get(*name) - .cloned() - .ok_or_else(|| anyhow::anyhow!("missing input: {name}")), - ["steps", step_id, "outputs", out_name] => ctx - .step_outputs - .get(*step_id) - .and_then(|m| m.get(*out_name)) - .map(json_to_string) - .ok_or_else(|| anyhow::anyhow!("missing output {out_name} for step {step_id}")), - ["env", name] => { - std::env::var(*name).map_err(|_| anyhow::anyhow!("missing env var: {name}")) - } - ["loop", "item"] => ctx - .loop_vars - .get("item") - .cloned() - .ok_or_else(|| anyhow::anyhow!("loop item not set")), - ["loop", "index"] => ctx - .loop_vars - .get("index") - .cloned() - .ok_or_else(|| anyhow::anyhow!("loop index not set")), - _ => Err(anyhow::anyhow!("unsupported variable path: {path}")), - } -} - -fn json_to_string(v: &Value) -> String { - match v { - Value::String(s) => s.clone(), - _ => v.to_string(), - } -} - /// Interpolate a serde_yaml::Value recursively. 
pub fn interpolate_value( value: &serde_yaml::Value, diff --git a/crates/devbase-workflow-interpolate/src/resolver.rs b/crates/devbase-workflow-interpolate/src/resolver.rs new file mode 100644 index 0000000..42e96b8 --- /dev/null +++ b/crates/devbase-workflow-interpolate/src/resolver.rs @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: MIT +// Copyright (c) 2026 juice094 + +use regex::Regex; +use serde_json::Value; +use std::sync::OnceLock; + +use crate::InterpolationContext; + +static VAR_RE: OnceLock = OnceLock::new(); + +pub fn var_regex() -> &'static Regex { + VAR_RE.get_or_init(|| { + if let Ok(re) = Regex::new(r"\$\{([^}]+)\}") { + return re; + } + // All branches below are unreachable: the primary pattern is a + // compile-time constant known to be valid. + if let Ok(re) = Regex::new(".*") { + return re; + } + // Regex engine is fundamentally broken — abort cleanly. + std::process::abort() + }) +} + +pub fn resolve(path: &str, ctx: &InterpolationContext) -> anyhow::Result { + let parts: Vec<&str> = path.split('.').collect(); + match parts.as_slice() { + ["inputs", name] => ctx + .inputs + .get(*name) + .cloned() + .ok_or_else(|| anyhow::anyhow!("missing input: {name}")), + ["steps", step_id, "outputs", out_name] => ctx + .step_outputs + .get(*step_id) + .and_then(|m| m.get(*out_name)) + .map(json_to_string) + .ok_or_else(|| anyhow::anyhow!("missing output {out_name} for step {step_id}")), + ["env", name] => { + std::env::var(*name).map_err(|_| anyhow::anyhow!("missing env var: {name}")) + } + ["loop", "item"] => ctx + .loop_vars + .get("item") + .cloned() + .ok_or_else(|| anyhow::anyhow!("loop item not set")), + ["loop", "index"] => ctx + .loop_vars + .get("index") + .cloned() + .ok_or_else(|| anyhow::anyhow!("loop index not set")), + _ => Err(anyhow::anyhow!("unsupported variable path: {path}")), + } +} + +pub fn json_to_string(v: &Value) -> String { + match v { + Value::String(s) => s.clone(), + _ => v.to_string(), + } +} diff --git 
a/crates/devbase-workflow-model/Cargo.toml b/crates/devbase-workflow-model/Cargo.toml index 69c18d7..f0b9af5 100644 --- a/crates/devbase-workflow-model/Cargo.toml +++ b/crates/devbase-workflow-model/Cargo.toml @@ -1,11 +1,11 @@ [package] name = "devbase-workflow-model" version.workspace = true -edition = "2024" -authors = ["juice094 <160722440+juice094@users.noreply.github.com>"] +edition.workspace = true +authors.workspace = true +license.workspace = true +repository.workspace = true description = "Workflow definition types for the YAML-based workflow engine" -license = "MIT" -repository = "https://github.com/juice094/devbase" [dependencies] serde = { version = "1", features = ["derive"] } diff --git a/crates/devbase-workflow-model/src/definition.rs b/crates/devbase-workflow-model/src/definition.rs new file mode 100644 index 0000000..3475486 --- /dev/null +++ b/crates/devbase-workflow-model/src/definition.rs @@ -0,0 +1,77 @@ +// SPDX-License-Identifier: AGPL-3.0-or-later +// Copyright (c) 2026 juice094 + +use serde::{Deserialize, Serialize}; +use std::collections::HashMap; + +use crate::step_type::StepType; + +/// A workflow definition parsed from YAML. 
+#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)] +pub struct WorkflowDefinition { + pub id: String, + pub name: String, + pub version: String, + pub description: Option, + #[serde(default)] + pub inputs: Vec, + #[serde(default)] + pub outputs: Vec, + pub steps: Vec, + #[serde(default, rename = "output_mapping")] + pub output_mapping: HashMap, +} + +#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)] +pub struct WorkflowInputDef { + pub name: String, + #[serde(rename = "type", default = "default_string_type")] + pub input_type: String, + #[serde(default)] + pub required: bool, + #[serde(default)] + pub default: Option, + #[serde(default)] + pub description: Option, +} + +#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)] +pub struct WorkflowOutputDef { + pub name: String, + #[serde(rename = "type", default = "default_string_type")] + pub output_type: String, + #[serde(default)] + pub description: Option, +} + +#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)] +pub struct StepDefinition { + pub id: String, + #[serde(flatten)] + pub step_type: StepType, + #[serde(default)] + pub inputs: HashMap, + #[serde(default, rename = "depends_on")] + pub depends_on: Vec, + #[serde(default, rename = "on_error")] + pub on_error: ErrorPolicy, + #[serde(default)] + pub timeout_seconds: Option, +} + +#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Default)] +pub enum ErrorPolicy { + #[default] + #[serde(rename = "fail")] + Fail, + #[serde(rename = "continue")] + Continue, + #[serde(rename = "retry")] + Retry { count: u32, backoff_ms: u64 }, + #[serde(rename = "fallback")] + Fallback { step_id: String }, +} + +pub fn default_string_type() -> String { + "string".to_string() +} diff --git a/crates/devbase-workflow-model/src/execution.rs b/crates/devbase-workflow-model/src/execution.rs new file mode 100644 index 0000000..a44eb28 --- /dev/null +++ b/crates/devbase-workflow-model/src/execution.rs @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: 
AGPL-3.0-or-later +// Copyright (c) 2026 juice094 + +use serde::{Deserialize, Serialize}; +use std::collections::HashMap; + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +pub enum ExecutionStatus { + Pending, + Running, + Completed, + Failed, + Cancelled, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct WorkflowExecution { + pub id: i64, + pub workflow_id: String, + pub inputs_json: String, + pub status: ExecutionStatus, + pub current_step: Option, + pub started_at: String, + pub finished_at: Option, + pub duration_ms: Option, + #[serde(default)] + pub step_results: HashMap, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct StepResult { + pub step_id: String, + pub status: ExecutionStatus, + pub outputs: HashMap, + pub stdout: Option, + pub stderr: Option, + pub started_at: Option, + pub finished_at: Option, + pub error: Option, +} diff --git a/crates/devbase-workflow-model/src/lib.rs b/crates/devbase-workflow-model/src/lib.rs index 456499c..4fc4f64 100644 --- a/crates/devbase-workflow-model/src/lib.rs +++ b/crates/devbase-workflow-model/src/lib.rs @@ -1,287 +1,14 @@ -// SPDX-License-Identifier: MIT +// SPDX-License-Identifier: AGPL-3.0-or-later // Copyright (c) 2026 juice094 -use serde::{Deserialize, Serialize}; -use std::collections::HashMap; -/// A workflow definition parsed from YAML. 
-#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)] -pub struct WorkflowDefinition { - pub id: String, - pub name: String, - pub version: String, - pub description: Option, - #[serde(default)] - pub inputs: Vec, - #[serde(default)] - pub outputs: Vec, - pub steps: Vec, - #[serde(default, rename = "output_mapping")] - pub output_mapping: HashMap, -} - -#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)] -pub struct WorkflowInputDef { - pub name: String, - #[serde(rename = "type", default = "default_string_type")] - pub input_type: String, - #[serde(default)] - pub required: bool, - #[serde(default)] - pub default: Option, - #[serde(default)] - pub description: Option, -} - -#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)] -pub struct WorkflowOutputDef { - pub name: String, - #[serde(rename = "type", default = "default_string_type")] - pub output_type: String, - #[serde(default)] - pub description: Option, -} - -#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)] -pub struct StepDefinition { - pub id: String, - #[serde(flatten)] - pub step_type: StepType, - #[serde(default)] - pub inputs: HashMap, - #[serde(default, rename = "depends_on")] - pub depends_on: Vec, - #[serde(default, rename = "on_error")] - pub on_error: ErrorPolicy, - #[serde(default)] - pub timeout_seconds: Option, -} - -#[derive(Debug, Clone, PartialEq)] -pub enum StepType { - Skill { - skill: String, - }, - Subworkflow { - workflow: String, - }, - Parallel { - parallel: Vec, - }, - Condition { - r#if: String, - }, - Loop { - for_each: String, - body: Vec, - }, -} - -impl serde::Serialize for StepType { - fn serialize(&self, serializer: S) -> Result - where - S: serde::Serializer, - { - use serde::ser::SerializeMap; - let mut map = serializer.serialize_map(None)?; - match self { - StepType::Skill { skill } => { - map.serialize_entry("type", "skill")?; - map.serialize_entry("skill", skill)?; - } - StepType::Subworkflow { workflow } => { - map.serialize_entry("type", 
"subworkflow")?; - map.serialize_entry("workflow", workflow)?; - } - StepType::Parallel { parallel } => { - map.serialize_entry("type", "parallel")?; - map.serialize_entry("parallel", parallel)?; - } - StepType::Condition { r#if } => { - map.serialize_entry("type", "condition")?; - map.serialize_entry("if", r#if)?; - } - StepType::Loop { for_each, body } => { - map.serialize_entry("type", "loop")?; - map.serialize_entry("for_each", for_each)?; - if !body.is_empty() { - map.serialize_entry("body", body)?; - } - } - } - map.end() - } -} - -impl<'de> serde::Deserialize<'de> for StepType { - fn deserialize(deserializer: D) -> Result - where - D: serde::Deserializer<'de>, - { - let value = serde_yaml::Value::deserialize(deserializer)?; - let map = value - .as_mapping() - .ok_or_else(|| serde::de::Error::custom("step must be a mapping"))?; - - // Prefer explicit 'type' for future-proof extensibility - if let Some(type_val) = map.get("type") { - let type_str = type_val - .as_str() - .ok_or_else(|| serde::de::Error::custom("step 'type' must be a string"))?; - return match type_str { - "skill" => { - let skill = map.get("skill").and_then(|v| v.as_str()).ok_or_else(|| { - serde::de::Error::custom("skill step requires 'skill' field") - })?; - Ok(StepType::Skill { skill: skill.to_string() }) - } - "subworkflow" | "workflow" => { - let workflow = - map.get("workflow").and_then(|v| v.as_str()).ok_or_else(|| { - serde::de::Error::custom("subworkflow step requires 'workflow' field") - })?; - Ok(StepType::Subworkflow { workflow: workflow.to_string() }) - } - "parallel" => { - let parallel = map.get("parallel").ok_or_else(|| { - serde::de::Error::custom("parallel step requires 'parallel' field") - })?; - let parallel: Vec = serde_yaml::from_value(parallel.clone()) - .map_err(|e| { - serde::de::Error::custom(format!("invalid parallel steps: {}", e)) - })?; - Ok(StepType::Parallel { parallel }) - } - "condition" | "if" => { - let r#if = map.get("if").and_then(|v| 
v.as_str()).ok_or_else(|| { - serde::de::Error::custom("condition step requires 'if' field") - })?; - Ok(StepType::Condition { r#if: r#if.to_string() }) - } - "loop" | "for_each" => { - let for_each = - map.get("for_each").and_then(|v| v.as_str()).ok_or_else(|| { - serde::de::Error::custom("loop step requires 'for_each' field") - })?; - let body = map - .get("body") - .map(|v| serde_yaml::from_value::>(v.clone())) - .transpose() - .map_err(|e| serde::de::Error::custom(format!("invalid loop body: {}", e)))? - .unwrap_or_default(); - Ok(StepType::Loop { - for_each: for_each.to_string(), - body, - }) - } - _ => Err(serde::de::Error::custom(format!("unknown step type: '{}'", type_str))), - }; - } - - // Backward-compatible field-name inference (legacy YAML without 'type') - if map.contains_key("skill") { - let skill = map - .get("skill") - .and_then(|v| v.as_str()) - .ok_or_else(|| serde::de::Error::custom("skill step requires 'skill' string"))?; - return Ok(StepType::Skill { skill: skill.to_string() }); - } - if map.contains_key("workflow") { - let workflow = map.get("workflow").and_then(|v| v.as_str()).ok_or_else(|| { - serde::de::Error::custom("subworkflow step requires 'workflow' string") - })?; - return Ok(StepType::Subworkflow { workflow: workflow.to_string() }); - } - if map.contains_key("parallel") { - let parallel = map.get("parallel").ok_or_else(|| { - serde::de::Error::custom("parallel step requires 'parallel' field") - })?; - let parallel: Vec = serde_yaml::from_value(parallel.clone()) - .map_err(|e| serde::de::Error::custom(format!("invalid parallel steps: {}", e)))?; - return Ok(StepType::Parallel { parallel }); - } - if map.contains_key("if") { - let r#if = map - .get("if") - .and_then(|v| v.as_str()) - .ok_or_else(|| serde::de::Error::custom("condition step requires 'if' string"))?; - return Ok(StepType::Condition { r#if: r#if.to_string() }); - } - if map.contains_key("for_each") { - let for_each = map - .get("for_each") - .and_then(|v| v.as_str()) - 
.ok_or_else(|| serde::de::Error::custom("loop step requires 'for_each' string"))?; - let body = map - .get("body") - .map(|v| serde_yaml::from_value::>(v.clone())) - .transpose() - .map_err(|e| serde::de::Error::custom(format!("invalid loop body: {}", e)))? - .unwrap_or_default(); - return Ok(StepType::Loop { - for_each: for_each.to_string(), - body, - }); - } - - Err(serde::de::Error::custom( - "cannot infer step type: missing known fields (skill, workflow, parallel, if, for_each) or explicit 'type'", - )) - } -} +pub mod definition; +pub mod execution; +pub mod step_type; -#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Default)] -pub enum ErrorPolicy { - #[default] - #[serde(rename = "fail")] - Fail, - #[serde(rename = "continue")] - Continue, - #[serde(rename = "retry")] - Retry { count: u32, backoff_ms: u64 }, - #[serde(rename = "fallback")] - Fallback { step_id: String }, -} - -// Runtime state models - -#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] -pub enum ExecutionStatus { - Pending, - Running, - Completed, - Failed, - Cancelled, -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct WorkflowExecution { - pub id: i64, - pub workflow_id: String, - pub inputs_json: String, - pub status: ExecutionStatus, - pub current_step: Option, - pub started_at: String, - pub finished_at: Option, - pub duration_ms: Option, - #[serde(default)] - pub step_results: HashMap, -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct StepResult { - pub step_id: String, - pub status: ExecutionStatus, - pub outputs: HashMap, - pub stdout: Option, - pub stderr: Option, - pub started_at: Option, - pub finished_at: Option, - pub error: Option, -} - -fn default_string_type() -> String { - "string".to_string() -} +// Re-export all public items so that `use devbase_workflow_model::*` continues to work. 
+pub use definition::*; +pub use execution::*; +pub use step_type::*; #[cfg(test)] mod tests { @@ -290,26 +17,26 @@ mod tests { #[test] fn test_loop_serde_roundtrip() { - let step = StepDefinition { + let step = definition::StepDefinition { id: "loop1".to_string(), - step_type: StepType::Loop { + step_type: step_type::StepType::Loop { for_each: "${inputs.repos}".to_string(), - body: vec![StepDefinition { + body: vec![definition::StepDefinition { id: "lint".to_string(), - step_type: StepType::Skill { skill: "clippy".to_string() }, + step_type: step_type::StepType::Skill { skill: "clippy".to_string() }, inputs: HashMap::new(), depends_on: vec![], - on_error: ErrorPolicy::Fail, + on_error: definition::ErrorPolicy::Fail, timeout_seconds: None, }], }, inputs: HashMap::new(), depends_on: vec![], - on_error: ErrorPolicy::Fail, + on_error: definition::ErrorPolicy::Fail, timeout_seconds: None, }; let yaml = serde_yaml::to_string(&step).unwrap(); - let parsed: StepDefinition = serde_yaml::from_str(&yaml).unwrap(); + let parsed: definition::StepDefinition = serde_yaml::from_str(&yaml).unwrap(); assert_eq!(step, parsed); } @@ -320,9 +47,9 @@ mod tests { id: loop1 for_each: "repo-a,repo-b" "#; - let parsed: StepDefinition = serde_yaml::from_str(yaml).unwrap(); + let parsed: definition::StepDefinition = serde_yaml::from_str(yaml).unwrap(); match &parsed.step_type { - StepType::Loop { for_each, body } => { + step_type::StepType::Loop { for_each, body } => { assert_eq!(for_each, "repo-a,repo-b"); assert!(body.is_empty()); } diff --git a/crates/devbase-workflow-model/src/step_type.rs b/crates/devbase-workflow-model/src/step_type.rs new file mode 100644 index 0000000..b1839c2 --- /dev/null +++ b/crates/devbase-workflow-model/src/step_type.rs @@ -0,0 +1,177 @@ +// SPDX-License-Identifier: AGPL-3.0-or-later +// Copyright (c) 2026 juice094 + +use crate::definition::StepDefinition; + +#[derive(Debug, Clone, PartialEq)] +pub enum StepType { + Skill { + skill: String, + }, + Subworkflow { 
+ workflow: String, + }, + Parallel { + parallel: Vec, + }, + Condition { + r#if: String, + }, + Loop { + for_each: String, + body: Vec, + }, +} + +impl serde::Serialize for StepType { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + use serde::ser::SerializeMap; + let mut map = serializer.serialize_map(None)?; + match self { + StepType::Skill { skill } => { + map.serialize_entry("type", "skill")?; + map.serialize_entry("skill", skill)?; + } + StepType::Subworkflow { workflow } => { + map.serialize_entry("type", "subworkflow")?; + map.serialize_entry("workflow", workflow)?; + } + StepType::Parallel { parallel } => { + map.serialize_entry("type", "parallel")?; + map.serialize_entry("parallel", parallel)?; + } + StepType::Condition { r#if } => { + map.serialize_entry("type", "condition")?; + map.serialize_entry("if", r#if)?; + } + StepType::Loop { for_each, body } => { + map.serialize_entry("type", "loop")?; + map.serialize_entry("for_each", for_each)?; + if !body.is_empty() { + map.serialize_entry("body", body)?; + } + } + } + map.end() + } +} + +impl<'de> serde::Deserialize<'de> for StepType { + fn deserialize(deserializer: D) -> Result + where + D: serde::Deserializer<'de>, + { + let value = serde_yaml::Value::deserialize(deserializer)?; + let map = value + .as_mapping() + .ok_or_else(|| serde::de::Error::custom("step must be a mapping"))?; + + // Prefer explicit 'type' for future-proof extensibility + if let Some(type_val) = map.get("type") { + let type_str = type_val + .as_str() + .ok_or_else(|| serde::de::Error::custom("step 'type' must be a string"))?; + return match type_str { + "skill" => { + let skill = map.get("skill").and_then(|v| v.as_str()).ok_or_else(|| { + serde::de::Error::custom("skill step requires 'skill' field") + })?; + Ok(StepType::Skill { skill: skill.to_string() }) + } + "subworkflow" | "workflow" => { + let workflow = + map.get("workflow").and_then(|v| v.as_str()).ok_or_else(|| { + 
serde::de::Error::custom("subworkflow step requires 'workflow' field") + })?; + Ok(StepType::Subworkflow { workflow: workflow.to_string() }) + } + "parallel" => { + let parallel = map.get("parallel").ok_or_else(|| { + serde::de::Error::custom("parallel step requires 'parallel' field") + })?; + let parallel: Vec = serde_yaml::from_value(parallel.clone()) + .map_err(|e| { + serde::de::Error::custom(format!("invalid parallel steps: {}", e)) + })?; + Ok(StepType::Parallel { parallel }) + } + "condition" | "if" => { + let r#if = map.get("if").and_then(|v| v.as_str()).ok_or_else(|| { + serde::de::Error::custom("condition step requires 'if' field") + })?; + Ok(StepType::Condition { r#if: r#if.to_string() }) + } + "loop" | "for_each" => { + let for_each = + map.get("for_each").and_then(|v| v.as_str()).ok_or_else(|| { + serde::de::Error::custom("loop step requires 'for_each' field") + })?; + let body = map + .get("body") + .map(|v| serde_yaml::from_value::>(v.clone())) + .transpose() + .map_err(|e| serde::de::Error::custom(format!("invalid loop body: {}", e)))? 
+ .unwrap_or_default(); + Ok(StepType::Loop { + for_each: for_each.to_string(), + body, + }) + } + _ => Err(serde::de::Error::custom(format!("unknown step type: '{}'", type_str))), + }; + } + + // Backward-compatible field-name inference (legacy YAML without 'type') + if map.contains_key("skill") { + let skill = map + .get("skill") + .and_then(|v| v.as_str()) + .ok_or_else(|| serde::de::Error::custom("skill step requires 'skill' string"))?; + return Ok(StepType::Skill { skill: skill.to_string() }); + } + if map.contains_key("workflow") { + let workflow = map.get("workflow").and_then(|v| v.as_str()).ok_or_else(|| { + serde::de::Error::custom("subworkflow step requires 'workflow' string") + })?; + return Ok(StepType::Subworkflow { workflow: workflow.to_string() }); + } + if map.contains_key("parallel") { + let parallel = map.get("parallel").ok_or_else(|| { + serde::de::Error::custom("parallel step requires 'parallel' field") + })?; + let parallel: Vec = serde_yaml::from_value(parallel.clone()) + .map_err(|e| serde::de::Error::custom(format!("invalid parallel steps: {}", e)))?; + return Ok(StepType::Parallel { parallel }); + } + if map.contains_key("if") { + let r#if = map + .get("if") + .and_then(|v| v.as_str()) + .ok_or_else(|| serde::de::Error::custom("condition step requires 'if' string"))?; + return Ok(StepType::Condition { r#if: r#if.to_string() }); + } + if map.contains_key("for_each") { + let for_each = map + .get("for_each") + .and_then(|v| v.as_str()) + .ok_or_else(|| serde::de::Error::custom("loop step requires 'for_each' string"))?; + let body = map + .get("body") + .map(|v| serde_yaml::from_value::>(v.clone())) + .transpose() + .map_err(|e| serde::de::Error::custom(format!("invalid loop body: {}", e)))? 
+ .unwrap_or_default(); + return Ok(StepType::Loop { + for_each: for_each.to_string(), + body, + }); + } + + Err(serde::de::Error::custom( + "cannot infer step type: missing known fields (skill, workflow, parallel, if, for_each) or explicit 'type'", + )) + } +} diff --git a/docs/guides/mcp-integration.md b/docs/guides/mcp-integration.md index 44a941d..fcf44b0 100644 --- a/docs/guides/mcp-integration.md +++ b/docs/guides/mcp-integration.md @@ -6,7 +6,7 @@ ## 什么是 MCP -MCP(Model Context Protocol)是 AI 助手与外部工具通信的标准协议。devbase 作为 **MCP Server**,向 AI 暴露 38 个结构化工具,让 AI 能够: +MCP(Model Context Protocol)是 AI 助手与外部工具通信的标准协议。devbase 作为 **MCP Server**,向 AI 暴露 69 个结构化工具,让 AI 能够: - 查询本地有哪些项目、它们的状态如何 - 批量同步仓库、检查健康度 diff --git a/docs/reference/mcp-tools.md b/docs/reference/mcp-tools.md index 46143ac..1210986 100644 --- a/docs/reference/mcp-tools.md +++ b/docs/reference/mcp-tools.md @@ -1,6 +1,6 @@ # MCP Tools 参考 -devbase MCP Server 提供 **40 个 tools**,通过 stdio 传输与 AI Agent 通信。工具按稳定性分为三级: +devbase MCP Server 提供 **69 个 tools**,通过 stdio 传输与 AI Agent 通信。工具按稳定性分为三级: - **Stable** — 经过充分测试,schema 冻结。详见 [`stable-tools/`](stable-tools/README.md) 独立文档。 - **Beta** — 功能验证通过,schema 可能微调 @@ -22,19 +22,19 @@ devbase MCP Server 提供 **40 个 tools**,通过 stdio 传输与 AI Agent 通 | 工具名 | Tier | 一句话描述 | 关键参数 | |--------|------|-----------|----------| -| `devkit_code_metrics` | Experimental | 统计代码行数、语言分布、测试覆盖率 | `repo_id` | -| `devkit_module_graph` | Experimental | 获取仓库模块依赖图 | `repo_id` | +| `devkit_code_metrics` | Beta | 统计代码行数、语言分布、测试覆盖率 | `repo_id` | +| `devkit_module_graph` | Beta | 获取仓库模块依赖图 | `repo_id` | | `devkit_code_symbols` | Beta | 列出仓库中的代码符号(函数/结构体/枚举等) | `repo_id`, `file_path`, `symbol_type` | | `devkit_dependency_graph` | Beta | 获取跨仓库依赖关系图 | `repo_id` | -| `devkit_call_graph` | Experimental | 获取函数调用图 | `repo_id`, `symbol_name` | -| `devkit_dead_code` | Experimental | 检测未被调用的私有函数 | `repo_id`, `include_pub` | +| `devkit_call_graph` | Beta | 获取函数调用图 | `repo_id`, `symbol_name` | +| `devkit_dead_code` | 
Beta | 检测未被调用的私有函数 | `repo_id`, `include_pub` | ## 知识检索(8) | 工具名 | Tier | 一句话描述 | 关键参数 | |--------|------|-----------|----------| | `devkit_semantic_search` | Beta | 基于 embedding 的语义代码搜索 | `repo_id`, `query`, `limit` | -| [`devkit_hybrid_search`](stable-tools/hybrid_search.md) | Stable | 向量语义 + 关键词 RRF 混合搜索 | `repo_id`, `query`, `limit` | +| `devkit_hybrid_search` | Beta | 向量语义 + 关键词 RRF 混合搜索 | `repo_id`, `query`, `limit` | | `devkit_cross_repo_search` | Beta | 跨仓库符号搜索(按 tag 过滤) | `tags`, `query`, `limit` | | `devkit_related_symbols` | Experimental | 查找与指定符号相关的符号 | `repo_id`, `symbol_name` | | `devkit_embedding_store` | Beta | 存储代码符号的 embedding 向量 | `repo_id`, `symbol_name`, `embedding` | @@ -42,7 +42,7 @@ devbase MCP Server 提供 **40 个 tools**,通过 stdio 传输与 AI Agent 通 | `devkit_natural_language_query` | Beta | 自然语言查询(NLQ) | `query`, `limit` | | `devkit_knowledge_report` | Beta | 生成工作区知识覆盖报告 | `repo_id`, `activity_limit` | -## Vault 笔记(4) +## Vault 笔记(8) | 工具名 | Tier | 一句话描述 | 关键参数 | |--------|------|-----------|----------| @@ -50,6 +50,10 @@ devbase MCP Server 提供 **40 个 tools**,通过 stdio 传输与 AI Agent 通 | `devkit_vault_read` | Stable | 读取指定 Vault 笔记的完整内容 | `path` | | `devkit_vault_write` | Beta | 写入或更新 Vault 笔记(destructive gate) | `path`, `content`, `frontmatter` | | `devkit_vault_backlinks` | Beta | 查找指向指定笔记的反向链接 | `note_id` | +| `devkit_vault_daily` | Beta | 按日期列出 Vault 每日笔记 | `date`, `limit` | +| `devkit_vault_graph` | Beta | 获取 Vault 笔记链接图 | `repo_id`, `note_id`, `depth` | +| `devkit_vault_export` | Beta | 导出 Vault 笔记集合 | `query`, `format` | +| `devkit_vault_history` | Beta | 获取 Vault 笔记修改历史 | `path`, `limit` | ## Skill 运行时(4) @@ -60,33 +64,78 @@ devbase MCP Server 提供 **40 个 tools**,通过 stdio 传输与 AI Agent 通 | `devkit_skill_run` | Beta | 执行指定 Skill(destructive gate) | `skill_id`, `args` | | `devkit_skill_discover` | Beta | 将当前项目封装为 Skill(destructive gate,dry_run 默认 true) | `path` | -## 项目上下文(2) +## 项目上下文(3) | 工具名 | Tier | 一句话描述 | 关键参数 | 
|--------|------|-----------|----------| | `devkit_project_context` | Stable | 获取项目统一上下文(repo + vault + assets + modules + symbols + calls) | `project` | -| [`devkit_project_brief`](stable-tools/project_brief.md) | Stable | 生成 Markdown 项目摘要(架构 + 活动 + 限制),供 LLM 注入 | `repo_id`, `max_tokens` | +| `devkit_project_brief` | Beta | 生成 Markdown 项目摘要(架构 + 活动 + 限制),供 LLM 注入 | `repo_id`, `max_tokens` | +| `devkit_impact_analysis` | Beta | 分析代码变更影响范围 | `repo_id`, `file_path` | -## Session 管理(1) +## Session 管理(13) | 工具名 | Tier | 一句话描述 | 关键参数 | |--------|------|-----------|----------| -| [`devkit_session_recall`](stable-tools/session_recall.md) | Stable | 基于 embedding 的语义记忆召回 | `context_id`, `query_embedding`, `limit` | +| `devkit_session_save` | Beta | 保存当前会话上下文 | `name`, `tags` | +| `devkit_session_list` | Beta | 列出已保存的会话 | `limit` | +| `devkit_session_resume` | Beta | 恢复指定会话 | `session_id` | +| `devkit_session_attach` | Beta | 附加到运行中的会话 | `session_id` | +| `devkit_session_detach` | Beta | 从当前会话分离 | `session_id` | +| `devkit_session_activate` | Beta | 激活会话上下文 | `session_id` | +| `devkit_session_search` | Beta | 搜索会话历史 | `query`, `limit` | +| `devkit_session_capture` | Beta | 捕获当前会话快照 | `name` | +| `devkit_session_workflows` | Beta | 获取会话关联的工作流 | `session_id` | +| `devkit_session_recall` | Experimental | 基于 embedding 的语义记忆召回 | `context_id`, `query_embedding`, `limit` | +| `devkit_session_index` | Experimental | 索引会话内容用于搜索 | `session_id` | +| `devkit_session_export` | Experimental | 导出会话为文件 | `session_id`, `format` | +| `devkit_session_import` | Experimental | 从文件导入会话 | `path` | + +## Index 管理(2) -## 其他(10) +| 工具名 | Tier | 一句话描述 | 关键参数 | +|--------|------|-----------|----------| +| `devkit_index_health` | Beta | 检查索引健康状态 | `repo_id` | +| `devkit_index_stream` | Beta | 流式索引进度 | `path` | + +## Workflow(3) + +| 工具名 | Tier | 一句话描述 | 关键参数 | +|--------|------|-----------|----------| +| `devkit_workflow_list` | Beta | 列出可用工作流 | `limit` | +| `devkit_workflow_run` | Beta | 执行工作流 |
`workflow_id`, `args` | +| `devkit_workflow_status` | Beta | 查询工作流执行状态 | `workflow_id` | + +## Relation 图谱(3) + +| 工具名 | Tier | 一句话描述 | 关键参数 | +|--------|------|-----------|----------| +| `devkit_relation_store` | Beta | 存储实体间关系 | `from`, `to`, `relation_type` | +| `devkit_relation_query` | Beta | 查询实体关系 | `entity_id`, `relation_type` | +| `devkit_relation_delete` | Beta | 删除实体关系 | `from`, `to`, `relation_type` | + +## Known Limit(2) + +| 工具名 | Tier | 一句话描述 | 关键参数 | +|--------|------|-----------|----------| +| `devkit_known_limit_store` | Beta | 记录已知限制(Hard Veto / Known Bug) | `id`, `category`, `description` | +| `devkit_known_limit_list` | Beta | 列出已知限制 | `category`, `mitigated` | + +## 其他(12) | 工具名 | Tier | 一句话描述 | 关键参数 | |--------|------|-----------|----------| | `devkit_query` | Beta | 通用查询(repo/tag/keyword) | `query`, `limit`, `page` | | `devkit_note` | Beta | 为仓库添加 AI 发现笔记 | `repo_id`, `text`, `author` | +| `devkit_status` | Beta | 检查 devbase 服务状态 | — | | `devkit_digest` | Experimental | 生成每日知识摘要 | — | | `devkit_paper_index` | Experimental | 索引学术论文 | `title`, `authors`, `venue` | +| `devkit_search_quality` | Beta | 评估搜索质量指标 | `repo_id`, `query` | | `devkit_experiment_log` | Beta | 记录实验结果 | `repo_id`, `paper_id`, `status` | | `devkit_github_info` | Beta | 查询 GitHub 仓库信息 | `owner`, `repo` | | `devkit_arxiv_fetch` | Beta | 从 arXiv 获取论文元数据 | `query`, `max_results` | -| `devkit_known_limit_store` | Beta | 记录已知限制(Hard Veto / Known Bug) | `id`, `category`, `description` | -| `devkit_known_limit_list` | Beta | 列出已知限制 | `category`, `mitigated` | | `devkit_oplog_query` | Beta | 查询操作日志 | `limit`, `repo_id` | +| `devkit_evaluate` | Beta | 评估工具调用结果 | `tool_name`, `result` | +| `devkit_document_convert` | Experimental | PDF/PPTX → Markdown 转换 | `source_path`, `output_path` | --- @@ -98,6 +147,10 @@ devbase MCP Server 提供 **40 个 tools**,通过 stdio 传输与 AI Agent 通 - `devkit_skill_run` - `devkit_skill_discover` - `devkit_vault_write` +- `devkit_relation_store` +-
`devkit_relation_delete` +- `devkit_known_limit_store` +- `devkit_workflow_run` --- diff --git a/docs/reference/stable-tools/README.md b/docs/reference/stable-tools/README.md index 66e3b36..3b0b495 100644 --- a/docs/reference/stable-tools/README.md +++ b/docs/reference/stable-tools/README.md @@ -1,15 +1,15 @@ # Stable Tools Reference -Tools in this directory have **frozen schemas** as of devbase v0.21.0. +Tools in this directory have **frozen schemas** as of devbase v0.20.1. Breaking changes require a major version bump and a deprecation cycle. -| Tool | Purpose | File | -|------|---------|------| -| [`devkit_health`](health.md) | Check Git health (dirty/ahead/behind) of all registered repos | `repo.rs` | -| [`devkit_project_brief`](project_brief.md) | Generate a Markdown project brief for LLM context injection | `brief.rs` | -| [`devkit_hybrid_search`](hybrid_search.md) | Vector + keyword RRF search for code symbols | `search.rs` | -| [`devkit_vault_search`](vault_search.md) | Keyword search across Vault notes (titles, tags, content) | `vault.rs` | -| [`devkit_session_recall`](session_recall.md) | Semantic memory recall by embedding similarity | `session.rs` | +| Tool | Purpose | File | Test Coverage | +|------|---------|------|---------------| +| [`devkit_health`](health.md) | Check Git health (dirty/ahead/behind) of all registered repos | `repo.rs` | `test_tools_call_devkit_health` | +| `devkit_query_repos` | Query registered repos with language/tag/status filters | `repo.rs` | `test_tools_call_devkit_query_repos` | +| [`devkit_vault_search`](vault_search.md) | Keyword search across Vault notes (titles, tags, content) | `vault.rs` | `test_tools_call_devkit_vault_search` | +| `devkit_vault_read` | Read full content of a Vault note including frontmatter | `vault.rs` | `test_tools_call_devkit_vault_read` | +| `devkit_project_context` | Unified project snapshot (repo + vault + symbols + relations + limits + skills) | `context.rs` | 
`test_tools_call_devkit_project_context` | ## Schema stability guarantee @@ -19,6 +19,8 @@ Breaking changes require a major version bump and a deprecation cycle. ## Changelog -| Version | Change | -|---------|------------------------------------------| -| v0.21.0 | 5 tools promoted to Stable; schemas frozen | +| Version | Change | +|---------|--------| +| v0.20.1 | 5 Stable tools verified with dedicated invocation tests | +| v0.20.0 | `project_context` enriched with `known_limits` and `skills` | +| v0.14.2 | 5 tools promoted to Stable tier | diff --git a/docs/reference/stable-tools/hybrid_search.md b/docs/reference/stable-tools/hybrid_search.md deleted file mode 100644 index 917f25b..0000000 --- a/docs/reference/stable-tools/hybrid_search.md +++ /dev/null @@ -1,93 +0,0 @@ -# devkit_hybrid_search - -> **Tier**: Stable (frozen at v0.21.0) -> **Source**: `src/mcp/tools/search.rs` — `DevkitHybridSearchTool` - -Hybrid code symbol search combining vector embeddings and keyword matching via Reciprocal Rank Fusion (RRF). 
- -## Purpose - -- Find code related to a concept ("authentication", "error handling") -- Search with either natural language or an embedding vector -- Get robust results even when the embedding provider is offline - -## When NOT to use - -- Exact keyword searches → use `devkit_natural_language_query` -- Finding symbol definitions by exact name → use `devkit_code_symbols` -- When no embeddings exist and no keyword query is available - -## Input Schema - -```json -{ - "type": "object", - "properties": { - "repo_id": { "type": "string" }, - "query_text": { "type": "string", "description": "Keyword or natural language query" }, - "query_embedding": { - "type": "array", - "items": { "type": "number" }, - "description": "Optional query embedding vector" - }, - "limit": { "type": "integer", "default": 10 } - }, - "required": ["repo_id", "query_text"] -} -``` - -| Parameter | Type | Required | Default | Description | -|-----------------|------------|----------|---------|--------------------------------------------| -| `repo_id` | string | Yes | — | Registered repository ID | -| `query_text` | string | Yes | — | Keyword or natural language query | -| `query_embedding`| number[] | No | — | Optional f32 vector for semantic search | -| `limit` | integer | No | 10 | Max results (capped at 50) | - -## Behavior - -| Scenario | Behavior | -|---------------------------------------|---------------------------------------------------| -| `query_embedding` provided | RRF fusion: vector similarity (70%) + keyword (30%) | -| `query_embedding` omitted | Falls back to pure keyword search on symbol names/signatures | -| No embeddings exist for repo | Gracefully degrades to keyword search | -| Embedding generation fails | Warns in logs, falls back to keyword search | - -## Output Schema - -```json -{ - "success": true, - "repo_id": "devbase", - "query_text": "error handling", - "count": 3, - "symbols": [ - { - "name": "handle_error", - "file_path": "src/errors.rs", - "line_start": 42, - 
"similarity_score": 0.87 - } - ] -} -``` - -| Field | Type | Description | -|------------------|---------|------------------------------------------| -| `name` | string | Symbol name | -| `file_path` | string | Relative file path in the repo | -| `line_start` | integer | Line number where symbol begins | -| `similarity_score`| number | RRF score (0.0–1.0, higher is better) | - -## Errors - -| Error | Cause | -|--------------------|------------------------------------------| -| `repo_id required` | Missing `repo_id` | -| `query_text required`| Missing `query_text` | -| Database error | SQLite query failure | - -## Changelog - -| Version | Change | -|---------|------------------------------------------| -| v0.21.0 | Schema frozen as Stable | diff --git a/docs/reference/stable-tools/project_brief.md b/docs/reference/stable-tools/project_brief.md deleted file mode 100644 index f668eaf..0000000 --- a/docs/reference/stable-tools/project_brief.md +++ /dev/null @@ -1,71 +0,0 @@ -# devkit_project_brief - -> **Tier**: Stable (frozen at v0.21.0) -> **Source**: `src/mcp/tools/brief.rs` — `DevkitProjectBriefTool` - -Generate a Markdown project brief optimized for LLM context injection. 
- -## Purpose - -- Summarize a repository's architecture, symbols, and recent activity -- Produce a concise context document for LLM prompts -- Surface known limits, active contexts, and hot files - -## When NOT to use - -- Searching for specific symbols → use `devkit_code_symbols` -- Reading full source files → use filesystem tools -- Getting Git health status → use `devkit_health` - -## Input Schema - -```json -{ - "type": "object", - "properties": { - "repo_id": { "type": "string" }, - "max_tokens": { "type": "integer", "default": 2000 } - }, - "required": ["repo_id"] -} -``` - -| Parameter | Type | Required | Default | Description | -|--------------|---------|----------|---------|---------------------------------------------| -| `repo_id` | string | Yes | — | Registered repository ID | -| `max_tokens` | integer | No | 2000 | Approximate token budget (1 token ~ 4 chars)| - -## Output Schema - -```json -{ - "success": true, - "repo_id": "devbase", - "brief": "# Project Brief: devbase\n\n## Overview\n- **Language**: rust\n- **Tags**: cli, rust, active\n- **Path**: `C:\\Users\\dev\\devbase`\n\n## Architecture\n- `main` (function)\n- `scan` (function)\n..." -} -``` - -### Brief sections (in order) - -1. **Overview** — language, tags, local path -2. **Architecture** — modules (up to 20) and key symbols (up to 15) -3. **Recent Activity** — last 7 commits, hot files (14d change count) -4. **Known Limits & Tech Debt** — open known_limits entries (up to 10) -5. **Active Contexts** — linked agent contexts with memories - -### Truncation behavior - -If the generated brief exceeds `max_tokens * 4` characters, it is truncated at the nearest section boundary (`\n## `) with an ellipsis note. 
- -## Errors - -| Error | Cause | -|--------------------|-------------------------------------------------| -| `repo_id required` | Missing or empty `repo_id` argument | -| `repo not found` | `repo_id` does not exist in the registry | - -## Changelog - -| Version | Change | -|---------|------------------------------------------| -| v0.21.0 | Schema frozen as Stable | diff --git a/docs/reference/stable-tools/session_recall.md b/docs/reference/stable-tools/session_recall.md deleted file mode 100644 index 5d1a7dd..0000000 --- a/docs/reference/stable-tools/session_recall.md +++ /dev/null @@ -1,91 +0,0 @@ -# devkit_session_recall - -> **Tier**: Stable (frozen at v0.21.0) -> **Source**: `src/mcp/tools/session.rs` — `DevkitSessionRecallTool` - -Semantic memory recall for an active agent session. Finds relevant past memories by meaning rather than exact keyword. - -## Purpose - -- Surface decisions, constraints, or discoveries related to the current task -- Inject top-k relevant memories into prompt context -- Recall what was discussed in a previous project session - -## When NOT to use - -- Keyword-based memory search → use `devkit_session_search` -- Listing all sessions → use `devkit_session_list` -- Saving a new memory → use `devkit_session_capture` -- When embeddings have not been stored for memories → use `devkit_session_index` first - -## Input Schema - -```json -{ - "type": "object", - "properties": { - "context_id": { "type": "string", "description": "Session ID (optional)" }, - "query_embedding": { - "type": "array", - "items": { "type": "number" }, - "description": "Query vector as f32 array (externally generated)" - }, - "limit": { "type": "integer", "default": 5 } - }, - "required": ["query_embedding"] -} -``` - -| Parameter | Type | Required | Default | Description | -|-----------------|------------|----------|---------|--------------------------------------------| -| `context_id` | string | No | — | Session ID. 
Falls back to `DEVBASE_ACTIVE_CONTEXT` env var or `.active_context` state file | -| `query_embedding`| number[] | Yes | — | Externally-generated f32 embedding vector | -| `limit` | integer | No | 5 | Max results (capped at 20) | - -## Important: Embedding source - -devbase does **NOT** generate embeddings. The caller must provide a pre-computed vector from an external provider (Ollama, OpenAI, etc.). Use the same model that was used to index the memories via `devkit_session_index`. - -## Output Schema - -```json -{ - "success": true, - "context_id": "project-alpha", - "count": 3, - "memories": [ - { - "id": 42, - "type": "decision", - "content": "Use SQLite WAL mode for concurrent reads", - "created_at": "2026-05-10T14:32:00Z", - "embedding_model": "nomic-embed-text", - "score": 0.91 - } - ] -} -``` - -| Field | Type | Description | -|-------------------|---------|------------------------------------------| -| `id` | integer | Memory row ID | -| `type` | string | Memory classification: decision, constraint, note, discovery, error, action | -| `content` | string | Full memory text | -| `created_at` | string | ISO 8601 timestamp | -| `embedding_model` | string | Model used when memory was indexed | -| `score` | number | Cosine similarity (0.0–1.0) | - -## Errors - -| Error | Cause | -|------------------------------|-----------------------------------------------------| -| `query_embedding required` | Missing or empty embedding array | -| `query_embedding must not be empty` | Array contains no valid f32 values | -| No active session | `context_id` omitted and no active session set | -| Memory not found | `memory_id` in `devkit_session_index` does not exist| - -## Changelog - -| Version | Change | -|---------|------------------------------------------| -| v0.21.0 | Schema frozen as Stable | diff --git a/glama.json b/glama.json new file mode 100644 index 0000000..c48d502 --- /dev/null +++ b/glama.json @@ -0,0 +1,6 @@ +{ + "$schema": 
"https://glama.ai/mcp/schemas/server.json", + "maintainers": [ + "juice094" + ] +} diff --git a/scripts/phase1_vault_semantic_index.py b/scripts/phase1_vault_semantic_index.py new file mode 100644 index 0000000..71c6bd5 --- /dev/null +++ b/scripts/phase1_vault_semantic_index.py @@ -0,0 +1,618 @@ +#!/usr/bin/env python3 +""" +Phase 1 Prototype: Vault Semantic Indexing via Local Ollama + sqlite-vec + +Pipeline: + 1. Scan Vault for Markdown files + 2. Parse frontmatter + body + 3. Chunk by heading hierarchy + 4. Generate embeddings via Ollama /api/embed + 5. Store in sqlite-vec + 6. Semantic search interface + +Usage: + python phase1_vault_semantic_index.py index # Full rebuild + python phase1_vault_semantic_index.py search "本地模型知识库设计" # Query + python phase1_vault_semantic_index.py rag "本地模型知识库设计" # RAG answer + python phase1_vault_semantic_index.py stats # Show index stats +""" + +import sqlite3 +import sqlite_vec +import yaml +import re +import json +import urllib.request +from pathlib import Path +from datetime import datetime +from typing import List, Dict, Optional, Tuple +from dataclasses import dataclass, asdict + +# --------------------------------------------------------------------------- +# Config +# --------------------------------------------------------------------------- + +VAULT_DIR = Path("C:/Users/22414/Documents/Obsidian Vault") +DB_PATH = Path("C:/Users/22414/.devbase/vault_semantic_index.db") +OLLAMA_URL = "http://localhost:11434/api/embed" +GENERATE_URL = "http://localhost:11434/api/generate" +# bge-m3: 1024-dim, multilingual (Chinese optimized), ~1.2GB +# nomic-embed-text: 768-dim, English optimized, ~274MB — kept as fallback +EMBED_MODEL = "bge-m3" +GENERATE_MODEL = "qwen2.5:7b" +EMBED_DIM = 1024 +BATCH_SIZE = 16 + +# Files/dirs to skip +SKIP_PATTERNS = [ + r"\.obsidian", + r"\.trash", + r"99-Archive/\.trash-待清理", + r"workspace", + r"student-era", + r"devbase-knowledge", + r"dev", + r"clarity", + r"dotfiles", + r"syncthing-rust", + 
r"skills-dev", +] + +# --------------------------------------------------------------------------- +# Data structures +# --------------------------------------------------------------------------- + +@dataclass +class Chunk: + file_path: str + chunk_index: int + chunk_type: str # 'heading' | 'paragraph' | 'code' | 'table' | 'frontmatter' + heading_path: str # H2/H3 breadcrumb, e.g. "## 架构定位 / ### 技术选型" + content: str + tags: str # JSON array from frontmatter + date: Optional[str] + indexed_at: str + +# --------------------------------------------------------------------------- +# Markdown parsing +# --------------------------------------------------------------------------- + +def parse_frontmatter(text: str) -> Tuple[Dict, str]: + """Extract YAML frontmatter and return (metadata, body).""" + if text.startswith("---"): + parts = text.split("---", 2) + if len(parts) >= 3: + try: + meta = yaml.safe_load(parts[1]) or {} + return meta, parts[2].strip() + except yaml.YAMLError: + pass + return {}, text + + +def split_into_chunks(file_path: str, body: str, frontmatter: Dict) -> List[Chunk]: + """Split Markdown body into semantic chunks by heading hierarchy.""" + chunks: List[Chunk] = [] + tags = json.dumps(frontmatter.get("tags", []), ensure_ascii=False) + date = frontmatter.get("date") or frontmatter.get("date") + + # Chunk 0: frontmatter summary (if present) + if frontmatter: + summary = " ".join(f"{k}: {v}" for k, v in frontmatter.items() + if k in ("title", "project", "type", "tags", "description")) + if summary: + chunks.append(Chunk( + file_path=file_path, + chunk_index=0, + chunk_type="frontmatter", + heading_path="", + content=summary, + tags=tags, + date=date, + indexed_at=datetime.now().isoformat(), + )) + + lines = body.splitlines() + current_heading = "" + current_lines: List[str] = [] + chunk_idx = len(chunks) + + def flush(): + nonlocal chunk_idx, current_lines + if not current_lines: + return + content = "\n".join(current_lines).strip() + if len(content) 
>= 20: # Skip very short fragments + chunks.append(Chunk( + file_path=file_path, + chunk_index=chunk_idx, + chunk_type="paragraph", + heading_path=current_heading, + content=content, + tags=tags, + date=date, + indexed_at=datetime.now().isoformat(), + )) + chunk_idx += 1 + current_lines = [] + + in_code_block = False + code_buffer: List[str] = [] + code_lang = "" + + for line in lines: + # Code blocks + if line.strip().startswith("```"): + if in_code_block: + # End code block + code_buffer.append(line) + code_content = "\n".join(code_buffer) + if len(code_content) >= 30: + chunks.append(Chunk( + file_path=file_path, + chunk_index=chunk_idx, + chunk_type="code", + heading_path=current_heading, + content=f"[{code_lang}]\n{code_content}", + tags=tags, + date=date, + indexed_at=datetime.now().isoformat(), + )) + chunk_idx += 1 + code_buffer = [] + in_code_block = False + code_lang = "" + else: + # Start code block + flush() + in_code_block = True + code_lang = line.strip()[3:].strip() + code_buffer.append(line) + continue + + if in_code_block: + code_buffer.append(line) + continue + + # Headings: update breadcrumb, do NOT append heading line to chunk + m = re.match(r"^(#{2,3})\s+(.+)$", line) + if m: + flush() + level = len(m.group(1)) + title = m.group(2).strip() + if level == 2: + current_heading = f"## {title}" + else: + current_heading = f"{current_heading} / ### {title}" + continue + + # Tables: collect as chunks, flush when too large + if line.strip().startswith("|"): + if current_lines and not current_lines[-1].strip().startswith("|"): + flush() + if current_lines and len("\n".join(current_lines)) + len(line) > 3000: + flush() + current_lines.append(line) + continue + + # Empty line -> potential flush boundary + if line.strip() == "": + if current_lines and len("\n".join(current_lines)) > 400: + flush() + continue + + current_lines.append(line) + + flush() + + # Post-process: split oversized paragraphs + final_chunks: List[Chunk] = [] + for c in chunks: + if 
c.chunk_type == "paragraph" and len(c.content) > 800: + # Split by sentences + sentences = re.split(r'(?<=[。\.\?\!])\s+', c.content) + half = len(sentences) // 2 + if half > 0: + final_chunks.append(Chunk( + file_path=c.file_path, chunk_index=c.chunk_index, + chunk_type=c.chunk_type, heading_path=c.heading_path, + content=" ".join(sentences[:half]), tags=c.tags, date=c.date, + indexed_at=c.indexed_at, + )) + final_chunks.append(Chunk( + file_path=c.file_path, chunk_index=c.chunk_index + 1, + chunk_type=c.chunk_type, heading_path=c.heading_path, + content=" ".join(sentences[half:]), tags=c.tags, date=c.date, + indexed_at=c.indexed_at, + )) + else: + final_chunks.append(c) + else: + final_chunks.append(c) + + # Re-index chunk_index + for i, c in enumerate(final_chunks): + c.chunk_index = i + + return final_chunks + + +# --------------------------------------------------------------------------- +# Ollama embedding +# --------------------------------------------------------------------------- + +def embed_batch(texts: List[str]) -> List[List[float]]: + """Call Ollama /api/embed for a batch of texts.""" + # nomic-embed-text supports 8192 tokens; table-heavy content is token-inefficient + MAX_CHARS = 6000 + trimmed = [t[:MAX_CHARS] if len(t) > MAX_CHARS else t for t in texts] + body = json.dumps({ + "model": EMBED_MODEL, + "input": trimmed, + }).encode("utf-8") + + req = urllib.request.Request( + OLLAMA_URL, + data=body, + headers={"Content-Type": "application/json"}, + method="POST", + ) + + with urllib.request.urlopen(req, timeout=120) as resp: + data = json.loads(resp.read()) + embeddings = data.get("embeddings", []) + if not embeddings: + raise RuntimeError(f"Ollama returned no embeddings: {data}") + return embeddings + + +def generate(prompt: str, model: str = GENERATE_MODEL, timeout: int = 300) -> str: + """Call Ollama /api/generate and return the generated text.""" + body = json.dumps({ + "model": model, + "prompt": prompt, + "stream": False, + "options": { + 
"temperature": 0.3, + "num_predict": 512, + }, + }).encode("utf-8") + + req = urllib.request.Request( + GENERATE_URL, + data=body, + headers={"Content-Type": "application/json"}, + method="POST", + ) + + with urllib.request.urlopen(req, timeout=timeout) as resp: + data = json.loads(resp.read()) + response = data.get("response", "") + if not response: + raise RuntimeError(f"Ollama returned empty response: {data}") + return response + + +# --------------------------------------------------------------------------- +# Database (sqlite-vec) +# --------------------------------------------------------------------------- + +def init_db() -> sqlite3.Connection: + """Create tables and virtual vector index.""" + DB_PATH.parent.mkdir(parents=True, exist_ok=True) + conn = sqlite3.connect(str(DB_PATH)) + conn.enable_load_extension(True) + sqlite_vec.load(conn) + conn.enable_load_extension(False) + + conn.execute(""" + CREATE TABLE IF NOT EXISTS vault_chunks ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + file_path TEXT NOT NULL, + chunk_index INTEGER NOT NULL, + chunk_type TEXT, + heading_path TEXT, + content TEXT NOT NULL, + tags TEXT, + date TEXT, + indexed_at TEXT NOT NULL, + file_mtime REAL, + UNIQUE(file_path, chunk_index) + ) + """) + + conn.execute(""" + CREATE TABLE IF NOT EXISTS index_meta ( + key TEXT PRIMARY KEY, + value TEXT + ) + """) + + # sqlite-vec virtual table for vector search + conn.execute(f""" + CREATE VIRTUAL TABLE IF NOT EXISTS vec_index USING vec0( + chunk_id INTEGER PRIMARY KEY, + embedding FLOAT[{EMBED_DIM}] + ) + """) + + conn.execute("CREATE INDEX IF NOT EXISTS idx_chunks_file ON vault_chunks(file_path)") + conn.execute("CREATE INDEX IF NOT EXISTS idx_chunks_type ON vault_chunks(chunk_type)") + conn.execute("CREATE INDEX IF NOT EXISTS idx_chunks_date ON vault_chunks(date)") + + return conn + + +def store_chunks(conn: sqlite3.Connection, chunks: List[Chunk], embeddings: List[List[float]]): + """Upsert chunks and their embeddings.""" + assert len(chunks) 
== len(embeddings) + + for chunk, emb in zip(chunks, embeddings): + # Upsert chunk + conn.execute(""" + INSERT INTO vault_chunks (file_path, chunk_index, chunk_type, heading_path, + content, tags, date, indexed_at) + VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8) + ON CONFLICT(file_path, chunk_index) DO UPDATE SET + chunk_type = excluded.chunk_type, + heading_path = excluded.heading_path, + content = excluded.content, + tags = excluded.tags, + date = excluded.date, + indexed_at = excluded.indexed_at + """, (chunk.file_path, chunk.chunk_index, chunk.chunk_type, + chunk.heading_path, chunk.content, chunk.tags, + chunk.date, chunk.indexed_at)) + + chunk_id = conn.execute( + "SELECT id FROM vault_chunks WHERE file_path = ?1 AND chunk_index = ?2", + (chunk.file_path, chunk.chunk_index) + ).fetchone()[0] + + # sqlite-vec virtual table does not support UPSERT; use DELETE + INSERT + conn.execute("DELETE FROM vec_index WHERE chunk_id = ?", (chunk_id,)) + conn.execute( + "INSERT INTO vec_index (chunk_id, embedding) VALUES (?1, ?2)", + (chunk_id, json.dumps(emb)) + ) + + conn.commit() + + +def delete_file_chunks(conn: sqlite3.Connection, file_path: str): + """Remove all chunks and embeddings for a given file.""" + rows = conn.execute( + "SELECT id FROM vault_chunks WHERE file_path = ?", (file_path,) + ).fetchall() + for (chunk_id,) in rows: + conn.execute("DELETE FROM vec_index WHERE chunk_id = ?", (chunk_id,)) + conn.execute("DELETE FROM vault_chunks WHERE file_path = ?", (file_path,)) + conn.commit() + + +# --------------------------------------------------------------------------- +# Indexing +# --------------------------------------------------------------------------- + +def should_index(file_path: Path) -> bool: + """Check if file should be indexed.""" + rel = file_path.relative_to(VAULT_DIR).as_posix() + for pat in SKIP_PATTERNS: + if re.search(pat, rel): + return False + return file_path.suffix == ".md" + + +def index_all(): + """Full rebuild of the semantic index.""" + conn 
= init_db() + + # Collect all markdown files + files = [f for f in VAULT_DIR.rglob("*.md") if should_index(f)] + print(f"[INFO] Found {len(files)} Markdown files to index") + + # Clear existing index for full rebuild — drop vec_index to avoid + # sqlite-vec HNSW shadow-state bugs on DELETE+INSERT cycles. + conn.execute("DROP TABLE IF EXISTS vec_index") + conn.execute("DELETE FROM vault_chunks") + conn.execute(f""" + CREATE VIRTUAL TABLE vec_index USING vec0( + chunk_id INTEGER PRIMARY KEY, + embedding FLOAT[{EMBED_DIM}] + ) + """) + conn.commit() + + total_chunks = 0 + for i, file_path in enumerate(files, 1): + rel_path = file_path.relative_to(VAULT_DIR).as_posix() + print(f"[{i}/{len(files)}] {rel_path}") + + try: + text = file_path.read_text(encoding="utf-8") + except Exception as e: + print(f" [WARN] Read failed: {e}") + continue + + frontmatter, body = parse_frontmatter(text) + chunks = split_into_chunks(rel_path, body, frontmatter) + if not chunks: + continue + + # Generate embeddings in batches + texts = [c.content for c in chunks] + embeddings: List[List[float]] = [] + for batch_start in range(0, len(texts), BATCH_SIZE): + batch = texts[batch_start:batch_start + BATCH_SIZE] + try: + embs = embed_batch(batch) + embeddings.extend(embs) + print(f" → embedded {len(batch)} chunks") + except Exception as e: + print(f" [ERROR] Embedding failed: {e}") + break + + if len(embeddings) == len(chunks): + store_chunks(conn, chunks, embeddings) + total_chunks += len(chunks) + + conn.execute( + "INSERT OR REPLACE INTO index_meta (key, value) VALUES ('last_full_index', ?)", + (datetime.now().isoformat(),) + ) + conn.commit() + conn.close() + + print(f"\n[INFO] Index complete: {total_chunks} chunks from {len(files)} files") + print(f"[INFO] Database: {DB_PATH}") + + +# --------------------------------------------------------------------------- +# Search +# --------------------------------------------------------------------------- + +def search(query: str, top_k: int = 5) -> 
List[Dict]: + """Semantic search over the Vault index.""" + conn = init_db() + + # Embed query + query_emb = embed_batch([query])[0] + query_json = json.dumps(query_emb) + + # Vector search via sqlite-vec (k=top_k required for KNN) + rows = conn.execute(""" + SELECT + c.file_path, + c.chunk_index, + c.chunk_type, + c.heading_path, + c.content, + c.tags, + c.date, + v.distance + FROM vec_index v + JOIN vault_chunks c ON v.chunk_id = c.id + WHERE v.embedding MATCH ?1 AND k = ?2 + ORDER BY v.distance + """, (query_json, top_k)).fetchall() + + results = [] + for row in rows: + results.append({ + "file_path": row[0], + "chunk_index": row[1], + "chunk_type": row[2], + "heading_path": row[3], + "content": row[4][:300] + "..." if len(row[4]) > 300 else row[4], + "tags": row[5], + "date": row[6], + "distance": row[7], + }) + + conn.close() + return results + + +# --------------------------------------------------------------------------- +# RAG (Retrieval-Augmented Generation) +# --------------------------------------------------------------------------- + +def rag(query: str, top_k: int = 5) -> str: + """Search vault chunks and generate an answer via local LLM.""" + print(f"[RAG] Retrieving context for: '{query}'") + results = search(query, top_k=top_k) + if not results: + return "[RAG] No relevant chunks found in the vault index." 
+ + # Build context from retrieved chunks + context_parts = [] + for i, r in enumerate(results, 1): + heading = f"Heading: {r['heading_path']}\n" if r['heading_path'] else "" + context_parts.append( + f"[{i}] Source: {r['file_path']}\n" + f"{heading}" + f"Content: {r['content']}\n" + ) + context = "\n".join(context_parts) + + prompt = ( + "You are a helpful assistant with access to a personal knowledge base.\n" + "Answer the user's question based ONLY on the provided context.\n" + "If the context does not contain enough information, say so clearly.\n" + "Keep your answer concise and in the same language as the question.\n\n" + f"--- Context ---\n{context}\n--- End Context ---\n\n" + f"Question: {query}\n\nAnswer:" + ) + + print(f"[RAG] Generating answer with {GENERATE_MODEL} ...") + answer = generate(prompt) + return answer + + +# --------------------------------------------------------------------------- +# Stats +# --------------------------------------------------------------------------- + +def show_stats(): + conn = init_db() + file_count = conn.execute("SELECT COUNT(DISTINCT file_path) FROM vault_chunks").fetchone()[0] + chunk_count = conn.execute("SELECT COUNT(*) FROM vault_chunks").fetchone()[0] + embed_count = conn.execute("SELECT COUNT(*) FROM vec_index").fetchone()[0] + last_index = conn.execute( + "SELECT value FROM index_meta WHERE key = 'last_full_index'" + ).fetchone() + + print("=== Vault Semantic Index Stats ===") + print(f"Files indexed: {file_count}") + print(f"Chunks stored: {chunk_count}") + print(f"Embeddings stored: {embed_count}") + print(f"Last full index: {last_index[0] if last_index else 'N/A'}") + print(f"Database path: {DB_PATH}") + print(f"Embed model: {EMBED_MODEL} ({EMBED_DIM}d)") + + # Chunk type distribution + print("\nChunk type distribution:") + for row in conn.execute( + "SELECT chunk_type, COUNT(*) FROM vault_chunks GROUP BY chunk_type" + ): + print(f" {row[0] or 'unknown'}: {row[1]}") + + conn.close() + + +# 
--------------------------------------------------------------------------- +# CLI +# --------------------------------------------------------------------------- + +def main(): + import sys + if len(sys.argv) < 2: + print(__doc__) + sys.exit(1) + + cmd = sys.argv[1] + + if cmd == "index": + index_all() + elif cmd == "search": + query = sys.argv[2] if len(sys.argv) > 2 else input("Query: ") + results = search(query) + print(f"\nTop {len(results)} results for: '{query}'\n") + for i, r in enumerate(results, 1): + print(f"--- [{i}] {r['file_path']} (dist={r['distance']:.4f}) ---") + if r['heading_path']: + print(f"Heading: {r['heading_path']}") + print(f"Content: {r['content']}\n") + elif cmd == "rag": + query = sys.argv[2] if len(sys.argv) > 2 else input("Query: ") + answer = rag(query) + print(f"\n=== RAG Answer ===\n{answer}\n") + elif cmd == "stats": + show_stats() + else: + print(__doc__) + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/server.json b/server.json index 8c020d8..b297927 100644 --- a/server.json +++ b/server.json @@ -1,9 +1,9 @@ { "$schema": "https://registry.modelcontextprotocol.io/schema/server.json", "name": "io.github.juice094.devbase", - "version": "0.2.3", - "description": "Developer Knowledge OS — manage Git repos, vault notes (Markdown), and assets. AI-native workspace with 19 MCP tools." - "license": "MIT", + "version": "0.20.1", + "description": "Developer Knowledge OS — manage Git repos, vault notes (Markdown), and assets. 
AI-native workspace with 69 MCP tools.", + "license": "AGPL-3.0-or-later", "homepage": "https://github.com/juice094/devbase", "repository": { "type": "git", @@ -42,23 +42,72 @@ "devkit_sync", "devkit_query", "devkit_index", + "devkit_index_stream", + "devkit_status", "devkit_note", - "devkit_vault_write", - "devkit_vault_backlinks", - "devkit_natural_language_query", - "devkit_github_info" - ], - "experimental": [ "devkit_digest", "devkit_paper_index", "devkit_experiment_log", + "devkit_github_info", + "devkit_arxiv_fetch", "devkit_code_metrics", - "devkit_module_graph" - ] + "devkit_module_graph", + "devkit_code_symbols", + "devkit_dependency_graph", + "devkit_call_graph", + "devkit_dead_code", + "devkit_semantic_search", + "devkit_embedding_store", + "devkit_embedding_search", + "devkit_natural_language_query", + "devkit_vault_write", + "devkit_vault_backlinks", + "devkit_vault_daily", + "devkit_vault_graph", + "devkit_vault_export", + "devkit_vault_history", + "devkit_project_brief", + "devkit_impact_analysis", + "devkit_cross_repo_search", + "devkit_knowledge_report", + "devkit_related_symbols", + "devkit_hybrid_search", + "devkit_search_quality", + "devkit_skill_list", + "devkit_skill_search", + "devkit_skill_run", + "devkit_skill_discover", + "devkit_known_limit_store", + "devkit_known_limit_list", + "devkit_relation_store", + "devkit_relation_query", + "devkit_relation_delete", + "devkit_workflow_list", + "devkit_workflow_run", + "devkit_workflow_status", + "devkit_session_save", + "devkit_session_list", + "devkit_session_resume", + "devkit_session_attach", + "devkit_session_detach", + "devkit_session_activate", + "devkit_session_search", + "devkit_session_capture", + "devkit_session_workflows", + "devkit_session_recall", + "devkit_session_index", + "devkit_session_export", + "devkit_session_import", + "devkit_oplog_query", + "devkit_evaluate", + "devkit_document_convert", + "devkit_index_health" + ], + "experimental": [] } }, "tools": { - "count": 19, + 
"count": 69, "list": [ "devkit_scan", "devkit_health", @@ -66,19 +115,69 @@ "devkit_query", "devkit_query_repos", "devkit_index", + "devkit_index_stream", + "devkit_status", "devkit_note", "devkit_digest", "devkit_paper_index", "devkit_experiment_log", "devkit_github_info", + "devkit_arxiv_fetch", "devkit_code_metrics", "devkit_module_graph", + "devkit_code_symbols", + "devkit_dependency_graph", + "devkit_call_graph", + "devkit_dead_code", + "devkit_semantic_search", + "devkit_embedding_store", + "devkit_embedding_search", "devkit_natural_language_query", "devkit_vault_search", "devkit_vault_read", "devkit_vault_write", "devkit_vault_backlinks", - "devkit_project_context" + "devkit_vault_daily", + "devkit_vault_graph", + "devkit_vault_export", + "devkit_vault_history", + "devkit_project_context", + "devkit_project_brief", + "devkit_impact_analysis", + "devkit_cross_repo_search", + "devkit_knowledge_report", + "devkit_related_symbols", + "devkit_hybrid_search", + "devkit_search_quality", + "devkit_skill_list", + "devkit_skill_search", + "devkit_skill_run", + "devkit_skill_discover", + "devkit_known_limit_store", + "devkit_known_limit_list", + "devkit_relation_store", + "devkit_relation_query", + "devkit_relation_delete", + "devkit_workflow_list", + "devkit_workflow_run", + "devkit_workflow_status", + "devkit_session_save", + "devkit_session_list", + "devkit_session_resume", + "devkit_session_attach", + "devkit_session_detach", + "devkit_session_activate", + "devkit_session_search", + "devkit_session_capture", + "devkit_session_workflows", + "devkit_session_recall", + "devkit_session_index", + "devkit_session_export", + "devkit_session_import", + "devkit_oplog_query", + "devkit_evaluate", + "devkit_document_convert", + "devkit_index_health" ] } } diff --git a/src/mcp/mod.rs b/src/mcp/mod.rs index 8f91f8b..45a83a0 100644 --- a/src/mcp/mod.rs +++ b/src/mcp/mod.rs @@ -124,6 +124,7 @@ pub enum McpToolEnum { WorkflowStatus(DevkitWorkflowStatusTool), 
OplogQuery(DevkitOplogQueryTool), Evaluate(DevkitEvaluateTool), + DocumentConvert(DevkitDocumentConvertTool), } /// Stability tier for MCP tools. @@ -220,6 +221,7 @@ impl McpToolEnum { McpToolEnum::WorkflowStatus(_) => ToolTier::Beta, McpToolEnum::OplogQuery(_) => ToolTier::Beta, McpToolEnum::Evaluate(_) => ToolTier::Beta, + McpToolEnum::DocumentConvert(_) => ToolTier::Experimental, } } } @@ -295,6 +297,7 @@ impl McpTool for McpToolEnum { McpToolEnum::WorkflowStatus(t) => t.name(), McpToolEnum::OplogQuery(t) => t.name(), McpToolEnum::Evaluate(t) => t.name(), + McpToolEnum::DocumentConvert(t) => t.name(), } } @@ -368,6 +371,7 @@ impl McpTool for McpToolEnum { McpToolEnum::WorkflowStatus(t) => t.schema(), McpToolEnum::OplogQuery(t) => t.schema(), McpToolEnum::Evaluate(t) => t.schema(), + McpToolEnum::DocumentConvert(t) => t.schema(), } } @@ -445,10 +449,25 @@ impl McpTool for McpToolEnum { McpToolEnum::WorkflowStatus(t) => t.invoke(args, ctx).await, McpToolEnum::OplogQuery(t) => t.invoke(args, ctx).await, McpToolEnum::Evaluate(t) => t.invoke(args, ctx).await, + McpToolEnum::DocumentConvert(t) => t.invoke(args, ctx).await, } } } +/// Long-lived oplog file handle — opened once, reused across all MCP calls. +static OPLOG_FILE: std::sync::OnceLock>> = + std::sync::OnceLock::new(); + +fn get_oplog_file() -> &'static std::sync::Mutex> { + OPLOG_FILE.get_or_init(|| { + let file = dirs::data_local_dir().and_then(|data_dir| { + let log_path = data_dir.join("devbase").join("mcp-oplog.ndjson"); + std::fs::OpenOptions::new().create(true).append(true).open(&log_path).ok() + }); + std::sync::Mutex::new(file) + }) +} + /// Append a single MCP tool invocation record to the oplog file. 
/// /// Path: `%LOCALAPPDATA%/devbase/mcp-oplog.ndjson` @@ -462,10 +481,8 @@ fn append_mcp_oplog(tool_name: &str, duration_ms: u128, success: bool, error_typ "error_type": error_type, }); - if let Some(data_dir) = dirs::data_local_dir() { - let log_path = data_dir.join("devbase").join("mcp-oplog.ndjson"); - if let Ok(mut file) = std::fs::OpenOptions::new().create(true).append(true).open(&log_path) - { + if let Ok(mut guard) = get_oplog_file().lock() { + if let Some(ref mut file) = *guard { use std::io::Write; if let Err(e) = writeln!(file, "{}", entry) { tracing::warn!("Failed to write MCP oplog: {}", e); @@ -507,20 +524,36 @@ impl McpServer { "id": id, "result": {} })), - "initialize" => Ok(serde_json::json!({ - "jsonrpc": "2.0", - "id": id, - "result": { - "protocolVersion": "2024-11-05", - "capabilities": { - "tools": {} - }, - "serverInfo": { - "name": "devbase", - "version": "0.1.0" - } + "initialize" => { + // Verify client protocol version for compatibility + let client_version = req + .get("params") + .and_then(|p| p.get("protocolVersion")) + .and_then(|v| v.as_str()) + .unwrap_or("unknown"); + let supported = ["2024-11-05"]; + if !supported.contains(&client_version) { + tracing::warn!( + "Client protocol version '{}' not in supported list {:?}; proceeding with 2024-11-05", + client_version, + supported + ); } - })), + Ok(serde_json::json!({ + "jsonrpc": "2.0", + "id": id, + "result": { + "protocolVersion": "2024-11-05", + "capabilities": { + "tools": {} + }, + "serverInfo": { + "name": "devbase", + "version": env!("CARGO_PKG_VERSION") + } + } + })) + } "tools/list" => { let tools: Vec = self .tools @@ -687,7 +720,7 @@ impl McpServer { /// Build an MCP server with optional tier filtering. /// -/// If `tiers` is `None`, all 37 tools are registered (backward compatible). +/// If `tiers` is `None`, all 69 tools are registered (backward compatible). /// If `tiers` is provided, only tools whose tier is in the set are registered. 
pub fn build_server_with_tiers(tiers: Option<&HashSet>) -> McpServer { let mut server = McpServer::new(); @@ -760,6 +793,7 @@ pub fn build_server_with_tiers(tiers: Option<&HashSet>) -> McpServer { McpToolEnum::WorkflowStatus(DevkitWorkflowStatusTool), McpToolEnum::OplogQuery(DevkitOplogQueryTool), McpToolEnum::Evaluate(DevkitEvaluateTool), + McpToolEnum::DocumentConvert(DevkitDocumentConvertTool), ]; for tool in all_tools { if let Some(allowed) = tiers diff --git a/src/mcp/tests.rs b/src/mcp/tests.rs index 882d6ab..653463b 100644 --- a/src/mcp/tests.rs +++ b/src/mcp/tests.rs @@ -11,6 +11,17 @@ fn test_ctx() -> (crate::storage::AppContext, tempfile::TempDir) { (ctx, tmp) } +/// Lightweight helper: seed a single repo into the entities table. +fn seed_repo(ctx: &crate::storage::AppContext, id: &str, lang: &str) { + let conn = ctx.conn().unwrap(); + let now = chrono::Utc::now().to_rfc3339(); + conn.execute( + "INSERT INTO entities (id, entity_type, name, local_path, metadata, created_at, updated_at, language, discovered_at, workspace_type, data_tier, stars) + VALUES (?1, 'repo', ?2, ?3, ?4, ?5, ?5, ?6, ?5, 'git', 'private', 0)", + rusqlite::params![id, id, format!("/tmp/{}", id), "{}", &now, lang], + ).unwrap(); +} + #[tokio::test] async fn test_initialize() { let server = build_server(); @@ -39,7 +50,7 @@ async fn test_tools_list() { let (mut ctx, _tmp) = test_ctx(); let resp = server.handle_request(req, &mut ctx).await.unwrap(); let tools = resp.get("result").unwrap().get("tools").unwrap().as_array().unwrap(); - assert_eq!(tools.len(), 68); + assert_eq!(tools.len(), 69); let names: Vec<&str> = tools.iter().map(|t| t.get("name").unwrap().as_str().unwrap()).collect(); assert!(names.contains(&"devkit_index_health")); assert!(names.contains(&"devkit_vault_export")); @@ -53,6 +64,7 @@ async fn test_tools_list() { assert!(names.contains(&"devkit_session_export")); assert!(names.contains(&"devkit_session_import")); assert!(names.contains(&"devkit_evaluate")); + 
assert!(names.contains(&"devkit_document_convert")); assert!(names.contains(&"devkit_scan")); assert!(names.contains(&"devkit_health")); assert!(names.contains(&"devkit_sync")); @@ -217,6 +229,156 @@ async fn test_tools_call_devkit_project_context() { assert!(parsed.get("assets").unwrap().as_array().unwrap().is_empty()); } +#[tokio::test] +async fn test_tools_call_devkit_query_repos() { + let server = build_server(); + let (mut ctx, _tmp) = test_ctx(); + + // 1. Empty registry returns empty results + let req = serde_json::json!({ + "jsonrpc": "2.0", + "id": 10, + "method": "tools/call", + "params": { + "name": "devkit_query_repos", + "arguments": { "language": "" } + } + }); + let resp = server.handle_request(req, &mut ctx).await.unwrap(); + let result = resp.get("result").unwrap(); + let content = result.get("content").unwrap().as_array().unwrap(); + let text = content[0].get("text").unwrap().as_str().unwrap(); + let parsed: serde_json::Value = serde_json::from_str(text).unwrap(); + assert_eq!(parsed.get("success").unwrap(), true); + assert_eq!(parsed.get("count").unwrap().as_i64().unwrap(), 0); + + // 2. 
Seeded repo is returned with correct filtering + seed_repo(&ctx, "test-repo", "rust"); + let req2 = serde_json::json!({ + "jsonrpc": "2.0", + "id": 11, + "method": "tools/call", + "params": { + "name": "devkit_query_repos", + "arguments": { "language": "rust" } + } + }); + let resp2 = server.handle_request(req2, &mut ctx).await.unwrap(); + let result2 = resp2.get("result").unwrap(); + let content2 = result2.get("content").unwrap().as_array().unwrap(); + let text2 = content2[0].get("text").unwrap().as_str().unwrap(); + let parsed2: serde_json::Value = serde_json::from_str(text2).unwrap(); + assert_eq!(parsed2.get("success").unwrap(), true); + let repos = parsed2.get("repos").unwrap().as_array().unwrap(); + assert_eq!(repos.len(), 1); + assert_eq!(repos[0].get("id").unwrap().as_str().unwrap(), "test-repo"); + assert_eq!(repos[0].get("language").unwrap().as_str().unwrap(), "rust"); +} + +#[tokio::test] +async fn test_tools_call_devkit_vault_search() { + let server = build_server(); + let (mut ctx, _tmp) = test_ctx(); + + // Setup: create vault note and scan + let ws = ctx.storage.workspace_dir().unwrap(); + let vault_dir = ws.join("vault"); + std::fs::create_dir_all(&vault_dir).unwrap(); + std::fs::write( + vault_dir.join("test-note.md"), + "---\ntitle: Test Note\ntags: [test, vault]\n---\n\nThis is a test note for vault search.\n", + ).unwrap(); + let mut conn = ctx.conn().unwrap(); + crate::vault::scanner::scan_vault(&mut conn, Some(&vault_dir)).unwrap(); + + let req = serde_json::json!({ + "jsonrpc": "2.0", + "id": 12, + "method": "tools/call", + "params": { + "name": "devkit_vault_search", + "arguments": { "query": "test note" } + } + }); + let resp = server.handle_request(req, &mut ctx).await.unwrap(); + let result = resp.get("result").unwrap(); + let content = result.get("content").unwrap().as_array().unwrap(); + let text = content[0].get("text").unwrap().as_str().unwrap(); + let parsed: serde_json::Value = serde_json::from_str(text).unwrap(); + 
assert_eq!(parsed.get("success").unwrap(), true); + let notes = parsed.get("notes").unwrap().as_array().unwrap(); + assert!(!notes.is_empty(), "vault_search should find the test-note"); + assert!( + notes + .iter() + .any(|n| n.get("title").and_then(|v| v.as_str()) == Some("Test Note")), + "vault_search should return Test Note" + ); +} + +#[tokio::test] +async fn test_tools_call_devkit_vault_read() { + let server = build_server(); + let (mut ctx, _tmp) = test_ctx(); + + // Setup: create vault note and scan + let ws = ctx.storage.workspace_dir().unwrap(); + let vault_dir = ws.join("vault"); + std::fs::create_dir_all(&vault_dir).unwrap(); + let note_path = vault_dir.join("test-read.md"); + std::fs::write( + ¬e_path, + "---\ntitle: Readable Note\ntags: [read]\n---\n\nContent body here.\n", + ) + .unwrap(); + let mut conn = ctx.conn().unwrap(); + crate::vault::scanner::scan_vault(&mut conn, Some(&vault_dir)).unwrap(); + + // 1. Read existing note by absolute path + let req = serde_json::json!({ + "jsonrpc": "2.0", + "id": 13, + "method": "tools/call", + "params": { + "name": "devkit_vault_read", + "arguments": { "path": note_path.to_str().unwrap() } + } + }); + let resp = server.handle_request(req, &mut ctx).await.unwrap(); + let result = resp.get("result").unwrap(); + let content = result.get("content").unwrap().as_array().unwrap(); + let text = content[0].get("text").unwrap().as_str().unwrap(); + let parsed: serde_json::Value = serde_json::from_str(text).unwrap(); + assert_eq!(parsed.get("success").unwrap(), true); + assert_eq!(parsed.get("path").unwrap().as_str().unwrap(), note_path.to_str().unwrap()); + let frontmatter = parsed.get("frontmatter").unwrap().as_str().unwrap(); + assert!(frontmatter.contains("title: Readable Note")); + let body = parsed.get("content").unwrap().as_str().unwrap(); + assert!(body.contains("Content body here.")); + + // 2. 
Read non-existent note returns error + let req2 = serde_json::json!({ + "jsonrpc": "2.0", + "id": 14, + "method": "tools/call", + "params": { + "name": "devkit_vault_read", + "arguments": { "path": "/nonexistent/path/note.md" } + } + }); + let resp2 = server.handle_request(req2, &mut ctx).await.unwrap(); + let result2 = resp2.get("result").unwrap(); + assert_eq!(result2.get("isError").unwrap(), true); + let content2 = result2.get("content").unwrap().as_array().unwrap(); + let text2 = content2[0].get("text").unwrap().as_str().unwrap(); + let parsed2: serde_json::Value = serde_json::from_str(text2).unwrap(); + assert_eq!(parsed2.get("success").unwrap(), false); + assert!( + parsed2.get("error").unwrap().as_str().unwrap().contains("not found") + || parsed2.get("error").unwrap().as_str().unwrap().contains("unreadable") + ); +} + #[tokio::test] async fn test_tools_call_devkit_arxiv_fetch() { let server = build_server(); @@ -240,6 +402,78 @@ async fn test_tools_call_devkit_arxiv_fetch() { assert!(!parsed.get("error").unwrap().as_str().unwrap().is_empty()); } +#[tokio::test] +async fn test_tools_call_devkit_status() { + let server = build_server(); + let req = serde_json::json!({ + "jsonrpc": "2.0", + "id": 15, + "method": "tools/call", + "params": { + "name": "devkit_status", + "arguments": {} + } + }); + let (mut ctx, _tmp) = test_ctx(); + let resp = server.handle_request(req, &mut ctx).await.unwrap(); + let result = resp.get("result").unwrap(); + let content = result.get("content").unwrap().as_array().unwrap(); + let text = content[0].get("text").unwrap().as_str().unwrap(); + let parsed: serde_json::Value = serde_json::from_str(text).unwrap(); + assert_eq!(parsed.get("success").unwrap(), true); + // Empty registry → overall "fresh" (vacuous truth: all 0 repos are fresh) + assert_eq!(parsed.get("overall").unwrap().as_str().unwrap(), "fresh"); + let repos = parsed.get("repos").unwrap().as_array().unwrap(); + assert!(repos.is_empty()); +} + +#[tokio::test] +async fn 
test_tools_call_devkit_workflow_list() { + let server = build_server(); + let req = serde_json::json!({ + "jsonrpc": "2.0", + "id": 16, + "method": "tools/call", + "params": { + "name": "devkit_workflow_list", + "arguments": {} + } + }); + let (mut ctx, _tmp) = test_ctx(); + let resp = server.handle_request(req, &mut ctx).await.unwrap(); + let result = resp.get("result").unwrap(); + let content = result.get("content").unwrap().as_array().unwrap(); + let text = content[0].get("text").unwrap().as_str().unwrap(); + let parsed: serde_json::Value = serde_json::from_str(text).unwrap(); + assert_eq!(parsed.get("success").unwrap(), true); + assert_eq!(parsed.get("count").unwrap().as_i64().unwrap(), 0); + let workflows = parsed.get("workflows").unwrap().as_array().unwrap(); + assert!(workflows.is_empty()); +} + +#[tokio::test] +async fn test_tools_call_devkit_index() { + let server = build_server(); + let req = serde_json::json!({ + "jsonrpc": "2.0", + "id": 17, + "method": "tools/call", + "params": { + "name": "devkit_index", + "arguments": { "path": "" } + } + }); + let (mut ctx, _tmp) = test_ctx(); + let resp = server.handle_request(req, &mut ctx).await.unwrap(); + let result = resp.get("result").unwrap(); + let content = result.get("content").unwrap().as_array().unwrap(); + let text = content[0].get("text").unwrap().as_str().unwrap(); + let parsed: serde_json::Value = serde_json::from_str(text).unwrap(); + assert_eq!(parsed.get("success").unwrap(), true); + // Empty registry → indexed 0 repos + assert_eq!(parsed.get("indexed").unwrap().as_i64().unwrap(), 0); +} + #[tokio::test] async fn test_tools_call_devkit_skill_list() { let server = build_server(); @@ -663,6 +897,29 @@ async fn test_scenario_one_project_onboarding() { ); } +#[tokio::test] +async fn test_tools_call_devkit_document_convert_not_found() { + let server = build_server(); + let req = serde_json::json!({ + "jsonrpc": "2.0", + "id": 18, + "method": "tools/call", + "params": { + "name": 
"devkit_document_convert", + "arguments": { "source_path": "/nonexistent/file.pdf" } + } + }); + let (mut ctx, _tmp) = test_ctx(); + let resp = server.handle_request(req, &mut ctx).await.unwrap(); + let result = resp.get("result").unwrap(); + let content = result.get("content").unwrap().as_array().unwrap(); + let text = content[0].get("text").unwrap().as_str().unwrap(); + let parsed: serde_json::Value = serde_json::from_str(text).unwrap(); + assert_eq!(parsed.get("success").unwrap(), false); + let err = parsed.get("error").unwrap().as_str().unwrap(); + assert!(err.contains("not found") || err.contains("Source file")); +} + #[tokio::test] async fn test_scenario_two_semantic_exploration() { let backend = std::sync::Arc::new(crate::storage::TempStorageBackend::new()); diff --git a/src/mcp/tools/brief.rs b/src/mcp/tools/brief.rs index 08a94f0..5fe6385 100644 --- a/src/mcp/tools/brief.rs +++ b/src/mcp/tools/brief.rs @@ -14,12 +14,28 @@ impl McpTool for DevkitProjectBriefTool { fn schema(&self) -> serde_json::Value { serde_json::json!({ - "description": "Generate a Markdown project brief optimized for LLM context injection.", + "description": r#"Generate a Markdown project brief optimized for LLM context injection. + +Use this when the user wants to: +- Get a compact, AI-friendly summary of a registered repository +- Provide another AI agent with context about a project before delegating work +- Generate a README-like overview from indexed code symbols and metadata + +Do NOT use this for: +- Real-time repo status (use devkit_health instead) +- Detailed code analysis (use devkit_code_metrics or devkit_module_graph instead) +- Searching the codebase (use devkit_semantic_search or devkit_hybrid_search instead) + +Parameters: +- repo_id: Registered repository ID to generate the brief for. +- max_tokens: Approximate token budget for the generated brief (default 2000). 
+ +Returns: JSON with repo_id and brief markdown string."#, "inputSchema": { "type": "object", "properties": { - "repo_id": { "type": "string" }, - "max_tokens": { "type": "integer", "default": 2000 } + "repo_id": { "type": "string", "description": "Registered repository ID to generate the brief for" }, + "max_tokens": { "type": "integer", "default": 2000, "description": "Approximate token budget for the generated brief" } }, "required": ["repo_id"] } diff --git a/src/mcp/tools/context.rs b/src/mcp/tools/context.rs index 73ece4d..8e2370c 100644 --- a/src/mcp/tools/context.rs +++ b/src/mcp/tools/context.rs @@ -37,7 +37,9 @@ Returns: JSON object with: - vault_notes: array of linked and keyword-matched notes (id, title, source: "link" or "search") - assets: array of files/folders from the project's assets directory - relations: array of knowledge-graph relations (from relations table) linking this entity to others - - workflows: array of recent workflow executions for this repo"#, + - workflows: array of recent workflow executions for this repo + - known_limits: array of unmitigated known limits (L3 risk layer entries) + - skills: array of available devbase skills"#, "inputSchema": { "type": "object", "properties": { @@ -397,7 +399,47 @@ Returns: JSON object with: } } - anyhow::Ok((repo_json, linked_vaults, modules, symbols, calls, assets, activity, related_symbols, relations, workflows, recent_commits, hot_files)) + // 12. 
Known limits (unmitigated — highest relevance for active projects) + let mut known_limits = Vec::new(); + match crate::registry::known_limits::list_known_limits(&conn, None, Some(false)) { + Ok(limits) => { + for limit in limits.into_iter().take(20) { + known_limits.push(serde_json::json!({ + "id": limit.id, + "category": limit.category, + "description": limit.description, + "severity": limit.severity, + "source": limit.source, + "first_seen_at": limit.first_seen_at.to_rfc3339(), + })); + } + } + Err(e) => { + tracing::warn!("list_known_limits failed: {}", e); + } + } + + // 13. Available skills (top 20) + let mut skills = Vec::new(); + match crate::skill_runtime::registry::list_skills(&conn, None, None) { + Ok(skill_rows) => { + for s in skill_rows.into_iter().take(20) { + skills.push(serde_json::json!({ + "id": s.id, + "name": s.name, + "version": s.version, + "skill_type": s.skill_type.as_str(), + "description": s.description, + "tags": s.tags, + })); + } + } + Err(e) => { + tracing::warn!("list_skills failed: {}", e); + } + } + + anyhow::Ok((repo_json, linked_vaults, modules, symbols, calls, assets, activity, related_symbols, relations, workflows, recent_commits, hot_files, known_limits, skills)) } }) .await @@ -416,6 +458,8 @@ Returns: JSON object with: workflows, recent_commits, hot_files, + known_limits, + skills, ) = result; Ok(serde_json::json!({ @@ -433,6 +477,8 @@ Returns: JSON object with: "assets": assets, "recent_commits": recent_commits, "hot_files": hot_files, + "known_limits": known_limits, + "skills": skills, })) } } diff --git a/src/mcp/tools/document_convert.rs b/src/mcp/tools/document_convert.rs new file mode 100644 index 0000000..f2c2ef4 --- /dev/null +++ b/src/mcp/tools/document_convert.rs @@ -0,0 +1,177 @@ +// SPDX-License-Identifier: MIT +// Copyright (c) 2026 juice094 +use crate::mcp::McpTool; +use crate::storage::AppContext; +use anyhow::Context; +use std::path::Path; + +#[derive(Clone)] +pub struct DevkitDocumentConvertTool; + +impl 
McpTool for DevkitDocumentConvertTool { + fn name(&self) -> &'static str { + "devkit_document_convert" + } + + fn schema(&self) -> serde_json::Value { + serde_json::json!({ + "description": r#"Convert PDF/PPTX documents to Markdown text. + +Use this when the user wants to: +- Extract text content from course materials, papers, or slides +- Convert binary documents into editable Markdown for the Vault +- Bulk-process downloaded files before organizing them + +Supported formats: +- PDF (via pdftotext) +- PPTX (via python-pptx) + +Parameters: +- source_path: Absolute path to the source document +- output_path: Optional absolute path for the output Markdown file. Defaults to source_path with .md extension. + +Returns: JSON with output_path, extracted character count, and a quality hint (good / poor)."#, + "inputSchema": { + "type": "object", + "properties": { + "source_path": { "type": "string", "description": "Absolute path to the source document" }, + "output_path": { "type": "string", "description": "Optional absolute path for output Markdown" } + }, + "required": ["source_path"] + } + }) + } + + async fn invoke( + &self, + args: serde_json::Value, + _ctx: &mut AppContext, + ) -> anyhow::Result { + let source_path = args + .get("source_path") + .and_then(|v| v.as_str()) + .context("Missing required argument: source_path")?; + + let source = Path::new(source_path); + anyhow::ensure!(source.exists(), "Source file not found: {}", source_path); + + let output_path = args + .get("output_path") + .and_then(|v| v.as_str()) + .map(|s| s.to_string()) + .unwrap_or_else(|| source.with_extension("md").to_string_lossy().to_string()); + + let ext = source.extension().and_then(|e| e.to_str()).unwrap_or("").to_lowercase(); + + let (text, quality) = match ext.as_str() { + "pdf" => extract_pdf(source_path).await?, + "pptx" | "ppt" => extract_pptx(source_path).await?, + other => anyhow::bail!("Unsupported file format: '{}' (supported: pdf, pptx)", other), + }; + + let cleaned = 
cleanup_extracted_text(&text);
+        let frontmatter =
+            format!("---\nsource: \"{}\"\nextract_quality: \"{}\"\n---\n\n", source_path, quality);
+        let md_content = format!("{}{}", frontmatter, cleaned);
+
+        std::fs::write(&output_path, md_content)
+            .with_context(|| format!("Failed to write output: {}", output_path))?;
+
+        // `extracted_chars` counts Unicode scalar values of the raw extractor output;
+        // `len()` would report UTF-8 bytes and over-count any non-ASCII text.
+        Ok(serde_json::json!({
+            "success": true,
+            "output_path": output_path,
+            "extracted_chars": text.chars().count(),
+            "quality": quality,
+        }))
+    }
+}
+
+/// Extracts the text of a PDF by running the external `pdftotext` program.
+///
+/// The extracted text is read from the child's stdout and decoded lossily as
+/// UTF-8. The second tuple element is a quality hint: `"poor"` when fewer than
+/// 200 bytes of text were produced, `"good"` otherwise.
+///
+/// # Errors
+///
+/// Returns an error when `pdftotext` cannot be spawned, or when it exits with a
+/// non-zero status (its stderr is included in the message).
+async fn extract_pdf(path: &str) -> anyhow::Result<(String, &'static str)> {
+    let output = tokio::process::Command::new("pdftotext")
+        // Usage is `pdftotext [options] PDF-file [text-file]`; a text-file of `-`
+        // sends the text to stdout. A leading `-` would be taken as the PDF file.
+        .args([path, "-"])
+        .output()
+        .await
+        .context("Failed to spawn pdftotext — is poppler installed?")?;
+
+    if !output.status.success() {
+        let stderr = String::from_utf8_lossy(&output.stderr);
+        anyhow::bail!("pdftotext failed: {}", stderr);
+    }
+
+    let text = String::from_utf8_lossy(&output.stdout).to_string();
+    // Heuristic: a near-empty text layer (< 200 bytes) is reported as poor quality.
+    let quality = if text.len() < 200 { "poor" } else { "good" };
+    Ok((text, quality))
+}
+
+/// Extracts slide text from a PPTX file by running a short `python-pptx` script.
+///
+/// The script prints a `## Slide N` heading per slide followed by the stripped,
+/// non-empty text of every shape on it; that stdout is decoded lossily as UTF-8
+/// and returned together with a quality hint.
+///
+/// # Errors
+///
+/// Returns an error when `python` cannot be spawned, or when the script exits
+/// with a non-zero status (its stderr is included in the message).
+async fn extract_pptx(path: &str) -> anyhow::Result<(String, &'static str)> {
+    // The path is passed to Python as `sys.argv[1]` instead of being spliced into
+    // the script source, so quotes or backslashes in it can neither break nor
+    // alter the code that runs.
+    let script = r###"
+from pptx import Presentation
+import sys
+prs = Presentation(sys.argv[1])
+lines = []
+for i, slide in enumerate(prs.slides, 1):
+    lines.append(f"## Slide {i}")
+    for shape in slide.shapes:
+        if hasattr(shape, "text") and shape.text.strip():
+            lines.append(shape.text.strip())
+    lines.append("")
+print("\n".join(lines))
+"###;
+
+    let output = tokio::process::Command::new("python")
+        .arg("-c")
+        .arg(script)
+        // With `python -c <script> <arg>`, the first extra argument is `sys.argv[1]`.
+        .arg(path)
+        .output()
+        .await
+        .context("Failed to spawn python — is python-pptx installed?")?;
+
+    if !output.status.success() {
+        let stderr = String::from_utf8_lossy(&output.stderr);
+        anyhow::bail!("python-pptx extraction failed: {}", stderr);
+    }
+
+    let text = String::from_utf8_lossy(&output.stdout).to_string();
+    let
quality = if text.len() < 100 { "poor" } else { "good" }; + Ok((text, quality)) +} + +fn cleanup_extracted_text(text: &str) -> String { + // Collapse 3+ consecutive blank lines to 2 + let mut result = String::new(); + let mut blank_count = 0; + for line in text.lines() { + if line.trim().is_empty() { + blank_count += 1; + if blank_count <= 2 { + result.push('\n'); + } + } else { + blank_count = 0; + result.push_str(line); + result.push('\n'); + } + } + result.trim().to_string() +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_cleanup_extracted_text() { + let input = "line1\n\n\n\n\nline2\n\nline3"; + let out = cleanup_extracted_text(input); + assert_eq!(out, "line1\n\n\nline2\n\nline3"); + } + + #[test] + fn test_name() { + let tool = DevkitDocumentConvertTool; + assert_eq!(tool.name(), "devkit_document_convert"); + } +} diff --git a/src/mcp/tools/external.rs b/src/mcp/tools/external.rs index 9a84a99..c0b14e6 100644 --- a/src/mcp/tools/external.rs +++ b/src/mcp/tools/external.rs @@ -83,9 +83,7 @@ Returns: JSON with stars, forks, open_issues, description, pushed_at, and update let status = resp.status(); if !status.is_success() { let text = resp.text().await.unwrap_or_default(); - return Ok( - serde_json::json!({ "success": false, "error": format!("GitHub API error {}: {}", status, text) }), - ); + anyhow::bail!("GitHub API error {}: {}", status, text); } let data: serde_json::Value = resp.json().await?; @@ -162,10 +160,7 @@ impl McpTool for DevkitArxivFetchTool { "published": m.published, "primary_category": m.primary_category, })), - Err(e) => Ok(serde_json::json!({ - "success": false, - "error": e.to_string(), - })), + Err(e) => anyhow::bail!("arXiv fetch error: {}", e), } } } diff --git a/src/mcp/tools/impact.rs b/src/mcp/tools/impact.rs index ffed655..38982db 100644 --- a/src/mcp/tools/impact.rs +++ b/src/mcp/tools/impact.rs @@ -14,11 +14,28 @@ impl McpTool for DevkitImpactAnalysisTool { fn schema(&self) -> serde_json::Value { 
serde_json::json!({ - "description": "Analyze the impact of modifying a specific code symbol. Returns callers, callees, related symbols, potentially affected tests, and recent change history. Use this before refactoring to understand blast radius.", + "description": r#"Analyze the impact of modifying a specific code symbol. Returns callers, callees, related symbols, potentially affected tests, and recent change history. + +Use this when the user wants to: +- Understand the blast radius before refactoring a function or struct +- Identify all call sites that would break if a symbol's signature changes +- Find related symbols that share logic or data structures with the target + +Do NOT use this for: +- General code search (use devkit_semantic_search or devkit_hybrid_search instead) +- Full call graph exploration (use devkit_call_graph for the complete graph) +- Cross-repo dependency analysis (use devkit_dependency_graph instead) + +Parameters: +- repo_id: Registered repository ID containing the symbol. +- symbol_name: Function, struct, or trait name to analyze. +- depth: Call graph traversal depth 1-3 (default: 2). 
+ +Returns: JSON with callers, callees, related_symbols, affected_tests, and change_history."#, "inputSchema": { "type": "object", "properties": { - "repo_id": { "type": "string" }, + "repo_id": { "type": "string", "description": "Registered repository ID containing the symbol to analyze" }, "symbol_name": { "type": "string", "description": "Function, struct, or trait name to analyze" }, "depth": { "type": "integer", "default": 2, "description": "Call graph traversal depth (1-3)" } }, diff --git a/src/mcp/tools/index_health.rs b/src/mcp/tools/index_health.rs index 1988fee..4ab0933 100644 --- a/src/mcp/tools/index_health.rs +++ b/src/mcp/tools/index_health.rs @@ -165,7 +165,10 @@ pub fn run_index_health(ctx: &mut AppContext, repair: bool) -> anyhow::Result anyhow::Result { - let id = args.get("id").and_then(|v| v.as_str()).unwrap_or("").to_string(); - let category = args.get("category").and_then(|v| v.as_str()).unwrap_or("").to_string(); - let description = - args.get("description").and_then(|v| v.as_str()).unwrap_or("").to_string(); + let id = args.get("id").and_then(|v| v.as_str()).context("id is required")?.to_string(); + let category = args + .get("category") + .and_then(|v| v.as_str()) + .context("category is required")? + .to_string(); + let description = args + .get("description") + .and_then(|v| v.as_str()) + .context("description is required")? 
+ .to_string(); let source = args.get("source").and_then(|v| v.as_str()).map(|s| s.to_string()); let severity = args.get("severity").and_then(|v| v.as_i64()).map(|i| i as i32); if id.is_empty() || category.is_empty() || description.is_empty() { - return Ok(serde_json::json!({ - "success": false, - "error": "id, category, and description are required" - })); + anyhow::bail!("id, category, and description must not be empty"); } let limit = KnownLimit { diff --git a/src/mcp/tools/mod.rs b/src/mcp/tools/mod.rs index 3bcf083..cea8769 100644 --- a/src/mcp/tools/mod.rs +++ b/src/mcp/tools/mod.rs @@ -2,6 +2,7 @@ // Copyright (c) 2026 juice094 pub mod brief; pub mod context; +pub mod document_convert; pub mod evaluate; pub mod impact; pub mod index_health; @@ -23,6 +24,7 @@ pub mod search; pub use brief::*; pub use context::*; +pub use document_convert::*; pub use impact::*; pub use index_health::*; pub use known_limit::*; diff --git a/src/mcp/tools/relations.rs b/src/mcp/tools/relations.rs index cf88238..a9df484 100644 --- a/src/mcp/tools/relations.rs +++ b/src/mcp/tools/relations.rs @@ -2,6 +2,7 @@ // Copyright (c) 2026 juice094 use crate::clients::RegistryClient; use crate::mcp::McpTool; +use anyhow::Context; #[derive(Clone)] pub struct DevkitRelationStoreTool; @@ -30,10 +31,10 @@ Returns: success boolean and relation details."#, "inputSchema": { "type": "object", "properties": { - "from_entity_id": { "type": "string" }, - "to_entity_id": { "type": "string" }, - "relation_type": { "type": "string" }, - "confidence": { "type": "number" } + "from_entity_id": { "type": "string", "description": "Source entity ID (e.g., repo ID, symbol ID, or entity UUID)" }, + "to_entity_id": { "type": "string", "description": "Target entity ID" }, + "relation_type": { "type": "string", "description": "Relationship label (e.g., 'depends_on', 'similar_to', 'calls', 'extends')" }, + "confidence": { "type": "number", "description": "Confidence score 0.0–1.0 (default 1.0)", "minimum": 0.0, 
"maximum": 1.0 } }, "required": ["from_entity_id", "to_entity_id", "relation_type"] } @@ -48,55 +49,41 @@ Returns: success boolean and relation details."#, let from = args .get("from_entity_id") .and_then(|v| v.as_str()) - .unwrap_or("") + .context("from_entity_id is required")? .trim() .to_string(); let to = args .get("to_entity_id") .and_then(|v| v.as_str()) - .unwrap_or("") + .context("to_entity_id is required")? .trim() .to_string(); let rel_type = args .get("relation_type") .and_then(|v| v.as_str()) - .unwrap_or("") + .context("relation_type is required")? .trim() .to_string(); let confidence = args.get("confidence").and_then(|v| v.as_f64()).unwrap_or(1.0); if from.is_empty() || to.is_empty() || rel_type.is_empty() { - return Ok(serde_json::json!({ - "success": false, - "error": "from_entity_id, to_entity_id, and relation_type are required" - })); + anyhow::bail!("from_entity_id, to_entity_id, and relation_type must not be empty"); } if !(0.0..=1.0).contains(&confidence) { - return Ok(serde_json::json!({ - "success": false, - "error": "confidence must be between 0.0 and 1.0" - })); + anyhow::bail!("confidence must be between 0.0 and 1.0"); } if from == to { - return Ok(serde_json::json!({ - "success": false, - "error": "self-relations (from_entity_id == to_entity_id) are not allowed" - })); + anyhow::bail!("self-relations (from_entity_id == to_entity_id) are not allowed"); } - if let Err(e) = ctx.save_relation(&from, &to, &rel_type, confidence) { + ctx.save_relation(&from, &to, &rel_type, confidence).map_err(|e| { let msg = e.to_string(); if msg.contains("foreign key constraint") || msg.contains("FOREIGN KEY") { - return Ok(serde_json::json!({ - "success": false, - "error": format!("Entity not found in registry. Ensure both '{}' and '{}' exist as registered entities.", from, to) - })); + anyhow::anyhow!("Entity not found in registry. 
Ensure both '{from}' and '{to}' exist as registered entities.") + } else { + e } - return Ok(serde_json::json!({ - "success": false, - "error": msg - })); - } + })?; Ok(serde_json::json!({ "success": true, @@ -148,16 +135,17 @@ Returns: JSON array of relations with to_entity_id, relation_type, confidence, a args: serde_json::Value, ctx: &mut crate::storage::AppContext, ) -> anyhow::Result { - let entity_id = - args.get("entity_id").and_then(|v| v.as_str()).unwrap_or("").trim().to_string(); + let entity_id = args + .get("entity_id") + .and_then(|v| v.as_str()) + .context("entity_id is required")? + .trim() + .to_string(); let relation_type = args.get("relation_type").and_then(|v| v.as_str()); let direction = args.get("direction").and_then(|v| v.as_str()).unwrap_or("outgoing"); if entity_id.is_empty() { - return Ok(serde_json::json!({ - "success": false, - "error": "entity_id is required" - })); + anyhow::bail!("entity_id must not be empty"); } let value = ctx.query_relations(&entity_id, direction, relation_type)?; @@ -186,18 +174,27 @@ impl McpTool for DevkitRelationDeleteTool { serde_json::json!({ "description": r#"Delete a relation between two entities from the devbase registry. +Use this when the user wants to: +- Remove an incorrect or outdated relationship from the knowledge graph +- Clean up stale dependency links after refactoring +- Undo a previous devkit_relation_store operation + +Do NOT use this for: +- Deleting entities themselves (relations only — entities must be removed separately) +- Bulk graph cleanup (query first with devkit_relation_query, then delete selectively) + Parameters: -- from_entity_id: Source entity ID -- to_entity_id: Target entity ID -- relation_type: Relationship label (optional — if omitted, deletes all relations between the two entities) +- from_entity_id: Source entity ID. +- to_entity_id: Target entity ID. +- relation_type: Optional relationship label. If omitted, deletes all relations between the two entities. 
Returns: success boolean and count of deleted relations."#, "inputSchema": { "type": "object", "properties": { - "from_entity_id": { "type": "string" }, - "to_entity_id": { "type": "string" }, - "relation_type": { "type": "string" } + "from_entity_id": { "type": "string", "description": "Source entity ID" }, + "to_entity_id": { "type": "string", "description": "Target entity ID" }, + "relation_type": { "type": "string", "description": "Optional relationship label — if omitted, deletes all relations between the two entities" } }, "required": ["from_entity_id", "to_entity_id"] } @@ -212,23 +209,20 @@ Returns: success boolean and count of deleted relations."#, let from = args .get("from_entity_id") .and_then(|v| v.as_str()) - .unwrap_or("") + .context("from_entity_id is required")? .trim() .to_string(); let to = args .get("to_entity_id") .and_then(|v| v.as_str()) - .unwrap_or("") + .context("to_entity_id is required")? .trim() .to_string(); let rel_type = args.get("relation_type").and_then(|v| v.as_str()).map(|s| s.trim().to_string()); if from.is_empty() || to.is_empty() { - return Ok(serde_json::json!({ - "success": false, - "error": "from_entity_id and to_entity_id are required" - })); + anyhow::bail!("from_entity_id and to_entity_id must not be empty"); } let value = @@ -326,9 +320,8 @@ mod tests { serde_json::json!({"from_entity_id": "", "to_entity_id": "b", "relation_type": ""}), &mut ctx, ) - .await - .unwrap(); - assert_eq!(result.get("success").and_then(|v| v.as_bool()), Some(false)); + .await; + assert!(result.is_err(), "empty required fields should return an error"); } #[tokio::test] diff --git a/src/mcp/tools/session.rs b/src/mcp/tools/session.rs index 63e5628..a65446f 100644 --- a/src/mcp/tools/session.rs +++ b/src/mcp/tools/session.rs @@ -4,6 +4,7 @@ use crate::mcp::McpTool; use crate::storage::AppContext; +use anyhow::Context; use serde_json::json; #[derive(Clone)] @@ -265,10 +266,7 @@ Use this when the user wants to: "linked_count": linked_json.len(), 
})) } - None => Ok(json!({ - "success": false, - "error": format!("Session '{}' not found", context_id) - })), + None => anyhow::bail!("Session '{}' not found", context_id), } } } @@ -852,8 +850,8 @@ Returns: exported content string."#, "inputSchema": { "type": "object", "properties": { - "context_id": { "type": "string" }, - "format": { "type": "string", "enum": ["markdown", "json"], "default": "markdown" } + "context_id": { "type": "string", "description": "Session ID to export" }, + "format": { "type": "string", "enum": ["markdown", "json"], "default": "markdown", "description": "Export format: markdown (default) or json" } }, "required": ["context_id"] } @@ -959,9 +957,9 @@ Returns: import summary."#, "inputSchema": { "type": "object", "properties": { - "context_id": { "type": "string" }, - "content": { "type": "string" }, - "default_type": { "type": "string", "default": "note" } + "context_id": { "type": "string", "description": "Target session ID (created if not exists)" }, + "content": { "type": "string", "description": "Text block to parse for memory entries" }, + "default_type": { "type": "string", "default": "note", "description": "Memory type for lines without a [type] prefix" } }, "required": ["context_id", "content"] } @@ -973,8 +971,12 @@ Returns: import summary."#, args: serde_json::Value, ctx: &mut AppContext, ) -> anyhow::Result { - let context_id = args.get("context_id").and_then(|v| v.as_str()).unwrap_or(""); - let content = args.get("content").and_then(|v| v.as_str()).unwrap_or(""); + let context_id = args + .get("context_id") + .and_then(|v| v.as_str()) + .context("context_id is required")?; + let content = + args.get("content").and_then(|v| v.as_str()).context("content is required")?; let default_type = args.get("default_type").and_then(|v| v.as_str()).unwrap_or("note"); if context_id.is_empty() { anyhow::bail!("Missing required argument: context_id"); diff --git a/src/mcp/tools/workflow.rs b/src/mcp/tools/workflow.rs index a3a4c1d..f171756 
100644 --- a/src/mcp/tools/workflow.rs +++ b/src/mcp/tools/workflow.rs @@ -2,6 +2,7 @@ // Copyright (c) 2026 juice094 use crate::clients::WorkflowClient; use crate::mcp::McpTool; +use anyhow::Context; #[derive(Clone)] pub struct DevkitWorkflowListTool; @@ -18,7 +19,12 @@ impl McpTool for DevkitWorkflowListTool { Use this when the user wants to: - See what automation workflows are available - Choose a workflow to run -- Audit workflow inventory +- Audit workflow inventory before creating new workflows + +Do NOT use this for: +- Running a workflow (use devkit_workflow_run instead) +- Checking workflow execution status (use devkit_workflow_status instead) +- Discovering skills (use devkit_skill_list instead) Parameters: none @@ -64,8 +70,8 @@ Returns: execution summary with status, step results, and execution_id."#, "inputSchema": { "type": "object", "properties": { - "workflow_id": { "type": "string" }, - "inputs": { "type": "object" } + "workflow_id": { "type": "string", "description": "ID of the workflow to run (from devkit_workflow_list)" }, + "inputs": { "type": "object", "description": "Optional JSON object of input key-value pairs" } }, "required": ["workflow_id"] } @@ -77,15 +83,15 @@ Returns: execution summary with status, step results, and execution_id."#, args: serde_json::Value, ctx: &mut crate::storage::AppContext, ) -> anyhow::Result { - let workflow_id = - args.get("workflow_id").and_then(|v| v.as_str()).unwrap_or("").to_string(); + let workflow_id = args + .get("workflow_id") + .and_then(|v| v.as_str()) + .context("workflow_id is required")? 
+ .to_string(); let inputs_value = args.get("inputs").cloned().unwrap_or(serde_json::json!({})); if workflow_id.is_empty() { - return Ok(serde_json::json!({ - "success": false, - "error": "workflow_id is required" - })); + anyhow::bail!("workflow_id must not be empty"); } ctx.run_workflow(&workflow_id, inputs_value) @@ -128,13 +134,13 @@ Returns: execution record with status, current_step, timestamps, and duration."# args: serde_json::Value, ctx: &mut crate::storage::AppContext, ) -> anyhow::Result { - let exec_id = args.get("execution_id").and_then(|v| v.as_i64()).unwrap_or(0); + let exec_id = args + .get("execution_id") + .and_then(|v| v.as_i64()) + .context("execution_id is required")?; if exec_id <= 0 { - return Ok(serde_json::json!({ - "success": false, - "error": "execution_id must be a positive integer" - })); + anyhow::bail!("execution_id must be a positive integer"); } ctx.get_execution(exec_id) @@ -190,8 +196,8 @@ mod tests { let mut ctx = crate::storage::AppContext::with_storage(backend).unwrap(); let tool = DevkitWorkflowStatusTool; - let result = tool.invoke(serde_json::json!({"execution_id": -1}), &mut ctx).await.unwrap(); - assert_eq!(result.get("success").and_then(|v| v.as_bool()), Some(false)); + let result = tool.invoke(serde_json::json!({"execution_id": -1}), &mut ctx).await; + assert!(result.is_err(), "negative execution_id should return an error"); } /// End-to-end: 3-step DAG workflow (a -> b -> c) executed via MCP tool. 
diff --git a/src/registry.rs b/src/registry.rs index 4f54562..7c2916b 100644 --- a/src/registry.rs +++ b/src/registry.rs @@ -122,9 +122,9 @@ impl Default for WorkspaceRegistry { } } -pub use devbase_registry_health::HealthEntry; -pub use devbase_registry_metrics::CodeMetrics; -pub use devbase_registry_workspace::{OplogEntry, OplogEventType, WorkspaceSnapshot}; +pub use devbase_registry::health::HealthEntry; +pub use devbase_registry::metrics::CodeMetrics; +pub use devbase_registry::workspace::{OplogEntry, OplogEventType, WorkspaceSnapshot}; pub mod entity; pub mod relation; diff --git a/src/registry/call_graph.rs b/src/registry/call_graph.rs index 47994ba..4b1e7cb 100644 --- a/src/registry/call_graph.rs +++ b/src/registry/call_graph.rs @@ -1,5 +1,3 @@ -// SPDX-License-Identifier: MIT +// SPDX-License-Identifier: AGPL-3.0-or-later // Copyright (c) 2026 juice094 -// RE-EXPORT ONLY — 实现已迁移至 devbase-registry-call-graph crate. -// 禁止在本文件中添加新代码。 -pub use devbase_registry_call_graph::*; +pub use devbase_registry::call_graph::*; diff --git a/src/registry/code_symbols.rs b/src/registry/code_symbols.rs index 03f5a6f..e2324c6 100644 --- a/src/registry/code_symbols.rs +++ b/src/registry/code_symbols.rs @@ -1,8 +1,8 @@ -// SPDX-License-Identifier: MIT +// SPDX-License-Identifier: AGPL-3.0-or-later // Copyright (c) 2026 juice094 // Re-export the external crate type for backward compatibility (src/repository/symbol.rs). -pub use devbase_registry_code_symbols::CodeSymbol; +pub use devbase_registry::code_symbols::CodeSymbol; /// A single code symbol from the `code_symbols` table (RegistryClient variant). 
#[derive(Debug, Clone)] diff --git a/src/registry/dead_code.rs b/src/registry/dead_code.rs index d66384f..372c97c 100644 --- a/src/registry/dead_code.rs +++ b/src/registry/dead_code.rs @@ -1,8 +1,8 @@ -// SPDX-License-Identifier: MIT +// SPDX-License-Identifier: AGPL-3.0-or-later // Copyright (c) 2026 juice094 // Re-export the external crate type for backward compatibility (src/repository/symbol.rs). -pub use devbase_registry_dead_code::DeadFunction; +pub use devbase_registry::dead_code::DeadFunction; /// A potentially dead function from the code symbol index. #[derive(Debug, Clone)] diff --git a/src/registry/entity.rs b/src/registry/entity.rs index 7e1fca0..0667d7e 100644 --- a/src/registry/entity.rs +++ b/src/registry/entity.rs @@ -1,5 +1,3 @@ -// SPDX-License-Identifier: MIT +// SPDX-License-Identifier: AGPL-3.0-or-later // Copyright (c) 2026 juice094 -// RE-EXPORT ONLY — 实现已迁移至 devbase-registry-entity crate. -// 禁止在本文件中添加新代码。 -pub use devbase_registry_entity::*; +pub use devbase_registry::entity::*; diff --git a/src/registry/health.rs b/src/registry/health.rs index b9056ad..62279ed 100644 --- a/src/registry/health.rs +++ b/src/registry/health.rs @@ -1,5 +1,3 @@ -// SPDX-License-Identifier: MIT +// SPDX-License-Identifier: AGPL-3.0-or-later // Copyright (c) 2026 juice094 -// RE-EXPORT ONLY — 实现已迁移至 devbase-registry-health crate. -// 禁止在本文件中添加新代码。 -pub use devbase_registry_health::*; +pub use devbase_registry::health::*; diff --git a/src/registry/metrics.rs b/src/registry/metrics.rs index fd55ddf..a117ebe 100644 --- a/src/registry/metrics.rs +++ b/src/registry/metrics.rs @@ -1,5 +1,3 @@ -// SPDX-License-Identifier: MIT +// SPDX-License-Identifier: AGPL-3.0-or-later // Copyright (c) 2026 juice094 -// RE-EXPORT ONLY — 实现已迁移至 devbase-registry-metrics crate. 
-// 禁止在本文件中添加新代码。 -pub use devbase_registry_metrics::*; +pub use devbase_registry::metrics::*; diff --git a/src/registry/relation.rs b/src/registry/relation.rs index 10f257d..aaf9e7c 100644 --- a/src/registry/relation.rs +++ b/src/registry/relation.rs @@ -1,5 +1,3 @@ -// SPDX-License-Identifier: MIT +// SPDX-License-Identifier: AGPL-3.0-or-later // Copyright (c) 2026 juice094 -// RE-EXPORT ONLY — 实现已迁移至 devbase-registry-relation crate. -// 禁止在本文件中添加新代码。 -pub use devbase_registry_relation::*; +pub use devbase_registry::relation::*; diff --git a/src/registry/workspace.rs b/src/registry/workspace.rs index e9bd4d2..46ad83c 100644 --- a/src/registry/workspace.rs +++ b/src/registry/workspace.rs @@ -1,5 +1,3 @@ -// SPDX-License-Identifier: MIT +// SPDX-License-Identifier: AGPL-3.0-or-later // Copyright (c) 2026 juice094 -// RE-EXPORT ONLY — 实现已迁移至 devbase-registry-workspace crate. -// 禁止在本文件中添加新代码。 -pub use devbase_registry_workspace::*; +pub use devbase_registry::workspace::*; diff --git a/src/search.rs b/src/search.rs index 0c10485..0f5c5cf 100644 --- a/src/search.rs +++ b/src/search.rs @@ -1,4 +1,3 @@ -#![allow(dead_code)] // SPDX-License-Identifier: MIT // Copyright (c) 2026 juice094 @@ -14,8 +13,6 @@ use tantivy::{ schema::{STORED, STRING, Schema, TEXT, Value}, }; -const INDEX_DIR: &str = "devbase/search_index"; - fn index_path() -> Result { crate::storage::DefaultStorageBackend {} .index_path() @@ -290,11 +287,6 @@ fn search_with_reader( Ok(results) } -fn open_index() -> Result<(Index, Schema), TantivyError> { - let path = index_path()?; - open_index_at(&path) -} - fn open_index_at(path: &std::path::Path) -> Result<(Index, Schema), TantivyError> { let schema = build_schema(); let dir = tantivy::directory::MmapDirectory::open(path)?; diff --git a/src/search/symbol_index.rs b/src/search/symbol_index.rs index b902f5a..1eeb946 100644 --- a/src/search/symbol_index.rs +++ b/src/search/symbol_index.rs @@ -14,8 +14,6 @@ use tantivy::{ schema::{STORED, Schema, TEXT, Value}, 
}; -const SYMBOL_INDEX_DIR: &str = "symbol_index"; - fn symbol_index_path() -> Result { crate::storage::DefaultStorageBackend {} .symbol_index_path()