init

2026-04-04 10:23:48 +02:00
commit 3314a37245
8 changed files with 4174 additions and 0 deletions
--- a/.envrc
+++ b/.envrc
@@ -0,0 +1,7 @@
+export DIRENV_WARN_TIMEOUT=20s
+
+eval "$(devenv direnvrc)"
+
+# The use_devenv function supports passing flags to the devenv command
+# For example: use devenv --impure --option services.postgres.enable:bool true
+use devenv
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,24 @@
+# To do list
+/TODO.md
+
+# Devenv/Direnv
+.devenv*
+devenv.local.nix
+.direnv
+/.env
+.null-ls_*.nix
+/result
+
+# pre-commit
+.pre-commit-config.yaml
+
+# Temporary files, for example, from tests.
+/tmp/
+
+# LLMs
+**/.claude/settings.local.json
+/.mcp.json
+
+# OS specific
+.DS_Store
+/node_modules/
--- a/devenv.lock
+++ b/devenv.lock
@@ -0,0 +1,119 @@
+{
+  "nodes": {
+    "devenv": {
+      "locked": {
+        "dir": "src/modules",
+        "lastModified": 1768736080,
+        "owner": "cachix",
+        "repo": "devenv",
+        "rev": "efa86311444852d24137d14964b449075522d489",
+        "type": "github"
+      },
+      "original": {
+        "dir": "src/modules",
+        "owner": "cachix",
+        "repo": "devenv",
+        "type": "github"
+      }
+    },
+    "flake-compat": {
+      "flake": false,
+      "locked": {
+        "lastModified": 1767039857,
+        "owner": "NixOS",
+        "repo": "flake-compat",
+        "rev": "5edf11c44bc78a0d334f6334cdaf7d60d732daab",
+        "type": "github"
+      },
+      "original": {
+        "owner": "NixOS",
+        "repo": "flake-compat",
+        "type": "github"
+      }
+    },
+    "git-hooks": {
+      "inputs": {
+        "flake-compat": "flake-compat",
+        "gitignore": "gitignore",
+        "nixpkgs": [
+          "nixpkgs"
+        ]
+      },
+      "locked": {
+        "lastModified": 1767281941,
+        "owner": "cachix",
+        "repo": "git-hooks.nix",
+        "rev": "f0927703b7b1c8d97511c4116eb9b4ec6645a0fa",
+        "type": "github"
+      },
+      "original": {
+        "owner": "cachix",
+        "repo": "git-hooks.nix",
+        "type": "github"
+      }
+    },
+    "gitignore": {
+      "inputs": {
+        "nixpkgs": [
+          "git-hooks",
+          "nixpkgs"
+        ]
+      },
+      "locked": {
+        "lastModified": 1762808025,
+        "owner": "hercules-ci",
+        "repo": "gitignore.nix",
+        "rev": "cb5e3fdca1de58ccbc3ef53de65bd372b48f567c",
+        "type": "github"
+      },
+      "original": {
+        "owner": "hercules-ci",
+        "repo": "gitignore.nix",
+        "type": "github"
+      }
+    },
+    "nixpkgs": {
+      "locked": {
+        "lastModified": 1767052823,
+        "owner": "cachix",
+        "repo": "devenv-nixpkgs",
+        "rev": "538a5124359f0b3d466e1160378c87887e3b51a4",
+        "type": "github"
+      },
+      "original": {
+        "owner": "cachix",
+        "ref": "rolling",
+        "repo": "devenv-nixpkgs",
+        "type": "github"
+      }
+    },
+    "nostr-bench-src": {
+      "flake": false,
+      "locked": {
+        "lastModified": 1774020724,
+        "owner": "serpent213",
+        "repo": "nostr-bench",
+        "rev": "8561b84864ce1269b26304808c64219471999caf",
+        "type": "github"
+      },
+      "original": {
+        "owner": "serpent213",
+        "repo": "nostr-bench",
+        "type": "github"
+      }
+    },
+    "root": {
+      "inputs": {
+        "devenv": "devenv",
+        "git-hooks": "git-hooks",
+        "nixpkgs": "nixpkgs",
+        "nostr-bench-src": "nostr-bench-src",
+        "pre-commit-hooks": [
+          "git-hooks"
+        ]
+      }
+    }
+  },
+  "root": "root",
+  "version": 7
+}
--- a/devenv.nix
+++ b/devenv.nix
@@ -0,0 +1,35 @@
+{
+  pkgs,
+  lib,
+  inputs,
+  ...
+}: {
+  # https://devenv.sh/packages/
+  packages = with pkgs; [
+    just
+    # Nix code formatter
+    alejandra
+  ];
+
+  # https://devenv.sh/languages/
+  languages = {
+    javascript = {
+      enable = true;
+      npm.enable = true;
+    };
+  };
+
+  dotenv.enable = true;
+  devenv.warnOnNewVersion = false;
+
+  # https://devenv.sh/pre-commit-hooks/
+  git-hooks.hooks = {
+    alejandra.enable = true;
+    check-added-large-files = {
+      enable = true;
+      args = ["--maxkb=16384"];
+    };
+  };
+
+  # See full reference at https://devenv.sh/reference/options/
+}
--- a/devenv.yaml
+++ b/devenv.yaml
@@ -0,0 +1,18 @@
+# yaml-language-server: $schema=https://devenv.sh/devenv.schema.json
+inputs:
+  nixpkgs:
+    url: github:cachix/devenv-nixpkgs/rolling
+  nostr-bench-src:
+    url: github:serpent213/nostr-bench
+    flake: false
+
+# If you're using non-OSS software, you can set allowUnfree to true.
+# allowUnfree: true
+
+# If you're willing to use a package that's vulnerable
+# permittedInsecurePackages:
+#  - "openssl-1.1.1w"
+
+# If you have more than one devenv you can merge them
+#imports:
+# - ./backend
--- a/docs/llm_sexp_tools.md
+++ b/docs/llm_sexp_tools.md
@@ -0,0 +1,183 @@
+# LLM Tool Design for Scheme/Guile — Prior Art & Sharp Tool Set
+
+## Executive Summary
+
+Text-based read/write tools break predictably when LLMs edit Lisp code: paren balancing errors cascade into multi-turn repair loops. The prior art is clear — structural tools that target forms by name and identity, not text position, dramatically improve edit reliability. However, not every operation needs structural treatment; a sharp tool set avoids over-engineering by applying structure only where it genuinely beats plain text.[^1]
+
+***
+
+## Prior Art: What Has Been Tried
+
+### The Failure Mode of Plain Text Editing
+
+The canonical failure is well-documented across tools and user reports. Aider's SEARCH/REPLACE format requires exact whitespace, indentation, and content match — even minor discrepancies cause silent failures. An analysis of a real coding session logged 39 editing failures: 13 from redundant edits where the LLM lost track of applied changes, 8 from context/state mismatches, and 6 from SEARCH blocks that didn't exactly match the file. For Lisp specifically, parenthesis balancing errors compound this: Claude Code issue #1827 reports Claude "spending many tool invocations trying to solve imbalanced parentheses" after a single edit, as each repair attempt may introduce new imbalances.[^2][^3][^1]
+
+Aider's designers distilled the lesson: "HIGH LEVEL — encourage GPT to structure edits as new versions of substantive code blocks (functions, methods), not as a series of surgical/minimal changes to individual lines". Unified diffs, which present whole-function replacements, outperform surgical line edits because LLMs reason better about complete units of code.[^4]
+
+### clojure-mcp — The Closest Analog
+
+The most relevant prior art is `bhauman/clojure-mcp`, an MCP server that connects a Clojure nREPL to LLMs. Its editing pipeline addresses the s-expression problem directly:[^5][^6]
+
+1. **`clojure_edit`** — targets top-level forms by *type and name*, not text position. Operations: `replace`, `insert_before`, `insert_after`. The LLM says "replace the function named `process-data`," not "replace lines 42–67."
+2. **`clojure_edit_replace_sexp`** — targets sub-expressions within a function via structural navigation.
+3. **Pipeline**: incoming code → lint (clj-kondo) → paren repair (parinfer) → structural patch (clj-rewrite) → format (cljfmt). Syntactic validity is a *precondition*, not an afterthought.[^6]
+4. **`clojure_eval`** — evaluates in the live nREPL; REPL feedback is the fast correction loop that makes the whole system work.
+
+The reported outcome: edit acceptance rates significantly higher than with text-based tools, and "buttery smooth" editing. The key lesson from clojure-mcp's documentation is that **the REPL is the killer feature** — LLMs are excellent at iterating in a REPL where each expression is independently evaluated and any error is immediately visible. The limitation is that clojure-mcp requires high-capability models (Claude 3.7/4.1, Gemini 2.5, o3/o4-mini) — structural editing tools expose poor reasoning in weaker models.[^7][^5]
+
+A separate open issue in the Claude Code repo confirms the gap: there is no production-grade structural editing tool for Lisp dialects other than Clojure, and users resort to workarounds.[^1]
+
+### Tree-sitter-Based Indexing Tools
+
+Several tools use tree-sitter for structural *analysis* rather than editing:
+
+- **CodeRLM** indexes a codebase with tree-sitter and exposes tools: `init`, `structure`, `search`, `impl`, `callers`, `grep`. It replaces the glob/grep/read exploration cycle with index-backed lookups. In tests, it found semantic bugs (duplicated code with identical names, orphaned code) that text-search missed, and completed codebase analysis in 3 minutes vs 8 minutes for native tools.[^8]
+- **Codebase-Memory** builds a persistent tree-sitter knowledge graph (66 languages, including Scheme) via MCP with 14 typed structural query tools. It achieves a **10× token reduction** and **2.1× fewer tool calls** vs iterative file-reading for cross-file structural queries.[^9]
+- **mcp-server-tree-sitter** bridges tree-sitter parsing to MCP, enabling agents to rename functions and trace call hierarchies across files.[^10]
+- **VT Code** (Rust, terminal) combines tree-sitter and ast-grep for structural edits, previewing changes before application.[^11]
+
+For Scheme specifically, a `tree-sitter-scheme` grammar exists with explicit Guile support. It parses code as lists by default — appropriate for s-expression-level operations — with custom queries available for construct-level analysis (defines, lambdas, etc.).[^12]
+
+### Research: AST Guidance for LLMs
+
+Academic work confirms the structural advantage. AST-guided fine-tuning of LLMs reduces the training–testing accuracy gap from 29.5% to 23.1% by embedding structural knowledge that generalizes better. The AST-T5 pretraining approach outperforms text-only models on code repair and transpilation. A 2026 paper on LLM code summarization found that serialized ASTs reduce average input length by 28.6% and training time by 11.3% while achieving comparable summary quality — an efficiency argument for giving LLMs structure rather than raw text.[^13][^14][^15]
+
+The Codebase-Memory authors articulate the key distinction: "The MCP Agent excels at cross-file structural queries, hub detection, caller ranking, and dependency chain traversal, where pre-materialized graph edges avoid the linear token cost of iterative file exploration". For within-file operations on Scheme, the equivalent is form-level targeting.[^9]
+
+### LLMs in a Persistent Lisp REPL (Research Architecture)
+
+A 2025 paper proposes embedding LLMs within a persistent Lisp REPL, where the model generates `<lisp>...</lisp>` tagged expressions that middleware intercepts and evaluates. The REPL maintains state across turns, supports introspection, macro expansion, and dynamic redefinition. This architecture maps directly onto Guile: Guile's REPL is first-class, with `(system repl)` accessible programmatically and support for `define-syntax`, `eval`, and runtime introspection. The key insight is that Scheme/Lisp REPLs are a *natural* interface for agentic loops — expressions are the unit of evaluation, and the REPL gives immediate correctness feedback.[^16]
+
+***
+
+## When Structural Tools Beat Plain Text — And When They Don't
+
+| Operation | Plain Text | Structural | Verdict |
+|---|---|---|---|
+| Read small file (<100 LOC) | Full content in context | Overkill | **Plain text wins** |
+| Read large file (>300 LOC) | Wastes tokens on irrelevant forms | Collapsed signature view | **Structural wins** |
+| Write new file | `file_write` is sufficient | N/A | **Plain text wins** |
+| Replace a top-level `define` | SEARCH/REPLACE, fragile on whitespace | Form-by-name replace, guaranteed valid | **Structural wins** |
+| Edit a comment or string | Structural offers no help | Text is fine | **Plain text wins** |
+| Insert a new `define` after another | `file_edit` with text anchor, fragile | `insert_after` by form name, robust | **Structural wins** |
+| Sub-expression surgery (e.g., change 3rd arg of nested call) | Fragile | Fragile — LLMs struggle to specify exact paths | **Both lose; use REPL instead** |
+| Find all callers of a function | `grep` (misses aliases, shadowing) | Symbol-aware lookup | **Structural wins** |
+| Check paren balance | Manual count, error-prone | Parser guarantee | **Structural wins** |
+| Evaluate and test a form | Not applicable | REPL eval | **REPL always wins here** |
+
+***
+
+## The Sharp Tool Set for Guile/Scheme
+
+The following is a minimal, high-ROI tool set designed from an LLM perspective. Every tool is evaluated against whether it is a genuine improvement over plain file read/write.
+
+### Tier 1: Always Needed
+
+**`read_module(path)`**
+Returns a *collapsed* view of a Scheme source file: one line per top-level form showing only the head — `(define (foo x y) ...)`, `(define-record-type <point> ...)`, `(define-syntax when ...)`. Full content for forms under a configurable line threshold (e.g., ≤5 lines). This directly mirrors the `read_file` collapsed view in clojure-mcp and saves tokens in proportion to file size. **Beats plain read: yes, for files above ~100 LOC.**[^5]
+
+**`read_form(path, name)`**
+Returns the full source text of a single top-level form identified by its defined name. The LLM calls this after `read_module` identifies which form to read in detail. This is the "drill down" step. **Beats plain read: yes — isolates exactly what the LLM needs without surrounding context noise.**
+
+**`replace_form(path, name, new_source)`**
+Replaces the entire top-level form named `name` with `new_source`. The tool:
+1. Parses `new_source` to verify it is syntactically valid (balanced parens/brackets/quotes); applies parinfer-style repair if close.
+2. Locates the existing form by symbol, not line number.
+3. Replaces in-place, preserving surrounding whitespace.
+4. Returns the repaired+formatted source if any correction was made.
+
+This is the core of clojure-mcp's `clojure_edit`, adapted for Scheme. LLM generates the full new form (guided by `read_form` output), not a diff. **Beats plain write: yes — structural location, guaranteed valid output, no paren disasters.**[^6]
+
+**`insert_form(path, anchor_name, position, new_source)`**
+Inserts a new top-level form `before` or `after` the form named `anchor_name`. Includes the same validation pipeline as `replace_form`. **Beats plain file_edit: yes — text-based insertion anchored to a line number breaks if any prior edit shifts lines.**
+
+**`delete_form(path, name)`**
+Removes a top-level form by name. Cleaner than text-based deletion which can accidentally remove surrounding blank lines or leave orphaned comments. **Beats plain edit: yes.**
+
+**`eval_expr(expr, namespace?)`**
+Evaluates a Scheme expression in a running Guile REPL (e.g., via a `(system repl server)` socket or embedded Guile). Returns stdout, return value, and any error with stack trace. Optionally scoped to a loaded module. This is the feedback loop that makes all editing tools safe — after any `replace_form`, the LLM calls `eval_expr` to verify. The REPL is the most important single tool in the set. **Not a comparison to plain text — it's irreplaceable.**[^16][^5]
+
+### Tier 2: High Value for Non-Trivial Codebases
+
+**`check_syntax(source_or_path)`**
+Parses a string or file and returns: `{valid: bool, errors: [...], repaired_source: string}`. Uses tree-sitter-scheme or Guile's own `(read)` in a sandboxed context. The LLM can call this before submitting edits or when unsure about balance. **Beats nothing (additive) — but extremely useful as a pre-flight check.**[^12]
+
+**`find_references(path_or_module, symbol)`**
+Returns all top-level forms (and optionally sub-expressions) that reference a given symbol. Uses tree-sitter structural queries rather than grep, so it can distinguish `let`-bound shadowing and macro-introduced bindings from genuine references to the symbol. Returns `{form_name, path, line, context_snippet}` per hit. **Beats grep: yes, for refactoring and impact analysis.**[^10]
+
+**`list_module_exports(path)`**
+For files with `(define-module ...)` and explicit `#:export`, returns the exported API surface. Useful for the LLM to understand what is safe to rename vs what is a public API. **Beats manual reading: marginal — worth having if Guile modules are in scope.**
+
+### Tier 3: Situational
+
+**`macro_expand(expr, namespace)`**
+Calls `(macroexpand expr)` in the REPL, returns expanded form. Useful when the LLM needs to reason about macro-generated code. **Unique capability — no text equivalent.**
+
+**`load_module(path)`**
+Re-loads a file into the running Guile REPL session (i.e., `(load path)` or `(use-modules ...)`). Used after a sequence of edits to verify the whole module compiles and no unbound-variable errors occur. **Beats eval_expr for whole-module validation.**
+
+### What to Explicitly Omit
+
+**Sub-expression path addressing** (e.g., "replace the 3rd element of the 2nd `let` binding"): LLMs consistently fail to specify correct structural coordinates for deeply nested forms. clojure-mcp experimented with this; the guidance now is to use the REPL instead — evaluate, observe, generate a new complete form, replace it. The REPL feedback loop is more robust than surgical sub-expression addressing.[^5]
+
+**Raw AST JSON dump**: Serialized AST trees are verbose and waste context. LLMs do not need to see the full AST — they need to see the *source text* of the relevant form. Academic work confirms that serialized ASTs don't outperform plain source for LLM comprehension tasks.[^15]
+
+**Paredit-style operations** (slurp, barf, transpose-sexp): These are human-interactive operations. LLMs do not navigate code incrementally the way a human using paredit does — they reason about complete transformations. Exposing slurp/barf as tools adds complexity without benefit.[^17]
+
+***
+
+## Implementation Notes for Guile
+
+Guile provides first-class programmatic REPL access and a native s-expression reader, which means the "validation pipeline" that clojure-mcp builds from external tools (`clj-kondo`, `parinfer`, `clj-rewrite`) can be implemented using Guile itself:
+
+- **Parse/validate**: `(with-exception-handler ... (lambda () (read (open-input-string src))) ...)` — Guile's native `read` is the ground truth for s-expression validity.
+- **Format**: `(pretty-print form)` from `(use-modules (ice-9 pretty-print))`.
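+- **REPL socket**: Guile supports running a REPL server on a TCP socket via `(run-server (make-tcp-server-socket #:port 7000))`, or on a Unix domain socket via `(run-server (make-unix-domain-server-socket #:path "/tmp/guile-socket"))`, both from `(use-modules (system repl server))`; use `spawn-server` instead of `run-server` to serve from a background thread. This makes `eval_expr` straightforward to implement.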
+- **tree-sitter-scheme** (Guile grammar #7) provides structural querying for `find_references` without running Guile itself — useful for static analysis of files that may not be loadable (e.g., incomplete work-in-progress).[^12]
+
+The pi-coding-agent extension system (TypeScript-based) can host these tools as tool definitions that call out to a sidecar Guile process or tree-sitter library, keeping the structural intelligence in the language that understands Scheme natively.
+
+***
+
+## Conclusion
+
+The evidence from clojure-mcp, CodeRLM, Codebase-Memory, and Aider's failure analysis converges on three principles: (1) target forms by name/identity, never by line number or exact text match; (2) validate and auto-repair on input, not after the fact; (3) use a live REPL as the primary correctness signal. For Guile/Scheme, a seven-tool set — `read_module`, `read_form`, `replace_form`, `insert_form`, `delete_form`, `eval_expr`, `check_syntax` — covers the 95% case. Sub-expression addressing and raw AST exposure should be omitted; they add LLM-facing complexity that empirically leads to more errors, not fewer. The REPL is the structural editor.[^1][^5]
+
+---
+
+## References
+
+[^1]: [Structural editing tools for s-expr languages · Issue #1827 · anthropics/claude-code](https://github.com/anthropics/claude-code/issues/1827) - Claude has excess trouble with editing LISPs' code, since they require keeping parentheses balanced....
+
+[^2]: [Code Surgery: How AI Assistants Make Precise Edits to Your Files](https://fabianhertwig.com/blog/coding-assistants-file-edits/) - Detailed Error ReportingPermalink. Aider excels at providing highly informative feedback when edits ...
+
+[^3]: [Aider analysis of editing failures from · Issue #3895 - GitHub](https://github.com/Aider-AI/aider/issues/3895) - Unexplained Failures: A significant number of failures occurred where Aider's feedback indicated the...
+
+[^4]: [Unified diffs make GPT-4 Turbo 3X less lazy - Aider](https://aider.chat/docs/unified-diffs.html) - Aider now asks GPT-4 Turbo to use unified diffs to edit your code. This dramatically improves GPT-4 ...
+
+[^5]: [bhauman/clojure-mcp](https://github.com/bhauman/clojure-mcp) - Clojure MCP. Contribute to bhauman/clojure-mcp development by creating an account on GitHub.
+
+[^6]: [ClojureMCP (Clojure MCP Server) by bhauman | AI Coding Workflows](https://www.augmentcode.com/mcp/clojure-mcp-server) - ClojureMCP is an MCP (Model Context Protocol) server for Clojure that connects LLM clients (Claude C...
+
+[^7]: [clojure-mcp AI Agents Free Tier and OneKey Router Discounted ...](https://www.deepnlp.org/store/ai-agent/autonomous-agent/pub-bhauman/clojure-mcp) - clojure-mcp from AI Hub Admin, Insights of Top Ranking AI & Robotics Applications.
+
+[^8]: [CodeRLM – Tree-sitter-backed code indexing for LLM agents](https://news.ycombinator.com/item?id=46974515) - I've been building a tool that changes how LLM coding agents explore codebases, and I wanted to shar...
+
+[^9]: [Codebase-Memory: Tree-Sitter-Based Knowledge Graphs for LLM ...](https://arxiv.org/html/2603.27277v1) - An MCP-based tool interface exposing 14 typed structural queries (call-path tracing, impact analysis...
+
+[^10]: [mcp-server-tree-sitter: The Ultimate Guide for AI Engineers](https://skywork.ai/skypage/en/mcp-server-tree-sitter-The-Ultimate-Guide-for-AI-Engineers/1972133047164960768) - Tree-sitter: A powerful and efficient parser generator that builds a concrete, incremental Abstract ...
+
+[^11]: [Rust terminal coding agent for structural edits (Tree-sitter/ast-grep)](https://www.reddit.com/r/rust/comments/1o9ak42/vt_code_rust_terminal_coding_agent_for_structural/) - It combines Tree-sitter parsing with ast-grep patterns for safe refactors, plus tool calls (read/sea...
+
+[^12]: [6cdh/tree-sitter-scheme - GitHub](https://github.com/6cdh/tree-sitter-scheme) - This parser doesn't parse language constructs. Instead, it parses code as lists. If you want languag...
+
+[^13]: [An AST-guided LLM Approach for SVRF Code Synthesis - arXiv](https://arxiv.org/html/2507.00352v1) - Through data augmentation techniques using our internal LLM tools, we expanded this to 741 diverse e...
+
+[^14]: [[PDF] Advancing Large Language Models for Code Using Code - EECS](https://www2.eecs.berkeley.edu/Pubs/TechRpts/2025/EECS-2025-50.pdf)
+
+[^15]: [[PDF] Code vs Serialized AST Inputs for LLM-Based Code Summarization](https://paul-harvey.org/publication/2026-llm-ast-code-summary/2026-llm-ast-code-summary.pdf) - Experimental results show that, for method-level code summarization, serialized ASTs can achieve sum...
+
+[^16]: [From Tool Calling to Symbolic Thinking: LLMs in a Persistent Lisp ...](https://arxiv.org/html/2506.10021v1) - This work proposes an alternative path: empowering language models to use a Lisp REPL as a persisten...
+
+[^17]: [Paredit, a Visual Guide - Calva User Guide](https://calva.io/paredit/) - Calva Paredit helps you navigate, select and edit Clojure code in a structural way. LISP isn't line ...
+
--- a/package-lock.json
+++ b/package-lock.json
--- a/package.json
+++ b/package.json
@@ -0,0 +1,5 @@
+{
+  "devDependencies": {
+    "@mariozechner/pi-coding-agent": "^0.65.0"
+  }
+}