From 61574855bff244d81b3067a39e46c37292210818 Mon Sep 17 00:00:00 2001 From: Raymond Scott Pert Date: Wed, 8 Apr 2026 01:27:42 +0000 Subject: [PATCH] Initial commit: Gitea code search with MeiliSearch + MCP Go indexer (full re-index + webhook), MeiliSearch integration, MCP server exposing gitea_search tool for LLM agents. K8s manifests for MeiliSearch + indexer CronJob. Co-Authored-By: Claude Opus 4.6 (1M context) --- .gitignore | 22 +++ Dockerfile | 25 +++ README.md | 251 ++++++++++++++++++++++++++++ go.mod | 12 ++ go.sum | 30 ++++ internal/gitea/client.go | 139 ++++++++++++++++ internal/mcp/server.go | 348 +++++++++++++++++++++++++++++++++++++++ internal/meili/client.go | 267 ++++++++++++++++++++++++++++++ k8s/indexer-cronjob.yaml | 126 ++++++++++++++ k8s/meilisearch.yaml | 94 +++++++++++ k8s/namespace.yaml | 4 + 11 files changed, 1318 insertions(+) create mode 100644 .gitignore create mode 100644 Dockerfile create mode 100644 README.md create mode 100644 go.mod create mode 100644 go.sum create mode 100644 internal/gitea/client.go create mode 100644 internal/mcp/server.go create mode 100644 internal/meili/client.go create mode 100644 k8s/indexer-cronjob.yaml create mode 100644 k8s/meilisearch.yaml create mode 100644 k8s/namespace.yaml diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..fafe1d4 --- /dev/null +++ b/.gitignore @@ -0,0 +1,22 @@ +# Binaries +indexer +mcp-server +*.exe + +# Go +/vendor/ + +# IDE +.idea/ +.vscode/ +*.swp +*.swo +*~ + +# OS +.DS_Store +Thumbs.db + +# Env +.env +.env.* diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..e1f4443 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,25 @@ +FROM golang:1.22-alpine AS builder + +RUN apk add --no-cache git + +WORKDIR /build +COPY go.mod go.sum ./ +RUN go mod download + +COPY . . 
+RUN CGO_ENABLED=0 GOOS=linux go build -ldflags="-s -w" -o /indexer ./cmd/indexer +RUN CGO_ENABLED=0 GOOS=linux go build -ldflags="-s -w" -o /mcp-server ./cmd/mcp-server + +# --- Indexer image (includes git for cloning) --- +FROM alpine:3.20 AS indexer + +RUN apk add --no-cache git ca-certificates +COPY --from=builder /indexer /usr/local/bin/indexer +ENTRYPOINT ["indexer"] + +# --- MCP server image (minimal) --- +FROM alpine:3.20 AS mcp-server + +RUN apk add --no-cache ca-certificates +COPY --from=builder /mcp-server /usr/local/bin/mcp-server +ENTRYPOINT ["mcp-server"] diff --git a/README.md b/README.md new file mode 100644 index 0000000..183ff31 --- /dev/null +++ b/README.md @@ -0,0 +1,251 @@ +# gitea-search + +Full-text code search across all Gitea repositories, exposed as an MCP tool for Claude Code. + +Indexes file content from a Gitea instance into MeiliSearch. Provides two interfaces: a CLI for indexing/searching and an MCP server (stdio JSON-RPC) that Claude Code can call as a tool. 
+ +## Architecture + +``` + +-----------------+ + | Gitea Instance | + | (33 repos) | + +--------+--------+ + | + +--------------+--------------+ + | | + git clone --depth 1 push webhook + | | + v v + +-------------------+ +-------------------+ + | indexer full | | indexer webhook | + | (CronJob, 4h) | | (Deployment, :8080)| + +--------+----------+ +--------+----------+ + | | + +----------+---------------+ + | + v + +-------------------+ + | MeiliSearch | + | (PVC-backed) | + +--------+----------+ + | + v + +-------------------+ + | mcp-server | + | (stdio JSON-RPC) | + +-------------------+ + ^ + | + +-------------------+ + | Claude Code | + | (MCP client) | + +-------------------+ +``` + +## Components + +| Binary | Purpose | +|--------|---------| +| `indexer full` | Clone all repos, extract files, push to MeiliSearch | +| `indexer repo <owner/name>` | Re-index a single repo | +| `indexer webhook` | HTTP server (:8080) for Gitea push webhooks | +| `indexer search <query>` | CLI search for testing | +| `mcp-server` | MCP stdio server exposing `gitea_search` tool | + +## Quick Start + +### Prerequisites + +- Go 1.22+ +- MeiliSearch instance (v1.6+) +- Gitea instance with API token +- git (for cloning repos) + +### Build + +```sh +go build -o indexer ./cmd/indexer +go build -o mcp-server ./cmd/mcp-server +``` + +### Run a full index + +```sh +export GITEA_TOKEN=your-token-here +export MEILI_URL=http://localhost:7700 +./indexer full +``` + +### Test search + +```sh +./indexer search "wireguard config" --type=conf --limit=5 +``` + +### Run MCP server + +```sh +export MEILI_URL=http://localhost:7700 +./mcp-server +``` + +## Configuration + +All configuration via environment variables: + +| Variable | Default | Description | +|----------|---------|-------------| +| `GITEA_URL` | `https://gitea.rspworks.tech` | Gitea instance URL | +| `GITEA_TOKEN` | *(required)* | Gitea API token | +| `MEILI_URL` | `http://localhost:7700` | MeiliSearch URL | +| `MEILI_KEY` | *(empty)* | MeiliSearch master 
key | +| `INDEX_NAME` | `gitea-code` | MeiliSearch index name | +| `WEBHOOK_SECRET` | *(empty)* | HMAC secret for Gitea webhook validation | + +## MCP Integration with Claude Code + +### Option 1: Local binary + +Add to `~/.claude/claude_code_config.json`: + +```json +{ + "mcpServers": { + "gitea-search": { + "command": "/path/to/mcp-server", + "env": { + "MEILI_URL": "http://meilisearch.gitea-search.svc.cluster.local:7700", + "MEILI_KEY": "your-master-key" + } + } + } +} +``` + +### Option 2: Via Docker + +```json +{ + "mcpServers": { + "gitea-search": { + "command": "docker", + "args": [ + "run", "--rm", "-i", + "-e", "MEILI_URL=http://host.docker.internal:7700", + "gitea.rspworks.tech/rpert/gitea-search:mcp-server" + ] + } + } +} +``` + +### Tool usage + +Once configured, Claude Code can call the `gitea_search` tool: + +``` +Search for "wireguard" across all repos +Search for "backup" in repo "rpert/infra-ssh" with filetype "sh" +``` + +Tool parameters: + +| Parameter | Type | Required | Description | +|-----------|------|----------|-------------| +| `query` | string | yes | Search terms | +| `repo` | string | no | Filter by repo full name (e.g., `rpert/infra-ssh`) | +| `filetype` | string | no | Filter by extension (e.g., `go`, `md`, `yaml`) | +| `limit` | integer | no | Max results (default: 10) | + +## MeiliSearch Document Schema + +```json +{ + "id": "sha256(repo+branch+path)", + "repo": "rpert/infra-ssh", + "branch": "main", + "path": "docs/mail-setup.md", + "filename": "mail-setup.md", + "extension": "md", + "content": "file content (up to 50KB)", + "language": "markdown", + "updated_at": 1712534400 +} +``` + +Searchable: `content`, `path`, `filename`, `repo` +Filterable: `repo`, `extension`, `branch` +Displayed: all fields except `content` (snippets returned via highlighting) + +## K8s Deployment + +### 1. 
Create namespace and secrets + +```sh +kubectl apply -f k8s/namespace.yaml + +# Generate a real master key +MEILI_KEY=$(openssl rand -base64 32) + +kubectl -n gitea-search create secret generic meilisearch-secret \ + --from-literal=master-key="$MEILI_KEY" \ + --dry-run=client -o yaml | kubectl apply -f - + +kubectl -n gitea-search create secret generic indexer-secret \ + --from-literal=gitea-token="your-gitea-token" \ + --from-literal=webhook-secret="your-webhook-secret" \ + --dry-run=client -o yaml | kubectl apply -f - +``` + +### 2. Deploy MeiliSearch + +```sh +kubectl apply -f k8s/meilisearch.yaml +``` + +### 3. Build and push container image + +```sh +# Build indexer image +docker build --target indexer -t gitea.rspworks.tech/rpert/gitea-search:latest . +docker push gitea.rspworks.tech/rpert/gitea-search:latest + +# Build MCP server image +docker build --target mcp-server -t gitea.rspworks.tech/rpert/gitea-search:mcp-server . +docker push gitea.rspworks.tech/rpert/gitea-search:mcp-server +``` + +### 4. Deploy indexer CronJob and webhook server + +```sh +kubectl apply -f k8s/indexer-cronjob.yaml +``` + +### 5. Trigger initial index + +```sh +kubectl -n gitea-search create job --from=cronjob/gitea-indexer gitea-indexer-initial +kubectl -n gitea-search logs -f job/gitea-indexer-initial +``` + +### 6. 
Configure Gitea webhook (optional) + +In Gitea, go to Site Administration > Webhooks > Add Webhook: +- URL: `http://indexer-webhook.gitea-search.svc.cluster.local:8080/webhook` +- Content Type: `application/json` +- Secret: same as `WEBHOOK_SECRET` +- Events: Push only + +## Indexing Details + +- Clones each repo with `git clone --depth 1` (shallow, fast) +- Walks all files, skipping: `.git/`, `node_modules/`, `vendor/`, `__pycache__/`, binary files, lock files, images, archives +- Files >50KB are skipped +- Binary detection: checks first 512 bytes for null bytes +- Full reindex clears the index first, then re-populates +- Webhook reindex deletes only the affected repo's documents, then re-indexes that repo + +## License + +MIT diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..e74afdc --- /dev/null +++ b/go.mod @@ -0,0 +1,12 @@ +module gitea.rspworks.tech/rpert/gitea-search + +go 1.22.0 + +require github.com/meilisearch/meilisearch-go v0.29.0 + +require ( + github.com/andybalholm/brotli v1.1.1 // indirect + github.com/golang-jwt/jwt/v4 v4.5.1 // indirect + github.com/josharian/intern v1.0.0 // indirect + github.com/mailru/easyjson v0.7.7 // indirect +) diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..6ea02d0 --- /dev/null +++ b/go.sum @@ -0,0 +1,30 @@ +github.com/andybalholm/brotli v1.1.0/go.mod h1:sms7XGricyQI9K10gOSf56VKKWS4oLer58Q+mhRPtnY= +github.com/andybalholm/brotli v1.1.1 h1:PR2pgnyFznKEugtsUo0xLdDop5SKXd5Qf5ysW+7XdTA= +github.com/andybalholm/brotli v1.1.1/go.mod h1:05ib4cKhjx3OQYUY22hTVd34Bc8upXjOLL2rKwwZBoA= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/golang-jwt/jwt/v4 v4.5.0/go.mod h1:m21LjoU+eqJr34lmDMbreY2eSTRJ1cv77w39/MY0Ch0= +github.com/golang-jwt/jwt/v4 v4.5.1 
h1:JdqV9zKUdtaa9gdPlywC3aeoEsR681PlKC+4F5gQgeo= +github.com/golang-jwt/jwt/v4 v4.5.1/go.mod h1:m21LjoU+eqJr34lmDMbreY2eSTRJ1cv77w39/MY0Ch0= +github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= +github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= +github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= +github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= +github.com/meilisearch/meilisearch-go v0.29.0 h1:HZ9NEKN59USINQ/DXJge/aaXq8IrsKbXGTdAoBaaDz4= +github.com/meilisearch/meilisearch-go v0.29.0/go.mod h1:2cRCAn4ddySUsFfNDLVPod/plRibQsJkXF/4gLhxbOk= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= +github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= +github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= +github.com/stretchr/testify v1.8.2 h1:+h33VjcLVPDHtOdpUCuF+7gSuG3yGIftsP1YvFihtJ8= +github.com/stretchr/testify v1.8.2/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= +github.com/xyproto/randomstring v1.0.5 h1:YtlWPoRdgMu3NZtP45drfy1GKoojuR7hmRcnhZqKjWU= +github.com/xyproto/randomstring v1.0.5/go.mod h1:rgmS5DeNXLivK7YprL0pY+lTuhNQW3iGxZ18UQApw/E= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod 
h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/internal/gitea/client.go b/internal/gitea/client.go new file mode 100644 index 0000000..7de8edb --- /dev/null +++ b/internal/gitea/client.go @@ -0,0 +1,139 @@ +package gitea + +import ( + "encoding/json" + "fmt" + "io" + "net/http" + "time" +) + +// Repo represents a Gitea repository. +type Repo struct { + ID int64 `json:"id"` + FullName string `json:"full_name"` + CloneURL string `json:"clone_url"` + DefaultBranch string `json:"default_branch"` + Empty bool `json:"empty"` + Archived bool `json:"archived"` + UpdatedAt string `json:"updated_at"` +} + +// Client is a Gitea API client. +type Client struct { + baseURL string + token string + httpClient *http.Client +} + +// NewClient creates a new Gitea API client. +func NewClient(baseURL, token string) *Client { + return &Client{ + baseURL: baseURL, + token: token, + httpClient: &http.Client{ + Timeout: 30 * time.Second, + }, + } +} + +// ListAllRepos returns all repositories accessible to the authenticated user. +// It paginates through all results automatically. 
+func (c *Client) ListAllRepos() ([]Repo, error) { + var allRepos []Repo + page := 1 + limit := 50 + + for { + url := fmt.Sprintf("%s/api/v1/repos/search?page=%d&limit=%d&token=%s", + c.baseURL, page, limit, c.token) + + req, err := http.NewRequest("GET", url, nil) + if err != nil { + return nil, fmt.Errorf("creating request: %w", err) + } + req.Header.Set("Accept", "application/json") + + resp, err := c.httpClient.Do(req) + if err != nil { + return nil, fmt.Errorf("fetching repos page %d: %w", page, err) + } + + if resp.StatusCode != http.StatusOK { + body, _ := io.ReadAll(resp.Body) + resp.Body.Close() + return nil, fmt.Errorf("gitea API returned %d: %s", resp.StatusCode, string(body)) + } + + var result struct { + Data []Repo `json:"data"` + } + if err := json.NewDecoder(resp.Body).Decode(&result); err != nil { + resp.Body.Close() + return nil, fmt.Errorf("decoding response: %w", err) + } + resp.Body.Close() + + if len(result.Data) == 0 { + break + } + + allRepos = append(allRepos, result.Data...) + + if len(result.Data) < limit { + break + } + page++ + } + + // Filter out empty repos + filtered := make([]Repo, 0, len(allRepos)) + for _, r := range allRepos { + if !r.Empty { + filtered = append(filtered, r) + } + } + + return filtered, nil +} + +// GetRepo returns a single repository by owner/name. 
+func (c *Client) GetRepo(fullName string) (*Repo, error) { + url := fmt.Sprintf("%s/api/v1/repos/%s?token=%s", c.baseURL, fullName, c.token) + + req, err := http.NewRequest("GET", url, nil) + if err != nil { + return nil, fmt.Errorf("creating request: %w", err) + } + req.Header.Set("Accept", "application/json") + + resp, err := c.httpClient.Do(req) + if err != nil { + return nil, fmt.Errorf("fetching repo %s: %w", fullName, err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + body, _ := io.ReadAll(resp.Body) + return nil, fmt.Errorf("gitea API returned %d: %s", resp.StatusCode, string(body)) + } + + var repo Repo + if err := json.NewDecoder(resp.Body).Decode(&repo); err != nil { + return nil, fmt.Errorf("decoding response: %w", err) + } + + return &repo, nil +} + +// AuthenticatedCloneURL returns the clone URL with the token embedded for private repos. +func (c *Client) AuthenticatedCloneURL(repo Repo) string { + // Insert token into https URL: https://token@host/path.git + if len(c.baseURL) > 8 { + return fmt.Sprintf("%s://%s@%s", + c.baseURL[:5], // "https" + c.token, + repo.CloneURL[8:]) // strip "https://" + } + return repo.CloneURL +} diff --git a/internal/mcp/server.go b/internal/mcp/server.go new file mode 100644 index 0000000..a859f5a --- /dev/null +++ b/internal/mcp/server.go @@ -0,0 +1,348 @@ +package mcp + +import ( + "bufio" + "encoding/json" + "fmt" + "io" + "log" + "os" + "strings" + + "gitea.rspworks.tech/rpert/gitea-search/internal/meili" +) + +// JSON-RPC message types +type jsonRPCRequest struct { + JSONRPC string `json:"jsonrpc"` + ID json.RawMessage `json:"id,omitempty"` + Method string `json:"method"` + Params json.RawMessage `json:"params,omitempty"` +} + +type jsonRPCResponse struct { + JSONRPC string `json:"jsonrpc"` + ID json.RawMessage `json:"id,omitempty"` + Result interface{} `json:"result,omitempty"` + Error *jsonRPCError `json:"error,omitempty"` +} + +type jsonRPCError struct { + Code int `json:"code"` + 
Message string `json:"message"` +} + +// MCP protocol types +type serverInfo struct { + Name string `json:"name"` + Version string `json:"version"` +} + +type serverCapabilities struct { + Tools *toolsCapability `json:"tools,omitempty"` +} + +type toolsCapability struct { + ListChanged bool `json:"listChanged,omitempty"` +} + +type initializeResult struct { + ProtocolVersion string `json:"protocolVersion"` + ServerInfo serverInfo `json:"serverInfo"` + Capabilities serverCapabilities `json:"capabilities"` +} + +type toolDefinition struct { + Name string `json:"name"` + Description string `json:"description"` + InputSchema inputSchema `json:"inputSchema"` +} + +type inputSchema struct { + Type string `json:"type"` + Properties map[string]property `json:"properties"` + Required []string `json:"required,omitempty"` +} + +type property struct { + Type string `json:"type"` + Description string `json:"description"` + Default any `json:"default,omitempty"` +} + +type toolsListResult struct { + Tools []toolDefinition `json:"tools"` +} + +type toolCallParams struct { + Name string `json:"name"` + Arguments json.RawMessage `json:"arguments"` +} + +type searchArgs struct { + Query string `json:"query"` + Repo string `json:"repo"` + Filetype string `json:"filetype"` + Limit int64 `json:"limit"` +} + +type contentItem struct { + Type string `json:"type"` + Text string `json:"text"` +} + +type toolCallResult struct { + Content []contentItem `json:"content"` + IsError bool `json:"isError,omitempty"` +} + +// Server is the MCP server that handles stdio JSON-RPC. +type Server struct { + meiliClient *meili.Client + version string +} + +// NewServer creates a new MCP server. +func NewServer(meiliClient *meili.Client, version string) *Server { + return &Server{ + meiliClient: meiliClient, + version: version, + } +} + +// Run starts the MCP server, reading from stdin and writing to stdout. 
+func (s *Server) Run() error { + // Log to stderr so it doesn't interfere with JSON-RPC on stdout + log.SetOutput(os.Stderr) + log.SetPrefix("[mcp-server] ") + + reader := bufio.NewReader(os.Stdin) + writer := os.Stdout + + log.Println("MCP server started, waiting for requests on stdin") + + for { + line, err := reader.ReadBytes('\n') + if err != nil { + if err == io.EOF { + log.Println("stdin closed, shutting down") + return nil + } + return fmt.Errorf("reading stdin: %w", err) + } + + line = []byte(strings.TrimSpace(string(line))) + if len(line) == 0 { + continue + } + + var req jsonRPCRequest + if err := json.Unmarshal(line, &req); err != nil { + log.Printf("invalid JSON-RPC request: %s", string(line)) + continue + } + + resp := s.handleRequest(req) + if resp == nil { + // Notification, no response needed + continue + } + + respBytes, err := json.Marshal(resp) + if err != nil { + log.Printf("error marshaling response: %v", err) + continue + } + + respBytes = append(respBytes, '\n') + if _, err := writer.Write(respBytes); err != nil { + return fmt.Errorf("writing response: %w", err) + } + } +} + +func (s *Server) handleRequest(req jsonRPCRequest) *jsonRPCResponse { + switch req.Method { + case "initialize": + return s.handleInitialize(req) + case "notifications/initialized": + log.Println("Client initialized") + return nil // notification, no response + case "tools/list": + return s.handleToolsList(req) + case "tools/call": + return s.handleToolsCall(req) + case "ping": + return &jsonRPCResponse{ + JSONRPC: "2.0", + ID: req.ID, + Result: map[string]interface{}{}, + } + default: + log.Printf("Unknown method: %s", req.Method) + return &jsonRPCResponse{ + JSONRPC: "2.0", + ID: req.ID, + Error: &jsonRPCError{ + Code: -32601, + Message: fmt.Sprintf("method not found: %s", req.Method), + }, + } + } +} + +func (s *Server) handleInitialize(req jsonRPCRequest) *jsonRPCResponse { + log.Println("Handling initialize") + return &jsonRPCResponse{ + JSONRPC: "2.0", + ID: 
req.ID, + Result: initializeResult{ + ProtocolVersion: "2024-11-05", + ServerInfo: serverInfo{ + Name: "gitea-search", + Version: s.version, + }, + Capabilities: serverCapabilities{ + Tools: &toolsCapability{}, + }, + }, + } +} + +func (s *Server) handleToolsList(req jsonRPCRequest) *jsonRPCResponse { + log.Println("Handling tools/list") + return &jsonRPCResponse{ + JSONRPC: "2.0", + ID: req.ID, + Result: toolsListResult{ + Tools: []toolDefinition{ + { + Name: "gitea_search", + Description: "Search across all Gitea repositories. Returns matching files with code snippets. Use this to find code, configuration, documentation, or any file content across the codebase.", + InputSchema: inputSchema{ + Type: "object", + Properties: map[string]property{ + "query": { + Type: "string", + Description: "Search terms. Supports natural language queries and exact phrases.", + }, + "repo": { + Type: "string", + Description: "Filter to a specific repo by full name (e.g., 'rpert/infra-ssh'). Omit to search all repos.", + }, + "filetype": { + Type: "string", + Description: "Filter by file extension without dot (e.g., 'go', 'md', 'yaml', 'py').", + }, + "limit": { + Type: "integer", + Description: "Maximum number of results to return.", + Default: 10, + }, + }, + Required: []string{"query"}, + }, + }, + }, + }, + } +} + +func (s *Server) handleToolsCall(req jsonRPCRequest) *jsonRPCResponse { + var params toolCallParams + if err := json.Unmarshal(req.Params, ¶ms); err != nil { + return &jsonRPCResponse{ + JSONRPC: "2.0", + ID: req.ID, + Error: &jsonRPCError{ + Code: -32602, + Message: fmt.Sprintf("invalid params: %v", err), + }, + } + } + + if params.Name != "gitea_search" { + return &jsonRPCResponse{ + JSONRPC: "2.0", + ID: req.ID, + Error: &jsonRPCError{ + Code: -32602, + Message: fmt.Sprintf("unknown tool: %s", params.Name), + }, + } + } + + var args searchArgs + if err := json.Unmarshal(params.Arguments, &args); err != nil { + return &jsonRPCResponse{ + JSONRPC: "2.0", + ID: req.ID, 
+ Result: toolCallResult{ + Content: []contentItem{{Type: "text", Text: fmt.Sprintf("Error parsing arguments: %v", err)}}, + IsError: true, + }, + } + } + + if args.Query == "" { + return &jsonRPCResponse{ + JSONRPC: "2.0", + ID: req.ID, + Result: toolCallResult{ + Content: []contentItem{{Type: "text", Text: "Error: query parameter is required"}}, + IsError: true, + }, + } + } + + if args.Limit <= 0 { + args.Limit = 10 + } + + log.Printf("Searching: query=%q repo=%q filetype=%q limit=%d", args.Query, args.Repo, args.Filetype, args.Limit) + + results, err := s.meiliClient.Search(args.Query, args.Repo, args.Filetype, args.Limit) + if err != nil { + return &jsonRPCResponse{ + JSONRPC: "2.0", + ID: req.ID, + Result: toolCallResult{ + Content: []contentItem{{Type: "text", Text: fmt.Sprintf("Search error: %v", err)}}, + IsError: true, + }, + } + } + + text := formatResults(args.Query, results) + + return &jsonRPCResponse{ + JSONRPC: "2.0", + ID: req.ID, + Result: toolCallResult{ + Content: []contentItem{{Type: "text", Text: text}}, + }, + } +} + +func formatResults(query string, results []meili.SearchResult) string { + if len(results) == 0 { + return fmt.Sprintf("No results found for %q.", query) + } + + var sb strings.Builder + sb.WriteString(fmt.Sprintf("Found %d results for %q:\n\n", len(results), query)) + + for i, r := range results { + sb.WriteString(fmt.Sprintf("### %d. 
%s — `%s`\n", i+1, r.Repo, r.Path)) + sb.WriteString(fmt.Sprintf("- Branch: `%s`\n", r.Branch)) + if r.Extension != "" { + sb.WriteString(fmt.Sprintf("- Type: `%s`\n", r.Extension)) + } + if r.Snippet != "" { + sb.WriteString(fmt.Sprintf("- Snippet: ...%s...\n", r.Snippet)) + } + sb.WriteString("\n") + } + + return sb.String() +} diff --git a/internal/meili/client.go b/internal/meili/client.go new file mode 100644 index 0000000..6ffe1ec --- /dev/null +++ b/internal/meili/client.go @@ -0,0 +1,267 @@ +package meili + +import ( + "crypto/sha256" + "fmt" + "log" + "time" + + "github.com/meilisearch/meilisearch-go" +) + +// Document represents an indexed file in MeiliSearch. +type Document struct { + ID string `json:"id"` + Repo string `json:"repo"` + Branch string `json:"branch"` + Path string `json:"path"` + Filename string `json:"filename"` + Extension string `json:"extension"` + Content string `json:"content"` + Language string `json:"language"` + UpdatedAt int64 `json:"updated_at"` +} + +// SearchResult holds a single search hit. +type SearchResult struct { + Repo string `json:"repo"` + Branch string `json:"branch"` + Path string `json:"path"` + Filename string `json:"filename"` + Extension string `json:"extension"` + Snippet string `json:"snippet"` +} + +// Client wraps the MeiliSearch SDK. +type Client struct { + client meilisearch.ServiceManager + indexName string +} + +// NewClient creates a new MeiliSearch client. +func NewClient(url, apiKey, indexName string) (*Client, error) { + client := meilisearch.New(url, meilisearch.WithAPIKey(apiKey)) + + c := &Client{ + client: client, + indexName: indexName, + } + + if err := c.ensureIndex(); err != nil { + return nil, fmt.Errorf("ensuring index: %w", err) + } + + return c, nil +} + +// ensureIndex creates the index if it doesn't exist and configures settings. 
+func (c *Client) ensureIndex() error { + _, err := c.client.GetIndex(c.indexName) + if err != nil { + log.Printf("Creating index %q", c.indexName) + task, err := c.client.CreateIndex(&meilisearch.IndexConfig{ + Uid: c.indexName, + PrimaryKey: "id", + }) + if err != nil { + return fmt.Errorf("creating index: %w", err) + } + if _, err := c.client.WaitForTask(task.TaskUID, 500 * time.Millisecond); err != nil { + return fmt.Errorf("waiting for index creation: %w", err) + } + } + + index := c.client.Index(c.indexName) + + // Configure searchable attributes + task, err := index.UpdateSearchableAttributes(&[]string{ + "content", "path", "filename", "repo", + }) + if err != nil { + return fmt.Errorf("updating searchable attributes: %w", err) + } + if _, err := c.client.WaitForTask(task.TaskUID, 500 * time.Millisecond); err != nil { + return fmt.Errorf("waiting for searchable attributes: %w", err) + } + + // Configure filterable attributes + task, err = index.UpdateFilterableAttributes(&[]string{ + "repo", "extension", "branch", + }) + if err != nil { + return fmt.Errorf("updating filterable attributes: %w", err) + } + if _, err := c.client.WaitForTask(task.TaskUID, 500 * time.Millisecond); err != nil { + return fmt.Errorf("waiting for filterable attributes: %w", err) + } + + // Configure displayed attributes (exclude full content) + task, err = index.UpdateDisplayedAttributes(&[]string{ + "id", "repo", "branch", "path", "filename", "extension", "updated_at", + }) + if err != nil { + return fmt.Errorf("updating displayed attributes: %w", err) + } + if _, err := c.client.WaitForTask(task.TaskUID, 500 * time.Millisecond); err != nil { + return fmt.Errorf("waiting for displayed attributes: %w", err) + } + + log.Printf("Index %q configured", c.indexName) + return nil +} + +// DocumentID generates a deterministic ID from repo, branch, and path. 
+func DocumentID(repo, branch, path string) string { + h := sha256.Sum256([]byte(repo + ":" + branch + ":" + path)) + return fmt.Sprintf("%x", h[:16]) +} + +// IndexDocuments adds or updates documents in MeiliSearch. +// It batches documents in chunks to avoid overwhelming MeiliSearch. +func (c *Client) IndexDocuments(docs []Document) error { + if len(docs) == 0 { + return nil + } + + const batchSize = 100 + index := c.client.Index(c.indexName) + + for i := 0; i < len(docs); i += batchSize { + end := i + batchSize + if end > len(docs) { + end = len(docs) + } + batch := docs[i:end] + + task, err := index.AddDocuments(batch, "id") + if err != nil { + return fmt.Errorf("adding documents batch %d-%d: %w", i, end, err) + } + + if _, err := c.client.WaitForTask(task.TaskUID, 500 * time.Millisecond); err != nil { + return fmt.Errorf("waiting for batch %d-%d: %w", i, end, err) + } + + log.Printf("Indexed documents %d-%d of %d", i+1, end, len(docs)) + } + + return nil +} + +// Search queries MeiliSearch and returns formatted results. 
+func (c *Client) Search(query string, repo string, filetype string, limit int64) ([]SearchResult, error) { + if limit <= 0 { + limit = 10 + } + + index := c.client.Index(c.indexName) + + // Build filter + var filters []string + if repo != "" { + filters = append(filters, fmt.Sprintf("repo = %q", repo)) + } + if filetype != "" { + filters = append(filters, fmt.Sprintf("extension = %q", filetype)) + } + + filterStr := "" + if len(filters) > 0 { + filterStr = filters[0] + for _, f := range filters[1:] { + filterStr += " AND " + f + } + } + + searchReq := &meilisearch.SearchRequest{ + Limit: limit, + AttributesToRetrieve: []string{"repo", "branch", "path", "filename", "extension"}, + AttributesToCrop: []string{"content:40"}, + CropLength: 40, + AttributesToHighlight: []string{"content"}, + ShowMatchesPosition: true, + } + if filterStr != "" { + searchReq.Filter = filterStr + } + + resp, err := index.Search(query, searchReq) + if err != nil { + return nil, fmt.Errorf("searching: %w", err) + } + + var results []SearchResult + for _, hit := range resp.Hits { + m, ok := hit.(map[string]interface{}) + if !ok { + continue + } + + result := SearchResult{ + Repo: strVal(m, "repo"), + Branch: strVal(m, "branch"), + Path: strVal(m, "path"), + Filename: strVal(m, "filename"), + Extension: strVal(m, "extension"), + } + + // Extract highlighted snippet from _formatted + if formatted, ok := m["_formatted"].(map[string]interface{}); ok { + if content, ok := formatted["content"].(string); ok { + result.Snippet = content + } + } + + // Fall back to cropped content + if result.Snippet == "" { + if cropped, ok := m["_croppped"].(map[string]interface{}); ok { + if content, ok := cropped["content"].(string); ok { + result.Snippet = content + } + } + } + + results = append(results, result) + } + + return results, nil +} + +// DeleteByRepo removes all documents for a given repo. 
+func (c *Client) DeleteByRepo(repo string) error { + index := c.client.Index(c.indexName) + + task, err := index.DeleteDocumentsByFilter(fmt.Sprintf("repo = %q", repo)) + if err != nil { + return fmt.Errorf("deleting documents for repo %s: %w", repo, err) + } + + if _, err := c.client.WaitForTask(task.TaskUID, 500 * time.Millisecond); err != nil { + return fmt.Errorf("waiting for deletion: %w", err) + } + + return nil +} + +// DeleteAll removes all documents from the index. +func (c *Client) DeleteAll() error { + index := c.client.Index(c.indexName) + + task, err := index.DeleteAllDocuments() + if err != nil { + return fmt.Errorf("deleting all documents: %w", err) + } + + if _, err := c.client.WaitForTask(task.TaskUID, 500 * time.Millisecond); err != nil { + return fmt.Errorf("waiting for deletion: %w", err) + } + + return nil +} + +func strVal(m map[string]interface{}, key string) string { + if v, ok := m[key].(string); ok { + return v + } + return "" +} diff --git a/k8s/indexer-cronjob.yaml b/k8s/indexer-cronjob.yaml new file mode 100644 index 0000000..3265497 --- /dev/null +++ b/k8s/indexer-cronjob.yaml @@ -0,0 +1,126 @@ +apiVersion: v1 +kind: Secret +metadata: + name: indexer-secret + namespace: gitea-search +type: Opaque +stringData: + gitea-token: "CHANGE-ME" + webhook-secret: "CHANGE-ME" +--- +apiVersion: batch/v1 +kind: CronJob +metadata: + name: gitea-indexer + namespace: gitea-search +spec: + schedule: "0 */4 * * *" # Every 4 hours + concurrencyPolicy: Forbid + successfulJobsHistoryLimit: 3 + failedJobsHistoryLimit: 3 + jobTemplate: + spec: + backoffLimit: 2 + activeDeadlineSeconds: 1800 # 30 min timeout + template: + spec: + restartPolicy: OnFailure + containers: + - name: indexer + image: gitea.rspworks.tech/rpert/gitea-search:latest + command: ["indexer", "full"] + env: + - name: GITEA_URL + value: "https://gitea.rspworks.tech" + - name: GITEA_TOKEN + valueFrom: + secretKeyRef: + name: indexer-secret + key: gitea-token + - name: MEILI_URL + value: 
"http://meilisearch.gitea-search.svc.cluster.local:7700" + - name: MEILI_KEY + valueFrom: + secretKeyRef: + name: meilisearch-secret + key: master-key + - name: INDEX_NAME + value: "gitea-code" + resources: + requests: + cpu: 100m + memory: 128Mi + limits: + cpu: 500m + memory: 512Mi +--- +# Optional: webhook server deployment for real-time indexing +apiVersion: apps/v1 +kind: Deployment +metadata: + name: indexer-webhook + namespace: gitea-search + labels: + app: indexer-webhook +spec: + replicas: 1 + selector: + matchLabels: + app: indexer-webhook + template: + metadata: + labels: + app: indexer-webhook + spec: + containers: + - name: webhook + image: gitea.rspworks.tech/rpert/gitea-search:latest + command: ["indexer", "webhook"] + ports: + - containerPort: 8080 + env: + - name: GITEA_URL + value: "https://gitea.rspworks.tech" + - name: GITEA_TOKEN + valueFrom: + secretKeyRef: + name: indexer-secret + key: gitea-token + - name: MEILI_URL + value: "http://meilisearch.gitea-search.svc.cluster.local:7700" + - name: MEILI_KEY + valueFrom: + secretKeyRef: + name: meilisearch-secret + key: master-key + - name: INDEX_NAME + value: "gitea-code" + - name: WEBHOOK_SECRET + valueFrom: + secretKeyRef: + name: indexer-secret + key: webhook-secret + resources: + requests: + cpu: 50m + memory: 64Mi + limits: + cpu: 250m + memory: 256Mi + livenessProbe: + httpGet: + path: /healthz + port: 8080 + periodSeconds: 30 +--- +apiVersion: v1 +kind: Service +metadata: + name: indexer-webhook + namespace: gitea-search +spec: + selector: + app: indexer-webhook + ports: + - port: 8080 + targetPort: 8080 diff --git a/k8s/meilisearch.yaml b/k8s/meilisearch.yaml new file mode 100644 index 0000000..842bd5e --- /dev/null +++ b/k8s/meilisearch.yaml @@ -0,0 +1,94 @@ +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: meilisearch-data + namespace: gitea-search +spec: + accessModes: + - ReadWriteOnce + storageClassName: local-path + resources: + requests: + storage: 2Gi +--- +apiVersion: 
apps/v1 +kind: Deployment +metadata: + name: meilisearch + namespace: gitea-search + labels: + app: meilisearch +spec: + replicas: 1 + strategy: + type: Recreate + selector: + matchLabels: + app: meilisearch + template: + metadata: + labels: + app: meilisearch + spec: + containers: + - name: meilisearch + image: getmeili/meilisearch:v1.11 + ports: + - containerPort: 7700 + env: + - name: MEILI_ENV + value: production + - name: MEILI_MASTER_KEY + valueFrom: + secretKeyRef: + name: meilisearch-secret + key: master-key + - name: MEILI_DB_PATH + value: /meili_data/data.ms + volumeMounts: + - name: data + mountPath: /meili_data + resources: + requests: + cpu: 100m + memory: 256Mi + limits: + cpu: 500m + memory: 512Mi + livenessProbe: + httpGet: + path: /health + port: 7700 + initialDelaySeconds: 10 + periodSeconds: 30 + readinessProbe: + httpGet: + path: /health + port: 7700 + initialDelaySeconds: 5 + periodSeconds: 10 + volumes: + - name: data + persistentVolumeClaim: + claimName: meilisearch-data +--- +apiVersion: v1 +kind: Service +metadata: + name: meilisearch + namespace: gitea-search +spec: + selector: + app: meilisearch + ports: + - port: 7700 + targetPort: 7700 +--- +apiVersion: v1 +kind: Secret +metadata: + name: meilisearch-secret + namespace: gitea-search +type: Opaque +stringData: + master-key: "CHANGE-ME-generate-with-openssl-rand-base64-32" diff --git a/k8s/namespace.yaml b/k8s/namespace.yaml new file mode 100644 index 0000000..9990da0 --- /dev/null +++ b/k8s/namespace.yaml @@ -0,0 +1,4 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: gitea-search