Go indexer (full re-index + webhook), MeiliSearch integration, MCP server exposing gitea_search tool for LLM agents. K8s manifests for MeiliSearch + indexer CronJob. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
268 lines
6.9 KiB
Go
268 lines
6.9 KiB
Go
package meili
|
|
|
|
import (
|
|
"crypto/sha256"
|
|
"fmt"
|
|
"log"
|
|
"time"
|
|
|
|
"github.com/meilisearch/meilisearch-go"
|
|
)
|
|
|
|
// Document is the record stored in MeiliSearch for one indexed file.
// The JSON tags are the attribute names referenced by the index settings
// (searchable, filterable, and displayed attributes).
type Document struct {
	// ID is the primary key of the record ("id" is the index primary key).
	// Presumably produced by DocumentID — confirm against the indexer.
	ID string `json:"id"`

	// Where the file lives.
	Repo      string `json:"repo"`
	Branch    string `json:"branch"`
	Path      string `json:"path"`
	Filename  string `json:"filename"`
	Extension string `json:"extension"`

	// What the file contains. Content is searchable but is left out of the
	// displayed attributes configured by ensureIndex.
	Content  string `json:"content"`
	Language string `json:"language"`

	// UpdatedAt is an integer timestamp.
	// NOTE(review): the unit (seconds vs. milliseconds) is not visible in
	// this file — confirm with the code that populates it.
	UpdatedAt int64 `json:"updated_at"`
}
|
|
|
|
// SearchResult is one hit returned by Client.Search.
type SearchResult struct {
	// Location of the matching file, copied from the hit's attributes.
	Repo      string `json:"repo"`
	Branch    string `json:"branch"`
	Path      string `json:"path"`
	Filename  string `json:"filename"`
	Extension string `json:"extension"`

	// Snippet is the formatted excerpt of the file content for this hit;
	// it is empty when the response carries no formatted content.
	Snippet string `json:"snippet"`
}
|
|
|
|
// Client wraps the MeiliSearch SDK.
|
|
type Client struct {
|
|
client meilisearch.ServiceManager
|
|
indexName string
|
|
}
|
|
|
|
// NewClient creates a new MeiliSearch client.
|
|
func NewClient(url, apiKey, indexName string) (*Client, error) {
|
|
client := meilisearch.New(url, meilisearch.WithAPIKey(apiKey))
|
|
|
|
c := &Client{
|
|
client: client,
|
|
indexName: indexName,
|
|
}
|
|
|
|
if err := c.ensureIndex(); err != nil {
|
|
return nil, fmt.Errorf("ensuring index: %w", err)
|
|
}
|
|
|
|
return c, nil
|
|
}
|
|
|
|
// ensureIndex creates the index if it doesn't exist and configures settings.
|
|
func (c *Client) ensureIndex() error {
|
|
_, err := c.client.GetIndex(c.indexName)
|
|
if err != nil {
|
|
log.Printf("Creating index %q", c.indexName)
|
|
task, err := c.client.CreateIndex(&meilisearch.IndexConfig{
|
|
Uid: c.indexName,
|
|
PrimaryKey: "id",
|
|
})
|
|
if err != nil {
|
|
return fmt.Errorf("creating index: %w", err)
|
|
}
|
|
if _, err := c.client.WaitForTask(task.TaskUID, 500 * time.Millisecond); err != nil {
|
|
return fmt.Errorf("waiting for index creation: %w", err)
|
|
}
|
|
}
|
|
|
|
index := c.client.Index(c.indexName)
|
|
|
|
// Configure searchable attributes
|
|
task, err := index.UpdateSearchableAttributes(&[]string{
|
|
"content", "path", "filename", "repo",
|
|
})
|
|
if err != nil {
|
|
return fmt.Errorf("updating searchable attributes: %w", err)
|
|
}
|
|
if _, err := c.client.WaitForTask(task.TaskUID, 500 * time.Millisecond); err != nil {
|
|
return fmt.Errorf("waiting for searchable attributes: %w", err)
|
|
}
|
|
|
|
// Configure filterable attributes
|
|
task, err = index.UpdateFilterableAttributes(&[]string{
|
|
"repo", "extension", "branch",
|
|
})
|
|
if err != nil {
|
|
return fmt.Errorf("updating filterable attributes: %w", err)
|
|
}
|
|
if _, err := c.client.WaitForTask(task.TaskUID, 500 * time.Millisecond); err != nil {
|
|
return fmt.Errorf("waiting for filterable attributes: %w", err)
|
|
}
|
|
|
|
// Configure displayed attributes (exclude full content)
|
|
task, err = index.UpdateDisplayedAttributes(&[]string{
|
|
"id", "repo", "branch", "path", "filename", "extension", "updated_at",
|
|
})
|
|
if err != nil {
|
|
return fmt.Errorf("updating displayed attributes: %w", err)
|
|
}
|
|
if _, err := c.client.WaitForTask(task.TaskUID, 500 * time.Millisecond); err != nil {
|
|
return fmt.Errorf("waiting for displayed attributes: %w", err)
|
|
}
|
|
|
|
log.Printf("Index %q configured", c.indexName)
|
|
return nil
|
|
}
|
|
|
|
// DocumentID derives a stable identifier for a file from its repo, branch,
// and path.
//
// The three values are joined with ":" and hashed with SHA-256; the result is
// the lowercase hex encoding of the first 16 bytes of the digest (32
// characters). Equal inputs always yield the same ID.
func DocumentID(repo, branch, path string) string {
	key := fmt.Sprintf("%s:%s:%s", repo, branch, path)
	digest := sha256.Sum256([]byte(key))
	return fmt.Sprintf("%x", digest[:16])
}
|
|
|
|
// IndexDocuments adds or updates documents in MeiliSearch.
|
|
// It batches documents in chunks to avoid overwhelming MeiliSearch.
|
|
func (c *Client) IndexDocuments(docs []Document) error {
|
|
if len(docs) == 0 {
|
|
return nil
|
|
}
|
|
|
|
const batchSize = 100
|
|
index := c.client.Index(c.indexName)
|
|
|
|
for i := 0; i < len(docs); i += batchSize {
|
|
end := i + batchSize
|
|
if end > len(docs) {
|
|
end = len(docs)
|
|
}
|
|
batch := docs[i:end]
|
|
|
|
task, err := index.AddDocuments(batch, "id")
|
|
if err != nil {
|
|
return fmt.Errorf("adding documents batch %d-%d: %w", i, end, err)
|
|
}
|
|
|
|
if _, err := c.client.WaitForTask(task.TaskUID, 500 * time.Millisecond); err != nil {
|
|
return fmt.Errorf("waiting for batch %d-%d: %w", i, end, err)
|
|
}
|
|
|
|
log.Printf("Indexed documents %d-%d of %d", i+1, end, len(docs))
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// Search queries MeiliSearch and returns formatted results.
|
|
func (c *Client) Search(query string, repo string, filetype string, limit int64) ([]SearchResult, error) {
|
|
if limit <= 0 {
|
|
limit = 10
|
|
}
|
|
|
|
index := c.client.Index(c.indexName)
|
|
|
|
// Build filter
|
|
var filters []string
|
|
if repo != "" {
|
|
filters = append(filters, fmt.Sprintf("repo = %q", repo))
|
|
}
|
|
if filetype != "" {
|
|
filters = append(filters, fmt.Sprintf("extension = %q", filetype))
|
|
}
|
|
|
|
filterStr := ""
|
|
if len(filters) > 0 {
|
|
filterStr = filters[0]
|
|
for _, f := range filters[1:] {
|
|
filterStr += " AND " + f
|
|
}
|
|
}
|
|
|
|
searchReq := &meilisearch.SearchRequest{
|
|
Limit: limit,
|
|
AttributesToRetrieve: []string{"repo", "branch", "path", "filename", "extension"},
|
|
AttributesToCrop: []string{"content:40"},
|
|
CropLength: 40,
|
|
AttributesToHighlight: []string{"content"},
|
|
ShowMatchesPosition: true,
|
|
}
|
|
if filterStr != "" {
|
|
searchReq.Filter = filterStr
|
|
}
|
|
|
|
resp, err := index.Search(query, searchReq)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("searching: %w", err)
|
|
}
|
|
|
|
var results []SearchResult
|
|
for _, hit := range resp.Hits {
|
|
m, ok := hit.(map[string]interface{})
|
|
if !ok {
|
|
continue
|
|
}
|
|
|
|
result := SearchResult{
|
|
Repo: strVal(m, "repo"),
|
|
Branch: strVal(m, "branch"),
|
|
Path: strVal(m, "path"),
|
|
Filename: strVal(m, "filename"),
|
|
Extension: strVal(m, "extension"),
|
|
}
|
|
|
|
// Extract highlighted snippet from _formatted
|
|
if formatted, ok := m["_formatted"].(map[string]interface{}); ok {
|
|
if content, ok := formatted["content"].(string); ok {
|
|
result.Snippet = content
|
|
}
|
|
}
|
|
|
|
// Fall back to cropped content
|
|
if result.Snippet == "" {
|
|
if cropped, ok := m["_croppped"].(map[string]interface{}); ok {
|
|
if content, ok := cropped["content"].(string); ok {
|
|
result.Snippet = content
|
|
}
|
|
}
|
|
}
|
|
|
|
results = append(results, result)
|
|
}
|
|
|
|
return results, nil
|
|
}
|
|
|
|
// DeleteByRepo removes all documents for a given repo.
|
|
func (c *Client) DeleteByRepo(repo string) error {
|
|
index := c.client.Index(c.indexName)
|
|
|
|
task, err := index.DeleteDocumentsByFilter(fmt.Sprintf("repo = %q", repo))
|
|
if err != nil {
|
|
return fmt.Errorf("deleting documents for repo %s: %w", repo, err)
|
|
}
|
|
|
|
if _, err := c.client.WaitForTask(task.TaskUID, 500 * time.Millisecond); err != nil {
|
|
return fmt.Errorf("waiting for deletion: %w", err)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// DeleteAll removes all documents from the index.
|
|
func (c *Client) DeleteAll() error {
|
|
index := c.client.Index(c.indexName)
|
|
|
|
task, err := index.DeleteAllDocuments()
|
|
if err != nil {
|
|
return fmt.Errorf("deleting all documents: %w", err)
|
|
}
|
|
|
|
if _, err := c.client.WaitForTask(task.TaskUID, 500 * time.Millisecond); err != nil {
|
|
return fmt.Errorf("waiting for deletion: %w", err)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// strVal returns m[key] as a string. It yields "" when the key is absent,
// when the stored value is not a string, or when m is nil.
func strVal(m map[string]interface{}, key string) string {
	// A failed comma-ok assertion leaves s as the zero value "".
	s, _ := m[key].(string)
	return s
}
|