From 16c7358143ab003b4af8882863fc5f158204a82d Mon Sep 17 00:00:00 2001 From: Vishal Bala Date: Fri, 24 Apr 2026 12:25:48 +0200 Subject: [PATCH 1/4] feat(mcp): add schema-aware search tool hints --- redisvl/mcp/server.py | 6 +- redisvl/mcp/tools/search.py | 41 ++++++++- tests/unit/test_mcp/test_search_tool_unit.py | 65 +++++++++++++- tests/unit/test_mcp/test_server.py | 94 ++++++++++++++++++++ 4 files changed, 197 insertions(+), 9 deletions(-) diff --git a/redisvl/mcp/server.py b/redisvl/mcp/server.py index 176609bd..4a5f5069 100644 --- a/redisvl/mcp/server.py +++ b/redisvl/mcp/server.py @@ -188,12 +188,12 @@ async def supports_native_hybrid_search(self) -> bool: ) return self._supports_native_hybrid_search - def _register_tools(self) -> None: + def _register_tools(self, schema: IndexSchema) -> None: """Register MCP tools once the server is ready.""" if self._tools_registered or not hasattr(self, "tool"): return - register_search_tool(self) + register_search_tool(self, schema) if not self.mcp_settings.read_only: register_upsert_tool(self) self._tools_registered = True @@ -299,7 +299,7 @@ async def _initialize_runtime_resources(self) -> Any: supports_native_hybrid_search=await self.supports_native_hybrid_search(), ) await self._initialize_vectorizer(effective_schema, timeout) - self._register_tools() + self._register_tools(effective_schema) return client except Exception: if self._index is None: diff --git a/redisvl/mcp/tools/search.py b/redisvl/mcp/tools/search.py index 0dcf96d7..5521e6af 100644 --- a/redisvl/mcp/tools/search.py +++ b/redisvl/mcp/tools/search.py @@ -6,8 +6,10 @@ from redisvl.mcp.errors import MCPErrorCode, RedisVLMCPError, map_exception from redisvl.mcp.filters import parse_filter from redisvl.query import AggregateHybridQuery, HybridQuery, TextQuery, VectorQuery +from redisvl.schema import IndexSchema DEFAULT_SEARCH_DESCRIPTION = "Search records in the configured Redis index." +_DSL_FILTER_FIELD_TYPES = frozenset({"tag", "text", "numeric"}) _NATIVE_HYBRID_DEFAULTS = { "combination_method": "LINEAR", @@ -25,6 +27,38 @@ ) +def _build_filter_hint(schema: IndexSchema) -> str: + """Describe fields supported by the JSON filter DSL.""" + filter_fields = [ + f"{field.name}({getattr(field.type, 'value', field.type)})" + for field in schema.fields.values() + if field.type in _DSL_FILTER_FIELD_TYPES + ] + if not filter_fields: + return "Object filter fields: none." + return "Object filter fields: " + ", ".join(filter_fields) + "." + + +def _build_return_fields_hint(schema: IndexSchema) -> str: + """Describe all fields that callers can request in `return_fields`.""" + returnable_fields = [ + field.name for field in schema.fields.values() if field.type != "vector" + ] + if not returnable_fields: + return "Allowed return_fields: none." + return "Allowed return_fields: " + ", ".join(returnable_fields) + "." + + +def _build_search_tool_description( + schema: IndexSchema, base_description: str | None = None +) -> str: + """Build the `search-records` description from static text plus schema hints.""" + description = (base_description or DEFAULT_SEARCH_DESCRIPTION).strip() + return " ".join( + [description, _build_filter_hint(schema), _build_return_fields_hint(schema)] + ) + + def _validate_request( *, query: str, @@ -405,10 +439,11 @@ async def search_records( raise map_exception(exc) from exc -def register_search_tool(server: Any) -> None: +def register_search_tool(server: Any, schema: IndexSchema) -> None: """Register the MCP `search-records` tool with its config-owned contract.""" - description = ( - server.mcp_settings.tool_search_description or DEFAULT_SEARCH_DESCRIPTION + description = _build_search_tool_description( + schema=schema, + base_description=server.mcp_settings.tool_search_description, ) async def search_records_tool( diff --git a/tests/unit/test_mcp/test_search_tool_unit.py b/tests/unit/test_mcp/test_search_tool_unit.py index c741a842..535ac322 100644 --- a/tests/unit/test_mcp/test_search_tool_unit.py +++ b/tests/unit/test_mcp/test_search_tool_unit.py @@ -7,6 +7,7 @@ from redisvl.mcp.errors import MCPErrorCode, RedisVLMCPError from redisvl.mcp.tools.search import ( _build_fallback_hybrid_kwargs, + _build_search_tool_description, _embed_query, register_search_tool, search_records, @@ -664,17 +665,75 @@ async def test_search_records_rejects_native_only_hybrid_runtime_params(monkeypa def test_register_search_tool_uses_default_and_override_descriptions(): default_server = FakeServer() - register_search_tool(default_server) + register_search_tool(default_server, default_server.index.schema) assert default_server.registered_tools[0]["name"] == "search-records" assert "Search records" in default_server.registered_tools[0]["description"] + assert ( + "Object filter fields: content(text), category(tag), rating(numeric)." + in default_server.registered_tools[0]["description"] + ) + assert ( + "Allowed return_fields: content, category, rating." + in default_server.registered_tools[0]["description"] + ) assert "query" in default_server.registered_tools[0]["fn"].__annotations__ assert "search_type" not in default_server.registered_tools[0]["fn"].__annotations__ custom_server = FakeServer() custom_server.mcp_settings.tool_search_description = "Custom search description" - register_search_tool(custom_server) + register_search_tool(custom_server, custom_server.index.schema) + + assert ( + custom_server.registered_tools[0]["description"] + == ( + "Custom search description " + "Object filter fields: content(text), category(tag), rating(numeric). " + "Allowed return_fields: content, category, rating." + ) + ) + + +def test_build_search_tool_description_preserves_schema_order_and_excludes_vectors(): + description = _build_search_tool_description(_schema()) assert ( - custom_server.registered_tools[0]["description"] == "Custom search description" + "Object filter fields: content(text), category(tag), rating(numeric)." + in description + ) + assert "embedding(vector)" not in description + assert "Allowed return_fields: content, category, rating." in description + assert "embedding" not in description.split("Allowed return_fields: ", 1)[1] + + +def test_build_search_tool_description_excludes_unsupported_filter_types(): + schema = IndexSchema.from_dict( + { + "index": { + "name": "docs-index", + "prefix": "doc", + "storage_type": "hash", + }, + "fields": [ + {"name": "content", "type": "text"}, + {"name": "category", "type": "tag"}, + {"name": "rating", "type": "numeric"}, + {"name": "location", "type": "geo"}, + { + "name": "embedding", + "type": "vector", + "attrs": { + "algorithm": "flat", + "dims": 3, + "distance_metric": "cosine", + "datatype": "float32", + }, + }, + ], + } ) + + description = _build_search_tool_description(schema) + + assert "location(geo)" not in description + assert "Allowed return_fields: content, category, rating, location." in description diff --git a/tests/unit/test_mcp/test_server.py b/tests/unit/test_mcp/test_server.py index c24e8d5e..14db8ac4 100644 --- a/tests/unit/test_mcp/test_server.py +++ b/tests/unit/test_mcp/test_server.py @@ -251,6 +251,100 @@ async def fake_disconnect(self): assert server._index is None +@pytest.mark.asyncio +async def test_server_registers_tools_with_effective_schema(monkeypatch): + monkeypatch.setattr( + "redisvl.mcp.server.FastMCP.__init__", lambda self, *a, **k: None + ) + monkeypatch.setattr( + "redisvl.mcp.server.load_mcp_config", + lambda path: _startup_config(), + ) + + class FakeClient: + async def aclose(self): + return None + + async def fake_connect(self, timeout): + return FakeClient() + + async def fake_load_schema(self, client, timeout): + return IndexSchema.from_dict( + { + "index": { + "name": "docs-index", + "prefix": "doc", + "storage_type": "hash", + }, + "fields": [ + {"name": "content", "type": "text"}, + {"name": "category", "type": "tag"}, + {"name": "location", "type": "geo"}, + { + "name": "embedding", + "type": "vector", + "attrs": { + "algorithm": "flat", + "dims": 3, + "distance_metric": "cosine", + "datatype": "float32", + }, + }, + ], + } + ) + + async def fake_supports_native_hybrid_search(self): + return False + + async def fake_initialize_vectorizer(self, schema, timeout): + self._vectorizer = SimpleNamespace(dims=3) + + registered_schemas = [] + + def fake_register_search_tool(server, schema): + registered_schemas.append(schema) + + async def fake_disconnect(self): + return None + + monkeypatch.setattr(RedisVLMCPServer, "_connect_redis_client", fake_connect) + monkeypatch.setattr(RedisVLMCPServer, "_load_effective_schema", fake_load_schema) + monkeypatch.setattr( + RedisVLMCPServer, + "supports_native_hybrid_search", + fake_supports_native_hybrid_search, + ) + monkeypatch.setattr( + RedisVLMCPServer, "_initialize_vectorizer", fake_initialize_vectorizer + ) + monkeypatch.setattr( + "redisvl.mcp.server.register_search_tool", fake_register_search_tool + ) + monkeypatch.setattr( + "redisvl.mcp.server.register_upsert_tool", lambda server: None + ) + monkeypatch.setattr( + "redisvl.mcp.server.AsyncSearchIndex.disconnect", + fake_disconnect, + raising=False, + ) + + server = RedisVLMCPServer(_dummy_settings()) + + await server.startup() + + assert len(registered_schemas) == 1 + assert list(registered_schemas[0].field_names) == [ + "content", + "category", + "location", + "embedding", + ] + + await server.shutdown() + + @pytest.mark.asyncio async def test_startup_while_running_raises(monkeypatch): monkeypatch.setattr( From 1fab3a955ceb3d37d5826707f8a0ab35478230de Mon Sep 17 00:00:00 2001 From: Vishal Bala Date: Fri, 24 Apr 2026 12:32:56 +0200 Subject: [PATCH 2/4] docs(mcp): describe schema-aware search hints --- docs/concepts/mcp.md | 1 + docs/user_guide/how_to_guides/mcp.md | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/concepts/mcp.md b/docs/concepts/mcp.md index 9fe6848f..0a5c771f 100644 --- a/docs/concepts/mcp.md +++ b/docs/concepts/mcp.md @@ -91,6 +91,7 @@ These tools follow a stable contract: - request validation happens before query or write execution - filters support either raw strings or a RedisVL-backed JSON DSL +- `search-records` describes the inspected schema by advertising JSON DSL filter fields and valid `return_fields` - error codes are mapped into a stable set of MCP-facing categories ## Why Use MCP Instead of Direct RedisVL Calls diff --git a/docs/user_guide/how_to_guides/mcp.md b/docs/user_guide/how_to_guides/mcp.md index bd48bbe2..f137f273 100644 --- a/docs/user_guide/how_to_guides/mcp.md +++ b/docs/user_guide/how_to_guides/mcp.md @@ -81,7 +81,7 @@ You can also control boot settings through environment variables: |----------|---------| | `REDISVL_MCP_CONFIG` | Path to the MCP YAML config | | `REDISVL_MCP_READ_ONLY` | Disable `upsert-records` when set to `true` | -| `REDISVL_MCP_TOOL_SEARCH_DESCRIPTION` | Override the search tool description | +| `REDISVL_MCP_TOOL_SEARCH_DESCRIPTION` | Set the base search tool description text; RedisVL still appends schema-derived filter and `return_fields` hints | | `REDISVL_MCP_TOOL_UPSERT_DESCRIPTION` | Override the upsert tool description | ## Connect a Remote MCP Client @@ -227,6 +227,7 @@ Notes: - when `return_fields` is omitted, RedisVL MCP returns all non-vector fields - returning the configured vector field is rejected - `filter` accepts either a raw string or a JSON DSL object +- the `search-records` tool description includes schema-derived hints for JSON DSL filter fields and valid `return_fields` - `offset + limit` must stay within `runtime.max_result_window` - startup rejects schemas that use MCP-reserved score metadata field names: `id`, `__key`, `key`, `score`, `vector_distance`, `__score`, `text_score`, `vector_similarity`, `hybrid_score` From cc213a5584faee9ba6946736faf297203cf4c341 Mon Sep 17 00:00:00 2001 From: Vishal Bala Date: Fri, 24 Apr 2026 13:00:37 +0200 Subject: [PATCH 3/4] test(mcp): fix read-only upsert tool stub --- tests/integration/test_mcp/test_upsert_tool.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_mcp/test_upsert_tool.py b/tests/integration/test_mcp/test_upsert_tool.py index 79a7d0c8..d6736007 100644 --- a/tests/integration/test_mcp/test_upsert_tool.py +++ b/tests/integration/test_mcp/test_upsert_tool.py @@ -286,7 +286,7 @@ async def test_read_only_mode_excludes_upsert_tool( ) monkeypatch.setattr( "redisvl.mcp.server.register_search_tool", - lambda server: None, + lambda server, schema: None, ) def fake_tool(*args: Any, **kwargs: Any): From 8300a977d481d8748f80d76a8dbd94f52b5f5c6f Mon Sep 17 00:00:00 2001 From: Vishal Bala Date: Mon, 27 Apr 2026 16:32:20 +0200 Subject: [PATCH 4/4] feat(mcp): advertise exists filter support --- docs/concepts/mcp.md | 2 +- docs/user_guide/how_to_guides/mcp.md | 4 +-- redisvl/mcp/tools/search.py | 17 ++++++++++-- tests/unit/test_mcp/test_search_tool_unit.py | 28 +++++++++++++------- tests/unit/test_mcp/test_server.py | 4 +-- uv.lock | 2 +- 6 files changed, 39 insertions(+), 18 deletions(-) diff --git a/docs/concepts/mcp.md b/docs/concepts/mcp.md index 0a5c771f..6a82ba73 100644 --- a/docs/concepts/mcp.md +++ b/docs/concepts/mcp.md @@ -91,7 +91,7 @@ These tools follow a stable contract: - request validation happens before query or write execution - filters support either raw strings or a RedisVL-backed JSON DSL -- `search-records` describes the inspected schema by advertising JSON DSL filter fields and valid `return_fields` +- `search-records` describes the inspected schema by advertising typed JSON DSL filter fields, object-filter `exists` support, and valid `return_fields` - error codes are mapped into a stable set of MCP-facing categories ## Why Use MCP Instead of Direct RedisVL Calls diff --git a/docs/user_guide/how_to_guides/mcp.md b/docs/user_guide/how_to_guides/mcp.md index f137f273..b8dcecd6 100644 --- a/docs/user_guide/how_to_guides/mcp.md +++ b/docs/user_guide/how_to_guides/mcp.md @@ -81,7 +81,7 @@ You can also control boot settings through environment variables: |----------|---------| | `REDISVL_MCP_CONFIG` | Path to the MCP YAML config | | `REDISVL_MCP_READ_ONLY` | Disable `upsert-records` when set to `true` | -| `REDISVL_MCP_TOOL_SEARCH_DESCRIPTION` | Set the base search tool description text; RedisVL still appends schema-derived filter and `return_fields` hints | +| `REDISVL_MCP_TOOL_SEARCH_DESCRIPTION` | Set the base search tool description text; RedisVL still appends schema-derived typed filter, `exists`, and `return_fields` hints | | `REDISVL_MCP_TOOL_UPSERT_DESCRIPTION` | Override the upsert tool description | ## Connect a Remote MCP Client @@ -227,7 +227,7 @@ Notes: - when `return_fields` is omitted, RedisVL MCP returns all non-vector fields - returning the configured vector field is rejected - `filter` accepts either a raw string or a JSON DSL object -- the `search-records` tool description includes schema-derived hints for JSON DSL filter fields and valid `return_fields` +- the `search-records` tool description includes schema-derived hints for typed JSON DSL filter fields, object-filter `exists` support, and valid `return_fields` - `offset + limit` must stay within `runtime.max_result_window` - startup rejects schemas that use MCP-reserved score metadata field names: `id`, `__key`, `key`, `score`, `vector_distance`, `__score`, `text_score`, `vector_similarity`, `hybrid_score` diff --git a/redisvl/mcp/tools/search.py b/redisvl/mcp/tools/search.py index 5521e6af..3d96e669 100644 --- a/redisvl/mcp/tools/search.py +++ b/redisvl/mcp/tools/search.py @@ -28,7 +28,7 @@ def _build_filter_hint(schema: IndexSchema) -> str: - """Describe fields supported by the JSON filter DSL.""" + """Describe fields with typed operator support in the JSON filter DSL.""" filter_fields = [ f"{field.name}({getattr(field.type, 'value', field.type)})" for field in schema.fields.values() @@ -54,8 +54,21 @@ def _build_search_tool_description( ) -> str: """Build the `search-records` description from static text plus schema hints.""" description = (base_description or DEFAULT_SEARCH_DESCRIPTION).strip() + + # `exists` is currently accepted for any schema field in the MCP object filter. + exists_fields = [field.name for field in schema.fields.values()] + if exists_fields: + exists_hint = "Object filter exists support: " + ", ".join(exists_fields) + "." + else: + exists_hint = "Object filter exists support: none." + return " ".join( - [description, _build_filter_hint(schema), _build_return_fields_hint(schema)] + [ + description, + _build_filter_hint(schema), + exists_hint, + _build_return_fields_hint(schema), + ] ) diff --git a/tests/unit/test_mcp/test_search_tool_unit.py b/tests/unit/test_mcp/test_search_tool_unit.py index 535ac322..3e136053 100644 --- a/tests/unit/test_mcp/test_search_tool_unit.py +++ b/tests/unit/test_mcp/test_search_tool_unit.py @@ -673,6 +673,10 @@ def test_register_search_tool_uses_default_and_override_descriptions(): "Object filter fields: content(text), category(tag), rating(numeric)." in default_server.registered_tools[0]["description"] ) + assert ( + "Object filter exists support: content, category, rating, embedding." + in default_server.registered_tools[0]["description"] + ) assert ( "Allowed return_fields: content, category, rating." in default_server.registered_tools[0]["description"] @@ -684,13 +688,11 @@ def test_register_search_tool_uses_default_and_override_descriptions(): custom_server.mcp_settings.tool_search_description = "Custom search description" register_search_tool(custom_server, custom_server.index.schema) - assert ( - custom_server.registered_tools[0]["description"] - == ( - "Custom search description " - "Object filter fields: content(text), category(tag), rating(numeric). " - "Allowed return_fields: content, category, rating." - ) + assert custom_server.registered_tools[0]["description"] == ( + "Custom search description " + "Object filter fields: content(text), category(tag), rating(numeric). " + "Object filter exists support: content, category, rating, embedding. " + "Allowed return_fields: content, category, rating." ) @@ -701,12 +703,15 @@ def test_build_search_tool_description_preserves_schema_order_and_excludes_vecto "Object filter fields: content(text), category(tag), rating(numeric)." in description ) - assert "embedding(vector)" not in description + assert ( + "Object filter exists support: content, category, rating, embedding." + in description + ) assert "Allowed return_fields: content, category, rating." in description assert "embedding" not in description.split("Allowed return_fields: ", 1)[1] -def test_build_search_tool_description_excludes_unsupported_filter_types(): +def test_build_search_tool_description_distinguishes_typed_and_exists_support(): schema = IndexSchema.from_dict( { "index": { @@ -736,4 +741,9 @@ def test_build_search_tool_description_excludes_unsupported_filter_types(): description = _build_search_tool_description(schema) assert "location(geo)" not in description + assert "embedding(vector)" not in description + assert ( + "Object filter exists support: content, category, rating, location, embedding." + in description + ) assert "Allowed return_fields: content, category, rating, location." in description diff --git a/tests/unit/test_mcp/test_server.py b/tests/unit/test_mcp/test_server.py index 14db8ac4..9adea52b 100644 --- a/tests/unit/test_mcp/test_server.py +++ b/tests/unit/test_mcp/test_server.py @@ -321,9 +321,7 @@ async def fake_disconnect(self): monkeypatch.setattr( "redisvl.mcp.server.register_search_tool", fake_register_search_tool ) - monkeypatch.setattr( - "redisvl.mcp.server.register_upsert_tool", lambda server: None - ) + monkeypatch.setattr("redisvl.mcp.server.register_upsert_tool", lambda server: None) monkeypatch.setattr( "redisvl.mcp.server.AsyncSearchIndex.disconnect", fake_disconnect, diff --git a/uv.lock b/uv.lock index 1385219c..7dd88824 100644 --- a/uv.lock +++ b/uv.lock @@ -4288,7 +4288,7 @@ wheels = [ [[package]] name = "redisvl" -version = "0.17.1" +version = "0.18.0" source = { editable = "." } dependencies = [ { name = "jsonpath-ng" },