From 6e9e6f300d434a9f63a63819897fb122d8f6cc81 Mon Sep 17 00:00:00 2001 From: Leo Meyerovich Date: Mon, 29 Dec 2025 06:55:28 -0800 Subject: [PATCH 01/30] chore: clarify plan location in template --- ai/prompts/PLAN.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ai/prompts/PLAN.md b/ai/prompts/PLAN.md index d31d05227d..57a02a19e2 100644 --- a/ai/prompts/PLAN.md +++ b/ai/prompts/PLAN.md @@ -125,7 +125,7 @@ git log --oneline -n 10 - Source: `graphistry/` - Tests: `graphistry/tests/` (mirrors source structure: `graphistry/foo/bar.py` β†’ `graphistry/tests/foo/test_bar.py`) - Docs: `docs/` -- Plans: `plans/` (gitignored - safe for auxiliary files, temp secrets, working data) +- Plans: `plans/` (gitignored - safe for auxiliary files, temp secrets, working data; Codex: avoid `~/.codex/plans`; if used, copy here then delete) - AI prompts: `ai/prompts/` - AI docs: `ai/docs/` From 209b2a3fa74c18aedc9f6755012e94045124fc3c Mon Sep 17 00:00:00 2001 From: Leo Meyerovich Date: Mon, 29 Dec 2025 06:55:42 -0800 Subject: [PATCH 02/30] feat: add collections validation and gfql support --- graphistry/Plottable.py | 12 + graphistry/PlotterBase.py | 50 ++- graphistry/__init__.py | 1 + graphistry/pygraphistry.py | 22 ++ graphistry/tests/test_collections.py | 117 ++++++ graphistry/validate/validate_collections.py | 386 ++++++++++++++++++++ 6 files changed, 587 insertions(+), 1 deletion(-) create mode 100644 graphistry/tests/test_collections.py create mode 100644 graphistry/validate/validate_collections.py diff --git a/graphistry/Plottable.py b/graphistry/Plottable.py index 18ed6cc67a..bc83b2e68c 100644 --- a/graphistry/Plottable.py +++ b/graphistry/Plottable.py @@ -783,6 +783,18 @@ def settings(self, ) -> 'Plottable': ... 
+ def collections( + self, + collections: Optional[Union[str, Dict[str, Any], List[Dict[str, Any]]]] = None, + show_collections: Optional[bool] = None, + collections_global_node_color: Optional[str] = None, + collections_global_edge_color: Optional[str] = None, + encode: bool = True, + validate: ValidationParam = 'autofix', + warn: bool = True + ) -> 'Plottable': + ... + def privacy(self, mode: Optional[PrivacyMode] = None, notify: Optional[bool] = None, invited_users: Optional[List[str]] = None, message: Optional[str] = None, mode_action: Optional[ModeAction] = None) -> 'Plottable': ... diff --git a/graphistry/PlotterBase.py b/graphistry/PlotterBase.py index 6b4f6f2ac3..7520580eed 100644 --- a/graphistry/PlotterBase.py +++ b/graphistry/PlotterBase.py @@ -1846,6 +1846,51 @@ def settings(self, height=None, url_params={}, render=None): return res + def collections( + self, + collections: Optional[Union[str, Dict[str, Any], List[Dict[str, Any]]]] = None, + show_collections: Optional[bool] = None, + collections_global_node_color: Optional[str] = None, + collections_global_edge_color: Optional[str] = None, + encode: bool = True, + validate: ValidationParam = 'autofix', + warn: bool = True + ) -> 'Plottable': + """Set collections URL parameters. Additive over previous settings. + + :param collections: List/dict of collections or JSON/URL-encoded JSON string. + :param show_collections: Toggle collections panel display. + :param collections_global_node_color: Hex color for non-collection nodes (leading # stripped). + :param collections_global_edge_color: Hex color for non-collection edges (leading # stripped). + :param encode: If True, JSON-minify and URL-encode collections. Use False for pre-encoded strings. + :param validate: Validation mode. 'autofix' (default) warns on issues, 'strict' raises on issues. + :param warn: Whether to emit warnings when validate='autofix'. validate=False forces warn=False. 
+ """ + from graphistry.validate.validate_collections import encode_collections, normalize_collections + + def strip_hash(value: str) -> str: + return value[1:] if value.startswith('#') else value + + settings: Dict[str, Any] = {} + if collections is not None: + normalized = normalize_collections(collections, validate=validate, warn=warn) + if isinstance(collections, str) and not encode: + settings['collections'] = collections + else: + settings['collections'] = encode_collections(normalized, encode=encode) + if show_collections is not None: + settings['showCollections'] = show_collections + if collections_global_node_color is not None: + settings['collectionsGlobalNodeColor'] = strip_hash(collections_global_node_color) + if collections_global_edge_color is not None: + settings['collectionsGlobalEdgeColor'] = strip_hash(collections_global_edge_color) + + if len(settings.keys()) > 0: + return self.settings(url_params={**self._url_params, **settings}) + else: + return self + + def privacy( self, mode: Optional[Mode] = None, @@ -2191,7 +2236,10 @@ def plot( 'viztoken': str(uuid.uuid4()) } - viz_url = self._pygraphistry._viz_url(info, self._url_params) + from graphistry.validate.validate_collections import normalize_collections_url_params + + url_params = normalize_collections_url_params(self._url_params, validate=validate_mode, warn=warn) + viz_url = self._pygraphistry._viz_url(info, url_params) cfg_client_protocol_hostname = self.session.client_protocol_hostname full_url = ('%s:%s' % (self.session.protocol, viz_url)) if cfg_client_protocol_hostname is None else viz_url diff --git a/graphistry/__init__.py b/graphistry/__init__.py index 954713b346..a3cc2a64f8 100644 --- a/graphistry/__init__.py +++ b/graphistry/__init__.py @@ -23,6 +23,7 @@ nodes, graph, settings, + collections, encode_point_color, encode_point_size, encode_point_icon, diff --git a/graphistry/pygraphistry.py b/graphistry/pygraphistry.py index 6a8ae4aaa9..2e339ff188 100644 --- 
a/graphistry/pygraphistry.py +++ b/graphistry/pygraphistry.py @@ -5,6 +5,7 @@ from graphistry.plugins_types.hypergraph import HypergraphResult from graphistry.client_session import ClientSession, ApiVersion, ENV_GRAPHISTRY_API_KEY, DatasetInfo, AuthManagerProtocol, strtobool from graphistry.Engine import EngineAbstractType +from graphistry.models.types import ValidationParam """Top-level import of class PyGraphistry as "Graphistry". Used to connect to the Graphistry server and then create a base plotter.""" import calendar, copy, gzip, io, json, numpy as np, pandas as pd, requests, sys, time, warnings @@ -2274,6 +2275,26 @@ def settings(self, height=None, url_params={}, render=None): return self._plotter().settings(height, url_params, render) + def collections( + self, + collections: Optional[Union[str, Dict[str, Any], List[Dict[str, Any]]]] = None, + show_collections: Optional[bool] = None, + collections_global_node_color: Optional[str] = None, + collections_global_edge_color: Optional[str] = None, + encode: bool = True, + validate: ValidationParam = 'autofix', + warn: bool = True + ): + return self._plotter().collections( + collections=collections, + show_collections=show_collections, + collections_global_node_color=collections_global_node_color, + collections_global_edge_color=collections_global_edge_color, + encode=encode, + validate=validate, + warn=warn + ) + def _viz_url(self, info: DatasetInfo, url_params: Dict[str, Any]) -> str: splash_time = int(calendar.timegm(time.gmtime())) + 15 extra = "&".join([k + "=" + str(v) for k, v in list(url_params.items())]) @@ -2501,6 +2522,7 @@ def _handle_api_response(self, response): pipe = PyGraphistry.pipe graph = PyGraphistry.graph settings = PyGraphistry.settings +collections = PyGraphistry.collections hypergraph = PyGraphistry.hypergraph bolt = PyGraphistry.bolt cypher = PyGraphistry.cypher diff --git a/graphistry/tests/test_collections.py b/graphistry/tests/test_collections.py new file mode 100644 index 
0000000000..019337af12 --- /dev/null +++ b/graphistry/tests/test_collections.py @@ -0,0 +1,117 @@ +# -*- coding: utf-8 -*- + +import json +import pytest +from urllib.parse import quote, unquote + +import graphistry +from graphistry.validate.validate_collections import normalize_collections_url_params + + +def decode_collections(encoded: str): + return json.loads(unquote(encoded)) + + +def test_collections_encodes_and_normalizes(): + g = graphistry.bind() + node_filter = graphistry.n({"subscribed_to_newsletter": True}) + collections = [ + { + "type": "set", + "id": "newsletter_subscribers", + "name": "Newsletter Subscribers", + "node_color": "#32CD32", + "expr": { + "type": "gfql_chain", + "gfql": [node_filter], + }, + } + ] + + g2 = g.collections( + collections=collections, + show_collections=True, + collections_global_node_color="#00FF00", + collections_global_edge_color="#00AA00", + ) + + decoded = decode_collections(g2._url_params["collections"]) + assert decoded == [ + { + "type": "set", + "id": "newsletter_subscribers", + "name": "Newsletter Subscribers", + "node_color": "#32CD32", + "expr": { + "type": "gfql_chain", + "gfql": [node_filter.to_json()], + }, + } + ] + assert g2._url_params["showCollections"] is True + assert g2._url_params["collectionsGlobalNodeColor"] == "00FF00" + assert g2._url_params["collectionsGlobalEdgeColor"] == "00AA00" + + +def test_collections_accepts_chain_and_preserves_dataset_id(): + chain = graphistry.Chain([graphistry.n({"type": "user"})]) + g1 = graphistry.bind(dataset_id="dataset_123") + + g2 = g1.collections(collections={"type": "set", "expr": chain}) + + decoded = decode_collections(g2._url_params["collections"]) + assert decoded == [ + { + "type": "set", + "expr": { + "type": "gfql_chain", + "gfql": [graphistry.n({"type": "user"}).to_json()], + }, + } + ] + assert g2._dataset_id == "dataset_123" + + +def test_collections_encode_false_keeps_string(): + raw = json.dumps([{"type": "intersection", "expr": {"type": 
"intersection", "sets": ["a"]}}], separators=(",", ":")) + encoded = quote(raw, safe="") + g2 = graphistry.bind().collections(collections=encoded, encode=False) + assert g2._url_params["collections"] == encoded + + +def test_collections_accepts_wire_protocol_chain(): + chain_json = { + "type": "Chain", + "chain": [ + { + "type": "Node", + "filter_dict": { + "type": "user" + } + } + ] + } + g2 = graphistry.bind().collections(collections={"type": "set", "expr": chain_json}) + decoded = decode_collections(g2._url_params["collections"]) + assert decoded == [ + { + "type": "set", + "expr": { + "type": "gfql_chain", + "gfql": chain_json["chain"], + }, + } + ] + + +def test_collections_validation_strict_raises(): + bad_collections = [{"type": "set", "expr": [{"filter_dict": {"a": 1}}]}] + with pytest.raises(ValueError): + graphistry.bind().collections(collections=bad_collections, validate="strict") + + +def test_plot_url_param_validation_autofix_warns(): + bad = '[{"type":"set","expr":[{"filter_dict":{"a":1}}]}]' + with pytest.warns(RuntimeWarning): + normalized = normalize_collections_url_params({"collections": bad}, validate="autofix", warn=True) + assert "collections" not in normalized or normalized["collections"].startswith("%5B") diff --git a/graphistry/validate/validate_collections.py b/graphistry/validate/validate_collections.py new file mode 100644 index 0000000000..310fc3dc75 --- /dev/null +++ b/graphistry/validate/validate_collections.py @@ -0,0 +1,386 @@ +import json +from typing import Any, Dict, List, Optional, Tuple, Union +from urllib.parse import quote, unquote + +from graphistry.client_session import strtobool +from graphistry.models.types import ValidationMode, ValidationParam +from graphistry.util import warn as emit_warn + +CollectionsInput = Union[str, Dict[str, Any], List[Dict[str, Any]]] + + +def normalize_validation_params( + validate: ValidationParam = 'autofix', + warn: bool = True +) -> Tuple[ValidationMode, bool]: + if validate is True: + 
validate_mode: ValidationMode = 'strict' + elif validate is False: + validate_mode = 'autofix' + warn = False + else: + validate_mode = validate + return validate_mode, warn + + +def encode_collections(collections: List[Dict[str, Any]], encode: bool = True) -> str: + json_str = json.dumps(collections, separators=(',', ':'), ensure_ascii=True) + return quote(json_str, safe='') if encode else json_str + + +def _issue( + message: str, + data: Optional[Dict[str, Any]], + validate_mode: ValidationMode, + warn: bool +) -> None: + error = ValueError({'message': message, 'data': data} if data else {'message': message}) + if validate_mode in ('strict', 'strict-fast'): + raise error + if warn and validate_mode == 'autofix': + emit_warn(f"Collections validation warning: {message} ({data})") + + +def _parse_collections_input( + collections: CollectionsInput, + validate_mode: ValidationMode, + warn: bool +) -> List[Dict[str, Any]]: + if isinstance(collections, list): + return collections + if isinstance(collections, dict): + return [collections] + if isinstance(collections, str): + try: + parsed = json.loads(collections) + except json.JSONDecodeError: + try: + parsed = json.loads(unquote(collections)) + except json.JSONDecodeError as exc: + _issue('Collections string must be JSON or URL-encoded JSON', {'error': str(exc)}, validate_mode, warn) + return [] + if isinstance(parsed, list): + return parsed + if isinstance(parsed, dict): + return [parsed] + _issue('Collections JSON must be a list or dict', {'type': type(parsed).__name__}, validate_mode, warn) + return [] + _issue('Collections must be a list, dict, or JSON string', {'type': type(collections).__name__}, validate_mode, warn) + return [] + + +def _coerce_str_field( + entry: Dict[str, Any], + key: str, + validate_mode: ValidationMode, + warn: bool, + entry_index: int +) -> None: + if key not in entry or entry[key] is None: + return + if isinstance(entry[key], str): + return + _issue( + f'Collection field "{key}" should be 
a string', + {'index': entry_index, 'value': entry[key], 'type': type(entry[key]).__name__}, + validate_mode, + warn + ) + if validate_mode == 'autofix': + entry[key] = str(entry[key]) + + +def _normalize_sets_list( + sets_value: Any, + validate_mode: ValidationMode, + warn: bool, + entry_index: int +) -> Optional[List[str]]: + if not isinstance(sets_value, list): + _issue( + 'Intersection sets must be a list of strings', + {'index': entry_index, 'value': sets_value, 'type': type(sets_value).__name__}, + validate_mode, + warn + ) + return None + out: List[str] = [] + for set_id in sets_value: + if isinstance(set_id, str): + out.append(set_id) + continue + _issue( + 'Intersection set IDs must be strings', + {'index': entry_index, 'value': set_id, 'type': type(set_id).__name__}, + validate_mode, + warn + ) + if validate_mode == 'autofix': + out.append(str(set_id)) + return out + + +def _normalize_gfql_ops( + gfql_ops: Any, + validate_mode: ValidationMode, + warn: bool, + entry_index: int +) -> Optional[List[Dict[str, Any]]]: + from graphistry.compute.ast import ASTObject, from_json as ast_from_json + from graphistry.compute.chain import Chain + + if gfql_ops is None: + _issue('GFQL chain is missing', {'index': entry_index}, validate_mode, warn) + return None + if isinstance(gfql_ops, str): + try: + gfql_ops = json.loads(gfql_ops) + except json.JSONDecodeError as exc: + _issue('GFQL chain string must be JSON', {'index': entry_index, 'error': str(exc)}, validate_mode, warn) + return None + if isinstance(gfql_ops, Chain): + ops = gfql_ops.to_json().get('chain', []) + elif isinstance(gfql_ops, ASTObject): + ops = [gfql_ops.to_json()] + elif isinstance(gfql_ops, dict): + if 'chain' in gfql_ops: + ops = gfql_ops.get('chain', []) + else: + ops = [gfql_ops] + elif isinstance(gfql_ops, list): + ops = [] + for op in gfql_ops: + if isinstance(op, ASTObject): + ops.append(op.to_json()) + elif isinstance(op, dict): + ops.append(op) + else: + _issue( + 'GFQL operations must be AST 
objects or dicts', + {'index': entry_index, 'value': op, 'type': type(op).__name__}, + validate_mode, + warn + ) + if validate_mode == 'autofix': + continue + return None + else: + _issue( + 'GFQL operations must be a Chain, AST object, list, or dict', + {'index': entry_index, 'type': type(gfql_ops).__name__}, + validate_mode, + warn + ) + return None + + normalized_ops: List[Dict[str, Any]] = [] + for op in ops: + if not isinstance(op, dict): + _issue( + 'GFQL operations must be dictionaries after normalization', + {'index': entry_index, 'value': op, 'type': type(op).__name__}, + validate_mode, + warn + ) + if validate_mode == 'autofix': + continue + return None + try: + ast_from_json(op, validate=True) + except Exception as exc: + _issue( + 'Invalid GFQL operation in collection', + {'index': entry_index, 'op': op, 'error': str(exc)}, + validate_mode, + warn + ) + if validate_mode == 'autofix': + continue + return None + normalized_ops.append(op) + + if len(normalized_ops) == 0: + _issue('GFQL chain is empty', {'index': entry_index}, validate_mode, warn) + return None + return normalized_ops + + +def _normalize_gfql_expr( + expr: Any, + validate_mode: ValidationMode, + warn: bool, + entry_index: int +) -> Optional[Dict[str, Any]]: + if isinstance(expr, dict): + if expr.get('type') == 'intersection': + _issue('Set collection expr cannot be intersection', {'index': entry_index}, validate_mode, warn) + return None + if 'gfql' in expr or expr.get('type') == 'gfql_chain': + ops = _normalize_gfql_ops(expr.get('gfql'), validate_mode, warn, entry_index) + if ops is None: + return None + return {'type': 'gfql_chain', 'gfql': ops} + if 'chain' in expr: + ops = _normalize_gfql_ops(expr, validate_mode, warn, entry_index) + if ops is None: + return None + return {'type': 'gfql_chain', 'gfql': ops} + if 'type' in expr: + ops = _normalize_gfql_ops(expr, validate_mode, warn, entry_index) + if ops is None: + return None + return {'type': 'gfql_chain', 'gfql': ops} + ops = 
_normalize_gfql_ops(expr, validate_mode, warn, entry_index) + if ops is None: + return None + return {'type': 'gfql_chain', 'gfql': ops} + + +def _normalize_intersection_expr( + expr: Any, + validate_mode: ValidationMode, + warn: bool, + entry_index: int +) -> Optional[Dict[str, Any]]: + if not isinstance(expr, dict): + _issue('Intersection expr must be a dict', {'index': entry_index}, validate_mode, warn) + return None + expr_type = expr.get('type', 'intersection') + if expr_type != 'intersection': + _issue( + 'Intersection expr type must be "intersection"', + {'index': entry_index, 'value': expr_type}, + validate_mode, + warn + ) + return None + sets_value = expr.get('sets', expr.get('intersection')) + if sets_value is None: + _issue('Intersection expr missing "sets"', {'index': entry_index}, validate_mode, warn) + return None + sets_list = _normalize_sets_list(sets_value, validate_mode, warn, entry_index) + if sets_list is None: + return None + return {'type': 'intersection', 'sets': sets_list} + + +def normalize_collections( + collections: CollectionsInput, + validate: ValidationParam = 'autofix', + warn: bool = True +) -> List[Dict[str, Any]]: + validate_mode, warn = normalize_validation_params(validate, warn) + items = _parse_collections_input(collections, validate_mode, warn) + + normalized: List[Dict[str, Any]] = [] + for idx, entry in enumerate(items): + if not isinstance(entry, dict): + _issue( + 'Collection entries must be dictionaries', + {'index': idx, 'type': type(entry).__name__}, + validate_mode, + warn + ) + if validate_mode == 'autofix': + continue + return [] + + normalized_entry = dict(entry) + collection_type = normalized_entry.get('type', 'set') + if not isinstance(collection_type, str): + _issue( + 'Collection type must be a string', + {'index': idx, 'value': collection_type, 'type': type(collection_type).__name__}, + validate_mode, + warn + ) + if validate_mode == 'autofix': + collection_type = str(collection_type) + else: + continue + 
collection_type = collection_type.lower() + normalized_entry['type'] = collection_type + + if collection_type not in ('set', 'intersection'): + _issue( + 'Collection type must be "set" or "intersection"', + {'index': idx, 'value': collection_type}, + validate_mode, + warn + ) + if validate_mode == 'autofix': + continue + return [] + + for field in ('id', 'name', 'description', 'node_color', 'edge_color'): + _coerce_str_field(normalized_entry, field, validate_mode, warn, idx) + + expr = normalized_entry.get('expr') + if collection_type == 'intersection': + normalized_expr = _normalize_intersection_expr(expr, validate_mode, warn, idx) + else: + normalized_expr = _normalize_gfql_expr(expr, validate_mode, warn, idx) + if normalized_expr is None: + if validate_mode == 'autofix': + continue + return [] + normalized_entry['expr'] = normalized_expr + normalized.append(normalized_entry) + + return normalized + + +def normalize_collections_url_params( + url_params: Dict[str, Any], + validate: ValidationParam = 'autofix', + warn: bool = True +) -> Dict[str, Any]: + validate_mode, warn = normalize_validation_params(validate, warn) + updated = dict(url_params) + + if 'collections' in updated: + normalized = normalize_collections(updated['collections'], validate_mode, warn) + if len(normalized) > 0: + updated['collections'] = encode_collections(normalized, encode=True) + else: + if validate_mode in ('strict', 'strict-fast'): + return updated + updated.pop('collections', None) + + if 'showCollections' in updated: + value = updated['showCollections'] + if isinstance(value, bool): + pass + else: + try: + updated['showCollections'] = strtobool(value) + except Exception as exc: + _issue( + 'showCollections must be a boolean', + {'value': value, 'error': str(exc)}, + validate_mode, + warn + ) + if validate_mode == 'autofix': + updated.pop('showCollections', None) + + for color_key in ('collectionsGlobalNodeColor', 'collectionsGlobalEdgeColor'): + if color_key in updated: + value = 
updated[color_key] + if value is None: + updated.pop(color_key, None) + continue + if not isinstance(value, str): + _issue( + f'{color_key} must be a string', + {'value': value, 'type': type(value).__name__}, + validate_mode, + warn + ) + if validate_mode == 'autofix': + value = str(value) + if isinstance(value, str) and value.startswith('#'): + value = value[1:] + updated[color_key] = value + + return updated From 673eed8d8ebc77def71cdb23ac20e63817c85348 Mon Sep 17 00:00:00 2001 From: Leo Meyerovich Date: Mon, 29 Dec 2025 06:57:35 -0800 Subject: [PATCH 03/30] fix: satisfy mypy in collections validation --- graphistry/validate/validate_collections.py | 26 +++++++++++++++------ 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/graphistry/validate/validate_collections.py b/graphistry/validate/validate_collections.py index 310fc3dc75..0369477146 100644 --- a/graphistry/validate/validate_collections.py +++ b/graphistry/validate/validate_collections.py @@ -138,22 +138,23 @@ def _normalize_gfql_ops( except json.JSONDecodeError as exc: _issue('GFQL chain string must be JSON', {'index': entry_index, 'error': str(exc)}, validate_mode, warn) return None + ops_raw: Any if isinstance(gfql_ops, Chain): - ops = gfql_ops.to_json().get('chain', []) + ops_raw = gfql_ops.to_json().get('chain', []) elif isinstance(gfql_ops, ASTObject): - ops = [gfql_ops.to_json()] + ops_raw = [gfql_ops.to_json()] elif isinstance(gfql_ops, dict): if 'chain' in gfql_ops: - ops = gfql_ops.get('chain', []) + ops_raw = gfql_ops.get('chain', []) else: - ops = [gfql_ops] + ops_raw = [gfql_ops] elif isinstance(gfql_ops, list): - ops = [] + ops_raw = [] for op in gfql_ops: if isinstance(op, ASTObject): - ops.append(op.to_json()) + ops_raw.append(op.to_json()) elif isinstance(op, dict): - ops.append(op) + ops_raw.append(op) else: _issue( 'GFQL operations must be AST objects or dicts', @@ -173,6 +174,17 @@ def _normalize_gfql_ops( ) return None + if not isinstance(ops_raw, list): + _issue( + 'GFQL 
operations must be a list', + {'index': entry_index, 'value': ops_raw, 'type': type(ops_raw).__name__}, + validate_mode, + warn + ) + return None + + ops: List[Any] = ops_raw + normalized_ops: List[Dict[str, Any]] = [] for op in ops: if not isinstance(op, dict): From e1d05263470d3ee73d476aab0e0e1493669980ae Mon Sep 17 00:00:00 2001 From: Leo Meyerovich Date: Tue, 30 Dec 2025 13:49:56 -0800 Subject: [PATCH 04/30] feat: add collections helper constructors --- graphistry/__init__.py | 7 +++ graphistry/collections_helpers.py | 82 ++++++++++++++++++++++++++++ graphistry/tests/test_collections.py | 24 ++++++++ 3 files changed, 113 insertions(+) create mode 100644 graphistry/collections_helpers.py diff --git a/graphistry/__init__.py b/graphistry/__init__.py index a3cc2a64f8..4058939b5f 100644 --- a/graphistry/__init__.py +++ b/graphistry/__init__.py @@ -62,6 +62,13 @@ from_cugraph ) +from graphistry.collections_helpers import ( + collection_set, + collection_intersection, + CollectionSet, + CollectionIntersection, +) + from graphistry.compute import ( n, e, e_forward, e_reverse, e_undirected, let, ref, diff --git a/graphistry/collections_helpers.py b/graphistry/collections_helpers.py new file mode 100644 index 0000000000..10576e0c0d --- /dev/null +++ b/graphistry/collections_helpers.py @@ -0,0 +1,82 @@ +from typing import Any, Dict, List, Optional, Sequence, TypedDict, Union + + +class CollectionSet(TypedDict, total=False): + """Collection definition for a GFQL-defined set.""" + type: str + id: str + name: str + description: str + node_color: str + edge_color: str + expr: Any + + +class CollectionIntersection(TypedDict, total=False): + """Collection definition for an intersection of sets.""" + type: str + id: str + name: str + description: str + node_color: str + edge_color: str + expr: Dict[str, Any] + + +Collection = Union[CollectionSet, CollectionIntersection] +CollectionsInput = Union[str, Collection, List[Collection], Dict[str, Any], List[Dict[str, Any]]] + + +def 
_apply_collection_metadata( + collection: Dict[str, Any], + id: Optional[str], + name: Optional[str], + description: Optional[str], + node_color: Optional[str], + edge_color: Optional[str], +) -> Dict[str, Any]: + if id is not None: + collection["id"] = id + if name is not None: + collection["name"] = name + if description is not None: + collection["description"] = description + if node_color is not None: + collection["node_color"] = node_color + if edge_color is not None: + collection["edge_color"] = edge_color + return collection + + +def collection_set( + *, + expr: Any, + id: Optional[str] = None, + name: Optional[str] = None, + description: Optional[str] = None, + node_color: Optional[str] = None, + edge_color: Optional[str] = None, +) -> CollectionSet: + """Build a collection dict for a GFQL-defined set.""" + collection: Dict[str, Any] = {"type": "set", "expr": expr} + return _apply_collection_metadata(collection, id, name, description, node_color, edge_color) # type: ignore[return-value] + + +def collection_intersection( + *, + sets: Sequence[str], + id: Optional[str] = None, + name: Optional[str] = None, + description: Optional[str] = None, + node_color: Optional[str] = None, + edge_color: Optional[str] = None, +) -> CollectionIntersection: + """Build a collection dict for an intersection of set IDs.""" + collection: Dict[str, Any] = { + "type": "intersection", + "expr": { + "type": "intersection", + "sets": list(sets), + }, + } + return _apply_collection_metadata(collection, id, name, description, node_color, edge_color) # type: ignore[return-value] diff --git a/graphistry/tests/test_collections.py b/graphistry/tests/test_collections.py index 019337af12..ad05dbfbb8 100644 --- a/graphistry/tests/test_collections.py +++ b/graphistry/tests/test_collections.py @@ -5,6 +5,7 @@ from urllib.parse import quote, unquote import graphistry +from graphistry.collections_helpers import collection_intersection, collection_set from graphistry.validate.validate_collections 
import normalize_collections_url_params @@ -53,6 +54,29 @@ def test_collections_encodes_and_normalizes(): assert g2._url_params["collectionsGlobalEdgeColor"] == "00AA00" +def test_collection_helpers_build_sets_and_intersections(): + collections = [ + collection_set( + expr=[graphistry.n({"vip": True})], + id="vip", + name="VIP", + node_color="#FFAA00", + ), + collection_intersection( + sets=["vip"], + name="VIP Intersection", + node_color="#00BFFF", + ), + ] + + g2 = graphistry.bind().collections(collections=collections) + decoded = decode_collections(g2._url_params["collections"]) + assert decoded[0]["type"] == "set" + assert decoded[0]["expr"]["type"] == "gfql_chain" + assert decoded[1]["type"] == "intersection" + assert decoded[1]["expr"] == {"type": "intersection", "sets": ["vip"]} + + def test_collections_accepts_chain_and_preserves_dataset_id(): chain = graphistry.Chain([graphistry.n({"type": "user"})]) g1 = graphistry.bind(dataset_id="dataset_123") From 858ff91ff7d972d62a1d4ad6c0c004c137e3b811 Mon Sep 17 00:00:00 2001 From: Leo Meyerovich Date: Tue, 30 Dec 2025 14:12:28 -0800 Subject: [PATCH 05/30] feat: wrap collection set expr to gfql chain --- graphistry/collections_helpers.py | 41 +++++++++++++++++++++++++++- graphistry/tests/test_collections.py | 6 ++++ 2 files changed, 46 insertions(+), 1 deletion(-) diff --git a/graphistry/collections_helpers.py b/graphistry/collections_helpers.py index 10576e0c0d..2ac2e11054 100644 --- a/graphistry/collections_helpers.py +++ b/graphistry/collections_helpers.py @@ -48,6 +48,45 @@ def _apply_collection_metadata( return collection +def _wrap_gfql_expr(expr: Any) -> Any: + if expr is None: + return expr + + from graphistry.compute.ast import ASTObject + from graphistry.compute.chain import Chain + + if isinstance(expr, dict): + expr_type = expr.get("type") + if expr_type == "intersection": + return expr + if expr_type == "gfql_chain" and "gfql" in expr: + return expr + if expr_type == "Chain" and "chain" in expr: + 
return {"type": "gfql_chain", "gfql": expr.get("chain", [])} + if "gfql" in expr: + return {"type": "gfql_chain", "gfql": expr.get("gfql")} + if "chain" in expr: + return {"type": "gfql_chain", "gfql": expr.get("chain")} + return {"type": "gfql_chain", "gfql": [expr]} + + if isinstance(expr, Chain): + return {"type": "gfql_chain", "gfql": expr.to_json().get("chain", [])} + + if isinstance(expr, ASTObject): + return {"type": "gfql_chain", "gfql": [expr.to_json()]} + + if isinstance(expr, list): + ops: List[Any] = [] + for op in expr: + if isinstance(op, ASTObject): + ops.append(op.to_json()) + else: + ops.append(op) + return {"type": "gfql_chain", "gfql": ops} + + return expr + + def collection_set( *, expr: Any, @@ -58,7 +97,7 @@ def collection_set( edge_color: Optional[str] = None, ) -> CollectionSet: """Build a collection dict for a GFQL-defined set.""" - collection: Dict[str, Any] = {"type": "set", "expr": expr} + collection: Dict[str, Any] = {"type": "set", "expr": _wrap_gfql_expr(expr)} return _apply_collection_metadata(collection, id, name, description, node_color, edge_color) # type: ignore[return-value] diff --git a/graphistry/tests/test_collections.py b/graphistry/tests/test_collections.py index ad05dbfbb8..dde8671e84 100644 --- a/graphistry/tests/test_collections.py +++ b/graphistry/tests/test_collections.py @@ -77,6 +77,12 @@ def test_collection_helpers_build_sets_and_intersections(): assert decoded[1]["expr"] == {"type": "intersection", "sets": ["vip"]} +def test_collection_set_wraps_ast_expr(): + collection = collection_set(expr=graphistry.n({"vip": True}), id="vip") + assert collection["expr"]["type"] == "gfql_chain" + assert collection["expr"]["gfql"][0]["type"] == "Node" + + def test_collections_accepts_chain_and_preserves_dataset_id(): chain = graphistry.Chain([graphistry.n({"type": "user"})]) g1 = graphistry.bind(dataset_id="dataset_123") From 6b5370c3c9ffe65fe4dcd5f3262521a35c012d2a Mon Sep 17 00:00:00 2001 From: Leo Meyerovich Date: Fri, 2 Jan 
2026 01:48:21 -0800 Subject: [PATCH 06/30] Refine collections types and helpers --- graphistry/Plottable.py | 3 +- graphistry/PlotterBase.py | 3 +- graphistry/__init__.py | 2 +- graphistry/collections.py | 99 ++++++++++++++++ graphistry/collections_helpers.py | 121 -------------------- graphistry/models/collections.py | 61 ++++++++++ graphistry/pygraphistry.py | 3 +- graphistry/tests/test_collections.py | 85 +++++++------- graphistry/validate/validate_collections.py | 55 ++++++++- 9 files changed, 262 insertions(+), 170 deletions(-) create mode 100644 graphistry/collections.py delete mode 100644 graphistry/collections_helpers.py create mode 100644 graphistry/models/collections.py diff --git a/graphistry/Plottable.py b/graphistry/Plottable.py index bc83b2e68c..271392f56f 100644 --- a/graphistry/Plottable.py +++ b/graphistry/Plottable.py @@ -17,6 +17,7 @@ from graphistry.Engine import EngineAbstractType from graphistry.utils.json import JSONVal from graphistry.client_session import ClientSession, AuthManagerProtocol +from graphistry.models.collections import CollectionsInput from graphistry.models.types import ValidationParam if TYPE_CHECKING: @@ -785,7 +786,7 @@ def settings(self, def collections( self, - collections: Optional[Union[str, Dict[str, Any], List[Dict[str, Any]]]] = None, + collections: Optional[CollectionsInput] = None, show_collections: Optional[bool] = None, collections_global_node_color: Optional[str] = None, collections_global_edge_color: Optional[str] = None, diff --git a/graphistry/PlotterBase.py b/graphistry/PlotterBase.py index 7520580eed..2497a47437 100644 --- a/graphistry/PlotterBase.py +++ b/graphistry/PlotterBase.py @@ -2,6 +2,7 @@ from typing import Any, Callable, Dict, List, Optional, Union, Tuple, cast, overload, TYPE_CHECKING from typing_extensions import Literal from graphistry.io.types import ComplexEncodingsDict +from graphistry.models.collections import CollectionsInput from graphistry.models.types import ValidationMode, 
ValidationParam from graphistry.plugins_types.hypergraph import HypergraphResult from graphistry.render.resolve_render_mode import resolve_render_mode @@ -1848,7 +1849,7 @@ def settings(self, height=None, url_params={}, render=None): def collections( self, - collections: Optional[Union[str, Dict[str, Any], List[Dict[str, Any]]]] = None, + collections: Optional[CollectionsInput] = None, show_collections: Optional[bool] = None, collections_global_node_color: Optional[str] = None, collections_global_edge_color: Optional[str] = None, diff --git a/graphistry/__init__.py b/graphistry/__init__.py index 4058939b5f..ca4ceda33c 100644 --- a/graphistry/__init__.py +++ b/graphistry/__init__.py @@ -62,7 +62,7 @@ from_cugraph ) -from graphistry.collections_helpers import ( +from graphistry.collections import ( collection_set, collection_intersection, CollectionSet, diff --git a/graphistry/collections.py b/graphistry/collections.py new file mode 100644 index 0000000000..a11198c0bf --- /dev/null +++ b/graphistry/collections.py @@ -0,0 +1,99 @@ +from typing import List, Optional, Sequence, TypeVar, cast + +from graphistry.models.collections import ( + CollectionIntersection, + CollectionSet, + GFQLChainInput, + GFQLChainWire, + GFQLWireOp, +) + +CollectionDict = TypeVar("CollectionDict", CollectionSet, CollectionIntersection) + + +def _apply_collection_metadata(collection: CollectionDict, **metadata: Optional[str]) -> CollectionDict: + collection.update({key: value for key, value in metadata.items() if value is not None}) + return collection + + +def _wrap_gfql_expr(expr: GFQLChainInput) -> GFQLChainWire: + + from graphistry.compute.ast import ASTObject + from graphistry.compute.chain import Chain + + if isinstance(expr, dict): + expr_type = expr.get("type") + if expr_type == "gfql_chain" and "gfql" in expr: + return expr + if expr_type == "Chain" and "chain" in expr: + return {"type": "gfql_chain", "gfql": expr.get("chain", [])} + if "gfql" in expr: + return {"type": "gfql_chain", 
"gfql": expr.get("gfql")} + if "chain" in expr: + return {"type": "gfql_chain", "gfql": expr.get("chain")} + return {"type": "gfql_chain", "gfql": [expr]} + + if isinstance(expr, Chain): + return {"type": "gfql_chain", "gfql": expr.to_json().get("chain", [])} + + if isinstance(expr, ASTObject): + return {"type": "gfql_chain", "gfql": [expr.to_json()]} + + if isinstance(expr, list): + ops: List[GFQLWireOp] = [] + for op in expr: + if isinstance(op, ASTObject): + ops.append(op.to_json()) + else: + ops.append(cast(GFQLWireOp, op)) + return {"type": "gfql_chain", "gfql": ops} + + return {"type": "gfql_chain", "gfql": [cast(GFQLWireOp, expr)]} + + +def collection_set( + *, + expr: GFQLChainInput, + id: Optional[str] = None, + name: Optional[str] = None, + description: Optional[str] = None, + node_color: Optional[str] = None, + edge_color: Optional[str] = None, +) -> CollectionSet: + """Build a collection dict for a GFQL-defined set.""" + collection: CollectionSet = {"type": "set", "expr": _wrap_gfql_expr(expr)} + return _apply_collection_metadata( + collection, + id=id, + name=name, + description=description, + node_color=node_color, + edge_color=edge_color, + ) + + +def collection_intersection( + *, + sets: Sequence[str], + id: Optional[str] = None, + name: Optional[str] = None, + description: Optional[str] = None, + node_color: Optional[str] = None, + edge_color: Optional[str] = None, +) -> CollectionIntersection: + """Build a collection dict for an intersection of set IDs.""" + collection: CollectionIntersection = { + "type": "intersection", + "expr": { + "type": "intersection", + "sets": list(sets), + }, + } + return _apply_collection_metadata( + collection, + id=id, + name=name, + description=description, + node_color=node_color, + edge_color=edge_color, + ) diff --git a/graphistry/collections_helpers.py b/graphistry/collections_helpers.py deleted file mode 100644 index 2ac2e11054..0000000000 --- a/graphistry/collections_helpers.py +++ /dev/null @@ -1,121 +0,0 @@ 
-from typing import Any, Dict, List, Optional, Sequence, TypedDict, Union - - -class CollectionSet(TypedDict, total=False): - """Collection definition for a GFQL-defined set.""" - type: str - id: str - name: str - description: str - node_color: str - edge_color: str - expr: Any - - -class CollectionIntersection(TypedDict, total=False): - """Collection definition for an intersection of sets.""" - type: str - id: str - name: str - description: str - node_color: str - edge_color: str - expr: Dict[str, Any] - - -Collection = Union[CollectionSet, CollectionIntersection] -CollectionsInput = Union[str, Collection, List[Collection], Dict[str, Any], List[Dict[str, Any]]] - - -def _apply_collection_metadata( - collection: Dict[str, Any], - id: Optional[str], - name: Optional[str], - description: Optional[str], - node_color: Optional[str], - edge_color: Optional[str], -) -> Dict[str, Any]: - if id is not None: - collection["id"] = id - if name is not None: - collection["name"] = name - if description is not None: - collection["description"] = description - if node_color is not None: - collection["node_color"] = node_color - if edge_color is not None: - collection["edge_color"] = edge_color - return collection - - -def _wrap_gfql_expr(expr: Any) -> Any: - if expr is None: - return expr - - from graphistry.compute.ast import ASTObject - from graphistry.compute.chain import Chain - - if isinstance(expr, dict): - expr_type = expr.get("type") - if expr_type == "intersection": - return expr - if expr_type == "gfql_chain" and "gfql" in expr: - return expr - if expr_type == "Chain" and "chain" in expr: - return {"type": "gfql_chain", "gfql": expr.get("chain", [])} - if "gfql" in expr: - return {"type": "gfql_chain", "gfql": expr.get("gfql")} - if "chain" in expr: - return {"type": "gfql_chain", "gfql": expr.get("chain")} - return {"type": "gfql_chain", "gfql": [expr]} - - if isinstance(expr, Chain): - return {"type": "gfql_chain", "gfql": expr.to_json().get("chain", [])} - - if 
isinstance(expr, ASTObject): - return {"type": "gfql_chain", "gfql": [expr.to_json()]} - - if isinstance(expr, list): - ops: List[Any] = [] - for op in expr: - if isinstance(op, ASTObject): - ops.append(op.to_json()) - else: - ops.append(op) - return {"type": "gfql_chain", "gfql": ops} - - return expr - - -def collection_set( - *, - expr: Any, - id: Optional[str] = None, - name: Optional[str] = None, - description: Optional[str] = None, - node_color: Optional[str] = None, - edge_color: Optional[str] = None, -) -> CollectionSet: - """Build a collection dict for a GFQL-defined set.""" - collection: Dict[str, Any] = {"type": "set", "expr": _wrap_gfql_expr(expr)} - return _apply_collection_metadata(collection, id, name, description, node_color, edge_color) # type: ignore[return-value] - - -def collection_intersection( - *, - sets: Sequence[str], - id: Optional[str] = None, - name: Optional[str] = None, - description: Optional[str] = None, - node_color: Optional[str] = None, - edge_color: Optional[str] = None, -) -> CollectionIntersection: - """Build a collection dict for an intersection of set IDs.""" - collection: Dict[str, Any] = { - "type": "intersection", - "expr": { - "type": "intersection", - "sets": list(sets), - }, - } - return _apply_collection_metadata(collection, id, name, description, node_color, edge_color) # type: ignore[return-value] diff --git a/graphistry/models/collections.py b/graphistry/models/collections.py new file mode 100644 index 0000000000..af29a7f575 --- /dev/null +++ b/graphistry/models/collections.py @@ -0,0 +1,61 @@ +from __future__ import annotations + +from typing import Dict, List, TYPE_CHECKING, Union +from typing_extensions import Literal, NotRequired, Required, TypedDict + +from graphistry.utils.json import JSONVal + +if TYPE_CHECKING: + from graphistry.compute.ast import ASTObject + from graphistry.compute.chain import Chain + + +GFQLWireOp = Dict[str, JSONVal] + + +class GFQLChainWire(TypedDict): + type: Literal["gfql_chain"] + 
gfql: List[GFQLWireOp] + + +class GFQLChainJSON(TypedDict): + type: Literal["Chain"] + chain: List[GFQLWireOp] + + +GFQLOpInput = Union["ASTObject", GFQLWireOp] +GFQLChainInput = Union[ + "Chain", + "ASTObject", + GFQLChainWire, + GFQLChainJSON, + List[GFQLOpInput], + GFQLWireOp, +] + + +class IntersectionExpr(TypedDict): + type: Literal["intersection"] + sets: List[str] + + +class CollectionBase(TypedDict, total=False): + id: str + name: str + description: str + node_color: str + edge_color: str + + +class CollectionSet(CollectionBase): + type: NotRequired[Literal["set"]] + expr: Required[GFQLChainInput] + + +class CollectionIntersection(CollectionBase): + type: NotRequired[Literal["intersection"]] + expr: Required[IntersectionExpr] + + +Collection = Union[CollectionSet, CollectionIntersection] +CollectionsInput = Union[str, Collection, List[Collection]] diff --git a/graphistry/pygraphistry.py b/graphistry/pygraphistry.py index 2e339ff188..171cc5082c 100644 --- a/graphistry/pygraphistry.py +++ b/graphistry/pygraphistry.py @@ -5,6 +5,7 @@ from graphistry.plugins_types.hypergraph import HypergraphResult from graphistry.client_session import ClientSession, ApiVersion, ENV_GRAPHISTRY_API_KEY, DatasetInfo, AuthManagerProtocol, strtobool from graphistry.Engine import EngineAbstractType +from graphistry.models.collections import CollectionsInput from graphistry.models.types import ValidationParam """Top-level import of class PyGraphistry as "Graphistry". 
Used to connect to the Graphistry server and then create a base plotter.""" @@ -2277,7 +2278,7 @@ def settings(self, height=None, url_params={}, render=None): def collections( self, - collections: Optional[Union[str, Dict[str, Any], List[Dict[str, Any]]]] = None, + collections: Optional[CollectionsInput] = None, show_collections: Optional[bool] = None, collections_global_node_color: Optional[str] = None, collections_global_edge_color: Optional[str] = None, diff --git a/graphistry/tests/test_collections.py b/graphistry/tests/test_collections.py index dde8671e84..c57da0d989 100644 --- a/graphistry/tests/test_collections.py +++ b/graphistry/tests/test_collections.py @@ -5,7 +5,7 @@ from urllib.parse import quote, unquote import graphistry -from graphistry.collections_helpers import collection_intersection, collection_set +from graphistry.collections import collection_intersection, collection_set from graphistry.validate.validate_collections import normalize_collections_url_params @@ -13,8 +13,11 @@ def decode_collections(encoded: str): return json.loads(unquote(encoded)) +def collections_url_params(collections, **kwargs): + return graphistry.bind().collections(collections=collections, **kwargs)._url_params + + def test_collections_encodes_and_normalizes(): - g = graphistry.bind() node_filter = graphistry.n({"subscribed_to_newsletter": True}) collections = [ { @@ -29,14 +32,14 @@ def test_collections_encodes_and_normalizes(): } ] - g2 = g.collections( - collections=collections, + url_params = collections_url_params( + collections, show_collections=True, collections_global_node_color="#00FF00", collections_global_edge_color="#00AA00", ) - decoded = decode_collections(g2._url_params["collections"]) + decoded = decode_collections(url_params["collections"]) assert decoded == [ { "type": "set", @@ -49,53 +52,40 @@ def test_collections_encodes_and_normalizes(): }, } ] - assert g2._url_params["showCollections"] is True - assert g2._url_params["collectionsGlobalNodeColor"] == 
"00FF00" - assert g2._url_params["collectionsGlobalEdgeColor"] == "00AA00" + assert url_params["showCollections"] is True + assert url_params["collectionsGlobalNodeColor"] == "00FF00" + assert url_params["collectionsGlobalEdgeColor"] == "00AA00" + + +@pytest.mark.parametrize("expr", [graphistry.n({"vip": True}), [graphistry.n({"vip": True})]]) +def test_collection_set_wraps_ast_expr(expr): + collection = collection_set(expr=expr, id="vip") + assert collection["expr"]["type"] == "gfql_chain" + assert collection["expr"]["gfql"][0]["type"] == "Node" def test_collection_helpers_build_sets_and_intersections(): collections = [ - collection_set( - expr=[graphistry.n({"vip": True})], - id="vip", - name="VIP", - node_color="#FFAA00", - ), - collection_intersection( - sets=["vip"], - name="VIP Intersection", - node_color="#00BFFF", - ), + collection_set(expr=[graphistry.n({"vip": True})], id="vip", name="VIP", node_color="#FFAA00"), + collection_intersection(sets=["vip"], name="VIP Intersection", node_color="#00BFFF"), ] - - g2 = graphistry.bind().collections(collections=collections) - decoded = decode_collections(g2._url_params["collections"]) + decoded = decode_collections(collections_url_params(collections)["collections"]) assert decoded[0]["type"] == "set" assert decoded[0]["expr"]["type"] == "gfql_chain" - assert decoded[1]["type"] == "intersection" assert decoded[1]["expr"] == {"type": "intersection", "sets": ["vip"]} -def test_collection_set_wraps_ast_expr(): - collection = collection_set(expr=graphistry.n({"vip": True}), id="vip") - assert collection["expr"]["type"] == "gfql_chain" - assert collection["expr"]["gfql"][0]["type"] == "Node" - - def test_collections_accepts_chain_and_preserves_dataset_id(): - chain = graphistry.Chain([graphistry.n({"type": "user"})]) - g1 = graphistry.bind(dataset_id="dataset_123") - - g2 = g1.collections(collections={"type": "set", "expr": chain}) - + node = graphistry.n({"type": "user"}) + chain = graphistry.Chain([node]) + g2 = 
graphistry.bind(dataset_id="dataset_123").collections(collections={"type": "set", "expr": chain}) decoded = decode_collections(g2._url_params["collections"]) assert decoded == [ { "type": "set", "expr": { "type": "gfql_chain", - "gfql": [graphistry.n({"type": "user"}).to_json()], + "gfql": [node.to_json()], }, } ] @@ -103,10 +93,10 @@ def test_collections_accepts_chain_and_preserves_dataset_id(): def test_collections_encode_false_keeps_string(): - raw = json.dumps([{"type": "intersection", "expr": {"type": "intersection", "sets": ["a"]}}], separators=(",", ":")) + raw = '[{"type":"intersection","expr":{"type":"intersection","sets":["a"]}}]' encoded = quote(raw, safe="") - g2 = graphistry.bind().collections(collections=encoded, encode=False) - assert g2._url_params["collections"] == encoded + url_params = collections_url_params(encoded, encode=False) + assert url_params["collections"] == encoded def test_collections_accepts_wire_protocol_chain(): @@ -121,8 +111,9 @@ def test_collections_accepts_wire_protocol_chain(): } ] } - g2 = graphistry.bind().collections(collections={"type": "set", "expr": chain_json}) - decoded = decode_collections(g2._url_params["collections"]) + decoded = decode_collections( + collections_url_params({"type": "set", "expr": chain_json})["collections"] + ) assert decoded == [ { "type": "set", @@ -134,6 +125,20 @@ def test_collections_accepts_wire_protocol_chain(): ] +def test_collections_drop_unexpected_fields_autofix(): + collections = [ + { + "type": "set", + "expr": [graphistry.n({"vip": True})], + "unexpected": "drop-me", + } + ] + decoded = decode_collections( + collections_url_params(collections, validate="autofix", warn=False)["collections"] + ) + assert "unexpected" not in decoded[0] + + def test_collections_validation_strict_raises(): bad_collections = [{"type": "set", "expr": [{"filter_dict": {"a": 1}}]}] with pytest.raises(ValueError): diff --git a/graphistry/validate/validate_collections.py 
b/graphistry/validate/validate_collections.py index 0369477146..bc0af89c28 100644 --- a/graphistry/validate/validate_collections.py +++ b/graphistry/validate/validate_collections.py @@ -3,10 +3,18 @@ from urllib.parse import quote, unquote from graphistry.client_session import strtobool +from graphistry.models.collections import CollectionsInput from graphistry.models.types import ValidationMode, ValidationParam from graphistry.util import warn as emit_warn - -CollectionsInput = Union[str, Dict[str, Any], List[Dict[str, Any]]] +_ALLOWED_COLLECTION_FIELDS = { + 'type', + 'id', + 'name', + 'description', + 'node_color', + 'edge_color', + 'expr', +} def normalize_validation_params( @@ -41,15 +49,43 @@ def _issue( emit_warn(f"Collections validation warning: {message} ({data})") +def _reparse_collections_payload( + collections: Union[Dict[str, Any], List[Dict[str, Any]]], + validate_mode: ValidationMode, + warn: bool +) -> List[Dict[str, Any]]: + from graphistry.compute.ast import ASTObject + from graphistry.compute.chain import Chain + + def _default(obj: Any) -> Any: + if isinstance(obj, Chain): + return obj.to_json() + if isinstance(obj, ASTObject): + return obj.to_json() + raise TypeError(f"Object of type {type(obj).__name__} is not JSON-serializable") + + try: + parsed = json.loads(json.dumps(collections, default=_default, ensure_ascii=True)) + except (TypeError, ValueError) as exc: + _issue('Collections must be JSON-serializable', {'error': str(exc)}, validate_mode, warn) + return [] + if isinstance(parsed, list): + return parsed + if isinstance(parsed, dict): + return [parsed] + _issue('Collections JSON must be a list or dict', {'type': type(parsed).__name__}, validate_mode, warn) + return [] + + def _parse_collections_input( collections: CollectionsInput, validate_mode: ValidationMode, warn: bool ) -> List[Dict[str, Any]]: if isinstance(collections, list): - return collections + return _reparse_collections_payload(collections, validate_mode, warn) if 
isinstance(collections, dict): - return [collections] + return _reparse_collections_payload(collections, validate_mode, warn) if isinstance(collections, str): try: parsed = json.loads(collections) @@ -297,7 +333,16 @@ def normalize_collections( continue return [] - normalized_entry = dict(entry) + unexpected_fields = [key for key in entry.keys() if key not in _ALLOWED_COLLECTION_FIELDS] + if unexpected_fields: + _issue( + 'Unexpected fields in collection', + {'index': idx, 'fields': unexpected_fields}, + validate_mode, + warn + ) + + normalized_entry = {key: entry[key] for key in _ALLOWED_COLLECTION_FIELDS if key in entry} collection_type = normalized_entry.get('type', 'set') if not isinstance(collection_type, str): _issue( From 04dd22825bc8e739cabc5dac4ca88a4b7310bd18 Mon Sep 17 00:00:00 2001 From: Leo Meyerovich Date: Sun, 4 Jan 2026 23:07:40 -0800 Subject: [PATCH 07/30] Fix collections typing for mypy --- graphistry/collections.py | 52 ++++++++++++++------- graphistry/validate/validate_collections.py | 4 +- 2 files changed, 37 insertions(+), 19 deletions(-) diff --git a/graphistry/collections.py b/graphistry/collections.py index a11198c0bf..593e85a502 100644 --- a/graphistry/collections.py +++ b/graphistry/collections.py @@ -12,7 +12,21 @@ def _apply_collection_metadata(collection: CollectionDict, **metadata: Optional[str]) -> CollectionDict: - collection.update({key: value for key, value in metadata.items() if value is not None}) + value = metadata.get("id") + if value is not None: + collection["id"] = value + value = metadata.get("name") + if value is not None: + collection["name"] = value + value = metadata.get("description") + if value is not None: + collection["description"] = value + value = metadata.get("node_color") + if value is not None: + collection["node_color"] = value + value = metadata.get("edge_color") + if value is not None: + collection["edge_color"] = value return collection @@ -21,34 +35,38 @@ def _wrap_gfql_expr(expr: GFQLChainInput) -> 
GFQLChainWire: from graphistry.compute.ast import ASTObject from graphistry.compute.chain import Chain + def _normalize_ops(raw: object) -> List[GFQLWireOp]: + if isinstance(raw, list): + ops: List[GFQLWireOp] = [] + for op in raw: + if isinstance(op, ASTObject): + ops.append(op.to_json()) + else: + ops.append(cast(GFQLWireOp, op)) + return ops + if isinstance(raw, ASTObject): + return [raw.to_json()] + return [cast(GFQLWireOp, raw)] + if isinstance(expr, dict): expr_type = expr.get("type") if expr_type == "gfql_chain" and "gfql" in expr: - return expr + return {"type": "gfql_chain", "gfql": _normalize_ops(expr.get("gfql"))} if expr_type == "Chain" and "chain" in expr: - return {"type": "gfql_chain", "gfql": expr.get("chain", [])} + return {"type": "gfql_chain", "gfql": _normalize_ops(expr.get("chain"))} if "gfql" in expr: - return {"type": "gfql_chain", "gfql": expr.get("gfql")} + return {"type": "gfql_chain", "gfql": _normalize_ops(expr.get("gfql"))} if "chain" in expr: - return {"type": "gfql_chain", "gfql": expr.get("chain")} - return {"type": "gfql_chain", "gfql": [expr]} + return {"type": "gfql_chain", "gfql": _normalize_ops(expr.get("chain"))} + return {"type": "gfql_chain", "gfql": _normalize_ops(expr)} if isinstance(expr, Chain): - return {"type": "gfql_chain", "gfql": expr.to_json().get("chain", [])} + return {"type": "gfql_chain", "gfql": _normalize_ops(expr.to_json().get("chain", []))} if isinstance(expr, ASTObject): return {"type": "gfql_chain", "gfql": [expr.to_json()]} - if isinstance(expr, list): - ops: List[GFQLWireOp] = [] - for op in expr: - if isinstance(op, ASTObject): - ops.append(op.to_json()) - else: - ops.append(cast(GFQLWireOp, op)) - return {"type": "gfql_chain", "gfql": ops} - - return {"type": "gfql_chain", "gfql": [cast(GFQLWireOp, expr)]} + return {"type": "gfql_chain", "gfql": _normalize_ops(expr)} def collection_set( diff --git a/graphistry/validate/validate_collections.py b/graphistry/validate/validate_collections.py index 
bc0af89c28..5a818fb90a 100644 --- a/graphistry/validate/validate_collections.py +++ b/graphistry/validate/validate_collections.py @@ -3,7 +3,7 @@ from urllib.parse import quote, unquote from graphistry.client_session import strtobool -from graphistry.models.collections import CollectionsInput +from graphistry.models.collections import Collection, CollectionsInput from graphistry.models.types import ValidationMode, ValidationParam from graphistry.util import warn as emit_warn _ALLOWED_COLLECTION_FIELDS = { @@ -50,7 +50,7 @@ def _issue( def _reparse_collections_payload( - collections: Union[Dict[str, Any], List[Dict[str, Any]]], + collections: Union[Collection, List[Collection]], validate_mode: ValidationMode, warn: bool ) -> List[Dict[str, Any]]: From 322d152c3ad663ae6befbf7366813a957e3f4b21 Mon Sep 17 00:00:00 2001 From: Leo Meyerovich Date: Wed, 7 Jan 2026 10:40:51 -0800 Subject: [PATCH 08/30] Validate collections settings inputs --- graphistry/PlotterBase.py | 23 +++++++++++++++-------- graphistry/tests/test_collections.py | 10 ++++++++++ 2 files changed, 25 insertions(+), 8 deletions(-) diff --git a/graphistry/PlotterBase.py b/graphistry/PlotterBase.py index 2497a47437..6ecb2b58f8 100644 --- a/graphistry/PlotterBase.py +++ b/graphistry/PlotterBase.py @@ -1867,24 +1867,31 @@ def collections( :param validate: Validation mode. 'autofix' (default) warns on issues, 'strict' raises on issues. :param warn: Whether to emit warnings when validate='autofix'. validate=False forces warn=False. 
""" - from graphistry.validate.validate_collections import encode_collections, normalize_collections - - def strip_hash(value: str) -> str: - return value[1:] if value.startswith('#') else value + from graphistry.validate.validate_collections import ( + encode_collections, + normalize_collections, + normalize_collections_url_params, + normalize_validation_params, + ) + validate_mode, warn = normalize_validation_params(validate, warn) settings: Dict[str, Any] = {} if collections is not None: - normalized = normalize_collections(collections, validate=validate, warn=warn) + normalized = normalize_collections(collections, validate=validate_mode, warn=warn) if isinstance(collections, str) and not encode: settings['collections'] = collections else: settings['collections'] = encode_collections(normalized, encode=encode) + extras: Dict[str, Any] = {} if show_collections is not None: - settings['showCollections'] = show_collections + extras['showCollections'] = show_collections if collections_global_node_color is not None: - settings['collectionsGlobalNodeColor'] = strip_hash(collections_global_node_color) + extras['collectionsGlobalNodeColor'] = collections_global_node_color if collections_global_edge_color is not None: - settings['collectionsGlobalEdgeColor'] = strip_hash(collections_global_edge_color) + extras['collectionsGlobalEdgeColor'] = collections_global_edge_color + if extras: + extras = normalize_collections_url_params(extras, validate=validate_mode, warn=warn) + settings.update(extras) if len(settings.keys()) > 0: return self.settings(url_params={**self._url_params, **settings}) diff --git a/graphistry/tests/test_collections.py b/graphistry/tests/test_collections.py index c57da0d989..96ae55d170 100644 --- a/graphistry/tests/test_collections.py +++ b/graphistry/tests/test_collections.py @@ -139,6 +139,16 @@ def test_collections_drop_unexpected_fields_autofix(): assert "unexpected" not in decoded[0] +def test_collections_show_collections_coerces_autofix(): + g2 = 
graphistry.bind().collections(show_collections="true", validate="autofix") + assert g2._url_params["showCollections"] is True + + +def test_collections_show_collections_strict_raises(): + with pytest.raises(ValueError): + graphistry.bind().collections(show_collections="maybe", validate="strict") + + def test_collections_validation_strict_raises(): bad_collections = [{"type": "set", "expr": [{"filter_dict": {"a": 1}}]}] with pytest.raises(ValueError): From b170076b10e203db0d212da8b6c5916642945a28 Mon Sep 17 00:00:00 2001 From: Leo Meyerovich Date: Wed, 7 Jan 2026 11:02:38 -0800 Subject: [PATCH 09/30] Simplify collections typing --- graphistry/collections.py | 23 +++++++++++++---------- graphistry/models/collections.py | 25 +++++-------------------- 2 files changed, 18 insertions(+), 30 deletions(-) diff --git a/graphistry/collections.py b/graphistry/collections.py index 593e85a502..79da133704 100644 --- a/graphistry/collections.py +++ b/graphistry/collections.py @@ -1,12 +1,11 @@ -from typing import List, Optional, Sequence, TypeVar, cast +from typing import Dict, List, Optional, Sequence, TypeVar from graphistry.models.collections import ( CollectionIntersection, + CollectionExprInput, CollectionSet, - GFQLChainInput, - GFQLChainWire, - GFQLWireOp, ) +from graphistry.utils.json import JSONVal CollectionDict = TypeVar("CollectionDict", CollectionSet, CollectionIntersection) @@ -30,23 +29,27 @@ def _apply_collection_metadata(collection: CollectionDict, **metadata: Optional[ return collection -def _wrap_gfql_expr(expr: GFQLChainInput) -> GFQLChainWire: +def _wrap_gfql_expr(expr: CollectionExprInput) -> Dict[str, JSONVal]: from graphistry.compute.ast import ASTObject from graphistry.compute.chain import Chain - def _normalize_ops(raw: object) -> List[GFQLWireOp]: + def _normalize_ops(raw: object) -> List[Dict[str, JSONVal]]: if isinstance(raw, list): - ops: List[GFQLWireOp] = [] + ops: List[Dict[str, JSONVal]] = [] for op in raw: if isinstance(op, ASTObject): 
ops.append(op.to_json()) + elif isinstance(op, dict): + ops.append(op) else: - ops.append(cast(GFQLWireOp, op)) + raise TypeError("Collection GFQL operations must be AST objects or dictionaries") return ops if isinstance(raw, ASTObject): return [raw.to_json()] - return [cast(GFQLWireOp, raw)] + if isinstance(raw, dict): + return [raw] + raise TypeError("Collection GFQL operations must be a list, AST object, or dictionary") if isinstance(expr, dict): expr_type = expr.get("type") @@ -71,7 +74,7 @@ def _normalize_ops(raw: object) -> List[GFQLWireOp]: def collection_set( *, - expr: GFQLChainInput, + expr: CollectionExprInput, id: Optional[str] = None, name: Optional[str] = None, description: Optional[str] = None, diff --git a/graphistry/models/collections.py b/graphistry/models/collections.py index af29a7f575..d12896d663 100644 --- a/graphistry/models/collections.py +++ b/graphistry/models/collections.py @@ -10,27 +10,12 @@ from graphistry.compute.chain import Chain -GFQLWireOp = Dict[str, JSONVal] - - -class GFQLChainWire(TypedDict): - type: Literal["gfql_chain"] - gfql: List[GFQLWireOp] - - -class GFQLChainJSON(TypedDict): - type: Literal["Chain"] - chain: List[GFQLWireOp] - - -GFQLOpInput = Union["ASTObject", GFQLWireOp] -GFQLChainInput = Union[ +CollectionExprInput = Union[ "Chain", "ASTObject", - GFQLChainWire, - GFQLChainJSON, - List[GFQLOpInput], - GFQLWireOp, + List["ASTObject"], + Dict[str, JSONVal], + List[Dict[str, JSONVal]], ] @@ -49,7 +34,7 @@ class CollectionBase(TypedDict, total=False): class CollectionSet(CollectionBase): type: NotRequired[Literal["set"]] - expr: Required[GFQLChainInput] + expr: Required[CollectionExprInput] class CollectionIntersection(CollectionBase): From 9064aee0eded523410814c203bc07e8fecae5136 Mon Sep 17 00:00:00 2001 From: Leo Meyerovich Date: Wed, 7 Jan 2026 11:09:33 -0800 Subject: [PATCH 10/30] Reuse gfql chain normalization in collections helpers --- graphistry/collections.py | 64 ++++++++++++++++++++++++++------------- 1 file 
changed, 43 insertions(+), 21 deletions(-) diff --git a/graphistry/collections.py b/graphistry/collections.py index 79da133704..0f014706ae 100644 --- a/graphistry/collections.py +++ b/graphistry/collections.py @@ -34,42 +34,64 @@ def _wrap_gfql_expr(expr: CollectionExprInput) -> Dict[str, JSONVal]: from graphistry.compute.ast import ASTObject from graphistry.compute.chain import Chain - def _normalize_ops(raw: object) -> List[Dict[str, JSONVal]]: + def _gfql_chain_from_ops(ops: List[ASTObject]) -> Dict[str, JSONVal]: + chain_json = Chain(ops).to_json() + return {"type": "gfql_chain", "gfql": chain_json.get("chain", [])} + + def _gfql_chain_from_wire_ops(ops: List[Dict[str, JSONVal]]) -> Dict[str, JSONVal]: + chain = Chain.from_json({"chain": ops}, validate=True) + chain_json = chain.to_json() + return {"type": "gfql_chain", "gfql": chain_json.get("chain", [])} + + def _list_to_wire_ops(raw: List[object]) -> List[Dict[str, JSONVal]]: + ops: List[Dict[str, JSONVal]] = [] + for op in raw: + if isinstance(op, ASTObject): + ops.append(op.to_json()) + elif isinstance(op, dict): + ops.append(op) + else: + raise TypeError("Collection GFQL operations must be AST objects or dictionaries") + return ops + + def _wrap_ops_value(raw: object) -> List[Dict[str, JSONVal]]: if isinstance(raw, list): - ops: List[Dict[str, JSONVal]] = [] - for op in raw: - if isinstance(op, ASTObject): - ops.append(op.to_json()) - elif isinstance(op, dict): - ops.append(op) - else: - raise TypeError("Collection GFQL operations must be AST objects or dictionaries") - return ops - if isinstance(raw, ASTObject): - return [raw.to_json()] - if isinstance(raw, dict): - return [raw] + return _list_to_wire_ops(raw) + if isinstance(raw, (ASTObject, dict)): + return _list_to_wire_ops([raw]) raise TypeError("Collection GFQL operations must be a list, AST object, or dictionary") if isinstance(expr, dict): expr_type = expr.get("type") if expr_type == "gfql_chain" and "gfql" in expr: - return {"type": "gfql_chain", 
"gfql": _normalize_ops(expr.get("gfql"))} + gfql_ops = expr.get("gfql") + return _gfql_chain_from_wire_ops(_wrap_ops_value(gfql_ops)) if expr_type == "Chain" and "chain" in expr: - return {"type": "gfql_chain", "gfql": _normalize_ops(expr.get("chain"))} + chain = Chain.from_json(expr, validate=True) + chain_json = chain.to_json() + return {"type": "gfql_chain", "gfql": chain_json.get("chain", [])} if "gfql" in expr: - return {"type": "gfql_chain", "gfql": _normalize_ops(expr.get("gfql"))} + gfql_ops = expr.get("gfql") + return _gfql_chain_from_wire_ops(_wrap_ops_value(gfql_ops)) if "chain" in expr: - return {"type": "gfql_chain", "gfql": _normalize_ops(expr.get("chain"))} - return {"type": "gfql_chain", "gfql": _normalize_ops(expr)} + chain = Chain.from_json(expr, validate=True) + chain_json = chain.to_json() + return {"type": "gfql_chain", "gfql": chain_json.get("chain", [])} + return _gfql_chain_from_wire_ops([expr]) if isinstance(expr, Chain): - return {"type": "gfql_chain", "gfql": _normalize_ops(expr.to_json().get("chain", []))} + chain_json = expr.to_json() + return {"type": "gfql_chain", "gfql": chain_json.get("chain", [])} if isinstance(expr, ASTObject): return {"type": "gfql_chain", "gfql": [expr.to_json()]} - return {"type": "gfql_chain", "gfql": _normalize_ops(expr)} + if isinstance(expr, list): + if all(isinstance(op, ASTObject) for op in expr): + return _gfql_chain_from_ops(expr) + return _gfql_chain_from_wire_ops(_list_to_wire_ops(expr)) + + raise TypeError("Collection expr must be an AST object, chain, list, or dict") def collection_set( From a7b2a2d958261c2055c309b690db2fab4a4d0c67 Mon Sep 17 00:00:00 2001 From: Leo Meyerovich Date: Wed, 7 Jan 2026 17:13:47 -0800 Subject: [PATCH 11/30] Refine collections gfql normalization for mypy --- graphistry/collections.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/graphistry/collections.py b/graphistry/collections.py index 0f014706ae..e321a3dac7 100644 --- 
a/graphistry/collections.py +++ b/graphistry/collections.py @@ -34,8 +34,8 @@ def _wrap_gfql_expr(expr: CollectionExprInput) -> Dict[str, JSONVal]: from graphistry.compute.ast import ASTObject from graphistry.compute.chain import Chain - def _gfql_chain_from_ops(ops: List[ASTObject]) -> Dict[str, JSONVal]: - chain_json = Chain(ops).to_json() + def _gfql_chain_from_ops(ops: Sequence[ASTObject]) -> Dict[str, JSONVal]: + chain_json = Chain(list(ops)).to_json() return {"type": "gfql_chain", "gfql": chain_json.get("chain", [])} def _gfql_chain_from_wire_ops(ops: List[Dict[str, JSONVal]]) -> Dict[str, JSONVal]: @@ -43,7 +43,7 @@ def _gfql_chain_from_wire_ops(ops: List[Dict[str, JSONVal]]) -> Dict[str, JSONVa chain_json = chain.to_json() return {"type": "gfql_chain", "gfql": chain_json.get("chain", [])} - def _list_to_wire_ops(raw: List[object]) -> List[Dict[str, JSONVal]]: + def _list_to_wire_ops(raw: Sequence[object]) -> List[Dict[str, JSONVal]]: ops: List[Dict[str, JSONVal]] = [] for op in raw: if isinstance(op, ASTObject): @@ -87,8 +87,9 @@ def _wrap_ops_value(raw: object) -> List[Dict[str, JSONVal]]: return {"type": "gfql_chain", "gfql": [expr.to_json()]} if isinstance(expr, list): - if all(isinstance(op, ASTObject) for op in expr): - return _gfql_chain_from_ops(expr) + ops_ast = [op for op in expr if isinstance(op, ASTObject)] + if len(ops_ast) == len(expr): + return _gfql_chain_from_ops(ops_ast) return _gfql_chain_from_wire_ops(_list_to_wire_ops(expr)) raise TypeError("Collection expr must be an AST object, chain, list, or dict") From 8389026b29d471e62f3948b6db5f22659c0882cb Mon Sep 17 00:00:00 2001 From: Leo Meyerovich Date: Thu, 8 Jan 2026 05:17:14 -0800 Subject: [PATCH 12/30] Normalize collections GFQL via Chain and reject Let --- graphistry/validate/validate_collections.py | 143 ++++++++++++-------- 1 file changed, 90 insertions(+), 53 deletions(-) diff --git a/graphistry/validate/validate_collections.py b/graphistry/validate/validate_collections.py index 
5a818fb90a..d0970976a2 100644 --- a/graphistry/validate/validate_collections.py +++ b/graphistry/validate/validate_collections.py @@ -162,7 +162,7 @@ def _normalize_gfql_ops( warn: bool, entry_index: int ) -> Optional[List[Dict[str, Any]]]: - from graphistry.compute.ast import ASTObject, from_json as ast_from_json + from graphistry.compute.ast import ASTLet, ASTObject from graphistry.compute.chain import Chain if gfql_ops is None: @@ -174,83 +174,120 @@ def _normalize_gfql_ops( except json.JSONDecodeError as exc: _issue('GFQL chain string must be JSON', {'index': entry_index, 'error': str(exc)}, validate_mode, warn) return None - ops_raw: Any - if isinstance(gfql_ops, Chain): - ops_raw = gfql_ops.to_json().get('chain', []) - elif isinstance(gfql_ops, ASTObject): - ops_raw = [gfql_ops.to_json()] - elif isinstance(gfql_ops, dict): - if 'chain' in gfql_ops: - ops_raw = gfql_ops.get('chain', []) - else: - ops_raw = [gfql_ops] - elif isinstance(gfql_ops, list): - ops_raw = [] - for op in gfql_ops: - if isinstance(op, ASTObject): - ops_raw.append(op.to_json()) - elif isinstance(op, dict): - ops_raw.append(op) - else: - _issue( - 'GFQL operations must be AST objects or dicts', - {'index': entry_index, 'value': op, 'type': type(op).__name__}, - validate_mode, - warn - ) - if validate_mode == 'autofix': - continue - return None - else: + + def _issue_let_not_supported(payload: Any) -> None: _issue( - 'GFQL operations must be a Chain, AST object, list, or dict', - {'index': entry_index, 'type': type(gfql_ops).__name__}, + 'Collections do not support GFQL Let/DAG expressions', + {'index': entry_index, 'value': payload, 'type': type(payload).__name__}, validate_mode, warn ) - return None - if not isinstance(ops_raw, list): + def _coerce_ops_list(raw: Any) -> Optional[List[Dict[str, Any]]]: + if isinstance(raw, list): + ops: List[Dict[str, Any]] = [] + for op in raw: + if isinstance(op, ASTObject): + ops.append(op.to_json()) + elif isinstance(op, dict): + ops.append(op) + else: 
+ _issue( + 'GFQL operations must be AST objects or dicts', + {'index': entry_index, 'value': op, 'type': type(op).__name__}, + validate_mode, + warn + ) + if validate_mode == 'autofix': + continue + return None + return ops + if isinstance(raw, ASTObject): + return [raw.to_json()] + if isinstance(raw, dict): + return [raw] _issue( - 'GFQL operations must be a list', - {'index': entry_index, 'value': ops_raw, 'type': type(ops_raw).__name__}, + 'GFQL operations must be a list, AST object, or dict', + {'index': entry_index, 'value': raw, 'type': type(raw).__name__}, validate_mode, warn ) return None - ops: List[Any] = ops_raw - - normalized_ops: List[Dict[str, Any]] = [] - for op in ops: - if not isinstance(op, dict): + def _normalize_chain_ops(raw_ops: List[Dict[str, Any]]) -> Optional[List[Dict[str, Any]]]: + if not isinstance(raw_ops, list): _issue( - 'GFQL operations must be dictionaries after normalization', - {'index': entry_index, 'value': op, 'type': type(op).__name__}, + 'GFQL operations must be a list', + {'index': entry_index, 'value': raw_ops, 'type': type(raw_ops).__name__}, validate_mode, warn ) - if validate_mode == 'autofix': - continue return None try: - ast_from_json(op, validate=True) + chain_obj = Chain.from_json({'chain': raw_ops}, validate=True) except Exception as exc: _issue( - 'Invalid GFQL operation in collection', - {'index': entry_index, 'op': op, 'error': str(exc)}, + 'Invalid GFQL chain in collection', + {'index': entry_index, 'error': str(exc)}, validate_mode, warn ) - if validate_mode == 'autofix': - continue return None - normalized_ops.append(op) + if any(isinstance(op, ASTLet) for op in chain_obj.chain): + _issue_let_not_supported(raw_ops) + return None + normalized_ops = _coerce_ops_list(chain_obj.to_json().get('chain', [])) + if normalized_ops is None: + return None + if len(normalized_ops) == 0: + _issue('GFQL chain is empty', {'index': entry_index}, validate_mode, warn) + return None + return normalized_ops - if 
len(normalized_ops) == 0: - _issue('GFQL chain is empty', {'index': entry_index}, validate_mode, warn) + if isinstance(gfql_ops, ASTLet): + _issue_let_not_supported(gfql_ops) return None - return normalized_ops + + if isinstance(gfql_ops, Chain): + if any(isinstance(op, ASTLet) for op in gfql_ops.chain): + _issue_let_not_supported(gfql_ops) + return None + ops_raw = _coerce_ops_list(gfql_ops.to_json().get('chain', [])) + if ops_raw is None: + return None + return _normalize_chain_ops(ops_raw) + if isinstance(gfql_ops, ASTObject): + ops_raw = _coerce_ops_list(gfql_ops) + if ops_raw is None: + return None + return _normalize_chain_ops(ops_raw) + if isinstance(gfql_ops, dict): + if gfql_ops.get('type') == 'Let' or 'bindings' in gfql_ops: + _issue_let_not_supported(gfql_ops) + return None + if 'chain' in gfql_ops: + ops_raw = _coerce_ops_list(gfql_ops.get('chain')) + elif 'gfql' in gfql_ops: + ops_raw = _coerce_ops_list(gfql_ops.get('gfql')) + else: + ops_raw = _coerce_ops_list(gfql_ops) + if ops_raw is None: + return None + return _normalize_chain_ops(ops_raw) + if isinstance(gfql_ops, list): + ops_raw = _coerce_ops_list(gfql_ops) + if ops_raw is None: + return None + return _normalize_chain_ops(ops_raw) + + _issue( + 'GFQL operations must be a Chain, AST object, list, or dict', + {'index': entry_index, 'type': type(gfql_ops).__name__}, + validate_mode, + warn + ) + return None + def _normalize_gfql_expr( From 1144977ab5deef5c1c7a1dd3fa4b564d2c4e822a Mon Sep 17 00:00:00 2001 From: Leo Meyerovich Date: Thu, 8 Jan 2026 05:24:09 -0800 Subject: [PATCH 13/30] Avoid Chain.from_json in collections normalization --- graphistry/collections.py | 65 ++++------ graphistry/validate/validate_collections.py | 130 +++++++++----------- 2 files changed, 84 insertions(+), 111 deletions(-) diff --git a/graphistry/collections.py b/graphistry/collections.py index e321a3dac7..5d4684a414 100644 --- a/graphistry/collections.py +++ b/graphistry/collections.py @@ -31,53 +31,43 @@ def 
_apply_collection_metadata(collection: CollectionDict, **metadata: Optional[ def _wrap_gfql_expr(expr: CollectionExprInput) -> Dict[str, JSONVal]: - from graphistry.compute.ast import ASTObject + from graphistry.compute.ast import ASTLet, ASTObject, from_json as ast_from_json from graphistry.compute.chain import Chain - def _gfql_chain_from_ops(ops: Sequence[ASTObject]) -> Dict[str, JSONVal]: - chain_json = Chain(list(ops)).to_json() - return {"type": "gfql_chain", "gfql": chain_json.get("chain", [])} - - def _gfql_chain_from_wire_ops(ops: List[Dict[str, JSONVal]]) -> Dict[str, JSONVal]: - chain = Chain.from_json({"chain": ops}, validate=True) - chain_json = chain.to_json() - return {"type": "gfql_chain", "gfql": chain_json.get("chain", [])} - - def _list_to_wire_ops(raw: Sequence[object]) -> List[Dict[str, JSONVal]]: - ops: List[Dict[str, JSONVal]] = [] - for op in raw: - if isinstance(op, ASTObject): - ops.append(op.to_json()) - elif isinstance(op, dict): - ops.append(op) - else: - raise TypeError("Collection GFQL operations must be AST objects or dictionaries") - return ops - - def _wrap_ops_value(raw: object) -> List[Dict[str, JSONVal]]: + def _normalize_op(op: object) -> Dict[str, JSONVal]: + if isinstance(op, ASTLet): + raise TypeError("Collection GFQL does not support Let/DAG expressions") + if isinstance(op, ASTObject): + return op.to_json() + if isinstance(op, dict): + if op.get("type") == "Let" or "bindings" in op: + raise TypeError("Collection GFQL does not support Let/DAG expressions") + parsed = ast_from_json(op, validate=True) + if isinstance(parsed, ASTLet): + raise TypeError("Collection GFQL does not support Let/DAG expressions") + return parsed.to_json() + raise TypeError("Collection GFQL operations must be AST objects or dictionaries") + + def _normalize_ops_value(raw: object) -> List[Dict[str, JSONVal]]: if isinstance(raw, list): - return _list_to_wire_ops(raw) - if isinstance(raw, (ASTObject, dict)): - return _list_to_wire_ops([raw]) - raise 
TypeError("Collection GFQL operations must be a list, AST object, or dictionary") + if len(raw) == 0: + raise ValueError("Collection GFQL operations list cannot be empty") + return [_normalize_op(op) for op in raw] + return [_normalize_op(raw)] if isinstance(expr, dict): expr_type = expr.get("type") if expr_type == "gfql_chain" and "gfql" in expr: gfql_ops = expr.get("gfql") - return _gfql_chain_from_wire_ops(_wrap_ops_value(gfql_ops)) + return {"type": "gfql_chain", "gfql": _normalize_ops_value(gfql_ops)} if expr_type == "Chain" and "chain" in expr: - chain = Chain.from_json(expr, validate=True) - chain_json = chain.to_json() - return {"type": "gfql_chain", "gfql": chain_json.get("chain", [])} + return {"type": "gfql_chain", "gfql": _normalize_ops_value(expr.get("chain"))} if "gfql" in expr: gfql_ops = expr.get("gfql") - return _gfql_chain_from_wire_ops(_wrap_ops_value(gfql_ops)) + return {"type": "gfql_chain", "gfql": _normalize_ops_value(gfql_ops)} if "chain" in expr: - chain = Chain.from_json(expr, validate=True) - chain_json = chain.to_json() - return {"type": "gfql_chain", "gfql": chain_json.get("chain", [])} - return _gfql_chain_from_wire_ops([expr]) + return {"type": "gfql_chain", "gfql": _normalize_ops_value(expr.get("chain"))} + return {"type": "gfql_chain", "gfql": _normalize_ops_value(expr)} if isinstance(expr, Chain): chain_json = expr.to_json() @@ -87,10 +77,7 @@ def _wrap_ops_value(raw: object) -> List[Dict[str, JSONVal]]: return {"type": "gfql_chain", "gfql": [expr.to_json()]} if isinstance(expr, list): - ops_ast = [op for op in expr if isinstance(op, ASTObject)] - if len(ops_ast) == len(expr): - return _gfql_chain_from_ops(ops_ast) - return _gfql_chain_from_wire_ops(_list_to_wire_ops(expr)) + return {"type": "gfql_chain", "gfql": _normalize_ops_value(expr)} raise TypeError("Collection expr must be an AST object, chain, list, or dict") diff --git a/graphistry/validate/validate_collections.py b/graphistry/validate/validate_collections.py index 
d0970976a2..2ec4d865b2 100644 --- a/graphistry/validate/validate_collections.py +++ b/graphistry/validate/validate_collections.py @@ -162,7 +162,7 @@ def _normalize_gfql_ops( warn: bool, entry_index: int ) -> Optional[List[Dict[str, Any]]]: - from graphistry.compute.ast import ASTLet, ASTObject + from graphistry.compute.ast import ASTLet, ASTObject, from_json as ast_from_json from graphistry.compute.chain import Chain if gfql_ops is None: @@ -183,29 +183,57 @@ def _issue_let_not_supported(payload: Any) -> None: warn ) - def _coerce_ops_list(raw: Any) -> Optional[List[Dict[str, Any]]]: + def _normalize_op(op: object) -> Optional[Dict[str, Any]]: + if isinstance(op, ASTLet): + _issue_let_not_supported(op) + return None + if isinstance(op, ASTObject): + return op.to_json() + if isinstance(op, dict): + if op.get('type') == 'Let' or 'bindings' in op: + _issue_let_not_supported(op) + return None + try: + parsed = ast_from_json(op, validate=True) + except Exception as exc: + _issue( + 'Invalid GFQL operation in collection', + {'index': entry_index, 'op': op, 'error': str(exc)}, + validate_mode, + warn + ) + return None + if isinstance(parsed, ASTLet): + _issue_let_not_supported(op) + return None + return parsed.to_json() + _issue( + 'GFQL operations must be AST objects or dicts', + {'index': entry_index, 'value': op, 'type': type(op).__name__}, + validate_mode, + warn + ) + return None + + def _normalize_ops_list(raw: object) -> Optional[List[Dict[str, Any]]]: if isinstance(raw, list): - ops: List[Dict[str, Any]] = [] + normalized_ops: List[Dict[str, Any]] = [] for op in raw: - if isinstance(op, ASTObject): - ops.append(op.to_json()) - elif isinstance(op, dict): - ops.append(op) - else: - _issue( - 'GFQL operations must be AST objects or dicts', - {'index': entry_index, 'value': op, 'type': type(op).__name__}, - validate_mode, - warn - ) + normalized_op = _normalize_op(op) + if normalized_op is None: if validate_mode == 'autofix': continue return None - return ops - if 
isinstance(raw, ASTObject): - return [raw.to_json()] - if isinstance(raw, dict): - return [raw] + normalized_ops.append(normalized_op) + if len(normalized_ops) == 0: + _issue('GFQL chain is empty', {'index': entry_index}, validate_mode, warn) + return None + return normalized_ops + if isinstance(raw, (ASTObject, dict)): + normalized_op = _normalize_op(raw) + if normalized_op is None: + return None + return [normalized_op] _issue( 'GFQL operations must be a list, AST object, or dict', {'index': entry_index, 'value': raw, 'type': type(raw).__name__}, @@ -214,71 +242,29 @@ def _coerce_ops_list(raw: Any) -> Optional[List[Dict[str, Any]]]: ) return None - def _normalize_chain_ops(raw_ops: List[Dict[str, Any]]) -> Optional[List[Dict[str, Any]]]: - if not isinstance(raw_ops, list): - _issue( - 'GFQL operations must be a list', - {'index': entry_index, 'value': raw_ops, 'type': type(raw_ops).__name__}, - validate_mode, - warn - ) - return None - try: - chain_obj = Chain.from_json({'chain': raw_ops}, validate=True) - except Exception as exc: - _issue( - 'Invalid GFQL chain in collection', - {'index': entry_index, 'error': str(exc)}, - validate_mode, - warn - ) - return None - if any(isinstance(op, ASTLet) for op in chain_obj.chain): - _issue_let_not_supported(raw_ops) - return None - normalized_ops = _coerce_ops_list(chain_obj.to_json().get('chain', [])) - if normalized_ops is None: - return None - if len(normalized_ops) == 0: - _issue('GFQL chain is empty', {'index': entry_index}, validate_mode, warn) - return None - return normalized_ops - if isinstance(gfql_ops, ASTLet): _issue_let_not_supported(gfql_ops) return None if isinstance(gfql_ops, Chain): - if any(isinstance(op, ASTLet) for op in gfql_ops.chain): - _issue_let_not_supported(gfql_ops) - return None - ops_raw = _coerce_ops_list(gfql_ops.to_json().get('chain', [])) - if ops_raw is None: - return None - return _normalize_chain_ops(ops_raw) + return _normalize_ops_list(gfql_ops.chain) if isinstance(gfql_ops, 
ASTObject): - ops_raw = _coerce_ops_list(gfql_ops) - if ops_raw is None: - return None - return _normalize_chain_ops(ops_raw) + return _normalize_ops_list(gfql_ops) if isinstance(gfql_ops, dict): if gfql_ops.get('type') == 'Let' or 'bindings' in gfql_ops: _issue_let_not_supported(gfql_ops) return None + if gfql_ops.get('type') == 'Chain' and 'chain' in gfql_ops: + return _normalize_ops_list(gfql_ops.get('chain')) + if gfql_ops.get('type') == 'gfql_chain' and 'gfql' in gfql_ops: + return _normalize_ops_list(gfql_ops.get('gfql')) if 'chain' in gfql_ops: - ops_raw = _coerce_ops_list(gfql_ops.get('chain')) - elif 'gfql' in gfql_ops: - ops_raw = _coerce_ops_list(gfql_ops.get('gfql')) - else: - ops_raw = _coerce_ops_list(gfql_ops) - if ops_raw is None: - return None - return _normalize_chain_ops(ops_raw) + return _normalize_ops_list(gfql_ops.get('chain')) + if 'gfql' in gfql_ops: + return _normalize_ops_list(gfql_ops.get('gfql')) + return _normalize_ops_list(gfql_ops) if isinstance(gfql_ops, list): - ops_raw = _coerce_ops_list(gfql_ops) - if ops_raw is None: - return None - return _normalize_chain_ops(ops_raw) + return _normalize_ops_list(gfql_ops) _issue( 'GFQL operations must be a Chain, AST object, list, or dict', From 4e889b20e05f51b4ccfb1ba0acaa3fef6525f2ec Mon Sep 17 00:00:00 2001 From: Leo Meyerovich Date: Thu, 8 Jan 2026 09:06:24 -0800 Subject: [PATCH 14/30] Allow Let in collections normalization --- graphistry/collections.py | 8 +------ graphistry/tests/test_collections.py | 9 +++++++ graphistry/validate/validate_collections.py | 26 +-------------------- 3 files changed, 11 insertions(+), 32 deletions(-) diff --git a/graphistry/collections.py b/graphistry/collections.py index 5d4684a414..5603d7660b 100644 --- a/graphistry/collections.py +++ b/graphistry/collections.py @@ -31,20 +31,14 @@ def _apply_collection_metadata(collection: CollectionDict, **metadata: Optional[ def _wrap_gfql_expr(expr: CollectionExprInput) -> Dict[str, JSONVal]: - from 
graphistry.compute.ast import ASTLet, ASTObject, from_json as ast_from_json + from graphistry.compute.ast import ASTObject, from_json as ast_from_json from graphistry.compute.chain import Chain def _normalize_op(op: object) -> Dict[str, JSONVal]: - if isinstance(op, ASTLet): - raise TypeError("Collection GFQL does not support Let/DAG expressions") if isinstance(op, ASTObject): return op.to_json() if isinstance(op, dict): - if op.get("type") == "Let" or "bindings" in op: - raise TypeError("Collection GFQL does not support Let/DAG expressions") parsed = ast_from_json(op, validate=True) - if isinstance(parsed, ASTLet): - raise TypeError("Collection GFQL does not support Let/DAG expressions") return parsed.to_json() raise TypeError("Collection GFQL operations must be AST objects or dictionaries") diff --git a/graphistry/tests/test_collections.py b/graphistry/tests/test_collections.py index 96ae55d170..5516e795f0 100644 --- a/graphistry/tests/test_collections.py +++ b/graphistry/tests/test_collections.py @@ -125,6 +125,15 @@ def test_collections_accepts_wire_protocol_chain(): ] +def test_collections_accepts_let_expr(): + dag = graphistry.let({"seed": graphistry.n({"type": "user"})}) + decoded = decode_collections( + collections_url_params({"type": "set", "expr": dag})["collections"] + ) + assert decoded[0]["expr"]["type"] == "gfql_chain" + assert decoded[0]["expr"]["gfql"][0]["type"] == "Let" + + def test_collections_drop_unexpected_fields_autofix(): collections = [ { diff --git a/graphistry/validate/validate_collections.py b/graphistry/validate/validate_collections.py index 2ec4d865b2..4749cd587d 100644 --- a/graphistry/validate/validate_collections.py +++ b/graphistry/validate/validate_collections.py @@ -162,7 +162,7 @@ def _normalize_gfql_ops( warn: bool, entry_index: int ) -> Optional[List[Dict[str, Any]]]: - from graphistry.compute.ast import ASTLet, ASTObject, from_json as ast_from_json + from graphistry.compute.ast import ASTObject, from_json as ast_from_json 
from graphistry.compute.chain import Chain if gfql_ops is None: @@ -175,24 +175,10 @@ def _normalize_gfql_ops( _issue('GFQL chain string must be JSON', {'index': entry_index, 'error': str(exc)}, validate_mode, warn) return None - def _issue_let_not_supported(payload: Any) -> None: - _issue( - 'Collections do not support GFQL Let/DAG expressions', - {'index': entry_index, 'value': payload, 'type': type(payload).__name__}, - validate_mode, - warn - ) - def _normalize_op(op: object) -> Optional[Dict[str, Any]]: - if isinstance(op, ASTLet): - _issue_let_not_supported(op) - return None if isinstance(op, ASTObject): return op.to_json() if isinstance(op, dict): - if op.get('type') == 'Let' or 'bindings' in op: - _issue_let_not_supported(op) - return None try: parsed = ast_from_json(op, validate=True) except Exception as exc: @@ -203,9 +189,6 @@ def _normalize_op(op: object) -> Optional[Dict[str, Any]]: warn ) return None - if isinstance(parsed, ASTLet): - _issue_let_not_supported(op) - return None return parsed.to_json() _issue( 'GFQL operations must be AST objects or dicts', @@ -242,18 +225,11 @@ def _normalize_ops_list(raw: object) -> Optional[List[Dict[str, Any]]]: ) return None - if isinstance(gfql_ops, ASTLet): - _issue_let_not_supported(gfql_ops) - return None - if isinstance(gfql_ops, Chain): return _normalize_ops_list(gfql_ops.chain) if isinstance(gfql_ops, ASTObject): return _normalize_ops_list(gfql_ops) if isinstance(gfql_ops, dict): - if gfql_ops.get('type') == 'Let' or 'bindings' in gfql_ops: - _issue_let_not_supported(gfql_ops) - return None if gfql_ops.get('type') == 'Chain' and 'chain' in gfql_ops: return _normalize_ops_list(gfql_ops.get('chain')) if gfql_ops.get('type') == 'gfql_chain' and 'gfql' in gfql_ops: From 59ba9a34118a122bffe366f328b344222bd45f6e Mon Sep 17 00:00:00 2001 From: Leo Meyerovich Date: Thu, 8 Jan 2026 14:27:10 -0800 Subject: [PATCH 15/30] Simplify collections gfql wrapping --- graphistry/collections.py | 49 
+++++++++++++++------------------------ 1 file changed, 19 insertions(+), 30 deletions(-) diff --git a/graphistry/collections.py b/graphistry/collections.py index 5603d7660b..1e8a5b0166 100644 --- a/graphistry/collections.py +++ b/graphistry/collections.py @@ -38,42 +38,31 @@ def _normalize_op(op: object) -> Dict[str, JSONVal]: if isinstance(op, ASTObject): return op.to_json() if isinstance(op, dict): - parsed = ast_from_json(op, validate=True) - return parsed.to_json() + return ast_from_json(op, validate=True).to_json() raise TypeError("Collection GFQL operations must be AST objects or dictionaries") - def _normalize_ops_value(raw: object) -> List[Dict[str, JSONVal]]: + def _normalize_ops(raw: object) -> List[Dict[str, JSONVal]]: + if isinstance(raw, Chain): + return _normalize_ops(raw.to_json().get("chain", [])) + if isinstance(raw, ASTObject): + return [raw.to_json()] if isinstance(raw, list): if len(raw) == 0: raise ValueError("Collection GFQL operations list cannot be empty") return [_normalize_op(op) for op in raw] - return [_normalize_op(raw)] - - if isinstance(expr, dict): - expr_type = expr.get("type") - if expr_type == "gfql_chain" and "gfql" in expr: - gfql_ops = expr.get("gfql") - return {"type": "gfql_chain", "gfql": _normalize_ops_value(gfql_ops)} - if expr_type == "Chain" and "chain" in expr: - return {"type": "gfql_chain", "gfql": _normalize_ops_value(expr.get("chain"))} - if "gfql" in expr: - gfql_ops = expr.get("gfql") - return {"type": "gfql_chain", "gfql": _normalize_ops_value(gfql_ops)} - if "chain" in expr: - return {"type": "gfql_chain", "gfql": _normalize_ops_value(expr.get("chain"))} - return {"type": "gfql_chain", "gfql": _normalize_ops_value(expr)} - - if isinstance(expr, Chain): - chain_json = expr.to_json() - return {"type": "gfql_chain", "gfql": chain_json.get("chain", [])} - - if isinstance(expr, ASTObject): - return {"type": "gfql_chain", "gfql": [expr.to_json()]} - - if isinstance(expr, list): - return {"type": "gfql_chain", "gfql": 
_normalize_ops_value(expr)} - - raise TypeError("Collection expr must be an AST object, chain, list, or dict") + if isinstance(raw, dict): + if raw.get("type") == "Chain" and "chain" in raw: + return _normalize_ops(raw.get("chain")) + if raw.get("type") == "gfql_chain" and "gfql" in raw: + return _normalize_ops(raw.get("gfql")) + if "chain" in raw: + return _normalize_ops(raw.get("chain")) + if "gfql" in raw: + return _normalize_ops(raw.get("gfql")) + return [_normalize_op(raw)] + raise TypeError("Collection expr must be an AST object, chain, list, or dict") + + return {"type": "gfql_chain", "gfql": _normalize_ops(expr)} def collection_set( From 29883112e8af7052d7fd08706b731145f3b8f292 Mon Sep 17 00:00:00 2001 From: Leo Meyerovich Date: Thu, 8 Jan 2026 15:07:13 -0800 Subject: [PATCH 16/30] Slim collections validation helpers --- graphistry/validate/validate_collections.py | 105 ++++++++------------ 1 file changed, 42 insertions(+), 63 deletions(-) diff --git a/graphistry/validate/validate_collections.py b/graphistry/validate/validate_collections.py index 4749cd587d..db107b18ac 100644 --- a/graphistry/validate/validate_collections.py +++ b/graphistry/validate/validate_collections.py @@ -49,26 +49,11 @@ def _issue( emit_warn(f"Collections validation warning: {message} ({data})") -def _reparse_collections_payload( - collections: Union[Collection, List[Collection]], +def _coerce_collection_list( + parsed: Any, validate_mode: ValidationMode, warn: bool ) -> List[Dict[str, Any]]: - from graphistry.compute.ast import ASTObject - from graphistry.compute.chain import Chain - - def _default(obj: Any) -> Any: - if isinstance(obj, Chain): - return obj.to_json() - if isinstance(obj, ASTObject): - return obj.to_json() - raise TypeError(f"Object of type {type(obj).__name__} is not JSON-serializable") - - try: - parsed = json.loads(json.dumps(collections, default=_default, ensure_ascii=True)) - except (TypeError, ValueError) as exc: - _issue('Collections must be 
JSON-serializable', {'error': str(exc)}, validate_mode, warn) - return [] if isinstance(parsed, list): return parsed if isinstance(parsed, dict): @@ -82,10 +67,16 @@ def _parse_collections_input( validate_mode: ValidationMode, warn: bool ) -> List[Dict[str, Any]]: - if isinstance(collections, list): - return _reparse_collections_payload(collections, validate_mode, warn) - if isinstance(collections, dict): - return _reparse_collections_payload(collections, validate_mode, warn) + from graphistry.compute.ast import ASTObject + from graphistry.compute.chain import Chain + + def _default(obj: Any) -> Any: + if isinstance(obj, Chain): + return obj.to_json() + if isinstance(obj, ASTObject): + return obj.to_json() + raise TypeError(f"Object of type {type(obj).__name__} is not JSON-serializable") + if isinstance(collections, str): try: parsed = json.loads(collections) @@ -95,14 +86,14 @@ def _parse_collections_input( except json.JSONDecodeError as exc: _issue('Collections string must be JSON or URL-encoded JSON', {'error': str(exc)}, validate_mode, warn) return [] - if isinstance(parsed, list): - return parsed - if isinstance(parsed, dict): - return [parsed] - _issue('Collections JSON must be a list or dict', {'type': type(parsed).__name__}, validate_mode, warn) + return _coerce_collection_list(parsed, validate_mode, warn) + + try: + parsed = json.loads(json.dumps(collections, default=_default, ensure_ascii=True)) + except (TypeError, ValueError) as exc: + _issue('Collections must be JSON-serializable', {'error': str(exc)}, validate_mode, warn) return [] - _issue('Collections must be a list, dict, or JSON string', {'type': type(collections).__name__}, validate_mode, warn) - return [] + return _coerce_collection_list(parsed, validate_mode, warn) def _coerce_str_field( @@ -225,22 +216,26 @@ def _normalize_ops_list(raw: object) -> Optional[List[Dict[str, Any]]]: ) return None - if isinstance(gfql_ops, Chain): - return _normalize_ops_list(gfql_ops.chain) - if 
isinstance(gfql_ops, ASTObject): - return _normalize_ops_list(gfql_ops) - if isinstance(gfql_ops, dict): - if gfql_ops.get('type') == 'Chain' and 'chain' in gfql_ops: - return _normalize_ops_list(gfql_ops.get('chain')) - if gfql_ops.get('type') == 'gfql_chain' and 'gfql' in gfql_ops: - return _normalize_ops_list(gfql_ops.get('gfql')) - if 'chain' in gfql_ops: - return _normalize_ops_list(gfql_ops.get('chain')) - if 'gfql' in gfql_ops: - return _normalize_ops_list(gfql_ops.get('gfql')) - return _normalize_ops_list(gfql_ops) - if isinstance(gfql_ops, list): - return _normalize_ops_list(gfql_ops) + def _extract_ops_value(raw: object) -> object: + if isinstance(raw, Chain): + return raw.chain + if isinstance(raw, ASTObject): + return raw + if isinstance(raw, dict): + if raw.get('type') == 'gfql_chain' and 'gfql' in raw: + return raw.get('gfql') + if raw.get('type') == 'Chain' and 'chain' in raw: + return raw.get('chain') + if 'gfql' in raw: + return raw.get('gfql') + if 'chain' in raw: + return raw.get('chain') + return raw + if isinstance(raw, list): + return raw + return raw + + return _normalize_ops_list(_extract_ops_value(gfql_ops)) _issue( 'GFQL operations must be a Chain, AST object, list, or dict', @@ -258,25 +253,9 @@ def _normalize_gfql_expr( warn: bool, entry_index: int ) -> Optional[Dict[str, Any]]: - if isinstance(expr, dict): - if expr.get('type') == 'intersection': - _issue('Set collection expr cannot be intersection', {'index': entry_index}, validate_mode, warn) - return None - if 'gfql' in expr or expr.get('type') == 'gfql_chain': - ops = _normalize_gfql_ops(expr.get('gfql'), validate_mode, warn, entry_index) - if ops is None: - return None - return {'type': 'gfql_chain', 'gfql': ops} - if 'chain' in expr: - ops = _normalize_gfql_ops(expr, validate_mode, warn, entry_index) - if ops is None: - return None - return {'type': 'gfql_chain', 'gfql': ops} - if 'type' in expr: - ops = _normalize_gfql_ops(expr, validate_mode, warn, entry_index) - if ops is None: 
- return None - return {'type': 'gfql_chain', 'gfql': ops} + if isinstance(expr, dict) and expr.get('type') == 'intersection': + _issue('Set collection expr cannot be intersection', {'index': entry_index}, validate_mode, warn) + return None ops = _normalize_gfql_ops(expr, validate_mode, warn, entry_index) if ops is None: return None From 2f426d0b4fec7b6ae5a168cc0d8a019e25fc4472 Mon Sep 17 00:00:00 2001 From: Leo Meyerovich Date: Thu, 8 Jan 2026 15:18:45 -0800 Subject: [PATCH 17/30] Simplify collections input parsing --- graphistry/validate/validate_collections.py | 25 ++++++--------------- 1 file changed, 7 insertions(+), 18 deletions(-) diff --git a/graphistry/validate/validate_collections.py b/graphistry/validate/validate_collections.py index db107b18ac..d09fd0d88d 100644 --- a/graphistry/validate/validate_collections.py +++ b/graphistry/validate/validate_collections.py @@ -3,7 +3,7 @@ from urllib.parse import quote, unquote from graphistry.client_session import strtobool -from graphistry.models.collections import Collection, CollectionsInput +from graphistry.models.collections import CollectionsInput from graphistry.models.types import ValidationMode, ValidationParam from graphistry.util import warn as emit_warn _ALLOWED_COLLECTION_FIELDS = { @@ -67,16 +67,10 @@ def _parse_collections_input( validate_mode: ValidationMode, warn: bool ) -> List[Dict[str, Any]]: - from graphistry.compute.ast import ASTObject - from graphistry.compute.chain import Chain - - def _default(obj: Any) -> Any: - if isinstance(obj, Chain): - return obj.to_json() - if isinstance(obj, ASTObject): - return obj.to_json() - raise TypeError(f"Object of type {type(obj).__name__} is not JSON-serializable") - + if isinstance(collections, list): + return _coerce_collection_list(collections, validate_mode, warn) + if isinstance(collections, dict): + return _coerce_collection_list(collections, validate_mode, warn) if isinstance(collections, str): try: parsed = json.loads(collections) @@ -87,13 +81,8 
@@ def _default(obj: Any) -> Any: _issue('Collections string must be JSON or URL-encoded JSON', {'error': str(exc)}, validate_mode, warn) return [] return _coerce_collection_list(parsed, validate_mode, warn) - - try: - parsed = json.loads(json.dumps(collections, default=_default, ensure_ascii=True)) - except (TypeError, ValueError) as exc: - _issue('Collections must be JSON-serializable', {'error': str(exc)}, validate_mode, warn) - return [] - return _coerce_collection_list(parsed, validate_mode, warn) + _issue('Collections must be a list, dict, or JSON string', {'type': type(collections).__name__}, validate_mode, warn) + return [] def _coerce_str_field( From 924f2343a29737c206fafd38d1785e6500c94c10 Mon Sep 17 00:00:00 2001 From: Leo Meyerovich Date: Mon, 12 Jan 2026 17:12:46 -0800 Subject: [PATCH 18/30] fix: canonicalize collections validation and encoding --- CHANGELOG.md | 4 ++ graphistry/Plottable.py | 1 - graphistry/PlotterBase.py | 14 +++---- graphistry/pygraphistry.py | 2 - graphistry/tests/test_collections.py | 45 ++++++++++++++++++--- graphistry/validate/validate_collections.py | 37 +++++++++++++---- 6 files changed, 79 insertions(+), 24 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 273c599efa..286f5af884 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -56,6 +56,10 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm ### Performance - **GFQL / chain**: Optimized backward pass for simple single-hop edges by skipping full `hop()` call and using vectorized merge filtering instead (~50% faster on small graphs). Added `is_simple_single_hop()` method on `ASTEdge` for optimization eligibility checks. +### Changed +- **Collections**: Autofix validation now drops invalid collections (e.g., invalid GFQL ops) and non-string collection color fields instead of string-coercing them; warnings still emit when `warn=True`. 
+- **Collections**: `collections(...)` now always canonicalizes to URL-encoded JSON (string inputs are parsed + re-encoded); the `encode` parameter was removed to avoid ambiguous behavior. + ### Fixed - **Compute / hop**: Exact-hop traversals now prune branches that do not reach `min_hops`, avoid reapplying min-hop pruning in reverse passes, keep seeds in wavefront outputs, and reuse forward wavefronts when recomputing labels so edge/node hop labels stay aligned (fixes 3-hop branch inclusion issues and mislabeled slices). - **GFQL / chain**: Fixed `output_min_hops`/`output_max_hops` semantics to correctly slice output nodes/edges matching oracle behavior. diff --git a/graphistry/Plottable.py b/graphistry/Plottable.py index 271392f56f..c94da3295e 100644 --- a/graphistry/Plottable.py +++ b/graphistry/Plottable.py @@ -790,7 +790,6 @@ def collections( show_collections: Optional[bool] = None, collections_global_node_color: Optional[str] = None, collections_global_edge_color: Optional[str] = None, - encode: bool = True, validate: ValidationParam = 'autofix', warn: bool = True ) -> 'Plottable': diff --git a/graphistry/PlotterBase.py b/graphistry/PlotterBase.py index 6ecb2b58f8..e90963f8d0 100644 --- a/graphistry/PlotterBase.py +++ b/graphistry/PlotterBase.py @@ -1827,7 +1827,8 @@ def graph(self, ig: Any) -> Plottable: def settings(self, height=None, url_params={}, render=None): """Specify iframe height and add URL parameter dictionary. - The library takes care of URI component encoding for the dictionary. + Collections URL params are normalized and URL-encoded at plot time; other + params should already be URL-safe. :param height: Height in pixels. 
:type height: int @@ -1853,18 +1854,16 @@ def collections( show_collections: Optional[bool] = None, collections_global_node_color: Optional[str] = None, collections_global_edge_color: Optional[str] = None, - encode: bool = True, validate: ValidationParam = 'autofix', warn: bool = True ) -> 'Plottable': """Set collections URL parameters. Additive over previous settings. - :param collections: List/dict of collections or JSON/URL-encoded JSON string. + :param collections: List/dict of collections or JSON/URL-encoded JSON string (stored as URL-encoded JSON). :param show_collections: Toggle collections panel display. :param collections_global_node_color: Hex color for non-collection nodes (leading # stripped). :param collections_global_edge_color: Hex color for non-collection edges (leading # stripped). - :param encode: If True, JSON-minify and URL-encode collections. Use False for pre-encoded strings. - :param validate: Validation mode. 'autofix' (default) warns on issues, 'strict' raises on issues. + :param validate: Validation mode. 'autofix' (default) drops invalid collections and color fields with warnings, 'strict' raises on issues. :param warn: Whether to emit warnings when validate='autofix'. validate=False forces warn=False. 
""" from graphistry.validate.validate_collections import ( @@ -1878,10 +1877,7 @@ def collections( settings: Dict[str, Any] = {} if collections is not None: normalized = normalize_collections(collections, validate=validate_mode, warn=warn) - if isinstance(collections, str) and not encode: - settings['collections'] = collections - else: - settings['collections'] = encode_collections(normalized, encode=encode) + settings['collections'] = encode_collections(normalized) extras: Dict[str, Any] = {} if show_collections is not None: extras['showCollections'] = show_collections diff --git a/graphistry/pygraphistry.py b/graphistry/pygraphistry.py index 171cc5082c..aa01d1a2e3 100644 --- a/graphistry/pygraphistry.py +++ b/graphistry/pygraphistry.py @@ -2282,7 +2282,6 @@ def collections( show_collections: Optional[bool] = None, collections_global_node_color: Optional[str] = None, collections_global_edge_color: Optional[str] = None, - encode: bool = True, validate: ValidationParam = 'autofix', warn: bool = True ): @@ -2291,7 +2290,6 @@ def collections( show_collections=show_collections, collections_global_node_color=collections_global_node_color, collections_global_edge_color=collections_global_edge_color, - encode=encode, validate=validate, warn=warn ) diff --git a/graphistry/tests/test_collections.py b/graphistry/tests/test_collections.py index 5516e795f0..d49b1f07d1 100644 --- a/graphistry/tests/test_collections.py +++ b/graphistry/tests/test_collections.py @@ -2,7 +2,7 @@ import json import pytest -from urllib.parse import quote, unquote +from urllib.parse import unquote import graphistry from graphistry.collections import collection_intersection, collection_set @@ -92,11 +92,17 @@ def test_collections_accepts_chain_and_preserves_dataset_id(): assert g2._dataset_id == "dataset_123" -def test_collections_encode_false_keeps_string(): +def test_collections_string_input_is_encoded(): raw = '[{"type":"intersection","expr":{"type":"intersection","sets":["a"]}}]' - encoded = 
quote(raw, safe="") - url_params = collections_url_params(encoded, encode=False) - assert url_params["collections"] == encoded + url_params = collections_url_params(raw) + assert url_params["collections"].startswith("%5B") + decoded = decode_collections(url_params["collections"]) + assert decoded == [ + { + "type": "intersection", + "expr": {"type": "intersection", "sets": ["a"]}, + } + ] def test_collections_accepts_wire_protocol_chain(): @@ -164,6 +170,35 @@ def test_collections_validation_strict_raises(): graphistry.bind().collections(collections=bad_collections, validate="strict") +def test_collections_autofix_drops_invalid_colors(): + collections = [ + { + "type": "set", + "expr": [graphistry.n({"vip": True})], + "node_color": 123, + "edge_color": {"bad": True}, + } + ] + with pytest.warns(RuntimeWarning): + url_params = collections_url_params(collections, validate="autofix", warn=True) + decoded = decode_collections(url_params["collections"]) + assert "node_color" not in decoded[0] + assert "edge_color" not in decoded[0] + + +def test_collections_autofix_drops_invalid_gfql_ops(): + collections = [ + { + "type": "set", + "expr": [graphistry.n({"vip": True}), {"filter_dict": {"a": 1}}], + } + ] + with pytest.warns(RuntimeWarning): + url_params = collections_url_params(collections, validate="autofix", warn=True) + decoded = decode_collections(url_params["collections"]) + assert decoded == [] + + def test_plot_url_param_validation_autofix_warns(): bad = '[{"type":"set","expr":[{"filter_dict":{"a":1}}]}]' with pytest.warns(RuntimeWarning): diff --git a/graphistry/validate/validate_collections.py b/graphistry/validate/validate_collections.py index d09fd0d88d..248a5160a3 100644 --- a/graphistry/validate/validate_collections.py +++ b/graphistry/validate/validate_collections.py @@ -6,7 +6,7 @@ from graphistry.models.collections import CollectionsInput from graphistry.models.types import ValidationMode, ValidationParam from graphistry.util import warn as emit_warn 
-_ALLOWED_COLLECTION_FIELDS = { +_ALLOWED_COLLECTION_FIELDS_ORDER = ( 'type', 'id', 'name', @@ -14,7 +14,8 @@ 'node_color', 'edge_color', 'expr', -} +) +_ALLOWED_COLLECTION_FIELDS_SET = set(_ALLOWED_COLLECTION_FIELDS_ORDER) def normalize_validation_params( @@ -106,6 +107,27 @@ def _coerce_str_field( entry[key] = str(entry[key]) +def _normalize_color_field( + entry: Dict[str, Any], + key: str, + validate_mode: ValidationMode, + warn: bool, + entry_index: int +) -> None: + if key not in entry or entry[key] is None: + return + if isinstance(entry[key], str): + return + _issue( + f'Collection field "{key}" should be a string', + {'index': entry_index, 'value': entry[key], 'type': type(entry[key]).__name__}, + validate_mode, + warn + ) + if validate_mode == 'autofix': + entry.pop(key, None) + + def _normalize_sets_list( sets_value: Any, validate_mode: ValidationMode, @@ -184,8 +206,6 @@ def _normalize_ops_list(raw: object) -> Optional[List[Dict[str, Any]]]: for op in raw: normalized_op = _normalize_op(op) if normalized_op is None: - if validate_mode == 'autofix': - continue return None normalized_ops.append(normalized_op) if len(normalized_ops) == 0: @@ -300,7 +320,7 @@ def normalize_collections( continue return [] - unexpected_fields = [key for key in entry.keys() if key not in _ALLOWED_COLLECTION_FIELDS] + unexpected_fields = [key for key in entry.keys() if key not in _ALLOWED_COLLECTION_FIELDS_SET] if unexpected_fields: _issue( 'Unexpected fields in collection', @@ -309,7 +329,7 @@ def normalize_collections( warn ) - normalized_entry = {key: entry[key] for key in _ALLOWED_COLLECTION_FIELDS if key in entry} + normalized_entry = {key: entry[key] for key in _ALLOWED_COLLECTION_FIELDS_ORDER if key in entry} collection_type = normalized_entry.get('type', 'set') if not isinstance(collection_type, str): _issue( @@ -337,7 +357,10 @@ def normalize_collections( return [] for field in ('id', 'name', 'description', 'node_color', 'edge_color'): - 
_coerce_str_field(normalized_entry, field, validate_mode, warn, idx) + if field in ('node_color', 'edge_color'): + _normalize_color_field(normalized_entry, field, validate_mode, warn, idx) + else: + _coerce_str_field(normalized_entry, field, validate_mode, warn, idx) expr = normalized_entry.get('expr') if collection_type == 'intersection': From 41a33c4d962b8e42e10c3ee8aa1e8798743eb1bf Mon Sep 17 00:00:00 2001 From: Leo Meyerovich Date: Tue, 13 Jan 2026 01:34:36 -0800 Subject: [PATCH 19/30] refactor: simplify collections normalization --- graphistry/PlotterBase.py | 6 +-- graphistry/validate/validate_collections.py | 51 ++++++++------------- 2 files changed, 21 insertions(+), 36 deletions(-) diff --git a/graphistry/PlotterBase.py b/graphistry/PlotterBase.py index e90963f8d0..de00f5c558 100644 --- a/graphistry/PlotterBase.py +++ b/graphistry/PlotterBase.py @@ -1870,13 +1870,11 @@ def collections( encode_collections, normalize_collections, normalize_collections_url_params, - normalize_validation_params, ) - validate_mode, warn = normalize_validation_params(validate, warn) settings: Dict[str, Any] = {} if collections is not None: - normalized = normalize_collections(collections, validate=validate_mode, warn=warn) + normalized = normalize_collections(collections, validate=validate, warn=warn) settings['collections'] = encode_collections(normalized) extras: Dict[str, Any] = {} if show_collections is not None: @@ -1886,7 +1884,7 @@ def collections( if collections_global_edge_color is not None: extras['collectionsGlobalEdgeColor'] = collections_global_edge_color if extras: - extras = normalize_collections_url_params(extras, validate=validate_mode, warn=warn) + extras = normalize_collections_url_params(extras, validate=validate, warn=warn) settings.update(extras) if len(settings.keys()) > 0: diff --git a/graphistry/validate/validate_collections.py b/graphistry/validate/validate_collections.py index 248a5160a3..08cb634f11 100644 --- 
a/graphistry/validate/validate_collections.py +++ b/graphistry/validate/validate_collections.py @@ -32,9 +32,9 @@ def normalize_validation_params( return validate_mode, warn -def encode_collections(collections: List[Dict[str, Any]], encode: bool = True) -> str: +def encode_collections(collections: List[Dict[str, Any]]) -> str: json_str = json.dumps(collections, separators=(',', ':'), ensure_ascii=True) - return quote(json_str, safe='') if encode else json_str + return quote(json_str, safe='') def _issue( @@ -86,33 +86,13 @@ def _parse_collections_input( return [] -def _coerce_str_field( +def _normalize_str_field( entry: Dict[str, Any], key: str, validate_mode: ValidationMode, warn: bool, - entry_index: int -) -> None: - if key not in entry or entry[key] is None: - return - if isinstance(entry[key], str): - return - _issue( - f'Collection field "{key}" should be a string', - {'index': entry_index, 'value': entry[key], 'type': type(entry[key]).__name__}, - validate_mode, - warn - ) - if validate_mode == 'autofix': - entry[key] = str(entry[key]) - - -def _normalize_color_field( - entry: Dict[str, Any], - key: str, - validate_mode: ValidationMode, - warn: bool, - entry_index: int + entry_index: int, + autofix_drop: bool ) -> None: if key not in entry or entry[key] is None: return @@ -125,7 +105,10 @@ def _normalize_color_field( warn ) if validate_mode == 'autofix': - entry.pop(key, None) + if autofix_drop: + entry.pop(key, None) + else: + entry[key] = str(entry[key]) def _normalize_sets_list( @@ -356,11 +339,10 @@ def normalize_collections( continue return [] - for field in ('id', 'name', 'description', 'node_color', 'edge_color'): - if field in ('node_color', 'edge_color'): - _normalize_color_field(normalized_entry, field, validate_mode, warn, idx) - else: - _coerce_str_field(normalized_entry, field, validate_mode, warn, idx) + for field in ('id', 'name', 'description'): + _normalize_str_field(normalized_entry, field, validate_mode, warn, idx, autofix_drop=False) + 
for field in ('node_color', 'edge_color'): + _normalize_str_field(normalized_entry, field, validate_mode, warn, idx, autofix_drop=True) expr = normalized_entry.get('expr') if collection_type == 'intersection': @@ -372,6 +354,11 @@ def normalize_collections( continue return [] normalized_entry['expr'] = normalized_expr + normalized_entry = { + key: normalized_entry[key] + for key in _ALLOWED_COLLECTION_FIELDS_ORDER + if key in normalized_entry + } normalized.append(normalized_entry) return normalized @@ -388,7 +375,7 @@ def normalize_collections_url_params( if 'collections' in updated: normalized = normalize_collections(updated['collections'], validate_mode, warn) if len(normalized) > 0: - updated['collections'] = encode_collections(normalized, encode=True) + updated['collections'] = encode_collections(normalized) else: if validate_mode in ('strict', 'strict-fast'): return updated From 0de69447f8cdf4368901c396c4a658f424ecd195 Mon Sep 17 00:00:00 2001 From: Leo Meyerovich Date: Fri, 16 Jan 2026 12:20:00 -0800 Subject: [PATCH 20/30] chore: move collections notes to development --- CHANGELOG.md | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 286f5af884..7ff4aa23f4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,17 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm ## [Development] +### Added +- **Collections**: New `g.collections(...)` API for defining subsets via GFQL expressions with priority-based visual encodings. Includes helper constructors `graphistry.collection_set(...)` and `graphistry.collection_intersection(...)`, support for `showCollections`, `collectionsGlobalNodeColor`, and `collectionsGlobalEdgeColor` URL params, and automatic JSON encoding. Accepts GFQL AST, Chain objects, or wire-protocol dicts (#874). 
+- **Docs / Collections**: Added collections usage guide in visualization/layout/settings, tutorial notebook (`demos/more_examples/graphistry_features/collections.ipynb`), and cross-references in 10-minute guides, cheatsheet, and GFQL docs (#875). + +### Changed +- **Collections**: Autofix validation now drops invalid collections (e.g., invalid GFQL ops) and non-string collection color fields instead of string-coercing them; warnings still emit when `warn=True`. +- **Collections**: `collections(...)` now always canonicalizes to URL-encoded JSON (string inputs are parsed + re-encoded); the `encode` parameter was removed to avoid ambiguous behavior. + +### Tests +- **Collections**: Added `test_collections.py` covering encoding, GFQL Chain/AST normalization, wire-protocol acceptance, validation modes, and helper constructors. + ## [0.50.4 - 2026-01-15] ### Fixed @@ -56,10 +67,6 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm ### Performance - **GFQL / chain**: Optimized backward pass for simple single-hop edges by skipping full `hop()` call and using vectorized merge filtering instead (~50% faster on small graphs). Added `is_simple_single_hop()` method on `ASTEdge` for optimization eligibility checks. -### Changed -- **Collections**: Autofix validation now drops invalid collections (e.g., invalid GFQL ops) and non-string collection color fields instead of string-coercing them; warnings still emit when `warn=True`. -- **Collections**: `collections(...)` now always canonicalizes to URL-encoded JSON (string inputs are parsed + re-encoded); the `encode` parameter was removed to avoid ambiguous behavior. - ### Fixed - **Compute / hop**: Exact-hop traversals now prune branches that do not reach `min_hops`, avoid reapplying min-hop pruning in reverse passes, keep seeds in wavefront outputs, and reuse forward wavefronts when recomputing labels so edge/node hop labels stay aligned (fixes 3-hop branch inclusion issues and mislabeled slices). 
- **GFQL / chain**: Fixed `output_min_hops`/`output_max_hops` semantics to correctly slice output nodes/edges matching oracle behavior. From 23d1f0020e951e5f48518933b07b05767f2b01a2 Mon Sep 17 00:00:00 2001 From: Leo Meyerovich Date: Sat, 7 Feb 2026 19:22:10 -0800 Subject: [PATCH 21/30] fix: address PR #874 review comments for collections validation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Remove dead code after return in _normalize_intersection_expr - Add empty sets list validation in _normalize_sets_list - Remove pointless type coercion (str() won't produce valid types) - Consolidate GFQL parsing: _normalize_gfql_ops calls _wrap_gfql_expr - Add cross-validation for intersection set ID references - Require id field for sets (server requires it, no auto-generation) - Use precise exception handling (TypeError, ValueError, GFQLValidationError) - Update tests to include required id fields πŸ€– Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- graphistry/PlotterBase.py | 3 +- graphistry/tests/test_collections.py | 28 +++- graphistry/validate/validate_collections.py | 177 +++++++++++--------- 3 files changed, 123 insertions(+), 85 deletions(-) diff --git a/graphistry/PlotterBase.py b/graphistry/PlotterBase.py index de00f5c558..224fecc80c 100644 --- a/graphistry/PlotterBase.py +++ b/graphistry/PlotterBase.py @@ -2238,9 +2238,10 @@ def plot( 'viztoken': str(uuid.uuid4()) } + # Validate collections in url_params (catches bypass of .collections() method) from graphistry.validate.validate_collections import normalize_collections_url_params - url_params = normalize_collections_url_params(self._url_params, validate=validate_mode, warn=warn) + viz_url = self._pygraphistry._viz_url(info, url_params) cfg_client_protocol_hostname = self.session.client_protocol_hostname full_url = ('%s:%s' % (self.session.protocol, viz_url)) if cfg_client_protocol_hostname is None else viz_url diff 
--git a/graphistry/tests/test_collections.py b/graphistry/tests/test_collections.py index d49b1f07d1..b4082661b6 100644 --- a/graphistry/tests/test_collections.py +++ b/graphistry/tests/test_collections.py @@ -78,11 +78,12 @@ def test_collection_helpers_build_sets_and_intersections(): def test_collections_accepts_chain_and_preserves_dataset_id(): node = graphistry.n({"type": "user"}) chain = graphistry.Chain([node]) - g2 = graphistry.bind(dataset_id="dataset_123").collections(collections={"type": "set", "expr": chain}) + g2 = graphistry.bind(dataset_id="dataset_123").collections(collections={"type": "set", "id": "my_set", "expr": chain}) decoded = decode_collections(g2._url_params["collections"]) assert decoded == [ { "type": "set", + "id": "my_set", "expr": { "type": "gfql_chain", "gfql": [node.to_json()], @@ -93,11 +94,18 @@ def test_collections_accepts_chain_and_preserves_dataset_id(): def test_collections_string_input_is_encoded(): - raw = '[{"type":"intersection","expr":{"type":"intersection","sets":["a"]}}]' + # Include a set so the intersection has valid references + raw = '[{"type":"set","id":"a","expr":{"type":"gfql_chain","gfql":[{"type":"Node"}]}},{"type":"intersection","expr":{"type":"intersection","sets":["a"]}}]' url_params = collections_url_params(raw) assert url_params["collections"].startswith("%5B") decoded = decode_collections(url_params["collections"]) + # Node normalizes to include filter_dict: {} assert decoded == [ + { + "type": "set", + "id": "a", + "expr": {"type": "gfql_chain", "gfql": [{"type": "Node", "filter_dict": {}}]}, + }, { "type": "intersection", "expr": {"type": "intersection", "sets": ["a"]}, @@ -118,11 +126,12 @@ def test_collections_accepts_wire_protocol_chain(): ] } decoded = decode_collections( - collections_url_params({"type": "set", "expr": chain_json})["collections"] + collections_url_params({"type": "set", "id": "users", "expr": chain_json})["collections"] ) assert decoded == [ { "type": "set", + "id": "users", "expr": { 
"type": "gfql_chain", "gfql": chain_json["chain"], @@ -134,7 +143,7 @@ def test_collections_accepts_wire_protocol_chain(): def test_collections_accepts_let_expr(): dag = graphistry.let({"seed": graphistry.n({"type": "user"})}) decoded = decode_collections( - collections_url_params({"type": "set", "expr": dag})["collections"] + collections_url_params({"type": "set", "id": "users", "expr": dag})["collections"] ) assert decoded[0]["expr"]["type"] == "gfql_chain" assert decoded[0]["expr"]["gfql"][0]["type"] == "Let" @@ -144,6 +153,7 @@ def test_collections_drop_unexpected_fields_autofix(): collections = [ { "type": "set", + "id": "vip_set", "expr": [graphistry.n({"vip": True})], "unexpected": "drop-me", } @@ -165,7 +175,8 @@ def test_collections_show_collections_strict_raises(): def test_collections_validation_strict_raises(): - bad_collections = [{"type": "set", "expr": [{"filter_dict": {"a": 1}}]}] + # Missing 'type' field in GFQL op causes validation error + bad_collections = [{"type": "set", "id": "bad_set", "expr": [{"filter_dict": {"a": 1}}]}] with pytest.raises(ValueError): graphistry.bind().collections(collections=bad_collections, validate="strict") @@ -174,6 +185,7 @@ def test_collections_autofix_drops_invalid_colors(): collections = [ { "type": "set", + "id": "vip_set", "expr": [graphistry.n({"vip": True})], "node_color": 123, "edge_color": {"bad": True}, @@ -187,16 +199,18 @@ def test_collections_autofix_drops_invalid_colors(): def test_collections_autofix_drops_invalid_gfql_ops(): + # Collection with invalid GFQL op (missing 'type' field) gets dropped in autofix collections = [ { "type": "set", + "id": "bad_set", "expr": [graphistry.n({"vip": True}), {"filter_dict": {"a": 1}}], } ] with pytest.warns(RuntimeWarning): url_params = collections_url_params(collections, validate="autofix", warn=True) - decoded = decode_collections(url_params["collections"]) - assert decoded == [] + # Collection dropped due to invalid GFQL, so no collections key or empty + assert 
"collections" not in url_params or decode_collections(url_params["collections"]) == [] def test_plot_url_param_validation_autofix_warns(): diff --git a/graphistry/validate/validate_collections.py b/graphistry/validate/validate_collections.py index 08cb634f11..ab8b2747af 100644 --- a/graphistry/validate/validate_collections.py +++ b/graphistry/validate/validate_collections.py @@ -3,6 +3,7 @@ from urllib.parse import quote, unquote from graphistry.client_session import strtobool +from graphistry.compute.exceptions import GFQLValidationError from graphistry.models.collections import CollectionsInput from graphistry.models.types import ValidationMode, ValidationParam from graphistry.util import warn as emit_warn @@ -138,6 +139,16 @@ def _normalize_sets_list( ) if validate_mode == 'autofix': out.append(str(set_id)) + + if len(out) == 0: + _issue( + 'Intersection sets list cannot be empty', + {'index': entry_index}, + validate_mode, + warn + ) + return None + return out @@ -147,12 +158,17 @@ def _normalize_gfql_ops( warn: bool, entry_index: int ) -> Optional[List[Dict[str, Any]]]: - from graphistry.compute.ast import ASTObject, from_json as ast_from_json - from graphistry.compute.chain import Chain + """ + Normalize GFQL operations to a list of JSON-serializable dicts. + Uses _wrap_gfql_expr from collections.py as the canonical implementation, + wrapping with error handling for validation modes. 
+ """ if gfql_ops is None: _issue('GFQL chain is missing', {'index': entry_index}, validate_mode, warn) return None + + # Handle JSON string input if isinstance(gfql_ops, str): try: gfql_ops = json.loads(gfql_ops) @@ -160,84 +176,29 @@ def _normalize_gfql_ops( _issue('GFQL chain string must be JSON', {'index': entry_index, 'error': str(exc)}, validate_mode, warn) return None - def _normalize_op(op: object) -> Optional[Dict[str, Any]]: - if isinstance(op, ASTObject): - return op.to_json() - if isinstance(op, dict): - try: - parsed = ast_from_json(op, validate=True) - except Exception as exc: - _issue( - 'Invalid GFQL operation in collection', - {'index': entry_index, 'op': op, 'error': str(exc)}, - validate_mode, - warn - ) - return None - return parsed.to_json() - _issue( - 'GFQL operations must be AST objects or dicts', - {'index': entry_index, 'value': op, 'type': type(op).__name__}, - validate_mode, - warn - ) - return None - - def _normalize_ops_list(raw: object) -> Optional[List[Dict[str, Any]]]: - if isinstance(raw, list): - normalized_ops: List[Dict[str, Any]] = [] - for op in raw: - normalized_op = _normalize_op(op) - if normalized_op is None: - return None - normalized_ops.append(normalized_op) - if len(normalized_ops) == 0: - _issue('GFQL chain is empty', {'index': entry_index}, validate_mode, warn) - return None - return normalized_ops - if isinstance(raw, (ASTObject, dict)): - normalized_op = _normalize_op(raw) - if normalized_op is None: - return None - return [normalized_op] + # Use canonical implementation from collections.py + try: + from graphistry.collections import _wrap_gfql_expr + result = _wrap_gfql_expr(gfql_ops) + ops_raw = result.get('gfql', []) + if not isinstance(ops_raw, list): + _issue('GFQL chain must be a list', {'index': entry_index}, validate_mode, warn) + return None + ops: List[Dict[str, Any]] = ops_raw + if len(ops) == 0: + _issue('GFQL chain is empty', {'index': entry_index}, validate_mode, warn) + return None + return ops + 
except (TypeError, ValueError, GFQLValidationError) as exc: + # Precise exception handling for GFQL parsing errors _issue( - 'GFQL operations must be a list, AST object, or dict', - {'index': entry_index, 'value': raw, 'type': type(raw).__name__}, + 'Invalid GFQL operation in collection', + {'index': entry_index, 'error': str(exc)}, validate_mode, warn ) return None - def _extract_ops_value(raw: object) -> object: - if isinstance(raw, Chain): - return raw.chain - if isinstance(raw, ASTObject): - return raw - if isinstance(raw, dict): - if raw.get('type') == 'gfql_chain' and 'gfql' in raw: - return raw.get('gfql') - if raw.get('type') == 'Chain' and 'chain' in raw: - return raw.get('chain') - if 'gfql' in raw: - return raw.get('gfql') - if 'chain' in raw: - return raw.get('chain') - return raw - if isinstance(raw, list): - return raw - return raw - - return _normalize_ops_list(_extract_ops_value(gfql_ops)) - - _issue( - 'GFQL operations must be a Chain, AST object, list, or dict', - {'index': entry_index, 'type': type(gfql_ops).__name__}, - validate_mode, - warn - ) - return None - - def _normalize_gfql_expr( expr: Any, @@ -321,12 +282,12 @@ def normalize_collections( validate_mode, warn ) + # str() coercion is pointless - it won't produce 'set' or 'intersection' + # so we skip this entry in autofix mode, or fail in strict mode if validate_mode == 'autofix': - collection_type = str(collection_type) - else: continue + return [] collection_type = collection_type.lower() - normalized_entry['type'] = collection_type if collection_type not in ('set', 'intersection'): _issue( @@ -339,11 +300,30 @@ def normalize_collections( continue return [] + normalized_entry['type'] = collection_type + for field in ('id', 'name', 'description'): _normalize_str_field(normalized_entry, field, validate_mode, warn, idx, autofix_drop=False) for field in ('node_color', 'edge_color'): _normalize_str_field(normalized_entry, field, validate_mode, warn, idx, autofix_drop=True) + # Validate id 
field - required for sets (server requires it, and intersections reference by ID) + # Note: We warn but don't auto-generate IDs - user must provide meaningful IDs + if collection_type == 'set': + if 'id' not in normalized_entry or normalized_entry.get('id') is None: + _issue( + 'Set collection requires an id field (server requires it for subgraph storage)', + {'index': idx}, + validate_mode, + warn + ) + # In autofix mode, skip this collection rather than generate arbitrary IDs + # User should provide meaningful IDs they control + if validate_mode == 'autofix': + continue + else: + continue + expr = normalized_entry.get('expr') if collection_type == 'intersection': normalized_expr = _normalize_intersection_expr(expr, validate_mode, warn, idx) @@ -361,9 +341,52 @@ def normalize_collections( } normalized.append(normalized_entry) + # Cross-validate intersection set references + normalized = _validate_intersection_references(normalized, validate_mode, warn) + return normalized +def _validate_intersection_references( + collections: List[Dict[str, Any]], + validate_mode: ValidationMode, + warn: bool +) -> List[Dict[str, Any]]: + """ + Validate that intersection set IDs reference actual collection IDs. + + Dangling references (e.g., sets: ["nonexistent"]) cause backend errors. + In strict mode, raise on first issue. In autofix mode, drop invalid intersections. 
+ """ + # Collect all set IDs (only from 'set' type collections, not intersections) + set_ids = { + c.get('id') + for c in collections + if c.get('type') == 'set' and c.get('id') + } + + valid_collections: List[Dict[str, Any]] = [] + for idx, collection in enumerate(collections): + if collection.get('type') == 'intersection': + expr = collection.get('expr', {}) + referenced_sets = expr.get('sets', []) + missing = [sid for sid in referenced_sets if sid not in set_ids] + if missing: + _issue( + 'Intersection references non-existent set IDs', + {'index': idx, 'missing_sets': missing, 'available_sets': list(set_ids)}, + validate_mode, + warn + ) + if validate_mode == 'autofix': + continue # Drop invalid intersection + # In strict mode, we already raised in _issue + return [] + valid_collections.append(collection) + + return valid_collections + + def normalize_collections_url_params( url_params: Dict[str, Any], validate: ValidationParam = 'autofix', From 00ec4f8571dca12009dc82d0591cf456a06385aa Mon Sep 17 00:00:00 2001 From: Leo Meyerovich Date: Sat, 7 Feb 2026 20:13:27 -0800 Subject: [PATCH 22/30] docs: update CHANGELOG with collections review fixes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Set collections require id field (no auto-generation) - Intersection cross-validation for set ID references - GFQL parsing consolidation with precise exception handling πŸ€– Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index e8a7c99770..f79e60e188 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,9 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm ### Changed - **Collections**: Autofix validation now drops invalid collections (e.g., invalid GFQL ops) and non-string collection color fields instead of string-coercing them; warnings still emit when 
`warn=True`. - **Collections**: `collections(...)` now always canonicalizes to URL-encoded JSON (string inputs are parsed + re-encoded); the `encode` parameter was removed to avoid ambiguous behavior. +- **Collections**: Set collections now require an `id` field (server requires it for subgraph storage); missing IDs are warned and dropped in autofix mode rather than auto-generated. +- **Collections**: Intersection collections now cross-validate that referenced set IDs exist; dangling references are warned and dropped in autofix mode. +- **Collections**: GFQL parsing consolidated to use `_wrap_gfql_expr` from `collections.py` as the canonical implementation with precise exception handling. ### Tests - **Collections**: Added `test_collections.py` covering encoding, GFQL Chain/AST normalization, wire-protocol acceptance, validation modes, and helper constructors. From d2bdf02ee0ec60c183ca5efbfb561e056127ab55 Mon Sep 17 00:00:00 2001 From: Leo Meyerovich Date: Sat, 7 Feb 2026 20:29:33 -0800 Subject: [PATCH 23/30] refactor: inline _coerce_collection_list into _parse_collections_input MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address review comment #2683393920 - removes unnecessary helper function. 
πŸ€– Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- graphistry/validate/validate_collections.py | 26 ++++++++------------- 1 file changed, 10 insertions(+), 16 deletions(-) diff --git a/graphistry/validate/validate_collections.py b/graphistry/validate/validate_collections.py index ab8b2747af..73ca6b3875 100644 --- a/graphistry/validate/validate_collections.py +++ b/graphistry/validate/validate_collections.py @@ -51,28 +51,16 @@ def _issue( emit_warn(f"Collections validation warning: {message} ({data})") -def _coerce_collection_list( - parsed: Any, - validate_mode: ValidationMode, - warn: bool -) -> List[Dict[str, Any]]: - if isinstance(parsed, list): - return parsed - if isinstance(parsed, dict): - return [parsed] - _issue('Collections JSON must be a list or dict', {'type': type(parsed).__name__}, validate_mode, warn) - return [] - - def _parse_collections_input( collections: CollectionsInput, validate_mode: ValidationMode, warn: bool ) -> List[Dict[str, Any]]: + """Parse collections input to a list of dicts, handling list/dict/JSON string inputs.""" if isinstance(collections, list): - return _coerce_collection_list(collections, validate_mode, warn) + return collections if isinstance(collections, dict): - return _coerce_collection_list(collections, validate_mode, warn) + return [collections] if isinstance(collections, str): try: parsed = json.loads(collections) @@ -82,7 +70,13 @@ def _parse_collections_input( except json.JSONDecodeError as exc: _issue('Collections string must be JSON or URL-encoded JSON', {'error': str(exc)}, validate_mode, warn) return [] - return _coerce_collection_list(parsed, validate_mode, warn) + # Coerce parsed JSON to list + if isinstance(parsed, list): + return parsed + if isinstance(parsed, dict): + return [parsed] + _issue('Collections JSON must be a list or dict', {'type': type(parsed).__name__}, validate_mode, warn) + return [] _issue('Collections must be a list, dict, or JSON string', 
{'type': type(collections).__name__}, validate_mode, warn) return [] From 47d87e86dcf0108647e9e7c754eec56c28faffef Mon Sep 17 00:00:00 2001 From: Leo Meyerovich Date: Sat, 7 Feb 2026 21:24:45 -0800 Subject: [PATCH 24/30] fix: add type casts for mypy after inlining _coerce_collection_list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CollectionsInput includes TypedDicts which need to be cast to Dict[str, Any]. πŸ€– Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- graphistry/validate/validate_collections.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/graphistry/validate/validate_collections.py b/graphistry/validate/validate_collections.py index 73ca6b3875..71dd111d9f 100644 --- a/graphistry/validate/validate_collections.py +++ b/graphistry/validate/validate_collections.py @@ -4,7 +4,7 @@ from graphistry.client_session import strtobool from graphistry.compute.exceptions import GFQLValidationError -from graphistry.models.collections import CollectionsInput +from graphistry.models.collections import Collection, CollectionsInput from graphistry.models.types import ValidationMode, ValidationParam from graphistry.util import warn as emit_warn _ALLOWED_COLLECTION_FIELDS_ORDER = ( @@ -55,7 +55,7 @@ def _parse_collections_input( collections: CollectionsInput, validate_mode: ValidationMode, warn: bool -) -> List[Dict[str, Any]]: +) -> Union[List[Dict[str, Any]], List[Collection]]: """Parse collections input to a list of dicts, handling list/dict/JSON string inputs.""" if isinstance(collections, list): return collections @@ -258,7 +258,10 @@ def normalize_collections( continue return [] - unexpected_fields = [key for key in entry.keys() if key not in _ALLOWED_COLLECTION_FIELDS_SET] + # Convert to plain dict for uniform handling (TypedDicts become regular dicts) + entry_dict: Dict[str, Any] = dict(entry) + + unexpected_fields = [key for key in 
entry_dict.keys() if key not in _ALLOWED_COLLECTION_FIELDS_SET] if unexpected_fields: _issue( 'Unexpected fields in collection', @@ -267,7 +270,7 @@ def normalize_collections( warn ) - normalized_entry = {key: entry[key] for key in _ALLOWED_COLLECTION_FIELDS_ORDER if key in entry} + normalized_entry = {key: entry_dict[key] for key in _ALLOWED_COLLECTION_FIELDS_ORDER if key in entry_dict} collection_type = normalized_entry.get('type', 'set') if not isinstance(collection_type, str): _issue( From 2b5fa78b18419ba2aa34273cc1abfb220add32a5 Mon Sep 17 00:00:00 2001 From: Leo Meyerovich Date: Sat, 7 Feb 2026 23:43:25 -0800 Subject: [PATCH 25/30] fix: auto-generate collection IDs in autofix mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Server requires IDs for all collection types (used as storage keys). In autofix mode, generate `{type}_{idx}` IDs when missing instead of dropping the collection. This makes simple use cases "just work" while still warning about the missing ID. - Sets get `set_0`, `set_1`, etc. - Intersections get `intersection_0`, `intersection_1`, etc. 
- Strict mode still rejects missing IDs - Added test for auto-generation behavior πŸ€– Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- graphistry/tests/test_collections.py | 18 +++++++++++-- graphistry/validate/validate_collections.py | 29 +++++++++------------ 2 files changed, 29 insertions(+), 18 deletions(-) diff --git a/graphistry/tests/test_collections.py b/graphistry/tests/test_collections.py index b4082661b6..521983dd03 100644 --- a/graphistry/tests/test_collections.py +++ b/graphistry/tests/test_collections.py @@ -67,7 +67,7 @@ def test_collection_set_wraps_ast_expr(expr): def test_collection_helpers_build_sets_and_intersections(): collections = [ collection_set(expr=[graphistry.n({"vip": True})], id="vip", name="VIP", node_color="#FFAA00"), - collection_intersection(sets=["vip"], name="VIP Intersection", node_color="#00BFFF"), + collection_intersection(sets=["vip"], id="vip_intersection", name="VIP Intersection", node_color="#00BFFF"), ] decoded = decode_collections(collections_url_params(collections)["collections"]) assert decoded[0]["type"] == "set" @@ -95,7 +95,7 @@ def test_collections_accepts_chain_and_preserves_dataset_id(): def test_collections_string_input_is_encoded(): # Include a set so the intersection has valid references - raw = '[{"type":"set","id":"a","expr":{"type":"gfql_chain","gfql":[{"type":"Node"}]}},{"type":"intersection","expr":{"type":"intersection","sets":["a"]}}]' + raw = '[{"type":"set","id":"a","expr":{"type":"gfql_chain","gfql":[{"type":"Node"}]}},{"type":"intersection","id":"b","expr":{"type":"intersection","sets":["a"]}}]' url_params = collections_url_params(raw) assert url_params["collections"].startswith("%5B") decoded = decode_collections(url_params["collections"]) @@ -108,6 +108,7 @@ def test_collections_string_input_is_encoded(): }, { "type": "intersection", + "id": "b", "expr": {"type": "intersection", "sets": ["a"]}, } ] @@ -218,3 +219,16 @@ def 
test_plot_url_param_validation_autofix_warns(): with pytest.warns(RuntimeWarning): normalized = normalize_collections_url_params({"collections": bad}, validate="autofix", warn=True) assert "collections" not in normalized or normalized["collections"].startswith("%5B") + + +def test_collections_autofix_generates_missing_ids(): + # Collections without IDs get auto-generated IDs in autofix mode + collections = [ + {"type": "set", "expr": [graphistry.n({"a": 1})]}, + {"type": "intersection", "expr": {"type": "intersection", "sets": ["set_0"]}}, + ] + with pytest.warns(RuntimeWarning): + url_params = collections_url_params(collections, validate="autofix", warn=True) + decoded = decode_collections(url_params["collections"]) + assert decoded[0]["id"] == "set_0" + assert decoded[1]["id"] == "intersection_1" diff --git a/graphistry/validate/validate_collections.py b/graphistry/validate/validate_collections.py index 71dd111d9f..8d28eb323c 100644 --- a/graphistry/validate/validate_collections.py +++ b/graphistry/validate/validate_collections.py @@ -304,22 +304,19 @@ def normalize_collections( for field in ('node_color', 'edge_color'): _normalize_str_field(normalized_entry, field, validate_mode, warn, idx, autofix_drop=True) - # Validate id field - required for sets (server requires it, and intersections reference by ID) - # Note: We warn but don't auto-generate IDs - user must provide meaningful IDs - if collection_type == 'set': - if 'id' not in normalized_entry or normalized_entry.get('id') is None: - _issue( - 'Set collection requires an id field (server requires it for subgraph storage)', - {'index': idx}, - validate_mode, - warn - ) - # In autofix mode, skip this collection rather than generate arbitrary IDs - # User should provide meaningful IDs they control - if validate_mode == 'autofix': - continue - else: - continue + # Validate id field - required by server for all collection types (used as storage key) + if 'id' not in normalized_entry or normalized_entry.get('id') 
is None: + _issue( + f'{collection_type.capitalize()} collection missing id field (server requires it for storage)', + {'index': idx}, + validate_mode, + warn + ) + if validate_mode == 'autofix': + # Auto-generate ID so collection still works + normalized_entry['id'] = f'{collection_type}_{idx}' + else: + continue expr = normalized_entry.get('expr') if collection_type == 'intersection': From 1ab2e827cb2c82146b6d1e3512d3d3ad84fc4b99 Mon Sep 17 00:00:00 2001 From: Leo Meyerovich Date: Sun, 8 Feb 2026 04:25:53 -0800 Subject: [PATCH 26/30] style: use kebab-case for auto-generated collection IDs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change from `set_0` to `set-0` for consistency with other ID patterns in the codebase (e.g., kepler's `dataset-{uuid[:8]}`). User-provided IDs can be any string - no validation beyond type check. πŸ€– Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- graphistry/tests/test_collections.py | 8 ++++---- graphistry/validate/validate_collections.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/graphistry/tests/test_collections.py b/graphistry/tests/test_collections.py index 521983dd03..1f94491b32 100644 --- a/graphistry/tests/test_collections.py +++ b/graphistry/tests/test_collections.py @@ -222,13 +222,13 @@ def test_plot_url_param_validation_autofix_warns(): def test_collections_autofix_generates_missing_ids(): - # Collections without IDs get auto-generated IDs in autofix mode + # Collections without IDs get auto-generated IDs in autofix mode (kebab-case) collections = [ {"type": "set", "expr": [graphistry.n({"a": 1})]}, - {"type": "intersection", "expr": {"type": "intersection", "sets": ["set_0"]}}, + {"type": "intersection", "expr": {"type": "intersection", "sets": ["set-0"]}}, ] with pytest.warns(RuntimeWarning): url_params = collections_url_params(collections, validate="autofix", warn=True) decoded = 
decode_collections(url_params["collections"]) - assert decoded[0]["id"] == "set_0" - assert decoded[1]["id"] == "intersection_1" + assert decoded[0]["id"] == "set-0" + assert decoded[1]["id"] == "intersection-1" diff --git a/graphistry/validate/validate_collections.py b/graphistry/validate/validate_collections.py index 8d28eb323c..5f0873f5ec 100644 --- a/graphistry/validate/validate_collections.py +++ b/graphistry/validate/validate_collections.py @@ -314,7 +314,7 @@ def normalize_collections( ) if validate_mode == 'autofix': # Auto-generate ID so collection still works - normalized_entry['id'] = f'{collection_type}_{idx}' + normalized_entry['id'] = f'{collection_type}-{idx}' else: continue From 3d74b1e96c5d6ae57b3027b45c46cf66305509f4 Mon Sep 17 00:00:00 2001 From: Leo Meyerovich Date: Sun, 8 Feb 2026 18:08:15 -0800 Subject: [PATCH 27/30] refactor: move GFQL normalization to compute/ast.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add normalize_gfql_to_wire() as canonical GFQLβ†’wire implementation - Simplify collections.py (removed 36 lines of duplicate logic) - Simplify validate_collections.py to call compute/ast - Clean import graph: models β†’ compute β†’ helpers/validation πŸ€– Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- graphistry/collections.py | 42 ++-------------- graphistry/compute/ast.py | 54 +++++++++++++++++++++ graphistry/validate/validate_collections.py | 21 +++----- 3 files changed, 63 insertions(+), 54 deletions(-) diff --git a/graphistry/collections.py b/graphistry/collections.py index 1e8a5b0166..a133b746ab 100644 --- a/graphistry/collections.py +++ b/graphistry/collections.py @@ -1,11 +1,10 @@ -from typing import Dict, List, Optional, Sequence, TypeVar +from typing import Optional, Sequence, TypeVar from graphistry.models.collections import ( CollectionIntersection, CollectionExprInput, CollectionSet, ) -from graphistry.utils.json import 
JSONVal CollectionDict = TypeVar("CollectionDict", CollectionSet, CollectionIntersection) @@ -29,42 +28,6 @@ def _apply_collection_metadata(collection: CollectionDict, **metadata: Optional[ return collection -def _wrap_gfql_expr(expr: CollectionExprInput) -> Dict[str, JSONVal]: - - from graphistry.compute.ast import ASTObject, from_json as ast_from_json - from graphistry.compute.chain import Chain - - def _normalize_op(op: object) -> Dict[str, JSONVal]: - if isinstance(op, ASTObject): - return op.to_json() - if isinstance(op, dict): - return ast_from_json(op, validate=True).to_json() - raise TypeError("Collection GFQL operations must be AST objects or dictionaries") - - def _normalize_ops(raw: object) -> List[Dict[str, JSONVal]]: - if isinstance(raw, Chain): - return _normalize_ops(raw.to_json().get("chain", [])) - if isinstance(raw, ASTObject): - return [raw.to_json()] - if isinstance(raw, list): - if len(raw) == 0: - raise ValueError("Collection GFQL operations list cannot be empty") - return [_normalize_op(op) for op in raw] - if isinstance(raw, dict): - if raw.get("type") == "Chain" and "chain" in raw: - return _normalize_ops(raw.get("chain")) - if raw.get("type") == "gfql_chain" and "gfql" in raw: - return _normalize_ops(raw.get("gfql")) - if "chain" in raw: - return _normalize_ops(raw.get("chain")) - if "gfql" in raw: - return _normalize_ops(raw.get("gfql")) - return [_normalize_op(raw)] - raise TypeError("Collection expr must be an AST object, chain, list, or dict") - - return {"type": "gfql_chain", "gfql": _normalize_ops(expr)} - - def collection_set( *, expr: CollectionExprInput, @@ -75,7 +38,8 @@ def collection_set( edge_color: Optional[str] = None, ) -> CollectionSet: """Build a collection dict for a GFQL-defined set.""" - collection: CollectionSet = {"type": "set", "expr": _wrap_gfql_expr(expr)} + from graphistry.compute.ast import normalize_gfql_to_wire + collection: CollectionSet = {"type": "set", "expr": {"type": "gfql_chain", "gfql": 
normalize_gfql_to_wire(expr)}} return _apply_collection_metadata( collection, id=id, diff --git a/graphistry/compute/ast.py b/graphistry/compute/ast.py index dd024c121d..6d3c661879 100644 --- a/graphistry/compute/ast.py +++ b/graphistry/compute/ast.py @@ -1508,6 +1508,60 @@ def from_json(o: JSONVal, validate: bool = True) -> Union[ASTNode, ASTEdge, ASTL return out +def normalize_gfql_to_wire(expr: Any) -> List[Dict[str, JSONVal]]: + """ + Normalize GFQL expression to wire format (list of JSON-serializable dicts). + + Accepts: + - Chain object + - Single ASTObject + - List of ASTObjects + - Dict with 'type': 'Chain' and 'chain' key + - Dict with 'type': 'gfql_chain' and 'gfql' key + - Dict with just 'chain' or 'gfql' key + - Single dict (parsed as AST op) + + Returns: + - List of JSON-serializable dicts ready for wire protocol + + Raises: + - TypeError: if expr type is not supported + - ValueError: if expr is empty + - GFQLSyntaxError: if dict cannot be parsed as valid AST + """ + from graphistry.compute.chain import Chain + + def _normalize_op(op: object) -> Dict[str, JSONVal]: + if isinstance(op, ASTObject): + return op.to_json() + if isinstance(op, dict): + return from_json(op, validate=True).to_json() + raise TypeError("GFQL operations must be AST objects or dictionaries") + + def _normalize_ops(raw: object) -> List[Dict[str, JSONVal]]: + if isinstance(raw, Chain): + return _normalize_ops(raw.to_json().get("chain", [])) + if isinstance(raw, ASTObject): + return [raw.to_json()] + if isinstance(raw, list): + if len(raw) == 0: + raise ValueError("GFQL operations list cannot be empty") + return [_normalize_op(op) for op in raw] + if isinstance(raw, dict): + if raw.get("type") == "Chain" and "chain" in raw: + return _normalize_ops(raw.get("chain")) + if raw.get("type") == "gfql_chain" and "gfql" in raw: + return _normalize_ops(raw.get("gfql")) + if "chain" in raw: + return _normalize_ops(raw.get("chain")) + if "gfql" in raw: + return _normalize_ops(raw.get("gfql")) + 
return [_normalize_op(raw)] + raise TypeError("GFQL expr must be Chain, ASTObject, list, or dict") + + return _normalize_ops(expr) + + ############################################################################### # User-friendly aliases for public API diff --git a/graphistry/validate/validate_collections.py b/graphistry/validate/validate_collections.py index 5f0873f5ec..d5e67f7f28 100644 --- a/graphistry/validate/validate_collections.py +++ b/graphistry/validate/validate_collections.py @@ -3,7 +3,7 @@ from urllib.parse import quote, unquote from graphistry.client_session import strtobool -from graphistry.compute.exceptions import GFQLValidationError +from graphistry.compute.exceptions import GFQLSyntaxError, GFQLValidationError from graphistry.models.collections import Collection, CollectionsInput from graphistry.models.types import ValidationMode, ValidationParam from graphistry.util import warn as emit_warn @@ -155,7 +155,7 @@ def _normalize_gfql_ops( """ Normalize GFQL operations to a list of JSON-serializable dicts. - Uses _wrap_gfql_expr from collections.py as the canonical implementation, + Uses normalize_gfql_to_wire from compute/ast.py as the canonical implementation, wrapping with error handling for validation modes. 
""" if gfql_ops is None: @@ -170,20 +170,11 @@ def _normalize_gfql_ops( _issue('GFQL chain string must be JSON', {'index': entry_index, 'error': str(exc)}, validate_mode, warn) return None - # Use canonical implementation from collections.py + # Use canonical implementation from compute/ast.py try: - from graphistry.collections import _wrap_gfql_expr - result = _wrap_gfql_expr(gfql_ops) - ops_raw = result.get('gfql', []) - if not isinstance(ops_raw, list): - _issue('GFQL chain must be a list', {'index': entry_index}, validate_mode, warn) - return None - ops: List[Dict[str, Any]] = ops_raw - if len(ops) == 0: - _issue('GFQL chain is empty', {'index': entry_index}, validate_mode, warn) - return None - return ops - except (TypeError, ValueError, GFQLValidationError) as exc: + from graphistry.compute.ast import normalize_gfql_to_wire + return normalize_gfql_to_wire(gfql_ops) + except (TypeError, ValueError, GFQLValidationError, GFQLSyntaxError) as exc: # Precise exception handling for GFQL parsing errors _issue( 'Invalid GFQL operation in collection', From 1f0c38797758a3b985ad90d8aa8aca55178de7ed Mon Sep 17 00:00:00 2001 From: Leo Meyerovich Date: Tue, 10 Feb 2026 15:36:29 -0800 Subject: [PATCH 28/30] feat: validate intersection DAG (cycles, self-refs, intersections-of-intersections) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Allow intersections to reference other intersections (backend supports this) - Detect self-references (intersection referencing itself) - Detect cycles in intersection dependencies (A->B->A) - Add tests for DAG validation πŸ€– Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- graphistry/tests/test_collections.py | 51 ++++++++++++ graphistry/validate/validate_collections.py | 88 +++++++++++++++++---- 2 files changed, 125 insertions(+), 14 deletions(-) diff --git a/graphistry/tests/test_collections.py b/graphistry/tests/test_collections.py index 
1f94491b32..a9f66da654 100644 --- a/graphistry/tests/test_collections.py +++ b/graphistry/tests/test_collections.py @@ -232,3 +232,54 @@ def test_collections_autofix_generates_missing_ids(): decoded = decode_collections(url_params["collections"]) assert decoded[0]["id"] == "set-0" assert decoded[1]["id"] == "intersection-1" + + +def test_collections_intersection_of_intersections(): + # Backend supports intersections-of-intersections (DAG structure) + collections = [ + {"type": "set", "id": "set_a", "expr": [graphistry.n({"a": 1})]}, + {"type": "set", "id": "set_b", "expr": [graphistry.n({"b": 1})]}, + {"type": "intersection", "id": "inter_ab", "expr": {"type": "intersection", "sets": ["set_a", "set_b"]}}, + {"type": "intersection", "id": "inter_of_inter", "expr": {"type": "intersection", "sets": ["set_a", "inter_ab"]}}, + ] + url_params = collections_url_params(collections) + decoded = decode_collections(url_params["collections"]) + assert len(decoded) == 4 + assert decoded[3]["id"] == "inter_of_inter" + assert decoded[3]["expr"]["sets"] == ["set_a", "inter_ab"] + + +def test_collections_intersection_self_reference_rejected(): + # Intersection cannot reference itself + collections = [ + {"type": "set", "id": "set_a", "expr": [graphistry.n({"a": 1})]}, + {"type": "intersection", "id": "bad_inter", "expr": {"type": "intersection", "sets": ["set_a", "bad_inter"]}}, + ] + with pytest.raises(ValueError): + collections_url_params(collections, validate="strict") + + +def test_collections_intersection_cycle_rejected(): + # Cycles in intersection DAG are rejected + collections = [ + {"type": "set", "id": "set_a", "expr": [graphistry.n({"a": 1})]}, + {"type": "intersection", "id": "inter_a", "expr": {"type": "intersection", "sets": ["set_a", "inter_b"]}}, + {"type": "intersection", "id": "inter_b", "expr": {"type": "intersection", "sets": ["set_a", "inter_a"]}}, + ] + with pytest.raises(ValueError): + collections_url_params(collections, validate="strict") + + +def 
test_collections_intersection_cycle_autofix_drops(): + # In autofix mode, cyclic intersections are dropped + collections = [ + {"type": "set", "id": "set_a", "expr": [graphistry.n({"a": 1})]}, + {"type": "intersection", "id": "inter_a", "expr": {"type": "intersection", "sets": ["set_a", "inter_b"]}}, + {"type": "intersection", "id": "inter_b", "expr": {"type": "intersection", "sets": ["set_a", "inter_a"]}}, + ] + with pytest.warns(RuntimeWarning): + url_params = collections_url_params(collections, validate="autofix", warn=True) + decoded = decode_collections(url_params["collections"]) + # Both cyclic intersections dropped, only set remains + assert len(decoded) == 1 + assert decoded[0]["id"] == "set_a" diff --git a/graphistry/validate/validate_collections.py b/graphistry/validate/validate_collections.py index d5e67f7f28..b7a282cf10 100644 --- a/graphistry/validate/validate_collections.py +++ b/graphistry/validate/validate_collections.py @@ -338,35 +338,95 @@ def _validate_intersection_references( warn: bool ) -> List[Dict[str, Any]]: """ - Validate that intersection set IDs reference actual collection IDs. + Validate intersection references form a valid DAG. - Dangling references (e.g., sets: ["nonexistent"]) cause backend errors. + Checks: + 1. All referenced IDs exist as 'set' or 'intersection' collections + 2. No self-references (intersection referencing itself) + 3. No cycles (A->B->A) + + Dangling/cyclic references cause backend errors ("Infinite loop detected"). In strict mode, raise on first issue. In autofix mode, drop invalid intersections. 
""" - # Collect all set IDs (only from 'set' type collections, not intersections) - set_ids = { - c.get('id') - for c in collections - if c.get('type') == 'set' and c.get('id') - } + # Build ID -> collection type mapping for valid reference targets + collection_ids: Dict[str, str] = {} + for c in collections: + cid = c.get('id') + ctype = c.get('type') + if cid and ctype in ('set', 'intersection'): + collection_ids[cid] = ctype + + # Build dependency graph for cycle detection + # intersection_id -> set of IDs it references + dependencies: Dict[str, List[str]] = {} + for c in collections: + if c.get('type') == 'intersection' and c.get('id'): + expr = c.get('expr', {}) + dependencies[c['id']] = expr.get('sets', []) + + def has_cycle(start_id: str, visited: set, path: set) -> bool: + """DFS cycle detection.""" + if start_id in path: + return True + if start_id not in dependencies: + return False # It's a set, not an intersection - no further deps + if start_id in visited: + return False + + visited.add(start_id) + path.add(start_id) + + for dep_id in dependencies.get(start_id, []): + if has_cycle(dep_id, visited, path): + return True + + path.remove(start_id) + return False valid_collections: List[Dict[str, Any]] = [] for idx, collection in enumerate(collections): if collection.get('type') == 'intersection': + coll_id = collection.get('id') expr = collection.get('expr', {}) - referenced_sets = expr.get('sets', []) - missing = [sid for sid in referenced_sets if sid not in set_ids] + referenced_ids = expr.get('sets', []) + + # Check for self-reference + if coll_id and coll_id in referenced_ids: + _issue( + 'Intersection references itself', + {'index': idx, 'id': coll_id}, + validate_mode, + warn + ) + if validate_mode == 'autofix': + continue + return [] + + # Check all referenced IDs exist and are valid types + missing = [sid for sid in referenced_ids if sid not in collection_ids] if missing: _issue( - 'Intersection references non-existent set IDs', - {'index': idx, 
'missing_sets': missing, 'available_sets': list(set_ids)}, + 'Intersection references non-existent collection IDs', + {'index': idx, 'missing': missing, 'available': list(collection_ids.keys())}, validate_mode, warn ) if validate_mode == 'autofix': - continue # Drop invalid intersection - # In strict mode, we already raised in _issue + continue return [] + + # Check for cycles + if coll_id and has_cycle(coll_id, set(), set()): + _issue( + 'Intersection creates a dependency cycle', + {'index': idx, 'id': coll_id}, + validate_mode, + warn + ) + if validate_mode == 'autofix': + continue + return [] + valid_collections.append(collection) return valid_collections From 75839b24d8b03479c32b982d9a06b49f394761f6 Mon Sep 17 00:00:00 2001 From: Leo Meyerovich Date: Tue, 10 Feb 2026 18:07:05 -0800 Subject: [PATCH 29/30] fix: catch AssertionError from AST from_json in validation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit AST class-level from_json methods (ASTLet, ASTRemoteGraph, ASTRef, ASTCall) use bare `assert` for required field checks. These raise AssertionError which was not caught by the validation boundary, causing raw crashes instead of proper validation errors. 
πŸ€– Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- graphistry/tests/test_collections.py | 21 +++++++++++++++++++++ graphistry/validate/validate_collections.py | 4 ++-- 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/graphistry/tests/test_collections.py b/graphistry/tests/test_collections.py index a9f66da654..3e7750cb14 100644 --- a/graphistry/tests/test_collections.py +++ b/graphistry/tests/test_collections.py @@ -283,3 +283,24 @@ def test_collections_intersection_cycle_autofix_drops(): # Both cyclic intersections dropped, only set remains assert len(decoded) == 1 assert decoded[0]["id"] == "set_a" + + +def test_collections_malformed_ast_autofix_drops(): + # AST from_json uses bare asserts - these should be caught, not crash + # {"type": "Let"} missing required 'bindings' field + from graphistry.validate.validate_collections import normalize_collections + collections = [ + {"type": "set", "id": "good", "expr": [{"type": "Node"}]}, + {"type": "set", "id": "bad-let", "expr": [{"type": "Let"}]}, # missing bindings + ] + result = normalize_collections(collections, validate="autofix", warn=False) + ids = [c.get("id") for c in result] + assert "good" in ids + assert "bad-let" not in ids + + +def test_collections_malformed_ast_strict_raises(): + from graphistry.validate.validate_collections import normalize_collections + collections = [{"type": "set", "id": "bad", "expr": [{"type": "Let"}]}] + with pytest.raises(ValueError): + normalize_collections(collections, validate="strict") diff --git a/graphistry/validate/validate_collections.py b/graphistry/validate/validate_collections.py index b7a282cf10..6fc87e2643 100644 --- a/graphistry/validate/validate_collections.py +++ b/graphistry/validate/validate_collections.py @@ -174,8 +174,8 @@ def _normalize_gfql_ops( try: from graphistry.compute.ast import normalize_gfql_to_wire return normalize_gfql_to_wire(gfql_ops) - except (TypeError, ValueError, 
GFQLValidationError, GFQLSyntaxError) as exc: - # Precise exception handling for GFQL parsing errors + except (TypeError, ValueError, AssertionError, GFQLValidationError, GFQLSyntaxError) as exc: + # AssertionError: AST from_json methods use bare asserts for required fields _issue( 'Invalid GFQL operation in collection', {'index': entry_index, 'error': str(exc)}, From 2f12a7953149b13ee5a7ffefb4f9e993ee0cb1cc Mon Sep 17 00:00:00 2001 From: lmeyerov Date: Mon, 2 Mar 2026 19:20:22 -0800 Subject: [PATCH 30/30] refactor(ai): migrate prompts into skills and port docs to .agents (#930) * refactor(ai): migrate prompts into skills and port docs * docs(changelog): note ai skills and .agents migration * refactor(ai): make .agents skills canonical for claude --- .agents/README.md | 45 +++ .../assets/find_dynamic_imports.sh | 4 +- .../assets/generate_comment_inventory.sh | 4 +- .../assets/pysa_extract_callers.py | 4 +- {ai => .agents}/docs/gfql/README.md | 2 +- {ai => .agents}/docs/gfql/calls_checklist.md | 4 +- {ai => .agents}/docs/gfql/conformance.md | 0 {ai => .agents}/docs/gfql/oracle.md | 0 .../docs/gfql/predicates_checklist.md | 0 .agents/skills/SKILLS.md | 11 + .../skills/conventional-commits/SKILL.md | 7 +- .../skills/decomment/SKILL.md | 11 +- .../gfql-llm-guide-maintenance/SKILL.md | 9 +- .../skills/hoist-imports/SKILL.md | 13 +- .../skills/lint-types-check/SKILL.md | 5 + .../PLAN.md => .agents/skills/plan/SKILL.md | 11 +- .../skills/pyre-analysis/SKILL.md | 11 +- .gitignore | 4 + CHANGELOG.md | 3 + CLAUDE.md | 12 +- ai/README.md | 321 ------------------ 21 files changed, 134 insertions(+), 347 deletions(-) create mode 100644 .agents/README.md rename {ai => .agents}/assets/find_dynamic_imports.sh (96%) rename {ai => .agents}/assets/generate_comment_inventory.sh (94%) rename {ai => .agents}/assets/pysa_extract_callers.py (89%) rename {ai => .agents}/docs/gfql/README.md (97%) rename {ai => .agents}/docs/gfql/calls_checklist.md (96%) rename {ai => 
.agents}/docs/gfql/conformance.md (100%) rename {ai => .agents}/docs/gfql/oracle.md (100%) rename {ai => .agents}/docs/gfql/predicates_checklist.md (100%) create mode 100644 .agents/skills/SKILLS.md rename ai/prompts/CONVENTIONAL_COMMITS.md => .agents/skills/conventional-commits/SKILL.md (88%) rename ai/prompts/DECOMMENT.md => .agents/skills/decomment/SKILL.md (96%) rename ai/prompts/GFQL_LLM_GUIDE_MAINTENANCE.md => .agents/skills/gfql-llm-guide-maintenance/SKILL.md (97%) rename ai/prompts/HOISTIMPORTS.md => .agents/skills/hoist-imports/SKILL.md (96%) rename ai/prompts/LINT_TYPES_CHECK.md => .agents/skills/lint-types-check/SKILL.md (98%) rename ai/prompts/PLAN.md => .agents/skills/plan/SKILL.md (96%) rename ai/prompts/PYRE_ANALYSIS.md => .agents/skills/pyre-analysis/SKILL.md (81%) delete mode 100644 ai/README.md diff --git a/.agents/README.md b/.agents/README.md new file mode 100644 index 0000000000..74c7a5bf1f --- /dev/null +++ b/.agents/README.md @@ -0,0 +1,45 @@ +# .agents + +Assistant-facing support material for this repository. + +## Purpose + +- Keep reusable agent docs and helper scripts in one place. +- Keep task-oriented skill definitions in `.agents/skills/`. 
+
+## Layout
+
+```text
+.agents/
+β”œβ”€β”€ README.md
+β”œβ”€β”€ assets/
+β”‚ β”œβ”€β”€ find_dynamic_imports.sh
+β”‚ β”œβ”€β”€ generate_comment_inventory.sh
+β”‚ └── pysa_extract_callers.py
+└── docs/
+ └── gfql/
+ β”œβ”€β”€ README.md
+ β”œβ”€β”€ calls_checklist.md
+ β”œβ”€β”€ conformance.md
+ β”œβ”€β”€ oracle.md
+ └── predicates_checklist.md
+```
+
+## Related Paths
+
+- Skills: `.agents/skills/`
+- Skill index: `.agents/skills/SKILLS.md`
+- Plans (gitignored): `plans/`
+
+## Quick Commands
+
+```bash
+# Generate dynamic import inventory for refactors
+./.agents/assets/find_dynamic_imports.sh master plans/[task]/dynamic_imports.md
+
+# Generate comment inventory for cleanup passes
+./.agents/assets/generate_comment_inventory.sh master plans/[task]/comment_inventory.md
+
+# Extract callers from pysa call graph
+python3 .agents/assets/pysa_extract_callers.py pysa_results/call-graph.json PlotterBase.PlotterBase.bind
+```
diff --git a/ai/assets/find_dynamic_imports.sh b/.agents/assets/find_dynamic_imports.sh
similarity index 96%
rename from ai/assets/find_dynamic_imports.sh
rename to .agents/assets/find_dynamic_imports.sh
index c9b1bc6fd6..8e9931400f 100755
--- a/ai/assets/find_dynamic_imports.sh
+++ b/.agents/assets/find_dynamic_imports.sh
@@ -1,14 +1,14 @@
 #!/bin/bash
 # Find Dynamic Imports for HOISTIMPORTS Protocol
 #
-# Usage: ./ai/assets/find_dynamic_imports.sh [base_branch] [output_file]
+# Usage: ./.agents/assets/find_dynamic_imports.sh [base_branch] [output_file]
 #
 # This script automates Phase 1 of the HOISTIMPORTS protocol by extracting all
 # dynamic imports (imports inside functions/methods/conditionals) added in a PR
 # and formatting them with context for categorization.
# # Example: -# ./ai/assets/find_dynamic_imports.sh master plans/my-feature/dynamic_imports.md +# ./.agents/assets/find_dynamic_imports.sh master plans/my-feature/dynamic_imports.md set -euo pipefail diff --git a/ai/assets/generate_comment_inventory.sh b/.agents/assets/generate_comment_inventory.sh similarity index 94% rename from ai/assets/generate_comment_inventory.sh rename to .agents/assets/generate_comment_inventory.sh index 9f90283cd6..583626b016 100755 --- a/ai/assets/generate_comment_inventory.sh +++ b/.agents/assets/generate_comment_inventory.sh @@ -1,13 +1,13 @@ #!/bin/bash # Generate Comment Inventory for DECOMMENT Protocol # -# Usage: ./ai/assets/generate_comment_inventory.sh [base_branch] [output_file] +# Usage: ./.agents/assets/generate_comment_inventory.sh [base_branch] [output_file] # # This script automates Phase 1 of the DECOMMENT protocol by extracting all # comments added in a PR and formatting them with context for categorization. # # Example: -# ./ai/assets/generate_comment_inventory.sh master plans/my-feature/comment_inventory.md +# ./.agents/assets/generate_comment_inventory.sh master plans/my-feature/comment_inventory.md set -euo pipefail diff --git a/ai/assets/pysa_extract_callers.py b/.agents/assets/pysa_extract_callers.py similarity index 89% rename from ai/assets/pysa_extract_callers.py rename to .agents/assets/pysa_extract_callers.py index ebfb8b97a4..3636282ed7 100755 --- a/ai/assets/pysa_extract_callers.py +++ b/.agents/assets/pysa_extract_callers.py @@ -2,10 +2,10 @@ """Extract caller information from Pysa call-graph.json Usage: - python3 ai/assets/pysa_extract_callers.py path/to/call-graph.json method1 method2 ... + python3 .agents/assets/pysa_extract_callers.py path/to/call-graph.json method1 method2 ... 
Example: - python3 ai/assets/pysa_extract_callers.py \\ + python3 .agents/assets/pysa_extract_callers.py \\ pysa_results/call-graph.json \\ PlotterBase.PlotterBase.bind \\ PlotterBase.PlotterBase.nodes diff --git a/ai/docs/gfql/README.md b/.agents/docs/gfql/README.md similarity index 97% rename from ai/docs/gfql/README.md rename to .agents/docs/gfql/README.md index 6d8ef057c3..f8c4560380 100644 --- a/ai/docs/gfql/README.md +++ b/.agents/docs/gfql/README.md @@ -8,7 +8,7 @@ Guide for AI assistants working with GFQL (Graph Frame Query Language) in PyGrap - [`calls_checklist.md`](./calls_checklist.md) β€” Required steps for exposing or updating GFQL `call()` functions. - [`predicates_checklist.md`](./predicates_checklist.md) β€” End-to-end checklist for predicate implementations. - [`conformance.md`](./conformance.md) β€” Cypher TCK conformance harness and CI wiring. -- [`../prompts/GFQL_LLM_GUIDE_MAINTENANCE.md`](../prompts/GFQL_LLM_GUIDE_MAINTENANCE.md) β€” Guidance for keeping AI assistants aligned with GFQL changes. +- [`../../skills/gfql-llm-guide-maintenance/SKILL.md`](../../skills/gfql-llm-guide-maintenance/SKILL.md) β€” Guidance for keeping AI assistants aligned with GFQL changes. ### Essential GFQL Operations ```python diff --git a/ai/docs/gfql/calls_checklist.md b/.agents/docs/gfql/calls_checklist.md similarity index 96% rename from ai/docs/gfql/calls_checklist.md rename to .agents/docs/gfql/calls_checklist.md index 81b5df9092..3e45821b38 100644 --- a/ai/docs/gfql/calls_checklist.md +++ b/.agents/docs/gfql/calls_checklist.md @@ -4,7 +4,7 @@ When exposing or updating GFQL `call()` functions, the change is **multi-system* runtime executors, JSON schema validators, static typing, documentation, and tests all need to stay in sync. 
Use this playbook (paired with [`predicates_checklist.md`](./predicates_checklist.md) and the guidance in -[`../prompts/GFQL_LLM_GUIDE_MAINTENANCE.md`](../../prompts/GFQL_LLM_GUIDE_MAINTENANCE.md)) +[`../../skills/gfql-llm-guide-maintenance/SKILL.md`](../../skills/gfql-llm-guide-maintenance/SKILL.md)) to avoid regressions. > **Scope**: Items below are required for *every* new or modified `call()` entry. @@ -90,7 +90,7 @@ to avoid regressions. ## πŸ“Ž Optional (Promote When User-Facing) - **Marketing / blog teaser** when the call is a marquee feature. -- **LLM prompt updates** (`ai/prompts/gfql/*`) beyond baseline instructions. +- **LLM prompt updates** (`.agents/skills/gfql-llm-guide-maintenance/SKILL.md`) beyond baseline instructions. - **Customer templates** or dataset refreshes referencing the new capability. Keep this file synchronized with future GFQL evolutionsβ€”open a PR to amend when the onboarding surface changes. diff --git a/ai/docs/gfql/conformance.md b/.agents/docs/gfql/conformance.md similarity index 100% rename from ai/docs/gfql/conformance.md rename to .agents/docs/gfql/conformance.md diff --git a/ai/docs/gfql/oracle.md b/.agents/docs/gfql/oracle.md similarity index 100% rename from ai/docs/gfql/oracle.md rename to .agents/docs/gfql/oracle.md diff --git a/ai/docs/gfql/predicates_checklist.md b/.agents/docs/gfql/predicates_checklist.md similarity index 100% rename from ai/docs/gfql/predicates_checklist.md rename to .agents/docs/gfql/predicates_checklist.md diff --git a/.agents/skills/SKILLS.md b/.agents/skills/SKILLS.md new file mode 100644 index 0000000000..378aaca739 --- /dev/null +++ b/.agents/skills/SKILLS.md @@ -0,0 +1,11 @@ +# Skills Catalog + +Each skill is a folder containing `SKILL.md` with YAML frontmatter. 
+ +- `conventional-commits` +- `decomment` +- `gfql-llm-guide-maintenance` +- `hoist-imports` +- `lint-types-check` +- `plan` +- `pyre-analysis` diff --git a/ai/prompts/CONVENTIONAL_COMMITS.md b/.agents/skills/conventional-commits/SKILL.md similarity index 88% rename from ai/prompts/CONVENTIONAL_COMMITS.md rename to .agents/skills/conventional-commits/SKILL.md index e258aec1bf..46f62b8b4a 100644 --- a/ai/prompts/CONVENTIONAL_COMMITS.md +++ b/.agents/skills/conventional-commits/SKILL.md @@ -1,6 +1,11 @@ +--- +name: conventional-commits +description: "Create atomic PyGraphistry commits with conventional commit messages and patch-based staging. Use when preparing commit sequences, selecting staged hunks, or writing commit messages." +--- + # Conventional Commits Guide for PyGraphistry -**⚠️ BEFORE STARTING: Reread [ai/prompts/PLAN.md](PLAN.md). Reuse existing `plans/*/plan.md` if one exists, else start a new one.** +**⚠️ BEFORE STARTING: Reread [the planning skill](../plan/SKILL.md). Reuse existing `plans/*/plan.md` if one exists, else start a new one.** PyGraphistry-specific commit conventions only. See PLAN.md for execution protocol. diff --git a/ai/prompts/DECOMMENT.md b/.agents/skills/decomment/SKILL.md similarity index 96% rename from ai/prompts/DECOMMENT.md rename to .agents/skills/decomment/SKILL.md index cc57db88f1..6385beae71 100644 --- a/ai/prompts/DECOMMENT.md +++ b/.agents/skills/decomment/SKILL.md @@ -1,3 +1,8 @@ +--- +name: decomment +description: "Remove low-value comments from pull requests while preserving necessary documentation. Use before review or merge when cleaning up explanatory comments in changed code." +--- + # Decomment Protocol **Purpose:** Remove unnecessary comments from PRs while preserving valuable documentation. @@ -15,7 +20,7 @@ This protocol guides the systematic removal of redundant comments from code chan ## Prerequisites -1. **Plan File**: Ensure you have a current plan file (see `ai/prompts/PLAN.md`) +1. 
**Plan File**: Ensure you have a current plan file (see `../plan/SKILL.md`) - If no plan exists for this work, create one following PLAN.md template - If plan exists but is stale, refresh it with current state 2. **Clean Git State**: All changes committed to feature branch @@ -39,7 +44,7 @@ This protocol guides the systematic removal of redundant comments from code chan **Commands:** ```bash # Automated inventory generation (RECOMMENDED) -./ai/assets/generate_comment_inventory.sh master plans/[task]/comment_inventory.md +./.agents/assets/generate_comment_inventory.sh master plans/[task]/comment_inventory.md # Manual alternatives: # Get base branch @@ -217,7 +222,7 @@ Phase 4: Verification Results ls plans/[current-task]/plan.md # If no plan exists: -# 1. Create plan following ai/prompts/PLAN.md template +# 1. Create plan following ../plan/SKILL.md template # 2. Document decomment work as Phase N.A: "Remove redundant comments" # If plan exists but stale: diff --git a/ai/prompts/GFQL_LLM_GUIDE_MAINTENANCE.md b/.agents/skills/gfql-llm-guide-maintenance/SKILL.md similarity index 97% rename from ai/prompts/GFQL_LLM_GUIDE_MAINTENANCE.md rename to .agents/skills/gfql-llm-guide-maintenance/SKILL.md index 7df6ff088a..adab3724e5 100644 --- a/ai/prompts/GFQL_LLM_GUIDE_MAINTENANCE.md +++ b/.agents/skills/gfql-llm-guide-maintenance/SKILL.md @@ -1,3 +1,8 @@ +--- +name: gfql-llm-guide-maintenance +description: "Maintain GFQL LLM guidance docs when call surfaces, predicates, algorithms, or common failure patterns change. Use for release updates and guide conformance checks." +--- + # LLM_GUIDE.md Maintenance Process **Purpose**: Update `LLM_GUIDE.md` when GFQL features change or user patterns emerge. @@ -13,7 +18,7 @@ 5. **LLMs generating invalid JSON** β†’ [LLM Failures](#llm-failures) 6. 
**Multiple changes** β†’ [Multi-Change Releases](#multi-change-releases) -**See also**: the end-to-end checklist in `ai/docs/gfql/calls_checklist.md` +**See also**: the end-to-end checklist in `.agents/docs/gfql/calls_checklist.md` whenever a `call()` surface changes. ## Update Triggers @@ -97,7 +102,7 @@ grep "new_fn" graphistry/tests/compute/ -rn ``` **Update**: `## Graph Algorithms` (add example) + `## Call Functions` (list) -- Cross-check `ai/docs/gfql/calls_checklist.md` for required code/tests/doc updates +- Cross-check `.agents/docs/gfql/calls_checklist.md` for required code/tests/doc updates **Format**: ```markdown diff --git a/ai/prompts/HOISTIMPORTS.md b/.agents/skills/hoist-imports/SKILL.md similarity index 96% rename from ai/prompts/HOISTIMPORTS.md rename to .agents/skills/hoist-imports/SKILL.md index d6a15ea489..8543240892 100644 --- a/ai/prompts/HOISTIMPORTS.md +++ b/.agents/skills/hoist-imports/SKILL.md @@ -1,10 +1,15 @@ +--- +name: hoist-imports +description: "Refactor dynamic imports to top-level imports with an inventory, categorization, and verification workflow. Use during cleanup or review-prep when import placement needs standardization." +--- + # Hoist Imports Protocol **Purpose:** Move dynamic imports to top-level unless there's a documented reason not to. ## Quick Reference (60-second version) -1. **Identify**: Run `./ai/assets/find_dynamic_imports.sh master plans/[task]/dynamic_imports.md` +1. **Identify**: Run `./.agents/assets/find_dynamic_imports.sh master plans/[task]/dynamic_imports.md` 2. **Categorize**: Review each as HOIST (move to top) or KEEP (document reason) 3. **Hoist**: Move imports to top-level following PEP 8 section ordering 4. **Verify**: Run tests, confirm no circular imports or heavy load issues @@ -27,7 +32,7 @@ This protocol guides the systematic refactoring of dynamic imports (imports insi ## Prerequisites -1. **Plan File**: Ensure you have a current plan file (see `ai/prompts/PLAN.md`) +1. 
**Plan File**: Ensure you have a current plan file (see `../plan/SKILL.md`) 2. **Clean Git State**: All changes committed to feature branch 3. **PR Context**: Know which branch the PR will land into (usually `master` or `main`) @@ -50,7 +55,7 @@ This protocol guides the systematic refactoring of dynamic imports (imports insi **Commands:** ```bash # Automated inventory generation (RECOMMENDED) -./ai/assets/find_dynamic_imports.sh master plans/[task]/dynamic_imports.md +./.agents/assets/find_dynamic_imports.sh master plans/[task]/dynamic_imports.md # Manual alternatives (if automation script unavailable): # Get base branch @@ -324,7 +329,7 @@ Add this phase to your plan: **Tool Calls:** ```bash -./ai/assets/find_dynamic_imports.sh master plans/[task]/dynamic_imports.md +./.agents/assets/find_dynamic_imports.sh master plans/[task]/dynamic_imports.md # [edit commands] git commit -m "refactor: Hoist dynamic imports to top-level" git push diff --git a/ai/prompts/LINT_TYPES_CHECK.md b/.agents/skills/lint-types-check/SKILL.md similarity index 98% rename from ai/prompts/LINT_TYPES_CHECK.md rename to .agents/skills/lint-types-check/SKILL.md index 08df8a222d..be2a31f82a 100644 --- a/ai/prompts/LINT_TYPES_CHECK.md +++ b/.agents/skills/lint-types-check/SKILL.md @@ -1,3 +1,8 @@ +--- +name: lint-types-check +description: "Run iterative lint and typecheck remediation for PyGraphistry. Use when fixing flake8/mypy issues and driving a branch to clean validation results." +--- + # Lint and Type Check Template for PyGraphistry diff --git a/ai/prompts/PLAN.md b/.agents/skills/plan/SKILL.md similarity index 96% rename from ai/prompts/PLAN.md rename to .agents/skills/plan/SKILL.md index 57a02a19e2..e92d26623c 100644 --- a/ai/prompts/PLAN.md +++ b/.agents/skills/plan/SKILL.md @@ -1,3 +1,8 @@ +--- +name: plan +description: "Create and maintain a resumable phased execution plan for complex tasks. Use when work spans multiple steps or sessions and requires strict status tracking." 
+--- + # Task Plan Template