From 6e9e6f300d434a9f63a63819897fb122d8f6cc81 Mon Sep 17 00:00:00 2001 From: Leo Meyerovich Date: Mon, 29 Dec 2025 06:55:28 -0800 Subject: [PATCH 01/30] chore: clarify plan location in template --- ai/prompts/PLAN.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ai/prompts/PLAN.md b/ai/prompts/PLAN.md index d31d05227d..57a02a19e2 100644 --- a/ai/prompts/PLAN.md +++ b/ai/prompts/PLAN.md @@ -125,7 +125,7 @@ git log --oneline -n 10 - Source: `graphistry/` - Tests: `graphistry/tests/` (mirrors source structure: `graphistry/foo/bar.py` β†’ `graphistry/tests/foo/test_bar.py`) - Docs: `docs/` -- Plans: `plans/` (gitignored - safe for auxiliary files, temp secrets, working data) +- Plans: `plans/` (gitignored - safe for auxiliary files, temp secrets, working data; Codex: avoid `~/.codex/plans`; if used, copy here then delete) - AI prompts: `ai/prompts/` - AI docs: `ai/docs/` From 209b2a3fa74c18aedc9f6755012e94045124fc3c Mon Sep 17 00:00:00 2001 From: Leo Meyerovich Date: Mon, 29 Dec 2025 06:55:42 -0800 Subject: [PATCH 02/30] feat: add collections validation and gfql support --- graphistry/Plottable.py | 12 + graphistry/PlotterBase.py | 50 ++- graphistry/__init__.py | 1 + graphistry/pygraphistry.py | 22 ++ graphistry/tests/test_collections.py | 117 ++++++ graphistry/validate/validate_collections.py | 386 ++++++++++++++++++++ 6 files changed, 587 insertions(+), 1 deletion(-) create mode 100644 graphistry/tests/test_collections.py create mode 100644 graphistry/validate/validate_collections.py diff --git a/graphistry/Plottable.py b/graphistry/Plottable.py index 18ed6cc67a..bc83b2e68c 100644 --- a/graphistry/Plottable.py +++ b/graphistry/Plottable.py @@ -783,6 +783,18 @@ def settings(self, ) -> 'Plottable': ... 
+ def collections( + self, + collections: Optional[Union[str, Dict[str, Any], List[Dict[str, Any]]]] = None, + show_collections: Optional[bool] = None, + collections_global_node_color: Optional[str] = None, + collections_global_edge_color: Optional[str] = None, + encode: bool = True, + validate: ValidationParam = 'autofix', + warn: bool = True + ) -> 'Plottable': + ... + def privacy(self, mode: Optional[PrivacyMode] = None, notify: Optional[bool] = None, invited_users: Optional[List[str]] = None, message: Optional[str] = None, mode_action: Optional[ModeAction] = None) -> 'Plottable': ... diff --git a/graphistry/PlotterBase.py b/graphistry/PlotterBase.py index 6b4f6f2ac3..7520580eed 100644 --- a/graphistry/PlotterBase.py +++ b/graphistry/PlotterBase.py @@ -1846,6 +1846,51 @@ def settings(self, height=None, url_params={}, render=None): return res + def collections( + self, + collections: Optional[Union[str, Dict[str, Any], List[Dict[str, Any]]]] = None, + show_collections: Optional[bool] = None, + collections_global_node_color: Optional[str] = None, + collections_global_edge_color: Optional[str] = None, + encode: bool = True, + validate: ValidationParam = 'autofix', + warn: bool = True + ) -> 'Plottable': + """Set collections URL parameters. Additive over previous settings. + + :param collections: List/dict of collections or JSON/URL-encoded JSON string. + :param show_collections: Toggle collections panel display. + :param collections_global_node_color: Hex color for non-collection nodes (leading # stripped). + :param collections_global_edge_color: Hex color for non-collection edges (leading # stripped). + :param encode: If True, JSON-minify and URL-encode collections. Use False for pre-encoded strings. + :param validate: Validation mode. 'autofix' (default) warns on issues, 'strict' raises on issues. + :param warn: Whether to emit warnings when validate='autofix'. validate=False forces warn=False. 
+ """ + from graphistry.validate.validate_collections import encode_collections, normalize_collections + + def strip_hash(value: str) -> str: + return value[1:] if value.startswith('#') else value + + settings: Dict[str, Any] = {} + if collections is not None: + normalized = normalize_collections(collections, validate=validate, warn=warn) + if isinstance(collections, str) and not encode: + settings['collections'] = collections + else: + settings['collections'] = encode_collections(normalized, encode=encode) + if show_collections is not None: + settings['showCollections'] = show_collections + if collections_global_node_color is not None: + settings['collectionsGlobalNodeColor'] = strip_hash(collections_global_node_color) + if collections_global_edge_color is not None: + settings['collectionsGlobalEdgeColor'] = strip_hash(collections_global_edge_color) + + if len(settings.keys()) > 0: + return self.settings(url_params={**self._url_params, **settings}) + else: + return self + + def privacy( self, mode: Optional[Mode] = None, @@ -2191,7 +2236,10 @@ def plot( 'viztoken': str(uuid.uuid4()) } - viz_url = self._pygraphistry._viz_url(info, self._url_params) + from graphistry.validate.validate_collections import normalize_collections_url_params + + url_params = normalize_collections_url_params(self._url_params, validate=validate_mode, warn=warn) + viz_url = self._pygraphistry._viz_url(info, url_params) cfg_client_protocol_hostname = self.session.client_protocol_hostname full_url = ('%s:%s' % (self.session.protocol, viz_url)) if cfg_client_protocol_hostname is None else viz_url diff --git a/graphistry/__init__.py b/graphistry/__init__.py index 954713b346..a3cc2a64f8 100644 --- a/graphistry/__init__.py +++ b/graphistry/__init__.py @@ -23,6 +23,7 @@ nodes, graph, settings, + collections, encode_point_color, encode_point_size, encode_point_icon, diff --git a/graphistry/pygraphistry.py b/graphistry/pygraphistry.py index 6a8ae4aaa9..2e339ff188 100644 --- 
a/graphistry/pygraphistry.py +++ b/graphistry/pygraphistry.py @@ -5,6 +5,7 @@ from graphistry.plugins_types.hypergraph import HypergraphResult from graphistry.client_session import ClientSession, ApiVersion, ENV_GRAPHISTRY_API_KEY, DatasetInfo, AuthManagerProtocol, strtobool from graphistry.Engine import EngineAbstractType +from graphistry.models.types import ValidationParam """Top-level import of class PyGraphistry as "Graphistry". Used to connect to the Graphistry server and then create a base plotter.""" import calendar, copy, gzip, io, json, numpy as np, pandas as pd, requests, sys, time, warnings @@ -2274,6 +2275,26 @@ def settings(self, height=None, url_params={}, render=None): return self._plotter().settings(height, url_params, render) + def collections( + self, + collections: Optional[Union[str, Dict[str, Any], List[Dict[str, Any]]]] = None, + show_collections: Optional[bool] = None, + collections_global_node_color: Optional[str] = None, + collections_global_edge_color: Optional[str] = None, + encode: bool = True, + validate: ValidationParam = 'autofix', + warn: bool = True + ): + return self._plotter().collections( + collections=collections, + show_collections=show_collections, + collections_global_node_color=collections_global_node_color, + collections_global_edge_color=collections_global_edge_color, + encode=encode, + validate=validate, + warn=warn + ) + def _viz_url(self, info: DatasetInfo, url_params: Dict[str, Any]) -> str: splash_time = int(calendar.timegm(time.gmtime())) + 15 extra = "&".join([k + "=" + str(v) for k, v in list(url_params.items())]) @@ -2501,6 +2522,7 @@ def _handle_api_response(self, response): pipe = PyGraphistry.pipe graph = PyGraphistry.graph settings = PyGraphistry.settings +collections = PyGraphistry.collections hypergraph = PyGraphistry.hypergraph bolt = PyGraphistry.bolt cypher = PyGraphistry.cypher diff --git a/graphistry/tests/test_collections.py b/graphistry/tests/test_collections.py new file mode 100644 index 
0000000000..019337af12 --- /dev/null +++ b/graphistry/tests/test_collections.py @@ -0,0 +1,117 @@ +# -*- coding: utf-8 -*- + +import json +import pytest +from urllib.parse import quote, unquote + +import graphistry +from graphistry.validate.validate_collections import normalize_collections_url_params + + +def decode_collections(encoded: str): + return json.loads(unquote(encoded)) + + +def test_collections_encodes_and_normalizes(): + g = graphistry.bind() + node_filter = graphistry.n({"subscribed_to_newsletter": True}) + collections = [ + { + "type": "set", + "id": "newsletter_subscribers", + "name": "Newsletter Subscribers", + "node_color": "#32CD32", + "expr": { + "type": "gfql_chain", + "gfql": [node_filter], + }, + } + ] + + g2 = g.collections( + collections=collections, + show_collections=True, + collections_global_node_color="#00FF00", + collections_global_edge_color="#00AA00", + ) + + decoded = decode_collections(g2._url_params["collections"]) + assert decoded == [ + { + "type": "set", + "id": "newsletter_subscribers", + "name": "Newsletter Subscribers", + "node_color": "#32CD32", + "expr": { + "type": "gfql_chain", + "gfql": [node_filter.to_json()], + }, + } + ] + assert g2._url_params["showCollections"] is True + assert g2._url_params["collectionsGlobalNodeColor"] == "00FF00" + assert g2._url_params["collectionsGlobalEdgeColor"] == "00AA00" + + +def test_collections_accepts_chain_and_preserves_dataset_id(): + chain = graphistry.Chain([graphistry.n({"type": "user"})]) + g1 = graphistry.bind(dataset_id="dataset_123") + + g2 = g1.collections(collections={"type": "set", "expr": chain}) + + decoded = decode_collections(g2._url_params["collections"]) + assert decoded == [ + { + "type": "set", + "expr": { + "type": "gfql_chain", + "gfql": [graphistry.n({"type": "user"}).to_json()], + }, + } + ] + assert g2._dataset_id == "dataset_123" + + +def test_collections_encode_false_keeps_string(): + raw = json.dumps([{"type": "intersection", "expr": {"type": 
"intersection", "sets": ["a"]}}], separators=(",", ":")) + encoded = quote(raw, safe="") + g2 = graphistry.bind().collections(collections=encoded, encode=False) + assert g2._url_params["collections"] == encoded + + +def test_collections_accepts_wire_protocol_chain(): + chain_json = { + "type": "Chain", + "chain": [ + { + "type": "Node", + "filter_dict": { + "type": "user" + } + } + ] + } + g2 = graphistry.bind().collections(collections={"type": "set", "expr": chain_json}) + decoded = decode_collections(g2._url_params["collections"]) + assert decoded == [ + { + "type": "set", + "expr": { + "type": "gfql_chain", + "gfql": chain_json["chain"], + }, + } + ] + + +def test_collections_validation_strict_raises(): + bad_collections = [{"type": "set", "expr": [{"filter_dict": {"a": 1}}]}] + with pytest.raises(ValueError): + graphistry.bind().collections(collections=bad_collections, validate="strict") + + +def test_plot_url_param_validation_autofix_warns(): + bad = '[{"type":"set","expr":[{"filter_dict":{"a":1}}]}]' + with pytest.warns(RuntimeWarning): + normalized = normalize_collections_url_params({"collections": bad}, validate="autofix", warn=True) + assert "collections" not in normalized or normalized["collections"].startswith("%5B") diff --git a/graphistry/validate/validate_collections.py b/graphistry/validate/validate_collections.py new file mode 100644 index 0000000000..310fc3dc75 --- /dev/null +++ b/graphistry/validate/validate_collections.py @@ -0,0 +1,386 @@ +import json +from typing import Any, Dict, List, Optional, Tuple, Union +from urllib.parse import quote, unquote + +from graphistry.client_session import strtobool +from graphistry.models.types import ValidationMode, ValidationParam +from graphistry.util import warn as emit_warn + +CollectionsInput = Union[str, Dict[str, Any], List[Dict[str, Any]]] + + +def normalize_validation_params( + validate: ValidationParam = 'autofix', + warn: bool = True +) -> Tuple[ValidationMode, bool]: + if validate is True: + 
validate_mode: ValidationMode = 'strict' + elif validate is False: + validate_mode = 'autofix' + warn = False + else: + validate_mode = validate + return validate_mode, warn + + +def encode_collections(collections: List[Dict[str, Any]], encode: bool = True) -> str: + json_str = json.dumps(collections, separators=(',', ':'), ensure_ascii=True) + return quote(json_str, safe='') if encode else json_str + + +def _issue( + message: str, + data: Optional[Dict[str, Any]], + validate_mode: ValidationMode, + warn: bool +) -> None: + error = ValueError({'message': message, 'data': data} if data else {'message': message}) + if validate_mode in ('strict', 'strict-fast'): + raise error + if warn and validate_mode == 'autofix': + emit_warn(f"Collections validation warning: {message} ({data})") + + +def _parse_collections_input( + collections: CollectionsInput, + validate_mode: ValidationMode, + warn: bool +) -> List[Dict[str, Any]]: + if isinstance(collections, list): + return collections + if isinstance(collections, dict): + return [collections] + if isinstance(collections, str): + try: + parsed = json.loads(collections) + except json.JSONDecodeError: + try: + parsed = json.loads(unquote(collections)) + except json.JSONDecodeError as exc: + _issue('Collections string must be JSON or URL-encoded JSON', {'error': str(exc)}, validate_mode, warn) + return [] + if isinstance(parsed, list): + return parsed + if isinstance(parsed, dict): + return [parsed] + _issue('Collections JSON must be a list or dict', {'type': type(parsed).__name__}, validate_mode, warn) + return [] + _issue('Collections must be a list, dict, or JSON string', {'type': type(collections).__name__}, validate_mode, warn) + return [] + + +def _coerce_str_field( + entry: Dict[str, Any], + key: str, + validate_mode: ValidationMode, + warn: bool, + entry_index: int +) -> None: + if key not in entry or entry[key] is None: + return + if isinstance(entry[key], str): + return + _issue( + f'Collection field "{key}" should be 
a string', + {'index': entry_index, 'value': entry[key], 'type': type(entry[key]).__name__}, + validate_mode, + warn + ) + if validate_mode == 'autofix': + entry[key] = str(entry[key]) + + +def _normalize_sets_list( + sets_value: Any, + validate_mode: ValidationMode, + warn: bool, + entry_index: int +) -> Optional[List[str]]: + if not isinstance(sets_value, list): + _issue( + 'Intersection sets must be a list of strings', + {'index': entry_index, 'value': sets_value, 'type': type(sets_value).__name__}, + validate_mode, + warn + ) + return None + out: List[str] = [] + for set_id in sets_value: + if isinstance(set_id, str): + out.append(set_id) + continue + _issue( + 'Intersection set IDs must be strings', + {'index': entry_index, 'value': set_id, 'type': type(set_id).__name__}, + validate_mode, + warn + ) + if validate_mode == 'autofix': + out.append(str(set_id)) + return out + + +def _normalize_gfql_ops( + gfql_ops: Any, + validate_mode: ValidationMode, + warn: bool, + entry_index: int +) -> Optional[List[Dict[str, Any]]]: + from graphistry.compute.ast import ASTObject, from_json as ast_from_json + from graphistry.compute.chain import Chain + + if gfql_ops is None: + _issue('GFQL chain is missing', {'index': entry_index}, validate_mode, warn) + return None + if isinstance(gfql_ops, str): + try: + gfql_ops = json.loads(gfql_ops) + except json.JSONDecodeError as exc: + _issue('GFQL chain string must be JSON', {'index': entry_index, 'error': str(exc)}, validate_mode, warn) + return None + if isinstance(gfql_ops, Chain): + ops = gfql_ops.to_json().get('chain', []) + elif isinstance(gfql_ops, ASTObject): + ops = [gfql_ops.to_json()] + elif isinstance(gfql_ops, dict): + if 'chain' in gfql_ops: + ops = gfql_ops.get('chain', []) + else: + ops = [gfql_ops] + elif isinstance(gfql_ops, list): + ops = [] + for op in gfql_ops: + if isinstance(op, ASTObject): + ops.append(op.to_json()) + elif isinstance(op, dict): + ops.append(op) + else: + _issue( + 'GFQL operations must be AST 
objects or dicts', + {'index': entry_index, 'value': op, 'type': type(op).__name__}, + validate_mode, + warn + ) + if validate_mode == 'autofix': + continue + return None + else: + _issue( + 'GFQL operations must be a Chain, AST object, list, or dict', + {'index': entry_index, 'type': type(gfql_ops).__name__}, + validate_mode, + warn + ) + return None + + normalized_ops: List[Dict[str, Any]] = [] + for op in ops: + if not isinstance(op, dict): + _issue( + 'GFQL operations must be dictionaries after normalization', + {'index': entry_index, 'value': op, 'type': type(op).__name__}, + validate_mode, + warn + ) + if validate_mode == 'autofix': + continue + return None + try: + ast_from_json(op, validate=True) + except Exception as exc: + _issue( + 'Invalid GFQL operation in collection', + {'index': entry_index, 'op': op, 'error': str(exc)}, + validate_mode, + warn + ) + if validate_mode == 'autofix': + continue + return None + normalized_ops.append(op) + + if len(normalized_ops) == 0: + _issue('GFQL chain is empty', {'index': entry_index}, validate_mode, warn) + return None + return normalized_ops + + +def _normalize_gfql_expr( + expr: Any, + validate_mode: ValidationMode, + warn: bool, + entry_index: int +) -> Optional[Dict[str, Any]]: + if isinstance(expr, dict): + if expr.get('type') == 'intersection': + _issue('Set collection expr cannot be intersection', {'index': entry_index}, validate_mode, warn) + return None + if 'gfql' in expr or expr.get('type') == 'gfql_chain': + ops = _normalize_gfql_ops(expr.get('gfql'), validate_mode, warn, entry_index) + if ops is None: + return None + return {'type': 'gfql_chain', 'gfql': ops} + if 'chain' in expr: + ops = _normalize_gfql_ops(expr, validate_mode, warn, entry_index) + if ops is None: + return None + return {'type': 'gfql_chain', 'gfql': ops} + if 'type' in expr: + ops = _normalize_gfql_ops(expr, validate_mode, warn, entry_index) + if ops is None: + return None + return {'type': 'gfql_chain', 'gfql': ops} + ops = 
_normalize_gfql_ops(expr, validate_mode, warn, entry_index) + if ops is None: + return None + return {'type': 'gfql_chain', 'gfql': ops} + + +def _normalize_intersection_expr( + expr: Any, + validate_mode: ValidationMode, + warn: bool, + entry_index: int +) -> Optional[Dict[str, Any]]: + if not isinstance(expr, dict): + _issue('Intersection expr must be a dict', {'index': entry_index}, validate_mode, warn) + return None + expr_type = expr.get('type', 'intersection') + if expr_type != 'intersection': + _issue( + 'Intersection expr type must be "intersection"', + {'index': entry_index, 'value': expr_type}, + validate_mode, + warn + ) + return None + sets_value = expr.get('sets', expr.get('intersection')) + if sets_value is None: + _issue('Intersection expr missing "sets"', {'index': entry_index}, validate_mode, warn) + return None + sets_list = _normalize_sets_list(sets_value, validate_mode, warn, entry_index) + if sets_list is None: + return None + return {'type': 'intersection', 'sets': sets_list} + + +def normalize_collections( + collections: CollectionsInput, + validate: ValidationParam = 'autofix', + warn: bool = True +) -> List[Dict[str, Any]]: + validate_mode, warn = normalize_validation_params(validate, warn) + items = _parse_collections_input(collections, validate_mode, warn) + + normalized: List[Dict[str, Any]] = [] + for idx, entry in enumerate(items): + if not isinstance(entry, dict): + _issue( + 'Collection entries must be dictionaries', + {'index': idx, 'type': type(entry).__name__}, + validate_mode, + warn + ) + if validate_mode == 'autofix': + continue + return [] + + normalized_entry = dict(entry) + collection_type = normalized_entry.get('type', 'set') + if not isinstance(collection_type, str): + _issue( + 'Collection type must be a string', + {'index': idx, 'value': collection_type, 'type': type(collection_type).__name__}, + validate_mode, + warn + ) + if validate_mode == 'autofix': + collection_type = str(collection_type) + else: + continue + 
collection_type = collection_type.lower() + normalized_entry['type'] = collection_type + + if collection_type not in ('set', 'intersection'): + _issue( + 'Collection type must be "set" or "intersection"', + {'index': idx, 'value': collection_type}, + validate_mode, + warn + ) + if validate_mode == 'autofix': + continue + return [] + + for field in ('id', 'name', 'description', 'node_color', 'edge_color'): + _coerce_str_field(normalized_entry, field, validate_mode, warn, idx) + + expr = normalized_entry.get('expr') + if collection_type == 'intersection': + normalized_expr = _normalize_intersection_expr(expr, validate_mode, warn, idx) + else: + normalized_expr = _normalize_gfql_expr(expr, validate_mode, warn, idx) + if normalized_expr is None: + if validate_mode == 'autofix': + continue + return [] + normalized_entry['expr'] = normalized_expr + normalized.append(normalized_entry) + + return normalized + + +def normalize_collections_url_params( + url_params: Dict[str, Any], + validate: ValidationParam = 'autofix', + warn: bool = True +) -> Dict[str, Any]: + validate_mode, warn = normalize_validation_params(validate, warn) + updated = dict(url_params) + + if 'collections' in updated: + normalized = normalize_collections(updated['collections'], validate_mode, warn) + if len(normalized) > 0: + updated['collections'] = encode_collections(normalized, encode=True) + else: + if validate_mode in ('strict', 'strict-fast'): + return updated + updated.pop('collections', None) + + if 'showCollections' in updated: + value = updated['showCollections'] + if isinstance(value, bool): + pass + else: + try: + updated['showCollections'] = strtobool(value) + except Exception as exc: + _issue( + 'showCollections must be a boolean', + {'value': value, 'error': str(exc)}, + validate_mode, + warn + ) + if validate_mode == 'autofix': + updated.pop('showCollections', None) + + for color_key in ('collectionsGlobalNodeColor', 'collectionsGlobalEdgeColor'): + if color_key in updated: + value = 
updated[color_key] + if value is None: + updated.pop(color_key, None) + continue + if not isinstance(value, str): + _issue( + f'{color_key} must be a string', + {'value': value, 'type': type(value).__name__}, + validate_mode, + warn + ) + if validate_mode == 'autofix': + value = str(value) + if isinstance(value, str) and value.startswith('#'): + value = value[1:] + updated[color_key] = value + + return updated From 673eed8d8ebc77def71cdb23ac20e63817c85348 Mon Sep 17 00:00:00 2001 From: Leo Meyerovich Date: Mon, 29 Dec 2025 06:57:35 -0800 Subject: [PATCH 03/30] fix: satisfy mypy in collections validation --- graphistry/validate/validate_collections.py | 26 +++++++++++++++------ 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/graphistry/validate/validate_collections.py b/graphistry/validate/validate_collections.py index 310fc3dc75..0369477146 100644 --- a/graphistry/validate/validate_collections.py +++ b/graphistry/validate/validate_collections.py @@ -138,22 +138,23 @@ def _normalize_gfql_ops( except json.JSONDecodeError as exc: _issue('GFQL chain string must be JSON', {'index': entry_index, 'error': str(exc)}, validate_mode, warn) return None + ops_raw: Any if isinstance(gfql_ops, Chain): - ops = gfql_ops.to_json().get('chain', []) + ops_raw = gfql_ops.to_json().get('chain', []) elif isinstance(gfql_ops, ASTObject): - ops = [gfql_ops.to_json()] + ops_raw = [gfql_ops.to_json()] elif isinstance(gfql_ops, dict): if 'chain' in gfql_ops: - ops = gfql_ops.get('chain', []) + ops_raw = gfql_ops.get('chain', []) else: - ops = [gfql_ops] + ops_raw = [gfql_ops] elif isinstance(gfql_ops, list): - ops = [] + ops_raw = [] for op in gfql_ops: if isinstance(op, ASTObject): - ops.append(op.to_json()) + ops_raw.append(op.to_json()) elif isinstance(op, dict): - ops.append(op) + ops_raw.append(op) else: _issue( 'GFQL operations must be AST objects or dicts', @@ -173,6 +174,17 @@ def _normalize_gfql_ops( ) return None + if not isinstance(ops_raw, list): + _issue( + 'GFQL 
operations must be a list', + {'index': entry_index, 'value': ops_raw, 'type': type(ops_raw).__name__}, + validate_mode, + warn + ) + return None + + ops: List[Any] = ops_raw + normalized_ops: List[Dict[str, Any]] = [] for op in ops: if not isinstance(op, dict): From e1d05263470d3ee73d476aab0e0e1493669980ae Mon Sep 17 00:00:00 2001 From: Leo Meyerovich Date: Tue, 30 Dec 2025 13:49:56 -0800 Subject: [PATCH 04/30] feat: add collections helper constructors --- graphistry/__init__.py | 7 +++ graphistry/collections_helpers.py | 82 ++++++++++++++++++++++++++++ graphistry/tests/test_collections.py | 24 ++++++++ 3 files changed, 113 insertions(+) create mode 100644 graphistry/collections_helpers.py diff --git a/graphistry/__init__.py b/graphistry/__init__.py index a3cc2a64f8..4058939b5f 100644 --- a/graphistry/__init__.py +++ b/graphistry/__init__.py @@ -62,6 +62,13 @@ from_cugraph ) +from graphistry.collections_helpers import ( + collection_set, + collection_intersection, + CollectionSet, + CollectionIntersection, +) + from graphistry.compute import ( n, e, e_forward, e_reverse, e_undirected, let, ref, diff --git a/graphistry/collections_helpers.py b/graphistry/collections_helpers.py new file mode 100644 index 0000000000..10576e0c0d --- /dev/null +++ b/graphistry/collections_helpers.py @@ -0,0 +1,82 @@ +from typing import Any, Dict, List, Optional, Sequence, TypedDict, Union + + +class CollectionSet(TypedDict, total=False): + """Collection definition for a GFQL-defined set.""" + type: str + id: str + name: str + description: str + node_color: str + edge_color: str + expr: Any + + +class CollectionIntersection(TypedDict, total=False): + """Collection definition for an intersection of sets.""" + type: str + id: str + name: str + description: str + node_color: str + edge_color: str + expr: Dict[str, Any] + + +Collection = Union[CollectionSet, CollectionIntersection] +CollectionsInput = Union[str, Collection, List[Collection], Dict[str, Any], List[Dict[str, Any]]] + + +def 
_apply_collection_metadata( + collection: Dict[str, Any], + id: Optional[str], + name: Optional[str], + description: Optional[str], + node_color: Optional[str], + edge_color: Optional[str], +) -> Dict[str, Any]: + if id is not None: + collection["id"] = id + if name is not None: + collection["name"] = name + if description is not None: + collection["description"] = description + if node_color is not None: + collection["node_color"] = node_color + if edge_color is not None: + collection["edge_color"] = edge_color + return collection + + +def collection_set( + *, + expr: Any, + id: Optional[str] = None, + name: Optional[str] = None, + description: Optional[str] = None, + node_color: Optional[str] = None, + edge_color: Optional[str] = None, +) -> CollectionSet: + """Build a collection dict for a GFQL-defined set.""" + collection: Dict[str, Any] = {"type": "set", "expr": expr} + return _apply_collection_metadata(collection, id, name, description, node_color, edge_color) # type: ignore[return-value] + + +def collection_intersection( + *, + sets: Sequence[str], + id: Optional[str] = None, + name: Optional[str] = None, + description: Optional[str] = None, + node_color: Optional[str] = None, + edge_color: Optional[str] = None, +) -> CollectionIntersection: + """Build a collection dict for an intersection of set IDs.""" + collection: Dict[str, Any] = { + "type": "intersection", + "expr": { + "type": "intersection", + "sets": list(sets), + }, + } + return _apply_collection_metadata(collection, id, name, description, node_color, edge_color) # type: ignore[return-value] diff --git a/graphistry/tests/test_collections.py b/graphistry/tests/test_collections.py index 019337af12..ad05dbfbb8 100644 --- a/graphistry/tests/test_collections.py +++ b/graphistry/tests/test_collections.py @@ -5,6 +5,7 @@ from urllib.parse import quote, unquote import graphistry +from graphistry.collections_helpers import collection_intersection, collection_set from graphistry.validate.validate_collections 
import normalize_collections_url_params @@ -53,6 +54,29 @@ def test_collections_encodes_and_normalizes(): assert g2._url_params["collectionsGlobalEdgeColor"] == "00AA00" +def test_collection_helpers_build_sets_and_intersections(): + collections = [ + collection_set( + expr=[graphistry.n({"vip": True})], + id="vip", + name="VIP", + node_color="#FFAA00", + ), + collection_intersection( + sets=["vip"], + name="VIP Intersection", + node_color="#00BFFF", + ), + ] + + g2 = graphistry.bind().collections(collections=collections) + decoded = decode_collections(g2._url_params["collections"]) + assert decoded[0]["type"] == "set" + assert decoded[0]["expr"]["type"] == "gfql_chain" + assert decoded[1]["type"] == "intersection" + assert decoded[1]["expr"] == {"type": "intersection", "sets": ["vip"]} + + def test_collections_accepts_chain_and_preserves_dataset_id(): chain = graphistry.Chain([graphistry.n({"type": "user"})]) g1 = graphistry.bind(dataset_id="dataset_123") From 858ff91ff7d972d62a1d4ad6c0c004c137e3b811 Mon Sep 17 00:00:00 2001 From: Leo Meyerovich Date: Tue, 30 Dec 2025 14:12:28 -0800 Subject: [PATCH 05/30] feat: wrap collection set expr to gfql chain --- graphistry/collections_helpers.py | 41 +++++++++++++++++++++++++++- graphistry/tests/test_collections.py | 6 ++++ 2 files changed, 46 insertions(+), 1 deletion(-) diff --git a/graphistry/collections_helpers.py b/graphistry/collections_helpers.py index 10576e0c0d..2ac2e11054 100644 --- a/graphistry/collections_helpers.py +++ b/graphistry/collections_helpers.py @@ -48,6 +48,45 @@ def _apply_collection_metadata( return collection +def _wrap_gfql_expr(expr: Any) -> Any: + if expr is None: + return expr + + from graphistry.compute.ast import ASTObject + from graphistry.compute.chain import Chain + + if isinstance(expr, dict): + expr_type = expr.get("type") + if expr_type == "intersection": + return expr + if expr_type == "gfql_chain" and "gfql" in expr: + return expr + if expr_type == "Chain" and "chain" in expr: + 
return {"type": "gfql_chain", "gfql": expr.get("chain", [])} + if "gfql" in expr: + return {"type": "gfql_chain", "gfql": expr.get("gfql")} + if "chain" in expr: + return {"type": "gfql_chain", "gfql": expr.get("chain")} + return {"type": "gfql_chain", "gfql": [expr]} + + if isinstance(expr, Chain): + return {"type": "gfql_chain", "gfql": expr.to_json().get("chain", [])} + + if isinstance(expr, ASTObject): + return {"type": "gfql_chain", "gfql": [expr.to_json()]} + + if isinstance(expr, list): + ops: List[Any] = [] + for op in expr: + if isinstance(op, ASTObject): + ops.append(op.to_json()) + else: + ops.append(op) + return {"type": "gfql_chain", "gfql": ops} + + return expr + + def collection_set( *, expr: Any, @@ -58,7 +97,7 @@ def collection_set( edge_color: Optional[str] = None, ) -> CollectionSet: """Build a collection dict for a GFQL-defined set.""" - collection: Dict[str, Any] = {"type": "set", "expr": expr} + collection: Dict[str, Any] = {"type": "set", "expr": _wrap_gfql_expr(expr)} return _apply_collection_metadata(collection, id, name, description, node_color, edge_color) # type: ignore[return-value] diff --git a/graphistry/tests/test_collections.py b/graphistry/tests/test_collections.py index ad05dbfbb8..dde8671e84 100644 --- a/graphistry/tests/test_collections.py +++ b/graphistry/tests/test_collections.py @@ -77,6 +77,12 @@ def test_collection_helpers_build_sets_and_intersections(): assert decoded[1]["expr"] == {"type": "intersection", "sets": ["vip"]} +def test_collection_set_wraps_ast_expr(): + collection = collection_set(expr=graphistry.n({"vip": True}), id="vip") + assert collection["expr"]["type"] == "gfql_chain" + assert collection["expr"]["gfql"][0]["type"] == "Node" + + def test_collections_accepts_chain_and_preserves_dataset_id(): chain = graphistry.Chain([graphistry.n({"type": "user"})]) g1 = graphistry.bind(dataset_id="dataset_123") From 6b5370c3c9ffe65fe4dcd5f3262521a35c012d2a Mon Sep 17 00:00:00 2001 From: Leo Meyerovich Date: Fri, 2 Jan 
2026 01:48:21 -0800 Subject: [PATCH 06/30] Refine collections types and helpers --- graphistry/Plottable.py | 3 +- graphistry/PlotterBase.py | 3 +- graphistry/__init__.py | 2 +- graphistry/collections.py | 99 ++++++++++++++++ graphistry/collections_helpers.py | 121 -------------------- graphistry/models/collections.py | 61 ++++++++++ graphistry/pygraphistry.py | 3 +- graphistry/tests/test_collections.py | 85 +++++++------- graphistry/validate/validate_collections.py | 55 ++++++++- 9 files changed, 262 insertions(+), 170 deletions(-) create mode 100644 graphistry/collections.py delete mode 100644 graphistry/collections_helpers.py create mode 100644 graphistry/models/collections.py diff --git a/graphistry/Plottable.py b/graphistry/Plottable.py index bc83b2e68c..271392f56f 100644 --- a/graphistry/Plottable.py +++ b/graphistry/Plottable.py @@ -17,6 +17,7 @@ from graphistry.Engine import EngineAbstractType from graphistry.utils.json import JSONVal from graphistry.client_session import ClientSession, AuthManagerProtocol +from graphistry.models.collections import CollectionsInput from graphistry.models.types import ValidationParam if TYPE_CHECKING: @@ -785,7 +786,7 @@ def settings(self, def collections( self, - collections: Optional[Union[str, Dict[str, Any], List[Dict[str, Any]]]] = None, + collections: Optional[CollectionsInput] = None, show_collections: Optional[bool] = None, collections_global_node_color: Optional[str] = None, collections_global_edge_color: Optional[str] = None, diff --git a/graphistry/PlotterBase.py b/graphistry/PlotterBase.py index 7520580eed..2497a47437 100644 --- a/graphistry/PlotterBase.py +++ b/graphistry/PlotterBase.py @@ -2,6 +2,7 @@ from typing import Any, Callable, Dict, List, Optional, Union, Tuple, cast, overload, TYPE_CHECKING from typing_extensions import Literal from graphistry.io.types import ComplexEncodingsDict +from graphistry.models.collections import CollectionsInput from graphistry.models.types import ValidationMode, 
ValidationParam from graphistry.plugins_types.hypergraph import HypergraphResult from graphistry.render.resolve_render_mode import resolve_render_mode @@ -1848,7 +1849,7 @@ def settings(self, height=None, url_params={}, render=None): def collections( self, - collections: Optional[Union[str, Dict[str, Any], List[Dict[str, Any]]]] = None, + collections: Optional[CollectionsInput] = None, show_collections: Optional[bool] = None, collections_global_node_color: Optional[str] = None, collections_global_edge_color: Optional[str] = None, diff --git a/graphistry/__init__.py b/graphistry/__init__.py index 4058939b5f..ca4ceda33c 100644 --- a/graphistry/__init__.py +++ b/graphistry/__init__.py @@ -62,7 +62,7 @@ from_cugraph ) -from graphistry.collections_helpers import ( +from graphistry.collections import ( collection_set, collection_intersection, CollectionSet, diff --git a/graphistry/collections.py b/graphistry/collections.py new file mode 100644 index 0000000000..a11198c0bf --- /dev/null +++ b/graphistry/collections.py @@ -0,0 +1,99 @@ +from typing import List, Optional, Sequence, TypeVar, cast + +from graphistry.models.collections import ( + CollectionIntersection, + CollectionSet, + GFQLChainInput, + GFQLChainWire, + GFQLWireOp, +) + +CollectionDict = TypeVar("CollectionDict", CollectionSet, CollectionIntersection) + + +def _apply_collection_metadata(collection: CollectionDict, **metadata: Optional[str]) -> CollectionDict: + collection.update({key: value for key, value in metadata.items() if value is not None}) + return collection + + +def _wrap_gfql_expr(expr: GFQLChainInput) -> GFQLChainWire: + + from graphistry.compute.ast import ASTObject + from graphistry.compute.chain import Chain + + if isinstance(expr, dict): + expr_type = expr.get("type") + if expr_type == "gfql_chain" and "gfql" in expr: + return expr + if expr_type == "Chain" and "chain" in expr: + return {"type": "gfql_chain", "gfql": expr.get("chain", [])} + if "gfql" in expr: + return {"type": "gfql_chain", 
"gfql": expr.get("gfql")} + if "chain" in expr: + return {"type": "gfql_chain", "gfql": expr.get("chain")} + return {"type": "gfql_chain", "gfql": [expr]} + + if isinstance(expr, Chain): + return {"type": "gfql_chain", "gfql": expr.to_json().get("chain", [])} + + if isinstance(expr, ASTObject): + return {"type": "gfql_chain", "gfql": [expr.to_json()]} + + if isinstance(expr, list): + ops: List[GFQLWireOp] = [] + for op in expr: + if isinstance(op, ASTObject): + ops.append(op.to_json()) + else: + ops.append(cast(GFQLWireOp, op)) + return {"type": "gfql_chain", "gfql": ops} + + return {"type": "gfql_chain", "gfql": [cast(GFQLWireOp, expr)]} + + +def collection_set( + *, + expr: GFQLChainInput, + id: Optional[str] = None, + name: Optional[str] = None, + description: Optional[str] = None, + node_color: Optional[str] = None, + edge_color: Optional[str] = None, +) -> CollectionSet: + """Build a collection dict for a GFQL-defined set.""" + collection: CollectionSet = {"type": "set", "expr": _wrap_gfql_expr(expr)} + return _apply_collection_metadata( + collection, + id=id, + name=name, + description=description, + node_color=node_color, + edge_color=edge_color, + ) + + +def collection_intersection( + *, + sets: Sequence[str], + id: Optional[str] = None, + name: Optional[str] = None, + description: Optional[str] = None, + node_color: Optional[str] = None, + edge_color: Optional[str] = None, +) -> CollectionIntersection: + """Build a collection dict for an intersection of set IDs.""" + collection: CollectionIntersection = { + "type": "intersection", + "expr": { + "type": "intersection", + "sets": list(sets), + }, + } + return _apply_collection_metadata( + collection, + id=id, + name=name, + description=description, + node_color=node_color, + edge_color=edge_color, + ) diff --git a/graphistry/collections_helpers.py b/graphistry/collections_helpers.py deleted file mode 100644 index 2ac2e11054..0000000000 --- a/graphistry/collections_helpers.py +++ /dev/null @@ -1,121 +0,0 @@ 
-from typing import Any, Dict, List, Optional, Sequence, TypedDict, Union - - -class CollectionSet(TypedDict, total=False): - """Collection definition for a GFQL-defined set.""" - type: str - id: str - name: str - description: str - node_color: str - edge_color: str - expr: Any - - -class CollectionIntersection(TypedDict, total=False): - """Collection definition for an intersection of sets.""" - type: str - id: str - name: str - description: str - node_color: str - edge_color: str - expr: Dict[str, Any] - - -Collection = Union[CollectionSet, CollectionIntersection] -CollectionsInput = Union[str, Collection, List[Collection], Dict[str, Any], List[Dict[str, Any]]] - - -def _apply_collection_metadata( - collection: Dict[str, Any], - id: Optional[str], - name: Optional[str], - description: Optional[str], - node_color: Optional[str], - edge_color: Optional[str], -) -> Dict[str, Any]: - if id is not None: - collection["id"] = id - if name is not None: - collection["name"] = name - if description is not None: - collection["description"] = description - if node_color is not None: - collection["node_color"] = node_color - if edge_color is not None: - collection["edge_color"] = edge_color - return collection - - -def _wrap_gfql_expr(expr: Any) -> Any: - if expr is None: - return expr - - from graphistry.compute.ast import ASTObject - from graphistry.compute.chain import Chain - - if isinstance(expr, dict): - expr_type = expr.get("type") - if expr_type == "intersection": - return expr - if expr_type == "gfql_chain" and "gfql" in expr: - return expr - if expr_type == "Chain" and "chain" in expr: - return {"type": "gfql_chain", "gfql": expr.get("chain", [])} - if "gfql" in expr: - return {"type": "gfql_chain", "gfql": expr.get("gfql")} - if "chain" in expr: - return {"type": "gfql_chain", "gfql": expr.get("chain")} - return {"type": "gfql_chain", "gfql": [expr]} - - if isinstance(expr, Chain): - return {"type": "gfql_chain", "gfql": expr.to_json().get("chain", [])} - - if 
isinstance(expr, ASTObject): - return {"type": "gfql_chain", "gfql": [expr.to_json()]} - - if isinstance(expr, list): - ops: List[Any] = [] - for op in expr: - if isinstance(op, ASTObject): - ops.append(op.to_json()) - else: - ops.append(op) - return {"type": "gfql_chain", "gfql": ops} - - return expr - - -def collection_set( - *, - expr: Any, - id: Optional[str] = None, - name: Optional[str] = None, - description: Optional[str] = None, - node_color: Optional[str] = None, - edge_color: Optional[str] = None, -) -> CollectionSet: - """Build a collection dict for a GFQL-defined set.""" - collection: Dict[str, Any] = {"type": "set", "expr": _wrap_gfql_expr(expr)} - return _apply_collection_metadata(collection, id, name, description, node_color, edge_color) # type: ignore[return-value] - - -def collection_intersection( - *, - sets: Sequence[str], - id: Optional[str] = None, - name: Optional[str] = None, - description: Optional[str] = None, - node_color: Optional[str] = None, - edge_color: Optional[str] = None, -) -> CollectionIntersection: - """Build a collection dict for an intersection of set IDs.""" - collection: Dict[str, Any] = { - "type": "intersection", - "expr": { - "type": "intersection", - "sets": list(sets), - }, - } - return _apply_collection_metadata(collection, id, name, description, node_color, edge_color) # type: ignore[return-value] diff --git a/graphistry/models/collections.py b/graphistry/models/collections.py new file mode 100644 index 0000000000..af29a7f575 --- /dev/null +++ b/graphistry/models/collections.py @@ -0,0 +1,61 @@ +from __future__ import annotations + +from typing import Dict, List, TYPE_CHECKING, Union +from typing_extensions import Literal, NotRequired, Required, TypedDict + +from graphistry.utils.json import JSONVal + +if TYPE_CHECKING: + from graphistry.compute.ast import ASTObject + from graphistry.compute.chain import Chain + + +GFQLWireOp = Dict[str, JSONVal] + + +class GFQLChainWire(TypedDict): + type: Literal["gfql_chain"] + 
gfql: List[GFQLWireOp] + + +class GFQLChainJSON(TypedDict): + type: Literal["Chain"] + chain: List[GFQLWireOp] + + +GFQLOpInput = Union["ASTObject", GFQLWireOp] +GFQLChainInput = Union[ + "Chain", + "ASTObject", + GFQLChainWire, + GFQLChainJSON, + List[GFQLOpInput], + GFQLWireOp, +] + + +class IntersectionExpr(TypedDict): + type: Literal["intersection"] + sets: List[str] + + +class CollectionBase(TypedDict, total=False): + id: str + name: str + description: str + node_color: str + edge_color: str + + +class CollectionSet(CollectionBase): + type: NotRequired[Literal["set"]] + expr: Required[GFQLChainInput] + + +class CollectionIntersection(CollectionBase): + type: NotRequired[Literal["intersection"]] + expr: Required[IntersectionExpr] + + +Collection = Union[CollectionSet, CollectionIntersection] +CollectionsInput = Union[str, Collection, List[Collection]] diff --git a/graphistry/pygraphistry.py b/graphistry/pygraphistry.py index 2e339ff188..171cc5082c 100644 --- a/graphistry/pygraphistry.py +++ b/graphistry/pygraphistry.py @@ -5,6 +5,7 @@ from graphistry.plugins_types.hypergraph import HypergraphResult from graphistry.client_session import ClientSession, ApiVersion, ENV_GRAPHISTRY_API_KEY, DatasetInfo, AuthManagerProtocol, strtobool from graphistry.Engine import EngineAbstractType +from graphistry.models.collections import CollectionsInput from graphistry.models.types import ValidationParam """Top-level import of class PyGraphistry as "Graphistry". 
Used to connect to the Graphistry server and then create a base plotter.""" @@ -2277,7 +2278,7 @@ def settings(self, height=None, url_params={}, render=None): def collections( self, - collections: Optional[Union[str, Dict[str, Any], List[Dict[str, Any]]]] = None, + collections: Optional[CollectionsInput] = None, show_collections: Optional[bool] = None, collections_global_node_color: Optional[str] = None, collections_global_edge_color: Optional[str] = None, diff --git a/graphistry/tests/test_collections.py b/graphistry/tests/test_collections.py index dde8671e84..c57da0d989 100644 --- a/graphistry/tests/test_collections.py +++ b/graphistry/tests/test_collections.py @@ -5,7 +5,7 @@ from urllib.parse import quote, unquote import graphistry -from graphistry.collections_helpers import collection_intersection, collection_set +from graphistry.collections import collection_intersection, collection_set from graphistry.validate.validate_collections import normalize_collections_url_params @@ -13,8 +13,11 @@ def decode_collections(encoded: str): return json.loads(unquote(encoded)) +def collections_url_params(collections, **kwargs): + return graphistry.bind().collections(collections=collections, **kwargs)._url_params + + def test_collections_encodes_and_normalizes(): - g = graphistry.bind() node_filter = graphistry.n({"subscribed_to_newsletter": True}) collections = [ { @@ -29,14 +32,14 @@ def test_collections_encodes_and_normalizes(): } ] - g2 = g.collections( - collections=collections, + url_params = collections_url_params( + collections, show_collections=True, collections_global_node_color="#00FF00", collections_global_edge_color="#00AA00", ) - decoded = decode_collections(g2._url_params["collections"]) + decoded = decode_collections(url_params["collections"]) assert decoded == [ { "type": "set", @@ -49,53 +52,40 @@ def test_collections_encodes_and_normalizes(): }, } ] - assert g2._url_params["showCollections"] is True - assert g2._url_params["collectionsGlobalNodeColor"] == 
"00FF00" - assert g2._url_params["collectionsGlobalEdgeColor"] == "00AA00" + assert url_params["showCollections"] is True + assert url_params["collectionsGlobalNodeColor"] == "00FF00" + assert url_params["collectionsGlobalEdgeColor"] == "00AA00" + + +@pytest.mark.parametrize("expr", [graphistry.n({"vip": True}), [graphistry.n({"vip": True})]]) +def test_collection_set_wraps_ast_expr(expr): + collection = collection_set(expr=expr, id="vip") + assert collection["expr"]["type"] == "gfql_chain" + assert collection["expr"]["gfql"][0]["type"] == "Node" def test_collection_helpers_build_sets_and_intersections(): collections = [ - collection_set( - expr=[graphistry.n({"vip": True})], - id="vip", - name="VIP", - node_color="#FFAA00", - ), - collection_intersection( - sets=["vip"], - name="VIP Intersection", - node_color="#00BFFF", - ), + collection_set(expr=[graphistry.n({"vip": True})], id="vip", name="VIP", node_color="#FFAA00"), + collection_intersection(sets=["vip"], name="VIP Intersection", node_color="#00BFFF"), ] - - g2 = graphistry.bind().collections(collections=collections) - decoded = decode_collections(g2._url_params["collections"]) + decoded = decode_collections(collections_url_params(collections)["collections"]) assert decoded[0]["type"] == "set" assert decoded[0]["expr"]["type"] == "gfql_chain" - assert decoded[1]["type"] == "intersection" assert decoded[1]["expr"] == {"type": "intersection", "sets": ["vip"]} -def test_collection_set_wraps_ast_expr(): - collection = collection_set(expr=graphistry.n({"vip": True}), id="vip") - assert collection["expr"]["type"] == "gfql_chain" - assert collection["expr"]["gfql"][0]["type"] == "Node" - - def test_collections_accepts_chain_and_preserves_dataset_id(): - chain = graphistry.Chain([graphistry.n({"type": "user"})]) - g1 = graphistry.bind(dataset_id="dataset_123") - - g2 = g1.collections(collections={"type": "set", "expr": chain}) - + node = graphistry.n({"type": "user"}) + chain = graphistry.Chain([node]) + g2 = 
graphistry.bind(dataset_id="dataset_123").collections(collections={"type": "set", "expr": chain}) decoded = decode_collections(g2._url_params["collections"]) assert decoded == [ { "type": "set", "expr": { "type": "gfql_chain", - "gfql": [graphistry.n({"type": "user"}).to_json()], + "gfql": [node.to_json()], }, } ] @@ -103,10 +93,10 @@ def test_collections_accepts_chain_and_preserves_dataset_id(): def test_collections_encode_false_keeps_string(): - raw = json.dumps([{"type": "intersection", "expr": {"type": "intersection", "sets": ["a"]}}], separators=(",", ":")) + raw = '[{"type":"intersection","expr":{"type":"intersection","sets":["a"]}}]' encoded = quote(raw, safe="") - g2 = graphistry.bind().collections(collections=encoded, encode=False) - assert g2._url_params["collections"] == encoded + url_params = collections_url_params(encoded, encode=False) + assert url_params["collections"] == encoded def test_collections_accepts_wire_protocol_chain(): @@ -121,8 +111,9 @@ def test_collections_accepts_wire_protocol_chain(): } ] } - g2 = graphistry.bind().collections(collections={"type": "set", "expr": chain_json}) - decoded = decode_collections(g2._url_params["collections"]) + decoded = decode_collections( + collections_url_params({"type": "set", "expr": chain_json})["collections"] + ) assert decoded == [ { "type": "set", @@ -134,6 +125,20 @@ def test_collections_accepts_wire_protocol_chain(): ] +def test_collections_drop_unexpected_fields_autofix(): + collections = [ + { + "type": "set", + "expr": [graphistry.n({"vip": True})], + "unexpected": "drop-me", + } + ] + decoded = decode_collections( + collections_url_params(collections, validate="autofix", warn=False)["collections"] + ) + assert "unexpected" not in decoded[0] + + def test_collections_validation_strict_raises(): bad_collections = [{"type": "set", "expr": [{"filter_dict": {"a": 1}}]}] with pytest.raises(ValueError): diff --git a/graphistry/validate/validate_collections.py 
b/graphistry/validate/validate_collections.py index 0369477146..bc0af89c28 100644 --- a/graphistry/validate/validate_collections.py +++ b/graphistry/validate/validate_collections.py @@ -3,10 +3,18 @@ from urllib.parse import quote, unquote from graphistry.client_session import strtobool +from graphistry.models.collections import CollectionsInput from graphistry.models.types import ValidationMode, ValidationParam from graphistry.util import warn as emit_warn - -CollectionsInput = Union[str, Dict[str, Any], List[Dict[str, Any]]] +_ALLOWED_COLLECTION_FIELDS = { + 'type', + 'id', + 'name', + 'description', + 'node_color', + 'edge_color', + 'expr', +} def normalize_validation_params( @@ -41,15 +49,43 @@ def _issue( emit_warn(f"Collections validation warning: {message} ({data})") +def _reparse_collections_payload( + collections: Union[Dict[str, Any], List[Dict[str, Any]]], + validate_mode: ValidationMode, + warn: bool +) -> List[Dict[str, Any]]: + from graphistry.compute.ast import ASTObject + from graphistry.compute.chain import Chain + + def _default(obj: Any) -> Any: + if isinstance(obj, Chain): + return obj.to_json() + if isinstance(obj, ASTObject): + return obj.to_json() + raise TypeError(f"Object of type {type(obj).__name__} is not JSON-serializable") + + try: + parsed = json.loads(json.dumps(collections, default=_default, ensure_ascii=True)) + except (TypeError, ValueError) as exc: + _issue('Collections must be JSON-serializable', {'error': str(exc)}, validate_mode, warn) + return [] + if isinstance(parsed, list): + return parsed + if isinstance(parsed, dict): + return [parsed] + _issue('Collections JSON must be a list or dict', {'type': type(parsed).__name__}, validate_mode, warn) + return [] + + def _parse_collections_input( collections: CollectionsInput, validate_mode: ValidationMode, warn: bool ) -> List[Dict[str, Any]]: if isinstance(collections, list): - return collections + return _reparse_collections_payload(collections, validate_mode, warn) if 
isinstance(collections, dict): - return [collections] + return _reparse_collections_payload(collections, validate_mode, warn) if isinstance(collections, str): try: parsed = json.loads(collections) @@ -297,7 +333,16 @@ def normalize_collections( continue return [] - normalized_entry = dict(entry) + unexpected_fields = [key for key in entry.keys() if key not in _ALLOWED_COLLECTION_FIELDS] + if unexpected_fields: + _issue( + 'Unexpected fields in collection', + {'index': idx, 'fields': unexpected_fields}, + validate_mode, + warn + ) + + normalized_entry = {key: entry[key] for key in _ALLOWED_COLLECTION_FIELDS if key in entry} collection_type = normalized_entry.get('type', 'set') if not isinstance(collection_type, str): _issue( From 04dd22825bc8e739cabc5dac4ca88a4b7310bd18 Mon Sep 17 00:00:00 2001 From: Leo Meyerovich Date: Sun, 4 Jan 2026 23:07:40 -0800 Subject: [PATCH 07/30] Fix collections typing for mypy --- graphistry/collections.py | 52 ++++++++++++++------- graphistry/validate/validate_collections.py | 4 +- 2 files changed, 37 insertions(+), 19 deletions(-) diff --git a/graphistry/collections.py b/graphistry/collections.py index a11198c0bf..593e85a502 100644 --- a/graphistry/collections.py +++ b/graphistry/collections.py @@ -12,7 +12,21 @@ def _apply_collection_metadata(collection: CollectionDict, **metadata: Optional[str]) -> CollectionDict: - collection.update({key: value for key, value in metadata.items() if value is not None}) + value = metadata.get("id") + if value is not None: + collection["id"] = value + value = metadata.get("name") + if value is not None: + collection["name"] = value + value = metadata.get("description") + if value is not None: + collection["description"] = value + value = metadata.get("node_color") + if value is not None: + collection["node_color"] = value + value = metadata.get("edge_color") + if value is not None: + collection["edge_color"] = value return collection @@ -21,34 +35,38 @@ def _wrap_gfql_expr(expr: GFQLChainInput) -> 
GFQLChainWire: from graphistry.compute.ast import ASTObject from graphistry.compute.chain import Chain + def _normalize_ops(raw: object) -> List[GFQLWireOp]: + if isinstance(raw, list): + ops: List[GFQLWireOp] = [] + for op in raw: + if isinstance(op, ASTObject): + ops.append(op.to_json()) + else: + ops.append(cast(GFQLWireOp, op)) + return ops + if isinstance(raw, ASTObject): + return [raw.to_json()] + return [cast(GFQLWireOp, raw)] + if isinstance(expr, dict): expr_type = expr.get("type") if expr_type == "gfql_chain" and "gfql" in expr: - return expr + return {"type": "gfql_chain", "gfql": _normalize_ops(expr.get("gfql"))} if expr_type == "Chain" and "chain" in expr: - return {"type": "gfql_chain", "gfql": expr.get("chain", [])} + return {"type": "gfql_chain", "gfql": _normalize_ops(expr.get("chain"))} if "gfql" in expr: - return {"type": "gfql_chain", "gfql": expr.get("gfql")} + return {"type": "gfql_chain", "gfql": _normalize_ops(expr.get("gfql"))} if "chain" in expr: - return {"type": "gfql_chain", "gfql": expr.get("chain")} - return {"type": "gfql_chain", "gfql": [expr]} + return {"type": "gfql_chain", "gfql": _normalize_ops(expr.get("chain"))} + return {"type": "gfql_chain", "gfql": _normalize_ops(expr)} if isinstance(expr, Chain): - return {"type": "gfql_chain", "gfql": expr.to_json().get("chain", [])} + return {"type": "gfql_chain", "gfql": _normalize_ops(expr.to_json().get("chain", []))} if isinstance(expr, ASTObject): return {"type": "gfql_chain", "gfql": [expr.to_json()]} - if isinstance(expr, list): - ops: List[GFQLWireOp] = [] - for op in expr: - if isinstance(op, ASTObject): - ops.append(op.to_json()) - else: - ops.append(cast(GFQLWireOp, op)) - return {"type": "gfql_chain", "gfql": ops} - - return {"type": "gfql_chain", "gfql": [cast(GFQLWireOp, expr)]} + return {"type": "gfql_chain", "gfql": _normalize_ops(expr)} def collection_set( diff --git a/graphistry/validate/validate_collections.py b/graphistry/validate/validate_collections.py index 
bc0af89c28..5a818fb90a 100644 --- a/graphistry/validate/validate_collections.py +++ b/graphistry/validate/validate_collections.py @@ -3,7 +3,7 @@ from urllib.parse import quote, unquote from graphistry.client_session import strtobool -from graphistry.models.collections import CollectionsInput +from graphistry.models.collections import Collection, CollectionsInput from graphistry.models.types import ValidationMode, ValidationParam from graphistry.util import warn as emit_warn _ALLOWED_COLLECTION_FIELDS = { @@ -50,7 +50,7 @@ def _issue( def _reparse_collections_payload( - collections: Union[Dict[str, Any], List[Dict[str, Any]]], + collections: Union[Collection, List[Collection]], validate_mode: ValidationMode, warn: bool ) -> List[Dict[str, Any]]: From 322d152c3ad663ae6befbf7366813a957e3f4b21 Mon Sep 17 00:00:00 2001 From: Leo Meyerovich Date: Wed, 7 Jan 2026 10:40:51 -0800 Subject: [PATCH 08/30] Validate collections settings inputs --- graphistry/PlotterBase.py | 23 +++++++++++++++-------- graphistry/tests/test_collections.py | 10 ++++++++++ 2 files changed, 25 insertions(+), 8 deletions(-) diff --git a/graphistry/PlotterBase.py b/graphistry/PlotterBase.py index 2497a47437..6ecb2b58f8 100644 --- a/graphistry/PlotterBase.py +++ b/graphistry/PlotterBase.py @@ -1867,24 +1867,31 @@ def collections( :param validate: Validation mode. 'autofix' (default) warns on issues, 'strict' raises on issues. :param warn: Whether to emit warnings when validate='autofix'. validate=False forces warn=False. 
""" - from graphistry.validate.validate_collections import encode_collections, normalize_collections - - def strip_hash(value: str) -> str: - return value[1:] if value.startswith('#') else value + from graphistry.validate.validate_collections import ( + encode_collections, + normalize_collections, + normalize_collections_url_params, + normalize_validation_params, + ) + validate_mode, warn = normalize_validation_params(validate, warn) settings: Dict[str, Any] = {} if collections is not None: - normalized = normalize_collections(collections, validate=validate, warn=warn) + normalized = normalize_collections(collections, validate=validate_mode, warn=warn) if isinstance(collections, str) and not encode: settings['collections'] = collections else: settings['collections'] = encode_collections(normalized, encode=encode) + extras: Dict[str, Any] = {} if show_collections is not None: - settings['showCollections'] = show_collections + extras['showCollections'] = show_collections if collections_global_node_color is not None: - settings['collectionsGlobalNodeColor'] = strip_hash(collections_global_node_color) + extras['collectionsGlobalNodeColor'] = collections_global_node_color if collections_global_edge_color is not None: - settings['collectionsGlobalEdgeColor'] = strip_hash(collections_global_edge_color) + extras['collectionsGlobalEdgeColor'] = collections_global_edge_color + if extras: + extras = normalize_collections_url_params(extras, validate=validate_mode, warn=warn) + settings.update(extras) if len(settings.keys()) > 0: return self.settings(url_params={**self._url_params, **settings}) diff --git a/graphistry/tests/test_collections.py b/graphistry/tests/test_collections.py index c57da0d989..96ae55d170 100644 --- a/graphistry/tests/test_collections.py +++ b/graphistry/tests/test_collections.py @@ -139,6 +139,16 @@ def test_collections_drop_unexpected_fields_autofix(): assert "unexpected" not in decoded[0] +def test_collections_show_collections_coerces_autofix(): + g2 = 
graphistry.bind().collections(show_collections="true", validate="autofix") + assert g2._url_params["showCollections"] is True + + +def test_collections_show_collections_strict_raises(): + with pytest.raises(ValueError): + graphistry.bind().collections(show_collections="maybe", validate="strict") + + def test_collections_validation_strict_raises(): bad_collections = [{"type": "set", "expr": [{"filter_dict": {"a": 1}}]}] with pytest.raises(ValueError): From b170076b10e203db0d212da8b6c5916642945a28 Mon Sep 17 00:00:00 2001 From: Leo Meyerovich Date: Wed, 7 Jan 2026 11:02:38 -0800 Subject: [PATCH 09/30] Simplify collections typing --- graphistry/collections.py | 23 +++++++++++++---------- graphistry/models/collections.py | 25 +++++-------------------- 2 files changed, 18 insertions(+), 30 deletions(-) diff --git a/graphistry/collections.py b/graphistry/collections.py index 593e85a502..79da133704 100644 --- a/graphistry/collections.py +++ b/graphistry/collections.py @@ -1,12 +1,11 @@ -from typing import List, Optional, Sequence, TypeVar, cast +from typing import Dict, List, Optional, Sequence, TypeVar from graphistry.models.collections import ( CollectionIntersection, + CollectionExprInput, CollectionSet, - GFQLChainInput, - GFQLChainWire, - GFQLWireOp, ) +from graphistry.utils.json import JSONVal CollectionDict = TypeVar("CollectionDict", CollectionSet, CollectionIntersection) @@ -30,23 +29,27 @@ def _apply_collection_metadata(collection: CollectionDict, **metadata: Optional[ return collection -def _wrap_gfql_expr(expr: GFQLChainInput) -> GFQLChainWire: +def _wrap_gfql_expr(expr: CollectionExprInput) -> Dict[str, JSONVal]: from graphistry.compute.ast import ASTObject from graphistry.compute.chain import Chain - def _normalize_ops(raw: object) -> List[GFQLWireOp]: + def _normalize_ops(raw: object) -> List[Dict[str, JSONVal]]: if isinstance(raw, list): - ops: List[GFQLWireOp] = [] + ops: List[Dict[str, JSONVal]] = [] for op in raw: if isinstance(op, ASTObject): 
ops.append(op.to_json()) + elif isinstance(op, dict): + ops.append(op) else: - ops.append(cast(GFQLWireOp, op)) + raise TypeError("Collection GFQL operations must be AST objects or dictionaries") return ops if isinstance(raw, ASTObject): return [raw.to_json()] - return [cast(GFQLWireOp, raw)] + if isinstance(raw, dict): + return [raw] + raise TypeError("Collection GFQL operations must be a list, AST object, or dictionary") if isinstance(expr, dict): expr_type = expr.get("type") @@ -71,7 +74,7 @@ def _normalize_ops(raw: object) -> List[GFQLWireOp]: def collection_set( *, - expr: GFQLChainInput, + expr: CollectionExprInput, id: Optional[str] = None, name: Optional[str] = None, description: Optional[str] = None, diff --git a/graphistry/models/collections.py b/graphistry/models/collections.py index af29a7f575..d12896d663 100644 --- a/graphistry/models/collections.py +++ b/graphistry/models/collections.py @@ -10,27 +10,12 @@ from graphistry.compute.chain import Chain -GFQLWireOp = Dict[str, JSONVal] - - -class GFQLChainWire(TypedDict): - type: Literal["gfql_chain"] - gfql: List[GFQLWireOp] - - -class GFQLChainJSON(TypedDict): - type: Literal["Chain"] - chain: List[GFQLWireOp] - - -GFQLOpInput = Union["ASTObject", GFQLWireOp] -GFQLChainInput = Union[ +CollectionExprInput = Union[ "Chain", "ASTObject", - GFQLChainWire, - GFQLChainJSON, - List[GFQLOpInput], - GFQLWireOp, + List["ASTObject"], + Dict[str, JSONVal], + List[Dict[str, JSONVal]], ] @@ -49,7 +34,7 @@ class CollectionBase(TypedDict, total=False): class CollectionSet(CollectionBase): type: NotRequired[Literal["set"]] - expr: Required[GFQLChainInput] + expr: Required[CollectionExprInput] class CollectionIntersection(CollectionBase): From 9064aee0eded523410814c203bc07e8fecae5136 Mon Sep 17 00:00:00 2001 From: Leo Meyerovich Date: Wed, 7 Jan 2026 11:09:33 -0800 Subject: [PATCH 10/30] Reuse gfql chain normalization in collections helpers --- graphistry/collections.py | 64 ++++++++++++++++++++++++++------------- 1 file 
changed, 43 insertions(+), 21 deletions(-) diff --git a/graphistry/collections.py b/graphistry/collections.py index 79da133704..0f014706ae 100644 --- a/graphistry/collections.py +++ b/graphistry/collections.py @@ -34,42 +34,64 @@ def _wrap_gfql_expr(expr: CollectionExprInput) -> Dict[str, JSONVal]: from graphistry.compute.ast import ASTObject from graphistry.compute.chain import Chain - def _normalize_ops(raw: object) -> List[Dict[str, JSONVal]]: + def _gfql_chain_from_ops(ops: List[ASTObject]) -> Dict[str, JSONVal]: + chain_json = Chain(ops).to_json() + return {"type": "gfql_chain", "gfql": chain_json.get("chain", [])} + + def _gfql_chain_from_wire_ops(ops: List[Dict[str, JSONVal]]) -> Dict[str, JSONVal]: + chain = Chain.from_json({"chain": ops}, validate=True) + chain_json = chain.to_json() + return {"type": "gfql_chain", "gfql": chain_json.get("chain", [])} + + def _list_to_wire_ops(raw: List[object]) -> List[Dict[str, JSONVal]]: + ops: List[Dict[str, JSONVal]] = [] + for op in raw: + if isinstance(op, ASTObject): + ops.append(op.to_json()) + elif isinstance(op, dict): + ops.append(op) + else: + raise TypeError("Collection GFQL operations must be AST objects or dictionaries") + return ops + + def _wrap_ops_value(raw: object) -> List[Dict[str, JSONVal]]: if isinstance(raw, list): - ops: List[Dict[str, JSONVal]] = [] - for op in raw: - if isinstance(op, ASTObject): - ops.append(op.to_json()) - elif isinstance(op, dict): - ops.append(op) - else: - raise TypeError("Collection GFQL operations must be AST objects or dictionaries") - return ops - if isinstance(raw, ASTObject): - return [raw.to_json()] - if isinstance(raw, dict): - return [raw] + return _list_to_wire_ops(raw) + if isinstance(raw, (ASTObject, dict)): + return _list_to_wire_ops([raw]) raise TypeError("Collection GFQL operations must be a list, AST object, or dictionary") if isinstance(expr, dict): expr_type = expr.get("type") if expr_type == "gfql_chain" and "gfql" in expr: - return {"type": "gfql_chain", 
"gfql": _normalize_ops(expr.get("gfql"))} + gfql_ops = expr.get("gfql") + return _gfql_chain_from_wire_ops(_wrap_ops_value(gfql_ops)) if expr_type == "Chain" and "chain" in expr: - return {"type": "gfql_chain", "gfql": _normalize_ops(expr.get("chain"))} + chain = Chain.from_json(expr, validate=True) + chain_json = chain.to_json() + return {"type": "gfql_chain", "gfql": chain_json.get("chain", [])} if "gfql" in expr: - return {"type": "gfql_chain", "gfql": _normalize_ops(expr.get("gfql"))} + gfql_ops = expr.get("gfql") + return _gfql_chain_from_wire_ops(_wrap_ops_value(gfql_ops)) if "chain" in expr: - return {"type": "gfql_chain", "gfql": _normalize_ops(expr.get("chain"))} - return {"type": "gfql_chain", "gfql": _normalize_ops(expr)} + chain = Chain.from_json(expr, validate=True) + chain_json = chain.to_json() + return {"type": "gfql_chain", "gfql": chain_json.get("chain", [])} + return _gfql_chain_from_wire_ops([expr]) if isinstance(expr, Chain): - return {"type": "gfql_chain", "gfql": _normalize_ops(expr.to_json().get("chain", []))} + chain_json = expr.to_json() + return {"type": "gfql_chain", "gfql": chain_json.get("chain", [])} if isinstance(expr, ASTObject): return {"type": "gfql_chain", "gfql": [expr.to_json()]} - return {"type": "gfql_chain", "gfql": _normalize_ops(expr)} + if isinstance(expr, list): + if all(isinstance(op, ASTObject) for op in expr): + return _gfql_chain_from_ops(expr) + return _gfql_chain_from_wire_ops(_list_to_wire_ops(expr)) + + raise TypeError("Collection expr must be an AST object, chain, list, or dict") def collection_set( From a7b2a2d958261c2055c309b690db2fab4a4d0c67 Mon Sep 17 00:00:00 2001 From: Leo Meyerovich Date: Wed, 7 Jan 2026 17:13:47 -0800 Subject: [PATCH 11/30] Refine collections gfql normalization for mypy --- graphistry/collections.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/graphistry/collections.py b/graphistry/collections.py index 0f014706ae..e321a3dac7 100644 --- 
a/graphistry/collections.py +++ b/graphistry/collections.py @@ -34,8 +34,8 @@ def _wrap_gfql_expr(expr: CollectionExprInput) -> Dict[str, JSONVal]: from graphistry.compute.ast import ASTObject from graphistry.compute.chain import Chain - def _gfql_chain_from_ops(ops: List[ASTObject]) -> Dict[str, JSONVal]: - chain_json = Chain(ops).to_json() + def _gfql_chain_from_ops(ops: Sequence[ASTObject]) -> Dict[str, JSONVal]: + chain_json = Chain(list(ops)).to_json() return {"type": "gfql_chain", "gfql": chain_json.get("chain", [])} def _gfql_chain_from_wire_ops(ops: List[Dict[str, JSONVal]]) -> Dict[str, JSONVal]: @@ -43,7 +43,7 @@ def _gfql_chain_from_wire_ops(ops: List[Dict[str, JSONVal]]) -> Dict[str, JSONVa chain_json = chain.to_json() return {"type": "gfql_chain", "gfql": chain_json.get("chain", [])} - def _list_to_wire_ops(raw: List[object]) -> List[Dict[str, JSONVal]]: + def _list_to_wire_ops(raw: Sequence[object]) -> List[Dict[str, JSONVal]]: ops: List[Dict[str, JSONVal]] = [] for op in raw: if isinstance(op, ASTObject): @@ -87,8 +87,9 @@ def _wrap_ops_value(raw: object) -> List[Dict[str, JSONVal]]: return {"type": "gfql_chain", "gfql": [expr.to_json()]} if isinstance(expr, list): - if all(isinstance(op, ASTObject) for op in expr): - return _gfql_chain_from_ops(expr) + ops_ast = [op for op in expr if isinstance(op, ASTObject)] + if len(ops_ast) == len(expr): + return _gfql_chain_from_ops(ops_ast) return _gfql_chain_from_wire_ops(_list_to_wire_ops(expr)) raise TypeError("Collection expr must be an AST object, chain, list, or dict") From 8389026b29d471e62f3948b6db5f22659c0882cb Mon Sep 17 00:00:00 2001 From: Leo Meyerovich Date: Thu, 8 Jan 2026 05:17:14 -0800 Subject: [PATCH 12/30] Normalize collections GFQL via Chain and reject Let --- graphistry/validate/validate_collections.py | 143 ++++++++++++-------- 1 file changed, 90 insertions(+), 53 deletions(-) diff --git a/graphistry/validate/validate_collections.py b/graphistry/validate/validate_collections.py index 
5a818fb90a..d0970976a2 100644 --- a/graphistry/validate/validate_collections.py +++ b/graphistry/validate/validate_collections.py @@ -162,7 +162,7 @@ def _normalize_gfql_ops( warn: bool, entry_index: int ) -> Optional[List[Dict[str, Any]]]: - from graphistry.compute.ast import ASTObject, from_json as ast_from_json + from graphistry.compute.ast import ASTLet, ASTObject from graphistry.compute.chain import Chain if gfql_ops is None: @@ -174,83 +174,120 @@ def _normalize_gfql_ops( except json.JSONDecodeError as exc: _issue('GFQL chain string must be JSON', {'index': entry_index, 'error': str(exc)}, validate_mode, warn) return None - ops_raw: Any - if isinstance(gfql_ops, Chain): - ops_raw = gfql_ops.to_json().get('chain', []) - elif isinstance(gfql_ops, ASTObject): - ops_raw = [gfql_ops.to_json()] - elif isinstance(gfql_ops, dict): - if 'chain' in gfql_ops: - ops_raw = gfql_ops.get('chain', []) - else: - ops_raw = [gfql_ops] - elif isinstance(gfql_ops, list): - ops_raw = [] - for op in gfql_ops: - if isinstance(op, ASTObject): - ops_raw.append(op.to_json()) - elif isinstance(op, dict): - ops_raw.append(op) - else: - _issue( - 'GFQL operations must be AST objects or dicts', - {'index': entry_index, 'value': op, 'type': type(op).__name__}, - validate_mode, - warn - ) - if validate_mode == 'autofix': - continue - return None - else: + + def _issue_let_not_supported(payload: Any) -> None: _issue( - 'GFQL operations must be a Chain, AST object, list, or dict', - {'index': entry_index, 'type': type(gfql_ops).__name__}, + 'Collections do not support GFQL Let/DAG expressions', + {'index': entry_index, 'value': payload, 'type': type(payload).__name__}, validate_mode, warn ) - return None - if not isinstance(ops_raw, list): + def _coerce_ops_list(raw: Any) -> Optional[List[Dict[str, Any]]]: + if isinstance(raw, list): + ops: List[Dict[str, Any]] = [] + for op in raw: + if isinstance(op, ASTObject): + ops.append(op.to_json()) + elif isinstance(op, dict): + ops.append(op) + else: 
+ _issue( + 'GFQL operations must be AST objects or dicts', + {'index': entry_index, 'value': op, 'type': type(op).__name__}, + validate_mode, + warn + ) + if validate_mode == 'autofix': + continue + return None + return ops + if isinstance(raw, ASTObject): + return [raw.to_json()] + if isinstance(raw, dict): + return [raw] _issue( - 'GFQL operations must be a list', - {'index': entry_index, 'value': ops_raw, 'type': type(ops_raw).__name__}, + 'GFQL operations must be a list, AST object, or dict', + {'index': entry_index, 'value': raw, 'type': type(raw).__name__}, validate_mode, warn ) return None - ops: List[Any] = ops_raw - - normalized_ops: List[Dict[str, Any]] = [] - for op in ops: - if not isinstance(op, dict): + def _normalize_chain_ops(raw_ops: List[Dict[str, Any]]) -> Optional[List[Dict[str, Any]]]: + if not isinstance(raw_ops, list): _issue( - 'GFQL operations must be dictionaries after normalization', - {'index': entry_index, 'value': op, 'type': type(op).__name__}, + 'GFQL operations must be a list', + {'index': entry_index, 'value': raw_ops, 'type': type(raw_ops).__name__}, validate_mode, warn ) - if validate_mode == 'autofix': - continue return None try: - ast_from_json(op, validate=True) + chain_obj = Chain.from_json({'chain': raw_ops}, validate=True) except Exception as exc: _issue( - 'Invalid GFQL operation in collection', - {'index': entry_index, 'op': op, 'error': str(exc)}, + 'Invalid GFQL chain in collection', + {'index': entry_index, 'error': str(exc)}, validate_mode, warn ) - if validate_mode == 'autofix': - continue return None - normalized_ops.append(op) + if any(isinstance(op, ASTLet) for op in chain_obj.chain): + _issue_let_not_supported(raw_ops) + return None + normalized_ops = _coerce_ops_list(chain_obj.to_json().get('chain', [])) + if normalized_ops is None: + return None + if len(normalized_ops) == 0: + _issue('GFQL chain is empty', {'index': entry_index}, validate_mode, warn) + return None + return normalized_ops - if 
len(normalized_ops) == 0: - _issue('GFQL chain is empty', {'index': entry_index}, validate_mode, warn) + if isinstance(gfql_ops, ASTLet): + _issue_let_not_supported(gfql_ops) return None - return normalized_ops + + if isinstance(gfql_ops, Chain): + if any(isinstance(op, ASTLet) for op in gfql_ops.chain): + _issue_let_not_supported(gfql_ops) + return None + ops_raw = _coerce_ops_list(gfql_ops.to_json().get('chain', [])) + if ops_raw is None: + return None + return _normalize_chain_ops(ops_raw) + if isinstance(gfql_ops, ASTObject): + ops_raw = _coerce_ops_list(gfql_ops) + if ops_raw is None: + return None + return _normalize_chain_ops(ops_raw) + if isinstance(gfql_ops, dict): + if gfql_ops.get('type') == 'Let' or 'bindings' in gfql_ops: + _issue_let_not_supported(gfql_ops) + return None + if 'chain' in gfql_ops: + ops_raw = _coerce_ops_list(gfql_ops.get('chain')) + elif 'gfql' in gfql_ops: + ops_raw = _coerce_ops_list(gfql_ops.get('gfql')) + else: + ops_raw = _coerce_ops_list(gfql_ops) + if ops_raw is None: + return None + return _normalize_chain_ops(ops_raw) + if isinstance(gfql_ops, list): + ops_raw = _coerce_ops_list(gfql_ops) + if ops_raw is None: + return None + return _normalize_chain_ops(ops_raw) + + _issue( + 'GFQL operations must be a Chain, AST object, list, or dict', + {'index': entry_index, 'type': type(gfql_ops).__name__}, + validate_mode, + warn + ) + return None + def _normalize_gfql_expr( From 1144977ab5deef5c1c7a1dd3fa4b564d2c4e822a Mon Sep 17 00:00:00 2001 From: Leo Meyerovich Date: Thu, 8 Jan 2026 05:24:09 -0800 Subject: [PATCH 13/30] Avoid Chain.from_json in collections normalization --- graphistry/collections.py | 65 ++++------ graphistry/validate/validate_collections.py | 130 +++++++++----------- 2 files changed, 84 insertions(+), 111 deletions(-) diff --git a/graphistry/collections.py b/graphistry/collections.py index e321a3dac7..5d4684a414 100644 --- a/graphistry/collections.py +++ b/graphistry/collections.py @@ -31,53 +31,43 @@ def 
_apply_collection_metadata(collection: CollectionDict, **metadata: Optional[ def _wrap_gfql_expr(expr: CollectionExprInput) -> Dict[str, JSONVal]: - from graphistry.compute.ast import ASTObject + from graphistry.compute.ast import ASTLet, ASTObject, from_json as ast_from_json from graphistry.compute.chain import Chain - def _gfql_chain_from_ops(ops: Sequence[ASTObject]) -> Dict[str, JSONVal]: - chain_json = Chain(list(ops)).to_json() - return {"type": "gfql_chain", "gfql": chain_json.get("chain", [])} - - def _gfql_chain_from_wire_ops(ops: List[Dict[str, JSONVal]]) -> Dict[str, JSONVal]: - chain = Chain.from_json({"chain": ops}, validate=True) - chain_json = chain.to_json() - return {"type": "gfql_chain", "gfql": chain_json.get("chain", [])} - - def _list_to_wire_ops(raw: Sequence[object]) -> List[Dict[str, JSONVal]]: - ops: List[Dict[str, JSONVal]] = [] - for op in raw: - if isinstance(op, ASTObject): - ops.append(op.to_json()) - elif isinstance(op, dict): - ops.append(op) - else: - raise TypeError("Collection GFQL operations must be AST objects or dictionaries") - return ops - - def _wrap_ops_value(raw: object) -> List[Dict[str, JSONVal]]: + def _normalize_op(op: object) -> Dict[str, JSONVal]: + if isinstance(op, ASTLet): + raise TypeError("Collection GFQL does not support Let/DAG expressions") + if isinstance(op, ASTObject): + return op.to_json() + if isinstance(op, dict): + if op.get("type") == "Let" or "bindings" in op: + raise TypeError("Collection GFQL does not support Let/DAG expressions") + parsed = ast_from_json(op, validate=True) + if isinstance(parsed, ASTLet): + raise TypeError("Collection GFQL does not support Let/DAG expressions") + return parsed.to_json() + raise TypeError("Collection GFQL operations must be AST objects or dictionaries") + + def _normalize_ops_value(raw: object) -> List[Dict[str, JSONVal]]: if isinstance(raw, list): - return _list_to_wire_ops(raw) - if isinstance(raw, (ASTObject, dict)): - return _list_to_wire_ops([raw]) - raise 
TypeError("Collection GFQL operations must be a list, AST object, or dictionary") + if len(raw) == 0: + raise ValueError("Collection GFQL operations list cannot be empty") + return [_normalize_op(op) for op in raw] + return [_normalize_op(raw)] if isinstance(expr, dict): expr_type = expr.get("type") if expr_type == "gfql_chain" and "gfql" in expr: gfql_ops = expr.get("gfql") - return _gfql_chain_from_wire_ops(_wrap_ops_value(gfql_ops)) + return {"type": "gfql_chain", "gfql": _normalize_ops_value(gfql_ops)} if expr_type == "Chain" and "chain" in expr: - chain = Chain.from_json(expr, validate=True) - chain_json = chain.to_json() - return {"type": "gfql_chain", "gfql": chain_json.get("chain", [])} + return {"type": "gfql_chain", "gfql": _normalize_ops_value(expr.get("chain"))} if "gfql" in expr: gfql_ops = expr.get("gfql") - return _gfql_chain_from_wire_ops(_wrap_ops_value(gfql_ops)) + return {"type": "gfql_chain", "gfql": _normalize_ops_value(gfql_ops)} if "chain" in expr: - chain = Chain.from_json(expr, validate=True) - chain_json = chain.to_json() - return {"type": "gfql_chain", "gfql": chain_json.get("chain", [])} - return _gfql_chain_from_wire_ops([expr]) + return {"type": "gfql_chain", "gfql": _normalize_ops_value(expr.get("chain"))} + return {"type": "gfql_chain", "gfql": _normalize_ops_value(expr)} if isinstance(expr, Chain): chain_json = expr.to_json() @@ -87,10 +77,7 @@ def _wrap_ops_value(raw: object) -> List[Dict[str, JSONVal]]: return {"type": "gfql_chain", "gfql": [expr.to_json()]} if isinstance(expr, list): - ops_ast = [op for op in expr if isinstance(op, ASTObject)] - if len(ops_ast) == len(expr): - return _gfql_chain_from_ops(ops_ast) - return _gfql_chain_from_wire_ops(_list_to_wire_ops(expr)) + return {"type": "gfql_chain", "gfql": _normalize_ops_value(expr)} raise TypeError("Collection expr must be an AST object, chain, list, or dict") diff --git a/graphistry/validate/validate_collections.py b/graphistry/validate/validate_collections.py index 
d0970976a2..2ec4d865b2 100644 --- a/graphistry/validate/validate_collections.py +++ b/graphistry/validate/validate_collections.py @@ -162,7 +162,7 @@ def _normalize_gfql_ops( warn: bool, entry_index: int ) -> Optional[List[Dict[str, Any]]]: - from graphistry.compute.ast import ASTLet, ASTObject + from graphistry.compute.ast import ASTLet, ASTObject, from_json as ast_from_json from graphistry.compute.chain import Chain if gfql_ops is None: @@ -183,29 +183,57 @@ def _issue_let_not_supported(payload: Any) -> None: warn ) - def _coerce_ops_list(raw: Any) -> Optional[List[Dict[str, Any]]]: + def _normalize_op(op: object) -> Optional[Dict[str, Any]]: + if isinstance(op, ASTLet): + _issue_let_not_supported(op) + return None + if isinstance(op, ASTObject): + return op.to_json() + if isinstance(op, dict): + if op.get('type') == 'Let' or 'bindings' in op: + _issue_let_not_supported(op) + return None + try: + parsed = ast_from_json(op, validate=True) + except Exception as exc: + _issue( + 'Invalid GFQL operation in collection', + {'index': entry_index, 'op': op, 'error': str(exc)}, + validate_mode, + warn + ) + return None + if isinstance(parsed, ASTLet): + _issue_let_not_supported(op) + return None + return parsed.to_json() + _issue( + 'GFQL operations must be AST objects or dicts', + {'index': entry_index, 'value': op, 'type': type(op).__name__}, + validate_mode, + warn + ) + return None + + def _normalize_ops_list(raw: object) -> Optional[List[Dict[str, Any]]]: if isinstance(raw, list): - ops: List[Dict[str, Any]] = [] + normalized_ops: List[Dict[str, Any]] = [] for op in raw: - if isinstance(op, ASTObject): - ops.append(op.to_json()) - elif isinstance(op, dict): - ops.append(op) - else: - _issue( - 'GFQL operations must be AST objects or dicts', - {'index': entry_index, 'value': op, 'type': type(op).__name__}, - validate_mode, - warn - ) + normalized_op = _normalize_op(op) + if normalized_op is None: if validate_mode == 'autofix': continue return None - return ops - if 
isinstance(raw, ASTObject): - return [raw.to_json()] - if isinstance(raw, dict): - return [raw] + normalized_ops.append(normalized_op) + if len(normalized_ops) == 0: + _issue('GFQL chain is empty', {'index': entry_index}, validate_mode, warn) + return None + return normalized_ops + if isinstance(raw, (ASTObject, dict)): + normalized_op = _normalize_op(raw) + if normalized_op is None: + return None + return [normalized_op] _issue( 'GFQL operations must be a list, AST object, or dict', {'index': entry_index, 'value': raw, 'type': type(raw).__name__}, @@ -214,71 +242,29 @@ def _coerce_ops_list(raw: Any) -> Optional[List[Dict[str, Any]]]: ) return None - def _normalize_chain_ops(raw_ops: List[Dict[str, Any]]) -> Optional[List[Dict[str, Any]]]: - if not isinstance(raw_ops, list): - _issue( - 'GFQL operations must be a list', - {'index': entry_index, 'value': raw_ops, 'type': type(raw_ops).__name__}, - validate_mode, - warn - ) - return None - try: - chain_obj = Chain.from_json({'chain': raw_ops}, validate=True) - except Exception as exc: - _issue( - 'Invalid GFQL chain in collection', - {'index': entry_index, 'error': str(exc)}, - validate_mode, - warn - ) - return None - if any(isinstance(op, ASTLet) for op in chain_obj.chain): - _issue_let_not_supported(raw_ops) - return None - normalized_ops = _coerce_ops_list(chain_obj.to_json().get('chain', [])) - if normalized_ops is None: - return None - if len(normalized_ops) == 0: - _issue('GFQL chain is empty', {'index': entry_index}, validate_mode, warn) - return None - return normalized_ops - if isinstance(gfql_ops, ASTLet): _issue_let_not_supported(gfql_ops) return None if isinstance(gfql_ops, Chain): - if any(isinstance(op, ASTLet) for op in gfql_ops.chain): - _issue_let_not_supported(gfql_ops) - return None - ops_raw = _coerce_ops_list(gfql_ops.to_json().get('chain', [])) - if ops_raw is None: - return None - return _normalize_chain_ops(ops_raw) + return _normalize_ops_list(gfql_ops.chain) if isinstance(gfql_ops, 
ASTObject): - ops_raw = _coerce_ops_list(gfql_ops) - if ops_raw is None: - return None - return _normalize_chain_ops(ops_raw) + return _normalize_ops_list(gfql_ops) if isinstance(gfql_ops, dict): if gfql_ops.get('type') == 'Let' or 'bindings' in gfql_ops: _issue_let_not_supported(gfql_ops) return None + if gfql_ops.get('type') == 'Chain' and 'chain' in gfql_ops: + return _normalize_ops_list(gfql_ops.get('chain')) + if gfql_ops.get('type') == 'gfql_chain' and 'gfql' in gfql_ops: + return _normalize_ops_list(gfql_ops.get('gfql')) if 'chain' in gfql_ops: - ops_raw = _coerce_ops_list(gfql_ops.get('chain')) - elif 'gfql' in gfql_ops: - ops_raw = _coerce_ops_list(gfql_ops.get('gfql')) - else: - ops_raw = _coerce_ops_list(gfql_ops) - if ops_raw is None: - return None - return _normalize_chain_ops(ops_raw) + return _normalize_ops_list(gfql_ops.get('chain')) + if 'gfql' in gfql_ops: + return _normalize_ops_list(gfql_ops.get('gfql')) + return _normalize_ops_list(gfql_ops) if isinstance(gfql_ops, list): - ops_raw = _coerce_ops_list(gfql_ops) - if ops_raw is None: - return None - return _normalize_chain_ops(ops_raw) + return _normalize_ops_list(gfql_ops) _issue( 'GFQL operations must be a Chain, AST object, list, or dict', From 4e889b20e05f51b4ccfb1ba0acaa3fef6525f2ec Mon Sep 17 00:00:00 2001 From: Leo Meyerovich Date: Thu, 8 Jan 2026 09:06:24 -0800 Subject: [PATCH 14/30] Allow Let in collections normalization --- graphistry/collections.py | 8 +------ graphistry/tests/test_collections.py | 9 +++++++ graphistry/validate/validate_collections.py | 26 +-------------------- 3 files changed, 11 insertions(+), 32 deletions(-) diff --git a/graphistry/collections.py b/graphistry/collections.py index 5d4684a414..5603d7660b 100644 --- a/graphistry/collections.py +++ b/graphistry/collections.py @@ -31,20 +31,14 @@ def _apply_collection_metadata(collection: CollectionDict, **metadata: Optional[ def _wrap_gfql_expr(expr: CollectionExprInput) -> Dict[str, JSONVal]: - from 
graphistry.compute.ast import ASTLet, ASTObject, from_json as ast_from_json + from graphistry.compute.ast import ASTObject, from_json as ast_from_json from graphistry.compute.chain import Chain def _normalize_op(op: object) -> Dict[str, JSONVal]: - if isinstance(op, ASTLet): - raise TypeError("Collection GFQL does not support Let/DAG expressions") if isinstance(op, ASTObject): return op.to_json() if isinstance(op, dict): - if op.get("type") == "Let" or "bindings" in op: - raise TypeError("Collection GFQL does not support Let/DAG expressions") parsed = ast_from_json(op, validate=True) - if isinstance(parsed, ASTLet): - raise TypeError("Collection GFQL does not support Let/DAG expressions") return parsed.to_json() raise TypeError("Collection GFQL operations must be AST objects or dictionaries") diff --git a/graphistry/tests/test_collections.py b/graphistry/tests/test_collections.py index 96ae55d170..5516e795f0 100644 --- a/graphistry/tests/test_collections.py +++ b/graphistry/tests/test_collections.py @@ -125,6 +125,15 @@ def test_collections_accepts_wire_protocol_chain(): ] +def test_collections_accepts_let_expr(): + dag = graphistry.let({"seed": graphistry.n({"type": "user"})}) + decoded = decode_collections( + collections_url_params({"type": "set", "expr": dag})["collections"] + ) + assert decoded[0]["expr"]["type"] == "gfql_chain" + assert decoded[0]["expr"]["gfql"][0]["type"] == "Let" + + def test_collections_drop_unexpected_fields_autofix(): collections = [ { diff --git a/graphistry/validate/validate_collections.py b/graphistry/validate/validate_collections.py index 2ec4d865b2..4749cd587d 100644 --- a/graphistry/validate/validate_collections.py +++ b/graphistry/validate/validate_collections.py @@ -162,7 +162,7 @@ def _normalize_gfql_ops( warn: bool, entry_index: int ) -> Optional[List[Dict[str, Any]]]: - from graphistry.compute.ast import ASTLet, ASTObject, from_json as ast_from_json + from graphistry.compute.ast import ASTObject, from_json as ast_from_json 
from graphistry.compute.chain import Chain if gfql_ops is None: @@ -175,24 +175,10 @@ def _normalize_gfql_ops( _issue('GFQL chain string must be JSON', {'index': entry_index, 'error': str(exc)}, validate_mode, warn) return None - def _issue_let_not_supported(payload: Any) -> None: - _issue( - 'Collections do not support GFQL Let/DAG expressions', - {'index': entry_index, 'value': payload, 'type': type(payload).__name__}, - validate_mode, - warn - ) - def _normalize_op(op: object) -> Optional[Dict[str, Any]]: - if isinstance(op, ASTLet): - _issue_let_not_supported(op) - return None if isinstance(op, ASTObject): return op.to_json() if isinstance(op, dict): - if op.get('type') == 'Let' or 'bindings' in op: - _issue_let_not_supported(op) - return None try: parsed = ast_from_json(op, validate=True) except Exception as exc: @@ -203,9 +189,6 @@ def _normalize_op(op: object) -> Optional[Dict[str, Any]]: warn ) return None - if isinstance(parsed, ASTLet): - _issue_let_not_supported(op) - return None return parsed.to_json() _issue( 'GFQL operations must be AST objects or dicts', @@ -242,18 +225,11 @@ def _normalize_ops_list(raw: object) -> Optional[List[Dict[str, Any]]]: ) return None - if isinstance(gfql_ops, ASTLet): - _issue_let_not_supported(gfql_ops) - return None - if isinstance(gfql_ops, Chain): return _normalize_ops_list(gfql_ops.chain) if isinstance(gfql_ops, ASTObject): return _normalize_ops_list(gfql_ops) if isinstance(gfql_ops, dict): - if gfql_ops.get('type') == 'Let' or 'bindings' in gfql_ops: - _issue_let_not_supported(gfql_ops) - return None if gfql_ops.get('type') == 'Chain' and 'chain' in gfql_ops: return _normalize_ops_list(gfql_ops.get('chain')) if gfql_ops.get('type') == 'gfql_chain' and 'gfql' in gfql_ops: From 59ba9a34118a122bffe366f328b344222bd45f6e Mon Sep 17 00:00:00 2001 From: Leo Meyerovich Date: Thu, 8 Jan 2026 14:27:10 -0800 Subject: [PATCH 15/30] Simplify collections gfql wrapping --- graphistry/collections.py | 49 
+++++++++++++++------------------------ 1 file changed, 19 insertions(+), 30 deletions(-) diff --git a/graphistry/collections.py b/graphistry/collections.py index 5603d7660b..1e8a5b0166 100644 --- a/graphistry/collections.py +++ b/graphistry/collections.py @@ -38,42 +38,31 @@ def _normalize_op(op: object) -> Dict[str, JSONVal]: if isinstance(op, ASTObject): return op.to_json() if isinstance(op, dict): - parsed = ast_from_json(op, validate=True) - return parsed.to_json() + return ast_from_json(op, validate=True).to_json() raise TypeError("Collection GFQL operations must be AST objects or dictionaries") - def _normalize_ops_value(raw: object) -> List[Dict[str, JSONVal]]: + def _normalize_ops(raw: object) -> List[Dict[str, JSONVal]]: + if isinstance(raw, Chain): + return _normalize_ops(raw.to_json().get("chain", [])) + if isinstance(raw, ASTObject): + return [raw.to_json()] if isinstance(raw, list): if len(raw) == 0: raise ValueError("Collection GFQL operations list cannot be empty") return [_normalize_op(op) for op in raw] - return [_normalize_op(raw)] - - if isinstance(expr, dict): - expr_type = expr.get("type") - if expr_type == "gfql_chain" and "gfql" in expr: - gfql_ops = expr.get("gfql") - return {"type": "gfql_chain", "gfql": _normalize_ops_value(gfql_ops)} - if expr_type == "Chain" and "chain" in expr: - return {"type": "gfql_chain", "gfql": _normalize_ops_value(expr.get("chain"))} - if "gfql" in expr: - gfql_ops = expr.get("gfql") - return {"type": "gfql_chain", "gfql": _normalize_ops_value(gfql_ops)} - if "chain" in expr: - return {"type": "gfql_chain", "gfql": _normalize_ops_value(expr.get("chain"))} - return {"type": "gfql_chain", "gfql": _normalize_ops_value(expr)} - - if isinstance(expr, Chain): - chain_json = expr.to_json() - return {"type": "gfql_chain", "gfql": chain_json.get("chain", [])} - - if isinstance(expr, ASTObject): - return {"type": "gfql_chain", "gfql": [expr.to_json()]} - - if isinstance(expr, list): - return {"type": "gfql_chain", "gfql": 
_normalize_ops_value(expr)} - - raise TypeError("Collection expr must be an AST object, chain, list, or dict") + if isinstance(raw, dict): + if raw.get("type") == "Chain" and "chain" in raw: + return _normalize_ops(raw.get("chain")) + if raw.get("type") == "gfql_chain" and "gfql" in raw: + return _normalize_ops(raw.get("gfql")) + if "chain" in raw: + return _normalize_ops(raw.get("chain")) + if "gfql" in raw: + return _normalize_ops(raw.get("gfql")) + return [_normalize_op(raw)] + raise TypeError("Collection expr must be an AST object, chain, list, or dict") + + return {"type": "gfql_chain", "gfql": _normalize_ops(expr)} def collection_set( From 29883112e8af7052d7fd08706b731145f3b8f292 Mon Sep 17 00:00:00 2001 From: Leo Meyerovich Date: Thu, 8 Jan 2026 15:07:13 -0800 Subject: [PATCH 16/30] Slim collections validation helpers --- graphistry/validate/validate_collections.py | 105 ++++++++------------ 1 file changed, 42 insertions(+), 63 deletions(-) diff --git a/graphistry/validate/validate_collections.py b/graphistry/validate/validate_collections.py index 4749cd587d..db107b18ac 100644 --- a/graphistry/validate/validate_collections.py +++ b/graphistry/validate/validate_collections.py @@ -49,26 +49,11 @@ def _issue( emit_warn(f"Collections validation warning: {message} ({data})") -def _reparse_collections_payload( - collections: Union[Collection, List[Collection]], +def _coerce_collection_list( + parsed: Any, validate_mode: ValidationMode, warn: bool ) -> List[Dict[str, Any]]: - from graphistry.compute.ast import ASTObject - from graphistry.compute.chain import Chain - - def _default(obj: Any) -> Any: - if isinstance(obj, Chain): - return obj.to_json() - if isinstance(obj, ASTObject): - return obj.to_json() - raise TypeError(f"Object of type {type(obj).__name__} is not JSON-serializable") - - try: - parsed = json.loads(json.dumps(collections, default=_default, ensure_ascii=True)) - except (TypeError, ValueError) as exc: - _issue('Collections must be 
JSON-serializable', {'error': str(exc)}, validate_mode, warn) - return [] if isinstance(parsed, list): return parsed if isinstance(parsed, dict): @@ -82,10 +67,16 @@ def _parse_collections_input( validate_mode: ValidationMode, warn: bool ) -> List[Dict[str, Any]]: - if isinstance(collections, list): - return _reparse_collections_payload(collections, validate_mode, warn) - if isinstance(collections, dict): - return _reparse_collections_payload(collections, validate_mode, warn) + from graphistry.compute.ast import ASTObject + from graphistry.compute.chain import Chain + + def _default(obj: Any) -> Any: + if isinstance(obj, Chain): + return obj.to_json() + if isinstance(obj, ASTObject): + return obj.to_json() + raise TypeError(f"Object of type {type(obj).__name__} is not JSON-serializable") + if isinstance(collections, str): try: parsed = json.loads(collections) @@ -95,14 +86,14 @@ def _parse_collections_input( except json.JSONDecodeError as exc: _issue('Collections string must be JSON or URL-encoded JSON', {'error': str(exc)}, validate_mode, warn) return [] - if isinstance(parsed, list): - return parsed - if isinstance(parsed, dict): - return [parsed] - _issue('Collections JSON must be a list or dict', {'type': type(parsed).__name__}, validate_mode, warn) + return _coerce_collection_list(parsed, validate_mode, warn) + + try: + parsed = json.loads(json.dumps(collections, default=_default, ensure_ascii=True)) + except (TypeError, ValueError) as exc: + _issue('Collections must be JSON-serializable', {'error': str(exc)}, validate_mode, warn) return [] - _issue('Collections must be a list, dict, or JSON string', {'type': type(collections).__name__}, validate_mode, warn) - return [] + return _coerce_collection_list(parsed, validate_mode, warn) def _coerce_str_field( @@ -225,22 +216,26 @@ def _normalize_ops_list(raw: object) -> Optional[List[Dict[str, Any]]]: ) return None - if isinstance(gfql_ops, Chain): - return _normalize_ops_list(gfql_ops.chain) - if 
isinstance(gfql_ops, ASTObject): - return _normalize_ops_list(gfql_ops) - if isinstance(gfql_ops, dict): - if gfql_ops.get('type') == 'Chain' and 'chain' in gfql_ops: - return _normalize_ops_list(gfql_ops.get('chain')) - if gfql_ops.get('type') == 'gfql_chain' and 'gfql' in gfql_ops: - return _normalize_ops_list(gfql_ops.get('gfql')) - if 'chain' in gfql_ops: - return _normalize_ops_list(gfql_ops.get('chain')) - if 'gfql' in gfql_ops: - return _normalize_ops_list(gfql_ops.get('gfql')) - return _normalize_ops_list(gfql_ops) - if isinstance(gfql_ops, list): - return _normalize_ops_list(gfql_ops) + def _extract_ops_value(raw: object) -> object: + if isinstance(raw, Chain): + return raw.chain + if isinstance(raw, ASTObject): + return raw + if isinstance(raw, dict): + if raw.get('type') == 'gfql_chain' and 'gfql' in raw: + return raw.get('gfql') + if raw.get('type') == 'Chain' and 'chain' in raw: + return raw.get('chain') + if 'gfql' in raw: + return raw.get('gfql') + if 'chain' in raw: + return raw.get('chain') + return raw + if isinstance(raw, list): + return raw + return raw + + return _normalize_ops_list(_extract_ops_value(gfql_ops)) _issue( 'GFQL operations must be a Chain, AST object, list, or dict', @@ -258,25 +253,9 @@ def _normalize_gfql_expr( warn: bool, entry_index: int ) -> Optional[Dict[str, Any]]: - if isinstance(expr, dict): - if expr.get('type') == 'intersection': - _issue('Set collection expr cannot be intersection', {'index': entry_index}, validate_mode, warn) - return None - if 'gfql' in expr or expr.get('type') == 'gfql_chain': - ops = _normalize_gfql_ops(expr.get('gfql'), validate_mode, warn, entry_index) - if ops is None: - return None - return {'type': 'gfql_chain', 'gfql': ops} - if 'chain' in expr: - ops = _normalize_gfql_ops(expr, validate_mode, warn, entry_index) - if ops is None: - return None - return {'type': 'gfql_chain', 'gfql': ops} - if 'type' in expr: - ops = _normalize_gfql_ops(expr, validate_mode, warn, entry_index) - if ops is None: 
- return None - return {'type': 'gfql_chain', 'gfql': ops} + if isinstance(expr, dict) and expr.get('type') == 'intersection': + _issue('Set collection expr cannot be intersection', {'index': entry_index}, validate_mode, warn) + return None ops = _normalize_gfql_ops(expr, validate_mode, warn, entry_index) if ops is None: return None From 2f426d0b4fec7b6ae5a168cc0d8a019e25fc4472 Mon Sep 17 00:00:00 2001 From: Leo Meyerovich Date: Thu, 8 Jan 2026 15:18:45 -0800 Subject: [PATCH 17/30] Simplify collections input parsing --- graphistry/validate/validate_collections.py | 25 ++++++--------------- 1 file changed, 7 insertions(+), 18 deletions(-) diff --git a/graphistry/validate/validate_collections.py b/graphistry/validate/validate_collections.py index db107b18ac..d09fd0d88d 100644 --- a/graphistry/validate/validate_collections.py +++ b/graphistry/validate/validate_collections.py @@ -3,7 +3,7 @@ from urllib.parse import quote, unquote from graphistry.client_session import strtobool -from graphistry.models.collections import Collection, CollectionsInput +from graphistry.models.collections import CollectionsInput from graphistry.models.types import ValidationMode, ValidationParam from graphistry.util import warn as emit_warn _ALLOWED_COLLECTION_FIELDS = { @@ -67,16 +67,10 @@ def _parse_collections_input( validate_mode: ValidationMode, warn: bool ) -> List[Dict[str, Any]]: - from graphistry.compute.ast import ASTObject - from graphistry.compute.chain import Chain - - def _default(obj: Any) -> Any: - if isinstance(obj, Chain): - return obj.to_json() - if isinstance(obj, ASTObject): - return obj.to_json() - raise TypeError(f"Object of type {type(obj).__name__} is not JSON-serializable") - + if isinstance(collections, list): + return _coerce_collection_list(collections, validate_mode, warn) + if isinstance(collections, dict): + return _coerce_collection_list(collections, validate_mode, warn) if isinstance(collections, str): try: parsed = json.loads(collections) @@ -87,13 +81,8 
@@ def _default(obj: Any) -> Any: _issue('Collections string must be JSON or URL-encoded JSON', {'error': str(exc)}, validate_mode, warn) return [] return _coerce_collection_list(parsed, validate_mode, warn) - - try: - parsed = json.loads(json.dumps(collections, default=_default, ensure_ascii=True)) - except (TypeError, ValueError) as exc: - _issue('Collections must be JSON-serializable', {'error': str(exc)}, validate_mode, warn) - return [] - return _coerce_collection_list(parsed, validate_mode, warn) + _issue('Collections must be a list, dict, or JSON string', {'type': type(collections).__name__}, validate_mode, warn) + return [] def _coerce_str_field( From 924f2343a29737c206fafd38d1785e6500c94c10 Mon Sep 17 00:00:00 2001 From: Leo Meyerovich Date: Mon, 12 Jan 2026 17:12:46 -0800 Subject: [PATCH 18/30] fix: canonicalize collections validation and encoding --- CHANGELOG.md | 4 ++ graphistry/Plottable.py | 1 - graphistry/PlotterBase.py | 14 +++---- graphistry/pygraphistry.py | 2 - graphistry/tests/test_collections.py | 45 ++++++++++++++++++--- graphistry/validate/validate_collections.py | 37 +++++++++++++---- 6 files changed, 79 insertions(+), 24 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 273c599efa..286f5af884 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -56,6 +56,10 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm ### Performance - **GFQL / chain**: Optimized backward pass for simple single-hop edges by skipping full `hop()` call and using vectorized merge filtering instead (~50% faster on small graphs). Added `is_simple_single_hop()` method on `ASTEdge` for optimization eligibility checks. +### Changed +- **Collections**: Autofix validation now drops invalid collections (e.g., invalid GFQL ops) and non-string collection color fields instead of string-coercing them; warnings still emit when `warn=True`. 
+- **Collections**: `collections(...)` now always canonicalizes to URL-encoded JSON (string inputs are parsed + re-encoded); the `encode` parameter was removed to avoid ambiguous behavior. + ### Fixed - **Compute / hop**: Exact-hop traversals now prune branches that do not reach `min_hops`, avoid reapplying min-hop pruning in reverse passes, keep seeds in wavefront outputs, and reuse forward wavefronts when recomputing labels so edge/node hop labels stay aligned (fixes 3-hop branch inclusion issues and mislabeled slices). - **GFQL / chain**: Fixed `output_min_hops`/`output_max_hops` semantics to correctly slice output nodes/edges matching oracle behavior. diff --git a/graphistry/Plottable.py b/graphistry/Plottable.py index 271392f56f..c94da3295e 100644 --- a/graphistry/Plottable.py +++ b/graphistry/Plottable.py @@ -790,7 +790,6 @@ def collections( show_collections: Optional[bool] = None, collections_global_node_color: Optional[str] = None, collections_global_edge_color: Optional[str] = None, - encode: bool = True, validate: ValidationParam = 'autofix', warn: bool = True ) -> 'Plottable': diff --git a/graphistry/PlotterBase.py b/graphistry/PlotterBase.py index 6ecb2b58f8..e90963f8d0 100644 --- a/graphistry/PlotterBase.py +++ b/graphistry/PlotterBase.py @@ -1827,7 +1827,8 @@ def graph(self, ig: Any) -> Plottable: def settings(self, height=None, url_params={}, render=None): """Specify iframe height and add URL parameter dictionary. - The library takes care of URI component encoding for the dictionary. + Collections URL params are normalized and URL-encoded at plot time; other + params should already be URL-safe. :param height: Height in pixels. 
:type height: int @@ -1853,18 +1854,16 @@ def collections( show_collections: Optional[bool] = None, collections_global_node_color: Optional[str] = None, collections_global_edge_color: Optional[str] = None, - encode: bool = True, validate: ValidationParam = 'autofix', warn: bool = True ) -> 'Plottable': """Set collections URL parameters. Additive over previous settings. - :param collections: List/dict of collections or JSON/URL-encoded JSON string. + :param collections: List/dict of collections or JSON/URL-encoded JSON string (stored as URL-encoded JSON). :param show_collections: Toggle collections panel display. :param collections_global_node_color: Hex color for non-collection nodes (leading # stripped). :param collections_global_edge_color: Hex color for non-collection edges (leading # stripped). - :param encode: If True, JSON-minify and URL-encode collections. Use False for pre-encoded strings. - :param validate: Validation mode. 'autofix' (default) warns on issues, 'strict' raises on issues. + :param validate: Validation mode. 'autofix' (default) drops invalid collections and color fields with warnings, 'strict' raises on issues. :param warn: Whether to emit warnings when validate='autofix'. validate=False forces warn=False. 
""" from graphistry.validate.validate_collections import ( @@ -1878,10 +1877,7 @@ def collections( settings: Dict[str, Any] = {} if collections is not None: normalized = normalize_collections(collections, validate=validate_mode, warn=warn) - if isinstance(collections, str) and not encode: - settings['collections'] = collections - else: - settings['collections'] = encode_collections(normalized, encode=encode) + settings['collections'] = encode_collections(normalized) extras: Dict[str, Any] = {} if show_collections is not None: extras['showCollections'] = show_collections diff --git a/graphistry/pygraphistry.py b/graphistry/pygraphistry.py index 171cc5082c..aa01d1a2e3 100644 --- a/graphistry/pygraphistry.py +++ b/graphistry/pygraphistry.py @@ -2282,7 +2282,6 @@ def collections( show_collections: Optional[bool] = None, collections_global_node_color: Optional[str] = None, collections_global_edge_color: Optional[str] = None, - encode: bool = True, validate: ValidationParam = 'autofix', warn: bool = True ): @@ -2291,7 +2290,6 @@ def collections( show_collections=show_collections, collections_global_node_color=collections_global_node_color, collections_global_edge_color=collections_global_edge_color, - encode=encode, validate=validate, warn=warn ) diff --git a/graphistry/tests/test_collections.py b/graphistry/tests/test_collections.py index 5516e795f0..d49b1f07d1 100644 --- a/graphistry/tests/test_collections.py +++ b/graphistry/tests/test_collections.py @@ -2,7 +2,7 @@ import json import pytest -from urllib.parse import quote, unquote +from urllib.parse import unquote import graphistry from graphistry.collections import collection_intersection, collection_set @@ -92,11 +92,17 @@ def test_collections_accepts_chain_and_preserves_dataset_id(): assert g2._dataset_id == "dataset_123" -def test_collections_encode_false_keeps_string(): +def test_collections_string_input_is_encoded(): raw = '[{"type":"intersection","expr":{"type":"intersection","sets":["a"]}}]' - encoded = 
quote(raw, safe="") - url_params = collections_url_params(encoded, encode=False) - assert url_params["collections"] == encoded + url_params = collections_url_params(raw) + assert url_params["collections"].startswith("%5B") + decoded = decode_collections(url_params["collections"]) + assert decoded == [ + { + "type": "intersection", + "expr": {"type": "intersection", "sets": ["a"]}, + } + ] def test_collections_accepts_wire_protocol_chain(): @@ -164,6 +170,35 @@ def test_collections_validation_strict_raises(): graphistry.bind().collections(collections=bad_collections, validate="strict") +def test_collections_autofix_drops_invalid_colors(): + collections = [ + { + "type": "set", + "expr": [graphistry.n({"vip": True})], + "node_color": 123, + "edge_color": {"bad": True}, + } + ] + with pytest.warns(RuntimeWarning): + url_params = collections_url_params(collections, validate="autofix", warn=True) + decoded = decode_collections(url_params["collections"]) + assert "node_color" not in decoded[0] + assert "edge_color" not in decoded[0] + + +def test_collections_autofix_drops_invalid_gfql_ops(): + collections = [ + { + "type": "set", + "expr": [graphistry.n({"vip": True}), {"filter_dict": {"a": 1}}], + } + ] + with pytest.warns(RuntimeWarning): + url_params = collections_url_params(collections, validate="autofix", warn=True) + decoded = decode_collections(url_params["collections"]) + assert decoded == [] + + def test_plot_url_param_validation_autofix_warns(): bad = '[{"type":"set","expr":[{"filter_dict":{"a":1}}]}]' with pytest.warns(RuntimeWarning): diff --git a/graphistry/validate/validate_collections.py b/graphistry/validate/validate_collections.py index d09fd0d88d..248a5160a3 100644 --- a/graphistry/validate/validate_collections.py +++ b/graphistry/validate/validate_collections.py @@ -6,7 +6,7 @@ from graphistry.models.collections import CollectionsInput from graphistry.models.types import ValidationMode, ValidationParam from graphistry.util import warn as emit_warn 
-_ALLOWED_COLLECTION_FIELDS = { +_ALLOWED_COLLECTION_FIELDS_ORDER = ( 'type', 'id', 'name', @@ -14,7 +14,8 @@ 'node_color', 'edge_color', 'expr', -} +) +_ALLOWED_COLLECTION_FIELDS_SET = set(_ALLOWED_COLLECTION_FIELDS_ORDER) def normalize_validation_params( @@ -106,6 +107,27 @@ def _coerce_str_field( entry[key] = str(entry[key]) +def _normalize_color_field( + entry: Dict[str, Any], + key: str, + validate_mode: ValidationMode, + warn: bool, + entry_index: int +) -> None: + if key not in entry or entry[key] is None: + return + if isinstance(entry[key], str): + return + _issue( + f'Collection field "{key}" should be a string', + {'index': entry_index, 'value': entry[key], 'type': type(entry[key]).__name__}, + validate_mode, + warn + ) + if validate_mode == 'autofix': + entry.pop(key, None) + + def _normalize_sets_list( sets_value: Any, validate_mode: ValidationMode, @@ -184,8 +206,6 @@ def _normalize_ops_list(raw: object) -> Optional[List[Dict[str, Any]]]: for op in raw: normalized_op = _normalize_op(op) if normalized_op is None: - if validate_mode == 'autofix': - continue return None normalized_ops.append(normalized_op) if len(normalized_ops) == 0: @@ -300,7 +320,7 @@ def normalize_collections( continue return [] - unexpected_fields = [key for key in entry.keys() if key not in _ALLOWED_COLLECTION_FIELDS] + unexpected_fields = [key for key in entry.keys() if key not in _ALLOWED_COLLECTION_FIELDS_SET] if unexpected_fields: _issue( 'Unexpected fields in collection', @@ -309,7 +329,7 @@ def normalize_collections( warn ) - normalized_entry = {key: entry[key] for key in _ALLOWED_COLLECTION_FIELDS if key in entry} + normalized_entry = {key: entry[key] for key in _ALLOWED_COLLECTION_FIELDS_ORDER if key in entry} collection_type = normalized_entry.get('type', 'set') if not isinstance(collection_type, str): _issue( @@ -337,7 +357,10 @@ def normalize_collections( return [] for field in ('id', 'name', 'description', 'node_color', 'edge_color'): - 
_coerce_str_field(normalized_entry, field, validate_mode, warn, idx) + if field in ('node_color', 'edge_color'): + _normalize_color_field(normalized_entry, field, validate_mode, warn, idx) + else: + _coerce_str_field(normalized_entry, field, validate_mode, warn, idx) expr = normalized_entry.get('expr') if collection_type == 'intersection': From 41a33c4d962b8e42e10c3ee8aa1e8798743eb1bf Mon Sep 17 00:00:00 2001 From: Leo Meyerovich Date: Tue, 13 Jan 2026 01:34:36 -0800 Subject: [PATCH 19/30] refactor: simplify collections normalization --- graphistry/PlotterBase.py | 6 +-- graphistry/validate/validate_collections.py | 51 ++++++++------------- 2 files changed, 21 insertions(+), 36 deletions(-) diff --git a/graphistry/PlotterBase.py b/graphistry/PlotterBase.py index e90963f8d0..de00f5c558 100644 --- a/graphistry/PlotterBase.py +++ b/graphistry/PlotterBase.py @@ -1870,13 +1870,11 @@ def collections( encode_collections, normalize_collections, normalize_collections_url_params, - normalize_validation_params, ) - validate_mode, warn = normalize_validation_params(validate, warn) settings: Dict[str, Any] = {} if collections is not None: - normalized = normalize_collections(collections, validate=validate_mode, warn=warn) + normalized = normalize_collections(collections, validate=validate, warn=warn) settings['collections'] = encode_collections(normalized) extras: Dict[str, Any] = {} if show_collections is not None: @@ -1886,7 +1884,7 @@ def collections( if collections_global_edge_color is not None: extras['collectionsGlobalEdgeColor'] = collections_global_edge_color if extras: - extras = normalize_collections_url_params(extras, validate=validate_mode, warn=warn) + extras = normalize_collections_url_params(extras, validate=validate, warn=warn) settings.update(extras) if len(settings.keys()) > 0: diff --git a/graphistry/validate/validate_collections.py b/graphistry/validate/validate_collections.py index 248a5160a3..08cb634f11 100644 --- 
a/graphistry/validate/validate_collections.py +++ b/graphistry/validate/validate_collections.py @@ -32,9 +32,9 @@ def normalize_validation_params( return validate_mode, warn -def encode_collections(collections: List[Dict[str, Any]], encode: bool = True) -> str: +def encode_collections(collections: List[Dict[str, Any]]) -> str: json_str = json.dumps(collections, separators=(',', ':'), ensure_ascii=True) - return quote(json_str, safe='') if encode else json_str + return quote(json_str, safe='') def _issue( @@ -86,33 +86,13 @@ def _parse_collections_input( return [] -def _coerce_str_field( +def _normalize_str_field( entry: Dict[str, Any], key: str, validate_mode: ValidationMode, warn: bool, - entry_index: int -) -> None: - if key not in entry or entry[key] is None: - return - if isinstance(entry[key], str): - return - _issue( - f'Collection field "{key}" should be a string', - {'index': entry_index, 'value': entry[key], 'type': type(entry[key]).__name__}, - validate_mode, - warn - ) - if validate_mode == 'autofix': - entry[key] = str(entry[key]) - - -def _normalize_color_field( - entry: Dict[str, Any], - key: str, - validate_mode: ValidationMode, - warn: bool, - entry_index: int + entry_index: int, + autofix_drop: bool ) -> None: if key not in entry or entry[key] is None: return @@ -125,7 +105,10 @@ def _normalize_color_field( warn ) if validate_mode == 'autofix': - entry.pop(key, None) + if autofix_drop: + entry.pop(key, None) + else: + entry[key] = str(entry[key]) def _normalize_sets_list( @@ -356,11 +339,10 @@ def normalize_collections( continue return [] - for field in ('id', 'name', 'description', 'node_color', 'edge_color'): - if field in ('node_color', 'edge_color'): - _normalize_color_field(normalized_entry, field, validate_mode, warn, idx) - else: - _coerce_str_field(normalized_entry, field, validate_mode, warn, idx) + for field in ('id', 'name', 'description'): + _normalize_str_field(normalized_entry, field, validate_mode, warn, idx, autofix_drop=False) + 
for field in ('node_color', 'edge_color'): + _normalize_str_field(normalized_entry, field, validate_mode, warn, idx, autofix_drop=True) expr = normalized_entry.get('expr') if collection_type == 'intersection': @@ -372,6 +354,11 @@ def normalize_collections( continue return [] normalized_entry['expr'] = normalized_expr + normalized_entry = { + key: normalized_entry[key] + for key in _ALLOWED_COLLECTION_FIELDS_ORDER + if key in normalized_entry + } normalized.append(normalized_entry) return normalized @@ -388,7 +375,7 @@ def normalize_collections_url_params( if 'collections' in updated: normalized = normalize_collections(updated['collections'], validate_mode, warn) if len(normalized) > 0: - updated['collections'] = encode_collections(normalized, encode=True) + updated['collections'] = encode_collections(normalized) else: if validate_mode in ('strict', 'strict-fast'): return updated From 0de69447f8cdf4368901c396c4a658f424ecd195 Mon Sep 17 00:00:00 2001 From: Leo Meyerovich Date: Fri, 16 Jan 2026 12:20:00 -0800 Subject: [PATCH 20/30] chore: move collections notes to development --- CHANGELOG.md | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 286f5af884..7ff4aa23f4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,17 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm ## [Development] +### Added +- **Collections**: New `g.collections(...)` API for defining subsets via GFQL expressions with priority-based visual encodings. Includes helper constructors `graphistry.collection_set(...)` and `graphistry.collection_intersection(...)`, support for `showCollections`, `collectionsGlobalNodeColor`, and `collectionsGlobalEdgeColor` URL params, and automatic JSON encoding. Accepts GFQL AST, Chain objects, or wire-protocol dicts (#874). 
+- **Docs / Collections**: Added collections usage guide in visualization/layout/settings, tutorial notebook (`demos/more_examples/graphistry_features/collections.ipynb`), and cross-references in 10-minute guides, cheatsheet, and GFQL docs (#875). + +### Changed +- **Collections**: Autofix validation now drops invalid collections (e.g., invalid GFQL ops) and non-string collection color fields instead of string-coercing them; warnings still emit when `warn=True`. +- **Collections**: `collections(...)` now always canonicalizes to URL-encoded JSON (string inputs are parsed + re-encoded); the `encode` parameter was removed to avoid ambiguous behavior. + +### Tests +- **Collections**: Added `test_collections.py` covering encoding, GFQL Chain/AST normalization, wire-protocol acceptance, validation modes, and helper constructors. + ## [0.50.4 - 2026-01-15] ### Fixed @@ -56,10 +67,6 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm ### Performance - **GFQL / chain**: Optimized backward pass for simple single-hop edges by skipping full `hop()` call and using vectorized merge filtering instead (~50% faster on small graphs). Added `is_simple_single_hop()` method on `ASTEdge` for optimization eligibility checks. -### Changed -- **Collections**: Autofix validation now drops invalid collections (e.g., invalid GFQL ops) and non-string collection color fields instead of string-coercing them; warnings still emit when `warn=True`. -- **Collections**: `collections(...)` now always canonicalizes to URL-encoded JSON (string inputs are parsed + re-encoded); the `encode` parameter was removed to avoid ambiguous behavior. - ### Fixed - **Compute / hop**: Exact-hop traversals now prune branches that do not reach `min_hops`, avoid reapplying min-hop pruning in reverse passes, keep seeds in wavefront outputs, and reuse forward wavefronts when recomputing labels so edge/node hop labels stay aligned (fixes 3-hop branch inclusion issues and mislabeled slices). 
- **GFQL / chain**: Fixed `output_min_hops`/`output_max_hops` semantics to correctly slice output nodes/edges matching oracle behavior. From 23d1f0020e951e5f48518933b07b05767f2b01a2 Mon Sep 17 00:00:00 2001 From: Leo Meyerovich Date: Sat, 7 Feb 2026 19:22:10 -0800 Subject: [PATCH 21/30] fix: address PR #874 review comments for collections validation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Remove dead code after return in _normalize_intersection_expr - Add empty sets list validation in _normalize_sets_list - Remove pointless type coercion (str() won't produce valid types) - Consolidate GFQL parsing: _normalize_gfql_ops calls _wrap_gfql_expr - Add cross-validation for intersection set ID references - Require id field for sets (server requires it, no auto-generation) - Use precise exception handling (TypeError, ValueError, GFQLValidationError) - Update tests to include required id fields πŸ€– Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- graphistry/PlotterBase.py | 3 +- graphistry/tests/test_collections.py | 28 +++- graphistry/validate/validate_collections.py | 177 +++++++++++--------- 3 files changed, 123 insertions(+), 85 deletions(-) diff --git a/graphistry/PlotterBase.py b/graphistry/PlotterBase.py index de00f5c558..224fecc80c 100644 --- a/graphistry/PlotterBase.py +++ b/graphistry/PlotterBase.py @@ -2238,9 +2238,10 @@ def plot( 'viztoken': str(uuid.uuid4()) } + # Validate collections in url_params (catches bypass of .collections() method) from graphistry.validate.validate_collections import normalize_collections_url_params - url_params = normalize_collections_url_params(self._url_params, validate=validate_mode, warn=warn) + viz_url = self._pygraphistry._viz_url(info, url_params) cfg_client_protocol_hostname = self.session.client_protocol_hostname full_url = ('%s:%s' % (self.session.protocol, viz_url)) if cfg_client_protocol_hostname is None else viz_url diff 
--git a/graphistry/tests/test_collections.py b/graphistry/tests/test_collections.py index d49b1f07d1..b4082661b6 100644 --- a/graphistry/tests/test_collections.py +++ b/graphistry/tests/test_collections.py @@ -78,11 +78,12 @@ def test_collection_helpers_build_sets_and_intersections(): def test_collections_accepts_chain_and_preserves_dataset_id(): node = graphistry.n({"type": "user"}) chain = graphistry.Chain([node]) - g2 = graphistry.bind(dataset_id="dataset_123").collections(collections={"type": "set", "expr": chain}) + g2 = graphistry.bind(dataset_id="dataset_123").collections(collections={"type": "set", "id": "my_set", "expr": chain}) decoded = decode_collections(g2._url_params["collections"]) assert decoded == [ { "type": "set", + "id": "my_set", "expr": { "type": "gfql_chain", "gfql": [node.to_json()], @@ -93,11 +94,18 @@ def test_collections_accepts_chain_and_preserves_dataset_id(): def test_collections_string_input_is_encoded(): - raw = '[{"type":"intersection","expr":{"type":"intersection","sets":["a"]}}]' + # Include a set so the intersection has valid references + raw = '[{"type":"set","id":"a","expr":{"type":"gfql_chain","gfql":[{"type":"Node"}]}},{"type":"intersection","expr":{"type":"intersection","sets":["a"]}}]' url_params = collections_url_params(raw) assert url_params["collections"].startswith("%5B") decoded = decode_collections(url_params["collections"]) + # Node normalizes to include filter_dict: {} assert decoded == [ + { + "type": "set", + "id": "a", + "expr": {"type": "gfql_chain", "gfql": [{"type": "Node", "filter_dict": {}}]}, + }, { "type": "intersection", "expr": {"type": "intersection", "sets": ["a"]}, @@ -118,11 +126,12 @@ def test_collections_accepts_wire_protocol_chain(): ] } decoded = decode_collections( - collections_url_params({"type": "set", "expr": chain_json})["collections"] + collections_url_params({"type": "set", "id": "users", "expr": chain_json})["collections"] ) assert decoded == [ { "type": "set", + "id": "users", "expr": { 
"type": "gfql_chain", "gfql": chain_json["chain"], @@ -134,7 +143,7 @@ def test_collections_accepts_wire_protocol_chain(): def test_collections_accepts_let_expr(): dag = graphistry.let({"seed": graphistry.n({"type": "user"})}) decoded = decode_collections( - collections_url_params({"type": "set", "expr": dag})["collections"] + collections_url_params({"type": "set", "id": "users", "expr": dag})["collections"] ) assert decoded[0]["expr"]["type"] == "gfql_chain" assert decoded[0]["expr"]["gfql"][0]["type"] == "Let" @@ -144,6 +153,7 @@ def test_collections_drop_unexpected_fields_autofix(): collections = [ { "type": "set", + "id": "vip_set", "expr": [graphistry.n({"vip": True})], "unexpected": "drop-me", } @@ -165,7 +175,8 @@ def test_collections_show_collections_strict_raises(): def test_collections_validation_strict_raises(): - bad_collections = [{"type": "set", "expr": [{"filter_dict": {"a": 1}}]}] + # Missing 'type' field in GFQL op causes validation error + bad_collections = [{"type": "set", "id": "bad_set", "expr": [{"filter_dict": {"a": 1}}]}] with pytest.raises(ValueError): graphistry.bind().collections(collections=bad_collections, validate="strict") @@ -174,6 +185,7 @@ def test_collections_autofix_drops_invalid_colors(): collections = [ { "type": "set", + "id": "vip_set", "expr": [graphistry.n({"vip": True})], "node_color": 123, "edge_color": {"bad": True}, @@ -187,16 +199,18 @@ def test_collections_autofix_drops_invalid_colors(): def test_collections_autofix_drops_invalid_gfql_ops(): + # Collection with invalid GFQL op (missing 'type' field) gets dropped in autofix collections = [ { "type": "set", + "id": "bad_set", "expr": [graphistry.n({"vip": True}), {"filter_dict": {"a": 1}}], } ] with pytest.warns(RuntimeWarning): url_params = collections_url_params(collections, validate="autofix", warn=True) - decoded = decode_collections(url_params["collections"]) - assert decoded == [] + # Collection dropped due to invalid GFQL, so no collections key or empty + assert 
"collections" not in url_params or decode_collections(url_params["collections"]) == [] def test_plot_url_param_validation_autofix_warns(): diff --git a/graphistry/validate/validate_collections.py b/graphistry/validate/validate_collections.py index 08cb634f11..ab8b2747af 100644 --- a/graphistry/validate/validate_collections.py +++ b/graphistry/validate/validate_collections.py @@ -3,6 +3,7 @@ from urllib.parse import quote, unquote from graphistry.client_session import strtobool +from graphistry.compute.exceptions import GFQLValidationError from graphistry.models.collections import CollectionsInput from graphistry.models.types import ValidationMode, ValidationParam from graphistry.util import warn as emit_warn @@ -138,6 +139,16 @@ def _normalize_sets_list( ) if validate_mode == 'autofix': out.append(str(set_id)) + + if len(out) == 0: + _issue( + 'Intersection sets list cannot be empty', + {'index': entry_index}, + validate_mode, + warn + ) + return None + return out @@ -147,12 +158,17 @@ def _normalize_gfql_ops( warn: bool, entry_index: int ) -> Optional[List[Dict[str, Any]]]: - from graphistry.compute.ast import ASTObject, from_json as ast_from_json - from graphistry.compute.chain import Chain + """ + Normalize GFQL operations to a list of JSON-serializable dicts. + Uses _wrap_gfql_expr from collections.py as the canonical implementation, + wrapping with error handling for validation modes. 
+ """ if gfql_ops is None: _issue('GFQL chain is missing', {'index': entry_index}, validate_mode, warn) return None + + # Handle JSON string input if isinstance(gfql_ops, str): try: gfql_ops = json.loads(gfql_ops) @@ -160,84 +176,29 @@ def _normalize_gfql_ops( _issue('GFQL chain string must be JSON', {'index': entry_index, 'error': str(exc)}, validate_mode, warn) return None - def _normalize_op(op: object) -> Optional[Dict[str, Any]]: - if isinstance(op, ASTObject): - return op.to_json() - if isinstance(op, dict): - try: - parsed = ast_from_json(op, validate=True) - except Exception as exc: - _issue( - 'Invalid GFQL operation in collection', - {'index': entry_index, 'op': op, 'error': str(exc)}, - validate_mode, - warn - ) - return None - return parsed.to_json() - _issue( - 'GFQL operations must be AST objects or dicts', - {'index': entry_index, 'value': op, 'type': type(op).__name__}, - validate_mode, - warn - ) - return None - - def _normalize_ops_list(raw: object) -> Optional[List[Dict[str, Any]]]: - if isinstance(raw, list): - normalized_ops: List[Dict[str, Any]] = [] - for op in raw: - normalized_op = _normalize_op(op) - if normalized_op is None: - return None - normalized_ops.append(normalized_op) - if len(normalized_ops) == 0: - _issue('GFQL chain is empty', {'index': entry_index}, validate_mode, warn) - return None - return normalized_ops - if isinstance(raw, (ASTObject, dict)): - normalized_op = _normalize_op(raw) - if normalized_op is None: - return None - return [normalized_op] + # Use canonical implementation from collections.py + try: + from graphistry.collections import _wrap_gfql_expr + result = _wrap_gfql_expr(gfql_ops) + ops_raw = result.get('gfql', []) + if not isinstance(ops_raw, list): + _issue('GFQL chain must be a list', {'index': entry_index}, validate_mode, warn) + return None + ops: List[Dict[str, Any]] = ops_raw + if len(ops) == 0: + _issue('GFQL chain is empty', {'index': entry_index}, validate_mode, warn) + return None + return ops + 
except (TypeError, ValueError, GFQLValidationError) as exc: + # Precise exception handling for GFQL parsing errors _issue( - 'GFQL operations must be a list, AST object, or dict', - {'index': entry_index, 'value': raw, 'type': type(raw).__name__}, + 'Invalid GFQL operation in collection', + {'index': entry_index, 'error': str(exc)}, validate_mode, warn ) return None - def _extract_ops_value(raw: object) -> object: - if isinstance(raw, Chain): - return raw.chain - if isinstance(raw, ASTObject): - return raw - if isinstance(raw, dict): - if raw.get('type') == 'gfql_chain' and 'gfql' in raw: - return raw.get('gfql') - if raw.get('type') == 'Chain' and 'chain' in raw: - return raw.get('chain') - if 'gfql' in raw: - return raw.get('gfql') - if 'chain' in raw: - return raw.get('chain') - return raw - if isinstance(raw, list): - return raw - return raw - - return _normalize_ops_list(_extract_ops_value(gfql_ops)) - - _issue( - 'GFQL operations must be a Chain, AST object, list, or dict', - {'index': entry_index, 'type': type(gfql_ops).__name__}, - validate_mode, - warn - ) - return None - - def _normalize_gfql_expr( expr: Any, @@ -321,12 +282,12 @@ def normalize_collections( validate_mode, warn ) + # str() coercion is pointless - it won't produce 'set' or 'intersection' + # so we skip this entry in autofix mode, or fail in strict mode if validate_mode == 'autofix': - collection_type = str(collection_type) - else: continue + return [] collection_type = collection_type.lower() - normalized_entry['type'] = collection_type if collection_type not in ('set', 'intersection'): _issue( @@ -339,11 +300,30 @@ def normalize_collections( continue return [] + normalized_entry['type'] = collection_type + for field in ('id', 'name', 'description'): _normalize_str_field(normalized_entry, field, validate_mode, warn, idx, autofix_drop=False) for field in ('node_color', 'edge_color'): _normalize_str_field(normalized_entry, field, validate_mode, warn, idx, autofix_drop=True) + # Validate id 
field - required for sets (server requires it, and intersections reference by ID) + # Note: We warn but don't auto-generate IDs - user must provide meaningful IDs + if collection_type == 'set': + if 'id' not in normalized_entry or normalized_entry.get('id') is None: + _issue( + 'Set collection requires an id field (server requires it for subgraph storage)', + {'index': idx}, + validate_mode, + warn + ) + # In autofix mode, skip this collection rather than generate arbitrary IDs + # User should provide meaningful IDs they control + if validate_mode == 'autofix': + continue + else: + continue + expr = normalized_entry.get('expr') if collection_type == 'intersection': normalized_expr = _normalize_intersection_expr(expr, validate_mode, warn, idx) @@ -361,9 +341,52 @@ def normalize_collections( } normalized.append(normalized_entry) + # Cross-validate intersection set references + normalized = _validate_intersection_references(normalized, validate_mode, warn) + return normalized +def _validate_intersection_references( + collections: List[Dict[str, Any]], + validate_mode: ValidationMode, + warn: bool +) -> List[Dict[str, Any]]: + """ + Validate that intersection set IDs reference actual collection IDs. + + Dangling references (e.g., sets: ["nonexistent"]) cause backend errors. + In strict mode, raise on first issue. In autofix mode, drop invalid intersections. 
+ """ + # Collect all set IDs (only from 'set' type collections, not intersections) + set_ids = { + c.get('id') + for c in collections + if c.get('type') == 'set' and c.get('id') + } + + valid_collections: List[Dict[str, Any]] = [] + for idx, collection in enumerate(collections): + if collection.get('type') == 'intersection': + expr = collection.get('expr', {}) + referenced_sets = expr.get('sets', []) + missing = [sid for sid in referenced_sets if sid not in set_ids] + if missing: + _issue( + 'Intersection references non-existent set IDs', + {'index': idx, 'missing_sets': missing, 'available_sets': list(set_ids)}, + validate_mode, + warn + ) + if validate_mode == 'autofix': + continue # Drop invalid intersection + # In strict mode, we already raised in _issue + return [] + valid_collections.append(collection) + + return valid_collections + + def normalize_collections_url_params( url_params: Dict[str, Any], validate: ValidationParam = 'autofix', From 00ec4f8571dca12009dc82d0591cf456a06385aa Mon Sep 17 00:00:00 2001 From: Leo Meyerovich Date: Sat, 7 Feb 2026 20:13:27 -0800 Subject: [PATCH 22/30] docs: update CHANGELOG with collections review fixes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Set collections require id field (no auto-generation) - Intersection cross-validation for set ID references - GFQL parsing consolidation with precise exception handling πŸ€– Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index e8a7c99770..f79e60e188 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,9 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm ### Changed - **Collections**: Autofix validation now drops invalid collections (e.g., invalid GFQL ops) and non-string collection color fields instead of string-coercing them; warnings still emit when 
`warn=True`. - **Collections**: `collections(...)` now always canonicalizes to URL-encoded JSON (string inputs are parsed + re-encoded); the `encode` parameter was removed to avoid ambiguous behavior. +- **Collections**: Set collections now require an `id` field (server requires it for subgraph storage); missing IDs are warned and dropped in autofix mode rather than auto-generated. +- **Collections**: Intersection collections now cross-validate that referenced set IDs exist; dangling references are warned and dropped in autofix mode. +- **Collections**: GFQL parsing consolidated to use `_wrap_gfql_expr` from `collections.py` as the canonical implementation with precise exception handling. ### Tests - **Collections**: Added `test_collections.py` covering encoding, GFQL Chain/AST normalization, wire-protocol acceptance, validation modes, and helper constructors. From d2bdf02ee0ec60c183ca5efbfb561e056127ab55 Mon Sep 17 00:00:00 2001 From: Leo Meyerovich Date: Sat, 7 Feb 2026 20:29:33 -0800 Subject: [PATCH 23/30] refactor: inline _coerce_collection_list into _parse_collections_input MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address review comment #2683393920 - removes unnecessary helper function. 
πŸ€– Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- graphistry/validate/validate_collections.py | 26 ++++++++------------- 1 file changed, 10 insertions(+), 16 deletions(-) diff --git a/graphistry/validate/validate_collections.py b/graphistry/validate/validate_collections.py index ab8b2747af..73ca6b3875 100644 --- a/graphistry/validate/validate_collections.py +++ b/graphistry/validate/validate_collections.py @@ -51,28 +51,16 @@ def _issue( emit_warn(f"Collections validation warning: {message} ({data})") -def _coerce_collection_list( - parsed: Any, - validate_mode: ValidationMode, - warn: bool -) -> List[Dict[str, Any]]: - if isinstance(parsed, list): - return parsed - if isinstance(parsed, dict): - return [parsed] - _issue('Collections JSON must be a list or dict', {'type': type(parsed).__name__}, validate_mode, warn) - return [] - - def _parse_collections_input( collections: CollectionsInput, validate_mode: ValidationMode, warn: bool ) -> List[Dict[str, Any]]: + """Parse collections input to a list of dicts, handling list/dict/JSON string inputs.""" if isinstance(collections, list): - return _coerce_collection_list(collections, validate_mode, warn) + return collections if isinstance(collections, dict): - return _coerce_collection_list(collections, validate_mode, warn) + return [collections] if isinstance(collections, str): try: parsed = json.loads(collections) @@ -82,7 +70,13 @@ def _parse_collections_input( except json.JSONDecodeError as exc: _issue('Collections string must be JSON or URL-encoded JSON', {'error': str(exc)}, validate_mode, warn) return [] - return _coerce_collection_list(parsed, validate_mode, warn) + # Coerce parsed JSON to list + if isinstance(parsed, list): + return parsed + if isinstance(parsed, dict): + return [parsed] + _issue('Collections JSON must be a list or dict', {'type': type(parsed).__name__}, validate_mode, warn) + return [] _issue('Collections must be a list, dict, or JSON string', 
{'type': type(collections).__name__}, validate_mode, warn) return [] From 47d87e86dcf0108647e9e7c754eec56c28faffef Mon Sep 17 00:00:00 2001 From: Leo Meyerovich Date: Sat, 7 Feb 2026 21:24:45 -0800 Subject: [PATCH 24/30] fix: add type casts for mypy after inlining _coerce_collection_list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CollectionsInput includes TypedDicts which need to be cast to Dict[str, Any]. πŸ€– Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- graphistry/validate/validate_collections.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/graphistry/validate/validate_collections.py b/graphistry/validate/validate_collections.py index 73ca6b3875..71dd111d9f 100644 --- a/graphistry/validate/validate_collections.py +++ b/graphistry/validate/validate_collections.py @@ -4,7 +4,7 @@ from graphistry.client_session import strtobool from graphistry.compute.exceptions import GFQLValidationError -from graphistry.models.collections import CollectionsInput +from graphistry.models.collections import Collection, CollectionsInput from graphistry.models.types import ValidationMode, ValidationParam from graphistry.util import warn as emit_warn _ALLOWED_COLLECTION_FIELDS_ORDER = ( @@ -55,7 +55,7 @@ def _parse_collections_input( collections: CollectionsInput, validate_mode: ValidationMode, warn: bool -) -> List[Dict[str, Any]]: +) -> Union[List[Dict[str, Any]], List[Collection]]: """Parse collections input to a list of dicts, handling list/dict/JSON string inputs.""" if isinstance(collections, list): return collections @@ -258,7 +258,10 @@ def normalize_collections( continue return [] - unexpected_fields = [key for key in entry.keys() if key not in _ALLOWED_COLLECTION_FIELDS_SET] + # Convert to plain dict for uniform handling (TypedDicts become regular dicts) + entry_dict: Dict[str, Any] = dict(entry) + + unexpected_fields = [key for key in 
entry_dict.keys() if key not in _ALLOWED_COLLECTION_FIELDS_SET] if unexpected_fields: _issue( 'Unexpected fields in collection', @@ -267,7 +270,7 @@ def normalize_collections( warn ) - normalized_entry = {key: entry[key] for key in _ALLOWED_COLLECTION_FIELDS_ORDER if key in entry} + normalized_entry = {key: entry_dict[key] for key in _ALLOWED_COLLECTION_FIELDS_ORDER if key in entry_dict} collection_type = normalized_entry.get('type', 'set') if not isinstance(collection_type, str): _issue( From 2b5fa78b18419ba2aa34273cc1abfb220add32a5 Mon Sep 17 00:00:00 2001 From: Leo Meyerovich Date: Sat, 7 Feb 2026 23:43:25 -0800 Subject: [PATCH 25/30] fix: auto-generate collection IDs in autofix mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Server requires IDs for all collection types (used as storage keys). In autofix mode, generate `{type}_{idx}` IDs when missing instead of dropping the collection. This makes simple use cases "just work" while still warning about the missing ID. - Sets get `set_0`, `set_1`, etc. - Intersections get `intersection_0`, `intersection_1`, etc. 
- Strict mode still rejects missing IDs - Added test for auto-generation behavior πŸ€– Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- graphistry/tests/test_collections.py | 18 +++++++++++-- graphistry/validate/validate_collections.py | 29 +++++++++------------ 2 files changed, 29 insertions(+), 18 deletions(-) diff --git a/graphistry/tests/test_collections.py b/graphistry/tests/test_collections.py index b4082661b6..521983dd03 100644 --- a/graphistry/tests/test_collections.py +++ b/graphistry/tests/test_collections.py @@ -67,7 +67,7 @@ def test_collection_set_wraps_ast_expr(expr): def test_collection_helpers_build_sets_and_intersections(): collections = [ collection_set(expr=[graphistry.n({"vip": True})], id="vip", name="VIP", node_color="#FFAA00"), - collection_intersection(sets=["vip"], name="VIP Intersection", node_color="#00BFFF"), + collection_intersection(sets=["vip"], id="vip_intersection", name="VIP Intersection", node_color="#00BFFF"), ] decoded = decode_collections(collections_url_params(collections)["collections"]) assert decoded[0]["type"] == "set" @@ -95,7 +95,7 @@ def test_collections_accepts_chain_and_preserves_dataset_id(): def test_collections_string_input_is_encoded(): # Include a set so the intersection has valid references - raw = '[{"type":"set","id":"a","expr":{"type":"gfql_chain","gfql":[{"type":"Node"}]}},{"type":"intersection","expr":{"type":"intersection","sets":["a"]}}]' + raw = '[{"type":"set","id":"a","expr":{"type":"gfql_chain","gfql":[{"type":"Node"}]}},{"type":"intersection","id":"b","expr":{"type":"intersection","sets":["a"]}}]' url_params = collections_url_params(raw) assert url_params["collections"].startswith("%5B") decoded = decode_collections(url_params["collections"]) @@ -108,6 +108,7 @@ def test_collections_string_input_is_encoded(): }, { "type": "intersection", + "id": "b", "expr": {"type": "intersection", "sets": ["a"]}, } ] @@ -218,3 +219,16 @@ def 
test_plot_url_param_validation_autofix_warns(): with pytest.warns(RuntimeWarning): normalized = normalize_collections_url_params({"collections": bad}, validate="autofix", warn=True) assert "collections" not in normalized or normalized["collections"].startswith("%5B") + + +def test_collections_autofix_generates_missing_ids(): + # Collections without IDs get auto-generated IDs in autofix mode + collections = [ + {"type": "set", "expr": [graphistry.n({"a": 1})]}, + {"type": "intersection", "expr": {"type": "intersection", "sets": ["set_0"]}}, + ] + with pytest.warns(RuntimeWarning): + url_params = collections_url_params(collections, validate="autofix", warn=True) + decoded = decode_collections(url_params["collections"]) + assert decoded[0]["id"] == "set_0" + assert decoded[1]["id"] == "intersection_1" diff --git a/graphistry/validate/validate_collections.py b/graphistry/validate/validate_collections.py index 71dd111d9f..8d28eb323c 100644 --- a/graphistry/validate/validate_collections.py +++ b/graphistry/validate/validate_collections.py @@ -304,22 +304,19 @@ def normalize_collections( for field in ('node_color', 'edge_color'): _normalize_str_field(normalized_entry, field, validate_mode, warn, idx, autofix_drop=True) - # Validate id field - required for sets (server requires it, and intersections reference by ID) - # Note: We warn but don't auto-generate IDs - user must provide meaningful IDs - if collection_type == 'set': - if 'id' not in normalized_entry or normalized_entry.get('id') is None: - _issue( - 'Set collection requires an id field (server requires it for subgraph storage)', - {'index': idx}, - validate_mode, - warn - ) - # In autofix mode, skip this collection rather than generate arbitrary IDs - # User should provide meaningful IDs they control - if validate_mode == 'autofix': - continue - else: - continue + # Validate id field - required by server for all collection types (used as storage key) + if 'id' not in normalized_entry or normalized_entry.get('id') 
is None: + _issue( + f'{collection_type.capitalize()} collection missing id field (server requires it for storage)', + {'index': idx}, + validate_mode, + warn + ) + if validate_mode == 'autofix': + # Auto-generate ID so collection still works + normalized_entry['id'] = f'{collection_type}_{idx}' + else: + continue expr = normalized_entry.get('expr') if collection_type == 'intersection': From 1ab2e827cb2c82146b6d1e3512d3d3ad84fc4b99 Mon Sep 17 00:00:00 2001 From: Leo Meyerovich Date: Sun, 8 Feb 2026 04:25:53 -0800 Subject: [PATCH 26/30] style: use kebab-case for auto-generated collection IDs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change from `set_0` to `set-0` for consistency with other ID patterns in the codebase (e.g., kepler's `dataset-{uuid[:8]}`). User-provided IDs can be any string - no validation beyond type check. πŸ€– Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- graphistry/tests/test_collections.py | 8 ++++---- graphistry/validate/validate_collections.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/graphistry/tests/test_collections.py b/graphistry/tests/test_collections.py index 521983dd03..1f94491b32 100644 --- a/graphistry/tests/test_collections.py +++ b/graphistry/tests/test_collections.py @@ -222,13 +222,13 @@ def test_plot_url_param_validation_autofix_warns(): def test_collections_autofix_generates_missing_ids(): - # Collections without IDs get auto-generated IDs in autofix mode + # Collections without IDs get auto-generated IDs in autofix mode (kebab-case) collections = [ {"type": "set", "expr": [graphistry.n({"a": 1})]}, - {"type": "intersection", "expr": {"type": "intersection", "sets": ["set_0"]}}, + {"type": "intersection", "expr": {"type": "intersection", "sets": ["set-0"]}}, ] with pytest.warns(RuntimeWarning): url_params = collections_url_params(collections, validate="autofix", warn=True) decoded = 
decode_collections(url_params["collections"]) - assert decoded[0]["id"] == "set_0" - assert decoded[1]["id"] == "intersection_1" + assert decoded[0]["id"] == "set-0" + assert decoded[1]["id"] == "intersection-1" diff --git a/graphistry/validate/validate_collections.py b/graphistry/validate/validate_collections.py index 8d28eb323c..5f0873f5ec 100644 --- a/graphistry/validate/validate_collections.py +++ b/graphistry/validate/validate_collections.py @@ -314,7 +314,7 @@ def normalize_collections( ) if validate_mode == 'autofix': # Auto-generate ID so collection still works - normalized_entry['id'] = f'{collection_type}_{idx}' + normalized_entry['id'] = f'{collection_type}-{idx}' else: continue From 3d74b1e96c5d6ae57b3027b45c46cf66305509f4 Mon Sep 17 00:00:00 2001 From: Leo Meyerovich Date: Sun, 8 Feb 2026 18:08:15 -0800 Subject: [PATCH 27/30] refactor: move GFQL normalization to compute/ast.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add normalize_gfql_to_wire() as canonical GFQLβ†’wire implementation - Simplify collections.py (removed 36 lines of duplicate logic) - Simplify validate_collections.py to call compute/ast - Clean import graph: models β†’ compute β†’ helpers/validation πŸ€– Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- graphistry/collections.py | 42 ++-------------- graphistry/compute/ast.py | 54 +++++++++++++++++++++ graphistry/validate/validate_collections.py | 21 +++----- 3 files changed, 63 insertions(+), 54 deletions(-) diff --git a/graphistry/collections.py b/graphistry/collections.py index 1e8a5b0166..a133b746ab 100644 --- a/graphistry/collections.py +++ b/graphistry/collections.py @@ -1,11 +1,10 @@ -from typing import Dict, List, Optional, Sequence, TypeVar +from typing import Optional, Sequence, TypeVar from graphistry.models.collections import ( CollectionIntersection, CollectionExprInput, CollectionSet, ) -from graphistry.utils.json import 
JSONVal CollectionDict = TypeVar("CollectionDict", CollectionSet, CollectionIntersection) @@ -29,42 +28,6 @@ def _apply_collection_metadata(collection: CollectionDict, **metadata: Optional[ return collection -def _wrap_gfql_expr(expr: CollectionExprInput) -> Dict[str, JSONVal]: - - from graphistry.compute.ast import ASTObject, from_json as ast_from_json - from graphistry.compute.chain import Chain - - def _normalize_op(op: object) -> Dict[str, JSONVal]: - if isinstance(op, ASTObject): - return op.to_json() - if isinstance(op, dict): - return ast_from_json(op, validate=True).to_json() - raise TypeError("Collection GFQL operations must be AST objects or dictionaries") - - def _normalize_ops(raw: object) -> List[Dict[str, JSONVal]]: - if isinstance(raw, Chain): - return _normalize_ops(raw.to_json().get("chain", [])) - if isinstance(raw, ASTObject): - return [raw.to_json()] - if isinstance(raw, list): - if len(raw) == 0: - raise ValueError("Collection GFQL operations list cannot be empty") - return [_normalize_op(op) for op in raw] - if isinstance(raw, dict): - if raw.get("type") == "Chain" and "chain" in raw: - return _normalize_ops(raw.get("chain")) - if raw.get("type") == "gfql_chain" and "gfql" in raw: - return _normalize_ops(raw.get("gfql")) - if "chain" in raw: - return _normalize_ops(raw.get("chain")) - if "gfql" in raw: - return _normalize_ops(raw.get("gfql")) - return [_normalize_op(raw)] - raise TypeError("Collection expr must be an AST object, chain, list, or dict") - - return {"type": "gfql_chain", "gfql": _normalize_ops(expr)} - - def collection_set( *, expr: CollectionExprInput, @@ -75,7 +38,8 @@ def collection_set( edge_color: Optional[str] = None, ) -> CollectionSet: """Build a collection dict for a GFQL-defined set.""" - collection: CollectionSet = {"type": "set", "expr": _wrap_gfql_expr(expr)} + from graphistry.compute.ast import normalize_gfql_to_wire + collection: CollectionSet = {"type": "set", "expr": {"type": "gfql_chain", "gfql": 
normalize_gfql_to_wire(expr)}} return _apply_collection_metadata( collection, id=id, diff --git a/graphistry/compute/ast.py b/graphistry/compute/ast.py index dd024c121d..6d3c661879 100644 --- a/graphistry/compute/ast.py +++ b/graphistry/compute/ast.py @@ -1508,6 +1508,60 @@ def from_json(o: JSONVal, validate: bool = True) -> Union[ASTNode, ASTEdge, ASTL return out +def normalize_gfql_to_wire(expr: Any) -> List[Dict[str, JSONVal]]: + """ + Normalize GFQL expression to wire format (list of JSON-serializable dicts). + + Accepts: + - Chain object + - Single ASTObject + - List of ASTObjects + - Dict with 'type': 'Chain' and 'chain' key + - Dict with 'type': 'gfql_chain' and 'gfql' key + - Dict with just 'chain' or 'gfql' key + - Single dict (parsed as AST op) + + Returns: + - List of JSON-serializable dicts ready for wire protocol + + Raises: + - TypeError: if expr type is not supported + - ValueError: if expr is empty + - GFQLSyntaxError: if dict cannot be parsed as valid AST + """ + from graphistry.compute.chain import Chain + + def _normalize_op(op: object) -> Dict[str, JSONVal]: + if isinstance(op, ASTObject): + return op.to_json() + if isinstance(op, dict): + return from_json(op, validate=True).to_json() + raise TypeError("GFQL operations must be AST objects or dictionaries") + + def _normalize_ops(raw: object) -> List[Dict[str, JSONVal]]: + if isinstance(raw, Chain): + return _normalize_ops(raw.to_json().get("chain", [])) + if isinstance(raw, ASTObject): + return [raw.to_json()] + if isinstance(raw, list): + if len(raw) == 0: + raise ValueError("GFQL operations list cannot be empty") + return [_normalize_op(op) for op in raw] + if isinstance(raw, dict): + if raw.get("type") == "Chain" and "chain" in raw: + return _normalize_ops(raw.get("chain")) + if raw.get("type") == "gfql_chain" and "gfql" in raw: + return _normalize_ops(raw.get("gfql")) + if "chain" in raw: + return _normalize_ops(raw.get("chain")) + if "gfql" in raw: + return _normalize_ops(raw.get("gfql")) + 
return [_normalize_op(raw)] + raise TypeError("GFQL expr must be Chain, ASTObject, list, or dict") + + return _normalize_ops(expr) + + ############################################################################### # User-friendly aliases for public API diff --git a/graphistry/validate/validate_collections.py b/graphistry/validate/validate_collections.py index 5f0873f5ec..d5e67f7f28 100644 --- a/graphistry/validate/validate_collections.py +++ b/graphistry/validate/validate_collections.py @@ -3,7 +3,7 @@ from urllib.parse import quote, unquote from graphistry.client_session import strtobool -from graphistry.compute.exceptions import GFQLValidationError +from graphistry.compute.exceptions import GFQLSyntaxError, GFQLValidationError from graphistry.models.collections import Collection, CollectionsInput from graphistry.models.types import ValidationMode, ValidationParam from graphistry.util import warn as emit_warn @@ -155,7 +155,7 @@ def _normalize_gfql_ops( """ Normalize GFQL operations to a list of JSON-serializable dicts. - Uses _wrap_gfql_expr from collections.py as the canonical implementation, + Uses normalize_gfql_to_wire from compute/ast.py as the canonical implementation, wrapping with error handling for validation modes. 
""" if gfql_ops is None: @@ -170,20 +170,11 @@ def _normalize_gfql_ops( _issue('GFQL chain string must be JSON', {'index': entry_index, 'error': str(exc)}, validate_mode, warn) return None - # Use canonical implementation from collections.py + # Use canonical implementation from compute/ast.py try: - from graphistry.collections import _wrap_gfql_expr - result = _wrap_gfql_expr(gfql_ops) - ops_raw = result.get('gfql', []) - if not isinstance(ops_raw, list): - _issue('GFQL chain must be a list', {'index': entry_index}, validate_mode, warn) - return None - ops: List[Dict[str, Any]] = ops_raw - if len(ops) == 0: - _issue('GFQL chain is empty', {'index': entry_index}, validate_mode, warn) - return None - return ops - except (TypeError, ValueError, GFQLValidationError) as exc: + from graphistry.compute.ast import normalize_gfql_to_wire + return normalize_gfql_to_wire(gfql_ops) + except (TypeError, ValueError, GFQLValidationError, GFQLSyntaxError) as exc: # Precise exception handling for GFQL parsing errors _issue( 'Invalid GFQL operation in collection', From 1f0c38797758a3b985ad90d8aa8aca55178de7ed Mon Sep 17 00:00:00 2001 From: Leo Meyerovich Date: Tue, 10 Feb 2026 15:36:29 -0800 Subject: [PATCH 28/30] feat: validate intersection DAG (cycles, self-refs, intersections-of-intersections) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Allow intersections to reference other intersections (backend supports this) - Detect self-references (intersection referencing itself) - Detect cycles in intersection dependencies (A->B->A) - Add tests for DAG validation πŸ€– Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- graphistry/tests/test_collections.py | 51 ++++++++++++ graphistry/validate/validate_collections.py | 88 +++++++++++++++++---- 2 files changed, 125 insertions(+), 14 deletions(-) diff --git a/graphistry/tests/test_collections.py b/graphistry/tests/test_collections.py index 
1f94491b32..a9f66da654 100644 --- a/graphistry/tests/test_collections.py +++ b/graphistry/tests/test_collections.py @@ -232,3 +232,54 @@ def test_collections_autofix_generates_missing_ids(): decoded = decode_collections(url_params["collections"]) assert decoded[0]["id"] == "set-0" assert decoded[1]["id"] == "intersection-1" + + +def test_collections_intersection_of_intersections(): + # Backend supports intersections-of-intersections (DAG structure) + collections = [ + {"type": "set", "id": "set_a", "expr": [graphistry.n({"a": 1})]}, + {"type": "set", "id": "set_b", "expr": [graphistry.n({"b": 1})]}, + {"type": "intersection", "id": "inter_ab", "expr": {"type": "intersection", "sets": ["set_a", "set_b"]}}, + {"type": "intersection", "id": "inter_of_inter", "expr": {"type": "intersection", "sets": ["set_a", "inter_ab"]}}, + ] + url_params = collections_url_params(collections) + decoded = decode_collections(url_params["collections"]) + assert len(decoded) == 4 + assert decoded[3]["id"] == "inter_of_inter" + assert decoded[3]["expr"]["sets"] == ["set_a", "inter_ab"] + + +def test_collections_intersection_self_reference_rejected(): + # Intersection cannot reference itself + collections = [ + {"type": "set", "id": "set_a", "expr": [graphistry.n({"a": 1})]}, + {"type": "intersection", "id": "bad_inter", "expr": {"type": "intersection", "sets": ["set_a", "bad_inter"]}}, + ] + with pytest.raises(ValueError): + collections_url_params(collections, validate="strict") + + +def test_collections_intersection_cycle_rejected(): + # Cycles in intersection DAG are rejected + collections = [ + {"type": "set", "id": "set_a", "expr": [graphistry.n({"a": 1})]}, + {"type": "intersection", "id": "inter_a", "expr": {"type": "intersection", "sets": ["set_a", "inter_b"]}}, + {"type": "intersection", "id": "inter_b", "expr": {"type": "intersection", "sets": ["set_a", "inter_a"]}}, + ] + with pytest.raises(ValueError): + collections_url_params(collections, validate="strict") + + +def 
test_collections_intersection_cycle_autofix_drops(): + # In autofix mode, cyclic intersections are dropped + collections = [ + {"type": "set", "id": "set_a", "expr": [graphistry.n({"a": 1})]}, + {"type": "intersection", "id": "inter_a", "expr": {"type": "intersection", "sets": ["set_a", "inter_b"]}}, + {"type": "intersection", "id": "inter_b", "expr": {"type": "intersection", "sets": ["set_a", "inter_a"]}}, + ] + with pytest.warns(RuntimeWarning): + url_params = collections_url_params(collections, validate="autofix", warn=True) + decoded = decode_collections(url_params["collections"]) + # Both cyclic intersections dropped, only set remains + assert len(decoded) == 1 + assert decoded[0]["id"] == "set_a" diff --git a/graphistry/validate/validate_collections.py b/graphistry/validate/validate_collections.py index d5e67f7f28..b7a282cf10 100644 --- a/graphistry/validate/validate_collections.py +++ b/graphistry/validate/validate_collections.py @@ -338,35 +338,95 @@ def _validate_intersection_references( warn: bool ) -> List[Dict[str, Any]]: """ - Validate that intersection set IDs reference actual collection IDs. + Validate intersection references form a valid DAG. - Dangling references (e.g., sets: ["nonexistent"]) cause backend errors. + Checks: + 1. All referenced IDs exist as 'set' or 'intersection' collections + 2. No self-references (intersection referencing itself) + 3. No cycles (A->B->A) + + Dangling/cyclic references cause backend errors ("Infinite loop detected"). In strict mode, raise on first issue. In autofix mode, drop invalid intersections. 
""" - # Collect all set IDs (only from 'set' type collections, not intersections) - set_ids = { - c.get('id') - for c in collections - if c.get('type') == 'set' and c.get('id') - } + # Build ID -> collection type mapping for valid reference targets + collection_ids: Dict[str, str] = {} + for c in collections: + cid = c.get('id') + ctype = c.get('type') + if cid and ctype in ('set', 'intersection'): + collection_ids[cid] = ctype + + # Build dependency graph for cycle detection + # intersection_id -> set of IDs it references + dependencies: Dict[str, List[str]] = {} + for c in collections: + if c.get('type') == 'intersection' and c.get('id'): + expr = c.get('expr', {}) + dependencies[c['id']] = expr.get('sets', []) + + def has_cycle(start_id: str, visited: set, path: set) -> bool: + """DFS cycle detection.""" + if start_id in path: + return True + if start_id not in dependencies: + return False # It's a set, not an intersection - no further deps + if start_id in visited: + return False + + visited.add(start_id) + path.add(start_id) + + for dep_id in dependencies.get(start_id, []): + if has_cycle(dep_id, visited, path): + return True + + path.remove(start_id) + return False valid_collections: List[Dict[str, Any]] = [] for idx, collection in enumerate(collections): if collection.get('type') == 'intersection': + coll_id = collection.get('id') expr = collection.get('expr', {}) - referenced_sets = expr.get('sets', []) - missing = [sid for sid in referenced_sets if sid not in set_ids] + referenced_ids = expr.get('sets', []) + + # Check for self-reference + if coll_id and coll_id in referenced_ids: + _issue( + 'Intersection references itself', + {'index': idx, 'id': coll_id}, + validate_mode, + warn + ) + if validate_mode == 'autofix': + continue + return [] + + # Check all referenced IDs exist and are valid types + missing = [sid for sid in referenced_ids if sid not in collection_ids] if missing: _issue( - 'Intersection references non-existent set IDs', - {'index': idx, 
'missing_sets': missing, 'available_sets': list(set_ids)}, + 'Intersection references non-existent collection IDs', + {'index': idx, 'missing': missing, 'available': list(collection_ids.keys())}, validate_mode, warn ) if validate_mode == 'autofix': - continue # Drop invalid intersection - # In strict mode, we already raised in _issue + continue return [] + + # Check for cycles + if coll_id and has_cycle(coll_id, set(), set()): + _issue( + 'Intersection creates a dependency cycle', + {'index': idx, 'id': coll_id}, + validate_mode, + warn + ) + if validate_mode == 'autofix': + continue + return [] + valid_collections.append(collection) return valid_collections From 75839b24d8b03479c32b982d9a06b49f394761f6 Mon Sep 17 00:00:00 2001 From: Leo Meyerovich Date: Tue, 10 Feb 2026 18:07:05 -0800 Subject: [PATCH 29/30] fix: catch AssertionError from AST from_json in validation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit AST class-level from_json methods (ASTLet, ASTRemoteGraph, ASTRef, ASTCall) use bare `assert` for required field checks. These raise AssertionError which was not caught by the validation boundary, causing raw crashes instead of proper validation errors. 
πŸ€– Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- graphistry/tests/test_collections.py | 21 +++++++++++++++++++++ graphistry/validate/validate_collections.py | 4 ++-- 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/graphistry/tests/test_collections.py b/graphistry/tests/test_collections.py index a9f66da654..3e7750cb14 100644 --- a/graphistry/tests/test_collections.py +++ b/graphistry/tests/test_collections.py @@ -283,3 +283,24 @@ def test_collections_intersection_cycle_autofix_drops(): # Both cyclic intersections dropped, only set remains assert len(decoded) == 1 assert decoded[0]["id"] == "set_a" + + +def test_collections_malformed_ast_autofix_drops(): + # AST from_json uses bare asserts - these should be caught, not crash + # {"type": "Let"} missing required 'bindings' field + from graphistry.validate.validate_collections import normalize_collections + collections = [ + {"type": "set", "id": "good", "expr": [{"type": "Node"}]}, + {"type": "set", "id": "bad-let", "expr": [{"type": "Let"}]}, # missing bindings + ] + result = normalize_collections(collections, validate="autofix", warn=False) + ids = [c.get("id") for c in result] + assert "good" in ids + assert "bad-let" not in ids + + +def test_collections_malformed_ast_strict_raises(): + from graphistry.validate.validate_collections import normalize_collections + collections = [{"type": "set", "id": "bad", "expr": [{"type": "Let"}]}] + with pytest.raises(ValueError): + normalize_collections(collections, validate="strict") diff --git a/graphistry/validate/validate_collections.py b/graphistry/validate/validate_collections.py index b7a282cf10..6fc87e2643 100644 --- a/graphistry/validate/validate_collections.py +++ b/graphistry/validate/validate_collections.py @@ -174,8 +174,8 @@ def _normalize_gfql_ops( try: from graphistry.compute.ast import normalize_gfql_to_wire return normalize_gfql_to_wire(gfql_ops) - except (TypeError, ValueError, 
GFQLValidationError, GFQLSyntaxError) as exc: - # Precise exception handling for GFQL parsing errors + except (TypeError, ValueError, AssertionError, GFQLValidationError, GFQLSyntaxError) as exc: + # AssertionError: AST from_json methods use bare asserts for required fields _issue( 'Invalid GFQL operation in collection', {'index': entry_index, 'error': str(exc)}, From 2f12a7953149b13ee5a7ffefb4f9e993ee0cb1cc Mon Sep 17 00:00:00 2001 From: lmeyerov Date: Mon, 2 Mar 2026 19:20:22 -0800 Subject: [PATCH 30/30] refactor(ai): migrate prompts into skills and port docs to .agents (#930) * refactor(ai): migrate prompts into skills and port docs * docs(changelog): note ai skills and .agents migration * refactor(ai): make .agents skills canonical for claude --- .agents/README.md | 45 +++ .../assets/find_dynamic_imports.sh | 4 +- .../assets/generate_comment_inventory.sh | 4 +- .../assets/pysa_extract_callers.py | 4 +- {ai => .agents}/docs/gfql/README.md | 2 +- {ai => .agents}/docs/gfql/calls_checklist.md | 4 +- {ai => .agents}/docs/gfql/conformance.md | 0 {ai => .agents}/docs/gfql/oracle.md | 0 .../docs/gfql/predicates_checklist.md | 0 .agents/skills/SKILLS.md | 11 + .../skills/conventional-commits/SKILL.md | 7 +- .../skills/decomment/SKILL.md | 11 +- .../gfql-llm-guide-maintenance/SKILL.md | 9 +- .../skills/hoist-imports/SKILL.md | 13 +- .../skills/lint-types-check/SKILL.md | 5 + .../PLAN.md => .agents/skills/plan/SKILL.md | 11 +- .../skills/pyre-analysis/SKILL.md | 11 +- .gitignore | 4 + CHANGELOG.md | 3 + CLAUDE.md | 12 +- ai/README.md | 321 ------------------ 21 files changed, 134 insertions(+), 347 deletions(-) create mode 100644 .agents/README.md rename {ai => .agents}/assets/find_dynamic_imports.sh (96%) rename {ai => .agents}/assets/generate_comment_inventory.sh (94%) rename {ai => .agents}/assets/pysa_extract_callers.py (89%) rename {ai => .agents}/docs/gfql/README.md (97%) rename {ai => .agents}/docs/gfql/calls_checklist.md (96%) rename {ai => 
.agents}/docs/gfql/conformance.md (100%) rename {ai => .agents}/docs/gfql/oracle.md (100%) rename {ai => .agents}/docs/gfql/predicates_checklist.md (100%) create mode 100644 .agents/skills/SKILLS.md rename ai/prompts/CONVENTIONAL_COMMITS.md => .agents/skills/conventional-commits/SKILL.md (88%) rename ai/prompts/DECOMMENT.md => .agents/skills/decomment/SKILL.md (96%) rename ai/prompts/GFQL_LLM_GUIDE_MAINTENANCE.md => .agents/skills/gfql-llm-guide-maintenance/SKILL.md (97%) rename ai/prompts/HOISTIMPORTS.md => .agents/skills/hoist-imports/SKILL.md (96%) rename ai/prompts/LINT_TYPES_CHECK.md => .agents/skills/lint-types-check/SKILL.md (98%) rename ai/prompts/PLAN.md => .agents/skills/plan/SKILL.md (96%) rename ai/prompts/PYRE_ANALYSIS.md => .agents/skills/pyre-analysis/SKILL.md (81%) delete mode 100644 ai/README.md diff --git a/.agents/README.md b/.agents/README.md new file mode 100644 index 0000000000..74c7a5bf1f --- /dev/null +++ b/.agents/README.md @@ -0,0 +1,45 @@ +# .agents + +Assistant-facing support material for this repository. + +## Purpose + +- Keep reusable agent docs and helper scripts in one place. +- Keep task-oriented skill definitions in `.agents/skills/`. 
+
+## Layout
+
+```text
+.agents/
+β”œβ”€β”€ README.md
+β”œβ”€β”€ assets/
+β”‚ β”œβ”€β”€ find_dynamic_imports.sh
+β”‚ β”œβ”€β”€ generate_comment_inventory.sh
+β”‚ └── pysa_extract_callers.py
+└── docs/
+ └── gfql/
+ β”œβ”€β”€ README.md
+ β”œβ”€β”€ calls_checklist.md
+ β”œβ”€β”€ conformance.md
+ β”œβ”€β”€ oracle.md
+ └── predicates_checklist.md
+```
+
+## Related Paths
+
+- Skills: `.agents/skills/`
+- Skill index: `.agents/skills/SKILLS.md`
+- Plans (gitignored): `plans/`
+
+## Quick Commands
+
+```bash
+# Generate dynamic import inventory for refactors
+./.agents/assets/find_dynamic_imports.sh master plans/[task]/dynamic_imports.md
+
+# Generate comment inventory for cleanup passes
+./.agents/assets/generate_comment_inventory.sh master plans/[task]/comment_inventory.md
+
+# Extract callers from pysa call graph
+python3 .agents/assets/pysa_extract_callers.py pysa_results/call-graph.json PlotterBase.PlotterBase.bind
+```
diff --git a/ai/assets/find_dynamic_imports.sh b/.agents/assets/find_dynamic_imports.sh
similarity index 96%
rename from ai/assets/find_dynamic_imports.sh
rename to .agents/assets/find_dynamic_imports.sh
index c9b1bc6fd6..8e9931400f 100755
--- a/ai/assets/find_dynamic_imports.sh
+++ b/.agents/assets/find_dynamic_imports.sh
@@ -1,14 +1,14 @@
 #!/bin/bash
 # Find Dynamic Imports for HOISTIMPORTS Protocol
 #
-# Usage: ./ai/assets/find_dynamic_imports.sh [base_branch] [output_file]
+# Usage: ./.agents/assets/find_dynamic_imports.sh [base_branch] [output_file]
 #
 # This script automates Phase 1 of the HOISTIMPORTS protocol by extracting all
 # dynamic imports (imports inside functions/methods/conditionals) added in a PR
 # and formatting them with context for categorization.
# # Example: -# ./ai/assets/find_dynamic_imports.sh master plans/my-feature/dynamic_imports.md +# ./.agents/assets/find_dynamic_imports.sh master plans/my-feature/dynamic_imports.md set -euo pipefail diff --git a/ai/assets/generate_comment_inventory.sh b/.agents/assets/generate_comment_inventory.sh similarity index 94% rename from ai/assets/generate_comment_inventory.sh rename to .agents/assets/generate_comment_inventory.sh index 9f90283cd6..583626b016 100755 --- a/ai/assets/generate_comment_inventory.sh +++ b/.agents/assets/generate_comment_inventory.sh @@ -1,13 +1,13 @@ #!/bin/bash # Generate Comment Inventory for DECOMMENT Protocol # -# Usage: ./ai/assets/generate_comment_inventory.sh [base_branch] [output_file] +# Usage: ./.agents/assets/generate_comment_inventory.sh [base_branch] [output_file] # # This script automates Phase 1 of the DECOMMENT protocol by extracting all # comments added in a PR and formatting them with context for categorization. # # Example: -# ./ai/assets/generate_comment_inventory.sh master plans/my-feature/comment_inventory.md +# ./.agents/assets/generate_comment_inventory.sh master plans/my-feature/comment_inventory.md set -euo pipefail diff --git a/ai/assets/pysa_extract_callers.py b/.agents/assets/pysa_extract_callers.py similarity index 89% rename from ai/assets/pysa_extract_callers.py rename to .agents/assets/pysa_extract_callers.py index ebfb8b97a4..3636282ed7 100755 --- a/ai/assets/pysa_extract_callers.py +++ b/.agents/assets/pysa_extract_callers.py @@ -2,10 +2,10 @@ """Extract caller information from Pysa call-graph.json Usage: - python3 ai/assets/pysa_extract_callers.py path/to/call-graph.json method1 method2 ... + python3 .agents/assets/pysa_extract_callers.py path/to/call-graph.json method1 method2 ... 
Example: - python3 ai/assets/pysa_extract_callers.py \\ + python3 .agents/assets/pysa_extract_callers.py \\ pysa_results/call-graph.json \\ PlotterBase.PlotterBase.bind \\ PlotterBase.PlotterBase.nodes diff --git a/ai/docs/gfql/README.md b/.agents/docs/gfql/README.md similarity index 97% rename from ai/docs/gfql/README.md rename to .agents/docs/gfql/README.md index 6d8ef057c3..f8c4560380 100644 --- a/ai/docs/gfql/README.md +++ b/.agents/docs/gfql/README.md @@ -8,7 +8,7 @@ Guide for AI assistants working with GFQL (Graph Frame Query Language) in PyGrap - [`calls_checklist.md`](./calls_checklist.md) β€” Required steps for exposing or updating GFQL `call()` functions. - [`predicates_checklist.md`](./predicates_checklist.md) β€” End-to-end checklist for predicate implementations. - [`conformance.md`](./conformance.md) β€” Cypher TCK conformance harness and CI wiring. -- [`../prompts/GFQL_LLM_GUIDE_MAINTENANCE.md`](../prompts/GFQL_LLM_GUIDE_MAINTENANCE.md) β€” Guidance for keeping AI assistants aligned with GFQL changes. +- [`../../skills/gfql-llm-guide-maintenance/SKILL.md`](../../skills/gfql-llm-guide-maintenance/SKILL.md) β€” Guidance for keeping AI assistants aligned with GFQL changes. ### Essential GFQL Operations ```python diff --git a/ai/docs/gfql/calls_checklist.md b/.agents/docs/gfql/calls_checklist.md similarity index 96% rename from ai/docs/gfql/calls_checklist.md rename to .agents/docs/gfql/calls_checklist.md index 81b5df9092..3e45821b38 100644 --- a/ai/docs/gfql/calls_checklist.md +++ b/.agents/docs/gfql/calls_checklist.md @@ -4,7 +4,7 @@ When exposing or updating GFQL `call()` functions, the change is **multi-system* runtime executors, JSON schema validators, static typing, documentation, and tests all need to stay in sync. 
Use this playbook (paired with [`predicates_checklist.md`](./predicates_checklist.md) and the guidance in -[`../prompts/GFQL_LLM_GUIDE_MAINTENANCE.md`](../../prompts/GFQL_LLM_GUIDE_MAINTENANCE.md)) +[`../../skills/gfql-llm-guide-maintenance/SKILL.md`](../../skills/gfql-llm-guide-maintenance/SKILL.md)) to avoid regressions. > **Scope**: Items below are required for *every* new or modified `call()` entry. @@ -90,7 +90,7 @@ to avoid regressions. ## πŸ“Ž Optional (Promote When User-Facing) - **Marketing / blog teaser** when the call is a marquee feature. -- **LLM prompt updates** (`ai/prompts/gfql/*`) beyond baseline instructions. +- **LLM prompt updates** (`.agents/skills/gfql-llm-guide-maintenance/SKILL.md`) beyond baseline instructions. - **Customer templates** or dataset refreshes referencing the new capability. Keep this file synchronized with future GFQL evolutionsβ€”open a PR to amend when the onboarding surface changes. diff --git a/ai/docs/gfql/conformance.md b/.agents/docs/gfql/conformance.md similarity index 100% rename from ai/docs/gfql/conformance.md rename to .agents/docs/gfql/conformance.md diff --git a/ai/docs/gfql/oracle.md b/.agents/docs/gfql/oracle.md similarity index 100% rename from ai/docs/gfql/oracle.md rename to .agents/docs/gfql/oracle.md diff --git a/ai/docs/gfql/predicates_checklist.md b/.agents/docs/gfql/predicates_checklist.md similarity index 100% rename from ai/docs/gfql/predicates_checklist.md rename to .agents/docs/gfql/predicates_checklist.md diff --git a/.agents/skills/SKILLS.md b/.agents/skills/SKILLS.md new file mode 100644 index 0000000000..378aaca739 --- /dev/null +++ b/.agents/skills/SKILLS.md @@ -0,0 +1,11 @@ +# Skills Catalog + +Each skill is a folder containing `SKILL.md` with YAML frontmatter. 
+ +- `conventional-commits` +- `decomment` +- `gfql-llm-guide-maintenance` +- `hoist-imports` +- `lint-types-check` +- `plan` +- `pyre-analysis` diff --git a/ai/prompts/CONVENTIONAL_COMMITS.md b/.agents/skills/conventional-commits/SKILL.md similarity index 88% rename from ai/prompts/CONVENTIONAL_COMMITS.md rename to .agents/skills/conventional-commits/SKILL.md index e258aec1bf..46f62b8b4a 100644 --- a/ai/prompts/CONVENTIONAL_COMMITS.md +++ b/.agents/skills/conventional-commits/SKILL.md @@ -1,6 +1,11 @@ +--- +name: conventional-commits +description: "Create atomic PyGraphistry commits with conventional commit messages and patch-based staging. Use when preparing commit sequences, selecting staged hunks, or writing commit messages." +--- + # Conventional Commits Guide for PyGraphistry -**⚠️ BEFORE STARTING: Reread [ai/prompts/PLAN.md](PLAN.md). Reuse existing `plans/*/plan.md` if one exists, else start a new one.** +**⚠️ BEFORE STARTING: Reread [the planning skill](../plan/SKILL.md). Reuse existing `plans/*/plan.md` if one exists, else start a new one.** PyGraphistry-specific commit conventions only. See PLAN.md for execution protocol. diff --git a/ai/prompts/DECOMMENT.md b/.agents/skills/decomment/SKILL.md similarity index 96% rename from ai/prompts/DECOMMENT.md rename to .agents/skills/decomment/SKILL.md index cc57db88f1..6385beae71 100644 --- a/ai/prompts/DECOMMENT.md +++ b/.agents/skills/decomment/SKILL.md @@ -1,3 +1,8 @@ +--- +name: decomment +description: "Remove low-value comments from pull requests while preserving necessary documentation. Use before review or merge when cleaning up explanatory comments in changed code." +--- + # Decomment Protocol **Purpose:** Remove unnecessary comments from PRs while preserving valuable documentation. @@ -15,7 +20,7 @@ This protocol guides the systematic removal of redundant comments from code chan ## Prerequisites -1. **Plan File**: Ensure you have a current plan file (see `ai/prompts/PLAN.md`) +1. 
**Plan File**: Ensure you have a current plan file (see `../plan/SKILL.md`) - If no plan exists for this work, create one following PLAN.md template - If plan exists but is stale, refresh it with current state 2. **Clean Git State**: All changes committed to feature branch @@ -39,7 +44,7 @@ This protocol guides the systematic removal of redundant comments from code chan **Commands:** ```bash # Automated inventory generation (RECOMMENDED) -./ai/assets/generate_comment_inventory.sh master plans/[task]/comment_inventory.md +./.agents/assets/generate_comment_inventory.sh master plans/[task]/comment_inventory.md # Manual alternatives: # Get base branch @@ -217,7 +222,7 @@ Phase 4: Verification Results ls plans/[current-task]/plan.md # If no plan exists: -# 1. Create plan following ai/prompts/PLAN.md template +# 1. Create plan following ../plan/SKILL.md template # 2. Document decomment work as Phase N.A: "Remove redundant comments" # If plan exists but stale: diff --git a/ai/prompts/GFQL_LLM_GUIDE_MAINTENANCE.md b/.agents/skills/gfql-llm-guide-maintenance/SKILL.md similarity index 97% rename from ai/prompts/GFQL_LLM_GUIDE_MAINTENANCE.md rename to .agents/skills/gfql-llm-guide-maintenance/SKILL.md index 7df6ff088a..adab3724e5 100644 --- a/ai/prompts/GFQL_LLM_GUIDE_MAINTENANCE.md +++ b/.agents/skills/gfql-llm-guide-maintenance/SKILL.md @@ -1,3 +1,8 @@ +--- +name: gfql-llm-guide-maintenance +description: "Maintain GFQL LLM guidance docs when call surfaces, predicates, algorithms, or common failure patterns change. Use for release updates and guide conformance checks." +--- + # LLM_GUIDE.md Maintenance Process **Purpose**: Update `LLM_GUIDE.md` when GFQL features change or user patterns emerge. @@ -13,7 +18,7 @@ 5. **LLMs generating invalid JSON** β†’ [LLM Failures](#llm-failures) 6. 
**Multiple changes** β†’ [Multi-Change Releases](#multi-change-releases) -**See also**: the end-to-end checklist in `ai/docs/gfql/calls_checklist.md` +**See also**: the end-to-end checklist in `.agents/docs/gfql/calls_checklist.md` whenever a `call()` surface changes. ## Update Triggers @@ -97,7 +102,7 @@ grep "new_fn" graphistry/tests/compute/ -rn ``` **Update**: `## Graph Algorithms` (add example) + `## Call Functions` (list) -- Cross-check `ai/docs/gfql/calls_checklist.md` for required code/tests/doc updates +- Cross-check `.agents/docs/gfql/calls_checklist.md` for required code/tests/doc updates **Format**: ```markdown diff --git a/ai/prompts/HOISTIMPORTS.md b/.agents/skills/hoist-imports/SKILL.md similarity index 96% rename from ai/prompts/HOISTIMPORTS.md rename to .agents/skills/hoist-imports/SKILL.md index d6a15ea489..8543240892 100644 --- a/ai/prompts/HOISTIMPORTS.md +++ b/.agents/skills/hoist-imports/SKILL.md @@ -1,10 +1,15 @@ +--- +name: hoist-imports +description: "Refactor dynamic imports to top-level imports with an inventory, categorization, and verification workflow. Use during cleanup or review-prep when import placement needs standardization." +--- + # Hoist Imports Protocol **Purpose:** Move dynamic imports to top-level unless there's a documented reason not to. ## Quick Reference (60-second version) -1. **Identify**: Run `./ai/assets/find_dynamic_imports.sh master plans/[task]/dynamic_imports.md` +1. **Identify**: Run `./.agents/assets/find_dynamic_imports.sh master plans/[task]/dynamic_imports.md` 2. **Categorize**: Review each as HOIST (move to top) or KEEP (document reason) 3. **Hoist**: Move imports to top-level following PEP 8 section ordering 4. **Verify**: Run tests, confirm no circular imports or heavy load issues @@ -27,7 +32,7 @@ This protocol guides the systematic refactoring of dynamic imports (imports insi ## Prerequisites -1. **Plan File**: Ensure you have a current plan file (see `ai/prompts/PLAN.md`) +1. 
**Plan File**: Ensure you have a current plan file (see `../plan/SKILL.md`) 2. **Clean Git State**: All changes committed to feature branch 3. **PR Context**: Know which branch the PR will land into (usually `master` or `main`) @@ -50,7 +55,7 @@ This protocol guides the systematic refactoring of dynamic imports (imports insi **Commands:** ```bash # Automated inventory generation (RECOMMENDED) -./ai/assets/find_dynamic_imports.sh master plans/[task]/dynamic_imports.md +./.agents/assets/find_dynamic_imports.sh master plans/[task]/dynamic_imports.md # Manual alternatives (if automation script unavailable): # Get base branch @@ -324,7 +329,7 @@ Add this phase to your plan: **Tool Calls:** ```bash -./ai/assets/find_dynamic_imports.sh master plans/[task]/dynamic_imports.md +./.agents/assets/find_dynamic_imports.sh master plans/[task]/dynamic_imports.md # [edit commands] git commit -m "refactor: Hoist dynamic imports to top-level" git push diff --git a/ai/prompts/LINT_TYPES_CHECK.md b/.agents/skills/lint-types-check/SKILL.md similarity index 98% rename from ai/prompts/LINT_TYPES_CHECK.md rename to .agents/skills/lint-types-check/SKILL.md index 08df8a222d..be2a31f82a 100644 --- a/ai/prompts/LINT_TYPES_CHECK.md +++ b/.agents/skills/lint-types-check/SKILL.md @@ -1,3 +1,8 @@ +--- +name: lint-types-check +description: "Run iterative lint and typecheck remediation for PyGraphistry. Use when fixing flake8/mypy issues and driving a branch to clean validation results." +--- + # Lint and Type Check Template for PyGraphistry diff --git a/ai/prompts/PLAN.md b/.agents/skills/plan/SKILL.md similarity index 96% rename from ai/prompts/PLAN.md rename to .agents/skills/plan/SKILL.md index 57a02a19e2..e92d26623c 100644 --- a/ai/prompts/PLAN.md +++ b/.agents/skills/plan/SKILL.md @@ -1,3 +1,8 @@ +--- +name: plan +description: "Create and maintain a resumable phased execution plan for complex tasks. Use when work spans multiple steps or sessions and requires strict status tracking." 
+--- + # Task Plan Template