From 4a51bd4eea2264423ebba96de6c8bec2ed1157b6 Mon Sep 17 00:00:00 2001 From: Surajit Dasgupta Date: Wed, 25 Feb 2026 12:52:50 +0530 Subject: [PATCH] feat: build workflow from dict, add lib versioning --- .github/workflows/ci.yml | 32 +++++++++ .github/workflows/publish.yml | 126 +++++++++++++++++++++++++++------- .pre-commit-config.yaml | 7 ++ Makefile | 25 ++++++- pyproject.toml | 8 ++- sygra/__init__.py | 2 +- sygra/configuration/loader.py | 21 ++++-- sygra/workflow/__init__.py | 35 +++++++++- uv.lock | 1 - 9 files changed, 224 insertions(+), 33 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f05b322e..9ad7f23a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -3,6 +3,7 @@ name: Run CI on: push: branches: [ main ] + tags: [ "v*" ] pull_request: branches: [ main ] types: [ opened, synchronize, reopened, ready_for_review ] @@ -75,6 +76,37 @@ jobs: - name: Setup dev environment run: make setup-dev + # ---- VERSION CHECK ---- + - name: Validate version format + run: | + VERSION=$(python3 -c "import re; m=re.search(r'__version__\s*=\s*\"(\d+\.\d+\.\d+(?:\.(?:post|dev)\d+|(?:a|b|rc)\d+)?)\"', open('sygra/__init__.py').read()); print(m.group(1) if m else 'INVALID')") + if [ "$VERSION" = "INVALID" ]; then + echo "[ERROR] __version__ in sygra/__init__.py is not valid PEP 440 (e.g. 
X.Y.Z, X.Y.Z.postN)" + exit 1 + fi + echo "[SUCCESS] Version: $VERSION" + + - name: Validate tag matches __version__ (tagged builds only) + if: startsWith(github.ref, 'refs/tags/v') + run: | + TAG_VERSION="${GITHUB_REF#refs/tags/v}" + CODE_VERSION=$(python3 -c "import re; print(re.search(r'__version__\s*=\s*\"(.+?)\"', open('sygra/__init__.py').read()).group(1))") + if [ "$TAG_VERSION" != "$CODE_VERSION" ]; then + echo "[WARNING] Tag version ($TAG_VERSION) != __version__ ($CODE_VERSION) in sygra/__init__.py" + echo "" + echo " If you used 'make bump-version' + 'git tag' (CLI flow):" + echo " Run: make bump-version V=$TAG_VERSION" + echo " Then re-tag and push." + echo "" + echo " If you created a Release via GitHub UI:" + echo " This is expected — publish.yml patches the version at build time." + echo " The publish will succeed; this CI check is informational only." + echo "" + echo " To avoid this warning, run 'make bump-version V=X.Y.Z' before tagging." + exit 0 + fi + echo "[SUCCESS] Tag v$TAG_VERSION matches __version__" + # ---- FORMAT ---- - name: Run formatter run: make check-format diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index f51df7dc..4dd85790 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -3,7 +3,13 @@ name: Publish to PyPI on: push: tags: - - "v*" # Triggers when a new GitHub Tag is published eg: v1.2.3 + - "v*" # Triggers on tag push (e.g. 
git push origin --tags) + release: + types: [ published ] # Triggers on GitHub UI Release creation + +concurrency: + group: publish-${{ github.ref }} + cancel-in-progress: false jobs: publish: @@ -27,37 +33,111 @@ jobs: - name: Install dependencies run: make setup-dev - - name: Install versioning deps + # ---- VERSION EXTRACTION (handles push-tag + release events, with/without v prefix) ---- + - name: Extract and validate version from tag run: | - python -m pip install --upgrade pip - pip install tomlkit + # Get tag name — works for both push (refs/tags/v1.2.3) and release events + if [ "${{ github.event_name }}" = "release" ]; then + TAG="${{ github.event.release.tag_name }}" + else + TAG="${GITHUB_REF#refs/tags/}" + fi + + # Strip optional 'v' prefix: v2.1.0 → 2.1.0, 2.1.0 → 2.1.0 + VERSION="${TAG#v}" + + # Validate PEP 440 format + python3 -c " + import re, sys + v = '$VERSION' + if not re.fullmatch(r'\d+\.\d+\.\d+([.](post|dev)\d+|(a|b|rc)\d+)?', v): + print(f'❌ Tag \'$TAG\' does not contain a valid PEP 440 version (extracted: \'{v}\')') + print(' Expected formats: X.Y.Z, X.Y.Z.postN, X.Y.Z.devN, X.Y.ZaN, X.Y.ZbN, X.Y.ZrcN') + sys.exit(1) + " + + # Export for all subsequent steps + echo "VERSION=$VERSION" >> $GITHUB_ENV + echo "✅ Extracted version: $VERSION (from tag: $TAG, event: ${{ github.event_name }})" - - name: Set version from GitHub tag + # ---- PRE-FLIGHT: Check version is not already burnt on PyPI ---- + - name: Check version is available on PyPI run: | - # Extract tag like "v1.2.3" → "1.2.3" - export VERSION="${GITHUB_REF#refs/tags/v}" - echo "Setting [project].version to $VERSION" - python - << 'PY' - from pathlib import Path - from tomlkit import parse, dumps - import os - version = os.environ["VERSION"] - p = Path('pyproject.toml') - doc = parse(p.read_text(encoding='utf-8')) - # Update PEP 621 version - if 'project' in doc: - doc['project']['version'] = version - p.write_text(dumps(doc), encoding='utf-8') - PY + python3 -c " + import 
urllib.request, urllib.error, sys + try: + urllib.request.urlopen('https://pypi.org/pypi/sygra/${{ env.VERSION }}/json', timeout=15) + print('❌ Version ${{ env.VERSION }} already exists on PyPI — this version is burnt.') + print(' Options:') + print(' • Use a .postN suffix: v${{ env.VERSION }}.post1') + print(' • Bump to the next version: make bump-version V=X.Y.Z') + sys.exit(1) + except urllib.error.HTTPError as e: + if e.code == 404: + print('✅ Version ${{ env.VERSION }} is available on PyPI') + sys.exit(0) + print(f'⚠️ PyPI check returned HTTP {e.code} — proceeding anyway') + except Exception as e: + print(f'⚠️ PyPI check failed ({e}) — proceeding anyway') + " + + # ---- PATCH VERSION ---- + - name: Set version in source files + run: | + echo "Setting __version__ to $VERSION" + + # Patch sygra/__init__.py (hatchling reads version from here) + python3 -c " + import re, pathlib + p = pathlib.Path('sygra/__init__.py') + p.write_text(re.sub(r'^__version__ = \".*\"', '__version__ = \"$VERSION\"', p.read_text(), count=1, flags=re.MULTILINE)) + " + # Patch [tool.poetry] version so Poetry stays consistent + python3 -c " + import re, pathlib + p = pathlib.Path('pyproject.toml') + p.write_text(re.sub(r'(\[tool\.poetry\]\nversion\s*=\s*)\"[^\"]*\"', r'\g<1>\"$VERSION\"', p.read_text(), count=1)) + " + + # Verify + grep '__version__' sygra/__init__.py + echo "✅ Version set to $VERSION" + + - name: Validate version consistency + run: | + BUILT_VERSION=$(python3 -c "import re; m=re.search(r'__version__\s*=\s*\"(.+?)\"', open('sygra/__init__.py').read()); print(m.group(1))") + if [ "$VERSION" != "$BUILT_VERSION" ]; then + echo "❌ Version mismatch: tag=$VERSION, __init__.py=$BUILT_VERSION" + exit 1 + fi + echo "✅ Version validated: $VERSION" + + # ---- BUILD ---- - name: Build package run: make build + - name: Validate built artifacts + run: | + ls dist/ + if ! 
ls dist/sygra-${VERSION}-*.whl 1>/dev/null 2>&1; then + echo "❌ No wheel found for version $VERSION in dist/" + ls dist/ + exit 1 + fi + if ! ls dist/sygra-${VERSION}.tar.gz 1>/dev/null 2>&1; then + echo "❌ No sdist found for version $VERSION in dist/" + ls dist/ + exit 1 + fi + echo "✅ Built artifacts verified for version $VERSION" + + # ---- PUBLISH ---- - name: Publish to PyPI env: TWINE_USERNAME: __token__ TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }} run: | - python -m pip install --upgrade pip - pip install twine - python -m twine upload --repository pypi dist/* --verbose + python3 -m pip install --upgrade pip + python3 -m pip install twine + python3 -m twine upload --repository pypi dist/* --verbose diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index b536060d..70c56fb2 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -61,6 +61,13 @@ repos: - repo: local hooks: + - id: version-check + name: Validate __version__ is valid PEP 440 + entry: python3 -c "import re, sys; content=open('sygra/__init__.py').read(); m=re.search(r'__version__\s*=\s*\"(\d+\.\d+\.\d+(?:\.(?:post|dev)\d+|(?:a|b|rc)\d+)?)\"', content); sys.exit(0) if m else (print('[ERROR] Invalid __version__ in sygra/__init__.py. Must be PEP 440 (e.g. X.Y.Z, X.Y.Z.postN)') or sys.exit(1))" + language: system + files: ^sygra/__init__\.py$ + pass_filenames: false + - id: pytest name: Run tests with pytest entry: uv run pytest -q tests diff --git a/Makefile b/Makefile index d69dba09..2b896a84 100644 --- a/Makefile +++ b/Makefile @@ -55,7 +55,7 @@ studio-build: ## Build the Studio frontend (only if not already built) echo "📦 Building Studio frontend..."; \ cd $(STUDIO_FRONTEND_DIR) && npm install && npm run build; \ else \ - echo "✅ Studio frontend already built. Use 'make studio-rebuild' to force rebuild."; \ + echo "[SUCCESS] Studio frontend already built. 
Use 'make studio-rebuild' to force rebuild."; \ fi .PHONY: studio-rebuild @@ -112,6 +112,29 @@ docs-serve: ## Serve documentation locally # BUILDING & PUBLISHING ######################################################################################################################## +.PHONY: version +version: ## Show current version + @python3 -c "import re; m=re.search(r'__version__\s*=\s*\"(.+?)\"', open('sygra/__init__.py').read()); print(m.group(1))" + +.PHONY: bump-version +bump-version: ## Bump version: make bump-version V=2.1.0 (or V=2.1.0.post1) + @if [ -z "$(V)" ]; then \ + echo "[ERROR] Usage: make bump-version V=X.Y.Z[.postN]"; \ + exit 1; \ + fi + @if ! echo "$(V)" | grep -qE '^[0-9]+\.[0-9]+\.[0-9]+(\.(post|dev)[0-9]+|(a|b|rc)[0-9]+)?$$'; then \ + echo "[ERROR] Invalid version: $(V). Must be PEP 440 (e.g. X.Y.Z, X.Y.Z.postN)"; \ + exit 1; \ + fi + @python3 -c "import re, pathlib; p=pathlib.Path('sygra/__init__.py'); p.write_text(re.sub(r'^__version__ = \".*\"', '__version__ = \"$(V)\"', p.read_text(), count=1, flags=re.MULTILINE))" + @python3 -c "import re, pathlib; p=pathlib.Path('pyproject.toml'); p.write_text(re.sub(r'(\[tool\.poetry\]\nversion\s*=\s*)\"[^\"]*\"', r'\1\"$(V)\"', p.read_text(), count=1))" + @echo "[SUCCESS] Version bumped to $(V) in sygra/__init__.py and pyproject.toml" + @echo " Next steps:" + @echo " 1. git add sygra/__init__.py pyproject.toml" + @echo " 2. git commit -m 'Bump version to $(V)'" + @echo " 3. git tag v$(V)" + @echo " 4. 
git push origin main --tags" + .PHONY: build build: ## Build package $(UV) run $(PYTHON) -m build diff --git a/pyproject.toml b/pyproject.toml index c707ccf4..3d6d6c70 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "sygra" -version = "1.0.0" +dynamic = ["version"] description = "Graph-oriented Synthetic data generation Pipeline library" readme = "README.md" requires-python = ">=3.9,<3.12,!=3.9.7" @@ -90,6 +90,9 @@ Releases = "https://github.com/ServiceNow/SyGra/releases" Issues = "https://github.com/ServiceNow/SyGra/issues" Discussions = "https://github.com/ServiceNow/SyGra/discussions" +[tool.hatch.version] +path = "sygra/__init__.py" + [tool.hatch.build.targets.wheel] packages = ["sygra", "studio"] include = [ @@ -156,5 +159,8 @@ module = ["tests.*"] disallow_untyped_defs = false check_untyped_defs = false +[tool.poetry] +version = "2.0.0.post1" + [tool.poetry.group.dev.dependencies] uvicorn = "^0.38.0" diff --git a/sygra/__init__.py b/sygra/__init__.py index d509182e..71a1fa38 100644 --- a/sygra/__init__.py +++ b/sygra/__init__.py @@ -122,7 +122,7 @@ DATA_UTILS_AVAILABLE = False -__version__ = "1.0.0" +__version__ = "2.0.0.post1" __author__ = "SyGra Team" __description__ = "Graph-oriented Synthetic data generation Pipeline library" diff --git a/sygra/configuration/loader.py b/sygra/configuration/loader.py index 0b15b856..8a19e650 100644 --- a/sygra/configuration/loader.py +++ b/sygra/configuration/loader.py @@ -1,14 +1,19 @@ +from __future__ import annotations + import os from pathlib import Path -from typing import Any, Union +from typing import TYPE_CHECKING, Any, Union import yaml +if TYPE_CHECKING: + from sygra.workflow import Workflow + try: from sygra.core.dataset.dataset_config import DataSourceConfig, OutputConfig # noqa: F401 from sygra.core.graph.graph_config import GraphConfig # noqa: F401 from sygra.utils import utils - from sygra.workflow import AutoNestedDict + from sygra.workflow 
import AutoNestedDict # noqa: F401 UTILS_AVAILABLE = True except ImportError: @@ -42,12 +47,12 @@ def load(self, config_path: Union[str, Path, dict[str, Any]]) -> dict[str, Any]: return config - def load_and_create(self, config_path: Union[str, Path, dict[str, Any]]): + def load_and_create(self, config_path: Union[str, Path, dict[str, Any]]) -> Workflow: """Load config and create appropriate Workflow or Graph object.""" config = self.load(config_path) # Import here to avoid circular imports - from ..workflow import Workflow + from ..workflow import AutoNestedDict, Workflow workflow = Workflow() workflow._config = AutoNestedDict.convert_dict(config) @@ -60,8 +65,16 @@ def load_and_create(self, config_path: Union[str, Path, dict[str, Any]]): if isinstance(config_path, (str, Path)): workflow.name = Path(config_path).parent.name + workflow._is_existing_task = True else: workflow.name = config.get("task_name", "loaded_workflow") + # Mark as existing task if config has nodes defined + if config.get("graph_config", {}).get("nodes"): + workflow._is_existing_task = True + + # Track node count from loaded config + if "graph_config" in config and "nodes" in config["graph_config"]: + workflow._node_counter = len(config["graph_config"]["nodes"]) return workflow diff --git a/sygra/workflow/__init__.py b/sygra/workflow/__init__.py index 7dfdaecb..b47a8c8a 100644 --- a/sygra/workflow/__init__.py +++ b/sygra/workflow/__init__.py @@ -192,6 +192,38 @@ def __init__(self, name: Optional[str] = None): self._load_existing_config_if_present() + @classmethod + def from_config(cls, config: Union[str, Path, dict[str, Any]]) -> "Workflow": + """ + Create a Workflow from a configuration dictionary or YAML file path. + + Args: + config: A dictionary containing the full workflow configuration, + or a path to a YAML configuration file. + + Returns: + Workflow: A configured Workflow instance ready for execution. + + Examples: + # From dictionary + >>> config = { + ... 
"data_config": {"source": {"type": "disk", "file_path": "data.json", "file_format": "json"}}, + ... "graph_config": { + ... "nodes": {"llm_1": {"node_type": "llm", "model": {"name": "gpt-4o"}, "prompt": [{"user": "Hello {text}"}]}}, + ... "edges": [{"from": "START", "to": "llm_1"}, {"from": "llm_1", "to": "END"}] + ... } + ... } + >>> workflow = Workflow.from_config(config) + >>> workflow.run(num_records=1) + + # From YAML file + >>> workflow = Workflow.from_config("tasks/examples/text_to_speech/graph_config.yaml") + """ + from sygra.configuration import ConfigLoader + + loader = ConfigLoader() + return loader.load_and_create(config) + def _load_existing_config_if_present(self): """Load existing task configuration if this appears to be a task path.""" if self.name and (os.path.exists(self.name) or "/" in self.name or "\\" in self.name): @@ -863,8 +895,7 @@ def _execute_existing_task( if kwargs.get("quality_only", False): executor = JudgeQualityTaskExecutor(args, kwargs.get("quality_config")) else: - executor = DefaultTaskExecutor(args) - BaseTaskExecutor.__init__(executor, args, modified_config) + executor = DefaultTaskExecutor(args, modified_config) result = executor.execute() logger.info(f"Successfully executed task: {task_name}") diff --git a/uv.lock b/uv.lock index f90bf275..79a0a8fa 100644 --- a/uv.lock +++ b/uv.lock @@ -4730,7 +4730,6 @@ wheels = [ [[package]] name = "sygra" -version = "1.0.0" source = { editable = "." } dependencies = [ { name = "aiohttp" },