diff --git a/acceptance/bin/print_requests.py b/acceptance/bin/print_requests.py index 64479b79a5a..cdee3f9381f 100755 --- a/acceptance/bin/print_requests.py +++ b/acceptance/bin/print_requests.py @@ -7,10 +7,11 @@ If argument starts with ! then it's a negation filter. Examples: - print_requests.py //jobs # Show non-GET requests with /jobs in path - print_requests.py --get //jobs # Show all requests with /jobs in path - print_requests.py --sort '^//import-file/' # Show non-GET requests, exclude /import-file/, sort output - print_requests.py --keep //jobs # Show requests and do not delete out.requests.json afterwards + print_requests.py //jobs # Show non-GET requests with /jobs in path + print_requests.py --get //jobs # Show all requests with /jobs in path + print_requests.py --sort '^//import-file/' # Show non-GET requests, exclude /import-file/, sort output + print_requests.py --keep //jobs # Show requests and do not delete out.requests.json afterwards + print_requests.py //api/2.0/repos/snapshots --method DELETE # Show only DELETE to that path This replaces custom jq wrappers like: jq --sort-keys 'select(.method != "GET" and (.path | contains("/jobs")))' < out.requests.txt @@ -123,7 +124,7 @@ def read_json_many(s): assert result == [{"method": "GET"}, {"method": "POST"}], result -def filter_requests(requests, path_filters, include_get, should_sort, unique=False): +def filter_requests(requests, path_filters, include_get, should_sort, unique=False, method_filter=None): """Filter requests based on method and path filters.""" positive_filters = [] negative_filters = [] @@ -138,8 +139,12 @@ def filter_requests(requests, path_filters, include_get, should_sort, unique=Fal filtered_requests = [] for req in requests: - # Skip GET requests unless include_get is True - if req.get("method") == "GET" and not include_get: + if method_filter: + # --method overrides the default GET exclusion + if req.get("method") != method_filter: + continue + elif req.get("method") == "GET" 
and not include_get: + # Skip GET requests unless include_get is True continue # Apply path filters @@ -186,6 +191,7 @@ def main(): action="store_true", help="Collapse consecutive duplicate requests (like uniq), e.g. repeated GET polls", ) + parser.add_argument("--method", metavar="METHOD", help="Only show requests with this HTTP method (e.g. DELETE)") parser.add_argument("--oneline", action="store_true", help="Print output with one request per line") parser.add_argument( "--del-body", @@ -217,7 +223,7 @@ def main(): return requests = read_json_many(data) - filtered_requests = filter_requests(requests, args.path_filters, args.get, args.sort, args.unique) + filtered_requests = filter_requests(requests, args.path_filters, args.get, args.sort, args.unique, args.method) for req in filtered_requests: body = req.get("body") diff --git a/acceptance/bundle/deploy/immutable-no-artifacts/databricks.yml.tmpl b/acceptance/bundle/deploy/immutable-no-artifacts/databricks.yml.tmpl new file mode 100644 index 00000000000..6c41e997af6 --- /dev/null +++ b/acceptance/bundle/deploy/immutable-no-artifacts/databricks.yml.tmpl @@ -0,0 +1,26 @@ +bundle: + name: test-bundle-immutable-no-artifacts-$UNIQUE_NAME + +experimental: + immutable_folder: true + +resources: + jobs: + my_job: + name: my job + tasks: + - task_key: spark_python_task + spark_python_task: + python_file: ./src/main.py + environment_key: env + - task_key: notebook_task + notebook_task: + notebook_path: ./src/notebook.py + base_parameters: + path: ${workspace.file_path}/some_path + + + environments: + - environment_key: env + spec: + environment_version: "4" diff --git a/acceptance/bundle/deploy/immutable-no-artifacts/out.test.toml b/acceptance/bundle/deploy/immutable-no-artifacts/out.test.toml new file mode 100644 index 00000000000..e90b6d5d1ba --- /dev/null +++ b/acceptance/bundle/deploy/immutable-no-artifacts/out.test.toml @@ -0,0 +1,3 @@ +Local = true +Cloud = false +EnvMatrix.DATABRICKS_BUNDLE_ENGINE = ["direct"] diff 
--git a/acceptance/bundle/deploy/immutable-no-artifacts/output.txt b/acceptance/bundle/deploy/immutable-no-artifacts/output.txt new file mode 100644 index 00000000000..44360ad4ea6 --- /dev/null +++ b/acceptance/bundle/deploy/immutable-no-artifacts/output.txt @@ -0,0 +1,33 @@ + +>>> [CLI] bundle validate +Name: test-bundle-immutable-no-artifacts-[UNIQUE_NAME] +Target: default +Workspace: + User: [USERNAME] + Path: /Workspace/Users/[USERNAME]/.bundle/test-bundle-immutable-no-artifacts-[UNIQUE_NAME]/default + +Validation OK! + +>>> [CLI] bundle deploy +Uploading immutable bundle snapshot... +Deploying resources... +Updating deployment state... +Deployment complete! + +>>> [CLI] jobs get [NUMID] +"/Workspace/Users/[UUID]/.snapshots/[UUID]/[SNAPSHOT_HASH]/src/files/src/main.py" + +>>> [CLI] jobs get [NUMID] +"/Workspace/Users/[UUID]/.snapshots/[UUID]/[SNAPSHOT_HASH]/src/files/src/notebook" + +>>> [CLI] jobs get [NUMID] +"/Workspace/Users/[UUID]/.snapshots/[UUID]/[SNAPSHOT_HASH]/src/files/some_path" + +>>> [CLI] bundle destroy --auto-approve +The following resources will be deleted: + delete resources.jobs.my_job + +All files and directories at the following location will be deleted: /Workspace/Users/[USERNAME]/.bundle/test-bundle-immutable-no-artifacts-[UNIQUE_NAME]/default + +Deleting files... +Destroy complete! 
diff --git a/acceptance/bundle/deploy/immutable-no-artifacts/script b/acceptance/bundle/deploy/immutable-no-artifacts/script new file mode 100644 index 00000000000..a7aad718750 --- /dev/null +++ b/acceptance/bundle/deploy/immutable-no-artifacts/script @@ -0,0 +1,18 @@ +envsubst < databricks.yml.tmpl > databricks.yml + +cleanup() { + rm -f out.requests.txt +} +trap cleanup EXIT + +trace $CLI bundle validate +trace $CLI bundle deploy + + +# Get a job and check that task paths point into the snapshot +JOB_ID=$($CLI bundle summary -o json | jq -r '.resources.jobs.my_job.id') +trace $CLI jobs get $JOB_ID | jq '.settings.tasks' | jq '.[] | select(.spark_python_task != null) | .spark_python_task.python_file' +trace $CLI jobs get $JOB_ID | jq '.settings.tasks' | jq '.[] | select(.notebook_task != null) | .notebook_task.notebook_path' +trace $CLI jobs get $JOB_ID | jq '.settings.tasks' | jq '.[] | select(.notebook_task != null) | .notebook_task.base_parameters.path' + +trace $CLI bundle destroy --auto-approve diff --git a/acceptance/bundle/deploy/immutable-no-artifacts/src/main.py b/acceptance/bundle/deploy/immutable-no-artifacts/src/main.py new file mode 100644 index 00000000000..6c285f7e2f5 --- /dev/null +++ b/acceptance/bundle/deploy/immutable-no-artifacts/src/main.py @@ -0,0 +1 @@ +print("Hello from Spark Python Task!") diff --git a/acceptance/bundle/deploy/immutable-no-artifacts/src/notebook.py b/acceptance/bundle/deploy/immutable-no-artifacts/src/notebook.py new file mode 100644 index 00000000000..fb3c9597fbf --- /dev/null +++ b/acceptance/bundle/deploy/immutable-no-artifacts/src/notebook.py @@ -0,0 +1,3 @@ +# Databricks notebook source + +print("Hello from Notebook Task!") diff --git a/acceptance/bundle/deploy/immutable-no-artifacts/test.toml b/acceptance/bundle/deploy/immutable-no-artifacts/test.toml new file mode 100644 index 00000000000..9b345bc3395 --- /dev/null +++ b/acceptance/bundle/deploy/immutable-no-artifacts/test.toml @@ -0,0 +1,20 @@ +Local = true +Cloud 
= false # Temporarily disable cloud tests until the API is fully available +RecordRequests = true + +# immutable_folder only works with the direct engine. +EnvMatrix.DATABRICKS_BUNDLE_ENGINE = ["direct"] + +Ignore = [ + "databricks.yml", + ".databricks", + ".venv", + "script", + "*.pyc", +] + +# Normalize the content-addressed snapshot hash so it doesn't need to be +# hardcoded in output.txt and the test stays stable across file changes. +[[Repls]] +Old = '[0-9a-f]{64}' +New = '[SNAPSHOT_HASH]' diff --git a/acceptance/bundle/deploy/immutable/databricks.yml.tmpl b/acceptance/bundle/deploy/immutable/databricks.yml.tmpl new file mode 100644 index 00000000000..407a1bbbe6f --- /dev/null +++ b/acceptance/bundle/deploy/immutable/databricks.yml.tmpl @@ -0,0 +1,34 @@ +bundle: + name: test-bundle-immutable-$UNIQUE_NAME + +experimental: + immutable_folder: true + +artifacts: + python_artifact: + type: whl + build: uv build --wheel + +resources: + jobs: + my_job: + name: my job + tasks: + - task_key: spark_python_task + spark_python_task: + python_file: ./src/main.py + environment_key: env + - task_key: notebook_task + notebook_task: + notebook_path: ./src/notebook.py + - task_key: python_wheel_task + python_wheel_task: + package_name: immutable + entry_point: main + environment_key: env + environments: + - environment_key: env + spec: + environment_version: "4" + dependencies: + - ./dist/*.whl diff --git a/acceptance/bundle/deploy/immutable/out.test.toml b/acceptance/bundle/deploy/immutable/out.test.toml new file mode 100644 index 00000000000..e90b6d5d1ba --- /dev/null +++ b/acceptance/bundle/deploy/immutable/out.test.toml @@ -0,0 +1,3 @@ +Local = true +Cloud = false +EnvMatrix.DATABRICKS_BUNDLE_ENGINE = ["direct"] diff --git a/acceptance/bundle/deploy/immutable/output.txt b/acceptance/bundle/deploy/immutable/output.txt new file mode 100644 index 00000000000..211282542bd --- /dev/null +++ b/acceptance/bundle/deploy/immutable/output.txt @@ -0,0 +1,62 @@ + +>>> [CLI] bundle 
validate +Name: test-bundle-immutable-[UNIQUE_NAME] +Target: default +Workspace: + User: [USERNAME] + Path: /Workspace/Users/[USERNAME]/.bundle/test-bundle-immutable-[UNIQUE_NAME]/default + +Validation OK! + +>>> [CLI] bundle plan -o json +Building python_artifact... +[ + { + "notebook_task": { + "notebook_path": "${workspace.snapshot_path}/src/files/src/notebook" + }, + "task_key": "notebook_task" + }, + { + "environment_key": "env", + "python_wheel_task": { + "entry_point": "main", + "package_name": "immutable" + }, + "task_key": "python_wheel_task" + }, + { + "environment_key": "env", + "spark_python_task": { + "python_file": "${workspace.snapshot_path}/src/files/src/main.py" + }, + "task_key": "spark_python_task" + } +] + +>>> [CLI] bundle deploy +Building python_artifact... +Uploading immutable bundle snapshot... +Deploying resources... +Updating deployment state... +Deployment complete! + +>>> [CLI] jobs get [NUMID] +"/Workspace/Users/[UUID]/.snapshots/[UUID]/[SNAPSHOT_HASH]/src/files/src/main.py" + +>>> [CLI] jobs get [NUMID] +"/Workspace/Users/[UUID]/.snapshots/[UUID]/[SNAPSHOT_HASH]/src/files/src/notebook" + +>>> [CLI] jobs get [NUMID] +[ + "/Workspace/Users/[UUID]/.snapshots/[UUID]/[SNAPSHOT_HASH]/src/artifacts/.internal/immutable-0.0.1-py3-none-any.whl" +] + +>>> [CLI] bundle destroy --auto-approve +The following resources will be deleted: + delete resources.jobs.my_job + +All files and directories at the following location will be deleted: /Workspace/Users/[USERNAME]/.bundle/test-bundle-immutable-[UNIQUE_NAME]/default + +Deleting files... +Destroy complete! 
diff --git a/acceptance/bundle/deploy/immutable/pyproject.toml b/acceptance/bundle/deploy/immutable/pyproject.toml new file mode 100644 index 00000000000..4e796c3f93a --- /dev/null +++ b/acceptance/bundle/deploy/immutable/pyproject.toml @@ -0,0 +1,34 @@ +[project] +name = "immutable" +version = "0.0.1" +authors = [{ name = "andrew.nester@databricks.com" }] +requires-python = ">=3.10,<3.13" +dependencies = [ + # Any dependencies for jobs and pipelines in this project can be added here + # See also https://docs.databricks.com/dev-tools/bundles/library-dependencies + # + # LIMITATION: for pipelines, dependencies are cached during development; + # add dependencies to the 'environment' section of your pipeline.yml file instead +] + +[dependency-groups] +dev = [ + "pytest", + "ruff", + "databricks-dlt", + "databricks-connect>=15.4,<15.5", + "ipykernel", +] + +[project.scripts] +main = "immutable.main:main" + +[build-system] +requires = ["setuptools>=40.8.0"] +build-backend = "setuptools.build_meta" + +[tool.setuptools.packages.find] +where = ["src"] + +[tool.ruff] +line-length = 120 diff --git a/acceptance/bundle/deploy/immutable/script b/acceptance/bundle/deploy/immutable/script new file mode 100644 index 00000000000..85e9a908d1d --- /dev/null +++ b/acceptance/bundle/deploy/immutable/script @@ -0,0 +1,19 @@ +envsubst < databricks.yml.tmpl > databricks.yml +cleanup() { + trace $CLI bundle destroy --auto-approve +} +trap cleanup EXIT + +trace $CLI bundle validate +trace $CLI bundle plan -o json | jq '.plan["resources.jobs.my_job"].new_state.value.tasks' +trace $CLI bundle deploy + + +# Get a job and check that task paths are immutable +JOB_ID=$($CLI bundle summary -o json | jq -r '.resources.jobs.my_job.id') +trace $CLI jobs get $JOB_ID | jq '.settings.tasks' | jq '.[] | select(.spark_python_task != null) | .spark_python_task.python_file' +trace $CLI jobs get $JOB_ID | jq '.settings.tasks' | jq '.[] | select(.notebook_task != null) | .notebook_task.notebook_path' +trace 
$CLI jobs get $JOB_ID | jq '.settings.environments[0].spec.dependencies' + +# Redirect run output to a log file — the real workspace produces different output than the local test server. +$CLI bundle run my_job &> LOG.run diff --git a/acceptance/bundle/deploy/immutable/src/immutable/__init__.py b/acceptance/bundle/deploy/immutable/src/immutable/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/acceptance/bundle/deploy/immutable/src/immutable/main.py b/acceptance/bundle/deploy/immutable/src/immutable/main.py new file mode 100644 index 00000000000..9eccd00150d --- /dev/null +++ b/acceptance/bundle/deploy/immutable/src/immutable/main.py @@ -0,0 +1,6 @@ +def main(): + print("Hello from Python Wheel Task!") + + +if __name__ == "__main__": + main() diff --git a/acceptance/bundle/deploy/immutable/src/main.py b/acceptance/bundle/deploy/immutable/src/main.py new file mode 100644 index 00000000000..6c285f7e2f5 --- /dev/null +++ b/acceptance/bundle/deploy/immutable/src/main.py @@ -0,0 +1 @@ +print("Hello from Spark Python Task!") diff --git a/acceptance/bundle/deploy/immutable/src/notebook.py b/acceptance/bundle/deploy/immutable/src/notebook.py new file mode 100644 index 00000000000..fb3c9597fbf --- /dev/null +++ b/acceptance/bundle/deploy/immutable/src/notebook.py @@ -0,0 +1,3 @@ +# Databricks notebook source + +print("Hello from Notebook Task!") diff --git a/acceptance/bundle/deploy/immutable/test.toml b/acceptance/bundle/deploy/immutable/test.toml new file mode 100644 index 00000000000..45dffe2d2d7 --- /dev/null +++ b/acceptance/bundle/deploy/immutable/test.toml @@ -0,0 +1,21 @@ +Local = true +Cloud = false # Temporarily disable cloud tests until the API is fully available + +# immutable_folder only works with the direct engine. 
+EnvMatrix.DATABRICKS_BUNDLE_ENGINE = ["direct"] + +Ignore = [ + "dist", + "build", + "databricks.yml", + ".databricks", + ".venv", + "script", + "*.pyc", + "src/*.egg-info", +] + +[[Repls]] +# Replace snapshot hash with SNAPSHOT_HASH +Old = "[0-9a-f]{64}" +New = "[SNAPSHOT_HASH]" diff --git a/acceptance/bundle/resources/apps/immutable/app/app.py b/acceptance/bundle/resources/apps/immutable/app/app.py new file mode 100644 index 00000000000..184b9c5c592 --- /dev/null +++ b/acceptance/bundle/resources/apps/immutable/app/app.py @@ -0,0 +1,3 @@ +import streamlit as st + +st.write("hello") diff --git a/acceptance/bundle/resources/apps/immutable/databricks.yml.tmpl b/acceptance/bundle/resources/apps/immutable/databricks.yml.tmpl new file mode 100644 index 00000000000..6b9b696ef01 --- /dev/null +++ b/acceptance/bundle/resources/apps/immutable/databricks.yml.tmpl @@ -0,0 +1,11 @@ +bundle: + name: test-bundle-immutable-app-$UNIQUE_NAME + +experimental: + immutable_folder: true + +resources: + apps: + my_app: + name: my-immutable-app + source_code_path: ./app diff --git a/acceptance/bundle/resources/apps/immutable/out.test.toml b/acceptance/bundle/resources/apps/immutable/out.test.toml new file mode 100644 index 00000000000..e90b6d5d1ba --- /dev/null +++ b/acceptance/bundle/resources/apps/immutable/out.test.toml @@ -0,0 +1,3 @@ +Local = true +Cloud = false +EnvMatrix.DATABRICKS_BUNDLE_ENGINE = ["direct"] diff --git a/acceptance/bundle/resources/apps/immutable/output.txt b/acceptance/bundle/resources/apps/immutable/output.txt new file mode 100644 index 00000000000..6ba6818e2cc --- /dev/null +++ b/acceptance/bundle/resources/apps/immutable/output.txt @@ -0,0 +1,42 @@ + +>>> [CLI] bundle deploy +Uploading immutable bundle snapshot... +Deploying resources... +Updating deployment state... +Deployment complete! 
+ +>>> [CLI] bundle run my_app +✓ Getting the status of the app my-immutable-app +✓ App is in RUNNING state +✓ App compute is in STOPPED state +✓ Starting the app my-immutable-app +✓ App is starting... +✓ App is started! +✓ Deployment succeeded +You can access the app at my-immutable-app-123.cloud.databricksapps.com + +>>> print_requests.py //apps +{ + "method": "POST", + "path": "/api/2.0/apps", + "q": { + "no_compute": "true" + }, + "body": { + "description": "", + "name": "my-immutable-app" + } +} +{ + "method": "POST", + "path": "/api/2.0/apps/my-immutable-app/start", + "body": {} +} +{ + "method": "POST", + "path": "/api/2.0/apps/my-immutable-app/deployments", + "body": { + "mode": "SNAPSHOT", + "source_code_path": "${workspace.snapshot_path}/src/files/app" + } +} diff --git a/acceptance/bundle/resources/apps/immutable/script b/acceptance/bundle/resources/apps/immutable/script new file mode 100644 index 00000000000..968143b2ef5 --- /dev/null +++ b/acceptance/bundle/resources/apps/immutable/script @@ -0,0 +1,13 @@ +envsubst < databricks.yml.tmpl > databricks.yml + +cleanup() { + rm -f out.requests.txt +} +trap cleanup EXIT + +trace $CLI bundle deploy +trace $CLI bundle run my_app + +# Print the app requests to verify that source_code_path in the deployment +# points to the content-addressed snapshot path rather than a local path. +trace print_requests.py //apps diff --git a/acceptance/bundle/resources/apps/immutable/test.toml b/acceptance/bundle/resources/apps/immutable/test.toml new file mode 100644 index 00000000000..005616c8df6 --- /dev/null +++ b/acceptance/bundle/resources/apps/immutable/test.toml @@ -0,0 +1,20 @@ +Local = true +Cloud = false # Temporarily disable cloud tests until the API is fully available +RecordRequests = true + +# immutable_folder only works with the direct engine. 
+EnvMatrix.DATABRICKS_BUNDLE_ENGINE = ["direct"] + +Ignore = [ + "databricks.yml", + ".databricks", + ".venv", + "script", + "*.pyc", +] + +# Normalize the content-addressed snapshot hash so the test doesn't need +# to be updated whenever the bundle content changes. +[[Repls]] +Old = '[0-9a-f]{64}' +New = '[SNAPSHOT_HASH]' diff --git a/acceptance/bundle/validate/immutable_workspace_paths/databricks.yml b/acceptance/bundle/validate/immutable_workspace_paths/databricks.yml new file mode 100644 index 00000000000..6e3a4cb46ee --- /dev/null +++ b/acceptance/bundle/validate/immutable_workspace_paths/databricks.yml @@ -0,0 +1,23 @@ +bundle: + name: my-bundle + +experimental: + immutable_folder: true + +sync: + exclude: + # Test framework files that are not part of the bundle source. + - "repls.json" + - "user_repls.json" + - "script" + - "*.toml" + +resources: + jobs: + my_job: + name: my job + tasks: + - task_key: my_task + existing_cluster_id: "0101-120000-aaaaaaaa" + spark_python_task: + python_file: ./src/main.py diff --git a/acceptance/bundle/validate/immutable_workspace_paths/out.test.toml b/acceptance/bundle/validate/immutable_workspace_paths/out.test.toml new file mode 100644 index 00000000000..f784a183258 --- /dev/null +++ b/acceptance/bundle/validate/immutable_workspace_paths/out.test.toml @@ -0,0 +1,3 @@ +Local = true +Cloud = false +EnvMatrix.DATABRICKS_BUNDLE_ENGINE = ["terraform", "direct"] diff --git a/acceptance/bundle/validate/immutable_workspace_paths/output.txt b/acceptance/bundle/validate/immutable_workspace_paths/output.txt new file mode 100644 index 00000000000..33c5f2be3a8 --- /dev/null +++ b/acceptance/bundle/validate/immutable_workspace_paths/output.txt @@ -0,0 +1,30 @@ + +>>> [CLI] bundle validate -o json +Warning: Pattern user_repls.json does not match any files + at sync.exclude[1] + in databricks.yml:11:7 + +{ + "workspace": { + "artifact_path": "/Workspace/Users/[USERNAME]/.bundle/my-bundle/default/artifacts", + "current_user": { + 
"domain_friendly_name": "[USERNAME]", + "id": "[USERID]", + "short_name": "[USERNAME]", + "userName": "[USERNAME]" + }, + "file_path": "/Workspace/Users/[USERNAME]/.bundle/my-bundle/default/files", + "resource_path": "/Workspace/Users/[USERNAME]/.bundle/my-bundle/default/resources", + "root_path": "/Workspace/Users/[USERNAME]/.bundle/my-bundle/default", + "state_path": "/Workspace/Users/[USERNAME]/.bundle/my-bundle/default/state" + }, + "tasks": [ + { + "existing_cluster_id": "0101-120000-aaaaaaaa", + "spark_python_task": { + "python_file": "${workspace.snapshot_path}/src/files/src/main.py" + }, + "task_key": "my_task" + } + ] +} diff --git a/acceptance/bundle/validate/immutable_workspace_paths/script b/acceptance/bundle/validate/immutable_workspace_paths/script new file mode 100644 index 00000000000..df056fa9b99 --- /dev/null +++ b/acceptance/bundle/validate/immutable_workspace_paths/script @@ -0,0 +1 @@ +trace $CLI bundle validate -o json | jq '{workspace: .workspace, tasks: .resources.jobs.my_job.tasks}' diff --git a/acceptance/bundle/validate/immutable_workspace_paths/src/main.py b/acceptance/bundle/validate/immutable_workspace_paths/src/main.py new file mode 100644 index 00000000000..11b15b1a458 --- /dev/null +++ b/acceptance/bundle/validate/immutable_workspace_paths/src/main.py @@ -0,0 +1 @@ +print("hello") diff --git a/acceptance/bundle/validate/immutable_workspace_paths/test.toml b/acceptance/bundle/validate/immutable_workspace_paths/test.toml new file mode 100644 index 00000000000..85e02532c93 --- /dev/null +++ b/acceptance/bundle/validate/immutable_workspace_paths/test.toml @@ -0,0 +1,3 @@ +Local = true +Cloud = false +Ignore = [".databricks"] diff --git a/bundle/bundle.go b/bundle/bundle.go index a471a5b9b2e..53332f73bdb 100644 --- a/bundle/bundle.go +++ b/bundle/bundle.go @@ -379,3 +379,8 @@ func (b *Bundle) StateFilenameTerraform(ctx context.Context) (string, string) { func (b *Bundle) StateFilenameConfigSnapshot(ctx context.Context) (string, string) { 
return configSnapshotFilename, filepath.ToSlash(filepath.Join(b.GetLocalStateDir(ctx), configSnapshotFilename)) } + +// IsImmutableFolder reports whether experimental.immutable_folder is enabled. +func (b *Bundle) IsImmutableFolder() bool { + return b.Config.Experimental != nil && b.Config.Experimental.ImmutableFolder +} diff --git a/bundle/config/experimental.go b/bundle/config/experimental.go index b8984adaddd..658f1cea819 100644 --- a/bundle/config/experimental.go +++ b/bundle/config/experimental.go @@ -3,6 +3,13 @@ package config type Experimental struct { Scripts map[ScriptHook]Command `json:"scripts,omitempty"` + // ImmutableFolder specifies that bundle files and artifacts are uploaded as a + // single immutable snapshot rather than being synced individually. When true, + // the deployment calls /api/2.0/repos/snapshots with a zip of all files and sets + // workspace.file_path and workspace.artifact_path to the returned content-addressed + // path. Only supported with the direct deployment engine. + ImmutableFolder bool `json:"immutable_folder,omitempty"` + // By default Python wheel tasks deployed as is to Databricks platform. // If notebook wrapper required (for example, used in DBR < 13.1 or other configuration differences), users can provide a following experimental setting // experimental: diff --git a/bundle/config/mutator/override_immutable_folder.go b/bundle/config/mutator/override_immutable_folder.go new file mode 100644 index 00000000000..51307523d82 --- /dev/null +++ b/bundle/config/mutator/override_immutable_folder.go @@ -0,0 +1,35 @@ +package mutator + +import ( + "context" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config" + "github.com/databricks/cli/libs/diag" + "github.com/databricks/cli/libs/env" +) + +type overrideImmutableFolder struct{} + +// OverrideImmutableFolder sets experimental.immutable_folder to true +// if the DATABRICKS_IMMUTABLE_FOLDER environment variable is non-empty. 
+// This allows running the acceptance test suite against the immutable folder +// code path without modifying any databricks.yml files. +func OverrideImmutableFolder() bundle.Mutator { + return &overrideImmutableFolder{} +} + +func (m *overrideImmutableFolder) Name() string { + return "OverrideImmutableFolder" +} + +func (m *overrideImmutableFolder) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { + if env.Get(ctx, "DATABRICKS_IMMUTABLE_FOLDER") == "" { + return nil + } + if b.Config.Experimental == nil { + b.Config.Experimental = &config.Experimental{} + } + b.Config.Experimental.ImmutableFolder = true + return nil +} diff --git a/bundle/config/mutator/override_immutable_folder_test.go b/bundle/config/mutator/override_immutable_folder_test.go new file mode 100644 index 00000000000..e538fa1ffbb --- /dev/null +++ b/bundle/config/mutator/override_immutable_folder_test.go @@ -0,0 +1,43 @@ +package mutator_test + +import ( + "testing" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config" + "github.com/databricks/cli/bundle/config/mutator" + "github.com/databricks/cli/libs/env" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestOverrideImmutableFolderNotSet(t *testing.T) { + t.Parallel() + ctx := env.Set(t.Context(), "DATABRICKS_IMMUTABLE_FOLDER", "") + b := &bundle.Bundle{Config: config.Root{}} + diags := bundle.Apply(ctx, b, mutator.OverrideImmutableFolder()) + require.NoError(t, diags.Error()) + assert.True(t, b.Config.Experimental == nil || !b.Config.Experimental.ImmutableFolder) +} + +func TestOverrideImmutableFolderSet(t *testing.T) { + t.Parallel() + ctx := env.Set(t.Context(), "DATABRICKS_IMMUTABLE_FOLDER", "true") + b := &bundle.Bundle{Config: config.Root{}} + diags := bundle.Apply(ctx, b, mutator.OverrideImmutableFolder()) + require.NoError(t, diags.Error()) + require.NotNil(t, b.Config.Experimental) + assert.True(t, b.Config.Experimental.ImmutableFolder) +} + +func 
TestOverrideImmutableFolderAlreadyTrue(t *testing.T) { + t.Parallel() + ctx := env.Set(t.Context(), "DATABRICKS_IMMUTABLE_FOLDER", "") + b := &bundle.Bundle{Config: config.Root{}} + b.Config.Experimental = &config.Experimental{ImmutableFolder: true} + diags := bundle.Apply(ctx, b, mutator.OverrideImmutableFolder()) + require.NoError(t, diags.Error()) + // Existing true value must not be cleared when the env var is absent. + require.NotNil(t, b.Config.Experimental) + assert.True(t, b.Config.Experimental.ImmutableFolder) +} diff --git a/bundle/config/mutator/resolve_variable_references.go b/bundle/config/mutator/resolve_variable_references.go index 113f0576394..0aa73b575dd 100644 --- a/bundle/config/mutator/resolve_variable_references.go +++ b/bundle/config/mutator/resolve_variable_references.go @@ -59,6 +59,11 @@ type resolveVariableReferences struct { includeResources bool artifactsReferenceUsed bool + + // excludePaths lists variable reference paths (e.g. "workspace.file_path") whose + // resolution should be skipped. References to these paths remain unresolved so a + // later mutator can set the value and re-run resolution. + excludePaths []string } func ResolveVariableReferencesOnlyResources(prefixes ...string) bundle.Mutator { @@ -74,6 +79,24 @@ func ResolveVariableReferencesOnlyResources(prefixes ...string) bundle.Mutator { } } +// ResolveVariableReferencesOnlyResourcesExcluding is like ResolveVariableReferencesOnlyResources +// but leaves the listed variable reference paths unresolved. Use this when a workspace path will +// be updated by a later mutator (e.g. snapshot.Upload sets workspace.file_path to the snapshot +// location) and the final value should be substituted at that later point. 
+func ResolveVariableReferencesOnlyResourcesExcluding(excluded []string, prefixes ...string) bundle.Mutator { + if len(prefixes) == 0 { + prefixes = defaultPrefixes + } + return &resolveVariableReferences{ + prefixes: prefixes, + lookupFn: lookup, + extraRounds: maxResolutionRounds - 1, + pattern: dyn.NewPattern(dyn.Key("resources")), + includeResources: true, + excludePaths: excluded, + } +} + func ResolveVariableReferencesWithoutResources(prefixes ...string) bundle.Mutator { if len(prefixes) == 0 { prefixes = defaultPrefixes @@ -229,6 +252,9 @@ func (m *resolveVariableReferences) resolveOnce(b *bundle.Bundle, prefixes []dyn // Perform resolution only if the path starts with one of the specified prefixes. if slices.ContainsFunc(prefixes, path.HasPrefix) { + if slices.Contains(m.excludePaths, path.String()) { + return dyn.InvalidValue, dynvar.ErrSkipResolution + } value, err := m.lookupFn(normalized, path, b) hasUpdates = hasUpdates || (err == nil && value.IsValid()) return value, err diff --git a/bundle/config/mutator/resourcemutator/process_static_resources.go b/bundle/config/mutator/resourcemutator/process_static_resources.go index 7d3ad742e4b..28a7cbefe78 100644 --- a/bundle/config/mutator/resourcemutator/process_static_resources.go +++ b/bundle/config/mutator/resourcemutator/process_static_resources.go @@ -38,13 +38,14 @@ func (p processStaticResources) Apply(ctx context.Context, b *bundle.Bundle) dia // we need to resolve variables because they can change path values: // - variable can be used a prefix // - path can be part of a complex variable value + bundle.ApplySeqContext( ctx, b, // Reads (dynamic): * (strings) (searches for variable references in string values) // Updates (dynamic): resources.* (strings) (resolves variable references to their actual values) // Resolves variable references in 'resources' using bundle, workspace, and variables prefixes - mutator.ResolveVariableReferencesOnlyResources(), + immutableExcludingResolver(b), 
mutator.NormalizePaths(), // Translate dashboard paths into paths in the workspace file system diff --git a/bundle/config/mutator/resourcemutator/resource_mutator.go b/bundle/config/mutator/resourcemutator/resource_mutator.go index 45740f53599..4a552a63310 100644 --- a/bundle/config/mutator/resourcemutator/resource_mutator.go +++ b/bundle/config/mutator/resourcemutator/resource_mutator.go @@ -133,6 +133,22 @@ func applyInitializeMutators(ctx context.Context, b *bundle.Bundle) { ) } +// immutableExcludingResolver returns a variable reference resolver for the resources +// section. When experimental.immutable_folder is enabled it excludes +// workspace.file_path, workspace.artifact_path, and workspace.snapshot_path from +// resolution: those paths are set by snapshot.Upload() in the Deploy phase, so +// resolving them here would freeze them to the default bundle path instead. +// workspace.snapshot_path is also excluded so it stays as a literal ${...} template +// in the plan output (making the pre-upload intent visible). +func immutableExcludingResolver(b *bundle.Bundle) bundle.Mutator { + if b.IsImmutableFolder() { + return mutator.ResolveVariableReferencesOnlyResourcesExcluding( + []string{"workspace.file_path", "workspace.artifact_path", "workspace.snapshot_path"}, + ) + } + return mutator.ResolveVariableReferencesOnlyResources() +} + // Normalization is applied multiple times if resource is modified during initialization // // If bundle is modified outside of 'resources' section, these changes are discarded. 
@@ -146,7 +162,7 @@ func applyNormalizeMutators(ctx context.Context, b *bundle.Bundle) { // Reads (dynamic): * (strings) (searches for variable references in string values) // Updates (dynamic): resources.* (strings) (resolves variable references to their actual values) // Resolves variable references in 'resources' using bundle, workspace, and variables prefixes - mutator.ResolveVariableReferencesOnlyResources(), + immutableExcludingResolver(b), // Reads (dynamic): resources.pipelines.*.libraries (checks for notebook.path and file.path fields) // Updates (dynamic): resources.pipelines.*.libraries (expands glob patterns in path fields to multiple library entries) diff --git a/bundle/config/mutator/translate_paths.go b/bundle/config/mutator/translate_paths.go index b36ec094447..d50cdbf3060 100644 --- a/bundle/config/mutator/translate_paths.go +++ b/bundle/config/mutator/translate_paths.go @@ -320,11 +320,26 @@ func (t *translateContext) rewriteValue(ctx context.Context, p dyn.Path, v dyn.V } func applyTranslations(ctx context.Context, b *bundle.Bundle, t *translateContext, translations []func(context.Context, dyn.Value) (dyn.Value, error)) diag.Diagnostics { - // Set the remote root to the sync root if source-linked deployment is enabled. - // Otherwise, set it to the workspace file path. - if config.IsExplicitlyEnabled(t.b.Config.Presets.SourceLinkedDeployment) { + switch { + case b.IsImmutableFolder(): + // Reject an explicit workspace.file_path: immutable bundles control that path + // automatically (it is set to the content-addressed snapshot location after upload). + // A user-supplied value would be silently discarded, so we error early instead. 
+ if loc := b.Config.GetLocation("workspace.file_path"); loc.File != "" { + return diag.Diagnostics{{ + Severity: diag.Error, + Summary: "workspace.file_path cannot be configured when experimental.immutable_folder is true", + Locations: []dyn.Location{loc}, + }} + } + // Use a placeholder referencing workspace.snapshot_path so that paths are stored + // as ${workspace.snapshot_path}/src/files/ during validate. After + // snapshot.Upload() sets workspace.snapshot_path, a variable-resolution pass + // expands these references to the actual content-addressed paths. + t.remoteRoot = "${workspace.snapshot_path}/src/files" + case config.IsExplicitlyEnabled(t.b.Config.Presets.SourceLinkedDeployment): t.remoteRoot = t.b.SyncRootPath - } else { + default: t.remoteRoot = t.b.Config.Workspace.FilePath } diff --git a/bundle/config/workspace.go b/bundle/config/workspace.go index 1300a87a78c..8b869213435 100644 --- a/bundle/config/workspace.go +++ b/bundle/config/workspace.go @@ -78,6 +78,12 @@ type Workspace struct { // Remote workspace path for deployment state. // This defaults to "${workspace.root}/state". StatePath string `json:"state_path,omitempty"` + + // SnapshotPath is the workspace path of the immutable snapshot uploaded during + // deployment. Set by snapshot.Upload() and used by the subsequent variable-resolution + // pass to expand ${workspace.snapshot_path} placeholders in resource configs. + // Only populated at runtime for bundles with experimental.immutable_folder = true. 
+ SnapshotPath string `json:"snapshot_path,omitempty" bundle:"internal"` } type User struct { diff --git a/bundle/deploy/snapshot/client.go b/bundle/deploy/snapshot/client.go new file mode 100644 index 00000000000..4e5df29e408 --- /dev/null +++ b/bundle/deploy/snapshot/client.go @@ -0,0 +1,114 @@ +package snapshot + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "mime/multipart" + "net/http" + "net/textproto" + + "github.com/databricks/cli/libs/auth" + "github.com/databricks/databricks-sdk-go" + databricksclient "github.com/databricks/databricks-sdk-go/client" +) + +// SnapshotInfo holds the result of a successful snapshot upload. +type SnapshotInfo struct { + // Path is the immutable workspace path for the uploaded snapshot content. + Path string +} + +// ACLEntry is one element of the access_control_list sent to the snapshot API. +// All entries are granted CAN_READ; the snapshot API does not support other levels. +type ACLEntry struct { + UserName string `json:"user_name,omitempty"` + GroupName string `json:"group_name,omitempty"` + ServicePrincipalName string `json:"service_principal_name,omitempty"` + PermissionLevel string `json:"permission_level"` +} + +// SnapshotUploader abstracts the /api/2.0/repos/snapshots endpoint. +// snapshotID is the content-addressed key supplied by the caller; the API uses +// it as the final path component so that identical content always resolves to +// the same workspace location. +// This interface exists so the implementation can later be replaced with a Go SDK call. +type SnapshotUploader interface { + Upload(ctx context.Context, bundleID, snapshotID string, acl []ACLEntry, zipContent []byte) (*SnapshotInfo, error) +} + +// snapshotAPIClient implements SnapshotUploader against /api/2.0/repos/snapshots. +type snapshotAPIClient struct { + client *databricksclient.DatabricksClient +} + +// snapshotUploadResponse mirrors the /api/2.0/repos/snapshots response body. 
+type snapshotUploadResponse struct { + Snapshot struct { + Path string `json:"path"` + } `json:"snapshot"` +} + +// NewSnapshotUploader creates a SnapshotUploader backed by /api/2.0/repos/snapshots. +func NewSnapshotUploader(w *databricks.WorkspaceClient) (SnapshotUploader, error) { + c, err := databricksclient.New(w.Config) + if err != nil { + return nil, err + } + return &snapshotAPIClient{client: c}, nil +} + +// Upload uploads zipContent as an immutable snapshot identified by snapshotID. +// snapshotID is the SHA-256 of the zip and is used by the server as the +// content-addressed path component. acl grants CAN_READ to each listed principal. +func (c *snapshotAPIClient) Upload(ctx context.Context, bundleID, snapshotID string, acl []ACLEntry, zipContent []byte) (*SnapshotInfo, error) { + var body bytes.Buffer + mw := multipart.NewWriter(&body) + + if err := mw.WriteField("snapshot_id", snapshotID); err != nil { + return nil, fmt.Errorf("failed to write snapshot_id: %w", err) + } + if err := mw.WriteField("bundle_id", bundleID); err != nil { + return nil, fmt.Errorf("failed to write bundle_id: %w", err) + } + + aclJSON, err := json.Marshal(acl) + if err != nil { + return nil, fmt.Errorf("failed to marshal access_control_list: %w", err) + } + if err := mw.WriteField("access_control_list", string(aclJSON)); err != nil { + return nil, fmt.Errorf("failed to write access_control_list: %w", err) + } + + // Attach the zip with an explicit content-type so the server treats it as binary. 
+ fh := make(textproto.MIMEHeader) + fh.Set("Content-Disposition", `form-data; name="file"; filename="snapshot.zip"`) + fh.Set("Content-Type", "application/zip") + part, err := mw.CreatePart(fh) + if err != nil { + return nil, fmt.Errorf("failed to create file part: %w", err) + } + if _, err := part.Write(zipContent); err != nil { + return nil, fmt.Errorf("failed to write zip content: %w", err) + } + if err := mw.Close(); err != nil { + return nil, fmt.Errorf("failed to finalize multipart body: %w", err) + } + + // Workspace routing header is required so the server can locate the correct + // ASP (application service principal) that owns the snapshot directory. + headers := auth.WorkspaceIDHeaders(c.client.Config) + if headers == nil { + headers = make(map[string]string) + } + headers["Content-Type"] = mw.FormDataContentType() + + var resp snapshotUploadResponse + err = c.client.Do(ctx, http.MethodPost, "/api/2.0/repos/snapshots", headers, nil, body.Bytes(), &resp) + if err != nil { + return nil, fmt.Errorf("snapshot upload: %w", err) + } + + return &SnapshotInfo{Path: resp.Snapshot.Path}, nil +} diff --git a/bundle/deploy/snapshot/path.go b/bundle/deploy/snapshot/path.go new file mode 100644 index 00000000000..9d0ebdce16a --- /dev/null +++ b/bundle/deploy/snapshot/path.go @@ -0,0 +1,154 @@ +package snapshot + +import ( + "archive/zip" + "bytes" + "context" + "crypto/sha256" + "encoding/hex" + "fmt" + "io" + "os" + "path/filepath" + "slices" + "time" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/deploy/files" + "github.com/databricks/cli/libs/fileset" + libsync "github.com/databricks/cli/libs/sync" +) + +// zipEpoch is a fixed timestamp used for all zip entries to make the zip content-addressed +// and reproducible: the same file content always produces the same hash regardless of when +// the zip was built or the file's mtime. 
+var zipEpoch = time.Date(2000, 1, 1, 0, 0, 0, 0, time.UTC) + +// BundleZip builds the zip that is uploaded to the snapshot API. +// It contains: +// - all files from the bundle sync root under the "files/" prefix, +// selected with the same git-aware + include/exclude logic as files.Upload +// - all built artifact files under the "artifacts/.internal/" prefix +// +// The snapshot ID is always IDFromContent(BundleZip(b)), ensuring the +// pre-calculated path and the uploaded path are derived from the same content. +// The second return value is the number of sync-root files included in the zip. +func BundleZip(ctx context.Context, b *bundle.Bundle) ([]byte, int, error) { + var buf bytes.Buffer + zw := zip.NewWriter(&buf) + + fileCount, err := addSyncRootToZip(ctx, zw, b) + if err != nil { + return nil, 0, err + } + if err := addArtifactsToZip(zw, b); err != nil { + return nil, 0, err + } + + if err := zw.Close(); err != nil { + return nil, 0, err + } + return buf.Bytes(), fileCount, nil +} + +// IDFromContent returns the SHA-256 hex digest of content. +func IDFromContent(content []byte) string { + h := sha256.Sum256(content) + return hex.EncodeToString(h[:]) +} + +// SnapshotID builds the bundle zip and returns its SHA-256 hex digest. +// Called after artifacts are built so that ApplyImmutableWorkspacePaths and +// snapshot.Upload both hash identical content. +func SnapshotID(ctx context.Context, b *bundle.Bundle) (string, error) { + content, _, err := BundleZip(ctx, b) + if err != nil { + return "", err + } + return IDFromContent(content), nil +} + +// addSyncRootToZip returns the number of files added from the sync root. +func addSyncRootToZip(ctx context.Context, zw *zip.Writer, b *bundle.Bundle) (int, error) { + opts, err := files.GetSyncOptions(ctx, b) + if err != nil { + return 0, err + } + fileList, err := libsync.GetFileList(ctx, *opts) + if err != nil { + return 0, err + } + // Sort for a stable zip (same content → same hash regardless of iteration order). 
+ slices.SortFunc(fileList, func(a, b fileset.File) int { + if a.Relative < b.Relative { + return -1 + } + if a.Relative > b.Relative { + return 1 + } + return 0 + }) + + for _, f := range fileList { + rc, err := b.SyncRoot.Open(f.Relative) + if err != nil { + return 0, fmt.Errorf("open %s: %w", f.Relative, err) + } + + entryPath := filepath.ToSlash(f.Relative) + h := &zip.FileHeader{ + Name: "files/" + entryPath, + Method: zip.Deflate, + Modified: zipEpoch, + } + w, err := zw.CreateHeader(h) + if err != nil { + rc.Close() + return 0, fmt.Errorf("zip entry for %s: %w", f.Relative, err) + } + _, err = io.Copy(w, rc) + rc.Close() + if err != nil { + return 0, fmt.Errorf("write %s: %w", f.Relative, err) + } + } + return len(fileList), nil +} + +func addArtifactsToZip(zw *zip.Writer, b *bundle.Bundle) error { + for _, artifact := range b.Config.Artifacts { + for _, af := range artifact.Files { + source := af.Source + if af.Patched != "" { + source = af.Patched + } + // ".internal" matches libraries.InternalDirName so that ReplaceWithRemotePath + // produces library paths that resolve correctly inside the snapshot. 
+ if err := addLocalFileToZip(zw, source, "artifacts/.internal"); err != nil { + return err + } + } + } + return nil +} + +func addLocalFileToZip(zw *zip.Writer, localPath, zipPrefix string) error { + f, err := os.Open(localPath) + if err != nil { + return fmt.Errorf("open %s: %w", localPath, err) + } + defer f.Close() + + entryName := zipPrefix + "/" + filepath.Base(localPath) + h := &zip.FileHeader{ + Name: entryName, + Method: zip.Deflate, + Modified: zipEpoch, + } + w, err := zw.CreateHeader(h) + if err != nil { + return fmt.Errorf("zip entry %s: %w", entryName, err) + } + _, err = io.Copy(w, f) + return err +} diff --git a/bundle/deploy/snapshot/path_test.go b/bundle/deploy/snapshot/path_test.go new file mode 100644 index 00000000000..ee8251fe316 --- /dev/null +++ b/bundle/deploy/snapshot/path_test.go @@ -0,0 +1,132 @@ +package snapshot_test + +import ( + "archive/zip" + "bytes" + "os" + "path/filepath" + "slices" + "testing" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config" + "github.com/databricks/cli/bundle/deploy/snapshot" + "github.com/databricks/cli/libs/vfs" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func makeBundleWithFiles(t *testing.T, files map[string]string) *bundle.Bundle { + t.Helper() + dir := t.TempDir() + for name, content := range files { + p := filepath.Join(dir, name) + require.NoError(t, os.MkdirAll(filepath.Dir(p), 0o755)) + require.NoError(t, os.WriteFile(p, []byte(content), 0o644)) + } + root := vfs.MustNew(dir) + return &bundle.Bundle{ + BundleRootPath: dir, + SyncRoot: root, + // WorktreeRoot = SyncRoot is the fallback used by LoadGitDetails when + // there is no git repository. 
+ WorktreeRoot: root, + Config: config.Root{ + Bundle: config.Bundle{Target: "default"}, + }, + } +} + +func TestBundleZipIsDeterministic(t *testing.T) { + b := makeBundleWithFiles(t, map[string]string{ + "main.py": "print('hello')", + "src/task.py": "def run(): pass", + }) + + zip1, _, err := snapshot.BundleZip(t.Context(), b) + require.NoError(t, err) + zip2, _, err := snapshot.BundleZip(t.Context(), b) + require.NoError(t, err) + + assert.Equal(t, zip1, zip2, "BundleZip must produce identical bytes for identical content") +} + +func TestBundleZipChangesWithContent(t *testing.T) { + b1 := makeBundleWithFiles(t, map[string]string{"main.py": "v1"}) + b2 := makeBundleWithFiles(t, map[string]string{"main.py": "v2"}) + + zip1, _, err := snapshot.BundleZip(t.Context(), b1) + require.NoError(t, err) + zip2, _, err := snapshot.BundleZip(t.Context(), b2) + require.NoError(t, err) + + assert.NotEqual(t, zip1, zip2, "different file content must produce different zips") +} + +func TestBundleZipRespectsExcludes(t *testing.T) { + b := makeBundleWithFiles(t, map[string]string{ + "main.py": "print('hello')", + "skip.json": `{"id": "runtime-generated"}`, + }) + bExclude := makeBundleWithFiles(t, map[string]string{ + "main.py": "print('hello')", + "skip.json": `{"id": "runtime-generated"}`, + }) + bExclude.Config.Sync.Exclude = []string{"*.json"} + + zipAll, _, err := snapshot.BundleZip(t.Context(), b) + require.NoError(t, err) + zipExcl, _, err := snapshot.BundleZip(t.Context(), bExclude) + require.NoError(t, err) + + // The zip without the excluded file should be smaller and different. 
+ assert.NotEqual(t, zipAll, zipExcl) + assert.Less(t, len(zipExcl), len(zipAll)) +} + +func TestIDFromContent(t *testing.T) { + id := snapshot.IDFromContent([]byte("hello")) + // SHA-256 of "hello" + assert.Equal(t, "2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824", id) + assert.Len(t, id, 64, "SHA-256 hex must be 64 characters") +} + +func TestSnapshotIDMatchesBundleZipHash(t *testing.T) { + b := makeBundleWithFiles(t, map[string]string{"task.py": "x = 1"}) + + zipContent, _, err := snapshot.BundleZip(t.Context(), b) + require.NoError(t, err) + expectedID := snapshot.IDFromContent(zipContent) + + id, err := snapshot.SnapshotID(t.Context(), b) + require.NoError(t, err) + + assert.Equal(t, expectedID, id) +} + +func zipEntryNames(t *testing.T, zipContent []byte) []string { + t.Helper() + r, err := zip.NewReader(bytes.NewReader(zipContent), int64(len(zipContent))) + require.NoError(t, err) + names := make([]string, len(r.File)) + for i, f := range r.File { + names[i] = f.Name + } + return names +} + +func TestBundleZipDoNotStripNotebookExtensions(t *testing.T) { + // Minimal valid Jupyter notebook content. 
+ ipynb := `{"nbformat": 4, "nbformat_minor": 5, "cells": [], "metadata": {}}` + b := makeBundleWithFiles(t, map[string]string{ + "src/my_notebook.ipynb": ipynb, + "src/script.py": "print('hello')", + }) + + zipContent, _, err := snapshot.BundleZip(t.Context(), b) + require.NoError(t, err) + + names := zipEntryNames(t, zipContent) + assert.True(t, slices.Contains(names, "files/src/my_notebook.ipynb"), "notebook should keep its extension") + assert.True(t, slices.Contains(names, "files/src/script.py"), "regular Python file should keep its extension") +} diff --git a/bundle/deploy/snapshot/upload.go b/bundle/deploy/snapshot/upload.go new file mode 100644 index 00000000000..ca85df0e326 --- /dev/null +++ b/bundle/deploy/snapshot/upload.go @@ -0,0 +1,99 @@ +package snapshot + +import ( + "context" + "fmt" + "path" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/libs/cmdio" + "github.com/databricks/cli/libs/diag" + "github.com/databricks/cli/libs/log" +) + +// fileLimitWarning is the file count above which immutable folder deployments may fail. +const fileLimitWarning = 1000 + +type snapshotUpload struct { + // uploader allows test injection of a custom SnapshotUploader. + uploader SnapshotUploader +} + +// Upload returns a mutator that builds the bundle zip, uploads it via +// /api/2.0/repos/snapshots, and updates workspace.file_path and +// workspace.artifact_path to the content-addressed location returned by the API. 
+func Upload() bundle.Mutator { + return &snapshotUpload{} +} + +func (m *snapshotUpload) Name() string { + return "snapshot.Upload" +} + +func (m *snapshotUpload) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { + uploader := m.uploader + if uploader == nil { + var err error + uploader, err = NewSnapshotUploader(b.WorkspaceClient(ctx)) + if err != nil { + return diag.FromErr(err) + } + } + + cmdio.LogString(ctx, "Uploading immutable bundle snapshot...") + + zipContent, fileCount, err := BundleZip(ctx, b) + if err != nil { + return diag.FromErr(fmt.Errorf("failed to build snapshot zip: %w", err)) + } + var diags diag.Diagnostics + if fileCount > fileLimitWarning { + diags = append(diags, diag.Warningf( + "immutable folder deployment may not work correctly: bundle contains %d files (limit is %d)", + fileCount, fileLimitWarning, + )...) + } + snapshotID := IDFromContent(zipContent) + log.Debugf(ctx, "snapshot.Upload: snapshotID=%s zip=%d bytes", snapshotID, len(zipContent)) + + acl := BuildACL(b) + // Use the deployment lineage UUID as bundle_id so the snapshot directory is + // keyed to this specific deployment (not to the bundle name, which can be + // reused across unrelated deployments). + bundleID := b.DeploymentBundle.StateDB.GetOrInitLineage() + info, err := uploader.Upload(ctx, bundleID, snapshotID, acl, zipContent) + if err != nil { + return diag.FromErr(err) + } + + log.Infof(ctx, "Snapshot uploaded to %s", info.Path) + + // The API unpacks the zip under a "src" subdirectory. + b.Config.Workspace.SnapshotPath = info.Path + b.Config.Workspace.FilePath = path.Join(info.Path, "src", "files") + // Only set artifact_path when artifacts are present; with no artifacts the + // zip has no "src/artifacts" directory and a get-status on it would 404. 
+ if len(b.Config.Artifacts) > 0 { + b.Config.Workspace.ArtifactPath = path.Join(info.Path, "src", "artifacts") + } + + return diags +} + +// BuildACL constructs the access_control_list for the snapshot upload. +// It grants CAN_READ to the current user and to every principal listed in the +// top-level permissions section of the bundle config. +func BuildACL(b *bundle.Bundle) []ACLEntry { + acl := []ACLEntry{ + {UserName: b.Config.Workspace.CurrentUser.UserName, PermissionLevel: "CAN_READ"}, + } + for _, p := range b.Config.Permissions { + acl = append(acl, ACLEntry{ + UserName: p.UserName, + GroupName: p.GroupName, + ServicePrincipalName: p.ServicePrincipalName, + PermissionLevel: "CAN_READ", + }) + } + return acl +} diff --git a/bundle/deploy/snapshot/upload_test.go b/bundle/deploy/snapshot/upload_test.go new file mode 100644 index 00000000000..3b53f31ae3c --- /dev/null +++ b/bundle/deploy/snapshot/upload_test.go @@ -0,0 +1,53 @@ +package snapshot_test + +import ( + "testing" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config" + "github.com/databricks/cli/bundle/config/resources" + "github.com/databricks/cli/bundle/deploy/snapshot" + "github.com/databricks/databricks-sdk-go/service/iam" + "github.com/stretchr/testify/assert" +) + +func bundleWithPermissions(currentUser string, perms []resources.Permission) *bundle.Bundle { + b := &bundle.Bundle{ + Config: config.Root{ + Workspace: config.Workspace{ + CurrentUser: &config.User{}, + }, + Permissions: perms, + }, + } + b.Config.Workspace.CurrentUser.User = &iam.User{UserName: currentUser} + return b +} + +func TestBuildACLCurrentUserOnly(t *testing.T) { + b := bundleWithPermissions("alice@example.com", nil) + + acl := snapshot.BuildACL(b) + + assert.Equal(t, []snapshot.ACLEntry{ + {UserName: "alice@example.com", PermissionLevel: "CAN_READ"}, + }, acl) +} + +func TestBuildACLWithTopLevelPermissions(t *testing.T) { + perms := []resources.Permission{ + {Level: "CAN_VIEW", UserName: 
"bob@example.com"}, + {Level: "CAN_MANAGE", GroupName: "devs"}, + {Level: "CAN_RUN", ServicePrincipalName: "sp-123"}, + } + b := bundleWithPermissions("alice@example.com", perms) + + acl := snapshot.BuildACL(b) + + assert.Equal(t, []snapshot.ACLEntry{ + {UserName: "alice@example.com", PermissionLevel: "CAN_READ"}, + {UserName: "bob@example.com", PermissionLevel: "CAN_READ"}, + {GroupName: "devs", PermissionLevel: "CAN_READ"}, + {ServicePrincipalName: "sp-123", PermissionLevel: "CAN_READ"}, + }, acl) +} diff --git a/bundle/deploy/snapshot/upload_warning_test.go b/bundle/deploy/snapshot/upload_warning_test.go new file mode 100644 index 00000000000..fd8ad648b81 --- /dev/null +++ b/bundle/deploy/snapshot/upload_warning_test.go @@ -0,0 +1,75 @@ +package snapshot + +import ( + "context" + "fmt" + "os" + "path/filepath" + "testing" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config" + "github.com/databricks/cli/libs/cmdio" + "github.com/databricks/cli/libs/diag" + "github.com/databricks/cli/libs/logdiag" + "github.com/databricks/cli/libs/vfs" + "github.com/databricks/databricks-sdk-go/service/iam" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +type mockUploader struct{ path string } + +func (m *mockUploader) Upload(_ context.Context, _, _ string, _ []ACLEntry, _ []byte) (*SnapshotInfo, error) { + return &SnapshotInfo{Path: m.path}, nil +} + +func makeBundle(t *testing.T, nFiles int) *bundle.Bundle { + t.Helper() + dir := t.TempDir() + for i := range nFiles { + p := filepath.Join(dir, fmt.Sprintf("f%d.py", i)) + require.NoError(t, os.WriteFile(p, []byte("x"), 0o644)) + } + root := vfs.MustNew(dir) + b := &bundle.Bundle{ + BundleRootPath: dir, + SyncRoot: root, + WorktreeRoot: root, + Config: config.Root{ + Bundle: config.Bundle{Target: "default"}, + Workspace: config.Workspace{ + CurrentUser: &config.User{ + User: &iam.User{UserName: "test@example.test"}, + }, + }, + }, + } + return b +} + +func 
testContext(t *testing.T) context.Context { + t.Helper() + return logdiag.InitContext(cmdio.MockDiscard(t.Context())) +} + +func TestUploadWarnsAboveFileLimit(t *testing.T) { + b := makeBundle(t, fileLimitWarning+1) + m := &snapshotUpload{uploader: &mockUploader{path: "/snapshots/test"}} + + diags := m.Apply(testContext(t), b) + + require.Equal(t, 1, len(diags)) + assert.Equal(t, diag.Warning, diags[0].Severity) + assert.Contains(t, diags[0].Summary, fmt.Sprintf("%d files", fileLimitWarning+1)) + assert.Equal(t, "/snapshots/test", b.Config.Workspace.SnapshotPath) +} + +func TestUploadNoWarningBelowFileLimit(t *testing.T) { + b := makeBundle(t, 5) + m := &snapshotUpload{uploader: &mockUploader{path: "/snapshots/test"}} + + diags := m.Apply(testContext(t), b) + + assert.True(t, diags.HasError() == false && len(diags) == 0, "expected no diagnostics") +} diff --git a/bundle/direct/bundle_plan.go b/bundle/direct/bundle_plan.go index d890b8d5d7b..b3b8d66cba3 100644 --- a/bundle/direct/bundle_plan.go +++ b/bundle/direct/bundle_plan.go @@ -658,6 +658,12 @@ func splitResourcePath(path *structpath.PathNode) (string, *structpath.PathNode) } func (b *DeploymentBundle) LookupReferencePreDeploy(ctx context.Context, path *structpath.PathNode) (any, error) { + // ${workspace.snapshot_path} is resolved by the mutator pipeline after + // snapshot.Upload() — not by the direct engine. Return errDelayed so the + // template string is preserved in the plan output rather than causing an error. 
+ if path.String() == "workspace.snapshot_path" { + return nil, errDelayed + } targetResourceKey, fieldPath := splitResourcePath(path) targetGroup := config.GetResourceTypeFromKey(targetResourceKey) @@ -967,6 +973,11 @@ func (b *DeploymentBundle) makePlan(ctx context.Context, configRoot *config.Root targetNodeDP, _ := config.GetNodeAndType(targetPathParsed) targetNode := targetNodeDP.String() + // ${workspace.snapshot_path} is resolved by the mutator pipeline after + // snapshot.Upload(), not by the direct engine — skip it here. + if targetPath == "workspace.snapshot_path" { + continue + } fullRef := "${" + targetPath + "}" diff --git a/bundle/internal/schema/annotations.yml b/bundle/internal/schema/annotations.yml index c8a8353012f..a868bdc98f7 100644 --- a/bundle/internal/schema/annotations.yml +++ b/bundle/internal/schema/annotations.yml @@ -139,6 +139,9 @@ experimental: "description": |- Defines attributes for experimental features. "$fields": + "immutable_folder": + "description": |- + Whether to deploy bundle files and artifacts as a single immutable snapshot. When true, all files are packaged into a content-addressed archive and workspace.file_path and workspace.artifact_path are set to the resulting location. "pydabs": "description": |- The PyDABs configuration. 
diff --git a/bundle/paths/paths.go b/bundle/paths/paths.go index e413cc59f7b..54ffc0b5622 100644 --- a/bundle/paths/paths.go +++ b/bundle/paths/paths.go @@ -32,6 +32,10 @@ func CollectUniqueWorkspacePathPrefixes(workspace config.Workspace) WorkspacePat workspace.StatePath, workspace.ResourcePath, } { + if p == "" { + continue + } + if libraries.IsVolumesPath(p) { continue } diff --git a/bundle/permissions/workspace_root.go b/bundle/permissions/workspace_root.go index 9bb9065fe80..3d130d2c925 100644 --- a/bundle/permissions/workspace_root.go +++ b/bundle/permissions/workspace_root.go @@ -7,6 +7,7 @@ import ( "strconv" "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config" "github.com/databricks/cli/bundle/config/resources" "github.com/databricks/cli/bundle/libraries" "github.com/databricks/cli/bundle/metrics" @@ -28,7 +29,18 @@ func (*workspaceRootPermissions) Name() string { // Apply implements bundle.Mutator. func (*workspaceRootPermissions) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { - stateFolderPermissions, err := giveAccessForWorkspaceRoot(ctx, b) + workspace := b.Config.Workspace + if b.IsImmutableFolder() { + // For immutable bundles, file_path and artifact_path point into content-addressed + // snapshot storage that is not a normal workspace folder. Clear them so that + // giveAccessForWorkspaceRoot only applies permissions to root_path (and the + // state_path / resource_path nested under it), which still need ACLs for + // shared deployments to work correctly. + workspace.FilePath = "" + workspace.ArtifactPath = "" + } + + stateFolderPermissions, err := giveAccessForWorkspaceRoot(ctx, b, workspace) if err != nil { return diag.FromErr(err) } @@ -41,7 +53,7 @@ func (*workspaceRootPermissions) Apply(ctx context.Context, b *bundle.Bundle) di // workspace folders and returns the resulting permissions of the folder that holds // the deployment state. 
It returns nil only when no permissions are declared, in // which case no folders are synced. -func giveAccessForWorkspaceRoot(ctx context.Context, b *bundle.Bundle) (*WorkspacePathPermissions, error) { +func giveAccessForWorkspaceRoot(ctx context.Context, b *bundle.Bundle, wsConfig config.Workspace) (*WorkspacePathPermissions, error) { var permissions []workspace.WorkspaceObjectAccessControlRequest for _, p := range b.Config.Permissions { level, err := GetWorkspaceObjectPermissionLevel(string(p.Level)) @@ -62,7 +74,7 @@ func giveAccessForWorkspaceRoot(ctx context.Context, b *bundle.Bundle) (*Workspa } w := b.WorkspaceClient(ctx).Workspace - wsPaths := paths.CollectUniqueWorkspacePathPrefixes(b.Config.Workspace) + wsPaths := paths.CollectUniqueWorkspacePathPrefixes(wsConfig) // Each goroutine writes the folder's resulting permissions into its own slot, // so they are inspected after Wait rather than concurrently. @@ -83,7 +95,7 @@ func giveAccessForWorkspaceRoot(ctx context.Context, b *bundle.Bundle) (*Workspa // Return the permissions of the folder governing the deployment state. When // state_path is nested under root_path it is deduplicated out of the collected // paths, so Governing resolves it to root_path, whose ACL it inherits. - stateFolder := wsPaths.Governing(b.Config.Workspace.StatePath) + stateFolder := wsPaths.Governing(wsConfig.StatePath) i := slices.Index(wsPaths.Paths, stateFolder) if i < 0 { return nil, nil diff --git a/bundle/phases/build.go b/bundle/phases/build.go index 5a32435f8f1..db376e07e28 100644 --- a/bundle/phases/build.go +++ b/bundle/phases/build.go @@ -14,9 +14,11 @@ import ( "github.com/databricks/cli/libs/logdiag" ) +// LibLocationMap maps artifact names to library locations that need uploading. +// Computed by Build and consumed by Deploy to upload the right files. type LibLocationMap map[string][]libraries.LocationToUpdate -// The build phase builds artifacts. +// Build runs the build phase, which builds artifacts. 
func Build(ctx context.Context, b *bundle.Bundle) LibLocationMap { log.Info(ctx, "Phase: build") @@ -24,6 +26,7 @@ func Build(ctx context.Context, b *bundle.Bundle) LibLocationMap { scripts.Execute(config.ScriptPreBuild), artifacts.Build(), scripts.Execute(config.ScriptPostBuild), + mutator.ResolveVariableReferencesWithoutResources( "artifacts", ), @@ -41,16 +44,20 @@ func Build(ctx context.Context, b *bundle.Bundle) LibLocationMap { libraries.SwitchToPatchedWheels(), ) - libs, diags := libraries.ReplaceWithRemotePath(ctx, b) - for _, diag := range diags { - logdiag.LogDiag(ctx, diag) + if logdiag.HasError(ctx) { + return nil } - bundle.ApplyContext(ctx, b, - // TransformWheelTask must be run after ReplaceWithRemotePath so we can use correct remote path in the - // transformed notebook - trampoline.TransformWheelTask(), - ) + // For immutable bundles, library remote paths are set in the deploy phase + // after snapshot.Upload() provides the content-addressed workspace.artifact_path. + if b.IsImmutableFolder() { + return nil + } + libs, diags := libraries.ReplaceWithRemotePath(ctx, b) + for _, d := range diags { + logdiag.LogDiag(ctx, d) + } + bundle.ApplyContext(ctx, b, trampoline.TransformWheelTask()) return libs } diff --git a/bundle/phases/deploy.go b/bundle/phases/deploy.go index fd76151483c..7a906be8b5b 100644 --- a/bundle/phases/deploy.go +++ b/bundle/phases/deploy.go @@ -8,10 +8,12 @@ import ( "github.com/databricks/cli/bundle/artifacts" "github.com/databricks/cli/bundle/config" "github.com/databricks/cli/bundle/config/engine" + "github.com/databricks/cli/bundle/config/mutator" "github.com/databricks/cli/bundle/deploy" "github.com/databricks/cli/bundle/deploy/files" "github.com/databricks/cli/bundle/deploy/lock" "github.com/databricks/cli/bundle/deploy/metadata" + "github.com/databricks/cli/bundle/deploy/snapshot" "github.com/databricks/cli/bundle/deploy/terraform" "github.com/databricks/cli/bundle/deployplan" "github.com/databricks/cli/bundle/direct" @@ 
-147,13 +149,42 @@ func Deploy(ctx context.Context, b *bundle.Bundle, outputHandler sync.OutputHand bundle.ApplyContext(ctx, b, lock.Release(lock.GoalDeploy)) }() - uploadLibraries(ctx, b, libs) + immutable := b.IsImmutableFolder() + if immutable && !engine.IsDirect() { + logdiag.LogError(ctx, errors.New("experimental.immutable_folder is only supported with the direct deployment engine")) + return + } + + if immutable { + // Upload all source files and built artifacts as a single immutable snapshot. + // snapshot.Upload() sets workspace.snapshot_path; the variable-resolution + // pass expands ${workspace.snapshot_path} placeholders written by translate_paths. + bundle.ApplySeqContext(ctx, b, + snapshot.Upload(), + mutator.ResolveVariableReferencesOnlyResources("workspace"), + ) + if !logdiag.HasError(ctx) { + _, libDiags := libraries.ReplaceWithRemotePath(ctx, b) + for _, d := range libDiags { + logdiag.LogDiag(ctx, d) + } + } + } else { + uploadLibraries(ctx, b, libs) + } + if logdiag.HasError(ctx) { return } + if !immutable { + bundle.ApplySeqContext(ctx, b, files.Upload(outputHandler)) + if logdiag.HasError(ctx) { + return + } + } + bundle.ApplySeqContext(ctx, b, - files.Upload(outputHandler), deploy.StateUpdate(), deploy.StatePush(), permissions.ApplyWorkspaceRootPermissions(), diff --git a/bundle/phases/initialize.go b/bundle/phases/initialize.go index 80127843e83..0513c870139 100644 --- a/bundle/phases/initialize.go +++ b/bundle/phases/initialize.go @@ -70,6 +70,12 @@ func Initialize(ctx context.Context, b *bundle.Bundle) { // because it affects how workspace variables are resolved. mutator.ApplySourceLinkedDeploymentPreset(), + // Reads (env): DATABRICKS_IMMUTABLE_FOLDER (non-empty value enables immutable folder mode) + // Updates (typed): b.Config.Experimental.ImmutableFolder (forces to true when env var is set) + // Allows running the full test suite against the immutable folder code path without + // modifying any databricks.yml files. 
+ mutator.OverrideImmutableFolder(), + // Reads (typed): b.Config.Workspace.RootPath (checks if it's already set) // Reads (typed): b.Config.Bundle.Name, b.Config.Bundle.Target (used to construct default path) // Updates (typed): b.Config.Workspace.RootPath (sets to ~/.bundle/{name}/{target} if not set) diff --git a/bundle/schema/jsonschema.json b/bundle/schema/jsonschema.json index dc03d3c9d0b..1bbbd3f7246 100644 --- a/bundle/schema/jsonschema.json +++ b/bundle/schema/jsonschema.json @@ -2745,6 +2745,10 @@ { "type": "object", "properties": { + "immutable_folder": { + "description": "Whether to deploy bundle files and artifacts as a single immutable snapshot. When true, all files are packaged into a content-addressed archive and workspace.file_path and workspace.artifact_path are set to the resulting location.", + "$ref": "#/$defs/bool" + }, "pydabs": { "description": "The PyDABs configuration.", "$ref": "#/$defs/github.com/databricks/cli/bundle/config.PyDABs", diff --git a/libs/sync/sync.go b/libs/sync/sync.go index 6d7708c8b37..cee1057e82c 100644 --- a/libs/sync/sync.go +++ b/libs/sync/sync.go @@ -245,6 +245,35 @@ func (s *Sync) GetFileList(ctx context.Context) ([]fileset.File, error) { return all.Iter(), nil } +// GetFileList returns the list of files that would be synced given opts, +// applying the same git-aware include/exclude logic as RunOnce. +// Unlike New, it does not verify the remote path or load a sync snapshot. 
+func GetFileList(ctx context.Context, opts SyncOptions) ([]fileset.File, error) { + paths := opts.Paths + if len(paths) == 0 { + paths = []string{"."} + } + fileSet, err := git.NewFileSet(ctx, opts.WorktreeRoot, opts.LocalRoot, paths) + if err != nil { + return nil, fmt.Errorf("build file set: %w", err) + } + includeFileSet, err := fileset.NewGlobSet(opts.LocalRoot, opts.Include) + if err != nil { + return nil, err + } + excludeFileSet, err := fileset.NewGlobSet(opts.LocalRoot, opts.Exclude) + if err != nil { + return nil, err + } + s := &Sync{ + SyncOptions: &opts, + fileSet: fileSet, + includeFileSet: includeFileSet, + excludeFileSet: excludeFileSet, + } + return s.GetFileList(ctx) +} + func (s *Sync) RunContinuous(ctx context.Context) error { ticker := time.NewTicker(s.PollInterval) defer ticker.Stop() diff --git a/libs/testserver/handlers.go b/libs/testserver/handlers.go index 90863d503f6..071b8b91ce3 100644 --- a/libs/testserver/handlers.go +++ b/libs/testserver/handlers.go @@ -1,9 +1,13 @@ package testserver import ( + "bytes" "encoding/base64" "encoding/json" "fmt" + "io" + "mime" + "mime/multipart" "net/http" "path" "strings" @@ -569,6 +573,45 @@ func AddDefaultHandlers(server *Server) { return req.Workspace.ReposDelete(req) }) + server.Handle("POST", "/api/2.0/repos/snapshots", func(req Request) any { + contentType := req.Headers.Get("Content-Type") + mediaType, params, err := mime.ParseMediaType(contentType) + if err != nil || !strings.HasPrefix(mediaType, "multipart/") { + return Response{StatusCode: http.StatusBadRequest} + } + + mr := multipart.NewReader(bytes.NewReader(req.Body), params["boundary"]) + var bundleID, snapshotID string + for { + p, err := mr.NextPart() + if err == io.EOF { + break + } + if err != nil { + return Response{StatusCode: http.StatusInternalServerError} + } + data, err := io.ReadAll(p) + if err != nil { + return Response{StatusCode: http.StatusInternalServerError} + } + switch p.FormName() { + case "bundle_id": + bundleID = 
string(data) + case "snapshot_id": + snapshotID = string(data) + } + } + + // The real API puts the workspace user's UUID (not the email) in the snapshot path, so + // use TestUserSP to match the service-principal identities of cloud acceptance tests. + snapshotPath := fmt.Sprintf("/Workspace/Users/%s/.snapshots/%s/%s", TestUserSP.UserName, bundleID, snapshotID) + return map[string]any{ + "snapshot": map[string]any{ + "path": snapshotPath, + }, + } + }) + // SQL Warehouses: server.Handle("GET", "/api/2.0/sql/warehouses/{warehouse_id}", func(req Request) any {