diff --git a/pdm.lock b/pdm.lock index dd5e4309..7ff6c025 100644 --- a/pdm.lock +++ b/pdm.lock @@ -5,7 +5,7 @@ groups = ["default", "all", "bedrock", "dev", "office-document", "otel", "vertex", "web"] strategy = ["inherit_metadata"] lock_version = "4.5.0" -content_hash = "sha256:80daab1550751c5cfa428831aba94e43b94d1ba2e8e537b8bc06bce5407c4867" +content_hash = "sha256:b6a3df80970cb4a803d485e290a835a8f040768b5ba6f3fb77f360c36ed16def" [[metadata.targets]] requires_python = ">=3.10,<3.14" @@ -178,36 +178,32 @@ files = [ [[package]] name = "askui-agent-os" -version = "26.2.2" +version = "26.5.1" requires_python = ">=3.10" summary = "Platform-specific binaries for the AskUI Remote Device Controller, shipped as a Python package. Used by the AskUI Agent (created with the AskUI Python SDK) to control the local device on Windows, Linux, and macOS." groups = ["default"] files = [ - {file = "askui_agent_os-26.2.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e62761d0d926ca90132af5c8e1307c4b4d82114b41f4c1e5df7c6acce89e6129"}, - {file = "askui_agent_os-26.2.2-cp310-cp310-manylinux_2_34_aarch64.whl", hash = "sha256:e8eefac2197d9ab8a7afa92d6aa8641c0f7ca98c2c143a66f73e7d660a96ebcf"}, - {file = "askui_agent_os-26.2.2-cp310-cp310-manylinux_2_34_x86_64.whl", hash = "sha256:f90df76ef5f2235c7a1f6c5415cc523bf857338249bc91d73feb3673e65ae4b2"}, - {file = "askui_agent_os-26.2.2-cp310-cp310-win32.whl", hash = "sha256:cb53f95047873dbe1ceb304ee83d8a67c4e3b987d47b7f9921b5b4076164b36a"}, - {file = "askui_agent_os-26.2.2-cp310-cp310-win_amd64.whl", hash = "sha256:6f218ab3b4984f3946160546a9157a7c22876c3c9725b00eff93c0496ed1a5c3"}, - {file = "askui_agent_os-26.2.2-cp310-cp310-win_arm64.whl", hash = "sha256:2c3edc1c0e0d904636c4de8fac64bfd4cf2e40ed36d7520ef6ed6280b91ae80e"}, - {file = "askui_agent_os-26.2.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a83b6278b41365eb8538cdb8f29ea306efe6d029ba538c35b93f5a7053ed2e8c"}, - {file = "askui_agent_os-26.2.2-cp311-cp311-manylinux_2_34_aarch64.whl", hash = "sha256:3e2ba6c6fb3636218335d8e2051f5bead7c8a3bc2e50e80599d21ffee1371c4d"}, - {file = "askui_agent_os-26.2.2-cp311-cp311-manylinux_2_34_x86_64.whl", hash = "sha256:c5fb39a73f433dc18059d22d9a640e9aca5a1c94847fb04cf406aa04045dc28e"}, - {file = "askui_agent_os-26.2.2-cp311-cp311-win32.whl", hash = "sha256:e86d438579add1c944023c6747cfdc41d9b56e4c0e0265a62e58d65df9589a6d"}, - {file = "askui_agent_os-26.2.2-cp311-cp311-win_amd64.whl", hash = "sha256:f265d526e26ee2bcadc7b8e54e37356da1b4eac08b20b40d7d0273637b681ffd"}, - {file = "askui_agent_os-26.2.2-cp311-cp311-win_arm64.whl", hash = "sha256:3c57cfefaed4ea32abf5ed9073a05df4d518330cb39c10da1bdaad2db6c1d767"}, - {file = "askui_agent_os-26.2.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:270f0afc974274e17119f09abc78c445399d9b9e9f9c22b7456f553b63e0640b"}, - {file = "askui_agent_os-26.2.2-cp312-cp312-manylinux_2_34_aarch64.whl", hash = "sha256:9c13671488355c2ec63d676024cc431c4451736d6fb04da7f8a98bfffbb90f5b"}, - {file = "askui_agent_os-26.2.2-cp312-cp312-manylinux_2_34_x86_64.whl", hash = "sha256:13da7a46fff64791cd1113cb750fd9f394fada5c798072f73e5928ffcb0e1480"}, - {file = "askui_agent_os-26.2.2-cp312-cp312-win32.whl", hash = "sha256:3659d459c177e30eaf865001bc50f62b62fe686c337ae658d671aff01178505b"}, - {file = "askui_agent_os-26.2.2-cp312-cp312-win_amd64.whl", hash = "sha256:4a0587fb0aa75e572c0f0a400af757c193daa7ee58003adf2c5b591107c1ce6d"}, - {file = "askui_agent_os-26.2.2-cp312-cp312-win_arm64.whl", hash = "sha256:647f7a881e6deec42cacb02d77d44eee814b7abcb6540540569b4e41f412d0ec"}, - {file = "askui_agent_os-26.2.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:454c338bf73e2e638d26c62302d069c55854a3152a3fe1b8b3261aa846eb1d37"}, - {file = "askui_agent_os-26.2.2-cp313-cp313-manylinux_2_34_aarch64.whl", hash = "sha256:79b6cd87f01f67347c8492fced82a24e1c72f51189b1eb704848e6b7cdc62160"}, - {file = "askui_agent_os-26.2.2-cp313-cp313-manylinux_2_34_x86_64.whl", hash = "sha256:45e5cb29d63fd5a748e4ce71b77dc52b695a18a16490d60f89d15314148d513e"}, - {file = "askui_agent_os-26.2.2-cp313-cp313-win32.whl", hash = "sha256:cfb8607831111b02fca0789c7e49658a06a4b9e7c844ee76721bd5b496c289b2"}, - {file = "askui_agent_os-26.2.2-cp313-cp313-win_amd64.whl", hash = "sha256:4f7324b92dba6ba4017bbb13503d00e37fb8da35f627c094f4fd4f04e63c4505"}, - {file = "askui_agent_os-26.2.2-cp313-cp313-win_arm64.whl", hash = "sha256:63073baf1d5cf6a2c9a3970e2e272c528d3f95bd73816cc15c7a6fa3483afa01"}, - {file = "askui_agent_os-26.2.2.tar.gz", hash = "sha256:96617229d439bde66d439d9dc541a41f134bbb628d0ba3afa611c690d8125133"}, + {file = "askui_agent_os-26.5.1-cp310-cp310-manylinux_2_34_aarch64.whl", hash = "sha256:e3ebc4a88d5ee06719044f39fdf5436c41d7a3de703f722f2be693456e6cf34d"}, + {file = "askui_agent_os-26.5.1-cp310-cp310-manylinux_2_34_x86_64.whl", hash = "sha256:862b82347c153e595f03cf7dbd1931c571bf8dc887046ba06aa0360f82f63592"}, + {file = "askui_agent_os-26.5.1-cp310-cp310-win32.whl", hash = "sha256:b1574c3841b237316c1df8540b4b287d200b578c08479dfa4ef701984e76c99d"}, + {file = "askui_agent_os-26.5.1-cp310-cp310-win_amd64.whl", hash = "sha256:bf00febd587583ae159031549fcacf9520f9fffdd765098567d654cd306ca015"}, + {file = "askui_agent_os-26.5.1-cp310-cp310-win_arm64.whl", hash = "sha256:f8354d6821020f9b82ee946f770cf7a68162828ffce0c7f391a138de83bb6363"}, + {file = "askui_agent_os-26.5.1-cp311-cp311-manylinux_2_34_aarch64.whl", hash = "sha256:a017766e26dff4d8a762b1d4f70f7df1cb9bfac2f7aee21f364fbead769d0f06"}, + {file = "askui_agent_os-26.5.1-cp311-cp311-manylinux_2_34_x86_64.whl", hash = "sha256:8e3354ebd45b738a2178230f73509974b35024bf977dfbdcfdec43565eea36d4"}, + {file = "askui_agent_os-26.5.1-cp311-cp311-win32.whl", hash = "sha256:631c97feb1aaf813991c87669e44551031678f11341f842e608d153535a349c1"}, + {file = "askui_agent_os-26.5.1-cp311-cp311-win_amd64.whl", hash = "sha256:4e7742a7c283549b81b5d519c58b0757d5e33330ba3f3b0fc1cbc64c9950c01a"}, + {file = "askui_agent_os-26.5.1-cp311-cp311-win_arm64.whl", hash = "sha256:f6be5b41518e9b8b53dd9a192c9fcd561b917d64e6502cdb55c8d2b76a278b7a"}, + {file = "askui_agent_os-26.5.1-cp312-cp312-manylinux_2_34_aarch64.whl", hash = "sha256:49a813bc1896565741754ed7b6691c3b95f1940307e128e536b52c4adfbac0f6"}, + {file = "askui_agent_os-26.5.1-cp312-cp312-manylinux_2_34_x86_64.whl", hash = "sha256:c9f2b9e7047146d0b39f61c41b6a890cb1397b002c246952ebaf509a91069519"}, + {file = "askui_agent_os-26.5.1-cp312-cp312-win32.whl", hash = "sha256:abc3f6348117e4cb7b5dbd41551657deb6fcef7e2eb38347dbc853fc59263205"}, + {file = "askui_agent_os-26.5.1-cp312-cp312-win_amd64.whl", hash = "sha256:8c8050da6d610c61057dcddac3169f8d33f0b3e2e8296233ec7c77b80ed583b0"}, + {file = "askui_agent_os-26.5.1-cp312-cp312-win_arm64.whl", hash = "sha256:5e4b706d4dbeecc42678c3ee1c5ede1190acdc8b84d464f69ca26b5685fa9e73"}, + {file = "askui_agent_os-26.5.1-cp313-cp313-manylinux_2_34_aarch64.whl", hash = "sha256:dee3f7bb081a2c52838217d5300d638e419bf0a9cbdd3a5789b966afde3a2219"}, + {file = "askui_agent_os-26.5.1-cp313-cp313-manylinux_2_34_x86_64.whl", hash = "sha256:a2aac14cf3a288094382515bf0e8fdb5b70b328f089b82163a87702eac43faa7"}, + {file = "askui_agent_os-26.5.1-cp313-cp313-win32.whl", hash = "sha256:b8082eeb93632fe4df40e2e549be05eb6ee5c41c000878d4e45f98bb89ea12cb"}, + {file = "askui_agent_os-26.5.1-cp313-cp313-win_amd64.whl", hash = "sha256:ae72e8754361440fa7225bf3712b7a978b5372951f5bba242514cd667335f5d8"}, + {file = "askui_agent_os-26.5.1-cp313-cp313-win_arm64.whl", hash = "sha256:4c86ca64adac6fc409f3e78d16939f76c64e4e488985dec5050879e631102b2b"}, + {file = "askui_agent_os-26.5.1.tar.gz", hash = "sha256:dd5dd07e07f7064994576d778f30325d5e2e711153fb52054ef8eded7bd18674"}, ] [[package]] diff --git a/pyproject.toml b/pyproject.toml index d8bfc7e4..47a5b3f2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,7 +5,8 @@ authors = [ {name = "askui GmbH", email = "info@askui.com"}, ] dependencies = [ - "askui-agent-os>=26.1.1", + 'askui-agent-os>=26.4.1; sys_platform == "darwin"', + 'askui-agent-os>=26.5.1; sys_platform != "darwin"', "anthropic>=0.86.0", "fastapi>=0.115.12", "fastmcp>=2.3.0", diff --git a/src/askui/computer_agent.py b/src/askui/computer_agent.py index 016607bd..ad0a6627 100644 --- a/src/askui/computer_agent.py +++ b/src/askui/computer_agent.py @@ -56,7 +56,9 @@ class ComputerAgent(Agent): tools (AgentToolbox | None, optional): Custom toolbox instance. If `None`, a default one will be created with `AskUiControllerClient`. settings (AgentSettings | None, optional): Provider-based model settings. If `None`, uses the default AskUI model stack. retry (Retry, optional): The retry instance to use for retrying failed actions. Defaults to `ConfigurableRetry` with exponential backoff. Currently only supported for `locate()` method. - act_tools (list[Tool] | None, optional): Additional tools to make available for the `act()` method. + act_tools (list[Tool] | None, optional): Additional tools to make available for + the `act()` method for every call. Same tools can instead be passed per call + via `act(..., tools=[...])` (see example below). Example: ```python @@ -67,6 +69,26 @@ class ComputerAgent(Agent): agent.type("Hello World") agent.act("Open settings menu") ``` + + Example (optional tools for `act()`): + Register tools from `askui.tools.store` (or your own `Tool` implementations) + either on the agent so they apply to all `act()` calls, or only for one call. + + ```python + from askui import ComputerAgent + from askui.tools.store.computer import ComputerSaveScreenshotTool + + with ComputerAgent( + act_tools=[ComputerSaveScreenshotTool(base_dir="/path/to/screenshots")] + ) as agent: + agent.act("Take a screenshot and save it as demo/demo.png") + + with ComputerAgent() as agent: + agent.act( + "Take a screenshot and save it as demo/demo.png", + tools=[ComputerSaveScreenshotTool(base_dir="/path/to/screenshots")], + ) + ``` """ @telemetry.record_call( diff --git a/src/askui/tools/agent_os.py b/src/askui/tools/agent_os.py index 344a83ab..96ecc831 100644 --- a/src/askui/tools/agent_os.py +++ b/src/askui/tools/agent_os.py @@ -676,3 +676,46 @@ def set_window_in_focus(self, process_id: int, window_id: int) -> None: window_id (int): The ID of the window to set as active. """ raise NotImplementedError + + def get_file_names(self, absolute_directory_path: str) -> list[str]: + """ + List file names in an absolute directory on the automation target + (desktop Agent OS). + + Args: + absolute_directory_path (str): Absolute directory path on the target system. + + Returns: + list[str]: Names of files in that directory. + + Raises: + NotImplementedError: If the implementation does not support this operation. + """ + raise NotImplementedError + + def get_file(self, path: str) -> Image.Image | str: + """ + Read a file from the automation target (desktop Agent OS). + + Binary image payloads are returned as `PIL.Image.Image` when recognized; + otherwise UTF-8 text when decodable. + + Args: + path (str): File path on the target system. + + Returns: + Image.Image | str: Decoded file contents. + + Raises: + NotImplementedError: If the implementation does not support this operation. + """ + raise NotImplementedError + + def remove_virtual_displays(self) -> None: + """ + Remove virtual displays from the controller, leaving real displays only. + + Raises: + NotImplementedError: If the implementation does not support this operation. + """ + raise NotImplementedError diff --git a/src/askui/tools/askui/askui_controller.py b/src/askui/tools/askui/askui_controller.py index 5e8814bf..90ffe590 100644 --- a/src/askui/tools/askui/askui_controller.py +++ b/src/askui/tools/askui/askui_controller.py @@ -1,3 +1,4 @@ +import base64 import logging import pathlib import subprocess @@ -43,6 +44,8 @@ DeleteRenderObjectCommand, GetActiveProcessCommand, GetActiveWindowCommand, + GetFileCommand, + GetFileNamesCommand, GetMousePositionCommand, GetSystemInfoCommand, Guid, @@ -51,6 +54,7 @@ Location, Message, Parameter3, + RemoveVirtualDisplaysCommand, RenderImage, RenderObjectId, RenderObjectStyle, @@ -66,10 +70,13 @@ GetActiveProcessResponseModel, GetActiveWindowResponse, GetActiveWindowResponseModel, + GetFileNamesResponse, + GetFileResponse, GetSystemInfoResponse, GetSystemInfoResponseModel, ) from askui.utils.annotated_image import AnnotatedImage +from askui.utils.image_utils import base64_to_image from ..utils import process_exists, wait_for_port from .exceptions import ( @@ -217,6 +224,8 @@ def connect(self) -> None: self._start_session() self._start_execution() self.set_display(self._display) + if self._settings.clean_virtual_displays: + self.remove_virtual_displays() def _get_stub(self) -> controller_v1.ControllerAPIStub: assert isinstance(self._stub, controller_v1.ControllerAPIStub), ( @@ -1294,3 +1303,117 @@ def set_window_in_focus(self, process_id: int, window_id: int) -> None: _window_id = Parameter3(root=window_id) command = SetActiveWindowCommand(parameters=[_process_id, _window_id]) self._send_command(command) + + def get_file_names(self, absolute_directory_path: str) -> list[str]: + """ + Get the file names in the given absolute directory on the device under + automation. + + Args: + absolute_directory_path (str): The absolute directory path to list + file names from. + + Returns: + list[str]: The file names returned by the controller. + """ + assert isinstance(self._stub, controller_v1.ControllerAPIStub), ( + "Stub is not initialized" + ) + self._reporter.add_message( + "AgentOS", f"get_file_names({absolute_directory_path})" + ) + command = GetFileNamesCommand(parameters=[absolute_directory_path]) + res = self._send_command(command).message.command + if not isinstance(res, GetFileNamesResponse): + message = f"unexpected response type: {res}" + raise DesktopAgentOsError(message) + if res.error is not None: + raise DesktopAgentOsError(res.error) + if res.response is None: + message = f"{type(res).__name__} is missing both error and response" + raise DesktopAgentOsError(message) + self._reporter.add_message( + "AgentOS", f"get_file_names({absolute_directory_path}) -> {res.response}" + ) + return res.response.fileNames + + def get_file(self, path: str) -> Image.Image | str: + """ + Get the contents of a file at the given path on the device under + automation. + + The controller returns the file as a Base64-encoded string, which is + decoded and returned as `PIL.Image.Image` when the bytes can be opened + as an image (PNG, JPEG, BMP, GIF, WebP, TIFF, ...), or as `str` when + they decode cleanly as UTF-8 text. + + Args: + path (str): The file path to read on the device under automation. + + Returns: + Image.Image | str: The decoded file contents. + + Raises: + DesktopAgentOsError: If the file cannot be read or the response is invalid. + """ + assert isinstance(self._stub, controller_v1.ControllerAPIStub), ( + "Stub is not initialized" + ) + self._reporter.add_message("AgentOS", f"get_file({path})") + command = GetFileCommand(parameters=[path]) + res = self._send_command(command).message.command + if not isinstance(res, GetFileResponse): + message = f"unexpected response type: {res}" + raise DesktopAgentOsError(message) + if res.error is not None: + raise DesktopAgentOsError(res.error) + if res.response is None: + message = f"{type(res).__name__} is missing both error and response" + raise DesktopAgentOsError(message) + decoded = self._decode_file_payload(res.response.file.content) + if isinstance(decoded, Image.Image): + detail = f"image ({decoded.format}, {decoded.size[0]}x{decoded.size[1]})" + self._reporter.add_message( + "AgentOS", f"get_file({path}) -> {detail}", decoded + ) + return decoded + + detail = f"text ({len(decoded)} chars)" + self._reporter.add_message("AgentOS", f"get_file({path}) -> {detail}") + return decoded + + def remove_virtual_displays(self) -> None: + """ + Remove all virtual displays from the controller, leaving only real + displays active. + """ + assert isinstance(self._stub, controller_v1.ControllerAPIStub), ( + "Stub is not initialized" + ) + self._reporter.add_message("AgentOS", "remove_virtual_displays()") + command = RemoveVirtualDisplaysCommand() + self._send_command(command) + self._reporter.add_message("AgentOS", "remove_virtual_displays() -> done") + + logger.debug( + ( + "Setting display to 1 to ensure that the " + "controller is using the real display" + ) + ) + self.set_display(1) + + @staticmethod + def _decode_file_payload(base64_data: str) -> Image.Image | str: + try: + return base64_to_image(base64_data) + except ValueError: + pass + data = base64.b64decode(base64_data, validate=True) + if b"\x00" not in data: + try: + return data.decode("utf-8") + except UnicodeDecodeError: + pass + message = "File contents are neither a supported image nor UTF-8 text" + raise DesktopAgentOsError(message) diff --git a/src/askui/tools/askui/askui_controller_client_settings.py b/src/askui/tools/askui/askui_controller_client_settings.py index 28db94d7..6e53b747 100644 --- a/src/askui/tools/askui/askui_controller_client_settings.py +++ b/src/askui/tools/askui/askui_controller_client_settings.py @@ -22,5 +22,13 @@ class AskUiControllerClientSettings(BaseSettings): "Controller server. Defaults to True.", ) + clean_virtual_displays: bool = Field( + default=False, + description=( + "Whether to clean virtual displays after the controller is started." + "Default: False" + ), + ) + __all__ = ["AskUiControllerClientSettings"] diff --git a/src/askui/tools/askui/askui_ui_controller_grpc/generated/AgentOS_Send_Request_2501.py b/src/askui/tools/askui/askui_ui_controller_grpc/generated/AgentOS_Send_Request_2501.py index b281e20c..6e6c65e5 100644 --- a/src/askui/tools/askui/askui_ui_controller_grpc/generated/AgentOS_Send_Request_2501.py +++ b/src/askui/tools/askui/askui_ui_controller_grpc/generated/AgentOS_Send_Request_2501.py @@ -5,9 +5,10 @@ from __future__ import annotations from enum import Enum -from typing import Dict, List, Literal, Optional, Union +from typing import Any, Dict, List, Literal, Optional, Union -from pydantic import BaseModel, ConfigDict, Field, RootModel, confloat, conint, constr +from pydantic import (BaseModel, ConfigDict, Field, RootModel, confloat, + conint, constr) class ParameterEnum(Enum): @@ -393,6 +394,20 @@ class LoadCharacterMapCommand(BaseModel): None, max_length=1, min_length=1 ) +class GetFileNamesCommand(BaseModel): + name: Literal['GetFileNames'] = 'GetFileNames' + parameters: list[str] = Field(..., max_length=1, min_length=1) + + +class GetFileCommand(BaseModel): + name: Literal['GetFile'] = 'GetFile' + parameters: list[str] = Field(..., max_length=1, min_length=1) + + +class RemoveVirtualDisplaysCommand(BaseModel): + name: Literal['RemoveVirtualDisplays'] = 'RemoveVirtualDisplays' + parameters: List[Any] = [] + Command =Union[ GetSystemInfoCommand, GetMousePositionCommand, @@ -412,6 +427,9 @@ class LoadCharacterMapCommand(BaseModel): SetActiveProcessCommand, GetActiveWindowCommand, SetActiveWindowCommand, + GetFileNamesCommand, + GetFileCommand, + RemoveVirtualDisplaysCommand, ] class Message(BaseModel): diff --git a/src/askui/tools/askui/askui_ui_controller_grpc/generated/AgentOS_Send_Response_2501.py b/src/askui/tools/askui/askui_ui_controller_grpc/generated/AgentOS_Send_Response_2501.py index df795891..b21192b2 100644 --- a/src/askui/tools/askui/askui_ui_controller_grpc/generated/AgentOS_Send_Response_2501.py +++ b/src/askui/tools/askui/askui_ui_controller_grpc/generated/AgentOS_Send_Response_2501.py @@ -229,6 +229,26 @@ class GetActiveWindowResponseModel(BaseModel): window: Window +class FileModel(BaseModel): + model_config = ConfigDict( + extra='forbid', + ) + content: str + + +class GetFileNamesResponseModel(BaseModel): + model_config = ConfigDict( + extra='forbid', + ) + fileNames: list[str] + + +class GetFileResponseModel(BaseModel): + model_config = ConfigDict( + extra='forbid', + ) + file: FileModel + class Length(RootModel[Union[constr(pattern=r'^(\d+(\.\d+)?(px|%)|auto)$'), float]]): root: Union[constr(pattern=r'^(\d+(\.\d+)?(px|%)|auto)$'), float] @@ -356,6 +376,24 @@ class GetActiveWindowResponse(BaseModel): response: GetActiveWindowResponseModel +class GetFileNamesResponse(BaseModel): + name: Literal['GetFileNames'] + actionId: ActionId + error: str | None = None + response: GetFileNamesResponseModel | None = None + + +class GetFileResponse(BaseModel): + name: Literal['GetFile'] + actionId: ActionId + error: str | None = None + response: GetFileResponseModel | None = None + + +class RemoveVirtualDisplaysResponse(BaseModel): + name: Literal['RemoveVirtualDisplays'] + actionId: ActionId + class SetActiveWindowResponse(BaseModel): name: Literal['SetActiveWindow'] actionId: ActionId @@ -369,6 +407,8 @@ class SetActiveWindowResponse(BaseModel): AddRenderObjectResponseModel, GetActiveProcessResponseModel, GetActiveWindowResponseModel, + GetFileNamesResponseModel, + GetFileResponseModel, ] class Message(BaseModel): command: Union[ @@ -386,6 +426,9 @@ class Message(BaseModel): SetActiveProcessResponse, GetActiveWindowResponse, SetActiveWindowResponse, + GetFileNamesResponse, + GetFileResponse, + RemoveVirtualDisplaysResponse, ] diff --git a/src/askui/tools/askui/askui_ui_controller_grpc/generated/__init__.py b/src/askui/tools/askui/askui_ui_controller_grpc/generated/__init__.py index a78424f9..c60c43b5 100644 --- a/src/askui/tools/askui/askui_ui_controller_grpc/generated/__init__.py +++ b/src/askui/tools/askui/askui_ui_controller_grpc/generated/__init__.py @@ -1,3 +1,3 @@ # generated by datamodel-codegen: # filename: json_schema -# timestamp: 2025-12-12T10:41:23+00:00 +# timestamp: 2026-05-12T11:23:08+00:00 diff --git a/src/askui/tools/askui/askui_ui_controller_grpc/json_schema/AgentOS-Send-Request-2501.json b/src/askui/tools/askui/askui_ui_controller_grpc/json_schema/AgentOS-Send-Request-2501.json index 73ec03c2..42d108f3 100644 --- a/src/askui/tools/askui/askui_ui_controller_grpc/json_schema/AgentOS-Send-Request-2501.json +++ b/src/askui/tools/askui/askui_ui_controller_grpc/json_schema/AgentOS-Send-Request-2501.json @@ -42,7 +42,10 @@ "$ref": "#/definitions/length" } }, - "required": ["x", "y"] + "required": [ + "x", + "y" + ] }, "rotation": { "type": "integer", @@ -51,7 +54,12 @@ }, "renderObjectType": { "type": "string", - "enum": ["Quad", "Line", "Texture", "Image"] + "enum": [ + "Quad", + "Line", + "Texture", + "Image" + ] }, "renderObjectId": { "type": "integer", @@ -296,7 +304,9 @@ "$ref": "#/definitions/guid" } }, - "required": ["authentication"] + "required": [ + "authentication" + ] }, "command": { "type": "object", @@ -314,7 +324,12 @@ "type": "array", "items": { "type": "string", - "enum": ["platform", "label", "version", "architecture"] + "enum": [ + "platform", + "label", + "version", + "architecture" + ] }, "minItems": 1, "maxItems": 4, @@ -325,7 +340,10 @@ "maxItems": 1 } }, - "required": ["name", "parameters"], + "required": [ + "name", + "parameters" + ], "title": "GetSystemInfoCommand" }, { @@ -338,7 +356,9 @@ "type": "array" } }, - "required": ["name"], + "required": [ + "name" + ], "title": "GetMousePositionCommand" }, { @@ -358,7 +378,10 @@ "maxItems": 1 } }, - "required": ["name", "parameters"], + "required": [ + "name", + "parameters" + ], "title": "SetMousePositionCommand" }, { @@ -374,7 +397,9 @@ "items": [ { "type": "string", - "enum": ["Quad"] + "enum": [ + "Quad" + ] }, { "$ref": "#/definitions/renderObjectStyle" @@ -387,7 +412,9 @@ "items": [ { "type": "string", - "enum": ["Line"] + "enum": [ + "Line" + ] }, { "$ref": "#/definitions/renderObjectStyle" @@ -403,7 +430,9 @@ "items": [ { "type": "string", - "enum": ["Image"] + "enum": [ + "Image" + ] }, { "$ref": "#/definitions/renderObjectStyle" @@ -419,7 +448,9 @@ "items": [ { "type": "string", - "enum": ["Text"] + "enum": [ + "Text" + ] }, { "$ref": "#/definitions/renderObjectStyle" @@ -435,7 +466,9 @@ "items": [ { "type": "string", - "enum": ["Shape"] + "enum": [ + "Shape" + ] }, { "$ref": "#/definitions/renderObjectStyle" @@ -450,7 +483,10 @@ ] } }, - "required": ["name", "parameters"], + "required": [ + "name", + "parameters" + ], "title": "AddRenderObjectCommand" }, { @@ -508,7 +544,9 @@ "items": [ { "type": "string", - "enum": ["Image"] + "enum": [ + "Image" + ] }, { "$ref": "#/definitions/renderObjectStyle" @@ -524,7 +562,9 @@ "items": [ { "type": "string", - "enum": ["Text"] + "enum": [ + "Text" + ] }, { "$ref": "#/definitions/renderObjectStyle" @@ -539,7 +579,10 @@ ] } }, - "required": ["name", "parameters"], + "required": [ + "name", + "parameters" + ], "title": "UpdateRenderObjectCommand" }, { @@ -558,7 +601,10 @@ "type": "array", "items": { "type": "string", - "enum": ["width", "height"] + "enum": [ + "width", + "height" + ] }, "minItems": 1, "maxItems": 2, @@ -569,7 +615,10 @@ "maxItems": 2 } }, - "required": ["name", "parameters"], + "required": [ + "name", + "parameters" + ], "title": "GetRenderObjectInfoCommand" }, { @@ -589,7 +638,10 @@ "maxItems": 1 } }, - "required": ["name", "parameters"], + "required": [ + "name", + "parameters" + ], "title": "DeleteRenderObjectCommand" }, { @@ -602,7 +654,9 @@ "type": "array" } }, - "required": ["name"], + "required": [ + "name" + ], "title": "ClearRenderObjectsCommand" }, { @@ -625,14 +679,20 @@ }, { "type": "string", - "enum": ["BreakWord", "BreakAll"] + "enum": [ + "BreakWord", + "BreakAll" + ] } ], "minItems": 4, "maxItems": 4 } }, - "required": ["name", "parameters"], + "required": [ + "name", + "parameters" + ], "title": "GetRenderTextWordBreakIndexCommand" }, { @@ -650,7 +710,9 @@ "maxItems": 1 } }, - "required": ["name"], + "required": [ + "name" + ], "title": "WaitForKeyPressCommand" }, { @@ -673,7 +735,9 @@ "maxItems": 1 } }, - "required": ["name"], + "required": [ + "name" + ], "title": "LoadKeyMapCommand" }, { @@ -700,7 +764,9 @@ "maxItems": 1 } }, - "required": ["name"], + "required": [ + "name" + ], "title": "LoadCharacterMapCommand" }, { @@ -713,7 +779,9 @@ "type": "array" } }, - "required": ["name"], + "required": [ + "name" + ], "title": "ClearKeyMapCommand" }, { @@ -726,7 +794,9 @@ "type": "array" } }, - "required": ["name"], + "required": [ + "name" + ], "title": "ClearCharacterMapCommand" }, { @@ -739,7 +809,9 @@ "type": "array" } }, - "required": ["name"], + "required": [ + "name" + ], "title": "GetActiveProcessCommand" }, { @@ -759,7 +831,9 @@ "maxItems": 1 } }, - "required": ["name"], + "required": [ + "name" + ], "title": "SetActiveProcessCommand" }, { @@ -772,7 +846,9 @@ "type": "array" } }, - "required": ["name"], + "required": [ + "name" + ], "title": "GetActiveWindowCommand" }, { @@ -792,14 +868,76 @@ "maxItems": 2 } }, - "required": ["name"], + "required": [ + "name" + ], "title": "SetActiveWindowCommand" + }, + { + "properties": { + "name": { + "type": "string", + "const": "GetFileNames" + }, + "parameters": { + "type": "array", + "items": { + "type": "string" + }, + "minItems": 1, + "maxItems": 1 + } + }, + "required": [ + "name" + ], + "title": "GetFileNamesCommand" + }, + { + "properties": { + "name": { + "type": "string", + "const": "GetFile" + }, + "parameters": { + "type": "array", + "items": { + "type": "string" + }, + "minItems": 1, + "maxItems": 1 + } + }, + "required": [ + "name" + ], + "title": "GetFileCommand" + }, + { + "properties": { + "name": { + "type": "string", + "const": "RemoveVirtualDisplays" + }, + "parameters": { + "type": "array" + } + }, + "required": [ + "name" + ], + "title": "RemoveVirtualDisplaysCommand" } ] } }, - "required": ["header", "command"] + "required": [ + "header", + "command" + ] } }, - "required": ["message"] -} + "required": [ + "message" + ] +} \ No newline at end of file diff --git a/src/askui/tools/askui/askui_ui_controller_grpc/json_schema/AgentOS-Send-Response-2501.json b/src/askui/tools/askui/askui_ui_controller_grpc/json_schema/AgentOS-Send-Response-2501.json index 4c057bec..f57276fa 100644 --- a/src/askui/tools/askui/askui_ui_controller_grpc/json_schema/AgentOS-Send-Response-2501.json +++ b/src/askui/tools/askui/askui_ui_controller_grpc/json_schema/AgentOS-Send-Response-2501.json @@ -27,7 +27,10 @@ "$ref": "#/definitions/length" } }, - "required": ["x", "y"] + "required": [ + "x", + "y" + ] }, "renderObjectId": { "type": "integer", @@ -44,7 +47,7 @@ "message": { "type": "object", "properties": { - "command": { + "Response": { "type": "object", "oneOf": [ { @@ -75,7 +78,11 @@ } } }, - "required": ["name", "actionId", "reponse"], + "required": [ + "name", + "actionId", + "reponse" + ], "title": "GetSystemInfoResponse" }, { @@ -95,10 +102,16 @@ "$ref": "#/definitions/location2" } }, - "required": ["position"] + "required": [ + "position" + ] } }, - "required": ["name", "actionId", "response"], + "required": [ + "name", + "actionId", + "response" + ], "title": "GetMousePositionResponse" }, { @@ -111,7 +124,10 @@ "$ref": "#/definitions/actionId" } }, - "required": ["name", "actionId"], + "required": [ + "name", + "actionId" + ], "title": "SetMousePositionResponse" }, { @@ -131,10 +147,16 @@ "$ref": "#/definitions/renderObjectId" } }, - "required": ["id"] + "required": [ + "id" + ] } }, - "required": ["name", "actionId", "response"], + "required": [ + "name", + "actionId", + "response" + ], "title": "AddRenderObjectResponse" }, { @@ -147,7 +169,10 @@ "$ref": "#/definitions/actionId" } }, - "required": ["name", "actionId"], + "required": [ + "name", + "actionId" + ], "title": "UpdateRenderObjectResponse" }, { @@ -172,7 +197,11 @@ } } }, - "required": ["name", "actionId", "reponse"], + "required": [ + "name", + "actionId", + "reponse" + ], "title": "GetRenderObjectInfoResponse" }, { @@ -185,7 +214,10 @@ "$ref": "#/definitions/actionId" } }, - "required": ["name", "actionId"], + "required": [ + "name", + "actionId" + ], "title": "DeleteRenderObjectResponse" }, { @@ -198,7 +230,10 @@ "$ref": "#/definitions/actionId" } }, - "required": ["name", "actionId"], + "required": [ + "name", + "actionId" + ], "title": "ClearRenderObjectsResponse" }, { @@ -218,10 +253,16 @@ "type": "integer" } }, - "required": ["index"] + "required": [ + "index" + ] } }, - "required": ["name", "actionId", "response"], + "required": [ + "name", + "actionId", + "response" + ], "title": "GetRenderTextWordBreakIndexResponse" }, { @@ -397,7 +438,11 @@ } } }, - "required": ["name", "actionId", "reponse"], + "required": [ + "name", + "actionId", + "reponse" + ], "title": "WaitForKeyPressResponse" }, { @@ -410,7 +455,10 @@ "$ref": "#/definitions/actionId" } }, - "required": ["name", "actionId"], + "required": [ + "name", + "actionId" + ], "title": "LoadKeyMapResponse" }, { @@ -423,7 +471,10 @@ "$ref": "#/definitions/actionId" } }, - "required": ["name", "actionId"], + "required": [ + "name", + "actionId" + ], "title": "LoadCharacterMapResponse" }, { @@ -436,7 +487,10 @@ "$ref": "#/definitions/actionId" } }, - "required": ["name", "actionId"], + "required": [ + "name", + "actionId" + ], "title": "ClearKeyMapResponse" }, { @@ -449,7 +503,10 @@ "$ref": "#/definitions/actionId" } }, - "required": ["name", "actionId"], + "required": [ + "name", + "actionId" + ], "title": "ClearCharacterMapResponse" }, { @@ -478,13 +535,22 @@ "type": "string" } }, - "required": ["id", "name"] + "required": [ + "id", + "name" + ] } }, - "required": ["process"] + "required": [ + "process" + ] } }, - "required": ["name", "actionId", "response"], + "required": [ + "name", + "actionId", + "response" + ], "title": "GetActiveProcessResponse" }, { @@ -497,7 +563,10 @@ "$ref": "#/definitions/actionId" } }, - "required": ["name", "actionId"], + "required": [ + "name", + "actionId" + ], "title": "SetActiveProcessResponse" }, { @@ -534,13 +603,24 @@ "type": "string" } }, - "required": ["id", "name", "processId", "processName"] + "required": [ + "id", + "name", + "processId", + "processName" + ] } }, - "required": ["window"] + "required": [ + "window" + ] } }, - "required": ["name", "actionId", "response"], + "required": [ + "name", + "actionId", + "response" + ], "title": "GetActiveWindowResponse" }, { @@ -553,14 +633,135 @@ "$ref": "#/definitions/actionId" } }, - "required": ["name", "actionId"], + "required": [ + "name", + "actionId" + ], "title": "SetActiveWindowResponse" + }, + { + "properties": { + "name": { + "type": "string", + "const": "GetFileNames" + }, + "actionId": { + "$ref": "#/definitions/actionId" + }, + "error": { + "type": "string" + }, + "response": { + "type": "object", + "additionalProperties": false, + "properties": { + "fileNames": { + "type": "array", + "items": { + "type": "string" + } + } + }, + "required": [ + "fileNames" + ] + } + }, + "required": [ + "name", + "actionId" + ], + "oneOf": [ + { + "required": [ + "error" + ] + }, + { + "required": [ + "response" + ] + } + ], + "title": "GetFileNamesResponse" + }, + { + "properties": { + "name": { + "type": "string", + "const": "GetFile" + }, + "actionId": { + "$ref": "#/definitions/actionId" + }, + "error": { + "type": "string" + }, + "response": { + "type": "object", + "additionalProperties": false, + "properties": { + "file": { + "type": "object", + "additionalProperties": false, + "properties": { + "content": { + "type": "string" + } + }, + "required": [ + "content" + ] + } + }, + "required": [ + "file" + ] + } + }, + "required": [ + "name", + "actionId" + ], + "oneOf": [ + { + "required": [ + "error" + ] + }, + { + "required": [ + "response" + ] + } + ], + "title": "GetFileResponse" + }, + { + "properties": { + "name": { + "type": "string", + "const": "RemoveVirtualDisplays" + }, + "actionId": { + "$ref": "#/definitions/actionId" + } + }, + "required": [ + "name", + "actionId" + ], + "title": "RemoveVirtualDisplaysResponse" } ] } }, - "required": ["command"] + "required": [ + "command" + ] } }, - "required": ["message"] + "required": [ + "message" + ] } diff --git a/src/askui/tools/computer_agent_os_facade.py b/src/askui/tools/computer_agent_os_facade.py index 3be1481e..28a1a8c5 100644 --- a/src/askui/tools/computer_agent_os_facade.py +++ b/src/askui/tools/computer_agent_os_facade.py @@ -266,6 +266,40 @@ def set_window_in_focus(self, process_id: int, window_id: int) -> None: """ self._agent_os.set_window_in_focus(process_id, window_id) + def get_file_names(self, absolute_directory_path: str) -> list[str]: + """ + List file names in an absolute directory on the automation target. + + Args: + absolute_directory_path (str): Absolute directory path on the target system. + + Returns: + list[str]: Names of files in that directory. + """ + return self._agent_os.get_file_names(absolute_directory_path) + + def get_file(self, path: str) -> Image.Image | str: + """ + Read a file from the automation target. + + Args: + path (str): File path on the target system. + + Returns: + Image.Image | str: Decoded file contents. + """ + response = self._agent_os.get_file(path) + if isinstance(response, Image.Image): + return scale_image_to_fit(response, self._target_resolution) + return response + + def remove_virtual_displays(self) -> None: + """ + Remove virtual displays from the controller, leaving real displays only. + """ + self._agent_os.remove_virtual_displays() + self._real_screen_resolution = None + def _scale_coordinates_back( self, x: int, diff --git a/src/askui/tools/store/__init__.py b/src/askui/tools/store/__init__.py index ae053925..2a05056d 100644 --- a/src/askui/tools/store/__init__.py +++ b/src/askui/tools/store/__init__.py @@ -7,11 +7,11 @@ Example: ```python - from askui import VisionAgent + from askui import ComputerAgent from askui.tools.store.computer import ComputerSaveScreenshotTool from askui.tools.store.universal import PrintToConsoleTool - with VisionAgent() as agent: + with ComputerAgent() as agent: agent.act( "Save the current screen as demo/demo.png and keep me updated.", tools=[ComputerSaveScreenshotTool(base_dir="/path/to/screenshots"), diff --git a/src/askui/tools/store/computer/experimental/__init__.py b/src/askui/tools/store/computer/experimental/__init__.py index e87f2eed..43414e4b 100644 --- a/src/askui/tools/store/computer/experimental/__init__.py +++ b/src/askui/tools/store/computer/experimental/__init__.py @@ -1,3 +1,6 @@ +from .get_file import ComputerGetFileTool +from .get_file_names import ComputerGetFileNamesTool +from .remove_virtual_displays import ComputerRemoveVirtualDisplaysTool from .window_management import ( ComputerAddWindowAsVirtualDisplayTool, ComputerListProcessTool, @@ -7,6 +10,9 @@ ) __all__ = [ + "ComputerGetFileNamesTool", + "ComputerGetFileTool", + "ComputerRemoveVirtualDisplaysTool", "ComputerListProcessTool", "ComputerListProcessWindowsTool", "ComputerAddWindowAsVirtualDisplayTool", diff --git a/src/askui/tools/store/computer/experimental/get_file.py b/src/askui/tools/store/computer/experimental/get_file.py new file mode 100644 index 00000000..407c1515 --- /dev/null +++ b/src/askui/tools/store/computer/experimental/get_file.py @@ -0,0 +1,54 @@ +from PIL import Image + +from askui.models.shared import ComputerBaseTool, ToolTags +from askui.tools.agent_os import AgentOs + + +class ComputerGetFileTool(ComputerBaseTool): + """ + Reads a file at an absolute path on the computer under automation. + + Example: + ```python + from askui import ComputerAgent + from askui.tools.store.computer.experimental import ComputerGetFileTool + + with ComputerAgent(act_tools=[ComputerGetFileTool()]) as agent: + agent.act("Read /home/user/notes.txt and summarize the contents") + + with ComputerAgent() as agent: + agent.act( + "Read /home/user/notes.txt and summarize the contents", + tools=[ComputerGetFileTool()], + ) + ``` + """ + + def __init__(self, agent_os: AgentOs | None = None) -> None: + super().__init__( + name="get_file_tool", + description=( + "Reads a file at an absolute path on the computer under automation. " + "Returns UTF-8 text as a string, or a decoded image for " + "supported image formats. Unsupported binary types are rejected." + ), + input_schema={ + "type": "object", + "properties": { + "absolute_file_path": { + "type": "string", + "description": ( + "Absolute path to the file on the target machine (for " + "example 'C:\\\\path\\\\notes.txt' on Windows or " + "'/home/user/notes.txt' on Linux/macOS)." + ), + }, + }, + "required": ["absolute_file_path"], + }, + agent_os=agent_os, + required_tags=[ToolTags.SCALED_AGENT_OS.value], + ) + + def __call__(self, absolute_file_path: str) -> Image.Image | str: + return self.agent_os.get_file(absolute_file_path) diff --git a/src/askui/tools/store/computer/experimental/get_file_names.py b/src/askui/tools/store/computer/experimental/get_file_names.py new file mode 100644 index 00000000..5002b0eb --- /dev/null +++ b/src/askui/tools/store/computer/experimental/get_file_names.py @@ -0,0 +1,58 @@ +from askui.models.shared import ComputerBaseTool +from askui.tools.agent_os import AgentOs + + +class ComputerGetFileNamesTool(ComputerBaseTool): + """ + Lists regular files (not subdirectories) in a directory on the computer under + automation. + + Example: + ```python + from askui import ComputerAgent + from askui.tools.store.computer.experimental import ComputerGetFileNamesTool + + with ComputerAgent(act_tools=[ComputerGetFileNamesTool()]) as agent: + agent.act("List the regular files in /home/user/Documents") + + with ComputerAgent() as agent: + agent.act( + "List the regular files in /home/user/Documents", + tools=[ComputerGetFileNamesTool()], + ) + ``` + """ + + def __init__(self, agent_os: AgentOs | None = None) -> None: + super().__init__( + name="get_file_names_tool", + description=( + "Lists the names of regular files in an absolute directory on the " + "computer under automation. Subdirectories are not included—only " + "files are returned. Use absolute paths as on the target machine. " + "Returns names only; use get_file_tool to read a file's contents." + ), + input_schema={ + "type": "object", + "properties": { + "absolute_directory_path": { + "type": "string", + "description": ( + "Absolute path of the directory to scan (for example " + "'C:\\\\Users\\\\Public' on Windows or '/home/user/Public' " + "on Linux/macOS)." + ), + }, + }, + "required": ["absolute_directory_path"], + }, + agent_os=agent_os, + ) + self.is_cacheable = True + + def __call__(self, absolute_directory_path: str) -> str: + names = self.agent_os.get_file_names(absolute_directory_path) + file_names = ",".join(f"'{n}'" for n in names) + return ( + f"Files in '{absolute_directory_path}' ({len(names)} files): {file_names} " + ) diff --git a/src/askui/tools/store/computer/experimental/remove_virtual_displays.py b/src/askui/tools/store/computer/experimental/remove_virtual_displays.py new file mode 100644 index 00000000..25772c66 --- /dev/null +++ b/src/askui/tools/store/computer/experimental/remove_virtual_displays.py @@ -0,0 +1,47 @@ +from askui.models.shared import ComputerBaseTool +from askui.tools.agent_os import AgentOs + + +class ComputerRemoveVirtualDisplaysTool(ComputerBaseTool): + """ + Removes virtual displays so only physical displays remain active. + + Example: + ```python + from askui import ComputerAgent + from askui.tools.store.computer.experimental import ( + ComputerRemoveVirtualDisplaysTool, + ) + + with ComputerAgent(act_tools=[ComputerRemoveVirtualDisplaysTool()]) as agent: + agent.act("Remove virtual displays so only physical screens are active") + + with ComputerAgent() as agent: + agent.act( + "Remove virtual displays so only physical screens are active", + tools=[ComputerRemoveVirtualDisplaysTool()], + ) + ``` + """ + + def __init__(self, agent_os: AgentOs | None = None) -> None: + super().__init__( + name="remove_virtual_displays_tool", + description=( + "Removes all virtual displays from the current display " + "configuration, keeping only physical screens. Use after workflows " + "that attach windows as virtual displays (for example " + "add_window_as_virtual_display_tool) to restore a normal setup before " + "continuing automation." + ), + input_schema={ + "type": "object", + "properties": {}, + "required": [], + }, + agent_os=agent_os, + ) + + def __call__(self) -> str: + self.agent_os.remove_virtual_displays() + return "Removed virtual displays; only physical displays remain active."