From b838a831f6eee2ad2975ce96e61489734cc617bb Mon Sep 17 00:00:00 2001 From: Samir mlika <105347215+mlikasam-askui@users.noreply.github.com> Date: Mon, 23 Mar 2026 12:01:08 +0100 Subject: [PATCH 1/2] feat(android): add UIAutomator hierarchy dump, parsing, and agent tool Add UIElement and UIElementCollection to parse UIAutomator window-dump XML from normalized shell output (bounds, text, resource-id, content-desc, clickable, etc.). Expose get_ui_elements() on Android AgentOs and implement it in the facade and PpAdb path so callers get a flattened hierarchy string. Register AndroidGetUIAutomatorHierarchyTool in the Android tool store for act flows that need structure instead of screenshots. Refresh pdm.lock for the otel dependency group and OpenTelemetry-related package updates. --- pdm.lock | 219 +++++++++++++++--- src/askui/tools/android/agent_os.py | 9 + src/askui/tools/android/agent_os_facade.py | 13 ++ src/askui/tools/android/ppadb_agent_os.py | 32 +++ .../tools/android/uiautomator_hierarchy.py | 158 +++++++++++++ src/askui/tools/store/android/__init__.py | 4 + .../android/get_uiautomator_hierarchy_tool.py | 74 ++++++ 7 files changed, 481 insertions(+), 28 deletions(-) create mode 100644 src/askui/tools/android/uiautomator_hierarchy.py create mode 100644 src/askui/tools/store/android/get_uiautomator_hierarchy_tool.py diff --git a/pdm.lock b/pdm.lock index 728086f1..9e4212dd 100644 --- a/pdm.lock +++ b/pdm.lock @@ -2,10 +2,10 @@ # It is not intended for manual editing. [metadata] -groups = ["default", "all", "android", "bedrock", "dev", "vertex", "web"] +groups = ["default", "all", "android", "bedrock", "dev", "otel", "vertex", "web"] strategy = ["inherit_metadata"] lock_version = "4.5.0" -content_hash = "sha256:52d39f70b344148d3dd0461d075a560f85a0563971bc9ea4edfa86c8037fb578" +content_hash = "sha256:d3f3e7f6150492644409b7b37ce08b24ceb242bce8003159b690a3d0135ff4c2" [[metadata.targets]] requires_python = ">=3.10,<3.14" @@ -372,7 +372,7 @@ name = "certifi" version = "2025.8.3" requires_python = ">=3.7" summary = "Python package for providing Mozilla's CA Bundle." -groups = ["default", "all", "bedrock", "vertex"] +groups = ["default", "all", "bedrock", "otel", "vertex"] files = [ {file = "certifi-2025.8.3-py3-none-any.whl", hash = "sha256:f6c12493cfb1b06ba2ff328595af9350c65d6644968e5d3a2ffd78699af217a5"}, {file = "certifi-2025.8.3.tar.gz", hash = "sha256:e564105f78ded564e3ae7c923924435e1daa7463faeab5bb932bc53ffae63407"}, @@ -479,7 +479,7 @@ name = "charset-normalizer" version = "3.4.3" requires_python = ">=3.7" summary = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." -groups = ["default", "all", "vertex"] +groups = ["default", "all", "otel", "vertex"] files = [ {file = "charset_normalizer-3.4.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:fb7f67a1bfa6e40b438170ebdc8158b78dc465a5a67b6dde178a46987b244a72"}, {file = "charset_normalizer-3.4.3-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:cc9370a2da1ac13f0153780040f465839e6cccb4a1e44810124b4e22483c93fe"}, @@ -1432,7 +1432,7 @@ name = "googleapis-common-protos" version = "1.71.0" requires_python = ">=3.7" summary = "Common protobufs used in Google APIs" -groups = ["all", "vertex"] +groups = ["all", "otel", "vertex"] dependencies = [ "protobuf!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<7.0.0,>=3.20.2", ] @@ -1808,7 +1808,7 @@ name = "idna" version = "3.10" requires_python = ">=3.6" summary = "Internationalized Domain Names in Applications (IDNA)" -groups = ["default", "all", "bedrock", "dev", "vertex"] +groups = ["default", "all", "bedrock", "dev", "otel", "vertex"] files = [ {file = "idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3"}, {file = "idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9"}, @@ -1835,7 +1835,7 @@ name = "importlib-metadata" version = "8.7.1" requires_python = ">=3.9" summary = "Read metadata from Python packages" -groups = ["default"] +groups = ["default", "all", "otel"] dependencies = [ "zipp>=3.20", ] @@ -2788,48 +2788,160 @@ files = [ [[package]] name = "opentelemetry-api" -version = "1.39.1" +version = "1.40.0" requires_python = ">=3.9" summary = "OpenTelemetry Python API" -groups = ["default"] +groups = ["default", "all", "otel"] dependencies = [ "importlib-metadata<8.8.0,>=6.0", "typing-extensions>=4.5.0", ] files = [ - {file = "opentelemetry_api-1.39.1-py3-none-any.whl", hash = "sha256:2edd8463432a7f8443edce90972169b195e7d6a05500cd29e6d13898187c9950"}, - {file = "opentelemetry_api-1.39.1.tar.gz", hash = "sha256:fbde8c80e1b937a2c61f20347e91c0c18a1940cecf012d62e65a7caf08967c9c"}, + {file = "opentelemetry_api-1.40.0-py3-none-any.whl", hash = "sha256:82dd69331ae74b06f6a874704be0cfaa49a1650e1537d4a813b86ecef7d0ecf9"}, + {file = "opentelemetry_api-1.40.0.tar.gz", hash = "sha256:159be641c0b04d11e9ecd576906462773eb97ae1b657730f0ecf64d32071569f"}, +] + +[[package]] +name = "opentelemetry-exporter-otlp-proto-common" +version = "1.40.0" +requires_python = ">=3.9" +summary = "OpenTelemetry Protobuf encoding" +groups = ["all", "otel"] +dependencies = [ + "opentelemetry-proto==1.40.0", +] +files = [ + {file = "opentelemetry_exporter_otlp_proto_common-1.40.0-py3-none-any.whl", hash = "sha256:7081ff453835a82417bf38dccf122c827c3cbc94f2079b03bba02a3165f25149"}, + {file = "opentelemetry_exporter_otlp_proto_common-1.40.0.tar.gz", hash = "sha256:1cbee86a4064790b362a86601ee7934f368b81cd4cc2f2e163902a6e7818a0fa"}, +] + +[[package]] +name = "opentelemetry-exporter-otlp-proto-http" +version = "1.40.0" +requires_python = ">=3.9" +summary = "OpenTelemetry Collector Protobuf over HTTP Exporter" +groups = ["all", "otel"] +dependencies = [ + "googleapis-common-protos~=1.52", + "opentelemetry-api~=1.15", + "opentelemetry-exporter-otlp-proto-common==1.40.0", + "opentelemetry-proto==1.40.0", + "opentelemetry-sdk~=1.40.0", + "requests~=2.7", + "typing-extensions>=4.5.0", +] +files = [ + {file = "opentelemetry_exporter_otlp_proto_http-1.40.0-py3-none-any.whl", hash = "sha256:a8d1dab28f504c5d96577d6509f80a8150e44e8f45f82cdbe0e34c99ab040069"}, + {file = "opentelemetry_exporter_otlp_proto_http-1.40.0.tar.gz", hash = "sha256:db48f5e0f33217588bbc00274a31517ba830da576e59503507c839b38fa0869c"}, +] + +[[package]] +name = "opentelemetry-instrumentation" +version = "0.61b0" +requires_python = ">=3.9" +summary = "Instrumentation Tools & Auto Instrumentation for OpenTelemetry Python" +groups = ["all", "otel"] +dependencies = [ + "opentelemetry-api~=1.4", + "opentelemetry-semantic-conventions==0.61b0", + "packaging>=18.0", + "wrapt<2.0.0,>=1.0.0", +] +files = [ + {file = "opentelemetry_instrumentation-0.61b0-py3-none-any.whl", hash = "sha256:92a93a280e69788e8f88391247cc530fd81f16f2b011979d4d6398f805cfbc63"}, + {file = "opentelemetry_instrumentation-0.61b0.tar.gz", hash = "sha256:cb21b48db738c9de196eba6b805b4ff9de3b7f187e4bbf9a466fa170514f1fc7"}, +] + +[[package]] +name = "opentelemetry-instrumentation-httpx" +version = "0.61b0" +requires_python = ">=3.9" +summary = "OpenTelemetry HTTPX Instrumentation" +groups = ["all", "otel"] +dependencies = [ + "opentelemetry-api~=1.12", + "opentelemetry-instrumentation==0.61b0", + "opentelemetry-semantic-conventions==0.61b0", + "opentelemetry-util-http==0.61b0", + "wrapt<2.0.0,>=1.0.0", +] +files = [ + {file = "opentelemetry_instrumentation_httpx-0.61b0-py3-none-any.whl", hash = "sha256:dee05c93a6593a5dc3ae5d9d5c01df8b4e2c5d02e49275e5558534ee46343d5e"}, + {file = "opentelemetry_instrumentation_httpx-0.61b0.tar.gz", hash = "sha256:6569ec097946c5551c2a4252f74c98666addd1bf047c1dde6b4ef426719ff8dd"}, +] + +[[package]] +name = "opentelemetry-instrumentation-sqlalchemy" +version = "0.61b0" +requires_python = ">=3.9" +summary = "OpenTelemetry SQLAlchemy instrumentation" +groups = ["all", "otel"] +dependencies = [ + "opentelemetry-api~=1.12", + "opentelemetry-instrumentation==0.61b0", + "opentelemetry-semantic-conventions==0.61b0", + "packaging>=21.0", + "wrapt>=1.11.2", +] +files = [ + {file = "opentelemetry_instrumentation_sqlalchemy-0.61b0-py3-none-any.whl", hash = "sha256:f115e0be54116ba4c327b8d7b68db4045ee18d44439d888ab8130a549c50d1c1"}, + {file = "opentelemetry_instrumentation_sqlalchemy-0.61b0.tar.gz", hash = "sha256:13a3a159a2043a52f0180b3757fbaa26741b0e08abb50deddce4394c118956e6"}, +] + +[[package]] +name = "opentelemetry-proto" +version = "1.40.0" +requires_python = ">=3.9" +summary = "OpenTelemetry Python Proto" +groups = ["all", "otel"] +dependencies = [ + "protobuf<7.0,>=5.0", +] +files = [ + {file = "opentelemetry_proto-1.40.0-py3-none-any.whl", hash = "sha256:266c4385d88923a23d63e353e9761af0f47a6ed0d486979777fe4de59dc9b25f"}, + {file = "opentelemetry_proto-1.40.0.tar.gz", hash = "sha256:03f639ca129ba513f5819810f5b1f42bcb371391405d99c168fe6937c62febcd"}, ] [[package]] name = "opentelemetry-sdk" -version = "1.39.1" +version = "1.40.0" requires_python = ">=3.9" summary = "OpenTelemetry Python SDK" -groups = ["default"] +groups = ["default", "all", "otel"] dependencies = [ - "opentelemetry-api==1.39.1", - "opentelemetry-semantic-conventions==0.60b1", + "opentelemetry-api==1.40.0", + "opentelemetry-semantic-conventions==0.61b0", "typing-extensions>=4.5.0", ] files = [ - {file = "opentelemetry_sdk-1.39.1-py3-none-any.whl", hash = "sha256:4d5482c478513ecb0a5d938dcc61394e647066e0cc2676bee9f3af3f3f45f01c"}, - {file = "opentelemetry_sdk-1.39.1.tar.gz", hash = "sha256:cf4d4563caf7bff906c9f7967e2be22d0d6b349b908be0d90fb21c8e9c995cc6"}, + {file = "opentelemetry_sdk-1.40.0-py3-none-any.whl", hash = "sha256:787d2154a71f4b3d81f20524a8ce061b7db667d24e46753f32a7bc48f1c1f3f1"}, + {file = "opentelemetry_sdk-1.40.0.tar.gz", hash = "sha256:18e9f5ec20d859d268c7cb3c5198c8d105d073714db3de50b593b8c1345a48f2"}, ] [[package]] name = "opentelemetry-semantic-conventions" -version = "0.60b1" +version = "0.61b0" requires_python = ">=3.9" summary = "OpenTelemetry Semantic Conventions" -groups = ["default"] +groups = ["default", "all", "otel"] dependencies = [ - "opentelemetry-api==1.39.1", + "opentelemetry-api==1.40.0", "typing-extensions>=4.5.0", ] files = [ - {file = "opentelemetry_semantic_conventions-0.60b1-py3-none-any.whl", hash = "sha256:9fa8c8b0c110da289809292b0591220d3a7b53c1526a23021e977d68597893fb"}, - {file = "opentelemetry_semantic_conventions-0.60b1.tar.gz", hash = "sha256:87c228b5a0669b748c76d76df6c364c369c28f1c465e50f661e39737e84bc953"}, + {file = "opentelemetry_semantic_conventions-0.61b0-py3-none-any.whl", hash = "sha256:fa530a96be229795f8cef353739b618148b0fe2b4b3f005e60e262926c4d38e2"}, + {file = "opentelemetry_semantic_conventions-0.61b0.tar.gz", hash = "sha256:072f65473c5d7c6dc0355b27d6c9d1a679d63b6d4b4b16a9773062cb7e31192a"}, +] + +[[package]] +name = "opentelemetry-util-http" +version = "0.61b0" +requires_python = ">=3.9" +summary = "Web util for OpenTelemetry" +groups = ["all", "otel"] +files = [ + {file = "opentelemetry_util_http-0.61b0-py3-none-any.whl", hash = "sha256:8e715e848233e9527ea47e275659ea60a57a75edf5206a3b937e236a6da5fc33"}, + {file = "opentelemetry_util_http-0.61b0.tar.gz", hash = "sha256:1039cb891334ad2731affdf034d8fb8b48c239af9b6dd295e5fabd07f1c95572"}, ] [[package]] @@ -2848,7 +2960,7 @@ name = "packaging" version = "25.0" requires_python = ">=3.8" summary = "Core utilities for Python packages" -groups = ["default", "all", "dev", "vertex"] +groups = ["default", "all", "dev", "otel", "vertex"] files = [ {file = "packaging-25.0-py3-none-any.whl", hash = "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484"}, {file = "packaging-25.0.tar.gz", hash = "sha256:d443872c98d677bf60f6a1f2f8c1cb748e8fe762d2bf9d3148b5599295b0fc4f"}, @@ -3119,7 +3231,7 @@ name = "protobuf" version = "6.32.1" requires_python = ">=3.9" summary = "" -groups = ["default", "all", "dev", "vertex"] +groups = ["default", "all", "dev", "otel", "vertex"] files = [ {file = "protobuf-6.32.1-cp310-abi3-win32.whl", hash = "sha256:a8a32a84bc9f2aad712041b8b366190f71dde248926da517bde9e832e4412085"}, {file = "protobuf-6.32.1-cp310-abi3-win_amd64.whl", hash = "sha256:b00a7d8c25fa471f16bc8153d0e53d6c9e827f0953f3c09aaa4331c718cae5e1"}, @@ -3703,7 +3815,7 @@ name = "requests" version = "2.32.5" requires_python = ">=3.9" summary = "Python HTTP for Humans." -groups = ["default", "all", "vertex"] +groups = ["default", "all", "otel", "vertex"] dependencies = [ "certifi>=2017.4.17", "charset-normalizer<4,>=2", @@ -4482,7 +4594,7 @@ name = "typing-extensions" version = "4.15.0" requires_python = ">=3.9" summary = "Backported and Experimental Type Hints for Python 3.9+" -groups = ["default", "all", "bedrock", "dev", "vertex", "web"] +groups = ["default", "all", "bedrock", "dev", "otel", "vertex", "web"] files = [ {file = "typing_extensions-4.15.0-py3-none-any.whl", hash = "sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548"}, {file = "typing_extensions-4.15.0.tar.gz", hash = "sha256:0cea48d173cc12fa28ecabc3b837ea3cf6f38c6d1136f85cbaaf598984861466"}, @@ -4543,7 +4655,7 @@ name = "urllib3" version = "2.5.0" requires_python = ">=3.9" summary = "HTTP library with thread-safe connection pooling, file post, and more." -groups = ["default", "all", "bedrock", "dev", "vertex"] +groups = ["default", "all", "bedrock", "dev", "otel", "vertex"] files = [ {file = "urllib3-2.5.0-py3-none-any.whl", hash = "sha256:e6b01673c0fa6a13e374b50871808eb3bf7046c4b125b216f6bf1cc604cff0dc"}, {file = "urllib3-2.5.0.tar.gz", hash = "sha256:3fc47733c7e419d4bc3f6b3dc2b4f890bb743906a30d56ba4a5bfa4bbff92760"}, @@ -4664,6 +4776,57 @@ files = [ {file = "winregistry-2.1.1.tar.gz", hash = "sha256:8233c4261a9d937cd8f0670da0d1e61fd7b86712c39b1af08cb83e91316195a7"}, ] +[[package]] +name = "wrapt" +version = "1.17.3" +requires_python = ">=3.8" +summary = "Module for decorators, wrappers and monkey patching." +groups = ["all", "otel"] +files = [ + {file = "wrapt-1.17.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:88bbae4d40d5a46142e70d58bf664a89b6b4befaea7b2ecc14e03cedb8e06c04"}, + {file = "wrapt-1.17.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e6b13af258d6a9ad602d57d889f83b9d5543acd471eee12eb51f5b01f8eb1bc2"}, + {file = "wrapt-1.17.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:fd341868a4b6714a5962c1af0bd44f7c404ef78720c7de4892901e540417111c"}, + {file = "wrapt-1.17.3-cp310-cp310-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:f9b2601381be482f70e5d1051a5965c25fb3625455a2bf520b5a077b22afb775"}, + {file = "wrapt-1.17.3-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:343e44b2a8e60e06a7e0d29c1671a0d9951f59174f3709962b5143f60a2a98bd"}, + {file = "wrapt-1.17.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:33486899acd2d7d3066156b03465b949da3fd41a5da6e394ec49d271baefcf05"}, + {file = "wrapt-1.17.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:e6f40a8aa5a92f150bdb3e1c44b7e98fb7113955b2e5394122fa5532fec4b418"}, + {file = "wrapt-1.17.3-cp310-cp310-win32.whl", hash = "sha256:a36692b8491d30a8c75f1dfee65bef119d6f39ea84ee04d9f9311f83c5ad9390"}, + {file = "wrapt-1.17.3-cp310-cp310-win_amd64.whl", hash = "sha256:afd964fd43b10c12213574db492cb8f73b2f0826c8df07a68288f8f19af2ebe6"}, + {file = "wrapt-1.17.3-cp310-cp310-win_arm64.whl", hash = "sha256:af338aa93554be859173c39c85243970dc6a289fa907402289eeae7543e1ae18"}, + {file = "wrapt-1.17.3-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:273a736c4645e63ac582c60a56b0acb529ef07f78e08dc6bfadf6a46b19c0da7"}, + {file = "wrapt-1.17.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:5531d911795e3f935a9c23eb1c8c03c211661a5060aab167065896bbf62a5f85"}, + {file = "wrapt-1.17.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:0610b46293c59a3adbae3dee552b648b984176f8562ee0dba099a56cfbe4df1f"}, + {file = "wrapt-1.17.3-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:b32888aad8b6e68f83a8fdccbf3165f5469702a7544472bdf41f582970ed3311"}, + {file = "wrapt-1.17.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8cccf4f81371f257440c88faed6b74f1053eef90807b77e31ca057b2db74edb1"}, + {file = "wrapt-1.17.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d8a210b158a34164de8bb68b0e7780041a903d7b00c87e906fb69928bf7890d5"}, + {file = "wrapt-1.17.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:79573c24a46ce11aab457b472efd8d125e5a51da2d1d24387666cd85f54c05b2"}, + {file = "wrapt-1.17.3-cp311-cp311-win32.whl", hash = "sha256:c31eebe420a9a5d2887b13000b043ff6ca27c452a9a22fa71f35f118e8d4bf89"}, + {file = "wrapt-1.17.3-cp311-cp311-win_amd64.whl", hash = "sha256:0b1831115c97f0663cb77aa27d381237e73ad4f721391a9bfb2fe8bc25fa6e77"}, + {file = "wrapt-1.17.3-cp311-cp311-win_arm64.whl", hash = "sha256:5a7b3c1ee8265eb4c8f1b7d29943f195c00673f5ab60c192eba2d4a7eae5f46a"}, + {file = "wrapt-1.17.3-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:ab232e7fdb44cdfbf55fc3afa31bcdb0d8980b9b95c38b6405df2acb672af0e0"}, + {file = "wrapt-1.17.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:9baa544e6acc91130e926e8c802a17f3b16fbea0fd441b5a60f5cf2cc5c3deba"}, + {file = "wrapt-1.17.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6b538e31eca1a7ea4605e44f81a48aa24c4632a277431a6ed3f328835901f4fd"}, + {file = "wrapt-1.17.3-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:042ec3bb8f319c147b1301f2393bc19dba6e176b7da446853406d041c36c7828"}, + {file = "wrapt-1.17.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3af60380ba0b7b5aeb329bc4e402acd25bd877e98b3727b0135cb5c2efdaefe9"}, + {file = "wrapt-1.17.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:0b02e424deef65c9f7326d8c19220a2c9040c51dc165cddb732f16198c168396"}, + {file = "wrapt-1.17.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:74afa28374a3c3a11b3b5e5fca0ae03bef8450d6aa3ab3a1e2c30e3a75d023dc"}, + {file = "wrapt-1.17.3-cp312-cp312-win32.whl", hash = "sha256:4da9f45279fff3543c371d5ababc57a0384f70be244de7759c85a7f989cb4ebe"}, + {file = "wrapt-1.17.3-cp312-cp312-win_amd64.whl", hash = "sha256:e71d5c6ebac14875668a1e90baf2ea0ef5b7ac7918355850c0908ae82bcb297c"}, + {file = "wrapt-1.17.3-cp312-cp312-win_arm64.whl", hash = "sha256:604d076c55e2fdd4c1c03d06dc1a31b95130010517b5019db15365ec4a405fc6"}, + {file = "wrapt-1.17.3-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:a47681378a0439215912ef542c45a783484d4dd82bac412b71e59cf9c0e1cea0"}, + {file = "wrapt-1.17.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:54a30837587c6ee3cd1a4d1c2ec5d24e77984d44e2f34547e2323ddb4e22eb77"}, + {file = "wrapt-1.17.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:16ecf15d6af39246fe33e507105d67e4b81d8f8d2c6598ff7e3ca1b8a37213f7"}, + {file = "wrapt-1.17.3-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:6fd1ad24dc235e4ab88cda009e19bf347aabb975e44fd5c2fb22a3f6e4141277"}, + {file = "wrapt-1.17.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0ed61b7c2d49cee3c027372df5809a59d60cf1b6c2f81ee980a091f3afed6a2d"}, + {file = "wrapt-1.17.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:423ed5420ad5f5529db9ce89eac09c8a2f97da18eb1c870237e84c5a5c2d60aa"}, + {file = "wrapt-1.17.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:e01375f275f010fcbf7f643b4279896d04e571889b8a5b3f848423d91bf07050"}, + {file = "wrapt-1.17.3-cp313-cp313-win32.whl", hash = "sha256:53e5e39ff71b3fc484df8a522c933ea2b7cdd0d5d15ae82e5b23fde87d44cbd8"}, + {file = "wrapt-1.17.3-cp313-cp313-win_amd64.whl", hash = "sha256:1f0b2f40cf341ee8cc1a97d51ff50dddb9fcc73241b9143ec74b30fc4f44f6cb"}, + {file = "wrapt-1.17.3-cp313-cp313-win_arm64.whl", hash = "sha256:7425ac3c54430f5fc5e7b6f41d41e704db073309acfc09305816bc6a0b26bb16"}, + {file = "wrapt-1.17.3-py3-none-any.whl", hash = "sha256:7171ae35d2c33d326ac19dd8facb1e82e5fd04ef8c6c0e394d7af55a55051c22"}, + {file = "wrapt-1.17.3.tar.gz", hash = "sha256:f66eb08feaa410fe4eebd17f2a2c8e2e46d3476e9f8c783daa8e09e0faa666d0"}, +] + [[package]] name = "xlrd" version = "2.0.2" @@ -4680,7 +4843,7 @@ name = "zipp" version = "3.23.0" requires_python = ">=3.9" summary = "Backport of pathlib-compatible object wrapper for zip files" -groups = ["default"] +groups = ["default", "all", "otel"] files = [ {file = "zipp-3.23.0-py3-none-any.whl", hash = "sha256:071652d6115ed432f5ce1d34c336c0adfd6a884660d1e9712a256d3d3bd4b14e"}, {file = "zipp-3.23.0.tar.gz", hash = "sha256:a07157588a12518c9d4034df3fbbee09c814741a33ff63c05fa29d26a2404166"}, diff --git a/src/askui/tools/android/agent_os.py b/src/askui/tools/android/agent_os.py index 9659352e..3a5a8285 100644 --- a/src/askui/tools/android/agent_os.py +++ b/src/askui/tools/android/agent_os.py @@ -3,6 +3,8 @@ from PIL import Image +from askui.tools.android.uiautomator_hierarchy import UIElementCollection + ANDROID_KEY = Literal[ # pylint: disable=C0103 "HOME", "BACK", @@ -493,3 +495,10 @@ def pull(self, remote_path: str, local_path: str) -> None: Pulls a file from the device. """ raise NotImplementedError + + @abstractmethod + def get_ui_elements(self) -> UIElementCollection: + """ + Gets the UI elements. + """ + raise NotImplementedError diff --git a/src/askui/tools/android/agent_os_facade.py b/src/askui/tools/android/agent_os_facade.py index 850e9712..d6504b12 100644 --- a/src/askui/tools/android/agent_os_facade.py +++ b/src/askui/tools/android/agent_os_facade.py @@ -4,6 +4,7 @@ from askui.models.shared.tool_tags import ToolTags from askui.tools.android.agent_os import ANDROID_KEY, AndroidAgentOs, AndroidDisplay +from askui.tools.android.uiautomator_hierarchy import UIElementCollection from askui.utils.image_utils import scale_coordinates, scale_image_to_fit @@ -121,3 +122,15 @@ def push(self, local_path: str, remote_path: str) -> None: def pull(self, remote_path: str, local_path: str) -> None: self._agent_os.pull(remote_path, local_path) + + def get_ui_elements(self) -> UIElementCollection: + ui_elemet_collection = self._agent_os.get_ui_elements() + + for element in ui_elemet_collection: + if element.center is None: + continue + element.set_center( + self._scale_coordinates_back(element.center[0], element.center[1]) + ) + + return ui_elemet_collection diff --git a/src/askui/tools/android/ppadb_agent_os.py b/src/askui/tools/android/ppadb_agent_os.py index c2a9cb2d..407520e6 100644 --- a/src/askui/tools/android/ppadb_agent_os.py +++ b/src/askui/tools/android/ppadb_agent_os.py @@ -18,6 +18,7 @@ UnknownAndroidDisplay, ) from askui.tools.android.android_agent_os_error import AndroidAgentOsError +from askui.tools.android.uiautomator_hierarchy import UIElementCollection from askui.utils.annotated_image import AnnotatedImage @@ -34,6 +35,7 @@ class PpadbAgentOs(AndroidAgentOs): """ _REPORTER_ROLE_NAME: str = "AndroidAgentOS" + _UIAUTOMATOR_DUMP_PATH: str = "/data/local/tmp/askui_window_dump.xml" def __init__( self, reporter: Reporter = NULL_REPORTER, device_identifier: str | int = 0 @@ -482,3 +484,33 @@ def pull(self, remote_path: str, local_path: str) -> None: self._REPORTER_ROLE_NAME, f"pull(remote_path='{remote_path}', local_path='{local_path}')", ) + + def get_ui_elements(self) -> UIElementCollection: + """ + Return UI elements from a `uiautomator dump` of the current screen. + + Returns: + UIElementCollection: Parsed hierarchy from the dump, or empty if the dump + has no usable content. + + Raises: + AndroidAgentOsError: When the dump command does not report success (often + while animations are visible on screen). + + Notes: + `uiautomator dump` is unreliable while the screen shows animation + (transitions, loaders, pulsing highlights, etc.). Retry after motion has + stopped and the UI has settled. + """ + self._check_if_device_is_selected() + assert self._device is not None + dump_cmd = f"uiautomator dump {self._UIAUTOMATOR_DUMP_PATH}" + dump_response = self.shell(dump_cmd) + if "dumped" not in dump_response.lower(): + msg = f"Failed to dump UI hierarchy: {dump_response}" + raise AndroidAgentOsError(msg) + + raw = self.shell(f"cat {self._UIAUTOMATOR_DUMP_PATH}") + if not raw or not raw.strip(): + return UIElementCollection([]) + return UIElementCollection.build_from_xml_dump(raw) diff --git a/src/askui/tools/android/uiautomator_hierarchy.py b/src/askui/tools/android/uiautomator_hierarchy.py new file mode 100644 index 00000000..7943d6f6 --- /dev/null +++ b/src/askui/tools/android/uiautomator_hierarchy.py @@ -0,0 +1,158 @@ +""" +Parse UIAutomator hierarchy dump XML from Android (normalized shell output). +""" + +from __future__ import annotations + +import re +import xml.etree.ElementTree as ET +from dataclasses import dataclass +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from collections.abc import Iterator, Mapping + +# Match & that is not start of a valid XML entity +_RE_INVALID_AMP = re.compile(r"&(?!(?:amp|lt|gt|apos|quot|#\d+|#x[0-9a-fA-F]+);)") # noqa: E501 +_RE_BOUNDS = re.compile(r"\[(\d+),(\d+)\]\[(\d+),(\d+)\]") + +_XML_START_MARKERS = (" tuple[int, int] | None: + """Return (x, y) center of bounds, or None if bounds invalid.""" + if self._center is not None: + return self._center + m = _RE_BOUNDS.match(self.bounds) + if not m: + return None + x1, y1, x2, y2 = (int(g) for g in m.groups()) + self._center = ((x1 + x2) // 2, (y1 + y2) // 2) + return self._center + + def __str__(self) -> str: + """Short description for list output.""" + parts: list[str] = [f"clickable={self.clickable}"] + if self.center: + parts.append(f"center=(x={self.center[0]}, y={self.center[1]})") + if self.text: + parts.append(f'text="{self.text}"') + if self.resource_id: + parts.append(f'resource-id="{self.resource_id}"') + if self.content_desc: + parts.append(f'content-desc="{self.content_desc}"') + if self.class_name: + parts.append(f"class={self.class_name.split('.')[-1]}") + return " | ".join(parts) + + def set_center(self, center: tuple[int, int]) -> None: + """Set the center of the element.""" + self._center = center + + @classmethod + def from_xml_attrib(cls, attrib: Mapping[str, str]) -> UIElement | None: + """Build from XML node attributes, or None if there are no bounds.""" + bounds = attrib.get("bounds", "").strip() + if not bounds: + return None + return cls( + text=attrib.get("text", ""), + resource_id=attrib.get("resource-id", ""), + content_desc=attrib.get("content-desc", ""), + class_name=attrib.get("class", ""), + bounds=bounds, + clickable=attrib.get("clickable", "false") == "true", + enabled=attrib.get("enabled", "true") == "true", + package=attrib.get("package", ""), + ) + + @staticmethod + def from_json(json_content: Mapping[str, str]) -> UIElement: + """Build a UIElement from a string-keyed mapping (e.g. JSON object).""" + return UIElement( + text=json_content.get("text", ""), + resource_id=json_content.get("resource-id", ""), + content_desc=json_content.get("content-desc", ""), + class_name=json_content.get("class", ""), + bounds=json_content.get("bounds", ""), + clickable=json_content.get("clickable", "false") == "true", + enabled=json_content.get("enabled", "true") == "true", + package=json_content.get("package", ""), + ) + + +class UIElementCollection: + """Collection of UI elements.""" + + def __init__(self, elements: list[UIElement]) -> None: + self._elements = list(elements) + + def get_all(self) -> list[UIElement]: + """Return a copy of all elements.""" + return list(self._elements) + + def __iter__(self) -> Iterator[UIElement]: + return iter(self._elements) + + def __len__(self) -> int: + return len(self._elements) + + def __str__(self) -> str: + """String representation of the collection.""" + return "\n".join(str(element) for element in self._elements) + + @staticmethod + def _normalize_dump_string(raw: str) -> str: + """ + Normalize raw shell output to valid XML before parsing. + + Handles encoding, ADB/shell cruft, control chars, and unescaped & in attributes. + """ + raw = raw.strip().lstrip("\ufeff") + start_indices = [raw.find(marker) for marker in _XML_START_MARKERS] + valid = [i for i in start_indices if i >= 0] + if valid: + raw = raw[min(valid) :] + end_tag = "" + j = raw.rfind(end_tag) + if j >= 0: + raw = raw[: j + len(end_tag)] + raw = "".join(c for c in raw if c in "\n\t" or ord(c) >= 32) + return _RE_INVALID_AMP.sub("&", raw) + + @staticmethod + def build_from_xml_dump(xml_content: str) -> UIElementCollection: + """Build a UIElementCollection from a UIAutomator dump XML string.""" + elements: list[UIElement] = [] + xml_content = UIElementCollection._normalize_dump_string(xml_content) + if not xml_content: + return UIElementCollection(elements) + try: + root = ET.fromstring(xml_content) + except ET.ParseError: + return UIElementCollection(elements) + + def collect(node: ET.Element) -> None: + elem = UIElement.from_xml_attrib(node.attrib) + if elem is not None: + elements.append(elem) + for child in node: + collect(child) + + collect(root) + return UIElementCollection(elements) diff --git a/src/askui/tools/store/android/__init__.py b/src/askui/tools/store/android/__init__.py index 2bc97187..e753020d 100644 --- a/src/askui/tools/store/android/__init__.py +++ b/src/askui/tools/store/android/__init__.py @@ -4,8 +4,12 @@ AndroidAgent. """ +from askui.tools.store.android.get_uiautomator_hierarchy_tool import ( + AndroidGetUIAutomatorHierarchyTool, +) from askui.tools.store.android.save_screenshot_tool import AndroidSaveScreenshotTool __all__ = [ "AndroidSaveScreenshotTool", + "AndroidGetUIAutomatorHierarchyTool", ] diff --git a/src/askui/tools/store/android/get_uiautomator_hierarchy_tool.py b/src/askui/tools/store/android/get_uiautomator_hierarchy_tool.py new file mode 100644 index 00000000..b6212dd5 --- /dev/null +++ b/src/askui/tools/store/android/get_uiautomator_hierarchy_tool.py @@ -0,0 +1,74 @@ +from askui.models.shared import AndroidBaseTool, ToolTags +from askui.tools.android.agent_os_facade import AndroidAgentOsFacade + + +class AndroidGetUIAutomatorHierarchyTool(AndroidBaseTool): + """ + Returns a flattened, text-friendly snapshot of the Android accessibility hierarchy + for the connected device (via UIAutomator window dump). + + Each line describes one on-screen view: `clickable`, tap `center` computed from + bounds, and when non-empty: quoted `text`, `resource-id`, `content-desc`, and a + short view `class` name (last segment of the fully qualified class). Views without + parseable bounds are omitted. + + Prefer this over screenshots when capture fails, is unavailable, or you want + explicit structure (ids, descriptions, centers) instead of visual inference. + Prefer using returned centers and labels over blind coordinate guesses. + + Lines use ` | ` between fields, for example: + `clickable=True | center=(x=120, y=340) | text="OK" | class=Button`. + + Args: + agent_os (AndroidAgentOsFacade | None, optional): The Android agent OS facade. + If omitted, the agent supplies the connected device implementation at + runtime. + + Examples: + ```python + from askui import AndroidAgent + from askui.tools.store.android import AndroidGetUIAutomatorHierarchyTool + + with AndroidAgent() as agent: + agent.act( + "List tappable elements on the screen using the accessibility tree", + tools=[AndroidGetUIAutomatorHierarchyTool()], + ) + ``` + + ```python + from askui import AndroidAgent + from askui.tools.store.android import AndroidGetUIAutomatorHierarchyTool + + with AndroidAgent(act_tools=[AndroidGetUIAutomatorHierarchyTool()]) as agent: + agent.act("What buttons and links are visible on this screen?") + ``` + """ + + def __init__(self, agent_os: AndroidAgentOsFacade | None = None) -> None: + super().__init__( + name="get_uiautomator_hierarchy_tool", + description=( + "UIAutomator accessibility snapshot for the current Android screen" + " (window dump). Returns one text line per view: clickable, tap center" + " from bounds (`center=(x=..., y=...)`), and when set: text," + " resource-id," + " content-desc, short view class—fields joined by ` | `. Skips views" + " without valid bounds. Use instead of screenshots when capture is" + " unreliable or you need ids, descriptions, and tap centers for" + " structured reasoning; avoid guessing raw coordinates." + ), + required_tags=[ToolTags.SCALED_AGENT_OS.value], + agent_os=agent_os, + ) + + def __call__(self) -> str: + """ + Build one string of the accessibility hierarchy for the model. + + Returns: + str: Prefix `UIAutomator hierarchy was retrieved:` followed by newline- + separated element lines (see class docstring for field format). + """ + hierarchy = self.agent_os.get_ui_elements() + return f"UIAutomator hierarchy was retrieved: {str(hierarchy)}" From e9c58e0997f9818ab77703e49efcbec976e732bd Mon Sep 17 00:00:00 2001 From: Samir Mlika Date: Tue, 24 Mar 2026 16:13:02 +0100 Subject: [PATCH 2/2] fix scaling issues --- src/askui/tools/android/agent_os_facade.py | 27 ++++++++++++++-------- src/askui/utils/image_utils.py | 4 +--- 2 files changed, 18 insertions(+), 13 deletions(-) diff --git a/src/askui/tools/android/agent_os_facade.py b/src/askui/tools/android/agent_os_facade.py index d6504b12..f27d0eee 100644 --- a/src/askui/tools/android/agent_os_facade.py +++ b/src/askui/tools/android/agent_os_facade.py @@ -37,7 +37,12 @@ def screenshot(self) -> Image.Image: self._target_resolution, ) - def _scale_coordinates_back(self, x: int, y: int) -> Tuple[int, int]: + def _scale_coordinates( + self, + x: int, + y: int, + from_agent: bool = True, + ) -> Tuple[int, int]: if self._real_screen_resolution is None: self._real_screen_resolution = self._agent_os.screenshot().size @@ -45,25 +50,25 @@ def _scale_coordinates_back(self, x: int, y: int) -> Tuple[int, int]: (x, y), self._real_screen_resolution, self._target_resolution, - inverse=True, + inverse=from_agent, ) def tap(self, x: int, y: int) -> None: - x, y = self._scale_coordinates_back(x, y) + x, y = self._scale_coordinates(x, y) self._agent_os.tap(x, y) def swipe( self, x1: int, y1: int, x2: int, y2: int, duration_in_ms: int = 1000 ) -> None: - x1, y1 = self._scale_coordinates_back(x1, y1) - x2, y2 = self._scale_coordinates_back(x2, y2) + x1, y1 = self._scale_coordinates(x1, y1) + x2, y2 = self._scale_coordinates(x2, y2) self._agent_os.swipe(x1, y1, x2, y2, duration_in_ms) def drag_and_drop( self, x1: int, y1: int, x2: int, y2: int, duration_in_ms: int = 1000 ) -> None: - x1, y1 = self._scale_coordinates_back(x1, y1) - x2, y2 = self._scale_coordinates_back(x2, y2) + x1, y1 = self._scale_coordinates(x1, y1) + x2, y2 = self._scale_coordinates(x2, y2) self._agent_os.drag_and_drop(x1, y1, x2, y2, duration_in_ms) def type(self, text: str) -> None: @@ -125,12 +130,14 @@ def pull(self, remote_path: str, local_path: str) -> None: def get_ui_elements(self) -> UIElementCollection: ui_elemet_collection = self._agent_os.get_ui_elements() - for element in ui_elemet_collection: if element.center is None: continue element.set_center( - self._scale_coordinates_back(element.center[0], element.center[1]) + self._scale_coordinates( + x=element.center[0], + y=element.center[1], + from_agent=False, + ) ) - return ui_elemet_collection diff --git a/src/askui/utils/image_utils.py b/src/askui/utils/image_utils.py index b2e5e375..19a5f92c 100644 --- a/src/askui/utils/image_utils.py +++ b/src/askui/utils/image_utils.py @@ -306,9 +306,7 @@ def scale_coordinates( offset = _calc_center_offset(scaling_results.size, target_size) result = _scale_coordinates(coordinates, offset, scaling_results.factor, inverse) if check_coordinates_in_bounds: - _check_coordinates_in_bounds( - result, original_size if inverse else scaling_results.size - ) + _check_coordinates_in_bounds(result, original_size if inverse else target_size) return result