From a120f6b56cb9394a283f11aa04da18fba93bc8e2 Mon Sep 17 00:00:00 2001 From: "NEANC.or.HZCK" Date: Thu, 12 Feb 2026 06:54:57 +0800 Subject: [PATCH 1/4] =?UTF-8?q?=E7=A7=BB=E9=99=A4=E6=AE=8B=E7=95=99?= =?UTF-8?q?=E7=9A=84AI=EF=BC=9Amobile=5Fagent?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mobile_v3/run_mobileagentv3.py | 389 --------------------------------- 1 file changed, 389 deletions(-) delete mode 100644 mobile_v3/run_mobileagentv3.py diff --git a/mobile_v3/run_mobileagentv3.py b/mobile_v3/run_mobileagentv3.py deleted file mode 100644 index e91afca..0000000 --- a/mobile_v3/run_mobileagentv3.py +++ /dev/null @@ -1,389 +0,0 @@ -import os -import uuid -import json -import time -import argparse -from PIL import Image -from datetime import datetime - -from utils.mobile_agent_e import ( - InfoPool, - Manager, - Executor, - Notetaker, - ActionReflector, - INPUT_KNOW -) -import utils.controller as controller -from utils.call_mobile_agent_e import GUIOwlWrapper -from typing import Optional -import threading - -def run_instruction( - adb_path, - hdc_path, - api_key, - base_url, - model, - instruction, - add_info, - coor_type, - if_notetaker, - max_step: int = 25, - log_path: str = "./logs", - stop_event: Optional[threading.Event] = None, -): - if adb_path and hdc_path: - raise ValueError("adb_path and hdc_path cannot be provided at the same time. Please specify only one of them.") - if adb_path: - from utils.android_controller import AndroidController - controller = AndroidController(adb_path) - else: - from utils.harmonyos_controller import HarmonyOSController - controller = HarmonyOSController(hdc_path) - - if not os.path.exists(log_path): - os.mkdir(log_path) - - now = datetime.now() - time_str = now.strftime("%Y%m%d_%H%M%S") - save_path = f"{log_path}/{time_str}_{instruction[:10]}" - os.mkdir(save_path) - image_save_path = os.path.join(save_path, "images") - os.mkdir(image_save_path) - - # Debug: show which adb/hdc path is used (includes -s for adb) - try: - path_desc = adb_path if adb_path else hdc_path - if path_desc: - print(f"[Agent] Controller path: {path_desc}") - except Exception: - pass - - info_pool = InfoPool( - additional_knowledge_manager=add_info, - additional_knowledge_executor=INPUT_KNOW, - err_to_manager_thresh=2 - ) - - vllm = GUIOwlWrapper(api_key, base_url, model) - manager = Manager() - executor = Executor() - notetaker = Notetaker() - action_reflector = ActionReflector() - message_manager, message_operator, message_reflector, message_notekeeper = None, None, None, None - info_pool.instruction = instruction - - def _should_stop(): - try: - return bool(stop_event and stop_event.is_set()) - except Exception: - return False - - final_answer_content = None - for step in range(max_step): - if _should_stop(): - print("[Agent] Stop signal received. Exiting.") - break - if step == max_step: - task_result_path = os.path.join(save_path, "task_result.json") - current_time = datetime.now() - formatted_time = current_time.strftime("%Y-%m-%d %H:%M:%S.%f") - task_result_data = {"goal": instruction, "finish_dtime": formatted_time, "hit_step_limit": 1.0} - with open(task_result_path, 'w', encoding='utf-8') as json_file: - json.dump(task_result_data, json_file, ensure_ascii=False, indent=4) - break - - if step == 0: - current_time = datetime.now() - formatted_time = current_time.strftime(f'%Y-%m-%d-{current_time.hour * 3600 + current_time.minute * 60 + current_time.second}-{str(uuid.uuid4().hex[:8])}') - local_image_dir = os.path.join(image_save_path, f"screenshot_{formatted_time}.png") - else: - local_image_dir = local_image_dir2 - - # get the screenshot - for _ in range(5): - if _should_stop(): - print("[Agent] Stop during screenshot acquisition.") - break - if not controller.get_screenshot(local_image_dir): - print("Get screenshot failed, retry.") - time.sleep(5) - else: - break - if _should_stop(): - break - - width, height = Image.open(local_image_dir).size - - info_pool.error_flag_plan = False - err_to_manager_thresh = info_pool.err_to_manager_thresh - if len(info_pool.action_outcomes) >= err_to_manager_thresh: - # check if the last err_to_manager_thresh actions are all errors - latest_outcomes = info_pool.action_outcomes[-err_to_manager_thresh:] - count = 0 - for outcome in latest_outcomes: - if outcome in ["B", "C"]: - count += 1 - if count == err_to_manager_thresh: - info_pool.error_flag_plan = True - - skip_manager = False - ## if previous action is invalid, skip the manager and try again first ## - if not info_pool.error_flag_plan and len(info_pool.action_history) > 0: - if info_pool.action_history[-1]['action'] == 'invalid': - skip_manager = True - - if _should_stop(): - break - - if not skip_manager: - print("\n### Manager ... ###\n") - prompt_planning = manager.get_prompt(info_pool) - output_planning, message_manager, raw_response = vllm.predict_mm( - prompt_planning, - [local_image_dir] - ) - - message_save_path = os.path.join(save_path, f"step_{step+1}") - os.mkdir(message_save_path) - message_file = os.path.join(message_save_path, "manager.json") - message_data = {"name": "manager", "messages": message_manager, "response": output_planning, "step_id": step+1} - with open(message_file, 'w', encoding='utf-8') as json_file: - json.dump(message_data, json_file, ensure_ascii=False, indent=4) - - parsed_result_planning = manager.parse_response(output_planning) - info_pool.completed_plan = parsed_result_planning['completed_subgoal'] - info_pool.plan = parsed_result_planning['plan'] - if not raw_response: - raise RuntimeError('Error calling vLLM in planning phase.') - - print('Completed subgoal: ' + info_pool.completed_plan) - print('Planning thought: ' + parsed_result_planning['thought']) - print('Plan: ' + info_pool.plan, "\n") - - if _should_stop(): - break - - if "Finished" in info_pool.plan.strip() and len(info_pool.plan.strip()) < 15: - print("Instruction finished, stop the process.") - task_result_path = os.path.join(save_path, "task_result.json") - current_time = datetime.now() - formatted_time = current_time.strftime("%Y-%m-%d %H:%M:%S.%f") - task_result_data = {"goal": instruction, "finish_dtime": formatted_time, "hit_step_limit": 0.0} - with open(task_result_path, 'w', encoding='utf-8') as json_file: - json.dump(task_result_data, json_file, ensure_ascii=False, indent=4) - break - else: - print("\n### Operator ... ###\n") - - prompt_action = executor.get_prompt(info_pool) - output_action, message_operator, raw_response = vllm.predict_mm( - prompt_action, - [local_image_dir], - ) - - if not raw_response: - raise RuntimeError('Error calling LLM in operator phase.') - parsed_result_action = executor.parse_response(output_action) - action_thought, action_object_str, action_description = parsed_result_action['thought'], parsed_result_action['action'], parsed_result_action['description'] - - info_pool.last_action_thought = action_thought - info_pool.last_summary = action_description - - if (not action_thought) or (not action_object_str): - print('Action prompt output is not in the correct format.') - info_pool.last_action = {"action": "invalid"} - info_pool.action_history.append({"action": "invalid"}) - info_pool.summary_history.append(action_description) - info_pool.action_outcomes.append("C") - info_pool.error_descriptions.append("invalid action format, do nothing.") - continue - - action_object_str = action_object_str.replace("```", "").replace("json", "").strip() - print('Thought: ' + action_thought) - print('Action: ' + action_object_str) - print('Action description: ' + action_description) - - try: - action_object = json.loads(action_object_str) - operator_response = f'''### Thought ### -{action_thought} - -### Action ### -{action_object} - -### Description ### -{action_description}''' - - if _should_stop(): - print("[Agent] Stop before handling operator action.") - break - - if action_object['action'] == "answer": - message_file = os.path.join(message_save_path, "operator.json") - message_data = {"name": "operator", "messages": message_operator, "response": operator_response, "step_id": step+1} - with open(message_file, 'w', encoding='utf-8') as json_file: - json.dump(message_data, json_file, ensure_ascii=False, indent=4) - - answer_content = action_object['text'] - print(f"Instruction finished, answer: {answer_content}, stop the process.") - task_result_path = os.path.join(save_path, "task_result.json") - current_time = datetime.now() - formatted_time = current_time.strftime("%Y-%m-%d %H:%M:%S.%f") - task_result_data = {"goal": instruction, "finish_dtime": formatted_time, "hit_step_limit": 0.0} - with open(task_result_path, 'w', encoding='utf-8') as json_file: - json.dump(task_result_data, json_file, ensure_ascii=False, indent=4) - final_answer_content = answer_content - break - - if coor_type != "abs": - if "coordinate" in action_object: - action_object['coordinate'] = [int(action_object['coordinate'][0] / 1000 * width), int(action_object['coordinate'][1] / 1000 * height)] - if "coordinate2" in action_object: - action_object['coordinate2'] = [int(action_object['coordinate2'][0] / 1000 * width), int(action_object['coordinate2'][1] / 1000 * height)] - - if _should_stop(): - print("[Agent] Stop before executing device action.") - break - - if action_object['action'] == "click": - controller.tap(action_object['coordinate'][0], action_object['coordinate'][1]) - elif action_object['action'] == "swipe": - controller.slide(action_object['coordinate'][0], action_object['coordinate'][1], action_object['coordinate2'][0], action_object['coordinate2'][1]) - elif action_object['action'] == "type": - controller.type(action_object['text']) - elif action_object['action'] == "system_button": - if action_object['button'] == "Back": - controller.back() - elif action_object['button'] == "Home": - controller.home() - - except: - info_pool.last_action = {"action": "invalid"} - info_pool.action_history.append({"action": "invalid"}) - info_pool.summary_history.append(action_description) - info_pool.action_outcomes.append("C") - info_pool.error_descriptions.append("invalid action format, do nothing.") - local_image_dir2 = local_image_dir - continue - - message_file = os.path.join(message_save_path, "operator.json") - message_data = {"name": "operator", "messages": message_operator, "response": operator_response, "step_id": step+1} - with open(message_file, 'w', encoding='utf-8') as json_file: - json.dump(message_data, json_file, ensure_ascii=False, indent=4) - - info_pool.last_action = json.loads(action_object_str) - - if _should_stop(): - break - if step == 0: - for _ in range(8): - if _should_stop(): - break - time.sleep(1) # maybe a pop-up when first open an app - for _ in range(2): - if _should_stop(): - break - time.sleep(1) - if _should_stop(): - break - - current_time = datetime.now() - formatted_time = current_time.strftime(f'%Y-%m-%d-{current_time.hour * 3600 + current_time.minute * 60 + current_time.second}-{str(uuid.uuid4().hex[:8])}') - local_image_dir2 = os.path.join(image_save_path, f"screenshot_{formatted_time}.png") - - # get the screenshot - for _ in range(5): - if _should_stop(): - print("[Agent] Stop during post-action screenshot acquisition.") - break - if not controller.get_screenshot(local_image_dir2): - print("Get screenshot failed, retry.") - time.sleep(5) - else: - break - if _should_stop(): - break - - print("\n### Action Reflector ... ###\n") - prompt_action_reflect = action_reflector.get_prompt(info_pool) - output_action_reflect, message_reflector, raw_response = vllm.predict_mm( - prompt_action_reflect, - [ - local_image_dir, - local_image_dir2, - ], - ) - - message_file = os.path.join(message_save_path, "reflector.json") - message_data = {"name": "reflector", "messages": message_reflector, "response": output_action_reflect, "step_id": step+1} - with open(message_file, 'w', encoding='utf-8') as json_file: - json.dump(message_data, json_file, ensure_ascii=False, indent=4) - - parsed_result_action_reflect = action_reflector.parse_response(output_action_reflect) - outcome, error_description = ( - parsed_result_action_reflect['outcome'], - parsed_result_action_reflect['error_description'] - ) - progress_status = info_pool.completed_plan - - if "A" in outcome: # Successful. The result of the last action meets the expectation. - action_outcome = "A" - elif "B" in outcome: # Failed. The last action results in a wrong page. I need to return to the previous state. - action_outcome = "B" - elif "C" in outcome: # Failed. The last action produces no changes. - action_outcome = "C" - else: - raise ValueError("Invalid outcome:", outcome) - - print('Action reflection outcome: ' + action_outcome) - print('Action reflection error description: ' + error_description) - print('Action reflection progress status: ' + progress_status, "\n") - - info_pool.action_history.append(json.loads(action_object_str)) - info_pool.summary_history.append(action_description) - info_pool.action_outcomes.append(action_outcome) - info_pool.error_descriptions.append(error_description) - info_pool.progress_status = progress_status - - if _should_stop(): - break - - if action_outcome == "A" and if_notetaker: - print("\n### NoteKeeper ... ###\n") - prompt_note = notetaker.get_prompt(info_pool) - output_note, message_notekeeper, raw_response = vllm.predict_mm( - prompt_note, - [local_image_dir2], - ) - - message_file = os.path.join(message_save_path, "notekeeper.json") - message_data = {"name": "notekeeper", "messages": message_notekeeper, "response": output_note, "step_id": step+1} - with open(message_file, 'w', encoding='utf-8') as json_file: - json.dump(message_data, json_file, ensure_ascii=False, indent=4) - - parsed_result_note = notetaker.parse_response(output_note) - important_notes = parsed_result_note['important_notes'] - info_pool.important_notes = important_notes - - print('Important notes: ' + important_notes, "\n") - - return final_answer_content - -if __name__ == '__main__': - parser = argparse.ArgumentParser( - description="Run Mobile-Agent-v3 with a given model and instruction" - ) - parser.add_argument("--adb_path", type=str) - parser.add_argument("--hdc_path", type=str) - parser.add_argument("--api_key", type=str) - parser.add_argument("--base_url", type=str) - parser.add_argument("--model", type=str) - parser.add_argument("--instruction", type=str) - parser.add_argument("--add_info", type=str, default="") - parser.add_argument("--coor_type", type=str, default="abs") - parser.add_argument("--notetaker", type=bool, default=False) - args = parser.parse_args() - - run_instruction(args.adb_path, args.hdc_path, args.api_key, args.base_url, args.model, args.instruction, args.add_info, args.coor_type, args.notetaker) From 0f8d8f082328681a2474e95186da3e6c0fbfa6da Mon Sep 17 00:00:00 2001 From: "NEANC.or.HZCK" Date: Thu, 12 Feb 2026 06:56:53 +0800 Subject: [PATCH 2/4] =?UTF-8?q?=E6=B7=BB=E5=8A=A0=E8=87=AA=E5=8A=A8?= =?UTF-8?q?=E5=81=9C=E6=AD=A2=E9=95=9C=E5=83=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- templates/index.html | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/templates/index.html b/templates/index.html index 9d80691..c5b7d84 100644 --- a/templates/index.html +++ b/templates/index.html @@ -586,6 +586,9 @@

已保存的设备

function initInput(width, height) { function input_data_cb(data) { + // 重置自动停止定时器 + resetAutoStopTimer(); + if (currentMirroringDevice) { socket.emit('control_data', { device_id: currentMirroringDevice, @@ -607,21 +610,27 @@

已保存的设备

const menuBtn = document.getElementById('menu-btn'); backBtn.addEventListener('mousedown', (event) => { + resetAutoStopTimer(); input.snedKeyCode(event, 0, 4); }); backBtn.addEventListener('mouseup', (event) => { + resetAutoStopTimer(); input.snedKeyCode(event, 1, 4); }); homeBtn.addEventListener('mousedown', (event) => { + resetAutoStopTimer(); input.snedKeyCode(event, 0, 3); }); homeBtn.addEventListener('mouseup', (event) => { + resetAutoStopTimer(); input.snedKeyCode(event, 1, 3); }); menuBtn.addEventListener('mousedown', (event) => { + resetAutoStopTimer(); input.snedKeyCode(event, 0, 187); }); menuBtn.addEventListener('mouseup', (event) => { + resetAutoStopTimer(); input.snedKeyCode(event, 1, 187); }); @@ -629,6 +638,30 @@

已保存的设备

} } + let autoStopTimer = null; + const AUTO_STOP_TIME = 15 * 60 * 1000; // 15分钟 + + function resetAutoStopTimer() { + if (autoStopTimer) { + clearTimeout(autoStopTimer); + } + if (currentMirroringDevice) { + autoStopTimer = setTimeout(() => { + if (currentMirroringDevice) { + showToast('15分钟无操作,自动停止镜像', 'info'); + socket.emit('stop_mirror', { device_id: currentMirroringDevice }); + } + }, AUTO_STOP_TIME); + } + } + + function clearAutoStopTimer() { + if (autoStopTimer) { + clearTimeout(autoStopTimer); + autoStopTimer = null; + } + } + let parser = new VideoParser(({ type, data }) => { if (type === 'nalu') { if (jmuxerReady && jmuxer) { @@ -966,12 +999,16 @@

已保存的设备

if (!jmuxer) { jmuxer = createJMuxer(); } + // 启动自动停止定时器 + resetAutoStopTimer(); }); socket.on('mirror_stopped', (data) => { showToast(`设备 ${data.device_id} 镜像已停止`, 'warning'); currentMirroringDevice = null; // 清除当前镜像设备 hideControlPanel(); + // 清除自动停止定时器 + clearAutoStopTimer(); // 重置播放器和解析器,避免残留状态影响再次启动 resetPlayer(); if (input && typeof input.destroy === 'function') { From a91ea76a739e0c24c16f0ee095e6f22e5da19d7e Mon Sep 17 00:00:00 2001 From: "NEANC.or.HZCK" Date: Thu, 12 Feb 2026 06:59:09 +0800 Subject: [PATCH 3/4] =?UTF-8?q?=E6=B7=BB=E5=8A=A0=E4=BB=8E=20data/.env=20?= =?UTF-8?q?=E4=B8=AD=E8=AF=BB=E5=8F=96=E8=87=AA=E5=8A=A8=E5=81=9C=E6=AD=A2?= =?UTF-8?q?=E6=97=B6=E9=97=B4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app.py | 15 +++++++++++++++ templates/index.html | 18 ++++++++++++++++-- 2 files changed, 31 insertions(+), 2 deletions(-) diff --git a/app.py b/app.py index 6e2e0e2..7d0cd92 100644 --- a/app.py +++ b/app.py @@ -45,6 +45,18 @@ def get_saved_devices(): # 转换为新的列表格式,设备名称默认为地址 return [{'name': device, 'address': device} for device in devices] +def get_auto_stop_time(): + """ + 从 data/.env 文件中读取自动停止时间(分钟) + 如果未设置,返回默认值15分钟 + """ + config = dotenv_values(ENV_FILE_PATH) + auto_stop_time = config.get('AUTO_STOP_TIME', '15') + try: + return int(auto_stop_time) + except (ValueError, TypeError): + return 15 + def save_devices(devices): """ 将所有已连接的设备 ADB 地址保存到 data/.env 文件中 @@ -183,6 +195,9 @@ def handle_connect(): # 发送保存的设备列表 saved_devices = get_saved_devices() emit('saved_devices', saved_devices) + # 发送自动停止时间 + auto_stop_time = get_auto_stop_time() + emit('auto_stop_time', {'minutes': auto_stop_time}) return True def get_current_mirroring_device_id(): diff --git a/templates/index.html b/templates/index.html index c5b7d84..8ce2d40 100644 --- a/templates/index.html +++ b/templates/index.html @@ -639,7 +639,14 @@

已保存的设备

} let autoStopTimer = null; - const AUTO_STOP_TIME = 15 * 60 * 1000; // 15分钟 + let autoStopMinutes = 15; // 默认15分钟 + let AUTO_STOP_TIME = 15 * 60 * 1000; // 默认15分钟(毫秒) + + function updateAutoStopTime(minutes) { + autoStopMinutes = minutes; + AUTO_STOP_TIME = minutes * 60 * 1000; + console.log(`自动停止时间已更新为 ${minutes} 分钟`); + } function resetAutoStopTimer() { if (autoStopTimer) { @@ -648,7 +655,7 @@

已保存的设备

if (currentMirroringDevice) { autoStopTimer = setTimeout(() => { if (currentMirroringDevice) { - showToast('15分钟无操作,自动停止镜像', 'info'); + showToast(`${autoStopMinutes}分钟无操作,自动停止镜像`, 'info'); socket.emit('stop_mirror', { device_id: currentMirroringDevice }); } }, AUTO_STOP_TIME); @@ -729,6 +736,13 @@

已保存的设备

showToast('已连接到服务器', 'success'); }); + // 接收自动停止时间 + socket.on('auto_stop_time', (data) => { + if (data && data.minutes) { + updateAutoStopTime(data.minutes); + } + }); + // 接收保存的 ADB 地址 socket.on('saved_adb_address', (data) => { if (data.ip) { From 6c42597a0422b14f834e88d0aa15a68df5c46a87 Mon Sep 17 00:00:00 2001 From: "NEANC.or.HZCK" Date: Thu, 12 Feb 2026 07:01:19 +0800 Subject: [PATCH 4/4] =?UTF-8?q?=E6=B7=BB=E5=8A=A0=20=E9=85=8D=E7=BD=AE?= =?UTF-8?q?=E6=96=87=E4=BB=B6=20=E8=AF=B4=E6=98=8E?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/README.md b/README.md index 94de2dc..90759bf 100644 --- a/README.md +++ b/README.md @@ -77,6 +77,12 @@ docker-compose up -d 3. 访问 Web 界面 - 打开浏览器访问 `http://localhost:5000/`。 +### 配置文件 + +- `data/.env`:环境变量配置文件。 + - `ADB_DEVICES`:adb 设备列表,格式为 `{"设备名称": "IP:PORT"}`。 + - `AUTO_STOP_TIME`:自动停止镜像时间,单位为分钟。默认值为 15 分钟。 + ## 许可证 Apache License 2.0