From ad71df3c8035ee47be258e2e3be6def5371d6346 Mon Sep 17 00:00:00 2001 From: Ryunosuke Iwai Date: Tue, 18 Mar 2025 14:38:47 +0900 Subject: [PATCH 01/10] ix: spreadsheet update timing --- freeplay/conversation_formatter.py | 2 +- freeplay/spreadsheet.py | 20 ++++++- freeplay/trajectory_runner.py | 84 +++++++++++++++++++----------- 3 files changed, 74 insertions(+), 32 deletions(-) diff --git a/freeplay/conversation_formatter.py b/freeplay/conversation_formatter.py index 15b5cbcc6..3056e3673 100644 --- a/freeplay/conversation_formatter.py +++ b/freeplay/conversation_formatter.py @@ -117,7 +117,7 @@ async def format_conversation( + [ Message( role="user", - content="Your output\n[Planning]", + content="Remember that your python code must be always enclosed with ```python ... ``` decorator. It's very import for parsing your code. It you can't, you will be fired.\n\nYour output\n[Planning]", ), ] ) diff --git a/freeplay/spreadsheet.py b/freeplay/spreadsheet.py index 1f5febf53..3dd17b324 100644 --- a/freeplay/spreadsheet.py +++ b/freeplay/spreadsheet.py @@ -112,6 +112,10 @@ def insert_to_spreadsheet(spreadsheet_id, range_name, values, max_retries=3): spreadsheet_id (str): スプレッドシートのID range_name (str): データを挿入する範囲(例:'Sheet1!A1:B2') values (list): 挿入するデータ(2次元配列) + + Returns: + tuple: (result, row_number) resultはAPIレスポンス、row_numberは挿入された最初の行番号 + エラー時は (None, None) """ for attempt in range(max_retries): try: @@ -142,7 +146,19 @@ def insert_to_spreadsheet(spreadsheet_id, range_name, values, max_retries=3): ) print(f"{len(values)} 行のデータを追加しました。") - return result + # updatedRangeから行番号を抽出 (例: 'Sheet1!A371:B371' -> 371) + updated_range = result.get("updates", {}).get("updatedRange", "") + row_number = None + if updated_range: + # 'Sheet1!A371:B371' から '371' を抽出 + try: + row_number = int( + "".join(filter(str.isdigit, updated_range.split(":")[0])) + ) + except (ValueError, IndexError): + print("行番号の抽出に失敗しました。") + + return result, row_number except HttpError as error: print(f"試行 {attempt + 1}/{max_retries} でエラーが発生しました: {error}") @@ -151,4 +167,4 @@ def insert_to_spreadsheet(spreadsheet_id, range_name, values, max_retries=3): time.sleep(60) else: print("最大リトライ回数に達しました。") - return None + return None, None diff --git a/freeplay/trajectory_runner.py b/freeplay/trajectory_runner.py index d7d04308f..8e9fadb1f 100644 --- a/freeplay/trajectory_runner.py +++ b/freeplay/trajectory_runner.py @@ -184,18 +184,19 @@ async def run(self): STEPS_PER_ITERATION = 20 iteration = (depth // STEPS_PER_ITERATION) + 1 + current_entities = f"{instance.namespace.get_entities()}" + current_inventory = f"{instance.namespace.inspect_inventory()}" + + (previous_iteration_summary,) = await self.agent.report_summary( + iteration=iteration, + current_inventory=current_inventory, + current_entities=current_entities, + current_conversation=current_conversation, + ) + while True: iteration += 1 print(f"### Iteration {iteration} ###") - current_entities = f"{instance.namespace.get_entities()}" - current_inventory = f"{instance.namespace.inspect_inventory()}" - - (previous_iteration_summary,) = await self.agent.report_summary( - iteration=iteration - 1, - current_inventory=current_inventory, - current_entities=current_entities, - current_conversation=current_conversation, - ) update_spreadsheet_cell( os.getenv("SPREADSHEET_ID"), @@ -230,8 +231,11 @@ async def run(self): f"[Iteration {iteration}] LLM実行中...", ) + current_entities = f"{instance.namespace.get_entities()}" + current_inventory = f"{instance.namespace.inspect_inventory()}" + # Save results to spreadsheet - insert_to_spreadsheet( + (_, iteration_row_number) = insert_to_spreadsheet( os.getenv("SPREADSHEET_ID"), "Iterations!A1:Z", [ @@ -242,7 +246,6 @@ async def run(self): instruction, current_entities, current_inventory, - previous_iteration_summary, ], ], ) @@ -268,6 +271,24 @@ async def run(self): f"Step {iteration}-{step + 1}" ) + # Save results to spreadsheet + (_, step_row_number) = insert_to_spreadsheet( + os.getenv("SPREADSHEET_ID"), + "Steps!A1:Z", + [ + [ + self.config.version, + self.config.model, + iteration, + step, + current_entities, + current_inventory, + program.thinking, + program.code, + ] + ], + ) + if not program: continue @@ -328,24 +349,12 @@ async def run(self): parent_id = saved_program.id - # Save results to spreadsheet - insert_to_spreadsheet( - os.getenv("SPREADSHEET_ID"), - "Steps!A1:Z", - [ - [ - self.config.version, - self.config.model, - iteration, - step, - current_entities, - current_inventory, - program.thinking, - program.code, - program.response, - ] - ], - ) + if step_row_number: + update_spreadsheet_cell( + os.getenv("SPREADSHEET_ID"), + f"Steps!I{step_row_number}", + program.response, + ) # Update state for next iteration if program.state: @@ -356,6 +365,23 @@ async def run(self): print(f"Error in Step {iteration}-{step + 1}: {e}") continue + current_entities = f"{instance.namespace.get_entities()}" + current_inventory = f"{instance.namespace.inspect_inventory()}" + + (previous_iteration_summary,) = await self.agent.report_summary( + iteration=iteration, + current_inventory=current_inventory, + current_entities=current_entities, + current_conversation=current_conversation, + ) + + if iteration_row_number: + update_spreadsheet_cell( + os.getenv("SPREADSHEET_ID"), + f"Iterations!G{iteration_row_number}", + previous_iteration_summary, + ) + elapsed = time.time() - self.start_time elapsed_str = f"{int(elapsed // 3600):02d}:{int((elapsed % 3600) // 60):02d}:{int(elapsed % 60):02d}" print( From b69475219e772f867ffcb5d8e13f9b2feff0d367 Mon Sep 17 00:00:00 2001 From: Ryunosuke Iwai Date: Tue, 18 Mar 2025 17:29:22 +0900 Subject: [PATCH 02/10] refine prompt --- freeplay/basic_agent.py | 1 + freeplay/conversation_formatter.py | 26 +++++++++++++++++++------- 2 files changed, 20 insertions(+), 7 deletions(-) diff --git a/freeplay/basic_agent.py b/freeplay/basic_agent.py index a69c64427..084bbb899 100644 --- a/freeplay/basic_agent.py +++ b/freeplay/basic_agent.py @@ -155,6 +155,7 @@ def find_idle_furnaces(entities): - Do not encapsulate your code in a function _unless_ you are writing a utility for future use - just write it as if you were typing directly into the Python interpreter. - Your inventory has space for ~2000 items. If it fills up, insert the items into a chest. - Ensure that your factory is arranged in a grid, as this will make things easier. +- Try to assign a specific and clear role to each entity, and ensure that it is working as expected. Check if similar entities are already present on the map. If exists, try to reuse them or fix the issues with them. """ diff --git a/freeplay/conversation_formatter.py b/freeplay/conversation_formatter.py index 3056e3673..1eb71dab2 100644 --- a/freeplay/conversation_formatter.py +++ b/freeplay/conversation_formatter.py @@ -13,6 +13,14 @@ from namespace import FactorioNamespace FINAL_INSTRUCTION = """" +## Step Input +You are given updated state of existing entities on map and your inventory at each step. +You are supposed to take a look at these information carefully to plan your next step. + +- You can place nothing but entities in your current inventory. If you don't have any entities in your inventory, you need to get them first by crafting, harvesting or smelting etc. +- Try to understand the role of each exsting entities on map. For example, one stone furnace might be used to smelt iron ore into iron plates, while another one might be used to smelt copper ore into copper plates, or to smelt iron plates into steel plates. +- In opposite, not-working entities have no use in the game. If you need to place some entities, you should first consider replacing existing ones. Example abundoned pipes or belts, not-working inserters, or empty chests. + ## Response Format ### 1. PLANNING Stage @@ -95,12 +103,6 @@ async def format_conversation( ## Previous Iteration Summary {self.previous_iteration_summary} -## Existing Entities -{current_entities} - -## Current Inventory -{current_inventory} - {FINAL_INSTRUCTION} {self.instruction} @@ -117,7 +119,17 @@ async def format_conversation( + [ Message( role="user", - content="Remember that your python code must be always enclosed with ```python ... ``` decorator. It's very import for parsing your code. It you can't, you will be fired.\n\nYour output\n[Planning]", + content=f""" +## Existing Entities on Map +{current_entities} + +## Your Inventory +{current_inventory} + +Remember that your python code must be always enclosed with ```python ... ``` decorator. It's very import for parsing your code. It you can't, you will be fired. + +Your output +[Planning]""", ), ] ) From 9488eddcaf21a8970c74fad1e57c64f1c04158b2 Mon Sep 17 00:00:00 2001 From: Ryunosuke Iwai Date: Tue, 18 Mar 2025 17:37:00 +0900 Subject: [PATCH 03/10] Add depth to spreadsheet --- freeplay/trajectory_runner.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/freeplay/trajectory_runner.py b/freeplay/trajectory_runner.py index 8e9fadb1f..bef491059 100644 --- a/freeplay/trajectory_runner.py +++ b/freeplay/trajectory_runner.py @@ -181,7 +181,7 @@ async def run(self): last_response = None # Run trajectory - STEPS_PER_ITERATION = 20 + STEPS_PER_ITERATION = 50 iteration = (depth // STEPS_PER_ITERATION) + 1 current_entities = f"{instance.namespace.get_entities()}" @@ -280,7 +280,8 @@ async def run(self): self.config.version, self.config.model, iteration, - step, + step + 1, + program.depth // 2, current_entities, current_inventory, program.thinking, @@ -352,7 +353,7 @@ async def run(self): if step_row_number: update_spreadsheet_cell( os.getenv("SPREADSHEET_ID"), - f"Steps!I{step_row_number}", + f"Steps!J{step_row_number}", program.response, ) From 73fd783731fc11432e145cf356446d7bccb3094c Mon Sep 17 00:00:00 2001 From: Ryunosuke Iwai Date: Tue, 18 Mar 2025 18:52:24 +0900 Subject: [PATCH 04/10] feat: update agent policy --- agents/utils/parse_response.py | 10 +- env/src/models/conversation.py | 20 ++-- freeplay/basic_agent.py | 143 ++++++++++++++++++++++++----- freeplay/conversation_formatter.py | 48 +--------- freeplay/evaluator.py | 1 + freeplay/trajectory_runner.py | 41 ++++----- 6 files changed, 162 insertions(+), 101 deletions(-) diff --git a/agents/utils/parse_response.py b/agents/utils/parse_response.py index 274cc5f6e..28f2510d0 100644 --- a/agents/utils/parse_response.py +++ b/agents/utils/parse_response.py @@ -23,14 +23,6 @@ def parse_response(response) -> Optional[Policy]: try: code, text_response = PythonParser.extract_code(choice) - splits = text_response.split("```python") - - thinking = "" - if len(splits) > 1: - thinking = splits[0] - thinking = thinking.replace("[Planning]", "") - thinking = thinking.replace("[Policy]", "") - except Exception as e: print(f"Failed to extract code from choice: {str(e)}") return None @@ -39,7 +31,7 @@ def parse_response(response) -> Optional[Policy]: return None policy = Policy( - thinking=thinking, + thinking="", code=code, meta=PolicyMeta( output_tokens=output_tokens, diff --git a/env/src/models/conversation.py b/env/src/models/conversation.py index 0754d1657..91c7ab489 100644 --- a/env/src/models/conversation.py +++ b/env/src/models/conversation.py @@ -7,17 +7,23 @@ class Conversation(BaseModel): """Tracks dialogue between LLM and Factorio""" + messages: List[Message] = Field(default_factory=list) @classmethod - def parse_raw(cls, data: Dict[str, Any]) -> 'Conversation': - messages = [Message(**msg) if isinstance(msg, dict) else msg - for msg in data['messages']] + def parse_raw(cls, data: Dict[str, Any]) -> "Conversation": + messages = [ + Message(**msg) if isinstance(msg, dict) else msg for msg in data["messages"] + ] return cls(messages=messages) - def add_result(self, program: str, response: str, **kwargs): + def add_result(self, thinking: str, program: str, response: str, **kwargs): """Add program execution result to conversation""" - self.messages.append(Message(role="assistant", content=program, metadata=kwargs)) + self.messages.append( + Message( + role="assistant", + content=f'"""\n{thinking}\n"""\n\n{program}', + metadata=kwargs, + ) + ) self.messages.append(Message(role="user", content=response, metadata=kwargs)) - - diff --git a/freeplay/basic_agent.py b/freeplay/basic_agent.py index 084bbb899..0446037ac 100644 --- a/freeplay/basic_agent.py +++ b/freeplay/basic_agent.py @@ -161,6 +161,30 @@ def find_idle_furnaces(entities): def entity_summary_prompt(entities: str): return f""" +# Factorio LLM Agent Instructions + +## Overview +You are an AI agent designed to play Factorio, specializing in: +- Long-horizon planning +- Spatial reasoning +- Systematic automation + +## Game Progression +- Think about long term objectives, and break them down into smaller, manageable steps. +- Advance toward more complex automation +- Build on previous successes +- Maintain efficient resource usage + +## Important Notes +- Use transport belts to keep burners fed with coal +- Consider long-term implications of actions +- Maintain working systems, and clear entities that aren't working or don't have a clear purpose +- Build incrementally and verify each step +- Your inventory has space for ~2000 items. If it fills up, insert the items into a chest. +- Ensure that your factory is arranged in a grid, as this will make things easier. +- Try to assign a specific and clear role to each entity, and ensure that it is working as expected. Check if similar entities are already present on the map. If exists, try to reuse them or fix the issues with them. + +## Instruction You are a report generating model for the game factorio. Given existing entities, you must summarise what structures the agent has created on the map and what are the use-cases of those structures. You must also bring out the entities and positions of entities of each of those structures. @@ -168,21 +192,40 @@ def entity_summary_prompt(entities: str): ###Electricity generator at position(x) Consists of steam engine(position x), boiler(position y) and offshore pump (position z) +Role: +- Generator produces electricity by burning fuel. It supplies electricity to nearby entities through electric poles. + +Issues: +- It is working as expected +- However, the fuel supply is not automated. We need to automate the coal supply to the boiler occasionally. + ###Copper plate mine at position(x) Consists of following entities - Burner mining drill (position x1) and a furnace at position(y1) - Burner mining drill (position x2) and a furnace at position(y2) - Burner mining drill (position x3) and a furnace at position(y3) +Role: +- Mines copper ore and smelts it into copper plates + +Issues: +- The burner mining drill at position x3 is not working due to lack of fuel. We need to supply coal to it. + ###Copper cable factory Consists of following entities - Burner mining drill (position x1) and a furnace at position(y1) - Assembling machine at position(z1) and inserter at position(a) that puts into assembling machine - Beltgroup (position ) that connects the furnace at position y1 to assembling machine at position(z1) -- If multiple sections are connected, summarise them as one structure -- Do not include any mention of harvesting or crafting activities. That is not the aim of this report and is self-evident as the agent can see its own inventory -- All structures from the previous report that did not have any updates, include them in the new report unchanged +Role: +- Produces copper cables from copper plates + +Issues: +- No issues. It is working as expected. + +If multiple sections are connected, summarise them as one structure. +Do not include any mention of harvesting or crafting activities. That is not the aim of this report and is self-evident as the agent can see its own inventory. +All structures from the previous report that did not have any updates, include them in the new report unchanged. Output the summary only, do not include any other information. @@ -193,6 +236,45 @@ def entity_summary_prompt(entities: str): """ +def planning_prompt(instruction: str, entity_summary: str, inventory: str): + return f""" +# Factorio LLM Agent Instructions + +## Overview +You are an AI agent designed to play Factorio, specializing in: +- Long-horizon planning +- Spatial reasoning +- Systematic automation + +## Game Progression +- Think about long term objectives, and break them down into smaller, manageable steps. +- Advance toward more complex automation +- Build on previous successes +- Maintain efficient resource usage + +## Important Notes +- Use transport belts to keep burners fed with coal +- Consider long-term implications of actions +- Maintain working systems, and clear entities that aren't working or don't have a clear purpose +- Build incrementally and verify each step +- Your inventory has space for ~2000 items. If it fills up, insert the items into a chest. +- Ensure that your factory is arranged in a grid, as this will make things easier. +- Try to assign a specific and clear role to each entity, and ensure that it is working as expected. Check if similar entities are already present on the map. If exists, try to reuse them or fix the issues with them. + +## Instruction +Given the existing entities on map, your current inventory. To achive the following goal, decide what is the next action to do. + +### Entities on map +{entity_summary} + +### Your current inventory +{inventory} + +### Your objective +{instruction} +""" + + def iteration_summary_prompt( instruction: str, entities: str, inventory: str, logs: str ): @@ -311,13 +393,12 @@ async def start_iteration( self, iteration: int, instruction: str, - previous_iteration_summary: str, ): self.formatter.start_iteration( iteration=iteration, instruction=instruction, - previous_iteration_summary=previous_iteration_summary, ) + self.instruction = instruction async def report_summary( self, @@ -326,20 +407,6 @@ async def report_summary( current_entities: str, current_conversation: Conversation, ): - # entity_summary_response = await self.llm_factory.acall( - # messages=[ - # { - # "role": "user", - # "content": entity_summary_prompt(entities), - # } - # ], - # n_samples=1, # We only need one program per iteration - # temperature=self.generation_params.temperature, - # max_tokens=16384, # use longer max_tokens - # model=self.generation_params.model, - # ) - # entity_summary = entity_summary_response.choices[0].message.content - instruction = "" iteration_messages = [] for message in current_conversation.messages: @@ -386,17 +453,50 @@ async def step( entities: str, inventory: str, ) -> Policy: + # 1. Generate entity summary + entity_summary_response = await self.llm_factory.acall( + messages=[ + { + "role": "user", + "content": entity_summary_prompt(entities), + } + ], + n_samples=1, # We only need one program per iteration + temperature=self.generation_params.temperature, + max_tokens=16384, # use longer max_tokens + model=self.generation_params.model, + ) + entity_summary = entity_summary_response.choices[0].message.content + + # 2. Generate plan + plan_response = await self.llm_factory.acall( + messages=[ + { + "role": "user", + "content": planning_prompt( + self.instruction, entity_summary, inventory + ), + } + ], + n_samples=1, # We only need one program per iteration + temperature=self.generation_params.temperature, + max_tokens=2048, # use longer max_tokens + model=self.generation_params.model, + ) + plan = plan_response.choices[0].message.content + # We format the conversation every N steps to add a context summary to the system prompt formatted_conversation = await self.formatter.format_conversation( conversation, namespace, entities, inventory, + plan, ) # We set the new conversation state for external use self.set_conversation(formatted_conversation) - return await self._get_policy(formatted_conversation) + return await self._get_policy(formatted_conversation, plan) @tenacity.retry( retry=retry_if_exception_type(Exception), @@ -404,7 +504,7 @@ async def step( before_sleep=my_before_sleep, stop=stop_after_attempt(3), ) - async def _get_policy(self, conversation: Conversation): + async def _get_policy(self, conversation: Conversation, plan: str): messages = self.formatter.to_llm_messages(conversation) with open("messages.json", "w") as f: @@ -419,6 +519,7 @@ async def _get_policy(self, conversation: Conversation): ) policy = parse_response(response) + policy.thinking = plan if not policy: raise Exception("Not a valid Python policy") diff --git a/freeplay/conversation_formatter.py b/freeplay/conversation_formatter.py index 1eb71dab2..4b2f7a237 100644 --- a/freeplay/conversation_formatter.py +++ b/freeplay/conversation_formatter.py @@ -13,44 +13,12 @@ from namespace import FactorioNamespace FINAL_INSTRUCTION = """" -## Step Input -You are given updated state of existing entities on map and your inventory at each step. -You are supposed to take a look at these information carefully to plan your next step. - -- You can place nothing but entities in your current inventory. If you don't have any entities in your inventory, you need to get them first by crafting, harvesting or smelting etc. -- Try to understand the role of each exsting entities on map. For example, one stone furnace might be used to smelt iron ore into iron plates, while another one might be used to smelt copper ore into copper plates, or to smelt iron plates into steel plates. -- In opposite, not-working entities have no use in the game. If you need to place some entities, you should first consider replacing existing ones. Example abundoned pipes or belts, not-working inserters, or empty chests. - ## Response Format - -### 1. PLANNING Stage -Think through each step extensively in natural language, addressing: -1. Error Analysis - - Was there an error in the previous execution? - - If yes, what was the problem? -2. Next Step Planning - - What is the most useful next step of reasonable size? - - Why is this step valuable? - - Should I -3. Action Planning - - What specific actions are needed? - - What resources are required? - -### 2. POLICY Stage Write Python code to execute the planned actions: ```python # Code must be enclosed in Python tags your_code_here ``` - -Your output should be in the following format: -[Planning] -your_planning_here - -[Policy] -```python -your_code_here -``` """ @@ -67,11 +35,9 @@ def start_iteration( self, iteration: int, instruction: str, - previous_iteration_summary: str, ): self.iteration = iteration self.instruction = instruction - self.previous_iteration_summary = previous_iteration_summary async def format_conversation( self, @@ -79,6 +45,7 @@ async def format_conversation( namespace: FactorioNamespace, current_entities: str, current_inventory: str, + plan: str, ) -> Conversation: """ conversations: @@ -100,9 +67,6 @@ async def format_conversation( updated_system_prompt = f""" {self.system_prompt} -## Previous Iteration Summary -{self.previous_iteration_summary} - {FINAL_INSTRUCTION} {self.instruction} @@ -120,16 +84,14 @@ async def format_conversation( Message( role="user", content=f""" -## Existing Entities on Map -{current_entities} +## Planned Actions +{plan} ## Your Inventory {current_inventory} -Remember that your python code must be always enclosed with ```python ... ``` decorator. It's very import for parsing your code. It you can't, you will be fired. - -Your output -[Planning]""", +[Policy] +""", ), ] ) diff --git a/freeplay/evaluator.py b/freeplay/evaluator.py index 6bdb0a413..503f7565e 100644 --- a/freeplay/evaluator.py +++ b/freeplay/evaluator.py @@ -66,6 +66,7 @@ async def evaluate( response, task_response ) conversation.add_result( + program.thinking, program.code, final_response, iteration=iteration, diff --git a/freeplay/trajectory_runner.py b/freeplay/trajectory_runner.py index bef491059..44e2de8f9 100644 --- a/freeplay/trajectory_runner.py +++ b/freeplay/trajectory_runner.py @@ -181,18 +181,18 @@ async def run(self): last_response = None # Run trajectory - STEPS_PER_ITERATION = 50 + STEPS_PER_ITERATION = 30 iteration = (depth // STEPS_PER_ITERATION) + 1 current_entities = f"{instance.namespace.get_entities()}" current_inventory = f"{instance.namespace.inspect_inventory()}" - (previous_iteration_summary,) = await self.agent.report_summary( - iteration=iteration, - current_inventory=current_inventory, - current_entities=current_entities, - current_conversation=current_conversation, - ) + # (previous_iteration_summary,) = await self.agent.report_summary( + # iteration=iteration, + # current_inventory=current_inventory, + # current_entities=current_entities, + # current_conversation=current_conversation, + # ) while True: iteration += 1 @@ -222,7 +222,6 @@ async def run(self): await self.agent.start_iteration( iteration=iteration, instruction=instruction, - previous_iteration_summary=previous_iteration_summary, ) update_spreadsheet_cell( @@ -369,19 +368,19 @@ async def run(self): current_entities = f"{instance.namespace.get_entities()}" current_inventory = f"{instance.namespace.inspect_inventory()}" - (previous_iteration_summary,) = await self.agent.report_summary( - iteration=iteration, - current_inventory=current_inventory, - current_entities=current_entities, - current_conversation=current_conversation, - ) - - if iteration_row_number: - update_spreadsheet_cell( - os.getenv("SPREADSHEET_ID"), - f"Iterations!G{iteration_row_number}", - previous_iteration_summary, - ) + # (previous_iteration_summary,) = await self.agent.report_summary( + # iteration=iteration, + # current_inventory=current_inventory, + # current_entities=current_entities, + # current_conversation=current_conversation, + # ) + + # if iteration_row_number: + # update_spreadsheet_cell( + # os.getenv("SPREADSHEET_ID"), + # f"Iterations!G{iteration_row_number}", + # previous_iteration_summary, + # ) elapsed = time.time() - self.start_time elapsed_str = f"{int(elapsed // 3600):02d}:{int((elapsed % 3600) // 60):02d}:{int(elapsed % 60):02d}" From b2deebabb212ded5acf8e235aea3fd18479b7a74 Mon Sep 17 00:00:00 2001 From: Ryunosuke Iwai Date: Tue, 18 Mar 2025 18:56:26 +0900 Subject: [PATCH 05/10] fix: prompt --- freeplay/conversation_formatter.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/freeplay/conversation_formatter.py b/freeplay/conversation_formatter.py index 4b2f7a237..3609d9177 100644 --- a/freeplay/conversation_formatter.py +++ b/freeplay/conversation_formatter.py @@ -67,8 +67,6 @@ async def format_conversation( updated_system_prompt = f""" {self.system_prompt} -{FINAL_INSTRUCTION} - {self.instruction} """ @@ -84,13 +82,15 @@ async def format_conversation( Message( role="user", content=f""" +{FINAL_INSTRUCTION} + ## Planned Actions {plan} ## Your Inventory {current_inventory} -[Policy] +[Python code] """, ), ] From 4050335cd3abaca5c9cbc3bb011e2d4f0ce296ef Mon Sep 17 00:00:00 2001 From: Ryunosuke Iwai Date: Tue, 18 Mar 2025 18:57:27 +0900 Subject: [PATCH 06/10] log messages --- freeplay/trajectory_runner.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/freeplay/trajectory_runner.py b/freeplay/trajectory_runner.py index 44e2de8f9..5815ecc4e 100644 --- a/freeplay/trajectory_runner.py +++ b/freeplay/trajectory_runner.py @@ -361,6 +361,9 @@ async def run(self): current_state = program.state current_conversation = program.conversation + with open("messages.json", "w") as f: + json.dump(current_conversation.messages, f, indent=2) + except Exception as e: print(f"Error in Step {iteration}-{step + 1}: {e}") continue From 85b760b3b63dbdd2e7eae1ac137c9a298edc773a Mon Sep 17 00:00:00 2001 From: Ryunosuke Iwai Date: Tue, 18 Mar 2025 19:19:29 +0900 Subject: [PATCH 07/10] add execution log --- freeplay/basic_agent.py | 90 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 86 insertions(+), 4 deletions(-) diff --git a/freeplay/basic_agent.py b/freeplay/basic_agent.py index 0446037ac..1540248fb 100644 --- a/freeplay/basic_agent.py +++ b/freeplay/basic_agent.py @@ -236,8 +236,11 @@ def entity_summary_prompt(entities: str): """ -def planning_prompt(instruction: str, entity_summary: str, inventory: str): - return f""" +def planning_prompt( + execution_log: str, instruction: str, entity_summary: str, inventory: str +): + return ( + """ # Factorio LLM Agent Instructions ## Overview @@ -246,12 +249,67 @@ def planning_prompt(instruction: str, entity_summary: str, inventory: str): - Spatial reasoning - Systematic automation +## Environment Structure +- Operates like an interactive Python shell +- Agent messages = Python programs to execute +- User responses = STDOUT/STDERR from REPL +- Interacts through 27 core API methods (to be specified) + +## Understanding Output + +### Error Messages +```stderr +Error: 1: ("Initial Inventory: {...}") +10: ("Error occurred in following lines...") +``` +- Numbers indicate line of execution +- Previous lines executed successfully +- Fix errors at indicated line + +### Status Updates +```stdout +23: ('Resource collection completed...') +78: ('Entities on map: [...]') +``` +- Shows execution progress +- Provides entity status +- Lists warnings and conditions + +### Entity Status Checking +- Monitor entity `warnings` field +- Check entity `status` field +- Verify resource levels +- Track production states + ## Game Progression - Think about long term objectives, and break them down into smaller, manageable steps. - Advance toward more complex automation - Build on previous successes - Maintain efficient resource usage +## Data Structures +- Use Python's built-in data structures to organize entities +- Sets for unique entity collections: +```python +working_furnaces = {e for e in get_entities() + if e.status == EntityStatus.WORKING} +``` +- Dictionaries for entity mapping: +```python +furnace_by_position = { + (e.position.x, e.position.y): e + for e in get_entities() + if isinstance(e, Furnace) +} +``` +- Lists for ordered operations: +```python +sorted_furnaces = sorted( + get_entities(), + key=lambda e: (e.position.x, e.position.y) +) +``` + ## Important Notes - Use transport belts to keep burners fed with coal - Consider long-term implications of actions @@ -262,7 +320,21 @@ def planning_prompt(instruction: str, entity_summary: str, inventory: str): - Try to assign a specific and clear role to each entity, and ensure that it is working as expected. Check if similar entities are already present on the map. If exists, try to reuse them or fix the issues with them. ## Instruction -Given the existing entities on map, your current inventory. To achive the following goal, decide what is the next action to do. +Think through each step extensively in natural language, addressing: +1. Error Analysis + - Was there an error in the previous execution? + - If yes, what was the problem? +2. Next Step Planning + - What is the most useful next step of reasonable size? + - Why is this step valuable? + - Should I +3. Action Planning + - What specific actions are needed? + - What resources are required? +""" + + f""" +### Execution logs +{execution_log} ### Entities on map {entity_summary} @@ -273,6 +345,7 @@ def planning_prompt(instruction: str, entity_summary: str, inventory: str): ### Your objective {instruction} """ + ) def iteration_summary_prompt( @@ -469,12 +542,21 @@ async def step( entity_summary = entity_summary_response.choices[0].message.content # 2. Generate plan + iteration_messages = [] + for message in conversation.messages: + if message.metadata.get("iteration") == self.iteration: + iteration_messages.append(message) + + execution_log = "\n".join( + [f"role: {m.role}\ncontent: {m.content}\n" for m in iteration_messages] + ) + plan_response = await self.llm_factory.acall( messages=[ { "role": "user", "content": planning_prompt( - self.instruction, entity_summary, inventory + execution_log, self.instruction, entity_summary, inventory ), } ], From 9c732c573dcf5ab28e7075bdfc9db0b36c1ea3dd Mon Sep 17 00:00:00 2001 From: Ryunosuke Iwai Date: Tue, 18 Mar 2025 19:21:35 +0900 Subject: [PATCH 08/10] add iteration property --- freeplay/basic_agent.py | 1 + 1 file changed, 1 insertion(+) diff --git a/freeplay/basic_agent.py b/freeplay/basic_agent.py index 0446037ac..fc80819d2 100644 --- a/freeplay/basic_agent.py +++ b/freeplay/basic_agent.py @@ -398,6 +398,7 @@ async def start_iteration( iteration=iteration, instruction=instruction, ) + self.iteration = iteration self.instruction = instruction async def report_summary( From 6d8de7567067beed02fe4d771b52861fceec49d7 Mon Sep 17 00:00:00 2001 From: Ryunosuke Iwai Date: Tue, 18 Mar 2025 19:46:01 +0900 Subject: [PATCH 09/10] fix prompt --- agents/utils/parse_response.py | 10 +++- freeplay/basic_agent.py | 94 ++---------------------------- freeplay/conversation_formatter.py | 34 ++++++++++- 3 files changed, 47 insertions(+), 91 deletions(-) diff --git a/agents/utils/parse_response.py b/agents/utils/parse_response.py index 28f2510d0..274cc5f6e 100644 --- a/agents/utils/parse_response.py +++ b/agents/utils/parse_response.py @@ -23,6 +23,14 @@ def parse_response(response) -> Optional[Policy]: try: code, text_response = PythonParser.extract_code(choice) + splits = text_response.split("```python") + + thinking = "" + if len(splits) > 1: + thinking = splits[0] + thinking = thinking.replace("[Planning]", "") + thinking = thinking.replace("[Policy]", "") + except Exception as e: print(f"Failed to extract code from choice: {str(e)}") return None @@ -31,7 +39,7 @@ def parse_response(response) -> Optional[Policy]: return None policy = Policy( - thinking="", + thinking=thinking, code=code, meta=PolicyMeta( output_tokens=output_tokens, diff --git a/freeplay/basic_agent.py b/freeplay/basic_agent.py index 82a5d2968..12a590d12 100644 --- a/freeplay/basic_agent.py +++ b/freeplay/basic_agent.py @@ -236,11 +236,8 @@ def entity_summary_prompt(entities: str): """ -def planning_prompt( - execution_log: str, instruction: str, entity_summary: str, inventory: str -): - return ( - """ +def planning_prompt(instruction: str, entity_summary: str, inventory: str): + return f""" # Factorio LLM Agent Instructions ## Overview @@ -249,67 +246,12 @@ def planning_prompt( - Spatial reasoning - Systematic automation -## Environment Structure -- Operates like an interactive Python shell -- Agent messages = Python programs to execute -- User responses = STDOUT/STDERR from REPL -- Interacts through 27 core API methods (to be specified) - -## Understanding Output - -### Error Messages -```stderr -Error: 1: ("Initial Inventory: {...}") -10: ("Error occurred in following lines...") -``` -- Numbers indicate line of execution -- Previous lines executed successfully -- Fix errors at indicated line - -### Status Updates -```stdout -23: ('Resource collection completed...') -78: ('Entities on map: [...]') -``` -- Shows execution progress -- Provides entity status -- Lists warnings and conditions - -### Entity Status Checking -- Monitor entity `warnings` field -- Check entity `status` field -- Verify resource levels -- Track production states - ## Game Progression - Think about long term objectives, and break them down into smaller, manageable steps. - Advance toward more complex automation - Build on previous successes - Maintain efficient resource usage -## Data Structures -- Use Python's built-in data structures to organize entities -- Sets for unique entity collections: -```python -working_furnaces = {e for e in get_entities() - if e.status == EntityStatus.WORKING} -``` -- Dictionaries for entity mapping: -```python -furnace_by_position = { - (e.position.x, e.position.y): e - for e in get_entities() - if isinstance(e, Furnace) -} -``` -- Lists for ordered operations: -```python -sorted_furnaces = sorted( - get_entities(), - key=lambda e: (e.position.x, e.position.y) -) -``` - ## Important Notes - Use transport belts to keep burners fed with coal - Consider long-term implications of actions @@ -320,21 +262,8 @@ def planning_prompt( - Try to assign a specific and clear role to each entity, and ensure that it is working as expected. Check if similar entities are already present on the map. If exists, try to reuse them or fix the issues with them. ## Instruction -Think through each step extensively in natural language, addressing: -1. Error Analysis - - Was there an error in the previous execution? - - If yes, what was the problem? -2. Next Step Planning - - What is the most useful next step of reasonable size? - - Why is this step valuable? - - Should I -3. Action Planning - - What specific actions are needed? - - What resources are required? -""" - + f""" -### Execution logs -{execution_log} +You are given the existing entities on map, your current inventory. +Your job is to plan a medium-term strategy to achieve the given task. ### Entities on map {entity_summary} @@ -345,7 +274,6 @@ def planning_prompt( ### Your objective {instruction} """ - ) def iteration_summary_prompt( @@ -471,7 +399,6 @@ async def start_iteration( iteration=iteration, instruction=instruction, ) - self.iteration = iteration self.instruction = instruction async def report_summary( @@ -543,21 +470,12 @@ async def step( entity_summary = entity_summary_response.choices[0].message.content # 2. Generate plan - iteration_messages = [] - for message in conversation.messages: - if message.metadata.get("iteration") == self.iteration: - iteration_messages.append(message) - - execution_log = "\n".join( - [f"role: {m.role}\ncontent: {m.content}\n" for m in iteration_messages] - ) - plan_response = await self.llm_factory.acall( messages=[ { "role": "user", "content": planning_prompt( - execution_log, self.instruction, entity_summary, inventory + self.instruction, entity_summary, inventory ), } ], @@ -602,7 +520,7 @@ async def _get_policy(self, conversation: Conversation, plan: str): ) policy = parse_response(response) - policy.thinking = plan + policy.thinking = plan + "\n\n" + policy.thinking if not policy: raise Exception("Not a valid Python policy") diff --git a/freeplay/conversation_formatter.py b/freeplay/conversation_formatter.py index 3609d9177..c3ffa2529 100644 --- a/freeplay/conversation_formatter.py +++ b/freeplay/conversation_formatter.py @@ -13,12 +13,38 @@ from namespace import FactorioNamespace FINAL_INSTRUCTION = """" +Based on the given medium-term strategy, your task is to generate policy code executing actual actions. +Given the execution logs as conversation, existing entities, inventory content and the current plan, decide on the next steps and write Python code to execute them. + ## Response Format + +### 1. PLANNING Stage +Think through each step extensively in natural language, addressing: +1. Error Analysis + - Was there an error in the previous execution? + - If yes, what was the problem? +2. Next Step Planning + - What is the most useful next step of reasonable size? + - Why is this step valuable? + - Should I +3. Action Planning + - What specific actions are needed? + - What resources are required? + +### 2. POLICY Stage Write Python code to execute the planned actions: ```python # Code must be enclosed in Python tags your_code_here ``` + +Your output should be in the following format: +[Planning] +your_planning_here +[Policy] +```python +your_code_here +``` """ @@ -84,13 +110,17 @@ async def format_conversation( content=f""" {FINAL_INSTRUCTION} -## Planned Actions +## Medium-Term Strategy {plan} +## Entities on the Map +{current_entities} + ## Your Inventory {current_inventory} -[Python code] +Your Output: +[Planning] """, ), ] From d088582db73c2b3966bb2cebb28fb926e661ef5c Mon Sep 17 00:00:00 2001 From: Ryunosuke Iwai Date: Tue, 18 Mar 2025 20:07:36 +0900 Subject: [PATCH 10/10] enable previous iteration summary --- freeplay/basic_agent.py | 59 +++--------------------------- freeplay/conversation_formatter.py | 7 +++- freeplay/trajectory_runner.py | 39 ++++++++++---------- 3 files changed, 31 insertions(+), 74 deletions(-) diff --git a/freeplay/basic_agent.py b/freeplay/basic_agent.py index 12a590d12..cba81ff5f 100644 --- a/freeplay/basic_agent.py +++ b/freeplay/basic_agent.py @@ -286,51 +286,9 @@ def iteration_summary_prompt( - Systematic automation ## Instruction -You are given current existing entities, inventory state and logs you have executed in the game, during the previous interation. -You have the following instruction from supervisor: - -[Task] -Build a power plant, consisting of a offshore pomp, boiler, and steam engine. - -[Hints From Supervisor] -- You need to prepare enough iron and copper plates first to craft facilities - -Based on the inventory state and execution logs, you must generate a report of the previous iteration. -The report must have 3 sections: CHANGES, TASK COMPLETION ANALYSIS and ERROR TIPS. Below are instructions for both of them: - -CHANGES -Describe what is done duration the iteration. -- Newly built facilities with position -- Obtained items -- Working status changes of facilities - -Example: -In the previous iteration, -- we built burner mining drill at position(x1). It is supplying iron ores to stone furnace nearby at position(x2). There iron ores are smelted into iron plates, and stored into a wooden chest at position(x3) by a burner inserter at position(x4). -- now we have boiler and steam engine in the inventory, so we can place them in the neighbor of existing offshore pomp at position(x5) to build power plant! -- The burner drill at position(x6) was not working due to insufficient fuel. I fixed the issue by feeding some coals. Because we have no automated coal supplies, I should feed them manually for a while when it is out of fuel. - -TASK COMPLETION ANALYSIS -Analyze how is the task is going, given existing entities, inventory state and execution logs. -If the given task is completed, you should summarize: -- the entities related to the task, its status and positions -- notes useful for the following actions - -If the task is not completed yet, you should summarize: -- the remaining steps planned -- difficulties or obstacles you are facing -- required items to complete the task - -Example: -We have not yet built complete the task of building power plant. -As the remaining steps, we need: -- Get enough amount of iron and copper plates to craft offshore pomp, boiler and steam engine. We need more 30 iron plates and 3 copper plates. -- Craft the entities -- Connect them with pipes - -To get iron and copper plates, we can't craft them and need to smelt ores through furnaces. -I have already built stone furnace for iron plates, but one for copper plates are not yet prepared. -Next we need to build a stone furnace for copper ones. At the same time, coals and ores should be fed into the stone furnace of iron plates to get iron plates constantly. +You are given execution logs you have executed in the game, during the previous interation. +Based on the execution logs, you must generate a report of the previous iteration. +The report must have ERROR_TIPS section. Below is the structure: ERROR TIPS In this section you must analyse the errors that the agent has made and bring out tips how to mitigate these errors. @@ -349,15 +307,6 @@ def iteration_summary_prompt( You must output only the report. Any other texts are forbidden. -## Instruction -{instruction} - -## Entities -{entities} - -## Inventory -{inventory} - ## Execution Logs {logs} @@ -394,10 +343,12 @@ async def start_iteration( self, iteration: int, instruction: str, + previous_iteration_summary: str, ): self.formatter.start_iteration( iteration=iteration, instruction=instruction, + previous_iteration_summary=previous_iteration_summary, ) self.instruction = instruction diff --git a/freeplay/conversation_formatter.py b/freeplay/conversation_formatter.py index c3ffa2529..30d9fa20a 100644 --- a/freeplay/conversation_formatter.py +++ b/freeplay/conversation_formatter.py @@ -61,9 +61,11 @@ def start_iteration( self, iteration: int, instruction: str, + previous_iteration_summary: str, ): self.iteration = iteration self.instruction = instruction + self.previous_iteration_summary = previous_iteration_summary async def format_conversation( self, @@ -109,7 +111,10 @@ async def format_conversation( role="user", content=f""" {FINAL_INSTRUCTION} - + +## Learnings from Previous Iteration +{self.previous_iteration_summary} + ## Medium-Term Strategy {plan} diff --git a/freeplay/trajectory_runner.py b/freeplay/trajectory_runner.py index 5815ecc4e..f7501a808 100644 --- a/freeplay/trajectory_runner.py +++ b/freeplay/trajectory_runner.py @@ -187,12 +187,12 @@ async def run(self): current_entities = f"{instance.namespace.get_entities()}" current_inventory = f"{instance.namespace.inspect_inventory()}" - # (previous_iteration_summary,) = await self.agent.report_summary( - # iteration=iteration, - # current_inventory=current_inventory, - # current_entities=current_entities, - # current_conversation=current_conversation, - # ) + (previous_iteration_summary,) = await self.agent.report_summary( + iteration=iteration, + current_inventory=current_inventory, + current_entities=current_entities, + current_conversation=current_conversation, + ) while True: iteration += 1 @@ -222,6 +222,7 @@ async def run(self): await self.agent.start_iteration( iteration=iteration, instruction=instruction, + previous_iteration_summary=previous_iteration_summary, ) update_spreadsheet_cell( @@ -371,19 +372,19 @@ async def run(self): current_entities = f"{instance.namespace.get_entities()}" current_inventory = f"{instance.namespace.inspect_inventory()}" - # (previous_iteration_summary,) = await self.agent.report_summary( - # iteration=iteration, - # current_inventory=current_inventory, - # current_entities=current_entities, - # current_conversation=current_conversation, - # ) - - # if iteration_row_number: - # update_spreadsheet_cell( - # os.getenv("SPREADSHEET_ID"), - # f"Iterations!G{iteration_row_number}", - # previous_iteration_summary, - # ) + (previous_iteration_summary,) = await self.agent.report_summary( + iteration=iteration, + current_inventory=current_inventory, + current_entities=current_entities, + current_conversation=current_conversation, + ) + + if iteration_row_number: + update_spreadsheet_cell( + os.getenv("SPREADSHEET_ID"), + f"Iterations!G{iteration_row_number}", + previous_iteration_summary, + ) elapsed = time.time() - self.start_time elapsed_str = f"{int(elapsed // 3600):02d}:{int((elapsed % 3600) // 60):02d}:{int(elapsed % 60):02d}"