diff --git a/env/src/models/conversation.py b/env/src/models/conversation.py index 0754d1657..91c7ab489 100644 --- a/env/src/models/conversation.py +++ b/env/src/models/conversation.py @@ -7,17 +7,23 @@ class Conversation(BaseModel): """Tracks dialogue between LLM and Factorio""" + messages: List[Message] = Field(default_factory=list) @classmethod - def parse_raw(cls, data: Dict[str, Any]) -> 'Conversation': - messages = [Message(**msg) if isinstance(msg, dict) else msg - for msg in data['messages']] + def parse_raw(cls, data: Dict[str, Any]) -> "Conversation": + messages = [ + Message(**msg) if isinstance(msg, dict) else msg for msg in data["messages"] + ] return cls(messages=messages) - def add_result(self, program: str, response: str, **kwargs): + def add_result(self, thinking: str, program: str, response: str, **kwargs): """Add program execution result to conversation""" - self.messages.append(Message(role="assistant", content=program, metadata=kwargs)) + self.messages.append( + Message( + role="assistant", + content=f'"""\n{thinking}\n"""\n\n{program}', + metadata=kwargs, + ) + ) self.messages.append(Message(role="user", content=response, metadata=kwargs)) - - diff --git a/freeplay/basic_agent.py b/freeplay/basic_agent.py index a69c64427..cba81ff5f 100644 --- a/freeplay/basic_agent.py +++ b/freeplay/basic_agent.py @@ -155,11 +155,36 @@ def find_idle_furnaces(entities): - Do not encapsulate your code in a function _unless_ you are writing a utility for future use - just write it as if you were typing directly into the Python interpreter. - Your inventory has space for ~2000 items. If it fills up, insert the items into a chest. - Ensure that your factory is arranged in a grid, as this will make things easier. +- Try to assign a specific and clear role to each entity, and ensure that it is working as expected. Check if similar entities are already present on the map. If exists, try to reuse them or fix the issues with them. 
""" def entity_summary_prompt(entities: str): return f""" +# Factorio LLM Agent Instructions + +## Overview +You are an AI agent designed to play Factorio, specializing in: +- Long-horizon planning +- Spatial reasoning +- Systematic automation + +## Game Progression +- Think about long term objectives, and break them down into smaller, manageable steps. +- Advance toward more complex automation +- Build on previous successes +- Maintain efficient resource usage + +## Important Notes +- Use transport belts to keep burners fed with coal +- Consider long-term implications of actions +- Maintain working systems, and clear entities that aren't working or don't have a clear purpose +- Build incrementally and verify each step +- Your inventory has space for ~2000 items. If it fills up, insert the items into a chest. +- Ensure that your factory is arranged in a grid, as this will make things easier. +- Try to assign a specific and clear role to each entity, and ensure that it is working as expected. Check if similar entities are already present on the map. If exists, try to reuse them or fix the issues with them. + +## Instruction You are a report generating model for the game factorio. Given existing entities, you must summarise what structures the agent has created on the map and what are the use-cases of those structures. You must also bring out the entities and positions of entities of each of those structures. @@ -167,21 +192,40 @@ def entity_summary_prompt(entities: str): ###Electricity generator at position(x) Consists of steam engine(position x), boiler(position y) and offshore pump (position z) +Role: +- Generator produces electricity by burning fuel. It supplies electricity to nearby entities through electric poles. + +Issues: +- It is working as expected +- However, the fuel supply is not automated. We need to automate the coal supply to the boiler occasionally. 
+ ###Copper plate mine at position(x) Consists of following entities - Burner mining drill (position x1) and a furnace at position(y1) - Burner mining drill (position x2) and a furnace at position(y2) - Burner mining drill (position x3) and a furnace at position(y3) +Role: +- Mines copper ore and smelts it into copper plates + +Issues: +- The burner mining drill at position x3 is not working due to lack of fuel. We need to supply coal to it. + ###Copper cable factory Consists of following entities - Burner mining drill (position x1) and a furnace at position(y1) - Assembling machine at position(z1) and inserter at position(a) that puts into assembling machine - Beltgroup (position ) that connects the furnace at position y1 to assembling machine at position(z1) -- If multiple sections are connected, summarise them as one structure -- Do not include any mention of harvesting or crafting activities. That is not the aim of this report and is self-evident as the agent can see its own inventory -- All structures from the previous report that did not have any updates, include them in the new report unchanged +Role: +- Produces copper cables from copper plates + +Issues: +- No issues. It is working as expected. + +If multiple sections are connected, summarise them as one structure. +Do not include any mention of harvesting or crafting activities. That is not the aim of this report and is self-evident as the agent can see its own inventory. +All structures from the previous report that did not have any updates, include them in the new report unchanged. Output the summary only, do not include any other information. 
@@ -192,6 +236,46 @@ def entity_summary_prompt(entities: str): """ +def planning_prompt(instruction: str, entity_summary: str, inventory: str): + return f""" +# Factorio LLM Agent Instructions + +## Overview +You are an AI agent designed to play Factorio, specializing in: +- Long-horizon planning +- Spatial reasoning +- Systematic automation + +## Game Progression +- Think about long term objectives, and break them down into smaller, manageable steps. +- Advance toward more complex automation +- Build on previous successes +- Maintain efficient resource usage + +## Important Notes +- Use transport belts to keep burners fed with coal +- Consider long-term implications of actions +- Maintain working systems, and clear entities that aren't working or don't have a clear purpose +- Build incrementally and verify each step +- Your inventory has space for ~2000 items. If it fills up, insert the items into a chest. +- Ensure that your factory is arranged in a grid, as this will make things easier. +- Try to assign a specific and clear role to each entity, and ensure that it is working as expected. Check if similar entities are already present on the map. If exists, try to reuse them or fix the issues with them. + +## Instruction +You are given the existing entities on map, your current inventory. +Your job is to plan a medium-term strategy to achieve the given task. + +### Entities on map +{entity_summary} + +### Your current inventory +{inventory} + +### Your objective +{instruction} +""" + + def iteration_summary_prompt( instruction: str, entities: str, inventory: str, logs: str ): @@ -202,51 +286,9 @@ def iteration_summary_prompt( - Systematic automation ## Instruction -You are given current existing entities, inventory state and logs you have executed in the game, during the previous interation. -You have the following instruction from supervisor: - -[Task] -Build a power plant, consisting of a offshore pomp, boiler, and steam engine. 
- -[Hints From Supervisor] -- You need to prepare enough iron and copper plates first to craft facilities - -Based on the inventory state and execution logs, you must generate a report of the previous iteration. -The report must have 3 sections: CHANGES, TASK COMPLETION ANALYSIS and ERROR TIPS. Below are instructions for both of them: - -CHANGES -Describe what is done duration the iteration. -- Newly built facilities with position -- Obtained items -- Working status changes of facilities - -Example: -In the previous iteration, -- we built burner mining drill at position(x1). It is supplying iron ores to stone furnace nearby at position(x2). There iron ores are smelted into iron plates, and stored into a wooden chest at position(x3) by a burner inserter at position(x4). -- now we have boiler and steam engine in the inventory, so we can place them in the neighbor of existing offshore pomp at position(x5) to build power plant! -- The burner drill at position(x6) was not working due to insufficient fuel. I fixed the issue by feeding some coals. Because we have no automated coal supplies, I should feed them manually for a while when it is out of fuel. - -TASK COMPLETION ANALYSIS -Analyze how is the task is going, given existing entities, inventory state and execution logs. -If the given task is completed, you should summarize: -- the entities related to the task, its status and positions -- notes useful for the following actions - -If the task is not completed yet, you should summarize: -- the remaining steps planned -- difficulties or obstacles you are facing -- required items to complete the task - -Example: -We have not yet built complete the task of building power plant. -As the remaining steps, we need: -- Get enough amount of iron and copper plates to craft offshore pomp, boiler and steam engine. We need more 30 iron plates and 3 copper plates. 
-- Craft the entities -- Connect them with pipes - -To get iron and copper plates, we can't craft them and need to smelt ores through furnaces. -I have already built stone furnace for iron plates, but one for copper plates are not yet prepared. -Next we need to build a stone furnace for copper ones. At the same time, coals and ores should be fed into the stone furnace of iron plates to get iron plates constantly. +You are given execution logs you have executed in the game, during the previous iteration. +Based on the execution logs, you must generate a report of the previous iteration. +The report must have an ERROR TIPS section. Below is the structure: ERROR TIPS In this section you must analyse the errors that the agent has made and bring out tips how to mitigate these errors. @@ -265,15 +307,6 @@ def iteration_summary_prompt( You must output only the report. Any other texts are forbidden. -## Instruction -{instruction} - -## Entities -{entities} - -## Inventory -{inventory} - ## Execution Logs {logs} @@ -317,6 +350,7 @@ async def start_iteration( instruction=instruction, previous_iteration_summary=previous_iteration_summary, ) + self.instruction = instruction async def report_summary( self, @@ -325,20 +359,6 @@ async def report_summary( current_entities: str, current_conversation: Conversation, ): - # entity_summary_response = await self.llm_factory.acall( - # messages=[ - # { - # "role": "user", - # "content": entity_summary_prompt(entities), - # } - # ], - # n_samples=1, # We only need one program per iteration - # temperature=self.generation_params.temperature, - # max_tokens=16384, # use longer max_tokens - # model=self.generation_params.model, - # ) - # entity_summary = entity_summary_response.choices[0].message.content - instruction = "" iteration_messages = [] for message in current_conversation.messages: @@ -385,17 +405,50 @@ async def step( entities: str, inventory: str, ) -> Policy: + # 1. 
Generate entity summary + entity_summary_response = await self.llm_factory.acall( + messages=[ + { + "role": "user", + "content": entity_summary_prompt(entities), + } + ], + n_samples=1, # We only need one summary per step + temperature=self.generation_params.temperature, + max_tokens=16384, # use longer max_tokens + model=self.generation_params.model, + ) + entity_summary = entity_summary_response.choices[0].message.content + + # 2. Generate plan + plan_response = await self.llm_factory.acall( + messages=[ + { + "role": "user", + "content": planning_prompt( + self.instruction, entity_summary, inventory + ), + } + ], + n_samples=1, # We only need one plan per step + temperature=self.generation_params.temperature, + max_tokens=2048, # smaller budget than the entity summary call + model=self.generation_params.model, + ) + plan = plan_response.choices[0].message.content + # We format the conversation every N steps to add a context summary to the system prompt formatted_conversation = await self.formatter.format_conversation( conversation, namespace, entities, inventory, + plan, ) # We set the new conversation state for external use self.set_conversation(formatted_conversation) - return await self._get_policy(formatted_conversation) + return await self._get_policy(formatted_conversation, plan) @tenacity.retry( retry=retry_if_exception_type(Exception), @@ -403,7 +456,7 @@ async def step( before_sleep=my_before_sleep, stop=stop_after_attempt(3), ) - async def _get_policy(self, conversation: Conversation): + async def _get_policy(self, conversation: Conversation, plan: str): messages = self.formatter.to_llm_messages(conversation) with open("messages.json", "w") as f: @@ -418,6 +471,7 @@ async def _get_policy(self, conversation: Conversation): ) policy = parse_response(response) + policy.thinking = plan + "\n\n" + policy.thinking if not policy: raise Exception("Not a valid Python policy") diff --git a/freeplay/conversation_formatter.py b/freeplay/conversation_formatter.py index 
15b5cbcc6..30d9fa20a 100644 --- a/freeplay/conversation_formatter.py +++ b/freeplay/conversation_formatter.py @@ -13,6 +13,9 @@ from namespace import FactorioNamespace FINAL_INSTRUCTION = """" +Based on the given medium-term strategy, your task is to generate policy code executing actual actions. +Given the execution logs as conversation, existing entities, inventory content and the current plan, decide on the next steps and write Python code to execute them. + ## Response Format ### 1. PLANNING Stage @@ -38,7 +41,6 @@ Your output should be in the following format: [Planning] your_planning_here - [Policy] ```python your_code_here @@ -71,6 +73,7 @@ async def format_conversation( namespace: FactorioNamespace, current_entities: str, current_inventory: str, + plan: str, ) -> Conversation: """ conversations: @@ -92,17 +95,6 @@ async def format_conversation( updated_system_prompt = f""" {self.system_prompt} -## Previous Iteration Summary -{self.previous_iteration_summary} - -## Existing Entities -{current_entities} - -## Current Inventory -{current_inventory} - -{FINAL_INSTRUCTION} - {self.instruction} """ @@ -117,7 +109,24 @@ async def format_conversation( + [ Message( role="user", - content="Your output\n[Planning]", + content=f""" +{FINAL_INSTRUCTION} + +## Learnings from Previous Iteration +{self.previous_iteration_summary} + +## Medium-Term Strategy +{plan} + +## Entities on the Map +{current_entities} + +## Your Inventory +{current_inventory} + +Your Output: +[Planning] +""", ), ] ) diff --git a/freeplay/evaluator.py b/freeplay/evaluator.py index 6bdb0a413..503f7565e 100644 --- a/freeplay/evaluator.py +++ b/freeplay/evaluator.py @@ -66,6 +66,7 @@ async def evaluate( response, task_response ) conversation.add_result( + program.thinking, program.code, final_response, iteration=iteration, diff --git a/freeplay/spreadsheet.py b/freeplay/spreadsheet.py index 1f5febf53..3dd17b324 100644 --- a/freeplay/spreadsheet.py +++ b/freeplay/spreadsheet.py @@ -112,6 +112,10 @@ def 
insert_to_spreadsheet(spreadsheet_id, range_name, values, max_retries=3): spreadsheet_id (str): スプレッドシートのID range_name (str): データを挿入する範囲(例:'Sheet1!A1:B2') values (list): 挿入するデータ(2次元配列) + + Returns: + tuple: (result, row_number) resultはAPIレスポンス、row_numberは挿入された最初の行番号 + エラー時は (None, None) """ for attempt in range(max_retries): try: @@ -142,7 +146,19 @@ def insert_to_spreadsheet(spreadsheet_id, range_name, values, max_retries=3): ) print(f"{len(values)} 行のデータを追加しました。") - return result + # updatedRangeから行番号を抽出 (例: 'Sheet1!A371:B371' -> 371) + updated_range = result.get("updates", {}).get("updatedRange", "") + row_number = None + if updated_range: + # 'Sheet1!A371:B371' から '371' を抽出 + try: + row_number = int( + "".join(filter(str.isdigit, updated_range.split(":")[0])) + ) + except (ValueError, IndexError): + print("行番号の抽出に失敗しました。") + + return result, row_number except HttpError as error: print(f"試行 {attempt + 1}/{max_retries} でエラーが発生しました: {error}") @@ -151,4 +167,4 @@ def insert_to_spreadsheet(spreadsheet_id, range_name, values, max_retries=3): time.sleep(60) else: print("最大リトライ回数に達しました。") - return None + return None, None diff --git a/freeplay/trajectory_runner.py b/freeplay/trajectory_runner.py index d7d04308f..f7501a808 100644 --- a/freeplay/trajectory_runner.py +++ b/freeplay/trajectory_runner.py @@ -181,21 +181,22 @@ async def run(self): last_response = None # Run trajectory - STEPS_PER_ITERATION = 20 + STEPS_PER_ITERATION = 30 iteration = (depth // STEPS_PER_ITERATION) + 1 + current_entities = f"{instance.namespace.get_entities()}" + current_inventory = f"{instance.namespace.inspect_inventory()}" + + (previous_iteration_summary,) = await self.agent.report_summary( + iteration=iteration, + current_inventory=current_inventory, + current_entities=current_entities, + current_conversation=current_conversation, + ) + while True: iteration += 1 print(f"### Iteration {iteration} ###") - current_entities = f"{instance.namespace.get_entities()}" - current_inventory = 
f"{instance.namespace.inspect_inventory()}" - - (previous_iteration_summary,) = await self.agent.report_summary( - iteration=iteration - 1, - current_inventory=current_inventory, - current_entities=current_entities, - current_conversation=current_conversation, - ) update_spreadsheet_cell( os.getenv("SPREADSHEET_ID"), @@ -230,8 +231,11 @@ async def run(self): f"[Iteration {iteration}] LLM実行中...", ) + current_entities = f"{instance.namespace.get_entities()}" + current_inventory = f"{instance.namespace.inspect_inventory()}" + # Save results to spreadsheet - insert_to_spreadsheet( + (_, iteration_row_number) = insert_to_spreadsheet( os.getenv("SPREADSHEET_ID"), "Iterations!A1:Z", [ @@ -242,7 +246,6 @@ async def run(self): instruction, current_entities, current_inventory, - previous_iteration_summary, ], ], ) @@ -268,6 +271,25 @@ async def run(self): f"Step {iteration}-{step + 1}" ) + # Save results to spreadsheet + (_, step_row_number) = insert_to_spreadsheet( + os.getenv("SPREADSHEET_ID"), + "Steps!A1:Z", + [ + [ + self.config.version, + self.config.model, + iteration, + step + 1, + program.depth // 2, + current_entities, + current_inventory, + program.thinking, + program.code, + ] + ], + ) + if not program: continue @@ -328,34 +350,42 @@ async def run(self): parent_id = saved_program.id - # Save results to spreadsheet - insert_to_spreadsheet( - os.getenv("SPREADSHEET_ID"), - "Steps!A1:Z", - [ - [ - self.config.version, - self.config.model, - iteration, - step, - current_entities, - current_inventory, - program.thinking, - program.code, - program.response, - ] - ], - ) + if step_row_number: + update_spreadsheet_cell( + os.getenv("SPREADSHEET_ID"), + f"Steps!J{step_row_number}", + program.response, + ) # Update state for next iteration if program.state: current_state = program.state current_conversation = program.conversation + with open("messages.json", "w") as f: + json.dump(current_conversation.messages, f, indent=2) + except Exception as e: print(f"Error in Step 
{iteration}-{step + 1}: {e}") continue + current_entities = f"{instance.namespace.get_entities()}" + current_inventory = f"{instance.namespace.inspect_inventory()}" + + (previous_iteration_summary,) = await self.agent.report_summary( + iteration=iteration, + current_inventory=current_inventory, + current_entities=current_entities, + current_conversation=current_conversation, + ) + + if iteration_row_number: + update_spreadsheet_cell( + os.getenv("SPREADSHEET_ID"), + f"Iterations!G{iteration_row_number}", + previous_iteration_summary, + ) + elapsed = time.time() - self.start_time elapsed_str = f"{int(elapsed // 3600):02d}:{int((elapsed % 3600) // 60):02d}:{int(elapsed % 60):02d}" print(