From 708696b599e38fd041a693d470647833332db45b Mon Sep 17 00:00:00 2001
From: Alessandro Bouchs <abouchs@ravenpack.com>
Date: Wed, 19 Nov 2025 18:15:06 +0000
Subject: [PATCH 1/2] initial commit for entity search and label

---
 .../labeler/entity_labeler.py                 | 404 ++++++++++++++++++
 .../mindmap/mindmap_utils.py                  | 141 ++++++
 src/bigdata_research_tools/prompts/labeler.py | 237 ++++++++++
 .../search/entities_search.py                 | 325 ++++++++++++++
 .../search/narrative_search.py                |  17 +
 .../search/screener_search.py                 |   7 +
 6 files changed, 1131 insertions(+)
 create mode 100644 src/bigdata_research_tools/labeler/entity_labeler.py
 create mode 100644 src/bigdata_research_tools/search/entities_search.py
diff --git a/src/bigdata_research_tools/labeler/entity_labeler.py b/src/bigdata_research_tools/labeler/entity_labeler.py
new file mode 100644
index 0000000..a5a9077
--- /dev/null
+++ b/src/bigdata_research_tools/labeler/entity_labeler.py
@@ -0,0 +1,404 @@
+from logging import Logger, getLogger
+from typing import Any
+
+from pandas import DataFrame, Series
+
+from bigdata_research_tools.labeler.labeler import Labeler
+from bigdata_research_tools.llm.base import LLMConfig
+from bigdata_research_tools.prompts.labeler import (
+    get_other_entity_placeholder,
+    get_entity_risk_system_prompt,
+    get_entity_theme_system_prompt,
+    get_target_entity_placeholder,
+)
+
+logger: Logger = getLogger(__name__)
+
+class EntityRiskLabeler(Labeler):
+    def __init__(
+        self,
+        llm_model_config: str | LLMConfig | dict = "openai::gpt-4o-mini",
+        label_prompt: str | None = None,
+        # TODO (cpinto, 2025.02.07) This value is also in the prompt used.
+        #  Changing it here would break the process.
+        unknown_label: str = "unclear",
+    ):
+        """
+        Args:
+            llm_model_config: LLM to use: a "<provider>::<model>" string
+                (e.g. "openai::gpt-4o-mini"), an LLMConfig, or a dict.
+            label_prompt: Prompt provided by user to label the search result chunks.
+                If not provided, then our default labelling prompt is used.
+            unknown_label: Label for unclear classifications
+        """
+        super().__init__(llm_model_config, unknown_label)
+        self.label_prompt = label_prompt
+
+    def get_labels(
+        self,
+        main_theme: str,
+        labels: list[str],
+        texts: list[str],
+        max_workers: int = 50,
+        timeout: int | None = 55,
+        textsconfig: list[dict[str, Any]] | None = None,
+    ) -> DataFrame:
+        """
+        Label texts against the sub-scenarios of a risk scenario.
+
+        Args:
+            main_theme: The risk scenario to analyze.
+            labels: Labels for labelling the chunks.
+            texts: List of chunks to label.
+            max_workers: Maximum number of concurrent workers.
+            timeout: Timeout for each LLM request.
+            textsconfig: Optional per-text dicts, forwarded together with
+                `texts` to `get_prompts_for_labeler`.
+
+        Returns:
+            DataFrame with schema:
+            - index: sentence_id
+            - columns: motivation, label
+        """
+        system_prompt = (
+            get_entity_risk_system_prompt(main_theme, labels)
+            if self.label_prompt is None
+            else self.label_prompt
+        )
+
+        prompts = self.get_prompts_for_labeler(texts, textsconfig)
+
+        responses = self._run_labeling_prompts(
+            prompts,
+            system_prompt,
+            max_workers=max_workers,
+            timeout=timeout,
+            processing_callbacks=[
+                self.parse_labeling_response,
+                self._deserialize_label_response,
+            ],
+        )
+
+        return self._convert_to_label_df(responses)
+
+    def post_process_dataframe(self, df: DataFrame, extra_fields: dict, extra_columns: list[str]) -> DataFrame:
+        """
+        Filter, sort and rename the labeled DataFrame into its export layout.
+
+        Args:
+            df: DataFrame to process. Schema:
+                - Index: int
+                - Columns:
+                    - timestamp_utc: datetime64
+                    - document_id: str (`rp_document_id` is used if absent)
+                    - sentence_id: str
+                    - headline: str
+                    - entity_id: str
+                    - entity_name: str
+                    - entity_country: str
+                    - text: str
+                    - other_entities: str
+                    - entities: List[Dict[str, Any]] with keys key, name, start, end
+                    - masked_text: str
+                    - other_entities_map: List[Tuple[int, str]]
+                    - label: str
+                    - motivation: str
+            extra_fields: Extra {source column: export name} pairs merged into
+                the rename map. May be empty.
+            extra_columns: Export column names appended to the returned
+                selection. May be empty.
+        Returns:
+            Processed DataFrame; if no row has a known label, the empty
+            filtered frame is returned as-is. Otherwise the schema is:
+            - index: int
+            - Columns:
+                - Time Period, Date
+                - Entity, Country
+                - Document ID, Headline, Quote
+                - Sentiment, Bigdata Sentiment
+                - Motivation, Sub-Scenario
+                - Other Entities, Other Entities IDs, Other Entities Types
+                - Entity Sentiment, Entity Text Sentiment (only when
+                  `entity_sentiment` is a column of `df`)
+                - any names listed in `extra_columns`
+        """
+        # Drop rows carrying the configured "unknown" label (default "unclear").
+        df = df.loc[df["label"] != getattr(self, "unknown_label", "unclear")].copy()
+        if df.empty:
+            logger.warning("Empty dataframe: all rows labelled unclear")
+            return df
+
+        # Drop timezone information so timestamps are naive.
+        df["timestamp_utc"] = df["timestamp_utc"].dt.tz_localize(None)
+
+        # Sort and format
+        sort_columns = ["entity_name", "timestamp_utc", "label"]
+        df = df.sort_values(by=sort_columns).reset_index(drop=True)
+
+        # Replace entity placeholders in the LLM motivation
+        df["motivation"] = df.apply(replace_company_placeholders, axis=1)
+
+        # Add formatted columns
+        df["Time Period"] = df["timestamp_utc"].dt.strftime("%b %Y")
+        df["Date"] = df["timestamp_utc"].dt.strftime("%Y-%m-%d")
+
+        df["Document ID"] = df["document_id"] if "document_id" in df.columns else df["rp_document_id"]
+
+        columns_map = {
+            "entity_name": "Entity",
+            "entity_country": "Country",
+            "headline": "Headline",
+            "text": "Quote",
+            "bigdata_sentiment": "Bigdata Sentiment",
+            "sentiment": "Sentiment",
+            "motivation": "Motivation",
+            "label": "Sub-Scenario",
+            "other_entities_name": "Other Entities",
+            "other_entities_id": "Other Entities IDs",
+            "other_entities_type": "Other Entities Types",
+        }
+
+        if "entity_sentiment" in df.columns:
+            columns_map.update({
+                "entity_sentiment": "Entity Sentiment",
+                "entity_text_sentiment": "Entity Text Sentiment",
+            })
+
+        if extra_fields:
+            columns_map.update(extra_fields)
+            if "quotes" in extra_fields:
+                if "quotes" in df.columns:
+                    df["quotes"] = df.apply(replace_company_placeholders, axis=1, col_name="quotes")
+                else:
+                    logger.warning("quotes column not in df")
+
+        df = df.rename(
+            columns=columns_map
+        )
+
+        # Select and order columns
+        export_columns = [
+            "Time Period",
+            "Date",
+            "Entity",
+            "Country",
+            "Document ID",
+            "Headline",
+            "Quote",
+            "Sentiment",
+            "Bigdata Sentiment",
+            "Motivation",
+            "Sub-Scenario",
+            "Other Entities",
+            "Other Entities IDs",
+            "Other Entities Types",
+        ]
+
+        if "Entity Sentiment" in df.columns:
+            logger.info("Including entity sentiment columns in export")
+            export_columns += ["Entity Sentiment", "Entity Text Sentiment"]
+
+        if extra_columns:
+            export_columns += extra_columns
+
+        return df[export_columns]
+    
+class EntityScreenerLabeler(Labeler):
+    def __init__(
+        self,
+        llm_model_config: str | LLMConfig | dict = "openai::gpt-4o-mini",
+        label_prompt: str | None = None,
+        # TODO (cpinto, 2025.02.07) This value is also in the prompt used.
+        #  Changing it here would break the process.
+        unknown_label: str = "unclear",
+    ):
+        """
+        Args:
+            llm_model_config: LLM to use: a "<provider>::<model>" string
+                (e.g. "openai::gpt-4o-mini"), an LLMConfig, or a dict.
+            label_prompt: Prompt provided by user to label the search result chunks.
+                If not provided, then our default labelling prompt is used.
+            unknown_label: Label for unclear classifications
+        """
+        super().__init__(llm_model_config, unknown_label)
+        self.label_prompt = label_prompt
+
+    def get_labels(
+        self,
+        main_theme: str,
+        labels: list[str],
+        texts: list[str],
+        max_workers: int = 50,
+        timeout: int | None = 55,
+        textsconfig: list[dict[str, Any]] | None = None,
+    ) -> DataFrame:
+        """
+        Label texts against the sub-themes of a main theme.
+
+        Args:
+            main_theme: The main theme to analyze.
+            labels: Labels for labelling the chunks.
+            texts: List of chunks to label.
+            max_workers: Maximum number of concurrent workers.
+            timeout: Timeout for each LLM request.
+            textsconfig: Optional per-text dicts, forwarded together with
+                `texts` to `get_prompts_for_labeler`.
+
+        Returns:
+            DataFrame with schema:
+            - index: sentence_id
+            - columns: motivation, label
+        """
+        system_prompt = (
+            get_entity_theme_system_prompt(main_theme, labels)
+            if self.label_prompt is None
+            else self.label_prompt
+        )
+
+        prompts = self.get_prompts_for_labeler(texts, textsconfig)
+
+        responses = self._run_labeling_prompts(
+            prompts,
+            system_prompt,
+            max_workers=max_workers,
+            timeout=timeout,
+            processing_callbacks=[
+                self.parse_labeling_response,
+                self._deserialize_label_response,
+            ],
+        )
+
+        return self._convert_to_label_df(responses)
+
+    def post_process_dataframe(self, df: DataFrame, extra_fields: dict, extra_columns: list[str]) -> DataFrame:
+        """
+        Filter, sort and rename the labeled DataFrame into its export layout.
+
+        Args:
+            df: DataFrame to process. Schema:
+                - Index: int
+                - Columns:
+                    - timestamp_utc: datetime64
+                    - document_id: str (`rp_document_id` is used if absent)
+                    - sentence_id: str
+                    - headline: str
+                    - entity_id: str
+                    - entity_name: str
+                    - entity_country: str
+                    - text: str
+                    - other_entities: str
+                    - entities: List[Dict[str, Any]] with keys key, name, start, end
+                    - masked_text: str
+                    - other_entities_map: List[Tuple[int, str]]
+                    - label: str
+                    - motivation: str
+            extra_fields: Extra {source column: export name} pairs merged into
+                the rename map. May be empty.
+            extra_columns: Export column names appended to the returned
+                selection. May be empty.
+        Returns:
+            Processed DataFrame; if no row has a known label, the empty
+            filtered frame is returned as-is. Otherwise the schema is:
+            - index: int
+            - Columns:
+                - Time Period
+                - Date
+                - Entity, Country
+                - Document ID, Headline, Quote
+                - Sentiment, Bigdata Sentiment
+                - Motivation
+                - Theme
+                - Other Entities, Other Entities IDs, Other Entities Types
+                - any names listed in `extra_columns`
+        """
+        # Drop rows carrying the configured "unknown" label (default "unclear").
+        df = df.loc[df["label"] != getattr(self, "unknown_label", "unclear")].copy()
+        if df.empty:
+            logger.warning("Empty dataframe: all rows labelled unclear")
+            return df
+
+        # Drop timezone information so timestamps are naive.
+        df["timestamp_utc"] = df["timestamp_utc"].dt.tz_localize(None)
+
+        # Sort and format
+        sort_columns = ["entity_name", "timestamp_utc", "label"]
+        df = df.sort_values(by=sort_columns).reset_index(drop=True)
+
+        # Replace entity placeholders in the LLM motivation
+        df["motivation"] = df.apply(replace_company_placeholders, axis=1)
+
+        # Add formatted columns
+        df["Time Period"] = df["timestamp_utc"].dt.strftime("%b %Y")
+        df["Date"] = df["timestamp_utc"].dt.strftime("%Y-%m-%d")
+
+        df["Document ID"] = df["document_id"] if "document_id" in df.columns else df["rp_document_id"]
+
+        columns_map = {
+            "entity_name": "Entity",
+            "entity_country": "Country",
+            "headline": "Headline",
+            "text": "Quote",
+            "bigdata_sentiment": "Bigdata Sentiment",
+            "sentiment": "Sentiment",
+            "motivation": "Motivation",
+            "label": "Theme",
+            "other_entities_name": "Other Entities",
+            "other_entities_id": "Other Entities IDs",
+            "other_entities_type": "Other Entities Types",
+        }
+
+        if extra_fields:
+            columns_map.update(extra_fields)
+            if "quotes" in extra_fields:
+                if "quotes" in df.columns:
+                    df["quotes"] = df.apply(replace_company_placeholders, axis=1, col_name="quotes")
+                else:
+                    logger.warning("quotes column not in df")
+
+        df = df.rename(
+            columns=columns_map
+        )
+
+        # Select and order columns
+        export_columns = [
+            "Time Period",
+            "Date",
+            "Entity",
+            "Country",
+            "Document ID",
+            "Headline",
+            "Quote",
+            "Sentiment",
+            "Bigdata Sentiment",
+            "Motivation",
+            "Theme",
+            "Other Entities",
+            "Other Entities IDs",
+            "Other Entities Types",
+        ]
+
+        if extra_columns:
+            export_columns += extra_columns
+
+        return df[export_columns]
+
+def replace_company_placeholders(row: Series, col_name: str = "motivation") -> str:
+    """
+    Replace entity placeholders in one text column of a DataFrame row.
+
+    Args:
+        row: Row of the DataFrame. Expected entries: `col_name` (str),
+            entity_name (str), other_entities_map (List[Tuple[int, str]]).
+        col_name: Column holding the text; `df.apply(..., col_name=...)`
+            forwards it as a keyword. Defaults to "motivation".
+    Returns:
+        Text with placeholders replaced.
+    """
+    text = row[col_name].replace(get_target_entity_placeholder(), row["entity_name"])
+    other_entities = row.get("other_entities_map")
+    if other_entities:
+        prefix = get_other_entity_placeholder()
+        # Longest ids first, so "<prefix>_1" cannot clobber "<prefix>_10".
+        for entity_id, entity_name in sorted(other_entities, key=lambda e: -len(str(e[0]))):
+            text = text.replace(f"{prefix}_{entity_id}", entity_name)
+    return text
\ No newline at end of file
diff --git a/src/bigdata_research_tools/mindmap/mindmap_utils.py b/src/bigdata_research_tools/mindmap/mindmap_utils.py
index 0f8e25b..0a0756f 100644
--- a/src/bigdata_research_tools/mindmap/mindmap_utils.py
+++ b/src/bigdata_research_tools/mindmap/mindmap_utils.py
@@ -148,6 +148,147 @@
             """
         ),
     },
+    "risk_entity": {
+        "qualifier": "Risk Scenario",
+        "user_prompt_message": "Your given Risk Scenario is: {main_theme}",
+        "default_instructions": (
+            "Forget all previous prompts."
+            "You are assisting a professional risk analyst tasked with creating a taxonomy to classify the impact of the Risk Scenario '**{main_theme}**' on other entities, such countries, commodities, geographical places, and organizations."
+            "Your objective is to generate a **comprehensive tree structure** that maps the **risk spillovers** stemming from the Risk Scenario '**{main_theme}**', and generates related sub-scenarios. "
+            "Key Instructions:"
+            "1. **Understand the Risk Scenario: '{main_theme}'**:"
+            "    - The Risk Scenario '**{main_theme}**' represents a central, multifaceted concept that may be harmful or beneficial to an entity."
+            "    - Your task is to identify how the Risk Scenario impacts entities through various **risk spillovers** and transmission channels."
+            "    - Summarize the Risk Scenario '**{main_theme}**' in a **short list of essential keywords**."
+            "    - The keyword list should be short (1-2 keywords). Avoid unnecessary, unmentioned, indirectly inferred, or redundant keywords."
+            "2. **Create a Tree Structure for Risk Spillovers and Sub-Scenarios**:"
+            "    - Decompose the Risk Scenario into **distinct, focused, and self-contained risk spillovers**."
+            "    - Each risk spillover must represent a **specific risk channel** through which entities are exposed to as a consequence of the Risk Scenario."
+            "    - Label each **primary node** in the tree explicitly as a \"Risk\" in the `Label` field. For example:"
+            "        - Use 'Cost Risk' instead of 'Cost Impacts'."
+            "        - Use 'Supply Chain Risk' instead of 'Supply Chain Disruptions'."
+            "    - Risk spillovers must:"
+            "        - Cover a wide range of potential impacts on entities' and long-term stabiliity and growth."
+            "        - Explore both macroeconomic and microeconomic dimensions of the Risk Scenario '**{main_theme}**' and analyze their impact on entities when relevant."
+            "        - Include **direct and indirect consequences** of the main scenario."
+            "        - Represent **dimensions of risk** that entities must monitor or mitigate."
+            "        - NOT overlap."
+            "    - Independently identify the most relevant spillovers based on the Risk Scenario '**{main_theme}**', without limiting to predefined categories."
+            "3. **Generate Sub-Scenarios for Each Risk Spillover**:"
+            "    - For each risk spillover, identify **specific sub-scenarios** that will arise as a consequence of the Risk Scenario '**{main_theme}**'."
+            "    - All sub-scenarios must:"
+            "        - Be **concise and descriptive sentences**, clearly stating how the sub-scenario is an event caused by the main scenario."
+            "        - **Explicitly include ALL core concepts and keywords** from the main scenario, including specific geographical locations or temporal details, in every sentence in order to ensure clarity and relevance towards the main scenario."
+            "        - Integrate the Risk Scenario in a natural way, avoiding repetitive or mechanical structures."
+            "        - Not exceed 15 words."
+            "    - Sub-scenarios MUST be mutually exclusive: they CANNOT overlap neither within nor across branches of the tree."
+            "    - Do NOT combine multiple sub-scenarios in a single label."
+            "    - Sub-Scenarios have to be consistent with the parent Risk Spillover (e.g. Market Access related sub-scenarios have to belong to the Market Access Risk node)."
+            "    - Generate 3 OR MORE sub-scenarios for each risk spillover."
+            "    - Generate a short label for each subscenario."
+            "4. **Iterate Based on the Analyst's Focus: '{analyst_focus}'**:"
+            "    - After generating the initial tree structure, use the analyst's focus ('{analyst_focus}') to:"
+            "        - Identify **missing branches** or underexplored areas of the tree."
+            "        - Add new risk spillovers or sub-scenarios that align with the analyst's focus."
+            "        - Ensure that sub-scenarios ALWAYS include ALL core components of the Risk Scenario and are formulated as natural sentences."
+            "        - Ensure that sub-scenarios DO NOT overlap within and across risk spillovers."
+            "        - Ensure that sub-scenarios belong to the correct Risk Spillover."
+            "    - If the analyst focus is empty, skip this step."
+            "    - If you don't understand the analyst focus ('{analyst_focus}'), ask an open-ended question to the analyst."
+            "5. **Review and Expand the Tree for Missing Risks**:"
+            "    - After incorporating the analyst's focus, review the tree structure to ensure it includes a **broad range of risks** and sub-scenarios."
+            "    - Add any missing risks or sub-scenarios to the tree."
+        ),
+        "enforce_structure_string": (
+            """IMPORTANT: Your response MUST be a valid JSON object. Each node in the JSON object must include:\n"
+            "    - `node`: an integer representing the unique identifier for the node.\n"
+            "    - `label`: a string for the name of the sub-theme.\n"
+            "    - `summary`: a string to explain briefly in maximum 15 words why the sub-theme is related to the main theme or risk.\n"
+            "    - `children`: an array of child nodes.\n"
+            "Format the JSON object as a nested dictionary. Be careful when specifying keys and items.\n"
+            "Avoid overlapping labels. Break down joint concepts into unique parents so that each parent represents ONLY ONE concept. AVOID creating branch names such as 'Compliance and Regulatory Risk'. Keep risks separate and create a single branch for each risk, such as 'Compliance Risk' and 'Regulatory Risk', each with their own children.\n"
+            "Return ONLY the JSON object, with no extra text, explanation, or markdown.\n"
+            "You MUST use ONLY these field names: label, node, summary, children. Do NOT use underscores, spaces, or any other characters in field names. If you use any other field names, your answer will be rejected.\n"
+            "## Example Structure:\n"
+            "**Theme: Global Warming**\n\n"
+            "{\n"
+            "  \"node\": 1,\n"
+            "  \"label\": \"Global Warming\",\n"
+            "  \"summary\": \"Global Warming is a serious risk\",\n"
+            "  \"children\": [\n"
+            "    {\"node\": 2, \"label\": \"Renewable Energy Adoption\", \"summary\": \"Renewable energy reduces greenhouse gas emissions and thereby global warming and climate change effects\", \"children\": [\n"
+            "      {\"node\": 5, \"label\": \"Solar Energy\", \"summary\": \"Solar energy reduces greenhouse gas emissions\"},\n"
+            "      {\"node\": 6, \"label\": \"Wind Energy\", \"summary\": \"Wind energy reduces greenhouse gas emissions\"},\n"
+            "      {\"node\": 7, \"label\": \"Hydropower\", \"summary\": \"Hydropower reduces greenhouse gas emissions\"}\n"
+            "    ]},\n"
+            "    {\"node\": 3, \"label\": \"Carbon Emission Reduction\", \"summary\": \"Carbon emission reduction decreases greenhouse gases\", \"children\": [\n"
+            "      {\"node\": 8, \"label\": \"Carbon Capture Technology\", \"summary\": \"Carbon capture technology reduces atmospheric CO2\"},\n"
+            "      {\"node\": 9, \"label\": \"Emission Trading Systems\", \"summary\": \"Emission trading systems incentivize reductions in greenhouse gases\"}\n"
+            "    ]}\n"
+            "  ]\n"
+            "}\n"
+            """
+        ),
+    },
+    "theme_entity": {
+        "qualifier": "Main Theme",
+        "user_prompt_message": "Your given Theme is: {main_theme}",
+        "default_instructions": (
+            "Forget all previous prompts."
+            "You are assisting a professional analyst tasked with creating a screener to measure the impact of the theme {main_theme} on other entities, such countries, commodities, geographical places, and organizations."
+            "Your objective is to generate a comprehensive tree structure of distinct sub-themes that will guide the analyst's research process."
+            "Follow these steps strictly:"
+            "1. **Understand the Core Theme {main_theme}**:"
+            "   - The theme {main_theme} is a central concept. All components are essential for a thorough understanding."
+            "2. **Create a Taxonomy of Sub-themes for {main_theme}**:"
+            "   - Decompose the main theme {main_theme} into concise, focused, and self-contained sub-themes."
+            "   - Each sub-theme should represent a singular, concise, informative, and clear aspect of the main theme."
+            "   - Expand the sub-theme to be relevant for the {main_theme}: a single word is not informative enough."
+            "   - Prioritize clarity and specificity in your sub-themes."
+            "   - Avoid repetition and strive for diverse angles of exploration."
+            "   - Provide a comprehensive list of potential sub-themes."
+            "3. **Iterate Based on the Analyst's Focus {analyst_focus}**:"
+            "   - If no specific {analyst_focus} is provided, transition directly to formatting the JSON response."
+            "4. **Format Your Response as a JSON Object**:"
+            "   - Each node in the JSON object must include:"
+            "     - `node`: an integer representing the unique identifier for the node."
+            "     - `label`: a string for the name of the sub-theme."
+            "     - `summary`: a string to explain briefly in maximum 15 words why the sub-theme is related to the theme {main_theme}."
+            "       - For the node referring to the first node {main_theme}, just define briefly in maximum 15 words the theme {main_theme}."
+            "     - `children`: an array of child nodes."
+        ),
+        "enforce_structure_string": (
+            """IMPORTANT: Your response MUST be a valid JSON object. Each node in the JSON object must include:\n"
+	                    "- `node`: an integer representing the unique identifier for the node.\n"
+	                    "- `label`: a string for the name of the sub-theme.\n"
+	                    "- `summary`: a string to explain briefly in maximum 15 words why the sub-theme is related to the theme.\n"
+	                    "- For the node referring to the main theme, just define briefly in maximum 15 words the theme.\n"
+	                    "- `children`: an array of child nodes.\n"
+                        "Format the JSON object as a nested dictionary. Be careful when specifying keys and items.\n"
+	        "Avoid overlapping labels. Break down joint concepts into unique parents so that each parent represents ONLY ONE concept. AVOID creating branch names such as 'Compliance and Regulatory Risk'. Keep risks separate and create a single branch for each risk, such as 'Compliance Risk' and 'Regulatory Risk', each with their own children.\n"
+            "Return ONLY the JSON object, with no extra text, explanation, or markdown.\n"
+            "You MUST use ONLY these field names: label, node, summary, children. Do NOT use underscores, spaces, or any other characters in field names. If you use any other field names, your answer will be rejected.\n"
+            "## Example Structure:\n"
+            "**Theme: Global Warming**\n\n"
+            "{\n"
+            "  \"node\": 1,\n"
+            "  \"label\": \"Global Warming\",\n"
+            "  \"summary\": \"Global Warming is a serious risk\",\n"
+            "  \"children\": [\n"
+            "    {\"node\": 2, \"label\": \"Renewable Energy Adoption\", \"summary\": \"Renewable energy reduces greenhouse gas emissions and thereby global warming and climate change effects\", \"children\": [\n"
+            "      {\"node\": 5, \"label\": \"Solar Energy\", \"summary\": \"Solar energy reduces greenhouse gas emissions\"},\n"
+            "      {\"node\": 6, \"label\": \"Wind Energy\", \"summary\": \"Wind energy reduces greenhouse gas emissions\"},\n"
+            "      {\"node\": 7, \"label\": \"Hydropower\", \"summary\": \"Hydropower reduces greenhouse gas emissions\"}\n"
+            "    ]},\n"
+            "    {\"node\": 3, \"label\": \"Carbon Emission Reduction\", \"summary\": \"Carbon emission reduction decreases greenhouse gases\", \"children\": [\n"
+            "      {\"node\": 8, \"label\": \"Carbon Capture Technology\", \"summary\": \"Carbon capture technology reduces atmospheric CO2\"},\n"
+            "      {\"node\": 9, \"label\": \"Emission Trading Systems\", \"summary\": \"Emission trading systems incentivize reductions in greenhouse gases\"}\n"
+            "    ]}\n"
+            "  ]\n"
+            "}\n"
+            """
+        ),
+    },
 }
 
 
diff --git a/src/bigdata_research_tools/prompts/labeler.py b/src/bigdata_research_tools/prompts/labeler.py
index f7ae4f0..a3181b3 100644
--- a/src/bigdata_research_tools/prompts/labeler.py
+++ b/src/bigdata_research_tools/prompts/labeler.py
@@ -314,3 +314,240 @@ def get_risk_system_prompt(main_theme: str, label_summaries: list[str]) -> str:
     return risk_system_prompt_template.format(
         main_theme=main_theme, label_summaries=label_summaries
     )
+
+def get_entity_risk_system_prompt(main_theme: str, label_summaries: list) -> str:
+    """
+    Build the system prompt used to label an entity's exposure to a risk scenario.
+
+    Fills `entity_risk_system_prompt_template` with the main theme, the risk
+    sub-scenario summaries and the target/other entity placeholders.
+    """
+    placeholders = {
+        "BIGDATA_TARGET_ENTITY_PLACEHOLDER": get_target_entity_placeholder(),
+        "BIGDATA_OTHER_ENTITY_PLACEHOLDER": get_other_entity_placeholder(),
+    }
+    return entity_risk_system_prompt_template.format(
+        main_theme=main_theme, label_summaries=label_summaries, **placeholders
+    )
+
+entity_risk_system_prompt_template: str = """Forget all previous prompts.
+
+You are assisting a professional analyst in evaluating both the exposure and risk classification for "{BIGDATA_TARGET_ENTITY_PLACEHOLDER}" regarding the Risk Scenario "{main_theme}".
+This involves a two-step process: confirming exposure of "{BIGDATA_TARGET_ENTITY_PLACEHOLDER}" and classifying specific risks if exposure is confirmed. Use the headline for contextual understanding.
+
+<input_details>
+You will receive the following information:
+- ID: [text ID]
+- Headline: [The Headline of the News Article containing Text]
+- Text: [Paragraph requiring analysis]
+- Risk Scenario: "{main_theme}"
+</input_details>
+
+Follow these guidelines:
+
+<exposure_assessment>
+- Examine whether the text explicitly mentions the Risk Scenario "{main_theme}" or any of its core components.
+- Ensure that "{BIGDATA_TARGET_ENTITY_PLACEHOLDER}" is the main focus of the text and that it is clearly stated that "{BIGDATA_TARGET_ENTITY_PLACEHOLDER}" is facing or will face consequences caused by the Risk Scenario "{main_theme}".
+- Assess if there are DIRECT consequences on "{BIGDATA_TARGET_ENTITY_PLACEHOLDER}’s" internal and external activities, operations, future performance, stability, sustainability, and long term growth.
+- Designate the exposure as unclear if the text lacks an explicit DIRECT link between "{BIGDATA_TARGET_ENTITY_PLACEHOLDER}" and the Risk Scenario
+- Designate the exposure as unclear if the text relies on generic information.
+</exposure_assessment>
+
+<risk_classification>
+If direct exposure of {BIGDATA_TARGET_ENTITY_PLACEHOLDER} is confirmed:
+
+- Identify and classify the specific risk using this list of Risk Sub-Scenarios:
+    "{label_summaries}".
+
+- Follow a detailed classification process:
+    - Examine the text to confirm how the Risk Scenario "{main_theme}" directly impacts "{BIGDATA_TARGET_ENTITY_PLACEHOLDER}" through one of the Risk Sub-Scenarios.
+    - Write a concise motivation that explains the direct link between "{BIGDATA_TARGET_ENTITY_PLACEHOLDER}" and the Risk Sub-Scenario as stated in the text.
+    - The motivation should always start with "{BIGDATA_TARGET_ENTITY_PLACEHOLDER}".
+    - Identify an appropriate Risk Sub-Scenario label from the list that describes explicitly the impact on {BIGDATA_TARGET_ENTITY_PLACEHOLDER}'s internal and external activities, operations, stability, sustainability, and long term growth or performance.
+    - Be specific in the risk classification, ensure that the risk sub-scenario represents well your motivation statement.
+    - Ensure that the Risk Sub-Scenario label can be directly extracted from the text and that it describes with high granularity how "{BIGDATA_TARGET_ENTITY_PLACEHOLDER}" is affected.
+    - Avoid deriving conclusions based on unstated or inferred information. Focus only on the explicit content of the text or headline.
+</risk_classification>
+
+<verbatim_quotes_extraction>
+- Extract verbatim quotes from the text that support the classification and illustrate {BIGDATA_TARGET_ENTITY_PLACEHOLDER}'s exposure to the specific Risk Sub-Scenario.
+- Ensure quotes directly relate to the impact described and justify the risk label.
+- Extract full sentences or phrases that clearly indicate, as standalone statements, how "{BIGDATA_TARGET_ENTITY_PLACEHOLDER}" is affected by the Risk Scenario "{main_theme}" and the Sub-Scenario label assigned.
+</verbatim_quotes_extraction>
+
+<sentiment_analysis>
+- If the text does explicitly link "{BIGDATA_TARGET_ENTITY_PLACEHOLDER}" with the Risk Scenario "{main_theme}", classify the exposure with a sentiment label specified as follows:
+    - "negative" if the text indicates that "{BIGDATA_TARGET_ENTITY_PLACEHOLDER}" is facing or will face negative consequences due to the Risk Scenario "{main_theme}".
+    - "positive" if the text indicates that "{BIGDATA_TARGET_ENTITY_PLACEHOLDER}" is well positioned in the face of the Risk Scenario "{main_theme}", or is in a better position with respect to the past, intended as previous occurrences of the Risk Scenario from which the situation has improved, or if the text indicates that it is doing better than its peers, or than the {BIGDATA_OTHER_ENTITY_PLACEHOLDER}.
+    - "neutral" if the text indicates that "{BIGDATA_TARGET_ENTITY_PLACEHOLDER}" is neither positively nor negatively affected by the Risk Scenario "{main_theme}".
+- If the exposure is unclear, assign the sentiment label as "neutral".
+</sentiment_analysis>
+
+<response_format>
+Structure your response as a JSON object with the sentence ID as the key containing:
+"motivation": A concise explanation describing the link between "{BIGDATA_TARGET_ENTITY_PLACEHOLDER}" and the Risk Sub-Scenario.
+"label": State the specific risk Sub-Scenario label or 'unclear'.
+"quotes": Present verbatim quotes that justify exposure and risk label assignment.
+"sentiment": State the sentiment label as 'negative', 'positive', or 'neutral'.
+
+Format: {{"<sentence_id>": 
+{{"motivation": "<motivation>", "label": "<risk_classification_label>",
+ "quotes": "<verbatim_quotes>", "sentiment": "<sentiment_label>"}}
+}}.
+</response_format>
+
+<examples>
+ID: 3
+Headline: "Tariffs to Strain Supply Chains Globally"
+Text: "New tariffs against China will significantly impact {BIGDATA_TARGET_ENTITY_PLACEHOLDER}'s operations due to its reliance on raw materials from Chinese suppliers."
+Scenario: "New Tariffs against China"
+Output:
+
+{{3:{{
+  "motivation": "{BIGDATA_TARGET_ENTITY_PLACEHOLDER}'s supply operations are directly impacted by new tariffs due to their reliance on raw materials sourced from China.",
+  "label": "Supply Chain Disruption",
+  "quotes": ["New tariffs against China will significantly impact {BIGDATA_TARGET_ENTITY_PLACEHOLDER}'s operations", "reliance on raw materials from Chinese suppliers"],
+  "sentiment": "negative"}}
+}}
+
+ID: 5
+Headline: "Interest Rate Fluctuations to Affect Markets"
+Text: "{BIGDATA_TARGET_ENTITY_PLACEHOLDER}'s analysts are forecasting higher risks associated with potential interest rate changes."
+Scenario: "Interest Rate Volatility"
+Output:
+
+{{5:{{
+  "motivation": "{BIGDATA_TARGET_ENTITY_PLACEHOLDER}'s analysts are forecasting higher risks associated with potential interest rate changes.",
+  "label": "unclear",
+  "quotes": [],
+  "sentiment": "neutral"}}
+}}
+
+ID: 2
+Headline: "Economic Challenges Ahead Due to Tariffs on China"
+Text: "{BIGDATA_OTHER_ENTITY_PLACEHOLDER}'s analysts report a potential economic downturn in {BIGDATA_TARGET_ENTITY_PLACEHOLDER} linked to new tariffs against China."
+Risk Scenario: "New Tariffs Against China"
+Output:
+
+{{2:{{
+  "motivation": "{BIGDATA_TARGET_ENTITY_PLACEHOLDER} faces a potential economic downturn linked to new tariffs against China, as reported by {BIGDATA_OTHER_ENTITY_PLACEHOLDER}'s analysts.",
+  "label": "Economic Downturns",
+  "quotes": ["{BIGDATA_OTHER_ENTITY_PLACEHOLDER}'s analysts report a potential economic downturn in {BIGDATA_TARGET_ENTITY_PLACEHOLDER}"],
+  "sentiment": "negative"}}
+}}
+
+ID: 3
+Headline: "Analyzing External Factors in Business Strategy"
+Text: "{BIGDATA_OTHER_ENTITY_PLACEHOLDER} is studying external factors such as tariffs to gauge potential risks. {BIGDATA_OTHER_ENTITY_PLACEHOLDER}'s analysts report a potential economic downturn in {BIGDATA_TARGET_ENTITY_PLACEHOLDER}."
+Risk Scenario: "New Tariffs on Semiconductors"
+Output:
+
+{{3:{{
+  "motivation": "{BIGDATA_OTHER_ENTITY_PLACEHOLDER}'s analysis of external factors does not establish a direct link to {BIGDATA_TARGET_ENTITY_PLACEHOLDER}.",
+  "label": "unclear",
+  "quotes": [],
+  "sentiment": "neutral"}}
+}}
+
+ID: 4
+Headline: "Market Trends Influence Stock Performance"
+Text: "{BIGDATA_TARGET_ENTITY_PLACEHOLDER}’s stock is influenced by broad market trends."
+Risk Scenario: "Increased Uncertainty and Volatility"
+Output:
+
+{{4:{{
+  "motivation": "The text does not relate to the Risk Scenario and it does not mention any specific risk sub-scenario affecting {BIGDATA_TARGET_ENTITY_PLACEHOLDER}.",
+  "label": "unclear",
+  "quotes": [],
+  "sentiment": "neutral"}}
+}}
+
+ID: 5
+
+Headline: "Tariffs and Their Economic Impact"
+Text: "{BIGDATA_OTHER_ENTITY_PLACEHOLDER} researchers estimate that tariffs will affect the broader economy in {BIGDATA_TARGET_ENTITY_PLACEHOLDER}."
+Risk Scenario: "New Tariffs against China"
+Output:
+
+{{5:{{
+  "motivation": "{BIGDATA_TARGET_ENTITY_PLACEHOLDER} is not linked with any specific risk sub-scenario or any tangible effect of the Risk Scenario.",
+  "label": "unclear",
+  "quotes": [],
+  "sentiment": "neutral"}}
+}}
+
+ID: 2
+Headline: "China Tariffs Impact Supply Chains"
+Text: "According to recent reports, {BIGDATA_TARGET_ENTITY_PLACEHOLDER} is heavily dependent on China. The recent tariffs against China have forced {BIGDATA_TARGET_ENTITY_PLACEHOLDER} to reconsider its supply chain, potentially leading to increased logistics costs."
+Risk Scenario: "New Tariffs against China"
+Output:
+
+{{2:{{
+  "motivation": "{BIGDATA_TARGET_ENTITY_PLACEHOLDER} is said to be reconsidering its supply chain in the face of the risk scenario. The text clearly links {BIGDATA_TARGET_ENTITY_PLACEHOLDER} with the Risk Scenario and mentions an explicit Sub-scenario risk of Supply Chain Disruptions.",
+  "label": "Supply Chain Disruption",
+  "quotes": [
+    "{BIGDATA_TARGET_ENTITY_PLACEHOLDER} is heavily dependent on China",
+    "The recent tariffs against China have forced {BIGDATA_TARGET_ENTITY_PLACEHOLDER} to reconsider its supply chain, potentially leading to increased logistics costs."
+  ],
+  "sentiment": "negative"}}
+}}
+</examples>
+"""
+
+def get_entity_theme_system_prompt(main_theme: str, label_summaries: list, unknown_label: str = "unclear") -> str:
+    """
+    Generate the entity screener labeler prompt with the provided parameters.
+
+    Args:
+        main_theme (str): The main theme being analyzed
+        label_summaries (list): List of theme label summaries
+        unknown_label (str): Label for sentences not explicitly tied to the theme; the template references it, so it must always be supplied
+    Returns:
+        str: The formatted prompt string
+    """
+    return entity_theme_system_prompt_template.format(
+        main_theme=main_theme, label_summaries=label_summaries, unknown_label=unknown_label, BIGDATA_TARGET_ENTITY_PLACEHOLDER=get_target_entity_placeholder()
+    )
+
+entity_theme_system_prompt_template: str = """
+ Forget all previous prompts.
+ You are assisting a professional analyst in evaluating the impact of the theme '{main_theme}' on an entity "{BIGDATA_TARGET_ENTITY_PLACEHOLDER}".
+ Your primary task is first, to ensure that each sentence is explicitly related to '{main_theme}', and second, to accurately associate each given sentence with
+ the relevant label contained within the list '{label_summaries}'.
+
+ Please adhere strictly to the following guidelines:
+
+ 1. **Analyze the Sentence**:
+    - Each input consists of a sentence ID, an entity name ('{BIGDATA_TARGET_ENTITY_PLACEHOLDER}'), and the sentence text.
+    - Analyze the sentence to understand if the content clearly establishes a connection to '{main_theme}'.
+    - Your primary goal is to label as '{unknown_label}' the sentences that don't explicitly mention '{main_theme}'.
+    - Analyze the list of labels '{label_summaries}' used for label assignment. '{label_summaries}' is a Python list variable containing distinct labels and their definition in format 'Label: Summary'; you must pick the label only from the 'Label' part, which means the left side of the colon for each Label:Summary pair.
+    - Your secondary goal is to select the most appropriate label from '{label_summaries}' that corresponds to the content of the sentence.
+
+ 2. **First Label Assignment**:
+    - Assign the label '{unknown_label}' to the sentence related to "{BIGDATA_TARGET_ENTITY_PLACEHOLDER}" when it does not explicitly mention '{main_theme}'. Otherwise, don't assign a label.
+    - Evaluate each sentence independently, focusing solely on the context provided within that specific sentence.
+    - Use only the information contained within the sentence for your label assignment.
+    - When evaluating the sentence, "{BIGDATA_TARGET_ENTITY_PLACEHOLDER}" must clearly mention that the entity is clearly impacted by '{main_theme}'.
+    - Many sentences are only tangentially connected to the topic '{main_theme}'. These sentences must be assigned the label '{unknown_label}'.
+
+ 3. **Second Label Assignment**:
+    - For the sentences not labeled as '{unknown_label}' and only for them, assign a unique label from the list '{label_summaries}' to the sentence related to "{BIGDATA_TARGET_ENTITY_PLACEHOLDER}".
+    - Evaluate each sentence independently, focusing solely on the context provided within that specific sentence.
+    - Use only the information contained within the sentence for your label assignment.
+    - Ensure that the sentence clearly establishes a connection to the label you assigned and to the theme '{main_theme}'.
+    - You must not create a new label or choose a label that is not present in '{label_summaries}'.
+    - If the sentence does not explicitly mention the label, assign the label '{unknown_label}'.
+    - When evaluating the sentence, "{BIGDATA_TARGET_ENTITY_PLACEHOLDER}" must clearly mention that the entity is impacted by the label assigned and '{main_theme}'.
+
+ 4. **Response Format**:
+    - Your output should be structured as a JSON object that includes:
+          1. A brief motivation for your choice.
+          2. The assigned label.
+    - Each entry must start with the sentence ID and contain a clear motivation that begins with "{BIGDATA_TARGET_ENTITY_PLACEHOLDER}".
+    - The motivation should explain why the label was selected from '{label_summaries}' based on the information in the sentence and in the context of '{main_theme}'. It should also justify the label that had been assigned.
+    - Ensure that the exact context is understood and labels are based only on explicitly mentioned information in the sentence. Otherwise, assign the label '{unknown_label}'.
+    - The assigned label should be only the string that precedes the character ':'.
+    - Format your JSON as follows: {{"<sentence_id>": {{"motivation": "<motivation>", "label": "<label>"}}, ...}}.
+    - Ensure that all strings in the JSON are correctly formatted with proper quotes.
+ """
\ No newline at end of file
diff --git a/src/bigdata_research_tools/search/entities_search.py b/src/bigdata_research_tools/search/entities_search.py
new file mode 100644
index 0000000..6168b50
--- /dev/null
+++ b/src/bigdata_research_tools/search/entities_search.py
@@ -0,0 +1,325 @@
+from bigdata_research_tools.search.query_builder import (
+    build_batched_query,
+    EntitiesToSearch,
+    create_date_ranges,
+)
+from itertools import chain
+from bigdata_research_tools.search.search import run_search
+from bigdata_client.models.search import DocumentType, SortBy
+from bigdata_research_tools.search.search_utils import filter_search_results
+from typing import List, Optional, Dict
+
+from bigdata_client.document import Document
+from bigdata_client.query import SentimentRange
+from bigdata_client.models.advanced_search_query import ListQueryComponent
+from pandas import DataFrame
+import pandas as pd
+from tqdm import tqdm
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from bigdata_research_tools.search.screener_search import mask_sentences
+from bigdata_research_tools.labeler.risk_labeler import (
+    replace_company_placeholders,
+)
+
+
+import os
+os.environ["BIGDATA_OTHER_ENTITY_PLACEHOLDER"] = "Other Entity"  # set at import time; overwrites any value already present in the environment
+os.environ["BIGDATA_TARGET_ENTITY_PLACEHOLDER"] = "Target Entity"  # presumably read back by get_target_entity_placeholder() — TODO confirm
+
+def entity_type_checker(entities):
+    """Map a homogeneous list of entities to its `EntitiesToSearch` field name.
+    Raises ValueError when `entities` is empty or mixes entity types; KeyError for an unmapped type.
+    """
+    type_field_map = {
+        'Person': 'people', 'Product': 'products',
+        'Organization': 'org', 'Place': 'place',
+        'Topic': 'topic', 'Concept': 'concepts',
+        'Entity': 'companies', 'Company': 'companies',
+    }
+    unique_types = {type(entity).__name__ for entity in entities}
+    if not unique_types:
+        raise ValueError("No entities provided in the watchlist.")
+    if len(unique_types) > 1:
+        raise ValueError("Multiple entity types found in the provided watchlist.")
+    return type_field_map[unique_types.pop()]
+
+def search_by_entities(entities: list,
+    sentences: List[str],
+    start_date: str,
+    end_date: str,
+    scope: DocumentType = DocumentType.ALL,
+    fiscal_year: Optional[int] = None,
+    sources: Optional[List[str]] = None,
+    keywords: Optional[List[str]] = None,
+    control_entities: Optional[Dict] = None,
+    freq: str = "3M",
+    sort_by: SortBy = SortBy.RELEVANCE,
+    rerank_threshold: Optional[float] = None,
+    sentiment_range: Optional[SentimentRange] = None,
+    document_limit: int = 50,
+    batch_size: int = 10,
+    enhance_sentiment: bool = False,
+    **kwargs,
+) -> DataFrame:
+    """
+    Screen for documents based on the input sentences and other filters.
+
+    Args:
+        entities (list): The list of entities to use. All entities must be of the same type (i.e. Currencies, People, etc).
+        sentences (List[str]): The list of sentences to screen for.
+        start_date (str): The start date for the search.
+        end_date (str): The end date for the search.
+        scope (DocumentType): The document type scope
+            (e.g., `DocumentType.ALL`, `DocumentType.TRANSCRIPTS`).
+        fiscal_year (int): The fiscal year to filter queries.
+            If None, no fiscal year filter is applied.
+        sources (Optional[List[str]]): List of sources to filter on. If none, we search across all sources.
+        keywords (List[str]): A list of keywords for constructing keyword queries.
+            If None, no keyword queries are created.
+        control_entities (Dict): A dictionary of control entities of different types for creating co-mentions queries.
+        freq (str): The frequency of the date ranges. Defaults to '3M'.
+        sort_by (SortBy): The sorting criterion for the search results.
+            Defaults to SortBy.RELEVANCE.
+        rerank_threshold (Optional[float]): The threshold for reranking the search results.
+            See https://sdk.bigdata.com/en/latest/how_to_guides/rerank_search.html
+        sentiment_range (Optional[SentimentRange]): If given, it is AND-ed onto every batched query.
+        document_limit (int): The maximum number of documents to return per Bigdata query.
+        batch_size (int): The number of entities to include in each batched query.
+        enhance_sentiment (bool): Accepted for API compatibility; currently not used by this function.
+        **kwargs: Additional keyword arguments; not used by this function.
+
+    Returns:
+        DataFrame: The processed results, or an empty DataFrame when the search finds nothing.
+        - Index: int
+        - Columns:
+            - timestamp_utc: datetime64
+            - document_id: str
+            - sentence_id: str
+            - headline: str
+            - entity_id: str
+            - entity_country: str
+            - document_type: str
+            - entity_name: str
+            - text: str
+            - sentiment: float (if available)
+            - other_entities_name: List[str]
+            - other_entities_id: List[str]
+            - other_entities_type: List[str]
+            - entities: List[Dict[str, Any]] (key, name, country, type, start, end)
+            - reporting_entity_*: str (filings and transcripts only)
+            - masked_text, other_entities_map: presumably added by `mask_sentences`
+
+    Raises:
+        ValueError: If `entities` is not of a single type, or if no rows can be built from the results.
+    """
+    # Extract entities for search querying
+    entity_keys = [entity.id for entity in entities]
+
+    field_entity_type = entity_type_checker(entities)
+
+    # Create entity configs
+    entities_config = EntitiesToSearch(**{field_entity_type: entity_keys})
+
+    # If control_entities are provided, create a control EntityConfig.
+    # The dict is unpacked as keyword arguments, so its keys must be EntitiesToSearch fields.
+    control_entities_config = None
+    if control_entities:
+        control_entities_config = EntitiesToSearch(**control_entities)
+
+    # Build batched queries
+    batched_query = build_batched_query(
+        sentences=sentences,
+        keywords=keywords,
+        entities=entities_config,
+        control_entities=control_entities_config,
+        custom_batches=None,
+        sources=sources,
+        batch_size=batch_size,
+        fiscal_year=fiscal_year,
+        scope=scope,
+    )
+
+    batched_query = [bq & sentiment_range for bq in batched_query] if sentiment_range else batched_query
+
+    # Create list of date ranges
+    date_ranges = create_date_ranges(start_date, end_date, freq)
+
+    no_queries = len(batched_query)
+    no_dates = len(date_ranges)
+    total_no = no_dates * no_queries
+
+    print(f"Running {total_no} searches ({no_queries} queries over {no_dates} date ranges)")
+    if batched_query:  # indexing an empty query list would raise IndexError
+        print(f"Example query:\n{batched_query[0]}\n")
+
+    # Run concurrent search
+    results = run_search(
+        batched_query,
+        date_ranges=date_ranges,
+        limit=document_limit,
+        scope=scope,
+        sortby=sort_by,
+        rerank_threshold=rerank_threshold,
+    )
+
+    # A list is never None, so emptiness has to be tested instead of identity with None
+    if not list(chain.from_iterable(results)):
+        print("No results found for the given queries and date ranges.")
+        return DataFrame()  # Return empty DataFrame if no results
+
+    results, chunks_entities = filter_search_results(results)
+
+    # enhance_sentiment is deliberately not forwarded: the processor has no such parameter
+    return process_entity_search_results(
+        results=results,
+        chunks_entities=chunks_entities,
+        watchlist=entities,
+        document_type=scope,
+    )
+        
+def process_entity_search_results(
+    results: List[Document],
+    chunks_entities: List[ListQueryComponent],
+    watchlist: list,
+    document_type: DocumentType = DocumentType.NEWS,
+) -> DataFrame:
+    """
+    Build a unified DataFrame from search results for any document type.
+
+    Rows are built for each kept chunk entity that belongs to the watchlist (every kept entity when the watchlist is empty).
+
+    Args:
+        results (List[Document]): A list of Bigdata search results.
+        chunks_entities (List[ListQueryComponent]): The entities used to resolve the entity keys found in chunks.
+        watchlist (list): A list of entities to filter results and create rows for (your watchlist).
+        document_type (DocumentType): The type of documents being processed.
+
+    Returns:
+        DataFrame: Standardized screening DataFrame with consistent schema:
+        - Index: int
+        - Columns:
+            - timestamp_utc: datetime64
+            - document_id: str
+            - sentence_id: str
+            - headline: str
+            - entity_id: str
+            - entity_country: str (None when the entity has no `country` attribute)
+            - document_type: str (metadata field showing the document type)
+            - entity_name: str
+            - text: str
+            - sentiment: float (if available)
+            - other_entities_name: List[str]
+            - other_entities_id: List[str]
+            - other_entities_type: List[str]
+            - entities: List[Dict[str, Any]]
+            - masked_text, other_entities_map: presumably added by `mask_sentences`
+            - reporting_entity_name/_sector/_industry/_country/_ticker: str
+              (filings and transcripts only)
+
+    Raises:
+        ValueError: If no rows could be built from the results.
+    """
+    # Index the resolved entities by id so chunk entity keys can be looked up
+    chunks_entity_key_map = {entity.id: entity for entity in chunks_entities}
+    # Watchlist membership is tested by id; the set is built once here rather
+    # than once per chunk entity.
+    watchlist_ids = {entity.id for entity in watchlist}
+    is_reporting_doc = document_type in (DocumentType.FILINGS, DocumentType.TRANSCRIPTS)
+
+    rows = []
+
+    for result in tqdm(results, desc=f"Processing {document_type} results..."):
+        for chunk in result.chunks:
+            # Entities kept for masking are either Companies found in the chunk or entities in our watchlist.
+            # TODO: Make this more generic to handle other entity types or entity groups within entity types (i.e. Crypto within Currencies) as well.
+            chunk_entities = []
+            for entity in chunk.entities:
+                known = chunks_entity_key_map.get(entity.key)
+                # getattr tolerates unresolved keys and entity classes without `entity_type`
+                is_company = getattr(known, "entity_type", None) in ["COMP"]
+                if not is_company and entity.key not in watchlist_ids:
+                    continue
+                chunk_entities.append(
+                    {
+                        "key": entity.key,
+                        "name": known.name if known is not None else None,
+                        "country": (
+                            getattr(known, "country", None)
+                            or getattr(known, "country_code", None)
+                        ),
+                        "type": (
+                            getattr(known, "entity_type", None)
+                            or getattr(known, "type", None)
+                        ),
+                        "start": entity.start,
+                        "end": entity.end,
+                    }
+                )
+
+            if not chunk_entities:
+                continue  # Skip if no entities are mapped
+
+            # Process standard entities
+            for chunk_entity in chunk_entities:
+                entity_key = chunks_entity_key_map.get(chunk_entity["key"])
+
+                if not entity_key:
+                    continue  # Skip if entity is not found
+
+                # If the entity isn't in our original watchlist, skip
+                if watchlist and chunk_entity["key"] not in watchlist_ids:
+                    continue
+
+                # Exclude the entity from other entities
+                other_entities = [
+                    e for e in chunk_entities if e["name"] != chunk_entity["name"]
+                ]
+
+                # Collect information in standard format
+                row_dict = {"timestamp_utc": result.timestamp,
+                            "document_id": result.id,
+                            "sentence_id": f"{result.id}-{chunk.chunk}",
+                            "headline": result.headline,
+                            "entity_id": chunk_entity["key"],
+                            "entity_country": getattr(entity_key, "country", None),
+                            "document_type": document_type.value,
+                            "entity_name": entity_key.name,
+                            "text": chunk.text,
+                            "sentiment": chunk.sentiment if chunk.sentiment else None,
+                            "other_entities_name": [e["name"] for e in other_entities],
+                            "other_entities_id": [e["key"] for e in other_entities],
+                            "other_entities_type": [e["type"] for e in other_entities],
+                            "entities": chunk_entities,
+                        }
+
+                # Filings and transcripts get one row per resolvable reporting entity.
+                reporting_rows = []
+                if is_reporting_doc and result.reporting_entities:
+                    for re_key in result.reporting_entities:
+                        reporting_entity = chunks_entity_key_map.get(re_key)
+                        if reporting_entity:
+                            reporting_rows.append({
+                                **row_dict,
+                                "reporting_entity_name": reporting_entity.name,
+                                "reporting_entity_sector": reporting_entity.sector if reporting_entity.sector else None,
+                                "reporting_entity_industry": reporting_entity.industry if reporting_entity.industry else None,
+                                "reporting_entity_country": reporting_entity.country if reporting_entity.country else None,
+                                "reporting_entity_ticker": reporting_entity.ticker if reporting_entity.ticker else None,
+                            })
+
+                # Emit the plain row exactly once, and only when no enriched row exists, so that
+                # de-duplication below cannot keep a bare row over one carrying reporting details.
+                rows.extend(reporting_rows or [row_dict])
+
+    if not rows:
+        raise ValueError("No rows to process")
+
+    df = DataFrame(rows).sort_values("timestamp_utc").reset_index(drop=True)
+
+    # Deduplicate by quote text as well
+    df = df.drop_duplicates(
+        subset=["timestamp_utc", "document_id", "text", "entity_id"]
+    )
+
+    df = mask_sentences(df)
+    return df.reset_index(drop=True)
diff --git a/src/bigdata_research_tools/search/narrative_search.py b/src/bigdata_research_tools/search/narrative_search.py
index d7c8711..1f3ea88 100644
--- a/src/bigdata_research_tools/search/narrative_search.py
+++ b/src/bigdata_research_tools/search/narrative_search.py
@@ -68,6 +68,16 @@ def search_narratives(
                 - document_id: str
                 - sentence_id: str
                 - headline: str
+                - text: str
+                - sentiment: float (if available)
+                - entity: str
+                - country_code: str
+                - entity_type: str
+                - entity_id: str
+                - entity_ticker: str
+                - source_name: str (if applicable)
+                - source_rank: int (if applicable)
+                - url: str (if applicable)
     """
 
     # If control_entities are provided, create a control EntityConfig
@@ -137,9 +147,15 @@ def _process_narrative_search(
             - sentence_id: str
             - headline: str
             - text: str
+            - sentiment: float (if available)
             - entity: str
             - country_code: str
             - entity_type: str
+            - entity_id: str
+            - entity_ticker: str
+            - source_name: str (if applicable)
+            - source_rank: int (if applicable)
+            - url: str (if applicable)
     """
     rows = []
     for result in tqdm(results, desc="Processing screening results..."):
@@ -158,6 +174,7 @@ def _process_narrative_search(
                     "sentence_id": f"{result.id}-{chunk.chunk}",
                     "headline": result.headline,
                     "text": chunk.text,
+                    "sentiment": chunk.sentiment if chunk.sentiment else None,
                     "entity": [entity["name"] for entity in chunk_entities],
                     "country_code": [entity["country"] for entity in chunk_entities],
                     "entity_type": [entity["entity_type"] for entity in chunk_entities],
diff --git a/src/bigdata_research_tools/search/screener_search.py b/src/bigdata_research_tools/search/screener_search.py
index c45f692..cf94b9b 100644
--- a/src/bigdata_research_tools/search/screener_search.py
+++ b/src/bigdata_research_tools/search/screener_search.py
@@ -90,6 +90,7 @@ def search_by_companies(
             - entity_country: str
             - entity_ticker: str
             - text: str
+            - sentiment: float (if available)
             - other_entities: str
             - entities: List[Dict[str, Any]]
                 - key: str
@@ -98,6 +99,9 @@ def search_by_companies(
                 - start: int
                 - end: int
             - masked_text: str
+            - source_name: str (if applicable)
+            - source_rank: int (if applicable)
+            - url: str (if applicable)
             - other_entities_map: List[Tuple[int, str]]
     """
     workflow_start = datetime.now()
@@ -248,6 +252,7 @@ def process_screener_search_results(
             - entity_country: str
             - entity_ticker: str
             - text: str
+            - sentiment: float (if available)
             - other_entities: str
             - entities: List[Dict[str, Any]]
             - topics: List[Dict[str, Any]]
@@ -338,6 +343,7 @@ def process_screener_search_results(
                             "entity_country": reporting_entity.country,
                             "entity_ticker": reporting_entity.ticker,
                             "text": chunk.text,
+                            "sentiment": chunk.sentiment if chunk.sentiment else None,
                             "other_entities": ", ".join(
                                 e["name"] for e in other_entities
                             ),
@@ -378,6 +384,7 @@ def process_screener_search_results(
                             "entity_country": entity_key.country,
                             "entity_ticker": entity_key.ticker,
                             "text": chunk.text,
+                            "sentiment": chunk.sentiment if chunk.sentiment else None,
                             "other_entities": ", ".join(
                                 e["name"] for e in other_entities
                             ),

From 6fc6e8bbe50ffe7a5f75ac85fc6aea51ce0f4e96 Mon Sep 17 00:00:00 2001
From: Alessandro Bouchs <abouchs@ravenpack.com>
Date: Fri, 21 Nov 2025 09:02:26 +0000
Subject: [PATCH 2/2] first attempt entity risk analyzer

---
 examples/entity_risk_analyzer.py              |  83 ++++++++++++
 .../labeler/entity_labeler.py                 |  48 ++++---
 .../labeler/risk_labeler.py                   |   9 +-
 .../labeler/screener_labeler.py               |   5 +-
 .../mindmap/mindmap_utils.py                  |   2 +-
 .../portfolio/motivation.py                   |  17 ++-
 src/bigdata_research_tools/prompts/labeler.py | 126 ++++++++----------
 .../prompts/motivation.py                     |  70 +++++++++-
 .../search/entities_search.py                 |  76 +++++++++--
 .../search/screener_search.py                 |  11 +-
 .../workflows/risk_analyzer.py                | 110 ++++++++++-----
 11 files changed, 414 insertions(+), 143 deletions(-)
 create mode 100644 examples/entity_risk_analyzer.py

diff --git a/examples/entity_risk_analyzer.py b/examples/entity_risk_analyzer.py
new file mode 100644
index 0000000..643cf27
--- /dev/null
+++ b/examples/entity_risk_analyzer.py
@@ -0,0 +1,83 @@
+from pathlib import Path
+
+from bigdata_client.models.search import DocumentType
+
+from bigdata_research_tools.client import bigdata_connection
+from bigdata_research_tools.llm.base import LLMConfig
+from bigdata_research_tools.utils.observer import OberserverNotification, Observer
+from bigdata_research_tools.workflows.risk_analyzer import RiskAnalyzer
+
+
+def risk_analyzer_example(
+    risk_scenario: str,
+    llm_model_config: str | LLMConfig | dict,
+    keywords: list | None = None,
+    control_entities: dict | None = None,
+    focus: str = "",
+    export_path: str = "entity_risk_analyzer_results.xlsx",
+) -> dict:
+    """Run RiskAnalyzer for ``risk_scenario`` on the hard-coded watchlist; return ``screen_companies`` output."""
+    GRID_watchlist_ID = "8747febb-8762-40f9-bf9b-4b9d6909deb4"
+    if control_entities is None:  # build the default per call; a dict in the signature is shared
+        control_entities = {"place": ["United States", "China"]}
+    bigdata = bigdata_connection()
+    watchlist_grid = bigdata.watchlists.get(GRID_watchlist_ID)  # retrieve the watchlist object
+    entities = bigdata.knowledge_graph.get_entities(watchlist_grid.items)  # entities for its items
+
+    analyzer = RiskAnalyzer(
+        main_theme=risk_scenario,
+        entities=entities,
+        start_date="2025-09-01",
+        end_date="2025-09-30",
+        keywords=keywords,
+        document_type=DocumentType.NEWS,
+        control_entities=control_entities,
+        focus=focus,  # Optional focus to narrow the theme,
+        llm_model_config=llm_model_config,
+        ground_mindmap=False,
+    )
+
+    class PrintObserver(Observer):
+        def update(self, message: OberserverNotification):
+            print(f"Notification received: {message}")
+
+    analyzer.register_observer(PrintObserver())
+
+    return analyzer.screen_companies(export_path=export_path)
+
+
+if __name__ == "__main__":
+    import logging
+
+    from dotenv import load_dotenv
+
+    # Authentication settings are read from the environment; print what load_dotenv() reports.
+    env_loaded = load_dotenv()
+    print(f"Environment variables loaded: {env_loaded}")
+
+    # Configure logging so the library's INFO-level messages are shown.
+    logging.basicConfig()
+    library_logger = logging.getLogger("bigdata_research_tools")
+    library_logger.setLevel(logging.INFO)
+
+    results_file = Path("outputs/entity_risk_analyzer_results.xlsx")
+    results_file.parent.mkdir(parents=True, exist_ok=True)
+
+    results = risk_analyzer_example(
+        risk_scenario="US China Trade War",
+        llm_model_config=LLMConfig(model="openai::gpt-4o-mini"),
+        focus="Generate a mind map of current and future risks that metals, rare earths and semiconductors commodities traders are facing as a result of increased trade tensions between the United States and China.",
+        export_path=str(results_file),
+    )
+    # custom_config = {
+    #     'entity_column': 'Entity',
+    #     'heatmap_colorscale': 'Plasma',
+    #     'dashboard_height': 1800,
+    #     'top_themes_count': 5,
+    #     'main_title': 'Custom Thematic Analysis Dashboard'
+    # }
+    entity_df = results["df_entity"]
+    # fig, industry_fig = create_thematic_exposure_dashboard(entity_df, n_companies=15, config=custom_config)
+    # fig.show(renderer="browser")           # Shows the main dashboard
+    # industry_fig.show(renderer="browser")  # Shows the industry analysis
+    print(entity_df.head(10))  # Display the first 10 rows of the DataFrame
diff --git a/src/bigdata_research_tools/labeler/entity_labeler.py b/src/bigdata_research_tools/labeler/entity_labeler.py
index a5a9077..f09b946 100644
--- a/src/bigdata_research_tools/labeler/entity_labeler.py
+++ b/src/bigdata_research_tools/labeler/entity_labeler.py
@@ -66,6 +66,8 @@ def get_labels(
             else self.label_prompt
         )
 
+        logger.info(f"Using system prompt: {system_prompt}")
+
         prompts = self.get_prompts_for_labeler(texts, textsconfig)
 
         responses = self._run_labeling_prompts(
@@ -146,10 +148,10 @@ def post_process_dataframe(self, df: DataFrame, extra_fields: dict, extra_column
             
             columns_map = {
                     "entity_name": "Entity",
+                    "entity_type": "Entity Type",
                     "entity_country": "Country",
                     "headline": "Headline",
                     "text": "Quote",
-                    "bigdata_sentiment": "Bigdata Sentiment",
                     "sentiment": "Sentiment",
                     "motivation": "Motivation",
                     "label": "Sub-Scenario",
@@ -181,12 +183,12 @@ def post_process_dataframe(self, df: DataFrame, extra_fields: dict, extra_column
                 "Time Period",
                 "Date",
                 "Entity",
+                "Entity Type",
                 "Country",
                 "Document ID",
                 "Headline",
                 "Quote",
                 "Sentiment",
-                "Bigdata Sentiment",
                 "Motivation",
                 "Sub-Scenario",
                 "Other Entities",
@@ -194,10 +196,6 @@ def post_process_dataframe(self, df: DataFrame, extra_fields: dict, extra_column
                 "Other Entities Types"
             ]
 
-            if 'Entity Sentiment' in df.columns:
-                print("Including entity sentiment columns in export")
-                export_columns += ["Entity Sentiment", "Entity Text Sentiment"]
-
             if extra_columns:
                 export_columns += extra_columns
 
@@ -335,10 +333,10 @@ def post_process_dataframe(self, df: DataFrame, extra_fields: dict, extra_column
             
             columns_map = {
                     "entity_name": "Entity",
+                    "entity_type": "Entity Type",
                     "entity_country": "Country",
                     "headline": "Headline",
                     "text": "Quote",
-                    "bigdata_sentiment": "Bigdata Sentiment",
                     "sentiment": "Sentiment",
                     "motivation": "Motivation",
                     "label": "Theme",
@@ -364,12 +362,12 @@ def post_process_dataframe(self, df: DataFrame, extra_fields: dict, extra_column
                 "Time Period",
                 "Date",
                 "Entity",
+                "Entity Type",
                 "Country",
                 "Document ID",
                 "Headline",
                 "Quote",
                 "Sentiment",
-                "Bigdata Sentiment",
                 "Motivation",
                 "Theme",
                 "Other Entities",
@@ -382,7 +380,9 @@ def post_process_dataframe(self, df: DataFrame, extra_fields: dict, extra_column
 
             return df[export_columns]
 
-def replace_company_placeholders(row: Series) -> str:
+def replace_company_placeholders(
+    row: Series, col_name: str = "motivation"
+) -> str | list[str]:
     """
     Replace company placeholders in text.
 
@@ -394,11 +394,27 @@ def replace_company_placeholders(row: Series) -> str:
     Returns:
         Text with placeholders replaced.
     """
-    text = row["motivation"]
-    text = text.replace(get_target_entity_placeholder(), row["entity_name"])
-    if row.get("other_entities_map"):
-        for entity_id, entity_name in row["other_entities_map"]:
-            text = text.replace(
-                f"{get_other_entity_placeholder()}_{entity_id}", entity_name
-            )
+    text = row[col_name]
+    entity_type = row.get("entity_type", "COMP")
+    if isinstance(text, str):
+        text = text.replace(get_target_entity_placeholder(entity_type), row["entity_name"])
+        if row.get("other_entities_map"):
+            for entity_id, entity_name in row["other_entities_map"]:
+                text = text.replace(
+                    f"{get_other_entity_placeholder(entity_type)}_{entity_id}", entity_name
+                )
+
+    elif isinstance(text, list):
+        text = [
+            t.replace(get_target_entity_placeholder(entity_type), row["entity_name"]) for t in text
+        ]
+        if row.get("other_entities_map"):
+            for entity_id, entity_name in row["other_entities_map"]:
+                text = [
+                    t.replace(
+                        f"{get_other_entity_placeholder(entity_type)}_{entity_id}", entity_name
+                    )
+                    for t in text
+                ]
+
     return text
\ No newline at end of file
diff --git a/src/bigdata_research_tools/labeler/risk_labeler.py b/src/bigdata_research_tools/labeler/risk_labeler.py
index 6a18cb6..2cf129f 100644
--- a/src/bigdata_research_tools/labeler/risk_labeler.py
+++ b/src/bigdata_research_tools/labeler/risk_labeler.py
@@ -228,23 +228,24 @@ def replace_company_placeholders(
         Text with placeholders replaced.
     """
     text = row[col_name]
+    entity_type = row.get("entity_type", "COMP")
     if isinstance(text, str):
-        text = text.replace(get_target_entity_placeholder(), row["entity_name"])
+        text = text.replace(get_target_entity_placeholder(entity_type), row["entity_name"])
         if row.get("other_entities_map"):
             for entity_id, entity_name in row["other_entities_map"]:
                 text = text.replace(
-                    f"{get_other_entity_placeholder()}_{entity_id}", entity_name
+                    f"{get_other_entity_placeholder(entity_type)}_{entity_id}", entity_name
                 )
 
     elif isinstance(text, list):
         text = [
-            t.replace(get_target_entity_placeholder(), row["entity_name"]) for t in text
+            t.replace(get_target_entity_placeholder(entity_type), row["entity_name"]) for t in text
         ]
         if row.get("other_entities_map"):
             for entity_id, entity_name in row["other_entities_map"]:
                 text = [
                     t.replace(
-                        f"{get_other_entity_placeholder()}_{entity_id}", entity_name
+                        f"{get_other_entity_placeholder(entity_type)}_{entity_id}", entity_name
                     )
                     for t in text
                 ]
diff --git a/src/bigdata_research_tools/labeler/screener_labeler.py b/src/bigdata_research_tools/labeler/screener_labeler.py
index 28fc5a7..42aeccc 100644
--- a/src/bigdata_research_tools/labeler/screener_labeler.py
+++ b/src/bigdata_research_tools/labeler/screener_labeler.py
@@ -227,10 +227,11 @@ def replace_company_placeholders(row: Series) -> str:
         Text with placeholders replaced.
     """
     text = row["motivation"]
-    text = text.replace(get_target_entity_placeholder(), row["entity_name"])
+    entity_type = row.get("entity_type", "COMP")
+    text = text.replace(get_target_entity_placeholder(entity_type), row["entity_name"])
     if row.get("other_entities_map"):
         for entity_id, entity_name in row["other_entities_map"]:
             text = text.replace(
-                f"{get_other_entity_placeholder()}_{entity_id}", entity_name
+                f"{get_other_entity_placeholder(entity_type)}_{entity_id}", entity_name
             )
     return text
diff --git a/src/bigdata_research_tools/mindmap/mindmap_utils.py b/src/bigdata_research_tools/mindmap/mindmap_utils.py
index 0a0756f..eccae1e 100644
--- a/src/bigdata_research_tools/mindmap/mindmap_utils.py
+++ b/src/bigdata_research_tools/mindmap/mindmap_utils.py
@@ -153,7 +153,7 @@
         "user_prompt_message": "Your given Risk Scenario is: {main_theme}",
         "default_instructions": (
             "Forget all previous prompts."
-            "You are assisting a professional risk analyst tasked with creating a taxonomy to classify the impact of the Risk Scenario '**{main_theme}**' on other entities, such countries, commodities, geographical places, and organizations."
+            "You are assisting a professional risk analyst tasked with creating a taxonomy to classify the impact of the Risk Scenario '**{main_theme}**' on entities, such as countries, commodities, geographical places, and organizations."
             "Your objective is to generate a **comprehensive tree structure** that maps the **risk spillovers** stemming from the Risk Scenario '**{main_theme}**', and generates related sub-scenarios. "
             "Key Instructions:"
             "1. **Understand the Risk Scenario: '{main_theme}'**:"
diff --git a/src/bigdata_research_tools/portfolio/motivation.py b/src/bigdata_research_tools/portfolio/motivation.py
index 4f9555e..3ae10ae 100644
--- a/src/bigdata_research_tools/portfolio/motivation.py
+++ b/src/bigdata_research_tools/portfolio/motivation.py
@@ -80,12 +80,13 @@ def group_quotes_by_company(self, filtered_df: pd.DataFrame) -> dict:
         ]
 
         if missing_columns:
-            available_columns = list(filtered_df.columns)
-            raise ValueError(
-                f"Missing required columns: {missing_columns}. "
-                f"Available columns are: {available_columns}"
-            )
-
+            required_columns = ["Entity", "Quote", "Theme"]
+            missing_columns = [col for col in required_columns if col not in filtered_df.columns]
+            if missing_columns:
+                raise ValueError(
+                    f"Missing required columns: {missing_columns}. "
+                    f"Available columns are: {list(filtered_df.columns)}"
+                )
         # Check if DataFrame is empty
         if filtered_df.empty:
             logger.warning("Warning: DataFrame is empty. Returning empty dictionary.")
@@ -95,7 +96,7 @@ def group_quotes_by_company(self, filtered_df: pd.DataFrame) -> dict:
 
         # Use .get() with default values as additional safety
         for _, row in filtered_df.iterrows():
-            company = row.get("Company", "Unknown Company")
+            company = row.get("Company", "Entity")
             quote = row.get("Quote", "")
             theme = row.get("Theme", "Unknown Theme")
 
@@ -152,6 +153,7 @@ def generate_company_motivations(
         theme_name: str,
         word_range: tuple[int, int],
         use_case: MotivationType = MotivationType.THEMATIC_SCREENER,
+        entity_type: str = "COMP",
     ) -> pd.DataFrame:
         """
         Generates motivation statement with specified verbosity for companies in a thematic watchlist.
@@ -183,6 +185,7 @@ def generate_company_motivations(
                 word_range[0],
                 word_range[1],
                 use_case=use_case,
+                entity_type=entity_type,
             )
 
             # Generate motivation with this word range
diff --git a/src/bigdata_research_tools/prompts/labeler.py b/src/bigdata_research_tools/prompts/labeler.py
index a3181b3..a556805 100644
--- a/src/bigdata_research_tools/prompts/labeler.py
+++ b/src/bigdata_research_tools/prompts/labeler.py
@@ -1,12 +1,15 @@
-from os import environ
+def get_other_entity_placeholder(entity_type: str = "COMP") -> str:
+    """Return the mask for non-target entities: 'Other Company' for 'COMP', otherwise 'Other Entity'."""
+    if entity_type == "COMP":  # default keeps the former zero-argument call working
+        return "Other Company"
+    return "Other Entity"
 
 
-def get_other_entity_placeholder() -> str:
-    return environ.get("BIGDATA_OTHER_ENTITY_PLACEHOLDER", "Other Company")
-
-
-def get_target_entity_placeholder() -> str:
-    return environ.get("BIGDATA_TARGET_ENTITY_PLACEHOLDER", "Target Company")
+def get_target_entity_placeholder(entity_type: str = "COMP") -> str:
+    """Return the mask for the target entity: 'Target Company' for 'COMP', otherwise 'Target Entity'."""
+    if entity_type == "COMP":  # default keeps the former zero-argument call working
+        return "Target Company"
+    return "Target Entity"
 
 
 narrative_system_prompt_template: str = """
@@ -327,13 +330,13 @@ def get_entity_risk_system_prompt(main_theme: str, label_summaries: list) -> str
         str: The formatted prompt string
     """
     return entity_risk_system_prompt_template.format(
-        main_theme=main_theme, label_summaries=label_summaries, BIGDATA_TARGET_ENTITY_PLACEHOLDER = get_target_entity_placeholder(), BIGDATA_OTHER_ENTITY_PLACEHOLDER = get_other_entity_placeholder()
+        main_theme=main_theme, label_summaries=label_summaries
         )
 
 entity_risk_system_prompt_template: str = """Forget all previous prompts.
 
-You are assisting a professional analyst in evaluating both the exposure and risk classification for "{BIGDATA_TARGET_ENTITY_PLACEHOLDER}" regarding the Risk Scenario "{main_theme}".
-This involves a two-step process: confirming exposure of "{BIGDATA_TARGET_ENTITY_PLACEHOLDER}" and classifying specific risks if exposure is confirmed. Use the headline for contextual understanding.
+You are assisting a professional analyst in evaluating both the exposure and risk classification for "Target Entity" regarding the Risk Scenario "{main_theme}".
+This involves a two-step process: confirming exposure of "Target Entity" and classifying specific risks if exposure is confirmed. Use the headline for contextual understanding.
 
 <input_details>
 You will receive the following information::
@@ -347,149 +350,134 @@ def get_entity_risk_system_prompt(main_theme: str, label_summaries: list) -> str
 
 <exposure_assessment>
 - Examine whether the text explicitly mentions the Risk Scenario "{main_theme}" or any of its core components.
-- Ensure that "{BIGDATA_TARGET_ENTITY_PLACEHOLDER}" is the main focus of the text and that it is clearly stated that "{BIGDATA_TARGET_ENTITY_PLACEHOLDER}" is facing or will face consequences caused by the Risk Scenario "{main_theme}".
-- Assess if there are DIRECT consequences on "{BIGDATA_TARGET_ENTITY_PLACEHOLDER}’s" internal and external activities, operations, future performance, stability, sustainability, and long term growth.
-- Designate the exposure as unclear if the text lacks an explicit DIRECT link between "{BIGDATA_TARGET_ENTITY_PLACEHOLDER}" and the Risk Scenario
+- Ensure that "Target Entity" is the main focus of the text and that it is clearly stated that "Target Entity" is facing or will face consequences caused by the Risk Scenario "{main_theme}".
+- Assess if there are DIRECT consequences on "Target Entity’s" internal and external activities, operations, future performance, stability, sustainability, and long term growth.
+- Designate the exposure as unclear if the text lacks an explicit DIRECT link between "Target Entity" and the Risk Scenario
 - Designate the exposure as unclear if the text relies on generic information.
 </exposure_assessment>
 
 <risk_classification>
-If direct exposure of {BIGDATA_TARGET_ENTITY_PLACEHOLDER} is confirmed:
+If direct exposure of Target Entity is confirmed:
 
 - Identify and classify the specific risk using this list of Risk Sub-Scenarios:
     "{label_summaries}".
 
 - Follow a detailed classification process:
-    - Examine the text to confirm how the Risk Scenario "{main_theme}" directly impacts "{BIGDATA_TARGET_ENTITY_PLACEHOLDER}" through one of the Risk Sub-Scenarios.
-    - Write a concise motivation that explains the direct link between "{BIGDATA_TARGET_ENTITY_PLACEHOLDER}" and the Risk Sub-Scenario as stated in the text.
-    - The motivation should always start with "{BIGDATA_TARGET_ENTITY_PLACEHOLDER}".
-    - Identify an appropriate Risk Sub-Scenario label from the list that describes explicitly the impact on {BIGDATA_TARGET_ENTITY_PLACEHOLDER}'s internal and external activities, operations, stability, sustainability, and long term growth or performance.
+    - Examine the text to confirm how the Risk Scenario "{main_theme}" directly impacts "Target Entity" through one of the Risk Sub-Scenarios.
+    - Write a concise motivation that explains the direct link between "Target Entity" and the Risk Sub-Scenario as stated in the text.
+    - The motivation should always start with "Target Entity".
+    - Identify an appropriate Risk Sub-Scenario label from the list that describes explicitly the impact on Target Entity's internal and external activities, operations, stability, sustainability, and long term growth or performance.
     - Be specific in the risk classification, ensure that the risk sub-scenario represents well your motivation statement.
-    - Ensure that the Risk Sub-Scenario label can be directly extracted from the text that it describes with high granularity how "{BIGDATA_TARGET_ENTITY_PLACEHOLDER}" is affected.
+    - Ensure that the Risk Sub-Scenario label can be directly extracted from the text that it describes with high granularity how "Target Entity" is affected.
     - Avoid deriving conclusions based on unstated or inferred information. Focus only on the explicit content of the text or headline.
 </risk_classification>
 
 <verbatim_quotes_extraction>
-- Extract verbatim quotes from the text that support the classification and illustrate {BIGDATA_TARGET_ENTITY_PLACEHOLDER}'s exposure to the specific Risk Sub-Scenario.
+- Extract verbatim quotes from the text that support the classification and illustrate Target Entity's exposure to the specific Risk Sub-Scenario.
 - Ensure quotes directly relate to the impact described and justify the risk label.
-- Extract full sentences or phrases that clearly indicate, as standalone statements, how "{BIGDATA_TARGET_ENTITY_PLACEHOLDER}" is affected by the Risk Scenario "{main_theme}" and the Sub-Scenario label assigned.
+- Extract full sentences or phrases that clearly indicate, as standalone statements, how "Target Entity" is affected by the Risk Scenario "{main_theme}" and the Sub-Scenario label assigned.
 </verbatim_quotes_extraction>
 
-<sentiment_analysis>
-- If the text does explicitly link "{BIGDATA_TARGET_ENTITY_PLACEHOLDER}" with the Risk Scenario "{main_theme}", classify the exposure with a sentiment label speficied as follows:
-    - "negative" if the text indicates that "{BIGDATA_TARGET_ENTITY_PLACEHOLDER}" is facing or will face negative consequences due to the Risk Scenario "{main_theme}".
-    - "positive" if the text indicates that "{BIGDATA_TARGET_ENTITY_PLACEHOLDER}" is well positioned in the face of the Risk Scenario "{main_theme}", or is in a better position with respect to the past, intended as previous occurrences of the Risk Scenario from which the situation has improved, or if the text indicates that it is doing better than its peers, or than the {BIGDATA_OTHER_ENTITY_PLACEHOLDER}.
-    - "neutral" if the text indicates that "{BIGDATA_TARGET_ENTITY_PLACEHOLDER}" is neither positively nor negatively affected by the Risk Scenario "{main_theme}".
-- If the exposure is unclear, assign the sentiment label as "neutral".
-</sentiment_analysis>
-
 <response_format>
 Structure your response as a JSON object with the sentence ID as the key containing:
-"motivation": A concise explanation describing the link between "{BIGDATA_TARGET_ENTITY_PLACEHOLDER}" and the Risk Sub-Scenario.
+"motivation": A concise explanation describing the link between "Target Entity" and the Risk Sub-Scenario.
 "label": State the specific risk Sub-Scenario label or 'unclear'.
 "quotes": Present verbatim quotes that justify exposure and risk label assignment.
-"sentiment": State the sentiment label as 'negative', 'positive', or 'neutral'.
 
 Format: {{"<sentence_id>": 
 {{"motivation": "<motivation>", "label": "<risk_classification_label>",
- "quotes": "<verbatim_quotes>", "sentiment": "<sentiment_label>"}}
+ "quotes": "<verbatim_quotes>"}}
 }}.
 </response_format>
 
 <examples>
 ID: 3
 Headline: "Tariffs to Strain Supply Chains Globally"
-Text: "New tariffs against China will significantly impact {BIGDATA_TARGET_ENTITY_PLACEHOLDER}'s operations due to its reliance on raw materials from Chinese suppliers."
+Text: "New tariffs against China will significantly impact Target Entity's operations due to its reliance on raw materials from Chinese suppliers."
 Scenario: "New Tariffs against China"
 Output:
 
 {{3:{{
-  "motivation": "{BIGDATA_TARGET_ENTITY_PLACEHOLDER}'s supply operations are directly impacted by new tariffs due to their reliance on raw materials sourced from China.",
+  "motivation": "Target Entity's supply operations are directly impacted by new tariffs due to their reliance on raw materials sourced from China.",
   "label": "Supply Chain Disruption",
-  "quotes": ["New tariffs against China will significantly impact {BIGDATA_TARGET_ENTITY_PLACEHOLDER}'s operations", "reliance on raw materials from Chinese suppliers"],
-  "sentiment": "negative"}}
+    "quotes": ["New tariffs against China will significantly impact Target Entity's operations", "reliance on raw materials from Chinese suppliers"]
+}}
 }}
 
 ID: 5
 Headline: "Interest Rate Fluctuations to Affect Markets"
-Text: "{BIGDATA_TARGET_ENTITY_PLACEHOLDER}'s analysts are forecasting higher risks associated with potential interest rate changes."
+Text: "Target Entity's analysts are forecasting higher risks associated with potential interest rate changes."
 Scenario: "Interest Rate Volatility"
 Output:
 
 {{5:{{
-  "motivation": "{BIGDATA_TARGET_ENTITY_PLACEHOLDER}'s analysts are forecasting higher risks associated with potential interest rate changes.",
+  "motivation": "Target Entity's analysts are forecasting higher risks associated with potential interest rate changes.",
   "label": "unclear",
-  "quotes": [],
-  "sentiment": "neutral"}}
+  "quotes": []}}
 }}
 
 ID: 2
 Headline: "Economic Challenges Ahead Due to Tariffs on China"
-Text: "{BIGDATA_OTHER_ENTITY_PLACEHOLDER}'s analysts report a potential economic downturn in {BIGDATA_TARGET_ENTITY_PLACEHOLDER} linked to new tariffs against China."
+Text: "Other Entity's analysts report a potential economic downturn in Target Entity linked to new tariffs against China."
 Risk Scenario: "New Tariffs Against China"
 Output:
 
 {{2:{{
-  "motivation": "{BIGDATA_TARGET_ENTITY_PLACEHOLDER}'s analysts are assessing the potential economic impact of new tariffs against China.",
+  "motivation": "Target Entity's analysts are assessing the potential economic impact of new tariffs against China.",
   "label": "Economic Downturns",
-  "quotes": ["{BIGDATA_OTHER_ENTITY_PLACEHOLDER}'s analysts report a potential economic downturn in {BIGDATA_TARGET_ENTITY_PLACEHOLDER}"],
-  "sentiment": "negative"}}
+  "quotes": ["Other Entity's analysts report a potential economic downturn in Target Entity"]}}
 }}
 
 ID: 3
 Headline: "Analyzing External Factors in Business Strategy"
-Text: "{BIGDATA_OTHER_ENTITY_PLACEHOLDER} is studying external factors such as tariffs to gauge potential risks. {BIGDATA_OTHER_ENTITY_PLACEHOLDER}'s analysts report a potential economic downturn in {BIGDATA_TARGET_ENTITY_PLACEHOLDER}."
+Text: "Other Entity is studying external factors such as tariffs to gauge potential risks. Other Entity's analysts report a potential economic downturn in Target Entity."
 Risk Scenario: "New Tariffs on Semiconductors"
 Output:
 
 {{3:{{
-  "motivation": "{BIGDATA_OTHER_ENTITY_PLACEHOLDER}'s analysis of external factors does not establish a direct link to {BIGDATA_TARGET_ENTITY_PLACEHOLDER}.",
+  "motivation": "Other Entity's analysis of external factors does not establish a direct link to Target Entity.",
   "label": "unclear",
-  "quotes": [],
-  "sentiment": "neutral"}}
+  "quotes": []}}
 }}
 
 ID: 4
 Headline: "Market Trends Influence Stock Performance"
-Text: "{BIGDATA_TARGET_ENTITY_PLACEHOLDER}’s stock is influenced by broad market trends."
+Text: "Target Entity’s stock is influenced by broad market trends."
 Risk Scenario: "Increased Uncertainty and Volatility"
 Output:
 
 {{4:{{
-  "motivation": "The text does not related to the Risk Scenario and it does not mention any specific risk sub-scenario affecting {BIGDATA_TARGET_ENTITY_PLACEHOLDER}.",
+  "motivation": "The text does not relate to the Risk Scenario and it does not mention any specific risk sub-scenario affecting Target Entity.",
   "label": "unclear",
-  "quotes": [],
-  "sentiment": "neutral"}}
+  "quotes": []}}
 }}
 
 ID: 5
 
 Headline: "Tariffs and Their Economic Impact"
-Text: "{BIGDATA_OTHER_ENTITY_PLACEHOLDER} researchers estimate that tariffs will affect the broader economy in {BIGDATA_TARGET_ENTITY_PLACEHOLDER}."
+Text: "Other Entity researchers estimate that tariffs will affect the broader economy in Target Entity."
 Risk Scenario: "New Tariffs against China"
 Output:
 
 {{5:{{
-  "motivation": "{BIGDATA_TARGET_ENTITY_PLACEHOLDER} is not linked with any specific risk sub-scenario or any tangible effect of the Risk Scenario.",
+  "motivation": "Target Entity is not linked with any specific risk sub-scenario or any tangible effect of the Risk Scenario.",
   "label": "unclear",
-  "quotes": [],
-  "sentiment": "neutral"}}
+  "quotes": []}}
 }}
 
 ID: 2
 Headline: "China Tariffs Impact Supply Chains"
-Text: "According to recent reports, {BIGDATA_TARGET_ENTITY_PLACEHOLDER} is heavily dependent on China. The recent tariffs against China have forced {BIGDATA_TARGET_ENTITY_PLACEHOLDER} to reconsider its supply chain, potentially leading to increased logistics costs."
+Text: "According to recent reports, Target Entity is heavily dependent on China. The recent tariffs against China have forced Target Entity to reconsider its supply chain, potentially leading to increased logistics costs."
 Risk Scenario: "New Tariffs against China"
 Output:
 
 {{2:{{
-  "motivation": "{BIGDATA_TARGET_ENTITY_PLACEHOLDER} is said to be reconsidering its supply chain in the face of the risk scenario. The text clearly links {BIGDATA_TARGET_ENTITY_PLACEHOLDER} with the Risk Scenario and mentions an explicit Sub-scenario risk of Supply Chain Disruptions.",
+  "motivation": "Target Entity is said to be reconsidering its supply chain in the face of the risk scenario. The text clearly links Target Entity with the Risk Scenario and mentions an explicit Sub-scenario risk of Supply Chain Disruptions.",
   "label": "Supply Chain Disruption",
   "quotes": [
-    "{BIGDATA_TARGET_ENTITY_PLACEHOLDER} is heavily dependent on China",
-    "The recent tariffs against China have forced {BIGDATA_TARGET_ENTITY_PLACEHOLDER} to reconsider its supply chain, potentially leading to increased logistics costs."
+    "Target Entity is heavily dependent on China",
+    "The recent tariffs against China have forced Target Entity to reconsider its supply chain, potentially leading to increased logistics costs."
-  ],
-  "sentiment": "negative"}}
+  ]}}
 }}
 </examples>
 """
@@ -506,45 +494,45 @@ def get_entity_theme_system_prompt(main_theme: str, label_summaries: list) -> st
         str: The formatted prompt string
     """
     return entity_theme_system_prompt_template.format(
-        main_theme=main_theme, label_summaries=label_summaries, BIGDATA_TARGET_ENTITY_PLACEHOLDER = get_target_entity_placeholder()
+        main_theme=main_theme, label_summaries=label_summaries,
     )
 
 entity_theme_system_prompt_template: str = """
  Forget all previous prompts.
- You are assisting a professional analyst in evaluating the impact of the theme '{main_theme}' on an entity "{BIGDATA_TARGET_ENTITY_PLACEHOLDER}".
+ You are assisting a professional analyst in evaluating the impact of the theme '{main_theme}' on an entity "Target Entity".
  Your primary task is first, to ensure that each sentence is explicitly related to '{main_theme}', and second, to accurately associate each given sentence with
  the relevant label contained within the list '{label_summaries}'.
 
  Please adhere strictly to the following guidelines:
 
  1. **Analyze the Sentence**:
-    - Each input consists of a sentence ID, an entity name ('{BIGDATA_TARGET_ENTITY_PLACEHOLDER}'), and the sentence text.
+    - Each input consists of a sentence ID, an entity name ('Target Entity'), and the sentence text.
     - Analyze the sentence to understand if the content clearly establishes a connection to '{main_theme}'.
     - Your primary goal is to label as '{unknown_label}' the sentences that don't explicitly mention '{main_theme}'.
     - Analyze the list of labels '{label_summaries}' used for label assignment. '{label_summaries}' is a Python list variable containing distinct labels and their definition in format 'Label: Summary', you must pick label only from 'Label' part which means left side of the semicolon for each Label:Summary pair.
     - Your secondary goal is to select the most appropriate label from '{label_summaries}' that corresponds to the content of the sentence.
 
  2. **First Label Assignment**:
-    - Assign the label '{unknown_label}' to the sentence related to "{BIGDATA_TARGET_ENTITY_PLACEHOLDER}" when it does not explicitly mentions '{main_theme}'. Otherwise, don't assign a label.
+    - Assign the label '{unknown_label}' to the sentence related to "Target Entity" when it does not explicitly mentions '{main_theme}'. Otherwise, don't assign a label.
     - Evaluate each sentence independently, focusing solely on the context provided within that specific sentence.
     - Use only the information contained within the sentence for your label assignment.
-    - When evaluating the sentence, "{BIGDATA_TARGET_ENTITY_PLACEHOLDER}" must clearly mention that the entity is clearly impacted by '{main_theme}'.
+    - When evaluating the sentence, "Target Entity" must clearly mention that the entity is clearly impacted by '{main_theme}'.
     - Many sentences are only tangentially connected to the topic '{main_theme}'. These sentences must be assigned the label '{unknown_label}'.
 
  3. **Second Label Assignment**:
-    - For the sentences not labeled as '{unknown_label}' and only for them, assign a unique label from the list '{label_summaries}' to the sentence related to "{BIGDATA_TARGET_ENTITY_PLACEHOLDER}".
+    - For the sentences not labeled as '{unknown_label}' and only for them, assign a unique label from the list '{label_summaries}' to the sentence related to "Target Entity".
     - Evaluate each sentence independently, focusing solely on the context provided within that specific sentence.
     - Use only the information contained within the sentence for your label assignment.
     - Ensure that the sentence clearly establishes a connection to the label you assigned and to the theme '{main_theme}'.
     - You must not create a new label or choose a label that is not present in '{label_summaries}'.
     - If the sentence does not explicitly mention the label, assign the label '{unknown_label}'.
-    - When evaluating the sentence, "{BIGDATA_TARGET_ENTITY_PLACEHOLDER}" must clearly mention that the entity is impacted by the label assigned and '{main_theme}'.
+    - When evaluating the sentence, "Target Entity" must clearly mention that the entity is impacted by the label assigned and '{main_theme}'.
 
  4. **Response Format**:
     - Your output should be structured as a JSON object that includes:
           1. A brief motivation for your choice.
           2. The assigned label.
-    - Each entry must start with the sentence ID and contain a clear motivation that begins with "{BIGDATA_TARGET_ENTITY_PLACEHOLDER}".
+    - Each entry must start with the sentence ID and contain a clear motivation that begins with "Target Entity".
     - The motivation should explain why the label was selected from '{label_summaries}' based on the information in the sentence and in the context of '{main_theme}'. It should also justify the label that had been assigned.
     - Ensure that the exact context is understood and labels are based only on explicitly mentioned information in the sentence. Otherwise, assign the label '{unknown_label}'.
     - The assigned label should be only the string that precedes the character ':'.
diff --git a/src/bigdata_research_tools/prompts/motivation.py b/src/bigdata_research_tools/prompts/motivation.py
index 5c22488..86b7bfb 100644
--- a/src/bigdata_research_tools/prompts/motivation.py
+++ b/src/bigdata_research_tools/prompts/motivation.py
@@ -35,6 +35,34 @@ def generate_prompt_template() -> str:
     8. Keeps the statement concise ({min_words}-{max_words} words)
     """
 
+def generate_prompt_template_entity() -> str:
+    """
+    Returns the unformatted thematic-watchlist motivation prompt template for non-company entities.
+    """
+    return """
+    You are an expert financial analyst with specialized knowledge in thematic investment research.
+    Your task is to generate a concise motivation statement explaining why this entity is included in a thematic watchlist.
+
+    Theme: {theme}
+    Entity: {company}
+
+    This entity has {total_quotes} quotes related to the theme, with exposure to the following sub-themes:
+    {label_summary}
+
+    Here are the quotes with their corresponding labels:
+    {quotes_and_labels}
+
+    Generate a concise motivation statement (2-4 sentences) that:
+    1. ALWAYS begins with the entity name
+    2. Summarizes WHY this entity is included in the thematic watchlist
+    3. References the specific sub-themes (from Label column) where the entity shows strongest exposure (has the most number of elements in 'Quote' column)
+    4. For any numerical figures, make sure to quote the exact metric correctly
+    5. Uses objective, evidence-based language referring to the entity's actual activities
+    6. Maintains a neutral, analytical tone without subjective judgments
+    7. Focuses on facts rather than predictions or recommendations
+    8. Keeps the statement concise ({min_words}-{max_words} words)
+    """
+
 
 def generate_prompt_template_risk() -> str:
     """
@@ -66,6 +94,36 @@ def generate_prompt_template_risk() -> str:
     8. Keeps the statement concise ({min_words}-{max_words} words)
     """
 
+def generate_prompt_template_risk_entity() -> str:
+    """
+    Returns the unformatted risk-statement prompt template for non-company entities.
+    """
+    return """
+    You are an expert financial analyst with specialized knowledge in corporate risk assessment.
+    Your task is to generate a concise risk statement explaining the key risks this entity is exposed to based on the provided data.
+
+    Inputs:
+    Theme: {theme}
+    Entity: {company}
+
+    This entity has {total_quotes} quotes related to the theme, indicating exposure to the following risk categories:
+    {label_summary}
+
+    Here are the quotes with their corresponding labels:
+    {quotes_and_labels}
+
+    Your task:
+    Generate a concise risk statement (2-4 sentences) that:
+    1. ALWAYS begins with the entity name
+    2. Summarizes the key risks the entity faces within the specified theme
+    3. References the specific risk categories where exposure is most significant
+    4. For any numerical figures, make sure to quote the exact metric correctly
+    5. Uses objective, evidence-based language referring to the entity's actual activities
+    6. Maintains a neutral, analytical tone without subjective judgments
+    7. Focuses on facts rather than predictions or recommendations
+    8. Keeps the statement concise ({min_words}-{max_words} words)
+    """
+
 
 def get_motivation_prompt(
     company: str,
@@ -75,6 +133,7 @@ def get_motivation_prompt(
     max_words: int,
     max_data_points: int = 300,
     use_case: MotivationType = MotivationType.THEMATIC_SCREENER,
+    entity_type: str = "COMP",
 ) -> str:
     """
     Formats the motivation prompt using company data and the prompt template.
@@ -107,9 +166,16 @@ def get_motivation_prompt(
 
     match use_case:
         case MotivationType.RISK_ANALYZER:
-            prompt_template = generate_prompt_template_risk()
+            if entity_type != "COMP":
+                prompt_template = generate_prompt_template_risk_entity()
+            else:
+                prompt_template = generate_prompt_template_risk()
         case MotivationType.THEMATIC_SCREENER:
-            prompt_template = generate_prompt_template()
+            if entity_type != "COMP":
+                prompt_template = generate_prompt_template_entity()
+            else:
+                prompt_template = generate_prompt_template()
+            
         case _:
             raise ValueError(f"Unsupported use_case: {use_case}")
 
diff --git a/src/bigdata_research_tools/search/entities_search.py b/src/bigdata_research_tools/search/entities_search.py
index 6168b50..53ad04c 100644
--- a/src/bigdata_research_tools/search/entities_search.py
+++ b/src/bigdata_research_tools/search/entities_search.py
@@ -20,12 +20,9 @@
 from bigdata_research_tools.labeler.risk_labeler import (
     replace_company_placeholders,
 )
+from bigdata_research_tools.search.models import BigdataEntity
 
 
-import os
-os.environ["BIGDATA_OTHER_ENTITY_PLACEHOLDER"] = "Other Entity"
-os.environ["BIGDATA_TARGET_ENTITY_PLACEHOLDER"] = "Target Entity"
-
 def entity_type_checker(entities):
     unique_types = set(type(entity).__name__ for entity in entities)
     type_field_map = {
@@ -42,8 +39,67 @@ def entity_type_checker(entities):
         return type_field_map[unique_types.pop()]
     else:
         raise ValueError("Multiple entity types found in the provided watchlist.")
+    
+def entity_type_checker(entities: list[BigdataEntity]):
+    """
+    Return the search field name shared by every entity in the watchlist.
+
+    All entities are expected to carry the same ``entity_type`` code.
+
+    Args:
+        entities: Watchlist entities; each exposes an ``entity_type`` code
+            such as "COMP" or "PEOP". Falsy codes are treated as missing.
+
+    Returns:
+        str: The field name mapped to the single entity type found, e.g.
+            "companies", "people" or "concepts".
+
+    Raises:
+        ValueError: If the watchlist is empty, mixes several entity types, or
+            holds a missing/unsupported entity type.
+    """
+    # NOTE(review): an earlier ``entity_type_checker`` is still defined above in
+    # this module and is shadowed by this one; confirm it can be removed.
+
+    # Entity type codes that all map to the "concepts" field.
+    concept_types = (
+        "CMDT", "CURR", "NATL", "SUST", "ECON", "ORGT", "POSI",
+        "PROD", "TEAM", "SECT", "OTHR", "ASTR", "ANML", "BUSI",
+        "CHAR", "COLR", "CURT", "ELEM", "EMOT", "ETHN", "FCTY",
+        "FINC", "FRTS", "INRT", "INSE", "LAND", "LAWS", "MDCO",
+        "MSIC", "PHYS", "PLNT", "PLTC", "SCIE", "SCTY", "SESO",
+        "SHPE", "SOCI", "SPOR", "STAT", "TECH", "VEGT", "WTHR",
+    )
+    type_field_map = {
+        **dict.fromkeys(concept_types, "concepts"),
+        "COMP": "companies",
+        "PEOP": "people",
+        "ORGA": "org",
+        "PLCE": "place",
+        "TOPC": "topic",
+        # PRDT has its own field and is deliberately left out of concept_types;
+        # explicit entries come after the spread so they always take precedence.
+        # NOTE(review): confirm "products" (not "concepts") is intended for PRDT.
+        "PRDT": "products",
+    }
+
+    # Falsy entity types collapse to None so they are reported as unsupported.
+    unique_types = {entity.entity_type or None for entity in entities}
+    if not unique_types:
+        # Nothing to infer a type from.
+        raise ValueError("No entities found in the provided watchlist.")
+    if len(unique_types) > 1:
+        raise ValueError("Multiple entity types found in the provided watchlist.")
+
+    entity_type = unique_types.pop()
+    if entity_type not in type_field_map:
+        # Fail with a readable message instead of a bare KeyError.
+        raise ValueError(
+            f"Unsupported entity type in the provided watchlist: {entity_type!r}"
+        )
+    return type_field_map[entity_type]
 
-def search_by_entities(entities: list,
+def search_by_entities(entities: list[BigdataEntity],
     sentences: List[str],
     start_date: str,
     end_date: str,
@@ -172,8 +228,7 @@ def search_by_entities(entities: list,
             results=results,
             chunks_entities=chunks_entities,
             watchlist=entities,
-            document_type=scope,
-            enhance_sentiment=enhance_sentiment)
+            document_type=scope)
 
         return df        
         
@@ -280,8 +335,12 @@ def process_entity_search_results(
                             "entity_country": entity_key.country,
                             "document_type": document_type.value,
                             "entity_name": entity_key.name,
+                            "entity_type": entity_key.entity_type,
                             "text": chunk.text,
-                            "sentiment": chunk.sentiment if chunk.sentiment else None,
+                            "sentiment": chunk.sentiment,
+                            "other_entities": ", ".join(
+                                e["name"] for e in other_entities
+                            ),
                             "other_entities_name": [e["name"] for e in other_entities],
                             "other_entities_id": [e["key"] for e in other_entities],
                             "other_entities_type": [e["type"] for e in other_entities],
@@ -306,6 +365,7 @@ def process_entity_search_results(
                                     "reporting_entity_industry": reporting_entity.industry if reporting_entity.industry else None,
                                     "reporting_entity_country": reporting_entity.country if reporting_entity.country else None,
                                     "reporting_entity_ticker": reporting_entity.ticker if reporting_entity.ticker else None,
+                                    "reporting_entity_type": reporting_entity.entity_type if reporting_entity.entity_type else None,
                                 })
                                 rows.append(row_dict_copy)
                 else:
diff --git a/src/bigdata_research_tools/search/screener_search.py b/src/bigdata_research_tools/search/screener_search.py
index cf94b9b..0343787 100644
--- a/src/bigdata_research_tools/search/screener_search.py
+++ b/src/bigdata_research_tools/search/screener_search.py
@@ -32,7 +32,7 @@
 
 
 def search_by_companies(
-    companies: list[Company],
+    companies: list[BigdataEntity],
     sentences: list[str],
     start_date: str,
     end_date: str,
@@ -338,6 +338,7 @@ def process_screener_search_results(
                             "document_type": document_type.value,
                             "is_reporting_entity": True,
                             "entity_name": reporting_entity.name,
+                            "entity_type": reporting_entity.entity_type,
                             "entity_sector": reporting_entity.sector,
                             "entity_industry": reporting_entity.industry,
                             "entity_country": reporting_entity.country,
@@ -379,6 +380,7 @@ def process_screener_search_results(
                             "document_type": document_type.value,
                             "is_reporting_entity": False,
                             "entity_name": entity_key.name,
+                            "entity_type": entity_key.entity_type,
                             "entity_sector": entity_key.sector,
                             "entity_industry": entity_key.industry,
                             "entity_country": entity_key.country,
@@ -462,6 +464,7 @@ def mask_entity_coordinates(
     # Process each row
     for idx, row in df.iterrows():
         text = row["text"]
+        entity_type = row["entity_type"]
         entities = sorted(row["entities"], key=lambda x: x["start"], reverse=True)
         masked_text = text
 
@@ -480,20 +483,20 @@ def mask_entity_coordinates(
 
             if entity["key"] == row["entity_id"]:
                 # Mask target entity
-                masked_text = f"{masked_text[:start]}{get_target_entity_placeholder()}{masked_text[end:]}"
+                masked_text = f"{masked_text[:start]}{get_target_entity_placeholder(entity_type)}{masked_text[end:]}"
 
             elif start not in target_start and end not in target_end:
                 # Mask other entities
                 if entity["key"] not in entity_counter:
                     entity_counter[entity["key"]] = i
-                    mask = f"{get_other_entity_placeholder()}_{entity_counter[entity['key']]}"
+                    mask = f"{get_other_entity_placeholder(entity_type)}_{entity_counter[entity['key']]}"
                     masked_text = f"{masked_text[:start]}{mask}{masked_text[end:]}"
                     other_entity_map.append(
                         (entity_counter[entity["key"]], entity["name"])
                     )
                     i += 1
                 else:
-                    mask = f"{get_other_entity_placeholder()}_{entity_counter[entity['key']]}"
+                    mask = f"{get_other_entity_placeholder(entity_type)}_{entity_counter[entity['key']]}"
                     masked_text = f"{masked_text[:start]}{mask}{masked_text[end:]}"
                     other_entity_map.append(
                         (entity_counter[entity["key"]], entity["name"])
diff --git a/src/bigdata_research_tools/workflows/risk_analyzer.py b/src/bigdata_research_tools/workflows/risk_analyzer.py
index 592af0e..ce6c653 100644
--- a/src/bigdata_research_tools/workflows/risk_analyzer.py
+++ b/src/bigdata_research_tools/workflows/risk_analyzer.py
@@ -1,13 +1,15 @@
 from datetime import datetime
 from logging import Logger, getLogger
 
-from bigdata_client.models.entities import Company
+#from bigdata_client.models.entities import Company
+from bigdata_client.models.entities import QueryComponentMixin
 from bigdata_client.models.search import DocumentType
 from pandas import DataFrame, merge
 
 from bigdata_research_tools.client import init_bigdata_client
 from bigdata_research_tools.excel import check_excel_dependencies, save_to_excel
 from bigdata_research_tools.labeler.risk_labeler import RiskLabeler, map_risk_category
+from bigdata_research_tools.labeler.entity_labeler import EntityRiskLabeler
 from bigdata_research_tools.llm.base import LLMConfig
 from bigdata_research_tools.mindmap.mindmap import (
     MindMap,
@@ -15,7 +17,9 @@
 from bigdata_research_tools.mindmap.mindmap_generator import MindMapGenerator
 from bigdata_research_tools.portfolio.motivation import Motivation
 from bigdata_research_tools.prompts.motivation import MotivationType
+from bigdata_research_tools.search.models import BigdataEntity
 from bigdata_research_tools.search.screener_search import search_by_companies
+from bigdata_research_tools.search.entities_search import entity_type_checker, search_by_entities
 from bigdata_research_tools.tracing import (
     WorkflowStatus,
     WorkflowTraceEvent,
@@ -34,7 +38,7 @@ def __init__(
         self,
         llm_model_config: str | LLMConfig | dict,
         main_theme: str,
-        companies: list[Company],
+        entities: list[QueryComponentMixin],
         start_date: str,
         end_date: str,
         document_type: DocumentType,
@@ -70,7 +74,6 @@ def __init__(
         """
         super().__init__()
         self.main_theme = main_theme
-        self.companies = companies
         self.start_date = start_date
         self.end_date = end_date
         self.fiscal_year = fiscal_year
@@ -89,6 +92,12 @@ def __init__(
         elif isinstance(llm_model_config, LLMConfig):
             self.llm_model_config = llm_model_config
 
+        ## entity casting
+        self.entities = [BigdataEntity.from_sdk(entity) for entity in entities]
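+        # Derive the watchlist's entity type. NOTE(review): entity_type_checker returns a field
+        # name such as 'companies', while this class compares the value with "COMP" — confirm.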
+        self.entity_type = entity_type_checker(self.entities)
+
     def create_taxonomy(self):
         """Create a risk taxonomy based on the main theme and focus.
         Returns:
@@ -106,13 +115,15 @@ def create_taxonomy(self):
         mindmap_generator = MindMapGenerator(
             llm_model_config_base=self.llm_model_config
         )
+        map_type = "risk" if self.entity_type == "COMP" else "risk_entity"
+        logger.info(f"Generating {map_type} mindmap")
         risk_tree, _ = mindmap_generator.generate_one_shot(
             main_theme=self.main_theme,
             focus=self.focus,
             allow_grounding=self.ground_mindmap,
             instructions=None,
             date_range=None,
-            map_type="risk",
+            map_type=map_type,
         )
 
         risk_summaries = risk_tree.get_terminal_summaries()
@@ -143,22 +154,44 @@ def retrieve_results(
         """
 
         ## To Do: import the search class and make search_by_companies a class method
-        df_sentences = search_by_companies(
-            companies=self.companies,
-            sentences=sentences,
-            start_date=self.start_date,
-            end_date=self.end_date,
-            scope=self.document_type,
-            keywords=self.keywords,
-            control_entities=self.control_entities,
-            fiscal_year=self.fiscal_year,
-            sources=self.sources,
-            rerank_threshold=self.rerank_threshold,
-            frequency=frequency,
-            document_limit=document_limit,
-            batch_size=batch_size,
-            workflow_name=RiskAnalyzer.name,
-        )
+        if self.entity_type == "COMP":
+            logger.info("Searching by companies")
+            df_sentences = search_by_companies(
+                companies=self.entities,
+                sentences=sentences,
+                start_date=self.start_date,
+                end_date=self.end_date,
+                scope=self.document_type,
+                keywords=self.keywords,
+                control_entities=self.control_entities,
+                fiscal_year=self.fiscal_year,
+                sources=self.sources,
+                rerank_threshold=self.rerank_threshold,
+                frequency=frequency,
+                document_limit=document_limit,
+                batch_size=batch_size,
+                workflow_name=RiskAnalyzer.name,
+            )
+        else:
+            logger.info("Searching by entities")
+            df_sentences = search_by_entities(
+                entities=self.entities,
+                sentences=sentences,
+                start_date=self.start_date,
+                end_date=self.end_date,
+                scope=self.document_type,
+                keywords=self.keywords,
+                control_entities=self.control_entities,
+                fiscal_year=self.fiscal_year,
+                sources=self.sources,
+                rerank_threshold=self.rerank_threshold,
+                frequency=frequency,
+                document_limit=document_limit,
+                batch_size=batch_size,
+                workflow_name=RiskAnalyzer.name,
+            )
+
+            logger.info(f"Search by entities returned {(df_sentences[['entity_type','masked_text']].head(5))} results")
 
         return df_sentences
 
@@ -204,11 +237,18 @@ def label_search_results(
             DataFrame: The port-processed DataFrame with labeled search results.
         """
 
-        prompt_fields = self._add_prompt_fields(df_sentences, additional_prompt_fields)
+        
         # Label the search results with our theme labels
         ## To Do: generalize the labeler or pass it as an argument
         # to allow for different labelers to be used.
-        labeler = RiskLabeler(llm_model_config=self.llm_model_config)
+        if self.entity_type == "COMP":
+            logger.info("Using RiskLabeler for labeling")
+            labeler = RiskLabeler(llm_model_config=self.llm_model_config)
+            prompt_fields = self._add_prompt_fields(df_sentences, additional_prompt_fields)
+        else:
+            logger.info("Using EntityRiskLabeler for labeling")
+            labeler = EntityRiskLabeler(llm_model_config=self.llm_model_config)
+            prompt_fields = None
         df_labels = labeler.get_labels(
             main_theme=self.main_theme,
             labels=terminal_labels,
@@ -262,15 +302,23 @@ def generate_results(
         if df_labeled.empty:
             logger.warning("Empty dataframe: no relevant content")
             return df_company, df_industry
+        
+        columns_needed = ["Company", "Ticker", "Sector", "Industry"] if self.entity_type == "COMP" else ["Entity", "Entity Type"]
 
         df_company = get_scored_df(
             df_labeled,
-            index_columns=["Company", "Ticker", "Sector", "Industry"],
+            index_columns=columns_needed,
             pivot_column="Sub-Scenario",
         )
-        df_industry = get_scored_df(
+
+        if self.entity_type == "COMP":
+            df_industry = get_scored_df(
             df_labeled, index_columns=["Industry"], pivot_column="Sub-Scenario"
         )
+        else:
+            df_industry = get_scored_df(
+            df_labeled, index_columns=["Entity Type"], pivot_column="Sub-Scenario"
+        )
 
         motivation_generator = Motivation(llm_model_config=self.llm_model_config)
         motivation_df = motivation_generator.generate_company_motivations(
@@ -278,8 +326,8 @@ def generate_results(
             theme_name=self.main_theme,
             word_range=word_range,
             use_case=MotivationType.RISK_ANALYZER,
+            entity_type=self.entity_type,
         )
-        print(motivation_df)
 
         return df_company, df_industry, motivation_df
 
@@ -301,13 +349,14 @@ def save_results(
             df_industry (DataFrame): The DataFrame with the output by industry.
             export_path (str): The path to export the results to.
         """
+        keys = ('By Company', 'By Industry') if self.entity_type == "COMP" else ('By Entity', 'By Entity Type')
         if export_path:
             save_to_excel(
                 file_path=export_path,
                 tables={
                     "Semantic Labels": (df_labeled, (0, 0)),
-                    "By Company": (df_company, (2, 4)),
-                    "By Industry": (df_industry, (2, 2)),
+                    keys[0]: (df_company, (2, 4)),
+                    keys[1]: (df_industry, (2, 2)),
                     "Motivations": (motivation_df, (0, 0)),
                 },
             )
@@ -377,7 +426,7 @@ def screen_companies(
                 batch_size=batch_size,
             )
             self.notify_observers(
-                f"Search completed. {len(df_sentences)} chunks found for {len(self.companies)} companies."
+                f"Search completed. {len(df_sentences)} chunks found for {len(self.entities)} entities."
             )
             self.notify_observers(
                 df_sentences[
@@ -444,10 +493,11 @@ def screen_companies(
                     status=workflow_status,
                 ),
             )
+        keys = ('df_company', 'df_industry') if self.entity_type == "COMP" else ('df_entity', 'df_entity_type')
         return {
             "df_labeled": df_labeled,
-            "df_company": df_company,
-            "df_industry": df_industry,
+            keys[0]: df_company,
+            keys[1]: df_industry,
             "df_motivation": df_motivation,
             "risk_tree": risk_tree,
         }