NVIDIA · AbhiramDwivedi · Jul 1, 2026
diff --git a/src/skillspector/nodes/analyzers/__init__.py b/src/skillspector/nodes/analyzers/__init__.py
@@ -42,6 +42,9 @@
 from skillspector.nodes.analyzers.static_patterns_data_exfiltration import (
     node as static_patterns_data_exfiltration_node,
 )
+from skillspector.nodes.analyzers.static_patterns_deserialization import (
+    node as static_patterns_deserialization_node,
+)
 from skillspector.nodes.analyzers.static_patterns_excessive_agency import (
     node as static_patterns_excessive_agency_node,
 )
@@ -92,6 +95,7 @@
     "static_patterns_agent_snooping",
     "static_patterns_anti_refusal",
     "static_patterns_ssrf",
+    "static_patterns_deserialization",
     "static_yara",
     "behavioral_ast",
     "behavioral_taint_tracking",
@@ -118,6 +122,7 @@
     "static_patterns_agent_snooping": static_patterns_agent_snooping_node,
     "static_patterns_anti_refusal": static_patterns_anti_refusal_node,
     "static_patterns_ssrf": static_patterns_ssrf_node,
+    "static_patterns_deserialization": static_patterns_deserialization_node,
     "static_yara": static_yara_node,
     "behavioral_ast": behavioral_ast_node,
     "behavioral_taint_tracking": behavioral_taint_tracking_node,

diff --git a/src/skillspector/nodes/analyzers/behavioral_ast.py b/src/skillspector/nodes/analyzers/behavioral_ast.py
@@ -84,6 +84,34 @@
     }
 )
 
+# Deserializers that reconstruct arbitrary objects (or execute code) from their
+# input, regardless of arguments. Feeding attacker-controlled bytes to any of
+# these is equivalent to code execution: unpickling calls the callables recorded via
+# ``__reduce__``, ``yaml.unsafe_load`` constructs arbitrary Python objects, etc.
+# ``yaml.load``/``torch.load``/``numpy.load`` are handled separately because
+# their safety depends on arguments (see ``_deserialization_message``).
+_DESERIALIZATION_SINKS = frozenset(
+    {
+        "pickle.load",
+        "pickle.loads",
+        "cPickle.load",
+        "cPickle.loads",
+        "_pickle.load",
+        "_pickle.loads",
+        "marshal.load",
+        "marshal.loads",
+        "dill.load",
+        "dill.loads",
+        "jsonpickle.decode",
+        "pandas.read_pickle",
+        "joblib.load",
+        "yaml.unsafe_load",
+    }
+)
+
+# Loader classes (pure-Python and LibYAML ``C*`` variants) that make ``yaml.load`` safe.
+_SAFE_YAML_LOADERS = frozenset({"SafeLoader", "CSafeLoader", "BaseLoader", "CBaseLoader"})
+
 _RULE_MESSAGES: dict[str, str] = {
     "AST1": "exec() call detected",
     "AST2": "eval() call detected",
@@ -94,6 +122,7 @@
     "AST7": "Dynamic attribute access via getattr()",
     "AST8": "Dangerous execution chain",
     "AST9": "Reflective dangerous call via getattr() with a literal sink name",
+    "AST10": "Insecure deserialization of untrusted data",
 }
 
 _RULE_SEVERITIES: dict[str, Severity] = {
@@ -106,6 +135,7 @@
     "AST7": Severity.LOW,
     "AST8": Severity.CRITICAL,
     "AST9": Severity.HIGH,
+    "AST10": Severity.MEDIUM,
 }
 
 _RULE_CONFIDENCES: dict[str, float] = {
@@ -118,6 +148,7 @@
     "AST7": 0.50,
     "AST8": 0.95,
     "AST9": 0.85,
+    "AST10": 0.70,
 }
 
 _TAG = "Dangerous Code Execution"
@@ -148,6 +179,57 @@ def _contains_dangerous_source(node: ast.AST, aliases: dict[str, str] | None = N
     return None
 
 
+def _loader_arg_name(node: ast.expr) -> str | None:
+    """Give the last identifier of a yaml ``Loader`` expression (``yaml.SafeLoader`` → 'SafeLoader')."""
+    match node:
+        case ast.Attribute(attr=trailing) | ast.Name(id=trailing):
+            return trailing
+        case _:
+            return None
+
+
+def _kwarg_is_true(node: ast.Call, name: str) -> bool:
+    """Report whether *node* passes keyword *name* as the literal constant ``True``."""
+    for item in node.keywords:
+        if item.arg == name and isinstance(item.value, ast.Constant) and item.value.value is True:
+            return True
+    return False
+
+
+def _deserialization_message(call_name: str, node: ast.Call) -> str | None:
+    """Return an AST10 message if *node* is an unsafe deserialization call, else None.
+
+    ``_DESERIALIZATION_SINKS`` are unconditionally unsafe. ``yaml.load``, ``torch.load``,
+    and ``numpy.load`` are argument-dependent: an explicit safe ``Loader``,
+    ``weights_only=True``, or the default ``allow_pickle=False`` respectively make them
+    safe and must not be flagged (avoids false positives on the hardened forms).
+    ``numpy.load`` accepts ``allow_pickle`` positionally (third argument), so a
+    literal ``True`` there is flagged exactly like the keyword form.
+    """
+    if call_name in _DESERIALIZATION_SINKS:
+        return f"Insecure deserialization: {call_name}()"
+    if call_name == "yaml.load":
+        for kw in node.keywords:
+            if kw.arg == "Loader":
+                if _loader_arg_name(kw.value) in _SAFE_YAML_LOADERS:
+                    return None
+                return "Insecure deserialization: yaml.load() with an unsafe Loader"
+        if len(node.args) >= 2 and _loader_arg_name(node.args[1]) in _SAFE_YAML_LOADERS:
+            return None
+        return "Insecure deserialization: yaml.load() without SafeLoader"
+    if call_name == "torch.load":
+        if _kwarg_is_true(node, "weights_only"):
+            return None
+        return "Insecure deserialization: torch.load() without weights_only=True"
+    if call_name == "numpy.load":
+        # Signature: numpy.load(file, mmap_mode=None, allow_pickle=False, ...).
+        third = node.args[2] if len(node.args) >= 3 else None
+        positional = isinstance(third, ast.Constant) and third.value is True
+        if positional or _kwarg_is_true(node, "allow_pickle"):
+            return "Insecure deserialization: numpy.load(allow_pickle=True)"
+    return None
+
+
 def _analyze_python(content: str, file_path: str) -> list[AnalyzerFinding]:
     try:
         tree = ast.parse(content, filename=file_path)
@@ -223,6 +305,9 @@ def _emit(
             if attr in _OS_EXEC_CALLS:
                 _emit("AST5", lineno, end_lineno)
 
+        elif (deser_msg := _deserialization_message(call_name, ast_node)) is not None:
+            _emit("AST10", lineno, end_lineno, deser_msg)
+
         elif call_name == "getattr" and len(ast_node.args) >= 2:
             second_arg = ast_node.args[1]
             if not isinstance(second_arg, ast.Constant):

diff --git a/src/skillspector/nodes/analyzers/behavioral_taint_tracking.py b/src/skillspector/nodes/analyzers/behavioral_taint_tracking.py
@@ -134,7 +134,32 @@
     }
 )
 
-_ALL_SINKS = _NETWORK_OUTPUT_SINKS | _EXEC_SINKS | _FILE_WRITE_SINKS
+# Deserializers that reconstruct arbitrary objects / execute code on their input.
+# When untrusted data (network, user, or a bundled/downloaded file) reaches one of
+# these, it is an RCE-class flow — the deserialization analogue of _EXEC_SINKS.
+# Only unconditionally-unsafe names are listed; argument-dependent forms
+# (yaml.load / torch.load / numpy.load) are handled by behavioral_ast (AST10) where
+# keyword arguments can be inspected without false positives on the hardened forms.
+_DESERIALIZATION_SINKS = frozenset(
+    {
+        "pickle.load",
+        "pickle.loads",
+        "cPickle.load",
+        "cPickle.loads",
+        "_pickle.load",
+        "_pickle.loads",
+        "marshal.load",
+        "marshal.loads",
+        "dill.load",
+        "dill.loads",
+        "jsonpickle.decode",
+        "pandas.read_pickle",
+        "joblib.load",
+        "yaml.unsafe_load",
+    }
+)
+
+_ALL_SINKS = _NETWORK_OUTPUT_SINKS | _EXEC_SINKS | _FILE_WRITE_SINKS | _DESERIALIZATION_SINKS
 
 # Pre-computed for _pick_rule — avoids rebuilding the union on every call.
 _EXTERNAL_INPUT_SOURCES = _NETWORK_INPUT_SOURCES | _USER_INPUT_SOURCES
@@ -145,6 +170,7 @@
     "TT3": Severity.CRITICAL,
     "TT4": Severity.HIGH,
     "TT5": Severity.CRITICAL,
+    "TT6": Severity.HIGH,
 }
 
 _RULE_CONFIDENCES: dict[str, float] = {
@@ -153,6 +179,7 @@
     "TT3": 0.90,
     "TT4": 0.80,
     "TT5": 0.90,
+    "TT6": 0.85,
 }
 
 _TAG = "Data Flow"
@@ -168,6 +195,7 @@
     (_NETWORK_OUTPUT_SINKS, "network output"),
     (_EXEC_SINKS, "code execution"),
     (_FILE_WRITE_SINKS, "file write"),
+    (_DESERIALIZATION_SINKS, "deserialization"),
 ]
 
 
@@ -204,6 +232,10 @@ def _pick_rule(source_name: str, sink_name: str, is_direct: bool) -> str:
         return "TT4"
     if source_name in _EXTERNAL_INPUT_SOURCES and sink_name in _EXEC_SINKS:
         return "TT5"
+    if sink_name in _DESERIALIZATION_SINKS and (
+        source_name in _EXTERNAL_INPUT_SOURCES or source_name in _FILE_READ_SOURCES
+    ):
+        return "TT6"
     return "TT1" if is_direct else "TT2"
 
 

diff --git a/src/skillspector/nodes/analyzers/pattern_defaults.py b/src/skillspector/nodes/analyzers/pattern_defaults.py
@@ -41,6 +41,7 @@ class PatternCategory(StrEnum):
     AGENT_SNOOPING = "Agent Snooping"
     ANTI_REFUSAL = "Anti-Refusal"
     SERVER_SIDE_REQUEST_FORGERY = "Server-Side Request Forgery"
+    DESERIALIZATION = "Insecure Deserialization"
 
 
 # Pattern-specific explanations (why the finding is dangerous)
@@ -100,6 +101,7 @@ class PatternCategory(StrEnum):
     "TT3": "Credentials or environment variables flow to a network sink. This is a high-confidence indicator of credential exfiltration.",
     "TT4": "File contents flow to a network sink. This may indicate data exfiltration of sensitive files.",
     "TT5": "External input (network, user) flows to a code execution sink. This enables remote code execution or command injection.",
+    "TT6": "External input or file contents flow to an insecure deserializer (pickle, marshal, dill, jsonpickle, joblib, yaml.unsafe_load). Deserializing untrusted data reconstructs arbitrary objects and enables remote code execution.",
     # Behavioral AST (B.2.1)
     "AST1": "Direct exec() call allows arbitrary code execution. An attacker can inject code that runs with the full privileges of the process.",
     "AST2": "Direct eval() call evaluates arbitrary expressions. This can be exploited to execute malicious code or exfiltrate data.",
@@ -110,6 +112,7 @@ class PatternCategory(StrEnum):
     "AST7": "Dynamic getattr() with a non-literal attribute name can access arbitrary object attributes, potentially bypassing access controls.",
     "AST8": "A dangerous execution chain combines code execution (exec/eval) with a dynamic source (network, encoded data, dynamic import), creating a high-confidence attack vector.",
     "AST9": "Reflective access to an execution sink via getattr() with a constant name (e.g. getattr(os, 'system'), getattr(builtins, 'exec')) is functionally identical to a direct exec/os.system call but evades name-based detection. This is a deliberate evasion technique rather than idiomatic code.",
+    "AST10": "Untrusted data is passed to an insecure deserializer (pickle, marshal, dill, jsonpickle, joblib, yaml.load without a safe Loader, or torch.load without weights_only). These deserializers reconstruct arbitrary objects and invoke callables during loading, so deserializing attacker-controlled bytes is equivalent to arbitrary code execution.",
     # YARA (B.1.12)
     "YR1": "YARA rule matched a known malware signature (reverse shell, backdoor, ransomware, C2 framework, or info stealer).",
     "YR2": "YARA rule matched a known webshell pattern (PHP, Python, JSP, or ASPX webshell).",
@@ -137,6 +140,11 @@ class PatternCategory(StrEnum):
     "SSRF1": "Code accesses a cloud instance metadata endpoint (e.g. 169.254.169.254). A single request can return temporary IAM credentials, making this a high-value SSRF target for credential theft.",
     "SSRF2": "Code issues a request to a loopback, link-local, or private-range host. This can reach internal services not meant to be exposed and is a common SSRF pivot.",
     "SSRF3": "Request target host is built from a dynamic or untrusted value. If the host is attacker-influenced, this enables SSRF to arbitrary internal or metadata endpoints.",
+    # Insecure Deserialization (multi-language)
+    "DS1": "PHP unserialize() on untrusted input enables object injection: crafted serialized data instantiates arbitrary classes and triggers magic methods (__wakeup/__destruct), leading to POP-chain code execution.",
+    "DS2": "Ruby Marshal.load / Marshal.restore reconstructs arbitrary objects from a binary blob. On attacker-controlled input this is a well-known remote code execution vector.",
+    "DS3": "Ruby YAML.load / Psych.load / Oj.load (in object mode) can instantiate arbitrary Ruby objects from untrusted YAML/JSON, enabling code execution. Use YAML.safe_load or Psych.safe_load instead.",
+    "DS4": "JavaScript deserialization via node-serialize/funcster/serialize-to-js evaluates embedded functions (the _$$ND_FUNC$$_ marker), so unserializing attacker input executes arbitrary code in the Node process.",
 }
 
 # Rule ID -> category (for report output)
@@ -187,6 +195,7 @@ class PatternCategory(StrEnum):
     "TT3": PatternCategory.DATA_EXFILTRATION.value,
     "TT4": PatternCategory.DATA_EXFILTRATION.value,
     "TT5": PatternCategory.PRIVILEGE_ESCALATION.value,
+    "TT6": PatternCategory.DESERIALIZATION.value,
     # YARA (B.1.12)
     "YR1": PatternCategory.YARA_MATCH.value,
     "YR2": PatternCategory.YARA_MATCH.value,
@@ -214,6 +223,11 @@ class PatternCategory(StrEnum):
     "SSRF1": PatternCategory.SERVER_SIDE_REQUEST_FORGERY.value,
     "SSRF2": PatternCategory.SERVER_SIDE_REQUEST_FORGERY.value,
     "SSRF3": PatternCategory.SERVER_SIDE_REQUEST_FORGERY.value,
+    # Insecure Deserialization (multi-language)
+    "DS1": PatternCategory.DESERIALIZATION.value,
+    "DS2": PatternCategory.DESERIALIZATION.value,
+    "DS3": PatternCategory.DESERIALIZATION.value,
+    "DS4": PatternCategory.DESERIALIZATION.value,
 }
 
 # Rule ID -> pattern display name (for report output)
@@ -291,6 +305,14 @@ class PatternCategory(StrEnum):
     "SSRF1": "Cloud Metadata Access",
     "SSRF2": "Internal Network Request",
     "SSRF3": "Dynamic Request Target",
+    # Behavioral AST / Taint deserialization
+    "AST10": "Insecure Deserialization",
+    "TT6": "Untrusted Data to Deserializer Flow",
+    # Insecure Deserialization (multi-language)
+    "DS1": "PHP Object Injection",
+    "DS2": "Ruby Marshal Deserialization",
+    "DS3": "Unsafe Ruby YAML Deserialization",
+    "DS4": "Unsafe JavaScript Deserialization",
 }
 
 # Pattern-specific remediations (how to fix the issue)
@@ -354,12 +376,14 @@ class PatternCategory(StrEnum):
     "AST7": "Replace dynamic getattr() with explicit attribute access or a dictionary lookup with an allowlist of permitted attributes.",
     "AST8": "Remove the execution chain entirely. Never pass network data, decoded bytes, or dynamically imported code to exec()/eval(). Use structured data formats instead.",
     "AST9": "Call the function directly instead of reflectively (write exec(...) / os.system(...) explicitly), or remove it. If reflection is genuinely required, restrict it to an allowlist of safe attribute names that excludes execution sinks.",
+    "AST10": "Never deserialize untrusted input with pickle/marshal/dill/jsonpickle/joblib. Use a data-only format such as JSON. For YAML use yaml.safe_load; for PyTorch use torch.load(..., weights_only=True); for numpy avoid allow_pickle=True. If a binary format is unavoidable, verify an HMAC/signature over the bytes before loading.",
     # Behavioral Taint Tracking (B.2.2)
     "TT1": "Add validation or sanitization between the data source and sink. Never pass raw source data directly to a sink without checking its content.",
     "TT2": "Validate tainted variables before passing them to sinks. Use allowlists, type checks, or sanitization functions on data from external sources.",
     "TT3": "Never send credentials or environment variables over the network. Use secure credential stores and avoid transmitting secrets in request bodies or URLs.",
     "TT4": "Validate and filter file contents before sending over the network. Ensure sensitive files (credentials, configs) are never transmitted to external endpoints.",
     "TT5": "Never pass external input to exec(), eval(), os.system(), or subprocess without strict validation. Use allowlists and parameterized commands instead.",
+    "TT6": "Do not deserialize external input or bundled/downloaded files with pickle/marshal/dill/jsonpickle/joblib/yaml.unsafe_load. Use JSON or another data-only format, and verify integrity (HMAC/signature) before loading any binary blob.",
     # YARA (B.1.12)
     "YR1": "Remove the malware payload or compromised file entirely. Investigate how it entered the skill and audit all other artifacts for additional indicators of compromise.",
     "YR2": "Remove the webshell code immediately. Webshells provide unauthorized remote command execution. Audit the skill for additional backdoors or persistence mechanisms.",
@@ -387,6 +411,11 @@ class PatternCategory(StrEnum):
     "SSRF1": "Remove access to cloud metadata endpoints unless strictly required. If metadata is needed, restrict it (e.g. IMDSv2 with hop limit) and never expose returned credentials.",
     "SSRF2": "Avoid requests to loopback/link-local/private hosts from skill code. If internal access is intended, document it and validate the target against an allowlist.",
     "SSRF3": "Do not build request URLs from untrusted input. Validate the host against an allowlist and reject internal/metadata addresses before issuing the request.",
+    # Insecure Deserialization (multi-language)
+    "DS1": "Avoid unserialize() on untrusted PHP input. Use json_decode() for data, or restrict allowed classes via the second argument: unserialize($data, ['allowed_classes' => false]).",
+    "DS2": "Never call Marshal.load/Marshal.restore on untrusted data. Use JSON.parse for data exchange; Marshal is only safe for data you produced and trust.",
+    "DS3": "Replace YAML.load/Psych.load with YAML.safe_load (or Psych.safe_load) and pass an explicit permitted-classes allowlist. For Oj, avoid :object mode on untrusted input.",
+    "DS4": "Do not use node-serialize/funcster/serialize-to-js to deserialize untrusted input. Use JSON.parse for data, which never executes embedded code.",
 }