Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions lib/ex_saml/core/binding.ex
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ defmodule ExSaml.Core.Binding do
saml_response
|> :base64.decode()
|> :zlib.unzip()
|> to_charlist()
|> :binary.bin_to_list()

{xml, _} = :xmerl_scan.string(xml_data, namespace_conformant: true, allow_entities: false)
xml
Expand All @@ -77,11 +77,11 @@ defmodule ExSaml.Core.Binding do

xml_data =
try do
:zlib.unzip(data) |> to_charlist()
:zlib.unzip(data) |> :binary.bin_to_list()
rescue
_e -> to_charlist(data)
_e -> :binary.bin_to_list(data)
catch
_kind, _reason -> to_charlist(data)
_kind, _reason -> :binary.bin_to_list(data)
end

{xml, _} = :xmerl_scan.string(xml_data, namespace_conformant: true, allow_entities: false)
Expand Down
2 changes: 1 addition & 1 deletion lib/ex_saml/core/sp.ex
Original file line number Diff line number Diff line change
Expand Up @@ -517,7 +517,7 @@ defmodule ExSaml.Core.Sp do
assertion_xml = block_decrypt(to_string(algorithm), symmetric_key, cipher_value)

{assertion, _} =
:xmerl_scan.string(to_charlist(assertion_xml),
:xmerl_scan.string(:binary.bin_to_list(assertion_xml),
namespace_conformant: true,
allow_entities: false
)
Expand Down
2 changes: 1 addition & 1 deletion lib/ex_saml/metadata.ex
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ defmodule ExSaml.Metadata do
# ---------------------------------------------------------------------------

defp parse(xml) do
charlist = String.to_charlist(xml)
charlist = :binary.bin_to_list(xml)

try do
{root, _rest} =
Expand Down
129 changes: 129 additions & 0 deletions test/ex_saml/core/binding_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,40 @@ defmodule ExSaml.Core.BindingTest do

Record.defrecord(:xmlText, Record.extract(:xmlText, from_lib: "xmerl/include/xmerl.hrl"))

Record.defrecord(
:xmlAttribute,
Record.extract(:xmlAttribute, from_lib: "xmerl/include/xmerl.hrl")
)

# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------

# Gathers, in document order, the text found directly under `elem` and directly
# under each of its child elements (one level of nesting, no deeper). The
# pieces come from `extract_text/1` and are concatenated into one flat list
# (xmerl returns text as charlists of codepoints).
defp text_value(elem) do
  children = xmlElement(elem, :content)

  Enum.flat_map(children, fn child ->
    if Record.is_record(child, :xmlElement) do
      # Child element: collect the text nodes sitting directly inside it.
      grandchildren = xmlElement(child, :content)
      Enum.flat_map(grandchildren, &extract_text/1)
    else
      # Anything else (e.g. a text node directly under `elem`).
      extract_text(child)
    end
  end)
end

# Returns the `:value` of an xmlText record; every other node yields `[]` so
# it contributes nothing when results are concatenated.
defp extract_text(node) do
  case Record.is_record(node, :xmlText) do
    true -> xmlText(node, :value)
    false -> []
  end
end

# Looks up the attribute called `name` (an atom, as used by the callers in this
# file) on `elem` and returns its value, or `nil` when no attribute matches.
defp attribute_value(elem, name) do
  attributes = xmlElement(elem, :attributes)

  Enum.find_value(attributes, fn attribute ->
    # `&&` yields a falsy result for non-matching attributes, so the search
    # keeps going until a matching attribute with a truthy value is found.
    xmlAttribute(attribute, :name) == name && xmlAttribute(attribute, :value)
  end)
end

defp simple_request_element do
# Build a minimal <AuthnRequest/> xmerl element
xmlElement(
Expand Down Expand Up @@ -161,6 +191,105 @@ defmodule ExSaml.Core.BindingTest do
end
end

# ---------------------------------------------------------------------------
# decode_response/2 — UTF-8 character regression
#
# Reproduces a production crash where xmerl_scan rejected character 233 (é)
# because the XML byte stream had been pre-decoded into Unicode codepoints
# via `to_charlist/1` before being handed to xmerl. xmerl expects a list of
# raw UTF-8 bytes and does its own decoding. The fix passes the bytes via
# `:binary.bin_to_list/1`. These tests guard the regression across the full
# UTF-8 range (2-byte, 3-byte, 4-byte sequences) and across both decode
# paths (DEFLATE and plain base64), in text content and in attribute values.
# ---------------------------------------------------------------------------

describe "decode_response/2 — non-ASCII (UTF-8) regression" do
# Sample table driving the generated tests below.
# Each entry is {label, sample, expected_codepoints}:
#   * label    - interpolated into the generated test names
#   * sample   - UTF-8 string embedded in the XML handed to decode_response/2
#   * expected - list of codepoints the parsed text / attribute value is
#                compared against (`?x` evaluates to the codepoint of `x`)
@utf8_samples [
# 2-byte UTF-8 — Latin-1 Supplement
# (233 is written numerically here: it is the character from the production crash.)
{"french é (the prod case)", "Hélène", [?H, 233, ?l, 232, ?n, ?e]},
{"french é + space", "Émilie Côté", [?É, ?m, ?i, ?l, ?i, ?e, ?\s, ?C, ?ô, ?t, ?é]},
{"spanish ñ", "Núñez", [?N, ?ú, ?ñ, ?e, ?z]},
{"german ß", "Straße", [?S, ?t, ?r, ?a, ?ß, ?e]},
{"german umlaut ü", "Müller", [?M, ?ü, ?l, ?l, ?e, ?r]},
{"nordic ø", "Bjørn", [?B, ?j, ?ø, ?r, ?n]},
# 2-byte UTF-8 — Latin Extended
{"polish ł", "Łukasz", [?Ł, ?u, ?k, ?a, ?s, ?z]},
{"czech č", "Černý", [?Č, ?e, ?r, ?n, ?ý]},
# 3-byte UTF-8 — BMP
{"euro sign €", "10€", [?1, ?0, ?€]},
{"greek Ω", "ΩΑΘ", [?Ω, ?Α, ?Θ]},
{"cyrillic", "Иван", [?И, ?в, ?а, ?н]},
{"chinese", "中文", [?中, ?文]},
{"japanese", "日本語", [?日, ?本, ?語]},
# 4-byte UTF-8 — supplementary planes (emoji)
# (0x1F389 is the codepoint of 🎉, spelled in hex.)
{"emoji 🎉", "party 🎉", [?p, ?a, ?r, ?t, ?y, ?\s, 0x1F389]}
]

# Generates three tests per @utf8_samples entry: element text and attribute
# value through the plain-base64 path (empty encoding argument), and element
# text through the DEFLATE path. `unquote/1` injects the compile-time loop
# values into each generated test body.
for {label, sample, expected} <- @utf8_samples do
  test "decodes #{label} in element text (non-deflate)" do
    sample = unquote(sample)
    expected = unquote(expected)

    xml = "<Response><Name>" <> sample <> "</Name></Response>"
    b64 = Base.encode64(xml)

    decoded = Binding.decode_response("", b64)

    assert Record.is_record(decoded, :xmlElement)
    assert xmlElement(decoded, :name) == :Response
    assert text_value(decoded) == expected
  end

  test "decodes #{label} in attribute value (non-deflate)" do
    sample = unquote(sample)
    expected = unquote(expected)

    # Escape only what a double-quoted attribute value requires. `&` MUST be
    # replaced first: doing it after `"` would turn the freshly produced
    # `&quot;` into `&amp;quot;`. `<` is also illegal inside attribute values.
    attr =
      sample
      |> String.replace("&", "&amp;")
      |> String.replace("<", "&lt;")
      |> String.replace("\"", "&quot;")

    xml = "<Response label=\"" <> attr <> "\"><Name>x</Name></Response>"
    b64 = Base.encode64(xml)

    decoded = Binding.decode_response("", b64)

    assert Record.is_record(decoded, :xmlElement)
    assert attribute_value(decoded, :label) == expected
  end

  test "decodes #{label} in element text (DEFLATE)" do
    sample = unquote(sample)
    expected = unquote(expected)

    xml = "<Response><Name>" <> sample <> "</Name></Response>"
    # Compress before base64 so the DEFLATE decode path is exercised.
    b64 = xml |> :zlib.zip() |> Base.encode64()

    decoded =
      Binding.decode_response(
        "urn:oasis:names:tc:SAML:2.0:bindings:URL-Encoding:DEFLATE",
        b64
      )

    assert Record.is_record(decoded, :xmlElement)
    assert xmlElement(decoded, :name) == :Response
    assert text_value(decoded) == expected
  end
end

# End-to-end style check: a namespaced SAMLResponse whose AttributeValues mix
# 2-, 3- and 4-byte UTF-8 characters. Besides the root element, the decoded
# attribute values themselves are asserted, so text that parses but comes out
# mangled is caught as well.
test "decodes a realistic SAMLResponse with multiple accented AttributeValues" do
  xml =
    ~s(<samlp:Response xmlns:samlp="urn:oasis:names:tc:SAML:2.0:protocol" ) <>
      ~s(xmlns="urn:oasis:names:tc:SAML:2.0:assertion">) <>
      "<Assertion><AttributeStatement>" <>
      ~s(<Attribute Name="given_name"><AttributeValue>Anaïs</AttributeValue></Attribute>) <>
      ~s(<Attribute Name="family_name"><AttributeValue>Lefèvre-Béchamp</AttributeValue></Attribute>) <>
      ~s(<Attribute Name="display"><AttributeValue>Müller — €10 🎉</AttributeValue></Attribute>) <>
      "</AttributeStatement></Assertion></samlp:Response>"

  b64 = Base.encode64(xml)
  decoded = Binding.decode_response("", b64)

  assert Record.is_record(decoded, :xmlElement)
  assert xmlElement(decoded, :name) == :"samlp:Response"

  # The document is built without whitespace between elements, so each level
  # is expected to contain exactly the children written above.
  assert [assertion] = xmlElement(decoded, :content)
  assert [statement] = xmlElement(assertion, :content)

  # text_value/1 reads the text one level below the element it is given, i.e.
  # the <AttributeValue> text of each <Attribute>.
  decoded_attributes =
    for attribute <- xmlElement(statement, :content) do
      {attribute_value(attribute, :Name), text_value(attribute)}
    end

  assert decoded_attributes == [
           {String.to_charlist("given_name"), String.to_charlist("Anaïs")},
           {String.to_charlist("family_name"), String.to_charlist("Lefèvre-Béchamp")},
           {String.to_charlist("display"), String.to_charlist("Müller — €10 🎉")}
         ]
end
end

# ---------------------------------------------------------------------------
# encode_http_post/4 with nonce
# ---------------------------------------------------------------------------
Expand Down
36 changes: 36 additions & 0 deletions test/ex_saml/metadata_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,42 @@ defmodule ExSaml.MetadataTest do
assert {:error, %ValidationResult{errors: [%{code: :invalid_xml}]}} =
Metadata.validate("")
end

# Regression guard for parse/1. It previously converted the raw UTF-8 binary
# with String.to_charlist, i.e. into codepoints, whereas xmerl_scan wants the
# raw UTF-8 bytes; any non-ASCII character (for instance an accented
# Organization name) crashed with {:wfc_Legal_Character, {:bad_character, _}}.
# One test is generated per organization name below.
for {description, organization} <- [
      {"latin-1 supplement (é)", "Société Élysée SAS"},
      {"german umlaut (ü/ß)", "Müller Straße GmbH"},
      {"polish (Ł)", "Łukasz Sp. z o.o."},
      {"euro sign (€)", "10€ Org"},
      {"greek (Ω)", "ΩΑΘ Foundation"},
      {"cyrillic", "Иван Lab"},
      {"chinese", "中文公司"},
      {"emoji (🎉)", "Party 🎉 Inc."}
    ] do
  test "parses metadata containing #{description} without crashing" do
    # Compile-time loop value injected into the generated test body.
    org_name = unquote(organization)

    xml = """
    <?xml version="1.0" encoding="UTF-8"?>
    <md:EntityDescriptor xmlns:md="urn:oasis:names:tc:SAML:2.0:metadata"
    entityID="https://sp.example.com/saml">
    <md:SPSSODescriptor protocolSupportEnumeration="urn:oasis:names:tc:SAML:2.0:protocol">
    <md:AssertionConsumerService index="0" isDefault="true"
    Binding="urn:oasis:names:tc:SAML:2.0:bindings:HTTP-POST"
    Location="https://sp.example.com/saml/acs"/>
    </md:SPSSODescriptor>
    <md:Organization>
    <md:OrganizationName xml:lang="fr">#{org_name}</md:OrganizationName>
    <md:OrganizationDisplayName xml:lang="fr">#{org_name}</md:OrganizationDisplayName>
    <md:OrganizationURL xml:lang="fr">https://example.com</md:OrganizationURL>
    </md:Organization>
    </md:EntityDescriptor>
    """

    result = Metadata.validate(xml)

    assert {:ok, %ValidationResult{}} = result
  end
end
end

describe "root element" do
Expand Down
Loading