Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
191 changes: 191 additions & 0 deletions binsrc/tests/suite/tsparql_json_utf8.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,191 @@
#!/bin/bash
#
# tsparql_json_utf8.sh
#
# Standalone test for SPARQL JSON result serialization of non-ASCII UTF-8 strings.
# Tests that non-ASCII characters are output as UTF-8 (not \uXXXX escapes)
# and that "type" is "literal" (not "typed-literal").
#
# Can be run BEFORE and AFTER the fix to demonstrate the difference.
#
# Related: https://github.com/openlink/virtuoso-opensource/issues/1361
#
# Usage:
# ./tsparql_json_utf8.sh [SPARQL_ENDPOINT_URL]
#
# Default endpoint: http://localhost:8890/sparql
#

# Endpoint under test: first command-line argument, else the local default.
ENDPOINT="${1:-http://localhost:8890/sparql}"
# Graph IRI for this run; the epoch-seconds suffix comes from date(1).
GRAPH="urn:test:json:utf8:$(date '+%s')"
# Counters maintained by pass() and fail() and reported in the summary.
PASSED=0 FAILED=0 TESTS=0

# Record one successful check and print its description.
#   $1 - human-readable description of the check
pass() {
    (( PASSED += 1, TESTS += 1 ))
    printf ' PASSED: %s\n' "$1"
}

# Record one failed check and print its description with the ***FAILED marker.
#   $1 - human-readable description of the check
fail() {
    (( FAILED += 1, TESTS += 1 ))
    printf ' ***FAILED: %s\n' "$1"
}

# Banner: show which endpoint and which graph this run uses.
echo "=== SPARQL JSON UTF-8 Serialization Test ==="
echo "Endpoint: $ENDPOINT"
echo "Test graph: $GRAPH"
echo ""

# --- Setup: Insert test data ---
echo "--- Setup: Inserting test data ---"
# --fail makes curl exit non-zero on an HTTP error status, so a rejected INSERT
# (or an unreachable endpoint) is reported here instead of surfacing later as a
# string of unrelated-looking test failures.
if ! curl -s --fail -X POST "$ENDPOINT" \
    --data-urlencode "query=INSERT INTO GRAPH <$GRAPH> {
<urn:test:s1> <urn:test:name> \"België\" .
<urn:test:s2> <urn:test:name> \"Zürich\" .
<urn:test:s3> <urn:test:name> \"naïve café\" .
<urn:test:s4> <urn:test:name> \"日本語テスト\" .
<urn:test:s5> <urn:test:name> \"plain ASCII\" .
}" > /dev/null 2>&1; then
    echo " ***ABORTED: could not insert test data via $ENDPOINT" >&2
    exit 2
fi

# --- Test 1: België in JSON output ---
# Fetch subject s1 as SPARQL JSON and check two things: the literal appears as
# raw UTF-8, and the response carries no \uXXXX escape at all.
echo ""
echo "--- Test 1: Non-ASCII string 'België' in JSON ---"
RESULT=$(curl -s -H "Accept: application/sparql-results+json" \
    --data-urlencode "query=SELECT ?name WHERE { GRAPH <$GRAPH> { <urn:test:s1> <urn:test:name> ?name } }" \
    "$ENDPOINT")

echo " Raw JSON output:"
echo " $RESULT" | head -20
echo ""

if echo "$RESULT" | grep -q 'België'; then
    pass "JSON contains UTF-8 'België' (not escaped)"
else
    fail "JSON does not contain UTF-8 'België'"
fi

# Match any \uXXXX escape with hex digits in either case. The previous pattern
# only caught the upper-case forms \u00EB / \u00CB, so a serializer emitting
# \u00eb would have slipped through as a pass.
if echo "$RESULT" | grep -qiE '\\u[0-9a-f]{4}'; then
    fail "JSON contains \\uXXXX escape sequences (should be UTF-8)"
else
    pass "JSON does not contain \\uXXXX escape sequences"
fi

# --- Test 2: Zürich in JSON output ---
# Subject s2 must come back with the u-umlaut as raw UTF-8.
printf '\n'
printf '%s\n' "--- Test 2: Non-ASCII string 'Zürich' in JSON ---"
RESULT=$(
    curl -s \
        -H "Accept: application/sparql-results+json" \
        --data-urlencode "query=SELECT ?name WHERE { GRAPH <$GRAPH> { <urn:test:s2> <urn:test:name> ?name } }" \
        "$ENDPOINT"
)

if ! grep -q 'Zürich' <<< "$RESULT"; then
    fail "JSON does not contain UTF-8 'Zürich'"
else
    pass "JSON contains UTF-8 'Zürich'"
fi

# --- Test 3: naïve café in JSON output ---
# Subject s3 holds two accented characters separated by a space.
printf '\n'
printf '%s\n' "--- Test 3: Non-ASCII string 'naïve café' in JSON ---"
RESULT=$(
    curl -s \
        -H "Accept: application/sparql-results+json" \
        --data-urlencode "query=SELECT ?name WHERE { GRAPH <$GRAPH> { <urn:test:s3> <urn:test:name> ?name } }" \
        "$ENDPOINT"
)

if ! grep -q 'naïve café' <<< "$RESULT"; then
    fail "JSON does not contain UTF-8 'naïve café'"
else
    pass "JSON contains UTF-8 'naïve café'"
fi

# --- Test 4: CJK characters in JSON output ---
# Subject s4 holds a Japanese string made of multi-byte UTF-8 characters.
printf '\n'
printf '%s\n' "--- Test 4: CJK string '日本語テスト' in JSON ---"
RESULT=$(
    curl -s \
        -H "Accept: application/sparql-results+json" \
        --data-urlencode "query=SELECT ?name WHERE { GRAPH <$GRAPH> { <urn:test:s4> <urn:test:name> ?name } }" \
        "$ENDPOINT"
)

if ! grep -q '日本語テスト' <<< "$RESULT"; then
    fail "JSON does not contain UTF-8 CJK characters"
else
    pass "JSON contains UTF-8 CJK characters"
fi

# --- Test 5: type field is "literal" not "typed-literal" ---
# Two independent checks on the s1 binding: "typed-literal" must be absent,
# and "literal" must be present (with or without a space after the colon).
printf '\n'
printf '%s\n' "--- Test 5: type field correctness ---"
RESULT=$(
    curl -s \
        -H "Accept: application/sparql-results+json" \
        --data-urlencode "query=SELECT ?name WHERE { GRAPH <$GRAPH> { <urn:test:s1> <urn:test:name> ?name } }" \
        "$ENDPOINT"
)

if ! grep -q '"typed-literal"' <<< "$RESULT"; then
    pass "JSON does not use 'typed-literal'"
else
    fail "JSON uses 'typed-literal' (should be 'literal')"
fi

# Spaced form first, then the compact form, then give up.
if grep -q '"type": "literal"' <<< "$RESULT"; then
    pass "JSON uses 'type': 'literal'"
elif grep -q '"type":"literal"' <<< "$RESULT"; then
    pass "JSON uses 'type':'literal'"
else
    fail "JSON does not contain type=literal"
fi

# --- Test 6: plain ASCII still works ---
# Control case: an ASCII-only literal (subject s5) must come back unchanged.
printf '\n'
printf '%s\n' "--- Test 6: Plain ASCII string preserved ---"
RESULT=$(
    curl -s \
        -H "Accept: application/sparql-results+json" \
        --data-urlencode "query=SELECT ?name WHERE { GRAPH <$GRAPH> { <urn:test:s5> <urn:test:name> ?name } }" \
        "$ENDPOINT"
)

if ! grep -q 'plain ASCII' <<< "$RESULT"; then
    fail "JSON does not contain 'plain ASCII'"
else
    pass "JSON contains 'plain ASCII' correctly"
fi

# --- Test 7: Compare JSON vs XML for België ---
# Run the same query with both Accept headers and compare whether each
# response carries the raw UTF-8 string.
echo ""
echo "--- Test 7: JSON vs XML consistency for 'België' ---"
JSON_RESULT=$(curl -s -H "Accept: application/sparql-results+json" \
    --data-urlencode "query=SELECT ?name WHERE { GRAPH <$GRAPH> { <urn:test:s1> <urn:test:name> ?name } }" \
    "$ENDPOINT")
XML_RESULT=$(curl -s -H "Accept: application/sparql-results+xml" \
    --data-urlencode "query=SELECT ?name WHERE { GRAPH <$GRAPH> { <urn:test:s1> <urn:test:name> ?name } }" \
    "$ENDPOINT")

XML_HAS_UTF8=0
JSON_HAS_UTF8=0
echo "$XML_RESULT" | grep -q 'België' && XML_HAS_UTF8=1
echo "$JSON_RESULT" | grep -q 'België' && JSON_HAS_UTF8=1

# All four combinations are reported separately. Previously the case
# "JSON has it, XML does not" fell into the final else and was mislabelled
# as "Neither JSON nor XML".
if [ "$XML_HAS_UTF8" -eq 1 ] && [ "$JSON_HAS_UTF8" -eq 1 ]; then
    pass "Both JSON and XML output contain UTF-8 'België'"
elif [ "$XML_HAS_UTF8" -eq 1 ]; then
    fail "XML has UTF-8 'België' but JSON does not (inconsistency!)"
elif [ "$JSON_HAS_UTF8" -eq 1 ]; then
    fail "JSON has UTF-8 'België' but XML does not (inconsistency!)"
else
    fail "Neither JSON nor XML contain UTF-8 'België'"
fi

# --- Cleanup ---
# Remove the per-run graph. The outcome is reported truthfully; a failed
# cleanup is only a warning and does not change the script's exit status.
echo ""
echo "--- Cleanup ---"
if curl -s --fail -X POST "$ENDPOINT" \
    --data-urlencode "query=CLEAR GRAPH <$GRAPH>" > /dev/null 2>&1; then
    echo " Test graph cleared."
else
    echo " WARNING: could not clear test graph <$GRAPH>" >&2
fi

# --- Summary ---
# Print the totals, then exit 1 if any check failed and 0 otherwise.
printf '\n'
printf '%s\n' "==========================================="
printf ' Results: %s passed, %s failed (out of %s tests)\n' "$PASSED" "$FAILED" "$TESTS"
printf '%s\n' "==========================================="

exit $(( FAILED > 0 ? 1 : 0 ))
196 changes: 196 additions & 0 deletions binsrc/tests/suite/tsparql_json_utf8.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,196 @@
--
-- tsparql_json_utf8.sql
--
-- Test SPARQL JSON result serialization of non-ASCII UTF-8 strings.
-- Verifies that non-ASCII characters are output as UTF-8 in JSON values
-- (not as \uXXXX escape sequences), and that the "type" field is always
-- "literal" (never "typed-literal") per SPARQL 1.1 Results JSON spec.
--
-- Related: https://github.com/openlink/virtuoso-opensource/issues/1361
--

-- Reset the two counters used throughout this file. The final summary line
-- reports ARGV[0] as the FAILED total and ARGV[1] as the PASSED total.
SET ARGV[0] 0;
SET ARGV[1] 0;
ECHO BOTH "STARTED: SPARQL JSON UTF-8 serialization tests (issue #1361)\n";

-- Clean up test graph
-- (start from an empty graph in case an earlier run left data behind)
SPARQL CLEAR GRAPH <urn:test:json:utf8>;

-- Insert test triples with non-ASCII characters
-- s1..s5 carry Latin-1-range, CJK and Greek literals; s6 is an ASCII control
-- stored under a different predicate (<urn:test:ascii>).
SPARQL INSERT INTO GRAPH <urn:test:json:utf8> {
<urn:test:s1> <urn:test:name> "België" .
<urn:test:s2> <urn:test:name> "Zürich" .
<urn:test:s3> <urn:test:name> "naïve café" .
<urn:test:s4> <urn:test:name> "日本語テスト" .
<urn:test:s5> <urn:test:name> "Ελληνικά" .
<urn:test:s6> <urn:test:ascii> "plain ASCII" .
};

----------------------------------------------------------------------
-- Test 1: http_escape mode 21 (DKS_ESC_JSON_DQ) passes through UTF-8
----------------------------------------------------------------------

-- Helper: pass str through http_escape in mode 21 into a fresh string
-- session and return the text that was written to it.
create procedure DB.DBA.TEST_JSON_ESCAPE_UTF8 (in str varchar)
{
  declare out_ses any;

  out_ses := string_output ();
  http_escape (str, 21, out_ses, 1, 1);

  return string_output_string (out_ses);
};

-- Each group below escapes one sample string and compares the first column
-- of the result ($LAST[1]) with the unescaped original. The SET line bumps
-- one of the ARGV counters selected by $LIF.
-- NOTE(review): $LIF is presumably set by the preceding $IF to the index of
-- the branch taken - confirm against the isql macro documentation.
select DB.DBA.TEST_JSON_ESCAPE_UTF8 ('België');
ECHO BOTH $IF $EQU $LAST[1] "België" "PASSED" "***FAILED";
SET ARGV[$LIF] $+ $ARGV[$LIF] 1;
ECHO BOTH ": JSON escape mode 21 preserves UTF-8 for België, got: " $LAST[1] "\n";

select DB.DBA.TEST_JSON_ESCAPE_UTF8 ('Zürich');
ECHO BOTH $IF $EQU $LAST[1] "Zürich" "PASSED" "***FAILED";
SET ARGV[$LIF] $+ $ARGV[$LIF] 1;
ECHO BOTH ": JSON escape mode 21 preserves UTF-8 for Zürich, got: " $LAST[1] "\n";

select DB.DBA.TEST_JSON_ESCAPE_UTF8 ('naïve café');
ECHO BOTH $IF $EQU $LAST[1] "naïve café" "PASSED" "***FAILED";
SET ARGV[$LIF] $+ $ARGV[$LIF] 1;
ECHO BOTH ": JSON escape mode 21 preserves UTF-8 for naïve café, got: " $LAST[1] "\n";

select DB.DBA.TEST_JSON_ESCAPE_UTF8 ('日本語テスト');
ECHO BOTH $IF $EQU $LAST[1] "日本語テスト" "PASSED" "***FAILED";
SET ARGV[$LIF] $+ $ARGV[$LIF] 1;
ECHO BOTH ": JSON escape mode 21 preserves UTF-8 for CJK, got: " $LAST[1] "\n";

select DB.DBA.TEST_JSON_ESCAPE_UTF8 ('Ελληνικά');
ECHO BOTH $IF $EQU $LAST[1] "Ελληνικά" "PASSED" "***FAILED";
SET ARGV[$LIF] $+ $ARGV[$LIF] 1;
ECHO BOTH ": JSON escape mode 21 preserves UTF-8 for Greek, got: " $LAST[1] "\n";

----------------------------------------------------------------------
-- Test 2: JSON special chars are still properly escaped
----------------------------------------------------------------------

-- Checks that http_escape mode 21 still escapes the two JSON metacharacters.
-- Returns 'ESCAPED' when the escaped double-quote sample contains \" and the
-- escaped backslash sample contains \\, otherwise 'NOT_ESCAPED'.
create procedure DB.DBA.TEST_JSON_ESCAPE_SPECIAL ()
{
  declare quote_ses, slash_ses any;
  declare quoted, slashed varchar;

  -- Sample 1: an embedded double quote
  quote_ses := string_output ();
  http_escape ('quote"here', 21, quote_ses, 1, 1);
  quoted := string_output_string (quote_ses);

  -- Sample 2: an embedded backslash
  slash_ses := string_output ();
  http_escape ('back\\slash', 21, slash_ses, 1, 1);
  slashed := string_output_string (slash_ses);

  if (strstr (quoted, '\\"') is not null and strstr (slashed, '\\\\') is not null)
    return 'ESCAPED';
  return 'NOT_ESCAPED';
};

-- Expect the helper to report 'ESCAPED' for the quote and backslash samples.
select DB.DBA.TEST_JSON_ESCAPE_SPECIAL ();
ECHO BOTH $IF $EQU $LAST[1] "ESCAPED" "PASSED" "***FAILED";
SET ARGV[$LIF] $+ $ARGV[$LIF] 1;
ECHO BOTH ": JSON escape mode 21 still escapes quotes and backslashes, got: " $LAST[1] "\n";

-- Control case: an ASCII-only string must come back unchanged.
select DB.DBA.TEST_JSON_ESCAPE_UTF8 ('plain ASCII');
ECHO BOTH $IF $EQU $LAST[1] "plain ASCII" "PASSED" "***FAILED";
SET ARGV[$LIF] $+ $ARGV[$LIF] 1;
ECHO BOTH ": JSON escape mode 21 preserves plain ASCII, got: " $LAST[1] "\n";

----------------------------------------------------------------------
-- Test 3: Full SPARQL JSON result serialization
----------------------------------------------------------------------

-- Runs the SELECT for subject s1 in the test graph, feeds the metadata and
-- result set from exec to DB.DBA.SPARQL_RESULTS_JSON_WRITE, and returns the
-- text written to the string session.
create procedure DB.DBA.TEST_SPARQL_JSON_UTF8 ()
{
  declare qry varchar;
  declare out_ses, col_meta, result_rows any;

  qry := 'SPARQL SELECT ?name WHERE { GRAPH <urn:test:json:utf8> { <urn:test:s1> <urn:test:name> ?name } }';
  exec (qry, null, null, null, 0, col_meta, result_rows);

  out_ses := string_output ();
  DB.DBA.SPARQL_RESULTS_JSON_WRITE (out_ses, col_meta, result_rows);
  return string_output_string (out_ses);
};

-- Smoke check: the serializer produced some value at all.
select DB.DBA.TEST_SPARQL_JSON_UTF8 ();
ECHO BOTH $IF $NEQ $LAST[1] NULL "PASSED" "***FAILED";
SET ARGV[$LIF] $+ $ARGV[$LIF] 1;
ECHO BOTH ": SPARQL JSON result returned non-null\n";

-- Check that result contains UTF-8 België (not escaped)
-- Classifies the serialized JSON for subject s1:
--   'HAS_UTF8'     the output contains the 'Belgi\x00EB' literal
--   'HAS_ESCAPE'   the output contains the text Belgi\u00EB instead
--   'UNKNOWN: ...' neither was found; the raw output is appended
-- NOTE(review): confirm that the '\x00EB' escape in the literal below yields
-- the same bytes the serializer emits for UTF-8 "ë".
create procedure DB.DBA.TEST_JSON_CONTAINS_UTF8 ()
{
  declare json_text varchar;

  json_text := DB.DBA.TEST_SPARQL_JSON_UTF8 ();
  if (strstr (json_text, 'Belgi\x00EB') is null)
    {
      if (strstr (json_text, 'Belgi\\u00EB') is null)
        return 'UNKNOWN: ' || json_text;
      return 'HAS_ESCAPE';
    }
  return 'HAS_UTF8';
};

-- Only 'HAS_UTF8' counts as a pass; 'HAS_ESCAPE' and 'UNKNOWN: ...' both fail.
select DB.DBA.TEST_JSON_CONTAINS_UTF8 ();
ECHO BOTH $IF $EQU $LAST[1] "HAS_UTF8" "PASSED" "***FAILED";
SET ARGV[$LIF] $+ $ARGV[$LIF] 1;
ECHO BOTH ": SPARQL JSON output contains UTF-8 België (not \\u escaped), got: " $LAST[1] "\n";

----------------------------------------------------------------------
-- Test 4: type field is "literal" not "typed-literal"
----------------------------------------------------------------------

-- Classifies the "type" member of the serialized JSON binding for subject s1:
--   'HAS_TYPED_LITERAL'  the output contains "typed-literal"
--   'HAS_LITERAL'        the output contains "type": "literal" or "type":"literal"
--   'UNKNOWN: ...'       neither was found; the raw output is appended
create procedure DB.DBA.TEST_JSON_TYPE_LITERAL ()
{
  declare result varchar;
  result := DB.DBA.TEST_SPARQL_JSON_UTF8 ();
  if (strstr (result, '"typed-literal"') is not null)
    return 'HAS_TYPED_LITERAL';
  -- Accept both the spaced and the compact form, as the companion shell test
  -- does, so a whitespace-only change in the serializer is not a failure.
  if (strstr (result, '"type": "literal"') is not null
      or strstr (result, '"type":"literal"') is not null)
    return 'HAS_LITERAL';
  return 'UNKNOWN: ' || result;
};

-- Only 'HAS_LITERAL' counts as a pass.
select DB.DBA.TEST_JSON_TYPE_LITERAL ();
ECHO BOTH $IF $EQU $LAST[1] "HAS_LITERAL" "PASSED" "***FAILED";
SET ARGV[$LIF] $+ $ARGV[$LIF] 1;
ECHO BOTH ": SPARQL JSON uses type=literal not typed-literal, got: " $LAST[1] "\n";

----------------------------------------------------------------------
-- Test 5: Consistency between XML and JSON output
----------------------------------------------------------------------

-- Serializes the same result set as both JSON and XML and returns
-- 'CONSISTENT' only when both outputs contain the 'Belgi\x00EB' literal,
-- otherwise 'INCONSISTENT'.
-- The earlier version declared the XML variables but never produced XML, so
-- the check looked at JSON alone despite its name.
-- NOTE(review): the XML writer procedure names and argument order below are
-- taken from the server's sparql_io.sql - confirm against the target build.
create procedure DB.DBA.TEST_JSON_XML_CONSISTENCY ()
{
  declare ses_json, ses_xml, metas, rset any;
  declare json_result, xml_result varchar;

  -- One query execution feeds both serializers.
  exec ('SPARQL SELECT ?name WHERE { GRAPH <urn:test:json:utf8> { <urn:test:s1> <urn:test:name> ?name } }',
    null, null, null, 0, metas, rset);

  -- Get JSON result
  ses_json := string_output ();
  DB.DBA.SPARQL_RESULTS_JSON_WRITE (ses_json, metas, rset);
  json_result := string_output_string (ses_json);

  -- Get XML result: namespace element, head, results, closing tag
  ses_xml := string_output ();
  DB.DBA.SPARQL_RESULTS_XML_WRITE_NS (ses_xml);
  DB.DBA.SPARQL_RESULTS_XML_WRITE_HEAD (ses_xml, metas);
  DB.DBA.SPARQL_RESULTS_XML_WRITE_RES (ses_xml, metas, rset);
  http ('\n</sparql>', ses_xml);
  xml_result := string_output_string (ses_xml);

  -- Both should contain the same UTF-8 string "België"
  if (strstr (json_result, 'Belgi\x00EB') is not null
      and strstr (xml_result, 'Belgi\x00EB') is not null)
    return 'CONSISTENT';
  return 'INCONSISTENT';
};

-- Only 'CONSISTENT' counts as a pass.
select DB.DBA.TEST_JSON_XML_CONSISTENCY ();
ECHO BOTH $IF $EQU $LAST[1] "CONSISTENT" "PASSED" "***FAILED";
SET ARGV[$LIF] $+ $ARGV[$LIF] 1;
ECHO BOTH ": JSON and XML serializers both output UTF-8 consistently, got: " $LAST[1] "\n";

-- Clean up: drop every helper procedure this file created and clear the
-- test graph. TEST_JSON_ESCAPE_SPECIAL was previously left behind in DB.DBA.
drop procedure DB.DBA.TEST_JSON_ESCAPE_UTF8;
drop procedure DB.DBA.TEST_JSON_ESCAPE_SPECIAL;
drop procedure DB.DBA.TEST_SPARQL_JSON_UTF8;
drop procedure DB.DBA.TEST_JSON_CONTAINS_UTF8;
drop procedure DB.DBA.TEST_JSON_TYPE_LITERAL;
drop procedure DB.DBA.TEST_JSON_XML_CONSISTENCY;
SPARQL CLEAR GRAPH <urn:test:json:utf8>;

-- Final tally: ARGV[0] holds the failed count, ARGV[1] the passed count.
ECHO BOTH "COMPLETED WITH " $ARGV[0] " FAILED, " $ARGV[1] " PASSED: SPARQL JSON UTF-8 serialization tests\n";
2 changes: 1 addition & 1 deletion libsrc/Wi/bif_soap.c
Original file line number Diff line number Diff line change
Expand Up @@ -11415,7 +11415,7 @@ ws_rest_handle_error (dk_session_t * ses, char * media_type, caddr_t * err_ret,
*http_resp_code = (code && '3' == code[0]) ? 400 : 500;
snprintf (tmp, sizeof (tmp), "{\"error\":\"%s\",\"code\":\"%s\",\"message\":\"", state, code);
session_buffered_write (ses, tmp, strlen (tmp));
dks_esc_write (ses, message, strlen (message), CHARSET_UTF8, CHARSET_UTF8, DKS_ESC_JSWRITE_DQ);
dks_esc_write (ses, message, strlen (message), CHARSET_UTF8, CHARSET_UTF8, DKS_ESC_JSON_DQ);
session_buffered_write (ses, "\"}", 2);
err_ret[0] = srv_make_new_error ("VSPRT", "SP003", "%s", message);
dk_free_tree (err);
Expand Down
Loading