From 4e1cb44c6680aab00c7ab8a019a690c53ec830c3 Mon Sep 17 00:00:00 2001 From: Boris De Vloed Date: Mon, 23 Feb 2026 09:57:43 +0100 Subject: [PATCH 1/3] fix Inconsistent string typing in SPARQL JSON results Fixes #1361 --- libsrc/Wi/bif_soap.c | 2 +- libsrc/Wi/dks_esc.c | 48 ++++++++++++++++++++--------------------- libsrc/Wi/http.h | 3 ++- libsrc/Wi/rdfbox.c | 38 ++++++++++++++++---------------- libsrc/Wi/sparql.sql | 14 ++++++------ libsrc/Wi/sparql_io.sql | 32 +++++++++++++-------------- libsrc/Wi/system2.sql | 6 +++--- 7 files changed, 72 insertions(+), 71 deletions(-) diff --git a/libsrc/Wi/bif_soap.c b/libsrc/Wi/bif_soap.c index d5b6d78fe7..2a834cce96 100644 --- a/libsrc/Wi/bif_soap.c +++ b/libsrc/Wi/bif_soap.c @@ -11415,7 +11415,7 @@ ws_rest_handle_error (dk_session_t * ses, char * media_type, caddr_t * err_ret, *http_resp_code = (code && '3' == code[0]) ? 400 : 500; snprintf (tmp, sizeof (tmp), "{\"error\":\"%s\",\"code\":\"%s\",\"message\":\"", state, code); session_buffered_write (ses, tmp, strlen (tmp)); - dks_esc_write (ses, message, strlen (message), CHARSET_UTF8, CHARSET_UTF8, DKS_ESC_JSWRITE_DQ); + dks_esc_write (ses, message, strlen (message), CHARSET_UTF8, CHARSET_UTF8, DKS_ESC_JSON_DQ); session_buffered_write (ses, "\"}", 2); err_ret[0] = srv_make_new_error ("VSPRT", "SP003", "%s", message); dk_free_tree (err); diff --git a/libsrc/Wi/dks_esc.c b/libsrc/Wi/dks_esc.c index 38b8377a9e..f0ebd1f4ae 100644 --- a/libsrc/Wi/dks_esc.c +++ b/libsrc/Wi/dks_esc.c @@ -86,30 +86,30 @@ unsigned char dks_esc_char_props[0x100] = { dks_charclass_props_t dks_charclasses['R'+1-'>'] = { -/* |0 |1 |2 |3 |4 |5 |6 |7 |8 |9 |10 |11 |12 |13 |14 |15 |16 |17 |18 |19 ,20 */ -/* |0x00 |0x01 |0x02 |0x03 |0x04 |0x05 |0x06 |0x07 |0x08 |0x09 |0x0A |0x0B |0x0C |0x0D |0x0E |0x0F |0x10 |0x11 |0x12 |0x13 ,0x14 */ -/* |NONE |PTEXT |SQATTR |DQATTR |COMMENT|CDATA |URI |DAV |URI_R |URI_NR |TTL_SQ |TTL_DQ |TTLIRI |JS_SQ |JS_DQ |hTTL_SQ|hTTL_DQ|hTTLIRI|JAVA_SQ|JAVA_DQ,QNAME11*/ -/* > wide */ 
{0 ,0 ,0 ,0 ,0 ,0 ,PCT ,PCT ,PCT ,PCT ,BSLAU8 ,BSLAU8 ,BSLAU8 ,BSLAU4 ,BSLAU4 ,BSLAU8 ,BSLAU8 ,BSLAU8 ,BSLAU8 ,BSLAU8 ,0 }, -/* ? enc.miss*/ {BAD ,LATTICE,LATTICE,LATTICE,LATTICE,CDATA2 ,PCT ,PCT ,PCT ,PCT ,BSLAU8 ,BSLAU8 ,BSLAU8 ,BSLAU4 ,BSLAU4 ,BSLAU8 ,BSLAU8 ,BSLAU8 ,BSLAU8 ,BSLAU8 ,0 }, -/* @ letters */ {0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 }, -/* A 8-bit */ {0 ,0 ,0 ,0 ,0 ,0 ,0 ,PCT ,PCT ,PCT ,BSLAU8 ,BSLAU8 ,BSLAU8 ,BSLAU4 ,BSLAU4 ,BSLAU8 ,BSLAU8 ,BSLAU8 ,BSLAU8 ,BSLAU8 ,0 }, -/* B < 0x20 */ {BAD ,LATTICE,LATTICE,LATTICE,0 ,0 ,PCT ,0 ,PCT ,PCT ,BSLAU8 ,BSLAU8 ,BSLAU8 ,BSLAU4 ,BSLAU4 ,BSLAU8 ,BSLAU8 ,BSLAU8 ,BSLAU8 ,BSLAU8 ,BAD }, -/* C ! */ {0 ,0 ,0 ,0 ,0 ,0 ,PCT ,0 ,PCT ,PCT ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 }, -/* D 0x09 */ {0 ,0 ,LATTICE,LATTICE,0 ,0 ,PCT ,0 ,PCT ,PCT ,BSLASHC,BSLASHC,BSLAU8 ,BSLASHC,BSLASHC,BSLASHC,BSLASHC,BSLAU8 ,BSLASHC,BSLASHC,BAD }, -/* E 0x0A */ {0 ,0 ,LATTICE,LATTICE,0 ,0 ,PCT ,0 ,PCT ,PCT ,BSLASHC,BSLASHC,BSLAU8 ,BSLASHC,BSLASHC,BSLASHC,BSLASHC,BSLAU8 ,BSLASHC,BSLASHC,BAD }, -/* F 0x0D */ {0 ,SOAPCR ,LATTICE,LATTICE,0 ,0 ,PCT ,0 ,PCT ,PCT ,BSLASHC,BSLASHC,BSLAU8 ,BSLASHC,BSLASHC,BSLASHC,BSLASHC,BSLAU8 ,BSLASHC,BSLASHC,BAD }, -/* G " */ {0 ,QUOT ,0 ,QUOT ,0 ,0 ,PCT ,PCT ,PCT ,PCT ,0 ,BSLASHC,BSLAU8 ,0 ,BSLASHC,QUOT ,BSLASHC,BSLAU8 ,0 ,BSLASHC,BAD }, -/* H & */ {0 ,AMP ,AMPATTR,AMPATTR,0 ,0 ,PCT ,PCT ,PCT ,0 ,0 ,0 ,0 ,0 ,0 ,AMP ,AMP ,AMP ,0 ,0 ,0 }, -/* I ' */ {0 ,LATTICE,LATTICE,0 ,0 ,0 ,PCT ,0 ,0 ,PCT ,BSLASHC,0 ,BSLAU8 ,BSLASHC,0 ,BSLASHC,LATTICE,BSLAU8 ,BSLASHC,0 ,0 }, -/* J 0x20 */ {0 ,0 ,0 ,0 ,0 ,0 ,PCT ,PCT ,PCT ,PCT ,0 ,0 ,BSLAU8 ,0 ,0 ,0 ,0 ,BSLAU8 ,0 ,0 ,0 }, -/* K < */ {0 ,LT ,LTATTR ,LTATTR ,0 ,0 ,PCT ,PCT ,PCT ,PCT ,0 ,0 ,BSLAU8 ,0 ,0 ,LT ,LT ,BSLAU8 ,0 ,0 ,BAD }, -/* L > */ {0 ,GT ,GTATTR ,GTATTR ,COMMENT,CDATA ,PCT ,PCT ,PCT ,PCT ,0 ,0 ,BSLAU8 ,0 ,0 ,GT ,GT ,BSLAU8 ,0 ,0 ,BAD }, -/* M % */ {0 ,0 ,0 ,0 ,0 ,0 ,PCT ,0 ,0 ,PCT ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 }, -/* N / */ {0 ,0 ,0 ,0 ,0 ,0 ,PCT ,0 
,PCT ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 }, -/* O * */ {0 ,0 ,0 ,0 ,0 ,0 ,PCT ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 }, -/* P punct-! */ {0 ,0 ,0 ,0 ,0 ,0 ,PCT ,0 ,PCT ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 }, -/* Q \ */ {0 ,0 ,0 ,0 ,0 ,0 ,PCT ,0 ,0 ,0 ,BSLASHC,BSLASHC,BSLAU8 ,BSLASHC,BSLASHC,BSLASHC,BSLASHC,BSLAU8 ,BSLASHC,BSLASHC,BAD }, -/* R |, 0x7f */ {0 ,0 ,0 ,0 ,0 ,0 ,PCT ,PCT ,PCT ,PCT ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 } }; +/* |0 |1 |2 |3 |4 |5 |6 |7 |8 |9 |10 |11 |12 |13 |14 |15 |16 |17 |18 |19 |20 ,21 */ +/* |0x00 |0x01 |0x02 |0x03 |0x04 |0x05 |0x06 |0x07 |0x08 |0x09 |0x0A |0x0B |0x0C |0x0D |0x0E |0x0F |0x10 |0x11 |0x12 |0x13 |0x14 ,0x15 */ +/* |NONE |PTEXT |SQATTR |DQATTR |COMMENT|CDATA |URI |DAV |URI_R |URI_NR |TTL_SQ |TTL_DQ |TTLIRI |JS_SQ |JS_DQ |hTTL_SQ|hTTL_DQ|hTTLIRI|JAVA_SQ|JAVA_DQ|QNAME11,JSON_DQ*/ +/* > wide */ {0 ,0 ,0 ,0 ,0 ,0 ,PCT ,PCT ,PCT ,PCT ,BSLAU8 ,BSLAU8 ,BSLAU8 ,BSLAU4 ,BSLAU4 ,BSLAU8 ,BSLAU8 ,BSLAU8 ,BSLAU8 ,BSLAU8 ,0 ,0 }, +/* ? enc.miss*/ {BAD ,LATTICE,LATTICE,LATTICE,LATTICE,CDATA2 ,PCT ,PCT ,PCT ,PCT ,BSLAU8 ,BSLAU8 ,BSLAU8 ,BSLAU4 ,BSLAU4 ,BSLAU8 ,BSLAU8 ,BSLAU8 ,BSLAU8 ,BSLAU8 ,0 ,BSLAU4 }, +/* @ letters */ {0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 }, +/* A 8-bit */ {0 ,0 ,0 ,0 ,0 ,0 ,0 ,PCT ,PCT ,PCT ,BSLAU8 ,BSLAU8 ,BSLAU8 ,BSLAU4 ,BSLAU4 ,BSLAU8 ,BSLAU8 ,BSLAU8 ,BSLAU8 ,BSLAU8 ,0 ,0 }, +/* B < 0x20 */ {BAD ,LATTICE,LATTICE,LATTICE,0 ,0 ,PCT ,0 ,PCT ,PCT ,BSLAU8 ,BSLAU8 ,BSLAU8 ,BSLAU4 ,BSLAU4 ,BSLAU8 ,BSLAU8 ,BSLAU8 ,BSLAU8 ,BSLAU8 ,BAD ,BSLAU4 }, +/* C ! 
*/ {0 ,0 ,0 ,0 ,0 ,0 ,PCT ,0 ,PCT ,PCT ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 }, +/* D 0x09 */ {0 ,0 ,LATTICE,LATTICE,0 ,0 ,PCT ,0 ,PCT ,PCT ,BSLASHC,BSLASHC,BSLAU8 ,BSLASHC,BSLASHC,BSLASHC,BSLASHC,BSLAU8 ,BSLASHC,BSLASHC,BAD ,BSLASHC}, +/* E 0x0A */ {0 ,0 ,LATTICE,LATTICE,0 ,0 ,PCT ,0 ,PCT ,PCT ,BSLASHC,BSLASHC,BSLAU8 ,BSLASHC,BSLASHC,BSLASHC,BSLASHC,BSLAU8 ,BSLASHC,BSLASHC,BAD ,BSLASHC}, +/* F 0x0D */ {0 ,SOAPCR ,LATTICE,LATTICE,0 ,0 ,PCT ,0 ,PCT ,PCT ,BSLASHC,BSLASHC,BSLAU8 ,BSLASHC,BSLASHC,BSLASHC,BSLASHC,BSLAU8 ,BSLASHC,BSLASHC,BAD ,BSLASHC}, +/* G " */ {0 ,QUOT ,0 ,QUOT ,0 ,0 ,PCT ,PCT ,PCT ,PCT ,0 ,BSLASHC,BSLAU8 ,0 ,BSLASHC,QUOT ,BSLASHC,BSLAU8 ,0 ,BSLASHC,BAD ,BSLASHC}, +/* H & */ {0 ,AMP ,AMPATTR,AMPATTR,0 ,0 ,PCT ,PCT ,PCT ,0 ,0 ,0 ,0 ,0 ,0 ,AMP ,AMP ,AMP ,0 ,0 ,0 ,0 }, +/* I ' */ {0 ,LATTICE,LATTICE,0 ,0 ,0 ,PCT ,0 ,0 ,PCT ,BSLASHC,0 ,BSLAU8 ,BSLASHC,0 ,BSLASHC,LATTICE,BSLAU8 ,BSLASHC,0 ,0 ,0 }, +/* J 0x20 */ {0 ,0 ,0 ,0 ,0 ,0 ,PCT ,PCT ,PCT ,PCT ,0 ,0 ,BSLAU8 ,0 ,0 ,0 ,0 ,BSLAU8 ,0 ,0 ,0 ,0 }, +/* K < */ {0 ,LT ,LTATTR ,LTATTR ,0 ,0 ,PCT ,PCT ,PCT ,PCT ,0 ,0 ,BSLAU8 ,0 ,0 ,LT ,LT ,BSLAU8 ,0 ,0 ,BAD ,0 }, +/* L > */ {0 ,GT ,GTATTR ,GTATTR ,COMMENT,CDATA ,PCT ,PCT ,PCT ,PCT ,0 ,0 ,BSLAU8 ,0 ,0 ,GT ,GT ,BSLAU8 ,0 ,0 ,BAD ,0 }, +/* M % */ {0 ,0 ,0 ,0 ,0 ,0 ,PCT ,0 ,0 ,PCT ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 }, +/* N / */ {0 ,0 ,0 ,0 ,0 ,0 ,PCT ,0 ,PCT ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 }, +/* O * */ {0 ,0 ,0 ,0 ,0 ,0 ,PCT ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 }, +/* P punct-! 
*/ {0 ,0 ,0 ,0 ,0 ,0 ,PCT ,0 ,PCT ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 }, +/* Q \ */ {0 ,0 ,0 ,0 ,0 ,0 ,PCT ,0 ,0 ,0 ,BSLASHC,BSLASHC,BSLAU8 ,BSLASHC,BSLASHC,BSLASHC,BSLASHC,BSLAU8 ,BSLASHC,BSLASHC,BAD ,BSLASHC}, +/* R |, 0x7f */ {0 ,0 ,0 ,0 ,0 ,0 ,PCT ,PCT ,PCT ,PCT ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 ,0 } }; unsigned char dks_esc_bslashc[0x80] = { /* 0 1 2 3 4 5 6 7 8 9 A B C D E F */ diff --git a/libsrc/Wi/http.h b/libsrc/Wi/http.h index 424aae68c9..6c77614134 100644 --- a/libsrc/Wi/http.h +++ b/libsrc/Wi/http.h @@ -359,7 +359,8 @@ the order of columns in dks_charclasses, file dks_esc.c */ #define DKS_ESC_JAVA_SQ 0x12 /*! 18 */ #define DKS_ESC_JAVA_DQ 0x13 /*! 19 */ #define DKS_ESC_QNAME_11 0x14 /*! 20 */ -#define COUNTOF__DKS_ESC 0x15 /*! 21 */ +#define DKS_ESC_JSON_DQ 0x15 /*! 21 - like JSWRITE_DQ but outputs non-ASCII as UTF-8 instead of \\uXXXX */ +#define COUNTOF__DKS_ESC 0x16 /*! 22 */ #define DKS_ESC_COMPAT_HTML 0x100 #define DKS_ESC_COMPAT_SOAP 0x200 diff --git a/libsrc/Wi/rdfbox.c b/libsrc/Wi/rdfbox.c index 8c822d80c2..e871fb5436 100644 --- a/libsrc/Wi/rdfbox.c +++ b/libsrc/Wi/rdfbox.c @@ -4699,14 +4699,14 @@ http_talis_json_write_ref_obj (dk_session_t *ses, caddr_t obj_iri, int obj_is_bn if (obj_is_bnode) /* 0 1 2 3 */ { /* 01.23456.7890.123456.789.012345.6789.01 */ session_buffered_write (ses, "{ \"type\" : \"bnode\", \"value\" : \"", 31); - dks_esc_write (ses, obj_iri, box_length (obj_iri) - 1, CHARSET_UTF8, CHARSET_UTF8, DKS_ESC_JSWRITE_DQ); + dks_esc_write (ses, obj_iri, box_length (obj_iri) - 1, CHARSET_UTF8, CHARSET_UTF8, DKS_ESC_JSON_DQ); /* .0123 */ session_buffered_write (ses, "\" }", 3); } else /* 0 1 2 */ { /* 01.23456.7890.1234.567.890123.4567.89 */ session_buffered_write (ses, "{ \"type\" : \"uri\", \"value\" : \"", 29); - dks_esc_write (ses, obj_iri, box_length (obj_iri) - 1, CHARSET_UTF8, CHARSET_UTF8, DKS_ESC_JSWRITE_DQ); + dks_esc_write (ses, obj_iri, box_length (obj_iri) - 1, CHARSET_UTF8, CHARSET_UTF8, DKS_ESC_JSON_DQ); /* .0123 */ 
session_buffered_write (ses, "\" }", 3); } @@ -4763,12 +4763,12 @@ http_talis_json_write_literal_obj (dk_session_t *ses, query_instance_t *qi, cadd } case DV_STRING: session_buffered_write_char ('\"', ses); - dks_esc_write (ses, obj_box_value, box_length (obj_box_value) - 1, CHARSET_UTF8, CHARSET_UTF8, DKS_ESC_JSWRITE_DQ); + dks_esc_write (ses, obj_box_value, box_length (obj_box_value) - 1, CHARSET_UTF8, CHARSET_UTF8, DKS_ESC_JSON_DQ); session_buffered_write_char ('\"', ses); break; case DV_WIDE: session_buffered_write_char ('\"', ses); - dks_esc_write (ses, obj_box_value, box_length (obj_box_value) - sizeof (wchar_t), CHARSET_UTF8, CHARSET_WIDE, DKS_ESC_JSWRITE_DQ); + dks_esc_write (ses, obj_box_value, box_length (obj_box_value) - sizeof (wchar_t), CHARSET_UTF8, CHARSET_WIDE, DKS_ESC_JSON_DQ); session_buffered_write_char ('\"', ses); break; case DV_XML_ENTITY: @@ -4804,7 +4804,7 @@ http_talis_json_write_literal_obj (dk_session_t *ses, query_instance_t *qi, cadd if (NULL != lang_id) /* just in case if lang cannot be found, may be signal an error ? 
*/ { /* 012.34567.8901.23 */ session_buffered_write (ses, " , \"lang\" : \"", 13); - dks_esc_write (ses, lang_id, box_length (lang_id) - 1, CHARSET_UTF8, CHARSET_UTF8, DKS_ESC_JSWRITE_DQ); + dks_esc_write (ses, lang_id, box_length (lang_id) - 1, CHARSET_UTF8, CHARSET_UTF8, DKS_ESC_JSON_DQ); session_buffered_write_char ('\"', ses); dk_free_box (lang_id); } @@ -4816,7 +4816,7 @@ http_talis_json_write_literal_obj (dk_session_t *ses, query_instance_t *qi, cadd sqlr_new_error ("22023", "SR625", "Unsupported datatype %d in TALIS-style JSON serialization of an RDF object", obj_dtp); /* 012.345678901.2345.67 */ session_buffered_write (ses, " , \"datatype\" : \"", 17); - dks_esc_write (ses, type_uri, box_length (type_uri) - 1, CHARSET_UTF8, CHARSET_UTF8, DKS_ESC_JSWRITE_DQ); + dks_esc_write (ses, type_uri, box_length (type_uri) - 1, CHARSET_UTF8, CHARSET_UTF8, DKS_ESC_JSON_DQ); session_buffered_write_char ('\"', ses); dk_free_box (type_uri); } @@ -4872,7 +4872,7 @@ bif_http_talis_json_triple (caddr_t * qst, caddr_t * err_ret, state_slot_t ** ar dk_free_tree (env->tje_prev_pred); env->tje_prev_pred = NULL; } session_buffered_write_char ('\"', ses); - dks_esc_write (ses, subj_iri, box_length (subj_iri) - 1, CHARSET_UTF8, CHARSET_UTF8, DKS_ESC_JSWRITE_DQ); + dks_esc_write (ses, subj_iri, box_length (subj_iri) - 1, CHARSET_UTF8, CHARSET_UTF8, DKS_ESC_JSON_DQ); /* .0123456 */ session_buffered_write (ses, "\" : { ", 6); env->tje_prev_subj = subj_iri_is_new ? 
subj_iri : box_copy (subj_iri); subj_iri_is_new = 0; @@ -4885,7 +4885,7 @@ bif_http_talis_json_triple (caddr_t * qst, caddr_t * err_ret, state_slot_t ** ar dk_free_tree (env->tje_prev_pred); env->tje_prev_pred = NULL; } session_buffered_write_char ('\"', ses); - dks_esc_write (ses, pred_iri, box_length (pred_iri) - 1, CHARSET_UTF8, CHARSET_UTF8, DKS_ESC_JSWRITE_DQ); + dks_esc_write (ses, pred_iri, box_length (pred_iri) - 1, CHARSET_UTF8, CHARSET_UTF8, DKS_ESC_JSON_DQ); /* .0123456 */ session_buffered_write (ses, "\" : [ ", 6); env->tje_prev_pred = pred_iri_is_new ? pred_iri : box_copy (pred_iri); pred_iri_is_new = 0; @@ -4998,12 +4998,12 @@ http_ld_json_write_literal_obj (dk_session_t *ses, query_instance_t *qi, caddr_t { case DV_STRING: session_buffered_write_char ('\"', ses); - dks_esc_write (ses, obj_box_value, box_length (obj_box_value) - 1, CHARSET_UTF8, CHARSET_UTF8, DKS_ESC_JSWRITE_DQ); + dks_esc_write (ses, obj_box_value, box_length (obj_box_value) - 1, CHARSET_UTF8, CHARSET_UTF8, DKS_ESC_JSON_DQ); session_buffered_write_char ('\"', ses); return; case DV_WIDE: session_buffered_write_char ('\"', ses); - dks_esc_write (ses, obj_box_value, box_length (obj_box_value) - sizeof (wchar_t), CHARSET_UTF8, CHARSET_WIDE, DKS_ESC_JSWRITE_DQ); + dks_esc_write (ses, obj_box_value, box_length (obj_box_value) - sizeof (wchar_t), CHARSET_UTF8, CHARSET_WIDE, DKS_ESC_JSON_DQ); session_buffered_write_char ('\"', ses); return; case DV_LONG_INT: @@ -5043,12 +5043,12 @@ http_ld_json_write_literal_obj (dk_session_t *ses, query_instance_t *qi, caddr_t } case DV_STRING: session_buffered_write_char ('\"', ses); - dks_esc_write (ses, obj_box_value, box_length (obj_box_value) - 1, CHARSET_UTF8, CHARSET_UTF8, DKS_ESC_JSWRITE_DQ); + dks_esc_write (ses, obj_box_value, box_length (obj_box_value) - 1, CHARSET_UTF8, CHARSET_UTF8, DKS_ESC_JSON_DQ); session_buffered_write_char ('\"', ses); break; case DV_WIDE: session_buffered_write_char ('\"', ses); - dks_esc_write (ses, obj_box_value, 
box_length (obj_box_value) - sizeof (wchar_t), CHARSET_UTF8, CHARSET_WIDE, DKS_ESC_JSWRITE_DQ); + dks_esc_write (ses, obj_box_value, box_length (obj_box_value) - sizeof (wchar_t), CHARSET_UTF8, CHARSET_WIDE, DKS_ESC_JSON_DQ); session_buffered_write_char ('\"', ses); break; case DV_XML_ENTITY: @@ -5085,7 +5085,7 @@ http_ld_json_write_literal_obj (dk_session_t *ses, query_instance_t *qi, caddr_t /* 0 1 */ { /* 012.3456789012.3456.78 */ session_buffered_write (ses, " , \"@language\" : \"", 18); - dks_esc_write (ses, lang_id, box_length (lang_id) - 1, CHARSET_UTF8, CHARSET_UTF8, DKS_ESC_JSWRITE_DQ); + dks_esc_write (ses, lang_id, box_length (lang_id) - 1, CHARSET_UTF8, CHARSET_UTF8, DKS_ESC_JSON_DQ); session_buffered_write_char ('\"', ses); dk_free_box (lang_id); } @@ -5098,7 +5098,7 @@ http_ld_json_write_literal_obj (dk_session_t *ses, query_instance_t *qi, caddr_t /* 0 1 */ /* 012.345678.9012.34 */ session_buffered_write (ses, " , \"@type\" : \"", 14); - dks_esc_write (ses, type_uri, box_length (type_uri) - 1, CHARSET_UTF8, CHARSET_UTF8, DKS_ESC_JSWRITE_DQ); + dks_esc_write (ses, type_uri, box_length (type_uri) - 1, CHARSET_UTF8, CHARSET_UTF8, DKS_ESC_JSON_DQ); session_buffered_write_char ('\"', ses); dk_free_box (type_uri); } @@ -5221,7 +5221,7 @@ bif_http_ld_json_triple_impl (ld_json_env_t *env, ld_json_env2_t *e2, caddr_t su } /* 01.2345.678.90 */ session_buffered_write (e2->ses, "{ \"@id\": \"", 10); - dks_esc_write (e2->ses, subj_iri, box_length (subj_iri) - 1, CHARSET_UTF8, CHARSET_UTF8, DKS_ESC_JSWRITE_DQ); + dks_esc_write (e2->ses, subj_iri, box_length (subj_iri) - 1, CHARSET_UTF8, CHARSET_UTF8, DKS_ESC_JSON_DQ); /* .01.23 */ session_buffered_write (e2->ses, "\",\n", 3); TAB_WS_INDENT (e2->ses, e2->nesting_level + 2); @@ -5249,9 +5249,9 @@ bif_http_ld_json_triple_impl (ld_json_env_t *env, ld_json_env2_t *e2, caddr_t su if (pred_is_type) session_buffered_write (e2->ses, "@type", 5); else if (pred_iri_or_id != p_shorthand) - dks_esc_write (e2->ses, p_shorthand, 
box_length (p_shorthand) - 1, CHARSET_UTF8, CHARSET_UTF8, DKS_ESC_JSWRITE_DQ); + dks_esc_write (e2->ses, p_shorthand, box_length (p_shorthand) - 1, CHARSET_UTF8, CHARSET_UTF8, DKS_ESC_JSON_DQ); else - dks_esc_write (e2->ses, pred_iri, box_length (pred_iri) - 1, CHARSET_UTF8, CHARSET_UTF8, DKS_ESC_JSWRITE_DQ); + dks_esc_write (e2->ses, pred_iri, box_length (pred_iri) - 1, CHARSET_UTF8, CHARSET_UTF8, DKS_ESC_JSON_DQ); if (e2->obj_is_single) { /* .0123 */ @@ -5309,13 +5309,13 @@ bif_http_ld_json_triple_impl (ld_json_env_t *env, ld_json_env2_t *e2, caddr_t su else if (pred_is_type || (pred_iri_or_id != p_shorthand)) /* Fix for 17108: values of @type should be printed without { "@id" : ... } enclosing */ { session_buffered_write_char ('\"', e2->ses); - dks_esc_write (e2->ses, obj_iri, box_length (obj_iri) - 1, CHARSET_UTF8, CHARSET_UTF8, DKS_ESC_JSWRITE_DQ); + dks_esc_write (e2->ses, obj_iri, box_length (obj_iri) - 1, CHARSET_UTF8, CHARSET_UTF8, DKS_ESC_JSON_DQ); session_buffered_write_char ('\"', e2->ses); } else { /* 01 2345 678 90*/ session_buffered_write (e2->ses, "{ \"@id\": \"", 10); - dks_esc_write (e2->ses, obj_iri, box_length (obj_iri) - 1, CHARSET_UTF8, CHARSET_UTF8, DKS_ESC_JSWRITE_DQ); + dks_esc_write (e2->ses, obj_iri, box_length (obj_iri) - 1, CHARSET_UTF8, CHARSET_UTF8, DKS_ESC_JSON_DQ); session_buffered_write (e2->ses, "\"}", 2); } } diff --git a/libsrc/Wi/sparql.sql b/libsrc/Wi/sparql.sql index 3e397f40c5..e56bd2d706 100644 --- a/libsrc/Wi/sparql.sql +++ b/libsrc/Wi/sparql.sql @@ -5746,7 +5746,7 @@ create procedure DB.DBA.RDF_TRIPLES_TO_JSON_MICRODATA (inout triples any, inout subj_iri := id_to_iri (subj); if (starts_with (subj_iri, 'nodeID://')) subj_iri := '_:' || subseq (subj_iri, 9); - http ('\n { "id" : "', ses); http_escape (subj_iri, 14, ses, 1, 1); http ('",\n "properties" : {', ses); + http ('\n { "id" : "', ses); http_escape (subj_iri, 21, ses, 1, 1); http ('",\n "properties" : {', ses); prev_subj := subj; prev_pred := null; } @@ -5755,7 
+5755,7 @@ create procedure DB.DBA.RDF_TRIPLES_TO_JSON_MICRODATA (inout triples any, inout dict_zap (objs_of_sp, 2); if (prev_pred is not null) http (' ] ,', ses); - http ('\n "', ses); http_escape (case when isstring (pred) then pred else id_to_iri (pred) end, 14, ses, 1, 1); http ('" : [ ', ses); + http ('\n "', ses); http_escape (case when isstring (pred) then pred else id_to_iri (pred) end, 21, ses, 1, 1); http ('" : [ ', ses); prev_pred := pred; obj_needs_comma := 0; } @@ -5774,7 +5774,7 @@ create procedure DB.DBA.RDF_TRIPLES_TO_JSON_MICRODATA (inout triples any, inout obj_iri := id_to_iri (obj); if (starts_with (obj_iri, 'nodeID://')) obj_iri := '_:' || subseq (obj_iri, 9); - http ('{ "id" : "', ses); http_escape (obj_iri, 14, ses, 1, 1); http ('" }', ses); + http ('{ "id" : "', ses); http_escape (obj_iri, 21, ses, 1, 1); http ('" }', ses); } else { @@ -5837,16 +5837,16 @@ create procedure DB.DBA.RDF_TRIPLES_TO_JSON_MICRODATA (inout triples any, inout declare tmpses any; tmpses := string_output(); http_value (sqlval, 0, tmpses); - http ('"', ses); http_escape (string_output_string (tmpses), 14, ses, 1, 1); http ('"', ses); + http ('"', ses); http_escape (string_output_string (tmpses), 21, ses, 1, 1); http ('"', ses); } else if (__tag of varchar = __tag (sqlval)) { - http ('"', ses); http_escape (sqlval, 14, ses, 1, 1); http ('"', ses); + http ('"', ses); http_escape (sqlval, 21, ses, 1, 1); http ('"', ses); } else { sqlval := __rdf_strsqlval (obj); - http ('"', ses); http_escape (sqlval, 14, ses, 1, 1); http ('"', ses); + http ('"', ses); http_escape (sqlval, 21, ses, 1, 1); http ('"', ses); } } skip_obj: ; @@ -5983,7 +5983,7 @@ create procedure DB.DBA.RDF_TRIPLES_TO_ODATA_JSON (inout triples any, inout ses { http ('"', ses); if (__tag of datetime <> __tag (obj)) - http_escape (obj, 14, ses, 1, 1); + http_escape (obj, 21, ses, 1, 1); else __rdf_long_to_ttl (obj, ses); http ('"', ses); diff --git a/libsrc/Wi/sparql_io.sql b/libsrc/Wi/sparql_io.sql index 
6262e380b8..3e29483cf7 100644 --- a/libsrc/Wi/sparql_io.sql +++ b/libsrc/Wi/sparql_io.sql @@ -1534,7 +1534,7 @@ end_of_val_print: ; create procedure DB.DBA.SPARQL_RESULTS_JSON_WRITE_BINDING (inout ses any, in colname varchar, inout val any) { http(' "', ses); - http_escape (colname, 14, ses, 1, 1); + http_escape (colname, 21, ses, 1, 1); http('": { ', ses); if (isiri_id (val)) { @@ -1543,7 +1543,7 @@ create procedure DB.DBA.SPARQL_RESULTS_JSON_WRITE_BINDING (inout ses any, in col else { http ('"type": "uri", "value": "', ses); - http_escape (id_to_iri (val), 14, ses, 1, 1); + http_escape (id_to_iri (val), 21, ses, 1, 1); } } else if (__tag of rdf_box = __tag (val)) @@ -1559,7 +1559,7 @@ create procedure DB.DBA.SPARQL_RESULTS_JSON_WRITE_BINDING (inout ses any, in col res := coalesce ((select RDT_QNAME from DB.DBA.RDF_DATATYPE where RDT_TWOBYTE = typ)); else res := cast (__xsd_type (dat) as varchar); - http_escape (res, 14, ses, 1, 1); + http_escape (res, 21, ses, 1, 1); http ('", "value": "', ses); dat := __rdf_strsqlval (dat); } @@ -1567,14 +1567,14 @@ create procedure DB.DBA.SPARQL_RESULTS_JSON_WRITE_BINDING (inout ses any, in col { http ('"type": "literal", "datatype": "', ses); res := coalesce ((select RDT_QNAME from DB.DBA.RDF_DATATYPE where RDT_TWOBYTE = typ)); - http_escape (res, 14, ses, 1, 1); + http_escape (res, 21, ses, 1, 1); http ('", "value": "', ses); } else if (257 <> rdf_box_lang (val)) { http ('"type": "literal", "xml:lang": "', ses); res := coalesce ((select RL_ID from DB.DBA.RDF_LANGUAGE where RL_TWOBYTE = rdf_box_lang (val))); - http_escape (res, 14, ses, 1, 1); + http_escape (res, 21, ses, 1, 1); http ('", "value": "', ses); } else @@ -1582,7 +1582,7 @@ create procedure DB.DBA.SPARQL_RESULTS_JSON_WRITE_BINDING (inout ses any, in col if (__tag of datetime = rdf_box_data_tag (val)) __rdf_long_to_ttl (val, ses); else - http_escape (dat, 14, ses, 1, 1); + http_escape (dat, 21, ses, 1, 1); } else if (__tag of varchar = __tag (val)) { @@ -1593,13 
+1593,13 @@ create procedure DB.DBA.SPARQL_RESULTS_JSON_WRITE_BINDING (inout ses any, in col else { http ('"type": "uri", "value": "', ses); - http_escape (val, 14, ses, 1, 1); + http_escape (val, 21, ses, 1, 1); } } else { http ('"type": "literal", "value": "', ses); - http_escape (val, 14, ses, 1, 1); + http_escape (val, 21, ses, 1, 1); } } else if (__tag of UNAME = __tag (val)) @@ -1609,35 +1609,35 @@ create procedure DB.DBA.SPARQL_RESULTS_JSON_WRITE_BINDING (inout ses any, in col else { http ('"type": "uri", "value": "', ses); - http_escape (val, 14, ses, 1, 1); + http_escape (val, 21, ses, 1, 1); } } else if (__tag of varbinary = __tag (val)) { http ('"type": "literal", "value": "', ses); - http_escape (val, 14, ses, 0, 0); + http_escape (val, 21, ses, 0, 0); } else if (__tag of stream = __tag (val)) { http ('"type": "literal", "value": "', ses); - http_escape (cast (val as varchar), 14, ses, 1, 1); + http_escape (cast (val as varchar), 21, ses, 1, 1); } else if (__tag of XML = __tag (val)) { http ('"type": "literal", "value": "', ses); - http_escape (serialize_to_UTF8_xml (val), 14, ses, 1, 1); + http_escape (serialize_to_UTF8_xml (val), 21, ses, 1, 1); } else if (isnumeric(val) or __tag (val) in (__tag of date, __tag of time, __tag of datetime)) { http ('"type": "literal", "datatype": "', ses); - http_escape (cast (__xsd_type (val) as varchar), 14, ses, 1, 1); + http_escape (cast (__xsd_type (val) as varchar), 21, ses, 1, 1); http ('", "value": "', ses); - http_escape (__rdf_strsqlval (val), 14, ses, 1, 1); + http_escape (__rdf_strsqlval (val), 21, ses, 1, 1); } else { http ('"type": "literal", "value": "', ses); - http_escape (__rdf_strsqlval (val), 14, ses, 1, 1); + http_escape (__rdf_strsqlval (val), 21, ses, 1, 1); } http ('" }', ses); } @@ -1655,7 +1655,7 @@ create procedure DB.DBA.SPARQL_RESULTS_JSON_WRITE (inout ses any, inout metas an http(', "', ses); else http('"', ses); - http_escape (metas[0][varctr][0], 14, ses, 1, 1); + http_escape 
(metas[0][varctr][0], 21, ses, 1, 1); http('"', ses); } http ('] },\n "results": { "distinct": false, "ordered": true, "bindings": [', ses); diff --git a/libsrc/Wi/system2.sql b/libsrc/Wi/system2.sql index ca8fc2a1a0..3407bc4685 100644 --- a/libsrc/Wi/system2.sql +++ b/libsrc/Wi/system2.sql @@ -1150,7 +1150,7 @@ create procedure DB.DBA.JSON_SERIALIZE_INNER (inout ses any, in o any, in depth else if (isstring (o) or __tag of uname = __tag (o)) { http ('"', ses); - http_escape (o, 14, ses, 1, 1); + http_escape (o, 21, ses, 1, 1); http ('"', ses); } else if (__tag(o) = __tag of datetime) @@ -1189,7 +1189,7 @@ create procedure DB.DBA.JSON_SERIALIZE_INNER (inout ses any, in o any, in depth http (repeat (' ', (depth * indent)), ses); } http ('"', ses); - http_escape (elm, 14, ses, 1, 1); + http_escape (elm, 21, ses, 1, 1); http ('":', ses); if (indent) http(' ', ses); DB.DBA.JSON_SERIALIZE_INNER (ses, aref(o,inx + 1), depth, indent); @@ -1229,7 +1229,7 @@ create procedure DB.DBA.JSON_SERIALIZE_INNER (inout ses any, in o any, in depth http (repeat (' ', (depth * indent)), ses); } http ('"', ses); - http_escape (field, 14, ses, 1, 1); + http_escape (field, 21, ses, 1, 1); http ('":', ses); if (indent) http(' ', ses); DB.DBA.JSON_SERIALIZE_INNER (ses, v, depth, indent); From 82eadf3bf7a5aad7643993bf5cb7eb50b62dbe81 Mon Sep 17 00:00:00 2001 From: Boris De Vloed Date: Mon, 23 Feb 2026 10:29:52 +0100 Subject: [PATCH 2/3] Add tests --- binsrc/tests/suite/tsparql_json_utf8.sh | 191 +++++++++++++++++++++++ binsrc/tests/suite/tsparql_json_utf8.sql | 178 +++++++++++++++++++++ 2 files changed, 369 insertions(+) create mode 100755 binsrc/tests/suite/tsparql_json_utf8.sh create mode 100644 binsrc/tests/suite/tsparql_json_utf8.sql diff --git a/binsrc/tests/suite/tsparql_json_utf8.sh b/binsrc/tests/suite/tsparql_json_utf8.sh new file mode 100755 index 0000000000..9eeaf4674a --- /dev/null +++ b/binsrc/tests/suite/tsparql_json_utf8.sh @@ -0,0 +1,191 @@ +#!/bin/bash +# +# 
tsparql_json_utf8.sh +# +# Standalone test for SPARQL JSON result serialization of non-ASCII UTF-8 strings. +# Tests that non-ASCII characters are output as UTF-8 (not \uXXXX escapes) +# and that "type" is "literal" (not "typed-literal"). +# +# Can be run BEFORE and AFTER the fix to demonstrate the difference. +# +# Related: https://github.com/openlink/virtuoso-opensource/issues/1361 +# +# Usage: +# ./tsparql_json_utf8.sh [SPARQL_ENDPOINT_URL] +# +# Default endpoint: http://localhost:8890/sparql +# + +ENDPOINT="${1:-http://localhost:8890/sparql}" +GRAPH="urn:test:json:utf8:$(date +%s)" +PASSED=0 +FAILED=0 +TESTS=0 + +pass() { + PASSED=$((PASSED + 1)) + TESTS=$((TESTS + 1)) + echo " PASSED: $1" +} + +fail() { + FAILED=$((FAILED + 1)) + TESTS=$((TESTS + 1)) + echo " ***FAILED: $1" +} + +echo "=== SPARQL JSON UTF-8 Serialization Test ===" +echo "Endpoint: $ENDPOINT" +echo "Test graph: $GRAPH" +echo "" + +# --- Setup: Insert test data --- +echo "--- Setup: Inserting test data ---" +curl -s -X POST "$ENDPOINT" \ + --data-urlencode "query=INSERT INTO GRAPH <$GRAPH> { + \"België\" . + \"Zürich\" . + \"naïve café\" . + \"日本語テスト\" . + \"plain ASCII\" . 
+ }" > /dev/null 2>&1 + +# --- Test 1: België in JSON output --- +echo "" +echo "--- Test 1: Non-ASCII string 'België' in JSON ---" +RESULT=$(curl -s -H "Accept: application/sparql-results+json" \ + --data-urlencode "query=SELECT ?name WHERE { GRAPH <$GRAPH> { ?name } }" \ + "$ENDPOINT") + +echo " Raw JSON output:" +echo " $RESULT" | head -20 +echo "" + +if echo "$RESULT" | grep -q 'België'; then + pass "JSON contains UTF-8 'België' (not escaped)" +else + fail "JSON does not contain UTF-8 'België'" +fi + +if echo "$RESULT" | grep -q '\\u00EB\|\\u00CB'; then + fail "JSON contains \\uXXXX escape sequences (should be UTF-8)" +else + pass "JSON does not contain \\uXXXX escape sequences" +fi + +# --- Test 2: Zürich in JSON output --- +echo "" +echo "--- Test 2: Non-ASCII string 'Zürich' in JSON ---" +RESULT=$(curl -s -H "Accept: application/sparql-results+json" \ + --data-urlencode "query=SELECT ?name WHERE { GRAPH <$GRAPH> { ?name } }" \ + "$ENDPOINT") + +if echo "$RESULT" | grep -q 'Zürich'; then + pass "JSON contains UTF-8 'Zürich'" +else + fail "JSON does not contain UTF-8 'Zürich'" +fi + +# --- Test 3: naïve café in JSON output --- +echo "" +echo "--- Test 3: Non-ASCII string 'naïve café' in JSON ---" +RESULT=$(curl -s -H "Accept: application/sparql-results+json" \ + --data-urlencode "query=SELECT ?name WHERE { GRAPH <$GRAPH> { ?name } }" \ + "$ENDPOINT") + +if echo "$RESULT" | grep -q 'naïve café'; then + pass "JSON contains UTF-8 'naïve café'" +else + fail "JSON does not contain UTF-8 'naïve café'" +fi + +# --- Test 4: CJK characters in JSON output --- +echo "" +echo "--- Test 4: CJK string '日本語テスト' in JSON ---" +RESULT=$(curl -s -H "Accept: application/sparql-results+json" \ + --data-urlencode "query=SELECT ?name WHERE { GRAPH <$GRAPH> { ?name } }" \ + "$ENDPOINT") + +if echo "$RESULT" | grep -q '日本語テスト'; then + pass "JSON contains UTF-8 CJK characters" +else + fail "JSON does not contain UTF-8 CJK characters" +fi + +# --- Test 5: type field is "literal" not 
"typed-literal" --- +echo "" +echo "--- Test 5: type field correctness ---" +RESULT=$(curl -s -H "Accept: application/sparql-results+json" \ + --data-urlencode "query=SELECT ?name WHERE { GRAPH <$GRAPH> { ?name } }" \ + "$ENDPOINT") + +if echo "$RESULT" | grep -q '"typed-literal"'; then + fail "JSON uses 'typed-literal' (should be 'literal')" +else + pass "JSON does not use 'typed-literal'" +fi + +if echo "$RESULT" | grep -q '"type": "literal"'; then + pass "JSON uses 'type': 'literal'" +else + # Also check without extra space + if echo "$RESULT" | grep -q '"type":"literal"'; then + pass "JSON uses 'type':'literal'" + else + fail "JSON does not contain type=literal" + fi +fi + +# --- Test 6: plain ASCII still works --- +echo "" +echo "--- Test 6: Plain ASCII string preserved ---" +RESULT=$(curl -s -H "Accept: application/sparql-results+json" \ + --data-urlencode "query=SELECT ?name WHERE { GRAPH <$GRAPH> { ?name } }" \ + "$ENDPOINT") + +if echo "$RESULT" | grep -q 'plain ASCII'; then + pass "JSON contains 'plain ASCII' correctly" +else + fail "JSON does not contain 'plain ASCII'" +fi + +# --- Test 7: Compare JSON vs XML for België --- +echo "" +echo "--- Test 7: JSON vs XML consistency for 'België' ---" +JSON_RESULT=$(curl -s -H "Accept: application/sparql-results+json" \ + --data-urlencode "query=SELECT ?name WHERE { GRAPH <$GRAPH> { ?name } }" \ + "$ENDPOINT") +XML_RESULT=$(curl -s -H "Accept: application/sparql-results+xml" \ + --data-urlencode "query=SELECT ?name WHERE { GRAPH <$GRAPH> { ?name } }" \ + "$ENDPOINT") + +XML_HAS_UTF8=0 +JSON_HAS_UTF8=0 +echo "$XML_RESULT" | grep -q 'België' && XML_HAS_UTF8=1 +echo "$JSON_RESULT" | grep -q 'België' && JSON_HAS_UTF8=1 + +if [ "$XML_HAS_UTF8" -eq 1 ] && [ "$JSON_HAS_UTF8" -eq 1 ]; then + pass "Both JSON and XML output contain UTF-8 'België'" +elif [ "$XML_HAS_UTF8" -eq 1 ] && [ "$JSON_HAS_UTF8" -eq 0 ]; then + fail "XML has UTF-8 'België' but JSON does not (inconsistency!)" +else + fail "Neither JSON nor XML contain 
UTF-8 'België'" +fi + +# --- Cleanup --- +echo "" +echo "--- Cleanup ---" +curl -s -X POST "$ENDPOINT" \ + --data-urlencode "query=CLEAR GRAPH <$GRAPH>" > /dev/null 2>&1 +echo " Test graph cleared." + +# --- Summary --- +echo "" +echo "===========================================" +echo " Results: $PASSED passed, $FAILED failed (out of $TESTS tests)" +echo "===========================================" + +if [ "$FAILED" -gt 0 ]; then + exit 1 +fi +exit 0 diff --git a/binsrc/tests/suite/tsparql_json_utf8.sql b/binsrc/tests/suite/tsparql_json_utf8.sql new file mode 100644 index 0000000000..1feed59554 --- /dev/null +++ b/binsrc/tests/suite/tsparql_json_utf8.sql @@ -0,0 +1,178 @@ +-- +-- tsparql_json_utf8.sql +-- +-- Test SPARQL JSON result serialization of non-ASCII UTF-8 strings. +-- Verifies that non-ASCII characters are output as UTF-8 in JSON values +-- (not as \uXXXX escape sequences), and that the "type" field is always +-- "literal" (never "typed-literal") per SPARQL 1.1 Results JSON spec. +-- +-- Related: https://github.com/openlink/virtuoso-opensource/issues/1361 +-- + +SET ARGV[0] 0; +SET ARGV[1] 0; +ECHO BOTH "STARTED: SPARQL JSON UTF-8 serialization tests (issue #1361)\n"; + +-- Clean up test graph +SPARQL CLEAR GRAPH ; + +-- Insert test triples with non-ASCII characters +SPARQL INSERT INTO GRAPH { + "België" . + "Zürich" . + "naïve café" . + "日本語テスト" . + "Ελληνικά" . + "plain ASCII" . 
+};
+
+----------------------------------------------------------------------
+-- Test 1: http_escape mode 21 (DKS_ESC_JSON_DQ) passes through UTF-8
+----------------------------------------------------------------------
+-- TEST_JSON_ESCAPE_UTF8 returns its argument after http_escape in mode 21, written to a string session. NOTE(review): the trailing 1, 1 arguments are presumably the UTF-8 source and target flags - confirm.
+create procedure DB.DBA.TEST_JSON_ESCAPE_UTF8 (in str varchar)
+{
+  declare ses any;
+  ses := string_output ();
+  http_escape (str, 21, ses, 1, 1);
+  return string_output_string (ses);
+};
+
+select DB.DBA.TEST_JSON_ESCAPE_UTF8 ('België');
+ECHO BOTH $IF $EQU $LAST[1] "België" "PASSED" "***FAILED";
+SET ARGV[$LIF] $+ $ARGV[$LIF] 1;
+ECHO BOTH ": JSON escape mode 21 preserves UTF-8 for België, got: " $LAST[1] "\n";
+
+select DB.DBA.TEST_JSON_ESCAPE_UTF8 ('Zürich');
+ECHO BOTH $IF $EQU $LAST[1] "Zürich" "PASSED" "***FAILED";
+SET ARGV[$LIF] $+ $ARGV[$LIF] 1;
+ECHO BOTH ": JSON escape mode 21 preserves UTF-8 for Zürich, got: " $LAST[1] "\n";
+
+select DB.DBA.TEST_JSON_ESCAPE_UTF8 ('naïve café');
+ECHO BOTH $IF $EQU $LAST[1] "naïve café" "PASSED" "***FAILED";
+SET ARGV[$LIF] $+ $ARGV[$LIF] 1;
+ECHO BOTH ": JSON escape mode 21 preserves UTF-8 for naïve café, got: " $LAST[1] "\n";
+
+select DB.DBA.TEST_JSON_ESCAPE_UTF8 ('日本語テスト');
+ECHO BOTH $IF $EQU $LAST[1] "日本語テスト" "PASSED" "***FAILED";
+SET ARGV[$LIF] $+ $ARGV[$LIF] 1;
+ECHO BOTH ": JSON escape mode 21 preserves UTF-8 for CJK, got: " $LAST[1] "\n";
+
+select DB.DBA.TEST_JSON_ESCAPE_UTF8 ('Ελληνικά');
+ECHO BOTH $IF $EQU $LAST[1] "Ελληνικά" "PASSED" "***FAILED";
+SET ARGV[$LIF] $+ $ARGV[$LIF] 1;
+ECHO BOTH ": JSON escape mode 21 preserves UTF-8 for Greek, got: " $LAST[1] "\n";
+
+----------------------------------------------------------------------
+-- Test 2: JSON special chars are still properly escaped
+----------------------------------------------------------------------
+
+select DB.DBA.TEST_JSON_ESCAPE_UTF8 ('quote"here');
+ECHO BOTH $IF $EQU $LAST[1] 'quote\"here' "PASSED" "***FAILED";
+SET ARGV[$LIF] $+ $ARGV[$LIF] 1;
+ECHO BOTH ": JSON escape mode 21 still escapes double quotes\n";
+
+select DB.DBA.TEST_JSON_ESCAPE_UTF8 ('back\\slash');
+ECHO BOTH $IF $EQU $LAST[1] 'back\\\\slash' "PASSED" "***FAILED";
+SET ARGV[$LIF] $+ $ARGV[$LIF] 1;
+ECHO BOTH ": JSON escape mode 21 still escapes backslashes\n";
+
+select DB.DBA.TEST_JSON_ESCAPE_UTF8 ('plain ASCII');
+ECHO BOTH $IF $EQU $LAST[1] "plain ASCII" "PASSED" "***FAILED";
+SET ARGV[$LIF] $+ $ARGV[$LIF] 1;
+ECHO BOTH ": JSON escape mode 21 preserves plain ASCII, got: " $LAST[1] "\n";
+
+----------------------------------------------------------------------
+-- Test 3: Full SPARQL JSON result serialization
+----------------------------------------------------------------------
+-- TEST_SPARQL_JSON_UTF8 runs the SELECT through exec, writes the result set with SPARQL_RESULTS_JSON_WRITE into a string session and returns that text.
+create procedure DB.DBA.TEST_SPARQL_JSON_UTF8 ()
+{
+  declare ses, metas, rset any;
+  exec ('SPARQL SELECT ?name WHERE { GRAPH { ?name } }',
+        null, null, null, 0, metas, rset);
+  ses := string_output ();
+  DB.DBA.SPARQL_RESULTS_JSON_WRITE (ses, metas, rset);
+  return string_output_string (ses);
+};
+
+select DB.DBA.TEST_SPARQL_JSON_UTF8 ();
+ECHO BOTH $IF $NEQ $LAST[1] NULL "PASSED" "***FAILED";
+SET ARGV[$LIF] $+ $ARGV[$LIF] 1;
+ECHO BOTH ": SPARQL JSON result returned non-null\n";
+
+-- Check that result contains UTF-8 België (not escaped); returns HAS_UTF8, HAS_ESCAPE, or UNKNOWN followed by the full output
+create procedure DB.DBA.TEST_JSON_CONTAINS_UTF8 ()
+{
+  declare result varchar;
+  result := DB.DBA.TEST_SPARQL_JSON_UTF8 ();
+  -- Check for UTF-8 "België" in the output. NOTE(review): \x00EB below is not the UTF-8 byte pair C3 AB - confirm how this escape is decoded
+  if (strstr (result, 'Belgi\x00EB') is not null)
+    return 'HAS_UTF8';
+  -- Check for escaped version (a literal backslash followed by u00EB)
+  if (strstr (result, 'Belgi\\u00EB') is not null)
+    return 'HAS_ESCAPE';
+  return 'UNKNOWN: ' || result;
+};
+
+select DB.DBA.TEST_JSON_CONTAINS_UTF8 ();
+ECHO BOTH $IF $EQU $LAST[1] "HAS_UTF8" "PASSED" "***FAILED";
+SET ARGV[$LIF] $+ $ARGV[$LIF] 1;
+ECHO BOTH ": SPARQL JSON output contains UTF-8 België (not \\u escaped), got: " $LAST[1] "\n";
+
+----------------------------------------------------------------------
+-- Test 4: type field is "literal" not "typed-literal"
+----------------------------------------------------------------------
+-- TEST_JSON_TYPE_LITERAL returns HAS_TYPED_LITERAL if the legacy marker is present, HAS_LITERAL if the type field is literal (with or without a space after the colon), else UNKNOWN plus the output.
+create procedure DB.DBA.TEST_JSON_TYPE_LITERAL ()
+{
+  declare result varchar;
+  result := DB.DBA.TEST_SPARQL_JSON_UTF8 ();
+  if (strstr (result, '"typed-literal"') is not null)
+    return 'HAS_TYPED_LITERAL';
+  if (strstr (result, '"type": "literal"') is not null or strstr (result, '"type":"literal"') is not null)
+    return 'HAS_LITERAL';
+  return 'UNKNOWN: ' || result;
+};
+
+select DB.DBA.TEST_JSON_TYPE_LITERAL ();
+ECHO BOTH $IF $EQU $LAST[1] "HAS_LITERAL" "PASSED" "***FAILED";
+SET ARGV[$LIF] $+ $ARGV[$LIF] 1;
+ECHO BOTH ": SPARQL JSON uses type=literal not typed-literal, got: " $LAST[1] "\n";
+
+----------------------------------------------------------------------
+-- Test 5: Consistency between XML and JSON output
+----------------------------------------------------------------------
+-- TEST_JSON_XML_CONSISTENCY serializes one result set as JSON and as XML and returns CONSISTENT only when both texts contain the same needle.
+create procedure DB.DBA.TEST_JSON_XML_CONSISTENCY ()
+{
+  declare ses_json, ses_xml, metas, rset any;
+  declare json_result, xml_result varchar;
+  -- Run the query once so both serializers see identical metas and rows
+  exec ('SPARQL SELECT ?name WHERE { GRAPH { ?name } }', null, null, null, 0, metas, rset);
+  ses_json := string_output ();
+  DB.DBA.SPARQL_RESULTS_JSON_WRITE (ses_json, metas, rset);
+  json_result := string_output_string (ses_json);
+  ses_xml := string_output ();
+  DB.DBA.SPARQL_RESULTS_WRITE (ses_xml, metas, rset, 'application/sparql-results+xml', 0);
+  xml_result := string_output_string (ses_xml);
+  -- Both must contain the same string "België". NOTE(review): confirm the SPARQL_RESULTS_WRITE signature in sparql_io.sql and how \x00EB is decoded
+  if (strstr (json_result, 'Belgi\x00EB') is not null and strstr (xml_result, 'Belgi\x00EB') is not null)
+    return 'CONSISTENT';
+  return 'INCONSISTENT';
+};
+
+select DB.DBA.TEST_JSON_XML_CONSISTENCY ();
+ECHO BOTH $IF $EQU $LAST[1] "CONSISTENT" "PASSED" "***FAILED";
+SET ARGV[$LIF] $+ $ARGV[$LIF] 1;
+ECHO BOTH ": JSON and XML serializers both output UTF-8 consistently, got: " $LAST[1] "\n";
+
+-- Clean up
+drop procedure DB.DBA.TEST_JSON_ESCAPE_UTF8;
+drop procedure DB.DBA.TEST_SPARQL_JSON_UTF8;
+drop procedure DB.DBA.TEST_JSON_CONTAINS_UTF8;
+drop procedure DB.DBA.TEST_JSON_TYPE_LITERAL;
+drop procedure DB.DBA.TEST_JSON_XML_CONSISTENCY;
+SPARQL CLEAR GRAPH ;
+
+ECHO BOTH "COMPLETED WITH " $ARGV[0] " FAILED, " $ARGV[1] " PASSED: SPARQL JSON UTF-8
serialization tests\n";
From 7664d94c1a8bd7b9b8d42fec2a9835dd262102b4 Mon Sep 17 00:00:00 2001
From: Boris De Vloed
Date: Fri, 27 Feb 2026 16:50:05 +0100
Subject: [PATCH 3/3] procedure based test

---
 binsrc/tests/suite/tsparql_json_utf8.sql | 32 ++++++++++++++++++------
 1 file changed, 25 insertions(+), 7 deletions(-)

diff --git a/binsrc/tests/suite/tsparql_json_utf8.sql b/binsrc/tests/suite/tsparql_json_utf8.sql
index 1feed59554..cbdebe6a62 100644
--- a/binsrc/tests/suite/tsparql_json_utf8.sql
+++ b/binsrc/tests/suite/tsparql_json_utf8.sql
@@ -67,15 +67,33 @@ ECHO BOTH ": JSON escape mode 21 preserves UTF-8 for Greek, got: " $LAST[1] "\n"
 -- Test 2: JSON special chars are still properly escaped
 ----------------------------------------------------------------------
 
-select DB.DBA.TEST_JSON_ESCAPE_UTF8 ('quote"here');
-ECHO BOTH $IF $EQU $LAST[1] 'quote\"here' "PASSED" "***FAILED";
-SET ARGV[$LIF] $+ $ARGV[$LIF] 1;
-ECHO BOTH ": JSON escape mode 21 still escapes double quotes\n";
+create procedure DB.DBA.TEST_JSON_ESCAPE_SPECIAL ()
+{
+  -- Returns ESCAPED only when both http_escape mode 21 checks below succeed,
+  -- and NOT_ESCAPED as soon as either expected escape sequence is absent.
+  declare ses any;
+  declare result varchar;
+  -- Test double quote escaping
+  ses := string_output ();
+  http_escape ('quote"here', 21, ses, 1, 1);
+  result := string_output_string (ses);
+  if (strstr (result, '\\"') is null)
+    return 'NOT_ESCAPED';
+  -- Test backslash escaping
+  ses := string_output ();
+  http_escape ('back\\slash', 21, ses, 1, 1);
+  result := string_output_string (ses);
+  if (strstr (result, '\\\\') is null)
+    return 'NOT_ESCAPED';
+  return 'ESCAPED';
+};
 
-select DB.DBA.TEST_JSON_ESCAPE_UTF8 ('back\\slash');
-ECHO BOTH $IF $EQU $LAST[1] 'back\\\\slash' "PASSED" "***FAILED";
+select DB.DBA.TEST_JSON_ESCAPE_SPECIAL ();
+ECHO BOTH $IF $EQU $LAST[1] "ESCAPED" "PASSED" "***FAILED";
 SET ARGV[$LIF] $+ $ARGV[$LIF] 1;
-ECHO BOTH ": JSON escape mode 21 still escapes backslashes\n";
+ECHO BOTH ": JSON escape mode 21 still escapes quotes and backslashes, got: " $LAST[1] "\n";
+-- Drop the helper here: the clean-up section at the end of the file does not list it.
+drop procedure DB.DBA.TEST_JSON_ESCAPE_SPECIAL;
 
 select DB.DBA.TEST_JSON_ESCAPE_UTF8 ('plain ASCII');
 ECHO BOTH $IF $EQU $LAST[1] "plain ASCII" "PASSED" "***FAILED";