From 704acd14a41446df6e453aaa269743295c27656a Mon Sep 17 00:00:00 2001 From: heiko Date: Tue, 4 Sep 2018 15:12:24 +0200 Subject: [PATCH] =?UTF-8?q?address=20utf8=20/=20utf8mb4=20issue=20on=20tab?= =?UTF-8?q?le=20sessions=20=E2=80=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit when session data with 4-byte UTF-8 characters is saved into a MySQL data field that uses the 3-byte 'utf8' charset. There are two options: - APP_DB_USE_UTF8MB4_ON_UTF8 uses SET NAMES 'utf8mb4' COLLATE 'utf8mb4_general_ci' - APP_DB_TR_UTF8_TO_UTF8MB4 + has to be used in addition to APP_DB_USE_UTF8MB4_ON_UTF8 + could auto-convert charsets and collations to utf8mb4 - but is currently restricted to the sessions table only (due to the 767-byte limit on index keys, which would be exceeded on other system tables). + This option doesn’t help with an already created sessions table; you have to convert it manually. --- gyro/core/config.cls.php | 5 ++++ gyro/core/constants.inc.php | 6 +++++ gyro/core/model/base/db.cls.php | 25 ++++++++++++++++++- .../model/drivers/mysql/dbdriver.mysql.php | 6 ++++- 4 files changed, 40 insertions(+), 2 deletions(-) diff --git a/gyro/core/config.cls.php b/gyro/core/config.cls.php index d2bc06cb..2221a018 100644 --- a/gyro/core/config.cls.php +++ b/gyro/core/config.cls.php @@ -48,6 +48,11 @@ class Config { const LOG_TRANSLATIONS = 'LOG_TRANSLATIONS'; const LOG_HTML_ERROR_STATUS = 'LOG_HTML_ERROR_STATUS'; const LOG_HTTPREQUESTS = 'LOG_HTTPREQUESTS'; + /** + * DB UTF8 / UTF8MB4 + */ + const DB_USE_UTF8MB4_ON_UTF8 = 'DB_USE_UTF8MB4_ON_UTF8'; + const DB_TR_UTF8_TO_UTF8MB4 = 'DB_TR_UTF8_TO_UTF8MB4'; /** * Added to each email subject line */ diff --git a/gyro/core/constants.inc.php b/gyro/core/constants.inc.php index 27a96abc..b08c7410 100644 --- a/gyro/core/constants.inc.php +++ b/gyro/core/constants.inc.php @@ -172,6 +172,12 @@ */ Config::set_value_from_constant(Config::DB_SLOW_QUERY_THRESHOLD, 'APP_DB_SLOW_QUERY_THRESHOLD', 0.0100); +/** + * DB UTF8 / UTF8MB4 + */ 
+Config::set_value_from_constant(Config::DB_USE_UTF8MB4_ON_UTF8, 'APP_DB_USE_UTF8MB4_ON_UTF8', false); +Config::set_value_from_constant(Config::DB_TR_UTF8_TO_UTF8MB4, 'APP_DB_TR_UTF8_TO_UTF8MB4', false); + /** * Cache headers */ diff --git a/gyro/core/model/base/db.cls.php b/gyro/core/model/base/db.cls.php index 080f9085..40318b39 100644 --- a/gyro/core/model/base/db.cls.php +++ b/gyro/core/model/base/db.cls.php @@ -326,6 +326,9 @@ public static function execute_script($file, $connection = self::DEFAULT_CONNECT $dao = self::create('cache'); while($query = self::extract_next_sql_statement($handle)) { if ($query != ';') { + if (Config::has_feature(Config::DB_USE_UTF8MB4_ON_UTF8) && Config::has_feature(Config::DB_TR_UTF8_TO_UTF8MB4)) { + $query = self::translate_query_utf8_utf8mb4($query); + } $status->merge($conn->execute($query)); if ($status->is_error()) { break; @@ -341,7 +344,27 @@ public static function execute_script($file, $connection = self::DEFAULT_CONNECT } return $status; } - + + /** + * Experimental sql statement translation of: + * + * - "CHARSET=utf8" => "CHARSET=utf8mb4" + * - "CHARACTER SET 'utf8'" => "CHARACTER SET 'utf8mb4'" + * - "COLLATE=utf8_general_ci" => "COLLATE=utf8mb4_general_ci" + * + * This should cover all utf8 use cases in install.sql and update.sql scripts, + * but currently only used for table=`sessions`. 
+ */ + protected static function translate_query_utf8_utf8mb4($query) { + if (stripos($query, 'CREATE TABLE IF NOT EXISTS `sessions`') !== false) { + $query = preg_replace('/CHARSET\s*=\s*utf8(?!mb4)/i', 'CHARSET=utf8mb4', $query); + $query = preg_replace('/CHARACTER\s+SET\s*\'utf8\'/i', 'CHARACTER SET \'utf8mb4\'', $query); + $query = preg_replace('/COLLATE\s*=\s*utf8_general_ci/i', 'COLLATE=utf8mb4_general_ci', $query); + } + + return $query; + } + public static function extract_next_sql_statement($handle) { $ret = ''; $last = ''; diff --git a/gyro/core/model/drivers/mysql/dbdriver.mysql.php b/gyro/core/model/drivers/mysql/dbdriver.mysql.php index 587db085..3dd91cad 100644 --- a/gyro/core/model/drivers/mysql/dbdriver.mysql.php +++ b/gyro/core/model/drivers/mysql/dbdriver.mysql.php @@ -102,7 +102,11 @@ protected function connect() { if ($err->is_ok()) { // We are connected if (GyroLocale::get_charset() == 'UTF-8') { - $this->execute("SET NAMES 'utf8' COLLATE 'utf8_general_ci'"); + if (Config::has_feature(Config::DB_USE_UTF8MB4_ON_UTF8)) { + $this->execute("SET NAMES 'utf8mb4' COLLATE 'utf8mb4_general_ci'"); + } else { + $this->execute("SET NAMES 'utf8' COLLATE 'utf8_general_ci'"); + } } //$this->execute("SET sql_mode=STRICT_ALL_TABLES"); $this->execute("SET sql_mode='TRADITIONAL'");