From 332187ebe7e753e75f58c7a3e93663116af14920 Mon Sep 17 00:00:00 2001 From: Yangph <814773976@qq.com> Date: Tue, 16 Mar 2021 18:17:17 +0800 Subject: [PATCH 1/4] =?UTF-8?q?1.support=20PHP7=20=EF=BC=8Cneed=20php=20Ds?= =?UTF-8?q?=EF=BC=88Map=EF=BC=89.=202.add=20column=20type=20cast.=203.proc?= =?UTF-8?q?ess=20bytea=20to=20longblob,=20For=20example=20bytea=20content?= =?UTF-8?q?=20cast=20'\xa234bc23'=20to=20UNHEX('a234bc23').=204.add=20colu?= =?UTF-8?q?mn=20comment=20and=20table=20comment.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pg2mysql.inc.php | 56 ++++++++++++++++++++++++++++-------------------- 1 file changed, 33 insertions(+), 23 deletions(-) diff --git a/pg2mysql.inc.php b/pg2mysql.inc.php index 28db475..752391f 100644 --- a/pg2mysql.inc.php +++ b/pg2mysql.inc.php @@ -37,7 +37,7 @@ function getfieldname($l) { //first check if its in nice quotes for us - if (preg_match("`(.*)`", $l, $regs)) { + if (preg_match("/`(.*)`/", $l, $regs)) { if ($regs[1]) { return $regs[1]; } else { @@ -45,7 +45,7 @@ function getfieldname($l) } } //if its not in quotes, then it should (we hope!) be the first "word" on the line, up to the first space. - elseif (preg_match("([^\ ]*)", trim($l), $regs)) { + elseif (preg_match("/([^\ ]*)/", trim($l), $regs)) { if ($regs[1]) { return $regs[1]; } else { @@ -193,39 +193,40 @@ function pg2mysql($input, $header=true) $line=str_replace(" bool DEFAULT true", " bool DEFAULT 1", $line); $line=str_replace(" bool DEFAULT false", " bool DEFAULT 0", $line); $line=str_replace("` `text`", "` text", $line); // fix because pg_dump quotes text type for some reason - if (preg_match(" character varying\(([0-9]*)\)", $line, $regs)) { + if (preg_match("/ character varying\(([0-9]*)\)/", $line, $regs)) { $num=$regs[1]; if ($num<=255) { - $line=preg_replace(" character varying\([0-9]*\)", " varchar($num)", $line); + # Pattern delimniter "/" fails here. Use alternatively "|". + $line=preg_replace("| character varying\([0-9]*\)|", " varchar($num)", $line); } else { - $line=preg_replace(" character varying\([0-9]*\)", " text", $line); + $line=preg_replace("/ character varying\([0-9]*\)/", " text", $line); } } //character varying with no size, we will default to varchar(255) - if (preg_match(" character varying", $line)) { - $line=preg_replace(" character varying", " varchar(255)", $line); + if (preg_match("/ character varying/", $line)) { + $line=preg_replace("/ character varying/", " varchar(255)", $line); } - if (preg_match("DEFAULT \('([0-9]*)'::int", $line, $regs) || - preg_match("DEFAULT \('([0-9]*)'::smallint", $line, $regs) || - preg_match("DEFAULT \('([0-9]*)'::bigint", $line, $regs) + if (preg_match("/DEFAULT \('([0-9]*)'::int/", $line, $regs) || + preg_match("/DEFAULT \('([0-9]*)'::smallint/", $line, $regs) || + preg_match("/DEFAULT \('([0-9]*)'::bigint/", $line, $regs) ) { $num=$regs[1]; - $line=preg_replace(" DEFAULT \('([0-9]*)'[^ ,]*", " DEFAULT $num ", $line); + $line=preg_replace("/ DEFAULT \('([0-9]*)'[^ ,]*/", " DEFAULT $num ", $line); } - if (preg_match("DEFAULT \(([0-9\-]*)\)", $line, $regs)) { + if (preg_match("/DEFAULT \(([0-9\-]*)\)/", $line, $regs)) { $num=$regs[1]; - $line=preg_replace(" DEFAULT \(([0-9\-]*)\)", " DEFAULT $num ", $line); + $line=preg_replace("/ DEFAULT \(([0-9\-]*)\)/", " DEFAULT $num ", $line); } - $line=preg_replace(" DEFAULT nextval\(.*\) ", " auto_increment ", $line); - $line=preg_replace("::.*,", ",", $line); - $line=preg_replace("::.*$", "\n", $line); - if (preg_match("character\(([0-9]*)\)", $line, $regs)) { + $line=preg_replace("/ DEFAULT nextval\(.*\) /", " auto_increment ", $line); + $line=preg_replace("/::.*,/", ",", $line); + $line=preg_replace("/::.*$/", "\n", $line); + if (preg_match("/character\(([0-9]*)\)/", $line, $regs)) { $num=$regs[1]; if ($num<=255) { - $line=preg_replace(" character\([0-9]*\)", " varchar($num)", $line); + $line=preg_replace("/ character\([0-9]*\)/", " varchar($num)", $line); } else { - $line=preg_replace(" character\([0-9]*\)", " text", $line); + $line=preg_replace("/ character\([0-9]*\)/", " text", $line); } } //timestamps @@ -375,15 +376,24 @@ function pg2mysql($input, $header=true) } if (substr($line, 0, 5) == 'COPY ') { - preg_match('/COPY (.*) FROM stdin/', $line, $matches); - $heads = str_replace('"', "`", $matches[1]); + # Wrap all table and column names in "`" to prevent clashes with reserved names in mysql. + preg_match('/COPY\s+(\S+)\s*\((.*)\)\s+FROM\s+stdin/', $line, $matches); + $table = preg_replace('/^["`]*(\S+)["`]*$/', "`$1`", $matches[1]); + $columns = ''; + foreach (explode(',', $matches[2]) as $columnName) { + $columnName = preg_replace('/^\s*["`]*([^"`\s]+)["`]*\s*$/', "`$1`", $columnName); + if ($columns) { + $columns .= ', '; + } + $columns .= $columnName; + } $values = array(); $in_insert = true; } elseif ($in_insert) { if ($line == "\\.\n") { $in_insert = false; if ($values) { - $output .= "INSERT INTO $heads VALUES\n" . implode(",\n", $values) . ";\n\n"; + $output .= "INSERT INTO $table ($columns) VALUES\n" . implode(",\n", $values) . ";\n\n"; } } else { $vals = explode(' ', $line); @@ -405,7 +415,7 @@ function pg2mysql($input, $header=true) } $values[] = '(' . implode(',', $vals) . ')'; if (count($values) >= 1000) { - $output .= "INSERT INTO $heads VALUES\n" . implode(",\n", $values) . ";\n"; + $output .= "INSERT INTO $table ($columns) VALUES\n" . implode(",\n", $values) . ";\n"; $values = array(); } } From be690653ec1aec9a30f7e73fb0a87cd10cc9b986 Mon Sep 17 00:00:00 2001 From: Yangph <814773976@qq.com> Date: Thu, 25 Mar 2021 15:44:22 +0800 Subject: [PATCH 2/4] Support insert multiple rows using a single SQL INSERT statement. --- pg2mysql.inc.php | 50 ++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 42 insertions(+), 8 deletions(-) diff --git a/pg2mysql.inc.php b/pg2mysql.inc.php index bc1144b..4b7ba37 100644 --- a/pg2mysql.inc.php +++ b/pg2mysql.inc.php @@ -82,6 +82,8 @@ function pg2mysql_large($infilename, $outfilename) $linenum=0; $inquotes=false; $first=true; + $batchcount=0; + $BATCH_CAPACITY=10000; echo "Filesize: ".formatsize($fs)."\n"; while ($instr=fgets($infp)) { @@ -100,16 +102,20 @@ function pg2mysql_large($infilename, $outfilename) } if ($linenum%10000 == 0) { - $currentpos=ftell($infp); + $currentpos=ftell($infp)+$linenum; $percent=round($currentpos/$fs*100); $position=formatsize($currentpos); printf("Reading progress: %3d%% position: %7s line: %9d sql chunk: %9d mem usage: %4dM\r", $percent, $position, $linenum, $chunkcount, $memusage); } - $currentpos=ftell($infp); + $currentpos=ftell($infp)+$linenum;// windows OS \n\r line-end character ftell isn't equals filesize. $progress=$currentpos/$fs; + if(startsWith($instr, "INSERT INTO") || startsWith($instr, "CREATE TABLE")){ + $batchcount++; + } + // print_r("currentpos:$currentpos".",fs:$fs,"."progress:".$progress); - if ($progress == 1.0 || (strlen($instr)>3 && ($instr[$len-3]==")" && $instr[$len-2]==";" && $instr[$len-1]=="\n") && $inquotes==false)) { + if ($progress == 1.0 || (strlen($instr)>3 && ($instr[$len-3]==")" && $instr[$len-2]==";" && $instr[$len-1]=="\n") && $inquotes==false && $batchcount>0 && $batchcount%$BATCH_CAPACITY==0)) { $chunkcount++; if ($linenum%10000==0) { $percent=round($progress*100); @@ -129,6 +135,7 @@ function pg2mysql_large($infilename, $outfilename) $first=false; $pgsqlchunk=array(); $mysqlchunk=""; + $batchcount=0; } } echo "\n\n"; @@ -173,7 +180,8 @@ function pg2mysql($input, $header=true) $output=""; } - $in_create_table = $in_insert = $exclude_content = false; + $in_create_table = $in_insert = $exclude_content = $in_same_insert_table_prefix = false; + $pre_insert_prefix=""; $linenumber=0; $tbl_extra=""; @@ -391,11 +399,37 @@ function pg2mysql($input, $header=true) $after=$v1.' UNHEX(\''.$clob_c.'\') '.$v3; } } - - $output.=$before."VALUES".$after; - while (substr($lines[$linenumber], -3, -1)!=");" || $inquotes) { + $next_line = isset($lines[$linenumber+1])?$lines[$linenumber+1]:""; + $next_before=""; + if (substr($next_line, 0, 11)=="INSERT INTO"){ + list($next_before, $next_after)=explode("VALUES", $next_line, 2); + $next_before=str_replace("\"", "`", $next_before); + } + + if($before === $next_before){ + $after_val = trim($after); + if(endsWith($after_val, ');')){ + if($in_same_insert_table_prefix === true){ + $output.=substr($after_val,0,strlen($after_val)-1).","; + }else{ + $output.=$before."VALUES ".substr($after_val,0,strlen($after_val)-1).","; + $in_same_insert_table_prefix = true; + } + }else{ + $output.=$before."VALUES".$after; + } + }else{ + if($in_same_insert_table_prefix === true){ + $output.=$after; + }else{ + $output.=$before."VALUES".$after; + } + $in_same_insert_table_prefix = false; + } + + while (isset($lines[$linenumber]) && (substr($lines[$linenumber], -3, -1)!=");" || $inquotes)) { $linenumber++; - $line=$lines[$linenumber]; + $line=isset($lines[$linenumber])?$lines[$linenumber]:""; //in after, we need to watch out for escape format strings, ie (E'escaped \r in a string'), and ('bla',E'escaped \r in a string') //ugh i guess its possible these strings could exist IN the data as well, but the only way to solve that is to process these lines one character From 59a982adf01da2a6167ef5ab1334b5ab744ec7cf Mon Sep 17 00:00:00 2001 From: Yangph <814773976@qq.com> Date: Thu, 25 Mar 2021 18:00:47 +0800 Subject: [PATCH 3/4] Support different line breaks: \r\n and \n, files under unix and windows OS. --- pg2mysql.inc.php | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/pg2mysql.inc.php b/pg2mysql.inc.php index 4b7ba37..83bf3be 100644 --- a/pg2mysql.inc.php +++ b/pg2mysql.inc.php @@ -75,6 +75,7 @@ function pg2mysql_large($infilename, $outfilename) $fs=filesize($infilename); $infp=fopen($infilename, "rt"); $outfp=fopen($outfilename, "wt"); + $infp_binary=fopen($infilename, "rb"); //we read until we get a semicolon followed by a newline (;\n); $pgsqlchunk=array(); @@ -84,7 +85,13 @@ function pg2mysql_large($infilename, $outfilename) $first=true; $batchcount=0; $BATCH_CAPACITY=10000; - echo "Filesize: ".formatsize($fs)."\n"; + $fileformat=""; + + $first_line=fgets($infp_binary); + $fileformat = (substr($first_line, -2) === "\r\n") ? "windows" : "unix"; + fclose($infp_binary); + + echo "Filesize: ".formatsize($fs).", Fileformat: ".$fileformat."\n"; while ($instr=fgets($infp)) { $linenum++; @@ -100,20 +107,20 @@ function pg2mysql_large($infilename, $outfilename) $inquotes=true; } } - + $currentpos=ftell($infp); + if($fileformat === "windows"){ + $currentpos=$currentpos+$linenum;// windows OS \n\r line-end character ftell isn't equals filesize. + } if ($linenum%10000 == 0) { - $currentpos=ftell($infp)+$linenum; $percent=round($currentpos/$fs*100); $position=formatsize($currentpos); printf("Reading progress: %3d%% position: %7s line: %9d sql chunk: %9d mem usage: %4dM\r", $percent, $position, $linenum, $chunkcount, $memusage); - } - - $currentpos=ftell($infp)+$linenum;// windows OS \n\r line-end character ftell isn't equals filesize. + } $progress=$currentpos/$fs; if(startsWith($instr, "INSERT INTO") || startsWith($instr, "CREATE TABLE")){ $batchcount++; } - // print_r("currentpos:$currentpos".",fs:$fs,"."progress:".$progress); + // printf("currentpos: %7s, fs: %7s, progress: %7s \n\n", $currentpos, $fs, $progress); if ($progress == 1.0 || (strlen($instr)>3 && ($instr[$len-3]==")" && $instr[$len-2]==";" && $instr[$len-1]=="\n") && $inquotes==false && $batchcount>0 && $batchcount%$BATCH_CAPACITY==0)) { $chunkcount++; From 52c380fb0ea50f7a195b262902d74da06d3ea354 Mon Sep 17 00:00:00 2001 From: Yangph <814773976@qq.com> Date: Tue, 13 Apr 2021 18:10:28 +0800 Subject: [PATCH 4/4] Fix conversion incomplete when progress not equals 1, insert include bytea content don't merge. --- pg2mysql.inc.php | 32 ++++++++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) diff --git a/pg2mysql.inc.php b/pg2mysql.inc.php index 83bf3be..14c7e3e 100644 --- a/pg2mysql.inc.php +++ b/pg2mysql.inc.php @@ -91,7 +91,7 @@ function pg2mysql_large($infilename, $outfilename) $fileformat = (substr($first_line, -2) === "\r\n") ? "windows" : "unix"; fclose($infp_binary); - echo "Filesize: ".formatsize($fs).", Fileformat: ".$fileformat."\n"; + echo "Filesize: ".formatsize($fs)."(".$fs."), Fileformat: ".$fileformat."\n"; while ($instr=fgets($infp)) { $linenum++; @@ -145,6 +145,29 @@ function pg2mysql_large($infilename, $outfilename) $batchcount=0; } } + + // when sql files include \r ,the last ftell not equals filesize and $progress=($currentpos/$fs) != 1. + if(!empty($pgsqlchunk)){ + $chunkcount++; + $percent=round(1*100); + $position=formatsize($currentpos); + printf("Last processing(ftell not equals filesize) progress: %3d%% position: %7s line: %9d sql chunk: %9d mem usage: %4dM\n", $percent, $position, $linenum, $chunkcount, $memusage); + /* + echo "sending chunk:\n"; + echo "=======================\n"; + print_r($pgsqlchunk); + echo "=======================\n"; + */ + + $mysqlchunk=pg2mysql($pgsqlchunk, $first); + fputs($outfp, $mysqlchunk); + + $first=false; + $pgsqlchunk=array(); + $mysqlchunk=""; + $batchcount=0; + } + echo "\n\n"; printf("Completed! %9d lines %9d sql chunks\n\n", $linenum, $chunkcount); @@ -408,12 +431,13 @@ function pg2mysql($input, $header=true) } $next_line = isset($lines[$linenumber+1])?$lines[$linenumber+1]:""; $next_before=""; - if (substr($next_line, 0, 11)=="INSERT INTO"){ + if ($pos_x === false && substr($next_line, 0, 11)=="INSERT INTO"){ list($next_before, $next_after)=explode("VALUES", $next_line, 2); $next_before=str_replace("\"", "`", $next_before); } - - if($before === $next_before){ + + // insert include bytea content don't merge, maby sql script content is too long(max_allowed_packet). + if($pos_x === false && $before === $next_before){ $after_val = trim($after); if(endsWith($after_val, ');')){ if($in_same_insert_table_prefix === true){