From af74c3dffa8d7dbb5d6de4aad72f402970698490 Mon Sep 17 00:00:00 2001 From: "Laurence Alexander Hurst (IT Services)" Date: Thu, 15 Aug 2019 15:52:13 +0100 Subject: [PATCH 1/9] Add support for .. (which is the same as ..) in mediawiki. --- wikicontent.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/wikicontent.py b/wikicontent.py index 737adc1..b4ab304 100644 --- a/wikicontent.py +++ b/wikicontent.py @@ -130,7 +130,8 @@ def convert(style, context, trailing_newline): "sup" : ("",""), "big" : ("**", "**"), # not in dokuwiki so use bold "-" : ("
", "
"), # use dokuwikis Blockquote Plugin for this - "u" : ("", "") #
already handled in TagNode @visitor + "u" : ("", ""), #
already handled in TagNode @visitor + "s" : ("", "") # According to the mediawiki docs .. is synonymous with ... (the 2nd form is the same in dokuwiki) }.get(style.caption, None) if formatter is None: print("WARNING: Ignoring unknown formatter %s" % style.caption) From da32e652a305dd4d3cee2bff2cb7bb5aa14583e3 Mon Sep 17 00:00:00 2001 From: "Laurence Alexander Hurst (IT Services)" Date: Thu, 15 Aug 2019 16:04:19 +0100 Subject: [PATCH 2/9] Added support for strikethrough text with tag --- wikicontent.py | 1 + 1 file changed, 1 insertion(+) diff --git a/wikicontent.py b/wikicontent.py index b4ab304..8ec06f0 100644 --- a/wikicontent.py +++ b/wikicontent.py @@ -131,6 +131,7 @@ def convert(style, context, trailing_newline): "big" : ("**", "**"), # not in dokuwiki so use bold "-" : ("
", "
"), # use dokuwikis Blockquote Plugin for this "u" : ("", ""), #
already handled in TagNode @visitor + "del": ("", ""), "s" : ("", "") # According to the mediawiki docs .. is synonymous with ... (the 2nd form is the same in dokuwiki) }.get(style.caption, None) if formatter is None: From df663b5197dcce0889dd3b2bc2112024b944efd2 Mon Sep 17 00:00:00 2001 From: "Laurence Alexander Hurst (IT Services)" Date: Thu, 15 Aug 2019 16:13:22 +0100 Subject: [PATCH 3/9] Added content test for del and s tags. --- tests/del_tags/dokuwiki.txt | 6 ++++++ tests/del_tags/mediawiki.txt | 6 ++++++ 2 files changed, 12 insertions(+) create mode 100644 tests/del_tags/dokuwiki.txt create mode 100644 tests/del_tags/mediawiki.txt diff --git a/tests/del_tags/dokuwiki.txt b/tests/del_tags/dokuwiki.txt new file mode 100644 index 0000000..4132c29 --- /dev/null +++ b/tests/del_tags/dokuwiki.txt @@ -0,0 +1,6 @@ +del should be fine on a plain paragraph. + +As should s, but converted to del. + +Within pragraphs del tags should come through unchanged, However the MediaWiki specific s tags should be converted to del. + diff --git a/tests/del_tags/mediawiki.txt b/tests/del_tags/mediawiki.txt new file mode 100644 index 0000000..8f8e4a6 --- /dev/null +++ b/tests/del_tags/mediawiki.txt @@ -0,0 +1,6 @@ +del should be fine on a plain paragraph. + +As should s, but converted to del. + +Within pragraphs del tags should come through unchanged, However the MediaWiki specific s tags should be converted to del. + From 743ce3141d85f5a9318d86e5923665309ba0aa0b Mon Sep 17 00:00:00 2001 From: "Laurence Alexander Hurst (IT Services)" Date: Thu, 15 Aug 2019 16:15:05 +0100 Subject: [PATCH 4/9] Missed a change in last commit - del belongs in Tag not Style --- wikicontent.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/wikicontent.py b/wikicontent.py index 8ec06f0..8c47098 100644 --- a/wikicontent.py +++ b/wikicontent.py @@ -131,8 +131,7 @@ def convert(style, context, trailing_newline): "big" : ("**", "**"), # not in dokuwiki so use bold "-" : ("
", "
"), # use dokuwikis Blockquote Plugin for this "u" : ("", ""), #
already handled in TagNode @visitor - "del": ("", ""), - "s" : ("", "") # According to the mediawiki docs .. is synonymous with ... (the 2nd form is the same in dokuwiki) + "s" : ("", "") # According to the mediawiki docs .. is synonymous with ... (although one is treated as a tag and one a style in the parser??) }.get(style.caption, None) if formatter is None: print("WARNING: Ignoring unknown formatter %s" % style.caption) @@ -243,6 +242,7 @@ def convert(tag, context, trailing_newline): "tt" : ("''", "''"), "ref" : ("((","))"), # references converted to footnotes "code" : ("",""), + "del": ("", ""), } if tag.tagname in simple_tagitems: pre,post = simple_tagitems[tag.tagname] From 98a4c18495250300e0495b1c822beaae9fc62f68 Mon Sep 17 00:00:00 2001 From: "Laurence Alexander Hurst (IT Services)" Date: Thu, 15 Aug 2019 16:26:14 +0100 Subject: [PATCH 5/9] Added support for table captions. --- wikicontent.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/wikicontent.py b/wikicontent.py index 737adc1..d7e9ae7 100644 --- a/wikicontent.py +++ b/wikicontent.py @@ -273,6 +273,19 @@ def convert(node, context, trailing_newline): # anything else is inline term return "$" + node.math + "$" + +@visitor.when(Caption) +def convert(node, context, trailing_newline): + """ + Convert table captions to level 5 headings. + + Because we ignore the tags when converting to dokuwiki, + we can get away with simply converting to a heading without + worrying about it being inside
(which
should be) + in the rendered HTML. + """ + return "== %s ==" % convert_children(node, context) + # catchall for Node, which is the parent class of everything above @visitor.when(Node) def convert(node, context, trailing_newline): From 4e96c53a4d0a6a94e4169e44ae5001ce901b3191 Mon Sep 17 00:00:00 2001 From: "Laurence Alexander Hurst (IT Services)" Date: Thu, 15 Aug 2019 16:29:48 +0100 Subject: [PATCH 6/9] Changed to convert captions to bold text and add tests. Thought bold less likely to be controversial than converting to a specific level heading. --- tests/tables/dokuwiki.txt | 6 ++++++ tests/tables/mediawiki.txt | 14 ++++++++++++++ wikicontent.py | 6 +++--- 3 files changed, 23 insertions(+), 3 deletions(-) create mode 100644 tests/tables/dokuwiki.txt create mode 100644 tests/tables/mediawiki.txt diff --git a/tests/tables/dokuwiki.txt b/tests/tables/dokuwiki.txt new file mode 100644 index 0000000..9fae72c --- /dev/null +++ b/tests/tables/dokuwiki.txt @@ -0,0 +1,6 @@ +Taken from the MediaWiki examples at https://www.mediawiki.org/wiki/Help:Tables + +** Food complements ** +| Orange| Apple | +| Bread| Pie | +| Butter| Ice cream | diff --git a/tests/tables/mediawiki.txt b/tests/tables/mediawiki.txt new file mode 100644 index 0000000..0bcb9d1 --- /dev/null +++ b/tests/tables/mediawiki.txt @@ -0,0 +1,14 @@ +Taken from the MediaWiki examples at https://www.mediawiki.org/wiki/Help:Tables + +{| class="wikitable" +|+Food complements +|- +|Orange +|Apple +|- +|Bread +|Pie +|- +|Butter +|Ice cream +|} diff --git a/wikicontent.py b/wikicontent.py index d7e9ae7..5058f01 100644 --- a/wikicontent.py +++ b/wikicontent.py @@ -277,14 +277,14 @@ def convert(node, context, trailing_newline): @visitor.when(Caption) def convert(node, context, trailing_newline): """ - Convert table captions to level 5 headings. + Convert table captions to a bold paragraph preceding the table. 
Because we ignore the tags when converting to dokuwiki, - we can get away with simply converting to a heading without + we can get away with simply converting to bold text without worrying about it being inside
(which
should be) in the rendered HTML. """ - return "== %s ==" % convert_children(node, context) + return "** %s **\n" % convert_children(node, context) # catchall for Node, which is the parent class of everything above @visitor.when(Node) From 311d221115783bde49d475bd6158d36526f6d3b7 Mon Sep 17 00:00:00 2001 From: "Laurence Alexander Hurst (IT Services)" Date: Tue, 20 Aug 2019 16:02:38 +0100 Subject: [PATCH 7/9] Fixed bugs with page names that have ':' or begin with '/' or space. --- dokuwiki.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/dokuwiki.py b/dokuwiki.py index 724286e..6b116c1 100644 --- a/dokuwiki.py +++ b/dokuwiki.py @@ -192,7 +192,14 @@ def make_dokuwiki_pagename(mediawiki_name): Any namespacing that is in the form of a / is replaced with a : """ result = mediawiki_name.replace(" ","_") + # We have pages that have ':' in them - replace with underscores + result = result.replace(':', '_') result = names.clean_id(camel_to_underscore(result)).replace("/",":") + # Some of our mediawiki page names begin with a '/', which results in os.path.join assuming the page is an absolute path. 
+ if result[0] == ':': + result = result.lstrip(':') + # Fix any pages that began with a space, because that breaks dokuwiki + result = result.replace(":_", ":") result = codecs.encode(result, sys.getfilesystemencoding(), "replace") return result From 0f0f070d8cd4e605d457e7c460ea2791c71b5c3a Mon Sep 17 00:00:00 2001 From: "Laurence Alexander Hurst (IT Services)" Date: Tue, 20 Aug 2019 16:13:20 +0100 Subject: [PATCH 8/9] Print url of downloaded image - for debugging failed fetches --- dokuwiki.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dokuwiki.py b/dokuwiki.py index 724286e..43a7636 100644 --- a/dokuwiki.py +++ b/dokuwiki.py @@ -56,7 +56,7 @@ def write_images(self, images, file_namespace, http_user=None, http_pass=None): ensure_directory_exists(filemeta) for image in images: # download the image from the Mediawiki server - print("Downloading %s..." % image['name']) + print("Downloading %s... (%s)" % (image['name'], image['url'])) r = requests.get(image['url'], auth=auth) # write the actual image out to the data/file directory name = make_dokuwiki_pagename(image['name']) From b1d1215baa7217edb26d61639c8932359fdbf1a5 Mon Sep 17 00:00:00 2001 From: "Laurence Alexander Hurst (IT Services)" Date: Tue, 20 Aug 2019 16:17:17 +0100 Subject: [PATCH 9/9] Working on migrating to a sub namespace in dokuwiki --- dokuwiki.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/dokuwiki.py b/dokuwiki.py index 724286e..7f5231f 100644 --- a/dokuwiki.py +++ b/dokuwiki.py @@ -50,16 +50,20 @@ def write_images(self, images, file_namespace, http_user=None, http_pass=None): """ auth=None if http_user is None else HTTPBasicAuth(http_user, http_pass) file_namespace = file_namespace.lower() - filedir = os.path.join(self.data, "media", file_namespace) - ensure_directory_exists(filedir) - filemeta = os.path.join(self.data, "media_meta", file_namespace) - ensure_directory_exists(filemeta) for image in images: # download the image from the 
Mediawiki server print("Downloading %s..." % image['name']) r = requests.get(image['url'], auth=auth) # write the actual image out to the data/file directory name = make_dokuwiki_pagename(image['name']) + if ':' in name: + this_file_namespace, name = name.split(':') + else: + this_file_namespace = file_namespace + filedir = os.path.join(self.data, "media", this_file_namespace) + ensure_directory_exists(filedir) + filemeta = os.path.join(self.data, "media_meta", this_file_namespace) + ensure_directory_exists(filemeta) imagepath = os.path.join(filedir, name) with open(imagepath, "wb") as f: f.write(r.content) @@ -69,7 +73,7 @@ def write_images(self, images, file_namespace, http_user=None, http_pass=None): # write a .changes file out to the media_meta/file directory changepath = os.path.join(filemeta, "%s.changes" % name) with codecs.open(changepath, "w", "utf-8") as f: - fields = (str(timestamp), "::1", "C", u"%s:%s"%(file_namespace,name), "", "created") + fields = (str(timestamp), "::1", "C", u"%s:%s"%(this_file_namespace,name), "", "created") f.write(u"\t".join(fields) + "\r\n") # aggregate all the new changes to the media_meta/_media.changes file self._aggregate_changes(os.path.join(self.data, "media_meta"), "_media.changes") @@ -193,6 +197,8 @@ def make_dokuwiki_pagename(mediawiki_name): """ result = mediawiki_name.replace(" ","_") result = names.clean_id(camel_to_underscore(result)).replace("/",":") + # Add custom namespace + result = "old_wiki:" + result result = codecs.encode(result, sys.getfilesystemencoding(), "replace") return result