#!/usr/bin/python # # Script to create a confluence page # from sets import Set from urllib import * import cPickle import re import os import sys import urllib import xmlrpclib # This script is set up to create a confluence space corresponding # to your TWiki web (i.e. space) name. It will also use the # first page you create as the content for the space "Home" # -- otherwise confluence creates a "Home" page that isn't # linked to anything. # # Other notes: The first time you run this, it creates # a persistent copy of the entire twiki web you read in -- # which is then used for subsequent runs. This simplifies # running it repeatedly to tweak the transformation regexs' # But it prevents you from running the script against # multiple twiki webs. Just delete the ".pickle" file or # delete that bit of code in writeToConfluence() # # # Set these up to find your pages # twikiurlbase = "https://some.twikiwiki.com" twikibase = "/twiki/bin/view" twikistartspace = "MWdev" twikistartconcept = "WebHome" confluenceurlbase = "https://some.confluencewiki.com/confluence" twikilogin = "username" twikipass = "mypass" confluencelogin = "username" confluencepass = "mypass" class Page: def __init__( self, spaceName ): self._spaceName = spaceName self._content = None self._references = Set() self._confluenceRefName = self.confluenceName(spaceName[1]) self._imagelist = [] self._id = -1 self.parent = None self.blank = False self.listContext = "" def __repr__(self): return self._spaceName.__repr__() def __str__(self): return str(self._spaceName) def __eq__(self, other) : return self._spaceName.__eq__(other._spaceName) def __ne__(self, other) : return self._spaceName.__ne__(other._spaceName) def __hash__(self): return self._spaceName.__hash__() # Compile some patterns for efficiency noConceptPattern = re.compile(".*NOTE: This Wiki topic does not exist yet", re.DOTALL ) rawWikiContentPattern = \ re.compile("(.*)
.*", re.IGNORECASE | re.DOTALL ) # Parts of wiki content not searched for references # Some folks coded verbatim on our site very weirdly... ignoredTextPatterns = [ re.compile("(?s)", r"\n\n" ),
(r"
|%BR%", r"\\"),
(r"([^A-Z])!([A-Z])", r"\1\2" ), # !WikiWord escaping in TWiki, isn't needed in Confluence
# Some folks at our site did very weird things in the wiki content for some reason
(r"<verbatim>", r"{code}"),
(r"</verbatim>", r"{code}"),
(r"</?pre>", r"{noformat}"),
(r"</?nop>", r""),
(r"<p>", r"\n\n" ),
(r"<br>|%BR%", r"\\\\"),
# Replace some of the most-used TWiki icons with Confluence equivalents
(r"%T%", r"(on)" ),
(r"%Y%", r"(/)" ),
# % symbols are stored in the pickle as HTML/XML escape sequences? # Why?
(r"%Y%", r"(/)" ),
(r"%X%", r"(!)" ) ]
if self._content == None:
return None
transformedContent = self._content
for transform in transformations:
transformedContent = \
re.sub(transform[0], transform[1], transformedContent)
return transformedContent
class CachedURLopener(FancyURLopener):
    """FancyURLopener whose credential lookup always yields the TWiki login.

    Overrides get_user_passwd() so that the module-level ``twikilogin`` /
    ``twikipass`` pair is returned for every request.
    """

    def get_user_passwd(self, host, realm, clear_cache):
        # host, realm and clear_cache are deliberately ignored: the same
        # configured credentials are handed back whatever is being asked for.
        credentials = (twikilogin, twikipass)
        return credentials
class Mapper:
    """Collect TWiki pages by following references, then store them in Confluence.

    Pages are kept both in a set (for duplicate detection) and in a list
    (to remember the order in which they were first seen).  That order is
    the order in which writeToConfluence() stores them.
    """

    def __init__(self):
        self._mappedPages = Set()
        self._orderedPages = []
        self.opener = CachedURLopener()

    # Add pages to a map, and maintains them in order read
    def addPage(self, page):
        """Register *page* if it has not been seen yet.

        Returns True when the page was added, False when an equal page was
        already present (Set.add() itself does not report this).
        """
        if page in self._mappedPages:
            return False
        self._mappedPages.add(page)
        self._orderedPages.append(page)
        return True

    def _fetch(self, page):
        """Read the raw TWiki text for *page* and hand it to page.setContent().

        Returns True on success and False when anything went wrong.
        """
        url = (twikiurlbase + twikibase + "/" +
               page._spaceName[0] + "/" + page._spaceName[1] + "?raw=1")
        try:
            fd = self.opener.open(url)
            print("Reading " + str(page._spaceName))
            try:
                content = fd.read()
            finally:
                # Close the handle even when read() fails.
                fd.close()
            page.setContent(content)
        except Exception:
            # TWiki allows lots of complex ways to link, and also lots of
            # complex ways to *not* link (example, putting text inside a
            # =monospace= line).  So, if we fail to open a page, we assume
            # it's just a mistake with trying to follow a non-reference.
            # Catching Exception (rather than everything) keeps this
            # best-effort while still letting Ctrl-C / SystemExit through.
            print("Problem reading " + str(page._spaceName) +
                  " ... probably wasn't a real reference.")
            return False
        return True

    def process(self, spaceName, parent):
        """Load the page named by *spaceName* and everything it refers to.

        *spaceName* is a (web, topic) pair; *parent* is the Page that
        referred to it, or None for the start page.  Pages are visited
        depth-first, in the same order a recursive walk would use, but with
        an explicit stack so that a long chain of links cannot exhaust the
        interpreter's recursion limit (which previously got swallowed and
        reported as an unreadable page).
        """
        pending = [(spaceName, parent)]
        while pending:
            currentName, currentParent = pending.pop()
            p = Page(currentName)
            p._parent = currentParent
            if not self.addPage(p):
                continue
            if not self._fetch(p):
                continue
            # Push in reverse so the first reference is popped (visited) first.
            refs = list(p._references)
            refs.reverse()
            for ref in refs:
                pending.append((ref, p))

    def writeToConfluence(self):
        """Store every collected page in Confluence through its XML-RPC API.

        A Confluence space keyed by the TWiki web name is created when
        getSpace() faults.  The first page with content that is stored into
        a newly created space overwrites that space's home page; all other
        pages are stored as new pages, parented to the page that referred to
        them when that page has already been stored.
        """
        s = xmlrpclib.Server(confluenceurlbase + "/rpc/xmlrpc")
        token = s.confluence1.login(confluencelogin, confluencepass)
        knownSpaces = {}       # spaces already confirmed or created this run
        pendingHomePages = {}  # space key -> home page id not yet overwritten
        for p in self._orderedPages:
            space = p._spaceName[0]
            name = p._confluenceRefName
            if space not in knownSpaces:
                try:
                    s.confluence1.getSpace(token, space)
                except xmlrpclib.Fault:
                    # Omit the description and homepage.  The home page is
                    # filled in below by the first page that has content.
                    spaceobj = {
                        'key': space,
                        'name': space,
                        'url': confluenceurlbase + "/display/" + space,
                    }
                    newspace = s.confluence1.addSpace(token, spaceobj)
                    pendingHomePages[space] = newspace['homePage']
                knownSpaces[space] = True
            content = p.getTransformedContent()
            if content:
                print("Storing " + str(p._spaceName) + " as '" + name + "'")
                pageurl = confluenceurlbase + "/display/" + \
                    space + "/" + re.sub(' ', '+', name)
                # Only consume the home page id once there is real content,
                # so a contentless first page does not waste it.
                homepage = pendingHomePages.pop(space, None)
                if homepage:
                    # First page created in a new space becomes the home
                    # page.  Its title can't be changed, so links back to
                    # "Web Home" will be broken.
                    page = s.confluence1.getPage(token, homepage)
                    print(page)
                    page['url'] = pageurl
                    page['content'] = content
                else:
                    page = {
                        'creator': 'twikiToConfluence',
                        'url': pageurl,
                        'title': name,
                        'space': space,
                        'content': content,
                    }
                    # -1 means the parent was never stored in Confluence.
                    if p._parent and p._parent._id != -1:
                        page['parentId'] = p._parent._id
                page = s.confluence1.storePage(token, page)
                p._id = page.get("id")
            for image in p._imagelist:
                # Doh. There is no rpc for attachments, so only tell the
                # operator which images have to be attached by hand.
                print("Attach the image from " +
                      twikiurlbase + twikibase + "/" +
                      image[0] + "/" + image[1] + "/" + image[2])
                print(" to the Confluence page " +
                      p._confluenceRefName + " as " + image[2])
#
# Create a Mapper that loads the start page, and follows any references it finds
#
# Save/restore the loaded twiki in a pickle
# This saves the loaded twiki to allow tuning all the
# transformation regex's without reloading.
#
# NOTE: unpickling can execute arbitrary code -- only ever load a pickle
# file that this script wrote itself.  Delete the file to force a re-read
# of the TWiki web.
picklefilename = "twikimapper.pickle"
if os.path.exists(picklefilename):
    dumpfile = open(picklefilename, "r")
    try:
        twikimapper = cPickle.load(dumpfile)
    finally:
        # Release the handle even if the pickle turns out to be unreadable.
        dumpfile.close()
else:
    twikimapper = Mapper()
    print("connecting to TWiki...")
    twikimapper.process((twikistartspace, twikistartconcept), None)
    dumpfile = open(picklefilename, "w")
    try:
        cPickle.dump(twikimapper, dumpfile)
    finally:
        dumpfile.close()
print("connecting to Confluence...")
twikimapper.writeToConfluence()