Loading
Rubén Laguna home

Merging two TikiWiki's

Comments

I’ve created a ruby script to merge the content of a TikiWiki into another one. This script will read the tiki_pages, tiki_history and tiki_links tables from the MySQL backend of the source TikiWiki and import the contents into the destination TikiWiki. The script is ‘safe’, meaning that it will not overwrite any page if it already exists in the destination. The history of the page will be merged as well if the page exists at the destination. The script doesn’t work with page attachments yet.

Here is the script, be sure to change the script parameters depending on your user/password on the databases (search oldtiki and newtiki variables in the script) and the charsets (search SET_CHARSET_NAME in the script).

#
# script to merge two tikiwiki s
#
require "rubygems"
require "mysql"
require "log4r"
require "iconv"
include Log4r

def createInsert(dbh,  table,  row ) 
      insertSt = "insert into #{table} "
      col ="("
      values = "VALUES("
      
      row.each { |key, value|
        col += "#{key},"
        if value and not value.empty?  then
          values +=  "'#{dbh.escape_string(value)}',"
        else 
          values += "NULL,"
        end    
      }
      col = col[0..-2] + ")"      
      values = values[0..-2] + ")"
      insertSt += col + " " + values
      insertSt +";"
      insertSt
end

$conflictNotif = {}
def addToConflictNotifList (user, pagename) 
  
  pageList = $conflictNotif[user]
  if not pageList then 
    $conflictNotif[user] = []
    pageList = $conflictNotif[user]
  end
  pageList << pagename        
end

$notif = {}
def addToNotifList (user, pagename) 
  
  pageList = $notif[user]
  if not pageList then 
    $notif[user] = []
    pageList = $notif[user]
  end
  pageList << pagename        
end

def printToFile (path, hash) 
  fileConflicts   = File.open(path, File::WRONLY|File::TRUNC|File::CREAT) 
  hash.each { |user, list|
     fileConflicts.print "#{user}: "
     fileConflicts.print list.join(',')
     fileConflicts.print "\n"
  }
  fileConflicts.close
end

Log4r::Logger.root.level = Log4r::DEBUG

l = Logger.new 'tiki_pages'
l.outputters = Outputter.stdout,FileOutputter.new("tiki_pages", :filename => "tiki_pages.txt", :trunc => true, :level => Log4r::DEBUG)

lh = Logger.new 'tiki_history'

lh.outputters = Outputter.stdout,FileOutputter.new("tiki_history", :filename => "tiki_history.txt", :trunc => true, :level => Log4r::DEBUG)

lm = Logger.new 'mysqlstatements'
lm.outputters = FileOutputter.new("sqlfile", :filename => "commands.sql", :trunc => true, :level=>Log4r::DEBUG)

l.debug "Starting migration script"

oldtiki="sql1.example.com"
olduser="root"
olddbname="tiki"
oldpwd="secret"

newtiki="sql2.example.com"
newuser="root"
newdbname="rd_tiki_wiki"
newpwd="secret"

#select login,email from users_users;
#mysql> select tiki_pages.pagename,users_users.email from tiki_pages,users_users where tiki_pages.user=users_users=login;


begin
  #connect to the MySQL server
  l.debug "trying to connect..."
  dbhold = Mysql.init
  dbhold.options(Mysql::SET_CHARSET_DIR, "/root/tikiWikiScript/charsets/")
  dbhold.options(Mysql::SET_CHARSET_NAME, "utf8")
  dbhold.real_connect(oldtiki,olduser, oldpwd,olddbname)
  # get server version string and display it
  l.info "#{oldtiki} mysql version: " + dbhold.get_server_info

  dbhnew = Mysql.init
  dbhnew.options(Mysql::SET_CHARSET_DIR, "/root/tikiWikiScript/charsets/")
  dbhnew.options(Mysql::SET_CHARSET_NAME, "latin1")
  dbhnew.real_connect(newtiki,newuser,newpwd,newdbname,3307)
  l.info "#{newtiki} mysql version: " + dbhnew.get_server_info

  l.info "retrieving all pagenames from old tiki..."
  
  res= dbhold.query("select * from tiki_pages");
  
  num_pages_old_tiki = res.num_rows
  l.info "number of pages in old tiki : #{num_pages_old_tiki}"
  insertions = 0
  conflicts = 0
  history_updates = 0
  history_version_conflicts = 0
  while row = res.fetch_hash do
    #l.debug row
    pagename = row["pageName"]
    lastModif = row["lastModif"]
    user = row["user"]
    creator = row["creator"]
    
    l.debug "checking pageName='#{pagename}'"
    escapePageName = dbhnew.escape_string(pagename)
    l.debug "escapePageName='#{escapePageName}'"
    query = "select lastModif,user,creator from tiki_pages where pageName='#{escapePageName}'"
    res2 = dbhnew.query(query)
    
    if (res2.num_rows == 0) then
      pageid =  dbhnew.query("select max(page_id)+1 from tiki_pages").fetch_row[0]
      
      l.info "Creating page '#{pagename}' with pageid #{pageid}"        
      
      row["page_id"] = pageid;
      insertSt = createInsert(dbhnew,  "tiki_pages", row ) 
      lm.debug "#{insertSt}"   
      
      dbhnew.query "#{insertSt}"   
      
      addToNotifList( user, pagename);
      addToNotifList( creator, pagename) unless user == creator
      insertions += 1
    else
      if (res2.num_rows > 1) then
        l.error "database invariant violated: entry for pagename #{pagename} not found in the new tiki"
        fail
      end
      row2 = res2.fetch_hash
      lastModif2 = row2["lastModif"]
      l.debug "Comparing last modification of page #{pagename} in old tiki with same page in new tiki"
      if (lastModif > lastModif2) then                
        l.warn "pagename \"#{pagename}\" is newer in #{oldtiki} than in #{newtiki}"
        l.warn "we should send an email to #{user} and #{creator}"
        
        addToConflictNotifList( user, pagename)
        addToConflictNotifList( creator, pagename) unless user == creator
        
        conflicts += 1
      end
      
    end
    res2.free
    historyRes =  dbhold.query("select * from tiki_history where pageName='#{escapePageName}' ORDER BY version ASC")
    anyHistoryUpdate = false;
    while oldTikiHistoryEntry = historyRes.fetch_hash do
      m = oldTikiHistoryEntry["lastModif"]
      historyRes2 = dbhnew.query("select pageName from tiki_history where pageName='#{escapePageName}' and lastModif='#{m}'")
      if (historyRes2.num_rows == 0) then
      # this history entry was not present we must add new entry
        version = oldTikiHistoryEntry["version"]        
        lh.info "adding version #{version} last updated on #{oldTikiHistoryEntry["lastModif"]} to tiki_history for page '#{pagename}'"
        historyRes3 = dbhnew.query("select lastModif from tiki_history where pageName='#{escapePageName}' and version='#{version}'")
        if (historyRes3.num_rows == 0) then
          lh.debug "version: #{version} not present in #{newtiki}. Insert the entry"
        else
          lh.debug "version: #{version} of '#{pagename}' already exists in #{newtiki}. Now we have to insert the entry in the middle."
          dbhnew.query("update tiki_history set version=version+1 where pageName='#{escapePageName}' and version >= '#{version}' ORDER BY version DESC")
          history_version_conflicts += 1
        end
        insertSt = createInsert(dbhnew, "tiki_history",  oldTikiHistoryEntry ) 
        lm.info "#{insertSt}"
        dbhnew.query "#{insertSt}"
        history_updates += 1
        anyHistoryUpdate = true;
      else 
        lh.debug "The history entry with modification date #{m} of pageName=#{pagename} was already present in the tiki_history of #{newtiki}. Skipping it"
      end
    end
    l.info "History of page #{pagename} updated." if anyHistoryUpdate
    historyRes.free
  end
  l.info "number of pages in old tiki : #{num_pages_old_tiki}"
  l.info "number of insertions in the new wiki #{insertions}"
  l.info "number of conflicts in the new wiki #{conflicts}"
  l.info "number of history updates #{history_updates}"
  l.info "number of history version conflicts #{history_version_conflicts}"
  
  dateString = Time.now.to_s
  filename = "conflicts"+dateString+".txt"
  printToFile(filename, $conflictNotif)
  l.info "#{filename} created"
  filename = "notifications"+dateString+".txt"
  printToFile(filename, $notif)
  l.info "#{filename} created"
  
  res.free
  
  #Update tiki_links that is responsible for the backlinks feature
  link_insertions = 0
  l.debug "retrieve all links"
  linkRes = dbhold.query "select * from tiki_links"
  while linkRow = linkRes.fetch_hash do
    fromPage = dbhnew.escape_string(linkRow["fromPage"])
    toPage = dbhnew.escape_string(linkRow["toPage"])
    #check if this link already exists
    checkRes = dbhnew.query "select * from tiki_links where fromPage='#{fromPage}' and toPage='#{toPage}'"
    if (checkRes.num_rows == 0) then 
      insertSt = createInsert(dbhnew, "tiki_links", linkRow)
      lm.info "#{insertSt}"
      dbhnew.query insertSt
      link_insertions += 1      
    end    
    checkRes.free
  end
  linkRes.free
  l.info "number of link_insertions #{link_insertions}"


rescue Mysql::Error => e
  l.error "Error code: #{e.errno}"
  l.error "Error message: #{e.error}"
  l.error "Error SQLSTATE: #{e.sqlstate}" if e.respond_to?("sqlstate")
ensure
  # disconnect from server
  l.debug "disconnecting from database"
  dbhold.close if dbhold
  dbhnew.close if dbhnew
end

The code is also available as a gist

You’ll need to install rubygems and Mysql module gem first.

blog comments powered by Disqus