File contents
#!/usr/bin/env python
# repozo.py - make backups of the ZODB Data.fs file.
# start:
# store file, size
#
# incremental:
# if file has shrunk, or existing file has changed:
# store complete new file # pack operation has happened.
# else:
# store all bytes from size to new_size
# recovery walks through files from newest to oldest (before recovery
# date) and stops when it finds a full file. It will then concat the full
# file and the deltas together.
# for more, see the usage string
# TODO:
# regexp to only find valid file names.
# allow gzipping of backup files.
# allow backup files in subdirectories.
# Notes:
#
# This is a really quick-n-dirty hack. It seems to work fine for me,
# but YMMV. For god's sake don't use this as an excuse to learn Python.
#
# $Id: repozo.py,v 1.2 2000/01/12 07:27:31 anthony Exp $
import os, sys, time, string
import md5
# config bits: module-level defaults. main() overwrites each of them when the
# matching command-line option is given.
# repository directory that the backup files are written to and read from (-r)
saveDir = "/opt/zope/saveDir"
# ZODB file that --store backs up (-z)
curFile = 'var/Data.fs'
# file that --recover writes the rebuilt ZODB to (-o)
recFile = 'Data.fs.recover'
# set to 1 by -v to print progress messages
verbose = 0
def str2hex(str):
    """Return the lowercase hexadecimal form of a byte string.

    Every input byte becomes exactly two hex digits, so the result is
    always twice as long as the input.

    str -- the raw bytes to encode, e.g. the value returned by digest().
    """
    # '%02x' zero-pads bytes below 0x10. Slicing the last two characters
    # of hex() does not: hex(5) is '0x5', which would give 'x5'.
    # bytearray() yields integers for a Python 2 str and a Python 3 bytes.
    return ''.join(['%02x' % byte for byte in bytearray(str)])
def md5bytes(filename, n):
    """Return the md5 checksum, as a hex string, of the first 'n' bytes of a file.

    filename -- path of the file to read; it is opened in binary mode.
    n        -- number of leading bytes to hash. When the file holds fewer
                than 'n' bytes, only the bytes that exist are hashed.
    """
    READBLOCK = 100000  # read in bounded chunks rather than all at once
    m = md5.new()
    remaining = n
    # 'with' closes the file even when a read fails part-way through.
    with open(filename, 'rb') as fp:
        while remaining > 0:
            b = fp.read(min(READBLOCK, remaining))
            if not b:
                # end of file reached before 'n' bytes: nothing left to hash
                break
            m.update(b)
            remaining = remaining - len(b)
    return str2hex(m.digest())
#
def makeDeltaFile(file, start, n, outfile):
    """Copy 'n' bytes of 'file', starting at offset 'start', into 'outfile'.

    file    -- path of the source file, opened in binary mode.
    start   -- byte offset in 'file' at which copying begins.
    n       -- number of bytes to copy; fewer are copied if 'file' ends first.
    outfile -- path of the file to create (an existing file is overwritten).

    Two progress lines are printed: the request, and the number of bytes
    actually copied next to the number wanted.
    """
    BLOCK = 100000  # copy in bounded chunks so a large delta is not held in memory
    print("writing %d bytes from offset %d to %s" % ( n, start, outfile ))
    copied = 0
    # 'with' closes both files even when a read or write fails.
    with open(file, 'rb') as ifp:
        ifp.seek(start)
        with open(outfile, 'wb') as ofp:
            while copied < n:
                b = ifp.read(min(BLOCK, n - copied))
                if not b:
                    # source ended before 'n' bytes were available
                    break
                ofp.write(b)
                copied = copied + len(b)
    print("delta: read %d bytes, wanted %d"%(copied, n))
def copyFile(file, outfile):
    """Copy the whole contents of 'file' to 'outfile'.

    file    -- path of the source file, opened in binary mode.
    outfile -- path of the file to create (an existing file is overwritten).

    A progress line naming both paths is printed before copying starts.
    """
    print("copying from %s to %s"%(file, outfile))
    # 'with' closes both files even when a read or write fails.
    with open(file, 'rb') as ifp:
        with open(outfile, 'wb') as ofp:
            while 1:
                b = ifp.read(100000)
                if not b:
                    # an empty read means end of file
                    break
                ofp.write(b)
def genFileName(typ):
    """Return a backup file name stamped with the current UTC time.

    typ -- "full" for a complete copy (extension 'fs') or "delta" for an
           incremental file (extension 'deltafs').

    The name has the form 'yyyy-mm-dd-hh-mm.<ext>'. The stamp has minute
    resolution, so two names generated within the same minute are equal.

    Raises ValueError when 'typ' is neither "full" nor "delta".
    """
    import time
    extensions = {"full": 'fs', "delta": 'deltafs'}
    if typ not in extensions:
        # fail clearly instead of tripping over an unbound extension below
        raise ValueError("unknown backup type %r (expected 'full' or 'delta')"
                         % (typ,))
    t = time.gmtime(time.time())[:5]
    t = t + (extensions[typ],)
    return "%04d-%02d-%02d-%02d-%02d.%s"%t
def listSaveDir():
    """Return the backup file names found in saveDir, in reverse sorted order.

    Only names ending in '.fs' or '.deltafs' (the two extensions that
    genFileName() produces) are returned, so stray directory entries are
    never mistaken for backup data. Names carry no directory part. For
    names of the form 'yyyy-mm-dd-hh-mm.<ext>' reverse sorted order is
    newest first.
    """
    # XXXX todo: handle gzipped files
    names = [name for name in os.listdir(saveDir)
             if os.path.splitext(name)[-1] in ('.fs', '.deltafs')]
    # sort chronologically, then flip so the most recent name comes first
    names.sort()
    names.reverse()
    return names
def buildFileList(when=None):
    """Return the repository files needed to produce the state at time 'when'.

    when -- timestamp string in the 'yyyy-mm-dd-hh-mm' form used for file
            names (a shorter prefix also works); defaults to the current
            UTC minute. Only files whose name does not sort after 'when'
            are considered.

    The result is a list of paths inside saveDir, oldest first: one full
    ('.fs') file followed by the later files up to 'when'. An empty list
    is returned when no full file is found, because deltas cannot be
    applied without a full file to start from.
    """
    if not when:
        when = "%04d-%02d-%02d-%02d-%02d" % time.gmtime(time.time())[:5]
    if verbose:
        print("looking for files earlier than %s" % when)
    # now find a list of files to produce given state in repos.
    needed = []
    for f in listSaveDir():
        # Names include their extension, so a file stamped with exactly
        # 'when' also sorts after it and is skipped here.
        if f > when:
            continue  # file is "after" when we want.
        needed.append(f)
        # XXXX gzip
        if os.path.splitext(f)[-1] == '.fs':
            if verbose:
                print("found a full file, stopping")
            break
    else:
        # ran out of files without reaching a full one
        if verbose and needed:
            print("no full file found, ignoring deltas")
        needed = []
    # collected newest-first; concatenation needs oldest-first
    needed.reverse()
    needed = [os.path.join(saveDir, f) for f in needed]
    if verbose:
        print("files needed to recover state as at %s" % when)
        for f in needed:
            print("\t%s" % f)
    return needed
def concatFiles(files, outfile):
    """Concatenate a list of repository files, optionally writing the result.

    files   -- paths of the files to read, in the order they are joined.
    outfile -- path to write the concatenation to; when false (None or '')
               nothing is written and only the size and checksum are
               computed.

    Returns a tuple (size, checksum): the total number of bytes read and
    the md5 of those bytes as a hex string.
    """
    ofp = None
    if outfile:
        ofp = open(outfile, 'wb')
    size = 0
    m = md5.new()
    try:
        for f in files:
            # 'with' closes each input file as soon as it has been read
            with open(f, 'rb') as ifp:
                while 1:
                    b = ifp.read(100000)
                    if not b:
                        break
                    m.update(b)
                    size = size + len(b)
                    if ofp:
                        ofp.write(b)
    finally:
        # close the output even when a read or write fails
        if ofp:
            ofp.close()
    return ( size, str2hex(m.digest()) )
def doRecover(outfile, date):
    """Rebuild the ZODB file as it was at 'date' and write it to 'outfile'.

    outfile -- path of the file to write the recovered data to.
    date    -- timestamp string handed to buildFileList(); a false value
               means the most recent state.

    When buildFileList() finds nothing, usage() is called with an
    explanatory message. In verbose mode the output path, size and
    checksum are printed afterwards.
    """
    chain = buildFileList(date)
    if not chain:
        if date:
            complaint = "no files in repository before the given date (%s)"%date
        else:
            complaint = "no files in repository!"
        usage(complaint)
    size, checksum = concatFiles(chain, outfile=outfile)
    if not verbose:
        return
    print("recovered to file %s"%outfile)
    print("recovered file %d bytes, checksum %s"%(size, checksum))
def doUpdate(curFile, forceFull=0):
    """Bring the repository up to date with the ZODB file 'curFile'.

    curFile   -- path of the ZODB file to back up.
    forceFull -- when true, always write a full copy.

    An empty repository gets a full copy. Otherwise the state stored in
    the repository is compared with 'curFile':
      - same size and same checksum: nothing is written (unless forced);
      - 'curFile' is shorter, or its leading bytes differ from the
        repository state: a full copy is written;
      - 'curFile' only has extra bytes at the end: those bytes are
        written as a delta file.
    """
    repos_f = buildFileList()
    if not repos_f:
        # init db
        if verbose:
            print("nothing in repository, initialising...")
        copyFile(curFile, os.path.join(saveDir, genFileName('full')))
        return

    rsz, rmd5 = concatFiles(repos_f, outfile=None)
    if verbose:
        print("repository state: %s bytes, md5: %s" % (rsz, rmd5))
    fsz = os.stat(curFile).st_size
    # checksum of the part of curFile that the repository should already hold
    cmd5 = md5bytes(curFile, rsz)
    if verbose:
        # The whole-file checksum is only ever displayed, so the extra
        # pass over the file is made in verbose mode only.
        print("current state: %s bytes, md5: %s" % ( fsz, md5bytes(curFile, fsz) ))
        print("first %d bytes of current: md5: %s" % ( rsz, cmd5 ))

    doFullFile = forceFull
    doDelta = 0
    if ( fsz == rsz ) and ( rmd5 == cmd5 ):
        # a forced run still writes a full copy below, so stay quiet then
        if not forceFull:
            print("no changes, no action taken")
    elif fsz < rsz:
        if verbose:
            print("file shrunk: pack? Writing full file")
        doFullFile = 1
    elif rmd5 != cmd5:
        # fsz >= rsz, but the bytes already backed up have changed
        if verbose:
            print("file changed: pack? Writing full file")
        doFullFile = 1
    else:
        # fsz > rsz and the existing bytes are untouched
        if verbose:
            print("additional %d bytes to be written"%(fsz-rsz))
        doDelta = 1

    if doFullFile:
        copyFile(curFile, os.path.join(saveDir, genFileName('full')))
    elif doDelta:
        dfile = os.path.join(saveDir, genFileName('delta'))
        if verbose:
            print("writing delta to %s" % dfile)
        makeDeltaFile(curFile, rsz, fsz-rsz, dfile)
    elif verbose:
        print("doing nothing")
def usage(mesg,r=1):
    """Print 'mesg' and the option summary to stderr, then exit.

    mesg -- text shown after the program name on the first line.
    r    -- exit status handed to sys.exit() (default 1).

    The summary shows the current values of curFile, saveDir and recFile
    as the defaults.
    """
    program = os.path.basename(sys.argv[0])
    text = """%s: %s
Usage:
-S --store backup current ZODB file
-R --recover restore stored ZODB file
Flags for --store and --recover:
-z file --zodb=file ZODB file (default %s)
-r dir --repository=dir repository directory (default %s)
-v --verbose verbose mode - say what's happening
Flags for --store:
-F --force force a full ZODB backup
Flags for --recover:
-D str --date=str recover state as at this date. str in format
yyyy-mm-dd[-hh[-mm]]
-o file --output=file write recovered ZODB to file (default %s)
""" % (program, mesg, curFile, saveDir, recFile)
    sys.stderr.write(text)
    sys.exit(r)
def main():
    """Parse the command line and run either a store or a recover.

    Options overwrite the module-level defaults curFile, saveDir, recFile
    and verbose. Exactly one of -S/--store and -R/--recover must be
    given; any command-line error is reported through usage(), which
    exits.
    """
    import getopt
    global curFile, saveDir, recFile, verbose
    storeMode = 0
    recoverMode = 0
    force = 0
    date = None
    try:
        optlist, args = getopt.getopt(
            sys.argv[1:], 'SRz:r:vFD:o:',
            [ 'store', 'recover', 'zodb=', 'repository=', 'verbose', 'force',
              'date=', 'output=' ] )
    except getopt.error as e:
        # unknown option or missing option argument: show the usage text
        # instead of a traceback
        usage(str(e))
    if args:
        usage("unrecognised arguments")
    # getopt only returns options declared above, so no fallback branch
    # is needed here.
    for opt, val in optlist:
        if opt in ( '-S', '--store' ):
            storeMode = 1
        elif opt in ( '-R', '--recover' ):
            recoverMode = 1
        elif opt in ( '-z', '--zodb' ):
            curFile = val
        elif opt in ( '-r', '--repository' ):
            saveDir = val
        elif opt in ( '-v', '--verbose' ):
            verbose = 1
        elif opt in ( '-F', '--force' ):
            force = 1
        elif opt in ( '-D', '--date' ):
            date = val
        elif opt in ( '-o', '--output' ):
            recFile = val
    if not storeMode and not recoverMode:
        usage("Nothing to do! Specify either -S or -R")
    if storeMode and recoverMode:
        usage("Can't store AND recover! Choose either -S or -R")
    if storeMode:
        if verbose:
            print("storing")
            print("zodb: %s" % curFile)
            print("repository: %s" % saveDir)
        doUpdate(curFile, force)
        if verbose:
            print("done.")
    if recoverMode:
        if verbose:
            print("recovering")
            print("repository: %s" % saveDir)
            print("to: %s" % recFile)
            if date:
                print("as at: %s" % date)
            else:
                print("(current version)")
        doRecover(recFile, date)
        if verbose:
            print("done.")
# Run the command-line interface only when executed as a script, so that
# importing this module does not parse sys.argv or exit the interpreter.
if __name__ == "__main__":
    main()
#
# $Log: repozo.py,v $
# Revision 1.2 2000/01/12 07:27:31 anthony
# oops. str2hex needed (can't assume everyone has mxCrypto installed).
# Filthy quick hack version added.
#
# Revision 1.1.1.1 2000/01/12 07:15:59 anthony
# initial version
#
#