You are not logged in Log in Join
You are here: Home » Members » anthony » software » Take Snapshots of ZODB file » View File

Log in
Name

Password

 

Take Snapshots of ZODB file

File details
Size
7 K
File type
text/plain

File contents

#!/usr/bin/env python 

# repozo.py - make backups of the ZODB Data.fs file.

# start:
#    store file, size
#
# incremental:
#     if file has shrunk, or existing file has changed:
#        store complete new file # pack operation has happened.
#     else:
#        store all bytes from size to new_size

# recovery walks through files from newest to oldest (before recovery
# date) and stops when it finds a full file. It will then concat the full
# file and the deltas together.

# for more, see the usage string


# TODO:
#    regexp to only find valid file names.
#    allow gzipping of backup files.
#    allow backup files in subdirectories.

# Notes:
# 
#   This is a really quick-n-dirty hack. It seems to work fine for me, 
# but YMMV. For god's sake don't use this as an excuse to learn Python. 
#

# $Id: repozo.py,v 1.2 2000/01/12 07:27:31 anthony Exp $

import os, sys, time, string
import md5

# config bits: this should be the repository directory.

# Directory the backup files are written to and read from
# (overridden by -r / --repository).
saveDir = "/opt/zope/saveDir"
# ZODB file to back up (overridden by -z / --zodb).
curFile = 'var/Data.fs'
# File a recovered ZODB is written to (overridden by -o / --output).
recFile = 'Data.fs.recover'
# Set to 1 by -v / --verbose to print progress messages.
verbose = 0

def str2hex(str):
    """Return the lowercase hexadecimal form of the byte string 'str'.

    Every character becomes exactly two hex digits, so the result is
    always twice as long as the input (a 16 byte md5 digest gives 32
    characters).  An empty input gives an empty string.

    The parameter keeps its original name even though it shadows the
    builtin, so existing callers are unaffected.
    """
    # '%02x' zero-pads values below 0x10.  Slicing the last two characters
    # of hex() instead would keep the 'x' of the '0x' prefix for those
    # values (hex(5) is '0x5', giving 'x5' rather than '05').
    digits = []
    for c in str:
        digits.append('%02x' % ord(c))
    return ''.join(digits)

def md5bytes(filename, n):
    """Return the md5 checksum (as a hex string) of the first 'n' bytes
    of 'filename'.

    The file is read in blocks so that large files are never held in
    memory at once.  If the file holds fewer than 'n' bytes the checksum
    covers the whole file; if 'n' is zero or negative it is the checksum
    of no data at all.
    """
    READBLOCK = 100000
    m = md5.new()
    fp = open(filename, 'rb')
    try:
        remaining = n
        while remaining > 0:
            b = fp.read(min(READBLOCK, remaining))
            if not b:
                # End of file reached before 'n' bytes: nothing more to hash.
                break
            m.update(b)
            remaining = remaining - len(b)
    finally:
        # Close the file even when a read fails.
        fp.close()
    return str2hex(m.digest())

# 


def makeDeltaFile(file, start, n, outfile):
    """Copy 'n' bytes from 'file', starting at offset 'start', to 'outfile'.

    'outfile' is created (or truncated).  The data is copied in blocks so
    that a large delta is never held in memory at once.  If 'file' ends
    before 'n' bytes are available, only the bytes that exist are copied;
    the second progress line reports how many were actually read.  A zero
    or negative 'n' copies nothing.
    """
    print("writing %d bytes from offset %d to %s" % ( n, start, outfile ))
    BLOCK = 100000
    copied = 0
    ifp = open(file, 'rb')
    try:
        ifp.seek(start)
        ofp = open(outfile, 'wb')
        try:
            while copied < n:
                b = ifp.read(min(BLOCK, n - copied))
                if not b:
                    # Source ended early; stop rather than spin on EOF.
                    break
                ofp.write(b)
                copied = copied + len(b)
        finally:
            ofp.close()
    finally:
        # Both files are closed even if the copy fails part way.
        ifp.close()
    print("delta: read %d bytes, wanted %d"%(copied, n))
def copyFile(file, outfile):
    """Copy the whole contents of 'file' to 'outfile'.

    'outfile' is created (or truncated).  The copy is done in blocks, and
    both files are closed even if reading or writing fails part way.
    """
    print("copying from %s to %s"%(file, outfile))
    ifp = open(file, 'rb')
    try:
        ofp = open(outfile, 'wb')
        try:
            while 1:
                b = ifp.read(100000)
                if not b:
                    # An empty read means end of file.
                    break
                ofp.write(b)
        finally:
            ofp.close()
    finally:
        ifp.close()

def genFileName(typ):
    """Return a repository file name for a backup taken now.

    'typ' is "full" (extension 'fs') or "delta" (extension 'deltafs').
    The name is the current UTC time as yyyy-mm-dd-hh-mm followed by the
    extension, so names sort chronologically as plain strings.  The
    resolution is one minute: two calls with the same 'typ' within the
    same minute return the same name.

    Raises ValueError for any other 'typ'.
    """
    if typ == "full":
        ext = 'fs'
    elif typ == "delta":
        ext = 'deltafs'
    else:
        # Fail clearly instead of hitting an unbound 'ext' further down.
        raise ValueError("unknown backup file type: %s" % repr(typ))
    stamp = time.gmtime(time.time())[:5]
    return "%04d-%02d-%02d-%02d-%02d.%s" % (stamp + (ext,))

def listSaveDir():
    """Return the names found in saveDir in descending string order.

    For the timestamped names produced by genFileName this puts the
    newest backup first.  No filtering is done: every directory entry is
    returned.
    """
    # XXXX todo: filter file names for acceptable patterns
    # XXXX todo: handle gzipped files
    entries = os.listdir(saveDir)
    if not entries:
        return entries
    # Ascending sort followed by a reverse gives newest-first.
    entries.sort()
    entries.reverse()
    return entries

def buildFileList(when=None):
    " return a list of files needed to produce state at time 'when' "
    if not when:
	t = time.gmtime(time.time())[:5]
	when="%04d-%02d-%02d-%02d-%02d"%t
    if verbose:
	print "looking for files earlier than", when
    files = listSaveDir()
    # now find a list of files to produce given state in repos.
    needed = []
    for f in files:
	if f > when: continue # file is "after" when we want.
	needed.append(f)
	# XXXX gzip
	if os.path.splitext(f)[-1] == '.fs':
	    if verbose:
		print "found a full file, stopping"
	    break
    needed.reverse()
    needed = map(lambda x,j=os.path.join,d=saveDir: j(d,x), needed)
    if verbose:
	print "files needed to recover state as at", when
	for f in needed:
	    print "\t",f
    return needed

def concatFiles(files, outfile):
    """Concatenate 'files' from the repository, in order, into 'outfile'.

    If 'outfile' is empty or None nothing is written; the files are only
    read, which is enough to measure the stored state.

    Returns a tuple (size, md5) holding the total number of bytes read
    and the md5 checksum of the concatenated data as a hex string.
    """
    ofp = None
    if outfile:
        ofp = open(outfile, 'wb')
    size = 0
    m = md5.new()
    try:
        for f in files:
            ifp = open(f, 'rb')
            try:
                while 1:
                    b = ifp.read(100000)
                    if not b:
                        break
                    m.update(b)
                    size = size + len(b)
                    if ofp is not None:
                        ofp.write(b)
            finally:
                # Close every input explicitly instead of leaving it to
                # the garbage collector.
                ifp.close()
            if ofp is not None:
                ofp.flush() # not really necessary, but ...
    finally:
        # The output is closed even if a read or write failed.
        if ofp is not None:
            ofp.close()
    return ( size, str2hex(m.digest()) )

def doRecover(outfile, date):
    repos_f = buildFileList(date)
    if not repos_f:
	if date:
	    usage("no files in repository before the given date (%s)"%date)
	else:
	    usage("no files in repository!")
    rsz, rmd5 = concatFiles(repos_f, outfile=outfile)
    if verbose:
	print "recovered to file %s"%outfile
	print "recovered file %d bytes, checksum %s"%(rsz, rmd5)

def doUpdate(curFile, forceFull=0):
    repos_f = buildFileList()
    if not repos_f:
	# init db
	if verbose: print "nothing in repository, initialising..."
	copyFile(curFile, os.path.join(saveDir, genFileName('full')))
    else:
	rsz, rmd5 = concatFiles(repos_f, outfile=None)
	if verbose: print "repository state: %s bytes, md5: %s" % (rsz, rmd5)
	fsz = os.stat(curFile)[6]
	fmd5 = md5bytes(curFile, fsz)
	cmd5 = md5bytes(curFile, rsz)
	if verbose: print "current state: %s bytes, md5: %s" % ( fsz, fmd5 )
	if verbose: print "first %d bytes of current:  md5: %s" % ( rsz, cmd5 )
	doFullFile = forceFull
	doDelta = 0
	if ( fsz == rsz ) and ( rmd5 == cmd5 ):  
	    print "no changes, no action taken"
	elif fsz < rsz:
	    if verbose: print "file shrunk: pack? Writing full file"
	    doFullFile = 1
	else:   #  fsz > rsz
	    if rmd5 != cmd5:
		if verbose: print "file changed: pack? Writing full file"
		doFullFile = 1
	    else:
		if verbose: print "additional %d bytes to be written"%(fsz-rsz)
		doDelta = 1
	if doFullFile:
	    copyFile(curFile, os.path.join(saveDir, genFileName('full')))
	else:
	    if doDelta:
		dfile = os.path.join(saveDir, genFileName('delta'))
		if verbose: print "writing delta to", dfile
		makeDeltaFile(curFile, rsz, fsz-rsz, dfile)
	    else:
		if verbose: print "doing nothing"
	    
def usage(mesg,r=1):
    """Write 'mesg' and the option summary to stderr, then exit.

    The message is prefixed with the program name, and the summary shows
    the current values of curFile, saveDir and recFile as the defaults.
    'r' is the exit status passed to sys.exit().
    """
    prog = os.path.basename(sys.argv[0])
    text = """%s: %s

Usage:
    -S --store			backup current ZODB file
    -R --recover		restore stored ZODB file

Flags for --store and --recover:
    -z file --zodb=file		ZODB file (default %s)
    -r dir --repository=dir   	repository directory (default %s)
    -v --verbose		verbose mode - say what's happening

Flags for --store:
    -F --force			force a full ZODB backup

Flags for --recover:
    -D str --date=str		recover state as at this date. str in format
				yyyy-mm-dd[-hh[-mm]] 
    -o file --output=file	write recovered ZODB to file (default %s)
""" % ( prog, mesg, curFile, saveDir , recFile)
    sys.stderr.write(text)
    sys.exit(r)

def main():
    import getopt
    storeMode = 0 
    recoverMode = 0
    force = 0
    date = None
    global curFile, saveDir, recFile, verbose

    args = sys.argv[1:]
    optlist, args = getopt.getopt(args, 'SRz:r:vFD:o:', 
	[ 'store', 'recover', 'zodb=', 'repository=', 'verbose', 'force',
	  'date=', 'output=' ] )
    if args:
	usage("unrecognised arguments")

    for opt, val in optlist:
    	if opt in ( '-S', '--store' ):
	    storeMode = 1
    	elif opt in ( '-R', '--recover' ):
	    recoverMode = 1
    	elif opt in ( '-z', '--zodb' ):
	    curFile = val
    	elif opt in ( '-r', '--repository' ):
	    saveDir = val
    	elif opt in ( '-v', '--verbose' ):
	    verbose = 1
    	elif opt in ( '-F', '--force' ):
	    force = 1
    	elif opt in ( '-D', '--date' ):
	    date = val
    	elif opt in ( '-o', '--output' ):
	    recFile = val
	else:
	    print "unrecognised option",opt,val
    if not storeMode and not recoverMode:
	usage("Nothing to do! Specify either -S or -R")
    if storeMode and recoverMode:
	usage("Can't store AND recover! Choose either -S or -R")
    if storeMode:
	if verbose:
	    print "storing"
	    print "zodb:", curFile
	    print "repository:", saveDir
	doUpdate(curFile, force)
	if verbose:
	    print "done."

    if recoverMode:
	if verbose:
	    print "recovering"
	    print "repository:", saveDir
	    print "to:", recFile
	    if date:
		print "as at:", date
	    else:
		print "(current version)"
	doRecover(recFile, date)
	if verbose:
	    print "done."
	
# Run only when executed as a script, so that importing this module (for
# example to reuse its functions) does not parse sys.argv or exit.
if __name__ == '__main__':
    main()

#
# $Log: repozo.py,v $
# Revision 1.2  2000/01/12 07:27:31  anthony
# oops. str2hex needed (can't assume everyone has mxCrypto installed).
# Filthy quick hack version added.
#
# Revision 1.1.1.1  2000/01/12 07:15:59  anthony
# initial version
#
#