[EDIT: minor bugfix for rsync return value]
I used to use an rsync script called
RIBS to back up my Linode, but I recently built an OpenSolaris storage server for my home network, and decided I wanted something that would take advantage of ZFS's features such as on-the-fly compression and deduplication, and (especially) snapshotting.
I've written a Perl script that works in a manner similar to RIBS, which I have christened ZIBS. Instead of using hard links, it just uses ZFS snapshots to accomplish the same basic thing. The snapshots are rotated according to the configuration in the script. Only the "hourly" backups actually run rsync; others merely create and rotate snapshots. It can back up mounted NFS filesystems as well as using SSH to a remote location, and it can also just create and rotate snapshots on a local ZFS data set.
Since ZFS snapshots are copy-on-write, storage is very efficient; furthermore, you can turn on compression. If you have multiple systems using the same distro, you can save further space by using deduplication. Dedup and compression should be turned on before making backups, since they're in-line operations rather than after-the-fact.
This script could be used on FreeBSD (which has native ZFS support in 7.2 and 8.0) or on Linux with ZFS-FUSE, but you'll need Solaris or OpenSolaris if you want deduplication.
Usage example:
Code:
# Create ZFS datasets for your backups (assuming a ZFS pool called "tank", and ZIBS itself set up as below):
zfs create -o compression=gzip -o dedup=on tank/my_centos_site1
zfs create -o compression=gzip -o dedup=on tank/my_centos_site2
zfs create -o compression=gzip -o dedup=on tank/my_debian_site
# Run this four times daily (changeable in script)
zibs ALL hourly
# Run this once a day (rotates the "daily" snapshots)
zibs ALL daily
# Run this once a week
zibs ALL weekly
# Run once a month
zibs ALL monthly
# Run once a year
zibs ALL annual
# Rename a snapshot to take it out of the rotation
zfs rename tank/my_centos_site1@hourly.0 specialsnap
# List snapshots in your ZIBS datasets
zibs ALL list
# Back up only one system
zibs my_debian_site hourly
The script:
Code:
#!/usr/bin/perl
# ZIBS: ZFS Incremental Backup Script
# A backup script loosely inspired by RIBS, but making use of
# ZFS snapshots (and written in Perl instead of PHP).
# by Russ Price
use strict;
use warnings;

# Location of SSH key for offsite backups.
my $ssh_key = '/path/to/ssh-backup-key';

# Configuration data. Master keys are backup sets. Each backup set gets
# its own configuration. The subkeys are as follows:
# source:   Where the backup comes from; either a filesystem path for local
#           backups (e.g. via NFS), or host:path for offsite backups via SSH.
#           Be sure to include trailing slash.
#           If source is not present, assume a local ZFS data set, and
#           make snapshots only.
# offsite:  Flag to indicate offsite backup via SSH.
# dataset:  The destination ZFS dataset for the backups. This is not
#           an absolute path.
# sched:    The number of snapshots to keep for each schedule type. The
#           "hourly" entry is mandatory.
# excludes: An array of paths to exclude from the backup (optional).
# Per-system backup configuration table, keyed by backup-set name (the
# first command-line argument). See the subkey descriptions above.
my %conf = (
    # Local file storage: no "source" key, so this set is snapshot-only
    # (rsync is never run for it).
    u => {
        dataset => 'tank/u',
        sched   => {
            hourly  => 4,
            daily   => 7,
            weekly  => 4,
            monthly => 12,
            annual  => 10,
        },
    },
    # Mounted NFS filesystem from another server (local-path source,
    # so no SSH involved).
    server2 => {
        source  => '/server2/www',
        offsite => 0,
        dataset => 'tank/server2-www',
        sched   => {
            hourly  => 4,
            daily   => 7,
            weekly  => 4,
            monthly => 12,
            annual  => 10,
        },
    },
    # Remote systems backed up over SSH.
    my_centos_site1 => {
        source  => 'site1.example.com:/',
        offsite => 1,
        dataset => 'tank/my_centos_site1',
        sched   => {
            hourly  => 4,
            daily   => 7,
            weekly  => 4,
            monthly => 12,
            annual  => 10,
        },
        excludes => [
            '/proc/**',
            '/dev/pts/**',
            '/dev/shm/**',
            '/aquota.group',
            '/aquota.user',
            '/etc/mtab',
            '/var/spool/mqueue/**',
            '/var/mail/*',
            '/var/named/chroot/proc/**',
            '/var/named/chroot/dev/**',
            '/sys/**',
        ],
    },
    my_centos_site2 => {
        source  => 'site2.example.com:/',
        offsite => 1,
        dataset => 'tank/my_centos_site2',
        sched   => {
            hourly  => 4,
            daily   => 7,
            weekly  => 4,
            monthly => 12,
            annual  => 10,
        },
        excludes => [
            '/proc/**',
            '/dev/pts/**',
            '/dev/shm/**',
            '/aquota.group',
            '/aquota.user',
            '/etc/mtab',
            '/var/spool/mqueue/**',
            '/var/mail/*',
            '/var/named/chroot/proc/**',
            '/var/named/chroot/dev/**',
            '/old/**',
            '/sys/**',
        ],
    },
    my_debian_site => {
        source  => 'site3.example.com:/',
        offsite => 1,
        dataset => 'tank/my_debian_site',
        sched   => {
            hourly  => 4,
            daily   => 7,
            weekly  => 4,
            monthly => 12,
            annual  => 10,
        },
        excludes => [
            '/proc/**',
            '/dev/pts/**',
            '/dev/shm/**',
            '/aquota.group',
            '/aquota.user',
            '/etc/mtab',
            '/var/spool/mqueue/**',
            '/sys/**',
        ],
    },
);
# get_snaps($dataset, $type): return the names of $dataset's snapshots.
# When $type is true, only numbered snapshots of that schedule type
# ("$dataset\@$type.N") are returned; a false $type returns every
# snapshot of the dataset.  Returns an empty list if zfs cannot be run.
sub get_snaps {
    my ($dataset, $type) = @_;
    my @snaps;
    # List-form pipe open: no shell is involved, so $dataset cannot be
    # misinterpreted.  The old "zfs list | grep '$dataset'" pipeline
    # also matched *other* datasets whose names merely contain
    # $dataset (e.g. tank/u picked up tank/u2 snapshots); anchoring the
    # regex to the exact dataset name fixes that.
    open my $zfs, '-|', 'zfs', 'list', '-H', '-t', 'snapshot'
        or do { warn "Cannot run zfs list: $!\n"; return; };
    while (my $line = <$zfs>) {
        my $snap;
        if ($type) {
            # No /o here: the pattern interpolates $type, so it must be
            # recompiled when the schedule type changes between calls.
            ($snap) = ($line =~ m/^(\Q$dataset\E\@\Q$type\E\.\d+)\t/);
        } else {
            ($snap) = ($line =~ m/^(\Q$dataset\E\@\S+)\t/);
        }
        push @snaps, $snap if $snap;
    }
    close $zfs;
    return @snaps;
}
# do_backup($set, $type): run one backup cycle of schedule $type for the
# configured backup set $set.  For an "hourly" run of a set with a
# source, rsync into /$dataset first; every run then rotates the
# numbered "$type.N" snapshots (destroying those past the configured
# limit) and creates a fresh "$type.0" snapshot.
sub do_backup {
    my ($set, $type) = @_;
    my %info     = %{ $conf{$set} };
    my $dataset  = $info{dataset};
    my $source   = $info{source};
    my $offsite  = $info{offsite};
    my %sched    = %{ $info{sched} };
    my $schedmax = $sched{$type};
    print "\nStarting $type backup of $set (max $schedmax)\n";
    my @snaps = get_snaps($dataset, $type);
    # Only use rsync if we have a source; source-less sets are local ZFS
    # datasets that get snapshots only.
    if ($source) {
        # Set up excludes.  The "|| []" guards sets that define a source
        # but no excludes list (dereferencing undef dies under strict).
        my $exclude_args = '';
        foreach my $e (@{ $info{excludes} || [] }) {
            $exclude_args .= "--exclude=\"$e\" ";
        }
        # Use SSH key and compression if offsite.
        my $offsite_args = '';
        if ($offsite) {
            $offsite_args = "-z -e \"ssh -i $ssh_key -p 22\"";
        }
        # If it's hourly, do an rsync.  NOTE(review): the command is
        # deliberately built as a shell string (the quoted -e/--exclude
        # arguments depend on shell parsing), so configuration values
        # are trusted not to contain shell metacharacters.
        if ($type eq 'hourly') {
            my $status = system "rsync -artplv --numeric-ids --delete --delete-excluded --stats $offsite_args $exclude_args $source /$dataset";
            # system returns -1 when rsync could not be launched at all.
            if ($status == -1) {
                print "WARNING: could not launch rsync: $! - NOT rotating snapshots\n";
                return;
            }
            # Get actual rsync exit code from the wait status.
            my $result = $status >> 8;
            if ($result) {
                if ($result == 24) {
                    # rsync exit code 24 ("vanished source files") is benign.
                    print "WARNING: File(s) vanished before they could be transferred.\n";
                } else {
                    print "WARNING: rsync returned error code $result - NOT rotating snapshots\n";
                    return;
                }
            }
        }
    }
    # Rotate the snaps, destroying those beyond the limit.  Sort by the
    # numeric suffix, highest first, so each rename targets a free slot.
    # A plain lexical sort would order "10" before "9" and make renames
    # collide once a schedule keeps more than ten snapshots.
    my @ordered = map  { $_->[1] }
                  sort { $b->[0] <=> $a->[0] }
                  map  { [ ($_ =~ m/\.(\d+)$/)[0], $_ ] } @snaps;
    foreach my $r (@ordered) {
        my ($snapname, $snapnum) = ($r =~ m/^(\S+\@\Q$type\E)\.(\d+)$/);
        if ($snapnum >= $schedmax - 1) {
            # List-form system: no shell involved.
            system 'zfs', 'destroy', $r;
        } else {
            system 'zfs', 'rename', $r, $snapname . '.' . ($snapnum + 1);
        }
    }
    # Create latest snapshot for given type.
    print "Creating snapshot $dataset\@$type.0\n";
    system 'zfs', 'snapshot', "$dataset\@$type.0";
}
# do_list($system): print every snapshot of the named backup set along
# with its creation time, one per line.
sub do_list {
    my ($system) = @_;
    my %info    = %{ $conf{$system} };
    my $dataset = $info{dataset};
    my @snaps   = get_snaps($dataset, 0);
    foreach my $snap (@snaps) {
        # List-form pipe open with a lexical handle: no shell, so the
        # snapshot name is passed through verbatim, and failures are
        # detected instead of silently reading from a dead handle.
        open my $props, '-|', 'zfs', 'get', '-H', 'creation', $snap
            or do { warn "Cannot run zfs get for $snap: $!\n"; next; };
        while (my $line = <$props>) {
            # -H output is tab-separated: name, property, value, source.
            my ($creation) = ($line =~ m/^\S+\tcreation\t(.*)\t-$/);
            printf("%-55s %s\n", $snap, $creation) if $creation;
        }
        close $props;
    }
}
# --- Command-line driver ---
# Usage: zibs <system|ALL> <hourly|daily|weekly|monthly|annual|list>
if (scalar @ARGV != 2) {
    print "Usage: $0 system|ALL hourly|daily|weekly|monthly|annual|list\n\n";
    print "Systems defined:\n";
    # Sorted for deterministic, readable output.
    foreach my $system (sort keys %conf) {
        print "$system\n";
    }
    exit(1);
}
my ($target, $type) = @ARGV;
# Reject unknown schedule types early; a typo such as "horly" would
# otherwise silently create and rotate snapshots of a bogus type.
my %known_type = map { $_ => 1 } qw(hourly daily weekly monthly annual list);
unless ($known_type{$type}) {
    print "Unknown schedule type '$type'\n";
    exit(1);
}
print "ZIBS: ZFS Incremental Backup Script\n";
if ($target ne 'ALL') {
    # Fail with a clear message instead of crashing on an undefined
    # configuration entry deep inside do_backup/do_list.
    unless (exists $conf{$target}) {
        print "Unknown system '$target'\n";
        exit(1);
    }
    if ($type eq 'list') {
        do_list($target);
    } else {
        print "Performing $type backup for $target\n";
        do_backup($target, $type);
    }
} else {
    print "Performing $type backup for $target\n" if $type ne 'list';
    # Sorted so ALL runs process systems in a stable order.
    foreach my $system (sort keys %conf) {
        if ($type eq 'list') {
            do_list($system);
        } else {
            do_backup($system, $type);
        }
    }
}