# = S3 Rails-free Rake - Use S3 as a backup repository for your code directory and MySQL database
#
# Original author:: Adam Greene
# Copyright:: (c) 2006 6 Bar 8, LLC., Sweetspot.dm
# License:: GNU
#
# Feedback appreciated: adam at [nospam] 6bar8 dt com
#
# = Synopsis
#
#  from the command line within your RubyOnRails application folder
#  $ rake -T
#    rake s3:backup                # Backup code and database to S3
#    rake s3:backup:code           # Backup the code to S3
#    rake s3:backup:db             # Backup the database to S3
#    rake s3:manage:clean_up       # Remove all but the last 10 most recent backup archive or optionally specify KEEP=5 to keep
#                                  # the last 5
#    rake s3:manage:delete_bucket  # delete bucket.  You need to pass in NAME=bucket_to_delete.  Set FORCE=true if you want to
#                                  # delete the bucket even if there are items in it.
#    rake s3:manage:list           # list all your backup archives
#    rake s3:manage:list_buckets   # list all your S3 buckets
#    rake s3:retrieve              # retrieve the latest revision of code and database from S3.
#                                  # If you need to specify a specific version, call the individual retrieve tasks
#    rake s3:retrieve:code         # retrieve the latest code backup from S3, or optionally specify a VERSION=this_archive.tar.gz
#    rake s3:retrieve:db           # retrieve the latest db backup from S3, or optionally specify a VERSION=this_archive.tar.gz
#
# = Description
#
# Once these two requirements are met, you can easily integrate these rake tasks into capistrano tasks or into cron.
# * For cron, put this into a file like .backup.cron.  You can drop this file into /etc/cron.daily,
#   and make sure you chmod +x .backup.cron.  Also make sure it is owned by the appropriate user (probably 'root'.):
#
#   #!/bin/sh
#
#   # change the paths as you need...
#   cd /var/www/apps//current/ && rake s3:backup >/dev/null 2>&1
#   cd /var/www/apps/staging./current/ && rake s3:backup >/dev/null 2>&1
#
# = Credits and License
#
# inspired by rsh3ll, developed by Dominic Da Silva:
#   http://rubyforge.org/projects/rsh3ll/
#
# This library is licensed under the GNU General Public License (GPL)
#  [http://dev.perl.org/licenses/gpl1.html].
#
#
require 'lib/S3.rb'
require 'yaml'
require 'erb'
require 'time' # Time.parse (used when printing an entry's last-modified stamp)
#require 'active_record'

namespace :s3 do

  desc "Backup code and database to S3"
  task :backup => ["s3:backup:code", "s3:backup:db"]

  namespace :backup do
    # Copies the configured code path to /tmp, then archives and uploads it.
    desc "Backup the code to S3"
    task :code do
      msg "backing up CODE to S3"
      make_bucket('code')
      archive = "/tmp/#{archive_name('code')}"

      # copy it to tmp just to play it safe...
      path = retrieve_code_info
      cmd = "cp -rp #{path} #{archive}"
      msg "extracting code directory"
      puts cmd
      result = system(cmd)
      raise("copy of code dir failed.. msg: #{$?}") unless result

      send_to_s3('code', archive)
    end #end code task

    # Dumps the configured MySQL database to /tmp, then archives and uploads it.
    desc "Backup the database to S3"
    task :db do
      msg "backing up the DATABASE to S3"
      make_bucket('db')
      archive = "/tmp/#{archive_name('db')}"

      msg "retrieving db info"
      host, database, user, password = retrieve_db_info

      msg "dumping db"
      cmd = "mysqldump --opt --skip-add-locks -u#{user} "
      # the command is echoed before the password is appended so it never hits the log
      puts cmd + "... [password filtered]"
      # each option is guarded by its own value; the host used to be (wrongly)
      # guarded by the password, dropping -h when no password was configured
      cmd += " -h #{host} " unless host.nil?
      cmd += " -p'#{password}' " unless password.nil?
      cmd += " #{database} > #{archive}"
      result = system(cmd)
      raise("mysqldump failed. msg: #{$?}") unless result

      send_to_s3('db', archive)
    end
  end # end backup namespace

  desc "retrieve the latest revision of code and database.\n" \
       "If you need to specify a specific version, call the individual retrieve tasks"
  task :retrieve => ["s3:retrieve:code", "s3:retrieve:db"]

  namespace :retrieve do
    desc "retrieve the latest code backup from S3, or optionally specify a VERSION=this_archive.tar.gz"
    task :code do
      retrieve_file 'code', ENV['VERSION']
    end

    desc "retrieve the latest db backup from S3, or optionally specify a VERSION=this_archive.tar.gz"
    task :db do
      retrieve_file 'db', ENV['VERSION']
    end
  end #end retrieve namespace

  namespace :manage do
    desc "Remove all but the last 10 most recent backup archive or optionally specify KEEP=5 to keep the last 5"
    task :clean_up do
      keep_num = ENV['KEEP'] ? ENV['KEEP'].to_i : 10
      puts "keeping the last #{keep_num}"
      cleanup_bucket('code', keep_num)
      cleanup_bucket('db', keep_num)
    end

    desc "list all your backup archives"
    task :list do
      print_bucket 'code'
      print_bucket 'db'
    end

    desc "list all your S3 buckets"
    task :list_buckets do
      puts conn.list_all_my_buckets.entries.map { |bucket| bucket.name }
    end

    # NAME is used verbatim as the bucket name (it is not passed through bucket_name).
    desc "delete bucket. You need to pass in NAME=bucket_to_delete. Set FORCE=true if you want to delete the bucket even if there are items in it."
    task :delete_bucket do
      name = ENV['NAME']
      # plain-Ruby emptiness check: String#blank? only exists when ActiveSupport is loaded
      raise "Specify a NAME=bucket that you want deleted" if name.nil? || name.strip.empty?
      force = ENV['FORCE'] == 'true'

      # empty the bucket first when forced (keep 0 entries, use the name as-is)
      cleanup_bucket(name, 0, false) if force

      response = conn.delete_bucket(name).http_response.message
      response = "Yes" if response == 'No Content'
      # report the bucket that was actually deleted, i.e. the raw NAME
      puts "deleting bucket #{name}. Successful? #{response}"
    end
  end #end manage namespace
end

private

# Downloads one archive from the +name+ backup bucket ('code' or 'db') and
# saves it in the pwd under its S3 key.
#
# name::          logical backup name; mapped to a bucket via bucket_name
# specific_file:: key of the archive to fetch, or nil to take the last entry
#                 of the bucket listing
#
# Raises a RuntimeError when the bucket is empty or +specific_file+ is not
# among the listed entries.
def retrieve_file(name, specific_file)
  msg "retrieving a #{name} backup from S3"
  bucket = bucket_name(name)
  entries = conn.list_bucket(bucket).entries
  raise "No #{name} backups to retrieve" if entries.size < 1

  if specific_file.nil?
    entry_key = entries.last.key
  else
    match = entries.find { |candidate| candidate.key == specific_file }
    raise "Could not find the file '#{specific_file}' in the #{name} bucket" if match.nil?
    entry_key = match.key
  end

  msg "retrieving archive: #{entry_key}"
  # fetch from the bucket that was listed above (was hard-coded to the 'db' bucket)
  data = conn.get(bucket, entry_key).object.data
  File.open(entry_key, "wb") { |f| f.write(data) }
  msg "retrieved file './#{entry_key}'"
end

# print information (key, size, last modified) about every item in a particular bucket
def print_bucket(name)
  puts "Bucket: #{bucket_name(name)}"
  conn.list_bucket(bucket_name(name)).entries.each do |entry|
    puts " -- #{entry.key}\n\tSize: #{(entry.size).to_s} bytes\n\tLast Modified: " + Time.parse(entry.last_modified).strftime('%c') + " UTC\n\n"
  end
end

# go through and keep a certain number of items within a particular bucket,
# and remove everything else.
#
# name::         logical backup name, or a literal bucket name when
#                +convert_name+ is false
# keep_num::     number of entries at the END of the listing to keep
# convert_name:: when true, +name+ is mapped through bucket_name first
#
# NOTE(review): entries are removed from the front of the listing, so this
# presumably relies on the listing being ordered oldest-first (keys embed a
# UTC timestamp) -- confirm against the S3 library in use.
def cleanup_bucket(name, keep_num, convert_name=true)
  puts "Cleaning up the #{name} bucket"
  bucket = convert_name ? bucket_name(name) : name
  entries = conn.list_bucket(bucket).entries #will only retrieve the last 1000
  doomed_count = entries.size - keep_num
  return if doomed_count <= 0

  entries[0, doomed_count].each do |entry|
    response = conn.delete(bucket, entry.key).http_response.message
    response = "Yes" if response == 'No Content'
    puts " -- deleting #{bucket}/#{entry.key}\n\tLast Modified: " + Time.parse(entry.last_modified).strftime('%c') + " UTC.\n\tSuccessful: #{response}\n\n"
  end
end

# load (once) the settings from config/s3.yml, relative to the pwd.
# The file is run through ERB before being parsed as YAML.
def s3_configs
  @s3_configs ||= YAML::load(ERB.new(IO.read("config/s3.yml")).result)
end

# open (once) a S3 connection using the credentials and the use_ssl option
# found in config/s3.yml
def conn
  @conn ||= S3::AWSAuthConnection.new(s3_configs['aws_access_key'], s3_configs['aws_secret_access_key'], s3_configs['options']['use_ssl'])
end

# programatically figure out what to call the backup bucket and
# the archive files.  Is there another way to do this?
def project_name
  # using Dir.pwd will return something like:
  #   /var/www/apps/staging.sweetspot.dm/releases/20061006155448
  # instead of
  #   /var/www/apps/staging.sweetspot.dm/current
  pwd = retrieve_project_info || Dir.pwd
  #another hack..ugh.  If using standard capistrano setup, pwd will be the 'current' symlink.
  pwd = File.dirname(pwd) if File.symlink?(pwd)
  File.basename(pwd)
end

# create S3 bucket.  If it already exists, not a problem!
# Raises a RuntimeError when the response message is anything but 'OK'.
def make_bucket(name)
  # local deliberately not called 'msg' so it cannot be confused with the msg helper
  status = conn.create_bucket(bucket_name(name)).http_response.message
  raise "Could not make bucket #{bucket_name(name)}.  Msg: #{status}" if status != 'OK'
  msg "using bucket: #{bucket_name(name)}"
end

# name of the S3 bucket that holds the +name+ backups of this project
def bucket_name(name)
  # it would be 'nicer' if could use '/' instead of '_' for bucket names...but for some reason S3 doesn't like that
  "#{token(name)}_backup"
end

# "<project>_<name>", the common prefix of bucket and archive names
def token(name)
  "#{project_name}_#{name}"
end

# base name (no extension) of the archive for +name+.  The timestamp is taken
# once and reused, so every archive built in the same rake run shares it.
# Only the first '_' of the token is turned into a '.'.
def archive_name(name)
  @timestamp ||= Time.now.utc.strftime("%Y%m%d%H%M%S")
  token(name).sub('_', '.') + ".#{@timestamp}"
end

# put files in a zipped tar everything that goes to s3
# send it to the appropriate backup bucket
# then does a cleanup
#
# name::     logical backup name ('code' or 'db'), selects the bucket
# tmp_file:: path of the file or directory to archive; it is removed afterwards
#
# Raises a RuntimeError when tar fails or when the upload does not answer
# 'OK'.  The temporary files are removed in either case.
def send_to_s3(name, tmp_file)
  archive = "/tmp/#{archive_name(name)}.tar.gz"

  msg "archiving #{name}"
  cmd = "tar -cpzf #{archive} #{tmp_file}"
  puts cmd
  raise("archiving of #{name} failed. msg: #{$?}") unless system(cmd)

  msg "sending archived #{name} to S3"
  bytes = File.open(archive, "rb") { |f| f.read }
  # put file with default 'private' ACL
  headers = { 'x-amz-acl' => 'private', 'Content-Length' => File.size(archive).to_s }
  response = conn.put(bucket_name(name), File.basename(archive), bytes, headers).http_response.message
  # do not report success (and silently lose the backup) when S3 refused the upload
  raise "Could not send #{archive} to bucket #{bucket_name(name)}.  Msg: #{response}" if response != 'OK'
  msg "finished sending #{name} S3"
ensure
  msg "cleaning up"
  cmd = "rm -rf #{archive} #{tmp_file}"
  puts cmd
  system cmd
end

# uniform progress output for the tasks
def msg(text)
  puts " -- msg: #{text}"
end

# optional 'project_name' override from config/s3.yml (nil when not set)
def retrieve_project_info
  s3_configs['project_name']
end

# path of the code directory to back up, from the 'code' section of config/s3.yml
def retrieve_code_info
  s3_configs['code']['path']
end

# [host, database, username, password] from the 'db' section of config/s3.yml
def retrieve_db_info
  db = s3_configs['db']
  [db['host'], db['database'], db['username'], db['password']]
end