diff --git a/migrate_metadata_to_redis.rb b/migrate_metadata_to_redis.rb index de1e812..7458ff8 100755 --- a/migrate_metadata_to_redis.rb +++ b/migrate_metadata_to_redis.rb @@ -2,49 +2,45 @@ require "rest_client" require "redis" +require "yaml" class Migrator - attr_accessor :username, :token, :base_url + attr_accessor :username, :base_url, :swift_host, :swift_token, + :environment, :dry_run, :logging, :settings - def initialize(username, token) + def initialize(username) @username = username - @token = token - @base_url = "https://storage.5apps.com" - end - def configure_redis(redis_config) - @redis_config = redis_config + @environment = ENV["ENVIRONMENT"] || "staging" + @settings = YAML.load(File.read('config.yml'))[@environment] + @swift_host = @settings["swift"]["host"] + @swift_token = File.read("tmp/swift_token.txt") + + @dry_run = false # disables writing anything to Redis when true + @logging = true end def root_url "#{@base_url}/#{@username}" end - def headers - {"authorization" => "Bearer #{@token}"} - end - def is_dir?(name) name[-1] == "/" end def url_for(directory, parent_directory="") - # base_path = [root_url, parent_directory].join("/") "#{root_url}#{parent_directory}#{directory}" end def migrate - work_on_dir("", "/") + work_on_dir("", "") end def work_on_dir(directory, parent_directory) - url = url_for(directory, parent_directory) + puts "retrieving listing for '#{parent_directory}#{directory}'" if logging - # puts "work on dir: #{url}" - - response = RestClient.get(url, headers) - listing = JSON.parse(response.body) + listing = get_directory_listing_from_swift("#{parent_directory}#{directory}") timestamp = (Time.now.to_f * 1000).to_i @@ -67,16 +63,16 @@ class Migrator def add_item_to_parent_dir(dir, item) key = "rs_meta:#{username}:#{parent_directory_for(dir)}:items" - # puts "adding item #{item} to #{key}" - redis.sadd key, item + puts "adding item #{item} to #{key}" if logging + redis.sadd(key, item) unless dry_run end def save_directory_data(dir, item, data, timestamp) key = "rs_meta:#{username}:#{dir.gsub(/^\//, "")}#{item}" metadata = {etag: data["ETag"], modified: timestamp} - # puts "metadata for dir #{key}: #{metadata}" - redis.hmset(key, *metadata) + puts "metadata for dir #{key}: #{metadata}" if logging + redis.hmset(key, *metadata) unless dry_run end def save_document_data(dir, item, data, timestamp) @@ -87,31 +83,132 @@ class Migrator type: data["Content-Type"], modified: timestamp } - # puts "metadata for document #{key}: #{metadata}" - redis.hmset(key, *metadata) + puts "metadata for document #{key}: #{metadata}" if logging + redis.hmset(key, *metadata) unless dry_run end def parent_directory_for(directory) - return directory if directory == "/" - - return directory[0..directory.rindex("/")].gsub(/^\//, "") + if directory.match(/\//) + return directory[0..directory.rindex("/")] + else + return "/" + end end def redis - @redis ||= Redis.new(@redis_config) + @redis ||= Redis.new(@settings["redis"]) end + def get_directory_listing_from_swift(directory) + is_root_listing = directory.empty? + + get_response = nil + + do_head_request("#{url_for_directory(@username, directory)}") do |response| + return directory_listing([]) if response.code == 404 + + if is_root_listing + get_response = do_get_request("#{container_url_for(@username)}/?format=json&path=") + else + get_response = do_get_request("#{container_url_for(@username)}/?format=json&path=#{escape(directory)}/") + end + end + + if body = JSON.parse(get_response.body) + listing = directory_listing(body) + else + puts "listing not JSON" + end + + listing + end + + def directory_listing(res_body) + listing = { + "@context" => "http://remotestorage.io/spec/folder-description", + "items" => {} + } + + res_body.each do |entry| + name = entry["name"] + name.sub!("#{File.dirname(entry["name"])}/", '') + if name[-1] == "/" # It's a directory + listing["items"].merge!({ + name => { + "ETag" => entry["hash"], + } + }) + else # It's a file + listing["items"].merge!({ + name => { + "ETag" => entry["hash"], + "Content-Type" => entry["content_type"], + "Content-Length" => entry["bytes"] + } + }) + end + end + + listing + end + + def etag_for(body) + objects = JSON.parse(body) + + if objects.empty? + Digest::MD5.hexdigest "" + else + Digest::MD5.hexdigest objects.map { |o| o["hash"] }.join + end + end + + def do_head_request(url, &block) + RestClient.head(url, default_headers, &block) + end + + def do_get_request(url, &block) + RestClient.get(url, default_headers, &block) + end + + def default_headers + {"x-auth-token" => @swift_token} + end + + def url_for_directory(user, directory) + if directory.empty? + container_url_for(user) + else + "#{container_url_for(user)}/#{escape(directory)}" + end + end + + def container_url_for(user) + "#{base_url}/#{container_for(user)}" + end + + def base_url + @base_url ||= @swift_host + end + + def container_for(user) + "rs:#{environment.to_s.chars.first}:#{user}" + end + + def escape(url) + # We want spaces to turn into %20 and slashes to stay slashes + CGI::escape(url).gsub('+', '%20').gsub('%2F', '/') + end end username = ARGV[0] -token = ARGV[1] -migrator = Migrator.new username, token -migrator.configure_redis({host: "localhost", port: 6379}) +unless username + puts "No username given." + puts "Usage:" + puts "ENVIRONMENT=staging ./migrate_metadata_to_redis.rb " + exit 1 +end +migrator = Migrator.new username migrator.migrate - - - -