diff --git a/lib/remote_storage/swift.rb b/lib/remote_storage/swift.rb index c929d5d..849e0f2 100644 --- a/lib/remote_storage/swift.rb +++ b/lib/remote_storage/swift.rb @@ -88,11 +88,11 @@ module RemoteStorage lua_script = <<-EOF local user = ARGV[1] local directory = ARGV[2] - local items = redis.call("smembers", "rs_meta:"..user..":"..directory.."/:items") + local items = redis.call("smembers", "rs:m:"..user..":"..directory.."/:items") local listing = {} for index, name in pairs(items) do - local redis_key = "rs_meta:"..user..":" + local redis_key = "rs:m:"..user..":" if directory == "" then redis_key = redis_key..name else @@ -108,10 +108,10 @@ module RemoteStorage metadata[metadata_values[idx]] = metadata_values[idx + 1] end - listing[name] = {["ETag"] = metadata["etag"]} + listing[name] = {["ETag"] = metadata["e"]} if string.sub(name, -1) ~= "/" then - listing[name]["Content-Type"] = metadata["type"] - listing[name]["Content-Length"] = tonumber(metadata["size"]) + listing[name]["Content-Type"] = metadata["t"] + listing[name]["Content-Length"] = tonumber(metadata["s"]) end end @@ -122,7 +122,7 @@ module RemoteStorage end def get_directory_listing_from_redis(user, directory) - etag = redis.hget "rs_meta:#{user}:#{directory}/", "etag" + etag = redis.hget "rs:m:#{user}:#{directory}/", "e" none_match = (server.env["HTTP_IF_NONE_MATCH"] || "").split(",").map(&:strip) server.halt 304 if none_match.include? etag @@ -189,16 +189,18 @@ module RemoteStorage res = do_put_request(url, data, content_type) - # TODO get last modified from response and add to metadata + # TODO use actual last modified time from the document put request + timestamp = (Time.now.to_f * 1000).to_i + metadata = { - etag: res.headers[:etag], - size: data.size, - type: content_type + e: res.headers[:etag], + s: data.size, + t: content_type, + m: timestamp } if update_metadata_object(user, directory, key, metadata) && - # TODO provide the last modified to use for the dir objects as well - update_dir_objects(user, directory) + update_dir_objects(user, directory, timestamp) server.headers["ETag"] = %Q("#{res.headers[:etag]}") server.halt 200 else @@ -312,23 +314,23 @@ module RemoteStorage end -- check for existing directory with the same name as the document - local redis_key = "rs_meta:"..user..":" + local redis_key = "rs:m:"..user..":" if directory == "" then redis_key = redis_key..key.."/" else redis_key = redis_key..directory.."/"..key.."/" end - if redis.call("hget", redis_key, "etag") then + if redis.call("hget", redis_key, "e") then return true end for index, dir in pairs(parent_directories) do - if redis.call("hget", "rs_meta:"..user..":"..dir.."/", "etag") then + if redis.call("hget", "rs:m:"..user..":"..dir.."/", "e") then -- the directory already exists, no need to do further checks return false else -- check for existing document with same name as directory - if redis.call("hget", "rs_meta:"..user..":"..dir, "etag") then + if redis.call("hget", "rs:m:"..user..":"..dir, "e") then return true end end @@ -400,17 +402,14 @@ module RemoteStorage end def update_metadata_object(user, directory, key, metadata) - redis_key = "rs_meta:#{user}:#{directory}/#{key}" + redis_key = "rs:m:#{user}:#{directory}/#{key}" redis.hmset(redis_key, *metadata) - redis.sadd "rs_meta:#{user}:#{directory}/:items", key + redis.sadd "rs:m:#{user}:#{directory}/:items", key true end - def update_dir_objects(user, directory) - # TODO use actual last modified time from the document put request - timestamp = (Time.now.to_f * 1000).to_i - + def update_dir_objects(user, directory, timestamp) parent_directories_for(directory).each do |dir| unless dir == "" res = do_put_request("#{url_for_directory(user, dir)}/", timestamp.to_s, "text/plain") @@ -420,10 +419,10 @@ module RemoteStorage etag = etag_for(get_response.body) end - key = "rs_meta:#{user}:#{dir}/" - metadata = {etag: etag, modified: timestamp} + key = "rs:m:#{user}:#{dir}/" + metadata = {e: etag, m: timestamp} redis.hmset(key, *metadata) - redis.sadd "rs_meta:#{user}:#{parent_directory_for(dir)}:items", "#{top_directory(dir)}/" + redis.sadd "rs:m:#{user}:#{parent_directory_for(dir)}:items", "#{top_directory(dir)}/" end true @@ -438,21 +437,22 @@ module RemoteStorage end def delete_metadata_objects(user, directory, key) - redis_key = "rs_meta:#{user}:#{directory}/#{key}" + redis_key = "rs:m:#{user}:#{directory}/#{key}" redis.del(redis_key) - redis.srem "rs_meta:#{user}:#{directory}/:items", key + redis.srem "rs:m:#{user}:#{directory}/:items", key end def delete_dir_objects(user, directory) + timestamp = (Time.now.to_f * 1000).to_i + parent_directories_for(directory).each do |dir| if dir_empty?(user, dir) unless dir == "" do_delete_request("#{url_for_directory(user, dir)}/") end - redis.del "rs_meta:#{user}:#{directory}/" - redis.srem "rs_meta:#{user}:#{parent_directory_for(dir)}:items", "#{dir}/" + redis.del "rs:m:#{user}:#{directory}/" + redis.srem "rs:m:#{user}:#{parent_directory_for(dir)}:items", "#{dir}/" else - timestamp = (Time.now.to_f * 1000).to_i unless dir == "" res = do_put_request("#{url_for_directory(user, dir)}/", timestamp.to_s, "text/plain") etag = res.headers[:etag] @@ -460,15 +460,15 @@ module RemoteStorage get_response = do_get_request("#{container_url_for(user)}/?format=json&path=") etag = etag_for(get_response.body) end - metadata = {etag: etag, modified: timestamp} - redis.hmset("rs_meta:#{user}:#{dir}/", *metadata) + metadata = {e: etag, m: timestamp} + redis.hmset("rs:m:#{user}:#{dir}/", *metadata) end end end def dir_empty?(user, dir) if directory_backend(user).match(/new/) - redis.smembers("rs_meta:#{user}:#{dir}/:items").empty? + redis.smembers("rs:m:#{user}:#{dir}/:items").empty? else do_get_request("#{container_url_for(user)}/?format=plain&limit=1&path=#{escape(dir)}/") do |res| return res.headers[:content_length] == "0" @@ -538,7 +538,7 @@ module RemoteStorage end def directory_backend(user) - @directory_backend ||= redis.get("rs_config:dir_backend:#{user}") || "legacy" + @directory_backend ||= redis.get("rsc:db:#{user}") || "legacy" end def etag_for(body) diff --git a/migrate_metadata_to_redis.rb b/migrate_metadata_to_redis.rb new file mode 100755 index 0000000..bd28e75 --- /dev/null +++ b/migrate_metadata_to_redis.rb @@ -0,0 +1,246 @@ +#!/usr/bin/env ruby + +require "rest_client" +require "redis" +require "yaml" +require "logger" +require "active_support/core_ext/hash" + +class Migrator + + attr_accessor :username, :base_url, :swift_host, :swift_token, + :environment, :dry_run, :settings, :logger + + def initialize(username) + @username = username + + @environment = ENV["ENVIRONMENT"] || "staging" + @settings = YAML.load(File.read('config.yml'))[@environment] + + @swift_host = @settings["swift"]["host"] + @swift_token = File.read("tmp/swift_token.txt").strip + + @dry_run = ENV["DRYRUN"] || false # disables writing anything to Redis when true + + @logger = Logger.new("log/migrate_metadata_to_redis.log") + log_level = ENV["LOGLEVEL"] || "INFO" + logger.level = Kernel.const_get "Logger::#{log_level}" + logger.progname = username + end + + def root_url + "#{@base_url}/#{@username}" + end + + def is_dir?(name) + name[-1] == "/" + end + + def url_for(directory, parent_directory="") + "#{root_url}#{parent_directory}#{directory}" + end + + def migrate + logger.info "Starting migration for '#{username}'" + set_directory_backend("legacy_locked") + begin + work_on_dir("", "") + rescue Exception => ex + logger.error "Error migrating metadata for '#{username}': #{ex}" + set_directory_backend("legacy") + # write username to file for later reference + File.open('log/failed_migration.log', 'a') { |f| f.puts username } + exit 1 + end + set_directory_backend("new") + logger.info "Finished migration for '#{username}'" + end + + def set_directory_backend(backend) + redis.set("rsc:db:#{username}", backend) unless dry_run + end + + def work_on_dir(directory, parent_directory) + logger.debug "Retrieving listing for '#{parent_directory}#{directory}'" + + listing = get_directory_listing_from_swift("#{parent_directory}#{directory}") + + timestamp = (Time.now.to_f * 1000).to_i + + if listing["items"].any? + items = listing["items"] + items.each do |item, data| + if is_dir? item + save_directory_data("#{parent_directory}#{directory}", item, data, timestamp) + + # get dir listing and repeat + work_on_dir(item, "#{parent_directory}#{directory}") + else + save_document_data("#{parent_directory}#{directory}", item, data) + end + + add_item_to_parent_dir("#{parent_directory}#{directory}", item) + end + end + end + + def add_item_to_parent_dir(dir, item) + key = "rs:m:#{username}:#{parent_directory_for(dir)}:items" + logger.debug "Adding item #{item} to #{key}" + redis.sadd(key, item) unless dry_run + end + + def save_directory_data(dir, item, data, timestamp) + key = "rs:m:#{username}:#{dir.gsub(/^\//, "")}#{item}" + metadata = { + e: data["ETag"], + m: timestamp_for(data["Last-Modified"]) + } + + logger.debug "Metadata for dir #{key}: #{metadata}" + redis.hmset(key, *metadata) unless dry_run + end + + def save_document_data(dir, item, data) + key = "rs:m:#{username}:#{dir.gsub(/^\//, "")}#{item}" + metadata = { + e: data["ETag"], + s: data["Content-Length"], + t: data["Content-Type"], + m: timestamp_for(data["Last-Modified"]) + } + logger.debug "Metadata for document #{key}: #{metadata}" + redis.hmset(key, *metadata) unless dry_run + end + + def parent_directory_for(directory) + if directory.match(/\//) + return directory[0..directory.rindex("/")] + else + return "/" + end + end + + def timestamp_for(date) + return DateTime.parse(date).strftime("%Q").to_i + end + + def redis + @redis ||= Redis.new(@settings["redis"].symbolize_keys) + end + + def get_directory_listing_from_swift(directory) + is_root_listing = directory.empty? + + get_response = nil + + do_head_request("#{url_for_directory(@username, directory)}") do |response| + return directory_listing([]) if response.code == 404 + + if is_root_listing + get_response = do_get_request("#{container_url_for(@username)}/?format=json&path=") + else + get_response = do_get_request("#{container_url_for(@username)}/?format=json&path=#{escape(directory)}") + end + end + + if body = JSON.parse(get_response.body) + listing = directory_listing(body) + else + puts "listing not JSON" + end + + listing + end + + def directory_listing(res_body) + listing = { + "@context" => "http://remotestorage.io/spec/folder-description", + "items" => {} + } + + res_body.each do |entry| + name = entry["name"] + name.sub!("#{File.dirname(entry["name"])}/", '') + if name[-1] == "/" # It's a directory + listing["items"].merge!({ + name => { + "ETag" => entry["hash"], + "Last-Modified" => entry["last_modified"] + } + }) + else # It's a file + listing["items"].merge!({ + name => { + "ETag" => entry["hash"], + "Content-Type" => entry["content_type"], + "Content-Length" => entry["bytes"], + "Last-Modified" => entry["last_modified"] + } + }) + end + end + + listing + end + + def etag_for(body) + objects = JSON.parse(body) + + if objects.empty? + Digest::MD5.hexdigest "" + else + Digest::MD5.hexdigest objects.map { |o| o["hash"] }.join + end + end + + def do_head_request(url, &block) + RestClient.head(url, default_headers, &block) + end + + def do_get_request(url, &block) + RestClient.get(url, default_headers, &block) + end + + def default_headers + {"x-auth-token" => @swift_token} + end + + def url_for_directory(user, directory) + if directory.empty? + container_url_for(user) + else + "#{container_url_for(user)}/#{escape(directory)}" + end + end + + def container_url_for(user) + "#{base_url}/#{container_for(user)}" + end + + def base_url + @base_url ||= @swift_host + end + + def container_for(user) + "rs:#{environment.to_s.chars.first}:#{user}" + end + + def escape(url) + # We want spaces to turn into %20 and slashes to stay slashes + CGI::escape(url).gsub('+', '%20').gsub('%2F', '/') + end +end + +username = ARGV[0] + +unless username + puts "No username given." + puts "Usage:" + puts "ENVIRONMENT=staging ./migrate_metadata_to_redis.rb " + exit 1 +end + +migrator = Migrator.new username +migrator.migrate + diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index 4975f52..dfc955f 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -39,7 +39,7 @@ if app.settings.respond_to? :redis end def purge_redis - redis.keys("rs_*").each do |key| + redis.keys("rs*").each do |key| redis.del key end end diff --git a/spec/swift/app_spec.rb b/spec/swift/app_spec.rb index 052b750..56d03a1 100644 --- a/spec/swift/app_spec.rb +++ b/spec/swift/app_spec.rb @@ -16,7 +16,7 @@ describe "App" do before do purge_redis - redis.set "rs_config:dir_backend:phil", "new" + redis.set "rsc:db:phil", "new" end context "authorized" do @@ -31,11 +31,11 @@ describe "App" do put "/phil/food/aguacate", "si" end - metadata = redis.hgetall "rs_meta:phil:food/aguacate" - metadata["size"].must_equal "2" - metadata["type"].must_equal "text/plain; charset=utf-8" - metadata["etag"].must_equal "bla" - metadata["modified"].must_equal nil + metadata = redis.hgetall "rs:m:phil:food/aguacate" + metadata["s"].must_equal "2" + metadata["t"].must_equal "text/plain; charset=utf-8" + metadata["e"].must_equal "bla" + metadata["m"].length.must_equal 13 end it "creates the directory objects metadata in redis" do @@ -50,20 +50,20 @@ describe "App" do end end - metadata = redis.hgetall "rs_meta:phil:/" - metadata["etag"].must_equal "rootetag" - metadata["modified"].length.must_equal 13 + metadata = redis.hgetall "rs:m:phil:/" + metadata["e"].must_equal "rootetag" + metadata["m"].length.must_equal 13 - metadata = redis.hgetall "rs_meta:phil:food/" - metadata["etag"].must_equal "bla" - metadata["modified"].length.must_equal 13 + metadata = redis.hgetall "rs:m:phil:food/" + metadata["e"].must_equal "bla" + metadata["m"].length.must_equal 13 - food_items = redis.smembers "rs_meta:phil:food/:items" + food_items = redis.smembers "rs:m:phil:food/:items" food_items.each do |food_item| ["camaron", "aguacate"].must_include food_item end - root_items = redis.smembers "rs_meta:phil:/:items" + root_items = redis.smembers "rs:m:phil:/:items" root_items.must_equal ["food/"] end @@ -81,8 +81,8 @@ describe "App" do last_response.status.must_equal 200 - metadata = redis.hgetall "rs_meta:phil:food/aguacate" - metadata["size"].must_equal "2" + metadata = redis.hgetall "rs:m:phil:food/aguacate" + metadata["s"].must_equal "2" end it "conflicts when there is a directory with same name as document" do @@ -94,7 +94,7 @@ describe "App" do last_response.status.must_equal 409 - metadata = redis.hgetall "rs_meta:phil:food" + metadata = redis.hgetall "rs:m:phil:food" metadata.must_be_empty end @@ -107,7 +107,7 @@ describe "App" do last_response.status.must_equal 409 - metadata = redis.hgetall "rs_meta:phil:food/aguacate/empanado" + metadata = redis.hgetall "rs:m:phil:food/aguacate/empanado" metadata.must_be_empty end end @@ -115,7 +115,7 @@ describe "App" do describe "directory backend configuration" do context "locked new backed" do before do - redis.set "rs_config:dir_backend:phil", "new-locked" + redis.set "rsc:db:phil", "new-locked" end it "responds with 503" do @@ -123,14 +123,14 @@ describe "App" do last_response.status.must_equal 503 - metadata = redis.hgetall "rs_meta:phil:food/aguacate" + metadata = redis.hgetall "rs:m:phil:food/aguacate" metadata.must_be_empty end end context "locked legacy backend" do before do - redis.set "rs_config:dir_backend:phil", "legacy-locked" + redis.set "rsc:db:phil", "legacy-locked" end it "responds with 503" do @@ -138,7 +138,7 @@ describe "App" do last_response.status.must_equal 503 - metadata = redis.hgetall "rs_meta:phil:food/aguacate" + metadata = redis.hgetall "rs:m:phil:food/aguacate" metadata.must_be_empty end end @@ -150,7 +150,7 @@ describe "App" do before do purge_redis - redis.set "rs_config:dir_backend:phil", "new" + redis.set "rsc:db:phil", "new" end context "authorized" do @@ -178,12 +178,12 @@ describe "App" do end end - metadata = redis.hgetall "rs_meta:phil:food/aguacate" + metadata = redis.hgetall "rs:m:phil:food/aguacate" metadata.must_be_empty end it "deletes the directory objects metadata in redis" do - old_metadata = redis.hgetall "rs_meta:phil:food/" + old_metadata = redis.hgetall "rs:m:phil:food/" put_stub = OpenStruct.new(headers: {etag: "newetag"}) get_stub = OpenStruct.new(body: "rootbody") @@ -197,15 +197,15 @@ describe "App" do end end - metadata = redis.hgetall "rs_meta:phil:food/" - metadata["etag"].must_equal "newetag" - metadata["modified"].length.must_equal 13 - metadata["modified"].wont_equal old_metadata["modified"] + metadata = redis.hgetall "rs:m:phil:food/" + metadata["e"].must_equal "newetag" + metadata["m"].length.must_equal 13 + metadata["m"].wont_equal old_metadata["m"] - food_items = redis.smembers "rs_meta:phil:food/:items" + food_items = redis.smembers "rs:m:phil:food/:items" food_items.must_equal ["camaron"] - root_items = redis.smembers "rs_meta:phil:/:items" + root_items = redis.smembers "rs:m:phil:/:items" root_items.must_equal ["food/"] end @@ -223,13 +223,13 @@ describe "App" do end end - metadata = redis.hgetall "rs_meta:phil:food/" + metadata = redis.hgetall "rs:m:phil:food/" metadata.must_be_empty - food_items = redis.smembers "rs_meta:phil:food/:items" + food_items = redis.smembers "rs:m:phil:food/:items" food_items.must_be_empty - root_items = redis.smembers "rs_meta:phil:/:items" + root_items = redis.smembers "rs:m:phil:/:items" root_items.must_be_empty end end @@ -239,7 +239,7 @@ describe "App" do before do purge_redis - redis.set "rs_config:dir_backend:phil", "new" + redis.set "rsc:db:phil", "new" end context "authorized" do @@ -318,7 +318,7 @@ describe "App" do put "/phil/food/camaron", "yummi" end - redis.set "rs_config:dir_backend:phil", "legacy" + redis.set "rsc:db:phil", "legacy" end it "serves directory listing from Swift backend" do