Merge pull request #73 from 5apps/features/redis_migration

Migration script for copying metadata to Redis
This commit is contained in:
galfert 2016-03-02 23:47:42 +01:00
commit d8fc27c8dd
4 changed files with 317 additions and 71 deletions

View File

@ -88,11 +88,11 @@ module RemoteStorage
lua_script = <<-EOF
local user = ARGV[1]
local directory = ARGV[2]
local items = redis.call("smembers", "rs_meta:"..user..":"..directory.."/:items")
local items = redis.call("smembers", "rs:m:"..user..":"..directory.."/:items")
local listing = {}
for index, name in pairs(items) do
local redis_key = "rs_meta:"..user..":"
local redis_key = "rs:m:"..user..":"
if directory == "" then
redis_key = redis_key..name
else
@ -108,10 +108,10 @@ module RemoteStorage
metadata[metadata_values[idx]] = metadata_values[idx + 1]
end
listing[name] = {["ETag"] = metadata["etag"]}
listing[name] = {["ETag"] = metadata["e"]}
if string.sub(name, -1) ~= "/" then
listing[name]["Content-Type"] = metadata["type"]
listing[name]["Content-Length"] = tonumber(metadata["size"])
listing[name]["Content-Type"] = metadata["t"]
listing[name]["Content-Length"] = tonumber(metadata["s"])
end
end
@ -122,7 +122,7 @@ module RemoteStorage
end
def get_directory_listing_from_redis(user, directory)
etag = redis.hget "rs_meta:#{user}:#{directory}/", "etag"
etag = redis.hget "rs:m:#{user}:#{directory}/", "e"
none_match = (server.env["HTTP_IF_NONE_MATCH"] || "").split(",").map(&:strip)
server.halt 304 if none_match.include? etag
@ -189,16 +189,18 @@ module RemoteStorage
res = do_put_request(url, data, content_type)
# TODO get last modified from response and add to metadata
# TODO use actual last modified time from the document put request
timestamp = (Time.now.to_f * 1000).to_i
metadata = {
etag: res.headers[:etag],
size: data.size,
type: content_type
e: res.headers[:etag],
s: data.size,
t: content_type,
m: timestamp
}
if update_metadata_object(user, directory, key, metadata) &&
# TODO provide the last modified to use for the dir objects as well
update_dir_objects(user, directory)
update_dir_objects(user, directory, timestamp)
server.headers["ETag"] = %Q("#{res.headers[:etag]}")
server.halt 200
else
@ -312,23 +314,23 @@ module RemoteStorage
end
-- check for existing directory with the same name as the document
local redis_key = "rs_meta:"..user..":"
local redis_key = "rs:m:"..user..":"
if directory == "" then
redis_key = redis_key..key.."/"
else
redis_key = redis_key..directory.."/"..key.."/"
end
if redis.call("hget", redis_key, "etag") then
if redis.call("hget", redis_key, "e") then
return true
end
for index, dir in pairs(parent_directories) do
if redis.call("hget", "rs_meta:"..user..":"..dir.."/", "etag") then
if redis.call("hget", "rs:m:"..user..":"..dir.."/", "e") then
-- the directory already exists, no need to do further checks
return false
else
-- check for existing document with same name as directory
if redis.call("hget", "rs_meta:"..user..":"..dir, "etag") then
if redis.call("hget", "rs:m:"..user..":"..dir, "e") then
return true
end
end
@ -400,17 +402,14 @@ module RemoteStorage
end
def update_metadata_object(user, directory, key, metadata)
redis_key = "rs_meta:#{user}:#{directory}/#{key}"
redis_key = "rs:m:#{user}:#{directory}/#{key}"
redis.hmset(redis_key, *metadata)
redis.sadd "rs_meta:#{user}:#{directory}/:items", key
redis.sadd "rs:m:#{user}:#{directory}/:items", key
true
end
def update_dir_objects(user, directory)
# TODO use actual last modified time from the document put request
timestamp = (Time.now.to_f * 1000).to_i
def update_dir_objects(user, directory, timestamp)
parent_directories_for(directory).each do |dir|
unless dir == ""
res = do_put_request("#{url_for_directory(user, dir)}/", timestamp.to_s, "text/plain")
@ -420,10 +419,10 @@ module RemoteStorage
etag = etag_for(get_response.body)
end
key = "rs_meta:#{user}:#{dir}/"
metadata = {etag: etag, modified: timestamp}
key = "rs:m:#{user}:#{dir}/"
metadata = {e: etag, m: timestamp}
redis.hmset(key, *metadata)
redis.sadd "rs_meta:#{user}:#{parent_directory_for(dir)}:items", "#{top_directory(dir)}/"
redis.sadd "rs:m:#{user}:#{parent_directory_for(dir)}:items", "#{top_directory(dir)}/"
end
true
@ -438,21 +437,22 @@ module RemoteStorage
end
def delete_metadata_objects(user, directory, key)
redis_key = "rs_meta:#{user}:#{directory}/#{key}"
redis_key = "rs:m:#{user}:#{directory}/#{key}"
redis.del(redis_key)
redis.srem "rs_meta:#{user}:#{directory}/:items", key
redis.srem "rs:m:#{user}:#{directory}/:items", key
end
def delete_dir_objects(user, directory)
timestamp = (Time.now.to_f * 1000).to_i
parent_directories_for(directory).each do |dir|
if dir_empty?(user, dir)
unless dir == ""
do_delete_request("#{url_for_directory(user, dir)}/")
end
redis.del "rs_meta:#{user}:#{directory}/"
redis.srem "rs_meta:#{user}:#{parent_directory_for(dir)}:items", "#{dir}/"
redis.del "rs:m:#{user}:#{directory}/"
redis.srem "rs:m:#{user}:#{parent_directory_for(dir)}:items", "#{dir}/"
else
timestamp = (Time.now.to_f * 1000).to_i
unless dir == ""
res = do_put_request("#{url_for_directory(user, dir)}/", timestamp.to_s, "text/plain")
etag = res.headers[:etag]
@ -460,15 +460,15 @@ module RemoteStorage
get_response = do_get_request("#{container_url_for(user)}/?format=json&path=")
etag = etag_for(get_response.body)
end
metadata = {etag: etag, modified: timestamp}
redis.hmset("rs_meta:#{user}:#{dir}/", *metadata)
metadata = {e: etag, m: timestamp}
redis.hmset("rs:m:#{user}:#{dir}/", *metadata)
end
end
end
def dir_empty?(user, dir)
if directory_backend(user).match(/new/)
redis.smembers("rs_meta:#{user}:#{dir}/:items").empty?
redis.smembers("rs:m:#{user}:#{dir}/:items").empty?
else
do_get_request("#{container_url_for(user)}/?format=plain&limit=1&path=#{escape(dir)}/") do |res|
return res.headers[:content_length] == "0"
@ -538,7 +538,7 @@ module RemoteStorage
end
def directory_backend(user)
@directory_backend ||= redis.get("rs_config:dir_backend:#{user}") || "legacy"
@directory_backend ||= redis.get("rsc:db:#{user}") || "legacy"
end
def etag_for(body)

246
migrate_metadata_to_redis.rb Executable file
View File

@ -0,0 +1,246 @@
#!/usr/bin/env ruby
require "cgi"
require "date"
require "digest/md5"
require "json"
require "logger"
require "yaml"

require "rest_client"
require "redis"
require "active_support/core_ext/hash"
# Copies a single user's remoteStorage metadata from the Swift object
# listings into Redis.
#
# Redis layout written by this class:
#   rs:m:<user>:<path>          hash with e (ETag), m (modified, ms since epoch)
#                               and, for documents, s (size) and t (type)
#   rs:m:<user>:<dir>/:items    set of item names contained in <dir>
#   rsc:db:<user>               directory backend switch for the user
#
# Configuration is read from config.yml (section chosen by ENVIRONMENT,
# default "staging") and the Swift token from tmp/swift_token.txt.
# When the DRYRUN environment variable is set (to any value, including
# "false" or an empty string) nothing is written to Redis.
class Migrator
  attr_accessor :username, :swift_host, :swift_token,
                :environment, :dry_run, :settings, :logger
  # The reader for base_url is defined explicitly below (it falls back to the
  # Swift host), so only the writer is generated here.
  attr_writer :base_url

  # username - the user whose metadata should be migrated
  def initialize(username)
    @username = username

    @environment = ENV["ENVIRONMENT"] || "staging"
    @settings = YAML.load(File.read('config.yml'))[@environment]

    @swift_host = @settings["swift"]["host"]
    @swift_token = File.read("tmp/swift_token.txt").strip

    @dry_run = ENV["DRYRUN"] || false # disables writing anything to Redis when true

    @logger = Logger.new("log/migrate_metadata_to_redis.log")
    log_level = ENV["LOGLEVEL"] || "INFO"
    # Accept the level name in any letter case (e.g. LOGLEVEL=debug).
    logger.level = Logger.const_get(log_level.upcase)
    logger.progname = username
  end

  def root_url
    "#{@base_url}/#{@username}"
  end

  # Directory names are recognised by their trailing slash.
  def is_dir?(name)
    name.end_with?("/")
  end

  def url_for(directory, parent_directory="")
    "#{root_url}#{parent_directory}#{directory}"
  end

  # Runs the whole migration for the user: locks the directory backend,
  # walks the tree starting at the root, then switches the user to the "new"
  # backend. On any failure the backend is reset to "legacy", the username is
  # appended to log/failed_migration.log and the process exits with status 1.
  def migrate
    logger.info "Starting migration for '#{username}'"
    # Hyphenated to match the "legacy-locked" value the app's specs use for a
    # locked legacy backend.
    set_directory_backend("legacy-locked")

    begin
      work_on_dir("", "")
    rescue Exception => ex
      # Exception (not only StandardError) is rescued here, so an interrupt
      # (Ctrl-C) also restores the "legacy" backend instead of leaving the
      # user locked.
      logger.error "Error migrating metadata for '#{username}': #{ex}"
      set_directory_backend("legacy")
      # write username to file for later reference
      File.open('log/failed_migration.log', 'a') { |f| f.puts username }
      exit 1
    end

    set_directory_backend("new")
    logger.info "Finished migration for '#{username}'"
  end

  def set_directory_backend(backend)
    redis.set("rsc:db:#{username}", backend) unless dry_run
  end

  # Migrates every item listed in "#{parent_directory}#{directory}" and
  # recurses into sub-directories.
  #
  # directory        - name of the directory to process ("" for the root)
  # parent_directory - path of the directory containing it ("" for the root)
  def work_on_dir(directory, parent_directory)
    path = "#{parent_directory}#{directory}"
    logger.debug "Retrieving listing for '#{path}'"

    listing = get_directory_listing_from_swift(path)
    timestamp = (Time.now.to_f * 1000).to_i

    listing["items"].each do |item, data|
      if is_dir? item
        save_directory_data(path, item, data, timestamp)
        # get dir listing and repeat
        work_on_dir(item, path)
      else
        save_document_data(path, item, data)
      end

      add_item_to_parent_dir(path, item)
    end
  end

  # Adds item to the ":items" set of dir (the root is stored under "/").
  def add_item_to_parent_dir(dir, item)
    key = "rs:m:#{username}:#{parent_directory_for(dir)}:items"
    logger.debug "Adding item #{item} to #{key}"
    redis.sadd(key, item) unless dry_run
  end

  # Stores ETag and modification time for a directory item.
  # NOTE: timestamp is accepted but not used; the modification time comes from
  # the listing's "Last-Modified" value.
  def save_directory_data(dir, item, data, timestamp)
    key = metadata_key_for(dir, item)
    metadata = {
      e: data["ETag"],
      m: timestamp_for(data["Last-Modified"])
    }

    logger.debug "Metadata for dir #{key}: #{metadata}"
    redis.hmset(key, *metadata) unless dry_run
  end

  # Stores ETag, size, content type and modification time for a document.
  def save_document_data(dir, item, data)
    key = metadata_key_for(dir, item)
    metadata = {
      e: data["ETag"],
      s: data["Content-Length"],
      t: data["Content-Type"],
      m: timestamp_for(data["Last-Modified"])
    }

    logger.debug "Metadata for document #{key}: #{metadata}"
    redis.hmset(key, *metadata) unless dry_run
  end

  # Returns directory up to and including its last slash, or "/" when it
  # contains no slash at all (e.g. the empty root path).
  def parent_directory_for(directory)
    last_slash = directory.rindex("/")
    last_slash ? directory[0..last_slash] : "/"
  end

  # Converts a date string into integer milliseconds since the Unix epoch.
  # DateTime.parse treats a string without zone information as UTC.
  def timestamp_for(date)
    DateTime.parse(date).strftime("%Q").to_i
  end

  def redis
    @redis ||= Redis.new(@settings["redis"].symbolize_keys)
  end

  # Fetches the listing for directory from Swift and returns it in the
  # folder-description format produced by #directory_listing.
  #
  # Returns an empty listing when the HEAD request answers 404.
  # Raises RuntimeError when the response body is not a JSON array, and
  # propagates JSON::ParserError when it is not JSON at all.
  def get_directory_listing_from_swift(directory)
    get_response = nil

    do_head_request(url_for_directory(@username, directory)) do |response|
      # `return` inside this block leaves the whole method.
      return directory_listing([]) if response.code == 404

      # escape("") is "", so the root listing simply ends in "path=".
      get_response = do_get_request("#{container_url_for(@username)}/?format=json&path=#{escape(directory)}")
    end

    body = get_response && JSON.parse(get_response.body)
    unless body.is_a?(Array)
      raise "Unexpected directory listing for '#{directory}': expected a JSON array"
    end

    directory_listing(body)
  end

  # Builds a folder-description hash from Swift listing entries.
  #
  # res_body - Array of Hashes with "name", "hash", "last_modified" and, for
  #            documents, "content_type" and "bytes"
  #
  # The entries are not modified. Item names are the last path segment of
  # "name"; directories keep their trailing slash.
  def directory_listing(res_body)
    listing = {
      "@context" => "http://remotestorage.io/spec/folder-description",
      "items"    => {}
    }

    res_body.each do |entry|
      name = item_name_for(entry["name"])

      metadata = { "ETag" => entry["hash"] }
      unless is_dir?(name)
        metadata["Content-Type"] = entry["content_type"]
        metadata["Content-Length"] = entry["bytes"]
      end
      metadata["Last-Modified"] = entry["last_modified"]

      listing["items"][name] = metadata
    end

    listing
  end

  # MD5 hex digest over the concatenated "hash" values of a JSON listing
  # (the digest of the empty string for an empty listing).
  def etag_for(body)
    objects = JSON.parse(body)

    Digest::MD5.hexdigest objects.map { |o| o["hash"] }.join
  end

  def do_head_request(url, &block)
    RestClient.head(url, default_headers, &block)
  end

  def do_get_request(url, &block)
    RestClient.get(url, default_headers, &block)
  end

  def default_headers
    {"x-auth-token" => @swift_token}
  end

  def url_for_directory(user, directory)
    if directory.empty?
      container_url_for(user)
    else
      "#{container_url_for(user)}/#{escape(directory)}"
    end
  end

  def container_url_for(user)
    "#{base_url}/#{container_for(user)}"
  end

  # Falls back to the configured Swift host unless a base URL was assigned.
  def base_url
    @base_url ||= @swift_host
  end

  # Container name: "rs:<first letter of the environment>:<user>".
  def container_for(user)
    "rs:#{environment.to_s.chars.first}:#{user}"
  end

  def escape(url)
    # We want spaces to turn into %20 and slashes to stay slashes
    CGI::escape(url).gsub('+', '%20').gsub('%2F', '/')
  end

  private

  # Redis key of the metadata hash for item inside dir. A single leading
  # slash on dir is dropped.
  def metadata_key_for(dir, item)
    "rs:m:#{username}:#{dir.sub(%r{\A/}, "")}#{item}"
  end

  # Last path segment of a Swift object name, keeping a trailing slash so
  # directories stay recognisable. Returns a new string.
  def item_name_for(path)
    suffix = path.end_with?("/") ? "/" : ""
    path.chomp("/").rpartition("/").last + suffix
  end
end
# Command-line entry point: expects the username to migrate as the first
# argument, prints usage and exits with status 1 when it is missing.
username = ARGV.first

if username.nil?
  puts "No username given.",
       "Usage:",
       "ENVIRONMENT=staging ./migrate_metadata_to_redis.rb <username>"
  exit 1
end

Migrator.new(username).migrate

View File

@ -39,7 +39,7 @@ if app.settings.respond_to? :redis
end
def purge_redis
redis.keys("rs_*").each do |key|
redis.keys("rs*").each do |key|
redis.del key
end
end

View File

@ -16,7 +16,7 @@ describe "App" do
before do
purge_redis
redis.set "rs_config:dir_backend:phil", "new"
redis.set "rsc:db:phil", "new"
end
context "authorized" do
@ -31,11 +31,11 @@ describe "App" do
put "/phil/food/aguacate", "si"
end
metadata = redis.hgetall "rs_meta:phil:food/aguacate"
metadata["size"].must_equal "2"
metadata["type"].must_equal "text/plain; charset=utf-8"
metadata["etag"].must_equal "bla"
metadata["modified"].must_equal nil
metadata = redis.hgetall "rs:m:phil:food/aguacate"
metadata["s"].must_equal "2"
metadata["t"].must_equal "text/plain; charset=utf-8"
metadata["e"].must_equal "bla"
metadata["m"].length.must_equal 13
end
it "creates the directory objects metadata in redis" do
@ -50,20 +50,20 @@ describe "App" do
end
end
metadata = redis.hgetall "rs_meta:phil:/"
metadata["etag"].must_equal "rootetag"
metadata["modified"].length.must_equal 13
metadata = redis.hgetall "rs:m:phil:/"
metadata["e"].must_equal "rootetag"
metadata["m"].length.must_equal 13
metadata = redis.hgetall "rs_meta:phil:food/"
metadata["etag"].must_equal "bla"
metadata["modified"].length.must_equal 13
metadata = redis.hgetall "rs:m:phil:food/"
metadata["e"].must_equal "bla"
metadata["m"].length.must_equal 13
food_items = redis.smembers "rs_meta:phil:food/:items"
food_items = redis.smembers "rs:m:phil:food/:items"
food_items.each do |food_item|
["camaron", "aguacate"].must_include food_item
end
root_items = redis.smembers "rs_meta:phil:/:items"
root_items = redis.smembers "rs:m:phil:/:items"
root_items.must_equal ["food/"]
end
@ -81,8 +81,8 @@ describe "App" do
last_response.status.must_equal 200
metadata = redis.hgetall "rs_meta:phil:food/aguacate"
metadata["size"].must_equal "2"
metadata = redis.hgetall "rs:m:phil:food/aguacate"
metadata["s"].must_equal "2"
end
it "conflicts when there is a directory with same name as document" do
@ -94,7 +94,7 @@ describe "App" do
last_response.status.must_equal 409
metadata = redis.hgetall "rs_meta:phil:food"
metadata = redis.hgetall "rs:m:phil:food"
metadata.must_be_empty
end
@ -107,7 +107,7 @@ describe "App" do
last_response.status.must_equal 409
metadata = redis.hgetall "rs_meta:phil:food/aguacate/empanado"
metadata = redis.hgetall "rs:m:phil:food/aguacate/empanado"
metadata.must_be_empty
end
end
@ -115,7 +115,7 @@ describe "App" do
describe "directory backend configuration" do
context "locked new backed" do
before do
redis.set "rs_config:dir_backend:phil", "new-locked"
redis.set "rsc:db:phil", "new-locked"
end
it "responds with 503" do
@ -123,14 +123,14 @@ describe "App" do
last_response.status.must_equal 503
metadata = redis.hgetall "rs_meta:phil:food/aguacate"
metadata = redis.hgetall "rs:m:phil:food/aguacate"
metadata.must_be_empty
end
end
context "locked legacy backend" do
before do
redis.set "rs_config:dir_backend:phil", "legacy-locked"
redis.set "rsc:db:phil", "legacy-locked"
end
it "responds with 503" do
@ -138,7 +138,7 @@ describe "App" do
last_response.status.must_equal 503
metadata = redis.hgetall "rs_meta:phil:food/aguacate"
metadata = redis.hgetall "rs:m:phil:food/aguacate"
metadata.must_be_empty
end
end
@ -150,7 +150,7 @@ describe "App" do
before do
purge_redis
redis.set "rs_config:dir_backend:phil", "new"
redis.set "rsc:db:phil", "new"
end
context "authorized" do
@ -178,12 +178,12 @@ describe "App" do
end
end
metadata = redis.hgetall "rs_meta:phil:food/aguacate"
metadata = redis.hgetall "rs:m:phil:food/aguacate"
metadata.must_be_empty
end
it "deletes the directory objects metadata in redis" do
old_metadata = redis.hgetall "rs_meta:phil:food/"
old_metadata = redis.hgetall "rs:m:phil:food/"
put_stub = OpenStruct.new(headers: {etag: "newetag"})
get_stub = OpenStruct.new(body: "rootbody")
@ -197,15 +197,15 @@ describe "App" do
end
end
metadata = redis.hgetall "rs_meta:phil:food/"
metadata["etag"].must_equal "newetag"
metadata["modified"].length.must_equal 13
metadata["modified"].wont_equal old_metadata["modified"]
metadata = redis.hgetall "rs:m:phil:food/"
metadata["e"].must_equal "newetag"
metadata["m"].length.must_equal 13
metadata["m"].wont_equal old_metadata["m"]
food_items = redis.smembers "rs_meta:phil:food/:items"
food_items = redis.smembers "rs:m:phil:food/:items"
food_items.must_equal ["camaron"]
root_items = redis.smembers "rs_meta:phil:/:items"
root_items = redis.smembers "rs:m:phil:/:items"
root_items.must_equal ["food/"]
end
@ -223,13 +223,13 @@ describe "App" do
end
end
metadata = redis.hgetall "rs_meta:phil:food/"
metadata = redis.hgetall "rs:m:phil:food/"
metadata.must_be_empty
food_items = redis.smembers "rs_meta:phil:food/:items"
food_items = redis.smembers "rs:m:phil:food/:items"
food_items.must_be_empty
root_items = redis.smembers "rs_meta:phil:/:items"
root_items = redis.smembers "rs:m:phil:/:items"
root_items.must_be_empty
end
end
@ -239,7 +239,7 @@ describe "App" do
before do
purge_redis
redis.set "rs_config:dir_backend:phil", "new"
redis.set "rsc:db:phil", "new"
end
context "authorized" do
@ -318,7 +318,7 @@ describe "App" do
put "/phil/food/camaron", "yummi"
end
redis.set "rs_config:dir_backend:phil", "legacy"
redis.set "rsc:db:phil", "legacy"
end
it "serves directory listing from Swift backend" do