Use data directly from Swift for metadata migration

This commit is contained in:
Garret Alfert 2016-02-22 16:08:53 +01:00
parent 5370df9c75
commit 54176b6928

View File

@ -2,49 +2,45 @@
require "rest_client"
require "redis"
require "yaml"
class Migrator
attr_accessor :username, :token, :base_url
attr_accessor :username, :base_url, :swift_host, :swift_token,
:environment, :dry_run, :logging, :settings
def initialize(username, token)
def initialize(username)
@username = username
@token = token
@base_url = "https://storage.5apps.com"
end
def configure_redis(redis_config)
@redis_config = redis_config
@environment = ENV["ENVIRONMENT"] || "staging"
@settings = YAML.load(File.read('config.yml'))[@environment]
@swift_host = @settings["swift"]["host"]
@swift_token = File.read("tmp/swift_token.txt")
@dry_run = false # disables writing anything to Redis when true
@logging = true
end
# Builds the storage root URL for the current user by joining the
# `@base_url` and `@username` instance variables with a slash.
#
# @return [String] "<@base_url>/<@username>"
def root_url
  [@base_url, @username].join("/")
end
# Request headers carrying the bearer token stored in `@token`.
#
# @return [Hash{String => String}] a single "authorization" entry
def headers
  bearer_value = "Bearer #{@token}"
  { "authorization" => bearer_value }
end
# Tells whether an item name denotes a directory, i.e. ends with a slash.
#
# @param name [String] item name to inspect
# @return [Boolean] true when the last character of `name` is "/"
def is_dir?(name)
  name.end_with?("/")
end
# Builds the URL of a directory by concatenating, without any separator,
# the result of `root_url`, the parent directory and the directory itself.
#
# @param directory [String] directory name, appended last
# @param parent_directory [String] path placed between root URL and directory
# @return [String] the concatenated URL
def url_for(directory, parent_directory = "")
  [root_url, parent_directory, directory].join
end
def migrate
work_on_dir("", "/")
work_on_dir("", "")
end
def work_on_dir(directory, parent_directory)
url = url_for(directory, parent_directory)
puts "retrieving listing for '#{parent_directory}#{directory}'" if logging
# puts "work on dir: #{url}"
response = RestClient.get(url, headers)
listing = JSON.parse(response.body)
listing = get_directory_listing_from_swift("#{parent_directory}#{directory}")
timestamp = (Time.now.to_f * 1000).to_i
@ -67,16 +63,16 @@ class Migrator
def add_item_to_parent_dir(dir, item)
key = "rs_meta:#{username}:#{parent_directory_for(dir)}:items"
# puts "adding item #{item} to #{key}"
redis.sadd key, item
puts "adding item #{item} to #{key}" if logging
redis.sadd(key, item) unless dry_run
end
def save_directory_data(dir, item, data, timestamp)
key = "rs_meta:#{username}:#{dir.gsub(/^\//, "")}#{item}"
metadata = {etag: data["ETag"], modified: timestamp}
# puts "metadata for dir #{key}: #{metadata}"
redis.hmset(key, *metadata)
puts "metadata for dir #{key}: #{metadata}" if logging
redis.hmset(key, *metadata) unless dry_run
end
def save_document_data(dir, item, data, timestamp)
@ -87,31 +83,132 @@ class Migrator
type: data["Content-Type"],
modified: timestamp
}
# puts "metadata for document #{key}: #{metadata}"
redis.hmset(key, *metadata)
puts "metadata for document #{key}: #{metadata}" if logging
redis.hmset(key, *metadata) unless dry_run
end
def parent_directory_for(directory)
return directory if directory == "/"
return directory[0..directory.rindex("/")].gsub(/^\//, "")
if directory.match(/\//)
return directory[0..directory.rindex("/")]
else
return "/"
end
end
def redis
@redis ||= Redis.new(@redis_config)
@redis ||= Redis.new(@settings["redis"])
end
# Fetches the listing of one directory of the user's Swift container and
# converts it with `directory_listing`.
#
# A HEAD request is issued for the directory first. If its response code is
# 404, an empty listing is returned straight away. Otherwise the container is
# queried with `format=json` and a `path` parameter: empty for the container
# root, the escaped directory followed by "/" for anything else.
#
# @param directory [String] directory path inside the container; "" for the root
# @return [Hash, nil] the folder description, or nil when the parsed body is
#   falsy (in which case "listing not JSON" is printed)
def get_directory_listing_from_swift(directory)
  swift_response = nil

  do_head_request(url_for_directory(@username, directory).to_s) do |head_response|
    # `return` inside the block leaves the whole method with an empty listing.
    return directory_listing([]) if head_response.code == 404

    path_param = directory.empty? ? "" : "#{escape(directory)}/"
    swift_response = do_get_request("#{container_url_for(@username)}/?format=json&path=#{path_param}")
  end

  entries = JSON.parse(swift_response.body)
  unless entries
    puts "listing not JSON"
    return nil
  end

  directory_listing(entries)
end
# Converts a parsed Swift container listing into a remoteStorage folder
# description.
#
# Every entry's "name" is reduced to the part after its parent path (as
# reported by File.dirname). Names ending in "/" are treated as directories
# and only get an "ETag"; all other names are files and additionally get
# "Content-Type" and "Content-Length".
#
# The entries passed in are not modified.
#
# @param res_body [Array<Hash>] listing entries; "name" and "hash" are read
#   from each entry, "content_type" and "bytes" from file entries
# @return [Hash] { "@context" => ..., "items" => { name => metadata } }
def directory_listing(res_body)
  items = res_body.each_with_object({}) do |entry, collected|
    full_name = entry["name"]

    # Strip the parent path only when it really is a leading prefix. For
    # root-level names File.dirname returns ".", and an unanchored removal of
    # "./" would mangle names such as "foo./" into "foo".
    parent_prefix = "#{File.dirname(full_name)}/"
    name = if full_name.start_with?(parent_prefix)
             full_name[parent_prefix.length..-1]
           else
             full_name
           end

    metadata = { "ETag" => entry["hash"] }
    unless name.end_with?("/") # files carry type and size, directories do not
      metadata["Content-Type"] = entry["content_type"]
      metadata["Content-Length"] = entry["bytes"]
    end

    collected[name] = metadata
  end

  {
    "@context" => "http://remotestorage.io/spec/folder-description",
    "items" => items
  }
end
# Computes an ETag for a JSON listing body: the MD5 hex digest of all
# entries' "hash" values concatenated in order, or of the empty string when
# the parsed body is empty.
#
# @param body [String] JSON document to parse
# @return [String] MD5 hex digest
def etag_for(body)
  parsed = JSON.parse(body)
  combined_hashes = parsed.empty? ? "" : parsed.map { |object| object["hash"] }.join
  Digest::MD5.hexdigest(combined_hashes)
end
# Sends a HEAD request through RestClient using `default_headers`.
#
# @param url [String] request URL
# @param block optional block, forwarded unchanged to RestClient.head
# @return whatever RestClient.head returns
def do_head_request(url, &block)
  swift_headers = default_headers
  RestClient.head(url, swift_headers, &block)
end
# Sends a GET request through RestClient using `default_headers`.
#
# @param url [String] request URL
# @param block optional block, forwarded unchanged to RestClient.get
# @return whatever RestClient.get returns
def do_get_request(url, &block)
  swift_headers = default_headers
  RestClient.get(url, swift_headers, &block)
end
# Headers sent with every Swift request: the auth token from `@swift_token`.
#
# The token is loaded with File.read, which keeps any trailing newline of the
# token file; surrounding whitespace is therefore stripped so the header value
# never contains a line break. A nil token is passed through as nil.
#
# @return [Hash{String => String, nil}] a single "x-auth-token" entry
def default_headers
  token = @swift_token && @swift_token.strip
  { "x-auth-token" => token }
end
# URL of a directory inside a user's container.
#
# @param user [String] user whose container is addressed
# @param directory [String] directory path; "" addresses the container itself
# @return [String] the container URL, followed by "/" and the escaped
#   directory when one is given
def url_for_directory(user, directory)
  container_url = container_url_for(user)
  return container_url if directory.empty?

  "#{container_url}/#{escape(directory)}"
end
# URL of a user's container: `base_url` and the container name joined by "/".
#
# @param user [String] user whose container URL is built
# @return [String] "<base_url>/<container name>"
def container_url_for(user)
  [base_url, container_for(user)].join("/")
end
# Base URL used for container URLs. Returns `@base_url`; when that is unset
# (nil or false) it is first initialised from `@swift_host`.
#
# @return [String, nil] the memoized base URL
def base_url
  @base_url = @swift_host unless @base_url
  @base_url
end
# Name of a user's container: "rs:", the first character of the environment
# name, ":" and the user name.
#
# @param user [String] user the container belongs to
# @return [String] e.g. "rs:s:alice" when `environment` is "staging"
def container_for(user)
  environment_initial = environment.to_s[0]
  "rs:#{environment_initial}:#{user}"
end
# Escapes a path for use in a URL. CGI escaping is applied first, then two of
# its results are rewritten: spaces (escaped as "+") become "%20", and
# escaped slashes ("%2F") are turned back into literal slashes.
#
# @param url [String] path to escape
# @return [String] the escaped path
def escape(url)
  # We want spaces to turn into %20 and slashes to stay slashes
  replacements = { "+" => "%20", "%2F" => "/" }
  CGI.escape(url).gsub(/\+|%2F/, replacements)
end
end
username = ARGV[0]
token = ARGV[1]
migrator = Migrator.new username, token
migrator.configure_redis({host: "localhost", port: 6379})
unless username
puts "No username given."
puts "Usage:"
puts "ENVIRONMENT=staging ./migrate_metadata_to_redis.rb <username>"
exit 1
end
migrator = Migrator.new username
migrator.migrate