Use opslog instead of counters

Account for eventual consistency by storing log items for
create/update/delete operations instead of using counter keys. We can
then map/reduce over the log items in order to extract category sizes
and object counts. Furthermore, we can periodically compact individual
log items into aggregate entries to keep the log fast and tidy.
This commit is contained in:
Basti 2013-04-29 22:11:41 +02:00
parent c78be51e8f
commit eaa3dbfe83
3 changed files with 75 additions and 87 deletions

View File

@ -30,8 +30,8 @@ module RemoteStorage
@binary_bucket ||= client.bucket(settings.riak['buckets']['binaries']) @binary_bucket ||= client.bucket(settings.riak['buckets']['binaries'])
end end
def info_bucket def opslog_bucket
@info_bucket ||= client.bucket(LiquorCabinet.config['buckets']['info']) @opslog_bucket ||= client.bucket(settings.riak['buckets']['opslog'])
end end
def authorize_request(user, directory, token, listing=false) def authorize_request(user, directory, token, listing=false)
@ -106,8 +106,9 @@ module RemoteStorage
object.store object.store
log_object_count(user, directory, 1) unless object_exists log_action = object_exists ? "update" : "create"
log_object_size(user, directory, new_object_size, existing_object_size) log_operation(user, directory, log_action, new_object_size, existing_object_size)
update_all_directory_objects(user, directory, timestamp) update_all_directory_objects(user, directory, timestamp)
halt 200 halt 200
@ -126,8 +127,7 @@ module RemoteStorage
riak_response = data_bucket.delete("#{user}:#{directory}:#{key}") riak_response = data_bucket.delete("#{user}:#{directory}:#{key}")
if riak_response[:code] != 404 if riak_response[:code] != 404
log_object_count(user, directory, -1) log_operation(user, directory, "delete", 0, existing_object_size)
log_object_size(user, directory, 0, existing_object_size)
end end
timestamp = (Time.now.to_f * 1000).to_i timestamp = (Time.now.to_f * 1000).to_i
@ -161,31 +161,21 @@ module RemoteStorage
object object
end end
def log_object_size(user, directory, new_size=0, old_size=0) def log_operation(user, directory, action, new_size=0, old_size=0)
category = extract_category(directory) log_entry = opslog_bucket.new
info = info_bucket.get_or_new("usage:#{user}:#{category}") log_entry.content_type = "application/json"
info.content_type = "application/json" log_entry.data = {
info.data ||= {} "action" => action,
info.data["size"] ||= 0 "size" => (-old_size + new_size),
info.data["size"] += (-old_size + new_size) "category" => extract_category(directory)
info.indexes.merge!({:user_id_bin => [user]}) }
info.store log_entry.indexes.merge!({:user_id_bin => [user]})
end log_entry.store
def log_object_count(user, directory, change)
category = extract_category(directory)
info = info_bucket.get_or_new("usage:#{user}:#{category}")
info.content_type = "application/json"
info.data ||= {}
info.data["count"] ||= 0
info.data["count"] += change
info.indexes.merge!({:user_id_bin => [user]})
info.store
end end
def object_size(object) def object_size(object)
if binary_link = object.links.select {|l| l.tag == "binary"}.first if binary_link = object.links.select {|l| l.tag == "binary"}.first
response = head(LiquorCabinet.config['buckets']['binaries'], escape(binary_link.key)) response = head(settings.riak['buckets']['binaries'], escape(binary_link.key))
response[:headers]["content-length"].first.to_i response[:headers]["content-length"].first.to_i
else else
object.raw_data.nil? ? 0 : object.raw_data.size object.raw_data.nil? ? 0 : object.raw_data.size
@ -206,7 +196,7 @@ module RemoteStorage
# A URI object that can be used with HTTP backend methods # A URI object that can be used with HTTP backend methods
def riak_uri(bucket, key) def riak_uri(bucket, key)
rc = LiquorCabinet.config['riak'].symbolize_keys rc = settings.riak.symbolize_keys
URI.parse "http://#{rc[:host]}:#{rc[:http_port]}/riak/#{bucket}/#{key}" URI.parse "http://#{rc[:host]}:#{rc[:http_port]}/riak/#{bucket}/#{key}"
end end

View File

@ -1,14 +1,5 @@
require_relative "spec_helper" require_relative "spec_helper"
def set_usage_info(user, category, type, value)
object = info_bucket.get_or_new("usage:#{user}:#{category}")
object.content_type = "application/json"
data = object.data || {}
data.merge!(type => value)
object.data = data
object.store
end
describe "App with Riak backend" do describe "App with Riak backend" do
include Rack::Test::Methods include Rack::Test::Methods
@ -94,7 +85,6 @@ describe "App with Riak backend" do
describe "PUT" do describe "PUT" do
before do before do
header "Authorization", "Bearer 123" header "Authorization", "Bearer 123"
set_usage_info "jimmy", "documents", "size", 23
end end
describe "with implicit content type" do describe "with implicit content type" do
@ -112,12 +102,6 @@ describe "App with Riak backend" do
data_bucket.get("jimmy:documents:bar").content_type.must_equal "text/plain; charset=utf-8" data_bucket.get("jimmy:documents:bar").content_type.must_equal "text/plain; charset=utf-8"
end end
it "increases the usage size counter" do
usage = info_bucket.get("usage:jimmy:documents")
usage.data["size"].must_equal 35
usage.indexes["user_id_bin"].must_include "jimmy"
end
it "indexes the data set" do it "indexes the data set" do
indexes = data_bucket.get("jimmy:documents:bar").indexes indexes = data_bucket.get("jimmy:documents:bar").indexes
indexes["user_id_bin"].must_be_kind_of Set indexes["user_id_bin"].must_be_kind_of Set
@ -126,10 +110,15 @@ describe "App with Riak backend" do
indexes["directory_bin"].must_include "documents" indexes["directory_bin"].must_include "documents"
end end
# it "logs the operation" do it "logs the operation" do
# logs = storage_client.get_index("rs_opslog", "user_id_bin", "jimmy") objects = []
# logs.count.must_equal 1 opslog_bucket.keys.each { |k| objects << opslog_bucket.get(k) rescue nil }
# end
log_entry = objects.select{|o| o.data["action"] == "create"}.first
log_entry.data["size"].must_equal 12
log_entry.data["category"].must_equal "documents"
log_entry.indexes["user_id_bin"].must_include "jimmy"
end
end end
describe "with explicit content type" do describe "with explicit content type" do
@ -148,10 +137,6 @@ describe "App with Riak backend" do
data_bucket.get("jimmy:documents:jason").content_type.must_equal "application/json" data_bucket.get("jimmy:documents:jason").content_type.must_equal "application/json"
end end
it "increases the category size counter" do
info_bucket.get("usage:jimmy:documents").data["size"].must_equal 49
end
it "delivers the data correctly" do it "delivers the data correctly" do
header "Authorization", "Bearer 123" header "Authorization", "Bearer 123"
get "/jimmy/documents/jason" get "/jimmy/documents/jason"
@ -211,7 +196,6 @@ describe "App with Riak backend" do
describe "with existing content" do describe "with existing content" do
before do before do
set_usage_info "jimmy", "documents", "size", 10
put "/jimmy/documents/archive/foo", "lorem ipsum" put "/jimmy/documents/archive/foo", "lorem ipsum"
put "/jimmy/documents/archive/foo", "some awesome content" put "/jimmy/documents/archive/foo", "some awesome content"
end end
@ -221,15 +205,24 @@ describe "App with Riak backend" do
data_bucket.get("jimmy:documents/archive:foo").data.must_equal "some awesome content" data_bucket.get("jimmy:documents/archive:foo").data.must_equal "some awesome content"
end end
it "increases the category size counter" do it "logs the operations" do
info_bucket.get("usage:jimmy:documents").data["size"].must_equal 30 objects = []
opslog_bucket.keys.each { |k| objects << opslog_bucket.get(k) rescue nil }
create_entry = objects.select{|o| o.data["action"] == "create"}.first
create_entry.data["size"].must_equal 11
create_entry.data["category"].must_equal "documents"
create_entry.indexes["user_id_bin"].must_include "jimmy"
update_entry = objects.select{|o| o.data["action"] == "update"}.first
update_entry.data["size"].must_equal 9
update_entry.data["category"].must_equal "documents"
update_entry.indexes["user_id_bin"].must_include "jimmy"
end end
end end
describe "public data" do describe "public data" do
before do before do
set_usage_info "jimmy", "public/documents", "size", 10
set_usage_info "jimmy", "public/documents", "count", 100
put "/jimmy/public/documents/notes/foo", "note to self" put "/jimmy/public/documents/notes/foo", "note to self"
end end
@ -238,12 +231,14 @@ describe "App with Riak backend" do
data_bucket.get("jimmy:public/documents/notes:foo").data.must_equal "note to self" data_bucket.get("jimmy:public/documents/notes:foo").data.must_equal "note to self"
end end
it "increases the category size counter" do it "logs the operation" do
info_bucket.get("usage:jimmy:public/documents").data["size"].must_equal 22 objects = []
end opslog_bucket.keys.each { |k| objects << opslog_bucket.get(k) rescue nil }
it "increases the category object counter" do log_entry = objects.select{|o| o.data["action"] == "create"}.first
info_bucket.get("usage:jimmy:public/documents").data["count"].must_equal 101 log_entry.data["size"].must_equal 12
log_entry.data["category"].must_equal "public/documents"
log_entry.indexes["user_id_bin"].must_include "jimmy"
end end
end end
@ -270,10 +265,6 @@ describe "App with Riak backend" do
last_response.body.must_equal @image last_response.body.must_equal @image
end end
it "increases the category size counter" do
info_bucket.get("usage:jimmy:documents").data["size"].must_equal 16067
end
it "indexes the binary set" do it "indexes the binary set" do
indexes = binary_bucket.get("jimmy:documents:jaypeg").indexes indexes = binary_bucket.get("jimmy:documents:jaypeg").indexes
indexes["user_id_bin"].must_be_kind_of Set indexes["user_id_bin"].must_be_kind_of Set
@ -281,6 +272,16 @@ describe "App with Riak backend" do
indexes["directory_bin"].must_include "documents" indexes["directory_bin"].must_include "documents"
end end
it "logs the operation" do
objects = []
opslog_bucket.keys.each { |k| objects << opslog_bucket.get(k) rescue nil }
log_entry = objects.select{|o| o.data["action"] == "create"}.first
log_entry.data["size"].must_equal 16044
log_entry.data["category"].must_equal "documents"
log_entry.indexes["user_id_bin"].must_include "jimmy"
end
end end
context "no binary charset in content-type header" do context "no binary charset in content-type header" do
@ -358,8 +359,6 @@ describe "App with Riak backend" do
describe "DELETE" do describe "DELETE" do
before do before do
header "Authorization", "Bearer 123" header "Authorization", "Bearer 123"
set_usage_info "jimmy", "documents", "size", 123
set_usage_info "jimmy", "documents", "count", 1000
delete "/jimmy/documents/foo" delete "/jimmy/documents/foo"
end end
@ -370,25 +369,25 @@ describe "App with Riak backend" do
}.must_raise Riak::HTTPFailedRequest }.must_raise Riak::HTTPFailedRequest
end end
it "decreases the category size counter" do it "logs the operation" do
info_bucket.get("usage:jimmy:documents").data["size"].must_equal 101 objects = []
end opslog_bucket.keys.each { |k| objects << opslog_bucket.get(k) rescue nil }
it "decreases the category object counter" do log_entry = objects.select{|o| o.data["action"] == "delete"}.first
info_bucket.get("usage:jimmy:documents").data["count"].must_equal 999 log_entry.data["size"].must_equal(-22)
log_entry.data["category"].must_equal "documents"
log_entry.indexes["user_id_bin"].must_include "jimmy"
end end
context "non-existing object" do context "non-existing object" do
before do before do
set_usage_info "jimmy", "documents", "size", 10
set_usage_info "jimmy", "documents", "count", 10
delete "/jimmy/documents/foozius" delete "/jimmy/documents/foozius"
end end
it "doesn't change the category usage info" do it "doesn't log the operation" do
usage = info_bucket.get("usage:jimmy:documents").data objects = []
usage["size"].must_equal 10 opslog_bucket.keys.each { |k| objects << opslog_bucket.get(k) rescue nil }
usage["count"].must_equal 10 objects.select{|o| o.data["action"] == "delete"}.size.must_equal 1
end end
end end
@ -398,8 +397,6 @@ describe "App with Riak backend" do
filename = File.join(File.expand_path(File.dirname(__FILE__)), "fixtures", "rockrule.jpeg") filename = File.join(File.expand_path(File.dirname(__FILE__)), "fixtures", "rockrule.jpeg")
@image = File.open(filename, "r").read @image = File.open(filename, "r").read
put "/jimmy/documents/jaypeg", @image put "/jimmy/documents/jaypeg", @image
set_usage_info "jimmy", "documents", "size", 100000
set_usage_info "jimmy", "documents", "count", 10
delete "/jimmy/documents/jaypeg" delete "/jimmy/documents/jaypeg"
end end
@ -418,12 +415,13 @@ describe "App with Riak backend" do
}.must_raise Riak::HTTPFailedRequest }.must_raise Riak::HTTPFailedRequest
end end
it "decreases the category size counter" do it "logs the operation" do
info_bucket.get("usage:jimmy:documents").data["size"].must_equal 83956 objects = []
end opslog_bucket.keys.each { |k| objects << opslog_bucket.get(k) rescue nil }
it "decreases the category object counter" do log_entry = objects.select{|o| o.data["action"] == "delete" && o.data["size"] == -16044}.first
info_bucket.get("usage:jimmy:documents").data["count"].must_equal 9 log_entry.data["category"].must_equal "documents"
log_entry.indexes["user_id_bin"].must_include "jimmy"
end end
end end
end end

View File

@ -58,7 +58,7 @@ if app.settings.riak
end end
def purge_all_buckets def purge_all_buckets
[data_bucket, directory_bucket, auth_bucket, binary_bucket].each do |bucket| [data_bucket, directory_bucket, auth_bucket, binary_bucket, opslog_bucket].each do |bucket|
bucket.keys.each {|key| bucket.delete key} bucket.keys.each {|key| bucket.delete key}
end end
end end