Files
chef/site-cookbooks/kosmos_prometheus/recipes/server.rb
T
raucao 821b56e6ab Set node name as prometheus instance name
And DRY up the recipe so it's easy and expressive to add more
metrics/targets
2026-07-05 12:41:46 +02:00

134 lines
3.0 KiB
Ruby

#
# Cookbook:: kosmos_prometheus
# Recipe:: server
#
include_recipe "firewall"

# Release to install and its expected checksum, both taken from node attributes.
version = node["kosmos_prometheus"]["version"]
checksum = node["kosmos_prometheus"]["checksum"]

# The release archive has the same file name upstream and in the local cache.
archive_name = "prometheus-#{version}.linux-amd64.tar.gz"
tarball = "#{Chef::Config[:file_cache_path]}/#{archive_name}"
binary_url = "https://github.com/prometheus/prometheus/releases/download/v#{version}/#{archive_name}"
# Dedicated system account for the server: no usable login shell and a home
# directory path of "/nonexistent".
group "prometheus"

user "prometheus" do
  system true
  gid "prometheus"
  home "/nonexistent"
  shell "/bin/false"
end
# TSDB storage directory, owned by the prometheus account (it is passed to the
# server as --storage.tsdb.path in the systemd unit).
directory "/var/lib/prometheus" do
  owner "prometheus"
  group "prometheus"
  mode "0755"
  recursive true
end

# Configuration and rules directories, both owned by root.
%w(/etc/prometheus /etc/prometheus/rules).each do |config_dir|
  directory config_dir do
    owner "root"
    group "root"
    mode "0755"
    recursive true
  end
end
package %w(tar bzip2)

# Fetch the release archive into Chef's file cache. Whenever the cached file
# is (re)created, the binary is extracted straight away.
remote_file tarball do
  action :create
  source binary_url
  # Explicit parentheses: the property setter is called with the local
  # variable of the same name.
  checksum(checksum)
  notifies :run, "execute[install_prometheus]", :immediately
end
# Extract only the prometheus binary from the archive into /usr/local/bin,
# dropping the leading "prometheus-<version>.linux-amd64/" path component.
extract_command = [
  "tar -xzf #{tarball}",
  "-C /usr/local/bin",
  "--strip-components=1",
  "prometheus-#{version}.linux-amd64/prometheus",
].join(" ")

# Runs only when notified by the tarball download above.
execute "install_prometheus" do
  action :nothing
  command extract_command
  notifies :restart, "service[prometheus]", :delayed
end
# Enforce root ownership and executable permissions on the extracted binary;
# any change made here schedules a restart of the service.
# NOTE(review): no action is given, so the resource's default applies. If the
# binary were ever missing while the tarball is still cached, this would
# presumably create an empty file rather than reinstall - confirm.
file "/usr/local/bin/prometheus" do
  mode "0755"
  owner "root"
  group "root"
  notifies :restart, "service[prometheus]", :delayed
end
# Scrape jobs whose targets are discovered through Chef search. Each entry maps
# a job name to the node search query and the port to scrape on matching nodes.
discovered_jobs = {
  # node exporter
  "node" => { "query" => "role:base", "port" => 9100 },
  # garage metrics
  "garage" => { "query" => "role:garage_node", "port" => 3903 },
}.transform_values do |config|
  targets = search(:node, config["query"]).map do |n|
    # A node without a knife_zero host cannot be turned into a scrape target.
    # Skip it with a warning instead of aborting the whole run with a
    # NoMethodError on nil.
    host = (n["knife_zero"] || {})["host"]
    if host.nil?
      Chef::Log.warn("kosmos_prometheus: skipping #{n.name} for query '#{config['query']}' (no knife_zero host)")
      next
    end

    target = { "target" => "#{host}:#{config['port']}", "instance" => n.name }
    target["env"] = n.chef_environment if n.chef_environment
    target
  end

  # compact drops the skipped nodes; sorting by instance name keeps the
  # resulting list in a stable order regardless of search result order.
  { "targets" => targets.compact.sort_by { |t| t["instance"] } }
end

# Jobs from node attributes combined with the discovered ones. On a name clash
# the discovered job replaces the attribute-defined one.
jobs = node["kosmos_prometheus"]["jobs"].merge(discovered_jobs)
# Render the main configuration from the global settings and rule files in
# node attributes plus the scrape jobs assembled above. A change to the
# rendered file triggers a reload of the service, not a restart.
template "/etc/prometheus/prometheus.yml" do
  source "prometheus.yml.erb"
  mode "0644"
  owner "root"
  group "prometheus"
  variables(
    global_config: node["kosmos_prometheus"]["global"],
    jobs: jobs,
    rule_files: node["kosmos_prometheus"]["rule_files"]
  )
  notifies :reload, "service[prometheus]", :delayed
end
# Full command line of the server, one flag per entry for readability. Joined
# with single spaces it forms the unit's ExecStart value.
prometheus_command = [
  "/usr/local/bin/prometheus",
  "--config.file=/etc/prometheus/prometheus.yml",
  "--storage.tsdb.path=/var/lib/prometheus",
  "--storage.tsdb.retention.time=15d",
  "--web.listen-address=:9090",
  "--web.enable-lifecycle",
].join(" ")

# systemd unit running the server as the unprivileged prometheus account.
systemd_unit "prometheus.service" do
  content({
    Unit: {
      Description: "Prometheus",
      After: "network.target",
    },
    Service: {
      Type: "simple",
      User: "prometheus",
      Group: "prometheus",
      ExecStart: prometheus_command,
      # Reload is implemented by sending SIGHUP to the main process.
      ExecReload: "/bin/kill -HUP $MAINPID",
      Restart: "on-failure",
      RestartSec: "5",
    },
    Install: {
      WantedBy: "multi-user.target",
    },
  })
  triggers_reload true
  action :create
  # A daemon-reload alone does not apply a changed unit (e.g. new ExecStart
  # flags) to the already-running process, so schedule a restart as well.
  notifies :restart, "service[prometheus]", :delayed
end
# Enable the service and start it. This is also the resource that the
# restart/reload notifications elsewhere in this recipe point at.
service "prometheus" do
  action %i(enable start)
end
# Allow TCP connections to the web listener port (9090) from 10.1.1.0/24 only.
firewall_rule "prometheus web" do
  command :allow
  protocol :tcp
  port 9090
  source "10.1.1.0/24"
end