Remove upstream prometheus cookbook, migrate to our own

This commit is contained in:
2026-07-04 15:27:18 +02:00
parent 2d835335b5
commit 63534e1cf5
37 changed files with 230 additions and 1480 deletions
@@ -5,20 +5,89 @@
include_recipe "firewall"
prometheus_alertmanager_install "alertmanager" do
version node["prometheus"]["alertmanager"]["version"]
checksum node["prometheus"]["alertmanager"]["checksum"]
# Alertmanager release to install, taken from the kosmos_prometheus node attributes.
version = node["kosmos_prometheus"]["alertmanager"]["version"]
checksum = node["kosmos_prometheus"]["alertmanager"]["checksum"]
# Local download target inside Chef's file cache directory.
tarball = "#{Chef::Config[:file_cache_path]}/alertmanager-#{version}.linux-amd64.tar.gz"
# GitHub release asset matching the version above (linux-amd64 build).
binary_url = "https://github.com/prometheus/alertmanager/releases/download/v#{version}/alertmanager-#{version}.linux-amd64.tar.gz"
# Dedicated system group and user for Alertmanager; the systemd unit runs
# the process under this account.
group "alertmanager"

user "alertmanager" do
  system true
  gid "alertmanager"
  home "/nonexistent"
  shell "/bin/false"
end
prometheus_alertmanager_config "alertmanager"
# Storage directory handed to Alertmanager via --storage.path; owned by the
# service account.
directory "/var/lib/alertmanager" do
  recursive true
  mode "0755"
  owner "alertmanager"
  group "alertmanager"
end
execute "restart alertmanager config" do
command "systemctl restart alertmanager.service"
# Configuration directory that holds alertmanager.yml; root-owned.
directory "/etc/prometheus" do
  recursive true
  mode "0755"
  owner "root"
  group "root"
end
# Archive tooling; tar is what the install step uses to unpack the release.
package %w[tar bzip2]

# Download the release tarball into the file cache. When the file is created
# or updated, the install step is triggered right away (:immediately) instead
# of waiting for the end of the run.
remote_file tarball do
  action :create
  source binary_url
  # Left-hand `checksum` is the resource property, right-hand is the
  # recipe-level local variable of the same name.
  checksum checksum
  notifies :run, "execute[install_alertmanager]", :immediately
end
# Unpack only the `alertmanager` binary from the cached release tarball into
# /usr/local/bin. The resource is idle by default (action :nothing) and is
# triggered by the remote_file download, so it runs only when a new tarball
# has been fetched. Afterwards the service is restarted at the end of the run.
#
# It deliberately does not subscribe to the Alertmanager config template:
# the earlier subscription pointed at template[/opt/prometheus/alertmanager.yml],
# a resource this recipe does not declare (the config template lives at
# /etc/prometheus/alertmanager.yml), and a config change is no reason to
# re-extract the binary -- the template already restarts the service itself.
execute "install_alertmanager" do
  # --strip-components=1 drops the "alertmanager-<version>.linux-amd64/" prefix
  # so the binary lands directly in /usr/local/bin.
  command "tar -xzf #{tarball} -C /usr/local/bin --strip-components=1 alertmanager-#{version}.linux-amd64/alertmanager"
  action :nothing
  notifies :restart, "service[alertmanager]", :delayed
end
prometheus_alertmanager_service "alertmanager"
# Pin ownership and mode of the extracted binary; if this resource changes
# anything, Alertmanager is restarted at the end of the run.
file "/usr/local/bin/alertmanager" do
  mode "0755"
  owner "root"
  group "root"
  notifies :restart, "service[alertmanager]", :delayed
end
# Render the Alertmanager configuration from alertmanager.yml.erb. A changed
# config restarts the service at the end of the run.
# NOTE(review): mode 0644 leaves the file world-readable even though it is
# group-owned by alertmanager -- if the rendered config can contain
# credentials, consider 0640; confirm against the template.
template "/etc/prometheus/alertmanager.yml" do
  source "alertmanager.yml.erb"
  mode "0644"
  owner "root"
  group "alertmanager"
  notifies :restart, "service[alertmanager]", :delayed
end
# Unit definition for Alertmanager, built as a plain hash first so the
# resource below stays short. Runs as the alertmanager user, listens on :9093
# and is restarted by systemd after a failure.
alertmanager_unit = {
  Unit: {
    Description: "Prometheus Alertmanager",
    After: "network.target",
  },
  Service: {
    Type: "simple",
    User: "alertmanager",
    Group: "alertmanager",
    ExecStart: "/usr/local/bin/alertmanager --config.file=/etc/prometheus/alertmanager.yml --storage.path=/var/lib/alertmanager --web.listen-address=:9093",
    Restart: "on-failure",
    RestartSec: "5",
  },
  Install: {
    WantedBy: "multi-user.target",
  },
}

systemd_unit "alertmanager.service" do
  action :create
  triggers_reload true
  content alertmanager_unit
end
# Enable the Alertmanager service and make sure it is started. This is also
# the resource the restart notifications in this recipe point at.
service "alertmanager" do
  action %i[enable start]
end
firewall_rule "prometheus alertmanager" do
port 9093
@@ -5,8 +5,8 @@
include_recipe "firewall"
version = node["prometheus"]["node_exporter"]["version"]
checksum = node["prometheus"]["node_exporter"]["checksum"]
# node_exporter release to install, taken from the kosmos_prometheus node attributes.
version = node["kosmos_prometheus"]["node_exporter"]["version"]
checksum = node["kosmos_prometheus"]["node_exporter"]["checksum"]
# Local download target inside Chef's file cache directory.
tarball = "#{Chef::Config[:file_cache_path]}/node_exporter-#{version}.linux-amd64.tar.gz"
# GitHub release asset matching the version above (linux-amd64 build).
binary_url = "https://github.com/prometheus/node_exporter/releases/download/v#{version}/node_exporter-#{version}.linux-amd64.tar.gz"
@@ -5,45 +5,101 @@
include_recipe "firewall"
prometheus_install "prometheus" do
version node["prometheus"]["version"]
checksum node["prometheus"]["checksum"]
# Prometheus release to install, taken from the kosmos_prometheus node attributes.
version = node["kosmos_prometheus"]["version"]
checksum = node["kosmos_prometheus"]["checksum"]
# Local download target inside Chef's file cache directory.
tarball = "#{Chef::Config[:file_cache_path]}/prometheus-#{version}.linux-amd64.tar.gz"
# GitHub release asset matching the version above (linux-amd64 build).
binary_url = "https://github.com/prometheus/prometheus/releases/download/v#{version}/prometheus-#{version}.linux-amd64.tar.gz"
# Dedicated system group and user for Prometheus; the systemd unit runs the
# process under this account.
group "prometheus"

user "prometheus" do
  system true
  gid "prometheus"
  home "/nonexistent"
  shell "/bin/false"
end
prometheus_config "prometheus" do
global_config(
"scrape_interval" => "30s",
"evaluation_interval" => "30s"
# Directories Prometheus needs, mapped to the account that owns them (the
# same name is used for owner and group). The TSDB directory belongs to the
# service account; the config and rules directories stay root-owned.
# Insertion order is preserved, so the resources are declared in this order.
{
  "/var/lib/prometheus" => "prometheus",
  "/etc/prometheus" => "root",
  "/etc/prometheus/rules" => "root",
}.each do |path, account|
  directory path do
    owner account
    group account
    mode "0755"
    recursive true
  end
end
# Archive tooling; tar is what the install step uses to unpack the release.
package %w[tar bzip2]

# Download the release tarball into the file cache. When the file is created
# or updated, the install step is triggered right away (:immediately) instead
# of waiting for the end of the run.
remote_file tarball do
  action :create
  source binary_url
  # Left-hand `checksum` is the resource property, right-hand is the
  # recipe-level local variable of the same name.
  checksum checksum
  notifies :run, "execute[install_prometheus]", :immediately
end
# Unpack only the `prometheus` binary from the cached tarball into
# /usr/local/bin. Idle unless notified (action :nothing); after it runs, the
# service is restarted at the end of the Chef run.
execute "install_prometheus" do
  action :nothing
  # --strip-components=1 drops the "prometheus-<version>.linux-amd64/" prefix.
  command "tar -xzf #{tarball} -C /usr/local/bin --strip-components=1 prometheus-#{version}.linux-amd64/prometheus"
  notifies :restart, "service[prometheus]", :delayed
end
# Pin ownership and mode of the extracted binary; if this resource changes
# anything, Prometheus is restarted at the end of the run.
file "/usr/local/bin/prometheus" do
  mode "0755"
  owner "root"
  group "root"
  notifies :restart, "service[prometheus]", :delayed
end
# Render the main Prometheus configuration from prometheus.yml.erb, feeding
# it the global settings, scrape jobs and rule files from the
# kosmos_prometheus node attributes. A changed config asks the service to
# reload (not restart) at the end of the run.
template "/etc/prometheus/prometheus.yml" do
  source "prometheus.yml.erb"
  variables(
    global_config: node["kosmos_prometheus"]["global"],
    jobs: node["kosmos_prometheus"]["jobs"],
    rule_files: node["kosmos_prometheus"]["rule_files"]
  )
  mode "0644"
  owner "root"
  group "prometheus"
  notifies :reload, "service[prometheus]", :delayed
end
# Declare one prometheus_job resource per job name, each with a single
# target, in the order listed here.
# NOTE(review): prometheus_job is not defined in the visible source --
# presumably a custom resource from the upstream prometheus cookbook; confirm
# it is still available.
{
  "prometheus" => "localhost:9090",
  "node" => "localhost:9100",
}.each do |job_name, endpoint|
  prometheus_job job_name do
    target endpoint
  end
end
# Declares, in the root run context, an execute resource that reloads
# prometheus.service through systemctl. It does nothing on its own
# (action :nothing) and only runs, delayed, when the subscribed template
# resource is updated.
# NOTE(review): the subscription targets template[/opt/prometheus/prometheus.yml],
# but the template declared in the visible source is
# /etc/prometheus/prometheus.yml, which already notifies service[prometheus]
# to reload -- confirm whether this block is still needed.
with_run_context :root do
execute "reload prometheus config" do
command "systemctl reload prometheus.service"
action :nothing
subscribes :run, "template[/opt/prometheus/prometheus.yml]", :delayed
end
end
prometheus_service "prometheus" do
cli_options({
"config.file" => "/opt/prometheus/prometheus.yml",
"log.level" => "info",
"query.max-concurrency" => 20,
"query.lookback-delta" => "5m",
"query.timeout" => "2m",
"storage.tsdb.path" => "/var/lib/prometheus",
"storage.tsdb.retention.time" => "15d",
"web.listen-address" => ":9090"
# Unit definition for Prometheus, built as a plain hash first so the resource
# below stays short. Runs as the prometheus user, listens on :9090, keeps
# 15 days of TSDB data, and handles `reload` by sending SIGHUP to the main
# process.
prometheus_unit = {
  Unit: {
    Description: "Prometheus",
    After: "network.target",
  },
  Service: {
    Type: "simple",
    User: "prometheus",
    Group: "prometheus",
    ExecStart: "/usr/local/bin/prometheus --config.file=/etc/prometheus/prometheus.yml --storage.tsdb.path=/var/lib/prometheus --storage.tsdb.retention.time=15d --web.listen-address=:9090 --web.enable-lifecycle",
    ExecReload: "/bin/kill -HUP $MAINPID",
    Restart: "on-failure",
    RestartSec: "5",
  },
  Install: {
    WantedBy: "multi-user.target",
  },
}

systemd_unit "prometheus.service" do
  action :create
  triggers_reload true
  content prometheus_unit
end
# Enable the Prometheus service and make sure it is started. This is also the
# resource the restart/reload notifications in this recipe point at.
service "prometheus" do
  action %i[enable start]
end
firewall_rule "prometheus web" do