Remove upstream prometheus cookbook, migrate to our own
This commit is contained in:
@@ -1,8 +1,19 @@
|
||||
node.default["prometheus"]["version"] = "3.13.0"
|
||||
node.default["prometheus"]["checksum"] = "744d93324cc024d82089921737bd797474d7f1e5dbbfd1c6b387bad258538cb9"
|
||||
node.default["kosmos_prometheus"]["version"] = "3.13.0"
|
||||
node.default["kosmos_prometheus"]["checksum"] = "744d93324cc024d82089921737bd797474d7f1e5dbbfd1c6b387bad258538cb9"
|
||||
|
||||
node.default["prometheus"]["alertmanager"]["version"] = "0.33.0"
|
||||
node.default["prometheus"]["alertmanager"]["checksum"] = "8ce11c42e8a6dfbbf93a59c0b193cb1329210b36d0c7ef3df7b745608675a1d1"
|
||||
node.default["kosmos_prometheus"]["alertmanager"]["version"] = "0.33.0"
|
||||
node.default["kosmos_prometheus"]["alertmanager"]["checksum"] = "8ce11c42e8a6dfbbf93a59c0b193cb1329210b36d0c7ef3df7b745608675a1d1"
|
||||
|
||||
node.default["prometheus"]["node_exporter"]["version"] = "1.11.1"
|
||||
node.default["prometheus"]["node_exporter"]["checksum"] = "9f5ea48e5bc7b656f8a91a32e7d7deb89f70f73dabd0d974418aca15f37d6810"
|
||||
node.default["kosmos_prometheus"]["node_exporter"]["version"] = "1.11.1"
|
||||
node.default["kosmos_prometheus"]["node_exporter"]["checksum"] = "9f5ea48e5bc7b656f8a91a32e7d7deb89f70f73dabd0d974418aca15f37d6810"
|
||||
|
||||
node.default["kosmos_prometheus"]["global"] = {
|
||||
"scrape_interval" => "30s",
|
||||
"evaluation_interval" => "30s",
|
||||
}
|
||||
|
||||
node.default["kosmos_prometheus"]["jobs"] = {
|
||||
"prometheus" => { "targets" => ["localhost:9090"] }
|
||||
}
|
||||
|
||||
node.default["kosmos_prometheus"]["rule_files"] = []
|
||||
|
||||
@@ -6,7 +6,7 @@ description 'Installs/Configures prometheus'
|
||||
version '0.1.0'
|
||||
chef_version '>= 16.0'
|
||||
|
||||
depends "prometheus"
|
||||
depends "firewall"
|
||||
|
||||
# The `issues_url` points to the location where issues for this cookbook are
|
||||
# tracked. A `View Issues` link will be displayed on this cookbook's page when
|
||||
|
||||
@@ -5,20 +5,89 @@
|
||||
|
||||
include_recipe "firewall"
|
||||
|
||||
prometheus_alertmanager_install "alertmanager" do
|
||||
version node["prometheus"]["alertmanager"]["version"]
|
||||
checksum node["prometheus"]["alertmanager"]["checksum"]
|
||||
version = node["kosmos_prometheus"]["alertmanager"]["version"]
|
||||
checksum = node["kosmos_prometheus"]["alertmanager"]["checksum"]
|
||||
tarball = "#{Chef::Config[:file_cache_path]}/alertmanager-#{version}.linux-amd64.tar.gz"
|
||||
binary_url = "https://github.com/prometheus/alertmanager/releases/download/v#{version}/alertmanager-#{version}.linux-amd64.tar.gz"
|
||||
|
||||
group "alertmanager"
|
||||
|
||||
user "alertmanager" do
|
||||
gid "alertmanager"
|
||||
system true
|
||||
shell "/bin/false"
|
||||
home "/nonexistent"
|
||||
end
|
||||
|
||||
prometheus_alertmanager_config "alertmanager"
|
||||
directory "/var/lib/alertmanager" do
|
||||
owner "alertmanager"
|
||||
group "alertmanager"
|
||||
mode "0755"
|
||||
recursive true
|
||||
end
|
||||
|
||||
execute "restart alertmanager config" do
|
||||
command "systemctl restart alertmanager.service"
|
||||
directory "/etc/prometheus" do
|
||||
owner "root"
|
||||
group "root"
|
||||
mode "0755"
|
||||
recursive true
|
||||
end
|
||||
|
||||
package %w(tar bzip2)
|
||||
|
||||
remote_file tarball do
|
||||
source binary_url
|
||||
checksum checksum
|
||||
action :create
|
||||
notifies :run, "execute[install_alertmanager]", :immediately
|
||||
end
|
||||
|
||||
execute "install_alertmanager" do
|
||||
command "tar -xzf #{tarball} -C /usr/local/bin --strip-components=1 alertmanager-#{version}.linux-amd64/alertmanager"
|
||||
action :nothing
|
||||
subscribes :run, "template[/opt/prometheus/alertmanager.yml]", :delayed
|
||||
notifies :restart, "service[alertmanager]", :delayed
|
||||
end
|
||||
|
||||
prometheus_alertmanager_service "alertmanager"
|
||||
file "/usr/local/bin/alertmanager" do
|
||||
owner "root"
|
||||
group "root"
|
||||
mode "0755"
|
||||
notifies :restart, "service[alertmanager]", :delayed
|
||||
end
|
||||
|
||||
template "/etc/prometheus/alertmanager.yml" do
|
||||
source "alertmanager.yml.erb"
|
||||
owner "root"
|
||||
group "alertmanager"
|
||||
mode "0644"
|
||||
notifies :restart, "service[alertmanager]", :delayed
|
||||
end
|
||||
|
||||
systemd_unit "alertmanager.service" do
|
||||
content({
|
||||
Unit: {
|
||||
Description: "Prometheus Alertmanager",
|
||||
After: "network.target",
|
||||
},
|
||||
Service: {
|
||||
Type: "simple",
|
||||
User: "alertmanager",
|
||||
Group: "alertmanager",
|
||||
ExecStart: "/usr/local/bin/alertmanager --config.file=/etc/prometheus/alertmanager.yml --storage.path=/var/lib/alertmanager --web.listen-address=:9093",
|
||||
Restart: "on-failure",
|
||||
RestartSec: "5",
|
||||
},
|
||||
Install: {
|
||||
WantedBy: "multi-user.target",
|
||||
},
|
||||
})
|
||||
triggers_reload true
|
||||
action :create
|
||||
end
|
||||
|
||||
service "alertmanager" do
|
||||
action [:enable, :start]
|
||||
end
|
||||
|
||||
firewall_rule "prometheus alertmanager" do
|
||||
port 9093
|
||||
|
||||
@@ -5,8 +5,8 @@
|
||||
|
||||
include_recipe "firewall"
|
||||
|
||||
version = node["prometheus"]["node_exporter"]["version"]
|
||||
checksum = node["prometheus"]["node_exporter"]["checksum"]
|
||||
version = node["kosmos_prometheus"]["node_exporter"]["version"]
|
||||
checksum = node["kosmos_prometheus"]["node_exporter"]["checksum"]
|
||||
tarball = "#{Chef::Config[:file_cache_path]}/node_exporter-#{version}.linux-amd64.tar.gz"
|
||||
binary_url = "https://github.com/prometheus/node_exporter/releases/download/v#{version}/node_exporter-#{version}.linux-amd64.tar.gz"
|
||||
|
||||
|
||||
@@ -5,45 +5,101 @@
|
||||
|
||||
include_recipe "firewall"
|
||||
|
||||
prometheus_install "prometheus" do
|
||||
version node["prometheus"]["version"]
|
||||
checksum node["prometheus"]["checksum"]
|
||||
version = node["kosmos_prometheus"]["version"]
|
||||
checksum = node["kosmos_prometheus"]["checksum"]
|
||||
tarball = "#{Chef::Config[:file_cache_path]}/prometheus-#{version}.linux-amd64.tar.gz"
|
||||
binary_url = "https://github.com/prometheus/prometheus/releases/download/v#{version}/prometheus-#{version}.linux-amd64.tar.gz"
|
||||
|
||||
group "prometheus"
|
||||
|
||||
user "prometheus" do
|
||||
gid "prometheus"
|
||||
system true
|
||||
shell "/bin/false"
|
||||
home "/nonexistent"
|
||||
end
|
||||
|
||||
prometheus_config "prometheus" do
|
||||
global_config(
|
||||
"scrape_interval" => "30s",
|
||||
"evaluation_interval" => "30s"
|
||||
directory "/var/lib/prometheus" do
|
||||
owner "prometheus"
|
||||
group "prometheus"
|
||||
mode "0755"
|
||||
recursive true
|
||||
end
|
||||
|
||||
directory "/etc/prometheus" do
|
||||
owner "root"
|
||||
group "root"
|
||||
mode "0755"
|
||||
recursive true
|
||||
end
|
||||
|
||||
directory "/etc/prometheus/rules" do
|
||||
owner "root"
|
||||
group "root"
|
||||
mode "0755"
|
||||
recursive true
|
||||
end
|
||||
|
||||
package %w(tar bzip2)
|
||||
|
||||
remote_file tarball do
|
||||
source binary_url
|
||||
checksum checksum
|
||||
action :create
|
||||
notifies :run, "execute[install_prometheus]", :immediately
|
||||
end
|
||||
|
||||
execute "install_prometheus" do
|
||||
command "tar -xzf #{tarball} -C /usr/local/bin --strip-components=1 prometheus-#{version}.linux-amd64/prometheus"
|
||||
action :nothing
|
||||
notifies :restart, "service[prometheus]", :delayed
|
||||
end
|
||||
|
||||
file "/usr/local/bin/prometheus" do
|
||||
owner "root"
|
||||
group "root"
|
||||
mode "0755"
|
||||
notifies :restart, "service[prometheus]", :delayed
|
||||
end
|
||||
|
||||
template "/etc/prometheus/prometheus.yml" do
|
||||
source "prometheus.yml.erb"
|
||||
owner "root"
|
||||
group "prometheus"
|
||||
mode "0644"
|
||||
variables(
|
||||
global_config: node["kosmos_prometheus"]["global"],
|
||||
jobs: node["kosmos_prometheus"]["jobs"],
|
||||
rule_files: node["kosmos_prometheus"]["rule_files"]
|
||||
)
|
||||
notifies :reload, "service[prometheus]", :delayed
|
||||
end
|
||||
|
||||
prometheus_job "prometheus" do
|
||||
target "localhost:9090"
|
||||
end
|
||||
|
||||
prometheus_job "node" do
|
||||
target "localhost:9100"
|
||||
end
|
||||
|
||||
with_run_context :root do
|
||||
execute "reload prometheus config" do
|
||||
command "systemctl reload prometheus.service"
|
||||
action :nothing
|
||||
subscribes :run, "template[/opt/prometheus/prometheus.yml]", :delayed
|
||||
end
|
||||
end
|
||||
|
||||
prometheus_service "prometheus" do
|
||||
cli_options({
|
||||
"config.file" => "/opt/prometheus/prometheus.yml",
|
||||
"log.level" => "info",
|
||||
"query.max-concurrency" => 20,
|
||||
"query.lookback-delta" => "5m",
|
||||
"query.timeout" => "2m",
|
||||
"storage.tsdb.path" => "/var/lib/prometheus",
|
||||
"storage.tsdb.retention.time" => "15d",
|
||||
"web.listen-address" => ":9090"
|
||||
systemd_unit "prometheus.service" do
|
||||
content({
|
||||
Unit: {
|
||||
Description: "Prometheus",
|
||||
After: "network.target",
|
||||
},
|
||||
Service: {
|
||||
Type: "simple",
|
||||
User: "prometheus",
|
||||
Group: "prometheus",
|
||||
ExecStart: "/usr/local/bin/prometheus --config.file=/etc/prometheus/prometheus.yml --storage.tsdb.path=/var/lib/prometheus --storage.tsdb.retention.time=15d --web.listen-address=:9090 --web.enable-lifecycle",
|
||||
ExecReload: "/bin/kill -HUP $MAINPID",
|
||||
Restart: "on-failure",
|
||||
RestartSec: "5",
|
||||
},
|
||||
Install: {
|
||||
WantedBy: "multi-user.target",
|
||||
},
|
||||
})
|
||||
triggers_reload true
|
||||
action :create
|
||||
end
|
||||
|
||||
service "prometheus" do
|
||||
action [:enable, :start]
|
||||
end
|
||||
|
||||
firewall_rule "prometheus web" do
|
||||
|
||||
@@ -0,0 +1,12 @@
|
||||
global:
|
||||
resolve_timeout: 5m
|
||||
|
||||
route:
|
||||
receiver: default
|
||||
group_by: ['alertname']
|
||||
group_wait: 30s
|
||||
group_interval: 5m
|
||||
repeat_interval: 3h
|
||||
|
||||
receivers:
|
||||
- name: default
|
||||
@@ -0,0 +1,31 @@
|
||||
global:
|
||||
<% @global_config.each do |k, v| %>
|
||||
<%= k %>: "<%= v %>"
|
||||
<% end %>
|
||||
|
||||
scrape_configs:
|
||||
<% @jobs.each do |name, job| %>
|
||||
- job_name: "<%= name %>"
|
||||
<% if job['scrape_interval'] %>
|
||||
scrape_interval: "<%= job['scrape_interval'] %>"
|
||||
<% end %>
|
||||
<% if job['scrape_timeout'] %>
|
||||
scrape_timeout: "<%= job['scrape_timeout'] %>"
|
||||
<% end %>
|
||||
metrics_path: "<%= job.fetch('metrics_path', '/metrics') %>"
|
||||
static_configs:
|
||||
- targets: <%= Array(job['targets']) %>
|
||||
<% if job['labels'] %>
|
||||
labels:
|
||||
<% job['labels'].each do |label, label_config| %>
|
||||
<%= label %>: <%= label_config %>
|
||||
<% end %>
|
||||
<% end %>
|
||||
<% end %>
|
||||
|
||||
<% if @rule_files && !@rule_files.empty? %>
|
||||
rule_files:
|
||||
<% @rule_files.each do |filename| %>
|
||||
- <%= filename %>
|
||||
<% end %>
|
||||
<% end %>
|
||||
Reference in New Issue
Block a user