15 Commits

Author SHA1 Message Date
raucao f2ebda4a1a Update node configs 2026-07-04 16:30:15 +02:00
raucao 67f62ebd6c Scrape garage metrics 2026-07-04 16:14:36 +02:00
raucao 7dc4895da3 Remove old garage nodes 2026-07-04 15:51:39 +02:00
raucao 153b1e77c5 Add all nodes with base role as node exporter targets 2026-07-04 15:46:41 +02:00
raucao ea69c7cec6 Add prometheus node exporter to base role 2026-07-04 15:45:57 +02:00
raucao 5813a45987 Use base role instead of recipe in all runlists 2026-07-04 15:45:23 +02:00
raucao 63534e1cf5 Remove upstream prometheus cookbook, migrate to our own 2026-07-04 15:27:18 +02:00
raucao 2d835335b5 Re-add global config, change values 2026-07-04 14:15:55 +02:00
raucao e21797b402 Apply changed configs to prometheus and alertmanager 2026-07-04 14:15:38 +02:00
raucao 7396af5ca4 WIP Add node exporter 2026-07-04 14:15:33 +02:00
raucao df8c8d1742 Remove obsolete CLI option
Was left over from trying to overwrite the defaults before
2026-07-04 13:31:20 +02:00
Greg Karekinian 765d0b080e WIP Initial kosmos_prometheus wrapper cookbook 2026-07-03 17:47:13 +02:00
Greg Karekinian 4cd6c41254 Add community prometheus cookbook 2026-07-03 17:46:18 +02:00
raucao ec73dd5b57 Set Chef environment for node 2026-07-03 15:52:46 +02:00
raucao 850db344b7 Add prometheus node 2026-07-03 15:50:15 +02:00
38 changed files with 632 additions and 192 deletions
-4
View File
@@ -1,4 +0,0 @@
{
"name": "garage-10",
"public_key": "-----BEGIN PUBLIC KEY-----\nMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAw2+3Wo+KkXVJCOX1SxT9\nSdwKXgPbCDM3EI9uwoxhMxQfRyN53dxIsBDsQUVOIe1Z8yqm4FenMQlNmeDR+QLE\nvNFf1fisinW+D9VVRm+CjcJy96i/Dyt786Z6YRrDlB860HxCbfTL2Zv5BRtbyIKg\nhz5gO+9PMEpPVR2ij9iue4K6jbM1AAL2ia/P6zDWLJqeIzUocCeHV5N0Z3jXH6qr\nf444v78x35MMJ+3tg5h95SU1/PDCpdSTct4uHEuKIosiN7p4DlYMoM5iSyvVoujr\nflRQPEpGzS9qEt3rDo/F4ltzYMx6bf1tB/0QaBKD+zwPZWTTwf61tSBo5/NkGvJc\nFQIDAQAB\n-----END PUBLIC KEY-----\n"
}
-4
View File
@@ -1,4 +0,0 @@
{
"name": "garage-12",
"public_key": "-----BEGIN PUBLIC KEY-----\nMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEA9GtHHi298BjiIqpZ3WkT\nkYAPfWD60hFe/8icYcq/F/6cHLYKZQ4chek9X/hDCMq4tHEN6Oh58T5x/nuNdPrK\nIAMGyVAGk6ekWlmD4jwdEf6TGb/J3ffJTRDvwX/I8xD/DW3wtXsN+X24T59ByGTm\nrnwRmmmwHF3otRx9wnCsIgDQ0AjiUujsfNNv1FcLXD/WJLys9lEeU5aJ4XtHTwDv\ntJM8YyVEFhEnuvgdKmzn5+F5k9VGdUwForlFOBfvzbCnTZMDMmDVeiUtAUv/7xWQ\nQl2mLUGCtgWuYJYXsQacAJ6pa3h+7cQyshC6w3dwUG+1fS9lNO0Yp1GGX1AGYKpp\nPQIDAQAB\n-----END PUBLIC KEY-----\n"
}
-4
View File
@@ -1,4 +0,0 @@
{
"name": "garage-13",
"public_key": "-----BEGIN PUBLIC KEY-----\nMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAvbqWc6OwRxgHfsQuTNL4\naxeVvNen5d9srYpZSHjuBB/k9NHB+9P6vU5qF37XHkw1lVUGeYbPHzhYsx3O0/kZ\nH5f4+4SMy/P9jc6SE7AJF4qtYKgJ88koZdqCww07c6K9g+BnEGFFZui/h3hUBxWj\nTfhBHEWPyQ2bl/lr9sIJwsEz+EN0isGn/eIXkmw9J6LdLJ5Q0LLks33K28FNOU7q\nfeAN4MiBVMUtgCGyT2Voe6WrOXwQLSDXQONOp3sfSfFExsIJ1s24xdd7AMD7/9a7\n4sFDZ4swhqAWgWmW2giR7Kb8wTvGQLO/O/uUbmKz3DZXgkOKXHdHCEB/PZx1mRNM\nEwIDAQAB\n-----END PUBLIC KEY-----\n"
}
-4
View File
@@ -1,4 +0,0 @@
{
"name": "garage-9",
"public_key": "-----BEGIN PUBLIC KEY-----\nMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAnMHzKE8JBrsQkmRDeMjX\n71mBzvRzNM90cwA8xtvIkXesdTyGqohX9k/PJbCY5ySGK9PpMaYDPVAnwnUP8LFQ\n3G98aSbLxUjqU/PBzRsnWpihehr05uz9zYcNFzr4LTNvGQZsq47nN9Tk+LG3zHP7\nAZViv2mJ4ZRnukXf6KHlyoVvhuTu+tiBM8QzjTF97iP/aguNPzYHmrecy9Uf5bSA\nZrbNZT+ayxtgswC2OclhRucx7XLSuHXtpwFqsQzSAhiX1aQ3wwCyH9WJtVwpfUsE\nlxTjcQiSM9aPZ8iSC0shpBaKD1j3iF/2K2Jk+88++zMhJJPLermvaJxzsdePgvyk\nKQIDAQAB\n-----END PUBLIC KEY-----\n"
}
+4
View File
@@ -0,0 +1,4 @@
{
"name": "prometheus-1",
"public_key": "-----BEGIN PUBLIC KEY-----\nMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAp7T/OBo/TZm3YqnN4+ok\nHwcJ0kW9w2rl9UfrOlWUvoPHBd2LrqpEv3Az3a150IylQ1H/UozmQA7DtjIoTA7d\nV3oLY970vYrYiURcojOo8qAZBy8EH7dfAHxuZryUeELr+3vdcHF5WrrfSt2FdFVX\nPTY95ikafAnOO0Nt8jvnlPoDn7REV8TOE6KOiUzcHKa2xGlfaIe0oRC21LD86uQm\nR09xY1YaJkVgZfeN/opoRjZawkU3FFs3jlUEVBF8k153oOw9W3bgsFFjSOtRtRRg\nDwyQ7oDeMH83kXnaCdpkNZd59wjzPcpxYAL4LRN52ZXA4Btr4DTi+GxHz98Dr0kU\nUQIDAQAB\n-----END PUBLIC KEY-----\n"
}
+4
View File
@@ -0,0 +1,4 @@
{
"name": "rsk-testnet-5",
"public_key": "-----BEGIN PUBLIC KEY-----\nMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAx/UHlgcSeh9Do7CTCKXC\n/4/aO2OvT+ijDVmrMYCNtE4sMeuFqKPnV1zxJZmRm4VNhkSQDkdWYD+6XvuFYW60\nyjB/N6D5lLlyjG4HD6fTkfh0K6f7t5mOYV7o4T59OoA3cBZuSROjtWmJ8jEFJ+k9\nII2kcyhPQcFN01ckzvZKRSPbVRccMoc+AKTjB3ZUfs/ERtlVoDrK4jEHluXOxUJO\nBKCcLonjJuLlpRLh7QfKrKFcR4idn5Ir43R6aSUesI/ipKwKsXnR3Bu7vXp74VF3\nMJ3EkdSBG+qJzy51fbRfQiUPAr/vSoVQZwW7FkIhIqqLkMaYCymn7qKfTGujoNU7\nlwIDAQAB\n-----END PUBLIC KEY-----\n"
}
+1 -1
View File
@@ -61,7 +61,7 @@
}
},
"run_list": [
"recipe[kosmos-base]",
"role[base]",
"role[kvm_guest]",
"role[garage_gateway]",
"role[kosmos_discourse]"
+1 -1
View File
@@ -55,7 +55,7 @@
}
},
"run_list": [
"recipe[kosmos-base]",
"role[base]",
"role[kvm_guest]",
"role[drone]"
]
-64
View File
@@ -1,64 +0,0 @@
{
"name": "garage-10",
"chef_environment": "production",
"normal": {
"knife_zero": {
"host": "10.1.1.27"
}
},
"automatic": {
"fqdn": "garage-10",
"os": "linux",
"os_version": "5.4.0-1090-kvm",
"hostname": "garage-10",
"ipaddress": "192.168.122.70",
"roles": [
"base",
"kvm_guest",
"garage_node"
],
"recipes": [
"kosmos-base",
"kosmos-base::default",
"kosmos_kvm::guest",
"kosmos_garage",
"kosmos_garage::default",
"kosmos_garage::firewall_rpc",
"kosmos_garage::firewall_apis",
"apt::default",
"timezone_iii::default",
"timezone_iii::debian",
"ntp::default",
"ntp::apparmor",
"kosmos-base::systemd_emails",
"apt::unattended-upgrades",
"kosmos-base::firewall",
"kosmos-postfix::default",
"postfix::default",
"postfix::_common",
"postfix::_attributes",
"postfix::sasl_auth",
"hostname::default",
"firewall::default"
],
"platform": "ubuntu",
"platform_version": "20.04",
"cloud": null,
"chef_packages": {
"chef": {
"version": "18.5.0",
"chef_root": "/opt/chef/embedded/lib/ruby/gems/3.1.0/gems/chef-18.5.0/lib",
"chef_effortless": null
},
"ohai": {
"version": "18.1.11",
"ohai_root": "/opt/chef/embedded/lib/ruby/gems/3.1.0/gems/ohai-18.1.11/lib/ohai"
}
}
},
"run_list": [
"role[base]",
"role[kvm_guest]",
"role[garage_node]"
]
}
+2
View File
@@ -20,6 +20,7 @@
"recipes": [
"kosmos-base",
"kosmos-base::default",
"kosmos_prometheus::node_exporter",
"kosmos_kvm::guest",
"kosmos_garage",
"kosmos_garage::default",
@@ -30,6 +31,7 @@
"timezone_iii::debian",
"ntp::default",
"ntp::apparmor",
"kosmos-base::journald_conf",
"kosmos-base::systemd_emails",
"apt::unattended-upgrades",
"kosmos-base::firewall",
-65
View File
@@ -1,65 +0,0 @@
{
"name": "garage-12",
"chef_environment": "production",
"normal": {
"knife_zero": {
"host": "10.1.1.224"
}
},
"automatic": {
"fqdn": "garage-12",
"os": "linux",
"os_version": "5.15.0-1059-kvm",
"hostname": "garage-12",
"ipaddress": "192.168.122.173",
"roles": [
"base",
"kvm_guest",
"garage_node"
],
"recipes": [
"kosmos-base",
"kosmos-base::default",
"kosmos_kvm::guest",
"kosmos_garage",
"kosmos_garage::default",
"kosmos_garage::firewall_rpc",
"kosmos_garage::firewall_apis",
"apt::default",
"timezone_iii::default",
"timezone_iii::debian",
"ntp::default",
"ntp::apparmor",
"kosmos-base::journald_conf",
"kosmos-base::systemd_emails",
"apt::unattended-upgrades",
"kosmos-base::firewall",
"kosmos-postfix::default",
"postfix::default",
"postfix::_common",
"postfix::_attributes",
"postfix::sasl_auth",
"hostname::default",
"firewall::default"
],
"platform": "ubuntu",
"platform_version": "22.04",
"cloud": null,
"chef_packages": {
"chef": {
"version": "18.7.10",
"chef_root": "/opt/chef/embedded/lib/ruby/gems/3.1.0/gems/chef-18.7.10/lib",
"chef_effortless": null
},
"ohai": {
"version": "18.2.5",
"ohai_root": "/opt/chef/embedded/lib/ruby/gems/3.1.0/gems/ohai-18.2.5/lib/ohai"
}
}
},
"run_list": [
"role[base]",
"role[kvm_guest]",
"role[garage_node]"
]
}
+1
View File
@@ -20,6 +20,7 @@
"recipes": [
"kosmos-base",
"kosmos-base::default",
"kosmos_prometheus::node_exporter",
"kosmos_kvm::guest",
"kosmos_garage",
"kosmos_garage::default",
+1
View File
@@ -20,6 +20,7 @@
"recipes": [
"kosmos-base",
"kosmos-base::default",
"kosmos_prometheus::node_exporter",
"kosmos_kvm::guest",
"kosmos_garage",
"kosmos_garage::default",
+1
View File
@@ -20,6 +20,7 @@
"recipes": [
"kosmos-base",
"kosmos-base::default",
"kosmos_prometheus::node_exporter",
"kosmos_kvm::guest",
"kosmos_garage",
"kosmos_garage::default",
+2
View File
@@ -20,6 +20,7 @@
"recipes": [
"kosmos-base",
"kosmos-base::default",
"kosmos_prometheus::node_exporter",
"kosmos_kvm::guest",
"kosmos_garage",
"kosmos_garage::default",
@@ -30,6 +31,7 @@
"timezone_iii::debian",
"ntp::default",
"ntp::apparmor",
"kosmos-base::journald_conf",
"kosmos-base::systemd_emails",
"apt::unattended-upgrades",
"kosmos-base::firewall",
+2
View File
@@ -20,6 +20,7 @@
"recipes": [
"kosmos-base",
"kosmos-base::default",
"kosmos_prometheus::node_exporter",
"kosmos_kvm::guest",
"kosmos_garage",
"kosmos_garage::default",
@@ -30,6 +31,7 @@
"timezone_iii::debian",
"ntp::default",
"ntp::apparmor",
"kosmos-base::journald_conf",
"kosmos-base::systemd_emails",
"apt::unattended-upgrades",
"kosmos-base::firewall",
+1 -1
View File
@@ -60,7 +60,7 @@
}
},
"run_list": [
"recipe[kosmos-base]",
"role[base]",
"role[kvm_guest]",
"role[ipfs_gateway]"
]
+1 -1
View File
@@ -57,7 +57,7 @@
}
},
"run_list": [
"recipe[kosmos-base]",
"role[base]",
"role[kvm_guest]",
"role[dirsrv_supplier]"
]
+1 -1
View File
@@ -83,7 +83,7 @@
}
},
"run_list": [
"recipe[kosmos-base]",
"role[base]",
"role[kvm_guest]",
"role[ldap_client]",
"role[garage_gateway]",
@@ -1,35 +1,32 @@
{
"name": "garage-13",
"name": "prometheus-1",
"chef_environment": "production",
"normal": {
"knife_zero": {
"host": "10.1.1.179"
"host": "10.1.1.146"
}
},
"automatic": {
"fqdn": "garage-13",
"fqdn": "prometheus-1",
"os": "linux",
"os_version": "5.15.0-1059-kvm",
"hostname": "garage-13",
"ipaddress": "192.168.122.27",
"os_version": "6.8.0-134-generic",
"hostname": "prometheus-1",
"ipaddress": "192.168.122.166",
"roles": [
"base",
"kvm_guest",
"garage_node"
"prometheus_server"
],
"recipes": [
"kosmos-base",
"kosmos-base::default",
"kosmos_prometheus::node_exporter",
"kosmos_kvm::guest",
"kosmos_garage",
"kosmos_garage::default",
"kosmos_garage::firewall_rpc",
"kosmos_garage::firewall_apis",
"kosmos_prometheus::server",
"kosmos_prometheus::alertmanager",
"apt::default",
"timezone_iii::default",
"timezone_iii::debian",
"ntp::default",
"ntp::apparmor",
"kosmos-base::journald_conf",
"kosmos-base::systemd_emails",
"apt::unattended-upgrades",
@@ -43,23 +40,23 @@
"firewall::default"
],
"platform": "ubuntu",
"platform_version": "22.04",
"platform_version": "24.04",
"cloud": null,
"chef_packages": {
"chef": {
"version": "18.7.10",
"chef_root": "/opt/chef/embedded/lib/ruby/gems/3.1.0/gems/chef-18.7.10/lib",
"version": "18.10.17",
"chef_root": "/opt/chef/embedded/lib/ruby/gems/3.1.0/gems/chef-18.10.17/lib",
"chef_effortless": null
},
"ohai": {
"version": "18.2.5",
"ohai_root": "/opt/chef/embedded/lib/ruby/gems/3.1.0/gems/ohai-18.2.5/lib/ohai"
"version": "18.2.13",
"ohai_root": "/opt/chef/embedded/lib/ruby/gems/3.1.0/gems/ohai-18.2.13/lib/ohai"
}
}
},
"run_list": [
"role[base]",
"role[kvm_guest]",
"role[garage_node]"
"role[prometheus_server]"
]
}
+1 -1
View File
@@ -55,7 +55,7 @@
}
},
"run_list": [
"recipe[kosmos-base]",
"role[base]",
"role[kvm_guest]",
"role[remotestorage_discourse]"
]
@@ -1,30 +1,26 @@
{
"name": "garage-9",
"chef_environment": "production",
"name": "rsk-testnet-5",
"normal": {
"knife_zero": {
"host": "10.1.1.223"
"host": "10.1.1.194"
}
},
"automatic": {
"fqdn": "garage-9",
"fqdn": "rsk-testnet-5",
"os": "linux",
"os_version": "5.4.0-1090-kvm",
"hostname": "garage-9",
"ipaddress": "192.168.122.21",
"os_version": "5.4.0-1103-kvm",
"hostname": "rsk-testnet-5",
"ipaddress": "192.168.122.171",
"roles": [
"base",
"kvm_guest",
"garage_node"
"rskj_testnet"
],
"recipes": [
"kosmos-base",
"kosmos-base::default",
"kosmos_kvm::guest",
"kosmos_garage",
"kosmos_garage::default",
"kosmos_garage::firewall_rpc",
"kosmos_garage::firewall_apis",
"kosmos_rsk::rskj",
"apt::default",
"timezone_iii::default",
"timezone_iii::debian",
@@ -39,6 +35,7 @@
"postfix::_attributes",
"postfix::sasl_auth",
"hostname::default",
"kosmos_rsk::firewall",
"firewall::default"
],
"platform": "ubuntu",
@@ -46,19 +43,19 @@
"cloud": null,
"chef_packages": {
"chef": {
"version": "18.5.0",
"chef_root": "/opt/chef/embedded/lib/ruby/gems/3.1.0/gems/chef-18.5.0/lib",
"version": "18.3.0",
"chef_root": "/opt/chef/embedded/lib/ruby/gems/3.1.0/gems/chef-18.3.0/lib",
"chef_effortless": null
},
"ohai": {
"version": "18.1.11",
"ohai_root": "/opt/chef/embedded/lib/ruby/gems/3.1.0/gems/ohai-18.1.11/lib/ohai"
"version": "18.1.4",
"ohai_root": "/opt/chef/embedded/lib/ruby/gems/3.1.0/gems/ohai-18.1.4/lib/ohai"
}
}
},
"run_list": [
"role[base]",
"role[kvm_guest]",
"role[garage_node]"
"role[rskj_testnet]"
]
}
+1 -1
View File
@@ -60,7 +60,7 @@
}
},
"run_list": [
"recipe[kosmos-base]",
"role[base]",
"role[kvm_guest]",
"recipe[kosmos-ejabberd::upload_service]"
]
+1
View File
@@ -2,4 +2,5 @@ name "base"
run_list %w(
kosmos-base::default
kosmos_prometheus::node_exporter
)
+12
View File
@@ -0,0 +1,12 @@
# Role for nodes that run the kosmos_prometheus server and alertmanager recipes.
name "prometheus_server"

# The same recipes are used for every environment this role defines.
server_recipes = %w(
  kosmos_prometheus::server
  kosmos_prometheus::alertmanager
)

env_run_lists(
  %w(_default development production).to_h { |env| [env, server_recipes] }
)
@@ -10,10 +10,10 @@ tor_service "ejabberd" do
public_key tor_services['ejabberd']['public_key']
secret_key tor_services['ejabberd']['secret_key']
# TODO configure IP from node attribute
# (This is hardcoded for ejabberd-4 atm)
ports [ "5222 10.1.1.113:5222",
"5223 10.1.1.113:5223",
"5269 10.1.1.113:5269" ]
# (This is hardcoded for draco atm)
ports [ "5222 148.251.237.73:5222",
"5223 148.251.237.73:5223",
"5269 148.251.237.73:5269" ]
end
tor_service "web" do
@@ -0,0 +1,25 @@
.vagrant
*~
*#
.#*
\#*#
.*.sw[a-z]
*.un~
# Bundler
Gemfile.lock
gems.locked
bin/*
.bundle/*
# test kitchen
.kitchen/
kitchen.local.yml
# Chef Infra
Berksfile.lock
.zero-knife.rb
Policyfile.lock.json
.idea/
@@ -0,0 +1,7 @@
# kosmos_prometheus CHANGELOG
This file is used to list changes made in each version of the kosmos_prometheus cookbook.
## 0.1.0
Initial release.
+20
View File
@@ -0,0 +1,20 @@
Copyright (c) 2019 Kosmos Developers
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,4 @@
# kosmos_prometheus
Installs and configures the Prometheus server, Alertmanager, and the Prometheus node exporter.
@@ -0,0 +1,19 @@
# Default attributes for the kosmos_prometheus cookbook.
#
# Each version/checksum pair is read by the matching recipe: the version is
# interpolated into the GitHub release download URL and the checksum is passed
# to the remote_file resource that fetches the tarball.

# Prometheus server release (read by the server recipe).
node.default["kosmos_prometheus"]["version"] = "3.13.0"
node.default["kosmos_prometheus"]["checksum"] = "744d93324cc024d82089921737bd797474d7f1e5dbbfd1c6b387bad258538cb9"
# Alertmanager release (read by the alertmanager recipe).
node.default["kosmos_prometheus"]["alertmanager"]["version"] = "0.33.0"
node.default["kosmos_prometheus"]["alertmanager"]["checksum"] = "8ce11c42e8a6dfbbf93a59c0b193cb1329210b36d0c7ef3df7b745608675a1d1"
# node_exporter release (read by the node_exporter recipe).
node.default["kosmos_prometheus"]["node_exporter"]["version"] = "1.11.1"
node.default["kosmos_prometheus"]["node_exporter"]["checksum"] = "9f5ea48e5bc7b656f8a91a32e7d7deb89f70f73dabd0d974418aca15f37d6810"
# Key/value pairs rendered into the `global:` section of prometheus.yml.
node.default["kosmos_prometheus"]["global"] = {
"scrape_interval" => "30s",
"evaluation_interval" => "30s",
}
# Static scrape jobs keyed by job name. The server recipe merges the
# search-derived "node" and "garage" jobs into this hash before rendering.
node.default["kosmos_prometheus"]["jobs"] = {
"prometheus" => { "targets" => ["localhost:9090"] },
}
# Entries listed under `rule_files:` in prometheus.yml; the section is only
# rendered when this list is non-empty.
node.default["kosmos_prometheus"]["rule_files"] = []
+115
View File
@@ -0,0 +1,115 @@
# Put files/directories that should be ignored in this file when uploading
# to a Chef Infra Server or Supermarket.
# Lines that start with '# ' are comments.
# OS generated files #
######################
.DS_Store
ehthumbs.db
Icon?
nohup.out
Thumbs.db
.envrc
# EDITORS #
###########
.#*
.project
.settings
*_flymake
*_flymake.*
*.bak
*.sw[a-z]
*.tmproj
*~
\#*
REVISION
TAGS*
tmtags
.vscode
.editorconfig
## COMPILED ##
##############
*.class
*.com
*.dll
*.exe
*.o
*.pyc
*.so
*/rdoc/
a.out
mkmf.log
# Testing #
###########
.circleci/*
.codeclimate.yml
.delivery/*
.foodcritic
.kitchen*
.mdlrc
.overcommit.yml
.rspec
.rubocop.yml
.travis.yml
.watchr
.yamllint
azure-pipelines.yml
Dangerfile
examples/*
features/*
Guardfile
kitchen.yml*
mlc_config.json
Procfile
Rakefile
spec/*
test/*
# SCM #
#######
.git
.gitattributes
.gitconfig
.github/*
.gitignore
.gitkeep
.gitmodules
.svn
*/.bzr/*
*/.git
*/.hg/*
*/.svn/*
# Berkshelf #
#############
Berksfile
Berksfile.lock
cookbooks/*
tmp
# Bundler #
###########
vendor/*
Gemfile
Gemfile.lock
# Policyfile #
##############
Policyfile.rb
Policyfile.lock.json
# Documentation #
#############
CODE_OF_CONDUCT*
CONTRIBUTING*
documentation/*
TESTING*
UPGRADING*
# Vagrant #
###########
.vagrant
Vagrantfile
@@ -0,0 +1,21 @@
# Cookbook metadata for kosmos_prometheus.
name 'kosmos_prometheus'
maintainer 'Kosmos Developers'
maintainer_email 'mail@kosmos.org'
license 'MIT'
description 'Installs/Configures prometheus'
version '0.1.0'
chef_version '>= 16.0'
# The recipes in this cookbook call include_recipe "firewall" and declare
# firewall_rule resources, so the firewall cookbook must be available.
depends "firewall"
# The `issues_url` points to the location where issues for this cookbook are
# tracked. A `View Issues` link will be displayed on this cookbook's page when
# uploaded to a Supermarket.
#
# issues_url 'https://github.com/<insert_org_here>/kosmos_prometheus/issues'
# The `source_url` points to the development repository for this cookbook. A
# `View Source` link will be displayed on this cookbook's page when uploaded to
# a Supermarket.
#
# source_url 'https://github.com/<insert_org_here>/kosmos_prometheus'
@@ -0,0 +1,97 @@
#
# Cookbook:: kosmos_prometheus
# Recipe:: alertmanager
#
# Downloads the Alertmanager release tarball from GitHub, extracts the binary
# to /usr/local/bin, renders /etc/prometheus/alertmanager.yml, runs the daemon
# as a systemd service and allows TCP 9093 from 10.1.1.0/24.
#

include_recipe "firewall"

release = node["kosmos_prometheus"]["alertmanager"]["version"]
expected_checksum = node["kosmos_prometheus"]["alertmanager"]["checksum"]
archive_path = "#{Chef::Config[:file_cache_path]}/alertmanager-#{release}.linux-amd64.tar.gz"
download_url = "https://github.com/prometheus/alertmanager/releases/download/v#{release}/alertmanager-#{release}.linux-amd64.tar.gz"

# Dedicated system account the service runs as.
group "alertmanager"

user "alertmanager" do
  system true
  gid "alertmanager"
  home "/nonexistent"
  shell "/bin/false"
end

# Data directory is owned by the service account, the config directory by
# root. Hash order is preserved, so the resources are declared in this order.
{
  "/var/lib/alertmanager" => "alertmanager",
  "/etc/prometheus" => "root",
}.each do |path, owner_name|
  directory path do
    owner owner_name
    group owner_name
    mode "0755"
    recursive true
  end
end

package %w(tar bzip2)

# A newly downloaded tarball triggers the extraction below right away.
remote_file archive_path do
  source download_url
  checksum expected_checksum
  action :create
  notifies :run, "execute[install_alertmanager]", :immediately
end

# Only runs when notified; extracts just the alertmanager binary.
execute "install_alertmanager" do
  command "tar -xzf #{archive_path} -C /usr/local/bin --strip-components=1 alertmanager-#{release}.linux-amd64/alertmanager"
  action :nothing
  notifies :restart, "service[alertmanager]", :delayed
end

file "/usr/local/bin/alertmanager" do
  mode "0755"
  owner "root"
  group "root"
  notifies :restart, "service[alertmanager]", :delayed
end

template "/etc/prometheus/alertmanager.yml" do
  source "alertmanager.yml.erb"
  mode "0644"
  owner "root"
  group "alertmanager"
  notifies :restart, "service[alertmanager]", :delayed
end

unit_sections = {
  Unit: {
    Description: "Prometheus Alertmanager",
    After: "network.target",
  },
  Service: {
    Type: "simple",
    User: "alertmanager",
    Group: "alertmanager",
    ExecStart: "/usr/local/bin/alertmanager --config.file=/etc/prometheus/alertmanager.yml --storage.path=/var/lib/alertmanager --web.listen-address=:9093",
    Restart: "on-failure",
    RestartSec: "5",
  },
  Install: {
    WantedBy: "multi-user.target",
  },
}

systemd_unit "alertmanager.service" do
  content(unit_sections)
  triggers_reload true
  action :create
end

service "alertmanager" do
  action %i(enable start)
end

firewall_rule "prometheus alertmanager" do
  command :allow
  protocol :tcp
  port 9093
  source "10.1.1.0/24"
end
@@ -0,0 +1,85 @@
#
# Cookbook:: kosmos_prometheus
# Recipe:: node_exporter
#
# Downloads the node_exporter release tarball from GitHub, extracts the binary
# to /usr/local/bin, runs it as a systemd service with the textfile collector
# directory configured, and allows TCP 9100 from 10.1.1.0/24.
#

include_recipe "firewall"

release = node["kosmos_prometheus"]["node_exporter"]["version"]
expected_checksum = node["kosmos_prometheus"]["node_exporter"]["checksum"]
archive_path = "#{Chef::Config[:file_cache_path]}/node_exporter-#{release}.linux-amd64.tar.gz"
download_url = "https://github.com/prometheus/node_exporter/releases/download/v#{release}/node_exporter-#{release}.linux-amd64.tar.gz"

# Dedicated system account the exporter runs as.
group "node_exporter"

user "node_exporter" do
  system true
  gid "node_exporter"
  home "/nonexistent"
  shell "/bin/false"
end

# Directory passed to --collector.textfile.directory in the unit below.
directory "/var/lib/node_exporter/textfile" do
  mode "0755"
  owner "node_exporter"
  group "node_exporter"
  recursive true
end

package %w(tar bzip2)

# A newly downloaded tarball triggers the extraction below right away.
remote_file archive_path do
  source download_url
  checksum expected_checksum
  action :create
  notifies :run, "execute[install_node_exporter]", :immediately
end

# Only runs when notified; extracts just the node_exporter binary.
execute "install_node_exporter" do
  command "tar -xzf #{archive_path} -C /usr/local/bin --strip-components=1 node_exporter-#{release}.linux-amd64/node_exporter"
  action :nothing
  notifies :restart, "service[node_exporter]", :delayed
end

file "/usr/local/bin/node_exporter" do
  mode "0755"
  owner "root"
  group "root"
  notifies :restart, "service[node_exporter]", :delayed
end

unit_sections = {
  Unit: {
    Description: "Prometheus node exporter",
    Documentation: ["https://github.com/prometheus/node_exporter"],
  },
  Service: {
    Type: "simple",
    User: "node_exporter",
    Group: "node_exporter",
    ExecStart: "/usr/local/bin/node_exporter --web.listen-address=:9100 --collector.textfile.directory=/var/lib/node_exporter/textfile",
    Restart: "on-failure",
    RestartSec: "5",
    NoNewPrivileges: "yes",
    ProtectSystem: "full",
    ProtectHome: "yes",
  },
  Install: {
    WantedBy: "multi-user.target",
  },
}

systemd_unit "node_exporter.service" do
  content(unit_sections)
  triggers_reload true
  action :create
end

service "node_exporter" do
  action %i(enable start)
end

firewall_rule "node_exporter" do
  command :allow
  protocol :tcp
  port 9100
  source "10.1.1.0/24"
end
@@ -0,0 +1,125 @@
#
# Cookbook:: kosmos_prometheus
# Recipe:: server
#
# Downloads the Prometheus release tarball from GitHub, extracts the server
# binary to /usr/local/bin, renders /etc/prometheus/prometheus.yml with scrape
# targets found through node search, runs the server as a systemd service and
# allows TCP 9090 from 10.1.1.0/24.
#

include_recipe "firewall"

version = node["kosmos_prometheus"]["version"]
checksum = node["kosmos_prometheus"]["checksum"]
tarball = "#{Chef::Config[:file_cache_path]}/prometheus-#{version}.linux-amd64.tar.gz"
binary_url = "https://github.com/prometheus/prometheus/releases/download/v#{version}/prometheus-#{version}.linux-amd64.tar.gz"

# Dedicated system account the server runs as.
group "prometheus"

user "prometheus" do
  gid "prometheus"
  system true
  shell "/bin/false"
  home "/nonexistent"
end

# TSDB storage path (see --storage.tsdb.path in the unit below).
directory "/var/lib/prometheus" do
  owner "prometheus"
  group "prometheus"
  mode "0755"
  recursive true
end

directory "/etc/prometheus" do
  owner "root"
  group "root"
  mode "0755"
  recursive true
end

directory "/etc/prometheus/rules" do
  owner "root"
  group "root"
  mode "0755"
  recursive true
end

package %w(tar bzip2)

# A newly downloaded tarball triggers the extraction below right away.
remote_file tarball do
  source binary_url
  checksum checksum
  action :create
  notifies :run, "execute[install_prometheus]", :immediately
end

# Only runs when notified; extracts just the prometheus binary.
execute "install_prometheus" do
  command "tar -xzf #{tarball} -C /usr/local/bin --strip-components=1 prometheus-#{version}.linux-amd64/prometheus"
  action :nothing
  notifies :restart, "service[prometheus]", :delayed
end

file "/usr/local/bin/prometheus" do
  owner "root"
  group "root"
  mode "0755"
  notifies :restart, "service[prometheus]", :delayed
end

# Turns node search results into a list of "ip:port" scrape targets.
#
# Nodes that have no knife_zero attribute, or no host inside it, are skipped
# instead of raising NoMethodError on nil. Addresses are sorted numerically by
# octet, so the resulting list does not depend on the order search returns.
scrape_targets = lambda do |nodes, port|
  nodes
    .map { |n| (n["knife_zero"] || {})["host"] }
    .compact
    .sort_by { |ip| ip.split(".").map(&:to_i) }
    .map { |ip| "#{ip}:#{port}" }
end

node_targets = scrape_targets.call(search(:node, "role:base"), 9100)
garage_targets = scrape_targets.call(search(:node, "role:garage_node"), 3903)

# Search-derived jobs are merged over the attribute-defined ones, so a "node"
# or "garage" entry set via attributes is replaced here.
jobs = node["kosmos_prometheus"]["jobs"].merge(
  "node" => { "targets" => node_targets },
  "garage" => { "targets" => garage_targets }
)

# Config changes only reload the service (SIGHUP via ExecReload below).
template "/etc/prometheus/prometheus.yml" do
  source "prometheus.yml.erb"
  owner "root"
  group "prometheus"
  mode "0644"
  variables(
    global_config: node["kosmos_prometheus"]["global"],
    jobs: jobs,
    rule_files: node["kosmos_prometheus"]["rule_files"]
  )
  notifies :reload, "service[prometheus]", :delayed
end

systemd_unit "prometheus.service" do
  content({
    Unit: {
      Description: "Prometheus",
      After: "network.target",
    },
    Service: {
      Type: "simple",
      User: "prometheus",
      Group: "prometheus",
      ExecStart: "/usr/local/bin/prometheus --config.file=/etc/prometheus/prometheus.yml --storage.tsdb.path=/var/lib/prometheus --storage.tsdb.retention.time=15d --web.listen-address=:9090 --web.enable-lifecycle",
      ExecReload: "/bin/kill -HUP $MAINPID",
      Restart: "on-failure",
      RestartSec: "5",
    },
    Install: {
      WantedBy: "multi-user.target",
    },
  })
  triggers_reload true
  action :create
end

service "prometheus" do
  action [:enable, :start]
end

firewall_rule "prometheus web" do
  port 9090
  source "10.1.1.0/24"
  protocol :tcp
  command :allow
end
@@ -0,0 +1,12 @@
global:
resolve_timeout: 5m
route:
receiver: default
group_by: ['alertname']
group_wait: 30s
group_interval: 5m
repeat_interval: 3h
receivers:
- name: default
@@ -0,0 +1,31 @@
global:
<% @global_config.each do |k, v| %>
<%= k %>: "<%= v %>"
<% end %>
scrape_configs:
<% @jobs.each do |name, job| %>
- job_name: "<%= name %>"
<% if job['scrape_interval'] %>
scrape_interval: "<%= job['scrape_interval'] %>"
<% end %>
<% if job['scrape_timeout'] %>
scrape_timeout: "<%= job['scrape_timeout'] %>"
<% end %>
metrics_path: "<%= job.fetch('metrics_path', '/metrics') %>"
static_configs:
- targets: <%= Array(job['targets']) %>
<% if job['labels'] %>
labels:
<% job['labels'].each do |label, label_config| %>
<%= label %>: <%= label_config %>
<% end %>
<% end %>
<% end %>
<% if @rule_files && !@rule_files.empty? %>
rule_files:
<% @rule_files.each do |filename| %>
- <%= filename %>
<% end %>
<% end %>