3 Commits

Author SHA1 Message Date
b0cdeac30e Add new RSK testnet VM 2026-04-12 08:52:24 +04:00
1c6bf34386 Upgrade rskj to v9.0.1 and Ubuntu 24.04 2026-04-11 19:16:19 +04:00
41e6b29b97 Add AGENTS.md 2026-04-11 15:36:54 +04:00
26 changed files with 185 additions and 304 deletions

41
AGENTS.md Normal file
View File

@@ -0,0 +1,41 @@
# AGENTS.md
Welcome, AI Agent! This file contains essential context and rules for interacting with the Kosmos Chef repository. Read this carefully before planning or executing any changes.
## 🏢 Project Overview
This repository contains the infrastructure automation code used by Kosmos to provision and configure bare metal servers (KVM hosts) and Ubuntu virtual machines (KVM guests).
We use **Chef Infra**, managed locally via **Knife Zero** (agentless Chef), and **Berkshelf** for dependency management.
## 📂 Directory Structure & Rules
* **`site-cookbooks/`**: 🟢 **EDITABLE.** This directory contains all custom, internal cookbooks written specifically for Kosmos services (e.g., `kosmos-postgresql`, `kosmos_gitea`, `kosmos-mastodon`). *Active development happens here.*
* **`cookbooks/`**: 🔴 **DO NOT EDIT.** This directory contains third-party/community cookbooks that are vendored. These are managed by Berkshelf. Modifying them directly will result in lost changes.
* **`roles/`**: 🟢 **EDITABLE.** Contains Chef roles written in Ruby (e.g., `base.rb`, `kvm_guest.rb`, `postgresql_primary.rb`). These define run-lists and role-specific default attributes for servers.
* **`environments/`**: Contains Chef environment definitions (like `production.rb`).
* **`data_bags/`**: Contains data bag configurations, often encrypted. Be cautious and do not expose secrets. (Note: Agents should not manage data bag secrets directly unless they have been given the `.chef/encrypted_data_bag_secret` file.)
* **`nodes/`**: Contains JSON state files for bootstrapped nodes. *Agents typically do not edit these directly unless cleaning up a deleted node.*
* **`Berksfile`**: Defines community cookbook dependencies.
* **`Vagrantfile` / `.kitchen/`**: Used for local virtualization and integration testing.
## 🛠️ Tooling & Workflows
1. **Dependency Management (Berkshelf)**
If a new community cookbook is required:
- Add it to the `Berksfile` at the root.
- Instruct the user to run `berks install` and then `berks vendor cookbooks/ --delete` (or run both commands yourself via the `bash` tool if permitted).
2. **Provisioning (Knife Zero)**
- Bootstrapping and converging nodes is done using `knife zero`.
- *Example:* `knife zero converge name:server-name.kosmos.org`
3. **Code Style & Conventions**
- Chef recipes, resources, and roles are written in **Ruby**.
- Follow standard Chef and Ruby (RuboCop) idioms. Look at neighboring files in `site-cookbooks/` or `roles/` to match formatting and naming conventions.
## 🚨 Core Directives for AI Agents
1. **Infrastructure as Code**: Manual server configurations are highly discouraged. All changes must be codified in a cookbook or role.
2. **Test Safety Nets**: Look for `.kitchen.yml` within specific `site-cookbooks/<name>` to understand if local integration tests are available.
3. **No Assumptions**: Do not assume standard test commands. Check `README.md` and repository config files first.
4. **Secret Handling**: Avoid hardcoding passwords or API keys in recipes or roles. Assume sensitive information is managed via Chef `data_bags`.

View File

@@ -0,0 +1,4 @@
{
"name": "rsk-testnet-6",
"public_key": "-----BEGIN PUBLIC KEY-----\nMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAl1p4+F536/peA4XWMJtm\njggPl6yJb42V5bg3kDa8SHoIoQgXn59d3BclZ1Oz2+JhFd3Rrn4FN3Z1wzGpP+gA\nnxQOfgRG1ucahh7Nxaw3IdoHm7r/EdEOc9FrxvGJ+09YnmLfzn4iVQpsUiOiNVS7\n0LXtMXYtsjD+o6BTbOhGU8FMmGhMhQfXFVgoDdTiM/Q62zPw8Vtpa3yFpFJAu+dA\n+mm5h5W6FnaWJXM2arn3PxDOt+JQSWp5PYG4goU1FFreU9iFuoeGEfLy8unlbbXt\ne96QhNuCkOA15xqta0Z3oL7IlXWns7dLgZYlpZT9zaExIs3AEDaQcleacQPzXKSG\nswIDAQAB\n-----END PUBLIC KEY-----\n"
}

60
nodes/rsk-testnet-6.json Normal file
View File

@@ -0,0 +1,60 @@
{
"name": "rsk-testnet-6",
"normal": {
"knife_zero": {
"host": "10.1.1.20"
}
},
"automatic": {
"fqdn": "rsk-testnet-6",
"os": "linux",
"os_version": "6.8.0-107-generic",
"hostname": "rsk-testnet-6",
"ipaddress": "192.168.122.231",
"roles": [
"base",
"kvm_guest",
"rskj_testnet"
],
"recipes": [
"kosmos-base",
"kosmos-base::default",
"kosmos_kvm::guest",
"kosmos_rsk::rskj",
"apt::default",
"timezone_iii::default",
"timezone_iii::debian",
"kosmos-base::journald_conf",
"kosmos-base::systemd_emails",
"apt::unattended-upgrades",
"kosmos-base::firewall",
"kosmos-postfix::default",
"postfix::default",
"postfix::_common",
"postfix::_attributes",
"postfix::sasl_auth",
"hostname::default",
"kosmos_rsk::firewall",
"firewall::default"
],
"platform": "ubuntu",
"platform_version": "24.04",
"cloud": null,
"chef_packages": {
"chef": {
"version": "18.10.17",
"chef_root": "/opt/chef/embedded/lib/ruby/gems/3.1.0/gems/chef-18.10.17/lib",
"chef_effortless": null
},
"ohai": {
"version": "18.2.13",
"ohai_root": "/opt/chef/embedded/lib/ruby/gems/3.1.0/gems/ohai-18.2.13/lib/ohai"
}
}
},
"run_list": [
"role[base]",
"role[kvm_guest]",
"role[rskj_testnet]"
]
}

View File

@@ -1,7 +0,0 @@
# Chef role for PostgreSQL nodes that replicate logically: applies the
# hostsfile, replica_logical and firewall recipes of kosmos_postgresql,
# in that order.
name "postgresql_replica_logical"

run_list [
  "kosmos_postgresql::hostsfile",
  "kosmos_postgresql::replica_logical",
  "kosmos_postgresql::firewall"
]

View File

@@ -1,8 +1,3 @@
node.default['kosmos_postgresql']['postgresql_version'] = "14"
# This is set to false by default, and set to true in the server resource
# for replicas.
node.default['kosmos_postgresql']['ready_to_set_up_replica'] = false
# Address space from which clients are allowed to connect
node.default['kosmos_postgresql']['access_addr'] = "10.1.1.0/24"

View File

@@ -1,34 +0,0 @@
#!/bin/bash
# Prepare every connectable, non-template database on this server for logical
# replication: make sure a publication named migrate_pub (FOR ALL TABLES)
# exists and that a logical replication slot named migrate_slot_<db> exists.
# Objects that already exist are left untouched, so the script can be re-run.
#
# Abort on the first failing command.
set -e
echo "== Creating publication in each database =="
# The unquoted $(...) is word-split by the shell, so this loop assumes that
# database names contain no whitespace.
for db in $(psql -Atqc "SELECT datname FROM pg_database WHERE datallowconn AND datname NOT IN ('template0','template1')"); do
echo "Processing DB: $db"
# Create publication (idempotent).
# The heredoc is unquoted, so \$\$ is escaped to reach psql as the literal
# $$ dollar-quote delimiter instead of being expanded by the shell.
psql -d "$db" -v ON_ERROR_STOP=1 <<SQL
DO \$\$
BEGIN
IF NOT EXISTS (
SELECT 1 FROM pg_publication WHERE pubname = 'migrate_pub'
) THEN
CREATE PUBLICATION migrate_pub FOR ALL TABLES;
END IF;
END
\$\$;
SQL
# Create the logical replication slot (pgoutput plugin) unless a slot with
# this name is already listed in pg_replication_slots.
SLOT="migrate_slot_${db}"
if ! psql -d "$db" -Atqc "SELECT 1 FROM pg_replication_slots WHERE slot_name = '$SLOT'" | grep -q 1; then
echo " Creating slot: $SLOT"
psql -d "$db" -c "SELECT pg_create_logical_replication_slot('$SLOT', 'pgoutput');"
else
echo " Slot already exists: $SLOT"
fi
done
echo "== Done =="

View File

@@ -1,33 +0,0 @@
#!/bin/bash
# Undo the logical-replication preparation on the publishing server: for every
# connectable, non-template database, drop the replication slot
# migrate_slot_<db> (if it exists) and the publication migrate_pub (if it
# exists). Missing objects are skipped, so the script can be re-run.
#
# The shebang makes the interpreter explicit; the file is installed as an
# executable and is run directly.

# Abort on the first failing command.
set -e

echo "== Dropping subscriptions slots and publications on PRIMARY =="

# The unquoted $(...) is word-split by the shell, so this loop assumes that
# database names contain no whitespace.
for db in $(psql -Atqc "SELECT datname FROM pg_database WHERE datallowconn AND datname NOT IN ('template0','template1')"); do
  echo "Processing DB: $db"
  SLOT="migrate_slot_${db}"

  # Drop slot if exists
  if psql -d "$db" -Atqc "SELECT 1 FROM pg_replication_slots WHERE slot_name = '$SLOT'" | grep -q 1; then
    echo " Dropping slot: $SLOT"
    psql -d "$db" -c "SELECT pg_drop_replication_slot('$SLOT');"
  else
    echo " Slot not found: $SLOT"
  fi

  # Drop publication if exists.
  # The heredoc is unquoted, so \$\$ is escaped to reach psql as the literal
  # $$ dollar-quote delimiter. Heredoc body and terminator stay at column 0.
  psql -d "$db" -v ON_ERROR_STOP=1 <<SQL
DO \$\$
BEGIN
IF EXISTS (
SELECT 1 FROM pg_publication WHERE pubname = 'migrate_pub'
) THEN
DROP PUBLICATION migrate_pub;
END IF;
END
\$\$;
SQL
done

echo "== Done =="

View File

@@ -1,28 +0,0 @@
#!/bin/bash
# Remove the logical-replication subscriptions on the subscribing server: for
# every connectable, non-template database, disable and then drop the
# subscription migrate_sub_<db> if it exists.
#
# The shebang makes the interpreter explicit; the file is installed as an
# executable and is run directly.

# Abort on the first failing command.
set -e

echo "== Dropping subscriptions on PG14 =="

# Fixed: the exclusion list was mis-quoted as ('template0,'template1''), which
# is invalid SQL. The query failed, the command substitution expanded to
# nothing, and the loop silently processed no database at all.
# The unquoted $(...) is word-split by the shell, so this loop assumes that
# database names contain no whitespace.
for db in $(psql -Atqc "SELECT datname FROM pg_database WHERE datallowconn AND datname NOT IN ('template0','template1')"); do
  echo "Processing DB: $db"
  SUB="migrate_sub_${db}"

  # Disable first (important). Errors are deliberately ignored because the
  # subscription may not exist in this database.
  psql -d "$db" -c "ALTER SUBSCRIPTION $SUB DISABLE;" 2>/dev/null || true

  # Drop subscription if exists.
  # NOTE(review): PostgreSQL documents that DROP SUBSCRIPTION cannot run
  # inside a transaction block while the subscription is still associated
  # with a replication slot. Confirm that this DO block succeeds on the
  # target server, or switch to a plain DROP SUBSCRIPTION IF EXISTS.
  # The heredoc is unquoted, so \$\$ is escaped to reach psql as the literal
  # $$ dollar-quote delimiter. Heredoc body and terminator stay at column 0.
  psql -d "$db" -v ON_ERROR_STOP=1 <<SQL
DO \$\$
BEGIN
IF EXISTS (
SELECT 1 FROM pg_subscription WHERE subname = '$SUB'
) THEN
DROP SUBSCRIPTION $SUB;
END IF;
END
\$\$;
SQL
done

echo "== Done =="

View File

@@ -1,9 +0,0 @@
#!/bin/bash
# Dump the whole cluster into /tmp:
#   * globals.sql         - output of pg_dumpall --globals-only
#   * dump_<db>.tar.zst   - one zstd-compressed tar archive per connectable
#                           database (template0 excluded), built from a
#                           directory-format pg_dump that is removed afterwards
# xargs runs up to four databases concurrently (-P4), and each pg_dump uses
# four jobs (-j 4).
#
# Fixed: the exclusion list was written as (''template0''), which is invalid
# SQL. The query failed and xargs received no database names, so nothing was
# dumped.
#
# NOTE(review): database names are substituted textually into the sh -c
# script via {}; this assumes names without shell metacharacters.
cd /tmp && \
(pg_dumpall --globals-only > globals.sql) && \
psql -Atqc "SELECT datname FROM pg_database WHERE datallowconn AND datname NOT IN ('template0')" | \
xargs -I{} -P4 sh -c "
pg_dump -Fd -j 4 -d \"{}\" -f dump_{} &&
tar -cf - dump_{} | zstd -19 -T0 > dump_{}.tar.zst &&
rm -rf dump_{}
"

View File

@@ -1,5 +0,0 @@
#!/bin/bash
# Print the names of the publications defined in each database that accepts
# connections, preceded by a "DB: <name>" header line per database.

databases=$(psql -Atqc "SELECT datname FROM pg_database WHERE datallowconn")

# Intentionally unquoted: the list is split on whitespace, one name per word.
for database in $databases; do
  echo "DB: $database"
  psql -d "$database" -Atqc "SELECT pubname FROM pg_publication;"
done

View File

@@ -1,5 +0,0 @@
#!/bin/bash
# List every replication slot together with the human-readable amount of WAL
# between the server's current WAL position and the slot's restart_lsn.
psql -c "
SELECT slot_name,
pg_size_pretty(pg_wal_lsn_diff(pg_current_wal_lsn(), restart_lsn))
FROM pg_replication_slots;"

View File

@@ -1,5 +0,0 @@
#!/bin/bash
# Show the contents of pg_stat_subscription for each connectable, non-template
# database, preceded by a "==== DB: <name> ====" header line per database.

databases=$(psql -Atqc "SELECT datname FROM pg_database WHERE datallowconn AND datname NOT IN ('template0','template1')")

# Intentionally unquoted: the list is split on whitespace, one name per word.
for database in $databases; do
  echo "==== DB: $database ===="
  psql -d "$database" -c "SELECT * FROM pg_stat_subscription;"
done

View File

@@ -1,8 +0,0 @@
#!/bin/bash
# Restore every /tmp/dump_<db>.tar.zst archive: unpack it into the directory
# dump_<db> and feed that directory to pg_restore (four parallel jobs),
# targeting the database named <db>.
#
# NOTE(review): pg_restore -d connects to the named database, so each target
# database presumably has to exist before this script runs. Confirm against
# the migration procedure.

# Stop immediately if /tmp is not accessible, rather than globbing and
# extracting in whatever directory the script was started from.
cd /tmp || exit 1

# Without nullglob an unmatched pattern is passed through literally and the
# loop would try to restore a file called "dump_*.tar.zst".
shopt -s nullglob

for f in dump_*.tar.zst; do
  # Strip the "dump_" prefix and ".tar.zst" suffix to get the database name.
  db=${f#dump_}
  db=${db%.tar.zst}

  echo "Restoring $db"
  zstd -d "$f" -c | tar -xf -
  # Quoted so that an unusual database name cannot be split or globbed.
  pg_restore -j 4 -d "$db" "dump_$db"
done

View File

@@ -36,16 +36,10 @@ class Chef
end
end
def postgresql_version
node['kosmos_postgresql']['postgresql_version']
end
def postgresql_service_name
postgresql_version = "12"
def postgresql_service
"postgresql@#{postgresql_version}-main"
end
def postgresql_data_dir
"/var/lib/postgresql/#{postgresql_version}/main"
end
end
end

View File

@@ -3,41 +3,31 @@
# Recipe:: primary
#
postgresql_version = "12"
postgresql_service = "postgresql@#{postgresql_version}-main"
service postgresql_service do
supports restart: true, status: true, reload: true
end
postgresql_custom_server postgresql_version do
role "primary"
end
cookbook_file "/usr/local/bin/pg_dump_all_databases" do
source "dump_all_databases.sh"
user "postgres"
group "postgres"
mode "0744"
postgresql_access "zerotier members" do
access_type "host"
access_db "all"
access_user "all"
access_addr "10.1.1.0/24"
access_method "md5"
notifies :reload, "service[#{postgresql_service}]", :immediately
end
cookbook_file "/usr/local/bin/pg_create_replication_publications" do
source "create_publications.sh"
user "postgres"
group "postgres"
mode "0744"
end
cookbook_file "/usr/local/bin/pg_drop_replication_publications" do
source "drop_publications.sh"
user "postgres"
group "postgres"
mode "0744"
end
cookbook_file "/usr/local/bin/pg_list_replication_publications" do
source "list_publications.sh"
user "postgres"
group "postgres"
mode "0744"
end
cookbook_file "/usr/local/bin/pg_list_replication_slots" do
source "list_replication_slots.sh"
user "postgres"
group "postgres"
mode "0744"
postgresql_access "zerotier members replication" do
access_type "host"
access_db "replication"
access_user "replication"
access_addr "10.1.1.0/24"
access_method "md5"
notifies :reload, "service[#{postgresql_service}]", :immediately
end

View File

@@ -3,32 +3,54 @@
# Recipe:: replica
#
service postgresql_service do
supports restart: true, status: true, reload: true
end
postgresql_version = "12"
postgresql_service = "postgresql@#{postgresql_version}-main"
postgresql_custom_server postgresql_version do
role "replica"
end
service postgresql_service do
supports restart: true, status: true, reload: true
end
postgresql_data_bag_item = data_bag_item('credentials', 'postgresql')
primary = postgresql_primary
if primary.nil?
Chef::Log.warn("No PostgreSQL primary node found. Skipping replication setup.")
return
end
unless primary.nil?
# TODO
postgresql_data_dir = "/var/lib/postgresql/#{postgresql_version}/main"
execute "set up replication" do
command <<-EOF
# FIXME get zerotier IP
execute "set up replication" do
command <<-EOF
systemctl stop #{postgresql_service}
mv #{postgresql_data_dir} #{postgresql_data_dir}.old
pg_basebackup -h pg.kosmos.local -U replication -D #{postgresql_data_dir} -R
chown -R postgres:postgres #{postgresql_data_dir}
systemctl start #{postgresql_service}
EOF
environment 'PGPASSWORD' => postgresql_data_bag_item['replication_password']
sensitive true
not_if { ::File.exist? "#{postgresql_data_dir}/standby.signal" }
EOF
environment 'PGPASSWORD' => postgresql_data_bag_item['replication_password']
sensitive true
not_if { ::File.exist? "#{postgresql_data_dir}/standby.signal" }
end
postgresql_access "zerotier members" do
access_type "host"
access_db "all"
access_user "all"
access_addr "10.1.1.0/24"
access_method "md5"
notifies :reload, "service[#{postgresql_service}]", :immediately
end
postgresql_access "zerotier members replication" do
access_type "host"
access_db "replication"
access_user "replication"
access_addr "10.1.1.0/24"
access_method "md5"
notifies :reload, "service[#{postgresql_service}]", :immediately
end
end

View File

@@ -1,50 +0,0 @@
#
# Cookbook:: kosmos_postgresql
# Recipe:: replica_logical
#
# Sets up a PostgreSQL server with the "replica_logical" role and installs
# helper scripts under /usr/local/bin for creating, dropping and listing
# logical-replication subscriptions and for restoring database dumps.
# The scripts are only installed when a primary node is found.
#

service postgresql_service do
  supports restart: true, status: true, reload: true
end

postgresql_custom_server postgresql_version do
  role "replica_logical"
end

postgresql_data_bag_item = data_bag_item('credentials', 'postgresql')

primary = postgresql_primary

if primary.nil?
  Chef::Log.warn("No PostgreSQL primary node found. Skipping replication setup.")
  return
end

# The rendered script embeds the replication password, hence `sensitive` and
# a mode that gives no access to other users.
template "/usr/local/bin/pg_create_replication_subscriptions" do
  source "create_subscriptions.sh.erb"
  user "postgres"
  group "postgres"
  mode "0740"
  sensitive true
  # Fixed: the template interpolates @pg_host, @pg_port, @pg_user and
  # @pg_pass, but no variables were passed, so the rendered connection string
  # was empty.
  # NOTE(review): host, user and password source mirror what the streaming
  # replica recipe uses for pg_basebackup; 5432 is PostgreSQL's default port.
  # Confirm these are the right values for logical replication.
  variables pg_host: "pg.kosmos.local",
            pg_port: 5432,
            pg_user: "replication",
            pg_pass: postgresql_data_bag_item['replication_password']
end

cookbook_file "/usr/local/bin/pg_drop_replication_subscriptions" do
  source "drop_subscriptions.sh"
  user "postgres"
  group "postgres"
  mode "0744"
end

cookbook_file "/usr/local/bin/pg_list_replication_subscriptions" do
  source "list_subscriptions.sh"
  user "postgres"
  group "postgres"
  mode "0744"
end

cookbook_file "/usr/local/bin/pg_restore_all_databases" do
  source "restore_all_databases.sh"
  user "postgres"
  group "postgres"
  mode "0744"
end

View File

@@ -56,9 +56,7 @@ action :create do
timezone: "UTC", # default is GMT
listen_addresses: "0.0.0.0",
promote_trigger_file: "#{postgresql_data_dir}/failover.trigger",
wal_level: "logical",
wal_keep_size: 4096, # 256 segments, 16MB each
max_replication_slots: 16
wal_keep_segments: 256
}
postgresql_server_conf "main" do
@@ -72,24 +70,6 @@ action :create do
replication true
password postgresql_credentials['replication_password']
end
postgresql_access "all members" do
access_type "host"
access_db "all"
access_user "all"
access_addr node['kosmos_postgresql']['access_addr']
access_method "md5"
notifies :reload, "service[#{postgresql_service}]", :immediately
end
postgresql_access "replication members" do
access_type "host"
access_db "replication"
access_user "replication"
access_addr node['kosmos_postgresql']['access_addr']
access_method "md5"
notifies :reload, "service[#{postgresql_service}]", :immediately
end
end
action_class do

View File

@@ -1,33 +0,0 @@
#!/bin/bash
# Create a logical-replication subscription named migrate_sub_<db> in every
# connectable, non-template database of this server, unless it already exists.
# Each subscription attaches to the publication migrate_pub and uses the
# replication slot migrate_slot_<db>. It is created with create_slot = false
# and copy_data = false, so it neither creates that slot nor copies existing
# table data.
#
# This file is an ERB template: host, port, user and password of the
# publishing server are filled in from the template variables pg_host,
# pg_port, pg_user and pg_pass. The rendered script contains the password in
# clear text.
#
# The shebang makes the interpreter explicit; the file is installed as an
# executable and is run directly.

# Abort on the first failing command.
set -e

echo "== Creating subscriptions for all databases =="

# The unquoted $(...) is word-split by the shell, so this loop assumes that
# database names contain no whitespace.
for db in $(psql -Atqc "SELECT datname FROM pg_database WHERE datallowconn AND datname NOT IN ('template0','template1')"); do
  echo "Processing DB: $db"
  SLOT="migrate_slot_${db}"
  SUB="migrate_sub_${db}"

  # The heredoc is unquoted, so $SUB, $SLOT and $db are expanded by the shell
  # while \$\$ is escaped to reach psql as the literal $$ dollar-quote
  # delimiter. Heredoc body and terminator stay at column 0.
  psql -d "$db" -v ON_ERROR_STOP=1 <<SQL
DO \$\$
BEGIN
IF NOT EXISTS (
SELECT 1 FROM pg_subscription WHERE subname = '$SUB'
) THEN
CREATE SUBSCRIPTION $SUB
CONNECTION 'host=<%= @pg_host %> port=<%= @pg_port %> dbname=$db user=<%= @pg_user %> password=<%= @pg_pass %>'
PUBLICATION migrate_pub
WITH (
slot_name = '$SLOT',
create_slot = false,
copy_data = false,
enabled = true
);
END IF;
END
\$\$;
SQL
done

echo "== Done =="

View File

@@ -1,5 +1,8 @@
source 'https://supermarket.chef.io'
cookbook 'kosmos-nginx', path: '../../site-cookbooks/kosmos-nginx'
cookbook 'kosmos_openresty', path: '../../site-cookbooks/kosmos_openresty'
cookbook 'kosmos-base', path: '../../site-cookbooks/kosmos-base'
cookbook 'openresty', path: '../../site-cookbooks/openresty'
cookbook 'kosmos-postfix', path: '../../site-cookbooks/kosmos-postfix'
metadata

View File

@@ -1,4 +1,4 @@
node.default['rskj']['version'] = '7.0.0~jammy'
node.default['rskj']['version'] = '9.0.1~noble'
node.default['rskj']['network'] = 'testnet'
node.default['rskj']['nginx']['domain'] = nil

View File

@@ -34,9 +34,9 @@ verifier:
name: inspec
platforms:
- name: ubuntu-22.04
- name: ubuntu-24.04
driver:
image: dokken/ubuntu-22.04
image: dokken/ubuntu-24.04
privileged: true
pid_one_command: /usr/lib/systemd/systemd
intermediate_instructions:

View File

@@ -3,7 +3,7 @@ maintainer 'Kosmos Developers'
maintainer_email 'ops@kosmos.org'
license 'MIT'
description 'Installs/configures RSKj and related software'
version '0.4.0'
version '0.5.0'
chef_version '>= 18.2'
issues_url 'https://gitea.kosmos.org/kosmos/chef/issues'
source_url 'https://gitea.kosmos.org/kosmos/chef'

View File

@@ -20,10 +20,19 @@ apt_repository 'rskj' do
end
apt_package 'openjdk-17-jdk'
apt_package 'debconf-utils'
execute 'preseed-rskj-license' do
command 'echo "rskj shared/accepted-rsk-license-v1-1 boolean true" | debconf-set-selections'
not_if 'debconf-get-selections | grep -q "shared/accepted-rsk-license-v1-1.*true"'
end
execute 'preseed-rskj-config' do
command "echo \"rskj shared/config select #{node['rskj']['network']}\" | debconf-set-selections"
not_if "debconf-get-selections | grep -q \"shared/config.*#{node['rskj']['network']}\""
end
apt_package 'rskj' do
response_file 'rskj-preseed.cfg.erb'
response_file_variables network: node['rskj']['network']
options '--assume-yes'
version node['rskj']['version']
end

View File

@@ -1,6 +1,6 @@
#_preseed_V1
# Do you agree to the terms of the applicable licenses?
rskj shared/accepted-rsk-license-v1-1 select true
rskj shared/accepted-rsk-license-v1-1 boolean true
# Choose a configuration environment to run your node.
# Choices: mainnet, testnet, regtest
rskj shared/config select <%= @network %>

View File

@@ -9,7 +9,7 @@ end
describe package('rskj') do
it { should be_installed }
its('version') { should eq '7.0.0~jammy' }
its('version') { should eq '9.0.1~noble' }
end
describe service('rsk') do