15 Commits

Author SHA1 Message Date
fddcd4899e Ignore default db for migration/management 2026-04-10 12:57:51 +04:00
8e11df4544 Update PG migration runbook 2026-04-10 12:49:28 +04:00
0020677ab2 Drone: Make database details configurable 2026-04-10 08:37:06 +04:00
09412f69e8 Move doc 2026-04-10 08:36:54 +04:00
bc3f291bd2 WIP Prepare postgres for migration by replication 2026-04-10 08:25:55 +04:00
6583cd7010 Upgrade WAL config for PG14 2026-04-08 15:41:13 +04:00
290af8177a Refactor postgres server recipes/resource 2026-04-08 15:41:10 +04:00
2cb5540a7b Add new postgres replica (v12) 2026-04-07 16:56:01 +04:00
002ad2ca62 Update Gandi API key
Co-authored-by: Greg Karékinian <greg@karekinian.com>
2026-04-07 16:53:43 +04:00
7710231fc4 Add CORS headers for Garage web access
Fixes Discourse plugin JS usage
2026-04-07 16:53:09 +04:00
Greg Karekinian
d68deb96e9 Update openresty submodule 2026-04-07 11:40:35 +02:00
01cdd000cb Update nodes 2026-03-27 14:30:46 +04:00
ea8e2de70a Merge pull request 'Use Ubuntu 22.04 for new VMs' (#521) from jammy_jellyfish into master
Reviewed-on: #521
2026-03-27 10:28:22 +00:00
8ad3674c4d Install libvirt CLI on KVM hosts 2026-03-27 14:27:28 +04:00
Râu Cao
25192ad3ce Use Ubuntu 22.04 for new VMs
Also, remove the custom config image generation and replace it with
`--cloud-init` options.
2026-03-26 20:35:30 +04:00
40 changed files with 1036 additions and 107 deletions

View File

@@ -1,4 +1,4 @@
{
"name": "garage-14",
"public_key": "-----BEGIN PUBLIC KEY-----\nMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAqNY8AuaM4byhaTZacfRJ\nv/qyHxcDJOMX/ElF1H908spdbB2ZiLXHOH1Ucw1d+NV6/QUtWk+ikKFPpasnatD7\nmjE57noH+H47Rll0nD7oT+in+fOBDHF9R0P6/qyRSdJbJkHOh0iC0MG4LcUfv0AY\nnVBW5iLZSe/PC3+PvhCv7yrx3ikSs0mg1ZWppw0ka5Ek3ZCZp5FB4L6++GYWpM+1\n6YI0CjMoRcXsaEQsJWhxHXT8/KDhW0BR8woZUGm0/Yn4teLYJzioxRfBep3lbygx\nOIsDN9IJzo2zVTGPDZQLXhVemIhzaepqTC77ibH7F0gN/1vsQBc/qf7UhbwaF4rR\ndQIDAQAB\n-----END PUBLIC KEY-----\n"
"public_key": "-----BEGIN PUBLIC KEY-----\nMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAypINv1zTZ7+pyT0iRhik\n0W70ASYADo7qK7QyE9/3nu2sUrP1IjoNFsv/ceKwicH7Fw2Ei1o+yKZlKn7zJzY7\n93YRZndF04VH2bmqy0uOWK0Bdat7gCld5bvS6FmRflg7g64LFb33/64QIVsVGHGL\nYF2TO//x79t9JKcQDa4h5MOWzJNTFuEcUGa0gJjMYpWGVHEJSgRuIgyhXmyIJJgY\nguj6ymTm5+3VS7NzoNy2fbTt1LRpHb5UWrCR15oiLZiDSMLMx0CcGOCmrhvODi4k\n0umw+2NPd1G50s9z7KVbTqybuQ65se2amRnkVcNfaBIU5qk9bVqcmhZlEozmBZCd\ndwIDAQAB\n-----END PUBLIC KEY-----\n"
}

4
clients/garage-15.json Normal file
View File

@@ -0,0 +1,4 @@
{
"name": "garage-15",
"public_key": "-----BEGIN PUBLIC KEY-----\nMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAy14sTt5gxVZi9C3KIEBu\nDyUgbb6jc3/GR22fNPTqV6uDHhxzhE2UsYwY/7yuA1RasdwHEOBWZaoC0Om5/Zmi\n8gn6//v1ILyLNaAcw+SQcxZkCN8Sk/0atRS9HYk1agE8Mvh72Fe2z3l+92VMefy7\nJwJUNNBTbnV2WVCchChoWnfhI7bkSLSHp0M2MO2pI+lkpSdmfkJSa5z9zihgxKO8\nXfvhryDCZNvfRVHhwc+ffpap0gLF0H9riGKE4FwLy4YqbuW1Tgm6bObb9bpOIw6Q\nVfH3kC/KMK5FlnxGmYtDkhRJ/wjGInRBk9WK/QOmjyd2FVxipEQmA4RdjlznRC9I\nrwIDAQAB\n-----END PUBLIC KEY-----\n"
}

4
clients/leo.json Normal file
View File

@@ -0,0 +1,4 @@
{
"name": "leo",
"public_key": "-----BEGIN PUBLIC KEY-----\nMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAnFfQsJnREjbXTtpT6BVt\naBaUzRmCQi8Du0TzeUG0ENrY0p5Exqleye2rC6bJlB3PER1xr5zdtuXLgbcVumIb\nzroU5JPtFbQk7r/pj0atT+UEYzl16iuEpprQ/bug+f0nE514USr6YG4G+tlZ/jBI\nSHsCQF1P8ufXFLW0ewC7rdvBkgA+DwK14naRxS4jO5MSl4wmNTjs/jymTg508mQq\nf5tG52t8qFdgn9pRdBXmyTpPtwK7I4rZ+1Qn+1E5m4oQUZsxh8Ba1bGbKotVO7Ua\nYL1yCGx7zRRUvLLIdSMvlRXTJBUSQtQ8P4QUDWTY1Na2w3t9sulKg2Lwsw8tktvC\nCwIDAQAB\n-----END PUBLIC KEY-----\n"
}

4
clients/postgres-10.json Normal file
View File

@@ -0,0 +1,4 @@
{
"name": "postgres-10",
"public_key": "-----BEGIN PUBLIC KEY-----\nMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEA2oBb5omC7ZionWhudgFm\n2NGcRXsI0c7+g1+0duaxj5dziaRTltqzpRJTfiJD6R36FcvEqwGc+qQgYSMzc1Xd\nY4OTvJFIDiFAmROm/DZYgFtTDldVNJZO2bbU3COYf/Z2Poq56gC4zLLd/zf6shgb\n2Mty8PlQ82JJAY9EMI3aAifdnZ1k/g4weFC4LFg9lUcNNXOwlAjp//LJ3ku3aY1r\nwW74msSeWEjE44YZdWyMYgM7Fy1hz5giHFQtRdOLemRCWQ8h26wn/cmWld7lsLg+\nlYqxokxWXGv8r5zR8kDTBkd0dxY7ZMbo7oESY4Uhuf4UReMe2ZGHto1E7w3llSj+\n7wIDAQAB\n-----END PUBLIC KEY-----\n"
}

View File

@@ -1,23 +1,16 @@
{
"id": "gandi_api",
"key": {
"encrypted_data": "lU7/xYTmP5Sb6SsK5TNNIyegWozzBtUzpg7oDdl6gcz9FEMmG2ft0Ljh5Q==\n",
"iv": "EZPQD3C+wsP/mBhF\n",
"auth_tag": "vF9E8Pj4Z8quJJdOMg/QTw==\n",
"version": 3,
"cipher": "aes-256-gcm"
},
"access_token": {
"encrypted_data": "1Uw69JkNrmb8LU/qssuod1SlqxxrWR7TJQZeeivRrNzrMIVTEW/1uwJIYL6b\nM4GeeYl9lIRlMMmLBkc=\n",
"iv": "cc1GJKu6Cf4DkIgX\n",
"auth_tag": "ERem4S7ozG695kjvWIMghw==\n",
"encrypted_data": "+skwxHnpAj/3d3e2u7s7B9EydbETj8b0flWahvb5gt/o4JYFWHrhIyX/0IVa\n4wgmu08eDgU51i0knGA=\n",
"iv": "ONKrFCt8Oj3GKIQ5\n",
"auth_tag": "j9Hrk8ZZFMQub4NUO+2e4g==\n",
"version": 3,
"cipher": "aes-256-gcm"
},
"domains": {
"encrypted_data": "scZ5blsSjs54DlitR7KZ3enLbyceOR5q0wjHw1golQ==\n",
"iv": "oDcHm7shAzW97b4t\n",
"auth_tag": "62Zais9yf68SwmZRsmZ3hw==\n",
"encrypted_data": "lGfoPHdXEYYdJmoIA9M119wjVl1v4UzIv5gHADwx0A==\n",
"iv": "q6XKbxhW7X9ONxNt\n",
"auth_tag": "ns9WJH8Oe75siWu+sOZkRg==\n",
"version": 3,
"cipher": "aes-256-gcm"
}

287
doc/postgres/migration.md Normal file
View File

@@ -0,0 +1,287 @@
# Migrating PostgreSQL cluster to a new major version
## Summary
1. Dump from a replica
2. Restore to fresh VM running new major version
3. Add logical replication for delta sync from current/old primary
4. Switch primary to new server
5. Remove logical replication on new server
## Runbook
* Primary host: `PRIMARY_HOST`
* Replica host: `REPLICA_HOST`
* New PG14 host: `NEW_HOST`
* PostgreSQL superuser: `postgres`
* Running locally on each machine via `sudo -u postgres`
Adjust hostnames/IPs/etc. where needed.
---
### 🟢 0. PRIMARY — Pre-checks
```bash
sudo -u postgres psql -c "SHOW wal_level;"
sudo -u postgres psql -c "SHOW max_replication_slots;"
```
If needed, edit config:
```bash
sudo -u postgres vi $PGDATA/postgresql.conf
```
Ensure:
```conf
wal_level = logical
max_replication_slots = 10
```
Restart if changed:
```bash
sudo systemctl restart postgresql
```
---
### 🔵🟡 3. Create keypair for syncing dump later
🔵 On NEW_HOST:
```bash
sudo mkdir -p /home/postgres/.ssh && \
sudo chown -R postgres:postgres /home/postgres && \
sudo chmod 700 /home/postgres/.ssh && \
sudo -u postgres bash -c 'ssh-keygen -t ecdsa -b 256 -f /home/postgres/.ssh/id_ecdsa -N "" -C "postgres@$(hostname)"' && \
sudo cat /home/postgres/.ssh/id_ecdsa.pub
```
Copy the public key from the above output
🟡 On replica:
```bash
sudo mkdir -p /home/postgres/.ssh && \
sudo chown -R postgres:postgres /home/postgres && \
sudo chmod 700 /home/postgres/.ssh && \
echo [public_key] | sudo tee /home/postgres/.ssh/authorized_keys > /dev/null && \
sudo chmod 700 /home/postgres/.ssh
```
---
### 🟢 1. PRIMARY — Create publication and replication slots
```bash
sudo -u postgres pg_create_replication_publications
```
or
```bash
sudo -u postgres pg_create_replication_publication [db_name]
```
Listing publications and slots:
```bash
sudo -u postgres pg_list_replication_publications
sudo -u postgres pg_list_replication_slots
```
---
### 🟡 3. REPLICA — Pause replication
```bash
sudo -u postgres psql -c "SELECT pg_wal_replay_pause();"
```
Verify:
```bash
sudo -u postgres psql -c "SELECT pg_is_wal_replay_paused();"
```
---
### 🟡 4. REPLICA — Run dump
```bash
sudo -u postgres pg_dump_all_databases
```
or
```bash
sudo -u postgres bash -c "pg_dumpall --globals-only > /tmp/globals.sql"
sudo -u postgres pg_dump_database [db_name]
```
---
### 🟡 5. REPLICA — Resume replication
```bash
sudo -u postgres psql -c "SELECT pg_wal_replay_resume();"
```
---
### 🔵 6. COPY dumps to NEW HOST
From NEW_HOST:
```bash
export REPLICA_HOST=[private_ip] && \
cd /tmp && \
sudo -u postgres scp "postgres@$REPLICA_HOST:/tmp/globals.sql" . && \
sudo -u postgres scp "postgres@$REPLICA_HOST:/tmp/dump_*.tar.zst" .
```
---
### 🔵 7. NEW HOST (PostgreSQL 14) — Restore
#### 7.1 Restore globals
```bash
sudo -u postgres psql -f /tmp/globals.sql
```
---
#### 7.2 Create databases
```bash
sudo -u postgres psql -Atqc "SELECT datname FROM pg_database WHERE datallowconn AND datname NOT IN ('template1')" | \
xargs -I{} sudo -u postgres createdb {}
```
or
```bash
sudo -u postgres createdb [db_name]
```
---
#### 7.3 Restore each database
```bash
sudo -u postgres pg_restore_all_databases
```
or
```bash
sudo -u postgres pg_restore_database [db_name]
```
---
### 🔵 8. NEW HOST — Create subscriptions
```bash
sudo -u postgres pg_create_replication_subscriptions
```
or
```bash
sudo -u postgres pg_create_replication_subscription [db_name]
```
---
### 🔵 9. NEW HOST — Monitor replication
```bash
sudo -u postgres pg_list_replication_subscriptions
```
---
### 🔴 11. CUTOVER
#### 11.1 Stop writes on old primary
Put app(s) in maintenance mode, stop the app/daemons.
---
#### 11.2 Wait for replication to catch up
TODO: not the best way to check, since WAL LSNs keep increasing
```bash
sudo -u postgres psql -d [db_name] -c "SELECT * FROM pg_stat_subscription;"
```
---
#### 11.3 Fix sequences
Run per DB:
```bash
sudo -u postgres pg_fix_sequences_in_all_databases
```
or
```bash
sudo -u postgres pg_fix_sequences [db_name]
```
---
#### 11.4 Point app to NEW_HOST
1. Update `pg.kosmos.local` in `/etc/hosts` on app server(s). For example:
```bash
export NEW_PG_PRIMARY=[private_ip]
bundle exec knife ssh roles:ejabberd -a knife_zero.host "sudo sed -r \"s/^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+\s(pg.kosmos.local)/$NEW_PG_PRIMARY\t\1/\" -i /etc/hosts"
```
Or override node attribute(s) if necessary and/or approporiate.
2. Start the app/daemons, and deactivate maintenance mode.
---
### 🧹 12. CLEANUP NEW_HOST
```bash
sudo -u postgres pg_drop_replication_subscriptions
```
---
### 🧹 13. CLEANUP PRIMARY
TODO: Looks like slots are dropped automatically, when subscriptions are dropped
```bash
sudo -u postgres pg_drop_replication_publications
```
---
### 🧹 13. CLEANUP Chef
Once all apps/databases are migrated, update the role in the node
config of the new primary to 'postgres_primary' and converge it.
Also delete the old primary node config from the Chef repo.
---
### ✅ DONE
---

View File

@@ -3,15 +3,15 @@
"chef_environment": "production",
"normal": {
"knife_zero": {
"host": "10.1.1.157"
"host": "10.1.1.151"
}
},
"automatic": {
"fqdn": "garage-14",
"os": "linux",
"os_version": "5.15.0-1059-kvm",
"os_version": "5.15.0-1095-kvm",
"hostname": "garage-14",
"ipaddress": "192.168.122.251",
"ipaddress": "192.168.122.36",
"roles": [
"base",
"kvm_guest",
@@ -30,6 +30,7 @@
"timezone_iii::debian",
"ntp::default",
"ntp::apparmor",
"kosmos-base::journald_conf",
"kosmos-base::systemd_emails",
"apt::unattended-upgrades",
"kosmos-base::firewall",
@@ -46,13 +47,13 @@
"cloud": null,
"chef_packages": {
"chef": {
"version": "18.8.54",
"chef_root": "/opt/chef/embedded/lib/ruby/gems/3.1.0/gems/chef-18.8.54/lib",
"version": "18.10.17",
"chef_root": "/opt/chef/embedded/lib/ruby/gems/3.1.0/gems/chef-18.10.17/lib",
"chef_effortless": null
},
"ohai": {
"version": "18.2.8",
"ohai_root": "/opt/chef/embedded/lib/ruby/gems/3.1.0/gems/ohai-18.2.8/lib/ohai"
"version": "18.2.13",
"ohai_root": "/opt/chef/embedded/lib/ruby/gems/3.1.0/gems/ohai-18.2.13/lib/ohai"
}
}
},

65
nodes/garage-15.json Normal file
View File

@@ -0,0 +1,65 @@
{
"name": "garage-15",
"chef_environment": "production",
"normal": {
"knife_zero": {
"host": "10.1.1.82"
}
},
"automatic": {
"fqdn": "garage-15",
"os": "linux",
"os_version": "5.15.0-1095-kvm",
"hostname": "garage-15",
"ipaddress": "192.168.122.57",
"roles": [
"base",
"kvm_guest",
"garage_node"
],
"recipes": [
"kosmos-base",
"kosmos-base::default",
"kosmos_kvm::guest",
"kosmos_garage",
"kosmos_garage::default",
"kosmos_garage::firewall_rpc",
"kosmos_garage::firewall_apis",
"apt::default",
"timezone_iii::default",
"timezone_iii::debian",
"ntp::default",
"ntp::apparmor",
"kosmos-base::journald_conf",
"kosmos-base::systemd_emails",
"apt::unattended-upgrades",
"kosmos-base::firewall",
"kosmos-postfix::default",
"postfix::default",
"postfix::_common",
"postfix::_attributes",
"postfix::sasl_auth",
"hostname::default",
"firewall::default"
],
"platform": "ubuntu",
"platform_version": "22.04",
"cloud": null,
"chef_packages": {
"chef": {
"version": "18.10.17",
"chef_root": "/opt/chef/embedded/lib/ruby/gems/3.1.0/gems/chef-18.10.17/lib",
"chef_effortless": null
},
"ohai": {
"version": "18.2.13",
"ohai_root": "/opt/chef/embedded/lib/ruby/gems/3.1.0/gems/ohai-18.2.13/lib/ohai"
}
}
},
"run_list": [
"role[base]",
"role[kvm_guest]",
"role[garage_node]"
]
}

56
nodes/leo.json Normal file
View File

@@ -0,0 +1,56 @@
{
"name": "leo",
"normal": {
"knife_zero": {
"host": "leo.kosmos.org"
}
},
"automatic": {
"fqdn": "leo",
"os": "linux",
"os_version": "5.15.0-164-generic",
"hostname": "leo",
"ipaddress": "5.9.81.116",
"roles": [
"base"
],
"recipes": [
"kosmos-base",
"kosmos-base::default",
"kosmos_kvm::host",
"apt::default",
"timezone_iii::default",
"timezone_iii::debian",
"ntp::default",
"ntp::apparmor",
"kosmos-base::journald_conf",
"kosmos-base::systemd_emails",
"apt::unattended-upgrades",
"kosmos-base::firewall",
"kosmos-postfix::default",
"postfix::default",
"postfix::_common",
"postfix::_attributes",
"postfix::sasl_auth",
"hostname::default"
],
"platform": "ubuntu",
"platform_version": "22.04",
"cloud": null,
"chef_packages": {
"chef": {
"version": "18.10.17",
"chef_root": "/opt/chef/embedded/lib/ruby/gems/3.1.0/gems/chef-18.10.17/lib",
"chef_effortless": null
},
"ohai": {
"version": "18.2.13",
"ohai_root": "/opt/chef/embedded/lib/ruby/gems/3.1.0/gems/ohai-18.2.13/lib/ohai"
}
}
},
"run_list": [
"role[base]",
"recipe[kosmos_kvm::host]"
]
}

63
nodes/postgres-10.json Normal file
View File

@@ -0,0 +1,63 @@
{
"name": "postgres-10",
"chef_environment": "production",
"normal": {
"knife_zero": {
"host": "10.1.1.176"
}
},
"automatic": {
"fqdn": "postgres-10",
"os": "linux",
"os_version": "5.15.0-1095-kvm",
"hostname": "postgres-10",
"ipaddress": "192.168.122.41",
"roles": [
"base",
"kvm_guest",
"postgresql_replica"
],
"recipes": [
"kosmos-base",
"kosmos-base::default",
"kosmos_kvm::guest",
"kosmos_postgresql::hostsfile",
"kosmos_postgresql::replica",
"kosmos_postgresql::firewall",
"apt::default",
"timezone_iii::default",
"timezone_iii::debian",
"ntp::default",
"ntp::apparmor",
"kosmos-base::journald_conf",
"kosmos-base::systemd_emails",
"apt::unattended-upgrades",
"kosmos-base::firewall",
"kosmos-postfix::default",
"postfix::default",
"postfix::_common",
"postfix::_attributes",
"postfix::sasl_auth",
"hostname::default"
],
"platform": "ubuntu",
"platform_version": "22.04",
"cloud": null,
"chef_packages": {
"chef": {
"version": "18.10.17",
"chef_root": "/opt/chef/embedded/lib/ruby/gems/3.1.0/gems/chef-18.10.17/lib",
"chef_effortless": null
},
"ohai": {
"version": "18.2.13",
"ohai_root": "/opt/chef/embedded/lib/ruby/gems/3.1.0/gems/ohai-18.2.13/lib/ohai"
}
}
},
"run_list": [
"role[base]",
"role[kvm_guest]",
"role[postgresql_replica]"
]
}

View File

@@ -0,0 +1,8 @@
name "postgresql_replica_logical"
run_list [
"kosmos_postgresql::hostsfile",
"kosmos_postgresql::replica_logical",
"kosmos_postgresql::firewall",
"kosmos_postgresql::management_scripts"
]

View File

@@ -1,2 +1,6 @@
node.default["kosmos_drone"]["domain"] = "drone.kosmos.org"
node.default["kosmos_drone"]["upstream_port"] = 80
node.default["kosmos_drone"]["pg_host"] = "pg.kosmos.local"
node.default["kosmos_drone"]["pg_port"] = 5432
node.default["kosmos_drone"]["pg_db"] = "drone"
node.default["kosmos_drone"]["pg_user"] = "drone"

View File

@@ -9,11 +9,11 @@ credentials = data_bag_item("credentials", "drone")
drone_credentials = data_bag_item('credentials', 'drone')
postgres_config = {
username: "drone",
password: drone_credentials["postgresql_password"],
host: "pg.kosmos.local",
port: 5432,
database: "drone"
host: node["kosmos_drone"]["pg_host"],
port: node["kosmos_drone"]["pg_port"],
database: node["kosmos_drone"]["pg_db"],
username: node["kosmos_drone"]["pg_user"],
password: drone_credentials["postgresql_password"]
}
directory deploy_path do

View File

@@ -18,6 +18,7 @@ server {
}
location / {
add_header 'Access-Control-Allow-Origin' '*' always;
proxy_intercept_errors on;
proxy_cache garage_cache;
proxy_pass http://garage_web;

View File

@@ -1,9 +1,9 @@
release = "20240514"
release = "20260320"
img_filename = "ubuntu-22.04-server-cloudimg-amd64-disk-kvm"
node.default["kosmos_kvm"]["host"]["qemu_base_image"] = {
"url" => "https://cloud-images.ubuntu.com/releases/jammy/release-#{release}/#{img_filename}.img",
"checksum" => "2e7698b3ebd7caead06b08bd3ece241e6ce294a6db01f92ea12bcb56d6972c3f",
"checksum" => "f7173eb7137b4f0ebeaea8fffe68ecdab1e3c787bde1fd8dfdf27103554332b3",
"path" => "/var/lib/libvirt/images/base/#{img_filename}-#{release}.qcow2"
}

View File

@@ -3,7 +3,7 @@
# Recipe:: host
#
package %w(virtinst libvirt-daemon-system)
package %w(virtinst libvirt-daemon-system libvirt-clients)
directory "/var/lib/libvirt/images/base" do
recursive true

View File

@@ -17,7 +17,7 @@ DISKSIZE=${4:-10} # 10GB default
# Directory where image files will be stored
IMAGE_DIR=/var/lib/libvirt/images
IMAGE_PATH=$IMAGE_DIR/${VMNAME}.qcow2
CIDATA_PATH=${IMAGE_DIR}/cidata-${VMNAME}.iso
CIDATA_PATH=${IMAGE_DIR}/${VMNAME}-cloudinit
BASE_FILE=<%= @base_image_path %>
# Create the VM image if it does not already exist
@@ -38,9 +38,8 @@ qemu-img info "$IMAGE_PATH"
# Check if the cloud-init metadata file exists
# if not, generate it
if [ ! -r $CIDATA_PATH ]; then
pushd $(dirname $CIDATA_PATH)
mkdir -p $VMNAME
cd $VMNAME
mkdir -p $CIDATA_PATH
pushd $CIDATA_PATH
cat > user-data <<-EOS
#cloud-config
@@ -62,25 +61,19 @@ instance-id: $VMNAME
local-hostname: $VMNAME
EOS
genisoimage -output "$CIDATA_PATH" -volid cidata -joliet -rock user-data meta-data
chown libvirt-qemu:kvm "$CIDATA_PATH"
chmod 600 "$CIDATA_PATH"
popd
fi
# setting --os-variant to ubuntu20.04 and ubuntu18.04 breaks SSH and networking
virt-install \
--name "$VMNAME" \
--ram "$RAM" \
--vcpus "$CPUS" \
--cpu host \
--arch x86_64 \
--os-type linux \
--os-variant ubuntu16.04 \
--osinfo detect=on,name=ubuntujammy \
--hvm \
--virt-type kvm \
--disk "$IMAGE_PATH" \
--cdrom "$CIDATA_PATH" \
--boot hd \
--network=bridge=virbr0,model=virtio \
--graphics none \
@@ -88,4 +81,5 @@ virt-install \
--console pty \
--channel unix,mode=bind,path=/var/lib/libvirt/qemu/$VMNAME.guest_agent.0,target_type=virtio,name=org.qemu.guest_agent.0 \
--autostart \
--import
--import \
--cloud-init root-password-generate=off,disable=on,meta-data=$CIDATA_PATH/meta-data,user-data=$CIDATA_PATH/user-data

View File

@@ -1,3 +1,8 @@
node.default['kosmos_postgresql']['postgresql_version'] = "14"
# This is set to false by default, and set to true in the server resource
# for replicas.
node.default['kosmos_postgresql']['ready_to_set_up_replica'] = false
# Address space from which clients are allowed to connect
node.default['kosmos_postgresql']['access_addr'] = "10.1.1.0/24"

View File

@@ -0,0 +1,31 @@
#!/bin/bash
set -euo pipefail
DB_NAME="${1:?Usage: $0 <database_name>}"
echo "== Processing DB: $DB_NAME =="
# Create publication (idempotent)
psql -d "$DB_NAME" -v ON_ERROR_STOP=1 <<'SQL'
DO $$
BEGIN
IF NOT EXISTS (
SELECT 1 FROM pg_publication WHERE pubname = 'migrate_pub'
) THEN
CREATE PUBLICATION migrate_pub FOR ALL TABLES;
END IF;
END
$$;
SQL
# Create logical replication slot (idempotent-ish)
SLOT="migrate_slot_${DB_NAME}"
if ! psql -d "$DB_NAME" -Atqc "SELECT 1 FROM pg_replication_slots WHERE slot_name = '$SLOT'" | grep -q 1; then
echo " Creating slot: $SLOT"
psql -d "$DB_NAME" -c "SELECT pg_create_logical_replication_slot('$SLOT', 'pgoutput');"
else
echo " Slot already exists: $SLOT"
fi
echo "== Done =="

View File

@@ -0,0 +1,34 @@
#!/bin/bash
set -e
echo "== Creating publication in each database =="
for db in $(psql -Atqc "SELECT datname FROM pg_database WHERE datallowconn AND datname NOT IN ('template1','postgres')"); do
echo "Processing DB: $db"
# Create publication (idempotent)
psql -d "$db" -v ON_ERROR_STOP=1 <<SQL
DO \$\$
BEGIN
IF NOT EXISTS (
SELECT 1 FROM pg_publication WHERE pubname = 'migrate_pub'
) THEN
CREATE PUBLICATION migrate_pub FOR ALL TABLES;
END IF;
END
\$\$;
SQL
# Create logical replication slot (idempotent-ish)
SLOT="migrate_slot_${db}"
if ! psql -d "$db" -Atqc "SELECT 1 FROM pg_replication_slots WHERE slot_name = '$SLOT'" | grep -q 1; then
echo " Creating slot: $SLOT"
psql -d "$db" -c "SELECT pg_create_logical_replication_slot('$SLOT', 'pgoutput');"
else
echo " Slot already exists: $SLOT"
fi
done
echo "== Done =="

View File

@@ -0,0 +1,34 @@
#!/bin/bash
set -e
echo "== Dropping subscriptions slots and publications =="
for db in $(psql -Atqc "SELECT datname FROM pg_database WHERE datallowconn AND datname NOT IN ('template1','postgres')"); do
echo "Processing DB: $db"
SLOT="migrate_slot_${db}"
# Drop slot if exists
if psql -d "$db" -Atqc "SELECT 1 FROM pg_replication_slots WHERE slot_name = '$SLOT'" | grep -q 1; then
echo " Dropping slot: $SLOT"
psql -d "$db" -c "SELECT pg_drop_replication_slot('$SLOT');"
else
echo " Slot not found: $SLOT"
fi
# Drop publication if exists
psql -d "$db" -v ON_ERROR_STOP=1 <<SQL
DO \$\$
BEGIN
IF EXISTS (
SELECT 1 FROM pg_publication WHERE pubname = 'migrate_pub'
) THEN
DROP PUBLICATION migrate_pub;
END IF;
END
\$\$;
SQL
done
echo "== Done =="

View File

@@ -0,0 +1,29 @@
#!/usr/bin/env bash
set -e
echo "== Dropping subscriptions =="
for db in $(psql -Atqc "SELECT datname FROM pg_database WHERE datallowconn AND datname NOT IN ('template1','postgres')"); do
echo "Processing DB: $db"
SUB="migrate_sub_${db}"
# Check if subscription exists
EXISTS=$(psql -d "$db" -Atqc "SELECT 1 FROM pg_subscription WHERE subname = '$SUB'")
if [ "$EXISTS" = "1" ]; then
echo " Found subscription: $SUB"
# Disable first (good practice)
psql -d "$db" -c "ALTER SUBSCRIPTION $SUB DISABLE;"
# Drop it (must be top-level)
psql -d "$db" -c "DROP SUBSCRIPTION $SUB;"
else
echo " No subscription: $SUB"
fi
done
echo "== Done =="

View File

@@ -0,0 +1,9 @@
#!/bin/bash
cd /tmp && \
(pg_dumpall --globals-only > globals.sql) && \
psql -Atqc "SELECT datname FROM pg_database WHERE datallowconn AND datname NOT IN (''template0'',''postgres'')" | \
xargs -I{} -P4 sh -c "
pg_dump -Fd -j 4 -d \"{}\" -f dump_{} &&
tar -cf - dump_{} | zstd -19 -T0 > dump_{}.tar.zst &&
rm -rf dump_{}
"

View File

@@ -0,0 +1,10 @@
#!/bin/bash
set -euo pipefail
DB_NAME="${1:?Usage: $0 <database_name>}"
cd /tmp
pg_dump -Fd -j 4 -d "$DB_NAME" -f "dump_${DB_NAME}"
tar -cf - "dump_${DB_NAME}" | zstd -19 -T0 > "dump_${DB_NAME}.tar.zst"
rm -rf "dump_${DB_NAME}"

View File

@@ -0,0 +1,35 @@
#!/bin/bash
set -e
DB="$1"
if [ -z "$DB" ]; then
echo "Usage: $0 <database>"
exit 1
fi
echo "== Fixing sequences in database: $DB =="
SQL=$(psql -d "$DB" -Atqc "
SELECT
'SELECT setval(' ||
quote_literal(pg_get_serial_sequence(quote_ident(n.nspname)||'.'||quote_ident(c.relname), a.attname)) ||
', COALESCE(MAX(' || quote_ident(a.attname) || '), 0) + 1, false) FROM ' ||
quote_ident(n.nspname)||'.'||quote_ident(c.relname) || ';'
FROM pg_class c
JOIN pg_namespace n ON n.oid = c.relnamespace
JOIN pg_attribute a ON a.attrelid = c.oid
WHERE c.relkind = 'r'
AND a.attnum > 0
AND NOT a.attisdropped
AND pg_get_serial_sequence(quote_ident(n.nspname)||'.'||quote_ident(c.relname), a.attname) IS NOT NULL;
")
if [ -z "$SQL" ]; then
echo "No sequences found in $DB"
exit 0
fi
echo "$SQL" | psql -d "$DB"
echo "== Done =="

View File

@@ -0,0 +1,38 @@
#!/bin/bash
set -e
echo "== Fixing sequences across all databases =="
for db in $(psql -Atqc "SELECT datname FROM pg_database WHERE datallowconn AND datname NOT IN ('template1','postgres')"); do
echo "---- DB: $db ----"
# Generate fix statements
SQL=$(psql -d "$db" -Atqc "
SELECT
'SELECT setval(' ||
quote_literal(pg_get_serial_sequence(quote_ident(n.nspname)||'.'||quote_ident(c.relname), a.attname)) ||
', COALESCE(MAX(' || quote_ident(a.attname) || '), 0) + 1, false) FROM ' ||
quote_ident(n.nspname)||'.'||quote_ident(c.relname) || ';'
FROM pg_class c
JOIN pg_namespace n ON n.oid = c.relnamespace
JOIN pg_attribute a ON a.attrelid = c.oid
WHERE c.relkind = 'r'
AND a.attnum > 0
AND NOT a.attisdropped
AND pg_get_serial_sequence(quote_ident(n.nspname)||'.'||quote_ident(c.relname), a.attname) IS NOT NULL;
")
if [ -z "$SQL" ]; then
echo "No sequences found in $db"
continue
fi
echo "Fixing sequences in $db..."
# Execute generated statements
echo "$SQL" | psql -d "$db"
done
echo "== Done fixing sequences =="

View File

@@ -0,0 +1,5 @@
#!/bin/bash
for db in $(psql -Atqc "SELECT datname FROM pg_database WHERE datallowconn AND datname NOT IN ('template1','postgres')"); do
echo "DB: $db"
psql -d "$db" -Atqc "SELECT pubname FROM pg_publication;"
done

View File

@@ -0,0 +1,5 @@
#!/bin/bash
psql -c "
SELECT slot_name,
pg_size_pretty(pg_wal_lsn_diff(pg_current_wal_lsn(), restart_lsn))
FROM pg_replication_slots;"

View File

@@ -0,0 +1,5 @@
#!/bin/bash
for db in $(psql -Atqc "SELECT datname FROM pg_database WHERE datallowconn AND datname NOT IN ('template1','postgres')"); do
echo "==== DB: $db ===="
psql -d "$db" -c "SELECT * FROM pg_stat_subscription;"
done

View File

@@ -0,0 +1,12 @@
#!/bin/bash
set -euo pipefail
cd /tmp
for f in dump_*.tar.zst; do
db=$(echo $f | sed "s/dump_\(.*\)\.tar\.zst/\1/")
echo "Restoring $db"
zstd -d "$f" -c | tar -xf -
pg_restore -j 4 -d "$db" dump_$db
rm -rf "dump_$db"
done

View File

@@ -0,0 +1,14 @@
#!/bin/bash
set -euo pipefail
DB_NAME="${1:?Usage: $0 <database_name>}"
cd /tmp
FILE="dump_${DB_NAME}.tar.zst"
DIR="dump_${DB_NAME}"
echo "Restoring $DB_NAME"
zstd -d "$FILE" -c | tar -xf -
pg_restore -j 4 -d "$DB_NAME" "$DIR"
rm -rf "$DIR"

View File

@@ -36,10 +36,16 @@ class Chef
end
end
def postgresql_service_name
postgresql_version = "12"
def postgresql_version
node['kosmos_postgresql']['postgresql_version']
end
def postgresql_service
"postgresql@#{postgresql_version}-main"
end
def postgresql_data_dir
"/var/lib/postgresql/#{postgresql_version}/main"
end
end
end

View File

@@ -0,0 +1,121 @@
#
# Cookbook:: kosmos_postgresql
# Recipe:: management_scripts
#
credentials = data_bag_item('credentials', 'postgresql')
cookbook_file "/usr/local/bin/pg_dump_all_databases" do
source "dump_all_databases.sh"
user "postgres"
group "postgres"
mode "0744"
end
cookbook_file "/usr/local/bin/pg_dump_database" do
source "dump_database.sh"
user "postgres"
group "postgres"
mode "0744"
end
cookbook_file "/usr/local/bin/pg_restore_all_databases" do
source "restore_all_databases.sh"
user "postgres"
group "postgres"
mode "0744"
end
cookbook_file "/usr/local/bin/pg_restore_database" do
source "restore_database.sh"
user "postgres"
group "postgres"
mode "0744"
end
cookbook_file "/usr/local/bin/pg_create_replication_publications" do
source "create_publications.sh"
user "postgres"
group "postgres"
mode "0744"
end
cookbook_file "/usr/local/bin/pg_create_replication_publication" do
source "create_publication.sh"
user "postgres"
group "postgres"
mode "0744"
end
cookbook_file "/usr/local/bin/pg_drop_replication_publications" do
source "drop_publications.sh"
user "postgres"
group "postgres"
mode "0744"
end
cookbook_file "/usr/local/bin/pg_list_replication_publications" do
source "list_publications.sh"
user "postgres"
group "postgres"
mode "0744"
end
cookbook_file "/usr/local/bin/pg_list_replication_slots" do
source "list_replication_slots.sh"
user "postgres"
group "postgres"
mode "0744"
end
template "/usr/local/bin/pg_create_replication_subscriptions" do
source "create_subscriptions.sh.erb"
user "postgres"
group "postgres"
mode "0740"
variables pg_host: "pg.kosmos.local",
pg_port: 5432,
pg_user: "replication",
pg_pass: credentials["replication_password"]
sensitive true
end
template "/usr/local/bin/pg_create_replication_subscription" do
source "create_subscription.sh.erb"
user "postgres"
group "postgres"
mode "0740"
variables pg_host: "pg.kosmos.local",
pg_port: 5432,
pg_user: "replication",
pg_pass: credentials["replication_password"]
sensitive true
end
cookbook_file "/usr/local/bin/pg_drop_replication_subscriptions" do
source "drop_subscriptions.sh"
user "postgres"
group "postgres"
mode "0744"
end
cookbook_file "/usr/local/bin/pg_list_replication_subscriptions" do
source "list_subscriptions.sh"
user "postgres"
group "postgres"
mode "0744"
end
cookbook_file "/usr/local/bin/pg_fix_sequences_in_all_databases" do
source "fix_sequences.sh"
user "postgres"
group "postgres"
mode "0744"
end
cookbook_file "/usr/local/bin/pg_fix_sequences" do
source "fix_sequences.sh"
user "postgres"
group "postgres"
mode "0744"
end

View File

@@ -3,31 +3,6 @@
# Recipe:: primary
#
postgresql_version = "12"
postgresql_service = "postgresql@#{postgresql_version}-main"
service postgresql_service do
supports restart: true, status: true, reload: true
end
postgresql_custom_server postgresql_version do
role "primary"
end
postgresql_access "zerotier members" do
access_type "host"
access_db "all"
access_user "all"
access_addr "10.1.1.0/24"
access_method "md5"
notifies :reload, "service[#{postgresql_service}]", :immediately
end
postgresql_access "zerotier members replication" do
access_type "host"
access_db "replication"
access_user "replication"
access_addr "10.1.1.0/24"
access_method "md5"
notifies :reload, "service[#{postgresql_service}]", :immediately
end

View File

@@ -3,54 +3,31 @@
# Recipe:: replica
#
postgresql_version = "12"
postgresql_service = "postgresql@#{postgresql_version}-main"
postgresql_custom_server postgresql_version do
role "replica"
end
service postgresql_service do
supports restart: true, status: true, reload: true
end
postgresql_data_bag_item = data_bag_item('credentials', 'postgresql')
primary = postgresql_primary
unless primary.nil?
# TODO
postgresql_data_dir = "/var/lib/postgresql/#{postgresql_version}/main"
if primary.nil?
Chef::Log.warn("No PostgreSQL primary node found. Skipping replication setup.")
return
end
# FIXME get zerotier IP
execute "set up replication" do
command <<-EOF
# TODO Replace pg.kosmos.local with private IP once available
# via proper node attribute
# https://gitea.kosmos.org/kosmos/chef/issues/263
execute "set up replication" do
command <<-EOF
systemctl stop #{postgresql_service}
mv #{postgresql_data_dir} #{postgresql_data_dir}.old
pg_basebackup -h pg.kosmos.local -U replication -D #{postgresql_data_dir} -R
chown -R postgres:postgres #{postgresql_data_dir}
systemctl start #{postgresql_service}
EOF
environment 'PGPASSWORD' => postgresql_data_bag_item['replication_password']
sensitive true
not_if { ::File.exist? "#{postgresql_data_dir}/standby.signal" }
end
postgresql_access "zerotier members" do
access_type "host"
access_db "all"
access_user "all"
access_addr "10.1.1.0/24"
access_method "md5"
notifies :reload, "service[#{postgresql_service}]", :immediately
end
postgresql_access "zerotier members replication" do
access_type "host"
access_db "replication"
access_user "replication"
access_addr "10.1.1.0/24"
access_method "md5"
notifies :reload, "service[#{postgresql_service}]", :immediately
end
EOF
environment 'PGPASSWORD' => postgresql_data_bag_item['replication_password']
sensitive true
not_if { ::File.exist? "#{postgresql_data_dir}/standby.signal" }
end

View File

@@ -0,0 +1,15 @@
#
# Cookbook:: kosmos_postgresql
# Recipe:: replica_logical
#
postgresql_custom_server postgresql_version do
role "replica_logical"
end
# primary = postgresql_primary
#
# if primary.nil?
# Chef::Log.warn("No PostgreSQL primary node found. Skipping replication setup.")
# return
# end

View File

@@ -56,13 +56,15 @@ action :create do
timezone: "UTC", # default is GMT
listen_addresses: "0.0.0.0",
promote_trigger_file: "#{postgresql_data_dir}/failover.trigger",
wal_keep_segments: 256
wal_level: "logical",
wal_keep_size: 4096, # 256 segments, 16MB each
max_replication_slots: 16
}
postgresql_server_conf "main" do
version postgresql_version
additional_config additional_config
notifies :reload, "service[#{postgresql_service}]", :delayed
notifies :restart, "service[#{postgresql_service}]", :delayed
end
postgresql_user "replication" do
@@ -70,6 +72,24 @@ action :create do
replication true
password postgresql_credentials['replication_password']
end
postgresql_access "all members" do
access_type "host"
access_db "all"
access_user "all"
access_addr node['kosmos_postgresql']['access_addr']
access_method "md5"
notifies :reload, "service[#{postgresql_service}]", :immediately
end
postgresql_access "replication members" do
access_type "host"
access_db "replication"
access_user "replication"
access_addr node['kosmos_postgresql']['access_addr']
access_method "md5"
notifies :reload, "service[#{postgresql_service}]", :immediately
end
end
action_class do

View File

@@ -0,0 +1,31 @@
#!/bin/bash
set -euo pipefail
DB_NAME="${1:?Usage: $0 <database_name>}"
echo "== Processing DB: $DB_NAME =="
SLOT="migrate_slot_${DB_NAME}"
SUB="migrate_sub_${DB_NAME}"
psql -d "$DB_NAME" -v ON_ERROR_STOP=1 <<SQL
DO \$\$
BEGIN
IF NOT EXISTS (
SELECT 1 FROM pg_subscription WHERE subname = '$SUB'
) THEN
CREATE SUBSCRIPTION $SUB
CONNECTION 'host=<%= @pg_host %> port=<%= @pg_port %> dbname=$DB_NAME user=<%= @pg_user %> password=<%= @pg_pass %>'
PUBLICATION migrate_pub
WITH (
slot_name = '$SLOT',
create_slot = false,
copy_data = false,
enabled = true
);
END IF;
END
\$\$;
SQL
echo "== Done =="

View File

@@ -0,0 +1,34 @@
#!/bin/bash
set -e
echo "== Creating subscriptions for all databases =="
for db in $(psql -Atqc "SELECT datname FROM pg_database WHERE datallowconn AND datname NOT IN ('template1','postgres')"); do
echo "Processing DB: $db"
SLOT="migrate_slot_${db}"
SUB="migrate_sub_${db}"
psql -d "$db" -v ON_ERROR_STOP=1 <<SQL
DO \$\$
BEGIN
IF NOT EXISTS (
SELECT 1 FROM pg_subscription WHERE subname = '$SUB'
) THEN
CREATE SUBSCRIPTION $SUB
CONNECTION 'host=<%= @pg_host %> port=<%= @pg_port %> dbname=$db user=<%= @pg_user %> password=<%= @pg_pass %>'
PUBLICATION migrate_pub
WITH (
slot_name = '$SLOT',
create_slot = false,
copy_data = false,
enabled = true
);
END IF;
END
\$\$;
SQL
done
echo "== Done =="