WIP Prepare postgres for migration by replication

This commit is contained in:
2026-04-08 15:41:56 +04:00
parent 6583cd7010
commit 48e8ee0160
13 changed files with 254 additions and 1 deletions

View File

@@ -0,0 +1,7 @@
name "postgresql_replica_logical"
run_list %w(
kosmos_postgresql::hostsfile
kosmos_postgresql::replica_logical
kosmos_postgresql::firewall
)

View File

@@ -0,0 +1,34 @@
#!/bin/bash
set -e
echo "== Creating publication in each database =="
for db in $(psql -Atqc "SELECT datname FROM pg_database WHERE datallowconn AND datname NOT IN ('template0','template1')"); do
echo "Processing DB: $db"
# Create publication (idempotent)
psql -d "$db" -v ON_ERROR_STOP=1 <<SQL
DO \$\$
BEGIN
IF NOT EXISTS (
SELECT 1 FROM pg_publication WHERE pubname = 'migrate_pub'
) THEN
CREATE PUBLICATION migrate_pub FOR ALL TABLES;
END IF;
END
\$\$;
SQL
# Create logical replication slot (idempotent-ish)
SLOT="migrate_slot_${db}"
if ! psql -d "$db" -Atqc "SELECT 1 FROM pg_replication_slots WHERE slot_name = '$SLOT'" | grep -q 1; then
echo " Creating slot: $SLOT"
psql -d "$db" -c "SELECT pg_create_logical_replication_slot('$SLOT', 'pgoutput');"
else
echo " Slot already exists: $SLOT"
fi
done
echo "== Done =="

View File

@@ -0,0 +1,33 @@
set -e
echo "== Dropping subscriptions slots and publications on PRIMARY =="
for db in $(psql -Atqc "SELECT datname FROM pg_database WHERE datallowconn AND datname NOT IN ('template0','template1')"); do
echo "Processing DB: $db"
SLOT="migrate_slot_${db}"
# Drop slot if exists
if psql -d "$db" -Atqc "SELECT 1 FROM pg_replication_slots WHERE slot_name = '$SLOT'" | grep -q 1; then
echo " Dropping slot: $SLOT"
psql -d "$db" -c "SELECT pg_drop_replication_slot('$SLOT');"
else
echo " Slot not found: $SLOT"
fi
# Drop publication if exists
psql -d "$db" -v ON_ERROR_STOP=1 <<SQL
DO \$\$
BEGIN
IF EXISTS (
SELECT 1 FROM pg_publication WHERE pubname = 'migrate_pub'
) THEN
DROP PUBLICATION migrate_pub;
END IF;
END
\$\$;
SQL
done
echo "== Done =="

View File

@@ -0,0 +1,28 @@
set -e
echo "== Dropping subscriptions on PG14 =="
for db in $(psql -Atqc "SELECT datname FROM pg_database WHERE datallowconn AND datname NOT IN ('template0,'template1'')"); do
echo "Processing DB: $db"
SUB="migrate_sub_${db}"
# Disable first (important)
psql -d "$db" -c "ALTER SUBSCRIPTION $SUB DISABLE;" 2>/dev/null || true
# Drop subscription if exists
psql -d "$db" -v ON_ERROR_STOP=1 <<SQL
DO \$\$
BEGIN
IF EXISTS (
SELECT 1 FROM pg_subscription WHERE subname = '$SUB'
) THEN
DROP SUBSCRIPTION $SUB;
END IF;
END
\$\$;
SQL
done
echo "== Done =="

View File

@@ -0,0 +1,9 @@
#!/bin/bash
cd /tmp && \
(pg_dumpall --globals-only > globals.sql) && \
psql -Atqc "SELECT datname FROM pg_database WHERE datallowconn AND datname NOT IN (''template0'')" | \
xargs -I{} -P4 sh -c "
pg_dump -Fd -j 4 -d \"{}\" -f dump_{} &&
tar -cf - dump_{} | zstd -19 -T0 > dump_{}.tar.zst &&
rm -rf dump_{}
"

View File

@@ -0,0 +1,5 @@
#!/bin/bash
for db in $(psql -Atqc "SELECT datname FROM pg_database WHERE datallowconn"); do
echo "DB: $db"
psql -d "$db" -Atqc "SELECT pubname FROM pg_publication;"
done

View File

@@ -0,0 +1,5 @@
#!/bin/bash
psql -c "
SELECT slot_name,
pg_size_pretty(pg_wal_lsn_diff(pg_current_wal_lsn(), restart_lsn))
FROM pg_replication_slots;"

View File

@@ -0,0 +1,5 @@
#!/bin/bash
for db in $(psql -Atqc "SELECT datname FROM pg_database WHERE datallowconn AND datname NOT IN ('template0','template1')"); do
echo "==== DB: $db ===="
psql -d "$db" -c "SELECT * FROM pg_stat_subscription;"
done

View File

@@ -0,0 +1,8 @@
#!/bin/bash
cd /tmp
for f in dump_*.tar.zst; do
db=$(echo $f | sed "s/dump_\(.*\)\.tar\.zst/\1/")
echo "Restoring $db"
zstd -d "$f" -c | tar -xf -
pg_restore -j 4 -d "$db" dump_$db
done

View File

@@ -7,3 +7,37 @@ postgresql_custom_server postgresql_version do
role "primary"
end
cookbook_file "/usr/local/bin/pg_dump_all_databases" do
source "dump_all_databases.sh"
user "postgres"
group "postgres"
mode "0744"
end
cookbook_file "/usr/local/bin/pg_create_replication_publications" do
source "create_publications.sh"
user "postgres"
group "postgres"
mode "0744"
end
cookbook_file "/usr/local/bin/pg_drop_replication_publications" do
source "drop_publications.sh"
user "postgres"
group "postgres"
mode "0744"
end
cookbook_file "/usr/local/bin/pg_list_replication_publications" do
source "list_publications.sh"
user "postgres"
group "postgres"
mode "0744"
end
cookbook_file "/usr/local/bin/pg_list_replication_slots" do
source "list_replication_slots.sh"
user "postgres"
group "postgres"
mode "0744"
end

View File

@@ -0,0 +1,50 @@
#
# Cookbook:: kosmos_postgresql
# Recipe:: replica_logical
#
service postgresql_service do
supports restart: true, status: true, reload: true
end
postgresql_custom_server postgresql_version do
role "replica_logical"
end
postgresql_data_bag_item = data_bag_item('credentials', 'postgresql')
primary = postgresql_primary
if primary.nil?
Chef::Log.warn("No PostgreSQL primary node found. Skipping replication setup.")
return
end
template "/usr/local/bin/pg_create_replication_subscriptions" do
source "create_subscriptions.sh.erb"
user "postgres"
group "postgres"
mode "0740"
sensitive true
end
cookbook_file "/usr/local/bin/pg_drop_replication_subscriptions" do
source "drop_subscriptions.sh"
user "postgres"
group "postgres"
mode "0744"
end
cookbook_file "/usr/local/bin/pg_list_replication_subscriptions" do
source "list_subscriptions.sh"
user "postgres"
group "postgres"
mode "0744"
end
cookbook_file "/usr/local/bin/pg_restore_all_databases" do
source "restore_all_databases.sh"
user "postgres"
group "postgres"
mode "0744"
end

View File

@@ -56,7 +56,9 @@ action :create do
timezone: "UTC", # default is GMT
listen_addresses: "0.0.0.0",
promote_trigger_file: "#{postgresql_data_dir}/failover.trigger",
wal_keep_size: 4096 # 256 segments, 16MB each
wal_level: "logical",
wal_keep_size: 4096, # 256 segments, 16MB each
max_replication_slots: 16
}
postgresql_server_conf "main" do

View File

@@ -0,0 +1,33 @@
set -e
echo "== Creating subscriptions for all databases =="
for db in $(psql -Atqc "SELECT datname FROM pg_database WHERE datallowconn AND datname NOT IN ('template0','template1')"); do
echo "Processing DB: $db"
SLOT="migrate_slot_${db}"
SUB="migrate_sub_${db}"
psql -d "$db" -v ON_ERROR_STOP=1 <<SQL
DO \$\$
BEGIN
IF NOT EXISTS (
SELECT 1 FROM pg_subscription WHERE subname = '$SUB'
) THEN
CREATE SUBSCRIPTION $SUB
CONNECTION 'host=<%= @pg_host %> port=<%= @pg_port %> dbname=$db user=<%= @pg_user %> password=<%= @pg_pass %>'
PUBLICATION migrate_pub
WITH (
slot_name = '$SLOT',
create_slot = false,
copy_data = false,
enabled = true
);
END IF;
END
\$\$;
SQL
done
echo "== Done =="