WIP Prepare postgres for migration by replication

This commit is contained in:
2026-04-08 15:41:56 +04:00
parent 6583cd7010
commit bc3f291bd2
22 changed files with 748 additions and 7 deletions

View File

@@ -0,0 +1,31 @@
#!/bin/bash
set -euo pipefail
DB_NAME="${1:?Usage: $0 <database_name>}"
echo "== Processing DB: $DB_NAME =="
# Create publication (idempotent)
psql -d "$DB_NAME" -v ON_ERROR_STOP=1 <<'SQL'
DO $$
BEGIN
IF NOT EXISTS (
SELECT 1 FROM pg_publication WHERE pubname = 'migrate_pub'
) THEN
CREATE PUBLICATION migrate_pub FOR ALL TABLES;
END IF;
END
$$;
SQL
# Create logical replication slot (idempotent-ish)
SLOT="migrate_slot_${DB_NAME}"
if ! psql -d "$DB_NAME" -Atqc "SELECT 1 FROM pg_replication_slots WHERE slot_name = '$SLOT'" | grep -q 1; then
echo " Creating slot: $SLOT"
psql -d "$DB_NAME" -c "SELECT pg_create_logical_replication_slot('$SLOT', 'pgoutput');"
else
echo " Slot already exists: $SLOT"
fi
echo "== Done =="

View File

@@ -0,0 +1,34 @@
#!/bin/bash
set -e
echo "== Creating publication in each database =="
for db in $(psql -Atqc "SELECT datname FROM pg_database WHERE datallowconn AND datname NOT IN ('template1')"); do
echo "Processing DB: $db"
# Create publication (idempotent)
psql -d "$db" -v ON_ERROR_STOP=1 <<SQL
DO \$\$
BEGIN
IF NOT EXISTS (
SELECT 1 FROM pg_publication WHERE pubname = 'migrate_pub'
) THEN
CREATE PUBLICATION migrate_pub FOR ALL TABLES;
END IF;
END
\$\$;
SQL
# Create logical replication slot (idempotent-ish)
SLOT="migrate_slot_${db}"
if ! psql -d "$db" -Atqc "SELECT 1 FROM pg_replication_slots WHERE slot_name = '$SLOT'" | grep -q 1; then
echo " Creating slot: $SLOT"
psql -d "$db" -c "SELECT pg_create_logical_replication_slot('$SLOT', 'pgoutput');"
else
echo " Slot already exists: $SLOT"
fi
done
echo "== Done =="

View File

@@ -0,0 +1,34 @@
#!/bin/bash
set -e
echo "== Dropping subscriptions slots and publications =="
for db in $(psql -Atqc "SELECT datname FROM pg_database WHERE datallowconn AND datname NOT IN ('template1')"); do
echo "Processing DB: $db"
SLOT="migrate_slot_${db}"
# Drop slot if exists
if psql -d "$db" -Atqc "SELECT 1 FROM pg_replication_slots WHERE slot_name = '$SLOT'" | grep -q 1; then
echo " Dropping slot: $SLOT"
psql -d "$db" -c "SELECT pg_drop_replication_slot('$SLOT');"
else
echo " Slot not found: $SLOT"
fi
# Drop publication if exists
psql -d "$db" -v ON_ERROR_STOP=1 <<SQL
DO \$\$
BEGIN
IF EXISTS (
SELECT 1 FROM pg_publication WHERE pubname = 'migrate_pub'
) THEN
DROP PUBLICATION migrate_pub;
END IF;
END
\$\$;
SQL
done
echo "== Done =="

View File

@@ -0,0 +1,29 @@
#!/usr/bin/env bash
set -e
echo "== Dropping subscriptions =="
for db in $(psql -Atqc "SELECT datname FROM pg_database WHERE datallowconn AND datname NOT IN ('template1')"); do
echo "Processing DB: $db"
SUB="migrate_sub_${db}"
# Check if subscription exists
EXISTS=$(psql -d "$db" -Atqc "SELECT 1 FROM pg_subscription WHERE subname = '$SUB'")
if [ "$EXISTS" = "1" ]; then
echo " Found subscription: $SUB"
# Disable first (good practice)
psql -d "$db" -c "ALTER SUBSCRIPTION $SUB DISABLE;"
# Drop it (must be top-level)
psql -d "$db" -c "DROP SUBSCRIPTION $SUB;"
else
echo " No subscription: $SUB"
fi
done
echo "== Done =="

View File

@@ -0,0 +1,9 @@
#!/bin/bash
cd /tmp && \
(pg_dumpall --globals-only > globals.sql) && \
psql -Atqc "SELECT datname FROM pg_database WHERE datallowconn AND datname NOT IN (''template1'')" | \
xargs -I{} -P4 sh -c "
pg_dump -Fd -j 4 -d \"{}\" -f dump_{} &&
tar -cf - dump_{} | zstd -19 -T0 > dump_{}.tar.zst &&
rm -rf dump_{}
"

View File

@@ -0,0 +1,10 @@
#!/bin/bash
set -euo pipefail
DB_NAME="${1:?Usage: $0 <database_name>}"
cd /tmp
pg_dump -Fd -j 4 -d "$DB_NAME" -f "dump_${DB_NAME}"
tar -cf - "dump_${DB_NAME}" | zstd -19 -T0 > "dump_${DB_NAME}.tar.zst"
rm -rf "dump_${DB_NAME}"

View File

@@ -0,0 +1,35 @@
#!/bin/bash
set -e
DB="$1"
if [ -z "$DB" ]; then
echo "Usage: $0 <database>"
exit 1
fi
echo "== Fixing sequences in database: $DB =="
SQL=$(psql -d "$DB" -Atqc "
SELECT
'SELECT setval(' ||
quote_literal(pg_get_serial_sequence(quote_ident(n.nspname)||'.'||quote_ident(c.relname), a.attname)) ||
', COALESCE(MAX(' || quote_ident(a.attname) || '), 0) + 1, false) FROM ' ||
quote_ident(n.nspname)||'.'||quote_ident(c.relname) || ';'
FROM pg_class c
JOIN pg_namespace n ON n.oid = c.relnamespace
JOIN pg_attribute a ON a.attrelid = c.oid
WHERE c.relkind = 'r'
AND a.attnum > 0
AND NOT a.attisdropped
AND pg_get_serial_sequence(quote_ident(n.nspname)||'.'||quote_ident(c.relname), a.attname) IS NOT NULL;
")
if [ -z "$SQL" ]; then
echo "No sequences found in $DB"
exit 0
fi
echo "$SQL" | psql -d "$DB"
echo "== Done =="

View File

@@ -0,0 +1,38 @@
#!/bin/bash
set -e
echo "== Fixing sequences across all databases =="
for db in $(psql -Atqc "SELECT datname FROM pg_database WHERE datallowconn AND datname NOT IN ('template1')"); do
echo "---- DB: $db ----"
# Generate fix statements
SQL=$(psql -d "$db" -Atqc "
SELECT
'SELECT setval(' ||
quote_literal(pg_get_serial_sequence(quote_ident(n.nspname)||'.'||quote_ident(c.relname), a.attname)) ||
', COALESCE(MAX(' || quote_ident(a.attname) || '), 0) + 1, false) FROM ' ||
quote_ident(n.nspname)||'.'||quote_ident(c.relname) || ';'
FROM pg_class c
JOIN pg_namespace n ON n.oid = c.relnamespace
JOIN pg_attribute a ON a.attrelid = c.oid
WHERE c.relkind = 'r'
AND a.attnum > 0
AND NOT a.attisdropped
AND pg_get_serial_sequence(quote_ident(n.nspname)||'.'||quote_ident(c.relname), a.attname) IS NOT NULL;
")
if [ -z "$SQL" ]; then
echo "No sequences found in $db"
continue
fi
echo "Fixing sequences in $db..."
# Execute generated statements
echo "$SQL" | psql -d "$db"
done
echo "== Done fixing sequences =="

View File

@@ -0,0 +1,5 @@
#!/bin/bash
for db in $(psql -Atqc "SELECT datname FROM pg_database WHERE datallowconn AND datname NOT IN ('template1')"); do
echo "DB: $db"
psql -d "$db" -Atqc "SELECT pubname FROM pg_publication;"
done

View File

@@ -0,0 +1,5 @@
#!/bin/bash
psql -c "
SELECT slot_name,
pg_size_pretty(pg_wal_lsn_diff(pg_current_wal_lsn(), restart_lsn))
FROM pg_replication_slots;"

View File

@@ -0,0 +1,5 @@
#!/bin/bash
for db in $(psql -Atqc "SELECT datname FROM pg_database WHERE datallowconn AND datname NOT IN ('template1')"); do
echo "==== DB: $db ===="
psql -d "$db" -c "SELECT * FROM pg_stat_subscription;"
done

View File

@@ -0,0 +1,12 @@
#!/bin/bash
set -euo pipefail
cd /tmp
for f in dump_*.tar.zst; do
db=$(echo $f | sed "s/dump_\(.*\)\.tar\.zst/\1/")
echo "Restoring $db"
zstd -d "$f" -c | tar -xf -
pg_restore -j 4 -d "$db" dump_$db
rm -rf "dump_$db"
done

View File

@@ -0,0 +1,14 @@
#!/bin/bash
set -euo pipefail
DB_NAME="${1:?Usage: $0 <database_name>}"
cd /tmp
FILE="dump_${DB_NAME}.tar.zst"
DIR="dump_${DB_NAME}"
echo "Restoring $DB_NAME"
zstd -d "$FILE" -c | tar -xf -
pg_restore -j 4 -d "$DB_NAME" "$DIR"
rm -rf "$DIR"

View File

@@ -0,0 +1,121 @@
#
# Cookbook:: kosmos_postgresql
# Recipe:: management_scripts
#
credentials = data_bag_item('credentials', 'postgresql')
cookbook_file "/usr/local/bin/pg_dump_all_databases" do
source "dump_all_databases.sh"
user "postgres"
group "postgres"
mode "0744"
end
cookbook_file "/usr/local/bin/pg_dump_database" do
source "dump_database.sh"
user "postgres"
group "postgres"
mode "0744"
end
cookbook_file "/usr/local/bin/pg_restore_all_databases" do
source "restore_all_databases.sh"
user "postgres"
group "postgres"
mode "0744"
end
cookbook_file "/usr/local/bin/pg_restore_database" do
source "restore_database.sh"
user "postgres"
group "postgres"
mode "0744"
end
cookbook_file "/usr/local/bin/pg_create_replication_publications" do
source "create_publications.sh"
user "postgres"
group "postgres"
mode "0744"
end
cookbook_file "/usr/local/bin/pg_create_replication_publication" do
source "create_publication.sh"
user "postgres"
group "postgres"
mode "0744"
end
cookbook_file "/usr/local/bin/pg_drop_replication_publications" do
source "drop_publications.sh"
user "postgres"
group "postgres"
mode "0744"
end
cookbook_file "/usr/local/bin/pg_list_replication_publications" do
source "list_publications.sh"
user "postgres"
group "postgres"
mode "0744"
end
cookbook_file "/usr/local/bin/pg_list_replication_slots" do
source "list_replication_slots.sh"
user "postgres"
group "postgres"
mode "0744"
end
template "/usr/local/bin/pg_create_replication_subscriptions" do
source "create_subscriptions.sh.erb"
user "postgres"
group "postgres"
mode "0740"
variables pg_host: "pg.kosmos.local",
pg_port: 5432,
pg_user: "replication",
pg_pass: credentials["replication_password"]
sensitive true
end
template "/usr/local/bin/pg_create_replication_subscription" do
source "create_subscription.sh.erb"
user "postgres"
group "postgres"
mode "0740"
variables pg_host: "pg.kosmos.local",
pg_port: 5432,
pg_user: "replication",
pg_pass: credentials["replication_password"]
sensitive true
end
cookbook_file "/usr/local/bin/pg_drop_replication_subscriptions" do
source "drop_subscriptions.sh"
user "postgres"
group "postgres"
mode "0744"
end
cookbook_file "/usr/local/bin/pg_list_replication_subscriptions" do
source "list_subscriptions.sh"
user "postgres"
group "postgres"
mode "0744"
end
cookbook_file "/usr/local/bin/pg_fix_sequences_in_all_databases" do
source "fix_sequences.sh"
user "postgres"
group "postgres"
mode "0744"
end
cookbook_file "/usr/local/bin/pg_fix_sequences" do
source "fix_sequences.sh"
user "postgres"
group "postgres"
mode "0744"
end

View File

@@ -6,4 +6,3 @@
postgresql_custom_server postgresql_version do
role "primary"
end

View File

@@ -3,10 +3,6 @@
# Recipe:: replica
#
service postgresql_service do
supports restart: true, status: true, reload: true
end
postgresql_custom_server postgresql_version do
role "replica"
end
@@ -20,6 +16,9 @@ if primary.nil?
return
end
# TODO Replace pg.kosmos.local with private IP once available
# via proper node attribute
# https://gitea.kosmos.org/kosmos/chef/issues/263
execute "set up replication" do
command <<-EOF
systemctl stop #{postgresql_service}

View File

@@ -0,0 +1,15 @@
#
# Cookbook:: kosmos_postgresql
# Recipe:: replica_logical
#
postgresql_custom_server postgresql_version do
role "replica_logical"
end
# primary = postgresql_primary
#
# if primary.nil?
# Chef::Log.warn("No PostgreSQL primary node found. Skipping replication setup.")
# return
# end

View File

@@ -56,13 +56,15 @@ action :create do
timezone: "UTC", # default is GMT
listen_addresses: "0.0.0.0",
promote_trigger_file: "#{postgresql_data_dir}/failover.trigger",
wal_keep_size: 4096 # 256 segments, 16MB each
wal_level: "logical",
wal_keep_size: 4096, # 256 segments, 16MB each
max_replication_slots: 16
}
postgresql_server_conf "main" do
version postgresql_version
additional_config additional_config
notifies :reload, "service[#{postgresql_service}]", :delayed
notifies :restart, "service[#{postgresql_service}]", :delayed
end
postgresql_user "replication" do

View File

@@ -0,0 +1,31 @@
#!/bin/bash
set -euo pipefail
DB_NAME="${1:?Usage: $0 <database_name>}"
echo "== Processing DB: $DB_NAME =="
SLOT="migrate_slot_${DB_NAME}"
SUB="migrate_sub_${DB_NAME}"
psql -d "$DB_NAME" -v ON_ERROR_STOP=1 <<SQL
DO \$\$
BEGIN
IF NOT EXISTS (
SELECT 1 FROM pg_subscription WHERE subname = '$SUB'
) THEN
CREATE SUBSCRIPTION $SUB
CONNECTION 'host=<%= @pg_host %> port=<%= @pg_port %> dbname=$DB_NAME user=<%= @pg_user %> password=<%= @pg_pass %>'
PUBLICATION migrate_pub
WITH (
slot_name = '$SLOT',
create_slot = false,
copy_data = false,
enabled = true
);
END IF;
END
\$\$;
SQL
echo "== Done =="

View File

@@ -0,0 +1,34 @@
#!/bin/bash
set -e
echo "== Creating subscriptions for all databases =="
for db in $(psql -Atqc "SELECT datname FROM pg_database WHERE datallowconn AND datname NOT IN ('template1')"); do
echo "Processing DB: $db"
SLOT="migrate_slot_${db}"
SUB="migrate_sub_${db}"
psql -d "$db" -v ON_ERROR_STOP=1 <<SQL
DO \$\$
BEGIN
IF NOT EXISTS (
SELECT 1 FROM pg_subscription WHERE subname = '$SUB'
) THEN
CREATE SUBSCRIPTION $SUB
CONNECTION 'host=<%= @pg_host %> port=<%= @pg_port %> dbname=$db user=<%= @pg_user %> password=<%= @pg_pass %>'
PUBLICATION migrate_pub
WITH (
slot_name = '$SLOT',
create_slot = false,
copy_data = false,
enabled = true
);
END IF;
END
\$\$;
SQL
done
echo "== Done =="