Commit 0286eab6 authored by Jenny
Browse files

Move the pgsql-2 module into the pgsql-2 repo

parent 661c09ba
......@@ -3,7 +3,6 @@
{
imports = [
./keepalived
./pgsql-2
./monitoring-client
];
}
{ config, pkgs, lib, ... }:
with lib;
let
cfg = config.fem.pgsql-2;
scripts = {
pgsql2_add_user = pkgs.writeShellScriptBin "pgsql2_add_user" ''
  # Usage: pgsql2_add_user <username> [password]
  #
  # Creates a login role on the cluster (reached via the cluster IP).
  # If no password is given, a 20 character one is generated and printed.
  #
  # NOTE(review): username and password are interpolated into the SQL text
  # unescaped -- only call this with trusted input.

  # Arguments are quoted so that values containing whitespace do not break
  # the test expression.
  if [ -z "$1" ]
  then
    echo "No username given!"
    exit 1
  fi
  if [ -z "$2" ]
  then
    PASSWD=$(${pkgs.coreutils}/bin/head -c 64 /dev/urandom | ${pkgs.coreutils}/bin/md5sum | ${pkgs.coreutils}/bin/head -c 20)
    echo "Generated password: $PASSWD"
  else
    PASSWD=$2
  fi
  ${config.services.postgresql.package}/bin/psql -h ${cfg.clusterIp} -f - <<EOT
  CREATE ROLE "$1" NOSUPERUSER NOREPLICATION NOCREATEDB NOCREATEROLE INHERIT LOGIN ENCRYPTED PASSWORD '$PASSWD';
  EOT
'';
pgsql2_add_db = pkgs.writeShellScriptBin "pgsql2_add_db" ''
  # Usage: pgsql2_add_db <db-name> <db-user> [password]
  #
  # Creates a new login role and a database, makes the role the owner of the
  # database and of its "public" schema, and revokes all database privileges
  # from PUBLIC. If no password is given, a 20 character one is generated
  # and printed.
  #
  # NOTE(review): all arguments are interpolated into the SQL text
  # unescaped -- only call this with trusted input.

  # Arguments are quoted so that values containing whitespace do not break
  # the test expressions or the psql command line.
  if [ -z "$1" ]
  then
    echo "No db-name given!"
    exit 1
  fi
  if [ -z "$2" ]
  then
    echo "No db-user given!"
    exit 1
  fi
  if [ -z "$3" ]
  then
    PASSWD=$(${pkgs.coreutils}/bin/head -c 64 /dev/urandom | ${pkgs.coreutils}/bin/md5sum | ${pkgs.coreutils}/bin/head -c 20)
    echo "Generated password: $PASSWD"
  else
    PASSWD=$3
  fi
  # Step 1: create role and database (connected to the default database).
  ${config.services.postgresql.package}/bin/psql -h ${cfg.clusterIp} -f - <<EOT
  CREATE ROLE "$2" NOSUPERUSER NOREPLICATION NOCREATEDB NOCREATEROLE NOINHERIT LOGIN ENCRYPTED PASSWORD '$PASSWD';
  CREATE DATABASE "$1";
  EOT
  # Step 2: connected to the new database, hand over ownership.
  ${config.services.postgresql.package}/bin/psql -h ${cfg.clusterIp} "$1" -f - <<EOT
  ALTER DATABASE "$1" OWNER TO "$2";
  REVOKE ALL ON DATABASE "$1" FROM PUBLIC;
  ALTER SCHEMA public OWNER TO "$2";
  EOT
'';
pgsql2_grant_db_access = pkgs.writeShellScriptBin "pgsql2_grant_db_access" ''
  # Usage: pgsql2_grant_db_access <username> <db-name>
  #
  # Grants <username> membership in the role that owns <db-name>.
  #
  # NOTE(review): arguments are interpolated into the SQL text unescaped --
  # only call this with trusted input.

  # Arguments are quoted so that values containing whitespace do not break
  # the test expressions or the psql command line.
  if [ -z "$1" ]
  then
    echo "No username given!"
    exit 1
  fi
  if [ -z "$2" ]
  then
    echo "No db-name given!"
    exit 1
  fi
  # Look up the owning role of the database; empty output means no such db.
  DBOWNER=$(${config.services.postgresql.package}/bin/psql -h ${cfg.clusterIp} -t -A -c "SELECT datdba::regrole FROM pg_database WHERE datname = '$2';")
  if [ -z "$DBOWNER" ]
  then
    echo "Database not found!"
    exit 1
  fi
  ${config.services.postgresql.package}/bin/psql -h ${cfg.clusterIp} "$2" -f - <<EOT
  GRANT "$DBOWNER" TO "$1";
  EOT
'';
pgsql2_revoke_db_access = pkgs.writeShellScriptBin "pgsql2_revoke_db_access" ''
  # Usage: pgsql2_revoke_db_access <username> <db-name>
  #
  # Revokes <username>'s membership in the role that owns <db-name>
  # (the inverse of pgsql2_grant_db_access).
  #
  # NOTE(review): arguments are interpolated into the SQL text unescaped --
  # only call this with trusted input.

  # Arguments are quoted so that values containing whitespace do not break
  # the test expressions or the psql command line.
  if [ -z "$1" ]
  then
    echo "No username given!"
    exit 1
  fi
  if [ -z "$2" ]
  then
    echo "No db-name given!"
    exit 1
  fi
  # Look up the owning role of the database; empty output means no such db.
  DBOWNER=$(${config.services.postgresql.package}/bin/psql -h ${cfg.clusterIp} -t -A -c "SELECT datdba::regrole FROM pg_database WHERE datname = '$2';")
  if [ -z "$DBOWNER" ]
  then
    echo "Database not found!"
    exit 1
  fi
  # The user name is double-quoted exactly as in pgsql2_grant_db_access, so
  # that a role which was granted access under a case-sensitive or otherwise
  # special name can also be revoked again.
  ${config.services.postgresql.package}/bin/psql -h ${cfg.clusterIp} "$2" -f - <<EOT
  REVOKE "$DBOWNER" FROM "$1";
  EOT
'';
pgsql2_rejoin =
  let
    repmgr = "${config.services.postgresql.package.pkgs.repmgr}/bin/repmgr";
  in
  pkgs.writeShellScriptBin "pgsql2_rejoin" ''
    # Usage: pgsql2_rejoin
    #
    # Re-attaches this node to the cluster when it looks like an outdated
    # primary: "repmgr node check" must report both downstream servers and
    # replication slots as CRITICAL. The node that the local cluster view
    # shows as "! running as primary" is taken as the current primary; it is
    # asked over ssh for its own cluster view, and the rejoin (stop postgres,
    # remove postgresql.conf from the data directory, "repmgr node rejoin"
    # with pg_rewind) is only performed when that view shows no
    # "! running as primary" entry.
    #
    # Exit status: 0 when nothing had to be done, on the pre-existing
    # "aborting" paths and on success; 1 when the primary could not be
    # determined or queried, or when the rejoin command itself failed.

    check_output=$(${repmgr} node check 2>&1)
    echo $check_output | grep 'CRITICAL' > /dev/null
    critical=$?
    if [ $critical -ne 0 ]
    then
      echo 'No services critical --> exiting'
      exit 0
    fi

    echo $check_output | grep 'Downstream servers: CRITICAL' > /dev/null
    downstream_critical=$?
    echo $check_output | grep 'Replication slots: CRITICAL' > /dev/null
    replication_critical=$?
    echo 'Found critical services'
    if [ $downstream_critical -ne 0 ] || [ $replication_critical -ne 0 ]
    then
      echo "Either downstream servers or replication slots are not critical"
      echo "--> aborting"
      exit 0
    fi

    echo 'Downstream servers and replication slots are critical'
    echo '--> we are an old primary --> trying to rejoin the cluster'
    current_primary=$(${repmgr} cluster show --compact 2>&1 | grep '! running as primary' | awk '{print $3}')
    # Without a candidate there is nothing to ssh to or rejoin against.
    if [ -z "$current_primary" ]
    then
      echo "Could not determine the current primary node"
      echo "--> aborting"
      exit 1
    fi
    echo "Estimated current primary node: $current_primary"
    echo 'Checking estimated primary node for other false primaries...'
    estimated_primary_output=$(${pkgs.openssh}/bin/ssh "$current_primary" -C "repmgr cluster show --compact 2>&1")
    ssh_exit=$?
    # ssh itself reports failures with status 255; any other status is the
    # remote command's own exit status, which is not treated as fatal here.
    # An empty answer cannot be used for the safety check either.
    if [ $ssh_exit -eq 255 ] || [ -z "$estimated_primary_output" ]
    then
      echo "Could not query estimated primary node $current_primary"
      echo "--> aborting"
      exit 1
    fi
    echo $estimated_primary_output | grep '! running as primary' > /dev/null
    estimated_primary_result=$?
    if [ $estimated_primary_result -eq 0 ]
    then
      echo "Estimated primary node also has false primaries"
      echo "--> aborting"
      exit 0
    fi

    echo "Estimated primary node $current_primary seems to be real primary node"
    echo "--> rejoining cluster:"
    echo "Stopping postgres:"
    ${repmgr} node service --action=stop --checkpoint
    echo "Waiting 10 seconds for stop..."
    sleep 10
    echo "Done"
    echo "Performing rejoin with pg_rewind:"
    ${pkgs.coreutils}/bin/rm ${config.services.postgresql.dataDir}/postgresql.conf
    # Only report success when repmgr actually succeeded.
    if ! ${repmgr} -h "$current_primary" -U repmgr -d repmgr -f /etc/repmgr.conf node rejoin --force-rewind=${config.services.postgresql.package}/bin/pg_rewind
    then
      echo "Cluster rejoin failed!"
      exit 1
    fi
    echo "Done"
    echo "Cluster rejoin successful!"
  '';
pgsql2_primary_check =
  let
    repmgr = "${config.services.postgresql.package.pkgs.repmgr}/bin/repmgr";
  in
  pkgs.writeShellScriptBin "pgsql2_primary_check" ''
    # Health probe: exits 0 and prints "success" only when
    #   * "repmgr node check" succeeds and reports nothing CRITICAL, and
    #   * "repmgr node status" succeeds and reports "Role: primary".
    # Every other outcome prints a one-line reason and exits 1.

    # Print the reason and leave with a failure status.
    fail() {
      echo "$1"
      exit 1
    }

    # A non-zero exit of repmgr is treated as postgres not running.
    node_check=$(${repmgr} node check 2>&1) || fail "postgres failure"

    if echo $node_check | grep -q 'CRITICAL'
    then
      fail "services are critical"
    fi

    # Same reasoning for the status query.
    node_status=$(${repmgr} node status 2>&1) || fail "postgres failure"

    if ! echo $node_status | grep -q 'Role: primary'
    then
      fail "not running as primary"
    fi

    echo "success"
    exit 0
  '';
pgsql2_backup =
  let
    pgBin = "${config.services.postgresql.package}/bin";
    sshCommand = "${pkgs.openssh}/bin/ssh -p ${toString cfg.backupSSHPort} -i ${cfg.backupSSHKeyFile}";
    remote = "${cfg.backupSSHUser}@${cfg.backupTarget}:backups/";
  in
  pkgs.writeShellScriptBin "pgsql2_backup" ''
    # Dumps the cluster into /var/lib/postgresql/backups/<ISO date>/:
    #   all.sql              - pg_dumpall of everything
    #   roles.sql            - pg_dumpall -r (roles only)
    #   databases/<name>.sql - one pg_dump per connectable database
    # The backup tree is then moved to the backup host with rsync; when
    # rsync succeeds, the directories it left empty are removed.
    # Error output of the dump commands is discarded.
    root="/var/lib/postgresql/backups"
    today="$root/$(date -I)"
    per_db="$today/databases"
    mkdir -p $per_db

    ${pgBin}/pg_dumpall -h ${cfg.clusterIp} 2>/dev/null > $today/all.sql
    ${pgBin}/pg_dumpall -h ${cfg.clusterIp} -r 2>/dev/null > $today/roles.sql

    for name in $(${pgBin}/psql -h ${cfg.clusterIp} -t -A -c "SELECT datname FROM pg_database WHERE datallowconn = true;" 2>/dev/null)
    do
      ${pgBin}/pg_dump -h ${cfg.clusterIp} $name 2>/dev/null > $per_db/$name.sql
    done

    # --remove-source-files deletes transferred files but keeps directories.
    if ${pkgs.rsync}/bin/rsync --remove-source-files -ax -e "${sshCommand}" $root/ ${remote}
    then
      ${pkgs.findutils}/bin/find $root -depth -type d -empty -delete
    fi
  '';
monitoring_check_primaries = pkgs.writeShellScriptBin "check_primaries" ''
  # Monitoring check: counts the lines of "repmgr service status --csv"
  # whose third column contains "primary".
  # Exactly one -> OK (exit 0); anything else -> CRITICAL (exit 2).
  primaries=$(${config.services.postgresql.package.pkgs.repmgr}/bin/repmgr service status --csv | awk -F"," '{ print $3 }' | grep -c primary)
  if [[ $primaries -ne 1 ]]
  then
    echo "CRITICAL - $primaries nodes are running as primary"
    exit 2
  fi
  echo "OK - only 1 node is running as primary"
  exit 0
'';
monitoring_check_paused = pkgs.writeShellScriptBin "check_paused" ''
  # Monitoring check: inspects the seventh column of
  # "repmgr service status --csv" for every listed node.
  # First non-zero value -> CRITICAL (exit 2); all zero -> OK (exit 0).
  for flag in $(${config.services.postgresql.package.pkgs.repmgr}/bin/repmgr service status --csv | awk -F"," '{ print $7 }')
  do
    if [[ $flag -ne 0 ]]
    then
      echo "CRITICAL - at least one node has paused status $flag"
      exit 2
    fi
  done
  echo "OK - all nodes have paused status 0"
  exit 0
'';
};
in
{
# Option declarations for one node of the pgsql-2 cluster.
options.fem.pgsql-2 = {
  enable = mkEnableOption "FeM pgsql-2 Node";

  # --- identity of this node -------------------------------------------
  id = mkOption {
    description = "ID of this node";
    type = types.str;
  };
  name = mkOption {
    description = "Name of this node";
    type = types.str;
  };
  ip = mkOption {
    description = "IP of this node";
    type = types.str;
  };

  # --- cluster topology and client access ------------------------------
  peers = mkOption {
    description = "IPs of the other nodes";
    type = types.listOf types.str;
  };
  allowedSubnets = mkOption {
    description = "Subnets from that connections to the postgreSQL server are allowed";
    type = types.listOf types.str;
  };

  # --- keepalived / VRRP -----------------------------------------------
  clusterIp = mkOption {
    description = "VRRP-IP for keepalived";
    type = types.str;
  };
  clusterIpPrefixLength = mkOption {
    description = "Prefix-length of the VRRP-IP-Subnet (CIDR)";
    type = types.int;
  };
  keepalivedInterface = mkOption {
    description = "Interface on which keepalived should listen";
    type = types.str;
  };
  keepalivedPass = mkOption {
    description = "Password for keepalived";
    type = types.str;
    # By default a "$KEEPALIVED_PASSWORD" placeholder is written into the
    # keepalived configuration instead of a literal password.
    default = "$KEEPALIVED_PASSWORD";
  };

  # --- backups ---------------------------------------------------------
  enableBackup = mkOption {
    description = "Should this node create SQL backups";
    type = types.bool;
    default = false;
  };
  backupSSHUser = mkOption {
    description = "SSH-User for the backups";
    type = types.str;
  };
  backupSSHPort = mkOption {
    description = "SSH-Port for the backups";
    type = types.int;
  };
  backupSSHKeyFile = mkOption {
    description = "SSH private key file for backup rsync";
    type = types.str;
  };
  backupTarget = mkOption {
    description = "Target host for rsync backups";
    type = types.str;
  };
};
config = mkIf cfg.enable {
# PostgreSQL 12 server with the repmgr library preloaded and replication
# enabled.
services.postgresql = {
  enable = true;
  package = pkgs.postgresql_12;
  port = 5432;
  enableTCPIP = true;
  # Provides the "repmgr" library named in shared_preload_libraries below.
  extraPlugins = [ pkgs.postgresql_12.pkgs.repmgr ];
  # Creates the database and superuser role used by repmgr.
  # The closing quote after the role name in the GRANT statement was
  # missing, which left an unterminated quoted identifier.
  initialScript = pkgs.writeText "db-init-script" ''
    CREATE DATABASE "repmgr";
    CREATE ROLE "repmgr" LOGIN SUPERUSER;
    GRANT ALL PRIVILEGES ON DATABASE "repmgr" TO "repmgr";
  '';
  settings = {
    shared_preload_libraries = "repmgr";
    max_wal_senders = 15;
    max_replication_slots = 15;
    wal_level = "replica";
    wal_log_hints = true;
    hot_standby = true;
    archive_mode = true;
    # Archiving is switched on, but the archive command is a no-op.
    archive_command = "${pkgs.coreutils}/bin/true";
    wal_keep_segments = 500;
  };
  # pg_hba rules:
  #   * the repmgr role is trusted for the "replication" pseudo database and
  #     for the "repmgr" database, locally and from the cluster IP, this
  #     node's IP and every peer IP (each as a /32);
  #   * every subnet in allowedSubnets may reach all databases with md5
  #     password authentication.
  authentication =
    let
      clusterNodes = [ cfg.clusterIp cfg.ip ] ++ cfg.peers;
      trustRepmgr = database:
        [
          "local ${database} repmgr trust"
          "host ${database} repmgr 127.0.0.1/32 trust"
          "host ${database} repmgr ::1/128 trust"
        ]
        ++ forEach clusterNodes (address: "host ${database} repmgr ${address}/32 trust");
    in
    concatStringsSep "\n" (
      trustRepmgr "replication"
      ++ trustRepmgr "repmgr"
      ++ forEach cfg.allowedSubnets (subnet: "host all all ${subnet} md5")
    );
};
# Lets the postgres user start, stop and restart the postgresql unit as
# root without a password (these are the commands referenced from
# /etc/repmgr.conf).
security.sudo.extraRules = [
  {
    users = [ "postgres" ];
    runAs = "root";
    commands = map
      (action: {
        command = "${pkgs.systemd}/bin/systemctl ${action} postgresql";
        options = [ "NOPASSWD" ];
      })
      [ "start" "stop" "restart" ];
  }
];
# Monitoring checks: two disk checks, two service checks and the two
# repmgr based scripts defined above.
fem.monitoring-client.checks =
  let
    # Disk usage check for one mount point / path.
    diskCheck = name: path: {
      inherit name;
      file = "check_disk";
      params = "-w 20 -c 10 -p ${path}";
    };
    # Service check for one unit, using the FeM monitoring plugins.
    serviceCheck = unit: {
      name = "check_service_${unit}";
      file = "check_service";
      package = pkgs.fem-monitoring-plugins;
      params = "-s ${unit}";
    };
    # Check implemented by one of the local scripts; the executable carries
    # the same name as the check.
    scriptCheck = name: package: {
      inherit name package;
      file = name;
    };
  in
  [
    (diskCheck "check_root_disk" "/")
    (diskCheck "check_postgres_disk" "/var/lib/postgresql/")
    (serviceCheck "postgresql")
    (serviceCheck "repmgrd")
    (scriptCheck "check_primaries" scripts.monitoring_check_primaries)
    (scriptCheck "check_paused" scripts.monitoring_check_paused)
  ];
# Forces the postgres user's home directory and puts the admin scripts on
# its PATH; the backup script is only added when backups are enabled.
users.users.postgres = {
  home = mkForce "/var/lib/postgresql";
  packages =
    (with scripts; [
      pgsql2_rejoin
      pgsql2_add_user
      pgsql2_add_db
      pgsql2_grant_db_access
      pgsql2_revoke_db_access
    ])
    ++ optionals cfg.enableBackup [ scripts.pgsql2_backup ];
};
# When backups are enabled, run the backup script as postgres every day at
# 04:00.
services.cron = mkIf cfg.enableBackup {
  systemCronJobs = singleton "0 4 * * * postgres ${scripts.pgsql2_backup}/bin/pgsql2_backup";
  enable = true;
};
# /etc/repmgr.conf, owned by postgres and used by the repmgr CLI and by
# repmgrd.
environment.etc."repmgr.conf" =
  let
    postgres = config.services.postgresql;
    pgCtl = "${postgres.package}/bin/pg_ctl";
    repmgr = "${postgres.package.pkgs.repmgr}/bin/repmgr";
    # Service control goes through sudo + systemctl.
    sudoSystemctl = "/run/wrappers/bin/sudo ${pkgs.systemd}/bin/systemctl";
  in
  {
    enable = true;
    mode = "0644";
    uid = config.users.users.postgres.uid;
    gid = config.users.groups.postgres.gid;
    text = ''
      node_id=${cfg.id}
      node_name='${cfg.name}'
      conninfo='host=${cfg.name} user=repmgr dbname=repmgr connect_timeout=2'
      data_directory='${postgres.dataDir}'
      use_replication_slots=yes
      monitoring_history=yes
      priority = 100
      service_start_command = '${sudoSystemctl} start postgresql'
      service_stop_command = '${sudoSystemctl} stop postgresql'
      service_restart_command = '${sudoSystemctl} restart postgresql'
      service_reload_command = '${pgCtl} reload -D ${postgres.dataDir}'
      service_promote_command = '${pgCtl} promote -D ${postgres.dataDir}'
      promote_check_timeout = 15
      failover=automatic
      promote_command='${repmgr} standby promote -f /etc/repmgr.conf --log-to-file'
      follow_command='${repmgr} standby follow -f /etc/repmgr.conf --log-to-file --upstream-node-id=%n'
      log_file='/var/lib/postgresql/repmgrd.log'
    '';
  };
# Open the PostgreSQL port; taken from the service option so the firewall
# rule cannot drift from the port the server actually listens on.
networking.firewall.allowedTCPPorts = [ config.services.postgresql.port ];
# repmgrd, run in the foreground as the postgres user and ordered after
# the network and the postgresql unit.
systemd.services.repmgrd = {
  description = "repmgrd service";
  wantedBy = [ "multi-user.target" ];
  after = [ "network.target" "postgresql.service" ];
  # Makes the ssh client available on the daemon's PATH.
  path = with pkgs; [ openssh ];
  serviceConfig = {
    Type = "simple";
    User = "postgres";
    Group = "postgres";
    ExecStart = "${config.services.postgresql.package.pkgs.repmgr}/bin/repmgrd -f /etc/repmgr.conf --daemonize=false";
  };
};
# keepalived announces the cluster IP; the VRRP instance tracks the
# pgsql2_primary_check script, which only succeeds on a healthy primary.
services.keepalived = {
  enable = true;
  secretFile = "/var/secure/keepalived";

  # Run as postgres every 5 seconds.
  vrrpScripts.primary_check = {
    script = "${scripts.pgsql2_primary_check}/bin/pgsql2_primary_check";
    user = "postgres";
    interval = 5;
    timeout = 5;
    weight = 5;
    rise = 3;
    fall = 3;
  };

  vrrpInstances.pgcluster = {
    state = "MASTER";
    interface = cfg.keepalivedInterface;
    virtualRouterId = 51;
    trackScripts = [ "primary_check" ];
    # Unicast between this node and its peers.
    unicastSrcIp = cfg.ip;
    unicastPeers = cfg.peers;
    virtualIps = [
      { addr = "${cfg.clusterIp}/${toString cfg.clusterIpPrefixLength}"; }
    ];
    extraConfig = ''
      authentication {
      auth_type PASS
      auth_pass ${cfg.keepalivedPass}
      }
    '';
  };
};
};
}
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment