Configuration using env vars. Adding standby works.
All checks were successful
continuous-integration/drone/push Build is passing
@@ -15,3 +15,5 @@ RUN localedef -i cs_CZ -c -f UTF-8 -A /usr/share/locale/locale.alias cz_CZ.UTF-8
apt-get update && apt-get install -y postgresql-16-auto-failover pg-auto-failover-cli && rm -rf /var/lib/apt/lists/*

#ENV LANG en_GB.utf8

ENTRYPOINT [ "/pg_autoconfig" ]

README.md (new file, 60 lines)
@@ -0,0 +1,60 @@

Postgres image with PostGIS and pg_auto_failover extensions that allows running a high-availability Postgres cluster with query load balancing.

# How to use this image

The expected configuration is at least two `worker` nodes plus one `monitor` node. The container is expected to have one or more peer configuration containers that implement query load balancing (activated using `AUTOCONFIG_LOCAL_PEER` and/or `AUTOCONFIG_REMOTE_PEERS`). One such implementation is [docker-pgpool](/docker-pgpool), which implements read-only/read-write splitting and load balancing using `pgpool`. Another is [docker-pgtraefik](/docker-pgtraefik), which load-balances all traffic to the primary node.

The configuration peers act as servers and this image connects to them. They can run either locally, communicating over a Unix socket, or remotely over HTTP.
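
The peer protocol is not formally specified anywhere; judging from `main.go` in this commit, the configurator issues `GET /ping` (expecting a JSON `pingMessage`) and `POST /config` (a JSON `configMessage` describing the instances) against each peer, over the Unix socket or TCP port 5420. A minimal sketch of such a peer server follows; the struct shapes are copied from `main.go` below, everything else (socket handling, logging) is illustrative only:

```go
package main

import (
	"encoding/json"
	"io"
	"log"
	"net"
	"net/http"
	"os"
)

// Shapes copied from main.go in this commit.
type configInstance struct {
	IpAddress  net.IP
	Port       int
	HostName   string
	IsReadOnly bool
}

type configMessage struct{ Instances []configInstance }
type pingMessage struct{ NeedsConfig bool }

func main() {
	const socketPath = "/var/run/pg_autoconfig.sock" // the configurator's default
	os.Remove(socketPath)                            // drop a stale socket from a previous run
	listener, err := net.Listen("unix", socketPath)
	if err != nil {
		log.Fatal(err)
	}

	needsConfig := true // ask for a full config until the first one arrives

	// The configurator polls this and re-sends the layout when NeedsConfig is true.
	http.HandleFunc("/ping", func(w http.ResponseWriter, _ *http.Request) {
		json.NewEncoder(w).Encode(pingMessage{NeedsConfig: needsConfig})
	})

	// The configurator posts the current cluster layout here whenever it changes.
	http.HandleFunc("/config", func(w http.ResponseWriter, r *http.Request) {
		body, err := io.ReadAll(r.Body)
		if err != nil {
			http.Error(w, err.Error(), http.StatusBadRequest)
			return
		}
		var msg configMessage
		if err := json.Unmarshal(body, &msg); err != nil {
			http.Error(w, err.Error(), http.StatusBadRequest)
			return
		}
		needsConfig = false
		log.Printf("received %d instances; reconfigure the load balancer here", len(msg.Instances))
	})

	log.Fatal(http.Serve(listener, nil))
}
```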

# Environment variables

## General

- `AUTOCONFIG_LOG_LEVEL` Log level of the application that manages the execution (one of `trace`, `debug`, `info`, `warn`, `error`, `fatal`). Default `info`. Optional.
- `AUTOCONFIG_MODE` Determines the role of the container. Either
  - empty/not set to get the behaviour of the plain Postgres image,
  - `monitor` to make this container a monitor (only one container should have this set), or
  - `postgres` to make this container a worker (usually at least two containers have this).

  No default. Required.
- `AUTOCONFIG_FORMATION` Name of the pg_auto_failover formation. Default `default`. Optional.
- `AUTOCONFIG_MONITOR_HOST` Hostname of the monitor node. No default. Required on worker nodes.
- `AUTOCONFIG_MONITOR_PORT` Postgres port on the monitor node. Default `PGPORT`, or `5432` if that is not set. Optional.
- `AUTOCONFIG_LOCAL_PEER` True if a configuration peer container runs locally and is reachable over a socket. Default `false`. Optional.
- `AUTOCONFIG_SOCKET` Path to the socket of the locally running peer configuration container. Default `/var/run/pg_autoconfig.sock`. Optional.
- `AUTOCONFIG_REMOTE_PEERS` Comma-separated hostnames of remote configuration peers. Each entry has the format `hostname[:port]` (default port 5420). No default. Optional.
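
For example, `AUTOCONFIG_REMOTE_PEERS=pgpool-a,pgpool-b:6000` (hypothetical hostnames) makes this image connect to `pgpool-a:5420` and `pgpool-b:6000`; see the peer-address handling in `main.go` below.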

## Security

- `AUTOCONFIG_MONITOR_PASSWORD` or `AUTOCONFIG_MONITOR_PASSWORD_FILE` Password (or path to a file containing it) that is set on the monitor node for the user `autoctl_node`; worker nodes use it to report their status to pg_auto_failover. No default. Required.
- `AUTOCONFIG_REPLICATION_PASSWORD` or `AUTOCONFIG_REPLICATION_PASSWORD_FILE` Password (or path to a file containing it) that is set on the worker nodes for the user `pgautofailover_replicator`, which allows other nodes to fetch state from the current primary node. No default. Required on worker nodes.
- `POSTGRES_USERNAME` Postgres admin user name.
- `POSTGRES_PASSWORD` or `POSTGRES_PASSWORD_FILE` Password (or path to a file containing it) that is set for the Postgres admin user.
- `AUTOCONFIG_LINK_HBA_CONF` Path to a `pg_hba.conf` file that is sym-linked into place after the Postgres storage gets initialized (Postgres refuses to initialize a non-empty directory, so `pg_hba.conf` cannot simply be bind-mounted from the Docker host into `PGDATA`). Default none. Optional.

This image neither deploys nor touches the Postgres file `pg_hba.conf` itself. You are expected to deploy it by whatever tools you have into some directory outside of `PGDATA`. However, the following rules are required for pg_auto_failover to work (a combined example follows the lists below). On the monitor:

- `local all all`
- `host pg_auto_failover autoctl_node <worker ip address> scram-sha-256` (for each worker)

On a worker:

- `local all all`
- `host all pgautofailover_monitor <monitor ip address> trust`
- `host replication pgautofailover_replicator <worker ip address> scram-sha-256` (for each other worker)
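
For illustration only, a worker's `pg_hba.conf` assembled from the rules above might look like this; the IP addresses and the `trust` method on the `local` line are placeholder assumptions, not values mandated by this image:

```
# local connections (pick an auth method suitable for your setup)
local   all           all                                         trust
# the monitor checking on this worker
host    all           pgautofailover_monitor     10.0.0.10/32     trust
# the other worker replicating from this node
host    replication   pgautofailover_replicator  10.0.0.12/32     scram-sha-256
host    postgres      pgautofailover_replicator  10.0.0.12/32     scram-sha-256
```

The last `host postgres ...` line mirrors the `ruleRegular1` hint logged by `main.go` below.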

## pg_auto_failover

- `XDG_CONFIG_HOME` Persistent storage for the pg_auto_failover configuration.
- `XDG_DATA_HOME` Persistent state of pg_auto_failover.

See the pg_auto_failover documentation for others.

## Postgres

- `PGDATA` Path to the Postgres persistent storage.
- `PGPORT` Port for Postgres to listen on. Default `5432`. Optional.
- `POSTGRES_USERNAME` (see above)
- `POSTGRES_PASSWORD` or `POSTGRES_PASSWORD_FILE` (see above)

See the Postgres Docker image documentation for others.

# Runtime status

You can check the state of the cluster by executing `pg_autoctl show state --formation <formation name, e.g. default>` in the container.
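
For example, with the container names from `docker-test.sh` below: `docker exec -it pg-1 pg_autoctl show state --formation default`.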

docker-test.sh (new file, 7 lines)
@@ -0,0 +1,7 @@
docker create --name pg-Monitor -e AUTOCONFIG_MODE=monitor -e AUTOCONFIG_LOG_LEVEL=trace -e POSTGRES_USERNAME=postgres -e POSTGRES_PASSWORD=heslo -e AUTOCONFIG_MONITOR_PASSWORD=heslo2 -e AUTOCONFIG_REPLICATION_PASSWORD=heslo3 -e PGPORT=5431 -e XDG_CONFIG_HOME=/data/failover-config -e XDG_DATA_HOME=/data/failover-state -e PGDATA=/data/data -e AUTOCONFIG_LINK_HBA_CONF=/data/pg_hba.conf --hostname pg-Monitor --user 999:999 --network app -v /temp/pgMon:/data -v /temp/pg_hba.conf:/data/pg_hba.conf postgis-test

docker create --name pg-1 -e AUTOCONFIG_MODE=postgres -e AUTOCONFIG_LOG_LEVEL=trace -e POSTGRES_USERNAME=postgres -e POSTGRES_PASSWORD=heslo -e AUTOCONFIG_MONITOR_PASSWORD=heslo2 -e AUTOCONFIG_REPLICATION_PASSWORD=heslo3 -e PGPORT=5431 -e XDG_CONFIG_HOME=/data/failover-config -e XDG_DATA_HOME=/data/failover-state -e PGDATA=/data/data -e AUTOCONFIG_LINK_HBA_CONF=/data/pg_hba.conf -e AUTOCONFIG_MONITOR_HOST=pg-Monitor -e AUTOCONFIG_LOCAL_PEER=true -e AUTOCONFIG_SOCKET=/data/pg_autoconfig.sock --hostname pg-1 --user 999:999 --network app -v /temp/pg1:/data -v /temp/pg_hba.conf:/data/pg_hba.conf postgis-test

docker create --name pg-2 -e AUTOCONFIG_MODE=postgres -e AUTOCONFIG_LOG_LEVEL=trace -e POSTGRES_USERNAME=postgres -e POSTGRES_PASSWORD=heslo -e AUTOCONFIG_MONITOR_PASSWORD=heslo2 -e AUTOCONFIG_REPLICATION_PASSWORD=heslo3 -e PGPORT=5431 -e XDG_CONFIG_HOME=/data/failover-config -e XDG_DATA_HOME=/data/failover-state -e PGDATA=/data/data -e AUTOCONFIG_LINK_HBA_CONF=/data/pg_hba.conf -e AUTOCONFIG_MONITOR_HOST=pg-Monitor -e AUTOCONFIG_LOCAL_PEER=true -e AUTOCONFIG_SOCKET=/data/pg_autoconfig.sock --hostname pg-2 --user 999:999 --network app -v /temp/pg2:/data -v /temp/pg_hba.conf:/data/pg_hba.conf postgis-test

docker create --name pg-2-pool --hostname pg-2-pool -e PGPOOL_ADMIN_USERNAME=pgpool -e PGPOOL_ADMIN_PASSWORD=heslo -e PGPOOL_HEALTH_CHECK_USER=postgres -e PGPOOL_SR_CHECK_USER=postgres -e PGPOOL_ENABLE_POOL_PASSWD=false -e PGPOOL_ENABLE_POOL_HBA=false -e AUTOPOOL_LOG_LEVEL=trace --user 999:999 --network app -v /temp/pg2:/data pgpool-test --local --socket /data/pg_autoconfig.sock
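
Note that these commands only create the containers: the `app` network is assumed to exist already (`docker network create app`), and the containers still have to be started, e.g. `docker start pg-Monitor pg-1 pg-2 pg-2-pool`.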

main.go (452 lines changed)
@@ -8,6 +8,8 @@ import (
	"net/http"
	"os"
	"os/exec"
	"path/filepath"
	"slices"
	"strconv"
	"strings"
	"time"
@@ -22,9 +24,19 @@ const GroupStatePrimaryAlone = "single"
const GroupStateStandby = "secondary"
const DefaultFormation = "default"
const DefaultSocketPath = "/var/run/pg_autoconfig.sock"
const CyclesPerPing = 10
const ExecutionCycleSleep = 1000 * time.Millisecond
const ExecutionCyclesPerPing = 10
const DefaultRemotePort = 5420
const MonitorUpCheckDelay = 15 * time.Second
const PostgresOriginalEntrypoint = "/usr/local/bin/docker-entrypoint.sh"
const PostgresOriginalCmd = "postgres"
const AutoFailoverExecutable = "pg_autoctl"
const AutoFailoverPidPath = "/tmp/pg_autoctl/data/data/pg_autoctl.pid"
const AutoFailoverPidPath2 = "/tmp/pg_autoctl/db/data/pg_autoctl_postgres.pid"
const AutoFailoverPidPath3 = "/tmp/pg_autoctl/db/data/pg_autoctl_listener.pid"
const PgHbaConfFileName = "pg_hba.conf"
const PostgresInitDoneFileName = "PG_VERSION"
const PostgresDataDefaultPath = "/var/lib/postgresql/data"

type postgresInstance struct {
	// Locally available info
@@ -53,18 +65,11 @@ type configFiles struct {
	Pid string `json:"pid"`
}

type IConfiguration interface {
	configure(nodes []postgresInstance) ([]byte, error)
type configMessage struct {
	Instances []configInstance
}

type pgpoolConfiguration struct {
}

type pgpoolConfigMessage struct {
	Instances []pgpoolInstance
}

type pgpoolInstance struct {
type configInstance struct {
	IpAddress net.IP
	Port      int
	HostName  string
@@ -72,10 +77,24 @@ type pgpoolInstance struct {
	IsReadOnly bool
}

type pgpoolPingMessage struct {
type pingMessage struct {
	NeedsConfig bool
}

type global struct {
	formation           string
	useLocal            bool
	socketPath          string
	remotes             []string
	isMonitor           bool
	monitorHost         string
	monitorPort         int
	monitorPassword     string
	replicationPassword string
	hbaConfPath         string
	env                 []string
}

func main() {
	// Logging
	var logLevelS string
@@ -91,105 +110,225 @@ func main() {

	log.Info("Starting pg_auto_failover load balancer configurator, version " + version)

	nextIsType := false
	var config IConfiguration
	useLocal := false
	nextIsSocket := false
	socketPath := DefaultSocketPath
	nextIsRemotes := false
	remotes := []string{}
	nextIsFormation := false
	formation := DefaultFormation
	argsLeft := os.Args[1:]
	for i, j := range argsLeft {
		if nextIsType {
			nextIsType = false
			switch j {
			case "pgpool":
				config = &pgpoolConfiguration{}
			default:
				log.WithFields(log.Fields{"value": j}).Fatal("Unknown configuration type")
	// Mode of operation
	var innerExec string
	var innerArgs []string
	innerEnv := os.Environ()
	doComplete := false
	if mode := os.Getenv("AUTOCONFIG_MODE"); mode == "monitor" || mode == "postgres" {
		innerExec = AutoFailoverExecutable
		innerArgs = append(innerArgs, []string{"create", mode, "--no-ssl"}...)

		var state global
		state.formation = getEnvOrDefault("AUTOCONFIG_FORMATION", DefaultFormation)
		state.useLocal = getEnvOrDefaultBool("AUTOCONFIG_LOCAL_PEER", false)
		state.socketPath = getEnvOrDefault("AUTOCONFIG_SOCKET", DefaultSocketPath)
		state.remotes = slices.DeleteFunc(strings.Split(getEnvOrDefault("AUTOCONFIG_REMOTE_PEERS", ""), ","), func(x string) bool {
			return x == ""
		})
		if !state.useLocal && len(state.remotes) == 0 {
			log.Warning("No configuration peers configured using AUTOCONFIG_LOCAL_PEER or AUTOCONFIG_REMOTE_PEERS. Load balancing to the Postgres primary will only work if supported on the application level. Load balancing of the readonly queries may be suboptimal.")
		}
		} else if nextIsSocket {
			nextIsSocket = false
			socketPath = j
		} else if nextIsRemotes {
			nextIsRemotes = false
			remotes = strings.Split(j, ",")
		} else if nextIsFormation {
			nextIsFormation = false
			formation = j
		} else if j == "--" {
			argsLeft = argsLeft[i+1:]
			break

		// Use custom or automatic pg_hba.conf (critical during secondary initialization)
		state.hbaConfPath = getEnvOrDefault("AUTOCONFIG_LINK_HBA_CONF", "")
		if state.hbaConfPath == "" {
			innerArgs = append(innerArgs, []string{"--auth", "scram-sha-256", "--pg-hba-lan"}...)
		} else {
			switch j {
			case "--type":
				nextIsType = true
			case "--remotes":
				nextIsRemotes = true
			case "--local":
				useLocal = true
			case "--socket":
				nextIsSocket = true
			case "--formation":
				nextIsFormation = true
			default:
				log.WithFields(log.Fields{"value": j}).Fatal("Unknown command switch")
			innerArgs = append(innerArgs, "--skip-pg-hba")
		}

		state.monitorPassword = getEnvOrDefault("AUTOCONFIG_MONITOR_PASSWORD", "")
		monitorPasswordPath := getEnvOrDefault("AUTOCONFIG_MONITOR_PASSWORD_FILE", "")
		if monitorPasswordFile, err := os.Stat(monitorPasswordPath); err == nil && !monitorPasswordFile.IsDir() {
			if v, err := os.ReadFile(monitorPasswordPath); err == nil {
				state.monitorPassword = string(v)
			}
		}
		if state.monitorPassword == "" {
			log.Fatal("AUTOCONFIG_MONITOR_PASSWORD or AUTOCONFIG_MONITOR_PASSWORD_FILE must be set")
		}

		if mode == "monitor" {
			state.isMonitor = true
			innerArgs = append(innerArgs, "--run")
			log.Info("Starting a monitor node.")

		} else {
			state.isMonitor = false
			state.monitorHost = getEnvOrDefault("AUTOCONFIG_MONITOR_HOST", "")
			if state.monitorHost == "" {
				log.Fatal("AUTOCONFIG_MONITOR_HOST must be set")
			}
			state.monitorPort = getEnvOrDefaultInt("AUTOCONFIG_MONITOR_PORT", getEnvOrDefaultInt("PGPORT", 5432))

			state.replicationPassword = getEnvOrDefault("AUTOCONFIG_REPLICATION_PASSWORD", "")
			replicationPasswordPath := getEnvOrDefault("AUTOCONFIG_REPLICATION_PASSWORD_FILE", "")
			if replicationPasswordFile, err := os.Stat(replicationPasswordPath); err == nil && !replicationPasswordFile.IsDir() {
				if v, err := os.ReadFile(replicationPasswordPath); err == nil {
					state.replicationPassword = string(v)
				}
			}
			if state.replicationPassword == "" {
				log.Fatal("AUTOCONFIG_REPLICATION_PASSWORD or AUTOCONFIG_REPLICATION_PASSWORD_FILE must be set")
			}

			// Pass the monitor through an env var which is a little bit better than an argument
			// Note: For security reasons we do not want to pass the monitor URI as an argument as it contains a password
			monitorEnvUri := "PG_AUTOCTL_MONITOR=postgresql://autoctl_node:" + state.monitorPassword + "@" + state.monitorHost + ":" + strconv.Itoa(state.monitorPort) + "/pg_auto_failover?sslmode=prefer"
			monitorSet := false
			for i, j := range innerEnv {
				if strings.HasPrefix(j, "PG_AUTOCTL_MONITOR=") {
					innerEnv[i] = monitorEnvUri
					monitorSet = true
					break
				}
			}
			if !monitorSet {
				innerEnv = append(innerEnv, monitorEnvUri)
			}

			state.env = innerEnv

			// The first secondary initialization is tricky as we must inject pg_hba.conf
			// and soon after set the replication password for the second time
			targetDir := getEnvPgData()
			initDonePath := filepath.Join(targetDir, PostgresInitDoneFileName)
			if _, err := os.Stat(initDonePath); err != nil && !os.IsNotExist(err) {
				log.Fatal("Failed to access Postgres data directory")
			} else if err != nil {
				log.WithFields(log.Fields{"path": targetDir}).Info("Postgres data dir is not initialized")
				if state.hbaConfPath != "" {
					go func() {
						for {
							if _, err := os.Stat(initDonePath); err == nil {
								break
							} else if !os.IsNotExist(err) {
								log.Fatal("Failed to access Postgres data directory")
							}
							time.Sleep(100 * time.Millisecond)
						}
						ensurePgHbaConfReplaced(state.hbaConfPath)
					}()
				}

				// Propagate the replication password
				// Note: There is no option to pass the replication password, and even pg_autoctl create without --run already
				// assumes it is present. Neither does pg_autoctl config set work at this stage
				replEnvPassword := "PGPASSWORD=" + state.replicationPassword
				replSet := false
				initEnv := innerEnv
				for i, j := range initEnv {
					if strings.HasPrefix(j, "PGPASSWORD=") {
						initEnv[i] = replEnvPassword
						replSet = true
						break
					}
				}
				if !replSet {
					initEnv = append(initEnv, replEnvPassword)
				}

				log.WithFields(log.Fields{"name": innerExec, "args": innerArgs}).Info("Initializing pg_auto_failover on disk")
				cmdInit := exec.Command(innerExec, innerArgs...)
				cmdInit.Env = initEnv
				cmdInit.Stdout = os.Stdout
				cmdInit.Stderr = os.Stderr
				if err := cmdInit.Run(); err != nil {
					log.WithError(err).Warn("Initialization of pg_auto_failover encountered problems")
				}

				log.Info("Setting replication password in the configuration")
				setPassCmd := exec.Command(AutoFailoverExecutable, "config", "set", "replication.password", state.replicationPassword)
				setPassCmd.Env = state.env
				if err := setPassCmd.Run(); err != nil {
					log.WithError(err).Fatal("Failed to set password of user pgautofailover_replicator in the configuration")
				}
			}

	if config == nil {
		log.Fatal("No configuration type specified. Use --type <type>")
	}
	if !useLocal && len(remotes) == 0 {
		log.Fatal("At least one peer connector must be used, add --remotes, --local or both")
	}
	if len(argsLeft) == 0 {
		log.Fatal("No inner command found to execute. Use -- to separate the inner executable and its args")
		innerArgs = append(innerArgs, "--run")
		log.Info("Starting a worker node.")
	}

	// Run the auto configuration
	doComplete := false
	go func() {
		// Wait for the inner process to start
		time.Sleep(10 * time.Second)
		go executionLoop(state, &doComplete)

	} else if mode != "" {
		log.Fatal("AUTOCONFIG_MODE must be either 'monitor' or 'postgres' to activate pg_auto_failover. Leave it empty for plain Postgres operation.")
	} else {
		// Execute the original entrypoint as if we never existed
		log.Info("No pg_auto_failover mode specified. Will execute plain Postgres.")
		innerExec = PostgresOriginalEntrypoint
		innerArgs = []string{PostgresOriginalCmd}
	}

	// Clean up any PIDs left from a previous run
	os.Remove(AutoFailoverPidPath)
	os.Remove(AutoFailoverPidPath2)
	os.Remove(AutoFailoverPidPath3)

	// Start the inner executable (usually starting with "pg_autoctl create postgres" or "pg_autoctl create monitor")
	log.WithFields(log.Fields{"name": innerExec, "args": innerArgs}).Info("Handing over to the inner process.")
	cmd := exec.Command(innerExec, innerArgs...)
	cmd.Env = innerEnv
	cmd.Stdout = os.Stdout
	cmd.Stderr = os.Stderr
	cmd.Run()

	log.Info("pg_auto_failover load balancer configurator has completed.")
	doComplete = true
}

func executionLoop(state global, doComplete *bool) {
	// Link pg_hba.conf if asked to do so
	ensurePostgresReadyForQueries()
	ensurePgHbaConfReplaced(state.hbaConfPath)

	ensurePostgresReadyForQueries()

	// State
	var instances []postgresInstance
	monitorIsDown := false
	var lastSentStateJson []byte

	// Determine if we are a monitor
	cmdIsMonitor := exec.Command("/usr/bin/pg_autoctl", "show", "uri", "--json")
	cmdIsMonitor.Env = os.Environ()
	var isMonitor bool
	if configsB, err := cmdIsMonitor.Output(); err != nil {
		log.WithError(err).Warn("Failed to detect if we are a monitor node.")
		isMonitor = false
	} else {
		var configs *configFiles
		if err := json.Unmarshal(configsB, &configs); err != nil {
			log.WithError(err).Warn("Failed to parse monitor node detection")
			isMonitor = false
		} else {
			isMonitor = configs.State == ""
			log.WithFields(log.Fields{"isMonitor": isMonitor}).Info("Determined if this is a monitor")
	// Monitor automatically creates user autoctl_node but we must set/propagate its password
	if state.isMonitor {
		log.Info("This is a monitor node, setting password for autoctl_node user")
		setPassCmd := exec.Command("psql", "-d", "pg_auto_failover", "-c", "alter user autoctl_node password '"+state.monitorPassword+"';")
		if err := setPassCmd.Run(); err != nil {
			log.WithError(err).Fatal("Failed to set password of user autoctl_node on the monitor node")
		}

		log.WithFields(log.Fields{"rule": "host pg_auto_failover autoctl_node <node ip>/32 <auth-type>"}).Info(
			"Check rules exist in pg_hba.conf for each other node")

	} else {
		log.Info("Setting replication password in the database (will succeed on primary only)")
		setPassCmd2 := exec.Command("psql", "-c", "alter user pgautofailover_replicator password '"+state.replicationPassword+"';")
		if err := setPassCmd2.Run(); err != nil {
			log.WithError(err).Info("Failed to set password of user pgautofailover_replicator in the database (probably a stand-by read-only node)")
		}

		// TODO How to set password of pgautofailover_monitor?

		log.WithFields(log.Fields{
			"ruleMonitor":  "host all pgautofailover_monitor <monitor ip>/32 <auth-type>",
			"ruleRegular1": "host postgres pgautofailover_replicator <node ip>/32 <auth-type>",
			"ruleRegular2": "host replication pgautofailover_replicator <node ip>/32 <auth-type>",
		}).Info("Check rules exist in pg_hba.conf for each other node and type")
	}

	peers := []http.Client{}
	if useLocal {
	if state.useLocal {
		peers = append(peers, http.Client{
			Transport: &http.Transport{
				DialContext: func(_ context.Context, _, _ string) (net.Conn, error) {
					return net.Dial("unix", socketPath)
					return net.Dial("unix", state.socketPath)
				},
			},
		})
	}
	for _, i := range remotes {
	for _, i := range state.remotes {
		if iParts := strings.Split(i, ":"); len(iParts) == 1 {
			i = i + ":" + strconv.Itoa(DefaultRemotePort)
		}
@@ -203,19 +342,19 @@ func main() {
	}

	pingWaitCounter := 0
	for !doComplete {
	for !*doComplete {
		if !monitorIsDown {
			cmdFormationState := exec.Command("/usr/bin/pg_autoctl", "show", "state", "--formation", formation, "--json")
			cmdFormationState.Env = os.Environ()
			cmdFormationState := exec.Command("/usr/bin/pg_autoctl", "show", "state", "--formation", state.formation, "--json")
			cmdFormationState.Env = state.env
			if localStateB, err := cmdFormationState.Output(); err != nil {
				log.WithError(err).Warn("Failed to obtain the formation state from pg_autoctl. Monitor is probably down.")
				monitorIsDown = true
				// Delegate the monitor up-checking to a separate go routine so we do not block our main task of
				// sending pings to the configuration peer
				go func() {
					for !doComplete {
						cmdMonitorUp := exec.Command("/usr/bin/pg_autoctl", "show", "state", "--formation", formation, "--json")
						cmdMonitorUp.Env = os.Environ()
					for !*doComplete {
						cmdMonitorUp := exec.Command("/usr/bin/pg_autoctl", "show", "state", "--formation", state.formation, "--json")
						cmdMonitorUp.Env = state.env
						if _, err := cmdMonitorUp.Output(); err == nil {
							log.Warn("Monitor node is no longer down.")
							monitorIsDown = false
@@ -234,12 +373,12 @@ func main() {
			}
		}

		if monitorIsDown && !isMonitor && len(instances) == 0 {
		if monitorIsDown && !state.isMonitor && len(instances) == 0 {
			// Try to obtain info about the current node. That way we may still be able to operate
			// in a read-only mode or with luck even in read-write mode. If the configuration peer
			// is connected to all the nodes then it can reconstruct the full picture.
			cmdLocalState := exec.Command("/usr/bin/pg_autoctl", "show", "state", "--local", "--json")
			cmdLocalState.Env = os.Environ()
			cmdLocalState.Env = state.env
			if localStateB, err := cmdLocalState.Output(); err != nil {
				log.WithError(err).Warn("Failed to obtain the local state from pg_autoctl")
			} else {
@@ -256,12 +395,12 @@ func main() {
		// Note: We do keep producing configuration files as some volatile values (like
		// IP addresses) still may change
		if !monitorIsDown || len(instances) != 0 {
			if newState, err := config.configure(instances); err != nil {
			if newState, err := configure(instances); err != nil {
				log.WithFields(log.Fields{"instances": instances}).Warn("Failed to produce configuration from instances.")
			} else if !bytes.Equal(newState, lastSentStateJson) {
				// Send the new state
				lastSentStateJson = newState
				log.WithFields(log.Fields{"state": string(newState)}).Info("Sending configuration to the peer processes")
				log.WithFields(log.Fields{"state": string(newState)}).Info("Sending configuration to the peer processes (if any)")
				for _, i := range peers {
					if response, err := i.Post("http://unix/config", "application/octet-stream", bytes.NewReader(newState)); err != nil {
						log.WithError(err).WithFields(log.Fields{"peer": i, "response": response}).Warn("Failed to send configuration to a peer process.")
@@ -271,18 +410,18 @@ func main() {
				}
			}

		// Send a periodic ping to the configuration worker
		if pingWaitCounter == CyclesPerPing {
		if pingWaitCounter == ExecutionCyclesPerPing {
			pingWaitCounter = 0
			log.Trace("Pinging configuration peer")
			log.Trace("Pinging configuration peers (if any)")
			for _, i := range peers {
				if response, err := i.Get("http://unix/ping"); err != nil {
					log.WithError(err).WithFields(log.Fields{"peer": i, "response": response}).Trace("Failed to ping to a peer process.")
				} else {
					var msg pgpoolPingMessage
					var msg pingMessage
					if err := json.NewDecoder(response.Body).Decode(&msg); err != nil {
						log.WithError(err).WithFields(log.Fields{"peer": i}).Warn("Failed to decode ping response.")
					} else {
						log.WithFields(log.Fields{"peer": i, "response": msg}).Trace("Ping response")
						log.WithFields(log.Fields{"response": msg}).Trace("Ping response")
						if msg.NeedsConfig {
							log.WithFields(log.Fields{"peer": i, "state": string(lastSentStateJson)}).Info("Sending configuration to a peer process")
							if response, err := i.Post("http://unix/config", "application/octet-stream", bytes.NewReader(lastSentStateJson)); err != nil {
@@ -296,24 +435,73 @@ func main() {
			pingWaitCounter++
		}

		time.Sleep(1000 * time.Millisecond)
		time.Sleep(ExecutionCycleSleep)
	}
	}()

	// Start the inner executable (usually starting with "pg_autoctl create postgres" or "pg_autoctl create monitor")
	log.WithFields(log.Fields{"name": argsLeft[0], "args": argsLeft[1:]}).Info("Handing over to the inner process.")
	cmd := exec.Command(argsLeft[0], argsLeft[1:]...)
	cmd.Env = os.Environ()
	cmd.Stdout = os.Stdout
	cmd.Stderr = os.Stderr
	cmd.Run()

	log.Info("pg_auto_failover load balancer has completed.")
	doComplete = true
}

func (t *pgpoolConfiguration) configure(nodes []postgresInstance) ([]byte, error) {
	var msg pgpoolConfigMessage
func ensurePostgresReadyForQueries() {
	for {
		log.Trace("Testing if we can query the Postgres database")
		testCmd := exec.Command("psql", "-c", "SELECT 0;")
		if err := testCmd.Run(); err == nil {
			break
		}
		time.Sleep(1000 * time.Millisecond)
	}
	log.Trace("Postgres database accepts queries")
}

func ensurePgHbaConfReplaced(hbaConfPath string) {
	needsPgConfReload := false
	if hbaConfPath != "" {
		// Postgres refuses to initialize a data directory that is initially not empty
		targetDir := getEnvPgData()
		targetPath := filepath.Join(targetDir, PgHbaConfFileName)
		initDonePath := filepath.Join(targetDir, PostgresInitDoneFileName)
		log.WithFields(log.Fields{"path": targetDir}).Info("Ensuring Postgres data dir is initialized")
		for {
			if _, err := os.Stat(initDonePath); err == nil {
				break
			} else if !os.IsNotExist(err) {
				log.Fatal("Failed to access Postgres data directory")
			}
			time.Sleep(100 * time.Millisecond)
		}

		if info, err := os.Stat(hbaConfPath); err != nil || info.IsDir() {
			log.WithError(err).Fatal("Cannot find the pg_hba.conf to be linked as requested by AUTOCONFIG_LINK_HBA_CONF")
		}

		if info, err := os.Lstat(targetPath); (err != nil && os.IsNotExist(err)) || (err == nil && info.Mode().Type() != os.ModeSymlink) {
			// During secondary node initialization the pg_hba.conf is completely missing
			if !os.IsNotExist(err) {
				if err := os.Remove(targetPath); err != nil {
					log.Warn("Failed to remove the original pg_hba.conf")
				}
			}

			log.WithFields(log.Fields{"path": hbaConfPath}).Info("Applying the requested pg_hba.conf")
			if err := os.Symlink(hbaConfPath, targetPath); err != nil {
				log.WithError(err).Fatal("Failed to sym-link the pg_hba.conf requested by AUTOCONFIG_LINK_HBA_CONF")
			}

			needsPgConfReload = true
		}
	}

	if needsPgConfReload {
		ensurePostgresReadyForQueries()

		log.Info("Reloading Postgres pg_hba.conf")
		reloadCmd := exec.Command("psql", "-c", "SELECT 0 FROM pg_reload_conf();")
		if err := reloadCmd.Run(); err != nil {
			log.WithError(err).Fatal("Failed to reload Postgres configuration to apply the new pg_hba.conf")
		}
	}
}

func configure(nodes []postgresInstance) ([]byte, error) {
	var msg configMessage
	for _, i := range nodes {
		var isPrimary bool
		switch i.AssignedState {
@@ -331,7 +519,7 @@ func (t *pgpoolConfiguration) configure(nodes []postgresInstance) ([]byte, error
		// fixed. Therefore we do it ourselves and our peer application will
		// mark any IP addresses no longer used for Postgres nodes as down so
		// Pgpool happily uses the ones that are up.
		var a pgpoolInstance
		var a configInstance
		if ips, err := net.LookupIP(i.NodeHost); err != nil || len(ips) == 0 {
			log.WithError(err).WithFields(log.Fields{"host": i.NodeHost}).Warn("Failed to resolve node's host name, skipping.")
			continue
@@ -351,3 +539,37 @@ func (t *pgpoolConfiguration) configure(nodes []postgresInstance) ([]byte, error

	return json.Marshal(msg)
}

func getEnvOrDefault(name string, defaultValue string) string {
	if val := os.Getenv(name); val != "" {
		return val
	} else {
		return defaultValue
	}
}

func getEnvOrDefaultBool(name string, defaultValue bool) bool {
	if valS := os.Getenv(name); valS != "" {
		valS = strings.ToLower(valS)
		if valS == "1" || valS == "yes" || valS == "true" {
			return true
		} else {
			return false
		}
	} else {
		return defaultValue
	}
}

func getEnvOrDefaultInt(name string, defaultValue int) int {
	if val := os.Getenv(name); val != "" {
		if val2, err := strconv.Atoi(val); err == nil {
			return val2
		}
	}
	return defaultValue
}

func getEnvPgData() string {
	return getEnvOrDefault("PGDATA", PostgresDataDefaultPath)
}