#!/bin/bash

# Run commands on CTDB nodes.

# See http://ctdb.samba.org/ for more information about CTDB.

# Copyright (C) Martin Schwenke  2008

# Based on an earlier script by Andrew Tridgell and Ronnie Sahlberg.

# Copyright (C) Andrew Tridgell  2007

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
   
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
   
# You should have received a copy of the GNU General Public License
# along with this program; if not, see <http://www.gnu.org/licenses/>.

# Base name this script was invoked as; used as the getopt program
# name and as the prefix of error messages.
prog=$(basename "$0")

# Print the command-line synopsis on standard error and terminate the
# script with exit status 1.  This function never returns.
usage ()
{
    # The delimiter is quoted so the help text is emitted verbatim.
    cat <<'EOF' 1>&2
Usage: onnode [OPTION] ... <NODES> <COMMAND> ...
  options:
    -c          Run in current working directory on specified nodes.
    -f          Specify nodes file, overrides CTDB_NODES_FILE.
    -i          Keep standard input open - the default is to close it.
    -n          Allow nodes to be specified by name.
    -o <prefix> Save standard output from each node to file <prefix>.<ip>
    -p          Run command in parallel on specified nodes.
    -P          Push given files to nodes instead of running commands.
    -q          Do not print node addresses (overrides -v).
    -v          Print node address even for a single node.
  <NODES>       "all", "any", "ok" (or "healthy"), "con" (or "connected") ; or
                a node number (0 base); or
                a hostname (if -n is specified); or
                list (comma separated) of <NODES>; or
                range (hyphen separated) of node numbers.
EOF

    exit 1
}

# Complain on standard error about a malformed node specification,
# followed by a blank line, then hand over to usage (which prints the
# synopsis and exits).
invalid_nodespec ()
{
    printf '%s\n\n' "Invalid <nodespec>" >&2
    usage
}

# Option defaults; parse_options overrides these from the command line.
stdin=false
push=false
names_ok=false
prefix=""
quiet=false
verbose=false
parallel=false
current=false

# Use the default configuration directory unless the caller supplied a
# non-empty CTDB_BASE.
: "${CTDB_BASE:=/usr/local/etc/ctdb}"

# Source the functions file from CTDB_BASE and load the "ctdb"
# configuration.  loadconfig is not defined in this script; presumably
# it is provided by the sourced file.
. "${CTDB_BASE}/functions"
loadconfig "ctdb"

# Parse onnode's own options and the mandatory <NODES> <COMMAND>...
# operands.
#
# Arguments: the script's command-line arguments.
# Globals written: current, parallel, verbose, quiet, prefix, names_ok,
#   push, stdin, CTDB_NODES_FILE (according to the options given), plus
#     nodespec - the <NODES> operand
#     command  - all remaining operands joined with spaces
# Calls usage (which exits) on a getopt error, on -h/--help, or when
# fewer than two operands remain after the options.
parse_options ()
{
    # $POSIXLY_CORRECT makes getopt stop at the first non-option, so
    # the command passed to onnode can take options of its own without
    # getopt reordering them into options to onnode.
    local parsed
    # Assigned separately from "local" so getopt's status is not masked.
    parsed=$(POSIXLY_CORRECT=1 getopt -n "$prog" -o "cf:hno:pqvPi" -l help -- "$@") || usage

    eval set -- "$parsed"

    local opt
    while : ; do
	opt="$1"
	shift
	case "$opt" in
	    -c) current=true ;;
	    -f) CTDB_NODES_FILE="$1" ; shift ;;
	    -n) names_ok=true ;;
	    -o) prefix="$1" ; shift ;;
	    -p) parallel=true ;;
	    -q) quiet=true ;;
	    -v) verbose=true ;;
	    -P) push=true ;;
	    -i) stdin=true ;;
	    --) break ;;
	    # -h, --help and anything unexpected (which shouldn't
	    # happen) all end in the usage message.
	    -h|--help|*) usage ;;
	esac
    done

    if [ $# -lt 2 ] ; then
	usage
    fi

    nodespec="$1"
    shift
    command="$*"
}

# Print the node at a 0-based position within a list of nodes.
#
# Arguments:
#   $1    - 0-based index of the wanted node
#   $2... - the node list, one node per argument; an entry of "#DEAD"
#           marks a slot with no usable node
# Outputs: the selected node on stdout.
# On failure (index out of range, or the slot is empty/"#DEAD") prints
# a message on stderr and exits the current (sub)shell with status 1.
echo_nth ()
{
    local n="$1" ; shift

    # Only shift when the index is within range.  bash leaves the
    # positional parameters untouched when asked to shift by more than
    # $# (or by an invalid count), so an unchecked shift would make an
    # out-of-range index silently resolve to the first node.
    local node=""
    if [ "$n" -ge 0 ] 2>/dev/null && [ "$n" -lt $# ] ; then
	shift "$n"
	node="$1"
    fi

    if [ -n "$node" ] && [ "$node" != "#DEAD" ] ; then
	echo "$node"
    else
	echo "${prog}: \"node ${n}\" does not exist" >&2
	exit 1
    fi
}

# Expand a comma-separated node specification into one item per line
# on stdout.
#
# Arguments:
#   $1 - the node specification
# Each comma-separated element is handled as follows:
#   - a keyword (all, any, ok, healthy, con, connected) is echoed as is;
#   - an element containing a hyphen is treated as a numeric range and
#     expanded with seq;
#   - anything else must be a non-negative integer, unless $names_ok is
#     true, in which case it is passed through unchanged.
# Anything that fails these checks goes to invalid_nodespec.
parse_nodespec ()
{
    # Run in a subshell so the IFS change never reaches the caller.
    (
	IFS=","
	local item
	for item in $1 ; do
	    case "$item" in
		all|any|ok|healthy|con|connected)
		    echo "$item"
		    ;;
		*-*)
		    if ! seq "${item%-*}" "${item#*-}" 2>/dev/null ; then
			invalid_nodespec
		    fi
		    ;;
		*)
		    # The numeric comparison fails (quietly) for anything
		    # that isn't an integer greater than -1.
		    if ! [ "$item" -gt -1 ] 2>/dev/null && ! $names_ok ; then
			invalid_nodespec
		    fi
		    echo "$item"
		    ;;
	    esac
	done
    )
}

ctdb_status_output="" # cache of "ctdb -X status" output, first line removed
# Print the address of every node whose CTDB status matches a filter.
#
# Arguments:
#   $1 - whitespace-separated list of all nodes; a node's position in
#        the list is used as its PNN
#   $2 - "healthy" (no status field equal to 1) or "connected" (first
#        status field equal to 0)
# Globals: ctdb_status_output is filled on first use and reused after.
# Outputs: one node address per matching line of status output.
# Exits with status 1 if "ctdb -X status" fails; the lookup of each
# address is done by echo_nth, which exits on an unknown node.
get_nodes_with_status ()
{
    local all_nodes="$1"
    local status="$2"

    if [ -z "$ctdb_status_output" ] ; then
	ctdb_status_output=$(ctdb -X status 2>&1)
	# No! Checking the exit code afterwards is actually clearer...
	# shellcheck disable=SC2181
	if [ $? -ne 0 ] ; then
	    echo "${prog}: unable to get status of CTDB nodes" >&2
	    echo "$ctdb_status_output" >&2
	    exit 1
	fi
	# Drop everything up to and including the first newline, i.e. the
	# first line of output (presumably the column header row).
	local nl="
"
	ctdb_status_output="${ctdb_status_output#*${nl}}"
    fi

    (
	local i
	IFS="${IFS}|"
	while IFS="" read -r i ; do

	    # Only lines of the form "|<digit>..." are node records.
	    # stderr was merged into the captured output above, so skip
	    # anything else (stray messages, blank lines) instead of
	    # handing a bogus index to echo_nth.
	    case "$i" in
		"|"[0-9]*) ;;
		*) continue ;;
	    esac

	    # Intentional word splitting
	    # shellcheck disable=SC2086
	    set -- $i # split line on "|"
	    shift     # line starts with "|" so 1st field is empty
	    local pnn="$1" ; shift
	    shift # ignore IP address but need status bits below

	    case "$status" in
		healthy)
		    # If any bit is 1, don't match this address.
		    local s
		    for s ; do
			[ "$s" != "1" ] || continue 2
		    done
		    ;;
		connected)
		    # If disconnected bit is not 0, don't match this address.
		    [ "$1" = "0" ] || continue
		    ;;
		*)
		    invalid_nodespec
	    esac

	    # Intentional multi-word expansion
	    # shellcheck disable=SC2086
	    echo_nth "$pnn" $all_nodes
	done <<<"$ctdb_status_output"
    )
}

# Print the address of the first node that answers "ctdb pnn".
#
# Arguments:
#   $1 - whitespace-separated list of all nodes, indexed by PNN
# Returns 0 after printing one node, 1 if no node gave a numeric reply.
get_any_available_node ()
{
    local all_nodes="$1"

    # Re-run this script against every node, in parallel and quietly,
    # asking each one for its PNN; stderr is captured along with stdout.
    local replies reply
    replies=$("$0" -pq all ctdb pnn 2>&1)
    while read reply ; do
	# Anything that isn't a bare number must be an error message
	# from a down node - ignore it.
	[[ "$reply" =~ ^[0-9]+$ ]] || continue

	# Intentional multi-word expansion
	# shellcheck disable=SC2086
	echo_nth "$reply" $all_nodes
	return 0
    done <<<"$replies"
    return 1
}

# Resolve a node specification to node addresses, printed on stdout.
#
# Arguments:
#   $1 - the node specification (see usage)
# Globals read: CTDB_NODES_SOCKETS, CTDB_NODES_FILE, CTDB_NODES,
#   CTDB_BASE, names_ok, prog
# The list of all nodes is $CTDB_NODES_SOCKETS if set; otherwise it is
# read from a nodes file, where comments and spaces are stripped and
# each line left empty becomes the placeholder "#DEAD" so that line
# positions keep matching node numbers.
# Exits non-zero if the nodes file is unreadable, the specification is
# invalid or a requested node cannot be found.
get_nodes ()
{
    local all_nodes

    if [ -z "$CTDB_NODES_SOCKETS" ] ; then
	# Choose the nodes file: CTDB_NODES_FILE, then CTDB_NODES, then
	# the default under $CTDB_BASE.
	local f
	if [ -n "$CTDB_NODES_FILE" ] ; then
	    f="$CTDB_NODES_FILE"
	    if [ ! -e "$f" ] && [ "${f#/}" = "$f" ] ; then
		# $f is relative and not found here, try in $CTDB_BASE
		f="${CTDB_BASE}/${f}"
	    fi
	elif [ -n "$CTDB_NODES" ] ; then
	    f="$CTDB_NODES"
	else
	    f="${CTDB_BASE}/nodes"
	fi

	if [ ! -r "$f" ] ; then
	    echo "${prog}: unable to open nodes file  \"${f}\"" >&2
	    exit 1
	fi

	all_nodes=$(sed -e 's@#.*@@g' -e 's@ *@@g' -e 's@^$@#DEAD@' "$f")
    else
	all_nodes="$CTDB_NODES_SOCKETS"
    fi

    local item expanded
    expanded=$(parse_nodespec "$1") || exit $?
    for item in $expanded ; do
	case "$item" in
	    [0-9]|[0-9][0-9]|[0-9][0-9][0-9])
		# Intentional multi-word expansion
		# shellcheck disable=SC2086
		echo_nth "$item" $all_nodes
		;;
	    all)
		# Every node, with the placeholders removed.
		echo "${all_nodes//#DEAD/}"
		;;
	    any)
		get_any_available_node "$all_nodes" || exit 1
		;;
	    ok|healthy)
		get_nodes_with_status "$all_nodes" "healthy" || exit 1
		;;
	    con|connected)
		get_nodes_with_status "$all_nodes" "connected" || exit 1
		;;
	    *)
		# Not a keyword or a 1-3 digit number: only acceptable
		# as a host name.
		$names_ok || invalid_nodespec
		echo "$item"
		;;
	esac
    done
}

# Copy files to a node with rsync, keeping the same path on the node.
# Used in place of the remote shell when -P is given.
#
# Arguments:
#   $1 - destination host
#   $2 - whitespace-separated list of files; relative names are taken
#        relative to the current directory, both locally and remotely
# Returns the status of the last rsync.
push()
{
    local host="$1"
    local files="$2"

    local path abs
    for path in $files ; do
	if $verbose ; then
	    echo "Pushing $path"
	fi

	# Anchor relative names at the current directory.
	abs="$path"
	[ "${path#/}" != "$path" ] || abs="${PWD}/${path}"

	rsync "$abs" "[${host}]:${abs}"
    done
}

# Stand-in for ssh, selected when CTDB_NODES_SOCKETS is set: runs the
# command locally with CTDB_SOCKET set to the given "node".
#
# Arguments:
#   $1 - value to export as CTDB_SOCKET for the command
#   $2 - command string, run via "sh -c"
# Returns the status of the command.
fakessh ()
{
    local socket="$1"
    local cmd="$2"

    # File descriptor 3 is pointed at /dev/null for the command
    # (presumably so it can't touch the fd 3 the main loop uses for
    # filtered stdout).
    CTDB_SOCKET="$socket" sh -c "$cmd" 3>/dev/null
}

# Filter for a node's standard output; reads stdin, writes stdout.
# Globals read: prefix, n (current node), verbose, parallel.
#   - With an output prefix (-o), save everything to "<prefix>.<node>"
#     with any "/" in the node replaced by "_".
#   - Otherwise, when verbose and parallel, tag each line with "[<node>] ".
#   - Otherwise pass the data through untouched.
stdout_filter ()
{
    if [ -z "$prefix" ] ; then
	if $verbose && $parallel ; then
	    sed -e "s@^@[$n] @"
	else
	    cat
	fi
    else
	cat >"${prefix}.${n//\//_}"
    fi
}

# Filter for a node's standard error; reads stdin, writes stdout.
# Globals read: n (current node), verbose, parallel.
# Lines are tagged with "[<node>] " only when both verbose and
# parallel are true; otherwise the data is passed through untouched.
stderr_filter ()
{
    if ! $verbose || ! $parallel ; then
	cat
	return
    fi

    sed -e "s@^@[$n] @"
}

######################################################################

parse_options "$@"

# -c: make the command change into the current directory first.  This
# only applies when running commands, not when pushing files.
if ! $push && $current ; then
    command="cd $PWD && $command"
fi

# Choose the "remote shell" invoked for each node:
#   push    - the push function, when -P was given;
#   fakessh - when CTDB_NODES_SOCKETS is set;
#   $SSH    - otherwise, defaulting to ssh.
ssh_opts=
if $push ; then
    SSH=push
    EXTRA_SSH_OPTS=""
elif [ -n "$CTDB_NODES_SOCKETS" ] ; then
    SSH=fakessh
    EXTRA_SSH_OPTS=""
else
    # Could "2>/dev/null || true" but want to see errors from typos in file.
    if [ -r "${CTDB_BASE}/onnode.conf" ] ; then
	. "${CTDB_BASE}/onnode.conf"
    fi

    SSH="${SSH:-ssh}"

    # Pass -n to ssh unless stdin was requested with -i for a
    # non-parallel run.  For any other transport (rsh?) all bets are
    # off, so no extra options are added.
    if [ "$SSH" = "ssh" ] && { $parallel || ! $stdin ; } ; then
	ssh_opts="-n"
    fi
fi

######################################################################

# Resolve the node specification into the list of node addresses.
# get_nodes runs in a command substitution, so its failure has to be
# propagated explicitly.
nodes=$(get_nodes "$nodespec") || exit $?

# Decide whether node addresses get printed: -q always wins; otherwise
# they are printed whenever more than one node is involved (or -v was
# given).
if $quiet ; then
    verbose=false
else
    # If $nodes contains a space or a newline then assume multiple nodes.
    nl="
"
    [ "$nodes" != "${nodes%[ ${nl}]*}" ] && verbose=true
fi

# PIDs of the background jobs started in parallel mode.  On INT/TERM
# they are all sent TERM.
pids=""
# Intentional multi-word expansion
# shellcheck disable=SC2086
trap 'kill -TERM $pids 2>/dev/null' INT TERM
# There's a small race here where the kill can fail if no processes
# have been added to $pids and the script is interrupted.  However,
# the part of the window where it matters is very small.
retcode=0
for n in $nodes ; do
    # Make a pipeline fail if any stage fails, so the status of the
    # command is not hidden by the filter at the end of the pipe.  Any
    # complaint about the option is discarded.
    set -o pipefail 2>/dev/null

    # The following code applies stdout_filter and stderr_filter to
    # the relevant streams.  Both filters are at the end of pipes so
    # they read from stdin and (by default) write to stdout.  To allow
    # the filters to operate independently, the output of
    # stdout_filter is sent to a temporary file descriptor (3), which
    # is redirected back to stdout at the outermost level.
    if $parallel ; then
	# Run this node's command as a background job and remember its
	# PID so it can be waited for (and killed) later.
	{
	    exec 3>&1
	    {
		$SSH $ssh_opts $EXTRA_SSH_OPTS "$n" "$command" |
		    stdout_filter >&3
	    } 2>&1 | stderr_filter
	} &
	pids="${pids} $!"
    else
	# Sequential mode: announce the node on stderr when verbose.
	if $verbose ; then
	    echo >&2 ; echo ">> NODE: $n <<" >&2
	fi

	# Run in the foreground, recording a failure status but
	# carrying on with the remaining nodes.
	{
	    exec 3>&1
	    {
		$SSH $ssh_opts $EXTRA_SSH_OPTS "$n" "$command" |
		    stdout_filter >&3
	    } 2>&1 | stderr_filter
	} || retcode=$?
    fi
done

# In parallel mode, collect the status of every background job.
if $parallel ; then
    for p in $pids; do
	wait "$p" || retcode=$?
    done
fi

# Exit with 0 if every node succeeded, otherwise with the most
# recently recorded non-zero status.
exit $retcode
