#!/bin/bash
#
# Copyright (c) 2010-2013 SUSE LLC, All Rights Reserved.
#
# Author: Tim Serong <tserong@suse.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it would be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# Further, this software is distributed without any warranty that it is
# free of the rightful claim of any third person regarding infringement
# or the like.  Any license provided herein, whether implied or
# otherwise, applies only to this software file.  Patent licenses, if
# any, provided herein do not apply to combinations of this program with
# other software, or any other product whatsoever.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write the Free Software Foundation,
# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
#

. /usr/lib/ha-cluster-functions

declare SEED_HOST

# Print the command-line help text on stdout and terminate the script
# with exit status 0.
usage()
{
	# Only the first line needs $0 expanded; the remainder is literal text,
	# so it comes from a quoted here-document.
	printf 'Usage: %s [options] [stage]\n' "$0"
	cat <<'END'

Options:
    -c <host>   IP address or hostname of existing cluster node
    -i <if>     Default to IP address on interface 'if' instead of eth0
    -h,--help   Display this usage information
    -q          Be quiet (don't describe what's happening, just do it)
    -y          Answer "yes" to all prompts (use with care)

Stage can be one of:
    ssh         Obtain SSH keys from existing cluster node (requires -c <ip>)
    csync2      Configure csync2 (requires -c <ip>)
    ssh_merge   Merge root's SSH known_hosts across all nodes (csync2 must
                already be configured).
    cluster     Start the cluster on this node

If stage is not specified, each stage will be invoked in sequence.
END
	exit 0
}

# Stage "ssh": fetch root's SSH key pairs from $SEED_HOST, install them in
# /root/.ssh, add the public keys to the local authorized_keys, then ask the
# seed node to run "ha-cluster-init ssh_remote".
# Globals:  SEED_HOST (read) - existing cluster node to copy keys from
# Helpers:  error, start_service, invoke, status, confirm and append are not
#           defined in this file (presumably /usr/lib/ha-cluster-functions).
join_ssh()
{
	[ -n "$SEED_HOST" ] || error "No existing IP/hostname specified (use -c option)"

	local -i got_keys=0
	local key tmp_dir

	start_service sshd.service

	invoke mkdir -m 700 -p /root/.ssh

	# Private keys are staged here, so the directory must be freshly created
	# and unpredictable: a fixed /tmp/<name>.$$ could be pre-created (or
	# symlinked) by another local user.  mktemp -d creates it atomically
	# with owner-only permissions.
	tmp_dir=$(mktemp -d /tmp/ha-cluster-ssh.XXXXXX) \
		|| error "Can't create temporary directory under /tmp"

	status "Retrieving SSH keys from $SEED_HOST"
	# The id_* glob is quoted so that it is expanded on the seed host, not
	# locally.
	if ! invoke scp -oStrictHostKeyChecking=no \
		"root@$SEED_HOST:/root/.ssh/id_*" "$tmp_dir/"; then
		# Don't leave (possibly partially copied) key material behind.
		invoke rm -rf "$tmp_dir"
		error "Can't retrieve SSH keys from $SEED_HOST"
	fi

	# This supports all SSH key types, for the case where ha-cluster-init
	# wasn't used to set up the seed node, and the user has manually
	# created, for example, DSA keys (bnc#878080)
	for key in id_rsa id_dsa id_ecdsa id_ed25519 ; do
		[ -f "$tmp_dir/$key" ] || continue
		if [ -f "/root/.ssh/$key" ]; then
			confirm \
				"/root/.ssh/$key already exists - overwrite?" || continue
		fi
		# Moves the private key together with its companions (e.g. .pub).
		invoke mv "$tmp_dir/$key"* /root/.ssh/
		if [ -f "/root/.ssh/$key.pub" ]; then
			# -F: the key is data, not a regular expression.
			grep -q -s -F -- "$(cat "/root/.ssh/$key.pub")" \
				/root/.ssh/authorized_keys \
				|| append "/root/.ssh/$key.pub" /root/.ssh/authorized_keys
		fi
		got_keys=$((got_keys + 1))
	done

	invoke rm -r "$tmp_dir"

	if [ "$got_keys" -eq 0 ]; then
		status "No new SSH keys installed"
	elif [ "$got_keys" -eq 1 ]; then
		status "One new SSH key installed"
	else
		status "$got_keys new SSH keys installed"
	fi

	# This makes sure the seed host has its own SSH keys in its own
	# authorized_keys file (again, to help with the case where the
	# user has done manual initial setup without the assistance of
	# ha-cluster-init).
	invoke ssh "root@$SEED_HOST" ha-cluster-init ssh_remote \
		|| error "Can't invoke ha-cluster-init ssh_remote on $SEED_HOST"
}

# Stage "csync2": register this node with csync2 on $SEED_HOST, copy the
# csync2 configuration and group key from it, start the local csync2 socket
# and trigger a sync from the seed node.
# Globals:  SEED_HOST (read) - existing cluster node
# Helpers:  error, status, invoke, start_service and warn are not defined
#           in this file (presumably /usr/lib/ha-cluster-functions).
join_csync2()
{
	[ -n "$SEED_HOST" ] || error "No existing IP/hostname specified (use -c option)"

	status "Configuring csync2"

	# Resolve the local hostname once; it names the csync2 database and is
	# what gets registered on the seed node.
	local me
	me=$(hostname)
	[ -n "$me" ] || error "Can't determine local hostname"

	# Necessary if re-running join on a node that's been configured before.
	invoke rm -f "/var/lib/csync2/$me.db3"

	# Not automatically updating /etc/hosts - risky in the general case.
	#etc_hosts_add_me
	#local hosts_line=$(etc_hosts_get_me)
	#[ -n "$hosts_line" ] || error "No valid entry for $(hostname) in /etc/hosts - csync2 can't work"

	# If we *were* updating /etc/hosts, the next line would have "\"$hosts_line\"" as
	# the last arg (but this requires re-enabling this functionality in ha-cluster-init)
	invoke ssh "root@$SEED_HOST" ha-cluster-init csync2_remote "$me" \
		|| error "Can't invoke ha-cluster-init csync2_remote on $SEED_HOST"

	# This is necessary if syncing /etc/hosts (to ensure everyone's got the
	# same list of hosts)
	#local tmp_conf=/etc/hosts.$$
	#invoke scp root@$SEED_HOST:/etc/hosts $tmp_conf \
	#	|| error "Can't retrieve /etc/hosts from $SEED_HOST"
	#install_tmp $tmp_conf /etc/hosts

	# The brace pattern is passed through literally (quoted), so it is
	# expanded on the remote side rather than by the local shell.
	invoke scp "root@$SEED_HOST:/etc/csync2/{csync2.cfg,key_hagroup}" \
		/etc/csync2 \
		|| error "Can't retrieve csync2 config from $SEED_HOST"

	start_service csync2.socket

	# Sync new config out.  This goes to all hosts; csync2.cfg definitely
	# needs to go to all hosts (else hosts other than the seed and the
	# joining host won't have the joining host in their config yet).
	# Strictly, the rest of the files need only go to the new host, which
	# could theoretically be effected using `csync2 -xv -P $(hostname)`,
	# but this still leaves all the other files in dirty state (because
	# they haven't gone to all nodes in the cluster), which means a
	# subsequent join of another node can fail its sync of corosync.conf
	# when it updates expected_votes.  Grrr...
	invoke ssh "root@$SEED_HOST" "csync2 -mr / ; csync2 -fr / ; csync2 -xv" \
		|| warn "csync2 run failed - some files may not be sync'd"
}

# Stage "ssh_merge": collect root's known_hosts from every host listed in
# $CSYNC2_CFG, merge them with the local file and push the merged result
# back out to all of those hosts.
# Globals:  CSYNC2_CFG (read) - csync2 configuration with "host <name>;" lines
# Helpers:  status, error, invoke and warn are not defined in this file
#           (presumably /usr/lib/ha-cluster-functions).
join_ssh_merge()
{
	status "Merging known_hosts"

	local -a host_args=() inputs=()
	local host f tmp_dir merged
	local -i rc

	# Build "-H <host>" pairs from the second word of each "host" line,
	# dropping the first ';' on the line.
	while read -r host; do
		[ -n "$host" ] && host_args+=(-H "$host")
	done < <(awk '/^[[:space:]]*host[[:space:]]/ { print $2 }' "$CSYNC2_CFG" \
		| sed -e 's/;//')

	[ "${#host_args[@]}" -eq 0 ] && error "Unable to extract host list from $CSYNC2_CFG"

	# Freshly created, unpredictable, owner-only directory rather than a
	# guessable /tmp/<name>.$$ that another local user could pre-create.
	tmp_dir=$(mktemp -d /tmp/ha-cluster-pssh.XXXXXX) \
		|| error "Can't create temporary directory under /tmp"

	# The act of using pssh to connect to every host (without strict host key
	# checking) ensures that at least *this* host has every other host in its
	# known_hosts
	invoke pssh "${host_args[@]}" -O StrictHostKeyChecking=no -o "$tmp_dir" \
		cat ~/.ssh/known_hosts
	rc=$?
	# 0 == success, 5 == ssh OK but remote command failed - we're ignoring
	# this because it's most likely known_hosts doesn't exist.
	[ "$rc" -ne 0 ] && [ "$rc" -ne 5 ] && warn "known_hosts collection may be incomplete"

	# Only feed sort the files that really exist: an unmatched glob or a
	# missing local known_hosts would make sort fail and leave an empty
	# result behind.
	merged=~/.ssh/known_hosts.new
	for f in "$tmp_dir"/* ~/.ssh/known_hosts; do
		[ -f "$f" ] && inputs+=("$f")
	done

	# Sort combines everything we grabbed with our latest local known_hosts.
	# Never distribute an empty or failed merge - that would wipe the
	# known_hosts file on every node.
	if [ "${#inputs[@]}" -gt 0 ] \
		&& sort -u -- "${inputs[@]}" > "$merged" \
		&& [ -s "$merged" ]; then
		invoke pscp "${host_args[@]}" -O StrictHostKeyChecking=no \
			"$merged" ~/.ssh/known_hosts \
			|| warn "known_hosts merge may be incomplete"
	else
		warn "No known_hosts entries collected - not distributing known_hosts"
	fi
	invoke rm -f "$merged"
	invoke rm -r "$tmp_dir"
}

# Stage "cluster": create mountpoints the seed node's CIB refers to, bump
# expected_votes (and adjust two_node) in $COROSYNC_CONF, sync that file
# with csync2, start the cluster locally and drop no-quorum-policy=ignore
# once the cluster grows beyond two nodes.
# Globals:  SEED_HOST (read, optional), COROSYNC_CONF (read; file rewritten)
# Helpers:  probe_partitions, invoke, status, warn, error, install_tmp and
#           init_cluster_local are not defined in this file (presumably
#           /usr/lib/ha-cluster-functions).
join_cluster()
{
	# Need to do this if second (or subsequent) node happens to be up and
	# connected to storage while it's being repartitioned on the first node.
	probe_partitions

	# It would be massively useful at this point if new nodes could come
	# up in standby mode, so we could query the CIB locally to see if
	# there was any further local setup that needed doing, e.g.: creating
	# mountpoints for clustered filesystems.  Unfortunately we don't have
	# that yet, so the following crawling horror takes a punt on the seed
	# node being up, then asks it for a list of mountpoints...
	if [ -n "$SEED_HOST" ]; then
		local m mtpts
		mtpts=$(ssh "root@$SEED_HOST" 'cibadmin -Q -A '\''//primitive[@class="ocf" and @provider="heartbeat" and @type="Filesystem"]/instance_attributes/nvpair[@name="directory"]'\'' 2>/dev/null | grep '\''<nvpair'\'' | sed '\''s/.*value="\([^"]*\)".*/\1/'\''')
		# One mountpoint per line; read line-wise so that paths containing
		# whitespace stay intact.  fd 3 keeps stdin free for the command.
		while IFS= read -r m <&3; do
			[ -n "$m" ] || continue
			invoke mkdir -p "$m"
		done 3<<< "$mtpts"
	else
		status "No existing IP/hostname specified - skipping mountpoint detection/creation"
	fi

	# Bump expected_votes in corosync.conf
	# TODO(must): this is rather fragile (see related code in ha-cluster-remove)
	local tmp_conf=${COROSYNC_CONF}.$$
	local new_quorum
	# Splitting on every non-digit makes the last field the trailing number
	# of the expected_votes line; the new value is that number plus one.
	new_quorum=$(awk -F'[^0-9]' '/^[[:space:]]*expected_votes/ { print $NF + 1 };' "$COROSYNC_CONF")

	# No expected_votes line gives an empty result and several lines give a
	# multi-line one; neither can be used in the tests and sed script below.
	local votes_known=1
	if ! [[ $new_quorum =~ ^[0-9]+$ ]]; then
		warn "Unable to determine expected_votes from $COROSYNC_CONF"
		votes_known=0
	fi

	local two_node=0
	if [ "$votes_known" -eq 1 ] && [ "$new_quorum" -eq 2 ]; then
		two_node=1
	fi

	local -a sed_script=()
	if [ "$votes_known" -eq 1 ]; then
		sed_script+=(-e 's/^\([[:space:]]*expected_votes:[[:space:]]*\).*/\1'"$new_quorum"'/')
	fi
	sed_script+=(-e 's/^\([[:space:]]*two_node:[[:space:]]*\).*/\1'"$two_node"'/')

	if ! sed "${sed_script[@]}" "$COROSYNC_CONF" > "$tmp_conf"; then
		# Never install a truncated/empty configuration.
		rm -f "$tmp_conf"
		error "Can't update $COROSYNC_CONF"
	fi
	install_tmp "$tmp_conf" "$COROSYNC_CONF"
	invoke csync2 -m "$COROSYNC_CONF"
	invoke csync2 -f "$COROSYNC_CONF"
	invoke csync2 -xv "$COROSYNC_CONF"

	# ...now that that's out of the way, let's initialize the cluster.
	init_cluster_local

	# Trigger corosync config reload to ensure expected_votes is propagated
	invoke corosync-cfgtool -R

	# Ditch no-quorum-policy=ignore if we're going over two nodes
	if [ "$votes_known" -eq 1 ] && [ "$new_quorum" -gt 2 ] \
		&& crm configure show | grep -q 'no-quorum-policy=.*ignore' ; then
		invoke crm_attribute --attr-name no-quorum-policy --delete-attr
	fi
}

#------------------------------------------------------------------------------

# Top-level driver: parse options, run sanity checks, then run either the
# requested stage or all stages in sequence.
# NOTE(review): error, _die, check_prereqs, init, status, prompt_for_string
# and the variables NET_IF, BE_QUIET, YES_TO_ALL and LOG_FILE are not defined
# in this file - presumably they come from /usr/lib/ha-cluster-functions.

# for --help option
[ "$1" == "--help" ] && usage

while getopts 'c:i:hqy' o; do
	case $o in
	c) SEED_HOST=$OPTARG;;
	i) NET_IF=$OPTARG;;
	h) usage;;
	q) BE_QUIET=true;;
	y) YES_TO_ALL=true;;
	esac
done

shift $((OPTIND - 1))

if [ "$1" == "ssh_test" ]; then
	# Check whether passwordless ssh works or not.  Not a "regular" stage (i.e.
	# never executed automatically, intended for use by external programs), so
	# currently undocumented in usage text or man page.  Requires -c <host>.

	[ -n "$SEED_HOST" ] || _die "No existing IP/hostname specified (use -c option)"
	ssh -oBatchmode=yes -oStrictHostKeyChecking=no \
		"root@$SEED_HOST" true 2>/dev/null
	exit $?
fi

# The ssh, csync2 and ssh_merge stages may run while the cluster is up;
# everything else (including a full run) must not.
case $1 in
ssh|csync2|ssh_merge)
	;;
*)
	if systemctl -q is-active corosync.service; then
		error "Cluster is currently active - can't run"
	fi
	;;
esac

# Need hostname resolution to work, want NTP
check_prereqs

case $1 in
ssh|csync2|ssh_merge|cluster)
	init
	"join_$1"
	;;
"")
	init

	if ! $YES_TO_ALL && [ -z "$SEED_HOST" ]; then
		status "
Join This Node to Cluster:
  You will be asked for the IP address of an existing node, from which
  configuration will be copied.  If you have not already configured
  passwordless ssh between nodes, you will be prompted for the root
  password of the existing node.
"
		SEED_HOST=$(prompt_for_string \
			'IP address or hostname of existing node (e.g.: 192.168.1.1)' \
			'.+')
	fi

	join_ssh
	join_csync2
	join_ssh_merge
	join_cluster
	;;
*)
	echo -e "Invalid stage ($1)\n"
	# usage always exits 0; run it in a subshell so that an invalid stage
	# is still reported to the caller as a failure.
	( usage )
	exit 1
	;;
esac

status "Done (log saved to $LOG_FILE)"
exit 0
