IBM Books

Group Services Programming Guide and Reference


The sample_schg.c sample program

/* IBM_PROLOG_BEGIN_TAG                                                   */
/* This is an automatically generated prolog.                             */
/*                                                                        */
/*                                                                        */
/*                                                                        */
/* Licensed Materials - Property of IBM                                   */
/*                                                                        */
/* (C) COPYRIGHT International Business Machines Corp. 1996,2001          */
/* All Rights Reserved                                                    */
/*                                                                        */
/* US Government Users Restricted Rights - Use, duplication or            */
/* disclosure restricted by GSA ADP Schedule Contract with IBM Corp.      */
/*                                                                        */
/* IBM_PROLOG_END_TAG                                                     */
 
/*
 * Source-control identification string for this file.  It is not
 * referenced by the code visible in this file.
 */
static char *sccsid = "@(#)92   1.7   src/rsct/pgs/samples/sample_schg.c, gssamples, rsct_r43x 5/14/01 09:43:13";
 
/*
 * Copyright notice kept as a string in the program.  The
 * _HAGSD_COPYRIGHT_H guard keeps the definition from being repeated if
 * the same guarded text is seen again in this translation unit.
 */
#if !defined(_HAGSD_COPYRIGHT_H)
#define _HAGSD_COPYRIGHT_H
static char copyright[] = "Licensed Materials - Property of IBM\n\
(C) COPYRIGHT International Business Machines Corp. 1996,2001.\n\
All Rights Reserved.\n\
US Government Users Restricted Rights - Use, duplication or \n\
disclosure restricted by GSA ADP Schedule Contract with IBM Corp.\n";
#endif
 
/*
 * NFDS() is applied to the return value of select() in main() to obtain
 * the number of ready descriptors.  On Linux it is defined here as an
 * identity macro.
 * NOTE(review): presumably other platforms (e.g. AIX) supply NFDS from a
 * system header included below -- confirm before building elsewhere.
 */
#ifdef __linux__
#define NFDS(a) a
#endif
 
/*********************************************************************/
/*
 * Name:  sample_schg.c
 *
 * This module provides a non-interactive program that can be used as
 * an example for constructing an application that exploits the Group
 * Services interfaces provided by IBM PSSP.
 *
 * Components:
 *   sample_schg.c - is the bulk of the program, and contains the main()
 *      function, as well as all necessary callback functions, and some
 *      internal utility functions for the application.
 *
 *   sample_utility.c - provides the definitions for the utility functions
 *      used by the sample_test and sample_schg programs.
 *
 *   sample_utility.h - declarations for the utility functions contained
 *      in sample_utility.c
 *
 * This program is intended as a "simple" Group Services application.
 * When started, it will:
 *
 *   - read and validate the command-line arguments to set various
 *      options, such as group name and timing options.
 *   - attempt to initialize itself as a Group Services client.
 *   - once initialized, attempt to join the group as a provider.
 *   - once joined, each provider will use the timing control given
 *      and will propose a group state value change at each desired
 *      time interval.
 *   - this continues until the program is killed (via kill command,
 *      or otherwise sending it a signal.)
 *
 * The group name, protocol controls, and the various timing aspects
 * of the program are controllable via command-line arguments.
 *
 * This program will write out informational messages as it goes along
 * on stdout.  It is recommended to run this program in the background,
 * and to redirect stdout to a file, and monitor the program by
 * monitoring the file.
 */
/*********************************************************************/
 
/*********************************************************************/
/*
 * To build this program, use the Makefile in this directory:
 *
 *      make all (to build all sample programs)
 * or
 *      make sample_schg
 *
 * To execute this program, you need to have Group Services active on
 * your system.  Please refer to the manual if you have questions as to
 * how you may verify this.  Additionally, you need to execute as a
 * privileged (root) process.  Also, you should take care in the group
 * names you use, to avoid clashing with other groups.
 *
 * Prior to starting this program, you need to set the name of your SP
 * partition in the environment of the process running the program.
 * You should export the variable HA_SYSPAR_NAME to be the name of the
 * partition.  Contact your system administrator if you do not know how
 * to determine this.
 *
 * Once all this is done, you can then start up copies of this program
 * on as many nodes as desired.  All of those with the same group name
 * will attempt to join the same group.  For these, you should ensure
 * that any specified protocol control arguments match, to ensure that
 * the group attributes all match.  If this is not done, then not all
 * copies of the program will be allowed to join the group.
 */
/*********************************************************************/
 
/*********************************************************************/
/*
 * Command-line arguments:
 *
 *  -g <groupname> -- name of the group this process should join.
 *   Default is "SampleGroup01".
 *
 *  -t <time interval (in seconds)> -- number of seconds between attempts
 *      to submit a state value change proposal.
 *   Default is 10.
 *
 *  -P <n-phase pctg> -- percentage of the state value change proposals
 *      that should be n-phase.  Remainder will be 1-phase.
 *   Default is 0 (i.e., all state value change proposals will be 1-phase.)
 *
 *  -T <state time limit> -- voting time limit for n-phase state value
 *      change proposals.
 *   Default is 0.
 *
 *  -r -- randomize the time between making state value change proposals?
 *      If this is given, then the actual time interval between proposals
 *      will use a value inclusive of 1 second as the minimum, and (2 * the
 *      base time interval (specified by the '-t' flag)) as the maximum.
 *   Default is to not randomize.
 *
 *  -s <seed> -- random number seed.  If not specified, then the program
 *      will use the TOD clock to get a seed.  This allows you to set up
 *      "random" runs with recreatable series of proposal timing *for any
 *      one process*.  Note that if you are running multiple providers in
 *      a group, variations in message timing mean there is no guarantee
 *      that the overall group timing and protocol execution will be
 *      recreated.
 *
 *  -j <join/failure num phases> -- number of phases (1-phase or n-phase)
 *      for join and failure leave protocols.  Specify '1' (the default) to
 *      specify 1-phase protocols, anything else for n-phase protocols.
 *   Default is 1-phase.
 *
 *  -p <provider instance number> -- provider instance number for this
 *      process.
 *   Default is 1.
 *
 *  -f <log print frequency> -- how many times per hour should a log status
 *      message be written, assuming debug level (see -d flag) is 1.
 *   Default is 2 (i.e., every 30 minutes write a status report message.)
 *
 *  -n <notify script> -- path name to "notify" script.  Used by the internal
 *      notify() routine to attempt to send information to interested users.
 *   Default is /usr/sbin/rsct/samples/hags/notify.
 *
 *  -d <debug level> -- amount of data to dump out.  The program will
 *      write out join and failure notifications, and using the '-f'
 *      flag, periodic status reports (e.g., number of protocols submitted
 *      and executed, etc.)  The debug settings:
 *      1 -- default, as above.
 *      2 -- as above, plus every protocol submission.
 *      3 -- as for 1 & 2, plus write out every notification received in
 *            full.
 *   Default is 1.
 */
/*********************************************************************/
 
/*********************************************************************/
/*
 * Include "standard" system files.  Note that pthread.h must be the
 * first file included, if it is to be included (see the standard AIX
 * documentation for more information about AIX thread support.)
 */
/*********************************************************************/
 
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/select.h>
#include <sys/time.h>
#define _XOPEN_EXTENDED_SOURCE          /* AIX 4.1 */
#include <arpa/inet.h>
#undef _XOPEN_EXTENDED_SOURCE           /* AIX 4.1 */
#include <signal.h>
#include <errno.h>
#include <strings.h>
#include <memory.h>
#include <time.h>
 
/*********************************************************************/
/*
 * Include the Group Services declarations file.
 */
/*********************************************************************/
 
#include <ha_gs.h>
 
/*********************************************************************/
/*
 * Include the set of declarations for external utility functions for
 * this program.
 */
/*********************************************************************/
 
#include "sample_utility.h"
 
/*********************************************************************/
/*
 * The set of callback functions used by this program.
 */
/* Per-group callbacks; main() registers these in the ha_gs_join() request. */
void n_phase_cb(const ha_gs_n_phase_notification_t *note);
void approved_cb(const ha_gs_approved_notification_t *note);
void rejected_cb(const ha_gs_rejected_notification_t *note);
/* Delayed-error callback; main() passes this one to ha_gs_init(). */
void my_delayed_error_cb(const ha_gs_delayed_error_notification_t *note);
void announce_cb(const ha_gs_announcement_notification_t *note);
 
/* Query callback; main() passes it to ha_gs_init(). */
void query_cb(const ha_gs_query_notification_t *note);
/*
 * NOTE(review): main() registers my_delayed_error_cb(), not this function;
 * confirm whether delayed_error_cb() is still needed.
 */
void delayed_error_cb(const ha_gs_delayed_error_notification_t *note);
/* Responsiveness callback; main() passes it to ha_gs_init(). */
ha_gs_callback_rc_t responsive_cb(const ha_gs_responsiveness_notification_t *note);
 
/*********************************************************************/
/*
 * Prepare a protocol proposal for submission.  Called from main() when
 * select() times out after we have joined the group.  Returns the return
 * code from ha_gs_change_state_value().
 */
int     submit_state_change_proposal(void);
 
/*********************************************************************/
/*
 * Internal utility functions.
 */
/* Pass an error/status message on to interested users. */
void    notify( char *message );
/* Node number; main() multiplies it into the default random seed. */
int     get_node_number(void);
/* Print the usage message; main() calls this for a bad command line. */
void    print_usage(void);
 
/*********************************************************************/
/*
 * Global variables.
 */
 
/*
 * Keep statistics.
 */
/*
 * Number of state value change proposals submitted, number that were
 * 1-phase/n-phase.
 */
int	nSubmitted, nSubmitted1Phase, nSubmittedNPhase;
/*
 * Number of our proposals that were returned due to collisions.
 * nSynchCollides counts the collisions reported synchronously, i.e. as
 * the return code of ha_gs_change_state_value().
 */
int	nCollided, nSynchCollides;
/*
 * Number of state value change protocols that have executed in the
 * group and that we have participated in.
 */
int     nStateChangesSeen, nStateChanges1Phase, nStateChangesNPhase;
 
/*
 * Keep track of group membership, state value.
 */
int     state;                          /* Current group state value, from the */
                                        /*  last successfully-executed protocol. */
int     proposed_state;                 /* Our to-be-proposed state value. */
int     joined;                         /* Have we joined the group yet? */
struct  timeval proposal_time;          /* Keep track of time last proposal */
                                        /*  submitted. */
 
/*
 * These are the variables that hold values determined by the various
 * command-line flags.  Most are initialized here, but may be overridden
 * once we parse the command line.
 */
char   *theGroupName = "SampleGroup01"; /* -g flag: name of the group we should */
                                        /*  join. */
 
int     testInterval = 10;              /* -t flag: number of seconds between */
                                        /*  making state value change proposals. */
                                        /*  Used as time limit for select() call. */
 
int     providerInstanceNumber = 1;	/* -p flag: provider instance number for */
                                        /*  this process. */
 
int     debug = 1;                      /* -d flag: debug level.  Higher dumps */
                                        /* more info. */
 
int     freq = 2;                       /* -f flag: how many times per hour to */
                                        /*  print out status messages to log? */
int     printfreq;                      /* Number of state changes between status */
                                        /*  log messages.  Computed in main() as */
                                        /*  (3600 / testInterval) / freq, minimum 1. */
 
int     randomize = 0;                  /* -r flag: should the wait time between */
                                        /*  making state value change proposals */
                                        /*  be randomized or fixed? */
 
int     nphasestates = 0;               /* -P flag: percentage of time to make our */
                                        /*  state value change proposals n-phase. */
                                        /*  Remainder will be 1-phase. */
 
int     stateTimeLimit = 0;             /* -T flag: voting time limit for n-phase */
                                        /*  state value change protocols. */
 
ha_gs_num_phases_t jfphases = HA_GS_1_PHASE; /* -j flag: number of phases for */
                                             /*  join and failure protocols. */
 
/*
 * The "notify" script is a small shell script that can be modified
 * to mail important error/status messages to an interested user.
 * If you want to place this script elsewhere, you can dynamically
 * specify the path name via the '-n' command-line flag.
 *
 * The script is used by the notify() routine in this program.
 */
char   *theNotifyScript = "/usr/sbin/rsct/samples/hags/notify";
 
/*
 * The following are set during our join protocol.
 */
ha_gs_provider_t        ourProviderId;  /* Our provider ID, returned asynchronously */
                                        /*  when our join protocol executes. */
 
ha_gs_token_t           provider_token; /* Our provider token, returned */
                                        /*  synchronously by ha_gs_join(). */
 
/*
 * The arguments, program name, handy buffer.
 */
char   *args;                           /* Saved copy of the command-line flags; */
                                        /*  points at a buffer local to main(). */
char   *progname;                       /* argv[0], set in main(). */
char    buffer[256];                    /* Scratch space used to format messages */
                                        /*  handed to notify(). */
 
/*********************************************************************/
/*
 * Main() function.  Parse command-line arguments, initialize with Group
 * Services, join the group, then drop into the while() loop until we
 * get killed.  Use select() to wait for messages from Group Services,
 * and propose state value changes every so often.  Simple as that.
 */
int main(int argc, char ** argv)
{
    ha_gs_descriptor_t     socket_fd;
    ha_gs_responsiveness_t responsiveness = {HA_GS_PING_RESPONSIVENESS,
                                             3600,
                                             10,
                                             (char *)0,
                                             0};
    char               *script="/dev/null";
    ha_gs_rc_t          rc;
    ha_gs_proposal_info_t info;
    char                input;
    char               *function;
    time_t		overdue;
    char                theArgs[1024];
 
    int         i, found;
    int         phase;
    int		init_tries;
#define		MAX_TRIES	10
    int         argCtr;
    int         seedGiven;
    unsigned int        randSeed;
    struct timeval  current_time;
    struct timezone tz;
    float       p;
 
    /* select stuff */
    int         select_rc;
    int         highestDescriptor;
    int         howMany;
    fd_set      socketsForSelect; /* Maintain all registered sockets in mask. */
    fd_set      socketSelectMask; /* Used for the actual select. */
    struct      timeval nextJob;  /* Wait time for select. */
 
    seedGiven = 0;
 
    /*
     * Parse the command-line arguments.  Copy them to "args" to preserve
     * them so we can easily write them to the log.
     */
    progname = argv[0];
    args = theArgs;
    args[0] = '\0';
    for(argCtr=1;argCtr < argc;argCtr++) {
        if(!strcmp( argv[argCtr], "-g")) {
            theGroupName = argv[++argCtr];
            strcat(args, " -g ");
            strcat(args, theGroupName);
        } else if(!strcmp( argv[argCtr], "-n")) {
            theNotifyScript = argv[++argCtr];
            strcat(args, " -n ");
            strcat(args, theNotifyScript);
        } else if(!strcmp( argv[argCtr], "-t")) {
            testInterval = atoi( argv[++argCtr]);
            strcat(args, " -t ");
            strcat(args, argv[argCtr]);
        } else if (!strcmp(argv[argCtr], "-p")) {
            providerInstanceNumber = atoi( argv[++argCtr]);
            strcat(args, " -p ");
            strcat(args, argv[argCtr]);
        } else if (!strcmp(argv[argCtr], "-d")) {
            debug = atoi( argv[++argCtr]);
            strcat(args, " -d ");
            strcat(args, argv[argCtr]);
        } else if (!strcmp(argv[argCtr], "-j")) {
            phase = atoi( argv[++argCtr]);
            if (1 == phase) {
                jfphases = HA_GS_1_PHASE;
            } else {
                jfphases = HA_GS_N_PHASE;
            }
            strcat(args, " -j ");
            strcat(args, argv[argCtr]);       
        } else if (!strcmp(argv[argCtr], "-f")) {
            freq = atoi( argv[++argCtr]);
            strcat(args, " -f ");
            strcat(args, argv[argCtr]);
        } else if (!strcmp(argv[argCtr], "-s")) {
            randSeed = atoi( argv[++argCtr]);
            seedGiven = 1;
            strcat(args, " -s ");
            strcat(args, argv[argCtr]);
        } else if (!strcmp(argv[argCtr], "-r")) {
            randomize = 1;
            strcat(args, " -r ");
        } else if (!strcmp(argv[argCtr], "-P")) {
            nphasestates = atoi( argv[++argCtr] );
            strcat(args, " -P ");
            strcat(args, argv[argCtr]);
        } else if (!strcmp(argv[argCtr], "-T")) {
            stateTimeLimit = atoi( argv[++argCtr] );
            strcat(args, " -T ");
            strcat(args, argv[argCtr]);
        } else {
            print_usage();
            exit(3);
        }
    }
 
    /*
     * Calculate time between logging via determining number of state changes per hour
     * divided by number of log entries to be made per hour.  Ensure no less than one.
     * Use this to then cut log entry every "printfreq" state changes.
     */
    if (0 >= freq) {
        printfreq = 1;
    } else {
        printfreq = ((3600 / testInterval) / freq);
        if (0 >= printfreq) {
            printfreq = 1;
        }
    }
 
    /*
     * If no random seed given (-s) then use the (TOD seconds * node_number).
     * In any case, print out the seed being used in case we want to use it
     * again, whatever it is.
     */
    if (!seedGiven) {
	gettimeofday(&current_time, &tz);
        randSeed = current_time.tv_sec * get_node_number();
    }
    srandom(randSeed);
    printf("Random seed is [%d]\n", randSeed);
    fflush(stdout);
 
    /*
     * This variable tells us how many seconds to wait between making each
     * state value change proposal.  Note that we may override this later,
     * if we are "randomizing" this time interval.
     */
    nextJob.tv_sec = testInterval;
    nextJob.tv_usec = 0;
 
    /*
     * No sockets yet.
     */
    highestDescriptor = 0;
    FD_ZERO(&socketsForSelect);
 
    /*
     * This outer while() loop is used in case we have to reconnect
     * to Group Services.
     *
     * In this loop, try to initialize (ha_gs_init()).  Once we
     * do that, set up our select() handling information (socket
     * file descriptor, etc.).
     *
     * Then, attempt to join the group.
     *
     * After that, fall into the inner loop, which is basically just a
     * select(), where we wait to either receive messages from Goup
     * Services, or time out when it is time to make our next state value
     * change proposal.
     */
    while(1) {
 
	joined = 0;                     /* Not yet joined to the group. */
	state = 0;                      /* Start with a state value of 0. */
	proposal_time.tv_sec = 0;       /* No outstanding proposal. */
 
	/*
         * Attempt to initialize with Group Services.
         */
	for( init_tries = 1; MAX_TRIES > init_tries; init_tries++ ) {
            rc = ha_gs_init(&socket_fd,
                            HA_GS_SOCKET_NO_SIGNAL,
                            &responsiveness,
                            script,
                            responsive_cb,
                            my_delayed_error_cb,
                            query_cb);
            /*
             * Set up to use the select() system call.
             */
            if (HA_GS_OK == rc) {
                FD_SET(socket_fd, &socketsForSelect);
                if (socket_fd > highestDescriptor) {
                    highestDescriptor = socket_fd;
                }
                break;
            } else if (HA_GS_EXISTS == rc) {
                printf("You have already initialized!  Why do it again?\n");
                break;
            } else {
                printf("Bad news - ha_gs_init() returned rc:[%s]\n", write_an_rc(rc));
                sleep(10);
            }
        }
	if ( MAX_TRIES <= init_tries ) {
            sprintf(buffer, "ABORT \"Can't get started after %d tries!\n\"", init_tries );
            notify( buffer );
            exit(rc);
	}
        fflush(stdout);
 
        /*
         * Prepare group attributes, join/failure phases set by
         * command-line 'j' argument.
         */
	gattr[1] = malloc(sizeof(ha_gs_group_attributes_t));
	gattr[1]->gs_version = 1;
	gattr[1]->gs_sizeof_group_attributes = sizeof(ha_gs_group_attributes_t);
	gattr[1]->gs_client_version = 1;
	gattr[1]->gs_batch_control = HA_GS_BATCH_BOTH;
	gattr[1]->gs_num_phases = jfphases;
	gattr[1]->gs_source_reflection_num_phases = HA_GS_1_PHASE;
	gattr[1]->gs_group_default_vote = HA_GS_VOTE_APPROVE;
	gattr[1]->gs_merge_control = HA_GS_DISSOLVE_MERGE;
	gattr[1]->gs_time_limit = 0;
	gattr[1]->gs_source_reflection_time_limit = 0;
	gattr[1]->gs_group_name = theGroupName;
	gattr[1]->gs_source_group_name = NULL;
        instance_numbers[1] = providerInstanceNumber;
 
	write_join_information(1);
 
	info.gs_join_request.gs_group_attributes = gattr[1];
        info.gs_join_request.gs_provider_instance = providerInstanceNumber;
	info.gs_join_request.gs_provider_local_name = "SampleProvider";
	info.gs_join_request.gs_n_phase_protocol_callback = n_phase_cb;
	info.gs_join_request.gs_protocol_approved_callback = approved_cb;
	info.gs_join_request.gs_protocol_rejected_callback = rejected_cb;
	info.gs_join_request.gs_announcement_callback = announce_cb;
	info.gs_join_request.gs_merge_callback = NULL;
 
        /*
         * Attempt to join the group.
         */
	rc = ha_gs_join(&gid[1], &info);
 
        /*
         * Save the provider token returned by ha_gs_join().
         */
	provider_token = gid[1];
 
        printf("ha_gs_join returned rc:[%s]\n", write_an_rc(rc));
 
        rc = HA_GS_OK;
 
        /*
         * This inner while() loop will simply wait on select() for
         * notifications to arrive, and when ready, will attempt to
         * submit a state value change proposal.
         */
        while (HA_GS_NOT_OK != rc) {
 
            /*
             * Determine the random interval until we should attempt to
             * make the our next state value change proposal.
             */
            if ( randomize ) {
                nextJob.tv_sec = randomn( 2 * testInterval );
            }
 
            /*
             * select().  See AIX documentation.
             *
             * The "nextJob" parameter is at timeval struct.  It is set to
             * the number of seconds until our next state value change
             * proposal should be made.  If no input appears on our socket
             * connection from Group Services, then we will wake up and
             * go to make the proposal.
             */
            memcpy(&socketSelectMask, &socketsForSelect, sizeof(socketsForSelect));
            select_rc = select(highestDescriptor + 1,
                               &socketSelectMask,
                               0,
                               0,
                               &nextJob);
 
            /*
             * If anything other than "select interrupted (EINTR)" just exit.
             */
            if (select_rc < 0) {
                if (errno == EINTR) {
                    printf("Got EINTR during the select.\n");
                    continue;
                } else {
                    perror("Error on select");
                    exit(errno);
                }
            } else if (0 < (howMany = NFDS(select_rc))) {
                /*
                 * Input arrived on socket.  Should only ever be our one
                 * socket.
                 */
                if (1 < howMany) {
                    printf("Input on more than our socket??  Have [%d]!\n",
                           howMany);
                    exit(howMany);
                }
                /*
                 * OK so far, call ha_gs_dispatch() and process the message(s).
                 */
	    	rc = ha_gs_dispatch(HA_GS_NON_BLOCKING);
                if (HA_GS_OK != rc) {
                    printf("Bad news, bad return code from dispatch[%s]!  Exiting.\n",
                           write_an_rc(rc));
 
                    /*
                     * Something is wrong.  Drop out of inner loop, and then
                     * reinitialize and start over.
                     */
                    break;	
                }
            } else {
                if (!joined) {
                    /*
                     * We have not yet seen the "approved" notification for our join
                     * request, so we cannot yet handle any other proposals.  Just
                     * wait some more.
                     */
                    printf("Select timed out, not joined, nothing arrived in [%d] seconds.  Try again.\n",
                           nextJob.tv_sec);
                } else {
                    /*
                     * We are part of the group, so it is time to make a
                     * proposal.  Go do it.
                     */
                    submit_state_change_proposal();
                }
                fflush(stdout);
            }
        }                               /* end while(HA_GS_NOT_OK != rc) */
 
        /*
         * Close current connection to Group Services.
         */
        rc = ha_gs_quit();
 
        printf("ha_gs_quit returned rc:[%s]\n", write_an_rc(rc));
 
        if ( HA_GS_OK != rc ) {
            sprintf(buffer,
                    "ERROR \"ha_gs_quit returned rc:[%s]\n\"",
                    write_an_rc(rc));
            notify( buffer );
        }
 
        /*
         * Need to remove the "old" socket before establishing a new one.
         */
        FD_CLR(socket_fd, &socketsForSelect);
    }                                   /* end while(1) */
}                                       /* end main() */
 
/*********************************************************************/
/*
 * Our timer has popped, and it is time to submit a state value change
 * proposal for our group.  Determine whether we should submit a 1-phase
 * or n-phase proposal, optionally display the current counts, and then
 * prepare the data to call ha_gs_change_state_value().
 *
 * The data submitted will be the 'current group state value + 1'.
 *
 * Note that if there are multiple providers in the group, then it is
 * possible for our proposal to be returned with a delayed error due to
 * a protocol collision (delayed error code of HA_GS_COLLIDE).  That is
 * fine, as that means another provider's proposal will have executed
 * to increment the state value.  We worry about that later, in the
 * delayed error callback (my_delayed_error_cb()).
 *
 * Returns the return code from ha_gs_change_state_value().
 */
int submit_state_change_proposal(void)
{
    ha_gs_proposal_info_t info;
    ha_gs_num_phases_t    phases;
    time_t                submitTime;
    int                   showProposal;
    int                   unexpected;
    int                   rc;

    proposed_state = state + 1;         /* Proposed new state value. */

    /*
     * Use random number to determine how many phases.
     */
    if (randomn(100) >= nphasestates) {
        phases = HA_GS_1_PHASE;
        nSubmitted1Phase++;
    } else {
        phases = HA_GS_N_PHASE;
        nSubmittedNPhase++;
    }

    /*
     * Remember when this proposal was made.  The seconds are copied into
     * a real time_t for ctime() rather than casting the address of the
     * tv_sec field, which is only valid where the two types are identical.
     */
    gettimeofday(&proposal_time, NULL);
    submitTime = (time_t) proposal_time.tv_sec;

    /*
     * We do not normally display every proposal, but instead just "every
     * so often".  However, if 'debug' is set, then always display.
     * (printfreq is set to at least 1 by main().)
     */
    showProposal = (2 <= debug) ||
                   (0 == (nStateChangesSeen % printfreq));
    if (showProposal) {
        printf("state changes: submitted/seen(1-/n-phase) %d(%d/%d)/%d(%d/%d) collided %d.\n",
               nSubmitted,
               nSubmitted1Phase,
               nSubmittedNPhase,
               nStateChangesSeen,
               nStateChanges1Phase,
               nStateChangesNPhase,
               nCollided);
        printf(" submit %s state change to %d on %s",
               ((HA_GS_1_PHASE == phases) ? "1-phase" : "N-phase"),
               proposed_state,
               ctime( &submitTime ) );
    }

    /*
     * Prepare the proposal data for the call to ha_gs_change_state_value().
     */
    info.gs_state_change_request.gs_num_phases = phases;
    info.gs_state_change_request.gs_time_limit = stateTimeLimit;
    info.gs_state_change_request.gs_new_state.gs_length = sizeof(int);
    info.gs_state_change_request.gs_new_state.gs_state =
        (char *) & proposed_state;
    nSubmitted++;

    /*
     * Submit the request to the Group Services library code.
     */
    rc = ha_gs_change_state_value( provider_token, &info );

    /*
     * HA_GS_OK says that the proposal was accepted, but does NOT necessarily
     * indicate that it will be executed (it may be returned asynchronously
     * via a delayed error).  But, HA_GS_OK does indicate that there were no
     * syntactical problems with the proposal.
     *
     * At this point, HA_GS_COLLIDE indicates that another protocol is already
     * executing in the group.  It is rather rare to have timing such that we
     * get this, but, since we have providers distributed on many nodes, it is
     * possible.
     *
     * Any other return codes here indicate serious problems with the program...
     */
    if (HA_GS_COLLIDE == rc) {
        /* A synchronous collision is counted whatever the debug level. */
        nCollided++;
        nSynchCollides++;
    }

    unexpected = (HA_GS_OK != rc) && (HA_GS_COLLIDE != rc);
    if ((3 <= debug) || unexpected) {
        /*
         * NOTE(review): unlike the messages built in main(), this text has
         * no leading severity word and opening quote; confirm the format
         * that notify() expects.
         */
        snprintf(buffer, sizeof(buffer),
                 "ha_gs_change_state_value returned rc [%s]!\n\"",
                 write_an_rc(rc));
        notify(buffer);
        printf("ha_gs_change_state_value returned rc:[%s]\n",
               write_an_rc(rc));
    }
    return( rc );
}
 
/*********************************************************************/
/*
 * The callback functions used by this program.
 */
 
/*
 * This callback is executed when we need to "vote" on an n-phase protocol
 * proposal.  This may be a join, failure leave, or more commonly, a state
 * value change proposal.
 *
 * For simplicity, we always just vote APPROVE here.  Various experiments
 * here would be to add cases where CONTINUE or REJECT may be voted in
 * different cases.  Exercises left to the reader.
 *
 * Note that this would be the routine where we would put "actions" that
 * a "real" recovery program may want to execute.  It would perform those
 * actions here, and vote based upon the result of getting them done.
 * Additional actions may be taken in the approved and rejected callback
 * functions, but if we want the subsystem to perform a synchronized set
 * of steps, we would do it here.
 */
void n_phase_cb(const ha_gs_n_phase_notification_t *note)
{
    ha_gs_request_t pType;
    ha_gs_token_t   noteToken;
    ha_gs_rc_t      rCode;
    int             calledNotify = 0;
 
    /*
     * Grab the provider token, and the type of the protocol.
     */
    noteToken = note->gs_provider_token;
    pType = note->gs_protocol_type;
 
    /*
     * Process based on protocol type.  In all cases, we just want to vote
     * by calling ha_gs_vote(), and getting out.  Only interesting thing to
     * do is if we get any sort of error on the vote call.
     */
    switch(pType) {
 
        /*
         * Just approve any joins or failures.  Nothing else to bother doing
         * here.  We have more work to do when the approval notification for
         * a join arrives.  See the approved_cb().
         */
      case HA_GS_JOIN:
      case HA_GS_FAILURE_LEAVE:
        if (HA_GS_OK != (rCode = ha_gs_vote(noteToken,
                                            HA_GS_VOTE_APPROVE,
                                            NULL,
                                            NULL,
                                            HA_GS_NULL_VOTE))) {
            sprintf(buffer,
                    "Strange error code [%s] attempting to approve join/failure!\n\"",
                    write_an_rc(rCode));
            notify(buffer);
            write_the_notification(1, (void *)note, HA_GS_N_PHASE_NOTIFICATION);
            calledNotify = 1;
            printf("Strange error code from approve join/failure ha_gs_vote [%s]\n",
                   write_an_rc(rCode));
        }
        break;
 
        /*
         * Again, just vote approve.
         */
      case HA_GS_STATE_VALUE_CHANGE:
        if (HA_GS_OK != (rCode = ha_gs_vote(noteToken,
                                            HA_GS_VOTE_APPROVE,
                                            NULL,
                                            NULL,
                                            HA_GS_NULL_VOTE))) {
            sprintf(buffer,
                    "Strange error code [%s] attempting to approve state change!\n\"",
                    write_an_rc(rCode));
            notify(buffer);
            write_the_notification(1, (void *)note, HA_GS_N_PHASE_NOTIFICATION);
            calledNotify = 1;
            printf("Strange error code from approve state change ha_gs_vote [%s]\n",
                   write_an_rc(rCode));
        }
        if ((3 > debug) && (!calledNotify)) {
            calledNotify = 1;           /* Do not print these, unless error or high debug. */
        }
        break;
 
        /*
         * If this should happen, NOW we want to vote REJECT.  As currently
         * written, we will never propose any other protocols (provider-broadcast
         * messages, voluntary leaves) so just reject and get out.
         */
      default:
        notify("ERROR \"N-Phase Callback called but not for join/failure/state!  Reject!\n\"");
        if (HA_GS_OK != (rCode = ha_gs_vote(noteToken,
                                            HA_GS_VOTE_REJECT,
                                            NULL,
                                            NULL,
                                            HA_GS_NULL_VOTE))) {
            sprintf(buffer,
                    "Strange error code [%s] attempting to vote reject!\n\"",
                    write_an_rc(rCode));
            notify(buffer);
            write_the_notification(1, (void *)note, HA_GS_N_PHASE_NOTIFICATION);
            calledNotify = 1;
            printf("Strange error code from reject ha_gs_vote [%s]\n",
                   write_an_rc(rCode));
        }
    }
 
    if ( ! calledNotify ) {
        printf("\nNotifying....  %s\n", time_now() );
    	write_the_notification(1, (void *)note, HA_GS_N_PHASE_NOTIFICATION);
    }
 
    fflush(stdout);
    return;
}
 
/*
 * Helper for approved_cb(): bookkeeping for an approved state value change
 * (ours or another provider's).  Updates the seen/1-phase/n-phase counters,
 * picks up the new state value from the proposal, complains if it is not
 * exactly the old state plus one, and finally records the new value in
 * "state" and clears proposal_time.tv_sec.
 *
 * Progress lines are printed when debug is 2 or higher, or on every
 * "printfreq"-th state change seen.
 */
static void approved_state_change(const ha_gs_approved_notification_t *note)
{
    int new_state;
    int numberCollided;
    int showProgress;

    nStateChangesSeen++;
    if (HA_GS_1_PHASE == note->gs_proposal->gs_phase_info.gs_num_phases) {
        nStateChanges1Phase++;
    } else {
        nStateChangesNPhase++;
    }

    new_state = *((int *)note->gs_proposal->gs_current_state_value->gs_state);

    /* Same test guards both progress lines below; evaluate it once. */
    showProgress = (2 <= debug) ||
                   (0 == (nStateChangesSeen % printfreq));

    if (showProgress)
        printf("old state %d, new state %d;    %s",
               state, new_state, time_now() );
    if ( state + 1 != new_state ) {
        printf("ERROR: bad state!  old state %d, new state %d;    %s",
               state, new_state, time_now() );
        notify("ERROR \"bad state!\n\"");
    }

    numberCollided = nCollided;
    if ( ( note->gs_proposal->gs_proposed_by.gs_provider_id
           != ourProviderId.gs_provider_id )
        && proposal_time.tv_sec ) {
        /*
         * We made a proposal, but ours wasn't run.  Therefore, ours
         * must have collided!  (Counted for display only; nCollided
         * itself is not changed here.)
         */
        numberCollided++;
    }
    if (showProgress)
        printf("state changes: submitted/seen(1-/n-phase) %d(%d/%d)/%d(%d/%d) collided %d.\n",
               nSubmitted,
               nSubmitted1Phase,
               nSubmittedNPhase,
               nStateChangesSeen,
               nStateChanges1Phase,
               nStateChangesNPhase,
               numberCollided);

    proposal_time.tv_sec = 0;
    state = new_state;
}

/*
 * Deal with the "protocol approved" notifications.  If this is for a
 * state value change protocol, then need to grab the newly-updated state
 * value, to use in proposing our next state value change protocol.
 *
 * If this is a join, may need to grab our provider ID for future reference,
 * as well as set our internal "joined" flag.
 *
 * If this is a failure, then one of the other providers in our group has
 * failed (or, the node on which it was running failed).  Nothing special
 * to do here beyond sending a notification.
 *
 * The whole notification is written out for every protocol type other
 * than a state value change; state value changes are written out only
 * when debug is 3 or higher.
 */
void approved_cb(const ha_gs_approved_notification_t *note)
{
    if (2 <= debug)
        printf("Approved Callback called\n");

    if ((HA_GS_STATE_VALUE_CHANGE != note->gs_protocol_type) || (3 <= debug) )
    {
        printf("\nApproved callback called on %s", time_now() );
        write_the_notification(1, (void *)note, HA_GS_APPROVED_NOTIFICATION);
    }

    /*
     * Process based on protocol type.
     */
    switch (note->gs_protocol_type) {

      case HA_GS_STATE_VALUE_CHANGE:
        approved_state_change(note);
        break;

      case HA_GS_JOIN:
        /*
         * The first join approval we see (while "joined" is still clear)
         * is treated as our own: keep the proposer's provider id as ours
         * and pick up the group's current state value.
         */
        if ( !joined ) {
            ourProviderId = note->gs_proposal->gs_proposed_by;
            joined = 1;
            state = *((int *)note->gs_proposal->gs_current_state_value->gs_state);
            printf("Joining, state is %d\n", state);
        }
        break;

      case HA_GS_FAILURE_LEAVE:
        sprintf(buffer,
                "We have lost one or more providers due to failure.");
        notify(buffer);
        break;

      default:
        sprintf(buffer,
                "Received unexpected notification for protocol type %s",
                proto_type(note->gs_protocol_type));
        notify(buffer);
        break;
    }

    /* Flag notifications carrying a token other than the one in gid[1]. */
    if (note->gs_provider_token != gid[1])
    {
        printf("Provider token = %d my provider token is %d\n",
               note->gs_provider_token, gid[1]);
    }
    fflush(stdout);
    return;
}
 
/*
 * This should be quite rare, as we do not normally reject protocols, and
 * we set our group default vote to APPROVE, to approve protocols even if
 * a provider should fail during a voting protocol.
 *
 * However, we have to have this callback, and it IS possible that we will
 * vote to reject unrecognized proposals.
 *
 * Also, if you decide to add (in n_phase_cb()) the ability to reject the
 * occasional join or state value change, then this routine becomes more
 * interesting.
 *
 * As written it resets voting_phase to 1, writes out the notification,
 * and flags a notification whose provider token differs from gid[1].
 */
void rejected_cb(const ha_gs_rejected_notification_t *note)
{
    if (2 <= debug)
        printf("Rejected Callback called\n");

    voting_phase = 1;                   /* reset for next protocol. */

    write_the_notification(1, (void *)note, HA_GS_REJECTED_NOTIFICATION);

    if (note->gs_provider_token != gid[1])
    {
        printf("Provider token = %d my provider token is %d\n",
               note->gs_provider_token, gid[1]);
    }
    fflush(stdout);
    return;
}
 
/*
 * Delayed error notification handler.  The one delayed error we normally
 * expect is a collision (HA_GS_COLLIDE) on one of our state value change
 * proposals; that case is simply counted in nCollided.  Every other
 * delayed error is reported through notify() and written out in full.
 *
 * Note that it is also possible for a delayed error to arrive in response
 * to a join request, if different processes attempting to join the group
 * have different group attributes (e.g., they specify different join/failure
 * protocol controls via the '-j' command-line argument.)
 */
void my_delayed_error_cb(const ha_gs_delayed_error_notification_t *note)
{
    int isStateChange = (HA_GS_STATE_VALUE_CHANGE == note->gs_protocol_type);

    /*
     * Anything that is not "our state change lost a collision" is
     * unexpected: shout about it and dump the notification.
     */
    if ( !isStateChange
        || (HA_GS_COLLIDE != note->gs_delayed_return_code) ) {
        notify("ERROR \"Delayed Error Callback called\n\"");
        write_the_delayed_error(note);
        fflush(stdout);
        return;
    }

    /*
     * The "normal" case: someone else proposed a state value change and
     * ours lost the collision contest.  Just adjust the count.
     */
    if ( debug >= 2 ) {
        printf("Delayed Error Callback; HA_GS_COLLIDE on HA_GS_STATE_VALUE_CHANGE\n");
    }
    nCollided++;
}
 
/*
 * Announcement notification handler for our group.  Every announcement is
 * passed on through notify() -- as a CRASH when the summary code says that
 * Group Services itself has died, as an ERROR otherwise -- and is then
 * written out in full.
 */
void announce_cb(const ha_gs_announcement_notification_t *note)
{
    char *alert;

    alert = ( HA_GS_GROUP_SERVICES_HAS_DIED_HORRIBLY == note->gs_summary_code )
                ? "CRASH \" - die a horrible death!\n\""
                : "ERROR \"Announce Callback One called\n\"";
    notify( alert );

    write_the_notification(1, (void *)note, HA_GS_ANNOUNCEMENT_NOTIFICATION);
    fflush(stdout);
}
 
/*
 * Deal with responsiveness notification.  Quite simple, just say we are
 * ok.
 *
 * Sets handledResponsiveness to record that the callback ran, writes the
 * notification out when debug is 2 or higher, and always returns
 * HA_GS_CALLBACK_OK.
 */
ha_gs_callback_rc_t responsive_cb(const ha_gs_responsiveness_notification_t *note) {

    handledResponsiveness = 1;

    if (2 <= debug) {
        write_the_notification(0, (void *)note, HA_GS_RESPONSIVENESS_NOTIFICATION);
    }

    fflush(stdout);
    return HA_GS_CALLBACK_OK;
}
 
/*
 * Query response notification handler.  We never issue queries, so this
 * is not expected to run; if it does, just log a timestamped line.  The
 * notification contents are not examined.
 */
void query_cb(const ha_gs_query_notification_t *note)
{
    (void)note;

    write_the_time();
    fputs("Query callback called\n", stdout);
    fflush(stdout);
}
 
/*********************************************************************/
/*
 * Used to write out exceptional conditions.  It will write the given
 * "message" to stdout, and it will also attempt to execute an external
 * shell script named by the variable "theNotifyScript", sending it the
 * given message.  That script may mail the info to an interested user,
 * log it, or do nothing.
 *
 * The script is run through system() with these arguments, in order:
 * the program name, the original arguments, a quoted summary of the
 * current state and settings, and then "message" appended verbatim.
 *
 * "message" is treated as plain text, never as a printf format.  Both
 * command buffers are bounded; if the assembled command does not fit it
 * is not executed, rather than handing the shell a cut-off command line.
 *
 * NOTE(review): "message" is pasted into a shell command line unescaped,
 * so callers must only pass trusted text, and any double quotes in it
 * must be balanced.
 */
void notify( char *message )
{
    static char buffer[1024];
    static char msg[1024];
    int         msgLen;
    int         cmdLen;

    fputs( message, stdout );
    msgLen = snprintf( msg, sizeof(msg),
            "\"%s\" \"%s\" \"state: %d settings: -g %s, -t %d, -p %d -d %d -j %d -f %d -n %s\" %s",
            progname, args, state,
            theGroupName, testInterval,
            providerInstanceNumber, debug,
            jfphases, freq,
            theNotifyScript,
            message );
    printf("\nNotifying....  %s\n", time_now() );
    cmdLen = snprintf( buffer, sizeof(buffer), "%s %s", theNotifyScript, msg );
    fputs( buffer, stdout );
    fflush( stdout );

    if ( (msgLen < 0) || ((size_t)msgLen >= sizeof(msg)) ||
         (cmdLen < 0) || ((size_t)cmdLen >= sizeof(buffer)) ) {
        printf("\nnotify: command too long, notify script not run\n");
        fflush( stdout );
        return;
    }

    /* Best effort: the script's exit status is deliberately not examined. */
    (void)system( buffer );
}
 
/*********************************************************************/
/*
 * Internal-usage utility functions.
 */
/*
 * Print the command-line synopsis and the default option values to
 * stderr.  That is all the help there is.
 */
void print_usage(void)
{
    static const char *const usageText[] = {
        "Usage: schg [ -g <groupname> -t <time interval (in seconds)> -d <debug level>\n",
        "              -p <provider instance number> -n <notify script> \n",
        "              -j <join/failure num phases> -f <log print frequency>\n",
        "              -r -s <seed> -P <n-phase pctg> -T <state time limit>]\n",
        "\ndefaults:   -g SampleGroup01 -t 10 -p 1 -d 1 -n /u/tedkirby/bin/notify -j 1 -f 2\n"
    };
    size_t lineNo;

    /* None of the lines contains a conversion, so emit them as plain text. */
    for (lineNo = 0; lineNo < sizeof(usageText) / sizeof(usageText[0]); lineNo++) {
        fputs(usageText[lineNo], stderr);
    }
    fflush(stderr);
}
 
/*
 * Function to obtain the number of the node upon which schg is executing.
 * The number is obtained by executing the 'node_number' program through
 * popen() and parsing the first line it prints.
 *
 * Returns:
 *   1     if the program could not be started;
 *   2     if it produced no output;
 *   2049  if the output parses as 0, which is taken to mean the CWS
 *         (atoi() also yields 0 for non-numeric output);
 *   otherwise the parsed node number.
 */
#define NODE_NUMBER_PROG "/usr/lpp/ssp/install/bin/node_number"

int     get_node_number(void)
{
    FILE    *fp;
    char    inbuf[16];
    char    *line;
    int     node;

    fp = popen(NODE_NUMBER_PROG, "r");
    if (NULL == fp) {
        /* oh well, not here.  return something. */
        return(1);
    }

    line = fgets(inbuf, sizeof(inbuf), fp);

    /*
     * A popen() stream must be closed with pclose(), which also waits for
     * the child process.  Do it on every path so that neither the stream
     * nor the child is left behind when nothing could be read.
     */
    (void)pclose(fp);

    if (NULL == line) {
        /* oh well, not here.  return something. */
        return(2);
    }

    node = atoi(inbuf);

    if (0 == node) {
        /* if on CWS, return real high node number */
        return(2049);
    }

    return(node);
}
 
 


[ Top of Page | Previous Page | Next Page | Table of Contents | Index ]