/* Group Services Programming Guide and Reference */
/* IBM_PROLOG_BEGIN_TAG */
/* This is an automatically generated prolog. */
/* */
/* */
/* */
/* Licensed Materials - Property of IBM */
/* */
/* (C) COPYRIGHT International Business Machines Corp. 1996,2001 */
/* All Rights Reserved */
/* */
/* US Government Users Restricted Rights - Use, duplication or */
/* disclosure restricted by GSA ADP Schedule Contract with IBM Corp. */
/* */
/* IBM_PROLOG_END_TAG */
static char *sccsid = "@(#)92 1.7 src/rsct/pgs/samples/sample_schg.c, gssamples, rsct_r43x 5/14/01 09:43:13";
#if !defined(_HAGSD_COPYRIGHT_H)
#define _HAGSD_COPYRIGHT_H
static char copyright[] = "Licensed Materials - Property of IBM\n\
(C) COPYRIGHT International Business Machines Corp. 1996,2001.\n\
All Rights Reserved.\n\
US Government Users Restricted Rights - Use, duplication or \n\
disclosure restricted by GSA ADP Schedule Contract with IBM Corp.\n";
#endif
/*
 * NFDS() is applied to the value returned by select() in main() to
 * obtain the number of ready descriptors. On Linux it is defined here
 * as the identity. NOTE(review): other platforms are presumably
 * expected to get NFDS() from a system header -- confirm before
 * porting.
 */
#ifdef __linux__
#define NFDS(a) a
#endif
/*********************************************************************/
/*
* Name: sample_schg.c
*
* This module provides a non-interactive program that can be used as
* an example for constructing an application that exploits the Group
* Services interfaces provided by IBM PSSP.
*
* Components:
* sample_schg.c - is the bulk of the program, and contains the main()
* function, as well as all necessary callback functions, and some
* internal utility functions for the application.
*
* sample_utility.c - provides the definitions for the utility functions
* used by the sample_test and sample_schg programs.
*
* sample_utility.h - declarations for the utility functions contained
* in sample_utility.c
*
* This program is intended as a "simple" Group Services application.
* When started, it will:
*
* - read and validate the command-line arguments to set various
* options, such as group name and timing options.
* - attempt to initialize itself as a Group Services client.
* - once initialized, attempt to join the group as a provider.
* - once joined, each provider will use the timing control given
* and will propose a group state value change at each desired
* time interval.
* - this continues until the program is killed (via kill command,
* or otherwise sending it a signal.)
*
* The group name, protocol controls, and the various timing aspects
* of the program are controllable via command-line arguments.
*
* This program will write out informational messages as it goes along
* on stdout. It is recommended to run this program in the background,
* and to redirect stdout to a file, and monitor the program by
* monitoring the file.
*/
/*********************************************************************/
/*********************************************************************/
/*
* To build this program, use the Makefile in this directory:
*
* make all (to build all sample programs)
* or
* make sample_schg
*
* To execute this program, you need to have Group Services active on
* your system. Please refer to the manual if you have questions as to
* how you may verify this. Additionally, you need to execute as a
* privileged (root) process. Also, you should take care in the group
* names you use, to avoid clashing with other groups.
*
* Prior to starting this program, you need to set the name of your SP
* partition in the environment of the process running the program.
* You should export the variable HA_SYSPAR_NAME to be the name of the
* partition. Contact your system administrator if you do not know how
* to determine this.
*
* Once all this is done, you can then start up copies of this program
* on as many nodes as desired. All of those with the same group name
* will attempt to join the same group. For these, you should ensure
* that any specified protocol control arguments match, to ensure that
* the group attributes all match. If this is not done, then not all
* copies of the program will be allowed to join the group.
*/
/*********************************************************************/
/*********************************************************************/
/*
* Command-line arguments:
*
* -g <groupname> -- name of the group this process should join.
* Default is "SampleGroup01".
*
* -t <time interval (in seconds)> -- number of seconds between attempts
* to submit a state value change proposal.
* Default is 10.
*
* -P <n-phase pctg> -- percentage of the state value change proposals
* that should be n-phase. Remainder will be 1-phase.
* Default is 0 (i.e., all state value change proposals will be 1-phase.)
*
* -T <state time limit> -- voting time limit for n-phase state value
* change proposals.
* Default is 0.
*
* -r -- randomize the time between making state value change proposals?
* If this is given, then the actual time interval between proposals
* will use a value inclusive of 1 second as the minimum, and (2 * the
* base time interval (specified by the '-t' flag)) as the maximum.
* Default is to not randomize.
*
* -s <seed> -- random number seed. If not specified, then the program
* will use the TOD clock to get a seed. This allows you to set up
* "random" runs with recreatable series of proposal timing *for any
* one process*. Note that if you are running multiple providers in
* a group, variations in message timing mean there is no guarantee
* that the overall group timing and protocol execution will be
* recreated.
*
* -j <join/failure num phases> -- number of phases (1-phase or n-phase)
* for join and failure leave protocols. Specify '1' (the default) to
* specify 1-phase protocols, anything else for n-phase protocols.
* Default is 1-phase.
*
* -p <provider instance number> -- provider instance number for this
* process.
* Default is 1.
*
* -f <log print frequency> -- how many times per hour should a log status
* message be written, assuming debug level (see -d flag) is 1.
* Default is 2 (i.e., every 30 minutes write a status report message.)
*
* -n <notify script> -- path name to "notify" script. Used by the internal
* notify() routine to attempt to send information to interested users.
* Default is /usr/sbin/rsct/samples/hags/notify.
*
* -d <debug level> -- amount of data to dump out. The program will
* write out join and failure notifications, and using the '-f'
* flag, periodic status reports (e.g., number of protocols submitted
* and executed, etc.) The debug settings:
* 1 -- default, as above.
* 2 -- as above, plus every protocol submission.
* 3 -- as for 1 & 2, plus write out every notification received in
* full.
* Default is 1.
*/
/*********************************************************************/
/*********************************************************************/
/*
* Include "standard" system files. Note that pthread.h must be the
* first file included, if it is to be included (see the standard AIX
* documentation for more information about AIX thread support.)
*/
/*********************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/select.h>
#include <sys/time.h>
#define _XOPEN_EXTENDED_SOURCE /* AIX 4.1 */
#include <arpa/inet.h>
#undef _XOPEN_EXTENDED_SOURCE /* AIX 4.1 */
#include <signal.h>
#include <errno.h>
#include <strings.h>
#include <memory.h>
#include <time.h>
/*********************************************************************/
/*
* Include the Group Services declarations file.
*/
/*********************************************************************/
#include <ha_gs.h>
/*********************************************************************/
/*
* Include the set of declarations for external utility functions for
* this program.
*/
/*********************************************************************/
#include "sample_utility.h"
/*********************************************************************/
/*
 * The set of callback functions used by this program.
 *
 * n_phase_cb, approved_cb, rejected_cb and announce_cb are handed to
 * Group Services in the join request built by main(); responsive_cb,
 * my_delayed_error_cb and query_cb are handed to ha_gs_init().
 */
/* Called when this provider must vote on an n-phase protocol. */
void n_phase_cb(const ha_gs_n_phase_notification_t *note);
/* Called when a protocol has been approved. */
void approved_cb(const ha_gs_approved_notification_t *note);
/* Called when a protocol has been rejected. */
void rejected_cb(const ha_gs_rejected_notification_t *note);
/* Delayed-error handler; this is the one main() registers. */
void my_delayed_error_cb(const ha_gs_delayed_error_notification_t *note);
/* Called when an announcement notification arrives for the group. */
void announce_cb(const ha_gs_announcement_notification_t *note);
/* Query callback passed to ha_gs_init(). */
void query_cb(const ha_gs_query_notification_t *note);
/*
 * NOTE(review): declared here, but nothing in the visible part of this
 * file references it (main() registers my_delayed_error_cb instead) --
 * confirm whether it is still needed.
 */
void delayed_error_cb(const ha_gs_delayed_error_notification_t *note);
/* Responsiveness callback passed to ha_gs_init(). */
ha_gs_callback_rc_t responsive_cb(const ha_gs_responsiveness_notification_t *note);
/*********************************************************************/
/*
 * Prepare a protocol proposal for submission. Returns the return code
 * of ha_gs_change_state_value().
 */
int submit_state_change_proposal(void);
/*********************************************************************/
/*
 * Internal utility functions.
 */
/* Hand a status/error message to the "notify" script. */
void notify( char *message );
/* Node number; main() multiplies it into the default random seed. */
int get_node_number(void);
/* Print the command-line usage text. */
void print_usage(void);
/*********************************************************************/
/*
* Global variables.
*/
/*
 * Running statistics.
 */
/*
 * State value change proposals this process has submitted: the total,
 * and the split between 1-phase and n-phase proposals.
 */
int nSubmitted;
int nSubmitted1Phase;
int nSubmittedNPhase;
/*
 * Proposals of ours that came back because of a collision. nCollided
 * counts every collision; nSynchCollides counts only those reported
 * directly by the return code of ha_gs_change_state_value().
 */
int nCollided;
int nSynchCollides;
/*
 * State value change protocols that ran in the group while we were a
 * participant: the total, and the 1-phase / n-phase split.
 */
int nStateChangesSeen;
int nStateChanges1Phase;
int nStateChangesNPhase;
/*
 * Group membership and state value tracking.
 */
/* Group state value as of the last successfully-executed protocol. */
int state;
/* The state value we are about to propose (or have just proposed). */
int proposed_state;
/* Non-zero once our own join has been approved. */
int joined;
/* When our last proposal was submitted; tv_sec == 0 means none pending. */
struct timeval proposal_time;
/*
 * Settings controlled by the command-line flags. The initial values
 * below are the defaults; main() overrides them while parsing argv.
 */
/* -g: name of the group to join. */
char *theGroupName = "SampleGroup01";
/*
 * -t: seconds between state value change proposals; doubles as the
 * select() time limit.
 */
int testInterval = 10;
/* -p: provider instance number for this process. */
int providerInstanceNumber = 1;
/* -d: debug level; higher values dump more information. */
int debug = 1;
/* -f: number of status messages to log per hour. */
int freq = 2;
/*
 * Derived from -f and -t by main(): a status message is logged every
 * "printfreq" state changes.
 */
int printfreq;
/* -r: non-zero to randomize the wait between proposals. */
int randomize = 0;
/*
 * -P: percentage of our state value change proposals that should be
 * n-phase; the remainder are 1-phase.
 */
int nphasestates = 0;
/* -T: voting time limit for n-phase state value change protocols. */
int stateTimeLimit = 0;
/* -j: number of phases for join and failure protocols. */
ha_gs_num_phases_t jfphases = HA_GS_1_PHASE;
/*
 * -n: path of the "notify" script, a small shell script that can be
 * adapted to mail important error/status messages to an interested
 * user. The notify() routine in this program invokes it.
 */
char *theNotifyScript = "/usr/sbin/rsct/samples/hags/notify";
/*
 * Values established while joining the group.
 */
/* Our provider ID; arrives asynchronously when our join is approved. */
ha_gs_provider_t ourProviderId;
/* Our provider token; returned synchronously by ha_gs_join(). */
ha_gs_token_t provider_token;
/*
 * Saved command-line arguments, the program name, and a scratch buffer
 * used to format messages.
 */
char *args;
char *progname;
char buffer[256];
/*********************************************************************/
/*
* Main() function. Parse command-line arguments, initialize with Group
* Services, join the group, then drop into the while() loop until we
* get killed. Use select() to wait for messages from Group Services,
* and propose state value changes every so often. Simple as that.
*/
int main(int argc, char ** argv)
{
ha_gs_descriptor_t socket_fd;
ha_gs_responsiveness_t responsiveness = {HA_GS_PING_RESPONSIVENESS,
3600,
10,
(char *)0,
0};
char *script="/dev/null";
ha_gs_rc_t rc;
ha_gs_proposal_info_t info;
char input;
char *function;
time_t overdue;
char theArgs[1024];
int i, found;
int phase;
int init_tries;
#define MAX_TRIES 10
int argCtr;
int seedGiven;
unsigned int randSeed;
struct timeval current_time;
struct timezone tz;
float p;
/* select stuff */
int select_rc;
int highestDescriptor;
int howMany;
fd_set socketsForSelect; /* Maintain all registered sockets in mask. */
fd_set socketSelectMask; /* Used for the actual select. */
struct timeval nextJob; /* Wait time for select. */
seedGiven = 0;
/*
* Parse the command-line arguments. Copy them to "args" to preserve
* them so we can easily write them to the log.
*/
progname = argv[0];
args = theArgs;
args[0] = '\0';
for(argCtr=1;argCtr < argc;argCtr++) {
if(!strcmp( argv[argCtr], "-g")) {
theGroupName = argv[++argCtr];
strcat(args, " -g ");
strcat(args, theGroupName);
} else if(!strcmp( argv[argCtr], "-n")) {
theNotifyScript = argv[++argCtr];
strcat(args, " -n ");
strcat(args, theNotifyScript);
} else if(!strcmp( argv[argCtr], "-t")) {
testInterval = atoi( argv[++argCtr]);
strcat(args, " -t ");
strcat(args, argv[argCtr]);
} else if (!strcmp(argv[argCtr], "-p")) {
providerInstanceNumber = atoi( argv[++argCtr]);
strcat(args, " -p ");
strcat(args, argv[argCtr]);
} else if (!strcmp(argv[argCtr], "-d")) {
debug = atoi( argv[++argCtr]);
strcat(args, " -d ");
strcat(args, argv[argCtr]);
} else if (!strcmp(argv[argCtr], "-j")) {
phase = atoi( argv[++argCtr]);
if (1 == phase) {
jfphases = HA_GS_1_PHASE;
} else {
jfphases = HA_GS_N_PHASE;
}
strcat(args, " -j ");
strcat(args, argv[argCtr]);
} else if (!strcmp(argv[argCtr], "-f")) {
freq = atoi( argv[++argCtr]);
strcat(args, " -f ");
strcat(args, argv[argCtr]);
} else if (!strcmp(argv[argCtr], "-s")) {
randSeed = atoi( argv[++argCtr]);
seedGiven = 1;
strcat(args, " -s ");
strcat(args, argv[argCtr]);
} else if (!strcmp(argv[argCtr], "-r")) {
randomize = 1;
strcat(args, " -r ");
} else if (!strcmp(argv[argCtr], "-P")) {
nphasestates = atoi( argv[++argCtr] );
strcat(args, " -P ");
strcat(args, argv[argCtr]);
} else if (!strcmp(argv[argCtr], "-T")) {
stateTimeLimit = atoi( argv[++argCtr] );
strcat(args, " -T ");
strcat(args, argv[argCtr]);
} else {
print_usage();
exit(3);
}
}
/*
* Calculate time between logging via determining number of state changes per hour
* divided by number of log entries to be made per hour. Ensure no less than one.
* Use this to then cut log entry every "printfreq" state changes.
*/
if (0 >= freq) {
printfreq = 1;
} else {
printfreq = ((3600 / testInterval) / freq);
if (0 >= printfreq) {
printfreq = 1;
}
}
/*
* If no random seed given (-s) then use the (TOD seconds * node_number).
* In any case, print out the seed being used in case we want to use it
* again, whatever it is.
*/
if (!seedGiven) {
gettimeofday(¤t_time, &tz);
randSeed = current_time.tv_sec * get_node_number();
}
srandom(randSeed);
printf("Random seed is [%d]\n", randSeed);
fflush(stdout);
/*
* This variable tells us how many seconds to wait between making each
* state value change proposal. Note that we may override this later,
* if we are "randomizing" this time interval.
*/
nextJob.tv_sec = testInterval;
nextJob.tv_usec = 0;
/*
* No sockets yet.
*/
highestDescriptor = 0;
FD_ZERO(&socketsForSelect);
/*
* This outer while() loop is used in case we have to reconnect
* to Group Services.
*
* In this loop, try to initialize (ha_gs_init()). Once we
* do that, set up our select() handling information (socket
* file descriptor, etc.).
*
* Then, attempt to join the group.
*
* After that, fall into the inner loop, which is basically just a
* select(), where we wait to either receive messages from Goup
* Services, or time out when it is time to make our next state value
* change proposal.
*/
while(1) {
joined = 0; /* Not yet joined to the group. */
state = 0; /* Start with a state value of 0. */
proposal_time.tv_sec = 0; /* No outstanding proposal. */
/*
* Attempt to initialize with Group Services.
*/
for( init_tries = 1; MAX_TRIES > init_tries; init_tries++ ) {
rc = ha_gs_init(&socket_fd,
HA_GS_SOCKET_NO_SIGNAL,
&responsiveness,
script,
responsive_cb,
my_delayed_error_cb,
query_cb);
/*
* Set up to use the select() system call.
*/
if (HA_GS_OK == rc) {
FD_SET(socket_fd, &socketsForSelect);
if (socket_fd > highestDescriptor) {
highestDescriptor = socket_fd;
}
break;
} else if (HA_GS_EXISTS == rc) {
printf("You have already initialized! Why do it again?\n");
break;
} else {
printf("Bad news - ha_gs_init() returned rc:[%s]\n", write_an_rc(rc));
sleep(10);
}
}
if ( MAX_TRIES <= init_tries ) {
sprintf(buffer, "ABORT \"Can't get started after %d tries!\n\"", init_tries );
notify( buffer );
exit(rc);
}
fflush(stdout);
/*
* Prepare group attributes, join/failure phases set by
* command-line 'j' argument.
*/
gattr[1] = malloc(sizeof(ha_gs_group_attributes_t));
gattr[1]->gs_version = 1;
gattr[1]->gs_sizeof_group_attributes = sizeof(ha_gs_group_attributes_t);
gattr[1]->gs_client_version = 1;
gattr[1]->gs_batch_control = HA_GS_BATCH_BOTH;
gattr[1]->gs_num_phases = jfphases;
gattr[1]->gs_source_reflection_num_phases = HA_GS_1_PHASE;
gattr[1]->gs_group_default_vote = HA_GS_VOTE_APPROVE;
gattr[1]->gs_merge_control = HA_GS_DISSOLVE_MERGE;
gattr[1]->gs_time_limit = 0;
gattr[1]->gs_source_reflection_time_limit = 0;
gattr[1]->gs_group_name = theGroupName;
gattr[1]->gs_source_group_name = NULL;
instance_numbers[1] = providerInstanceNumber;
write_join_information(1);
info.gs_join_request.gs_group_attributes = gattr[1];
info.gs_join_request.gs_provider_instance = providerInstanceNumber;
info.gs_join_request.gs_provider_local_name = "SampleProvider";
info.gs_join_request.gs_n_phase_protocol_callback = n_phase_cb;
info.gs_join_request.gs_protocol_approved_callback = approved_cb;
info.gs_join_request.gs_protocol_rejected_callback = rejected_cb;
info.gs_join_request.gs_announcement_callback = announce_cb;
info.gs_join_request.gs_merge_callback = NULL;
/*
* Attempt to join the group.
*/
rc = ha_gs_join(&gid[1], &info);
/*
* Save the provider token returned by ha_gs_join().
*/
provider_token = gid[1];
printf("ha_gs_join returned rc:[%s]\n", write_an_rc(rc));
rc = HA_GS_OK;
/*
* This inner while() loop will simply wait on select() for
* notifications to arrive, and when ready, will attempt to
* submit a state value change proposal.
*/
while (HA_GS_NOT_OK != rc) {
/*
* Determine the random interval until we should attempt to
* make the our next state value change proposal.
*/
if ( randomize ) {
nextJob.tv_sec = randomn( 2 * testInterval );
}
/*
* select(). See AIX documentation.
*
* The "nextJob" parameter is at timeval struct. It is set to
* the number of seconds until our next state value change
* proposal should be made. If no input appears on our socket
* connection from Group Services, then we will wake up and
* go to make the proposal.
*/
memcpy(&socketSelectMask, &socketsForSelect, sizeof(socketsForSelect));
select_rc = select(highestDescriptor + 1,
&socketSelectMask,
0,
0,
&nextJob);
/*
* If anything other than "select interrupted (EINTR)" just exit.
*/
if (select_rc < 0) {
if (errno == EINTR) {
printf("Got EINTR during the select.\n");
continue;
} else {
perror("Error on select");
exit(errno);
}
} else if (0 < (howMany = NFDS(select_rc))) {
/*
* Input arrived on socket. Should only ever be our one
* socket.
*/
if (1 < howMany) {
printf("Input on more than our socket?? Have [%d]!\n",
howMany);
exit(howMany);
}
/*
* OK so far, call ha_gs_dispatch() and process the message(s).
*/
rc = ha_gs_dispatch(HA_GS_NON_BLOCKING);
if (HA_GS_OK != rc) {
printf("Bad news, bad return code from dispatch[%s]! Exiting.\n",
write_an_rc(rc));
/*
* Something is wrong. Drop out of inner loop, and then
* reinitialize and start over.
*/
break;
}
} else {
if (!joined) {
/*
* We have not yet seen the "approved" notification for our join
* request, so we cannot yet handle any other proposals. Just
* wait some more.
*/
printf("Select timed out, not joined, nothing arrived in [%d] seconds. Try again.\n",
nextJob.tv_sec);
} else {
/*
* We are part of the group, so it is time to make a
* proposal. Go do it.
*/
submit_state_change_proposal();
}
fflush(stdout);
}
} /* end while(HA_GS_NOT_OK != rc) */
/*
* Close current connection to Group Services.
*/
rc = ha_gs_quit();
printf("ha_gs_quit returned rc:[%s]\n", write_an_rc(rc));
if ( HA_GS_OK != rc ) {
sprintf(buffer,
"ERROR \"ha_gs_quit returned rc:[%s]\n\"",
write_an_rc(rc));
notify( buffer );
}
/*
* Need to remove the "old" socket before establishing a new one.
*/
FD_CLR(socket_fd, &socketsForSelect);
} /* end while(1) */
} /* end main() */
/*********************************************************************/
/*
 * Our timer has popped, and it is time to submit a state value change
 * proposal for our group. Determine whether we should submit a 1-phase
 * or n-phase proposal, optionally display the current counts, and then
 * prepare the data to call ha_gs_change_state_value().
 *
 * The data submitted will be the 'current group state value + 1'.
 *
 * Note that if there are multiple providers in the group, then it is
 * possible for our proposal to be returned with a delayed error due to
 * a protocol collision (delayed error code of HA_GS_COLLIDE). That is
 * fine, as that means another provider's proposal will have executed
 * to increment the state value. We worry about that later, in the
 * delayed error callback (my_delayed_error_cb()).
 *
 * Returns the return code of ha_gs_change_state_value().
 *
 * Side effects: updates proposed_state, proposal_time and the
 * nSubmitted* / nCollided / nSynchCollides counters, and overwrites the
 * shared "buffer" when an error has to be reported.
 */
int submit_state_change_proposal(void)
{
    ha_gs_proposal_info_t info;
    ha_gs_num_phases_t phases;
    int rc;
    int showProgress;      /* Should this submission be logged? */
    int isError;           /* Return code is neither OK nor COLLIDE. */
    time_t submittedAt;

    proposed_state = state + 1;   /* Proposed new state value. */
    /*
     * Use random number to determine how many phases.
     */
    if (randomn(100) >= nphasestates) {
        phases = HA_GS_1_PHASE;
        nSubmitted1Phase++;
    } else {
        phases = HA_GS_N_PHASE;
        nSubmittedNPhase++;
    }
    gettimeofday(&proposal_time, NULL);
    /*
     * Copy the seconds into a real time_t for ctime() rather than
     * casting a pointer into the timeval; the member is not guaranteed
     * to have the same representation as time_t on every platform.
     */
    submittedAt = (time_t) proposal_time.tv_sec;
    /*
     * We do not normally display every proposal, but instead just "every
     * so often". However, if 'debug' is set, then always display.
     */
    showProgress = (2 <= debug) || (0 == (nStateChangesSeen % printfreq));
    if (showProgress) {
        printf("state changes: submitted/seen(1-/n-phase) %d(%d/%d)/%d(%d/%d) collided %d.\n",
               nSubmitted,
               nSubmitted1Phase,
               nSubmittedNPhase,
               nStateChangesSeen,
               nStateChanges1Phase,
               nStateChangesNPhase,
               nCollided);
        printf(" submit %s state change to %d on %s",
               ((HA_GS_1_PHASE == phases) ? "1-phase" : "N-phase"),
               proposed_state,
               ctime( &submittedAt ) );
    }
    /*
     * Prepare the proposal data for the call to ha_gs_change_state_value().
     */
    info.gs_state_change_request.gs_num_phases = phases;
    info.gs_state_change_request.gs_time_limit = stateTimeLimit;
    info.gs_state_change_request.gs_new_state.gs_length = sizeof(int);
    info.gs_state_change_request.gs_new_state.gs_state =
        (char *) & proposed_state;
    nSubmitted++;
    /*
     * Submit the request to the Group Services library code.
     */
    rc = ha_gs_change_state_value( provider_token, &info );
    /*
     * HA_GS_OK says that the proposal was accepted, but does NOT necessarily
     * indicate that it will be executed (it may be returned asynchronously
     * via a delayed error). But, HA_GS_OK does indicate that there were no
     * syntactical problems with the proposal.
     *
     * At this point, HA_GS_COLLIDE indicates that another protocol is already
     * executing in the group. It is rather rare to have timing such that we
     * get this, but, since we have providers distributed on many nodes, it is
     * possible.
     *
     * Any other return codes here indicate serious problems with the program...
     */
    isError = (HA_GS_OK != rc) && (HA_GS_COLLIDE != rc);
    if (isError) {
        /*
         * Only genuine failures go to the notify script; a high debug
         * level adds log output, not notifications. The message uses
         * the same SEVERITY "text" layout as the other ERROR messages
         * in this file, with both quotes present.
         */
        snprintf(buffer, sizeof(buffer),
                 "ERROR \"ha_gs_change_state_value returned rc [%s]!\n\"",
                 write_an_rc(rc));
        notify(buffer);
    }
    if (isError || (3 <= debug)) {
        printf("ha_gs_change_state_value returned rc:[%s]\n",
               write_an_rc(rc));
    }
    if (HA_GS_COLLIDE == rc) {
        nCollided++;
        nSynchCollides++;
    }
    return( rc );
}
/*********************************************************************/
/*
* The callback functions used by this program.
*/
/*
* This callback is executed when we need to "vote" on an n-phase protocol
* proposal. This may be a join, failure leave, or more commonly, a state
* value change proposal.
*
* For simplicity, we always just vote APPROVE here. Various experiments
* here would be to add cases where CONTINUE or REJECT may be voted in
* different cases. Exercises left to the reader.
*
* Note that this would be the routine where we would put "actions" that
* a "real" recovery program may want to execute. It would perform those
* actions here, and vote based upon the result of getting them done.
* Additional actions may be taken in the approved and rejected callback
* functions, but if we want the subsystem to perform a synchronized set
* of steps, we would do it here.
*/
void n_phase_cb(const ha_gs_n_phase_notification_t *note)
{
ha_gs_request_t pType;
ha_gs_token_t noteToken;
ha_gs_rc_t rCode;
int calledNotify = 0;
/*
* Grab the provider token, and the type of the protocol.
*/
noteToken = note->gs_provider_token;
pType = note->gs_protocol_type;
/*
* Process based on protocol type. In all cases, we just want to vote
* by calling ha_gs_vote(), and getting out. Only interesting thing to
* do is if we get any sort of error on the vote call.
*/
switch(pType) {
/*
* Just approve any joins or failures. Nothing else to bother doing
* here. We have more work to do when the approval notification for
* a join arrives. See the approved_cb().
*/
case HA_GS_JOIN:
case HA_GS_FAILURE_LEAVE:
if (HA_GS_OK != (rCode = ha_gs_vote(noteToken,
HA_GS_VOTE_APPROVE,
NULL,
NULL,
HA_GS_NULL_VOTE))) {
sprintf(buffer,
"Strange error code [%s] attempting to approve join/failure!\n\"",
write_an_rc(rCode));
notify(buffer);
write_the_notification(1, (void *)note, HA_GS_N_PHASE_NOTIFICATION);
calledNotify = 1;
printf("Strange error code from approve join/failure ha_gs_vote [%s]\n",
write_an_rc(rCode));
}
break;
/*
* Again, just vote approve.
*/
case HA_GS_STATE_VALUE_CHANGE:
if (HA_GS_OK != (rCode = ha_gs_vote(noteToken,
HA_GS_VOTE_APPROVE,
NULL,
NULL,
HA_GS_NULL_VOTE))) {
sprintf(buffer,
"Strange error code [%s] attempting to approve state change!\n\"",
write_an_rc(rCode));
notify(buffer);
write_the_notification(1, (void *)note, HA_GS_N_PHASE_NOTIFICATION);
calledNotify = 1;
printf("Strange error code from approve state change ha_gs_vote [%s]\n",
write_an_rc(rCode));
}
if ((3 > debug) && (!calledNotify)) {
calledNotify = 1; /* Do not print these, unless error or high debug. */
}
break;
/*
* If this should happen, NOW we want to vote REJECT. As currently
* written, we will never propose any other protocols (provider-broadcast
* messages, voluntary leaves) so just reject and get out.
*/
default:
notify("ERROR \"N-Phase Callback called but not for join/failure/state! Reject!\n\"");
if (HA_GS_OK != (rCode = ha_gs_vote(noteToken,
HA_GS_VOTE_REJECT,
NULL,
NULL,
HA_GS_NULL_VOTE))) {
sprintf(buffer,
"Strange error code [%s] attempting to vote reject!\n\"",
write_an_rc(rCode));
notify(buffer);
write_the_notification(1, (void *)note, HA_GS_N_PHASE_NOTIFICATION);
calledNotify = 1;
printf("Strange error code from reject ha_gs_vote [%s]\n",
write_an_rc(rCode));
}
}
if ( ! calledNotify ) {
printf("\nNotifying.... %s\n", time_now() );
write_the_notification(1, (void *)note, HA_GS_N_PHASE_NOTIFICATION);
}
fflush(stdout);
return;
}
/*
 * Extract the integer group state value carried by an approved
 * notification.
 *
 * The state value travels as a byte buffer (gs_state) with a length
 * (gs_length), the same layout submit_state_change_proposal() fills in
 * for gs_new_state. The bytes are copied out with memcpy() rather than
 * read through an (int *) cast, because nothing here guarantees the
 * buffer is aligned for int, present at all, or at least sizeof(int)
 * long.
 *
 * Returns 1 and stores the value in *value on success; returns 0 and
 * leaves *value untouched if no usable state value is present.
 */
static int schg_approved_state(const ha_gs_approved_notification_t *note,
                               int *value)
{
    if (NULL == note->gs_proposal->gs_current_state_value) {
        return 0;
    }
    if (NULL == note->gs_proposal->gs_current_state_value->gs_state) {
        return 0;
    }
    if (note->gs_proposal->gs_current_state_value->gs_length < (int) sizeof(int)) {
        return 0;
    }
    memcpy(value,
           note->gs_proposal->gs_current_state_value->gs_state,
           sizeof(int));
    return 1;
}
/*
 * Deal with the "protocol approved" notifications. If this is for a
 * state value change protocol, then need to grab the newly-updated state
 * value, to use in proposing our next state value change protocol.
 *
 * If this is a join, may need to grab our provider ID for future reference,
 * as well as set our internal "joined" flag.
 *
 * If this is a failure, then one of the other providers in our group has
 * failed (or, the node on which it was running failed). Nothing special
 * to do here.
 */
void approved_cb(const ha_gs_approved_notification_t *note)
{
    ha_gs_token_t noteToken;
    ha_gs_num_phases_t phases;
    ha_gs_provider_t pProposer;
    int new_state = 0;
    int haveState;
    int showProgress;
    int numberCollided;

    /*
     * Grab the provider token, and the proposer.
     */
    noteToken = note->gs_provider_token;
    pProposer = note->gs_proposal->gs_proposed_by;
    if (2 <= debug) {
        printf("Approved Callback called\n");
    }
    /*
     * For the "unusual" notifications (not those related to state value
     * changes) write out the whole thing; state value changes are
     * written out only when debug is high.
     */
    if ((HA_GS_STATE_VALUE_CHANGE != note->gs_protocol_type) || (3 <= debug)) {
        printf("\nApproved callback called on %s", time_now() );
        write_the_notification(1, (void *)note, HA_GS_APPROVED_NOTIFICATION);
    }
    /*
     * Process based on protocol type.
     */
    switch (note->gs_protocol_type) {
    case HA_GS_STATE_VALUE_CHANGE:
        /*
         * Ours or another provider's state value change proposal has been
         * approved. Grab the newly-updated state value, and update our
         * various and sundry counters.
         */
        nStateChangesSeen++;
        phases = note->gs_proposal->gs_phase_info.gs_num_phases;
        if (HA_GS_1_PHASE == phases) {
            nStateChanges1Phase++;
        } else {
            nStateChangesNPhase++;
        }
        showProgress = (2 <= debug) || (0 == (nStateChangesSeen % printfreq));
        haveState = schg_approved_state(note, &new_state);
        if (!haveState) {
            /*
             * Nothing usable arrived; report it and keep the state we
             * already have rather than reading an invalid buffer.
             */
            printf("ERROR: state value missing or too short; old state %d; %s",
                   state, time_now() );
            notify("ERROR \"bad state!\n\"");
        } else {
            if (showProgress) {
                printf("old state %d, new state %d; %s",
                       state, new_state, time_now() );
            }
            if ( state + 1 != new_state ) {
                printf("ERROR: bad state! old state %d, new state %d; %s",
                       state, new_state, time_now() );
                notify("ERROR \"bad state!\n\"");
            }
        }
        numberCollided = nCollided;
        if ( ( pProposer.gs_provider_id != ourProviderId.gs_provider_id )
             && proposal_time.tv_sec ) {
            /*
             * We made a proposal, but ours wasn't run. Therefore, ours
             * must have collided!
             */
            numberCollided++;
        }
        if (showProgress) {
            printf("state changes: submitted/seen(1-/n-phase) %d(%d/%d)/%d(%d/%d) collided %d.\n",
                   nSubmitted,
                   nSubmitted1Phase,
                   nSubmittedNPhase,
                   nStateChangesSeen,
                   nStateChanges1Phase,
                   nStateChangesNPhase,
                   numberCollided);
        }
        proposal_time.tv_sec = 0;   /* No proposal outstanding any more. */
        if (haveState) {
            state = new_state;
        }
        break;
    case HA_GS_JOIN:
        /*
         * If this is a join, then the "proposer" id will always be us.
         *
         * If we have not yet joined, this is "our" join, so grab the
         * provider id from the "proposer" field and keep it, and pick
         * up the group's current state value if one was supplied.
         */
        if ( !joined ) {
            ourProviderId = pProposer;
            joined = 1;
            if (schg_approved_state(note, &new_state)) {
                state = new_state;
            }
            printf("Joining, state is %d\n", state);
        }
        break;
    case HA_GS_FAILURE_LEAVE:
        snprintf(buffer, sizeof(buffer),
                 "We have lost one or more providers due to failure.");
        notify(buffer);
        break;
    default:
        snprintf(buffer, sizeof(buffer),
                 "Received unexpected notification for protocol type %s",
                 proto_type(note->gs_protocol_type));
        notify(buffer);
        break;
    }
    if (noteToken != gid[1]) {
        printf("Provider token = %d my provider token is %d\n",
               note->gs_provider_token, gid[1]);
    }
    fflush(stdout);
    return;
}
/*
 * This should be quite rare, as we do not normally reject protocols, and
 * we set our group default vote to APPROVE, to approve protocols even if
 * a provider should fail during a voting protocol.
 *
 * However, we have to have this callback, and it IS possible that we will
 * vote to reject unrecognized proposals.
 *
 * Also, if you decide to add (in n_phase_cb()) the ability to reject the
 * occasional join or state value change, then this routine becomes more
 * interesting.
 *
 * As written it only logs: it resets voting_phase, writes out the
 * notification, and reports a provider token that does not match ours.
 * The proposal itself is not examined, so note->gs_proposal is never
 * dereferenced here.
 */
void rejected_cb(const ha_gs_rejected_notification_t *note)
{
    if (2 <= debug) {
        printf("Rejected Callback called\n");
    }
    voting_phase = 1; /* reset for next protocol. */
    write_the_notification(1, (void *)note, HA_GS_REJECTED_NOTIFICATION);
    if (note->gs_provider_token != gid[1]) {
        printf("Provider token = %d my provider token is %d\n",
               note->gs_provider_token, gid[1]);
    }
    fflush(stdout);
    return;
}
/*
* Deal with a delayed error notification. We normally expect these to only
* occur for collisions with our state value change proposals.
*
* Note that it is also possible for a delayed error to arrive in response
* to a join request, if different processes attempting to join the group
* have different group attributes (e.g., they specify different join/failure
* protocol controls via the '-j' command-line argument.)
*/
void my_delayed_error_cb(const ha_gs_delayed_error_notification_t *note)
{
    /*
     * True when this delayed error is a collision reported against a
     * state value change proposal -- the only case handled quietly.
     */
    int lost_collision =
        (HA_GS_STATE_VALUE_CHANGE == note->gs_protocol_type)
        && (HA_GS_COLLIDE == note->gs_delayed_return_code);

    if (!lost_collision) {
        /* Anything else is unexpected: report it and dump the details. */
        notify("ERROR \"Delayed Error Callback called\n\"");
        write_the_delayed_error(note);
        fflush(stdout);
        return;
    }

    /*
     * Someone else's state value change proposal won the collision
     * contest; just count ours as collided.
     */
    if (debug >= 2) {
        printf("Delayed Error Callback; HA_GS_COLLIDE on HA_GS_STATE_VALUE_CHANGE\n");
    }
    nCollided++;
}
/*
* Callback function executed when an announcement notification arrives
* for our group from Group Services.
*/
void announce_cb(const ha_gs_announcement_notification_t *note)
{
    char *text;

    /* Pick the message first, then report it through notify() once. */
    if (note->gs_summary_code != HA_GS_GROUP_SERVICES_HAS_DIED_HORRIBLY) {
        text = "ERROR \"Announce Callback One called\n\"";
    } else {
        text = "CRASH \" - die a horrible death!\n\"";
    }
    notify(text);

    /* Always dump the announcement itself, whatever its summary code. */
    write_the_notification(1, (void *)note, HA_GS_ANNOUNCEMENT_NOTIFICATION);
    fflush(stdout);
}
/*
* Deal with responsiveness notification. Quite simple, just say we are
* ok.
*/
ha_gs_callback_rc_t responsive_cb(const ha_gs_responsiveness_notification_t *note)
{
    /*
     * Callback for a responsiveness notification.
     *
     * note - the responsiveness notification; it is only written out
     *        (via write_the_notification()) when debug is 2 or higher.
     *
     * Returns HA_GS_CALLBACK_OK unconditionally.
     *
     * Side effects: sets handledResponsiveness to 1 and flushes stdout.
     */
    handledResponsiveness = 1;

    if (2 <= debug) {
        write_the_notification(0, (void *)note, HA_GS_RESPONSIVENESS_NOTIFICATION);
    }
    fflush(stdout);
    return HA_GS_CALLBACK_OK;
}
/*
* Query callback function for query response notifications. Not
* something we expect to see.
*/
void query_cb(const ha_gs_query_notification_t *note)
{
    /* The notification contents are not examined; only its arrival is logged. */
    (void)note;

    write_the_time();
    fputs("Query callback called\n", stdout);
    fflush(stdout);
}
/*********************************************************************/
/*
* Used to write out exceptional conditions. It will write the given
* "message" to stdout, and it will also attempt to execute an external
* shell script named by the variable "theNotifyScript", sending it the
* given message. That script may mail the info to an interested user,
* log it, or do nothing.
*/
void notify( char *message )
{
    /*
     * Report an exceptional condition.
     *
     * message - text to report.  It is echoed to stdout and also
     *           appended verbatim (unquoted) to the command line built
     *           for theNotifyScript.
     *
     * The command line is: theNotifyScript, followed by the quoted
     * program name, the quoted args, a quoted summary of the current
     * state and option settings, and finally the message.  It is run
     * with system().
     *
     * The message and the finished command are echoed with fputs(), so
     * any '%' they contain is printed literally rather than being
     * interpreted as a printf conversion.  Both strings are built with
     * snprintf(); if either would not fit its buffer, the command is
     * not executed, since a truncated shell command is not safe to run.
     */
    static char buffer[2048];
    static char msg[1024];
    int msg_len;
    int cmd_len;

    fputs( message, stdout );

    msg_len = snprintf( msg, sizeof msg,
                        "\"%s\" \"%s\" \"state: %d settings: -g %s, -t %d, -p %d -d %d -j %d -f %d -n %s\" %s",
                        progname, args, state,
                        theGroupName, testInterval,
                        providerInstanceNumber, debug,
                        jfphases, freq,
                        theNotifyScript,
                        message );

    printf( "\nNotifying.... %s\n", time_now() );

    if ( msg_len < 0 || (size_t)msg_len >= sizeof msg ) {
        printf( "Notification text too long; notify script not run.\n" );
        fflush( stdout );
        return;
    }

    cmd_len = snprintf( buffer, sizeof buffer, "%s %s", theNotifyScript, msg );
    if ( cmd_len < 0 || (size_t)cmd_len >= sizeof buffer ) {
        printf( "Notification command too long; notify script not run.\n" );
        fflush( stdout );
        return;
    }

    fputs( buffer, stdout );
    fflush( stdout );

    if ( -1 == system( buffer ) ) {
        printf( "\nUnable to run notify script.\n" );
        fflush( stdout );
    }
}
/*********************************************************************/
/*
* Internal-usage utility functions.
*/
/*
* Print help, such as it is.
*/
void print_usage(void)
{
    /* Usage text, one entry per output chunk, written in order to stderr. */
    static const char *const usage_text[] = {
        "Usage: schg [ -g <groupname> -t <time interval (in seconds)> -d <debug level>\n",
        " -p <provider instance number> -n <notify script> \n",
        " -j <join/failure num phases> -f <log print frequency>\n",
        " -r -s <seed> -P <n-phase pctg> -T <state time limit>]\n",
        "\ndefaults: -g SampleGroup01 -t 10 -p 1 -d 1 -n /u/tedkirby/bin/notify -j 1 -f 2\n"
    };
    size_t idx;

    for (idx = 0; idx < sizeof usage_text / sizeof usage_text[0]; idx++) {
        fputs(usage_text[idx], stderr);
    }
    fflush(stderr);
}
/*
* Function to obtain the number of the node upon which schg is executing.
* The number is obtained by executing the 'node_number' program.
*/
#define NODE_NUMBER_PROG "/usr/lpp/ssp/install/bin/node_number"
int get_node_number(void)
{
    /*
     * Run NODE_NUMBER_PROG and parse the first line of its output as
     * the node number.
     *
     * Returns:
     *   1     - popen() could not start the program;
     *   2     - no output line could be read from it;
     *   2049  - the output converted to 0 (treated as running on the
     *           CWS, so a very high node number is returned);
     *   otherwise the node number converted by atoi().
     *
     * The stream is closed with pclose() on every path after a
     * successful popen().  pclose() also waits for the child process,
     * so no separate wait() call is needed to clean it up.
     */
    FILE *fp;
    char inbuf[16];
    int node;

    fp = popen(NODE_NUMBER_PROG, "r");
    if (NULL == fp) {
        /* oh well, not here. return something. */
        return(1);
    }

    if (NULL == fgets(inbuf, sizeof(inbuf), fp)) {
        /* oh well, not here. return something. */
        (void)pclose(fp);
        return(2);
    }
    (void)pclose(fp);

    node = atoi(inbuf);
    if (0 == node) {
        /* if on CWS, return real high node number */
        return(2049);
    }
    return(node);
}
/* [ Top of Page | Previous Page | Next Page | Table of Contents | Index ] */