Skip to content

Commit

Permalink
Added the Parrot Paranoia Patch from Brian Bockelman.
Browse files Browse the repository at this point in the history
This feature adds a watchdog process that runs alongside Parrot.
If either the watchdog or Parrot dies, all of the child processes
will be cleaned up.


git-svn-id: file:///afs/nd.edu/user37/ccl/svn/cctools/trunk@2181 a4d8336d-3463-0410-8bba-c098c45d37a8
  • Loading branch information
dthain committed Jul 5, 2012
1 parent 1b9dee5 commit 929faa8
Show file tree
Hide file tree
Showing 8 changed files with 401 additions and 35 deletions.
4 changes: 4 additions & 0 deletions CREDITS
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@ Many thanks go to a host of contributors and colleagues, in alphabetical order.
IBCP
Developed new support for Parrot to work with the EGEE software and bioinformatics applications.

Brian Bockelman
University of Nebraska-Lincoln
Contributed the Parrot "Paranoid" shutdown mode.

Hoang Bui
University of Notre Dame
Created the chirp_distribute and chirp_matrix modules.
Expand Down
2 changes: 1 addition & 1 deletion parrot/src/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ PROGRAMS = parrot_run parrot_lsalloc parrot_mkalloc parrot_getacl parrot_setacl
LIBRARIES = libparrot_helper.so libparrot_client.a
SCRIPTS = make_growfs parrot_identity_box parrot_run_hdfs

PARROT_OBJECTS = pfs_main.o pfs_poll.o tracer.o pfs_dispatch.o pfs_dispatch64.o pfs_process.o pfs_channel.o pfs_sys.o pfs_table.o pfs_resolve.o pfs_service.o pfs_file.o pfs_file_cache.o pfs_dir.o pfs_dircache.o pfs_pointer.o pfs_location.o ibox_acl.o pfs_service_local.o pfs_service_http.o pfs_service_grow.o pfs_service_chirp.o pfs_service_multi.o pfs_service_nest.o pfs_service_ftp.o pfs_service_rfio.o pfs_service_dcap.o pfs_service_irods.o irods_reli.o pfs_service_hdfs.o pfs_service_bxgrid.o pfs_service_s3.o pfs_service_xrootd.o pfs_service_cvmfs.o
PARROT_OBJECTS = pfs_main.o pfs_poll.o tracer.o pfs_paranoia.o pfs_dispatch.o pfs_dispatch64.o pfs_process.o pfs_channel.o pfs_sys.o pfs_table.o pfs_resolve.o pfs_service.o pfs_file.o pfs_file_cache.o pfs_dir.o pfs_dircache.o pfs_pointer.o pfs_location.o ibox_acl.o pfs_service_local.o pfs_service_http.o pfs_service_grow.o pfs_service_chirp.o pfs_service_multi.o pfs_service_nest.o pfs_service_ftp.o pfs_service_rfio.o pfs_service_dcap.o pfs_service_irods.o irods_reli.o pfs_service_hdfs.o pfs_service_bxgrid.o pfs_service_s3.o pfs_service_xrootd.o pfs_service_cvmfs.o

LOCAL_LDFLAGS=-lchirp -ls3client -ldttools -lftp_lite -ldl ${CCTOOLS_INTERNAL_LDFLAGS}

Expand Down
69 changes: 35 additions & 34 deletions parrot/src/pfs_main.cc
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ See the file COPYING for details.
#include "pfs_poll.h"
#include "pfs_service.h"
#include "pfs_critical.h"
#include "pfs_paranoia.h"

extern "C" {
#include "tracer.h"
Expand Down Expand Up @@ -59,6 +60,7 @@ int pfs_session_cache = 0;
int pfs_use_helper = 1;
int pfs_checksum_files = 1;
int pfs_write_rval = 0;
int pfs_paranoid_mode = 0;
const char *pfs_write_rval_file = "parrot.rval";
int pfs_enable_small_file_optimizations = 1;
char pfs_temp_dir[PFS_PATH_MAX];
Expand Down Expand Up @@ -190,6 +192,7 @@ static void show_use( const char *cmd )
printf(" -o <file> Send debugging messages to this file. (PARROT_DEBUG_FILE)\n");
printf(" -O <bytes> Rotate debug files of this size. (PARROT_DEBUG_FILE_SIZE)\n");
printf(" -p <hst:p> Use this proxy server for HTTP requests. (HTTP_PROXY)\n");
printf(" -P Enable paranoid mode for identity boxing mode.\n");
printf(" -Q Inhibit catalog queries to list /chirp.\n");
printf(" -r <repos> CVMFS repositories to enable. (PARROT_CVMFS_REPO)\n");
printf(" -R <cksum> Enforce this root filesystem checksum, where available.\n");
Expand Down Expand Up @@ -252,30 +255,6 @@ static void ignore_signal( int sig )
{
}

/*
It would be nice if we could clean up everyone quietly and then
some time later, kill hard. However, on Linux, if someone kills
us before we have a chance to clean up, then due to a "feature"
of ptrace, all our children will be left stuck in a debug-wait
state. So, rather than chance ourselves getting killed, we
will be very aggressive about cleaning up. Upon receiving any
shutdown signal, we immediately blow away everyone involved,
and then kill ourselves.

kill_everyone is a signal handler: sig is the signal being handled.
It never returns; it loops until one of the self-directed signals
below terminates the process.
*/

static void kill_everyone( int sig )
{
debug(D_NOTICE,"received signal %d (%s), killing all my children...",sig,string_signal(sig));
/* Take down the child processes first, before anything can take us down. */
pfs_process_killall();
debug(D_NOTICE,"sending myself %d (%s), goodbye!",sig,string_signal(sig));
while(1) {
/* Put the default action back so that re-raising sig does not re-enter this handler. */
signal(sig,SIG_DFL);
/* Block every signal except sig, so the re-raised signal can be delivered. */
sigsetmask(~sigmask(sig));
/* Re-send the original signal to ourselves -- presumably so that we die by the same signal we received. */
kill(getpid(),sig);
/* Fallback in case the default action for sig did not end the process. */
kill(getpid(),SIGKILL);
}
}

void pfs_abort()
{
kill(getpid(),SIGTERM);
Expand Down Expand Up @@ -416,14 +395,14 @@ int main( int argc, char *argv[] )
debug_config_fatal(pfs_process_killall);
debug_config_getpid(pfs_process_getpid);

install_handler(SIGQUIT,kill_everyone);
install_handler(SIGILL,kill_everyone);
install_handler(SIGABRT,kill_everyone);
install_handler(SIGIOT,kill_everyone);
install_handler(SIGBUS,kill_everyone);
install_handler(SIGFPE,kill_everyone);
install_handler(SIGSEGV,kill_everyone);
install_handler(SIGTERM,kill_everyone);
install_handler(SIGQUIT,pfs_process_kill_everyone);
install_handler(SIGILL,pfs_process_kill_everyone);
install_handler(SIGABRT,pfs_process_kill_everyone);
install_handler(SIGIOT,pfs_process_kill_everyone);
install_handler(SIGBUS,pfs_process_kill_everyone);
install_handler(SIGFPE,pfs_process_kill_everyone);
install_handler(SIGSEGV,pfs_process_kill_everyone);
install_handler(SIGTERM,pfs_process_kill_everyone);
install_handler(SIGHUP,pass_through);
install_handler(SIGINT,pass_through);
install_handler(SIGTTIN,control_terminal);
Expand Down Expand Up @@ -521,7 +500,7 @@ int main( int argc, char *argv[] )

sprintf(pfs_temp_dir,"/tmp/parrot.%d",getuid());

while((c=getopt(argc,argv,"+hA:a:b:B:c:Cd:DFfG:Hi:I:kKl:m:M:N:o:O:p:Qr:R:sSt:T:U:u:vw:WY"))!=(char)-1) {
while((c=getopt(argc,argv,"+hA:a:b:B:c:Cd:DFfG:Hi:I:kKl:m:M:N:o:O:p:PQr:R:sSt:T:U:u:vw:WY"))!=(char)-1) {
switch(c) {
case 'a':
if(!auth_register_byname(optarg)) {
Expand Down Expand Up @@ -594,6 +573,9 @@ int main( int argc, char *argv[] )
case 'p':
setenv("HTTP_PROXY",optarg,1);
break;
case 'P':
pfs_paranoid_mode = 1;
break;
case 'Q':
chirp_global_inhibit_catalog(1);
break;
Expand Down Expand Up @@ -666,6 +648,16 @@ int main( int argc, char *argv[] )

if(pfs_use_helper) pfs_helper_init(argv[0]);

pid_t pfs_watchdog_pid = -2;
if (pfs_paranoid_mode) {
pfs_watchdog_pid = pfs_paranoia_setup();
if (pfs_watchdog_pid < 0) {
fatal("couldn't initialize paranoid mode.");
} else {
debug(D_PROCESS,"watchdog PID %d",pfs_watchdog_pid);
}
}

pfs_poll_init();

/*
Expand All @@ -686,6 +678,7 @@ int main( int argc, char *argv[] )
if(pid>0) {
debug(D_PROCESS,"pid %d started",pid);
} else if(pid==0) {
pfs_paranoia_payload();
setpgrp();
tracer_prepare();
kill(getpid(),SIGSTOP);
Expand Down Expand Up @@ -733,7 +726,13 @@ int main( int argc, char *argv[] )
flags = WUNTRACED|__WALL|WNOHANG;
}
pid = wait4(trace_this_pid,&status,flags,&usage);
if(pid>0) {
if (pid == pfs_watchdog_pid) {
if (WIFEXITED(status) || WIFSIGNALED(status)) {
debug(D_NOTICE,"watchdog died unexpectedly; killing everyone");
pfs_process_kill_everyone(SIGKILL);
break;
}
} else if(pid>0) {
handle_event(pid,status,usage);
} else {
break;
Expand Down Expand Up @@ -768,6 +767,8 @@ int main( int argc, char *argv[] )
#endif
}

if(pfs_paranoid_mode) pfs_paranoia_cleanup();

if(WIFEXITED(root_exitstatus)) {
status = WEXITSTATUS(root_exitstatus);
debug(D_PROCESS,"%s exited normally with status %d",argv[optind],status);
Expand Down
Loading

0 comments on commit 929faa8

Please sign in to comment.