This archive contains answers to questions sent to Unidata support through mid-2025. Note that the archive is no longer being updated. We provide the archive for reference; many of the answers presented here remain technically correct, even if somewhat outdated. For the most up-to-date information on the use of NSF Unidata software and data services, please consult the Software Documentation first.
=============================================================================== Robb Kambic Unidata Program Center Software Engineer III Univ. Corp for Atmospheric Research address@hidden WWW: http://www.unidata.ucar.edu/ =============================================================================== ---------- Forwarded message ---------- Date: Wed, 17 May 2000 13:17:14 -0600 From: Doug Hunt <address@hidden> To: address@hidden Subject: troubles stopping ldm with ldmadmin on linux Hi all: I have recently been having troubles stopping LDM via 'ldmadmin stop' on linux. The ldmadmin script seems to not check correctly if all LDM kids are killed off. The result is that after an 'ldmadmin stop', one must wait for a minute or so for all rpc.ldmd children to die. If one tries 'ldmadmin start' during this time, it hangs... I have made a small patch to 'ldmadmin' which seems to clean up this problem. Instead of just killing off the rpc.ldmd process group leader, it kills off all the kids too. Attached is the new ldmadmin script. Regards, Doug Hunt -- address@hidden Software Engineer III UCAR - COSMIC Tel. (303) 497-2611
#!/bin/perl
#
# $Id: ldmadmin.in,v 1.45 1999/08/04 20:40:17 rkambic Exp $
#
# File:         ldmadmin
#
# Copyright 1995 University Corporation for Atmospheric Research
# See ../COPYRIGHT file for copying and redistribution conditions.
#
# Description:  This perl script provides a command line interface to LDM5
#               programs.
#
# Files:
#
#   $ldmhome/ldmd.pid     file containing process group ID
#   /tmp/.ldmadmin.lck    lock file for operations that modify the LDM
#
# Environment Variables:
#
# Usage:
#
#   ldmadmin command [options] [conf_file]
#
# commands:
#
#   start [-v] [-q queue_path]
#   stop
#   restart [-v] [-q queue_path]
#   mkqueue [-v] [-c] [-q queue_path] [-s size]
#   delqueue [-q queue_path]
#   mksurfqueue [-v] [-c] [-q queue_path] [-s size]
#   delsurfqueue [-q queue_path]
#   newlog [-n numlogs] [-l logfile]
#   dostats
#   scour
#   isrunning
#   check [-t hours]
#   pqactcheck [-p pqact.conf]
#   pqactHUP
#   queuecheck
#   watch [-f feedset]
#   ps
#   config
#   log
#   tail
#   clean
#   usage
#
###############################################################################
#
# get the existing runtime environment
#
# The first three whitespace-separated fields of `uname -a` are taken as the
# OS name, the node name and the OS release.  split(' ', ...) is used so that
# runs of blanks and the trailing newline never produce empty or padded
# fields.  $hostname is overridden in the configuration section below.
#
($os, $hostname, $version) = split(' ', `uname -a`);

###############################################################################
# CONFIGURATION SECTION
###############################################################################

# the fully qualified hostname of the machine.
$hostname = "typhoon.cosmic.ucar.edu";

# set perl location
$perl = "/bin/perl";

# LDM home directory.
$ldmhome = "/usr/local/ldm";

# UDUNITS PATH
#
# If you will be running the gribtonc(1) decoder, you should set this path
# to the location of the udunits.dat file used by the udunits package. (i.e.
# $udunits = "/usr/local/etc/udunits.dat").  This should only need to be done
# if you installed the udunits library from the binary distribution, or if you
# built the udunits library from source code and moved the udunits.dat file
# to a different location afterwards.
#
#$udunits = "/usr/local/ldm/etc/udunits.dat";

# product queue size.  By default this is set to 100MBytes.  This should be
# sufficient size to hold one hour's worth of data from the HRS,DDS,PPS,IDS,
# and MCIDAS data streams.
$pq_size = 100000000;

# product queue size for pqsurf.  By default this is set to 2MBytes.  You
# probably won't need to change this.
$surf_size = 2000000;

# default number of logs to rotate with the newlog command
$numlogs = 4;

# file paths - everything here is based on the ldmhome variable by default.
$bin_path  = "$ldmhome/bin";
$etc_path  = "$ldmhome/etc";
$log_path  = "$ldmhome/logs";
$data_path = "$ldmhome/data";
$pq_path   = "$data_path/ldm.pq";
$surf_path = "$data_path/pqsurf.pq";

# ldmadmin file locations and names
$pid_file   = "$ldmhome/ldmd.pid";
$lock_file  = "/tmp/.ldmadmin.lck";
$log_file   = "$log_path/ldmd.log";
$ldmd_conf  = "$etc_path/ldmd.conf";
$pqact_conf = "$etc_path/pqact.conf";
$scour_file = "$etc_path/scour.conf";

# set this to 0 if you don't want the ldm log files rotated whenever you
# start or restart the ldm.
$log_rotate = 1;

###############################################################################
# END OF CONFIGURATION - You should not need to change anything below this
# point.
###############################################################################
#
# we need added runtime stuff here

$progname = "ldmadmin";

# for feedhere
$ENV{'LDMHOSTNAME'} = "$hostname";
$ENV{'PATH'} = "$bin_path:/bin:/usr/bin:/usr/sbin:/sbin:/usr/ucb:/usr/usb:/usr/etc:/etc:$ENV{'PATH'}";

# a few more parameters
$log_hours = 24;
$feedset = "ANY";

# set up the UDUNITS environment variable if needed
if (defined $udunits) {
    $ENV{'UDUNITS'} = "$udunits";
}

# we want a flush after every print statement
$| = 1;

#
# get the command, error if no command specified
#
$command = shift(@ARGV);

# Walk the remaining arguments.  A word starting with a letter or '/' names
# the ldmd.conf file; an option that takes a value consumes the following
# argument.  The loop tests definedness so that an argument of "0" or ""
# does not end option processing early.
while (defined($_ = shift(@ARGV))) {
    /^([a-z]|[A-Z]|\/)/ && ($ldmd_conf = $_);
    /^-q/ && ($q_path = shift(@ARGV));
    /^-s/ && ($q_size = shift(@ARGV));
    /^-c/ && $pq_clobber++;
    /^-v/ && $verbose++;
    /^-n/ && ($numlogs = shift(@ARGV));
    /^-l/ && ($log_file = shift(@ARGV));
    /^-t/ && ($log_hours = shift(@ARGV));
    /^-f/ && ($feedset = shift(@ARGV));
    /^-p/ && ($pqact_conf = shift(@ARGV));
}

if (!$command) {
    print_usage();
}

#
# process the command request
#
if ($command eq "start") {                  # start the ldm
    $status = start_ldm();
}
elsif ($command eq "stop") {                # stop the ldm
    $status = stop_ldm();
}
elsif ($command eq "restart") {             # restart the ldm
    $status = restart_ldm();
}
elsif ($command eq "mkqueue") {             # create a product queue using pqcreate(1)
    $status = make_pq();
}
elsif ($command eq "delqueue") {            # delete a product queue
    $status = delete_pq();
}
elsif ($command eq "mksurfqueue") {         # create a product queue for pqsurf(1)
    $status = make_surf_pq();
}
elsif ($command eq "delsurfqueue") {        # delete a pqsurf product queue
    $status = del_surf_pq();
}
elsif ($command eq "newlog") {              # rotate the log files
    make_lockfile();
    $status = new_log();
    rm_lockfile();
}
elsif ($command eq "dostats") {             # mail stats to Unidata
    `$perl $bin_path/mailpqstats -d $log_path -h $hostname`;
    $status = $?;
}
elsif ($command eq "scour") {               # scour data directories
    `scour $scour_file`;
    $status = $?;
}
elsif ($command eq "isrunning") {           # check if the ldm is running
    $status = check_running();
}
elsif ($command eq "check") {               # analyze the log files
    # exec only returns when the program could not be started
    exec("$bin_path/ldmcheck -d $log_path")
        || bad_exit("check: cannot exec $bin_path/ldmcheck: $!");
}
elsif ($command eq "watch") {               # monitor incoming products
    exec("$bin_path/pqutil -f $feedset -w $pq_path")
        || bad_exit("watch: cannot exec $bin_path/pqutil: $!");
}
elsif ($command eq "pqactcheck") {          # check pqact file for errors
    ldmadmin_pqactcheck();
}
elsif ($command eq "pqactHUP") {            # HUP pqact
    ldmadmin_pqactHUP();
}
elsif ($command eq "queuecheck") {          # check queue for corruption
    $status = ldmadmin_queuecheck();
}
elsif ($command eq "ps") {                  # get the ldm process information
    ldmadmin_ps();
}
elsif ($command eq "config") {              # show the ldm configuration
    $status = ldm_config();
}
elsif ($command eq "log") {                 # do a more on the logfile
    system("more", "$log_file");
    $status = $?;
}
elsif ($command eq "tail") {                # do a tail -f on the logfile
    system("tail", "-f", "$log_file");
    $status = $?;
}
elsif ($command eq "clean") {               # rm lockfile and ldmd.pid file
    system("rm", "-f", $lock_file, $pid_file);
    $status = $?;
}
elsif ($command eq "usage") {               # print usage message
    $status = print_usage();
}
else {                                      # bad command
    $status = print_usage();
}

#
# that's all folks
#
# $status is either a plain 0/1 returned by one of the subs or a raw wait
# status copied from $?.  It is converted before exiting because exit()
# only keeps the low eight bits: a wait status of 256 (child exit code 1)
# would otherwise be reported as success.
exit(exit_code_of($status));

###############################################################################
# Convert a status value into a process exit code.
#
#   undef, 0, ""      -> 0
#   value above 255   -> value >> 8  (wait status carrying a child exit code)
#   anything else     -> returned unchanged (already an exit code, or a wait
#                        status whose low byte holds a signal number)
###############################################################################

sub exit_code_of
{
    my($value) = @_;

    return 0 unless $value;
    return $value >> 8 if ($value > 0xff);
    return $value;
}

###############################################################################
# bad_exit error routine.  Writes the error message to stderr, prefixed with
# the date, host name and "progname[pid]", removes the lock file if one
# exists, and exits with status 1.  Never returns.
###############################################################################

sub bad_exit
{
    my($err_str) = @_;
    my($date_str) = get_date();

    # remove the lockfile if it exists
    if (-e $lock_file) {
        rm_lockfile();
    }

    # output to standard error.  The braces around the variable name are
    # required: "$progname[...]" would be interpolated as an element of the
    # array @progname and print nothing.  The number in brackets is this
    # process's id.
    print STDERR "$date_str $hostname ${progname}[$$]: $err_str\n";

    # exit with extreme prejudice
    exit 1;
}

###############################################################################
# Date Routine.  Gets date and time as GMT in the same format as the LDM log
# file.
###############################################################################

sub get_date
{
    my @month_names = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);

    # only the first five gmtime() fields are needed for the stamp
    my($sec, $min, $hour, $mday, $mon) = gmtime(time());

    return sprintf("%s %d %02d:%02d:%02d UTC",
                   $month_names[$mon], $mday, $hour, $min, $sec);
}

###############################################################################
# Print a usage message and exit.  Should only be called when the command is
# usage, or command line arguments are bad or missing.  Always exits with
# status 1; it never returns to the caller.
###############################################################################

sub print_usage
{
    print "\n$progname\n";
    print "Usage: $progname command [options] [conf_file]";
    print "\n\ncommands:";
    print "\n\tstart [-v] [-q q_path]\t\t\tStart the LDM";
    print "\n\tstop\t\t\t\t\tStop the LDM";
    print "\n\trestart [-v] [-q q_path]\t\tRestart a running LDM";
    print "\n\tmkqueue [-v] [-c] [-q q_path]\tCreate a product queue";
    print "\n\tdelqueue [-q q_path]\t\t\tDelete a product queue";
    print "\n\tmksurfqueue [-v] [-c] [-q q_path] [-s size]";
    print "\n\t\t\t\t\t\tCreate a product queue";
    print "\n\t\t\t\t\t\tfor pqsurf";
    print "\n\tdelsurfqueue [-q q_path]\t\tDelete a pqsurf product queue";
    print "\n\tnewlog [-n numlogs] [-l logfile]\tRotate a log file";
    print "\n\tdostats\t\t\t\t\tMail statistics to Unidata";
    print "\n\tscour\t\t\t\t\tScour data directories";
    print "\n\tisrunning\t\t\t\tExit status 0 if LDM is running,";
    print "\n\t\t\t\t\t\t else exit 1";
    print "\n\tcheck [-t hours]\t\t\tAnalyze the LDM log files";
    print "\n\tpqactcheck [-p pqact_conf]\t\tCheck syntax for pqact files";
    print "\n\tpqactHUP\t\t\t\t\tSend HUP signal to pqact program";
    print "\n\tqueuecheck\t\tCheck for queue corruption";
    print "\n\twatch [-f feedpat]\t\t\tMonitor incoming products";
    print "\n\tps\t\t\t\t\tPrint LDM process information";
    print "\n\tconfig\t\t\t\t\tPrint LDM configuration";
    print "\n\tlog\t\t\t\t\tPage through the LDM log file";
    print "\n\ttail\t\t\t\t\tMonitor the LDM log file";
    print "\n\tclean\t\t\t\t\tRemoves lock and pid files";
    print "\n\tusage\t\t\t\t\tThis message\n";
    print "\n\noptions:";
    print "\n\t-v\t\tTurn on verbose mode";
    print "\n\t-c\t\tClobber an exisiting product queue";
    print "\n\t-q q_path\tSpecify a product queue path";
    print "\n\t\t\t Default $pq_path for LDM";
    print "\n\t\t\t Default $surf_path for pqsurf";
    # -s and -p are accepted by the argument parser; list them here too
    print "\n\t-s size\t\tSize in bytes of the pqsurf product queue";
    print "\n\t\t\t Default $surf_size";
    print "\n\t-p pqact_conf\tpqact configuration file to check";
    print "\n\t\t\t Default $pqact_conf";
    print "\n\t-n numlogs\tNumber of logs to rotate";
    print "\n\t\t\t Default $numlogs";
    print "\n\t-l logfile\tName of logfile";
    print "\n\t\t\t Default $log_file";
    print "\n\t-t hours\tNumber of hours to apply to command";
    print "\n\t\t\t Default $log_hours";
    print "\n\t-f feedset\tFeed set to use with command";
    print "\n\t\t\t Default $feedset";
    print "\n\nconf_file:";
    print "\n\twhich ldmd.conf file to use";
    print "\n\t Default $ldmd_conf";
    print "\n";

    # force the exit
    exit 1;     # assumption is that this routine is called
                # because of incorrect usage.
}

###############################################################################
# check for the existence of the lock file.  Exit if found, create if not
# found.
#
# The file is created with O_EXCL so that testing for the file and creating
# it is a single atomic step; two ldmadmin processes started at the same
# moment cannot both obtain the lock.
#
# When the lock is already held the error is reported here instead of going
# through bad_exit(): bad_exit() removes any existing lock file, which would
# delete the lock that belongs to the other ldmadmin process.
###############################################################################

use Fcntl qw(O_WRONLY O_CREAT O_EXCL);

sub make_lockfile
{
    if (sysopen(LOCKFILE, $lock_file, O_WRONLY | O_CREAT | O_EXCL)) {
        close(LOCKFILE);
        return;
    }

    if (-e $lock_file) {
        # lock held by someone else: report, leave their lock file alone
        my($date_str) = get_date();
        print STDERR "$date_str $hostname ${progname}[$$]: "
                   . "make_lockfile: another ldmadmin process exists\n";
        exit 1;
    }

    # creation failed for some other reason (permissions, missing directory)
    bad_exit("make_lockfile: Can't open lock file $lock_file");
}

###############################################################################
# remove a lock file.  exit if not found.
###############################################################################

sub rm_lockfile
{
    if (-e $lock_file) {
        unlink($lock_file);
    }
    else {
        bad_exit("rm_lockfile: Lock file does not exist");
    }
}

###############################################################################
# create a product queue
#
# Honors -q (queue path), -s (queue size), -v and -c.  Returns 0 on success;
# any failure ends the program through bad_exit().
###############################################################################

sub make_pq
{
    # lock file check
    make_lockfile();

    # can't do this while there is a server running
    # (check_running() returns 0 when a server IS running)
    if (!check_running()) {
        bad_exit("make_pq: There is a server running, mkqueue aborted");
    }

    # set path and size if necessary
    if ($q_path) {
        $pq_path = $q_path;
    }
    if ($q_size) {
        $pq_size = $q_size;
    }

    # build the command line
    $cmd_line = "pqcreate";
    if ($verbose) {
        $cmd_line .= " -v";
    }
    if ($pq_clobber) {
        $cmd_line .= " -c";
    }
    $cmd_line .= " -q $pq_path -s $pq_size";

    # execute pqcreate; bad_exit() removes the lock file before exiting
    `$cmd_line`;
    if ($?) {
        bad_exit("make_pq: mkqueue failed");
    }

    # remove the lockfile
    rm_lockfile();

    return 0;
}

###############################################################################
# delete a product queue
#
# Honors -q (queue path).  Returns 0 on success; any failure ends the
# program through bad_exit().
###############################################################################

sub delete_pq
{
    # lock file check
    make_lockfile();

    # check to see if the server is running.  Exit if it is
    if (!check_running()) {
        bad_exit("delete_pq: A server is running, cannot delete the queue");
    }

    # check for queue_path
    if ($q_path) {
        $pq_path = $q_path;
    }

    # kill the queue
    if (-e $pq_path) {
        unlink($pq_path)
            || bad_exit("delete_pq: cannot remove $pq_path: $!");
    }
    else {
        bad_exit("delete_pq: $pq_path does not exist");
    }

    # remove the lock file
    rm_lockfile();

    return 0;
}

###############################################################################
# create a pqsurf product queue
#
# Honors -q (queue path), -s (queue size), -v and -c.  Returns 0 on success;
# any failure ends the program through bad_exit(), as in make_pq().
###############################################################################

sub make_surf_pq
{
    # lock file check
    make_lockfile();

    # can't do this while there is a server running
    if (!check_running()) {
        bad_exit("make_surf_pq: There is a server running, mkqueue aborted");
    }

    # set path and size if necessary
    if ($q_path) {
        $surf_path = $q_path;
    }
    if ($q_size) {
        $surf_size = $q_size;
    }

    # need the number of slots to create: 6881 per 1000000 bytes of queue.
    # Truncated to a whole number so that a size which is not a multiple of
    # 1000000 does not put a fractional count on the command line.
    $surf_slots = int($surf_size / 1000000 * 6881);

    # build the command line
    $cmd_line = "pqcreate";
    if ($verbose) {
        $cmd_line .= " -v";
    }
    if ($pq_clobber) {
        $cmd_line .= " -c";
    }
    $cmd_line .= " -S $surf_slots -q $surf_path -s $surf_size";

    # execute pqcreate; bad_exit() removes the lock file before exiting
    `$cmd_line`;
    if ($?) {
        bad_exit("make_surf_pq: mksurfqueue failed");
    }

    # remove the lockfile
    rm_lockfile();

    return 0;
}

###############################################################################
# delete a pqsurf product queue
#
# Honors -q (queue path).  Returns 0 on success; any failure ends the
# program through bad_exit().
###############################################################################

sub del_surf_pq
{
    # lock file check
    make_lockfile();

    # check to see if the server is running.  Exit if it is
    if (!check_running()) {
        bad_exit("del_surf_pq: A server is running, cannot delete the queue");
    }

    # check for the queue path
    if ($q_path) {
        $surf_path = $q_path;
    }

    # kill the queue
    if (-e $surf_path) {
        unlink($surf_path)
            || bad_exit("del_surf_pq: cannot remove $surf_path: $!");
    }
    else {
        bad_exit("del_surf_pq: $surf_path does not exist");
    }

    # remove the lock file
    rm_lockfile();

    return 0;
}

###############################################################################
# start the LDM server
#
# Honors -q (queue path) and -v.  Returns 0 once the server is both running
# and registered with the portmapper; any failure ends the program through
# bad_exit().
###############################################################################

sub start_ldm
{
    my($loopcount) = 1;

    print "starting the LDM server...\n";

    # create the lockfile
    make_lockfile();

    # make sure there is no other server running
    if (!check_running()) {
        bad_exit("start_ldm: There is another server running, start aborted");
    }
    if (!check_registered()) {
        bad_exit("start_ldm: There is another server registered with the portmapper, start aborted");
    }

    # use the queue given with -q, if any
    if ($q_path) {
        $pq_path = $q_path;
    }

    # make sure we have a product queue in place
    if (!-e $pq_path) {
        bad_exit("product queue, $pq_path, does not exist");
    }

    # if log_rotate is other than 0, rotate the ldm logs
    if ($log_rotate) {
        new_log();
    }

    # build the command line; whatever rpc.ldmd writes to standard output
    # is saved in the pid file
    $cmd_line = "rpc.ldmd";
    if ($verbose) {
        $cmd_line .= " -v";
    }
    $cmd_line .= " -q $pq_path $ldmd_conf > $pid_file";
    `$cmd_line`;

    # check to make sure things are running: loop while the server is not
    # running OR not registered (both checks return 1 for "not")
    do {
        if ($loopcount > 65) {
            bad_exit("start_ldm: Server not started or registered.");
        }
        $loopcount++;
        sleep($loopcount);
    } while (check_running() || check_registered());

    print "the LDM server has been started\n";

    # remove the lockfile
    rm_lockfile();

    return 0;
}

###############################################################################
# stop the LDM server
#
# Returns 0 once the server is neither running nor registered with the
# portmapper; gives up through bad_exit() after 65 polls.
###############################################################################

sub stop_ldm
{
    my($loopcount) = 1;

    print "stopping the LDM server...\n";

    # create the lockfile
    make_lockfile();

    # handle linux better...
    if ($os eq 'Linux') {
        # signal every rpc.ldmd process that pidof reports, not just the
        # process group leader
        my @pids = grep { /^\d+$/ } split(' ', `pidof rpc.ldmd`);
        kill('TERM', @pids) if (@pids);
    }
    else {
        # get pid
        my $rpc_pid = getPid();

        # kill the server and associated processes
        system("kill $rpc_pid") if ($rpc_pid != -1);
    }

    # we may need to sleep to make sure that the port is deregistered.
    # Beware the inverse logic of check_registered() and check_running():
    # both return 0 for "yes".  Keep waiting while the server is still
    # running OR still registered, so that a following start does not find
    # leftovers of the old server.
    do {
        if ($loopcount > 65) {
            bad_exit("stop_ldm: Server not dead.");
        }
        $loopcount++;
        sleep($loopcount);
    } while (!check_running() || !check_registered());

    print "LDM server stopped\n";

    # remove the lockfile
    rm_lockfile();

    # get rid of the pid file
    unlink($pid_file);

    return 0;
}

###############################################################################
# rotate the specified log file, keeping $numlogs files
#
# Returns 0 on success; a failed rotation ends the program through
# bad_exit().
###############################################################################

sub new_log
{
    # rotate the log
    `newlog $log_file $numlogs`;

    # if rotation successful, notify syslogd
    if ($?) {
        bad_exit("new_log: log rotation failed");
    }
    else {
        `hupsyslog`;
    }

    return 0;
}

###############################################################################
# print the LDM configuration information
###############################################################################

sub ldm_config
{
    print "\nhostname: $hostname\n";
    print "ldmhome: $ldmhome\n";
    print "bin path: $bin_path\n";
    print "conf file: $ldmd_conf\n";
    print "log file: $log_file\n";
    print "data path: $data_path\n";
    print "product queue: $pq_path\n";
    print "queue size: $pq_size bytes\n";
    print "PID file: $pid_file\n";
    print "LDMHOSTNAME: $ENV{'LDMHOSTNAME'}\n";
    print "PATH: $ENV{'PATH'}\n\n";

    return 0;
}

###############################################################################
# check if the LDM is registered with the local portmapper.
# Return 0 if so, 1 if not.
###############################################################################

sub check_registered
{
    $rpcinfo_cmd = "rpcinfo -t localhost 300029";

    # probe program version 5 first, then version 4; the first probe that
    # succeeds means the LDM is registered
    foreach my $prog_version (5, 4) {
        `$rpcinfo_cmd $prog_version > /dev/null 2>&1`;
        return 0 unless ($?);
    }

    return 1;
}

###############################################################################
# check if the LDM is running.  return 0 if running, 1 if not.
#
# On Linux the answer comes from pidof: any rpc.ldmd process counts as
# running.  Elsewhere the pid from getPid() is looked up with ps.
###############################################################################

sub check_running
{
    if ($os eq 'Linux') {
        my @pids = split(' ', `pidof rpc.ldmd`);
        return (@pids == 0);
    }

    my $pid_num = getPid();
    return 1 if ($pid_num == -1);

    # SunOS 4 ps takes the pid as a bare argument, the others want -p
    my $ps_lookup = ($os eq "SunOS" && $version =~ /^4/)
                  ? "ps $pid_num"
                  : "ps -p $pid_num";

    `$ps_lookup | grep rpc.ldmd | grep -v grep`;

    # the pipeline's status is that of the last grep: non-zero = no match
    return $? ? 1 : 0;
}

###############################################################################
# get PID number.
# return pid or -1
#
# The pid file is tried first: its first line, with surrounding whitespace
# removed, is returned when it consists of digits only.  Otherwise the ps
# listing is searched for a pqexpire process and the value in its PPID
# column is returned.  Returns -1 when no pid can be determined.
###############################################################################

sub getPid
{
    my($ps_cmd, $ppid_col, @fields);

    # the easy way: read the pid file
    if (-e $pid_file && open(PIDFILE, "<$pid_file")) {
        my $pid_num = <PIDFILE>;
        close(PIDFILE);

        if (defined $pid_num) {
            $pid_num =~ s/^\s+//;
            $pid_num =~ s/\s+$//;

            # accept the value only if the whole line is a number, so that
            # a damaged pid file can never put stray text on a kill or ps
            # command line
            return $pid_num if ($pid_num =~ /^\d+$/);
        }
    }

    # the hard way: each platform needs different ps options; $ppid_col is
    # the column used when the header line does not name a PPID column
    if ($os eq "SunOS" && $version =~ /^4/) {
        $ps_cmd   = "ps -gawxl";
        $ppid_col = 3;
    }
    elsif ($os eq "Linux") {
        $ps_cmd   = "ps ajx";
        $ppid_col = 0;
    }
    else {
        $ps_cmd   = "ps -eaf";
        $ppid_col = 2;
    }
    open(IN, "$ps_cmd |") || bad_exit("ps: Cannot open ps");

    # each platform has fields in different order, looking for PPID
    my $header = <IN>;
    $header = '' unless (defined $header);
    $header =~ s/^\s+//;
    my @titles = split(/[ \t]+/, $header);
    foreach my $i (0 .. $#titles) {
        if ($titles[$i] =~ /PPID/i) {
            $ppid_col = $i;
            last;
        }
    }

    # search through all processes, looking for parent of pqexpire
    while (defined(my $line = <IN>)) {
        next unless ($line =~ /pqexpire/);

        # get parent of pqexpire
        $line =~ s/^\s+//;
        @fields = split(/[ \t]+/, $line);
        last;
    }
    close(IN);

    # no pid, no ldm running
    return -1 unless (@fields);

    # a short or unexpected ps line must not yield an undefined or
    # non-numeric "pid"
    my $parent = $fields[$ppid_col];
    return -1 unless (defined $parent && $parent =~ /^\d+$/);

    return $parent;
}

###############################################################################
# restart the LDM, stopping it first if it is already running.
###############################################################################

sub restart_ldm
{
    stop_ldm();
    sleep(15);

    # the result of the restart is the result of the start
    return start_ldm();
}

###############################################################################
# list processes running under the current LDM process group
###############################################################################

sub ldmadmin_ps
{
    my($gpid_num, $ps_cmd, $ps_output);

    # check_running() returns non-zero when no server is running
    if (check_running()) {
        bad_exit("ldmadmin_ps: no LDM server is running");
    }

    $gpid_num = getPid();

    if ($os eq "SunOS" && $version =~ /^4/) {
        $ps_cmd = "ps -agwxj | egrep $gpid_num";
    }
    elsif ($os eq "Linux") {
        $ps_cmd = "ps ajx | grep $gpid_num";
    }
    else {
        $ps_cmd = "ps -lfg $gpid_num";
    }
    $ps_output = `$ps_cmd`;

    print "$ps_output\n";
}

###############################################################################
# Check the pqact.conf file for errors
#
# Runs pqact against /dev/null as the queue and inspects the second line of
# its output.  When pqact produced fewer than two lines, everything it did
# print is shown instead.
###############################################################################

sub ldmadmin_pqactcheck
{
    my(@output) = `pqact -vl - -q /dev/null $pqact_conf 2>&1`;

    if (defined $output[1] && $output[1] =~ /Successfully read/) {
        print "$pqact_conf is syntactically correct\n";
    }
    elsif (defined $output[1]) {
        print "$output[1]\n";
    }
    else {
        print join('', @output), "\n";
    }
}

###############################################################################
# HUP the pqact program
#
# Sends SIGHUP to every process whose ps line mentions pqact.  Lines that
# belong to ldmadmin itself are skipped: the command line of this very
# "ldmadmin pqactHUP" invocation contains "pqact" too, and signalling it
# would terminate ldmadmin.  Returns 0 if a signal was sent, 1 otherwise.
###############################################################################

sub ldmadmin_pqactHUP
{
    my($ps_cmd, $pid_col, @pqact_pids);

    # $pid_col is the column used when the header does not name a PID column
    if ($os eq "SunOS" && $version =~ /^4/) {
        $ps_cmd  = "ps -gawxl";
        $pid_col = 0;
    }
    elsif ($os eq "Linux") {
        $ps_cmd  = "ps ajx";
        $pid_col = 1;
    }
    else {
        $ps_cmd  = "ps -eaf";
        $pid_col = 1;
    }
    open(IN, "$ps_cmd |") || bad_exit("ps: Cannot open ps");

    # each platform has fields in different order, looking for PID
    my $header = <IN>;
    $header = '' unless (defined $header);
    $header =~ s/^\s+//;
    my @titles = split(/[ \t]+/, $header);
    foreach my $i (0 .. $#titles) {
        next if ($titles[$i] =~ /PPID/i);   # PPID also matches /PID/
        if ($titles[$i] =~ /PID/i) {
            $pid_col = $i;
            last;
        }
    }

    # search through all processes, looking for pqact
    while (defined(my $line = <IN>)) {
        next unless ($line =~ /pqact/);
        next if ($line =~ /ldmadmin/);      # e.g. "ldmadmin pqactHUP"

        $line =~ s/^\s+//;
        my @fields = split(/[ \t]+/, $line);
        my $pid = $fields[$pid_col];

        next unless (defined $pid && $pid =~ /^\d+$/);
        next if ($pid == $$);               # never signal ourselves
        push(@pqact_pids, $pid);
    }
    close(IN);

    if (!@pqact_pids) {
        print "No pqact process found, no HUP signal sent\n";
        return 1;
    }

    print "Check pqact HUP with command ldmadmin tail\n";

    # kill returns the number of processes successfully signalled
    return kill('HUP', @pqact_pids) ? 0 : 1;
}

###############################################################################
# Check the queue file for errors
#
# Returns 0 when pqcat exits successfully, 1 otherwise.
###############################################################################

sub ldmadmin_queuecheck
{
    # NOTE(review): this ulimit runs in its own short-lived shell, so it
    # does not apply to the pqcat command below.  Left as it was; confirm
    # the intended limit before folding both into one shell command.
    `ulimit -f 0`;

    `pqcat -l - > /dev/null 2>&1`;

    return $? ? 1 : 0;
}