#!/bin/sh
#
# This script will allow privileged users to get all pertinent
# tape information when tape problems arise. As with all scripts
# which are "universally" written please test this out on your
# machine. If you have any comments or suggestions please email
# either Frithjov Iversen (fi@crayamid) or Bob Rekieta (bar@crayamid).
#
# Jan 92 - original version, derived from CCN "tpdfixup" script
# Dec 92 - modified to include more info, more options
# Apr 94 - fixed syntax error in the crayels function definition
#          discovered in unicos 8.0.
# Jul 95 - add errfile, add message for ELS to save whatmic output
# Dec 95 - remove reference to /usr/adm/ldr_config and add j90 to
#          crayels test.
#
#------------------------------------------------------------------------------

#------------------------------------------------------------------------------
# Shell functions
#------------------------------------------------------------------------------

# crayels: succeeds when either crayympel or crayympj90 succeeds.
# (Both are external commands; presumably machine-type predicates.)
crayels () {
    crayympel || crayympj90
}

# model_d: succeeds only when none of cray2, crayympe and crayels succeeds.
model_d () {
    cray2 || crayympe || crayels; test $? -ne 0
}

# column_vars PREFIX "NAME1 NAME2 ..."
#   Reads header line(s) on stdin and, for every field whose text is one of
#   the listed names, prints "PREFIX_NAME=<field number>" (1-based), ready
#   to be eval'ed.  All fields 1..NF are examined, including the last one.
column_vars () {
    awk '{
        for (i = 1; i <= NF; i++)
            if (index(" " names " ", " " $i " ") > 0)
                printf("%s_%s=%d\n", prefix, $i, i)
    }' prefix="$1" names="$2" -
}

#------------------------------------------------------------------------------
# Set defaults, process options, provide help
#------------------------------------------------------------------------------
MYID="`/bin/id | sed 's/).*$//;s/^.*(//'`"
MAILGROUP="$MYID@`hostname`"
RESTART=no
COLLECT=yes
SOLICIT=yes
/bin/tty -s || SOLICIT=no          # no terminal: nobody to ask for comments
CRASH=/etc/crash
TPDFIXUP=`type $0 | cut -f3 -d" "`
# Fall back to $0 when "type" could not resolve the script: either its
# "not found" text ended up in field 3, or it printed nothing on stdout.
case "$TPDFIXUP" in
""|found)   TPDFIXUP=$0 ;;
esac
PARAMS="$@"
USAGE="Usage: `basename $TPDFIXUP` [-ntrR] [-p path] [-c \"crash dump unicos\"]"
USAGE="$USAGE [email...]"

while getopts ntrRp:c:C OPTION
do
    case "$OPTION" in
    n)  SOLICIT=no ;;
    t)  COLLECT=no ;;
    r)  RESTART=yes ;;
    R)  RESTART=yes
        COLLECT=no
        SOLICIT=no ;;
    p)  if [ -d "$OPTARG" ]
        then
            DIR=$OPTARG
        else
            echo "WARNING - $OPTARG not a directory, -p ignored"
        fi ;;
    c)  CRASH=$OPTARG
        RESTART=no
        # NOTE(review): the result of this test is not used by anything
        # visible here; kept unchanged.
        test "`echo $CRASH | wc -w`" -gt 1 ;;
    C)  CRASH="./crash* ./[dc][uo]*[mr][pe]* ./unicos*"
        RESTART=no ;;
    ?)  echo "$USAGE"
        exit 1 ;;
    esac
done
shift `expr $OPTIND - 1`
# Any remaining arguments replace the default mail recipient.
if [ $# -ne 0 ]
then
    MAILGROUP="$@"
fi

#------------------------------------------------------------------------------
# The caller must be superuser for everything to work properly.
#------------------------------------------------------------------------------
CRASH_AVAIL=yes
id | grep '^uid=0(' >/dev/null
if [ $? -ne 0 ]; then
    echo "WARNING - you are not superuser. Parts of this script may fail."
    if [ "$CRASH" = "/etc/crash" ]
    then
        if [ -x /etc/crash -a -r /dev/mem -a -r /unicos ]
        then
            :
        else
            echo "WARNING - $CRASH is not usable from your account"
            echo "WARNING - 'ps' will be used instead to collect info"
            CRASH_AVAIL=no
        fi
    fi
fi

#------------------------------------------------------------------------------
# Here we go
#------------------------------------------------------------------------------
echo "***"
echo "*** Now executing $TPDFIXUP $PARAMS"
echo "***"

#------------------------------------------------------------------------------
# Figure out which columns in the output of ps and crash correspond to
# the process id, parent process id, and priority. The fields may vary
# from machine to machine. Put values into the environment variables
# PS_PID, PS_PPID, PS_PRI, CRASH_PID, CRASH_PPID, CRASH_PRI, CRASH_EVENT.
# (PS_WCHAN, PS_COMMAND and CRASH_NAME are set the same way.)
#------------------------------------------------------------------------------
eval `ps -lf | head -1 | column_vars PS "PID PPID PRI WCHAN COMMAND"`
if [ "$CRASH_AVAIL" = "yes" ]
then
    eval `echo proc | $CRASH 2>/dev/null | grep PPID |
        column_vars CRASH "PID PPID PRI EVENT NAME"`
fi

#------------------------------------------------------------------------------
# Determine TEMPDIR first.
#------------------------------------------------------------------------------
# Start from the built-in default and override it only when the tape header
# file actually yields a value, so a missing header cannot blank TEMPDIR.
TEMPDIR=/usr/spool/tape
hdr_tempdir=`(if cray2
    then
        grep TEMPDIR /usr/include/tapereq.h
    else
        grep TEMPDIR /usr/include/tapedef.h
    fi) | awk '{print $3}' | sed 's/"//g'`
if [ -n "$hdr_tempdir" ]
then
    TEMPDIR=$hdr_tempdir
fi

#------------------------------------------------------------------------------
# Determine the machine and the system level, directory for info
#------------------------------------------------------------------------------
MACHINE=`/bin/hostname`
LEVEL=`uname -a | cut -d" " -f3`
# Unless -p supplied a directory, create a time-stamped one; try
# $TEMPDIR/tmp first, then /tmp, then $TMPDIR.
if [ -z "$DIR" ]
then
    DIR="$TEMPDIR/tmp/tp.`date +%m%d.%H%M%S`"
    mkdir -p "$DIR"
    if [ "$?" -ne 0 ]
    then
        DIR="/tmp/tp.`date +%m%d.%H%M%S`"
        mkdir -p "$DIR"
        if [ "$?" -ne 0 ]
        then
            DIR="$TMPDIR/tp.`date +%m%d.%H%M%S`"
            mkdir -p "$DIR"
        fi
    fi
fi
chmod 777 "$DIR"
MAILFILE="${DIR}/mailfile"
COMMENTFILE="${DIR}/comments"
echo "*** tpdfixup run on $MACHINE $LEVEL by $MYID at `/bin/date`" \
    | tee -a $MAILFILE
echo "*** Copying information to $DIR" | tee -a $MAILFILE

#------------------------------------------------------------------------------
# Determine TAPECONFIG
#------------------------------------------------------------------------------
TAPECONFIG=/etc/config/tapeconfig
# Use the last "tape config file is" line of daemon.stderr, if there is one.
# The assignment is made in the main shell (not inside a piped while loop)
# so that it is still visible afterwards.
cfgline=`grep "tape config file is" $TEMPDIR/daemon.stderr | tail -1`
if [ -n "$cfgline" ]
then
    TAPECONFIG=`echo $cfgline | sed 's/"$//;s/^.*"//'`
fi

#------------------------------------------------------------------------------
# Determine SAVEPFX, TRACEPFX, NDEVS from tape configuration
#------------------------------------------------------------------------------
echo "*** Extracting information from configuration files" | tee -a $MAILFILE
SAVEPFX=$TEMPDIR/save/trace
TRACEPFX=$TEMPDIR/trace
NDEVS=18

# parse_tapeconfig: reads a tape configuration on stdin and prints shell
# assignments for the settings found in it:
#   SAVEPFX=<rest of line>   for a line whose first word is SAVEPFX
#   TRACEPFX=<rest of line>  for a line whose first word is TRACEPFX
#   NDEVS=<n>                for a -DEVICES section, where n is 2 plus the
#                            number of lines up to the next line starting
#                            with "-"; blank lines and lines starting with
#                            "#" are not counted.
# The "-" line that ends a -DEVICES section is consumed by the inner loop.
parse_tapeconfig () {
    while read key rest
    do
        case "$key" in
        SAVEPFX)    echo SAVEPFX=$rest ;;
        TRACEPFX)   echo TRACEPFX=$rest ;;
        -DEVICES)
            ndev=2
            while read line
            do
                case "$line" in
                -*)         break ;;
                ""|"#"*)    ;;
                *)          ndev=`expr $ndev + 1` ;;
                esac
            done
            echo NDEVS=$ndev ;;
        esac
    done
}

# Keep the defaults above when the configuration file cannot be read.
if [ -r "$TAPECONFIG" ]
then
    eval `parse_tapeconfig < $TAPECONFIG`
fi
#------------------------------------------------------------------------------
# Pull constants we need later out of .h files
#------------------------------------------------------------------------------
echo "*** Extracting information from include files" | tee -a $MAILFILE
# Defaults, used when the helper program below cannot be built or run.
TRSIZE=1536
BMXTAB=72
UNICOS8=no
UNICOS7=no
# A small C program is written to $DIR, compiled, and its output (lines of
# the form NAME=value) is eval'ed to set UNICOS7, UNICOS8, TRSIZE, BMXTAB,
# SN, PRFRZD and PRSIZE.
# NOTE(review): every "#include" line below has lost its header name (the
# <...> operand is missing), so this program cannot compile as written.
# The header names cannot be determined from this file; restore them from
# a pristine copy of the script.
cat > $DIR/get_const.c <<\!
#include
#include
#if CRAY1
#include
#include
#include
#include
#include
#include
#endif
#include
#include
#include
#include
#include
#include
main()
{
	struct proc p;
#if CRAY1
#if UNICOS_LVL >= '7.0'
	printf("UNICOS7=yes\n");
#endif
#if UNICOS_LVL < '7.1'
	printf("TRSIZE=%d\n",BMX_TRSIZE);
	printf("BMXTAB=%d\n",sizeof(struct bmxtab) / NBPW);
#else
	printf("TRSIZE=%d\n",TPD_TRSIZE);
	printf("BMXTAB=%d\n",sizeof(struct tpdtab) / NBPW);
	printf("UNICOS8=yes\n");
#endif
	printf("SN=%d\n",SN);
	printf("PRFRZD=%d\n",((int) &p.p_pcomm.pc_frzdepth - (int) &p));
#endif
	printf("PRSIZE=%d\n",sizeof(struct proc) / NBPW);
}
!
(cd $DIR; /bin/cc -o get_const get_const.c) && eval `$DIR/get_const 2>/dev/null`

#------------------------------------------------------------------------------
# Set values for bmx kernel information (non-Cray2 only)
#------------------------------------------------------------------------------
if cray2
then
    :
else
    # Sizes of the three areas dumped later with crash "od", derived
    # from the device count and the per-device sizes.
    TRHDR_N=`expr $NDEVS \* 4 + 8`
    TRBUF_N=`expr $NDEVS \* $TRSIZE`
    BMXTB_N=`expr $NDEVS \* $BMXTAB`
    trhdr=bmxtrhdr
    trbuf=bmxtrbuf
    tbl=bmxtbl
    TRHDR_0=`echo od bmxtrhdr | $CRASH 2>/dev/null | \
        head -1 | cut -f2 -d" "`
    # When crash echoes the quoted symbol name back instead of a value,
    # switch to the tpd* symbol names and treat the system as UNICOS 8.
    if [ "$TRHDR_0" = "\"bmxtrhdr\"" ]
    then
        trhdr=tpdtrhdr
        trbuf=tpdtrbuf
        tbl=tpdtbl
        TRHDR_0=`echo od $trhdr | $CRASH 2>/dev/null | \
            head -1 | cut -f2 -d" "`
        UNICOS8=yes
    fi
    # A zero first word: dump starting at the symbols themselves.
    # Otherwise use the first word read at each symbol as the start.
    if [ "$TRHDR_0" -eq 0 ]
    then
        TRHDR_0=$trhdr
        TRBUF_0=$trbuf
        BMXTB_0=$tbl
    else
        TRBUF_0=`echo od $trbuf | $CRASH 2>/dev/null | \
            head -1 | cut -f2 -d" "`
        BMXTB_0=`echo od $tbl | $CRASH 2>/dev/null | \
            head -1 | cut -f2 -d" "`
    fi
fi

#------------------------------------------------------------------------------
# Find tpdaemon and see what he is up to
#------------------------------------------------------------------------------
echo "*** Determining status of tpdaemon"
if [ "$CRASH_AVAIL" = yes ]
then
    TPDAEMON=`echo proc | $CRASH 2>/dev/null | grep tpdaemon`
    TPDPRI=`echo $TPDAEMON | cut -f$CRASH_PRI -d" "`
    TPDCHN=`echo $TPDAEMON | cut -f$CRASH_EVENT -d" "`
    SELWAIT=`echo nm selwait | $CRASH 2>/dev/null | tail -1 | \
        awk '{print $2}'`
else
    # '[t]pdaemon' matches "tpdaemon" but not the grep command's own
    # entry in the ps listing, so a dead daemon is not mistaken for a
    # live one.
    TPDAEMON=`/bin/ps -elf | grep '[t]pdaemon'`
    TPDPRI=`echo $TPDAEMON | cut -f$PS_PRI -d" "`
    TPDCHN=`echo $TPDAEMON | cut -f$PS_WCHAN -d" "`
    # Without crash the address of selwait is unknown, so the channel
    # comparison below is trivially true and only the priority decides.
    SELWAIT=$TPDCHN
fi
# DOWN: no tpdaemon found.  OK: waiting on selwait at priority 26.
# BUSY: anything else.
if [ -z "$TPDAEMON" ]
then
    TPD_ST=DOWN
else
    if [ "$TPDCHN" = "$SELWAIT" -a "$TPDPRI" = 26 ]
    then
        TPD_ST=OK
    else
        TPD_ST=BUSY
    fi
fi
echo "*** Status of tpdaemon: $TPD_ST " | tee -a $MAILFILE

#------------------------------------------------------------------------------
# If tpdaemon is "BUSY", look at special cases
#------------------------------------------------------------------------------
if [ "$TPD_ST" = "BUSY" -a "$CRASH_AVAIL" = "yes" -a "$TPDCHN" -gt 0 ]
then
    # Convert the octal addresses to decimal and work out which proc
    # table slot (and offset inside it) the wait channel points into.
    proctab=`echo nm proc | $CRASH 2>/dev/null | tail -1 | \
        awk '{print $2}'`
    proctab=`echo "obase=10;ibase=8;$proctab" | /usr/bin/bc`
    nproc=`echo var | $CRASH 2>/dev/null | grep '^v_proc:' | \
        sed 's/^v_proc://'`
    channel=`echo "obase=10;ibase=8;$TPDCHN" | /usr/bin/bc`
    procslt=`expr \( $channel - $proctab \) / $PRSIZE`
    procofs=`expr \( $channel - $proctab \) % $PRSIZE`
    if [ $procslt -gt 0 -a $procslt -lt $nproc ]
    then
        p=`echo "proc $procslt" | $CRASH 2>&1 | tail -1 | \
            cut -f2 -d" "`
        echo "*** tpdaemon sleeps on proc[$procslt]+$procofs (pid $p)"\
            | tee -a $MAILFILE
        if cray2
        then
            :
        else
            # Sleeping on a proc entry's pc_frzdepth field is
            # treated as a normal (OK) state.
            if [ $procofs -eq $PRFRZD ]
            then
                echo "*** tpdaemon sleeps on pc_frzdepth !!" \
                    | tee -a $MAILFILE
                TPD_ST=OK
                echo "*** tpdaemon status set to $TPD_ST" \
                    | tee -a $MAILFILE
            fi
        fi
    fi
fi

#------------------------------------------------------------------------------
# Send mail describing what happened to all interested parties.
#------------------------------------------------------------------------------
# The mail command is installed as a trap so that the report is sent on
# normal exit (0) as well as on the listed signals.  The variables are
# expanded now, when the trap is set.
trap \
"/usr/bin/mailx -s \"tpdaemon on $MACHINE: $DIR\" $MAILGROUP < $MAILFILE;exit" \
    0 1 2 3 6 7 15 26 27
echo "*** Mail will be sent to: $MAILGROUP" | tee -a $MAILFILE

#------------------------------------------------------------------------------
# Start of collection phase
#------------------------------------------------------------------------------
if [ "$COLLECT" = "yes" ]
then

#------------------------------------------------------------------------------
# Save daemon traces and stderr/stdout
#------------------------------------------------------------------------------
echo "*** Saving current daemon traces" | tee -a $MAILFILE
cp $TRACEPFX.* $DIR 2>/dev/null || echo "*** cp failed" | tee -a $MAILFILE
echo "*** Saving old daemon traces in save/" | tee -a $MAILFILE
mkdir $DIR/save 2>/dev/null ; chmod 777 $DIR/save
cp $SAVEPFX.* $DIR/save 2>/dev/null | tee -a $MAILFILE
echo "*** Saving daemon stderr/stdout" | tee -a $MAILFILE
cp $TEMPDIR/daemon.std* $DIR 2>&1 | tee -a $MAILFILE

#------------------------------------------------------------------------------
# Save msglog
#------------------------------------------------------------------------------
echo "*** Saving msglog" | tee -a $MAILFILE
cp /usr/spool/msg/msglog.log $DIR 2>&1 | tee -a $MAILFILE

#------------------------------------------------------------------------------
# Save pipes in TEMPDIR
#------------------------------------------------------------------------------
echo "*** Saving pipes" | tee -a $MAILFILE
# NOTE(review): the grep stage below looks damaged - its pattern and the
# "|" in front of xargs appear to be missing, so grep is handed "xargs",
# "find", ... as arguments and nothing reaches cpio.  The intended filter
# cannot be determined from this file; left unchanged, restore it from a
# pristine copy of the script.
(cd $TEMPDIR; /bin/ls | grep '\\' xargs -i find {} -type p -print 2>/dev/null) | \
    cpio -pdumva $DIR 2>/dev/null | tee -a $MAILFILE

#------------------------------------------------------------------------------
# Save a copy of tpdfixup
#------------------------------------------------------------------------------
echo "*** Saving tpdfixup" | tee -a $MAILFILE
cp $TPDFIXUP $DIR 2>&1 | tee -a $MAILFILE

#------------------------------------------------------------------------------
# Save kernel device traces and kernel error messages (machine specific)
#------------------------------------------------------------------------------
if cray2
then
    # Execute the '$CRASH' program and get the ctc information
    # NOTE(review): the message says ctc.out but the output is written
    # to crash.out - confirm which one is intended.
    echo "*** Saving ctc information in ctc.out" | tee -a $MAILFILE
    echo "ctc -" | $CRASH > $DIR/crash.out 2>&1

    # Execute the '/etc/krnout' program
    echo "*** Saving kernel output in krnout" | tee -a $MAILFILE
    /etc/krnout > $DIR/krnout 2>&1
else
    # Execute the '/etc/tpbmx' program
    echo "*** Saving tpbmx output in tpbmx.out" | tee -a $MAILFILE
    echo "*** /etc/tpbmx `date`" > $DIR/tpbmx.out
    /etc/tpbmx >> $DIR/tpbmx.out 2>&1
    /etc/tpbmx -d >> $DIR/tpbmx.out 2>&1

    # For Unicos 8, save formatted traces, otherwise save tables
    if [ "$UNICOS8" = "yes" ]
    then
        echo "*** Tape structures in tps.out" | tee -a $MAILFILE
        echo "tps" | $CRASH > $DIR/tps.out 2>&1
        echo "tps -" | $CRASH >> $DIR/tps.out 2>&1
        echo "*** Tape device traces in tpt.out" | tee -a $MAILFILE
        echo "tpt" | $CRASH > $DIR/tpt.out 2>&1
        echo "tpt -t -" | $CRASH >> $DIR/tpt.out 2>&1
    fi

    # Save kernel device traces
    echo "*** Kernel device traces in tpbmxtr.*" | tee -a $MAILFILE
    echo od $TRHDR_0 $TRHDR_N | $CRASH 2>/dev/null>$DIR/tpbmxtr.hdr
    echo od $TRBUF_0 $TRBUF_N | $CRASH 2>/dev/null>$DIR/tpbmxtr.buf
    echo od $BMXTB_0 $BMXTB_N | $CRASH 2>/dev/null>$DIR/tpbmxtbl.out

    # Save kernel error messages
    echo "*** Saving kernel output in daylog" | tee -a $MAILFILE
    cp /usr/adm/syslog/daylog $DIR 2>&1 | tee -a $MAILFILE
fi

#------------------------------------------------------------------------------
# Save configuration files
#------------------------------------------------------------------------------
list="$TAPECONFIG "
if cray2
then
    list="$list /usr/include/tapereq.h"
else
    list="$list /usr/include/tapedef.h"
fi
for file in $list
do
    cpy=`basename $file`
    if [ -r $file ]
    then
        echo "*** Copying $file to $cpy" | tee -a $MAILFILE
        cp $file $DIR/$cpy 2>&1 | tee -a $MAILFILE
    else
        echo "*** Cannot access $file" | tee -a $MAILFILE
    fi
done

#------------------------------------------------------------------------------
# Identify daemon processes and user tape processes
#------------------------------------------------------------------------------
# PIDS is built as a "|"-separated list (it is used as an egrep pattern
# further down); COMS is a space-separated list of command names.  Both
# start with tpdaemon itself, followed by the processes whose parent is
# tpdaemon.
if [ "$CRASH_AVAIL" = "yes" ]
then
    TPID=`echo $TPDAEMON | cut -f$CRASH_PID -d" "`
    PIDS=$TPID`echo proc | $CRASH 2>/dev/null | awk \
        "{if (\\\$$CRASH_PPID == \"$TPID\") printf(\"|%d\",\\\$$CRASH_PID)}"
        echo ""`
    COMS=tpdaemon`echo proc | $CRASH 2>/dev/null | awk \
        "{ if (\\\$$CRASH_PPID == \"$TPID\") printf(\" %s\",\\\$$CRASH_NAME)}"
        echo ""`
else
    TPID=`echo $TPDAEMON | cut -f$PS_PID -d" "`
    PIDS=$TPID`ps -elf | awk \
        "{if (\\\$$PS_PPID == \"$TPID\") printf(\"|%d\",\\\$$PS_PID) }"
        echo ""`
    COMS=tpdaemon`ps -elf | awk \
        "{ if (\\\$$PS_PPID == \"$TPID\") printf(\" %s\",\\\$$PS_COMMAND)}"
        echo ""`
fi

# Add the processes of user jobs that use tapes; the source of the job
# information depends on the daemon state and the machine type.
if [ "$TPD_ST" = "OK" ]
then
    if cray2
    then
        tpstat=/usr/bin/tpstat
    else
        tpstat=/bin/tpstat
    fi
    echo "*** tpdaemon $TPD_ST - user job info from $tpstat" \
        | tee -a $MAILFILE
    eval `$tpstat | tail +2 | cut -c10-14 | uniq | while read jobid
    do
        test "$jobid" = "" && continue
        jstat -j $jobid | tail +3 | cut -c2-6 | while read pid rest
        do
            echo 'PIDS=$PIDS\|'$pid
        done
    done`
else
    if cray2
    then
        if [ "$CRASH_AVAIL" = "yes" ]
        then
            echo "*** tpdaemon $TPD_ST - user job info from crash/ctc" \
                | tee -a $MAILFILE
            eval `echo ctc | $CRASH 2>/dev/null | grep '^ Job ID' | \
            sort | uniq | while read a1 a2 a3 jobid
            do
                test "$jobid" = "0" && continue
                jstat -j $jobid | tail +3 | cut -c2-6 | \
                while read pid rest
                do
                    echo 'PIDS=$PIDS\|'$pid
                done
            done`
        else
            echo "*** tpdaemon $TPD_ST - user job info omitted" \
                | tee -a $MAILFILE
        fi
    else
        echo "*** tpdaemon $TPD_ST - user job info from /etc/tpbmx" \
            | tee -a $MAILFILE
        PIDS=$PIDS`/etc/tpbmx -d | grep '^bmx_pid' | sort | uniq | \
            awk '{if ($2 != 0) printf("|%d",$2) }'; echo ""`
    fi
fi
PIDS=`echo $PIDS | sed 's/^|//'`    # for the case where tpdaemon is gone
echo "*** daemon processes active: $COMS" | tee -a $MAILFILE
echo "*** process id list is $PIDS" | tee -a $MAILFILE

#------------------------------------------------------------------------------
# Save output from $CRASH
#------------------------------------------------------------------------------
if [ "$CRASH_AVAIL" = "yes" ]
then
    echo "*** Saving process information in crash.out" | tee -a $MAILFILE
    # The first subshell prints one crash command per line; each command
    # is then run through $CRASH separately with a time-stamped heading.
    (
    echo "status"
    if cray2
    then
        echo "proc"
    else
        echo "proc -w"
    fi
    echo "od 0 2000"
    echo proc | $CRASH 2>/dev/null | tail +2 | egrep "$PIDS" | \
    while read slot x
    do
        echo "user $slot"
        echo "stack $slot"
    done
    ) | while read command
    do
        echo "> $command (`date`)" >> $DIR/crash.out
        echo "$command" | $CRASH 2>/dev/null >> $DIR/crash.out
    done
fi

#------------------------------------------------------------------------------
# Look for a core file in TEMPDIR
#------------------------------------------------------------------------------
if [ -r $TEMPDIR/core ]
then
    echo "*** Saving core file" | tee -a $MAILFILE
    cp $TEMPDIR/core $DIR 2>&1 | tee -a $MAILFILE
    cp /usr/lib/tp/tpdaemon $DIR 2>&1 | tee -a $MAILFILE
    # stdout is redirected first and stderr then duplicated onto it, so
    # that debugger error messages are saved in the file as well.
    echo where | cdbx -L -c $TEMPDIR/core /usr/lib/tp/tpdaemon \
        > $DIR/cdbx.where 2>&1
fi

#------------------------------------------------------------------------------
# Copy /usr/adm/errfile
#------------------------------------------------------------------------------
cp /usr/adm/errfile $DIR 2>&1 | tee -a $MAILFILE

#------------------------------------------------------------------------------
# Save miscellaneous status displays - tpdaemon must be running
# Allow $TIMEOUT minutes to collect all the information.
# This is done last so in case tpdaemon was "BUSY" with something
# earlier, we give him as much time as we can to finish it.
#------------------------------------------------------------------------------
if [ "$TPD_ST" = "BUSY" ]
then
    if [ "$CRASH_AVAIL" = "yes" ]
    then
        NUDAEMON=`echo proc | $CRASH 2>/dev/null | grep tpdaemon`
    else
        # '[t]pdaemon' keeps the grep process itself out of the match.
        NUDAEMON=`ps -elf | grep '[t]pdaemon'`
    fi
    # Any change in the daemon's process entry since the first look is
    # taken to mean it is no longer stuck.
    if [ "$NUDAEMON" != "$TPDAEMON" ]
    then
        TPD_ST=OK
        echo "*** tpdaemon status changed, set to OK" | tee -a $MAILFILE
    fi
fi

if [ "$TPD_ST" = "OK" ]
then
    echo "*** Collecting output from daemon commands" | tee -a $MAILFILE
    TIMEOUT=1
    echo "*** This step will be aborted automatically if not completed"
    echo "*** in $TIMEOUT minutes. Please wait."
    if cray2
    then
        list=/usr/bin/tpstat
    else
        list=/bin/tpstat
    fi
    list="$list /etc/tpgstat /etc/tpdev /etc/tpmls /etc/tpmql"
    # The commands run in a background subshell; a second background
    # subshell kills it after the timeout.  Whichever finishes first,
    # the other one is cleaned up below.
    (
    for cmd in $list
    do
        out=`basename $cmd`.out
        if [ -x $cmd ]
        then
            echo "*** $cmd --> $out" | tee -a $MAILFILE
            echo "*** $cmd `date`" > $DIR/$out
            # Same redirection order as for tpbmx above, so that
            # error output ends up in the file too.
            $cmd >> $DIR/$out 2>&1
        else
            echo "*** Not access to $cmd" | tee -a $MAILFILE
        fi
    done
    ) &
    statuspid=$!
    (sleep `expr $TIMEOUT \* 60`
     echo "*** command collection timed out" | tee -a $MAILFILE
     kill $statuspid) &
    sleeper=$!
    wait $statuspid; kill $sleeper
    echo "*** Daemon commands section completed"
else
    echo "*** tpdaemon $TPD_ST - daemon commands skipped" | tee -a $MAILFILE
fi

#------------------------------------------------------------------------------
# Save ls -l of $TEMPDIR (/usr/spool/tape) and /usr/lib/tp
#------------------------------------------------------------------------------
echo "*** Saving ls -l of $TEMPDIR to spool.ls" | tee -a $MAILFILE
(echo "# ls -l $TEMPDIR";/bin/ls -l $TEMPDIR) > $DIR/spool.ls
echo "*** Saving ls -l of /usr/lib/tp to libtp.ls" | tee -a $MAILFILE
(echo "# ls -l /usr/lib/tp";/bin/ls -l /usr/lib/tp) > $DIR/libtp.ls

#------------------------------------------------------------------------------
# Save versions of tpdaemon binaries in /usr/lib/tp
#------------------------------------------------------------------------------
echo "*** Saving what strings from /usr/lib/tp to libtp.ver" | tee -a $MAILFILE
echo "*** what strings from /usr/lib/tp" > $DIR/libtp.ver
(cd /usr/lib/tp; for i in $COMS
do
    if [ -x $i ]
    then
        echo "*** /usr/lib/tp/$i"
        ( echo ""; echo "***"; echo "*** $i"; echo "***"; echo ""
          what $i ) >> $DIR/libtp.ver
    else
        echo "*** Not access to /usr/lib/tp/$i"
    fi
done )

#------------------------------------------------------------------------------
# Try to get IOS model D AMAP files
#------------------------------------------------------------------------------
if model_d
then
    # Candidates: /usr/src, then every mount entry mentioning "src".
    # The first candidate holding the iosa amap.D is used; its iosb
    # amap.D is copied too when present.
    ( echo /usr/src; /etc/mount | grep src ) | while read m on d rest
    do
        test -r $m/ios/iossys/cf.$SN/iosa/amap.D || continue
        echo "*** Copying amap.D for ios 0 from $m" | tee -a $MAILFILE
        cp $m/ios/iossys/cf.$SN/iosa/amap.D $DIR/iosa_amap.D
        test -r $m/ios/iossys/cf.$SN/iosb/amap.D || break
        echo "*** Copying amap.D for ios 1 from $m" | tee -a $MAILFILE
        cp $m/ios/iossys/cf.$SN/iosb/amap.D $DIR/iosb_amap.D
        break
    done
fi

#------------------------------------------------------------------------------
# Try to get Unicos load map (all Cray models)
#------------------------------------------------------------------------------
/etc/mount | grep src | while read m on d rest
do
    test -r $m/uts/cf.$SN/map || continue
    echo "*** Copying $m/uts/cf.$SN/map to unicos.map"
    cp $m/uts/cf.$SN/map $DIR/unicos.map
done

#------------------------------------------------------------------------------
# Try to get IOS related info for ELS machines
#------------------------------------------------------------------------------
if crayels
then
    mkdir -p $DIR/ioe 2>/dev/null
    chmod 777 $DIR/ioe
    if exdf -i /config > $DIR/ioe/config
    then
        echo "*** From IOS disk: /config --> ioe/config" | \
            tee -a $MAILFILE
        # Fetch every /dev file named in the IOS config, except the
        # device types skipped in the case statement.
        grep '^[ ]*/dev' $DIR/ioe/config | sort | uniq | \
        while read file r
        do
            name=`basename $file`
            case $name in
            console|disk|esdi|eth*|fd*) continue ;;
            das|bm|*net|hippi)          continue ;;
            *tape*|s3560)               ;;
            *)                          ;;
            esac
            exdf -i $file > $DIR/ioe/$name || continue
            echo "*** $file --> ioe/$name" | \
                tee -a $MAILFILE
        done
    else
        echo "*** Not access to IOS disk" | tee -a $MAILFILE
    fi
    msgr "please save the output of the 'whatmic' command from the IOS" &
fi

#------------------------------------------------------------------------------
# End of collection phase
#------------------------------------------------------------------------------
fi

#------------------------------------------------------------------------------
# Start of solicitation phase
#------------------------------------------------------------------------------
if [ "$SOLICIT" = "yes" ]
then

#------------------------------------------------------------------------------
# Get name and comments from operator, so that we have a record of what
# actually happened.
#------------------------------------------------------------------------------
name=""
while [ "$name" = "" ] ; do
    echo "Please enter your name and press <RETURN>: \c"
    read name
    if [ "$name" = "" ] ; then
        echo ""
    fi
done

echo "*** tpdfixup run on $MACHINE $LEVEL by $MYID ($name) ***" > $COMMENTFILE
echo "" >> $COMMENTFILE
cat <$MAILFILE >> $COMMENTFILE
# Append the problem-report template.  The "drive(s) " line is searched for
# below to position the editor, so it must start in column one.
cat <<EOF2 >>$COMMENTFILE

The tpdfixup script is used when tapes are either hung or behaving
abnormally. The tape trace files and output from various tape
commands will be saved, along with a problem description that YOU
may provide NOW by editing this message!

YOU ARE NOW IN 'vi'. Please include as much information as possible
about the problem such as the drive names/numbers having problems,
commands entered, and a problem description. Enter the vi command
':wq!' when done.

drive(s) having problems:

command(s) entered:

problem description:

'moused' text from tape display, request messages, or error messages:

enter the vi command ':wq!' when done updating this message.
EOF2
echo ""
echo "Please press <RETURN> when ready to enter the 'vi' editor."
echo "You should enter a detailed problem description."
read return
# Start vi on the line following the "drive(s) ..." prompt when found.
line_no=`grep -n '^drive(s) ' $COMMENTFILE 2>/dev/null | sed 's/:.*$//'`
if [ "$line_no" -gt 0 ]
then
    line_no=+`expr $line_no + 1`
else
    line_no=
fi
vi $line_no $COMMENTFILE
# The edited comments (which begin with the log so far) become the mail.
cat $COMMENTFILE > $MAILFILE
echo "" >> $MAILFILE

#------------------------------------------------------------------------------
# End of solicitation phase
#------------------------------------------------------------------------------
fi

#------------------------------------------------------------------------------
# Exit at this point if restart not wanted
#------------------------------------------------------------------------------
if [ "$RESTART" = "no" ]
then
    echo "*** tpdfixup exiting without restart" | tee -a $MAILFILE
    echo "*** use `basename $0` -R to restart tpdaemon" | tee -a $MAILFILE
    exit 0
fi

#------------------------------------------------------------------------------
# Try to gracefully stop the tape daemon if it exists and is sitting at
# its select() system call.
#------------------------------------------------------------------------------
# '[t]pdaemon' keeps the grep process itself out of the match, so an
# already stopped daemon is not reported as still present.
TPDAEMON=`ps -elf | grep '[t]pdaemon'`
if [ ! -z "$TPDAEMON" ]
then
    # determine process ids: TPID (daemon), IDLIST (all)
    TPID=`echo $TPDAEMON | cut -f$PS_PID -d" "`
    # Each ps line is split into words by "sh -c", which prints an
    # assignment appending the word in the PID column to IDLIST.
    eval `ps -lf -g $TPID | tee -a $MAILFILE | tail +2 | while read line
    do
        sh -c 'echo IDLIST=\"\\\$IDLIST $'$PS_PID'\"' dummy $line
    done`

    # now kill gracefully
    pri=`echo $TPDAEMON | cut -f$PS_PRI -d" "`
    if [ "$pri" -eq 26 ]
    then
        echo "*** tpdaemon in select:trying kill -2 ***" \
            | tee -a $MAILFILE
        echo $TPDAEMON | tee -a $MAILFILE
        TPID=`echo $TPDAEMON | cut -d" " -f$PS_PID`
        kill -2 $TPID 2>&1 | tee -a $MAILFILE
        sleep 2
    fi
fi

#------------------------------------------------------------------------------
# If the tape daemon is still around, then it must be hung up. In that
# case, issue a "kill -9" for it and for each of its children.
#------------------------------------------------------------------------------
TPDAEMON=`ps -elf | grep '[t]pdaemon'`
if [ ! -z "$TPDAEMON" ]
then
    echo "*** tpdaemon still around - trying kill -9 ***" | tee -a $MAILFILE
    for process in `echo $IDLIST`
    do
        echo "*** Killing pid $process ***" | tee -a $MAILFILE
        kill -9 $process 2>&1 | tee -a $MAILFILE
        sleep 1
    done
fi

#------------------------------------------------------------------------------
# Reset Cray-2 CTC boxes
#------------------------------------------------------------------------------
if cray2
then
    echo ctc | $CRASH 2>/dev/null | grep '^CTC #' | while read a1 a2 ctc
    do
        sleep 5
        /etc/ctc $ctc down 2>&1 | tee -a $MAILFILE
        echo "*** /etc/ctc $ctc down : $?" | tee -a $MAILFILE
        sleep 5
        /etc/ctc $ctc reset 2>&1 | tee -a $MAILFILE
        echo "*** /etc/ctc $ctc reset : $?" | tee -a $MAILFILE
    done
fi

#------------------------------------------------------------------------------
# Check to see if processes are still around
#------------------------------------------------------------------------------
HOPELESS="FALSE"
if [ ! -z "$IDLIST" ]
then
    for process in `echo $IDLIST`
    do
        psline="`ps -elf | awk '$COL == PID {print}' COL=$PS_PID PID=$process -`"
        # kill -0 only tests whether the process can still be signalled.
        kill -0 $process
        if [ $? -eq 0 ]
        then
            echo "*** Cannot kill $process ***" | tee -a $MAILFILE
            echo "$psline" | tee -a $MAILFILE
            HOPELESS="TRUE"
        fi
    done
fi

#------------------------------------------------------------------------------
# If cleanup was successful, then restart the tape daemon. The procedure
# varies from machine to machine. If some of the children could not be
# terminated, report such to the operator and give up.
#------------------------------------------------------------------------------
if [ "$HOPELESS" = "TRUE" ]
then
    echo "*** Cannot restart tape daemon ***" | tee -a $MAILFILE
else
    echo "*** Restarting tpdaemon"
    /etc/sdaemon tpdaemon 2>&1 | tee -a $MAILFILE
fi

exit 0