.title sddl Search and Destroy a Dangling Lock ;++ ; ; Program to search out and destroy dangling locks ; ; Theory of operation: ; o Use $GETSYI to get this node's Cluster System ID (CSID) ; o Get physical device name of disk from user with LIB$GET_INPUT ; o Use $GETDVI to get the device lock ID (if the disk is mounted/ ; allocated) ; - If none, use $ENQ to take out a NL mode lock and use that lock ; ID instead as a handle into the lock database ; o Use $GETLKI to get info about all locks on the device resource ; - Loop, looking at each lock: ; 1) Ignore NL locks (which may be from a $SHOW DEVICE command) ; (this also ignores the "handle" lock we got above) ; 2) Ignore all process (non-system) locks ; 3) Ignore the real device lock ID, if there is one ; 4) Ignore locks taken out from other nodes ; - If we find any lock which passes these hurdles, we $DEQ it ; - Otherwise, we print a message about looking on other nodes ; or rechecking the device name ; ;-- .library /sys$share:lib/ $ssdef ; For SS$_xxx symbols $dvidef ; For $GETDVI $syidef ; For $GETSYI $lckdef ; For $ENQ $lkidef ; For $GETLKI .macro check, ?l1 ; another blbs r0,l1 ; silly macro ret ; to do status l1: ; checking .endm ; Macro to define item lists .macro item type, length, bufaddr, retlen .word length .word type .address bufaddr .if blank .long 0 .endc .if not_blank .address retlen .Endc .Endm ;+ ; ; Main program ; ;- .psect $code$, rd,nowrt,exe,long .entry sddl,0 main:: ; ; Get the Cluster System ID (CSID) for the node on which we're running ; $getsyiw_g syi_arg ; Get cluster system ID for this node ; ; Get the name of the device which has a dangling lock ; callg get_input,g^lib$get_input ; Get the device name check ; ; Get lock ID, if mounted/allocated, and the device name, including ; allocation class ; $getdviw_g dvi_arg ; Get lock ID, if mounted/allocated, and check ; the device name, including allo-class, tstl lockid ; Did we get a lock ID? bneq gotlock ; Yes --> use that as our handle ; No: then $ENQ a NL lock on the device addw2 #4,alldev_len ; Add 4 to the length returned, for ; "SYS$", which we add to the front to ; form a resource name $cmkrnl_s routin=enq_routine check movl nllkid,lockid ; Lock ID of our NL-mode handle lock gotlock: $cmkrnl_s routin=lki_routine check ; ; Check bit 31 of the returned length to make sure we had enough buffer ; for all the lock info ; cvtwl lock_len_2,r3 bgeq buf_ok brw bufshort ; Buffer was too short --> error buf_ok: moval locks,r2 ; Base address of lock info array movzwl locks_len,r4 ; Grab total length of lock info addl2 r2,r4 ; Add base addr + length to get end addr lk_loop: ;+ ; Loop through the returned data, one lock at a time. ; Print out some info for each lock ; Move data into $FAO argument list: ;- movl lki$l_lkid(r2),fao_arg+fao$_p1 ; Move lock ID movzbl lki$b_grmode(r2),fao_arg+fao$_p2 ; Move grant mode movl lki$l_pid(r2),fao_arg+fao$_p3 ; Move process ID movl lki$l_csid(r2),fao_arg+fao$_p4 ; Move cluster sys ID $fao_g fao_arg check callg wrt_faobuf,g^lib$put_output check ;+ ; Check the lock; ignore it if it can't be a dangling one ; We will ignore: ; - all NL-mode locks (from SHOW DEVICE commands or this program itself) ; - all locks with a non-zero process ID (PID) ; - the lock with lock ID matching the one from $GETDVI ; which we ignore because it is a valid lock ; - all locks taken out from other nodes in the cluster (since if the ; hanging lock is one of them, we can't $DEQ it from this node) ;- cmpb lki$b_grmode(r2),#lck$k_nlmode ; Is granted mode NL? bneq 30$ ; No --> keep checking callg msg_nl,g^lib$put_output ; Yes: NL mode lock brw lk_skip ; Ignore it 30$: tstl lki$l_pid(r2) ; Check process ID beql 31$ ; Zero: system lock: keep going callg msg_pid,g^lib$put_output ;Non-zero: process lock brb lk_skip ; Ignore it 31$: cmpl lockid,lki$l_lkid(r2) ; Is it the real device lock? bneq 32$ ; No --> keep checking callg msg_dev,g^lib$put_output ;Yes: device mounted; lock's OK brb lk_skip ; Ignore it 32$: cmpl node_csid,lki$l_csid(r2) ;Was it $ENQ'd on this node? beql 33$ ; Yes --> must be dangling one movl lki$l_csid(r2),remote_csid ; No: $ENQ'd on another node $getsyiw_g syi2_arg ; Find out the nodename callg msg_node,g^lib$put_output ; Tell user which node it's on brb lk_skip ; Ignore it ;+ ; Here, we have found a dangling lock ;- 33$: callg msg_dangling,g^lib$put_output ; Tell user we found one check movl lki$l_lkid(r2),deq_arg+deq$_lkid ;Move LKID to arg. list $cmkrnl_s routin=deq_routine ; Release the dangling lock check callg msg_removed,g^lib$put_output ; Tell user we zapped it check lk_skip: addl2 r3,r2 ; Move to the next set of lock info cmpl r2,r4 ; At end? bgeq 40$ ; Yes --> brw lk_loop ; No --> continue with next lock 40$: ; See if we found a dangling lock tstl deq_arg+deq$_lkid ; Did we find a dangling lock? bneq deq_nllock ; Yes --> ;+ ; Didn't find a dangling lock. ; Tell the user to check the device name or look on other nodes. ;- callg msg_none,g^lib$put_output check callg msg_none2,g^lib$put_output check ;+ ; Dequeue the handle lock, if we needed one ;- deq_nllock: tstl nllkid ; Did we $ENQ a NL mode handle lock? beql exit ; No --> quit now movl lockid,deq_arg+deq$_lkid ; Move lock ID into arg. list $cmkrnl_s routin=deq_routine ; Yes: release it check ;+ ; Exit with normal status result ;- exit: $exit_s code=#SS$_NORMAL ;+ ; Lock info buffer was too short. ; Print an informative message and give up. ;- bufshort: callg msg_bufshort,g^lib$put_output check $exit_s code=#SS$_INSFMEM ;+ ; Routine to enqueue a null-mode lock in kernel mode ;- .entry enq_routine,0 $enqw_g enq_arg ; $ENQ a NL mode lock on the device ret ;+ ; Routine to get info on all locks taken out on the device resource ;- .entry lki_routine,0 $getlkiw_g lki_arg ret ;+ ; Routine to dequeue a lock in kernel mode ;- .entry deq_routine,0 $deq_g deq_arg ; $DEQ a NL mode lock on the device ret ;+ ; Read-only data ; ;- .psect $rodata$, rd,nowrt,noexe,long get_input: ; Argument list for LIB$GET_INPUT call .long 3 ; 3 arguments .address dev_desc ; Get device name .address prompt_desc ; Prompt .address dev_len ; Length of returned device name prompt_desc: .ascid "Name of device which has a dangling lock: " dvi_arg: $getdvi devnam=dev_desc,- ; Device name itmlst=dvi_list,- ; Item list iosb=iostat ; I/O status block dvi_list: item dvi$_lockid, 4, lockid item dvi$_alldevnam, 64, alldev, alldev_len .long 0 ; End of item list wrt_faobuf: ; Arg. list for LIB$PUT_OUTPUT call to write buf from $FAO .long 1 ; 1 argument .address fao_desc fao_str: .ascid "LKID !XL, GRMODE !XB, PID !XL, CSID !XL" .align long syi_arg: $getsyi csidadr=0, nodename=0,- ; Assume the local node itmlst=syi_list,- ; Item list iosb=iostat ; I/O status block syi_list: item syi$_node_csid, 4, node_csid .long 0 ; End of item list enq_arg: $enq lkmode=lck$k_nlmode,- ; Take out a null-mode lock lksb=lkstat,- ; Lock status block flags=lck$m_noqueue!lck$m_system!lck$m_noquota,- resnam=res_desc ; Resource name lki_arg: $getlki lkidadr=lockid,- ; Lock ID itmlst=lki_list,- ; Item list iosb=iostat ; I/O status block lki_list: item lki$_locks, 1500, locks, locks_len .long 0 ; End of item list syi2_arg: $getsyi csidadr=remote_csid,- ; Cluster system ID itmlst=syi2_list,- ; Item list iosb=iostat ; I/O status block syi2_list: item syi$_nodename, 15, remnode ;, remnode_len .long 0 ; End of item list msg_nl: ; Argument list for LIB$PUT_OUTPUT call .long 1 ; 1 argument .address msg_nl_desc msg_nl_desc: .ascid " Null-mode lock: ignored" msg_pid: ; Argument list for LIB$PUT_OUTPUT call .long 1 ; 1 argument .address msg_pid_desc msg_pid_desc: .ascid " Process lock: ignored" msg_dev: ; Argument list for LIB$PUT_OUTPUT call .long 1 ; 1 argument .address msg_dev_desc msg_dev_desc: .ascid " Regular device lock: ignored" msg_node: ; Argument list for LIB$PUT_OUTPUT call .long 1 ; 1 argument .address msg_node_desc msg_dangling: ; Argument list for LIB$PUT_OUTPUT call .long 1 ; 1 argument .address msg_dangling_desc msg_dangling_desc: .ascid " Dangling lock" msg_removed: ; Argument list for LIB$PUT_OUTPUT call .long 1 ; 1 argument .address msg_removed_desc msg_removed_desc: .ascid " *** Removed ***" msg_none: ; Argument list for LIB$PUT_OUTPUT call .long 1 ; 1 argument .address msg_none_desc msg_none2: ; Argument list for LIB$PUT_OUTPUT call .long 1 ; 1 argument .address msg_none2_desc msg_none_desc: .ascid - "No dangling lock found on this node. Verify device name, or run this" msg_none2_desc: .ascid - "program on other cluster node(s) listed as holding locks on the disk." msg_bufshort: ; Argument list for LIB$PUT_OUTPUT call .long 1 ; 1 argument .address msg_bufshort_desc ; Print error about short buf msg_bufshort_desc: .ascid - "Lock info buffer is too small (1500). Raise it, reassemble, & rerun." ;+ ; Read-write data ;- .psect $data$, rd,wrt,noexe,long iostat: .blkw 4 ;I/O status block ;+ ; Descriptor for device name which user inputs ;- dev_desc: ; input device name string descriptor dev_len: .long 255 .address dev_text dev_text: .blkb 255 .align long fao_desc: fao_len: .long 300 .address faobuf faobuf: .blkb 300 node_csid: .blkl 1 ; Cluster system ID of the node we're running on fao_arg: $fao ctrstr=fao_str,- ; control string outlen=fao_len,- ; output length outbuf=fao_desc,- ; buf to hold the result p1=0,- ; Lock ID (to be supplied later) p2=0,- ; Grant mode (TBS) p3=0,- ; Process ID (TBS) p4=0 ; Cluster system ID (TBS) ;+ ; Note: we make the resource name by putting "SYS$" in front of the ; allocation-class device name we get from $GETDVI (which always ; includes a leading underscore). ;- res_desc: ; Descriptor for device lock resource name alldev_len: .blkw 1 .word 0 .address resource resource: ; Resource name for device lock, which consists .ascii /SYS$/ ; of a prefix of "SYS$", to be followed by an alldev: .blkb 64 ; underline and and the device name (with its ; allocation class, if any) lockid: .blkl 1 ; Lock ID of a lock on the device (from $GETDVI) lkstat: .blkw 2 ; Lock status block (returned from $ENQ) nllkid: .long 0 ; Lock ID of a NL handle lock (zero if not used) ;+ ; Flag LCK$M_VALBLK is not specified, so we don't need the lock value ;- .blkb 16 ; Space for lock value block (unused) locks_len: .blkw 1 ; Total length of info returned from $GETLKI lock_len_2: .blkw 1 ; Length of info about each lock from $GETLKI; ; also, high bit set indicates insufficient buf locks: .blkb 1500 ; Buffer for lock info returned by $GETLKI remote_csid: .blkl 1 ; Remote system CSID from $GETLKI msg_node_desc: .long 34 ; Descriptor for remote node message .address msg_node_buf msg_node_buf: .ascii " Lock is on node: " remnode: .blkb 15 deq_arg: $deq lkid=0 ; Lock ID (to be filled in by a MOVL later) .end sddl