forked from torvalds/linux
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathdlmglue.c
3103 lines (2549 loc) · 81.5 KB
/
dlmglue.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/* -*- mode: c; c-basic-offset: 8; -*-
* vim: noexpandtab sw=8 ts=8 sts=0:
*
* dlmglue.c
*
* Code which implements an OCFS2 specific interface to our DLM.
*
* Copyright (C) 2003, 2004 Oracle. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 021110-1307, USA.
*/
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/highmem.h>
#include <linux/mm.h>
#include <linux/crc32.h>
#include <linux/kthread.h>
#include <linux/pagemap.h>
#include <linux/debugfs.h>
#include <linux/seq_file.h>
#include <cluster/heartbeat.h>
#include <cluster/nodemanager.h>
#include <cluster/tcp.h>
#include <dlm/dlmapi.h>
#define MLOG_MASK_PREFIX ML_DLM_GLUE
#include <cluster/masklog.h>
#include "ocfs2.h"
#include "alloc.h"
#include "dcache.h"
#include "dlmglue.h"
#include "extent_map.h"
#include "file.h"
#include "heartbeat.h"
#include "inode.h"
#include "journal.h"
#include "slot_map.h"
#include "super.h"
#include "uptodate.h"
#include "vote.h"
#include "buffer_head_io.h"
struct ocfs2_mask_waiter {
struct list_head mw_item;
int mw_status;
struct completion mw_complete;
unsigned long mw_mask;
unsigned long mw_goal;
};
static struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres);
static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres);
/*
* Return value from ->downconvert_worker functions.
*
* These control the precise actions of ocfs2_unblock_lock()
* and ocfs2_process_blocked_lock()
*
*/
enum ocfs2_unblock_action {
UNBLOCK_CONTINUE = 0, /* Continue downconvert */
UNBLOCK_CONTINUE_POST = 1, /* Continue downconvert, fire
* ->post_unlock callback */
UNBLOCK_STOP_POST = 2, /* Do not downconvert, fire
* ->post_unlock() callback. */
};
struct ocfs2_unblock_ctl {
int requeue;
enum ocfs2_unblock_action unblock_action;
};
static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres,
int new_level);
static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres);
static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres,
int blocking);
static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres,
int blocking);
static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb,
struct ocfs2_lock_res *lockres);
#define mlog_meta_lvb(__level, __lockres) ocfs2_dump_meta_lvb_info(__level, __PRETTY_FUNCTION__, __LINE__, __lockres)
/* This aids in debugging situations where a bad LVB might be involved. */
static void ocfs2_dump_meta_lvb_info(u64 level,
const char *function,
unsigned int line,
struct ocfs2_lock_res *lockres)
{
struct ocfs2_meta_lvb *lvb = (struct ocfs2_meta_lvb *) lockres->l_lksb.lvb;
mlog(level, "LVB information for %s (called from %s:%u):\n",
lockres->l_name, function, line);
mlog(level, "version: %u, clusters: %u, generation: 0x%x\n",
lvb->lvb_version, be32_to_cpu(lvb->lvb_iclusters),
be32_to_cpu(lvb->lvb_igeneration));
mlog(level, "size: %llu, uid %u, gid %u, mode 0x%x\n",
(unsigned long long)be64_to_cpu(lvb->lvb_isize),
be32_to_cpu(lvb->lvb_iuid), be32_to_cpu(lvb->lvb_igid),
be16_to_cpu(lvb->lvb_imode));
mlog(level, "nlink %u, atime_packed 0x%llx, ctime_packed 0x%llx, "
"mtime_packed 0x%llx iattr 0x%x\n", be16_to_cpu(lvb->lvb_inlink),
(long long)be64_to_cpu(lvb->lvb_iatime_packed),
(long long)be64_to_cpu(lvb->lvb_ictime_packed),
(long long)be64_to_cpu(lvb->lvb_imtime_packed),
be32_to_cpu(lvb->lvb_iattr));
}
/*
* OCFS2 Lock Resource Operations
*
* These fine tune the behavior of the generic dlmglue locking infrastructure.
*
* The most basic of lock types can point ->l_priv to their respective
* struct ocfs2_super and allow the default actions to manage things.
*
* Right now, each lock type also needs to implement an init function,
* and trivial lock/unlock wrappers. ocfs2_simple_drop_lockres()
* should be called when the lock is no longer needed (i.e., object
* destruction time).
*/
struct ocfs2_lock_res_ops {
/*
* Translate an ocfs2_lock_res * into an ocfs2_super *. Define
* this callback if ->l_priv is not an ocfs2_super pointer
*/
struct ocfs2_super * (*get_osb)(struct ocfs2_lock_res *);
/*
* Optionally called in the downconvert (or "vote") thread
* after a successful downconvert. The lockres will not be
* referenced after this callback is called, so it is safe to
* free memory, etc.
*
* The exact semantics of when this is called are controlled
* by ->downconvert_worker()
*/
void (*post_unlock)(struct ocfs2_super *, struct ocfs2_lock_res *);
/*
* Allow a lock type to add checks to determine whether it is
* safe to downconvert a lock. Return 0 to re-queue the
* downconvert at a later time, nonzero to continue.
*
* For most locks, the default checks that there are no
* incompatible holders are sufficient.
*
* Called with the lockres spinlock held.
*/
int (*check_downconvert)(struct ocfs2_lock_res *, int);
/*
* Allows a lock type to populate the lock value block. This
* is called on downconvert, and when we drop a lock.
*
* Locks that want to use this should set LOCK_TYPE_USES_LVB
* in the flags field.
*
* Called with the lockres spinlock held.
*/
void (*set_lvb)(struct ocfs2_lock_res *);
/*
* Called from the downconvert thread when it is determined
* that a lock will be downconverted. This is called without
* any locks held so the function can do work that might
* schedule (syncing out data, etc).
*
* This should return any one of the ocfs2_unblock_action
* values, depending on what it wants the thread to do.
*/
int (*downconvert_worker)(struct ocfs2_lock_res *, int);
/*
* LOCK_TYPE_* flags which describe the specific requirements
* of a lock type. Descriptions of each individual flag follow.
*/
int flags;
};
/*
* Some locks want to "refresh" potentially stale data when a
* meaningful (PRMODE or EXMODE) lock level is first obtained. If this
* flag is set, the OCFS2_LOCK_NEEDS_REFRESH flag will be set on the
* individual lockres l_flags member from the ast function. It is
* expected that the locking wrapper will clear the
* OCFS2_LOCK_NEEDS_REFRESH flag when done.
*/
#define LOCK_TYPE_REQUIRES_REFRESH 0x1
/*
* Indicate that a lock type makes use of the lock value block. The
* ->set_lvb lock type callback must be defined.
*/
#define LOCK_TYPE_USES_LVB 0x2
static struct ocfs2_lock_res_ops ocfs2_inode_rw_lops = {
.get_osb = ocfs2_get_inode_osb,
.flags = 0,
};
static struct ocfs2_lock_res_ops ocfs2_inode_meta_lops = {
.get_osb = ocfs2_get_inode_osb,
.check_downconvert = ocfs2_check_meta_downconvert,
.set_lvb = ocfs2_set_meta_lvb,
.flags = LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB,
};
static struct ocfs2_lock_res_ops ocfs2_inode_data_lops = {
.get_osb = ocfs2_get_inode_osb,
.downconvert_worker = ocfs2_data_convert_worker,
.flags = 0,
};
static struct ocfs2_lock_res_ops ocfs2_super_lops = {
.flags = LOCK_TYPE_REQUIRES_REFRESH,
};
static struct ocfs2_lock_res_ops ocfs2_rename_lops = {
.flags = 0,
};
static struct ocfs2_lock_res_ops ocfs2_dentry_lops = {
.get_osb = ocfs2_get_dentry_osb,
.post_unlock = ocfs2_dentry_post_unlock,
.downconvert_worker = ocfs2_dentry_convert_worker,
.flags = 0,
};
static struct ocfs2_lock_res_ops ocfs2_inode_open_lops = {
.get_osb = ocfs2_get_inode_osb,
.flags = 0,
};
static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres)
{
return lockres->l_type == OCFS2_LOCK_TYPE_META ||
lockres->l_type == OCFS2_LOCK_TYPE_DATA ||
lockres->l_type == OCFS2_LOCK_TYPE_RW ||
lockres->l_type == OCFS2_LOCK_TYPE_OPEN;
}
static inline struct inode *ocfs2_lock_res_inode(struct ocfs2_lock_res *lockres)
{
BUG_ON(!ocfs2_is_inode_lock(lockres));
return (struct inode *) lockres->l_priv;
}
static inline struct ocfs2_dentry_lock *ocfs2_lock_res_dl(struct ocfs2_lock_res *lockres)
{
BUG_ON(lockres->l_type != OCFS2_LOCK_TYPE_DENTRY);
return (struct ocfs2_dentry_lock *)lockres->l_priv;
}
static inline struct ocfs2_super *ocfs2_get_lockres_osb(struct ocfs2_lock_res *lockres)
{
if (lockres->l_ops->get_osb)
return lockres->l_ops->get_osb(lockres);
return (struct ocfs2_super *)lockres->l_priv;
}
static int ocfs2_lock_create(struct ocfs2_super *osb,
struct ocfs2_lock_res *lockres,
int level,
int dlm_flags);
static inline int ocfs2_may_continue_on_blocked_lock(struct ocfs2_lock_res *lockres,
int wanted);
static void ocfs2_cluster_unlock(struct ocfs2_super *osb,
struct ocfs2_lock_res *lockres,
int level);
static inline void ocfs2_generic_handle_downconvert_action(struct ocfs2_lock_res *lockres);
static inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lockres);
static inline void ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *lockres);
static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres, int level);
static void ocfs2_schedule_blocked_lock(struct ocfs2_super *osb,
struct ocfs2_lock_res *lockres);
static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres,
int convert);
#define ocfs2_log_dlm_error(_func, _stat, _lockres) do { \
mlog(ML_ERROR, "Dlm error \"%s\" while calling %s on " \
"resource %s: %s\n", dlm_errname(_stat), _func, \
_lockres->l_name, dlm_errmsg(_stat)); \
} while (0)
static void ocfs2_vote_on_unlock(struct ocfs2_super *osb,
struct ocfs2_lock_res *lockres);
static int ocfs2_meta_lock_update(struct inode *inode,
struct buffer_head **bh);
static void ocfs2_drop_osb_locks(struct ocfs2_super *osb);
static inline int ocfs2_highest_compat_lock_level(int level);
static void ocfs2_build_lock_name(enum ocfs2_lock_type type,
u64 blkno,
u32 generation,
char *name)
{
int len;
mlog_entry_void();
BUG_ON(type >= OCFS2_NUM_LOCK_TYPES);
len = snprintf(name, OCFS2_LOCK_ID_MAX_LEN, "%c%s%016llx%08x",
ocfs2_lock_type_char(type), OCFS2_LOCK_ID_PAD,
(long long)blkno, generation);
BUG_ON(len != (OCFS2_LOCK_ID_MAX_LEN - 1));
mlog(0, "built lock resource with name: %s\n", name);
mlog_exit_void();
}
static DEFINE_SPINLOCK(ocfs2_dlm_tracking_lock);
static void ocfs2_add_lockres_tracking(struct ocfs2_lock_res *res,
struct ocfs2_dlm_debug *dlm_debug)
{
mlog(0, "Add tracking for lockres %s\n", res->l_name);
spin_lock(&ocfs2_dlm_tracking_lock);
list_add(&res->l_debug_list, &dlm_debug->d_lockres_tracking);
spin_unlock(&ocfs2_dlm_tracking_lock);
}
static void ocfs2_remove_lockres_tracking(struct ocfs2_lock_res *res)
{
spin_lock(&ocfs2_dlm_tracking_lock);
if (!list_empty(&res->l_debug_list))
list_del_init(&res->l_debug_list);
spin_unlock(&ocfs2_dlm_tracking_lock);
}
static void ocfs2_lock_res_init_common(struct ocfs2_super *osb,
struct ocfs2_lock_res *res,
enum ocfs2_lock_type type,
struct ocfs2_lock_res_ops *ops,
void *priv)
{
res->l_type = type;
res->l_ops = ops;
res->l_priv = priv;
res->l_level = LKM_IVMODE;
res->l_requested = LKM_IVMODE;
res->l_blocking = LKM_IVMODE;
res->l_action = OCFS2_AST_INVALID;
res->l_unlock_action = OCFS2_UNLOCK_INVALID;
res->l_flags = OCFS2_LOCK_INITIALIZED;
ocfs2_add_lockres_tracking(res, osb->osb_dlm_debug);
}
void ocfs2_lock_res_init_once(struct ocfs2_lock_res *res)
{
/* This also clears out the lock status block */
memset(res, 0, sizeof(struct ocfs2_lock_res));
spin_lock_init(&res->l_lock);
init_waitqueue_head(&res->l_event);
INIT_LIST_HEAD(&res->l_blocked_list);
INIT_LIST_HEAD(&res->l_mask_waiters);
}
void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res,
enum ocfs2_lock_type type,
unsigned int generation,
struct inode *inode)
{
struct ocfs2_lock_res_ops *ops;
switch(type) {
case OCFS2_LOCK_TYPE_RW:
ops = &ocfs2_inode_rw_lops;
break;
case OCFS2_LOCK_TYPE_META:
ops = &ocfs2_inode_meta_lops;
break;
case OCFS2_LOCK_TYPE_DATA:
ops = &ocfs2_inode_data_lops;
break;
case OCFS2_LOCK_TYPE_OPEN:
ops = &ocfs2_inode_open_lops;
break;
default:
mlog_bug_on_msg(1, "type: %d\n", type);
ops = NULL; /* thanks, gcc */
break;
};
ocfs2_build_lock_name(type, OCFS2_I(inode)->ip_blkno,
generation, res->l_name);
ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), res, type, ops, inode);
}
static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres)
{
struct inode *inode = ocfs2_lock_res_inode(lockres);
return OCFS2_SB(inode->i_sb);
}
static __u64 ocfs2_get_dentry_lock_ino(struct ocfs2_lock_res *lockres)
{
__be64 inode_blkno_be;
memcpy(&inode_blkno_be, &lockres->l_name[OCFS2_DENTRY_LOCK_INO_START],
sizeof(__be64));
return be64_to_cpu(inode_blkno_be);
}
static struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres)
{
struct ocfs2_dentry_lock *dl = lockres->l_priv;
return OCFS2_SB(dl->dl_inode->i_sb);
}
void ocfs2_dentry_lock_res_init(struct ocfs2_dentry_lock *dl,
u64 parent, struct inode *inode)
{
int len;
u64 inode_blkno = OCFS2_I(inode)->ip_blkno;
__be64 inode_blkno_be = cpu_to_be64(inode_blkno);
struct ocfs2_lock_res *lockres = &dl->dl_lockres;
ocfs2_lock_res_init_once(lockres);
/*
* Unfortunately, the standard lock naming scheme won't work
* here because we have two 16 byte values to use. Instead,
* we'll stuff the inode number as a binary value. We still
* want error prints to show something without garbling the
* display, so drop a null byte in there before the inode
* number. A future version of OCFS2 will likely use all
* binary lock names. The stringified names have been a
* tremendous aid in debugging, but now that the debugfs
* interface exists, we can mangle things there if need be.
*
* NOTE: We also drop the standard "pad" value (the total lock
* name size stays the same though - the last part is all
* zeros due to the memset in ocfs2_lock_res_init_once()
*/
len = snprintf(lockres->l_name, OCFS2_DENTRY_LOCK_INO_START,
"%c%016llx",
ocfs2_lock_type_char(OCFS2_LOCK_TYPE_DENTRY),
(long long)parent);
BUG_ON(len != (OCFS2_DENTRY_LOCK_INO_START - 1));
memcpy(&lockres->l_name[OCFS2_DENTRY_LOCK_INO_START], &inode_blkno_be,
sizeof(__be64));
ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), lockres,
OCFS2_LOCK_TYPE_DENTRY, &ocfs2_dentry_lops,
dl);
}
static void ocfs2_super_lock_res_init(struct ocfs2_lock_res *res,
struct ocfs2_super *osb)
{
/* Superblock lockres doesn't come from a slab so we call init
* once on it manually. */
ocfs2_lock_res_init_once(res);
ocfs2_build_lock_name(OCFS2_LOCK_TYPE_SUPER, OCFS2_SUPER_BLOCK_BLKNO,
0, res->l_name);
ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_SUPER,
&ocfs2_super_lops, osb);
}
static void ocfs2_rename_lock_res_init(struct ocfs2_lock_res *res,
struct ocfs2_super *osb)
{
/* Rename lockres doesn't come from a slab so we call init
* once on it manually. */
ocfs2_lock_res_init_once(res);
ocfs2_build_lock_name(OCFS2_LOCK_TYPE_RENAME, 0, 0, res->l_name);
ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_RENAME,
&ocfs2_rename_lops, osb);
}
void ocfs2_lock_res_free(struct ocfs2_lock_res *res)
{
mlog_entry_void();
if (!(res->l_flags & OCFS2_LOCK_INITIALIZED))
return;
ocfs2_remove_lockres_tracking(res);
mlog_bug_on_msg(!list_empty(&res->l_blocked_list),
"Lockres %s is on the blocked list\n",
res->l_name);
mlog_bug_on_msg(!list_empty(&res->l_mask_waiters),
"Lockres %s has mask waiters pending\n",
res->l_name);
mlog_bug_on_msg(spin_is_locked(&res->l_lock),
"Lockres %s is locked\n",
res->l_name);
mlog_bug_on_msg(res->l_ro_holders,
"Lockres %s has %u ro holders\n",
res->l_name, res->l_ro_holders);
mlog_bug_on_msg(res->l_ex_holders,
"Lockres %s has %u ex holders\n",
res->l_name, res->l_ex_holders);
/* Need to clear out the lock status block for the dlm */
memset(&res->l_lksb, 0, sizeof(res->l_lksb));
res->l_flags = 0UL;
mlog_exit_void();
}
static inline void ocfs2_inc_holders(struct ocfs2_lock_res *lockres,
int level)
{
mlog_entry_void();
BUG_ON(!lockres);
switch(level) {
case LKM_EXMODE:
lockres->l_ex_holders++;
break;
case LKM_PRMODE:
lockres->l_ro_holders++;
break;
default:
BUG();
}
mlog_exit_void();
}
static inline void ocfs2_dec_holders(struct ocfs2_lock_res *lockres,
int level)
{
mlog_entry_void();
BUG_ON(!lockres);
switch(level) {
case LKM_EXMODE:
BUG_ON(!lockres->l_ex_holders);
lockres->l_ex_holders--;
break;
case LKM_PRMODE:
BUG_ON(!lockres->l_ro_holders);
lockres->l_ro_holders--;
break;
default:
BUG();
}
mlog_exit_void();
}
/* WARNING: This function lives in a world where the only three lock
* levels are EX, PR, and NL. It *will* have to be adjusted when more
* lock types are added. */
static inline int ocfs2_highest_compat_lock_level(int level)
{
int new_level = LKM_EXMODE;
if (level == LKM_EXMODE)
new_level = LKM_NLMODE;
else if (level == LKM_PRMODE)
new_level = LKM_PRMODE;
return new_level;
}
static void lockres_set_flags(struct ocfs2_lock_res *lockres,
unsigned long newflags)
{
struct ocfs2_mask_waiter *mw, *tmp;
assert_spin_locked(&lockres->l_lock);
lockres->l_flags = newflags;
list_for_each_entry_safe(mw, tmp, &lockres->l_mask_waiters, mw_item) {
if ((lockres->l_flags & mw->mw_mask) != mw->mw_goal)
continue;
list_del_init(&mw->mw_item);
mw->mw_status = 0;
complete(&mw->mw_complete);
}
}
static void lockres_or_flags(struct ocfs2_lock_res *lockres, unsigned long or)
{
lockres_set_flags(lockres, lockres->l_flags | or);
}
static void lockres_clear_flags(struct ocfs2_lock_res *lockres,
unsigned long clear)
{
lockres_set_flags(lockres, lockres->l_flags & ~clear);
}
static inline void ocfs2_generic_handle_downconvert_action(struct ocfs2_lock_res *lockres)
{
mlog_entry_void();
BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY));
BUG_ON(!(lockres->l_flags & OCFS2_LOCK_ATTACHED));
BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED));
BUG_ON(lockres->l_blocking <= LKM_NLMODE);
lockres->l_level = lockres->l_requested;
if (lockres->l_level <=
ocfs2_highest_compat_lock_level(lockres->l_blocking)) {
lockres->l_blocking = LKM_NLMODE;
lockres_clear_flags(lockres, OCFS2_LOCK_BLOCKED);
}
lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
mlog_exit_void();
}
static inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lockres)
{
mlog_entry_void();
BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BUSY));
BUG_ON(!(lockres->l_flags & OCFS2_LOCK_ATTACHED));
/* Convert from RO to EX doesn't really need anything as our
* information is already up to data. Convert from NL to
* *anything* however should mark ourselves as needing an
* update */
if (lockres->l_level == LKM_NLMODE &&
lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH)
lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH);
lockres->l_level = lockres->l_requested;
lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
mlog_exit_void();
}
static inline void ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *lockres)
{
mlog_entry_void();
BUG_ON((!(lockres->l_flags & OCFS2_LOCK_BUSY)));
BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED);
if (lockres->l_requested > LKM_NLMODE &&
!(lockres->l_flags & OCFS2_LOCK_LOCAL) &&
lockres->l_ops->flags & LOCK_TYPE_REQUIRES_REFRESH)
lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH);
lockres->l_level = lockres->l_requested;
lockres_or_flags(lockres, OCFS2_LOCK_ATTACHED);
lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
mlog_exit_void();
}
static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres,
int level)
{
int needs_downconvert = 0;
mlog_entry_void();
assert_spin_locked(&lockres->l_lock);
lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED);
if (level > lockres->l_blocking) {
/* only schedule a downconvert if we haven't already scheduled
* one that goes low enough to satisfy the level we're
* blocking. this also catches the case where we get
* duplicate BASTs */
if (ocfs2_highest_compat_lock_level(level) <
ocfs2_highest_compat_lock_level(lockres->l_blocking))
needs_downconvert = 1;
lockres->l_blocking = level;
}
mlog_exit(needs_downconvert);
return needs_downconvert;
}
static void ocfs2_blocking_ast(void *opaque, int level)
{
struct ocfs2_lock_res *lockres = opaque;
struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres);
int needs_downconvert;
unsigned long flags;
BUG_ON(level <= LKM_NLMODE);
mlog(0, "BAST fired for lockres %s, blocking %d, level %d type %s\n",
lockres->l_name, level, lockres->l_level,
ocfs2_lock_type_string(lockres->l_type));
spin_lock_irqsave(&lockres->l_lock, flags);
needs_downconvert = ocfs2_generic_handle_bast(lockres, level);
if (needs_downconvert)
ocfs2_schedule_blocked_lock(osb, lockres);
spin_unlock_irqrestore(&lockres->l_lock, flags);
wake_up(&lockres->l_event);
ocfs2_kick_vote_thread(osb);
}
static void ocfs2_locking_ast(void *opaque)
{
struct ocfs2_lock_res *lockres = opaque;
struct dlm_lockstatus *lksb = &lockres->l_lksb;
unsigned long flags;
spin_lock_irqsave(&lockres->l_lock, flags);
if (lksb->status != DLM_NORMAL) {
mlog(ML_ERROR, "lockres %s: lksb status value of %u!\n",
lockres->l_name, lksb->status);
spin_unlock_irqrestore(&lockres->l_lock, flags);
return;
}
switch(lockres->l_action) {
case OCFS2_AST_ATTACH:
ocfs2_generic_handle_attach_action(lockres);
lockres_clear_flags(lockres, OCFS2_LOCK_LOCAL);
break;
case OCFS2_AST_CONVERT:
ocfs2_generic_handle_convert_action(lockres);
break;
case OCFS2_AST_DOWNCONVERT:
ocfs2_generic_handle_downconvert_action(lockres);
break;
default:
mlog(ML_ERROR, "lockres %s: ast fired with invalid action: %u "
"lockres flags = 0x%lx, unlock action: %u\n",
lockres->l_name, lockres->l_action, lockres->l_flags,
lockres->l_unlock_action);
BUG();
}
/* set it to something invalid so if we get called again we
* can catch it. */
lockres->l_action = OCFS2_AST_INVALID;
wake_up(&lockres->l_event);
spin_unlock_irqrestore(&lockres->l_lock, flags);
}
static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres,
int convert)
{
unsigned long flags;
mlog_entry_void();
spin_lock_irqsave(&lockres->l_lock, flags);
lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
if (convert)
lockres->l_action = OCFS2_AST_INVALID;
else
lockres->l_unlock_action = OCFS2_UNLOCK_INVALID;
spin_unlock_irqrestore(&lockres->l_lock, flags);
wake_up(&lockres->l_event);
mlog_exit_void();
}
/* Note: If we detect another process working on the lock (i.e.,
* OCFS2_LOCK_BUSY), we'll bail out returning 0. It's up to the caller
* to do the right thing in that case.
*/
static int ocfs2_lock_create(struct ocfs2_super *osb,
struct ocfs2_lock_res *lockres,
int level,
int dlm_flags)
{
int ret = 0;
enum dlm_status status = DLM_NORMAL;
unsigned long flags;
mlog_entry_void();
mlog(0, "lock %s, level = %d, flags = %d\n", lockres->l_name, level,
dlm_flags);
spin_lock_irqsave(&lockres->l_lock, flags);
if ((lockres->l_flags & OCFS2_LOCK_ATTACHED) ||
(lockres->l_flags & OCFS2_LOCK_BUSY)) {
spin_unlock_irqrestore(&lockres->l_lock, flags);
goto bail;
}
lockres->l_action = OCFS2_AST_ATTACH;
lockres->l_requested = level;
lockres_or_flags(lockres, OCFS2_LOCK_BUSY);
spin_unlock_irqrestore(&lockres->l_lock, flags);
status = dlmlock(osb->dlm,
level,
&lockres->l_lksb,
dlm_flags,
lockres->l_name,
OCFS2_LOCK_ID_MAX_LEN - 1,
ocfs2_locking_ast,
lockres,
ocfs2_blocking_ast);
if (status != DLM_NORMAL) {
ocfs2_log_dlm_error("dlmlock", status, lockres);
ret = -EINVAL;
ocfs2_recover_from_dlm_error(lockres, 1);
}
mlog(0, "lock %s, successfull return from dlmlock\n", lockres->l_name);
bail:
mlog_exit(ret);
return ret;
}
static inline int ocfs2_check_wait_flag(struct ocfs2_lock_res *lockres,
int flag)
{
unsigned long flags;
int ret;
spin_lock_irqsave(&lockres->l_lock, flags);
ret = lockres->l_flags & flag;
spin_unlock_irqrestore(&lockres->l_lock, flags);
return ret;
}
static inline void ocfs2_wait_on_busy_lock(struct ocfs2_lock_res *lockres)
{
wait_event(lockres->l_event,
!ocfs2_check_wait_flag(lockres, OCFS2_LOCK_BUSY));
}
static inline void ocfs2_wait_on_refreshing_lock(struct ocfs2_lock_res *lockres)
{
wait_event(lockres->l_event,
!ocfs2_check_wait_flag(lockres, OCFS2_LOCK_REFRESHING));
}
/* predict what lock level we'll be dropping down to on behalf
* of another node, and return true if the currently wanted
* level will be compatible with it. */
static inline int ocfs2_may_continue_on_blocked_lock(struct ocfs2_lock_res *lockres,
int wanted)
{
BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED));
return wanted <= ocfs2_highest_compat_lock_level(lockres->l_blocking);
}
static void ocfs2_init_mask_waiter(struct ocfs2_mask_waiter *mw)
{
INIT_LIST_HEAD(&mw->mw_item);
init_completion(&mw->mw_complete);
}
static int ocfs2_wait_for_mask(struct ocfs2_mask_waiter *mw)
{
wait_for_completion(&mw->mw_complete);
/* Re-arm the completion in case we want to wait on it again */
INIT_COMPLETION(mw->mw_complete);
return mw->mw_status;
}
static void lockres_add_mask_waiter(struct ocfs2_lock_res *lockres,
struct ocfs2_mask_waiter *mw,
unsigned long mask,
unsigned long goal)
{
BUG_ON(!list_empty(&mw->mw_item));
assert_spin_locked(&lockres->l_lock);
list_add_tail(&mw->mw_item, &lockres->l_mask_waiters);
mw->mw_mask = mask;
mw->mw_goal = goal;
}
/* returns 0 if the mw that was removed was already satisfied, -EBUSY
* if the mask still hadn't reached its goal */
static int lockres_remove_mask_waiter(struct ocfs2_lock_res *lockres,
struct ocfs2_mask_waiter *mw)
{
unsigned long flags;
int ret = 0;
spin_lock_irqsave(&lockres->l_lock, flags);
if (!list_empty(&mw->mw_item)) {
if ((lockres->l_flags & mw->mw_mask) != mw->mw_goal)
ret = -EBUSY;
list_del_init(&mw->mw_item);
init_completion(&mw->mw_complete);
}
spin_unlock_irqrestore(&lockres->l_lock, flags);
return ret;
}
static int ocfs2_cluster_lock(struct ocfs2_super *osb,
struct ocfs2_lock_res *lockres,
int level,
int lkm_flags,
int arg_flags)
{
struct ocfs2_mask_waiter mw;
enum dlm_status status;
int wait, catch_signals = !(osb->s_mount_opt & OCFS2_MOUNT_NOINTR);
int ret = 0; /* gcc doesn't realize wait = 1 guarantees ret is set */
unsigned long flags;
mlog_entry_void();
ocfs2_init_mask_waiter(&mw);
if (lockres->l_ops->flags & LOCK_TYPE_USES_LVB)
lkm_flags |= LKM_VALBLK;
again:
wait = 0;
if (catch_signals && signal_pending(current)) {
ret = -ERESTARTSYS;
goto out;
}
spin_lock_irqsave(&lockres->l_lock, flags);
mlog_bug_on_msg(lockres->l_flags & OCFS2_LOCK_FREEING,
"Cluster lock called on freeing lockres %s! flags "
"0x%lx\n", lockres->l_name, lockres->l_flags);
/* We only compare against the currently granted level
* here. If the lock is blocked waiting on a downconvert,
* we'll get caught below. */
if (lockres->l_flags & OCFS2_LOCK_BUSY &&
level > lockres->l_level) {
/* is someone sitting in dlm_lock? If so, wait on
* them. */
lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0);
wait = 1;
goto unlock;
}
if (lockres->l_flags & OCFS2_LOCK_BLOCKED &&
!ocfs2_may_continue_on_blocked_lock(lockres, level)) {
/* is the lock is currently blocked on behalf of
* another node */
lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BLOCKED, 0);
wait = 1;
goto unlock;
}
if (level > lockres->l_level) {
if (lockres->l_action != OCFS2_AST_INVALID)
mlog(ML_ERROR, "lockres %s has action %u pending\n",
lockres->l_name, lockres->l_action);
if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) {
lockres->l_action = OCFS2_AST_ATTACH;
lkm_flags &= ~LKM_CONVERT;
} else {