forked from torvalds/linux
-
Notifications
You must be signed in to change notification settings - Fork 0
/
dm.c
2869 lines (2346 loc) · 63.6 KB
/
dm.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/*
* Copyright (C) 2001, 2002 Sistina Software (UK) Limited.
* Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
*
* This file is released under the GPL.
*/
#include "dm.h"
#include "dm-uevent.h"
#include <linux/init.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/moduleparam.h>
#include <linux/blkpg.h>
#include <linux/bio.h>
#include <linux/mempool.h>
#include <linux/slab.h>
#include <linux/idr.h>
#include <linux/hdreg.h>
#include <linux/delay.h>
#include <trace/events/block.h>
#define DM_MSG_PREFIX "core"
#ifdef CONFIG_PRINTK
/*
* ratelimit state to be used in DMXXX_LIMIT().
*/
DEFINE_RATELIMIT_STATE(dm_ratelimit_state,
DEFAULT_RATELIMIT_INTERVAL,
DEFAULT_RATELIMIT_BURST);
EXPORT_SYMBOL(dm_ratelimit_state);
#endif
/*
* Cookies are numeric values sent with CHANGE and REMOVE
* uevents while resuming, removing or renaming the device.
*/
#define DM_COOKIE_ENV_VAR_NAME "DM_COOKIE"
#define DM_COOKIE_LENGTH 24
static const char *_name = DM_NAME;
static unsigned int major = 0;
static unsigned int _major = 0;
static DEFINE_IDR(_minor_idr);
static DEFINE_SPINLOCK(_minor_lock);
/*
* For bio-based dm.
* One of these is allocated per bio.
*/
struct dm_io {
struct mapped_device *md;
int error;
atomic_t io_count;
struct bio *bio;
unsigned long start_time;
spinlock_t endio_lock;
};
/*
* For request-based dm.
* One of these is allocated per request.
*/
struct dm_rq_target_io {
struct mapped_device *md;
struct dm_target *ti;
struct request *orig, clone;
int error;
union map_info info;
};
/*
* For request-based dm - the bio clones we allocate are embedded in these
* structs.
*
* We allocate these with bio_alloc_bioset, using the front_pad parameter when
* the bioset is created - this means the bio has to come at the end of the
* struct.
*/
struct dm_rq_clone_bio_info {
struct bio *orig;
struct dm_rq_target_io *tio;
struct bio clone;
};
union map_info *dm_get_mapinfo(struct bio *bio)
{
if (bio && bio->bi_private)
return &((struct dm_target_io *)bio->bi_private)->info;
return NULL;
}
union map_info *dm_get_rq_mapinfo(struct request *rq)
{
if (rq && rq->end_io_data)
return &((struct dm_rq_target_io *)rq->end_io_data)->info;
return NULL;
}
EXPORT_SYMBOL_GPL(dm_get_rq_mapinfo);
#define MINOR_ALLOCED ((void *)-1)
/*
* Bits for the md->flags field.
*/
#define DMF_BLOCK_IO_FOR_SUSPEND 0
#define DMF_SUSPENDED 1
#define DMF_FROZEN 2
#define DMF_FREEING 3
#define DMF_DELETING 4
#define DMF_NOFLUSH_SUSPENDING 5
#define DMF_MERGE_IS_OPTIONAL 6
/*
* A dummy definition to make RCU happy.
* struct dm_table should never be dereferenced in this file.
*/
struct dm_table {
int undefined__;
};
/*
* Work processed by per-device workqueue.
*/
struct mapped_device {
struct srcu_struct io_barrier;
struct mutex suspend_lock;
atomic_t holders;
atomic_t open_count;
/*
* The current mapping.
* Use dm_get_live_table{_fast} or take suspend_lock for
* dereference.
*/
struct dm_table *map;
unsigned long flags;
struct request_queue *queue;
unsigned type;
/* Protect queue and type against concurrent access. */
struct mutex type_lock;
struct target_type *immutable_target_type;
struct gendisk *disk;
char name[16];
void *interface_ptr;
/*
* A list of ios that arrived while we were suspended.
*/
atomic_t pending[2];
wait_queue_head_t wait;
struct work_struct work;
struct bio_list deferred;
spinlock_t deferred_lock;
/*
* Processing queue (flush)
*/
struct workqueue_struct *wq;
/*
* io objects are allocated from here.
*/
mempool_t *io_pool;
struct bio_set *bs;
/*
* Event handling.
*/
atomic_t event_nr;
wait_queue_head_t eventq;
atomic_t uevent_seq;
struct list_head uevent_list;
spinlock_t uevent_lock; /* Protect access to uevent_list */
/*
* freeze/thaw support require holding onto a super block
*/
struct super_block *frozen_sb;
struct block_device *bdev;
/* forced geometry settings */
struct hd_geometry geometry;
/* sysfs handle */
struct kobject kobj;
/* zero-length flush that will be cloned and submitted to targets */
struct bio flush_bio;
};
/*
* For mempools pre-allocation at the table loading time.
*/
struct dm_md_mempools {
mempool_t *io_pool;
struct bio_set *bs;
};
#define MIN_IOS 256
static struct kmem_cache *_io_cache;
static struct kmem_cache *_rq_tio_cache;
static int __init local_init(void)
{
int r = -ENOMEM;
/* allocate a slab for the dm_ios */
_io_cache = KMEM_CACHE(dm_io, 0);
if (!_io_cache)
return r;
_rq_tio_cache = KMEM_CACHE(dm_rq_target_io, 0);
if (!_rq_tio_cache)
goto out_free_io_cache;
r = dm_uevent_init();
if (r)
goto out_free_rq_tio_cache;
_major = major;
r = register_blkdev(_major, _name);
if (r < 0)
goto out_uevent_exit;
if (!_major)
_major = r;
return 0;
out_uevent_exit:
dm_uevent_exit();
out_free_rq_tio_cache:
kmem_cache_destroy(_rq_tio_cache);
out_free_io_cache:
kmem_cache_destroy(_io_cache);
return r;
}
static void local_exit(void)
{
kmem_cache_destroy(_rq_tio_cache);
kmem_cache_destroy(_io_cache);
unregister_blkdev(_major, _name);
dm_uevent_exit();
_major = 0;
DMINFO("cleaned up");
}
static int (*_inits[])(void) __initdata = {
local_init,
dm_target_init,
dm_linear_init,
dm_stripe_init,
dm_io_init,
dm_kcopyd_init,
dm_interface_init,
};
static void (*_exits[])(void) = {
local_exit,
dm_target_exit,
dm_linear_exit,
dm_stripe_exit,
dm_io_exit,
dm_kcopyd_exit,
dm_interface_exit,
};
static int __init dm_init(void)
{
const int count = ARRAY_SIZE(_inits);
int r, i;
for (i = 0; i < count; i++) {
r = _inits[i]();
if (r)
goto bad;
}
return 0;
bad:
while (i--)
_exits[i]();
return r;
}
static void __exit dm_exit(void)
{
int i = ARRAY_SIZE(_exits);
while (i--)
_exits[i]();
/*
* Should be empty by this point.
*/
idr_destroy(&_minor_idr);
}
/*
* Block device functions
*/
int dm_deleting_md(struct mapped_device *md)
{
return test_bit(DMF_DELETING, &md->flags);
}
static int dm_blk_open(struct block_device *bdev, fmode_t mode)
{
struct mapped_device *md;
spin_lock(&_minor_lock);
md = bdev->bd_disk->private_data;
if (!md)
goto out;
if (test_bit(DMF_FREEING, &md->flags) ||
dm_deleting_md(md)) {
md = NULL;
goto out;
}
dm_get(md);
atomic_inc(&md->open_count);
out:
spin_unlock(&_minor_lock);
return md ? 0 : -ENXIO;
}
static void dm_blk_close(struct gendisk *disk, fmode_t mode)
{
struct mapped_device *md = disk->private_data;
spin_lock(&_minor_lock);
atomic_dec(&md->open_count);
dm_put(md);
spin_unlock(&_minor_lock);
}
int dm_open_count(struct mapped_device *md)
{
return atomic_read(&md->open_count);
}
/*
* Guarantees nothing is using the device before it's deleted.
*/
int dm_lock_for_deletion(struct mapped_device *md)
{
int r = 0;
spin_lock(&_minor_lock);
if (dm_open_count(md))
r = -EBUSY;
else
set_bit(DMF_DELETING, &md->flags);
spin_unlock(&_minor_lock);
return r;
}
static int dm_blk_getgeo(struct block_device *bdev, struct hd_geometry *geo)
{
struct mapped_device *md = bdev->bd_disk->private_data;
return dm_get_geometry(md, geo);
}
static int dm_blk_ioctl(struct block_device *bdev, fmode_t mode,
unsigned int cmd, unsigned long arg)
{
struct mapped_device *md = bdev->bd_disk->private_data;
int srcu_idx;
struct dm_table *map;
struct dm_target *tgt;
int r = -ENOTTY;
retry:
map = dm_get_live_table(md, &srcu_idx);
if (!map || !dm_table_get_size(map))
goto out;
/* We only support devices that have a single target */
if (dm_table_get_num_targets(map) != 1)
goto out;
tgt = dm_table_get_target(map, 0);
if (dm_suspended_md(md)) {
r = -EAGAIN;
goto out;
}
if (tgt->type->ioctl)
r = tgt->type->ioctl(tgt, cmd, arg);
out:
dm_put_live_table(md, srcu_idx);
if (r == -ENOTCONN) {
msleep(10);
goto retry;
}
return r;
}
static struct dm_io *alloc_io(struct mapped_device *md)
{
return mempool_alloc(md->io_pool, GFP_NOIO);
}
static void free_io(struct mapped_device *md, struct dm_io *io)
{
mempool_free(io, md->io_pool);
}
static void free_tio(struct mapped_device *md, struct dm_target_io *tio)
{
bio_put(&tio->clone);
}
static struct dm_rq_target_io *alloc_rq_tio(struct mapped_device *md,
gfp_t gfp_mask)
{
return mempool_alloc(md->io_pool, gfp_mask);
}
static void free_rq_tio(struct dm_rq_target_io *tio)
{
mempool_free(tio, tio->md->io_pool);
}
static int md_in_flight(struct mapped_device *md)
{
return atomic_read(&md->pending[READ]) +
atomic_read(&md->pending[WRITE]);
}
static void start_io_acct(struct dm_io *io)
{
struct mapped_device *md = io->md;
int cpu;
int rw = bio_data_dir(io->bio);
io->start_time = jiffies;
cpu = part_stat_lock();
part_round_stats(cpu, &dm_disk(md)->part0);
part_stat_unlock();
atomic_set(&dm_disk(md)->part0.in_flight[rw],
atomic_inc_return(&md->pending[rw]));
}
static void end_io_acct(struct dm_io *io)
{
struct mapped_device *md = io->md;
struct bio *bio = io->bio;
unsigned long duration = jiffies - io->start_time;
int pending, cpu;
int rw = bio_data_dir(bio);
cpu = part_stat_lock();
part_round_stats(cpu, &dm_disk(md)->part0);
part_stat_add(cpu, &dm_disk(md)->part0, ticks[rw], duration);
part_stat_unlock();
/*
* After this is decremented the bio must not be touched if it is
* a flush.
*/
pending = atomic_dec_return(&md->pending[rw]);
atomic_set(&dm_disk(md)->part0.in_flight[rw], pending);
pending += atomic_read(&md->pending[rw^0x1]);
/* nudge anyone waiting on suspend queue */
if (!pending)
wake_up(&md->wait);
}
/*
* Add the bio to the list of deferred io.
*/
static void queue_io(struct mapped_device *md, struct bio *bio)
{
unsigned long flags;
spin_lock_irqsave(&md->deferred_lock, flags);
bio_list_add(&md->deferred, bio);
spin_unlock_irqrestore(&md->deferred_lock, flags);
queue_work(md->wq, &md->work);
}
/*
* Everyone (including functions in this file), should use this
* function to access the md->map field, and make sure they call
* dm_put_live_table() when finished.
*/
struct dm_table *dm_get_live_table(struct mapped_device *md, int *srcu_idx) __acquires(md->io_barrier)
{
*srcu_idx = srcu_read_lock(&md->io_barrier);
return srcu_dereference(md->map, &md->io_barrier);
}
void dm_put_live_table(struct mapped_device *md, int srcu_idx) __releases(md->io_barrier)
{
srcu_read_unlock(&md->io_barrier, srcu_idx);
}
void dm_sync_table(struct mapped_device *md)
{
synchronize_srcu(&md->io_barrier);
synchronize_rcu_expedited();
}
/*
* A fast alternative to dm_get_live_table/dm_put_live_table.
* The caller must not block between these two functions.
*/
static struct dm_table *dm_get_live_table_fast(struct mapped_device *md) __acquires(RCU)
{
rcu_read_lock();
return rcu_dereference(md->map);
}
static void dm_put_live_table_fast(struct mapped_device *md) __releases(RCU)
{
rcu_read_unlock();
}
/*
* Get the geometry associated with a dm device
*/
int dm_get_geometry(struct mapped_device *md, struct hd_geometry *geo)
{
*geo = md->geometry;
return 0;
}
/*
* Set the geometry of a device.
*/
int dm_set_geometry(struct mapped_device *md, struct hd_geometry *geo)
{
sector_t sz = (sector_t)geo->cylinders * geo->heads * geo->sectors;
if (geo->start > sz) {
DMWARN("Start sector is beyond the geometry limits.");
return -EINVAL;
}
md->geometry = *geo;
return 0;
}
/*-----------------------------------------------------------------
* CRUD START:
* A more elegant soln is in the works that uses the queue
* merge fn, unfortunately there are a couple of changes to
* the block layer that I want to make for this. So in the
* interests of getting something for people to use I give
* you this clearly demarcated crap.
*---------------------------------------------------------------*/
static int __noflush_suspending(struct mapped_device *md)
{
return test_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
}
/*
* Decrements the number of outstanding ios that a bio has been
* cloned into, completing the original io if necc.
*/
static void dec_pending(struct dm_io *io, int error)
{
unsigned long flags;
int io_error;
struct bio *bio;
struct mapped_device *md = io->md;
/* Push-back supersedes any I/O errors */
if (unlikely(error)) {
spin_lock_irqsave(&io->endio_lock, flags);
if (!(io->error > 0 && __noflush_suspending(md)))
io->error = error;
spin_unlock_irqrestore(&io->endio_lock, flags);
}
if (atomic_dec_and_test(&io->io_count)) {
if (io->error == DM_ENDIO_REQUEUE) {
/*
* Target requested pushing back the I/O.
*/
spin_lock_irqsave(&md->deferred_lock, flags);
if (__noflush_suspending(md))
bio_list_add_head(&md->deferred, io->bio);
else
/* noflush suspend was interrupted. */
io->error = -EIO;
spin_unlock_irqrestore(&md->deferred_lock, flags);
}
io_error = io->error;
bio = io->bio;
end_io_acct(io);
free_io(md, io);
if (io_error == DM_ENDIO_REQUEUE)
return;
if ((bio->bi_rw & REQ_FLUSH) && bio->bi_size) {
/*
* Preflush done for flush with data, reissue
* without REQ_FLUSH.
*/
bio->bi_rw &= ~REQ_FLUSH;
queue_io(md, bio);
} else {
/* done with normal IO or empty flush */
trace_block_bio_complete(md->queue, bio, io_error);
bio_endio(bio, io_error);
}
}
}
static void clone_endio(struct bio *bio, int error)
{
int r = 0;
struct dm_target_io *tio = bio->bi_private;
struct dm_io *io = tio->io;
struct mapped_device *md = tio->io->md;
dm_endio_fn endio = tio->ti->type->end_io;
if (!bio_flagged(bio, BIO_UPTODATE) && !error)
error = -EIO;
if (endio) {
r = endio(tio->ti, bio, error);
if (r < 0 || r == DM_ENDIO_REQUEUE)
/*
* error and requeue request are handled
* in dec_pending().
*/
error = r;
else if (r == DM_ENDIO_INCOMPLETE)
/* The target will handle the io */
return;
else if (r) {
DMWARN("unimplemented target endio return value: %d", r);
BUG();
}
}
free_tio(md, tio);
dec_pending(io, error);
}
/*
* Partial completion handling for request-based dm
*/
static void end_clone_bio(struct bio *clone, int error)
{
struct dm_rq_clone_bio_info *info = clone->bi_private;
struct dm_rq_target_io *tio = info->tio;
struct bio *bio = info->orig;
unsigned int nr_bytes = info->orig->bi_size;
bio_put(clone);
if (tio->error)
/*
* An error has already been detected on the request.
* Once error occurred, just let clone->end_io() handle
* the remainder.
*/
return;
else if (error) {
/*
* Don't notice the error to the upper layer yet.
* The error handling decision is made by the target driver,
* when the request is completed.
*/
tio->error = error;
return;
}
/*
* I/O for the bio successfully completed.
* Notice the data completion to the upper layer.
*/
/*
* bios are processed from the head of the list.
* So the completing bio should always be rq->bio.
* If it's not, something wrong is happening.
*/
if (tio->orig->bio != bio)
DMERR("bio completion is going in the middle of the request");
/*
* Update the original request.
* Do not use blk_end_request() here, because it may complete
* the original request before the clone, and break the ordering.
*/
blk_update_request(tio->orig, 0, nr_bytes);
}
/*
* Don't touch any member of the md after calling this function because
* the md may be freed in dm_put() at the end of this function.
* Or do dm_get() before calling this function and dm_put() later.
*/
static void rq_completed(struct mapped_device *md, int rw, int run_queue)
{
atomic_dec(&md->pending[rw]);
/* nudge anyone waiting on suspend queue */
if (!md_in_flight(md))
wake_up(&md->wait);
/*
* Run this off this callpath, as drivers could invoke end_io while
* inside their request_fn (and holding the queue lock). Calling
* back into ->request_fn() could deadlock attempting to grab the
* queue lock again.
*/
if (run_queue)
blk_run_queue_async(md->queue);
/*
* dm_put() must be at the end of this function. See the comment above
*/
dm_put(md);
}
static void free_rq_clone(struct request *clone)
{
struct dm_rq_target_io *tio = clone->end_io_data;
blk_rq_unprep_clone(clone);
free_rq_tio(tio);
}
/*
* Complete the clone and the original request.
* Must be called without queue lock.
*/
static void dm_end_request(struct request *clone, int error)
{
int rw = rq_data_dir(clone);
struct dm_rq_target_io *tio = clone->end_io_data;
struct mapped_device *md = tio->md;
struct request *rq = tio->orig;
if (rq->cmd_type == REQ_TYPE_BLOCK_PC) {
rq->errors = clone->errors;
rq->resid_len = clone->resid_len;
if (rq->sense)
/*
* We are using the sense buffer of the original
* request.
* So setting the length of the sense data is enough.
*/
rq->sense_len = clone->sense_len;
}
free_rq_clone(clone);
blk_end_request_all(rq, error);
rq_completed(md, rw, true);
}
static void dm_unprep_request(struct request *rq)
{
struct request *clone = rq->special;
rq->special = NULL;
rq->cmd_flags &= ~REQ_DONTPREP;
free_rq_clone(clone);
}
/*
* Requeue the original request of a clone.
*/
void dm_requeue_unmapped_request(struct request *clone)
{
int rw = rq_data_dir(clone);
struct dm_rq_target_io *tio = clone->end_io_data;
struct mapped_device *md = tio->md;
struct request *rq = tio->orig;
struct request_queue *q = rq->q;
unsigned long flags;
dm_unprep_request(rq);
spin_lock_irqsave(q->queue_lock, flags);
blk_requeue_request(q, rq);
spin_unlock_irqrestore(q->queue_lock, flags);
rq_completed(md, rw, 0);
}
EXPORT_SYMBOL_GPL(dm_requeue_unmapped_request);
static void __stop_queue(struct request_queue *q)
{
blk_stop_queue(q);
}
static void stop_queue(struct request_queue *q)
{
unsigned long flags;
spin_lock_irqsave(q->queue_lock, flags);
__stop_queue(q);
spin_unlock_irqrestore(q->queue_lock, flags);
}
static void __start_queue(struct request_queue *q)
{
if (blk_queue_stopped(q))
blk_start_queue(q);
}
static void start_queue(struct request_queue *q)
{
unsigned long flags;
spin_lock_irqsave(q->queue_lock, flags);
__start_queue(q);
spin_unlock_irqrestore(q->queue_lock, flags);
}
static void dm_done(struct request *clone, int error, bool mapped)
{
int r = error;
struct dm_rq_target_io *tio = clone->end_io_data;
dm_request_endio_fn rq_end_io = NULL;
if (tio->ti) {
rq_end_io = tio->ti->type->rq_end_io;
if (mapped && rq_end_io)
r = rq_end_io(tio->ti, clone, error, &tio->info);
}
if (r <= 0)
/* The target wants to complete the I/O */
dm_end_request(clone, r);
else if (r == DM_ENDIO_INCOMPLETE)
/* The target will handle the I/O */
return;
else if (r == DM_ENDIO_REQUEUE)
/* The target wants to requeue the I/O */
dm_requeue_unmapped_request(clone);
else {
DMWARN("unimplemented target endio return value: %d", r);
BUG();
}
}
/*
* Request completion handler for request-based dm
*/
static void dm_softirq_done(struct request *rq)
{
bool mapped = true;
struct request *clone = rq->completion_data;
struct dm_rq_target_io *tio = clone->end_io_data;
if (rq->cmd_flags & REQ_FAILED)
mapped = false;
dm_done(clone, tio->error, mapped);
}
/*
* Complete the clone and the original request with the error status
* through softirq context.
*/
static void dm_complete_request(struct request *clone, int error)
{
struct dm_rq_target_io *tio = clone->end_io_data;
struct request *rq = tio->orig;
tio->error = error;
rq->completion_data = clone;
blk_complete_request(rq);
}
/*
* Complete the not-mapped clone and the original request with the error status
* through softirq context.
* Target's rq_end_io() function isn't called.
* This may be used when the target's map_rq() function fails.
*/
void dm_kill_unmapped_request(struct request *clone, int error)
{
struct dm_rq_target_io *tio = clone->end_io_data;
struct request *rq = tio->orig;
rq->cmd_flags |= REQ_FAILED;
dm_complete_request(clone, error);
}
EXPORT_SYMBOL_GPL(dm_kill_unmapped_request);
/*
* Called with the queue lock held
*/
static void end_clone_request(struct request *clone, int error)
{
/*
* For just cleaning up the information of the queue in which
* the clone was dispatched.
* The clone is *NOT* freed actually here because it is alloced from
* dm own mempool and REQ_ALLOCED isn't set in clone->cmd_flags.
*/
__blk_put_request(clone->q, clone);
/*
* Actual request completion is done in a softirq context which doesn't
* hold the queue lock. Otherwise, deadlock could occur because:
* - another request may be submitted by the upper level driver
* of the stacking during the completion
* - the submission which requires queue lock may be done
* against this queue
*/
dm_complete_request(clone, error);
}
/*
* Return maximum size of I/O possible at the supplied sector up to the current
* target boundary.
*/
static sector_t max_io_len_target_boundary(sector_t sector, struct dm_target *ti)
{
sector_t target_offset = dm_target_offset(ti, sector);
return ti->len - target_offset;
}
static sector_t max_io_len(sector_t sector, struct dm_target *ti)
{
sector_t len = max_io_len_target_boundary(sector, ti);
sector_t offset, max_len;
/*
* Does the target need to split even further?
*/
if (ti->max_io_len) {
offset = dm_target_offset(ti, sector);
if (unlikely(ti->max_io_len & (ti->max_io_len - 1)))
max_len = sector_div(offset, ti->max_io_len);
else
max_len = offset & (ti->max_io_len - 1);
max_len = ti->max_io_len - max_len;
if (len > max_len)
len = max_len;
}
return len;
}
int dm_set_target_max_io_len(struct dm_target *ti, sector_t len)
{
if (len > UINT_MAX) {
DMERR("Specified maximum size of target IO (%llu) exceeds limit (%u)",
(unsigned long long)len, UINT_MAX);
ti->error = "Maximum size of target IO is too large";
return -EINVAL;