forked from torvalds/linux
-
Notifications
You must be signed in to change notification settings - Fork 0
/
socket.c
2454 lines (2096 loc) · 58 KB
/
socket.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/*
* NET An implementation of the SOCKET network access protocol.
*
* Version: @(#)socket.c 1.1.93 18/02/95
*
* Authors: Orest Zborowski, <[email protected]>
* Ross Biro
* Fred N. van Kempen, <[email protected]>
*
* Fixes:
* Anonymous : NOTSOCK/BADF cleanup. Error fix in
* shutdown()
* Alan Cox : verify_area() fixes
* Alan Cox : Removed DDI
* Jonathan Kamens : SOCK_DGRAM reconnect bug
* Alan Cox : Moved a load of checks to the very
* top level.
* Alan Cox : Move address structures to/from user
* mode above the protocol layers.
* Rob Janssen : Allow 0 length sends.
* Alan Cox : Asynchronous I/O support (cribbed from the
* tty drivers).
* Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
* Jeff Uphoff : Made max number of sockets command-line
* configurable.
* Matti Aarnio : Made the number of sockets dynamic,
* to be allocated when needed, and mr.
* Uphoff's max is used as max to be
* allowed to allocate.
* Linus : Argh. removed all the socket allocation
* altogether: it's in the inode now.
* Alan Cox : Made sock_alloc()/sock_release() public
* for NetROM and future kernel nfsd type
* stuff.
* Alan Cox : sendmsg/recvmsg basics.
* Tom Dyas : Export net symbols.
* Marcin Dalecki : Fixed problems with CONFIG_NET="n".
* Alan Cox : Added thread locking to sys_* calls
* for sockets. May have errors at the
* moment.
* Kevin Buhr : Fixed the dumb errors in the above.
* Andi Kleen : Some small cleanups, optimizations,
* and fixed a copy_from_user() bug.
* Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
* Tigran Aivazian : Made listen(2) backlog sanity checks
* protocol-independent
*
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
*
* This module is effectively the top level interface to the BSD socket
* paradigm.
*
* Based upon Swansea University Computer Society NET3.039
*/
#include <linux/mm.h>
#include <linux/socket.h>
#include <linux/file.h>
#include <linux/net.h>
#include <linux/interrupt.h>
#include <linux/thread_info.h>
#include <linux/rcupdate.h>
#include <linux/netdevice.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/mutex.h>
#include <linux/wanrouter.h>
#include <linux/if_bridge.h>
#include <linux/if_frad.h>
#include <linux/if_vlan.h>
#include <linux/init.h>
#include <linux/poll.h>
#include <linux/cache.h>
#include <linux/module.h>
#include <linux/highmem.h>
#include <linux/mount.h>
#include <linux/security.h>
#include <linux/syscalls.h>
#include <linux/compat.h>
#include <linux/kmod.h>
#include <linux/audit.h>
#include <linux/wireless.h>
#include <linux/nsproxy.h>
#include <asm/uaccess.h>
#include <asm/unistd.h>
#include <net/compat.h>
#include <net/wext.h>
#include <net/sock.h>
#include <linux/netfilter.h>
static int sock_no_open(struct inode *irrelevant, struct file *dontcare);
static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos);
static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos);
static int sock_mmap(struct file *file, struct vm_area_struct *vma);
static int sock_close(struct inode *inode, struct file *file);
static unsigned int sock_poll(struct file *file,
struct poll_table_struct *wait);
static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
#ifdef CONFIG_COMPAT
static long compat_sock_ioctl(struct file *file,
unsigned int cmd, unsigned long arg);
#endif
static int sock_fasync(int fd, struct file *filp, int on);
static ssize_t sock_sendpage(struct file *file, struct page *page,
int offset, size_t size, loff_t *ppos, int more);
static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
struct pipe_inode_info *pipe, size_t len,
unsigned int flags);
/*
* Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
* in the operation structures but are done directly via the socketcall() multiplexor.
*/
static const struct file_operations socket_file_ops = {
.owner = THIS_MODULE,
.llseek = no_llseek,
.aio_read = sock_aio_read,
.aio_write = sock_aio_write,
.poll = sock_poll,
.unlocked_ioctl = sock_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = compat_sock_ioctl,
#endif
.mmap = sock_mmap,
.open = sock_no_open, /* special open code to disallow open via /proc */
.release = sock_close,
.fasync = sock_fasync,
.sendpage = sock_sendpage,
.splice_write = generic_splice_sendpage,
.splice_read = sock_splice_read,
};
/*
* The protocol list. Each protocol is registered in here.
*/
static DEFINE_SPINLOCK(net_family_lock);
static const struct net_proto_family *net_families[NPROTO] __read_mostly;
/*
* Statistics counters of the socket lists
*/
static DEFINE_PER_CPU(int, sockets_in_use) = 0;
/*
* Support routines.
* Move socket addresses back and forth across the kernel/user
* divide and look after the messy bits.
*/
#define MAX_SOCK_ADDR 128 /* 108 for Unix domain -
16 for IP, 16 for IPX,
24 for IPv6,
about 80 for AX.25
must be at least one bigger than
the AF_UNIX size (see net/unix/af_unix.c
:unix_mkname()).
*/
/**
* move_addr_to_kernel - copy a socket address into kernel space
* @uaddr: Address in user space
* @kaddr: Address in kernel space
* @ulen: Length in user space
*
* The address is copied into kernel space. If the provided address is
* too long an error code of -EINVAL is returned. If the copy gives
* invalid addresses -EFAULT is returned. On a success 0 is returned.
*/
int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr *kaddr)
{
if (ulen < 0 || ulen > sizeof(struct sockaddr_storage))
return -EINVAL;
if (ulen == 0)
return 0;
if (copy_from_user(kaddr, uaddr, ulen))
return -EFAULT;
return audit_sockaddr(ulen, kaddr);
}
/**
* move_addr_to_user - copy an address to user space
* @kaddr: kernel space address
* @klen: length of address in kernel
* @uaddr: user space address
* @ulen: pointer to user length field
*
* The value pointed to by ulen on entry is the buffer length available.
* This is overwritten with the buffer space used. -EINVAL is returned
* if an overlong buffer is specified or a negative buffer size. -EFAULT
* is returned if either the buffer or the length field are not
* accessible.
* After copying the data up to the limit the user specifies, the true
* length of the data is written over the length limit the user
* specified. Zero is returned for a success.
*/
int move_addr_to_user(struct sockaddr *kaddr, int klen, void __user *uaddr,
int __user *ulen)
{
int err;
int len;
err = get_user(len, ulen);
if (err)
return err;
if (len > klen)
len = klen;
if (len < 0 || len > sizeof(struct sockaddr_storage))
return -EINVAL;
if (len) {
if (audit_sockaddr(klen, kaddr))
return -ENOMEM;
if (copy_to_user(uaddr, kaddr, len))
return -EFAULT;
}
/*
* "fromlen shall refer to the value before truncation.."
* 1003.1g
*/
return __put_user(klen, ulen);
}
#define SOCKFS_MAGIC 0x534F434B
static struct kmem_cache *sock_inode_cachep __read_mostly;
static struct inode *sock_alloc_inode(struct super_block *sb)
{
struct socket_alloc *ei;
ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
if (!ei)
return NULL;
init_waitqueue_head(&ei->socket.wait);
ei->socket.fasync_list = NULL;
ei->socket.state = SS_UNCONNECTED;
ei->socket.flags = 0;
ei->socket.ops = NULL;
ei->socket.sk = NULL;
ei->socket.file = NULL;
return &ei->vfs_inode;
}
static void sock_destroy_inode(struct inode *inode)
{
kmem_cache_free(sock_inode_cachep,
container_of(inode, struct socket_alloc, vfs_inode));
}
static void init_once(void *foo)
{
struct socket_alloc *ei = (struct socket_alloc *)foo;
inode_init_once(&ei->vfs_inode);
}
static int init_inodecache(void)
{
sock_inode_cachep = kmem_cache_create("sock_inode_cache",
sizeof(struct socket_alloc),
0,
(SLAB_HWCACHE_ALIGN |
SLAB_RECLAIM_ACCOUNT |
SLAB_MEM_SPREAD),
init_once);
if (sock_inode_cachep == NULL)
return -ENOMEM;
return 0;
}
static struct super_operations sockfs_ops = {
.alloc_inode = sock_alloc_inode,
.destroy_inode =sock_destroy_inode,
.statfs = simple_statfs,
};
static int sockfs_get_sb(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data,
struct vfsmount *mnt)
{
return get_sb_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC,
mnt);
}
static struct vfsmount *sock_mnt __read_mostly;
static struct file_system_type sock_fs_type = {
.name = "sockfs",
.get_sb = sockfs_get_sb,
.kill_sb = kill_anon_super,
};
static int sockfs_delete_dentry(struct dentry *dentry)
{
/*
* At creation time, we pretended this dentry was hashed
* (by clearing DCACHE_UNHASHED bit in d_flags)
* At delete time, we restore the truth : not hashed.
* (so that dput() can proceed correctly)
*/
dentry->d_flags |= DCACHE_UNHASHED;
return 0;
}
/*
* sockfs_dname() is called from d_path().
*/
static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
{
return dynamic_dname(dentry, buffer, buflen, "socket:[%lu]",
dentry->d_inode->i_ino);
}
static const struct dentry_operations sockfs_dentry_operations = {
.d_delete = sockfs_delete_dentry,
.d_dname = sockfs_dname,
};
/*
* Obtains the first available file descriptor and sets it up for use.
*
* These functions create file structures and maps them to fd space
* of the current process. On success it returns file descriptor
* and file struct implicitly stored in sock->file.
* Note that another thread may close file descriptor before we return
* from this function. We use the fact that now we do not refer
* to socket after mapping. If one day we will need it, this
* function will increment ref. count on file by 1.
*
* In any case returned fd MAY BE not valid!
* This race condition is unavoidable
* with shared fd spaces, we cannot solve it inside kernel,
* but we take care of internal coherence yet.
*/
static int sock_alloc_fd(struct file **filep, int flags)
{
int fd;
fd = get_unused_fd_flags(flags);
if (likely(fd >= 0)) {
struct file *file = get_empty_filp();
*filep = file;
if (unlikely(!file)) {
put_unused_fd(fd);
return -ENFILE;
}
} else
*filep = NULL;
return fd;
}
static int sock_attach_fd(struct socket *sock, struct file *file, int flags)
{
struct dentry *dentry;
struct qstr name = { .name = "" };
dentry = d_alloc(sock_mnt->mnt_sb->s_root, &name);
if (unlikely(!dentry))
return -ENOMEM;
dentry->d_op = &sockfs_dentry_operations;
/*
* We dont want to push this dentry into global dentry hash table.
* We pretend dentry is already hashed, by unsetting DCACHE_UNHASHED
* This permits a working /proc/$pid/fd/XXX on sockets
*/
dentry->d_flags &= ~DCACHE_UNHASHED;
d_instantiate(dentry, SOCK_INODE(sock));
sock->file = file;
init_file(file, sock_mnt, dentry, FMODE_READ | FMODE_WRITE,
&socket_file_ops);
SOCK_INODE(sock)->i_fop = &socket_file_ops;
file->f_flags = O_RDWR | (flags & O_NONBLOCK);
file->f_pos = 0;
file->private_data = sock;
return 0;
}
int sock_map_fd(struct socket *sock, int flags)
{
struct file *newfile;
int fd = sock_alloc_fd(&newfile, flags);
if (likely(fd >= 0)) {
int err = sock_attach_fd(sock, newfile, flags);
if (unlikely(err < 0)) {
put_filp(newfile);
put_unused_fd(fd);
return err;
}
fd_install(fd, newfile);
}
return fd;
}
static struct socket *sock_from_file(struct file *file, int *err)
{
if (file->f_op == &socket_file_ops)
return file->private_data; /* set in sock_map_fd */
*err = -ENOTSOCK;
return NULL;
}
/**
* sockfd_lookup - Go from a file number to its socket slot
* @fd: file handle
* @err: pointer to an error code return
*
* The file handle passed in is locked and the socket it is bound
* too is returned. If an error occurs the err pointer is overwritten
* with a negative errno code and NULL is returned. The function checks
* for both invalid handles and passing a handle which is not a socket.
*
* On a success the socket object pointer is returned.
*/
struct socket *sockfd_lookup(int fd, int *err)
{
struct file *file;
struct socket *sock;
file = fget(fd);
if (!file) {
*err = -EBADF;
return NULL;
}
sock = sock_from_file(file, err);
if (!sock)
fput(file);
return sock;
}
static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
{
struct file *file;
struct socket *sock;
*err = -EBADF;
file = fget_light(fd, fput_needed);
if (file) {
sock = sock_from_file(file, err);
if (sock)
return sock;
fput_light(file, *fput_needed);
}
return NULL;
}
/**
* sock_alloc - allocate a socket
*
* Allocate a new inode and socket object. The two are bound together
* and initialised. The socket is then returned. If we are out of inodes
* NULL is returned.
*/
static struct socket *sock_alloc(void)
{
struct inode *inode;
struct socket *sock;
inode = new_inode(sock_mnt->mnt_sb);
if (!inode)
return NULL;
sock = SOCKET_I(inode);
kmemcheck_annotate_bitfield(sock, type);
inode->i_mode = S_IFSOCK | S_IRWXUGO;
inode->i_uid = current_fsuid();
inode->i_gid = current_fsgid();
percpu_add(sockets_in_use, 1);
return sock;
}
/*
* In theory you can't get an open on this inode, but /proc provides
* a back door. Remember to keep it shut otherwise you'll let the
* creepy crawlies in.
*/
static int sock_no_open(struct inode *irrelevant, struct file *dontcare)
{
return -ENXIO;
}
const struct file_operations bad_sock_fops = {
.owner = THIS_MODULE,
.open = sock_no_open,
};
/**
* sock_release - close a socket
* @sock: socket to close
*
* The socket is released from the protocol stack if it has a release
* callback, and the inode is then released if the socket is bound to
* an inode not a file.
*/
void sock_release(struct socket *sock)
{
if (sock->ops) {
struct module *owner = sock->ops->owner;
sock->ops->release(sock);
sock->ops = NULL;
module_put(owner);
}
if (sock->fasync_list)
printk(KERN_ERR "sock_release: fasync list not empty!\n");
percpu_sub(sockets_in_use, 1);
if (!sock->file) {
iput(SOCK_INODE(sock));
return;
}
sock->file = NULL;
}
int sock_tx_timestamp(struct msghdr *msg, struct sock *sk,
union skb_shared_tx *shtx)
{
shtx->flags = 0;
if (sock_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE))
shtx->hardware = 1;
if (sock_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE))
shtx->software = 1;
return 0;
}
EXPORT_SYMBOL(sock_tx_timestamp);
static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
struct msghdr *msg, size_t size)
{
struct sock_iocb *si = kiocb_to_siocb(iocb);
int err;
si->sock = sock;
si->scm = NULL;
si->msg = msg;
si->size = size;
err = security_socket_sendmsg(sock, msg, size);
if (err)
return err;
return sock->ops->sendmsg(iocb, sock, msg, size);
}
int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
{
struct kiocb iocb;
struct sock_iocb siocb;
int ret;
init_sync_kiocb(&iocb, NULL);
iocb.private = &siocb;
ret = __sock_sendmsg(&iocb, sock, msg, size);
if (-EIOCBQUEUED == ret)
ret = wait_on_sync_kiocb(&iocb);
return ret;
}
int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
struct kvec *vec, size_t num, size_t size)
{
mm_segment_t oldfs = get_fs();
int result;
set_fs(KERNEL_DS);
/*
* the following is safe, since for compiler definitions of kvec and
* iovec are identical, yielding the same in-core layout and alignment
*/
msg->msg_iov = (struct iovec *)vec;
msg->msg_iovlen = num;
result = sock_sendmsg(sock, msg, size);
set_fs(oldfs);
return result;
}
static int ktime2ts(ktime_t kt, struct timespec *ts)
{
if (kt.tv64) {
*ts = ktime_to_timespec(kt);
return 1;
} else {
return 0;
}
}
/*
* called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
*/
void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
struct sk_buff *skb)
{
int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
struct timespec ts[3];
int empty = 1;
struct skb_shared_hwtstamps *shhwtstamps =
skb_hwtstamps(skb);
/* Race occurred between timestamp enabling and packet
receiving. Fill in the current time for now. */
if (need_software_tstamp && skb->tstamp.tv64 == 0)
__net_timestamp(skb);
if (need_software_tstamp) {
if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
struct timeval tv;
skb_get_timestamp(skb, &tv);
put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP,
sizeof(tv), &tv);
} else {
struct timespec ts;
skb_get_timestampns(skb, &ts);
put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS,
sizeof(ts), &ts);
}
}
memset(ts, 0, sizeof(ts));
if (skb->tstamp.tv64 &&
sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE)) {
skb_get_timestampns(skb, ts + 0);
empty = 0;
}
if (shhwtstamps) {
if (sock_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE) &&
ktime2ts(shhwtstamps->syststamp, ts + 1))
empty = 0;
if (sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE) &&
ktime2ts(shhwtstamps->hwtstamp, ts + 2))
empty = 0;
}
if (!empty)
put_cmsg(msg, SOL_SOCKET,
SCM_TIMESTAMPING, sizeof(ts), &ts);
}
EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
struct msghdr *msg, size_t size, int flags)
{
int err;
struct sock_iocb *si = kiocb_to_siocb(iocb);
si->sock = sock;
si->scm = NULL;
si->msg = msg;
si->size = size;
si->flags = flags;
err = security_socket_recvmsg(sock, msg, size, flags);
if (err)
return err;
return sock->ops->recvmsg(iocb, sock, msg, size, flags);
}
int sock_recvmsg(struct socket *sock, struct msghdr *msg,
size_t size, int flags)
{
struct kiocb iocb;
struct sock_iocb siocb;
int ret;
init_sync_kiocb(&iocb, NULL);
iocb.private = &siocb;
ret = __sock_recvmsg(&iocb, sock, msg, size, flags);
if (-EIOCBQUEUED == ret)
ret = wait_on_sync_kiocb(&iocb);
return ret;
}
int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
struct kvec *vec, size_t num, size_t size, int flags)
{
mm_segment_t oldfs = get_fs();
int result;
set_fs(KERNEL_DS);
/*
* the following is safe, since for compiler definitions of kvec and
* iovec are identical, yielding the same in-core layout and alignment
*/
msg->msg_iov = (struct iovec *)vec, msg->msg_iovlen = num;
result = sock_recvmsg(sock, msg, size, flags);
set_fs(oldfs);
return result;
}
static void sock_aio_dtor(struct kiocb *iocb)
{
kfree(iocb->private);
}
static ssize_t sock_sendpage(struct file *file, struct page *page,
int offset, size_t size, loff_t *ppos, int more)
{
struct socket *sock;
int flags;
sock = file->private_data;
flags = !(file->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT;
if (more)
flags |= MSG_MORE;
return kernel_sendpage(sock, page, offset, size, flags);
}
static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
struct pipe_inode_info *pipe, size_t len,
unsigned int flags)
{
struct socket *sock = file->private_data;
if (unlikely(!sock->ops->splice_read))
return -EINVAL;
return sock->ops->splice_read(sock, ppos, pipe, len, flags);
}
static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb,
struct sock_iocb *siocb)
{
if (!is_sync_kiocb(iocb)) {
siocb = kmalloc(sizeof(*siocb), GFP_KERNEL);
if (!siocb)
return NULL;
iocb->ki_dtor = sock_aio_dtor;
}
siocb->kiocb = iocb;
iocb->private = siocb;
return siocb;
}
static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb,
struct file *file, const struct iovec *iov,
unsigned long nr_segs)
{
struct socket *sock = file->private_data;
size_t size = 0;
int i;
for (i = 0; i < nr_segs; i++)
size += iov[i].iov_len;
msg->msg_name = NULL;
msg->msg_namelen = 0;
msg->msg_control = NULL;
msg->msg_controllen = 0;
msg->msg_iov = (struct iovec *)iov;
msg->msg_iovlen = nr_segs;
msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
return __sock_recvmsg(iocb, sock, msg, size, msg->msg_flags);
}
static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos)
{
struct sock_iocb siocb, *x;
if (pos != 0)
return -ESPIPE;
if (iocb->ki_left == 0) /* Match SYS5 behaviour */
return 0;
x = alloc_sock_iocb(iocb, &siocb);
if (!x)
return -ENOMEM;
return do_sock_read(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
}
static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb,
struct file *file, const struct iovec *iov,
unsigned long nr_segs)
{
struct socket *sock = file->private_data;
size_t size = 0;
int i;
for (i = 0; i < nr_segs; i++)
size += iov[i].iov_len;
msg->msg_name = NULL;
msg->msg_namelen = 0;
msg->msg_control = NULL;
msg->msg_controllen = 0;
msg->msg_iov = (struct iovec *)iov;
msg->msg_iovlen = nr_segs;
msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
if (sock->type == SOCK_SEQPACKET)
msg->msg_flags |= MSG_EOR;
return __sock_sendmsg(iocb, sock, msg, size);
}
static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos)
{
struct sock_iocb siocb, *x;
if (pos != 0)
return -ESPIPE;
x = alloc_sock_iocb(iocb, &siocb);
if (!x)
return -ENOMEM;
return do_sock_write(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs);
}
/*
* Atomic setting of ioctl hooks to avoid race
* with module unload.
*/
static DEFINE_MUTEX(br_ioctl_mutex);
static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg) = NULL;
void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
{
mutex_lock(&br_ioctl_mutex);
br_ioctl_hook = hook;
mutex_unlock(&br_ioctl_mutex);
}
EXPORT_SYMBOL(brioctl_set);
static DEFINE_MUTEX(vlan_ioctl_mutex);
static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
{
mutex_lock(&vlan_ioctl_mutex);
vlan_ioctl_hook = hook;
mutex_unlock(&vlan_ioctl_mutex);
}
EXPORT_SYMBOL(vlan_ioctl_set);
static DEFINE_MUTEX(dlci_ioctl_mutex);
static int (*dlci_ioctl_hook) (unsigned int, void __user *);
void dlci_ioctl_set(int (*hook) (unsigned int, void __user *))
{
mutex_lock(&dlci_ioctl_mutex);
dlci_ioctl_hook = hook;
mutex_unlock(&dlci_ioctl_mutex);
}
EXPORT_SYMBOL(dlci_ioctl_set);
/*
* With an ioctl, arg may well be a user mode pointer, but we don't know
* what to do with it - that's up to the protocol still.
*/
static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
{
struct socket *sock;
struct sock *sk;
void __user *argp = (void __user *)arg;
int pid, err;
struct net *net;
sock = file->private_data;
sk = sock->sk;
net = sock_net(sk);
if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
err = dev_ioctl(net, cmd, argp);
} else
#ifdef CONFIG_WIRELESS_EXT
if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
err = dev_ioctl(net, cmd, argp);
} else
#endif /* CONFIG_WIRELESS_EXT */
switch (cmd) {
case FIOSETOWN:
case SIOCSPGRP:
err = -EFAULT;
if (get_user(pid, (int __user *)argp))
break;
err = f_setown(sock->file, pid, 1);
break;
case FIOGETOWN:
case SIOCGPGRP:
err = put_user(f_getown(sock->file),
(int __user *)argp);
break;
case SIOCGIFBR:
case SIOCSIFBR:
case SIOCBRADDBR:
case SIOCBRDELBR:
err = -ENOPKG;
if (!br_ioctl_hook)
request_module("bridge");
mutex_lock(&br_ioctl_mutex);
if (br_ioctl_hook)
err = br_ioctl_hook(net, cmd, argp);
mutex_unlock(&br_ioctl_mutex);
break;
case SIOCGIFVLAN:
case SIOCSIFVLAN:
err = -ENOPKG;
if (!vlan_ioctl_hook)
request_module("8021q");
mutex_lock(&vlan_ioctl_mutex);
if (vlan_ioctl_hook)
err = vlan_ioctl_hook(net, argp);
mutex_unlock(&vlan_ioctl_mutex);
break;
case SIOCADDDLCI:
case SIOCDELDLCI:
err = -ENOPKG;
if (!dlci_ioctl_hook)
request_module("dlci");
mutex_lock(&dlci_ioctl_mutex);
if (dlci_ioctl_hook)
err = dlci_ioctl_hook(cmd, argp);
mutex_unlock(&dlci_ioctl_mutex);
break;
default:
err = sock->ops->ioctl(sock, cmd, arg);
/*
* If this ioctl is unknown try to hand it down
* to the NIC driver.
*/
if (err == -ENOIOCTLCMD)
err = dev_ioctl(net, cmd, argp);
break;
}
return err;
}
int sock_create_lite(int family, int type, int protocol, struct socket **res)
{
int err;
struct socket *sock = NULL;
err = security_socket_create(family, type, protocol, 1);
if (err)
goto out;
sock = sock_alloc();
if (!sock) {
err = -ENOMEM;
goto out;
}
sock->type = type;
err = security_socket_post_create(sock, family, type, protocol, 1);
if (err)
goto out_release;
out:
*res = sock;
return err;
out_release:
sock_release(sock);