forked from torvalds/linux
-
Notifications
You must be signed in to change notification settings - Fork 0
/
socket.c
8228 lines (7065 loc) · 226 KB
/
socket.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/* SCTP kernel implementation
* (C) Copyright IBM Corp. 2001, 2004
* Copyright (c) 1999-2000 Cisco, Inc.
* Copyright (c) 1999-2001 Motorola, Inc.
* Copyright (c) 2001-2003 Intel Corp.
* Copyright (c) 2001-2002 Nokia, Inc.
* Copyright (c) 2001 La Monte H.P. Yarroll
*
* This file is part of the SCTP kernel implementation
*
* These functions interface with the sockets layer to implement the
* SCTP Extensions for the Sockets API.
*
* Note that the descriptions from the specification are USER level
* functions--this file is the functions which populate the struct proto
* for SCTP which is the BOTTOM of the sockets interface.
*
* This SCTP implementation is free software;
* you can redistribute it and/or modify it under the terms of
* the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This SCTP implementation is distributed in the hope that it
* will be useful, but WITHOUT ANY WARRANTY; without even the implied
* ************************
* warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
* See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNU CC; see the file COPYING. If not, see
* <http://www.gnu.org/licenses/>.
*
* Please send any bug reports or fixes you make to the
* email address(es):
* lksctp developers <[email protected]>
*
* Written or modified by:
* La Monte H.P. Yarroll <[email protected]>
* Narasimha Budihal <[email protected]>
* Karl Knutson <[email protected]>
* Jon Grimm <[email protected]>
* Xingang Guo <[email protected]>
* Daisy Chang <[email protected]>
* Sridhar Samudrala <[email protected]>
* Inaky Perez-Gonzalez <[email protected]>
* Ardelle Fan <[email protected]>
* Ryan Layer <[email protected]>
* Anup Pemmaiah <[email protected]>
* Kevin Gao <[email protected]>
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <crypto/hash.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/wait.h>
#include <linux/time.h>
#include <linux/sched/signal.h>
#include <linux/ip.h>
#include <linux/capability.h>
#include <linux/fcntl.h>
#include <linux/poll.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/file.h>
#include <linux/compat.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/route.h>
#include <net/ipv6.h>
#include <net/inet_common.h>
#include <net/busy_poll.h>
#include <linux/socket.h> /* for sa_family_t */
#include <linux/export.h>
#include <net/sock.h>
#include <net/sctp/sctp.h>
#include <net/sctp/sm.h>
/* Forward declarations for internal helper functions. */
static int sctp_writeable(struct sock *sk);
static void sctp_wfree(struct sk_buff *skb);
static int sctp_wait_for_sndbuf(struct sctp_association *, long *timeo_p,
size_t msg_len);
static int sctp_wait_for_packet(struct sock *sk, int *err, long *timeo_p);
static int sctp_wait_for_connect(struct sctp_association *, long *timeo_p);
static int sctp_wait_for_accept(struct sock *sk, long timeo);
static void sctp_wait_for_close(struct sock *sk, long timeo);
static void sctp_destruct_sock(struct sock *sk);
static struct sctp_af *sctp_sockaddr_af(struct sctp_sock *opt,
union sctp_addr *addr, int len);
static int sctp_bindx_add(struct sock *, struct sockaddr *, int);
static int sctp_bindx_rem(struct sock *, struct sockaddr *, int);
static int sctp_send_asconf_add_ip(struct sock *, struct sockaddr *, int);
static int sctp_send_asconf_del_ip(struct sock *, struct sockaddr *, int);
static int sctp_send_asconf(struct sctp_association *asoc,
struct sctp_chunk *chunk);
static int sctp_do_bind(struct sock *, union sctp_addr *, int);
static int sctp_autobind(struct sock *sk);
static void sctp_sock_migrate(struct sock *, struct sock *,
struct sctp_association *, sctp_socket_type_t);
/* Memory-pressure flag; set to 1 by sctp_enter_memory_pressure(). */
static int sctp_memory_pressure;
/* NOTE(review): presumably the protocol-wide count of memory charged to SCTP
 * sockets; it is not touched in this part of the file - confirm its users.
 */
static atomic_long_t sctp_memory_allocated;
/* NOTE(review): presumably the number of allocated SCTP sockets.  Not static,
 * so it is visible outside this file - confirm against its users.
 */
struct percpu_counter sctp_sockets_allocated;
/* Note that SCTP is under memory pressure by raising the file-scope
 * sctp_memory_pressure flag.  The @sk argument is not used by this function.
 */
static void sctp_enter_memory_pressure(struct sock *sk)
{
	sctp_memory_pressure = 1;
}
/* Get the sndbuf space available at the time on the association. */
static inline int sctp_wspace(struct sctp_association *asoc)
{
int amt;
if (asoc->ep->sndbuf_policy)
amt = asoc->sndbuf_used;
else
amt = sk_wmem_alloc_get(asoc->base.sk);
if (amt >= asoc->base.sk->sk_sndbuf) {
if (asoc->base.sk->sk_userlocks & SOCK_SNDBUF_LOCK)
amt = 0;
else {
amt = sk_stream_wspace(asoc->base.sk);
if (amt < 0)
amt = 0;
}
} else {
amt = asoc->base.sk->sk_sndbuf - amt;
}
return amt;
}
/* Increment the used sndbuf space count of the corresponding association by
* the size of the outgoing data chunk.
* Also, set the skb destructor for sndbuf accounting later.
*
* Since it is always 1-1 between chunk and skb, and also a new skb is always
* allocated for chunk bundling in sctp_packet_transmit(), we can use the
* destructor in the data chunk skb for the purpose of the sndbuf space
* tracking.
*/
static inline void sctp_set_owner_w(struct sctp_chunk *chunk)
{
struct sctp_association *asoc = chunk->asoc;
struct sock *sk = asoc->base.sk;
/* The sndbuf space is tracked per association. */
sctp_association_hold(asoc);
skb_set_owner_w(chunk->skb, sk);
chunk->skb->destructor = sctp_wfree;
/* Save the chunk pointer in skb for sctp_wfree to use later. */
skb_shinfo(chunk->skb)->destructor_arg = chunk;
asoc->sndbuf_used += SCTP_DATA_SNDSIZE(chunk) +
sizeof(struct sk_buff) +
sizeof(struct sctp_chunk);
atomic_add(sizeof(struct sctp_chunk), &sk->sk_wmem_alloc);
sk->sk_wmem_queued += chunk->skb->truesize;
sk_mem_charge(sk, chunk->skb->truesize);
}
/* Verify that this is a valid address. */
static inline int sctp_verify_addr(struct sock *sk, union sctp_addr *addr,
int len)
{
struct sctp_af *af;
/* Verify basic sockaddr. */
af = sctp_sockaddr_af(sctp_sk(sk), addr, len);
if (!af)
return -EINVAL;
/* Is this a valid SCTP address? */
if (!af->addr_valid(addr, sctp_sk(sk), NULL))
return -EINVAL;
if (!sctp_sk(sk)->pf->send_verify(sctp_sk(sk), (addr)))
return -EINVAL;
return 0;
}
/* Find the association identified by @id on socket @sk.
 *
 * On a UDP-style socket the id is looked up in the global id table and the
 * result is accepted only if it belongs to @sk and is not marked dead; ids
 * 0 and (sctp_assoc_t)-1 never match.  On any other socket style the id is
 * ignored and the socket's first association is returned.
 *
 * Returns the association, or NULL when there is no match.
 */
struct sctp_association *sctp_id2assoc(struct sock *sk, sctp_assoc_t id)
{
	struct sctp_association *asoc;

	if (sctp_style(sk, UDP)) {
		if (!id || (id == (sctp_assoc_t)-1))
			return NULL;

		spin_lock_bh(&sctp_assocs_id_lock);
		asoc = (struct sctp_association *)idr_find(&sctp_assocs_id,
							   (int)id);
		spin_unlock_bh(&sctp_assocs_id_lock);

		if (asoc && asoc->base.sk == sk && !asoc->base.dead)
			return asoc;

		return NULL;
	}

	/* Not UDP-style: only sockets in the ESTABLISHED or CLOSING state
	 * are considered.  Anything else could be a TCP-style listening
	 * socket or one that has not yet called connect().
	 */
	if (!sctp_sstate(sk, ESTABLISHED) && !sctp_sstate(sk, CLOSING))
		return NULL;

	if (list_empty(&sctp_sk(sk)->ep->asocs))
		return NULL;

	/* The first entry of the list is the one and only association. */
	return list_entry(sctp_sk(sk)->ep->asocs.next,
			  struct sctp_association, asocs);
}
/* Resolve a transport from an address and an association id.
 *
 * The address is verified and then looked up among the associations of the
 * socket's endpoint.  If @id also resolves to an association, it must be
 * the same one the address belongs to.  Before returning, the caller's
 * address buffer is handed to the protocol family's addr_to_user() hook.
 *
 * Returns the transport, or NULL if the address is invalid, unknown, or
 * belongs to a different association than @id.
 */
static struct sctp_transport *sctp_addr_id2transport(struct sock *sk,
					      struct sockaddr_storage *addr,
					      sctp_assoc_t id)
{
	union sctp_addr *paddr = (union sctp_addr *)addr;
	struct sctp_association *by_addr;
	struct sctp_association *by_id;
	struct sctp_transport *transport;
	struct sctp_af *af;

	af = sctp_get_af_specific(addr->ss_family);
	if (!af)
		return NULL;

	if (sctp_verify_addr(sk, paddr, af->sockaddr_len))
		return NULL;

	by_addr = sctp_endpoint_lookup_assoc(sctp_sk(sk)->ep, paddr,
					     &transport);
	if (!by_addr)
		return NULL;

	by_id = sctp_id2assoc(sk, id);
	if (by_id && by_id != by_addr)
		return NULL;

	sctp_get_pf_specific(sk->sk_family)->addr_to_user(sctp_sk(sk), paddr);

	return transport;
}
/* API 3.1.2 bind() - UDP Style Syntax
 * The syntax of bind() is,
 *
 *   ret = bind(int sd, struct sockaddr *addr, int addrlen);
 *
 *   sd       - the socket descriptor returned by socket().
 *   addr     - the address structure (struct sockaddr_in or struct
 *              sockaddr_in6 [RFC 2553]),
 *   addr_len - the size of the address structure.
 *
 * Runs under the socket lock.  A socket whose endpoint already has a bound
 * port is rejected with -EINVAL; otherwise the result of sctp_do_bind() is
 * returned.
 */
static int sctp_bind(struct sock *sk, struct sockaddr *addr, int addr_len)
{
	int err;

	lock_sock(sk);

	pr_debug("%s: sk:%p, addr:%p, addr_len:%d\n", __func__, sk,
		 addr, addr_len);

	/* Binding twice is not allowed. */
	if (sctp_sk(sk)->ep->base.bind_addr.port)
		err = -EINVAL;
	else
		err = sctp_do_bind(sk, (union sctp_addr *)addr, addr_len);

	release_sock(sk);

	return err;
}
static long sctp_get_port_local(struct sock *, union sctp_addr *);
/* Verify that @addr is a well-formed sockaddr for this socket.
 *
 * @opt:  SCTP socket whose protocol family decides which address families
 *        are acceptable.
 * @addr: address to check.
 * @len:  number of valid bytes at @addr.
 *
 * Returns the address-family operations for @addr, or NULL when the buffer
 * is too short for its family or the family is not supported by the socket.
 */
static struct sctp_af *sctp_sockaddr_af(struct sctp_sock *opt,
					union sctp_addr *addr, int len)
{
	struct sctp_af *af;

	/* Check minimum size.  Compare as signed so that a negative length
	 * is rejected here, before any byte of the address is examined.
	 */
	if (len < (int)sizeof(struct sockaddr))
		return NULL;

	/* An AF_INET6 address is inspected for the v4-mapped prefix below,
	 * which reads all of sin6_addr.  That field ends beyond
	 * sizeof(struct sockaddr), so make sure the buffer really covers it.
	 */
	if (addr->sa.sa_family == AF_INET6 &&
	    len < (int)(offsetof(struct sockaddr_in6, sin6_addr) +
			sizeof(struct in6_addr)))
		return NULL;

	/* V4 mapped address are really of AF_INET family */
	if (addr->sa.sa_family == AF_INET6 &&
	    ipv6_addr_v4mapped(&addr->v6.sin6_addr)) {
		if (!opt->pf->af_supported(AF_INET, opt))
			return NULL;
	} else {
		/* Does this PF support this AF? */
		if (!opt->pf->af_supported(addr->sa.sa_family, opt))
			return NULL;
	}

	/* The family was accepted above; still guard against a missing
	 * entry rather than dereferencing NULL.
	 */
	af = sctp_get_af_specific(addr->sa.sa_family);
	if (!af || len < af->sockaddr_len)
		return NULL;

	return af;
}
/* Bind a local address either to an endpoint or to an association. */
static int sctp_do_bind(struct sock *sk, union sctp_addr *addr, int len)
{
struct net *net = sock_net(sk);
struct sctp_sock *sp = sctp_sk(sk);
struct sctp_endpoint *ep = sp->ep;
struct sctp_bind_addr *bp = &ep->base.bind_addr;
struct sctp_af *af;
unsigned short snum;
int ret = 0;
/* Common sockaddr verification. */
af = sctp_sockaddr_af(sp, addr, len);
if (!af) {
pr_debug("%s: sk:%p, newaddr:%p, len:%d EINVAL\n",
__func__, sk, addr, len);
return -EINVAL;
}
snum = ntohs(addr->v4.sin_port);
pr_debug("%s: sk:%p, new addr:%pISc, port:%d, new port:%d, len:%d\n",
__func__, sk, &addr->sa, bp->port, snum, len);
/* PF specific bind() address verification. */
if (!sp->pf->bind_verify(sp, addr))
return -EADDRNOTAVAIL;
/* We must either be unbound, or bind to the same port.
* It's OK to allow 0 ports if we are already bound.
* We'll just inhert an already bound port in this case
*/
if (bp->port) {
if (!snum)
snum = bp->port;
else if (snum != bp->port) {
pr_debug("%s: new port %d doesn't match existing port "
"%d\n", __func__, snum, bp->port);
return -EINVAL;
}
}
if (snum && snum < inet_prot_sock(net) &&
!ns_capable(net->user_ns, CAP_NET_BIND_SERVICE))
return -EACCES;
/* See if the address matches any of the addresses we may have
* already bound before checking against other endpoints.
*/
if (sctp_bind_addr_match(bp, addr, sp))
return -EINVAL;
/* Make sure we are allowed to bind here.
* The function sctp_get_port_local() does duplicate address
* detection.
*/
addr->v4.sin_port = htons(snum);
if ((ret = sctp_get_port_local(sk, addr))) {
return -EADDRINUSE;
}
/* Refresh ephemeral port. */
if (!bp->port)
bp->port = inet_sk(sk)->inet_num;
/* Add the address to the bind address list.
* Use GFP_ATOMIC since BHs will be disabled.
*/
ret = sctp_add_bind_addr(bp, addr, af->sockaddr_len,
SCTP_ADDR_SRC, GFP_ATOMIC);
/* Copy back into socket for getsockname() use. */
if (!ret) {
inet_sk(sk)->inet_sport = htons(inet_sk(sk)->inet_num);
sp->pf->to_sk_saddr(addr, sk);
}
return ret;
}
/* ADDIP Section 4.1.1 Congestion Control of ASCONF Chunks
 *
 * R1) One and only one ASCONF Chunk MAY be in transit and unacknowledged
 * at any one time.  If a sender, after sending an ASCONF chunk, decides
 * it needs to transfer another ASCONF Chunk, it MUST wait until the
 * ASCONF-ACK Chunk returns from the previous ASCONF Chunk before sending a
 * subsequent ASCONF.  Note this restriction binds each side, so at any
 * time two ASCONF may be in-transit on any given association (one sent
 * from each endpoint).
 *
 * Accordingly: if the association already has an ASCONF outstanding, @chunk
 * is appended to addip_chunk_list and 0 is returned.  Otherwise the chunk is
 * handed to sctp_primitive_ASCONF(); on success it is remembered as the
 * outstanding ASCONF, on failure it is freed and the error is returned.
 */
static int sctp_send_asconf(struct sctp_association *asoc,
			    struct sctp_chunk *chunk)
{
	struct net *net = sock_net(asoc->base.sk);
	int err;

	/* An ASCONF is already in flight: queue this one for later. */
	if (asoc->addip_last_asconf) {
		list_add_tail(&chunk->list, &asoc->addip_chunk_list);
		return 0;
	}

	/* Hold the chunk until an ASCONF_ACK is received. */
	sctp_chunk_hold(chunk);

	err = sctp_primitive_ASCONF(net, asoc, chunk);
	if (err) {
		sctp_chunk_free(chunk);
		return err;
	}

	asoc->addip_last_asconf = chunk;
	return 0;
}
/* Bind every address of a packed address array to the local endpoint.
 *
 * @addrs holds @addrcnt IPv4/IPv6 sockaddrs back to back; the family of
 * each entry determines its length.  Each entry is passed to
 * sctp_do_bind().  If one of them fails, the entries bound so far are
 * removed again through sctp_bindx_rem() and the error is returned.
 *
 * Only sctp_setsockopt_bindx() is supposed to call this function.
 *
 * Returns 0 on success or a negative errno.
 */
static int sctp_bindx_add(struct sock *sk, struct sockaddr *addrs, int addrcnt)
{
	void *cursor = addrs;
	int retval = 0;
	int cnt;

	pr_debug("%s: sk:%p, addrs:%p, addrcnt:%d\n", __func__, sk,
		 addrs, addrcnt);

	for (cnt = 0; cnt < addrcnt; cnt++) {
		struct sockaddr *sa_addr = cursor;
		struct sctp_af *af;

		/* The list may mix IPv4 and IPv6 entries; the family tells
		 * us how far to step to reach the next one.
		 */
		af = sctp_get_af_specific(sa_addr->sa_family);
		if (af) {
			retval = sctp_do_bind(sk, (union sctp_addr *)sa_addr,
					      af->sockaddr_len);
			cursor += af->sockaddr_len;
		} else {
			retval = -EINVAL;
		}

		if (retval < 0) {
			/* Failed.  Undo the ones that were already added. */
			if (cnt > 0)
				sctp_bindx_rem(sk, addrs, cnt);
			return retval;
		}
	}

	return retval;
}
/* Send an ASCONF chunk with Add IP address parameters to all the peers of the
* associations that are part of the endpoint indicating that a list of local
* addresses are added to the endpoint.
*
* If any of the addresses is already in the bind address list of the
* association, we do not send the chunk for that association. But it will not
* affect other associations.
*
* Only sctp_setsockopt_bindx() is supposed to call this function.
*/
static int sctp_send_asconf_add_ip(struct sock *sk,
struct sockaddr *addrs,
int addrcnt)
{
struct net *net = sock_net(sk);
struct sctp_sock *sp;
struct sctp_endpoint *ep;
struct sctp_association *asoc;
struct sctp_bind_addr *bp;
struct sctp_chunk *chunk;
struct sctp_sockaddr_entry *laddr;
union sctp_addr *addr;
union sctp_addr saveaddr;
void *addr_buf;
struct sctp_af *af;
struct list_head *p;
int i;
int retval = 0;
if (!net->sctp.addip_enable)
return retval;
sp = sctp_sk(sk);
ep = sp->ep;
pr_debug("%s: sk:%p, addrs:%p, addrcnt:%d\n",
__func__, sk, addrs, addrcnt);
list_for_each_entry(asoc, &ep->asocs, asocs) {
if (!asoc->peer.asconf_capable)
continue;
if (asoc->peer.addip_disabled_mask & SCTP_PARAM_ADD_IP)
continue;
if (!sctp_state(asoc, ESTABLISHED))
continue;
/* Check if any address in the packed array of addresses is
* in the bind address list of the association. If so,
* do not send the asconf chunk to its peer, but continue with
* other associations.
*/
addr_buf = addrs;
for (i = 0; i < addrcnt; i++) {
addr = addr_buf;
af = sctp_get_af_specific(addr->v4.sin_family);
if (!af) {
retval = -EINVAL;
goto out;
}
if (sctp_assoc_lookup_laddr(asoc, addr))
break;
addr_buf += af->sockaddr_len;
}
if (i < addrcnt)
continue;
/* Use the first valid address in bind addr list of
* association as Address Parameter of ASCONF CHUNK.
*/
bp = &asoc->base.bind_addr;
p = bp->address_list.next;
laddr = list_entry(p, struct sctp_sockaddr_entry, list);
chunk = sctp_make_asconf_update_ip(asoc, &laddr->a, addrs,
addrcnt, SCTP_PARAM_ADD_IP);
if (!chunk) {
retval = -ENOMEM;
goto out;
}
/* Add the new addresses to the bind address list with
* use_as_src set to 0.
*/
addr_buf = addrs;
for (i = 0; i < addrcnt; i++) {
addr = addr_buf;
af = sctp_get_af_specific(addr->v4.sin_family);
memcpy(&saveaddr, addr, af->sockaddr_len);
retval = sctp_add_bind_addr(bp, &saveaddr,
sizeof(saveaddr),
SCTP_ADDR_NEW, GFP_ATOMIC);
addr_buf += af->sockaddr_len;
}
if (asoc->src_out_of_asoc_ok) {
struct sctp_transport *trans;
list_for_each_entry(trans,
&asoc->peer.transport_addr_list, transports) {
/* Clear the source and route cache */
sctp_transport_dst_release(trans);
trans->cwnd = min(4*asoc->pathmtu, max_t(__u32,
2*asoc->pathmtu, 4380));
trans->ssthresh = asoc->peer.i.a_rwnd;
trans->rto = asoc->rto_initial;
sctp_max_rto(asoc, trans);
trans->rtt = trans->srtt = trans->rttvar = 0;
sctp_transport_route(trans, NULL,
sctp_sk(asoc->base.sk));
}
}
retval = sctp_send_asconf(asoc, chunk);
}
out:
return retval;
}
/* Remove a packed array of addresses from the endpoint's bind address list.
 *
 * @addrs holds @addrcnt IPv4/IPv6 sockaddrs back to back.  Each entry is
 * validated and removed with sctp_del_bind_addr().  An entry with a zero
 * port gets the endpoint's bound port written into it; a non-zero port must
 * equal the bound port.
 *
 * The last remaining address is never removed: if the list is empty or has
 * a single entry the operation fails with -EBUSY.
 *
 * If any step fails, the entries removed so far are added back through
 * sctp_bindx_add() and the error is returned.
 *
 * Only sctp_setsockopt_bindx() is supposed to call this function.
 *
 * Returns 0 on success or a negative errno (-EBUSY, -EINVAL,
 * -EADDRNOTAVAIL, or the error from sctp_del_bind_addr()).
 */
static int sctp_bindx_rem(struct sock *sk, struct sockaddr *addrs, int addrcnt)
{
	struct sctp_sock *sp = sctp_sk(sk);
	struct sctp_bind_addr *bp = &sp->ep->base.bind_addr;
	void *cursor = addrs;
	int retval = 0;
	int cnt;

	pr_debug("%s: sk:%p, addrs:%p, addrcnt:%d\n",
		 __func__, sk, addrs, addrcnt);

	for (cnt = 0; cnt < addrcnt; cnt++) {
		union sctp_addr *sa_addr = cursor;
		struct sctp_af *af;

		/* With zero or one bound address left there is nothing more
		 * we may remove: at least one address has to stay.
		 */
		if (list_empty(&bp->address_list) ||
		    sctp_list_single_entry(&bp->address_list)) {
			retval = -EBUSY;
			break;
		}

		af = sctp_get_af_specific(sa_addr->sa.sa_family);
		if (!af) {
			retval = -EINVAL;
			break;
		}

		if (!af->addr_valid(sa_addr, sp, NULL)) {
			retval = -EADDRNOTAVAIL;
			break;
		}

		if (!sa_addr->v4.sin_port) {
			sa_addr->v4.sin_port = htons(bp->port);
		} else if (sa_addr->v4.sin_port != htons(bp->port)) {
			retval = -EINVAL;
			break;
		}

		/* FIXME - There is probably a need to check if sk->sk_saddr and
		 * sk->sk_rcv_addr are currently set to one of the addresses to
		 * be removed. This is something which needs to be looked into
		 * when we are fixing the outstanding issues with multi-homing
		 * socket routing and failover schemes. Refer to comments in
		 * sctp_do_bind(). -daisy
		 */
		retval = sctp_del_bind_addr(bp, sa_addr);
		if (retval < 0)
			break;

		cursor += af->sockaddr_len;
	}

	/* Failed part-way through: add back the ones already removed. */
	if (retval < 0 && cnt > 0)
		sctp_bindx_add(sk, addrs, cnt);

	return retval;
}
/* Announce removed local addresses to the peers of this endpoint.
 *
 * For every association of the endpoint that is ASCONF capable, has not
 * disabled the Delete-IP parameter and is ESTABLISHED, an ASCONF chunk with
 * Delete IP Address parameters for the @addrcnt packed addresses in @addrs
 * is built and sent.
 *
 * If any of the addresses is NOT in the bind address list of an
 * association, no chunk is sent for that association; other associations
 * are not affected.
 *
 * When a single address is being deleted and it is the only address the
 * association has, no ASCONF is sent.  Instead the address is remembered in
 * asoc->asconf_addr_del_pending and src_out_of_asoc_ok is set.
 *
 * Does nothing and returns 0 when addip_enable is off for the namespace.
 *
 * Only sctp_setsockopt_bindx() is supposed to call this function.
 *
 * Returns 0 or a negative errno (-EINVAL, -ENOMEM, or the result of the
 * last sctp_send_asconf() call).
 */
static int sctp_send_asconf_del_ip(struct sock *sk,
				   struct sockaddr *addrs,
				   int addrcnt)
{
	struct net *net = sock_net(sk);
	struct sctp_sock *sp;
	struct sctp_endpoint *ep;
	struct sctp_association *asoc;
	struct sctp_transport *transport;
	struct sctp_bind_addr *bp;
	struct sctp_chunk *chunk;
	union sctp_addr *laddr;
	void *addr_buf;
	struct sctp_af *af;
	struct sctp_sockaddr_entry *saddr;
	int i;
	int retval = 0;
	int stored;

	if (!net->sctp.addip_enable)
		return retval;

	sp = sctp_sk(sk);
	ep = sp->ep;

	pr_debug("%s: sk:%p, addrs:%p, addrcnt:%d\n",
		 __func__, sk, addrs, addrcnt);

	list_for_each_entry(asoc, &ep->asocs, asocs) {
		/* Both are per-association state.  They must start fresh for
		 * every association: a "stored" left over from an earlier
		 * association would make us skip sending (and leak) the
		 * chunk built for this one.
		 */
		stored = 0;
		chunk = NULL;

		if (!asoc->peer.asconf_capable)
			continue;

		if (asoc->peer.addip_disabled_mask & SCTP_PARAM_DEL_IP)
			continue;

		if (!sctp_state(asoc, ESTABLISHED))
			continue;

		/* Check if any address in the packed array of addresses is
		 * not present in the bind address list of the association.
		 * If so, do not send the asconf chunk to its peer, but
		 * continue with other associations.
		 */
		addr_buf = addrs;
		for (i = 0; i < addrcnt; i++) {
			laddr = addr_buf;
			af = sctp_get_af_specific(laddr->v4.sin_family);
			if (!af)
				return -EINVAL;

			if (!sctp_assoc_lookup_laddr(asoc, laddr))
				break;

			addr_buf += af->sockaddr_len;
		}
		if (i < addrcnt)
			continue;

		/* Find one address in the association's bind address list
		 * that is not in the packed array of addresses. This is to
		 * make sure that we do not delete all the addresses in the
		 * association.
		 */
		bp = &asoc->base.bind_addr;
		laddr = sctp_find_unmatch_addr(bp, (union sctp_addr *)addrs,
					       addrcnt, sp);
		if ((laddr == NULL) && (addrcnt == 1)) {
			/* Deleting the association's only address: remember
			 * it as pending instead of sending an ASCONF.
			 */
			if (asoc->asconf_addr_del_pending)
				continue;

			asoc->asconf_addr_del_pending =
			    kzalloc(sizeof(union sctp_addr), GFP_ATOMIC);
			if (asoc->asconf_addr_del_pending == NULL)
				return -ENOMEM;

			asoc->asconf_addr_del_pending->sa.sa_family =
				    addrs->sa_family;
			asoc->asconf_addr_del_pending->v4.sin_port =
				    htons(bp->port);
			if (addrs->sa_family == AF_INET) {
				struct sockaddr_in *sin;

				sin = (struct sockaddr_in *)addrs;
				asoc->asconf_addr_del_pending->v4.sin_addr.s_addr = sin->sin_addr.s_addr;
			} else if (addrs->sa_family == AF_INET6) {
				struct sockaddr_in6 *sin6;

				sin6 = (struct sockaddr_in6 *)addrs;
				asoc->asconf_addr_del_pending->v6.sin6_addr = sin6->sin6_addr;
			}

			pr_debug("%s: keep the last address asoc:%p %pISc at %p\n",
				 __func__, asoc, &asoc->asconf_addr_del_pending->sa,
				 asoc->asconf_addr_del_pending);

			asoc->src_out_of_asoc_ok = 1;
			stored = 1;
		} else {
			if (laddr == NULL)
				return -EINVAL;

			/* We do not need RCU protection throughout this loop
			 * because this is done under a socket lock from the
			 * setsockopt call.
			 */
			chunk = sctp_make_asconf_update_ip(asoc, laddr, addrs,
							   addrcnt,
							   SCTP_PARAM_DEL_IP);
			if (!chunk)
				return -ENOMEM;
		}

		/* Mark the addresses in the bind address list that are to be
		 * deleted with the SCTP_ADDR_DEL state.
		 */
		addr_buf = addrs;
		for (i = 0; i < addrcnt; i++) {
			laddr = addr_buf;
			af = sctp_get_af_specific(laddr->v4.sin_family);
			list_for_each_entry(saddr, &bp->address_list, list) {
				if (sctp_cmp_addr_exact(&saddr->a, laddr))
					saddr->state = SCTP_ADDR_DEL;
			}
			addr_buf += af->sockaddr_len;
		}

		/* Update the route and saddr entries for all the transports
		 * as some of the addresses in the bind address list are
		 * about to be deleted and cannot be used as source addresses.
		 */
		list_for_each_entry(transport, &asoc->peer.transport_addr_list,
					transports) {
			sctp_transport_dst_release(transport);
			sctp_transport_route(transport, NULL,
					     sctp_sk(asoc->base.sk));
		}

		if (stored)
			/* We don't need to transmit ASCONF */
			continue;

		retval = sctp_send_asconf(asoc, chunk);
	}

	return retval;
}
/* Propagate one address event to the associations of the endpoint.
 * ep and addr_wq must be locked.
 *
 * The port of the address in @addrw is overwritten with the endpoint's
 * bound port, the address is verified, and then an Add-IP ASCONF is sent
 * when the entry's state is SCTP_ADDR_NEW, a Delete-IP ASCONF otherwise.
 *
 * Returns -EINVAL if the address family is unknown or the address fails
 * verification, otherwise the result of the ASCONF helper that was called.
 */
int sctp_asconf_mgmt(struct sctp_sock *sp, struct sctp_sockaddr_entry *addrw)
{
	struct sock *sk = sctp_opt2sk(sp);
	union sctp_addr *addr = &addrw->a;
	struct sctp_af *af;

	/* It is safe to write port space in caller. */
	addr->v4.sin_port = htons(sp->ep->base.bind_addr.port);

	af = sctp_get_af_specific(addr->sa.sa_family);
	if (!af || sctp_verify_addr(sk, addr, af->sockaddr_len))
		return -EINVAL;

	if (addrw->state != SCTP_ADDR_NEW)
		return sctp_send_asconf_del_ip(sk, (struct sockaddr *)addr, 1);

	return sctp_send_asconf_add_ip(sk, (struct sockaddr *)addr, 1);
}
/* Helper for tunneling sctp_bindx() requests through sctp_setsockopt()
*
* API 8.1
* int sctp_bindx(int sd, struct sockaddr *addrs, int addrcnt,
* int flags);
*
* If sd is an IPv4 socket, the addresses passed must be IPv4 addresses.
* If the sd is an IPv6 socket, the addresses passed can either be IPv4
* or IPv6 addresses.
*
* A single address may be specified as INADDR_ANY or IN6ADDR_ANY, see
* Section 3.1.2 for this usage.
*
* addrs is a pointer to an array of one or more socket addresses. Each
* address is contained in its appropriate structure (i.e. struct
* sockaddr_in or struct sockaddr_in6) the family of the address type
* must be used to distinguish the address length (note that this
* representation is termed a "packed array" of addresses). The caller
* specifies the number of addresses in the array with addrcnt.
*
* On success, sctp_bindx() returns 0. On failure, sctp_bindx() returns
* -1, and sets errno to the appropriate error code.
*
* For SCTP, the port given in each socket address must be the same, or
* sctp_bindx() will fail, setting errno to EINVAL.
*
* The flags parameter is formed from the bitwise OR of zero or more of
* the following currently defined flags:
*
* SCTP_BINDX_ADD_ADDR
*
* SCTP_BINDX_REM_ADDR
*
* SCTP_BINDX_ADD_ADDR directs SCTP to add the given addresses to the
* association, and SCTP_BINDX_REM_ADDR directs SCTP to remove the given
* addresses from the association. The two flags are mutually exclusive;
* if both are given, sctp_bindx() will fail with EINVAL. A caller may
* not remove all addresses from an association; sctp_bindx() will
* reject such an attempt with EINVAL.
*
* An application can use sctp_bindx(SCTP_BINDX_ADD_ADDR) to associate
* additional addresses with an endpoint after calling bind(). Or use
* sctp_bindx(SCTP_BINDX_REM_ADDR) to remove some addresses a listening
* socket is associated with so that no new association accepted will be
* associated with those addresses. If the endpoint supports dynamic
* address reconfiguration, an SCTP_BINDX_REM_ADDR or SCTP_BINDX_ADD_ADDR
* may cause the endpoint to send the appropriate message to the peer to
* change the peer's address lists.
*
* Adding and removing addresses from a connected association is
* optional functionality. Implementations that do not support this
* functionality should return EOPNOTSUPP.
*
* Basically do nothing but copying the addresses from user to kernel
* land and invoking either sctp_bindx_add() or sctp_bindx_rem() on the sk.
* This is used for tunneling the sctp_bindx() request through sctp_setsockopt()
* from userspace.
*
* We don't use copy_from_user() for optimization: we first do the
* sanity checks (buffer size -fast- and access check-healthy
* pointer); if all of those succeed, then we can alloc the memory
* (expensive operation) needed to copy the data to kernel. Then we do
* the copying without checking the user space area
* (__copy_from_user()).
*
* On exit there is no need to do sockfd_put(), sys_setsockopt() does
* it.
*
* sk The sk of the socket
* addrs The pointer to the addresses in user land
* addrs_size Size of the addrs buffer
* op Operation to perform (add or remove, see the flags of
* sctp_bindx)
*
* Returns 0 if ok, <0 errno code on error.
*/
static int sctp_setsockopt_bindx(struct sock *sk,
struct sockaddr __user *addrs,
int addrs_size, int op)
{
struct sockaddr *kaddrs;
int err;
int addrcnt = 0;
int walk_size = 0;
struct sockaddr *sa_addr;
void *addr_buf;
struct sctp_af *af;
pr_debug("%s: sk:%p addrs:%p addrs_size:%d opt:%d\n",
__func__, sk, addrs, addrs_size, op);
if (unlikely(addrs_size <= 0))
return -EINVAL;
/* Check the user passed a healthy pointer. */
if (unlikely(!access_ok(VERIFY_READ, addrs, addrs_size)))
return -EFAULT;
/* Alloc space for the address array in kernel memory. */
kaddrs = kmalloc(addrs_size, GFP_USER | __GFP_NOWARN);
if (unlikely(!kaddrs))
return -ENOMEM;
if (__copy_from_user(kaddrs, addrs, addrs_size)) {
kfree(kaddrs);
return -EFAULT;
}
/* Walk through the addrs buffer and count the number of addresses. */
addr_buf = kaddrs;
while (walk_size < addrs_size) {
if (walk_size + sizeof(sa_family_t) > addrs_size) {
kfree(kaddrs);
return -EINVAL;
}
sa_addr = addr_buf;
af = sctp_get_af_specific(sa_addr->sa_family);