/* ofproto-dpif-xlate.c (forked from openvswitch/ovs). */
/* Copyright (c) 2009-2017, 2019-2020 Nicira, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License. */
#include <config.h>
#include "ofproto/ofproto-dpif-xlate.h"
#include <errno.h>
#include <sys/types.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <net/if.h>
#include <sys/socket.h>
#include "bfd.h"
#include "bitmap.h"
#include "bond.h"
#include "bundle.h"
#include "byte-order.h"
#include "cfm.h"
#include "connmgr.h"
#include "coverage.h"
#include "csum.h"
#include "dp-packet.h"
#include "dpif.h"
#include "in-band.h"
#include "lacp.h"
#include "learn.h"
#include "mac-learning.h"
#include "mcast-snooping.h"
#include "multipath.h"
#include "netdev-vport.h"
#include "netlink.h"
#include "nx-match.h"
#include "odp-execute.h"
#include "ofproto/ofproto-dpif-ipfix.h"
#include "ofproto/ofproto-dpif-mirror.h"
#include "ofproto/ofproto-dpif-monitor.h"
#include "ofproto/ofproto-dpif-sflow.h"
#include "ofproto/ofproto-dpif-trace.h"
#include "ofproto/ofproto-dpif-xlate-cache.h"
#include "ofproto/ofproto-dpif.h"
#include "ofproto/ofproto-provider.h"
#include "openvswitch/dynamic-string.h"
#include "openvswitch/meta-flow.h"
#include "openvswitch/list.h"
#include "openvswitch/ofp-actions.h"
#include "openvswitch/ofp-ed-props.h"
#include "openvswitch/vlog.h"
#include "ovs-lldp.h"
#include "ovs-router.h"
#include "packets.h"
#include "tnl-neigh-cache.h"
#include "tnl-ports.h"
#include "tunnel.h"
#include "util.h"
#include "uuid.h"
/* Coverage counters and the vlog module defined for this file. */
COVERAGE_DEFINE(xlate_actions);
COVERAGE_DEFINE(xlate_actions_oversize);
COVERAGE_DEFINE(xlate_actions_too_many_output);
VLOG_DEFINE_THIS_MODULE(ofproto_dpif_xlate);
/* Maximum depth of flow table recursion (due to resubmit actions) in a
 * flow translation.
 *
 * The goal of limiting the depth of resubmits is to ensure that flow
 * translation eventually terminates.  Only resubmits to the same table or an
 * earlier table count against the maximum depth.  This is because resubmits to
 * strictly monotonically increasing table IDs will eventually terminate, since
 * any OpenFlow switch has a finite number of tables.  OpenFlow tables are most
 * commonly traversed in numerically increasing order, so this limit has little
 * effect on conventionally designed OpenFlow pipelines.
 *
 * Outputs to patch ports and to groups also count against the depth limit. */
#define MAX_DEPTH 64
/* Maximum number of resubmit actions in a flow translation, whether they are
 * recursive or not.  With MAX_DEPTH at 64 this evaluates to 4096. */
#define MAX_RESUBMITS (MAX_DEPTH * MAX_DEPTH)
/* The structure holds an array of IP addresses assigned to a bridge and the
 * number of elements in the array.  These data are mutable and are evaluated
 * when ARP or Neighbor Advertisement packets received on a native tunnel
 * port are xlated.  So 'ref_cnt' and RCU are used for synchronization. */
struct xbridge_addr {
struct in6_addr *addr; /* Array of IP addresses of xbridge. */
int n_addr; /* Number of IP addresses in 'addr'. */
struct ovs_refcount ref_cnt; /* Reference count; see the note above. */
};
/* Per-bridge state kept by this file.  Each xbridge is a node in the global
 * 'xbridges' map, keyed by its 'ofproto', and owns a list of xbundles and a
 * map of xports. */
struct xbridge {
struct hmap_node hmap_node; /* Node in global 'xbridges' map. */
struct ofproto_dpif *ofproto; /* Key in global 'xbridges' map. */
struct ovs_list xbundles; /* Owned xbundles. */
struct hmap xports; /* Indexed by ofp_port. */
char *name; /* Name used in log messages. */
struct dpif *dpif; /* Datapath interface. */
struct mac_learning *ml; /* Mac learning handle. */
struct mcast_snooping *ms; /* Multicast Snooping handle. */
struct mbridge *mbridge; /* Mirroring. */
struct dpif_sflow *sflow; /* SFlow handle, or null. */
struct dpif_ipfix *ipfix; /* Ipfix handle, or null. */
struct netflow *netflow; /* Netflow handle, or null. */
struct stp *stp; /* STP or null if disabled. */
struct rstp *rstp; /* RSTP or null if disabled. */
bool has_in_band; /* Bridge has in band control? */
bool forward_bpdu; /* Bridge forwards STP BPDUs? */
/* Datapath feature support. */
struct dpif_backer_support support;
/* IP addresses assigned to the bridge (see struct xbridge_addr). */
struct xbridge_addr *addr;
};
/* Per-bundle state kept by this file.  Each xbundle is a node in the global
 * 'xbundles' map, keyed by its 'ofbundle', and is also linked into its parent
 * xbridge's 'xbundles' list. */
struct xbundle {
struct hmap_node hmap_node; /* In global 'xbundles' map. */
struct ofbundle *ofbundle; /* Key in global 'xbundles' map. */
struct ovs_list list_node; /* In parent xbridge's 'xbundles' list. */
struct xbridge *xbridge; /* Parent xbridge. */
struct ovs_list xports; /* Contains "struct xport"s. */
char *name; /* Name used in log messages. */
struct bond *bond; /* Nonnull iff more than one port. */
struct lacp *lacp; /* LACP handle or null. */
enum port_vlan_mode vlan_mode; /* VLAN mode. */
uint16_t qinq_ethtype; /* Ethertype of dot1q-tunnel interface
                        * either 0x8100 or 0x88a8. */
int vlan; /* -1=trunk port, else a 12-bit VLAN ID. */
unsigned long *trunks; /* Bitmap of trunked VLANs, if 'vlan' == -1.
                        * NULL if all VLANs are trunked. */
unsigned long *cvlans; /* Bitmap of allowed customer VLANs,
                        * NULL if all VLANs are allowed. */
enum port_priority_tags_mode use_priority_tags;
/* Use 802.1p tag for frames in VLAN 0? */
bool floodable; /* No port has OFPUTIL_PC_NO_FLOOD set? */
bool protected; /* Protected port mode. */
};
/* Per-port state kept by this file.  Each xport is indexed three ways: in the
 * global 'xports' map by 'ofport', in the global 'xports_uuid' map by 'uuid',
 * and in its parent xbridge's 'xports' map by 'ofp_port'. */
struct xport {
struct hmap_node hmap_node; /* Node in global 'xports' map. */
struct ofport_dpif *ofport; /* Key in global 'xports' map. */
struct hmap_node ofp_node; /* Node in parent xbridge 'xports' map. */
ofp_port_t ofp_port; /* Key in parent xbridge 'xports' map. */
struct hmap_node uuid_node; /* Node in global 'xports_uuid' map. */
struct uuid uuid; /* Key in global 'xports_uuid' map. */
odp_port_t odp_port; /* Datapath port number or ODPP_NONE. */
struct ovs_list bundle_node; /* In parent xbundle (if it exists). */
struct xbundle *xbundle; /* Parent xbundle or null. */
struct netdev *netdev; /* 'ofport''s netdev. */
struct xbridge *xbridge; /* Parent bridge. */
struct xport *peer; /* Patch port peer or null. */
enum ofputil_port_config config; /* OpenFlow port configuration. */
enum ofputil_port_state state; /* OpenFlow port state. */
int stp_port_no; /* STP port number or -1 if not in use. */
struct rstp_port *rstp_port; /* RSTP port or null. */
struct hmap skb_priorities; /* Map of 'skb_priority_to_dscp's. */
bool may_enable; /* May be enabled in bonds. */
bool is_tunnel; /* Is a tunnel port. */
enum netdev_pt_mode pt_mode; /* packet_type handling. */
struct cfm *cfm; /* CFM handle or null. */
struct bfd *bfd; /* BFD handle or null. */
struct lldp *lldp; /* LLDP handle or null. */
};
/* State carried through a single flow translation. */
struct xlate_ctx {
/* Translation input; this file reads its 'trace', 'packet', 'ofproto', 'wc'
 * and 'odp_actions' members, among others. */
struct xlate_in *xin;
/* NOTE(review): presumably where translation results are stored -- confirm
 * against the users of 'xout'. */
struct xlate_out *xout;
struct xlate_cfg *xcfg;
/* Bridge on which translation is taking place; its 'name' is used in log
 * messages. */
const struct xbridge *xbridge;
/* Flow at the last commit. */
struct flow base_flow;
/* Tunnel IP destination address as received.  This is stored separately
 * as the base_flow.tunnel is cleared on init to reflect the datapath
 * behavior.  Used to make sure not to send tunneled output to ourselves,
 * which might lead to an infinite loop.  This could happen easily
 * if a tunnel is marked as 'ip_remote=flow', and the flow does not
 * actually set the tun_dst field. */
struct in6_addr orig_tunnel_ipv6_dst;
/* Stack for the push and pop actions.  See comment above nx_stack_push()
 * in nx-match.c for info on how the stack is stored. */
struct ofpbuf stack;
/* The rule that we are currently translating, or NULL. */
struct rule_dpif *rule;
/* Flow translation populates this with wildcards relevant in translation.
 * When 'xin->wc' is nonnull, this is the same pointer.  When 'xin->wc' is
 * null, this is a pointer to a temporary buffer. */
struct flow_wildcards *wc;
/* Output buffer for datapath actions.  When 'xin->odp_actions' is nonnull,
 * this is the same pointer.  When 'xin->odp_actions' is null, this points
 * to a scratch ofpbuf.  This allows code to add actions to
 * 'ctx->odp_actions' without worrying about whether the caller really
 * wants actions. */
struct ofpbuf *odp_actions;
/* Statistics maintained by xlate_table_action().
 *
 * These statistics limit the amount of work that a single flow
 * translation can perform.  The goal of the first of these, 'depth', is
 * primarily to prevent translation from performing an infinite amount of
 * work.  It counts the current depth of nested "resubmit"s (and a few
 * other activities); when a resubmit returns, it decreases.  Resubmits to
 * tables in strictly monotonically increasing order don't contribute to
 * 'depth' because they cannot cause a flow translation to take an infinite
 * amount of time (because the number of tables is finite).  Translation
 * aborts when 'depth' exceeds MAX_DEPTH.
 *
 * 'resubmits', on the other hand, prevents flow translation from
 * performing an extraordinarily large while still finite amount of work.
 * It counts the total number of resubmits (and a few other activities)
 * that have been executed.  Returning from a resubmit does not affect this
 * counter.  Thus, this limits the amount of work that a particular
 * translation can perform.  Translation aborts when 'resubmits' exceeds
 * MAX_RESUBMITS (which is much larger than MAX_DEPTH).
 */
int depth; /* Current resubmit nesting depth. */
int resubmits; /* Total number of resubmits. */
bool in_action_set; /* Currently translating action_set, if true. */
bool in_packet_out; /* Currently translating a packet_out msg, if
                     * true. */
bool pending_encap; /* True when waiting to commit a pending
                     * encap action. */
bool pending_decap; /* True when waiting to commit a pending
                     * decap action. */
struct ofpbuf *encap_data; /* May contain a pointer to an ofpbuf with
                            * context for the datapath encap action. */
uint8_t table_id; /* OpenFlow table ID where flow was found. */
ovs_be64 rule_cookie; /* Cookie of the rule being translated. */
uint32_t orig_skb_priority; /* Priority when packet arrived. */
uint32_t sflow_n_outputs; /* Number of output ports. */
odp_port_t sflow_odp_port; /* Output port for composing sFlow action. */
ofp_port_t nf_output_iface; /* Output interface index for NetFlow. */
bool exit; /* No further actions should be processed. */
mirror_mask_t mirrors; /* Bitmap of associated mirrors. */
int mirror_snaplen; /* Max size of a mirror packet in bytes. */
/* Freezing Translation
 * ====================
 *
 * At some point during translation, the code may recognize the need to halt
 * and checkpoint the translation in a way that it can be restarted again
 * later.  We call the checkpointing process "freezing" and the restarting
 * process "thawing".
 *
 * The use cases for freezing are:
 *
 *     - "Recirculation", where the translation process discovers that it
 *       doesn't have enough information to complete translation without
 *       actually executing the actions that have already been translated,
 *       which provides the additionally needed information.  In these
 *       situations, the translation process freezes itself and assigns the
 *       frozen data a unique "recirculation ID", which it associates with
 *       the data in a table in userspace (see ofproto-dpif-rid.h).  It also
 *       adds an OVS_ACTION_ATTR_RECIRC action specifying that ID to the
 *       datapath actions.  When a packet hits that action, the datapath
 *       looks its flow up again using the ID.  If there's a miss, it comes
 *       back to userspace, which finds the recirculation table entry for
 *       the ID, thaws the associated frozen data, and continues translation
 *       from that point given the additional information that is now known.
 *
 *       The archetypal example is MPLS.  As MPLS is implemented in
 *       OpenFlow, the protocol that follows the last MPLS label becomes
 *       known only when that label is popped by an OpenFlow action.  That
 *       means that Open vSwitch can't extract the headers beyond the MPLS
 *       labels until the pop action is executed.  Thus, at that point
 *       translation uses the recirculation process to extract the headers
 *       beyond the MPLS labels.
 *
 *       (OVS also uses OVS_ACTION_ATTR_RECIRC to implement hashing for
 *       output to bonds.  OVS pre-populates all the datapath flows for bond
 *       output in the datapath, though, which means that the elaborate
 *       process of coming back to userspace for a second round of
 *       translation isn't needed, and so bonds don't follow the above
 *       process.)
 *
 *     - "Continuation".  A continuation is a way for an OpenFlow controller
 *       to interpose on a packet's traversal of the OpenFlow tables.  When
 *       the translation process encounters a "controller" action with the
 *       "pause" flag, it freezes translation, serializes the frozen data,
 *       and sends it to an OpenFlow controller.  The controller then
 *       examines and possibly modifies the frozen data and eventually sends
 *       it back to the switch, which thaws it and continues translation.
 *
 * The main problem of freezing translation is preserving state, so that
 * when the translation is thawed later it resumes from where it left off,
 * without disruption.  In particular, actions must be preserved as follows:
 *
 *     - If we're freezing because an action needed more information, the
 *       action that prompted it.
 *
 *     - Any actions remaining to be translated within the current flow.
 *
 *     - If translation was frozen within a NXAST_RESUBMIT, then any actions
 *       following the resubmit action.  Resubmit actions can be nested, so
 *       this has to go all the way up the control stack.
 *
 *     - The OpenFlow 1.1+ action set.
 *
 * State that actions and flow table lookups can depend on, such as the
 * following, must also be preserved:
 *
 *     - Metadata fields (input port, registers, OF1.1+ metadata, ...).
 *
 *     - The stack used by NXAST_STACK_PUSH and NXAST_STACK_POP actions.
 *
 *     - The table ID and cookie of the flow being translated at each level
 *       of the control stack, because these can become visible through
 *       OFPAT_CONTROLLER actions (and other ways).
 *
 * Translation allows for the control of this state preservation via these
 * members.  When a need to freeze translation is identified, the
 * translation process:
 *
 * 1. Sets 'freezing' to true.
 *
 * 2. Sets 'exit' to true to tell later steps that we're exiting from the
 *    translation process.
 *
 * 3. Adds an OFPACT_UNROLL_XLATE action to 'frozen_actions', and points
 *    frozen_actions.header to the action to make it easy to find it later.
 *    This action holds the current table ID and cookie so that they can be
 *    restored during a post-recirculation upcall translation.
 *
 * 4. Adds the action that prompted recirculation and any actions following
 *    it within the same flow to 'frozen_actions', so that they can be
 *    executed during a post-recirculation upcall translation.
 *
 * 5. Returns.
 *
 * 6. The action that prompted recirculation might be nested in a stack of
 *    nested "resubmit"s that have actions remaining.  Each of these notices
 *    that we're exiting and freezing and responds by adding more
 *    OFPACT_UNROLL_XLATE actions to 'frozen_actions', as necessary,
 *    followed by any actions that were yet unprocessed.
 *
 * If we're freezing because of recirculation, the caller generates a
 * recirculation ID and associates all the state produced by this process
 * with it.  For post-recirculation upcall translation, the caller passes it
 * back in for the new translation to execute.  The process yielded a set of
 * ofpacts that can be translated directly, so it is not much of a special
 * case at that point.
 */
bool freezing; /* True while translation is being frozen. */
bool recirc_update_dp_hash; /* Generated recirculation will be preceded
                             * by datapath HASH action to get an updated
                             * dp_hash after recirculation. */
/* Hash algorithm and basis recorded by ctx_trigger_recirculate_with_hash()
 * together with 'recirc_update_dp_hash'. */
uint32_t dp_hash_alg;
uint32_t dp_hash_basis;
struct ofpbuf frozen_actions; /* Actions saved for thawing; see above. */
/* NOTE(review): presumably the "controller" action with the "pause" flag
 * that caused a continuation freeze, or NULL -- confirm.  Reset to NULL by
 * ctx_cancel_freeze(). */
const struct ofpact_controller *pause;
/* True if a packet was but is no longer MPLS (due to an MPLS pop action).
 * This is a trigger for recirculation in cases where translating an action
 * or looking up a flow requires access to the fields of the packet after
 * the MPLS label stack that was originally present. */
bool was_mpls;
/* True if conntrack has been performed on this packet during processing
 * on the current bridge.  This is used to determine whether conntrack
 * state from the datapath should be honored after thawing. */
bool conntracked;
/* Pointer to an embedded NAT action in a conntrack action, or NULL. */
struct ofpact_nat *ct_nat_action;
/* OpenFlow 1.1+ action set.
 *
 * 'action_set' accumulates "struct ofpact"s added by OFPACT_WRITE_ACTIONS.
 * When translation is otherwise complete, ofpacts_execute_action_set()
 * converts it to a set of "struct ofpact"s that can be translated into
 * datapath actions. */
bool action_set_has_group; /* Action set contains OFPACT_GROUP? */
struct ofpbuf action_set; /* Action set. */
enum xlate_error error; /* Translation error; xlate_strerror() turns it
                         * into a message. */
};
/* Structures to track VLAN manipulation.
 *
 * 'struct xvlan_single' describes one VLAN header; 'struct xvlan' holds up to
 * FLOW_MAX_VLAN_HEADERS of them. */
struct xvlan_single {
uint16_t tpid; /* TPID of this header (presumably the tag's ethertype). */
uint16_t vid; /* VLAN ID (presumably a 12-bit value -- confirm). */
uint16_t pcp; /* PCP (presumably the 802.1p priority bits -- confirm). */
};
struct xvlan {
struct xvlan_single v[FLOW_MAX_VLAN_HEADERS];
};
/* Returns a constant, human-readable description of translation error
 * 'error'.  Values without a dedicated message (XLATE_MAX, or anything that
 * is not an enumerator) yield "Unknown error".
 *
 * The switch deliberately has no 'default' case so that the compiler can
 * point out enumerators that are not handled here. */
const char *xlate_strerror(enum xlate_error error)
{
    const char *description = "Unknown error";

    switch (error) {
    case XLATE_OK:
        description = "OK";
        break;
    case XLATE_BRIDGE_NOT_FOUND:
        description = "Bridge not found";
        break;
    case XLATE_RECURSION_TOO_DEEP:
        description = "Recursion too deep";
        break;
    case XLATE_TOO_MANY_RESUBMITS:
        description = "Too many resubmits";
        break;
    case XLATE_STACK_TOO_DEEP:
        description = "Stack too deep";
        break;
    case XLATE_NO_RECIRCULATION_CONTEXT:
        description = "No recirculation context";
        break;
    case XLATE_RECIRCULATION_CONFLICT:
        description = "Recirculation conflict";
        break;
    case XLATE_TOO_MANY_MPLS_LABELS:
        description = "Too many MPLS labels";
        break;
    case XLATE_INVALID_TUNNEL_METADATA:
        description = "Invalid tunnel metadata";
        break;
    case XLATE_UNSUPPORTED_PACKET_TYPE:
        description = "Unsupported packet type";
        break;
    case XLATE_CONGESTION_DROP:
        description = "Congestion Drop";
        break;
    case XLATE_FORWARDING_DISABLED:
        description = "Forwarding is disabled";
        break;
    case XLATE_MAX:
        /* Not a real error code; keep the "Unknown error" text. */
        break;
    }
    return description;
}
/* Forward declarations of static functions that are used before their
 * definitions appear in this file. */
static void xlate_action_set(struct xlate_ctx *ctx);
static void xlate_commit_actions(struct xlate_ctx *ctx);
static void
patch_port_output(struct xlate_ctx *ctx, const struct xport *in_dev,
struct xport *out_dev, bool is_last_action);
/* Requests that translation in 'ctx' be frozen: marks 'ctx' as freezing and
 * sets 'exit' so that no further actions are processed. */
static void
ctx_trigger_freeze(struct xlate_ctx *ctx)
{
    ctx->freezing = true;
    ctx->exit = true;
}
/* Like ctx_trigger_freeze(), but additionally asks for the generated
 * recirculation to be preceded by a datapath HASH action.  'type' is stored
 * as the hash algorithm and 'basis' as the hash basis in 'ctx'. */
static void
ctx_trigger_recirculate_with_hash(struct xlate_ctx *ctx, uint32_t type,
                                  uint32_t basis)
{
    /* Hash parameters for the HASH action. */
    ctx->dp_hash_basis = basis;
    ctx->dp_hash_alg = type;
    ctx->recirc_update_dp_hash = true;

    /* Same state changes that ctx_trigger_freeze() makes. */
    ctx->freezing = true;
    ctx->exit = true;
}
/* Returns true if 'ctx' has not accumulated any frozen actions yet, that is,
 * if its 'frozen_actions' buffer is still empty. */
static bool
ctx_first_frozen_action(const struct xlate_ctx *ctx)
{
    return ctx->frozen_actions.size == 0;
}
/* Abandons a freeze that is in progress in 'ctx': clears the 'freezing' and
 * 'recirc_update_dp_hash' flags, discards everything accumulated in
 * 'frozen_actions', and forgets any pending 'pause' action.
 *
 * Does nothing if 'ctx' is not freezing.  'exit' is left as it is. */
static void
ctx_cancel_freeze(struct xlate_ctx *ctx)
{
    if (!ctx->freezing) {
        return;
    }

    ctx->freezing = false;
    ctx->recirc_update_dp_hash = false;

    ofpbuf_clear(&ctx->frozen_actions);
    ctx->frozen_actions.header = NULL;

    ctx->pause = NULL;
}
/* Forward declaration; the definition appears later in this file. */
static void finish_freezing(struct xlate_ctx *ctx);
/* A controller may use OFPP_NONE as the ingress port to indicate that
 * a packet did not arrive on a "real" port.  'ofpp_none_bundle' exists for
 * when an input bundle is needed for validation (e.g., mirroring or
 * OFPP_NORMAL processing).  It is not connected to an 'ofproto' and does not
 * have any 'port' structs, so care must be taken when dealing with it.
 *
 * Only 'name' and 'vlan_mode' are set explicitly; every other member is
 * zero-initialized. */
static struct xbundle ofpp_none_bundle = {
.name = "OFPP_NONE",
.vlan_mode = PORT_VLAN_TRUNK
};
/* Node in 'xport''s 'skb_priorities' map.  Used to maintain a map from
 * 'priority' (the datapath's term for QoS queue) to the dscp bits which all
 * traffic egressing the 'ofport' with that priority should be marked with. */
struct skb_priority_to_dscp {
struct hmap_node hmap_node; /* Node in 'xport''s 'skb_priorities' map. */
uint32_t skb_priority; /* Priority of this queue (see struct flow). */
uint8_t dscp; /* DSCP bits to mark outgoing traffic with. */
};
/* Xlate config contains hash maps of all bridges, bundles and ports.
 * 'xcfgp' contains the pointer to the current xlate configuration.
 * When the main thread needs to change the configuration, it copies 'xcfgp'
 * to 'new_xcfg' and edits 'new_xcfg'.  This enables the use of RCU locking,
 * which does not block handler and revalidator threads. */
struct xlate_cfg {
struct hmap xbridges; /* Contains "struct xbridge"s, keyed by 'ofproto'. */
struct hmap xbundles; /* Contains "struct xbundle"s, keyed by 'ofbundle'. */
struct hmap xports; /* Contains "struct xport"s, keyed by 'ofport'. */
struct hmap xports_uuid; /* Contains "struct xport"s, keyed by 'uuid'. */
};
/* Current configuration, published through RCU; initially NULL. */
static OVSRCU_TYPE(struct xlate_cfg *) xcfgp = OVSRCU_INITIALIZER(NULL);
/* Configuration copy being edited by the main thread; initially NULL. */
static struct xlate_cfg *new_xcfg = NULL;
/* Signature shared by do_xlate_actions() and clone_xlate_actions(), so that
 * either can be handed to xlate_table_action() as its last argument.
 *
 * NOTE(review): the two trailing 'bool' parameters are unnamed in these
 * declarations; see the function definitions for their meaning. */
typedef void xlate_actions_handler(const struct ofpact *, size_t ofpacts_len,
struct xlate_ctx *, bool, bool);
/* Forward declarations of translation helpers defined later in this file. */
static bool may_receive(const struct xport *, struct xlate_ctx *);
static void do_xlate_actions(const struct ofpact *, size_t ofpacts_len,
struct xlate_ctx *, bool, bool);
static void clone_xlate_actions(const struct ofpact *, size_t ofpacts_len,
struct xlate_ctx *, bool, bool);
static void xlate_normal(struct xlate_ctx *);
static void xlate_normal_flood(struct xlate_ctx *ct,
struct xbundle *in_xbundle, struct xvlan *);
static void xlate_table_action(struct xlate_ctx *, ofp_port_t in_port,
uint8_t table_id, bool may_packet_in,
bool honor_table_miss, bool with_ct_orig,
bool is_last_action, xlate_actions_handler *);
static bool input_vid_is_valid(const struct xlate_ctx *,
uint16_t vid, struct xbundle *);
/* Forward declarations of the 'struct xvlan' helpers. */
static void xvlan_copy(struct xvlan *dst, const struct xvlan *src);
static void xvlan_pop(struct xvlan *src);
static void xvlan_push_uninit(struct xvlan *src);
static void xvlan_extract(const struct flow *, struct xvlan *);
static void xvlan_put(struct flow *, const struct xvlan *,
enum port_priority_tags_mode);
static void xvlan_input_translate(const struct xbundle *,
const struct xvlan *in,
struct xvlan *xvlan);
static void xvlan_output_translate(const struct xbundle *,
const struct xvlan *xvlan,
struct xvlan *out);
static void output_normal(struct xlate_ctx *, const struct xbundle *,
const struct xvlan *);
/* Optional bond recirculation parameter to compose_output_action(). */
struct xlate_bond_recirc {
uint32_t recirc_id; /* If nonzero, use recirculation instead of output. */
uint8_t hash_alg; /* If nonzero, compute a hash before the recirc. */
uint32_t hash_basis; /* Basis for the hash computed before the recirc. */
};
/* 'xr' has type 'struct xlate_bond_recirc', which is documented as an
 * optional parameter to this function. */
static void compose_output_action(struct xlate_ctx *, ofp_port_t ofp_port,
const struct xlate_bond_recirc *xr,
bool is_last_action, bool truncate);
/* Lookups in a 'struct xlate_cfg'. */
static struct xbridge *xbridge_lookup(struct xlate_cfg *,
const struct ofproto_dpif *);
static struct xbridge *xbridge_lookup_by_uuid(struct xlate_cfg *,
const struct uuid *);
static struct xbundle *xbundle_lookup(struct xlate_cfg *,
const struct ofbundle *);
static struct xport *xport_lookup(struct xlate_cfg *,
const struct ofport_dpif *);
static struct xport *xport_lookup_by_uuid(struct xlate_cfg *,
const struct uuid *);
static struct xport *get_ofp_port(const struct xbridge *, ofp_port_t ofp_port);
/* Helpers for an xport's 'skb_priorities' map. */
static struct skb_priority_to_dscp *get_skb_priority(const struct xport *,
uint32_t skb_priority);
static void clear_skb_priorities(struct xport *);
static size_t count_skb_priorities(const struct xport *);
static bool dscp_from_skb_priority(const struct xport *, uint32_t skb_priority,
uint8_t *dscp);
/* Init, set, remove and copy helpers for xbridges, xbundles and xports. */
static void xlate_xbridge_init(struct xlate_cfg *, struct xbridge *);
static void xlate_xbundle_init(struct xlate_cfg *, struct xbundle *);
static void xlate_xport_init(struct xlate_cfg *, struct xport *);
static void xlate_xbridge_set(struct xbridge *, struct dpif *,
const struct mac_learning *, struct stp *,
struct rstp *, const struct mcast_snooping *,
const struct mbridge *,
const struct dpif_sflow *,
const struct dpif_ipfix *,
const struct netflow *,
bool forward_bpdu, bool has_in_band,
const struct dpif_backer_support *,
const struct xbridge_addr *);
static void xlate_xbundle_set(struct xbundle *xbundle,
enum port_vlan_mode vlan_mode,
uint16_t qinq_ethtype, int vlan,
unsigned long *trunks, unsigned long *cvlans,
enum port_priority_tags_mode,
const struct bond *bond, const struct lacp *lacp,
bool floodable, bool protected);
static void xlate_xport_set(struct xport *xport, odp_port_t odp_port,
const struct netdev *netdev, const struct cfm *cfm,
const struct bfd *bfd, const struct lldp *lldp,
int stp_port_no, const struct rstp_port *rstp_port,
enum ofputil_port_config config,
enum ofputil_port_state state, bool is_tunnel,
bool may_enable);
static void xlate_xbridge_remove(struct xlate_cfg *, struct xbridge *);
static void xlate_xbundle_remove(struct xlate_cfg *, struct xbundle *);
static void xlate_xport_remove(struct xlate_cfg *, struct xport *);
static void xlate_xbridge_copy(struct xbridge *);
static void xlate_xbundle_copy(struct xbridge *, struct xbundle *);
static void xlate_xport_copy(struct xbridge *, struct xbundle *,
struct xport *);
static void xlate_xcfg_free(struct xlate_cfg *);
/* Tracing helpers. */

/* When tracing is enabled in 'ctx', creates a trace node of type 'type' and
 * adds it to the trace kept in ctx->xin.  The node's text is produced by
 * expanding 'format' and the arguments that follow it as a printf-style
 * format string.
 *
 * Returns the list of nodes embedded within the new trace node.  Ordinarily
 * the caller can ignore the return value; it is useful only to a caller that
 * needs to nest more trace nodes within the new node.
 *
 * When tracing is not enabled, does nothing and returns NULL. */
static struct ovs_list * OVS_PRINTF_FORMAT(3, 4)
xlate_report(const struct xlate_ctx *ctx, enum oftrace_node_type type,
             const char *format, ...)
{
    if (!OVS_UNLIKELY(ctx->xin->trace)) {
        return NULL;
    }

    va_list ap;
    va_start(ap, format);
    char *message = xvasprintf(format, ap);
    va_end(ap);

    struct ovs_list *children
        = &oftrace_report(ctx->xin->trace, type, message)->subs;

    /* 'message' is still ours to release after the node has been created. */
    free(message);
    return children;
}
/* Variant of xlate_report() for errors that are serious enough to be logged
 * even when we are not tracing.
 *
 * When tracing, the formatted message becomes an OFT_ERROR trace node.
 * Otherwise it is logged at warning level, followed by the bridge name and
 * the flow being processed; in that case nothing is logged if there is no
 * packet in ctx->xin or if the rate limiter drops the message. */
static void OVS_PRINTF_FORMAT(2, 3)
xlate_report_error(const struct xlate_ctx *ctx, const char *format, ...)
{
    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);

    if (!OVS_UNLIKELY(ctx->xin->trace)) {
        /* Not tracing: log only for real packets, and only as often as the
         * rate limiter allows. */
        if (!ctx->xin->packet) {
            return;
        }
        if (VLOG_DROP_WARN(&rl)) {
            return;
        }
    }

    struct ds msg = DS_EMPTY_INITIALIZER;
    va_list ap;

    va_start(ap, format);
    ds_put_format_valist(&msg, format, ap);
    va_end(ap);

    if (!ctx->xin->trace) {
        ds_put_format(&msg, " on bridge %s while processing ",
                      ctx->xbridge->name);
        flow_format(&msg, &ctx->base_flow, NULL);
        VLOG_WARN("%s", ds_cstr(&msg));
    } else {
        oftrace_report(ctx->xin->trace, OFT_ERROR, ds_cstr(&msg));
    }

    ds_destroy(&msg);
}
/* Variant of xlate_report() for messages that should be logged at the info
 * level even when we are not tracing.
 *
 * When tracing, the formatted message becomes an OFT_WARN trace node.
 * Otherwise it is logged at info level, followed by the bridge name and the
 * flow being processed; in that case nothing is logged if there is no packet
 * in ctx->xin or if the rate limiter drops the message. */
static void OVS_PRINTF_FORMAT(2, 3)
xlate_report_info(const struct xlate_ctx *ctx, const char *format, ...)
{
    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);

    if (!OVS_UNLIKELY(ctx->xin->trace)) {
        /* Not tracing: log only for real packets, and only as often as the
         * rate limiter allows. */
        if (!ctx->xin->packet) {
            return;
        }
        if (VLOG_DROP_INFO(&rl)) {
            return;
        }
    }

    struct ds msg = DS_EMPTY_INITIALIZER;
    va_list ap;

    va_start(ap, format);
    ds_put_format_valist(&msg, format, ap);
    va_end(ap);

    if (!ctx->xin->trace) {
        ds_put_format(&msg, " on bridge %s while processing ",
                      ctx->xbridge->name);
        flow_format(&msg, &ctx->base_flow, NULL);
        VLOG_INFO("%s", ds_cstr(&msg));
    } else {
        oftrace_report(ctx->xin->trace, OFT_WARN, ds_cstr(&msg));
    }

    ds_destroy(&msg);
}
/* Variant of xlate_report() for messages that are valuable for debugging and
 * so should be logged at debug level even when we are not tracing.
 *
 * When tracing, the formatted message becomes a trace node of type 'type'.
 * Otherwise it is logged at debug level, prefixed with the bridge name; in
 * that case nothing is logged if there is no packet in ctx->xin or if the
 * rate limiter drops the message. */
static void OVS_PRINTF_FORMAT(3, 4)
xlate_report_debug(const struct xlate_ctx *ctx, enum oftrace_node_type type,
                   const char *format, ...)
{
    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(30, 300);

    if (!OVS_UNLIKELY(ctx->xin->trace)) {
        /* Not tracing: log only for real packets, and only as often as the
         * rate limiter allows. */
        if (!ctx->xin->packet) {
            return;
        }
        if (VLOG_DROP_DBG(&rl)) {
            return;
        }
    }

    struct ds msg = DS_EMPTY_INITIALIZER;
    va_list ap;

    va_start(ap, format);
    ds_put_format_valist(&msg, format, ap);
    va_end(ap);

    if (!ctx->xin->trace) {
        VLOG_DBG("bridge %s: %s", ctx->xbridge->name, ds_cstr(&msg));
    } else {
        oftrace_report(ctx->xin->trace, type, ds_cstr(&msg));
    }

    ds_destroy(&msg);
}
/* If tracing is enabled in 'ctx', appends a node of the given 'type' to the
* trace, whose text is 'title' followed by a formatted version of the
* 'ofpacts_len' OpenFlow actions in 'ofpacts'.
*
* If tracing is not enabled, does nothing. */
static void
xlate_report_actions(const struct xlate_ctx *ctx, enum oftrace_node_type type,
const char *title,
const struct ofpact *ofpacts, size_t ofpacts_len)
{
if (OVS_UNLIKELY(ctx->xin->trace)) {
struct ds s = DS_EMPTY_INITIALIZER;
ds_put_format(&s, "%s: ", title);
struct ofpact_format_params fp = { .s = &s };
ofpacts_format(ofpacts, ofpacts_len, &fp);
oftrace_report(ctx->xin->trace, type, ds_cstr(&s));
ds_destroy(&s);
}
}
/* If tracing is enabled in 'ctx', appends a node of type OFT_DETAIL to the
* trace, whose the message is a formatted version of the OpenFlow action set.
* 'verb' should be "was" or "is", depending on whether the action set reported
* is the new action set or the old one.
*
* If tracing is not enabled, does nothing. */
static void
xlate_report_action_set(const struct xlate_ctx *ctx, const char *verb)
{
if (OVS_UNLIKELY(ctx->xin->trace)) {
struct ofpbuf action_list;
ofpbuf_init(&action_list, 0);
ofpacts_execute_action_set(&action_list, &ctx->action_set);
if (action_list.size) {
struct ds s = DS_EMPTY_INITIALIZER;
struct ofpact_format_params fp = { .s = &s };
ofpacts_format(action_list.data, action_list.size, &fp);
xlate_report(ctx, OFT_DETAIL, "action set %s: %s",
verb, ds_cstr(&s));
ds_destroy(&s);
} else {
xlate_report(ctx, OFT_DETAIL, "action set %s empty", verb);
}
ofpbuf_uninit(&action_list);
}
}
/* If tracing is enabled in 'ctx', appends a node representing 'rule' (in
* OpenFlow table 'table_id') to the trace and makes this node the parent for
* future trace nodes. The caller should save ctx->xin->trace before calling
* this function, then after tracing all of the activities under the table,
* restore its previous value.
*
* If tracing is not enabled, does nothing. */
static void
xlate_report_table(const struct xlate_ctx *ctx, struct rule_dpif *rule,
uint8_t table_id)
{
if (OVS_LIKELY(!ctx->xin->trace)) {
return;
}
struct ds s = DS_EMPTY_INITIALIZER;
ds_put_format(&s, "%2d. ", table_id);
if (rule == ctx->xin->ofproto->miss_rule) {
ds_put_cstr(&s, "No match, and a \"packet-in\" is called for.");
} else if (rule == ctx->xin->ofproto->no_packet_in_rule) {
ds_put_cstr(&s, "No match.");
} else if (rule == ctx->xin->ofproto->drop_frags_rule) {
ds_put_cstr(&s, "Packets are IP fragments and "
"the fragment handling mode is \"drop\".");
} else {
struct ofputil_port_map map = OFPUTIL_PORT_MAP_INITIALIZER(&map);
if (ctx->xin->names) {
struct ofproto_dpif *ofprotop;
ofprotop = ofproto_dpif_lookup_by_name(ctx->xbridge->name);
ofproto_append_ports_to_map(&map, ofprotop->up.ports);
}
minimatch_format(&rule->up.cr.match,
ofproto_get_tun_tab(&ctx->xin->ofproto->up),
&map, &s, OFP_DEFAULT_PRIORITY);
ofputil_port_map_destroy(&map);
if (ds_last(&s) != ' ') {
ds_put_cstr(&s, ", ");
}
ds_put_format(&s, "priority %d", rule->up.cr.priority);
if (rule->up.flow_cookie) {
ds_put_format(&s, ", cookie %#"PRIx64,
ntohll(rule->up.flow_cookie));
}
}
ctx->xin->trace = &oftrace_report(ctx->xin->trace, OFT_TABLE,
ds_cstr(&s))->subs;
ds_destroy(&s);
}
/* If tracing is enabled in 'ctx', adds an OFT_DETAIL trace node to 'ctx'
* reporting the value of subfield 'sf'.
*
* If tracing is not enabled, does nothing. */
static void
xlate_report_subfield(const struct xlate_ctx *ctx,
const struct mf_subfield *sf)
{
if (OVS_UNLIKELY(ctx->xin->trace)) {
struct ds s = DS_EMPTY_INITIALIZER;
mf_format_subfield(sf, &s);
ds_put_cstr(&s, " is now ");
if (sf->ofs == 0 && sf->n_bits >= sf->field->n_bits) {
union mf_value value;
mf_get_value(sf->field, &ctx->xin->flow, &value);
mf_format(sf->field, &value, NULL, NULL, &s);
} else {
union mf_subvalue cst;
mf_read_subfield(sf, &ctx->xin->flow, &cst);
ds_put_hex(&s, &cst, sizeof cst);
}
xlate_report(ctx, OFT_DETAIL, "%s", ds_cstr(&s));
ds_destroy(&s);
}
}
/* Initializes the containers embedded in 'xbridge' (its list of xbundles and
 * its map of xports) and inserts 'xbridge' into 'xcfg''s map of xbridges,
 * hashed on the UUID of its ofproto. */
static void
xlate_xbridge_init(struct xlate_cfg *xcfg, struct xbridge *xbridge)
{
    hmap_init(&xbridge->xports);
    ovs_list_init(&xbridge->xbundles);

    const size_t hash = uuid_hash(&xbridge->ofproto->uuid);

    hmap_insert(&xcfg->xbridges, &xbridge->hmap_node, hash);
}
/* Initializes 'xbundle''s list of xports, appends 'xbundle' to the xbundles
 * list of its parent xbridge, and inserts it into 'xcfg''s map of xbundles,
 * hashed on its ofbundle pointer. */
static void
xlate_xbundle_init(struct xlate_cfg *xcfg, struct xbundle *xbundle)
{
    const size_t hash = hash_pointer(xbundle->ofbundle, 0);

    ovs_list_init(&xbundle->xports);
    ovs_list_insert(&xbundle->xbridge->xbundles, &xbundle->list_node);
    hmap_insert(&xcfg->xbundles, &xbundle->hmap_node, hash);
}
/* Initializes 'xport''s map of skb priorities and registers 'xport' in three
 * indexes: 'xcfg''s map of xports (hashed on the ofport pointer), the parent
 * xbridge's map of xports (hashed on the OpenFlow port number), and 'xcfg''s
 * map of xports by UUID. */
static void
xlate_xport_init(struct xlate_cfg *xcfg, struct xport *xport)
{
    hmap_init(&xport->skb_priorities);

    const size_t ofport_hash = hash_pointer(xport->ofport, 0);
    hmap_insert(&xcfg->xports, &xport->hmap_node, ofport_hash);

    const size_t ofp_port_hash = hash_ofp_port(xport->ofp_port);
    hmap_insert(&xport->xbridge->xports, &xport->ofp_node, ofp_port_hash);

    const size_t uuid_hash_value = uuid_hash(&xport->uuid);
    hmap_insert(&xcfg->xports_uuid, &xport->uuid_node, uuid_hash_value);
}
/* Looks up the addresses of the network device named after 'xbridge' and
 * returns an address record for them.
 *
 * If the device cannot be opened, its addresses cannot be obtained, or the
 * addresses match what 'xbridge' already holds, returns the existing
 * xbridge->addr (possibly NULL) without touching its reference count.
 * Otherwise returns a newly allocated record, with its reference count
 * initialized, that takes ownership of the fetched address array. */
static struct xbridge_addr *
xbridge_addr_create(struct xbridge *xbridge)
{
    struct xbridge_addr *current = xbridge->addr;
    struct in6_addr *addrs = NULL;
    struct in6_addr *masks = NULL;
    struct netdev *netdev;
    int n_addrs = 0;

    if (netdev_open(xbridge->name, NULL, &netdev)) {
        return current;
    }

    struct xbridge_addr *result = current;

    if (!netdev_get_addr_list(netdev, &addrs, &masks, &n_addrs)) {
        if (!current
            || n_addrs != current->n_addr
            || (current->addr
                && memcmp(addrs, current->addr, sizeof *addrs * n_addrs))) {
            /* No record yet, or the address list changed: build a new
             * record around the array that was just fetched. */
            result = xzalloc(sizeof *result);
            result->addr = addrs;
            result->n_addr = n_addrs;
            ovs_refcount_init(&result->ref_cnt);
        } else {
            /* Nothing changed, so the fetched copy is not needed. */
            free(addrs);
        }
        /* The masks are never stored. */
        free(masks);
    }
    netdev_close(netdev);

    return result;
}
/* Takes a new reference to 'addr_' and returns it as a non-const pointer.
 * A null 'addr_' is passed through unchanged. */
static struct xbridge_addr *
xbridge_addr_ref(const struct xbridge_addr *addr_)
{
    struct xbridge_addr *ref = CONST_CAST(struct xbridge_addr *, addr_);

    if (!ref) {
        return ref;
    }

    ovs_refcount_ref(&ref->ref_cnt);
    return ref;
}
/* Drops one reference to 'addr'.  When ovs_refcount_unref_relaxed() returns
 * 1, which this code treats as "that was the last reference", frees the
 * address array and the record itself.  A null 'addr' is ignored. */
static void
xbridge_addr_unref(struct xbridge_addr *addr)
{
    if (!addr) {
        return;
    }
    if (ovs_refcount_unref_relaxed(&addr->ref_cnt) != 1) {
        return;
    }

    free(addr->addr);
    free(addr);
}
static void
xlate_xbridge_set(struct xbridge *xbridge,
struct dpif *dpif,
const struct mac_learning *ml, struct stp *stp,
struct rstp *rstp, const struct mcast_snooping *ms,
const struct mbridge *mbridge,
const struct dpif_sflow *sflow,
const struct dpif_ipfix *ipfix,
const struct netflow *netflow,
bool forward_bpdu, bool has_in_band,
const struct dpif_backer_support *support,
const struct xbridge_addr *addr)
{
if (xbridge->ml != ml) {
mac_learning_unref(xbridge->ml);
xbridge->ml = mac_learning_ref(ml);
}
if (xbridge->ms != ms) {
mcast_snooping_unref(xbridge->ms);
xbridge->ms = mcast_snooping_ref(ms);
}
if (xbridge->mbridge != mbridge) {
mbridge_unref(xbridge->mbridge);
xbridge->mbridge = mbridge_ref(mbridge);
}
if (xbridge->sflow != sflow) {
dpif_sflow_unref(xbridge->sflow);
xbridge->sflow = dpif_sflow_ref(sflow);
}
if (xbridge->ipfix != ipfix) {
dpif_ipfix_unref(xbridge->ipfix);
xbridge->ipfix = dpif_ipfix_ref(ipfix);
}
if (xbridge->stp != stp) {
stp_unref(xbridge->stp);
xbridge->stp = stp_ref(stp);
}
if (xbridge->rstp != rstp) {
rstp_unref(xbridge->rstp);
xbridge->rstp = rstp_ref(rstp);
}