From 1a03fc7da32e74aacbf638d3617a290ddffaa069 Mon Sep 17 00:00:00 2001 From: Babu Shanmugam Date: Wed, 5 Oct 2016 13:57:47 +0530 Subject: [PATCH] DSCP marking on packets This patch adds support for marking qos on IP packets based on arbitrary match criteria for a logical switch. Signed-off-by: Babu Shanmugam Suggested-by: Mickey Spiegel Acked-by: Mickey Spiegel [blp@ovn.org fixes races in the test and added ovn-trace checks] Signed-off-by: Ben Pfaff --- NEWS | 1 + ovn/lib/logical-fields.c | 2 +- ovn/northd/ovn-northd.8.xml | 47 +++++++++++++++----- ovn/northd/ovn-northd.c | 80 ++++++++++++++++++++++------------ ovn/ovn-nb.ovsschema | 26 ++++++++++- ovn/ovn-nb.xml | 57 ++++++++++++++++++++++++ ovn/utilities/ovn-nbctl.c | 5 +++ tests/ovn.at | 86 ++++++++++++++++++++++++++++++++++++- 8 files changed, 263 insertions(+), 41 deletions(-) diff --git a/NEWS b/NEWS index ee873d67e0b..95cb2b2b247 100644 --- a/NEWS +++ b/NEWS @@ -2,6 +2,7 @@ Post-v2.6.0 --------------------- - OVN: * QoS is now implemented via egress shaping rather than ingress policing. + * DSCP marking is now supported, via the new northbound QoS table. - Fixed regression in table stats maintenance introduced in OVS 2.3.0, wherein the number of OpenFlow table hits and misses was not accurate. 
diff --git a/ovn/lib/logical-fields.c b/ovn/lib/logical-fields.c index 5229be37e22..2d3e2178e14 100644 --- a/ovn/lib/logical-fields.c +++ b/ovn/lib/logical-fields.c @@ -137,7 +137,7 @@ ovn_init_symtab(struct shash *symtab) expr_symtab_add_predicate(symtab, "ip6", "eth.type == 0x86dd"); expr_symtab_add_predicate(symtab, "ip", "ip4 || ip6"); expr_symtab_add_field(symtab, "ip.proto", MFF_IP_PROTO, "ip", true); - expr_symtab_add_field(symtab, "ip.dscp", MFF_IP_DSCP, "ip", false); + expr_symtab_add_field(symtab, "ip.dscp", MFF_IP_DSCP_SHIFTED, "ip", false); expr_symtab_add_field(symtab, "ip.ecn", MFF_IP_ECN, "ip", false); expr_symtab_add_field(symtab, "ip.ttl", MFF_IP_TTL, "ip", false); diff --git a/ovn/northd/ovn-northd.8.xml b/ovn/northd/ovn-northd.8.xml index 77eb3d18430..df53d4c7f56 100644 --- a/ovn/northd/ovn-northd.8.xml +++ b/ovn/northd/ovn-northd.8.xml @@ -362,7 +362,27 @@ -

Ingress Table 7: LB

+

Ingress Table 7: from-lport QoS marking

+ +

+ Logical flows in this table closely reproduce those in the + QoS table in the OVN_Northbound database + for the from-lport direction. +

+ +
    +
  • + For every row referenced by a logical switch's qos_rules column, a flow is added at + the priority specified by that row of the QoS table. +
  • + +
  • + One priority-0 fallback flow that matches all packets and advances to + the next table. +
  • +
+ +

Ingress Table 8: LB

It contains a priority-0 flow that simply moves traffic to the next @@ -375,7 +395,7 @@ connection.)

-

Ingress Table 8: Stateful

+

Ingress Table 9: Stateful

  • @@ -412,7 +432,7 @@
-

Ingress Table 9: ARP/ND responder

+

Ingress Table 10: ARP/ND responder

This table implements ARP/ND responder for known IPs. It contains these @@ -507,7 +527,7 @@ nd_na { -

Ingress Table 10: DHCP option processing

+

Ingress Table 11: DHCP option processing

This table adds the DHCPv4 options to a DHCPv4 packet from the @@ -567,7 +587,7 @@ next; -

Ingress Table 11: DHCP responses

+

Ingress Table 12: DHCP responses

This table implements DHCP responder for the DHCP replies generated by @@ -649,7 +669,7 @@ output; -

Ingress Table 12: Destination Lookup

+

Ingress Table 13: Destination Lookup

This table implements switching behavior. It contains these logical @@ -716,7 +736,14 @@ output; to-lport ACLs.

-

Egress Table 5: Stateful

+

Egress Table 5: to-lport QoS marking

+ +

+ This is similar to the ingress table QoS marking, except that it + applies to to-lport QoS rules. +

+ +

Egress Table 6: Stateful

This is similar to ingress table Stateful except that @@ -727,10 +754,10 @@ output; Also a priority 34000 logical flow is added for each logical port which has DHCPv4 options defined to allow the DHCPv4 reply packet and which has DHCPv6 options defined to allow the DHCPv6 reply packet from the - Ingress Table 11: DHCP responses. + Ingress Table 12: DHCP responses.

-

Egress Table 6: Egress Port Security - IP

+

Egress Table 7: Egress Port Security - IP

This is similar to the port security logic in table @@ -740,7 +767,7 @@ output; ip4.src and ip6.src

-

Egress Table 7: Egress Port Security - L2

+

Egress Table 8: Egress Port Security - L2

This is similar to the ingress port security logic in ingress table diff --git a/ovn/northd/ovn-northd.c b/ovn/northd/ovn-northd.c index 0150a8cdee8..ad4e38da950 100644 --- a/ovn/northd/ovn-northd.c +++ b/ovn/northd/ovn-northd.c @@ -96,21 +96,22 @@ enum ovn_datapath_type { * form the stage's full name, e.g. S_SWITCH_IN_PORT_SEC_L2, * S_ROUTER_OUT_DELIVERY. */ enum ovn_stage { -#define PIPELINE_STAGES \ - /* Logical switch ingress stages. */ \ - PIPELINE_STAGE(SWITCH, IN, PORT_SEC_L2, 0, "ls_in_port_sec_l2") \ - PIPELINE_STAGE(SWITCH, IN, PORT_SEC_IP, 1, "ls_in_port_sec_ip") \ - PIPELINE_STAGE(SWITCH, IN, PORT_SEC_ND, 2, "ls_in_port_sec_nd") \ - PIPELINE_STAGE(SWITCH, IN, PRE_ACL, 3, "ls_in_pre_acl") \ - PIPELINE_STAGE(SWITCH, IN, PRE_LB, 4, "ls_in_pre_lb") \ - PIPELINE_STAGE(SWITCH, IN, PRE_STATEFUL, 5, "ls_in_pre_stateful") \ - PIPELINE_STAGE(SWITCH, IN, ACL, 6, "ls_in_acl") \ - PIPELINE_STAGE(SWITCH, IN, LB, 7, "ls_in_lb") \ - PIPELINE_STAGE(SWITCH, IN, STATEFUL, 8, "ls_in_stateful") \ - PIPELINE_STAGE(SWITCH, IN, ARP_ND_RSP, 9, "ls_in_arp_rsp") \ - PIPELINE_STAGE(SWITCH, IN, DHCP_OPTIONS, 10, "ls_in_dhcp_options") \ - PIPELINE_STAGE(SWITCH, IN, DHCP_RESPONSE, 11, "ls_in_dhcp_response") \ - PIPELINE_STAGE(SWITCH, IN, L2_LKUP, 12, "ls_in_l2_lkup") \ +#define PIPELINE_STAGES \ + /* Logical switch ingress stages. 
*/ \ + PIPELINE_STAGE(SWITCH, IN, PORT_SEC_L2, 0, "ls_in_port_sec_l2") \ + PIPELINE_STAGE(SWITCH, IN, PORT_SEC_IP, 1, "ls_in_port_sec_ip") \ + PIPELINE_STAGE(SWITCH, IN, PORT_SEC_ND, 2, "ls_in_port_sec_nd") \ + PIPELINE_STAGE(SWITCH, IN, PRE_ACL, 3, "ls_in_pre_acl") \ + PIPELINE_STAGE(SWITCH, IN, PRE_LB, 4, "ls_in_pre_lb") \ + PIPELINE_STAGE(SWITCH, IN, PRE_STATEFUL, 5, "ls_in_pre_stateful") \ + PIPELINE_STAGE(SWITCH, IN, ACL, 6, "ls_in_acl") \ + PIPELINE_STAGE(SWITCH, IN, QOS_MARK, 7, "ls_in_qos_mark") \ + PIPELINE_STAGE(SWITCH, IN, LB, 8, "ls_in_lb") \ + PIPELINE_STAGE(SWITCH, IN, STATEFUL, 9, "ls_in_stateful") \ + PIPELINE_STAGE(SWITCH, IN, ARP_ND_RSP, 10, "ls_in_arp_rsp") \ + PIPELINE_STAGE(SWITCH, IN, DHCP_OPTIONS, 11, "ls_in_dhcp_options") \ + PIPELINE_STAGE(SWITCH, IN, DHCP_RESPONSE, 12, "ls_in_dhcp_response") \ + PIPELINE_STAGE(SWITCH, IN, L2_LKUP, 13, "ls_in_l2_lkup") \ \ /* Logical switch egress stages. */ \ PIPELINE_STAGE(SWITCH, OUT, PRE_LB, 0, "ls_out_pre_lb") \ @@ -118,9 +119,10 @@ enum ovn_stage { PIPELINE_STAGE(SWITCH, OUT, PRE_STATEFUL, 2, "ls_out_pre_stateful") \ PIPELINE_STAGE(SWITCH, OUT, LB, 3, "ls_out_lb") \ PIPELINE_STAGE(SWITCH, OUT, ACL, 4, "ls_out_acl") \ - PIPELINE_STAGE(SWITCH, OUT, STATEFUL, 5, "ls_out_stateful") \ - PIPELINE_STAGE(SWITCH, OUT, PORT_SEC_IP, 6, "ls_out_port_sec_ip") \ - PIPELINE_STAGE(SWITCH, OUT, PORT_SEC_L2, 7, "ls_out_port_sec_l2") \ + PIPELINE_STAGE(SWITCH, OUT, QOS_MARK, 5, "ls_out_qos_mark") \ + PIPELINE_STAGE(SWITCH, OUT, STATEFUL, 6, "ls_out_stateful") \ + PIPELINE_STAGE(SWITCH, OUT, PORT_SEC_IP, 7, "ls_out_port_sec_ip") \ + PIPELINE_STAGE(SWITCH, OUT, PORT_SEC_L2, 8, "ls_out_port_sec_l2") \ \ /* Logical router ingress stages. 
*/ \ PIPELINE_STAGE(ROUTER, IN, ADMISSION, 0, "lr_in_admission") \ @@ -2610,6 +2612,29 @@ build_acls(struct ovn_datapath *od, struct hmap *lflows) } } +static void +build_qos(struct ovn_datapath *od, struct hmap *lflows) { + ovn_lflow_add(lflows, od, S_SWITCH_IN_QOS_MARK, 0, "1", "next;"); + ovn_lflow_add(lflows, od, S_SWITCH_OUT_QOS_MARK, 0, "1", "next;"); + + for (size_t i = 0; i < od->nbs->n_qos_rules; i++) { + struct nbrec_qos *qos = od->nbs->qos_rules[i]; + bool ingress = !strcmp(qos->direction, "from-lport") ? true :false; + enum ovn_stage stage = ingress ? S_SWITCH_IN_QOS_MARK : S_SWITCH_OUT_QOS_MARK; + + if (!strcmp(qos->key_action, "dscp")) { + struct ds dscp_action = DS_EMPTY_INITIALIZER; + + ds_put_format(&dscp_action, "ip.dscp = %d; next;", + (uint8_t)qos->value_action); + ovn_lflow_add(lflows, od, stage, + qos->priority, + qos->match, ds_cstr(&dscp_action)); + ds_destroy(&dscp_action); + } + } +} + static void build_lb(struct ovn_datapath *od, struct hmap *lflows) { @@ -2715,7 +2740,7 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports, struct ds actions = DS_EMPTY_INITIALIZER; /* Build pre-ACL and ACL tables for both ingress and egress. - * Ingress tables 3 and 4. Egress tables 0 and 1. */ + * Ingress tables 3 through 9. Egress tables 0 through 6. */ struct ovn_datapath *od; HMAP_FOR_EACH (od, key_node, datapaths) { if (!od->nbs) { @@ -2726,6 +2751,7 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports, build_pre_lb(od, lflows); build_pre_stateful(od, lflows); build_acls(od, lflows); + build_qos(od, lflows); build_lb(od, lflows); build_stateful(od, lflows); } @@ -2797,7 +2823,7 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports, ovn_lflow_add(lflows, od, S_SWITCH_IN_PORT_SEC_IP, 0, "1", "next;"); } - /* Ingress table 9: ARP/ND responder, skip requests coming from localnet + /* Ingress table 10: ARP/ND responder, skip requests coming from localnet * ports. (priority 100). 
*/ HMAP_FOR_EACH (op, key_node, ports) { if (!op->nbsp) { @@ -2812,7 +2838,7 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports, } } - /* Ingress table 9: ARP/ND responder, reply for known IPs. + /* Ingress table 10: ARP/ND responder, reply for known IPs. * (priority 50). */ HMAP_FOR_EACH (op, key_node, ports) { if (!op->nbsp) { @@ -2905,7 +2931,7 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports, } } - /* Ingress table 9: ARP/ND responder, by default goto next. + /* Ingress table 10: ARP/ND responder, by default goto next. * (priority 0)*/ HMAP_FOR_EACH (od, key_node, datapaths) { if (!od->nbs) { @@ -2915,7 +2941,7 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports, ovn_lflow_add(lflows, od, S_SWITCH_IN_ARP_ND_RSP, 0, "1", "next;"); } - /* Logical switch ingress table 10 and 11: DHCP options and response + /* Logical switch ingress table 11 and 12: DHCP options and response * priority 100 flows. */ HMAP_FOR_EACH (op, key_node, ports) { if (!op->nbsp) { @@ -2994,7 +3020,7 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports, } } - /* Ingress table 10 and 11: DHCP options and response, by default goto next. + /* Ingress table 11 and 12: DHCP options and response, by default goto next. * (priority 0). */ HMAP_FOR_EACH (od, key_node, datapaths) { @@ -3006,7 +3032,7 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports, ovn_lflow_add(lflows, od, S_SWITCH_IN_DHCP_RESPONSE, 0, "1", "next;"); } - /* Ingress table 12: Destination lookup, broadcast and multicast handling + /* Ingress table 13: Destination lookup, broadcast and multicast handling * (priority 100). 
*/ HMAP_FOR_EACH (op, key_node, ports) { if (!op->nbsp) { @@ -3026,7 +3052,7 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports, "outport = \""MC_FLOOD"\"; output;"); } - /* Ingress table 12: Destination lookup, unicast handling (priority 50), */ + /* Ingress table 13: Destination lookup, unicast handling (priority 50), */ HMAP_FOR_EACH (op, key_node, ports) { if (!op->nbsp) { continue; @@ -3073,7 +3099,7 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap *ports, } } - /* Ingress table 12: Destination lookup for unknown MACs (priority 0). */ + /* Ingress table 13: Destination lookup for unknown MACs (priority 0). */ HMAP_FOR_EACH (od, key_node, datapaths) { if (!od->nbs) { continue; diff --git a/ovn/ovn-nb.ovsschema b/ovn/ovn-nb.ovsschema index 5f2f2bf2233..865dd349189 100644 --- a/ovn/ovn-nb.ovsschema +++ b/ovn/ovn-nb.ovsschema @@ -1,7 +1,7 @@ { "name": "OVN_Northbound", - "version": "5.3.4", - "cksum": "1155817817 9975", + "version": "5.4.0", + "cksum": "4176761817 11225", "tables": { "NB_Global": { "columns": { @@ -26,6 +26,11 @@ "refType": "strong"}, "min": 0, "max": "unlimited"}}, + "qos_rules": {"type": {"key": {"type": "uuid", + "refTable": "QoS", + "refType": "strong"}, + "min": 0, + "max": "unlimited"}}, "load_balancer": {"type": {"key": {"type": "uuid", "refTable": "Load_Balancer", "refType": "strong"}, @@ -124,6 +129,23 @@ "type": {"key": "string", "value": "string", "min": 0, "max": "unlimited"}}}, "isRoot": false}, + "QoS": { + "columns": { + "priority": {"type": {"key": {"type": "integer", + "minInteger": 0, + "maxInteger": 32767}}}, + "direction": {"type": {"key": {"type": "string", + "enum": ["set", ["from-lport", "to-lport"]]}}}, + "match": {"type": "string"}, + "action": {"type": {"key": {"type": "string", + "enum": ["set", ["dscp"]]}, + "value": {"type": "integer", + "minInteger": 0, + "maxInteger": 63}}}, + "external_ids": { + "type": {"key": "string", "value": "string", + "min": 0, "max": "unlimited"}}}, + "isRoot": 
false}, "Logical_Router": { "columns": { "name": {"type": "string"}, diff --git a/ovn/ovn-nb.xml b/ovn/ovn-nb.xml index 32ddb7093ec..c2a1ebb3c32 100644 --- a/ovn/ovn-nb.xml +++ b/ovn/ovn-nb.xml @@ -119,6 +119,10 @@ Access control rules that apply to packets within the logical switch. + + QOS marking rules that apply to packets within the logical switch. + +

Additional configuration options for the logical switch. @@ -899,6 +903,59 @@ + +

+ Each row in this table represents one QoS rule for a logical switch + that points to it through its qos_rules column. The action column for the + highest-priority matching row in this table determines a packet's QoS + marking. If no row matches, packets will not have any QoS marking. +

+ + +

+ The QOS rule's priority. Rules with numerically higher priority + take precedence over those with lower. If two QOS rules with + the same priority both match, then the one actually applied to a + packet is undefined. +

+
+ + +

+ The value of this field is similar to the direction column in the OVN + Northbound database's ACL table. +

+
+ + +

+ The packets that the QoS rules should match, in the same expression + language used for the match column in the OVN Southbound database's + Logical_Flow table. The + outport logical port is only available in the + to-lport direction (the inport is + available in both directions). +

+
+ + +

The action to be performed on the matched packet.

+
    +
  • + dscp: The value of this action should be in the + range of 0 to 63 (inclusive). +
  • +
+
+ + + See External IDs at the beginning of this document. + +
+

A port within an L3 logical router. diff --git a/ovn/utilities/ovn-nbctl.c b/ovn/utilities/ovn-nbctl.c index 7643241b45b..fe6f51556d4 100644 --- a/ovn/utilities/ovn-nbctl.c +++ b/ovn/utilities/ovn-nbctl.c @@ -2626,6 +2626,11 @@ static const struct ctl_table_class tables[] = { NULL}, {NULL, NULL, NULL}}}, + {&nbrec_table_qos, + {{&nbrec_table_qos, NULL, + NULL}, + {NULL, NULL, NULL}}}, + {NULL, {{NULL, NULL, NULL}, {NULL, NULL, NULL}}} }; diff --git a/tests/ovn.at b/tests/ovn.at index 3b27581fd28..948716b2792 100644 --- a/tests/ovn.at +++ b/tests/ovn.at @@ -5359,6 +5359,90 @@ AT_CHECK([ovn-nbctl lsp-del localvif2]) # because, localvif3 is still bound. OVS_WAIT_UNTIL([test `ovs-vsctl show | grep "Port patch-br-int-to-ln_port" | wc -l` -eq 1]) - OVN_CLEANUP([hv1],[hv2]) + +AT_CLEANUP + +AT_SETUP([ovn -- DSCP marking check]) +AT_KEYWORDS([ovn]) +ovn_start + +ovn-nbctl ls-add lsw0 +ovn-nbctl --wait=sb lsp-add lsw0 lp1 +ovn-nbctl --wait=sb lsp-add lsw0 lp2 +ovn-nbctl lsp-set-addresses lp1 f0:00:00:00:00:01 +ovn-nbctl lsp-set-addresses lp2 f0:00:00:00:00:02 +ovn-nbctl lsp-set-port-security lp1 f0:00:00:00:00:01 +ovn-nbctl lsp-set-port-security lp2 f0:00:00:00:00:02 +ovn-nbctl --wait=sb sync +net_add n1 +sim_add hv +as hv +ovs-vsctl add-br br-phys +ovn_attach n1 br-phys 192.168.0.1 +ovs-vsctl add-port br-int vif1 -- set Interface vif1 external-ids:iface-id=lp1 options:tx_pcap=vif1-tx.pcap options:rxq_pcap=vif1-rx.pcap ofport-request=1 +ovs-vsctl add-port br-int vif2 -- set Interface vif2 external-ids:iface-id=lp2 options:tx_pcap=vif2-tx.pcap options:rxq_pcap=vif2-rx.pcap ofport-request=2 + +AT_CAPTURE_FILE([trace]) +ovn_trace () { + ovn-trace --all "$@" | tee trace | sed '1,/Minimal trace/d' +} + +# Extracts nw_tos from the final flow from ofproto/trace output and prints +# it on stdout. Prints "none" if no nw_tos was included. +get_final_nw_tos() { + if flow=$(grep '^Final flow:' stdout); then :; else + # The output didn't have a final flow. 
+ return 99 + fi + + tos=$(echo "$flow" | sed -n 's/.*nw_tos=\([[0-9]]\{1,\}\).*/\1/p') + case $tos in + '') echo none ;; + *) echo $tos ;; + esac +} + +# check_tos TOS +# +# Checks that a packet from 1.1.1.1 to 1.1.1.2 gets its DSCP set to TOS. +check_tos() { + # First check with ovn-trace for logical flows. + echo "checking for tos $1" + (if test $1 != 0; then echo "ip.dscp = $1;"; fi; + echo 'output("lp2");') > expout + AT_CHECK_UNQUOTED([ovn_trace lsw0 'inport == "lp1" && eth.src == f0:00:00:00:00:01 && eth.dst == f0:00:00:00:00:02 && ip4.src == 1.1.1.1 && ip4.dst == 1.1.1.2'], [0], [expout]) + + # Then re-check with ofproto/trace for a physical packet. + AT_CHECK([ovs-appctl ofproto/trace br-int 'in_port=1,dl_src=f0:00:00:00:00:01,dl_dst=f0:00:00:00:00:02,dl_type=0x800,nw_src=1.1.1.1,nw_dst=1.1.1.2'], [0], [stdout-nolog]) + AT_CHECK_UNQUOTED([get_final_nw_tos], [0], [`expr $1 \* 4` +]) +} + +# check at L2 +AT_CHECK([ovn_trace lsw0 'inport == "lp1" && eth.src == f0:00:00:00:00:01 && eth.dst == f0:00:00:00:00:02'], [0], [output("lp2"); +]) +AT_CHECK([ovs-appctl ofproto/trace br-int 'in_port=1,dl_src=f0:00:00:00:00:01,dl_dst=f0:00:00:00:00:02'], [0], [stdout-nolog]) +AT_CHECK([get_final_nw_tos], [0], [none +]) + +# check at L3 without dscp marking +check_tos 0 + +# Mark DSCP with a valid value +qos_id=$(ovn-nbctl --wait=hv -- --id=@lp1-qos create QoS priority=100 action=dscp=48 match="inport\=\=\"lp1\"" direction="from-lport" -- set Logical_Switch lsw0 qos_rules=@lp1-qos) +check_tos 48 + +# Update the DSCP marking +ovn-nbctl --wait=hv set QoS $qos_id action=dscp=63 +check_tos 63 + +ovn-nbctl --wait=hv set QoS $qos_id match="outport\=\=\"lp2\"" direction="to-lport" +check_tos 63 + +# Disable DSCP marking +ovn-nbctl --wait=hv clear Logical_Switch lsw0 qos_rules +check_tos 0 + +OVN_CLEANUP([hv]) AT_CLEANUP