diff --git a/include/ovn/actions.h b/include/ovn/actions.h index bb8f1ac5e0b..9e4a5c5ab1e 100644 --- a/include/ovn/actions.h +++ b/include/ovn/actions.h @@ -70,7 +70,8 @@ struct simap; OVNACT(PUT_ND, ovnact_put_mac_bind) \ OVNACT(PUT_DHCPV4_OPTS, ovnact_put_dhcp_opts) \ OVNACT(PUT_DHCPV6_OPTS, ovnact_put_dhcp_opts) \ - OVNACT(SET_QUEUE, ovnact_set_queue) + OVNACT(SET_QUEUE, ovnact_set_queue) \ + OVNACT(DNS_LOOKUP, ovnact_dns_lookup) /* enum ovnact_type, with a member OVNACT_ for each action. */ enum OVS_PACKED_ENUM ovnact_type { @@ -258,6 +259,12 @@ struct ovnact_set_queue { uint16_t queue_id; }; +/* OVNACT_DNS_LOOKUP. */ +struct ovnact_dns_lookup { + struct ovnact ovnact; + struct expr_field dst; /* 1-bit destination field. */ +}; + /* Internal use by the helpers below. */ void ovnact_init(struct ovnact *, enum ovnact_type, size_t len); void *ovnact_put(struct ofpbuf *, enum ovnact_type, size_t len); @@ -385,6 +392,14 @@ enum action_opcode { * - Any number of DHCPv6 options. */ ACTION_OPCODE_PUT_DHCPV6_OPTS, + + /* "result = dns_lookup()". + * Arguments follow the action_header, in this format: + * - A 32-bit or 64-bit OXM header designating the result field. + * - A 32-bit integer specifying a bit offset within the result field. + * + */ + ACTION_OPCODE_DNS_LOOKUP, }; /* Header. */ diff --git a/lib/packets.h b/lib/packets.h index 6776be36dcc..639f5e44d41 100644 --- a/lib/packets.h +++ b/lib/packets.h @@ -1199,4 +1199,28 @@ void compose_nd_na(struct dp_packet *, const struct eth_addr eth_src, uint32_t packet_csum_pseudoheader(const struct ip_header *); void IP_ECN_set_ce(struct dp_packet *pkt, bool is_ipv6); +#define DNS_HEADER_LEN 12 +struct dns_header { + ovs_be16 id; + uint8_t lo_flag; /* QR (1), OPCODE (4), AA (1), TC (1) and RD (1) */ + uint8_t hi_flag; /* RA (1), Z (3) and RCODE (4) */ + ovs_be16 qdcount; /* Num of entries in the question section. */ + ovs_be16 ancount; /* Num of resource records in the answer section. 
*/ + + /* Num of name server records in the authority record section. */ + ovs_be16 nscount; + + /* Num of resource records in the additional records section. */ + ovs_be16 arcount; +}; + +BUILD_ASSERT_DECL(DNS_HEADER_LEN == sizeof(struct dns_header)); + +#define DNS_QUERY_TYPE_A 0x01 +#define DNS_QUERY_TYPE_AAAA 0x1c +#define DNS_QUERY_TYPE_ANY 0xff + +#define DNS_CLASS_IN 0x01 +#define DNS_DEFAULT_RR_TTL 3600 + #endif /* packets.h */ diff --git a/ovn/controller/pinctrl.c b/ovn/controller/pinctrl.c index 8c5042a590e..4b443595b1d 100644 --- a/ovn/controller/pinctrl.c +++ b/ovn/controller/pinctrl.c @@ -662,7 +662,251 @@ pinctrl_handle_put_dhcpv6_opts( } static void -process_packet_in(const struct ofp_header *msg) +put_be16(struct ofpbuf *buf, ovs_be16 x) +{ + ofpbuf_put(buf, &x, sizeof x); +} + +static void +put_be32(struct ofpbuf *buf, ovs_be32 x) +{ + ofpbuf_put(buf, &x, sizeof x); +} + +static void +pinctrl_handle_dns_lookup( + struct dp_packet *pkt_in, struct ofputil_packet_in *pin, + struct ofpbuf *userdata, struct ofpbuf *continuation, + struct controller_ctx *ctx) +{ + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); + enum ofp_version version = rconn_get_version(swconn); + enum ofputil_protocol proto = ofputil_protocol_from_ofp_version(version); + struct dp_packet *pkt_out_ptr = NULL; + uint32_t success = 0; + + /* Parse result field. */ + const struct mf_field *f; + enum ofperr ofperr = nx_pull_header(userdata, NULL, &f, NULL); + if (ofperr) { + VLOG_WARN_RL(&rl, "bad result OXM (%s)", ofperr_to_string(ofperr)); + goto exit; + } + + /* Parse result offset. NOTE(review): on failure ofperr is still 0, so the 'exit' path writes through 'dst' before it has been initialized -- verify. */ + ovs_be32 *ofsp = ofpbuf_try_pull(userdata, sizeof *ofsp); + if (!ofsp) { + VLOG_WARN_RL(&rl, "offset not present in the userdata"); + goto exit; + } + + /* Check that the result is valid and writable. 
*/ + struct mf_subfield dst = { .field = f, .ofs = ntohl(*ofsp), .n_bits = 1 }; + ofperr = mf_check_dst(&dst, NULL); + if (ofperr) { + VLOG_WARN_RL(&rl, "bad result bit (%s)", ofperr_to_string(ofperr)); + goto exit; + } + + /* Extract the DNS header. NOTE(review): the payload pointer is dereferenced below without a NULL or minimum-length (DNS_HEADER_LEN) check -- confirm the caller guarantees both. */ + struct dns_header const *in_dns_header = dp_packet_get_udp_payload(pkt_in); + + /* Check if it is DNS request or not */ + if (in_dns_header->lo_flag & 0x80) { + /* It's a DNS response packet which we are not interested in */ + goto exit; + } + + /* Check if at least one query request is present */ + if (!in_dns_header->qdcount) { + goto exit; + } + + struct udp_header *in_udp = dp_packet_l4(pkt_in); + size_t udp_len = ntohs(in_udp->udp_len); + size_t l4_len = dp_packet_l4_size(pkt_in); + uint8_t *end = (uint8_t *)in_udp + MIN(udp_len, l4_len); + uint8_t *in_dns_data = (uint8_t *)(in_dns_header + 1); + uint8_t *in_queryname = in_dns_data; + uint8_t idx = 0; /* NOTE(review): 8-bit offset; 'idx += label_len' can wrap -- verify. */ + struct ds query_name; + ds_init(&query_name); + /* Extract the query_name. If the query name is - 'www.ovn.org' it would be + * encoded as (in hex) - 03 77 77 77 03 6f 76 6e 03 6f 72 67 00. 
+ */ + while ((in_dns_data + idx) < end && in_dns_data[idx]) { + uint8_t label_len = in_dns_data[idx++]; + if (in_dns_data + idx + label_len > end) { + ds_destroy(&query_name); + goto exit; + } + ds_put_buffer(&query_name, (const char *) in_dns_data + idx, label_len); + idx += label_len; + ds_put_char(&query_name, '.'); + } + + idx++; + ds_chomp(&query_name, '.'); + in_dns_data += idx; + + /* Query should have TYPE and CLASS fields */ + if (in_dns_data + (2 * sizeof(ovs_be16)) > end) { + ds_destroy(&query_name); + goto exit; + } + + uint16_t query_type = ntohs(*ALIGNED_CAST(const ovs_be16 *, in_dns_data)); + /* Supported query types - A, AAAA and ANY */ + if (!(query_type == DNS_QUERY_TYPE_A || query_type == DNS_QUERY_TYPE_AAAA + || query_type == DNS_QUERY_TYPE_ANY)) { + ds_destroy(&query_name); + goto exit; + } + + uint64_t dp_key = ntohll(pin->flow_metadata.flow.metadata); + const struct sbrec_dns *sbrec_dns; + const char *answer_ips = NULL; + SBREC_DNS_FOR_EACH(sbrec_dns, ctx->ovnsb_idl) { + for (size_t i = 0; i < sbrec_dns->n_datapaths; i++) { + if (sbrec_dns->datapaths[i]->tunnel_key == dp_key) { + answer_ips = smap_get(&sbrec_dns->records, + ds_cstr(&query_name)); + if (answer_ips) { + break; + } + } + } + + if (answer_ips) { + break; + } + } + + ds_destroy(&query_name); + if (!answer_ips) { + goto exit; + } + + struct lport_addresses ip_addrs; + if (!extract_ip_addresses(answer_ips, &ip_addrs)) { + goto exit; + } + + uint16_t ancount = 0; + uint64_t dns_ans_stub[128 / 8]; + struct ofpbuf dns_answer = OFPBUF_STUB_INITIALIZER(dns_ans_stub); + + if (query_type == DNS_QUERY_TYPE_A || query_type == DNS_QUERY_TYPE_ANY) { + for (size_t i = 0; i < ip_addrs.n_ipv4_addrs; i++) { + /* Copy the answer section */ + /* Format of the answer section is + * - NAME -> The domain name + * - TYPE -> 2 octets containing one of the RR type codes + * - CLASS -> 2 octets which specify the class of the data + * in the RDATA field. 
+ * - TTL -> 32 bit unsigned int specifying the time + * interval (in secs) that the resource record + * may be cached before it should be discarded. + * - RDLENGTH -> 16 bit integer specifying the length of the + * RDATA field. + * - RDATA -> a variable length string of octets that + * describes the resource. In our case it will + * be the IP address of the domain name. + */ + ofpbuf_put(&dns_answer, in_queryname, idx); + put_be16(&dns_answer, htons(DNS_QUERY_TYPE_A)); + put_be16(&dns_answer, htons(DNS_CLASS_IN)); + put_be32(&dns_answer, htonl(DNS_DEFAULT_RR_TTL)); + put_be16(&dns_answer, htons(sizeof(ovs_be32))); + put_be32(&dns_answer, ip_addrs.ipv4_addrs[i].addr); + ancount++; + } + } + + if (query_type == DNS_QUERY_TYPE_AAAA || + query_type == DNS_QUERY_TYPE_ANY) { + for (size_t i = 0; i < ip_addrs.n_ipv6_addrs; i++) { + ofpbuf_put(&dns_answer, in_queryname, idx); + put_be16(&dns_answer, htons(DNS_QUERY_TYPE_AAAA)); + put_be16(&dns_answer, htons(DNS_CLASS_IN)); + put_be32(&dns_answer, htonl(DNS_DEFAULT_RR_TTL)); + const struct in6_addr *ip6 = &ip_addrs.ipv6_addrs[i].addr; + put_be16(&dns_answer, htons(sizeof *ip6)); + ofpbuf_put(&dns_answer, ip6, sizeof *ip6); + ancount++; + } + } + + destroy_lport_addresses(&ip_addrs); + + if (!ancount) { + ofpbuf_uninit(&dns_answer); + goto exit; + } + + uint16_t new_l4_size = ntohs(in_udp->udp_len) + dns_answer.size; + size_t new_packet_size = pkt_in->l4_ofs + new_l4_size; + struct dp_packet pkt_out; + dp_packet_init(&pkt_out, new_packet_size); + dp_packet_clear(&pkt_out); + dp_packet_prealloc_tailroom(&pkt_out, new_packet_size); + pkt_out_ptr = &pkt_out; + + /* Copy the L2 and L3 headers from the pkt_in as they remain the same. */ + dp_packet_put( + &pkt_out, dp_packet_pull(pkt_in, pkt_in->l4_ofs), pkt_in->l4_ofs); + + pkt_out.l2_5_ofs = pkt_in->l2_5_ofs; + pkt_out.l2_pad_size = pkt_in->l2_pad_size; + pkt_out.l3_ofs = pkt_in->l3_ofs; + pkt_out.l4_ofs = pkt_in->l4_ofs; + + struct udp_header *out_udp = dp_packet_put( + 
&pkt_out, dp_packet_pull(pkt_in, UDP_HEADER_LEN), UDP_HEADER_LEN); + + /* Copy the DNS header. */ + struct dns_header *out_dns_header = dp_packet_put( + &pkt_out, dp_packet_pull(pkt_in, sizeof *out_dns_header), + sizeof *out_dns_header); + + /* Set the response bit to 1 in the flags. */ + out_dns_header->lo_flag |= 0x80; + + /* Set the answer RR count. */ + out_dns_header->ancount = htons(ancount); + + /* Copy the Query section. */ + dp_packet_put(&pkt_out, dp_packet_data(pkt_in), dp_packet_size(pkt_in)); + + /* Copy the answer sections. */ + dp_packet_put(&pkt_out, dns_answer.data, dns_answer.size); + ofpbuf_uninit(&dns_answer); + + out_udp->udp_len = htons(new_l4_size); + out_udp->udp_csum = 0; + + struct ip_header *out_ip = dp_packet_l3(&pkt_out); + out_ip->ip_tot_len = htons(pkt_out.l4_ofs - pkt_out.l3_ofs + new_l4_size); + /* Checksum needs to be initialized to zero before it is recomputed. */ + out_ip->ip_csum = 0; + out_ip->ip_csum = csum(out_ip, sizeof *out_ip); + + pin->packet = dp_packet_data(&pkt_out); + pin->packet_len = dp_packet_size(&pkt_out); + + success = 1; +exit: + if (!ofperr) { + union mf_subvalue sv; + sv.u8_val = success; + mf_write_subfield(&dst, &sv, &pin->flow_metadata); + } + queue_msg(ofputil_encode_resume(pin, continuation, proto)); + dp_packet_uninit(pkt_out_ptr); +} + +static void +process_packet_in(const struct ofp_header *msg, struct controller_ctx *ctx) { static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); @@ -721,6 +965,10 @@ process_packet_in(const struct ofp_header *msg) &continuation); break; + case ACTION_OPCODE_DNS_LOOKUP: + pinctrl_handle_dns_lookup(&packet, &pin, &userdata, &continuation, ctx); + break; + default: VLOG_WARN_RL(&rl, "unrecognized packet-in opcode %"PRIu32, ntohl(ah->opcode)); @@ -729,7 +977,8 @@ process_packet_in(const struct ofp_header *msg) } static void -pinctrl_recv(const struct ofp_header *oh, enum ofptype type) +pinctrl_recv(const struct ofp_header *oh, enum ofptype type, + struct controller_ctx *ctx) { if (type == 
OFPTYPE_ECHO_REQUEST) { queue_msg(make_echo_reply(oh)); @@ -741,7 +990,7 @@ pinctrl_recv(const struct ofp_header *oh, enum ofptype type) config.miss_send_len = UINT16_MAX; set_switch_config(swconn, &config); } else if (type == OFPTYPE_PACKET_IN) { - process_packet_in(oh); + process_packet_in(oh, ctx); } else if (type != OFPTYPE_ECHO_REPLY && type != OFPTYPE_BARRIER_REPLY) { if (VLOG_IS_DBG_ENABLED()) { static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(30, 300); @@ -787,7 +1036,7 @@ pinctrl_run(struct controller_ctx *ctx, const struct lport_index *lports, enum ofptype type; ofptype_decode(&type, oh); - pinctrl_recv(oh, type); + pinctrl_recv(oh, type, ctx); ofpbuf_delete(msg); } } diff --git a/ovn/lib/actions.c b/ovn/lib/actions.c index fff838b74ad..71f49c13fcc 100644 --- a/ovn/lib/actions.c +++ b/ovn/lib/actions.c @@ -1708,6 +1708,55 @@ static void ovnact_set_queue_free(struct ovnact_set_queue *a OVS_UNUSED) { } + +static void +parse_dns_lookup(struct action_context *ctx, const struct expr_field *dst, + struct ovnact_dns_lookup *dl) +{ + lexer_get(ctx->lexer); /* Skip dns_lookup. */ + lexer_get(ctx->lexer); /* Skip '('. */ + if (!lexer_match(ctx->lexer, LEX_T_RPAREN)) { + lexer_error(ctx->lexer, "dns_lookup doesn't take any parameters"); + return; + } + /* Validate that the destination is a 1-bit, modifiable field. 
*/ + char *error = expr_type_check(dst, 1, true); + if (error) { + lexer_error(ctx->lexer, "%s", error); + free(error); + return; + } + dl->dst = *dst; + add_prerequisite(ctx, "udp"); +} + +static void +format_DNS_LOOKUP(const struct ovnact_dns_lookup *dl, struct ds *s) +{ + expr_field_format(&dl->dst, s); + ds_put_cstr(s, " = dns_lookup();"); +} + +static void +encode_DNS_LOOKUP(const struct ovnact_dns_lookup *dl, + const struct ovnact_encode_params *ep OVS_UNUSED, + struct ofpbuf *ofpacts) +{ + struct mf_subfield dst = expr_resolve_field(&dl->dst); + + size_t oc_offset = encode_start_controller_op(ACTION_OPCODE_DNS_LOOKUP, + true, ofpacts); + nx_put_header(ofpacts, dst.field->id, OFP13_VERSION, false); + ovs_be32 ofs = htonl(dst.ofs); + ofpbuf_put(ofpacts, &ofs, sizeof ofs); + encode_finish_controller_op(oc_offset, ofpacts); +} + + +static void +ovnact_dns_lookup_free(struct ovnact_dns_lookup *dl OVS_UNUSED) +{ +} /* Parses an assignment or exchange or put_dhcp_opts action. */ static void @@ -1731,6 +1780,9 @@ parse_set_action(struct action_context *ctx) && lexer_lookahead(ctx->lexer) == LEX_T_LPAREN) { parse_put_dhcp_opts(ctx, &lhs, ovnact_put_PUT_DHCPV6_OPTS( ctx->ovnacts)); + } else if (!strcmp(ctx->lexer->token.s, "dns_lookup") + && lexer_lookahead(ctx->lexer) == LEX_T_LPAREN) { + parse_dns_lookup(ctx, &lhs, ovnact_put_DNS_LOOKUP(ctx->ovnacts)); } else { parse_assignment_action(ctx, false, &lhs); } diff --git a/ovn/ovn-sb.ovsschema b/ovn/ovn-sb.ovsschema index a576dc4f3bf..8a39e61051d 100644 --- a/ovn/ovn-sb.ovsschema +++ b/ovn/ovn-sb.ovsschema @@ -1,7 +1,7 @@ { "name": "OVN_Southbound", - "version": "1.10.0", - "cksum": "860871483 9898", + "version": "1.11.0", + "cksum": "2783500150 10696", "tables": { "SB_Global": { "columns": { @@ -201,4 +201,19 @@ "value": "string", "min": 0, "max": "unlimited"}}}, - "maxRows": 1}}} + "maxRows": 1}, + "DNS": { + "columns": { + "records": {"type": {"key": "string", + "value": "string", + "min": 0, + "max": "unlimited"}}, + 
"datapaths": {"type": {"key": {"type": "uuid", + "refTable": "Datapath_Binding"}, + "min": 1, + "max": "unlimited"}}, + "external_ids": {"type": {"key": "string", + "value": "string", + "min": 0, + "max": "unlimited"}}}, + "isRoot": true}}} diff --git a/ovn/ovn-sb.xml b/ovn/ovn-sb.xml index 5542f7ee2f1..b92c2d06560 100644 --- a/ovn/ovn-sb.xml +++ b/ovn/ovn-sb.xml @@ -311,7 +311,7 @@ transmitted and received with reasonable performance. It is a hint to senders transmitting data to this chassis that they should use checksums to protect OVN metadata. ovn-controller - populates this key with the value defined in + populates this key with the value defined in column of the Open_vSwitch database's table. Other applications should treat this key as @@ -1465,6 +1465,53 @@ packet in that connection.

+ +
+ R = dns_lookup(); +
+ +
+

+ Parameters: No parameters. +

+ +

+ Result: stored to a 1-bit subfield R. +

+ +

+ Valid only in the ingress pipeline. +

+ +

+ When this action is applied to a valid DNS request (a UDP packet + typically directed to port 53), it attempts to resolve the query + using the contents of the DNS table. If it is + successful, it changes the packet into a DNS reply and stores 1 in + R. If the action is applied to a non-DNS packet, an + invalid DNS request packet, or a valid DNS request for which the + DNS table does not supply an answer, it leaves the + packet unchanged and stores 0 in R. +

+ +

+ Regardless of success, the action does not make any of the changes + to the flow that are necessary to direct the packet back to the + requester. The logical pipeline can implement this behavior with + matches and actions in later tables. +

+ +

+ Example: + + reg0[3] = dns_lookup(); + +

+ +

+ Prerequisite: udp +

+

@@ -2686,4 +2733,30 @@ tcp.flags = RST; + +

+ Each row in this table stores the DNS records. The OVN action + dns_lookup uses this table for DNS resolution. +

+ + + Key-value pair of DNS records with DNS query name as the key + and a string of IP address(es) separated by comma or space as the + value. + +

Example: "vm1.ovn.org" = "10.0.0.4 aef0::4"

+
+ + + The DNS records defined in the records column will be + applied only to the DNS queries originating from the datapaths defined + in this column. + + + + + See External IDs at the beginning of this document. + + +
diff --git a/ovn/utilities/ovn-sbctl.c b/ovn/utilities/ovn-sbctl.c index ac292f31608..2eeb1114e34 100644 --- a/ovn/utilities/ovn-sbctl.c +++ b/ovn/utilities/ovn-sbctl.c @@ -1068,6 +1068,9 @@ static const struct ctl_table_class tables[SBREC_N_TABLES] = { [SBREC_TABLE_SSL].row_ids[0] = {&sbrec_table_sb_global, NULL, &sbrec_sb_global_col_ssl}, + + [SBREC_TABLE_DNS].row_ids[0] = + {&sbrec_table_dns, NULL, &sbrec_dns_col_records}, }; diff --git a/ovn/utilities/ovn-trace.c b/ovn/utilities/ovn-trace.c index e9463f02a70..5bb29e6c599 100644 --- a/ovn/utilities/ovn-trace.c +++ b/ovn/utilities/ovn-trace.c @@ -1475,6 +1475,18 @@ execute_next(const struct ovnact_next *next, trace__(dp, uflow, next->ltable, next->pipeline, super); } + +static void +execute_dns_lookup(const struct ovnact_dns_lookup *dl, struct flow *uflow, + struct ovs_list *super) +{ + struct mf_subfield sf = expr_resolve_field(&dl->dst); + union mf_subvalue sv = { .u8_val = 0 }; /* Trace always stores 0. */ + mf_write_subfield_flow(&sf, &sv, uflow); + ovntrace_node_append(super, OVNTRACE_NODE_ERROR, + "*** dns_lookup action not implemented"); +} + static void execute_ct_next(const struct ovnact_ct_next *ct_next, const struct ovntrace_datapath *dp, struct flow *uflow, @@ -1637,6 +1649,10 @@ trace_actions(const struct ovnact *ovnacts, size_t ovnacts_len, * though, it would be easy enough to track the queue information * by adjusting uflow->skb_priority. */ break; + + case OVNACT_DNS_LOOKUP: + execute_dns_lookup(ovnact_get_DNS_LOOKUP(a), uflow, super); + break; } } diff --git a/tests/ovn.at b/tests/ovn.at index 088bbf6c012..bace273b77c 100644 --- a/tests/ovn.at +++ b/tests/ovn.at @@ -1027,6 +1027,13 @@ set_queue(61440); set_queue(65535); Queue ID 65535 for set_queue is not in valid range 0 to 61440. 
+# dns_lookup +reg1[0] = dns_lookup(); + encodes as controller(userdata=00.00.00.06.00.00.00.00.00.01.de.10.00.00.00.40,pause) + has prereqs udp +reg1[0] = dns_lookup("foo"); + dns_lookup doesn't take any parameters + # Contradictionary prerequisites (allowed but not useful): ip4.src = ip6.src[0..31]; encodes as move:NXM_NX_IPV6_SRC[0..31]->NXM_OF_IP_SRC[]