Skip to content

Commit

Permalink
net: ioctl: Use kernel memory on protocol ioctl callbacks
Browse files Browse the repository at this point in the history
Most of the ioctls to net protocols operates directly on userspace
argument (arg). Usually doing get_user()/put_user() directly in the
ioctl callback.  This is not flexible, because it is hard to reuse these
functions without passing userspace buffers.

Change the "struct proto" ioctls to avoid touching userspace memory and
operate on kernel buffers, i.e., all protocol's ioctl callbacks is
adapted to operate on a kernel memory other than on userspace (so, no
more {put,get}_user() and friends being called in the ioctl callback).

This changes the "struct proto" ioctl format in the following way:

    int                     (*ioctl)(struct sock *sk, int cmd,
-                                        unsigned long arg);
+                                        int *karg);

(Important to say that this patch does not touch the "struct proto_ops"
protocols)

So, the "karg" argument, which is passed to the ioctl callback, is a
pointer allocated to kernel space memory (inside a function wrapper).
This buffer (karg) may contain input argument (copied from userspace in
a prep function) and it might return a value/buffer, which is copied
back to userspace if necessary. There is not one-size-fits-all format
(that is I am using 'may' above), but basically, there are three type of
ioctls:

1) Do not read from userspace, returns a result to userspace
2) Read an input parameter from userspace, and does not return anything
  to userspace
3) Read an input from userspace, and return a buffer to userspace.

The default case (1) (where no input parameter is given, and an "int" is
returned to userspace) encompasses more than 90% of the cases, but there
are two other exceptions. Here is a list of exceptions:

* Protocol RAW:
   * cmd = SIOCGETVIFCNT:
     * input and output = struct sioc_vif_req
   * cmd = SIOCGETSGCNT
     * input and output = struct sioc_sg_req
   * Explanation: for the SIOCGETVIFCNT case, userspace passes the input
     argument, which is struct sioc_vif_req. Then the callback populates
     the struct, which is copied back to userspace.

* Protocol RAW6:
   * cmd = SIOCGETMIFCNT_IN6
     * input and output = struct sioc_mif_req6
   * cmd = SIOCGETSGCNT_IN6
     * input and output = struct sioc_sg_req6

* Protocol PHONET:
  * cmd == SIOCPNADDRESOURCE | SIOCPNDELRESOURCE
     * input int (4 bytes)
  * Nothing is copied back to userspace.

For the exception cases, functions sock_sk_ioctl_inout() will
copy the userspace input, and copy it back to kernel space.

The wrapper that prepare the buffer and put the buffer back to user is
sk_ioctl(), so, instead of calling sk->sk_prot->ioctl(), the callee now
calls sk_ioctl(), which will handle all cases.

Signed-off-by: Breno Leitao <[email protected]>
Reviewed-by: Willem de Bruijn <[email protected]>
Reviewed-by: David Ahern <[email protected]>
Reviewed-by: Kuniyuki Iwashima <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Jakub Kicinski <[email protected]>
  • Loading branch information
leitao authored and kuba-moo committed Jun 16, 2023
1 parent 173780f commit e1d001f
Show file tree
Hide file tree
Showing 26 changed files with 267 additions and 129 deletions.
6 changes: 6 additions & 0 deletions include/linux/icmpv6.h
Original file line number Diff line number Diff line change
Expand Up @@ -111,4 +111,10 @@ static inline bool icmpv6_is_err(int type)
return false;
}

static inline int sk_is_icmpv6(struct sock *sk)
{
return sk->sk_family == AF_INET6 &&
inet_sk(sk)->inet_num == IPPROTO_ICMPV6;
}

#endif
22 changes: 20 additions & 2 deletions include/linux/mroute.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,19 @@ static inline int ip_mroute_opt(int opt)
return opt >= MRT_BASE && opt <= MRT_MAX;
}

static inline int sk_is_ipmr(struct sock *sk)
{
return sk->sk_family == AF_INET &&
inet_sk(sk)->inet_num == IPPROTO_IGMP;
}

int ip_mroute_setsockopt(struct sock *, int, sockptr_t, unsigned int);
int ip_mroute_getsockopt(struct sock *, int, sockptr_t, sockptr_t);
int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg);
int ipmr_ioctl(struct sock *sk, int cmd, void *arg);
int ipmr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg);
int ip_mr_init(void);
bool ipmr_rule_default(const struct fib_rule *rule);
int ipmr_sk_ioctl(struct sock *sk, unsigned int cmd, void __user *arg);
#else
static inline int ip_mroute_setsockopt(struct sock *sock, int optname,
sockptr_t optval, unsigned int optlen)
Expand All @@ -35,7 +42,7 @@ static inline int ip_mroute_getsockopt(struct sock *sk, int optname,
return -ENOPROTOOPT;
}

static inline int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
static inline int ipmr_ioctl(struct sock *sk, int cmd, void *arg)
{
return -ENOIOCTLCMD;
}
Expand All @@ -50,10 +57,21 @@ static inline int ip_mroute_opt(int opt)
return 0;
}

static inline int sk_is_ipmr(struct sock *sk)
{
return 0;
}

static inline bool ipmr_rule_default(const struct fib_rule *rule)
{
return true;
}

static inline int ipmr_sk_ioctl(struct sock *sk, unsigned int cmd,
void __user *arg)
{
return 1;
}
#endif

#define VIFF_STATIC 0x8000
Expand Down
31 changes: 29 additions & 2 deletions include/linux/mroute6.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,10 @@ struct sock;
extern int ip6_mroute_setsockopt(struct sock *, int, sockptr_t, unsigned int);
extern int ip6_mroute_getsockopt(struct sock *, int, sockptr_t, sockptr_t);
extern int ip6_mr_input(struct sk_buff *skb);
extern int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg);
extern int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg);
extern int ip6_mr_init(void);
extern void ip6_mr_cleanup(void);
int ip6mr_ioctl(struct sock *sk, int cmd, void *arg);
#else
static inline int ip6_mroute_setsockopt(struct sock *sock, int optname,
sockptr_t optval, unsigned int optlen)
Expand All @@ -48,7 +48,7 @@ int ip6_mroute_getsockopt(struct sock *sock,
}

static inline
int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
int ip6mr_ioctl(struct sock *sk, int cmd, void *arg)
{
return -ENOIOCTLCMD;
}
Expand Down Expand Up @@ -100,6 +100,27 @@ extern int ip6mr_get_route(struct net *net, struct sk_buff *skb,
#ifdef CONFIG_IPV6_MROUTE
bool mroute6_is_socket(struct net *net, struct sk_buff *skb);
extern int ip6mr_sk_done(struct sock *sk);
static inline int ip6mr_sk_ioctl(struct sock *sk, unsigned int cmd,
void __user *arg)
{
switch (cmd) {
/* These userspace buffers will be consumed by ip6mr_ioctl() */
case SIOCGETMIFCNT_IN6: {
struct sioc_mif_req6 buffer;

return sock_ioctl_inout(sk, cmd, arg, &buffer,
sizeof(buffer));
}
case SIOCGETSGCNT_IN6: {
struct sioc_mif_req6 buffer;

return sock_ioctl_inout(sk, cmd, arg, &buffer,
sizeof(buffer));
}
}

return 1;
}
#else
static inline bool mroute6_is_socket(struct net *net, struct sk_buff *skb)
{
Expand All @@ -109,5 +130,11 @@ static inline int ip6mr_sk_done(struct sock *sk)
{
return 0;
}

static inline int ip6mr_sk_ioctl(struct sock *sk, unsigned int cmd,
void __user *arg)
{
return 1;
}
#endif
#endif
21 changes: 21 additions & 0 deletions include/net/phonet/phonet.h
Original file line number Diff line number Diff line change
Expand Up @@ -109,4 +109,25 @@ void phonet_sysctl_exit(void);
int isi_register(void);
void isi_unregister(void);

static inline bool sk_is_phonet(struct sock *sk)
{
return sk->sk_family == PF_PHONET;
}

static inline int phonet_sk_ioctl(struct sock *sk, unsigned int cmd,
void __user *arg)
{
int karg;

switch (cmd) {
case SIOCPNADDRESOURCE:
case SIOCPNDELRESOURCE:
if (get_user(karg, (int __user *)arg))
return -EFAULT;

return sk->sk_prot->ioctl(sk, cmd, &karg);
}
/* A positive return value means that the ioctl was not processed */
return 1;
}
#endif
5 changes: 4 additions & 1 deletion include/net/sock.h
Original file line number Diff line number Diff line change
Expand Up @@ -1258,7 +1258,7 @@ struct proto {
bool kern);

int (*ioctl)(struct sock *sk, int cmd,
unsigned long arg);
int *karg);
int (*init)(struct sock *sk);
void (*destroy)(struct sock *sk);
void (*shutdown)(struct sock *sk, int how);
Expand Down Expand Up @@ -2974,6 +2974,9 @@ int sock_get_timeout(long timeo, void *optval, bool old_timeval);
int sock_copy_user_timeval(struct __kernel_sock_timeval *tv,
sockptr_t optval, int optlen, bool old_timeval);

int sock_ioctl_inout(struct sock *sk, unsigned int cmd,
void __user *arg, void *karg, size_t size);
int sk_ioctl(struct sock *sk, unsigned int cmd, void __user *arg);
static inline bool sk_is_readable(struct sock *sk)
{
if (sk->sk_prot->sock_is_readable)
Expand Down
2 changes: 1 addition & 1 deletion include/net/tcp.h
Original file line number Diff line number Diff line change
Expand Up @@ -340,7 +340,7 @@ void tcp_release_cb(struct sock *sk);
void tcp_wfree(struct sk_buff *skb);
void tcp_write_timer_handler(struct sock *sk);
void tcp_delack_timer_handler(struct sock *sk);
int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg);
int tcp_ioctl(struct sock *sk, int cmd, int *karg);
int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb);
void tcp_rcv_established(struct sock *sk, struct sk_buff *skb);
void tcp_rcv_space_adjust(struct sock *sk);
Expand Down
2 changes: 1 addition & 1 deletion include/net/udp.h
Original file line number Diff line number Diff line change
Expand Up @@ -285,7 +285,7 @@ void udp_flush_pending_frames(struct sock *sk);
int udp_cmsg_send(struct sock *sk, struct msghdr *msg, u16 *gso_size);
void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst);
int udp_rcv(struct sk_buff *skb);
int udp_ioctl(struct sock *sk, int cmd, unsigned long arg);
int udp_ioctl(struct sock *sk, int cmd, int *karg);
int udp_init_sock(struct sock *sk);
int udp_pre_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len);
int __udp_disconnect(struct sock *sk, int flags);
Expand Down
64 changes: 64 additions & 0 deletions net/core/sock.c
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,9 @@
#include <linux/memcontrol.h>
#include <linux/prefetch.h>
#include <linux/compat.h>
#include <linux/mroute.h>
#include <linux/mroute6.h>
#include <linux/icmpv6.h>

#include <linux/uaccess.h>

Expand All @@ -138,6 +141,7 @@

#include <net/tcp.h>
#include <net/busy_poll.h>
#include <net/phonet/phonet.h>

#include <linux/ethtool.h>

Expand Down Expand Up @@ -4150,3 +4154,63 @@ int sock_bind_add(struct sock *sk, struct sockaddr *addr, int addr_len)
return sk->sk_prot->bind_add(sk, addr, addr_len);
}
EXPORT_SYMBOL(sock_bind_add);

/* Copy 'size' bytes from userspace and return `size` back to userspace */
int sock_ioctl_inout(struct sock *sk, unsigned int cmd,
void __user *arg, void *karg, size_t size)
{
int ret;

if (copy_from_user(karg, arg, size))
return -EFAULT;

ret = READ_ONCE(sk->sk_prot)->ioctl(sk, cmd, karg);
if (ret)
return ret;

if (copy_to_user(arg, karg, size))
return -EFAULT;

return 0;
}
EXPORT_SYMBOL(sock_ioctl_inout);

/* This is the most common ioctl prep function, where the result (4 bytes) is
* copied back to userspace if the ioctl() returns successfully. No input is
* copied from userspace as input argument.
*/
static int sock_ioctl_out(struct sock *sk, unsigned int cmd, void __user *arg)
{
int ret, karg = 0;

ret = READ_ONCE(sk->sk_prot)->ioctl(sk, cmd, &karg);
if (ret)
return ret;

return put_user(karg, (int __user *)arg);
}

/* A wrapper around sock ioctls, which copies the data from userspace
* (depending on the protocol/ioctl), and copies back the result to userspace.
* The main motivation for this function is to pass kernel memory to the
* protocol ioctl callbacks, instead of userspace memory.
*/
int sk_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
{
int rc = 1;

if (sk_is_ipmr(sk))
rc = ipmr_sk_ioctl(sk, cmd, arg);
else if (sk_is_icmpv6(sk))
rc = ip6mr_sk_ioctl(sk, cmd, arg);
else if (sk_is_phonet(sk))
rc = phonet_sk_ioctl(sk, cmd, arg);

/* If ioctl was processed, returns its value */
if (rc <= 0)
return rc;

/* Otherwise call the default handler */
return sock_ioctl_out(sk, cmd, arg);
}
EXPORT_SYMBOL(sk_ioctl);
2 changes: 1 addition & 1 deletion net/dccp/dccp.h
Original file line number Diff line number Diff line change
Expand Up @@ -292,7 +292,7 @@ int dccp_getsockopt(struct sock *sk, int level, int optname,
char __user *optval, int __user *optlen);
int dccp_setsockopt(struct sock *sk, int level, int optname,
sockptr_t optval, unsigned int optlen);
int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg);
int dccp_ioctl(struct sock *sk, int cmd, int *karg);
int dccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
int dccp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags,
int *addr_len);
Expand Down
12 changes: 6 additions & 6 deletions net/dccp/proto.c
Original file line number Diff line number Diff line change
Expand Up @@ -362,7 +362,7 @@ __poll_t dccp_poll(struct file *file, struct socket *sock,

EXPORT_SYMBOL_GPL(dccp_poll);

int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
int dccp_ioctl(struct sock *sk, int cmd, int *karg)
{
int rc = -ENOTCONN;

Expand All @@ -373,27 +373,27 @@ int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)

switch (cmd) {
case SIOCOUTQ: {
int amount = sk_wmem_alloc_get(sk);
*karg = sk_wmem_alloc_get(sk);
/* Using sk_wmem_alloc here because sk_wmem_queued is not used by DCCP and
* always 0, comparably to UDP.
*/

rc = put_user(amount, (int __user *)arg);
rc = 0;
}
break;
case SIOCINQ: {
struct sk_buff *skb;
unsigned long amount = 0;
*karg = 0;

skb = skb_peek(&sk->sk_receive_queue);
if (skb != NULL) {
/*
* We will only return the amount of this packet since
* that is all that will be read.
*/
amount = skb->len;
*karg = skb->len;
}
rc = put_user(amount, (int __user *)arg);
rc = 0;
}
break;
default:
Expand Down
15 changes: 7 additions & 8 deletions net/ieee802154/socket.c
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ static int ieee802154_sock_ioctl(struct socket *sock, unsigned int cmd,
default:
if (!sk->sk_prot->ioctl)
return -ENOIOCTLCMD;
return sk->sk_prot->ioctl(sk, cmd, arg);
return sk_ioctl(sk, cmd, (void __user *)arg);
}
}

Expand Down Expand Up @@ -531,33 +531,32 @@ static int dgram_bind(struct sock *sk, struct sockaddr *uaddr, int len)
return err;
}

static int dgram_ioctl(struct sock *sk, int cmd, unsigned long arg)
static int dgram_ioctl(struct sock *sk, int cmd, int *karg)
{
switch (cmd) {
case SIOCOUTQ:
{
int amount = sk_wmem_alloc_get(sk);
*karg = sk_wmem_alloc_get(sk);

return put_user(amount, (int __user *)arg);
return 0;
}

case SIOCINQ:
{
struct sk_buff *skb;
unsigned long amount;

amount = 0;
*karg = 0;
spin_lock_bh(&sk->sk_receive_queue.lock);
skb = skb_peek(&sk->sk_receive_queue);
if (skb) {
/* We will only return the amount
* of this packet since that is all
* that will be read.
*/
amount = skb->len - ieee802154_hdr_length(skb);
*karg = skb->len - ieee802154_hdr_length(skb);
}
spin_unlock_bh(&sk->sk_receive_queue.lock);
return put_user(amount, (int __user *)arg);
return 0;
}
}

Expand Down
2 changes: 1 addition & 1 deletion net/ipv4/af_inet.c
Original file line number Diff line number Diff line change
Expand Up @@ -998,7 +998,7 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
break;
default:
if (sk->sk_prot->ioctl)
err = sk->sk_prot->ioctl(sk, cmd, arg);
err = sk_ioctl(sk, cmd, (void __user *)arg);
else
err = -ENOIOCTLCMD;
break;
Expand Down
Loading

0 comments on commit e1d001f

Please sign in to comment.