Skip to content

Commit

Permalink
Merge branch 'master' of git://blackhole.kfki.hu/nf-next
Browse files Browse the repository at this point in the history
Jozsef Kadlecsik says:

====================
ipset patches for nf-next

Please consider applying the next batch of patches for ipset. First
come the small changes, then the bugfixes, and at the end the
RCU-related patches.

* Use MSEC_PER_SEC consistently instead of the number.
* Use SET_WITH_*() helpers to test set extensions, from Sergey Popovich.
* Check extension attributes before getting extensions, from Sergey Popovich.
* Permit CIDR equal to the host address CIDR in IPv6, from Sergey Popovich.
* Make sure we always return the line number on batch in case of an error,
  from Sergey Popovich.
* Check the CIDR value only when the attribute is given, from Sergey Popovich.
* Fix cidr handling for hash:*net* types, reported by Jonathan Johnson.
* Fix parallel resizing and listing of the same set so that the original
  set is kept for the whole duration of the dump.
* Make sure listing doesn't grab a set which is just being destroyed.
* Remove rbtree from ip_set_hash_netiface.c in order to introduce RCU.
* Replace rwlock_t with spinlock_t in "struct ip_set", change the locking
  in the core, and simplify the timeout routines.
* Introduce RCU locking in bitmap:* types with a slight modification in the
  logic on how an element is added.
* Introduce RCU locking in hash:* types. This is the most complex part of
  the changes.
* Introduce RCU locking in list type where standard rculist is used.
* Fix coding style issues reported by checkpatch.pl.
====================

Signed-off-by: Pablo Neira Ayuso <[email protected]>
  • Loading branch information
ummakynes committed Jun 15, 2015
2 parents f09becc + ca0f6a5 commit 53b8762
Show file tree
Hide file tree
Showing 25 changed files with 1,319 additions and 1,312 deletions.
29 changes: 17 additions & 12 deletions include/linux/netfilter/ipset/ip_set.h
Original file line number Diff line number Diff line change
Expand Up @@ -108,8 +108,13 @@ struct ip_set_counter {
atomic64_t packets;
};

/* Comment text stored together with an rcu_head, so that a replaced or
 * removed comment can be released with kfree_rcu().
 */
struct ip_set_comment_rcu {
/* Handed to kfree_rcu() when the comment is released */
struct rcu_head rcu;
/* NUL-terminated comment text, allocated in one piece with the struct */
char str[0];
};

struct ip_set_comment {
char *str;
struct ip_set_comment_rcu __rcu *c;
};

struct ip_set_skbinfo {
Expand Down Expand Up @@ -176,6 +181,9 @@ struct ip_set_type_variant {
/* List elements */
int (*list)(const struct ip_set *set, struct sk_buff *skb,
struct netlink_callback *cb);
/* Keep listing private when resizing runs parallel */
void (*uref)(struct ip_set *set, struct netlink_callback *cb,
bool start);

/* Return true if "b" set is the same as "a"
* according to the create set parameters */
Expand Down Expand Up @@ -223,7 +231,7 @@ struct ip_set {
/* The name of the set */
char name[IPSET_MAXNAMELEN];
/* Lock protecting the set data */
rwlock_t lock;
spinlock_t lock;
/* References to the set */
u32 ref;
/* The core set type */
Expand Down Expand Up @@ -341,12 +349,11 @@ ip_set_put_skbinfo(struct sk_buff *skb, struct ip_set_skbinfo *skbinfo)
cpu_to_be64((u64)skbinfo->skbmark << 32 |
skbinfo->skbmarkmask))) ||
(skbinfo->skbprio &&
nla_put_net32(skb, IPSET_ATTR_SKBPRIO,
nla_put_net32(skb, IPSET_ATTR_SKBPRIO,
cpu_to_be32(skbinfo->skbprio))) ||
(skbinfo->skbqueue &&
nla_put_net16(skb, IPSET_ATTR_SKBQUEUE,
nla_put_net16(skb, IPSET_ATTR_SKBQUEUE,
cpu_to_be16(skbinfo->skbqueue)));

}

static inline void
Expand Down Expand Up @@ -380,12 +387,12 @@ ip_set_init_counter(struct ip_set_counter *counter,

/* Netlink CB args */
enum {
IPSET_CB_NET = 0,
IPSET_CB_DUMP,
IPSET_CB_INDEX,
IPSET_CB_ARG0,
IPSET_CB_NET = 0, /* net namespace */
IPSET_CB_DUMP, /* dump single set/all sets */
IPSET_CB_INDEX, /* set index */
IPSET_CB_PRIVATE, /* set private data */
IPSET_CB_ARG0, /* type specific */
IPSET_CB_ARG1,
IPSET_CB_ARG2,
};

/* register and unregister set references */
Expand Down Expand Up @@ -545,8 +552,6 @@ ip_set_put_extensions(struct sk_buff *skb, const struct ip_set *set,
{ .bytes = ULLONG_MAX, .packets = ULLONG_MAX, \
.timeout = (set)->timeout }

#define IP_SET_INIT_CIDR(a, b) ((a) ? (a) : (b))

#define IPSET_CONCAT(a, b) a##b
#define IPSET_TOKEN(a, b) IPSET_CONCAT(a, b)

Expand Down
38 changes: 27 additions & 11 deletions include/linux/netfilter/ipset/ip_set_comment.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,41 +16,57 @@ ip_set_comment_uget(struct nlattr *tb)
return nla_data(tb);
}

/* Called from uadd only, protected by the set spinlock.
* The kadt functions don't use the comment extensions in any way.
*/
static inline void
ip_set_init_comment(struct ip_set_comment *comment,
const struct ip_set_ext *ext)
{
struct ip_set_comment_rcu *c = rcu_dereference_protected(comment->c, 1);
size_t len = ext->comment ? strlen(ext->comment) : 0;

if (unlikely(comment->str)) {
kfree(comment->str);
comment->str = NULL;
if (unlikely(c)) {
kfree_rcu(c, rcu);
rcu_assign_pointer(comment->c, NULL);
}
if (!len)
return;
if (unlikely(len > IPSET_MAX_COMMENT_SIZE))
len = IPSET_MAX_COMMENT_SIZE;
comment->str = kzalloc(len + 1, GFP_ATOMIC);
if (unlikely(!comment->str))
c = kzalloc(sizeof(*c) + len + 1, GFP_ATOMIC);
if (unlikely(!c))
return;
strlcpy(comment->str, ext->comment, len + 1);
strlcpy(c->str, ext->comment, len + 1);
rcu_assign_pointer(comment->c, c);
}

/* Used only when dumping a set, protected by rcu_read_lock_bh() */
static inline int
ip_set_put_comment(struct sk_buff *skb, struct ip_set_comment *comment)
{
if (!comment->str)
struct ip_set_comment_rcu *c = rcu_dereference_bh(comment->c);

if (!c)
return 0;
return nla_put_string(skb, IPSET_ATTR_COMMENT, comment->str);
return nla_put_string(skb, IPSET_ATTR_COMMENT, c->str);
}

/* Called from uadd/udel, flush or the garbage collectors protected
* by the set spinlock.
* Called when the set is destroyed and when there can't be any user
* of the set data anymore.
*/
static inline void
ip_set_comment_free(struct ip_set_comment *comment)
{
if (unlikely(!comment->str))
struct ip_set_comment_rcu *c;

c = rcu_dereference_protected(comment->c, 1);
if (unlikely(!c))
return;
kfree(comment->str);
comment->str = NULL;
kfree_rcu(c, rcu);
rcu_assign_pointer(comment->c, NULL);
}

#endif
Expand Down
27 changes: 11 additions & 16 deletions include/linux/netfilter/ipset/ip_set_timeout.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,38 +40,33 @@ ip_set_timeout_uget(struct nlattr *tb)
}

static inline bool
ip_set_timeout_test(unsigned long timeout)
ip_set_timeout_expired(unsigned long *t)
{
return timeout == IPSET_ELEM_PERMANENT ||
time_is_after_jiffies(timeout);
}

static inline bool
ip_set_timeout_expired(unsigned long *timeout)
{
return *timeout != IPSET_ELEM_PERMANENT &&
time_is_before_jiffies(*timeout);
return *t != IPSET_ELEM_PERMANENT && time_is_before_jiffies(*t);
}

static inline void
ip_set_timeout_set(unsigned long *timeout, u32 t)
ip_set_timeout_set(unsigned long *timeout, u32 value)
{
if (!t) {
unsigned long t;

if (!value) {
*timeout = IPSET_ELEM_PERMANENT;
return;
}

*timeout = msecs_to_jiffies(t * 1000) + jiffies;
if (*timeout == IPSET_ELEM_PERMANENT)
t = msecs_to_jiffies(value * MSEC_PER_SEC) + jiffies;
if (t == IPSET_ELEM_PERMANENT)
/* Bingo! :-) */
(*timeout)--;
t--;
*timeout = t;
}

static inline u32
ip_set_timeout_get(unsigned long *timeout)
{
return *timeout == IPSET_ELEM_PERMANENT ? 0 :
jiffies_to_msecs(*timeout - jiffies)/1000;
jiffies_to_msecs(*timeout - jiffies)/MSEC_PER_SEC;
}

#endif /* __KERNEL__ */
Expand Down
6 changes: 3 additions & 3 deletions include/uapi/linux/netfilter/ipset/ip_set.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,12 @@
/* The protocol version */
#define IPSET_PROTOCOL 6

/* The maximum permissible comment length we will accept over netlink */
#define IPSET_MAX_COMMENT_SIZE 255

/* The max length of strings including NUL: set and type identifiers */
#define IPSET_MAXNAMELEN 32

/* The maximum permissible comment length we will accept over netlink */
#define IPSET_MAX_COMMENT_SIZE 255

/* Message types and commands */
enum ipset_cmd {
IPSET_CMD_NONE,
Expand Down
44 changes: 29 additions & 15 deletions net/netfilter/ipset/ip_set_bitmap_gen.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set))
struct mtype *map = set->data;

init_timer(&map->gc);
map->gc.data = (unsigned long) set;
map->gc.data = (unsigned long)set;
map->gc.function = gc;
map->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ;
add_timer(&map->gc);
Expand Down Expand Up @@ -144,10 +144,12 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,

if (ret == IPSET_ADD_FAILED) {
if (SET_WITH_TIMEOUT(set) &&
ip_set_timeout_expired(ext_timeout(x, set)))
ip_set_timeout_expired(ext_timeout(x, set))) {
ret = 0;
else if (!(flags & IPSET_FLAG_EXIST))
} else if (!(flags & IPSET_FLAG_EXIST)) {
set_bit(e->id, map->members);
return -IPSET_ERR_EXIST;
}
/* Element is re-added, cleanup extensions */
ip_set_ext_destroy(set, x);
}
Expand All @@ -165,6 +167,10 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
ip_set_init_comment(ext_comment(x, set), ext);
if (SET_WITH_SKBINFO(set))
ip_set_init_skbinfo(ext_skbinfo(x, set), ext);

/* Activate element */
set_bit(e->id, map->members);

return 0;
}

Expand Down Expand Up @@ -203,33 +209,38 @@ mtype_list(const struct ip_set *set,
struct nlattr *adt, *nested;
void *x;
u32 id, first = cb->args[IPSET_CB_ARG0];
int ret = 0;

adt = ipset_nest_start(skb, IPSET_ATTR_ADT);
if (!adt)
return -EMSGSIZE;
/* Extensions may be replaced */
rcu_read_lock();
for (; cb->args[IPSET_CB_ARG0] < map->elements;
cb->args[IPSET_CB_ARG0]++) {
id = cb->args[IPSET_CB_ARG0];
x = get_ext(set, map, id);
if (!test_bit(id, map->members) ||
(SET_WITH_TIMEOUT(set) &&
#ifdef IP_SET_BITMAP_STORED_TIMEOUT
mtype_is_filled((const struct mtype_elem *) x) &&
mtype_is_filled((const struct mtype_elem *)x) &&
#endif
ip_set_timeout_expired(ext_timeout(x, set))))
continue;
nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
if (!nested) {
if (id == first) {
nla_nest_cancel(skb, adt);
return -EMSGSIZE;
} else
goto nla_put_failure;
ret = -EMSGSIZE;
goto out;
}

goto nla_put_failure;
}
if (mtype_do_list(skb, map, id, set->dsize))
goto nla_put_failure;
if (ip_set_put_extensions(skb, set, x,
mtype_is_filled((const struct mtype_elem *) x)))
mtype_is_filled((const struct mtype_elem *)x)))
goto nla_put_failure;
ipset_nest_end(skb, nested);
}
Expand All @@ -238,29 +249,32 @@ mtype_list(const struct ip_set *set,
/* Set listing finished */
cb->args[IPSET_CB_ARG0] = 0;

return 0;
goto out;

nla_put_failure:
nla_nest_cancel(skb, nested);
if (unlikely(id == first)) {
cb->args[IPSET_CB_ARG0] = 0;
return -EMSGSIZE;
ret = -EMSGSIZE;
}
ipset_nest_end(skb, adt);
return 0;
out:
rcu_read_unlock();
return ret;
}

static void
mtype_gc(unsigned long ul_set)
{
struct ip_set *set = (struct ip_set *) ul_set;
struct ip_set *set = (struct ip_set *)ul_set;
struct mtype *map = set->data;
void *x;
u32 id;

/* We run parallel with other readers (test element)
* but adding/deleting new entries is locked out */
read_lock_bh(&set->lock);
* but adding/deleting new entries is locked out
*/
spin_lock_bh(&set->lock);
for (id = 0; id < map->elements; id++)
if (mtype_gc_test(id, map, set->dsize)) {
x = get_ext(set, map, id);
Expand All @@ -269,7 +283,7 @@ mtype_gc(unsigned long ul_set)
ip_set_ext_destroy(set, x);
}
}
read_unlock_bh(&set->lock);
spin_unlock_bh(&set->lock);

map->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ;
add_timer(&map->gc);
Expand Down
Loading

0 comments on commit 53b8762

Please sign in to comment.