Skip to content

Commit

Permalink
Added the SRANDMEMBER key <count> variant.
Browse files Browse the repository at this point in the history
SRANDMEMBER called with just the key argument can only return a single
random element from a Redis Set. However, many users need to return
multiple unique elements from a Set. This is not a trivial problem to
handle on the client side, and for truly good performance a C
implementation was required.

After many requests for this feature it was finally implemented.

The problem in implementing this command is the strategy to follow when
the number of elements the user asks for is close to the number of
elements that are already inside the set. In this case asking the
dictionary API for random elements, and trying to add them to a temporary
set, may result in extremely poor performance, as most add operations
will be wasted on duplicated elements.

For this reason this implementation uses a different strategy in this
case: the Set is copied, and random elements are returned to reach the
specified count.

The code actually uses 4 different algorithms optimized for the
different cases.

If the count is negative, the command changes behavior and allows for
duplicated elements in the returned subset.
  • Loading branch information
antirez committed Sep 21, 2012
1 parent d310fbe commit be90c80
Show file tree
Hide file tree
Showing 2 changed files with 155 additions and 1 deletion.
2 changes: 1 addition & 1 deletion src/redis.c
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ struct redisCommand redisCommandTable[] = {
{"sismember",sismemberCommand,3,"r",0,NULL,1,1,1,0,0},
{"scard",scardCommand,2,"r",0,NULL,1,1,1,0,0},
{"spop",spopCommand,2,"wRs",0,NULL,1,1,1,0,0},
{"srandmember",srandmemberCommand,2,"rR",0,NULL,1,1,1,0,0},
{"srandmember",srandmemberCommand,-2,"rR",0,NULL,1,1,1,0,0},
{"sinter",sinterCommand,-2,"rS",0,NULL,1,-1,1,0,0},
{"sinterstore",sinterstoreCommand,-3,"wm",0,NULL,1,-1,1,0,0},
{"sunion",sunionCommand,-2,"rS",0,NULL,1,-1,1,0,0},
Expand Down
154 changes: 154 additions & 0 deletions src/t_set.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
* Set Commands
*----------------------------------------------------------------------------*/

void sunionDiffGenericCommand(redisClient *c, robj **setkeys, int setnum, robj *dstkey, int op);

/* Factory method to return a set that *can* hold "value". When the object has
* an integer-encodable value, an intset will be returned. Otherwise a regular
* hash table. */
Expand Down Expand Up @@ -360,11 +362,163 @@ void spopCommand(redisClient *c) {
server.dirty++;
}

/* handle the "SRANDMEMBER key <count>" variant. The normal version of the
* command is handled by the srandmemberCommand() function itself. */

/* How many times bigger should the set be compared to the requested size
* for us not to use the "remove elements" strategy? Read later in the
* implementation for more info. */
#define SRANDMEMBER_SUB_STRATEGY_MUL 3

void srandmemberWithCountCommand(redisClient *c) {
long l;
unsigned long count, size;
int uniq = 1;
robj *set, *ele;
int64_t llele;
int encoding;

dict *d;

if (getLongFromObjectOrReply(c,c->argv[2],&l,NULL) != REDIS_OK) return;
if (l >= 0) {
count = (unsigned) l;
} else {
/* A negative count means: return the same elements multiple times
* (i.e. don't remove the extracted element after every extraction). */
count = -l;
uniq = 0;
}

if ((set = lookupKeyReadOrReply(c,c->argv[1],shared.emptymultibulk))
== NULL || checkType(c,set,REDIS_SET)) return;
size = setTypeSize(set);

/* If count is zero, serve it ASAP to avoid special cases later. */
if (count == 0) {
addReply(c,shared.emptymultibulk);
return;
}

/* CASE 1: The count was negative, so the extraction method is just:
* "return N random elements" sampling the whole set every time.
* This case is trivial and can be served without auxiliary data
* structures. */
if (!uniq) {
addReplyMultiBulkLen(c,count);
while(count--) {
encoding = setTypeRandomElement(set,&ele,&llele);
if (encoding == REDIS_ENCODING_INTSET) {
addReplyBulkLongLong(c,llele);
} else {
addReplyBulk(c,ele);
}
}
return;
}

/* CASE 2:
* The number of requested elements is greater than the number of
* elements inside the set: simply return the whole set. */
if (count >= size) {
sunionDiffGenericCommand(c,c->argv,c->argc-1,NULL,REDIS_OP_UNION);
return;
}

/* For CASE 3 and CASE 4 we need an auxiliary dictionary. */
d = dictCreate(&setDictType,NULL);

/* CASE 3:
* The number of elements inside the set is not greater than
* SRANDMEMBER_SUB_STRATEGY_MUL times the number of requested elements.
* In this case we create a set from scratch with all the elements, and
* subtract random elements to reach the requested number of elements.
*
* This is done because if the number of requsted elements is just
* a bit less than the number of elements in the set, the natural approach
* used into CASE 3 is highly inefficient. */
if (count*SRANDMEMBER_SUB_STRATEGY_MUL > size) {
setTypeIterator *si;

/* Add all the elements into the temporary dictionary. */
si = setTypeInitIterator(set);
while((encoding = setTypeNext(si,&ele,&llele)) != -1) {
int retval;

if (encoding == REDIS_ENCODING_INTSET) {
retval = dictAdd(d,createStringObjectFromLongLong(llele),NULL);
} else if (ele->encoding == REDIS_ENCODING_RAW) {
retval = dictAdd(d,dupStringObject(ele),NULL);
} else if (ele->encoding == REDIS_ENCODING_INT) {
retval = dictAdd(d,
createStringObjectFromLongLong((long)ele->ptr),NULL);
}
redisAssert(retval == DICT_OK);
}
setTypeReleaseIterator(si);
redisAssert(dictSize(d) == size);

/* Remove random elements to reach the right count. */
while(size > count) {
dictEntry *de;

de = dictGetRandomKey(d);
dictDelete(d,dictGetKey(de));
size--;
}
}

/* CASE 4: We have a big set compared to the requested number of elements.
* In this case we can simply get random elements from the set and add
* to the temporary set, trying to eventually get enough unique elements
* to reach the specified count. */
else {
unsigned long added = 0;

while(added < count) {
int retval;

encoding = setTypeRandomElement(set,&ele,&llele);
if (encoding == REDIS_ENCODING_INTSET) {
retval = dictAdd(d,createStringObjectFromLongLong(llele),NULL);
} else if (ele->encoding == REDIS_ENCODING_RAW) {
retval = dictAdd(d,dupStringObject(ele),NULL);
} else if (ele->encoding == REDIS_ENCODING_INT) {
retval = dictAdd(d,
createStringObjectFromLongLong((long)ele->ptr),NULL);
}

if (retval == DICT_OK) added++;
}
}

/* CASE 3 & 4: send the result to the user. */
{
dictIterator *di;
dictEntry *de;

addReplyMultiBulkLen(c,count);
di = dictGetIterator(d);
while((de = dictNext(di)) != NULL)
addReplyBulk(c,dictGetKey(de));
dictReleaseIterator(di);
dictRelease(d);
}
}

void srandmemberCommand(redisClient *c) {
robj *set, *ele;
int64_t llele;
int encoding;

if (c->argc == 3) {
srandmemberWithCountCommand(c);
return;
} else if (c->argc > 3) {
addReply(c,shared.syntaxerr);
return;
}

if ((set = lookupKeyReadOrReply(c,c->argv[1],shared.nullbulk)) == NULL ||
checkType(c,set,REDIS_SET)) return;

Expand Down

0 comments on commit be90c80

Please sign in to comment.