Skip to content

Commit

Permalink
Better performance for slicing factors and ordered factors.
Browse files Browse the repository at this point in the history
  • Loading branch information
romainfrancois committed Jul 25, 2019
1 parent ef1c4b9 commit ec09492
Show file tree
Hide file tree
Showing 4 changed files with 37 additions and 1 deletion.
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# dplyr (development version)

* Better performance for extracting slices of factors and ordered factors (#4501).

* `group_by()` does not create an arbitrary NA group when grouping by factors with `drop = TRUE` (#4460).

* `rbind_all()` and `rbind_list()` have been removed (@bjungbogati, #4433).
Expand Down
3 changes: 3 additions & 0 deletions inst/include/dplyr/symbols.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,10 +81,13 @@ struct strings {
static SEXP POSIXct;
static SEXP POSIXt;
static SEXP Date;
static SEXP factor;
static SEXP ordered;
};

struct vectors {
static SEXP factor;
static SEXP ordered;
};

} // namespace dplyr
Expand Down
19 changes: 19 additions & 0 deletions inst/include/dplyr/visitors/subset/column_subset.h
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,18 @@ inline bool is_trivial_Date(SEXP x, SEXP klass) {
return TYPEOF(x) == REALSXP && TYPEOF(klass) == STRSXP && Rf_length(klass) == 1 && STRING_ELT(klass, 0) == strings::Date;
}

inline bool is_bare_factor_class(SEXP kls) {
return TYPEOF(kls) == STRSXP &&
(
(Rf_xlength(kls) == 1 && STRING_ELT(kls, 0) == strings::factor) ||
(Rf_xlength(kls) == 2 && STRING_ELT(kls, 0) == strings::ordered && STRING_ELT(kls, 1) == strings::factor)
);
}

inline bool is_bare_factor(SEXP x) {
return Rf_isFactor(x) && is_bare_factor_class(Rf_getAttrib(x, R_ClassSymbol));
}

template <typename Index>
SEXP column_subset(SEXP x, const Index& index, SEXP frame) {
if (Rf_inherits(x, "data.frame")) {
Expand Down Expand Up @@ -174,6 +186,13 @@ SEXP column_subset(SEXP x, const Index& index, SEXP frame) {
}
}

// use fast slicing for factors
if (is_bare_factor(x)) {
Rcpp::Shield<SEXP> out(column_subset_impl<INTSXP, Index>(x, index));
copy_most_attributes(out, x);
return out;
}

// special case POSIXct (#3988)
if (is_trivial_POSIXct(x, klass)) {
return column_subset_impl<REALSXP, Index>(x, index);
Expand Down
14 changes: 13 additions & 1 deletion src/init.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,14 @@ SEXP get_factor_classes() {
return klasses;
}

SEXP get_ordered_classes() {
static Rcpp::CharacterVector klasses(2);
klasses[0] = "ordered";
klasses[1] = "factor";
return klasses;
}


SEXP mark_precious(SEXP x) {
R_PreserveObject(x);
return x;
Expand Down Expand Up @@ -104,9 +112,13 @@ SEXP symbols::names = R_NamesSymbol;
SEXP symbols::formula = Rf_install("formula");
SEXP fns::quote = Rf_eval(Rf_install("quote"), R_BaseEnv);

SEXP vectors::factor = get_factor_classes();
SEXP vectors::ordered = get_ordered_classes();

SEXP strings::POSIXct = STRING_ELT(get_time_classes(), 0);
SEXP strings::POSIXt = STRING_ELT(get_time_classes(), 1);
SEXP strings::Date = STRING_ELT(get_date_classes(), 0);
SEXP strings::factor = STRING_ELT(vectors::factor, 0);
SEXP strings::ordered = STRING_ELT(vectors::ordered, 0);

SEXP vectors::factor = get_factor_classes();
}

0 comments on commit ec09492

Please sign in to comment.