Skip to content

Commit

Permalink
optimize append_any more (JuliaLang#31895)
Browse files Browse the repository at this point in the history
* inference: improve sizeof tfunc

Primitive types also always have a sizeof. This is particularly relevant for `Ptr`,
since `elsize` uses `sizeof(Ptr)` to compute the stride.

* apply: unify append_any implementations

Rather than optimize two copies of the same function,
we can just have one version of the function that handles everything.
  • Loading branch information
vtjnash authored May 6, 2019
2 parents 7ef9e75 + 5eece3f commit 9e9bcb8
Show file tree
Hide file tree
Showing 8 changed files with 148 additions and 143 deletions.
2 changes: 1 addition & 1 deletion base/compiler/abstractinterpretation.jl
Original file line number Diff line number Diff line change
Expand Up @@ -547,7 +547,7 @@ function abstract_apply(@nospecialize(aft), aargtypes::Vector{Any}, vtypes::VarT
tail = tuple_tail_elem(unwrapva(ct[end]), cti)
push!(ctypes´, push!(ct[1:(end - 1)], tail))
else
push!(ctypes´, append_any(ct, cti))
push!(ctypes´, append!(ct[:], cti))
end
end
end
Expand Down
24 changes: 14 additions & 10 deletions base/compiler/tfuncs.jl
Original file line number Diff line number Diff line change
Expand Up @@ -302,33 +302,37 @@ function sizeof_nothrow(@nospecialize(x))
else
x = widenconst(x)
end
isconstType(x) && (x = x.parameters[1])
if isa(x, Union)
return sizeof_nothrow(x.a) && sizeof_nothrow(x.b)
end
isconstType(x) && (x = x.parameters[1]) # since sizeof(typeof(x)) == sizeof(x)
x === DataType && return false
return isconcretetype(x)
return isconcretetype(x) || isprimitivetype(x)
end
function _const_sizeof(@nospecialize(x))
    # Return the inferred result of `Core.sizeof(x)` for a known value/type `x`:
    # `Const(size)` when the size can be computed now, otherwise the type `Int`.

    # Constant Vector does not have constant size
    isa(x, Vector) && return Int
    size = try
        Core.sizeof(x)
    catch ex
        # Core.sizeof may throw an ErrorException such as
        # "argument is an abstract type; size is indeterminate" or
        # "type does not have a fixed size"
        # Any other exception type is unexpected and is propagated.
        isa(ex, ErrorException) || rethrow()
        return Int
    end
    return Const(size)
end
function sizeof_tfunc(@nospecialize(x),)
    # Return-type function for `Core.sizeof` with one argument.
    # Yields whatever `_const_sizeof` produces when the argument pins down a
    # single value or type, and falls back to `Int` otherwise.

    # Inputs that already identify the value/type whose size is requested.
    if isa(x, Const)
        return _const_sizeof(x.val)
    elseif isa(x, Conditional)
        return _const_sizeof(Bool)
    elseif isconstType(x)
        return _const_sizeof(x.parameters[1])
    end

    widened = widenconst(x)
    if isa(widened, Union)
        # Handle each side of the union separately and merge the two answers.
        return tmerge(sizeof_tfunc(widened.a), sizeof_tfunc(widened.b))
    end
    # `DataType` itself is excluded from the concrete-type case; primitive types
    # are accepted regardless of the concreteness check.
    if (widened !== DataType && isconcretetype(widened)) || isprimitivetype(widened)
        return _const_sizeof(widened)
    end
    return Int
end
add_tfunc(Core.sizeof, 1, 1, sizeof_tfunc, 0)
Expand Down
77 changes: 0 additions & 77 deletions base/essentials.jl
Original file line number Diff line number Diff line change
Expand Up @@ -659,83 +659,6 @@ function isassigned(v::SimpleVector, i::Int)
end


# used by ... syntax to access the `iterate` function from inside the Core._apply implementation
# must be a separate function from append(), since Core._apply needs this exact function
function append_any(xs...)
    @nospecialize
    # Collect every element of every argument, in order, into one Vector{Any}.
    # `out` is over-allocated while it is being filled and trimmed to the number
    # of slots actually used just before returning.
    cap = 4     # slots currently allocated in `out`
    pos = 1     # index of the next slot to fill
    out = Vector{Any}(undef, cap)
    nargs = length(xs)
    for k in 1:nargs
        x = @inbounds xs[k]
        # Containers whose element count is known up front reserve their space
        # in one step and are copied with direct element access; anything else
        # goes through generic iteration and grows `out` in chunks of 16.
        if x isa SimpleVector
            n = length(x)
            if pos + n - 1 > cap
                grow = n > 16 ? n : 16
                _growend!(out, grow)
                cap += grow
            end
            for j in 1:n
                elt = @inbounds x[j]
                arrayset(false, out, elt, pos)
                pos += 1
            end
        elseif x isa Tuple || x isa NamedTuple
            # Tuples and NamedTuples are both read field-by-field.
            n = nfields(x)
            if pos + n - 1 > cap
                grow = n > 16 ? n : 16
                _growend!(out, grow)
                cap += grow
            end
            for j in 1:n
                elt = getfield(x, j, false)
                arrayset(false, out, elt, pos)
                pos += 1
            end
        elseif x isa Array
            n = length(x)
            if pos + n - 1 > cap
                grow = n > 16 ? n : 16
                _growend!(out, grow)
                cap += grow
            end
            for j in 1:n
                elt = arrayref(false, x, j)
                arrayset(false, out, elt, pos)
                pos += 1
            end
        else
            for elt in x
                if pos > cap
                    _growend!(out, 16)
                    cap += 16
                end
                arrayset(false, out, elt, pos)
                pos += 1
            end
        end
    end
    # Drop the allocated-but-unused tail.
    _deleteend!(out, cap - pos + 1)
    return out
end


"""
Colon()
Expand Down
161 changes: 112 additions & 49 deletions src/builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -445,21 +445,49 @@ JL_CALLABLE(jl_f_ifelse)

// apply ----------------------------------------------------------------------

jl_function_t *jl_append_any_func;
// Allocate a new svec with `newalloc` slots, copy the first `oldalloc` entries of
// `oldargs` into it, and set every remaining slot to NULL.
// Returns the newly allocated svec.
static NOINLINE jl_svec_t *_copy_to(size_t newalloc, jl_value_t **oldargs, size_t oldalloc)
{
    jl_svec_t *fresh = jl_alloc_svec_uninit(newalloc);
    jl_value_t **slots = jl_svec_data(fresh);
    size_t idx = 0;
    // carry over the existing entries
    while (idx < oldalloc) {
        slots[idx] = oldargs[idx];
        idx++;
    }
    // the svec was allocated uninitialized, so clear the tail explicitly
    while (idx < newalloc) {
        slots[idx] = NULL;
        idx++;
    }
    return fresh;
}

// Ensure the argument buffer can hold at least `newalloc` entries.
// When the current capacity (*n_alloc) is already sufficient this is a no-op.
// Otherwise the contents are copied into a larger svec and *root, *arg_heap,
// *oldargs and *n_alloc are all updated to describe the new buffer.
// A non-zero `extra` means `newalloc` is only an estimate, so additional
// headroom is reserved.
void STATIC_INLINE _grow_to(jl_value_t **root, jl_value_t ***oldargs, jl_svec_t **arg_heap, size_t *n_alloc, size_t newalloc, size_t extra)
{
    const size_t have = *n_alloc;
    if (have < newalloc) {
        // grow by an extra 50% if newalloc is still only a guess
        size_t want = extra ? newalloc + have / 2 + 16 : newalloc;
        jl_svec_t *grown = _copy_to(want, *oldargs, have);
        // store into *root first, before any of the other outputs are updated
        *root = (jl_value_t*)grown;
        *arg_heap = grown;
        *oldargs = jl_svec_data(grown);
        *n_alloc = want;
    }
}

static jl_function_t *jl_iterate_func;

JL_CALLABLE(jl_f__apply)
{
JL_NARGSV(apply, 1);
jl_function_t *f = args[0];
if (nargs == 2) {
// some common simple cases
if (f == jl_builtin_svec) {
if (jl_is_svec(args[1]))
return args[1];
if (jl_is_array(args[1])) {
size_t n = jl_array_len(args[1]);
jl_svec_t *t = jl_alloc_svec(n);
JL_GC_PUSH1(&t);
for(size_t i=0; i < n; i++) {
for (size_t i = 0; i < n; i++) {
jl_svecset(t, i, jl_arrayref((jl_array_t*)args[1], i));
}
JL_GC_POP();
Expand All @@ -470,85 +498,93 @@ JL_CALLABLE(jl_f__apply)
return args[1];
}
}
size_t n=0, i, j;
for(i=1; i < nargs; i++) {
// estimate how many real arguments we appear to have
size_t precount = 1;
size_t extra = 0;
size_t i;
for (i = 1; i < nargs; i++) {
if (jl_is_svec(args[i])) {
n += jl_svec_len(args[i]);
precount += jl_svec_len(args[i]);
}
else if (jl_is_tuple(args[i]) || jl_is_namedtuple(args[i])) {
n += jl_nfields(args[i]);
precount += jl_nfields(args[i]);
}
else if (jl_is_array(args[i])) {
n += jl_array_len(args[i]);
precount += jl_array_len(args[i]);
}
else {
if (jl_append_any_func == NULL) {
jl_append_any_func =
(jl_function_t*)jl_get_global(jl_top_module, jl_symbol("append_any"));
if (jl_append_any_func == NULL) {
// error if append_any not available
JL_TYPECHK(apply, tuple, jl_typeof(args[i]));
}
}
jl_array_t *argarr = NULL;
JL_GC_PUSH2(&argarr, &f);
args[0] = jl_append_any_func;
argarr = (jl_array_t*)jl_apply(args, nargs);
assert(jl_typeis(argarr, jl_array_any_type));
jl_array_grow_beg(argarr, 1);
jl_array_ptr_set(argarr, 0, f);
args[0] = f;
jl_value_t *result = jl_apply(jl_array_ptr_data(argarr), jl_array_len(argarr));
JL_GC_POP();
return result;
extra += 1;
}
}
jl_value_t **newargs;
n++;
int onstack = (n < jl_page_size/sizeof(jl_value_t*));
JL_GC_PUSHARGS(newargs, onstack ? n : 1);
if (extra && jl_iterate_func == NULL) {
jl_iterate_func = jl_get_function(jl_top_module, "iterate");
if (jl_iterate_func == NULL)
jl_undefined_var_error(jl_symbol("iterate"));
}
// allocate space for the argument array and gc roots for it
// based on our previous estimates
// use the stack if we have a good estimate that it is small
// otherwise, use the heap and grow it incrementally
// and if there are any extra elements, we'll also need a couple extra roots
int onstack = (precount + 32 * extra < jl_page_size / sizeof(jl_value_t*));
size_t stackalloc = onstack ? (precount + 4 * extra + (extra ? 16 : 0)) : 1;
size_t n_alloc;
jl_value_t **roots;
JL_GC_PUSHARGS(roots, stackalloc + (extra ? 2 : 0));
jl_value_t **newargs = NULL;
jl_svec_t *arg_heap = NULL;
if (!onstack) {
if (onstack) {
newargs = roots;
n_alloc = stackalloc;
}
else {
// put arguments on the heap if there are too many
arg_heap = jl_alloc_svec(n);
newargs[0] = (jl_value_t*)arg_heap;
newargs = jl_svec_data(arg_heap);
n_alloc = 0;
_grow_to(&roots[0], &newargs, &arg_heap, &n_alloc, precount, extra);
}
// GC Note: here we assume that the return value of `jl_svecref`,
// `jl_array_ptr_ref` will not be young if `arg_heap` becomes old
// since they are allocated before `arg_heap`. Otherwise,
// we need to add write barrier for !onstack
newargs[0] = f;
n = 1;
for(i=1; i < nargs; i++) {
precount -= 1;
size_t n = 1;
for (i = 1; i < nargs; i++) {
jl_value_t *ai = args[i];
if (jl_is_svec(ai)) {
jl_svec_t *t = (jl_svec_t*)ai;
size_t al = jl_svec_len(t);
for(j=0; j < al; j++)
size_t j, al = jl_svec_len(t);
precount = (precount > al) ? precount - al : 0;
_grow_to(&roots[0], &newargs, &arg_heap, &n_alloc, n + precount + al, extra);
for (j = 0; j < al; j++) {
newargs[n++] = jl_svecref(t, j);
// GC Note: here we assume that the return value of `jl_svecref`
// will not be young if `arg_heap` becomes old
// since they are allocated before `arg_heap`. Otherwise,
// we need to add write barrier for !onstack
}
}
else if (jl_is_tuple(ai) || jl_is_namedtuple(ai)) {
size_t al = jl_nfields(ai);
for(j=0; j < al; j++) {
size_t j, al = jl_nfields(ai);
precount = (precount > al) ? precount - al : 0;
_grow_to(&roots[0], &newargs, &arg_heap, &n_alloc, n + precount + al, extra);
for (j = 0; j < al; j++) {
// jl_fieldref may allocate.
newargs[n++] = jl_fieldref(ai, j);
if (arg_heap)
jl_gc_wb(arg_heap, newargs[n - 1]);
}
}
else {
assert(jl_is_array(ai));
else if (jl_is_array(ai)) {
jl_array_t *aai = (jl_array_t*)ai;
size_t al = jl_array_len(aai);
size_t j, al = jl_array_len(aai);
precount = (precount > al) ? precount - al : 0;
_grow_to(&roots[0], &newargs, &arg_heap, &n_alloc, n + precount + al, extra);
if (aai->flags.ptrarray) {
for (j = 0; j < al; j++) {
jl_value_t *arg = jl_array_ptr_ref(aai, j);
// apply with array splatting may have embedded NULL value
// #11772
// apply with array splatting may have embedded NULL value (#11772)
if (__unlikely(arg == NULL))
jl_throw(jl_undefref_exception);
newargs[n++] = arg;
if (arg_heap)
jl_gc_wb(arg_heap, arg);
}
}
else {
Expand All @@ -559,6 +595,33 @@ JL_CALLABLE(jl_f__apply)
}
}
}
else {
    // Generic iterable: call `iterate` by hand and append each produced value.
    // Two scratch root slots past the argument area are used here:
    //   roots[stackalloc]     holds `next`, then the iteration `state`
    //   roots[stackalloc + 1] holds the current `value` until it is stored
    assert(extra > 0);
    jl_value_t *args[3];
    args[0] = jl_iterate_func;
    args[1] = ai;
    jl_value_t *next = jl_apply(args, 2);
    while (next != jl_nothing) {
        roots[stackalloc] = next;
        jl_value_t *value = jl_fieldref(next, 0);
        // Root `value` itself (not `next` a second time): jl_fieldref may
        // allocate, so `value` is not necessarily reachable through `next`,
        // and both the following jl_fieldref and _grow_to can allocate.
        roots[stackalloc + 1] = value;
        jl_value_t *state = jl_fieldref(next, 1);
        roots[stackalloc] = state;
        _grow_to(&roots[0], &newargs, &arg_heap, &n_alloc, n + precount + 1, extra);
        newargs[n++] = value;
        if (arg_heap)
            jl_gc_wb(arg_heap, value);
        // `value` is now held by the argument buffer; release the scratch slot
        roots[stackalloc + 1] = NULL;
        args[2] = state;
        next = jl_apply(args, 3);
    }
    roots[stackalloc] = NULL;
    // one fewer argument of unknown length left to expand
    extra -= 1;
}
}
if (arg_heap) {
// optimization: keep only the first root, free the others
((void**)roots)[-2] = (void*)(((size_t)1) << 1);
}
jl_value_t *result = jl_apply(newargs, n);
JL_GC_POP();
Expand Down
2 changes: 0 additions & 2 deletions src/module.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ jl_module_t *jl_main_module = NULL;
jl_module_t *jl_core_module = NULL;
jl_module_t *jl_base_module = NULL;
jl_module_t *jl_top_module = NULL;
extern jl_function_t *jl_append_any_func;

JL_DLLEXPORT jl_module_t *jl_new_module(jl_sym_t *name)
{
Expand Down Expand Up @@ -77,7 +76,6 @@ JL_DLLEXPORT void jl_set_istopmod(jl_module_t *self, uint8_t isprimary)
self->istopmod = 1;
if (isprimary) {
jl_top_module = self;
jl_append_any_func = NULL;
}
}

Expand Down
Loading

0 comments on commit 9e9bcb8

Please sign in to comment.