Skip to content

Commit

Permalink
cs: new vfasl writer to support cross compilation
Browse files Browse the repository at this point in the history
Replace the vfasl writer (which was in C) with a new implementation
(in Scheme). The main result is that the vfasl writer can be used in
cross-build mode.

Racket uses the vfasl format for its boot images, because they can
load faster --- cutting the Chez Scheme plus boot files startup time
in half, which saves about 40msec on a typical machine. That's not
enough to matter for something like DrRacket, but it can matter for
small Racket scripts. Formerly, cross builds disabled vfasl
generation.

A vfasl file is roughly an image of code and data as it will appear in
memory, and a relatively fast linking step makes the image work in a
running process. The old implementation was in C because it reused GC
structures and code, treating fasl creation as copying objects into a
vfasl image instead of a new generation. The new implementation is
more like a fasl reader, loading objects into a vfasl image instead of
the live heap. The two implementations are about the same amount of
code and both involve a certain amount of repeated implementation
(i.e., imitating a collection or fasl load), but the Scheme
implementation is more flexible and works for cross compilation.
  • Loading branch information
mflatt committed Dec 18, 2020
1 parent a08a6b4 commit b7c0130
Show file tree
Hide file tree
Showing 36 changed files with 1,687 additions and 1,425 deletions.
2 changes: 1 addition & 1 deletion .makefile
Original file line number Diff line number Diff line change
Expand Up @@ -338,7 +338,7 @@ RACKET_FOR_BOOTFILES = $(RACKET)
RACKET_FOR_BUILD = $(RACKET)

# This branch name changes each time the pb boot files are updated:
PB_BRANCH == circa-7.9.0.13-1
PB_BRANCH == circa-7.9.0.14-2
PB_REPO = https://github.com/racket/pb

# Alternative source for Chez Scheme boot files, normally set by
Expand Down
12 changes: 6 additions & 6 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ RACKETCS_SUFFIX =
RACKET =
RACKET_FOR_BOOTFILES = $(RACKET)
RACKET_FOR_BUILD = $(RACKET)
PB_BRANCH = circa-7.9.0.13-1
PB_BRANCH = circa-7.9.0.14-2
PB_REPO = https://github.com/racket/pb
EXTRA_REPOS_BASE =
CS_CROSS_SUFFIX =
Expand Down Expand Up @@ -307,18 +307,18 @@ maybe-fetch-pb-as-is:
echo done
fetch-pb-from:
mkdir -p racket/src/ChezScheme/boot
if [ ! -d racket/src/ChezScheme/boot/pb ] ; then git clone -q -b circa-7.9.0.13-1 $(PB_REPO) racket/src/ChezScheme/boot/pb ; else cd racket/src/ChezScheme/boot/pb && git fetch -q origin circa-7.9.0.13-1:remotes/origin/circa-7.9.0.13-1 ; fi
cd racket/src/ChezScheme/boot/pb && git checkout -q circa-7.9.0.13-1
if [ ! -d racket/src/ChezScheme/boot/pb ] ; then git clone -q -b circa-7.9.0.14-2 $(PB_REPO) racket/src/ChezScheme/boot/pb ; else cd racket/src/ChezScheme/boot/pb && git fetch -q origin circa-7.9.0.14-2:remotes/origin/circa-7.9.0.14-2 ; fi
cd racket/src/ChezScheme/boot/pb && git checkout -q circa-7.9.0.14-2
pb-fetch:
$(MAKE) fetch-pb EXTRA_REPOS_BASE="$(EXTRA_REPOS_BASE)" PB_REPO="$(PB_REPO)"
pb-build:
cd racket/src/ChezScheme && racket rktboot/main.rkt --machine pb
pb-stage:
cd racket/src/ChezScheme/boot/pb && git branch circa-7.9.0.13-1
cd racket/src/ChezScheme/boot/pb && git checkout circa-7.9.0.13-1
cd racket/src/ChezScheme/boot/pb && git branch circa-7.9.0.14-2
cd racket/src/ChezScheme/boot/pb && git checkout circa-7.9.0.14-2
cd racket/src/ChezScheme/boot/pb && git add . && git commit --amend -m "new build"
pb-push:
cd racket/src/ChezScheme/boot/pb && git push -u origin circa-7.9.0.13-1
cd racket/src/ChezScheme/boot/pb && git push -u origin circa-7.9.0.14-2
win-cs-base:
IF "$(RACKET_FOR_BUILD)" == "" $(MAKE) win-bc-then-cs-base SETUP_BOOT_MODE=--boot WIN32_BUILD_LEVEL=bc PLAIN_RACKET=racket\racketbc DISABLE_STATIC_LIBS="$(DISABLE_STATIC_LIBS)" EXTRA_REPOS_BASE="$(EXTRA_REPOS_BASE)" JOB_OPTIONS="$(JOB_OPTIONS)" PLT_SETUP_OPTIONS="$(PLT_SETUP_OPTIONS)" RACKETBC_SUFFIX="$(RACKETBC_SUFFIX)" RACKETCS_SUFFIX="$(RACKETCS_SUFFIX)"
IF not "$(RACKET_FOR_BUILD)" == "" $(MAKE) win-just-cs-base SETUP_BOOT_MODE=--chain DISABLE_STATIC_LIBS="$(DISABLE_STATIC_LIBS)" EXTRA_REPOS_BASE="$(EXTRA_REPOS_BASE)" JOB_OPTIONS="$(JOB_OPTIONS)" PLT_SETUP_OPTIONS="$(PLT_SETUP_OPTIONS)" RACKETCS_SUFFIX="$(RACKETCS_SUFFIX)" RACKET_FOR_BUILD="$(RACKET_FOR_BUILD)"
Expand Down
2 changes: 1 addition & 1 deletion pkgs/base/info.rkt
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

;; In the Racket source repo, this version should change only when
;; "racket_version.h" changes:
(define version "7.9.0.13")
(define version "7.9.0.14")

(define deps `("racket-lib"
["racket" #:version ,version]))
Expand Down
7 changes: 4 additions & 3 deletions pkgs/racket-doc/scribblings/reference/places.scrbl
Original file line number Diff line number Diff line change
Expand Up @@ -25,15 +25,16 @@ take advantage of machines with multiple processors, cores, or
hardware threads.

@margin-note{Currently, parallel support for places is enabled
only for the 3m (main) and CS variants of Racket, and only
only for the CS and 3m variants of Racket, and for 3m, only
by default for Windows, Linux x86/x86_64, and Mac OS x86/x86_64. To
enable support for other platforms, use @DFlag{enable-places} with
enable support for other platforms with 3m, use @DFlag{enable-places} with
@exec{configure} when building Racket. The @racket[place-enabled?]
function reports whether places run in parallel.

Implementation and operating-system constraints may limit the
scalability of places. For example, although places can perform
garbage collections independently in the 3m variant, a garbage collection
garbage collections in parallel in the CS variant or independently
in the 3m variant, a garbage collection
may need to manipulate a page table that is shared across all
places, and that shared page table can be a bottleneck with enough
places---perhaps around 8 or 16.}
Expand Down
57 changes: 56 additions & 1 deletion racket/src/ChezScheme/IMPLEMENTATION.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@ found in the "c" directory.

Some key files in "s":

* "cmacro.ss": object layouts and other global constants
* "cmacro.ss": object layouts and other global constants, including
constants that are needed by both the compiler and the kernel

* "syntax.ss": the macro expander

Expand Down Expand Up @@ -141,6 +142,28 @@ Tests go in "mats/*...*.ms". In "*machine-type*/mats", you can use
changing `7.ms`. Makefile variables like `o` control the way tests
are run; for example, use `make o=3 7.mo` to test in unsafe mode.

# Compiled Files and Boot Files

A Scheme file conventionally uses the suffix ".ss" and its compiled
form uses the suffix ".so". The format of a compiled file is closely
related to the fasl format that is exposed by `fasl-write` and
`fasl-read`, but you can't compile Scheme code to some value that is
written with `fasl-write`. Instead, `compile-file` and related
functions directly generate compiled code in a fasled form that
includes needed linking information.

A boot file, usually with the suffix ".boot", has the same format as a
compiled file, but with an extra header that identifies it as a boot
file and takes care of some singleton objects, such as `#!base-rtd`
and the stub to invoke compiled code.

The vfasl format is used for the same purposes as the fasl format, but
mostly for boot files. It is always platform-specific and its content
is very close to the form that the content will take when loaded into
memory. It can load especially quickly with streamlined linking and
interning of symbols and record types, especially in uncompressed
form. The build scripts do not convert boot files to vfasl format.

# Scheme Objects

A Scheme object is represented at run time by a pointer. The low bits
Expand Down Expand Up @@ -212,6 +235,13 @@ contain the value `type-inexactnum`. The `iptr` type for `type` means
"a pointer-sized signed integer". The `ptr` type for `real` and `imag`
means "pointer" or "Scheme object".

If you create a new type of object, then several pieces need to be
updated: the garbage collector (in "mkgc.ss" and "gc.c"), the compiler
(to implement primitives that generate that kind of object), the fasl
writer (in "fasl.ss"), the fasl reader (in "fasl.c"), the fasl reader
used by `strip-fasl-file` and `vfasl-convert-file` (in "strip.ss"),
the vfasl writer (in "vfasl.ss"), and the inspector (in "inspect.ss").

# Functions and Calls

Scheme code does not use the C stack, except to the degree that it
Expand Down Expand Up @@ -1079,6 +1109,31 @@ The `asm-foreign-callable` function returns 4 values:
Generate the code for a C return, including any teardown needed to
balance `c-init`.

# Cross Compilation and Compile-Time Constants

When cross compiling, there are two notions of quantities/properties
like the size of pointers or endianness: the host notion and the
target platform's notion. A function like `(native-endianness)` always
reports the host's notion. A constant like `(constant
native-endianness)` refers to the target machine's notion.

Cross compilation works by starting with a Chez Scheme that runs on
the host machine and then re-compiling a subset of the Chez Scheme
implementation to run on the host machine but with `constant` values
suitable for the target machine. The recompiled parts are assembled
into an `xpatch` file that can be loaded to replace functions like
`compile-file` and `vfasl-convert-file` with ones that use the
target-machine constants. Loading an `xpatch` file tends to make
compilation or fasl operations for the host machine inaccessible, so a
given Chez Scheme process is only good for targeting one particular
platform.

When working on the compiler or fasl-related tools, take care to use
the right notion of a quantity or property. If you need the host
value, then there must be some function that provides the value. If
you need the target machine's value, then it must be accessed using
`constant`.

# Changing the Version Number

To change the version number:
Expand Down
1 change: 0 additions & 1 deletion racket/src/ChezScheme/c/Mf-base
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,6 @@ gc-011.o gc-par.o gc-ocd.o gc-oce.o: gc.c
gc-011.o gc-ocd.o: ${Include}/gc-ocd.inc
gc-oce.o: ${Include}/gc-oce.inc
gc-par.o: ${Include}/gc-par.inc
vfasl.o: ${Include}/vfasl.inc
gcwrapper.o: ${Include}/heapcheck.inc

../zlib/zlib.h ../zlib/zconf.h: ../zlib/configure.log
Expand Down
14 changes: 7 additions & 7 deletions racket/src/ChezScheme/c/externs.h
Original file line number Diff line number Diff line change
Expand Up @@ -113,11 +113,11 @@ extern void S_phantom_bytevector_adjust PROTO((ptr ph, uptr new_sz));

/* fasl.c */
extern void S_fasl_init PROTO((void));
ptr S_fasl_read PROTO((INT fd, IFASLCODE situation, ptr path, ptr externals));
ptr S_bv_fasl_read PROTO((ptr bv, int ty, uptr offset, uptr len, ptr path, ptr externals));
ptr S_boot_read PROTO((INT fd, const char *path));
char *S_format_scheme_version PROTO((uptr n));
char *S_lookup_machine_type PROTO((uptr n));
extern ptr S_fasl_read PROTO((INT fd, IFASLCODE situation, ptr path, ptr externals));
extern ptr S_bv_fasl_read PROTO((ptr bv, int ty, uptr offset, uptr len, ptr path, ptr externals));
extern ptr S_boot_read PROTO((INT fd, const char *path));
extern char *S_format_scheme_version PROTO((uptr n));
extern char *S_lookup_machine_type PROTO((uptr n));
extern void S_set_code_obj PROTO((char *who, IFASLCODE typ, ptr p, iptr n,
ptr x, iptr o));
extern ptr S_get_code_obj PROTO((IFASLCODE typ, ptr p, iptr n, iptr o));
Expand All @@ -131,10 +131,8 @@ extern void S_swap_dounderflow_header_endian PROTO((ptr code));
#endif

/* vfasl.c */
extern ptr S_to_vfasl PROTO((ptr v));
extern ptr S_vfasl PROTO((ptr bv, void *stream, iptr offset, iptr len));
extern ptr S_vfasl_to PROTO((ptr v));
extern IBOOL S_vfasl_can_combinep(ptr v);

/* flushcache.c */
extern void S_record_code_mod PROTO((ptr tc, uptr addr, uptr bytes));
Expand Down Expand Up @@ -201,6 +199,8 @@ extern ptr S_intern4 PROTO((ptr sym));
extern void S_intern_gensym PROTO((ptr g));
extern void S_retrofit_nonprocedure_code PROTO((void));
extern ptr S_mkstring PROTO((const string_char *s, iptr n));
extern I32 S_symbol_hash32(ptr str);
extern I64 S_symbol_hash64(ptr str);

/* io.c */
extern IBOOL S_file_existsp PROTO((const char *inpath, IBOOL followp));
Expand Down
78 changes: 56 additions & 22 deletions racket/src/ChezScheme/c/fasl.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@
* limitations under the License.
*/

/* The fasl writer is in "fasl.ss".
There's a second fasl reader and writer in "strip.ss", so it has
to be kept in sync with this one. */

/* fasl representation:
*
* <fasl-file> -> <fasl-group>*
Expand Down Expand Up @@ -52,6 +56,8 @@
*
* -> {bytevector}<uptr n><octet elt1>...<octet eltn>
*
* -> {stencil-vector}<uptr mask><octet elt1>...<octet eltn>
*
* -> {immediate}<uptr>
*
* -> {small-integer}<iptr>
Expand Down Expand Up @@ -115,6 +121,8 @@
* ...
* <faslreloc> # last relocation entry
*
* -> {begin}<val>...<val> # all but last is intended to be a {graph-def}
*
* <faslreloc> -> <byte type-etc> # bit 0: extended entry, bit 1: expect item offset, bit 2+: type
* <uptr code-offset>
* <uptr item-offset> # omitted if bit 1 of type-etc is 0
Expand Down Expand Up @@ -1545,19 +1553,27 @@ ptr S_get_code_obj(typ, p, n, o) IFASLCODE typ; iptr n, o; ptr p; {
return (ptr)(item - o);
}


#ifdef PORTABLE_BYTECODE

/* Address pieces in a movz,movk,movk,movk sequence are upper 16 bits */
#define ADDRESS_BITS_SHIFT 16
#define ADDRESS_BITS_MASK ((U32)0xffff0000)
#define ADDRESS_BITS_MASK ((U32)0xFFFF0000)
#define DEST_REG_MASK 0xF00

static void pb_set_abs(void *address, uptr item) {
((U32 *)address)[0] = ((((U32 *)address)[0] & ~ADDRESS_BITS_MASK) | ((item & 0xFFFF) << ADDRESS_BITS_SHIFT));
((U32 *)address)[1] = ((((U32 *)address)[1] & ~ADDRESS_BITS_MASK) | (((item >> 16) & 0xFFFF) << ADDRESS_BITS_SHIFT));
/* First word can have an arbitrary value due to vfasl offset
storage, so get the target register from the end: */
#if ptr_bytes == 8
((U32 *)address)[2] = ((((U32 *)address)[2] & ~ADDRESS_BITS_MASK) | (((item >> 32) & 0xFFFF) << ADDRESS_BITS_SHIFT));
((U32 *)address)[3] = ((((U32 *)address)[3] & ~ADDRESS_BITS_MASK) | (((item >> 48) & 0xFFFF) << ADDRESS_BITS_SHIFT));
int dest_reg = ((U32 *)address)[3] & DEST_REG_MASK;
#else
int dest_reg = ((U32 *)address)[1] & DEST_REG_MASK;
#endif

((U32 *)address)[0] = (pb_mov16_pb_zero_bits_pb_shift0 | dest_reg | ((item & 0xFFFF) << ADDRESS_BITS_SHIFT));
((U32 *)address)[1] = (pb_mov16_pb_keep_bits_pb_shift1 | dest_reg | (((item >> 16) & 0xFFFF) << ADDRESS_BITS_SHIFT));
#if ptr_bytes == 8
((U32 *)address)[2] = (pb_mov16_pb_keep_bits_pb_shift2 | dest_reg | (((item >> 32) & 0xFFFF) << ADDRESS_BITS_SHIFT));
((U32 *)address)[3] = (pb_mov16_pb_keep_bits_pb_shift3 | dest_reg | (((item >> 48) & 0xFFFF) << ADDRESS_BITS_SHIFT));
#endif
}

Expand All @@ -1571,17 +1587,17 @@ static uptr pb_get_abs(void *address) {
);
}

#endif /* AARCH64 */
#endif /* PORTABLE_BYTECODE */

#ifdef ARMV6
static void arm32_set_abs(void *address, uptr item) {
/* code generator produces ldrlit destreg, 0; brai 0; long 0 */
/* we change long 0 => long item */
*((U32 *)address + 2) = item;
/* given address is at long 0, which we change to `item` */
*((U32 *)address) = item;
}

static uptr arm32_get_abs(void *address) {
return *((U32 *)address + 2);
return *((U32 *)address);
}

#define MAKE_B(n) (0xEA000000 | (n))
Expand Down Expand Up @@ -1634,11 +1650,24 @@ static uptr arm32_get_jump(void *address) {
#define ADDRESS_BITS_SHIFT 5
#define ADDRESS_BITS_MASK ((U32)0x1fffe0)

/* Dest register in either movz or movk: */
#define DEST_REG_MASK 0x1F

#define MOVZ_OPCODE 0xD2800000
#define MOVK_OPCODE 0xF2800000
#define SHIFT16_OPCODE 0x00200000
#define SHIFT32_OPCODE 0x00400000
#define SHIFT48_OPCODE 0x00600000

static void arm64_set_abs(void *address, uptr item) {
((U32 *)address)[0] = ((((U32 *)address)[0] & ~ADDRESS_BITS_MASK) | ((item & 0xFFFF) << ADDRESS_BITS_SHIFT));
((U32 *)address)[1] = ((((U32 *)address)[1] & ~ADDRESS_BITS_MASK) | (((item >> 16) & 0xFFFF) << ADDRESS_BITS_SHIFT));
((U32 *)address)[2] = ((((U32 *)address)[2] & ~ADDRESS_BITS_MASK) | (((item >> 32) & 0xFFFF) << ADDRESS_BITS_SHIFT));
((U32 *)address)[3] = ((((U32 *)address)[3] & ~ADDRESS_BITS_MASK) | (((item >> 48) & 0xFFFF) << ADDRESS_BITS_SHIFT));
/* First word can have an arbitrary value due to vfasl offset
storage, so get the target register from the end: */
int dest_reg = ((U32 *)address)[3] & DEST_REG_MASK;

((U32 *)address)[0] = (MOVZ_OPCODE | dest_reg | ((item & 0xFFFF) << ADDRESS_BITS_SHIFT));
((U32 *)address)[1] = (MOVK_OPCODE | SHIFT16_OPCODE | dest_reg | (((item >> 16) & 0xFFFF) << ADDRESS_BITS_SHIFT));
((U32 *)address)[2] = (MOVK_OPCODE | SHIFT32_OPCODE | dest_reg | (((item >> 32) & 0xFFFF) << ADDRESS_BITS_SHIFT));
((U32 *)address)[3] = (MOVK_OPCODE | SHIFT48_OPCODE | dest_reg | (((item >> 48) & 0xFFFF) << ADDRESS_BITS_SHIFT));
}

static uptr arm64_get_abs(void *address) {
Expand All @@ -1655,21 +1684,26 @@ static uptr arm64_get_abs(void *address) {
#define UPDATE_ADDIS(item, instr) (((instr) & ~0xFFFF) | (((item) >> 16) & 0xFFFF))
#define UPDATE_ADDI(item, instr) (((instr) & ~0xFFFF) | ((item) & 0xFFFF))

#define MAKE_B(disp, callp) ((18 << 26) | (((disp) & 0xFFFFFF) << 2) | (callp))
#define MAKE_ADDIS(item) ((15 << 26) | (((item) >> 16) & 0xFFFF))
#define MAKE_ORI(item) ((24 << 26) | ((item) & 0xFFFF))
#define MAKE_NOP ((24 << 26))
#define MAKE_MTCTR ((31 << 26) | (9 << 16) | (467 << 1))
#define MAKE_BCTR(callp) ((19 << 26) | (20 << 21) | (528 << 1) | (callp))
#define MAKE_B(disp, callp) ((18 << 26) | (((disp) & 0xFFFFFF) << 2) | (callp))
#define MAKE_ADDIS(item) ((15 << 26) | (((item) >> 16) & 0xFFFF))
#define MAKE_ADDI(item) ((14 << 26) | ((item) & 0xFFFF))
#define MAKE_ORI(item) ((24 << 26) | ((item) & 0xFFFF))
#define MAKE_NOP ((24 << 26))
#define MAKE_MTCTR ((31 << 26) | (9 << 16) | (467 << 1))
#define MAKE_BCTR(callp) ((19 << 26) | (20 << 21) | (528 << 1) | (callp))

#define DEST_REG_MASK (0x1F << 21)

static void ppc32_set_abs(void *address, uptr item) {
/* code generator produces addis destreg, %r0, 0 (hi) ; addi destreg, destreg, 0 (lo) */
/* we change 0 (hi) => upper 16 bits of address */
/* we change 0 (lo) => lower 16 bits of address */
/* low part is signed: if negative, increment high part */
/* but the first word may have been overwritten for vfasl */
int dest_reg = (*((U32 *)address + 1)) & DEST_REG_MASK;
item = item + (item << 1 & 0x10000);
*((U32 *)address + 0) = UPDATE_ADDIS(item, *((U32 *)address + 0));
*((U32 *)address + 1) = UPDATE_ADDI(item, *((U32 *)address + 1));
*((U32 *)address + 0) = dest_reg | MAKE_ADDIS(item);
*((U32 *)address + 1) = dest_reg | dest_reg >> 5 | MAKE_ADDI(item);
}

static uptr ppc32_get_abs(void *address) {
Expand Down
Loading

0 comments on commit b7c0130

Please sign in to comment.